"""
Base class for prefix code encoder-decoders for the coding experiments library.
This module provides an abstract base class for implementing prefix codes,
which are variable-length codes where no codeword is a prefix of another.
This property enables unique decoding without separators between codewords.
Common examples include Huffman coding, Shannon-Fano coding, and others.
"""
# Module metadata
__author__ = "Mikhail Mikhailov"
__license__ = "MIT"
__version__ = "0.1.0"
__all__ = ["PrefixEncoderDecoder"]
from typing import Sequence, Dict, Optional, List
from abc import ABC, abstractmethod
from .prefix_code_tree import PrefixCodeTree, TreeNode
from ..interfaces import Encoder, Decoder
from ..types import SourceChar, ChannelChar
[docs]
class PrefixEncoderDecoder(
Encoder[SourceChar, ChannelChar], Decoder[SourceChar, ChannelChar], ABC
):
"""
Abstract base class for prefix code encoder-decoders.
This class implements the common functionality for prefix codes,
including encoding, decoding, and maintaining both a code table
and a decoding tree. Concrete implementations must define how to
build the prefix code tree based on their specific algorithm.
Attributes:
_source_alphabet: Alphabet of source symbols to encode
_channel_alphabet: Alphabet of channel symbols for encoding
_code_table: Mapping from source symbols to their code sequences
_tree: Prefix code tree used for decoding
"""
[docs]
def __init__(
self,
source_alphabet: Sequence[SourceChar],
channel_alphabet: Sequence[ChannelChar],
) -> None:
"""
Initialize the prefix encoder-decoder with alphabets.
Args:
source_alphabet: Sequence of source symbols to be encoded
channel_alphabet: Sequence of channel symbols available for encoding
Raises:
ValueError: If either alphabet is empty
"""
if not source_alphabet:
raise ValueError("Source alphabet cannot be empty")
if not channel_alphabet:
raise ValueError("Channel alphabet cannot be empty")
self._source_alphabet = source_alphabet
"""Alphabet of source symbols to encode."""
self._channel_alphabet = channel_alphabet
"""Alphabet of channel symbols for encoding."""
self._code_table: Optional[Dict[SourceChar, Sequence[ChannelChar]]] = {}
"""Mapping from source symbols to their code sequences."""
self._tree: Optional[PrefixCodeTree[ChannelChar, SourceChar]] = None
"""Prefix code tree used for decoding."""
self._build_prefix_code_tree()
self._build_table_from_tree()
@abstractmethod
def _build_prefix_code_tree(self) -> None:
"""
Abstract method to build the prefix code tree.
Concrete subclasses must implement this method to construct
a prefix code tree according to their specific algorithm.
The implementation should populate either _code_table or _tree,
and then call the appropriate helper method to ensure both
representations are available.
"""
pass
def _build_tree_from_table(self) -> None:
"""
Build a decoding tree from the code table.
This method constructs a prefix code tree by inserting each
code sequence from the code table into the tree. After calling
this method, the tree can be used for efficient decoding.
Raises:
RuntimeError: If the code table is empty or None
"""
if self._code_table is None or not self._code_table:
raise RuntimeError("Cannot build tree from empty code table")
self._tree = PrefixCodeTree()
for symbol, code in self._code_table.items():
self._tree.insert_code(list(code), symbol)
def _build_table_from_tree(self) -> None:
"""
Build a code table from the decoding tree.
This method traverses the prefix code tree and constructs a
mapping from source symbols to their code sequences. After
calling this method, the code table can be used for encoding.
Raises:
RuntimeError: If the tree is empty or None
"""
if self._tree is None or self._tree.root is None:
raise RuntimeError("Cannot build table from empty tree")
self._code_table = {}
self._collect_codes_from_node(self._tree.root, [])
def _collect_codes_from_node(
self, node: TreeNode[ChannelChar, SourceChar], current_code: List[ChannelChar]
) -> None:
"""
Recursively collect code sequences from the tree.
This helper method performs a depth-first traversal of the
prefix code tree, building code sequences for each leaf node
and storing them in the code table.
Args:
node: Current node in the tree traversal
current_code: Code sequence accumulated so far
Note:
This method modifies self._code_table in place.
"""
assert self._code_table is not None
if node.value is not None:
# Leaf node: store the accumulated code for this symbol
self._code_table[node.value] = list(current_code)
for char, child_node in node.children.items():
# Recursively traverse child nodes
self._collect_codes_from_node(child_node, current_code + [char])
[docs]
def encode(self, message: Sequence[SourceChar]) -> Sequence[ChannelChar]:
"""
Encode a source message using the prefix code.
Args:
message: Sequence of source symbols to encode
Returns:
Sequence of channel symbols representing the encoded message
Raises:
RuntimeError: If the code table has not been built
ValueError: If any symbol in the message is not in the source alphabet
"""
if self._code_table is None or not self._code_table:
raise RuntimeError("Code table not built")
encoded: List[ChannelChar] = []
for symbol in message:
if symbol not in self._code_table.keys():
raise ValueError(f"Symbol {symbol} not in source alphabet")
encoded.extend(self._code_table[symbol])
return encoded
[docs]
def decode(self, encoded: Sequence[ChannelChar]) -> Sequence[SourceChar]:
"""
Decode a sequence of channel symbols using the prefix code tree.
Args:
encoded: Sequence of channel symbols to decode
Returns:
Sequence of source symbols representing the decoded message
Raises:
RuntimeError: If the decoding tree has not been built
ValueError: If the encoded sequence cannot be decoded
"""
if self._tree is None:
raise RuntimeError("Decoding tree not built")
position = 0
decoded = []
encoded_list = list(encoded)
while position < len(encoded_list):
symbol, new_position = self._tree.decode(encoded_list, position)
if symbol is None:
raise ValueError(
f"Cannot decode sequence {encoded}. Processed to {position}"
)
decoded.append(symbol)
position = new_position
return decoded
@property
def code_table(self) -> Optional[Dict[SourceChar, Sequence[ChannelChar]]]:
"""
Get the encoding table if available.
Returns:
Dictionary mapping source symbols to their code sequences,
or None if no code table has been built
"""
return self._code_table
@property
def tree(self) -> Optional[PrefixCodeTree[ChannelChar, SourceChar]]:
"""
Get the decoding tree if available.
Returns:
Prefix code tree used for decoding, or None if no tree
has been built
"""
return self._tree
@property
def source_alphabet(self) -> Sequence[SourceChar]:
"""
Get the source alphabet.
Returns:
Sequence of source symbols that can be encoded
"""
return self._source_alphabet
@property
def channel_alphabet(self) -> Sequence[ChannelChar]:
"""
Get the channel alphabet.
Returns:
Sequence of channel symbols available for encoding
"""
return self._channel_alphabet