# Source code for codinglab.senders.probabilistic

"""
Probabilistic sender implementation for the coding experiments library.

This module provides a sender implementation that generates messages
randomly according to a specified probability distribution over the
source alphabet. It's useful for simulating realistic message sources
and testing coding schemes under various statistical conditions.
"""

# Module metadata: authorship, license and version strings for this module.
__author__ = "Mikhail Mikhailov"
__license__ = "MIT"
__version__ = "0.1.0"
# Public API: a star-import of this module exposes only ProbabilisticSender.
__all__ = ["ProbabilisticSender"]

import random
import time
from typing import Sequence, Iterator, Dict, Tuple, Optional
from .base import BaseSender
from ..interfaces import SourceChar, ChannelChar, Encoder
from ..types import Message, TransmissionEvent, TransmissionLog, TransmissionLogger
from ..logger import NullLogger


class ProbabilisticSender(BaseSender[SourceChar, ChannelChar]):
    """
    Sender that generates random messages with specified probabilities.

    This sender generates messages by randomly selecting symbols from a
    source alphabet according to a given probability distribution. Message
    lengths are drawn uniformly (inclusive bounds) from a specified range.

    Attributes:
        _probabilities: Probability distribution over source symbols
            (a private copy of the dictionary passed to the constructor)
        _alphabet: List of source symbols (keys from probabilities dict)
        _weights: List of probabilities corresponding to alphabet symbols
        _min_len: Minimum message length
        _max_len: Maximum message length
        _rng: Random number generator instance
        _message_id: Counter for assigning unique message IDs
    """

    def __init__(
        self,
        encoder: Encoder[SourceChar, ChannelChar],
        probabilities: Dict[SourceChar, float],
        message_length_range: Tuple[int, int],
        # NOTE(review): this default is evaluated once, at definition time,
        # so every sender built without an explicit logger shares the same
        # NullLogger instance. Presumably NullLogger is stateless -- confirm.
        logger: TransmissionLogger = NullLogger(),
        seed: Optional[int] = None,
    ) -> None:
        """
        Initialize the probabilistic sender.

        Args:
            encoder: Encoder instance for converting source to channel symbols
            probabilities: Dictionary mapping source symbols to their
                probabilities (each must be non-negative, and together they
                must sum to 1.0 within a tolerance of 0.001)
            message_length_range: Tuple of (min_length, max_length) for
                generated messages; both bounds are inclusive
            logger: Logger for recording transmission events
                (defaults to NullLogger)
            seed: Optional seed for the random number generator for
                reproducible experiments

        Raises:
            ValueError: If probabilities don't sum to approximately 1.0,
                if any probability is negative, or if the message length
                range is invalid
        """
        # Validate probabilities
        prob_sum = sum(probabilities.values())
        if not (0.999 <= prob_sum <= 1.001):  # Allow for floating-point errors
            raise ValueError(f"Probabilities must sum to 1.0, got {prob_sum}")

        # A distribution such as {a: 1.5, b: -0.5} passes the sum check but is
        # not a valid distribution; random.choices requires non-negative
        # weights, so reject such input here with a clear message.
        negative = {
            symbol: prob for symbol, prob in probabilities.items() if prob < 0
        }
        if negative:
            raise ValueError(
                f"Probabilities must be non-negative, got {negative}"
            )

        # Validate message length range
        min_len, max_len = message_length_range
        if min_len <= 0:
            raise ValueError(
                f"Minimum message length must be positive, got {min_len}"
            )
        if max_len < min_len:
            raise ValueError(
                f"Maximum message length ({max_len}) must be >= minimum ({min_len})"
            )

        super().__init__(encoder, logger)

        # Keep a private copy so that later mutation of the caller's dict
        # cannot desynchronise it from the alphabet/weights lists below.
        self._probabilities = dict(probabilities)
        """Probability distribution over source symbols."""

        self._alphabet = list(self._probabilities.keys())
        """List of source symbols."""

        self._weights = list(self._probabilities.values())
        """List of probabilities corresponding to alphabet symbols."""

        self._min_len = min_len
        """Minimum message length."""

        self._max_len = max_len
        """Maximum message length."""

        self._rng = random.Random(seed)
        """Random number generator instance."""

        self._message_id = 0
        """Counter for assigning unique message IDs."""

    @property
    def alphabet(self) -> Sequence[SourceChar]:
        """
        Get the source alphabet from the probabilities dictionary.

        Returns:
            Sequence of source symbols that can appear in messages
        """
        return self._alphabet

    def message_stream(self, stream_len: int) -> Iterator[Message[ChannelChar]]:
        """
        Generate a stream of random encoded messages.

        Each message is generated by randomly selecting symbols from the
        source alphabet according to the specified probability distribution.
        Message lengths are uniformly distributed within the configured range.

        The argument is validated when this method is called; the messages
        themselves are produced lazily as the returned iterator is consumed.

        Args:
            stream_len: Number of messages to generate

        Returns:
            Iterator over encoded messages wrapped in Message containers
            with unique IDs

        Raises:
            ValueError: If stream_len <= 0
        """
        # Validate here rather than inside the generator: a generator body
        # does not run until the first next(), which would defer the error.
        if stream_len <= 0:
            raise ValueError(f"stream_len must be positive, got {stream_len}")
        return self._generate_messages(stream_len)

    def _generate_messages(
        self, stream_len: int
    ) -> Iterator[Message[ChannelChar]]:
        """Lazily generate, log, encode and yield ``stream_len`` messages."""
        for _ in range(stream_len):
            # Generate random message (randint bounds are inclusive)
            length = self._rng.randint(self._min_len, self._max_len)
            source_data = self._rng.choices(
                self._alphabet, weights=self._weights, k=length
            )

            # Create source message with unique ID
            source_message = Message(id=self._message_id, data=source_data)
            # NOTE(review): _last_message is not initialised in this class;
            # presumably it is declared and consumed by BaseSender -- confirm.
            self._last_message = source_message
            self._message_id += 1

            # Log message generation
            self._logger.log(
                TransmissionLog(
                    timestamp=time.time(),
                    event=TransmissionEvent.SOURCE_GENERATED,
                    message=source_message,
                    data={
                        "length": length,
                        "alphabet": self._alphabet,
                        "probabilities": self._probabilities,
                    },
                )
            )

            # Encode and yield the message under the source message's ID
            encoded_data = self._encoder.encode(source_data)
            yield Message(id=source_message.id, data=encoded_data)

    def reset(self) -> None:
        """
        Reset the sender's message ID counter.

        This method resets the message ID counter to 0, allowing the sender
        to start fresh with a new sequence of message IDs. The random number
        generator state is not touched.
        """
        self._message_id = 0