Source code for bitlist.bitlist

"""
Pure-Python library for working with bit vectors.
"""
from __future__ import annotations
from typing import Union, Optional, Set, Sequence, Iterable
import doctest
import collections.abc
from parts import parts

[docs]class bitlist: """ Data structure for representing bit vectors. The constructor accepts a variety of input types (including integers, bytes-like objects, strings of binary digits, iterables of binary digits, and other :obj:`bitlist` instances) and parses them in an appropriate manner to build a bit vector. Integer arguments are converted into a big-endian binary representation. >>> bitlist(1) bitlist('1') >>> bitlist(123) bitlist('1111011') >>> bitlist('1111011') bitlist('1111011') >>> bitlist(bytes([255, 254])) bitlist('1111111111111110') >>> bitlist([1, 0, 1, 1]) bitlist('1011') >>> bitlist(bitlist('1010')) bitlist('1010') The :obj:`fromhex` method can be used to convert a hexadecimal string into an instance. This is equivalent to converting the hexadecimal string into a bytes-like object and then creating a bit vector from that object. >>> bitlist.fromhex('abcd') bitlist('1010101111001101') >>> bitlist(bytes.fromhex('abcd')) bitlist('1010101111001101') A :obj:`bitlist` instance can be converted into an integer using the built-in :obj:`int` function. By default, a big-endian representation of integers is used. The recommended approach for switching to a little-endian representation is to reverse the bit vector. >>> b = bitlist('1111011') >>> int(b) 123 >>> int(bitlist(list(reversed(b)))) 111 An instance can be converted into a string of binary characters using the :obj:`bin` method and into a hexadecimal string using the :obj:`hex` method. Conversion to a bytes-like object is possible via the built-in :obj:`to_bytes` method. >>> b.bin() '1111011' >>> b.hex() '7b' >>> b.to_bytes().hex() '7b' >>> list(b.to_bytes()) [123] An instance is itself a :obj:`~collections.abc.Sequence` and is iterable. Iterating over an instance yields the sequence of bits (*i.e.*, integers that are ``0`` or ``1``). >>> [bit for bit in bitlist('10010011')] [1, 0, 0, 1, 0, 0, 1, 1] An individual bit can be retrieved (and assigned a new value) via its index. Slice notation is also supported. >>> b = bitlist('0101') >>> b[3] 1 >>> b[3] = 0 >>> b bitlist('0100') >>> b[1:-1] bitlist('10') Instances are mutable. Conversion to an integer, binary string, or tuple is recommended if a corresponding immutable object is required. >>> tuple(bitlist('1010')) (1, 0, 1, 0) When the constructor is supplied a :obj:`bitlist` instance, a distinct copy of the supplied instance is created. >>> b = bitlist(123, 8) >>> c = bitlist(b) >>> c[0] = 1 >>> b bitlist('01111011') >>> c bitlist('11111011') When the constructor is applied to a bytes-like object, the leading zero digits (*i.e.*, those on the left-hand side) **are retained** (up to the least multiple of eight larger than the minimum number of binary digits required). >>> bitlist(bytes([123])) bitlist('01111011') >>> bitlist(bytearray([123, 123])) bitlist('0111101101111011') When the constructor is applied to a string (consisting only of characters that correspond to binary digits), leading zero digits are also retained. >>> bitlist('01111011') bitlist('01111011') However, when the constructor is applied to an integer argument, the created bit vector has no leading (*i.e.*, left-hand) zeros and contains the minimum number of bits necessary to represent the supplied argument (using a big-endian representation). >>> bitlist(2) bitlist('10') >>> bitlist(16) bitlist('10000') The above implies that the empty bit vector represents (and is equivalent to) the numerical value of zero. >>> bitlist() == bitlist(0) True >>> bitlist() == bitlist('0') == bitlist('00') == bitlist('000') True >>> bitlist() == bitlist('') True >>> bitlist() == bitlist([]) True While the equality method :obj:`__eq__` determines equality between two bit vectors based on the integer values they represent, the fact that leading zero digits are retained in some cases means that two equivalent bit vectors *may have different lengths*. >>> bitlist('0') == bitlist('000') True >>> len(bitlist('0')) == len(bitlist('000')) False For all other (non-integer) input types, the length of the vector (and consequently the number of leading zeos) is preserved. >>> bitlist('0000') bitlist('0000') >>> bitlist([0, 1, 1]) bitlist('011') >>> bitlist([0, 0, 1, 1]) bitlist('0011') >>> bitlist(bitlist('00010')) bitlist('00010') Only strings that consist of characters corresponding to binary digits are accepted by the constructor. >>> bitlist('abcd') Traceback (most recent call last): ... ValueError: each character in string must be '0' or '1' To convert the underlying binary representation of a string into a bit vector, it is necessary to encode the string as a bytes-like object. >>> bitlist('abcd'.encode('utf8')) bitlist('01100001011000100110001101100100') The ``length`` parameter can be used to specify the length of the bit vector, overriding the default behaviors. If the length parameter has a value that is greater than the number of bits that would be included according to a default behavior, the bit vector is padded with zero bits *on the left-hand side* to match the specified length. >>> bitlist(bytes([123]), 16) bitlist('0000000001111011') >>> bitlist(16, 64) bitlist('0000000000000000000000000000000000000000000000000000000000010000') >>> bitlist(bitlist(123), 8) bitlist('01111011') If the ``length`` parameter has a value that is less than the minimum number of bits that would be included according to a default behavior, the bit vector is truncated *on the left-hand side* to match the specified length. >>> bitlist(bytes([123]), 7) bitlist('1111011') >>> bitlist(bytes([123]), 4) bitlist('1011') >>> bitlist(bytes([123]), 2) bitlist('11') >>> bitlist(bytes([123]), 0) bitlist() >>> bitlist(123, 0) bitlist() >>> bitlist([1, 1, 1], 0) bitlist() Any attempt to construct an instance using unsupported arguments raises an exception. >>> bitlist([1.1, 2.2, 3.3]) Traceback (most recent call last): ... TypeError: items in iterable must be integers >>> bitlist([2, 3, 4, 5]) Traceback (most recent call last): ... ValueError: each integer in iterable must be 0 or 1 >>> bitlist(float(1)) Traceback (most recent call last): ... TypeError: bitlist constructor received unsupported argument """ def __init__( self: bitlist, argument: Union[int, str, bytes, bytearray, bitlist, Iterable[int], None] = None, length: Optional[int] = None ): """ Parse argument depending on its type and build a bit vector instance. """ # pylint: disable=too-many-branches if argument is None: # By default, always return the bit vector representing zero. self.bits = bytearray([0]) elif isinstance(argument, int): # Convert any integer into its bit representation, # starting with the first non-zero digit. self.bits = bytearray(reversed([int(b) for b in f'{argument:b}'])) elif isinstance(argument, str): if not all(c in ('0', '1') for c in argument): raise ValueError("each character in string must be '0' or '1'") # Convert string of binary digit characters. self.bits = bytearray(reversed([int(b) for b in argument])) elif isinstance(argument, (bytes, bytearray)): # Convert bytes-like object into its constituent bits, # with exactly eight bits per byte (i.e., leading zeros # are included). self.bits = \ bytearray([ b for byte in reversed(argument) for b in [(byte >> i) % 2 for i in range(0, 8)] ]) elif isinstance(argument, bitlist): # Make constructor idempotent (but have it iterate # to reflect the behavior of the :obj:`list` function). self.bits = bytearray(list(argument.bits)) elif isinstance(argument, collections.abc.Iterable): items = list(argument) if not all(isinstance(item, int) for item in items): raise TypeError('items in iterable must be integers') if not all(item in (0, 1) for item in items): raise ValueError('each integer in iterable must be 0 or 1') # Convert list of binary digits represented as integers. self.bits = bytearray(reversed(items)) if len(items) > 0 else bytearray([0]) else: raise TypeError('bitlist constructor received unsupported argument') if length is not None: # Pad or truncate the bit vector to ensure the specified length. if length > len(self.bits): self.bits = self.bits + bytes([0] * (length - len(self.bits))) elif length < len(self.bits): self.bits = self.bits[0:length]
[docs] @staticmethod def fromhex(s: str) -> bitlist: """ Build an instance from a hexadecimal string. >>> bitlist.fromhex('abcd') bitlist('1010101111001101') """ return bitlist(bytes.fromhex(s))
[docs] def __str__(self: bitlist) -> str: """ Return a string representation (that can also be evaluated as a valid Python expression if the class is in the namespace). >>> bitlist('01') bitlist('01') """ return \ 'bitlist(' + \ (("'" + self.bin() + "'") if len(self.bits) > 0 else '') + \ ')'
[docs] def __repr__(self: bitlist) -> str: """ Return a string representation (that can also be evaluated as a valid Python expression if the class is in the namespace). """ return str(self)
[docs] def __int__(self: bitlist) -> int: """ Interpret the bit vector as a big-endian representation of an integer and return that integer. >>> int(bitlist(bytes([128, 129]))) == int.from_bytes(bytes([128, 129]), 'big') True """ return sum(b*(2**i) for (i, b) in enumerate(self.bits))
[docs] def to_bytes(self: bitlist) -> bytes: """ Return a bytes-like object representation. Note that the number of bits will be padded (on the left) to a multiple of eight. >>> int.from_bytes(bitlist('10000000').to_bytes(), 'big') 128 >>> int.from_bytes(bitlist('1000000010000011').to_bytes(), 'big') 32899 >>> int.from_bytes(bitlist('110000000').to_bytes(), 'big') 384 >>> bitlist(129 + 128*256).to_bytes().hex() '8081' >>> bitlist('11').to_bytes().hex() '03' """ return bytes(reversed([ int(bitlist(list(reversed(bs)))) for bs in parts(self.bits, length=8) ]))
[docs] def bin(self: bitlist) -> str: """ Return a binary string representation. This matches the string emitted as part of the output of the default string conversion method. >>> bitlist('010011').bin() '010011' """ return ''.join(list(reversed([str(b) for b in self.bits])))
[docs] def hex(self: bitlist) -> str: """ Return a hexadecimal string representation. Note that the number of bits will be padded (on the left) to a multiple of eight. >>> bitlist(bytes([123])).hex() '7b' """ return self.to_bytes().hex()
[docs] def __len__(self: bitlist) -> int: """ Return length of bit vector (defined to be the number of bits it contains). >>> bitlist('11') + bitlist('10') bitlist('1110') """ return len(self.bits)
[docs] def __add__(self: bitlist, other: bitlist) -> bitlist: """ The addition operator can be used for concatenation, as with other objects that have sequence types. >>> bitlist('11') + bitlist('10') bitlist('1110') """ return bitlist(list(reversed(list(other.bits)+list(self.bits))))
[docs] def __mul__(self: bitlist, other: int) -> bitlist: """ The multiplication operator can be used for repetition, as with other objects that have sequence types. >>> bitlist(256)*2 bitlist('100000000100000000') >>> bitlist(256)*'a' Traceback (most recent call last): ... ValueError: repetition parameter must be an integer """ if isinstance(other, int): return bitlist(list(reversed(list(self.bits)))*other) raise ValueError('repetition parameter must be an integer')
[docs] def __truediv__( self: bitlist, other: Union[int, Set[int], Sequence[int]] ) -> Sequence[bitlist]: """ The division operator can be used to partition a bit vector into the specified number of parts, into parts of a specified length, or into a sequence of parts in which each part's length is specified in a sequence of integers (leveraging and mirroring the capabilities of the :obj:`~parts.parts.parts` function). >>> bitlist('11010001') / 2 [bitlist('1101'), bitlist('0001')] >>> bitlist('11010001') / [2, 6] [bitlist('11'), bitlist('010001')] >>> bitlist('11010001') / {4} [bitlist('1101'), bitlist('0001')] >>> bitlist('11010001') / 3 [bitlist('110'), bitlist('100'), bitlist('01')] """ if isinstance(other, set) and len(other) == 1 and isinstance(list(other)[0], int): ps = parts(self.bits, length=list(other)[0]) elif isinstance(other, list): ps = parts(self.bits, length=list(reversed(other))) else: ps = parts(self.bits, other) return list(reversed([bitlist(list(reversed(p))) for p in ps]))
[docs] def __getitem__(self: bitlist, key: Union[int, slice]) -> Union[int, bitlist]: """ Retrieve the bit at the specified index, or construct a slice of the bit vector. >>> bitlist('1111011')[2] 1 >>> bitlist('0111011')[0] 0 >>> bitlist('10101000')[0:5] bitlist('10101') >>> bitlist('10101000101010001010100010101000')[0:16] bitlist('1010100010101000') >>> bitlist('101')[4] Traceback (most recent call last): ... IndexError: bitlist index out of range >>> bitlist('101')['a'] Traceback (most recent call last): ... TypeError: bitlist indices must be integers or slices """ if isinstance(key, int): if key < 0: # Support big-endian interface using negative indices. return self.bits[abs(key) - 1] if abs(key) <= len(self.bits) else 0 if key < len(self.bits): return self.bits[len(self.bits) - 1 - key] raise IndexError('bitlist index out of range') if isinstance(key, slice): return bitlist(list(reversed(self.bits))[key]) raise TypeError('bitlist indices must be integers or slices')
[docs] def __setitem__(self: bitlist, i: int, b: int): """ Set the bit at the specified index to the supplied value. >>> x = bitlist('1111011') >>> x[2] = 0 >>> x bitlist('1101011') >>> x[7] = 0 Traceback (most recent call last): ... IndexError: bitlist index out of range """ if i < 0: # Support big-endian interface using negative indices. self.bits = \ bytearray([ (self[j] if j != i else b) for j in range(-1, min(-len(self.bits), -abs(i)) - 1, -1) ]) elif i < len(self.bits): i = len(self.bits) - 1 - i self.bits = \ bytearray([ (self.bits[j] if j != i else b) for j in range(0, len(self.bits)) ]) else: raise IndexError('bitlist index out of range')
[docs] def __lshift__(self: bitlist, n: Union[int, Set[int]]) -> bitlist: """ The left shift operator can be used for both performing a bit shift in the traditional manner (increasing the length of the bit vector) or for bit rotation (if the second parameter is a set). >>> bitlist('11') << 2 bitlist('1100') >>> bitlist('11011') << {0} bitlist('11011') >>> bitlist('11011') << {1} bitlist('10111') >>> bitlist('11011') << {2} bitlist('01111') >>> bitlist('11011') << {3} bitlist('11110') >>> bitlist('11011') << {13} bitlist('11110') >>> bitlist('1') << {13} bitlist('1') """ if isinstance(n, set) and len(n) == 1 and isinstance(list(n)[0], int): n = list(n)[0] % len(self) # Allow rotations to wrap around. return bitlist(list(self.bits[n:]) + list(self.bits[:n])) return bitlist(list(reversed(list([0] * n) + list(self.bits))))
[docs] def __rshift__(self: bitlist, n: Union[int, Set[int]]) -> bitlist: """ The right shift operator can be used for both performing a bit shift in the traditional manner (truncating bits on the right-hand side as necessary) or for bit rotation (if the second parameter is a set). >>> bitlist('1111') >> 2 bitlist('11') >>> bitlist('11011') >> {0} bitlist('11011') >>> bitlist('11011') >> {1} bitlist('11101') >>> bitlist('11011') >> {2} bitlist('11110') >>> bitlist('11011') >> {3} bitlist('01111') >>> bitlist('11011') >> {13} bitlist('01111') >>> bitlist('1') >> {13} bitlist('1') """ if isinstance(n, set) and len(n) == 1 and isinstance(list(n)[0], int): n = list(n)[0] % len(self) # Allow rotations to wrap around. return bitlist(list(self.bits[-n:]) + list(self.bits[:-n])) return bitlist(list(reversed(self.bits[n:])))
[docs] def __and__(self: bitlist, other: bitlist) -> bitlist: """ Logical operators are applied bitwise without changing the length. >>> bitlist('0100') & bitlist('1100') bitlist('0100') >>> bitlist('010') & bitlist('11') Traceback (most recent call last): ... ValueError: arguments to logical operations must have equal lengths """ if len(self) != len(other): raise ValueError( 'arguments to logical operations must have equal lengths' ) return bitlist(list(reversed( [a & b for (a, b) in zip(self.bits, other.bits)] )))
[docs] def __or__(self: bitlist, other: bitlist) -> bitlist: """ Logical operators are applied bitwise without changing the length. >>> bitlist('0100') | bitlist('1100') bitlist('1100') >>> bitlist('010') | bitlist('11') Traceback (most recent call last): ... ValueError: arguments to logical operations must have equal lengths """ if len(self) != len(other): raise ValueError( 'arguments to logical operations must have equal lengths' ) return bitlist(list(reversed( [a | b for (a, b) in zip(self.bits, other.bits)] )))
[docs] def __xor__(self: bitlist, other: bitlist) -> bitlist: """ Logical operators are applied bitwise without changing the length. >>> bitlist('0100') ^ bitlist('1101') bitlist('1001') >>> bitlist('010') ^ bitlist('11') Traceback (most recent call last): ... ValueError: arguments to logical operations must have equal lengths """ if len(self) != len(other): raise ValueError( 'arguments to logical operations must have equal lengths' ) return bitlist(list(reversed( [a ^ b for (a, b) in zip(self.bits, other.bits)] )))
[docs] def __invert__(self: bitlist) -> bitlist: """ Logical operators are applied bitwise without changing the length. Inversion flips all bits and corresponds to bitwise logical negation. >>> ~bitlist('0100') bitlist('1011') """ return bitlist(list(reversed([1-b for b in self.bits])))
[docs] def __bool__(self: bitlist) -> bool: """ Any non-zero instance is interpreted as ``True``. >>> bool(bitlist('0100')) True >>> bool(bitlist('0000')) False """ return 1 in self.bits
[docs] def __eq__(self: bitlist, other: bitlist) -> bool: """ Instances are interpreted as integers when relational operators are applied. >>> bitlist('111') == bitlist(7) True >>> bitlist(123) == bitlist(0) False >>> bitlist(123) == bitlist('0001111011') True >>> bitlist('001') == bitlist('1') True """ # Ignores leading zeros in representation. return int(self) == int(other)
[docs] def __ne__(self: bitlist, other: bitlist) -> bool: """ Instances are interpreted as integers when relational operators are applied. >>> bitlist('111') != bitlist(7) False >>> bitlist(123) != bitlist(0) True >>> bitlist('001') != bitlist('1') False """ # Ignores leading zeros in representation. return int(self) != int(other)
[docs] def __lt__(self: bitlist, other: bitlist) -> bool: """ Instances are interpreted as integers when relational operators are applied. >>> bitlist(123) < bitlist(0) False >>> bitlist(123) < bitlist(123) False >>> bitlist(12) < bitlist(23) True """ return int(self) < int(other)
[docs] def __le__(self: bitlist, other: bitlist) -> bool: """ Instances are interpreted as integers when relational operators are applied. >>> bitlist(123) <= bitlist(0) False >>> bitlist(123) <= bitlist(123) True >>> bitlist(12) <= bitlist(23) True """ return int(self) <= int(other)
[docs] def __gt__(self: bitlist, other: bitlist) -> bool: """ Instances are interpreted as integers when relational operators are applied. >>> bitlist(123) > bitlist(0) True >>> bitlist(123) > bitlist(123) False >>> bitlist(12) > bitlist(23) False """ return int(self) > int(other)
[docs] def __ge__(self: bitlist, other: bitlist) -> bool: """ Instances are interpreted as integers when relational operators are applied. >>> bitlist(123) >= bitlist(0) True >>> bitlist(123) >= bitlist(123) True >>> bitlist(12) >= bitlist(23) False """ return int(self) >= int(other)
if __name__ == '__main__': doctest.testmod() # pragma: no cover