1#!/usr/bin/env python 2r""" 3This package defines classes that simplify bit-wise creation, manipulation and 4interpretation of data. 5 6Classes: 7 8Bits -- An immutable container for binary data. 9BitArray -- A mutable container for binary data. 10ConstBitStream -- An immutable container with streaming methods. 11BitStream -- A mutable container with streaming methods. 12 13 Bits (base class) 14 / \ 15 + mutating methods / \ + streaming methods 16 / \ 17 BitArray ConstBitStream 18 \ / 19 \ / 20 \ / 21 BitStream 22 23Functions: 24 25pack -- Create a BitStream from a format string. 26 27Exceptions: 28 29Error -- Module exception base class. 30CreationError -- Error during creation. 31InterpretError -- Inappropriate interpretation of binary data. 32ByteAlignError -- Whole byte position or length needed. 33ReadError -- Reading or peeking past the end of a bitstring. 34 35https://github.com/scott-griffiths/bitstring 36""" 37 38__licence__ = """ 39The MIT License 40 41Copyright (c) 2006 Scott Griffiths (dr.scottgriffiths@gmail.com) 42 43Permission is hereby granted, free of charge, to any person obtaining a copy 44of this software and associated documentation files (the "Software"), to deal 45in the Software without restriction, including without limitation the rights 46to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 47copies of the Software, and to permit persons to whom the Software is 48furnished to do so, subject to the following conditions: 49 50The above copyright notice and this permission notice shall be included in 51all copies or substantial portions of the Software. 52 53THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 54IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 55FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 56AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 57LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 58OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 59THE SOFTWARE. 60""" 61 62__version__ = "3.1.9" 63 64__author__ = "Scott Griffiths" 65 66import numbers 67import copy 68import sys 69import re 70import binascii 71import mmap 72import os 73import struct 74import operator 75import array 76import io 77import collections 78 79try: 80 collectionsAbc = collections.abc 81except AttributeError: # Python 2.7 82 collectionsAbc = collections 83 84byteorder = sys.byteorder 85 86bytealigned = False 87"""Determines whether a number of methods default to working only on byte boundaries.""" 88 89 90# Maximum number of digits to use in __str__ and __repr__. 91MAX_CHARS = 250 92 93# Maximum size of caches used for speed optimisations. 94CACHE_SIZE = 1000 95 96# Set this to True for extra assertions for debugging. 97_debug = False 98 99 100class Error(Exception): 101 """Base class for errors in the bitstring module.""" 102 103 def __init__(self, *params): 104 self.msg = params[0] if params else '' 105 self.params = params[1:] 106 107 def __str__(self): 108 if self.params: 109 return self.msg.format(*self.params) 110 return self.msg 111 112 113class ReadError(Error, IndexError): 114 """Reading or peeking past the end of a bitstring.""" 115 116 def __init__(self, *params): 117 Error.__init__(self, *params) 118 119 120class InterpretError(Error, ValueError): 121 """Inappropriate interpretation of binary data.""" 122 123 def __init__(self, *params): 124 Error.__init__(self, *params) 125 126 127class ByteAlignError(Error): 128 """Whole-byte position or length needed.""" 129 130 def __init__(self, *params): 131 Error.__init__(self, *params) 132 133 134class CreationError(Error, ValueError): 135 """Inappropriate argument during bitstring creation.""" 136 137 def __init__(self, *params): 138 Error.__init__(self, *params) 139 140 141class ConstByteStore(object): 142 """Stores raw bytes together with a bit offset and length. 143 144 Used internally - not part of public interface. 145 """ 146 147 __slots__ = ('offset', '_rawarray', 'bitlength') 148 149 def __init__(self, data, bitlength=None, offset=None): 150 """data is either a bytearray or a MmapByteArray""" 151 self._rawarray = data 152 if offset is None: 153 offset = 0 154 if bitlength is None: 155 bitlength = 8 * len(data) - offset 156 self.offset = offset 157 self.bitlength = bitlength 158 159 def __iter__(self): 160 start_byte, start_bit = divmod(self.offset, 8) 161 end_byte, end_bit = divmod(self.offset + self.bitlength, 8) 162 163 for byte_index in xrange(start_byte, end_byte): 164 byte = self._rawarray[byte_index] 165 for bit in range(start_bit, 8): 166 yield bool(byte & (128 >> bit)) 167 start_bit = 0 168 169 if end_bit: 170 byte = self._rawarray[end_byte] 171 for bit in range(start_bit, end_bit): 172 yield bool(byte & (128 >> bit)) 173 174 def _getbit_lsb0(self, pos): 175 assert 0 <= pos < self.bitlength 176 pos = self.bitlength - pos - 1 177 byte, bit = divmod(self.offset + pos, 8) 178 return bool(self._rawarray[byte] & (128 >> bit)) 179 180 def _getbit_msb0(self, pos): 181 assert 0 <= pos < self.bitlength 182 byte, bit = divmod(self.offset + pos, 8) 183 return bool(self._rawarray[byte] & (128 >> bit)) 184 185 def getbyte(self, pos): 186 """Direct access to byte data.""" 187 return self._rawarray[pos] 188 189 def getbyteslice(self, start, end): 190 """Direct access to byte data.""" 191 c = self._rawarray[start:end] 192 return c 193 194 @property 195 def bytelength(self): 196 if not self.bitlength: 197 return 0 198 sb = self.offset // 8 199 eb = (self.offset + self.bitlength - 1) // 8 200 return eb - sb + 1 201 202 def __copy__(self): 203 return ByteStore(self._rawarray[:], self.bitlength, self.offset) 204 205 def _appendstore(self, store): 206 """Join another store on to the end of this one.""" 207 if not store.bitlength: 208 return 209 # Set new array offset to the number of bits in the final byte of current array. 210 store = offsetcopy(store, (self.offset + self.bitlength) % 8) 211 if store.offset: 212 # first do the byte with the join. 213 joinval = (self._rawarray.pop() & (255 ^ (255 >> store.offset)) | 214 (store.getbyte(0) & (255 >> store.offset))) 215 self._rawarray.append(joinval) 216 self._rawarray.extend(store._rawarray[1:]) 217 else: 218 self._rawarray.extend(store._rawarray) 219 self.bitlength += store.bitlength 220 221 def _prependstore(self, store): 222 """Join another store on to the start of this one.""" 223 if not store.bitlength: 224 return 225 # Set the offset of copy of store so that it's final byte 226 # ends in a position that matches the offset of self, 227 # then join self on to the end of it. 228 store = offsetcopy(store, (self.offset - store.bitlength) % 8) 229 assert (store.offset + store.bitlength) % 8 == self.offset % 8 230 bit_offset = self.offset % 8 231 if bit_offset: 232 # first do the byte with the join. 233 joinval = (store.getbyte(-1) & (255 ^ (255 >> bit_offset)) | 234 (self._rawarray[self.byteoffset] & (255 >> bit_offset))) 235 store._rawarray[-1] = joinval 236 store._rawarray.extend(self._rawarray[self.byteoffset + 1: self.byteoffset + self.bytelength]) 237 else: 238 store._rawarray.extend(self._rawarray[self.byteoffset: self.byteoffset + self.bytelength]) 239 self._rawarray = store._rawarray 240 self.offset = store.offset 241 self.bitlength += store.bitlength 242 243 @property 244 def byteoffset(self): 245 return self.offset // 8 246 247 @property 248 def rawbytes(self): 249 return self._rawarray 250 251 252class ByteStore(ConstByteStore): 253 """Adding mutating methods to ConstByteStore 254 255 Used internally - not part of public interface. 256 """ 257 __slots__ = () 258 259 def _setbit_lsb0(self, pos): 260 assert 0 <= pos < self.bitlength 261 pos = self.bitlength - pos - 1 262 byte, bit = divmod(self.offset + pos, 8) 263 self._rawarray[byte] |= (128 >> bit) 264 265 def _setbit_msb0(self, pos): 266 assert 0 <= pos < self.bitlength 267 byte, bit = divmod(self.offset + pos, 8) 268 self._rawarray[byte] |= (128 >> bit) 269 270 def _unsetbit_lsb0(self, pos): 271 assert 0 <= pos < self.bitlength 272 pos = self.bitlength - pos - 1 273 byte, bit = divmod(self.offset + pos, 8) 274 self._rawarray[byte] &= ~(128 >> bit) 275 276 def _unsetbit_msb0(self, pos): 277 assert 0 <= pos < self.bitlength 278 byte, bit = divmod(self.offset + pos, 8) 279 self._rawarray[byte] &= ~(128 >> bit) 280 281 def _invertbit_lsb0(self, pos): 282 assert 0 <= pos < self.bitlength 283 pos = self.bitlength - pos - 1 284 byte, bit = divmod(self.offset + pos, 8) 285 self._rawarray[byte] ^= (128 >> bit) 286 287 def _invertbit_msb0(self, pos): 288 assert 0 <= pos < self.bitlength 289 byte, bit = divmod(self.offset + pos, 8) 290 self._rawarray[byte] ^= (128 >> bit) 291 292 def setbyte(self, pos, value): 293 self._rawarray[pos] = value 294 295 def setbyteslice(self, start, end, value): 296 self._rawarray[start:end] = value 297 298 299def offsetcopy(s, newoffset): 300 """Return a copy of a ByteStore with the newoffset. 301 302 Not part of public interface. 303 """ 304 assert 0 <= newoffset < 8 305 if not s.bitlength: 306 return copy.copy(s) 307 else: 308 if newoffset == s.offset % 8: 309 return type(s)(s.getbyteslice(s.byteoffset, s.byteoffset + s.bytelength), s.bitlength, newoffset) 310 newdata = [] 311 d = s._rawarray 312 assert newoffset != s.offset % 8 313 if newoffset < s.offset % 8: 314 # We need to shift everything left 315 shiftleft = s.offset % 8 - newoffset 316 # First deal with everything except for the final byte 317 for x in range(s.byteoffset, s.byteoffset + s.bytelength - 1): 318 newdata.append(((d[x] << shiftleft) & 0xff) + (d[x + 1] >> (8 - shiftleft))) 319 bits_in_last_byte = (s.offset + s.bitlength) % 8 320 if not bits_in_last_byte: 321 bits_in_last_byte = 8 322 if bits_in_last_byte > shiftleft: 323 newdata.append((d[s.byteoffset + s.bytelength - 1] << shiftleft) & 0xff) 324 else: # newoffset > s._offset % 8 325 shiftright = newoffset - s.offset % 8 326 newdata.append(s.getbyte(0) >> shiftright) 327 for x in range(s.byteoffset + 1, s.byteoffset + s.bytelength): 328 newdata.append(((d[x - 1] << (8 - shiftright)) & 0xff) + (d[x] >> shiftright)) 329 bits_in_last_byte = (s.offset + s.bitlength) % 8 330 if not bits_in_last_byte: 331 bits_in_last_byte = 8 332 if bits_in_last_byte + shiftright > 8: 333 newdata.append((d[s.byteoffset + s.bytelength - 1] << (8 - shiftright)) & 0xff) 334 new_s = type(s)(bytearray(newdata), s.bitlength, newoffset) 335 assert new_s.offset == newoffset 336 return new_s 337 338 339def equal(a, b): 340 """Return True if ByteStores a == b. 341 342 Not part of public interface. 343 """ 344 # We want to return False for inequality as soon as possible, which 345 # means we get lots of special cases. 346 # First the easy one - compare lengths: 347 a_bitlength = a.bitlength 348 b_bitlength = b.bitlength 349 if a_bitlength != b_bitlength: 350 return False 351 if not a_bitlength: 352 assert b_bitlength == 0 353 return True 354 # Make 'a' the one with the smaller offset 355 if (a.offset % 8) > (b.offset % 8): 356 a, b = b, a 357 # and create some aliases 358 a_bitoff = a.offset % 8 359 b_bitoff = b.offset % 8 360 a_byteoffset = a.byteoffset 361 b_byteoffset = b.byteoffset 362 a_bytelength = a.bytelength 363 b_bytelength = b.bytelength 364 da = a._rawarray 365 db = b._rawarray 366 367 # If they are pointing to the same data, they must be equal 368 if da is db and a.offset == b.offset: 369 return True 370 371 if a_bitoff == b_bitoff: 372 bits_spare_in_last_byte = 8 - (a_bitoff + a_bitlength) % 8 373 if bits_spare_in_last_byte == 8: 374 bits_spare_in_last_byte = 0 375 # Special case for a, b contained in a single byte 376 if a_bytelength == 1: 377 a_val = ((da[a_byteoffset] << a_bitoff) & 0xff) >> (8 - a_bitlength) 378 b_val = ((db[b_byteoffset] << b_bitoff) & 0xff) >> (8 - b_bitlength) 379 return a_val == b_val 380 # Otherwise check first byte 381 if da[a_byteoffset] & (0xff >> a_bitoff) != db[b_byteoffset] & (0xff >> b_bitoff): 382 return False 383 # then everything up to the last 384 b_a_offset = b_byteoffset - a_byteoffset 385 for x in range(1 + a_byteoffset, a_byteoffset + a_bytelength - 1): 386 if da[x] != db[b_a_offset + x]: 387 return False 388 # and finally the last byte 389 return (da[a_byteoffset + a_bytelength - 1] >> bits_spare_in_last_byte == 390 db[b_byteoffset + b_bytelength - 1] >> bits_spare_in_last_byte) 391 392 assert a_bitoff != b_bitoff 393 # This is how much we need to shift a to the right to compare with b: 394 shift = b_bitoff - a_bitoff 395 # Special case for b only one byte long 396 if b_bytelength == 1: 397 assert a_bytelength == 1 398 a_val = ((da[a_byteoffset] << a_bitoff) & 0xff) >> (8 - a_bitlength) 399 b_val = ((db[b_byteoffset] << b_bitoff) & 0xff) >> (8 - b_bitlength) 400 return a_val == b_val 401 # Special case for a only one byte long 402 if a_bytelength == 1: 403 assert b_bytelength == 2 404 a_val = ((da[a_byteoffset] << a_bitoff) & 0xff) >> (8 - a_bitlength) 405 b_val = ((db[b_byteoffset] << 8) + db[b_byteoffset + 1]) << b_bitoff 406 b_val &= 0xffff 407 b_val >>= 16 - b_bitlength 408 return a_val == b_val 409 410 # Compare first byte of b with bits from first byte of a 411 if (da[a_byteoffset] & (0xff >> a_bitoff)) >> shift != db[b_byteoffset] & (0xff >> b_bitoff): 412 return False 413 # Now compare every full byte of b with bits from 2 bytes of a 414 for x in range(1, b_bytelength - 1): 415 # Construct byte from 2 bytes in a to compare to byte in b 416 b_val = db[b_byteoffset + x] 417 a_val = ((da[a_byteoffset + x - 1] << 8) + da[a_byteoffset + x]) >> shift 418 a_val &= 0xff 419 if a_val != b_val: 420 return False 421 422 # Now check bits in final byte of b 423 final_b_bits = (b.offset + b_bitlength) % 8 424 if not final_b_bits: 425 final_b_bits = 8 426 b_val = db[b_byteoffset + b_bytelength - 1] >> (8 - final_b_bits) 427 final_a_bits = (a.offset + a_bitlength) % 8 428 if not final_a_bits: 429 final_a_bits = 8 430 if b.bytelength > a_bytelength: 431 assert b_bytelength == a_bytelength + 1 432 a_val = da[a_byteoffset + a_bytelength - 1] >> (8 - final_a_bits) 433 a_val &= 0xff >> (8 - final_b_bits) 434 return a_val == b_val 435 assert a_bytelength == b_bytelength 436 a_val = da[a_byteoffset + a_bytelength - 2] << 8 437 a_val += da[a_byteoffset + a_bytelength - 1] 438 a_val >>= (8 - final_a_bits) 439 a_val &= 0xff >> (8 - final_b_bits) 440 return a_val == b_val 441 442 443class MmapByteArray(object): 444 """Looks like a bytearray, but from an mmap. 445 446 Not part of public interface. 447 """ 448 449 __slots__ = ('filemap', 'filelength', 'source', 'byteoffset', 'bytelength') 450 451 def __init__(self, source, bytelength=None, byteoffset=None): 452 self.source = source 453 source.seek(0, os.SEEK_END) 454 self.filelength = source.tell() 455 if byteoffset is None: 456 byteoffset = 0 457 if bytelength is None: 458 bytelength = self.filelength - byteoffset 459 self.byteoffset = byteoffset 460 self.bytelength = bytelength 461 self.filemap = mmap.mmap(source.fileno(), 0, access=mmap.ACCESS_READ) 462 463 def __getitem__(self, key): 464 try: 465 start = key.start 466 stop = key.stop 467 except AttributeError: 468 try: 469 assert 0 <= key < self.bytelength 470 return ord(self.filemap[key + self.byteoffset]) 471 except TypeError: 472 # for Python 3 473 return self.filemap[key + self.byteoffset] 474 else: 475 if start is None: 476 start = 0 477 if stop is None: 478 stop = self.bytelength 479 assert key.step is None 480 assert 0 <= start < self.bytelength 481 assert 0 <= stop <= self.bytelength 482 s = slice(start + self.byteoffset, stop + self.byteoffset) 483 return bytearray(self.filemap.__getitem__(s)) 484 485 def __len__(self): 486 return self.bytelength 487 488 489# This creates a dictionary for every possible byte with the value being 490# the key with its bits reversed. 491BYTE_REVERSAL_DICT = dict() 492 493# For Python 2.7/ 3.x coexistence 494# Yes this is very very hacky. 495if sys.version_info[0] == 2: 496 for i in range(256): 497 BYTE_REVERSAL_DICT[i] = chr(int("{0:08b}".format(i)[::-1], 2)) 498else: 499 for i in range(256): 500 BYTE_REVERSAL_DICT[i] = bytes([int("{0:08b}".format(i)[::-1], 2)]) 501 from io import IOBase as file 502 xrange = range 503 basestring = str 504 505# Python 2.x octals start with '0', in Python 3 it's '0o' 506LEADING_OCT_CHARS = len(oct(1)) - 1 507 508 509def tidy_input_string(s): 510 """Return string made lowercase and with all whitespace removed.""" 511 s = ''.join(s.split()).lower() 512 return s 513 514 515INIT_NAMES = ('uint', 'int', 'ue', 'se', 'sie', 'uie', 'hex', 'oct', 'bin', 'bits', 516 'uintbe', 'intbe', 'uintle', 'intle', 'uintne', 'intne', 517 'float', 'floatbe', 'floatle', 'floatne', 'bytes', 'bool', 'pad') 518 519TOKEN_RE = re.compile(r'(?P<name>' + '|'.join(INIT_NAMES) + 520 r')(:(?P<len>[^=]+))?(=(?P<value>.*))?$', re.IGNORECASE) 521DEFAULT_UINT = re.compile(r'(?P<len>[^=]+)?(=(?P<value>.*))?$', re.IGNORECASE) 522 523MULTIPLICATIVE_RE = re.compile(r'(?P<factor>.*)\*(?P<token>.+)') 524 525# Hex, oct or binary literals 526LITERAL_RE = re.compile(r'(?P<name>0([xob]))(?P<value>.+)', re.IGNORECASE) 527 528# An endianness indicator followed by one or more struct.pack codes 529STRUCT_PACK_RE = re.compile(r'(?P<endian>[<>@])?(?P<fmt>(?:\d*[bBhHlLqQfd])+)$') 530 531# A number followed by a single character struct.pack code 532STRUCT_SPLIT_RE = re.compile(r'\d*[bBhHlLqQfd]') 533 534# These replicate the struct.pack codes 535# Big-endian 536REPLACEMENTS_BE = {'b': 'intbe:8', 'B': 'uintbe:8', 537 'h': 'intbe:16', 'H': 'uintbe:16', 538 'l': 'intbe:32', 'L': 'uintbe:32', 539 'q': 'intbe:64', 'Q': 'uintbe:64', 540 'f': 'floatbe:32', 'd': 'floatbe:64'} 541# Little-endian 542REPLACEMENTS_LE = {'b': 'intle:8', 'B': 'uintle:8', 543 'h': 'intle:16', 'H': 'uintle:16', 544 'l': 'intle:32', 'L': 'uintle:32', 545 'q': 'intle:64', 'Q': 'uintle:64', 546 'f': 'floatle:32', 'd': 'floatle:64'} 547 548# Size in bytes of all the pack codes. 549PACK_CODE_SIZE = {'b': 1, 'B': 1, 'h': 2, 'H': 2, 'l': 4, 'L': 4, 550 'q': 8, 'Q': 8, 'f': 4, 'd': 8} 551 552_tokenname_to_initialiser = {'hex': 'hex', '0x': 'hex', '0X': 'hex', 'oct': 'oct', 553 '0o': 'oct', '0O': 'oct', 'bin': 'bin', '0b': 'bin', 554 '0B': 'bin', 'bits': 'auto', 'bytes': 'bytes', 'pad': 'pad'} 555 556 557def structparser(token): 558 """Parse struct-like format string token into sub-token list.""" 559 m = STRUCT_PACK_RE.match(token) 560 if not m: 561 return [token] 562 else: 563 endian = m.group('endian') 564 if endian is None: 565 return [token] 566 # Split the format string into a list of 'q', '4h' etc. 567 formatlist = re.findall(STRUCT_SPLIT_RE, m.group('fmt')) 568 # Now deal with multiplicative factors, 4h -> hhhh etc. 569 fmt = ''.join([f[-1] * int(f[:-1]) if len(f) != 1 else 570 f for f in formatlist]) 571 if endian == '@': 572 # Native endianness 573 if byteorder == 'little': 574 endian = '<' 575 else: 576 assert byteorder == 'big' 577 endian = '>' 578 if endian == '<': 579 tokens = [REPLACEMENTS_LE[c] for c in fmt] 580 else: 581 assert endian == '>' 582 tokens = [REPLACEMENTS_BE[c] for c in fmt] 583 return tokens 584 585 586def tokenparser(fmt, keys=None, token_cache={}): 587 """Divide the format string into tokens and parse them. 588 589 Return stretchy token and list of [initialiser, length, value] 590 initialiser is one of: hex, oct, bin, uint, int, se, ue, 0x, 0o, 0b etc. 591 length is None if not known, as is value. 592 593 If the token is in the keyword dictionary (keys) then it counts as a 594 special case and isn't messed with. 595 596 tokens must be of the form: [factor*][initialiser][:][length][=value] 597 598 """ 599 try: 600 return token_cache[(fmt, keys)] 601 except KeyError: 602 token_key = (fmt, keys) 603 # Very inefficient expanding of brackets. 604 fmt = expand_brackets(fmt) 605 # Split tokens by ',' and remove whitespace 606 # The meta_tokens can either be ordinary single tokens or multiple 607 # struct-format token strings. 608 meta_tokens = (''.join(f.split()) for f in fmt.split(',')) 609 return_values = [] 610 stretchy_token = False 611 for meta_token in meta_tokens: 612 # See if it has a multiplicative factor 613 m = MULTIPLICATIVE_RE.match(meta_token) 614 if not m: 615 factor = 1 616 else: 617 factor = int(m.group('factor')) 618 meta_token = m.group('token') 619 # See if it's a struct-like format 620 tokens = structparser(meta_token) 621 ret_vals = [] 622 for token in tokens: 623 if keys and token in keys: 624 # Don't bother parsing it, it's a keyword argument 625 ret_vals.append([token, None, None]) 626 continue 627 value = length = None 628 if token == '': 629 continue 630 # Match literal tokens of the form 0x... 0o... and 0b... 631 m = LITERAL_RE.match(token) 632 if m: 633 name = m.group('name') 634 value = m.group('value') 635 ret_vals.append([name, length, value]) 636 continue 637 # Match everything else: 638 m1 = TOKEN_RE.match(token) 639 if not m1: 640 # and if you don't specify a 'name' then the default is 'uint': 641 m2 = DEFAULT_UINT.match(token) 642 if not m2: 643 raise ValueError("Don't understand token '{0}'.".format(token)) 644 if m1: 645 name = m1.group('name') 646 length = m1.group('len') 647 if m1.group('value'): 648 value = m1.group('value') 649 else: 650 assert m2 651 name = 'uint' 652 length = m2.group('len') 653 if m2.group('value'): 654 value = m2.group('value') 655 if name == 'bool': 656 if length is not None and length != '1': 657 raise ValueError("You can only specify one bit sized bool tokens or leave unspecified.") 658 length = 1 659 if length is None and name not in ('se', 'ue', 'sie', 'uie'): 660 stretchy_token = True 661 if length is not None: 662 # Try converting length to int, otherwise check it's a key. 663 try: 664 length = int(length) 665 if length < 0: 666 raise Error 667 # For the 'bytes' token convert length to bits. 668 if name == 'bytes': 669 length *= 8 670 except Error: 671 raise ValueError("Can't read a token with a negative length.") 672 except ValueError: 673 if not keys or length not in keys: 674 raise ValueError("Don't understand length '{0}' of token.".format(length)) 675 ret_vals.append([name, length, value]) 676 # This multiplies by the multiplicative factor, but this means that 677 # we can't allow keyword values as multipliers (e.g. n*uint:8). 678 # The only way to do this would be to return the factor in some fashion 679 # (we can't use the key's value here as it would mean that we couldn't 680 # sensibly continue to cache the function's results. (TODO). 681 return_values.extend(ret_vals * factor) 682 return_values = [tuple(x) for x in return_values] 683 if len(token_cache) < CACHE_SIZE: 684 token_cache[token_key] = stretchy_token, return_values 685 return stretchy_token, return_values 686 687 688# Looks for first number*( 689BRACKET_RE = re.compile(r'(?P<factor>\d+)\*\(') 690 691 692def expand_brackets(s): 693 """Remove whitespace and expand all brackets.""" 694 s = ''.join(s.split()) 695 while True: 696 start = s.find('(') 697 if start == -1: 698 break 699 count = 1 # Number of hanging open brackets 700 p = start + 1 701 while p < len(s): 702 if s[p] == '(': 703 count += 1 704 if s[p] == ')': 705 count -= 1 706 if not count: 707 break 708 p += 1 709 if count: 710 raise ValueError("Unbalanced parenthesis in '{0}'.".format(s)) 711 if start == 0 or s[start - 1] != '*': 712 s = s[0:start] + s[start + 1:p] + s[p + 1:] 713 else: 714 m = BRACKET_RE.search(s) 715 if m: 716 factor = int(m.group('factor')) 717 matchstart = m.start('factor') 718 s = s[0:matchstart] + (factor - 1) * (s[start + 1:p] + ',') + s[start + 1:p] + s[p + 1:] 719 else: 720 raise ValueError("Failed to parse '{0}'.".format(s)) 721 return s 722 723 724# This converts a single octal digit to 3 bits. 725OCT_TO_BITS = ['{0:03b}'.format(i) for i in xrange(8)] 726 727# A dictionary of number of 1 bits contained in binary representation of any byte 728BIT_COUNT = dict(zip(xrange(256), [bin(i).count('1') for i in xrange(256)])) 729 730 731class Bits(object): 732 """A container holding an immutable sequence of bits. 733 734 For a mutable container use the BitArray class instead. 735 736 Methods: 737 738 all() -- Check if all specified bits are set to 1 or 0. 739 any() -- Check if any of specified bits are set to 1 or 0. 740 count() -- Count the number of bits set to 1 or 0. 741 cut() -- Create generator of constant sized chunks. 742 endswith() -- Return whether the bitstring ends with a sub-string. 743 find() -- Find a sub-bitstring in the current bitstring. 744 findall() -- Find all occurrences of a sub-bitstring in the current bitstring. 745 join() -- Join bitstrings together using current bitstring. 746 rfind() -- Seek backwards to find a sub-bitstring. 747 split() -- Create generator of chunks split by a delimiter. 748 startswith() -- Return whether the bitstring starts with a sub-bitstring. 749 tobytes() -- Return bitstring as bytes, padding if needed. 750 tofile() -- Write bitstring to file, padding if needed. 751 unpack() -- Interpret bits using format string. 752 753 Special methods: 754 755 Also available are the operators [], ==, !=, +, *, ~, <<, >>, &, |, ^. 756 757 Properties: 758 759 bin -- The bitstring as a binary string. 760 bool -- For single bit bitstrings, interpret as True or False. 761 bytes -- The bitstring as a bytes object. 762 float -- Interpret as a floating point number. 763 floatbe -- Interpret as a big-endian floating point number. 764 floatle -- Interpret as a little-endian floating point number. 765 floatne -- Interpret as a native-endian floating point number. 766 hex -- The bitstring as a hexadecimal string. 767 int -- Interpret as a two's complement signed integer. 768 intbe -- Interpret as a big-endian signed integer. 769 intle -- Interpret as a little-endian signed integer. 770 intne -- Interpret as a native-endian signed integer. 771 len -- Length of the bitstring in bits. 772 oct -- The bitstring as an octal string. 773 se -- Interpret as a signed exponential-Golomb code. 774 ue -- Interpret as an unsigned exponential-Golomb code. 775 sie -- Interpret as a signed interleaved exponential-Golomb code. 776 uie -- Interpret as an unsigned interleaved exponential-Golomb code. 777 uint -- Interpret as a two's complement unsigned integer. 778 uintbe -- Interpret as a big-endian unsigned integer. 779 uintle -- Interpret as a little-endian unsigned integer. 780 uintne -- Interpret as a native-endian unsigned integer. 781 782 """ 783 784 __slots__ = ('_datastore') 785 786 def __init__(self, auto=None, length=None, offset=None, **kwargs): 787 """Either specify an 'auto' initialiser: 788 auto -- a string of comma separated tokens, an integer, a file object, 789 a bytearray, a boolean iterable, an array or another bitstring. 790 791 Or initialise via **kwargs with one (and only one) of: 792 bytes -- raw data as a string, for example read from a binary file. 793 bin -- binary string representation, e.g. '0b001010'. 794 hex -- hexadecimal string representation, e.g. '0x2ef' 795 oct -- octal string representation, e.g. '0o777'. 796 uint -- an unsigned integer. 797 int -- a signed integer. 798 float -- a floating point number. 799 uintbe -- an unsigned big-endian whole byte integer. 800 intbe -- a signed big-endian whole byte integer. 801 floatbe - a big-endian floating point number. 802 uintle -- an unsigned little-endian whole byte integer. 803 intle -- a signed little-endian whole byte integer. 804 floatle -- a little-endian floating point number. 805 uintne -- an unsigned native-endian whole byte integer. 806 intne -- a signed native-endian whole byte integer. 807 floatne -- a native-endian floating point number. 808 se -- a signed exponential-Golomb code. 809 ue -- an unsigned exponential-Golomb code. 810 sie -- a signed interleaved exponential-Golomb code. 811 uie -- an unsigned interleaved exponential-Golomb code. 812 bool -- a boolean (True or False). 813 filename -- a file which will be opened in binary read-only mode. 814 815 Other keyword arguments: 816 length -- length of the bitstring in bits, if needed and appropriate. 817 It must be supplied for all integer and float initialisers. 818 offset -- bit offset to the data. These offset bits are 819 ignored and this is mainly intended for use when 820 initialising using 'bytes' or 'filename'. 821 822 """ 823 pass 824 825 def __new__(cls, auto=None, length=None, offset=None, _cache={}, **kwargs): 826 # For instances auto-initialised with a string we intern the 827 # instance for re-use. 828 try: 829 if isinstance(auto, basestring): 830 try: 831 return _cache[auto] 832 except KeyError: 833 x = object.__new__(Bits) 834 try: 835 _, tokens = tokenparser(auto) 836 except ValueError as e: 837 raise CreationError(*e.args) 838 if offset is not None: 839 raise CreationError("offset should not be specified when using string initialisation.") 840 if length is not None: 841 raise CreationError("length should not be specified when using string initialisation.") 842 x._datastore = ConstByteStore(bytearray(0), 0, 0) 843 for token in tokens: 844 x._datastore._appendstore(Bits._init_with_token(*token)._datastore) 845 assert x._assertsanity() 846 if len(_cache) < CACHE_SIZE: 847 _cache[auto] = x 848 return x 849 if type(auto) == Bits: 850 return auto 851 except TypeError: 852 pass 853 x = super(Bits, cls).__new__(cls) 854 x._datastore = ConstByteStore(b'') 855 x._initialise(auto, length, offset, **kwargs) 856 return x 857 858 def _initialise(self, auto, length, offset, **kwargs): 859 if length is not None and length < 0: 860 raise CreationError("bitstring length cannot be negative.") 861 if offset is not None and offset < 0: 862 raise CreationError("offset must be >= 0.") 863 if auto is not None: 864 self._initialise_from_auto(auto, length, offset) 865 return 866 if not kwargs: 867 # No initialisers, so initialise with nothing or zero bits 868 if length is not None and length != 0: 869 data = bytearray((length + 7) // 8) 870 self._setbytes_unsafe(data, length, 0) 871 return 872 self._setbytes_unsafe(bytearray(0), 0, 0) 873 return 874 k, v = kwargs.popitem() 875 try: 876 init_without_length_or_offset[k](self, v) 877 if length is not None or offset is not None: 878 raise CreationError("Cannot use length or offset with this initialiser.") 879 except KeyError: 880 try: 881 init_with_length_only[k](self, v, length) 882 if offset is not None: 883 raise CreationError("Cannot use offset with this initialiser.") 884 except KeyError: 885 if offset is None: 886 offset = 0 887 try: 888 init_with_length_and_offset[k](self, v, length, offset) 889 except KeyError: 890 raise CreationError("Unrecognised keyword '{0}' used to initialise.", k) 891 892 def _initialise_from_auto(self, auto, length, offset): 893 if offset is None: 894 offset = 0 895 self._setauto(auto, length, offset) 896 return 897 898 def __iter__(self): 899 return iter(self._datastore) 900 901 def __copy__(self): 902 """Return a new copy of the Bits for the copy module.""" 903 # Note that if you want a new copy (different ID), use _copy instead. 904 # The copy can return self as it's immutable. 905 return self 906 907 def __lt__(self, other): 908 raise TypeError("unorderable type: {0}".format(type(self).__name__)) 909 910 def __gt__(self, other): 911 raise TypeError("unorderable type: {0}".format(type(self).__name__)) 912 913 def __le__(self, other): 914 raise TypeError("unorderable type: {0}".format(type(self).__name__)) 915 916 def __ge__(self, other): 917 raise TypeError("unorderable type: {0}".format(type(self).__name__)) 918 919 def __add__(self, bs): 920 """Concatenate bitstrings and return new bitstring. 921 922 bs -- the bitstring to append. 923 924 """ 925 bs = Bits(bs) 926 if bs.len <= self.len: 927 s = self._copy() 928 s._addright(bs) 929 else: 930 s = bs._copy() 931 s = self.__class__(s) 932 s._addleft(self) 933 return s 934 935 def __radd__(self, bs): 936 """Append current bitstring to bs and return new bitstring. 937 938 bs -- the string for the 'auto' initialiser that will be appended to. 939 940 """ 941 bs = self._converttobitstring(bs) 942 return bs.__add__(self) 943 944 def __getitem__(self, key): 945 """Return a new bitstring representing a slice of the current bitstring. 946 947 Indices are in units of the step parameter (default 1 bit). 948 Stepping is used to specify the number of bits in each item. 949 950 >>> print BitArray('0b00110')[1:4] 951 '0b011' 952 >>> print BitArray('0x00112233')[1:3:8] 953 '0x1122' 954 955 """ 956 length = self.len 957 if isinstance(key, slice): 958 step = key.step if key.step is not None else 1 959 if step != 1: 960 # convert to binary string and use string slicing 961 bs = self.__class__() 962 if _lsb0: 963 start = length - key.start - 1 if key.start is not None else None 964 stop = length - key.stop - 1 if key.stop is not None else None 965 bs._setbin_unsafe(self._getbin().__getitem__(slice(start, stop, -step))[::-1]) 966 else: 967 bs._setbin_unsafe(self._getbin().__getitem__(key)) 968 return bs 969 start, stop = 0, length 970 if key.start is not None: 971 start = key.start 972 if key.start < 0: 973 start += stop 974 if key.stop is not None: 975 stop = key.stop 976 if key.stop < 0: 977 stop += length 978 start = max(start, 0) 979 stop = min(stop, length) 980 if start < stop: 981 return self._slice(start, stop) 982 else: 983 return self.__class__() 984 else: 985 # single element 986 if key < 0: 987 key += length 988 if not 0 <= key < length: 989 raise IndexError("Slice index out of range.") 990 # Single bit, return True or False 991 return self._datastore.getbit(key) 992 993 def __len__(self): 994 """Return the length of the bitstring in bits.""" 995 return self._getlength() 996 997 def __str__(self): 998 """Return approximate string representation of bitstring for printing. 999 1000 Short strings will be given wholly in hexadecimal or binary. Longer 1001 strings may be part hexadecimal and part binary. Very long strings will 1002 be truncated with '...'. 1003 1004 """ 1005 length = self.len 1006 if not length: 1007 return '' 1008 if length > MAX_CHARS * 4: 1009 # Too long for hex. Truncate... 1010 return ''.join(('0x', self._readhex(MAX_CHARS * 4, 0), '...')) 1011 # If it's quite short and we can't do hex then use bin 1012 if length < 32 and length % 4 != 0: 1013 return '0b' + self.bin 1014 # If we can use hex then do so 1015 if not length % 4: 1016 return '0x' + self.hex 1017 # Otherwise first we do as much as we can in hex 1018 # then add on 1, 2 or 3 bits on at the end 1019 bits_at_end = length % 4 1020 return ''.join(('0x', self._readhex(length - bits_at_end, 0), 1021 ', ', '0b', 1022 self._readbin(bits_at_end, length - bits_at_end))) 1023 1024 def __repr__(self): 1025 """Return representation that could be used to recreate the bitstring. 1026 1027 If the returned string is too long it will be truncated. See __str__(). 1028 1029 """ 1030 length = self.len 1031 try: 1032 pos = self._pos 1033 pos_string = "" if pos == 0 else ", pos={0}".format(pos) 1034 except AttributeError: 1035 pos_string = "" 1036 if isinstance(self._datastore._rawarray, MmapByteArray): 1037 offsetstring = '' 1038 if self._datastore.byteoffset or self._offset: 1039 offsetstring = ", offset=%d" % (self._datastore._rawarray.byteoffset * 8 + self._offset) 1040 lengthstring = ", length=%d" % length 1041 return "{0}(filename='{1}'{2}{3}{4})".format(self.__class__.__name__, 1042 self._datastore._rawarray.source.name, 1043 lengthstring, offsetstring, pos_string) 1044 else: 1045 s = self.__str__() 1046 lengthstring = '' 1047 if s.endswith('...'): 1048 lengthstring = " # length={0}".format(length) 1049 return "{0}('{1}'{2}){3}".format(self.__class__.__name__, s, pos_string, lengthstring) 1050 1051 def __eq__(self, bs): 1052 """Return True if two bitstrings have the same binary representation. 1053 1054 >>> BitArray('0b1110') == '0xe' 1055 True 1056 1057 """ 1058 try: 1059 bs = Bits(bs) 1060 except TypeError: 1061 return False 1062 return equal(self._datastore, bs._datastore) 1063 1064 def __ne__(self, bs): 1065 """Return False if two bitstrings have the same binary representation. 1066 1067 >>> BitArray('0b111') == '0x7' 1068 False 1069 1070 """ 1071 return not self.__eq__(bs) 1072 1073 def __invert__(self): 1074 """Return bitstring with every bit inverted. 1075 1076 Raises Error if the bitstring is empty. 1077 1078 """ 1079 if not self.len: 1080 raise Error("Cannot invert empty bitstring.") 1081 s = self._copy() 1082 s._invert_all() 1083 return s 1084 1085 def __lshift__(self, n): 1086 """Return bitstring with bits shifted by n to the left. 1087 1088 n -- the number of bits to shift. Must be >= 0. 1089 1090 """ 1091 if n < 0: 1092 raise ValueError("Cannot shift by a negative amount.") 1093 if not self.len: 1094 raise ValueError("Cannot shift an empty bitstring.") 1095 n = min(n, self.len) 1096 s = self._slice(n, self.len) 1097 s._addright(Bits(n)) 1098 return s 1099 1100 def __rshift__(self, n): 1101 """Return bitstring with bits shifted by n to the right. 1102 1103 n -- the number of bits to shift. Must be >= 0. 1104 1105 """ 1106 if n < 0: 1107 raise ValueError("Cannot shift by a negative amount.") 1108 if not self.len: 1109 raise ValueError("Cannot shift an empty bitstring.") 1110 if not n: 1111 return self._copy() 1112 s = self.__class__(length=min(n, self.len)) 1113 s._addright(self[:-n]) 1114 return s 1115 1116 def __mul__(self, n): 1117 """Return bitstring consisting of n concatenations of self. 1118 1119 Called for expression of the form 'a = b*3'. 1120 n -- The number of concatenations. Must be >= 0. 1121 1122 """ 1123 if n < 0: 1124 raise ValueError("Cannot multiply by a negative integer.") 1125 if not n: 1126 return self.__class__() 1127 s = self._copy() 1128 s._imul(n) 1129 return s 1130 1131 def __rmul__(self, n): 1132 """Return bitstring consisting of n concatenations of self. 1133 1134 Called for expressions of the form 'a = 3*b'. 1135 n -- The number of concatenations. Must be >= 0. 1136 1137 """ 1138 return self.__mul__(n) 1139 1140 def __and__(self, bs): 1141 """Bit-wise 'and' between two bitstrings. Returns new bitstring. 1142 1143 bs -- The bitstring to '&' with. 1144 1145 Raises ValueError if the two bitstrings have differing lengths. 1146 1147 """ 1148 bs = Bits(bs) 1149 if self.len != bs.len: 1150 raise ValueError("Bitstrings must have the same length " 1151 "for & operator.") 1152 s = self._copy() 1153 s._iand(bs) 1154 return s 1155 1156 def __rand__(self, bs): 1157 """Bit-wise 'and' between two bitstrings. Returns new bitstring. 1158 1159 bs -- the bitstring to '&' with. 1160 1161 Raises ValueError if the two bitstrings have differing lengths. 1162 1163 """ 1164 return self.__and__(bs) 1165 1166 def __or__(self, bs): 1167 """Bit-wise 'or' between two bitstrings. Returns new bitstring. 1168 1169 bs -- The bitstring to '|' with. 1170 1171 Raises ValueError if the two bitstrings have differing lengths. 1172 1173 """ 1174 bs = Bits(bs) 1175 if self.len != bs.len: 1176 raise ValueError("Bitstrings must have the same length " 1177 "for | operator.") 1178 s = self._copy() 1179 s._ior(bs) 1180 return s 1181 1182 def __ror__(self, bs): 1183 """Bit-wise 'or' between two bitstrings. Returns new bitstring. 1184 1185 bs -- The bitstring to '|' with. 1186 1187 Raises ValueError if the two bitstrings have differing lengths. 1188 1189 """ 1190 return self.__or__(bs) 1191 1192 def __xor__(self, bs): 1193 """Bit-wise 'xor' between two bitstrings. Returns new bitstring. 1194 1195 bs -- The bitstring to '^' with. 1196 1197 Raises ValueError if the two bitstrings have differing lengths. 1198 1199 """ 1200 bs = Bits(bs) 1201 if self.len != bs.len: 1202 raise ValueError("Bitstrings must have the same length " 1203 "for ^ operator.") 1204 s = self._copy() 1205 s._ixor(bs) 1206 return s 1207 1208 def __rxor__(self, bs): 1209 """Bit-wise 'xor' between two bitstrings. Returns new bitstring. 1210 1211 bs -- The bitstring to '^' with. 1212 1213 Raises ValueError if the two bitstrings have differing lengths. 1214 1215 """ 1216 return self.__xor__(bs) 1217 1218 def __contains__(self, bs): 1219 """Return whether bs is contained in the current bitstring. 1220 1221 bs -- The bitstring to search for. 1222 1223 """ 1224 # Don't want to change pos 1225 try: 1226 pos = self._pos 1227 except AttributeError: 1228 pass 1229 found = Bits.find(self, bs, bytealigned=False) 1230 try: 1231 self._pos = pos 1232 except UnboundLocalError: 1233 pass 1234 return bool(found) 1235 1236 def __hash__(self): 1237 """Return an integer hash of the object.""" 1238 # We can't in general hash the whole bitstring (it could take hours!) 1239 # So instead take some bits from the start and end. 1240 if self.len <= 160: 1241 # Use the whole bitstring. 1242 shorter = self 1243 else: 1244 # Take 10 bytes from start and end 1245 shorter = self[:80] + self[-80:] 1246 h = 0 1247 for byte in shorter.tobytes(): 1248 try: 1249 h = (h << 4) + ord(byte) 1250 except TypeError: 1251 # Python 3 1252 h = (h << 4) + byte 1253 g = h & 0xf0000000 1254 if g & (1 << 31): 1255 h ^= (g >> 24) 1256 h ^= g 1257 return h % 1442968193 1258 1259 # This is only used in Python 2.x... 1260 def __nonzero__(self): 1261 """Return True if any bits are set to 1, otherwise return False.""" 1262 return self.any(True) 1263 1264 # ...whereas this is used in Python 3.x 1265 __bool__ = __nonzero__ 1266 1267 if _debug is True: 1268 def _assertsanity(self): 1269 """Check internal self consistency as a debugging aid.""" 1270 assert self.len >= 0 1271 assert 0 <= self._offset, "offset={0}".format(self._offset) 1272 assert (self.len + self._offset + 7) // 8 == self._datastore.bytelength + self._datastore.byteoffset, "len={0}, offset={1}, bytelength={2}, byteoffset={3}".format(self.len, self._offset, self._datastore.bytelength, self._datastore.byteoffset) 1273 return True 1274 else: 1275 @staticmethod 1276 def _assertsanity(): 1277 return True 1278 1279 @classmethod 1280 def _init_with_token(cls, name, token_length, value): 1281 if token_length is not None: 1282 token_length = int(token_length) 1283 if token_length == 0: 1284 return cls() 1285 # For pad token just return the length in zero bits 1286 if name == 'pad': 1287 return cls(token_length) 1288 1289 if value is None: 1290 if token_length is None: 1291 error = "Token has no value ({0}=???).".format(name) 1292 else: 1293 error = "Token has no value ({0}:{1}=???).".format(name, token_length) 1294 raise ValueError(error) 1295 try: 1296 b = cls(**{_tokenname_to_initialiser[name]: value}) 1297 except KeyError: 1298 if name in ('se', 'ue', 'sie', 'uie'): 1299 b = cls(**{name: int(value)}) 1300 elif name in ('uint', 'int', 'uintbe', 'intbe', 'uintle', 'intle', 'uintne', 'intne'): 1301 b = cls(**{name: int(value), 'length': token_length}) 1302 elif name in ('float', 'floatbe', 'floatle', 'floatne'): 1303 b = cls(**{name: float(value), 'length': token_length}) 1304 elif name == 'bool': 1305 if value in (1, 'True', '1'): 1306 b = cls(bool=True) 1307 elif value in (0, 'False', '0'): 1308 b = cls(bool=False) 1309 else: 1310 raise CreationError("bool token can only be 'True' or 'False'.") 1311 else: 1312 raise CreationError("Can't parse token name {0}.", name) 1313 if token_length is not None and b.len != token_length: 1314 msg = "Token with length {0} packed with value of length {1} ({2}:{3}={4})." 1315 raise CreationError(msg, token_length, b.len, name, token_length, value) 1316 return b 1317 1318 def _clear(self): 1319 """Reset the bitstring to an empty state.""" 1320 self._datastore = ByteStore(bytearray(0)) 1321 1322 def _setauto(self, s, length, offset): 1323 """Set bitstring from a bitstring, file, bool, integer, array, iterable or string.""" 1324 # As s can be so many different things it's important to do the checks 1325 # in the correct order, as some types are also other allowed types. 1326 # So basestring must be checked before Iterable 1327 # and bytes/bytearray before Iterable but after basestring! 1328 if isinstance(s, Bits): 1329 if length is None: 1330 length = s.len - offset 1331 self._setbytes_unsafe(s._datastore.rawbytes, length, s._offset + offset) 1332 return 1333 1334 if isinstance(s, io.BytesIO): 1335 if offset is None: 1336 offset = 0 1337 if length is None: 1338 length = s.seek(0, 2) * 8 - offset 1339 byteoffset, offset = divmod(offset, 8) 1340 bytelength = (length + byteoffset * 8 + offset + 7) // 8 - byteoffset 1341 if length + byteoffset * 8 + offset > s.seek(0, 2) * 8: 1342 raise CreationError("BytesIO object is not long enough for specified " 1343 "length and offset.") 1344 self._datastore = ConstByteStore(bytearray(s.getvalue()[byteoffset: byteoffset + bytelength]), length, offset) 1345 return 1346 1347 if isinstance(s, file): 1348 if offset is None: 1349 offset = 0 1350 if length is None: 1351 length = os.path.getsize(s.name) * 8 - offset 1352 byteoffset, offset = divmod(offset, 8) 1353 bytelength = (length + byteoffset * 8 + offset + 7) // 8 - byteoffset 1354 m = MmapByteArray(s, bytelength, byteoffset) 1355 if length + byteoffset * 8 + offset > m.filelength * 8: 1356 raise CreationError("File is not long enough for specified " 1357 "length and offset.") 1358 self._datastore = ConstByteStore(m, length, offset) 1359 return 1360 1361 if length is not None: 1362 raise CreationError("The length keyword isn't applicable to this initialiser.") 1363 if offset: 1364 raise CreationError("The offset keyword isn't applicable to this initialiser.") 1365 if isinstance(s, basestring): 1366 bs = self._converttobitstring(s) 1367 assert bs._offset == 0 1368 self._setbytes_unsafe(bs._datastore.rawbytes, bs.length, 0) 1369 return 1370 if isinstance(s, (bytes, bytearray)): 1371 self._setbytes_unsafe(bytearray(s), len(s) * 8, 0) 1372 return 1373 if isinstance(s, array.array): 1374 try: 1375 b = s.tobytes() 1376 except AttributeError: 1377 b = s.tostring() # Python 2.7 1378 self._setbytes_unsafe(bytearray(b), len(b) * 8, 0) 1379 return 1380 if isinstance(s, numbers.Integral): 1381 # Initialise with s zero bits. 1382 if s < 0: 1383 msg = "Can't create bitstring of negative length {0}." 1384 raise CreationError(msg, s) 1385 data = bytearray((s + 7) // 8) 1386 self._datastore = ByteStore(data, s, 0) 1387 return 1388 if isinstance(s, collectionsAbc.Iterable): 1389 # Evaluate each item as True or False and set bits to 1 or 0. 1390 self._setbin_unsafe(''.join(str(int(bool(x))) for x in s)) 1391 return 1392 raise TypeError("Cannot initialise bitstring from {0}.".format(type(s))) 1393 1394 def _setfile(self, filename, length, offset): 1395 """Use file as source of bits.""" 1396 with open(filename, 'rb') as source: 1397 if offset is None: 1398 offset = 0 1399 if length is None: 1400 length = os.path.getsize(source.name) * 8 - offset 1401 byteoffset, offset = divmod(offset, 8) 1402 bytelength = (length + byteoffset * 8 + offset + 7) // 8 - byteoffset 1403 m = MmapByteArray(source, bytelength, byteoffset) 1404 if length + byteoffset * 8 + offset > m.filelength * 8: 1405 raise CreationError("File is not long enough for specified " 1406 "length and offset.") 1407 self._datastore = ConstByteStore(m, length, offset) 1408 1409 def _setbytes_safe(self, data, length=None, offset=0): 1410 """Set the data from a string.""" 1411 data = bytearray(data) 1412 if length is None: 1413 # Use to the end of the data 1414 length = len(data)*8 - offset 1415 self._datastore = ByteStore(data, length, offset) 1416 else: 1417 if length + offset > len(data) * 8: 1418 msg = "Not enough data present. Need {0} bits, have {1}." 1419 raise CreationError(msg, length + offset, len(data) * 8) 1420 if length == 0: 1421 self._datastore = ByteStore(bytearray(0)) 1422 else: 1423 self._datastore = ByteStore(data, length, offset) 1424 1425 def _setbytes_unsafe(self, data, length, offset): 1426 """Unchecked version of _setbytes_safe.""" 1427 self._datastore = type(self._datastore)(data[:], length, offset) 1428 assert self._assertsanity() 1429 1430 def _readbytes(self, length, start): 1431 """Read bytes and return them. Note that length is in bits.""" 1432 assert length % 8 == 0 1433 assert start + length <= self.len 1434 if not (start + self._offset) % 8: 1435 return bytes(self._datastore.getbyteslice((start + self._offset) // 8, 1436 (start + self._offset + length) // 8)) 1437 return self._slice(start, start + length).tobytes() 1438 1439 def _getbytes(self): 1440 """Return the data as an ordinary string.""" 1441 if self.len % 8: 1442 raise InterpretError("Cannot interpret as bytes unambiguously - " 1443 "not multiple of 8 bits.") 1444 return self._readbytes(self.len, 0) 1445 1446 def _setuint(self, uint, length=None): 1447 """Reset the bitstring to have given unsigned int interpretation.""" 1448 try: 1449 if length is None: 1450 # Use the whole length. Deliberately not using .len here. 1451 length = self._datastore.bitlength 1452 except AttributeError: 1453 # bitstring doesn't have a _datastore as it hasn't been created! 1454 pass 1455 if length is None or length == 0: 1456 raise CreationError("A non-zero length must be specified with a " 1457 "uint initialiser.") 1458 if uint >= (1 << length): 1459 msg = "{0} is too large an unsigned integer for a bitstring of length {1}. "\ 1460 "The allowed range is [0, {2}]." 1461 raise CreationError(msg, uint, length, (1 << length) - 1) 1462 if uint < 0: 1463 raise CreationError("uint cannot be initialised by a negative number.") 1464 s = hex(uint)[2:] 1465 s = s.rstrip('L') 1466 if len(s) & 1: 1467 s = '0' + s 1468 try: 1469 data = bytes.fromhex(s) 1470 except AttributeError: 1471 # the Python 2.x way 1472 data = binascii.unhexlify(s) 1473 # Now add bytes as needed to get the right length. 1474 extrabytes = ((length + 7) // 8) - len(data) 1475 if extrabytes > 0: 1476 data = b'\x00' * extrabytes + data 1477 offset = 8 - (length % 8) 1478 if offset == 8: 1479 offset = 0 1480 self._setbytes_unsafe(bytearray(data), length, offset) 1481 1482 def _readuint_lsb0(self, length, start): 1483 # TODO: This needs a complete rewrite - can't delegate to _readuint_msb0 1484 return self._readuint_msb0(length, self.len - start - length) 1485 1486 def _readuint_msb0(self, length, start): 1487 """Read bits and interpret as an unsigned int.""" 1488 if not length: 1489 raise InterpretError("Cannot interpret a zero length bitstring " 1490 "as an integer.") 1491 offset = self._offset 1492 startbyte = (start + offset) // 8 1493 endbyte = (start + offset + length - 1) // 8 1494 1495 b = binascii.hexlify(bytes(self._datastore.getbyteslice(startbyte, endbyte + 1))) 1496 assert b 1497 i = int(b, 16) 1498 final_bits = 8 - ((start + offset + length) % 8) 1499 if final_bits != 8: 1500 i >>= final_bits 1501 i &= (1 << length) - 1 1502 return i 1503 1504 def _getuint(self): 1505 """Return data as an unsigned int.""" 1506 return self._readuint(self.len, 0) 1507 1508 def _setint(self, int_, length=None): 1509 """Reset the bitstring to have given signed int interpretation.""" 1510 # If no length given, and we've previously been given a length, use it. 1511 if length is None and hasattr(self, 'len') and self.len != 0: 1512 length = self.len 1513 if length is None or length == 0: 1514 raise CreationError("A non-zero length must be specified with an int initialiser.") 1515 if int_ >= (1 << (length - 1)) or int_ < -(1 << (length - 1)): 1516 raise CreationError("{0} is too large a signed integer for a bitstring of length {1}. " 1517 "The allowed range is [{2}, {3}].", int_, length, -(1 << (length - 1)), 1518 (1 << (length - 1)) - 1) 1519 if int_ >= 0: 1520 self._setuint(int_, length) 1521 return 1522 # Do the 2's complement thing. Add one, set to minus number, then flip bits. 1523 self._setuint((-int_ - 1) ^ ((1 << length) - 1), length) 1524 1525 def _readint(self, length, start): 1526 """Read bits and interpret as a signed int""" 1527 ui = self._readuint(length, start) 1528 if not ui >> (length - 1): 1529 # Top bit not set, number is positive 1530 return ui 1531 # Top bit is set, so number is negative 1532 tmp = (~(ui - 1)) & ((1 << length) - 1) 1533 return -tmp 1534 1535 def _getint(self): 1536 """Return data as a two's complement signed int.""" 1537 return self._readint(self.len, 0) 1538 1539 def _setuintbe(self, uintbe, length=None): 1540 """Set the bitstring to a big-endian unsigned int interpretation.""" 1541 if length is not None and length % 8 != 0: 1542 raise CreationError("Big-endian integers must be whole-byte. " 1543 "Length = {0} bits.", length) 1544 self._setuint(uintbe, length) 1545 1546 def _readuintbe(self, length, start): 1547 """Read bits and interpret as a big-endian unsigned int.""" 1548 if length % 8: 1549 raise InterpretError("Big-endian integers must be whole-byte. " 1550 "Length = {0} bits.", length) 1551 return self._readuint(length, start) 1552 1553 def _getuintbe(self): 1554 """Return data as a big-endian two's complement unsigned int.""" 1555 return self._readuintbe(self.len, 0) 1556 1557 def _setintbe(self, intbe, length=None): 1558 """Set bitstring to a big-endian signed int interpretation.""" 1559 if length is not None and length % 8 != 0: 1560 raise CreationError("Big-endian integers must be whole-byte. " 1561 "Length = {0} bits.", length) 1562 self._setint(intbe, length) 1563 1564 def _readintbe(self, length, start): 1565 """Read bits and interpret as a big-endian signed int.""" 1566 if length % 8: 1567 raise InterpretError("Big-endian integers must be whole-byte. " 1568 "Length = {0} bits.", length) 1569 return self._readint(length, start) 1570 1571 def _getintbe(self): 1572 """Return data as a big-endian two's complement signed int.""" 1573 return self._readintbe(self.len, 0) 1574 1575 def _setuintle(self, uintle, length=None): 1576 if length is not None and length % 8 != 0: 1577 raise CreationError("Little-endian integers must be whole-byte. " 1578 "Length = {0} bits.", length) 1579 self._setuint(uintle, length) 1580 self._datastore._rawarray = self._datastore._rawarray[::-1] 1581 1582 def _readuintle(self, length, start): 1583 """Read bits and interpret as a little-endian unsigned int.""" 1584 if length % 8: 1585 raise InterpretError("Little-endian integers must be whole-byte. " 1586 "Length = {0} bits.", length) 1587 assert start + length <= self.len 1588 absolute_pos = start + self._offset 1589 startbyte, offset = divmod(absolute_pos, 8) 1590 val = 0 1591 if not offset: 1592 endbyte = (absolute_pos + length - 1) // 8 1593 chunksize = 4 # for 'L' format 1594 while endbyte - chunksize + 1 >= startbyte: 1595 val <<= 8 * chunksize 1596 val += struct.unpack('<L', bytes(self._datastore.getbyteslice(endbyte + 1 - chunksize, endbyte + 1)))[0] 1597 endbyte -= chunksize 1598 for b in xrange(endbyte, startbyte - 1, -1): 1599 val <<= 8 1600 val += self._datastore.getbyte(b) 1601 else: 1602 data = self._slice(start, start + length) 1603 assert data.len % 8 == 0 1604 data._reversebytes(0, self.len) 1605 for b in bytearray(data.bytes): 1606 val <<= 8 1607 val += b 1608 return val 1609 1610 def _getuintle(self): 1611 return self._readuintle(self.len, 0) 1612 1613 def _setintle(self, intle, length=None): 1614 if length is not None and length % 8 != 0: 1615 raise CreationError("Little-endian integers must be whole-byte. " 1616 "Length = {0} bits.", length) 1617 self._setint(intle, length) 1618 self._datastore._rawarray = self._datastore._rawarray[::-1] 1619 1620 def _readintle(self, length, start): 1621 """Read bits and interpret as a little-endian signed int.""" 1622 ui = self._readuintle(length, start) 1623 if not ui >> (length - 1): 1624 # Top bit not set, number is positive 1625 return ui 1626 # Top bit is set, so number is negative 1627 tmp = (~(ui - 1)) & ((1 << length) - 1) 1628 return -tmp 1629 1630 def _getintle(self): 1631 return self._readintle(self.len, 0) 1632 1633 def _setfloat(self, f, length=None): 1634 # If no length given, and we've previously been given a length, use it. 1635 if length is None and hasattr(self, 'len') and self.len != 0: 1636 length = self.len 1637 if length is None or length == 0: 1638 raise CreationError("A non-zero length must be specified with a " 1639 "float initialiser.") 1640 if length == 32: 1641 b = struct.pack('>f', f) 1642 elif length == 64: 1643 b = struct.pack('>d', f) 1644 else: 1645 raise CreationError("floats can only be 32 or 64 bits long, " 1646 "not {0} bits", length) 1647 self._setbytes_unsafe(bytearray(b), length, 0) 1648 1649 def _readfloat(self, length, start): 1650 """Read bits and interpret as a float.""" 1651 if not (start + self._offset) % 8: 1652 startbyte = (start + self._offset) // 8 1653 if length == 32: 1654 f, = struct.unpack('>f', bytes(self._datastore.getbyteslice(startbyte, startbyte + 4))) 1655 elif length == 64: 1656 f, = struct.unpack('>d', bytes(self._datastore.getbyteslice(startbyte, startbyte + 8))) 1657 else: 1658 if length == 32: 1659 f, = struct.unpack('>f', self._readbytes(32, start)) 1660 elif length == 64: 1661 f, = struct.unpack('>d', self._readbytes(64, start)) 1662 try: 1663 return f 1664 except NameError: 1665 raise InterpretError("floats can only be 32 or 64 bits long, not {0} bits", length) 1666 1667 def _getfloat(self): 1668 """Interpret the whole bitstring as a float.""" 1669 return self._readfloat(self.len, 0) 1670 1671 def _setfloatle(self, f, length=None): 1672 # If no length given, and we've previously been given a length, use it. 1673 if length is None and hasattr(self, 'len') and self.len != 0: 1674 length = self.len 1675 if length is None or length == 0: 1676 raise CreationError("A non-zero length must be specified with a " 1677 "float initialiser.") 1678 if length == 32: 1679 b = struct.pack('<f', f) 1680 elif length == 64: 1681 b = struct.pack('<d', f) 1682 else: 1683 raise CreationError("floats can only be 32 or 64 bits long, " 1684 "not {0} bits", length) 1685 self._setbytes_unsafe(bytearray(b), length, 0) 1686 1687 def _readfloatle(self, length, start): 1688 """Read bits and interpret as a little-endian float.""" 1689 startbyte, offset = divmod(start + self._offset, 8) 1690 if not offset: 1691 if length == 32: 1692 f, = struct.unpack('<f', bytes(self._datastore.getbyteslice(startbyte, startbyte + 4))) 1693 elif length == 64: 1694 f, = struct.unpack('<d', bytes(self._datastore.getbyteslice(startbyte, startbyte + 8))) 1695 else: 1696 if length == 32: 1697 f, = struct.unpack('<f', self._readbytes(32, start)) 1698 elif length == 64: 1699 f, = struct.unpack('<d', self._readbytes(64, start)) 1700 try: 1701 return f 1702 except NameError: 1703 raise InterpretError("floats can only be 32 or 64 bits long, " 1704 "not {0} bits", length) 1705 1706 def _getfloatle(self): 1707 """Interpret the whole bitstring as a little-endian float.""" 1708 return self._readfloatle(self.len, 0) 1709 1710 def _setue(self, i): 1711 """Initialise bitstring with unsigned exponential-Golomb code for integer i. 1712 1713 Raises CreationError if i < 0. 1714 1715 """ 1716 if i < 0: 1717 raise CreationError("Cannot use negative initialiser for unsigned " 1718 "exponential-Golomb.") 1719 if not i: 1720 self._setbin_unsafe('1') 1721 return 1722 tmp = i + 1 1723 leadingzeros = -1 1724 while tmp > 0: 1725 tmp >>= 1 1726 leadingzeros += 1 1727 remainingpart = i + 1 - (1 << leadingzeros) 1728 binstring = '0' * leadingzeros + '1' + Bits(uint=remainingpart, 1729 length=leadingzeros).bin 1730 self._setbin_unsafe(binstring) 1731 1732 def _readue(self, pos): 1733 """Return interpretation of next bits as unsigned exponential-Golomb code. 1734 1735 Raises ReadError if the end of the bitstring is encountered while 1736 reading the code. 1737 1738 """ 1739 oldpos = pos 1740 try: 1741 while not self[pos]: 1742 pos += 1 1743 except IndexError: 1744 raise ReadError("Read off end of bitstring trying to read code.") 1745 leadingzeros = pos - oldpos 1746 codenum = (1 << leadingzeros) - 1 1747 if leadingzeros > 0: 1748 if pos + leadingzeros + 1 > self.len: 1749 raise ReadError("Read off end of bitstring trying to read code.") 1750 codenum += self._readuint(leadingzeros, pos + 1) 1751 pos += leadingzeros + 1 1752 else: 1753 assert codenum == 0 1754 pos += 1 1755 return codenum, pos 1756 1757 def _getue(self): 1758 """Return data as unsigned exponential-Golomb code. 1759 1760 Raises InterpretError if bitstring is not a single exponential-Golomb code. 1761 1762 """ 1763 try: 1764 value, newpos = self._readue(0) 1765 if value is None or newpos != self.len: 1766 raise ReadError 1767 except ReadError: 1768 raise InterpretError("Bitstring is not a single exponential-Golomb code.") 1769 return value 1770 1771 def _setse(self, i): 1772 """Initialise bitstring with signed exponential-Golomb code for integer i.""" 1773 if i > 0: 1774 u = (i * 2) - 1 1775 else: 1776 u = -2 * i 1777 self._setue(u) 1778 1779 def _getse(self): 1780 """Return data as signed exponential-Golomb code. 1781 1782 Raises InterpretError if bitstring is not a single exponential-Golomb code. 1783 1784 """ 1785 try: 1786 value, newpos = self._readse(0) 1787 if value is None or newpos != self.len: 1788 raise ReadError 1789 except ReadError: 1790 raise InterpretError("Bitstring is not a single exponential-Golomb code.") 1791 return value 1792 1793 def _readse(self, pos): 1794 """Return interpretation of next bits as a signed exponential-Golomb code. 1795 1796 Advances position to after the read code. 1797 1798 Raises ReadError if the end of the bitstring is encountered while 1799 reading the code. 1800 1801 """ 1802 codenum, pos = self._readue(pos) 1803 m = (codenum + 1) // 2 1804 if not codenum % 2: 1805 return -m, pos 1806 else: 1807 return m, pos 1808 1809 def _setuie(self, i): 1810 """Initialise bitstring with unsigned interleaved exponential-Golomb code for integer i. 1811 1812 Raises CreationError if i < 0. 1813 1814 """ 1815 if i < 0: 1816 raise CreationError("Cannot use negative initialiser for unsigned " 1817 "interleaved exponential-Golomb.") 1818 self._setbin_unsafe('1' if i == 0 else '0' + '0'.join(bin(i + 1)[3:]) + '1') 1819 1820 def _readuie(self, pos): 1821 """Return interpretation of next bits as unsigned interleaved exponential-Golomb code. 1822 1823 Raises ReadError if the end of the bitstring is encountered while 1824 reading the code. 1825 1826 """ 1827 try: 1828 codenum = 1 1829 while not self[pos]: 1830 pos += 1 1831 codenum <<= 1 1832 codenum += self[pos] 1833 pos += 1 1834 pos += 1 1835 except IndexError: 1836 raise ReadError("Read off end of bitstring trying to read code.") 1837 codenum -= 1 1838 return codenum, pos 1839 1840 def _getuie(self): 1841 """Return data as unsigned interleaved exponential-Golomb code. 1842 1843 Raises InterpretError if bitstring is not a single exponential-Golomb code. 1844 1845 """ 1846 try: 1847 value, newpos = self._readuie(0) 1848 if value is None or newpos != self.len: 1849 raise ReadError 1850 except ReadError: 1851 raise InterpretError("Bitstring is not a single interleaved exponential-Golomb code.") 1852 return value 1853 1854 def _setsie(self, i): 1855 """Initialise bitstring with signed interleaved exponential-Golomb code for integer i.""" 1856 if not i: 1857 self._setbin_unsafe('1') 1858 else: 1859 self._setuie(abs(i)) 1860 self._addright(Bits([i < 0])) 1861 1862 def _getsie(self): 1863 """Return data as signed interleaved exponential-Golomb code. 1864 1865 Raises InterpretError if bitstring is not a single exponential-Golomb code. 1866 1867 """ 1868 try: 1869 value, newpos = self._readsie(0) 1870 if value is None or newpos != self.len: 1871 raise ReadError 1872 except ReadError: 1873 raise InterpretError("Bitstring is not a single interleaved exponential-Golomb code.") 1874 return value 1875 1876 def _readsie(self, pos): 1877 """Return interpretation of next bits as a signed interleaved exponential-Golomb code. 1878 1879 Advances position to after the read code. 1880 1881 Raises ReadError if the end of the bitstring is encountered while 1882 reading the code. 1883 1884 """ 1885 codenum, pos = self._readuie(pos) 1886 if not codenum: 1887 return 0, pos 1888 try: 1889 if self[pos]: 1890 return -codenum, pos + 1 1891 else: 1892 return codenum, pos + 1 1893 except IndexError: 1894 raise ReadError("Read off end of bitstring trying to read code.") 1895 1896 def _setbool(self, value): 1897 # We deliberately don't want to have implicit conversions to bool here. 1898 # If we did then it would be difficult to deal with the 'False' string. 1899 if value in (1, 'True'): 1900 self._setbytes_unsafe(bytearray(b'\x80'), 1, 0) 1901 elif value in (0, 'False'): 1902 self._setbytes_unsafe(bytearray(b'\x00'), 1, 0) 1903 else: 1904 raise CreationError('Cannot initialise boolean with {0}.', value) 1905 1906 def _getbool(self): 1907 if self.length != 1: 1908 msg = "For a bool interpretation a bitstring must be 1 bit long, not {0} bits." 1909 raise InterpretError(msg, self.length) 1910 return self[0] 1911 1912 def _readbool(self, pos): 1913 return self[pos], pos + 1 1914 1915 def _setbin_safe(self, binstring): 1916 """Reset the bitstring to the value given in binstring.""" 1917 binstring = tidy_input_string(binstring) 1918 # remove any 0b if present 1919 binstring = binstring.replace('0b', '') 1920 self._setbin_unsafe(binstring) 1921 1922 def _setbin_unsafe(self, binstring): 1923 """Same as _setbin_safe, but input isn't sanity checked. binstring mustn't start with '0b'.""" 1924 length = len(binstring) 1925 # pad with zeros up to byte boundary if needed 1926 boundary = ((length + 7) // 8) * 8 1927 padded_binstring = binstring + '0' * (boundary - length)\ 1928 if len(binstring) < boundary else binstring 1929 try: 1930 bytelist = [int(padded_binstring[x:x + 8], 2) 1931 for x in xrange(0, len(padded_binstring), 8)] 1932 except ValueError: 1933 raise CreationError("Invalid character in bin initialiser {0}.", binstring) 1934 self._setbytes_unsafe(bytearray(bytelist), length, 0) 1935 1936 def _readbin(self, length, start): 1937 """Read bits and interpret as a binary string.""" 1938 if not length: 1939 return '' 1940 # Get the byte slice containing our bit slice 1941 startbyte, startoffset = divmod(start + self._offset, 8) 1942 endbyte = (start + self._offset + length - 1) // 8 1943 b = self._datastore.getbyteslice(startbyte, endbyte + 1) 1944 # Convert to a string of '0' and '1's (via a hex string an and int!) 1945 c = "{:0{}b}".format(int(binascii.hexlify(b), 16), 8*len(b)) 1946 # Finally chop off any extra bits. 1947 return c[startoffset:startoffset + length] 1948 1949 def _getbin(self): 1950 """Return interpretation as a binary string.""" 1951 return self._readbin(self.len, 0) 1952 1953 def _setoct(self, octstring): 1954 """Reset the bitstring to have the value given in octstring.""" 1955 octstring = tidy_input_string(octstring) 1956 # remove any 0o if present 1957 octstring = octstring.replace('0o', '') 1958 binlist = [] 1959 for i in octstring: 1960 try: 1961 binlist.append(OCT_TO_BITS[int(i)]) 1962 except (ValueError, IndexError): 1963 raise CreationError("Invalid symbol '{0}' in oct initialiser.", i) 1964 1965 self._setbin_unsafe(''.join(binlist)) 1966 1967 def _readoct(self, length, start): 1968 """Read bits and interpret as an octal string.""" 1969 if length % 3: 1970 raise InterpretError("Cannot convert to octal unambiguously - " 1971 "not multiple of 3 bits.") 1972 if not length: 1973 return '' 1974 # Get main octal bit by converting from int. 1975 # Strip starting 0 or 0o depending on Python version. 1976 end = oct(self._readuint(length, start))[LEADING_OCT_CHARS:] 1977 if end.endswith('L'): 1978 end = end[:-1] 1979 middle = '0' * (length // 3 - len(end)) 1980 return middle + end 1981 1982 def _getoct(self): 1983 """Return interpretation as an octal string.""" 1984 return self._readoct(self.len, 0) 1985 1986 def _sethex(self, hexstring): 1987 """Reset the bitstring to have the value given in hexstring.""" 1988 hexstring = tidy_input_string(hexstring) 1989 # remove any 0x if present 1990 hexstring = hexstring.replace('0x', '') 1991 length = len(hexstring) 1992 if length % 2: 1993 hexstring += '0' 1994 try: 1995 data = bytearray.fromhex(hexstring) 1996 except ValueError: 1997 raise CreationError("Invalid symbol in hex initialiser.") 1998 self._setbytes_unsafe(data, length * 4, 0) 1999 2000 def _readhex(self, length, start): 2001 """Read bits and interpret as a hex string.""" 2002 if length % 4: 2003 raise InterpretError("Cannot convert to hex unambiguously - " 2004 "not multiple of 4 bits.") 2005 if not length: 2006 return '' 2007 s = self._slice(start, start + length).tobytes() 2008 try: 2009 s = s.hex() # Available in Python 3.5+ 2010 except AttributeError: 2011 # This monstrosity is the only thing I could get to work for both 2.6 and 3.1. 2012 s = str(binascii.hexlify(s).decode('utf-8')) 2013 # If there's one nibble too many then cut it off 2014 return s[:-1] if (length // 4) % 2 else s 2015 2016 def _gethex(self): 2017 """Return the hexadecimal representation as a string prefixed with '0x'. 2018 2019 Raises an InterpretError if the bitstring's length is not a multiple of 4. 2020 2021 """ 2022 return self._readhex(self.len, 0) 2023 2024 def _getoffset(self): 2025 return self._datastore.offset 2026 2027 def _getlength(self): 2028 """Return the length of the bitstring in bits.""" 2029 return self._datastore.bitlength 2030 2031 def _ensureinmemory(self): 2032 """Ensure the data is held in memory, not in a file.""" 2033 self._setbytes_unsafe(self._datastore.getbyteslice(0, self._datastore.bytelength), 2034 self.len, self._offset) 2035 2036 @classmethod 2037 def _converttobitstring(cls, bs, offset=0, cache={}): 2038 """Convert bs to a bitstring and return it. 2039 2040 offset gives the suggested bit offset of first significant 2041 bit, to optimise append etc. 2042 2043 """ 2044 if isinstance(bs, Bits): 2045 return bs 2046 try: 2047 return cache[(bs, offset)] 2048 except KeyError: 2049 if isinstance(bs, basestring): 2050 b = cls() 2051 try: 2052 _, tokens = tokenparser(bs) 2053 except ValueError as e: 2054 raise CreationError(*e.args) 2055 if tokens: 2056 b._addright(Bits._init_with_token(*tokens[0])) 2057 b._datastore = offsetcopy(b._datastore, offset) 2058 for token in tokens[1:]: 2059 b._addright(Bits._init_with_token(*token)) 2060 assert b._assertsanity() 2061 assert b.len == 0 or b._offset == offset 2062 if len(cache) < CACHE_SIZE: 2063 cache[(bs, offset)] = b 2064 return b 2065 except TypeError: 2066 # Unhashable type 2067 pass 2068 return cls(bs) 2069 2070 def _copy(self): 2071 """Create and return a new copy of the Bits (always in memory).""" 2072 s_copy = self.__class__() 2073 s_copy._setbytes_unsafe(self._datastore.getbyteslice(0, self._datastore.bytelength), 2074 self.len, self._offset) 2075 return s_copy 2076 2077 def _slice_lsb0(self, start, end): 2078 """Used internally to get a slice, without error checking (LSB0).""" 2079 return self._slice_msb0(self.length - end, self.length - start) 2080 2081 def _slice_msb0(self, start, end): 2082 """Used internally to get a slice, without error checking.""" 2083 if end == start: 2084 return self.__class__() 2085 assert start < end, "start={0}, end={1}".format(start, end) 2086 offset = self._offset 2087 startbyte, newoffset = divmod(start + offset, 8) 2088 endbyte = (end + offset - 1) // 8 2089 bs = self.__class__() 2090 bs._setbytes_unsafe(self._datastore.getbyteslice(startbyte, endbyte + 1), end - start, newoffset) 2091 return bs 2092 2093 def _readtoken(self, name, pos, length): 2094 """Reads a token from the bitstring and returns the result.""" 2095 if length is not None and int(length) > self.length - pos: 2096 raise ReadError("Reading off the end of the data. " 2097 "Tried to read {0} bits when only {1} available.".format(int(length), self.length - pos)) 2098 try: 2099 val = name_to_read[name](self, length, pos) 2100 return val, pos + length 2101 except KeyError: 2102 if name == 'pad': 2103 return None, pos + length 2104 raise ValueError("Can't parse token {0}:{1}".format(name, length)) 2105 except TypeError: 2106 # This is for the 'ue', 'se' and 'bool' tokens. They will also return the new pos. 2107 return name_to_read[name](self, pos) 2108 2109 def _addright(self, bs): 2110 """Add a bitstring to the RHS of the current bitstring.""" 2111 self._datastore._appendstore(bs._datastore) 2112 2113 def _addleft(self, bs): 2114 """Prepend a bitstring to the current bitstring.""" 2115 self._datastore._prependstore(bs._datastore) 2116 2117 def _reverse(self): 2118 """Reverse all bits in-place.""" 2119 # Reverse the contents of each byte 2120 n = [BYTE_REVERSAL_DICT[b] for b in self._datastore.rawbytes] 2121 # Then reverse the order of the bytes 2122 n.reverse() 2123 # The new offset is the number of bits that were unused at the end. 2124 newoffset = 8 - (self._offset + self.len) % 8 2125 if newoffset == 8: 2126 newoffset = 0 2127 self._setbytes_unsafe(bytearray().join(n), self.length, newoffset) 2128 2129 def _truncateleft(self, bits): 2130 """Truncate bits from the start of the bitstring.""" 2131 assert 0 <= bits <= self.len 2132 if not bits: 2133 return Bits() 2134 truncated_bits = self._slice_msb0(0, bits) 2135 if bits == self.len: 2136 self._clear() 2137 return truncated_bits 2138 bytepos, offset = divmod(self._offset + bits, 8) 2139 self._setbytes_unsafe(self._datastore.getbyteslice(bytepos, self._datastore.bytelength), self.len - bits, 2140 offset) 2141 assert self._assertsanity() 2142 return truncated_bits 2143 2144 def _truncateright(self, bits): 2145 """Truncate bits from the end of the bitstring.""" 2146 assert 0 <= bits <= self.len 2147 if not bits: 2148 return Bits() 2149 truncated_bits = self._slice_lsb0(0, bits) 2150 if bits == self.len: 2151 self._clear() 2152 return truncated_bits 2153 newlength_in_bytes = (self._offset + self.len - bits + 7) // 8 2154 self._setbytes_unsafe(self._datastore.getbyteslice(0, newlength_in_bytes), self.len - bits, 2155 self._offset) 2156 assert self._assertsanity() 2157 return truncated_bits 2158 2159 def _insert_lsb0(self, bs, pos): 2160 """Insert bs at pos (LSB0).""" 2161 self._insert_msb0(bs, self.len - pos) 2162 2163 def _insert_msb0(self, bs, pos): 2164 """Insert bs at pos.""" 2165 assert 0 <= pos <= self.len 2166 if pos > self.len // 2: 2167 # Inserting nearer end, so cut off end. 2168 # end = self._slice(pos, self.len) 2169 end = self._truncateright(self.len - pos) 2170 self._addright(bs) 2171 self._addright(end) 2172 else: 2173 # Inserting nearer start, so cut off start. 2174 start = self._slice(0, pos) 2175 self._truncateleft(pos) 2176 self._addleft(bs) 2177 self._addleft(start) 2178 try: 2179 self._pos = pos + bs.len 2180 except AttributeError: 2181 pass 2182 assert self._assertsanity() 2183 2184 def _overwrite_lsb0(self, bs, pos): 2185 """Overwrite with bs at pos (LSB0).""" 2186 self._overwrite_msb0(bs, self.len - pos - bs.len) 2187 2188 def _overwrite_msb0(self, bs, pos): 2189 """Overwrite with bs at pos.""" 2190 assert 0 <= pos < self.len 2191 if bs is self: 2192 # Just overwriting with self, so do nothing. 2193 assert pos == 0 2194 return 2195 firstbytepos = (self._offset + pos) // 8 2196 lastbytepos = (self._offset + pos + bs.len - 1) // 8 2197 bytepos, bitoffset = divmod(self._offset + pos, 8) 2198 if firstbytepos == lastbytepos: 2199 mask = ((1 << bs.len) - 1) << (8 - bs.len - bitoffset) 2200 self._datastore.setbyte(bytepos, self._datastore.getbyte(bytepos) & (~mask)) 2201 d = offsetcopy(bs._datastore, bitoffset) 2202 self._datastore.setbyte(bytepos, self._datastore.getbyte(bytepos) | (d.getbyte(0) & mask)) 2203 else: 2204 # Do first byte 2205 mask = (1 << (8 - bitoffset)) - 1 2206 self._datastore.setbyte(bytepos, self._datastore.getbyte(bytepos) & (~mask)) 2207 d = offsetcopy(bs._datastore, bitoffset) 2208 self._datastore.setbyte(bytepos, self._datastore.getbyte(bytepos) | (d.getbyte(0) & mask)) 2209 # Now do all the full bytes 2210 self._datastore.setbyteslice(firstbytepos + 1, lastbytepos, d.getbyteslice(1, lastbytepos - firstbytepos)) 2211 # and finally the last byte 2212 bitsleft = (self._offset + pos + bs.len) % 8 2213 if not bitsleft: 2214 bitsleft = 8 2215 mask = (1 << (8 - bitsleft)) - 1 2216 self._datastore.setbyte(lastbytepos, self._datastore.getbyte(lastbytepos) & mask) 2217 self._datastore.setbyte(lastbytepos, 2218 self._datastore.getbyte(lastbytepos) | (d.getbyte(d.bytelength - 1) & ~mask)) 2219 assert self._assertsanity() 2220 2221 def _delete_lsb0(self, bits, pos): 2222 """Delete bits at pos (LSB0).""" 2223 self._delete_msb0(bits, self.len - pos - bits) 2224 2225 def _delete_msb0(self, bits, pos): 2226 """Delete bits at pos.""" 2227 assert 0 <= pos <= self.len 2228 assert pos + bits <= self.len, "pos={}, bits={}, len={}".format(pos, bits, self.len) 2229 if not pos: 2230 # Cutting bits off at the start. 2231 self._truncateleft(bits) 2232 return 2233 if pos + bits == self.len: 2234 # Cutting bits off at the end. 2235 self._truncateright(bits) 2236 return 2237 if pos > self.len - pos - bits: 2238 # More bits before cut point than after it, so do bit shifting 2239 # on the final bits. 2240 end = self._slice_msb0(pos + bits, self.len) 2241 assert self.len - pos > 0 2242 self._truncateright(self.len - pos) 2243 self._addright(end) 2244 return 2245 # More bits after the cut point than before it. 2246 start = self._slice_msb0(0, pos) 2247 self._truncateleft(pos + bits) 2248 self._addleft(start) 2249 return 2250 2251 def _reversebytes(self, start, end): 2252 """Reverse bytes in-place.""" 2253 # Make the start occur on a byte boundary 2254 # TODO: We could be cleverer here to avoid changing the offset. 2255 newoffset = 8 - (start % 8) 2256 if newoffset == 8: 2257 newoffset = 0 2258 self._datastore = offsetcopy(self._datastore, newoffset) 2259 # Now just reverse the byte data 2260 toreverse = bytearray(self._datastore.getbyteslice((newoffset + start) // 8, (newoffset + end) // 8)) 2261 toreverse.reverse() 2262 self._datastore.setbyteslice((newoffset + start) // 8, (newoffset + end) // 8, toreverse) 2263 2264 def _set(self, pos): 2265 """Set bit at pos to 1.""" 2266 assert 0 <= pos < self.len 2267 self._datastore.setbit(pos) 2268 2269 def _unset(self, pos): 2270 """Set bit at pos to 0.""" 2271 assert 0 <= pos < self.len 2272 self._datastore.unsetbit(pos) 2273 2274 def _invert(self, pos): 2275 """Flip bit at pos 1<->0.""" 2276 assert 0 <= pos < self.len 2277 self._datastore.invertbit(pos) 2278 2279 def _invert_all(self): 2280 """Invert every bit.""" 2281 for p in xrange(self._datastore.byteoffset, self._datastore.byteoffset + self._datastore.bytelength): 2282 self._datastore._rawarray[p] = 256 + ~self._datastore._rawarray[p] 2283 2284 def _ilshift(self, n): 2285 """Shift bits by n to the left in place. Return self.""" 2286 assert 0 < n <= self.len 2287 self._addright(Bits(n)) 2288 self._truncateleft(n) 2289 return self 2290 2291 def _irshift(self, n): 2292 """Shift bits by n to the right in place. Return self.""" 2293 assert 0 < n <= self.len 2294 self._addleft(Bits(n)) 2295 self._truncateright(n) 2296 return self 2297 2298 def _imul(self, n): 2299 """Concatenate n copies of self in place. Return self.""" 2300 assert n >= 0 2301 if not n: 2302 self._clear() 2303 return self 2304 m = 1 2305 old_len = self.len 2306 while m * 2 < n: 2307 self._addright(self) 2308 m *= 2 2309 self._addright(self[0:(n - m) * old_len]) 2310 return self 2311 2312 def _inplace_logical_helper(self, bs, f): 2313 """Helper function containing most of the __ior__, __iand__, __ixor__ code.""" 2314 # Give the two bitstrings the same offset (modulo 8) 2315 self_byteoffset, self_bitoffset = divmod(self._offset, 8) 2316 bs_byteoffset, bs_bitoffset = divmod(bs._offset, 8) 2317 if bs_bitoffset != self_bitoffset: 2318 if not self_bitoffset: 2319 bs._datastore = offsetcopy(bs._datastore, 0) 2320 else: 2321 self._datastore = offsetcopy(self._datastore, bs_bitoffset) 2322 a = self._datastore.rawbytes 2323 b = bs._datastore.rawbytes 2324 for i in xrange(len(a)): 2325 a[i] = f(a[i + self_byteoffset], b[i + bs_byteoffset]) 2326 return self 2327 2328 def _ior(self, bs): 2329 return self._inplace_logical_helper(bs, operator.ior) 2330 2331 def _iand(self, bs): 2332 return self._inplace_logical_helper(bs, operator.iand) 2333 2334 def _ixor(self, bs): 2335 return self._inplace_logical_helper(bs, operator.xor) 2336 2337 def _readbits(self, length, start): 2338 """Read some bits from the bitstring and return newly constructed bitstring.""" 2339 return self._slice(start, start + length) 2340 2341 def _validate_slice_msb0(self, start, end): 2342 """Validate start and end and return them as positive bit positions.""" 2343 if start is None: 2344 start = 0 2345 elif start < 0: 2346 start += self.len 2347 if end is None: 2348 end = self.len 2349 elif end < 0: 2350 end += self.len 2351 if not 0 <= end <= self.len: 2352 raise ValueError("end is not a valid position in the bitstring.") 2353 if not 0 <= start <= self.len: 2354 raise ValueError("start is not a valid position in the bitstring.") 2355 if end < start: 2356 raise ValueError("end must not be less than start.") 2357 return start, end 2358 2359 def _validate_slice_lsb0(self, start, end): 2360 start, end = self._validate_slice_msb0(start, end) 2361 return self.len - end, self.len - start 2362 2363 def unpack(self, fmt, **kwargs): 2364 """Interpret the whole bitstring using fmt and return list. 2365 2366 fmt -- A single string or a list of strings with comma separated tokens 2367 describing how to interpret the bits in the bitstring. Items 2368 can also be integers, for reading new bitstring of the given length. 2369 kwargs -- A dictionary or keyword-value pairs - the keywords used in the 2370 format string will be replaced with their given value. 2371 2372 Raises ValueError if the format is not understood. If not enough bits 2373 are available then all bits to the end of the bitstring will be used. 2374 2375 See the docstring for 'read' for token examples. 2376 2377 """ 2378 return self._readlist(fmt, 0, **kwargs)[0] 2379 2380 def _readlist(self, fmt, pos, **kwargs): 2381 tokens = [] 2382 stretchy_token = None 2383 if isinstance(fmt, basestring): 2384 fmt = [fmt] 2385 # Replace integers with 'bits' tokens 2386 for i, f in enumerate(fmt): 2387 if isinstance(f, numbers.Integral): 2388 fmt[i] = "bits:{0}".format(f) 2389 for f_item in fmt: 2390 stretchy, tkns = tokenparser(f_item, tuple(sorted(kwargs.keys()))) 2391 if stretchy: 2392 if stretchy_token: 2393 raise Error("It's not possible to have more than one 'filler' token.") 2394 stretchy_token = stretchy 2395 tokens.extend(tkns) 2396 if not stretchy_token: 2397 lst = [] 2398 for name, length, _ in tokens: 2399 if length in kwargs: 2400 length = kwargs[length] 2401 if name == 'bytes': 2402 length *= 8 2403 if name in kwargs and length is None: 2404 # Using default 'uint' - the name is really the length. 2405 value, pos = self._readtoken('uint', pos, kwargs[name]) 2406 lst.append(value) 2407 continue 2408 value, pos = self._readtoken(name, pos, length) 2409 if value is not None: # Don't append pad tokens 2410 lst.append(value) 2411 return lst, pos 2412 stretchy_token = False 2413 bits_after_stretchy_token = 0 2414 for token in tokens: 2415 name, length, _ = token 2416 if length in kwargs: 2417 length = kwargs[length] 2418 if name == 'bytes': 2419 length *= 8 2420 if name in kwargs and length is None: 2421 # Default 'uint'. 2422 length = kwargs[name] 2423 if stretchy_token: 2424 if name in ('se', 'ue', 'sie', 'uie'): 2425 raise Error("It's not possible to parse a variable" 2426 "length token after a 'filler' token.") 2427 else: 2428 if length is None: 2429 raise Error("It's not possible to have more than " 2430 "one 'filler' token.") 2431 bits_after_stretchy_token += length 2432 if length is None and name not in ('se', 'ue', 'sie', 'uie'): 2433 assert not stretchy_token 2434 stretchy_token = token 2435 bits_left = self.len - pos 2436 return_values = [] 2437 for token in tokens: 2438 name, length, _ = token 2439 if token is stretchy_token: 2440 # Set length to the remaining bits 2441 length = max(bits_left - bits_after_stretchy_token, 0) 2442 if length in kwargs: 2443 length = kwargs[length] 2444 if name == 'bytes': 2445 length *= 8 2446 if name in kwargs and length is None: 2447 # Default 'uint' 2448 length = kwargs[name] 2449 if length is not None: 2450 bits_left -= length 2451 value, pos = self._readtoken(name, pos, length) 2452 if value is not None: 2453 return_values.append(value) 2454 return return_values, pos 2455 2456 def _findbytes(self, bytes_, start, end, bytealigned): 2457 """Quicker version of find when everything's whole byte 2458 and byte aligned. 2459 2460 """ 2461 assert self._datastore.offset == 0 2462 assert bytealigned is True 2463 # Extract data bytes from bitstring to be found. 2464 bytepos = (start + 7) // 8 2465 found = False 2466 p = bytepos 2467 finalpos = end // 8 2468 increment = max(1024, len(bytes_) * 10) 2469 buffersize = increment + len(bytes_) 2470 while p < finalpos: 2471 # Read in file or from memory in overlapping chunks and search the chunks. 2472 buf = bytearray(self._datastore.getbyteslice(p, min(p + buffersize, finalpos))) 2473 pos = buf.find(bytes_) 2474 if pos != -1: 2475 found = True 2476 p += pos 2477 break 2478 p += increment 2479 if not found: 2480 return () 2481 return (p * 8,) 2482 2483 def _findregex(self, reg_ex, start, end, bytealigned): 2484 """Find first occurrence of a compiled regular expression. 2485 2486 Note that this doesn't support arbitrary regexes, in particular they 2487 must match a known length. 2488 2489 """ 2490 p = start 2491 length = len(reg_ex.pattern) 2492 # We grab overlapping chunks of the binary representation and 2493 # do an ordinary string search within that. 2494 increment = max(4096, length * 10) 2495 buffersize = increment + length 2496 while p < end: 2497 buf = self._readbin(min(buffersize, end - p), p) 2498 # Test using regular expressions... 2499 m = reg_ex.search(buf) 2500 if m: 2501 pos = m.start() 2502 # pos = buf.find(targetbin) 2503 # if pos != -1: 2504 # if bytealigned then we only accept byte aligned positions. 2505 if not bytealigned or (p + pos) % 8 == 0: 2506 return (p + pos,) 2507 if bytealigned: 2508 # Advance to just beyond the non-byte-aligned match and try again... 2509 p += pos + 1 2510 continue 2511 p += increment 2512 # Not found, return empty tuple 2513 return () 2514 2515 def find(self, bs, start=None, end=None, bytealigned=None): 2516 """Find first occurrence of substring bs. 2517 2518 Returns a single item tuple with the bit position if found, or an 2519 empty tuple if not found. The bit position (pos property) will 2520 also be set to the start of the substring if it is found. 2521 2522 bs -- The bitstring to find. 2523 start -- The bit position to start the search. Defaults to 0. 2524 end -- The bit position one past the last bit to search. 2525 Defaults to self.len. 2526 bytealigned -- If True the bitstring will only be 2527 found on byte boundaries. 2528 2529 Raises ValueError if bs is empty, if start < 0, if end > self.len or 2530 if end < start. 2531 2532 >>> BitArray('0xc3e').find('0b1111') 2533 (6,) 2534 2535 """ 2536 return self._find(bs, start, end, bytealigned) 2537 2538 def _find_lsb0(self, bs, start=None, end=None, bytealigned=None): 2539 bs = Bits(bs) 2540 start, end = self._validate_slice_lsb0(start, end) 2541 p = self.rfind(bs, start, end, bytealigned) 2542 if p: 2543 return (self.len - p[0] - bs.length,) 2544 2545 def _find_msb0(self, bs, start=None, end=None, bytealigned=None): 2546 bs = Bits(bs) 2547 if not bs.len: 2548 raise ValueError("Cannot find an empty bitstring.") 2549 start, end = self._validate_slice(start, end) 2550 if bytealigned is None: 2551 bytealigned = globals()['bytealigned'] 2552 if bytealigned and not bs.len % 8 and not self._datastore.offset: 2553 p = self._findbytes(bs.bytes, start, end, bytealigned) 2554 else: 2555 p = self._findregex(re.compile(bs._getbin()), start, end, bytealigned) 2556 # If called from a class that has a pos, set it 2557 try: 2558 self._pos = p[0] 2559 except (AttributeError, IndexError): 2560 pass 2561 return p 2562 2563 def findall(self, bs, start=None, end=None, count=None, bytealigned=None): 2564 """Find all occurrences of bs. Return generator of bit positions. 2565 2566 bs -- The bitstring to find. 2567 start -- The bit position to start the search. Defaults to 0. 2568 end -- The bit position one past the last bit to search. 2569 Defaults to self.len. 2570 count -- The maximum number of occurrences to find. 2571 bytealigned -- If True the bitstring will only be found on 2572 byte boundaries. 2573 2574 Raises ValueError if bs is empty, if start < 0, if end > self.len or 2575 if end < start. 2576 2577 Note that all occurrences of bs are found, even if they overlap. 2578 2579 """ 2580 if count is not None and count < 0: 2581 raise ValueError("In findall, count must be >= 0.") 2582 bs = Bits(bs) 2583 start, end = self._validate_slice(start, end) 2584 if bytealigned is None: 2585 bytealigned = globals()['bytealigned'] 2586 c = 0 2587 if bytealigned and not bs.len % 8 and not self._datastore.offset: 2588 # Use the quick find method 2589 f = self._findbytes 2590 x = bs._getbytes() 2591 else: 2592 f = self._findregex 2593 x = re.compile(bs._getbin()) 2594 while True: 2595 2596 p = f(x, start, end, bytealigned) 2597 if not p: 2598 break 2599 if count is not None and c >= count: 2600 return 2601 c += 1 2602 try: 2603 self._pos = p[0] 2604 except AttributeError: 2605 pass 2606 yield p[0] 2607 if bytealigned: 2608 start = p[0] + 8 2609 else: 2610 start = p[0] + 1 2611 if start >= end: 2612 break 2613 return 2614 2615 def rfind(self, bs, start=None, end=None, bytealigned=None): 2616 """Find final occurrence of substring bs. 2617 2618 Returns a single item tuple with the bit position if found, or an 2619 empty tuple if not found. The bit position (pos property) will 2620 also be set to the start of the substring if it is found. 2621 2622 bs -- The bitstring to find. 2623 start -- The bit position to end the reverse search. Defaults to 0. 2624 end -- The bit position one past the first bit to reverse search. 2625 Defaults to self.len. 2626 bytealigned -- If True the bitstring will only be found on byte 2627 boundaries. 2628 2629 Raises ValueError if bs is empty, if start < 0, if end > self.len or 2630 if end < start. 2631 2632 """ 2633 bs = Bits(bs) 2634 start, end = self._validate_slice(start, end) 2635 if bytealigned is None: 2636 bytealigned = globals()['bytealigned'] 2637 if not bs.len: 2638 raise ValueError("Cannot find an empty bitstring.") 2639 # Search chunks starting near the end and then moving back 2640 # until we find bs. 2641 increment = max(8192, bs.len * 80) 2642 buffersize = min(increment + bs.len, end - start) 2643 pos = max(start, end - buffersize) 2644 while True: 2645 found = list(self.findall(bs, start=pos, end=pos + buffersize, 2646 bytealigned=bytealigned)) 2647 if not found: 2648 if pos == start: 2649 return () 2650 pos = max(start, pos - increment) 2651 continue 2652 return (found[-1],) 2653 2654 def cut(self, bits, start=None, end=None, count=None): 2655 """Return bitstring generator by cutting into bits sized chunks. 2656 2657 bits -- The size in bits of the bitstring chunks to generate. 2658 start -- The bit position to start the first cut. Defaults to 0. 2659 end -- The bit position one past the last bit to use in the cut. 2660 Defaults to self.len. 2661 count -- If specified then at most count items are generated. 2662 Default is to cut as many times as possible. 2663 2664 """ 2665 start, end = self._validate_slice(start, end) 2666 if count is not None and count < 0: 2667 raise ValueError("Cannot cut - count must be >= 0.") 2668 if bits <= 0: 2669 raise ValueError("Cannot cut - bits must be >= 0.") 2670 c = 0 2671 while count is None or c < count: 2672 c += 1 2673 nextchunk = self._slice(start, min(start + bits, end)) 2674 if nextchunk.len != bits: 2675 return 2676 assert nextchunk._assertsanity() 2677 yield nextchunk 2678 start += bits 2679 return 2680 2681 def split(self, delimiter, start=None, end=None, count=None, 2682 bytealigned=None): 2683 """Return bitstring generator by splittling using a delimiter. 2684 2685 The first item returned is the initial bitstring before the delimiter, 2686 which may be an empty bitstring. 2687 2688 delimiter -- The bitstring used as the divider. 2689 start -- The bit position to start the split. Defaults to 0. 2690 end -- The bit position one past the last bit to use in the split. 2691 Defaults to self.len. 2692 count -- If specified then at most count items are generated. 2693 Default is to split as many times as possible. 2694 bytealigned -- If True splits will only occur on byte boundaries. 2695 2696 Raises ValueError if the delimiter is empty. 2697 2698 """ 2699 delimiter = Bits(delimiter) 2700 if not delimiter.len: 2701 raise ValueError("split delimiter cannot be empty.") 2702 start, end = self._validate_slice(start, end) 2703 if bytealigned is None: 2704 bytealigned = globals()['bytealigned'] 2705 if count is not None and count < 0: 2706 raise ValueError("Cannot split - count must be >= 0.") 2707 if count == 0: 2708 return 2709 if bytealigned and not delimiter.len % 8 and not self._datastore.offset: 2710 # Use the quick find method 2711 f = self._findbytes 2712 x = delimiter._getbytes() 2713 else: 2714 f = self._findregex 2715 x = re.compile(delimiter._getbin()) 2716 found = f(x, start, end, bytealigned) 2717 if not found: 2718 # Initial bits are the whole bitstring being searched 2719 yield self._slice(start, end) 2720 return 2721 # yield the bytes before the first occurrence of the delimiter, even if empty 2722 yield self._slice(start, found[0]) 2723 startpos = pos = found[0] 2724 c = 1 2725 while count is None or c < count: 2726 pos += delimiter.len 2727 found = f(x, pos, end, bytealigned) 2728 if not found: 2729 # No more occurrences, so return the rest of the bitstring 2730 yield self._slice(startpos, end) 2731 return 2732 c += 1 2733 yield self._slice(startpos, found[0]) 2734 startpos = pos = found[0] 2735 # Have generated count bitstrings, so time to quit. 2736 return 2737 2738 def join(self, sequence): 2739 """Return concatenation of bitstrings joined by self. 2740 2741 sequence -- A sequence of bitstrings. 2742 2743 """ 2744 s = self.__class__() 2745 i = iter(sequence) 2746 try: 2747 s._addright(Bits(next(i))) 2748 while True: 2749 n = next(i) 2750 s._addright(self) 2751 s._addright(Bits(n)) 2752 except StopIteration: 2753 pass 2754 return s 2755 2756 def tobytes(self): 2757 """Return the bitstring as bytes, padding with zero bits if needed. 2758 2759 Up to seven zero bits will be added at the end to byte align. 2760 2761 """ 2762 d = offsetcopy(self._datastore, 0).rawbytes 2763 # Need to ensure that unused bits at end are set to zero 2764 unusedbits = 8 - self.len % 8 2765 if unusedbits != 8: 2766 d[-1] &= (0xff << unusedbits) 2767 return bytes(d) 2768 2769 def tofile(self, f): 2770 """Write the bitstring to a file object, padding with zero bits if needed. 2771 2772 Up to seven zero bits will be added at the end to byte align. 2773 2774 """ 2775 # If the bitstring is file based then we don't want to read it all 2776 # in to memory. 2777 chunksize = 1024 * 1024 # 1 MiB chunks 2778 if not self._offset: 2779 a = 0 2780 bytelen = self._datastore.bytelength 2781 p = self._datastore.getbyteslice(a, min(a + chunksize, bytelen - 1)) 2782 while len(p) == chunksize: 2783 f.write(p) 2784 a += chunksize 2785 p = self._datastore.getbyteslice(a, min(a + chunksize, bytelen - 1)) 2786 f.write(p) 2787 # Now the final byte, ensuring that unused bits at end are set to 0. 2788 bits_in_final_byte = self.len % 8 2789 if not bits_in_final_byte: 2790 bits_in_final_byte = 8 2791 f.write(self[-bits_in_final_byte:].tobytes()) 2792 else: 2793 # Really quite inefficient... 2794 a = 0 2795 b = a + chunksize * 8 2796 while b <= self.len: 2797 f.write(self._slice(a, b)._getbytes()) 2798 a += chunksize * 8 2799 b += chunksize * 8 2800 if a != self.len: 2801 f.write(self._slice(a, self.len).tobytes()) 2802 2803 def startswith(self, prefix, start=None, end=None): 2804 """Return whether the current bitstring starts with prefix. 2805 2806 prefix -- The bitstring to search for. 2807 start -- The bit position to start from. Defaults to 0. 2808 end -- The bit position to end at. Defaults to self.len. 2809 2810 """ 2811 prefix = Bits(prefix) 2812 start, end = self._validate_slice_msb0(start, end) # the _slice deals with msb0/lsb0 2813 if end < start + prefix.len: 2814 return False 2815 end = start + prefix.len 2816 return self._slice(start, end) == prefix 2817 2818 def endswith(self, suffix, start=None, end=None): 2819 """Return whether the current bitstring ends with suffix. 2820 2821 suffix -- The bitstring to search for. 2822 start -- The bit position to start from. Defaults to 0. 2823 end -- The bit position to end at. Defaults to self.len. 2824 2825 """ 2826 suffix = Bits(suffix) 2827 start, end = self._validate_slice(start, end) 2828 if start + suffix.len > end: 2829 return False 2830 start = end - suffix.len 2831 return self._slice(start, end) == suffix 2832 2833 def all(self, value, pos=None): 2834 """Return True if one or many bits are all set to value. 2835 2836 value -- If value is True then checks for bits set to 1, otherwise 2837 checks for bits set to 0. 2838 pos -- An iterable of bit positions. Negative numbers are treated in 2839 the same way as slice indices. Defaults to the whole bitstring. 2840 2841 """ 2842 value = bool(value) 2843 length = self.len 2844 if pos is None: 2845 pos = xrange(self.len) 2846 for p in pos: 2847 if p < 0: 2848 p += length 2849 if not 0 <= p < length: 2850 raise IndexError("Bit position {0} out of range.".format(p)) 2851 if not self._datastore.getbit(p) is value: 2852 return False 2853 return True 2854 2855 def any(self, value, pos=None): 2856 """Return True if any of one or many bits are set to value. 2857 2858 value -- If value is True then checks for bits set to 1, otherwise 2859 checks for bits set to 0. 2860 pos -- An iterable of bit positions. Negative numbers are treated in 2861 the same way as slice indices. Defaults to the whole bitstring. 2862 2863 """ 2864 value = bool(value) 2865 length = self.len 2866 if pos is None: 2867 pos = xrange(self.len) 2868 for p in pos: 2869 if p < 0: 2870 p += length 2871 if not 0 <= p < length: 2872 raise IndexError("Bit position {0} out of range.".format(p)) 2873 if self._datastore.getbit(p) is value: 2874 return True 2875 return False 2876 2877 def count(self, value): 2878 """Return count of total number of either zero or one bits. 2879 2880 value -- If True then bits set to 1 are counted, otherwise bits set 2881 to 0 are counted. 2882 2883 >>> Bits('0xef').count(1) 2884 7 2885 2886 """ 2887 if not self.len: 2888 return 0 2889 # count the number of 1s (from which it's easy to work out the 0s). 2890 # Don't count the final byte yet. 2891 count = sum(BIT_COUNT[self._datastore.getbyte(i)] for i in xrange(self._datastore.bytelength - 1)) 2892 # adjust for bits at start that aren't part of the bitstring 2893 if self._offset: 2894 count -= BIT_COUNT[self._datastore.getbyte(0) >> (8 - self._offset)] 2895 # and count the last 1 - 8 bits at the end. 2896 endbits = self._datastore.bytelength * 8 - (self._offset + self.len) 2897 count += BIT_COUNT[self._datastore.getbyte(self._datastore.bytelength - 1) >> endbits] 2898 return count if value else self.len - count 2899 2900 # Create native-endian functions as aliases depending on the byteorder 2901 if byteorder == 'little': 2902 _setfloatne = _setfloatle 2903 _readfloatne = _readfloatle 2904 _getfloatne = _getfloatle 2905 _setuintne = _setuintle 2906 _readuintne = _readuintle 2907 _getuintne = _getuintle 2908 _setintne = _setintle 2909 _readintne = _readintle 2910 _getintne = _getintle 2911 else: 2912 _setfloatne = _setfloat 2913 _readfloatne = _readfloat 2914 _getfloatne = _getfloat 2915 _setuintne = _setuintbe 2916 _readuintne = _readuintbe 2917 _getuintne = _getuintbe 2918 _setintne = _setintbe 2919 _readintne = _readintbe 2920 _getintne = _getintbe 2921 2922 _offset = property(_getoffset) 2923 2924 len = property(_getlength, 2925 doc="""The length of the bitstring in bits. Read only. 2926 """) 2927 length = property(_getlength, 2928 doc="""The length of the bitstring in bits. Read only. 2929 """) 2930 bool = property(_getbool, 2931 doc="""The bitstring as a bool (True or False). Read only. 2932 """) 2933 hex = property(_gethex, 2934 doc="""The bitstring as a hexadecimal string. Read only. 2935 """) 2936 bin = property(_getbin, 2937 doc="""The bitstring as a binary string. Read only. 2938 """) 2939 oct = property(_getoct, 2940 doc="""The bitstring as an octal string. Read only. 2941 """) 2942 bytes = property(_getbytes, 2943 doc="""The bitstring as a bytes object. Read only. 2944 """) 2945 int = property(_getint, 2946 doc="""The bitstring as a two's complement signed int. Read only. 2947 """) 2948 uint = property(_getuint, 2949 doc="""The bitstring as a two's complement unsigned int. Read only. 2950 """) 2951 float = property(_getfloat, 2952 doc="""The bitstring as a floating point number. Read only. 2953 """) 2954 intbe = property(_getintbe, 2955 doc="""The bitstring as a two's complement big-endian signed int. Read only. 2956 """) 2957 uintbe = property(_getuintbe, 2958 doc="""The bitstring as a two's complement big-endian unsigned int. Read only. 2959 """) 2960 floatbe = property(_getfloat, 2961 doc="""The bitstring as a big-endian floating point number. Read only. 2962 """) 2963 intle = property(_getintle, 2964 doc="""The bitstring as a two's complement little-endian signed int. Read only. 2965 """) 2966 uintle = property(_getuintle, 2967 doc="""The bitstring as a two's complement little-endian unsigned int. Read only. 2968 """) 2969 floatle = property(_getfloatle, 2970 doc="""The bitstring as a little-endian floating point number. Read only. 2971 """) 2972 intne = property(_getintne, 2973 doc="""The bitstring as a two's complement native-endian signed int. Read only. 2974 """) 2975 uintne = property(_getuintne, 2976 doc="""The bitstring as a two's complement native-endian unsigned int. Read only. 2977 """) 2978 floatne = property(_getfloatne, 2979 doc="""The bitstring as a native-endian floating point number. Read only. 2980 """) 2981 ue = property(_getue, 2982 doc="""The bitstring as an unsigned exponential-Golomb code. Read only. 2983 """) 2984 se = property(_getse, 2985 doc="""The bitstring as a signed exponential-Golomb code. Read only. 2986 """) 2987 uie = property(_getuie, 2988 doc="""The bitstring as an unsigned interleaved exponential-Golomb code. Read only. 2989 """) 2990 sie = property(_getsie, 2991 doc="""The bitstring as a signed interleaved exponential-Golomb code. Read only. 2992 """) 2993 2994 2995 2996 2997 2998class BitArray(Bits): 2999 """A container holding a mutable sequence of bits. 3000 3001 Subclass of the immutable Bits class. Inherits all of its 3002 methods (except __hash__) and adds mutating methods. 3003 3004 Mutating methods: 3005 3006 append() -- Append a bitstring. 3007 byteswap() -- Change byte endianness in-place. 3008 insert() -- Insert a bitstring. 3009 invert() -- Flip bit(s) between one and zero. 3010 overwrite() -- Overwrite a section with a new bitstring. 3011 prepend() -- Prepend a bitstring. 3012 replace() -- Replace occurrences of one bitstring with another. 3013 reverse() -- Reverse bits in-place. 3014 rol() -- Rotate bits to the left. 3015 ror() -- Rotate bits to the right. 3016 set() -- Set bit(s) to 1 or 0. 3017 3018 Methods inherited from Bits: 3019 3020 all() -- Check if all specified bits are set to 1 or 0. 3021 any() -- Check if any of specified bits are set to 1 or 0. 3022 count() -- Count the number of bits set to 1 or 0. 3023 cut() -- Create generator of constant sized chunks. 3024 endswith() -- Return whether the bitstring ends with a sub-string. 3025 find() -- Find a sub-bitstring in the current bitstring. 3026 findall() -- Find all occurrences of a sub-bitstring in the current bitstring. 3027 join() -- Join bitstrings together using current bitstring. 3028 rfind() -- Seek backwards to find a sub-bitstring. 3029 split() -- Create generator of chunks split by a delimiter. 3030 startswith() -- Return whether the bitstring starts with a sub-bitstring. 3031 tobytes() -- Return bitstring as bytes, padding if needed. 3032 tofile() -- Write bitstring to file, padding if needed. 3033 unpack() -- Interpret bits using format string. 3034 3035 Special methods: 3036 3037 Mutating operators are available: [], <<=, >>=, +=, *=, &=, |= and ^= 3038 in addition to the inherited [], ==, !=, +, *, ~, <<, >>, &, | and ^. 3039 3040 Properties: 3041 3042 bin -- The bitstring as a binary string. 3043 bool -- For single bit bitstrings, interpret as True or False. 3044 bytepos -- The current byte position in the bitstring. 3045 bytes -- The bitstring as a bytes object. 3046 float -- Interpret as a floating point number. 3047 floatbe -- Interpret as a big-endian floating point number. 3048 floatle -- Interpret as a little-endian floating point number. 3049 floatne -- Interpret as a native-endian floating point number. 3050 hex -- The bitstring as a hexadecimal string. 3051 int -- Interpret as a two's complement signed integer. 3052 intbe -- Interpret as a big-endian signed integer. 3053 intle -- Interpret as a little-endian signed integer. 3054 intne -- Interpret as a native-endian signed integer. 3055 len -- Length of the bitstring in bits. 3056 oct -- The bitstring as an octal string. 3057 pos -- The current bit position in the bitstring. 3058 se -- Interpret as a signed exponential-Golomb code. 3059 ue -- Interpret as an unsigned exponential-Golomb code. 3060 sie -- Interpret as a signed interleaved exponential-Golomb code. 3061 uie -- Interpret as an unsigned interleaved exponential-Golomb code. 3062 uint -- Interpret as a two's complement unsigned integer. 3063 uintbe -- Interpret as a big-endian unsigned integer. 3064 uintle -- Interpret as a little-endian unsigned integer. 3065 uintne -- Interpret as a native-endian unsigned integer. 3066 3067 """ 3068 3069 __slots__ = () 3070 3071 # As BitArray objects are mutable, we shouldn't allow them to be hashed. 3072 __hash__ = None 3073 3074 def __init__(self, auto=None, length=None, offset=None, **kwargs): 3075 """Either specify an 'auto' initialiser: 3076 auto -- a string of comma separated tokens, an integer, a file object, 3077 a bytearray, a boolean iterable or another bitstring. 3078 3079 Or initialise via **kwargs with one (and only one) of: 3080 bytes -- raw data as a string, for example read from a binary file. 3081 bin -- binary string representation, e.g. '0b001010'. 3082 hex -- hexadecimal string representation, e.g. '0x2ef' 3083 oct -- octal string representation, e.g. '0o777'. 3084 uint -- an unsigned integer. 3085 int -- a signed integer. 3086 float -- a floating point number. 3087 uintbe -- an unsigned big-endian whole byte integer. 3088 intbe -- a signed big-endian whole byte integer. 3089 floatbe - a big-endian floating point number. 3090 uintle -- an unsigned little-endian whole byte integer. 3091 intle -- a signed little-endian whole byte integer. 3092 floatle -- a little-endian floating point number. 3093 uintne -- an unsigned native-endian whole byte integer. 3094 intne -- a signed native-endian whole byte integer. 3095 floatne -- a native-endian floating point number. 3096 se -- a signed exponential-Golomb code. 3097 ue -- an unsigned exponential-Golomb code. 3098 sie -- a signed interleaved exponential-Golomb code. 3099 uie -- an unsigned interleaved exponential-Golomb code. 3100 bool -- a boolean (True or False). 3101 filename -- a file which will be opened in binary read-only mode. 3102 3103 Other keyword arguments: 3104 length -- length of the bitstring in bits, if needed and appropriate. 3105 It must be supplied for all integer and float initialisers. 3106 offset -- bit offset to the data. These offset bits are 3107 ignored and this is intended for use when 3108 initialising using 'bytes' or 'filename'. 3109 3110 """ 3111 # For mutable BitArrays we always read in files to memory: 3112 if not isinstance(self._datastore, ByteStore): 3113 self._ensureinmemory() 3114 3115 def __new__(cls, auto=None, length=None, offset=None, **kwargs): 3116 x = super(BitArray, cls).__new__(cls) 3117 y = Bits.__new__(BitArray, auto, length, offset, **kwargs) 3118 x._datastore = ByteStore(y._datastore._rawarray[:], 3119 y._datastore.bitlength, 3120 y._datastore.offset) 3121 return x 3122 3123 def __iadd__(self, bs): 3124 """Append bs to current bitstring. Return self. 3125 3126 bs -- the bitstring to append. 3127 3128 """ 3129 self._append(bs) 3130 return self 3131 3132 def __copy__(self): 3133 """Return a new copy of the BitArray.""" 3134 s_copy = BitArray() 3135 if not isinstance(self._datastore, ByteStore): 3136 # Let them both point to the same (invariant) array. 3137 # If either gets modified then at that point they'll be read into memory. 3138 s_copy._datastore = self._datastore 3139 else: 3140 s_copy._datastore = copy.copy(self._datastore) 3141 return s_copy 3142 3143 def __setitem__(self, key, value): 3144 try: 3145 # A slice 3146 start, step = 0, 1 3147 if key.step is not None: 3148 step = key.step 3149 except AttributeError: 3150 # single element 3151 if key < 0: 3152 key += self.len 3153 if not 0 <= key < self.len: 3154 raise IndexError("Slice index out of range.") 3155 if isinstance(value, numbers.Integral): 3156 if not value: 3157 self._unset(key) 3158 return 3159 if value in (1, -1): 3160 self._set(key) 3161 return 3162 raise ValueError("Cannot set a single bit with integer {0}.".format(value)) 3163 value = Bits(value) 3164 if value.len == 1: 3165 if value[0]: 3166 self._set(key) 3167 else: 3168 self._unset(key) 3169 else: 3170 self._delete(1, key) 3171 self._insert(value, key) 3172 return 3173 else: 3174 if step != 1: 3175 # convert to binary string and use string slicing 3176 # TODO: Horribly inefficient 3177 temp = list(self._getbin()) 3178 v = list(Bits(value)._getbin()) 3179 temp.__setitem__(key, v) 3180 self._setbin_unsafe(''.join(temp)) 3181 return 3182 3183 # If value is an integer then we want to set the slice to that 3184 # value rather than initialise a new bitstring of that length. 3185 if not isinstance(value, numbers.Integral): 3186 try: 3187 value = Bits(value) 3188 except TypeError: 3189 raise TypeError("Bitstring, integer or string expected. " 3190 "Got {0}.".format(type(value))) 3191 if key.start is not None: 3192 start = key.start 3193 if key.start < 0: 3194 start += self.len 3195 if start < 0: 3196 start = 0 3197 stop = self.len 3198 if key.stop is not None: 3199 stop = key.stop 3200 if key.stop < 0: 3201 stop += self.len 3202 if start > stop: 3203 # The standard behaviour for lists is to just insert at the 3204 # start position if stop < start and step == 1. 3205 stop = start 3206 if isinstance(value, numbers.Integral): 3207 if value >= 0: 3208 value = self.__class__(uint=value, length=stop - start) 3209 else: 3210 value = self.__class__(int=value, length=stop - start) 3211 stop = min(stop, self.len) 3212 start = max(start, 0) 3213 start = min(start, stop) 3214 if (stop - start) == value.len: 3215 if not value.len: 3216 return 3217 if step >= 0: 3218 self._overwrite(value, start) 3219 else: 3220 self._overwrite(value.__getitem__(slice(None, None, 1)), start) 3221 else: 3222 # TODO: A delete then insert is wasteful - it could do unneeded shifts. 3223 # Could be either overwrite + insert or overwrite + delete. 3224 self._delete(stop - start, start) 3225 if step >= 0: 3226 self._insert(value, start) 3227 else: 3228 self._insert(value.__getitem__(slice(None, None, 1)), start) 3229 # pos is now after the inserted piece. 3230 return 3231 3232 def __delitem__(self, key): 3233 """Delete item or range. 3234 3235 Indices are in units of the step parameter (default 1 bit). 3236 Stepping is used to specify the number of bits in each item. 3237 3238 >>> a = BitArray('0x001122') 3239 >>> del a[1:2:8] 3240 >>> print a 3241 0x0022 3242 3243 """ 3244 try: 3245 # A slice 3246 start = 0 3247 step = key.step if key.step is not None else 1 3248 except AttributeError: 3249 # single element 3250 if key < 0: 3251 key += self.len 3252 if not 0 <= key < self.len: 3253 raise IndexError("Slice index out of range.") 3254 self._delete(1, key) 3255 return 3256 else: 3257 if step != 1: 3258 # convert to binary string and use string slicing 3259 # TODO: Horribly inefficient 3260 temp = list(self._getbin()) 3261 temp.__delitem__(key) 3262 self._setbin_unsafe(''.join(temp)) 3263 return 3264 if key.start is not None: 3265 start = key.start 3266 if key.start < 0: 3267 start += self.len 3268 if start < 0: 3269 start = 0 3270 stop = self.len 3271 if key.stop is not None: 3272 stop = key.stop 3273 if key.stop < 0: 3274 stop += self.len 3275 if start > stop: 3276 return 3277 stop = min(stop, self.len) 3278 start = max(start, 0) 3279 start = min(start, stop) 3280 self._delete(stop - start, start) 3281 return 3282 3283 def __ilshift__(self, n): 3284 """Shift bits by n to the left in place. Return self. 3285 3286 n -- the number of bits to shift. Must be >= 0. 3287 3288 """ 3289 if n < 0: 3290 raise ValueError("Cannot shift by a negative amount.") 3291 if not self.len: 3292 raise ValueError("Cannot shift an empty bitstring.") 3293 if not n: 3294 return self 3295 n = min(n, self.len) 3296 return self._ilshift(n) 3297 3298 def __irshift__(self, n): 3299 """Shift bits by n to the right in place. Return self. 3300 3301 n -- the number of bits to shift. Must be >= 0. 3302 3303 """ 3304 if n < 0: 3305 raise ValueError("Cannot shift by a negative amount.") 3306 if not self.len: 3307 raise ValueError("Cannot shift an empty bitstring.") 3308 if not n: 3309 return self 3310 n = min(n, self.len) 3311 return self._irshift(n) 3312 3313 def __imul__(self, n): 3314 """Concatenate n copies of self in place. Return self. 3315 3316 Called for expressions of the form 'a *= 3'. 3317 n -- The number of concatenations. Must be >= 0. 3318 3319 """ 3320 if n < 0: 3321 raise ValueError("Cannot multiply by a negative integer.") 3322 return self._imul(n) 3323 3324 def __ior__(self, bs): 3325 bs = Bits(bs) 3326 if self.len != bs.len: 3327 raise ValueError("Bitstrings must have the same length " 3328 "for |= operator.") 3329 return self._ior(bs) 3330 3331 def __iand__(self, bs): 3332 bs = Bits(bs) 3333 if self.len != bs.len: 3334 raise ValueError("Bitstrings must have the same length " 3335 "for &= operator.") 3336 return self._iand(bs) 3337 3338 def __ixor__(self, bs): 3339 bs = Bits(bs) 3340 if self.len != bs.len: 3341 raise ValueError("Bitstrings must have the same length " 3342 "for ^= operator.") 3343 return self._ixor(bs) 3344 3345 def replace(self, old, new, start=None, end=None, count=None, 3346 bytealigned=None): 3347 """Replace all occurrences of old with new in place. 3348 3349 Returns number of replacements made. 3350 3351 old -- The bitstring to replace. 3352 new -- The replacement bitstring. 3353 start -- Any occurrences that start before this will not be replaced. 3354 Defaults to 0. 3355 end -- Any occurrences that finish after this will not be replaced. 3356 Defaults to self.len. 3357 count -- The maximum number of replacements to make. Defaults to 3358 replace all occurrences. 3359 bytealigned -- If True replacements will only be made on byte 3360 boundaries. 3361 3362 Raises ValueError if old is empty or if start or end are 3363 out of range. 3364 3365 """ 3366 old = Bits(old) 3367 new = Bits(new) 3368 if not old.len: 3369 raise ValueError("Empty bitstring cannot be replaced.") 3370 start, end = self._validate_slice(start, end) 3371 if bytealigned is None: 3372 bytealigned = globals()['bytealigned'] 3373 # Adjust count for use in split() 3374 if count is not None: 3375 count += 1 3376 sections = self.split(old, start, end, count, bytealigned) 3377 lengths = [s.len for s in sections] 3378 if len(lengths) == 1: 3379 # Didn't find anything to replace. 3380 return 0 # no replacements done 3381 if new is self: 3382 # Prevent self assignment woes 3383 new = copy.copy(self) 3384 positions = [lengths[0] + start] 3385 for l in lengths[1:-1]: 3386 # Next position is the previous one plus the length of the next section. 3387 positions.append(positions[-1] + l) 3388 # We have all the positions that need replacements. We do them 3389 # in reverse order so that they won't move around as we replace. 3390 positions.reverse() 3391 try: 3392 # Need to calculate new pos, if this is a bitstream 3393 newpos = self._pos 3394 for p in positions: 3395 self[p:p + old.len] = new 3396 if old.len != new.len: 3397 diff = new.len - old.len 3398 for p in positions: 3399 if p >= newpos: 3400 continue 3401 if p + old.len <= newpos: 3402 newpos += diff 3403 else: 3404 newpos = p 3405 self._pos = newpos 3406 except AttributeError: 3407 for p in positions: 3408 self[p:p + old.len] = new 3409 assert self._assertsanity() 3410 return len(lengths) - 1 3411 3412 def insert(self, bs, pos=None): 3413 """Insert bs at bit position pos. 3414 3415 bs -- The bitstring to insert. 3416 pos -- The bit position to insert at. 3417 3418 Raises ValueError if pos < 0 or pos > self.len. 3419 3420 """ 3421 bs = Bits(bs) 3422 if not bs.len: 3423 return self 3424 if bs is self: 3425 bs = self.__copy__() 3426 if pos is None: 3427 try: 3428 pos = self._pos 3429 except AttributeError: 3430 raise TypeError("insert needs a bit position specified when used on a BitArray.") 3431 if pos < 0: 3432 pos += self.len 3433 if not 0 <= pos <= self.len: 3434 raise ValueError("Invalid insert position.") 3435 self._insert(bs, pos) 3436 3437 def overwrite(self, bs, pos=None): 3438 """Overwrite with bs at bit position pos. 3439 3440 bs -- The bitstring to overwrite with. 3441 pos -- The bit position to begin overwriting from. 3442 3443 Raises ValueError if pos < 0 or pos + bs.len > self.len 3444 3445 """ 3446 bs = Bits(bs) 3447 if not bs.len: 3448 return 3449 if pos is None: 3450 try: 3451 pos = self._pos 3452 except AttributeError: 3453 raise TypeError("overwrite needs a bit position specified when used on a BitArray.") 3454 if pos < 0: 3455 pos += self.len 3456 if pos < 0 or pos + bs.len > self.len: 3457 raise ValueError("Overwrite exceeds boundary of bitstring.") 3458 self._overwrite(bs, pos) 3459 try: 3460 self._pos = pos + bs.len 3461 except AttributeError: 3462 pass 3463 3464 def append(self, bs): 3465 """Append a bitstring to the current bitstring. 3466 3467 bs -- The bitstring to append. 3468 3469 """ 3470 self._append(bs) 3471 3472 def prepend(self, bs): 3473 """Prepend a bitstring to the current bitstring. 3474 3475 bs -- The bitstring to prepend. 3476 3477 """ 3478 self._prepend(bs) 3479 3480 def _append_msb0(self, bs): 3481 # The offset is a hint to make bs easily appendable. 3482 bs = self._converttobitstring(bs, offset=(self.len + self._offset) % 8) 3483 self._addright(bs) 3484 3485 def _append_lsb0(self, bs): 3486 bs = Bits(bs) 3487 self._addleft(bs) 3488 3489 def reverse(self, start=None, end=None): 3490 """Reverse bits in-place. 3491 3492 start -- Position of first bit to reverse. Defaults to 0. 3493 end -- One past the position of the last bit to reverse. 3494 Defaults to self.len. 3495 3496 Using on an empty bitstring will have no effect. 3497 3498 Raises ValueError if start < 0, end > self.len or end < start. 3499 3500 """ 3501 start, end = self._validate_slice(start, end) 3502 if start == 0 and end == self.len: 3503 self._reverse() 3504 return 3505 s = self._slice(start, end) 3506 s._reverse() 3507 self[start:end] = s 3508 3509 def set(self, value, pos=None): 3510 """Set one or many bits to 1 or 0. 3511 3512 value -- If True bits are set to 1, otherwise they are set to 0. 3513 pos -- Either a single bit position or an iterable of bit positions. 3514 Negative numbers are treated in the same way as slice indices. 3515 Defaults to the entire bitstring. 3516 3517 Raises IndexError if pos < -self.len or pos >= self.len. 3518 3519 """ 3520 f = self._set if value else self._unset 3521 if pos is None: 3522 pos = xrange(self.len) 3523 try: 3524 length = self.len 3525 for p in pos: 3526 if p < 0: 3527 p += length 3528 if not 0 <= p < length: 3529 raise IndexError("Bit position {0} out of range.".format(p)) 3530 f(p) 3531 except TypeError: 3532 # Single pos 3533 if pos < 0: 3534 pos += self.len 3535 if not 0 <= pos < length: 3536 raise IndexError("Bit position {0} out of range.".format(pos)) 3537 f(pos) 3538 3539 def invert(self, pos=None): 3540 """Invert one or many bits from 0 to 1 or vice versa. 3541 3542 pos -- Either a single bit position or an iterable of bit positions. 3543 Negative numbers are treated in the same way as slice indices. 3544 3545 Raises IndexError if pos < -self.len or pos >= self.len. 3546 3547 """ 3548 if pos is None: 3549 self._invert_all() 3550 return 3551 if not isinstance(pos, collectionsAbc.Iterable): 3552 pos = (pos,) 3553 length = self.len 3554 3555 for p in pos: 3556 if p < 0: 3557 p += length 3558 if not 0 <= p < length: 3559 raise IndexError("Bit position {0} out of range.".format(p)) 3560 self._invert(p) 3561 3562 def ror(self, bits, start=None, end=None): 3563 """Rotate bits to the right in-place. 3564 3565 bits -- The number of bits to rotate by. 3566 start -- Start of slice to rotate. Defaults to 0. 3567 end -- End of slice to rotate. Defaults to self.len. 3568 3569 Raises ValueError if bits < 0. 3570 3571 """ 3572 if not self.len: 3573 raise Error("Cannot rotate an empty bitstring.") 3574 if bits < 0: 3575 raise ValueError("Cannot rotate by negative amount.") 3576 self._ror(bits, start, end) 3577 3578 def _ror_msb0(self, bits, start=None, end=None): 3579 start, end = self._validate_slice_msb0(start, end) # the _slice deals with msb0/lsb0 3580 bits %= (end - start) 3581 if not bits: 3582 return 3583 rhs = self._slice(end - bits, end) 3584 self._delete(bits, end - bits) 3585 self._insert(rhs, start) 3586 3587 def rol(self, bits, start=None, end=None): 3588 """Rotate bits to the left in-place. 3589 3590 bits -- The number of bits to rotate by. 3591 start -- Start of slice to rotate. Defaults to 0. 3592 end -- End of slice to rotate. Defaults to self.len. 3593 3594 Raises ValueError if bits < 0. 3595 3596 """ 3597 if not self.len: 3598 raise Error("Cannot rotate an empty bitstring.") 3599 if bits < 0: 3600 raise ValueError("Cannot rotate by negative amount.") 3601 self._rol(bits, start, end) 3602 3603 def _rol_msb0(self, bits, start=None, end=None): 3604 start, end = self._validate_slice_msb0(start, end) 3605 bits %= (end - start) 3606 if not bits: 3607 return 3608 lhs = self._slice(start, start + bits) 3609 self._delete(bits, start) 3610 self._insert(lhs, end - bits) 3611 3612 def byteswap(self, fmt=None, start=None, end=None, repeat=True): 3613 """Change the endianness in-place. Return number of repeats of fmt done. 3614 3615 fmt -- A compact structure string, an integer number of bytes or 3616 an iterable of integers. Defaults to 0, which byte reverses the 3617 whole bitstring. 3618 start -- Start bit position, defaults to 0. 3619 end -- End bit position, defaults to self.len. 3620 repeat -- If True (the default) the byte swapping pattern is repeated 3621 as much as possible. 3622 3623 """ 3624 start, end = self._validate_slice(start, end) 3625 if fmt is None or fmt == 0: 3626 # reverse all of the whole bytes. 3627 bytesizes = [(end - start) // 8] 3628 elif isinstance(fmt, numbers.Integral): 3629 if fmt < 0: 3630 raise ValueError("Improper byte length {0}.".format(fmt)) 3631 bytesizes = [fmt] 3632 elif isinstance(fmt, basestring): 3633 m = STRUCT_PACK_RE.match(fmt) 3634 if not m: 3635 raise ValueError("Cannot parse format string {0}.".format(fmt)) 3636 # Split the format string into a list of 'q', '4h' etc. 3637 formatlist = re.findall(STRUCT_SPLIT_RE, m.group('fmt')) 3638 # Now deal with multiplicative factors, 4h -> hhhh etc. 3639 bytesizes = [] 3640 for f in formatlist: 3641 if len(f) == 1: 3642 bytesizes.append(PACK_CODE_SIZE[f]) 3643 else: 3644 bytesizes.extend([PACK_CODE_SIZE[f[-1]]] * int(f[:-1])) 3645 elif isinstance(fmt, collectionsAbc.Iterable): 3646 bytesizes = fmt 3647 for bytesize in bytesizes: 3648 if not isinstance(bytesize, numbers.Integral) or bytesize < 0: 3649 raise ValueError("Improper byte length {0}.".format(bytesize)) 3650 else: 3651 raise TypeError("Format must be an integer, string or iterable.") 3652 3653 repeats = 0 3654 totalbitsize = 8 * sum(bytesizes) 3655 if not totalbitsize: 3656 return 0 3657 if repeat: 3658 # Try to repeat up to the end of the bitstring. 3659 finalbit = end 3660 else: 3661 # Just try one (set of) byteswap(s). 3662 finalbit = start + totalbitsize 3663 for patternend in xrange(start + totalbitsize, finalbit + 1, totalbitsize): 3664 bytestart = patternend - totalbitsize 3665 for bytesize in bytesizes: 3666 byteend = bytestart + bytesize * 8 3667 self._reversebytes(bytestart, byteend) 3668 bytestart += bytesize * 8 3669 repeats += 1 3670 return repeats 3671 3672 def clear(self): 3673 """Remove all bits, reset to zero length.""" 3674 self._clear() 3675 3676 def copy(self): 3677 """Return a copy of the bitstring.""" 3678 return self._copy() 3679 3680 int = property(Bits._getint, Bits._setint, 3681 doc="""The bitstring as a two's complement signed int. Read and write. 3682 """) 3683 uint = property(Bits._getuint, Bits._setuint, 3684 doc="""The bitstring as a two's complement unsigned int. Read and write. 3685 """) 3686 float = property(Bits._getfloat, Bits._setfloat, 3687 doc="""The bitstring as a floating point number. Read and write. 3688 """) 3689 intbe = property(Bits._getintbe, Bits._setintbe, 3690 doc="""The bitstring as a two's complement big-endian signed int. Read and write. 3691 """) 3692 uintbe = property(Bits._getuintbe, Bits._setuintbe, 3693 doc="""The bitstring as a two's complement big-endian unsigned int. Read and write. 3694 """) 3695 floatbe = property(Bits._getfloat, Bits._setfloat, 3696 doc="""The bitstring as a big-endian floating point number. Read and write. 3697 """) 3698 intle = property(Bits._getintle, Bits._setintle, 3699 doc="""The bitstring as a two's complement little-endian signed int. Read and write. 3700 """) 3701 uintle = property(Bits._getuintle, Bits._setuintle, 3702 doc="""The bitstring as a two's complement little-endian unsigned int. Read and write. 3703 """) 3704 floatle = property(Bits._getfloatle, Bits._setfloatle, 3705 doc="""The bitstring as a little-endian floating point number. Read and write. 3706 """) 3707 intne = property(Bits._getintne, Bits._setintne, 3708 doc="""The bitstring as a two's complement native-endian signed int. Read and write. 3709 """) 3710 uintne = property(Bits._getuintne, Bits._setuintne, 3711 doc="""The bitstring as a two's complement native-endian unsigned int. Read and write. 3712 """) 3713 floatne = property(Bits._getfloatne, Bits._setfloatne, 3714 doc="""The bitstring as a native-endian floating point number. Read and write. 3715 """) 3716 ue = property(Bits._getue, Bits._setue, 3717 doc="""The bitstring as an unsigned exponential-Golomb code. Read and write. 3718 """) 3719 se = property(Bits._getse, Bits._setse, 3720 doc="""The bitstring as a signed exponential-Golomb code. Read and write. 3721 """) 3722 uie = property(Bits._getuie, Bits._setuie, 3723 doc="""The bitstring as an unsigned interleaved exponential-Golomb code. Read and write. 3724 """) 3725 sie = property(Bits._getsie, Bits._setsie, 3726 doc="""The bitstring as a signed interleaved exponential-Golomb code. Read and write. 3727 """) 3728 hex = property(Bits._gethex, Bits._sethex, 3729 doc="""The bitstring as a hexadecimal string. Read and write. 3730 """) 3731 bin = property(Bits._getbin, Bits._setbin_safe, 3732 doc="""The bitstring as a binary string. Read and write. 3733 """) 3734 oct = property(Bits._getoct, Bits._setoct, 3735 doc="""The bitstring as an octal string. Read and write. 3736 """) 3737 bool = property(Bits._getbool, Bits._setbool, 3738 doc="""The bitstring as a bool (True or False). Read and write. 3739 """) 3740 bytes = property(Bits._getbytes, Bits._setbytes_safe, 3741 doc="""The bitstring as a ordinary string. Read and write. 3742 """) 3743 3744 3745 3746class ConstBitStream(Bits): 3747 """A container or stream holding an immutable sequence of bits. 3748 3749 For a mutable container use the BitStream class instead. 3750 3751 Methods inherited from Bits: 3752 3753 all() -- Check if all specified bits are set to 1 or 0. 3754 any() -- Check if any of specified bits are set to 1 or 0. 3755 count() -- Count the number of bits set to 1 or 0. 3756 cut() -- Create generator of constant sized chunks. 3757 endswith() -- Return whether the bitstring ends with a sub-string. 3758 find() -- Find a sub-bitstring in the current bitstring. 3759 findall() -- Find all occurrences of a sub-bitstring in the current bitstring. 3760 join() -- Join bitstrings together using current bitstring. 3761 rfind() -- Seek backwards to find a sub-bitstring. 3762 split() -- Create generator of chunks split by a delimiter. 3763 startswith() -- Return whether the bitstring starts with a sub-bitstring. 3764 tobytes() -- Return bitstring as bytes, padding if needed. 3765 tofile() -- Write bitstring to file, padding if needed. 3766 unpack() -- Interpret bits using format string. 3767 3768 Other methods: 3769 3770 bytealign() -- Align to next byte boundary. 3771 peek() -- Peek at and interpret next bits as a single item. 3772 peeklist() -- Peek at and interpret next bits as a list of items. 3773 read() -- Read and interpret next bits as a single item. 3774 readlist() -- Read and interpret next bits as a list of items. 3775 3776 Special methods: 3777 3778 Also available are the operators [], ==, !=, +, *, ~, <<, >>, &, |, ^. 3779 3780 Properties: 3781 3782 bin -- The bitstring as a binary string. 3783 bool -- For single bit bitstrings, interpret as True or False. 3784 bytepos -- The current byte position in the bitstring. 3785 bytes -- The bitstring as a bytes object. 3786 float -- Interpret as a floating point number. 3787 floatbe -- Interpret as a big-endian floating point number. 3788 floatle -- Interpret as a little-endian floating point number. 3789 floatne -- Interpret as a native-endian floating point number. 3790 hex -- The bitstring as a hexadecimal string. 3791 int -- Interpret as a two's complement signed integer. 3792 intbe -- Interpret as a big-endian signed integer. 3793 intle -- Interpret as a little-endian signed integer. 3794 intne -- Interpret as a native-endian signed integer. 3795 len -- Length of the bitstring in bits. 3796 oct -- The bitstring as an octal string. 3797 pos -- The current bit position in the bitstring. 3798 se -- Interpret as a signed exponential-Golomb code. 3799 ue -- Interpret as an unsigned exponential-Golomb code. 3800 sie -- Interpret as a signed interleaved exponential-Golomb code. 3801 uie -- Interpret as an unsigned interleaved exponential-Golomb code. 3802 uint -- Interpret as a two's complement unsigned integer. 3803 uintbe -- Interpret as a big-endian unsigned integer. 3804 uintle -- Interpret as a little-endian unsigned integer. 3805 uintne -- Interpret as a native-endian unsigned integer. 3806 3807 """ 3808 3809 __slots__ = ('_pos',) 3810 3811 def __init__(self, auto=None, length=None, offset=None, pos=0, **kwargs): 3812 """Either specify an 'auto' initialiser: 3813 auto -- a string of comma separated tokens, an integer, a file object, 3814 a bytearray, a boolean iterable or another bitstring. 3815 3816 Or initialise via **kwargs with one (and only one) of: 3817 bytes -- raw data as a string, for example read from a binary file. 3818 bin -- binary string representation, e.g. '0b001010'. 3819 hex -- hexadecimal string representation, e.g. '0x2ef' 3820 oct -- octal string representation, e.g. '0o777'. 3821 uint -- an unsigned integer. 3822 int -- a signed integer. 3823 float -- a floating point number. 3824 uintbe -- an unsigned big-endian whole byte integer. 3825 intbe -- a signed big-endian whole byte integer. 3826 floatbe - a big-endian floating point number. 3827 uintle -- an unsigned little-endian whole byte integer. 3828 intle -- a signed little-endian whole byte integer. 3829 floatle -- a little-endian floating point number. 3830 uintne -- an unsigned native-endian whole byte integer. 3831 intne -- a signed native-endian whole byte integer. 3832 floatne -- a native-endian floating point number. 3833 se -- a signed exponential-Golomb code. 3834 ue -- an unsigned exponential-Golomb code. 3835 sie -- a signed interleaved exponential-Golomb code. 3836 uie -- an unsigned interleaved exponential-Golomb code. 3837 bool -- a boolean (True or False). 3838 filename -- a file which will be opened in binary read-only mode. 3839 3840 Other keyword arguments: 3841 length -- length of the bitstring in bits, if needed and appropriate. 3842 It must be supplied for all integer and float initialisers. 3843 offset -- bit offset to the data. These offset bits are 3844 ignored and this is intended for use when 3845 initialising using 'bytes' or 'filename'. 3846 pos -- Initial bit position, defaults to 0. 3847 3848 """ 3849 pass 3850 3851 def __new__(cls, auto=None, length=None, offset=None, pos=0, **kwargs): 3852 x = super(ConstBitStream, cls).__new__(cls) 3853 x._initialise(auto, length, offset, **kwargs) 3854 x._pos = x._datastore.bitlength + pos if pos < 0 else pos 3855 if x._pos < 0 or x._pos > x._datastore.bitlength: 3856 raise CreationError("Cannot set pos to {0} when length is {1}".format(pos, x._datastore.bitlength)) 3857 return x 3858 3859 def _setbytepos(self, bytepos): 3860 """Move to absolute byte-aligned position in stream.""" 3861 self._setbitpos(bytepos * 8) 3862 3863 def _getbytepos(self): 3864 """Return the current position in the stream in bytes. Must be byte aligned.""" 3865 if self._pos % 8: 3866 raise ByteAlignError("Not byte aligned in _getbytepos().") 3867 return self._pos // 8 3868 3869 def _setbitpos(self, pos): 3870 """Move to absolute position bit in bitstream.""" 3871 if pos < 0: 3872 raise ValueError("Bit position cannot be negative.") 3873 if pos > self.len: 3874 raise ValueError("Cannot seek past the end of the data.") 3875 self._pos = pos 3876 3877 def _getbitpos(self): 3878 """Return the current position in the stream in bits.""" 3879 return self._pos 3880 3881 def _clear(self): 3882 Bits._clear(self) 3883 self._pos = 0 3884 3885 def __copy__(self): 3886 """Return a new copy of the ConstBitStream for the copy module.""" 3887 # Note that if you want a new copy (different ID), use _copy instead. 3888 # The copy can use the same datastore as it's immutable. 3889 s = ConstBitStream() 3890 s._datastore = self._datastore 3891 # Reset the bit position, don't copy it. 3892 s._pos = 0 3893 return s 3894 3895 def __add__(self, bs): 3896 """Concatenate bitstrings and return new bitstring. 3897 3898 bs -- the bitstring to append. 3899 3900 """ 3901 s = Bits.__add__(self, bs) 3902 s._pos = 0 3903 return s 3904 3905 def read(self, fmt): 3906 """Interpret next bits according to the format string and return result. 3907 3908 fmt -- Token string describing how to interpret the next bits. 3909 3910 Token examples: 'int:12' : 12 bits as a signed integer 3911 'uint:8' : 8 bits as an unsigned integer 3912 'float:64' : 8 bytes as a big-endian float 3913 'intbe:16' : 2 bytes as a big-endian signed integer 3914 'uintbe:16' : 2 bytes as a big-endian unsigned integer 3915 'intle:32' : 4 bytes as a little-endian signed integer 3916 'uintle:32' : 4 bytes as a little-endian unsigned integer 3917 'floatle:64': 8 bytes as a little-endian float 3918 'intne:24' : 3 bytes as a native-endian signed integer 3919 'uintne:24' : 3 bytes as a native-endian unsigned integer 3920 'floatne:32': 4 bytes as a native-endian float 3921 'hex:80' : 80 bits as a hex string 3922 'oct:9' : 9 bits as an octal string 3923 'bin:1' : single bit binary string 3924 'ue' : next bits as unsigned exp-Golomb code 3925 'se' : next bits as signed exp-Golomb code 3926 'uie' : next bits as unsigned interleaved exp-Golomb code 3927 'sie' : next bits as signed interleaved exp-Golomb code 3928 'bits:5' : 5 bits as a bitstring 3929 'bytes:10' : 10 bytes as a bytes object 3930 'bool' : 1 bit as a bool 3931 'pad:3' : 3 bits of padding to ignore - returns None 3932 3933 fmt may also be an integer, which will be treated like the 'bits' token. 3934 3935 The position in the bitstring is advanced to after the read items. 3936 3937 Raises ReadError if not enough bits are available. 3938 Raises ValueError if the format is not understood. 3939 3940 """ 3941 if isinstance(fmt, numbers.Integral): 3942 if fmt < 0: 3943 raise ValueError("Cannot read negative amount.") 3944 if fmt > self.len - self._pos: 3945 raise ReadError("Cannot read {0} bits, only {1} available.", 3946 fmt, self.len - self._pos) 3947 bs = self._slice(self._pos, self._pos + fmt) 3948 self._pos += fmt 3949 return bs 3950 p = self._pos 3951 _, token = tokenparser(fmt) 3952 if len(token) != 1: 3953 self._pos = p 3954 raise ValueError("Format string should be a single token, not {0} " 3955 "tokens - use readlist() instead.".format(len(token))) 3956 name, length, _ = token[0] 3957 if length is None: 3958 length = self.len - self._pos 3959 value, self._pos = self._readtoken(name, self._pos, length) 3960 return value 3961 3962 def readlist(self, fmt, **kwargs): 3963 """Interpret next bits according to format string(s) and return list. 3964 3965 fmt -- A single string or list of strings with comma separated tokens 3966 describing how to interpret the next bits in the bitstring. Items 3967 can also be integers, for reading new bitstring of the given length. 3968 kwargs -- A dictionary or keyword-value pairs - the keywords used in the 3969 format string will be replaced with their given value. 3970 3971 The position in the bitstring is advanced to after the read items. 3972 3973 Raises ReadError is not enough bits are available. 3974 Raises ValueError if the format is not understood. 3975 3976 See the docstring for 'read' for token examples. 'pad' tokens are skipped 3977 and not added to the returned list. 3978 3979 >>> h, b1, b2 = s.readlist('hex:20, bin:5, bin:3') 3980 >>> i, bs1, bs2 = s.readlist(['uint:12', 10, 10]) 3981 3982 """ 3983 value, self._pos = self._readlist(fmt, self._pos, **kwargs) 3984 return value 3985 3986 def readto(self, bs, bytealigned=None): 3987 """Read up to and including next occurrence of bs and return result. 3988 3989 bs -- The bitstring to find. An integer is not permitted. 3990 bytealigned -- If True the bitstring will only be 3991 found on byte boundaries. 3992 3993 Raises ValueError if bs is empty. 3994 Raises ReadError if bs is not found. 3995 3996 """ 3997 if isinstance(bs, numbers.Integral): 3998 raise ValueError("Integers cannot be searched for") 3999 bs = Bits(bs) 4000 oldpos = self._pos 4001 p = self.find(bs, self._pos, bytealigned=bytealigned) 4002 if not p: 4003 raise ReadError("Substring not found") 4004 self._pos += bs.len 4005 return self._slice(oldpos, self._pos) 4006 4007 def peek(self, fmt): 4008 """Interpret next bits according to format string and return result. 4009 4010 fmt -- Token string describing how to interpret the next bits. 4011 4012 The position in the bitstring is not changed. If not enough bits are 4013 available then all bits to the end of the bitstring will be used. 4014 4015 Raises ReadError if not enough bits are available. 4016 Raises ValueError if the format is not understood. 4017 4018 See the docstring for 'read' for token examples. 4019 4020 """ 4021 pos_before = self._pos 4022 value = self.read(fmt) 4023 self._pos = pos_before 4024 return value 4025 4026 def peeklist(self, fmt, **kwargs): 4027 """Interpret next bits according to format string(s) and return list. 4028 4029 fmt -- One or more strings with comma separated tokens describing 4030 how to interpret the next bits in the bitstring. 4031 kwargs -- A dictionary or keyword-value pairs - the keywords used in the 4032 format string will be replaced with their given value. 4033 4034 The position in the bitstring is not changed. If not enough bits are 4035 available then all bits to the end of the bitstring will be used. 4036 4037 Raises ReadError if not enough bits are available. 4038 Raises ValueError if the format is not understood. 4039 4040 See the docstring for 'read' for token examples. 4041 4042 """ 4043 pos = self._pos 4044 return_values = self.readlist(fmt, **kwargs) 4045 self._pos = pos 4046 return return_values 4047 4048 def bytealign(self): 4049 """Align to next byte and return number of skipped bits. 4050 4051 Raises ValueError if the end of the bitstring is reached before 4052 aligning to the next byte. 4053 4054 """ 4055 skipped = (8 - (self._pos % 8)) % 8 4056 self.pos += self._offset + skipped 4057 assert self._assertsanity() 4058 return skipped 4059 4060 pos = property(_getbitpos, _setbitpos, 4061 doc="""The position in the bitstring in bits. Read and write. 4062 """) 4063 bitpos = property(_getbitpos, _setbitpos, 4064 doc="""The position in the bitstring in bits. Read and write. 4065 """) 4066 bytepos = property(_getbytepos, _setbytepos, 4067 doc="""The position in the bitstring in bytes. Read and write. 4068 """) 4069 4070 4071class BitStream(ConstBitStream, BitArray): 4072 """A container or stream holding a mutable sequence of bits 4073 4074 Subclass of the ConstBitStream and BitArray classes. Inherits all of 4075 their methods. 4076 4077 Methods: 4078 4079 all() -- Check if all specified bits are set to 1 or 0. 4080 any() -- Check if any of specified bits are set to 1 or 0. 4081 append() -- Append a bitstring. 4082 bytealign() -- Align to next byte boundary. 4083 byteswap() -- Change byte endianness in-place. 4084 count() -- Count the number of bits set to 1 or 0. 4085 cut() -- Create generator of constant sized chunks. 4086 endswith() -- Return whether the bitstring ends with a sub-string. 4087 find() -- Find a sub-bitstring in the current bitstring. 4088 findall() -- Find all occurrences of a sub-bitstring in the current bitstring. 4089 insert() -- Insert a bitstring. 4090 invert() -- Flip bit(s) between one and zero. 4091 join() -- Join bitstrings together using current bitstring. 4092 overwrite() -- Overwrite a section with a new bitstring. 4093 peek() -- Peek at and interpret next bits as a single item. 4094 peeklist() -- Peek at and interpret next bits as a list of items. 4095 prepend() -- Prepend a bitstring. 4096 read() -- Read and interpret next bits as a single item. 4097 readlist() -- Read and interpret next bits as a list of items. 4098 replace() -- Replace occurrences of one bitstring with another. 4099 reverse() -- Reverse bits in-place. 4100 rfind() -- Seek backwards to find a sub-bitstring. 4101 rol() -- Rotate bits to the left. 4102 ror() -- Rotate bits to the right. 4103 set() -- Set bit(s) to 1 or 0. 4104 split() -- Create generator of chunks split by a delimiter. 4105 startswith() -- Return whether the bitstring starts with a sub-bitstring. 4106 tobytes() -- Return bitstring as bytes, padding if needed. 4107 tofile() -- Write bitstring to file, padding if needed. 4108 unpack() -- Interpret bits using format string. 4109 4110 Special methods: 4111 4112 Mutating operators are available: [], <<=, >>=, +=, *=, &=, |= and ^= 4113 in addition to [], ==, !=, +, *, ~, <<, >>, &, | and ^. 4114 4115 Properties: 4116 4117 bin -- The bitstring as a binary string. 4118 bool -- For single bit bitstrings, interpret as True or False. 4119 bytepos -- The current byte position in the bitstring. 4120 bytes -- The bitstring as a bytes object. 4121 float -- Interpret as a floating point number. 4122 floatbe -- Interpret as a big-endian floating point number. 4123 floatle -- Interpret as a little-endian floating point number. 4124 floatne -- Interpret as a native-endian floating point number. 4125 hex -- The bitstring as a hexadecimal string. 4126 int -- Interpret as a two's complement signed integer. 4127 intbe -- Interpret as a big-endian signed integer. 4128 intle -- Interpret as a little-endian signed integer. 4129 intne -- Interpret as a native-endian signed integer. 4130 len -- Length of the bitstring in bits. 4131 oct -- The bitstring as an octal string. 4132 pos -- The current bit position in the bitstring. 4133 se -- Interpret as a signed exponential-Golomb code. 4134 ue -- Interpret as an unsigned exponential-Golomb code. 4135 sie -- Interpret as a signed interleaved exponential-Golomb code. 4136 uie -- Interpret as an unsigned interleaved exponential-Golomb code. 4137 uint -- Interpret as a two's complement unsigned integer. 4138 uintbe -- Interpret as a big-endian unsigned integer. 4139 uintle -- Interpret as a little-endian unsigned integer. 4140 uintne -- Interpret as a native-endian unsigned integer. 4141 4142 """ 4143 4144 __slots__ = () 4145 4146 # As BitStream objects are mutable, we shouldn't allow them to be hashed. 4147 __hash__ = None 4148 4149 def __init__(self, auto=None, length=None, offset=None, pos=0, **kwargs): 4150 """Either specify an 'auto' initialiser: 4151 auto -- a string of comma separated tokens, an integer, a file object, 4152 a bytearray, a boolean iterable or another bitstring. 4153 4154 Or initialise via **kwargs with one (and only one) of: 4155 bytes -- raw data as a string, for example read from a binary file. 4156 bin -- binary string representation, e.g. '0b001010'. 4157 hex -- hexadecimal string representation, e.g. '0x2ef' 4158 oct -- octal string representation, e.g. '0o777'. 4159 uint -- an unsigned integer. 4160 int -- a signed integer. 4161 float -- a floating point number. 4162 uintbe -- an unsigned big-endian whole byte integer. 4163 intbe -- a signed big-endian whole byte integer. 4164 floatbe - a big-endian floating point number. 4165 uintle -- an unsigned little-endian whole byte integer. 4166 intle -- a signed little-endian whole byte integer. 4167 floatle -- a little-endian floating point number. 4168 uintne -- an unsigned native-endian whole byte integer. 4169 intne -- a signed native-endian whole byte integer. 4170 floatne -- a native-endian floating point number. 4171 se -- a signed exponential-Golomb code. 4172 ue -- an unsigned exponential-Golomb code. 4173 sie -- a signed interleaved exponential-Golomb code. 4174 uie -- an unsigned interleaved exponential-Golomb code. 4175 bool -- a boolean (True or False). 4176 filename -- a file which will be opened in binary read-only mode. 4177 4178 Other keyword arguments: 4179 length -- length of the bitstring in bits, if needed and appropriate. 4180 It must be supplied for all integer and float initialisers. 4181 offset -- bit offset to the data. These offset bits are 4182 ignored and this is intended for use when 4183 initialising using 'bytes' or 'filename'. 4184 pos -- Initial bit position, defaults to 0. 4185 4186 """ 4187 # For mutable BitStreams we always read in files to memory: 4188 if not isinstance(self._datastore, (ByteStore, ConstByteStore)): 4189 self._ensureinmemory() 4190 4191 def __new__(cls, auto=None, length=None, offset=None, pos=0, **kwargs): 4192 x = super(BitStream, cls).__new__(cls) 4193 y = ConstBitStream.__new__(BitStream, auto, length, offset, pos, **kwargs) 4194 x._datastore = ByteStore(y._datastore._rawarray[:], 4195 y._datastore.bitlength, 4196 y._datastore.offset) 4197 x._pos = y._pos 4198 return x 4199 4200 def __copy__(self): 4201 """Return a new copy of the BitStream.""" 4202 s_copy = BitStream() 4203 s_copy._pos = 0 4204 if not isinstance(self._datastore, ByteStore): 4205 # Let them both point to the same (invariant) array. 4206 # If either gets modified then at that point they'll be read into memory. 4207 s_copy._datastore = self._datastore 4208 else: 4209 s_copy._datastore = ByteStore(self._datastore._rawarray[:], 4210 self._datastore.bitlength, 4211 self._datastore.offset) 4212 return s_copy 4213 4214 def prepend(self, bs): 4215 """Prepend a bitstring to the current bitstring. 4216 4217 bs -- The bitstring to prepend. 4218 4219 """ 4220 bs = self._converttobitstring(bs) 4221 self._addleft(bs) 4222 self._pos += bs.len 4223 4224 4225def pack(fmt, *values, **kwargs): 4226 """Pack the values according to the format string and return a new BitStream. 4227 4228 fmt -- A single string or a list of strings with comma separated tokens 4229 describing how to create the BitStream. 4230 values -- Zero or more values to pack according to the format. 4231 kwargs -- A dictionary or keyword-value pairs - the keywords used in the 4232 format string will be replaced with their given value. 4233 4234 Token examples: 'int:12' : 12 bits as a signed integer 4235 'uint:8' : 8 bits as an unsigned integer 4236 'float:64' : 8 bytes as a big-endian float 4237 'intbe:16' : 2 bytes as a big-endian signed integer 4238 'uintbe:16' : 2 bytes as a big-endian unsigned integer 4239 'intle:32' : 4 bytes as a little-endian signed integer 4240 'uintle:32' : 4 bytes as a little-endian unsigned integer 4241 'floatle:64': 8 bytes as a little-endian float 4242 'intne:24' : 3 bytes as a native-endian signed integer 4243 'uintne:24' : 3 bytes as a native-endian unsigned integer 4244 'floatne:32': 4 bytes as a native-endian float 4245 'hex:80' : 80 bits as a hex string 4246 'oct:9' : 9 bits as an octal string 4247 'bin:1' : single bit binary string 4248 'ue' / 'uie': next bits as unsigned exp-Golomb code 4249 'se' / 'sie': next bits as signed exp-Golomb code 4250 'bits:5' : 5 bits as a bitstring object 4251 'bytes:10' : 10 bytes as a bytes object 4252 'bool' : 1 bit as a bool 4253 'pad:3' : 3 zero bits as padding 4254 4255 >>> s = pack('uint:12, bits', 100, '0xffe') 4256 >>> t = pack(['bits', 'bin:3'], s, '111') 4257 >>> u = pack('uint:8=a, uint:8=b, uint:55=a', a=6, b=44) 4258 4259 """ 4260 tokens = [] 4261 if isinstance(fmt, basestring): 4262 fmt = [fmt] 4263 try: 4264 for f_item in fmt: 4265 _, tkns = tokenparser(f_item, tuple(sorted(kwargs.keys()))) 4266 tokens.extend(tkns) 4267 except ValueError as e: 4268 raise CreationError(*e.args) 4269 value_iter = iter(values) 4270 s = BitStream() 4271 try: 4272 for name, length, value in tokens: 4273 # If the value is in the kwd dictionary then it takes precedence. 4274 if value in kwargs: 4275 value = kwargs[value] 4276 # If the length is in the kwd dictionary then use that too. 4277 if length in kwargs: 4278 length = kwargs[length] 4279 # Also if we just have a dictionary name then we want to use it 4280 if name in kwargs and length is None and value is None: 4281 s._append(kwargs[name]) 4282 continue 4283 if length is not None: 4284 length = int(length) 4285 if value is None and name != 'pad': 4286 # Take the next value from the ones provided 4287 value = next(value_iter) 4288 s._addright(BitStream._init_with_token(name, length, value)) 4289 except StopIteration: 4290 raise CreationError("Not enough parameters present to pack according to the " 4291 "format. {0} values are needed.", len(tokens)) 4292 try: 4293 next(value_iter) 4294 except StopIteration: 4295 # Good, we've used up all the *values. 4296 return s 4297 raise CreationError("Too many parameters present to pack according to the format.") 4298 4299 4300# Whether to label the Least Significant Bit as bit 0. Default is False. Experimental feature. 4301_lsb0 = False 4302 4303# Dictionary that maps token names to the function that reads them. Is set in next function. 4304name_to_read = {} 4305 4306 4307def _switch_lsb0_methods(lsb0): 4308 global _lsb0 4309 _lsb0 = lsb0 4310 if lsb0: 4311 ConstByteStore.getbit = ConstByteStore._getbit_lsb0 4312 Bits._find = Bits._find_lsb0 4313 Bits._slice = Bits._slice_lsb0 4314 BitArray._overwrite = BitArray._overwrite_lsb0 4315 BitArray._insert = BitArray._insert_lsb0 4316 BitArray._delete = BitArray._delete_lsb0 4317 BitArray._ror = BitArray._rol_msb0 4318 BitArray._rol = BitArray._ror_msb0 4319 ByteStore.setbit = ByteStore._setbit_lsb0 4320 ByteStore.unsetbit = ByteStore._unsetbit_lsb0 4321 ByteStore.invertbit = ByteStore._invertbit_lsb0 4322 BitArray._append = BitArray._append_lsb0 4323 BitArray._prepend = BitArray._append_msb0 # An LSB0 prepend is an MSB0 append 4324 Bits._readuint = Bits._readuint_lsb0 4325 Bits._truncatestart = Bits._truncateright 4326 Bits._truncateend = Bits._truncateleft 4327 Bits._validate_slice = Bits._validate_slice_lsb0 4328 else: 4329 ConstByteStore.getbit = ConstByteStore._getbit_msb0 4330 Bits._find = Bits._find_msb0 4331 Bits._slice = Bits._slice_msb0 4332 BitArray._overwrite = BitArray._overwrite_msb0 4333 BitArray._insert = BitArray._insert_msb0 4334 BitArray._delete = BitArray._delete_msb0 4335 BitArray._ror = BitArray._ror_msb0 4336 BitArray._rol = BitArray._rol_msb0 4337 ByteStore.setbit = ByteStore._setbit_msb0 4338 ByteStore.unsetbit = ByteStore._unsetbit_msb0 4339 ByteStore.invertbit = ByteStore._invertbit_msb0 4340 BitArray._append = BitArray._append_msb0 4341 BitArray._prepend = BitArray._append_lsb0 4342 Bits._readuint = Bits._readuint_msb0 4343 Bits._truncatestart = Bits._truncateleft 4344 Bits._truncateend = Bits._truncateright 4345 Bits._validate_slice = Bits._validate_slice_msb0 4346 4347 global name_to_read 4348 name_to_read = {'uint': Bits._readuint, 4349 'uintle': Bits._readuintle, 4350 'uintbe': Bits._readuintbe, 4351 'uintne': Bits._readuintne, 4352 'int': Bits._readint, 4353 'intle': Bits._readintle, 4354 'intbe': Bits._readintbe, 4355 'intne': Bits._readintne, 4356 'float': Bits._readfloat, 4357 'floatbe': Bits._readfloat, # floatbe is a synonym for float 4358 'floatle': Bits._readfloatle, 4359 'floatne': Bits._readfloatne, 4360 'hex': Bits._readhex, 4361 'oct': Bits._readoct, 4362 'bin': Bits._readbin, 4363 'bits': Bits._readbits, 4364 'bytes': Bits._readbytes, 4365 'ue': Bits._readue, 4366 'se': Bits._readse, 4367 'uie': Bits._readuie, 4368 'sie': Bits._readsie, 4369 'bool': Bits._readbool, 4370 } 4371 4372 4373def set_lsb0(v=True): 4374 """Experimental method changing the bit numbering so that the least significant bit is bit 0""" 4375 _switch_lsb0_methods(v) 4376 4377 4378def set_msb0(v=True): 4379 """Experimental method to reset the bit numbering so that the most significant bit is bit 0""" 4380 set_lsb0(not v) 4381 4382 4383# Initialise the default behaviour 4384set_msb0() 4385 4386 4387# Dictionaries for mapping init keywords with init functions. 4388init_with_length_and_offset = {'bytes': Bits._setbytes_safe, 4389 'filename': Bits._setfile, 4390 } 4391 4392init_with_length_only = {'uint': Bits._setuint, 4393 'int': Bits._setint, 4394 'float': Bits._setfloat, 4395 'uintbe': Bits._setuintbe, 4396 'intbe': Bits._setintbe, 4397 'floatbe': Bits._setfloat, 4398 'uintle': Bits._setuintle, 4399 'intle': Bits._setintle, 4400 'floatle': Bits._setfloatle, 4401 'uintne': Bits._setuintne, 4402 'intne': Bits._setintne, 4403 'floatne': Bits._setfloatne, 4404 } 4405 4406init_without_length_or_offset = {'bin': Bits._setbin_safe, 4407 'hex': Bits._sethex, 4408 'oct': Bits._setoct, 4409 'ue': Bits._setue, 4410 'se': Bits._setse, 4411 'uie': Bits._setuie, 4412 'sie': Bits._setsie, 4413 'bool': Bits._setbool, 4414 } 4415 4416 4417# Aliases for backward compatibility 4418ConstBitArray = Bits 4419BitString = BitStream 4420 4421__all__ = ['ConstBitArray', 'ConstBitStream', 'BitStream', 'BitArray', 4422 'Bits', 'BitString', 'pack', 'Error', 'ReadError', 'InterpretError', 4423 'ByteAlignError', 'CreationError', 'bytealigned', 'set_lsb0', 'set_msb0'] 4424 4425 4426def main(): 4427 # check if final parameter is an interpretation string 4428 fp = sys.argv[-1] 4429 if fp == '--help' or len(sys.argv) == 1: 4430 print("""Create and interpret a bitstring from command-line parameters. 4431 4432Command-line parameters are concatenated and a bitstring created 4433from them. If the final parameter is either an interpretation string 4434or ends with a '.' followed by an interpretation string then that 4435interpretation of the bitstring will be used when printing it. 4436 4437Typical usage might be invoking the Python module from a console 4438as a one-off calculation: 4439 4440$ python -m bitstring int:16=-400 44410xfe70 4442$ python -m bitstring float:32=0.2 bin 444300111110010011001100110011001101 4444$ python -m bitstring 0xff 3*0b01,0b11 uint 444565367 4446$ python -m bitstring hex=01, uint:12=352.hex 444701160 4448 4449This feature is experimental and is subject to change or removal. 4450 """) 4451 elif fp in name_to_read.keys(): 4452 # concatenate all other parameters and interpret using the final one 4453 b1 = Bits(','.join(sys.argv[1: -1])) 4454 print(b1._readtoken(fp, 0, b1.__len__())[0]) 4455 else: 4456 # does final parameter end with a dot then an interpretation string? 4457 interp = fp[fp.rfind('.') + 1:] 4458 if interp in name_to_read.keys(): 4459 sys.argv[-1] = fp[:fp.rfind('.')] 4460 b1 = Bits(','.join(sys.argv[1:])) 4461 print(b1._readtoken(interp, 0, b1.__len__())[0]) 4462 else: 4463 # No interpretation - just use default print 4464 b1 = Bits(','.join(sys.argv[1:])) 4465 print(b1) 4466 4467 4468if __name__ == '__main__': 4469 main() 4470