1#!/usr/bin/env python
2r"""
3This package defines classes that simplify bit-wise creation, manipulation and
4interpretation of data.
5
6Classes:
7
8Bits -- An immutable container for binary data.
9BitArray -- A mutable container for binary data.
10ConstBitStream -- An immutable container with streaming methods.
11BitStream -- A mutable container with streaming methods.
12
13                      Bits (base class)
14                     /    \
15 + mutating methods /      \ + streaming methods
16                   /        \
17              BitArray   ConstBitStream
18                   \        /
19                    \      /
20                     \    /
21                    BitStream
22
23Functions:
24
25pack -- Create a BitStream from a format string.
26
27Exceptions:
28
29Error -- Module exception base class.
30CreationError -- Error during creation.
31InterpretError -- Inappropriate interpretation of binary data.
32ByteAlignError -- Whole byte position or length needed.
33ReadError -- Reading or peeking past the end of a bitstring.
34
35https://github.com/scott-griffiths/bitstring
36"""
37
38__licence__ = """
39The MIT License
40
41Copyright (c) 2006 Scott Griffiths (dr.scottgriffiths@gmail.com)
42
43Permission is hereby granted, free of charge, to any person obtaining a copy
44of this software and associated documentation files (the "Software"), to deal
45in the Software without restriction, including without limitation the rights
46to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
47copies of the Software, and to permit persons to whom the Software is
48furnished to do so, subject to the following conditions:
49
50The above copyright notice and this permission notice shall be included in
51all copies or substantial portions of the Software.
52
53THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
54IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
55FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
56AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
57LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
58OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
59THE SOFTWARE.
60"""
61
62__version__ = "3.1.9"
63
64__author__ = "Scott Griffiths"
65
66import numbers
67import copy
68import sys
69import re
70import binascii
71import mmap
72import os
73import struct
74import operator
75import array
76import io
77import collections
78
79try:
80    collectionsAbc = collections.abc
81except AttributeError:  # Python 2.7
82    collectionsAbc = collections
83
84byteorder = sys.byteorder
85
86bytealigned = False
87"""Determines whether a number of methods default to working only on byte boundaries."""
88
89
90# Maximum number of digits to use in __str__ and __repr__.
91MAX_CHARS = 250
92
93# Maximum size of caches used for speed optimisations.
94CACHE_SIZE = 1000
95
96# Set this to True for extra assertions for debugging.
97_debug = False
98
99
100class Error(Exception):
101    """Base class for errors in the bitstring module."""
102
103    def __init__(self, *params):
104        self.msg = params[0] if params else ''
105        self.params = params[1:]
106
107    def __str__(self):
108        if self.params:
109            return self.msg.format(*self.params)
110        return self.msg
111
112
113class ReadError(Error, IndexError):
114    """Reading or peeking past the end of a bitstring."""
115
116    def __init__(self, *params):
117        Error.__init__(self, *params)
118
119
120class InterpretError(Error, ValueError):
121    """Inappropriate interpretation of binary data."""
122
123    def __init__(self, *params):
124        Error.__init__(self, *params)
125
126
127class ByteAlignError(Error):
128    """Whole-byte position or length needed."""
129
130    def __init__(self, *params):
131        Error.__init__(self, *params)
132
133
134class CreationError(Error, ValueError):
135    """Inappropriate argument during bitstring creation."""
136
137    def __init__(self, *params):
138        Error.__init__(self, *params)
139
140
141class ConstByteStore(object):
142    """Stores raw bytes together with a bit offset and length.
143
144    Used internally - not part of public interface.
145    """
146
147    __slots__ = ('offset', '_rawarray', 'bitlength')
148
149    def __init__(self, data, bitlength=None, offset=None):
150        """data is either a bytearray or a MmapByteArray"""
151        self._rawarray = data
152        if offset is None:
153            offset = 0
154        if bitlength is None:
155            bitlength = 8 * len(data) - offset
156        self.offset = offset
157        self.bitlength = bitlength
158
159    def __iter__(self):
160        start_byte, start_bit = divmod(self.offset, 8)
161        end_byte, end_bit = divmod(self.offset + self.bitlength, 8)
162
163        for byte_index in xrange(start_byte, end_byte):
164            byte = self._rawarray[byte_index]
165            for bit in range(start_bit, 8):
166                yield bool(byte & (128 >> bit))
167            start_bit = 0
168
169        if end_bit:
170            byte = self._rawarray[end_byte]
171            for bit in range(start_bit, end_bit):
172                yield bool(byte & (128 >> bit))
173
174    def _getbit_lsb0(self, pos):
175        assert 0 <= pos < self.bitlength
176        pos = self.bitlength - pos - 1
177        byte, bit = divmod(self.offset + pos, 8)
178        return bool(self._rawarray[byte] & (128 >> bit))
179
180    def _getbit_msb0(self, pos):
181        assert 0 <= pos < self.bitlength
182        byte, bit = divmod(self.offset + pos, 8)
183        return bool(self._rawarray[byte] & (128 >> bit))
184
185    def getbyte(self, pos):
186        """Direct access to byte data."""
187        return self._rawarray[pos]
188
189    def getbyteslice(self, start, end):
190        """Direct access to byte data."""
191        c = self._rawarray[start:end]
192        return c
193
194    @property
195    def bytelength(self):
196        if not self.bitlength:
197            return 0
198        sb = self.offset // 8
199        eb = (self.offset + self.bitlength - 1) // 8
200        return eb - sb + 1
201
202    def __copy__(self):
203        return ByteStore(self._rawarray[:], self.bitlength, self.offset)
204
205    def _appendstore(self, store):
206        """Join another store on to the end of this one."""
207        if not store.bitlength:
208            return
209        # Set new array offset to the number of bits in the final byte of current array.
210        store = offsetcopy(store, (self.offset + self.bitlength) % 8)
211        if store.offset:
212            # first do the byte with the join.
213            joinval = (self._rawarray.pop() & (255 ^ (255 >> store.offset)) |
214                       (store.getbyte(0) & (255 >> store.offset)))
215            self._rawarray.append(joinval)
216            self._rawarray.extend(store._rawarray[1:])
217        else:
218            self._rawarray.extend(store._rawarray)
219        self.bitlength += store.bitlength
220
221    def _prependstore(self, store):
222        """Join another store on to the start of this one."""
223        if not store.bitlength:
224            return
225            # Set the offset of copy of store so that it's final byte
226        # ends in a position that matches the offset of self,
227        # then join self on to the end of it.
228        store = offsetcopy(store, (self.offset - store.bitlength) % 8)
229        assert (store.offset + store.bitlength) % 8 == self.offset % 8
230        bit_offset = self.offset % 8
231        if bit_offset:
232            # first do the byte with the join.
233            joinval = (store.getbyte(-1) & (255 ^ (255 >> bit_offset)) |
234                      (self._rawarray[self.byteoffset] & (255 >> bit_offset)))
235            store._rawarray[-1] = joinval
236            store._rawarray.extend(self._rawarray[self.byteoffset + 1: self.byteoffset + self.bytelength])
237        else:
238            store._rawarray.extend(self._rawarray[self.byteoffset: self.byteoffset + self.bytelength])
239        self._rawarray = store._rawarray
240        self.offset = store.offset
241        self.bitlength += store.bitlength
242
243    @property
244    def byteoffset(self):
245        return self.offset // 8
246
247    @property
248    def rawbytes(self):
249        return self._rawarray
250
251
252class ByteStore(ConstByteStore):
253    """Adding mutating methods to ConstByteStore
254
255    Used internally - not part of public interface.
256    """
257    __slots__ = ()
258
259    def _setbit_lsb0(self, pos):
260        assert 0 <= pos < self.bitlength
261        pos = self.bitlength - pos - 1
262        byte, bit = divmod(self.offset + pos, 8)
263        self._rawarray[byte] |= (128 >> bit)
264
265    def _setbit_msb0(self, pos):
266        assert 0 <= pos < self.bitlength
267        byte, bit = divmod(self.offset + pos, 8)
268        self._rawarray[byte] |= (128 >> bit)
269
270    def _unsetbit_lsb0(self, pos):
271        assert 0 <= pos < self.bitlength
272        pos = self.bitlength - pos - 1
273        byte, bit = divmod(self.offset + pos, 8)
274        self._rawarray[byte] &= ~(128 >> bit)
275
276    def _unsetbit_msb0(self, pos):
277        assert 0 <= pos < self.bitlength
278        byte, bit = divmod(self.offset + pos, 8)
279        self._rawarray[byte] &= ~(128 >> bit)
280
281    def _invertbit_lsb0(self, pos):
282        assert 0 <= pos < self.bitlength
283        pos = self.bitlength - pos - 1
284        byte, bit = divmod(self.offset + pos, 8)
285        self._rawarray[byte] ^= (128 >> bit)
286
287    def _invertbit_msb0(self, pos):
288        assert 0 <= pos < self.bitlength
289        byte, bit = divmod(self.offset + pos, 8)
290        self._rawarray[byte] ^= (128 >> bit)
291
292    def setbyte(self, pos, value):
293        self._rawarray[pos] = value
294
295    def setbyteslice(self, start, end, value):
296        self._rawarray[start:end] = value
297
298
299def offsetcopy(s, newoffset):
300    """Return a copy of a ByteStore with the newoffset.
301
302    Not part of public interface.
303    """
304    assert 0 <= newoffset < 8
305    if not s.bitlength:
306        return copy.copy(s)
307    else:
308        if newoffset == s.offset % 8:
309            return type(s)(s.getbyteslice(s.byteoffset, s.byteoffset + s.bytelength), s.bitlength, newoffset)
310        newdata = []
311        d = s._rawarray
312        assert newoffset != s.offset % 8
313        if newoffset < s.offset % 8:
314            # We need to shift everything left
315            shiftleft = s.offset % 8 - newoffset
316            # First deal with everything except for the final byte
317            for x in range(s.byteoffset, s.byteoffset + s.bytelength - 1):
318                newdata.append(((d[x] << shiftleft) & 0xff) + (d[x + 1] >> (8 - shiftleft)))
319            bits_in_last_byte = (s.offset + s.bitlength) % 8
320            if not bits_in_last_byte:
321                bits_in_last_byte = 8
322            if bits_in_last_byte > shiftleft:
323                newdata.append((d[s.byteoffset + s.bytelength - 1] << shiftleft) & 0xff)
324        else:  # newoffset > s._offset % 8
325            shiftright = newoffset - s.offset % 8
326            newdata.append(s.getbyte(0) >> shiftright)
327            for x in range(s.byteoffset + 1, s.byteoffset + s.bytelength):
328                newdata.append(((d[x - 1] << (8 - shiftright)) & 0xff) + (d[x] >> shiftright))
329            bits_in_last_byte = (s.offset + s.bitlength) % 8
330            if not bits_in_last_byte:
331                bits_in_last_byte = 8
332            if bits_in_last_byte + shiftright > 8:
333                newdata.append((d[s.byteoffset + s.bytelength - 1] << (8 - shiftright)) & 0xff)
334        new_s = type(s)(bytearray(newdata), s.bitlength, newoffset)
335        assert new_s.offset == newoffset
336        return new_s
337
338
339def equal(a, b):
340    """Return True if ByteStores a == b.
341
342    Not part of public interface.
343    """
344    # We want to return False for inequality as soon as possible, which
345    # means we get lots of special cases.
346    # First the easy one - compare lengths:
347    a_bitlength = a.bitlength
348    b_bitlength = b.bitlength
349    if a_bitlength != b_bitlength:
350        return False
351    if not a_bitlength:
352        assert b_bitlength == 0
353        return True
354    # Make 'a' the one with the smaller offset
355    if (a.offset % 8) > (b.offset % 8):
356        a, b = b, a
357    # and create some aliases
358    a_bitoff = a.offset % 8
359    b_bitoff = b.offset % 8
360    a_byteoffset = a.byteoffset
361    b_byteoffset = b.byteoffset
362    a_bytelength = a.bytelength
363    b_bytelength = b.bytelength
364    da = a._rawarray
365    db = b._rawarray
366
367    # If they are pointing to the same data, they must be equal
368    if da is db and a.offset == b.offset:
369        return True
370
371    if a_bitoff == b_bitoff:
372        bits_spare_in_last_byte = 8 - (a_bitoff + a_bitlength) % 8
373        if bits_spare_in_last_byte == 8:
374            bits_spare_in_last_byte = 0
375        # Special case for a, b contained in a single byte
376        if a_bytelength == 1:
377            a_val = ((da[a_byteoffset] << a_bitoff) & 0xff) >> (8 - a_bitlength)
378            b_val = ((db[b_byteoffset] << b_bitoff) & 0xff) >> (8 - b_bitlength)
379            return a_val == b_val
380        # Otherwise check first byte
381        if da[a_byteoffset] & (0xff >> a_bitoff) != db[b_byteoffset] & (0xff >> b_bitoff):
382            return False
383        # then everything up to the last
384        b_a_offset = b_byteoffset - a_byteoffset
385        for x in range(1 + a_byteoffset, a_byteoffset + a_bytelength - 1):
386            if da[x] != db[b_a_offset + x]:
387                return False
388        # and finally the last byte
389        return (da[a_byteoffset + a_bytelength - 1] >> bits_spare_in_last_byte ==
390                db[b_byteoffset + b_bytelength - 1] >> bits_spare_in_last_byte)
391
392    assert a_bitoff != b_bitoff
393    # This is how much we need to shift a to the right to compare with b:
394    shift = b_bitoff - a_bitoff
395    # Special case for b only one byte long
396    if b_bytelength == 1:
397        assert a_bytelength == 1
398        a_val = ((da[a_byteoffset] << a_bitoff) & 0xff) >> (8 - a_bitlength)
399        b_val = ((db[b_byteoffset] << b_bitoff) & 0xff) >> (8 - b_bitlength)
400        return a_val == b_val
401    # Special case for a only one byte long
402    if a_bytelength == 1:
403        assert b_bytelength == 2
404        a_val = ((da[a_byteoffset] << a_bitoff) & 0xff) >> (8 - a_bitlength)
405        b_val = ((db[b_byteoffset] << 8) + db[b_byteoffset + 1]) << b_bitoff
406        b_val &= 0xffff
407        b_val >>= 16 - b_bitlength
408        return a_val == b_val
409
410    # Compare first byte of b with bits from first byte of a
411    if (da[a_byteoffset] & (0xff >> a_bitoff)) >> shift != db[b_byteoffset] & (0xff >> b_bitoff):
412        return False
413    # Now compare every full byte of b with bits from 2 bytes of a
414    for x in range(1, b_bytelength - 1):
415        # Construct byte from 2 bytes in a to compare to byte in b
416        b_val = db[b_byteoffset + x]
417        a_val = ((da[a_byteoffset + x - 1] << 8) + da[a_byteoffset + x]) >> shift
418        a_val &= 0xff
419        if a_val != b_val:
420            return False
421
422    # Now check bits in final byte of b
423    final_b_bits = (b.offset + b_bitlength) % 8
424    if not final_b_bits:
425        final_b_bits = 8
426    b_val = db[b_byteoffset + b_bytelength - 1] >> (8 - final_b_bits)
427    final_a_bits = (a.offset + a_bitlength) % 8
428    if not final_a_bits:
429        final_a_bits = 8
430    if b.bytelength > a_bytelength:
431        assert b_bytelength == a_bytelength + 1
432        a_val = da[a_byteoffset + a_bytelength - 1] >> (8 - final_a_bits)
433        a_val &= 0xff >> (8 - final_b_bits)
434        return a_val == b_val
435    assert a_bytelength == b_bytelength
436    a_val = da[a_byteoffset + a_bytelength - 2] << 8
437    a_val += da[a_byteoffset + a_bytelength - 1]
438    a_val >>= (8 - final_a_bits)
439    a_val &= 0xff >> (8 - final_b_bits)
440    return a_val == b_val
441
442
443class MmapByteArray(object):
444    """Looks like a bytearray, but from an mmap.
445
446    Not part of public interface.
447    """
448
449    __slots__ = ('filemap', 'filelength', 'source', 'byteoffset', 'bytelength')
450
451    def __init__(self, source, bytelength=None, byteoffset=None):
452        self.source = source
453        source.seek(0, os.SEEK_END)
454        self.filelength = source.tell()
455        if byteoffset is None:
456            byteoffset = 0
457        if bytelength is None:
458            bytelength = self.filelength - byteoffset
459        self.byteoffset = byteoffset
460        self.bytelength = bytelength
461        self.filemap = mmap.mmap(source.fileno(), 0, access=mmap.ACCESS_READ)
462
463    def __getitem__(self, key):
464        try:
465            start = key.start
466            stop = key.stop
467        except AttributeError:
468            try:
469                assert 0 <= key < self.bytelength
470                return ord(self.filemap[key + self.byteoffset])
471            except TypeError:
472                # for Python 3
473                return self.filemap[key + self.byteoffset]
474        else:
475            if start is None:
476                start = 0
477            if stop is None:
478                stop = self.bytelength
479            assert key.step is None
480            assert 0 <= start < self.bytelength
481            assert 0 <= stop <= self.bytelength
482            s = slice(start + self.byteoffset, stop + self.byteoffset)
483            return bytearray(self.filemap.__getitem__(s))
484
485    def __len__(self):
486        return self.bytelength
487
488
489# This creates a dictionary for every possible byte with the value being
490# the key with its bits reversed.
491BYTE_REVERSAL_DICT = dict()
492
493# For Python 2.7/ 3.x coexistence
494# Yes this is very very hacky.
495if sys.version_info[0] == 2:
496    for i in range(256):
497        BYTE_REVERSAL_DICT[i] = chr(int("{0:08b}".format(i)[::-1], 2))
498else:
499    for i in range(256):
500        BYTE_REVERSAL_DICT[i] = bytes([int("{0:08b}".format(i)[::-1], 2)])
501    from io import IOBase as file
502    xrange = range
503    basestring = str
504
505# Python 2.x octals start with '0', in Python 3 it's '0o'
506LEADING_OCT_CHARS = len(oct(1)) - 1
507
508
509def tidy_input_string(s):
510    """Return string made lowercase and with all whitespace removed."""
511    s = ''.join(s.split()).lower()
512    return s
513
514
515INIT_NAMES = ('uint', 'int', 'ue', 'se', 'sie', 'uie', 'hex', 'oct', 'bin', 'bits',
516              'uintbe', 'intbe', 'uintle', 'intle', 'uintne', 'intne',
517              'float', 'floatbe', 'floatle', 'floatne', 'bytes', 'bool', 'pad')
518
519TOKEN_RE = re.compile(r'(?P<name>' + '|'.join(INIT_NAMES) +
520                      r')(:(?P<len>[^=]+))?(=(?P<value>.*))?$', re.IGNORECASE)
521DEFAULT_UINT = re.compile(r'(?P<len>[^=]+)?(=(?P<value>.*))?$', re.IGNORECASE)
522
523MULTIPLICATIVE_RE = re.compile(r'(?P<factor>.*)\*(?P<token>.+)')
524
525# Hex, oct or binary literals
526LITERAL_RE = re.compile(r'(?P<name>0([xob]))(?P<value>.+)', re.IGNORECASE)
527
528# An endianness indicator followed by one or more struct.pack codes
529STRUCT_PACK_RE = re.compile(r'(?P<endian>[<>@])?(?P<fmt>(?:\d*[bBhHlLqQfd])+)$')
530
531# A number followed by a single character struct.pack code
532STRUCT_SPLIT_RE = re.compile(r'\d*[bBhHlLqQfd]')
533
534# These replicate the struct.pack codes
535# Big-endian
536REPLACEMENTS_BE = {'b': 'intbe:8', 'B': 'uintbe:8',
537                   'h': 'intbe:16', 'H': 'uintbe:16',
538                   'l': 'intbe:32', 'L': 'uintbe:32',
539                   'q': 'intbe:64', 'Q': 'uintbe:64',
540                   'f': 'floatbe:32', 'd': 'floatbe:64'}
541# Little-endian
542REPLACEMENTS_LE = {'b': 'intle:8', 'B': 'uintle:8',
543                   'h': 'intle:16', 'H': 'uintle:16',
544                   'l': 'intle:32', 'L': 'uintle:32',
545                   'q': 'intle:64', 'Q': 'uintle:64',
546                   'f': 'floatle:32', 'd': 'floatle:64'}
547
548# Size in bytes of all the pack codes.
549PACK_CODE_SIZE = {'b': 1, 'B': 1, 'h': 2, 'H': 2, 'l': 4, 'L': 4,
550                  'q': 8, 'Q': 8, 'f': 4, 'd': 8}
551
552_tokenname_to_initialiser = {'hex': 'hex', '0x': 'hex', '0X': 'hex', 'oct': 'oct',
553                             '0o': 'oct', '0O': 'oct', 'bin': 'bin', '0b': 'bin',
554                             '0B': 'bin', 'bits': 'auto', 'bytes': 'bytes', 'pad': 'pad'}
555
556
557def structparser(token):
558    """Parse struct-like format string token into sub-token list."""
559    m = STRUCT_PACK_RE.match(token)
560    if not m:
561        return [token]
562    else:
563        endian = m.group('endian')
564        if endian is None:
565            return [token]
566        # Split the format string into a list of 'q', '4h' etc.
567        formatlist = re.findall(STRUCT_SPLIT_RE, m.group('fmt'))
568        # Now deal with multiplicative factors, 4h -> hhhh etc.
569        fmt = ''.join([f[-1] * int(f[:-1]) if len(f) != 1 else
570                       f for f in formatlist])
571        if endian == '@':
572            # Native endianness
573            if byteorder == 'little':
574                endian = '<'
575            else:
576                assert byteorder == 'big'
577                endian = '>'
578        if endian == '<':
579            tokens = [REPLACEMENTS_LE[c] for c in fmt]
580        else:
581            assert endian == '>'
582            tokens = [REPLACEMENTS_BE[c] for c in fmt]
583    return tokens
584
585
586def tokenparser(fmt, keys=None, token_cache={}):
587    """Divide the format string into tokens and parse them.
588
589    Return stretchy token and list of [initialiser, length, value]
590    initialiser is one of: hex, oct, bin, uint, int, se, ue, 0x, 0o, 0b etc.
591    length is None if not known, as is value.
592
593    If the token is in the keyword dictionary (keys) then it counts as a
594    special case and isn't messed with.
595
596    tokens must be of the form: [factor*][initialiser][:][length][=value]
597
598    """
599    try:
600        return token_cache[(fmt, keys)]
601    except KeyError:
602        token_key = (fmt, keys)
603    # Very inefficient expanding of brackets.
604    fmt = expand_brackets(fmt)
605    # Split tokens by ',' and remove whitespace
606    # The meta_tokens can either be ordinary single tokens or multiple
607    # struct-format token strings.
608    meta_tokens = (''.join(f.split()) for f in fmt.split(','))
609    return_values = []
610    stretchy_token = False
611    for meta_token in meta_tokens:
612        # See if it has a multiplicative factor
613        m = MULTIPLICATIVE_RE.match(meta_token)
614        if not m:
615            factor = 1
616        else:
617            factor = int(m.group('factor'))
618            meta_token = m.group('token')
619        # See if it's a struct-like format
620        tokens = structparser(meta_token)
621        ret_vals = []
622        for token in tokens:
623            if keys and token in keys:
624                # Don't bother parsing it, it's a keyword argument
625                ret_vals.append([token, None, None])
626                continue
627            value = length = None
628            if token == '':
629                continue
630            # Match literal tokens of the form 0x... 0o... and 0b...
631            m = LITERAL_RE.match(token)
632            if m:
633                name = m.group('name')
634                value = m.group('value')
635                ret_vals.append([name, length, value])
636                continue
637            # Match everything else:
638            m1 = TOKEN_RE.match(token)
639            if not m1:
640                # and if you don't specify a 'name' then the default is 'uint':
641                m2 = DEFAULT_UINT.match(token)
642                if not m2:
643                    raise ValueError("Don't understand token '{0}'.".format(token))
644            if m1:
645                name = m1.group('name')
646                length = m1.group('len')
647                if m1.group('value'):
648                    value = m1.group('value')
649            else:
650                assert m2
651                name = 'uint'
652                length = m2.group('len')
653                if m2.group('value'):
654                    value = m2.group('value')
655            if name == 'bool':
656                if length is not None and length != '1':
657                    raise ValueError("You can only specify one bit sized bool tokens or leave unspecified.")
658                length = 1
659            if length is None and name not in ('se', 'ue', 'sie', 'uie'):
660                stretchy_token = True
661            if length is not None:
662                # Try converting length to int, otherwise check it's a key.
663                try:
664                    length = int(length)
665                    if length < 0:
666                        raise Error
667                    # For the 'bytes' token convert length to bits.
668                    if name == 'bytes':
669                        length *= 8
670                except Error:
671                    raise ValueError("Can't read a token with a negative length.")
672                except ValueError:
673                    if not keys or length not in keys:
674                        raise ValueError("Don't understand length '{0}' of token.".format(length))
675            ret_vals.append([name, length, value])
676        # This multiplies by the multiplicative factor, but this means that
677        # we can't allow keyword values as multipliers (e.g. n*uint:8).
678        # The only way to do this would be to return the factor in some fashion
679        # (we can't use the key's value here as it would mean that we couldn't
680        # sensibly continue to cache the function's results. (TODO).
681        return_values.extend(ret_vals * factor)
682    return_values = [tuple(x) for x in return_values]
683    if len(token_cache) < CACHE_SIZE:
684        token_cache[token_key] = stretchy_token, return_values
685    return stretchy_token, return_values
686
687
688# Looks for first number*(
689BRACKET_RE = re.compile(r'(?P<factor>\d+)\*\(')
690
691
692def expand_brackets(s):
693    """Remove whitespace and expand all brackets."""
694    s = ''.join(s.split())
695    while True:
696        start = s.find('(')
697        if start == -1:
698            break
699        count = 1  # Number of hanging open brackets
700        p = start + 1
701        while p < len(s):
702            if s[p] == '(':
703                count += 1
704            if s[p] == ')':
705                count -= 1
706            if not count:
707                break
708            p += 1
709        if count:
710            raise ValueError("Unbalanced parenthesis in '{0}'.".format(s))
711        if start == 0 or s[start - 1] != '*':
712            s = s[0:start] + s[start + 1:p] + s[p + 1:]
713        else:
714            m = BRACKET_RE.search(s)
715            if m:
716                factor = int(m.group('factor'))
717                matchstart = m.start('factor')
718                s = s[0:matchstart] + (factor - 1) * (s[start + 1:p] + ',') + s[start + 1:p] + s[p + 1:]
719            else:
720                raise ValueError("Failed to parse '{0}'.".format(s))
721    return s
722
723
724# This converts a single octal digit to 3 bits.
725OCT_TO_BITS = ['{0:03b}'.format(i) for i in xrange(8)]
726
727# A dictionary of number of 1 bits contained in binary representation of any byte
728BIT_COUNT = dict(zip(xrange(256), [bin(i).count('1') for i in xrange(256)]))
729
730
731class Bits(object):
732    """A container holding an immutable sequence of bits.
733
734    For a mutable container use the BitArray class instead.
735
736    Methods:
737
738    all() -- Check if all specified bits are set to 1 or 0.
739    any() -- Check if any of specified bits are set to 1 or 0.
740    count() -- Count the number of bits set to 1 or 0.
741    cut() -- Create generator of constant sized chunks.
742    endswith() -- Return whether the bitstring ends with a sub-string.
743    find() -- Find a sub-bitstring in the current bitstring.
744    findall() -- Find all occurrences of a sub-bitstring in the current bitstring.
745    join() -- Join bitstrings together using current bitstring.
746    rfind() -- Seek backwards to find a sub-bitstring.
747    split() -- Create generator of chunks split by a delimiter.
748    startswith() -- Return whether the bitstring starts with a sub-bitstring.
749    tobytes() -- Return bitstring as bytes, padding if needed.
750    tofile() -- Write bitstring to file, padding if needed.
751    unpack() -- Interpret bits using format string.
752
753    Special methods:
754
755    Also available are the operators [], ==, !=, +, *, ~, <<, >>, &, |, ^.
756
757    Properties:
758
759    bin -- The bitstring as a binary string.
760    bool -- For single bit bitstrings, interpret as True or False.
761    bytes -- The bitstring as a bytes object.
762    float -- Interpret as a floating point number.
763    floatbe -- Interpret as a big-endian floating point number.
764    floatle -- Interpret as a little-endian floating point number.
765    floatne -- Interpret as a native-endian floating point number.
766    hex -- The bitstring as a hexadecimal string.
767    int -- Interpret as a two's complement signed integer.
768    intbe -- Interpret as a big-endian signed integer.
769    intle -- Interpret as a little-endian signed integer.
770    intne -- Interpret as a native-endian signed integer.
771    len -- Length of the bitstring in bits.
772    oct -- The bitstring as an octal string.
773    se -- Interpret as a signed exponential-Golomb code.
774    ue -- Interpret as an unsigned exponential-Golomb code.
775    sie -- Interpret as a signed interleaved exponential-Golomb code.
776    uie -- Interpret as an unsigned interleaved exponential-Golomb code.
777    uint -- Interpret as a two's complement unsigned integer.
778    uintbe -- Interpret as a big-endian unsigned integer.
779    uintle -- Interpret as a little-endian unsigned integer.
780    uintne -- Interpret as a native-endian unsigned integer.
781
782    """
783
784    __slots__ = ('_datastore')
785
786    def __init__(self, auto=None, length=None, offset=None, **kwargs):
787        """Either specify an 'auto' initialiser:
788        auto -- a string of comma separated tokens, an integer, a file object,
789                a bytearray, a boolean iterable, an array or another bitstring.
790
791        Or initialise via **kwargs with one (and only one) of:
792        bytes -- raw data as a string, for example read from a binary file.
793        bin -- binary string representation, e.g. '0b001010'.
794        hex -- hexadecimal string representation, e.g. '0x2ef'
795        oct -- octal string representation, e.g. '0o777'.
796        uint -- an unsigned integer.
797        int -- a signed integer.
798        float -- a floating point number.
799        uintbe -- an unsigned big-endian whole byte integer.
800        intbe -- a signed big-endian whole byte integer.
801        floatbe - a big-endian floating point number.
802        uintle -- an unsigned little-endian whole byte integer.
803        intle -- a signed little-endian whole byte integer.
804        floatle -- a little-endian floating point number.
805        uintne -- an unsigned native-endian whole byte integer.
806        intne -- a signed native-endian whole byte integer.
807        floatne -- a native-endian floating point number.
808        se -- a signed exponential-Golomb code.
809        ue -- an unsigned exponential-Golomb code.
810        sie -- a signed interleaved exponential-Golomb code.
811        uie -- an unsigned interleaved exponential-Golomb code.
812        bool -- a boolean (True or False).
813        filename -- a file which will be opened in binary read-only mode.
814
815        Other keyword arguments:
816        length -- length of the bitstring in bits, if needed and appropriate.
817                  It must be supplied for all integer and float initialisers.
818        offset -- bit offset to the data. These offset bits are
819                  ignored and this is mainly intended for use when
820                  initialising using 'bytes' or 'filename'.
821
822        """
823        pass
824
825    def __new__(cls, auto=None, length=None, offset=None, _cache={}, **kwargs):
826        # For instances auto-initialised with a string we intern the
827        # instance for re-use.
828        try:
829            if isinstance(auto, basestring):
830                try:
831                    return _cache[auto]
832                except KeyError:
833                    x = object.__new__(Bits)
834                    try:
835                        _, tokens = tokenparser(auto)
836                    except ValueError as e:
837                        raise CreationError(*e.args)
838                    if offset is not None:
839                        raise CreationError("offset should not be specified when using string initialisation.")
840                    if length is not None:
841                        raise CreationError("length should not be specified when using string initialisation.")
842                    x._datastore = ConstByteStore(bytearray(0), 0, 0)
843                    for token in tokens:
844                        x._datastore._appendstore(Bits._init_with_token(*token)._datastore)
845                    assert x._assertsanity()
846                    if len(_cache) < CACHE_SIZE:
847                        _cache[auto] = x
848                    return x
849            if type(auto) == Bits:
850                return auto
851        except TypeError:
852            pass
853        x = super(Bits, cls).__new__(cls)
854        x._datastore = ConstByteStore(b'')
855        x._initialise(auto, length, offset, **kwargs)
856        return x
857
858    def _initialise(self, auto, length, offset, **kwargs):
859        if length is not None and length < 0:
860            raise CreationError("bitstring length cannot be negative.")
861        if offset is not None and offset < 0:
862            raise CreationError("offset must be >= 0.")
863        if auto is not None:
864            self._initialise_from_auto(auto, length, offset)
865            return
866        if not kwargs:
867            # No initialisers, so initialise with nothing or zero bits
868            if length is not None and length != 0:
869                data = bytearray((length + 7) // 8)
870                self._setbytes_unsafe(data, length, 0)
871                return
872            self._setbytes_unsafe(bytearray(0), 0, 0)
873            return
874        k, v = kwargs.popitem()
875        try:
876            init_without_length_or_offset[k](self, v)
877            if length is not None or offset is not None:
878                raise CreationError("Cannot use length or offset with this initialiser.")
879        except KeyError:
880            try:
881                init_with_length_only[k](self, v, length)
882                if offset is not None:
883                    raise CreationError("Cannot use offset with this initialiser.")
884            except KeyError:
885                if offset is None:
886                    offset = 0
887                try:
888                    init_with_length_and_offset[k](self, v, length, offset)
889                except KeyError:
890                    raise CreationError("Unrecognised keyword '{0}' used to initialise.", k)
891
892    def _initialise_from_auto(self, auto, length, offset):
893        if offset is None:
894            offset = 0
895        self._setauto(auto, length, offset)
896        return
897
898    def __iter__(self):
899        return iter(self._datastore)
900
901    def __copy__(self):
902        """Return a new copy of the Bits for the copy module."""
903        # Note that if you want a new copy (different ID), use _copy instead.
904        # The copy can return self as it's immutable.
905        return self
906
907    def __lt__(self, other):
908        raise TypeError("unorderable type: {0}".format(type(self).__name__))
909
910    def __gt__(self, other):
911        raise TypeError("unorderable type: {0}".format(type(self).__name__))
912
913    def __le__(self, other):
914        raise TypeError("unorderable type: {0}".format(type(self).__name__))
915
916    def __ge__(self, other):
917        raise TypeError("unorderable type: {0}".format(type(self).__name__))
918
919    def __add__(self, bs):
920        """Concatenate bitstrings and return new bitstring.
921
922        bs -- the bitstring to append.
923
924        """
925        bs = Bits(bs)
926        if bs.len <= self.len:
927            s = self._copy()
928            s._addright(bs)
929        else:
930            s = bs._copy()
931            s = self.__class__(s)
932            s._addleft(self)
933        return s
934
935    def __radd__(self, bs):
936        """Append current bitstring to bs and return new bitstring.
937
938        bs -- the string for the 'auto' initialiser that will be appended to.
939
940        """
941        bs = self._converttobitstring(bs)
942        return bs.__add__(self)
943
944    def __getitem__(self, key):
945        """Return a new bitstring representing a slice of the current bitstring.
946
947        Indices are in units of the step parameter (default 1 bit).
948        Stepping is used to specify the number of bits in each item.
949
950        >>> print BitArray('0b00110')[1:4]
951        '0b011'
952        >>> print BitArray('0x00112233')[1:3:8]
953        '0x1122'
954
955        """
956        length = self.len
957        if isinstance(key, slice):
958            step = key.step if key.step is not None else 1
959            if step != 1:
960                # convert to binary string and use string slicing
961                bs = self.__class__()
962                if _lsb0:
963                    start = length - key.start - 1 if key.start is not None else None
964                    stop = length - key.stop - 1 if key.stop is not None else None
965                    bs._setbin_unsafe(self._getbin().__getitem__(slice(start, stop, -step))[::-1])
966                else:
967                    bs._setbin_unsafe(self._getbin().__getitem__(key))
968                return bs
969            start, stop = 0, length
970            if key.start is not None:
971                start = key.start
972                if key.start < 0:
973                    start += stop
974            if key.stop is not None:
975                stop = key.stop
976                if key.stop < 0:
977                    stop += length
978            start = max(start, 0)
979            stop = min(stop, length)
980            if start < stop:
981                return self._slice(start, stop)
982            else:
983                return self.__class__()
984        else:
985            # single element
986            if key < 0:
987                key += length
988            if not 0 <= key < length:
989                raise IndexError("Slice index out of range.")
990            # Single bit, return True or False
991            return self._datastore.getbit(key)
992
993    def __len__(self):
994        """Return the length of the bitstring in bits."""
995        return self._getlength()
996
997    def __str__(self):
998        """Return approximate string representation of bitstring for printing.
999
1000        Short strings will be given wholly in hexadecimal or binary. Longer
1001        strings may be part hexadecimal and part binary. Very long strings will
1002        be truncated with '...'.
1003
1004        """
1005        length = self.len
1006        if not length:
1007            return ''
1008        if length > MAX_CHARS * 4:
1009            # Too long for hex. Truncate...
1010            return ''.join(('0x', self._readhex(MAX_CHARS * 4, 0), '...'))
1011        # If it's quite short and we can't do hex then use bin
1012        if length < 32 and length % 4 != 0:
1013            return '0b' + self.bin
1014        # If we can use hex then do so
1015        if not length % 4:
1016            return '0x' + self.hex
1017        # Otherwise first we do as much as we can in hex
1018        # then add on 1, 2 or 3 bits on at the end
1019        bits_at_end = length % 4
1020        return ''.join(('0x', self._readhex(length - bits_at_end, 0),
1021                        ', ', '0b',
1022                        self._readbin(bits_at_end, length - bits_at_end)))
1023
1024    def __repr__(self):
1025        """Return representation that could be used to recreate the bitstring.
1026
1027        If the returned string is too long it will be truncated. See __str__().
1028
1029        """
1030        length = self.len
1031        try:
1032            pos = self._pos
1033            pos_string = "" if pos == 0 else ", pos={0}".format(pos)
1034        except AttributeError:
1035            pos_string = ""
1036        if isinstance(self._datastore._rawarray, MmapByteArray):
1037            offsetstring = ''
1038            if self._datastore.byteoffset or self._offset:
1039                offsetstring = ", offset=%d" % (self._datastore._rawarray.byteoffset * 8 + self._offset)
1040            lengthstring = ", length=%d" % length
1041            return "{0}(filename='{1}'{2}{3}{4})".format(self.__class__.__name__,
1042                                                      self._datastore._rawarray.source.name,
1043                                                      lengthstring, offsetstring, pos_string)
1044        else:
1045            s = self.__str__()
1046            lengthstring = ''
1047            if s.endswith('...'):
1048                lengthstring = " # length={0}".format(length)
1049            return "{0}('{1}'{2}){3}".format(self.__class__.__name__, s, pos_string, lengthstring)
1050
1051    def __eq__(self, bs):
1052        """Return True if two bitstrings have the same binary representation.
1053
1054        >>> BitArray('0b1110') == '0xe'
1055        True
1056
1057        """
1058        try:
1059            bs = Bits(bs)
1060        except TypeError:
1061            return False
1062        return equal(self._datastore, bs._datastore)
1063
1064    def __ne__(self, bs):
1065        """Return False if two bitstrings have the same binary representation.
1066
1067        >>> BitArray('0b111') == '0x7'
1068        False
1069
1070        """
1071        return not self.__eq__(bs)
1072
1073    def __invert__(self):
1074        """Return bitstring with every bit inverted.
1075
1076        Raises Error if the bitstring is empty.
1077
1078        """
1079        if not self.len:
1080            raise Error("Cannot invert empty bitstring.")
1081        s = self._copy()
1082        s._invert_all()
1083        return s
1084
1085    def __lshift__(self, n):
1086        """Return bitstring with bits shifted by n to the left.
1087
1088        n -- the number of bits to shift. Must be >= 0.
1089
1090        """
1091        if n < 0:
1092            raise ValueError("Cannot shift by a negative amount.")
1093        if not self.len:
1094            raise ValueError("Cannot shift an empty bitstring.")
1095        n = min(n, self.len)
1096        s = self._slice(n, self.len)
1097        s._addright(Bits(n))
1098        return s
1099
1100    def __rshift__(self, n):
1101        """Return bitstring with bits shifted by n to the right.
1102
1103        n -- the number of bits to shift. Must be >= 0.
1104
1105        """
1106        if n < 0:
1107            raise ValueError("Cannot shift by a negative amount.")
1108        if not self.len:
1109            raise ValueError("Cannot shift an empty bitstring.")
1110        if not n:
1111            return self._copy()
1112        s = self.__class__(length=min(n, self.len))
1113        s._addright(self[:-n])
1114        return s
1115
1116    def __mul__(self, n):
1117        """Return bitstring consisting of n concatenations of self.
1118
1119        Called for expression of the form 'a = b*3'.
1120        n -- The number of concatenations. Must be >= 0.
1121
1122        """
1123        if n < 0:
1124            raise ValueError("Cannot multiply by a negative integer.")
1125        if not n:
1126            return self.__class__()
1127        s = self._copy()
1128        s._imul(n)
1129        return s
1130
1131    def __rmul__(self, n):
1132        """Return bitstring consisting of n concatenations of self.
1133
1134        Called for expressions of the form 'a = 3*b'.
1135        n -- The number of concatenations. Must be >= 0.
1136
1137        """
1138        return self.__mul__(n)
1139
1140    def __and__(self, bs):
1141        """Bit-wise 'and' between two bitstrings. Returns new bitstring.
1142
1143        bs -- The bitstring to '&' with.
1144
1145        Raises ValueError if the two bitstrings have differing lengths.
1146
1147        """
1148        bs = Bits(bs)
1149        if self.len != bs.len:
1150            raise ValueError("Bitstrings must have the same length "
1151                             "for & operator.")
1152        s = self._copy()
1153        s._iand(bs)
1154        return s
1155
1156    def __rand__(self, bs):
1157        """Bit-wise 'and' between two bitstrings. Returns new bitstring.
1158
1159        bs -- the bitstring to '&' with.
1160
1161        Raises ValueError if the two bitstrings have differing lengths.
1162
1163        """
1164        return self.__and__(bs)
1165
1166    def __or__(self, bs):
1167        """Bit-wise 'or' between two bitstrings. Returns new bitstring.
1168
1169        bs -- The bitstring to '|' with.
1170
1171        Raises ValueError if the two bitstrings have differing lengths.
1172
1173        """
1174        bs = Bits(bs)
1175        if self.len != bs.len:
1176            raise ValueError("Bitstrings must have the same length "
1177                             "for | operator.")
1178        s = self._copy()
1179        s._ior(bs)
1180        return s
1181
1182    def __ror__(self, bs):
1183        """Bit-wise 'or' between two bitstrings. Returns new bitstring.
1184
1185        bs -- The bitstring to '|' with.
1186
1187        Raises ValueError if the two bitstrings have differing lengths.
1188
1189        """
1190        return self.__or__(bs)
1191
1192    def __xor__(self, bs):
1193        """Bit-wise 'xor' between two bitstrings. Returns new bitstring.
1194
1195        bs -- The bitstring to '^' with.
1196
1197        Raises ValueError if the two bitstrings have differing lengths.
1198
1199        """
1200        bs = Bits(bs)
1201        if self.len != bs.len:
1202            raise ValueError("Bitstrings must have the same length "
1203                             "for ^ operator.")
1204        s = self._copy()
1205        s._ixor(bs)
1206        return s
1207
1208    def __rxor__(self, bs):
1209        """Bit-wise 'xor' between two bitstrings. Returns new bitstring.
1210
1211        bs -- The bitstring to '^' with.
1212
1213        Raises ValueError if the two bitstrings have differing lengths.
1214
1215        """
1216        return self.__xor__(bs)
1217
1218    def __contains__(self, bs):
1219        """Return whether bs is contained in the current bitstring.
1220
1221        bs -- The bitstring to search for.
1222
1223        """
1224        # Don't want to change pos
1225        try:
1226            pos = self._pos
1227        except AttributeError:
1228            pass
1229        found = Bits.find(self, bs, bytealigned=False)
1230        try:
1231            self._pos = pos
1232        except UnboundLocalError:
1233            pass
1234        return bool(found)
1235
1236    def __hash__(self):
1237        """Return an integer hash of the object."""
1238        # We can't in general hash the whole bitstring (it could take hours!)
1239        # So instead take some bits from the start and end.
1240        if self.len <= 160:
1241            # Use the whole bitstring.
1242            shorter = self
1243        else:
1244            # Take 10 bytes from start and end
1245            shorter = self[:80] + self[-80:]
1246        h = 0
1247        for byte in shorter.tobytes():
1248            try:
1249                h = (h << 4) + ord(byte)
1250            except TypeError:
1251                # Python 3
1252                h = (h << 4) + byte
1253            g = h & 0xf0000000
1254            if g & (1 << 31):
1255                h ^= (g >> 24)
1256                h ^= g
1257        return h % 1442968193
1258
1259    # This is only used in Python 2.x...
1260    def __nonzero__(self):
1261        """Return True if any bits are set to 1, otherwise return False."""
1262        return self.any(True)
1263
1264    # ...whereas this is used in Python 3.x
1265    __bool__ = __nonzero__
1266
1267    if _debug is True:
1268        def _assertsanity(self):
1269            """Check internal self consistency as a debugging aid."""
1270            assert self.len >= 0
1271            assert 0 <= self._offset, "offset={0}".format(self._offset)
1272            assert (self.len + self._offset + 7) // 8 == self._datastore.bytelength + self._datastore.byteoffset, "len={0}, offset={1}, bytelength={2}, byteoffset={3}".format(self.len, self._offset, self._datastore.bytelength, self._datastore.byteoffset)
1273            return True
1274    else:
1275        @staticmethod
1276        def _assertsanity():
1277            return True
1278
1279    @classmethod
1280    def _init_with_token(cls, name, token_length, value):
1281        if token_length is not None:
1282            token_length = int(token_length)
1283        if token_length == 0:
1284            return cls()
1285        # For pad token just return the length in zero bits
1286        if name == 'pad':
1287            return cls(token_length)
1288
1289        if value is None:
1290            if token_length is None:
1291                error = "Token has no value ({0}=???).".format(name)
1292            else:
1293                error = "Token has no value ({0}:{1}=???).".format(name, token_length)
1294            raise ValueError(error)
1295        try:
1296            b = cls(**{_tokenname_to_initialiser[name]: value})
1297        except KeyError:
1298            if name in ('se', 'ue', 'sie', 'uie'):
1299                b = cls(**{name: int(value)})
1300            elif name in ('uint', 'int', 'uintbe', 'intbe', 'uintle', 'intle', 'uintne', 'intne'):
1301                b = cls(**{name: int(value), 'length': token_length})
1302            elif name in ('float', 'floatbe', 'floatle', 'floatne'):
1303                b = cls(**{name: float(value), 'length': token_length})
1304            elif name == 'bool':
1305                if value in (1, 'True', '1'):
1306                    b = cls(bool=True)
1307                elif value in (0, 'False', '0'):
1308                    b = cls(bool=False)
1309                else:
1310                    raise CreationError("bool token can only be 'True' or 'False'.")
1311            else:
1312                raise CreationError("Can't parse token name {0}.", name)
1313        if token_length is not None and b.len != token_length:
1314            msg = "Token with length {0} packed with value of length {1} ({2}:{3}={4})."
1315            raise CreationError(msg, token_length, b.len, name, token_length, value)
1316        return b
1317
1318    def _clear(self):
1319        """Reset the bitstring to an empty state."""
1320        self._datastore = ByteStore(bytearray(0))
1321
1322    def _setauto(self, s, length, offset):
1323        """Set bitstring from a bitstring, file, bool, integer, array, iterable or string."""
1324        # As s can be so many different things it's important to do the checks
1325        # in the correct order, as some types are also other allowed types.
1326        # So basestring must be checked before Iterable
1327        # and bytes/bytearray before Iterable but after basestring!
1328        if isinstance(s, Bits):
1329            if length is None:
1330                length = s.len - offset
1331            self._setbytes_unsafe(s._datastore.rawbytes, length, s._offset + offset)
1332            return
1333
1334        if isinstance(s, io.BytesIO):
1335            if offset is None:
1336                offset = 0
1337            if length is None:
1338                length = s.seek(0, 2) * 8 - offset
1339            byteoffset, offset = divmod(offset, 8)
1340            bytelength = (length + byteoffset * 8 + offset + 7) // 8 - byteoffset
1341            if length + byteoffset * 8 + offset > s.seek(0, 2) * 8:
1342                raise CreationError("BytesIO object is not long enough for specified "
1343                                    "length and offset.")
1344            self._datastore = ConstByteStore(bytearray(s.getvalue()[byteoffset: byteoffset + bytelength]), length, offset)
1345            return
1346
1347        if isinstance(s, file):
1348            if offset is None:
1349                offset = 0
1350            if length is None:
1351                length = os.path.getsize(s.name) * 8 - offset
1352            byteoffset, offset = divmod(offset, 8)
1353            bytelength = (length + byteoffset * 8 + offset + 7) // 8 - byteoffset
1354            m = MmapByteArray(s, bytelength, byteoffset)
1355            if length + byteoffset * 8 + offset > m.filelength * 8:
1356                raise CreationError("File is not long enough for specified "
1357                                    "length and offset.")
1358            self._datastore = ConstByteStore(m, length, offset)
1359            return
1360
1361        if length is not None:
1362            raise CreationError("The length keyword isn't applicable to this initialiser.")
1363        if offset:
1364            raise CreationError("The offset keyword isn't applicable to this initialiser.")
1365        if isinstance(s, basestring):
1366            bs = self._converttobitstring(s)
1367            assert bs._offset == 0
1368            self._setbytes_unsafe(bs._datastore.rawbytes, bs.length, 0)
1369            return
1370        if isinstance(s, (bytes, bytearray)):
1371            self._setbytes_unsafe(bytearray(s), len(s) * 8, 0)
1372            return
1373        if isinstance(s, array.array):
1374            try:
1375                b = s.tobytes()
1376            except AttributeError:
1377                b = s.tostring()  # Python 2.7
1378            self._setbytes_unsafe(bytearray(b), len(b) * 8, 0)
1379            return
1380        if isinstance(s, numbers.Integral):
1381            # Initialise with s zero bits.
1382            if s < 0:
1383                msg = "Can't create bitstring of negative length {0}."
1384                raise CreationError(msg, s)
1385            data = bytearray((s + 7) // 8)
1386            self._datastore = ByteStore(data, s, 0)
1387            return
1388        if isinstance(s, collectionsAbc.Iterable):
1389            # Evaluate each item as True or False and set bits to 1 or 0.
1390            self._setbin_unsafe(''.join(str(int(bool(x))) for x in s))
1391            return
1392        raise TypeError("Cannot initialise bitstring from {0}.".format(type(s)))
1393
1394    def _setfile(self, filename, length, offset):
1395        """Use file as source of bits."""
1396        with open(filename, 'rb') as source:
1397            if offset is None:
1398                offset = 0
1399            if length is None:
1400                length = os.path.getsize(source.name) * 8 - offset
1401            byteoffset, offset = divmod(offset, 8)
1402            bytelength = (length + byteoffset * 8 + offset + 7) // 8 - byteoffset
1403            m = MmapByteArray(source, bytelength, byteoffset)
1404            if length + byteoffset * 8 + offset > m.filelength * 8:
1405                raise CreationError("File is not long enough for specified "
1406                                    "length and offset.")
1407            self._datastore = ConstByteStore(m, length, offset)
1408
1409    def _setbytes_safe(self, data, length=None, offset=0):
1410        """Set the data from a string."""
1411        data = bytearray(data)
1412        if length is None:
1413            # Use to the end of the data
1414            length = len(data)*8 - offset
1415            self._datastore = ByteStore(data, length, offset)
1416        else:
1417            if length + offset > len(data) * 8:
1418                msg = "Not enough data present. Need {0} bits, have {1}."
1419                raise CreationError(msg, length + offset, len(data) * 8)
1420            if length == 0:
1421                self._datastore = ByteStore(bytearray(0))
1422            else:
1423                self._datastore = ByteStore(data, length, offset)
1424
1425    def _setbytes_unsafe(self, data, length, offset):
1426        """Unchecked version of _setbytes_safe."""
1427        self._datastore = type(self._datastore)(data[:], length, offset)
1428        assert self._assertsanity()
1429
1430    def _readbytes(self, length, start):
1431        """Read bytes and return them. Note that length is in bits."""
1432        assert length % 8 == 0
1433        assert start + length <= self.len
1434        if not (start + self._offset) % 8:
1435            return bytes(self._datastore.getbyteslice((start + self._offset) // 8,
1436                                                      (start + self._offset + length) // 8))
1437        return self._slice(start, start + length).tobytes()
1438
1439    def _getbytes(self):
1440        """Return the data as an ordinary string."""
1441        if self.len % 8:
1442            raise InterpretError("Cannot interpret as bytes unambiguously - "
1443                                 "not multiple of 8 bits.")
1444        return self._readbytes(self.len, 0)
1445
1446    def _setuint(self, uint, length=None):
1447        """Reset the bitstring to have given unsigned int interpretation."""
1448        try:
1449            if length is None:
1450                # Use the whole length. Deliberately not using .len here.
1451                length = self._datastore.bitlength
1452        except AttributeError:
1453            # bitstring doesn't have a _datastore as it hasn't been created!
1454            pass
1455        if length is None or length == 0:
1456            raise CreationError("A non-zero length must be specified with a "
1457                                "uint initialiser.")
1458        if uint >= (1 << length):
1459            msg = "{0} is too large an unsigned integer for a bitstring of length {1}. "\
1460                  "The allowed range is [0, {2}]."
1461            raise CreationError(msg, uint, length, (1 << length) - 1)
1462        if uint < 0:
1463            raise CreationError("uint cannot be initialised by a negative number.")
1464        s = hex(uint)[2:]
1465        s = s.rstrip('L')
1466        if len(s) & 1:
1467            s = '0' + s
1468        try:
1469            data = bytes.fromhex(s)
1470        except AttributeError:
1471            # the Python 2.x way
1472            data = binascii.unhexlify(s)
1473        # Now add bytes as needed to get the right length.
1474        extrabytes = ((length + 7) // 8) - len(data)
1475        if extrabytes > 0:
1476            data = b'\x00' * extrabytes + data
1477        offset = 8 - (length % 8)
1478        if offset == 8:
1479            offset = 0
1480        self._setbytes_unsafe(bytearray(data), length, offset)
1481
1482    def _readuint_lsb0(self, length, start):
1483        # TODO: This needs a complete rewrite - can't delegate to _readuint_msb0
1484        return self._readuint_msb0(length, self.len - start - length)
1485
1486    def _readuint_msb0(self, length, start):
1487        """Read bits and interpret as an unsigned int."""
1488        if not length:
1489            raise InterpretError("Cannot interpret a zero length bitstring "
1490                                           "as an integer.")
1491        offset = self._offset
1492        startbyte = (start + offset) // 8
1493        endbyte = (start + offset + length - 1) // 8
1494
1495        b = binascii.hexlify(bytes(self._datastore.getbyteslice(startbyte, endbyte + 1)))
1496        assert b
1497        i = int(b, 16)
1498        final_bits = 8 - ((start + offset + length) % 8)
1499        if final_bits != 8:
1500            i >>= final_bits
1501        i &= (1 << length) - 1
1502        return i
1503
1504    def _getuint(self):
1505        """Return data as an unsigned int."""
1506        return self._readuint(self.len, 0)
1507
1508    def _setint(self, int_, length=None):
1509        """Reset the bitstring to have given signed int interpretation."""
1510        # If no length given, and we've previously been given a length, use it.
1511        if length is None and hasattr(self, 'len') and self.len != 0:
1512            length = self.len
1513        if length is None or length == 0:
1514            raise CreationError("A non-zero length must be specified with an int initialiser.")
1515        if int_ >= (1 << (length - 1)) or int_ < -(1 << (length - 1)):
1516            raise CreationError("{0} is too large a signed integer for a bitstring of length {1}. "
1517                                "The allowed range is [{2}, {3}].", int_, length, -(1 << (length - 1)),
1518                                (1 << (length - 1)) - 1)
1519        if int_ >= 0:
1520            self._setuint(int_, length)
1521            return
1522        # Do the 2's complement thing. Add one, set to minus number, then flip bits.
1523        self._setuint((-int_ - 1) ^ ((1 << length) - 1), length)
1524
1525    def _readint(self, length, start):
1526        """Read bits and interpret as a signed int"""
1527        ui = self._readuint(length, start)
1528        if not ui >> (length - 1):
1529            # Top bit not set, number is positive
1530            return ui
1531        # Top bit is set, so number is negative
1532        tmp = (~(ui - 1)) & ((1 << length) - 1)
1533        return -tmp
1534
1535    def _getint(self):
1536        """Return data as a two's complement signed int."""
1537        return self._readint(self.len, 0)
1538
1539    def _setuintbe(self, uintbe, length=None):
1540        """Set the bitstring to a big-endian unsigned int interpretation."""
1541        if length is not None and length % 8 != 0:
1542            raise CreationError("Big-endian integers must be whole-byte. "
1543                                "Length = {0} bits.", length)
1544        self._setuint(uintbe, length)
1545
1546    def _readuintbe(self, length, start):
1547        """Read bits and interpret as a big-endian unsigned int."""
1548        if length % 8:
1549            raise InterpretError("Big-endian integers must be whole-byte. "
1550                                 "Length = {0} bits.", length)
1551        return self._readuint(length, start)
1552
1553    def _getuintbe(self):
1554        """Return data as a big-endian two's complement unsigned int."""
1555        return self._readuintbe(self.len, 0)
1556
1557    def _setintbe(self, intbe, length=None):
1558        """Set bitstring to a big-endian signed int interpretation."""
1559        if length is not None and length % 8 != 0:
1560            raise CreationError("Big-endian integers must be whole-byte. "
1561                                "Length = {0} bits.", length)
1562        self._setint(intbe, length)
1563
1564    def _readintbe(self, length, start):
1565        """Read bits and interpret as a big-endian signed int."""
1566        if length % 8:
1567            raise InterpretError("Big-endian integers must be whole-byte. "
1568                                 "Length = {0} bits.", length)
1569        return self._readint(length, start)
1570
1571    def _getintbe(self):
1572        """Return data as a big-endian two's complement signed int."""
1573        return self._readintbe(self.len, 0)
1574
1575    def _setuintle(self, uintle, length=None):
1576        if length is not None and length % 8 != 0:
1577            raise CreationError("Little-endian integers must be whole-byte. "
1578                                "Length = {0} bits.", length)
1579        self._setuint(uintle, length)
1580        self._datastore._rawarray = self._datastore._rawarray[::-1]
1581
1582    def _readuintle(self, length, start):
1583        """Read bits and interpret as a little-endian unsigned int."""
1584        if length % 8:
1585            raise InterpretError("Little-endian integers must be whole-byte. "
1586                                 "Length = {0} bits.", length)
1587        assert start + length <= self.len
1588        absolute_pos = start + self._offset
1589        startbyte, offset = divmod(absolute_pos, 8)
1590        val = 0
1591        if not offset:
1592            endbyte = (absolute_pos + length - 1) // 8
1593            chunksize = 4 # for 'L' format
1594            while endbyte - chunksize + 1 >= startbyte:
1595                val <<= 8 * chunksize
1596                val += struct.unpack('<L', bytes(self._datastore.getbyteslice(endbyte + 1 - chunksize, endbyte + 1)))[0]
1597                endbyte -= chunksize
1598            for b in xrange(endbyte, startbyte - 1, -1):
1599                val <<= 8
1600                val += self._datastore.getbyte(b)
1601        else:
1602            data = self._slice(start, start + length)
1603            assert data.len % 8 == 0
1604            data._reversebytes(0, self.len)
1605            for b in bytearray(data.bytes):
1606                val <<= 8
1607                val += b
1608        return val
1609
1610    def _getuintle(self):
1611        return self._readuintle(self.len, 0)
1612
1613    def _setintle(self, intle, length=None):
1614        if length is not None and length % 8 != 0:
1615            raise CreationError("Little-endian integers must be whole-byte. "
1616                                "Length = {0} bits.", length)
1617        self._setint(intle, length)
1618        self._datastore._rawarray = self._datastore._rawarray[::-1]
1619
1620    def _readintle(self, length, start):
1621        """Read bits and interpret as a little-endian signed int."""
1622        ui = self._readuintle(length, start)
1623        if not ui >> (length - 1):
1624            # Top bit not set, number is positive
1625            return ui
1626        # Top bit is set, so number is negative
1627        tmp = (~(ui - 1)) & ((1 << length) - 1)
1628        return -tmp
1629
1630    def _getintle(self):
1631        return self._readintle(self.len, 0)
1632
1633    def _setfloat(self, f, length=None):
1634        # If no length given, and we've previously been given a length, use it.
1635        if length is None and hasattr(self, 'len') and self.len != 0:
1636            length = self.len
1637        if length is None or length == 0:
1638            raise CreationError("A non-zero length must be specified with a "
1639                                "float initialiser.")
1640        if length == 32:
1641            b = struct.pack('>f', f)
1642        elif length == 64:
1643            b = struct.pack('>d', f)
1644        else:
1645            raise CreationError("floats can only be 32 or 64 bits long, "
1646                                "not {0} bits", length)
1647        self._setbytes_unsafe(bytearray(b), length, 0)
1648
1649    def _readfloat(self, length, start):
1650        """Read bits and interpret as a float."""
1651        if not (start + self._offset) % 8:
1652            startbyte = (start + self._offset) // 8
1653            if length == 32:
1654                f, = struct.unpack('>f', bytes(self._datastore.getbyteslice(startbyte, startbyte + 4)))
1655            elif length == 64:
1656                f, = struct.unpack('>d', bytes(self._datastore.getbyteslice(startbyte, startbyte + 8)))
1657        else:
1658            if length == 32:
1659                f, = struct.unpack('>f', self._readbytes(32, start))
1660            elif length == 64:
1661                f, = struct.unpack('>d', self._readbytes(64, start))
1662        try:
1663            return f
1664        except NameError:
1665            raise InterpretError("floats can only be 32 or 64 bits long, not {0} bits", length)
1666
1667    def _getfloat(self):
1668        """Interpret the whole bitstring as a float."""
1669        return self._readfloat(self.len, 0)
1670
1671    def _setfloatle(self, f, length=None):
1672        # If no length given, and we've previously been given a length, use it.
1673        if length is None and hasattr(self, 'len') and self.len != 0:
1674            length = self.len
1675        if length is None or length == 0:
1676            raise CreationError("A non-zero length must be specified with a "
1677                                "float initialiser.")
1678        if length == 32:
1679            b = struct.pack('<f', f)
1680        elif length == 64:
1681            b = struct.pack('<d', f)
1682        else:
1683            raise CreationError("floats can only be 32 or 64 bits long, "
1684                                "not {0} bits", length)
1685        self._setbytes_unsafe(bytearray(b), length, 0)
1686
1687    def _readfloatle(self, length, start):
1688        """Read bits and interpret as a little-endian float."""
1689        startbyte, offset = divmod(start + self._offset, 8)
1690        if not offset:
1691            if length == 32:
1692                f, = struct.unpack('<f', bytes(self._datastore.getbyteslice(startbyte, startbyte + 4)))
1693            elif length == 64:
1694                f, = struct.unpack('<d', bytes(self._datastore.getbyteslice(startbyte, startbyte + 8)))
1695        else:
1696            if length == 32:
1697                f, = struct.unpack('<f', self._readbytes(32, start))
1698            elif length == 64:
1699                f, = struct.unpack('<d', self._readbytes(64, start))
1700        try:
1701            return f
1702        except NameError:
1703            raise InterpretError("floats can only be 32 or 64 bits long, "
1704                                 "not {0} bits", length)
1705
1706    def _getfloatle(self):
1707        """Interpret the whole bitstring as a little-endian float."""
1708        return self._readfloatle(self.len, 0)
1709
1710    def _setue(self, i):
1711        """Initialise bitstring with unsigned exponential-Golomb code for integer i.
1712
1713        Raises CreationError if i < 0.
1714
1715        """
1716        if i < 0:
1717            raise CreationError("Cannot use negative initialiser for unsigned "
1718                                "exponential-Golomb.")
1719        if not i:
1720            self._setbin_unsafe('1')
1721            return
1722        tmp = i + 1
1723        leadingzeros = -1
1724        while tmp > 0:
1725            tmp >>= 1
1726            leadingzeros += 1
1727        remainingpart = i + 1 - (1 << leadingzeros)
1728        binstring = '0' * leadingzeros + '1' + Bits(uint=remainingpart,
1729                                                             length=leadingzeros).bin
1730        self._setbin_unsafe(binstring)
1731
1732    def _readue(self, pos):
1733        """Return interpretation of next bits as unsigned exponential-Golomb code.
1734
1735        Raises ReadError if the end of the bitstring is encountered while
1736        reading the code.
1737
1738        """
1739        oldpos = pos
1740        try:
1741            while not self[pos]:
1742                pos += 1
1743        except IndexError:
1744            raise ReadError("Read off end of bitstring trying to read code.")
1745        leadingzeros = pos - oldpos
1746        codenum = (1 << leadingzeros) - 1
1747        if leadingzeros > 0:
1748            if pos + leadingzeros + 1 > self.len:
1749                raise ReadError("Read off end of bitstring trying to read code.")
1750            codenum += self._readuint(leadingzeros, pos + 1)
1751            pos += leadingzeros + 1
1752        else:
1753            assert codenum == 0
1754            pos += 1
1755        return codenum, pos
1756
1757    def _getue(self):
1758        """Return data as unsigned exponential-Golomb code.
1759
1760        Raises InterpretError if bitstring is not a single exponential-Golomb code.
1761
1762        """
1763        try:
1764            value, newpos = self._readue(0)
1765            if value is None or newpos != self.len:
1766                raise ReadError
1767        except ReadError:
1768            raise InterpretError("Bitstring is not a single exponential-Golomb code.")
1769        return value
1770
1771    def _setse(self, i):
1772        """Initialise bitstring with signed exponential-Golomb code for integer i."""
1773        if i > 0:
1774            u = (i * 2) - 1
1775        else:
1776            u = -2 * i
1777        self._setue(u)
1778
1779    def _getse(self):
1780        """Return data as signed exponential-Golomb code.
1781
1782        Raises InterpretError if bitstring is not a single exponential-Golomb code.
1783
1784        """
1785        try:
1786            value, newpos = self._readse(0)
1787            if value is None or newpos != self.len:
1788                raise ReadError
1789        except ReadError:
1790            raise InterpretError("Bitstring is not a single exponential-Golomb code.")
1791        return value
1792
1793    def _readse(self, pos):
1794        """Return interpretation of next bits as a signed exponential-Golomb code.
1795
1796        Advances position to after the read code.
1797
1798        Raises ReadError if the end of the bitstring is encountered while
1799        reading the code.
1800
1801        """
1802        codenum, pos = self._readue(pos)
1803        m = (codenum + 1) // 2
1804        if not codenum % 2:
1805            return -m, pos
1806        else:
1807            return m, pos
1808
1809    def _setuie(self, i):
1810        """Initialise bitstring with unsigned interleaved exponential-Golomb code for integer i.
1811
1812        Raises CreationError if i < 0.
1813
1814        """
1815        if i < 0:
1816            raise CreationError("Cannot use negative initialiser for unsigned "
1817                                "interleaved exponential-Golomb.")
1818        self._setbin_unsafe('1' if i == 0 else '0' + '0'.join(bin(i + 1)[3:]) + '1')
1819
1820    def _readuie(self, pos):
1821        """Return interpretation of next bits as unsigned interleaved exponential-Golomb code.
1822
1823        Raises ReadError if the end of the bitstring is encountered while
1824        reading the code.
1825
1826        """
1827        try:
1828            codenum = 1
1829            while not self[pos]:
1830                pos += 1
1831                codenum <<= 1
1832                codenum += self[pos]
1833                pos += 1
1834            pos += 1
1835        except IndexError:
1836            raise ReadError("Read off end of bitstring trying to read code.")
1837        codenum -= 1
1838        return codenum, pos
1839
1840    def _getuie(self):
1841        """Return data as unsigned interleaved exponential-Golomb code.
1842
1843        Raises InterpretError if bitstring is not a single exponential-Golomb code.
1844
1845        """
1846        try:
1847            value, newpos = self._readuie(0)
1848            if value is None or newpos != self.len:
1849                raise ReadError
1850        except ReadError:
1851            raise InterpretError("Bitstring is not a single interleaved exponential-Golomb code.")
1852        return value
1853
1854    def _setsie(self, i):
1855        """Initialise bitstring with signed interleaved exponential-Golomb code for integer i."""
1856        if not i:
1857            self._setbin_unsafe('1')
1858        else:
1859            self._setuie(abs(i))
1860            self._addright(Bits([i < 0]))
1861
1862    def _getsie(self):
1863        """Return data as signed interleaved exponential-Golomb code.
1864
1865        Raises InterpretError if bitstring is not a single exponential-Golomb code.
1866
1867        """
1868        try:
1869            value, newpos = self._readsie(0)
1870            if value is None or newpos != self.len:
1871                raise ReadError
1872        except ReadError:
1873            raise InterpretError("Bitstring is not a single interleaved exponential-Golomb code.")
1874        return value
1875
1876    def _readsie(self, pos):
1877        """Return interpretation of next bits as a signed interleaved exponential-Golomb code.
1878
1879        Advances position to after the read code.
1880
1881        Raises ReadError if the end of the bitstring is encountered while
1882        reading the code.
1883
1884        """
1885        codenum, pos = self._readuie(pos)
1886        if not codenum:
1887            return 0, pos
1888        try:
1889            if self[pos]:
1890                return -codenum, pos + 1
1891            else:
1892                return codenum, pos + 1
1893        except IndexError:
1894            raise ReadError("Read off end of bitstring trying to read code.")
1895
1896    def _setbool(self, value):
1897        # We deliberately don't want to have implicit conversions to bool here.
1898        # If we did then it would be difficult to deal with the 'False' string.
1899        if value in (1, 'True'):
1900            self._setbytes_unsafe(bytearray(b'\x80'), 1, 0)
1901        elif value in (0, 'False'):
1902            self._setbytes_unsafe(bytearray(b'\x00'), 1, 0)
1903        else:
1904            raise CreationError('Cannot initialise boolean with {0}.', value)
1905
1906    def _getbool(self):
1907        if self.length != 1:
1908            msg = "For a bool interpretation a bitstring must be 1 bit long, not {0} bits."
1909            raise InterpretError(msg, self.length)
1910        return self[0]
1911
1912    def _readbool(self, pos):
1913        return self[pos], pos + 1
1914
1915    def _setbin_safe(self, binstring):
1916        """Reset the bitstring to the value given in binstring."""
1917        binstring = tidy_input_string(binstring)
1918        # remove any 0b if present
1919        binstring = binstring.replace('0b', '')
1920        self._setbin_unsafe(binstring)
1921
1922    def _setbin_unsafe(self, binstring):
1923        """Same as _setbin_safe, but input isn't sanity checked. binstring mustn't start with '0b'."""
1924        length = len(binstring)
1925        # pad with zeros up to byte boundary if needed
1926        boundary = ((length + 7) // 8) * 8
1927        padded_binstring = binstring + '0' * (boundary - length)\
1928                           if len(binstring) < boundary else binstring
1929        try:
1930            bytelist = [int(padded_binstring[x:x + 8], 2)
1931                        for x in xrange(0, len(padded_binstring), 8)]
1932        except ValueError:
1933            raise CreationError("Invalid character in bin initialiser {0}.", binstring)
1934        self._setbytes_unsafe(bytearray(bytelist), length, 0)
1935
1936    def _readbin(self, length, start):
1937        """Read bits and interpret as a binary string."""
1938        if not length:
1939            return ''
1940        # Get the byte slice containing our bit slice
1941        startbyte, startoffset = divmod(start + self._offset, 8)
1942        endbyte = (start + self._offset + length - 1) // 8
1943        b = self._datastore.getbyteslice(startbyte, endbyte + 1)
1944        # Convert to a string of '0' and '1's (via a hex string an and int!)
1945        c = "{:0{}b}".format(int(binascii.hexlify(b), 16), 8*len(b))
1946        # Finally chop off any extra bits.
1947        return c[startoffset:startoffset + length]
1948
1949    def _getbin(self):
1950        """Return interpretation as a binary string."""
1951        return self._readbin(self.len, 0)
1952
1953    def _setoct(self, octstring):
1954        """Reset the bitstring to have the value given in octstring."""
1955        octstring = tidy_input_string(octstring)
1956        # remove any 0o if present
1957        octstring = octstring.replace('0o', '')
1958        binlist = []
1959        for i in octstring:
1960            try:
1961                binlist.append(OCT_TO_BITS[int(i)])
1962            except (ValueError, IndexError):
1963                raise CreationError("Invalid symbol '{0}' in oct initialiser.", i)
1964
1965        self._setbin_unsafe(''.join(binlist))
1966
1967    def _readoct(self, length, start):
1968        """Read bits and interpret as an octal string."""
1969        if length % 3:
1970            raise InterpretError("Cannot convert to octal unambiguously - "
1971                                 "not multiple of 3 bits.")
1972        if not length:
1973            return ''
1974        # Get main octal bit by converting from int.
1975        # Strip starting 0 or 0o depending on Python version.
1976        end = oct(self._readuint(length, start))[LEADING_OCT_CHARS:]
1977        if end.endswith('L'):
1978            end = end[:-1]
1979        middle = '0' * (length // 3 - len(end))
1980        return middle + end
1981
1982    def _getoct(self):
1983        """Return interpretation as an octal string."""
1984        return self._readoct(self.len, 0)
1985
1986    def _sethex(self, hexstring):
1987        """Reset the bitstring to have the value given in hexstring."""
1988        hexstring = tidy_input_string(hexstring)
1989        # remove any 0x if present
1990        hexstring = hexstring.replace('0x', '')
1991        length = len(hexstring)
1992        if length % 2:
1993            hexstring += '0'
1994        try:
1995            data = bytearray.fromhex(hexstring)
1996        except ValueError:
1997            raise CreationError("Invalid symbol in hex initialiser.")
1998        self._setbytes_unsafe(data, length * 4, 0)
1999
2000    def _readhex(self, length, start):
2001        """Read bits and interpret as a hex string."""
2002        if length % 4:
2003            raise InterpretError("Cannot convert to hex unambiguously - "
2004                                           "not multiple of 4 bits.")
2005        if not length:
2006            return ''
2007        s = self._slice(start, start + length).tobytes()
2008        try:
2009            s = s.hex() # Available in Python 3.5+
2010        except AttributeError:
2011            # This monstrosity is the only thing I could get to work for both 2.6 and 3.1.
2012            s = str(binascii.hexlify(s).decode('utf-8'))
2013        # If there's one nibble too many then cut it off
2014        return s[:-1] if (length // 4) % 2 else s
2015
2016    def _gethex(self):
2017        """Return the hexadecimal representation as a string prefixed with '0x'.
2018
2019        Raises an InterpretError if the bitstring's length is not a multiple of 4.
2020
2021        """
2022        return self._readhex(self.len, 0)
2023
2024    def _getoffset(self):
2025        return self._datastore.offset
2026
2027    def _getlength(self):
2028        """Return the length of the bitstring in bits."""
2029        return self._datastore.bitlength
2030
2031    def _ensureinmemory(self):
2032        """Ensure the data is held in memory, not in a file."""
2033        self._setbytes_unsafe(self._datastore.getbyteslice(0, self._datastore.bytelength),
2034                              self.len, self._offset)
2035
2036    @classmethod
2037    def _converttobitstring(cls, bs, offset=0, cache={}):
2038        """Convert bs to a bitstring and return it.
2039
2040        offset gives the suggested bit offset of first significant
2041        bit, to optimise append etc.
2042
2043        """
2044        if isinstance(bs, Bits):
2045            return bs
2046        try:
2047            return cache[(bs, offset)]
2048        except KeyError:
2049            if isinstance(bs, basestring):
2050                b = cls()
2051                try:
2052                    _, tokens = tokenparser(bs)
2053                except ValueError as e:
2054                    raise CreationError(*e.args)
2055                if tokens:
2056                    b._addright(Bits._init_with_token(*tokens[0]))
2057                    b._datastore = offsetcopy(b._datastore, offset)
2058                    for token in tokens[1:]:
2059                        b._addright(Bits._init_with_token(*token))
2060                assert b._assertsanity()
2061                assert b.len == 0 or b._offset == offset
2062                if len(cache) < CACHE_SIZE:
2063                    cache[(bs, offset)] = b
2064                return b
2065        except TypeError:
2066            # Unhashable type
2067            pass
2068        return cls(bs)
2069
2070    def _copy(self):
2071        """Create and return a new copy of the Bits (always in memory)."""
2072        s_copy = self.__class__()
2073        s_copy._setbytes_unsafe(self._datastore.getbyteslice(0, self._datastore.bytelength),
2074                                self.len, self._offset)
2075        return s_copy
2076
2077    def _slice_lsb0(self, start, end):
2078        """Used internally to get a slice, without error checking (LSB0)."""
2079        return self._slice_msb0(self.length - end, self.length - start)
2080
2081    def _slice_msb0(self, start, end):
2082        """Used internally to get a slice, without error checking."""
2083        if end == start:
2084            return self.__class__()
2085        assert start < end, "start={0}, end={1}".format(start, end)
2086        offset = self._offset
2087        startbyte, newoffset = divmod(start + offset, 8)
2088        endbyte = (end + offset - 1) // 8
2089        bs = self.__class__()
2090        bs._setbytes_unsafe(self._datastore.getbyteslice(startbyte, endbyte + 1), end - start, newoffset)
2091        return bs
2092
2093    def _readtoken(self, name, pos, length):
2094        """Reads a token from the bitstring and returns the result."""
2095        if length is not None and int(length) > self.length - pos:
2096            raise ReadError("Reading off the end of the data. "
2097                            "Tried to read {0} bits when only {1} available.".format(int(length), self.length - pos))
2098        try:
2099            val = name_to_read[name](self, length, pos)
2100            return val, pos + length
2101        except KeyError:
2102            if name == 'pad':
2103                return None, pos + length
2104            raise ValueError("Can't parse token {0}:{1}".format(name, length))
2105        except TypeError:
2106            # This is for the 'ue', 'se' and 'bool' tokens. They will also return the new pos.
2107            return name_to_read[name](self, pos)
2108
2109    def _addright(self, bs):
2110        """Add a bitstring to the RHS of the current bitstring."""
2111        self._datastore._appendstore(bs._datastore)
2112
2113    def _addleft(self, bs):
2114        """Prepend a bitstring to the current bitstring."""
2115        self._datastore._prependstore(bs._datastore)
2116
2117    def _reverse(self):
2118        """Reverse all bits in-place."""
2119        # Reverse the contents of each byte
2120        n = [BYTE_REVERSAL_DICT[b] for b in self._datastore.rawbytes]
2121        # Then reverse the order of the bytes
2122        n.reverse()
2123        # The new offset is the number of bits that were unused at the end.
2124        newoffset = 8 - (self._offset + self.len) % 8
2125        if newoffset == 8:
2126            newoffset = 0
2127        self._setbytes_unsafe(bytearray().join(n), self.length, newoffset)
2128
2129    def _truncateleft(self, bits):
2130        """Truncate bits from the start of the bitstring."""
2131        assert 0 <= bits <= self.len
2132        if not bits:
2133            return Bits()
2134        truncated_bits = self._slice_msb0(0, bits)
2135        if bits == self.len:
2136            self._clear()
2137            return truncated_bits
2138        bytepos, offset = divmod(self._offset + bits, 8)
2139        self._setbytes_unsafe(self._datastore.getbyteslice(bytepos, self._datastore.bytelength), self.len - bits,
2140                              offset)
2141        assert self._assertsanity()
2142        return truncated_bits
2143
2144    def _truncateright(self, bits):
2145        """Truncate bits from the end of the bitstring."""
2146        assert 0 <= bits <= self.len
2147        if not bits:
2148            return Bits()
2149        truncated_bits = self._slice_lsb0(0, bits)
2150        if bits == self.len:
2151            self._clear()
2152            return truncated_bits
2153        newlength_in_bytes = (self._offset + self.len - bits + 7) // 8
2154        self._setbytes_unsafe(self._datastore.getbyteslice(0, newlength_in_bytes), self.len - bits,
2155                              self._offset)
2156        assert self._assertsanity()
2157        return truncated_bits
2158
2159    def _insert_lsb0(self, bs, pos):
2160        """Insert bs at pos (LSB0)."""
2161        self._insert_msb0(bs, self.len - pos)
2162
2163    def _insert_msb0(self, bs, pos):
2164        """Insert bs at pos."""
2165        assert 0 <= pos <= self.len
2166        if pos > self.len // 2:
2167            # Inserting nearer end, so cut off end.
2168            # end = self._slice(pos, self.len)
2169            end = self._truncateright(self.len - pos)
2170            self._addright(bs)
2171            self._addright(end)
2172        else:
2173            # Inserting nearer start, so cut off start.
2174            start = self._slice(0, pos)
2175            self._truncateleft(pos)
2176            self._addleft(bs)
2177            self._addleft(start)
2178        try:
2179            self._pos = pos + bs.len
2180        except AttributeError:
2181            pass
2182        assert self._assertsanity()
2183
2184    def _overwrite_lsb0(self, bs, pos):
2185        """Overwrite with bs at pos (LSB0)."""
2186        self._overwrite_msb0(bs, self.len - pos - bs.len)
2187
2188    def _overwrite_msb0(self, bs, pos):
2189        """Overwrite with bs at pos."""
2190        assert 0 <= pos < self.len
2191        if bs is self:
2192            # Just overwriting with self, so do nothing.
2193            assert pos == 0
2194            return
2195        firstbytepos = (self._offset + pos) // 8
2196        lastbytepos = (self._offset + pos + bs.len - 1) // 8
2197        bytepos, bitoffset = divmod(self._offset + pos, 8)
2198        if firstbytepos == lastbytepos:
2199            mask = ((1 << bs.len) - 1) << (8 - bs.len - bitoffset)
2200            self._datastore.setbyte(bytepos, self._datastore.getbyte(bytepos) & (~mask))
2201            d = offsetcopy(bs._datastore, bitoffset)
2202            self._datastore.setbyte(bytepos, self._datastore.getbyte(bytepos) | (d.getbyte(0) & mask))
2203        else:
2204            # Do first byte
2205            mask = (1 << (8 - bitoffset)) - 1
2206            self._datastore.setbyte(bytepos, self._datastore.getbyte(bytepos) & (~mask))
2207            d = offsetcopy(bs._datastore, bitoffset)
2208            self._datastore.setbyte(bytepos, self._datastore.getbyte(bytepos) | (d.getbyte(0) & mask))
2209            # Now do all the full bytes
2210            self._datastore.setbyteslice(firstbytepos + 1, lastbytepos, d.getbyteslice(1, lastbytepos - firstbytepos))
2211            # and finally the last byte
2212            bitsleft = (self._offset + pos + bs.len) % 8
2213            if not bitsleft:
2214                bitsleft = 8
2215            mask = (1 << (8 - bitsleft)) - 1
2216            self._datastore.setbyte(lastbytepos, self._datastore.getbyte(lastbytepos) & mask)
2217            self._datastore.setbyte(lastbytepos,
2218                                    self._datastore.getbyte(lastbytepos) | (d.getbyte(d.bytelength - 1) & ~mask))
2219        assert self._assertsanity()
2220
2221    def _delete_lsb0(self, bits, pos):
2222        """Delete bits at pos (LSB0)."""
2223        self._delete_msb0(bits, self.len - pos - bits)
2224
2225    def _delete_msb0(self, bits, pos):
2226        """Delete bits at pos."""
2227        assert 0 <= pos <= self.len
2228        assert pos + bits <= self.len, "pos={}, bits={}, len={}".format(pos, bits, self.len)
2229        if not pos:
2230            # Cutting bits off at the start.
2231            self._truncateleft(bits)
2232            return
2233        if pos + bits == self.len:
2234            # Cutting bits off at the end.
2235            self._truncateright(bits)
2236            return
2237        if pos > self.len - pos - bits:
2238            # More bits before cut point than after it, so do bit shifting
2239            # on the final bits.
2240            end = self._slice_msb0(pos + bits, self.len)
2241            assert self.len - pos > 0
2242            self._truncateright(self.len - pos)
2243            self._addright(end)
2244            return
2245        # More bits after the cut point than before it.
2246        start = self._slice_msb0(0, pos)
2247        self._truncateleft(pos + bits)
2248        self._addleft(start)
2249        return
2250
2251    def _reversebytes(self, start, end):
2252        """Reverse bytes in-place."""
2253        # Make the start occur on a byte boundary
2254        # TODO: We could be cleverer here to avoid changing the offset.
2255        newoffset = 8 - (start % 8)
2256        if newoffset == 8:
2257            newoffset = 0
2258        self._datastore = offsetcopy(self._datastore, newoffset)
2259        # Now just reverse the byte data
2260        toreverse = bytearray(self._datastore.getbyteslice((newoffset + start) // 8, (newoffset + end) // 8))
2261        toreverse.reverse()
2262        self._datastore.setbyteslice((newoffset + start) // 8, (newoffset + end) // 8, toreverse)
2263
2264    def _set(self, pos):
2265        """Set bit at pos to 1."""
2266        assert 0 <= pos < self.len
2267        self._datastore.setbit(pos)
2268
2269    def _unset(self, pos):
2270        """Set bit at pos to 0."""
2271        assert 0 <= pos < self.len
2272        self._datastore.unsetbit(pos)
2273
2274    def _invert(self, pos):
2275        """Flip bit at pos 1<->0."""
2276        assert 0 <= pos < self.len
2277        self._datastore.invertbit(pos)
2278
2279    def _invert_all(self):
2280        """Invert every bit."""
2281        for p in xrange(self._datastore.byteoffset, self._datastore.byteoffset + self._datastore.bytelength):
2282            self._datastore._rawarray[p] = 256 + ~self._datastore._rawarray[p]
2283
2284    def _ilshift(self, n):
2285        """Shift bits by n to the left in place. Return self."""
2286        assert 0 < n <= self.len
2287        self._addright(Bits(n))
2288        self._truncateleft(n)
2289        return self
2290
2291    def _irshift(self, n):
2292        """Shift bits by n to the right in place. Return self."""
2293        assert 0 < n <= self.len
2294        self._addleft(Bits(n))
2295        self._truncateright(n)
2296        return self
2297
2298    def _imul(self, n):
2299        """Concatenate n copies of self in place. Return self."""
2300        assert n >= 0
2301        if not n:
2302            self._clear()
2303            return self
2304        m = 1
2305        old_len = self.len
2306        while m * 2 < n:
2307            self._addright(self)
2308            m *= 2
2309        self._addright(self[0:(n - m) * old_len])
2310        return self
2311
2312    def _inplace_logical_helper(self, bs, f):
2313        """Helper function containing most of the __ior__, __iand__, __ixor__ code."""
2314        # Give the two bitstrings the same offset (modulo 8)
2315        self_byteoffset, self_bitoffset = divmod(self._offset, 8)
2316        bs_byteoffset, bs_bitoffset = divmod(bs._offset, 8)
2317        if bs_bitoffset != self_bitoffset:
2318            if not self_bitoffset:
2319                bs._datastore = offsetcopy(bs._datastore, 0)
2320            else:
2321                self._datastore = offsetcopy(self._datastore, bs_bitoffset)
2322        a = self._datastore.rawbytes
2323        b = bs._datastore.rawbytes
2324        for i in xrange(len(a)):
2325            a[i] = f(a[i + self_byteoffset], b[i + bs_byteoffset])
2326        return self
2327
2328    def _ior(self, bs):
2329        return self._inplace_logical_helper(bs, operator.ior)
2330
2331    def _iand(self, bs):
2332        return self._inplace_logical_helper(bs, operator.iand)
2333
2334    def _ixor(self, bs):
2335        return self._inplace_logical_helper(bs, operator.xor)
2336
2337    def _readbits(self, length, start):
2338        """Read some bits from the bitstring and return newly constructed bitstring."""
2339        return self._slice(start, start + length)
2340
2341    def _validate_slice_msb0(self, start, end):
2342        """Validate start and end and return them as positive bit positions."""
2343        if start is None:
2344            start = 0
2345        elif start < 0:
2346            start += self.len
2347        if end is None:
2348            end = self.len
2349        elif end < 0:
2350            end += self.len
2351        if not 0 <= end <= self.len:
2352            raise ValueError("end is not a valid position in the bitstring.")
2353        if not 0 <= start <= self.len:
2354            raise ValueError("start is not a valid position in the bitstring.")
2355        if end < start:
2356            raise ValueError("end must not be less than start.")
2357        return start, end
2358
2359    def _validate_slice_lsb0(self, start, end):
2360        start, end = self._validate_slice_msb0(start, end)
2361        return self.len - end, self.len - start
2362
2363    def unpack(self, fmt, **kwargs):
2364        """Interpret the whole bitstring using fmt and return list.
2365
2366        fmt -- A single string or a list of strings with comma separated tokens
2367               describing how to interpret the bits in the bitstring. Items
2368               can also be integers, for reading new bitstring of the given length.
2369        kwargs -- A dictionary or keyword-value pairs - the keywords used in the
2370                  format string will be replaced with their given value.
2371
2372        Raises ValueError if the format is not understood. If not enough bits
2373        are available then all bits to the end of the bitstring will be used.
2374
2375        See the docstring for 'read' for token examples.
2376
2377        """
2378        return self._readlist(fmt, 0, **kwargs)[0]
2379
2380    def _readlist(self, fmt, pos, **kwargs):
2381        tokens = []
2382        stretchy_token = None
2383        if isinstance(fmt, basestring):
2384            fmt = [fmt]
2385        # Replace integers with 'bits' tokens
2386        for i, f in enumerate(fmt):
2387            if isinstance(f, numbers.Integral):
2388                fmt[i] = "bits:{0}".format(f)
2389        for f_item in fmt:
2390            stretchy, tkns = tokenparser(f_item, tuple(sorted(kwargs.keys())))
2391            if stretchy:
2392                if stretchy_token:
2393                    raise Error("It's not possible to have more than one 'filler' token.")
2394                stretchy_token = stretchy
2395            tokens.extend(tkns)
2396        if not stretchy_token:
2397            lst = []
2398            for name, length, _ in tokens:
2399                if length in kwargs:
2400                    length = kwargs[length]
2401                    if name == 'bytes':
2402                        length *= 8
2403                if name in kwargs and length is None:
2404                    # Using default 'uint' - the name is really the length.
2405                    value, pos = self._readtoken('uint', pos, kwargs[name])
2406                    lst.append(value)
2407                    continue
2408                value, pos = self._readtoken(name, pos, length)
2409                if value is not None: # Don't append pad tokens
2410                    lst.append(value)
2411            return lst, pos
2412        stretchy_token = False
2413        bits_after_stretchy_token = 0
2414        for token in tokens:
2415            name, length, _ = token
2416            if length in kwargs:
2417                length = kwargs[length]
2418                if name == 'bytes':
2419                    length *= 8
2420            if name in kwargs and length is None:
2421                # Default 'uint'.
2422                length = kwargs[name]
2423            if stretchy_token:
2424                if name in ('se', 'ue', 'sie', 'uie'):
2425                    raise Error("It's not possible to parse a variable"
2426                                "length token after a 'filler' token.")
2427                else:
2428                    if length is None:
2429                        raise Error("It's not possible to have more than "
2430                                    "one 'filler' token.")
2431                    bits_after_stretchy_token += length
2432            if length is None and name not in ('se', 'ue', 'sie', 'uie'):
2433                assert not stretchy_token
2434                stretchy_token = token
2435        bits_left = self.len - pos
2436        return_values = []
2437        for token in tokens:
2438            name, length, _ = token
2439            if token is stretchy_token:
2440                # Set length to the remaining bits
2441                length = max(bits_left - bits_after_stretchy_token, 0)
2442            if length in kwargs:
2443                length = kwargs[length]
2444                if name == 'bytes':
2445                    length *= 8
2446            if name in kwargs and length is None:
2447                # Default 'uint'
2448                length = kwargs[name]
2449            if length is not None:
2450                bits_left -= length
2451            value, pos = self._readtoken(name, pos, length)
2452            if value is not None:
2453                return_values.append(value)
2454        return return_values, pos
2455
2456    def _findbytes(self, bytes_, start, end, bytealigned):
2457        """Quicker version of find when everything's whole byte
2458        and byte aligned.
2459
2460        """
2461        assert self._datastore.offset == 0
2462        assert bytealigned is True
2463        # Extract data bytes from bitstring to be found.
2464        bytepos = (start + 7) // 8
2465        found = False
2466        p = bytepos
2467        finalpos = end // 8
2468        increment = max(1024, len(bytes_) * 10)
2469        buffersize = increment + len(bytes_)
2470        while p < finalpos:
2471            # Read in file or from memory in overlapping chunks and search the chunks.
2472            buf = bytearray(self._datastore.getbyteslice(p, min(p + buffersize, finalpos)))
2473            pos = buf.find(bytes_)
2474            if pos != -1:
2475                found = True
2476                p += pos
2477                break
2478            p += increment
2479        if not found:
2480            return ()
2481        return (p * 8,)
2482
2483    def _findregex(self, reg_ex, start, end, bytealigned):
2484        """Find first occurrence of a compiled regular expression.
2485
2486        Note that this doesn't support arbitrary regexes, in particular they
2487        must match a known length.
2488
2489        """
2490        p = start
2491        length = len(reg_ex.pattern)
2492        # We grab overlapping chunks of the binary representation and
2493        # do an ordinary string search within that.
2494        increment = max(4096, length * 10)
2495        buffersize = increment + length
2496        while p < end:
2497            buf = self._readbin(min(buffersize, end - p), p)
2498            # Test using regular expressions...
2499            m = reg_ex.search(buf)
2500            if m:
2501                pos = m.start()
2502            # pos = buf.find(targetbin)
2503            # if pos != -1:
2504                # if bytealigned then we only accept byte aligned positions.
2505                if not bytealigned or (p + pos) % 8 == 0:
2506                    return (p + pos,)
2507                if bytealigned:
2508                    # Advance to just beyond the non-byte-aligned match and try again...
2509                    p += pos + 1
2510                    continue
2511            p += increment
2512            # Not found, return empty tuple
2513        return ()
2514
2515    def find(self, bs, start=None, end=None, bytealigned=None):
2516        """Find first occurrence of substring bs.
2517
2518        Returns a single item tuple with the bit position if found, or an
2519        empty tuple if not found. The bit position (pos property) will
2520        also be set to the start of the substring if it is found.
2521
2522        bs -- The bitstring to find.
2523        start -- The bit position to start the search. Defaults to 0.
2524        end -- The bit position one past the last bit to search.
2525               Defaults to self.len.
2526        bytealigned -- If True the bitstring will only be
2527                       found on byte boundaries.
2528
2529        Raises ValueError if bs is empty, if start < 0, if end > self.len or
2530        if end < start.
2531
2532        >>> BitArray('0xc3e').find('0b1111')
2533        (6,)
2534
2535        """
2536        return self._find(bs, start, end, bytealigned)
2537
2538    def _find_lsb0(self, bs, start=None, end=None, bytealigned=None):
2539        bs = Bits(bs)
2540        start, end = self._validate_slice_lsb0(start, end)
2541        p = self.rfind(bs, start, end, bytealigned)
2542        if p:
2543            return (self.len - p[0] - bs.length,)
2544
2545    def _find_msb0(self, bs, start=None, end=None, bytealigned=None):
2546        bs = Bits(bs)
2547        if not bs.len:
2548            raise ValueError("Cannot find an empty bitstring.")
2549        start, end = self._validate_slice(start, end)
2550        if bytealigned is None:
2551            bytealigned = globals()['bytealigned']
2552        if bytealigned and not bs.len % 8 and not self._datastore.offset:
2553            p = self._findbytes(bs.bytes, start, end, bytealigned)
2554        else:
2555            p = self._findregex(re.compile(bs._getbin()), start, end, bytealigned)
2556        # If called from a class that has a pos, set it
2557        try:
2558            self._pos = p[0]
2559        except (AttributeError, IndexError):
2560            pass
2561        return p
2562
2563    def findall(self, bs, start=None, end=None, count=None, bytealigned=None):
2564        """Find all occurrences of bs. Return generator of bit positions.
2565
2566        bs -- The bitstring to find.
2567        start -- The bit position to start the search. Defaults to 0.
2568        end -- The bit position one past the last bit to search.
2569               Defaults to self.len.
2570        count -- The maximum number of occurrences to find.
2571        bytealigned -- If True the bitstring will only be found on
2572                       byte boundaries.
2573
2574        Raises ValueError if bs is empty, if start < 0, if end > self.len or
2575        if end < start.
2576
2577        Note that all occurrences of bs are found, even if they overlap.
2578
2579        """
2580        if count is not None and count < 0:
2581            raise ValueError("In findall, count must be >= 0.")
2582        bs = Bits(bs)
2583        start, end = self._validate_slice(start, end)
2584        if bytealigned is None:
2585            bytealigned = globals()['bytealigned']
2586        c = 0
2587        if bytealigned and not bs.len % 8 and not self._datastore.offset:
2588            # Use the quick find method
2589            f = self._findbytes
2590            x = bs._getbytes()
2591        else:
2592            f = self._findregex
2593            x = re.compile(bs._getbin())
2594        while True:
2595
2596            p = f(x, start, end, bytealigned)
2597            if not p:
2598                break
2599            if count is not None and c >= count:
2600                return
2601            c += 1
2602            try:
2603                self._pos = p[0]
2604            except AttributeError:
2605                pass
2606            yield p[0]
2607            if bytealigned:
2608                start = p[0] + 8
2609            else:
2610                start = p[0] + 1
2611            if start >= end:
2612                break
2613        return
2614
2615    def rfind(self, bs, start=None, end=None, bytealigned=None):
2616        """Find final occurrence of substring bs.
2617
2618        Returns a single item tuple with the bit position if found, or an
2619        empty tuple if not found. The bit position (pos property) will
2620        also be set to the start of the substring if it is found.
2621
2622        bs -- The bitstring to find.
2623        start -- The bit position to end the reverse search. Defaults to 0.
2624        end -- The bit position one past the first bit to reverse search.
2625               Defaults to self.len.
2626        bytealigned -- If True the bitstring will only be found on byte
2627                       boundaries.
2628
2629        Raises ValueError if bs is empty, if start < 0, if end > self.len or
2630        if end < start.
2631
2632        """
2633        bs = Bits(bs)
2634        start, end = self._validate_slice(start, end)
2635        if bytealigned is None:
2636            bytealigned = globals()['bytealigned']
2637        if not bs.len:
2638            raise ValueError("Cannot find an empty bitstring.")
2639        # Search chunks starting near the end and then moving back
2640        # until we find bs.
2641        increment = max(8192, bs.len * 80)
2642        buffersize = min(increment + bs.len, end - start)
2643        pos = max(start, end - buffersize)
2644        while True:
2645            found = list(self.findall(bs, start=pos, end=pos + buffersize,
2646                                      bytealigned=bytealigned))
2647            if not found:
2648                if pos == start:
2649                    return ()
2650                pos = max(start, pos - increment)
2651                continue
2652            return (found[-1],)
2653
2654    def cut(self, bits, start=None, end=None, count=None):
2655        """Return bitstring generator by cutting into bits sized chunks.
2656
2657        bits -- The size in bits of the bitstring chunks to generate.
2658        start -- The bit position to start the first cut. Defaults to 0.
2659        end -- The bit position one past the last bit to use in the cut.
2660               Defaults to self.len.
2661        count -- If specified then at most count items are generated.
2662                 Default is to cut as many times as possible.
2663
2664        """
2665        start, end = self._validate_slice(start, end)
2666        if count is not None and count < 0:
2667            raise ValueError("Cannot cut - count must be >= 0.")
2668        if bits <= 0:
2669            raise ValueError("Cannot cut - bits must be >= 0.")
2670        c = 0
2671        while count is None or c < count:
2672            c += 1
2673            nextchunk = self._slice(start, min(start + bits, end))
2674            if nextchunk.len != bits:
2675                return
2676            assert nextchunk._assertsanity()
2677            yield nextchunk
2678            start += bits
2679        return
2680
2681    def split(self, delimiter, start=None, end=None, count=None,
2682              bytealigned=None):
2683        """Return bitstring generator by splittling using a delimiter.
2684
2685        The first item returned is the initial bitstring before the delimiter,
2686        which may be an empty bitstring.
2687
2688        delimiter -- The bitstring used as the divider.
2689        start -- The bit position to start the split. Defaults to 0.
2690        end -- The bit position one past the last bit to use in the split.
2691               Defaults to self.len.
2692        count -- If specified then at most count items are generated.
2693                 Default is to split as many times as possible.
2694        bytealigned -- If True splits will only occur on byte boundaries.
2695
2696        Raises ValueError if the delimiter is empty.
2697
2698        """
2699        delimiter = Bits(delimiter)
2700        if not delimiter.len:
2701            raise ValueError("split delimiter cannot be empty.")
2702        start, end = self._validate_slice(start, end)
2703        if bytealigned is None:
2704            bytealigned = globals()['bytealigned']
2705        if count is not None and count < 0:
2706            raise ValueError("Cannot split - count must be >= 0.")
2707        if count == 0:
2708            return
2709        if bytealigned and not delimiter.len % 8 and not self._datastore.offset:
2710            # Use the quick find method
2711            f = self._findbytes
2712            x = delimiter._getbytes()
2713        else:
2714            f = self._findregex
2715            x = re.compile(delimiter._getbin())
2716        found = f(x, start, end, bytealigned)
2717        if not found:
2718            # Initial bits are the whole bitstring being searched
2719            yield self._slice(start, end)
2720            return
2721        # yield the bytes before the first occurrence of the delimiter, even if empty
2722        yield self._slice(start, found[0])
2723        startpos = pos = found[0]
2724        c = 1
2725        while count is None or c < count:
2726            pos += delimiter.len
2727            found = f(x, pos, end, bytealigned)
2728            if not found:
2729                # No more occurrences, so return the rest of the bitstring
2730                yield self._slice(startpos, end)
2731                return
2732            c += 1
2733            yield self._slice(startpos, found[0])
2734            startpos = pos = found[0]
2735        # Have generated count bitstrings, so time to quit.
2736        return
2737
2738    def join(self, sequence):
2739        """Return concatenation of bitstrings joined by self.
2740
2741        sequence -- A sequence of bitstrings.
2742
2743        """
2744        s = self.__class__()
2745        i = iter(sequence)
2746        try:
2747            s._addright(Bits(next(i)))
2748            while True:
2749                n = next(i)
2750                s._addright(self)
2751                s._addright(Bits(n))
2752        except StopIteration:
2753            pass
2754        return s
2755
2756    def tobytes(self):
2757        """Return the bitstring as bytes, padding with zero bits if needed.
2758
2759        Up to seven zero bits will be added at the end to byte align.
2760
2761        """
2762        d = offsetcopy(self._datastore, 0).rawbytes
2763        # Need to ensure that unused bits at end are set to zero
2764        unusedbits = 8 - self.len % 8
2765        if unusedbits != 8:
2766            d[-1] &= (0xff << unusedbits)
2767        return bytes(d)
2768
2769    def tofile(self, f):
2770        """Write the bitstring to a file object, padding with zero bits if needed.
2771
2772        Up to seven zero bits will be added at the end to byte align.
2773
2774        """
2775        # If the bitstring is file based then we don't want to read it all
2776        # in to memory.
2777        chunksize = 1024 * 1024  # 1 MiB chunks
2778        if not self._offset:
2779            a = 0
2780            bytelen = self._datastore.bytelength
2781            p = self._datastore.getbyteslice(a, min(a + chunksize, bytelen - 1))
2782            while len(p) == chunksize:
2783                f.write(p)
2784                a += chunksize
2785                p = self._datastore.getbyteslice(a, min(a + chunksize, bytelen - 1))
2786            f.write(p)
2787            # Now the final byte, ensuring that unused bits at end are set to 0.
2788            bits_in_final_byte = self.len % 8
2789            if not bits_in_final_byte:
2790                bits_in_final_byte = 8
2791            f.write(self[-bits_in_final_byte:].tobytes())
2792        else:
2793            # Really quite inefficient...
2794            a = 0
2795            b = a + chunksize * 8
2796            while b <= self.len:
2797                f.write(self._slice(a, b)._getbytes())
2798                a += chunksize * 8
2799                b += chunksize * 8
2800            if a != self.len:
2801                f.write(self._slice(a, self.len).tobytes())
2802
2803    def startswith(self, prefix, start=None, end=None):
2804        """Return whether the current bitstring starts with prefix.
2805
2806        prefix -- The bitstring to search for.
2807        start -- The bit position to start from. Defaults to 0.
2808        end -- The bit position to end at. Defaults to self.len.
2809
2810        """
2811        prefix = Bits(prefix)
2812        start, end = self._validate_slice_msb0(start, end)  # the _slice deals with msb0/lsb0
2813        if end < start + prefix.len:
2814            return False
2815        end = start + prefix.len
2816        return self._slice(start, end) == prefix
2817
2818    def endswith(self, suffix, start=None, end=None):
2819        """Return whether the current bitstring ends with suffix.
2820
2821        suffix -- The bitstring to search for.
2822        start -- The bit position to start from. Defaults to 0.
2823        end -- The bit position to end at. Defaults to self.len.
2824
2825        """
2826        suffix = Bits(suffix)
2827        start, end = self._validate_slice(start, end)
2828        if start + suffix.len > end:
2829            return False
2830        start = end - suffix.len
2831        return self._slice(start, end) == suffix
2832
2833    def all(self, value, pos=None):
2834        """Return True if one or many bits are all set to value.
2835
2836        value -- If value is True then checks for bits set to 1, otherwise
2837                 checks for bits set to 0.
2838        pos -- An iterable of bit positions. Negative numbers are treated in
2839               the same way as slice indices. Defaults to the whole bitstring.
2840
2841        """
2842        value = bool(value)
2843        length = self.len
2844        if pos is None:
2845            pos = xrange(self.len)
2846        for p in pos:
2847            if p < 0:
2848                p += length
2849            if not 0 <= p < length:
2850                raise IndexError("Bit position {0} out of range.".format(p))
2851            if not self._datastore.getbit(p) is value:
2852                return False
2853        return True
2854
2855    def any(self, value, pos=None):
2856        """Return True if any of one or many bits are set to value.
2857
2858        value -- If value is True then checks for bits set to 1, otherwise
2859                 checks for bits set to 0.
2860        pos -- An iterable of bit positions. Negative numbers are treated in
2861               the same way as slice indices. Defaults to the whole bitstring.
2862
2863        """
2864        value = bool(value)
2865        length = self.len
2866        if pos is None:
2867            pos = xrange(self.len)
2868        for p in pos:
2869            if p < 0:
2870                p += length
2871            if not 0 <= p < length:
2872                raise IndexError("Bit position {0} out of range.".format(p))
2873            if self._datastore.getbit(p) is value:
2874                return True
2875        return False
2876
2877    def count(self, value):
2878        """Return count of total number of either zero or one bits.
2879
2880        value -- If True then bits set to 1 are counted, otherwise bits set
2881                 to 0 are counted.
2882
2883        >>> Bits('0xef').count(1)
2884        7
2885
2886        """
2887        if not self.len:
2888            return 0
2889        # count the number of 1s (from which it's easy to work out the 0s).
2890        # Don't count the final byte yet.
2891        count = sum(BIT_COUNT[self._datastore.getbyte(i)] for i in xrange(self._datastore.bytelength - 1))
2892        # adjust for bits at start that aren't part of the bitstring
2893        if self._offset:
2894            count -= BIT_COUNT[self._datastore.getbyte(0) >> (8 - self._offset)]
2895        # and count the last 1 - 8 bits at the end.
2896        endbits = self._datastore.bytelength * 8 - (self._offset + self.len)
2897        count += BIT_COUNT[self._datastore.getbyte(self._datastore.bytelength - 1) >> endbits]
2898        return count if value else self.len - count
2899
2900    # Create native-endian functions as aliases depending on the byteorder
2901    if byteorder == 'little':
2902        _setfloatne = _setfloatle
2903        _readfloatne = _readfloatle
2904        _getfloatne = _getfloatle
2905        _setuintne = _setuintle
2906        _readuintne = _readuintle
2907        _getuintne = _getuintle
2908        _setintne = _setintle
2909        _readintne = _readintle
2910        _getintne = _getintle
2911    else:
2912        _setfloatne = _setfloat
2913        _readfloatne = _readfloat
2914        _getfloatne = _getfloat
2915        _setuintne = _setuintbe
2916        _readuintne = _readuintbe
2917        _getuintne = _getuintbe
2918        _setintne = _setintbe
2919        _readintne = _readintbe
2920        _getintne = _getintbe
2921
2922    _offset = property(_getoffset)
2923
2924    len = property(_getlength,
2925                   doc="""The length of the bitstring in bits. Read only.
2926                      """)
2927    length = property(_getlength,
2928                      doc="""The length of the bitstring in bits. Read only.
2929                      """)
2930    bool = property(_getbool,
2931                    doc="""The bitstring as a bool (True or False). Read only.
2932                    """)
2933    hex = property(_gethex,
2934                   doc="""The bitstring as a hexadecimal string. Read only.
2935                   """)
2936    bin = property(_getbin,
2937                   doc="""The bitstring as a binary string. Read only.
2938                   """)
2939    oct = property(_getoct,
2940                   doc="""The bitstring as an octal string. Read only.
2941                   """)
2942    bytes = property(_getbytes,
2943                     doc="""The bitstring as a bytes object. Read only.
2944                      """)
2945    int = property(_getint,
2946                   doc="""The bitstring as a two's complement signed int. Read only.
2947                      """)
2948    uint = property(_getuint,
2949                    doc="""The bitstring as a two's complement unsigned int. Read only.
2950                      """)
2951    float = property(_getfloat,
2952                     doc="""The bitstring as a floating point number. Read only.
2953                      """)
2954    intbe = property(_getintbe,
2955                     doc="""The bitstring as a two's complement big-endian signed int. Read only.
2956                      """)
2957    uintbe = property(_getuintbe,
2958                      doc="""The bitstring as a two's complement big-endian unsigned int. Read only.
2959                      """)
2960    floatbe = property(_getfloat,
2961                       doc="""The bitstring as a big-endian floating point number. Read only.
2962                      """)
2963    intle = property(_getintle,
2964                     doc="""The bitstring as a two's complement little-endian signed int. Read only.
2965                      """)
2966    uintle = property(_getuintle,
2967                      doc="""The bitstring as a two's complement little-endian unsigned int. Read only.
2968                      """)
2969    floatle = property(_getfloatle,
2970                       doc="""The bitstring as a little-endian floating point number. Read only.
2971                      """)
2972    intne = property(_getintne,
2973                     doc="""The bitstring as a two's complement native-endian signed int. Read only.
2974                      """)
2975    uintne = property(_getuintne,
2976                      doc="""The bitstring as a two's complement native-endian unsigned int. Read only.
2977                      """)
2978    floatne = property(_getfloatne,
2979                       doc="""The bitstring as a native-endian floating point number. Read only.
2980                      """)
2981    ue = property(_getue,
2982                  doc="""The bitstring as an unsigned exponential-Golomb code. Read only.
2983                      """)
2984    se = property(_getse,
2985                  doc="""The bitstring as a signed exponential-Golomb code. Read only.
2986                      """)
2987    uie = property(_getuie,
2988                   doc="""The bitstring as an unsigned interleaved exponential-Golomb code. Read only.
2989                      """)
2990    sie = property(_getsie,
2991                   doc="""The bitstring as a signed interleaved exponential-Golomb code. Read only.
2992                      """)
2993
2994
2995
2996
2997
2998class BitArray(Bits):
2999    """A container holding a mutable sequence of bits.
3000
3001    Subclass of the immutable Bits class. Inherits all of its
3002    methods (except __hash__) and adds mutating methods.
3003
3004    Mutating methods:
3005
3006    append() -- Append a bitstring.
3007    byteswap() -- Change byte endianness in-place.
3008    insert() -- Insert a bitstring.
3009    invert() -- Flip bit(s) between one and zero.
3010    overwrite() -- Overwrite a section with a new bitstring.
3011    prepend() -- Prepend a bitstring.
3012    replace() -- Replace occurrences of one bitstring with another.
3013    reverse() -- Reverse bits in-place.
3014    rol() -- Rotate bits to the left.
3015    ror() -- Rotate bits to the right.
3016    set() -- Set bit(s) to 1 or 0.
3017
3018    Methods inherited from Bits:
3019
3020    all() -- Check if all specified bits are set to 1 or 0.
3021    any() -- Check if any of specified bits are set to 1 or 0.
3022    count() -- Count the number of bits set to 1 or 0.
3023    cut() -- Create generator of constant sized chunks.
3024    endswith() -- Return whether the bitstring ends with a sub-string.
3025    find() -- Find a sub-bitstring in the current bitstring.
3026    findall() -- Find all occurrences of a sub-bitstring in the current bitstring.
3027    join() -- Join bitstrings together using current bitstring.
3028    rfind() -- Seek backwards to find a sub-bitstring.
3029    split() -- Create generator of chunks split by a delimiter.
3030    startswith() -- Return whether the bitstring starts with a sub-bitstring.
3031    tobytes() -- Return bitstring as bytes, padding if needed.
3032    tofile() -- Write bitstring to file, padding if needed.
3033    unpack() -- Interpret bits using format string.
3034
3035    Special methods:
3036
3037    Mutating operators are available: [], <<=, >>=, +=, *=, &=, |= and ^=
3038    in addition to the inherited [], ==, !=, +, *, ~, <<, >>, &, | and ^.
3039
3040    Properties:
3041
3042    bin -- The bitstring as a binary string.
3043    bool -- For single bit bitstrings, interpret as True or False.
3044    bytepos -- The current byte position in the bitstring.
3045    bytes -- The bitstring as a bytes object.
3046    float -- Interpret as a floating point number.
3047    floatbe -- Interpret as a big-endian floating point number.
3048    floatle -- Interpret as a little-endian floating point number.
3049    floatne -- Interpret as a native-endian floating point number.
3050    hex -- The bitstring as a hexadecimal string.
3051    int -- Interpret as a two's complement signed integer.
3052    intbe -- Interpret as a big-endian signed integer.
3053    intle -- Interpret as a little-endian signed integer.
3054    intne -- Interpret as a native-endian signed integer.
3055    len -- Length of the bitstring in bits.
3056    oct -- The bitstring as an octal string.
3057    pos -- The current bit position in the bitstring.
3058    se -- Interpret as a signed exponential-Golomb code.
3059    ue -- Interpret as an unsigned exponential-Golomb code.
3060    sie -- Interpret as a signed interleaved exponential-Golomb code.
3061    uie -- Interpret as an unsigned interleaved exponential-Golomb code.
3062    uint -- Interpret as a two's complement unsigned integer.
3063    uintbe -- Interpret as a big-endian unsigned integer.
3064    uintle -- Interpret as a little-endian unsigned integer.
3065    uintne -- Interpret as a native-endian unsigned integer.
3066
3067    """
3068
3069    __slots__ = ()
3070
3071    # As BitArray objects are mutable, we shouldn't allow them to be hashed.
3072    __hash__ = None
3073
3074    def __init__(self, auto=None, length=None, offset=None, **kwargs):
3075        """Either specify an 'auto' initialiser:
3076        auto -- a string of comma separated tokens, an integer, a file object,
3077                a bytearray, a boolean iterable or another bitstring.
3078
3079        Or initialise via **kwargs with one (and only one) of:
3080        bytes -- raw data as a string, for example read from a binary file.
3081        bin -- binary string representation, e.g. '0b001010'.
3082        hex -- hexadecimal string representation, e.g. '0x2ef'
3083        oct -- octal string representation, e.g. '0o777'.
3084        uint -- an unsigned integer.
3085        int -- a signed integer.
3086        float -- a floating point number.
3087        uintbe -- an unsigned big-endian whole byte integer.
3088        intbe -- a signed big-endian whole byte integer.
3089        floatbe - a big-endian floating point number.
3090        uintle -- an unsigned little-endian whole byte integer.
3091        intle -- a signed little-endian whole byte integer.
3092        floatle -- a little-endian floating point number.
3093        uintne -- an unsigned native-endian whole byte integer.
3094        intne -- a signed native-endian whole byte integer.
3095        floatne -- a native-endian floating point number.
3096        se -- a signed exponential-Golomb code.
3097        ue -- an unsigned exponential-Golomb code.
3098        sie -- a signed interleaved exponential-Golomb code.
3099        uie -- an unsigned interleaved exponential-Golomb code.
3100        bool -- a boolean (True or False).
3101        filename -- a file which will be opened in binary read-only mode.
3102
3103        Other keyword arguments:
3104        length -- length of the bitstring in bits, if needed and appropriate.
3105                  It must be supplied for all integer and float initialisers.
3106        offset -- bit offset to the data. These offset bits are
3107                  ignored and this is intended for use when
3108                  initialising using 'bytes' or 'filename'.
3109
3110        """
3111        # For mutable BitArrays we always read in files to memory:
3112        if not isinstance(self._datastore, ByteStore):
3113            self._ensureinmemory()
3114
3115    def __new__(cls, auto=None, length=None, offset=None, **kwargs):
3116        x = super(BitArray, cls).__new__(cls)
3117        y = Bits.__new__(BitArray, auto, length, offset, **kwargs)
3118        x._datastore = ByteStore(y._datastore._rawarray[:],
3119                                          y._datastore.bitlength,
3120                                          y._datastore.offset)
3121        return x
3122
3123    def __iadd__(self, bs):
3124        """Append bs to current bitstring. Return self.
3125
3126        bs -- the bitstring to append.
3127
3128        """
3129        self._append(bs)
3130        return self
3131
3132    def __copy__(self):
3133        """Return a new copy of the BitArray."""
3134        s_copy = BitArray()
3135        if not isinstance(self._datastore, ByteStore):
3136            # Let them both point to the same (invariant) array.
3137            # If either gets modified then at that point they'll be read into memory.
3138            s_copy._datastore = self._datastore
3139        else:
3140            s_copy._datastore = copy.copy(self._datastore)
3141        return s_copy
3142
3143    def __setitem__(self, key, value):
3144        try:
3145            # A slice
3146            start, step = 0, 1
3147            if key.step is not None:
3148                step = key.step
3149        except AttributeError:
3150            # single element
3151            if key < 0:
3152                key += self.len
3153            if not 0 <= key < self.len:
3154                raise IndexError("Slice index out of range.")
3155            if isinstance(value, numbers.Integral):
3156                if not value:
3157                    self._unset(key)
3158                    return
3159                if value in (1, -1):
3160                    self._set(key)
3161                    return
3162                raise ValueError("Cannot set a single bit with integer {0}.".format(value))
3163            value = Bits(value)
3164            if value.len == 1:
3165                if value[0]:
3166                    self._set(key)
3167                else:
3168                    self._unset(key)
3169            else:
3170                self._delete(1, key)
3171                self._insert(value, key)
3172            return
3173        else:
3174            if step != 1:
3175                # convert to binary string and use string slicing
3176                # TODO: Horribly inefficient
3177                temp = list(self._getbin())
3178                v = list(Bits(value)._getbin())
3179                temp.__setitem__(key, v)
3180                self._setbin_unsafe(''.join(temp))
3181                return
3182
3183            # If value is an integer then we want to set the slice to that
3184            # value rather than initialise a new bitstring of that length.
3185            if not isinstance(value, numbers.Integral):
3186                try:
3187                    value = Bits(value)
3188                except TypeError:
3189                    raise TypeError("Bitstring, integer or string expected. "
3190                                    "Got {0}.".format(type(value)))
3191            if key.start is not None:
3192                start = key.start
3193                if key.start < 0:
3194                    start += self.len
3195                if start < 0:
3196                    start = 0
3197            stop = self.len
3198            if key.stop is not None:
3199                stop = key.stop
3200                if key.stop < 0:
3201                    stop += self.len
3202            if start > stop:
3203                # The standard behaviour for lists is to just insert at the
3204                # start position if stop < start and step == 1.
3205                stop = start
3206            if isinstance(value, numbers.Integral):
3207                if value >= 0:
3208                    value = self.__class__(uint=value, length=stop - start)
3209                else:
3210                    value = self.__class__(int=value, length=stop - start)
3211            stop = min(stop, self.len)
3212            start = max(start, 0)
3213            start = min(start, stop)
3214            if (stop - start) == value.len:
3215                if not value.len:
3216                    return
3217                if step >= 0:
3218                    self._overwrite(value, start)
3219                else:
3220                    self._overwrite(value.__getitem__(slice(None, None, 1)), start)
3221            else:
3222                # TODO: A delete then insert is wasteful - it could do unneeded shifts.
3223                # Could be either overwrite + insert or overwrite + delete.
3224                self._delete(stop - start, start)
3225                if step >= 0:
3226                    self._insert(value, start)
3227                else:
3228                    self._insert(value.__getitem__(slice(None, None, 1)), start)
3229                # pos is now after the inserted piece.
3230            return
3231
3232    def __delitem__(self, key):
3233        """Delete item or range.
3234
3235        Indices are in units of the step parameter (default 1 bit).
3236        Stepping is used to specify the number of bits in each item.
3237
3238        >>> a = BitArray('0x001122')
3239        >>> del a[1:2:8]
3240        >>> print a
3241        0x0022
3242
3243        """
3244        try:
3245            # A slice
3246            start = 0
3247            step = key.step if key.step is not None else 1
3248        except AttributeError:
3249            # single element
3250            if key < 0:
3251                key += self.len
3252            if not 0 <= key < self.len:
3253                raise IndexError("Slice index out of range.")
3254            self._delete(1, key)
3255            return
3256        else:
3257            if step != 1:
3258                # convert to binary string and use string slicing
3259                # TODO: Horribly inefficient
3260                temp = list(self._getbin())
3261                temp.__delitem__(key)
3262                self._setbin_unsafe(''.join(temp))
3263                return
3264            if key.start is not None:
3265                start = key.start
3266                if key.start < 0:
3267                    start += self.len
3268                if start < 0:
3269                    start = 0
3270            stop = self.len
3271            if key.stop is not None:
3272                stop = key.stop
3273                if key.stop < 0:
3274                    stop += self.len
3275            if start > stop:
3276                return
3277            stop = min(stop, self.len)
3278            start = max(start, 0)
3279            start = min(start, stop)
3280            self._delete(stop - start, start)
3281            return
3282
3283    def __ilshift__(self, n):
3284        """Shift bits by n to the left in place. Return self.
3285
3286        n -- the number of bits to shift. Must be >= 0.
3287
3288        """
3289        if n < 0:
3290            raise ValueError("Cannot shift by a negative amount.")
3291        if not self.len:
3292            raise ValueError("Cannot shift an empty bitstring.")
3293        if not n:
3294            return self
3295        n = min(n, self.len)
3296        return self._ilshift(n)
3297
3298    def __irshift__(self, n):
3299        """Shift bits by n to the right in place. Return self.
3300
3301        n -- the number of bits to shift. Must be >= 0.
3302
3303        """
3304        if n < 0:
3305            raise ValueError("Cannot shift by a negative amount.")
3306        if not self.len:
3307            raise ValueError("Cannot shift an empty bitstring.")
3308        if not n:
3309            return self
3310        n = min(n, self.len)
3311        return self._irshift(n)
3312
3313    def __imul__(self, n):
3314        """Concatenate n copies of self in place. Return self.
3315
3316        Called for expressions of the form 'a *= 3'.
3317        n -- The number of concatenations. Must be >= 0.
3318
3319        """
3320        if n < 0:
3321            raise ValueError("Cannot multiply by a negative integer.")
3322        return self._imul(n)
3323
3324    def __ior__(self, bs):
3325        bs = Bits(bs)
3326        if self.len != bs.len:
3327            raise ValueError("Bitstrings must have the same length "
3328                             "for |= operator.")
3329        return self._ior(bs)
3330
3331    def __iand__(self, bs):
3332        bs = Bits(bs)
3333        if self.len != bs.len:
3334            raise ValueError("Bitstrings must have the same length "
3335                             "for &= operator.")
3336        return self._iand(bs)
3337
3338    def __ixor__(self, bs):
3339        bs = Bits(bs)
3340        if self.len != bs.len:
3341            raise ValueError("Bitstrings must have the same length "
3342                             "for ^= operator.")
3343        return self._ixor(bs)
3344
3345    def replace(self, old, new, start=None, end=None, count=None,
3346                bytealigned=None):
3347        """Replace all occurrences of old with new in place.
3348
3349        Returns number of replacements made.
3350
3351        old -- The bitstring to replace.
3352        new -- The replacement bitstring.
3353        start -- Any occurrences that start before this will not be replaced.
3354                 Defaults to 0.
3355        end -- Any occurrences that finish after this will not be replaced.
3356               Defaults to self.len.
3357        count -- The maximum number of replacements to make. Defaults to
3358                 replace all occurrences.
3359        bytealigned -- If True replacements will only be made on byte
3360                       boundaries.
3361
3362        Raises ValueError if old is empty or if start or end are
3363        out of range.
3364
3365        """
3366        old = Bits(old)
3367        new = Bits(new)
3368        if not old.len:
3369            raise ValueError("Empty bitstring cannot be replaced.")
3370        start, end = self._validate_slice(start, end)
3371        if bytealigned is None:
3372            bytealigned = globals()['bytealigned']
3373        # Adjust count for use in split()
3374        if count is not None:
3375            count += 1
3376        sections = self.split(old, start, end, count, bytealigned)
3377        lengths = [s.len for s in sections]
3378        if len(lengths) == 1:
3379            # Didn't find anything to replace.
3380            return 0  # no replacements done
3381        if new is self:
3382            # Prevent self assignment woes
3383            new = copy.copy(self)
3384        positions = [lengths[0] + start]
3385        for l in lengths[1:-1]:
3386            # Next position is the previous one plus the length of the next section.
3387            positions.append(positions[-1] + l)
3388        # We have all the positions that need replacements. We do them
3389        # in reverse order so that they won't move around as we replace.
3390        positions.reverse()
3391        try:
3392            # Need to calculate new pos, if this is a bitstream
3393            newpos = self._pos
3394            for p in positions:
3395                self[p:p + old.len] = new
3396            if old.len != new.len:
3397                diff = new.len - old.len
3398                for p in positions:
3399                    if p >= newpos:
3400                        continue
3401                    if p + old.len <= newpos:
3402                        newpos += diff
3403                    else:
3404                        newpos = p
3405            self._pos = newpos
3406        except AttributeError:
3407            for p in positions:
3408                self[p:p + old.len] = new
3409        assert self._assertsanity()
3410        return len(lengths) - 1
3411
3412    def insert(self, bs, pos=None):
3413        """Insert bs at bit position pos.
3414
3415        bs -- The bitstring to insert.
3416        pos -- The bit position to insert at.
3417
3418        Raises ValueError if pos < 0 or pos > self.len.
3419
3420        """
3421        bs = Bits(bs)
3422        if not bs.len:
3423            return self
3424        if bs is self:
3425            bs = self.__copy__()
3426        if pos is None:
3427            try:
3428                pos = self._pos
3429            except AttributeError:
3430                raise TypeError("insert needs a bit position specified when used on a BitArray.")
3431        if pos < 0:
3432            pos += self.len
3433        if not 0 <= pos <= self.len:
3434            raise ValueError("Invalid insert position.")
3435        self._insert(bs, pos)
3436
3437    def overwrite(self, bs, pos=None):
3438        """Overwrite with bs at bit position pos.
3439
3440        bs -- The bitstring to overwrite with.
3441        pos -- The bit position to begin overwriting from.
3442
3443        Raises ValueError if pos < 0 or pos + bs.len > self.len
3444
3445        """
3446        bs = Bits(bs)
3447        if not bs.len:
3448            return
3449        if pos is None:
3450            try:
3451                pos = self._pos
3452            except AttributeError:
3453                raise TypeError("overwrite needs a bit position specified when used on a BitArray.")
3454        if pos < 0:
3455            pos += self.len
3456        if pos < 0 or pos + bs.len > self.len:
3457            raise ValueError("Overwrite exceeds boundary of bitstring.")
3458        self._overwrite(bs, pos)
3459        try:
3460            self._pos = pos + bs.len
3461        except AttributeError:
3462            pass
3463
3464    def append(self, bs):
3465        """Append a bitstring to the current bitstring.
3466
3467        bs -- The bitstring to append.
3468
3469        """
3470        self._append(bs)
3471
3472    def prepend(self, bs):
3473        """Prepend a bitstring to the current bitstring.
3474
3475        bs -- The bitstring to prepend.
3476
3477        """
3478        self._prepend(bs)
3479
3480    def _append_msb0(self, bs):
3481        # The offset is a hint to make bs easily appendable.
3482        bs = self._converttobitstring(bs, offset=(self.len + self._offset) % 8)
3483        self._addright(bs)
3484
3485    def _append_lsb0(self, bs):
3486        bs = Bits(bs)
3487        self._addleft(bs)
3488
3489    def reverse(self, start=None, end=None):
3490        """Reverse bits in-place.
3491
3492        start -- Position of first bit to reverse. Defaults to 0.
3493        end -- One past the position of the last bit to reverse.
3494               Defaults to self.len.
3495
3496        Using on an empty bitstring will have no effect.
3497
3498        Raises ValueError if start < 0, end > self.len or end < start.
3499
3500        """
3501        start, end = self._validate_slice(start, end)
3502        if start == 0 and end == self.len:
3503            self._reverse()
3504            return
3505        s = self._slice(start, end)
3506        s._reverse()
3507        self[start:end] = s
3508
3509    def set(self, value, pos=None):
3510        """Set one or many bits to 1 or 0.
3511
3512        value -- If True bits are set to 1, otherwise they are set to 0.
3513        pos -- Either a single bit position or an iterable of bit positions.
3514               Negative numbers are treated in the same way as slice indices.
3515               Defaults to the entire bitstring.
3516
3517        Raises IndexError if pos < -self.len or pos >= self.len.
3518
3519        """
3520        f = self._set if value else self._unset
3521        if pos is None:
3522            pos = xrange(self.len)
3523        try:
3524            length = self.len
3525            for p in pos:
3526                if p < 0:
3527                    p += length
3528                if not 0 <= p < length:
3529                    raise IndexError("Bit position {0} out of range.".format(p))
3530                f(p)
3531        except TypeError:
3532            # Single pos
3533            if pos < 0:
3534                pos += self.len
3535            if not 0 <= pos < length:
3536                raise IndexError("Bit position {0} out of range.".format(pos))
3537            f(pos)
3538
3539    def invert(self, pos=None):
3540        """Invert one or many bits from 0 to 1 or vice versa.
3541
3542        pos -- Either a single bit position or an iterable of bit positions.
3543               Negative numbers are treated in the same way as slice indices.
3544
3545        Raises IndexError if pos < -self.len or pos >= self.len.
3546
3547        """
3548        if pos is None:
3549            self._invert_all()
3550            return
3551        if not isinstance(pos, collectionsAbc.Iterable):
3552            pos = (pos,)
3553        length = self.len
3554
3555        for p in pos:
3556            if p < 0:
3557                p += length
3558            if not 0 <= p < length:
3559                raise IndexError("Bit position {0} out of range.".format(p))
3560            self._invert(p)
3561
3562    def ror(self, bits, start=None, end=None):
3563        """Rotate bits to the right in-place.
3564
3565        bits -- The number of bits to rotate by.
3566        start -- Start of slice to rotate. Defaults to 0.
3567        end -- End of slice to rotate. Defaults to self.len.
3568
3569        Raises ValueError if bits < 0.
3570
3571        """
3572        if not self.len:
3573            raise Error("Cannot rotate an empty bitstring.")
3574        if bits < 0:
3575            raise ValueError("Cannot rotate by negative amount.")
3576        self._ror(bits, start, end)
3577
3578    def _ror_msb0(self, bits, start=None, end=None):
3579        start, end = self._validate_slice_msb0(start, end)  # the _slice deals with msb0/lsb0
3580        bits %= (end - start)
3581        if not bits:
3582            return
3583        rhs = self._slice(end - bits, end)
3584        self._delete(bits, end - bits)
3585        self._insert(rhs, start)
3586
3587    def rol(self, bits, start=None, end=None):
3588        """Rotate bits to the left in-place.
3589
3590        bits -- The number of bits to rotate by.
3591        start -- Start of slice to rotate. Defaults to 0.
3592        end -- End of slice to rotate. Defaults to self.len.
3593
3594        Raises ValueError if bits < 0.
3595
3596        """
3597        if not self.len:
3598            raise Error("Cannot rotate an empty bitstring.")
3599        if bits < 0:
3600            raise ValueError("Cannot rotate by negative amount.")
3601        self._rol(bits, start, end)
3602
3603    def _rol_msb0(self, bits, start=None, end=None):
3604        start, end = self._validate_slice_msb0(start, end)
3605        bits %= (end - start)
3606        if not bits:
3607            return
3608        lhs = self._slice(start, start + bits)
3609        self._delete(bits, start)
3610        self._insert(lhs, end - bits)
3611
3612    def byteswap(self, fmt=None, start=None, end=None, repeat=True):
3613        """Change the endianness in-place. Return number of repeats of fmt done.
3614
3615        fmt -- A compact structure string, an integer number of bytes or
3616               an iterable of integers. Defaults to 0, which byte reverses the
3617               whole bitstring.
3618        start -- Start bit position, defaults to 0.
3619        end -- End bit position, defaults to self.len.
3620        repeat -- If True (the default) the byte swapping pattern is repeated
3621                  as much as possible.
3622
3623        """
3624        start, end = self._validate_slice(start, end)
3625        if fmt is None or fmt == 0:
3626            # reverse all of the whole bytes.
3627            bytesizes = [(end - start) // 8]
3628        elif isinstance(fmt, numbers.Integral):
3629            if fmt < 0:
3630                raise ValueError("Improper byte length {0}.".format(fmt))
3631            bytesizes = [fmt]
3632        elif isinstance(fmt, basestring):
3633            m = STRUCT_PACK_RE.match(fmt)
3634            if not m:
3635                raise ValueError("Cannot parse format string {0}.".format(fmt))
3636            # Split the format string into a list of 'q', '4h' etc.
3637            formatlist = re.findall(STRUCT_SPLIT_RE, m.group('fmt'))
3638            # Now deal with multiplicative factors, 4h -> hhhh etc.
3639            bytesizes = []
3640            for f in formatlist:
3641                if len(f) == 1:
3642                    bytesizes.append(PACK_CODE_SIZE[f])
3643                else:
3644                    bytesizes.extend([PACK_CODE_SIZE[f[-1]]] * int(f[:-1]))
3645        elif isinstance(fmt, collectionsAbc.Iterable):
3646            bytesizes = fmt
3647            for bytesize in bytesizes:
3648                if not isinstance(bytesize, numbers.Integral) or bytesize < 0:
3649                    raise ValueError("Improper byte length {0}.".format(bytesize))
3650        else:
3651            raise TypeError("Format must be an integer, string or iterable.")
3652
3653        repeats = 0
3654        totalbitsize = 8 * sum(bytesizes)
3655        if not totalbitsize:
3656            return 0
3657        if repeat:
3658            # Try to repeat up to the end of the bitstring.
3659            finalbit = end
3660        else:
3661            # Just try one (set of) byteswap(s).
3662            finalbit = start + totalbitsize
3663        for patternend in xrange(start + totalbitsize, finalbit + 1, totalbitsize):
3664            bytestart = patternend - totalbitsize
3665            for bytesize in bytesizes:
3666                byteend = bytestart + bytesize * 8
3667                self._reversebytes(bytestart, byteend)
3668                bytestart += bytesize * 8
3669            repeats += 1
3670        return repeats
3671
3672    def clear(self):
3673        """Remove all bits, reset to zero length."""
3674        self._clear()
3675
3676    def copy(self):
3677        """Return a copy of the bitstring."""
3678        return self._copy()
3679
3680    int = property(Bits._getint, Bits._setint,
3681                   doc="""The bitstring as a two's complement signed int. Read and write.
3682                      """)
3683    uint = property(Bits._getuint, Bits._setuint,
3684                    doc="""The bitstring as a two's complement unsigned int. Read and write.
3685                      """)
3686    float = property(Bits._getfloat, Bits._setfloat,
3687                     doc="""The bitstring as a floating point number. Read and write.
3688                      """)
3689    intbe = property(Bits._getintbe, Bits._setintbe,
3690                     doc="""The bitstring as a two's complement big-endian signed int. Read and write.
3691                      """)
3692    uintbe = property(Bits._getuintbe, Bits._setuintbe,
3693                      doc="""The bitstring as a two's complement big-endian unsigned int. Read and write.
3694                      """)
3695    floatbe = property(Bits._getfloat, Bits._setfloat,
3696                       doc="""The bitstring as a big-endian floating point number. Read and write.
3697                      """)
3698    intle = property(Bits._getintle, Bits._setintle,
3699                     doc="""The bitstring as a two's complement little-endian signed int. Read and write.
3700                      """)
3701    uintle = property(Bits._getuintle, Bits._setuintle,
3702                      doc="""The bitstring as a two's complement little-endian unsigned int. Read and write.
3703                      """)
3704    floatle = property(Bits._getfloatle, Bits._setfloatle,
3705                       doc="""The bitstring as a little-endian floating point number. Read and write.
3706                      """)
3707    intne = property(Bits._getintne, Bits._setintne,
3708                     doc="""The bitstring as a two's complement native-endian signed int. Read and write.
3709                      """)
3710    uintne = property(Bits._getuintne, Bits._setuintne,
3711                      doc="""The bitstring as a two's complement native-endian unsigned int. Read and write.
3712                      """)
3713    floatne = property(Bits._getfloatne, Bits._setfloatne,
3714                       doc="""The bitstring as a native-endian floating point number. Read and write.
3715                      """)
3716    ue = property(Bits._getue, Bits._setue,
3717                  doc="""The bitstring as an unsigned exponential-Golomb code. Read and write.
3718                      """)
3719    se = property(Bits._getse, Bits._setse,
3720                  doc="""The bitstring as a signed exponential-Golomb code. Read and write.
3721                      """)
3722    uie = property(Bits._getuie, Bits._setuie,
3723                  doc="""The bitstring as an unsigned interleaved exponential-Golomb code. Read and write.
3724                      """)
3725    sie = property(Bits._getsie, Bits._setsie,
3726                  doc="""The bitstring as a signed interleaved exponential-Golomb code. Read and write.
3727                      """)
3728    hex = property(Bits._gethex, Bits._sethex,
3729                   doc="""The bitstring as a hexadecimal string. Read and write.
3730                       """)
3731    bin = property(Bits._getbin, Bits._setbin_safe,
3732                   doc="""The bitstring as a binary string. Read and write.
3733                       """)
3734    oct = property(Bits._getoct, Bits._setoct,
3735                   doc="""The bitstring as an octal string. Read and write.
3736                       """)
3737    bool = property(Bits._getbool, Bits._setbool,
3738                    doc="""The bitstring as a bool (True or False). Read and write.
3739                    """)
3740    bytes = property(Bits._getbytes, Bits._setbytes_safe,
3741                     doc="""The bitstring as a ordinary string. Read and write.
3742                      """)
3743
3744
3745
3746class ConstBitStream(Bits):
3747    """A container or stream holding an immutable sequence of bits.
3748
3749    For a mutable container use the BitStream class instead.
3750
3751    Methods inherited from Bits:
3752
3753    all() -- Check if all specified bits are set to 1 or 0.
3754    any() -- Check if any of specified bits are set to 1 or 0.
3755    count() -- Count the number of bits set to 1 or 0.
3756    cut() -- Create generator of constant sized chunks.
3757    endswith() -- Return whether the bitstring ends with a sub-string.
3758    find() -- Find a sub-bitstring in the current bitstring.
3759    findall() -- Find all occurrences of a sub-bitstring in the current bitstring.
3760    join() -- Join bitstrings together using current bitstring.
3761    rfind() -- Seek backwards to find a sub-bitstring.
3762    split() -- Create generator of chunks split by a delimiter.
3763    startswith() -- Return whether the bitstring starts with a sub-bitstring.
3764    tobytes() -- Return bitstring as bytes, padding if needed.
3765    tofile() -- Write bitstring to file, padding if needed.
3766    unpack() -- Interpret bits using format string.
3767
3768    Other methods:
3769
3770    bytealign() -- Align to next byte boundary.
3771    peek() -- Peek at and interpret next bits as a single item.
3772    peeklist() -- Peek at and interpret next bits as a list of items.
3773    read() -- Read and interpret next bits as a single item.
3774    readlist() -- Read and interpret next bits as a list of items.
3775
3776    Special methods:
3777
3778    Also available are the operators [], ==, !=, +, *, ~, <<, >>, &, |, ^.
3779
3780    Properties:
3781
3782    bin -- The bitstring as a binary string.
3783    bool -- For single bit bitstrings, interpret as True or False.
3784    bytepos -- The current byte position in the bitstring.
3785    bytes -- The bitstring as a bytes object.
3786    float -- Interpret as a floating point number.
3787    floatbe -- Interpret as a big-endian floating point number.
3788    floatle -- Interpret as a little-endian floating point number.
3789    floatne -- Interpret as a native-endian floating point number.
3790    hex -- The bitstring as a hexadecimal string.
3791    int -- Interpret as a two's complement signed integer.
3792    intbe -- Interpret as a big-endian signed integer.
3793    intle -- Interpret as a little-endian signed integer.
3794    intne -- Interpret as a native-endian signed integer.
3795    len -- Length of the bitstring in bits.
3796    oct -- The bitstring as an octal string.
3797    pos -- The current bit position in the bitstring.
3798    se -- Interpret as a signed exponential-Golomb code.
3799    ue -- Interpret as an unsigned exponential-Golomb code.
3800    sie -- Interpret as a signed interleaved exponential-Golomb code.
3801    uie -- Interpret as an unsigned interleaved exponential-Golomb code.
3802    uint -- Interpret as a two's complement unsigned integer.
3803    uintbe -- Interpret as a big-endian unsigned integer.
3804    uintle -- Interpret as a little-endian unsigned integer.
3805    uintne -- Interpret as a native-endian unsigned integer.
3806
3807    """
3808
3809    __slots__ = ('_pos',)
3810
3811    def __init__(self, auto=None, length=None, offset=None, pos=0, **kwargs):
3812        """Either specify an 'auto' initialiser:
3813        auto -- a string of comma separated tokens, an integer, a file object,
3814                a bytearray, a boolean iterable or another bitstring.
3815
3816        Or initialise via **kwargs with one (and only one) of:
3817        bytes -- raw data as a string, for example read from a binary file.
3818        bin -- binary string representation, e.g. '0b001010'.
3819        hex -- hexadecimal string representation, e.g. '0x2ef'
3820        oct -- octal string representation, e.g. '0o777'.
3821        uint -- an unsigned integer.
3822        int -- a signed integer.
3823        float -- a floating point number.
3824        uintbe -- an unsigned big-endian whole byte integer.
3825        intbe -- a signed big-endian whole byte integer.
3826        floatbe - a big-endian floating point number.
3827        uintle -- an unsigned little-endian whole byte integer.
3828        intle -- a signed little-endian whole byte integer.
3829        floatle -- a little-endian floating point number.
3830        uintne -- an unsigned native-endian whole byte integer.
3831        intne -- a signed native-endian whole byte integer.
3832        floatne -- a native-endian floating point number.
3833        se -- a signed exponential-Golomb code.
3834        ue -- an unsigned exponential-Golomb code.
3835        sie -- a signed interleaved exponential-Golomb code.
3836        uie -- an unsigned interleaved exponential-Golomb code.
3837        bool -- a boolean (True or False).
3838        filename -- a file which will be opened in binary read-only mode.
3839
3840        Other keyword arguments:
3841        length -- length of the bitstring in bits, if needed and appropriate.
3842                  It must be supplied for all integer and float initialisers.
3843        offset -- bit offset to the data. These offset bits are
3844                  ignored and this is intended for use when
3845                  initialising using 'bytes' or 'filename'.
3846        pos -- Initial bit position, defaults to 0.
3847
3848        """
3849        pass
3850
3851    def __new__(cls, auto=None, length=None, offset=None, pos=0, **kwargs):
3852        x = super(ConstBitStream, cls).__new__(cls)
3853        x._initialise(auto, length, offset, **kwargs)
3854        x._pos = x._datastore.bitlength + pos if pos < 0 else pos
3855        if x._pos < 0 or x._pos > x._datastore.bitlength:
3856            raise CreationError("Cannot set pos to {0} when length is {1}".format(pos, x._datastore.bitlength))
3857        return x
3858
3859    def _setbytepos(self, bytepos):
3860        """Move to absolute byte-aligned position in stream."""
3861        self._setbitpos(bytepos * 8)
3862
3863    def _getbytepos(self):
3864        """Return the current position in the stream in bytes. Must be byte aligned."""
3865        if self._pos % 8:
3866            raise ByteAlignError("Not byte aligned in _getbytepos().")
3867        return self._pos // 8
3868
3869    def _setbitpos(self, pos):
3870        """Move to absolute position bit in bitstream."""
3871        if pos < 0:
3872            raise ValueError("Bit position cannot be negative.")
3873        if pos > self.len:
3874            raise ValueError("Cannot seek past the end of the data.")
3875        self._pos = pos
3876
3877    def _getbitpos(self):
3878        """Return the current position in the stream in bits."""
3879        return self._pos
3880
3881    def _clear(self):
3882        Bits._clear(self)
3883        self._pos = 0
3884
3885    def __copy__(self):
3886        """Return a new copy of the ConstBitStream for the copy module."""
3887        # Note that if you want a new copy (different ID), use _copy instead.
3888        # The copy can use the same datastore as it's immutable.
3889        s = ConstBitStream()
3890        s._datastore = self._datastore
3891        # Reset the bit position, don't copy it.
3892        s._pos = 0
3893        return s
3894
3895    def __add__(self, bs):
3896        """Concatenate bitstrings and return new bitstring.
3897
3898        bs -- the bitstring to append.
3899
3900        """
3901        s = Bits.__add__(self, bs)
3902        s._pos = 0
3903        return s
3904
3905    def read(self, fmt):
3906        """Interpret next bits according to the format string and return result.
3907
3908        fmt -- Token string describing how to interpret the next bits.
3909
3910        Token examples: 'int:12'    : 12 bits as a signed integer
3911                        'uint:8'    : 8 bits as an unsigned integer
3912                        'float:64'  : 8 bytes as a big-endian float
3913                        'intbe:16'  : 2 bytes as a big-endian signed integer
3914                        'uintbe:16' : 2 bytes as a big-endian unsigned integer
3915                        'intle:32'  : 4 bytes as a little-endian signed integer
3916                        'uintle:32' : 4 bytes as a little-endian unsigned integer
3917                        'floatle:64': 8 bytes as a little-endian float
3918                        'intne:24'  : 3 bytes as a native-endian signed integer
3919                        'uintne:24' : 3 bytes as a native-endian unsigned integer
3920                        'floatne:32': 4 bytes as a native-endian float
3921                        'hex:80'    : 80 bits as a hex string
3922                        'oct:9'     : 9 bits as an octal string
3923                        'bin:1'     : single bit binary string
3924                        'ue'        : next bits as unsigned exp-Golomb code
3925                        'se'        : next bits as signed exp-Golomb code
3926                        'uie'       : next bits as unsigned interleaved exp-Golomb code
3927                        'sie'       : next bits as signed interleaved exp-Golomb code
3928                        'bits:5'    : 5 bits as a bitstring
3929                        'bytes:10'  : 10 bytes as a bytes object
3930                        'bool'      : 1 bit as a bool
3931                        'pad:3'     : 3 bits of padding to ignore - returns None
3932
3933        fmt may also be an integer, which will be treated like the 'bits' token.
3934
3935        The position in the bitstring is advanced to after the read items.
3936
3937        Raises ReadError if not enough bits are available.
3938        Raises ValueError if the format is not understood.
3939
3940        """
3941        if isinstance(fmt, numbers.Integral):
3942            if fmt < 0:
3943                raise ValueError("Cannot read negative amount.")
3944            if fmt > self.len - self._pos:
3945                raise ReadError("Cannot read {0} bits, only {1} available.",
3946                                fmt, self.len - self._pos)
3947            bs = self._slice(self._pos, self._pos + fmt)
3948            self._pos += fmt
3949            return bs
3950        p = self._pos
3951        _, token = tokenparser(fmt)
3952        if len(token) != 1:
3953            self._pos = p
3954            raise ValueError("Format string should be a single token, not {0} "
3955                             "tokens - use readlist() instead.".format(len(token)))
3956        name, length, _ = token[0]
3957        if length is None:
3958            length = self.len - self._pos
3959        value, self._pos = self._readtoken(name, self._pos, length)
3960        return value
3961
3962    def readlist(self, fmt, **kwargs):
3963        """Interpret next bits according to format string(s) and return list.
3964
3965        fmt -- A single string or list of strings with comma separated tokens
3966               describing how to interpret the next bits in the bitstring. Items
3967               can also be integers, for reading new bitstring of the given length.
3968        kwargs -- A dictionary or keyword-value pairs - the keywords used in the
3969                  format string will be replaced with their given value.
3970
3971        The position in the bitstring is advanced to after the read items.
3972
3973        Raises ReadError is not enough bits are available.
3974        Raises ValueError if the format is not understood.
3975
3976        See the docstring for 'read' for token examples. 'pad' tokens are skipped
3977        and not added to the returned list.
3978
3979        >>> h, b1, b2 = s.readlist('hex:20, bin:5, bin:3')
3980        >>> i, bs1, bs2 = s.readlist(['uint:12', 10, 10])
3981
3982        """
3983        value, self._pos = self._readlist(fmt, self._pos, **kwargs)
3984        return value
3985
3986    def readto(self, bs, bytealigned=None):
3987        """Read up to and including next occurrence of bs and return result.
3988
3989        bs -- The bitstring to find. An integer is not permitted.
3990        bytealigned -- If True the bitstring will only be
3991                       found on byte boundaries.
3992
3993        Raises ValueError if bs is empty.
3994        Raises ReadError if bs is not found.
3995
3996        """
3997        if isinstance(bs, numbers.Integral):
3998            raise ValueError("Integers cannot be searched for")
3999        bs = Bits(bs)
4000        oldpos = self._pos
4001        p = self.find(bs, self._pos, bytealigned=bytealigned)
4002        if not p:
4003            raise ReadError("Substring not found")
4004        self._pos += bs.len
4005        return self._slice(oldpos, self._pos)
4006
4007    def peek(self, fmt):
4008        """Interpret next bits according to format string and return result.
4009
4010        fmt -- Token string describing how to interpret the next bits.
4011
4012        The position in the bitstring is not changed. If not enough bits are
4013        available then all bits to the end of the bitstring will be used.
4014
4015        Raises ReadError if not enough bits are available.
4016        Raises ValueError if the format is not understood.
4017
4018        See the docstring for 'read' for token examples.
4019
4020        """
4021        pos_before = self._pos
4022        value = self.read(fmt)
4023        self._pos = pos_before
4024        return value
4025
4026    def peeklist(self, fmt, **kwargs):
4027        """Interpret next bits according to format string(s) and return list.
4028
4029        fmt -- One or more strings with comma separated tokens describing
4030               how to interpret the next bits in the bitstring.
4031        kwargs -- A dictionary or keyword-value pairs - the keywords used in the
4032                  format string will be replaced with their given value.
4033
4034        The position in the bitstring is not changed. If not enough bits are
4035        available then all bits to the end of the bitstring will be used.
4036
4037        Raises ReadError if not enough bits are available.
4038        Raises ValueError if the format is not understood.
4039
4040        See the docstring for 'read' for token examples.
4041
4042        """
4043        pos = self._pos
4044        return_values = self.readlist(fmt, **kwargs)
4045        self._pos = pos
4046        return return_values
4047
4048    def bytealign(self):
4049        """Align to next byte and return number of skipped bits.
4050
4051        Raises ValueError if the end of the bitstring is reached before
4052        aligning to the next byte.
4053
4054        """
4055        skipped = (8 - (self._pos % 8)) % 8
4056        self.pos += self._offset + skipped
4057        assert self._assertsanity()
4058        return skipped
4059
4060    pos = property(_getbitpos, _setbitpos,
4061                   doc="""The position in the bitstring in bits. Read and write.
4062                      """)
4063    bitpos = property(_getbitpos, _setbitpos,
4064                      doc="""The position in the bitstring in bits. Read and write.
4065                      """)
4066    bytepos = property(_getbytepos, _setbytepos,
4067                       doc="""The position in the bitstring in bytes. Read and write.
4068                      """)
4069
4070
4071class BitStream(ConstBitStream, BitArray):
4072    """A container or stream holding a mutable sequence of bits
4073
4074    Subclass of the ConstBitStream and BitArray classes. Inherits all of
4075    their methods.
4076
4077    Methods:
4078
4079    all() -- Check if all specified bits are set to 1 or 0.
4080    any() -- Check if any of specified bits are set to 1 or 0.
4081    append() -- Append a bitstring.
4082    bytealign() -- Align to next byte boundary.
4083    byteswap() -- Change byte endianness in-place.
4084    count() -- Count the number of bits set to 1 or 0.
4085    cut() -- Create generator of constant sized chunks.
4086    endswith() -- Return whether the bitstring ends with a sub-string.
4087    find() -- Find a sub-bitstring in the current bitstring.
4088    findall() -- Find all occurrences of a sub-bitstring in the current bitstring.
4089    insert() -- Insert a bitstring.
4090    invert() -- Flip bit(s) between one and zero.
4091    join() -- Join bitstrings together using current bitstring.
4092    overwrite() -- Overwrite a section with a new bitstring.
4093    peek() -- Peek at and interpret next bits as a single item.
4094    peeklist() -- Peek at and interpret next bits as a list of items.
4095    prepend() -- Prepend a bitstring.
4096    read() -- Read and interpret next bits as a single item.
4097    readlist() -- Read and interpret next bits as a list of items.
4098    replace() -- Replace occurrences of one bitstring with another.
4099    reverse() -- Reverse bits in-place.
4100    rfind() -- Seek backwards to find a sub-bitstring.
4101    rol() -- Rotate bits to the left.
4102    ror() -- Rotate bits to the right.
4103    set() -- Set bit(s) to 1 or 0.
4104    split() -- Create generator of chunks split by a delimiter.
4105    startswith() -- Return whether the bitstring starts with a sub-bitstring.
4106    tobytes() -- Return bitstring as bytes, padding if needed.
4107    tofile() -- Write bitstring to file, padding if needed.
4108    unpack() -- Interpret bits using format string.
4109
4110    Special methods:
4111
4112    Mutating operators are available: [], <<=, >>=, +=, *=, &=, |= and ^=
4113    in addition to [], ==, !=, +, *, ~, <<, >>, &, | and ^.
4114
4115    Properties:
4116
4117    bin -- The bitstring as a binary string.
4118    bool -- For single bit bitstrings, interpret as True or False.
4119    bytepos -- The current byte position in the bitstring.
4120    bytes -- The bitstring as a bytes object.
4121    float -- Interpret as a floating point number.
4122    floatbe -- Interpret as a big-endian floating point number.
4123    floatle -- Interpret as a little-endian floating point number.
4124    floatne -- Interpret as a native-endian floating point number.
4125    hex -- The bitstring as a hexadecimal string.
4126    int -- Interpret as a two's complement signed integer.
4127    intbe -- Interpret as a big-endian signed integer.
4128    intle -- Interpret as a little-endian signed integer.
4129    intne -- Interpret as a native-endian signed integer.
4130    len -- Length of the bitstring in bits.
4131    oct -- The bitstring as an octal string.
4132    pos -- The current bit position in the bitstring.
4133    se -- Interpret as a signed exponential-Golomb code.
4134    ue -- Interpret as an unsigned exponential-Golomb code.
4135    sie -- Interpret as a signed interleaved exponential-Golomb code.
4136    uie -- Interpret as an unsigned interleaved exponential-Golomb code.
4137    uint -- Interpret as a two's complement unsigned integer.
4138    uintbe -- Interpret as a big-endian unsigned integer.
4139    uintle -- Interpret as a little-endian unsigned integer.
4140    uintne -- Interpret as a native-endian unsigned integer.
4141
4142    """
4143
4144    __slots__ = ()
4145
4146    # As BitStream objects are mutable, we shouldn't allow them to be hashed.
4147    __hash__ = None
4148
4149    def __init__(self, auto=None, length=None, offset=None, pos=0, **kwargs):
4150        """Either specify an 'auto' initialiser:
4151        auto -- a string of comma separated tokens, an integer, a file object,
4152                a bytearray, a boolean iterable or another bitstring.
4153
4154        Or initialise via **kwargs with one (and only one) of:
4155        bytes -- raw data as a string, for example read from a binary file.
4156        bin -- binary string representation, e.g. '0b001010'.
4157        hex -- hexadecimal string representation, e.g. '0x2ef'
4158        oct -- octal string representation, e.g. '0o777'.
4159        uint -- an unsigned integer.
4160        int -- a signed integer.
4161        float -- a floating point number.
4162        uintbe -- an unsigned big-endian whole byte integer.
4163        intbe -- a signed big-endian whole byte integer.
4164        floatbe - a big-endian floating point number.
4165        uintle -- an unsigned little-endian whole byte integer.
4166        intle -- a signed little-endian whole byte integer.
4167        floatle -- a little-endian floating point number.
4168        uintne -- an unsigned native-endian whole byte integer.
4169        intne -- a signed native-endian whole byte integer.
4170        floatne -- a native-endian floating point number.
4171        se -- a signed exponential-Golomb code.
4172        ue -- an unsigned exponential-Golomb code.
4173        sie -- a signed interleaved exponential-Golomb code.
4174        uie -- an unsigned interleaved exponential-Golomb code.
4175        bool -- a boolean (True or False).
4176        filename -- a file which will be opened in binary read-only mode.
4177
4178        Other keyword arguments:
4179        length -- length of the bitstring in bits, if needed and appropriate.
4180                  It must be supplied for all integer and float initialisers.
4181        offset -- bit offset to the data. These offset bits are
4182                  ignored and this is intended for use when
4183                  initialising using 'bytes' or 'filename'.
4184        pos -- Initial bit position, defaults to 0.
4185
4186        """
4187        # For mutable BitStreams we always read in files to memory:
4188        if not isinstance(self._datastore, (ByteStore, ConstByteStore)):
4189            self._ensureinmemory()
4190
4191    def __new__(cls, auto=None, length=None, offset=None, pos=0, **kwargs):
4192        x = super(BitStream, cls).__new__(cls)
4193        y = ConstBitStream.__new__(BitStream, auto, length, offset, pos, **kwargs)
4194        x._datastore = ByteStore(y._datastore._rawarray[:],
4195                                          y._datastore.bitlength,
4196                                          y._datastore.offset)
4197        x._pos = y._pos
4198        return x
4199
4200    def __copy__(self):
4201        """Return a new copy of the BitStream."""
4202        s_copy = BitStream()
4203        s_copy._pos = 0
4204        if not isinstance(self._datastore, ByteStore):
4205            # Let them both point to the same (invariant) array.
4206            # If either gets modified then at that point they'll be read into memory.
4207            s_copy._datastore = self._datastore
4208        else:
4209            s_copy._datastore = ByteStore(self._datastore._rawarray[:],
4210                                          self._datastore.bitlength,
4211                                          self._datastore.offset)
4212        return s_copy
4213
4214    def prepend(self, bs):
4215        """Prepend a bitstring to the current bitstring.
4216
4217        bs -- The bitstring to prepend.
4218
4219        """
4220        bs = self._converttobitstring(bs)
4221        self._addleft(bs)
4222        self._pos += bs.len
4223
4224
4225def pack(fmt, *values, **kwargs):
4226    """Pack the values according to the format string and return a new BitStream.
4227
4228    fmt -- A single string or a list of strings with comma separated tokens
4229           describing how to create the BitStream.
4230    values -- Zero or more values to pack according to the format.
4231    kwargs -- A dictionary or keyword-value pairs - the keywords used in the
4232              format string will be replaced with their given value.
4233
4234    Token examples: 'int:12'    : 12 bits as a signed integer
4235                    'uint:8'    : 8 bits as an unsigned integer
4236                    'float:64'  : 8 bytes as a big-endian float
4237                    'intbe:16'  : 2 bytes as a big-endian signed integer
4238                    'uintbe:16' : 2 bytes as a big-endian unsigned integer
4239                    'intle:32'  : 4 bytes as a little-endian signed integer
4240                    'uintle:32' : 4 bytes as a little-endian unsigned integer
4241                    'floatle:64': 8 bytes as a little-endian float
4242                    'intne:24'  : 3 bytes as a native-endian signed integer
4243                    'uintne:24' : 3 bytes as a native-endian unsigned integer
4244                    'floatne:32': 4 bytes as a native-endian float
4245                    'hex:80'    : 80 bits as a hex string
4246                    'oct:9'     : 9 bits as an octal string
4247                    'bin:1'     : single bit binary string
4248                    'ue' / 'uie': next bits as unsigned exp-Golomb code
4249                    'se' / 'sie': next bits as signed exp-Golomb code
4250                    'bits:5'    : 5 bits as a bitstring object
4251                    'bytes:10'  : 10 bytes as a bytes object
4252                    'bool'      : 1 bit as a bool
4253                    'pad:3'     : 3 zero bits as padding
4254
4255    >>> s = pack('uint:12, bits', 100, '0xffe')
4256    >>> t = pack(['bits', 'bin:3'], s, '111')
4257    >>> u = pack('uint:8=a, uint:8=b, uint:55=a', a=6, b=44)
4258
4259    """
4260    tokens = []
4261    if isinstance(fmt, basestring):
4262        fmt = [fmt]
4263    try:
4264        for f_item in fmt:
4265            _, tkns = tokenparser(f_item, tuple(sorted(kwargs.keys())))
4266            tokens.extend(tkns)
4267    except ValueError as e:
4268        raise CreationError(*e.args)
4269    value_iter = iter(values)
4270    s = BitStream()
4271    try:
4272        for name, length, value in tokens:
4273            # If the value is in the kwd dictionary then it takes precedence.
4274            if value in kwargs:
4275                value = kwargs[value]
4276            # If the length is in the kwd dictionary then use that too.
4277            if length in kwargs:
4278                length = kwargs[length]
4279            # Also if we just have a dictionary name then we want to use it
4280            if name in kwargs and length is None and value is None:
4281                s._append(kwargs[name])
4282                continue
4283            if length is not None:
4284                length = int(length)
4285            if value is None and name != 'pad':
4286                # Take the next value from the ones provided
4287                value = next(value_iter)
4288            s._addright(BitStream._init_with_token(name, length, value))
4289    except StopIteration:
4290        raise CreationError("Not enough parameters present to pack according to the "
4291                            "format. {0} values are needed.", len(tokens))
4292    try:
4293        next(value_iter)
4294    except StopIteration:
4295        # Good, we've used up all the *values.
4296        return s
4297    raise CreationError("Too many parameters present to pack according to the format.")
4298
4299
4300# Whether to label the Least Significant Bit as bit 0. Default is False. Experimental feature.
4301_lsb0 = False
4302
4303# Dictionary that maps token names to the function that reads them. Is set in next function.
4304name_to_read = {}
4305
4306
4307def _switch_lsb0_methods(lsb0):
4308    global _lsb0
4309    _lsb0 = lsb0
4310    if lsb0:
4311        ConstByteStore.getbit = ConstByteStore._getbit_lsb0
4312        Bits._find = Bits._find_lsb0
4313        Bits._slice = Bits._slice_lsb0
4314        BitArray._overwrite = BitArray._overwrite_lsb0
4315        BitArray._insert = BitArray._insert_lsb0
4316        BitArray._delete = BitArray._delete_lsb0
4317        BitArray._ror = BitArray._rol_msb0
4318        BitArray._rol = BitArray._ror_msb0
4319        ByteStore.setbit = ByteStore._setbit_lsb0
4320        ByteStore.unsetbit = ByteStore._unsetbit_lsb0
4321        ByteStore.invertbit = ByteStore._invertbit_lsb0
4322        BitArray._append = BitArray._append_lsb0
4323        BitArray._prepend = BitArray._append_msb0  # An LSB0 prepend is an MSB0 append
4324        Bits._readuint = Bits._readuint_lsb0
4325        Bits._truncatestart = Bits._truncateright
4326        Bits._truncateend = Bits._truncateleft
4327        Bits._validate_slice = Bits._validate_slice_lsb0
4328    else:
4329        ConstByteStore.getbit = ConstByteStore._getbit_msb0
4330        Bits._find = Bits._find_msb0
4331        Bits._slice = Bits._slice_msb0
4332        BitArray._overwrite = BitArray._overwrite_msb0
4333        BitArray._insert = BitArray._insert_msb0
4334        BitArray._delete = BitArray._delete_msb0
4335        BitArray._ror = BitArray._ror_msb0
4336        BitArray._rol = BitArray._rol_msb0
4337        ByteStore.setbit = ByteStore._setbit_msb0
4338        ByteStore.unsetbit = ByteStore._unsetbit_msb0
4339        ByteStore.invertbit = ByteStore._invertbit_msb0
4340        BitArray._append = BitArray._append_msb0
4341        BitArray._prepend = BitArray._append_lsb0
4342        Bits._readuint = Bits._readuint_msb0
4343        Bits._truncatestart = Bits._truncateleft
4344        Bits._truncateend = Bits._truncateright
4345        Bits._validate_slice = Bits._validate_slice_msb0
4346
4347    global name_to_read
4348    name_to_read = {'uint': Bits._readuint,
4349                    'uintle': Bits._readuintle,
4350                    'uintbe': Bits._readuintbe,
4351                    'uintne': Bits._readuintne,
4352                    'int': Bits._readint,
4353                    'intle': Bits._readintle,
4354                    'intbe': Bits._readintbe,
4355                    'intne': Bits._readintne,
4356                    'float': Bits._readfloat,
4357                    'floatbe': Bits._readfloat,  # floatbe is a synonym for float
4358                    'floatle': Bits._readfloatle,
4359                    'floatne': Bits._readfloatne,
4360                    'hex': Bits._readhex,
4361                    'oct': Bits._readoct,
4362                    'bin': Bits._readbin,
4363                    'bits': Bits._readbits,
4364                    'bytes': Bits._readbytes,
4365                    'ue': Bits._readue,
4366                    'se': Bits._readse,
4367                    'uie': Bits._readuie,
4368                    'sie': Bits._readsie,
4369                    'bool': Bits._readbool,
4370                    }
4371
4372
4373def set_lsb0(v=True):
4374    """Experimental method changing the bit numbering so that the least significant bit is bit 0"""
4375    _switch_lsb0_methods(v)
4376
4377
4378def set_msb0(v=True):
4379    """Experimental method to reset the bit numbering so that the most significant bit is bit 0"""
4380    set_lsb0(not v)
4381
4382
4383# Initialise the default behaviour
4384set_msb0()
4385
4386
4387# Dictionaries for mapping init keywords with init functions.
4388init_with_length_and_offset = {'bytes': Bits._setbytes_safe,
4389                               'filename': Bits._setfile,
4390                               }
4391
4392init_with_length_only = {'uint': Bits._setuint,
4393                         'int': Bits._setint,
4394                         'float': Bits._setfloat,
4395                         'uintbe': Bits._setuintbe,
4396                         'intbe': Bits._setintbe,
4397                         'floatbe': Bits._setfloat,
4398                         'uintle': Bits._setuintle,
4399                         'intle': Bits._setintle,
4400                         'floatle': Bits._setfloatle,
4401                         'uintne': Bits._setuintne,
4402                         'intne': Bits._setintne,
4403                         'floatne': Bits._setfloatne,
4404                         }
4405
4406init_without_length_or_offset = {'bin': Bits._setbin_safe,
4407                                 'hex': Bits._sethex,
4408                                 'oct': Bits._setoct,
4409                                 'ue': Bits._setue,
4410                                 'se': Bits._setse,
4411                                 'uie': Bits._setuie,
4412                                 'sie': Bits._setsie,
4413                                 'bool': Bits._setbool,
4414                                 }
4415
4416
4417# Aliases for backward compatibility
4418ConstBitArray = Bits
4419BitString = BitStream
4420
4421__all__ = ['ConstBitArray', 'ConstBitStream', 'BitStream', 'BitArray',
4422           'Bits', 'BitString', 'pack', 'Error', 'ReadError', 'InterpretError',
4423           'ByteAlignError', 'CreationError', 'bytealigned', 'set_lsb0', 'set_msb0']
4424
4425
4426def main():
4427    # check if final parameter is an interpretation string
4428    fp = sys.argv[-1]
4429    if fp == '--help' or len(sys.argv) == 1:
4430        print("""Create and interpret a bitstring from command-line parameters.
4431
4432Command-line parameters are concatenated and a bitstring created
4433from them. If the final parameter is either an interpretation string
4434or ends with a '.' followed by an interpretation string then that
4435interpretation of the bitstring will be used when printing it.
4436
4437Typical usage might be invoking the Python module from a console
4438as a one-off calculation:
4439
4440$ python -m bitstring int:16=-400
44410xfe70
4442$ python -m bitstring float:32=0.2 bin
444300111110010011001100110011001101
4444$ python -m bitstring 0xff 3*0b01,0b11 uint
444565367
4446$ python -m bitstring hex=01, uint:12=352.hex
444701160
4448
4449This feature is experimental and is subject to change or removal.
4450        """)
4451    elif fp in name_to_read.keys():
4452        # concatenate all other parameters and interpret using the final one
4453        b1 = Bits(','.join(sys.argv[1: -1]))
4454        print(b1._readtoken(fp, 0, b1.__len__())[0])
4455    else:
4456        # does final parameter end with a dot then an interpretation string?
4457        interp = fp[fp.rfind('.') + 1:]
4458        if interp in name_to_read.keys():
4459            sys.argv[-1] = fp[:fp.rfind('.')]
4460            b1 = Bits(','.join(sys.argv[1:]))
4461            print(b1._readtoken(interp, 0, b1.__len__())[0])
4462        else:
4463            # No interpretation - just use default print
4464            b1 = Bits(','.join(sys.argv[1:]))
4465            print(b1)
4466
4467
4468if __name__ == '__main__':
4469    main()
4470