1from .lib.py3compat import int2byte
2from .lib import (BitStreamReader, BitStreamWriter, encode_bin,
3    decode_bin)
4from .core import (Struct, MetaField, StaticField, FormatField,
5    OnDemand, Pointer, Switch, Value, RepeatUntil, MetaArray, Sequence, Range,
6    Select, Pass, SizeofError, Buffered, Restream, Reconfig)
7from .adapters import (BitIntegerAdapter, PaddingAdapter,
8    ConstAdapter, CStringAdapter, LengthValueAdapter, IndexingAdapter,
9    PaddedStringAdapter, FlagsAdapter, StringAdapter, MappingAdapter)
10
11
12#===============================================================================
13# fields
14#===============================================================================
def Field(name, length):
    """
    Create a raw field spanning a given number of bytes.

    :param str name: the name of the field
    :param length: the length of the field. Either an integer (a StaticField
      is returned), or a callable that takes the context as an argument and
      returns the length (a MetaField is returned)
    """
    # a callable length is resolved later from the context; anything else
    # is treated as a fixed size
    field_class = MetaField if callable(length) else StaticField
    return field_class(name, length)
28
def BitField(name, length, swapped = False, signed = False, bytesize = 8):
    """
    A field that operates on raw, unaligned bits. BitFields must be enclosed
    in a BitStruct; apart from that they are used like any normal field,
    taking a name and a length (in bits).

    :param str name: name of the field
    :param int length: number of bits in the field, or a function that takes
                       the context as its argument and returns the length
    :param bool swapped: whether the value is byte-swapped
    :param bool signed: whether the value is signed
    :param int bytesize: number of bits per byte, for byte-swapping

    >>> foo = BitStruct("foo",
    ...     BitField("a", 3),
    ...     Flag("b"),
    ...     Padding(3),
    ...     Nibble("c"),
    ...     BitField("d", 5),
    ... )
    >>> foo.parse("\\xe1\\x1f")
    Container(a = 7, b = False, c = 8, d = 31)
    >>> foo = BitStruct("foo",
    ...     BitField("a", 3),
    ...     Flag("b"),
    ...     Padding(3),
    ...     Nibble("c"),
    ...     Struct("bar",
    ...             Nibble("d"),
    ...             Bit("e"),
    ...     )
    ... )
    >>> foo.parse("\\xe1\\x1f")
    Container(a = 7, b = False, bar = Container(d = 15, e = 1), c = 8)
    """

    # the underlying field and the adapter both receive the same length
    raw_field = Field(name, length)
    return BitIntegerAdapter(
        raw_field, length, swapped=swapped, signed=signed, bytesize=bytesize
    )
71
def Padding(length, pattern = "\x00", strict = False):
    r"""A padding field; its value is discarded.

    :param length: the length of the field. Either an integer, or a function
      that takes the context as an argument and returns the length
    :param pattern: the padding pattern (character) to use. Default is "\x00"
    :param bool strict: whether or not to raise an exception if the actual
      padding pattern mismatches the desired pattern. Default is False
    """
    # padding has no name of its own
    unnamed_field = Field(None, length)
    return PaddingAdapter(unnamed_field, pattern = pattern, strict = strict)
85
def Flag(name, truth = 1, falsehood = 0, default = False):
    """
    A flag.

    A flag usually signifies a Boolean value; this construct maps the
    underlying one-unit field onto the ``bool`` type.

    .. note:: This construct works with both bit and byte contexts.

    .. warning:: Flags default to False, not True. This is different from the
        C and Python way of thinking about truth, and may be subject to change
        in the future.

    :param str name: field name
    :param int truth: value of truth (default 1)
    :param int falsehood: value of falsehood (default 0)
    :param bool default: default value (default False)
    """

    unit_field = Field(name, 1)
    # True/False are encoded as the single-byte forms of truth/falsehood
    encoding = {True : int2byte(truth), False : int2byte(falsehood)}
    return SymmetricMapping(unit_field, encoding, default = default)
109
110#===============================================================================
111# field shortcuts
112#===============================================================================
def Bit(name):
    """A BitField that is 1 bit wide; must be enclosed in a BitStruct.

    :param str name: the name of the field
    """
    bit_count = 1
    return BitField(name, bit_count)
def Nibble(name):
    """A BitField that is 4 bits wide; must be enclosed in a BitStruct.

    :param str name: the name of the field
    """
    bit_count = 4
    return BitField(name, bit_count)
def Octet(name):
    """A BitField that is 8 bits wide; must be enclosed in a BitStruct.

    :param str name: the name of the field
    """
    bit_count = 8
    return BitField(name, bit_count)
122
def UBInt8(name):
    """Unsigned 8-bit integer field, big endian.

    :param str name: the name of the field
    """
    endianity, format_code = ">", "B"
    return FormatField(name, endianity, format_code)
def UBInt16(name):
    """Unsigned 16-bit integer field, big endian.

    :param str name: the name of the field
    """
    endianity, format_code = ">", "H"
    return FormatField(name, endianity, format_code)
def UBInt32(name):
    """Unsigned 32-bit integer field, big endian.

    :param str name: the name of the field
    """
    endianity, format_code = ">", "L"
    return FormatField(name, endianity, format_code)
def UBInt64(name):
    """Unsigned 64-bit integer field, big endian.

    :param str name: the name of the field
    """
    endianity, format_code = ">", "Q"
    return FormatField(name, endianity, format_code)
135
def SBInt8(name):
    """Signed 8-bit integer field, big endian.

    :param str name: the name of the field
    """
    endianity, format_code = ">", "b"
    return FormatField(name, endianity, format_code)
def SBInt16(name):
    """Signed 16-bit integer field, big endian.

    :param str name: the name of the field
    """
    endianity, format_code = ">", "h"
    return FormatField(name, endianity, format_code)
def SBInt32(name):
    """Signed 32-bit integer field, big endian.

    :param str name: the name of the field
    """
    endianity, format_code = ">", "l"
    return FormatField(name, endianity, format_code)
def SBInt64(name):
    """Signed 64-bit integer field, big endian.

    :param str name: the name of the field
    """
    endianity, format_code = ">", "q"
    return FormatField(name, endianity, format_code)
148
def ULInt8(name):
    """Unsigned 8-bit integer field, little endian.

    :param str name: the name of the field
    """
    endianity, format_code = "<", "B"
    return FormatField(name, endianity, format_code)
def ULInt16(name):
    """Unsigned 16-bit integer field, little endian.

    :param str name: the name of the field
    """
    endianity, format_code = "<", "H"
    return FormatField(name, endianity, format_code)
def ULInt32(name):
    """Unsigned 32-bit integer field, little endian.

    :param str name: the name of the field
    """
    endianity, format_code = "<", "L"
    return FormatField(name, endianity, format_code)
def ULInt64(name):
    """Unsigned 64-bit integer field, little endian.

    :param str name: the name of the field
    """
    endianity, format_code = "<", "Q"
    return FormatField(name, endianity, format_code)
161
def SLInt8(name):
    """Signed 8-bit integer field, little endian.

    :param str name: the name of the field
    """
    endianity, format_code = "<", "b"
    return FormatField(name, endianity, format_code)
def SLInt16(name):
    """Signed 16-bit integer field, little endian.

    :param str name: the name of the field
    """
    endianity, format_code = "<", "h"
    return FormatField(name, endianity, format_code)
def SLInt32(name):
    """Signed 32-bit integer field, little endian.

    :param str name: the name of the field
    """
    endianity, format_code = "<", "l"
    return FormatField(name, endianity, format_code)
def SLInt64(name):
    """Signed 64-bit integer field, little endian.

    :param str name: the name of the field
    """
    endianity, format_code = "<", "q"
    return FormatField(name, endianity, format_code)
174
def UNInt8(name):
    """Unsigned 8-bit integer field, native endianity.

    :param str name: the name of the field
    """
    endianity, format_code = "=", "B"
    return FormatField(name, endianity, format_code)
def UNInt16(name):
    """Unsigned 16-bit integer field, native endianity.

    :param str name: the name of the field
    """
    endianity, format_code = "=", "H"
    return FormatField(name, endianity, format_code)
def UNInt32(name):
    """Unsigned 32-bit integer field, native endianity.

    :param str name: the name of the field
    """
    endianity, format_code = "=", "L"
    return FormatField(name, endianity, format_code)
def UNInt64(name):
    """Unsigned 64-bit integer field, native endianity.

    :param str name: the name of the field
    """
    endianity, format_code = "=", "Q"
    return FormatField(name, endianity, format_code)
187
def SNInt8(name):
    """Signed 8-bit integer field, native endianity.

    :param str name: the name of the field
    """
    endianity, format_code = "=", "b"
    return FormatField(name, endianity, format_code)
def SNInt16(name):
    """Signed 16-bit integer field, native endianity.

    :param str name: the name of the field
    """
    endianity, format_code = "=", "h"
    return FormatField(name, endianity, format_code)
def SNInt32(name):
    """Signed 32-bit integer field, native endianity.

    :param str name: the name of the field
    """
    endianity, format_code = "=", "l"
    return FormatField(name, endianity, format_code)
def SNInt64(name):
    """Signed 64-bit integer field, native endianity.

    :param str name: the name of the field
    """
    endianity, format_code = "=", "q"
    return FormatField(name, endianity, format_code)
200
def BFloat32(name):
    """32-bit IEEE floating point number field, big endian.

    :param str name: the name of the field
    """
    endianity, format_code = ">", "f"
    return FormatField(name, endianity, format_code)
def LFloat32(name):
    """32-bit IEEE floating point number field, little endian.

    :param str name: the name of the field
    """
    endianity, format_code = "<", "f"
    return FormatField(name, endianity, format_code)
def NFloat32(name):
    """32-bit IEEE floating point number field, native endianity.

    :param str name: the name of the field
    """
    endianity, format_code = "=", "f"
    return FormatField(name, endianity, format_code)
210
def BFloat64(name):
    """64-bit IEEE floating point number field, big endian.

    :param str name: the name of the field
    """
    endianity, format_code = ">", "d"
    return FormatField(name, endianity, format_code)
def LFloat64(name):
    """64-bit IEEE floating point number field, little endian.

    :param str name: the name of the field
    """
    endianity, format_code = "<", "d"
    return FormatField(name, endianity, format_code)
def NFloat64(name):
    """64-bit IEEE floating point number field, native endianity.

    :param str name: the name of the field
    """
    endianity, format_code = "=", "d"
    return FormatField(name, endianity, format_code)
220
221
222#===============================================================================
223# arrays
224#===============================================================================
def Array(count, subcon):
    """
    Repeats the given unit a fixed number of times.

    :param count: number of times to repeat; either an integer, or a callable
      that is handed to MetaArray as-is to compute the count
    :param ``Construct`` subcon: construct to repeat

    >>> c = Array(4, UBInt8("foo"))
    >>> c.parse("\\x01\\x02\\x03\\x04")
    [1, 2, 3, 4]
    >>> c.parse("\\x01\\x02\\x03\\x04\\x05\\x06")
    [1, 2, 3, 4]
    >>> c.build([5,6,7,8])
    '\\x05\\x06\\x07\\x08'
    >>> c.build([5,6,7,8,9])
    Traceback (most recent call last):
      ...
    construct.core.RangeError: expected 4..4, found 5
    """

    if not callable(count):
        # a constant count is wrapped in a function for MetaArray, and the
        # resulting construct is explicitly marked as non-dynamic
        fixed = MetaArray(lambda ctx: count, subcon)
        fixed._clear_flag(fixed.FLAG_DYNAMIC)
        return fixed
    return MetaArray(count, subcon)
251
def PrefixedArray(subcon, length_field = UBInt8("length")):
    """An array prefixed by a length field.

    :param ``Construct`` subcon: the subcon to be repeated
    :param ``Construct`` length_field: a construct returning an integer; its
      value is looked up in the context by the field's name to obtain the
      number of elements
    """
    def element_count(ctx):
        # the length field was processed first, so its value is in the context
        return ctx[length_field.name]

    prefixed = Sequence(subcon.name,
        length_field,
        Array(element_count, subcon),
        nested = False
    )
    return LengthValueAdapter(prefixed)
264
def OpenRange(mincount, subcon):
    """A Range with a lower bound only.

    ``sys.maxsize`` is passed to Range as the upper bound.

    :param int mincount: the first (lower) bound handed to Range
    :param ``Construct`` subcon: construct to repeat
    """
    import sys
    return Range(mincount, sys.maxsize, subcon)
268
def GreedyRange(subcon):
    """
    Repeats the given unit one or more times.

    The upper bound is ``sys.maxsize`` (via OpenRange), so the maximum shown
    in the error messages below depends on the platform.

    :param ``Construct`` subcon: construct to repeat

    >>> from construct import GreedyRange, UBInt8
    >>> c = GreedyRange(UBInt8("foo"))
    >>> c.parse("\\x01")
    [1]
    >>> c.parse("\\x01\\x02\\x03")
    [1, 2, 3]
    >>> c.parse("\\x01\\x02\\x03\\x04\\x05\\x06")
    [1, 2, 3, 4, 5, 6]
    >>> c.parse("")
    Traceback (most recent call last):
      ...
    construct.core.RangeError: expected 1..2147483647, found 0
    >>> c.build([1,2])
    '\\x01\\x02'
    >>> c.build([])
    Traceback (most recent call last):
      ...
    construct.core.RangeError: expected 1..2147483647, found 0
    """

    minimum = 1
    return OpenRange(minimum, subcon)
296
def OptionalGreedyRange(subcon):
    """
    Repeats the given unit zero or more times. Since lists of any length
    are accepted, this repeater can't fail.

    :param ``Construct`` subcon: construct to repeat

    >>> from construct import OptionalGreedyRange, UBInt8
    >>> c = OptionalGreedyRange(UBInt8("foo"))
    >>> c.parse("")
    []
    >>> c.parse("\\x01\\x02")
    [1, 2]
    >>> c.build([])
    ''
    >>> c.build([1,2])
    '\\x01\\x02'
    """

    minimum = 0
    return OpenRange(minimum, subcon)
317
318
319#===============================================================================
320# subconstructs
321#===============================================================================
def Optional(subcon):
    """An optional construct. If parsing fails, returns None.

    :param ``Construct`` subcon: the subcon to optionally parse or build
    """
    # try the subcon first; Pass is the fallback alternative
    alternatives = (subcon, Pass)
    return Select(subcon.name, *alternatives)
327
def Bitwise(subcon):
    """Converts the stream to bits, and passes the bitstream to subcon.

    :param ``Construct`` subcon: a bitwise construct (usually BitField)
    """
    # implementation detail: a non-dynamic subcon whose sizeof() is below
    # this limit is wrapped by Buffered; everything else goes through
    # Restream instead
    buffer_limit = 1024 * 8

    def to_whole_bytes(length):
        # sizes are divided by 8, so anything that is not a multiple of 8
        # is rejected
        if length & 7:
            raise SizeofError("size must be a multiple of 8", length)
        return length >> 3

    # sizeof() is only consulted for non-dynamic subcons (short-circuit)
    if subcon._is_flag(subcon.FLAG_DYNAMIC) or subcon.sizeof() >= buffer_limit:
        return Restream(subcon,
            stream_reader = BitStreamReader,
            stream_writer = BitStreamWriter,
            resizer = to_whole_bytes)

    # NOTE(review): encoder is decode_bin and decoder is encode_bin --
    # presumably because building turns bits back into bytes while parsing
    # turns bytes into bits; confirm against Buffered
    return Buffered(subcon,
        encoder = decode_bin,
        decoder = encode_bin,
        resizer = to_whole_bytes
    )
351
def Aligned(subcon, modulus = 4, pattern = "\x00"):
    r"""Aligns subcon to a modulus boundary using a padding pattern.

    :param ``Construct`` subcon: the subcon to align
    :param int modulus: the modulus boundary (default is 4)
    :param pattern: the padding pattern (default is \x00)
    :raises ValueError: if modulus is less than 2
    """
    if modulus < 2:
        raise ValueError("modulus must be >= 2", modulus)

    def padlength(ctx):
        # amount needed to reach the next multiple of modulus; zero when
        # the subcon's size is already a multiple
        remainder = subcon._sizeof(ctx) % modulus
        return (modulus - remainder) % modulus

    aligned_name = subcon.name
    trailing_padding = Padding(padlength, pattern = pattern)
    # SeqOfOne keeps only the subcon's value; the padding is discarded
    return SeqOfOne(aligned_name, subcon, trailing_padding, nested = False)
371
def SeqOfOne(name, *args, **kw):
    """A sequence of one element. Only the first element is meaningful, the
    rest are discarded.

    :param str name: the name of the sequence
    :param args: subconstructs
    :param kw: any keyword arguments to Sequence
    """
    sequence = Sequence(name, *args, **kw)
    return IndexingAdapter(sequence, index = 0)
380
def Embedded(subcon):
    """Embeds a struct into the enclosing struct.

    :param ``Construct`` subcon: the struct to embed
    """
    # keep the subcon's own name, and set its FLAG_EMBED via Reconfig
    inner_name, embed_flag = subcon.name, subcon.FLAG_EMBED
    return Reconfig(inner_name, subcon, embed_flag)
386
def Rename(newname, subcon):
    """Renames an existing construct.

    :param str newname: the new name
    :param ``Construct`` subcon: the subcon to rename
    """
    renamed = Reconfig(newname, subcon)
    return renamed
393
def Alias(newname, oldname):
    """Creates an alias for an existing element in a struct.

    :param str newname: the new name
    :param str oldname: the name of an existing element
    """
    def lookup_original(ctx):
        # the alias's value is whatever the context holds under oldname
        return ctx[oldname]

    return Value(newname, lookup_original)
400
401
402#===============================================================================
403# mapping
404#===============================================================================
def SymmetricMapping(subcon, mapping, default = NotImplemented):
    """Defines a symmetrical mapping: a->b, b->a.

    :param ``Construct`` subcon: the subcon to map
    :param dict mapping: the encoding mapping; the decoding mapping is
      obtained by reversing this mapping
    :param default: the default value to use when no mapping is found. If no
      default value is given, an exception is raised. Setting it to Pass
      would return the value "as is" (unmapped)
    """
    # invert the encoding mapping; when several keys share a value, the one
    # iterated last wins
    decoding = {}
    for key, value in mapping.items():
        decoding[value] = key

    return MappingAdapter(subcon,
        encoding = mapping,
        decoding = decoding,
        encdefault = default,
        decdefault = default,
    )
421
def Enum(subcon, **kw):
    """A set of named values mapping.

    :param ``Construct`` subcon: the subcon to map
    :param kw: keyword arguments which serve as the encoding mapping
    :param _default_: an optional, keyword-only argument that specifies the
      default value to use when the mapping is undefined. If not given, an
      exception is raised when the mapping is undefined. Use `Pass` to
      pass the unmapped value as-is
    """
    # take _default_ out of kw first, so that it does not end up as an
    # entry of the mapping itself
    default = kw.pop("_default_", NotImplemented)
    return SymmetricMapping(subcon, kw, default)
432
def FlagsEnum(subcon, **kw):
    """A set of flag values mapping.

    :param ``Construct`` subcon: the subcon to map
    :param kw: keyword arguments which serve as the encoding mapping
    """
    flags = kw
    return FlagsAdapter(subcon, flags)
439
440
441#===============================================================================
442# structs
443#===============================================================================
def AlignedStruct(name, *subcons, **kw):
    """A struct of aligned fields.

    :param str name: the name of the struct
    :param subcons: the subcons that make up this structure
    :param kw: keyword arguments to pass to Aligned: 'modulus' and 'pattern'
    """
    # every member is aligned individually with the same settings
    aligned_members = [Aligned(member, **kw) for member in subcons]
    return Struct(name, *aligned_members)
451
def BitStruct(name, *subcons):
    """A struct of bitwise fields.

    :param str name: the name of the struct
    :param subcons: the subcons that make up this structure
    """
    inner_struct = Struct(name, *subcons)
    return Bitwise(inner_struct)
458
def EmbeddedBitStruct(*subcons):
    """An embedded BitStruct. No name is necessary.

    :param subcons: the subcons that make up this structure
    """
    anonymous_struct = Struct(None, *subcons)
    return Bitwise(Embedded(anonymous_struct))
464
465#===============================================================================
466# strings
467#===============================================================================
def String(name, length, encoding=None, padchar=None, paddir="right",
    trimdir="right"):
    """
    A configurable, fixed-length string field.

    Padding and trimming only take place when a padding character is given.

    :param str name: name
    :param int length: length, in bytes
    :param str encoding: encoding (e.g. "utf8") or None for no encoding
    :param str padchar: optional character to pad out strings
    :param str paddir: direction to pad out strings; one of "right", "left",
                       or "both"
    :param str trimdir: direction to trim strings; one of "right", "left"

    >>> from construct import String
    >>> String("foo", 5).parse("hello")
    'hello'
    >>>
    >>> String("foo", 12, encoding = "utf8").parse("hello joh\\xd4\\x83n")
    u'hello joh\\u0503n'
    >>>
    >>> foo = String("foo", 10, padchar = "X", paddir = "right")
    >>> foo.parse("helloXXXXX")
    'hello'
    >>> foo.build("hello")
    'helloXXXXX'
    """

    unpadded = StringAdapter(Field(name, length), encoding=encoding)
    if padchar is None:
        # no padding character: paddir and trimdir are not used
        return unpadded
    return PaddedStringAdapter(unpadded, padchar=padchar, paddir=paddir,
        trimdir=trimdir)
502
def PascalString(name, length_field=UBInt8("length"), encoding=None):
    """
    A length-prefixed string.

    ``PascalString`` takes its name from the string types of Pascal, which
    are length-prefixed. Lisp strings also follow this convention.

    The length field will appear in the same ``Container`` as the
    ``PascalString``, with the given name.

    :param str name: name
    :param ``Construct`` length_field: a field which will store the length of
                                       the string
    :param str encoding: encoding (e.g. "utf8") or None for no encoding

    >>> foo = PascalString("foo")
    >>> foo.parse("\\x05hello")
    'hello'
    >>> foo.build("hello world")
    '\\x0bhello world'
    >>>
    >>> foo = PascalString("foo", length_field = UBInt16("length"))
    >>> foo.parse("\\x00\\x05hello")
    'hello'
    >>> foo.build("hello")
    '\\x00\\x05hello'
    """

    def data_length(ctx):
        # the length field comes first in the sequence, so its value is
        # available in the context under its own name
        return ctx[length_field.name]

    prefixed = Sequence(name,
        length_field,
        Field("data", data_length),
    )
    return StringAdapter(LengthValueAdapter(prefixed), encoding=encoding)
540
def CString(name, terminators=b"\x00", encoding=None,
            char_field=Field(None, 1)):
    """
    A string ending in a terminator.

    ``CString`` resembles the strings of C, C++, and other related
    programming languages.

    By default, the terminator is the NULL byte (b``0x00``).

    :param str name: name
    :param iterable terminators: sequence of valid terminators, in order of
                                 preference
    :param str encoding: encoding (e.g. "utf8") or None for no encoding
    :param ``Construct`` char_field: construct representing a single character

    >>> foo = CString("foo")
    >>> foo.parse(b"hello\\x00")
    b'hello'
    >>> foo.build(b"hello")
    b'hello\\x00'
    >>> foo = CString("foo", terminators = b"XYZ")
    >>> foo.parse(b"helloX")
    b'hello'
    >>> foo.parse(b"helloY")
    b'hello'
    >>> foo.parse(b"helloZ")
    b'hello'
    >>> foo.build(b"hello")
    b'helloX'
    """

    def is_terminator(obj, ctx):
        # repetition stops at the first character found among the terminators
        return obj in terminators

    characters = RepeatUntil(is_terminator, char_field)
    adapted = CStringAdapter(characters,
        terminators=terminators,
        encoding=encoding,
    )
    return Rename(name, adapted)
580
581
582#===============================================================================
583# conditional
584#===============================================================================
def IfThenElse(name, predicate, then_subcon, else_subcon):
    """An if-then-else conditional construct: if the predicate indicates True,
    `then_subcon` will be used; otherwise `else_subcon`.

    :param str name: the name of the construct
    :param predicate: a function taking the context as an argument and
      returning True or False
    :param ``Construct`` then_subcon: the subcon that will be used if the
      predicate returns True
    :param ``Construct`` else_subcon: the subcon that will be used if the
      predicate returns False
    """
    def select_branch(ctx):
        # coerce to bool so the result always matches one of the two keys
        return bool(predicate(ctx))

    branches = {
        True : then_subcon,
        False : else_subcon,
    }
    return Switch(name, select_branch, branches)
600
def If(predicate, subcon, elsevalue = None):
    """An if-then conditional construct: if the predicate indicates True,
    subcon will be used; otherwise, `elsevalue` will be returned instead.

    :param predicate: a function taking the context as an argument and
      returning True or False
    :param ``Construct`` subcon: the subcon that will be used if the predicate
      returns True
    :param elsevalue: the value that will be used should the predicate return
      False. By default this value is None
    """
    construct_name = subcon.name

    def constant_else(ctx):
        return elsevalue

    # the "else" branch is a Value that always yields elsevalue
    fallback = Value("elsevalue", constant_else)
    return IfThenElse(construct_name, predicate, subcon, fallback)
615
616
617#===============================================================================
618# misc
619#===============================================================================
def OnDemandPointer(offsetfunc, subcon, force_build = True):
    """An on-demand pointer.

    :param offsetfunc: a function taking the context as an argument and
      returning the absolute stream position
    :param ``Construct`` subcon: the subcon that will be parsed from the
      `offsetfunc()` stream position on demand
    :param bool force_build: see OnDemand. By default True
    """
    pointer = Pointer(offsetfunc, subcon)
    return OnDemand(pointer, advance_stream = False, force_build = force_build)
632
def Magic(data):
    """A nameless field of ``len(data)`` units wrapped in a ConstAdapter
    that is given `data` as its value.

    NOTE(review): presumably used to match a fixed signature in the stream --
    confirm against ConstAdapter.

    :param data: the constant data; its length determines the field length
    """
    size = len(data)
    return ConstAdapter(Field(None, size), data)
635