from .lib.py3compat import int2byte
from .lib import (BitStreamReader, BitStreamWriter, encode_bin,
    decode_bin)
from .core import (Struct, MetaField, StaticField, FormatField,
    OnDemand, Pointer, Switch, Value, RepeatUntil, MetaArray, Sequence, Range,
    Select, Pass, SizeofError, Buffered, Restream, Reconfig)
from .adapters import (BitIntegerAdapter, PaddingAdapter,
    ConstAdapter, CStringAdapter, LengthValueAdapter, IndexingAdapter,
    PaddedStringAdapter, FlagsAdapter, StringAdapter, MappingAdapter)


#===============================================================================
# fields
#===============================================================================
def Field(name, length):
    """
    A field consisting of a specified number of bytes.

    :param str name: the name of the field
    :param length: the length of the field. the length can be either an integer
      (StaticField), or a function that takes the context as an argument and
      returns the length (MetaField)
    """
    if callable(length):
        return MetaField(name, length)
    else:
        return StaticField(name, length)

def BitField(name, length, swapped = False, signed = False, bytesize = 8):
    """
    BitFields, as the name suggests, are fields that operate on raw, unaligned
    bits, and therefore must be enclosed in a BitStruct. Using them is very
    similar to all normal fields: they take a name and a length (in bits).

    :param str name: name of the field
    :param int length: number of bits in the field, or a function that takes
                       the context as its argument and returns the length
    :param bool swapped: whether the value is byte-swapped
    :param bool signed: whether the value is signed
    :param int bytesize: number of bits per byte, for byte-swapping

    >>> foo = BitStruct("foo",
    ...     BitField("a", 3),
    ...     Flag("b"),
    ...     Padding(3),
    ...     Nibble("c"),
    ...     BitField("d", 5),
    ... )
    >>> foo.parse("\\xe1\\x1f")
    Container(a = 7, b = False, c = 8, d = 31)
    >>> foo = BitStruct("foo",
    ...     BitField("a", 3),
    ...     Flag("b"),
    ...     Padding(3),
    ...     Nibble("c"),
    ...     Struct("bar",
    ...         Nibble("d"),
    ...         Bit("e"),
    ...     )
    ... )
    >>> foo.parse("\\xe1\\x1f")
    Container(a = 7, b = False, bar = Container(d = 15, e = 1), c = 8)
    """

    return BitIntegerAdapter(Field(name, length),
        length,
        swapped=swapped,
        signed=signed,
        bytesize=bytesize
    )

def Padding(length, pattern = b"\x00", strict = False):
    r"""a padding field (value is discarded)
    * length - the length of the field. the length can be either an integer,
      or a function that takes the context as an argument and returns the
      length
    * pattern - the padding pattern (a single byte) to use. default is
      b"\x00"
    * strict - whether or not to raise an exception if the actual padding
      pattern mismatches the desired pattern. default is False.
    """
    # The default is a bytes literal (like CString's terminators) so that the
    # pattern has the same type as the stream data on Python 3; on Python 2
    # b"\x00" and "\x00" are the same object type, so nothing changes there.
    return PaddingAdapter(Field(None, length),
        pattern = pattern,
        strict = strict,
    )

def Flag(name, truth = 1, falsehood = 0, default = False):
    """
    A flag.

    Flags are usually used to signify a Boolean value, and this construct
    maps values onto the ``bool`` type.

    .. note:: This construct works with both bit and byte contexts.

    .. warning:: Flags default to False, not True. This is different from the
        C and Python way of thinking about truth, and may be subject to change
        in the future.

    :param str name: field name
    :param int truth: value of truth (default 1)
    :param int falsehood: value of falsehood (default 0)
    :param bool default: default value (default False)
    """

    # The field is one unit long; True/False are mapped to the single-byte
    # encodings of `truth` and `falsehood`.
    return SymmetricMapping(Field(name, 1),
        {True : int2byte(truth), False : int2byte(falsehood)},
        default = default,
    )

#===============================================================================
# field shortcuts
#===============================================================================
def Bit(name):
    """a 1-bit BitField; must be enclosed in a BitStruct"""
    return BitField(name, 1)
def Nibble(name):
    """a 4-bit BitField; must be enclosed in a BitStruct"""
    return BitField(name, 4)
def Octet(name):
    """an 8-bit BitField; must be enclosed in a BitStruct"""
    return BitField(name, 8)

def UBInt8(name):
    """unsigned, big endian 8-bit integer"""
    return FormatField(name, ">", "B")
def UBInt16(name):
    """unsigned, big endian 16-bit integer"""
    return FormatField(name, ">", "H")
def UBInt32(name):
    """unsigned, big endian 32-bit integer"""
    return FormatField(name, ">", "L")
def UBInt64(name):
    """unsigned, big endian 64-bit integer"""
    return FormatField(name, ">", "Q")

def SBInt8(name):
    """signed, big endian 8-bit integer"""
    return FormatField(name, ">", "b")
def SBInt16(name):
    """signed, big endian 16-bit integer"""
    return FormatField(name, ">", "h")
def SBInt32(name):
    """signed, big endian 32-bit integer"""
    return FormatField(name, ">", "l")
def SBInt64(name):
    """signed, big endian 64-bit integer"""
    return FormatField(name, ">", "q")

def ULInt8(name):
    """unsigned, little endian 8-bit integer"""
    return FormatField(name, "<", "B")
def ULInt16(name):
    """unsigned, little endian 16-bit integer"""
    return FormatField(name, "<", "H")
def ULInt32(name):
    """unsigned, little endian 32-bit integer"""
    return FormatField(name, "<", "L")
def ULInt64(name):
    """unsigned, little endian 64-bit integer"""
    return FormatField(name, "<", "Q")

def SLInt8(name):
    """signed, little endian 8-bit integer"""
    return FormatField(name, "<", "b")
def SLInt16(name):
    """signed, little endian 16-bit integer"""
    return FormatField(name, "<", "h")
def SLInt32(name):
    """signed, little endian 32-bit integer"""
    return FormatField(name, "<", "l")
def SLInt64(name):
    """signed, little endian 64-bit integer"""
    return FormatField(name, "<", "q")

def UNInt8(name):
    """unsigned, native endianity 8-bit integer"""
    return FormatField(name, "=", "B")
def UNInt16(name):
    """unsigned, native endianity 16-bit integer"""
    return FormatField(name, "=", "H")
def UNInt32(name):
    """unsigned, native endianity 32-bit integer"""
    return FormatField(name, "=", "L")
def UNInt64(name):
    """unsigned, native endianity 64-bit integer"""
    return FormatField(name, "=", "Q")

def SNInt8(name):
    """signed, native endianity 8-bit integer"""
    return FormatField(name, "=", "b")
def SNInt16(name):
    """signed, native endianity 16-bit integer"""
    return FormatField(name, "=", "h")
def SNInt32(name):
    """signed, native endianity 32-bit integer"""
    return FormatField(name, "=", "l")
def SNInt64(name):
    """signed, native endianity 64-bit integer"""
    return FormatField(name, "=", "q")

def BFloat32(name):
    """big endian, 32-bit IEEE floating point number"""
    return FormatField(name, ">", "f")
def LFloat32(name):
    """little endian, 32-bit IEEE floating point number"""
    return FormatField(name, "<", "f")
def NFloat32(name):
    """native endianity, 32-bit IEEE floating point number"""
    return FormatField(name, "=", "f")

def BFloat64(name):
    """big endian, 64-bit IEEE floating point number"""
    return FormatField(name, ">", "d")
def LFloat64(name):
    """little endian, 64-bit IEEE floating point number"""
    return FormatField(name, "<", "d")
def NFloat64(name):
    """native endianity, 64-bit IEEE floating point number"""
    return FormatField(name, "=", "d")


#===============================================================================
# arrays
#===============================================================================
def Array(count, subcon):
    """
    Repeats the given unit a fixed number of times.

    :param int count: number of times to repeat, or a function that takes the
                      context as its argument and returns the count
    :param ``Construct`` subcon: construct to repeat

    >>> c = Array(4, UBInt8("foo"))
    >>> c.parse("\\x01\\x02\\x03\\x04")
    [1, 2, 3, 4]
    >>> c.parse("\\x01\\x02\\x03\\x04\\x05\\x06")
    [1, 2, 3, 4]
    >>> c.build([5,6,7,8])
    '\\x05\\x06\\x07\\x08'
    >>> c.build([5,6,7,8,9])
    Traceback (most recent call last):
      ...
    construct.core.RangeError: expected 4..4, found 5
    """

    if callable(count):
        con = MetaArray(count, subcon)
    else:
        # A constant count is wrapped in a function for MetaArray; the
        # FLAG_DYNAMIC flag is then cleared on the result (presumably because
        # a fixed count does not depend on the context).
        con = MetaArray(lambda ctx: count, subcon)
        con._clear_flag(con.FLAG_DYNAMIC)
    return con

def PrefixedArray(subcon, length_field = UBInt8("length")):
    """an array prefixed by a length field.
    * subcon - the subcon to be repeated
    * length_field - a construct returning an integer
    """
    # The element count is read back from the context under the length
    # field's own name.
    return LengthValueAdapter(
        Sequence(subcon.name,
            length_field,
            Array(lambda ctx: ctx[length_field.name], subcon),
            nested = False
        )
    )

def OpenRange(mincount, subcon):
    """a Range with no practical upper bound: repeats subcon at least
    `mincount` times and at most ``sys.maxsize`` times.
    * mincount - the minimal number of repetitions
    * subcon - the subcon to be repeated
    """
    from sys import maxsize
    return Range(mincount, maxsize, subcon)

def GreedyRange(subcon):
    """
    Repeats the given unit one or more times.

    :param ``Construct`` subcon: construct to repeat

    .. note:: The upper bound shown in the error messages below is
        ``sys.maxsize``; the value printed here is that of a 32-bit build.

    >>> from construct import GreedyRange, UBInt8
    >>> c = GreedyRange(UBInt8("foo"))
    >>> c.parse("\\x01")
    [1]
    >>> c.parse("\\x01\\x02\\x03")
    [1, 2, 3]
    >>> c.parse("\\x01\\x02\\x03\\x04\\x05\\x06")
    [1, 2, 3, 4, 5, 6]
    >>> c.parse("")
    Traceback (most recent call last):
      ...
    construct.core.RangeError: expected 1..2147483647, found 0
    >>> c.build([1,2])
    '\\x01\\x02'
    >>> c.build([])
    Traceback (most recent call last):
      ...
    construct.core.RangeError: expected 1..2147483647, found 0
    """

    return OpenRange(1, subcon)

def OptionalGreedyRange(subcon):
    """
    Repeats the given unit zero or more times. This repeater can't
    fail, as it accepts lists of any length.

    :param ``Construct`` subcon: construct to repeat

    >>> from construct import OptionalGreedyRange, UBInt8
    >>> c = OptionalGreedyRange(UBInt8("foo"))
    >>> c.parse("")
    []
    >>> c.parse("\\x01\\x02")
    [1, 2]
    >>> c.build([])
    ''
    >>> c.build([1,2])
    '\\x01\\x02'
    """

    return OpenRange(0, subcon)


#===============================================================================
# subconstructs
#===============================================================================
def Optional(subcon):
    """an optional construct. if parsing fails, returns None.
    * subcon - the subcon to optionally parse or build
    """
    return Select(subcon.name, subcon, Pass)

def Bitwise(subcon):
    """converts the stream to bits, and passes the bitstream to subcon
    * subcon - a bitwise construct (usually BitField)
    """
    # subcons larger than MAX_BUFFER will be wrapped by Restream instead
    # of Buffered. implementation details, don't stick your nose in :)
    MAX_BUFFER = 1024 * 8
    def resizer(length):
        # converts a size in bits to a size in bytes; partial bytes are
        # rejected
        if length & 7:
            raise SizeofError("size must be a multiple of 8", length)
        return length >> 3
    if not subcon._is_flag(subcon.FLAG_DYNAMIC) and subcon.sizeof() < MAX_BUFFER:
        con = Buffered(subcon,
            encoder = decode_bin,
            decoder = encode_bin,
            resizer = resizer
        )
    else:
        con = Restream(subcon,
            stream_reader = BitStreamReader,
            stream_writer = BitStreamWriter,
            resizer = resizer)
    return con

def Aligned(subcon, modulus = 4, pattern = b"\x00"):
    r"""aligns subcon to modulus boundary using padding pattern
    * subcon - the subcon to align
    * modulus - the modulus boundary (default is 4)
    * pattern - the padding pattern (default is b"\x00")

    raises ValueError if modulus is smaller than 2.
    """
    if modulus < 2:
        raise ValueError("modulus must be >= 2", modulus)
    def padlength(ctx):
        # number of padding units needed to reach the next multiple of
        # `modulus`; 0 when the subcon is already aligned
        return (modulus - (subcon._sizeof(ctx) % modulus)) % modulus
    return SeqOfOne(subcon.name,
        subcon,
        Padding(padlength, pattern = pattern),
        nested = False,
    )

def SeqOfOne(name, *args, **kw):
    """a sequence of one element. only the first element is meaningful, the
    rest are discarded
    * name - the name of the sequence
    * args - subconstructs
    * kw - any keyword arguments to Sequence
    """
    return IndexingAdapter(Sequence(name, *args, **kw), index = 0)

def Embedded(subcon):
    """embeds a struct into the enclosing struct.
    * subcon - the struct to embed
    """
    return Reconfig(subcon.name, subcon, subcon.FLAG_EMBED)

def Rename(newname, subcon):
    """renames an existing construct
    * newname - the new name
    * subcon - the subcon to rename
    """
    return Reconfig(newname, subcon)

def Alias(newname, oldname):
    """creates an alias for an existing element in a struct
    * newname - the new name
    * oldname - the name of an existing element
    """
    return Value(newname, lambda ctx: ctx[oldname])


#===============================================================================
# mapping
#===============================================================================
def SymmetricMapping(subcon, mapping, default = NotImplemented):
    """defines a symmetrical mapping: a->b, b->a.
    * subcon - the subcon to map
    * mapping - the encoding mapping (a dict); the decoding mapping is
      achieved by reversing this mapping
    * default - the default value to use when no mapping is found. if no
      default value is given, an exception is raised. setting to Pass would
      return the value "as is" (unmapped)
    """
    reversed_mapping = dict((v, k) for k, v in mapping.items())
    return MappingAdapter(subcon,
        encoding = mapping,
        decoding = reversed_mapping,
        encdefault = default,
        decdefault = default,
    )

def Enum(subcon, **kw):
    """a set of named values mapping.
    * subcon - the subcon to map
    * kw - keyword arguments which serve as the encoding mapping
    * _default_ - an optional, keyword-only argument that specifies the
      default value to use when the mapping is undefined. if not given,
      an exception is raised when the mapping is undefined. use `Pass` to
      pass the unmapped value as-is
    """
    # `_default_` must be removed from kw before kw is used as the mapping;
    # doing it as a separate statement avoids depending on the order in which
    # call arguments are evaluated.
    default = kw.pop("_default_", NotImplemented)
    return SymmetricMapping(subcon, kw, default)

def FlagsEnum(subcon, **kw):
    """a set of flag values mapping.
    * subcon - the subcon to map
    * kw - keyword arguments which serve as the encoding mapping
    """
    return FlagsAdapter(subcon, kw)


#===============================================================================
# structs
#===============================================================================
def AlignedStruct(name, *subcons, **kw):
    """a struct of aligned fields
    * name - the name of the struct
    * subcons - the subcons that make up this structure
    * kw - keyword arguments to pass to Aligned: 'modulus' and 'pattern'
    """
    return Struct(name, *(Aligned(sc, **kw) for sc in subcons))

def BitStruct(name, *subcons):
    """a struct of bitwise fields
    * name - the name of the struct
    * subcons - the subcons that make up this structure
    """
    return Bitwise(Struct(name, *subcons))

def EmbeddedBitStruct(*subcons):
    """an embedded BitStruct. no name is necessary.
    * subcons - the subcons that make up this structure
    """
    return Bitwise(Embedded(Struct(None, *subcons)))

#===============================================================================
# strings
#===============================================================================
def String(name, length, encoding=None, padchar=None, paddir="right",
    trimdir="right"):
    """
    A configurable, fixed-length string field.

    The padding character must be specified for padding and trimming to work.

    :param str name: name
    :param int length: length, in bytes
    :param str encoding: encoding (e.g. "utf8") or None for no encoding
    :param str padchar: optional character to pad out strings
    :param str paddir: direction to pad out strings; one of "right", "left",
                       or "both"
    :param str trimdir: direction to trim strings; one of "right", "left"

    >>> from construct import String
    >>> String("foo", 5).parse("hello")
    'hello'
    >>>
    >>> String("foo", 12, encoding = "utf8").parse("hello joh\\xd4\\x83n")
    u'hello joh\\u0503n'
    >>>
    >>> foo = String("foo", 10, padchar = "X", paddir = "right")
    >>> foo.parse("helloXXXXX")
    'hello'
    >>> foo.build("hello")
    'helloXXXXX'
    """

    con = StringAdapter(Field(name, length), encoding=encoding)
    # padding/trimming is only layered on when a pad character was given
    if padchar is not None:
        con = PaddedStringAdapter(con, padchar=padchar, paddir=paddir,
            trimdir=trimdir)
    return con

def PascalString(name, length_field=UBInt8("length"), encoding=None):
    """
    A length-prefixed string.

    ``PascalString`` is named after the string types of Pascal, which are
    length-prefixed. Lisp strings also follow this convention.

    The length field will appear in the same ``Container`` as the
    ``PascalString``, with the given name.

    :param str name: name
    :param ``Construct`` length_field: a field which will store the length of
                                       the string
    :param str encoding: encoding (e.g. "utf8") or None for no encoding

    >>> foo = PascalString("foo")
    >>> foo.parse("\\x05hello")
    'hello'
    >>> foo.build("hello world")
    '\\x0bhello world'
    >>>
    >>> foo = PascalString("foo", length_field = UBInt16("length"))
    >>> foo.parse("\\x00\\x05hello")
    'hello'
    >>> foo.build("hello")
    '\\x00\\x05hello'
    """

    return StringAdapter(
        LengthValueAdapter(
            Sequence(name,
                length_field,
                Field("data", lambda ctx: ctx[length_field.name]),
            )
        ),
        encoding=encoding,
    )

def CString(name, terminators=b"\x00", encoding=None,
            char_field=Field(None, 1)):
    """
    A string ending in a terminator.

    ``CString`` is similar to the strings of C, C++, and other related
    programming languages.

    By default, the terminator is the NULL byte (b``0x00``).

    :param str name: name
    :param iterable terminators: sequence of valid terminators, in order of
                                 preference
    :param str encoding: encoding (e.g. "utf8") or None for no encoding
    :param ``Construct`` char_field: construct representing a single character

    >>> foo = CString("foo")
    >>> foo.parse(b"hello\\x00")
    b'hello'
    >>> foo.build(b"hello")
    b'hello\\x00'
    >>> foo = CString("foo", terminators = b"XYZ")
    >>> foo.parse(b"helloX")
    b'hello'
    >>> foo.parse(b"helloY")
    b'hello'
    >>> foo.parse(b"helloZ")
    b'hello'
    >>> foo.build(b"hello")
    b'helloX'
    """

    # characters are read one at a time until one of the terminators is seen
    return Rename(name,
        CStringAdapter(
            RepeatUntil(lambda obj, ctx: obj in terminators, char_field),
            terminators=terminators,
            encoding=encoding,
        )
    )


#===============================================================================
# conditional
#===============================================================================
def IfThenElse(name, predicate, then_subcon, else_subcon):
    """an if-then-else conditional construct: if the predicate indicates True,
    `then_subcon` will be used; otherwise `else_subcon`
    * name - the name of the construct
    * predicate - a function taking the context as an argument and returning
      True or False
    * then_subcon - the subcon that will be used if the predicate returns True
    * else_subcon - the subcon that will be used if the predicate returns False
    """
    # the predicate's result is coerced to bool so that any truthy/falsy
    # value selects one of the two cases
    return Switch(name, lambda ctx: bool(predicate(ctx)),
        {
            True : then_subcon,
            False : else_subcon,
        }
    )

def If(predicate, subcon, elsevalue = None):
    """an if-then conditional construct: if the predicate indicates True,
    subcon will be used; otherwise, `elsevalue` will be returned instead.
    * predicate - a function taking the context as an argument and returning
      True or False
    * subcon - the subcon that will be used if the predicate returns True
    * elsevalue - the value that will be used should the predicate return False.
      by default this value is None.
    """
    return IfThenElse(subcon.name,
        predicate,
        subcon,
        Value("elsevalue", lambda ctx: elsevalue)
    )


#===============================================================================
# misc
#===============================================================================
def OnDemandPointer(offsetfunc, subcon, force_build = True):
    """an on-demand pointer.
    * offsetfunc - a function taking the context as an argument and returning
      the absolute stream position
    * subcon - the subcon that will be parsed from the `offsetfunc()` stream
      position on demand
    * force_build - see OnDemand. by default True.
    """
    return OnDemand(Pointer(offsetfunc, subcon),
        advance_stream = False,
        force_build = force_build
    )

def Magic(data):
    """a constant, unnamed field: a Field of len(data) bytes wrapped in a
    ConstAdapter that is given `data` as its value.
    * data - the constant data
    """
    return ConstAdapter(Field(None, len(data)), data)