# -*- coding: utf-8 -*-

import struct, io, binascii, itertools, collections, pickle, sys, os, hashlib, importlib

from construct.lib import *
from construct.expr import *
from construct.version import *


#===============================================================================
# exceptions
#===============================================================================
class ConstructError(Exception):
    """
    Root of the exception hierarchy used throughout this module.

    :param message: human readable description of the failure
    :param path: optional location (inside the construct tree) where the failure happened; when given, it is kept on the instance as `path` and also prefixed to the message
    """
    def __init__(self, message='', path=None):
        self.path = path
        # only decorate the message when a path is actually known
        if path is not None:
            message = "Error in path {}\n".format(path) + message
        super().__init__(message)
class SizeofError(ConstructError):
    """Raised by `_sizeof` implementations when the size cannot be determined."""
class AdaptationError(ConstructError):
    pass
class ValidationError(ConstructError):
    """Raised by Validator when `_validate` returns a falsy value."""
class StreamError(ConstructError):
    """Raised by the stream_* helpers when reading, writing, seeking or telling fails or moves an unexpected amount."""
class FormatFieldError(ConstructError):
    """Raised by FormatField for a bad format specification or when `struct` rejects the data."""
class IntegerError(ConstructError):
    pass
class StringError(ConstructError):
    """Raised by stream_write when asked to write something that is not a bytes object."""
class MappingError(ConstructError):
    pass
class RangeError(ConstructError):
    pass
class RepeatError(ConstructError):
    pass
class ConstError(ConstructError):
    pass
class IndexFieldError(ConstructError):
    pass
class CheckError(ConstructError):
    pass
class ExplicitError(ConstructError):
    pass
class NamedTupleError(ConstructError):
    pass
class TimestampError(ConstructError):
    pass
class UnionError(ConstructError):
    pass
class SelectError(ConstructError):
    pass
class SwitchError(ConstructError):
    pass
class StopFieldError(ConstructError):
    pass
class PaddingError(ConstructError):
    pass
class TerminatedError(ConstructError):
    pass
class RawCopyError(ConstructError):
    pass
class RotationError(ConstructError):
    pass
class ChecksumError(ConstructError):
    pass
class CancelParsing(ConstructError):
    """Caught (and silenced) by Construct.parse_stream, which then returns None."""


#===============================================================================
# used internally
#===============================================================================
def singleton(arg):
    """
    Class decorator that replaces the decorated class with one instance of it.

    :param arg: a callable (usually a class) taking no arguments

    :returns: whatever calling `arg()` returns
    """
    return arg()


def stream_read(stream, length, path):
    """
    Read exactly `length` bytes from the stream.

    :param stream: object with a `read(n)` method
    :param length: integer, amount of bytes to read, must not be negative
    :param path: string, location reported in raised exceptions

    :returns: the data that was read

    :raises StreamError: negative length, the underlying read failed, or it returned a different amount than requested
    """
    if length < 0:
        raise StreamError("length must be non-negative, found %s" % length, path=path)
    try:
        data = stream.read(length)
    except Exception as exc:
        # keep the underlying failure attached as the explicit cause
        raise StreamError("stream.read() failed, requested %s bytes" % (length,), path=path) from exc
    if len(data) != length:
        raise StreamError("stream read less than specified amount, expected %d, found %d" % (length, len(data)), path=path)
    return data


def stream_read_entire(stream, path):
    """
    Read everything that remains in the stream.

    :param stream: object with a `read()` method
    :param path: string, location reported in raised exceptions

    :raises StreamError: the underlying read failed
    """
    try:
        return stream.read()
    except Exception as exc:
        raise StreamError("stream.read() failed when reading until EOF", path=path) from exc


def stream_write(stream, data, length, path):
    """
    Write `data` into the stream, insisting that it is exactly `length` bytes.

    :param stream: object with a `write(data)` method
    :param data: bytes object to be written
    :param length: integer, expected size of `data`, must not be negative
    :param path: string, location reported in raised exceptions

    :raises StringError: `data` is not a bytes object
    :raises StreamError: negative length, `data` has a different size than `length`, the underlying write failed, or it reported a different amount than `length`
    """
    if not isinstance(data, bytestringtype):
        raise StringError("given non-bytes value, perhaps unicode? %r" % (data,), path=path)
    if length < 0:
        raise StreamError("length must be non-negative, found %s" % length, path=path)
    if len(data) != length:
        raise StreamError("bytes object of wrong length, expected %d, found %d" % (length, len(data)), path=path)
    try:
        written = stream.write(data)
    except Exception as exc:
        raise StreamError("stream.write() failed, given %r" % (data,), path=path) from exc
    if written != length:
        # %s (not %d) on purpose: a write() that returns None must still end up as a StreamError, not a TypeError from formatting
        raise StreamError("stream written less than specified, expected %s, written %s" % (length, written), path=path)


def stream_seek(stream, offset, whence, path):
    """
    Move the stream position.

    :param stream: object with a `seek(offset, whence)` method
    :param offset: integer, passed through to `seek`
    :param whence: integer, passed through to `seek`
    :param path: string, location reported in raised exceptions

    :returns: whatever the underlying `seek` returns

    :raises StreamError: the underlying seek failed
    """
    try:
        return stream.seek(offset, whence)
    except Exception as exc:
        raise StreamError("stream.seek() failed, offset %s, whence %s" % (offset, whence), path=path) from exc


def stream_tell(stream, path):
    """
    Report the current stream position.

    :param stream: object with a `tell()` method
    :param path: string, location reported in raised exceptions

    :raises StreamError: the underlying tell failed
    """
    try:
        return stream.tell()
    except Exception as exc:
        raise StreamError("stream.tell() failed", path=path) from exc


def stream_size(stream):
    """
    Return the value `seek` reports for the end of the stream, then restore the position the stream had on entry.

    Exceptions from the stream are not wrapped here, they propagate as they are.
    """
    fallback = stream.tell()
    end = stream.seek(0, io.SEEK_END)
    stream.seek(fallback)
    return end
137 138def stream_iseof(stream): 139 fallback = stream.tell() 140 data = stream.read(1) 141 stream.seek(fallback) 142 return not data 143 144 145class CodeGen: 146 def __init__(self): 147 self.blocks = [] 148 self.nextid = 0 149 self.parsercache = {} 150 self.buildercache = {} 151 self.linkedinstances = {} 152 self.linkedparsers = {} 153 self.linkedbuilders = {} 154 155 def allocateId(self): 156 self.nextid += 1 157 return self.nextid 158 159 def append(self, block): 160 block = [s for s in block.splitlines() if s.strip()] 161 firstline = block[0] 162 trim = len(firstline) - len(firstline.lstrip()) 163 block = "\n".join(s[trim:] for s in block) 164 if block not in self.blocks: 165 self.blocks.append(block) 166 167 def toString(self): 168 return "\n".join(self.blocks + [""]) 169 170 171class KsyGen: 172 def __init__(self): 173 self.instances = {} 174 self.enums = {} 175 self.types = {} 176 self.nextid = 0 177 178 def allocateId(self): 179 self.nextid += 1 180 return self.nextid 181 182 183def hyphenatedict(d): 184 return {k.replace("_","-").rstrip("-"):v for k,v in d.items()} 185 186 187def hyphenatelist(l): 188 return [hyphenatedict(d) for d in l] 189 190 191def extractfield(sc): 192 if isinstance(sc, Renamed): 193 return extractfield(sc.subcon) 194 return sc 195 196 197def evaluate(param, context): 198 return param(context) if callable(param) else param 199 200 201#=============================================================================== 202# abstract constructs 203#=============================================================================== 204class Construct(object): 205 r""" 206 The mother of all constructs. 207 208 This object is generally not directly instantiated, and it does not directly implement parsing and building, so it is largely only of interest to subclass implementors. There are also other abstract classes sitting on top of this one. 
209 210 The external user API: 211 212 * `parse` 213 * `parse_stream` 214 * `parse_file` 215 * `build` 216 * `build_stream` 217 * `build_file` 218 * `sizeof` 219 * `compile` 220 * `benchmark` 221 222 Subclass authors should not override the external methods. Instead, another API is available: 223 224 * `_parse` 225 * `_build` 226 * `_sizeof` 227 * `_actualsize` 228 * `_emitparse` 229 * `_emitbuild` 230 * `_emitseq` 231 * `_emitprimitivetype` 232 * `_emitfulltype` 233 * `__getstate__` 234 * `__setstate__` 235 236 Attributes and Inheritance: 237 238 All constructs have a name and flags. The name is used for naming struct members and context dictionaries. Note that the name can be a string, or None by default. A single underscore "_" is a reserved name, used as up-level in nested containers. The name should be descriptive, short, and valid as a Python identifier, although these rules are not enforced. The flags specify additional behavioral information about this construct. Flags are used by enclosing constructs to determine a proper course of action. Flags are often inherited from inner subconstructs but that depends on each class. 
239 """ 240 241 def __init__(self): 242 self.name = None 243 self.docs = "" 244 self.flagbuildnone = False 245 self.parsed = None 246 247 def __repr__(self): 248 return "<%s%s%s%s>" % (self.__class__.__name__, " "+self.name if self.name else "", " +nonbuild" if self.flagbuildnone else "", " +docs" if self.docs else "", ) 249 250 def __getstate__(self): 251 attrs = {} 252 if hasattr(self, "__dict__"): 253 attrs.update(self.__dict__) 254 slots = [] 255 c = self.__class__ 256 while c is not None: 257 if hasattr(c, "__slots__"): 258 slots.extend(c.__slots__) 259 c = c.__base__ 260 for name in slots: 261 if hasattr(self, name): 262 attrs[name] = getattr(self, name) 263 return attrs 264 265 def __setstate__(self, attrs): 266 for name, value in attrs.items(): 267 setattr(self, name, value) 268 269 def __copy__(self): 270 self2 = object.__new__(self.__class__) 271 self2.__setstate__(self.__getstate__()) 272 return self2 273 274 def parse(self, data, **contextkw): 275 r""" 276 Parse an in-memory buffer (often bytes object). Strings, buffers, memoryviews, and other complete buffers can be parsed with this method. 277 278 Whenever data cannot be read, ConstructError or its derivative is raised. This method is NOT ALLOWED to raise any other exceptions although (1) user-defined lambdas can raise arbitrary exceptions which are propagated (2) external libraries like numpy can raise arbitrary exceptions which are propagated (3) some list and dict lookups can raise IndexError and KeyError which are propagated. 279 280 Context entries are passed only as keyword parameters \*\*contextkw. 
281 282 :param \*\*contextkw: context entries, usually empty 283 284 :returns: some value, usually based on bytes read from the stream but sometimes it is computed from nothing or from the context dictionary, sometimes its non-deterministic 285 286 :raises ConstructError: raised for any reason 287 """ 288 return self.parse_stream(io.BytesIO(data), **contextkw) 289 290 def parse_stream(self, stream, **contextkw): 291 r""" 292 Parse a stream. Files, pipes, sockets, and other streaming sources of data are handled by this method. See parse(). 293 """ 294 context = Container(**contextkw) 295 context._parsing = True 296 context._building = False 297 context._sizing = False 298 context._params = context 299 try: 300 return self._parsereport(stream, context, "(parsing)") 301 except CancelParsing: 302 pass 303 304 def parse_file(self, filename, **contextkw): 305 r""" 306 Parse a closed binary file. See parse(). 307 """ 308 with open(filename, 'rb') as f: 309 return self.parse_stream(f, **contextkw) 310 311 def _parsereport(self, stream, context, path): 312 obj = self._parse(stream, context, path) 313 if self.parsed is not None: 314 self.parsed(obj, context) 315 return obj 316 317 def _parse(self, stream, context, path): 318 """Override in your subclass.""" 319 raise NotImplementedError 320 321 def build(self, obj, **contextkw): 322 r""" 323 Build an object in memory (a bytes object). 324 325 Whenever data cannot be written, ConstructError or its derivative is raised. This method is NOT ALLOWED to raise any other exceptions although (1) user-defined lambdas can raise arbitrary exceptions which are propagated (2) external libraries like numpy can raise arbitrary exceptions which are propagated (3) some list and dict lookups can raise IndexError and KeyError which are propagated. 326 327 Context entries are passed only as keyword parameters \*\*contextkw. 
328 329 :param \*\*contextkw: context entries, usually empty 330 331 :returns: bytes 332 333 :raises ConstructError: raised for any reason 334 """ 335 stream = io.BytesIO() 336 self.build_stream(obj, stream, **contextkw) 337 return stream.getvalue() 338 339 def build_stream(self, obj, stream, **contextkw): 340 r""" 341 Build an object directly into a stream. See build(). 342 """ 343 context = Container(**contextkw) 344 context._parsing = False 345 context._building = True 346 context._sizing = False 347 context._params = context 348 self._build(obj, stream, context, "(building)") 349 350 def build_file(self, obj, filename, **contextkw): 351 r""" 352 Build an object into a closed binary file. See build(). 353 """ 354 # Open the file for reading as well as writing. This allows builders to 355 # read back the stream just written. For example. RawCopy does this. 356 # See issue #888. 357 with open(filename, 'w+b') as f: 358 self.build_stream(obj, f, **contextkw) 359 360 def _build(self, obj, stream, context, path): 361 """Override in your subclass.""" 362 raise NotImplementedError 363 364 def sizeof(self, **contextkw): 365 r""" 366 Calculate the size of this object, optionally using a context. 367 368 Some constructs have fixed size (like FormatField), some have variable-size and can determine their size given a context entry (like Bytes(this.otherfield1)), and some cannot determine their size (like VarInt). 369 370 Whenever size cannot be determined, SizeofError is raised. This method is NOT ALLOWED to raise any other exception, even if eg. context dictionary is missing a key, or subcon propagates ConstructError-derivative exception. 371 372 Context entries are passed only as keyword parameters \*\*contextkw. 
373 374 :param \*\*contextkw: context entries, usually empty 375 376 :returns: integer if computable, SizeofError otherwise 377 378 :raises SizeofError: size could not be determined in actual context, or is impossible to be determined 379 """ 380 context = Container(**contextkw) 381 context._parsing = False 382 context._building = False 383 context._sizing = True 384 context._params = context 385 return self._sizeof(context, "(sizeof)") 386 387 def _sizeof(self, context, path): 388 """Override in your subclass.""" 389 raise SizeofError(path=path) 390 391 def _actualsize(self, stream, context, path): 392 return self._sizeof(context, path) 393 394 def compile(self, filename=None): 395 """ 396 Transforms a construct into another construct that does same thing (has same parsing and building semantics) but is much faster when parsing. Already compiled instances just compile into itself. 397 398 Optionally, partial source code can be saved to a text file. This is meant only to inspect the generated code, not to import it from external scripts. 399 400 :returns: Compiled instance 401 """ 402 403 code = CodeGen() 404 code.append(""" 405 # generated by Construct, this source is for inspection only! do not import! 
406 407 from construct import * 408 from construct.lib import * 409 from io import BytesIO 410 import struct 411 import collections 412 import itertools 413 414 def restream(data, func): 415 return func(BytesIO(data)) 416 def reuse(obj, func): 417 return func(obj) 418 419 linkedinstances = {} 420 linkedparsers = {} 421 linkedbuilders = {} 422 423 len_ = len 424 sum_ = sum 425 min_ = min 426 max_ = max 427 abs_ = abs 428 """) 429 code.append(f""" 430 def parseall(io, this): 431 return {self._compileparse(code)} 432 def buildall(obj, io, this): 433 return {self._compilebuild(code)} 434 compiled = Compiled(parseall, buildall) 435 """) 436 source = code.toString() 437 438 if filename: 439 with open(filename, "wt") as f: 440 f.write(source) 441 442 modulename = hexlify(hashlib.sha1(source.encode()).digest()).decode() 443 module_spec = importlib.machinery.ModuleSpec(modulename, None) 444 module = importlib.util.module_from_spec(module_spec) 445 c = compile(source, '', 'exec') 446 exec(c, module.__dict__) 447 448 module.linkedinstances = code.linkedinstances 449 module.linkedparsers = code.linkedparsers 450 module.linkedbuilders = code.linkedbuilders 451 compiled = module.compiled 452 compiled.source = source 453 compiled.module = module 454 compiled.modulename = modulename 455 compiled.defersubcon = self 456 return compiled 457 458 def _compileinstance(self, code): 459 """Used internally.""" 460 if id(self) in code.linkedinstances: 461 return 462 code.append(f""" 463 # linkedinstances[{id(self)}] is {self} 464 """) 465 field = extractfield(self) 466 code.linkedinstances[id(self)] = field 467 code.linkedparsers[id(self)] = field._parse 468 code.linkedbuilders[id(self)] = field._build 469 470 def _compileparse(self, code): 471 """Used internally.""" 472 try: 473 if id(self) in code.parsercache: 474 return code.parsercache[id(self)] 475 emitted = self._emitparse(code) 476 code.parsercache[id(self)] = emitted 477 return emitted 478 except NotImplementedError: 479 
self._compileinstance(code) 480 return f"linkedparsers[{id(self)}](io, this, '(???)')" 481 482 def _compilebuild(self, code): 483 """Used internally.""" 484 try: 485 if id(self) in code.buildercache: 486 return code.buildercache[id(self)] 487 emitted = self._emitbuild(code) 488 code.buildercache[id(self)] = emitted 489 return emitted 490 except NotImplementedError: 491 self._compileinstance(code) 492 return f"linkedbuilders[{id(self)}](obj, io, this, '(???)')" 493 494 def _emitparse(self, code): 495 """Override in your subclass.""" 496 raise NotImplementedError 497 498 def _emitbuild(self, code): 499 """Override in your subclass.""" 500 raise NotImplementedError 501 502 def benchmark(self, sampledata, filename=None): 503 """ 504 Measures performance of your construct (its parsing and building runtime), both for the original instance and the compiled instance. Uses timeit module, over at min 1 loop, and at max over 100 millisecond time. 505 506 Optionally, results are saved to a text file for later inspection. Otherwise you can print the resulting string to terminal. 
507 508 :param sampledata: bytes, a valid blob parsable by this construct 509 :param filename: optional, string, results are saved to that file 510 511 :returns: string containing measurements 512 """ 513 from timeit import timeit 514 515 sampleobj = self.parse(sampledata) 516 parsetime = timeit(lambda: self.parse(sampledata), number=1) 517 runs = int(0.1/parsetime) 518 if runs > 1: 519 parsetime = timeit(lambda: self.parse(sampledata), number=runs)/runs 520 parsetime = "{:.10f} sec/call".format(parsetime) 521 522 self.build(sampleobj) 523 buildtime = timeit(lambda: self.build(sampleobj), number=1) 524 runs = int(0.1/buildtime) 525 if runs > 1: 526 buildtime = timeit(lambda: self.build(sampleobj), number=runs)/runs 527 buildtime = "{:.10f} sec/call".format(buildtime) 528 529 compiled = self.compile() 530 compiled.parse(sampledata) 531 parsetime2 = timeit(lambda: compiled.parse(sampledata), number=1) 532 runs = int(0.1/parsetime2) 533 if runs > 1: 534 parsetime2 = timeit(lambda: compiled.parse(sampledata), number=runs)/runs 535 parsetime2 = "{:.10f} sec/call".format(parsetime2) 536 537 compiled.build(sampleobj) 538 buildtime2 = timeit(lambda: compiled.build(sampleobj), number=1) 539 runs = int(0.1/buildtime2) 540 if runs > 1: 541 buildtime2 = timeit(lambda: compiled.build(sampleobj), number=runs)/runs 542 buildtime2 = "{:.10f} sec/call".format(buildtime2) 543 544 lines = [ 545 "Compiled instance performance:", 546 "parsing: {}", 547 "parsing compiled: {}", 548 "building: {}", 549 "building compiled: {}", 550 "" 551 ] 552 results = "\n".join(lines).format(parsetime, parsetime2, buildtime, buildtime2) 553 554 if filename: 555 with open(filename, "wt") as f: 556 f.write(results) 557 558 return results 559 560 def export_ksy(self, schemaname="unnamed_schema", filename=None): 561 from ruamel.yaml import YAML 562 yaml = YAML() 563 yaml.default_flow_style = False 564 output = io.StringIO() 565 gen = KsyGen() 566 main = dict(meta=dict(id=schemaname), 
seq=self._compileseq(gen), instances=gen.instances, enums=gen.enums, types=gen.types) 567 yaml.dump(main, output) 568 source = output.getvalue() 569 570 if filename: 571 with open(filename, "wt") as f: 572 f.write(source) 573 return source 574 575 def _compileseq(self, ksy, bitwise=False, recursion=0): 576 if recursion >= 3: 577 raise ConstructError("construct does not implement KSY export") 578 try: 579 return hyphenatelist(self._emitseq(ksy, bitwise)) 580 except NotImplementedError: 581 return [dict(id="x", **self._compilefulltype(ksy, bitwise, recursion+1))] 582 583 def _compileprimitivetype(self, ksy, bitwise=False, recursion=0): 584 if recursion >= 3: 585 raise ConstructError("construct does not implement KSY export") 586 try: 587 return self._emitprimitivetype(ksy, bitwise) 588 except NotImplementedError: 589 name = "type_%s" % ksy.allocateId() 590 ksy.types[name] = dict(seq=self._compileseq(ksy, bitwise, recursion+1)) 591 return name 592 593 def _compilefulltype(self, ksy, bitwise=False, recursion=0): 594 if recursion >= 3: 595 raise ConstructError("construct does not implement KSY export") 596 try: 597 return hyphenatedict(self._emitfulltype(ksy, bitwise)) 598 except NotImplementedError: 599 return dict(type=self._compileprimitivetype(ksy, bitwise, recursion+1)) 600 601 def _emitseq(self, ksy, bitwise): 602 """Override in your subclass.""" 603 raise NotImplementedError 604 605 def _emitprimitivetype(self, ksy, bitwise): 606 """Override in your subclass.""" 607 raise NotImplementedError 608 609 def _emitfulltype(self, ksy, bitwise): 610 """Override in your subclass.""" 611 raise NotImplementedError 612 613 def __rtruediv__(self, name): 614 """ 615 Used for renaming subcons, usually part of a Struct, like Struct("index" / Byte). 616 """ 617 return Renamed(self, newname=name) 618 619 __rdiv__ = __rtruediv__ 620 621 def __mul__(self, other): 622 """ 623 Used for adding docstrings and parsed hooks to subcons, like "field" / Byte * "docstring" * processfunc. 
624 """ 625 if isinstance(other, stringtypes): 626 return Renamed(self, newdocs=other) 627 if callable(other): 628 return Renamed(self, newparsed=other) 629 raise ConstructError("operator * can only be used with string or lambda") 630 631 def __rmul__(self, other): 632 """ 633 Used for adding docstrings and parsed hooks to subcons, like "field" / Byte * "docstring" * processfunc. 634 """ 635 if isinstance(other, stringtypes): 636 return Renamed(self, newdocs=other) 637 if callable(other): 638 return Renamed(self, newparsed=other) 639 raise ConstructError("operator * can only be used with string or lambda") 640 641 def __add__(self, other): 642 """ 643 Used for making Struct like ("index"/Byte + "prefix"/Byte). 644 """ 645 lhs = self.subcons if isinstance(self, Struct) else [self] 646 rhs = other.subcons if isinstance(other, Struct) else [other] 647 return Struct(*(lhs + rhs)) 648 649 def __rshift__(self, other): 650 """ 651 Used for making Sequences like (Byte >> Short). 652 """ 653 lhs = self.subcons if isinstance(self, Sequence) else [self] 654 rhs = other.subcons if isinstance(other, Sequence) else [other] 655 return Sequence(*(lhs + rhs)) 656 657 def __getitem__(self, count): 658 """ 659 Used for making Arrays like Byte[5] and Byte[this.count]. 660 """ 661 if isinstance(count, slice): 662 raise ConstructError("subcon[N] syntax can only be used for Arrays, use GreedyRange(subcon) instead?") 663 if isinstance(count, int) or callable(count): 664 return Array(count, self) 665 raise ConstructError("subcon[N] syntax expects integer or context lambda") 666 667 668class Subconstruct(Construct): 669 r""" 670 Abstract subconstruct (wraps an inner construct, inheriting its name and flags). Parsing and building is by default deferred to subcon, same as sizeof. 
671 672 :param subcon: Construct instance 673 """ 674 def __init__(self, subcon): 675 if not isinstance(subcon, Construct): 676 raise TypeError("subcon should be a Construct field") 677 super().__init__() 678 self.subcon = subcon 679 self.flagbuildnone = subcon.flagbuildnone 680 681 def __repr__(self): 682 return "<%s%s%s%s %s>" % (self.__class__.__name__, " "+self.name if self.name else "", " +nonbuild" if self.flagbuildnone else "", " +docs" if self.docs else "", repr(self.subcon), ) 683 684 def _parse(self, stream, context, path): 685 return self.subcon._parsereport(stream, context, path) 686 687 def _build(self, obj, stream, context, path): 688 return self.subcon._build(obj, stream, context, path) 689 690 def _sizeof(self, context, path): 691 return self.subcon._sizeof(context, path) 692 693 694class Adapter(Subconstruct): 695 r""" 696 Abstract adapter class. 697 698 Needs to implement `_decode()` for parsing and `_encode()` for building. 699 700 :param subcon: Construct instance 701 """ 702 def _parse(self, stream, context, path): 703 obj = self.subcon._parsereport(stream, context, path) 704 return self._decode(obj, context, path) 705 706 def _build(self, obj, stream, context, path): 707 obj2 = self._encode(obj, context, path) 708 buildret = self.subcon._build(obj2, stream, context, path) 709 return obj 710 711 def _decode(self, obj, context, path): 712 raise NotImplementedError 713 714 def _encode(self, obj, context, path): 715 raise NotImplementedError 716 717 718class SymmetricAdapter(Adapter): 719 r""" 720 Abstract adapter class. 721 722 Needs to implement `_decode()` only, for both parsing and building. 723 724 :param subcon: Construct instance 725 """ 726 def _encode(self, obj, context, path): 727 return self._decode(obj, context, path) 728 729 730class Validator(SymmetricAdapter): 731 r""" 732 Abstract class that validates a condition on the encoded/decoded object. 
733 734 Needs to implement `_validate()` that returns a bool (or a truthy value) 735 736 :param subcon: Construct instance 737 """ 738 def _decode(self, obj, context, path): 739 if not self._validate(obj, context, path): 740 raise ValidationError("object failed validation: %s" % (obj,), path=path) 741 return obj 742 743 def _validate(self, obj, context, path): 744 raise NotImplementedError 745 746 747class Tunnel(Subconstruct): 748 r""" 749 Abstract class that allows other constructs to read part of the stream as if they were reading the entire stream. See Prefixed for example. 750 751 Needs to implement `_decode()` for parsing and `_encode()` for building. 752 """ 753 def _parse(self, stream, context, path): 754 data = stream_read_entire(stream, path) # reads entire stream 755 data = self._decode(data, context, path) 756 return self.subcon.parse(data, **context) 757 758 def _build(self, obj, stream, context, path): 759 stream2 = io.BytesIO() 760 buildret = self.subcon._build(obj, stream2, context, path) 761 data = stream2.getvalue() 762 data = self._encode(data, context, path) 763 stream_write(stream, data, len(data), path) 764 return obj 765 766 def _sizeof(self, context, path): 767 raise SizeofError(path=path) 768 769 def _decode(self, data, context, path): 770 raise NotImplementedError 771 772 def _encode(self, data, context, path): 773 raise NotImplementedError 774 775 776class Compiled(Construct): 777 """Used internally.""" 778 779 def __init__(self, parsefunc, buildfunc): 780 super().__init__() 781 self.source = None 782 self.defersubcon = None 783 self.parsefunc = parsefunc 784 self.buildfunc = buildfunc 785 786 def _parse(self, stream, context, path): 787 return self.parsefunc(stream, context) 788 789 def _build(self, obj, stream, context, path): 790 return self.buildfunc(obj, stream, context) 791 792 def _sizeof(self, context, path): 793 return self.defersubcon._sizeof(context, path) 794 795 def compile(self, filename=None): 796 return self 797 798 def 
benchmark(self, sampledata, filename=None): 799 return self.defersubcon.benchmark(sampledata, filename) 800 801 802#=============================================================================== 803# bytes and bits 804#=============================================================================== 805class Bytes(Construct): 806 r""" 807 Field consisting of a specified number of bytes. 808 809 Parses into a bytes (of given length). Builds into the stream directly (but checks that given object matches specified length). Can also build from an integer for convenience (although BytesInteger should be used instead). Size is the specified length. 810 811 Can also build from a bytearray. 812 813 :param length: integer or context lambda 814 815 :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes 816 :raises StringError: building from non-bytes value, perhaps unicode 817 818 Can propagate any exception from the lambda, possibly non-ConstructError. 819 820 Example:: 821 822 >>> d = Bytes(4) 823 >>> d.parse(b'beef') 824 b'beef' 825 >>> d.build(b'beef') 826 b'beef' 827 >>> d.build(0) 828 b'\x00\x00\x00\x00' 829 >>> d.sizeof() 830 4 831 832 >>> d = Struct( 833 ... "length" / Int8ub, 834 ... "data" / Bytes(this.length), 835 ... 
) 836 >>> d.parse(b"\x04beef") 837 Container(length=4, data=b'beef') 838 >>> d.sizeof() 839 construct.core.SizeofError: cannot calculate size, key not found in context 840 """ 841 842 def __init__(self, length): 843 super().__init__() 844 self.length = length 845 846 def _parse(self, stream, context, path): 847 length = self.length(context) if callable(self.length) else self.length 848 return stream_read(stream, length, path) 849 850 def _build(self, obj, stream, context, path): 851 length = self.length(context) if callable(self.length) else self.length 852 data = integer2bytes(obj, length) if isinstance(obj, int) else obj 853 data = bytes(data) if type(data) is bytearray else data 854 stream_write(stream, data, length, path) 855 return data 856 857 def _sizeof(self, context, path): 858 try: 859 return self.length(context) if callable(self.length) else self.length 860 except (KeyError, AttributeError): 861 raise SizeofError("cannot calculate size, key not found in context", path=path) 862 863 def _emitparse(self, code): 864 return f"io.read({self.length})" 865 866 def _emitbuild(self, code): 867 return f"(io.write(obj), obj)[1]" 868 869 def _emitfulltype(self, ksy, bitwise): 870 return dict(size=self.length) 871 872 873@singleton 874class GreedyBytes(Construct): 875 r""" 876 Field consisting of unknown number of bytes. 877 878 Parses the stream to the end. Builds into the stream directly (without checks). Size is undefined. 879 880 Can also build from a bytearray. 
881 882 :raises StreamError: stream failed when reading until EOF 883 :raises StringError: building from non-bytes value, perhaps unicode 884 885 Example:: 886 887 >>> GreedyBytes.parse(b"asislight") 888 b'asislight' 889 >>> GreedyBytes.build(b"asislight") 890 b'asislight' 891 """ 892 893 def _parse(self, stream, context, path): 894 return stream_read_entire(stream, path) 895 896 def _build(self, obj, stream, context, path): 897 data = bytes(obj) if type(obj) is bytearray else obj 898 stream_write(stream, data, len(data), path) 899 return data 900 901 def _emitparse(self, code): 902 return f"io.read()" 903 904 def _emitbuild(self, code): 905 return f"(io.write(obj), obj)[1]" 906 907 def _emitfulltype(self, ksy, bitwise): 908 return dict(size_eos=True) 909 910 911def Bitwise(subcon): 912 r""" 913 Converts the stream from bytes to bits, and passes the bitstream to underlying subcon. Bitstream is a stream that contains 8 times as many bytes, and each byte is either \\x00 or \\x01 (in documentation those bytes are called bits). 914 915 Parsing building and size are deferred to subcon, although size gets divided by 8 (therefore the subcon's size must be a multiple of 8). 916 917 Note that by default the bit ordering is from MSB to LSB for every byte (ie. bit-level big-endian). If you need it reversed, wrap this subcon with :class:`construct.core.BitsSwapped`. 918 919 :param subcon: Construct instance, any field that works with bits (like BitsInteger) or is bit-byte agnostic (like Struct or Flag) 920 921 See :class:`~construct.core.Transformed` and :class:`~construct.core.Restreamed` for raisable exceptions. 922 923 Example:: 924 925 >>> d = Bitwise(Struct( 926 ... 'a' / Nibble, 927 ... 'b' / Bytewise(Float32b), 928 ... 'c' / Padding(4), 929 ... 
)) 930 >>> d.parse(bytes(5)) 931 Container(a=0, b=0.0, c=None) 932 >>> d.sizeof() 933 5 934 935 Obtaining other byte or bit orderings:: 936 937 >>> d = Bitwise(Bytes(16)) 938 >>> d.parse(b'\x01\x03') 939 b'\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x01\x01' 940 >>> d = BitsSwapped(Bitwise(Bytes(16))) 941 >>> d.parse(b'\x01\x03') 942 b'\x01\x00\x00\x00\x00\x00\x00\x00\x01\x01\x00\x00\x00\x00\x00\x00' 943 """ 944 945 try: 946 size = subcon.sizeof() 947 macro = Transformed(subcon, bytes2bits, size//8, bits2bytes, size//8) 948 except SizeofError: 949 macro = Restreamed(subcon, bytes2bits, 1, bits2bytes, 8, lambda n: n//8) 950 def _emitseq(ksy, bitwise): 951 return subcon._compileseq(ksy, bitwise=True) 952 def _emitprimitivetype(ksy, bitwise): 953 return subcon._compileprimitivetype(ksy, bitwise=True) 954 def _emitfulltype(ksy, bitwise): 955 return subcon._compilefulltype(ksy, bitwise=True) 956 macro._emitseq = _emitseq 957 macro._emitprimitivetype = _emitprimitivetype 958 macro._emitfulltype = _emitfulltype 959 return macro 960 961 962def Bytewise(subcon): 963 r""" 964 Converts the bitstream back to normal byte stream. Must be used within :class:`~construct.core.Bitwise`. 965 966 Parsing building and size are deferred to subcon, although size gets multiplied by 8. 967 968 :param subcon: Construct instance, any field that works with bytes or is bit-byte agnostic 969 970 See :class:`~construct.core.Transformed` and :class:`~construct.core.Restreamed` for raisable exceptions. 971 972 Example:: 973 974 >>> d = Bitwise(Struct( 975 ... 'a' / Nibble, 976 ... 'b' / Bytewise(Float32b), 977 ... 'c' / Padding(4), 978 ... 
class FormatField(Construct):
    r"""
    Field that uses `struct` module to pack and unpack CPU-sized integers and floats and booleans. This is used to implement most Int* Float* fields, but for example cannot pack 24-bit integers, which is left to :class:`~construct.core.BytesInteger` class. For booleans I also recommend using Flag class instead.

    See `struct module <https://docs.python.org/3/library/struct.html>`_ documentation for instructions on crafting format strings.

    Parses into an integer or float or boolean. Builds from an integer or float or boolean into specified byte count and endianness. Size is determined by `struct` module according to specified format string.

    :param endianity: string, character like: < > =
    :param format: string, character like: B H L Q b h l q e f d ?

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises FormatFieldError: wrong format string, or struct.(un)pack complained about the value

    Example::

        >>> d = FormatField(">", "H") or Int16ub
        >>> d.parse(b"\x01\x00")
        256
        >>> d.build(256)
        b"\x01\x00"
        >>> d.sizeof()
        2
    """

    def __init__(self, endianity, format):
        # Membership is tested against a sequence of single characters (not a plain string),
        # so that empty or multi-character strings cannot slip through as substrings.
        # The offending value goes into the message itself: the second positional argument
        # of ConstructError is `path`, so passing the value there mislabels it as a path.
        if endianity not in ("=", "<", ">"):
            raise FormatFieldError("endianity must be like: = < >, found %r" % (endianity,))
        if format not in tuple("fdBHLQbhlqe?"):
            raise FormatFieldError("format must be like: B H L Q b h l q e f d ?, found %r" % (format,))

        super().__init__()
        self.fmtstr = endianity+format
        self.length = struct.calcsize(endianity+format)

    def _parse(self, stream, context, path):
        """Read `self.length` bytes and unpack them into a single value."""
        data = stream_read(stream, self.length, path)
        try:
            return struct.unpack(self.fmtstr, data)[0]
        except Exception:
            raise FormatFieldError("struct %r error during parsing" % self.fmtstr, path=path)

    def _build(self, obj, stream, context, path):
        """Pack `obj` and write the resulting bytes; returns `obj` unchanged."""
        try:
            data = struct.pack(self.fmtstr, obj)
        except Exception:
            raise FormatFieldError("struct %r error during building, given value %r" % (self.fmtstr, obj), path=path)
        stream_write(stream, data, self.length, path)
        return obj

    def _sizeof(self, context, path):
        """Size in bytes, as computed by `struct.calcsize` in the constructor."""
        return self.length

    def _emitparse(self, code):
        # a precompiled struct.Struct is registered once in the generated module, then referenced by name
        fname = f"formatfield_{code.allocateId()}"
        code.append(f"{fname} = struct.Struct({repr(self.fmtstr)})")
        return f"{fname}.unpack(io.read({self.length}))[0]"

    def _emitbuild(self, code):
        fname = f"formatfield_{code.allocateId()}"
        code.append(f"{fname} = struct.Struct({repr(self.fmtstr)})")
        return f"(io.write({fname}.pack(obj)), obj)[1]"

    def _emitprimitivetype(self, ksy, bitwise):
        """Return the KSY primitive type name; falls through (returns None) for the `e` and `?` formats."""
        endianity,format = self.fmtstr
        signed = format.islower()
        # "=" means native byte order, so it counts as swapped only on little-endian hosts
        swapped = (endianity == "<") or (endianity == "=" and sys.byteorder == "little")
        if format in "bhlqBHLQ":
            if bitwise:
                assert not signed
                assert not swapped
                return "b%s" % (8*self.length, )
            else:
                return "%s%s%s" % ("s" if signed else "u", self.length, "le" if swapped else "be", )
        if format in "fd":
            assert not bitwise
            return "f%s%s" % (self.length, "le" if swapped else "be", )
class BytesInteger(Construct):
    r"""
    Field that packs integers of arbitrary size. Int24* fields use this class.

    Parses into an integer. Builds from an integer into specified byte count and endianness. Size is specified in ctor.

    Analog to :class:`~construct.core.BitsInteger` which operates on bits. In fact::

        BytesInteger(n) <--> Bitwise(BitsInteger(8*n))
        BitsInteger(8*n) <--> Bytewise(BytesInteger(n))

    Byte ordering refers to bytes (chunks of 8 bits) so, for example::

        BytesInteger(n, swapped=True) <--> Bitwise(BitsInteger(8*n, swapped=True))

    :param length: integer or context lambda, number of bytes in the field
    :param signed: bool, whether the value is signed (two's complement), default is False (unsigned)
    :param swapped: bool or context lambda, whether to swap byte order (little endian), default is False (big endian)

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises IntegerError: length is negative
    :raises IntegerError: value is not an integer
    :raises IntegerError: number does not fit given width and signed parameters

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        >>> d = BytesInteger(4) or Int32ub
        >>> d.parse(b"abcd")
        1633837924
        >>> d.build(1)
        b'\x00\x00\x00\x01'
        >>> d.sizeof()
        4
    """

    def __init__(self, length, signed=False, swapped=False):
        super().__init__()
        self.length = length
        self.signed = signed
        self.swapped = swapped

    def _parse(self, stream, context, path):
        nbytes = evaluate(self.length, context)
        if nbytes < 0:
            raise IntegerError(f"length {nbytes} must be non-negative", path=path)
        raw = stream_read(stream, nbytes, path)
        # little endian input is flipped first, so the conversion below always sees big endian
        littleendian = evaluate(self.swapped, context)
        if littleendian:
            raw = swapbytes(raw)
        try:
            return bytes2integer(raw, self.signed)
        except ValueError as e:
            raise IntegerError(str(e), path=path)

    def _build(self, obj, stream, context, path):
        if not isinstance(obj, integertypes):
            raise IntegerError(f"value {obj} is not an integer", path=path)
        if not self.signed and obj < 0:
            raise IntegerError(f"value {obj} is negative but signed is false", path=path)
        nbytes = evaluate(self.length, context)
        if nbytes < 0:
            raise IntegerError(f"length {nbytes} must be non-negative", path=path)
        try:
            raw = integer2bytes(obj, nbytes, self.signed)
        except ValueError as e:
            raise IntegerError(str(e), path=path)
        littleendian = evaluate(self.swapped, context)
        if littleendian:
            raw = swapbytes(raw)
        stream_write(stream, raw, nbytes, path)
        return obj

    def _sizeof(self, context, path):
        try:
            nbytes = evaluate(self.length, context)
        except (KeyError, AttributeError):
            raise SizeofError("cannot calculate size, key not found in context", path=path)
        return nbytes

    def _emitparse(self, code):
        return f"bytes2integer(swapbytes(io.read({self.length})) if {self.swapped} else io.read({self.length}), {self.signed})"

    def _emitbuild(self, code):
        return f"((io.write(swapbytes(integer2bytes(obj, {self.length}, {self.signed})) if ({self.swapped}) else integer2bytes(obj, {self.length}, {self.signed}))), obj)[1]"

    def _emitprimitivetype(self, ksy, bitwise):
        if bitwise:
            assert not self.signed
            assert not self.swapped
            return "b%s" % (8*self.length, )
        # byte-level type: a context lambda for `swapped` cannot be expressed here
        assert not callable(self.swapped)
        sign = "s" if self.signed else "u"
        order = "le" if self.swapped else "be"
        return "%s%s%s" % (sign, self.length, order, )
class BitsInteger(Construct):
    r"""
    Field that packs arbitrarily large (or small) integers. Some fields (Bit Nibble Octet) use this class. Must be enclosed in :class:`~construct.core.Bitwise` context.

    Parses into an integer. Builds from an integer into specified bit count and endianness. Size (in bits) is specified in ctor.

    Analog to :class:`~construct.core.BytesInteger` which operates on bytes. In fact::

        BytesInteger(n) <--> Bitwise(BitsInteger(8*n))
        BitsInteger(8*n) <--> Bytewise(BytesInteger(n))

    Note that little-endianness is only defined for multiples of 8 bits.

    Byte ordering (i.e. `swapped` parameter) refers to bytes (chunks of 8 bits) so, for example::

        BytesInteger(n, swapped=True) <--> Bitwise(BitsInteger(8*n, swapped=True))

    Swapped argument was recently fixed. To obtain previous (faulty) behavior, you can use `ByteSwapped`, `BitsSwapped` and `Bitwise` in whatever particular order (see examples).

    :param length: integer or context lambda, number of bits in the field
    :param signed: bool, whether the value is signed (two's complement), default is False (unsigned)
    :param swapped: bool or context lambda, whether to swap byte order (little endian), default is False (big endian)

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises IntegerError: length is negative
    :raises IntegerError: value is not an integer
    :raises IntegerError: number does not fit given width and signed parameters
    :raises IntegerError: little-endianness selected but length is not multiple of 8 bits

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Examples::

        >>> d = Bitwise(BitsInteger(8)) or Bitwise(Octet)
        >>> d.parse(b"\x10")
        16
        >>> d.build(255)
        b'\xff'
        >>> d.sizeof()
        1

    Obtaining other byte or bit orderings::

        >>> d = BitsInteger(2)
        >>> d.parse(b'\x01\x00') # Bit-Level Big-Endian
        2
        >>> d = ByteSwapped(BitsInteger(2))
        >>> d.parse(b'\x01\x00') # Bit-Level Little-Endian
        1
        >>> d = BitsInteger(16) # Byte-Level Big-Endian, Bit-Level Big-Endian
        >>> d.build(5 + 19*256)
        b'\x00\x00\x00\x01\x00\x00\x01\x01\x00\x00\x00\x00\x00\x01\x00\x01'
        >>> d = BitsInteger(16, swapped=True) # Byte-Level Little-Endian, Bit-Level Big-Endian
        >>> d.build(5 + 19*256)
        b'\x00\x00\x00\x00\x00\x01\x00\x01\x00\x00\x00\x01\x00\x00\x01\x01'
        >>> d = ByteSwapped(BitsInteger(16)) # Byte-Level Little-Endian, Bit-Level Little-Endian
        >>> d.build(5 + 19*256)
        b'\x01\x00\x01\x00\x00\x00\x00\x00\x01\x01\x00\x00\x01\x00\x00\x00'
        >>> d = ByteSwapped(BitsInteger(16, swapped=True)) # Byte-Level Big-Endian, Bit-Level Little-Endian
        >>> d.build(5 + 19*256)
        b'\x01\x01\x00\x00\x01\x00\x00\x00\x01\x00\x01\x00\x00\x00\x00\x00'
    """

    def __init__(self, length, signed=False, swapped=False):
        super().__init__()
        self.length = length
        self.signed = signed
        self.swapped = swapped

    def _parse(self, stream, context, path):
        nbits = evaluate(self.length, context)
        if nbits < 0:
            raise IntegerError(f"length {nbits} must be non-negative", path=path)
        bits = stream_read(stream, nbits, path)
        littleendian = evaluate(self.swapped, context)
        if littleendian:
            # byte swapping needs whole bytes, i.e. groups of exactly 8 bits
            if nbits % 8:
                raise IntegerError(f"little-endianness is only defined if {nbits} is multiple of 8 bits", path=path)
            bits = swapbytesinbits(bits)
        try:
            return bits2integer(bits, self.signed)
        except ValueError as e:
            raise IntegerError(str(e), path=path)

    def _build(self, obj, stream, context, path):
        if not isinstance(obj, integertypes):
            raise IntegerError(f"value {obj} is not an integer", path=path)
        if not self.signed and obj < 0:
            raise IntegerError(f"value {obj} is negative but signed is false", path=path)
        nbits = evaluate(self.length, context)
        if nbits < 0:
            raise IntegerError(f"length {nbits} must be non-negative", path=path)
        try:
            bits = integer2bits(obj, nbits, self.signed)
        except ValueError as e:
            raise IntegerError(str(e), path=path)
        littleendian = evaluate(self.swapped, context)
        if littleendian:
            # byte swapping needs whole bytes, i.e. groups of exactly 8 bits
            if nbits % 8:
                raise IntegerError(f"little-endianness is only defined if {nbits} is multiple of 8 bits", path=path)
            bits = swapbytesinbits(bits)
        stream_write(stream, bits, nbits, path)
        return obj

    def _sizeof(self, context, path):
        try:
            nbits = evaluate(self.length, context)
        except (KeyError, AttributeError):
            raise SizeofError("cannot calculate size, key not found in context", path=path)
        return nbits

    def _emitparse(self, code):
        return f"bits2integer(swapbytesinbits(io.read({self.length})) if {self.swapped} else io.read({self.length}), {self.signed})"

    def _emitbuild(self, code):
        return f"((io.write(swapbytesinbits(integer2bits(obj, {self.length}, {self.signed})) if ({self.swapped}) else integer2bits(obj, {self.length}, {self.signed}))), obj)[1]"

    def _emitprimitivetype(self, ksy, bitwise):
        assert not self.signed
        assert not self.swapped
        return "b%s" % (self.length, )
@singleton
def Bit():
    """Integer made of a single bit; only usable inside a Bitwise (eg. BitStruct)"""
    return BitsInteger(length=1)
@singleton
def Nibble():
    """Integer made of 4 bits; only usable inside a Bitwise (eg. BitStruct)"""
    return BitsInteger(length=4)
@singleton
def Octet():
    """Integer made of 8 bits; only usable inside a Bitwise (eg. BitStruct)"""
    return BitsInteger(length=8)

@singleton
def Int8ub():
    """8-bit integer, unsigned, big endian"""
    return FormatField(endianity=">", format="B")
@singleton
def Int16ub():
    """16-bit integer, unsigned, big endian"""
    return FormatField(endianity=">", format="H")
@singleton
def Int32ub():
    """32-bit integer, unsigned, big endian"""
    return FormatField(endianity=">", format="L")
@singleton
def Int64ub():
    """64-bit integer, unsigned, big endian"""
    return FormatField(endianity=">", format="Q")

@singleton
def Int8sb():
    """8-bit integer, signed, big endian"""
    return FormatField(endianity=">", format="b")
@singleton
def Int16sb():
    """16-bit integer, signed, big endian"""
    return FormatField(endianity=">", format="h")
@singleton
def Int32sb():
    """32-bit integer, signed, big endian"""
    return FormatField(endianity=">", format="l")
@singleton
def Int64sb():
    """64-bit integer, signed, big endian"""
    return FormatField(endianity=">", format="q")

@singleton
def Int8ul():
    """8-bit integer, unsigned, little endian"""
    return FormatField(endianity="<", format="B")
@singleton
def Int16ul():
    """16-bit integer, unsigned, little endian"""
    return FormatField(endianity="<", format="H")
@singleton
def Int32ul():
    """32-bit integer, unsigned, little endian"""
    return FormatField(endianity="<", format="L")
@singleton
def Int64ul():
    """64-bit integer, unsigned, little endian"""
    return FormatField(endianity="<", format="Q")

@singleton
def Int8sl():
    """8-bit integer, signed, little endian"""
    return FormatField(endianity="<", format="b")
@singleton
def Int16sl():
    """16-bit integer, signed, little endian"""
    return FormatField(endianity="<", format="h")
@singleton
def Int32sl():
    """32-bit integer, signed, little endian"""
    return FormatField(endianity="<", format="l")
@singleton
def Int64sl():
    """64-bit integer, signed, little endian"""
    return FormatField(endianity="<", format="q")

@singleton
def Int8un():
    """8-bit integer, unsigned, native endianity"""
    return FormatField(endianity="=", format="B")
@singleton
def Int16un():
    """16-bit integer, unsigned, native endianity"""
    return FormatField(endianity="=", format="H")
@singleton
def Int32un():
    """32-bit integer, unsigned, native endianity"""
    return FormatField(endianity="=", format="L")
@singleton
def Int64un():
    """64-bit integer, unsigned, native endianity"""
    return FormatField(endianity="=", format="Q")

@singleton
def Int8sn():
    """8-bit integer, signed, native endianity"""
    return FormatField(endianity="=", format="b")
@singleton
def Int16sn():
    """16-bit integer, signed, native endianity"""
    return FormatField(endianity="=", format="h")
@singleton
def Int32sn():
    """32-bit integer, signed, native endianity"""
    return FormatField(endianity="=", format="l")
@singleton
def Int64sn():
    """64-bit integer, signed, native endianity"""
    return FormatField(endianity="=", format="q")

# short aliases, all unsigned big endian
Byte = Int8ub
Short = Int16ub
Int = Int32ub
Long = Int64ub

@singleton
def Float16b():
    """16-bit IEEE 754 floating point number, big endian"""
    return FormatField(endianity=">", format="e")
@singleton
def Float16l():
    """16-bit IEEE 754 floating point number, little endian"""
    return FormatField(endianity="<", format="e")
@singleton
def Float16n():
    """16-bit IEEE 754 floating point number, native endianity"""
    return FormatField(endianity="=", format="e")
@singleton
def Float32b():
    """32-bit IEEE floating point number, big endian"""
    return FormatField(endianity=">", format="f")
@singleton
def Float32l():
    """32-bit IEEE floating point number, little endian"""
    return FormatField(endianity="<", format="f")
@singleton
def Float32n():
    """32-bit IEEE floating point number, native endianity"""
    return FormatField(endianity="=", format="f")

@singleton
def Float64b():
    """64-bit IEEE floating point number, big endian"""
    return FormatField(endianity=">", format="d")
@singleton
def Float64l():
    """64-bit IEEE floating point number, little endian"""
    return FormatField(endianity="<", format="d")
@singleton
def Float64n():
    """64-bit IEEE floating point number, native endianity"""
    return FormatField(endianity="=", format="d")

# short aliases, all big endian
Half = Float16b
Single = Float32b
Double = Float64b

# True on little-endian hosts; used as the `swapped` argument of the native-endian Int24 fields
native = sys.byteorder == "little"

@singleton
def Int24ub():
    """Unsigned 3-byte integer, big endian, as used in ancient file formats."""
    return BytesInteger(length=3, signed=False, swapped=False)
@singleton
def Int24ul():
    """Unsigned 3-byte integer, little endian, as used in ancient file formats."""
    return BytesInteger(length=3, signed=False, swapped=True)
@singleton
def Int24un():
    """Unsigned 3-byte integer, native endianity, as used in ancient file formats."""
    return BytesInteger(length=3, signed=False, swapped=native)
@singleton
def Int24sb():
    """Signed 3-byte integer, big endian, as used in ancient file formats."""
    return BytesInteger(length=3, signed=True, swapped=False)
@singleton
def Int24sl():
    """Signed 3-byte integer, little endian, as used in ancient file formats."""
    return BytesInteger(length=3, signed=True, swapped=True)
@singleton
def Int24sn():
    """Signed 3-byte integer, native endianity, as used in ancient file formats."""
    return BytesInteger(length=3, signed=True, swapped=native)
@singleton
class VarInt(Construct):
    r"""
    VarInt encoded unsigned integer. The number is split into groups of 7 bits, one group per byte of the stream, where the leftmost bit (MSB) of a byte is unset when that byte is terminal. Scheme is defined at Google site related to `Protocol Buffers <https://developers.google.com/protocol-buffers/docs/encoding>`_.

    Can only encode non-negative numbers.

    Parses into an integer. Builds from an integer. Size is undefined.

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises IntegerError: given a negative value, or not an integer

    Example::

        >>> VarInt.build(1)
        b'\x01'
        >>> VarInt.build(2**100)
        b'\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x04'
    """

    def _parse(self, stream, context, path):
        value = 0
        shift = 0
        while True:
            octet = byte2int(stream_read(stream, 1, path))
            # the first byte read carries the lowest 7 bits, each following byte the next 7
            value |= (octet & 0b01111111) << shift
            if octet & 0b10000000 == 0:
                return value
            shift += 7

    def _build(self, obj, stream, context, path):
        if not isinstance(obj, integertypes):
            raise IntegerError(f"value {obj} is not an integer", path=path)
        if obj < 0:
            raise IntegerError(f"VarInt cannot build from negative number {obj}", path=path)
        remaining = obj
        encoded = bytearray()
        # every byte except the last gets the continuation bit (MSB) set
        while remaining > 0b01111111:
            encoded.append((remaining & 0b01111111) | 0b10000000)
            remaining >>= 7
        encoded.append(remaining)
        stream_write(stream, bytes(encoded), len(encoded), path)
        return obj

    def _emitprimitivetype(self, ksy, bitwise):
        return "vlq_base128_le"


@singleton
class ZigZag(Construct):
    r"""
    ZigZag encoded signed integer. This is a variant of VarInt encoding that also can encode negative numbers. Scheme is defined at Google site related to `Protocol Buffers <https://developers.google.com/protocol-buffers/docs/encoding>`_.

    Can encode negative numbers.

    Parses into an integer. Builds from an integer. Size is undefined.

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises IntegerError: given not an integer

    Example::

        >>> ZigZag.build(-3)
        b'\x05'
        >>> ZigZag.build(3)
        b'\x06'
    """

    def _parse(self, stream, context, path):
        raw = VarInt._parse(stream, context, path)
        half = raw//2
        # even raw values map to non-negative numbers, odd ones to negative numbers
        if raw & 1:
            return -(half+1)
        return half

    def _build(self, obj, stream, context, path):
        if not isinstance(obj, integertypes):
            raise IntegerError(f"value {obj} is not an integer", path=path)
        raw = 2*obj if obj >= 0 else 2*abs(obj)-1
        VarInt._build(raw, stream, context, path)
        return obj
#: Encodings explicitly supported by PaddedString and CString. Each value is the
#: number of null bytes that `encodingunit` returns for that encoding.
possiblestringencodings = {
    "ascii": 1,
    "utf8": 1, "utf_8": 1, "u8": 1,
    "utf16": 2, "utf_16": 2, "u16": 2, "utf_16_be": 2, "utf_16_le": 2,
    "utf32": 4, "utf_32": 4, "u32": 4, "utf_32_be": 4, "utf_32_le": 4,
}


def encodingunit(encoding):
    """Used internally."""
    # names are matched case-insensitively, with dashes treated as underscores
    normalized = encoding.replace("-","_").lower()
    try:
        unitsize = possiblestringencodings[normalized]
    except KeyError:
        raise StringError("encoding %r not found among %r" % (normalized, possiblestringencodings,))
    # bytes(n) yields n null bytes
    return bytes(unitsize)


class StringEncoded(Adapter):
    """Used internally."""

    def __init__(self, subcon, encoding):
        super().__init__(subcon)
        if not encoding:
            raise StringError("String* classes require explicit encoding")
        self.encoding = encoding

    def _decode(self, obj, context, path):
        return obj.decode(self.encoding)

    def _encode(self, obj, context, path):
        if not isinstance(obj, unicodestringtype):
            raise StringError("string encoding failed, expected unicode string", path=path)
        # an empty string always builds into zero bytes, bypassing the codec
        # (presumably because some codecs, like utf16, emit a BOM even for empty input)
        if obj == u"":
            return b""
        return obj.encode(self.encoding)

    def _emitparse(self, code):
        return f"({self.subcon._compileparse(code)}).decode({repr(self.encoding)})"

    def _emitbuild(self, code):
        # The following is not a valid implementation, obj.encode() should be inserted into subcon:
        # return f"({self.subcon._compilebuild(code)}).encode({repr(self.encoding)})"
        raise NotImplementedError


def PaddedString(length, encoding):
    r"""
    Configurable, fixed-length or variable-length string field.

    When parsing, the byte string is stripped of null bytes (per encoding unit), then decoded. Length is an integer or context lambda. When building, the string is encoded and then padded to specified length. If encoded string is larger than the specified length, it fails with PaddingError. Size is same as length parameter.

    .. warning:: PaddedString and CString only support encodings explicitly listed in :class:`~construct.core.possiblestringencodings` .

    :param length: integer or context lambda, length in bytes (not unicode characters)
    :param encoding: string like: utf8 utf16 utf32 ascii

    :raises StringError: building a non-unicode string
    :raises StringError: selected encoding is not on supported list

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        >>> d = PaddedString(10, "utf8")
        >>> d.build(u"Афон")
        b'\xd0\x90\xd1\x84\xd0\xbe\xd0\xbd\x00\x00'
        >>> d.parse(_)
        u'Афон'
    """
    padunit = encodingunit(encoding)
    stripped = NullStripped(GreedyBytes, pad=padunit)
    sized = FixedSized(length, stripped)
    macro = StringEncoded(sized, encoding)

    def _emitfulltype(ksy, bitwise):
        return {"size": length, "type": "strz", "encoding": encoding}

    macro._emitfulltype = _emitfulltype
    return macro
def PascalString(lengthfield, encoding):
    r"""
    Length-prefixed string. The length field can be variable length (such as VarInt) or fixed length (such as Int64ub). :class:`~construct.core.VarInt` is recommended when designing new protocols. Stored length is in bytes, not characters. Size is not defined.

    :param lengthfield: Construct instance, field used to parse and build the length (like VarInt Int64ub)
    :param encoding: string like: utf8 utf16 utf32 ascii

    :raises StringError: building a non-unicode string

    Example::

        >>> d = PascalString(VarInt, "utf8")
        >>> d.build(u"Афон")
        b'\x08\xd0\x90\xd1\x84\xd0\xbe\xd0\xbd'
        >>> d.parse(_)
        u'Афон'
    """
    prefixed = Prefixed(lengthfield, GreedyBytes)
    macro = StringEncoded(prefixed, encoding)

    def _emitparse(code):
        return f"io.read({lengthfield._compileparse(code)}).decode({repr(encoding)})"

    def _emitseq(ksy, bitwise):
        lengthentry = {"id": "lengthfield", "type": lengthfield._compileprimitivetype(ksy, bitwise)}
        dataentry = {"id": "data", "size": "lengthfield", "type": "str", "encoding": encoding}
        return [lengthentry, dataentry]

    macro._emitparse = _emitparse
    macro._emitseq = _emitseq
    return macro
def CString(encoding):
    r"""
    String ending in a terminating null byte (or null bytes in case of UTF16 UTF32).

    .. warning:: PaddedString and CString only support encodings explicitly listed in :class:`~construct.core.possiblestringencodings` .

    :param encoding: string like: utf8 utf16 utf32 ascii

    :raises StringError: building a non-unicode string
    :raises StringError: selected encoding is not on supported list

    Example::

        >>> d = CString("utf8")
        >>> d.build(u"Афон")
        b'\xd0\x90\xd1\x84\xd0\xbe\xd0\xbd\x00'
        >>> d.parse(_)
        u'Афон'
    """
    terminator = encodingunit(encoding)
    terminated = NullTerminated(GreedyBytes, term=terminator)
    macro = StringEncoded(terminated, encoding)

    def _emitfulltype(ksy, bitwise):
        return {"type": "strz", "encoding": encoding}

    macro._emitfulltype = _emitfulltype
    return macro


def GreedyString(encoding):
    r"""
    String that reads entire stream until EOF, and writes a given string as-is. Analog to :class:`~construct.core.GreedyBytes` but also applies unicode-to-bytes encoding.

    :param encoding: string like: utf8 utf16 utf32 ascii

    :raises StringError: building a non-unicode string
    :raises StreamError: stream failed when reading until EOF

    Example::

        >>> d = GreedyString("utf8")
        >>> d.build(u"Афон")
        b'\xd0\x90\xd1\x84\xd0\xbe\xd0\xbd'
        >>> d.parse(_)
        u'Афон'
    """
    macro = StringEncoded(GreedyBytes, encoding)

    def _emitfulltype(ksy, bitwise):
        return {"size_eos": True, "type": "str", "encoding": encoding}

    macro._emitfulltype = _emitfulltype
    return macro
@singleton
class Flag(Construct):
    r"""
    One byte (or one bit) field that maps to True or False. Any non-zero byte is considered True. Size is defined as 1.

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes

    Example::

        >>> Flag.parse(b"\x01")
        True
        >>> Flag.build(True)
        b'\x01'
    """

    def _parse(self, stream, context, path):
        unit = stream_read(stream, 1, path)
        return unit != b"\x00"

    def _build(self, obj, stream, context, path):
        unit = b"\x01" if obj else b"\x00"
        stream_write(stream, unit, 1, path)
        return obj

    def _sizeof(self, context, path):
        return 1

    def _emitparse(self, code):
        return f"(io.read(1) != b'\\x00')"

    def _emitbuild(self, code):
        return f"((io.write(b'\\x01') if obj else io.write(b'\\x00')), obj)[1]"

    def _emitfulltype(self, ksy, bitwise):
        typename = "b1" if bitwise else "u1"
        return {"type": typename, "_construct_render": "Flag"}


class EnumInteger(int):
    """Used internally."""


class EnumIntegerString(str):
    """Used internally."""

    @staticmethod
    def new(intvalue, stringvalue):
        # the integer is attached after creation, it does not take part in str construction
        label = EnumIntegerString(stringvalue)
        label.intvalue = intvalue
        return label

    def __int__(self):
        return self.intvalue

    def __repr__(self):
        return "EnumIntegerString.new(%s, %s)" % (self.intvalue, str.__repr__(self), )
class Enum(Adapter):
    r"""
    Translates unicode label names to subcon values, and vice versa.

    Parses integer subcon, then uses that value to lookup mapping dictionary. Returns an integer-convertible string (if mapping found) or an integer (otherwise). Building is a reversed process. Can build from an integer flag or string label. Size is same as subcon, unless it raises SizeofError.

    There is no default parameter, because if no mapping is found, it parses into an integer without error.

    This class supports enum34 module. See examples.

    This class supports exposing member labels as attributes, as integer-convertible strings. See examples.

    :param subcon: Construct instance, subcon to map to/from
    :param \*merge: optional, list of enum.IntEnum and enum.IntFlag instances, to merge labels and values from
    :param \*\*mapping: dict, mapping string names to values

    :raises MappingError: building from string but no mapping found

    Example::

        >>> d = Enum(Byte, one=1, two=2, four=4, eight=8)
        >>> d.parse(b"\x01")
        'one'
        >>> int(d.parse(b"\x01"))
        1
        >>> d.parse(b"\xff")
        255
        >>> int(d.parse(b"\xff"))
        255

        >>> d.build(d.one or "one" or 1)
        b'\x01'
        >>> d.one
        'one'

        import enum
        class E(enum.IntEnum or enum.IntFlag):
            one = 1
            two = 2

        Enum(Byte, E) <--> Enum(Byte, one=1, two=2)
        FlagsEnum(Byte, E) <--> FlagsEnum(Byte, one=1, two=2)
    """

    def __init__(self, subcon, *merge, **mapping):
        super().__init__(subcon)
        # members of the merged enum classes are added to (and may override) the keyword mapping
        for enum in merge:
            for enumentry in enum:
                mapping[enumentry.name] = enumentry.value
        # label -> value, used when building
        self.encmapping = {EnumIntegerString.new(v,k):v for k,v in mapping.items()}
        # value -> label, used when parsing
        self.decmapping = {v:EnumIntegerString.new(v,k) for k,v in mapping.items()}
        # value -> plain label, used when emitting KSY
        self.ksymapping = {v:k for k,v in mapping.items()}

    def __getattr__(self, name):
        """Expose each label as an attribute holding its integer-convertible string."""
        # __getattr__ only runs when normal lookup fails. The mapping is fetched through
        # __dict__ so that an instance whose __init__ has not run yet cannot re-enter
        # this method (via self.encmapping) and recurse without bound.
        encmapping = self.__dict__.get("encmapping")
        if encmapping is not None and name in encmapping:
            return self.decmapping[encmapping[name]]
        raise AttributeError("%r object has no attribute %r" % (type(self).__name__, name))

    def _decode(self, obj, context, path):
        try:
            return self.decmapping[obj]
        except KeyError:
            # unknown values are not an error, they parse into a plain integer
            return EnumInteger(obj)

    def _encode(self, obj, context, path):
        try:
            # integers are passed through as-is, without checking them against the mapping
            if isinstance(obj, integertypes):
                return obj
            return self.encmapping[obj]
        except KeyError:
            raise MappingError("building failed, no mapping for %r" % (obj,), path=path)

    def _emitparse(self, code):
        fname = f"factory_{code.allocateId()}"
        code.append(f"{fname} = {repr(self.decmapping)}")
        return f"reuse(({self.subcon._compileparse(code)}), lambda x: {fname}.get(x, EnumInteger(x)))"

    def _emitbuild(self, code):
        fname = f"factory_{code.allocateId()}"
        code.append(f"{fname} = {repr(self.encmapping)}")
        return f"reuse({fname}.get(obj, obj), lambda obj: ({self.subcon._compilebuild(code)}))"

    def _emitprimitivetype(self, ksy, bitwise):
        name = "enum_%s" % ksy.allocateId()
        ksy.enums[name] = self.ksymapping
        return name
{fname}.get(x, EnumInteger(x)))" 1877 1878 def _emitbuild(self, code): 1879 fname = f"factory_{code.allocateId()}" 1880 code.append(f"{fname} = {repr(self.encmapping)}") 1881 return f"reuse({fname}.get(obj, obj), lambda obj: ({self.subcon._compilebuild(code)}))" 1882 1883 def _emitprimitivetype(self, ksy, bitwise): 1884 name = "enum_%s" % ksy.allocateId() 1885 ksy.enums[name] = self.ksymapping 1886 return name 1887 1888 1889class BitwisableString(str): 1890 """Used internally.""" 1891 1892 # def __repr__(self): 1893 # return "BitwisableString(%s)" % (str.__repr__(self), ) 1894 1895 def __or__(self, other): 1896 return BitwisableString("{}|{}".format(self, other)) 1897 1898 1899class FlagsEnum(Adapter): 1900 r""" 1901 Translates unicode label names to subcon integer (sub)values, and vice versa. 1902 1903 Parses integer subcon, then creates a Container, where flags define each key. Builds from a container by bitwise-oring of each flag if it matches a set key. Can build from an integer flag or string label directly, as well as | concatenations thereof (see examples). Size is same as subcon, unless it raises SizeofError. 1904 1905 This class supports enum34 module. See examples. 1906 1907 This class supports exposing member labels as attributes, as bitwisable strings. See examples. 1908 1909 :param subcon: Construct instance, must operate on integers 1910 :param \*merge: optional, list of enum.IntEnum and enum.IntFlag instances, to merge labels and values from 1911 :param \*\*flags: dict, mapping string names to integer values 1912 1913 :raises MappingError: building from object not like: integer string dict 1914 :raises MappingError: building from string but no mapping found 1915 1916 Can raise arbitrary exceptions when computing | and & and value is non-integer. 
1917 1918 Example:: 1919 1920 >>> d = FlagsEnum(Byte, one=1, two=2, four=4, eight=8) 1921 >>> d.parse(b"\x03") 1922 Container(one=True, two=True, four=False, eight=False) 1923 >>> d.build(dict(one=True,two=True)) 1924 b'\x03' 1925 1926 >>> d.build(d.one|d.two or "one|two" or 1|2) 1927 b'\x03' 1928 1929 import enum 1930 class E(enum.IntEnum or enum.IntFlag): 1931 one = 1 1932 two = 2 1933 1934 Enum(Byte, E) <--> Enum(Byte, one=1, two=2) 1935 FlagsEnum(Byte, E) <--> FlagsEnum(Byte, one=1, two=2) 1936 """ 1937 1938 def __init__(self, subcon, *merge, **flags): 1939 super().__init__(subcon) 1940 for enum in merge: 1941 for enumentry in enum: 1942 flags[enumentry.name] = enumentry.value 1943 self.flags = flags 1944 self.reverseflags = {v:k for k,v in flags.items()} 1945 1946 def __getattr__(self, name): 1947 if name in self.flags: 1948 return BitwisableString(name) 1949 raise AttributeError 1950 1951 def _decode(self, obj, context, path): 1952 obj2 = Container() 1953 obj2._flagsenum = True 1954 for name,value in self.flags.items(): 1955 obj2[BitwisableString(name)] = (obj & value == value) 1956 return obj2 1957 1958 def _encode(self, obj, context, path): 1959 try: 1960 if isinstance(obj, integertypes): 1961 return obj 1962 if isinstance(obj, stringtypes): 1963 flags = 0 1964 for name in obj.split("|"): 1965 name = name.strip() 1966 if name: 1967 flags |= self.flags[name] # KeyError 1968 return flags 1969 if isinstance(obj, dict): 1970 flags = 0 1971 for name,value in obj.items(): 1972 if not name.startswith("_"): # assumes key is a string 1973 if value: 1974 flags |= self.flags[name] # KeyError 1975 return flags 1976 raise MappingError("building failed, unknown object: %r" % (obj,), path=path) 1977 except KeyError: 1978 raise MappingError("building failed, unknown label: %r" % (obj,), path=path) 1979 1980 def _emitparse(self, code): 1981 return f"reuse(({self.subcon._compileparse(code)}), lambda x: Container({', '.join(f'{k}=bool(x & {v} == {v})' for k,v in 
self.flags.items()) }))" 1982 1983 def _emitseq(self, ksy, bitwise): 1984 bitstotal = self.subcon.sizeof() * 8 1985 seq = [] 1986 for i in range(bitstotal): 1987 value = 1<<i 1988 name = self.reverseflags.get(value, "unknown_%s" % i) 1989 seq.append(dict(id=name, type="b1", doc=hex(value), _construct_render="Flag")) 1990 return seq 1991 1992 1993class Mapping(Adapter): 1994 r""" 1995 Adapter that maps objects to other objects. Translates objects after parsing and before building. Can for example, be used to translate between enum34 objects and strings, but Enum class supports enum34 already and is recommended. 1996 1997 :param subcon: Construct instance 1998 :param mapping: dict, for encoding (building) mapping, the reversed is used for parsing mapping 1999 2000 :raises MappingError: parsing or building but no mapping found 2001 2002 Example:: 2003 2004 >>> x = object 2005 >>> d = Mapping(Byte, {x:0}) 2006 >>> d.parse(b"\x00") 2007 x 2008 >>> d.build(x) 2009 b'\x00' 2010 """ 2011 2012 def __init__(self, subcon, mapping): 2013 super().__init__(subcon) 2014 self.decmapping = {v:k for k,v in mapping.items()} 2015 self.encmapping = mapping 2016 2017 def _decode(self, obj, context, path): 2018 try: 2019 return self.decmapping[obj] # KeyError 2020 except (KeyError, TypeError): 2021 raise MappingError("parsing failed, no decoding mapping for %r" % (obj,), path=path) 2022 2023 def _encode(self, obj, context, path): 2024 try: 2025 return self.encmapping[obj] # KeyError 2026 except (KeyError, TypeError): 2027 raise MappingError("building failed, no encoding mapping for %r" % (obj,), path=path) 2028 2029 def _emitparse(self, code): 2030 fname = f"factory_{code.allocateId()}" 2031 code.append(f"{fname} = {repr(self.decmapping)}") 2032 return f"{fname}[{self.subcon._compileparse(code)}]" 2033 2034 def _emitbuild(self, code): 2035 fname = f"factory_{code.allocateId()}" 2036 code.append(f"{fname} = {repr(self.encmapping)}") 2037 return f"reuse({fname}[obj], lambda obj: 
({self.subcon._compilebuild(code)}))" 2038 2039 2040#=============================================================================== 2041# structures and sequences 2042#=============================================================================== 2043class Struct(Construct): 2044 r""" 2045 Sequence of usually named constructs, similar to structs in C. The members are parsed and build in the order they are defined. If a member is anonymous (its name is None) then it gets parsed and the value discarded, or it gets build from nothing (from None). 2046 2047 Some fields do not need to be named, since they are built without value anyway. See: Const Padding Check Error Pass Terminated Seek Tell for examples of such fields. 2048 2049 Operator + can also be used to make Structs (although not recommended). 2050 2051 Parses into a Container (dict with attribute and key access) where keys match subcon names. Builds from a dict (not necessarily a Container) where each member gets a value from the dict matching the subcon name. If field has build-from-none flag, it gets build even when there is no matching entry in the dict. Size is the sum of all subcon sizes, unless any subcon raises SizeofError. 2052 2053 This class does context nesting, meaning its members are given access to a new dictionary where the "_" entry points to the outer context. When parsing, each member gets parsed and subcon parse return value is inserted into context under matching key only if the member was named. When building, the matching entry gets inserted into context before subcon gets build, and if subcon build returns a new value (not None) that gets replaced in the context. 2054 2055 This class exposes subcons as attributes. You can refer to subcons that were inlined (and therefore do not exist as variable in the namespace) by accessing the struct attributes, under same name. Also note that compiler does not support this feature. See examples. 2056 2057 This class exposes subcons in the context. 
You can refer to subcons that were inlined (and therefore do not exist as variable in the namespace) within other inlined fields using the context. Note that you need to use a lambda (`this` expression is not supported). Also note that compiler does not support this feature. See examples. 2058 2059 This class supports stopping. If :class:`~construct.core.StopIf` field is a member, and it evaluates its lambda as positive, this class ends parsing or building as successful without processing further fields. 2060 2061 :param \*subcons: Construct instances, list of members, some can be anonymous 2062 :param \*\*subconskw: Construct instances, list of members (requires Python 3.6) 2063 2064 :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes 2065 :raises KeyError: building a subcon but found no corresponding key in dictionary 2066 2067 Example:: 2068 2069 >>> d = Struct("num"/Int8ub, "data"/Bytes(this.num)) 2070 >>> d.parse(b"\x04DATA") 2071 Container(num=4, data=b"DATA") 2072 >>> d.build(dict(num=4, data=b"DATA")) 2073 b"\x04DATA" 2074 2075 >>> d = Struct(Const(b"MZ"), Padding(2), Pass, Terminated) 2076 >>> d.build({}) 2077 b'MZ\x00\x00' 2078 >>> d.parse(_) 2079 Container() 2080 >>> d.sizeof() 2081 4 2082 2083 >>> d = Struct( 2084 ... "animal" / Enum(Byte, giraffe=1), 2085 ... ) 2086 >>> d.animal.giraffe 2087 'giraffe' 2088 >>> d = Struct( 2089 ... "count" / Byte, 2090 ... "data" / Bytes(lambda this: this.count - this._subcons.count.sizeof()), 2091 ... 
) 2092 >>> d.build(dict(count=3, data=b"12")) 2093 b'\x0312' 2094 2095 Alternative syntax (not recommended): 2096 >>> ("a"/Byte + "b"/Byte + "c"/Byte + "d"/Byte) 2097 2098 Alternative syntax, but requires Python 3.6 or any PyPy: 2099 >>> Struct(a=Byte, b=Byte, c=Byte, d=Byte) 2100 """ 2101 2102 def __init__(self, *subcons, **subconskw): 2103 super().__init__() 2104 self.subcons = list(subcons) + list(k/v for k,v in subconskw.items()) 2105 self._subcons = Container((sc.name,sc) for sc in self.subcons if sc.name) 2106 self.flagbuildnone = all(sc.flagbuildnone for sc in self.subcons) 2107 2108 def __getattr__(self, name): 2109 if name in self._subcons: 2110 return self._subcons[name] 2111 raise AttributeError 2112 2113 def _parse(self, stream, context, path): 2114 obj = Container() 2115 obj._io = stream 2116 context = Container(_ = context, _params = context._params, _root = None, _parsing = context._parsing, _building = context._building, _sizing = context._sizing, _subcons = self._subcons, _io = stream, _index = context.get("_index", None)) 2117 context._root = context._.get("_root", context) 2118 for sc in self.subcons: 2119 try: 2120 subobj = sc._parsereport(stream, context, path) 2121 if sc.name: 2122 obj[sc.name] = subobj 2123 context[sc.name] = subobj 2124 except StopFieldError: 2125 break 2126 return obj 2127 2128 def _build(self, obj, stream, context, path): 2129 if obj is None: 2130 obj = Container() 2131 context = Container(_ = context, _params = context._params, _root = None, _parsing = context._parsing, _building = context._building, _sizing = context._sizing, _subcons = self._subcons, _io = stream, _index = context.get("_index", None)) 2132 context._root = context._.get("_root", context) 2133 context.update(obj) 2134 for sc in self.subcons: 2135 try: 2136 if sc.flagbuildnone: 2137 subobj = obj.get(sc.name, None) 2138 else: 2139 subobj = obj[sc.name] # raises KeyError 2140 2141 if sc.name: 2142 context[sc.name] = subobj 2143 2144 buildret = 
sc._build(subobj, stream, context, path) 2145 if sc.name: 2146 context[sc.name] = buildret 2147 except StopFieldError: 2148 break 2149 return context 2150 2151 def _sizeof(self, context, path): 2152 context = Container(_ = context, _params = context._params, _root = None, _parsing = context._parsing, _building = context._building, _sizing = context._sizing, _subcons = self._subcons, _io = None, _index = context.get("_index", None)) 2153 context._root = context._.get("_root", context) 2154 try: 2155 return sum(sc._sizeof(context, path) for sc in self.subcons) 2156 except (KeyError, AttributeError): 2157 raise SizeofError("cannot calculate size, key not found in context", path=path) 2158 2159 def _emitparse(self, code): 2160 fname = f"parse_struct_{code.allocateId()}" 2161 block = f""" 2162 def {fname}(io, this): 2163 result = Container() 2164 this = Container(_ = this, _params = this['_params'], _root = None, _parsing = True, _building = False, _sizing = False, _subcons = None, _io = io, _index = this.get('_index', None)) 2165 this['_root'] = this['_'].get('_root', this) 2166 try: 2167 """ 2168 for sc in self.subcons: 2169 block += f""" 2170 {f'result[{repr(sc.name)}] = this[{repr(sc.name)}] = ' if sc.name else ''}{sc._compileparse(code)} 2171 """ 2172 block += f""" 2173 pass 2174 except StopFieldError: 2175 pass 2176 return result 2177 """ 2178 code.append(block) 2179 return f"{fname}(io, this)" 2180 2181 def _emitbuild(self, code): 2182 fname = f"build_struct_{code.allocateId()}" 2183 block = f""" 2184 def {fname}(obj, io, this): 2185 this = Container(_ = this, _params = this['_params'], _root = None, _parsing = False, _building = True, _sizing = False, _subcons = None, _io = io, _index = this.get('_index', None)) 2186 this['_root'] = this['_'].get('_root', this) 2187 try: 2188 objdict = obj 2189 """ 2190 for sc in self.subcons: 2191 block += f""" 2192 {f'obj = objdict.get({repr(sc.name)}, None)' if sc.flagbuildnone else f'obj = objdict[{repr(sc.name)}]'} 2193 
{f'this[{repr(sc.name)}] = obj' if sc.name else ''} 2194 {f'this[{repr(sc.name)}] = ' if sc.name else ''}{sc._compilebuild(code)} 2195 """ 2196 block += f""" 2197 pass 2198 except StopFieldError: 2199 pass 2200 return this 2201 """ 2202 code.append(block) 2203 return f"{fname}(obj, io, this)" 2204 2205 def _emitseq(self, ksy, bitwise): 2206 return [sc._compilefulltype(ksy, bitwise) for sc in self.subcons] 2207 2208 2209class Sequence(Construct): 2210 r""" 2211 Sequence of usually un-named constructs. The members are parsed and build in the order they are defined. If a member is named, its parsed value gets inserted into the context. This allows using members that refer to previous members. 2212 2213 Operator >> can also be used to make Sequences (although not recommended). 2214 2215 Parses into a ListContainer (list with pretty-printing) where values are in same order as subcons. Builds from a list (not necessarily a ListContainer) where each subcon is given the element at respective position. Size is the sum of all subcon sizes, unless any subcon raises SizeofError. 2216 2217 This class does context nesting, meaning its members are given access to a new dictionary where the "_" entry points to the outer context. When parsing, each member gets parsed and subcon parse return value is inserted into context under matching key only if the member was named. When building, the matching entry gets inserted into context before subcon gets build, and if subcon build returns a new value (not None) that gets replaced in the context. 2218 2219 This class exposes subcons as attributes. You can refer to subcons that were inlined (and therefore do not exist as variable in the namespace) by accessing the struct attributes, under same name. Also note that compiler does not support this feature. See examples. 2220 2221 This class exposes subcons in the context. 
You can refer to subcons that were inlined (and therefore do not exist as variable in the namespace) within other inlined fields using the context. Note that you need to use a lambda (`this` expression is not supported). Also note that compiler does not support this feature. See examples. 2222 2223 This class supports stopping. If :class:`~construct.core.StopIf` field is a member, and it evaluates its lambda as positive, this class ends parsing or building as successful without processing further fields. 2224 2225 :param \*subcons: Construct instances, list of members, some can be named 2226 :param \*\*subconskw: Construct instances, list of members (requires Python 3.6) 2227 2228 :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes 2229 :raises KeyError: building a subcon but found no corresponding key in dictionary 2230 2231 Example:: 2232 2233 >>> d = Sequence(Byte, Float32b) 2234 >>> d.build([0, 1.23]) 2235 b'\x00?\x9dp\xa4' 2236 >>> d.parse(_) 2237 [0, 1.2300000190734863] # a ListContainer 2238 2239 >>> d = Sequence( 2240 ... "animal" / Enum(Byte, giraffe=1), 2241 ... ) 2242 >>> d.animal.giraffe 2243 'giraffe' 2244 >>> d = Sequence( 2245 ... "count" / Byte, 2246 ... "data" / Bytes(lambda this: this.count - this._subcons.count.sizeof()), 2247 ... 
) 2248 >>> d.build([3, b"12"]) 2249 b'\x0312' 2250 2251 Alternative syntax (not recommended): 2252 >>> (Byte >> "Byte >> "c"/Byte >> "d"/Byte) 2253 2254 Alternative syntax, but requires Python 3.6 or any PyPy: 2255 >>> Sequence(a=Byte, b=Byte, c=Byte, d=Byte) 2256 """ 2257 2258 def __init__(self, *subcons, **subconskw): 2259 super().__init__() 2260 self.subcons = list(subcons) + list(k/v for k,v in subconskw.items()) 2261 self._subcons = Container((sc.name,sc) for sc in self.subcons if sc.name) 2262 self.flagbuildnone = all(sc.flagbuildnone for sc in self.subcons) 2263 2264 def __getattr__(self, name): 2265 if name in self._subcons: 2266 return self._subcons[name] 2267 raise AttributeError 2268 2269 def _parse(self, stream, context, path): 2270 obj = ListContainer() 2271 context = Container(_ = context, _params = context._params, _root = None, _parsing = context._parsing, _building = context._building, _sizing = context._sizing, _subcons = self._subcons, _io = stream, _index = context.get("_index", None)) 2272 context._root = context._.get("_root", context) 2273 for sc in self.subcons: 2274 try: 2275 subobj = sc._parsereport(stream, context, path) 2276 obj.append(subobj) 2277 if sc.name: 2278 context[sc.name] = subobj 2279 except StopFieldError: 2280 break 2281 return obj 2282 2283 def _build(self, obj, stream, context, path): 2284 if obj is None: 2285 obj = ListContainer([None for sc in self.subcons]) 2286 context = Container(_ = context, _params = context._params, _root = None, _parsing = context._parsing, _building = context._building, _sizing = context._sizing, _subcons = self._subcons, _io = stream, _index = context.get("_index", None)) 2287 context._root = context._.get("_root", context) 2288 objiter = iter(obj) 2289 retlist = ListContainer() 2290 for i,sc in enumerate(self.subcons): 2291 try: 2292 subobj = next(objiter) 2293 if sc.name: 2294 context[sc.name] = subobj 2295 2296 buildret = sc._build(subobj, stream, context, path) 2297 retlist.append(buildret) 
2298 2299 if sc.name: 2300 context[sc.name] = buildret 2301 except StopFieldError: 2302 break 2303 return retlist 2304 2305 def _sizeof(self, context, path): 2306 context = Container(_ = context, _params = context._params, _root = None, _parsing = context._parsing, _building = context._building, _sizing = context._sizing, _subcons = self._subcons, _io = None, _index = context.get("_index", None)) 2307 context._root = context._.get("_root", context) 2308 try: 2309 return sum(sc._sizeof(context, path) for sc in self.subcons) 2310 except (KeyError, AttributeError): 2311 raise SizeofError("cannot calculate size, key not found in context", path=path) 2312 2313 def _emitparse(self, code): 2314 fname = f"parse_sequence_{code.allocateId()}" 2315 block = f""" 2316 def {fname}(io, this): 2317 result = ListContainer() 2318 this = Container(_ = this, _params = this['_params'], _root = None, _parsing = True, _building = False, _sizing = False, _subcons = None, _io = io, _index = this.get('_index', None)) 2319 this['_root'] = this['_'].get('_root', this) 2320 try: 2321 """ 2322 for sc in self.subcons: 2323 block += f""" 2324 result.append({sc._compileparse(code)}) 2325 """ 2326 if sc.name: 2327 block += f""" 2328 this[{repr(sc.name)}] = result[-1] 2329 """ 2330 block += f""" 2331 pass 2332 except StopFieldError: 2333 pass 2334 return result 2335 """ 2336 code.append(block) 2337 return f"{fname}(io, this)" 2338 2339 def _emitbuild(self, code): 2340 fname = f"build_sequence_{code.allocateId()}" 2341 block = f""" 2342 def {fname}(obj, io, this): 2343 this = Container(_ = this, _params = this['_params'], _root = None, _parsing = False, _building = True, _sizing = False, _subcons = None, _io = io, _index = this.get('_index', None)) 2344 this['_root'] = this['_'].get('_root', this) 2345 try: 2346 objiter = iter(obj) 2347 retlist = ListContainer() 2348 """ 2349 for sc in self.subcons: 2350 block += f""" 2351 {f'obj = next(objiter)'} 2352 {f'this[{repr(sc.name)}] = obj' if sc.name else 
''} 2353 {f'x = '}{sc._compilebuild(code)} 2354 {f'retlist.append(x)'} 2355 {f'this[{repr(sc.name)}] = x' if sc.name else ''} 2356 """ 2357 block += f""" 2358 pass 2359 except StopFieldError: 2360 pass 2361 return retlist 2362 """ 2363 code.append(block) 2364 return f"{fname}(obj, io, this)" 2365 2366 def _emitseq(self, ksy, bitwise): 2367 return [sc._compilefulltype(ksy, bitwise) for sc in self.subcons] 2368 2369 2370#=============================================================================== 2371# arrays ranges and repeaters 2372#=============================================================================== 2373class Array(Subconstruct): 2374 r""" 2375 Homogenous array of elements, similar to C# generic T[]. 2376 2377 Parses into a ListContainer (a list). Parsing and building processes an exact amount of elements. If given list has more or less than count elements, raises RangeError. Size is defined as count multiplied by subcon size, but only if subcon is fixed size. 2378 2379 Operator [] can be used to make Array instances (recommended syntax). 2380 2381 :param count: integer or context lambda, strict amount of elements 2382 :param subcon: Construct instance, subcon to process individual elements 2383 :param discard: optional, bool, if set then parsing returns empty list 2384 2385 :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes 2386 :raises RangeError: specified count is not valid 2387 :raises RangeError: given object has different length than specified count 2388 2389 Can propagate any exception from the lambdas, possibly non-ConstructError. 
2390 2391 Example:: 2392 2393 >>> d = Array(5, Byte) or Byte[5] 2394 >>> d.build(range(5)) 2395 b'\x00\x01\x02\x03\x04' 2396 >>> d.parse(_) 2397 [0, 1, 2, 3, 4] 2398 """ 2399 2400 def __init__(self, count, subcon, discard=False): 2401 super().__init__(subcon) 2402 self.count = count 2403 self.discard = discard 2404 2405 def _parse(self, stream, context, path): 2406 count = evaluate(self.count, context) 2407 if not 0 <= count: 2408 raise RangeError("invalid count %s" % (count,), path=path) 2409 discard = self.discard 2410 obj = ListContainer() 2411 for i in range(count): 2412 context._index = i 2413 e = self.subcon._parsereport(stream, context, path) 2414 if not discard: 2415 obj.append(e) 2416 return obj 2417 2418 def _build(self, obj, stream, context, path): 2419 count = evaluate(self.count, context) 2420 if not 0 <= count: 2421 raise RangeError("invalid count %s" % (count,), path=path) 2422 if not len(obj) == count: 2423 raise RangeError("expected %d elements, found %d" % (count, len(obj)), path=path) 2424 discard = self.discard 2425 retlist = ListContainer() 2426 for i,e in enumerate(obj): 2427 context._index = i 2428 buildret = self.subcon._build(e, stream, context, path) 2429 if not discard: 2430 retlist.append(buildret) 2431 return retlist 2432 2433 def _sizeof(self, context, path): 2434 try: 2435 count = evaluate(self.count, context) 2436 except (KeyError, AttributeError): 2437 raise SizeofError("cannot calculate size, key not found in context", path=path) 2438 return count * self.subcon._sizeof(context, path) 2439 2440 def _emitparse(self, code): 2441 return f"ListContainer(({self.subcon._compileparse(code)}) for i in range({self.count}))" 2442 2443 def _emitbuild(self, code): 2444 return f"ListContainer(reuse(obj[i], lambda obj: ({self.subcon._compilebuild(code)})) for i in range({self.count}))" 2445 2446 def _emitfulltype(self, ksy, bitwise): 2447 return dict(type=self.subcon._compileprimitivetype(ksy, bitwise), repeat="expr", repeat_expr=self.count) 2448 
class GreedyRange(Subconstruct):
    r"""
    Homogeneous array of elements, similar to C# generic IEnumerable<T>, but works with unknown count of elements by parsing until end of stream.

    Parses into a ListContainer (a list). Parsing stops when an exception occurred when parsing the subcon, either due to EOF or subcon format not being able to parse the data. Either way, when GreedyRange encounters either failure it seeks the stream back to a position after last successful subcon parsing. Builds from enumerable, each element as-is. Size is undefined.

    This class supports stopping. If :class:`~construct.core.StopIf` field is a member, and it evaluates its lambda as positive, this class ends parsing or building as successful without processing further fields.

    :param subcon: Construct instance, subcon to process individual elements
    :param discard: optional, bool, if set then parsing returns empty list

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises StreamError: stream is not seekable and tellable

    Can propagate any exception from the lambdas, possibly non-ConstructError.

    Example::

        >>> d = GreedyRange(Byte)
        >>> d.build(range(8))
        b'\x00\x01\x02\x03\x04\x05\x06\x07'
        >>> d.parse(_)
        [0, 1, 2, 3, 4, 5, 6, 7]
    """

    def __init__(self, subcon, discard=False):
        super().__init__(subcon)
        self.discard = discard

    def _parse(self, stream, context, path):
        """Parse elements until the subcon fails, then rewind to the end of the last good element."""
        discard = self.discard
        obj = ListContainer()
        for i in itertools.count():
            context._index = i
            # Taken outside the try block: a stream that cannot tell must raise
            # StreamError here, not reach the rewind below with no position recorded.
            fallback = stream_tell(stream, path)
            try:
                e = self.subcon._parsereport(stream, context, path)
            except StopFieldError:
                break
            except ExplicitError:
                raise
            except Exception:
                # Any other failure ends the range; undo the partial read.
                stream_seek(stream, fallback, 0, path)
                break
            if not discard:
                obj.append(e)
        return obj

    def _build(self, obj, stream, context, path):
        """Build every element of obj; StopFieldError ends building early and successfully."""
        discard = self.discard
        retlist = ListContainer()
        try:
            for i,e in enumerate(obj):
                context._index = i
                buildret = self.subcon._build(e, stream, context, path)
                if not discard:
                    retlist.append(buildret)
        except StopFieldError:
            # Stopping is a success: fall through and return what was built so far.
            pass
        return retlist

    def _sizeof(self, context, path):
        raise SizeofError(path=path)

    def _emitfulltype(self, ksy, bitwise):
        """Return the ksy description: the subcon type repeated until end of stream."""
        return dict(type=self.subcon._compileprimitivetype(ksy, bitwise), repeat="eos")


class RepeatUntil(Subconstruct):
    r"""
    Homogeneous array of elements, similar to C# generic IEnumerable<T>, that repeats until the predicate indicates it to stop. Note that the last element (that predicate indicated as True) is included in the return list.

    Parse iterates indefinitely until last element passed the predicate. Build iterates indefinitely over given list, until an element passed the predicate (or raises RepeatError if no element passed it). Size is undefined.

    :param predicate: lambda that takes (obj, list, context) and returns True to break or False to continue (or a truthy value)
    :param subcon: Construct instance, subcon used to parse and build each element
    :param discard: optional, bool, if set then parsing returns empty list

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises RepeatError: consumed all elements in the stream but neither passed the predicate

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        >>> d = RepeatUntil(lambda x,lst,ctx: x > 7, Byte)
        >>> d.build(range(20))
        b'\x00\x01\x02\x03\x04\x05\x06\x07\x08'
        >>> d.parse(b"\x01\xff\x02")
        [1, 255]

        >>> d = RepeatUntil(lambda x,lst,ctx: lst[-2:] == [0,0], Byte)
        >>> d.parse(b"\x01\x00\x00\xff")
        [1, 0, 0]
    """

    def __init__(self, predicate, subcon, discard=False):
        super().__init__(subcon)
        self.predicate = predicate
        self.discard = discard

    def _getpredicate(self):
        """Return the predicate as a callable taking (obj, list, context)."""
        predicate = self.predicate
        if callable(predicate):
            return predicate
        # The name is not rebound after this point, so the lambda returns the
        # constant itself rather than a reference to the lambda.
        return lambda _1,_2,_3: predicate

    def _parse(self, stream, context, path):
        """Parse elements until the predicate is truthy; the matching element is included."""
        predicate = self._getpredicate()
        discard = self.discard
        obj = ListContainer()
        for i in itertools.count():
            context._index = i
            e = self.subcon._parsereport(stream, context, path)
            if not discard:
                obj.append(e)
            if predicate(e, obj, context):
                return obj

    def _build(self, obj, stream, context, path):
        """Build elements until the predicate is truthy; RepeatError when none matches."""
        predicate = self._getpredicate()
        discard = self.discard
        # partiallist is what the predicate sees; it is filled even when discarding.
        partiallist = ListContainer()
        retlist = ListContainer()
        for i,e in enumerate(obj):
            context._index = i
            buildret = self.subcon._build(e, stream, context, path)
            if not discard:
                retlist.append(buildret)
                partiallist.append(buildret)
            if predicate(e, partiallist, context):
                break
        else:
            raise RepeatError("expected any item to match predicate, when building", path=path)
        return retlist

    def _sizeof(self, context, path):
        raise SizeofError("cannot calculate size, amount depends on actual data", path=path)

    def _emitparse(self, code):
        """Emit a parse function for this construct and return the expression that calls it."""
        fname = f"parse_repeatuntil_{code.allocateId()}"
        block = f"""
            def {fname}(io, this):
                list_ = ListContainer()
                while True:
                    obj_ = {self.subcon._compileparse(code)}
                    if not ({self.discard}):
                        list_.append(obj_)
                    if ({self.predicate}):
                        return list_
        """
        code.append(block)
        return f"{fname}(io, this)"

    def _emitbuild(self, code):
        """Emit a build function for this construct and return the expression that calls it."""
        fname = f"build_repeatuntil_{code.allocateId()}"
        block = f"""
            def {fname}(obj, io, this):
                objiter = iter(obj)
                list_ = ListContainer()
                while True:
                    obj_ = reuse(next(objiter), lambda obj: {self.subcon._compilebuild(code)})
                    list_.append(obj_)
                    if ({self.predicate}):
                        return list_
        """
        code.append(block)
        return f"{fname}(obj, io, this)"

    def _emitfulltype(self, ksy, bitwise):
        """Return the ksy description: the subcon type repeated until the predicate expression holds."""
        return dict(type=self.subcon._compileprimitivetype(ksy, bitwise), repeat="until", repeat_until=repr(self.predicate).replace("obj_","_"))


#===============================================================================
# specials
#===============================================================================
class Renamed(Subconstruct):
    r"""
    Special wrapper that allows a Struct (or other similar class) to see a field as having a name (or a different name) or having a parsed hook. Library classes do not have names (it is None). Renamed does not change a field, only wraps it like a candy with a label. Used internally by / and * operators.

    Also this wrapper is responsible for building a path info (a chain of names) that gets attached to error message when parsing, building, or sizeof fails. Fields that are not named do not appear in the path string.

    Parsing building and size are deferred to subcon.

    :param subcon: Construct instance
    :param newname: optional, string
    :param newdocs: optional, string
    :param newparsed: optional, lambda

    Example::

        >>> "number" / Int32ub
        <Renamed: number>
    """

    def __init__(self, subcon, newname=None, newdocs=None, newparsed=None):
        """Wrap subcon; each falsy new* argument falls back to the subcon's own value."""
        super().__init__(subcon)
        self.name = newname if newname else subcon.name
        self.docs = newdocs if newdocs else subcon.docs
        self.parsed = newparsed if newparsed else subcon.parsed

    def __getattr__(self, name):
        """Delegate unknown attributes to the wrapped subcon."""
        # Only reached when normal lookup fails. If "subcon" itself is the missing
        # attribute, delegating through self.subcon would recurse without end.
        if name == "subcon":
            raise AttributeError(name)
        return getattr(self.subcon, name)

    def _parse(self, stream, context, path):
        path += " -> %s" % (self.name,)
        return self.subcon._parsereport(stream, context, path)

    def _build(self, obj, stream, context, path):
        path += " -> %s" % (self.name,)
        return self.subcon._build(obj, stream, context, path)

    def _sizeof(self, context, path):
        path += " -> %s" % (self.name,)
        return self.subcon._sizeof(context, path)

    def _emitparse(self, code):
        return self.subcon._compileparse(code)

    def _emitbuild(self, code):
        return self.subcon._compilebuild(code)

    def _emitseq(self, ksy, bitwise):
        return self.subcon._compileseq(ksy, bitwise)

    def _emitprimitivetype(self, ksy, bitwise):
        return self.subcon._compileprimitivetype(ksy, bitwise)

    def _emitfulltype(self, ksy, bitwise):
        """Return the subcon's ksy description with this wrapper's id and doc added."""
        r = dict()
        if self.name:
            r.update(id=self.name)
        r.update(self.subcon._compilefulltype(ksy, bitwise))
        if self.docs:
            r.update(doc=self.docs)
        return r
class Const(Subconstruct):
    r"""
    Field enforcing a constant, typically a file signature. Whatever the subcon parses into must compare equal to the specified value.

    Const does not consume a precomputed amount of bytes, it relies on the subcon to read the appropriate amount, so a variable sized subcon can be used as well.

    Parses using subcon and returns its value (after checking). Builds the specified value using subcon; the given object must be None or equal to that value. Size is the same as subcon.

    :param value: expected value, usually a bytes literal
    :param subcon: optional, Construct instance used to parse and build the value, Bytes of matching length is used when omitted (value must then be a bytes literal)

    :raises ConstError: parsed data does not match specified value, or building from wrong value
    :raises StringError: no subcon was given and value is not bytes, perhaps unicode

    Can propagate any exception raised by the subcon.

    Example::

        >>> d = Const(b"IHDR")
        >>> d.build(None)
        b'IHDR'
        >>> d.parse(b"JPEG")
        construct.core.ConstError: parsing expected b'IHDR' but parsed b'JPEG'

        >>> d = Const(255, Int32ul)
        >>> d.build(None)
        b'\xff\x00\x00\x00'
    """

    def __init__(self, value, subcon=None):
        if subcon is None:
            # without an explicit subcon the value itself has to be raw bytes
            if isinstance(value, bytestringtype):
                subcon = Bytes(len(value))
            else:
                raise StringError(f"given non-bytes value {repr(value)}, perhaps unicode?")
        super().__init__(subcon)
        self.value = value
        self.flagbuildnone = True

    def _parse(self, stream, context, path):
        parsed = self.subcon._parsereport(stream, context, path)
        if parsed == self.value:
            return parsed
        raise ConstError(f"parsing expected {repr(self.value)} but parsed {repr(parsed)}", path=path)

    def _build(self, obj, stream, context, path):
        if obj in (None, self.value):
            # the constant is what gets written, never the given object
            return self.subcon._build(self.value, stream, context, path)
        raise ConstError(f"building expected None or {repr(self.value)} but got {repr(obj)}", path=path)

    def _sizeof(self, context, path):
        return self.subcon._sizeof(context, path)

    def _emitparse(self, code):
        code.append(f"""
            def parse_const(value, expected):
                if not value == expected: raise ConstError
                return value
        """)
        parsedexpr = self.subcon._compileparse(code)
        return f"parse_const({parsedexpr}, {repr(self.value)})"

    def _emitbuild(self, code):
        expected = repr(self.value)
        if not isinstance(self.value, bytes):
            return f"reuse({expected}, lambda obj: {self.subcon._compilebuild(code)})"
        # bytes constants are written directly, the tuple indexing makes the
        # emitted expression evaluate to the constant itself
        return f"(io.write({expected}), {expected})[1]"

    def _emitfulltype(self, ksy, bitwise):
        built = self.subcon.build(self.value)
        return {"contents": list(built)}
"total" / Computed(this.width * this.height), 2773 ... ) 2774 >>> d.build(dict(width=4,height=5)) 2775 b'\x04\x05' 2776 >>> d.parse(b"12") 2777 Container(width=49, height=50, total=2450) 2778 2779 >>> d = Computed(7) 2780 >>> d.parse(b"") 2781 7 2782 >>> d = Computed(lambda ctx: 7) 2783 >>> d.parse(b"") 2784 7 2785 2786 >>> import os 2787 >>> d = Computed(lambda ctx: os.urandom(10)) 2788 >>> d.parse(b"") 2789 b'\x98\xc2\xec\x10\x07\xf5\x8e\x98\xc2\xec' 2790 """ 2791 2792 def __init__(self, func): 2793 super().__init__() 2794 self.func = func 2795 self.flagbuildnone = True 2796 2797 def _parse(self, stream, context, path): 2798 return self.func(context) if callable(self.func) else self.func 2799 2800 def _build(self, obj, stream, context, path): 2801 return self.func(context) if callable(self.func) else self.func 2802 2803 def _sizeof(self, context, path): 2804 return 0 2805 2806 def _emitparse(self, code): 2807 return repr(self.func) 2808 2809 def _emitbuild(self, code): 2810 return repr(self.func) 2811 2812 2813@singleton 2814class Index(Construct): 2815 r""" 2816 Indexes a field inside outer :class:`~construct.core.Array` :class:`~construct.core.GreedyRange` :class:`~construct.core.RepeatUntil` context. 2817 2818 Note that you can use this class, or use `this._index` expression instead, depending on how its used. See the examples. 2819 2820 Parsing and building pulls _index key from the context. Size is 0 because stream is unaffected. 
class Rebuild(Subconstruct):
    r"""
    Field where building does not require a value, because the value gets recomputed when needed. Comes handy when building a Struct from a dict with missing keys, for example for length and count fields.

    Parsing defers to subcon. Building also defers to subcon, but the object that gets built is the one provided by the context lambda (or constant), the given object is ignored. Size is the same as subcon.

    Difference between Default and Rebuild is that in the first the build value is optional, and in the second the build value is ignored.

    :param subcon: Construct instance
    :param func: context lambda or constant value

    Can propagate any exception from the lambda or from the subcon, possibly non-ConstructError.

    Example::

        >>> d = Struct(
        ...     "count" / Rebuild(Byte, len_(this.items)),
        ...     "items" / Byte[this.count],
        ... )
        >>> d.build(dict(items=[1,2,3]))
        b'\x03\x01\x02\x03'
    """

    def __init__(self, subcon, func):
        super().__init__(subcon)
        self.func = func
        self.flagbuildnone = True

    def _build(self, obj, stream, context, path):
        # the given obj is deliberately discarded in favour of the computed one
        recomputed = evaluate(self.func, context)
        return self.subcon._build(recomputed, stream, context, path)

    def _emitparse(self, code):
        return self.subcon._compileparse(code)

    def _emitbuild(self, code):
        funcexpr = repr(self.func)
        return f"reuse({funcexpr}, lambda obj: ({self.subcon._compilebuild(code)}))"

    def _emitseq(self, ksy, bitwise):
        return self.subcon._compileseq(ksy, bitwise)

    def _emitprimitivetype(self, ksy, bitwise):
        return self.subcon._compileprimitivetype(ksy, bitwise)

    def _emitfulltype(self, ksy, bitwise):
        return self.subcon._compilefulltype(ksy, bitwise)
class Check(Construct):
    r"""
    Checks for a condition, and raises CheckError if the check fails.

    Parsing and building evaluate the condition against the context and return nothing. Size is 0 because stream is unaffected.

    :param func: bool or context lambda, that gets evaluated on parsing and building

    :raises CheckError: condition evaluated to a false value

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        Check(lambda ctx: len(ctx.payload.data) == ctx.payload_len)
        Check(len_(this.payload.data) == this.payload_len)
    """

    def __init__(self, func):
        super().__init__()
        self.func = func
        self.flagbuildnone = True

    def _parse(self, stream, context, path):
        if not evaluate(self.func, context):
            raise CheckError("check failed during parsing", path=path)

    def _build(self, obj, stream, context, path):
        if not evaluate(self.func, context):
            raise CheckError("check failed during building", path=path)

    def _sizeof(self, context, path):
        return 0

    def _emitparse(self, code):
        code.append(f"""
            def parse_check(condition):
                if not condition: raise CheckError
        """)
        condition = repr(self.func)
        return f"parse_check({condition})"

    def _emitbuild(self, code):
        code.append(f"""
            def build_check(condition):
                if not condition: raise CheckError
        """)
        condition = repr(self.func)
        return f"build_check({condition})"
class FocusedSeq(Construct):
    r"""
    Allows constructing more elaborate "adapters" than Adapter class.

    Parse does parse all subcons in sequence, but returns only the element that was selected (discards other values). Build does build all subcons in sequence, where each gets built from nothing (except the selected subcon which is given the object). Size is the sum of all subcon sizes, unless any subcon raises SizeofError.

    This class does context nesting, meaning its members are given access to a new dictionary where the "_" entry points to the outer context. When parsing, each member gets parsed and its return value is inserted into context under matching key only if the member was named. When building, the given object is inserted into context under the selected name before any subcon gets built, and the build return value of each named member is then stored in the context under its name.

    This class exposes subcons as attributes. You can refer to subcons that were inlined (and therefore do not exist as variable in the namespace) by accessing the struct attributes, under same name. Also note that compiler does not support this feature. See examples.

    This class exposes subcons in the context. You can refer to subcons that were inlined (and therefore do not exist as variable in the namespace) within other inlined fields using the context. Note that you need to use a lambda (`this` expression is not supported). Also note that compiler does not support this feature. See examples.

    This class is used internally to implement :class:`~construct.core.PrefixedArray`.

    :param parsebuildfrom: string name or context lambda, selects a subcon
    :param \*subcons: Construct instances, list of members, some can be named
    :param \*\*subconskw: Construct instances, list of members (requires Python 3.6)

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises UnboundLocalError: selector does not match any subcon
    :raises AttributeError: accessed attribute is not the name of any subcon

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        >>> d = FocusedSeq("num", Const(b"SIG"), "num"/Byte, Terminated)
        >>> d.parse(b"SIG\xff")
        255
        >>> d.build(255)
        b'SIG\xff'

        >>> d = FocusedSeq("animal",
        ...     "animal" / Enum(Byte, giraffe=1),
        ... )
        >>> d.animal.giraffe
        'giraffe'
        >>> d = FocusedSeq("count",
        ...     "count" / Byte,
        ...     "data" / Padding(lambda this: this.count - this._subcons.count.sizeof()),
        ... )
        >>> d.build(4)
        b'\x04\x00\x00\x00'

        PrefixedArray <--> FocusedSeq("items",
            "count" / Rebuild(lengthfield, len_(this.items)),
            "items" / subcon[this.count],
        )
    """

    def __init__(self, parsebuildfrom, *subcons, **subconskw):
        super().__init__()
        self.parsebuildfrom = parsebuildfrom
        self.subcons = list(subcons) + list(k/v for k,v in subconskw.items())
        self._subcons = Container((sc.name,sc) for sc in self.subcons if sc.name)

    def __getattr__(self, name):
        # Read _subcons straight from the instance dict: regular attribute
        # access would re-enter __getattr__ without end on an instance whose
        # _subcons is not set yet (e.g. while copy/pickle reconstructs it).
        subcons = self.__dict__.get("_subcons")
        if subcons is not None and name in subcons:
            return subcons[name]
        raise AttributeError(name)

    def _parse(self, stream, context, path):
        context = Container(_ = context, _params = context._params, _root = None, _parsing = context._parsing, _building = context._building, _sizing = context._sizing, _subcons = self._subcons, _io = stream, _index = context.get("_index", None))
        context._root = context._.get("_root", context)
        parsebuildfrom = evaluate(self.parsebuildfrom, context)
        for sc in self.subcons:
            parseret = sc._parsereport(stream, context, path)
            if sc.name:
                context[sc.name] = parseret
            if sc.name == parsebuildfrom:
                finalret = parseret
        # finalret is left unbound when no subcon matched the selector, which
        # surfaces as the documented UnboundLocalError
        return finalret

    def _build(self, obj, stream, context, path):
        context = Container(_ = context, _params = context._params, _root = None, _parsing = context._parsing, _building = context._building, _sizing = context._sizing, _subcons = self._subcons, _io = stream, _index = context.get("_index", None))
        context._root = context._.get("_root", context)
        parsebuildfrom = evaluate(self.parsebuildfrom, context)
        # make the object visible up front, so members built before the
        # selected one can already refer to it through the context
        context[parsebuildfrom] = obj
        for sc in self.subcons:
            buildret = sc._build(obj if sc.name == parsebuildfrom else None, stream, context, path)
            if sc.name:
                context[sc.name] = buildret
            if sc.name == parsebuildfrom:
                finalret = buildret
        # same UnboundLocalError contract as in _parse
        return finalret

    def _sizeof(self, context, path):
        context = Container(_ = context, _params = context._params, _root = None, _parsing = context._parsing, _building = context._building, _sizing = context._sizing, _subcons = self._subcons, _io = None, _index = context.get("_index", None))
        context._root = context._.get("_root", context)
        try:
            return sum(sc._sizeof(context, path) for sc in self.subcons)
        except (KeyError, AttributeError):
            raise SizeofError("cannot calculate size, key not found in context", path=path)

    def _emitparse(self, code):
        fname = f"parse_focusedseq_{code.allocateId()}"
        block = f"""
            def {fname}(io, this):
                result = []
                this = Container(_ = this, _params = this['_params'], _root = None, _parsing = True, _building = False, _sizing = False, _subcons = None, _io = io, _index = this.get('_index', None))
                this['_root'] = this['_'].get('_root', this)
        """
        for sc in self.subcons:
            block += f"""
                result.append({sc._compileparse(code)})
            """
            if sc.name:
                block += f"""
                this[{repr(sc.name)}] = result[-1]
                """
        block += f"""
                return this[{repr(self.parsebuildfrom)}]
        """
        code.append(block)
        return f"{fname}(io, this)"

    def _emitbuild(self, code):
        fname = f"build_focusedseq_{code.allocateId()}"
        block = f"""
            def {fname}(obj, io, this):
                this = Container(_ = this, _params = this['_params'], _root = None, _parsing = False, _building = True, _sizing = False, _subcons = None, _io = io, _index = this.get('_index', None))
                this['_root'] = this['_'].get('_root', this)
                try:
                    this[{repr(self.parsebuildfrom)}] = obj
                    finalobj = obj
        """
        for sc in self.subcons:
            block += f"""
                    {f'obj = {"finalobj" if sc.name == self.parsebuildfrom else "None"}'}
                    {f'buildret = '}{sc._compilebuild(code)}
                    {f'this[{repr(sc.name)}] = buildret' if sc.name else ''}
                    {f'{"finalret = buildret" if sc.name == self.parsebuildfrom else ""}'}
            """
        block += f"""
                    pass
                except StopFieldError:
                    pass
                return finalret
        """
        code.append(block)
        return f"{fname}(obj, io, this)"

    def _emitseq(self, ksy, bitwise):
        return [sc._compilefulltype(ksy, bitwise) for sc in self.subcons]
class NamedTuple(Adapter):
    r"""
    Both arrays, structs, and sequences can be mapped to a namedtuple from `collections module <https://docs.python.org/3/library/collections.html#collections.namedtuple>`_. To create a named tuple, you need to provide a name and a sequence of fields, either a string with space-separated names or a list of string names, like the standard namedtuple.

    Parses into a collections.namedtuple instance, and builds from such instance. With a Sequence Array GreedyRange subcon it also builds from any iterable (like a list), with a Struct subcon it also builds from a dict or any object exposing the fields as attributes. Size is deferred to subcon.

    :param tuplename: string
    :param tuplefields: string or list of strings
    :param subcon: Construct instance, either Struct Sequence Array GreedyRange

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises NamedTupleError: subcon is neither Struct Sequence Array GreedyRange

    Can propagate collections exceptions.

    Example::

        >>> d = NamedTuple("coord", "x y z", Byte[3])
        >>> d = NamedTuple("coord", "x y z", Byte >> Byte >> Byte)
        >>> d = NamedTuple("coord", "x y z", "x"/Byte + "y"/Byte + "z"/Byte)
        >>> d.parse(b"123")
        coord(x=49, y=50, z=51)
    """

    def __init__(self, tuplename, tuplefields, subcon):
        if not isinstance(subcon, (Struct,Sequence,Array,GreedyRange)):
            raise NamedTupleError("subcon is neither Struct Sequence Array GreedyRange")
        super().__init__(subcon)
        self.tuplename = tuplename
        self.tuplefields = tuplefields
        self.factory = collections.namedtuple(tuplename, tuplefields)

    def _decode(self, obj, context, path):
        if isinstance(self.subcon, Struct):
            # The parsed container carries the stream under "_io", which is
            # not a tuple field. Skip that key instead of deleting it, so the
            # parsed object stays intact and a missing key is not an error.
            fields = {key: value for key, value in obj.items() if key != "_io"}
            return self.factory(**fields)
        if isinstance(self.subcon, (Sequence,Array,GreedyRange)):
            return self.factory(*obj)
        raise NamedTupleError("subcon is neither Struct Sequence Array GreedyRange", path=path)

    def _encode(self, obj, context, path):
        if isinstance(self.subcon, Struct):
            # Plain dicts have no attribute access, so read those by key.
            # Containers and namedtuples keep going through getattr.
            plaindict = isinstance(obj, dict) and not isinstance(obj, Container)
            return Container({sc.name: (obj[sc.name] if plaindict else getattr(obj, sc.name)) for sc in self.subcon.subcons if sc.name})
        if isinstance(self.subcon, (Sequence,Array,GreedyRange)):
            return list(obj)
        raise NamedTupleError("subcon is neither Struct Sequence Array GreedyRange", path=path)

    def _emitparse(self, code):
        fname = "factory_%s" % code.allocateId()
        code.append("""
            %s = collections.namedtuple(%r, %r)
        """ % (fname, self.tuplename, self.tuplefields, ))
        if isinstance(self.subcon, Struct):
            return "%s(**(%s))" % (fname, self.subcon._compileparse(code), )
        if isinstance(self.subcon, (Sequence,Array,GreedyRange)):
            return "%s(*(%s))" % (fname, self.subcon._compileparse(code), )
        raise NamedTupleError("subcon is neither Struct Sequence Array GreedyRange")

    def _emitseq(self, ksy, bitwise):
        return self.subcon._compileseq(ksy, bitwise)

    def _emitprimitivetype(self, ksy, bitwise):
        return self.subcon._compileprimitivetype(ksy, bitwise)

    def _emitfulltype(self, ksy, bitwise):
        return self.subcon._compilefulltype(ksy, bitwise)
3335 3336 :param subcon: Construct instance like Int* Float*, or Int32ub with msdos format 3337 :param unit: integer or float, or msdos string 3338 :param epoch: integer, or Arrow instance, or msdos string 3339 3340 :raises ImportError: arrow could not be imported during ctor 3341 :raises TimestampError: subcon is not a Construct instance 3342 :raises TimestampError: unit or epoch is a wrong type 3343 3344 Example:: 3345 3346 >>> d = Timestamp(Int64ub, 1., 1970) 3347 >>> d.parse(b'\x00\x00\x00\x00ZIz\x00') 3348 <Arrow [2018-01-01T00:00:00+00:00]> 3349 >>> d = Timestamp(Int32ub, "msdos", "msdos") 3350 >>> d.parse(b'H9\x8c"') 3351 <Arrow [2016-01-25T17:33:04+00:00]> 3352 """ 3353 import arrow 3354 3355 if not isinstance(subcon, Construct): 3356 raise TimestampError("subcon should be Int*, experimentally Float*, or Int32ub when using msdos format") 3357 if not isinstance(unit, (integertypes, float, stringtypes)): 3358 raise TimestampError("unit must be one of: int float string") 3359 if not isinstance(epoch, (integertypes, arrow.Arrow, stringtypes)): 3360 raise TimestampError("epoch must be one of: int Arrow string") 3361 3362 if unit == "msdos" or epoch == "msdos": 3363 st = BitStruct( 3364 "year" / BitsInteger(7), 3365 "month" / BitsInteger(4), 3366 "day" / BitsInteger(5), 3367 "hour" / BitsInteger(5), 3368 "minute" / BitsInteger(6), 3369 "second" / BitsInteger(5), 3370 ) 3371 class MsdosTimestampAdapter(TimestampAdapter): 3372 def _decode(self, obj, context, path): 3373 return arrow.Arrow(1980,1,1).shift(years=obj.year, months=obj.month-1, days=obj.day-1, hours=obj.hour, minutes=obj.minute, seconds=obj.second*2) 3374 def _encode(self, obj, context, path): 3375 t = obj.timetuple() 3376 return Container(year=t.tm_year-1980, month=t.tm_mon, day=t.tm_mday, hour=t.tm_hour, minute=t.tm_min, second=t.tm_sec//2) 3377 macro = MsdosTimestampAdapter(st) 3378 3379 else: 3380 if isinstance(epoch, integertypes): 3381 epoch = arrow.Arrow(epoch, 1, 1) 3382 class 
class Hex(Adapter):
    r"""
    Adapter for displaying hexadecimal/hexlified representation of integers/bytes/RawCopy dictionaries.

    Parsing wraps an integer, bytes or dict result into a display object (HexDisplayedInteger, HexDisplayedBytes or HexDisplayedDict respectively), whose only difference from original is pretty-printing. If you look at the result, you will be presented with its `repr` which remains as-is. If you print it, then you will see its `str` which is a hexlified representation. Any other parsed value is returned unchanged. Building passes the object to subcon unchanged.

    For integers the amount of displayed hex digits is twice the size of the subcon.

    To obtain a hexlified string (like before Hex HexDump changed semantics) use binascii.(un)hexlify on parsed results.

    Example::

        >>> d = Hex(Int32ub)
        >>> obj = d.parse(b"\x00\x00\x01\x02")
        >>> obj
        258
        >>> print(obj)
        0x00000102

        >>> d = Hex(GreedyBytes)
        >>> obj = d.parse(b"\x00\x00\x01\x02")
        >>> obj
        b'\x00\x00\x01\x02'
        >>> print(obj)
        unhexlify('00000102')

        >>> d = Hex(RawCopy(Int32ub))
        >>> obj = d.parse(b"\x00\x00\x01\x02")
        >>> obj
        {'data': b'\x00\x00\x01\x02',
         'length': 4,
         'offset1': 0,
         'offset2': 4,
         'value': 258}
        >>> print(obj)
        unhexlify('00000102')
    """

    def _decode(self, obj, context, path):
        if isinstance(obj, integertypes):
            # two hex digits per byte of the subcon, zero padded
            digits = 2 * self.subcon._sizeof(context, path)
            return HexDisplayedInteger.new(obj, "0%sX" % (digits))
        if isinstance(obj, bytestringtype):
            return HexDisplayedBytes(obj)
        if isinstance(obj, dict):
            return HexDisplayedDict(obj)
        return obj

    def _encode(self, obj, context, path):
        return obj

    def _emitparse(self, code):
        return self.subcon._compileparse(code)

    def _emitseq(self, ksy, bitwise):
        return self.subcon._compileseq(ksy, bitwise)

    def _emitprimitivetype(self, ksy, bitwise):
        return self.subcon._compileprimitivetype(ksy, bitwise)

    def _emitfulltype(self, ksy, bitwise):
        return self.subcon._compilefulltype(ksy, bitwise)
class Union(Construct):
    r"""
    Treats the same data as multiple constructs (similar to C union) so you can look at the data in multiple views. Fields are usually named (so parsed values are inserted into dictionary under same name).

    Parses subcons in sequence, and reverts the stream back to original position after each subcon. Afterwards, advances the stream by selected subcon. Builds from first subcon that has a matching key in given dict. Size is undefined (because parsefrom is not used for building).

    This class does context nesting, meaning its members are given access to a new dictionary where the "_" entry points to the outer context. When parsing, each member gets parsed and subcon parse return value is inserted into context under matching key only if the member was named. When building, the matching entry gets inserted into context before subcon gets build, and if subcon build returns a new value (not None) that gets replaced in the context.

    This class exposes subcons as attributes. You can refer to subcons that were inlined (and therefore do not exist as variable in the namespace) by accessing the struct attributes, under same name. Also note that compiler does not support this feature. See examples.

    This class exposes subcons in the context. You can refer to subcons that were inlined (and therefore do not exist as variable in the namespace) within other inlined fields using the context. Note that you need to use a lambda (`this` expression is not supported). Also note that compiler does not support this feature. See examples.

    .. warning:: If you skip `parsefrom` parameter then stream will be left back at starting offset, not seeked to any common denominator.

    :param parsefrom: how to leave stream after parsing, can be integer index or string name selecting a subcon, or None (leaves stream at initial offset, the default), or context lambda
    :param \*subcons: Construct instances, list of members, some can be anonymous
    :param \*\*subconskw: Construct instances, list of members (requires Python 3.6)

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises StreamError: stream is not seekable and tellable
    :raises UnionError: parsefrom was given a Construct instance, or dict given to build does not contain any keys matching any subcon
    :raises IndexError: integer selector does not match any subcon (when compiling)
    :raises KeyError: selector does not match any subcon
    :raises NotImplementedError: compiling with a parsefrom that is a context lambda or an unsupported constant

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        >>> d = Union(0,
        ...     "raw" / Bytes(8),
        ...     "ints" / Int32ub[2],
        ...     "shorts" / Int16ub[4],
        ...     "chars" / Byte[8],
        ... )
        >>> d.parse(b"12345678")
        Container(raw=b'12345678', ints=[825373492, 892745528], shorts=[12594, 13108, 13622, 14136], chars=[49, 50, 51, 52, 53, 54, 55, 56])
        >>> d.build(dict(chars=range(8)))
        b'\x00\x01\x02\x03\x04\x05\x06\x07'

        >>> d = Union(None,
        ...     "animal" / Enum(Byte, giraffe=1),
        ... )
        >>> d.animal.giraffe
        'giraffe'
        >>> d = Union(None,
        ...     "chars" / Byte[4],
        ...     "data" / Bytes(lambda this: this._subcons.chars.sizeof()),
        ... )
        >>> d.parse(b"\x01\x02\x03\x04")
        Container(chars=[1, 2, 3, 4], data=b'\x01\x02\x03\x04')

        Alternative syntax, but requires Python 3.6 or any PyPy:
        >>> Union(0, raw=Bytes(8), ints=Int32ub[2], shorts=Int16ub[4], chars=Byte[8])
    """

    def __init__(self, parsefrom, *subcons, **subconskw):
        # A Construct in first position almost certainly means the caller forgot parsefrom.
        if isinstance(parsefrom, Construct):
            raise UnionError("parsefrom should be either: None int str context-function")
        super().__init__()
        self.parsefrom = parsefrom
        self.subcons = list(subcons) + list(k/v for k,v in subconskw.items())
        # Only named members are reachable as attributes / through the context.
        self._subcons = Container((sc.name,sc) for sc in self.subcons if sc.name)

    def __getattr__(self, name):
        # __getattr__ only runs after normal lookup failed. If `_subcons` itself is the
        # missing attribute (instance not fully initialised), reading self._subcons below
        # would re-enter this method forever, so fail fast instead.
        if name == "_subcons":
            raise AttributeError(name)
        if name in self._subcons:
            return self._subcons[name]
        raise AttributeError("%r object has no attribute %r" % (type(self).__name__, name))

    def _parse(self, stream, context, path):
        obj = Container()
        # Nested context: "_" points at the outer context, "_root" at the outermost one.
        context = Container(_ = context, _params = context._params, _root = None, _parsing = context._parsing, _building = context._building, _sizing = context._sizing, _subcons = self._subcons, _io = stream, _index = context.get("_index", None))
        context._root = context._.get("_root", context)
        fallback = stream_tell(stream, path)
        # End offset of every member, keyed by both index and (if named) name.
        forwards = {}
        for i,sc in enumerate(self.subcons):
            subobj = sc._parsereport(stream, context, path)
            if sc.name:
                obj[sc.name] = subobj
                context[sc.name] = subobj
            forwards[i] = stream_tell(stream, path)
            if sc.name:
                forwards[sc.name] = stream_tell(stream, path)
            # Every member is parsed from the same starting offset.
            stream_seek(stream, fallback, 0, path)
        parsefrom = evaluate(self.parsefrom, context)
        if parsefrom is not None:
            stream_seek(stream, forwards[parsefrom], 0, path) # raises KeyError
        return obj

    def _build(self, obj, stream, context, path):
        context = Container(_ = context, _params = context._params, _root = None, _parsing = context._parsing, _building = context._building, _sizing = context._sizing, _subcons = self._subcons, _io = stream, _index = context.get("_index", None))
        context._root = context._.get("_root", context)
        context.update(obj)
        # Build from the first member that either can build from None or has its key in obj.
        for sc in self.subcons:
            if sc.flagbuildnone:
                subobj = obj.get(sc.name, None)
            elif sc.name in obj:
                subobj = obj[sc.name]
            else:
                continue

            if sc.name:
                context[sc.name] = subobj

            buildret = sc._build(subobj, stream, context, path)
            if sc.name:
                context[sc.name] = buildret
            return Container({sc.name:buildret})
        else:
            raise UnionError("cannot build, none of subcons were found in the dictionary %r" % (obj, ), path=path)

    def _sizeof(self, context, path):
        raise SizeofError("Union builds depending on actual object dict, size is unknown", path=path)

    def _emitparse(self, code):
        if callable(self.parsefrom):
            raise NotImplementedError("Union does not compile non-constant parsefrom")
        fname = "parse_union_%s" % code.allocateId()
        block = """
            def %s(io, this):
                this = Container(_ = this, _params = this['_params'], _root = None, _parsing = True, _building = False, _sizing = False, _subcons = None, _io = io, _index = this.get('_index', None))
                this['_root'] = this['_'].get('_root', this)
                fallback = io.tell()
        """ % (fname, )
        # skipfallback: no final rewind to the start is needed (a member was selected).
        # skipforward: no explicit seek to the selected member's end is needed, because the
        # last parsed member already leaves the stream at that offset (or nothing is selected).
        if self.parsefrom is None:
            index = -1
            skipfallback = False
            skipforward = True
        elif isinstance(self.parsefrom, int):
            index = self.parsefrom
            self.subcons[index] # raises IndexError
            skipfallback = True
            skipforward = self.subcons[index].sizeof() == self.subcons[-1].sizeof()
        elif isinstance(self.parsefrom, str):
            index = {sc.name:i for i,sc in enumerate(self.subcons) if sc.name}[self.parsefrom] # raises KeyError
            skipfallback = True
            skipforward = self.subcons[index].sizeof() == self.subcons[-1].sizeof()
        else:
            # Without this branch the names above would be unbound further down.
            raise NotImplementedError("Union does not compile parsefrom of type %s" % (type(self.parsefrom).__name__, ))

        for i,sc in enumerate(self.subcons):
            block += """
                %s%s
            """ % ("this[%r] = " % sc.name if sc.name else "", sc._compileparse(code))
            if i == index and not skipforward:
                block += """
                forward = io.tell()
                """
            if i < len(self.subcons)-1:
                block += """
                io.seek(fallback)
                """
        if not skipfallback:
            block += """
                io.seek(fallback)
            """
        if not skipforward:
            block += """
                io.seek(forward)
            """
        block += """
                del this['_']
                del this['_index']
                return this
        """
        code.append(block)
        return "%s(io, this)" % (fname,)

    def _emitbuild(self, code):
        fname = f"build_union_{code.allocateId()}"
        block = f"""
            def {fname}(obj, io, this):
                this = Container(_ = this, _params = this['_params'], _root = None, _parsing = False, _building = True, _sizing = False, _subcons = None, _io = io, _index = this.get('_index', None))
                this['_root'] = this['_'].get('_root', this)
                this.update(obj)
                objdict = obj
        """
        # One guarded branch per member; the first branch whose guard holds builds and returns.
        for sc in self.subcons:
            block += f"""
                if {'True' if sc.flagbuildnone else f'{repr(sc.name)} in objdict'}:
                    {f'obj = objdict.get({repr(sc.name)}, None)' if sc.flagbuildnone else f'obj = objdict[{repr(sc.name)}]'}
                    {f'this[{repr(sc.name)}] = obj' if sc.name else ''}
                    {f'buildret = this[{repr(sc.name)}] = ' if sc.name else ''}{sc._compilebuild(code)}
                    {f'return Container({{ {repr(sc.name)}:buildret }})'}
            """
        block += f"""
                raise UnionError('cannot build, none of subcons were found in the dictionary')
        """
        code.append(block)
        return f"{fname}(obj, io, this)"
class Select(Construct):
    r"""
    Picks the first subconstruct that works.

    Parsing tries each subcon in declaration order and returns the result of the first one that parses without a ConstructError. After every failed attempt the stream is rewound to where that attempt started; a successful attempt leaves the stream where the subcon finished. Building tries each subcon in the same order and writes the bytes produced by the first one that builds without raising. ExplicitError is never treated as a failed attempt and always propagates. Size is not defined.

    :param \*subcons: Construct instances, list of members, some can be anonymous
    :param \*\*subconskw: Construct instances, list of members (requires Python 3.6)

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises StreamError: stream is not seekable and tellable
    :raises SelectError: neither subcon succeeded when parsing or building

    Example::

        >>> d = Select(Int32ub, CString("utf8"))
        >>> d.build(1)
        b'\x00\x00\x00\x01'
        >>> d.build(u"Афон")
        b'\xd0\x90\xd1\x84\xd0\xbe\xd0\xbd\x00'

        Alternative syntax, but requires Python 3.6 or any PyPy:
        >>> Select(num=Int32ub, text=CString("utf8"))
    """

    def __init__(self, *subcons, **subconskw):
        super().__init__()
        renamed = [name / member for name, member in subconskw.items()]
        self.subcons = [*subcons, *renamed]
        # Building from None is possible as soon as one alternative supports it.
        self.flagbuildnone = any(member.flagbuildnone for member in self.subcons)

    def _parse(self, stream, context, path):
        for candidate in self.subcons:
            start = stream_tell(stream, path)
            try:
                return candidate._parsereport(stream, context, path)
            except ExplicitError:
                raise
            except ConstructError:
                # This alternative did not match, rewind and try the next one.
                stream_seek(stream, start, 0, path)
        raise SelectError("no subconstruct matched", path=path)

    def _build(self, obj, stream, context, path):
        for candidate in self.subcons:
            # Each attempt builds into its own buffer, so a failure leaves the real stream untouched.
            try:
                data = candidate.build(obj, **context)
            except ExplicitError:
                raise
            except Exception:
                continue
            stream_write(stream, data, len(data), path)
            return obj
        raise SelectError("no subconstruct matched: %s" % (obj,), path=path)
def Optional(subcon):
    r"""
    Wraps a field so that its absence is not an error.

    Parsing tries the subcon; when that fails the result is None and parsing still succeeds. Building tries the subcon; when that fails nothing is written and building still succeeds. Size is undefined, because whether bytes would be consumed or produced depends on actual data and actual context.

    :param subcon: Construct instance

    Example::

        Optional <--> Select(subcon, Pass)

        >>> d = Optional(Int64ul)
        >>> d.parse(b"12345678")
        4050765991979987505
        >>> d.parse(b"")
        None
        >>> d.build(1)
        b'\x01\x00\x00\x00\x00\x00\x00\x00'
        >>> d.build(None)
        b''
    """
    # Pass comes last, it is the alternative tried when the subcon fails.
    alternatives = (subcon, Pass)
    return Select(*alternatives)
def If(condfunc, subcon):
    r"""
    If-then conditional construct.

    Parsing evaluates condition, if True then subcon is parsed, otherwise just returns None. Building also evaluates condition, if True then subcon gets build from, otherwise does nothing. Size is either same as subcon or 0, depending how condfunc evaluates.

    :param condfunc: bool or context lambda (or a truthy value)
    :param subcon: Construct instance, used if condition indicates True

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        If <--> IfThenElse(condfunc, subcon, Pass)

        >>> d = If(this.x > 0, Byte)
        >>> d.build(255, x=1)
        b'\xff'
        >>> d.build(255, x=0)
        b''
    """
    macro = IfThenElse(condfunc, subcon, Pass)

    # KSY export: a single optional field rather than the then/else pair that
    # IfThenElse emits; attached to this instance only.
    def _emitfulltype(ksy, bitwise):
        return dict(type=subcon._compileprimitivetype(ksy, bitwise), if_=repr(condfunc).replace("this.",""))
    macro._emitfulltype = _emitfulltype

    return macro


class IfThenElse(Construct):
    r"""
    If-then-else conditional construct, similar to ternary operator.

    Parsing and building evaluates condition, and defers to either subcon depending on the value. Size is computed the same way.

    :param condfunc: bool or context lambda (or a truthy value)
    :param thensubcon: Construct instance, used if condition indicates True
    :param elsesubcon: Construct instance, used if condition indicates False

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        >>> d = IfThenElse(this.x > 0, VarInt, Byte)
        >>> d.build(255, x=1)
        b'\xff\x01'
        >>> d.build(255, x=0)
        b'\xff'
    """

    def __init__(self, condfunc, thensubcon, elsesubcon):
        super().__init__()
        self.condfunc = condfunc
        self.thensubcon = thensubcon
        self.elsesubcon = elsesubcon
        # Either branch may be taken, so building from None needs both to support it.
        self.flagbuildnone = thensubcon.flagbuildnone and elsesubcon.flagbuildnone

    def _parse(self, stream, context, path):
        condfunc = evaluate(self.condfunc, context)
        sc = self.thensubcon if condfunc else self.elsesubcon
        return sc._parsereport(stream, context, path)

    def _build(self, obj, stream, context, path):
        condfunc = evaluate(self.condfunc, context)
        sc = self.thensubcon if condfunc else self.elsesubcon
        return sc._build(obj, stream, context, path)

    def _sizeof(self, context, path):
        condfunc = evaluate(self.condfunc, context)
        sc = self.thensubcon if condfunc else self.elsesubcon
        return sc._sizeof(context, path)

    def _emitparse(self, code):
        # The condition is embedded via repr (%r), same as _emitbuild: a constant such as a
        # str or bytes value must appear as a quoted literal in the generated source, which
        # plain %s would turn into a bare identifier.
        return "((%s) if (%r) else (%s))" % (self.thensubcon._compileparse(code), self.condfunc, self.elsesubcon._compileparse(code), )

    def _emitbuild(self, code):
        return f"(({self.thensubcon._compilebuild(code)}) if ({repr(self.condfunc)}) else ({self.elsesubcon._compilebuild(code)}))"

    def _emitseq(self, ksy, bitwise):
        # Two mutually exclusive fields, the second guarded by the inverted condition.
        return [
            dict(id="thenvalue", type=self.thensubcon._compileprimitivetype(ksy, bitwise), if_=repr(self.condfunc).replace("this.","")),
            dict(id="elsesubcon", type=self.elsesubcon._compileprimitivetype(ksy, bitwise), if_=repr(~self.condfunc).replace("this.","")),
        ]
class Switch(Construct):
    r"""
    Conditional branch that dispatches on a key.

    Parsing and building evaluate keyfunc and look the resulting value up in `cases`, a dictionary mapping values to subcons. When no case matches, `default` is used instead (Pass unless specified). Note that `default` is a Construct instance, not a dictionary key. Size is computed the same way as parsing and building, by evaluating keyfunc and asking the selected subcon.

    :param keyfunc: context lambda or constant, that matches some key in cases
    :param cases: dict mapping keys to Construct instances
    :param default: optional, Construct instance, used when keyfunc is not found in cases, Pass is default value for this parameter, Error is a possible value for this parameter

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        >>> d = Switch(this.n, { 1:Int8ub, 2:Int16ub, 4:Int32ub })
        >>> d.build(5, n=1)
        b'\x05'
        >>> d.build(5, n=4)
        b'\x00\x00\x00\x05'

        >>> d = Switch(this.n, {}, default=Byte)
        >>> d.parse(b"\x01", n=255)
        1
        >>> d.build(1, n=255)
        b"\x01"
    """

    def __init__(self, keyfunc, cases, default=None):
        super().__init__()
        self.keyfunc = keyfunc
        self.cases = cases
        self.default = Pass if default is None else default
        # Any branch (including the default) may be selected, so all must build from None.
        branches = [*cases.values(), self.default]
        self.flagbuildnone = all(branch.flagbuildnone for branch in branches)

    def _parse(self, stream, context, path):
        key = evaluate(self.keyfunc, context)
        chosen = self.cases.get(key, self.default)
        return chosen._parsereport(stream, context, path)

    def _build(self, obj, stream, context, path):
        key = evaluate(self.keyfunc, context)
        chosen = self.cases.get(key, self.default)
        return chosen._build(obj, stream, context, path)

    def _sizeof(self, context, path):
        try:
            key = evaluate(self.keyfunc, context)
            chosen = self.cases.get(key, self.default)
            return chosen._sizeof(context, path)
        except (KeyError, AttributeError):
            # The key (or something the branch needs) is not available in the context.
            raise SizeofError("cannot calculate size, key not found in context", path=path)

    def _emitparse(self, code):
        # Emits a module-level dict of per-case lambdas plus a separate default lambda.
        table = f"switch_cases_{code.allocateId()}"
        code.append(f"{table} = {{}}")
        for key, branch in self.cases.items():
            parser = branch._compileparse(code)
            code.append(f"{table}[{repr(key)}] = lambda io,this: {parser}")
        fallback = f"switch_defaultcase_{code.allocateId()}"
        defaultparser = self.default._compileparse(code)
        code.append(f"{fallback} = lambda io,this: {defaultparser}")
        return f"{table}.get({repr(self.keyfunc)}, {fallback})(io, this)"

    def _emitbuild(self, code):
        # Same layout as _emitparse, with builder lambdas.
        table = f"switch_cases_{code.allocateId()}"
        code.append(f"{table} = {{}}")
        for key, branch in self.cases.items():
            builder = branch._compilebuild(code)
            code.append(f"{table}[{repr(key)}] = lambda obj,io,this: {builder}")
        fallback = f"switch_defaultcase_{code.allocateId()}"
        defaultbuilder = self.default._compilebuild(code)
        code.append(f"{fallback} = lambda obj,io,this: {defaultbuilder}")
        return f"{table}.get({repr(self.keyfunc)}, {fallback})(obj, io, this)"
class StopIf(Construct):
    r"""
    Evaluates a condition and, when it holds, stops certain classes (:class:`~construct.core.Struct` :class:`~construct.core.Sequence` :class:`~construct.core.GreedyRange`) from parsing or building further.

    Parsing and building both evaluate the condition and raise StopFieldError when it is truthy. Size is undefined.

    :param condfunc: bool or context lambda (or truthy value)

    :raises StopFieldError: used internally

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        >>> Struct('x'/Byte, StopIf(this.x == 0), 'y'/Byte)
        >>> Sequence('x'/Byte, StopIf(this.x == 0), 'y'/Byte)
        >>> GreedyRange(FocusedSeq(0, 'x'/Byte, StopIf(this.x == 0)))
    """

    def __init__(self, condfunc):
        super().__init__()
        self.condfunc = condfunc
        # Nothing is read from the built object, so None is acceptable.
        self.flagbuildnone = True

    def _parse(self, stream, context, path):
        if evaluate(self.condfunc, context):
            raise StopFieldError(path=path)

    def _build(self, obj, stream, context, path):
        if evaluate(self.condfunc, context):
            raise StopFieldError(path=path)

    def _sizeof(self, context, path):
        raise SizeofError("StopIf cannot determine size because it depends on actual context which then depends on actual data and outer constructs", path=path)

    def _emitparse(self, code):
        helper = f"""
            def parse_stopif(condition):
                if condition:
                    raise StopFieldError
        """
        code.append(helper)
        return f"parse_stopif({repr(self.condfunc)})"

    def _emitbuild(self, code):
        helper = f"""
            def build_stopif(condition):
                if condition:
                    raise StopFieldError
        """
        code.append(helper)
        return f"build_stopif({repr(self.condfunc)})"
def Padding(length, pattern=b"\x00"):
    r"""
    Pure padding field, null bytes by default.

    Parsing consumes the specified amount of bytes and discards them. Building writes the pattern byte repeated to the specified length. Size is same as specified.

    :param length: integer or context lambda, length of the padding
    :param pattern: b-character, padding pattern, default is \\x00

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises PaddingError: length was negative
    :raises PaddingError: pattern was not bytes (b-character)

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        >>> d = Padding(4) or Padded(4, Pass)
        >>> d.build(None)
        b'\x00\x00\x00\x00'
        >>> d.parse(b"****")
        None
        >>> d.sizeof()
        4
    """
    macro = Padded(length, Pass, pattern=pattern)

    # KSY export hooks, attached to this instance only: a "bN" primitive in bitwise
    # mode, a sized blob otherwise.
    def _emitprimitivetype(ksy, bitwise):
        if bitwise:
            return "b" + str(length)
        raise NotImplementedError

    def _emitfulltype(ksy, bitwise):
        if not bitwise:
            return {"size": length}
        raise NotImplementedError

    macro._emitprimitivetype = _emitprimitivetype
    macro._emitfulltype = _emitfulltype
    return macro


class Padded(Subconstruct):
    r"""
    Pads a subcon with additional bytes (null by default) up to a fixed length.

    Parsing first parses the subcon, then uses stream.tell() to measure how many bytes were read and consumes the remainder of `length`. Building first builds the subcon, then uses stream.tell() to measure how many bytes were written and emits the remainder as pattern bytes. Size is same as `length`, but negative amount results in error. Note that subcon can actually be variable size, it is the eventual amount of bytes that is read or written during parsing or building that determines actual padding.

    :param length: integer or context lambda, length of the padding
    :param subcon: Construct instance
    :param pattern: optional, b-character, padding pattern, default is \\x00

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises PaddingError: length is negative
    :raises PaddingError: subcon read or written more than the length (would cause negative pad)
    :raises PaddingError: pattern is not bytes of length 1

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        >>> d = Padded(4, Byte)
        >>> d.build(255)
        b'\xff\x00\x00\x00'
        >>> d.parse(_)
        255
        >>> d.sizeof()
        4

        >>> d = Padded(4, VarInt)
        >>> d.build(1)
        b'\x01\x00\x00\x00'
        >>> d.build(70000)
        b'\xf0\xa2\x04\x00'
    """

    def __init__(self, length, subcon, pattern=b"\x00"):
        if not (isinstance(pattern, bytestringtype) and len(pattern) == 1):
            raise PaddingError("pattern expected to be bytes of length 1")
        super().__init__(subcon)
        self.length = length
        self.pattern = pattern

    def _parse(self, stream, context, path):
        length = evaluate(self.length, context)
        if length < 0:
            raise PaddingError("length cannot be negative", path=path)
        start = stream_tell(stream, path)
        obj = self.subcon._parsereport(stream, context, path)
        consumed = stream_tell(stream, path) - start
        pad = length - consumed
        if pad < 0:
            raise PaddingError("subcon parsed %d bytes but was allowed only %d" % (consumed, length), path=path)
        # Skip over the filler so the stream ends exactly `length` bytes after `start`.
        stream_read(stream, pad, path)
        return obj

    def _build(self, obj, stream, context, path):
        length = evaluate(self.length, context)
        if length < 0:
            raise PaddingError("length cannot be negative", path=path)
        start = stream_tell(stream, path)
        buildret = self.subcon._build(obj, stream, context, path)
        produced = stream_tell(stream, path) - start
        pad = length - produced
        if pad < 0:
            raise PaddingError("subcon build %d bytes but was allowed only %d" % (produced, length), path=path)
        filler = self.pattern * pad
        stream_write(stream, filler, pad, path)
        return buildret

    def _sizeof(self, context, path):
        try:
            length = evaluate(self.length, context)
            if length < 0:
                raise PaddingError("length cannot be negative", path=path)
            return length
        except (KeyError, AttributeError):
            raise SizeofError("cannot calculate size, key not found in context", path=path)

    def _emitparse(self, code):
        # Compiled form relies on the static subcon size instead of stream.tell().
        parser = self.subcon._compileparse(code)
        return f"({parser}, io.read(({self.length})-({self.subcon.sizeof()}) ))[0]"

    def _emitbuild(self, code):
        builder = self.subcon._compilebuild(code)
        return f"({builder}, io.write({repr(self.pattern)}*(({self.length})-({self.subcon.sizeof()})) ))[0]"

    def _emitfulltype(self, ksy, bitwise):
        return {"size": self.length, "type": self.subcon._compileprimitivetype(ksy, bitwise)}
class Aligned(Subconstruct):
    r"""
    Pads a subcon with additional bytes (null by default) so that its total length becomes the shortest multiple of a modulus.

    Note that subcon can actually be variable size, it is the eventual amount of bytes that is read or written during parsing or building that determines actual padding.

    Parsing first parses the subcon, then consumes and discards as many bytes as needed to reach the next multiple of the modulus. Building first builds the subcon, then writes that many pattern bytes. Size is subcon size plus modulo remainder, unless SizeofError was raised.

    :param modulus: integer or context lambda, modulus to final length
    :param subcon: Construct instance
    :param pattern: optional, b-character, padding pattern, default is \\x00

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises PaddingError: modulus was less than 2
    :raises PaddingError: pattern was not bytes (b-character)

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        >>> d = Aligned(4, Int16ub)
        >>> d.parse(b'\x00\x01\x00\x00')
        1
        >>> d.sizeof()
        4
    """

    def __init__(self, modulus, subcon, pattern=b"\x00"):
        if not (isinstance(pattern, bytestringtype) and len(pattern) == 1):
            raise PaddingError("pattern expected to be bytes character")
        super().__init__(subcon)
        self.modulus = modulus
        self.pattern = pattern

    def _parse(self, stream, context, path):
        modulus = evaluate(self.modulus, context)
        if modulus < 2:
            raise PaddingError("expected modulo 2 or greater", path=path)
        start = stream_tell(stream, path)
        obj = self.subcon._parsereport(stream, context, path)
        consumed = stream_tell(stream, path) - start
        # -n % m is the distance from n up to the next multiple of m (0 when already aligned).
        pad = -consumed % modulus
        stream_read(stream, pad, path)
        return obj

    def _build(self, obj, stream, context, path):
        modulus = evaluate(self.modulus, context)
        if modulus < 2:
            raise PaddingError("expected modulo 2 or greater", path=path)
        start = stream_tell(stream, path)
        buildret = self.subcon._build(obj, stream, context, path)
        produced = stream_tell(stream, path) - start
        pad = -produced % modulus
        filler = self.pattern * pad
        stream_write(stream, filler, pad, path)
        return buildret

    def _sizeof(self, context, path):
        try:
            modulus = evaluate(self.modulus, context)
            if modulus < 2:
                raise PaddingError("expected modulo 2 or greater", path=path)
            inner = self.subcon._sizeof(context, path)
            return inner + (-inner % modulus)
        except (KeyError, AttributeError):
            raise SizeofError("cannot calculate size, key not found in context", path=path)

    def _emitparse(self, code):
        # Compiled form relies on the static subcon size instead of stream.tell().
        parser = self.subcon._compileparse(code)
        size = self.subcon.sizeof()
        return f"({parser}, io.read(-({size}) % ({self.modulus}) ))[0]"

    def _emitbuild(self, code):
        builder = self.subcon._compilebuild(code)
        return f"({builder}, io.write({repr(self.pattern)}*(-({self.subcon.sizeof()}) % ({self.modulus}))) )[0]"
def AlignedStruct(modulus, *subcons, **subconskw):
    r"""
    Builds a Struct in which every member is individually wrapped in Aligned with the same modulus (it is a struct of aligned fields, NOT an aligned struct).

    See :class:`~construct.core.Aligned` and :class:`~construct.core.Struct` for semantics and raisable exceptions.

    :param modulus: integer or context lambda, passed to each member
    :param \*subcons: Construct instances, list of members, some can be anonymous
    :param \*\*subconskw: Construct instances, list of members (requires Python 3.6)

    Example::

        >>> d = AlignedStruct(4, "a"/Int8ub, "b"/Int16ub)
        >>> d.build(dict(a=0xFF,b=0xFFFF))
        b'\xff\x00\x00\x00\xff\xff\x00\x00'
    """
    members = [*subcons, *(name / member for name, member in subconskw.items())]
    # Re-apply each member's name on the outside so the Struct still sees named fields.
    aligned = [member.name / Aligned(modulus, member) for member in members]
    return Struct(*aligned)


def BitStruct(*subcons, **subconskw):
    r"""
    Builds a Struct and wraps it in Bitwise, so its members operate on bits.

    See :class:`~construct.core.Bitwise` and :class:`~construct.core.Struct` for semantics and raisable exceptions.

    :param \*subcons: Construct instances, list of members, some can be anonymous
    :param \*\*subconskw: Construct instances, list of members (requires Python 3.6)

    Example::

        BitStruct <--> Bitwise(Struct(...))

        >>> d = BitStruct(
        ...     "a" / Flag,
        ...     "b" / Nibble,
        ...     "c" / BitsInteger(10),
        ...     "d" / Padding(1),
        ... )
        >>> d.parse(b"\xbe\xef")
        Container(a=True, b=7, c=887, d=None)
        >>> d.sizeof()
        2
    """
    inner = Struct(*subcons, **subconskw)
    return Bitwise(inner)
class Pointer(Subconstruct):
    r"""
    Processes one field at another stream position, then comes back.

    Parsing and building seek the stream to the given location, process the subcon there, and seek back to the original location. Size is defined as 0 but that does not mean no bytes are written into the stream.

    Offset can be positive, indicating a position from stream beginning forward, or negative, indicating a position from EOF backwards.

    :param offset: integer or context lambda, positive or negative
    :param subcon: Construct instance
    :param stream: None to use original stream (default), or context lambda to provide a different stream

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises StreamError: stream is not seekable and tellable

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        >>> d = Pointer(8, Bytes(1))
        >>> d.parse(b"abcdefghijkl")
        b'i'
        >>> d.build(b"Z")
        b'\x00\x00\x00\x00\x00\x00\x00\x00Z'
    """

    def __init__(self, offset, subcon, stream=None):
        super().__init__(subcon)
        self.offset = offset
        self.stream = stream

    def _parse(self, stream, context, path):
        offset = evaluate(self.offset, context)
        # A falsy result from the stream parameter means "use the stream being parsed".
        target = evaluate(self.stream, context) or stream
        origin = stream_tell(target, path)
        # Negative offsets are relative to EOF (whence 2), others to the start (whence 0).
        whence = 2 if offset < 0 else 0
        stream_seek(target, offset, whence, path)
        obj = self.subcon._parsereport(target, context, path)
        stream_seek(target, origin, 0, path)
        return obj

    def _build(self, obj, stream, context, path):
        offset = evaluate(self.offset, context)
        target = evaluate(self.stream, context) or stream
        origin = stream_tell(target, path)
        whence = 2 if offset < 0 else 0
        stream_seek(target, offset, whence, path)
        buildret = self.subcon._build(obj, target, context, path)
        stream_seek(target, origin, 0, path)
        return buildret

    def _sizeof(self, context, path):
        return 0

    def _emitparse(self, code):
        helper = f"""
            def parse_pointer(io, offset, func):
                fallback = io.tell()
                io.seek(offset, 2 if offset < 0 else 0)
                obj = func()
                io.seek(fallback)
                return obj
        """
        code.append(helper)
        return f"parse_pointer(io, {self.offset}, lambda: {self.subcon._compileparse(code)})"

    def _emitbuild(self, code):
        helper = f"""
            def build_pointer(obj, io, offset, func):
                fallback = io.tell()
                io.seek(offset, 2 if offset < 0 else 0)
                ret = func()
                io.seek(fallback)
                return ret
        """
        code.append(helper)
        return f"build_pointer(obj, io, {self.offset}, lambda: {self.subcon._compilebuild(code)})"

    def _emitprimitivetype(self, ksy, bitwise):
        # Exported as a KSY "instance" positioned at the offset; the instance name is returned.
        if callable(self.offset):
            offset = self.offset.__getfield__()
        else:
            offset = self.offset
        name = "instance_%s" % ksy.allocateId()
        ksy.instances[name] = dict(pos=offset, **self.subcon._compilefulltype(ksy, bitwise))
        return name
class Peek(Subconstruct):
    r"""
    Looks ahead in the stream without consuming anything.

    Parsing records the current offset, runs the subcon, and always seeks back to the recorded offset afterwards. A ConstructError raised by the subcon is swallowed and None is returned instead; ExplicitError is the exception to that rule and is re-raised. Building writes nothing and returns the given object unchanged (it is NOT deferred). Size is defined as 0 because there is no building.

    This class is used in :class:`~construct.core.Union` class to parse each member.

    :param subcon: Construct instance

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises StreamError: stream is not seekable and tellable

    Example::

        >>> d = Sequence(Peek(Int8ub), Peek(Int16ub))
        >>> d.parse(b"\x01\x02")
        [1, 258]
        >>> d.sizeof()
        0
    """

    def __init__(self, subcon):
        super().__init__(subcon)
        # Nothing is written while building, so a missing value is acceptable.
        self.flagbuildnone = True

    def _parse(self, stream, context, path):
        origin = stream_tell(stream, path)
        parsed = None
        try:
            parsed = self.subcon._parsereport(stream, context, path)
        except ExplicitError:
            # Explicit failures are never silenced by a peek.
            raise
        except ConstructError:
            parsed = None
        finally:
            # Rewind on success and on failure alike.
            stream_seek(stream, origin, 0, path)
        return parsed

    def _build(self, obj, stream, context, path):
        return obj

    def _sizeof(self, context, path):
        return 0

    def _emitparse(self, code):
        code.append("""
            def parse_peek(io, func):
                fallback = io.tell()
                try:
                    return func()
                except ExplicitError:
                    raise
                except ConstructError:
                    pass
                finally:
                    io.seek(fallback)
        """)
        compiled = self.subcon._compileparse(code)
        return "parse_peek(io, lambda: %s)" % (compiled,)

    def _emitbuild(self, code):
        return "obj"
@singleton
class Tell(Construct):
    r"""
    Reports the current stream offset.

    Both parsing and building return whatever stream.tell() reports at that moment. Size is defined as 0 because neither direction consumes from or adds to the stream.

    Tell is useful for turning relative offsets into absolute positions, or for measuring the size of other Constructs. For an absolute pointer, combine a Tell with a relative offset. For a size, place two Tells and take their difference in a Compute field; however, :class:`~construct.core.RawCopy` is the recommended way to do that instead of subtracting two positions by hand.

    :raises StreamError: stream is not tellable

    Example::

        >>> d = Struct("num"/VarInt, "offset"/Tell)
        >>> d.parse(b"X")
        Container(num=88, offset=1)
        >>> d.build(dict(num=88))
        b'X'
    """

    def __init__(self):
        super().__init__()
        # The built value is computed from the stream, so none has to be supplied.
        self.flagbuildnone = True

    def _parse(self, stream, context, path):
        position = stream_tell(stream, path)
        return position

    def _build(self, obj, stream, context, path):
        # The given obj is ignored; the stream position is the result.
        position = stream_tell(stream, path)
        return position

    def _sizeof(self, context, path):
        return 0

    def _emitparse(self, code):
        return "io.tell()"

    def _emitbuild(self, code):
        return "io.tell()"
class RawCopy(Subconstruct):
    r"""
    Used to obtain byte representation of a field (aside of object value).

    Parsing returns a Container holding the parsed subcon value, the raw bytes that were consumed by subcon, the starting and ending offsets in the stream, and the amount of bytes. Building accepts either the raw bytes representation or a value to be built by subcon. Size is same as subcon.

    Object is a dictionary with either "data" or "value" keys, or both.

    When building, if both the "value" and "data" keys are present, then the "data" key is used and the "value" key is ignored. This is undesirable in the case that you parse some data for the purpose of modifying it and writing it back; in this case, delete the "data" key when modifying the "value" key to correctly rebuild the former.

    :param subcon: Construct instance

    :raises StreamError: stream is not seekable and tellable
    :raises RawCopyError: building and neither data or value was given
    :raises RawCopyError: building from None while subcon cannot build from None
    :raises StringError: building from non-bytes value, perhaps unicode

    Example::

        >>> d = RawCopy(Byte)
        >>> d.parse(b"\xff")
        Container(data=b'\xff', value=255, offset1=0, offset2=1, length=1)
        >>> d.build(dict(data=b"\xff"))
        b'\xff'
        >>> d.build(dict(value=255))
        b'\xff'
    """

    def _parse(self, stream, context, path):
        offset1 = stream_tell(stream, path)
        obj = self.subcon._parsereport(stream, context, path)
        offset2 = stream_tell(stream, path)
        # Go back and re-read the span the subcon just consumed to capture its raw bytes.
        stream_seek(stream, offset1, 0, path)
        data = stream_read(stream, offset2-offset1, path)
        return Container(data=data, value=obj, offset1=offset1, offset2=offset2, length=(offset2-offset1))

    def _build(self, obj, stream, context, path):
        if obj is None:
            # None is only acceptable when the subcon itself can build from None;
            # otherwise fail with the documented error instead of a TypeError
            # from the membership tests below.
            if not self.subcon.flagbuildnone:
                raise RawCopyError('RawCopy cannot build from None, both data and value keys are missing', path=path)
            obj = dict(value=None)
        if 'data' in obj:
            # Raw bytes take precedence over "value" and are written verbatim.
            data = obj['data']
            offset1 = stream_tell(stream, path)
            stream_write(stream, data, len(data), path)
            offset2 = stream_tell(stream, path)
            return Container(obj, data=data, offset1=offset1, offset2=offset2, length=(offset2-offset1))
        if 'value' in obj:
            value = obj['value']
            offset1 = stream_tell(stream, path)
            buildret = self.subcon._build(value, stream, context, path)
            # Prefer what the subcon reports it built, when it reports anything.
            value = value if buildret is None else buildret
            offset2 = stream_tell(stream, path)
            # Read back what the subcon wrote so the result carries the raw bytes too.
            stream_seek(stream, offset1, 0, path)
            data = stream_read(stream, offset2-offset1, path)
            return Container(obj, data=data, value=value, offset1=offset1, offset2=offset2, length=(offset2-offset1))
        raise RawCopyError('RawCopy cannot build, both data and value keys are missing', path=path)
def BitsSwapped(subcon):
    r"""
    Swaps the bit order within each byte within boundaries of given subcon. Does NOT require a fixed sized subcon.

    When the subcon reports a size, the result is a :class:`~construct.core.Transformed` over exactly that many bytes. When asking for the size raises SizeofError, the result is a :class:`~construct.core.Restreamed` that swaps bits one byte at a time.

    :param subcon: Construct instance, subcon on top of bit swapped bytes

    See :class:`~construct.core.Transformed` and :class:`~construct.core.Restreamed` for raisable exceptions.

    Example::

        >>> d = Bitwise(Bytes(8))
        >>> d.parse(b"\x01")
        b'\x00\x00\x00\x00\x00\x00\x00\x01'
        >>> BitsSwapped(d).parse(b"\x01")
        b'\x01\x00\x00\x00\x00\x00\x00\x00'
    """

    try:
        fixedsize = subcon.sizeof()
    except SizeofError:
        # Size unknown: swap lazily, byte by byte; the size mapping is the identity.
        return Restreamed(subcon, swapbitsinbytes, 1, swapbitsinbytes, 1, lambda n: n)
    return Transformed(subcon, swapbitsinbytes, fixedsize, swapbitsinbytes, fixedsize)
4675 4676 Analog to :class:`~construct.core.PrefixedArray` which prefixes with an element count, instead of byte count. Semantics is similar but implementation is different. 4677 4678 :class:`~construct.core.VarInt` is recommended for new protocols, as it is more compact and never overflows. 4679 4680 :param lengthfield: Construct instance, field used for storing the length 4681 :param subcon: Construct instance, subcon used for storing the value 4682 :param includelength: optional, bool, whether length field should include its own size, default is False 4683 4684 :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes 4685 4686 Example:: 4687 4688 >>> d = Prefixed(VarInt, GreedyRange(Int32ul)) 4689 >>> d.parse(b"\x08abcdefgh") 4690 [1684234849, 1751606885] 4691 4692 >>> d = PrefixedArray(VarInt, Int32ul) 4693 >>> d.parse(b"\x02abcdefgh") 4694 [1684234849, 1751606885] 4695 """ 4696 4697 def __init__(self, lengthfield, subcon, includelength=False): 4698 super().__init__(subcon) 4699 self.lengthfield = lengthfield 4700 self.includelength = includelength 4701 4702 def _parse(self, stream, context, path): 4703 length = self.lengthfield._parsereport(stream, context, path) 4704 if self.includelength: 4705 length -= self.lengthfield._sizeof(context, path) 4706 data = stream_read(stream, length, path) 4707 if self.subcon is GreedyBytes: 4708 return data 4709 if type(self.subcon) is GreedyString: 4710 return data.decode(self.subcon.encoding) 4711 return self.subcon._parsereport(io.BytesIO(data), context, path) 4712 4713 def _build(self, obj, stream, context, path): 4714 stream2 = io.BytesIO() 4715 buildret = self.subcon._build(obj, stream2, context, path) 4716 data = stream2.getvalue() 4717 length = len(data) 4718 if self.includelength: 4719 length += self.lengthfield._sizeof(context, path) 4720 self.lengthfield._build(length, stream, context, path) 4721 
def PrefixedArray(countfield, subcon):
    r"""
    Prefixes an array with item count (as opposed to prefixed by byte count, see :class:`~construct.core.Prefixed`).

    Implemented as a FocusedSeq of a count field (rebuilt from the length of the items when building) followed by that many repetitions of subcon. The compile, Kaitai export and actual-size hooks are overridden on the returned instance.

    :class:`~construct.core.VarInt` is recommended for new protocols, as it is more compact and never overflows.

    :param countfield: Construct instance, field used for storing the element count
    :param subcon: Construct instance, subcon used for storing each element

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises RangeError: consumed or produced too little elements

    Example::

        >>> d = Prefixed(VarInt, GreedyRange(Int32ul))
        >>> d.parse(b"\x08abcdefgh")
        [1684234849, 1751606885]

        >>> d = PrefixedArray(VarInt, Int32ul)
        >>> d.parse(b"\x02abcdefgh")
        [1684234849, 1751606885]
    """
    macro = FocusedSeq("items",
        "count" / Rebuild(countfield, len_(this.items)),
        "items" / subcon[this.count],
    )

    # The hooks below are plain functions stored as instance attributes, so
    # Python does not bind them: they must NOT declare a `self` parameter.

    def _emitparse(code):
        return "ListContainer((%s) for i in range(%s))" % (subcon._compileparse(code), countfield._compileparse(code), )
    macro._emitparse = _emitparse

    def _emitbuild(code):
        return f"(reuse(len(obj), lambda obj: {countfield._compilebuild(code)}), list({subcon._compilebuild(code)} for obj in obj), obj)[2]"
    macro._emitbuild = _emitbuild

    def _actualsize(stream, context, path):
        # Size of the count field as actually parsed, plus count fixed-size elements.
        position1 = stream_tell(stream, path)
        count = countfield._parse(stream, context, path)
        position2 = stream_tell(stream, path)
        return (position2-position1) + count * subcon._sizeof(context, path)
    macro._actualsize = _actualsize

    def _emitseq(ksy, bitwise):
        return [
            dict(id="countfield", type=countfield._compileprimitivetype(ksy, bitwise)),
            dict(id="data", type=subcon._compileprimitivetype(ksy, bitwise), repeat="expr", repeat_expr="countfield"),
        ]
    macro._emitseq = _emitseq

    return macro
4801 4802 Parsing reads `length` bytes, then defers to subcon using new BytesIO with said bytes. Building builds the subcon using new BytesIO, then writes said data and additional null bytes accordingly. Size is same as `length`, although negative amount raises an error. 4803 4804 :param length: integer or context lambda, total amount of bytes (both data and padding) 4805 :param subcon: Construct instance 4806 4807 :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes 4808 :raises PaddingError: length is negative 4809 :raises PaddingError: subcon written more bytes than entire length (negative padding) 4810 4811 Can propagate any exception from the lambda, possibly non-ConstructError. 4812 4813 Example:: 4814 4815 >>> d = FixedSized(10, Byte) 4816 >>> d.parse(b'\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00') 4817 255 4818 >>> d.build(255) 4819 b'\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00' 4820 >>> d.sizeof() 4821 10 4822 """ 4823 4824 def __init__(self, length, subcon): 4825 super().__init__(subcon) 4826 self.length = length 4827 4828 def _parse(self, stream, context, path): 4829 length = evaluate(self.length, context) 4830 if length < 0: 4831 raise PaddingError("length cannot be negative", path=path) 4832 data = stream_read(stream, length, path) 4833 if self.subcon is GreedyBytes: 4834 return data 4835 if type(self.subcon) is GreedyString: 4836 return data.decode(self.subcon.encoding) 4837 return self.subcon._parsereport(io.BytesIO(data), context, path) 4838 4839 def _build(self, obj, stream, context, path): 4840 length = evaluate(self.length, context) 4841 if length < 0: 4842 raise PaddingError("length cannot be negative", path=path) 4843 stream2 = io.BytesIO() 4844 buildret = self.subcon._build(obj, stream2, context, path) 4845 data = stream2.getvalue() 4846 pad = length - len(data) 4847 if pad < 0: 4848 raise PaddingError("subcon build %d bytes but was 
class NullTerminated(Subconstruct):
    r"""
    Restricts parsing to bytes preceding a null byte.

    Parsing reads one terminator-sized unit at a time and accumulates it with previous units. When term was found, (by default) consumes but discards the term. When EOF was found, (by default) raises same StreamError exception. Then subcon is parsed using new BytesIO made with said data. Building builds the subcon and then writes the term. Size is undefined.

    The term can be multiple bytes, to support string classes with UTF16/32 encodings.

    :param subcon: Construct instance
    :param term: optional, bytes, terminator byte-string, default is \x00 single null byte
    :param include: optional, bool, if to include terminator in resulting data, default is False
    :param consume: optional, bool, if to consume terminator or leave it in the stream, default is True
    :param require: optional, bool, if EOF results in failure or not, default is True

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises StreamError: encountered EOF but require is not disabled
    :raises PaddingError: terminator is less than 1 bytes in length

    Example::

        >>> d = NullTerminated(Byte)
        >>> d.parse(b'\xff\x00')
        255
        >>> d.build(255)
        b'\xff\x00'
    """

    def __init__(self, subcon, term=b"\x00", include=False, consume=True, require=True):
        super().__init__(subcon)
        self.term = term
        self.include = include
        self.consume = consume
        self.require = require

    def _parse(self, stream, context, path):
        term = self.term
        unit = len(term)
        if unit < 1:
            raise PaddingError("NullTerminated term must be at least 1 byte", path=path)
        # Accumulate in a mutable buffer; repeated `bytes += bytes` copies the
        # whole payload on every unit and is quadratic in its length.
        buffer = bytearray()
        while True:
            try:
                b = stream_read(stream, unit, path)
            except StreamError:
                if self.require:
                    raise
                # EOF is tolerated: whatever was gathered so far is the data.
                break
            if b == term:
                if self.include:
                    buffer += b
                if not self.consume:
                    # Leave the terminator in the stream for whoever parses next.
                    stream_seek(stream, -unit, 1, path)
                break
            buffer += b
        data = bytes(buffer)
        # Shortcuts for the two greedy subcons, which would just return the data.
        if self.subcon is GreedyBytes:
            return data
        if type(self.subcon) is GreedyString:
            return data.decode(self.subcon.encoding)
        return self.subcon._parsereport(io.BytesIO(data), context, path)

    def _build(self, obj, stream, context, path):
        buildret = self.subcon._build(obj, stream, context, path)
        stream_write(stream, self.term, len(self.term), path)
        return buildret

    def _sizeof(self, context, path):
        raise SizeofError(path=path)

    def _emitfulltype(self, ksy, bitwise):
        # Only single-byte terminators are exported here.
        if len(self.term) > 1:
            raise NotImplementedError
        return dict(terminator=byte2int(self.term), include=self.include, consume=self.consume, eos_error=self.require, **self.subcon._compilefulltype(ksy, bitwise))
class RestreamData(Subconstruct):
    r"""
    Parses a field on external data (but does not build).

    Parsing defers to subcon, but provides it a separate BytesIO stream based on data provided by datafunc (a bytes literal or another BytesIO stream or Construct instances that returns bytes or context lambda). Building does nothing. Size is 0 because as far as other fields see it, this field does not produce or consume any bytes from the stream.

    :param datafunc: bytes or BytesIO or Construct instance (that parses into bytes) or context lambda, provides data for subcon to parse from
    :param subcon: Construct instance

    :raises StringError: datafunc provided something other than bytes, BytesIO or a Construct instance

    Can propagate any exception from the lambdas, possibly non-ConstructError.

    Example::

        >>> d = RestreamData(b"\x01", Int8ub)
        >>> d.parse(b"")
        1
        >>> d.build(0)
        b''

        >>> d = RestreamData(NullTerminated(GreedyBytes), Int16ub)
        >>> d.parse(b"\x01\x02\x00")
        0x0102
        >>> d = RestreamData(FixedSized(2, GreedyBytes), Int16ub)
        >>> d.parse(b"\x01\x02\x00")
        0x0102
    """

    def __init__(self, datafunc, subcon):
        super().__init__(subcon)
        self.datafunc = datafunc
        self.flagbuildnone = True

    def _parse(self, stream, context, path):
        data = evaluate(self.datafunc, context)
        if isinstance(data, bytestringtype):
            stream2 = io.BytesIO(data)
        elif isinstance(data, io.BytesIO):
            # An existing stream is used as-is, from its current position.
            stream2 = data
        elif isinstance(data, Construct):
            # A Construct obtains the bytes by parsing them off the main stream.
            stream2 = io.BytesIO(data._parsereport(stream, context, path))
        else:
            # Fail explicitly instead of hitting an unbound local below.
            raise StringError("RestreamData needs bytes or BytesIO or Construct as data source, found %r" % (data,), path=path)
        return self.subcon._parsereport(stream2, context, path)

    def _build(self, obj, stream, context, path):
        return obj

    def _sizeof(self, context, path):
        return 0

    def _emitparse(self, code):
        return "restream(%r, lambda io: %s)" % (self.datafunc, self.subcon._compileparse(code), )
warning:: Remember that subcon must consume (or produce) an amount of bytes that is same as `decodeamount` (or `encodeamount`). 5064 5065 .. warning:: Do NOT use seeking/telling classes inside Transformed context. 5066 5067 :param subcon: Construct instance 5068 :param decodefunc: bytes-to-bytes function, applied before parsing subcon 5069 :param decodeamount: integer, amount of bytes to read 5070 :param encodefunc: bytes-to-bytes function, applied after building subcon 5071 :param encodeamount: integer, amount of bytes to write 5072 5073 :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes 5074 :raises StreamError: subcon build and encoder transformed more or less than `encodeamount` bytes, if amount is specified 5075 :raises StringError: building from non-bytes value, perhaps unicode 5076 5077 Can propagate any exception from the lambdas, possibly non-ConstructError. 5078 5079 Example:: 5080 5081 >>> d = Transformed(Bytes(16), bytes2bits, 2, bits2bytes, 2) 5082 >>> d.parse(b"\x00\x00") 5083 b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 5084 5085 >>> d = Transformed(GreedyBytes, bytes2bits, None, bits2bytes, None) 5086 >>> d.parse(b"\x00\x00") 5087 b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 5088 """ 5089 5090 def __init__(self, subcon, decodefunc, decodeamount, encodefunc, encodeamount): 5091 super().__init__(subcon) 5092 self.decodefunc = decodefunc 5093 self.decodeamount = decodeamount 5094 self.encodefunc = encodefunc 5095 self.encodeamount = encodeamount 5096 5097 def _parse(self, stream, context, path): 5098 if isinstance(self.decodeamount, type(None)): 5099 data = stream_read_entire(stream, path) 5100 if isinstance(self.decodeamount, integertypes): 5101 data = stream_read(stream, self.decodeamount, path) 5102 data = self.decodefunc(data) 5103 if self.subcon is GreedyBytes: 5104 return data 5105 if 
class Restreamed(Subconstruct):
    r"""
    Transforms bytes between the underlying stream and the (variable-sized) subcon.

    Parsing and building wrap the underlying stream in a RestreamedBytesIO configured with the decoder and encoder, run the subcon against that wrapper, then close the wrapper. Size is the subcon size passed through `sizecomputer`.

    Used internally to implement :class:`~construct.core.Bitwise` :class:`~construct.core.Bytewise` :class:`~construct.core.ByteSwapped` :class:`~construct.core.BitsSwapped` .

    .. warning:: Remember that subcon must consume or produce an amount of bytes that is a multiple of encoding or decoding units. For example, in a Bitwise context you should process a multiple of 8 bits or the stream will fail during parsing/building.

    .. warning:: Do NOT use seeking/telling classes inside Restreamed context.

    :param subcon: Construct instance
    :param decoder: bytes-to-bytes function, used on data chunks when parsing
    :param decoderunit: integer, decoder takes chunks of this size
    :param encoder: bytes-to-bytes function, used on data chunks when building
    :param encoderunit: integer, encoder takes chunks of this size
    :param sizecomputer: function that computes amount of bytes outputted, or None if size cannot be computed

    :raises SizeofError: sizecomputer is None, or subcon size is not defined

    Can propagate any exception from the lambda, possibly non-ConstructError.
    Can also raise arbitrary exceptions in RestreamedBytesIO implementation.

    Example::

        Bitwise  <--> Restreamed(subcon, bits2bytes, 8, bytes2bits, 1, lambda n: n//8)
        Bytewise <--> Restreamed(subcon, bytes2bits, 1, bits2bytes, 8, lambda n: n*8)
    """

    def __init__(self, subcon, decoder, decoderunit, encoder, encoderunit, sizecomputer):
        super().__init__(subcon)
        self.decoder = decoder
        self.decoderunit = decoderunit
        self.encoder = encoder
        self.encoderunit = encoderunit
        self.sizecomputer = sizecomputer

    def _parse(self, stream, context, path):
        stream2 = RestreamedBytesIO(stream, self.decoder, self.decoderunit, self.encoder, self.encoderunit)
        obj = self.subcon._parsereport(stream2, context, path)
        stream2.close()
        return obj

    def _build(self, obj, stream, context, path):
        stream2 = RestreamedBytesIO(stream, self.decoder, self.decoderunit, self.encoder, self.encoderunit)
        buildret = self.subcon._build(obj, stream2, context, path)
        stream2.close()
        # Hand back what the subcon reports it built, as the other wrapping
        # constructs in this section (Transformed, Prefixed, FixedSized) do.
        return buildret

    def _sizeof(self, context, path):
        if self.sizecomputer is None:
            raise SizeofError("Restreamed cannot calculate size without a sizecomputer", path=path)
        else:
            return self.sizecomputer(self.subcon._sizeof(context, path))
class ProcessXor(Subconstruct):
    r"""
    Transforms bytes between the underlying stream and the subcon.

    Used internally by KaitaiStruct compiler, when translating `process: xor` tags.

    Parsing reads till EOF, xors data with the pad, then feeds that data into subcon. Building first builds the subcon into separate BytesIO stream, xors data with the pad, then writes that data into the main stream. Size is the same as subcon, unless it raises SizeofError.

    :param padfunc: integer or bytes or context lambda, single or multiple bytes to xor data with
    :param subcon: Construct instance

    :raises StringError: pad is not integer or bytes

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        >>> d = ProcessXor(0xf0 or b'\xf0', Int16ub)
        >>> d.parse(b"\x00\xff")
        0xf00f
        >>> d.sizeof()
        2
    """

    def __init__(self, padfunc, subcon):
        super().__init__(subcon)
        self.padfunc = padfunc

    def _resolvepad(self, context, path):
        """Evaluate and validate the pad; a one-byte bytes pad is reduced to its integer."""
        pad = evaluate(self.padfunc, context)
        if not isinstance(pad, (integertypes, bytestringtype)):
            raise StringError("ProcessXor needs integer or bytes pad", path=path)
        if isinstance(pad, bytestringtype) and len(pad) == 1:
            return byte2int(pad)
        return pad

    @staticmethod
    def _xored(data, pad):
        """Return data xored with an already validated pad; all-zero pads leave data untouched."""
        if isinstance(pad, integertypes):
            if pad == 0:
                return data
            return integers2bytes( (b ^ pad) for b in data )
        if isinstance(pad, bytestringtype):
            # short all-zero pads are detected and skipped, xoring would be a no-op
            if len(pad) <= 64 and pad == bytes(len(pad)):
                return data
            return integers2bytes( (b ^ p) for b,p in zip(data, itertools.cycle(pad)) )
        return data

    def _parse(self, stream, context, path):
        pad = self._resolvepad(context, path)
        data = self._xored(stream_read_entire(stream, path), pad)
        subcon = self.subcon
        if subcon is GreedyBytes:
            return data
        if type(subcon) is GreedyString:
            return data.decode(subcon.encoding)
        return subcon._parsereport(io.BytesIO(data), context, path)

    def _build(self, obj, stream, context, path):
        pad = self._resolvepad(context, path)
        scratch = io.BytesIO()
        buildret = self.subcon._build(obj, scratch, context, path)
        data = self._xored(scratch.getvalue(), pad)
        stream_write(stream, data, len(data), path)
        return buildret

    def _sizeof(self, context, path):
        return self.subcon._sizeof(context, path)
class ProcessRotateLeft(Subconstruct):
    r"""
    Transforms bytes between the underlying stream and the subcon.

    Used internally by KaitaiStruct compiler, when translating `process: rol/ror` tags.

    Parsing reads till EOF, rotates (shifts) the data *left* by amount in bits, then feeds that data into subcon. Building first builds the subcon into separate BytesIO stream, rotates *right* by negating amount, then writes that data into the main stream. Size is the same as subcon, unless it raises SizeofError.

    :param amount: integer or context lambda, shift by this amount in bits, treated modulo (group x 8)
    :param group: integer or context lambda, shifting is applied to chunks of this size in bytes
    :param subcon: Construct instance

    :raises RotationError: group is less than 1
    :raises RotationError: data length is not a multiple of group size

    Can propagate any exception from the lambda, possibly non-ConstructError.

    Example::

        >>> d = ProcessRotateLeft(4, 1, Int16ub)
        >>> d.parse(b'\x0f\xf0')
        0xf00f
        >>> d = ProcessRotateLeft(4, 2, Int16ub)
        >>> d.parse(b'\x0f\xf0')
        0xff00
        >>> d.sizeof()
        2
    """

    # formula taken from: http://stackoverflow.com/a/812039
    precomputed_single_rotations = {amount: [(i << amount) & 0xff | (i >> (8-amount)) for i in range(256)] for amount in range(1,8)}

    def __init__(self, amount, group, subcon):
        super().__init__(subcon)
        self.amount = amount
        self.group = group

    @staticmethod
    def _rotated(data, amount, group, path):
        """Rotate every `group`-sized chunk of data left by `amount` bits (0 <= amount < group*8)."""
        data_ints = bytes2integers(data)
        if len(data) % group != 0:
            raise RotationError("data length must be a multiple of group size", path=path)

        if amount == 0:
            return data

        if group == 1:
            # single bytes: table lookup
            table = ProcessRotateLeft.precomputed_single_rotations[amount]
            return integers2bytes( table[a] for a in data_ints )

        wholebytes, bits = divmod(amount, 8)
        chunkstarts = range(0, len(data), group)

        if bits == 0:
            # whole-byte rotation only permutes bytes inside each chunk
            order = [(i + wholebytes) % group for i in range(group)]
            return integers2bytes( data_ints[base+k] for base in chunkstarts for k in order )

        # general case: each output byte combines two neighbouring input bytes
        carry = 8 - bits
        pairs = [ ((i+wholebytes) % group, (i+1+wholebytes) % group) for i in range(group)]
        return integers2bytes( (data_ints[base+k1] << bits) & 0xff | (data_ints[base+k2] >> carry) for base in chunkstarts for k1,k2 in pairs )

    def _parse(self, stream, context, path):
        amount = evaluate(self.amount, context)
        group = evaluate(self.group, context)
        if group < 1:
            raise RotationError("group size must be at least 1 to be valid", path=path)

        amount = amount % (group * 8)
        data = self._rotated(stream_read_entire(stream, path), amount, group, path)

        subcon = self.subcon
        if subcon is GreedyBytes:
            return data
        if type(subcon) is GreedyString:
            return data.decode(subcon.encoding)
        return subcon._parsereport(io.BytesIO(data), context, path)

    def _build(self, obj, stream, context, path):
        amount = evaluate(self.amount, context)
        group = evaluate(self.group, context)
        if group < 1:
            raise RotationError("group size must be at least 1 to be valid", path=path)

        # building undoes the parse-time rotation, hence the negated amount
        amount = (-amount) % (group * 8)
        scratch = io.BytesIO()
        buildret = self.subcon._build(obj, scratch, context, path)
        data = self._rotated(scratch.getvalue(), amount, group, path)

        stream_write(stream, data, len(data), path)
        return buildret

    def _sizeof(self, context, path):
        return self.subcon._sizeof(context, path)
class Checksum(Construct):
    r"""
    Field that is built or validated by a hash of a given byte range. Usually used with :class:`~construct.core.RawCopy` .

    Parsing compares parsed subcon `checksumfield` with a context entry provided by `bytesfunc` and transformed by `hashfunc`. Building fetches the context entry, transforms it, then writes it using subcon. Size is same as subcon.

    :param checksumfield: a subcon field that reads the checksum, usually Bytes(int)
    :param hashfunc: function that takes bytes and returns whatever checksumfield takes when building, usually from hashlib module
    :param bytesfunc: context lambda that returns bytes (or object) to be hashed, usually like this.rawcopy1.data

    :raises ChecksumError: parsed checksum does not match the one computed from actual data

    Can propagate any exception from the lambdas, possibly non-ConstructError.

    Example::

        import hashlib
        d = Struct(
            "fields" / RawCopy(Struct(
                Padding(1000),
            )),
            "checksum" / Checksum(Bytes(64),
                lambda data: hashlib.sha512(data).digest(),
                this.fields.data),
        )
        d.build(dict(fields=dict(value={})))

    ::

        import hashlib
        d = Struct(
            "offset" / Tell,
            "checksum" / Padding(64),
            "fields" / RawCopy(Struct(
                Padding(1000),
            )),
            "checksum" / Pointer(this.offset, Checksum(Bytes(64),
                lambda data: hashlib.sha512(data).digest(),
                this.fields.data)),
        )
        d.build(dict(fields=dict(value={})))
    """

    def __init__(self, checksumfield, hashfunc, bytesfunc):
        super().__init__()
        self.checksumfield = checksumfield
        self.hashfunc = hashfunc
        self.bytesfunc = bytesfunc
        self.flagbuildnone = True

    def _parse(self, stream, context, path):
        parsed = self.checksumfield._parsereport(stream, context, path)
        computed = self.hashfunc(self.bytesfunc(context))
        if parsed != computed:
            # bytes digests are shown hexlified, anything else as-is
            shown = tuple(
                binascii.hexlify(h) if isinstance(h, bytestringtype) else h
                for h in (parsed, computed)
            )
            raise ChecksumError("wrong checksum, read %r, computed %r" % shown, path=path)
        return parsed

    def _build(self, obj, stream, context, path):
        # the given obj is ignored, the checksum is always recomputed
        computed = self.hashfunc(self.bytesfunc(context))
        self.checksumfield._build(computed, stream, context, path)
        return computed

    def _sizeof(self, context, path):
        return self.checksumfield._sizeof(context, path)
class Compressed(Tunnel):
    r"""
    Compresses and decompresses underlying stream when processing subcon. When parsing, entire stream is consumed. When building, it puts compressed bytes without marking the end. This construct should be used with :class:`~construct.core.Prefixed` .

    Parsing and building transforms all bytes using a specified codec. Since data is processed until EOF, it behaves similar to `GreedyBytes`. Size is undefined.

    :param subcon: Construct instance, subcon used for storing the value
    :param encoding: string, any of module names like zlib/gzip/bzip2/lzma, otherwise any of codecs module bytes<->bytes encodings, each codec usually requires some Python version
    :param level: optional, integer between 0..9, although lzma discards it, some encoders allow different compression levels

    :raises ImportError: needed module could not be imported by ctor
    :raises StreamError: stream failed when reading until EOF

    Example::

        >>> d = Prefixed(VarInt, Compressed(GreedyBytes, "zlib"))
        >>> d.build(bytes(100))
        b'\x0cx\x9cc`\xa0=\x00\x00\x00d\x00\x01'
        >>> len(_)
        13
    """

    def __init__(self, subcon, encoding, level=None):
        super().__init__(subcon)
        self.encoding = encoding
        self.level = level
        # encodings that map onto a dedicated compression module; everything
        # else is delegated to the codecs module
        modulename = "codecs"
        for known, candidate in (("zlib", "zlib"), ("gzip", "gzip"), ("bzip2", "bz2"), ("lzma", "lzma")):
            if self.encoding == known:
                modulename = candidate
                break
        self.lib = importlib.import_module(modulename)

    def _decode(self, data, context, path):
        if self.encoding not in ("zlib", "gzip", "bzip2", "lzma"):
            return self.lib.decode(data, self.encoding)
        return self.lib.decompress(data)

    def _encode(self, data, context, path):
        if self.encoding not in ("zlib", "gzip", "bzip2", "lzma"):
            return self.lib.encode(data, self.encoding)
        # lzma never receives the level, the others only when one was given
        if self.level is None or self.encoding == "lzma":
            return self.lib.compress(data)
        return self.lib.compress(data, self.level)


class CompressedLZ4(Tunnel):
    r"""
    Compresses and decompresses underlying stream before processing subcon. When parsing, entire stream is consumed. When building, it puts compressed bytes without marking the end. This construct should be used with :class:`~construct.core.Prefixed` .

    Parsing and building transforms all bytes using LZ4 library. Since data is processed until EOF, it behaves similar to `GreedyBytes`. Size is undefined.

    :param subcon: Construct instance, subcon used for storing the value

    :raises ImportError: needed module could not be imported by ctor
    :raises StreamError: stream failed when reading until EOF

    Can propagate lz4.frame exceptions.

    Example::

        >>> d = Prefixed(VarInt, CompressedLZ4(GreedyBytes))
        >>> d.build(bytes(100))
        b'"\x04"M\x18h@d\x00\x00\x00\x00\x00\x00\x00#\x0b\x00\x00\x00\x1f\x00\x01\x00KP\x00\x00\x00\x00\x00\x00\x00\x00\x00'
        >>> len(_)
        35
    """

    def __init__(self, subcon):
        super().__init__(subcon)
        # imported lazily so the lz4 package is only needed when this class is used
        self.lib = importlib.import_module("lz4.frame")

    def _decode(self, data, context, path):
        return self.lib.decompress(data)

    def _encode(self, data, context, path):
        return self.lib.compress(data)
class Rebuffered(Subconstruct):
    r"""
    Caches bytes from underlying stream, so it becomes seekable and tellable, and also becomes blocking on reading. Useful for processing non-file streams like pipes, sockets, etc.

    .. warning:: Experimental implementation. May not be mature enough.

    :param subcon: Construct instance, subcon which will operate on the buffered stream
    :param tailcutoff: optional, integer, amount of bytes kept in buffer, by default buffers everything

    Can also raise arbitrary exceptions in its implementation.

    Example::

        Rebuffered(..., tailcutoff=1024).parse_stream(nonseekable_stream)
    """

    def __init__(self, subcon, tailcutoff=None):
        super().__init__(subcon)
        # one buffering wrapper per instance, re-pointed at the stream of each call
        self.stream2 = RebufferedBytesIO(None, tailcutoff=tailcutoff)

    def _parse(self, stream, context, path):
        buffered = self.stream2
        buffered.substream = stream
        return self.subcon._parsereport(buffered, context, path)

    def _build(self, obj, stream, context, path):
        buffered = self.stream2
        buffered.substream = stream
        return self.subcon._build(obj, buffered, context, path)


#===============================================================================
# lazy equivalents
#===============================================================================
class Lazy(Subconstruct):
    r"""
    Lazyfies a field.

    This wrapper allows you to do lazy parsing of individual fields inside a normal Struct (without using LazyStruct which may not work in every scenario). It is also used by KaitaiStruct compiler to emit `instances` because those are not processed greedily, and they may refer to other not yet parsed fields. Those are 2 entirely different applications but semantics are the same.

    Parsing saves the current stream offset and returns a lambda. If and when that lambda gets evaluated, it seeks the stream to the saved position, parses the subcon, and seeks the stream back to previous position. Building evaluates that lambda into an object (if needed), then defers to subcon. Size also defers to subcon.

    :param subcon: Construct instance

    :raises StreamError: requested reading negative amount, could not read enough bytes, requested writing different amount than actual data, or could not write all bytes
    :raises StreamError: stream is not seekable and tellable

    Example::

        >>> d = Lazy(Byte)
        >>> x = d.parse(b'\x00')
        >>> x
        <function construct.core.Lazy._parse.<locals>.execute>
        >>> x()
        0
        >>> d.build(0)
        b'\x00'
        >>> d.build(x)
        b'\x00'
        >>> d.sizeof()
        1
    """

    def __init__(self, subcon):
        super().__init__(subcon)

    def _parse(self, stream, context, path):
        offset = stream_tell(stream, path)

        def execute():
            # parse at the remembered offset, then restore the caller's position
            fallback = stream_tell(stream, path)
            stream_seek(stream, offset, 0, path)
            obj = self.subcon._parsereport(stream, context, path)
            stream_seek(stream, fallback, 0, path)
            return obj

        # _actualsize takes the stream (as in LazyStruct and LazyArray), since
        # measuring some subcons requires partially reading it.
        size = self.subcon._actualsize(stream, context, path)
        # Skip the field with an absolute seek, so that bytes consumed while
        # measuring are not skipped a second time.
        stream_seek(stream, offset + size, 0, path)
        return execute

    def _build(self, obj, stream, context, path):
        # accept either a plain value or the lambda returned by parsing
        if callable(obj):
            obj = obj()
        return self.subcon._build(obj, stream, context, path)
class LazyContainer(dict):
    """
    Used internally.

    Mapping returned by LazyStruct parsing. Members are parsed on first access
    (by name or by subcon index) and then cached in `values`.
    """

    def __init__(self, struct, stream, offsets, values, context, path):
        self._struct = struct
        self._stream = stream
        self._offsets = offsets
        self._values = values
        self._context = context
        self._path = path

    def __getattr__(self, name):
        # Only invoked when regular lookup fails. Read _struct straight from
        # the instance dict: going through attribute access would re-enter
        # this method endlessly when _struct has not been assigned yet.
        struct = self.__dict__.get("_struct")
        if struct is not None and name in struct._subconsindexes:
            return self[name]
        raise AttributeError(name)

    def __getitem__(self, index):
        if isinstance(index, stringtypes):
            index = self._struct._subconsindexes[index] # KeyError
        if index in self._values:
            return self._values[index]
        # not cached yet: parse the member at its recorded offset
        stream_seek(self._stream, self._offsets[index], 0, self._path) # KeyError
        parseret = self._struct.subcons[index]._parsereport(self._stream, self._context, self._path)
        self._values[index] = parseret
        return parseret

    def __len__(self):
        return len(self._struct.subcons)

    def keys(self):
        return iter(self._struct._subcons)

    def values(self):
        return (self[k] for k in self._struct._subcons)

    def items(self):
        return ((k, self[k]) for k in self._struct._subcons)

    __iter__ = keys

    def __eq__(self, other):
        return Container.__eq__(self, other)

    def __repr__(self):
        return "<LazyContainer: %s items cached, %s subcons>" % (len(self._values), len(self._struct.subcons), )
class LazyStruct(Construct):
    r"""
    Equivalent to :class:`~construct.core.Struct`, but when this class is parsed, most fields are not parsed (they are skipped if their size can be measured by _actualsize or _sizeof method). See its docstring for details.

    Fields are parsed depending on some factors:

    * Some fields like Int* Float* Bytes(5) Array(5,Byte) Pointer are fixed-size and are therefore skipped. Stream is not read.
    * Some fields like Bytes(this.field) are variable-size but their size is known during parsing when there is a corresponding context entry. Those fields are also skipped. Stream is not read.
    * Some fields like Prefixed PrefixedArray PascalString are variable-size but their size can be computed by partially reading the stream. Only first few bytes are read (the lengthfield).
    * Other fields like VarInt need to be parsed. Stream position that is left after the field was parsed is used.
    * Some fields may not work properly, due to the fact that this class attempts to skip fields, and parses them only out of necessity. Miscellaneous fields often have size defined as 0, and fixed sized fields are skippable.

    Note there are restrictions:

    * If a field like Bytes(this.field) references another field in the same struct, you need to access the referenced field first (to trigger its parsing) and then you can access the Bytes field. Otherwise it would fail due to missing context entry.
    * If a field references another field within inner (nested) or outer (super) struct, things may break. Context is nested, but this class was not rigorously tested in that manner.

    Building and sizeof are greedy, like in Struct.

    :param \*subcons: Construct instances, list of members, some can be anonymous
    :param \*\*subconskw: Construct instances, list of members (requires Python 3.6)
    """

    def __init__(self, *subcons, **subconskw):
        super().__init__()
        members = list(subcons)
        members.extend(k/v for k,v in subconskw.items())
        self.subcons = members
        self._subcons = Container((sc.name,sc) for sc in self.subcons if sc.name)
        self._subconsindexes = Container((sc.name,i) for i,sc in enumerate(self.subcons) if sc.name)
        self.flagbuildnone = all(sc.flagbuildnone for sc in self.subcons)

    def __getattr__(self, name):
        if name in self._subcons:
            return self._subcons[name]
        raise AttributeError

    def _nestedcontext(self, context, stream):
        """Create the context of this struct level, linked to the parent context through `_`."""
        nested = Container(_ = context, _params = context._params, _root = None, _parsing = context._parsing, _building = context._building, _sizing = context._sizing, _subcons = self._subcons, _io = stream, _index = context.get("_index", None))
        nested._root = nested._.get("_root", nested)
        return nested

    def _parse(self, stream, context, path):
        context = self._nestedcontext(context, stream)
        position = stream_tell(stream, path)
        offsets = {0: position}
        values = {}
        for index,sc in enumerate(self.subcons):
            try:
                # measurable members are skipped over instead of parsed
                position += sc._actualsize(stream, context, path)
                stream_seek(stream, position, 0, path)
            except SizeofError:
                # size unknown: parse it now and continue where the stream ended up
                parseret = sc._parsereport(stream, context, path)
                values[index] = parseret
                if sc.name:
                    context[sc.name] = parseret
                position = stream_tell(stream, path)
            offsets[index+1] = position
        return LazyContainer(self, stream, offsets, values, context, path)

    def _build(self, obj, stream, context, path):
        # exact copy from Struct class
        if obj is None:
            obj = Container()
        context = self._nestedcontext(context, stream)
        context.update(obj)
        for sc in self.subcons:
            try:
                if sc.flagbuildnone:
                    subobj = obj.get(sc.name, None)
                else:
                    subobj = obj[sc.name] # raises KeyError

                named = bool(sc.name)
                if named:
                    context[sc.name] = subobj

                buildret = sc._build(subobj, stream, context, path)
                if named:
                    context[sc.name] = buildret
            except StopFieldError:
                break
        return context

    def _sizeof(self, context, path):
        # exact copy from Struct class
        context = self._nestedcontext(context, None)
        try:
            return sum(sc._sizeof(context, path) for sc in self.subcons)
        except (KeyError, AttributeError):
            raise SizeofError("cannot calculate size, key not found in context", path=path)
class LazyListContainer(list):
    """
    Used internally.

    List-like object returned by LazyArray parsing. Elements are parsed on
    first access and then cached in `values`.
    """

    def __init__(self, subcon, stream, count, offsets, values, context, path):
        self._subcon = subcon
        self._stream = stream
        self._count = count
        self._offsets = offsets
        self._values = values
        self._context = context
        self._path = path

    def __getitem__(self, index):
        if isinstance(index, slice):
            return [self[i] for i in range(*index.indices(self._count))]
        if isinstance(index, int):
            # negative indices count from the end, like in a regular list
            if index < 0:
                index += self._count
            # offsets also holds the end offset under key `count`; that entry
            # is not an element and must not be parsed
            if not 0 <= index < self._count:
                raise KeyError(index)
        if index in self._values:
            return self._values[index]
        # not cached yet: parse the element at its recorded offset
        stream_seek(self._stream, self._offsets[index], 0, self._path) # KeyError
        parseret = self._subcon._parsereport(self._stream, self._context, self._path)
        self._values[index] = parseret
        return parseret

    def __getslice__(self, start, stop):
        if stop == sys.maxsize:
            stop = self._count
        return self.__getitem__(slice(start, stop))

    def __len__(self):
        return self._count

    def __iter__(self):
        return (self[i] for i in range(self._count))

    def __eq__(self, other):
        return len(self) == len(other) and all(self[i] == other[i] for i in range(self._count))

    def __repr__(self):
        return "<LazyListContainer: %s of %s items cached>" % (len(self._values), self._count, )
class LazyArray(Subconstruct):
    r"""
    Equivalent to :class:`~construct.core.Array`, but the subcon is not parsed when possible (it gets skipped if the size can be measured by _actualsize or _sizeof method). See its docstring for details.

    Fields are parsed depending on some factors:

    * Some fields like Int* Float* Bytes(5) Array(5,Byte) Pointer are fixed-size and are therefore skipped. Stream is not read.
    * Some fields like Bytes(this.field) are variable-size but their size is known during parsing when there is a corresponding context entry. Those fields are also skipped. Stream is not read.
    * Some fields like Prefixed PrefixedArray PascalString are variable-size but their size can be computed by partially reading the stream. Only first few bytes are read (the lengthfield).
    * Other fields like VarInt need to be parsed. Stream position that is left after the field was parsed is used.
    * Some fields may not work properly, due to the fact that this class attempts to skip fields, and parses them only out of necessity. Miscellaneous fields often have size defined as 0, and fixed sized fields are skippable.

    Note there are restrictions:

    * If a field references another field within inner (nested) or outer (super) struct, things may break. Context is nested, but this class was not rigorously tested in that manner.

    Building and sizeof are greedy, like in Array.

    :param count: integer or context lambda, strict amount of elements
    :param subcon: Construct instance, subcon to process individual elements
    """

    def __init__(self, count, subcon):
        super().__init__(subcon)
        self.count = count

    def _evalcount(self, context):
        """Return the element count, calling it with the context when it is a lambda."""
        count = self.count
        return count(context) if callable(count) else count

    def _parse(self, stream, context, path):
        sc = self.subcon
        count = self._evalcount(context)
        if not 0 <= count:
            raise RangeError("invalid count %s" % (count,), path=path)
        position = stream_tell(stream, path)
        offsets = {0: position}
        values = {}
        for index in range(count):
            try:
                # measurable elements are skipped over instead of parsed
                position += sc._actualsize(stream, context, path)
                stream_seek(stream, position, 0, path)
            except SizeofError:
                # size unknown: parse it now and continue where the stream ended up
                values[index] = sc._parsereport(stream, context, path)
                position = stream_tell(stream, path)
            offsets[index+1] = position
        return LazyListContainer(sc, stream, count, offsets, values, context, path)

    def _build(self, obj, stream, context, path):
        # exact copy from Array class
        count = self._evalcount(context)
        if not 0 <= count:
            raise RangeError("invalid count %s" % (count,), path=path)
        if not len(obj) == count:
            raise RangeError("expected %d elements, found %d" % (count, len(obj)), path=path)
        built = ListContainer()
        for index,element in enumerate(obj):
            context._index = index
            built.append(self.subcon._build(element, stream, context, path))
        return built

    def _sizeof(self, context, path):
        # exact copy from Array class
        try:
            count = self._evalcount(context)
        except (KeyError, AttributeError):
            raise SizeofError("cannot calculate size, key not found in context", path=path)
        return count * self.subcon._sizeof(context, path)
class LazyBound(Construct):
    r"""
    Field that binds to the subcon only at runtime (during parsing and building, not ctor). Useful for recursive data structures, like linked-lists and trees, where a construct needs to refer to itself (while it does not exist yet in the namespace).

    Note that it is possible to obtain same effect without using this class, using a loop. However there are usecases where that is not possible (if remaining nodes cannot be sized-up, and there is data following the recursive structure). There is also a significant difference, namely that LazyBound actually does greedy parsing while the loop does lazy parsing. See examples.

    To break recursion, use `If` field. See examples.

    :param subconfunc: parameter-less lambda returning Construct instance, can also return itself

    Example::

        d = Struct(
            "value" / Byte,
            "next" / If(this.value > 0, LazyBound(lambda: d)),
        )
        >>> print(d.parse(b"\x05\x09\x00"))
        Container:
            value = 5
            next = Container:
                value = 9
                next = Container:
                    value = 0
                    next = None

    ::

        d = Struct(
            "value" / Byte,
            "next" / GreedyBytes,
        )
        data = b"\x05\x09\x00"
        while data:
            x = d.parse(data)
            data = x.next
            print(x)
        # print outputs
        Container:
            value = 5
            next = \t\x00 (total 2)
        # print outputs
        Container:
            value = 9
            next = \x00 (total 1)
        # print outputs
        Container:
            value = 0
            next =  (total 0)
    """

    def __init__(self, subconfunc):
        super().__init__()
        self.subconfunc = subconfunc

    def _parse(self, stream, context, path):
        # the subcon is looked up anew on every call
        return self.subconfunc()._parsereport(stream, context, path)

    def _build(self, obj, stream, context, path):
        # the subcon is looked up anew on every call
        return self.subconfunc()._build(obj, stream, context, path)


#===============================================================================
# adapters and validators
#===============================================================================
class ExprAdapter(Adapter):
    r"""
    Generic adapter that takes `decoder` and `encoder` lambdas as parameters. You can use ExprAdapter instead of writing a full-blown class deriving from Adapter when only a simple lambda is needed.

    :param subcon: Construct instance, subcon to adapt
    :param decoder: lambda that takes (obj, context) and returns a decoded version of obj
    :param encoder: lambda that takes (obj, context) and returns an encoded version of obj

    Example::

        >>> d = ExprAdapter(Byte, obj_+1, obj_-1)
        >>> d.parse(b'\x04')
        5
        >>> d.build(5)
        b'\x04'
    """
    def __init__(self, subcon, decoder, encoder):
        super().__init__(subcon)

        # the hooks receive (obj, context, path); the user lambdas take no path
        def _decode(obj, ctx, path):
            return decoder(obj, ctx)

        def _encode(obj, ctx, path):
            return encoder(obj, ctx)

        self._decode = _decode
        self._encode = _encode


class ExprSymmetricAdapter(ExprAdapter):
    """
    Macro around :class:`~construct.core.ExprAdapter`.

    :param subcon: Construct instance, subcon to adapt
    :param encoder: lambda that takes (obj, context) and returns both encoded version and decoded version of obj

    Example::

        >>> d = ExprSymmetricAdapter(Byte, obj_ & 0b00001111)
        >>> d.parse(b"\xff")
        15
        >>> d.build(255)
        b'\x0f'
    """
    def __init__(self, subcon, encoder):
        # the same lambda serves both directions
        super().__init__(subcon, decoder=encoder, encoder=encoder)
class ExprValidator(Validator):
    r"""
    Generic adapter that takes `validator` lambda as parameter. You can use ExprValidator instead of writing a full-blown class deriving from Validator when only a simple lambda is needed.

    :param subcon: Construct instance, subcon to adapt
    :param validator: lambda that takes (obj, context) and returns a bool

    Example::

        >>> d = ExprValidator(Byte, obj_ & 0b11111110 == 0)
        >>> d.build(1)
        b'\x01'
        >>> d.build(88)
        ValidationError: object failed validation: 88

    """
    def __init__(self, subcon, validator):
        super().__init__(subcon)

        # the hook receives (obj, context, path); the user lambda takes no path
        def _validate(obj, ctx, path):
            return validator(obj, ctx)

        self._validate = _validate


def OneOf(subcon, valids):
    r"""
    Validates that the object is one of the listed values, both during parsing and building.

    .. note:: For performance, `valids` should be a set/frozenset.

    :param subcon: Construct instance, subcon to validate
    :param valids: collection implementing __contains__, usually a list or set

    :raises ValidationError: parsed or build value is not among valids

    Example::

        >>> d = OneOf(Byte, [1,2,3])
        >>> d.parse(b"\x01")
        1
        >>> d.parse(b"\xff")
        construct.core.ValidationError: object failed validation: 255
    """
    def isvalid(obj, ctx):
        return obj in valids

    return ExprValidator(subcon, isvalid)


def NoneOf(subcon, invalids):
    r"""
    Validates that the object is none of the listed values, both during parsing and building.

    .. note:: For performance, `invalids` should be a set/frozenset.

    :param subcon: Construct instance, subcon to validate
    :param invalids: collection implementing __contains__, usually a list or set

    :raises ValidationError: parsed or build value is among invalids

    """
    def isvalid(obj, ctx):
        return obj not in invalids

    return ExprValidator(subcon, isvalid)
6024 6025 :param subcon: Construct instance, usually Array GreedyRange Sequence 6026 :param predicate: lambda that takes (obj, context) and returns a bool 6027 6028 Can propagate any exception from the lambda, possibly non-ConstructError. 6029 6030 Example:: 6031 6032 >>> d = Filter(obj_ != 0, Byte[:]) 6033 >>> d.parse(b"\x00\x02\x00") 6034 [2] 6035 >>> d.build([0,1,0,2,0]) 6036 b'\x01\x02' 6037 """ 6038 return ExprSymmetricAdapter(subcon, lambda obj,ctx: [x for x in obj if predicate(x,ctx)]) 6039 6040 6041class Slicing(Adapter): 6042 r""" 6043 Adapter for slicing a list. Works with GreedyRange and Sequence. 6044 6045 :param subcon: Construct instance, subcon to slice 6046 :param count: integer, expected number of elements, needed during building 6047 :param start: integer for start index (or None for entire list) 6048 :param stop: integer for stop index (or None for up-to-end) 6049 :param step: integer, step (or 1 for every element) 6050 :param empty: object, value to fill the list with, during building 6051 6052 Example:: 6053 6054 d = Slicing(Array(4,Byte), 4, 1, 3, empty=0) 6055 assert d.parse(b"\x01\x02\x03\x04") == [2,3] 6056 assert d.build([2,3]) == b"\x00\x02\x03\x00" 6057 assert d.sizeof() == 4 6058 """ 6059 def __init__(self, subcon, count, start, stop, step=1, empty=None): 6060 super().__init__(subcon) 6061 self.count = count 6062 self.start = start 6063 self.stop = stop 6064 self.step = step 6065 self.empty = empty 6066 def _decode(self, obj, context, path): 6067 return obj[self.start:self.stop:self.step] 6068 def _encode(self, obj, context, path): 6069 if self.start is None: 6070 return obj 6071 elif self.stop is None: 6072 output = [self.empty] * self.count 6073 output[self.start::self.step] = obj 6074 else: 6075 output = [self.empty] * self.count 6076 output[self.start:self.stop:self.step] = obj 6077 return output 6078 6079 6080class Indexing(Adapter): 6081 r""" 6082 Adapter for indexing a list (getting a single item from that list). 
Works with Range and Sequence and their lazy equivalents. 6083 6084 :param subcon: Construct instance, subcon to index 6085 :param count: integer, expected number of elements, needed during building 6086 :param index: integer, index of the list to get 6087 :param empty: object, value to fill the list with, during building 6088 6089 Example:: 6090 6091 d = Indexing(Array(4,Byte), 4, 2, empty=0) 6092 assert d.parse(b"\x01\x02\x03\x04") == 3 6093 assert d.build(3) == b"\x00\x00\x03\x00" 6094 assert d.sizeof() == 4 6095 """ 6096 def __init__(self, subcon, count, index, empty=None): 6097 super().__init__(subcon) 6098 self.count = count 6099 self.index = index 6100 self.empty = empty 6101 def _decode(self, obj, context, path): 6102 return obj[self.index] 6103 def _encode(self, obj, context, path): 6104 output = [self.empty] * self.count 6105 output[self.index] = obj 6106 return output 6107 6108 6109#=============================================================================== 6110# end of file 6111#=============================================================================== 6112