1package Data::ParseBinary; 2use strict; 3use warnings; 4no warnings 'once'; 5 6our $VERSION = 0.31; 7 8use Data::ParseBinary::Core; 9use Data::ParseBinary::Adapters; 10use Data::ParseBinary::Streams; 11use Data::ParseBinary::Stream::String; 12use Data::ParseBinary::Stream::Wrapper; 13use Data::ParseBinary::Stream::Bit; 14use Data::ParseBinary::Stream::StringBuffer; 15use Data::ParseBinary::Stream::File; 16use Data::ParseBinary::Constructs; 17use Config; 18 19our $DefaultPass = Data::ParseBinary::NullConstruct->create(); 20$Data::ParseBinary::BaseConstruct::DefaultPass = $DefaultPass; 21our $print_debug_info = undef; 22 23my $support_64_bit_int; 24eval { my $x = pack "Q", 5 }; 25if ( $@ ) { 26 $support_64_bit_int = 0; 27 require Math::BigInt; 28} else { 29 $support_64_bit_int = 1 30} 31$@ = ''; 32 33sub UBInt16 { return Data::ParseBinary::Primitive->create($_[0], 2, "n") } 34sub UBInt32 { return Data::ParseBinary::Primitive->create($_[0], 4, "N") } 35sub ULInt16 { return Data::ParseBinary::Primitive->create($_[0], 2, "v") } 36sub ULInt32 { return Data::ParseBinary::Primitive->create($_[0], 4, "V") } 37sub UNInt32 { return Data::ParseBinary::Primitive->create($_[0], 4, "L") } 38sub UNInt16 { return Data::ParseBinary::Primitive->create($_[0], 2, "S") } 39sub UNInt8 { return Data::ParseBinary::Primitive->create($_[0], 1, "C") } 40sub SNInt32 { return Data::ParseBinary::Primitive->create($_[0], 4, "l") } 41sub SNInt16 { return Data::ParseBinary::Primitive->create($_[0], 2, "s") } 42sub SNInt8 { return Data::ParseBinary::Primitive->create($_[0], 1, "c") } 43sub NFloat32{ return Data::ParseBinary::Primitive->create($_[0], 4, "f") } 44sub NFloat64{ return Data::ParseBinary::Primitive->create($_[0], 8, "d") }; 45*SBInt8 = \&SNInt8; 46*SLInt8 = \&SNInt8; 47*Byte = \&UNInt8; 48*UBInt8 = \&UNInt8; 49*ULInt8 = \&UNInt8; 50 51my $create_64_classes = sub { 52 my ($name, $is_signed, $is_be) = @_; 53 return Data::ParseBinary::ExtendedNumberAdapter->create(Field($name, 8), $is_signed, $is_be); 54}; 55 56if ($support_64_bit_int) { 57 *UNInt64 = sub { return Data::ParseBinary::Primitive->create($_[0], 8, "Q") }; 58 *SNInt64 = sub { return Data::ParseBinary::Primitive->create($_[0], 8, "q") }; 59} 60 61if ($^V ge v5.10.0) { 62 *SBInt16 = sub { return Data::ParseBinary::Primitive->create($_[0], 2, "s>") }; 63 *SLInt16 = sub { return Data::ParseBinary::Primitive->create($_[0], 2, "s<") }; 64 *SBInt32 = sub { return Data::ParseBinary::Primitive->create($_[0], 4, "l>") }; 65 *SLInt32 = sub { return Data::ParseBinary::Primitive->create($_[0], 4, "l<") }; 66 *BFloat32= sub { return Data::ParseBinary::Primitive->create($_[0], 4, "f>") }; 67 *LFloat32= sub { return Data::ParseBinary::Primitive->create($_[0], 4, "f<") }; 68 if ($support_64_bit_int) { 69 *SBInt64 = sub { return Data::ParseBinary::Primitive->create($_[0], 8, "q>") }; 70 *SLInt64 = sub { return Data::ParseBinary::Primitive->create($_[0], 8, "q<") }; 71 *UBInt64 = sub { return Data::ParseBinary::Primitive->create($_[0], 8, "Q>") }; 72 *ULInt64 = sub { return Data::ParseBinary::Primitive->create($_[0], 8, "Q<") }; 73 } else { 74 *SBInt64 = sub { $create_64_classes->($_[0], 1, 1) }; 75 *SLInt64 = sub { $create_64_classes->($_[0], 1, 0) }; 76 *UBInt64 = sub { $create_64_classes->($_[0], 0, 1) }; 77 *ULInt64 = sub { $create_64_classes->($_[0], 0, 0) }; 78 } 79 *BFloat64= sub { return Data::ParseBinary::Primitive->create($_[0], 8, "d>") }; 80 *LFloat64= sub { return Data::ParseBinary::Primitive->create($_[0], 8, "d<") }; 81} else { 82 my ($primitive_class, $reversed_class); 83 if (pack('s', -31337) eq "\x85\x97") { 84 $primitive_class = 'Data::ParseBinary::Primitive'; 85 $reversed_class = 'Data::ParseBinary::ReveresedPrimitive'; 86 } else { 87 $reversed_class = 'Data::ParseBinary::Primitive'; 88 $primitive_class = 'Data::ParseBinary::ReveresedPrimitive'; 89 } 90 *SBInt16 = sub { return $primitive_class->create($_[0], 2, "s") }; 91 *SLInt16 = sub { return $reversed_class->create($_[0], 2, "s") }; 92 *SBInt32 = sub { return $primitive_class->create($_[0], 4, "l") }; 93 *SLInt32 = sub { return $reversed_class->create($_[0], 4, "l") }; 94 *BFloat32= sub { return $primitive_class->create($_[0], 4, "f") }; 95 *LFloat32= sub { return $reversed_class->create($_[0], 4, "f") }; 96 if ($support_64_bit_int) { 97 *SBInt64 = sub { return $primitive_class->create($_[0], 8, "q") }; 98 *SLInt64 = sub { return $reversed_class->create($_[0], 8, "q") }; 99 *UBInt64 = sub { return $primitive_class->create($_[0], 8, "Q") }; 100 *ULInt64 = sub { return $reversed_class->create($_[0], 8, "Q") }; 101 } else { 102 *SBInt64 = sub { $create_64_classes->($_[0], 1, 1) }; 103 *SLInt64 = sub { $create_64_classes->($_[0], 1, 0) }; 104 *UBInt64 = sub { $create_64_classes->($_[0], 0, 1) }; 105 *ULInt64 = sub { $create_64_classes->($_[0], 0, 0) }; 106 } 107 *BFloat64= sub { return $primitive_class->create($_[0], 8, "d") }; 108 *LFloat64= sub { return $reversed_class->create($_[0], 8, "d") }; 109} 110 111sub Struct { return Data::ParseBinary::Struct->create(@_) } 112sub Sequence{ return Data::ParseBinary::Sequence->create(@_) }; 113sub Array { 114 my ($count, $sub) = @_; 115 if ($count and ref($count) and UNIVERSAL::isa($count, "CODE")) { 116 return Data::ParseBinary::MetaArray->create($count, $sub); 117 } else { 118 return Data::ParseBinary::MetaArray->create(sub {$count}, $sub); 119 } 120} 121 122sub GreedyRange { return Data::ParseBinary::Range->create(1, undef, $_[0]); } 123sub OptionalGreedyRange { return Data::ParseBinary::Range->create(0, undef, $_[0]); } 124sub Range { return Data::ParseBinary::Range->create(@_) }; 125sub Padding { return Data::ParseBinary::Padding->create($_[0]) } 126sub Flag { return Data::ParseBinary::BitField->create($_[0], 1) } 127sub Bit { return Data::ParseBinary::BitField->create($_[0], 1) } 128sub Nibble { return Data::ParseBinary::BitField->create($_[0], 4) } 129sub Octet { return Data::ParseBinary::BitField->create($_[0], 8) } 130sub BitField { return Data::ParseBinary::BitField->create(@_) } 131sub ReversedBitField { return Data::ParseBinary::ReversedBitField->create(@_) } 132 133sub ConditionalRestream { return Data::ParseBinary::ConditionalRestream->create(@_) } 134sub BitStruct { 135 my ($name, @subcons) = @_; 136 my $subcon = Struct($name, @subcons); 137 return ConditionalRestream($subcon, "Bit", sub { not $_->stream->isBitStream() }); 138} 139sub ReversedBitStruct { 140 my ($name, @subcons) = @_; 141 my $subcon = Struct($name, @subcons); 142 return ConditionalRestream($subcon, "ReversedBit", sub { not $_->stream->isBitStream() }); 143} 144sub Enum { return Data::ParseBinary::Enum->create(@_) } 145sub OneOf { 146 my ($subcon, $list) = @_; 147 my $code = sub { 148 return grep $_ == $_[0], @$list; 149 }; 150 return Data::ParseBinary::LamdaValidator->create($subcon, $code); 151} 152sub NoneOf { 153 my ($subcon, $list) = @_; 154 my $code = sub { 155 my @res = grep $_ == $_[0], @$list; 156 return @res == 0; 157 }; 158 return Data::ParseBinary::LamdaValidator->create($subcon, $code); 159} 160sub Field { 161 my ($name, $len) = @_; 162 if ($len and ref($len) and UNIVERSAL::isa($len, "CODE")) { 163 return Data::ParseBinary::MetaField->create($name, $len); 164 } else { 165 return Data::ParseBinary::StaticField->create($name, $len); 166 } 167} 168*Bytes = \&Field; 169sub RepeatUntil (&$) { return Data::ParseBinary::RepeatUntil->create(@_) } 170 171sub Char { 172 my ($name, $encoding) = @_; 173 174 # if we don't have encoding - a char is simply one byte 175 return Field($name, 1) unless $encoding; 176 177 if ( ( $encoding eq "UTF-32LE" ) or ( $encoding eq "UTF-32BE" ) ) { 178 my $subcon = Field($name, 4); 179 return Data::ParseBinary::CharacterEncodingAdapter->create($subcon, $encoding); 180 } elsif ( ( $encoding eq "UTF-16LE" ) or ( $encoding eq "UTF-16BE" ) ) { 181 my $place = $encoding eq "UTF-16LE" ? 1 : 0; 182 my $subcon = Struct($name, 183 Field("FirstUnit", 2), 184 Array( sub { my $ch = substr($_->ctx->{FirstUnit}, $place, 1); return ( ( ($ch ge "\xD8" ) and ($ch le "\xDB") ) ? 1 : 0 ) }, 185 Field("TheRest", 2) 186 ) 187 ); 188 my $assambled = Data::ParseBinary::FirstUnitAndTheRestAdapter->create($subcon, 2); 189 return Data::ParseBinary::CharacterEncodingAdapter->create($assambled, $encoding); 190 } elsif ( ( $encoding eq "utf8" ) or ( $encoding eq "UTF-8" ) ) { 191 my $subcon = Struct($name, 192 Field("FirstUnit", 1), 193 Array( sub { my $ch = $_->ctx->{FirstUnit}; return scalar(grep { $ch ge $_ } "\xC0", "\xE0", "\xF0" ) || 0 }, 194 Field("TheRest", 1) 195 ) 196 ); 197 my $assambled = Data::ParseBinary::FirstUnitAndTheRestAdapter->create($subcon, 1); 198 return Data::ParseBinary::CharacterEncodingAdapter->create($assambled, $encoding); 199 } elsif ( $encoding =~ /^(?:utf|ucs)/i ) { 200 die "Unrecognized UTF format: $encoding"; 201 } else { 202 # this is a single-byte encoding 203 return Data::ParseBinary::CharacterEncodingAdapter->create(Field($name, 1), $encoding); 204 } 205} 206 207sub PaddedString { 208 my ($name, $length, %params) = @_; 209 my $subcon = Data::ParseBinary::PaddedStringAdapter->create(Field($name, $length), length => $length, %params); 210 return $subcon unless $params{encoding}; 211 return Data::ParseBinary::CharacterEncodingAdapter->create($subcon, $params{encoding}); 212}; 213sub String { 214 my ($name, $length, %params) = @_; 215 if (defined $params{padchar}) { 216 #this is a padded string 217 return PaddedString($name, $length, %params); 218 } 219 return Data::ParseBinary::JoinAdapter->create( 220 Array($length, Char($name, $params{encoding})), 221 ); 222} 223sub LengthValueAdapter { return Data::ParseBinary::LengthValueAdapter->create(@_) } 224sub PascalString { 225 my ($name, $length_field_type, $encoding) = @_; 226 $length_field_type ||= \&UBInt8; 227 my $length_field; 228 { 229 no strict 'refs'; 230 $length_field = &$length_field_type('length'); 231 } 232 if (not $encoding) { 233 return LengthValueAdapter( 234 Sequence($name, 235 $length_field, 236 Field("data", sub { $_->ctx->[0] }), 237 ) 238 ); 239 } else { 240 return LengthValueAdapter( 241 Sequence($name, 242 $length_field, 243 Data::ParseBinary::JoinAdapter->create( 244 Array(sub { $_->ctx->[0] }, Char("data", $encoding)), 245 ), 246 ) 247 ); 248 } 249} 250 251sub CString { 252 my ($name, %params) = @_; 253 my ($terminators, $encoding, $char_field) = @params{qw{terminators encoding char_field}}; 254 $terminators = "\x00" unless defined $terminators; 255 $char_field ||= Char($name, $encoding); 256 my @t_list = split '', $terminators; 257 return Data::ParseBinary::CStringAdapter->create( 258 Data::ParseBinary::JoinAdapter->create( 259 RepeatUntil(sub { my $obj = $_->obj; grep($obj eq $_, @t_list) } ,$char_field)), 260 $terminators 261 ); 262} 263 264 265sub Switch { return Data::ParseBinary::Switch->create(@_) } 266sub Pointer { return Data::ParseBinary::Pointer->create(@_) } 267sub LazyBound { return Data::ParseBinary::LazyBound->create(@_) } 268sub Value { return Data::ParseBinary::Value->create(@_) } 269sub Anchor { my $name = shift; return Value($name, sub { $_->stream->tell } ) } 270sub Terminator { return Data::ParseBinary::Terminator->create() } 271 272sub IfThenElse { 273 my ($name, $predicate, $then_subcon, $else_subcon) = @_; 274 return Switch($name, sub { &$predicate ? 1 : 0 }, 275 { 276 1 => $then_subcon, 277 0 => $else_subcon, 278 } 279 ) 280} 281 282sub If { 283 my ($predicate, $subcon, $elsevalue) = @_; 284 return IfThenElse($subcon->_get_name(), 285 $predicate, 286 $subcon, 287 Value("elsevalue", sub { $elsevalue }) 288 ) 289} 290sub Peek { Data::ParseBinary::Peek->create(@_) } 291sub Const { Data::ParseBinary::ConstAdapter->create(@_) } 292sub Alias { 293 my ($newname, $oldname) = @_; 294 return Value($newname, sub { $_->ctx->{$oldname}}); 295} 296 297sub Union { Data::ParseBinary::Union->create(@_) } 298sub RoughUnion { Data::ParseBinary::RoughUnion->create(@_) } 299 300*CreateStreamReader = \&Data::ParseBinary::Stream::Reader::CreateStreamReader; 301*CreateStreamWriter = \&Data::ParseBinary::Stream::Writer::CreateStreamWriter; 302sub ExtractingAdapter { Data::ParseBinary::ExtractingAdapter->create(@_) }; 303 304sub Aligned { 305 my ($subcon, $modulus) = @_; 306 $modulus ||= 4; 307 die "Aligned should be more then 2" if $modulus < 2; 308 my $sub_name = $subcon->_get_name(); 309 my $s = ExtractingAdapter( 310 Struct($sub_name, 311 Anchor("Aligned_before"), 312 $subcon, 313 Anchor("Aligned_after"), 314 Padding(sub { ($modulus - (($_->ctx->{Aligned_after} - $_->ctx->{Aligned_before}) % $modulus)) % $modulus }) 315 ), 316 $sub_name); 317 return $s; 318} 319 320sub Restream { 321 my ($subcon, $stream_name) = @_; 322 return Data::ParseBinary::Restream->create($subcon, $stream_name); 323} 324sub Bitwise { 325 my ($subcon) = @_; 326 return Restream($subcon, "Bit"); 327} 328 329sub Magic { 330 my ($data) = @_; 331 return Const(Field(undef, length($data)), $data); 332} 333 334sub Select { Data::ParseBinary::Select->create(@_) } 335 336sub Optional { 337 my $subcon = shift; 338 return Select($subcon, $DefaultPass); 339} 340 341sub FlagsEnum { Data::ParseBinary::FlagsEnum->create(@_) } 342 343require Exporter; 344our @ISA = qw(Exporter); 345our @EXPORT = qw( 346 UBInt8 347 ULInt8 348 SBInt8 349 SLInt8 350 Byte 351 UBInt16 352 ULInt16 353 SBInt16 354 SLInt16 355 UBInt32 356 ULInt32 357 SBInt32 358 SLInt32 359 BFloat32 360 LFloat32 361 UBInt64 362 ULInt64 363 SBInt64 364 SLInt64 365 BFloat64 366 LFloat64 367 368 Struct 369 Sequence 370 371 Padding 372 373 Flag 374 Bit 375 Nibble 376 Octet 377 BitField 378 BitStruct 379 ReversedBitField 380 ReversedBitStruct 381 382 Enum 383 $DefaultPass 384 OneOf 385 NoneOf 386 Array 387 RepeatUntil 388 Field 389 Bytes 390 Switch 391 Pointer 392 Anchor 393 394 Char 395 String 396 PascalString 397 CString 398 PaddedString 399 400 LazyBound 401 Value 402 IfThenElse 403 If 404 Peek 405 Const 406 Terminator 407 Alias 408 Union 409 RoughUnion 410 411 CreateStreamReader 412 CreateStreamWriter 413 414 Aligned 415 ExtractingAdapter 416 Restream 417 Bitwise 418 Magic 419 420 Select 421 FlagsEnum 422); 423 424our @Neturals_depricated = qw{ 425 UNInt8 426 SNInt8 427 UNInt16 428 SNInt16 429 UNInt32 430 SNInt32 431 UNInt64 432 SNInt64 433 NFloat32 434 NFloat64 435}; 436our @EXPORT_OK = (@Neturals_depricated, qw{ 437 Range 438 GreedyRange 439 OptionalGreedyRange 440 Optional 441}); 442our %EXPORT_TAGS = ( NATURALS => \ @Neturals_depricated, all => [ @EXPORT_OK, @EXPORT ]); 443 4441; 445 446__END__ 447 448=head1 NAME 449 450Data::ParseBinary - Yet Another parser for binary structures 451 452=head1 SYNOPSIS 453 454 $s = 455 Struct("Construct", 456 Struct("Header", 457 Magic("MZ"), 458 Byte("Version"), 459 UBInt32("Expire Date"), 460 Enum(UBInt32("Data Type"), 461 Array => 0, 462 String => 1, 463 Various => 2, 464 ), 465 Byte("Have Extended Header"), 466 If ( sub { $_->ctx->{"Have Extended Header"} }, 467 CString("Author") 468 ), 469 ), 470 Switch("data", sub { $_->ctx->{Header}->{"Data Type"} }, 471 { 472 Array => Array(4, SBInt32("Signed Int 32")), 473 String => PascalString("Name"), 474 Various => 475 Struct("Various data", 476 NoneOf(Byte("value"), [4, 9]), 477 Padding(1), # byte 478 BitStruct("Mini Values", 479 Flag("done"), 480 Nibble("Short"), 481 Padding(1), #bit 482 SBInt16("something"), 483 ), 484 ), 485 } 486 ), 487 ); 488 my $data = $s->parse("MZabcde\0\0\0\1\1semuel\0\x05fghij"); 489 # $data contains: 490 # { 491 # 'Header' => 492 # { 493 # 'Version' => 97, 494 # 'Expire Date' => 1650680933 495 # 'Data Type' => 'String', 496 # 'Have Extended Header' => 1, 497 # 'Author' => 'semuel', 498 # } 499 # 'data' => 'fghij', 500 # } 501 502=head1 DESCRIPTION 503 504This module is a Perl Port for PyConstructs http://construct.wikispaces.com/ 505 506This module enables writing declarations for simple and complex binary structures, 507parsing binary to hash/array data structure, and building binary data from hash/array 508data structure. 509 510=head1 Reference Code 511 512=head2 Struct 513 514 $s = Struct("foo", 515 UBInt8("a"), 516 UBInt16("b"), 517 Struct("bar", 518 UBInt8("a"), 519 UBInt16("b"), 520 ) 521 ); 522 $data = $s->parse("ABBabb"); 523 # $data is { a => 65, b => 16962, bar => { a => 97, b => 25186 } } 524 525This is the main building block of the module - the struct. Whenever there is the 526need to bind a few varibles together, use Struct. Many constructs receive only one 527sub-construct as parameter, (for example, all the conditional constructs) so use 528Struct. 529 530=head2 Primitives 531 532=head3 Byte-Primitives 533 534But this Struct is just an empy shell. we need to fill it with data types. 535So here is a list of primitive elements: 536 537 Byte, UBInt8, ULInt8 (All three are aliases to the same things) 538 SBInt8, SLInt8 539 UBInt16 540 ULInt16 541 SBInt16 542 SLInt16 543 UBInt32 544 ULInt32 545 SBInt32 546 SLInt32 547 BFloat32 548 LFloat32 549 UBInt64 550 ULInt64 551 SBInt64 552 SLInt64 553 BFloat64 554 LFloat64 555 556S - Signed, U - Unsigned, L - Little endian, B - Big Endian 557Samples: 558 559 UBInt16("foo")->parse("\x01\x02") == 258 560 ULInt16("foo")->parse("\x01\x02") == 513 561 UBInt16("foo")->build(31337) eq 'zi' 562 SBInt16("foo")->build(-31337) eq "\x85\x97" 563 SLInt16("foo")->build(-31337) eq "\x97\x85" 564 565And of course, see Struct above to how bundle a few primitives together. 566 567Be aware that the Float data type is not portable between platforms. So 568it is advisable not to use it when there is an alternative. 569 570=head3 Bit-Primitives 571 572 Flag, Bit (1 bit) 573 Nibble (4 bits) 574 Octet (8 bits, equal to "Byte") 575 BitField (variable length) 576 577These primitive are bit-wide. however, unless using BitStruct, they take a whole 578byte from the input stream. Take for example this struct: 579 580 $s = Struct("bits", 581 Flag("a"), 582 Nibble("b"), 583 ); 584 $data = $s->parse("\x25\x27"); 585 # data is { a => 1, b => 7 } 586 587"\x25\x27" is 0010010100100111 in binary. The Flag is first, and it takes one byte 588from the stream (00100101) use the last bit (1) and discard the rest. After it comes 589the Mibble, that takes a byte too, (00100111) use the last four bits (0111) and discard 590the rest. 591 592If you need these bits to be packed tight together, see BitStruct. 593 594Examples for the rest of the bit constructs: 595 596 $s = Struct("bits", 597 Octet("a"), 598 BitField("b", 5), 599 ); 600 $data = $s->parse("\x25\x27"); 601 # data is { a => 37, b => 7 } 602 603=head2 Meta-Constructs 604 605Life isn't always simple. If you only have a rigid structure with constance types, 606then you can use other modules, that are far simplier. hack, use pack/unpack. 607 608So if you have more complicate requirements, welcome to the meta-constructs. 609Basically, you pass a code ref to the meta-construct, which will be used while 610parsing and building. 611 612For every data that the code ref needs, the $_ variable is loaded with all the 613data that you need. $_->ctx is equal to $_->ctx(0), that returns hash-ref 614containing all the data that the current struct parsed. Is you want to go another 615level up, just request $_->ctx(1). 616 617Also avialble are $_->obj, when need to inspect the current object, (see RepeatUntil) 618and $_->stream, which gives the current stream. (mostly used as $_->stream->tell to 619get the current location) 620 621As a rule, everywhere a code-ref is used, a simple number can be used too. 622If it doesn't - it's a bug. please report it. 623 624=head2 Meta-Primitives 625 626=head3 Field (Bytes) 627 628The first on is the field. a Field is a chunk of bytes, with variable length: 629 630 $s = Struct("foo", 631 Byte("length"), 632 Field("data", sub { $_->ctx->{length} }), 633 ); 634 635(it can be also in constent length, by replacing the code section with, for example, 4) 636So we have struct, that the first byte is the length of the field, and after that the field itself. 637An example: 638 639 $data = $s->parse("\x03ABC"); 640 # $data is {length => 3, data => "ABC"} 641 $data = $s->parse("\x04ABCD"); 642 # $data is {length => 4, data => "ABCD"} 643 644And so on. 645 646Field is also called Bytes. 647 648=head3 Value 649 650A calculated value - not in the stream. It is calculated on both parse and build. 651 652 $s = Struct("foo", 653 UBInt8("width"), 654 UBInt8("height"), 655 Value("total_pixels", sub { $_->ctx->{width} * $_->ctx->{height}}), 656 ); 657 658=head3 Alias 659 660Copies "a" to "b". 661 662 $s = Struct("foo", 663 Byte("a"), 664 Alias("b", "a"), 665 ); 666 $data = $s->parse("\x25"); 667 # $data is { a => 37, b => 37 } 668 669=head2 Conditionals 670 671=head3 If / IfThenElse 672 673Basic branching: 674 675 $s = Struct("foo", 676 Flag("has_options"), 677 If(sub { $_->ctx->{has_options} }, 678 Bytes("options", 5) 679 ) 680 ); 681 682The If statment takes it's name from the contained construct, and return undef 683of the condition is not met. 684 685 $s = Struct("foo", 686 Flag("long_options"), 687 IfThenElse("options", sub { $_->ctx->{long_options} }, 688 Bytes("Long Options", 5), 689 Bytes("Short Options", 3), 690 ), 691 ); 692 693The IfThenElse discard the name of the contained consturct, and use its own. 694 695=head3 Switch 696 697Multi branching. Can operate on numbers or strings. In the first example used with 698Enum to convert a value to string. 699 700The Switch discard the name of the contained consturcts, and use its own. 701return undef if $DefaultPass is used. 702 703 $s = Struct("foo", 704 Enum(Byte("type"), 705 INT1 => 1, 706 INT2 => 2, 707 INT4 => 3, 708 STRING => 4, 709 ), 710 Switch("data", sub { $_->ctx->{type} }, 711 { 712 INT1 => UBInt8("spam"), 713 INT2 => UBInt16("spam"), 714 INT4 => UBInt32("spam"), 715 STRING => String("spam", 6), 716 } 717 ) 718 ); 719 $data = $s->parse("\x01\x12"); 720 # $data is {type => "INT1", data => 18} 721 $data = $s->parse("\x02\x12\x34"); 722 # $data is {type => "INT2", data => 4660} 723 $data = $s->parse("\x04abcdef"); 724 # $data is {type => "STRING", data => 'abcdef'} 725 726And so on. Switch also have a default option: 727 728 $s = Struct("foo", 729 Byte("type"), 730 Switch("data", sub { $_->ctx->{type} }, 731 { 732 1 => UBInt8("spam"), 733 2 => UBInt16("spam"), 734 }, 735 default => UBInt8("spam") 736 ) 737 ); 738 739And can use $DefaultPass that make it to no-op. 740 741 $s = Struct("foo", 742 Byte("type"), 743 Switch("data", sub { $_->ctx->{type} }, 744 { 745 1 => UBInt8("spam"), 746 2 => UBInt16("spam"), 747 }, 748 default => $DefaultPass, 749 ) 750 ); 751 $data = $s->parse("\x01\x27"); 752 # $data is { type => 1, data => 37 } 753 754$DefaultPass is valid also as one of the options: 755 756 $s = Struct("foo", 757 Byte("type"), 758 Switch("data", sub { $_->ctx->{type} }, 759 { 760 1 => $DefaultPass, 761 2 => UBInt16("spam"), 762 }, 763 default => UBInt8("spam"), 764 ) 765 ); 766 $data = $s->parse("\x01\x27"); 767 # $data is { type => 1, data => undef } 768 769=head2 Loops 770 771=head3 Array 772 773Array, as any meta construct, and have constant length or variable lenght. 774 775 # This is an Array of four bytes 776 $s = Array(4, UBInt8("foo")); 777 $data = $s->parse("\x01\x02\x03\x04"); 778 # $data is [1, 2, 3, 4] 779 780 # Array with variable length 781 $s = Struct("foo", 782 Byte("length"), 783 Array(sub { $_->ctx->{length}}, UBInt16("data")), 784 ); 785 $data = $s->parse("\x03\x00\x01\x00\x02\x00\x03"); 786 # $data is {length => 3, data => [1, 2, 3]} 787 788=head3 RepeatUntil 789 790RepeatUntil gets for every round to inspect data on $_->obj: 791 792 $s = RepeatUntil(sub {$_->obj eq "\x00"}, Field("data", 1)); 793 $data = $s->parse("abcdef\x00this is another string"); 794 # $data is [qw{a b c d e f}, "\0"] 795 796=head2 Adapters 797 798Adapters are constructs that transform the data that they work on. It wraps some underlining 799structure, and present the data in a new, easier to use, way. There are some built-in 800adapters for general use, but it is easy to write one of your own. 801 802This is actually the easiest way to extend the framework. 803For creating an adapter, the class should inherent from the Data::ParseBinary::Adapter 804class. For example, we will take the IP address. An IP address can be viewed as 805four bytes, or one unsigned long integer, but humans like to see it as dotted numbers. 806("1.2.3.4") Here is how I would have done it. First, I'll write an adapter class: 807 808 package IpAddressAdapter; 809 our @ISA = qw{Data::ParseBinary::Adapter}; 810 sub _encode { 811 my ($self, $tvalue) = @_; 812 return [split '\.', $tvalue]; 813 } 814 sub _decode { 815 my ($self, $value) = @_; 816 return join '.', @$value; 817 } 818 819This adapter transforms dotted IP address ("1.2.3.4") to four numbers. Each number size 820is "byte", so we will use an array of four bytes. For actually creating one 821we should write: 822 823 my $ipAdapter = IpAddressAdapter->create(Array(4, Byte("foo"))); 824 825(An adapter inherits its name from the underlying data construct) 826 827Or we can create a little function: 828 829 sub IpAddressAdapterFunc { 830 my $name = shift; 831 IpAddressAdapter->create(Array(4, Byte($name))); 832 } 833 834And then: 835 836 IpAddressAdapterFunc("foo")->parse("\x01\x02\x03\x04"); 837 # will return "1.2.3.4" 838 839On additional note, it is possible to declare an "init" sub inside IpAddressAdapter, 840that will receive any extra parameter that "create" recieved. 841 842=head3 Enum 843 844One of the built-in Adapters is Enum: 845 846 $s = Enum(Byte("protocol"), 847 TCP => 6, 848 UDP => 17, 849 ); 850 $s->parse("\x06") # return 'TCP' 851 $s->parse("\x11") # return 'UDP' 852 $s->build("TCP") # returns "\x06" 853 854It is also possible to have a default: 855 856 $s = Enum(Byte("protocol"), 857 TCP => 6, 858 UDP => 17, 859 _default_ => blah => 99, 860 ); 861 $s->parse("\x12") # returns 'blah' 862 863Please note that the default tag must not be one of the supplied pairs. 864And finally: 865 866 $s = Enum(Byte("protocol"), 867 TCP => 6, 868 UDP => 17, 869 _default_ => $DefaultPass, 870 ); 871 $s->parse("\x12") # returns 18 872 873$DefaultPass tells Enum that if it isn't familiar with the value, pass it alone. 874 875=head3 FlagsEnum 876 877If the field represent a set of flags, then the library provide a construct just for that: 878 879 $s = FlagsEnum(ULInt16("characteristics"), 880 RELOCS_STRIPPED => 0x0001, 881 EXECUTABLE_IMAGE => 0x0002, 882 LINE_NUMS_STRIPPED => 0x0004, 883 REMOVABLE_RUN_FROM_SWAP => 0x0400, 884 BIG_ENDIAN_MACHINE => 0x8000, 885 ); 886 $data = $s->parse("\2\4"); 887 # $data is { EXECUTABLE_IMAGE => 1, REMOVABLE_RUN_FROM_SWAP => 1 }; 888 889Of course, this is equvalent to creating a BitStruct, and specifing Flag-s in the 890correct positions, and so on. but this is an easier way. 891 892=head2 Validators 893 894Validator... validate. they validate that the value on the stream is an expected 895one, and they validate that the value that need to be written to the stream is 896a correct one. otherwise, throws an exception. 897 898=head3 OneOf / NoneOf 899 900 OneOf(UBInt8("foo"), [4,5,6,7])->parse("\x05") # return 5 901 OneOf(UBInt8("foo"), [4,5,6,7])->parse("\x08") # dies. 902 NoneOf(UBInt8("foo"), [4,5,6,7])->parse("\x08") # returns 8 903 NoneOf(UBInt8("foo"), [4,5,6,7])->parse("\x05") # dies 904 905=head3 Const 906 907 $s = Const(Bytes("magic", 6), "FOOBAR"); 908 909On parsing: verify that the correct value is being read, and return it. 910 911On building: if value is supplied, verify that it is the correct one. if the 912value is not supplied, insert the correct one. 913 914=head3 Magic 915 916 Magic("\x89PNG\r\n\x1a\n") 917 918A constant string that is written / read and verified to / from the stream. 919For example, every PNG file starts with eight pre-defined bytes. this construct 920handle them, transparant to the calling program. (you don't need to supply a value, 921nor you will see the parsed value) 922 923=head2 BitStruct 924 925As said in the section about Bit-Primitives, these primitives are not packed tightly, 926and each will take complete bytes from the stream. 927If you need to pack them tightly, use BitStruct: 928 929 $s = BitStruct("foo", 930 BitField("a", 3), # three bit int 931 Flag("b"), # one bit 932 Nibble("c"), # four bit int 933 BitField("d", 5), # five bit int 934 ); 935 $data = $s->parse("\xe1\xf2"); 936 # $data is { a => 7, b => 0, c => 1, d => 30 } 937 938As can be seen, we start with 1110000111110010. The it is being splitted as 939a=111, b=0, c=0001, d=11110 and the rest (010) is discard. 940 941BitStruct can be inside other BitStruct. Inside BitStruct, Struct and BitStruct are equivalents. 942 943 $s = BitStruct("foo", 944 BitField("a", 3), 945 Flag("b"), 946 Nibble("c"), 947 Struct("bar", 948 Nibble("d"), 949 Bit("e"), 950 Octet("f"), 951 ) 952 ); 953 $data = $s->parse("\xe1\xf2\x34"); 954 # $data is { a => 7, b => 0, c => 1, bar => { d => 15, e => 0, f => 70 } } 955 956It is possible to mix a byte-primitives inside a BitStruct: 957 958 $s = BitStruct("foo", 959 BitField("a", 3), 960 UBInt16("int data"), 961 Nibble("b"), 962 ); 963 $data = $s->parse("\xe1\xf2\x34"); 964 # $data is { a => 7, "int data" => 3985, b => 10 } 965 966Just be aware that this UBInt16 starts from the middle of the first byte, and 967ends in the middle of the third. 968 969BitStruct is based on a BitStream (see Stream) that is not seekable. So it can't 970contain any construct that require seekability. 971 972=head3 Bitwise 973 974Use Bitwise when you are not under a BitStream, and you have single construct 975that need to be packed by bits, and you don't want to create another hash for 976just this construct. Here is an example from BMP: 977 978 Bitwise(Array(sub { $_->ctx(2)->{width} }, Nibble("index"))); 979 980We have an array of Nibble, that need to be packed together. 981 982=head2 ReversedBitStruct and ReversedBitField 983 984BitStruct assumes that each byte is arranged, bit-wise, from the most significante 985bit (MSB) to the least significante bit. (LSB) However, it is not always true. 986 987Lets say that you bytes are: 988 989 MSB LSB 990 Byte 1: A B C D E F G H 991 Byte 2: I J K M L N O P 992 993And suppose that you have a bit-struct with three fields. AF1 is three bits, 994AF2 is one bit, and AF3 is eight bits. so if: 995 996 AF1=ABC, AF2=D, AF3=EFGHIJKM 997 use: BitStruct with BitField 998 AF1=CBA, AF2=D, AF3=MKJIHGFE 999 use: BitStruct with ReversedBitField 1000 AF1=HGF, AF2=E, AF3=DCBAPONL 1001 use: ReversedBitStruct with BitField 1002 AF1=FGH, AF2=E, AF3=LNOPABCD 1003 use: ReversedBitStruct with ReversedBitField 1004 1005=head2 Padding 1006 1007Padding remove bytes from the stream 1008 1009 $s = Struct("foo", 1010 Padding(2), 1011 Flag("myflag"), 1012 Padding(5), 1013 ); 1014 $data = $s->parse("\x00\x00\x01\x00\x00\x00\x00\x00"); 1015 # $data is { myflag => 1 } 1016 1017However, if woring on Bit Stream, then Padding takes bits and not bytes 1018 1019 $s = BitStruct("foo", 1020 Padding(2), 1021 Flag("myflag"), 1022 Padding(5), 1023 ); 1024 $data = $s->parse("\x20"); 1025 # $data is { myflag => 1 } 1026 1027Padding is a meta-construct, can take code ref instead of a number 1028 1029 $s = Struct("foo", 1030 Byte("count"), 1031 Padding( sub { $_->ctx->{count} } ), 1032 Flag("myflag"), 1033 ); 1034 $data = $s->parse("\x02\0\0\1"); 1035 # $data is { count => 2, muflag => 1 } 1036 1037=head2 Peeking and Jumping 1038 1039Not all parsing is linear. sometimes you need to peek ahead to see if a certain 1040value exists ahead, or maybe you know where the data is, it's just that it is 1041some arbitary number of bytes ahead. or before. 1042 1043=head3 Pointer and Anchor 1044 1045Pointers are another animal of meta-struct. For example: 1046 1047 $s = Struct("foo", 1048 Pointer(sub { 4 }, Byte("data1")), # <-- data1 is at (absolute) position 4 1049 Pointer(sub { 7 }, Byte("data2")), # <-- data2 is at (absolute) position 7 1050 ); 1051 $data = $s->parse("\x00\x00\x00\x00\x01\x00\x00\x02"); 1052 # $data is {data1=> 1 data2=>2 } 1053 1054Literaly is says: jump to position 4, read byte, return to the beginning, jump to position 7, 1055read byte, return to the beginning. 1056 1057Anchor can help a Pointer to find it's target: 1058 1059 $s = Struct("foo", 1060 Byte("padding_length"), 1061 Padding(sub { $_->ctx->{padding_length} } ), 1062 Byte("relative_offset"), 1063 Anchor("absolute_position"), 1064 Pointer(sub { $_->ctx->{absolute_position} + $_->ctx->{relative_offset} }, Byte("data")), 1065 ); 1066 $data = $s->parse("\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\xff"); 1067 # $data is { absolute_position=> 7, relative_offset => 3, data => 255, padding_length => 5 } 1068 1069Anchor saves the current location in the stream, enable the Pointer to jump to location 1070relative to it. 1071 1072Also, $_->stream->tell will point you to the current location, giving the ability for 1073relative location without using Anchor. The above construct is quevalent to: 1074 1075 $s = Struct("foo", 1076 Byte("padding_length"), 1077 Padding(sub { $_->ctx->{padding_length} } ), 1078 Byte("relative_offset"), 1079 Pointer(sub { $_->stream->tell + $_->ctx->{relative_offset} }, Byte("data")), 1080 ); 1081 1082=head3 Peek 1083 1084 $s = Struct("foo", 1085 Byte("a"), 1086 Peek(Byte("b")), 1087 Byte("c"), 1088 ); 1089 1090Peek is like Pointer with two differences: one that it is no-op on build. 1091second the location is calculated relative to the current location, 1092while with Pointer it's absolute position. 1093 1094If no distance is supplied, zero is assumed. it is posible to supply 1095constant distance, (i.e. 5) or code ref. Examples: 1096 1097 Peek(UBInt16("b"), 5) # Peek 5 bytes ahead 1098 Peek(UBInt16("b"), sub { $_->ctx->{this_far} }) # calculated number of bytes ahead 1099 1100=head2 Strings 1101 1102=head3 Char 1103 1104The Char construct represent a single character. This can mean one byte, or 1105if it have encoding attached, a multi-byte character. 1106 1107 $s = Char("c", "utf8"); 1108 $s->build("\x{1abcd}"); 1109 # returns "\xf0\x9a\xaf\x8d" 1110 1111The allowded encodings are: 1112 1113 UTF-32LE 1114 UTF-32BE 1115 UTF-16LE 1116 UTF-16BE 1117 UTF-8 1118 utf8 1119 or any single-byte encoding supported by the Encode module 1120 for example: iso-8859-8 1121 1122If you don't know if your unicode string is BE or LE, then it's probably BE. 1123 1124=head3 String (constant length / meta) 1125 1126A string with constant length: 1127 1128 String("foo", 5)->parse("hello") 1129 # returns "hello" 1130 1131A string with variable length, and encoding: 1132 1133 String("description", sub { $_->ctx->{description_size} }, encoding => 'UTF-16LE' ) 1134 1135The string length is specified in *characters*, not bytes. 1136 1137=head3 PaddedString 1138 1139A Padded string with constant length: 1140 1141 $s = PaddedString("foo", 10, padchar => "X", paddir => "right"); 1142 $s->parse("helloXXXXX") # return "hello" 1143 $s->build("hello") # return 'helloXXXXX' 1144 1145I think that it speaks for itself. only that paddir can be one of qw{right left center}, 1146and there can be also trimdir that can be "right" or "left". 1147 1148When encoding is supplied, for example: 1149 1150 $s = PaddedString("foo", 10, encoding => "utf8"); 1151 1152The String length is still specified in *bytes*, not characters. If anyone ever 1153encouter a padded constant length string with multi byte encoding that it's length is 1154specified in characters, please send me an email. 1155 1156=head3 PascalString 1157 1158PascalString - String with a length marker in the beginning: 1159 1160 $s = PascalString("foo"); 1161 $s->build("hello world") # returns "\x0bhello world" 1162 1163The marker can be of any kind: 1164 1165 $s = PascalString("foo", \&UBInt16); 1166 $s->build("hello") # returns "\x00\x05hello" 1167 1168(the marker can be pointer to any function that get a name and return construct. 1169And on parse that construct should return a value. like the built-in primitives for example) 1170 1171With encoding: 1172 1173 $s = PascalString("foo", undef, "utf8"); 1174 1175The string length is specified in *characters*, not bytes. 1176 1177=head3 CString 1178 1179And finally, CString: 1180 1181 $s = CString("foo"); 1182 $s->parse("hello\x00") # returns 'hello' 1183 1184Can have many optional terminators: 1185 1186 $s = CString("foo", terminators => "XYZ"); 1187 $s->parse("helloY") # returns 'hello' 1188 1189With encoding: 1190 1191 $s = CString("foo", encoding => "utf8"); 1192 1193=head2 Union / RoughUnion 1194 1195 $s = Union("foo", 1196 UBInt32("a"), 1197 UBInt16("b") 1198 ); 1199 $data = $s->parse("\xaa\xbb\xcc\xdd"); 1200 # $data is { a => 2864434397, b => 43707 } 1201 1202A Union. currently work only with constant-size constructs, (like primitives, Struct and such) 1203but not on bit-stream. 1204 1205 $s = Struct("records", 1206 ULInt32("record_size"), 1207 RoughUnion("params", 1208 Field("raw", sub { $_->ctx(1)->{record_size} - 8 }), 1209 Array(sub { int(($_->ctx(1)->{record_size} - 8) / 4) }, ULInt32("params")), 1210 ), 1211 ); 1212 1213RoughUnion is a type of Union, that doesn't check the size of it's sub-constructs. 1214it is used when we don't know before-hand the size of the sub-constructs, and the size 1215of the union as a whole. In the above example, we assume that if the union target is 1216the array of integers, then it probably record_size % 4 = 0. 1217 1218If it's not, and we build this construct from the array, then we will be a few bytes 1219short. 1220 1221=head2 Aligned 1222 1223 $s = Struct("bmp", 1224 ULInt32("width"), 1225 ULInt32("height"), 1226 Array( 1227 sub { $_->ctx->{height} }, 1228 Aligned( 1229 Array( 1230 sub { $_->ctx(2)->{width} }, 1231 Byte("index") 1232 ), 1233 4), 1234 ), 1235 ); 1236 1237Aligned make sure that the contained construct's size if dividable by $modulo. the 1238syntex is: 1239 1240 Aligned($subcon, $modulo); 1241 1242In the above example, we have an excert from the BMP parser. each pixel is a byte. 1243There is an array of lines (height) that each line is an array of pixels. each line 1244is aligned to a four bytes boundary. 1245 1246The modulo can be any number. 2, 4, 8, 7, 23. 1247 1248=head2 Terminator 1249 1250 Terminator()->parse("") 1251 1252verify that we reached the end of the stream. Not very useful, unless you are 1253processing a file and need to verify that you have reached the end 1254 1255=head2 LasyBound 1256 1257This construct is estinental for recoursive constructs. 1258 1259 $s = Struct("foo", 1260 Flag("has_next"), 1261 If(sub { $_->ctx->{has_next} }, LazyBound("next", sub { $s })), 1262 ); 1263 $data = $s->parse("\x01\x01\x01\x00"); 1264 # $data is: 1265 # { 1266 # has_next => 1, 1267 # next => { 1268 # has_next => 1, 1269 # next => { 1270 # has_next => 1, 1271 # next => { 1272 # has_next => 0, 1273 # next => undef 1274 # } 1275 # } 1276 # } 1277 # } 1278 1279=head2 Sequence 1280 1281Similar to Struct, just return an arrat reference instead of hash ref 1282 1283 $s = Sequence("foo", 1284 UBInt8("a"), 1285 UBInt16("b"), 1286 Sequence("bar", 1287 UBInt8("a"), 1288 UBInt16("b"), 1289 ) 1290 ); 1291 $data = $s->parse("ABBabb"); 1292 # $data is [ 65, 16962, [ 97, 25186 ] ] 1293 1294Be aware that not every construct works well under Sequence. For example, Value 1295will have problems on building. Using Struct is prefered. 1296 1297=head1 Depricated Constructs 1298 1299A few construct are being depricated - for the reason that while parsing 1300a binary stream, you should know before-hand what are you going to get. 1301If needed, it is possible to use Peek or Pointer to look ahead. 1302 1303These will be exported only by request, or by using the :all tag 1304 1305 use Data::ParseBinary qw{:all}; 1306 use Data::ParseBinary qw{UNInt64 OptionalGreedyRange}; 1307 1308=head2 Primitives 1309 1310The following primitives are depricated, because I don't think it's good practice 1311to declare a structure with native-order byte order. 1312What if someone will run your program in a machine with the oposite byte order? 1313 1314N stand for Platform natural 1315 1316 UNInt8 1317 SNInt8 1318 UNInt16 1319 SNInt16 1320 UNInt32 1321 SNInt32 1322 UNInt64 1323 SNInt64 1324 NFloat32 1325 NFloat64 1326 1327These will be exported only by request, or by using the :NATURALS tag 1328 1329 use Data::ParseBinary qw{:NATURALS}; 1330 1331=head2 Ranges 1332 1333Please use Array, with constant or dynamic number of elements 1334 1335 # This is an array for 3 to 7 bytes 1336 $s = Range(3, 7, UBInt8("foo")); 1337 $data = $s->parse("\x01\x02\x03"); 1338 $data = $s->parse("\x01\x02\x03\x04\x05\x06\x07\x08\x09"); 1339 # in the last example, will take only 7 bytes from the stream 1340 1341 # A range with at least one byte, unlimited 1342 $s = GreedyRange(UBInt8("foo")); 1343 1344 # A range with zero to unlimited bytes 1345 $s = OptionalGreedyRange(UBInt8("foo")); 1346 1347=head2 Optional 1348 1349Optional construct may or may not be in the stream. Of course, it need a seekable stream. 1350The optional section usually have a Const in them, that indicates is this section 1351exists. 1352 1353 my $wmf_file = Struct("wmf_file", 1354 Optional( 1355 Struct("placeable_header", 1356 Const(ULInt32("key"), 0x9AC6CDD7), 1357 ULInt16("handle"), 1358 ), 1359 ), 1360 ULInt16("version"), 1361 ULInt32("size"), # file size is in words 1362 ); 1363 1364A better way is to Peek ahead, and decide if this part exists: 1365 1366 my $wmf_file = Struct("wmf_file", 1367 Peek(ULInt32("header_key")), 1368 If(sub { $_->ctx->{header_key} == 0x9AC6CDD7 }, 1369 Struct("placeable_header", 1370 Const(ULInt32("key"), 0x9AC6CDD7), 1371 ULInt16("handle"), 1372 ), 1373 ), 1374 ULInt16("version"), 1375 ULInt32("size"), # file size is in words 1376 ); 1377 1378=head1 Streams 1379 1380Until now, everything worked in single-action. build built one construct, and parse 1381parsed one construct from one string. But suppose the string have more then one 1382construct in it? Suppose we want to write two constructs into one string? (and 1383if these constructs are in bit-mode, we can't create and just join them) 1384 1385So, anyway, we have streams. A stream is an object that let a construct read and 1386parse bytes from, or build and write bytes to. 1387 1388Please note, that some constructs can only work on seekable streams. 1389 1390=head2 String 1391 1392is seekable, not bit-stream 1393 1394This is the most basic stream. 1395 1396 $data = $s->parse("aabb"); 1397 # is equivalent to: 1398 $stream = CreateStreamReader("aabb"); 1399 $data = $s->parse($stream); 1400 # also equivalent to: 1401 $stream = CreateStreamReader(String => "aabb"); 1402 $data = $s->parse($stream); 1403 1404Being that String is the default stream type, it is not needed to specify it. 1405So, if there is a string contains two or more structs, that the following code is possible: 1406 1407 $stream = CreateStreamReader(String => $my_string); 1408 $data1 = $s1->parse($stream); 1409 $data2 = $s2->parse($stream); 1410 1411The other way is equally possible: 1412 1413 $stream = CreateStreamWriter(String => undef); 1414 $s1->build($data1); 1415 $s2->build($data2); 1416 $my_string = $stream->Flush(); 1417 1418The Flush command in Writer Stream says: finish doing whatever you do, and return 1419your internal object. For string writer it is simply return the string that it built. 1420Wrapping streams (like Bit, StringBuffer) finish whatever they are doing, flush the 1421data to the internal stream, and call Flush on that internal stream. 1422 1423The special case here is Wrap, that does not call Flush on the internal stream. 1424usefull for some configurations. 1425a Flush operation happens in the end of every build operation automatically, and 1426when a stream being destroyed. 1427 1428In creation, the following lines are equvalent: 1429 1430 $stream = CreateStreamWriter(undef); 1431 $stream = CreateStreamWriter(''); 1432 $stream = CreateStreamWriter(String => undef); 1433 $stream = CreateStreamWriter(String => ''); 1434 1435Of course, it is possible to create String Stream with inital string to append to: 1436 1437 $stream = CreateStreamWriter(String => "aabb"); 1438 1439And any sequencal build operation will append to the "aabb" string. 1440 1441=head2 StringRef 1442 1443is seekable, not bit-stream 1444 1445Mainly for cases when the string is to big to play around with. Writer: 1446 1447 my $string = ''; 1448 $stream = CreateStreamWriter(StringRef => \$string); 1449 ... do build operations ... 1450 # and now the data in $string. 1451 # or refer to: ${ $stream->Flush() } 1452 1453Because Flush returns what's inside the stream - in this case a reference to a string. 1454For Reader: 1455 1456 my $string = 'MBs of data...'; 1457 $stream = CreateStreamReader(StringRef => \$string); 1458 ... parse operations ... 1459 1460=head2 Bit 1461 1462not seekable, is bit-stream 1463 1464While every stream support bit-fields, when requesting 2 bits in non-bit-streams 1465you get these two bits, but a whole byte is consumed from the stream. In bit stream, 1466only two bits are consumed. 1467 1468When you use BitStruct construct, it actually wraps the current stream with a bit stream. 1469If the stream is already bit-stream, it continues as usual. 1470 1471What does it all have to do with you? great question. Support you have a string containing 1472a few bit structs, and each struct is aligned to a byte border. Then you can use 1473the example under the BitStruct section. 1474 1475However, if the bit structs are not aligned, but compressed one against the other, then 1476you should use: 1477 1478 $s = BitStruct("foo", 1479 Padding(1), 1480 Flag("myflag"), 1481 Padding(3), 1482 ); 1483 $inner = "\x42\0"; 1484 $stream1 = CreateStreamReader(Bit => String => $inner); 1485 $data1 = $s->parse($stream1); 1486 # data1 is { myflag => 1 } 1487 $data2 = $s->parse($stream1); 1488 # data2 is { myflag => 1 } 1489 $data3 = $s->parse($stream1); 1490 # data3 is { myflag => 0 } 1491 1492Note that the Padding constructs detects that it work on bit stream, and pad in bits 1493instead of bytes. 1494 1495On Flush the bit stream write the reminding bits (up to a byte border) as 0, 1496write the last byte to the contained stream, and call Flush on the said contained stream. 1497so, if we use the $s from the previous code section: 1498 1499 $stream1 = CreateStreamWriter(Bit => String => undef); 1500 $s->build({ myflag => 1 }, $stream1); 1501 $s->build({ myflag => 1 }, $stream1); 1502 $s->build({ myflag => 0 }, $stream1); 1503 my $result = $stream1->Flush(); 1504 # $result eq "\x40\x40\0" 1505 1506In this case each build operation did Flush on the bit stream, closing the last 1507(and only) byte. so we get three bytes, each contain one record. But if we want 1508that our constructs will be compressed each against the other, then we need 1509to protect the bit stream from the Flush command: 1510 1511 $stream1 = CreateStreamWriter(Wrap => Bit => String => undef); 1512 $s->build($data1, $stream1); 1513 $s->build($data1, $stream1); 1514 $s->build($data2, $stream1); 1515 my $result = $stream1->Flush()->Flush(); 1516 # $result eq "\x42\0"; 1517 1518Ohh. Two Flushs. one for the Wrap, one for the Bit and the String. 1519However, as you can see, the structs are packed together. The Wrap stream protects 1520the Bit stream from the Flush command in the end of every build. 1521 1522=head2 StringBuffer 1523 1524is seekable, not bit-stream 1525 1526Suppose that you have some non-seekable stream. like socket. and suppose that your 1527struct do use construct that need seekable stream. What can you do? 1528 1529Enter StringBuffer. It reads from the warped stream exactly the number of bytes 1530that the struct needs, giving the struct the option to seek inside the read section. 1531and if the struct seeks ahead - it will just read enough bytes to seek to this place. 1532 1533In writer stream, the StringBuffer will pospone writing the data to the actual stream, 1534until the Flush command. 1535 1536This warper stream is usefull only when the struct seek inside it's borders, and 1537not sporadically reads data from 30 bytes ahead / back. 1538 1539 # suppose we have unseekable reader stream names $s_stream 1540 # (for example, TCP connection) 1541 $stream1 = CreateStreamReader(StringBuffer => $s_stream); 1542 # $s is some struct that uses seek. (using Peek, for example) 1543 $data = $s->parse($stream1); 1544 # the data were read, you can either drop $stream1 or continue use 1545 # it for future parses. 1546 1547 # now suppose we have a unseekable writer strea name $w_stream 1548 $stream1 = CreateStreamWriter(StringBuffer => $w_stream); 1549 # $s is some struct that uses seek. (using Peek, for example) 1550 $s->build($data1, $stream1); 1551 # data is written into $stream1, flushed to $w_stream, and sent. 1552 1553Note that in StringBuffer, the Flush operation writes the data to the underlining 1554stream, and then Flushes that stream. 1555 1556=head2 Wrap 1557 1558A simple wraping stream, whose only function is to protect the contained stream 1559from Flush commands. Usable only for writer streams, and can be used to: 1560 15611. Protect a Bit stream, so it will compress multiple structs without byte alignment 1562(see the Bit stream documentation for example) 1563 15642. Protect a StringBuffer, so it will aggregate some structs before you will 1565Flush them all as one to the socket/file/whatever. 1566 1567=head2 File 1568 1569is seekable, not bit-stream 1570 1571Reads from / Writes to a file. it is your responsebility to open the file and binmode it. 1572 1573 open my $fh, "<", "bin_data.xdf" or die "oh sh..."; 1574 binmode $fh; 1575 $stream1 = CreateStreamReader(File => $fh); 1576 1577=head1 Format Library 1578 1579The Data::ParseBinary arrive with ever-expanding set of pre-defined parsers for popular formats. 1580Each of these parsers is in it's own module. 1581And if you have a file-format, then this is how it's done: 1582 1583 use Data::ParseBinary::Graphics::BMP qw{$bmp_parser}; 1584 open my $fh2, "<", $filename or die "can not open $filename"; 1585 binmode $fh2; 1586 $data = $bmp_parser->parse(CreateStreamReader(File => $fh2)); 1587 1588And $data will contain the parsed file. In the same way, it is possible to build a BMP file. 1589 1590Please look for the documentation inside each module, 1591as it highlights various issues with the various libraries. 1592 1593=head1 Debugging 1594 1595=head2 Output on failure 1596 1597The first line of defence is the output on error. Where did it happend? 1598in which construct? In which byte of the input? 1599 1600On error, you get the following "die" messege: 1601 1602 Got Exception not enought bytes in stream 1603 1604 Streams location: 1605 1: Stream BitReader in byte #Bit 5 1606 2: Stream StringReader in byte #2 1607 Constructs Stack: 1608 1: BitField f 1609 2: Struct bar 1610 3: BitStruct foo 1611 1612It tells me that I was inside "f" under "bar" under "foo", that it's the 1613second byte in stream, and because I was inside a BitStuct I get another 1614line for the stream, pointing me to the exact bit. 1615 1616=head2 $print_debug_info 1617 1618What we miss in the "die" messege above, is knowing how did I got there. 1619If it's inside Array, how many times it happen, and what decissions taken 1620along the way. But fear not. just set $print_debug_info: 1621 1622 $Data::ParseBinary::print_debug_info = 1; 1623 1624This will trigger a print every time the parsing process enter or exit a construct. 1625So if a parsing dies, you can follow where it did. 1626 1627=head1 TODO 1628 1629The following elements were not implemented: 1630 1631 OnDemand 1632 Reconfig and a macro Rename 1633 AlignedStruct 1634 Probe 1635 Embed 1636 Tunnel (TunnelAdapter is already implemented) 1637 1638Add documentation to: ExtractingAdapter 1639 1640Move the insertion of the parsed value to the context from the Struct/Sequence constructs 1641to each indevidual construct? 1642 1643Streams: SocketStream 1644 1645FileStreamWriter::Flush : improve. 1646 1647Ability to give the CreateStreamReader/CreateStreamWriter function an ability to reconginze 1648socket / filehandle / pointer to string. 1649 1650Union need to be extended to bit-structs? 1651 1652use some nice exception system 1653 1654Fix the Graphics-EMF library : 1655Find out if the EMF file should work or not. it fails on the statment: 1656Const(ULInt32("signature"), 0x464D4520) 1657And complain that it gets "0". 1658 1659Make BitField a meta construct? 1660 1661=head1 Thread Safety 1662 1663This is a pure perl module. there should be not problems. 1664 1665=head1 BUGS 1666 1667Currently L/BFloat64 does not work if you don't have 64 bit numbers support 1668compiled in your Perl 1669 1670=head1 SEE ALSO 1671 1672Original PyConstructs homepage: http://construct.wikispaces.com/ 1673 1674=head1 AUTHOR 1675 1676Fomberg Shmuel, E<lt>owner@semuel.co.ilE<gt> 1677 1678=head1 COPYRIGHT AND LICENSE 1679 1680Copyright 2008 by Shmuel Fomberg. 1681 1682This library is free software; you can redistribute it and/or modify 1683it under the same terms as Perl itself. 1684 1685=cut 1686