1# OpenXPKI::Serialization::Simple.pm 2# Written 2006 by Michael Bell for the OpenXPKI project 3# (C) Copyright 2006 by The OpenXPKI Project 4 5use strict; 6use warnings; 7use utf8; 8use JSON; 9use Encode; 10 11package OpenXPKI::Serialization::Legacy; 12 13use OpenXPKI::VERSION; 14our $VERSION = $OpenXPKI::VERSION::VERSION; 15 16use OpenXPKI::Exception; 17use OpenXPKI::Debug; 18use Data::Dumper; 19use MIME::Base64; 20 21sub new { 22 my $that = shift; 23 my $class = ref($that) || $that; 24 25 my $self = { 26 "SEPARATOR" => "\n", 27 }; 28 29 bless $self, $class; 30 31 my $keys = shift; 32 if ( exists $keys->{SEPARATOR} ) { 33 $self->{SEPARATOR} = $keys->{SEPARATOR}; 34 } 35 36 if ( length($self->{SEPARATOR}) != 1 ) { 37 OpenXPKI::Exception->throw ( 38 message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_SEPARATOR_TOO_LONG", 39 params => { 40 SEPARATOR => $self->{SEPARATOR} 41 } 42 ); 43 } 44 if ( $self->{SEPARATOR} =~ /^[0-9]$/ ) { 45 OpenXPKI::Exception->throw ( 46 message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_SEPARATOR_IS_NUMERIC", 47 params => { 48 SEPARATOR => $self->{SEPARATOR} 49 } 50 ); 51 } 52 53 return $self; 54} 55 56 57 58sub _json { 59 my $self = shift; 60 if (!$self->{JSON}) { 61 $self->{JSON} = JSON->new->allow_blessed; 62 } 63 return $self->{JSON}; 64} 65 66 67sub serialize { 68 my $self = shift; 69 my $data = shift; 70 71 return $self->__write_data($data); 72} 73 74 75sub __write_data { 76 my $self = shift; 77 my $data = shift; 78 my $msg = ""; 79 80 if ( ref $data eq "" && defined $data ) { 81 # it's a scalar 82 return $self->__write_scalar($data); 83 } 84 elsif ( ref $data eq "ARRAY" && defined $data ) { 85 # it's an array 86 return $self->__write_array($data); 87 } 88 elsif ( ref $data eq "HASH" && defined $data ) { 89 # it's a hash 90 return $self->__write_hash($data); 91 } 92 elsif ( not defined $data ) { 93 # it's an undef 94 return $self->__write_undef(); 95 } 96 elsif ( "$data" ne '' ) { 97 # it's not something of the above, but seems to have a valid 98 # stringification 99 # TODO - do we want this to rather throw an exception and clean 100 # up the code that calls the serialization not to use any objects? 101 ##! 1: 'implicit stringification of ' . ref $data . ' object' 102 return $self->__write_scalar("$data"); 103 } 104 else { 105 # data type is not supported 106 OpenXPKI::Exception->throw ( 107 message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_WRITE_DATA_TYPE_NOT_SUPPORTED", 108 params => { 109 DATA => $data, 110 DATA_TYPE => ref $data, 111 } 112 ); 113 } 114 115 return $msg; 116} 117 118sub __write_scalar { 119 my $self = shift; 120 my $data = shift; 121 122 my $separator = $self->{SEPARATOR}; 123 124 my $type = "SCALAR"; 125 # encode data having control chars 126 127 if ($data =~ m{[\x00-\x09]}s) { 128 ##! 8: 'Found binary data - do base64' 129 $type = "BASE64"; 130 $data = encode_base64( $data, '' ); 131 } else { 132 Encode::_utf8_on($data); 133 } 134 135 return $type.$separator. 136 length($data).$separator. 137 $data.$separator; 138} 139 140sub __write_array { 141 my $self = shift; 142 my $data = shift; 143 my $msg = ""; 144 145 my $separator = $self->{SEPARATOR}; 146 147 for (my $i = 0; $i<scalar @{$data}; $i++) { 148 $msg .= $i.$separator. 149 $self->__write_data($data->[$i]); 150 } 151 152 return "ARRAY".$separator. 153 length($msg).$separator. 154 $msg; 155} 156 157sub __write_hash { 158 my $self = shift; 159 my $data = shift; 160 my $msg = ""; 161 162 my $separator = $self->{SEPARATOR}; 163 164 foreach my $key ( sort keys %{$data} ) { 165 $msg .= length ($key).$separator. 166 $key.$separator. 167 $self->__write_data($data->{$key}); 168 } 169 170 Encode::_utf8_on($msg); 171 172 return "HASH".$separator. 173 length ($msg).$separator. 174 $msg; 175} 176 177 178sub __write_undef { 179 my $self = shift; 180 181 my $separator = $self->{SEPARATOR}; 182 183 return "UNDEF".$separator; 184} 185 186 187sub deserialize { 188 my $self = shift; 189 my $msg = shift; 190 191 unless(defined $msg){ 192 OpenXPKI::Exception->throw ( 193 message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_DESERIALIZE_NO_ARG_GIVEN" 194 ); 195 } 196 197 # Catch situations where the value is already deserialized, this can 198 # happens when the workflow context is handed over via memory 199 if ($msg && (ref $msg eq 'HASH') || (ref $msg eq 'ARRAY')) { 200 return $msg; 201 } 202 203 my $ret = $self->__read_data( $msg ); 204 205 return $ret->{data}; 206} 207 208sub __read_data { 209 my $self = shift; 210 my $msg = shift; 211 212 my $separator = $self->{SEPARATOR}; 213 214 Encode::_utf8_on($msg); 215 216 if ( $msg =~ /^(SCALAR|BASE64)$separator/ ) { 217 # it's a scalar 218 return $self->__read_scalar($msg); 219 } 220 elsif ( $msg =~ /^ARRAY$separator/ ) { 221 # it's an array 222 return $self->__read_array($msg); 223 } 224 elsif ( $msg =~ /^HASH$separator/ ) { 225 # it's a hash 226 return $self->__read_hash($msg); 227 } 228 elsif ( $msg =~ /^UNDEF$separator/ ) { 229 # it's an undef 230 return $self->__read_undef($msg); 231 } 232 elsif ( $msg =~ /^JSON$separator(.*)/ ) { 233 # it's json 234 ##! 1: 'Its json' 235 return $self->__read_json($1); 236 } 237 else { 238 # data type is not supported 239 OpenXPKI::Exception->throw ( 240 message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_READ_DATA_TYPE_NOT_SUPPORTED", 241 params => { 242 SEPARATOR => $separator, 243 MSG => $msg, 244 CALLER => [ caller(1) ], 245 } 246 ); 247 } 248 249 return $msg; 250} 251 252sub __read_scalar { 253 my $self = shift; 254 my $msg = shift; 255 256 my $separator = $self->{SEPARATOR}; 257 258 my $returnmessage = ""; 259 260 # check for correct scalar format 261 if ( not $msg =~ /^(SCALAR|BASE64)$separator[0-9]+$separator/ ) { 262 # scalar is not formatted appropriately 263 OpenXPKI::Exception->throw ( 264 message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_READ_SCALAR_FORMAT_CORRUPTED", 265 params => { 266 MSG => $msg 267 } 268 ); 269 } 270 271 # extract scalar length 272 $msg =~ /^(SCALAR|BASE64)$separator([0-9]+)$separator/; 273 my $encoding = $1; 274 my $scalarlength = $2; 275 276 # extract scalar value 277 if ( ( length($msg) - length($scalarlength) - 8 ) < $scalarlength ) { 278 # remaining msg is shorter than what would be interpreted as scalar value 279 OpenXPKI::Exception->throw ( 280 message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_READ_SCALAR_DENOTED_LENGTH_TOO_LONG", 281 params => { 282 MSG => $msg, 283 DENOTED_SCALAR_LENGTH => $scalarlength, 284 REMAINING_MSG_LENGTH => length($msg) 285 } 286 ); 287 } 288 my $scalarvalue = substr ($msg, length($scalarlength) + 8, $scalarlength); 289 290 # create return message used to extract scalar data 291 $returnmessage = "$encoding$separator$scalarlength$separator$scalarvalue$separator"; 292 293 if ($encoding eq 'BASE64') { 294 ##! 8: 'Found base64 data - decode' 295 $scalarvalue = decode_base64($scalarvalue); 296 } 297 298 299 return { 300 data => $scalarvalue, 301 returnmessage => $returnmessage 302 }; 303} 304 305sub __read_array { 306 my $self = shift; 307 my $msg = shift; 308 309 my @array = (); 310 311 my $separator = $self->{SEPARATOR}; 312 313 my $returnmessage = ""; 314 315 # read length of array 316 if ( not $msg =~ /^ARRAY$separator[0-9]+$separator/ ) { 317 # array (length of array, respectively) is not formatted appropriately 318 OpenXPKI::Exception->throw ( 319 message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_READ_ARRAY_LENGTH_FORMAT_CORRUPTED", 320 params => { 321 MSG => $msg 322 } 323 ); 324 } 325 $msg =~ /^ARRAY$separator([0-9]+)$separator/; 326 my $arraylength = $1; 327 328 # create return message used to extract array 329 $msg =~ /^(ARRAY$separator[0-9]+$separator)/; 330 $returnmessage = $1; 331 332 # isolate upcoming array elements in msg 333 $msg = substr ($msg, length($returnmessage)); 334 335 # iterate through array elements 336 while ( $arraylength > 0 ) { 337 # read array element position 338 if ( not $msg =~ /^[0-9]+$separator/ ) { 339 # array (array element position, respectively) is not formatted appropriately 340 OpenXPKI::Exception->throw ( 341 message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_READ_ARRAY_ELEMENT_POSITION_FORMAT_CORRUPTED", 342 params => { 343 MSG => $msg 344 } 345 ); 346 } 347 $msg =~ /^([0-9]+)$separator/; 348 my $arrayelementposition = $1; 349 350 # add array alement position to return message 351 $msg =~ /^([0-9]+$separator)/; 352 $returnmessage .= $1; 353 354 # cut off array element position from msg 355 $msg = substr ($msg, length($arrayelementposition)+1); 356 357 # used for consistency check at the end of the while loop 358 $arraylength -= (length($arrayelementposition)+1); 359 360 # read data 361 my $data = $self->__read_data ($msg); 362 363 # process data (write data into array) 364 push (@array, $data->{data}); 365 366 # complete return message 367 $returnmessage .= $data->{returnmessage}; 368 369 # cut off the part of msg that has already been processed 370 $msg = substr ($msg, length($data->{returnmessage})); 371 372 # used for consistency check at the end of the while loop 373 $arraylength -= (length($data->{returnmessage})); 374 } 375 376 # consistency check 377 if ( $arraylength != 0 ) { 378 # array length corrupted (this should ALWAYS be zero after successful processing) 379 OpenXPKI::Exception->throw ( 380 message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_READ_ARRAY_LENGTH_CORRUPTED", 381 params => { 382 REMAINING_ARRAY_LENGTH => $arraylength, 383 EXPECTED_REMAINING_ARRAY_LENGTH => 0 384 } 385 ); 386 } 387 388 return { 389 data => \@array, 390 returnmessage => $returnmessage 391 }; 392} 393 394sub __read_hash { 395 my $self = shift; 396 my $msg = shift; 397 398 my %hash = (); 399 400 my $separator = $self->{SEPARATOR}; 401 402 my $returnmessage = ""; 403 404 # read total length of hash 405 if ( not $msg =~ /^HASH$separator[0-9]+$separator/ ) { 406 # hash (hash length, respectively) is not formatted appropriately 407 OpenXPKI::Exception->throw ( 408 message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_READ_HASH_LENGTH_FORMAT_CORRUPTED", 409 params => { 410 MSG => $msg 411 } 412 ); 413 } 414 $msg =~ /^HASH$separator([0-9]+)$separator/; 415 my $hashlength = $1; 416 417 # create return message used to extract hash 418 $msg =~ /^(HASH$separator[0-9]+$separator)/; 419 $returnmessage = $1; 420 421 # isolate upcoming hash elements in msg 422 $msg = substr ($msg, length($returnmessage)); 423 424 # iterate through hash elements 425 while ( $hashlength > 0 ) { 426 # read length of hash key 427 if ( not $msg =~ /^[0-9]+$separator/ ) { 428 # hash (hash length, respectively) is not formatted appropriately 429 OpenXPKI::Exception->throw ( 430 message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_READ_HASH_KEY_LENGTH_FORMAT_CORRUPTED", 431 params => { 432 MSG => $msg 433 } 434 ); 435 } 436 $msg =~ /^([0-9]+)$separator/; 437 my $hashkeylength = $1; 438 439 # complete return message 440 $returnmessage .= "$hashkeylength$separator"; 441 442 # cut off hash key length from msg 443 $msg = substr ($msg, length($hashkeylength)+1); 444 445 # used for consistency check at the end of the while loop 446 $hashlength -= (length($hashkeylength)+1); 447 448 # read hash key 449 $msg =~ /^([^$separator]+)$separator/; 450 my $hashkey = $1; 451 452 # complete return message 453 $returnmessage .= "$hashkey$separator"; 454 455 # cut off hash key from msg 456 $msg = substr ($msg, length($hashkey)+1); 457 458 # used for consistency check at the end of the while loop 459 $hashlength -= (length($hashkey)+1); 460 461 # check for correct hash key length 462 if( length($hashkey) != $hashkeylength ) { 463 # actual length of hash key differs from denoted length 464 OpenXPKI::Exception->throw ( 465 message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_READ_HASH_KEY_LENGTH_CORRUPTED", 466 params => { 467 ACTUAL_LENGTH => length($hashkey), 468 DENOTED_LENGTH => $hashkeylength, 469 SCALAR_VALUE => $hashkey, 470 MSG => $msg 471 } 472 ); 473 } 474 475 # read data 476 my $data = $self->__read_data ($msg); 477 478 # process data (write data into hash) 479 $hash{$hashkey} = $data->{data}; 480 481 # complete return message 482 $returnmessage .= $data->{returnmessage}; 483 484 # cut off the part of msg that has already been processed 485 $msg = substr ($msg, length($data->{returnmessage})); 486 487 # used for consistency check at the end of the while loop 488 $hashlength -= (length($data->{returnmessage})); 489 } 490 491 # consistency check 492 if ( $hashlength != 0 ) { 493 # hash length corrupted (this should ALWAYS be zero after successful processing) 494 OpenXPKI::Exception->throw ( 495 message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_READ_HASH_LENGTH_CORRUPTED", 496 params => { 497 REMAINING_HASH_LENGTH => $hashlength, 498 EXPECTED_REMAINING_HASH_LENGTH => 0 499 } 500 ); 501 } 502 503 return { 504 data => \%hash, 505 returnmessage => $returnmessage 506 }; 507} 508 509sub __read_undef { 510 my $self = shift; 511 my $msg = shift; 512 513 my $separator = $self->{SEPARATOR}; 514 515 my $returnmessage = ""; 516 517 if ( not $msg =~ /^UNDEF$separator/ ) { 518 # undef is not formatted appropriately 519 OpenXPKI::Exception->throw ( 520 message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_READ_UNDEF_FORMAT_CORRUPTED", 521 params => { 522 MSG => $msg 523 } 524 ); 525 } 526 527 $msg =~ /^(UNDEF$separator)/; 528 $returnmessage = $1; 529 530 return { 531 data => undef, 532 returnmessage => $returnmessage 533 }; 534} 535 536# Not used yet 537sub __read_json { 538 my $self = shift; 539 my $msg = shift; 540 541 # utf8::upgrade( $msg ); 542 ##! 4: 'json data ' . $msg 543 my $json = JSON->new()->decode( $msg ); 544 545 ##! 4: 'json decoded ' . Dumper $json 546 547 return { 548 data => $json, 549 returnmessage => $msg 550 }; 551} 552 553 554 5551; 556__END__ 557 558=head1 Name 559 560OpenXPKI::Serialization::Simple 561 562=head1 Description 563 564Really simple serialization class for scalars, arrays and hashes. 565This is a platform neutral example implementation. It mainly 566demonstrates the interface and can easily be ported to other 567scripting languages. 568 569=head1 Functions 570 571=head2 new 572 573Initializes the object. 574 575=head2 serialize 576 577Returns the serialization of data passed as argument. 578 579=head2 deserialize 580 581Returns the deserialization of data passed as argument. 582 583=head2 is_serialized (static!) 584 585This checks if a given argument is a serialized string. This method is static! 586 587=head1 Internal Functions 588 589=head2 Serialization 590 591=head3 __write_data 592 593This function returns the serialization of data passed as argument by 594calling one or more of the following functions. Each of those functions 595serializes a specific data type according to the syntax (see below). An 596exception is thrown if the data type cannot be recognized. 597 598=head3 __write_scalar 599 600=head3 __write_array 601 602=head3 __write_hash 603 604=head3 __write_undef 605 606=head2 Deserialization 607 608=head3 __read_data 609 610This function returns the deserialization of data passed as argument by 611calling one or more of the following functions. Each of those functions 612deserializes a specific data type according to the syntax (see below). An 613exception is thrown if the data type cannot be recognized. 614 615Basically, the deserialization works as follows: While scalars and undefs 616are easily deserialized upon recognition, it's a bit more tricky with arrays 617and hashes. Since they can possibly contain more (complex) data, each of the 618functions below returns two values: "$data" holds the deserialized data, and 619"$returnmessage" returns the (serialized) string that was used to deserialize 620the data. The latter value is important to keep track of which part of the 621serialized string has already been deserialized. 622 623=head3 __read_scalar 624 625=head3 __read_array 626 627=head3 __read_hash 628 629=head3 __read_undef 630 631=head1 Syntax 632 633We support scalars, array references and hash references 634in any combination. The syntax is the following one: 635 636scalar ::= 'SCALAR'.SEPARATOR. 637 [0-9]+.SEPARATOR. /* length of data */ 638 data.SEPARATOR 639 640array ::= 'ARRAY'.SEPARATOR. 641 [0-9]+.SEPARATOR. /* length of array data */ 642 array_element+ 643 644array_element ::= [0-9]+.SEPARATOR. /* position in the array */ 645 (hash|array|scalar) 646 647hash ::= 'HASH'.SEPARATOR. 648 [0-9]+.SEPARATOR. /* length of hash data */ 649 hash_element+ 650 651hash_element ::= [1-9][0-9]*.SEPARATOR. /* length of the hash key */ 652 [a-zA-Z0-9_]+.SEPARATOR. /* the hash key */ 653 (hash|array|undef|scalar) 654 655undef ::= 'UNDEF'.SEPARATOR. 656 657The SEPARATOR is one character long. It can be any non number. 658The default separator is newline. The important thing is 659that you can parse every structure without knowing the used 660SEPARATOR. 661 662Perhaps the good mathmatics notice that the last SEPARATOR 663in the definition of a scalar is not necessary. This SEPARATOR 664is only used to make the resulting structure better readable 665for humans. 666