1# OpenXPKI::Serialization::Simple.pm
2# Written 2006 by Michael Bell for the OpenXPKI project
3# (C) Copyright 2006 by The OpenXPKI Project
4
5use strict;
6use warnings;
7use utf8;
8use JSON;
9use Encode;
10
11package OpenXPKI::Serialization::Legacy;
12
13use OpenXPKI::VERSION;
14our $VERSION = $OpenXPKI::VERSION::VERSION;
15
16use OpenXPKI::Exception;
17use OpenXPKI::Debug;
18use Data::Dumper;
19use MIME::Base64;
20
21sub new {
22    my $that = shift;
23    my $class = ref($that) || $that;
24
25    my $self = {
26        "SEPARATOR" => "\n",
27    };
28
29    bless $self, $class;
30
31    my $keys = shift;
32    if ( exists $keys->{SEPARATOR} ) {
33        $self->{SEPARATOR} = $keys->{SEPARATOR};
34    }
35
36    if ( length($self->{SEPARATOR}) != 1 ) {
37        OpenXPKI::Exception->throw (
38            message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_SEPARATOR_TOO_LONG",
39            params  => {
40                SEPARATOR => $self->{SEPARATOR}
41            }
42        );
43    }
44    if ( $self->{SEPARATOR} =~ /^[0-9]$/ ) {
45        OpenXPKI::Exception->throw (
46            message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_SEPARATOR_IS_NUMERIC",
47            params  => {
48                SEPARATOR => $self->{SEPARATOR}
49            }
50        );
51    }
52
53    return $self;
54}
55
56
57
58sub _json {
59    my $self = shift;
60    if (!$self->{JSON}) {
61        $self->{JSON} = JSON->new->allow_blessed;
62    }
63    return $self->{JSON};
64}
65
66
67sub serialize {
68    my $self = shift;
69    my $data = shift;
70
71    return $self->__write_data($data);
72}
73
74
75sub __write_data {
76    my $self = shift;
77    my $data = shift;
78    my $msg  = "";
79
80    if ( ref $data eq "" && defined $data ) {
81        # it's a scalar
82        return $self->__write_scalar($data);
83    }
84    elsif ( ref $data eq "ARRAY" && defined $data ) {
85        # it's an array
86        return $self->__write_array($data);
87    }
88    elsif ( ref $data eq "HASH" && defined $data ) {
89        # it's a hash
90        return $self->__write_hash($data);
91    }
92    elsif ( not defined $data ) {
93        # it's an undef
94        return $self->__write_undef();
95    }
96    elsif ( "$data" ne '' ) {
97        # it's not something of the above, but seems to have a valid
98        # stringification
99        # TODO - do we want this to rather throw an exception and clean
100        # up the code that calls the serialization not to use any objects?
101        ##! 1: 'implicit stringification of ' . ref $data . ' object'
102        return $self->__write_scalar("$data");
103    }
104    else {
105        # data type is not supported
106        OpenXPKI::Exception->throw (
107            message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_WRITE_DATA_TYPE_NOT_SUPPORTED",
108            params  => {
109                DATA      => $data,
110                DATA_TYPE => ref $data,
111            }
112        );
113    }
114
115    return $msg;
116}
117
118sub __write_scalar {
119    my $self = shift;
120    my $data = shift;
121
122    my $separator = $self->{SEPARATOR};
123
124    my $type = "SCALAR";
125    # encode data having control chars
126
127    if ($data =~ m{[\x00-\x09]}s) {
128        ##! 8: 'Found binary data - do base64'
129        $type = "BASE64";
130        $data  = encode_base64( $data, '' );
131    } else {
132        Encode::_utf8_on($data);
133    }
134
135    return $type.$separator.
136           length($data).$separator.
137           $data.$separator;
138}
139
140sub __write_array {
141    my $self = shift;
142    my $data = shift;
143    my $msg  = "";
144
145    my $separator = $self->{SEPARATOR};
146
147    for (my $i = 0; $i<scalar @{$data}; $i++) {
148        $msg .= $i.$separator.
149                $self->__write_data($data->[$i]);
150    }
151
152    return "ARRAY".$separator.
153           length($msg).$separator.
154           $msg;
155}
156
157sub __write_hash {
158    my $self = shift;
159    my $data = shift;
160    my $msg  = "";
161
162    my $separator = $self->{SEPARATOR};
163
164    foreach my $key ( sort keys %{$data} ) {
165        $msg .= length ($key).$separator.
166                $key.$separator.
167                $self->__write_data($data->{$key});
168    }
169
170    Encode::_utf8_on($msg);
171
172    return "HASH".$separator.
173           length ($msg).$separator.
174           $msg;
175}
176
177
178sub __write_undef {
179    my $self = shift;
180
181    my $separator = $self->{SEPARATOR};
182
183    return "UNDEF".$separator;
184}
185
186
187sub deserialize {
188    my $self = shift;
189    my $msg  = shift;
190
191    unless(defined $msg){
192        OpenXPKI::Exception->throw (
193            message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_DESERIALIZE_NO_ARG_GIVEN"
194        );
195    }
196
197    # Catch situations where the value is already deserialized, this can
198    # happens when the workflow context is handed over via memory
199    if ($msg && (ref $msg eq 'HASH') || (ref $msg eq 'ARRAY')) {
200        return $msg;
201    }
202
203    my $ret = $self->__read_data( $msg );
204
205    return $ret->{data};
206}
207
208sub __read_data {
209    my $self = shift;
210    my $msg  = shift;
211
212    my $separator = $self->{SEPARATOR};
213
214    Encode::_utf8_on($msg);
215
216    if ( $msg =~ /^(SCALAR|BASE64)$separator/ ) {
217        # it's a scalar
218        return $self->__read_scalar($msg);
219    }
220    elsif ( $msg =~ /^ARRAY$separator/ ) {
221        # it's an array
222        return $self->__read_array($msg);
223    }
224    elsif ( $msg =~ /^HASH$separator/ ) {
225        # it's a hash
226        return $self->__read_hash($msg);
227    }
228    elsif ( $msg =~ /^UNDEF$separator/ ) {
229        # it's an undef
230        return $self->__read_undef($msg);
231    }
232    elsif ( $msg =~ /^JSON$separator(.*)/ ) {
233        # it's json
234        ##! 1: 'Its json'
235        return $self->__read_json($1);
236    }
237    else {
238        # data type is not supported
239        OpenXPKI::Exception->throw (
240            message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_READ_DATA_TYPE_NOT_SUPPORTED",
241            params  => {
242                SEPARATOR => $separator,
243                MSG => $msg,
244                CALLER => [ caller(1) ],
245            }
246        );
247    }
248
249    return $msg;
250}
251
252sub __read_scalar {
253    my $self   = shift;
254    my $msg    = shift;
255
256    my $separator = $self->{SEPARATOR};
257
258    my $returnmessage = "";
259
260    # check for correct scalar format
261    if ( not $msg =~ /^(SCALAR|BASE64)$separator[0-9]+$separator/ ) {
262        # scalar is not formatted appropriately
263        OpenXPKI::Exception->throw (
264             message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_READ_SCALAR_FORMAT_CORRUPTED",
265             params  => {
266                 MSG => $msg
267             }
268        );
269    }
270
271    # extract scalar length
272    $msg =~ /^(SCALAR|BASE64)$separator([0-9]+)$separator/;
273    my $encoding = $1;
274    my $scalarlength = $2;
275
276    # extract scalar value
277    if ( ( length($msg) - length($scalarlength) - 8 ) < $scalarlength ) {
278        # remaining msg is shorter than what would be interpreted as scalar value
279        OpenXPKI::Exception->throw (
280            message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_READ_SCALAR_DENOTED_LENGTH_TOO_LONG",
281            params  => {
282                MSG => $msg,
283                DENOTED_SCALAR_LENGTH => $scalarlength,
284                REMAINING_MSG_LENGTH  => length($msg)
285            }
286        );
287    }
288    my $scalarvalue = substr ($msg, length($scalarlength) + 8, $scalarlength);
289
290    # create return message used to extract scalar data
291    $returnmessage = "$encoding$separator$scalarlength$separator$scalarvalue$separator";
292
293    if ($encoding eq 'BASE64') {
294       ##! 8: 'Found base64 data - decode'
295       $scalarvalue = decode_base64($scalarvalue);
296    }
297
298
299    return {
300        data          => $scalarvalue,
301        returnmessage => $returnmessage
302    };
303}
304
305sub __read_array {
306    my $self  = shift;
307    my $msg   = shift;
308
309    my @array = ();
310
311    my $separator = $self->{SEPARATOR};
312
313    my $returnmessage = "";
314
315    # read length of array
316    if ( not $msg =~ /^ARRAY$separator[0-9]+$separator/ ) {
317        # array (length of array, respectively) is not formatted appropriately
318        OpenXPKI::Exception->throw (
319            message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_READ_ARRAY_LENGTH_FORMAT_CORRUPTED",
320            params  => {
321                MSG => $msg
322            }
323        );
324    }
325    $msg =~ /^ARRAY$separator([0-9]+)$separator/;
326    my $arraylength = $1;
327
328    # create return message used to extract array
329    $msg =~ /^(ARRAY$separator[0-9]+$separator)/;
330    $returnmessage = $1;
331
332    # isolate upcoming array elements in msg
333    $msg = substr ($msg, length($returnmessage));
334
335    # iterate through array elements
336    while ( $arraylength > 0 ) {
337        # read array element position
338        if ( not $msg =~ /^[0-9]+$separator/ ) {
339            # array (array element position, respectively) is not formatted appropriately
340            OpenXPKI::Exception->throw (
341                message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_READ_ARRAY_ELEMENT_POSITION_FORMAT_CORRUPTED",
342                params  => {
343                    MSG => $msg
344                }
345            );
346        }
347        $msg =~ /^([0-9]+)$separator/;
348        my $arrayelementposition = $1;
349
350        # add array alement position to return message
351        $msg =~ /^([0-9]+$separator)/;
352        $returnmessage .= $1;
353
354        # cut off array element position from msg
355        $msg = substr ($msg, length($arrayelementposition)+1);
356
357        # used for consistency check at the end of the while loop
358        $arraylength -= (length($arrayelementposition)+1);
359
360        # read data
361        my $data = $self->__read_data ($msg);
362
363        # process data (write data into array)
364        push (@array, $data->{data});
365
366        # complete return message
367        $returnmessage .= $data->{returnmessage};
368
369        # cut off the part of msg that has already been processed
370        $msg = substr ($msg, length($data->{returnmessage}));
371
372        # used for consistency check at the end of the while loop
373        $arraylength -= (length($data->{returnmessage}));
374    }
375
376    # consistency check
377    if ( $arraylength != 0 ) {
378         # array length corrupted (this should ALWAYS be zero after successful processing)
379         OpenXPKI::Exception->throw (
380             message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_READ_ARRAY_LENGTH_CORRUPTED",
381             params  => {
382                 REMAINING_ARRAY_LENGTH          => $arraylength,
383                 EXPECTED_REMAINING_ARRAY_LENGTH => 0
384             }
385         );
386    }
387
388    return {
389        data           => \@array,
390        returnmessage  => $returnmessage
391    };
392}
393
394sub __read_hash {
395    my $self = shift;
396    my $msg  = shift;
397
398    my %hash = ();
399
400    my $separator = $self->{SEPARATOR};
401
402    my $returnmessage = "";
403
404    # read total length of hash
405    if ( not $msg =~ /^HASH$separator[0-9]+$separator/ ) {
406        # hash (hash length, respectively) is not formatted appropriately
407        OpenXPKI::Exception->throw (
408            message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_READ_HASH_LENGTH_FORMAT_CORRUPTED",
409            params  => {
410                MSG => $msg
411            }
412        );
413    }
414    $msg =~ /^HASH$separator([0-9]+)$separator/;
415    my $hashlength = $1;
416
417    # create return message used to extract hash
418    $msg =~ /^(HASH$separator[0-9]+$separator)/;
419    $returnmessage = $1;
420
421    # isolate upcoming hash elements in msg
422    $msg = substr ($msg, length($returnmessage));
423
424    # iterate through hash elements
425    while ( $hashlength > 0 ) {
426        # read length of hash key
427        if ( not $msg =~ /^[0-9]+$separator/ ) {
428            # hash (hash length, respectively) is not formatted appropriately
429            OpenXPKI::Exception->throw (
430                message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_READ_HASH_KEY_LENGTH_FORMAT_CORRUPTED",
431                params  => {
432                    MSG => $msg
433                }
434            );
435        }
436        $msg =~ /^([0-9]+)$separator/;
437        my $hashkeylength = $1;
438
439        # complete return message
440        $returnmessage .= "$hashkeylength$separator";
441
442        # cut off hash key length from msg
443        $msg = substr ($msg, length($hashkeylength)+1);
444
445        # used for consistency check at the end of the while loop
446        $hashlength -= (length($hashkeylength)+1);
447
448        # read hash key
449        $msg =~ /^([^$separator]+)$separator/;
450        my $hashkey = $1;
451
452        # complete return message
453        $returnmessage .= "$hashkey$separator";
454
455        # cut off hash key from msg
456        $msg = substr ($msg, length($hashkey)+1);
457
458        # used for consistency check at the end of the while loop
459        $hashlength -= (length($hashkey)+1);
460
461        # check for correct hash key length
462        if( length($hashkey) != $hashkeylength ) {
463            # actual length of hash key differs from denoted length
464            OpenXPKI::Exception->throw (
465                message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_READ_HASH_KEY_LENGTH_CORRUPTED",
466                params  => {
467                    ACTUAL_LENGTH  => length($hashkey),
468                    DENOTED_LENGTH => $hashkeylength,
469                    SCALAR_VALUE   => $hashkey,
470            MSG            => $msg
471                }
472            );
473        }
474
475        # read data
476        my $data = $self->__read_data ($msg);
477
478        # process data (write data into hash)
479        $hash{$hashkey} = $data->{data};
480
481        # complete return message
482        $returnmessage .= $data->{returnmessage};
483
484        # cut off the part of msg that has already been processed
485        $msg = substr ($msg, length($data->{returnmessage}));
486
487        # used for consistency check at the end of the while loop
488        $hashlength -= (length($data->{returnmessage}));
489    }
490
491    # consistency check
492    if ( $hashlength != 0 ) {
493        # hash length corrupted (this should ALWAYS be zero after successful processing)
494        OpenXPKI::Exception->throw (
495            message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_READ_HASH_LENGTH_CORRUPTED",
496            params  => {
497                REMAINING_HASH_LENGTH          => $hashlength,
498                EXPECTED_REMAINING_HASH_LENGTH => 0
499            }
500        );
501    }
502
503    return {
504        data           => \%hash,
505        returnmessage  => $returnmessage
506    };
507}
508
509sub __read_undef {
510    my $self   = shift;
511    my $msg    = shift;
512
513    my $separator = $self->{SEPARATOR};
514
515    my $returnmessage = "";
516
517    if ( not $msg =~ /^UNDEF$separator/ ) {
518        # undef is not formatted appropriately
519        OpenXPKI::Exception->throw (
520            message => "I18N_OPENXPKI_SERIALIZATION_SIMPLE_READ_UNDEF_FORMAT_CORRUPTED",
521            params  => {
522                MSG => $msg
523            }
524        );
525    }
526
527    $msg =~ /^(UNDEF$separator)/;
528    $returnmessage = $1;
529
530    return {
531        data           => undef,
532        returnmessage  => $returnmessage
533    };
534}
535
536# Not used yet
537sub __read_json {
538    my $self   = shift;
539    my $msg    = shift;
540
541    # utf8::upgrade( $msg );
542    ##! 4: 'json data ' . $msg
543    my $json = JSON->new()->decode( $msg );
544
545    ##! 4: 'json decoded ' . Dumper $json
546
547    return {
548        data           => $json,
549        returnmessage  => $msg
550    };
551}
552
553
554
5551;
556__END__
557
558=head1 Name
559
560OpenXPKI::Serialization::Simple
561
562=head1 Description
563
564Really simple serialization class for scalars, arrays and hashes.
565This is a platform neutral example implementation. It mainly
566demonstrates the interface and can easily be ported to other
567scripting languages.
568
569=head1 Functions
570
571=head2 new
572
573Initializes the object.
574
575=head2 serialize
576
577Returns the serialization of data passed as argument.
578
579=head2 deserialize
580
581Returns the deserialization of data passed as argument.
582
583=head2 is_serialized (static!)
584
585This checks if a given argument is a serialized string. This method is static!
586
587=head1 Internal Functions
588
589=head2 Serialization
590
591=head3 __write_data
592
593This function returns the serialization of data passed as argument by
594calling one or more of the following functions. Each of those functions
595serializes a specific data type according to the syntax (see below). An
596exception is thrown if the data type cannot be recognized.
597
598=head3 __write_scalar
599
600=head3 __write_array
601
602=head3 __write_hash
603
604=head3 __write_undef
605
606=head2 Deserialization
607
608=head3 __read_data
609
610This function returns the deserialization of data passed as argument by
611calling one or more of the following functions. Each of those functions
612deserializes a specific data type according to the syntax (see below). An
613exception is thrown if the data type cannot be recognized.
614
615Basically, the deserialization works as follows: While scalars and undefs
616are easily deserialized upon recognition, it's a bit more tricky with arrays
617and hashes. Since they can possibly contain more (complex) data, each of the
618functions below returns two values: "$data" holds the deserialized data, and
619"$returnmessage" returns the (serialized) string that was used to deserialize
620the data. The latter value is important to keep track of which part of the
621serialized string has already been deserialized.
622
623=head3 __read_scalar
624
625=head3 __read_array
626
627=head3 __read_hash
628
629=head3 __read_undef
630
631=head1 Syntax
632
633We support scalars, array references and hash references
634in any combination. The syntax is the following one:
635
636scalar        ::= 'SCALAR'.SEPARATOR.
637                  [0-9]+.SEPARATOR. /* length of data */
638                  data.SEPARATOR
639
640array         ::= 'ARRAY'.SEPARATOR.
641                  [0-9]+.SEPARATOR. /* length of array data */
642                  array_element+
643
644array_element ::= [0-9]+.SEPARATOR. /* position in the array */
645                  (hash|array|scalar)
646
647hash          ::= 'HASH'.SEPARATOR.
648                  [0-9]+.SEPARATOR. /* length of hash data */
649                  hash_element+
650
651hash_element  ::= [1-9][0-9]*.SEPARATOR.    /* length of the hash key */
652                  [a-zA-Z0-9_]+.SEPARATOR.  /* the hash key */
653                  (hash|array|undef|scalar)
654
655undef         ::= 'UNDEF'.SEPARATOR.
656
657The SEPARATOR is one character long. It can be any non number.
658The default separator is newline. The important thing is
659that you can parse every structure without knowing the used
660SEPARATOR.
661
662Perhaps the good mathmatics notice that the last SEPARATOR
663in the definition of a scalar is not necessary. This SEPARATOR
664is only used to make the resulting structure better readable
665for humans.
666