1# Code in the PDF::API2::Basic::PDF namespace was originally copied from the
2# Text::PDF distribution.
3#
4# Copyright Martin Hosken <Martin_Hosken@sil.org>
5#
6# Martin Hosken's code may be used under the terms of the MIT license.
7# Subsequent versions of the code have the same license as PDF::API2.
8
9package PDF::API2::Basic::PDF::File;
10
11use strict;
12
13our $VERSION = '2.042'; # VERSION
14
15=head1 NAME
16
17PDF::API2::Basic::PDF::File - Low-level PDF file access
18
19=head1 SYNOPSIS
20
21 $p = PDF::API2::Basic::PDF::File->open("filename.pdf", 1);
22 $p->new_obj($obj_ref);
23 $p->free_obj($obj_ref);
24 $p->append_file;
25 $p->close_file;
26 $p->release;       # IMPORTANT!
27
28=head1 DESCRIPTION
29
30This class keeps track of the directory aspects of a PDF file. There are two
31parts to the directory: the main directory object which is the parent to all
32other objects and a chain of cross-reference tables and corresponding trailer
33dictionaries starting with the main directory object.
34
35=head1 INSTANCE VARIABLES
36
37Within this class hierarchy, rather than making everything visible via methods,
38which would be a lot of work, there are various instance variables which are
39accessible via associative array referencing. To distinguish instance variables
40from content variables (which may come from the PDF content itself), each such
41variable will start with a space.
42
43Variables which do not start with a space directly reflect elements in a PDF
44dictionary. In the case of a PDF::API2::Basic::PDF::File, the elements reflect those in the
45trailer dictionary.
46
47Since some variables are not designed for class users to access, variables are
48marked in the documentation with (R) to indicate that such an entry should only
49be used as read-only information. (P) indicates that the information is private
50and not designed for user use at all, but is included in the documentation for
51completeness and to ensure that nobody else tries to use it.
52
53=over
54
55=item newroot
56
57This variable allows the user to create a new root entry to occur in the trailer
58dictionary which is output when the file is written or appended. If you wish to
59over-ride the root element in the dictionary you have, use this entry to indicate
60that without losing the current Root entry. Notice that newroot should point to
61a PDF level object and not just to a dictionary which does not have object status.
62
63=item INFILE (R)
64
65Contains the filehandle used to read this information into this PDF directory. Is
66an IO object.
67
68=item fname (R)
69
70This is the filename which is reflected by INFILE, or the original IO object passed
71in.
72
73=item update (R)
74
75This indicates that the read file has been opened for update and that at some
76point, $p->appendfile() can be called to update the file with the changes that
77have been made to the memory representation.
78
79=item maxobj (R)
80
81Contains the first usable object number above any that have already appeared
82in the file so far.
83
84=item outlist (P)
85
86This is a list of Objind which are to be output when the next appendfile or outfile
87occurs.
88
89=item firstfree (P)
90
91Contains the first free object in the free object list. Free objects are removed
92from the front of the list and added to the end.
93
94=item lastfree (P)
95
96Contains the last free object in the free list. It may be the same as the firstfree
97if there is only one free object.
98
99=item objcache (P)
100
101All objects are held in the cache to ensure that a system only has one occurrence of
102each object. In effect, the objind class acts as a container type class to hold the
103PDF object structure and it would be unfortunate if there were two identical
104place-holders floating around a system.
105
106=item epos (P)
107
108The end location of the read-file.
109
110=back
111
112Each trailer dictionary contains a number of private instance variables which
113hold the chain together.
114
115=over
116
117=item loc (P)
118
119Contains the location of the start of the cross-reference table preceding the
120trailer.
121
122=item xref (P)
123
124Contains an anonymous array of each cross-reference table entry.
125
126=item prev (P)
127
128A reference to the previous table. Note this differs from the Prev entry which
129is in PDF which contains the location of the previous cross-reference table.
130
131=back
132
133=head1 METHODS
134
135=cut
136
137use Scalar::Util qw(blessed weaken);
138
139use vars qw($cr $irreg_char $reg_char $ws_char $delim_char %types);
140
141$ws_char = '[ \t\r\n\f\0]';
142$delim_char = '[][<>{}()/%]';
143$reg_char = '[^][<>{}()/% \t\r\n\f\0]';
144$irreg_char = '[][<>{}()/% \t\r\n\f\0]';
145$cr = '\s*(?:\015|\012|(?:\015\012))';
146
147my $re_comment = qr/(?:\%[^\r\n]*)/;
148my $re_whitespace = qr/(?:[ \t\r\n\f\0]|$re_comment)/;
149
150%types = (
151    'Page'  => 'PDF::API2::Basic::PDF::Page',
152    'Pages' => 'PDF::API2::Basic::PDF::Pages',
153);
154
155my $readDebug = 0;
156
157use Carp;
158use IO::File;
159
160# Now for the basic PDF types
161use PDF::API2::Basic::PDF::Utils;
162
163use PDF::API2::Basic::PDF::Array;
164use PDF::API2::Basic::PDF::Bool;
165use PDF::API2::Basic::PDF::Dict;
166use PDF::API2::Basic::PDF::Name;
167use PDF::API2::Basic::PDF::Number;
168use PDF::API2::Basic::PDF::Objind;
169use PDF::API2::Basic::PDF::String;
170use PDF::API2::Basic::PDF::Page;
171use PDF::API2::Basic::PDF::Pages;
172use PDF::API2::Basic::PDF::Null;
173use POSIX qw(ceil floor);
174
175no warnings qw[ deprecated recursion uninitialized ];
176
177
178=head2 PDF::API2::Basic::PDF::File->new
179
180Creates a new, empty file object which can act as the host to other PDF objects.
181Since there is no file associated with this object, it is assumed that the
182object is created in readiness for creating a new PDF file.
183
184=cut
185
186sub new {
187    my ($class, $root) = @_;
188    my $self = $class->_new();
189
190    unless ($root) {
191        $root = PDFDict();
192        $root->{'Type'} = PDFName('Catalog');
193    }
194    $self->new_obj($root);
195    $self->{'Root'} = $root;
196    return $self;
197}
198
199
200=head2 $p = PDF::API2::Basic::PDF::File->open($filename, $update)
201
202Opens the file and reads all the trailers and cross reference tables to build
203a complete directory of objects.
204
205$update specifies whether this file is being opened for updating and editing,
206or simply to be read.
207
208$filename may be an IO object
209
210=cut
211
212sub open {
213    my ($class, $filename, $update) = @_;
214    my ($fh, $buffer);
215
216    my $self = $class->_new();
217    if (ref $filename) {
218        $self->{' INFILE'} = $filename;
219        if ($update) {
220            $self->{' update'} = 1;
221            $self->{' OUTFILE'} = $filename;
222        }
223        $fh = $filename;
224    }
225    else {
226        die "File '$filename' does not exist"  unless -f $filename;
227        die "File '$filename' is not readable" unless -r $filename;
228        if ($update) {
229            die "File '$filename' is not writable" unless -w $filename;
230        }
231        $fh = IO::File->new(($update ? '+' : '') . "<$filename")
232            || die "Error opening '$filename': $!";
233        $self->{' INFILE'} = $fh;
234        if ($update) {
235            $self->{' update'} = 1;
236            $self->{' OUTFILE'} = $fh;
237            $self->{' fname'} = $filename;
238        }
239    }
240    binmode $fh, ':raw';
241    $fh->seek(0, 0);            # go to start of file
242    $fh->read($buffer, 255);
243    unless ($buffer =~ /^\%PDF\-([12]\.\d+)\s*$cr/m) {
244        croak "$filename does not appear to be a valid PDF";
245    }
246    $self->{' version'} = $1;
247
248    $fh->seek(0, 2);            # go to end of file
249    my $end = $fh->tell();
250    $self->{' epos'} = $end;
251    foreach my $offset (1..64) {
252    	$fh->seek($end - 16 * $offset, 0);
253    	$fh->read($buffer, 16 * $offset);
254    	last if $buffer =~ m/startxref($cr|\s*)\d+($cr|\s*)\%\%eof.*?/i;
255    }
256    unless ($buffer =~ m/startxref[^\d]+([0-9]+)($cr|\s*)\%\%eof.*?/i) {
257        die "Malformed PDF file $filename";
258    }
259    my $xpos = $1;
260    $self->{' xref_position'} = $xpos;
261
262    my $tdict = $self->readxrtr($xpos, $self);
263    foreach my $key (keys %$tdict) {
264        $self->{$key} = $tdict->{$key};
265    }
266    return $self;
267}
268
269=head2 $p->version($version)
270
271Gets/sets the PDF version (e.g. 1.4)
272
273=cut
274
275sub version {
276    my $self = shift();
277
278    if (@_) {
279        my $version = shift();
280        croak "Invalid version $version" unless $version =~ /^([12]\.[0-9]+)$/;
281        $self->header_version($version);
282        if ($version >= 1.4) {
283            $self->trailer_version($version);
284        }
285        else {
286            delete $self->{'Root'}->{'Version'};
287            $self->out_obj($self->{'Root'});
288        }
289        return $version;
290    }
291
292    my $header_version = $self->header_version();
293    my $trailer_version = $self->trailer_version();
294    return $trailer_version if $trailer_version > $header_version;
295    return $header_version;
296}
297
298=head2 $version = $p->header_version($version)
299
300Gets/sets the PDF version stored in the file header.
301
302=cut
303
304sub header_version {
305    my $self = shift();
306
307    if (@_) {
308        my $version = shift();
309        croak "Invalid version $version" unless $version =~ /^([12]\.[0-9]+)$/;
310        $self->{' version'} = $version;
311    }
312
313    return $self->{' version'};
314}
315
316=head2 $version = $p->trailer_version($version)
317
318Gets/sets the PDF version stored in the document catalog.
319
320=cut
321
322sub trailer_version {
323    my $self = shift();
324
325    if (@_) {
326        my $version = shift();
327        croak "Invalid version $version" unless $version =~ /^([12]\.[0-9]+)$/;
328        $self->{'Root'}->{'Version'} = PDFName($version);
329        $self->out_obj($self->{'Root'});
330        return $version;
331    }
332
333    return unless $self->{'Root'}->{'Version'};
334    $self->{'Root'}->{'Version'}->realise();
335    return $self->{'Root'}->{'Version'}->val();
336}
337
338=head2 $prev_version = $p->require_version($version)
339
340Ensures that the PDF version is at least C<$version>.
341
342=cut
343
344sub require_version {
345    my ($self, $min_version) = @_;
346    my $current_version = $self->version();
347    $self->version($min_version) if $current_version < $min_version;
348    return $current_version;
349}
350
351=head2 $p->release()
352
353Releases ALL of the memory used by the PDF document and all of its
354component objects.  After calling this method, do B<NOT> expect to
355have anything left in the C<PDF::API2::Basic::PDF::File> object (so if
356you need to save, be sure to do it before calling this method).
357
358B<NOTE>, that it is important that you call this method on any
359C<PDF::API2::Basic::PDF::File> object when you wish to destruct it and
360free up its memory.  Internally, PDF files have an enormous number of
361cross-references and this causes circular references within the
362internal data structures.  Calling 'C<release()>' forces a brute-force
363cleanup of the data structures, freeing up all of the memory.  Once
364you've called this method, though, don't expect to be able to do
365anything else with the C<PDF::API2::Basic::PDF::File> object; it'll
366have B<no> internal state whatsoever.
367
368=cut
369
370# Maintainer's Question: Couldn't this be handled by a DESTROY method
371# instead of requiring an explicit call to release()?
372sub release {
373    my $self = shift();
374
375    return $self unless ref($self);
376    my @tofree = values %$self;
377
378    foreach my $key (keys %$self) {
379        $self->{$key} = undef;
380        delete $self->{$key};
381    }
382
383    # PDFs with highly-interconnected page trees or outlines can hit Perl's
384    # recursion limit pretty easily, so disable the warning for this specific
385    # loop.
386    no warnings 'recursion';
387
388    while (my $item = shift @tofree) {
389        if (blessed($item) and $item->can('release')) {
390            $item->release(1);
391        }
392        elsif (ref($item) eq 'ARRAY') {
393            push @tofree, @$item;
394        }
395        elsif (ref($item) eq 'HASH') {
396            push @tofree, values %$item;
397            foreach my $key (keys %$item) {
398                $item->{$key} = undef;
399                delete $item->{$key};
400            }
401        }
402        else {
403            $item = undef;
404        }
405    }
406}
407
408=head2 $p->append_file()
409
410Appends the objects for output to the read file and then appends the appropriate table.
411
412=cut
413
414sub append_file {
415    my $self = shift();
416    return unless $self->{' update'};
417
418    my $fh = $self->{' INFILE'};
419
420    my $tdict = PDFDict();
421    $tdict->{'Prev'} = PDFNum($self->{' loc'});
422    $tdict->{'Info'} = $self->{'Info'};
423    if (defined $self->{' newroot'}) {
424        $tdict->{'Root'} = $self->{' newroot'};
425    }
426    else {
427        $tdict->{'Root'} = $self->{'Root'};
428    }
429    $tdict->{'Size'} = $self->{'Size'};
430
431    foreach my $key (grep { $_ !~ m/^\s/ } keys %$self) {
432        $tdict->{$key} = $self->{$key} unless defined $tdict->{$key};
433    }
434
435    $fh->seek($self->{' epos'}, 0);
436    $self->out_trailer($tdict, $self->{' update'});
437    close $self->{' OUTFILE'};
438}
439
440
441=head2 $p->out_file($fname)
442
443Writes a PDF file to a file of the given filename based on the current list of
444objects to be output. It creates the trailer dictionary based on information
445in $self.
446
447$fname may be an IO object;
448
449=cut
450
451sub out_file {
452    my ($self, $fname) = @_;
453
454    $self->create_file($fname);
455    $self->close_file();
456    return $self;
457}
458
459
460=head2 $p->create_file($fname)
461
462Creates a new output file (no check is made of an existing open file) of
463the given filename or IO object. Note, make sure that $p->{' version'} is set
464correctly before calling this function.
465
466=cut
467
468sub create_file {
469    my ($self, $filename) = @_;
470    my $fh;
471
472    $self->{' fname'} = $filename;
473    if (ref $filename) {
474        $fh = $filename;
475    }
476    else {
477        $fh = IO::File->new(">$filename") || die "Unable to open $filename for writing";
478        binmode($fh,':raw');
479    }
480
481    $self->{' OUTFILE'} = $fh;
482    $fh->print('%PDF-' . ($self->{' version'} // '1.4') . "\n");
483    $fh->print("%\xC6\xCD\xCD\xB5\n");   # and some binary stuff in a comment
484    return $self;
485}
486
487
488=head2 $p->clone_file($fname)
489
490Creates a copy of the input file at the specified filename and sets it as the
491output file for future writes.  A file handle may be passed instead of a
492filename.
493
494=cut
495
496sub clone_file {
497    my ($self, $filename) = @_;
498    my $fh;
499
500    $self->{' fname'} = $filename;
501    if (ref $filename) {
502        $fh = $filename;
503    }
504    else {
505        $fh = IO::File->new(">$filename") || die "Unable to open $filename for writing";
506        binmode($fh,':raw');
507    }
508
509    $self->{' OUTFILE'} = $fh;
510
511    my $in = $self->{' INFILE'};
512    $in->seek(0, 0);
513    my $data;
514    while (not $in->eof()) {
515        $in->read($data, 1024 * 1024);
516        $fh->print($data);
517    }
518    return $self;
519}
520
521=head2 $p->close_file
522
523Closes up the open file for output by outputting the trailer etc.
524
525=cut
526
527sub close_file {
528    my $self = shift();
529
530    my $tdict = PDFDict();
531    $tdict->{'Info'} = $self->{'Info'} if defined $self->{'Info'};
532    $tdict->{'Root'} = (defined $self->{' newroot'} and $self->{' newroot'} ne '') ? $self->{' newroot'} : $self->{'Root'};
533
534    # remove all freed objects from the outlist, AND the outlist_cache if not updating
535    # NO! Don't do that thing! In fact, let out_trailer do the opposite!
536
537    $tdict->{'Size'} = $self->{'Size'} || PDFNum(1);
538    $tdict->{'Prev'} = PDFNum($self->{' loc'}) if $self->{' loc'};
539    if ($self->{' update'}) {
540        foreach my $key (grep { $_ !~ m/^[\s\-]/ } keys %$self) {
541            $tdict->{$key} = $self->{$key} unless defined $tdict->{$key};
542        }
543
544        my $fh = $self->{' INFILE'};
545        $fh->seek($self->{' epos'}, 0);
546    }
547
548    $self->out_trailer($tdict, $self->{' update'});
549    close($self->{' OUTFILE'});
550    if ($^O eq 'MacOS' and not ref($self->{' fname'})) {
551        MacPerl::SetFileInfo('CARO', 'TEXT', $self->{' fname'});
552    }
553
554    return $self;
555}
556
557=head2 ($value, $str) = $p->readval($str, %opts)
558
559Reads a PDF value from the current position in the file. If $str is too short
560then read some more from the current location in the file until the whole object
561is read. This is a recursive call which may slurp in a whole big stream (unprocessed).
562
563Returns the recursive data structure read and also the current $str that has been
564read from the file.
565
566=cut
567
568sub readval {
569    my ($self, $str, %opts) = @_;
570    my $fh = $self->{' INFILE'};
571    my ($result, $value);
572
573    my $update = defined($opts{update}) ? $opts{update} : 1;
574    $str = update($fh, $str) if $update;
575
576    $str =~ s/^$ws_char+//;               # Ignore initial white space
577    $str =~ s/^\%[^\015\012]*$ws_char+//; # Ignore comments
578
579    # Dictionary
580    if ($str =~ m/^<</s) {
581        $str = substr ($str, 2);
582        $str = update($fh, $str) if $update;
583        $result = PDFDict();
584
585        while ($str !~ m/^>>/) {
586            $str =~ s/^$ws_char+//;               # Ignore initial white space
587            $str =~ s/^\%[^\015\012]*$ws_char+//; # Ignore comments
588
589            if ($str =~ s|^/($reg_char+)||) {
590                my $key = PDF::API2::Basic::PDF::Name::name_to_string($1, $self);
591                ($value, $str) = $self->readval($str, %opts);
592                unless ((ref($value) // '') eq 'PDF::API2::Basic::PDF::Null') {
593                    $result->{$key} = $value;
594                }
595            }
596            elsif ($str =~ s|^/$ws_char+||) {
597                # fixes a broken key problem of acrobat. -- fredo
598                ($value, $str) = $self->readval($str, %opts);
599                unless ((ref($value) // '') eq 'PDF::API2::Basic::PDF::Null') {
600                    $result->{'null'} = $value;
601                }
602            }
603            elsif ($str =~ s|^//|/|) {
604                # fixes again a broken key problem of illustrator/enfocus. -- fredo
605                ($value, $str) = $self->readval($str, %opts);
606                unless ((ref($value) // '') eq 'PDF::API2::Basic::PDF::Null') {
607                    $result->{'null'} = $value;
608                }
609            }
610            else {
611                die "Invalid dictionary key";
612            }
613            $str = update($fh, $str) if $update; # thanks gareth.jones@stud.man.ac.uk
614        }
615        $str =~ s/^>>//;
616        $str = update($fh, $str) if $update;
617        # streams can't be followed by a lone carriage-return.
618        # fredo: yes they can !!! -- use the MacOS Luke.
619        if (($str =~ s/^stream(?:(?:\015\012)|\012|\015)//) and ($result->{'Length'}->val != 0)) {   # stream
620            my $length = $result->{'Length'}->val;
621            $result->{' streamsrc'} = $fh;
622            $result->{' streamloc'} = $fh->tell - length($str);
623
624            unless ($opts{'nostreams'}) {
625                if ($length > length($str)) {
626                    $value = $str;
627                    $length -= length($str);
628                    read $fh, $str, $length + 11; # slurp the whole stream!
629                }
630                else {
631                    $value = '';
632                }
633                $value .= substr($str, 0, $length);
634                $result->{' stream'} = $value;
635                $result->{' nofilt'} = 1;
636                $str = update($fh, $str, 1) if $update;  # tell update we are in-stream and only need an endstream
637                $str = substr($str, index($str, 'endstream') + 9);
638            }
639        }
640
641        if (defined $result->{'Type'} and defined $types{$result->{'Type'}->val}) {
642            bless $result, $types{$result->{'Type'}->val};
643        }
644        # gdj: FIXME: if any of the ws chars were crs, then the whole
645        # string might not have been read.
646    }
647
648    # Indirect Object
649    elsif ($str =~ m/^([0-9]+)(?:$ws_char|$re_comment)+([0-9]+)(?:$ws_char|$re_comment)+R/s) {
650        my $num = $1;
651        $value = $2;
652        $str =~ s/^([0-9]+)(?:$ws_char|$re_comment)+([0-9]+)(?:$ws_char|$re_comment)+R//s;
653        unless ($result = $self->test_obj($num, $value)) {
654            $result = PDF::API2::Basic::PDF::Objind->new();
655            $result->{' objnum'} = $num;
656            $result->{' objgen'} = $value;
657            $self->add_obj($result, $num, $value);
658        }
659        $result->{' parent'} = $self;
660        weaken $result->{' parent'};
661
662        # Removed to address changes being lost when an indirect object is realised twice
663        # $result->{' realised'} = 0;
664
665        # gdj: FIXME: if any of the ws chars were crs, then the whole
666        # string might not have been read.
667    }
668
669    # Object
670    elsif ($str =~ m/^([0-9]+)(?:$ws_char|$re_comment)+([0-9]+)(?:$ws_char|$re_comment)+obj/s) {
671        my $obj;
672        my $num = $1;
673        $value = $2;
674        $str =~ s/^([0-9]+)(?:$ws_char|$re_comment)+([0-9]+)(?:$ws_char|$re_comment)+obj//s;
675        ($obj, $str) = $self->readval($str, %opts);
676        if ($result = $self->test_obj($num, $value)) {
677            $result->merge($obj);
678        }
679        else {
680            $result = $obj;
681            $self->add_obj($result, $num, $value);
682            $result->{' realised'} = 1;
683        }
684        $str = update($fh, $str) if $update;       # thanks to kundrat@kundrat.sk
685        $str =~ s/^endobj//;
686    }
687
688    # Name
689    elsif ($str =~ m|^/($reg_char*)|s) {
690        $value = $1;
691        $str =~ s|^/($reg_char*)||s;
692        $result = PDF::API2::Basic::PDF::Name->from_pdf($value, $self);
693    }
694
695    # Literal String
696    elsif ($str =~ m/^\(/) {
697        # We now need to find an unbalanced, unescaped right-paren.
698        # This can't be done with a regex.
699        my $value = '(';
700        $str = substr($str, 1);
701
702        my $nested_level = 1;
703        while (1) {
704            # Ignore everything up to the first escaped or parenthesis character
705            if ($str =~ /^([^\\()]+)(.*)/s) {
706                $value .= $1;
707                $str = $2;
708            }
709
710            # Ignore escaped parentheses
711            if ($str =~ /^(\\[()])/) {
712                $value .= $1;
713                $str = substr($str, 2);
714            }
715
716            # Left parenthesis: increase nesting
717            elsif ($str =~ /^\(/) {
718                $value .= '(';
719                $str = substr($str, 1);
720                $nested_level++;
721            }
722
723            # Right parenthesis: decrease nesting
724            elsif ($str =~ /^\)/) {
725                $value .= ')';
726                $str = substr($str, 1);
727                $nested_level--;
728                last unless $nested_level;
729            }
730
731            # Other escaped character
732            elsif ($str =~ /^(\\[^()])/) {
733                $value .= $1;
734                $str = substr($str, 2);
735            }
736
737            # If there wasn't an escaped or parenthesis character,
738            # read some more.
739            else {
740                # We don't use update because we don't want to remove
741                # whitespace or comments.
742                $fh->read($str, 255, length($str)) or die 'Unterminated string.';
743            }
744        }
745
746        $result = PDF::API2::Basic::PDF::String->from_pdf($value);
747    }
748
749    # Hex String
750    elsif ($str =~ m/^</) {
751        $str =~ s/^<//;
752        $fh->read($str, 255, length($str)) while (0 > index($str, '>'));
753        ($value, $str) = ($str =~ /^(.*?)>(.*)/s);
754        $result = PDF::API2::Basic::PDF::String->from_pdf('<' . $value . '>');
755    }
756
757    # Array
758    elsif ($str =~ m/^\[/) {
759        $str =~ s/^\[//;
760        $str = update($fh, $str) if $update;
761        $result = PDFArray();
762        while ($str !~ m/^\]/) {
763            $str =~ s/^$ws_char+//;               # Ignore initial white space
764            $str =~ s/^\%[^\015\012]*$ws_char+//; # Ignore comments
765
766            ($value, $str) = $self->readval($str, %opts);
767            $result->add_elements($value);
768            $str = update($fh, $str) if $update;   # str might just be exhausted!
769        }
770        $str =~ s/^\]//;
771    }
772
773    # Boolean
774    elsif ($str =~ m/^(true|false)($irreg_char|$)/) {
775        $value = $1;
776        $str =~ s/^(?:true|false)//;
777        $result = PDF::API2::Basic::PDF::Bool->from_pdf($value);
778    }
779
780    # Number
781    elsif ($str =~ m/^([+-.0-9]+)($irreg_char|$)/) {
782        $value = $1;
783        $str =~ s/^([+-.0-9]+)//;
784
785        # If $str only consists of whitespace (or is empty), call update to
786        # see if this is the beginning of an indirect object or reference
787        if ($update and ($str =~ /^$re_whitespace*$/s or $str =~ /^$re_whitespace+[0-9]+$re_whitespace*$/s)) {
788            $str =~ s/^$re_whitespace+/ /s;
789            $str =~ s/$re_whitespace+$/ /s;
790            $str = update($fh, $str);
791            if ($str =~ m/^$re_whitespace*([0-9]+)$re_whitespace+(?:R|obj)/s) {
792                return $self->readval("$value $str", %opts);
793            }
794        }
795
796        $result = PDF::API2::Basic::PDF::Number->from_pdf($value);
797    }
798
799    # Null
800    elsif ($str =~ m/^null($irreg_char|$)/) {
801        $str =~ s/^null//;
802        $result = PDF::API2::Basic::PDF::Null->new;
803    }
804
805    else {
806        die "Can't parse `$str' near " . ($fh->tell()) . " length " . length($str) . ".";
807    }
808
809    $str =~ s/^$ws_char+//s;
810    return ($result, $str);
811}
812
813
814=head2 $ref = $p->read_obj($objind, %opts)
815
816Given an indirect object reference, locate it and read the object returning
817the read in object.
818
819=cut
820
821sub read_obj {
822    my ($self, $objind, %opts) = @_;
823
824    my $res = $self->read_objnum($objind->{' objnum'}, $objind->{' objgen'}, %opts) || return;
825    $objind->merge($res) unless $objind eq $res;
826    return $objind;
827}
828
829
830=head2 $ref = $p->read_objnum($num, $gen, %opts)
831
832Returns a fully read object of given number and generation in this file
833
834=cut
835
836sub read_objnum {
837    my ($self, $num, $gen, %opts) = @_;
838    croak 'Undefined object number in call to read_objnum($num, $gen)' unless defined $num;
839    croak 'Undefined object generation in call to read_objnum($num, $gen)' unless defined $gen;
840    croak "Invalid object number '$num' in call to read_objnum" unless $num =~ /^[0-9]+$/;
841    croak "Invalid object generation '$gen' in call to read_objnum" unless $gen =~ /^[0-9]+$/;
842
843    my $object_location = $self->locate_obj($num, $gen) || return;
844    my $object;
845
846    # Compressed object
847    if (ref($object_location)) {
848        my ($object_stream_num, $object_stream_pos) = @{$object_location};
849
850        my $object_stream = $self->read_objnum($object_stream_num, 0, %opts);
851        die 'Cannot find the compressed object stream' unless $object_stream;
852
853        $object_stream->read_stream() if $object_stream->{' nofilt'};
854
855        # An object stream starts with pairs of integers containing object numbers and
856        # stream offsets relative to the First key
857        my $fh;
858        my $pairs;
859        unless ($object_stream->{' streamfile'}) {
860            $pairs = substr($object_stream->{' stream'}, 0, $object_stream->{'First'}->val);
861        }
862        else {
863            CORE::open($fh, '<', $object_stream->{' streamfile'});
864            read($fh, $pairs, $object_stream->{'First'}->val());
865        }
866        my @map = split /\s+/, $pairs;
867
868        # Find the offset of the object in the stream
869        my $index = $object_stream_pos * 2;
870        die "Objind $num does not exist at index $index" unless $map[$index] == $num;
871        my $start = $map[$index + 1];
872
873        # Unless this is the last object in the stream, its length is determined by the
874        # offset of the next object
875        my $last_object_in_stream = $map[-2];
876        my $length;
877        if ($last_object_in_stream == $num) {
878            if ($object_stream->{' stream'}) {
879                $length = length($object_stream->{' stream'}) - $object_stream->{'First'}->val() - $start;
880            }
881            else {
882                $length = (-s $object_stream->{' streamfile'}) - $object_stream->{'First'}->val() - $start;
883            }
884        }
885        else {
886            my $next_start = $map[$index + 3];
887            $length = $next_start - $start;
888        }
889
890        # Read the object from the stream
891        my $stream = "$num 0 obj ";
892        unless ($object_stream->{' streamfile'}) {
893            $stream .= substr($object_stream->{' stream'}, $object_stream->{'First'}->val() + $start, $length);
894        }
895        else {
896            seek($fh, $object_stream->{'First'}->val() + $start, 0);
897            read($fh, $stream, $length, length($stream));
898            close $fh;
899        }
900
901        ($object) = $self->readval($stream, %opts, update => 0);
902        return $object;
903    }
904
905    my $current_location = $self->{' INFILE'}->tell;
906    $self->{' INFILE'}->seek($object_location, 0);
907    ($object) = $self->readval('', %opts);
908    $self->{' INFILE'}->seek($current_location, 0);
909    return $object;
910}
911
912
913=head2 $objind = $p->new_obj($obj)
914
915Creates a new, free object reference based on free space in the cross reference chain.
916If nothing free then thinks up a new number. If $obj then turns that object into this
917new object rather than returning a new object.
918
919=cut
920
921sub new_obj {
922    my ($self, $base) = @_;
923    my $res;
924
925    if (defined $self->{' free'} and scalar @{$self->{' free'}} > 0) {
926        $res = shift(@{$self->{' free'}});
927        if (defined $base) {
928            my ($num, $gen) = @{$self->{' objects'}{$res->uid}};
929            $self->remove_obj($res);
930            $self->add_obj($base, $num, $gen);
931            return $self->out_obj($base);
932        }
933        else {
934            $self->{' objects'}{$res->uid}[2] = 0;
935            return $res;
936        }
937    }
938
939    my $tdict = $self;
940    my $i;
941    while (defined $tdict) {
942        $i = $tdict->{' xref'}{defined($i) ? $i : ''}[0];
943        while (defined $i and $i != 0) {
944            my ($ni, $ng) = @{$tdict->{' xref'}{$i}};
945            unless (defined $self->locate_obj($i, $ng)) {
946                if (defined $base) {
947                    $self->add_obj($base, $i, $ng);
948                    return $base;
949                }
950                else {
951                    $res = $self->test_obj($i, $ng) || $self->add_obj(PDF::API2::Basic::PDF::Objind->new(), $i, $ng);
952                    $self->out_obj($res);
953                    return $res;
954                }
955            }
956            $i = $ni;
957        }
958        $tdict = $tdict->{' prev'};
959    }
960
961    $i = $self->{' maxobj'}++;
962    if (defined $base) {
963        $self->add_obj($base, $i, 0);
964        $self->out_obj($base);
965        return $base;
966    }
967    else {
968        $res = $self->add_obj(PDF::API2::Basic::PDF::Objind->new(), $i, 0);
969        $self->out_obj($res);
970        return $res;
971    }
972}
973
974
975=head2 $p->out_obj($objind)
976
977Indicates that the given object reference should appear in the output xref
978table whether with data or freed.
979
980=cut
981
982sub out_obj {
983    my ($self, $obj) = @_;
984
985    # This is why we've been keeping the outlist CACHE around; to speed
986    # up this method by orders of magnitude (it saves up from having to
987    # grep the full outlist each time through as we'll just do a lookup
988    # in the hash) (which is super-fast).
989    unless (exists $self->{' outlist_cache'}{$obj}) {
990        push @{$self->{' outlist'}}, $obj;
991        # weaken $self->{' outlist'}->[-1];
992        $self->{' outlist_cache'}{$obj} = 1;
993    }
994    return $obj;
995}
996
997
998=head2 $p->free_obj($objind)
999
1000Marks an object reference for output as being freed.
1001
1002=cut
1003
1004sub free_obj {
1005    my ($self, $obj) = @_;
1006
1007    push @{$self->{' free'}}, $obj;
1008    $self->{' objects'}{$obj->uid()}[2] = 1;
1009    $self->out_obj($obj);
1010}
1011
1012
1013=head2 $p->remove_obj($objind)
1014
1015Removes the object from all places where we might remember it
1016
1017=cut
1018
1019sub remove_obj {
1020    my ($self, $objind) = @_;
1021
1022    # who says it has to be fast
1023    delete $self->{' objects'}{$objind->uid()};
1024    delete $self->{' outlist_cache'}{$objind};
1025    delete $self->{' printed_cache'}{$objind};
1026    @{$self->{' outlist'}} = grep { $_ ne $objind } @{$self->{' outlist'}};
1027    @{$self->{' printed'}} = grep { $_ ne $objind } @{$self->{' printed'}};
1028    $self->{' objcache'}{$objind->{' objnum'}, $objind->{' objgen'}} = undef
1029        if $self->{' objcache'}{$objind->{' objnum'}, $objind->{' objgen'}} eq $objind;
1030    return $self;
1031}
1032
1033
1034=head2 $p->ship_out(@objects)
1035
1036Ships the given objects (or all objects for output if @objects is empty) to
1037the currently open output file (assuming there is one). Freed objects are not
1038shipped, and once an object is shipped it is switched such that this file
1039becomes its source and it will not be shipped again unless out_obj is called
1040again. Notice that a shipped out object can be re-output or even freed, but
1041that it will not cause the data already output to be changed.
1042
1043=cut
1044
1045sub ship_out {
1046    my ($self, @objs) = @_;
1047
1048    die "No output file specified" unless defined $self->{' OUTFILE'};
1049    my $fh = $self->{' OUTFILE'};
1050    seek($fh, 0, 2); # go to the end of the file
1051
1052    @objs = @{$self->{' outlist'}} unless scalar @objs > 0;
1053    foreach my $objind (@objs) {
1054        next unless $objind->is_obj($self);
1055        my $j = -1;
1056        for (my $i = 0; $i < scalar @{$self->{' outlist'}}; $i++) {
1057            if ($self->{' outlist'}[$i] eq $objind) {
1058                $j = $i;
1059                last;
1060            }
1061        }
1062        next if $j < 0;
1063        splice(@{$self->{' outlist'}}, $j, 1);
1064        delete $self->{' outlist_cache'}{$objind};
1065        next if grep { $_ eq $objind } @{$self->{' free'}};
1066
1067        map { $fh->print("\%   $_ \n") } split(/$cr/, $objind->{' comments'}) if $objind->{' comments'};
1068        $self->{' locs'}{$objind->uid()} = $fh->tell();
1069        my ($objnum, $objgen) = @{$self->{' objects'}{$objind->uid()}}[0..1];
1070        $fh->printf('%d %d obj ', $objnum, $objgen);
1071        $objind->outobjdeep($fh, $self);
1072        $fh->print(" endobj\n");
1073
1074        # Note that we've output this obj, not forgetting to update
1075        # the cache of whats printed.
1076        unless (exists $self->{' printed_cache'}{$objind}) {
1077            push @{$self->{' printed'}}, $objind;
1078            $self->{' printed_cache'}{$objind}++;
1079        }
1080    }
1081    return $self;
1082}
1083
1084=head2 $p->copy($outpdf, \&filter)
1085
1086Iterates over every object in the file reading the object, calling filter with the object
1087and outputting the result. if filter is not defined, then just copies input to output.
1088
1089=cut
1090
1091sub copy {
1092    my ($self, $out, $filter) = @_;
1093    my ($obj, $minl, $mini, $ming);
1094
1095    foreach my $key (grep { not m/^[\s\-]/ } keys %$self) {
1096        $out->{$key} = $self->{$key} unless defined $out->{$key};
1097    }
1098
1099    my $tdict = $self;
1100    while (defined $tdict) {
1101        foreach my $i (sort {$a <=> $b} keys %{$tdict->{' xref'}}) {
1102            my ($nl, $ng, $nt) = @{$tdict->{' xref'}{$i}};
1103            next unless $nt eq 'n';
1104
1105            if ($nl < $minl or $mini == 0) {
1106                $mini = $i;
1107                $ming = $ng;
1108                $minl = $nl;
1109            }
1110            unless ($obj = $self->test_obj($i, $ng)) {
1111                $obj = PDF::API2::Basic::PDF::Objind->new();
1112                $obj->{' objnum'} = $i;
1113                $obj->{' objgen'} = $ng;
1114                $self->add_obj($obj, $i, $ng);
1115                $obj->{' parent'} = $self;
1116                weaken $obj->{' parent'};
1117                $obj->{' realised'} = 0;
1118            }
1119            $obj->realise;
1120            my $res = defined $filter ? &{$filter}($obj) : $obj;
1121            $out->new_obj($res) unless (!$res || $res->is_obj($out));
1122        }
1123        $tdict = $tdict->{' prev'};
1124    }
1125
1126    # test for linearized and remove it from output
1127    $obj = $self->test_obj($mini, $ming);
1128    if ($obj->isa('PDF::API2::Basic::PDF::Dict') && $obj->{'Linearized'}) {
1129        $out->free_obj($obj);
1130    }
1131
1132    return $self;
1133}
1134
1135
1136=head1 PRIVATE METHODS & FUNCTIONS
1137
1138The following methods and functions are considered private to this class. This
1139does not mean you cannot use them if you have a need, just that they aren't really
1140designed for users of this class.
1141
1142=head2 $offset = $p->locate_obj($num, $gen)
1143
1144Returns a file offset to the object asked for by following the chain of cross
1145reference tables until it finds the one you want.
1146
1147=cut
1148
1149sub locate_obj {
1150    my ($self, $num, $gen) = @_;
1151
1152    my $tdict = $self;
1153    while (defined $tdict) {
1154        if (ref $tdict->{' xref'}{$num}) {
1155            my $ref = $tdict->{' xref'}{$num};
1156            return $ref unless scalar(@$ref) == 3;
1157
1158            if ($ref->[1] == $gen) {
1159                return $ref->[0] if $ref->[2] eq 'n';
1160                return;        # if $ref->[2] eq 'f';
1161            }
1162        }
1163        $tdict = $tdict->{' prev'};
1164    }
1165    return;
1166}
1167
1168
1169=head2 update($fh, $str, $instream)
1170
1171Keeps reading $fh for more data to ensure that $str has at least a line full
1172for C<readval> to work on. At this point we also take the opportunity to ignore
1173comments.
1174
1175=cut
1176
1177sub update {
1178    my ($fh, $str, $instream) = @_;
1179    print STDERR 'fpos=' . tell($fh) . ' strlen=' . length($str) . "\n" if $readDebug;
1180    if ($instream) {
1181        # we are inside a (possible binary) stream
1182        # so we fetch data till we see an 'endstream'
1183        # -- fredo/2004-09-03
1184        while ($str !~ m/endstream/ and not $fh->eof()) {
1185            print STDERR 'fpos=' . tell($fh) . ' strlen=' . length($str) . "\n" if $readDebug;
1186            $fh->read($str, 314, length($str));
1187        }
1188    }
1189    else {
1190        $str =~ s/^$ws_char*//;
1191        while ($str !~ m/$cr/ and not $fh->eof()) {
1192            print STDERR 'fpos=' . tell($fh) . ' strlen=' . length($str) . "\n" if $readDebug;
1193            $fh->read($str, 314, length($str));
1194            $str =~ s/^$ws_char*//so;
1195        }
1196        while ($str =~ m/^\%/) { # restructured by fredo/2003-03-23
1197            print STDERR 'fpos=' . tell($fh) . ' strlen=' . length($str) . "\n" if $readDebug;
1198            $fh->read($str, 314, length($str)) while ($str !~ m/$cr/ and not $fh->eof());
1199            $str =~ s/^\%[^\015\012]*$ws_char*//so; # fixed for reportlab -- fredo
1200        }
1201    }
1202
1203    return $str;
1204}
1205
1206=head2 $objind = $p->test_obj($num, $gen)
1207
1208Tests the cache to see whether an object reference (which may or may not have
1209been getobj()ed) has been cached. Returns it if it has.
1210
1211=cut
1212
1213sub test_obj {
1214    my ($self, $num, $gen) = @_;
1215    return $self->{' objcache'}{$num, $gen};
1216}
1217
1218
1219=head2 $p->add_obj($objind)
1220
1221Adds the given object to the internal object cache.
1222
1223=cut
1224
1225sub add_obj {
1226    my ($self, $obj, $num, $gen) = @_;
1227
1228    $self->{' objcache'}{$num, $gen} = $obj;
1229    $self->{' objects'}{$obj->uid()} = [$num, $gen];
1230    # weaken $self->{' objcache'}{$num, $gen};
1231    return $obj;
1232}
1233
1234
1235=head2 $tdict = $p->readxrtr($xpos)
1236
1237Recursive function which reads each of the cross-reference and trailer tables
1238in turn until there are no more.
1239
1240Returns a dictionary corresponding to the trailer chain. Each trailer also
1241includes the corresponding cross-reference table.
1242
1243The structure of the xref private element in a trailer dictionary is of an
1244anonymous hash of cross reference elements by object number. Each element
1245consists of an array of 3 elements corresponding to the three elements read
1246in [location, generation number, free or used]. See the PDF specification
1247for details.
1248
1249=cut
1250
1251sub _unpack_xref_stream {
1252    my ($self, $width, $data) = @_;
1253
1254    return unpack('C', $data)       if $width == 1;
1255    return unpack('n', $data)       if $width == 2;
1256    return unpack('N', "\x00$data") if $width == 3;
1257    return unpack('N', $data)       if $width == 4;
1258    return unpack('Q>', $data)      if $width == 8;
1259
1260    die "Unsupported xref stream entry width: $width";
1261}
1262
1263sub readxrtr {
1264    my ($self, $xpos) = @_;
1265    my ($tdict, $buf, $xmin, $xnum, $xdiff);
1266
1267    my $fh = $self->{' INFILE'};
1268    $fh->seek($xpos, 0);
1269    $fh->read($buf, 22);
1270    $buf = update($fh, $buf); # fix for broken JAWS xref calculation.
1271
1272    my $xlist = {};
1273
1274    ## seams that some products calculate wrong prev entries (short)
1275    ## so we seek ahead to find one -- fredo; save for now
1276    #while($buf !~ m/^xref$cr/i && !eof($fh))
1277    #{
1278    #    $buf =~ s/^(\s+|\S+|.)//i;
1279    #    $buf=update($fh,$buf);
1280    #}
1281
1282    if ($buf =~ s/^xref$cr//i) {
1283        # Plain XRef tables.
1284        while ($buf =~ m/^$ws_char*([0-9]+)$ws_char+([0-9]+)$ws_char*$cr(.*?)$/s) {
1285            my $old_buf = $buf;
1286            $xmin = $1;
1287            $xnum = $2;
1288            $buf  = $3;
1289            unless ($old_buf =~ /^[0-9]+ [0-9]+$cr/) {
1290                # See PDF 1.7 section 7.5.4: Cross-Reference Table
1291                warn q{Malformed xref in PDF file: subsection shall begin with a line containing two numbers separated by a SPACE (20h)};
1292            }
1293            $xdiff = length($buf);
1294
1295            $fh->read($buf, 20 * $xnum - $xdiff + 15, $xdiff);
1296            while ($xnum-- > 0 and $buf =~ s/^0*([0-9]*)$ws_char+0*([0-9]+)$ws_char+([nf])$cr//) {
1297                $xlist->{$xmin} = [$1, $2, $3] unless exists $xlist->{$xmin};
1298                $xmin++;
1299            }
1300        }
1301
1302        if ($buf !~ /^\s*trailer\b/i) {
1303            die "Malformed trailer in PDF file $self->{' fname'} at " . ($fh->tell - length($buf));
1304        }
1305
1306        $buf =~ s/^\s*trailer\b//i;
1307
1308        ($tdict, $buf) = $self->readval($buf);
1309    }
1310    elsif ($buf =~ m/^(\d+)\s+(\d+)\s+obj/i) {
1311        my ($xref_obj, $xref_gen) = ($1, $2);
1312
1313        # XRef streams.
1314        ($tdict, $buf) = $self->readval($buf);
1315
1316        unless ($tdict->{' stream'}) {
1317            die "Malformed XRefStm at $xref_obj $xref_gen obj in PDF file $self->{' fname'}";
1318        }
1319        $tdict->read_stream(1);
1320
1321        my $stream = $tdict->{' stream'};
1322        my @widths = map { $_->val } @{$tdict->{W}->val};
1323
1324        my $start = 0;
1325        my $last;
1326
1327        my @index;
1328        if (defined $tdict->{Index}) {
1329            @index = map { $_->val() } @{$tdict->{Index}->val};
1330        }
1331        else {
1332            @index = (0, $tdict->{Size}->val);
1333        }
1334
1335        while (scalar @index) {
1336            $start = shift(@index);
1337            $last = $start + shift(@index) - 1;
1338
1339            for my $i ($start...$last) {
1340                # Replaced "for $xmin" because it creates a loop-specific local variable, and we
1341                # need $xmin to be correct for maxobj below.
1342                $xmin = $i;
1343
1344                my @cols;
1345
1346                for my $w (@widths) {
1347                    my $data;
1348                    $data = $self->_unpack_xref_stream($w, substr($stream, 0, $w, '')) if $w;
1349
1350                    push @cols, $data;
1351                }
1352
1353                $cols[0] = 1 unless defined $cols[0];
1354                if ($cols[0] > 2) {
1355                    die "Invalid XRefStm entry type ($cols[0]) at $xref_obj $xref_gen obj";
1356                }
1357
1358                next if exists $xlist->{$xmin};
1359
1360                my @objind = ($cols[1], defined($cols[2]) ? $cols[2] : ($xmin ? 0 : 65535));
1361                push @objind, ($cols[0] == 0 ? 'f' : 'n') if $cols[0] < 2;
1362
1363                $xlist->{$xmin} = \@objind;
1364            }
1365        }
1366    }
1367    else {
1368        die "Malformed xref in PDF file $self->{' fname'}";
1369    }
1370
1371    $tdict->{' loc'} = $xpos;
1372    $tdict->{' xref'} = $xlist;
1373    $self->{' maxobj'} = $xmin + 1 if $xmin + 1 > $self->{' maxobj'};
1374    $tdict->{' prev'} = $self->readxrtr($tdict->{'Prev'}->val)
1375        if (defined $tdict->{'Prev'} and $tdict->{'Prev'}->val != 0);
1376    delete $tdict->{' prev'} unless defined $tdict->{' prev'};
1377    return $tdict;
1378}
1379
1380
1381=head2 $p->out_trailer($tdict)
1382
1383Outputs the body and trailer for a PDF file by outputting all the objects in
1384the ' outlist' and then outputting a xref table for those objects and any
1385freed ones. It then outputs the trailing dictionary and the trailer code.
1386
1387=cut
1388
1389sub out_trailer {
1390    my ($self, $tdict, $update) = @_;
1391    my $fh = $self->{' OUTFILE'};
1392
1393    while (@{$self->{' outlist'}}) {
1394        $self->ship_out();
1395    }
1396
1397    # When writing new trailers, most dictionary entries get copied from the
1398    # previous trailer, but entries related to cross-reference streams should
1399    # get removed (and possibly recreated below).
1400    delete $tdict->{$_} for (# Entries common to streams
1401                             qw(Length Filter DecodeParms F FFilter FDecodeParms DL),
1402
1403                             # Entries specific to cross-reference streams
1404                             qw(Index W XRefStm));
1405
1406    $tdict->{'Size'} = PDFNum($self->{' maxobj'});
1407
1408    my $tloc = $fh->tell();
1409    my @out;
1410
1411    my @xreflist = sort { $self->{' objects'}{$a->uid}[0] <=> $self->{' objects'}{$b->uid}[0] } (@{$self->{' printed'} || []}, @{$self->{' free'} || []});
1412
1413    my ($i, $j, $k);
1414    unless ($update) {
1415        $i = 1;
1416        for ($j = 0; $j < @xreflist; $j++) {
1417            my @inserts;
1418            $k = $xreflist[$j];
1419            while ($i < $self->{' objects'}{$k->uid}[0]) {
1420                my ($n) = PDF::API2::Basic::PDF::Objind->new();
1421                $self->add_obj($n, $i, 0);
1422                $self->free_obj($n);
1423                push(@inserts, $n);
1424                $i++;
1425            }
1426            splice(@xreflist, $j, 0, @inserts);
1427            $j += @inserts;
1428            $i++;
1429        }
1430    }
1431
1432    my @freelist = sort { $self->{' objects'}{$a->uid}[0] <=> $self->{' objects'}{$b->uid}[0] } @{$self->{' free'} || []};
1433
1434    $j = 0; my $first = -1; $k = 0;
1435    for ($i = 0; $i <= $#xreflist + 1; $i++) {
1436        if ($i > $#xreflist || $self->{' objects'}{$xreflist[$i]->uid}[0] != $j + 1) {
1437            push @out, ($first == -1 ? "0 " : "$self->{' objects'}{$xreflist[$first]->uid}[0] ") . ($i - $first) . "\n";
1438            if ($first == -1) {
1439                push @out, sprintf("%010d 65535 f \n", defined $freelist[$k] ? $self->{' objects'}{$freelist[$k]->uid}[0] : 0);
1440                $first = 0;
1441            }
1442            for ($j = $first; $j < $i; $j++) {
1443                my $xref = $xreflist[$j];
1444                if (defined($freelist[$k]) and defined($xref) and "$freelist[$k]" eq "$xref") {
1445                    $k++;
1446                    push @out, pack("A10AA5A4",
1447                                    sprintf("%010d", (defined $freelist[$k] ?
1448                                                      $self->{' objects'}{$freelist[$k]->uid}[0] : 0)), " ",
1449                                    sprintf("%05d", $self->{' objects'}{$xref->uid}[1] + 1),
1450                                    " f \n");
1451                }
1452                else {
1453                    push @out, pack("A10AA5A4", sprintf("%010d", $self->{' locs'}{$xref->uid}), " ",
1454                            sprintf("%05d", $self->{' objects'}{$xref->uid}[1]),
1455                            " n \n");
1456                }
1457            }
1458            $first = $i;
1459            $j = $self->{' objects'}{$xreflist[$i]->uid}[0] if ($i < scalar @xreflist);
1460        }
1461        else {
1462            $j++;
1463        }
1464    }
1465    if (exists $tdict->{'Type'} and $tdict->{'Type'}->val() eq 'XRef') {
1466        my (@index, @stream);
1467        for (@out) {
1468            my @a = split;
1469            @a == 2 ? push @index, @a : push @stream, \@a;
1470        }
1471        my $i = $self->{' maxobj'}++;
1472        $self->add_obj($tdict, $i, 0);
1473        $self->out_obj($tdict );
1474
1475        push @index, $i, 1;
1476        push @stream, [$tloc, 0, 'n'];
1477
1478        my $len = $tloc > 0xFFFF ? 4 : 2;           # don't expect files > 4 Gb
1479        my $tpl = $tloc > 0xFFFF ? 'CNC' : 'CnC';   # don't expect gennum > 255, it's absurd.
1480                                                    # Adobe doesn't use them anymore anyway
1481        my $stream = '';
1482        my @prev = (0) x ($len + 2);
1483        for (@stream) {
1484            $_->[1] = 0 if $_->[2] eq 'f' and $_->[1] == 65535;
1485            my @line = unpack 'C*', pack $tpl, $_->[2] eq 'n' ? 1 : 0, @{$_}[0..1];
1486
1487            $stream .= pack 'C*', 2,                # prepend filtering method, "PNG Up"
1488                map {($line[$_] - $prev[$_] + 256) % 256 } 0 .. $#line;
1489            @prev = @line;
1490        }
1491        $tdict->{'Size'}   = PDFNum($i + 1);
1492        $tdict->{'Index'}  = PDFArray(map PDFNum( $_ ), @index);
1493        $tdict->{'W'}      = PDFArray(map PDFNum( $_ ), 1, $len, 1);
1494        $tdict->{'Filter'} = PDFName('FlateDecode');
1495
1496        $tdict->{'DecodeParms'} = PDFDict();
1497        $tdict->{'DecodeParms'}->val->{'Predictor'} = PDFNum(12);
1498        $tdict->{'DecodeParms'}->val->{'Columns'}   = PDFNum($len + 2);
1499
1500        $stream = PDF::API2::Basic::PDF::Filter::FlateDecode->new->outfilt($stream, 1);
1501        $tdict->{' stream'} = $stream;
1502        $tdict->{' nofilt'} = 1;
1503        delete $tdict->{'Length'};
1504        $self->ship_out();
1505    }
1506    else {
1507        $fh->print("xref\n", @out, "trailer\n");
1508        $tdict->outobjdeep($fh, $self);
1509        $fh->print("\n");
1510    }
1511    $fh->print("startxref\n$tloc\n%%EOF\n");
1512}
1513
1514
1515=head2 PDF::API2::Basic::PDF::File->_new
1516
1517Creates a very empty PDF file object (used by new and open)
1518
1519=cut
1520
1521sub _new {
1522    my $class = shift();
1523    my $self = {};
1524
1525    bless $self, $class;
1526    $self->{' outlist'} = [];
1527    $self->{' outlist_cache'} = {};     # A cache of whats in the 'outlist'
1528    $self->{' maxobj'} = 1;
1529    $self->{' objcache'} = {};
1530    $self->{' objects'} = {};
1531
1532    return $self;
1533}
1534
15351;
1536
1537=head1 AUTHOR
1538
1539Martin Hosken Martin_Hosken@sil.org
1540
1541Copyright Martin Hosken 1999 and onwards
1542
1543No warranty or expression of effectiveness, least of all regarding anyone's
1544safety, is implied in this software or documentation.
1545