1# Code in the PDF::API2::Basic::PDF namespace was originally copied from the
2# Text::PDF distribution.
3#
4# Copyright Martin Hosken <Martin_Hosken@sil.org>
5#
6# Martin Hosken's code may be used under the terms of the MIT license.
7# Subsequent versions of the code have the same license as PDF::API2.
8
9package PDF::API2::Basic::PDF::Dict;
10
11use base 'PDF::API2::Basic::PDF::Objind';
12
13use strict;
14no warnings qw[ deprecated recursion uninitialized ];
15
16our $VERSION = '2.042'; # VERSION
17
18our $mincache = 16 * 1024 * 1024;
19
20use File::Temp;
21use PDF::API2::Basic::PDF::Array;
22use PDF::API2::Basic::PDF::Filter;
23use PDF::API2::Basic::PDF::Name;
24
25=head1 NAME
26
27PDF::API2::Basic::PDF::Dict - Low-level dictionary and stream objects
28
29=head1 INSTANCE VARIABLES
30
31There are various special instance variables which are used to look after,
32particularly, streams. Each begins with a space:
33
34=over
35
36=item stream
37
38Holds the stream contents for output
39
40=item streamfile
41
42Holds the stream contents in an external file rather than in memory. This is
43not the same as a PDF file stream. The data is stored in its unfiltered form.
44
45=item streamloc
46
47If both ' stream' and ' streamfile' are empty, this indicates where in the
48source PDF the stream starts.
49
50=back
51
52=head1 METHODS
53
54=cut
55
56sub new {
57    my $class = shift();
58    $class = ref($class) if ref($class);
59
60    my $self = $class->SUPER::new(@_);
61    $self->{' realised'} = 1;
62    return $self;
63}
64
65=head2 $type = $d->type($type)
66
67Get/Set the standard Type key.  It can be passed, and will return, a text value rather than a Name object.
68
69=cut
70
71sub type {
72    my $self = shift();
73    if (scalar @_) {
74        my $type = shift();
75        $self->{'Type'} = ref($type) ? $type : PDF::API2::Basic::PDF::Name->new($type);
76    }
77    return unless exists $self->{'Type'};
78    return $self->{'Type'}->val();
79}
80
81=head2 @filters = $d->filter(@filters)
82
83Get/Set one or more filters being used by the optional stream attached to the dictionary.
84
85=cut
86
87sub filter {
88    my ($self, @filters) = @_;
89
90    # Developer's Note: the PDF specification allows Filter to be
91    # either a name or an array, but other parts of this codebase
92    # expect an array.  If these are updated uncomment the
93    # commented-out lines in order to accept both types.
94
95    # if (scalar @filters == 1) {
96    #     $self->{'Filter'} = ref($filters[0]) ? $filters[0] : PDF::API2::Basic::PDF::Name->new($filters[0]);
97    # }
98    # elsif (scalar @filters) {
99        @filters = map { ref($_) ? $_ : PDF::API2::Basic::PDF::Name->new($_) } @filters;
100        $self->{'Filter'} = PDF::API2::Basic::PDF::Array->new(@filters);
101    # }
102}
103
104# Undocumented alias, which may be removed in a future release
105sub filters { return filter(@_); }
106
107=head2 $d->outobjdeep($fh)
108
109Outputs the contents of the dictionary to a PDF file. This is a recursive call.
110
111It also outputs a stream if the dictionary has a stream element. If this occurs
112then this method will calculate the length of the stream and insert it into the
113stream's dictionary.
114
115=cut
116
117sub outobjdeep {
118    my ($self, $fh, $pdf) = @_;
119
120    if (defined $self->{' stream'} or defined $self->{' streamfile'} or defined $self->{' streamloc'}) {
121        if ($self->{'Filter'} and $self->{' nofilt'}) {
122            $self->{'Length'} ||= PDF::API2::Basic::PDF::Number->new(length($self->{' stream'}));
123        }
124        elsif ($self->{'Filter'} or not defined $self->{' stream'}) {
125            $self->{'Length'} = PDF::API2::Basic::PDF::Number->new(0) unless defined $self->{'Length'};
126            $pdf->new_obj($self->{'Length'}) unless $self->{'Length'}->is_obj($pdf);
127        }
128        else {
129            $self->{'Length'} = PDF::API2::Basic::PDF::Number->new(length($self->{' stream'}));
130        }
131    }
132
133    $fh->print('<< ');
134    foreach my $key (sort {
135                         $a eq 'Type'    ? -1 : $b eq 'Type'    ? 1 :
136                         $a eq 'Subtype' ? -1 : $b eq 'Subtype' ? 1 : $a cmp $b
137                     } keys %$self) {
138        next if $key =~ m/^[\s\-]/o;
139        next unless $self->{$key};
140        $fh->print('/' . PDF::API2::Basic::PDF::Name::string_to_name($key, $pdf) . ' ');
141        $self->{$key}->outobj($fh, $pdf);
142        $fh->print(' ');
143    }
144    $fh->print('>>');
145
146    # Now handle the stream (if any)
147    my (@filters, $loc);
148
149    if (defined $self->{' streamloc'} and not defined $self->{' stream'}) {
150        # read a stream if in file
151        $loc = $fh->tell();
152        $self->read_stream();
153        $fh->seek($loc, 0);
154    }
155
156    if (not $self->{' nofilt'} and defined $self->{'Filter'} and (defined $self->{' stream'} or defined $self->{' streamfile'})) {
157        my $hasflate = -1;
158        for my $i (0 .. scalar(@{$self->{'Filter'}{' val'}}) - 1) {
159            my $filter = $self->{'Filter'}{' val'}[$i]->val();
160            # hack to get around LZW patent
161            if ($filter eq 'LZWDecode') {
162                if ($hasflate < -1) {
163                    $hasflate = $i;
164                    next;
165                }
166                $filter = 'FlateDecode';
167                $self->{'Filter'}{' val'}[$i]{'val'} = $filter;      # !!!
168            }
169            elsif ($filter eq 'FlateDecode') {
170                $hasflate = -2;
171            }
172            my $filter_class = "PDF::API2::Basic::PDF::Filter::$filter";
173            push (@filters, $filter_class->new());
174        }
175        splice(@{$self->{'Filter'}{' val'}}, $hasflate, 1) if $hasflate > -1;
176    }
177
178    if (defined $self->{' stream'}) {
179        $fh->print(" stream\n");
180        $loc = $fh->tell();
181        my $stream = $self->{' stream'};
182        unless ($self->{' nofilt'}) {
183            foreach my $filter (reverse @filters) {
184                $stream = $filter->outfilt($stream, 1);
185            }
186        }
187        $fh->print($stream);
188        ## $fh->print("\n"); # newline goes into endstream
189
190    }
191    elsif (defined $self->{' streamfile'}) {
192        open(my $dictfh, "<", $self->{' streamfile'}) || die "Unable to open $self->{' streamfile'}";
193        binmode($dictfh, ':raw');
194
195        $fh->print(" stream\n");
196        $loc = $fh->tell();
197        my $stream;
198        while (read($dictfh, $stream, 4096)) {
199            unless ($self->{' nofilt'}) {
200                foreach my $filter (reverse @filters) {
201                    $stream = $filter->outfilt($stream, 0);
202                }
203            }
204            $fh->print($stream);
205        }
206        close $dictfh;
207        unless ($self->{' nofilt'}) {
208            $stream = '';
209            foreach my $filter (reverse @filters) {
210                $stream = $filter->outfilt($stream, 1);
211            }
212            $fh->print($stream);
213        }
214        ## $fh->print("\n"); # newline goes into endstream
215    }
216
217    if (defined $self->{' stream'} or defined $self->{' streamfile'}) {
218        my $length = $fh->tell() - $loc;
219        unless ($self->{'Length'}{'val'} == $length) {
220            $self->{'Length'}{'val'} = $length;
221            $pdf->out_obj($self->{'Length'}) if $self->{'Length'}->is_obj($pdf);
222        }
223
224        $fh->print("\nendstream"); # next is endobj which has the final cr
225    }
226}
227
228=head2 $d->read_stream($force_memory)
229
230Reads in a stream from a PDF file. If the stream is greater than
231C<PDF::Dict::mincache> (defaults to 32768) bytes to be stored, then
232the default action is to create a file for it somewhere and to use that
233file as a data cache. If $force_memory is set, this caching will not
234occur and the data will all be stored in the $self->{' stream'}
235variable.
236
237=cut
238
239sub read_stream {
240    my ($self, $force_memory) = @_;
241
242    my $fh = $self->{' streamsrc'};
243    my $len = $self->{'Length'}->val();
244
245    $self->{' stream'} = '';
246
247    my @filters;
248    if (defined $self->{'Filter'}) {
249        my $i = 0;
250        foreach my $filter ($self->{'Filter'}->elements()) {
251            my $filter_class = "PDF::API2::Basic::PDF::Filter::" . $filter->val();
252            unless ($self->{'DecodeParms'}) {
253                push(@filters, $filter_class->new());
254            }
255            elsif ($self->{'Filter'}->isa('PDF::API2::Basic::PDF::Name') and $self->{'DecodeParms'}->isa('PDF::API2::Basic::PDF::Dict')) {
256                push(@filters, $filter_class->new($self->{'DecodeParms'}));
257            }
258            elsif ($self->{'DecodeParms'}->isa('PDF::API2::Basic::PDF::Array')) {
259                my $parms = $self->{'DecodeParms'}->val->[$i];
260                push(@filters, $filter_class->new($parms));
261            }
262            else {
263                push(@filters, $filter_class->new());
264            }
265            $i++;
266        }
267    }
268
269    my $last = 0;
270    if (defined $self->{' streamfile'}) {
271        unlink ($self->{' streamfile'});
272        $self->{' streamfile'} = undef;
273    }
274    seek $fh, $self->{' streamloc'}, 0;
275
276    my $dictfh;
277    my $readlen = 4096;
278    for (my $i = 0; $i < $len; $i += $readlen) {
279        my $data;
280        unless ($i + $readlen > $len) {
281            read $fh, $data, $readlen;
282        }
283        else {
284            $last = 1;
285            read $fh, $data, $len - $i;
286        }
287
288        foreach my $filter (@filters) {
289            $data = $filter->infilt($data, $last);
290        }
291
292        # Start using a temporary file if the stream gets too big
293        if (not $force_memory and not defined $self->{' streamfile'} and (length($self->{' stream'}) + length($data)) > $mincache) {
294            $dictfh = File::Temp->new(TEMPLATE => 'pdfXXXXX', SUFFIX => 'dat', TMPDIR => 1);
295            $self->{' streamfile'} = $dictfh->filename();
296            print $dictfh $self->{' stream'};
297            undef $self->{' stream'};
298        }
299
300        if (defined $self->{' streamfile'}) {
301            print $dictfh $data;
302        }
303        else {
304            $self->{' stream'} .= $data;
305        }
306    }
307
308    close $dictfh if defined $self->{' streamfile'};
309    $self->{' nofilt'} = 0;
310    return $self;
311}
312
313=head2 $d->val
314
315Returns the dictionary, which is itself.
316
317=cut
318
319sub val {
320    return $_[0];
321}
322
3231;
324