1# Code in the PDF::API2::Basic::PDF namespace was originally copied from the 2# Text::PDF distribution. 3# 4# Copyright Martin Hosken <Martin_Hosken@sil.org> 5# 6# Martin Hosken's code may be used under the terms of the MIT license. 7# Subsequent versions of the code have the same license as PDF::API2. 8 9package PDF::API2::Basic::PDF::File; 10 11use strict; 12 13our $VERSION = '2.042'; # VERSION 14 15=head1 NAME 16 17PDF::API2::Basic::PDF::File - Low-level PDF file access 18 19=head1 SYNOPSIS 20 21 $p = PDF::API2::Basic::PDF::File->open("filename.pdf", 1); 22 $p->new_obj($obj_ref); 23 $p->free_obj($obj_ref); 24 $p->append_file; 25 $p->close_file; 26 $p->release; # IMPORTANT! 27 28=head1 DESCRIPTION 29 30This class keeps track of the directory aspects of a PDF file. There are two 31parts to the directory: the main directory object which is the parent to all 32other objects and a chain of cross-reference tables and corresponding trailer 33dictionaries starting with the main directory object. 34 35=head1 INSTANCE VARIABLES 36 37Within this class hierarchy, rather than making everything visible via methods, 38which would be a lot of work, there are various instance variables which are 39accessible via associative array referencing. To distinguish instance variables 40from content variables (which may come from the PDF content itself), each such 41variable will start with a space. 42 43Variables which do not start with a space directly reflect elements in a PDF 44dictionary. In the case of a PDF::API2::Basic::PDF::File, the elements reflect those in the 45trailer dictionary. 46 47Since some variables are not designed for class users to access, variables are 48marked in the documentation with (R) to indicate that such an entry should only 49be used as read-only information. (P) indicates that the information is private 50and not designed for user use at all, but is included in the documentation for 51completeness and to ensure that nobody else tries to use it. 52 53=over 54 55=item newroot 56 57This variable allows the user to create a new root entry to occur in the trailer 58dictionary which is output when the file is written or appended. If you wish to 59over-ride the root element in the dictionary you have, use this entry to indicate 60that without losing the current Root entry. Notice that newroot should point to 61a PDF level object and not just to a dictionary which does not have object status. 62 63=item INFILE (R) 64 65Contains the filehandle used to read this information into this PDF directory. Is 66an IO object. 67 68=item fname (R) 69 70This is the filename which is reflected by INFILE, or the original IO object passed 71in. 72 73=item update (R) 74 75This indicates that the read file has been opened for update and that at some 76point, $p->appendfile() can be called to update the file with the changes that 77have been made to the memory representation. 78 79=item maxobj (R) 80 81Contains the first usable object number above any that have already appeared 82in the file so far. 83 84=item outlist (P) 85 86This is a list of Objind which are to be output when the next appendfile or outfile 87occurs. 88 89=item firstfree (P) 90 91Contains the first free object in the free object list. Free objects are removed 92from the front of the list and added to the end. 93 94=item lastfree (P) 95 96Contains the last free object in the free list. It may be the same as the firstfree 97if there is only one free object. 98 99=item objcache (P) 100 101All objects are held in the cache to ensure that a system only has one occurrence of 102each object. In effect, the objind class acts as a container type class to hold the 103PDF object structure and it would be unfortunate if there were two identical 104place-holders floating around a system. 105 106=item epos (P) 107 108The end location of the read-file. 109 110=back 111 112Each trailer dictionary contains a number of private instance variables which 113hold the chain together. 114 115=over 116 117=item loc (P) 118 119Contains the location of the start of the cross-reference table preceding the 120trailer. 121 122=item xref (P) 123 124Contains an anonymous array of each cross-reference table entry. 125 126=item prev (P) 127 128A reference to the previous table. Note this differs from the Prev entry which 129is in PDF which contains the location of the previous cross-reference table. 130 131=back 132 133=head1 METHODS 134 135=cut 136 137use Scalar::Util qw(blessed weaken); 138 139use vars qw($cr $irreg_char $reg_char $ws_char $delim_char %types); 140 141$ws_char = '[ \t\r\n\f\0]'; 142$delim_char = '[][<>{}()/%]'; 143$reg_char = '[^][<>{}()/% \t\r\n\f\0]'; 144$irreg_char = '[][<>{}()/% \t\r\n\f\0]'; 145$cr = '\s*(?:\015|\012|(?:\015\012))'; 146 147my $re_comment = qr/(?:\%[^\r\n]*)/; 148my $re_whitespace = qr/(?:[ \t\r\n\f\0]|$re_comment)/; 149 150%types = ( 151 'Page' => 'PDF::API2::Basic::PDF::Page', 152 'Pages' => 'PDF::API2::Basic::PDF::Pages', 153); 154 155my $readDebug = 0; 156 157use Carp; 158use IO::File; 159 160# Now for the basic PDF types 161use PDF::API2::Basic::PDF::Utils; 162 163use PDF::API2::Basic::PDF::Array; 164use PDF::API2::Basic::PDF::Bool; 165use PDF::API2::Basic::PDF::Dict; 166use PDF::API2::Basic::PDF::Name; 167use PDF::API2::Basic::PDF::Number; 168use PDF::API2::Basic::PDF::Objind; 169use PDF::API2::Basic::PDF::String; 170use PDF::API2::Basic::PDF::Page; 171use PDF::API2::Basic::PDF::Pages; 172use PDF::API2::Basic::PDF::Null; 173use POSIX qw(ceil floor); 174 175no warnings qw[ deprecated recursion uninitialized ]; 176 177 178=head2 PDF::API2::Basic::PDF::File->new 179 180Creates a new, empty file object which can act as the host to other PDF objects. 181Since there is no file associated with this object, it is assumed that the 182object is created in readiness for creating a new PDF file. 183 184=cut 185 186sub new { 187 my ($class, $root) = @_; 188 my $self = $class->_new(); 189 190 unless ($root) { 191 $root = PDFDict(); 192 $root->{'Type'} = PDFName('Catalog'); 193 } 194 $self->new_obj($root); 195 $self->{'Root'} = $root; 196 return $self; 197} 198 199 200=head2 $p = PDF::API2::Basic::PDF::File->open($filename, $update) 201 202Opens the file and reads all the trailers and cross reference tables to build 203a complete directory of objects. 204 205$update specifies whether this file is being opened for updating and editing, 206or simply to be read. 207 208$filename may be an IO object 209 210=cut 211 212sub open { 213 my ($class, $filename, $update) = @_; 214 my ($fh, $buffer); 215 216 my $self = $class->_new(); 217 if (ref $filename) { 218 $self->{' INFILE'} = $filename; 219 if ($update) { 220 $self->{' update'} = 1; 221 $self->{' OUTFILE'} = $filename; 222 } 223 $fh = $filename; 224 } 225 else { 226 die "File '$filename' does not exist" unless -f $filename; 227 die "File '$filename' is not readable" unless -r $filename; 228 if ($update) { 229 die "File '$filename' is not writable" unless -w $filename; 230 } 231 $fh = IO::File->new(($update ? '+' : '') . "<$filename") 232 || die "Error opening '$filename': $!"; 233 $self->{' INFILE'} = $fh; 234 if ($update) { 235 $self->{' update'} = 1; 236 $self->{' OUTFILE'} = $fh; 237 $self->{' fname'} = $filename; 238 } 239 } 240 binmode $fh, ':raw'; 241 $fh->seek(0, 0); # go to start of file 242 $fh->read($buffer, 255); 243 unless ($buffer =~ /^\%PDF\-([12]\.\d+)\s*$cr/m) { 244 croak "$filename does not appear to be a valid PDF"; 245 } 246 $self->{' version'} = $1; 247 248 $fh->seek(0, 2); # go to end of file 249 my $end = $fh->tell(); 250 $self->{' epos'} = $end; 251 foreach my $offset (1..64) { 252 $fh->seek($end - 16 * $offset, 0); 253 $fh->read($buffer, 16 * $offset); 254 last if $buffer =~ m/startxref($cr|\s*)\d+($cr|\s*)\%\%eof.*?/i; 255 } 256 unless ($buffer =~ m/startxref[^\d]+([0-9]+)($cr|\s*)\%\%eof.*?/i) { 257 die "Malformed PDF file $filename"; 258 } 259 my $xpos = $1; 260 $self->{' xref_position'} = $xpos; 261 262 my $tdict = $self->readxrtr($xpos, $self); 263 foreach my $key (keys %$tdict) { 264 $self->{$key} = $tdict->{$key}; 265 } 266 return $self; 267} 268 269=head2 $p->version($version) 270 271Gets/sets the PDF version (e.g. 1.4) 272 273=cut 274 275sub version { 276 my $self = shift(); 277 278 if (@_) { 279 my $version = shift(); 280 croak "Invalid version $version" unless $version =~ /^([12]\.[0-9]+)$/; 281 $self->header_version($version); 282 if ($version >= 1.4) { 283 $self->trailer_version($version); 284 } 285 else { 286 delete $self->{'Root'}->{'Version'}; 287 $self->out_obj($self->{'Root'}); 288 } 289 return $version; 290 } 291 292 my $header_version = $self->header_version(); 293 my $trailer_version = $self->trailer_version(); 294 return $trailer_version if $trailer_version > $header_version; 295 return $header_version; 296} 297 298=head2 $version = $p->header_version($version) 299 300Gets/sets the PDF version stored in the file header. 301 302=cut 303 304sub header_version { 305 my $self = shift(); 306 307 if (@_) { 308 my $version = shift(); 309 croak "Invalid version $version" unless $version =~ /^([12]\.[0-9]+)$/; 310 $self->{' version'} = $version; 311 } 312 313 return $self->{' version'}; 314} 315 316=head2 $version = $p->trailer_version($version) 317 318Gets/sets the PDF version stored in the document catalog. 319 320=cut 321 322sub trailer_version { 323 my $self = shift(); 324 325 if (@_) { 326 my $version = shift(); 327 croak "Invalid version $version" unless $version =~ /^([12]\.[0-9]+)$/; 328 $self->{'Root'}->{'Version'} = PDFName($version); 329 $self->out_obj($self->{'Root'}); 330 return $version; 331 } 332 333 return unless $self->{'Root'}->{'Version'}; 334 $self->{'Root'}->{'Version'}->realise(); 335 return $self->{'Root'}->{'Version'}->val(); 336} 337 338=head2 $prev_version = $p->require_version($version) 339 340Ensures that the PDF version is at least C<$version>. 341 342=cut 343 344sub require_version { 345 my ($self, $min_version) = @_; 346 my $current_version = $self->version(); 347 $self->version($min_version) if $current_version < $min_version; 348 return $current_version; 349} 350 351=head2 $p->release() 352 353Releases ALL of the memory used by the PDF document and all of its 354component objects. After calling this method, do B<NOT> expect to 355have anything left in the C<PDF::API2::Basic::PDF::File> object (so if 356you need to save, be sure to do it before calling this method). 357 358B<NOTE>, that it is important that you call this method on any 359C<PDF::API2::Basic::PDF::File> object when you wish to destruct it and 360free up its memory. Internally, PDF files have an enormous number of 361cross-references and this causes circular references within the 362internal data structures. Calling 'C<release()>' forces a brute-force 363cleanup of the data structures, freeing up all of the memory. Once 364you've called this method, though, don't expect to be able to do 365anything else with the C<PDF::API2::Basic::PDF::File> object; it'll 366have B<no> internal state whatsoever. 367 368=cut 369 370# Maintainer's Question: Couldn't this be handled by a DESTROY method 371# instead of requiring an explicit call to release()? 372sub release { 373 my $self = shift(); 374 375 return $self unless ref($self); 376 my @tofree = values %$self; 377 378 foreach my $key (keys %$self) { 379 $self->{$key} = undef; 380 delete $self->{$key}; 381 } 382 383 # PDFs with highly-interconnected page trees or outlines can hit Perl's 384 # recursion limit pretty easily, so disable the warning for this specific 385 # loop. 386 no warnings 'recursion'; 387 388 while (my $item = shift @tofree) { 389 if (blessed($item) and $item->can('release')) { 390 $item->release(1); 391 } 392 elsif (ref($item) eq 'ARRAY') { 393 push @tofree, @$item; 394 } 395 elsif (ref($item) eq 'HASH') { 396 push @tofree, values %$item; 397 foreach my $key (keys %$item) { 398 $item->{$key} = undef; 399 delete $item->{$key}; 400 } 401 } 402 else { 403 $item = undef; 404 } 405 } 406} 407 408=head2 $p->append_file() 409 410Appends the objects for output to the read file and then appends the appropriate table. 411 412=cut 413 414sub append_file { 415 my $self = shift(); 416 return unless $self->{' update'}; 417 418 my $fh = $self->{' INFILE'}; 419 420 my $tdict = PDFDict(); 421 $tdict->{'Prev'} = PDFNum($self->{' loc'}); 422 $tdict->{'Info'} = $self->{'Info'}; 423 if (defined $self->{' newroot'}) { 424 $tdict->{'Root'} = $self->{' newroot'}; 425 } 426 else { 427 $tdict->{'Root'} = $self->{'Root'}; 428 } 429 $tdict->{'Size'} = $self->{'Size'}; 430 431 foreach my $key (grep { $_ !~ m/^\s/ } keys %$self) { 432 $tdict->{$key} = $self->{$key} unless defined $tdict->{$key}; 433 } 434 435 $fh->seek($self->{' epos'}, 0); 436 $self->out_trailer($tdict, $self->{' update'}); 437 close $self->{' OUTFILE'}; 438} 439 440 441=head2 $p->out_file($fname) 442 443Writes a PDF file to a file of the given filename based on the current list of 444objects to be output. It creates the trailer dictionary based on information 445in $self. 446 447$fname may be an IO object; 448 449=cut 450 451sub out_file { 452 my ($self, $fname) = @_; 453 454 $self->create_file($fname); 455 $self->close_file(); 456 return $self; 457} 458 459 460=head2 $p->create_file($fname) 461 462Creates a new output file (no check is made of an existing open file) of 463the given filename or IO object. Note, make sure that $p->{' version'} is set 464correctly before calling this function. 465 466=cut 467 468sub create_file { 469 my ($self, $filename) = @_; 470 my $fh; 471 472 $self->{' fname'} = $filename; 473 if (ref $filename) { 474 $fh = $filename; 475 } 476 else { 477 $fh = IO::File->new(">$filename") || die "Unable to open $filename for writing"; 478 binmode($fh,':raw'); 479 } 480 481 $self->{' OUTFILE'} = $fh; 482 $fh->print('%PDF-' . ($self->{' version'} // '1.4') . "\n"); 483 $fh->print("%\xC6\xCD\xCD\xB5\n"); # and some binary stuff in a comment 484 return $self; 485} 486 487 488=head2 $p->clone_file($fname) 489 490Creates a copy of the input file at the specified filename and sets it as the 491output file for future writes. A file handle may be passed instead of a 492filename. 493 494=cut 495 496sub clone_file { 497 my ($self, $filename) = @_; 498 my $fh; 499 500 $self->{' fname'} = $filename; 501 if (ref $filename) { 502 $fh = $filename; 503 } 504 else { 505 $fh = IO::File->new(">$filename") || die "Unable to open $filename for writing"; 506 binmode($fh,':raw'); 507 } 508 509 $self->{' OUTFILE'} = $fh; 510 511 my $in = $self->{' INFILE'}; 512 $in->seek(0, 0); 513 my $data; 514 while (not $in->eof()) { 515 $in->read($data, 1024 * 1024); 516 $fh->print($data); 517 } 518 return $self; 519} 520 521=head2 $p->close_file 522 523Closes up the open file for output by outputting the trailer etc. 524 525=cut 526 527sub close_file { 528 my $self = shift(); 529 530 my $tdict = PDFDict(); 531 $tdict->{'Info'} = $self->{'Info'} if defined $self->{'Info'}; 532 $tdict->{'Root'} = (defined $self->{' newroot'} and $self->{' newroot'} ne '') ? $self->{' newroot'} : $self->{'Root'}; 533 534 # remove all freed objects from the outlist, AND the outlist_cache if not updating 535 # NO! Don't do that thing! In fact, let out_trailer do the opposite! 536 537 $tdict->{'Size'} = $self->{'Size'} || PDFNum(1); 538 $tdict->{'Prev'} = PDFNum($self->{' loc'}) if $self->{' loc'}; 539 if ($self->{' update'}) { 540 foreach my $key (grep { $_ !~ m/^[\s\-]/ } keys %$self) { 541 $tdict->{$key} = $self->{$key} unless defined $tdict->{$key}; 542 } 543 544 my $fh = $self->{' INFILE'}; 545 $fh->seek($self->{' epos'}, 0); 546 } 547 548 $self->out_trailer($tdict, $self->{' update'}); 549 close($self->{' OUTFILE'}); 550 if ($^O eq 'MacOS' and not ref($self->{' fname'})) { 551 MacPerl::SetFileInfo('CARO', 'TEXT', $self->{' fname'}); 552 } 553 554 return $self; 555} 556 557=head2 ($value, $str) = $p->readval($str, %opts) 558 559Reads a PDF value from the current position in the file. If $str is too short 560then read some more from the current location in the file until the whole object 561is read. This is a recursive call which may slurp in a whole big stream (unprocessed). 562 563Returns the recursive data structure read and also the current $str that has been 564read from the file. 565 566=cut 567 568sub readval { 569 my ($self, $str, %opts) = @_; 570 my $fh = $self->{' INFILE'}; 571 my ($result, $value); 572 573 my $update = defined($opts{update}) ? $opts{update} : 1; 574 $str = update($fh, $str) if $update; 575 576 $str =~ s/^$ws_char+//; # Ignore initial white space 577 $str =~ s/^\%[^\015\012]*$ws_char+//; # Ignore comments 578 579 # Dictionary 580 if ($str =~ m/^<</s) { 581 $str = substr ($str, 2); 582 $str = update($fh, $str) if $update; 583 $result = PDFDict(); 584 585 while ($str !~ m/^>>/) { 586 $str =~ s/^$ws_char+//; # Ignore initial white space 587 $str =~ s/^\%[^\015\012]*$ws_char+//; # Ignore comments 588 589 if ($str =~ s|^/($reg_char+)||) { 590 my $key = PDF::API2::Basic::PDF::Name::name_to_string($1, $self); 591 ($value, $str) = $self->readval($str, %opts); 592 unless ((ref($value) // '') eq 'PDF::API2::Basic::PDF::Null') { 593 $result->{$key} = $value; 594 } 595 } 596 elsif ($str =~ s|^/$ws_char+||) { 597 # fixes a broken key problem of acrobat. -- fredo 598 ($value, $str) = $self->readval($str, %opts); 599 unless ((ref($value) // '') eq 'PDF::API2::Basic::PDF::Null') { 600 $result->{'null'} = $value; 601 } 602 } 603 elsif ($str =~ s|^//|/|) { 604 # fixes again a broken key problem of illustrator/enfocus. -- fredo 605 ($value, $str) = $self->readval($str, %opts); 606 unless ((ref($value) // '') eq 'PDF::API2::Basic::PDF::Null') { 607 $result->{'null'} = $value; 608 } 609 } 610 else { 611 die "Invalid dictionary key"; 612 } 613 $str = update($fh, $str) if $update; # thanks gareth.jones@stud.man.ac.uk 614 } 615 $str =~ s/^>>//; 616 $str = update($fh, $str) if $update; 617 # streams can't be followed by a lone carriage-return. 618 # fredo: yes they can !!! -- use the MacOS Luke. 619 if (($str =~ s/^stream(?:(?:\015\012)|\012|\015)//) and ($result->{'Length'}->val != 0)) { # stream 620 my $length = $result->{'Length'}->val; 621 $result->{' streamsrc'} = $fh; 622 $result->{' streamloc'} = $fh->tell - length($str); 623 624 unless ($opts{'nostreams'}) { 625 if ($length > length($str)) { 626 $value = $str; 627 $length -= length($str); 628 read $fh, $str, $length + 11; # slurp the whole stream! 629 } 630 else { 631 $value = ''; 632 } 633 $value .= substr($str, 0, $length); 634 $result->{' stream'} = $value; 635 $result->{' nofilt'} = 1; 636 $str = update($fh, $str, 1) if $update; # tell update we are in-stream and only need an endstream 637 $str = substr($str, index($str, 'endstream') + 9); 638 } 639 } 640 641 if (defined $result->{'Type'} and defined $types{$result->{'Type'}->val}) { 642 bless $result, $types{$result->{'Type'}->val}; 643 } 644 # gdj: FIXME: if any of the ws chars were crs, then the whole 645 # string might not have been read. 646 } 647 648 # Indirect Object 649 elsif ($str =~ m/^([0-9]+)(?:$ws_char|$re_comment)+([0-9]+)(?:$ws_char|$re_comment)+R/s) { 650 my $num = $1; 651 $value = $2; 652 $str =~ s/^([0-9]+)(?:$ws_char|$re_comment)+([0-9]+)(?:$ws_char|$re_comment)+R//s; 653 unless ($result = $self->test_obj($num, $value)) { 654 $result = PDF::API2::Basic::PDF::Objind->new(); 655 $result->{' objnum'} = $num; 656 $result->{' objgen'} = $value; 657 $self->add_obj($result, $num, $value); 658 } 659 $result->{' parent'} = $self; 660 weaken $result->{' parent'}; 661 662 # Removed to address changes being lost when an indirect object is realised twice 663 # $result->{' realised'} = 0; 664 665 # gdj: FIXME: if any of the ws chars were crs, then the whole 666 # string might not have been read. 667 } 668 669 # Object 670 elsif ($str =~ m/^([0-9]+)(?:$ws_char|$re_comment)+([0-9]+)(?:$ws_char|$re_comment)+obj/s) { 671 my $obj; 672 my $num = $1; 673 $value = $2; 674 $str =~ s/^([0-9]+)(?:$ws_char|$re_comment)+([0-9]+)(?:$ws_char|$re_comment)+obj//s; 675 ($obj, $str) = $self->readval($str, %opts); 676 if ($result = $self->test_obj($num, $value)) { 677 $result->merge($obj); 678 } 679 else { 680 $result = $obj; 681 $self->add_obj($result, $num, $value); 682 $result->{' realised'} = 1; 683 } 684 $str = update($fh, $str) if $update; # thanks to kundrat@kundrat.sk 685 $str =~ s/^endobj//; 686 } 687 688 # Name 689 elsif ($str =~ m|^/($reg_char*)|s) { 690 $value = $1; 691 $str =~ s|^/($reg_char*)||s; 692 $result = PDF::API2::Basic::PDF::Name->from_pdf($value, $self); 693 } 694 695 # Literal String 696 elsif ($str =~ m/^\(/) { 697 # We now need to find an unbalanced, unescaped right-paren. 698 # This can't be done with a regex. 699 my $value = '('; 700 $str = substr($str, 1); 701 702 my $nested_level = 1; 703 while (1) { 704 # Ignore everything up to the first escaped or parenthesis character 705 if ($str =~ /^([^\\()]+)(.*)/s) { 706 $value .= $1; 707 $str = $2; 708 } 709 710 # Ignore escaped parentheses 711 if ($str =~ /^(\\[()])/) { 712 $value .= $1; 713 $str = substr($str, 2); 714 } 715 716 # Left parenthesis: increase nesting 717 elsif ($str =~ /^\(/) { 718 $value .= '('; 719 $str = substr($str, 1); 720 $nested_level++; 721 } 722 723 # Right parenthesis: decrease nesting 724 elsif ($str =~ /^\)/) { 725 $value .= ')'; 726 $str = substr($str, 1); 727 $nested_level--; 728 last unless $nested_level; 729 } 730 731 # Other escaped character 732 elsif ($str =~ /^(\\[^()])/) { 733 $value .= $1; 734 $str = substr($str, 2); 735 } 736 737 # If there wasn't an escaped or parenthesis character, 738 # read some more. 739 else { 740 # We don't use update because we don't want to remove 741 # whitespace or comments. 742 $fh->read($str, 255, length($str)) or die 'Unterminated string.'; 743 } 744 } 745 746 $result = PDF::API2::Basic::PDF::String->from_pdf($value); 747 } 748 749 # Hex String 750 elsif ($str =~ m/^</) { 751 $str =~ s/^<//; 752 $fh->read($str, 255, length($str)) while (0 > index($str, '>')); 753 ($value, $str) = ($str =~ /^(.*?)>(.*)/s); 754 $result = PDF::API2::Basic::PDF::String->from_pdf('<' . $value . '>'); 755 } 756 757 # Array 758 elsif ($str =~ m/^\[/) { 759 $str =~ s/^\[//; 760 $str = update($fh, $str) if $update; 761 $result = PDFArray(); 762 while ($str !~ m/^\]/) { 763 $str =~ s/^$ws_char+//; # Ignore initial white space 764 $str =~ s/^\%[^\015\012]*$ws_char+//; # Ignore comments 765 766 ($value, $str) = $self->readval($str, %opts); 767 $result->add_elements($value); 768 $str = update($fh, $str) if $update; # str might just be exhausted! 769 } 770 $str =~ s/^\]//; 771 } 772 773 # Boolean 774 elsif ($str =~ m/^(true|false)($irreg_char|$)/) { 775 $value = $1; 776 $str =~ s/^(?:true|false)//; 777 $result = PDF::API2::Basic::PDF::Bool->from_pdf($value); 778 } 779 780 # Number 781 elsif ($str =~ m/^([+-.0-9]+)($irreg_char|$)/) { 782 $value = $1; 783 $str =~ s/^([+-.0-9]+)//; 784 785 # If $str only consists of whitespace (or is empty), call update to 786 # see if this is the beginning of an indirect object or reference 787 if ($update and ($str =~ /^$re_whitespace*$/s or $str =~ /^$re_whitespace+[0-9]+$re_whitespace*$/s)) { 788 $str =~ s/^$re_whitespace+/ /s; 789 $str =~ s/$re_whitespace+$/ /s; 790 $str = update($fh, $str); 791 if ($str =~ m/^$re_whitespace*([0-9]+)$re_whitespace+(?:R|obj)/s) { 792 return $self->readval("$value $str", %opts); 793 } 794 } 795 796 $result = PDF::API2::Basic::PDF::Number->from_pdf($value); 797 } 798 799 # Null 800 elsif ($str =~ m/^null($irreg_char|$)/) { 801 $str =~ s/^null//; 802 $result = PDF::API2::Basic::PDF::Null->new; 803 } 804 805 else { 806 die "Can't parse `$str' near " . ($fh->tell()) . " length " . length($str) . "."; 807 } 808 809 $str =~ s/^$ws_char+//s; 810 return ($result, $str); 811} 812 813 814=head2 $ref = $p->read_obj($objind, %opts) 815 816Given an indirect object reference, locate it and read the object returning 817the read in object. 818 819=cut 820 821sub read_obj { 822 my ($self, $objind, %opts) = @_; 823 824 my $res = $self->read_objnum($objind->{' objnum'}, $objind->{' objgen'}, %opts) || return; 825 $objind->merge($res) unless $objind eq $res; 826 return $objind; 827} 828 829 830=head2 $ref = $p->read_objnum($num, $gen, %opts) 831 832Returns a fully read object of given number and generation in this file 833 834=cut 835 836sub read_objnum { 837 my ($self, $num, $gen, %opts) = @_; 838 croak 'Undefined object number in call to read_objnum($num, $gen)' unless defined $num; 839 croak 'Undefined object generation in call to read_objnum($num, $gen)' unless defined $gen; 840 croak "Invalid object number '$num' in call to read_objnum" unless $num =~ /^[0-9]+$/; 841 croak "Invalid object generation '$gen' in call to read_objnum" unless $gen =~ /^[0-9]+$/; 842 843 my $object_location = $self->locate_obj($num, $gen) || return; 844 my $object; 845 846 # Compressed object 847 if (ref($object_location)) { 848 my ($object_stream_num, $object_stream_pos) = @{$object_location}; 849 850 my $object_stream = $self->read_objnum($object_stream_num, 0, %opts); 851 die 'Cannot find the compressed object stream' unless $object_stream; 852 853 $object_stream->read_stream() if $object_stream->{' nofilt'}; 854 855 # An object stream starts with pairs of integers containing object numbers and 856 # stream offsets relative to the First key 857 my $fh; 858 my $pairs; 859 unless ($object_stream->{' streamfile'}) { 860 $pairs = substr($object_stream->{' stream'}, 0, $object_stream->{'First'}->val); 861 } 862 else { 863 CORE::open($fh, '<', $object_stream->{' streamfile'}); 864 read($fh, $pairs, $object_stream->{'First'}->val()); 865 } 866 my @map = split /\s+/, $pairs; 867 868 # Find the offset of the object in the stream 869 my $index = $object_stream_pos * 2; 870 die "Objind $num does not exist at index $index" unless $map[$index] == $num; 871 my $start = $map[$index + 1]; 872 873 # Unless this is the last object in the stream, its length is determined by the 874 # offset of the next object 875 my $last_object_in_stream = $map[-2]; 876 my $length; 877 if ($last_object_in_stream == $num) { 878 if ($object_stream->{' stream'}) { 879 $length = length($object_stream->{' stream'}) - $object_stream->{'First'}->val() - $start; 880 } 881 else { 882 $length = (-s $object_stream->{' streamfile'}) - $object_stream->{'First'}->val() - $start; 883 } 884 } 885 else { 886 my $next_start = $map[$index + 3]; 887 $length = $next_start - $start; 888 } 889 890 # Read the object from the stream 891 my $stream = "$num 0 obj "; 892 unless ($object_stream->{' streamfile'}) { 893 $stream .= substr($object_stream->{' stream'}, $object_stream->{'First'}->val() + $start, $length); 894 } 895 else { 896 seek($fh, $object_stream->{'First'}->val() + $start, 0); 897 read($fh, $stream, $length, length($stream)); 898 close $fh; 899 } 900 901 ($object) = $self->readval($stream, %opts, update => 0); 902 return $object; 903 } 904 905 my $current_location = $self->{' INFILE'}->tell; 906 $self->{' INFILE'}->seek($object_location, 0); 907 ($object) = $self->readval('', %opts); 908 $self->{' INFILE'}->seek($current_location, 0); 909 return $object; 910} 911 912 913=head2 $objind = $p->new_obj($obj) 914 915Creates a new, free object reference based on free space in the cross reference chain. 916If nothing free then thinks up a new number. If $obj then turns that object into this 917new object rather than returning a new object. 918 919=cut 920 921sub new_obj { 922 my ($self, $base) = @_; 923 my $res; 924 925 if (defined $self->{' free'} and scalar @{$self->{' free'}} > 0) { 926 $res = shift(@{$self->{' free'}}); 927 if (defined $base) { 928 my ($num, $gen) = @{$self->{' objects'}{$res->uid}}; 929 $self->remove_obj($res); 930 $self->add_obj($base, $num, $gen); 931 return $self->out_obj($base); 932 } 933 else { 934 $self->{' objects'}{$res->uid}[2] = 0; 935 return $res; 936 } 937 } 938 939 my $tdict = $self; 940 my $i; 941 while (defined $tdict) { 942 $i = $tdict->{' xref'}{defined($i) ? $i : ''}[0]; 943 while (defined $i and $i != 0) { 944 my ($ni, $ng) = @{$tdict->{' xref'}{$i}}; 945 unless (defined $self->locate_obj($i, $ng)) { 946 if (defined $base) { 947 $self->add_obj($base, $i, $ng); 948 return $base; 949 } 950 else { 951 $res = $self->test_obj($i, $ng) || $self->add_obj(PDF::API2::Basic::PDF::Objind->new(), $i, $ng); 952 $self->out_obj($res); 953 return $res; 954 } 955 } 956 $i = $ni; 957 } 958 $tdict = $tdict->{' prev'}; 959 } 960 961 $i = $self->{' maxobj'}++; 962 if (defined $base) { 963 $self->add_obj($base, $i, 0); 964 $self->out_obj($base); 965 return $base; 966 } 967 else { 968 $res = $self->add_obj(PDF::API2::Basic::PDF::Objind->new(), $i, 0); 969 $self->out_obj($res); 970 return $res; 971 } 972} 973 974 975=head2 $p->out_obj($objind) 976 977Indicates that the given object reference should appear in the output xref 978table whether with data or freed. 979 980=cut 981 982sub out_obj { 983 my ($self, $obj) = @_; 984 985 # This is why we've been keeping the outlist CACHE around; to speed 986 # up this method by orders of magnitude (it saves up from having to 987 # grep the full outlist each time through as we'll just do a lookup 988 # in the hash) (which is super-fast). 989 unless (exists $self->{' outlist_cache'}{$obj}) { 990 push @{$self->{' outlist'}}, $obj; 991 # weaken $self->{' outlist'}->[-1]; 992 $self->{' outlist_cache'}{$obj} = 1; 993 } 994 return $obj; 995} 996 997 998=head2 $p->free_obj($objind) 999 1000Marks an object reference for output as being freed. 1001 1002=cut 1003 1004sub free_obj { 1005 my ($self, $obj) = @_; 1006 1007 push @{$self->{' free'}}, $obj; 1008 $self->{' objects'}{$obj->uid()}[2] = 1; 1009 $self->out_obj($obj); 1010} 1011 1012 1013=head2 $p->remove_obj($objind) 1014 1015Removes the object from all places where we might remember it 1016 1017=cut 1018 1019sub remove_obj { 1020 my ($self, $objind) = @_; 1021 1022 # who says it has to be fast 1023 delete $self->{' objects'}{$objind->uid()}; 1024 delete $self->{' outlist_cache'}{$objind}; 1025 delete $self->{' printed_cache'}{$objind}; 1026 @{$self->{' outlist'}} = grep { $_ ne $objind } @{$self->{' outlist'}}; 1027 @{$self->{' printed'}} = grep { $_ ne $objind } @{$self->{' printed'}}; 1028 $self->{' objcache'}{$objind->{' objnum'}, $objind->{' objgen'}} = undef 1029 if $self->{' objcache'}{$objind->{' objnum'}, $objind->{' objgen'}} eq $objind; 1030 return $self; 1031} 1032 1033 1034=head2 $p->ship_out(@objects) 1035 1036Ships the given objects (or all objects for output if @objects is empty) to 1037the currently open output file (assuming there is one). Freed objects are not 1038shipped, and once an object is shipped it is switched such that this file 1039becomes its source and it will not be shipped again unless out_obj is called 1040again. Notice that a shipped out object can be re-output or even freed, but 1041that it will not cause the data already output to be changed. 1042 1043=cut 1044 1045sub ship_out { 1046 my ($self, @objs) = @_; 1047 1048 die "No output file specified" unless defined $self->{' OUTFILE'}; 1049 my $fh = $self->{' OUTFILE'}; 1050 seek($fh, 0, 2); # go to the end of the file 1051 1052 @objs = @{$self->{' outlist'}} unless scalar @objs > 0; 1053 foreach my $objind (@objs) { 1054 next unless $objind->is_obj($self); 1055 my $j = -1; 1056 for (my $i = 0; $i < scalar @{$self->{' outlist'}}; $i++) { 1057 if ($self->{' outlist'}[$i] eq $objind) { 1058 $j = $i; 1059 last; 1060 } 1061 } 1062 next if $j < 0; 1063 splice(@{$self->{' outlist'}}, $j, 1); 1064 delete $self->{' outlist_cache'}{$objind}; 1065 next if grep { $_ eq $objind } @{$self->{' free'}}; 1066 1067 map { $fh->print("\% $_ \n") } split(/$cr/, $objind->{' comments'}) if $objind->{' comments'}; 1068 $self->{' locs'}{$objind->uid()} = $fh->tell(); 1069 my ($objnum, $objgen) = @{$self->{' objects'}{$objind->uid()}}[0..1]; 1070 $fh->printf('%d %d obj ', $objnum, $objgen); 1071 $objind->outobjdeep($fh, $self); 1072 $fh->print(" endobj\n"); 1073 1074 # Note that we've output this obj, not forgetting to update 1075 # the cache of whats printed. 1076 unless (exists $self->{' printed_cache'}{$objind}) { 1077 push @{$self->{' printed'}}, $objind; 1078 $self->{' printed_cache'}{$objind}++; 1079 } 1080 } 1081 return $self; 1082} 1083 1084=head2 $p->copy($outpdf, \&filter) 1085 1086Iterates over every object in the file reading the object, calling filter with the object 1087and outputting the result. if filter is not defined, then just copies input to output. 1088 1089=cut 1090 1091sub copy { 1092 my ($self, $out, $filter) = @_; 1093 my ($obj, $minl, $mini, $ming); 1094 1095 foreach my $key (grep { not m/^[\s\-]/ } keys %$self) { 1096 $out->{$key} = $self->{$key} unless defined $out->{$key}; 1097 } 1098 1099 my $tdict = $self; 1100 while (defined $tdict) { 1101 foreach my $i (sort {$a <=> $b} keys %{$tdict->{' xref'}}) { 1102 my ($nl, $ng, $nt) = @{$tdict->{' xref'}{$i}}; 1103 next unless $nt eq 'n'; 1104 1105 if ($nl < $minl or $mini == 0) { 1106 $mini = $i; 1107 $ming = $ng; 1108 $minl = $nl; 1109 } 1110 unless ($obj = $self->test_obj($i, $ng)) { 1111 $obj = PDF::API2::Basic::PDF::Objind->new(); 1112 $obj->{' objnum'} = $i; 1113 $obj->{' objgen'} = $ng; 1114 $self->add_obj($obj, $i, $ng); 1115 $obj->{' parent'} = $self; 1116 weaken $obj->{' parent'}; 1117 $obj->{' realised'} = 0; 1118 } 1119 $obj->realise; 1120 my $res = defined $filter ? &{$filter}($obj) : $obj; 1121 $out->new_obj($res) unless (!$res || $res->is_obj($out)); 1122 } 1123 $tdict = $tdict->{' prev'}; 1124 } 1125 1126 # test for linearized and remove it from output 1127 $obj = $self->test_obj($mini, $ming); 1128 if ($obj->isa('PDF::API2::Basic::PDF::Dict') && $obj->{'Linearized'}) { 1129 $out->free_obj($obj); 1130 } 1131 1132 return $self; 1133} 1134 1135 1136=head1 PRIVATE METHODS & FUNCTIONS 1137 1138The following methods and functions are considered private to this class. This 1139does not mean you cannot use them if you have a need, just that they aren't really 1140designed for users of this class. 1141 1142=head2 $offset = $p->locate_obj($num, $gen) 1143 1144Returns a file offset to the object asked for by following the chain of cross 1145reference tables until it finds the one you want. 1146 1147=cut 1148 1149sub locate_obj { 1150 my ($self, $num, $gen) = @_; 1151 1152 my $tdict = $self; 1153 while (defined $tdict) { 1154 if (ref $tdict->{' xref'}{$num}) { 1155 my $ref = $tdict->{' xref'}{$num}; 1156 return $ref unless scalar(@$ref) == 3; 1157 1158 if ($ref->[1] == $gen) { 1159 return $ref->[0] if $ref->[2] eq 'n'; 1160 return; # if $ref->[2] eq 'f'; 1161 } 1162 } 1163 $tdict = $tdict->{' prev'}; 1164 } 1165 return; 1166} 1167 1168 1169=head2 update($fh, $str, $instream) 1170 1171Keeps reading $fh for more data to ensure that $str has at least a line full 1172for C<readval> to work on. At this point we also take the opportunity to ignore 1173comments. 1174 1175=cut 1176 1177sub update { 1178 my ($fh, $str, $instream) = @_; 1179 print STDERR 'fpos=' . tell($fh) . ' strlen=' . length($str) . "\n" if $readDebug; 1180 if ($instream) { 1181 # we are inside a (possible binary) stream 1182 # so we fetch data till we see an 'endstream' 1183 # -- fredo/2004-09-03 1184 while ($str !~ m/endstream/ and not $fh->eof()) { 1185 print STDERR 'fpos=' . tell($fh) . ' strlen=' . length($str) . "\n" if $readDebug; 1186 $fh->read($str, 314, length($str)); 1187 } 1188 } 1189 else { 1190 $str =~ s/^$ws_char*//; 1191 while ($str !~ m/$cr/ and not $fh->eof()) { 1192 print STDERR 'fpos=' . tell($fh) . ' strlen=' . length($str) . "\n" if $readDebug; 1193 $fh->read($str, 314, length($str)); 1194 $str =~ s/^$ws_char*//so; 1195 } 1196 while ($str =~ m/^\%/) { # restructured by fredo/2003-03-23 1197 print STDERR 'fpos=' . tell($fh) . ' strlen=' . length($str) . "\n" if $readDebug; 1198 $fh->read($str, 314, length($str)) while ($str !~ m/$cr/ and not $fh->eof()); 1199 $str =~ s/^\%[^\015\012]*$ws_char*//so; # fixed for reportlab -- fredo 1200 } 1201 } 1202 1203 return $str; 1204} 1205 1206=head2 $objind = $p->test_obj($num, $gen) 1207 1208Tests the cache to see whether an object reference (which may or may not have 1209been getobj()ed) has been cached. Returns it if it has. 1210 1211=cut 1212 1213sub test_obj { 1214 my ($self, $num, $gen) = @_; 1215 return $self->{' objcache'}{$num, $gen}; 1216} 1217 1218 1219=head2 $p->add_obj($objind) 1220 1221Adds the given object to the internal object cache. 1222 1223=cut 1224 1225sub add_obj { 1226 my ($self, $obj, $num, $gen) = @_; 1227 1228 $self->{' objcache'}{$num, $gen} = $obj; 1229 $self->{' objects'}{$obj->uid()} = [$num, $gen]; 1230 # weaken $self->{' objcache'}{$num, $gen}; 1231 return $obj; 1232} 1233 1234 1235=head2 $tdict = $p->readxrtr($xpos) 1236 1237Recursive function which reads each of the cross-reference and trailer tables 1238in turn until there are no more. 1239 1240Returns a dictionary corresponding to the trailer chain. Each trailer also 1241includes the corresponding cross-reference table. 1242 1243The structure of the xref private element in a trailer dictionary is of an 1244anonymous hash of cross reference elements by object number. Each element 1245consists of an array of 3 elements corresponding to the three elements read 1246in [location, generation number, free or used]. See the PDF specification 1247for details. 1248 1249=cut 1250 1251sub _unpack_xref_stream { 1252 my ($self, $width, $data) = @_; 1253 1254 return unpack('C', $data) if $width == 1; 1255 return unpack('n', $data) if $width == 2; 1256 return unpack('N', "\x00$data") if $width == 3; 1257 return unpack('N', $data) if $width == 4; 1258 return unpack('Q>', $data) if $width == 8; 1259 1260 die "Unsupported xref stream entry width: $width"; 1261} 1262 1263sub readxrtr { 1264 my ($self, $xpos) = @_; 1265 my ($tdict, $buf, $xmin, $xnum, $xdiff); 1266 1267 my $fh = $self->{' INFILE'}; 1268 $fh->seek($xpos, 0); 1269 $fh->read($buf, 22); 1270 $buf = update($fh, $buf); # fix for broken JAWS xref calculation. 1271 1272 my $xlist = {}; 1273 1274 ## seams that some products calculate wrong prev entries (short) 1275 ## so we seek ahead to find one -- fredo; save for now 1276 #while($buf !~ m/^xref$cr/i && !eof($fh)) 1277 #{ 1278 # $buf =~ s/^(\s+|\S+|.)//i; 1279 # $buf=update($fh,$buf); 1280 #} 1281 1282 if ($buf =~ s/^xref$cr//i) { 1283 # Plain XRef tables. 1284 while ($buf =~ m/^$ws_char*([0-9]+)$ws_char+([0-9]+)$ws_char*$cr(.*?)$/s) { 1285 my $old_buf = $buf; 1286 $xmin = $1; 1287 $xnum = $2; 1288 $buf = $3; 1289 unless ($old_buf =~ /^[0-9]+ [0-9]+$cr/) { 1290 # See PDF 1.7 section 7.5.4: Cross-Reference Table 1291 warn q{Malformed xref in PDF file: subsection shall begin with a line containing two numbers separated by a SPACE (20h)}; 1292 } 1293 $xdiff = length($buf); 1294 1295 $fh->read($buf, 20 * $xnum - $xdiff + 15, $xdiff); 1296 while ($xnum-- > 0 and $buf =~ s/^0*([0-9]*)$ws_char+0*([0-9]+)$ws_char+([nf])$cr//) { 1297 $xlist->{$xmin} = [$1, $2, $3] unless exists $xlist->{$xmin}; 1298 $xmin++; 1299 } 1300 } 1301 1302 if ($buf !~ /^\s*trailer\b/i) { 1303 die "Malformed trailer in PDF file $self->{' fname'} at " . ($fh->tell - length($buf)); 1304 } 1305 1306 $buf =~ s/^\s*trailer\b//i; 1307 1308 ($tdict, $buf) = $self->readval($buf); 1309 } 1310 elsif ($buf =~ m/^(\d+)\s+(\d+)\s+obj/i) { 1311 my ($xref_obj, $xref_gen) = ($1, $2); 1312 1313 # XRef streams. 1314 ($tdict, $buf) = $self->readval($buf); 1315 1316 unless ($tdict->{' stream'}) { 1317 die "Malformed XRefStm at $xref_obj $xref_gen obj in PDF file $self->{' fname'}"; 1318 } 1319 $tdict->read_stream(1); 1320 1321 my $stream = $tdict->{' stream'}; 1322 my @widths = map { $_->val } @{$tdict->{W}->val}; 1323 1324 my $start = 0; 1325 my $last; 1326 1327 my @index; 1328 if (defined $tdict->{Index}) { 1329 @index = map { $_->val() } @{$tdict->{Index}->val}; 1330 } 1331 else { 1332 @index = (0, $tdict->{Size}->val); 1333 } 1334 1335 while (scalar @index) { 1336 $start = shift(@index); 1337 $last = $start + shift(@index) - 1; 1338 1339 for my $i ($start...$last) { 1340 # Replaced "for $xmin" because it creates a loop-specific local variable, and we 1341 # need $xmin to be correct for maxobj below. 1342 $xmin = $i; 1343 1344 my @cols; 1345 1346 for my $w (@widths) { 1347 my $data; 1348 $data = $self->_unpack_xref_stream($w, substr($stream, 0, $w, '')) if $w; 1349 1350 push @cols, $data; 1351 } 1352 1353 $cols[0] = 1 unless defined $cols[0]; 1354 if ($cols[0] > 2) { 1355 die "Invalid XRefStm entry type ($cols[0]) at $xref_obj $xref_gen obj"; 1356 } 1357 1358 next if exists $xlist->{$xmin}; 1359 1360 my @objind = ($cols[1], defined($cols[2]) ? $cols[2] : ($xmin ? 0 : 65535)); 1361 push @objind, ($cols[0] == 0 ? 'f' : 'n') if $cols[0] < 2; 1362 1363 $xlist->{$xmin} = \@objind; 1364 } 1365 } 1366 } 1367 else { 1368 die "Malformed xref in PDF file $self->{' fname'}"; 1369 } 1370 1371 $tdict->{' loc'} = $xpos; 1372 $tdict->{' xref'} = $xlist; 1373 $self->{' maxobj'} = $xmin + 1 if $xmin + 1 > $self->{' maxobj'}; 1374 $tdict->{' prev'} = $self->readxrtr($tdict->{'Prev'}->val) 1375 if (defined $tdict->{'Prev'} and $tdict->{'Prev'}->val != 0); 1376 delete $tdict->{' prev'} unless defined $tdict->{' prev'}; 1377 return $tdict; 1378} 1379 1380 1381=head2 $p->out_trailer($tdict) 1382 1383Outputs the body and trailer for a PDF file by outputting all the objects in 1384the ' outlist' and then outputting a xref table for those objects and any 1385freed ones. It then outputs the trailing dictionary and the trailer code. 1386 1387=cut 1388 1389sub out_trailer { 1390 my ($self, $tdict, $update) = @_; 1391 my $fh = $self->{' OUTFILE'}; 1392 1393 while (@{$self->{' outlist'}}) { 1394 $self->ship_out(); 1395 } 1396 1397 # When writing new trailers, most dictionary entries get copied from the 1398 # previous trailer, but entries related to cross-reference streams should 1399 # get removed (and possibly recreated below). 1400 delete $tdict->{$_} for (# Entries common to streams 1401 qw(Length Filter DecodeParms F FFilter FDecodeParms DL), 1402 1403 # Entries specific to cross-reference streams 1404 qw(Index W XRefStm)); 1405 1406 $tdict->{'Size'} = PDFNum($self->{' maxobj'}); 1407 1408 my $tloc = $fh->tell(); 1409 my @out; 1410 1411 my @xreflist = sort { $self->{' objects'}{$a->uid}[0] <=> $self->{' objects'}{$b->uid}[0] } (@{$self->{' printed'} || []}, @{$self->{' free'} || []}); 1412 1413 my ($i, $j, $k); 1414 unless ($update) { 1415 $i = 1; 1416 for ($j = 0; $j < @xreflist; $j++) { 1417 my @inserts; 1418 $k = $xreflist[$j]; 1419 while ($i < $self->{' objects'}{$k->uid}[0]) { 1420 my ($n) = PDF::API2::Basic::PDF::Objind->new(); 1421 $self->add_obj($n, $i, 0); 1422 $self->free_obj($n); 1423 push(@inserts, $n); 1424 $i++; 1425 } 1426 splice(@xreflist, $j, 0, @inserts); 1427 $j += @inserts; 1428 $i++; 1429 } 1430 } 1431 1432 my @freelist = sort { $self->{' objects'}{$a->uid}[0] <=> $self->{' objects'}{$b->uid}[0] } @{$self->{' free'} || []}; 1433 1434 $j = 0; my $first = -1; $k = 0; 1435 for ($i = 0; $i <= $#xreflist + 1; $i++) { 1436 if ($i > $#xreflist || $self->{' objects'}{$xreflist[$i]->uid}[0] != $j + 1) { 1437 push @out, ($first == -1 ? "0 " : "$self->{' objects'}{$xreflist[$first]->uid}[0] ") . ($i - $first) . "\n"; 1438 if ($first == -1) { 1439 push @out, sprintf("%010d 65535 f \n", defined $freelist[$k] ? $self->{' objects'}{$freelist[$k]->uid}[0] : 0); 1440 $first = 0; 1441 } 1442 for ($j = $first; $j < $i; $j++) { 1443 my $xref = $xreflist[$j]; 1444 if (defined($freelist[$k]) and defined($xref) and "$freelist[$k]" eq "$xref") { 1445 $k++; 1446 push @out, pack("A10AA5A4", 1447 sprintf("%010d", (defined $freelist[$k] ? 1448 $self->{' objects'}{$freelist[$k]->uid}[0] : 0)), " ", 1449 sprintf("%05d", $self->{' objects'}{$xref->uid}[1] + 1), 1450 " f \n"); 1451 } 1452 else { 1453 push @out, pack("A10AA5A4", sprintf("%010d", $self->{' locs'}{$xref->uid}), " ", 1454 sprintf("%05d", $self->{' objects'}{$xref->uid}[1]), 1455 " n \n"); 1456 } 1457 } 1458 $first = $i; 1459 $j = $self->{' objects'}{$xreflist[$i]->uid}[0] if ($i < scalar @xreflist); 1460 } 1461 else { 1462 $j++; 1463 } 1464 } 1465 if (exists $tdict->{'Type'} and $tdict->{'Type'}->val() eq 'XRef') { 1466 my (@index, @stream); 1467 for (@out) { 1468 my @a = split; 1469 @a == 2 ? push @index, @a : push @stream, \@a; 1470 } 1471 my $i = $self->{' maxobj'}++; 1472 $self->add_obj($tdict, $i, 0); 1473 $self->out_obj($tdict ); 1474 1475 push @index, $i, 1; 1476 push @stream, [$tloc, 0, 'n']; 1477 1478 my $len = $tloc > 0xFFFF ? 4 : 2; # don't expect files > 4 Gb 1479 my $tpl = $tloc > 0xFFFF ? 'CNC' : 'CnC'; # don't expect gennum > 255, it's absurd. 1480 # Adobe doesn't use them anymore anyway 1481 my $stream = ''; 1482 my @prev = (0) x ($len + 2); 1483 for (@stream) { 1484 $_->[1] = 0 if $_->[2] eq 'f' and $_->[1] == 65535; 1485 my @line = unpack 'C*', pack $tpl, $_->[2] eq 'n' ? 1 : 0, @{$_}[0..1]; 1486 1487 $stream .= pack 'C*', 2, # prepend filtering method, "PNG Up" 1488 map {($line[$_] - $prev[$_] + 256) % 256 } 0 .. $#line; 1489 @prev = @line; 1490 } 1491 $tdict->{'Size'} = PDFNum($i + 1); 1492 $tdict->{'Index'} = PDFArray(map PDFNum( $_ ), @index); 1493 $tdict->{'W'} = PDFArray(map PDFNum( $_ ), 1, $len, 1); 1494 $tdict->{'Filter'} = PDFName('FlateDecode'); 1495 1496 $tdict->{'DecodeParms'} = PDFDict(); 1497 $tdict->{'DecodeParms'}->val->{'Predictor'} = PDFNum(12); 1498 $tdict->{'DecodeParms'}->val->{'Columns'} = PDFNum($len + 2); 1499 1500 $stream = PDF::API2::Basic::PDF::Filter::FlateDecode->new->outfilt($stream, 1); 1501 $tdict->{' stream'} = $stream; 1502 $tdict->{' nofilt'} = 1; 1503 delete $tdict->{'Length'}; 1504 $self->ship_out(); 1505 } 1506 else { 1507 $fh->print("xref\n", @out, "trailer\n"); 1508 $tdict->outobjdeep($fh, $self); 1509 $fh->print("\n"); 1510 } 1511 $fh->print("startxref\n$tloc\n%%EOF\n"); 1512} 1513 1514 1515=head2 PDF::API2::Basic::PDF::File->_new 1516 1517Creates a very empty PDF file object (used by new and open) 1518 1519=cut 1520 1521sub _new { 1522 my $class = shift(); 1523 my $self = {}; 1524 1525 bless $self, $class; 1526 $self->{' outlist'} = []; 1527 $self->{' outlist_cache'} = {}; # A cache of whats in the 'outlist' 1528 $self->{' maxobj'} = 1; 1529 $self->{' objcache'} = {}; 1530 $self->{' objects'} = {}; 1531 1532 return $self; 1533} 1534 15351; 1536 1537=head1 AUTHOR 1538 1539Martin Hosken Martin_Hosken@sil.org 1540 1541Copyright Martin Hosken 1999 and onwards 1542 1543No warranty or expression of effectiveness, least of all regarding anyone's 1544safety, is implied in this software or documentation. 1545