1#------------------------------------------------------------------------------ 2# File: ZIP.pm 3# 4# Description: Read ZIP archive meta information 5# 6# Revisions: 10/28/2007 - P. Harvey Created 7# 8# References: 1) http://www.pkware.com/documents/casestudies/APPNOTE.TXT 9# 2) http://www.cpanforum.com/threads/9046 10# 3) http://www.gzip.org/zlib/rfc-gzip.html 11# 4) http://DataCompression.info/ArchiveFormats/RAR202.txt 12# 5) https://jira.atlassian.com/browse/CONF-21706 13# 6) http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/indesign/cs55-docs/IDML/idml-specification.pdf 14#------------------------------------------------------------------------------ 15 16package Image::ExifTool::ZIP; 17 18use strict; 19use vars qw($VERSION $warnString); 20use Image::ExifTool qw(:DataAccess :Utils); 21 22$VERSION = '1.26'; 23 24sub WarnProc($) { $warnString = $_[0]; } 25 26# file types for recognized Open Document "mimetype" values 27my %openDocType = ( 28 'application/vnd.oasis.opendocument.database' => 'ODB', #5 29 'application/vnd.oasis.opendocument.chart' => 'ODC', #5 30 'application/vnd.oasis.opendocument.formula' => 'ODF', #5 31 'application/vnd.oasis.opendocument.graphics' => 'ODG', #5 32 'application/vnd.oasis.opendocument.image' => 'ODI', #5 33 'application/vnd.oasis.opendocument.presentation' => 'ODP', 34 'application/vnd.oasis.opendocument.spreadsheet' => 'ODS', 35 'application/vnd.oasis.opendocument.text' => 'ODT', 36 'application/vnd.adobe.indesign-idml-package' => 'IDML', #6 (not open doc) 37 'application/epub+zip' => 'EPUB', #PH (not open doc) 38); 39 40# iWork file types based on names of files found in the zip archive 41my %iWorkFile = ( 42 'Index/Slide.iwa' => 'KEY', 43 'Index/Tables/DataList.iwa' => 'NUMBERS', 44); 45 46my %iWorkType = ( 47 NUMBERS => 'NUMBERS', 48 PAGES => 'PAGES', 49 KEY => 'KEY', 50 KTH => 'KTH', 51 NMBTEMPLATE => 'NMBTEMPLATE', 52); 53 54# ZIP metadata blocks 55%Image::ExifTool::ZIP::Main = ( 56 PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData, 57 GROUPS => { 2 => 'Other' }, 58 FORMAT => 'int16u', 59 NOTES => q{ 60 The following tags are extracted from ZIP archives. ExifTool also extracts 61 additional meta information from compressed documents inside some ZIP-based 62 files such Office Open XML (DOCX, PPTX and XLSX), Open Document (ODB, ODC, 63 ODF, ODG, ODI, ODP, ODS and ODT), iWork (KEY, PAGES, NUMBERS), Capture One 64 Enhanced Image Package (EIP), Adobe InDesign Markup Language (IDML), 65 Electronic Publication (EPUB), and Sketch design files (SKETCH). The 66 ExifTool family 3 groups may be used to organize ZIP tags by embedded 67 document number (ie. the exiftool C<-g3> option). 68 }, 69 2 => 'ZipRequiredVersion', 70 3 => { 71 Name => 'ZipBitFlag', 72 PrintConv => '$val ? sprintf("0x%.4x",$val) : $val', 73 }, 74 4 => { 75 Name => 'ZipCompression', 76 PrintConv => { 77 0 => 'None', 78 1 => 'Shrunk', 79 2 => 'Reduced with compression factor 1', 80 3 => 'Reduced with compression factor 2', 81 4 => 'Reduced with compression factor 3', 82 5 => 'Reduced with compression factor 4', 83 6 => 'Imploded', 84 7 => 'Tokenized', 85 8 => 'Deflated', 86 9 => 'Enhanced Deflate using Deflate64(tm)', 87 10 => 'Imploded (old IBM TERSE)', 88 12 => 'BZIP2', 89 14 => 'LZMA (EFS)', 90 18 => 'IBM TERSE (new)', 91 19 => 'IBM LZ77 z Architecture (PFS)', 92 96 => 'JPEG recompressed', #2 93 97 => 'WavPack compressed', #2 94 98 => 'PPMd version I, Rev 1', 95 }, 96 }, 97 5 => { 98 Name => 'ZipModifyDate', 99 Format => 'int32u', 100 Groups => { 2 => 'Time' }, 101 ValueConv => sub { 102 my $val = shift; 103 return sprintf('%.4d:%.2d:%.2d %.2d:%.2d:%.2d', 104 ($val >> 25) + 1980, # year 105 ($val >> 21) & 0x0f, # month 106 ($val >> 16) & 0x1f, # day 107 ($val >> 11) & 0x1f, # hour 108 ($val >> 5) & 0x3f, # minute 109 ($val & 0x1f) * 2 # second 110 ); 111 }, 112 PrintConv => '$self->ConvertDateTime($val)', 113 }, 114 7 => { Name => 'ZipCRC', Format => 'int32u', PrintConv => 'sprintf("0x%.8x",$val)' }, 115 9 => { Name => 'ZipCompressedSize', Format => 'int32u' }, 116 11 => { Name => 'ZipUncompressedSize', Format => 'int32u' }, 117 13 => { 118 Name => 'ZipFileNameLength', 119 # don't store a tag -- just extract the value for use with ZipFileName 120 Hidden => 1, 121 RawConv => '$$self{ZipFileNameLength} = $val; undef', 122 }, 123 # 14 => 'ZipExtraFieldLength', 124 15 => { 125 Name => 'ZipFileName', 126 Format => 'string[$$self{ZipFileNameLength}]', 127 }, 128); 129 130# GNU ZIP tags (ref 3) 131%Image::ExifTool::ZIP::GZIP = ( 132 PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData, 133 GROUPS => { 2 => 'Other' }, 134 NOTES => q{ 135 These tags are extracted from GZIP (GNU ZIP) archives, but currently only 136 for the first file in the archive. 137 }, 138 2 => { 139 Name => 'Compression', 140 PrintConv => { 141 8 => 'Deflated', 142 }, 143 }, 144 3 => { 145 Name => 'Flags', 146 PrintConv => { BITMASK => { 147 0 => 'Text', 148 1 => 'CRC16', 149 2 => 'ExtraFields', 150 3 => 'FileName', 151 4 => 'Comment', 152 }}, 153 }, 154 4 => { 155 Name => 'ModifyDate', 156 Format => 'int32u', 157 Groups => { 2 => 'Time' }, 158 ValueConv => 'ConvertUnixTime($val,1)', 159 PrintConv => '$self->ConvertDateTime($val)', 160 }, 161 8 => { 162 Name => 'ExtraFlags', 163 PrintConv => { 164 0 => '(none)', 165 2 => 'Maximum Compression', 166 4 => 'Fastest Algorithm', 167 }, 168 }, 169 9 => { 170 Name => 'OperatingSystem', 171 PrintConv => { 172 0 => 'FAT filesystem (MS-DOS, OS/2, NT/Win32)', 173 1 => 'Amiga', 174 2 => 'VMS (or OpenVMS)', 175 3 => 'Unix', 176 4 => 'VM/CMS', 177 5 => 'Atari TOS', 178 6 => 'HPFS filesystem (OS/2, NT)', 179 7 => 'Macintosh', 180 8 => 'Z-System', 181 9 => 'CP/M', 182 10 => 'TOPS-20', 183 11 => 'NTFS filesystem (NT)', 184 12 => 'QDOS', 185 13 => 'Acorn RISCOS', 186 255 => 'unknown', 187 }, 188 }, 189 10 => 'ArchivedFileName', 190 11 => 'Comment', 191); 192 193# RAR tags (ref 4) 194%Image::ExifTool::ZIP::RAR = ( 195 PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData, 196 GROUPS => { 2 => 'Other' }, 197 NOTES => 'These tags are extracted from RAR archive files.', 198 0 => { 199 Name => 'CompressedSize', 200 Format => 'int32u', 201 }, 202 4 => { 203 Name => 'UncompressedSize', 204 Format => 'int32u', 205 }, 206 8 => { 207 Name => 'OperatingSystem', 208 PrintConv => { 209 0 => 'MS-DOS', 210 1 => 'OS/2', 211 2 => 'Win32', 212 3 => 'Unix', 213 }, 214 }, 215 13 => { 216 Name => 'ModifyDate', 217 Format => 'int32u', 218 Groups => { 2 => 'Time' }, 219 ValueConv => sub { 220 my $val = shift; 221 return sprintf('%.4d:%.2d:%.2d %.2d:%.2d:%.2d', 222 ($val >> 25) + 1980, # year 223 ($val >> 21) & 0x0f, # month 224 ($val >> 16) & 0x1f, # day 225 ($val >> 11) & 0x1f, # hour 226 ($val >> 5) & 0x3f, # minute 227 ($val & 0x1f) * 2 # second 228 ); 229 }, 230 PrintConv => '$self->ConvertDateTime($val)', 231 }, 232 18 => { 233 Name => 'PackingMethod', 234 PrintHex => 1, 235 PrintConv => { 236 0x30 => 'Stored', 237 0x31 => 'Fastest', 238 0x32 => 'Fast', 239 0x33 => 'Normal', 240 0x34 => 'Good Compression', 241 0x35 => 'Best Compression', 242 }, 243 }, 244 19 => { 245 Name => 'FileNameLength', 246 Format => 'int16u', 247 Hidden => 1, 248 RawConv => '$$self{FileNameLength} = $val; undef', 249 }, 250 25 => { 251 Name => 'ArchivedFileName', 252 Format => 'string[$$self{FileNameLength}]', 253 }, 254); 255 256#------------------------------------------------------------------------------ 257# Extract information from a RAR file (ref 4) 258# Inputs: 0) ExifTool object reference, 1) dirInfo reference 259# Returns: 1 on success, 0 if this wasn't a valid RAR file 260sub ProcessRAR($$) 261{ 262 my ($et, $dirInfo) = @_; 263 my $raf = $$dirInfo{RAF}; 264 my ($flags, $buff); 265 266 return 0 unless $raf->Read($buff, 7) and $buff eq "Rar!\x1a\x07\0"; 267 268 $et->SetFileType(); 269 SetByteOrder('II'); 270 my $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::RAR'); 271 my $docNum = 0; 272 273 for (;;) { 274 # read block header 275 $raf->Read($buff, 7) == 7 or last; 276 my ($type, $flags, $size) = unpack('xxCvv', $buff); 277 $size -= 7; 278 if ($flags & 0x8000) { 279 $raf->Read($buff, 4) == 4 or last; 280 $size += unpack('V',$buff) - 4; 281 } 282 last if $size < 0; 283 next unless $size; # ignore blocks with no data 284 # don't try to read very large blocks unless LargeFileSupport is enabled 285 if ($size >= 0x80000000 and not $et->Options('LargeFileSupport')) { 286 $et->Warn('Large block encountered. Aborting.'); 287 last; 288 } 289 # process the block 290 if ($type == 0x74) { # file block 291 # read maximum 4 KB from a file block 292 my $n = $size > 4096 ? 4096 : $size; 293 $raf->Read($buff, $n) == $n or last; 294 # add compressed size to start of data so we can extract it with the other tags 295 $buff = pack('V',$size) . $buff; 296 $$et{DOC_NUM} = ++$docNum; 297 $et->ProcessDirectory({ DataPt => \$buff }, $tagTablePtr); 298 $size -= $n; 299 } elsif ($type == 0x75 and $size > 6) { # comment block 300 $raf->Read($buff, $size) == $size or last; 301 # save comment, only if "Stored" (this is untested) 302 if (Get8u(\$buff, 3) == 0x30) { 303 $et->FoundTag('Comment', substr($buff, 6)); 304 } 305 next; 306 } 307 # seek to the start of the next block 308 $raf->Seek($size, 1) or last if $size; 309 } 310 $$et{DOC_NUM} = 0; 311 if ($docNum > 1 and not $et->Options('Duplicates')) { 312 $et->Warn("Use the Duplicates option to extract tags for all $docNum files", 1); 313 } 314 315 return 1; 316} 317 318#------------------------------------------------------------------------------ 319# Extract information from a GNU ZIP file (ref 3) 320# Inputs: 0) ExifTool object reference, 1) dirInfo reference 321# Returns: 1 on success, 0 if this wasn't a valid GZIP file 322sub ProcessGZIP($$) 323{ 324 my ($et, $dirInfo) = @_; 325 my $raf = $$dirInfo{RAF}; 326 my ($flags, $buff); 327 328 return 0 unless $raf->Read($buff, 10) and $buff =~ /^\x1f\x8b\x08/; 329 330 $et->SetFileType(); 331 SetByteOrder('II'); 332 333 my $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::GZIP'); 334 $et->HandleTag($tagTablePtr, 2, Get8u(\$buff, 2)); 335 $et->HandleTag($tagTablePtr, 3, $flags = Get8u(\$buff, 3)); 336 $et->HandleTag($tagTablePtr, 4, Get32u(\$buff, 4)); 337 $et->HandleTag($tagTablePtr, 8, Get8u(\$buff, 8)); 338 $et->HandleTag($tagTablePtr, 9, Get8u(\$buff, 9)); 339 340 # extract file name and comment if they exist 341 if ($flags & 0x18) { 342 if ($flags & 0x04) { 343 # skip extra field 344 $raf->Read($buff, 2) == 2 or return 1; 345 my $len = Get16u(\$buff, 0); 346 $raf->Read($buff, $len) == $len or return 1; 347 } 348 $raf->Read($buff, 4096) or return 1; 349 my $pos = 0; 350 my $tagID; 351 # loop for ArchivedFileName (10) and Comment (11) tags 352 foreach $tagID (10, 11) { 353 my $mask = $tagID == 10 ? 0x08 : 0x10; 354 next unless $flags & $mask; 355 my $end = $buff =~ /\0/g ? pos($buff) - 1 : length($buff); 356 # (the doc specifies the string should be ISO 8859-1, 357 # but in OS X it seems to be UTF-8, so don't translate 358 # it because I could just as easily screw it up) 359 my $str = substr($buff, $pos, $end - $pos); 360 $et->HandleTag($tagTablePtr, $tagID, $str); 361 last if $end >= length $buff; 362 $pos = $end + 1; 363 } 364 } 365 return 1; 366} 367 368#------------------------------------------------------------------------------ 369# Call HandleTags for attributes of an Archive::Zip member 370# Inputs: 0) ExifTool object ref, 1) member ref, 2) optional tag table ref 371sub HandleMember($$;$) 372{ 373 my ($et, $member, $tagTablePtr) = @_; 374 $tagTablePtr or $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::Main'); 375 $et->HandleTag($tagTablePtr, 2, $member->versionNeededToExtract()); 376 $et->HandleTag($tagTablePtr, 3, $member->bitFlag()); 377 $et->HandleTag($tagTablePtr, 4, $member->compressionMethod()); 378 $et->HandleTag($tagTablePtr, 5, $member->lastModFileDateTime()); 379 $et->HandleTag($tagTablePtr, 7, $member->crc32()); 380 $et->HandleTag($tagTablePtr, 9, $member->compressedSize()); 381 $et->HandleTag($tagTablePtr, 11, $member->uncompressedSize()); 382 $et->HandleTag($tagTablePtr, 15, $member->fileName()); 383} 384 385#------------------------------------------------------------------------------ 386# Extract information from a ZIP file 387# Inputs: 0) ExifTool object reference, 1) dirInfo reference 388# Returns: 1 on success, 0 if this wasn't a valid ZIP file 389sub ProcessZIP($$) 390{ 391 my ($et, $dirInfo) = @_; 392 my $raf = $$dirInfo{RAF}; 393 my ($buff, $buf2, $zip); 394 395 return 0 unless $raf->Read($buff, 30) == 30 and $buff =~ /^PK\x03\x04/; 396 397 my $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::Main'); 398 my $docNum = 0; 399 400 # use Archive::Zip if available 401 for (;;) { 402 unless (eval { require Archive::Zip } and eval { require IO::File }) { 403 if ($$et{FILE_EXT} and $$et{FILE_EXT} ne 'ZIP') { 404 $et->Warn("Install Archive::Zip to decode compressed ZIP information"); 405 } 406 last; 407 } 408 # Archive::Zip requires a seekable IO::File object 409 my $fh; 410 if ($raf->{TESTED} >= 0) { 411 unless (eval { require IO::File }) { 412 # (this shouldn't happen because IO::File is a prerequisite of Archive::Zip) 413 $et->Warn("Install IO::File to decode compressed ZIP information"); 414 last; 415 } 416 $raf->Seek(0,0); 417 $fh = $raf->{FILE_PT}; 418 bless $fh, 'IO::File'; # Archive::Zip expects an IO::File object 419 } elsif (eval { require IO::String }) { 420 # read the whole file into memory (what else can I do?) 421 $raf->Slurp(); 422 $fh = new IO::String ${$raf->{BUFF_PT}}; 423 } else { 424 my $type = $raf->{FILE_PT} ? 'pipe or socket' : 'scalar reference'; 425 $et->Warn("Install IO::String to decode compressed ZIP information from a $type"); 426 last; 427 } 428 $et->VPrint(1, " --- using Archive::Zip ---\n"); 429 $zip = new Archive::Zip; 430 # catch all warnings! (Archive::Zip is bad for this) 431 local $SIG{'__WARN__'} = \&WarnProc; 432 my $status = $zip->readFromFileHandle($fh); 433 if ($status eq '4' and $raf->{TESTED} >= 0 and eval { require IO::String } and 434 $raf->Seek(0,2) and $raf->Tell() < 100000000) 435 { 436 # try again, reading it ourself this time in an attempt to avoid 437 # a failed test with Perl 5.6.2 GNU/Linux 2.6.32-5-686 i686-linux-64int-ld 438 $raf->Seek(0,0); 439 $raf->Slurp(); 440 $fh = new IO::String ${$raf->{BUFF_PT}}; 441 $zip = new Archive::Zip; 442 $status = $zip->readFromFileHandle($fh); 443 } 444 if ($status) { 445 undef $zip; 446 my %err = ( 1=>'Stream end error', 3=>'Format error', 4=>'IO error' ); 447 my $err = $err{$status} || "Error $status"; 448 $et->Warn("$err reading ZIP file"); 449 last; 450 } 451 $$dirInfo{ZIP} = $zip; 452 453 # check for an Office Open file (DOCX, etc) 454 # --> read '[Content_Types].xml' to determine the file type 455 my ($mime, @members); 456 my $cType = $zip->memberNamed('[Content_Types].xml'); 457 if ($cType) { 458 ($buff, $status) = $zip->contents($cType); 459 if (not $status and ( 460 # first look for the main document with the expected name 461 $buff =~ m{\sPartName\s*=\s*['"](?:/ppt/presentation.xml|/word/document.xml|/xl/workbook.xml)['"][^>]*\sContentType\s*=\s*(['"])([^"']+)\.main(\+xml)?\1} or 462 # then look for the main part 463 $buff =~ /<Override[^>]*\sPartName[^<]+\sContentType\s*=\s*(['"])([^"']+)\.main(\+xml)?\1/ or 464 # and if all else fails, use the default main 465 $buff =~ /ContentType\s*=\s*(['"])([^"']+)\.main(\+xml)?\1/)) 466 { 467 $mime = $2; 468 } 469 } 470 # check for docProps if we couldn't find a MIME type 471 $mime or @members = $zip->membersMatching('^docProps/.*\.(xml|XML)$'); 472 if ($mime or @members) { 473 $$dirInfo{MIME} = $mime; 474 require Image::ExifTool::OOXML; 475 Image::ExifTool::OOXML::ProcessDOCX($et, $dirInfo); 476 delete $$dirInfo{MIME}; 477 last; 478 } 479 480 # check for an EIP file 481 @members = $zip->membersMatching('^CaptureOne/.*\.(cos|COS)$'); 482 if (@members) { 483 require Image::ExifTool::CaptureOne; 484 Image::ExifTool::CaptureOne::ProcessEIP($et, $dirInfo); 485 last; 486 } 487 488 # check for an iWork file 489 @members = $zip->membersMatching('(?i)^(index\.(xml|apxl)|QuickLook/Thumbnail\.jpg|[^/]+\.(pages|numbers|key)/Index.(zip|xml|apxl))$'); 490 if (@members) { 491 require Image::ExifTool::iWork; 492 Image::ExifTool::iWork::Process_iWork($et, $dirInfo); 493 last; 494 } 495 496 # check for an Open Document, IDML or EPUB file 497 my $mType = $zip->memberNamed('mimetype'); 498 if ($mType) { 499 ($mime, $status) = $zip->contents($mType); 500 if (not $status and $mime =~ /([\x21-\xfe]+)/s) { 501 # clean up MIME type just in case (note that MIME is case insensitive) 502 $mime = lc $1; 503 $et->SetFileType($openDocType{$mime} || 'ZIP', $mime); 504 $et->Warn("Unrecognized MIMEType $mime") unless $openDocType{$mime}; 505 # extract Open Document metadata from "meta.xml" 506 my $meta = $zip->memberNamed('meta.xml'); 507 # IDML files have metadata in a different place (ref 6) 508 $meta or $meta = $zip->memberNamed('META-INF/metadata.xml'); 509 if ($meta) { 510 ($buff, $status) = $zip->contents($meta); 511 unless ($status) { 512 my %dirInfo = ( 513 DirName => 'XML', 514 DataPt => \$buff, 515 DirLen => length $buff, 516 DataLen => length $buff, 517 ); 518 # (avoid structure warnings when copying from XML) 519 my $oldWarn = $$et{NO_STRUCT_WARN}; 520 $$et{NO_STRUCT_WARN} = 1; 521 $et->ProcessDirectory(\%dirInfo, GetTagTable('Image::ExifTool::XMP::Main')); 522 $$et{NO_STRUCT_WARN} = $oldWarn; 523 } 524 } 525 # process rootfile of EPUB container if applicable 526 for (;;) { 527 last if $meta and $mime ne 'application/epub+zip'; 528 my $container = $zip->memberNamed('META-INF/container.xml'); 529 ($buff, $status) = $zip->contents($container); 530 last if $status; 531 $buff =~ /<rootfile\s+[^>]*?\bfull-path=(['"])(.*?)\1/s or last; 532 # load the rootfile data (OPF extension; contains XML metadata) 533 my $meta2 = $zip->memberNamed($2) or last; 534 $meta = $meta2; 535 ($buff, $status) = $zip->contents($meta); 536 last if $status; 537 # use opf:event to generate more meaningful tag names for dc:date 538 while ($buff =~ s{<dc:date opf:event="(\w+)">([^<]+)</dc:date>}{<dc:${1}Date>$2</dc:${1}Date>}s) { 539 my $dcTable = GetTagTable('Image::ExifTool::XMP::dc'); 540 my $tag = "${1}Date"; 541 AddTagToTable($dcTable, $tag, { 542 Name => ucfirst $tag, 543 Groups => { 2 => 'Time' }, 544 List => 'Seq', 545 %Image::ExifTool::XMP::dateTimeInfo 546 }) unless $$dcTable{$tag}; 547 } 548 my %dirInfo = ( 549 DataPt => \$buff, 550 DirLen => length $buff, 551 DataLen => length $buff, 552 IgnoreProp => { 'package' => 1, metadata => 1 }, 553 ); 554 # (avoid structure warnings when copying from XML) 555 my $oldWarn = $$et{NO_STRUCT_WARN}; 556 $$et{NO_STRUCT_WARN} = 1; 557 $et->ProcessDirectory(\%dirInfo, GetTagTable('Image::ExifTool::XMP::XML')); 558 $$et{NO_STRUCT_WARN} = $oldWarn; 559 last; 560 } 561 if ($openDocType{$mime} or $meta) { 562 # extract preview image(s) from "Thumbnails" directory if they exist 563 my $type; 564 my %tag = ( jpg => 'PreviewImage', png => 'PreviewPNG' ); 565 foreach $type ('jpg', 'png') { 566 my $thumb = $zip->memberNamed("Thumbnails/thumbnail.$type"); 567 next unless $thumb; 568 ($buff, $status) = $zip->contents($thumb); 569 $et->FoundTag($tag{$type}, $buff) unless $status; 570 } 571 last; # all done since we recognized the MIME type or found metadata 572 } 573 # continue on to list ZIP contents... 574 } 575 } 576 577 # otherwise just extract general ZIP information 578 $et->SetFileType(); 579 @members = $zip->members(); 580 my ($member, $iWorkType); 581 # special files to extract 582 my %extract = ( 583 'meta.json' => 1, 584 'previews/preview.png' => 'PreviewPNG', 585 'preview.jpg' => 'PreviewImage', # (iWork 2013 files) 586 'preview-web.jpg' => 'OtherImage', # (iWork 2013 files) 587 'preview-micro.jpg' => 'ThumbnailImage', # (iWork 2013 files) 588 'QuickLook/Thumbnail.jpg' => 'ThumbnailImage', # (iWork 2009 files) 589 'QuickLook/Preview.pdf' => 'PreviewPDF', # (iWork 2009 files) 590 ); 591 foreach $member (@members) { 592 $$et{DOC_NUM} = ++$docNum; 593 HandleMember($et, $member, $tagTablePtr); 594 my $file = $member->fileName(); 595 # extract things from Sketch files 596 if ($extract{$file}) { 597 ($buff, $status) = $zip->contents($member); 598 $status and $et->Warn("Error extracting $file"), next; 599 if ($file eq 'meta.json') { 600 $et->ExtractInfo(\$buff, { ReEntry => 1 }); 601 if ($$et{VALUE}{App} and $$et{VALUE}{App} =~ /sketch/i) { 602 $et->OverrideFileType('SKETCH'); 603 } 604 } else { 605 $et->FoundTag($extract{$file} => $buff); 606 } 607 } elsif ($file eq 'Index/Document.iwa' and not $iWorkType) { 608 my $type = $iWorkType{$$et{FILE_EXT} || ''}; 609 $iWorkType = $type || 'PAGES'; 610 } elsif ($iWorkFile{$file}) { 611 $iWorkType = $iWorkFile{$file}; 612 } 613 } 614 $et->OverrideFileType($iWorkType) if $iWorkType; 615 last; 616 } 617 # all done if we processed this using Archive::Zip 618 if ($zip) { 619 delete $$dirInfo{ZIP}; 620 delete $$et{DOC_NUM}; 621 if ($docNum > 1 and not $et->Options('Duplicates')) { 622 $et->Warn("Use the Duplicates option to extract tags for all $docNum files", 1); 623 } 624 return 1; 625 } 626# 627# process the ZIP file by hand (funny, but this seems easier than using Archive::Zip) 628# 629 $et->VPrint(1, " -- processing as binary data --\n"); 630 $raf->Seek(30, 0); 631 $et->SetFileType(); 632 SetByteOrder('II'); 633 634 # A. Local file header: 635 # local file header signature 0) 4 bytes (0x04034b50) 636 # version needed to extract 4) 2 bytes 637 # general purpose bit flag 6) 2 bytes 638 # compression method 8) 2 bytes 639 # last mod file time 10) 2 bytes 640 # last mod file date 12) 2 bytes 641 # crc-32 14) 4 bytes 642 # compressed size 18) 4 bytes 643 # uncompressed size 22) 4 bytes 644 # file name length 26) 2 bytes 645 # extra field length 28) 2 bytes 646 for (;;) { 647 my $len = Get16u(\$buff, 26) + Get16u(\$buff, 28); 648 $raf->Read($buf2, $len) == $len or last; 649 650 $$et{DOC_NUM} = ++$docNum; 651 $buff .= $buf2; 652 my %dirInfo = ( 653 DataPt => \$buff, 654 DataPos => $raf->Tell() - 30 - $len, 655 DataLen => 30 + $len, 656 DirStart => 0, 657 DirLen => 30 + $len, 658 ); 659 $et->ProcessDirectory(\%dirInfo, $tagTablePtr); 660 my $flags = Get16u(\$buff, 6); 661 if ($flags & 0x08) { 662 # we don't yet support skipping stream mode data 663 # (when this happens, the CRC, compressed size and uncompressed 664 # sizes are set to 0 in the header. Instead, they are stored 665 # after the compressed data with an optional header of 0x08074b50) 666 $et->Warn('Stream mode data encountered, file list may be incomplete'); 667 last; 668 } 669 $len = Get32u(\$buff, 18); # file data length 670 $raf->Seek($len, 1) or last; # skip file data 671 $raf->Read($buff, 30) == 30 and $buff =~ /^PK\x03\x04/ or last; 672 } 673 delete $$et{DOC_NUM}; 674 if ($docNum > 1 and not $et->Options('Duplicates')) { 675 $et->Warn("Use the Duplicates option to extract tags for all $docNum files", 1); 676 } 677 return 1; 678} 679 6801; # end 681 682__END__ 683 684=head1 NAME 685 686Image::ExifTool::ZIP - Read ZIP archive meta information 687 688=head1 SYNOPSIS 689 690This module is used by Image::ExifTool 691 692=head1 DESCRIPTION 693 694This module contains definitions required by Image::ExifTool to extract meta 695information from ZIP, GZIP and RAR archives. This includes ZIP-based file 696types like Office Open XML (DOCX, PPTX and XLSX), Open Document (ODB, ODC, 697ODF, ODG, ODI, ODP, ODS and ODT), iWork (KEY, PAGES, NUMBERS), Capture One 698Enhanced Image Package (EIP), Adobe InDesign Markup Language (IDML), 699Electronic Publication (EPUB), and Sketch design files (SKETCH). 700 701=head1 AUTHOR 702 703Copyright 2003-2021, Phil Harvey (philharvey66 at gmail.com) 704 705This library is free software; you can redistribute it and/or modify it 706under the same terms as Perl itself. 707 708=head1 REFERENCES 709 710=over 4 711 712=item L<http://www.pkware.com/documents/casestudies/APPNOTE.TXT> 713 714=item L<http://www.gzip.org/zlib/rfc-gzip.html> 715 716=item L<http://DataCompression.info/ArchiveFormats/RAR202.txt> 717 718=back 719 720=head1 SEE ALSO 721 722L<Image::ExifTool::TagNames/ZIP Tags>, 723L<Image::ExifTool::TagNames/OOXML Tags>, 724L<Image::ExifTool(3pm)|Image::ExifTool> 725 726=cut 727 728