1# IPTCInfo: extractor for IPTC metadata embedded in images 2# Copyright (C) 2000-2004 Josh Carter <josh@multipart-mixed.com> 3# All rights reserved. 4# 5# This program is free software; you can redistribute it and/or modify 6# it under the same terms as Perl itself. 7 8package Image::IPTCInfo; 9use IO::File; 10 11use vars qw($VERSION); 12$VERSION = '1.95'; 13 14# 15# Global vars 16# 17use vars ('%datasets', # master list of dataset id's 18 '%datanames', # reverse mapping (for saving) 19 '%listdatasets', # master list of repeating dataset id's 20 '%listdatanames', # reverse 21 '$MAX_FILE_OFFSET', # maximum offset for blind scan 22 ); 23 24$MAX_FILE_OFFSET = 8192; # default blind scan depth 25 26# Debug off for production use 27my $debugMode = 0; 28my $error; 29 30##################################### 31# These names match the codes defined in ITPC's IIM record 2. 32# This hash is for non-repeating data items; repeating ones 33# are in %listdatasets below. 34%datasets = ( 35# 0 => 'record version', # skip -- binary data 36 5 => 'object name', 37 7 => 'edit status', 38 8 => 'editorial update', 39 10 => 'urgency', 40 12 => 'subject reference', 41 15 => 'category', 42# 20 => 'supplemental category', # in listdatasets (see below) 43 22 => 'fixture identifier', 44# 25 => 'keywords', # in listdatasets 45 26 => 'content location code', 46 27 => 'content location name', 47 30 => 'release date', 48 35 => 'release time', 49 37 => 'expiration date', 50 38 => 'expiration time', 51 40 => 'special instructions', 52 42 => 'action advised', 53 45 => 'reference service', 54 47 => 'reference date', 55 50 => 'reference number', 56 55 => 'date created', 57 60 => 'time created', 58 62 => 'digital creation date', 59 63 => 'digital creation time', 60 65 => 'originating program', 61 70 => 'program version', 62 75 => 'object cycle', 63 80 => 'by-line', 64 85 => 'by-line title', 65 90 => 'city', 66 92 => 'sub-location', 67 95 => 'province/state', 68 100 => 'country/primary location code', 69 101 => 'country/primary location name', 70 103 => 'original transmission reference', 71 105 => 'headline', 72 110 => 'credit', 73 115 => 'source', 74 116 => 'copyright notice', 75# 118 => 'contact', # in listdatasets 76 120 => 'caption/abstract', 77 121 => 'local caption', 78 122 => 'writer/editor', 79# 125 => 'rasterized caption', # unsupported (binary data) 80 130 => 'image type', 81 131 => 'image orientation', 82 135 => 'language identifier', 83 200 => 'custom1', # These are NOT STANDARD, but are used by 84 201 => 'custom2', # Fotostation. Use at your own risk. They're 85 202 => 'custom3', # here in case you need to store some special 86 203 => 'custom4', # stuff, but note that other programs won't 87 204 => 'custom5', # recognize them and may blow them away if 88 205 => 'custom6', # you open and re-save the file. (Except with 89 206 => 'custom7', # Fotostation, of course.) 90 207 => 'custom8', 91 208 => 'custom9', 92 209 => 'custom10', 93 210 => 'custom11', 94 211 => 'custom12', 95 212 => 'custom13', 96 213 => 'custom14', 97 214 => 'custom15', 98 215 => 'custom16', 99 216 => 'custom17', 100 217 => 'custom18', 101 218 => 'custom19', 102 219 => 'custom20', 103 ); 104 105# this will get filled in if we save data back to file 106%datanames = (); 107 108%listdatasets = ( 109 20 => 'supplemental category', 110 25 => 'keywords', 111 118 => 'contact', 112 ); 113 114# this will get filled in if we save data back to file 115%listdatanames = (); 116 117####################################################################### 118# New, Save, Destroy, Error 119####################################################################### 120 121# 122# new 123# 124# $info = new IPTCInfo('image filename goes here') 125# 126# Returns IPTCInfo object filled with metadata from the given image 127# file. File on disk will be closed, and changes made to the IPTCInfo 128# object will *not* be flushed back to disk. 129# 130sub new 131{ 132 my ($pkg, $file, $force) = @_; 133 134 my $input_is_handle = eval {$file->isa('IO::Handle')}; 135 if ($input_is_handle and not $file->isa('IO::Seekable')) { 136 $error = "Handle must be seekable."; Log($error); 137 return undef; 138 } 139 140 # 141 # Open file and snarf data from it. 142 # 143 my $handle = $input_is_handle ? $file : IO::File->new($file); 144 unless($handle) 145 { 146 $error = "Can't open file: $!"; Log($error); 147 return undef; 148 } 149 150 binmode($handle); 151 152 my $datafound = ScanToFirstIMMTag($handle); 153 unless ($datafound || defined($force)) 154 { 155 $error = "No IPTC data found."; Log($error); 156 # don't close unless we opened it 157 $handle->close() unless $input_is_handle; 158 return undef; 159 } 160 161 my $self = bless 162 { 163 '_data' => {}, # empty hashes; wil be 164 '_listdata' => {}, # filled in CollectIIMInfo 165 '_handle' => $handle, 166 }, $pkg; 167 168 $self->{_filename} = $file unless $input_is_handle; 169 170 # Do the real snarfing here 171 $self->CollectIIMInfo() if $datafound; 172 173 $handle->close() unless $input_is_handle; 174 175 return $self; 176} 177 178# 179# create 180# 181# Like new, but forces an object to always be returned. This allows 182# you to start adding stuff to files that don't have IPTC info and then 183# save it. 184# 185sub create 186{ 187 my ($pkg, $filename) = @_; 188 189 return new($pkg, $filename, 'force'); 190} 191 192# 193# Save 194# 195# Saves JPEG with IPTC data back to the same file it came from. 196# 197sub Save 198{ 199 my ($self, $options) = @_; 200 201 return $self->SaveAs($self->{'_filename'}, $options); 202} 203 204# 205# Save 206# 207# Saves JPEG with IPTC data to a given file name. 208# 209sub SaveAs 210{ 211 my ($self, $newfile, $options) = @_; 212 213 # 214 # Open file and snarf data from it. 215 # 216 my $handle = $self->{_filename} ? IO::File->new($self->{_filename}) : $self->{_handle}; 217 unless($handle) 218 { 219 $error = "Can't open file: $!"; Log($error); 220 return undef; 221 } 222 223 $handle->seek(0, 0); 224 binmode($handle); 225 226 unless (FileIsJPEG($handle)) 227 { 228 $error = "Source file is not a JPEG; I can only save JPEGs. Sorry."; 229 Log($error); 230 return undef; 231 } 232 233 my $ret = JPEGCollectFileParts($handle, $options); 234 235 if ($ret == 0) 236 { 237 Log("collectfileparts failed"); 238 return undef; 239 } 240 241 if ($self->{_filename}) { 242 $handle->close(); 243 unless ($handle = IO::File->new($newfile, ">")) { 244 $error = "Can't open output file: $!"; Log($error); 245 return undef; 246 } 247 binmode($handle); 248 } else { 249 unless ($handle->truncate(0)) { 250 $error = "Can't truncate, handle might be read-only"; Log($error); 251 return undef; 252 } 253 } 254 255 my ($start, $end, $adobe) = @$ret; 256 257 if (defined($options) && defined($options->{'discardAdobeParts'})) 258 { 259 undef $adobe; 260 } 261 262 263 $handle->print($start); 264 $handle->print($self->PhotoshopIIMBlock($adobe, $self->PackedIIMData())); 265 $handle->print($end); 266 267 $handle->close() if $self->{_filename}; 268 269 return 1; 270} 271 272# 273# DESTROY 274# 275# Called when object is destroyed. No action necessary in this case. 276# 277sub DESTROY 278{ 279 # no action necessary 280} 281 282# 283# Error 284# 285# Returns description of the last error. 286# 287sub Error 288{ 289 return $error; 290} 291 292####################################################################### 293# Attributes for clients 294####################################################################### 295 296# 297# Attribute/SetAttribute 298# 299# Returns/Changes value of a given data item. 300# 301sub Attribute 302{ 303 my ($self, $attribute) = @_; 304 305 return $self->{_data}->{$attribute}; 306} 307 308sub SetAttribute 309{ 310 my ($self, $attribute, $newval) = @_; 311 312 $self->{_data}->{$attribute} = $newval; 313} 314 315sub ClearAttributes 316{ 317 my $self = shift; 318 319 $self->{_data} = {}; 320} 321 322sub ClearAllData 323{ 324 my $self = shift; 325 326 $self->{_data} = {}; 327 $self->{_listdata} = {}; 328} 329 330# 331# Keywords/Clear/Add 332# 333# Returns reference to a list of keywords/clears the keywords 334# list/adds a keyword. 335# 336sub Keywords 337{ 338 my $self = shift; 339 return $self->{_listdata}->{'keywords'}; 340} 341 342sub ClearKeywords 343{ 344 my $self = shift; 345 $self->{_listdata}->{'keywords'} = undef; 346} 347 348sub AddKeyword 349{ 350 my ($self, $add) = @_; 351 352 $self->AddListData('keywords', $add); 353} 354 355# 356# SupplementalCategories/Clear/Add 357# 358# Returns reference to a list of supplemental categories. 359# 360sub SupplementalCategories 361{ 362 my $self = shift; 363 return $self->{_listdata}->{'supplemental category'}; 364} 365 366sub ClearSupplementalCategories 367{ 368 my $self = shift; 369 $self->{_listdata}->{'supplemental category'} = undef; 370} 371 372sub AddSupplementalCategories 373{ 374 my ($self, $add) = @_; 375 376 $self->AddListData('supplemental category', $add); 377} 378 379# 380# Contacts/Clear/Add 381# 382# Returns reference to a list of contactss/clears the contacts 383# list/adds a contact. 384# 385sub Contacts 386{ 387 my $self = shift; 388 return $self->{_listdata}->{'contact'}; 389} 390 391sub ClearContacts 392{ 393 my $self = shift; 394 $self->{_listdata}->{'contact'} = undef; 395} 396 397sub AddContact 398{ 399 my ($self, $add) = @_; 400 401 $self->AddListData('contact', $add); 402} 403 404sub AddListData 405{ 406 my ($self, $list, $add) = @_; 407 408 # did user pass in a list ref? 409 if (ref($add) eq 'ARRAY') 410 { 411 # yes, add list contents 412 push(@{$self->{_listdata}->{$list}}, @$add); 413 } 414 else 415 { 416 # no, just a literal item 417 push(@{$self->{_listdata}->{$list}}, $add); 418 } 419} 420 421####################################################################### 422# XML, SQL export 423####################################################################### 424 425# 426# ExportXML 427# 428# $xml = $info->ExportXML('entity-name', \%extra-data, 429# 'optional output file name'); 430# 431# Exports XML containing all image metadata. Attribute names are 432# translated into XML tags, making adjustments to spaces and slashes 433# for compatibility. (Spaces become underbars, slashes become dashes.) 434# Caller provides an entity name; all data will be contained within 435# this entity. Caller optionally provides a reference to a hash of 436# extra data. This will be output into the XML, too. Keys must be 437# valid XML tag names. Optionally provide a filename, and the XML 438# will be dumped into there. 439# 440sub ExportXML 441{ 442 my ($self, $basetag, $extraRef, $filename) = @_; 443 my $out; 444 445 $basetag = 'photo' unless length($basetag); 446 447 $out .= "<$basetag>\n"; 448 449 # dump extra info first, if any 450 foreach my $key (keys %$extraRef) 451 { 452 $out .= "\t<$key>" . $extraRef->{$key} . "</$key>\n"; 453 } 454 455 # dump our stuff 456 foreach my $key (keys %{$self->{_data}}) 457 { 458 my $cleankey = $key; 459 $cleankey =~ s/ /_/g; 460 $cleankey =~ s/\//-/g; 461 462 $out .= "\t<$cleankey>" . $self->{_data}->{$key} . "</$cleankey>\n"; 463 } 464 465 if (defined ($self->Keywords())) 466 { 467 # print keywords 468 $out .= "\t<keywords>\n"; 469 470 foreach my $keyword (@{$self->Keywords()}) 471 { 472 $out .= "\t\t<keyword>$keyword</keyword>\n"; 473 } 474 475 $out .= "\t</keywords>\n"; 476 } 477 478 if (defined ($self->SupplementalCategories())) 479 { 480 # print supplemental categories 481 $out .= "\t<supplemental_categories>\n"; 482 483 foreach my $category (@{$self->SupplementalCategories()}) 484 { 485 $out .= "\t\t<supplemental_category>$category</supplemental_category>\n"; 486 } 487 488 $out .= "\t</supplemental_categories>\n"; 489 } 490 491 if (defined ($self->Contacts())) 492 { 493 # print contacts 494 $out .= "\t<contacts>\n"; 495 496 foreach my $contact (@{$self->Contacts()}) 497 { 498 $out .= "\t\t<contact>$contact</contact>\n"; 499 } 500 501 $out .= "\t</contacts>\n"; 502 } 503 504 # close base tag 505 $out .= "</$basetag>\n"; 506 507 # export to file if caller asked for it. 508 if (length($filename)) 509 { 510 open(XMLOUT, ">$filename"); 511 print XMLOUT $out; 512 close(XMLOUT); 513 } 514 515 return $out; 516} 517 518# 519# ExportSQL 520# 521# my %mappings = ( 522# 'IPTC dataset name here' => 'your table column name here', 523# 'caption/abstract' => 'caption', 524# 'city' => 'city', 525# 'province/state' => 'state); # etc etc etc. 526# 527# $statement = $info->ExportSQL('mytable', \%mappings, \%extra-data); 528# 529# Returns a SQL statement to insert into your given table name 530# a set of values from the image. Caller passes in a reference to 531# a hash which maps IPTC dataset names into column names for the 532# database table. Optionally pass in a ref to a hash of extra data 533# which will also be included in the insert statement. Keys in that 534# hash must be valid column names. 535# 536sub ExportSQL 537{ 538 my ($self, $tablename, $mappingsRef, $extraRef) = @_; 539 my ($statement, $columns, $values); 540 541 return undef if (($tablename eq undef) || ($mappingsRef eq undef)); 542 543 # start with extra data, if any 544 foreach my $column (keys %$extraRef) 545 { 546 my $value = $extraRef->{$column}; 547 $value =~ s/'/''/g; # escape single quotes 548 549 $columns .= $column . ", "; 550 $values .= "\'$value\', "; 551 } 552 553 # process our data 554 foreach my $attribute (keys %$mappingsRef) 555 { 556 my $value = $self->Attribute($attribute); 557 $value =~ s/'/''/g; # escape single quotes 558 559 $columns .= $mappingsRef->{$attribute} . ", "; 560 $values .= "\'$value\', "; 561 } 562 563 # must trim the trailing ", " from both 564 $columns =~ s/, $//; 565 $values =~ s/, $//; 566 567 $statement = "INSERT INTO $tablename ($columns) VALUES ($values)"; 568 569 return $statement; 570} 571 572####################################################################### 573# File parsing functions (private) 574####################################################################### 575 576# 577# ScanToFirstIMMTag 578# 579# Scans to first IIM Record 2 tag in the file. The will either use 580# smart scanning for JPEGs or blind scanning for other file types. 581# 582sub ScanToFirstIMMTag 583{ 584 my $handle = shift @_; 585 586 if (FileIsJPEG($handle)) 587 { 588 Log("File is JPEG, proceeding with JPEGScan"); 589 return JPEGScan($handle); 590 } 591 else 592 { 593 Log("File not a JPEG, trying BlindScan"); 594 return BlindScan($handle); 595 } 596} 597 598# 599# FileIsJPEG 600# 601# Checks to see if this file is a JPEG/JFIF or not. Will reset the 602# file position back to 0 after it's done in either case. 603# 604sub FileIsJPEG 605{ 606 my $handle = shift @_; 607 608 # reset to beginning just in case 609 $handle->seek(0, 0); 610 611 if ($debugMode) 612 { 613 Log("Opening 16 bytes of file:\n"); 614 my $dump; 615 $handle->read($dump, 16); 616 HexDump($dump); 617 $handle->seek(0, 0); 618 } 619 620 # check start of file marker 621 my ($ff, $soi); 622 $handle->read($ff, 1) || goto notjpeg; 623 $handle->read($soi, 1); 624 625 goto notjpeg unless (ord($ff) == 0xff && ord($soi) == 0xd8); 626 627 # now check for APP0 marker. I'll assume that anything with a SOI 628 # followed by APP0 is "close enough" for our purposes. (We're not 629 # dinking with image data, so anything following the JPEG tagging 630 # system should work.) 631 my ($app0, $len, $jpeg); 632 $handle->read($ff, 1); 633 $handle->read($app0, 1); 634 635 goto notjpeg unless (ord($ff) == 0xff); 636 637 # reset to beginning of file 638 $handle->seek(0, 0); 639 return 1; 640 641 notjpeg: 642 $handle->seek(0, 0); 643 return 0; 644} 645 646# 647# JPEGScan 648# 649# Assuming the file is a JPEG (see above), this will scan through the 650# markers looking for the APP13 marker, where IPTC/IIM data should be 651# found. While this isn't a formally defined standard, all programs 652# have (supposedly) adopted Adobe's technique of putting the data in 653# APP13. 654# 655sub JPEGScan 656{ 657 my $handle = shift @_; 658 659 # Skip past start of file marker 660 my ($ff, $soi); 661 $handle->read($ff, 1) || return 0; 662 $handle->read($soi, 1); 663 664 unless (ord($ff) == 0xff && ord($soi) == 0xd8) 665 { 666 $error = "JPEGScan: invalid start of file"; Log($error); 667 return 0; 668 } 669 670 # Scan for the APP13 marker which will contain our IPTC info (I hope). 671 672 my $marker = JPEGNextMarker($handle); 673 674 while (ord($marker) != 0xed) 675 { 676 if (ord($marker) == 0) 677 { $error = "Marker scan failed"; Log($error); return 0; } 678 679 if (ord($marker) == 0xd9) 680 { $error = "Marker scan hit end of image marker"; 681 Log($error); return 0; } 682 683 if (ord($marker) == 0xda) 684 { $error = "Marker scan hit start of image data"; 685 Log($error); return 0; } 686 687 if (JPEGSkipVariable($handle) == 0) 688 { $error = "JPEGSkipVariable failed"; 689 Log($error); return 0; } 690 691 $marker = JPEGNextMarker($handle); 692 } 693 694 # If were's here, we must have found the right marker. Now 695 # BlindScan through the data. 696 return BlindScan($handle, JPEGGetVariableLength($handle)); 697} 698 699# 700# JPEGNextMarker 701# 702# Scans to the start of the next valid-looking marker. Return value is 703# the marker id. 704# 705sub JPEGNextMarker 706{ 707 my $handle = shift @_; 708 709 my $byte; 710 711 # Find 0xff byte. We should already be on it. 712 $handle->read($byte, 1) || return 0; 713 while (ord($byte) != 0xff) 714 { 715 Log("JPEGNextMarker: warning: bogus stuff in JPEG file"); 716 $handle->read($byte, 1) || return 0; 717 } 718 719 # Now skip any extra 0xffs, which are valid padding. 720 do 721 { 722 $handle->read($byte, 1) || return 0; 723 } while (ord($byte) == 0xff); 724 725 # $byte should now contain the marker id. 726 Log("JPEGNextMarker: at marker " . unpack("H*", $byte)); 727 return $byte; 728} 729 730# 731# JPEGGetVariableLength 732# 733# Gets length of current variable-length section. File position at 734# start must be on the marker itself, e.g. immediately after call to 735# JPEGNextMarker. File position is updated to just past the length 736# field. 737# 738sub JPEGGetVariableLength 739{ 740 my $handle = shift @_; 741 742 # Get the marker parameter length count 743 my $length; 744 $handle->read($length, 2) || return 0; 745 746 ($length) = unpack("n", $length); 747 748 Log("JPEG variable length: $length"); 749 750 # Length includes itself, so must be at least 2 751 if ($length < 2) 752 { 753 Log("JPEGGetVariableLength: erroneous JPEG marker length"); 754 return 0; 755 } 756 $length -= 2; 757 758 return $length; 759} 760 761# 762# JPEGSkipVariable 763# 764# Skips variable-length section of JPEG block. Should always be called 765# between calls to JPEGNextMarker to ensure JPEGNextMarker is at the 766# start of data it can properly parse. 767# 768sub JPEGSkipVariable 769{ 770 my $handle = shift; 771 my $rSave = shift; 772 773 my $length = JPEGGetVariableLength($handle); 774 return if ($length == 0); 775 776 # Skip remaining bytes 777 my $temp; 778 if (defined($rSave) || $debugMode) 779 { 780 unless ($handle->read($temp, $length)) 781 { 782 Log("JPEGSkipVariable: read failed while skipping var data"); 783 return 0; 784 } 785 786 # prints out a heck of a lot of stuff 787 # HexDump($temp); 788 } 789 else 790 { 791 # Just seek 792 unless($handle->seek($length, 1)) 793 { 794 Log("JPEGSkipVariable: read failed while skipping var data"); 795 return 0; 796 } 797 } 798 799 $$rSave = $temp if defined($rSave); 800 801 return 1; 802} 803 804# 805# BlindScan 806# 807# Scans blindly to first IIM Record 2 tag in the file. This method may 808# or may not work on any arbitrary file type, but it doesn't hurt to 809# check. We expect to see this tag within the first 8k of data. (This 810# limit may need to be changed or eliminated depending on how other 811# programs choose to store IIM.) 812# 813sub BlindScan 814{ 815 my $handle = shift; 816 my $maxoff = shift() || $MAX_FILE_OFFSET; 817 818 Log("BlindScan: starting scan, max length $maxoff"); 819 820 # start digging 821 my $offset = 0; 822 while ($offset <= $maxoff) 823 { 824 my $temp; 825 826 unless ($handle->read($temp, 1)) 827 { 828 Log("BlindScan: hit EOF while scanning"); 829 return 0; 830 } 831 832 # look for tag identifier 0x1c 833 if (ord($temp) == 0x1c) 834 { 835 # if we found that, look for record 2, dataset 0 836 # (record version number) 837 my ($record, $dataset); 838 $handle->read($record, 1); 839 $handle->read($dataset, 1); 840 841 if (ord($record) == 2) 842 { 843 # found it. seek to start of this tag and return. 844 Log("BlindScan: found IIM start at offset $offset"); 845 $handle->seek(-3, 1); # seek rel to current position 846 return $offset; 847 } 848 else 849 { 850 # didn't find it. back up 2 to make up for 851 # those reads above. 852 $handle->seek(-2, 1); # seek rel to current position 853 } 854 } 855 856 # no tag, keep scanning 857 $offset++; 858 } 859 860 return 0; 861} 862 863# 864# CollectIIMInfo 865# 866# Assuming file is seeked to start of IIM data (using above), this 867# reads all the data into our object's hashes 868# 869sub CollectIIMInfo 870{ 871 my $self = shift; 872 873 my $handle = $self->{_handle}; 874 875 # NOTE: file should already be at the start of the first 876 # IPTC code: record 2, dataset 0. 877 878 while (1) 879 { 880 my $header; 881 return unless $handle->read($header, 5); 882 883 ($tag, $record, $dataset, $length) = unpack("CCCn", $header); 884 885 # bail if we're past end of IIM record 2 data 886 return unless ($tag == 0x1c) && ($record == 2); 887 888 # print "tag : " . $tag . "\n"; 889 # print "record : " . $record . "\n"; 890 # print "dataset : " . $dataset . "\n"; 891 # print "length : " . $length . "\n"; 892 893 my $value; 894 $handle->read($value, $length); 895 896 # try to extract first into _listdata (keywords, categories) 897 # and, if unsuccessful, into _data. Tags which are not in the 898 # current IIM spec (version 4) are currently discarded. 899 if (exists $listdatasets{$dataset}) 900 { 901 my $dataname = $listdatasets{$dataset}; 902 my $listref = $listdata{$dataname}; 903 904 push(@{$self->{_listdata}->{$dataname}}, $value); 905 } 906 elsif (exists $datasets{$dataset}) 907 { 908 my $dataname = $datasets{$dataset}; 909 910 $self->{_data}->{$dataname} = $value; 911 } 912 # else discard 913 } 914} 915 916####################################################################### 917# File Saving 918####################################################################### 919 920# 921# JPEGCollectFileParts 922# 923# Collects all pieces of the file except for the IPTC info that we'll 924# replace when saving. Returns the stuff before the info, stuff after, 925# and the contents of the Adobe Resource Block that the IPTC data goes 926# in. Returns undef if a file parsing error occured. 927# 928sub JPEGCollectFileParts 929{ 930 my $handle = shift; 931 my ($options) = @_; 932 my ($start, $end, $adobeParts); 933 my $discardAppParts = 0; 934 935 if (defined($options) && defined($options->{'discardAppParts'})) 936 { $discardAppParts = 1; } 937 938 # Start at beginning of file 939 $handle->seek(0, 0); 940 941 # Skip past start of file marker 942 my ($ff, $soi); 943 $handle->read($ff, 1) || return 0; 944 $handle->read($soi, 1); 945 946 unless (ord($ff) == 0xff && ord($soi) == 0xd8) 947 { 948 $error = "JPEGScan: invalid start of file"; Log($error); 949 return 0; 950 } 951 952 # 953 # Begin building start of file 954 # 955 $start .= pack("CC", 0xff, 0xd8); 956 957 # Get first marker in file. This will be APP0 for JFIF or APP1 for 958 # EXIF. 959 my $marker = JPEGNextMarker($handle); 960 961 my $app0data; 962 if (JPEGSkipVariable($handle, \$app0data) == 0) 963 { $error = "JPEGSkipVariable failed"; 964 Log($error); return 0; } 965 966 if (ord($marker) == 0xe0 || !$discardAppParts) 967 { 968 # Always include APP0 marker at start if it's present. 969 $start .= pack("CC", 0xff, ord($marker)); 970 # Remember that the length must include itself (2 bytes) 971 $start .= pack("n", length($app0data) + 2); 972 $start .= $app0data; 973 } 974 else 975 { 976 # Manually insert APP0 if we're trashing application parts, since 977 # all JFIF format images should start with the version block. 978 $start .= pack("CC", 0xff, 0xe0); 979 $start .= pack("n", 16); # length (including these 2 bytes) 980 $start .= "JFIF"; # format 981 $start .= pack("CC", 1, 2); # call it version 1.2 (current JFIF) 982 $start .= pack(C8, 0); # zero everything else 983 } 984 985 # 986 # Now scan through all markers in file until we hit image data or 987 # IPTC stuff. 988 # 989 $marker = JPEGNextMarker($handle); 990 991 while (1) 992 { 993 if (ord($marker) == 0) 994 { $error = "Marker scan failed"; Log($error); return 0; } 995 996 # Check for end of image 997 if (ord($marker) == 0xd9) 998 { 999 Log("JPEGCollectFileParts: saw end of image marker"); 1000 $end .= pack("CC", 0xff, ord($marker)); 1001 goto doneScanning; 1002 } 1003 1004 # Check for start of compressed data 1005 if (ord($marker) == 0xda) 1006 { 1007 Log("JPEGCollectFileParts: saw start of compressed data"); 1008 $end .= pack("CC", 0xff, ord($marker)); 1009 goto doneScanning; 1010 } 1011 1012 my $partdata; 1013 if (JPEGSkipVariable($handle, \$partdata) == 0) 1014 { $error = "JPEGSkipVariable failed"; 1015 Log($error); return 0; } 1016 1017 # Take all parts aside from APP13, which we'll replace 1018 # ourselves. 1019 if ($discardAppParts && ord($marker) >= 0xe0 && ord($marker) <= 0xef) 1020 { 1021 # Skip all application markers, including Adobe parts 1022 undef $adobeParts; 1023 } 1024 elsif (ord($marker) == 0xed) 1025 { 1026 # Collect the adobe stuff from part 13 1027 $adobeParts = CollectAdobeParts($partdata); 1028 goto doneScanning; 1029 } 1030 else 1031 { 1032 # Append all other parts to start section 1033 $start .= pack("CC", 0xff, ord($marker)); 1034 $start .= pack("n", length($partdata) + 2); 1035 $start .= $partdata; 1036 } 1037 1038 $marker = JPEGNextMarker($handle); 1039 } 1040 1041 doneScanning: 1042 1043 # 1044 # Append rest of file to $end 1045 # 1046 my $buffer; 1047 1048 while ($handle->read($buffer, 16384)) 1049 { 1050 $end .= $buffer; 1051 } 1052 1053 return [$start, $end, $adobeParts]; 1054} 1055 1056# 1057# CollectAdobeParts 1058# 1059# Part APP13 contains yet another markup format, one defined by Adobe. 1060# See "File Formats Specification" in the Photoshop SDK (avail from 1061# www.adobe.com). We must take everything but the IPTC data so that 1062# way we can write the file back without losing everything else 1063# Photoshop stuffed into the APP13 block. 1064# 1065sub CollectAdobeParts 1066{ 1067 my ($data) = @_; 1068 my $length = length($data); 1069 my $offset = 0; 1070 my $out = ''; 1071 1072 # Skip preamble 1073 $offset = length('Photoshop 3.0 '); 1074 1075 # Process everything 1076 while ($offset < $length) 1077 { 1078 # Get OSType and ID 1079 my ($ostype, $id1, $id2) = unpack("NCC", substr($data, $offset, 6)); 1080 last unless (($offset += 6) < $length); # $offset += 6; 1081 1082 # printf("CollectAdobeParts: ID %2.2x %2.2x\n", $id1, $id2); 1083 1084 # Get pascal string 1085 my ($stringlen) = unpack("C", substr($data, $offset, 1)); 1086 last unless (++$offset < $length); # $offset += 1; 1087 1088 # printf("CollectAdobeParts: str len %d\n", $stringlen); 1089 1090 my $string = substr($data, $offset, $stringlen); 1091 $offset += $stringlen; 1092 # round up if odd 1093 $offset++ if ($stringlen % 2 != 0); 1094 # there should be a null if string len is 0 1095 $offset++ if ($stringlen == 0); 1096 last unless ($offset < $length); 1097 1098 # Get variable-size data 1099 my ($size) = unpack("N", substr($data, $offset, 4)); 1100 last unless (($offset += 4) < $length); # $offset += 4; 1101 1102 # printf("CollectAdobeParts: size %d\n", $size); 1103 1104 my $var = substr($data, $offset, $size); 1105 $offset += $size; 1106 $offset++ if ($size % 2 != 0); # round up if odd 1107 1108 # skip IIM data (0x0404), but write everything else out 1109 unless ($id1 == 4 && $id2 == 4) 1110 { 1111 $out .= pack("NCC", $ostype, $id1, $id2); 1112 $out .= pack("C", $stringlen); 1113 $out .= $string; 1114 $out .= pack("C", 0) if ($stringlen == 0 || $stringlen % 2 != 0); 1115 $out .= pack("N", $size); 1116 $out .= $var; 1117 $out .= pack("C", 0) if ($size % 2 != 0 && length($out) % 2 != 0); 1118 } 1119 } 1120 1121 return $out; 1122} 1123 1124# 1125# PackedIIMData 1126# 1127# Assembles and returns our _data and _listdata into IIM format for 1128# embedding into an image. 1129# 1130sub PackedIIMData 1131{ 1132 my $self = shift; 1133 my $out; 1134 1135 # First, we need to build a mapping of datanames to dataset 1136 # numbers if we haven't already. 1137 unless (scalar(keys %datanames)) 1138 { 1139 foreach my $dataset (keys %datasets) 1140 { 1141 my $dataname = $datasets{$dataset}; 1142 $datanames{$dataname} = $dataset; 1143 } 1144 } 1145 1146 # Ditto for the lists 1147 unless (scalar(keys %listdatanames)) 1148 { 1149 foreach my $dataset (keys %listdatasets) 1150 { 1151 my $dataname = $listdatasets{$dataset}; 1152 $listdatanames{$dataname} = $dataset; 1153 } 1154 } 1155 1156 # Print record version 1157 # tag - record - dataset - len (short) - 2 (short) 1158 $out .= pack("CCCnn", 0x1c, 2, 0, 2, 2); 1159 1160 # Iterate over data sets 1161 foreach my $key (keys %{$self->{_data}}) 1162 { 1163 my $dataset = $datanames{$key}; 1164 my $value = $self->{_data}->{$key}; 1165 1166 if ($dataset == 0) 1167 { Log("PackedIIMData: illegal dataname $key"); next; } 1168 1169 next unless $value; 1170 1171 my ($tag, $record) = (0x1c, 0x02); 1172 1173 $out .= pack("CCCn", $tag, $record, $dataset, length($value)); 1174 $out .= $value; 1175 } 1176 1177 # Do the same for list data sets 1178 foreach my $key (keys %{$self->{_listdata}}) 1179 { 1180 my $dataset = $listdatanames{$key}; 1181 1182 if ($dataset == 0) 1183 { Log("PackedIIMData: illegal dataname $key"); next; } 1184 1185 foreach my $value (@{$self->{_listdata}->{$key}}) 1186 { 1187 next unless $value; 1188 1189 my ($tag, $record) = (0x1c, 0x02); 1190 1191 $out .= pack("CCCn", $tag, $record, $dataset, length($value)); 1192 $out .= $value; 1193 } 1194 } 1195 1196 return $out; 1197} 1198 1199# 1200# PhotoshopIIMBlock 1201# 1202# Assembles the blob of Photoshop "resource data" that includes our 1203# fresh IIM data (from PackedIIMData) and the other Adobe parts we 1204# found in the file, if there were any. 1205# 1206sub PhotoshopIIMBlock 1207{ 1208 my ($self, $otherparts, $data) = @_; 1209 my $resourceBlock; 1210 my $out; 1211 1212 $resourceBlock .= "Photoshop 3.0"; 1213 $resourceBlock .= pack("C", 0); 1214 # Photoshop identifier 1215 $resourceBlock .= "8BIM"; 1216 # 0x0404 is IIM data, 00 is required empty string 1217 $resourceBlock .= pack("CCCC", 0x04, 0x04, 0, 0); 1218 # length of data as 32-bit, network-byte order 1219 $resourceBlock .= pack("N", length($data)); 1220 # Now tack data on there 1221 $resourceBlock .= $data; 1222 # Pad with a blank if not even size 1223 $resourceBlock .= pack("C", 0) if (length($data) % 2 != 0); 1224 # Finally tack on other data 1225 $resourceBlock .= $otherparts if defined($otherparts); 1226 1227 $out .= pack("CC", 0xff, 0xed); # JPEG start of block, APP13 1228 $out .= pack("n", length($resourceBlock) + 2); # length 1229 $out .= $resourceBlock; 1230 1231 return $out; 1232} 1233 1234####################################################################### 1235# Helpers, docs 1236####################################################################### 1237 1238# 1239# Log: just prints a message to STDERR if $debugMode is on. 1240# 1241sub Log 1242{ 1243 if ($debugMode) 1244 { my $message = shift; print STDERR "**IPTC** $message\n"; } 1245} 1246 1247# 1248# HexDump 1249# 1250# Very helpful when debugging. 1251# 1252sub HexDump 1253{ 1254 my $dump = shift; 1255 my $len = length($dump); 1256 my $offset = 0; 1257 my ($dcol1, $dcol2); 1258 1259 while ($offset < $len) 1260 { 1261 my $temp = substr($dump, $offset++, 1); 1262 1263 my $hex = unpack("H*", $temp); 1264 $dcol1 .= " " . $hex; 1265 if (ord($temp) >= 0x21 && ord($temp) <= 0x7e) 1266 { $dcol2 .= " $temp"; } 1267 else 1268 { $dcol2 .= " ."; } 1269 1270 if ($offset % 16 == 0) 1271 { 1272 print STDERR $dcol1 . " | " . $dcol2 . "\n"; 1273 undef $dcol1; undef $dcol2; 1274 } 1275 } 1276 1277 if (defined($dcol1) || defined($dcol2)) 1278 { 1279 print STDERR $dcol1 . " | " . $dcol2 . "\n"; 1280 undef $dcol1; undef $dcol2; 1281 } 1282} 1283 1284# 1285# JPEGDebugScan 1286# 1287# Also very helpful when debugging. 1288# 1289sub JPEGDebugScan 1290{ 1291 my $filename = shift; 1292 my $handle = IO::File->new($filename); 1293 $handle or die "Can't open $filename: $!"; 1294 1295 # Skip past start of file marker 1296 my ($ff, $soi); 1297 $handle->read($ff, 1) || return 0; 1298 $handle->read($soi, 1); 1299 1300 unless (ord($ff) == 0xff && ord($soi) == 0xd8) 1301 { 1302 Log("JPEGScan: invalid start of file"); 1303 goto done; 1304 } 1305 1306 # scan to 0xDA (start of scan), dumping the markers we see between 1307 # here and there. 1308 my $marker = JPEGNextMarker($handle); 1309 1310 while (ord($marker) != 0xda) 1311 { 1312 if (ord($marker) == 0) 1313 { Log("Marker scan failed"); goto done; } 1314 1315 if (ord($marker) == 0xd9) 1316 {Log("Marker scan hit end of image marker"); goto done; } 1317 1318 if (JPEGSkipVariable($handle) == 0) 1319 { Log("JPEGSkipVariable failed"); return 0; } 1320 1321 $marker = JPEGNextMarker($handle); 1322 } 1323 1324done: 1325 $handle->close(); 1326} 1327 1328# sucessful package load 13291; 1330 1331__END__ 1332 1333=head1 NAME 1334 1335Image::IPTCInfo - Perl extension for extracting IPTC image meta-data 1336 1337=head1 SYNOPSIS 1338 1339 use Image::IPTCInfo; 1340 1341 # Create new info object 1342 my $info = new Image::IPTCInfo('file-name-here.jpg'); 1343 1344 # Check if file had IPTC data 1345 unless (defined($info)) { die Image::IPTCInfo::Error(); } 1346 1347 # Get list of keywords, supplemental categories, or contacts 1348 my $keywordsRef = $info->Keywords(); 1349 my $suppCatsRef = $info->SupplementalCategories(); 1350 my $contactsRef = $info->Contacts(); 1351 1352 # Get specific attributes... 1353 my $caption = $info->Attribute('caption/abstract'); 1354 1355 # Create object for file that may or may not have IPTC data. 1356 $info = create Image::IPTCInfo('file-name-here.jpg'); 1357 1358 # Add/change an attribute 1359 $info->SetAttribute('caption/abstract', 'Witty caption here'); 1360 1361 # Save new info to file 1362 ##### See disclaimer in 'SAVING FILES' section ##### 1363 $info->Save(); 1364 $info->SaveAs('new-file-name.jpg'); 1365 1366=head1 DESCRIPTION 1367 1368Ever wish you add information to your photos like a caption, the place 1369you took it, the date, and perhaps even keywords and categories? You 1370already can. The International Press Telecommunications Council (IPTC) 1371defines a format for exchanging meta-information in news content, and 1372that includes photographs. You can embed all kinds of information in 1373your images. The trick is putting it to use. 1374 1375That's where this IPTCInfo Perl module comes into play. You can embed 1376information using many programs, including Adobe Photoshop, and 1377IPTCInfo will let your web server -- and other automated server 1378programs -- pull it back out. You can use the information directly in 1379Perl programs, export it to XML, or even export SQL statements ready 1380to be fed into a database. 1381 1382=head1 USING IPTCINFO 1383 1384Install the module as documented in the README file. You can try out 1385the demo program called "demo.pl" which extracts info from the images 1386in the "demo-images" directory. 1387 1388To integrate with your own code, simply do something like what's in 1389the synopsys above. 1390 1391The complete list of possible attributes is given below. These are as 1392specified in the IPTC IIM standard, version 4. Keywords and categories 1393are handled differently: since these are lists, the module allows you 1394to access them as Perl lists. Call Keywords() and Categories() to get 1395a reference to each list. 1396 1397=head2 NEW VS. CREATE 1398 1399You can either create an object using new() or create(): 1400 1401 $info = new Image::IPTCInfo('file-name-here.jpg'); 1402 $info = create Image::IPTCInfo('file-name-here.jpg'); 1403 1404new() will create a new object only if the file had IPTC data in it. 1405It will return undef otherwise, and you can check Error() to see what 1406the reason was. Using create(), on the other hand, always returns a 1407new IPTCInfo object if there was data or not. If there wasn't any IPTC 1408info there, calling Attribute() on anything will just return undef; 1409i.e. the info object will be more-or-less empty. 1410 1411If you're only reading IPTC data, call new(). If you want to add or 1412change info, call create(). Even if there's no useful stuff in the 1413info object, you can then start adding attributes and save the file. 1414That brings us to the next topic.... 1415 1416=head2 MODIFYING IPTC DATA 1417 1418You can modify IPTC data in JPEG files and save the file back to 1419disk. Here are the commands for doing so: 1420 1421 # Set a given attribute 1422 $info->SetAttribute('iptc attribute here', 'new value here'); 1423 1424 # Clear the keywords or supp. categories list 1425 $info->ClearKeywords(); 1426 $info->ClearSupplementalCategories(); 1427 $info->ClearContacts(); 1428 1429 # Add keywords or supp. categories 1430 $info->AddKeyword('frob'); 1431 1432 # You can also add a list reference 1433 $info->AddKeyword(['frob', 'nob', 'widget']); 1434 1435=head2 SAVING FILES 1436 1437With JPEG files you can add/change attributes, add keywords, etc., and 1438then call: 1439 1440 $info->Save(); 1441 $info->SaveAs('new-file-name.jpg'); 1442 1443This will save the file with the updated IPTC info. Please only run 1444this on *copies* of your images -- not your precious originals! -- 1445because I'm not liable for any corruption of your images. (If you read 1446software license agreements, nobody else is liable, either. Make 1447backups of your originals!) 1448 1449If you're into image wizardry, there are a couple handy options you 1450can use on saving. One feature is to trash the Adobe block of data, 1451which contains IPTC info, color settings, Photoshop print settings, 1452and stuff like that. The other is to trash all application blocks, 1453including stuff like EXIF and FlashPix data. This can be handy for 1454reducing file sizes. The options are passed as a hashref to Save() and 1455SaveAs(), e.g.: 1456 1457 $info->Save({'discardAdobeParts' => 'on'}); 1458 $info->SaveAs('new-file-name.jpg', {'discardAppParts' => 'on'}); 1459 1460Note that if there was IPTC info in the image, or you added some 1461yourself, the new image will have an Adobe part with only the IPTC 1462information. 1463 1464=head2 XML AND SQL EXPORT FEATURES 1465 1466IPTCInfo also allows you to easily generate XML and SQL from the image 1467metadata. For XML, call: 1468 1469 $xml = $info->ExportXML('entity-name', \%extra-data, 1470 'optional output file name'); 1471 1472This returns XML containing all image metadata. Attribute names are 1473translated into XML tags, making adjustments to spaces and slashes for 1474compatibility. (Spaces become underbars, slashes become dashes.) You 1475provide an entity name; all data will be contained within this entity. 1476You can optionally provides a reference to a hash of extra data. This 1477will get put into the XML, too. (Example: you may want to put info on 1478the image's location into the XML.) Keys must be valid XML tag names. 1479You can also provide a filename, and the XML will be dumped into 1480there. See the "demo.pl" script for examples. 1481 1482For SQL, it goes like this: 1483 1484 my %mappings = ( 1485 'IPTC dataset name here' => 'your table column name here', 1486 'caption/abstract' => 'caption', 1487 'city' => 'city', 1488 'province/state' => 'state); # etc etc etc. 1489 1490 $statement = $info->ExportSQL('mytable', \%mappings, \%extra-data); 1491 1492This returns a SQL statement to insert into your given table name a 1493set of values from the image. You pass in a reference to a hash which 1494maps IPTC dataset names into column names for the database table. As 1495with XML export, you can also provide extra information to be stuck 1496into the SQL. 1497 1498=head1 IPTC ATTRIBUTE REFERENCE 1499 1500 object name originating program 1501 edit status program version 1502 editorial update object cycle 1503 urgency by-line 1504 subject reference by-line title 1505 category city 1506 fixture identifier sub-location 1507 content location code province/state 1508 content location name country/primary location code 1509 release date country/primary location name 1510 release time original transmission reference 1511 expiration date headline 1512 expiration time credit 1513 special instructions source 1514 action advised copyright notice 1515 reference service contact 1516 reference date caption/abstract 1517 reference number local caption 1518 date created writer/editor 1519 time created image type 1520 digital creation date image orientation 1521 digital creation time language identifier 1522 1523 custom1 - custom20: NOT STANDARD but used by Fotostation. 1524 IPTCInfo also supports these fields. 1525 1526=head1 KNOWN BUGS 1527 1528IPTC meta-info on MacOS may be stored in the resource fork instead 1529of the data fork. This program will currently not scan the resource 1530fork. 1531 1532I have heard that some programs will embed IPTC info at the end of the 1533file instead of the beginning. The module will currently only look 1534near the front of the file. If you have a file with IPTC data that 1535IPTCInfo can't find, please contact me! I would like to ensure 1536IPTCInfo works with everyone's files. 1537 1538=head1 AUTHOR 1539 1540Josh Carter, josh@multipart-mixed.com 1541 1542=head1 SEE ALSO 1543 1544perl(1). 1545 1546=cut 1547