1# IPTCInfo: extractor for IPTC metadata embedded in images
2# Copyright (C) 2000-2004 Josh Carter <josh@multipart-mixed.com>
3# All rights reserved.
4#
5# This program is free software; you can redistribute it and/or modify
6# it under the same terms as Perl itself.
7
8package Image::IPTCInfo;
9use IO::File;
10
11use vars qw($VERSION);
12$VERSION = '1.95';
13
14#
15# Global vars
16#
17use vars ('%datasets',		  # master list of dataset id's
18		  '%datanames',       # reverse mapping (for saving)
19		  '%listdatasets',	  # master list of repeating dataset id's
20		  '%listdatanames',   # reverse
21		  '$MAX_FILE_OFFSET', # maximum offset for blind scan
22		  );
23
24$MAX_FILE_OFFSET = 8192; # default blind scan depth
25
26# Debug off for production use
27my $debugMode = 0;
28my $error;
29
30#####################################
31# These names match the codes defined in ITPC's IIM record 2.
32# This hash is for non-repeating data items; repeating ones
33# are in %listdatasets below.
34%datasets = (
35#	0	=> 'record version',		# skip -- binary data
36	5	=> 'object name',
37	7	=> 'edit status',
38	8	=> 'editorial update',
39	10	=> 'urgency',
40	12	=> 'subject reference',
41	15	=> 'category',
42#	20	=> 'supplemental category',	# in listdatasets (see below)
43	22	=> 'fixture identifier',
44#	25	=> 'keywords',				# in listdatasets
45	26	=> 'content location code',
46	27	=> 'content location name',
47	30	=> 'release date',
48	35	=> 'release time',
49	37	=> 'expiration date',
50	38	=> 'expiration time',
51	40	=> 'special instructions',
52	42	=> 'action advised',
53	45	=> 'reference service',
54	47	=> 'reference date',
55	50	=> 'reference number',
56	55	=> 'date created',
57	60	=> 'time created',
58	62	=> 'digital creation date',
59	63	=> 'digital creation time',
60	65	=> 'originating program',
61	70	=> 'program version',
62	75	=> 'object cycle',
63	80	=> 'by-line',
64	85	=> 'by-line title',
65	90	=> 'city',
66	92	=> 'sub-location',
67	95	=> 'province/state',
68	100	=> 'country/primary location code',
69	101	=> 'country/primary location name',
70	103	=> 'original transmission reference',
71	105	=> 'headline',
72	110	=> 'credit',
73	115	=> 'source',
74	116	=> 'copyright notice',
75#	118	=> 'contact',            # in listdatasets
76	120	=> 'caption/abstract',
77	121	=> 'local caption',
78	122	=> 'writer/editor',
79#	125	=> 'rasterized caption', # unsupported (binary data)
80	130	=> 'image type',
81	131	=> 'image orientation',
82	135	=> 'language identifier',
83	200	=> 'custom1', # These are NOT STANDARD, but are used by
84	201	=> 'custom2', # Fotostation. Use at your own risk. They're
85	202	=> 'custom3', # here in case you need to store some special
86	203	=> 'custom4', # stuff, but note that other programs won't
87	204	=> 'custom5', # recognize them and may blow them away if
88	205	=> 'custom6', # you open and re-save the file. (Except with
89	206	=> 'custom7', # Fotostation, of course.)
90	207	=> 'custom8',
91	208	=> 'custom9',
92	209	=> 'custom10',
93	210	=> 'custom11',
94	211	=> 'custom12',
95	212	=> 'custom13',
96	213	=> 'custom14',
97	214	=> 'custom15',
98	215	=> 'custom16',
99	216	=> 'custom17',
100	217	=> 'custom18',
101	218	=> 'custom19',
102	219	=> 'custom20',
103	);
104
105# this will get filled in if we save data back to file
106%datanames = ();
107
108%listdatasets = (
109	20	=> 'supplemental category',
110	25	=> 'keywords',
111	118	=> 'contact',
112	);
113
114# this will get filled in if we save data back to file
115%listdatanames = ();
116
117#######################################################################
118# New, Save, Destroy, Error
119#######################################################################
120
121#
122# new
123#
124# $info = new IPTCInfo('image filename goes here')
125#
126# Returns IPTCInfo object filled with metadata from the given image
127# file. File on disk will be closed, and changes made to the IPTCInfo
128# object will *not* be flushed back to disk.
129#
130sub new
131{
132	my ($pkg, $file, $force) = @_;
133
134	my $input_is_handle = eval {$file->isa('IO::Handle')};
135	if ($input_is_handle and not $file->isa('IO::Seekable')) {
136		$error = "Handle must be seekable."; Log($error);
137		return undef;
138	}
139
140	#
141	# Open file and snarf data from it.
142	#
143	my $handle = $input_is_handle ? $file : IO::File->new($file);
144	unless($handle)
145	{
146		$error = "Can't open file: $!"; Log($error);
147		return undef;
148	}
149
150	binmode($handle);
151
152	my $datafound = ScanToFirstIMMTag($handle);
153	unless ($datafound || defined($force))
154	{
155		$error = "No IPTC data found."; Log($error);
156		# don't close unless we opened it
157		$handle->close() unless $input_is_handle;
158		return undef;
159	}
160
161	my $self = bless
162	{
163		'_data'		=> {},	# empty hashes; wil be
164		'_listdata'	=> {},	# filled in CollectIIMInfo
165		'_handle'   => $handle,
166	}, $pkg;
167
168	$self->{_filename} = $file unless $input_is_handle;
169
170	# Do the real snarfing here
171	$self->CollectIIMInfo() if $datafound;
172
173	$handle->close() unless $input_is_handle;
174
175	return $self;
176}
177
178#
179# create
180#
181# Like new, but forces an object to always be returned. This allows
182# you to start adding stuff to files that don't have IPTC info and then
183# save it.
184#
185sub create
186{
187	my ($pkg, $filename) = @_;
188
189	return new($pkg, $filename, 'force');
190}
191
192#
193# Save
194#
195# Saves JPEG with IPTC data back to the same file it came from.
196#
197sub Save
198{
199	my ($self, $options) = @_;
200
201	return $self->SaveAs($self->{'_filename'}, $options);
202}
203
204#
205# Save
206#
207# Saves JPEG with IPTC data to a given file name.
208#
209sub SaveAs
210{
211	my ($self, $newfile, $options) = @_;
212
213	#
214	# Open file and snarf data from it.
215	#
216	my $handle = $self->{_filename} ? IO::File->new($self->{_filename}) : $self->{_handle};
217	unless($handle)
218	{
219		$error = "Can't open file: $!"; Log($error);
220		return undef;
221	}
222
223	$handle->seek(0, 0);
224	binmode($handle);
225
226	unless (FileIsJPEG($handle))
227	{
228		$error = "Source file is not a JPEG; I can only save JPEGs. Sorry.";
229		Log($error);
230		return undef;
231	}
232
233	my $ret = JPEGCollectFileParts($handle, $options);
234
235	if ($ret == 0)
236	{
237		Log("collectfileparts failed");
238		return undef;
239	}
240
241	if ($self->{_filename}) {
242		$handle->close();
243		unless ($handle = IO::File->new($newfile, ">")) {
244			$error = "Can't open output file: $!"; Log($error);
245			return undef;
246		}
247		binmode($handle);
248	} else {
249		unless ($handle->truncate(0)) {
250			$error = "Can't truncate, handle might be read-only"; Log($error);
251			return undef;
252		}
253	}
254
255	my ($start, $end, $adobe) = @$ret;
256
257	if (defined($options) && defined($options->{'discardAdobeParts'}))
258	{
259		undef $adobe;
260	}
261
262
263	$handle->print($start);
264	$handle->print($self->PhotoshopIIMBlock($adobe, $self->PackedIIMData()));
265	$handle->print($end);
266
267	$handle->close() if $self->{_filename};
268
269	return 1;
270}
271
272#
273# DESTROY
274#
275# Called when object is destroyed. No action necessary in this case.
276#
277sub DESTROY
278{
279	# no action necessary
280}
281
282#
283# Error
284#
285# Returns description of the last error.
286#
287sub Error
288{
289	return $error;
290}
291
292#######################################################################
293# Attributes for clients
294#######################################################################
295
296#
297# Attribute/SetAttribute
298#
299# Returns/Changes value of a given data item.
300#
301sub Attribute
302{
303	my ($self, $attribute) = @_;
304
305	return $self->{_data}->{$attribute};
306}
307
308sub SetAttribute
309{
310	my ($self, $attribute, $newval) = @_;
311
312	$self->{_data}->{$attribute} = $newval;
313}
314
315sub ClearAttributes
316{
317	my $self = shift;
318
319	$self->{_data} = {};
320}
321
322sub ClearAllData
323{
324	my $self = shift;
325
326	$self->{_data} = {};
327	$self->{_listdata} = {};
328}
329
330#
331# Keywords/Clear/Add
332#
333# Returns reference to a list of keywords/clears the keywords
334# list/adds a keyword.
335#
336sub Keywords
337{
338	my $self = shift;
339	return $self->{_listdata}->{'keywords'};
340}
341
342sub ClearKeywords
343{
344	my $self = shift;
345	$self->{_listdata}->{'keywords'} = undef;
346}
347
348sub AddKeyword
349{
350	my ($self, $add) = @_;
351
352	$self->AddListData('keywords', $add);
353}
354
355#
356# SupplementalCategories/Clear/Add
357#
358# Returns reference to a list of supplemental categories.
359#
360sub SupplementalCategories
361{
362	my $self = shift;
363	return $self->{_listdata}->{'supplemental category'};
364}
365
366sub ClearSupplementalCategories
367{
368	my $self = shift;
369	$self->{_listdata}->{'supplemental category'} = undef;
370}
371
372sub AddSupplementalCategories
373{
374	my ($self, $add) = @_;
375
376	$self->AddListData('supplemental category', $add);
377}
378
379#
380# Contacts/Clear/Add
381#
382# Returns reference to a list of contactss/clears the contacts
383# list/adds a contact.
384#
385sub Contacts
386{
387	my $self = shift;
388	return $self->{_listdata}->{'contact'};
389}
390
391sub ClearContacts
392{
393	my $self = shift;
394	$self->{_listdata}->{'contact'} = undef;
395}
396
397sub AddContact
398{
399	my ($self, $add) = @_;
400
401	$self->AddListData('contact', $add);
402}
403
404sub AddListData
405{
406	my ($self, $list, $add) = @_;
407
408	# did user pass in a list ref?
409	if (ref($add) eq 'ARRAY')
410	{
411		# yes, add list contents
412		push(@{$self->{_listdata}->{$list}}, @$add);
413	}
414	else
415	{
416		# no, just a literal item
417		push(@{$self->{_listdata}->{$list}}, $add);
418	}
419}
420
421#######################################################################
422# XML, SQL export
423#######################################################################
424
425#
426# ExportXML
427#
428# $xml = $info->ExportXML('entity-name', \%extra-data,
429#                         'optional output file name');
430#
431# Exports XML containing all image metadata. Attribute names are
432# translated into XML tags, making adjustments to spaces and slashes
433# for compatibility. (Spaces become underbars, slashes become dashes.)
434# Caller provides an entity name; all data will be contained within
435# this entity. Caller optionally provides a reference to a hash of
436# extra data. This will be output into the XML, too. Keys must be
437# valid XML tag names. Optionally provide a filename, and the XML
438# will be dumped into there.
439#
440sub ExportXML
441{
442	my ($self, $basetag, $extraRef, $filename) = @_;
443	my $out;
444
445	$basetag = 'photo' unless length($basetag);
446
447	$out .= "<$basetag>\n";
448
449	# dump extra info first, if any
450	foreach my $key (keys %$extraRef)
451	{
452		$out .= "\t<$key>" . $extraRef->{$key} . "</$key>\n";
453	}
454
455	# dump our stuff
456	foreach my $key (keys %{$self->{_data}})
457	{
458		my $cleankey = $key;
459		$cleankey =~ s/ /_/g;
460		$cleankey =~ s/\//-/g;
461
462		$out .= "\t<$cleankey>" . $self->{_data}->{$key} . "</$cleankey>\n";
463	}
464
465	if (defined ($self->Keywords()))
466	{
467		# print keywords
468		$out .= "\t<keywords>\n";
469
470		foreach my $keyword (@{$self->Keywords()})
471		{
472			$out .= "\t\t<keyword>$keyword</keyword>\n";
473		}
474
475		$out .= "\t</keywords>\n";
476	}
477
478	if (defined ($self->SupplementalCategories()))
479	{
480		# print supplemental categories
481		$out .= "\t<supplemental_categories>\n";
482
483		foreach my $category (@{$self->SupplementalCategories()})
484		{
485			$out .= "\t\t<supplemental_category>$category</supplemental_category>\n";
486		}
487
488		$out .= "\t</supplemental_categories>\n";
489	}
490
491	if (defined ($self->Contacts()))
492	{
493		# print contacts
494		$out .= "\t<contacts>\n";
495
496		foreach my $contact (@{$self->Contacts()})
497		{
498			$out .= "\t\t<contact>$contact</contact>\n";
499		}
500
501		$out .= "\t</contacts>\n";
502	}
503
504	# close base tag
505	$out .= "</$basetag>\n";
506
507	# export to file if caller asked for it.
508	if (length($filename))
509	{
510		open(XMLOUT, ">$filename");
511		print XMLOUT $out;
512		close(XMLOUT);
513	}
514
515	return $out;
516}
517
518#
519# ExportSQL
520#
521# my %mappings = (
522#   'IPTC dataset name here'    => 'your table column name here',
523#   'caption/abstract'          => 'caption',
524#   'city'                      => 'city',
525#   'province/state'            => 'state); # etc etc etc.
526#
527# $statement = $info->ExportSQL('mytable', \%mappings, \%extra-data);
528#
529# Returns a SQL statement to insert into your given table name
530# a set of values from the image. Caller passes in a reference to
531# a hash which maps IPTC dataset names into column names for the
532# database table. Optionally pass in a ref to a hash of extra data
533# which will also be included in the insert statement. Keys in that
534# hash must be valid column names.
535#
536sub ExportSQL
537{
538	my ($self, $tablename, $mappingsRef, $extraRef) = @_;
539	my ($statement, $columns, $values);
540
541	return undef if (($tablename eq undef) || ($mappingsRef eq undef));
542
543	# start with extra data, if any
544	foreach my $column (keys %$extraRef)
545	{
546		my $value = $extraRef->{$column};
547		$value =~ s/'/''/g; # escape single quotes
548
549		$columns .= $column . ", ";
550		$values  .= "\'$value\', ";
551	}
552
553	# process our data
554	foreach my $attribute (keys %$mappingsRef)
555	{
556		my $value = $self->Attribute($attribute);
557		$value =~ s/'/''/g; # escape single quotes
558
559		$columns .= $mappingsRef->{$attribute} . ", ";
560		$values  .= "\'$value\', ";
561	}
562
563	# must trim the trailing ", " from both
564	$columns =~ s/, $//;
565	$values  =~ s/, $//;
566
567	$statement = "INSERT INTO $tablename ($columns) VALUES ($values)";
568
569	return $statement;
570}
571
572#######################################################################
573# File parsing functions (private)
574#######################################################################
575
576#
577# ScanToFirstIMMTag
578#
579# Scans to first IIM Record 2 tag in the file. The will either use
580# smart scanning for JPEGs or blind scanning for other file types.
581#
582sub ScanToFirstIMMTag
583{
584	my $handle = shift @_;
585
586	if (FileIsJPEG($handle))
587	{
588		Log("File is JPEG, proceeding with JPEGScan");
589		return JPEGScan($handle);
590	}
591	else
592	{
593		Log("File not a JPEG, trying BlindScan");
594		return BlindScan($handle);
595	}
596}
597
598#
599# FileIsJPEG
600#
601# Checks to see if this file is a JPEG/JFIF or not. Will reset the
602# file position back to 0 after it's done in either case.
603#
604sub FileIsJPEG
605{
606	my $handle = shift @_;
607
608	# reset to beginning just in case
609	$handle->seek(0, 0);
610
611	if ($debugMode)
612	{
613		Log("Opening 16 bytes of file:\n");
614		my $dump;
615		$handle->read($dump, 16);
616		HexDump($dump);
617		$handle->seek(0, 0);
618	}
619
620	# check start of file marker
621	my ($ff, $soi);
622	$handle->read($ff, 1) || goto notjpeg;
623	$handle->read($soi, 1);
624
625	goto notjpeg unless (ord($ff) == 0xff && ord($soi) == 0xd8);
626
627	# now check for APP0 marker. I'll assume that anything with a SOI
628	# followed by APP0 is "close enough" for our purposes. (We're not
629	# dinking with image data, so anything following the JPEG tagging
630	# system should work.)
631	my ($app0, $len, $jpeg);
632	$handle->read($ff, 1);
633	$handle->read($app0, 1);
634
635	goto notjpeg unless (ord($ff) == 0xff);
636
637	# reset to beginning of file
638	$handle->seek(0, 0);
639	return 1;
640
641  notjpeg:
642	$handle->seek(0, 0);
643	return 0;
644}
645
646#
647# JPEGScan
648#
649# Assuming the file is a JPEG (see above), this will scan through the
650# markers looking for the APP13 marker, where IPTC/IIM data should be
651# found. While this isn't a formally defined standard, all programs
652# have (supposedly) adopted Adobe's technique of putting the data in
653# APP13.
654#
655sub JPEGScan
656{
657	my $handle = shift @_;
658
659	# Skip past start of file marker
660	my ($ff, $soi);
661	$handle->read($ff, 1) || return 0;
662	$handle->read($soi, 1);
663
664	unless (ord($ff) == 0xff && ord($soi) == 0xd8)
665	{
666		$error = "JPEGScan: invalid start of file"; Log($error);
667		return 0;
668	}
669
670	# Scan for the APP13 marker which will contain our IPTC info (I hope).
671
672	my $marker = JPEGNextMarker($handle);
673
674	while (ord($marker) != 0xed)
675	{
676		if (ord($marker) == 0)
677		{ $error = "Marker scan failed"; Log($error); return 0; }
678
679		if (ord($marker) == 0xd9)
680		{ $error = "Marker scan hit end of image marker";
681		  Log($error); return 0; }
682
683		if (ord($marker) == 0xda)
684		{ $error = "Marker scan hit start of image data";
685		  Log($error); return 0; }
686
687		if (JPEGSkipVariable($handle) == 0)
688		{ $error = "JPEGSkipVariable failed";
689		  Log($error); return 0; }
690
691		$marker = JPEGNextMarker($handle);
692	}
693
694	# If were's here, we must have found the right marker. Now
695	# BlindScan through the data.
696	return BlindScan($handle, JPEGGetVariableLength($handle));
697}
698
699#
700# JPEGNextMarker
701#
702# Scans to the start of the next valid-looking marker. Return value is
703# the marker id.
704#
705sub JPEGNextMarker
706{
707	my $handle = shift @_;
708
709	my $byte;
710
711	# Find 0xff byte. We should already be on it.
712	$handle->read($byte, 1) || return 0;
713	while (ord($byte) != 0xff)
714	{
715		Log("JPEGNextMarker: warning: bogus stuff in JPEG file");
716		$handle->read($byte, 1) || return 0;
717	}
718
719	# Now skip any extra 0xffs, which are valid padding.
720	do
721	{
722		$handle->read($byte, 1) || return 0;
723	} while (ord($byte) == 0xff);
724
725	# $byte should now contain the marker id.
726	Log("JPEGNextMarker: at marker " . unpack("H*", $byte));
727	return $byte;
728}
729
730#
731# JPEGGetVariableLength
732#
733# Gets length of current variable-length section. File position at
734# start must be on the marker itself, e.g. immediately after call to
735# JPEGNextMarker. File position is updated to just past the length
736# field.
737#
738sub JPEGGetVariableLength
739{
740	my $handle = shift @_;
741
742	# Get the marker parameter length count
743	my $length;
744	$handle->read($length, 2) || return 0;
745
746	($length) = unpack("n", $length);
747
748	Log("JPEG variable length: $length");
749
750	# Length includes itself, so must be at least 2
751	if ($length < 2)
752	{
753		Log("JPEGGetVariableLength: erroneous JPEG marker length");
754		return 0;
755	}
756	$length -= 2;
757
758	return $length;
759}
760
761#
762# JPEGSkipVariable
763#
764# Skips variable-length section of JPEG block. Should always be called
765# between calls to JPEGNextMarker to ensure JPEGNextMarker is at the
766# start of data it can properly parse.
767#
768sub JPEGSkipVariable
769{
770	my $handle = shift;
771	my $rSave = shift;
772
773	my $length = JPEGGetVariableLength($handle);
774	return if ($length == 0);
775
776	# Skip remaining bytes
777	my $temp;
778	if (defined($rSave) || $debugMode)
779	{
780		unless ($handle->read($temp, $length))
781		{
782			Log("JPEGSkipVariable: read failed while skipping var data");
783			return 0;
784		}
785
786		# prints out a heck of a lot of stuff
787		# HexDump($temp);
788	}
789	else
790	{
791		# Just seek
792		unless($handle->seek($length, 1))
793		{
794			Log("JPEGSkipVariable: read failed while skipping var data");
795			return 0;
796		}
797	}
798
799	$$rSave = $temp if defined($rSave);
800
801	return 1;
802}
803
804#
805# BlindScan
806#
807# Scans blindly to first IIM Record 2 tag in the file. This method may
808# or may not work on any arbitrary file type, but it doesn't hurt to
809# check. We expect to see this tag within the first 8k of data. (This
810# limit may need to be changed or eliminated depending on how other
811# programs choose to store IIM.)
812#
813sub BlindScan
814{
815	my $handle = shift;
816    my $maxoff = shift() || $MAX_FILE_OFFSET;
817
818	Log("BlindScan: starting scan, max length $maxoff");
819
820	# start digging
821	my $offset = 0;
822	while ($offset <= $maxoff)
823	{
824		my $temp;
825
826		unless ($handle->read($temp, 1))
827		{
828			Log("BlindScan: hit EOF while scanning");
829			return 0;
830		}
831
832		# look for tag identifier 0x1c
833		if (ord($temp) == 0x1c)
834		{
835			# if we found that, look for record 2, dataset 0
836			# (record version number)
837			my ($record, $dataset);
838			$handle->read($record, 1);
839			$handle->read($dataset, 1);
840
841			if (ord($record) == 2)
842			{
843				# found it. seek to start of this tag and return.
844				Log("BlindScan: found IIM start at offset $offset");
845				$handle->seek(-3, 1); # seek rel to current position
846				return $offset;
847			}
848			else
849			{
850				# didn't find it. back up 2 to make up for
851				# those reads above.
852				$handle->seek(-2, 1); # seek rel to current position
853			}
854		}
855
856		# no tag, keep scanning
857		$offset++;
858	}
859
860	return 0;
861}
862
863#
864# CollectIIMInfo
865#
866# Assuming file is seeked to start of IIM data (using above), this
867# reads all the data into our object's hashes
868#
869sub CollectIIMInfo
870{
871	my $self = shift;
872
873	my $handle = $self->{_handle};
874
875	# NOTE: file should already be at the start of the first
876	# IPTC code: record 2, dataset 0.
877
878	while (1)
879	{
880		my $header;
881		return unless $handle->read($header, 5);
882
883		($tag, $record, $dataset, $length) = unpack("CCCn", $header);
884
885		# bail if we're past end of IIM record 2 data
886		return unless ($tag == 0x1c) && ($record == 2);
887
888		# print "tag     : " . $tag . "\n";
889		# print "record  : " . $record . "\n";
890		# print "dataset : " . $dataset . "\n";
891		# print "length  : " . $length  . "\n";
892
893		my $value;
894		$handle->read($value, $length);
895
896		# try to extract first into _listdata (keywords, categories)
897		# and, if unsuccessful, into _data. Tags which are not in the
898		# current IIM spec (version 4) are currently discarded.
899		if (exists $listdatasets{$dataset})
900		{
901			my $dataname = $listdatasets{$dataset};
902			my $listref  = $listdata{$dataname};
903
904			push(@{$self->{_listdata}->{$dataname}}, $value);
905		}
906		elsif (exists $datasets{$dataset})
907		{
908			my $dataname = $datasets{$dataset};
909
910			$self->{_data}->{$dataname} = $value;
911		}
912		# else discard
913	}
914}
915
916#######################################################################
917# File Saving
918#######################################################################
919
920#
921# JPEGCollectFileParts
922#
923# Collects all pieces of the file except for the IPTC info that we'll
924# replace when saving. Returns the stuff before the info, stuff after,
925# and the contents of the Adobe Resource Block that the IPTC data goes
926# in. Returns undef if a file parsing error occured.
927#
928sub JPEGCollectFileParts
929{
930	my $handle = shift;
931	my ($options) = @_;
932	my ($start, $end, $adobeParts);
933	my $discardAppParts = 0;
934
935	if (defined($options) && defined($options->{'discardAppParts'}))
936	{ $discardAppParts = 1; }
937
938	# Start at beginning of file
939	$handle->seek(0, 0);
940
941	# Skip past start of file marker
942	my ($ff, $soi);
943	$handle->read($ff, 1) || return 0;
944	$handle->read($soi, 1);
945
946	unless (ord($ff) == 0xff && ord($soi) == 0xd8)
947	{
948		$error = "JPEGScan: invalid start of file"; Log($error);
949		return 0;
950	}
951
952	#
953	# Begin building start of file
954	#
955	$start .= pack("CC", 0xff, 0xd8);
956
957	# Get first marker in file. This will be APP0 for JFIF or APP1 for
958	# EXIF.
959	my $marker = JPEGNextMarker($handle);
960
961	my $app0data;
962	if (JPEGSkipVariable($handle, \$app0data) == 0)
963	{ $error = "JPEGSkipVariable failed";
964	  Log($error); return 0; }
965
966	if (ord($marker) == 0xe0 || !$discardAppParts)
967	{
968		# Always include APP0 marker at start if it's present.
969		$start .= pack("CC", 0xff, ord($marker));
970		# Remember that the length must include itself (2 bytes)
971		$start .= pack("n", length($app0data) + 2);
972		$start .= $app0data;
973	}
974	else
975	{
976		# Manually insert APP0 if we're trashing application parts, since
977		# all JFIF format images should start with the version block.
978		$start .= pack("CC", 0xff, 0xe0);
979		$start .= pack("n", 16);    # length (including these 2 bytes)
980		$start .= "JFIF";           # format
981		$start .= pack("CC", 1, 2); # call it version 1.2 (current JFIF)
982		$start .= pack(C8, 0);      # zero everything else
983	}
984
985	#
986	# Now scan through all markers in file until we hit image data or
987	# IPTC stuff.
988	#
989	$marker = JPEGNextMarker($handle);
990
991	while (1)
992	{
993		if (ord($marker) == 0)
994		{ $error = "Marker scan failed"; Log($error); return 0; }
995
996		# Check for end of image
997		if (ord($marker) == 0xd9)
998		{
999			Log("JPEGCollectFileParts: saw end of image marker");
1000			$end .= pack("CC", 0xff, ord($marker));
1001			goto doneScanning;
1002		}
1003
1004		# Check for start of compressed data
1005		if (ord($marker) == 0xda)
1006		{
1007			Log("JPEGCollectFileParts: saw start of compressed data");
1008			$end .= pack("CC", 0xff, ord($marker));
1009			goto doneScanning;
1010		}
1011
1012		my $partdata;
1013		if (JPEGSkipVariable($handle, \$partdata) == 0)
1014		{ $error = "JPEGSkipVariable failed";
1015		  Log($error); return 0; }
1016
1017		# Take all parts aside from APP13, which we'll replace
1018		# ourselves.
1019		if ($discardAppParts && ord($marker) >= 0xe0 && ord($marker) <= 0xef)
1020		{
1021			# Skip all application markers, including Adobe parts
1022			undef $adobeParts;
1023		}
1024		elsif (ord($marker) == 0xed)
1025		{
1026			# Collect the adobe stuff from part 13
1027			$adobeParts = CollectAdobeParts($partdata);
1028			goto doneScanning;
1029		}
1030		else
1031		{
1032			# Append all other parts to start section
1033			$start .= pack("CC", 0xff, ord($marker));
1034			$start .= pack("n", length($partdata) + 2);
1035			$start .= $partdata;
1036		}
1037
1038		$marker = JPEGNextMarker($handle);
1039	}
1040
1041  doneScanning:
1042
1043	#
1044	# Append rest of file to $end
1045	#
1046	my $buffer;
1047
1048	while ($handle->read($buffer, 16384))
1049	{
1050		$end .= $buffer;
1051	}
1052
1053	return [$start, $end, $adobeParts];
1054}
1055
1056#
1057# CollectAdobeParts
1058#
1059# Part APP13 contains yet another markup format, one defined by Adobe.
1060# See "File Formats Specification" in the Photoshop SDK (avail from
1061# www.adobe.com). We must take everything but the IPTC data so that
1062# way we can write the file back without losing everything else
1063# Photoshop stuffed into the APP13 block.
1064#
1065sub CollectAdobeParts
1066{
1067	my ($data) = @_;
1068	my $length = length($data);
1069	my $offset = 0;
1070	my $out = '';
1071
1072	# Skip preamble
1073	$offset = length('Photoshop 3.0 ');
1074
1075	# Process everything
1076	while ($offset < $length)
1077	{
1078		# Get OSType and ID
1079		my ($ostype, $id1, $id2) = unpack("NCC", substr($data, $offset, 6));
1080		last unless (($offset += 6) < $length); # $offset += 6;
1081
1082		# printf("CollectAdobeParts: ID %2.2x %2.2x\n", $id1, $id2);
1083
1084		# Get pascal string
1085		my ($stringlen) = unpack("C", substr($data, $offset, 1));
1086		last unless (++$offset < $length); # $offset += 1;
1087
1088		# printf("CollectAdobeParts: str len %d\n", $stringlen);
1089
1090		my $string = substr($data, $offset, $stringlen);
1091		$offset += $stringlen;
1092		# round up if odd
1093		$offset++ if ($stringlen % 2 != 0);
1094		# there should be a null if string len is 0
1095		$offset++ if ($stringlen == 0);
1096		last unless ($offset < $length);
1097
1098		# Get variable-size data
1099		my ($size) = unpack("N", substr($data, $offset, 4));
1100		last unless (($offset += 4) < $length);  # $offset += 4;
1101
1102		# printf("CollectAdobeParts: size %d\n", $size);
1103
1104		my $var = substr($data, $offset, $size);
1105		$offset += $size;
1106		$offset++ if ($size % 2 != 0); # round up if odd
1107
1108		# skip IIM data (0x0404), but write everything else out
1109		unless ($id1 == 4 && $id2 == 4)
1110		{
1111			$out .= pack("NCC", $ostype, $id1, $id2);
1112			$out .= pack("C", $stringlen);
1113			$out .= $string;
1114			$out .= pack("C", 0) if ($stringlen == 0 || $stringlen % 2 != 0);
1115			$out .= pack("N", $size);
1116			$out .= $var;
1117			$out .= pack("C", 0) if ($size % 2 != 0 && length($out) % 2 != 0);
1118		}
1119	}
1120
1121	return $out;
1122}
1123
1124#
1125# PackedIIMData
1126#
1127# Assembles and returns our _data and _listdata into IIM format for
1128# embedding into an image.
1129#
1130sub PackedIIMData
1131{
1132	my $self = shift;
1133	my $out;
1134
1135	# First, we need to build a mapping of datanames to dataset
1136	# numbers if we haven't already.
1137	unless (scalar(keys %datanames))
1138	{
1139		foreach my $dataset (keys %datasets)
1140		{
1141			my $dataname = $datasets{$dataset};
1142			$datanames{$dataname} = $dataset;
1143		}
1144	}
1145
1146	# Ditto for the lists
1147	unless (scalar(keys %listdatanames))
1148	{
1149		foreach my $dataset (keys %listdatasets)
1150		{
1151			my $dataname = $listdatasets{$dataset};
1152			$listdatanames{$dataname} = $dataset;
1153		}
1154	}
1155
1156	# Print record version
1157	# tag - record - dataset - len (short) - 2 (short)
1158	$out .= pack("CCCnn", 0x1c, 2, 0, 2, 2);
1159
1160	# Iterate over data sets
1161	foreach my $key (keys %{$self->{_data}})
1162	{
1163		my $dataset = $datanames{$key};
1164		my $value   = $self->{_data}->{$key};
1165
1166		if ($dataset == 0)
1167		{ Log("PackedIIMData: illegal dataname $key"); next; }
1168
1169        next unless $value;
1170
1171		my ($tag, $record) = (0x1c, 0x02);
1172
1173		$out .= pack("CCCn", $tag, $record, $dataset, length($value));
1174		$out .= $value;
1175	}
1176
1177	# Do the same for list data sets
1178	foreach my $key (keys %{$self->{_listdata}})
1179	{
1180		my $dataset = $listdatanames{$key};
1181
1182		if ($dataset == 0)
1183		{ Log("PackedIIMData: illegal dataname $key"); next; }
1184
1185		foreach my $value (@{$self->{_listdata}->{$key}})
1186		{
1187		    next unless $value;
1188
1189			my ($tag, $record) = (0x1c, 0x02);
1190
1191			$out .= pack("CCCn", $tag, $record, $dataset, length($value));
1192			$out .= $value;
1193		}
1194	}
1195
1196	return $out;
1197}
1198
1199#
1200# PhotoshopIIMBlock
1201#
1202# Assembles the blob of Photoshop "resource data" that includes our
1203# fresh IIM data (from PackedIIMData) and the other Adobe parts we
1204# found in the file, if there were any.
1205#
1206sub PhotoshopIIMBlock
1207{
1208	my ($self, $otherparts, $data) = @_;
1209	my $resourceBlock;
1210	my $out;
1211
1212	$resourceBlock .= "Photoshop 3.0";
1213	$resourceBlock .= pack("C", 0);
1214	# Photoshop identifier
1215	$resourceBlock .= "8BIM";
1216	# 0x0404 is IIM data, 00 is required empty string
1217	$resourceBlock .= pack("CCCC", 0x04, 0x04, 0, 0);
1218	# length of data as 32-bit, network-byte order
1219	$resourceBlock .= pack("N", length($data));
1220	# Now tack data on there
1221	$resourceBlock .= $data;
1222	# Pad with a blank if not even size
1223	$resourceBlock .= pack("C", 0) if (length($data) % 2 != 0);
1224	# Finally tack on other data
1225	$resourceBlock .= $otherparts if defined($otherparts);
1226
1227	$out .= pack("CC", 0xff, 0xed); # JPEG start of block, APP13
1228	$out .= pack("n", length($resourceBlock) + 2); # length
1229	$out .= $resourceBlock;
1230
1231	return $out;
1232}
1233
1234#######################################################################
1235# Helpers, docs
1236#######################################################################
1237
1238#
1239# Log: just prints a message to STDERR if $debugMode is on.
1240#
1241sub Log
1242{
1243	if ($debugMode)
1244	{ my $message = shift; print STDERR "**IPTC** $message\n"; }
1245}
1246
1247#
1248# HexDump
1249#
1250# Very helpful when debugging.
1251#
1252sub HexDump
1253{
1254	my $dump = shift;
1255	my $len  = length($dump);
1256	my $offset = 0;
1257	my ($dcol1, $dcol2);
1258
1259	while ($offset < $len)
1260	{
1261		my $temp = substr($dump, $offset++, 1);
1262
1263		my $hex = unpack("H*", $temp);
1264		$dcol1 .= " " . $hex;
1265		if (ord($temp) >= 0x21 && ord($temp) <= 0x7e)
1266		{ $dcol2 .= " $temp"; }
1267		else
1268		{ $dcol2 .= " ."; }
1269
1270		if ($offset % 16 == 0)
1271		{
1272			print STDERR $dcol1 . " | " . $dcol2 . "\n";
1273			undef $dcol1; undef $dcol2;
1274		}
1275	}
1276
1277	if (defined($dcol1) || defined($dcol2))
1278	{
1279		print STDERR $dcol1 . " | " . $dcol2 . "\n";
1280		undef $dcol1; undef $dcol2;
1281	}
1282}
1283
1284#
1285# JPEGDebugScan
1286#
1287# Also very helpful when debugging.
1288#
1289sub JPEGDebugScan
1290{
1291	my $filename = shift;
1292	my $handle = IO::File->new($filename);
1293	$handle or die "Can't open $filename: $!";
1294
1295	# Skip past start of file marker
1296	my ($ff, $soi);
1297	$handle->read($ff, 1) || return 0;
1298	$handle->read($soi, 1);
1299
1300	unless (ord($ff) == 0xff && ord($soi) == 0xd8)
1301	{
1302		Log("JPEGScan: invalid start of file");
1303		goto done;
1304	}
1305
1306	# scan to 0xDA (start of scan), dumping the markers we see between
1307	# here and there.
1308	my $marker = JPEGNextMarker($handle);
1309
1310	while (ord($marker) != 0xda)
1311	{
1312		if (ord($marker) == 0)
1313		{ Log("Marker scan failed"); goto done; }
1314
1315		if (ord($marker) == 0xd9)
1316		{Log("Marker scan hit end of image marker"); goto done; }
1317
1318		if (JPEGSkipVariable($handle) == 0)
1319		{ Log("JPEGSkipVariable failed"); return 0; }
1320
1321		$marker = JPEGNextMarker($handle);
1322	}
1323
1324done:
1325	$handle->close();
1326}
1327
1328# sucessful package load
13291;
1330
1331__END__
1332
1333=head1 NAME
1334
1335Image::IPTCInfo - Perl extension for extracting IPTC image meta-data
1336
1337=head1 SYNOPSIS
1338
1339  use Image::IPTCInfo;
1340
1341  # Create new info object
1342  my $info = new Image::IPTCInfo('file-name-here.jpg');
1343
1344  # Check if file had IPTC data
1345  unless (defined($info)) { die Image::IPTCInfo::Error(); }
1346
1347  # Get list of keywords, supplemental categories, or contacts
1348  my $keywordsRef = $info->Keywords();
1349  my $suppCatsRef = $info->SupplementalCategories();
1350  my $contactsRef = $info->Contacts();
1351
1352  # Get specific attributes...
1353  my $caption = $info->Attribute('caption/abstract');
1354
1355  # Create object for file that may or may not have IPTC data.
1356  $info = create Image::IPTCInfo('file-name-here.jpg');
1357
1358  # Add/change an attribute
1359  $info->SetAttribute('caption/abstract', 'Witty caption here');
1360
1361  # Save new info to file
1362  ##### See disclaimer in 'SAVING FILES' section #####
1363  $info->Save();
1364  $info->SaveAs('new-file-name.jpg');
1365
1366=head1 DESCRIPTION
1367
1368Ever wish you add information to your photos like a caption, the place
1369you took it, the date, and perhaps even keywords and categories? You
1370already can. The International Press Telecommunications Council (IPTC)
1371defines a format for exchanging meta-information in news content, and
1372that includes photographs. You can embed all kinds of information in
1373your images. The trick is putting it to use.
1374
1375That's where this IPTCInfo Perl module comes into play. You can embed
1376information using many programs, including Adobe Photoshop, and
1377IPTCInfo will let your web server -- and other automated server
1378programs -- pull it back out. You can use the information directly in
1379Perl programs, export it to XML, or even export SQL statements ready
1380to be fed into a database.
1381
1382=head1 USING IPTCINFO
1383
1384Install the module as documented in the README file. You can try out
1385the demo program called "demo.pl" which extracts info from the images
1386in the "demo-images" directory.
1387
1388To integrate with your own code, simply do something like what's in
1389the synopsys above.
1390
1391The complete list of possible attributes is given below. These are as
1392specified in the IPTC IIM standard, version 4. Keywords and categories
1393are handled differently: since these are lists, the module allows you
1394to access them as Perl lists. Call Keywords() and Categories() to get
1395a reference to each list.
1396
1397=head2 NEW VS. CREATE
1398
1399You can either create an object using new() or create():
1400
1401  $info = new Image::IPTCInfo('file-name-here.jpg');
1402  $info = create Image::IPTCInfo('file-name-here.jpg');
1403
1404new() will create a new object only if the file had IPTC data in it.
1405It will return undef otherwise, and you can check Error() to see what
1406the reason was. Using create(), on the other hand, always returns a
1407new IPTCInfo object if there was data or not. If there wasn't any IPTC
1408info there, calling Attribute() on anything will just return undef;
1409i.e. the info object will be more-or-less empty.
1410
1411If you're only reading IPTC data, call new(). If you want to add or
1412change info, call create(). Even if there's no useful stuff in the
1413info object, you can then start adding attributes and save the file.
1414That brings us to the next topic....
1415
1416=head2 MODIFYING IPTC DATA
1417
1418You can modify IPTC data in JPEG files and save the file back to
1419disk. Here are the commands for doing so:
1420
1421  # Set a given attribute
1422  $info->SetAttribute('iptc attribute here', 'new value here');
1423
1424  # Clear the keywords or supp. categories list
1425  $info->ClearKeywords();
1426  $info->ClearSupplementalCategories();
1427  $info->ClearContacts();
1428
1429  # Add keywords or supp. categories
1430  $info->AddKeyword('frob');
1431
1432  # You can also add a list reference
1433  $info->AddKeyword(['frob', 'nob', 'widget']);
1434
1435=head2 SAVING FILES
1436
1437With JPEG files you can add/change attributes, add keywords, etc., and
1438then call:
1439
1440  $info->Save();
1441  $info->SaveAs('new-file-name.jpg');
1442
1443This will save the file with the updated IPTC info. Please only run
1444this on *copies* of your images -- not your precious originals! --
1445because I'm not liable for any corruption of your images. (If you read
1446software license agreements, nobody else is liable, either. Make
1447backups of your originals!)
1448
1449If you're into image wizardry, there are a couple handy options you
1450can use on saving. One feature is to trash the Adobe block of data,
1451which contains IPTC info, color settings, Photoshop print settings,
1452and stuff like that. The other is to trash all application blocks,
1453including stuff like EXIF and FlashPix data. This can be handy for
1454reducing file sizes. The options are passed as a hashref to Save() and
1455SaveAs(), e.g.:
1456
1457  $info->Save({'discardAdobeParts' => 'on'});
1458  $info->SaveAs('new-file-name.jpg', {'discardAppParts' => 'on'});
1459
1460Note that if there was IPTC info in the image, or you added some
1461yourself, the new image will have an Adobe part with only the IPTC
1462information.
1463
1464=head2 XML AND SQL EXPORT FEATURES
1465
1466IPTCInfo also allows you to easily generate XML and SQL from the image
1467metadata. For XML, call:
1468
1469  $xml = $info->ExportXML('entity-name', \%extra-data,
1470                          'optional output file name');
1471
1472This returns XML containing all image metadata. Attribute names are
1473translated into XML tags, making adjustments to spaces and slashes for
1474compatibility. (Spaces become underbars, slashes become dashes.) You
1475provide an entity name; all data will be contained within this entity.
1476You can optionally provides a reference to a hash of extra data. This
1477will get put into the XML, too. (Example: you may want to put info on
1478the image's location into the XML.) Keys must be valid XML tag names.
1479You can also provide a filename, and the XML will be dumped into
1480there. See the "demo.pl" script for examples.
1481
1482For SQL, it goes like this:
1483
1484  my %mappings = (
1485       'IPTC dataset name here' => 'your table column name here',
1486       'caption/abstract'       => 'caption',
1487       'city'                   => 'city',
1488       'province/state'         => 'state); # etc etc etc.
1489
1490  $statement = $info->ExportSQL('mytable', \%mappings, \%extra-data);
1491
1492This returns a SQL statement to insert into your given table name a
1493set of values from the image. You pass in a reference to a hash which
1494maps IPTC dataset names into column names for the database table. As
1495with XML export, you can also provide extra information to be stuck
1496into the SQL.
1497
1498=head1 IPTC ATTRIBUTE REFERENCE
1499
1500  object name               originating program
1501  edit status               program version
1502  editorial update          object cycle
1503  urgency                   by-line
1504  subject reference         by-line title
1505  category                  city
1506  fixture identifier        sub-location
1507  content location code     province/state
1508  content location name     country/primary location code
1509  release date              country/primary location name
1510  release time              original transmission reference
1511  expiration date           headline
1512  expiration time           credit
1513  special instructions      source
1514  action advised            copyright notice
1515  reference service         contact
1516  reference date            caption/abstract
1517  reference number          local caption
1518  date created              writer/editor
1519  time created              image type
1520  digital creation date     image orientation
1521  digital creation time     language identifier
1522
1523  custom1 - custom20: NOT STANDARD but used by Fotostation.
1524  IPTCInfo also supports these fields.
1525
1526=head1 KNOWN BUGS
1527
1528IPTC meta-info on MacOS may be stored in the resource fork instead
1529of the data fork. This program will currently not scan the resource
1530fork.
1531
1532I have heard that some programs will embed IPTC info at the end of the
1533file instead of the beginning. The module will currently only look
1534near the front of the file. If you have a file with IPTC data that
1535IPTCInfo can't find, please contact me! I would like to ensure
1536IPTCInfo works with everyone's files.
1537
1538=head1 AUTHOR
1539
1540Josh Carter, josh@multipart-mixed.com
1541
1542=head1 SEE ALSO
1543
1544perl(1).
1545
1546=cut
1547