1###########################################################
2# A Perl package for showing/modifying JPEG (meta)data.   #
3# Copyright (C) 2004,2005,2006 Stefano Bettelli           #
4# See the COPYING and LICENSE files for license terms.    #
5###########################################################
6use Image::MetaData::JPEG::data::Tables qw(:TagsAPP1_XMP);
7no  integer;
8use strict;
9use warnings;
10
11###########################################################
12# This method is the entry point for APP1 XMP segments.   #
13# Such APP1 segments are used by Adobe for recording an   #
14# XMP packet in JPEG files (this is a special XML block   #
15# storing metadata information, similarly to Exif APP1 or #
16# IPTC APP13). The advantage of XMP is that it is exten-  #
17# sible and that it can be embedded in many file types,   #
18# like JPEG, PNG, GIF, HTML, PDF, PostScript, ecc...      #
19# Only the envelope changes. The format is the following: #
20#---------------------------------------------------------#
21# 29 bytes  namespace = http://ns.adobe.com/xap/1.0/\000  #
22#  ....     XMP packet (in some Unicode encoding)         #
23#=========================================================#
24# First, check that the mandatory Adobe namespace string  #
25# is there. Then, parse the XML and save the intermediate #
26# results. Last, Check that the XML block conforms to the #
27# RDF and XMP specifications (issue an error otherwise).  #
28###########################################################
29# Ref: "XMP Specification", version 3.2, June 2005, Adobe #
30#      Systems Inc., San Jose, CA, http://www.adobe.com   #
31###########################################################
32sub parse_app1_xmp {
33    my ($this) = @_;
34    # slurp the segment as a single string
35    my $packet = $this->read_record($ASCII, 0, $this->size());
36    # get rid of newline chars
37    $packet =~ y/\n\r//d;
38    # the ID must be Adobe's namespace; die if it is not correct
39    $packet =~ s/^($APP1_XMP_TAG|.{0,15})(.*)$/$2/;
40    $this->die("Incorrect XMP namespace ($1)") unless $1 eq $APP1_XMP_TAG;
41    $this->store_record('NAMESPACE', $ASCII, \ "$1");
42    # (TODO): find the used Unicode encoding and deal with it
43    use Encode; Encode::_utf8_on($packet);
44    # analyse the XML packet (this cannot fail)
45    $this->parse_xml_string(\ $packet); # writes into $this->{private_list}
46    #print join '::', @$_, "\n" for @{$this->{private_list}};
47    # check header (xpacket, x:x[am]pmeta and the outer rdf:RDF)
48    $this->test_xmp_header();
49    # test that XMP syntax is correct; [Dlist(ABOUT)] := [Desc(ABOUT)]+
50    $this->parse_rdf_description()
51	while $this->list_equal(['OPEN', 'rdf:Description']);
52    # cleanup
53    delete $this->{private_list};
54}
55
56###########################################################
57# This private method runs a series of regular expression #
58# match tests against the private list (starting at posi- #
59# tion $offset). $regexps_array is either a reference to  #
60# a list of references to regexp rules, or a reference to #
61# a single such list. A regexp rule consists of a list of #
62# regular express.s and variables to assign submatches to.#
63###########################################################
64sub list_equal {
65    my ($this, $regexps_array, $offset) = (@_, 0);
66    # convert a single rule into a list of rules
67    $regexps_array = [$regexps_array] unless ref $$regexps_array[0] eq 'ARRAY';
68    # check each rule separately, return as soon as possible
69    for my $pos ($offset..$offset + $#$regexps_array) {
70	return 0 unless exists $this->{private_list}->[$pos];
71	# do not modify the private list for the time being
72	my $elements = [ @{$this->{private_list}->[$pos]} ];
73	my $regexps  = $regexps_array->[$pos];
74	while (@{$regexps}) {
75	    return 0 unless @$elements;
76	    my ($e, $r) = (shift(@$elements), shift(@$regexps));
77	    my @matches = $e =~ /^$r$/; return 0 unless @matches;
78	    ${shift @$regexps} = shift @matches while ref $$regexps[0]; } }
79    return 1 + $#$regexps_array; }
80
81###########################################################
82# This private method is almost the same as list_equal,   #
83# but, if the match is positive, it also removes matching #
84# lines from the private list.                            #
85###########################################################
86sub list_extract {
87    my ($this, $regexps_array, $offset, $number) = (@_, 0);
88    my $lines = $this->list_equal($regexps_array, $offset) || return 0;
89    splice @{$this->{private_list}}, $offset, $lines; return 1; }
90
91###########################################################
92# Private method for saving a piece of information into   #
93# the private list (always undefined type). Arguments are:#
94# $pdir --> (list ref) identifies a subdirectory          #
95# $name --> of the Record to be saved                     #
96# $value --> content to be saved in the Record            #
97# $extra --> optonal info for {extra} field of a Record   #
98###########################################################
99sub store_xmp_value {
100    my ($this, $pdir, $name, $value, $extra) = @_;
101    my $rec = $this->store_record
102	($this->provide_subdirectory(@$pdir), $name, $UNDEF, \$value);
103    $rec->{extra} = $extra if $extra; }
104
105###########################################################
106# Private method for the extracting a list of attributes  #
107# and saving them in the private list; the arguments are: #
108# $pdir --> (list ref) identifies a subdirectory          #
109# $regexp --> to match the attribute name against         #
110# $extra --> info for the {extra} field of a Record       #
111###########################################################
112sub extract_attributes {
113    my ($this, $pdir, $regexp, $extra) = @_; my ($name, $value, %summary)= ();
114    $this->store_xmp_value($pdir, $name, $value, $extra),
115    $summary{$name} = $value while $this->list_extract
116	(['ATTRIBUTE', $regexp, \$name, '(.*)', \$value]);
117    return \ %summary; }
118
119###########################################################
120# This private method parses a generic XML string and     #
121# writes its findings in an array of array references.    #
122# Each sublist in the main list starts with a sublist     #
123# type, which can be OPEN, OPEN_ABBR, OPEN_SPECIAL,       #
124# ATTRIBUTE, COMMENT, CONTENT or CLOSE. The parsing algo- #
125# rithm is my current understanding of what XML is .....  #
126# ------------------------------------------------------- #
127# Spaces before a tag are not meaningful, but they cannot #
128# be thrown away before textual values. Keeping track of  #
129# this condition is the reason for the $f flag.           #
130###########################################################
131sub parse_xml_string {
132    my ($this, $string) = @_;
133    # initialisation of this private, intermediate list
134    $this->{private_list} = [] unless exists $this->{private_list};
135    # some variables and their initialisation
136    my $mkp_tag = qr/[\w:-]+/o; my $spaces; my $f = 0;
137    # how to push a new list of strings onto the private list
138    my $lpush = sub { push @{$this->{private_list}}, [@_] };
139    # how to extract the attribute list of a tag
140    my $apush = sub { my ($p) = @_; &$lpush('ATTRIBUTE', $1, $3) while $p
141			  =~ s/^\s*($mkp_tag)=([\'\"])([^\'\"]*)\2//o;
142		      &$lpush('IMPOSSIBLE', $p) if $p; };
143  PARSE_LOOP:
144    # extract spaces at the beginning (they are important for content!)
145    $$string =~ s/^(\s*)//o; $spaces = $1 || '';
146    # try to speed regular expressions up by lookint at the
147    # first two characters of the current string
148    if (substr($$string, 0, 1) eq '<') {
149	my $s = substr($$string, 1, 1);
150	# extract a closing markup
151	if ($s eq '/' && $$string =~ s/^<\/($mkp_tag)>//o) {
152	    &$lpush('CONTENT', $spaces) if $f; $f=0; &$lpush('CLOSE', $1); }
153	# extract a comment, if present ( <!-- comment --> )
154	elsif ($s eq '!' && $$string =~ s/^<!-- *(.*?) *-->//o) {
155	    &$lpush('COMMENT', $1); $f=0; }
156	# extract header tags ( <?some:thing val='1'?> ) + attributes
157	elsif ($s eq '?' && $$string =~ s/^<\?($mkp_tag) ?([^\?]*?)\?>//o) {
158	    &$lpush('OPEN_SPECIAL', $1); &$apush($2) if $2; $f=0; }
159	# extract an opening markup with or without attributes
160	# extract also self-contained tags ( <.... /> ), (not closing)
161	elsif ($$string =~ s/^<($mkp_tag) ?([^\?]*?)(\/?)>//o) {
162	    &$lpush($3 ? 'OPEN_ABBR' : 'OPEN', $1); &$apush($2) if $2;
163	    $3 ? &$lpush ('CLOSE_ABBR') : $f = 1; }
164	# an impossible case
165	else { &$lpush('IMPOSSIBLE', $$string) if $string; $$string = ""; }
166	# extract content (spaces are important ...)
167    } else { $$string =~ s/^([^<]+)//o; &$lpush('CONTENT', $spaces.$1); $f=0; }
168    # parse the rest of the string
169    $$string ? goto PARSE_LOOP : return;
170}
171
172###########################################################
173# Framework for the XMP packet. The packet content is     #
174# sandwiched between a header and a trailer, and may      #
175# contain padding whitespaces at the end. The 'xpacket'   #
176# header has two mandatory attributes, 'begin' and 'id'   #
177# (order is important), separated by exactly one space.   #
178# Attribute values, here and in the following, are enclo- #
179# sed by single quotes or double quotes. The value of     #
180# 'begin' must be the Unicode "zero-width non-breaking    #
181# space" (U+FEFF); an empty value is also acceptable (for #
182# backward compatibility), and means UTF-8. The value of  #
183# 'id' is fixed. Other attributes may be ignored. A pad-  #
184# ding of 2KB or 4KB, with a newline every 100 spaces, is #
185# recommended. The 'end' attribute of the trailer may     #
186# have a value of "r" (read-only) or "w" (modifiable).    #
187# ------------------------------------------------------- #
188# The structure of the packet content is as follows.      #
189# There is an optional x:xmpmeta (or x:xapmeta for older  #
190# files) element, with a mandatory xmlns:x attribute set  #
191# to "adobe:ns:meta/" and other optional attributes,      #
192# which can be ignored. Inside it (or at top level, if it #
193# is absent), there is exactly one rdf:RDF element with   #
194# an attribute specifying the xmlns:rdf namespace (other  #
195# namespaces can be listed here as additional attributes).#
196# Inside the 'rdf:RDF' element then, all XMP properties   #
197# are stored inside one or more rdf:Description element.  #
198# ------------------------------------------------------- #
199# <?xpacket begin="..." id="...XMP id ..." ...?>          #
200#   <x:xmpmeta xmlns:x='adobe:ns:meta/' ..attributes..>   #
201#     <rdf:RDF xmlns:rdf="...URI...">                     #
202#       [rdf:Description]+                                #
203#     </rdf:RDF>                                          #
204#   </x:xmpmeta>                                          #
205#   ... padding with XML whitespaces ...                  #
206# <?xpacket end="w"?>                                     #
207###########################################################
208sub test_xmp_header {
209    my ($this) = @_;
210    my ($rw, $filter, $f1, $f2, $meta, $ns, $URI) = ();
211    # search for <?xpacket begin="..." id="...XMP id ...";
212    $this->list_extract(['OPEN_SPECIAL', 'xpacket'])
213	|| $this->die('XMP not starting with "xpacket"');
214    $this->list_extract(['ATTRIBUTE', 'begin', $APP1_XMP_XPACKET_BEGIN])
215	|| $this->die('XMP xpacket-begin not zero-width Unicode space');
216    $this->list_extract(['ATTRIBUTE', 'id', $APP1_XMP_XPACKET_ID])
217	|| $this->die('XMP xpacket-id not correct');
218    # extract all additional attributes in the opening tag
219    $this->extract_attributes(['XMP_HEADER'], '(.*)', 'xpacket');
220    # search for <?xpacket end="w|r"?> at the end
221    $this->list_extract(['ATTRIBUTE', 'end', '(w|r)', \$rw], -1)
222	|| $this->die('XMP xpacket end attribute not found');
223    $this->list_extract(['OPEN_SPECIAL', 'xpacket'], -1) # OPEN, not CLOSE ...
224	|| $this->die('XMP not ending with "xpacket"');
225    $this->store_xmp_value(['XMP_HEADER'], 'xpacket-rw', $rw);
226    # extract additional filters (are these undocumented?)
227    while ($this->list_extract(['OPEN_SPECIAL', '(.*)', \$filter])) {
228	$this->list_extract(['ATTRIBUTE', '(.*)', \$f1, '(.*)', \$f2]);
229	$this->store_xmp_value(['XMP_HEADER'], $filter, "$f1=\"$f2\""); }
230    # take care of the xmpmeta/xapmeta tags, if present
231    $this->list_extract(['OPEN', '(x:x[am]pmeta)', \$meta]) || goto NO_XMPMETA;
232    $this->store_xmp_value(['XMP_HEADER'], 'meta', $meta);
233    $this->list_extract(['CLOSE', $meta], -1)
234	|| $this->die('XMP x:x[am]pmeta not closing');
235    $this->list_extract(['ATTRIBUTE', 'xmlns:x', $APP1_XMP_META_NS])
236	|| $this->die('XMP x:x[am]pmeta without namespace');
237    $this->extract_attributes(['XMP_HEADER'], '(.*)', 'meta');
238  NO_XMPMETA:
239    # take care of the outer rdf:RDF and its namespace
240    $this->list_extract(['OPEN', 'rdf:RDF'])
241	|| $this->die('Outer rdf:RDF not found');
242    $this->list_extract(['ATTRIBUTE', 'xmlns:rdf', $APP1_XMP_OUTER_RDF_NS])
243	|| $this->die('Namespace not correct/found in outer rdf:RDF');
244    $this->list_extract(['CLOSE', 'rdf:RDF'], -1)
245	|| $this->die('Outer rdf:RDF not closing');
246    # save additional namespaces if present (undocumented?)
247    $this->extract_attributes(['SCHEMAS'], 'xmlns:(.*)', 'rdf:RDF');
248    # extract all rdf:about and check that they are the same
249    # (sometimes 'rdf:' is missing, how should I treat this case?)
250    my @abouts = map { $$_[2] } grep { $$_[1] =~ /(rdf:|)about/ }
251                 grep { $$_[0] eq 'ATTRIBUTE' } @{$this->{private_list}};
252    $this->die("Inconsistent rdf:about's") if grep { $_ ne $abouts[0]} @abouts;
253    $this->store_xmp_value(['XMP_HEADER'], 'rdf:about', $abouts[0]);
254}
255
256###########################################################
257# Description elements: rdf:Description elements and XMP  #
258# schemas are usually in one-to-one correspondence. Each  #
259# element has two mandatory attributes, 'rdf:about' and   #
260# 'xmlns:NAME'. 'rdf:about' is usually empty (however, it #
261# can contain an application specific URI), and its value #
262# *must* be shared among all rdf:Description elements.    #
263# 'xmlns:NAME' specifies the local namespace prefix (NAME #
264# stands for the actual prefix). Additional namespaces    #
265# can be specified via 'xmlns' attributes.                #
266# ------------------------------------------------------- #
267# [rdf:Description] := <rdf:Description rdf:About='ABOUT' #
268#                           xmlns:NAME='text' ..ns..>     #
269#                         [property(NAME)]+               #
270#                      </rdf:Description>                 #
271# ------------------------------------------------------- #
272# There exists also an abbreviated form where properties  #
273# are listed as attributes of the rdf:Description tag (in #
274# this case there is no closing rdf:Description> tag, and #
275# the opening tags ends with the '/' character).          #
276# ------------------------------------------------------- #
277# [rdf:Description] := <rdf:Description rdf:About='ABOUT' #
278#                    xmlns:NAME='text' [inlineP(NAME)]+/> #
279# [inlineP(NAME)] := "NAME:name='value'"                  #
280###########################################################
281sub parse_rdf_description {
282    my ($this) = @_; my ($type, $ns) = ();
283    # extract description opening ($type is OPEN or OPEN_ABBR)
284    $this->list_extract(['(OPEN.*)', \$type, 'rdf:Description']) ||
285	$this->die('first-level rdf:Description opening tag not found');
286    # mandatory rdf:about attribute (its value is already checked)
287    $this->list_extract(['ATTRIBUTE', '(rdf:|)about', '.*'])
288	|| $this->die('rdf:about failure (missing or inconsistent)');
289    # mandatory main namespace in xmlns:abbreviation
290    $this->list_equal(['ATTRIBUTE', 'xmlns:.*', '.*'])
291	|| $this->die('rdf:Description namespace not found');
292    # extract all additional namespaces (and find the secondary one)
293    # the exact meaning of this operation is to be clarified (TODO)
294    my $nss = $this->extract_attributes(['SCHEMAS'], 'xmlns:(.*)');
295    do { $ns = $_ if $$nss{$_}!~ /\#$/ && ! defined $ns } for keys %$nss;
296    # if $type is OPEN_ABBR, all simple properties are attributes
297    $this->extract_attributes(['PROPERTIES'], '(.*)', 'abbr'), return
298	if $type eq 'OPEN_ABBR';
299    # some rdf:Description's are there only as placeholders (only empty
300    # content) --> do not try to extract properties in this case. In
301    # the general case, parse all properties in this rdf:Description
302    unless ($this->list_extract(['CONTENT', '\s*'])) {
303	$this->parse_rdf_property($ns, ['PROPERTIES'])
304	    while ! $this->list_equal(['CLOSE', 'rdf:Description']); }
305    # parse the close tag of rdf:Description
306    $this->list_extract(['CLOSE', 'rdf:Description'])
307	|| $this->die('first-level rdf:Description closing tag not found');
308    1 }
309
310###########################################################
311# This private method is a dispatcher for the abstract    #
312# concept of XMP property. Actual properties are either   #
313# simple or structured or they are array properties.      #
314# ------------------------------------------------------- #
315# [property(NAME)] := [simpleP(NAME)]                     #
316#                  or [structuredP(NAME)]                 #
317#                  or [arrayP(NAME)]                      #
318###########################################################
319sub parse_rdf_property {
320    my ($this, $ns, $pdir) = @_;
321    $this->parse_comment                ($ns, $pdir) ||
322	$this->parse_rdf_simple_property($ns, $pdir) ||
323	$this->parse_rdf_struct_property($ns, $pdir) ||
324	$this->parse_rdf_array_property ($ns, $pdir) ||
325	$this->die('parse_rdf_property: unhandled case');
326    1 }
327
328###########################################################
329# Comments: this is undocumented in the XMP manual by     #
330# Adobe, but there is evidence that some properties may   #
331# be replaced by a comment, usually carrying its name.    #
332# ------------------------------------------------------- #
333# [comment] := <!-- this is a comment -->                 #
334###########################################################
335sub parse_comment {
336    my ($this, $ns, $pdir) = @_; my $comment = '';
337    return 0 unless $this->list_extract(['COMMENT', '(.*)', \$comment]);
338    $this->store_xmp_value($pdir, "$ns:COMMENT", $comment);
339    1 }
340
341###########################################################
342# Simple properties: a simple property is usually just    #
343# some literal value between opening and closing tags     #
344# carrying the property name; it can have qualifiers      #
345# (attributes). Just to make things easier, it seems that #
346# there is the (undocumented) possibility of replacing    #
347# the property value (text) with a sequence of general    #
348# properties (i.e., a clone of a structured property ...) #
349# ------------------------------------------------------- #
350# [simpleP(NAME)] := <NAME:name [qualifier]*>text</NAME:name>
351#                 or <NAME:name [qualifier]*>[property(name)]+</NAME:name>
352# [qualifier] := "name:pnam='text'"                       #
353###########################################################
354sub parse_rdf_simple_property {
355    my ($this, $ns, $pdir) = @_; my ($name, $n, $content, $v) = ();
356    # try to match structure and return on failure; indeed, it
357    # is difficult to "match" a simple property, so, we try to
358    # exclude all other cases here ...
359    return 0 if $this->list_equal([['OPEN', '.*'], ['OPEN', 'rdf:.*']]);
360    # extract the opening tag with the property name
361    $this->list_extract(['OPEN', "($ns:.*)", \$name])
362	|| $this->die('simple property: error at opening tag');
363    # property qualifiers not yet supported yet!! (TODO)
364    # case I: the value is simply text
365    if ($this->list_extract(['CONTENT', '(.*)', \$content])) {
366	$this->store_xmp_value($pdir, $name, $content); }
367    # case II: the "value" is a sequence of properties
368    # this is to be clarified .... (TODO)
369    else { push @$pdir, $name;
370	   $this->extract_attributes($pdir, '(.*)', 'ATTRIBUTE');
371	   $this->store_xmp_value($pdir, 'CONTENT', $v)
372	       while $this->list_extract(['CONTENT', '(.*)', \$v]);
373	   $this->parse_rdf_simple_property($ns, $pdir)
374	       while ! $this->list_equal(['CLOSE', "$name"]);
375	   pop @$pdir; }
376    # closing tag
377    $this->list_extract(['CLOSE', "$name"])
378	|| $this->die('simple property: error at closing tag');
379    1 }
380
381###########################################################
382# Structured properties: agglomerates of properties of    #
383# different type. The inner properties are stored inside  #
384# a secondary rdf:Description tag, which also contains a  #
385# secondary namespace definition, to be used by inner     #
386# properties. I hope this is all.                         #
387# ------------------------------------------------------- #
388# [structuredP(NAME)] := <NAME:name>                      #
389#                          <rdf:Description xmlns:N2="...">
390#                            [property(N2)]+              #
391#                          </rdf:Description>             #
392#                        </NAME:name>                     #
393###########################################################
394sub parse_rdf_struct_property {
395    my ($this, $ns, $pdir) = @_; my ($name, $ns_2, $ns_2_v) = ();
396    # try to match structure and return on failure
397    return 0 unless $this->list_extract
398	(['OPEN', "$ns:(.*)", \$name], ['OPEN', 'rdf:Description'],
399	 ['ATTRIBUTE', 'xmlns:(.*)', \$ns_2, '(.*)', \$ns_2_v]);
400    # store the property content
401    $this->store_xmp_value(['SCHEMAS'], $ns_2, $ns_2_v);
402    # get all embedded properties
403    $this->parse_rdf_property($ns_2, [@$pdir, $name])
404	while ! $this->list_equal(['CLOSE', $name]);
405    # find where tags are closing
406    $this->list_extract(['CLOSE', $name])
407	|| $this->die('structured property: error at closing tag');
408    1 }
409
410###########################################################
411# Array properties: rdf:Seq is for an ordered list of     #
412# properties, rdf:Bag for an unordered set of properties  #
413# and rdf:Alt for a list of alternatives. Items are most  #
414# often homogeneous, but this is not a rule. There is a   #
415# namespace problem for qualified items (TODO)            #
416# ------------------------------------------------------- #
417# [arrayP(NAME)] := <NAME:name>                           #
418#                     <rdf:[Bag|Seq|Alt]>                 #
419#                       [item]+                           #
420#                     </rdf:[Bag|Seq|Alt]>                #
421#                   </NAME:name>                          #
422# [item] := [simple_item] or [prop_item] or               #
423#              [qualif_item(N2)] or [lang_item]           #
424# ------------------------------------------------------- #
425# Note: a [lang_item] can be found only in an rdf:Alt,    #
426# and this rdf:Alt must in turn contain only [lang_item]  #
427# items, but this check is not yet implemented (TODO).    #
428###########################################################
429sub parse_rdf_array_property {
430    my ($this, $ns, $pdir) = @_; my ($name, $type) = ();
431    # try to match structure and return on failure
432    return 0 unless $this->list_extract
433	([['OPEN',"($ns:.*)",\$name], ['OPEN','(rdf:(Bag|Seq|Alt))',\$type]]);
434    # get all items in this array property
435    while (! $this->list_equal(['CLOSE', $type])) {
436	$this->parse_rdf_item          ([@$pdir, $name]) && next;
437	$this->parse_rdf_item_lang     ([@$pdir, $name]) && next;
438	$this->parse_rdf_item_property ([@$pdir, $name]) && next;
439	$this->parse_rdf_item_qualified([@$pdir, $name]) && next;
440	$this->die('parse_rdf_array_property: unhandled case'); }
441    # store the property type in the subdirectory
442    $this->search_record(@$pdir, $name)->{extra} = $type;
443    # find where tags are closing
444    $this->list_extract([['CLOSE', $type], ['CLOSE', "$name"]])
445	|| $this->die('array property: error at closing tag');
446    1 }
447
448###########################################################
449# Simple items: just text strings inside rdf:li tags. It  #
450# is the simplest case for rdf:Bag, rdf:Set and rdf:Alt   #
451# array properties. It does not need a subdirectory.      #
452# ------------------------------------------------------- #
453# [simple_item] := <rdf:li>text<rdf:li>                   #
454###########################################################
455sub parse_rdf_item {
456    my ($this, $pdir) = @_; my ($content) = ();
457    # try to match structure and return on failure
458    return 0 unless $this->list_extract
459	([['OPEN','rdf:li'],['CONTENT','(.*)',\$content],['CLOSE','rdf:li']]);
460    # store the property content
461    $this->store_xmp_value($pdir, 'ITEM', $content);
462    1 }
463
464###########################################################
465# Property items: these items contain another property    #
466# which is not simple text, e.g., a structured property   #
467# or an array property. Additional qualifiers can be spe- #
468# cified as attributes of the rdf:li tag. Such properties #
469# in general require their own subdirectories.            #
470# ------------------------------------------------------- #
471# [prop_item] := <rdf:li [qualifier]>[simplP(NAME)]<rdf:li>
472###########################################################
473sub parse_rdf_item_property {
474    my ($this, $pdir) = @_; my ($name, $value) = ();
475    # try to match structure and return on failure
476    return 0 unless $this->list_equal
477	([['OPEN', 'rdf:li'], ['ATTRIBUTE', 'rdf:.*', '.*'], ['OPEN', '.*']]);
478    $this->list_extract([['OPEN', 'rdf:li'],
479			 ['ATTRIBUTE', '(rdf:.*)', \$name, '(.*)', \$value]]);
480    # store the property content
481    $this->store_xmp_value([@$pdir, 'ITEM'], $name, $value, 'QUALIFIER');
482    # this is plainly wrong: how to extract the correct namespace? TODO
483    $this->parse_rdf_property('stJob', [@$pdir, 'ITEM']);
484    $this->list_extract(['CLOSE', 'rdf:li'])
485	|| $this->die('item_property: error at closing tag');
486    1 }
487
488###########################################################
489# Qualified items: these items can be found inside an     #
490# array property ('Bag', 'Seq' or 'Alt') and differ from  #
491# standard items because they do not only have a value,   #
492# but also one or more "qualifiers"; they remain unnamed, #
493# however. The namespace of the qualifiers can be diffe-  #
494# rent from the main namespace, but this is not yet taken #
495# into account (TODO).                                    #
496# ------------------------------------------------------- #
497# [qualif_item(N2)] := <rdf:li>                           #
498#                        <rdf:Description>                #
499#                          <rdf:value>text</rdf:value>    #
500#                          [qualifier(N2)]*               #
501#                        </rdf:Description>               #
502#                      </rdf:li>                          #
503# [qualifier(N2)] := <N2:role>text</N2:role>              #
504###########################################################
505sub parse_rdf_item_qualified {
506    my ($this, $pdir) = @_; my ($name, $value) = ('qualified-ITEM');
507    # try to match structure and return on failure
508    return 0 unless $this->list_extract
509	([['OPEN','rdf:li'], ['OPEN','rdf:Description'], ['OPEN','rdf:value'],
510	  ['CONTENT', '(.*)', \$value], ['CLOSE', 'rdf:value']]);
511    # store the qualified property value, then all qualifiers;
512    # we need a new subdirectory to store all this stuff
513    $this->store_xmp_value([@$pdir, $name], 'ITEM', $value);
514    1 while $this->parse_rdf_simple_property('.*', [@$pdir, $name]);
515    # find where tags are closing
516    $this->list_extract([['CLOSE', 'rdf:Description'], ['CLOSE', 'rdf:li']])
517	|| $this->die('item_qualified: error at closing tag');
518    1 }
519
520###########################################################
521# Language alternatives: these are items inside an 'Alt'  #
522# array properties. It should not be possible to mix      #
523# language alternatives and normal items, but this is not #
524# currently checked (TODO ?)                              #
525# ------------------------------------------------------- #
526# [lang_item] := <rdf:li xml:lang='...'>text</rdf:li>     #
527###########################################################
528sub parse_rdf_item_lang {
529    my ($this, $pdir) = @_; my ($language, $content) = ();
530    # try to match structure and return on failure
531    return 0 unless $this->list_extract
532	([['OPEN', 'rdf:li'], ['ATTRIBUTE', 'xml:lang', '(.*)', \$language],
533	  ['CONTENT', '(.*)', \$content], ['CLOSE', 'rdf:li']]);
534    # store the property content
535    $this->store_xmp_value($pdir, $language, $content, 'lang-alt');
536    1 }
537
538# successful load
5391;
540