1########################################################### 2# A Perl package for showing/modifying JPEG (meta)data. # 3# Copyright (C) 2004,2005,2006 Stefano Bettelli # 4# See the COPYING and LICENSE files for license terms. # 5########################################################### 6use Image::MetaData::JPEG::data::Tables qw(:TagsAPP1_XMP); 7no integer; 8use strict; 9use warnings; 10 11########################################################### 12# This method is the entry point for APP1 XMP segments. # 13# Such APP1 segments are used by Adobe for recording an # 14# XMP packet in JPEG files (this is a special XML block # 15# storing metadata information, similarly to Exif APP1 or # 16# IPTC APP13). The advantage of XMP is that it is exten- # 17# sible and that it can be embedded in many file types, # 18# like JPEG, PNG, GIF, HTML, PDF, PostScript, ecc... # 19# Only the envelope changes. The format is the following: # 20#---------------------------------------------------------# 21# 29 bytes namespace = http://ns.adobe.com/xap/1.0/\000 # 22# .... XMP packet (in some Unicode encoding) # 23#=========================================================# 24# First, check that the mandatory Adobe namespace string # 25# is there. Then, parse the XML and save the intermediate # 26# results. Last, Check that the XML block conforms to the # 27# RDF and XMP specifications (issue an error otherwise). # 28########################################################### 29# Ref: "XMP Specification", version 3.2, June 2005, Adobe # 30# Systems Inc., San Jose, CA, http://www.adobe.com # 31########################################################### 32sub parse_app1_xmp { 33 my ($this) = @_; 34 # slurp the segment as a single string 35 my $packet = $this->read_record($ASCII, 0, $this->size()); 36 # get rid of newline chars 37 $packet =~ y/\n\r//d; 38 # the ID must be Adobe's namespace; die if it is not correct 39 $packet =~ s/^($APP1_XMP_TAG|.{0,15})(.*)$/$2/; 40 $this->die("Incorrect XMP namespace ($1)") unless $1 eq $APP1_XMP_TAG; 41 $this->store_record('NAMESPACE', $ASCII, \ "$1"); 42 # (TODO): find the used Unicode encoding and deal with it 43 use Encode; Encode::_utf8_on($packet); 44 # analyse the XML packet (this cannot fail) 45 $this->parse_xml_string(\ $packet); # writes into $this->{private_list} 46 #print join '::', @$_, "\n" for @{$this->{private_list}}; 47 # check header (xpacket, x:x[am]pmeta and the outer rdf:RDF) 48 $this->test_xmp_header(); 49 # test that XMP syntax is correct; [Dlist(ABOUT)] := [Desc(ABOUT)]+ 50 $this->parse_rdf_description() 51 while $this->list_equal(['OPEN', 'rdf:Description']); 52 # cleanup 53 delete $this->{private_list}; 54} 55 56########################################################### 57# This private method runs a series of regular expression # 58# match tests against the private list (starting at posi- # 59# tion $offset). $regexps_array is either a reference to # 60# a list of references to regexp rules, or a reference to # 61# a single such list. A regexp rule consists of a list of # 62# regular express.s and variables to assign submatches to.# 63########################################################### 64sub list_equal { 65 my ($this, $regexps_array, $offset) = (@_, 0); 66 # convert a single rule into a list of rules 67 $regexps_array = [$regexps_array] unless ref $$regexps_array[0] eq 'ARRAY'; 68 # check each rule separately, return as soon as possible 69 for my $pos ($offset..$offset + $#$regexps_array) { 70 return 0 unless exists $this->{private_list}->[$pos]; 71 # do not modify the private list for the time being 72 my $elements = [ @{$this->{private_list}->[$pos]} ]; 73 my $regexps = $regexps_array->[$pos]; 74 while (@{$regexps}) { 75 return 0 unless @$elements; 76 my ($e, $r) = (shift(@$elements), shift(@$regexps)); 77 my @matches = $e =~ /^$r$/; return 0 unless @matches; 78 ${shift @$regexps} = shift @matches while ref $$regexps[0]; } } 79 return 1 + $#$regexps_array; } 80 81########################################################### 82# This private method is almost the same as list_equal, # 83# but, if the match is positive, it also removes matching # 84# lines from the private list. # 85########################################################### 86sub list_extract { 87 my ($this, $regexps_array, $offset, $number) = (@_, 0); 88 my $lines = $this->list_equal($regexps_array, $offset) || return 0; 89 splice @{$this->{private_list}}, $offset, $lines; return 1; } 90 91########################################################### 92# Private method for saving a piece of information into # 93# the private list (always undefined type). Arguments are:# 94# $pdir --> (list ref) identifies a subdirectory # 95# $name --> of the Record to be saved # 96# $value --> content to be saved in the Record # 97# $extra --> optonal info for {extra} field of a Record # 98########################################################### 99sub store_xmp_value { 100 my ($this, $pdir, $name, $value, $extra) = @_; 101 my $rec = $this->store_record 102 ($this->provide_subdirectory(@$pdir), $name, $UNDEF, \$value); 103 $rec->{extra} = $extra if $extra; } 104 105########################################################### 106# Private method for the extracting a list of attributes # 107# and saving them in the private list; the arguments are: # 108# $pdir --> (list ref) identifies a subdirectory # 109# $regexp --> to match the attribute name against # 110# $extra --> info for the {extra} field of a Record # 111########################################################### 112sub extract_attributes { 113 my ($this, $pdir, $regexp, $extra) = @_; my ($name, $value, %summary)= (); 114 $this->store_xmp_value($pdir, $name, $value, $extra), 115 $summary{$name} = $value while $this->list_extract 116 (['ATTRIBUTE', $regexp, \$name, '(.*)', \$value]); 117 return \ %summary; } 118 119########################################################### 120# This private method parses a generic XML string and # 121# writes its findings in an array of array references. # 122# Each sublist in the main list starts with a sublist # 123# type, which can be OPEN, OPEN_ABBR, OPEN_SPECIAL, # 124# ATTRIBUTE, COMMENT, CONTENT or CLOSE. The parsing algo- # 125# rithm is my current understanding of what XML is ..... # 126# ------------------------------------------------------- # 127# Spaces before a tag are not meaningful, but they cannot # 128# be thrown away before textual values. Keeping track of # 129# this condition is the reason for the $f flag. # 130########################################################### 131sub parse_xml_string { 132 my ($this, $string) = @_; 133 # initialisation of this private, intermediate list 134 $this->{private_list} = [] unless exists $this->{private_list}; 135 # some variables and their initialisation 136 my $mkp_tag = qr/[\w:-]+/o; my $spaces; my $f = 0; 137 # how to push a new list of strings onto the private list 138 my $lpush = sub { push @{$this->{private_list}}, [@_] }; 139 # how to extract the attribute list of a tag 140 my $apush = sub { my ($p) = @_; &$lpush('ATTRIBUTE', $1, $3) while $p 141 =~ s/^\s*($mkp_tag)=([\'\"])([^\'\"]*)\2//o; 142 &$lpush('IMPOSSIBLE', $p) if $p; }; 143 PARSE_LOOP: 144 # extract spaces at the beginning (they are important for content!) 145 $$string =~ s/^(\s*)//o; $spaces = $1 || ''; 146 # try to speed regular expressions up by lookint at the 147 # first two characters of the current string 148 if (substr($$string, 0, 1) eq '<') { 149 my $s = substr($$string, 1, 1); 150 # extract a closing markup 151 if ($s eq '/' && $$string =~ s/^<\/($mkp_tag)>//o) { 152 &$lpush('CONTENT', $spaces) if $f; $f=0; &$lpush('CLOSE', $1); } 153 # extract a comment, if present ( <!-- comment --> ) 154 elsif ($s eq '!' && $$string =~ s/^<!-- *(.*?) *-->//o) { 155 &$lpush('COMMENT', $1); $f=0; } 156 # extract header tags ( <?some:thing val='1'?> ) + attributes 157 elsif ($s eq '?' && $$string =~ s/^<\?($mkp_tag) ?([^\?]*?)\?>//o) { 158 &$lpush('OPEN_SPECIAL', $1); &$apush($2) if $2; $f=0; } 159 # extract an opening markup with or without attributes 160 # extract also self-contained tags ( <.... /> ), (not closing) 161 elsif ($$string =~ s/^<($mkp_tag) ?([^\?]*?)(\/?)>//o) { 162 &$lpush($3 ? 'OPEN_ABBR' : 'OPEN', $1); &$apush($2) if $2; 163 $3 ? &$lpush ('CLOSE_ABBR') : $f = 1; } 164 # an impossible case 165 else { &$lpush('IMPOSSIBLE', $$string) if $string; $$string = ""; } 166 # extract content (spaces are important ...) 167 } else { $$string =~ s/^([^<]+)//o; &$lpush('CONTENT', $spaces.$1); $f=0; } 168 # parse the rest of the string 169 $$string ? goto PARSE_LOOP : return; 170} 171 172########################################################### 173# Framework for the XMP packet. The packet content is # 174# sandwiched between a header and a trailer, and may # 175# contain padding whitespaces at the end. The 'xpacket' # 176# header has two mandatory attributes, 'begin' and 'id' # 177# (order is important), separated by exactly one space. # 178# Attribute values, here and in the following, are enclo- # 179# sed by single quotes or double quotes. The value of # 180# 'begin' must be the Unicode "zero-width non-breaking # 181# space" (U+FEFF); an empty value is also acceptable (for # 182# backward compatibility), and means UTF-8. The value of # 183# 'id' is fixed. Other attributes may be ignored. A pad- # 184# ding of 2KB or 4KB, with a newline every 100 spaces, is # 185# recommended. The 'end' attribute of the trailer may # 186# have a value of "r" (read-only) or "w" (modifiable). # 187# ------------------------------------------------------- # 188# The structure of the packet content is as follows. # 189# There is an optional x:xmpmeta (or x:xapmeta for older # 190# files) element, with a mandatory xmlns:x attribute set # 191# to "adobe:ns:meta/" and other optional attributes, # 192# which can be ignored. Inside it (or at top level, if it # 193# is absent), there is exactly one rdf:RDF element with # 194# an attribute specifying the xmlns:rdf namespace (other # 195# namespaces can be listed here as additional attributes).# 196# Inside the 'rdf:RDF' element then, all XMP properties # 197# are stored inside one or more rdf:Description element. # 198# ------------------------------------------------------- # 199# <?xpacket begin="..." id="...XMP id ..." ...?> # 200# <x:xmpmeta xmlns:x='adobe:ns:meta/' ..attributes..> # 201# <rdf:RDF xmlns:rdf="...URI..."> # 202# [rdf:Description]+ # 203# </rdf:RDF> # 204# </x:xmpmeta> # 205# ... padding with XML whitespaces ... # 206# <?xpacket end="w"?> # 207########################################################### 208sub test_xmp_header { 209 my ($this) = @_; 210 my ($rw, $filter, $f1, $f2, $meta, $ns, $URI) = (); 211 # search for <?xpacket begin="..." id="...XMP id ..."; 212 $this->list_extract(['OPEN_SPECIAL', 'xpacket']) 213 || $this->die('XMP not starting with "xpacket"'); 214 $this->list_extract(['ATTRIBUTE', 'begin', $APP1_XMP_XPACKET_BEGIN]) 215 || $this->die('XMP xpacket-begin not zero-width Unicode space'); 216 $this->list_extract(['ATTRIBUTE', 'id', $APP1_XMP_XPACKET_ID]) 217 || $this->die('XMP xpacket-id not correct'); 218 # extract all additional attributes in the opening tag 219 $this->extract_attributes(['XMP_HEADER'], '(.*)', 'xpacket'); 220 # search for <?xpacket end="w|r"?> at the end 221 $this->list_extract(['ATTRIBUTE', 'end', '(w|r)', \$rw], -1) 222 || $this->die('XMP xpacket end attribute not found'); 223 $this->list_extract(['OPEN_SPECIAL', 'xpacket'], -1) # OPEN, not CLOSE ... 224 || $this->die('XMP not ending with "xpacket"'); 225 $this->store_xmp_value(['XMP_HEADER'], 'xpacket-rw', $rw); 226 # extract additional filters (are these undocumented?) 227 while ($this->list_extract(['OPEN_SPECIAL', '(.*)', \$filter])) { 228 $this->list_extract(['ATTRIBUTE', '(.*)', \$f1, '(.*)', \$f2]); 229 $this->store_xmp_value(['XMP_HEADER'], $filter, "$f1=\"$f2\""); } 230 # take care of the xmpmeta/xapmeta tags, if present 231 $this->list_extract(['OPEN', '(x:x[am]pmeta)', \$meta]) || goto NO_XMPMETA; 232 $this->store_xmp_value(['XMP_HEADER'], 'meta', $meta); 233 $this->list_extract(['CLOSE', $meta], -1) 234 || $this->die('XMP x:x[am]pmeta not closing'); 235 $this->list_extract(['ATTRIBUTE', 'xmlns:x', $APP1_XMP_META_NS]) 236 || $this->die('XMP x:x[am]pmeta without namespace'); 237 $this->extract_attributes(['XMP_HEADER'], '(.*)', 'meta'); 238 NO_XMPMETA: 239 # take care of the outer rdf:RDF and its namespace 240 $this->list_extract(['OPEN', 'rdf:RDF']) 241 || $this->die('Outer rdf:RDF not found'); 242 $this->list_extract(['ATTRIBUTE', 'xmlns:rdf', $APP1_XMP_OUTER_RDF_NS]) 243 || $this->die('Namespace not correct/found in outer rdf:RDF'); 244 $this->list_extract(['CLOSE', 'rdf:RDF'], -1) 245 || $this->die('Outer rdf:RDF not closing'); 246 # save additional namespaces if present (undocumented?) 247 $this->extract_attributes(['SCHEMAS'], 'xmlns:(.*)', 'rdf:RDF'); 248 # extract all rdf:about and check that they are the same 249 # (sometimes 'rdf:' is missing, how should I treat this case?) 250 my @abouts = map { $$_[2] } grep { $$_[1] =~ /(rdf:|)about/ } 251 grep { $$_[0] eq 'ATTRIBUTE' } @{$this->{private_list}}; 252 $this->die("Inconsistent rdf:about's") if grep { $_ ne $abouts[0]} @abouts; 253 $this->store_xmp_value(['XMP_HEADER'], 'rdf:about', $abouts[0]); 254} 255 256########################################################### 257# Description elements: rdf:Description elements and XMP # 258# schemas are usually in one-to-one correspondence. Each # 259# element has two mandatory attributes, 'rdf:about' and # 260# 'xmlns:NAME'. 'rdf:about' is usually empty (however, it # 261# can contain an application specific URI), and its value # 262# *must* be shared among all rdf:Description elements. # 263# 'xmlns:NAME' specifies the local namespace prefix (NAME # 264# stands for the actual prefix). Additional namespaces # 265# can be specified via 'xmlns' attributes. # 266# ------------------------------------------------------- # 267# [rdf:Description] := <rdf:Description rdf:About='ABOUT' # 268# xmlns:NAME='text' ..ns..> # 269# [property(NAME)]+ # 270# </rdf:Description> # 271# ------------------------------------------------------- # 272# There exists also an abbreviated form where properties # 273# are listed as attributes of the rdf:Description tag (in # 274# this case there is no closing rdf:Description> tag, and # 275# the opening tags ends with the '/' character). # 276# ------------------------------------------------------- # 277# [rdf:Description] := <rdf:Description rdf:About='ABOUT' # 278# xmlns:NAME='text' [inlineP(NAME)]+/> # 279# [inlineP(NAME)] := "NAME:name='value'" # 280########################################################### 281sub parse_rdf_description { 282 my ($this) = @_; my ($type, $ns) = (); 283 # extract description opening ($type is OPEN or OPEN_ABBR) 284 $this->list_extract(['(OPEN.*)', \$type, 'rdf:Description']) || 285 $this->die('first-level rdf:Description opening tag not found'); 286 # mandatory rdf:about attribute (its value is already checked) 287 $this->list_extract(['ATTRIBUTE', '(rdf:|)about', '.*']) 288 || $this->die('rdf:about failure (missing or inconsistent)'); 289 # mandatory main namespace in xmlns:abbreviation 290 $this->list_equal(['ATTRIBUTE', 'xmlns:.*', '.*']) 291 || $this->die('rdf:Description namespace not found'); 292 # extract all additional namespaces (and find the secondary one) 293 # the exact meaning of this operation is to be clarified (TODO) 294 my $nss = $this->extract_attributes(['SCHEMAS'], 'xmlns:(.*)'); 295 do { $ns = $_ if $$nss{$_}!~ /\#$/ && ! defined $ns } for keys %$nss; 296 # if $type is OPEN_ABBR, all simple properties are attributes 297 $this->extract_attributes(['PROPERTIES'], '(.*)', 'abbr'), return 298 if $type eq 'OPEN_ABBR'; 299 # some rdf:Description's are there only as placeholders (only empty 300 # content) --> do not try to extract properties in this case. In 301 # the general case, parse all properties in this rdf:Description 302 unless ($this->list_extract(['CONTENT', '\s*'])) { 303 $this->parse_rdf_property($ns, ['PROPERTIES']) 304 while ! $this->list_equal(['CLOSE', 'rdf:Description']); } 305 # parse the close tag of rdf:Description 306 $this->list_extract(['CLOSE', 'rdf:Description']) 307 || $this->die('first-level rdf:Description closing tag not found'); 308 1 } 309 310########################################################### 311# This private method is a dispatcher for the abstract # 312# concept of XMP property. Actual properties are either # 313# simple or structured or they are array properties. # 314# ------------------------------------------------------- # 315# [property(NAME)] := [simpleP(NAME)] # 316# or [structuredP(NAME)] # 317# or [arrayP(NAME)] # 318########################################################### 319sub parse_rdf_property { 320 my ($this, $ns, $pdir) = @_; 321 $this->parse_comment ($ns, $pdir) || 322 $this->parse_rdf_simple_property($ns, $pdir) || 323 $this->parse_rdf_struct_property($ns, $pdir) || 324 $this->parse_rdf_array_property ($ns, $pdir) || 325 $this->die('parse_rdf_property: unhandled case'); 326 1 } 327 328########################################################### 329# Comments: this is undocumented in the XMP manual by # 330# Adobe, but there is evidence that some properties may # 331# be replaced by a comment, usually carrying its name. # 332# ------------------------------------------------------- # 333# [comment] := <!-- this is a comment --> # 334########################################################### 335sub parse_comment { 336 my ($this, $ns, $pdir) = @_; my $comment = ''; 337 return 0 unless $this->list_extract(['COMMENT', '(.*)', \$comment]); 338 $this->store_xmp_value($pdir, "$ns:COMMENT", $comment); 339 1 } 340 341########################################################### 342# Simple properties: a simple property is usually just # 343# some literal value between opening and closing tags # 344# carrying the property name; it can have qualifiers # 345# (attributes). Just to make things easier, it seems that # 346# there is the (undocumented) possibility of replacing # 347# the property value (text) with a sequence of general # 348# properties (i.e., a clone of a structured property ...) # 349# ------------------------------------------------------- # 350# [simpleP(NAME)] := <NAME:name [qualifier]*>text</NAME:name> 351# or <NAME:name [qualifier]*>[property(name)]+</NAME:name> 352# [qualifier] := "name:pnam='text'" # 353########################################################### 354sub parse_rdf_simple_property { 355 my ($this, $ns, $pdir) = @_; my ($name, $n, $content, $v) = (); 356 # try to match structure and return on failure; indeed, it 357 # is difficult to "match" a simple property, so, we try to 358 # exclude all other cases here ... 359 return 0 if $this->list_equal([['OPEN', '.*'], ['OPEN', 'rdf:.*']]); 360 # extract the opening tag with the property name 361 $this->list_extract(['OPEN', "($ns:.*)", \$name]) 362 || $this->die('simple property: error at opening tag'); 363 # property qualifiers not yet supported yet!! (TODO) 364 # case I: the value is simply text 365 if ($this->list_extract(['CONTENT', '(.*)', \$content])) { 366 $this->store_xmp_value($pdir, $name, $content); } 367 # case II: the "value" is a sequence of properties 368 # this is to be clarified .... (TODO) 369 else { push @$pdir, $name; 370 $this->extract_attributes($pdir, '(.*)', 'ATTRIBUTE'); 371 $this->store_xmp_value($pdir, 'CONTENT', $v) 372 while $this->list_extract(['CONTENT', '(.*)', \$v]); 373 $this->parse_rdf_simple_property($ns, $pdir) 374 while ! $this->list_equal(['CLOSE', "$name"]); 375 pop @$pdir; } 376 # closing tag 377 $this->list_extract(['CLOSE', "$name"]) 378 || $this->die('simple property: error at closing tag'); 379 1 } 380 381########################################################### 382# Structured properties: agglomerates of properties of # 383# different type. The inner properties are stored inside # 384# a secondary rdf:Description tag, which also contains a # 385# secondary namespace definition, to be used by inner # 386# properties. I hope this is all. # 387# ------------------------------------------------------- # 388# [structuredP(NAME)] := <NAME:name> # 389# <rdf:Description xmlns:N2="..."> 390# [property(N2)]+ # 391# </rdf:Description> # 392# </NAME:name> # 393########################################################### 394sub parse_rdf_struct_property { 395 my ($this, $ns, $pdir) = @_; my ($name, $ns_2, $ns_2_v) = (); 396 # try to match structure and return on failure 397 return 0 unless $this->list_extract 398 (['OPEN', "$ns:(.*)", \$name], ['OPEN', 'rdf:Description'], 399 ['ATTRIBUTE', 'xmlns:(.*)', \$ns_2, '(.*)', \$ns_2_v]); 400 # store the property content 401 $this->store_xmp_value(['SCHEMAS'], $ns_2, $ns_2_v); 402 # get all embedded properties 403 $this->parse_rdf_property($ns_2, [@$pdir, $name]) 404 while ! $this->list_equal(['CLOSE', $name]); 405 # find where tags are closing 406 $this->list_extract(['CLOSE', $name]) 407 || $this->die('structured property: error at closing tag'); 408 1 } 409 410########################################################### 411# Array properties: rdf:Seq is for an ordered list of # 412# properties, rdf:Bag for an unordered set of properties # 413# and rdf:Alt for a list of alternatives. Items are most # 414# often homogeneous, but this is not a rule. There is a # 415# namespace problem for qualified items (TODO) # 416# ------------------------------------------------------- # 417# [arrayP(NAME)] := <NAME:name> # 418# <rdf:[Bag|Seq|Alt]> # 419# [item]+ # 420# </rdf:[Bag|Seq|Alt]> # 421# </NAME:name> # 422# [item] := [simple_item] or [prop_item] or # 423# [qualif_item(N2)] or [lang_item] # 424# ------------------------------------------------------- # 425# Note: a [lang_item] can be found only in an rdf:Alt, # 426# and this rdf:Alt must in turn contain only [lang_item] # 427# items, but this check is not yet implemented (TODO). # 428########################################################### 429sub parse_rdf_array_property { 430 my ($this, $ns, $pdir) = @_; my ($name, $type) = (); 431 # try to match structure and return on failure 432 return 0 unless $this->list_extract 433 ([['OPEN',"($ns:.*)",\$name], ['OPEN','(rdf:(Bag|Seq|Alt))',\$type]]); 434 # get all items in this array property 435 while (! $this->list_equal(['CLOSE', $type])) { 436 $this->parse_rdf_item ([@$pdir, $name]) && next; 437 $this->parse_rdf_item_lang ([@$pdir, $name]) && next; 438 $this->parse_rdf_item_property ([@$pdir, $name]) && next; 439 $this->parse_rdf_item_qualified([@$pdir, $name]) && next; 440 $this->die('parse_rdf_array_property: unhandled case'); } 441 # store the property type in the subdirectory 442 $this->search_record(@$pdir, $name)->{extra} = $type; 443 # find where tags are closing 444 $this->list_extract([['CLOSE', $type], ['CLOSE', "$name"]]) 445 || $this->die('array property: error at closing tag'); 446 1 } 447 448########################################################### 449# Simple items: just text strings inside rdf:li tags. It # 450# is the simplest case for rdf:Bag, rdf:Set and rdf:Alt # 451# array properties. It does not need a subdirectory. # 452# ------------------------------------------------------- # 453# [simple_item] := <rdf:li>text<rdf:li> # 454########################################################### 455sub parse_rdf_item { 456 my ($this, $pdir) = @_; my ($content) = (); 457 # try to match structure and return on failure 458 return 0 unless $this->list_extract 459 ([['OPEN','rdf:li'],['CONTENT','(.*)',\$content],['CLOSE','rdf:li']]); 460 # store the property content 461 $this->store_xmp_value($pdir, 'ITEM', $content); 462 1 } 463 464########################################################### 465# Property items: these items contain another property # 466# which is not simple text, e.g., a structured property # 467# or an array property. Additional qualifiers can be spe- # 468# cified as attributes of the rdf:li tag. Such properties # 469# in general require their own subdirectories. # 470# ------------------------------------------------------- # 471# [prop_item] := <rdf:li [qualifier]>[simplP(NAME)]<rdf:li> 472########################################################### 473sub parse_rdf_item_property { 474 my ($this, $pdir) = @_; my ($name, $value) = (); 475 # try to match structure and return on failure 476 return 0 unless $this->list_equal 477 ([['OPEN', 'rdf:li'], ['ATTRIBUTE', 'rdf:.*', '.*'], ['OPEN', '.*']]); 478 $this->list_extract([['OPEN', 'rdf:li'], 479 ['ATTRIBUTE', '(rdf:.*)', \$name, '(.*)', \$value]]); 480 # store the property content 481 $this->store_xmp_value([@$pdir, 'ITEM'], $name, $value, 'QUALIFIER'); 482 # this is plainly wrong: how to extract the correct namespace? TODO 483 $this->parse_rdf_property('stJob', [@$pdir, 'ITEM']); 484 $this->list_extract(['CLOSE', 'rdf:li']) 485 || $this->die('item_property: error at closing tag'); 486 1 } 487 488########################################################### 489# Qualified items: these items can be found inside an # 490# array property ('Bag', 'Seq' or 'Alt') and differ from # 491# standard items because they do not only have a value, # 492# but also one or more "qualifiers"; they remain unnamed, # 493# however. The namespace of the qualifiers can be diffe- # 494# rent from the main namespace, but this is not yet taken # 495# into account (TODO). # 496# ------------------------------------------------------- # 497# [qualif_item(N2)] := <rdf:li> # 498# <rdf:Description> # 499# <rdf:value>text</rdf:value> # 500# [qualifier(N2)]* # 501# </rdf:Description> # 502# </rdf:li> # 503# [qualifier(N2)] := <N2:role>text</N2:role> # 504########################################################### 505sub parse_rdf_item_qualified { 506 my ($this, $pdir) = @_; my ($name, $value) = ('qualified-ITEM'); 507 # try to match structure and return on failure 508 return 0 unless $this->list_extract 509 ([['OPEN','rdf:li'], ['OPEN','rdf:Description'], ['OPEN','rdf:value'], 510 ['CONTENT', '(.*)', \$value], ['CLOSE', 'rdf:value']]); 511 # store the qualified property value, then all qualifiers; 512 # we need a new subdirectory to store all this stuff 513 $this->store_xmp_value([@$pdir, $name], 'ITEM', $value); 514 1 while $this->parse_rdf_simple_property('.*', [@$pdir, $name]); 515 # find where tags are closing 516 $this->list_extract([['CLOSE', 'rdf:Description'], ['CLOSE', 'rdf:li']]) 517 || $this->die('item_qualified: error at closing tag'); 518 1 } 519 520########################################################### 521# Language alternatives: these are items inside an 'Alt' # 522# array properties. It should not be possible to mix # 523# language alternatives and normal items, but this is not # 524# currently checked (TODO ?) # 525# ------------------------------------------------------- # 526# [lang_item] := <rdf:li xml:lang='...'>text</rdf:li> # 527########################################################### 528sub parse_rdf_item_lang { 529 my ($this, $pdir) = @_; my ($language, $content) = (); 530 # try to match structure and return on failure 531 return 0 unless $this->list_extract 532 ([['OPEN', 'rdf:li'], ['ATTRIBUTE', 'xml:lang', '(.*)', \$language], 533 ['CONTENT', '(.*)', \$content], ['CLOSE', 'rdf:li']]); 534 # store the property content 535 $this->store_xmp_value($pdir, $language, $content, 'lang-alt'); 536 1 } 537 538# successful load 5391; 540