#!/usr/local/bin/python3.8
# vim:fileencoding=utf-8


__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'

import re, sys, copy, json
from itertools import repeat
from collections import defaultdict

from lxml import etree
from lxml.builder import ElementMaker

from calibre import prints
from calibre.ebooks.metadata import string_to_authors, check_isbn, check_doi
from calibre.utils.xml_parse import safe_xml_fromstring
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.opf2 import dump_dict
from calibre.utils.date import parse_date, isoformat, now
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
from polyglot.builtins import iteritems, string_or_bytes

# Matches an XML declaration that carries an encoding attribute, so that it can
# be stripped before handing an already-decoded string to the parser
_xml_declaration = re.compile(r'<\?xml[^<>]+encoding\s*=\s*[\'"](.*?)[\'"][^<>]*>', re.IGNORECASE)

# Prefix -> namespace URI for every namespace this module reads or writes
NS_MAP = {
    'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    'dc': 'http://purl.org/dc/elements/1.1/',
    'pdf': 'http://ns.adobe.com/pdf/1.3/',
    'pdfx': 'http://ns.adobe.com/pdfx/1.3/',
    'xmp': 'http://ns.adobe.com/xap/1.0/',
    'xmpidq': 'http://ns.adobe.com/xmp/Identifier/qual/1.0/',
    'xmpMM': 'http://ns.adobe.com/xap/1.0/mm/',
    'xmpRights': 'http://ns.adobe.com/xap/1.0/rights/',
    'xmpBJ': 'http://ns.adobe.com/xap/1.0/bj/',
    'xmpTPg': 'http://ns.adobe.com/xap/1.0/t/pg/',
    'xmpDM': 'http://ns.adobe.com/xmp/1.0/DynamicMedia/',
    'prism': 'http://prismstandard.org/namespaces/basic/2.0/',
    'crossmark': 'http://crossref.org/crossmark/1.0/',
    'xml': 'http://www.w3.org/XML/1998/namespace',
    'x': 'adobe:ns:meta/',
    'calibre': 'http://calibre-ebook.com/xmp-namespace',
    'calibreSI': 'http://calibre-ebook.com/xmp-namespace-series-index',
    'calibreCC': 'http://calibre-ebook.com/xmp-namespace-custom-columns',
}
KNOWN_ID_SCHEMES = {'isbn', 'url', 'doi'}


def expand(name):
    ''' Convert a ``prefix:local`` name into the ``{namespace-uri}local`` form
    used by lxml. Raises KeyError if the prefix is not present in NS_MAP. '''
    prefix, name = name.partition(':')[::2]
    return '{%s}%s' % (NS_MAP[prefix], name)


xpath_cache = {}


def XPath(expr):
    ''' Return a compiled XPath object for expr (bound to NS_MAP), compiling
    it only the first time a given expression is requested. '''
    ans = xpath_cache.get(expr, None)
    if ans is None:
        xpath_cache[expr] = ans = etree.XPath(expr, namespaces=NS_MAP)
    return ans


def parse_xmp_packet(raw_bytes):
    ''' Parse the bytes of an XMP packet and return the parsed XML root.

    The text encoding is taken from the byte order mark stored in the
    ``begin`` attribute of the ``xpacket`` processing instruction, which is
    searched for in the first 1024 bytes. If no encoding can be determined
    the bytes are handed to the XML parser unchanged. '''
    raw_bytes = raw_bytes.strip()
    enc = None
    pat = r'''<?xpacket\s+[^>]*?begin\s*=\s*['"]([^'"]*)['"]'''
    encodings = ('8', '16-le', '16-be', '32-le', '32-be')
    header = raw_bytes[:1024]
    # Map the encoded BOM to the name of the encoding that produced it
    emap = {'\ufeff'.encode('utf-'+x):'utf-'+x for x in encodings}
    emap[b''] = 'utf-8'  # an empty begin attribute is treated as UTF-8
    for q in encodings:
        m = re.search(pat.encode('utf-'+q), header)
        if m is not None:
            enc = emap.get(m.group(1), enc)
            break
    if enc is None:
        return safe_xml_fromstring(raw_bytes)
    raw = _xml_declaration.sub('', raw_bytes.decode(enc))  # lxml barfs if encoding declaration present in unicode string
    return safe_xml_fromstring(raw)


def serialize_xmp_packet(root, encoding='utf-8'):
    ''' Serialize root to bytes in the given encoding, wrapped in the xpacket
    begin/end processing instructions and followed by whitespace padding. '''
    root.tail = '\n' + '\n'.join(repeat(' '*100, 30))  # Adobe spec recommends inserting padding at the end of the packet
    raw_bytes = etree.tostring(root, encoding=encoding, pretty_print=True, with_tail=True, method='xml')
    return b'<?xpacket begin="%s" id="W5M0MpCehiHzreSzNTczkc9d"?>\n%s\n<?xpacket end="w"?>' % ('\ufeff'.encode(encoding), raw_bytes)


def read_simple_property(elem):
    ''' Read a simple property: the text of elem if it has any, otherwise the
    value of its rdf:resource attribute (or the empty string). Returns None
    when elem is None. '''
    if elem is not None:
        if elem.text:
            return elem.text
        return elem.get(expand('rdf:resource'), '')


def read_lang_alt(parent):
    ''' Read a text value with possible alternate values in different
    languages. Returns the rdf:li element marked ``x-default`` if there is
    one, otherwise the first rdf:li element, otherwise None. '''
    items = XPath('descendant::rdf:li[@xml:lang="x-default"]')(parent)
    if items:
        return items[0]
    items = XPath('descendant::rdf:li')(parent)
    if items:
        return items[0]


def read_sequence(parent):
    ''' Yield the values of a sequence or set (assumes simple properties in
    the sequence) '''
    for item in XPath('descendant::rdf:li')(parent):
        yield read_simple_property(item)


def uniq(vals, kmap=lambda x:x):
    ''' Remove all duplicates from vals, while preserving order. kmap must be a
    callable that returns a hashable value for every item in vals '''
    vals = vals or ()
    lvals = (kmap(x) for x in vals)
    seen = set()
    seen_add = seen.add
    # seen_add() returns None, so the second clause only records the key
    return tuple(x for x, k in zip(vals, lvals) if k not in seen and not seen_add(k))


def multiple_sequences(expr, root):
    ''' Get all values for sequence elements matching expr, ensuring the
    returned list contains distinct non-null elements preserving their order. '''
    ans = []
    for item in XPath(expr)(root):
        ans += list(read_sequence(item))
    return list(filter(None, uniq(ans)))


def first_alt(expr, root):
    ''' The first non-empty value among elements matching expr, assumes that
    each element contains a language alternate array. Returns None if there
    is no such value. '''
    for item in XPath(expr)(root):
        q = read_simple_property(read_lang_alt(item))
        if q:
            return q


def first_simple(expr, root):
    ''' The value for the first occurrence of an element matching expr that
    has a non-empty value (assumes simple property). Returns None if there
    is no such value. '''
    for item in XPath(expr)(root):
        q = read_simple_property(item)
        if q:
            return q


def first_sequence(expr, root):
    ''' The first item in the first non-empty sequence matching expr, or None '''
    for item in XPath(expr)(root):
        for ans in read_sequence(item):
            return ans


def read_series(root):
    ''' Return ``(series, series_index)`` from the first calibre:series element
    that has a non-blank rdf:value, or ``(None, None)`` if there is none. The
    index defaults to 1.0 when no series_index child parses as a float. '''
    for item in XPath('//calibre:series')(root):
        val = XPath('descendant::rdf:value')(item)
        if val:
            series = val[0].text
            if series and series.strip():
                series_index = 1.0
                for si in XPath('descendant::calibreSI:series_index')(item):
                    try:
                        series_index = float(si.text)
                    except (TypeError, ValueError):
                        continue
                    else:
                        break
                return series, series_index
    return None, None


def read_user_metadata(mi, root):
    ''' Read the custom column definitions stored under
    calibre:custom_metadata and set them on mi. Only names starting with ``#``
    are accepted and only the first occurrence of each name is used. Entries
    that fail to decode are reported and skipped. '''
    from calibre.utils.config import from_json
    from calibre.ebooks.metadata.book.json_codec import decode_is_multiple
    fields = set()
    for item in XPath('//calibre:custom_metadata')(root):
        for li in XPath('./rdf:Bag/rdf:li')(item):
            name = XPath('descendant::calibreCC:name')(li)
            if not name:
                continue
            name = name[0].text
            # An empty name element has text None, which must not be
            # dereferenced
            if not name or not name.startswith('#') or name in fields:
                continue
            val = XPath('descendant::rdf:value')(li)
            if not val:
                continue
            fm = val[0].text
            try:
                fm = json.loads(fm, object_hook=from_json)
                decode_is_multiple(fm)
                mi.set_user_metadata(name, fm)
                fields.add(name)
            except Exception:
                prints('Failed to read user metadata:', name)
                import traceback
                traceback.print_exc()


def read_xmp_identifers(parent):
    ''' Yield ``(scheme, value)`` pairs for the items of an xmp:Identifier bag.
    scheme is None when the item does not specify one.

    For example:
    <rdf:li rdf:parseType="Resource"><xmpidq:Scheme>URL</xmpidq:Scheme><rdf:value>http://foo.com</rdf:value></rdf:li>
    or the longer form:
    <rdf:li><rdf:Description><xmpidq:Scheme>URL</xmpidq:Scheme><rdf:value>http://foo.com</rdf:value></rdf:Description></rdf:li>
    '''
    for li in XPath('./rdf:Bag/rdf:li')(parent):
        is_resource = li.attrib.get(expand('rdf:parseType'), None) == 'Resource'
        is_resource = is_resource or (len(li) == 1 and li[0].tag == expand('rdf:Description'))
        if not is_resource:
            # Plain item: the identifier is the text of the li itself
            yield None, li.text or ''
        value = XPath('descendant::rdf:value')(li)
        if not value:
            continue
        value = value[0].text or ''
        scheme = XPath('descendant::xmpidq:Scheme')(li)
        if not scheme:
            yield None, value
        else:
            yield scheme[0].text or '', value


def safe_parse_date(raw):
    ''' Parse raw as a date, returning None if raw is empty or parsing fails '''
    if raw:
        try:
            return parse_date(raw)
        except Exception:
            pass


def more_recent(one, two):
    ''' Return the larger of one and two, treating None as missing. If the two
    values cannot be compared, one is returned. '''
    if one is None:
        return two
    if two is None:
        return one
    try:
        return max(one, two)
    except Exception:
        return one


def metadata_from_xmp_packet(raw_bytes):
    ''' Build a Metadata object from the bytes of an XMP packet.

    Raises ValueError for the corrupted packets described in
    https://bugs.launchpad.net/calibre/+bug/1541981 '''
    root = parse_xmp_packet(raw_bytes)
    mi = Metadata(_('Unknown'))
    title = first_alt('//dc:title', root)
    if title:
        if title.startswith(r'\376\377'):
            # corrupted XMP packet generated by Nitro PDF. See
            # https://bugs.launchpad.net/calibre/+bug/1541981
            raise ValueError('Corrupted XMP metadata packet detected, probably generated by Nitro PDF')
        mi.title = title
    authors = multiple_sequences('//dc:creator', root)
    if authors:
        mi.authors = [au for aus in authors for au in string_to_authors(aus)]
    tags = multiple_sequences('//dc:subject', root) or multiple_sequences('//pdf:Keywords', root)
    if tags:
        mi.tags = tags
    comments = first_alt('//dc:description', root)
    if comments:
        mi.comments = comments
    publishers = multiple_sequences('//dc:publisher', root)
    if publishers:
        mi.publisher = publishers[0]
    try:
        pubdate = parse_date(first_sequence('//dc:date', root) or first_simple('//xmp:CreateDate', root), assume_utc=False)
    except Exception:
        pass
    else:
        mi.pubdate = pubdate
    bkp = first_simple('//xmp:CreatorTool', root)
    if bkp:
        mi.book_producer = bkp
    md = safe_parse_date(first_simple('//xmp:MetadataDate', root))
    mod = safe_parse_date(first_simple('//xmp:ModifyDate', root))
    fd = more_recent(md, mod)
    if fd is not None:
        mi.metadata_date = fd
    rating = first_simple('//calibre:rating', root)
    if rating is not None:
        try:
            rating = float(rating)
            if 0 <= rating <= 10:
                mi.rating = rating
        except (ValueError, TypeError):
            pass
    series, series_index = read_series(root)
    if series:
        mi.series, mi.series_index = series, series_index
    for x in ('title_sort', 'author_sort'):
        for elem in XPath('//calibre:' + x)(root):
            val = read_simple_property(elem)
            if val:
                setattr(mi, x, val)
                break
    for x in ('author_link_map', 'user_categories'):
        val = first_simple('//calibre:'+x, root)
        if val:
            try:
                setattr(mi, x, json.loads(val))
            except Exception:
                pass

    languages = multiple_sequences('//dc:language', root)
    if languages:
        languages = list(filter(None, map(canonicalize_lang, languages)))
        if languages:
            mi.languages = languages

    identifiers = {}
    for xmpid in XPath('//xmp:Identifier')(root):
        for scheme, value in read_xmp_identifers(xmpid):
            if scheme and value:
                identifiers[scheme.lower()] = value

    for namespace in ('prism', 'pdfx'):
        for scheme in KNOWN_ID_SCHEMES:
            if scheme not in identifiers:
                val = first_simple('//%s:%s' % (namespace, scheme), root)
                scheme = scheme.lower()
                if scheme == 'isbn':
                    val = check_isbn(val)
                elif scheme == 'doi':
                    val = check_doi(val)
                if val:
                    identifiers[scheme] = val

    # Check Dublin Core for recognizable identifier types
    dc_identifier = first_simple('//dc:identifier', root)
    for scheme, check_func in iteritems({'doi':check_doi, 'isbn':check_isbn}):
        if scheme not in identifiers:
            val = check_func(dc_identifier)
            if val:
                # Store under the scheme that validated the value, an ISBN
                # must not be recorded as a DOI
                identifiers[scheme] = val

    if identifiers:
        mi.set_identifiers(identifiers)

    read_user_metadata(mi, root)

    return mi


def consolidate_metadata(info_mi, info):
    ''' When both the PDF Info dict and XMP metadata are present, prefer the xmp
    metadata unless the Info ModDate is newer than the XMP MetadataDate. This
    is the algorithm recommended by the PDF spec. '''
    try:
        raw = info['xmp_metadata'].rstrip()
        if not raw:
            return info_mi
        xmp_mi = metadata_from_xmp_packet(raw)
    except Exception:
        import traceback
        traceback.print_exc()
        return info_mi
    info_title, info_authors, info_tags = info_mi.title or _('Unknown'), list(info_mi.authors or ()), list(info_mi.tags or ())
    info_mi.smart_update(xmp_mi, replace_metadata=True)
    prefer_info = False
    if 'ModDate' in info and hasattr(xmp_mi, 'metadata_date'):
        try:
            info_date = parse_date(info['ModDate'])
        except Exception:
            pass
        else:
            try:
                prefer_info = info_date > xmp_mi.metadata_date
            except TypeError:
                # NOTE(review): presumably metadata_date can be unset or not
                # comparable with info_date; in that case keep preferring
                # the XMP values rather than failing
                pass
    if prefer_info:
        info_mi.title, info_mi.authors, info_mi.tags = info_title, info_authors, info_tags
    else:
        # We'll use the xmp tags/authors but fallback to the info ones if the
        # xmp does not have tags/authors. smart_update() should have taken care of
        # the rest
        info_mi.authors, info_mi.tags = (info_authors if xmp_mi.is_null('authors') else xmp_mi.authors), xmp_mi.tags or info_tags
    return info_mi


def nsmap(*args):
    ''' Return the subset of NS_MAP for the given prefixes '''
    return {x:NS_MAP[x] for x in args}


def create_simple_property(parent, tag, value):
    ''' Append a child named tag to parent with value as its text '''
    e = parent.makeelement(expand(tag))
    parent.append(e)
    e.text = value


def create_alt_property(parent, tag, value):
    ''' Append a language alternative property named tag to parent, with
    value as its single ``x-default`` entry '''
    e = parent.makeelement(expand(tag))
    parent.append(e)
    alt = e.makeelement(expand('rdf:Alt'))
    e.append(alt)
    li = alt.makeelement(expand('rdf:li'))
    alt.append(li)
    li.set(expand('xml:lang'), 'x-default')
    li.text = value


def create_sequence_property(parent, tag, val, ordered=True):
    ''' Append a property named tag to parent containing one rdf:li per item
    of val, inside an rdf:Seq if ordered is True and an rdf:Bag otherwise '''
    e = parent.makeelement(expand(tag))
    parent.append(e)
    seq = e.makeelement(expand('rdf:' + ('Seq' if ordered else 'Bag')))
    e.append(seq)
    for x in val:
        li = seq.makeelement(expand('rdf:li'))
        li.text = x
        seq.append(li)


def create_identifiers(xmp, identifiers):
    ''' Append an xmp:Identifier bag to xmp with one resource item (scheme
    plus value) per entry of the identifiers mapping '''
    xmpid = xmp.makeelement(expand('xmp:Identifier'))
    xmp.append(xmpid)
    bag = xmpid.makeelement(expand('rdf:Bag'))
    xmpid.append(bag)
    for scheme, value in iteritems(identifiers):
        li = bag.makeelement(expand('rdf:li'))
        li.set(expand('rdf:parseType'), 'Resource')
        bag.append(li)
        s = li.makeelement(expand('xmpidq:Scheme'))
        s.text = scheme
        li.append(s)
        val = li.makeelement(expand('rdf:value'))
        li.append(val)
        val.text = value


def create_series(calibre, series, series_index):
    ''' Append a calibre:series resource holding the series name and its
    index. An index that cannot be converted to a float is written as 1.0 '''
    s = calibre.makeelement(expand('calibre:series'))
    s.set(expand('rdf:parseType'), 'Resource')
    calibre.append(s)
    val = s.makeelement(expand('rdf:value'))
    s.append(val)
    val.text = series
    try:
        series_index = float(series_index)
    except (TypeError, ValueError):
        series_index = 1.0
    si = s.makeelement(expand('calibreSI:series_index'))
    si.text = '%.2f' % series_index
    s.append(si)


def create_user_metadata(calibre, all_user_metadata):
    ''' Append a calibre:custom_metadata bag with one item per custom column,
    each holding the column name and its definition serialized as JSON.
    Columns that fail to serialize are reported and skipped. '''
    from calibre.utils.config import to_json
    from calibre.ebooks.metadata.book.json_codec import object_to_unicode, encode_is_multiple

    s = calibre.makeelement(expand('calibre:custom_metadata'))
    calibre.append(s)
    bag = s.makeelement(expand('rdf:Bag'))
    s.append(bag)
    for name, fm in iteritems(all_user_metadata):
        try:
            fm = copy.copy(fm)
            encode_is_multiple(fm)
            fm = object_to_unicode(fm)
            fm = json.dumps(fm, default=to_json, ensure_ascii=False)
        except Exception:
            prints('Failed to write user metadata:', name)
            import traceback
            traceback.print_exc()
            continue
        li = bag.makeelement(expand('rdf:li'))
        li.set(expand('rdf:parseType'), 'Resource')
        bag.append(li)
        n = li.makeelement(expand('calibreCC:name'))
        li.append(n)
        n.text = name
        val = li.makeelement(expand('rdf:value'))
        val.text = fm
        li.append(val)


def metadata_to_xmp_packet(mi):
    ''' Serialize the Metadata object mi as an XMP packet and return it as
    bytes. Properties are grouped into one rdf:Description per namespace
    group (dc, xmp, prism, pdfx and calibre). '''
    A = ElementMaker(namespace=NS_MAP['x'], nsmap=nsmap('x'))
    R = ElementMaker(namespace=NS_MAP['rdf'], nsmap=nsmap('rdf'))
    root = A.xmpmeta(R.RDF)
    rdf = root[0]
    dc = rdf.makeelement(expand('rdf:Description'), nsmap=nsmap('dc'))
    dc.set(expand('rdf:about'), '')
    rdf.append(dc)
    for prop, tag in iteritems({'title':'dc:title', 'comments':'dc:description'}):
        val = mi.get(prop) or ''
        create_alt_property(dc, tag, val)
    for prop, (tag, ordered) in iteritems({
        'authors':('dc:creator', True), 'tags':('dc:subject', False), 'publisher':('dc:publisher', False),
    }):
        val = mi.get(prop) or ()
        if isinstance(val, string_or_bytes):
            val = [val]
        create_sequence_property(dc, tag, val, ordered)
    if not mi.is_null('pubdate'):
        create_sequence_property(dc, 'dc:date', [isoformat(mi.pubdate, as_utc=False)])  # Adobe spec recommends local time
    if not mi.is_null('languages'):
        langs = list(filter(None, map(lambda x:lang_as_iso639_1(x) or canonicalize_lang(x), mi.languages)))
        if langs:
            create_sequence_property(dc, 'dc:language', langs, ordered=False)

    xmp = rdf.makeelement(expand('rdf:Description'), nsmap=nsmap('xmp', 'xmpidq'))
    xmp.set(expand('rdf:about'), '')
    rdf.append(xmp)
    extra_ids = {}
    for x in ('prism', 'pdfx'):
        p = extra_ids[x] = rdf.makeelement(expand('rdf:Description'), nsmap=nsmap(x))
        p.set(expand('rdf:about'), '')
        rdf.append(p)

    identifiers = mi.get_identifiers()
    if identifiers:
        create_identifiers(xmp, identifiers)
        # ISBN and DOI are additionally written in the prism and pdfx
        # namespaces
        for scheme, val in iteritems(identifiers):
            if scheme in {'isbn', 'doi'}:
                for prefix, parent in iteritems(extra_ids):
                    ie = parent.makeelement(expand('%s:%s'%(prefix, scheme)))
                    ie.text = val
                    parent.append(ie)

    d = xmp.makeelement(expand('xmp:MetadataDate'))
    d.text = isoformat(now(), as_utc=False)
    xmp.append(d)

    calibre = rdf.makeelement(expand('rdf:Description'), nsmap=nsmap('calibre', 'calibreSI', 'calibreCC'))
    calibre.set(expand('rdf:about'), '')
    rdf.append(calibre)
    if not mi.is_null('rating'):
        try:
            r = float(mi.rating)
        except (TypeError, ValueError):
            pass
        else:
            create_simple_property(calibre, 'calibre:rating', '%g' % r)
    if not mi.is_null('series'):
        create_series(calibre, mi.series, mi.series_index)
    if not mi.is_null('timestamp'):
        create_simple_property(calibre, 'calibre:timestamp', isoformat(mi.timestamp, as_utc=False))
    for x in ('author_link_map', 'user_categories'):
        val = getattr(mi, x, None)
        if val:
            create_simple_property(calibre, 'calibre:'+x, dump_dict(val))

    for x in ('title_sort', 'author_sort'):
        if not mi.is_null(x):
            create_simple_property(calibre, 'calibre:'+x, getattr(mi, x))

    all_user_metadata = mi.get_all_user_metadata(True)
    if all_user_metadata:
        create_user_metadata(calibre, all_user_metadata)
    return serialize_xmp_packet(root)


def find_used_namespaces(elem):
    ''' Return the set of namespace URIs used by the tag and attribute names
    of elem and all its descendant elements. The set contains None if any
    name has no namespace. '''
    getns = lambda x: (x.partition('}')[0][1:] if '}' in x else None)
    ans = {getns(x) for x in list(elem.attrib) + [elem.tag]}
    for child in elem.iterchildren(etree.Element):
        ans |= find_used_namespaces(child)
    return ans


def find_preferred_prefix(namespace, elems):
    ''' Return the first prefix bound to namespace in the nsmap of any of
    elems or their descendants (searched depth first, in document order), or
    None if no element binds a prefix to it. '''
    for elem in elems:
        ans = {v:k for k, v in iteritems(elem.nsmap)}.get(namespace, None)
        if ans is not None:
            return ans
        # Only stop if the subtree produced a prefix, otherwise the
        # remaining elements must still be searched
        ans = find_preferred_prefix(namespace, elem.iterchildren(etree.Element))
        if ans is not None:
            return ans
    return None


def find_nsmap(elems):
    ''' Build a prefix -> namespace map covering every namespace used by elems,
    except the xml, x and rdf namespaces. Prefixes from NS_MAP are preferred,
    then prefixes declared in elems, then generated ``nsN`` prefixes. '''
    used_namespaces = set()
    for elem in elems:
        used_namespaces |= find_used_namespaces(elem)
    ans = {}
    used_namespaces -= {NS_MAP['xml'], NS_MAP['x'], None, NS_MAP['rdf']}
    rmap = {v:k for k, v in iteritems(NS_MAP)}
    i = 0
    for ns in used_namespaces:
        if ns in rmap:
            ans[rmap[ns]] = ns
        else:
            pp = find_preferred_prefix(ns, elems)
            if pp and pp not in ans:
                ans[pp] = ns
            else:
                i += 1
                ans['ns%d' % i] = ns
    return ans


def clone_into(parent, elem):
    ' Clone the element, assuming that all namespace declarations are present in parent '
    clone = parent.makeelement(elem.tag)
    parent.append(clone)
    # Whitespace-only text and tails are dropped
    if elem.text and not elem.text.isspace():
        clone.text = elem.text
    if elem.tail and not elem.tail.isspace():
        clone.tail = elem.tail
    clone.attrib.update(elem.attrib)
    for child in elem.iterchildren(etree.Element):
        clone_into(clone, child)


def merge_xmp_packet(old, new):
    ''' Merge metadata present in the old packet that is not present in the new
    one into the new one. Assumes the new packet was generated by
    metadata_to_xmp_packet() '''
    old, new = parse_xmp_packet(old), parse_xmp_packet(new)
    # As per the adobe spec all metadata items have to be present inside top-level rdf:Description containers
    item_xpath = XPath('//rdf:RDF/rdf:Description/*')

    # First remove all data fields that metadata_to_xmp_packet() knows about,
    # since either they will have been set or if not present, imply they have
    # been cleared
    defined_tags = {expand(prefix + ':' + scheme) for prefix in ('prism', 'pdfx') for scheme in KNOWN_ID_SCHEMES}
    defined_tags |= {expand('dc:' + x) for x in ('identifier', 'title', 'creator', 'date', 'description', 'language', 'publisher', 'subject')}
    defined_tags |= {expand('xmp:' + x) for x in ('MetadataDate', 'Identifier')}
    # For redundancy also remove all fields explicitly set in the new packet
    defined_tags |= {x.tag for x in item_xpath(new)}
    calibrens = '{%s}' % NS_MAP['calibre']
    for elem in item_xpath(old):
        if elem.tag in defined_tags or (elem.tag and elem.tag.startswith(calibrens)):
            elem.getparent().remove(elem)

    # Group all items into groups based on their namespaces
    groups = defaultdict(list)
    for item in item_xpath(new):
        ns = item.nsmap[item.prefix]
        groups[ns].append(item)

    for item in item_xpath(old):
        ns = item.nsmap[item.prefix]
        groups[ns].append(item)

    A = ElementMaker(namespace=NS_MAP['x'], nsmap=nsmap('x'))
    R = ElementMaker(namespace=NS_MAP['rdf'], nsmap=nsmap('rdf'))
    root = A.xmpmeta(R.RDF)
    rdf = root[0]

    # dc, xmp and calibre come first in that order, the remaining namespaces
    # are sorted by URI
    for namespace in sorted(groups, key=lambda x:{NS_MAP['dc']:'a', NS_MAP['xmp']:'b', NS_MAP['calibre']:'c'}.get(x, 'z'+x)):
        items = groups[namespace]
        desc = rdf.makeelement(expand('rdf:Description'), nsmap=find_nsmap(items))
        desc.set(expand('rdf:about'), '')
        rdf.append(desc)
        for item in items:
            clone_into(desc, item)

    return serialize_xmp_packet(root)


if __name__ == '__main__':
    from calibre.utils.podofo import get_xmp_metadata
    xmp_packet = get_xmp_metadata(sys.argv[-1])
    mi = metadata_from_xmp_packet(xmp_packet)
    np = metadata_to_xmp_packet(mi)
    print(merge_xmp_packet(xmp_packet, np))