1#!/usr/local/bin/python3.8
2# vim:fileencoding=utf-8
3
4
5__license__ = 'GPL v3'
6__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
7
8import re, sys, copy, json
9from itertools import repeat
10from collections import defaultdict
11
12from lxml import etree
13from lxml.builder import ElementMaker
14
15from calibre import prints
16from calibre.ebooks.metadata import string_to_authors, check_isbn, check_doi
17from calibre.utils.xml_parse import safe_xml_fromstring
18from calibre.ebooks.metadata.book.base import Metadata
19from calibre.ebooks.metadata.opf2 import dump_dict
20from calibre.utils.date import parse_date, isoformat, now
21from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
22from polyglot.builtins import iteritems, string_or_bytes
23
# Matches an XML declaration (<?xml ... encoding="..." ...>) that carries an
# encoding attribute; group 1 captures the declared encoding name. Matching is
# case-insensitive. parse_xmp_packet() uses it to remove the declaration from
# text that has already been decoded.
_xml_declaration = re.compile(r'<\?xml[^<>]+encoding\s*=\s*[\'"](.*?)[\'"][^<>]*>', re.IGNORECASE)
25
# Maps the namespace prefixes used throughout this module to their namespace
# URIs. expand() uses it to build '{uri}local' names, XPath() passes it to
# etree.XPath so expressions can use these prefixes, and nsmap() selects
# subsets of it for newly created elements.
NS_MAP = {
    'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    'dc': 'http://purl.org/dc/elements/1.1/',
    'pdf': 'http://ns.adobe.com/pdf/1.3/',
    'pdfx': 'http://ns.adobe.com/pdfx/1.3/',
    'xmp': 'http://ns.adobe.com/xap/1.0/',
    'xmpidq': 'http://ns.adobe.com/xmp/Identifier/qual/1.0/',
    'xmpMM': 'http://ns.adobe.com/xap/1.0/mm/',
    'xmpRights': 'http://ns.adobe.com/xap/1.0/rights/',
    'xmpBJ': 'http://ns.adobe.com/xap/1.0/bj/',
    'xmpTPg': 'http://ns.adobe.com/xap/1.0/t/pg/',
    'xmpDM': 'http://ns.adobe.com/xmp/1.0/DynamicMedia/',
    'prism': 'http://prismstandard.org/namespaces/basic/2.0/',
    'crossmark': 'http://crossref.org/crossmark/1.0/',
    'xml': 'http://www.w3.org/XML/1998/namespace',
    'x': 'adobe:ns:meta/',
    # calibre specific namespaces
    'calibre': 'http://calibre-ebook.com/xmp-namespace',
    'calibreSI': 'http://calibre-ebook.com/xmp-namespace-series-index',
    'calibreCC': 'http://calibre-ebook.com/xmp-namespace-custom-columns',
}
# Identifier schemes that are looked up as simple prism:<scheme> and
# pdfx:<scheme> properties when reading a packet, and whose tags are treated
# as known fields when merging packets.
KNOWN_ID_SCHEMES = {'isbn', 'url', 'doi'}
47
48
def expand(name):
    ''' Convert a prefixed name such as ``dc:title`` into the ``{uri}local``
    form, looking the prefix up in NS_MAP. Raises KeyError for a prefix that
    is not present in NS_MAP. '''
    prefix, _sep, local = name.partition(':')
    return '{%s}%s' % (NS_MAP[prefix], local)
52
53
# Compiled XPath objects, keyed by the expression text they were built from
xpath_cache = {}


def XPath(expr):
    ''' Return an etree.XPath object for expr, with the prefixes from NS_MAP
    available. Each distinct expression is compiled once and then served from
    xpath_cache. '''
    try:
        return xpath_cache[expr]
    except KeyError:
        compiled = xpath_cache[expr] = etree.XPath(expr, namespaces=NS_MAP)
        return compiled
62
63
def parse_xmp_packet(raw_bytes):
    ''' Parse the bytes of an XMP packet and return the result of
    safe_xml_fromstring().

    The first 1024 bytes are searched for the ``begin`` attribute of the
    xpacket header. Its value is looked up as a byte order mark to select the
    codec used to decode the packet. When no codec can be determined the raw
    bytes are handed to the parser unchanged. '''
    raw_bytes = raw_bytes.strip()
    codec_suffixes = ('8', '16-le', '16-be', '32-le', '32-be')

    # Byte order mark -> codec name. An empty begin attribute means UTF-8.
    bom_to_codec = {}
    for suffix in codec_suffixes:
        codec = 'utf-' + suffix
        bom_to_codec['\ufeff'.encode(codec)] = codec
    bom_to_codec[b''] = 'utf-8'

    begin_pat = r'''<?xpacket\s+[^>]*?begin\s*=\s*['"]([^'"]*)['"]'''
    head = raw_bytes[:1024]
    detected = None
    for suffix in codec_suffixes:
        # The pattern itself is encoded with each candidate codec in turn;
        # only the first pattern that matches is consulted
        match = re.search(begin_pat.encode('utf-' + suffix), head)
        if match is not None:
            detected = bom_to_codec.get(match.group(1), detected)
            break

    if detected is None:
        return safe_xml_fromstring(raw_bytes)
    # lxml barfs if an encoding declaration is present in a unicode string
    text = _xml_declaration.sub('', raw_bytes.decode(detected))
    return safe_xml_fromstring(text)
81
82
def serialize_xmp_packet(root, encoding='utf-8'):
    ''' Serialize root as bytes in the given encoding, wrapped in xpacket
    begin/end processing instructions. Note that this sets root.tail. '''
    # Adobe spec recommends inserting padding at the end of the packet
    padding = '\n'.join(repeat(' '*100, 30))
    root.tail = '\n' + padding
    body = etree.tostring(root, encoding=encoding, pretty_print=True, with_tail=True, method='xml')
    bom = '\ufeff'.encode(encoding)
    return b'<?xpacket begin="%s" id="W5M0MpCehiHzreSzNTczkc9d"?>\n%s\n<?xpacket end="w"?>' % (bom, body)
87
88
def read_simple_property(elem):
    ''' Return the value of a simple property: the element text when it is
    non-empty, otherwise the rdf:resource attribute (or '' if that is absent).
    Returns None when elem is None. '''
    if elem is None:
        return None
    text = elem.text
    if text:
        return text
    return elem.get(expand('rdf:resource'), '')
95
96
def read_lang_alt(parent):
    ''' Pick one rdf:li element from a language alternative array: the first
    one marked xml:lang="x-default" if there is one, otherwise the first
    rdf:li of any language. Returns None when parent has no rdf:li
    descendants. '''
    for expr in ('descendant::rdf:li[@xml:lang="x-default"]', 'descendant::rdf:li'):
        matches = XPath(expr)(parent)
        if matches:
            return matches[0]
    return None
105
106
def read_sequence(parent):
    ''' Generate the value of every rdf:li descendant of parent, reading each
    one as a simple property. '''
    find_items = XPath('descendant::rdf:li')
    yield from map(read_simple_property, find_items(parent))
111
112
def uniq(vals, kmap=lambda x:x):
    ''' Remove all duplicates from vals, while preserving order. kmap must be a
    callable that returns a hashable value for every item in vals.

    vals may be any iterable, including a one-shot iterator or generator; it
    is traversed exactly once. A falsy vals (None, empty container) yields an
    empty tuple. Two items are duplicates when kmap returns equal keys for
    them; the first such item is the one kept. Returns a tuple. '''
    seen = set()
    ans = []
    # Single pass: computing the keys in a second, lazily zipped pass would
    # advance a one-shot iterator twice per step and mis-pair items with keys
    for item in vals or ():
        key = kmap(item)
        if key not in seen:
            seen.add(key)
            ans.append(item)
    return tuple(ans)
121
122
def multiple_sequences(expr, root):
    ''' Collect the values of all sequence elements matching expr and return
    them as a list of distinct, non-empty values in their original order. '''
    collected = []
    for node in XPath(expr)(root):
        collected.extend(read_sequence(node))
    return [value for value in uniq(collected) if value]
130
131
def first_alt(expr, root):
    ''' Return the first non-empty value among the elements matching expr,
    treating each element as a language alternative array. Returns None when
    there is no such value. '''
    candidates = (read_simple_property(read_lang_alt(node)) for node in XPath(expr)(root))
    return next((value for value in candidates if value), None)
139
140
def first_simple(expr, root):
    ''' Return the first non-empty value among the elements matching expr,
    treating each element as a simple property. Returns None when there is no
    such value. '''
    values = map(read_simple_property, XPath(expr)(root))
    return next(filter(None, values), None)
148
149
def first_sequence(expr, root):
    ''' Return the first item of the first non-empty sequence among the
    elements matching expr, or None if every matching sequence is empty. '''
    for node in XPath(expr)(root):
        entries = read_sequence(node)
        try:
            return next(entries)
        except StopIteration:
            # This sequence has no items, try the next matching element
            continue
    return None
155
156
def read_series(root):
    ''' Return (series, series_index) from the first calibre:series element
    that has a non-blank rdf:value. series_index is the first
    calibreSI:series_index that parses as a float, defaulting to 1.0.
    Returns (None, None) when no usable series is found. '''
    for node in XPath('//calibre:series')(root):
        values = XPath('descendant::rdf:value')(node)
        if not values:
            continue
        name = values[0].text
        if not name or not name.strip():
            continue
        index = 1.0
        for index_elem in XPath('descendant::calibreSI:series_index')(node):
            try:
                index = float(index_elem.text)
            except (TypeError, ValueError):
                continue
            break
        return name, index
    return None, None
173
174
def read_user_metadata(mi, root):
    ''' Read custom column metadata from the calibre:custom_metadata elements
    in root and store it on mi via mi.set_user_metadata().

    Only entries whose name starts with '#' are used, and only the first
    successfully decoded entry for each name. An entry that fails to decode
    is reported on the console and skipped. '''
    import traceback
    from calibre.utils.config import from_json
    from calibre.ebooks.metadata.book.json_codec import decode_is_multiple
    fields = set()
    for item in XPath('//calibre:custom_metadata')(root):
        for li in XPath('./rdf:Bag/rdf:li')(item):
            name = XPath('descendant::calibreCC:name')(li)
            if not name:
                continue
            name = name[0].text
            # An empty name element has text None; skip it rather than fail
            # the whole read with an AttributeError
            if not name or not name.startswith('#') or name in fields:
                continue
            val = XPath('descendant::rdf:value')(li)
            if not val:
                continue
            try:
                fm = json.loads(val[0].text, object_hook=from_json)
                decode_is_multiple(fm)
                mi.set_user_metadata(name, fm)
                fields.add(name)
            except Exception:
                prints('Failed to read user metadata:', name)
                traceback.print_exc()
197
198
def read_xmp_identifers(parent):
    ''' Generate (scheme, value) pairs from the rdf:Bag inside parent. scheme
    is None when no xmpidq:Scheme is present. For example:
    <rdf:li rdf:parseType="Resource"><xmpidq:Scheme>URL</xmpidq:Scheme><rdf:value>http://foo.com</rdf:value></rdf:li>
    or the longer form:
    <rdf:li><rdf:Description><xmpidq:Scheme>URL</xmpidq:Scheme><rdf:value>http://foo.com</rdf:value></rdf:Description></rdf:li>
    '''
    parse_type_attr = expand('rdf:parseType')
    description_tag = expand('rdf:Description')
    for li in XPath('./rdf:Bag/rdf:li')(parent):
        is_resource = li.attrib.get(parse_type_attr, None) == 'Resource'
        if not is_resource:
            is_resource = len(li) == 1 and li[0].tag == description_tag
        if not is_resource:
            # A plain item: its text is the value and there is no scheme.
            # Processing deliberately falls through to the rdf:value lookup.
            yield None, li.text or ''
        values = XPath('descendant::rdf:value')(li)
        if not values:
            continue
        value = values[0].text or ''
        schemes = XPath('descendant::xmpidq:Scheme')(li)
        scheme = (schemes[0].text or '') if schemes else None
        yield scheme, value
219
220
def safe_parse_date(raw):
    ''' Return parse_date(raw), or None when raw is empty or cannot be
    parsed. '''
    if not raw:
        return None
    try:
        return parse_date(raw)
    except Exception:
        return None
227
228
def more_recent(one, two):
    ''' Return the larger of the two values. If either is None the other is
    returned, and if the values cannot be compared, one is returned. '''
    if one is None or two is None:
        return two if one is None else one
    try:
        return max(one, two)
    except Exception:
        return one
238
239
def metadata_from_xmp_packet(raw_bytes):
    ''' Parse an XMP packet and return a Metadata object populated from it.

    Reads title, authors, tags, comments, publisher, publication date, book
    producer, metadata date, rating, series, sort fields, author link map,
    user categories, languages, identifiers and custom column metadata.

    Raises ValueError when the title carries the marker of a corrupted packet
    generated by Nitro PDF. '''
    root = parse_xmp_packet(raw_bytes)
    mi = Metadata(_('Unknown'))
    title = first_alt('//dc:title', root)
    if title:
        if title.startswith(r'\376\377'):
            # corrupted XMP packet generated by Nitro PDF. See
            # https://bugs.launchpad.net/calibre/+bug/1541981
            raise ValueError('Corrupted XMP metadata packet detected, probably generated by Nitro PDF')
        mi.title = title
    authors = multiple_sequences('//dc:creator', root)
    if authors:
        mi.authors = [au for aus in authors for au in string_to_authors(aus)]
    tags = multiple_sequences('//dc:subject', root) or multiple_sequences('//pdf:Keywords', root)
    if tags:
        mi.tags = tags
    comments = first_alt('//dc:description', root)
    if comments:
        mi.comments = comments
    publishers = multiple_sequences('//dc:publisher', root)
    if publishers:
        mi.publisher = publishers[0]
    try:
        pubdate = parse_date(first_sequence('//dc:date', root) or first_simple('//xmp:CreateDate', root), assume_utc=False)
    except Exception:
        # Best effort: an unparseable date just leaves pubdate unset
        pass
    else:
        mi.pubdate = pubdate
    bkp = first_simple('//xmp:CreatorTool', root)
    if bkp:
        mi.book_producer = bkp
    md = safe_parse_date(first_simple('//xmp:MetadataDate', root))
    mod = safe_parse_date(first_simple('//xmp:ModifyDate', root))
    fd = more_recent(md, mod)
    if fd is not None:
        mi.metadata_date = fd
    rating = first_simple('//calibre:rating', root)
    if rating is not None:
        try:
            rating = float(rating)
            if 0 <= rating <= 10:
                mi.rating = rating
        except (ValueError, TypeError):
            pass
    series, series_index = read_series(root)
    if series:
        mi.series, mi.series_index = series, series_index
    for x in ('title_sort', 'author_sort'):
        for elem in XPath('//calibre:' + x)(root):
            val = read_simple_property(elem)
            if val:
                setattr(mi, x, val)
                break
    for x in ('author_link_map', 'user_categories'):
        val = first_simple('//calibre:'+x, root)
        if val:
            try:
                setattr(mi, x, json.loads(val))
            except Exception:
                # Best effort: ignore values that are not valid JSON
                pass

    languages = multiple_sequences('//dc:language', root)
    if languages:
        languages = list(filter(None, map(canonicalize_lang, languages)))
        if languages:
            mi.languages = languages

    identifiers = {}
    for xmpid in XPath('//xmp:Identifier')(root):
        for scheme, value in read_xmp_identifers(xmpid):
            if scheme and value:
                identifiers[scheme.lower()] = value

    # Fall back to simple prism:/pdfx: properties for schemes not yet found
    for namespace in ('prism', 'pdfx'):
        for scheme in KNOWN_ID_SCHEMES:
            if scheme not in identifiers:
                val = first_simple('//%s:%s' % (namespace, scheme), root)
                key = scheme.lower()
                if key == 'isbn':
                    val = check_isbn(val)
                elif key == 'doi':
                    val = check_doi(val)
                if val:
                    identifiers[key] = val

    # Check Dublin Core for recognizable identifier types
    for scheme, check_func in (('doi', check_doi), ('isbn', check_isbn)):
        if scheme not in identifiers:
            val = check_func(first_simple('//dc:identifier', root))
            if val:
                # Store under the scheme that validated the value, so that an
                # ISBN is not recorded as a DOI
                identifiers[scheme] = val

    if identifiers:
        mi.set_identifiers(identifiers)

    read_user_metadata(mi, root)

    return mi
338
339
def consolidate_metadata(info_mi, info):
    ''' When both the PDF Info dict and XMP metadata are present, prefer the
    XMP metadata unless the Info ModDate is newer than the XMP MetadataDate.
    This is the algorithm recommended by the PDF spec.

    info_mi is updated in place and returned. If the XMP packet is missing,
    empty or cannot be read, info_mi is returned unchanged. '''
    try:
        raw = info['xmp_metadata'].rstrip()
        if not raw:
            return info_mi
        xmp_mi = metadata_from_xmp_packet(raw)
    except Exception:
        import traceback
        traceback.print_exc()
        return info_mi

    # Remember the Info dict values before smart_update() replaces them
    saved_title = info_mi.title or _('Unknown')
    saved_authors = list(info_mi.authors or ())
    saved_tags = list(info_mi.tags or ())
    info_mi.smart_update(xmp_mi, replace_metadata=True)

    info_is_newer = False
    if 'ModDate' in info and hasattr(xmp_mi, 'metadata_date'):
        try:
            info_date = parse_date(info['ModDate'])
        except Exception:
            pass
        else:
            info_is_newer = info_date > xmp_mi.metadata_date

    if info_is_newer:
        info_mi.title, info_mi.authors, info_mi.tags = saved_title, saved_authors, saved_tags
        return info_mi

    # Use the xmp tags/authors but fall back to the info ones if the xmp does
    # not have tags/authors. smart_update() should have taken care of the rest
    authors = saved_authors if xmp_mi.is_null('authors') else xmp_mi.authors
    tags = xmp_mi.tags or saved_tags
    info_mi.authors, info_mi.tags = authors, tags
    return info_mi
371
372
def nsmap(*args):
    ''' Return a prefix -> namespace URI dict containing only the given
    prefixes, taken from NS_MAP. '''
    selected = {}
    for prefix in args:
        selected[prefix] = NS_MAP[prefix]
    return selected
375
376
def create_simple_property(parent, tag, value):
    ''' Append a child element named tag (in prefix:name form) to parent, with
    value as its text. '''
    prop = parent.makeelement(expand(tag))
    prop.text = value
    parent.append(prop)
381
382
def create_alt_property(parent, tag, value):
    ''' Append a language alternative property named tag to parent: an
    rdf:Alt holding a single rdf:li marked xml:lang="x-default" whose text is
    value. '''
    prop = parent.makeelement(expand(tag))
    parent.append(prop)
    alternatives = prop.makeelement(expand('rdf:Alt'))
    prop.append(alternatives)
    entry = alternatives.makeelement(expand('rdf:li'))
    entry.set(expand('xml:lang'), 'x-default')
    entry.text = value
    alternatives.append(entry)
392
393
def create_sequence_property(parent, tag, val, ordered=True):
    ''' Append an array property named tag to parent, holding one rdf:li per
    item of val. The container is an rdf:Seq when ordered is true, otherwise
    an rdf:Bag. '''
    prop = parent.makeelement(expand(tag))
    parent.append(prop)
    container = prop.makeelement(expand('rdf:Seq' if ordered else 'rdf:Bag'))
    prop.append(container)
    for text in val:
        entry = container.makeelement(expand('rdf:li'))
        entry.text = text
        container.append(entry)
403
404
def create_identifiers(xmp, identifiers):
    ''' Append an xmp:Identifier element to xmp containing an rdf:Bag with one
    rdf:li (parseType Resource) per entry of the identifiers mapping. Each
    rdf:li holds the scheme in xmpidq:Scheme and the value in rdf:value. '''
    container = xmp.makeelement(expand('xmp:Identifier'))
    xmp.append(container)
    bag = container.makeelement(expand('rdf:Bag'))
    container.append(bag)
    for scheme, value in iteritems(identifiers):
        entry = bag.makeelement(expand('rdf:li'))
        entry.set(expand('rdf:parseType'), 'Resource')
        bag.append(entry)

        scheme_elem = entry.makeelement(expand('xmpidq:Scheme'))
        scheme_elem.text = scheme
        entry.append(scheme_elem)

        value_elem = entry.makeelement(expand('rdf:value'))
        value_elem.text = value
        entry.append(value_elem)
420
421
def create_series(calibre, series, series_index):
    ''' Append a calibre:series element (parseType Resource) to calibre. The
    series name goes into rdf:value and the index, formatted with two decimal
    places, into calibreSI:series_index. An index that cannot be converted to
    a float is written as 1.0. '''
    series_elem = calibre.makeelement(expand('calibre:series'))
    series_elem.set(expand('rdf:parseType'), 'Resource')
    calibre.append(series_elem)

    name_elem = series_elem.makeelement(expand('rdf:value'))
    name_elem.text = series
    series_elem.append(name_elem)

    try:
        index = float(series_index)
    except (TypeError, ValueError):
        index = 1.0
    index_elem = series_elem.makeelement(expand('calibreSI:series_index'))
    index_elem.text = '%.2f' % index
    series_elem.append(index_elem)
436
437
def create_user_metadata(calibre, all_user_metadata):
    ''' Append a calibre:custom_metadata element to calibre containing an
    rdf:Bag with one rdf:li (parseType Resource) per custom field. Each rdf:li
    holds the field name in calibreCC:name and the JSON encoded field
    metadata in rdf:value.

    A field that cannot be encoded is reported on the console and left out;
    the remaining fields are still written. '''
    import traceback
    from calibre.utils.config import to_json
    from calibre.ebooks.metadata.book.json_codec import object_to_unicode, encode_is_multiple

    s = calibre.makeelement(expand('calibre:custom_metadata'))
    calibre.append(s)
    bag = s.makeelement(expand('rdf:Bag'))
    s.append(bag)
    for name, fm in iteritems(all_user_metadata):
        try:
            # Work on a shallow copy so the caller's top-level dict is not replaced
            fm = copy.copy(fm)
            encode_is_multiple(fm)
            fm = object_to_unicode(fm)
            fm = json.dumps(fm, default=to_json, ensure_ascii=False)
        except Exception:
            # Best effort, but do not swallow KeyboardInterrupt/SystemExit
            prints('Failed to write user metadata:', name)
            traceback.print_exc()
            continue
        li = bag.makeelement(expand('rdf:li'))
        li.set(expand('rdf:parseType'), 'Resource')
        bag.append(li)
        n = li.makeelement(expand('calibreCC:name'))
        li.append(n)
        n.text = name
        val = li.makeelement(expand('rdf:value'))
        val.text = fm
        li.append(val)
466
467
def metadata_to_xmp_packet(mi):
    ''' Build an XMP packet from the Metadata object mi and return it as
    serialized bytes.

    The packet contains separate rdf:Description containers, in this order:
    Dublin Core fields, xmp fields (identifiers and MetadataDate), prism and
    pdfx identifier fields, and calibre specific fields. '''
    A = ElementMaker(namespace=NS_MAP['x'], nsmap=nsmap('x'))
    R = ElementMaker(namespace=NS_MAP['rdf'], nsmap=nsmap('rdf'))
    root = A.xmpmeta(R.RDF)
    rdf = root[0]

    def add_description(*prefixes):
        # Append a new rdf:Description that declares the given prefixes
        desc = rdf.makeelement(expand('rdf:Description'), nsmap=nsmap(*prefixes))
        desc.set(expand('rdf:about'), '')
        rdf.append(desc)
        return desc

    # Dublin Core
    dc = add_description('dc')
    for prop, tag in (('title', 'dc:title'), ('comments', 'dc:description')):
        create_alt_property(dc, tag, mi.get(prop) or '')
    sequence_props = (
        ('authors', 'dc:creator', True),
        ('tags', 'dc:subject', False),
        ('publisher', 'dc:publisher', False),
    )
    for prop, tag, ordered in sequence_props:
        val = mi.get(prop) or ()
        if isinstance(val, string_or_bytes):
            val = [val]
        create_sequence_property(dc, tag, val, ordered)
    if not mi.is_null('pubdate'):
        # Adobe spec recommends local time
        create_sequence_property(dc, 'dc:date', [isoformat(mi.pubdate, as_utc=False)])
    if not mi.is_null('languages'):
        codes = (lang_as_iso639_1(x) or canonicalize_lang(x) for x in mi.languages)
        langs = [code for code in codes if code]
        if langs:
            create_sequence_property(dc, 'dc:language', langs, ordered=False)

    # xmp, prism and pdfx
    xmp = add_description('xmp', 'xmpidq')
    extra_ids = {prefix: add_description(prefix) for prefix in ('prism', 'pdfx')}

    identifiers = mi.get_identifiers()
    if identifiers:
        create_identifiers(xmp, identifiers)
        for scheme, val in iteritems(identifiers):
            if scheme not in {'isbn', 'doi'}:
                continue
            # Also write these as simple prism:/pdfx: properties
            for prefix, parent in iteritems(extra_ids):
                id_elem = parent.makeelement(expand('%s:%s'%(prefix, scheme)))
                id_elem.text = val
                parent.append(id_elem)

    date_elem = xmp.makeelement(expand('xmp:MetadataDate'))
    date_elem.text = isoformat(now(), as_utc=False)
    xmp.append(date_elem)

    # calibre specific fields
    calibre = add_description('calibre', 'calibreSI', 'calibreCC')
    if not mi.is_null('rating'):
        try:
            rating = float(mi.rating)
        except (TypeError, ValueError):
            pass
        else:
            create_simple_property(calibre, 'calibre:rating', '%g' % rating)
    if not mi.is_null('series'):
        create_series(calibre, mi.series, mi.series_index)
    if not mi.is_null('timestamp'):
        create_simple_property(calibre, 'calibre:timestamp', isoformat(mi.timestamp, as_utc=False))
    for field in ('author_link_map', 'user_categories'):
        val = getattr(mi, field, None)
        if val:
            create_simple_property(calibre, 'calibre:'+field, dump_dict(val))

    for field in ('title_sort', 'author_sort'):
        if not mi.is_null(field):
            create_simple_property(calibre, 'calibre:'+field, getattr(mi, field))

    all_user_metadata = mi.get_all_user_metadata(True)
    if all_user_metadata:
        create_user_metadata(calibre, all_user_metadata)
    return serialize_xmp_packet(root)
543
544
def find_used_namespaces(elem):
    ''' Return the set of namespace URIs used by the tag and attribute names
    of elem and all of its descendant elements. The set contains None if any
    of those names has no namespace. '''
    def namespace_of(qname):
        # '{uri}local' -> 'uri'; names without a namespace give None
        if '}' not in qname:
            return None
        return qname.partition('}')[0][1:]

    used = {namespace_of(elem.tag)}
    used.update(namespace_of(attr) for attr in elem.attrib)
    for child in elem.iterchildren(etree.Element):
        used.update(find_used_namespaces(child))
    return used
551
552
def find_preferred_prefix(namespace, elems):
    ''' Return a prefix that one of elems, or one of their descendant
    elements, binds to namespace. Elements are searched depth first in
    order, and the first prefix found is returned. Returns None when no
    element binds a prefix to the namespace. '''
    for elem in elems:
        ans = {v:k for k, v in iteritems(elem.nsmap)}.get(namespace, None)
        if ans is not None:
            return ans
        # Search this element's subtree, but keep going with the remaining
        # elements if nothing is found there
        ans = find_preferred_prefix(namespace, elem.iterchildren(etree.Element))
        if ans is not None:
            return ans
    return None
559
560
def find_nsmap(elems):
    ''' Build a prefix -> namespace URI map covering every namespace used by
    elems and their descendants, except the xml, x and rdf namespaces.

    Namespaces present in NS_MAP get the prefix from NS_MAP. Other namespaces
    get the prefix found in the elements themselves, or a generated nsN
    prefix when none is found or the found prefix is already taken. '''
    used = set()
    for elem in elems:
        used |= find_used_namespaces(elem)
    used -= {NS_MAP['xml'], NS_MAP['x'], None, NS_MAP['rdf']}

    known_prefixes = {uri: prefix for prefix, uri in iteritems(NS_MAP)}
    ans = {}
    generated = 0
    for ns in used:
        prefix = known_prefixes.get(ns)
        if prefix is None:
            prefix = find_preferred_prefix(ns, elems)
            if not prefix or prefix in ans:
                generated += 1
                prefix = 'ns%d' % generated
        ans[prefix] = ns
    return ans
580
581
def clone_into(parent, elem):
    ''' Append a copy of elem, including its attributes and descendant
    elements, to parent. Whitespace-only text and tail are not copied.
    Assumes that all namespace declarations are present in parent. '''
    duplicate = parent.makeelement(elem.tag)
    parent.append(duplicate)
    for attr in ('text', 'tail'):
        content = getattr(elem, attr)
        if content and not content.isspace():
            setattr(duplicate, attr, content)
    duplicate.attrib.update(elem.attrib)
    for child in elem.iterchildren(etree.Element):
        clone_into(duplicate, child)
593
594
def merge_xmp_packet(old, new):
    ''' Merge metadata that is present in the old packet but not in the new
    one into the new one, and return the merged packet as serialized bytes.
    Assumes the new packet was generated by metadata_to_xmp_packet(). '''
    old, new = parse_xmp_packet(old), parse_xmp_packet(new)
    # As per the adobe spec all metadata items have to be present inside
    # top-level rdf:Description containers
    item_xpath = XPath('//rdf:RDF/rdf:Description/*')

    # First remove all data fields that metadata_to_xmp_packet() knows about,
    # since either they will have been set or, if not present, that implies
    # they have been cleared
    defined_tags = set()
    for prefix in ('prism', 'pdfx'):
        for scheme in KNOWN_ID_SCHEMES:
            defined_tags.add(expand(prefix + ':' + scheme))
    for local in ('identifier', 'title', 'creator', 'date', 'description', 'language', 'publisher', 'subject'):
        defined_tags.add(expand('dc:' + local))
    for local in ('MetadataDate', 'Identifier'):
        defined_tags.add(expand('xmp:' + local))
    # For redundancy also remove all fields explicitly set in the new packet
    defined_tags.update(item.tag for item in item_xpath(new))
    calibrens = '{%s}' % NS_MAP['calibre']
    for elem in item_xpath(old):
        tag = elem.tag
        if tag in defined_tags or (tag and tag.startswith(calibrens)):
            elem.getparent().remove(elem)

    # Group all items by namespace, items from the new packet first
    groups = defaultdict(list)
    for tree in (new, old):
        for item in item_xpath(tree):
            groups[item.nsmap[item.prefix]].append(item)

    A = ElementMaker(namespace=NS_MAP['x'], nsmap=nsmap('x'))
    R = ElementMaker(namespace=NS_MAP['rdf'], nsmap=nsmap('rdf'))
    root = A.xmpmeta(R.RDF)
    rdf = root[0]

    # dc, xmp and calibre come first, everything else is sorted by namespace
    priority = {NS_MAP['dc']:'a', NS_MAP['xmp']:'b', NS_MAP['calibre']:'c'}
    for namespace in sorted(groups, key=lambda ns: priority.get(ns, 'z'+ns)):
        members = groups[namespace]
        desc = rdf.makeelement(expand('rdf:Description'), nsmap=find_nsmap(members))
        desc.set(expand('rdf:about'), '')
        rdf.append(desc)
        for member in members:
            clone_into(desc, member)

    return serialize_xmp_packet(root)
640
641
if __name__ == '__main__':
    # Round trip the XMP packet of the file named by the last command line
    # argument: read it, regenerate a packet from the parsed metadata, merge
    # the two and print the result
    from calibre.utils.podofo import get_xmp_metadata
    original_packet = get_xmp_metadata(sys.argv[-1])
    regenerated_packet = metadata_to_xmp_packet(metadata_from_xmp_packet(original_packet))
    print(merge_xmp_packet(original_packet, regenerated_packet))
648