#!/usr/local/bin/python3.8
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai


__license__   = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os, time
from datetime import date

from calibre import prints, guess_type, isbytestring, fsync, lopen
from calibre.devices.errors import DeviceError
from calibre.devices.usbms.driver import debug_print
from calibre.constants import DEBUG, preferred_encoding
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import authors_to_string, title_sort, \
                                    authors_to_sort_string
from polyglot.binary import from_base64_bytes

'''
cacheExt.xml

Periodical identifier sample from a PRS-650:

<?xml version="1.0" encoding="UTF-8"?>
<cacheExt xmlns="http://www.sony.com/xmlns/product/prs/device/1">
    <text conformsTo="http://xmlns.sony.net/e-book/prs/periodicals/1.0/newspaper/1.0" periodicalName="The Atlantic"
    description="Current affairs and politics focused on the US" publicationDate="Tue, 19 Oct 2010 00:00:00 GMT"
    path="database/media/books/calibre/Atlantic [Mon, 18 Oct 2010], The - calibre_1701.epub">
        <thumbnail width="167" height="217">main_thumbnail.jpg</thumbnail>
    </text>
</cacheExt>

'''
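
# A minimal parsing sketch for a cacheExt.xml like the sample above
# (illustrative only; the real work is done by XMLCache below). It uses the
# same namespace-agnostic local-name() matching used throughout this module;
# cache_ext_bytes is a hypothetical variable holding the file contents:
#
#   from lxml import etree
#   root = etree.fromstring(cache_ext_bytes)
#   for text in root.xpath('//*[local-name()="text"]'):
#       print(text.get('periodicalName'), text.get('publicationDate'))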

# Utility functions {{{
EMPTY_CARD_CACHE = b'''\
<?xml version="1.0" encoding="UTF-8"?>
<cache xmlns="http://www.kinoma.com/FskCache/1">
</cache>
'''

EMPTY_EXT_CACHE = b'''\
<?xml version="1.0" encoding="UTF-8"?>
<cacheExt xmlns="http://www.sony.com/xmlns/product/prs/device/1">
</cacheExt>
'''

MIME_MAP   = {
                'lrf' : 'application/x-sony-bbeb',
                'lrx' : 'application/x-sony-bbeb',
                'rtf' : 'application/rtf',
                'pdf' : 'application/pdf',
                'txt' : 'text/plain',
                'epub': 'application/epub+zip',
              }

DAY_MAP   = dict(Sun=0, Mon=1, Tue=2, Wed=3, Thu=4, Fri=5, Sat=6)
MONTH_MAP = dict(Jan=1, Feb=2, Mar=3, Apr=4, May=5, Jun=6, Jul=7, Aug=8, Sep=9, Oct=10, Nov=11, Dec=12)
INVERSE_DAY_MAP = dict(zip(DAY_MAP.values(), DAY_MAP.keys()))
INVERSE_MONTH_MAP = dict(zip(MONTH_MAP.values(), MONTH_MAP.keys()))


def strptime(src):
    src = src.strip()
    src = src.split()
    src[0] = str(DAY_MAP[src[0][:-1]])+','
    src[2] = str(MONTH_MAP[src[2]])
    return time.strptime(' '.join(src), '%w, %d %m %Y %H:%M:%S %Z')


def strftime(epoch, zone=time.localtime):
    try:
        src = time.strftime("%w, %d %m %Y %H:%M:%S GMT", zone(epoch)).split()
    except:
        src = time.strftime("%w, %d %m %Y %H:%M:%S GMT", zone()).split()

    src[0] = INVERSE_DAY_MAP[int(src[0][:-1])]+','
    src[2] = INVERSE_MONTH_MAP[int(src[2])]
    return ' '.join(src)
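
# Round-trip sketch for the two helpers above (illustrative only, not executed
# at import time). The Sony caches store dates in the same form as the
# publicationDate in the module docstring:
#
#   >>> import calendar
#   >>> s = 'Tue, 19 Oct 2010 00:00:00 GMT'
#   >>> strftime(calendar.timegm(strptime(s)), zone=time.gmtime) == s
#   True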


def uuid():
    from uuid import uuid4
    # Upper-cased UUID with the first hyphen removed; used for the 'uuid'
    # attribute of playlists created in get_or_create_playlist()
    return str(uuid4()).replace('-', '', 1).upper()

# }}}


class XMLCache:

    def __init__(self, paths, ext_paths, prefixes, use_author_sort):
        from calibre.utils.xml_parse import safe_xml_fromstring

        if DEBUG:
            debug_print('Building XMLCache...', paths)
        self.paths = paths
        self.prefixes = prefixes
        self.use_author_sort = use_author_sort

        # Parse XML files {{{
        self.roots = {}
        for source_id, path in paths.items():
            if source_id == 0:
                if not os.path.exists(path):
                    raise DeviceError(('The SONY XML cache %r does not exist. Try'
                        ' disconnecting and reconnecting your reader.')%path)
                with lopen(path, 'rb') as f:
                    raw = f.read()
            else:
                raw = EMPTY_CARD_CACHE
                if os.access(path, os.R_OK):
                    with lopen(path, 'rb') as f:
                        raw = f.read()

            self.roots[source_id] = safe_xml_fromstring(
                xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True, verbose=DEBUG)[0]
            )
            if self.roots[source_id] is None:
                raise Exception(('The SONY database at %r is corrupted. Try'
                        ' disconnecting and reconnecting your reader.')%path)

        self.ext_paths, self.ext_roots = {}, {}
        for source_id, path in ext_paths.items():
            if not os.path.exists(path):
                try:
                    with lopen(path, 'wb') as f:
                        f.write(EMPTY_EXT_CACHE)
                        fsync(f)
                except:
                    pass
            if os.access(path, os.W_OK):
                try:
                    with lopen(path, 'rb') as f:
                        self.ext_roots[source_id] = safe_xml_fromstring(
                            xml_to_unicode(f.read(), strip_encoding_pats=True, assume_utf8=True, verbose=DEBUG)[0]
                        )
                        self.ext_paths[source_id] = path
                except:
                    pass

        # }}}

        recs = self.roots[0].xpath('//*[local-name()="records"]')
        if not recs:
            raise DeviceError('The SONY XML database is corrupted (no'
                    ' <records>). Try disconnecting and reconnecting'
                    ' your reader.')
        self.record_roots = {}
        self.record_roots.update(self.roots)
        self.record_roots[0] = recs[0]

        self.detect_namespaces()
        debug_print('Done building XMLCache...')

    # Playlist management {{{
    def purge_broken_playlist_items(self, root):
        id_map = self.build_id_map(root)
        for pl in root.xpath('//*[local-name()="playlist"]'):
            seen = set()
            for item in list(pl):
                id_ = item.get('id', None)
                if id_ is None or id_ in seen or id_map.get(id_, None) is None:
                    if DEBUG:
                        if id_ is None:
                            cause = 'invalid id'
                        elif id_ in seen:
                            cause = 'duplicate item'
                        else:
                            cause = 'id not found'
                        prints('Purging broken playlist item:',
                                id_, 'from playlist:', pl.get('title', None),
                                'because:', cause)
                    item.getparent().remove(item)
                    continue
                seen.add(id_)

    def prune_empty_playlists(self):
        for root in self.record_roots.values():
            self.purge_broken_playlist_items(root)
            for playlist in root.xpath('//*[local-name()="playlist"]'):
                if len(playlist) == 0 or not playlist.get('title', None):
                    if DEBUG:
                        debug_print('Removing playlist id:', playlist.get('id', None),
                                playlist.get('title', None))
                    playlist.getparent().remove(playlist)

    def ensure_unique_playlist_titles(self):
        for root in self.record_roots.values():
            seen = set()
            for playlist in root.xpath('//*[local-name()="playlist"]'):
                title = playlist.get('title', None)
                if title is None:
                    title = _('Unnamed')
                    playlist.set('title', title)
                if title in seen:
                    for c in range(2, 1000):
                        if title+str(c) not in seen:
                            title = title+str(c)
                            playlist.set('title', title)
                            seen.add(title)
                            break
                else:
                    seen.add(title)

    def build_id_playlist_map(self, bl_index):
        '''
        Return a map of the collections in books: {lpath: [collection names]}
        '''
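        # Shape of the returned map (illustrative; the collection name is
        # hypothetical, the lpath is shortened from the module docstring sample):
        #
        #   {'calibre/Atlantic [Mon, 18 Oct 2010], The - calibre_1701.epub': ['News']}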
        debug_print('Start build_id_playlist_map')
        self.ensure_unique_playlist_titles()
        self.prune_empty_playlists()
        debug_print('after cleaning playlists')
        root = self.record_roots[bl_index]
        if root is None:
            return
        id_map = self.build_id_map(root)
        playlist_map = {}
        # For each playlist, get the lpaths for the ids in it, then add them to the map
        for playlist in root.xpath('//*[local-name()="playlist"]'):
            name = playlist.get('title')
            if name is None:
                debug_print('build_id_playlist_map: unnamed playlist!')
                continue
            for item in playlist:
                # translate each id into its lpath
                id_ = item.get('id', None)
                if id_ is None:
                    debug_print('build_id_playlist_map: id_ is None!')
                    continue
                bk = id_map.get(id_, None)
                if bk is None:
                    debug_print('build_id_playlist_map: book is None!', id_)
                    continue
                lpath = bk.get('path', None)
                if lpath is None:
                    debug_print('build_id_playlist_map: lpath is None!', id_)
                    continue
                if lpath not in playlist_map:
                    playlist_map[lpath] = []
                playlist_map[lpath].append(name)
        debug_print('Finish build_id_playlist_map. Found', len(playlist_map))
        return playlist_map

    def reset_existing_playlists_map(self):
        '''
        Call this method before calling get_or_create_playlist in the context of
        a given job. Call it again after deleting any playlists. The current
        implementation adds all new playlists before deleting any, so that
        constraint is respected.
        '''
        self._playlist_to_playlist_id_map = {}

    def get_or_create_playlist(self, bl_idx, title):
        # Maintain a private map of playlist titles to their elements. Don't
        # check whether it already exists, because reset_existing_playlists_map
        # must be called before this method is used, to ensure that deleted
        # playlists are taken into account.
        root = self.record_roots[bl_idx]
        if bl_idx not in self._playlist_to_playlist_id_map:
            self._playlist_to_playlist_id_map[bl_idx] = {}
            for playlist in root.xpath('//*[local-name()="playlist"]'):
                pl_title = playlist.get('title', None)
                if pl_title is not None:
                    self._playlist_to_playlist_id_map[bl_idx][pl_title] = playlist
        if title in self._playlist_to_playlist_id_map[bl_idx]:
            return self._playlist_to_playlist_id_map[bl_idx][title]
        debug_print('Creating playlist:', title)
        ans = root.makeelement('{%s}playlist'%self.namespaces[bl_idx],
                nsmap=root.nsmap, attrib={
                    'uuid' : uuid(),
                    'title': title,
                    'id'   : str(self.max_id(root)+1),
                    'sourceid': '1'
                    })
        root.append(ans)
        self._playlist_to_playlist_id_map[bl_idx][title] = ans
        return ans
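
    # Typical per-job use of the two methods above (illustrative sketch;
    # update_playlists() below follows this pattern, and 'News' is just a
    # hypothetical collection title):
    #
    #   self.reset_existing_playlists_map()
    #   pl = self.get_or_create_playlist(bl_idx, 'News')
    #   # ... append <item> elements to pl; delete unwanted playlists last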
    # }}}

    def fix_ids(self):  # {{{
        debug_print('Running fix_ids()')

        def ensure_numeric_ids(root):
            idmap = {}
            for x in root.xpath('child::*[@id]'):
                id_ = x.get('id')
                try:
                    id_ = int(id_)
                except:
                    x.set('id', '-1')
                    idmap[id_] = '-1'

            if DEBUG and idmap:
                debug_print('Found non-numeric ids:')
                debug_print(list(idmap.keys()))
            return idmap

        def remap_playlist_references(root, idmap):
            for playlist in root.xpath('//*[local-name()="playlist"]'):
                for item in playlist.xpath(
                        'descendant::*[@id and local-name()="item"]'):
                    id_ = item.get('id')
                    if id_ in idmap:
                        item.set('id', idmap[id_])
                        if DEBUG:
                            debug_print('Remapping id %s to %s'%(id_, idmap[id_]))

        def ensure_media_xml_base_ids(root):
            for num, tag in enumerate(('library', 'watchSpecial')):
                for x in root.xpath('//*[local-name()="%s"]'%tag):
                    x.set('id', str(num))

        def rebase_ids(root, base, sourceid, pl_sourceid):
            'Rebase all ids and also make them consecutive'
            for item in root.xpath('//*[@sourceid]'):
                sid = pl_sourceid if item.tag.endswith('playlist') else sourceid
                item.set('sourceid', str(sid))
            # Only rebase ids of nodes that are immediate children of the
            # record root (that way playlist/item nodes are unaffected)
            items = root.xpath('child::*[@id]')
            items.sort(key=lambda x: int(x.get('id')))
            idmap = {}
            for i, item in enumerate(items):
                old = int(item.get('id'))
                new = base + i
                if old != new:
                    item.set('id', str(new))
                    idmap[str(old)] = str(new)
            return idmap

        self.prune_empty_playlists()

        for i in sorted(self.roots.keys()):
            root = self.record_roots[i]
            if i == 0:
                ensure_media_xml_base_ids(root)

            idmap = ensure_numeric_ids(root)
            if len(idmap) > 0:
                debug_print('fix_ids: found some non-numeric ids')
                remap_playlist_references(root, idmap)
            if i == 0:
                sourceid, playlist_sid = 1, 0
                base = 0
            else:
                previous = i-1
                if previous not in self.roots:
                    previous = 0
                max_id = self.max_id(self.roots[previous])
                sourceid = playlist_sid = max_id + 1
                base = max_id + 2
            idmap = rebase_ids(root, base, sourceid, playlist_sid)
            remap_playlist_references(root, idmap)

        last_bl = max(self.roots.keys())
        max_id = self.max_id(self.roots[last_bl])
        self.roots[0].set('nextID', str(max_id+1))
        debug_print('Finished running fix_ids()')

    # }}}

    # Update JSON from XML {{{
    def update_booklist(self, bl, bl_index):
        if bl_index not in self.record_roots:
            return
        debug_print('Updating JSON cache:', bl_index)
        playlist_map = self.build_id_playlist_map(bl_index)
        root = self.record_roots[bl_index]
        lpath_map = self.build_lpath_map(root)
        for book in bl:
            record = lpath_map.get(book.lpath, None)
            if record is not None:
                for thumbnail in record.xpath(
                        'descendant::*[local-name()="thumbnail"]'):
                    for img in thumbnail.xpath(
                            'descendant::*[local-name()="jpeg"]|'
                            'descendant::*[local-name()="png"]'):
                        if img.text:
                            try:
                                raw = from_base64_bytes(img.text.strip())
                            except Exception:
                                continue
                            book.thumbnail = raw
                            break
                    break
                book.device_collections = playlist_map.get(book.lpath, [])
        debug_print('Finished updating JSON cache:', bl_index)

    # }}}

    # Update XML from JSON {{{
    def update(self, booklists, collections_attributes, plugboard):
        debug_print('Starting update', collections_attributes)
        use_tz_var = False
        for i, booklist in booklists.items():
            playlist_map = self.build_id_playlist_map(i)
            debug_print('Updating XML Cache:', i)
            root = self.record_roots[i]
            lpath_map = self.build_lpath_map(root)
            ext_root = self.ext_roots[i] if i in self.ext_roots else None
            ext_lpath_map = None
            if ext_root is not None:
                ext_lpath_map = self.build_lpath_map(ext_root)
            gtz_count = ltz_count = 0
            use_tz_var = False
            for book in booklist:
                path = os.path.join(self.prefixes[i], *(book.lpath.split('/')))
                record = lpath_map.get(book.lpath, None)
                created = False
                if record is None:
                    created = True
                    record = self.create_text_record(root, i, book.lpath)
                if plugboard is not None:
                    newmi = book.deepcopy_metadata()
                    newmi.template_to_attribute(book, plugboard)
                    newmi.set('_new_book', getattr(book, '_new_book', False))
                    book.set('_pb_title_sort',
                             newmi.get('title_sort', newmi.get('title', None)))
                    book.set('_pb_author_sort', newmi.get('author_sort', ''))
                else:
                    newmi = book
                (gtz_count, ltz_count, use_tz_var) = \
                    self.update_text_record(record, newmi, path, i,
                                            gtz_count, ltz_count, use_tz_var)
                # Ensure the collections in the XML database are recorded for
                # this book
                if book.device_collections is None:
                    book.device_collections = []
                book.device_collections = playlist_map.get(book.lpath, [])

                if created and ext_root is not None and \
                        ext_lpath_map.get(book.lpath, None) is None:
                    ext_record = self.create_ext_text_record(ext_root, i,
                            book.lpath, book.thumbnail)
                    self.periodicalize_book(book, ext_record)

            debug_print('Timezone votes: %d GMT, %d LTZ, use_tz_var=%s'%
                                        (gtz_count, ltz_count, use_tz_var))
            self.update_playlists(i, root, booklist, collections_attributes)
        # Update the device collections because update_playlists could have
        # added some new ones.
        debug_print('In update: Starting refresh of device_collections')
        for i, booklist in booklists.items():
            playlist_map = self.build_id_playlist_map(i)
            for book in booklist:
                book.device_collections = playlist_map.get(book.lpath, [])
        self.fix_ids()
        debug_print('Finished update')

    def is_sony_periodical(self, book):
        if _('News') not in book.tags:
            return False
        if not book.lpath.lower().endswith('.epub'):
            return False
        if book.pubdate.date() < date(2010, 10, 17):
            return False
        return True

    def periodicalize_book(self, book, record):
        if not self.is_sony_periodical(book):
            return
        record.set('conformsTo',
            "http://xmlns.sony.net/e-book/prs/periodicals/1.0/newspaper/1.0")

        record.set('description', '')

        name = None
        if '[' in book.title:
            name = book.title.split('[')[0].strip()
            if len(name) < 4:
                name = None
        if not name:
            try:
                name = [t for t in book.tags if t != _('News')][0]
            except:
                name = None

        if not name:
            name = book.title

        record.set('periodicalName', name)

        try:
            pubdate = strftime(book.pubdate.utctimetuple(),
                    zone=lambda x: x)
            record.set('publicationDate', pubdate)
        except:
            pass

    def rebuild_collections(self, booklist, bl_index):
        if bl_index not in self.record_roots:
            return
        root = self.record_roots[bl_index]
        self.update_playlists(bl_index, root, booklist, [])
        self.fix_ids()

    def update_playlists(self, bl_index, root, booklist, collections_attributes):
        debug_print('Starting update_playlists', collections_attributes, bl_index)
        self.reset_existing_playlists_map()
        collections = booklist.get_collections(collections_attributes)
        lpath_map = self.build_lpath_map(root)
        debug_print('update_playlists: finished building maps')
        for category, books in collections.items():
            records = [lpath_map.get(b.lpath, None) for b in books]
            # Remove any books that were not found, although this
            # *should* never happen
            if DEBUG and None in records:
                debug_print('WARNING: Some elements in the JSON cache were not'
                        ' found in the XML cache')
            records = [x for x in records if x is not None]
            # Ensure each book has an ID.
            for rec in records:
                if rec.get('id', None) is None:
                    rec.set('id', str(self.max_id(root)+1))
            ids = [x.get('id', None) for x in records]
            # Given that we just set the ids, there shouldn't be any Nones. But
            # better to be safe...
            if None in ids:
                debug_print('WARNING: Some <text> elements do not have ids')
                ids = [x for x in ids if x is not None]

            playlist = self.get_or_create_playlist(bl_index, category)
            # Get the books currently in the playlist. We will need them to be
            # sure to put back any books that were manually added.
            playlist_ids = []
            for item in playlist:
                id_ = item.get('id', None)
                if id_ is not None:
                    playlist_ids.append(id_)
            # Empty the playlist. We do this so that the playlist will have the
            # order specified by get_collections
            for item in list(playlist):
                playlist.remove(item)

            # Get a list of ids not known by get_collections
            extra_ids = [x for x in playlist_ids if x not in ids]
            # Rebuild the collection in the order specified by get_collections,
            # then add the ids that get_collections didn't know about.
            for id_ in ids + extra_ids:
                item = playlist.makeelement(
                        '{%s}item'%self.namespaces[bl_index],
                        nsmap=playlist.nsmap, attrib={'id':id_})
                playlist.append(item)

        # Delete playlist entries not in collections
        for playlist in root.xpath('//*[local-name()="playlist"]'):
            title = playlist.get('title', None)
            if title not in collections:
                if DEBUG:
                    debug_print('Deleting playlist:', playlist.get('title', ''))
                playlist.getparent().remove(playlist)
                continue
            books = collections[title]
            records = [lpath_map.get(b.lpath, None) for b in books]
            records = [x for x in records if x is not None]
            ids = [x.get('id', None) for x in records]
            ids = [x for x in ids if x is not None]
            for item in list(playlist):
                if item.get('id', None) not in ids:
                    if DEBUG:
                        debug_print('Deleting item:', item.get('id', ''),
                                'from playlist:', playlist.get('title', ''))
                    playlist.remove(item)
        debug_print('Finishing update_playlists')

    def create_text_record(self, root, bl_id, lpath):
        namespace = self.namespaces[bl_id]
        id_ = self.max_id(root)+1
        attrib = {
                'page':'0', 'part':'0','pageOffset':'0','scale':'0',
                'id':str(id_), 'sourceid':'1', 'path':lpath}
        ans = root.makeelement('{%s}text'%namespace, attrib=attrib, nsmap=root.nsmap)
        root.append(ans)
        return ans

    def create_ext_text_record(self, root, bl_id, lpath, thumbnail):
        namespace = root.nsmap[None]
        attrib = {'path': lpath}
        ans = root.makeelement('{%s}text'%namespace, attrib=attrib,
                nsmap=root.nsmap)
        ans.tail = '\n'
        if len(root) > 0:
            root[-1].tail = '\n\t'
        else:
            root.text = '\n\t'
        root.append(ans)
        if thumbnail and thumbnail[-1]:
            ans.text = '\n' + '\t\t'
            t = root.makeelement('{%s}thumbnail'%namespace,
                attrib={'width':str(thumbnail[0]), 'height':str(thumbnail[1])},
                nsmap=root.nsmap)
            t.text = 'main_thumbnail.jpg'
            ans.append(t)
            t.tail = '\n\t'
        return ans
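
    # The ext record created above ends up serialized roughly like the sample
    # in the module docstring (before periodicalize_book adds the periodical
    # attributes), e.g.:
    #
    #   <text path="database/media/books/calibre/...epub">
    #       <thumbnail width="167" height="217">main_thumbnail.jpg</thumbnail>
    #   </text>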

    def update_text_record(self, record, book, path, bl_index,
                           gtz_count, ltz_count, use_tz_var):
        '''
        Update the Sony database from the book. This is done if the timestamp in
        the db differs from the timestamp on the file.
        '''

        # It seems that a Sony device can sometimes know what timezone it is in,
        # and apparently converts the dates to GMT when it writes them to its
        # DB. We can detect that a device is timezone-aware because there is a
        # 'tz' variable in the Sony DB, which we can set to "0" to tell the
        # device to ignore its own timezone when comparing mtime to the date in
        # the DB.

        # Unfortunately, if there is no tz variable in the DB, then we can't
        # tell when the device applies a timezone conversion. We use a horrible
        # heuristic to work around this problem. First, set dates only for new
        # books, trying to avoid upsetting the Sony. Second, voting: if a book
        # is not new, compare its Sony DB date against localtime and gmtime.
        # Count the matches. When we must set a date, use the one with the most
        # matches. Use localtime in the case of a tie, and hope it is right.
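        #
        # Illustrative example of the voting: if three existing records match
        # strftime(mtime, zone=time.gmtime) and one matches
        # strftime(mtime, zone=time.localtime), then gtz_count == 3 and
        # ltz_count == 1, so new books get GMT dates (unless a 'tz' attribute
        # was seen, in which case localtime plus tz="0" is used).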
        try:
            timestamp = os.path.getmtime(path)
        except:
            debug_print('Failed to get timestamp for:', path)
            timestamp = time.time()
        rec_date = record.get('date', None)

        def clean(x):
            if isbytestring(x):
                x = x.decode(preferred_encoding, 'replace')
            x = x.replace('\0', '')
            return x

        def record_set(k, v):
            try:
                record.set(k, clean(v))
            except:
                # v is not suitable for XML, ignore
                pass

        if not getattr(book, '_new_book', False):  # book is not new
            if record.get('tz', None) is not None:
                use_tz_var = True
            if strftime(timestamp, zone=time.gmtime) == rec_date:
                gtz_count += 1
            elif strftime(timestamp, zone=time.localtime) == rec_date:
                ltz_count += 1
        else:  # book is new. Set the time using the current votes
            if use_tz_var:
                tz = time.localtime
                record.set('tz', '0')
                debug_print("Use localtime TZ and tz='0' for new book", book.lpath)
            elif ltz_count >= gtz_count:
                tz = time.localtime
                debug_print("Use localtime TZ for new book", book.lpath)
            else:
                tz = time.gmtime
                debug_print("Use GMT TZ for new book", book.lpath)
            date_str = strftime(timestamp, zone=tz)
            record.set('date', clean(date_str))
        try:
            record.set('size', clean(str(os.stat(path).st_size)))
        except:
            record.set('size', '0')
        title = book.title if book.title else _('Unknown')
        record_set('title', title)
        ts = book.title_sort
        if not ts:
            ts = title_sort(title)
        record_set('titleSorter', ts)
        if self.use_author_sort:
            if book.author_sort:
                aus = book.author_sort
            else:
                debug_print('Author_sort is None for book', book.lpath)
                aus = authors_to_sort_string(book.authors)
            record_set('author', aus)
        else:
            record_set('author', authors_to_string(book.authors))
        ext = os.path.splitext(path)[1]
        if ext:
            ext = ext[1:].lower()
            mime = MIME_MAP.get(ext, None)
            if mime is None:
                mime = guess_type('a.'+ext)[0]
            if mime is not None:
                record.set('mime', clean(mime))
        if 'sourceid' not in record.attrib:
            record.set('sourceid', '1')
        if 'id' not in record.attrib:
            num = self.max_id(record.getroottree().getroot())
            record.set('id', str(num+1))
        return (gtz_count, ltz_count, use_tz_var)
    # }}}

    # Writing the XML files {{{
    def cleanup_whitespace(self, bl_index):
        root = self.record_roots[bl_index]
        level = 2 if bl_index == 0 else 1
        if len(root) > 0:
            root.text = '\n'+'\t'*level
            for child in root:
                child.tail = '\n'+'\t'*level
                if len(child) > 0:
                    child.text = '\n'+'\t'*(level+1)
                    for gc in child:
                        gc.tail = '\n'+'\t'*(level+1)
                    next(child.iterchildren(reversed=True)).tail = '\n'+'\t'*level
            next(root.iterchildren(reversed=True)).tail = '\n'+'\t'*(level-1)

    def move_playlists_to_bottom(self):
        for root in self.record_roots.values():
            seen = []
            for pl in root.xpath('//*[local-name()="playlist"]'):
                pl.getparent().remove(pl)
                seen.append(pl)
            for pl in seen:
                root.append(pl)

    def write(self):
        from lxml import etree

        for i, path in self.paths.items():
            self.move_playlists_to_bottom()
            self.cleanup_whitespace(i)
            raw = etree.tostring(self.roots[i], encoding='UTF-8',
                    xml_declaration=True)
            raw = raw.replace(b"<?xml version='1.0' encoding='UTF-8'?>",
                    b'<?xml version="1.0" encoding="UTF-8"?>')
            with lopen(path, 'wb') as f:
                f.write(raw)
                fsync(f)

        for i, path in self.ext_paths.items():
            try:
                raw = etree.tostring(self.ext_roots[i], encoding='UTF-8',
                    xml_declaration=True)
            except:
                continue
            raw = raw.replace(b"<?xml version='1.0' encoding='UTF-8'?>",
                    b'<?xml version="1.0" encoding="UTF-8"?>')
            with lopen(path, 'wb') as f:
                f.write(raw)
                fsync(f)

    # }}}

    # Utility methods {{{

    def build_lpath_map(self, root):
        m = {}
        for bk in root.xpath('//*[local-name()="text"]'):
            m[bk.get('path')] = bk
        return m

    def build_id_map(self, root):
        m = {}
        for bk in root.xpath('//*[local-name()="text"]'):
            m[bk.get('id')] = bk
        return m

    def book_by_lpath(self, lpath, root):
        matches = root.xpath('//*[local-name()="text" and @path="%s"]'%lpath)
        if matches:
            return matches[0]

    def max_id(self, root):
        ans = -1
        for x in root.xpath('//*[@id]'):
            id_ = x.get('id')
            try:
                num = int(id_)
                if num > ans:
                    ans = num
            except:
                continue
        return ans

    def detect_namespaces(self):
        self.nsmaps = {}
        for i, root in self.roots.items():
            self.nsmaps[i] = root.nsmap

        self.namespaces = {}
        for i in self.roots:
            for c in ('library', 'text', 'image', 'playlist', 'thumbnail',
                    'watchSpecial'):
                matches = self.record_roots[i].xpath('//*[local-name()="%s"]'%c)
                if matches:
                    e = matches[0]
                    self.namespaces[i] = e.nsmap[e.prefix]
                    break
            if i not in self.namespaces:
                ns = self.nsmaps[i].get(None, None)
                for prefix in self.nsmaps[i]:
                    if prefix is not None:
                        ns = self.nsmaps[i][prefix]
                        break
                self.namespaces[i] = ns

    # }}}