2# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
5__license__   = 'GPL v3'
6__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
7__docformat__ = 'restructuredtext en'
9import os, time
10from datetime import date
12from calibre import prints, guess_type, isbytestring, fsync
13from calibre.devices.errors import DeviceError
14from calibre.devices.usbms.driver import debug_print
15from calibre.constants import DEBUG, preferred_encoding
16from calibre.ebooks.chardet import xml_to_unicode
17from calibre.ebooks.metadata import authors_to_string, title_sort, \
18                                    authors_to_sort_string
19from polyglot.binary import from_base64_bytes
24Periodical identifier sample from a PRS-650:
26<?xml version="1.0" encoding="UTF-8"?>
27<cacheExt xmlns="http://www.sony.com/xmlns/product/prs/device/1">
28    <text conformsTo="http://xmlns.sony.net/e-book/prs/periodicals/1.0/newspaper/1.0" periodicalName="The Atlantic"
29    description="Current affairs and politics focused on the US" publicationDate="Tue, 19 Oct 2010 00:00:00 GMT"
30    path="database/media/books/calibre/Atlantic [Mon, 18 Oct 2010], The - calibre_1701.epub">
31        <thumbnail width="167" height="217">main_thumbnail.jpg</thumbnail>
32    </text>
37# Utility functions {{{
39<?xml version="1.0" encoding="UTF-8"?>
40<cache xmlns="http://www.kinoma.com/FskCache/1">
45<?xml version="1.0" encoding="UTF-8"?>
46<cacheExt xmlns="http://www.sony.com/xmlns/product/prs/device/1">
# File extension (lower case, no dot) -> MIME type stored in a record's
# ``mime`` attribute.
MIME_MAP = {
    'lrf': 'application/x-sony-bbeb',
    'lrx': 'application/x-sony-bbeb',
    'rtf': 'application/rtf',
    'pdf': 'application/pdf',
    'txt': 'text/plain',
    'epub': 'application/epub+zip',
}
# English day/month abbreviations <-> the numbers produced by the ``%w`` and
# ``%m`` time format codes.
DAY_MAP = {'Sun': 0, 'Mon': 1, 'Tue': 2, 'Wed': 3, 'Thu': 4, 'Fri': 5, 'Sat': 6}
MONTH_MAP = {
    'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6,
    'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12,
}
INVERSE_DAY_MAP = {number: name for name, number in DAY_MAP.items()}
INVERSE_MONTH_MAP = {number: name for name, number in MONTH_MAP.items()}
def strptime(src):
    '''
    Parse a date string of the form ``Tue, 19 Oct 2010 00:00:00 GMT`` into a
    :class:`time.struct_time`.

    The leading day abbreviation (with its trailing comma) and the month
    abbreviation are replaced by their numbers from ``DAY_MAP`` and
    ``MONTH_MAP`` before the text is handed to :func:`time.strptime`.
    Unknown abbreviations raise ``KeyError``.
    '''
    fields = src.strip().split()
    day_name = fields[0][:-1]  # strip the trailing comma
    fields[0] = '%d,' % DAY_MAP[day_name]
    fields[2] = '%d' % MONTH_MAP[fields[2]]
    numeric = ' '.join(fields)
    return time.strptime(numeric, '%w, %d %m %Y %H:%M:%S %Z')
def strftime(epoch, zone=time.localtime):
    '''
    Format a time as ``Tue, 19 Oct 2010 00:00:00 GMT`` using English day and
    month abbreviations.

    :param epoch: Value passed to ``zone`` to obtain a time tuple.
    :param zone: Callable turning ``epoch`` into something accepted by
                 :func:`time.strftime`. If converting or formatting ``epoch``
                 fails, ``zone`` is called with no arguments instead.
    :return: The formatted string. The literal suffix ``GMT`` is always
             appended, whichever ``zone`` is used.
    '''
    fmt = '%w, %d %m %Y %H:%M:%S GMT'
    try:
        src = time.strftime(fmt, zone(epoch)).split()
    except Exception:
        # Best effort: an unusable epoch falls back to whatever zone() yields
        # without arguments. Only Exception is caught so that
        # KeyboardInterrupt/SystemExit still propagate.
        src = time.strftime(fmt, zone()).split()

    # Swap the numeric weekday and month back to their English abbreviations
    src[0] = INVERSE_DAY_MAP[int(src[0][:-1])] + ','
    src[2] = INVERSE_MONTH_MAP[int(src[2])]
    return ' '.join(src)
def uuid():
    '''
    Return a new random UUID as an upper-case string with only its first
    hyphen removed.
    '''
    from uuid import uuid4
    head, _sep, rest = str(uuid4()).partition('-')
    return (head + rest).upper()
88# }}}
91class XMLCache:
93    def __init__(self, paths, ext_paths, prefixes, use_author_sort):
94        from calibre.utils.xml_parse import safe_xml_fromstring
96        if DEBUG:
97            debug_print('Building XMLCache...', paths)
98        self.paths = paths
99        self.prefixes = prefixes
100        self.use_author_sort = use_author_sort
102        # Parse XML files {{{
103        self.roots = {}
104        for source_id, path in paths.items():
105            if source_id == 0:
106                if not os.path.exists(path):
107                    raise DeviceError(('The SONY XML cache %r does not exist. Try'
108                        ' disconnecting and reconnecting your reader.')%repr(path))
109                with lopen(path, 'rb') as f:
110                    raw = f.read()
111            else:
112                raw = EMPTY_CARD_CACHE
113                if os.access(path, os.R_OK):
114                    with lopen(path, 'rb') as f:
115                        raw = f.read()
117            self.roots[source_id] = safe_xml_fromstring(
118                xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True, verbose=DEBUG)[0]
119            )
120            if self.roots[source_id] is None:
121                raise Exception(('The SONY database at %r is corrupted. Try '
122                        ' disconnecting and reconnecting your reader.')%path)
124        self.ext_paths, self.ext_roots = {}, {}
125        for source_id, path in ext_paths.items():
126            if not os.path.exists(path):
127                try:
128                    with lopen(path, 'wb') as f:
129                        f.write(EMPTY_EXT_CACHE)
130                        fsync(f)
131                except:
132                    pass
133            if os.access(path, os.W_OK):
134                try:
135                    with lopen(path, 'rb') as f:
136                        self.ext_roots[source_id] = safe_xml_fromstring(
137                            xml_to_unicode(f.read(), strip_encoding_pats=True, assume_utf8=True, verbose=DEBUG)[0]
138                        )
139                        self.ext_paths[source_id] = path
140                except:
141                    pass
143        # }}}
145        recs = self.roots[0].xpath('//*[local-name()="records"]')
146        if not recs:
147            raise DeviceError('The SONY XML database is corrupted (no'
148                    ' <records>). Try disconnecting an reconnecting'
149                    ' your reader.')
150        self.record_roots = {}
151        self.record_roots.update(self.roots)
152        self.record_roots[0] = recs[0]
154        self.detect_namespaces()
155        debug_print('Done building XMLCache...')
157    # Playlist management {{{
158    def purge_broken_playlist_items(self, root):
159        id_map = self.build_id_map(root)
160        for pl in root.xpath('//*[local-name()="playlist"]'):
161            seen = set()
162            for item in list(pl):
163                id_ = item.get('id', None)
164                if id_ is None or id_ in seen or id_map.get(id_, None) is None:
165                    if DEBUG:
166                        if id_ is None:
167                            cause = 'invalid id'
168                        elif id_ in seen:
169                            cause = 'duplicate item'
170                        else:
171                            cause = 'id not found'
172                        prints('Purging broken playlist item:',
173                                id_, 'from playlist:', pl.get('title', None),
174                                'because:', cause)
175                    item.getparent().remove(item)
176                    continue
177                seen.add(id_)
179    def prune_empty_playlists(self):
180        for i, root in self.record_roots.items():
181            self.purge_broken_playlist_items(root)
182            for playlist in root.xpath('//*[local-name()="playlist"]'):
183                if len(playlist) == 0 or not playlist.get('title', None):
184                    if DEBUG:
185                        debug_print('Removing playlist id:', playlist.get('id', None),
186                                playlist.get('title', None))
187                    playlist.getparent().remove(playlist)
189    def ensure_unique_playlist_titles(self):
190        for i, root in self.record_roots.items():
191            seen = set()
192            for playlist in root.xpath('//*[local-name()="playlist"]'):
193                title = playlist.get('title', None)
194                if title is None:
195                    title = _('Unnamed')
196                    playlist.set('title', title)
197                if title in seen:
198                    for i in range(2, 1000):
199                        if title+str(i) not in seen:
200                            title = title+str(i)
201                            playlist.set('title', title)
202                            seen.add(title)
203                            break
204                else:
205                    seen.add(title)
207    def build_id_playlist_map(self, bl_index):
208        '''
209        Return a map of the collections in books: {lpaths: [collection names]}
210        '''
211        debug_print('Start build_id_playlist_map')
212        self.ensure_unique_playlist_titles()
213        self.prune_empty_playlists()
214        debug_print('after cleaning playlists')
215        root = self.record_roots[bl_index]
216        if root is None:
217            return
218        id_map = self.build_id_map(root)
219        playlist_map = {}
220        # foreach playlist, get the lpaths for the ids in it, then add to dict
221        for playlist in root.xpath('//*[local-name()="playlist"]'):
222            name = playlist.get('title')
223            if name is None:
224                debug_print('build_id_playlist_map: unnamed playlist!')
225                continue
226            for item in playlist:
227                # translate each id into its lpath
228                id_ = item.get('id', None)
229                if id_ is None:
230                    debug_print('build_id_playlist_map: id_ is None!')
231                    continue
232                bk = id_map.get(id_, None)
233                if bk is None:
234                    debug_print('build_id_playlist_map: book is None!', id_)
235                    continue
236                lpath = bk.get('path', None)
237                if lpath is None:
238                    debug_print('build_id_playlist_map: lpath is None!', id_)
239                    continue
240                if lpath not in playlist_map:
241                    playlist_map[lpath] = []
242                playlist_map[lpath].append(name)
243        debug_print('Finish build_id_playlist_map. Found', len(playlist_map))
244        return playlist_map
246    def reset_existing_playlists_map(self):
247        '''
248        Call this method before calling get_or_create_playlist in the context of
249        a given job. Call it again after deleting any playlists. The current
250        implementation adds all new playlists before deleting any, so that
251        constraint is respected.
252        '''
253        self._playlist_to_playlist_id_map = {}
255    def get_or_create_playlist(self, bl_idx, title):
256        # maintain a private map of playlists to their ids. Don't check if it
257        # exists, because reset_existing_playlist_map must be called before it
258        # is used to ensure that deleted playlists are taken into account
259        root = self.record_roots[bl_idx]
260        if bl_idx not in self._playlist_to_playlist_id_map:
261            self._playlist_to_playlist_id_map[bl_idx] = {}
262            for playlist in root.xpath('//*[local-name()="playlist"]'):
263                pl_title = playlist.get('title', None)
264                if pl_title is not None:
265                    self._playlist_to_playlist_id_map[bl_idx][pl_title] = playlist
266        if title in self._playlist_to_playlist_id_map[bl_idx]:
267            return self._playlist_to_playlist_id_map[bl_idx][title]
268        debug_print('Creating playlist:', title)
269        ans = root.makeelement('{%s}playlist'%self.namespaces[bl_idx],
270                nsmap=root.nsmap, attrib={
271                    'uuid' : uuid(),
272                    'title': title,
273                    'id'   : str(self.max_id(root)+1),
274                    'sourceid': '1'
275                    })
276        root.append(ans)
277        self._playlist_to_playlist_id_map[bl_idx][title] = ans
278        return ans
279    # }}}
281    def fix_ids(self):  # {{{
282        debug_print('Running fix_ids()')
284        def ensure_numeric_ids(root):
285            idmap = {}
286            for x in root.xpath('child::*[@id]'):
287                id_ = x.get('id')
288                try:
289                    id_ = int(id_)
290                except:
291                    x.set('id', '-1')
292                    idmap[id_] = '-1'
294            if DEBUG and idmap:
295                debug_print('Found non numeric ids:')
296                debug_print(list(idmap.keys()))
297            return idmap
299        def remap_playlist_references(root, idmap):
300            for playlist in root.xpath('//*[local-name()="playlist"]'):
301                for item in playlist.xpath(
302                        'descendant::*[@id and local-name()="item"]'):
303                    id_ = item.get('id')
304                    if id_ in idmap:
305                        item.set('id', idmap[id_])
306                        if DEBUG:
307                            debug_print('Remapping id %s to %s'%(id_, idmap[id_]))
309        def ensure_media_xml_base_ids(root):
310            for num, tag in enumerate(('library', 'watchSpecial')):
311                for x in root.xpath('//*[local-name()="%s"]'%tag):
312                    x.set('id', str(num))
314        def rebase_ids(root, base, sourceid, pl_sourceid):
315            'Rebase all ids and also make them consecutive'
316            for item in root.xpath('//*[@sourceid]'):
317                sid = pl_sourceid if item.tag.endswith('playlist') else sourceid
318                item.set('sourceid', str(sid))
319            # Only rebase ids of nodes that are immediate children of the
320            # record root (that way playlist/itemnodes are unaffected
321            items = root.xpath('child::*[@id]')
322            items.sort(key=lambda x: int(x.get('id')))
323            idmap = {}
324            for i, item in enumerate(items):
325                old = int(item.get('id'))
326                new = base + i
327                if old != new:
328                    item.set('id', str(new))
329                    idmap[str(old)] = str(new)
330            return idmap
332        self.prune_empty_playlists()
334        for i in sorted(self.roots.keys()):
335            root = self.record_roots[i]
336            if i == 0:
337                ensure_media_xml_base_ids(root)
339            idmap = ensure_numeric_ids(root)
340            if len(idmap) > 0:
341                debug_print('fix_ids: found some non-numeric ids')
342                remap_playlist_references(root, idmap)
343            if i == 0:
344                sourceid, playlist_sid = 1, 0
345                base = 0
346            else:
347                previous = i-1
348                if previous not in self.roots:
349                    previous = 0
350                max_id = self.max_id(self.roots[previous])
351                sourceid = playlist_sid = max_id + 1
352                base = max_id + 2
353            idmap = rebase_ids(root, base, sourceid, playlist_sid)
354            remap_playlist_references(root, idmap)
356        last_bl = max(self.roots.keys())
357        max_id = self.max_id(self.roots[last_bl])
358        self.roots[0].set('nextID', str(max_id+1))
359        debug_print('Finished running fix_ids()')
361    # }}}
363    # Update JSON from XML {{{
364    def update_booklist(self, bl, bl_index):
365        if bl_index not in self.record_roots:
366            return
367        debug_print('Updating JSON cache:', bl_index)
368        playlist_map = self.build_id_playlist_map(bl_index)
369        root = self.record_roots[bl_index]
370        lpath_map = self.build_lpath_map(root)
371        for book in bl:
372            record = lpath_map.get(book.lpath, None)
373            if record is not None:
374                for thumbnail in record.xpath(
375                        'descendant::*[local-name()="thumbnail"]'):
376                    for img in thumbnail.xpath(
377                            'descendant::*[local-name()="jpeg"]|'
378                            'descendant::*[local-name()="png"]'):
379                        if img.text:
380                            try:
381                                raw = from_base64_bytes(img.text.strip())
382                            except Exception:
383                                continue
384                            book.thumbnail = raw
385                            break
386                    break
387                book.device_collections = playlist_map.get(book.lpath, [])
388        debug_print('Finished updating JSON cache:', bl_index)
390    # }}}
392    # Update XML from JSON {{{
393    def update(self, booklists, collections_attributes, plugboard):
394        debug_print('Starting update', collections_attributes)
395        use_tz_var = False
396        for i, booklist in booklists.items():
397            playlist_map = self.build_id_playlist_map(i)
398            debug_print('Updating XML Cache:', i)
399            root = self.record_roots[i]
400            lpath_map = self.build_lpath_map(root)
401            ext_root = self.ext_roots[i] if i in self.ext_roots else None
402            ext_lpath_map = None
403            if ext_root is not None:
404                ext_lpath_map = self.build_lpath_map(ext_root)
405            gtz_count = ltz_count = 0
406            use_tz_var = False
407            for book in booklist:
408                path = os.path.join(self.prefixes[i], *(book.lpath.split('/')))
409                record = lpath_map.get(book.lpath, None)
410                created = False
411                if record is None:
412                    created = True
413                    record = self.create_text_record(root, i, book.lpath)
414                if plugboard is not None:
415                    newmi = book.deepcopy_metadata()
416                    newmi.template_to_attribute(book, plugboard)
417                    newmi.set('_new_book', getattr(book, '_new_book', False))
418                    book.set('_pb_title_sort',
419                             newmi.get('title_sort', newmi.get('title', None)))
420                    book.set('_pb_author_sort', newmi.get('author_sort', ''))
421                else:
422                    newmi = book
423                (gtz_count, ltz_count, use_tz_var) = \
424                    self.update_text_record(record, newmi, path, i,
425                                            gtz_count, ltz_count, use_tz_var)
426                # Ensure the collections in the XML database are recorded for
427                # this book
428                if book.device_collections is None:
429                    book.device_collections = []
430                book.device_collections = playlist_map.get(book.lpath, [])
432                if created and ext_root is not None and \
433                        ext_lpath_map.get(book.lpath, None) is None:
434                    ext_record = self.create_ext_text_record(ext_root, i,
435                            book.lpath, book.thumbnail)
436                    self.periodicalize_book(book, ext_record)
438            debug_print('Timezone votes: %d GMT, %d LTZ, use_tz_var=%s'%
439                                        (gtz_count, ltz_count, use_tz_var))
440            self.update_playlists(i, root, booklist, collections_attributes)
441        # Update the device collections because update playlist could have added
442        # some new ones.
443        debug_print('In update/ Starting refresh of device_collections')
444        for i, booklist in booklists.items():
445            playlist_map = self.build_id_playlist_map(i)
446            for book in booklist:
447                book.device_collections = playlist_map.get(book.lpath, [])
448        self.fix_ids()
449        debug_print('Finished update')
451    def is_sony_periodical(self, book):
452        if _('News') not in book.tags:
453            return False
454        if not book.lpath.lower().endswith('.epub'):
455            return False
456        if book.pubdate.date() < date(2010, 10, 17):
457            return False
458        return True
460    def periodicalize_book(self, book, record):
461        if not self.is_sony_periodical(book):
462            return
463        record.set('conformsTo',
464            "http://xmlns.sony.net/e-book/prs/periodicals/1.0/newspaper/1.0")
466        record.set('description', '')
468        name = None
469        if '[' in book.title:
470            name = book.title.split('[')[0].strip()
471            if len(name) < 4:
472                name = None
473        if not name:
474            try:
475                name = [t for t in book.tags if t != _('News')][0]
476            except:
477                name = None
479        if not name:
480            name = book.title
482        record.set('periodicalName', name)
484        try:
485            pubdate = strftime(book.pubdate.utctimetuple(),
486                    zone=lambda x : x)
487            record.set('publicationDate', pubdate)
488        except:
489            pass
491    def rebuild_collections(self, booklist, bl_index):
492        if bl_index not in self.record_roots:
493            return
494        root = self.record_roots[bl_index]
495        self.update_playlists(bl_index, root, booklist, [])
496        self.fix_ids()
498    def update_playlists(self, bl_index, root, booklist, collections_attributes):
499        debug_print('Starting update_playlists', collections_attributes, bl_index)
500        self.reset_existing_playlists_map()
501        collections = booklist.get_collections(collections_attributes)
502        lpath_map = self.build_lpath_map(root)
503        debug_print('update_playlists: finished building maps')
504        for category, books in collections.items():
505            records = [lpath_map.get(b.lpath, None) for b in books]
506            # Remove any books that were not found, although this
507            # *should* never happen
508            if DEBUG and None in records:
509                debug_print('WARNING: Some elements in the JSON cache were not'
510                        ' found in the XML cache')
511            records = [x for x in records if x is not None]
512            # Ensure each book has an ID.
513            for rec in records:
514                if rec.get('id', None) is None:
515                    rec.set('id', str(self.max_id(root)+1))
516            ids = [x.get('id', None) for x in records]
517            # Given that we set the ids, there shouldn't be any None's. But
518            # better to be safe...
519            if None in ids:
520                debug_print('WARNING: Some <text> elements do not have ids')
521                ids = [x for x in ids if x is not None]
523            playlist = self.get_or_create_playlist(bl_index, category)
524            # Get the books currently in the playlist. We will need them to be
525            # sure to put back any books that were manually added.
526            playlist_ids = []
527            for item in playlist:
528                id_ = item.get('id', None)
529                if id_ is not None:
530                    playlist_ids.append(id_)
531            # Empty the playlist. We do this so that the playlist will have the
532            # order specified by get_collections
533            for item in list(playlist):
534                playlist.remove(item)
536            # Get a list of ids not known by get_collections
537            extra_ids = [x for x in playlist_ids if x not in ids]
538            # Rebuild the collection in the order specified by get_collections. Then
539            # add the ids that get_collections didn't know about.
540            for id_ in ids + extra_ids:
541                item = playlist.makeelement(
542                        '{%s}item'%self.namespaces[bl_index],
543                        nsmap=playlist.nsmap, attrib={'id':id_})
544                playlist.append(item)
546        # Delete playlist entries not in collections
547        for playlist in root.xpath('//*[local-name()="playlist"]'):
548            title = playlist.get('title', None)
549            if title not in collections:
550                if DEBUG:
551                    debug_print('Deleting playlist:', playlist.get('title', ''))
552                playlist.getparent().remove(playlist)
553                continue
554            books = collections[title]
555            records = [lpath_map.get(b.lpath, None) for b in books]
556            records = [x for x in records if x is not None]
557            ids = [x.get('id', None) for x in records]
558            ids = [x for x in ids if x is not None]
559            for item in list(playlist):
560                if item.get('id', None) not in ids:
561                    if DEBUG:
562                        debug_print('Deleting item:', item.get('id', ''),
563                                'from playlist:', playlist.get('title', ''))
564                    playlist.remove(item)
565        debug_print('Finishing update_playlists')
567    def create_text_record(self, root, bl_id, lpath):
568        namespace = self.namespaces[bl_id]
569        id_ = self.max_id(root)+1
570        attrib = {
571                'page':'0', 'part':'0','pageOffset':'0','scale':'0',
572                'id':str(id_), 'sourceid':'1', 'path':lpath}
573        ans = root.makeelement('{%s}text'%namespace, attrib=attrib, nsmap=root.nsmap)
574        root.append(ans)
575        return ans
577    def create_ext_text_record(self, root, bl_id, lpath, thumbnail):
578        namespace = root.nsmap[None]
579        attrib = {'path': lpath}
580        ans = root.makeelement('{%s}text'%namespace, attrib=attrib,
581                nsmap=root.nsmap)
582        ans.tail = '\n'
583        if len(root) > 0:
584            root[-1].tail = '\n\t'
585        else:
586            root.text = '\n\t'
587        root.append(ans)
588        if thumbnail and thumbnail[-1]:
589            ans.text = '\n' + '\t\t'
590            t = root.makeelement('{%s}thumbnail'%namespace,
591                attrib={'width':str(thumbnail[0]), 'height':str(thumbnail[1])},
592                nsmap=root.nsmap)
593            t.text = 'main_thumbnail.jpg'
594            ans.append(t)
595            t.tail = '\n\t'
596        return ans
598    def update_text_record(self, record, book, path, bl_index,
599                           gtz_count, ltz_count, use_tz_var):
600        '''
601        Update the Sony database from the book. This is done if the timestamp in
602        the db differs from the timestamp on the file.
603        '''
605        # It seems that a Sony device can sometimes know what timezone it is in,
606        # and apparently converts the dates to GMT when it writes them to its
607        # DB. We can detect that a device is timezone-aware because there is a
608        # 'tz' variable in the Sony DB, which we can set to "0" to tell the
609        # device to ignore its own timezone when comparing mtime to the date in
610        # the DB.
612        # Unfortunately, if there is no tz variable in the DB, then we can't
613        # tell when the device applies a timezone conversion. We use a horrible
614        # heuristic to work around this problem. First, set dates only for new
615        # books, trying to avoid upsetting the sony. Second, voting: if a book
616        # is not new, compare its Sony DB date against localtime and gmtime.
617        # Count the matches. When we must set a date, use the one with the most
618        # matches. Use localtime if the case of a tie, and hope it is right.
619        try:
620            timestamp = os.path.getmtime(path)
621        except:
622            debug_print('Failed to get timestamp for:', path)
623            timestamp = time.time()
624        rec_date = record.get('date', None)
626        def clean(x):
627            if isbytestring(x):
628                x = x.decode(preferred_encoding, 'replace')
629            x.replace('\0', '')
630            return x
632        def record_set(k, v):
633            try:
634                record.set(k, clean(v))
635            except:
636                # v is not suitable for XML, ignore
637                pass
639        if not getattr(book, '_new_book', False):  # book is not new
640            if record.get('tz', None) is not None:
641                use_tz_var = True
642            if strftime(timestamp, zone=time.gmtime) == rec_date:
643                gtz_count += 1
644            elif strftime(timestamp, zone=time.localtime) == rec_date:
645                ltz_count += 1
646        else:  # book is new. Set the time using the current votes
647            if use_tz_var:
648                tz = time.localtime
649                record.set('tz', '0')
650                debug_print("Use localtime TZ and tz='0' for new book", book.lpath)
651            elif ltz_count >= gtz_count:
652                tz = time.localtime
653                debug_print("Use localtime TZ for new book", book.lpath)
654            else:
655                tz = time.gmtime
656                debug_print("Use GMT TZ for new book", book.lpath)
657            date = strftime(timestamp, zone=tz)
658            record.set('date', clean(date))
659        try:
660            record.set('size', clean(str(os.stat(path).st_size)))
661        except:
662            record.set('size', '0')
663        title = book.title if book.title else _('Unknown')
664        record_set('title', title)
665        ts = book.title_sort
666        if not ts:
667            ts = title_sort(title)
668        record_set('titleSorter', ts)
669        if self.use_author_sort:
670            if book.author_sort:
671                aus = book.author_sort
672            else:
673                debug_print('Author_sort is None for book', book.lpath)
674                aus = authors_to_sort_string(book.authors)
675            record_set('author', aus)
676        else:
677            record_set('author', authors_to_string(book.authors))
678        ext = os.path.splitext(path)[1]
679        if ext:
680            ext = ext[1:].lower()
681            mime = MIME_MAP.get(ext, None)
682            if mime is None:
683                mime = guess_type('a.'+ext)[0]
684            if mime is not None:
685                record.set('mime', clean(mime))
686        if 'sourceid' not in record.attrib:
687            record.set('sourceid', '1')
688        if 'id' not in record.attrib:
689            num = self.max_id(record.getroottree().getroot())
690            record.set('id', str(num+1))
691        return (gtz_count, ltz_count, use_tz_var)
692    # }}}
694    # Writing the XML files {{{
695    def cleanup_whitespace(self, bl_index):
696        root = self.record_roots[bl_index]
697        level = 2 if bl_index == 0 else 1
698        if len(root) > 0:
699            root.text = '\n'+'\t'*level
700            for child in root:
701                child.tail = '\n'+'\t'*level
702                if len(child) > 0:
703                    child.text = '\n'+'\t'*(level+1)
704                    for gc in child:
705                        gc.tail = '\n'+'\t'*(level+1)
706                    next(child.iterchildren(reversed=True)).tail = '\n'+'\t'*level
707            next(root.iterchildren(reversed=True)).tail = '\n'+'\t'*(level-1)
709    def move_playlists_to_bottom(self):
710        for root in self.record_roots.values():
711            seen = []
712            for pl in root.xpath('//*[local-name()="playlist"]'):
713                pl.getparent().remove(pl)
714                seen.append(pl)
715            for pl in seen:
716                root.append(pl)
718    def write(self):
719        from lxml import etree
721        for i, path in self.paths.items():
722            self.move_playlists_to_bottom()
723            self.cleanup_whitespace(i)
724            raw = etree.tostring(self.roots[i], encoding='UTF-8',
725                    xml_declaration=True)
726            raw = raw.replace(b"<?xml version='1.0' encoding='UTF-8'?>",
727                    b'<?xml version="1.0" encoding="UTF-8"?>')
728            with lopen(path, 'wb') as f:
729                f.write(raw)
730                fsync(f)
732        for i, path in self.ext_paths.items():
733            try:
734                raw = etree.tostring(self.ext_roots[i], encoding='UTF-8',
735                    xml_declaration=True)
736            except:
737                continue
738            raw = raw.replace(b"<?xml version='1.0' encoding='UTF-8'?>",
739                    b'<?xml version="1.0" encoding="UTF-8"?>')
740            with lopen(path, 'wb') as f:
741                f.write(raw)
742                fsync(f)
744    # }}}
746    # Utility methods {{{
748    def build_lpath_map(self, root):
749        m = {}
750        for bk in root.xpath('//*[local-name()="text"]'):
751            m[bk.get('path')] = bk
752        return m
754    def build_id_map(self, root):
755        m = {}
756        for bk in root.xpath('//*[local-name()="text"]'):
757            m[bk.get('id')] = bk
758        return m
760    def book_by_lpath(self, lpath, root):
761        matches = root.xpath('//*[local-name()="text" and @path="%s"]'%lpath)
762        if matches:
763            return matches[0]
765    def max_id(self, root):
766        ans = -1
767        for x in root.xpath('//*[@id]'):
768            id_ = x.get('id')
769            try:
770                num = int(id_)
771                if num > ans:
772                    ans = num
773            except:
774                continue
775        return ans
777    def detect_namespaces(self):
778        self.nsmaps = {}
779        for i, root in self.roots.items():
780            self.nsmaps[i] = root.nsmap
782        self.namespaces = {}
783        for i in self.roots:
784            for c in ('library', 'text', 'image', 'playlist', 'thumbnail',
785                    'watchSpecial'):
786                matches = self.record_roots[i].xpath('//*[local-name()="%s"]'%c)
787                if matches:
788                    e = matches[0]
789                    self.namespaces[i] = e.nsmap[e.prefix]
790                    break
791            if i not in self.namespaces:
792                ns = self.nsmaps[i].get(None, None)
793                for prefix in self.nsmaps[i]:
794                    if prefix is not None:
795                        ns = self.nsmaps[i][prefix]
796                        break
797                self.namespaces[i] = ns
799    # }}}