#!/usr/local/bin/python3.8
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai


__license__   = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os, time
from datetime import date

from calibre import prints, guess_type, isbytestring, fsync
from calibre.devices.errors import DeviceError
from calibre.devices.usbms.driver import debug_print
from calibre.constants import DEBUG, preferred_encoding
from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.metadata import authors_to_string, title_sort, \
        authors_to_sort_string
from polyglot.binary import from_base64_bytes

'''
cacheExt.xml

Periodical identifier sample from a PRS-650:

<?xml version="1.0" encoding="UTF-8"?>
<cacheExt xmlns="http://www.sony.com/xmlns/product/prs/device/1">
    <text conformsTo="http://xmlns.sony.net/e-book/prs/periodicals/1.0/newspaper/1.0" periodicalName="The Atlantic"
          description="Current affairs and politics focused on the US" publicationDate="Tue, 19 Oct 2010 00:00:00 GMT"
          path="database/media/books/calibre/Atlantic [Mon, 18 Oct 2010], The - calibre_1701.epub">
        <thumbnail width="167" height="217">main_thumbnail.jpg</thumbnail>
    </text>
</cacheExt>

'''

# Utility functions {{{
# Content used for a card cache that cannot be read from disk.
EMPTY_CARD_CACHE = b'''\
<?xml version="1.0" encoding="UTF-8"?>
<cache xmlns="http://www.kinoma.com/FskCache/1">
</cache>
'''

# Content written to a cacheExt.xml file that does not exist yet.
EMPTY_EXT_CACHE = b'''\
<?xml version="1.0" encoding="UTF-8"?>
<cacheExt xmlns="http://www.sony.com/xmlns/product/prs/device/1">
</cacheExt>
'''

# File extension (lower case, without the dot) -> mime type stored in the DB.
MIME_MAP   = {
        "lrf" : "application/x-sony-bbeb",
        'lrx' : 'application/x-sony-bbeb',
        "rtf" : "application/rtf",
        "pdf" : "application/pdf",
        "txt" : "text/plain" ,
        'epub': 'application/epub+zip',
        }

# English day/month abbreviations <-> the numbers used by the %w and %m
# format codes. The date strings are translated by hand through these maps
# rather than with %a/%b.
DAY_MAP   = dict(Sun=0, Mon=1, Tue=2, Wed=3, Thu=4, Fri=5, Sat=6)
MONTH_MAP = dict(Jan=1, Feb=2, Mar=3, Apr=4, May=5, Jun=6, Jul=7, Aug=8, Sep=9, Oct=10, Nov=11, Dec=12)
INVERSE_DAY_MAP = dict(zip(DAY_MAP.values(), DAY_MAP.keys()))
INVERSE_MONTH_MAP = dict(zip(MONTH_MAP.values(), MONTH_MAP.keys()))


def strptime(src):
    '''
    Parse a date string of the form ``Tue, 19 Oct 2010 00:00:00 GMT`` into a
    ``time.struct_time``.

    The day and month names are replaced by their numbers (via DAY_MAP and
    MONTH_MAP) before the string is handed to ``time.strptime``. Raises
    KeyError for an unknown day or month abbreviation.
    '''
    src = src.strip()
    src = src.split()
    # src[0] looks like 'Tue,': drop the trailing comma for the lookup
    src[0] = str(DAY_MAP[src[0][:-1]])+','
    src[2] = str(MONTH_MAP[src[2]])
    return time.strptime(' '.join(src), '%w, %d %m %Y %H:%M:%S %Z')


def strftime(epoch, zone=time.localtime):
    '''
    Format ``epoch`` as a string of the form ``Tue, 19 Oct 2010 00:00:00 GMT``.

    :param epoch: Value passed to ``zone``; the result of ``zone(epoch)`` is
                  passed to ``time.strftime``.
    :param zone:  Callable converting ``epoch`` to a time tuple, e.g.
                  ``time.localtime`` or ``time.gmtime``.

    The literal suffix ``GMT`` is always emitted, whichever ``zone`` is used.
    If formatting ``zone(epoch)`` fails, ``zone()`` called without arguments
    is formatted instead.
    '''
    try:
        src = time.strftime("%w, %d %m %Y %H:%M:%S GMT", zone(epoch)).split()
    except Exception:
        src = time.strftime("%w, %d %m %Y %H:%M:%S GMT", zone()).split()

    # Translate the numeric day-of-week and month back to English names
    src[0] = INVERSE_DAY_MAP[int(src[0][:-1])]+','
    src[2] = INVERSE_MONTH_MAP[int(src[2])]
    return ' '.join(src)


def uuid():
    '''
    Return a freshly generated random UUID as an upper case string with only
    its first hyphen removed.
    '''
    from uuid import uuid4
    return str(uuid4()).replace('-', '', 1).upper()

# }}}


class XMLCache:
    '''
    In-memory representation of the SONY XML databases (one per storage
    location) and the accompanying cacheExt.xml files.

    Keys of the various per-location dictionaries are the same keys used in
    the ``paths``/``ext_paths``/``prefixes`` mappings given to the
    constructor; key ``0`` is treated specially as the database that must
    exist and must contain a ``<records>`` element.
    '''

    def __init__(self, paths, ext_paths, prefixes, use_author_sort):
        '''
        Parse all XML databases.

        :param paths:     Mapping of location id -> path of the XML cache.
        :param ext_paths: Mapping of location id -> path of cacheExt.xml.
        :param prefixes:  Mapping of location id -> path prefix that book
                          lpaths are joined to.
        :param use_author_sort: If True the author_sort value is written to
                          the ``author`` attribute instead of the authors.

        Raises DeviceError if the cache for location 0 is missing or has no
        ``<records>`` element.
        '''
        from calibre.utils.xml_parse import safe_xml_fromstring

        if DEBUG:
            debug_print('Building XMLCache...', paths)
        self.paths = paths
        self.prefixes = prefixes
        self.use_author_sort = use_author_sort

        # Parse XML files {{{
        self.roots = {}
        for source_id, path in paths.items():
            if source_id == 0:
                if not os.path.exists(path):
                    raise DeviceError(('The SONY XML cache %r does not exist. Try'
                        ' disconnecting and reconnecting your reader.')%repr(path))
                with lopen(path, 'rb') as f:
                    raw = f.read()
            else:
                # Unreadable card caches are replaced by an empty cache
                raw = EMPTY_CARD_CACHE
                if os.access(path, os.R_OK):
                    with lopen(path, 'rb') as f:
                        raw = f.read()

            self.roots[source_id] = safe_xml_fromstring(
                xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True, verbose=DEBUG)[0]
            )
            if self.roots[source_id] is None:
                raise Exception(('The SONY database at %r is corrupted. Try '
                        ' disconnecting and reconnecting your reader.')%path)

        self.ext_paths, self.ext_roots = {}, {}
        for source_id, path in ext_paths.items():
            if not os.path.exists(path):
                # Best effort: a cacheExt.xml that cannot be created is
                # simply not used (the os.access check below then fails).
                try:
                    with lopen(path, 'wb') as f:
                        f.write(EMPTY_EXT_CACHE)
                        fsync(f)
                except Exception:
                    debug_print('Failed to create ext cache:', path)
            if os.access(path, os.W_OK):
                # Best effort: an unparseable cacheExt.xml is ignored
                try:
                    with lopen(path, 'rb') as f:
                        self.ext_roots[source_id] = safe_xml_fromstring(
                            xml_to_unicode(f.read(), strip_encoding_pats=True, assume_utf8=True, verbose=DEBUG)[0]
                        )
                        self.ext_paths[source_id] = path
                except Exception:
                    debug_print('Failed to read ext cache:', path)

        # }}}

        recs = self.roots[0].xpath('//*[local-name()="records"]')
        if not recs:
            raise DeviceError('The SONY XML database is corrupted (no'
                    ' <records>). Try disconnecting and reconnecting'
                    ' your reader.')
        # For location 0 the records live in the <records> element, for all
        # other locations the document root itself is used.
        self.record_roots = {}
        self.record_roots.update(self.roots)
        self.record_roots[0] = recs[0]

        self.detect_namespaces()
        debug_print('Done building XMLCache...')

    # Playlist management {{{
    def purge_broken_playlist_items(self, root):
        '''
        Remove from every playlist under ``root`` the items that have no id,
        that repeat an id already seen in the same playlist, or whose id does
        not belong to any ``<text>`` record.
        '''
        id_map = self.build_id_map(root)
        for pl in root.xpath('//*[local-name()="playlist"]'):
            seen = set()
            # Iterate over a copy, as items are removed while looping
            for item in list(pl):
                id_ = item.get('id', None)
                if id_ is None or id_ in seen or id_map.get(id_, None) is None:
                    if DEBUG:
                        if id_ is None:
                            cause = 'invalid id'
                        elif id_ in seen:
                            cause = 'duplicate item'
                        else:
                            cause = 'id not found'
                        prints('Purging broken playlist item:',
                                id_, 'from playlist:', pl.get('title', None),
                                'because:', cause)
                    item.getparent().remove(item)
                    continue
                seen.add(id_)

    def prune_empty_playlists(self):
        '''
        Purge broken playlist items, then remove every playlist that has no
        children or no (non-empty) title, in all record roots.
        '''
        for i, root in self.record_roots.items():
            self.purge_broken_playlist_items(root)
            for playlist in root.xpath('//*[local-name()="playlist"]'):
                if len(playlist) == 0 or not playlist.get('title', None):
                    if DEBUG:
                        debug_print('Removing playlist id:', playlist.get('id', None),
                                playlist.get('title', None))
                    playlist.getparent().remove(playlist)

    def ensure_unique_playlist_titles(self):
        '''
        Give untitled playlists a default title and make duplicate titles
        unique (per record root) by appending the first unused number in the
        range 2..999.
        '''
        for i, root in self.record_roots.items():
            seen = set()
            for playlist in root.xpath('//*[local-name()="playlist"]'):
                title = playlist.get('title', None)
                if title is None:
                    title = _('Unnamed')
                    playlist.set('title', title)
                if title in seen:
                    for suffix in range(2, 1000):
                        candidate = title+str(suffix)
                        if candidate not in seen:
                            title = candidate
                            playlist.set('title', title)
                            seen.add(title)
                            break
                else:
                    seen.add(title)

    def build_id_playlist_map(self, bl_index):
        '''
        Return a map of the collections in books: {lpaths: [collection names]}

        As a side effect the playlists of all record roots are cleaned up
        first (unique titles, no empty playlists, no broken items). An empty
        map is returned if there is no record root for ``bl_index``.
        '''
        debug_print('Start build_id_playlist_map')
        self.ensure_unique_playlist_titles()
        self.prune_empty_playlists()
        debug_print('after cleaning playlists')
        root = self.record_roots[bl_index]
        if root is None:
            # Callers use the result as a mapping, so never hand back None
            return {}
        id_map = self.build_id_map(root)
        playlist_map = {}
        # foreach playlist, get the lpaths for the ids in it, then add to dict
        for playlist in root.xpath('//*[local-name()="playlist"]'):
            name = playlist.get('title')
            if name is None:
                debug_print('build_id_playlist_map: unnamed playlist!')
                continue
            for item in playlist:
                # translate each id into its lpath
                id_ = item.get('id', None)
                if id_ is None:
                    debug_print('build_id_playlist_map: id_ is None!')
                    continue
                bk = id_map.get(id_, None)
                if bk is None:
                    debug_print('build_id_playlist_map: book is None!', id_)
                    continue
                lpath = bk.get('path', None)
                if lpath is None:
                    debug_print('build_id_playlist_map: lpath is None!', id_)
                    continue
                if lpath not in playlist_map:
                    playlist_map[lpath] = []
                playlist_map[lpath].append(name)
        debug_print('Finish build_id_playlist_map. Found', len(playlist_map))
        return playlist_map

    def reset_existing_playlists_map(self):
        '''
        Call this method before calling get_or_create_playlist in the context of
        a given job. Call it again after deleting any playlists. The current
        implementation adds all new playlists before deleting any, so that
        constraint is respected.
        '''
        self._playlist_to_playlist_id_map = {}

    def get_or_create_playlist(self, bl_idx, title):
        '''
        Return the playlist element titled ``title`` in the record root of
        ``bl_idx``, creating and appending a new one if none exists.
        '''
        # maintain a private map of playlists to their ids. Don't check if it
        # exists, because reset_existing_playlist_map must be called before it
        # is used to ensure that deleted playlists are taken into account
        root = self.record_roots[bl_idx]
        if bl_idx not in self._playlist_to_playlist_id_map:
            self._playlist_to_playlist_id_map[bl_idx] = {}
            for playlist in root.xpath('//*[local-name()="playlist"]'):
                pl_title = playlist.get('title', None)
                if pl_title is not None:
                    self._playlist_to_playlist_id_map[bl_idx][pl_title] = playlist
        if title in self._playlist_to_playlist_id_map[bl_idx]:
            return self._playlist_to_playlist_id_map[bl_idx][title]
        debug_print('Creating playlist:', title)
        ans = root.makeelement('{%s}playlist'%self.namespaces[bl_idx],
                nsmap=root.nsmap, attrib={
                    'uuid' : uuid(),
                    'title': title,
                    'id'   : str(self.max_id(root)+1),
                    'sourceid': '1'
                    })
        root.append(ans)
        self._playlist_to_playlist_id_map[bl_idx][title] = ans
        return ans
    # }}}

    def fix_ids(self):  # {{{
        '''
        Normalize the ids in all databases: prune empty playlists, replace
        non-numeric ids, renumber the direct children of every record root
        consecutively, update the playlist items that reference renumbered
        ids and finally store the next free id in the ``nextID`` attribute of
        the root of location 0.
        '''
        debug_print('Running fix_ids()')

        def ensure_numeric_ids(root):
            'Set non-numeric ids of direct children to -1; return {old: "-1"}'
            idmap = {}
            for x in root.xpath('child::*[@id]'):
                id_ = x.get('id')
                try:
                    id_ = int(id_)
                except (TypeError, ValueError):
                    x.set('id', '-1')
                    # id_ is still the original (string) value here
                    idmap[id_] = '-1'

            if DEBUG and idmap:
                debug_print('Found non numeric ids:')
                debug_print(list(idmap.keys()))
            return idmap

        def remap_playlist_references(root, idmap):
            'Apply the {old id: new id} map to all playlist items'
            for playlist in root.xpath('//*[local-name()="playlist"]'):
                for item in playlist.xpath(
                        'descendant::*[@id and local-name()="item"]'):
                    id_ = item.get('id')
                    if id_ in idmap:
                        item.set('id', idmap[id_])
                        if DEBUG:
                            debug_print('Remapping id %s to %s'%(id_, idmap[id_]))

        def ensure_media_xml_base_ids(root):
            'Give <library> the id 0 and <watchSpecial> the id 1'
            for num, tag in enumerate(('library', 'watchSpecial')):
                for x in root.xpath('//*[local-name()="%s"]'%tag):
                    x.set('id', str(num))

        def rebase_ids(root, base, sourceid, pl_sourceid):
            'Rebase all ids and also make them consecutive'
            for item in root.xpath('//*[@sourceid]'):
                sid = pl_sourceid if item.tag.endswith('playlist') else sourceid
                item.set('sourceid', str(sid))
            # Only rebase ids of nodes that are immediate children of the
            # record root (that way playlist/itemnodes are unaffected
            items = root.xpath('child::*[@id]')
            items.sort(key=lambda x: int(x.get('id')))
            idmap = {}
            for i, item in enumerate(items):
                old = int(item.get('id'))
                new = base + i
                if old != new:
                    item.set('id', str(new))
                    idmap[str(old)] = str(new)
            return idmap

        self.prune_empty_playlists()

        for i in sorted(self.roots.keys()):
            root = self.record_roots[i]
            if i == 0:
                ensure_media_xml_base_ids(root)

            idmap = ensure_numeric_ids(root)
            if len(idmap) > 0:
                debug_print('fix_ids: found some non-numeric ids')
                remap_playlist_references(root, idmap)
            if i == 0:
                sourceid, playlist_sid = 1, 0
                base = 0
            else:
                # Continue numbering after the largest id of the previous
                # database (or of database 0 if there is no previous one)
                previous = i-1
                if previous not in self.roots:
                    previous = 0
                max_id = self.max_id(self.roots[previous])
                sourceid = playlist_sid = max_id + 1
                base = max_id + 2
            idmap = rebase_ids(root, base, sourceid, playlist_sid)
            remap_playlist_references(root, idmap)

        last_bl = max(self.roots.keys())
        max_id = self.max_id(self.roots[last_bl])
        self.roots[0].set('nextID', str(max_id+1))
        debug_print('Finished running fix_ids()')

    # }}}

    # Update JSON from XML {{{
    def update_booklist(self, bl, bl_index):
        '''
        Copy thumbnails and collection membership from the XML database of
        ``bl_index`` into the books of the booklist ``bl``. Does nothing if
        there is no record root for ``bl_index``.
        '''
        if bl_index not in self.record_roots:
            return
        debug_print('Updating JSON cache:', bl_index)
        playlist_map = self.build_id_playlist_map(bl_index)
        root = self.record_roots[bl_index]
        lpath_map = self.build_lpath_map(root)
        for book in bl:
            record = lpath_map.get(book.lpath, None)
            if record is not None:
                # Only the first <thumbnail> is examined; within it the first
                # jpeg/png child that decodes as base64 is used.
                for thumbnail in record.xpath(
                        'descendant::*[local-name()="thumbnail"]'):
                    for img in thumbnail.xpath(
                            'descendant::*[local-name()="jpeg"]|'
                            'descendant::*[local-name()="png"]'):
                        if img.text:
                            try:
                                raw = from_base64_bytes(img.text.strip())
                            except Exception:
                                continue
                            book.thumbnail = raw
                            break
                    break
                book.device_collections = playlist_map.get(book.lpath, [])
        debug_print('Finished updating JSON cache:', bl_index)

    # }}}

    # Update XML from JSON {{{
    def update(self, booklists, collections_attributes, plugboard):
        '''
        Update the XML databases from the booklists.

        :param booklists: Mapping of location id -> booklist.
        :param collections_attributes: Passed on to update_playlists.
        :param plugboard: If not None, a metadata copy with the plugboard
                          applied is written to the database instead of the
                          book itself.

        For every book a ``<text>`` record is created if missing and then
        updated; newly created books also get a cacheExt.xml record when an
        ext database is available. Afterwards playlists are updated, the
        device collections of all books are refreshed and ids are fixed.
        '''
        debug_print('Starting update', collections_attributes)
        use_tz_var = False
        for i, booklist in booklists.items():
            playlist_map = self.build_id_playlist_map(i)
            debug_print('Updating XML Cache:', i)
            root = self.record_roots[i]
            lpath_map = self.build_lpath_map(root)
            ext_root = self.ext_roots[i] if i in self.ext_roots else None
            ext_lpath_map = None
            if ext_root is not None:
                ext_lpath_map = self.build_lpath_map(ext_root)
            # Timezone votes are collected per database, see
            # update_text_record
            gtz_count = ltz_count = 0
            use_tz_var = False
            for book in booklist:
                path = os.path.join(self.prefixes[i], *(book.lpath.split('/')))
                record = lpath_map.get(book.lpath, None)
                created = False
                if record is None:
                    created = True
                    record = self.create_text_record(root, i, book.lpath)
                if plugboard is not None:
                    newmi = book.deepcopy_metadata()
                    newmi.template_to_attribute(book, plugboard)
                    newmi.set('_new_book', getattr(book, '_new_book', False))
                    book.set('_pb_title_sort',
                             newmi.get('title_sort', newmi.get('title', None)))
                    book.set('_pb_author_sort', newmi.get('author_sort', ''))
                else:
                    newmi = book
                (gtz_count, ltz_count, use_tz_var) = \
                    self.update_text_record(record, newmi, path, i,
                                            gtz_count, ltz_count, use_tz_var)
                # Ensure the collections in the XML database are recorded for
                # this book
                if book.device_collections is None:
                    book.device_collections = []
                book.device_collections = playlist_map.get(book.lpath, [])

                if created and ext_root is not None and \
                        ext_lpath_map.get(book.lpath, None) is None:
                    ext_record = self.create_ext_text_record(ext_root, i,
                            book.lpath, book.thumbnail)
                    self.periodicalize_book(book, ext_record)

            debug_print('Timezone votes: %d GMT, %d LTZ, use_tz_var=%s'%
                                        (gtz_count, ltz_count, use_tz_var))
            self.update_playlists(i, root, booklist, collections_attributes)
        # Update the device collections because update playlist could have added
        # some new ones.
        debug_print('In update/ Starting refresh of device_collections')
        for i, booklist in booklists.items():
            playlist_map = self.build_id_playlist_map(i)
            for book in booklist:
                book.device_collections = playlist_map.get(book.lpath, [])
        self.fix_ids()
        debug_print('Finished update')

    def is_sony_periodical(self, book):
        '''
        Return True if ``book`` is tagged as News, is an EPUB file and was
        published on or after 2010-10-17.
        '''
        if _('News') not in book.tags:
            return False
        if not book.lpath.lower().endswith('.epub'):
            return False
        if book.pubdate.date() < date(2010, 10, 17):
            return False
        return True

    def periodicalize_book(self, book, record):
        '''
        Mark the cacheExt ``record`` of ``book`` as a periodical by setting
        the conformsTo, description, periodicalName and (if it can be
        computed) publicationDate attributes. Does nothing for books for
        which is_sony_periodical() is False.
        '''
        if not self.is_sony_periodical(book):
            return
        record.set('conformsTo',
            "http://xmlns.sony.net/e-book/prs/periodicals/1.0/newspaper/1.0")

        record.set('description', '')

        # The periodical name is, in order of preference: the part of the
        # title before '[' (if at least 4 characters long), the first tag
        # that is not News, the full title.
        name = None
        if '[' in book.title:
            name = book.title.split('[')[0].strip()
            if len(name) < 4:
                name = None
        if not name:
            try:
                name = [t for t in book.tags if t != _('News')][0]
            except Exception:
                name = None

        if not name:
            name = book.title

        record.set('periodicalName', name)

        # Best effort: the record is left without a publicationDate if the
        # date cannot be formatted
        try:
            pubdate = strftime(book.pubdate.utctimetuple(),
                    zone=lambda x : x)
            record.set('publicationDate', pubdate)
        except Exception:
            pass

    def rebuild_collections(self, booklist, bl_index):
        '''
        Rebuild the playlists of ``bl_index`` from ``booklist`` using an
        empty list of collection attributes, then fix ids. Does nothing if
        there is no record root for ``bl_index``.
        '''
        if bl_index not in self.record_roots:
            return
        root = self.record_roots[bl_index]
        self.update_playlists(bl_index, root, booklist, [])
        self.fix_ids()

    def update_playlists(self, bl_index, root, booklist, collections_attributes):
        '''
        Synchronize the playlists under ``root`` with the collections
        returned by ``booklist.get_collections(collections_attributes)``.

        Playlists are created as needed and rebuilt in collection order,
        playlists whose title is not a collection are deleted and items not
        belonging to the collection are removed.
        '''
        debug_print('Starting update_playlists', collections_attributes, bl_index)
        self.reset_existing_playlists_map()
        collections = booklist.get_collections(collections_attributes)
        lpath_map = self.build_lpath_map(root)
        debug_print('update_playlists: finished building maps')
        for category, books in collections.items():
            records = [lpath_map.get(b.lpath, None) for b in books]
            # Remove any books that were not found, although this
            # *should* never happen
            if DEBUG and None in records:
                debug_print('WARNING: Some elements in the JSON cache were not'
                        ' found in the XML cache')
            records = [x for x in records if x is not None]
            # Ensure each book has an ID.
            for rec in records:
                if rec.get('id', None) is None:
                    rec.set('id', str(self.max_id(root)+1))
            ids = [x.get('id', None) for x in records]
            # Given that we set the ids, there shouldn't be any None's. But
            # better to be safe...
            if None in ids:
                debug_print('WARNING: Some <text> elements do not have ids')
                ids = [x for x in ids if x is not None]

            playlist = self.get_or_create_playlist(bl_index, category)
            # Get the books currently in the playlist. We will need them to be
            # sure to put back any books that were manually added.
            playlist_ids = []
            for item in playlist:
                id_ = item.get('id', None)
                if id_ is not None:
                    playlist_ids.append(id_)
            # Empty the playlist. We do this so that the playlist will have the
            # order specified by get_collections
            for item in list(playlist):
                playlist.remove(item)

            # Get a list of ids not known by get_collections
            known_ids = set(ids)
            extra_ids = [x for x in playlist_ids if x not in known_ids]
            # Rebuild the collection in the order specified by get_collections. Then
            # add the ids that get_collections didn't know about.
            for id_ in ids + extra_ids:
                item = playlist.makeelement(
                        '{%s}item'%self.namespaces[bl_index],
                        nsmap=playlist.nsmap, attrib={'id':id_})
                playlist.append(item)

        # Delete playlist entries not in collections
        for playlist in root.xpath('//*[local-name()="playlist"]'):
            title = playlist.get('title', None)
            if title not in collections:
                if DEBUG:
                    debug_print('Deleting playlist:', playlist.get('title', ''))
                playlist.getparent().remove(playlist)
                continue
            books = collections[title]
            records = [lpath_map.get(b.lpath, None) for b in books]
            records = [x for x in records if x is not None]
            ids = {x.get('id', None) for x in records}
            ids.discard(None)
            for item in list(playlist):
                if item.get('id', None) not in ids:
                    if DEBUG:
                        debug_print('Deleting item:', item.get('id', ''),
                                'from playlist:', playlist.get('title', ''))
                    playlist.remove(item)
        debug_print('Finishing update_playlists')

    def create_text_record(self, root, bl_id, lpath):
        '''
        Append a new ``<text>`` record for ``lpath`` to ``root`` and return
        it. The record gets the next free id in ``root``.
        '''
        namespace = self.namespaces[bl_id]
        id_ = self.max_id(root)+1
        attrib = {
                'page':'0', 'part':'0','pageOffset':'0','scale':'0',
                'id':str(id_), 'sourceid':'1', 'path':lpath}
        ans = root.makeelement('{%s}text'%namespace, attrib=attrib, nsmap=root.nsmap)
        root.append(ans)
        return ans

    def create_ext_text_record(self, root, bl_id, lpath, thumbnail):
        '''
        Append a new ``<text>`` record for ``lpath`` to the cacheExt root
        ``root`` and return it.

        :param thumbnail: If truthy with a truthy last element, a
                          ``<thumbnail>`` child is added whose width and
                          height are taken from the first two elements.
        '''
        namespace = root.nsmap[None]
        attrib = {'path': lpath}
        ans = root.makeelement('{%s}text'%namespace, attrib=attrib,
                nsmap=root.nsmap)
        # Whitespace is set by hand to keep the file indented with tabs
        ans.tail = '\n'
        if len(root) > 0:
            root[-1].tail = '\n\t'
        else:
            root.text = '\n\t'
        root.append(ans)
        if thumbnail and thumbnail[-1]:
            ans.text = '\n' + '\t\t'
            t = root.makeelement('{%s}thumbnail'%namespace,
                attrib={'width':str(thumbnail[0]), 'height':str(thumbnail[1])},
                nsmap=root.nsmap)
            t.text = 'main_thumbnail.jpg'
            ans.append(t)
            t.tail = '\n\t'
        return ans

    def update_text_record(self, record, book, path, bl_index,
                           gtz_count, ltz_count, use_tz_var):
        '''
        Update the Sony database from the book. This is done if the timestamp in
        the db differs from the timestamp on the file.

        :param record: The ``<text>`` element to update.
        :param book:   The metadata to write.
        :param path:   Path of the book file, used for mtime, size and mime.
        :param gtz_count, ltz_count, use_tz_var: Running timezone votes, see
                       the comments below.
        :return: The updated ``(gtz_count, ltz_count, use_tz_var)`` tuple.
        '''

        # It seems that a Sony device can sometimes know what timezone it is in,
        # and apparently converts the dates to GMT when it writes them to its
        # DB. We can detect that a device is timezone-aware because there is a
        # 'tz' variable in the Sony DB, which we can set to "0" to tell the
        # device to ignore its own timezone when comparing mtime to the date in
        # the DB.

        # Unfortunately, if there is no tz variable in the DB, then we can't
        # tell when the device applies a timezone conversion. We use a horrible
        # heuristic to work around this problem. First, set dates only for new
        # books, trying to avoid upsetting the sony. Second, voting: if a book
        # is not new, compare its Sony DB date against localtime and gmtime.
        # Count the matches. When we must set a date, use the one with the most
        # matches. Use localtime if the case of a tie, and hope it is right.
        try:
            timestamp = os.path.getmtime(path)
        except Exception:
            debug_print('Failed to get timestamp for:', path)
            timestamp = time.time()
        rec_date = record.get('date', None)

        def clean(x):
            'Decode bytes using the preferred encoding and strip NUL characters'
            if isbytestring(x):
                x = x.decode(preferred_encoding, 'replace')
            # str.replace returns a new string, the result must be kept
            x = x.replace('\0', '')
            return x

        def record_set(k, v):
            try:
                record.set(k, clean(v))
            except Exception:
                # v is not suitable for XML, ignore
                pass

        if not getattr(book, '_new_book', False):  # book is not new
            if record.get('tz', None) is not None:
                use_tz_var = True
            if strftime(timestamp, zone=time.gmtime) == rec_date:
                gtz_count += 1
            elif strftime(timestamp, zone=time.localtime) == rec_date:
                ltz_count += 1
        else:  # book is new. Set the time using the current votes
            if use_tz_var:
                tz = time.localtime
                record.set('tz', '0')
                debug_print("Use localtime TZ and tz='0' for new book", book.lpath)
            elif ltz_count >= gtz_count:
                tz = time.localtime
                debug_print("Use localtime TZ for new book", book.lpath)
            else:
                tz = time.gmtime
                debug_print("Use GMT TZ for new book", book.lpath)
            date_str = strftime(timestamp, zone=tz)
            record.set('date', clean(date_str))
        try:
            record.set('size', clean(str(os.stat(path).st_size)))
        except Exception:
            record.set('size', '0')
        title = book.title if book.title else _('Unknown')
        record_set('title', title)
        ts = book.title_sort
        if not ts:
            ts = title_sort(title)
        record_set('titleSorter', ts)
        if self.use_author_sort:
            if book.author_sort:
                aus = book.author_sort
            else:
                debug_print('Author_sort is None for book', book.lpath)
                aus = authors_to_sort_string(book.authors)
            record_set('author', aus)
        else:
            record_set('author', authors_to_string(book.authors))
        ext = os.path.splitext(path)[1]
        if ext:
            ext = ext[1:].lower()
            mime = MIME_MAP.get(ext, None)
            if mime is None:
                mime = guess_type('a.'+ext)[0]
            if mime is not None:
                record.set('mime', clean(mime))
        if 'sourceid' not in record.attrib:
            record.set('sourceid', '1')
        if 'id' not in record.attrib:
            num = self.max_id(record.getroottree().getroot())
            record.set('id', str(num+1))
        return (gtz_count, ltz_count, use_tz_var)
    # }}}

    # Writing the XML files {{{
    def cleanup_whitespace(self, bl_index):
        '''
        Re-indent (with tabs) the children and grandchildren of the record
        root of ``bl_index``. The base indentation is two tabs for location
        0 and one tab for every other location.
        '''
        root = self.record_roots[bl_index]
        level = 2 if bl_index == 0 else 1
        if len(root) > 0:
            root.text = '\n'+'\t'*level
            for child in root:
                child.tail = '\n'+'\t'*level
                if len(child) > 0:
                    child.text = '\n'+'\t'*(level+1)
                    for gc in child:
                        gc.tail = '\n'+'\t'*(level+1)
                    # The closing tag of child is indented one level less
                    next(child.iterchildren(reversed=True)).tail = '\n'+'\t'*level
            next(root.iterchildren(reversed=True)).tail = '\n'+'\t'*(level-1)

    def move_playlists_to_bottom(self):
        '''
        In every record root, detach all playlists and re-append them (in
        their original order) at the end of the record root.
        '''
        for root in self.record_roots.values():
            seen = []
            for pl in root.xpath('//*[local-name()="playlist"]'):
                pl.getparent().remove(pl)
                seen.append(pl)
            for pl in seen:
                root.append(pl)

    def write(self):
        '''
        Serialize all databases and all successfully loaded cacheExt files
        back to their paths as UTF-8, using double quotes in the XML
        declaration. An ext database that fails to serialize is skipped.
        '''
        from lxml import etree

        for i, path in self.paths.items():
            self.move_playlists_to_bottom()
            self.cleanup_whitespace(i)
            raw = etree.tostring(self.roots[i], encoding='UTF-8',
                    xml_declaration=True)
            raw = raw.replace(b"<?xml version='1.0' encoding='UTF-8'?>",
                    b'<?xml version="1.0" encoding="UTF-8"?>')
            with lopen(path, 'wb') as f:
                f.write(raw)
                fsync(f)

        for i, path in self.ext_paths.items():
            try:
                raw = etree.tostring(self.ext_roots[i], encoding='UTF-8',
                    xml_declaration=True)
            except Exception:
                continue
            raw = raw.replace(b"<?xml version='1.0' encoding='UTF-8'?>",
                    b'<?xml version="1.0" encoding="UTF-8"?>')
            with lopen(path, 'wb') as f:
                f.write(raw)
                fsync(f)

    # }}}

    # Utility methods {{{

    def build_lpath_map(self, root):
        'Return a map of path attribute -> <text> element for ``root``'
        m = {}
        for bk in root.xpath('//*[local-name()="text"]'):
            m[bk.get('path')] = bk
        return m

    def build_id_map(self, root):
        'Return a map of id attribute -> <text> element for ``root``'
        m = {}
        for bk in root.xpath('//*[local-name()="text"]'):
            m[bk.get('id')] = bk
        return m

    def book_by_lpath(self, lpath, root):
        '''
        Return the first ``<text>`` element whose path attribute equals
        ``lpath``, or None if there is no such element.
        '''
        # lpath is passed as an XPath variable rather than interpolated into
        # the expression, so paths containing quotes are matched correctly
        matches = root.xpath('//*[local-name()="text" and @path=$lpath]',
                lpath=lpath)
        if matches:
            return matches[0]

    def max_id(self, root):
        '''
        Return the largest integer id attribute found anywhere in the
        document ``root`` belongs to, or -1 if there is none. Non-numeric
        ids are ignored.
        '''
        ans = -1
        for x in root.xpath('//*[@id]'):
            id_ = x.get('id')
            try:
                num = int(id_)
                if num > ans:
                    ans = num
            except (TypeError, ValueError):
                continue
        return ans

    def detect_namespaces(self):
        '''
        Populate ``self.nsmaps`` (location -> nsmap of the document root)
        and ``self.namespaces`` (location -> namespace used when creating
        new elements).

        The namespace is taken from the first existing element with one of
        the known record tag names. If there is no such element, the first
        prefixed namespace of the root is used, falling back to the root's
        default namespace.
        '''
        self.nsmaps = {}
        for i, root in self.roots.items():
            self.nsmaps[i] = root.nsmap

        self.namespaces = {}
        for i in self.roots:
            for c in ('library', 'text', 'image', 'playlist', 'thumbnail',
                    'watchSpecial'):
                matches = self.record_roots[i].xpath('//*[local-name()="%s"]'%c)
                if matches:
                    e = matches[0]
                    self.namespaces[i] = e.nsmap[e.prefix]
                    break
            if i not in self.namespaces:
                ns = self.nsmaps[i].get(None, None)
                for prefix in self.nsmaps[i]:
                    if prefix is not None:
                        ns = self.nsmaps[i][prefix]
                        break
                self.namespaces[i] = ns

    # }}}