#!/usr/local/bin/python3.8
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai


__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import re
from collections import Counter, OrderedDict
from functools import partial
from operator import itemgetter

from lxml import etree
from lxml.builder import ElementMaker

from calibre import __version__
from calibre.ebooks.oeb.base import (
    XPath, uuid_id, xml2text, NCX, NCX_NS, XML, XHTML, XHTML_NS, serialize, EPUB_NS, XML_NS, OEB_DOCS)
from calibre.ebooks.oeb.polish.errors import MalformedMarkup
from calibre.ebooks.oeb.polish.utils import guess_type, extract
from calibre.ebooks.oeb.polish.opf import set_guide_item, get_book_language
from calibre.ebooks.oeb.polish.pretty import pretty_html_tree, pretty_xml_tree
from calibre.translations.dynamic import translate
from calibre.utils.localization import get_lang, canonicalize_lang, lang_as_iso639_1
from polyglot.builtins import iteritems
from polyglot.urllib import urlparse

# Register a ``calibre:lower-case()`` XPath extension function so that element
# names can be matched case-insensitively in the NCX parsing code below.
ns = etree.FunctionNamespace('calibre_xpath_extensions')
ns.prefix = 'calibre'
ns['lower-case'] = lambda c, x: x.lower() if hasattr(x, 'lower') else x


class TOC:
    '''
    A node in a Table of Contents tree.

    Each node has a ``title``, a destination file name (``dest``) and an
    optional fragment (``frag``) inside that file. The root node normally has
    none of these and only holds ``children``.

    Note that this class defines ``__len__``, so a node without children is
    falsy. Always compare against ``None`` with ``is`` to test for a missing
    node.
    '''

    toc_title = None

    def __init__(self, title=None, dest=None, frag=None):
        self.title, self.dest, self.frag = title, dest, frag
        # Filled in by verify_toc_destinations()
        self.dest_exists = self.dest_error = None
        if self.title:
            self.title = self.title.strip()
        self.parent = None
        self.children = []
        self.page_list = []

    def add(self, title, dest, frag=None):
        ''' Append a new child node and return it. '''
        c = TOC(title, dest, frag)
        self.children.append(c)
        c.parent = self
        return c

    def remove(self, child):
        ''' Remove the direct child ``child`` (and with it, its subtree). '''
        self.children.remove(child)
        child.parent = None

    def remove_from_parent(self):
        '''
        Detach this node from its parent, re-attaching the children of this
        node to the parent at the position this node occupied. Does nothing
        if this node has no parent.
        '''
        if self.parent is None:
            return
        idx = self.parent.children.index(self)
        # Inserting in reverse at a fixed index preserves the child order
        for child in reversed(self.children):
            child.parent = self.parent
            self.parent.children.insert(idx, child)
        self.parent.children.remove(self)
        self.parent = None

    def __iter__(self):
        yield from self.children

    def __len__(self):
        return len(self.children)

    def iterdescendants(self, level=None):
        '''
        Iterate over all descendants in document (depth first) order. If
        ``level`` is not None, yield ``(level, node)`` tuples instead, where
        the level of direct children is ``level`` and increases by one for
        every generation.
        '''
        gc_level = None if level is None else level + 1
        for child in self:
            if level is None:
                yield child
            else:
                yield level, child
            yield from child.iterdescendants(level=gc_level)

    def remove_duplicates(self, only_text=True):
        '''
        Remove duplicate siblings, recursively. The first occurrence is kept.

        :param only_text: If True siblings are compared by title only,
            otherwise by title, destination and fragment.
        '''
        seen = set()
        remove = []
        for child in self:
            key = child.title if only_text else (child.title, child.dest, (child.frag or None))
            if key in seen:
                remove.append(child)
            else:
                seen.add(key)
                # Use the same comparison mode at every level of the tree
                child.remove_duplicates(only_text=only_text)
        for child in remove:
            self.remove(child)

    @property
    def depth(self):
        """The maximum depth of the navigation tree rooted at this node."""
        try:
            return max(node.depth for node in self) + 1
        except ValueError:
            # max() of an empty sequence: this node has no children
            return 1

    @property
    def last_child(self):
        ''' The last direct child or None if there are no children. '''
        return self.children[-1] if self.children else None

    def get_lines(self, lvl=0):
        ''' Return a list of tab indented lines describing this subtree. '''
        frag = ('#'+self.frag) if self.frag else ''
        ans = [('\t'*lvl) + 'TOC: %s --> %s%s'%(self.title, self.dest, frag)]
        for child in self:
            ans.extend(child.get_lines(lvl+1))
        return ans

    def __str__(self):
        return '\n'.join(self.get_lines())

    def to_dict(self, node_counter=None):
        '''
        Convert this subtree into nested dictionaries.

        :param node_counter: Optional iterator, if specified every node gets
            an ``id`` taken from it via ``next()``. Children are numbered
            before their parent.
        '''
        ans = {
            'title':self.title, 'dest':self.dest, 'frag':self.frag,
            'children':[c.to_dict(node_counter) for c in self.children]
        }
        if self.dest_exists is not None:
            ans['dest_exists'] = self.dest_exists
        if self.dest_error is not None:
            ans['dest_error'] = self.dest_error
        if node_counter is not None:
            ans['id'] = next(node_counter)
        return ans

    @property
    def as_dict(self):
        ''' Same as :meth:`to_dict` without node ids. '''
        return self.to_dict()


def child_xpath(tag, name):
    ''' Direct children of ``tag`` whose lower-cased local name is ``name``. '''
    return tag.xpath('./*[calibre:lower-case(local-name()) = "%s"]'%name)


def add_from_navpoint(container, navpoint, parent, ncx_name):
    '''
    Add a child to the TOC node ``parent`` built from the NCX ``navpoint``
    element. The title is read from ``navLabel/text`` and the destination from
    ``content/@src``, resolved relative to ``ncx_name``. Returns the new node.
    '''
    dest = frag = text = None
    nl = child_xpath(navpoint, 'navlabel')
    if nl:
        nl = nl[0]
        text = ''
        for txt in child_xpath(nl, 'text'):
            text += etree.tostring(txt, method='text',
                    encoding='unicode', with_tail=False)
    content = child_xpath(navpoint, 'content')
    if content:
        content = content[0]
        href = content.get('src', None)
        if href:
            dest = container.href_to_name(href, base=ncx_name)
            frag = urlparse(href).fragment or None
    return parent.add(text or None, dest or None, frag or None)


def process_ncx_node(container, node, toc_parent, ncx_name):
    ''' Recursively convert the navPoint children of ``node`` into TOC nodes. '''
    for navpoint in node.xpath('./*[calibre:lower-case(local-name()) = "navpoint"]'):
        child = add_from_navpoint(container, navpoint, toc_parent, ncx_name)
        if child is not None:
            process_ncx_node(container, navpoint, child, ncx_name)


def parse_ncx(container, ncx_name):
    '''
    Parse the NCX file ``ncx_name`` into a :class:`TOC` tree. The returned
    root additionally has ``lang`` and ``uid`` attributes (possibly None) and
    its ``page_list`` is populated from any ``pageList`` elements.
    '''
    root = container.parsed(ncx_name)
    toc_root = TOC()
    navmaps = root.xpath('//*[calibre:lower-case(local-name()) = "navmap"]')
    if navmaps:
        process_ncx_node(container, navmaps[0], toc_root, ncx_name)
    toc_root.lang = toc_root.uid = None
    for attr, val in iteritems(root.attrib):
        if attr.endswith('lang'):
            toc_root.lang = str(val)
            break
    for uid in root.xpath('//*[calibre:lower-case(local-name()) = "meta" and @name="dtb:uid"]/@content'):
        if uid:
            toc_root.uid = str(uid)
            break
    for pl in root.xpath('//*[calibre:lower-case(local-name()) = "pagelist"]'):
        for pt in pl.xpath('descendant::*[calibre:lower-case(local-name()) = "pagetarget"]'):
            pagenum = pt.get('value')
            if pagenum:
                href = pt.xpath('descendant::*[calibre:lower-case(local-name()) = "content"]/@src')
                if href:
                    dest = container.href_to_name(href[0], base=ncx_name)
                    frag = urlparse(href[0]).fragment or None
                    toc_root.page_list.append({'dest': dest, 'pagenum': pagenum, 'frag': frag})
    return toc_root


def add_from_li(container, li, parent, nav_name):
    '''
    Add a child to the TOC node ``parent`` built from the first ``<a>`` or
    ``<span>`` child of the nav document ``<li>`` element. The title falls
    back to ``title`` attributes when the element has no text. Returns the new
    node.
    '''
    dest = frag = text = None
    for x in li.iterchildren(XHTML('a'), XHTML('span')):
        text = etree.tostring(x, method='text', encoding='unicode', with_tail=False).strip() or ' '.join(x.xpath('descendant-or-self::*/@title')).strip()
        href = x.get('href')
        if href:
            dest = nav_name if href.startswith('#') else container.href_to_name(href, base=nav_name)
            frag = urlparse(href).fragment or None
        # Only the first matching child is used
        break
    return parent.add(text or None, dest or None, frag or None)


def first_child(parent, tagname):
    ''' The first direct child of ``parent`` with tag ``tagname`` or None. '''
    try:
        return next(parent.iterchildren(tagname))
    except StopIteration:
        return None


def process_nav_node(container, node, toc_parent, nav_name):
    ''' Recursively convert the ``<li>`` children of ``node`` into TOC nodes. '''
    for li in node.iterchildren(XHTML('li')):
        child = add_from_li(container, li, toc_parent, nav_name)
        ol = first_child(li, XHTML('ol'))
        if child is not None and ol is not None:
            process_nav_node(container, ol, child, nav_name)


def parse_nav(container, nav_name):
    '''
    Parse the EPUB 3 navigation document ``nav_name`` into a :class:`TOC`
    tree, using the first ``<nav epub:type="toc">`` element that contains an
    ``<ol>``. The text of the first non-empty heading in that nav is stored as
    ``toc_title`` on the returned root.
    '''
    root = container.parsed(nav_name)
    toc_root = TOC()
    toc_root.lang = toc_root.uid = None
    et = '{%s}type' % EPUB_NS
    for nav in root.iterdescendants(XHTML('nav')):
        if nav.get(et) == 'toc':
            ol = first_child(nav, XHTML('ol'))
            if ol is not None:
                process_nav_node(container, ol, toc_root, nav_name)
                for h in nav.iterchildren(*map(XHTML, 'h1 h2 h3 h4 h5 h6'.split())):
                    text = etree.tostring(h, method='text', encoding='unicode', with_tail=False) or h.get('title')
                    if text:
                        toc_root.toc_title = text
                        break
                break
    return toc_root


def verify_toc_destinations(container, toc):
    '''
    Check that the destination of every descendant of ``toc`` exists, setting
    ``dest_exists`` and, on failure, ``dest_error`` on each node.
    '''
    anchor_map = {}  # file name -> set of anchors present in that file
    anchor_xpath = XPath('//*/@id|//h:a/@name')
    for item in toc.iterdescendants():
        name = item.dest
        if not name:
            item.dest_exists = False
            item.dest_error = _('No file named %s exists')%name
            continue
        try:
            root = container.parsed(name)
        except KeyError:
            item.dest_exists = False
            item.dest_error = _('No file named %s exists')%name
            continue
        if not hasattr(root, 'xpath'):
            item.dest_exists = False
            item.dest_error = _('No HTML file named %s exists')%name
            continue
        if not item.frag:
            item.dest_exists = True
            continue
        if name not in anchor_map:
            anchor_map[name] = frozenset(anchor_xpath(root))
        item.dest_exists = item.frag in anchor_map[name]
        if not item.dest_exists:
            item.dest_error = _(
                'The anchor %(a)s does not exist in file %(f)s')%dict(
                a=item.frag, f=name)


def find_existing_ncx_toc(container):
    '''
    Return the name of the NCX file referenced by the ``toc`` attribute of the
    spine, falling back to the first manifest item with the NCX media type.
    Returns None if no NCX is found.
    '''
    toc = container.opf_xpath('//opf:spine/@toc')
    if toc:
        toc = container.manifest_id_map.get(toc[0], None)
    if not toc:
        ncx = guess_type('a.ncx')
        toc = container.manifest_type_map.get(ncx, [None])[0]
    return toc or None


def find_existing_nav_toc(container):
    ''' Return the name of the first manifest item with the ``nav`` property or None. '''
    for name in container.manifest_items_with_property('nav'):
        return name


def get_x_toc(container, find_toc, parse_toc, verify_destinations=True):
    '''
    Locate a ToC file with ``find_toc(container)`` and parse it with
    ``parse_toc(container, name)``. If no such file exists in the container an
    empty :class:`TOC` is returned. The name of the file used (or None) is
    stored as ``toc_file_name`` on the result.
    '''
    def empty_toc():
        ans = TOC()
        ans.lang = ans.uid = None
        return ans
    toc = find_toc(container)
    ans = empty_toc() if toc is None or not container.has_name(toc) else parse_toc(container, toc)
    ans.toc_file_name = toc if toc and container.has_name(toc) else None
    if verify_destinations:
        verify_toc_destinations(container, ans)
    return ans


def get_toc(container, verify_destinations=True):
    '''
    Return the Table of Contents of the book. For OPF version 3 and later the
    navigation document is used, falling back to the NCX if it yields no
    entries. For earlier versions only the NCX is used.
    '''
    ver = container.opf_version_parsed
    if ver.major < 3:
        return get_x_toc(container, find_existing_ncx_toc, parse_ncx, verify_destinations=verify_destinations)
    else:
        ans = get_x_toc(container, find_existing_nav_toc, parse_nav, verify_destinations=verify_destinations)
        if len(ans) == 0:
            ans = get_x_toc(container, find_existing_ncx_toc, parse_ncx, verify_destinations=verify_destinations)
        return ans


def get_guide_landmarks(container):
    '''
    Yield a landmark dictionary (``dest``, ``frag``, ``title``, ``type``) for
    every OPF guide reference that points to a file in the container.
    References without an ``href`` attribute are skipped.
    '''
    for ref in container.opf_xpath('./opf:guide/opf:reference'):
        href, title, rtype = ref.get('href'), ref.get('title'), ref.get('type')
        if not href:
            # A reference without a target cannot be a landmark
            continue
        href, frag = href.partition('#')[::2]
        name = container.href_to_name(href, container.opf_name)
        if container.has_name(name):
            yield {'dest':name, 'frag':frag, 'title':title or '', 'type':rtype or ''}


def get_nav_landmarks(container):
    '''
    Yield a landmark dictionary (``dest``, ``frag``, ``title``, ``type``) for
    the entries of the first ``<nav epub:type="landmarks">`` element in the
    navigation document. Only the first ``<a>`` in every ``<li>`` is
    considered.
    '''
    nav = find_existing_nav_toc(container)
    if nav and container.has_name(nav):
        root = container.parsed(nav)
        et = '{%s}type' % EPUB_NS
        for elem in root.iterdescendants(XHTML('nav')):
            if elem.get(et) == 'landmarks':
                for li in elem.iterdescendants(XHTML('li')):
                    for a in li.iterdescendants(XHTML('a')):
                        href, rtype = a.get('href'), a.get(et)
                        if href:
                            title = etree.tostring(a, method='text', encoding='unicode', with_tail=False).strip()
                            href, frag = href.partition('#')[::2]
                            name = container.href_to_name(href, nav)
                            if container.has_name(name):
                                yield {'dest':name, 'frag':frag, 'title':title or '', 'type':rtype or ''}
                        break
                break


def get_landmarks(container):
    '''
    Return the list of landmarks of the book. For OPF version 3 and later the
    navigation document is used, falling back to the OPF guide if it yields
    nothing. For earlier versions only the guide is used.
    '''
    ver = container.opf_version_parsed
    if ver.major < 3:
        return list(get_guide_landmarks(container))
    ans = list(get_nav_landmarks(container))
    if len(ans) == 0:
        ans = list(get_guide_landmarks(container))
    return ans


def ensure_id(elem, all_ids):
    '''
    Make sure ``elem`` has an ``id`` attribute.

    :param all_ids: The set of ids already in use, a newly generated id is
        added to it.
    :return: A tuple ``(generated, id)`` where ``generated`` is True only if
        a new ``toc_N`` id had to be created. Re-using the ``name`` of an
        ``<a>`` element as its id is reported as False.
    '''
    elem_id = elem.get('id')
    if elem_id:
        return False, elem_id
    if elem.tag == XHTML('a'):
        anchor = elem.get('name', None)
        if anchor:
            elem.set('id', anchor)
            return False, anchor
    c = 0
    while True:
        c += 1
        q = 'toc_{}'.format(c)
        if q not in all_ids:
            elem.set('id', q)
            all_ids.add(q)
            break
    return True, elem.get('id')


def elem_to_toc_text(elem):
    '''
    Return text suitable for a ToC entry for ``elem``: its text content,
    falling back to the ``title`` and then the ``alt`` attribute, with
    whitespace normalized and truncated to 1000 characters.
    '''
    text = xml2text(elem).strip()
    if not text:
        text = elem.get('title', '')
    if not text:
        text = elem.get('alt', '')
    text = re.sub(r'\s+', ' ', text.strip())
    text = text[:1000].strip()
    if not text:
        text = _('(Untitled)')
    return text


def item_at_top(elem):
    '''
    Return True if no image or non-whitespace text precedes ``elem`` inside
    the ``<body>`` of its document, i.e. a link to the file as a whole is as
    good as a link to ``elem`` itself.
    '''
    try:
        body = XPath('//h:body')(elem.getroottree().getroot())[0]
    except (TypeError, IndexError, KeyError, AttributeError):
        return False
    tree = body.getroottree()
    path = tree.getpath(elem)
    for el in body.iterdescendants(etree.Element):
        epath = tree.getpath(el)
        if epath == path:
            break
        try:
            if el.tag.endswith('}img') or (el.text and el.text.strip()):
                return False
        except Exception:
            # Be conservative about elements that cannot be inspected
            return False
        if not path.startswith(epath):
            # Only check tail of non-parent elements
            if el.tail and el.tail.strip():
                return False
    return True


def from_xpaths(container, xpaths):
    '''
    Generate a Table of Contents from a list of XPath expressions. Each
    expression in the list corresponds to a level of the generated ToC. For
    example: :code:`['//h:h1', '//h:h2', '//h:h3']` will generate a three level
    Table of Contents from the ``<h1>``, ``<h2>`` and ``<h3>`` tags.
    '''
    tocroot = TOC()
    xpaths = [XPath(xp) for xp in xpaths]

    # Find those levels that have no elements in all spine items
    maps = OrderedDict()
    empty_levels = {i+1 for i, xp in enumerate(xpaths)}
    for spinepath in container.spine_items:
        name = container.abspath_to_name(spinepath)
        root = container.parsed(name)
        level_item_map = maps[name] = {i+1:frozenset(xp(root)) for i, xp in enumerate(xpaths)}
        for lvl, elems in iteritems(level_item_map):
            if elems:
                empty_levels.discard(lvl)
    # Remove empty levels from all level_maps
    if empty_levels:
        for name, lmap in tuple(iteritems(maps)):
            lmap = {lvl:items for lvl, items in iteritems(lmap) if lvl not in empty_levels}
            lmap = sorted(iteritems(lmap), key=itemgetter(0))
            # Renumber the remaining levels so that they are contiguous
            lmap = {i+1:items for i, (l, items) in enumerate(lmap)}
            maps[name] = lmap

    node_level_map = {tocroot: 0}

    def parent_for_level(child_level):
        # Walk down the chain of last children to find the node that a new
        # entry at child_level should be attached to
        limit = child_level - 1

        def process_node(node):
            child = node.last_child
            if child is None:
                return node
            lvl = node_level_map[child]
            return node if lvl > limit else child if lvl == limit else process_node(child)

        return process_node(tocroot)

    for name, level_item_map in iteritems(maps):
        root = container.parsed(name)
        item_level_map = {e:i for i, elems in iteritems(level_item_map) for e in elems}
        item_dirtied = False
        all_ids = set(root.xpath('//*/@id'))

        for item in root.iterdescendants(etree.Element):
            lvl = item_level_map.get(item, None)
            if lvl is None:
                continue
            text = elem_to_toc_text(item)
            parent = parent_for_level(lvl)
            if item_at_top(item):
                # Link to the file itself, no anchor needed
                dirtied, elem_id = False, None
            else:
                dirtied, elem_id = ensure_id(item, all_ids)
            item_dirtied = dirtied or item_dirtied
            toc = parent.add(text, name, elem_id)
            node_level_map[toc] = lvl
            toc.dest_exists = True

        if item_dirtied:
            container.commit_item(name, keep_parsed=True)

    return tocroot


def from_links(container):
    '''
    Generate a Table of Contents from links in the book.
    '''
    toc = TOC()
    link_path = XPath('//h:a[@href]')
    seen_titles, seen_dests = set(), set()
    for name, is_linear in container.spine_names:
        root = container.parsed(name)
        for a in link_path(root):
            href = a.get('href')
            if not href or not href.strip():
                continue
            frag = None
            if href.startswith('#'):
                dest = name
                frag = href[1:]
            else:
                href, sep, frag = href.partition('#')
                dest = container.href_to_name(href, base=name)
            frag = frag or None
            if (dest, frag) in seen_dests:
                continue
            seen_dests.add((dest, frag))
            text = elem_to_toc_text(a)
            if text in seen_titles:
                continue
            seen_titles.add(text)
            toc.add(text, dest, frag=frag)
    verify_toc_destinations(container, toc)
    # Iterate over a snapshot, as removing from the list of children while
    # iterating over it would skip the entry following every removed one
    for child in tuple(toc):
        if not child.dest_exists:
            toc.remove(child)
    return toc


def find_text(node):
    '''
    Return the whitespace normalized text of the first child element of
    ``node`` that has any text. If that text is longer than 200 characters,
    look for shorter text inside that child, recursively, finally falling back
    to the truncated text. Returns None if no child has text.
    '''
    LIMIT = 200
    pat = re.compile(r'\s+')
    for child in node:
        if isinstance(child, etree._Element):
            text = xml2text(child).strip()
            text = pat.sub(' ', text)
            if len(text) < 1:
                continue
            if len(text) > LIMIT:
                # Look for less text in a child of this node, recursively
                ntext = find_text(child)
                return ntext or (text[:LIMIT] + '...')
            else:
                return text


def from_files(container):
    '''
    Generate a Table of Contents from files in the book.
    '''
    toc = TOC()
    for i, spinepath in enumerate(container.spine_items):
        name = container.abspath_to_name(spinepath)
        root = container.parsed(name)
        body = XPath('//h:body')(root)
        if not body:
            continue
        text = find_text(body[0])
        if not text:
            # No usable text, fall back to the file name
            text = name.rpartition('/')[-1]
            if i == 0 and text.rpartition('.')[0].lower() in {'titlepage', 'cover'}:
                text = _('Cover')
        toc.add(text, name)
    return toc


def node_from_loc(root, locs, totals=None):
    '''
    Return the element reached from ``<body>`` by following the child indices
    in ``locs``.

    :param totals: Optional sequence of the expected number of child elements
        at every step.
    :raises MalformedMarkup: If an actual child count differs from ``totals``.
    '''
    node = root.xpath('//*[local-name()="body"]')[0]
    for i, loc in enumerate(locs):
        children = tuple(node.iterchildren(etree.Element))
        if totals is not None and totals[i] != len(children):
            raise MalformedMarkup()
        node = children[loc]
    return node


def add_id(container, name, loc, totals=None):
    '''
    Make sure the element at ``loc`` (see :func:`node_from_loc`) in the file
    ``name`` has an id, commit the file and return the id.

    :raises MalformedMarkup: If the element cannot be located even after
        re-parsing the file with the HTML 5 parser.
    '''
    root = container.parsed(name)
    try:
        node = node_from_loc(root, loc, totals=totals)
    except MalformedMarkup:
        # The webkit HTML parser and the container parser have yielded
        # different node counts, this can happen if the file is valid XML
        # but contains constructs like nested <p> tags. So force parse it
        # with the HTML 5 parser and try again.
        raw = container.raw_data(name)
        root = container.parse_xhtml(raw, fname=name, force_html5_parse=True)
        try:
            node = node_from_loc(root, loc, totals=totals)
        except MalformedMarkup:
            raise MalformedMarkup(_('The file %s has malformed markup. Try running the Fix HTML tool'
                                    ' before editing.') % name)
        container.replace(name, root)

    if not node.get('id'):
        ensure_id(node, set(root.xpath('//*/@id')))
    container.commit_item(name, keep_parsed=True)
    return node.get('id')


def create_ncx(toc, to_href, btitle, lang, uid):
    '''
    Build and return the root element of an NCX document for ``toc``.

    :param to_href: Callable converting a file name into an href relative to
        the NCX file.
    :param btitle: Text for the ``docTitle`` element.
    :param lang: Language code, underscores are converted to hyphens.
    :param uid: Value for the ``dtb:uid`` meta element.
    '''
    lang = lang.replace('_', '-')
    ncx = etree.Element(NCX('ncx'),
        attrib={'version': '2005-1', XML('lang'): lang},
        nsmap={None: NCX_NS})
    head = etree.SubElement(ncx, NCX('head'))
    etree.SubElement(head, NCX('meta'),
        name='dtb:uid', content=str(uid))
    etree.SubElement(head, NCX('meta'),
        name='dtb:depth', content=str(toc.depth))
    generator = ''.join(['calibre (', __version__, ')'])
    etree.SubElement(head, NCX('meta'),
        name='dtb:generator', content=generator)
    etree.SubElement(head, NCX('meta'), name='dtb:totalPageCount', content='0')
    etree.SubElement(head, NCX('meta'), name='dtb:maxPageNumber', content='0')
    title = etree.SubElement(ncx, NCX('docTitle'))
    text = etree.SubElement(title, NCX('text'))
    text.text = btitle
    navmap = etree.SubElement(ncx, NCX('navMap'))
    spat = re.compile(r'\s+')

    # A mutable counter shared by all levels of the recursion below
    play_order = Counter()

    def process_node(xml_parent, toc_parent):
        for child in toc_parent:
            play_order['c'] += 1
            point = etree.SubElement(xml_parent, NCX('navPoint'), id='num_%d' % play_order['c'],
                playOrder=str(play_order['c']))
            label = etree.SubElement(point, NCX('navLabel'))
            title = child.title
            if title:
                title = spat.sub(' ', title)
            etree.SubElement(label, NCX('text')).text = title
            if child.dest:
                href = to_href(child.dest)
                if child.frag:
                    href += '#'+child.frag
                etree.SubElement(point, NCX('content'), src=href)
            process_node(point, child)

    process_node(navmap, toc)
    return ncx


def commit_ncx_toc(container, toc, lang=None, uid=None):
    '''
    Write ``toc`` into the NCX file of the book, creating the file (and
    pointing the spine at it) if the book has none.

    :param lang: Language for the NCX, defaults to the first usable
        ``dc:language`` of the book, or failing that the result of
        ``get_lang()``.
    :param uid: Unique identifier for the NCX, defaults to the text of the
        element referenced by the ``unique-identifier`` attribute of the OPF,
        or failing that a newly generated one.
    '''
    tocname = find_existing_ncx_toc(container)
    if tocname is None:
        item = container.generate_item('toc.ncx', id_prefix='toc')
        tocname = container.href_to_name(item.get('href'), base=container.opf_name)
        ncx_id = item.get('id')
        for spine in container.opf_xpath('//opf:spine'):
            spine.set('toc', ncx_id)
    if not lang:
        lang = get_lang()
        for elem in container.opf_xpath('//dc:language'):
            code = canonicalize_lang(xml2text(elem).strip())
            if code:
                lang = lang_as_iso639_1(code) or code
                break
    lang = lang_as_iso639_1(lang) or lang
    if not uid:
        uid = uuid_id()
        eid = container.opf.get('unique-identifier', None)
        if eid:
            m = container.opf_xpath('//*[@id="%s"]'%eid)
            if m:
                uid = xml2text(m[0])

    title = _('Table of Contents')
    m = container.opf_xpath('//dc:title')
    if m:
        x = xml2text(m[0]).strip()
        title = x or title

    to_href = partial(container.name_to_href, base=tocname)
    root = create_ncx(toc, to_href, title, lang, uid)
    container.replace(tocname, root)
    container.pretty_print.add(tocname)


def ensure_single_nav_of_type(root, ntype='toc'):
    '''
    Return an empty ``<nav>`` element with ``epub:type`` ``ntype``. The first
    existing nav of that type is emptied (keeping its attributes and tail) and
    re-used, any further ones are removed with ``extract()``. If none exists,
    a new one is appended to the ``<body>``.
    '''
    et = '{%s}type' % EPUB_NS
    navs = [n for n in root.iterdescendants(XHTML('nav')) if n.get(et) == ntype]
    for x in navs[1:]:
        extract(x)
    if navs:
        nav = navs[0]
        # clear() also drops the attributes and the tail, so restore them
        tail = nav.tail
        attrib = dict(nav.attrib)
        nav.clear()
        nav.attrib.update(attrib)
        nav.tail = tail
    else:
        nav = root.makeelement(XHTML('nav'))
        first_child(root, XHTML('body')).append(nav)
    nav.set('{%s}type' % EPUB_NS, ntype)
    return nav


def ensure_container_has_nav(container, lang=None, previous_nav=None):
    '''
    Make sure the book has a navigation document, creating one if needed.

    :param lang: If specified, set as the language of the document.
    :param previous_nav: Optional ``(href, root)`` tuple of an earlier
        navigation document. If the file still exists it is used as the
        navigation document, otherwise ``root`` is used as the content of a
        newly created one.
    :return: The tuple ``(name, root)`` of the navigation document.
    '''
    tocname = find_existing_nav_toc(container)
    if previous_nav is not None:
        nav_name = container.href_to_name(previous_nav[0])
        if nav_name and container.exists(nav_name):
            tocname = nav_name
            container.apply_unique_properties(tocname, 'nav')
    if tocname is None:
        item = container.generate_item('nav.xhtml', id_prefix='nav')
        item.set('properties', 'nav')
        tocname = container.href_to_name(item.get('href'), base=container.opf_name)
        if previous_nav is not None:
            root = previous_nav[1]
        else:
            root = container.parse_xhtml(P('templates/new_nav.html', data=True).decode('utf-8'))
        container.replace(tocname, root)
    else:
        root = container.parsed(tocname)
    if lang:
        lang = lang_as_iso639_1(lang) or lang
        root.set('lang', lang)
        root.set('{%s}lang' % XML_NS, lang)
    return tocname, root


def collapse_li(parent):
    ''' Remove the whitespace around the single child of ``<li>`` elements. '''
    for li in parent.iterdescendants(XHTML('li')):
        if len(li) == 1:
            li.text = None
            li[0].tail = None


def create_nav_li(container, ol, entry, tocname):
    '''
    Append ``<li><a href="..."/></li>`` to ``ol``, pointing to the ``dest``
    and ``frag`` of ``entry`` relative to ``tocname``. Returns the ``<a>``.
    '''
    li = ol.makeelement(XHTML('li'))
    ol.append(li)
    a = li.makeelement(XHTML('a'))
    li.append(a)
    href = container.name_to_href(entry['dest'], tocname)
    if entry['frag']:
        href += '#' + entry['frag']
    a.set('href', href)
    return a


def set_landmarks(container, root, tocname, landmarks):
    '''
    Replace the landmarks nav in ``root`` with entries for ``landmarks``.
    Entries without a type, or whose destination is not an OEB document in
    the container, are ignored.
    '''
    nav = ensure_single_nav_of_type(root, 'landmarks')
    nav.set('hidden', '')
    ol = nav.makeelement(XHTML('ol'))
    nav.append(ol)
    for entry in landmarks:
        if entry['type'] and container.has_name(entry['dest']) and container.mime_map[entry['dest']] in OEB_DOCS:
            a = create_nav_li(container, ol, entry, tocname)
            a.set('{%s}type' % EPUB_NS, entry['type'])
            a.text = entry['title'] or None
    pretty_xml_tree(nav)
    collapse_li(nav)


def commit_nav_toc(container, toc, lang=None, landmarks=None, previous_nav=None):
    '''
    Write ``toc`` into the navigation document of the book, creating the
    document if needed. The page list of ``toc``, if any, is written as a
    hidden ``page-list`` nav.

    :param lang: See :func:`ensure_container_has_nav`.
    :param landmarks: If not None, a list of landmark dictionaries that
        replaces the landmarks nav, see :func:`set_landmarks`.
    :param previous_nav: See :func:`ensure_container_has_nav`.
    '''
    tocname, root = ensure_container_has_nav(container, lang=lang, previous_nav=previous_nav)
    nav = ensure_single_nav_of_type(root, 'toc')
    if toc.toc_title:
        nav.append(nav.makeelement(XHTML('h1')))
        nav[-1].text = toc.toc_title

    rnode = nav.makeelement(XHTML('ol'))
    nav.append(rnode)
    to_href = partial(container.name_to_href, base=tocname)
    spat = re.compile(r'\s+')

    def process_node(xml_parent, toc_parent):
        for child in toc_parent:
            li = xml_parent.makeelement(XHTML('li'))
            xml_parent.append(li)
            title = child.title or ''
            title = spat.sub(' ', title).strip()
            # Entries without a destination become plain text
            a = li.makeelement(XHTML('a' if child.dest else 'span'))
            a.text = title
            li.append(a)
            if child.dest:
                href = to_href(child.dest)
                if child.frag:
                    href += '#'+child.frag
                a.set('href', href)
            if len(child):
                ol = li.makeelement(XHTML('ol'))
                li.append(ol)
                process_node(ol, child)
    process_node(rnode, toc)
    pretty_xml_tree(nav)

    collapse_li(nav)
    nav.tail = '\n'

    if toc.page_list:
        nav = ensure_single_nav_of_type(root, 'page-list')
        nav.set('hidden', '')
        ol = nav.makeelement(XHTML('ol'))
        nav.append(ol)
        for entry in toc.page_list:
            if container.has_name(entry['dest']) and container.mime_map[entry['dest']] in OEB_DOCS:
                a = create_nav_li(container, ol, entry, tocname)
                a.text = str(entry['pagenum'])
        pretty_xml_tree(nav)
        collapse_li(nav)
    if landmarks is not None:
        set_landmarks(container, root, tocname, landmarks)
    container.replace(tocname, root)


def commit_toc(container, toc, lang=None, uid=None):
    '''
    Write ``toc`` into the book: always as an NCX and, for OPF version 3 and
    later, into the navigation document as well.
    '''
    commit_ncx_toc(container, toc, lang=lang, uid=uid)
    if container.opf_version_parsed.major > 2:
        commit_nav_toc(container, toc, lang=lang)


def remove_names_from_toc(container, names):
    '''
    Remove all entries pointing to the files ``names`` from both the NCX and
    the navigation document. Children of removed entries are moved up to the
    parent of the removed entry. Returns the list of ToC file names that were
    changed.
    '''
    changed = []
    names = frozenset(names)
    for find_toc, parse_toc, commit in (
            (find_existing_ncx_toc, parse_ncx, commit_ncx_toc),
            (find_existing_nav_toc, parse_nav, commit_nav_toc),
    ):
        toc = get_x_toc(container, find_toc, parse_toc, verify_destinations=False)
        if len(toc) > 0:
            remove = []
            for node in toc.iterdescendants():
                if node.dest in names:
                    remove.append(node)
            if remove:
                for node in reversed(remove):
                    node.remove_from_parent()
                commit(container, toc)
                changed.append(find_toc(container))
    return changed


def find_inline_toc(container):
    '''
    Return the name of the first spine item whose ``<body>`` has the id
    ``calibre_generated_inline_toc`` or None.
    '''
    for name, linear in container.spine_names:
        if container.parsed(name).xpath('//*[local-name()="body" and @id="calibre_generated_inline_toc"]'):
            return name


def toc_to_html(toc, container, toc_name, title, lang=None):
    '''
    Render ``toc`` as an XHTML document of nested ``<ul>`` lists and return
    its root element.

    :param toc_name: The name of the file the document is for, links are made
        relative to it.
    :param title: Used for both the ``<title>`` and the ``<h2>`` heading.
    :param lang: If specified, set as the ``lang`` attribute of ``<html>``.
    '''

    def process_node(html_parent, toc, level=1, indent=' ', style_level=2):
        li = html_parent.makeelement(XHTML('li'))
        li.tail = '\n'+ (indent*level)
        html_parent.append(li)
        name, frag = toc.dest, toc.frag
        href = '#'
        if name:
            href = container.name_to_href(name, toc_name)
            if frag:
                href += '#' + frag
        a = li.makeelement(XHTML('a'), href=href)
        a.text = toc.title
        li.append(a)
        if len(toc) > 0:
            parent = li.makeelement(XHTML('ul'))
            parent.set('class', 'level%d' % (style_level))
            li.append(parent)
            a.tail = '\n\n' + (indent*(level+2))
            parent.text = '\n'+(indent*(level+3))
            parent.tail = '\n\n' + (indent*(level+1))
            for child in toc:
                process_node(parent, child, level+3, style_level=style_level + 1)
            parent[-1].tail = '\n' + (indent*(level+2))

    E = ElementMaker(namespace=XHTML_NS, nsmap={None:XHTML_NS})
    html = E.html(
        E.head(
            E.title(title),
            E.style(P('templates/inline_toc_styles.css', data=True).decode('utf-8'), type='text/css'),
        ),
        E.body(
            E.h2(title),
            E.ul(),
            id="calibre_generated_inline_toc",
        )
    )

    # The <ul> created above: second child of <body>
    ul = html[1][1]
    ul.set('class', 'level1')
    for child in toc:
        process_node(ul, child)
    if lang:
        html.set('lang', lang)
    pretty_html_tree(container, html)
    return html


def create_inline_toc(container, title=None):
    '''
    Create an inline (HTML) Table of Contents from an existing NCX Table of Contents.

    :param title: The title for this table of contents.
    '''
    lang = get_book_language(container)
    default_title = 'Table of Contents'
    if lang:
        lang = lang_as_iso639_1(lang) or lang
        default_title = translate(lang, default_title)
    title = title or default_title
    toc = get_toc(container)
    if len(toc) == 0:
        return None
    toc_name = find_inline_toc(container)

    name = toc_name
    html = toc_to_html(toc, container, name, title, lang)
    raw = serialize(html, 'text/html')
    if name is None:
        # No existing inline ToC, add a new file at the start of the spine
        name, c = 'toc.xhtml', 0
        while container.has_name(name):
            c += 1
            name = 'toc%d.xhtml' % c
        container.add_file(name, raw, spine_index=0)
    else:
        with container.open(name, 'wb') as f:
            f.write(raw)
    set_guide_item(container, 'toc', title, name, frag='calibre_generated_inline_toc')
    return name