1#!/usr/local/bin/python3.8
2# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
3
4
5__license__   = 'GPL v3'
6__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
7__docformat__ = 'restructuredtext en'
8
9import re
10from collections import Counter, OrderedDict
11from functools import partial
12from operator import itemgetter
13
14from lxml import etree
15from lxml.builder import ElementMaker
16
17from calibre import __version__
18from calibre.ebooks.oeb.base import (
19    XPath, uuid_id, xml2text, NCX, NCX_NS, XML, XHTML, XHTML_NS, serialize, EPUB_NS, XML_NS, OEB_DOCS)
20from calibre.ebooks.oeb.polish.errors import MalformedMarkup
21from calibre.ebooks.oeb.polish.utils import guess_type, extract
22from calibre.ebooks.oeb.polish.opf import set_guide_item, get_book_language
23from calibre.ebooks.oeb.polish.pretty import pretty_html_tree, pretty_xml_tree
24from calibre.translations.dynamic import translate
25from calibre.utils.localization import get_lang, canonicalize_lang, lang_as_iso639_1
26from polyglot.builtins import iteritems
27from polyglot.urllib import urlparse
28
def _xpath_lower_case(context, value):
    # Lower-case anything that has a ``lower`` method, pass every other
    # value through unchanged.
    return value.lower() if hasattr(value, 'lower') else value


# Register ``calibre:lower-case()`` as an XPath extension function. The
# expressions in this module use it to compare element local names in a
# case-insensitive way.
ns = etree.FunctionNamespace('calibre_xpath_extensions')
ns.prefix = 'calibre'
ns['lower-case'] = _xpath_lower_case
32
33
class TOC:
    '''
    A node in a Table of Contents tree.

    Every node has a ``title``, a destination file name (``dest``) and an
    optional fragment (``frag``) inside that file. A node created without any
    arguments can be used as the root of a tree: it only holds the top level
    entries in ``children``.
    '''

    # Title of the Table of Contents as a whole, None when there is none
    toc_title = None

    def __init__(self, title=None, dest=None, frag=None):
        self.title, self.dest, self.frag = title, dest, frag
        # None means the destination has not been verified
        self.dest_exists = self.dest_error = None
        if self.title:
            self.title = self.title.strip()
        self.parent = None
        self.children = []
        self.page_list = []

    def add(self, title, dest, frag=None):
        ''' Append a new child node built from the arguments and return it. '''
        c = TOC(title, dest, frag)
        self.children.append(c)
        c.parent = self
        return c

    def remove(self, child):
        ''' Detach ``child`` (a direct child of this node) from this node. '''
        self.children.remove(child)
        child.parent = None

    def remove_from_parent(self):
        '''
        Remove this node from its parent. The children of this node are not
        lost, they take the place of this node in the parent.
        '''
        if self.parent is None:
            return
        idx = self.parent.children.index(self)
        # Inserting in reverse at a fixed index preserves the original order
        for child in reversed(self.children):
            child.parent = self.parent
            self.parent.children.insert(idx, child)
        self.parent.children.remove(self)
        self.parent = None

    def __iter__(self):
        yield from self.children

    def __len__(self):
        return len(self.children)

    def iterdescendants(self, level=None):
        '''
        Iterate over all descendants of this node, depth first, parents before
        their children. If ``level`` is not None, ``(level, node)`` tuples are
        yielded instead of nodes, where ``level`` is the value passed in for
        the direct children of this node and grows by one per generation.
        '''
        gc_level = None if level is None else level + 1
        for child in self:
            if level is None:
                yield child
            else:
                yield level, child
            yield from child.iterdescendants(level=gc_level)

    def remove_duplicates(self, only_text=True):
        '''
        Remove duplicate entries among siblings, at every level of the tree.
        Only the first of a set of duplicates is kept.

        :param only_text: If True siblings are duplicates when their titles
            are equal, otherwise the title, destination and fragment must all
            be equal.
        '''
        seen = set()
        remove = []
        for child in self:
            key = child.title if only_text else (child.title, child.dest, (child.frag or None))
            if key in seen:
                remove.append(child)
            else:
                seen.add(key)
                # Use the same notion of duplicate at every level
                child.remove_duplicates(only_text=only_text)
        for child in remove:
            self.remove(child)

    @property
    def depth(self):
        """The maximum depth of the navigation tree rooted at this node."""
        try:
            return max(node.depth for node in self) + 1
        except ValueError:
            # max() of an empty sequence: this node has no children
            return 1

    @property
    def last_child(self):
        ''' The last child of this node or None if there are no children. '''
        return self.children[-1] if self.children else None

    def get_lines(self, lvl=0):
        '''
        Return a list of lines describing this node and its descendants, each
        line indented by one tab per level starting at ``lvl``.
        '''
        frag = ('#'+self.frag) if self.frag else ''
        ans = [('\t'*lvl) + 'TOC: %s --> %s%s'%(self.title, self.dest, frag)]
        for child in self:
            ans.extend(child.get_lines(lvl+1))
        return ans

    def __str__(self):
        return '\n'.join(self.get_lines())

    def to_dict(self, node_counter=None):
        '''
        Convert this node and its descendants into nested dictionaries.

        :param node_counter: Optional iterator, if specified every dictionary
            gets an ``id`` entry taken from it. Note that the children are
            converted, and thus numbered, before their parent.
        '''
        ans = {
            'title':self.title, 'dest':self.dest, 'frag':self.frag,
            'children':[c.to_dict(node_counter) for c in self.children]
        }
        if self.dest_exists is not None:
            ans['dest_exists'] = self.dest_exists
        if self.dest_error is not None:
            ans['dest_error'] = self.dest_error
        if node_counter is not None:
            ans['id'] = next(node_counter)
        return ans

    @property
    def as_dict(self):
        ''' Same as :meth:`to_dict` without node ids. '''
        return self.to_dict()
133
134
def child_xpath(tag, name):
    '''
    Return the direct children of ``tag`` whose local name, after lower
    casing, is equal to ``name``.
    '''
    expression = './*[calibre:lower-case(local-name()) = "%s"]' % name
    return tag.xpath(expression)
137
138
def add_from_navpoint(container, navpoint, parent, ncx_name):
    '''
    Create a ToC entry from an NCX ``navPoint`` element and add it to
    ``parent``. The title comes from the text children of the first
    ``navLabel`` and the destination from the ``src`` attribute of the first
    ``content`` child, resolved relative to ``ncx_name``. Returns the new
    entry.
    '''
    title = target = anchor = None
    labels = child_xpath(navpoint, 'navlabel')
    if labels:
        title = ''.join(
            etree.tostring(node, method='text', encoding='unicode', with_tail=False)
            for node in child_xpath(labels[0], 'text'))
    sources = child_xpath(navpoint, 'content')
    src = sources[0].get('src', None) if sources else None
    if src:
        target = container.href_to_name(src, base=ncx_name)
        anchor = urlparse(src).fragment or None
    return parent.add(title or None, target or None, anchor or None)
156
157
def process_ncx_node(container, node, toc_parent, ncx_name):
    '''
    Recursively convert the ``navPoint`` children of ``node`` into entries
    under ``toc_parent``.
    '''
    for navpoint in child_xpath(node, 'navpoint'):
        entry = add_from_navpoint(container, navpoint, toc_parent, ncx_name)
        if entry is None:
            continue
        process_ncx_node(container, navpoint, entry, ncx_name)
163
164
def parse_ncx(container, ncx_name):
    '''
    Parse the NCX file named ``ncx_name`` into a :class:`TOC` tree. The root
    of the returned tree also carries the language (``lang``) and unique
    identifier (``uid``) found in the NCX, either can be None, and the page
    list entries in ``page_list``.
    '''
    ncx_root = container.parsed(ncx_name)
    ans = TOC()
    ans.lang = ans.uid = None

    # Only the first navMap is used
    nav_maps = ncx_root.xpath('//*[calibre:lower-case(local-name()) = "navmap"]')
    if nav_maps:
        process_ncx_node(container, nav_maps[0], ans, ncx_name)

    # First attribute on the root element whose name ends with "lang"
    ans.lang = next(
        (str(value) for key, value in iteritems(ncx_root.attrib) if key.endswith('lang')), None)
    # First non-empty dtb:uid meta value
    uids = ncx_root.xpath('//*[calibre:lower-case(local-name()) = "meta" and @name="dtb:uid"]/@content')
    ans.uid = next((str(value) for value in uids if value), None)

    for page_list in ncx_root.xpath('//*[calibre:lower-case(local-name()) = "pagelist"]'):
        for target in page_list.xpath('descendant::*[calibre:lower-case(local-name()) = "pagetarget"]'):
            pagenum = target.get('value')
            if not pagenum:
                continue
            sources = target.xpath('descendant::*[calibre:lower-case(local-name()) = "content"]/@src')
            if not sources:
                continue
            src = sources[0]
            ans.page_list.append({
                'dest': container.href_to_name(src, base=ncx_name),
                'pagenum': pagenum,
                'frag': urlparse(src).fragment or None,
            })
    return ans
190
191
def add_from_li(container, li, parent, nav_name):
    '''
    Create a ToC entry from an ``<li>`` in a nav document and add it to
    ``parent``. Only the first ``<a>`` or ``<span>`` child of the ``<li>`` is
    looked at, its text (or failing that the title attributes found in it)
    becomes the title and its href, if any, the destination. Returns the new
    entry.
    '''
    title = target = anchor = None
    link = next(li.iterchildren(XHTML('a'), XHTML('span')), None)
    if link is not None:
        title = etree.tostring(link, method='text', encoding='unicode', with_tail=False).strip()
        if not title:
            title = ' '.join(link.xpath('descendant-or-self::*/@title')).strip()
        href = link.get('href')
        if href:
            if href.startswith('#'):
                # A bare fragment points into the nav document itself
                target = nav_name
            else:
                target = container.href_to_name(href, base=nav_name)
            anchor = urlparse(href).fragment or None
    return parent.add(title or None, target or None, anchor or None)
202
203
def first_child(parent, tagname):
    ''' Return the first child of ``parent`` with the tag ``tagname`` or None. '''
    return next(parent.iterchildren(tagname), None)
209
210
def process_nav_node(container, node, toc_parent, nav_name):
    '''
    Recursively convert the ``<li>`` children of the ``<ol>`` ``node`` into
    entries under ``toc_parent``. Nested entries are read from the first
    ``<ol>`` child of every ``<li>``.
    '''
    for li in node.iterchildren(XHTML('li')):
        entry = add_from_li(container, li, toc_parent, nav_name)
        nested = first_child(li, XHTML('ol'))
        if entry is None or nested is None:
            continue
        process_nav_node(container, nested, entry, nav_name)
217
218
def parse_nav(container, nav_name):
    '''
    Parse the nav document named ``nav_name`` into a :class:`TOC` tree. The
    first ``<nav epub:type="toc">`` that has an ``<ol>`` child is used, the
    text of its first non-empty heading child becomes the ``toc_title``.
    ``lang`` and ``uid`` of the returned root are always None.
    '''
    ans = TOC()
    ans.lang = ans.uid = None
    root = container.parsed(nav_name)
    type_attr = '{%s}type' % EPUB_NS
    heading_tags = [XHTML(tag) for tag in 'h1 h2 h3 h4 h5 h6'.split()]
    for nav in root.iterdescendants(XHTML('nav')):
        if nav.get(type_attr) != 'toc':
            continue
        ol = first_child(nav, XHTML('ol'))
        if ol is None:
            continue
        process_nav_node(container, ol, ans, nav_name)
        for heading in nav.iterchildren(*heading_tags):
            text = etree.tostring(heading, method='text', encoding='unicode', with_tail=False) or heading.get('title')
            if text:
                ans.toc_title = text
                break
        break
    return ans
236
237
def verify_toc_destinations(container, toc):
    '''
    Check the destination of every entry in ``toc``. Sets ``dest_exists`` on
    every entry and, for entries whose destination is missing, ``dest_error``
    to a message describing the problem.
    '''
    anchors_by_file = {}
    find_anchors = XPath('//*/@id|//h:a/@name')

    def check(name, frag):
        # Returns (exists, error message or None)
        if not name:
            return False, _('No file named %s exists')%name
        try:
            root = container.parsed(name)
        except KeyError:
            return False, _('No file named %s exists')%name
        if not hasattr(root, 'xpath'):
            return False, _('No HTML file named %s exists')%name
        if not frag:
            return True, None
        # The anchors of a file are collected once and then reused
        known = anchors_by_file.get(name)
        if known is None:
            known = anchors_by_file[name] = frozenset(find_anchors(root))
        if frag in known:
            return True, None
        return False, _(
            'The anchor %(a)s does not exist in file %(f)s')%dict(
            a=frag, f=name)

    for item in toc.iterdescendants():
        exists, error = check(item.dest, item.frag)
        item.dest_exists = exists
        if error is not None:
            item.dest_error = error
267
268
def find_existing_ncx_toc(container):
    '''
    Return the name of the NCX file of the book or None. The file referenced
    by the toc attribute of the spine is preferred, otherwise the first
    manifest item with the NCX media type is used.
    '''
    name = None
    spine_toc = container.opf_xpath('//opf:spine/@toc')
    if spine_toc:
        name = container.manifest_id_map.get(spine_toc[0], None)
    if not name:
        ncx_type = guess_type('a.ncx')
        name = container.manifest_type_map.get(ncx_type, [None])[0]
    return name or None
277
278
def find_existing_nav_toc(container):
    '''
    Return the name of the first manifest item with the ``nav`` property or
    None if there is no such item.
    '''
    return next(iter(container.manifest_items_with_property('nav')), None)
282
283
def get_x_toc(container, find_toc, parse_toc, verify_destinations=True):
    '''
    Read a Table of Contents using the specified pair of functions.

    :param find_toc: Called with the container, returns the name of the ToC file or None
    :param parse_toc: Called with the container and the ToC file name, returns the parsed ToC
    :param verify_destinations: If True the destinations of all entries are verified

    An empty ToC is returned if the ToC file was not found or does not exist
    in the container. The returned object has a ``toc_file_name`` attribute
    that is the name of the file the ToC was read from, or None.
    '''
    toc = find_toc(container)
    if toc is None or not container.has_name(toc):
        ans = TOC()
        ans.lang = ans.uid = None
    else:
        ans = parse_toc(container, toc)
    if toc and container.has_name(toc):
        ans.toc_file_name = toc
    else:
        ans.toc_file_name = None
    if verify_destinations:
        verify_toc_destinations(container, ans)
    return ans
295
296
def get_toc(container, verify_destinations=True):
    '''
    Return the Table of Contents of the book. For books with an OPF version
    less than 3 the NCX is used. For newer books the nav document is used,
    falling back to the NCX if the nav document yields no entries.
    '''
    read_ncx = partial(
        get_x_toc, container, find_existing_ncx_toc, parse_ncx, verify_destinations=verify_destinations)
    if container.opf_version_parsed.major < 3:
        return read_ncx()
    ans = get_x_toc(container, find_existing_nav_toc, parse_nav, verify_destinations=verify_destinations)
    if not len(ans):
        ans = read_ncx()
    return ans
306
307
def get_guide_landmarks(container):
    '''
    Yield the landmarks defined in the ``<guide>`` section of the OPF. Every
    landmark is a dictionary with the keys ``dest`` (file name), ``frag``,
    ``title`` and ``type``. References without an href or that point to files
    not present in the container are skipped.
    '''
    for ref in container.opf_xpath('./opf:guide/opf:reference'):
        href, title, rtype = ref.get('href'), ref.get('title'), ref.get('type')
        if not href:
            # A reference with no href cannot point anywhere
            continue
        href, frag = href.partition('#')[::2]
        name = container.href_to_name(href, container.opf_name)
        if container.has_name(name):
            yield {'dest':name, 'frag':frag, 'title':title or '', 'type':rtype or ''}
315
316
def get_nav_landmarks(container):
    '''
    Yield the landmarks defined in the ``<nav epub:type="landmarks">``
    elements of the nav document, if any. Every landmark is a dictionary with
    the keys ``dest`` (file name), ``frag``, ``title`` and ``type``. For each
    ``<li>`` only the first link with an href is considered and it is
    skipped if it points to a file not present in the container.
    '''
    nav = find_existing_nav_toc(container)
    if not nav or not container.has_name(nav):
        return
    root = container.parsed(nav)
    type_attr = '{%s}type' % EPUB_NS
    for elem in root.iterdescendants(XHTML('nav')):
        if elem.get(type_attr) != 'landmarks':
            continue
        for li in elem.iterdescendants(XHTML('li')):
            for a in li.iterdescendants(XHTML('a')):
                href, rtype = a.get('href'), a.get(type_attr)
                if not href:
                    continue
                title = etree.tostring(a, method='text', encoding='unicode', with_tail=False).strip()
                parts = href.partition('#')
                path, frag = parts[0], parts[2]
                name = container.href_to_name(path, nav)
                if container.has_name(name):
                    yield {'dest':name, 'frag':frag, 'title':title or '', 'type':rtype or ''}
                break
334
335
def get_landmarks(container):
    '''
    Return the list of landmarks in the book. For books with an OPF version
    of 3 or newer the nav document is used, if it has any landmarks. In all
    other cases the landmarks come from the OPF guide.
    '''
    if container.opf_version_parsed.major >= 3:
        ans = list(get_nav_landmarks(container))
        if ans:
            return ans
    return list(get_guide_landmarks(container))
344
345
def ensure_id(elem, all_ids):
    '''
    Make sure ``elem`` has an id. Returns the tuple ``(created, id)`` where
    ``created`` is True only if a new ``toc_N`` id had to be generated. For
    ``<a>`` elements with a name attribute but no id, the name is copied
    into the id and ``created`` is False. Generated ids are added to
    ``all_ids``, the set of ids already in use.
    '''
    existing = elem.get('id')
    if existing:
        return False, existing
    if elem.tag == XHTML('a'):
        anchor = elem.get('name', None)
        if anchor:
            elem.set('id', anchor)
            return False, anchor
    # Find the first toc_N that is not yet in use
    counter = 1
    candidate = 'toc_{}'.format(counter)
    while candidate in all_ids:
        counter += 1
        candidate = 'toc_{}'.format(counter)
    elem.set('id', candidate)
    all_ids.add(candidate)
    return True, elem.get('id')
364
365
def elem_to_toc_text(elem):
    '''
    Return the text to use as the title of a ToC entry pointing to ``elem``.
    The text content of the element is used, falling back to its title and
    alt attributes. Whitespace is collapsed and the text is limited to 1000
    characters. If no text is found a translated placeholder is returned.
    '''
    text = xml2text(elem).strip() or elem.get('title', '') or elem.get('alt', '')
    text = re.sub(r'\s+', ' ', text.strip())[:1000].strip()
    return text or _('(Untitled)')
377
378
def item_at_top(elem):
    '''
    Return True if ``elem`` is at the top of its document, that is, no
    images and no non-whitespace text occur in the ``<body>`` before it.
    Returns False if the body cannot be found.
    '''
    try:
        body = XPath('//h:body')(elem.getroottree().getroot())[0]
    except (TypeError, IndexError, KeyError, AttributeError):
        return False
    tree = body.getroottree()
    path = tree.getpath(elem)
    # Walk the body in document order until elem is reached
    for el in body.iterdescendants(etree.Element):
        epath = tree.getpath(el)
        if epath == path:
            break
        try:
            if el.tag.endswith('}img') or (el.text and el.text.strip()):
                return False
        except Exception:
            # Cannot inspect this node, assume it is content. Exception
            # rather than a bare except so that KeyboardInterrupt and
            # SystemExit are not swallowed.
            return False
        if not path.startswith(epath):
            # Only check tail of non-parent elements
            if el.tail and el.tail.strip():
                return False
    return True
400
401
def from_xpaths(container, xpaths):
    '''
    Generate a Table of Contents from a list of XPath expressions. Each
    expression in the list corresponds to a level of the generated ToC. For
    example: :code:`['//h:h1', '//h:h2', '//h:h3']` will generate a three level
    Table of Contents from the ``<h1>``, ``<h2>`` and ``<h3>`` tags.

    Levels whose expression matches nothing in any spine item are dropped and
    the remaining levels are renumbered. Matched elements that are not at the
    top of their file are given an id, if needed, and files modified in this
    way are committed to the container.
    '''
    tocroot = TOC()
    xpaths = [XPath(xp) for xp in xpaths]

    # Find those levels that have no elements in all spine items
    maps = OrderedDict()
    empty_levels = {i+1 for i, xp in enumerate(xpaths)}
    for spinepath in container.spine_items:
        name = container.abspath_to_name(spinepath)
        root = container.parsed(name)
        # Maps the 1-based level number to the set of elements matched in this file
        level_item_map = maps[name] = {i+1:frozenset(xp(root)) for i, xp in enumerate(xpaths)}
        for lvl, elems in iteritems(level_item_map):
            if elems:
                empty_levels.discard(lvl)
    # Remove empty levels from all level_maps
    if empty_levels:
        for name, lmap in tuple(iteritems(maps)):
            lmap = {lvl:items for lvl, items in iteritems(lmap) if lvl not in empty_levels}
            lmap = sorted(iteritems(lmap), key=itemgetter(0))
            # Renumber the surviving levels consecutively from 1
            lmap = {i+1:items for i, (l, items) in enumerate(lmap)}
            maps[name] = lmap

    # Maps every node in the generated tree to its level, the root is level 0
    node_level_map = {tocroot: 0}

    def parent_for_level(child_level):
        # Find the node a new entry of level child_level should be added to
        # by following the chain of last children down from the root
        limit = child_level - 1

        def process_node(node):
            child = node.last_child
            if child is None:
                return node
            lvl = node_level_map[child]
            # Last child is too deep: add to node itself. Last child is
            # exactly one level above: it is the parent. Otherwise descend.
            return node if lvl > limit else child if lvl == limit else process_node(child)

        return process_node(tocroot)

    for name, level_item_map in iteritems(maps):
        root = container.parsed(name)
        # Inverse mapping: matched element -> level
        item_level_map = {e:i for i, elems in iteritems(level_item_map) for e in elems}
        item_dirtied = False
        all_ids = set(root.xpath('//*/@id'))

        # Iterate in document order so that entries are created in reading order
        for item in root.iterdescendants(etree.Element):
            lvl = item_level_map.get(item, None)
            if lvl is None:
                continue
            text = elem_to_toc_text(item)
            parent = parent_for_level(lvl)
            if item_at_top(item):
                # No fragment needed, link to the file itself
                dirtied, elem_id = False, None
            else:
                dirtied, elem_id = ensure_id(item, all_ids)
            item_dirtied = dirtied or item_dirtied
            toc = parent.add(text, name, elem_id)
            node_level_map[toc] = lvl
            toc.dest_exists = True

        if item_dirtied:
            # An id was added to at least one element in this file
            container.commit_item(name, keep_parsed=True)

    return tocroot
469
470
def from_links(container):
    '''
    Generate a Table of Contents from links in the book.

    Every link in the spine items becomes a top level entry, except for links
    whose destination or text has already been seen and links whose
    destination does not exist.
    '''
    toc = TOC()
    link_path = XPath('//h:a[@href]')
    seen_titles, seen_dests = set(), set()
    for name, is_linear in container.spine_names:
        root = container.parsed(name)
        for a in link_path(root):
            href = a.get('href')
            if not href or not href.strip():
                continue
            frag = None
            if href.startswith('#'):
                dest = name
                frag = href[1:]
            else:
                # Slice instead of unpacking the separator into _, which
                # would shadow the translation function
                href, frag = href.partition('#')[::2]
                dest = container.href_to_name(href, base=name)
            frag = frag or None
            if (dest, frag) in seen_dests:
                continue
            seen_dests.add((dest, frag))
            text = elem_to_toc_text(a)
            if text in seen_titles:
                continue
            seen_titles.add(text)
            toc.add(text, dest, frag=frag)
    verify_toc_destinations(container, toc)
    # Iterate over a snapshot, removing from the list being iterated over
    # would skip the entry following every removed entry
    for child in tuple(toc):
        if not child.dest_exists:
            toc.remove(child)
    return toc
505
506
def find_text(node):
    '''
    Return the text of the first child element of ``node`` that has any
    text, with whitespace collapsed. If that text is longer than 200
    characters a shorter text is searched for inside the child, failing
    that, the text is truncated. Returns None if no child has text.
    '''
    LIMIT = 200
    whitespace = re.compile(r'\s+')
    for child in node:
        if not isinstance(child, etree._Element):
            continue
        text = whitespace.sub(' ', xml2text(child).strip())
        if not text:
            continue
        if len(text) <= LIMIT:
            return text
        # Look for less text in a child of this node, recursively
        return find_text(child) or (text[:LIMIT] + '...')
    return None
522
523
def from_files(container):
    '''
    Generate a Table of Contents from files in the book.

    There is one entry per spine item that has a ``<body>``. The title is
    the first text found in the file, falling back to the file name.
    '''
    toc = TOC()
    find_body = XPath('//h:body')
    for index, spinepath in enumerate(container.spine_items):
        name = container.abspath_to_name(spinepath)
        bodies = find_body(container.parsed(name))
        if not bodies:
            continue
        title = find_text(bodies[0])
        if not title:
            title = name.rpartition('/')[-1]
            stem = title.rpartition('.')[0].lower()
            # A first file named like a cover page gets a nicer title
            if index == 0 and stem in {'titlepage', 'cover'}:
                title = _('Cover')
        toc.add(title, name)
    return toc
542
543
def node_from_loc(root, locs, totals=None):
    '''
    Return the element reached by starting at the first body element of
    ``root`` and descending into the child element at each index in
    ``locs``.

    :param totals: If not None, the expected number of child elements at
        each step. :class:`MalformedMarkup` is raised on a mismatch.
    '''
    current = root.xpath('//*[local-name()="body"]')[0]
    for depth, index in enumerate(locs):
        siblings = tuple(current.iterchildren(etree.Element))
        if totals is not None and len(siblings) != totals[depth]:
            raise MalformedMarkup()
        current = siblings[index]
    return current
552
553
def add_id(container, name, loc, totals=None):
    '''
    Make sure the element at the location ``loc`` (see
    :func:`node_from_loc`) in the file ``name`` has an id and return that
    id. The file is committed to the container.

    :raises MalformedMarkup: If the location cannot be resolved even after
        re-parsing the file with the HTML 5 parser.
    '''
    root = container.parsed(name)
    try:
        node = node_from_loc(root, loc, totals=totals)
    except MalformedMarkup:
        # The webkit HTML parser and the container parser have yielded
        # different node counts, this can happen if the file is valid XML
        # but contains constructs like nested <p> tags. So force parse it
        # with the HTML 5 parser and try again.
        root = container.parse_xhtml(container.raw_data(name), fname=name, force_html5_parse=True)
        try:
            node = node_from_loc(root, loc, totals=totals)
        except MalformedMarkup:
            raise MalformedMarkup(_('The file %s has malformed markup. Try running the Fix HTML tool'
                                    ' before editing.') % name)
        container.replace(name, root)

    if not node.get('id'):
        existing_ids = set(root.xpath('//*/@id'))
        ensure_id(node, existing_ids)
    container.commit_item(name, keep_parsed=True)
    return node.get('id')
576
577
def create_ncx(toc, to_href, btitle, lang, uid):
    '''
    Build and return the root element of an NCX document for ``toc``.

    :param to_href: Function that converts a file name into an href suitable for the NCX
    :param btitle: Text for the docTitle of the NCX
    :param lang: Language code, underscores are replaced by hyphens
    :param uid: The value for the dtb:uid meta element
    '''
    lang = lang.replace('_', '-')
    ncx = etree.Element(NCX('ncx'),
        attrib={'version': '2005-1', XML('lang'): lang},
        nsmap={None: NCX_NS})
    head = etree.SubElement(ncx, NCX('head'))
    generator = ''.join(['calibre (', __version__, ')'])
    for meta_name, meta_content in (
        ('dtb:uid', str(uid)),
        ('dtb:depth', str(toc.depth)),
        ('dtb:generator', generator),
        ('dtb:totalPageCount', '0'),
        ('dtb:maxPageNumber', '0'),
    ):
        etree.SubElement(head, NCX('meta'), name=meta_name, content=meta_content)
    doc_title = etree.SubElement(ncx, NCX('docTitle'))
    etree.SubElement(doc_title, NCX('text')).text = btitle
    navmap = etree.SubElement(ncx, NCX('navMap'))
    spat = re.compile(r'\s+')

    # Running play order, shared by all levels of the tree
    counter = 0

    def add_nav_points(xml_parent, toc_parent):
        nonlocal counter
        for child in toc_parent:
            counter += 1
            order = counter
            point = etree.SubElement(xml_parent, NCX('navPoint'), id='num_%d' % order,
                            playOrder=str(order))
            label = etree.SubElement(point, NCX('navLabel'))
            label_text = child.title
            if label_text:
                label_text = spat.sub(' ', label_text)
            etree.SubElement(label, NCX('text')).text = label_text
            if child.dest:
                href = to_href(child.dest)
                if child.frag:
                    href += '#'+child.frag
                etree.SubElement(point, NCX('content'), src=href)
            add_nav_points(point, child)

    add_nav_points(navmap, toc)
    return ncx
620
621
def commit_ncx_toc(container, toc, lang=None, uid=None):
    '''
    Write ``toc`` into the NCX file of the book, creating the NCX file if
    the book does not have one.

    :param lang: Language for the NCX. Defaults to the first usable language
        declared in the OPF, falling back to the interface language.
    :param uid: Unique identifier for the NCX. Defaults to the unique
        identifier of the OPF, falling back to a newly generated one.
    '''
    tocname = find_existing_ncx_toc(container)
    if tocname is None:
        item = container.generate_item('toc.ncx', id_prefix='toc')
        tocname = container.href_to_name(item.get('href'), base=container.opf_name)
        ncx_id = item.get('id')
        for spine in container.opf_xpath('//opf:spine'):
            spine.set('toc', ncx_id)

    if not lang:
        lang = get_lang()
        for elem in container.opf_xpath('//dc:language'):
            declared = canonicalize_lang(xml2text(elem).strip())
            if declared:
                lang = lang_as_iso639_1(declared) or declared
                break
    lang = lang_as_iso639_1(lang) or lang

    if not uid:
        uid = uuid_id()
        eid = container.opf.get('unique-identifier', None)
        if eid:
            matches = container.opf_xpath('//*[@id="%s"]'%eid)
            if matches:
                uid = xml2text(matches[0])

    title = _('Table of Contents')
    book_titles = container.opf_xpath('//dc:title')
    if book_titles:
        title = xml2text(book_titles[0]).strip() or title

    ncx_root = create_ncx(toc, partial(container.name_to_href, base=tocname), title, lang, uid)
    container.replace(tocname, ncx_root)
    container.pretty_print.add(tocname)
656
657
def ensure_single_nav_of_type(root, ntype='toc'):
    '''
    Make sure there is exactly one ``<nav>`` element with the epub:type
    ``ntype`` in ``root`` and return it, emptied of all content. Extra
    navs of that type are removed. If there is none, a new one is appended
    to the ``<body>``.
    '''
    type_attr = '{%s}type' % EPUB_NS
    matching = [n for n in root.iterdescendants(XHTML('nav')) if n.get(type_attr) == ntype]
    for extra in matching[1:]:
        extract(extra)
    if not matching:
        nav = root.makeelement(XHTML('nav'))
        first_child(root, XHTML('body')).append(nav)
    else:
        nav = matching[0]
        # clear() also drops the attributes and the tail, so restore them
        saved_tail, saved_attrib = nav.tail, dict(nav.attrib)
        nav.clear()
        nav.attrib.update(saved_attrib)
        nav.tail = saved_tail
    nav.set(type_attr, ntype)
    return nav
675
676
def ensure_container_has_nav(container, lang=None, previous_nav=None):
    '''
    Make sure the book has a nav document, creating one if needed. Returns
    the tuple ``(name of the nav document, its parsed root)``.

    :param lang: If specified, set as the language of the nav document
    :param previous_nav: Optional tuple of ``(href, root)``. If the file the
        href refers to exists it becomes the nav document, otherwise the
        root is used as the content of a newly created nav document.
    '''
    tocname = find_existing_nav_toc(container)
    have_previous = previous_nav is not None
    if have_previous:
        nav_name = container.href_to_name(previous_nav[0])
        if nav_name and container.exists(nav_name):
            tocname = nav_name
            container.apply_unique_properties(tocname, 'nav')
    if tocname is not None:
        root = container.parsed(tocname)
    else:
        item = container.generate_item('nav.xhtml', id_prefix='nav')
        item.set('properties', 'nav')
        tocname = container.href_to_name(item.get('href'), base=container.opf_name)
        if have_previous:
            root = previous_nav[1]
        else:
            template = P('templates/new_nav.html', data=True).decode('utf-8')
            root = container.parse_xhtml(template)
        container.replace(tocname, root)
    if lang:
        lang = lang_as_iso639_1(lang) or lang
        for attr in ('lang', '{%s}lang' % XML_NS):
            root.set(attr, lang)
    return tocname, root
700
701
def collapse_li(parent):
    '''
    Remove the whitespace around the content of every ``<li>`` under
    ``parent`` that has exactly one child element.
    '''
    for li in parent.iterdescendants(XHTML('li')):
        if len(li) != 1:
            continue
        li.text = li[0].tail = None
707
708
def create_nav_li(container, ol, entry, tocname):
    '''
    Append an ``<li>`` containing a link to ``ol`` and return the ``<a>``
    element. The link points to ``entry['dest']`` plus ``entry['frag']``,
    if any, relative to ``tocname``.
    '''
    li = ol.makeelement(XHTML('li'))
    a = li.makeelement(XHTML('a'))
    ol.append(li)
    li.append(a)
    target = container.name_to_href(entry['dest'], tocname)
    frag = entry['frag']
    if frag:
        target = target + '#' + frag
    a.set('href', target)
    return a
719
720
def set_landmarks(container, root, tocname, landmarks):
    '''
    Replace the contents of the landmarks nav in ``root`` with
    ``landmarks``. Landmarks without a type, or whose destination is not an
    existing OEB document in the container, are left out.
    '''
    nav = ensure_single_nav_of_type(root, 'landmarks')
    nav.set('hidden', '')
    ol = nav.makeelement(XHTML('ol'))
    nav.append(ol)
    type_attr = '{%s}type' % EPUB_NS
    for entry in landmarks:
        dest = entry['dest']
        usable = entry['type'] and container.has_name(dest) and container.mime_map[dest] in OEB_DOCS
        if not usable:
            continue
        a = create_nav_li(container, ol, entry, tocname)
        a.set(type_attr, entry['type'])
        a.text = entry['title'] or None
    pretty_xml_tree(nav)
    collapse_li(nav)
733
734
def commit_nav_toc(container, toc, lang=None, landmarks=None, previous_nav=None):
    '''
    Write ``toc`` into the nav document of the book, creating the nav
    document if needed.

    :param lang: If specified, set as the language of the nav document
    :param landmarks: If not None, a list of landmarks (dictionaries with
        the keys ``dest``, ``frag``, ``title`` and ``type``) that replace the
        landmarks in the nav document.
    :param previous_nav: Passed on to :func:`ensure_container_has_nav`
    '''
    tocname, root = ensure_container_has_nav(container, lang=lang, previous_nav=previous_nav)
    nav = ensure_single_nav_of_type(root, 'toc')
    if toc.toc_title:
        nav.append(nav.makeelement(XHTML('h1')))
        nav[-1].text = toc.toc_title

    rnode = nav.makeelement(XHTML('ol'))
    nav.append(rnode)
    to_href = partial(container.name_to_href, base=tocname)
    spat = re.compile(r'\s+')

    def process_node(xml_parent, toc_parent):
        for child in toc_parent:
            li = xml_parent.makeelement(XHTML('li'))
            xml_parent.append(li)
            title = child.title or ''
            title = spat.sub(' ', title).strip()
            # Entries without a destination cannot be links
            a = li.makeelement(XHTML('a' if child.dest else 'span'))
            a.text = title
            li.append(a)
            if child.dest:
                href = to_href(child.dest)
                if child.frag:
                    href += '#'+child.frag
                a.set('href', href)
            if len(child):
                ol = li.makeelement(XHTML('ol'))
                li.append(ol)
                process_node(ol, child)
    process_node(rnode, toc)
    pretty_xml_tree(nav)

    collapse_li(nav)
    nav.tail = '\n'

    if landmarks is not None:
        # Previously this argument was accepted but silently ignored
        set_landmarks(container, root, tocname, landmarks)

    if toc.page_list:
        nav = ensure_single_nav_of_type(root, 'page-list')
        nav.set('hidden', '')
        ol = nav.makeelement(XHTML('ol'))
        nav.append(ol)
        for entry in toc.page_list:
            if container.has_name(entry['dest']) and container.mime_map[entry['dest']] in OEB_DOCS:
                a = create_nav_li(container, ol, entry, tocname)
                a.text = str(entry['pagenum'])
        pretty_xml_tree(nav)
        collapse_li(nav)
    container.replace(tocname, root)
783
784
def commit_toc(container, toc, lang=None, uid=None):
    '''
    Write ``toc`` into the book. The NCX is always written, the nav
    document only for books with an OPF version greater than 2.
    '''
    commit_ncx_toc(container, toc, lang=lang, uid=uid)
    if container.opf_version_parsed.major <= 2:
        return
    commit_nav_toc(container, toc, lang=lang)
789
790
def remove_names_from_toc(container, names):
    '''
    Remove all entries that point to any of the files in ``names`` from both
    the NCX and the nav Tables of Contents. The children of removed entries
    are kept. Returns the list of names of the ToC files that were changed.
    '''
    changed = []
    names = frozenset(names)
    toc_kinds = (
        (find_existing_ncx_toc, parse_ncx, commit_ncx_toc),
        (find_existing_nav_toc, parse_nav, commit_nav_toc),
    )
    for find_toc, parse_toc, write_toc in toc_kinds:
        toc = get_x_toc(container, find_toc, parse_toc, verify_destinations=False)
        if len(toc) <= 0:
            continue
        doomed = [node for node in toc.iterdescendants() if node.dest in names]
        if not doomed:
            continue
        for node in reversed(doomed):
            node.remove_from_parent()
        write_toc(container, toc)
        changed.append(find_toc(container))
    return changed
810
811
def find_inline_toc(container):
    '''
    Return the name of the first spine item whose ``<body>`` has the id
    ``calibre_generated_inline_toc``, or None if there is no such item.
    '''
    marker = '//*[local-name()="body" and @id="calibre_generated_inline_toc"]'
    matches = (
        name for name, linear in container.spine_names
        if container.parsed(name).xpath(marker))
    return next(matches, None)
816
817
def toc_to_html(toc, container, toc_name, title, lang=None):
    '''
    Render ``toc`` as an HTML document made of nested ``<ul>`` lists and
    return the root ``<html>`` element. Links are relative to ``toc_name``.
    The ``<body>`` gets the id ``calibre_generated_inline_toc``.
    '''

    def add_entry(html_parent, node, level=1, indent='  ', style_level=2):
        li = html_parent.makeelement(XHTML('li'))
        li.tail = '\n'+ (indent*level)
        html_parent.append(li)
        if node.dest:
            href = container.name_to_href(node.dest, toc_name)
            if node.frag:
                href += '#' + node.frag
        else:
            # Entries without a destination link to nothing
            href = '#'
        a = li.makeelement(XHTML('a'), href=href)
        a.text = node.title
        li.append(a)
        if len(node) > 0:
            sublist = li.makeelement(XHTML('ul'))
            sublist.set('class', 'level%d' % (style_level))
            li.append(sublist)
            a.tail = '\n\n' + (indent*(level+2))
            sublist.text = '\n'+(indent*(level+3))
            sublist.tail = '\n\n' + (indent*(level+1))
            for child in node:
                add_entry(sublist, child, level+3, style_level=style_level + 1)
            sublist[-1].tail = '\n' + (indent*(level+2))

    E = ElementMaker(namespace=XHTML_NS, nsmap={None:XHTML_NS})
    top_list = E.ul()
    html = E.html(
        E.head(
            E.title(title),
            E.style(P('templates/inline_toc_styles.css', data=True).decode('utf-8'), type='text/css'),
        ),
        E.body(
            E.h2(title),
            top_list,
            id="calibre_generated_inline_toc",
        )
    )

    top_list.set('class', 'level1')
    for child in toc:
        add_entry(top_list, child)
    if lang:
        html.set('lang', lang)
    pretty_html_tree(container, html)
    return html
865
866
def create_inline_toc(container, title=None):
    '''
    Create an inline (HTML) Table of Contents from the existing Table of
    Contents of the book. A previously generated inline Table of Contents is
    overwritten, otherwise a new file is added at the start of the spine.
    Returns the name of the file or None if the book has no Table of Contents.

    :param title: The title for this table of contents.
    '''
    lang = get_book_language(container)
    default_title = 'Table of Contents'
    if lang:
        lang = lang_as_iso639_1(lang) or lang
        default_title = translate(lang, default_title)
    if not title:
        title = default_title
    toc = get_toc(container)
    if len(toc) == 0:
        return None

    name = find_inline_toc(container)
    html = toc_to_html(toc, container, name, title, lang)
    raw = serialize(html, 'text/html')
    if name is not None:
        with container.open(name, 'wb') as f:
            f.write(raw)
    else:
        # Pick a file name that is not in use
        name, suffix = 'toc.xhtml', 0
        while container.has_name(name):
            suffix += 1
            name = 'toc%d.xhtml' % suffix
        container.add_file(name, raw, spine_index=0)
    set_guide_item(container, 'toc', title, name, frag='calibre_generated_inline_toc')
    return name
898