1#!/usr/local/bin/python3.8
2# vim:fileencoding=utf-8
3
4
5__license__ = 'GPL v3'
6__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
7
8import posixpath, re
9from uuid import uuid4
10
11from calibre.ebooks.oeb.base import urlquote
12from calibre.utils.filenames import ascii_text
13from polyglot.urllib import urlparse
14
15
def start_text(tag, prefix_len=0, top_level=True):
    """Return the leading text of ``tag`` and its descendants.

    The text of ``tag`` itself is taken first, followed, for every child
    element, by that child's own leading text and its tail. Collection stops
    once the gathered text reaches the budget of ``50 - prefix_len``
    characters.

    :param tag: element exposing ``text``, ``tail`` and ``iterchildren('*')``
    :param prefix_len: number of characters already gathered by the caller
    :param top_level: when True the result is cut to the budget and ``'...'``
        is appended if it was too long; recursive calls pass False and
        return their text uncut
    """
    budget = 50 - prefix_len
    collected = tag.text or ''
    if len(collected) < budget:
        for kid in tag.iterchildren('*'):
            nested = start_text(kid, len(collected), top_level=False)
            collected = collected + nested + (kid.tail or '')
            if len(collected) >= budget:
                break
    # Only the outermost call truncates, so the ellipsis is added once
    if not top_level or len(collected) <= budget:
        return collected
    return collected[:budget] + '...'
27
28
class TOCItem:
    """A single table of contents line that links to a bookmark."""

    def __init__(self, title, bmark, level):
        self.title = title
        self.bmark = bmark
        self.level = level
        # Flags marking the first/last TOC entry; serialize() uses them to
        # open and close the TOC field
        self.is_first = False
        self.is_last = False

    def serialize(self, body, makeelement):
        """Build the paragraph for this entry and insert it at the start of body.

        :param body: element that receives the new paragraph as its first child
        :param makeelement: callable ``makeelement(parent, tag, append=True, **attrs)``
            that creates an element and returns it
        """
        para = makeelement(body, 'w:p', append=False)

        def add_field_char(kind):
            # Each field character lives in its own run
            run = makeelement(para, 'w:r')
            makeelement(run, 'w:fldChar', w_fldCharType=kind)

        para_props = makeelement(para, 'w:pPr')
        makeelement(para_props, 'w:pStyle', w_val="Normal")
        # Nested entries are indented by 200 units per level
        indent = str(200 * self.level)
        makeelement(para_props, 'w:ind', w_left='0', w_firstLineChars='0', w_firstLine='0', w_leftChars=indent)

        if self.is_first:
            # The first entry opens the TOC field: begin, instruction, separate
            makeelement(para_props, 'w:pageBreakBefore', w_val='off')
            add_field_char('begin')
            instr_run = makeelement(para, 'w:r')
            makeelement(instr_run, 'w:instrText').text = r' TOC \h '
            instr_run[0].set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
            add_field_char('separate')

        link = makeelement(para, 'w:hyperlink', w_anchor=self.bmark)
        link_run = makeelement(link, 'w:r')
        run_props = makeelement(link_run, 'w:rPr')
        makeelement(run_props, 'w:color', w_val='0000FF', w_themeColor='hyperlink')
        makeelement(run_props, 'w:u', w_val='single')
        makeelement(link_run, 'w:t').text = self.title

        if self.is_last:
            # The last entry closes the TOC field
            add_field_char('end')

        body.insert(0, para)
59
60
def sanitize_bookmark_name(base):
    """Turn ``base`` into a bookmark name made only of ASCII letters, digits and ``_``.

    Every other character of the ASCII form of ``base`` becomes an
    underscore; trailing underscores are dropped after truncation.
    """
    # Word appears to allow at most 40 characters in a bookmark name; keep
    # only 32 so there is room for a suffix that makes the name unique
    max_length = 32
    ascii_form = ascii_text(base)
    underscored = re.sub(r'[^0-9a-zA-Z]', '_', ascii_form)
    shortened = underscored[:max_length]
    return shortened.rstrip('_')
65
66
class LinksManager:
    """Keep track of bookmarks, hyperlinks and table of contents entries.

    Anchors inside the source files are mapped to unique bookmark names,
    hyperlinks are serialized either as links to those bookmarks or as
    external relationships, and the TOC is collected as a flat list of
    :class:`TOCItem` objects.
    """

    def __init__(self, namespace, document_relationships, log):
        """
        :param namespace: object providing ``makeelement``, ``names`` and ``namespaces``
        :param document_relationships: object whose ``add_relationship`` registers
            external link targets and returns the value used as ``r_id``
        :param log: logger used to warn about unresolvable internal links
        """
        self.namespace = namespace
        self.log = log
        self.document_relationships = document_relationships
        # Random anchor name that stands for "the top of a file"
        self.top_anchor = uuid4().hex
        # (href, anchor) -> bookmark name
        self.anchor_map = {}
        self.used_bookmark_names = set()
        self.bmark_id = 0
        # hrefs of the files for which a top-of-file bookmark exists
        self.document_hrefs = set()
        # url -> value returned by add_relationship() for that url
        self.external_links = {}
        self.toc = []

    def bookmark_for_anchor(self, anchor, current_item, html_tag):
        """Return the bookmark name for ``anchor`` in ``current_item``, creating it if needed.

        The name is derived from the file name for the top-of-file anchor
        and otherwise from the leading text of ``html_tag`` (falling back to
        the anchor itself). A numeric suffix is added until the name is unique.
        """
        key = (current_item.href, anchor)
        if key in self.anchor_map:
            return self.anchor_map[key]
        if anchor == self.top_anchor:
            name = 'Top of %s' % posixpath.basename(current_item.href)
            self.document_hrefs.add(current_item.href)
        else:
            name = start_text(html_tag).strip() or anchor
        name = sanitize_bookmark_name(name)
        base_name, suffix = name, 0
        while name in self.used_bookmark_names:
            suffix += 1
            name = base_name + ('_%d' % suffix)
        self.anchor_map[key] = name
        self.used_bookmark_names.add(name)
        return name

    @property
    def bookmark_id(self):
        """Return a fresh bookmark id. Every access increments the counter."""
        self.bmark_id += 1
        return self.bmark_id

    def _bookmark_for_href(self, href, fragment):
        """Return the bookmark for ``href#fragment`` or None for an unknown file.

        When the fragment is empty or has no bookmark of its own, the
        bookmark for the top of the file is used instead.
        """
        if href not in self.document_hrefs:
            return None
        key = (href, fragment or self.top_anchor)
        if key in self.anchor_map:
            return self.anchor_map[key]
        return self.anchor_map[(href, self.top_anchor)]

    def serialize_hyperlink(self, parent, link):
        """Create the ``w:hyperlink`` element for ``link`` inside ``parent``.

        :param parent: element the hyperlink is created in
        :param link: tuple of ``(item, url, tooltip)``
        :return: the new hyperlink element, or ``parent`` itself when the
            link cannot be represented (unknown internal destination or a
            scheme other than http, https and ftp)
        """
        item, url, tooltip = link
        purl = urlparse(url)

        def make_link(parent, anchor=None, rel_id=None, tooltip=None):
            attrs = {}
            if anchor is not None:
                attrs['w_anchor'] = anchor
            elif rel_id is not None:
                attrs['r_id'] = rel_id
            if tooltip:
                attrs['w_tooltip'] = tooltip
            return self.namespace.makeelement(parent, 'w:hyperlink', **attrs)

        if not purl.scheme:
            # Link to a location inside the book
            href = item.abshref(purl.path)
            if href not in self.document_hrefs:
                # Retry with the quoted form of the href
                href = urlquote(href)
            bmark = self._bookmark_for_href(href, purl.fragment)
            if bmark is not None:
                return make_link(parent, anchor=bmark, tooltip=tooltip)
            self.log.warn('Ignoring internal hyperlink with href (%s) pointing to unknown destination' % url)
            return parent

        if purl.scheme in {'http', 'https', 'ftp'}:
            # Register every external URL only once and reuse its relationship
            if url not in self.external_links:
                self.external_links[url] = self.document_relationships.add_relationship(
                    url, self.namespace.names['LINKS'], target_mode='External')
            return make_link(parent, rel_id=self.external_links[url], tooltip=tooltip)
        return parent

    def process_toc_node(self, toc, level=0):
        """Add ``toc`` and, recursively, its children to ``self.toc``.

        Nodes without an href, or whose href points to an unknown file, are
        skipped, but their children are still processed one level deeper.
        """
        href = toc.href
        if href:
            purl = urlparse(href)
            bmark = self._bookmark_for_href(purl.path, purl.fragment)
            if bmark is not None:
                self.toc.append(TOCItem(toc.title, bmark, level))
        for child in toc:
            self.process_toc_node(child, level + 1)

    def process_toc_links(self, oeb):
        """Rebuild ``self.toc`` from ``oeb.toc``.

        Nothing is collected unless ``oeb.toc.count()`` is greater than one.
        The first and last collected entries are flagged so that they open
        and close the TOC field when serialized.
        """
        self.toc = []
        has_toc = oeb.toc and oeb.toc.count() > 1
        if not has_toc:
            return
        for child in oeb.toc:
            self.process_toc_node(child)
        if self.toc:
            self.toc[0].is_first = True
            self.toc[-1].is_last = True

    def serialize_toc(self, body, primary_heading_style):
        """Insert the table of contents, preceded by its title, at the start of ``body``.

        :param body: element whose children are the document's blocks
        :param primary_heading_style: style whose ``id`` is applied to the
            title paragraph, or None to leave the title unstyled
        """
        makeelement = self.namespace.makeelement
        # Turn on the page break of the first pageBreakBefore element so the
        # existing content starts on a new page after the TOC. The XPath is
        # absolute, so it can be evaluated from body itself, which also works
        # when body has no children; a document without such an element is
        # left untouched instead of raising IndexError.
        page_breaks = body.xpath('//*[local-name()="pageBreakBefore"]')
        if page_breaks:
            page_breaks[0].set('{%s}val' % self.namespace.namespaces['w'], 'on')
        # Every entry is inserted at position 0, so go through them in reverse
        for block in reversed(self.toc):
            block.serialize(body, makeelement)
        title = __('Table of Contents')
        p = makeelement(body, 'w:p', append=False)
        ppr = makeelement(p, 'w:pPr')
        if primary_heading_style is not None:
            makeelement(ppr, 'w:pStyle', w_val=primary_heading_style.id)
        makeelement(ppr, 'w:pageBreakBefore', w_val='off')
        makeelement(makeelement(p, 'w:r'), 'w:t').text = title
        body.insert(0, p)
178