#!/usr/local/bin/python3.8
# vim:fileencoding=utf-8


__license__ = 'GPL v3'
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'

import posixpath, re
from uuid import uuid4

from calibre.ebooks.oeb.base import urlquote
from calibre.utils.filenames import ascii_text
from polyglot.urllib import urlparse


def start_text(tag, prefix_len=0, top_level=True):
    '''
    Return the leading text contained in ``tag``, used to derive a readable
    bookmark name.

    Text is gathered from ``tag.text`` and then, recursively, from each child
    element and its tail until at least ``50 - prefix_len`` characters have
    been collected. Only the top-level call truncates the result to that limit
    and appends ``'...'``; recursive calls return whatever they gathered.

    :param tag: An element exposing ``text`` and ``iterchildren()``.
    :param prefix_len: Number of characters already collected by the caller.
    :param top_level: False for recursive calls, which must not truncate.
    '''
    ans = tag.text or ''
    limit = 50 - prefix_len
    if len(ans) < limit:
        for child in tag.iterchildren('*'):
            ans += start_text(child, len(ans), top_level=False) + (child.tail or '')
            if len(ans) >= limit:
                break
    if top_level and len(ans) > limit:
        ans = ans[:limit] + '...'
    return ans


class TOCItem:
    ''' A single entry of the generated Table of Contents. '''

    def __init__(self, title, bmark, level):
        self.title, self.bmark, self.level = title, bmark, level
        # Set by LinksManager.process_toc_links() on the first and last
        # entries, which carry the start and end of the TOC field.
        self.is_first = self.is_last = False

    def serialize(self, body, makeelement):
        '''
        Build the paragraph for this entry and insert it at the start of
        ``body``.

        :param body: The element the paragraph is inserted into (at index 0).
        :param makeelement: Factory called as ``makeelement(parent, tag, **attrs)``.
        '''
        p = makeelement(body, 'w:p', append=False)
        ppr = makeelement(p, 'w:pPr')
        makeelement(ppr, 'w:pStyle', w_val="Normal")
        # Indent proportionally to the nesting depth of the entry
        makeelement(ppr, 'w:ind', w_left='0', w_firstLineChars='0', w_firstLine='0', w_leftChars=str(200 * self.level))
        if self.is_first:
            makeelement(ppr, 'w:pageBreakBefore', w_val='off')
            # The first entry opens the TOC field: begin, instruction, separate
            r = makeelement(p, 'w:r')
            makeelement(r, 'w:fldChar', w_fldCharType='begin')
            r = makeelement(p, 'w:r')
            makeelement(r, 'w:instrText').text = r' TOC \h '
            r[0].set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
            r = makeelement(p, 'w:r')
            makeelement(r, 'w:fldChar', w_fldCharType='separate')
        hl = makeelement(p, 'w:hyperlink', w_anchor=self.bmark)
        r = makeelement(hl, 'w:r')
        rpr = makeelement(r, 'w:rPr')
        makeelement(rpr, 'w:color', w_val='0000FF', w_themeColor='hyperlink')
        makeelement(rpr, 'w:u', w_val='single')
        makeelement(r, 'w:t').text = self.title
        if self.is_last:
            # The last entry closes the TOC field opened by the first one
            r = makeelement(p, 'w:r')
            makeelement(r, 'w:fldChar', w_fldCharType='end')
        body.insert(0, p)


def sanitize_bookmark_name(base):
    '''
    Convert ``base`` into a bookmark name made only of ASCII letters, digits
    and underscores, at most 32 characters long and with no trailing
    underscores. The result can be the empty string if ``base`` contains no
    ASCII alphanumeric characters after transliteration.
    '''
    # Max length allowed by Word appears to be 40, we use 32 to leave some
    # space for making the name unique
    return re.sub(r'[^0-9a-zA-Z]', '_', ascii_text(base))[:32].rstrip('_')


class LinksManager:
    '''
    Tracks bookmarks, internal/external hyperlinks and Table of Contents
    entries while a document is being written.
    '''

    def __init__(self, namespace, document_relationships, log):
        self.namespace = namespace
        self.log = log
        self.document_relationships = document_relationships
        # Random pseudo-anchor that stands for "the top of a file"
        self.top_anchor = uuid4().hex
        # Maps (href, anchor) -> bookmark name
        self.anchor_map = {}
        self.used_bookmark_names = set()
        self.bmark_id = 0
        # hrefs of files for which a top-of-file bookmark has been created
        self.document_hrefs = set()
        # Maps external URL -> relationship id
        self.external_links = {}
        self.toc = []

    def bookmark_for_anchor(self, anchor, current_item, html_tag):
        '''
        Return the unique bookmark name for ``anchor`` in ``current_item``,
        creating and registering it on first use.

        The name is derived from the file name (for the top-of-file anchor) or
        from the leading text of ``html_tag``, falling back to the anchor
        itself, and finally to a generic name if nothing usable remains after
        sanitizing.
        '''
        key = (current_item.href, anchor)
        if key in self.anchor_map:
            return self.anchor_map[key]
        if anchor == self.top_anchor:
            name = ('Top of %s' % posixpath.basename(current_item.href))
            self.document_hrefs.add(current_item.href)
        else:
            name = start_text(html_tag).strip() or anchor
        name = sanitize_bookmark_name(name)
        if not name:
            # Text such as "..." sanitizes to nothing; an empty bookmark name
            # would produce an unusable w:anchor target, so fall back to the
            # anchor id and then to a generic name.
            name = (anchor and sanitize_bookmark_name(anchor)) or 'bookmark'
        i, bname = 0, name
        while name in self.used_bookmark_names:
            i += 1
            name = bname + ('_%d' % i)
        self.anchor_map[key] = name
        self.used_bookmark_names.add(name)
        return name

    @property
    def bookmark_id(self):
        ''' A fresh numeric id; every access increments the counter. '''
        self.bmark_id += 1
        return self.bmark_id

    def _bookmark_for_href(self, href, fragment):
        '''
        Return the bookmark registered for ``fragment`` in the file ``href``,
        or the file's top-of-file bookmark when the fragment is empty or
        unknown. ``href`` must be in ``self.document_hrefs``.
        '''
        key = (href, fragment or self.top_anchor)
        if key in self.anchor_map:
            return self.anchor_map[key]
        return self.anchor_map[(href, self.top_anchor)]

    def serialize_hyperlink(self, parent, link):
        '''
        Create a ``w:hyperlink`` element under ``parent`` for ``link``, a
        ``(item, url, tooltip)`` tuple, and return it.

        Links without a scheme are resolved against ``item`` and turned into
        bookmark links when they point into the document; http, https and ftp
        links become external relationships. For anything else no hyperlink
        is created and ``parent`` itself is returned.
        '''
        item, url, tooltip = link
        purl = urlparse(url)
        href = purl.path

        def make_link(parent, anchor=None, rel_id=None, tooltip=None):
            kw = {}
            if anchor is not None:
                kw['w_anchor'] = anchor
            elif rel_id is not None:
                kw['r_id'] = rel_id
            if tooltip:
                kw['w_tooltip'] = tooltip
            return self.namespace.makeelement(parent, 'w:hyperlink', **kw)

        if not purl.scheme:
            href = item.abshref(href)
            if href not in self.document_hrefs:
                # Retry with the URL-quoted form of the path
                href = urlquote(href)
            if href in self.document_hrefs:
                bmark = self._bookmark_for_href(href, purl.fragment)
                return make_link(parent, anchor=bmark, tooltip=tooltip)
            else:
                self.log.warn('Ignoring internal hyperlink with href (%s) pointing to unknown destination' % url)
        if purl.scheme in {'http', 'https', 'ftp'}:
            if url not in self.external_links:
                self.external_links[url] = self.document_relationships.add_relationship(url, self.namespace.names['LINKS'], target_mode='External')
            return make_link(parent, rel_id=self.external_links[url], tooltip=tooltip)
        return parent

    def process_toc_node(self, toc, level=0):
        '''
        Append a TOCItem for ``toc`` if its href points into the document,
        then recurse into its children one level deeper.
        '''
        href = toc.href
        if href:
            purl = urlparse(href)
            href = purl.path
            if href in self.document_hrefs:
                bmark = self._bookmark_for_href(href, purl.fragment)
                self.toc.append(TOCItem(toc.title, bmark, level))
        for child in toc:
            self.process_toc_node(child, level+1)

    def process_toc_links(self, oeb):
        '''
        Rebuild ``self.toc`` from ``oeb.toc``. Nothing is collected unless
        ``oeb.toc.count()`` is greater than one.
        '''
        self.toc = []
        has_toc = oeb.toc and oeb.toc.count() > 1
        if not has_toc:
            return
        for child in oeb.toc:
            self.process_toc_node(child)
        if self.toc:
            self.toc[0].is_first = True
            self.toc[-1].is_last = True

    def serialize_toc(self, body, primary_heading_style):
        '''
        Insert the Table of Contents (a heading paragraph followed by one
        paragraph per entry) at the start of ``body``.

        :param primary_heading_style: Style whose ``id`` is applied to the
            heading paragraph, or None to leave it unstyled.
        '''
        # Turn on the first existing page break so that the content following
        # the Table of Contents starts on a new page. Skip this if there is no
        # such element instead of failing with an IndexError.
        page_breaks = body[0].xpath('//*[local-name()="pageBreakBefore"]') if len(body) else ()
        if page_breaks:
            page_breaks[0].set('{%s}val' % self.namespace.namespaces['w'], 'on')
        # Each entry inserts itself at index 0, so go through them in reverse
        for block in reversed(self.toc):
            block.serialize(body, self.namespace.makeelement)
        title = __('Table of Contents')
        makeelement = self.namespace.makeelement
        p = makeelement(body, 'w:p', append=False)
        ppr = makeelement(p, 'w:pPr')
        if primary_heading_style is not None:
            makeelement(ppr, 'w:pStyle', w_val=primary_heading_style.id)
        makeelement(ppr, 'w:pageBreakBefore', w_val='off')
        makeelement(makeelement(p, 'w:r'), 'w:t').text = title
        body.insert(0, p)