1#!/usr/local/bin/python3.8 2# -*- coding: utf-8 -*- 3# Copyright (C) 2006-2010 Søren Roug, European Environment Agency 4# 5# This library is free software; you can redistribute it and/or 6# modify it under the terms of the GNU Lesser General Public 7# License as published by the Free Software Foundation; either 8# version 2.1 of the License, or (at your option) any later version. 9# 10# This library is distributed in the hope that it will be useful, 11# but WITHOUT ANY WARRANTY; without even the implied warranty of 12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13# Lesser General Public License for more details. 14# 15# You should have received a copy of the GNU Lesser General Public 16# License along with this library; if not, write to the Free Software 17# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 18# 19# Contributor(s): 20# 21# import pdb 22# pdb.set_trace() 23 24from collections import defaultdict 25from xml.sax import handler 26from xml.sax.saxutils import escape, quoteattr 27from xml.dom import Node 28 29from .opendocument import load 30 31from .namespaces import ANIMNS, CHARTNS, CONFIGNS, DCNS, DR3DNS, DRAWNS, FONS, \ 32 FORMNS, MATHNS, METANS, NUMBERNS, OFFICENS, PRESENTATIONNS, SCRIPTNS, \ 33 SMILNS, STYLENS, SVGNS, TABLENS, TEXTNS, XLINKNS 34from polyglot.builtins import unicode_type 35 36if False: # Added by Kovid 37 DR3DNS, MATHNS, CHARTNS, CONFIGNS, ANIMNS, FORMNS, SMILNS, SCRIPTNS 38 39# Handling of styles 40# 41# First there are font face declarations. These set up a font style that will be 42# referenced from a text-property. The declaration describes the font making 43# it possible for the application to find a similar font should the system not 44# have that particular one. The StyleToCSS stores these attributes to be used 45# for the CSS2 font declaration. 46# 47# Then there are default-styles. 
# These set defaults for various style types:
# "text", "paragraph", "section", "ruby", "table", "table-column", "table-row",
# "table-cell", "graphic", "presentation", "drawing-page", "chart".
# Since CSS2 can't refer to another style, ODF2XHTML adds these to all
# styles unless overridden.
#
# The real styles are declared in the <style:style> element. They have a
# family referring to the default-styles, and may have a parent style.
#
# Styles have scope. The same name can be used for both paragraph and
# character etc. styles. Since CSS2 has no scope we use a prefix. (Not elegant)
# In ODF a style can have a parent, these parents can be chained.


class StyleToCSS:

    """ Contains the rules that convert ODF style attributes to CSS2.

        Every converter method has the signature
        ``(ruleset, sdict, rule, val)``:

        ruleset -- the complete mapping of ODF attributes of the style
        sdict   -- the CSS dictionary under construction (modified in place)
        rule    -- the (namespace, name) tuple of the attribute being converted
        val     -- the value of that attribute
    """

    def __init__(self):
        # Font declarations: name -> (family, generic CSS family)
        self.fontdict = {}

        # Fill-images from presentations for backgrounds: name -> href
        self.fillimages = {}

        # Dispatch table: ODF attribute -> converter method
        self.ruleconversions = {
            (DRAWNS,u'fill-image-name'): self.c_drawfillimage,
            (FONS,u"background-color"): self.c_fo,
            (FONS,u"border"): self.c_fo,
            (FONS,u"border-bottom"): self.c_fo,
            (FONS,u"border-left"): self.c_fo,
            (FONS,u"border-right"): self.c_fo,
            (FONS,u"border-top"): self.c_fo,
            (FONS,u"break-after"): self.c_break,  # Added by Kovid
            (FONS,u"break-before"): self.c_break,  # Added by Kovid
            (FONS,u"color"): self.c_fo,
            (FONS,u"font-family"): self.c_fo,
            (FONS,u"font-size"): self.c_fo,
            (FONS,u"font-style"): self.c_fo,
            (FONS,u"font-variant"): self.c_fo,
            (FONS,u"font-weight"): self.c_fo,
            (FONS,u"line-height"): self.c_fo,
            (FONS,u"margin"): self.c_fo,
            (FONS,u"margin-bottom"): self.c_fo,
            (FONS,u"margin-left"): self.c_fo,
            (FONS,u"margin-right"): self.c_fo,
            (FONS,u"margin-top"): self.c_fo,
            (FONS,u"min-height"): self.c_fo,
            (FONS,u"padding"): self.c_fo,
            (FONS,u"padding-bottom"): self.c_fo,
            (FONS,u"padding-left"): self.c_fo,
            (FONS,u"padding-right"): self.c_fo,
            (FONS,u"padding-top"): self.c_fo,
            (FONS,u"page-width"): self.c_page_width,
            (FONS,u"page-height"): self.c_page_height,
            (FONS,u"text-align"): self.c_text_align,
            (FONS,u"text-indent"): self.c_fo,
            (TABLENS,u'border-model'): self.c_border_model,
            (STYLENS,u'column-width'): self.c_width,
            (STYLENS,u"font-name"): self.c_fn,
            (STYLENS,u'horizontal-pos'): self.c_hp,
            (STYLENS,u'text-position'): self.c_text_position,
            (STYLENS,u'text-line-through-style'): self.c_text_line_through_style,
            (STYLENS,u'text-underline-style'): self.c_text_underline_style,
            (STYLENS,u'width'): self.c_width,
            # FIXME Should do style:vertical-pos here
        }

    def save_font(self, name, family, generic):
        """ It is possible that the HTML browser doesn't know how to
            show a particular font. Fortunately ODF provides generic fallbacks.
            Unfortunately they are not the same as CSS2.
            CSS2: serif, sans-serif, cursive, fantasy, monospace
            ODF: roman, swiss, modern, decorative, script, system
            This method puts the font and its fallback into self.fontdict.
            An unknown generic family falls back to sans-serif.
        """
        generic_to_css = {
            "roman": "serif",
            "swiss": "sans-serif",
            "modern": "monospace",
            "decorative": "sans-serif",
            "script": "monospace",
            "system": "serif",
        }
        self.fontdict[name] = (family, generic_to_css.get(generic, "sans-serif"))

    def c_drawfillimage(self, ruleset, sdict, rule, val):
        """ Fill a figure with an image. Since CSS doesn't let you resize images
            this should really be implemented as an absolutely positioned <img>
            with a width and a height.

            A reference to a fill image that was never registered in
            self.fillimages is ignored instead of raising KeyError.
        """
        image = self.fillimages.get(val)
        if image is not None:
            sdict['background-image'] = "url('%s')" % image

    def c_fo(self, ruleset, sdict, rule, val):
        """ XSL formatting attributes: copied verbatim under their local name """
        sdict[rule[1]] = val

    def c_break(self, ruleset, sdict, rule, val):  # Added by Kovid
        """ Map fo:break-before/fo:break-after to page-break-before/-after.
            Unknown values are mapped to 'auto'.
        """
        css_property = 'page-' + rule[1]
        values = {'auto': 'auto', 'column': 'always', 'page': 'always',
                  'even-page': 'left', 'odd-page': 'right',
                  'inherit': 'inherit'}
        sdict[css_property] = values.get(val, 'auto')

    def c_border_model(self, ruleset, sdict, rule, val):
        """ Convert to CSS2 border model """
        sdict['border-collapse'] = 'collapse' if val == 'collapsing' else 'separate'

    def c_width(self, ruleset, sdict, rule, val):
        """ Set width of box """
        sdict['width'] = val

    def c_text_align(self, ruleset, sdict, rule, align):
        """ Text align. The ODF values start/end become left/right; any
            other value is passed through unchanged.
        """
        sdict['text-align'] = {"start": "left", "end": "right"}.get(align, align)

    def c_fn(self, ruleset, sdict, rule, fontstyle):
        """ Generate the CSS font family
            A generic font can be found in two ways. In a <style:font-face>
            element or as a font-family-generic attribute in text-properties.
        """
        generic = ruleset.get((STYLENS,'font-family-generic'))
        if generic is not None:
            self.save_font(fontstyle, fontstyle, generic)
        family, htmlgeneric = self.fontdict.get(fontstyle, (fontstyle, 'serif'))
        sdict['font-family'] = '%s, %s' % (family, htmlgeneric)

    def c_text_position(self, ruleset, sdict, rule, tp):
        """ Text position. This is used e.g. to make superscript and subscript
            This attribute can have one or two values.

            The first value must be present and specifies the vertical
            text position as a percentage that relates to the current font
            height or it takes one of the values sub or super. Negative
            percentages or the sub value place the text below the
            baseline. Positive percentages or the super value place
            the text above the baseline. If sub or super is specified,
            the application can choose an appropriate text position.

            The second value is optional and specifies the font height
            as a percentage that relates to the current font-height. If
            this value is not specified, an appropriate font height is
            used. Although this value may change the font height that
            is displayed, it never changes the current font height that
            is used for additional calculations.
        """
        # split() without an argument tolerates repeated or surrounding
        # whitespace between the two values.
        textpos = tp.split()
        if not textpos:
            return
        if len(textpos) == 2 and textpos[0] != "0%":
            # Bug in OpenOffice. If vertical-align is 0% - ignore the text size.
            sdict['font-size'] = textpos[1]
        if textpos[0] == "super":
            sdict['vertical-align'] = "33%"
        elif textpos[0] == "sub":
            sdict['vertical-align'] = "-33%"
        else:
            sdict['vertical-align'] = textpos[0]

    def c_hp(self, ruleset, sdict, rule, hpos):
        """ Convert style:horizontal-pos, taking style:wrap (and svg:x for
            the from-left/from-inside positions) of the same ruleset into
            account.
        """
        # FIXME: Frames wrap-style defaults to 'parallel', graphics to 'none'.
        # It is properly set in the parent-styles, but the program doesn't
        # collect the information.
        wrap = ruleset.get((STYLENS,'wrap'),'parallel')
        # Can have: from-left, left, center, right, from-inside, inside, outside
        if hpos == "center":
            sdict['margin-left'] = "auto"
            sdict['margin-right'] = "auto"

        if hpos in ("right","outside"):
            if wrap in ("left", "parallel","dynamic"):
                sdict['float'] = "right"
            elif wrap == "run-through":
                sdict['position'] = "absolute"  # Simulate run-through
                sdict['top'] = "0"
                sdict['right'] = "0"
            else:  # No wrapping
                sdict['margin-left'] = "auto"
                sdict['margin-right'] = "0px"
        elif hpos in ("left", "inside"):
            if wrap in ("right", "parallel","dynamic"):
                sdict['float'] = "left"
            elif wrap == "run-through":
                sdict['position'] = "absolute"  # Simulate run-through
                sdict['top'] = "0"
                sdict['left'] = "0"
            else:  # No wrapping
                sdict['margin-left'] = "0px"
                sdict['margin-right'] = "auto"
        elif hpos in ("from-left", "from-inside"):
            if wrap in ("right", "parallel"):
                sdict['float'] = "left"
            else:
                sdict['position'] = "relative"  # No wrapping
                if (SVGNS,'x') in ruleset:
                    sdict['left'] = ruleset[(SVGNS,'x')]

    def c_page_width(self, ruleset, sdict, rule, val):
        """ Set width of box
            HTML doesn't really have a page-width. It is always 100% of the browser width
        """
        sdict['width'] = val

    @staticmethod
    def _add_text_decoration(sdict, decoration):
        """ Add a keyword to the text-decoration of sdict while keeping the
            keywords that are already there, so that underline and
            line-through can be combined. An existing 'none' is replaced.
        """
        keywords = [k for k in sdict.get('text-decoration', '').split() if k != 'none']
        if decoration not in keywords:
            keywords.append(decoration)
        sdict['text-decoration'] = ' '.join(keywords)

    def c_text_underline_style(self, ruleset, sdict, rule, val):
        """ Set underline decoration for every underline style except 'none' """
        if val and val != "none":
            self._add_text_decoration(sdict, "underline")

    def c_text_line_through_style(self, ruleset, sdict, rule, val):
        """ Set line-through decoration for every line-through style except 'none' """
        if val and val != "none":
            self._add_text_decoration(sdict, "line-through")

    def c_page_height(self, ruleset, sdict, rule, val):
        """ Set height of box """
        sdict['height'] = val

    def convert_styles(self, ruleset):
        """ Convert a ruleset of ODF attributes to a dictionary of CSS2
            properties. Rule is a tuple of (namespace, name). If the namespace
            is '' then it is already CSS2 and copied as is. Rules without an
            entry in self.ruleconversions are dropped.
        """
        sdict = {}
        for rule, val in ruleset.items():
            if rule[0] == '':
                sdict[rule[1]] = val
                continue
            method = self.ruleconversions.get(rule, None)
            if method:
                method(ruleset, sdict, rule, val)
        return sdict


class TagStack:

    """ Stack of (tag, attrs) pairs, one for every element that is open """

    def __init__(self):
        self.stack = []

    def push(self, tag, attrs):
        """ Push an element and its attributes """
        self.stack.append((tag, attrs))

    def pop(self):
        """ Remove and return the most recently pushed (tag, attrs) pair """
        return self.stack.pop()

    def stackparent(self):
        """ Return the attributes of the most recently pushed element """
        return self.stack[-1][1]

    def rfindattr(self, attr):
        """ Find a tag with the given attribute and return the attribute's
            value, or None if no element on the stack has it.
            NOTE(review): despite the name, the stack is searched from the
            first pushed (outermost) element; callers may rely on that, so
            the order is left unchanged.
        """
        for tag, attrs in self.stack:
            if attr in attrs:
                return attrs[attr]
        return None

    def count_tags(self, tag):
        """ Count how many elements on the stack have the given tag """
        return sum(1 for ttag, tattrs in self.stack if ttag == tag)


# Maps a prefixed ODF style name (S- prefix for the inline tags, P- for the
# block tags below) to the HTML tag that replaces it.
special_styles = {
    'S-Emphasis':'em',
    'S-Citation':'cite',
    'S-Strong_20_Emphasis':'strong',
    'S-Variable':'var',
    'S-Definition':'dfn',
    'S-Teletype':'tt',
    'P-Heading_20_1':'h1',
    'P-Heading_20_2':'h2',
    'P-Heading_20_3':'h3',
    'P-Heading_20_4':'h4',
    'P-Heading_20_5':'h5',
    'P-Heading_20_6':'h6',
#    'P-Caption':'caption',
    'P-Addressee':'address',
#    'P-List_20_Heading':'dt',
#    'P-List_20_Contents':'dd',
    'P-Preformatted_20_Text':'pre',
#    'P-Table_20_Heading':'th',
#    'P-Table_20_Contents':'td',
#    'P-Text_20_body':'p'
}

# -----------------------------------------------------------------------------
#
# ODFCONTENTHANDLER
#
# -----------------------------------------------------------------------------


class ODF2XHTML(handler.ContentHandler):

    """ The ODF2XHTML parses an ODF file and produces XHTML.

        self.elements maps an element's (namespace, name) to a pair of
        (start handler, end handler); either may be None. Handlers are
        called as handler(tag, attrs).
    """

    def __init__(self, generate_css=True, embedable=False):
        # Tags
        self.generate_css = generate_css
        self.frame_stack = []
        self.list_number_map = defaultdict(lambda : 1)
        self.list_id_map = {}
        self.list_class_stack = []
        self.elements = {
            (DCNS, 'title'): (self.s_processcont, self.e_dc_title),
            (DCNS, 'language'): (self.s_processcont, self.e_dc_contentlanguage),
            (DCNS, 'creator'): (self.s_processcont, self.e_dc_creator),
            (DCNS, 'description'): (self.s_processcont, self.e_dc_metatag),
            (DCNS, 'date'): (self.s_processcont, self.e_dc_metatag),
            (DRAWNS, 'custom-shape'): (self.s_custom_shape, self.e_custom_shape),
            (DRAWNS, 'frame'): (self.s_draw_frame, self.e_draw_frame),
            (DRAWNS, 'image'): (self.s_draw_image, None),
            (DRAWNS, 'fill-image'): (self.s_draw_fill_image, None),
            (DRAWNS, "layer-set"):(self.s_ignorexml, None),
            (DRAWNS, 'object'): (self.s_draw_object, None),
            (DRAWNS, 'object-ole'): (self.s_draw_object_ole, None),
            (DRAWNS, 'page'): (self.s_draw_page, self.e_draw_page),
            (DRAWNS, 'text-box'): (self.s_draw_textbox, self.e_draw_textbox),
            (METANS, 'creation-date'):(self.s_processcont, self.e_dc_metatag),
            (METANS, 'generator'):(self.s_processcont, self.e_dc_metatag),
            (METANS, 'initial-creator'): (self.s_processcont, self.e_dc_metatag),
            (METANS, 'keyword'): (self.s_processcont, self.e_dc_metatag),
            (NUMBERNS, "boolean-style"):(self.s_ignorexml, None),
            (NUMBERNS, "currency-style"):(self.s_ignorexml, None),
            (NUMBERNS, "date-style"):(self.s_ignorexml, None),
            (NUMBERNS, "number-style"):(self.s_ignorexml, None),
            (NUMBERNS, "text-style"):(self.s_ignorexml, None),
            (OFFICENS, "annotation"):(self.s_ignorexml, None),
            (OFFICENS, "automatic-styles"):(self.s_office_automatic_styles, None),
            (OFFICENS, "document"):(self.s_office_document_content, self.e_office_document_content),
            (OFFICENS, "document-content"):(self.s_office_document_content, self.e_office_document_content),
            (OFFICENS, "forms"):(self.s_ignorexml, None),
            (OFFICENS, "master-styles"):(self.s_office_master_styles, None),
            (OFFICENS, "meta"):(self.s_ignorecont, None),
            (OFFICENS, "presentation"):(self.s_office_presentation, self.e_office_presentation),
            (OFFICENS, "spreadsheet"):(self.s_office_spreadsheet, self.e_office_spreadsheet),
            (OFFICENS, "styles"):(self.s_office_styles, None),
            (OFFICENS, "text"):(self.s_office_text, self.e_office_text),
            (OFFICENS, "scripts"):(self.s_ignorexml, None),
            (OFFICENS, "settings"):(self.s_ignorexml, None),
            (PRESENTATIONNS, "notes"):(self.s_ignorexml, None),
#            (STYLENS, "default-page-layout"):(self.s_style_default_page_layout, self.e_style_page_layout),
            (STYLENS, "default-page-layout"):(self.s_ignorexml, None),
            (STYLENS, "default-style"):(self.s_style_default_style, self.e_style_default_style),
            (STYLENS, "drawing-page-properties"):(self.s_style_handle_properties, None),
            (STYLENS, "font-face"):(self.s_style_font_face, None),
#            (STYLENS, "footer"):(self.s_style_footer, self.e_style_footer),
#            (STYLENS, "footer-style"):(self.s_style_footer_style, None),
            (STYLENS, "graphic-properties"):(self.s_style_handle_properties, None),
            (STYLENS, "handout-master"):(self.s_ignorexml, None),
#            (STYLENS, "header"):(self.s_style_header, self.e_style_header),
#            (STYLENS, "header-footer-properties"):(self.s_style_handle_properties, None),
#            (STYLENS, "header-style"):(self.s_style_header_style, None),
            (STYLENS, "master-page"):(self.s_style_master_page, None),
            (STYLENS, "page-layout-properties"):(self.s_style_handle_properties, None),
            (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout),
#            (STYLENS, "page-layout"):(self.s_ignorexml, None),
            (STYLENS, "paragraph-properties"):(self.s_style_handle_properties, None),
            (STYLENS, "style"):(self.s_style_style, self.e_style_style),
            (STYLENS, "table-cell-properties"):(self.s_style_handle_properties, None),
            (STYLENS, "table-column-properties"):(self.s_style_handle_properties, None),
            (STYLENS, "table-properties"):(self.s_style_handle_properties, None),
            (STYLENS, "text-properties"):(self.s_style_handle_properties, None),
            (SVGNS, 'desc'): (self.s_ignorexml, None),
            (TABLENS, 'covered-table-cell'): (self.s_ignorexml, None),
            (TABLENS, 'table-cell'): (self.s_table_table_cell, self.e_table_table_cell),
            (TABLENS, 'table-column'): (self.s_table_table_column, None),
            (TABLENS, 'table-row'): (self.s_table_table_row, self.e_table_table_row),
            (TABLENS, 'table'): (self.s_table_table, self.e_table_table),
            (TEXTNS, 'a'): (self.s_text_a, self.e_text_a),
            (TEXTNS, "alphabetical-index-source"):(self.s_text_x_source, self.e_text_x_source),
            (TEXTNS, "bibliography-configuration"):(self.s_ignorexml, None),
            (TEXTNS, "bibliography-source"):(self.s_text_x_source, self.e_text_x_source),
            (TEXTNS, 'bookmark'): (self.s_text_bookmark, None),
            (TEXTNS, 'bookmark-start'): (self.s_text_bookmark, None),
            (TEXTNS, 'reference-mark-start'): (self.s_text_bookmark, None),  # Added by Kovid
            (TEXTNS, 'bookmark-ref'): (self.s_text_bookmark_ref, self.e_text_a),
            (TEXTNS, 'reference-ref'): (self.s_text_bookmark_ref, self.e_text_a),  # Added by Kovid
            (TEXTNS, 'bookmark-ref-start'): (self.s_text_bookmark_ref, None),
            (TEXTNS, 'h'): (self.s_text_h, self.e_text_h),
            (TEXTNS, "illustration-index-source"):(self.s_text_x_source, self.e_text_x_source),
            (TEXTNS, 'line-break'):(self.s_text_line_break, None),
            (TEXTNS, "linenumbering-configuration"):(self.s_ignorexml, None),
            (TEXTNS, "list"):(self.s_text_list, self.e_text_list),
            (TEXTNS, "list-item"):(self.s_text_list_item, self.e_text_list_item),
            (TEXTNS, "list-level-style-bullet"):(self.s_text_list_level_style_bullet, self.e_text_list_level_style_bullet),
            (TEXTNS, "list-level-style-number"):(self.s_text_list_level_style_number, self.e_text_list_level_style_number),
            (TEXTNS, "list-style"):(None, None),
            (TEXTNS, "note"):(self.s_text_note, None),
            (TEXTNS, "note-body"):(self.s_text_note_body, self.e_text_note_body),
            (TEXTNS, "note-citation"):(None, self.e_text_note_citation),
            (TEXTNS, "notes-configuration"):(self.s_ignorexml, None),
            (TEXTNS, "object-index-source"):(self.s_text_x_source, self.e_text_x_source),
            (TEXTNS, 'p'): (self.s_text_p, self.e_text_p),
            (TEXTNS, 's'): (self.s_text_s, None),
            (TEXTNS, 'span'): (self.s_text_span, self.e_text_span),
            (TEXTNS, 'tab'): (self.s_text_tab, None),
            (TEXTNS, "table-index-source"):(self.s_text_x_source, self.e_text_x_source),
            (TEXTNS, "table-of-content-source"):(self.s_text_x_source, self.e_text_x_source),
            (TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source),
        }
        if embedable:
            # This used to call self.make_embedable() unconditionally, but the
            # method visible here is called set_embedable(). Prefer a
            # make_embedable attribute when one exists and fall back to
            # set_embedable() so that embedable=True cannot raise
            # AttributeError.
            getattr(self, 'make_embedable', self.set_embedable)()
        self._resetobject()

    def set_plain(self):
        """ Tell the parser to not generate CSS """
        self.generate_css = False

    def set_embedable(self):
        """ Tells the converter to only output the parts inside the <body>"""
        self.elements[(OFFICENS, u"text")] = (None,None)
        self.elements[(OFFICENS, u"spreadsheet")] = (None,None)
        self.elements[(OFFICENS, u"presentation")] = (None,None)
        self.elements[(OFFICENS, u"document-content")] = (None,None)

    def add_style_file(self, stylefilename, media=None):
        """ Add a link to an external style file.
            Also turns off the embedding of styles in the HTML.
            NOTE(review): stylefilename and media are inserted into the
            attribute values as given, without escaping.
        """
        self.use_internal_css = False
        self.stylefilename = stylefilename
        if media:
            self.metatags.append('<link rel="stylesheet" type="text/css" href="%s" media="%s"/>\n' % (stylefilename,media))
        else:
            self.metatags.append('<link rel="stylesheet" type="text/css" href="%s"/>\n' % (stylefilename))

    def _resetfootnotes(self):
        # Footnotes and endnotes
        self.notedict = {}
        self.currentnote = 0
        self.notebody = ''

    def _resetobject(self):
        """ Reset the per-document state of the converter """
        self.lines = []
        self._wfunc = self._wlines
        self.xmlfile = ''
        self.title = ''
        self.language = ''
        self.creator = ''
        self.data = []
        self.tagstack = TagStack()
        self.htmlstack = []
        self.pstack = []
        self.processelem = True
        self.processcont = True
        self.listtypes = {}
        self.headinglevels = [0, 0,0,0,0,0, 0,0,0,0,0]  # level 0 to 10
        self.use_internal_css = True
        self.cs = StyleToCSS()
        self.anchors = {}

        # Style declarations
        self.stylestack = []
        self.styledict = {}
        self.currentstyle = None
        self.list_starts = {}

        self._resetfootnotes()

        # Tags from meta.xml
        self.metatags = []

    def writeout(self, s):
        """ Pass a non-empty string to the current write function """
        if s != '':
            self._wfunc(s)

    def writedata(self):
        """ Write the collected character data, XML-escaped """
        d = ''.join(self.data)
        if d != '':
            self.writeout(escape(d))

    def opentag(self, tag, attrs=None, block=False):
        """ Create an open HTML tag and record it on self.htmlstack.

            attrs is a mapping of attribute name to unescaped value.
            If block is true a newline is written after the tag.
        """
        # A fresh dict per call: the value is stored on htmlstack, so a
        # shared mutable default would be visible to every entry.
        attrs = {} if attrs is None else attrs
        self.htmlstack.append((tag,attrs,block))
        a = ['%s=%s' % (key, quoteattr(val)) for key, val in attrs.items()]
        if a:
            self.writeout("<%s %s>" % (tag, " ".join(a)))
        else:
            self.writeout("<%s>" % tag)
        if block:
            self.writeout("\n")

    def closetag(self, tag, block=True):
        """ Close an open HTML tag """
        self.htmlstack.pop()
        self.writeout("</%s>" % tag)
        if block:
            self.writeout("\n")

    def emptytag(self, tag, attrs=None):
        """ Write an empty HTML tag followed by a newline """
        attrs = {} if attrs is None else attrs
        a = ['%s=%s' % (key, quoteattr(val)) for key, val in attrs.items()]
        self.writeout("<%s %s/>\n" % (tag, " ".join(a)))

# --------------------------------------------------
# Interface to parser
# --------------------------------------------------
    def characters(self, data):
        """ Collect character data unless the element or its content is ignored """
        if self.processelem and self.processcont:
            self.data.append(data)

    def startElementNS(self, tag, qname, attrs):
        """ Save the processing flags, call the start handler registered for
            the tag (if elements are being processed) and push the tag.
        """
        self.pstack.append((self.processelem, self.processcont))
        if self.processelem:
            method = self.elements.get(tag, (None, None))[0]
            if method:
                self.handle_starttag(tag, method, attrs)
            else:
                self.unknown_starttag(tag,attrs)
        self.tagstack.push(tag, attrs)

    def endElementNS(self, tag, qname):
        """ Pop the tag, call the end handler registered for it (if elements
            are being processed) and restore the processing flags saved by
            startElementNS.
        """
        stag, attrs = self.tagstack.pop()
        if self.processelem:
            method = self.elements.get(tag, (None, None))[1]
            if method:
                self.handle_endtag(tag, attrs, method)
            else:
                self.unknown_endtag(tag, attrs)
        self.processelem, self.processcont = self.pstack.pop()

# --------------------------------------------------
    def handle_starttag(self, tag, method, attrs):
        method(tag,attrs)

    def handle_endtag(self, tag, attrs, method):
        method(tag, attrs)

    def unknown_starttag(self, tag, attrs):
        pass

    def unknown_endtag(self, tag, attrs):
        pass

    def s_ignorexml(self, tag, attrs):
        """ Ignore this xml element and all children of it
            It will automatically stop ignoring
        """
        self.processelem = False

    def s_ignorecont(self, tag, attrs):
        """ Stop processing the text nodes """
        self.processcont = False

    def s_processcont(self, tag, attrs):
        """ Start processing the text nodes """
        self.processcont = True

    def classname(self, attrs):
        """ Generate a class name from a style name """
        c = attrs.get((TEXTNS,'style-name'),'')
        c = c.replace(".","_")
        return c

    def get_anchor(self, name):
        """ Create a unique anchor id for a href name """
        if name not in self.anchors:
            # Changed by Kovid
            self.anchors[name] = "anchor%d" % (len(self.anchors) + 1)
        return self.anchors.get(name)

    def purgedata(self):
        """ Discard the collected character data """
        self.data = []

# -----------------------------------------------------------------------------
#
# Handle meta data
#
# -----------------------------------------------------------------------------
    def e_dc_title(self, tag, attrs):
        """ Get the title from the meta data. The <title> element itself is
            written by s_office_document_content.
        """
        self.title = ''.join(self.data)
        self.data = []

    def e_dc_metatag(self, tag, attrs):
        """ Any other meta data is added as a <meta> element
        """
        self.metatags.append('<meta name="%s" content=%s/>\n' % (tag[1], quoteattr(''.join(self.data))))
        self.data = []

    def e_dc_contentlanguage(self, tag, attrs):
        """ Set the content language. Identifies the targeted audience
        """
        self.language = ''.join(self.data)
        # quoteattr() rather than escape(): escape() leaves double quotes
        # alone, which would terminate a double-quoted attribute early.
        self.metatags.append('<meta http-equiv="content-language" content=%s/>\n' % quoteattr(self.language))
        self.data = []

    def e_dc_creator(self, tag, attrs):
        """ Set the content creator and add it as a <meta> element
        """
        self.creator = ''.join(self.data)
        # quoteattr() rather than escape(), see e_dc_contentlanguage
        self.metatags.append('<meta http-equiv="creator" content=%s/>\n' % quoteattr(self.creator))
        self.data = []

    def _open_shape_div(self, attrs, anchored_style, floating_style):
        """ Open the <div> shared by <draw:custom-shape> and <draw:frame>.

            anchored_style is the CSS prefix used when the anchor type is
            'paragraph' or 'char'; floating_style is used for every anchor
            type other than those and 'as-char' (which gets no prefix).
            svg:width/height/x/y are appended as width/height/left/top.
        """
        name = "G-" + attrs.get((DRAWNS,'style-name'), "")
        if name == 'G-':
            name = "PR-" + attrs.get((PRESENTATIONNS,'style-name'), "")
        name = name.replace(".","_")
        anchor_type = attrs.get((TEXTNS,'anchor-type'),'notfound')
        if anchor_type in ("paragraph", "char"):
            style = anchored_style
        elif anchor_type == 'as-char':
            style = ''
        else:
            style = floating_style
        for svgattr, cssprop in (("width", "width"), ("height", "height"),
                                 ("x", "left"), ("y", "top")):
            if (SVGNS,svgattr) in attrs:
                style = style + cssprop + ":" + attrs[(SVGNS,svgattr)] + ";"
        if self.generate_css:
            self.opentag('div', {'class': name, 'style': style})
        else:
            self.opentag('div')

    def s_custom_shape(self, tag, attrs):
        """ A <draw:custom-shape> is made into a <div> in HTML which is then styled
        """
        self._open_shape_div(attrs, 'position:absolute;', "position: absolute;")

    def e_custom_shape(self, tag, attrs):
        """ End the <draw:custom-shape>
        """
        self.closetag('div')

    def s_draw_frame(self, tag, attrs):
        """ A <draw:frame> is made into a <div> in HTML which is then styled
        """
        # s_draw_image records here whether the frame already has an image
        self.frame_stack.append([])
        self._open_shape_div(attrs, 'position:relative;', "position:absolute;")

    def e_draw_frame(self, tag, attrs):
        """ End the <draw:frame>
        """
        self.closetag('div')
        self.frame_stack.pop()

    def s_draw_fill_image(self, tag, attrs):
        """ Register a <draw:fill-image> with the style converter """
        name = attrs.get((DRAWNS,'name'), "NoName")
        imghref = attrs[(XLINKNS,"href")]
        imghref = self.rewritelink(imghref)
        self.cs.fillimages[name] = imghref

    def rewritelink(self, imghref):
        """ Intended to be overloaded if you don't store your pictures
            in a Pictures subfolder
        """
        return imghref

    def s_draw_image(self, tag, attrs):
        """ A <draw:image> becomes an <img/> element.
            Inside a frame only the first image is written.
        """
        if self.frame_stack:
            if self.frame_stack[-1]:
                return
            self.frame_stack[-1].append('img')
        parent = self.tagstack.stackparent()
        anchor_type = parent.get((TEXTNS,'anchor-type'))
        imghref = attrs[(XLINKNS,"href")]
        imghref = self.rewritelink(imghref)
        htmlattrs = {'alt':"", 'src':imghref}
        if self.generate_css:
            if anchor_type != "char":
                htmlattrs['style'] = "display: block;"
        self.emptytag('img', htmlattrs)

    def s_draw_object(self, tag, attrs):
        """ A <draw:object> is embedded object in the document (e.g. spreadsheet in presentation).
            Embedded objects are currently skipped: the early return below
            deliberately disables the code after it.
        """
        return  # Added by Kovid
        objhref = attrs[(XLINKNS,"href")]
        # Remove leading "./": from "./Object 1" to "Object 1"
#        objhref = objhref [2:]

        # Not using os.path.join since it fails to find the file on Windows.
#        objcontentpath = '/'.join([objhref, 'content.xml'])

        for c in self.document.childnodes:
            if c.folder == objhref:
                self._walknode(c.topnode)

    def s_draw_object_ole(self, tag, attrs):
        """ A <draw:object-ole> is embedded OLE object in the document (e.g. MS Graph).
        """
        try:
            class_id = attrs[(DRAWNS,"class-id")]
        except KeyError:  # Added by Kovid to ignore <draw> without the right
            return        # attributes
        if class_id and class_id.lower() == "00020803-0000-0000-c000-000000000046":  # Microsoft Graph 97 Chart
            tagattrs = {'name':'object_ole_graph', 'class':'ole-graph'}
            self.opentag('a', tagattrs)
            # The attribute dict used to be passed here as the ``block``
            # argument; block=True (the default) gives the same output.
            self.closetag('a')

    def s_draw_page(self, tag, attrs):
        """ A <draw:page> is a slide in a presentation. We use a <fieldset> element in HTML.
            Therefore if you convert a ODP file, you get a series of <fieldset>s.
            Override this for your own purpose.
        """
        name = attrs.get((DRAWNS,'name'), "NoName")
        stylename = attrs.get((DRAWNS,'style-name'), "")
        stylename = stylename.replace(".","_")
        masterpage = attrs.get((DRAWNS,'master-page-name'),"")
        masterpage = masterpage.replace(".","_")
        if self.generate_css:
            self.opentag('fieldset', {'class':"DP-%s MP-%s" % (stylename, masterpage)})
        else:
            self.opentag('fieldset')
        self.opentag('legend')
        self.writeout(escape(name))
        self.closetag('legend')

    def e_draw_page(self, tag, attrs):
        self.closetag('fieldset')

    def s_draw_textbox(self, tag, attrs):
        """ A <draw:text-box> becomes a plain <div>.
            The fo:min-height attribute is not carried over to the output.
        """
        self.opentag('div')

    def e_draw_textbox(self, tag, attrs):
        """ End the <draw:text-box>
        """
        self.closetag('div')

    def html_body(self, tag, attrs):
        """ Finish the <head> (writing the internal stylesheet when CSS
            generation and internal CSS are both enabled) and open <body>.
        """
        self.writedata()
        if self.generate_css and self.use_internal_css:
            self.opentag('style', {'type':"text/css"}, True)
            self.writeout('/*<![CDATA[*/\n')
            self.generate_stylesheet()
            self.writeout('/*]]>*/\n')
            self.closetag('style')
        self.purgedata()
        self.closetag('head')
        self.opentag('body', block=True)

    # background-color: white removed by Kovid for #9118
    # Specifying an explicit bg color prevents ebook readers
    # from successfully inverting colors
    # Added styling for endnotes
    default_styles = """
img { width: 100%; height: 100%; }
* { padding: 0; margin: 0; }
body { margin: 0 1em; }
ol, ul { padding-left: 2em; }
a.citation { text-decoration: none }
h1.notes-header { page-break-before: always }
dl.notes dt { font-size: large }
dl.notes dt a { text-decoration: none }
dl.notes dd { page-break-after: always }
dl.notes dd:last-of-type { page-break-after: avoid }
"""

    def generate_stylesheet(self):
        """ Resolve family defaults and parent styles for every style on
            self.stylestack, then write default_styles followed by one CSS
            rule per distinct set of properties.
        """
        for name in self.stylestack:
            styles = self.styledict.get(name)
            # Preload with the family's default style
            if '__style-family' in styles and styles['__style-family'] in self.styledict:
                familystyle = self.styledict[styles['__style-family']].copy()
                del styles['__style-family']
                familystyle.update(styles)
                styles = familystyle
            # Resolve the remaining parent styles
            visited = set()
            while '__parent-style-name' in styles and styles['__parent-style-name'] in self.styledict:
                parentname = styles['__parent-style-name']
                if parentname in visited:
                    # Cyclic parent chain: stop instead of looping forever
                    del styles['__parent-style-name']
                    break
                visited.add(parentname)
                parentstyle = self.styledict[parentname].copy()
                del styles['__parent-style-name']
                parentstyle.update(styles)
                styles = parentstyle
            self.styledict[name] = styles
        # Write the styles to HTML
        self.writeout(self.default_styles)
        # Changed by Kovid to not write out endless copies of the same style
        css_styles = {}
        for name in self.stylestack:
            styles = self.styledict.get(name)
            css2 = tuple(self.cs.convert_styles(styles).items())
            css_styles.setdefault(css2, []).append(name)

        def filter_margins(css2):
            names = {k for k, v in css2}
            ignore = set()
            if {'margin-left', 'margin-right', 'margin-top',
                    'margin-bottom'}.issubset(names):
                # These come from XML and we cannot preserve XML attribute
                # order so we assume that margin is to be overridden See
                # https://bugs.launchpad.net/calibre/+bug/941134 and
                # https://bugs.launchpad.net/calibre/+bug/1002702
                ignore.add('margin')
            # The shorthand goes first so the specific margins override it
            css2 = sorted(css2, key=lambda x:{'margin':0}.get(x[0], 1))
            for k, v in css2:
                if k not in ignore:
                    yield k, v

        for css2, names in css_styles.items():
            self.writeout("%s {\n" % ', '.join(names))
            for style, val in filter_margins(css2):
                self.writeout("\t%s: %s;\n" % (style, val))
            self.writeout("}\n")

    def generate_footnotes(self):
        """ Write the collected notes as a <dl class="notes"> list in which
            every entry links back to its citation. Nothing is written when
            there are no notes.
        """
        if self.currentnote == 0:
            return
        # Changed by Kovid to improve endnote functionality
        self.opentag('h1', {'class':'notes-header'})
        self.writeout(_('Notes'))
        self.closetag('h1')
        self.opentag('dl', {'class':'notes'})
        for key in range(1,self.currentnote+1):
            note = self.notedict[key]
            self.opentag('dt', {'id':"footnote-%d" % key})
            self.writeout('[')
            self.opentag('a', {'href': "#citation-%d" % key})
            self.writeout("←%d" % key)
            self.closetag('a')
            self.writeout(']\xa0')
            self.closetag('dt')
            self.opentag('dd')
            self.writeout(note['body'])
            self.closetag('dd')
        self.closetag('dl')

    def s_office_automatic_styles(self, tag, attrs):
        """ Automatic styles from styles.xml get the prefix "A" """
        if self.xmlfile == 'styles.xml':
            self.autoprefix = "A"
        else:
            self.autoprefix = ""

    def s_office_document_content(self, tag, attrs):
        """ First tag in the content.xml file"""
        self.writeout('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ')
self.writeout('"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n') 949 self.opentag('html', {'xmlns':"http://www.w3.org/1999/xhtml"}, True) 950 self.opentag('head', block=True) 951 self.emptytag('meta', {'http-equiv':"Content-Type", 'content':"text/html;charset=UTF-8"}) 952 for metaline in self.metatags: 953 self.writeout(metaline) 954 self.writeout('<title>%s</title>\n' % escape(self.title)) 955 956 def e_office_document_content(self, tag, attrs): 957 """ Last tag """ 958 self.closetag('html') 959 960 def s_office_master_styles(self, tag, attrs): 961 """ """ 962 963 def s_office_presentation(self, tag, attrs): 964 """ For some odd reason, OpenOffice Impress doesn't define a default-style 965 for the 'paragraph'. We therefore force a standard when we see 966 it is a presentation 967 """ 968 self.styledict['p'] = {(FONS,u'font-size'): u"24pt"} 969 self.styledict['presentation'] = {(FONS,u'font-size'): u"24pt"} 970 self.html_body(tag, attrs) 971 972 def e_office_presentation(self, tag, attrs): 973 self.generate_footnotes() 974 self.closetag('body') 975 976 def s_office_spreadsheet(self, tag, attrs): 977 self.html_body(tag, attrs) 978 979 def e_office_spreadsheet(self, tag, attrs): 980 self.generate_footnotes() 981 self.closetag('body') 982 983 def s_office_styles(self, tag, attrs): 984 self.autoprefix = "" 985 986 def s_office_text(self, tag, attrs): 987 """ OpenDocument text """ 988 self.styledict['frame'] = {(STYLENS,'wrap'): u'parallel'} 989 self.html_body(tag, attrs) 990 991 def e_office_text(self, tag, attrs): 992 self.generate_footnotes() 993 self.closetag('body') 994 995 def s_style_handle_properties(self, tag, attrs): 996 """ Copy all attributes to a struct. 
997 We will later convert them to CSS2 998 """ 999 if self.currentstyle is None: # Added by Kovid 1000 return 1001 1002 for key,attr in attrs.items(): 1003 self.styledict[self.currentstyle][key] = attr 1004 1005 familymap = {'frame':'frame', 'paragraph':'p', 'presentation':'presentation', 1006 'text':'span','section':'div', 1007 'table':'table','table-cell':'td','table-column':'col', 1008 'table-row':'tr','graphic':'graphic'} 1009 1010 def s_style_default_style(self, tag, attrs): 1011 """ A default style is like a style on an HTML tag 1012 """ 1013 family = attrs[(STYLENS,'family')] 1014 htmlfamily = self.familymap.get(family,'unknown') 1015 self.currentstyle = htmlfamily 1016# self.stylestack.append(self.currentstyle) 1017 self.styledict[self.currentstyle] = {} 1018 1019 def e_style_default_style(self, tag, attrs): 1020 self.currentstyle = None 1021 1022 def s_style_font_face(self, tag, attrs): 1023 """ It is possible that the HTML browser doesn't know how to 1024 show a particular font. Luckily ODF provides generic fallbacks 1025 Unfortunately they are not the same as CSS2. 
1026 CSS2: serif, sans-serif, cursive, fantasy, monospace 1027 ODF: roman, swiss, modern, decorative, script, system 1028 """ 1029 name = attrs[(STYLENS,"name")] 1030 family = attrs[(SVGNS,"font-family")] 1031 generic = attrs.get((STYLENS,'font-family-generic'),"") 1032 self.cs.save_font(name, family, generic) 1033 1034 def s_style_footer(self, tag, attrs): 1035 self.opentag('div', {'id':"footer"}) 1036 self.purgedata() 1037 1038 def e_style_footer(self, tag, attrs): 1039 self.writedata() 1040 self.closetag('div') 1041 self.purgedata() 1042 1043 def s_style_footer_style(self, tag, attrs): 1044 self.currentstyle = "@print #footer" 1045 self.stylestack.append(self.currentstyle) 1046 self.styledict[self.currentstyle] = {} 1047 1048 def s_style_header(self, tag, attrs): 1049 self.opentag('div', {'id':"header"}) 1050 self.purgedata() 1051 1052 def e_style_header(self, tag, attrs): 1053 self.writedata() 1054 self.closetag('div') 1055 self.purgedata() 1056 1057 def s_style_header_style(self, tag, attrs): 1058 self.currentstyle = "@print #header" 1059 self.stylestack.append(self.currentstyle) 1060 self.styledict[self.currentstyle] = {} 1061 1062 def s_style_default_page_layout(self, tag, attrs): 1063 """ Collect the formatting for the default page layout style. 1064 """ 1065 self.currentstyle = "@page" 1066 self.stylestack.append(self.currentstyle) 1067 self.styledict[self.currentstyle] = {} 1068 1069 def s_style_page_layout(self, tag, attrs): 1070 """ Collect the formatting for the page layout style. 1071 This won't work in CSS 2.1, as page identifiers are not allowed. 
1072 It is legal in CSS3, but the rest of the application doesn't specify when to use what page layout 1073 """ 1074 name = attrs[(STYLENS,'name')] 1075 name = name.replace(".","_") 1076 self.currentstyle = ".PL-" + name 1077 self.stylestack.append(self.currentstyle) 1078 self.styledict[self.currentstyle] = {} 1079 1080 def e_style_page_layout(self, tag, attrs): 1081 """ End this style 1082 """ 1083 self.currentstyle = None 1084 1085 def s_style_master_page(self, tag, attrs): 1086 """ Collect the formatting for the page layout style. 1087 """ 1088 name = attrs[(STYLENS,'name')] 1089 name = name.replace(".","_") 1090 1091 self.currentstyle = ".MP-" + name 1092 self.stylestack.append(self.currentstyle) 1093 self.styledict[self.currentstyle] = {('','position'):'relative'} 1094 # Then load the pagelayout style if we find it 1095 pagelayout = attrs.get((STYLENS,'page-layout-name'), None) 1096 if pagelayout: 1097 pagelayout = ".PL-" + pagelayout 1098 if pagelayout in self.styledict: 1099 styles = self.styledict[pagelayout] 1100 for style, val in styles.items(): 1101 self.styledict[self.currentstyle][style] = val 1102 else: 1103 self.styledict[self.currentstyle]['__parent-style-name'] = pagelayout 1104 self.s_ignorexml(tag, attrs) 1105 1106 # Short prefixes for class selectors 1107 _familyshort = {'drawing-page':'DP', 'paragraph':'P', 'presentation':'PR', 1108 'text':'S', 'section':'D', 1109 'table':'T', 'table-cell':'TD', 'table-column':'TC', 1110 'table-row':'TR', 'graphic':'G'} 1111 1112 def s_style_style(self, tag, attrs): 1113 """ Collect the formatting for the style. 1114 Styles have scope. The same name can be used for both paragraph and 1115 character styles Since CSS has no scope we use a prefix. (Not elegant) 1116 In ODF a style can have a parent, these parents can be chained. 1117 We may not have encountered the parent yet, but if we have, we resolve it. 
1118 """ 1119 name = attrs[(STYLENS,'name')] 1120 name = name.replace(".","_") 1121 family = attrs[(STYLENS,'family')] 1122 htmlfamily = self.familymap.get(family,'unknown') 1123 sfamily = self._familyshort.get(family,'X') 1124 name = "%s%s-%s" % (self.autoprefix, sfamily, name) 1125 parent = attrs.get((STYLENS,'parent-style-name')) 1126 self.currentstyle = special_styles.get(name,"."+name) 1127 self.stylestack.append(self.currentstyle) 1128 if self.currentstyle not in self.styledict: 1129 self.styledict[self.currentstyle] = {} 1130 1131 self.styledict[self.currentstyle]['__style-family'] = htmlfamily 1132 1133 # Then load the parent style if we find it 1134 if parent: 1135 parent = parent.replace(".", "_") 1136 parent = "%s-%s" % (sfamily, parent) 1137 parent = special_styles.get(parent, "."+parent) 1138 if parent in self.styledict: 1139 styles = self.styledict[parent] 1140 for style, val in styles.items(): 1141 self.styledict[self.currentstyle][style] = val 1142 else: 1143 self.styledict[self.currentstyle]['__parent-style-name'] = parent 1144 1145 def e_style_style(self, tag, attrs): 1146 """ End this style 1147 """ 1148 self.currentstyle = None 1149 1150 def s_table_table(self, tag, attrs): 1151 """ Start a table 1152 """ 1153 c = attrs.get((TABLENS,'style-name'), None) 1154 if c and self.generate_css: 1155 c = c.replace(".","_") 1156 self.opentag('table',{'class': "T-%s" % c}) 1157 else: 1158 self.opentag('table') 1159 self.purgedata() 1160 1161 def e_table_table(self, tag, attrs): 1162 """ End a table 1163 """ 1164 self.writedata() 1165 self.closetag('table') 1166 self.purgedata() 1167 1168 def s_table_table_cell(self, tag, attrs): 1169 """ Start a table cell """ 1170 # FIXME: number-columns-repeated § 8.1.3 1171 # repeated = int(attrs.get( (TABLENS,'number-columns-repeated'), 1)) 1172 htmlattrs = {} 1173 rowspan = attrs.get((TABLENS,'number-rows-spanned')) 1174 if rowspan: 1175 htmlattrs['rowspan'] = rowspan 1176 colspan = 
attrs.get((TABLENS,'number-columns-spanned')) 1177 if colspan: 1178 htmlattrs['colspan'] = colspan 1179 1180 c = attrs.get((TABLENS,'style-name')) 1181 if c: 1182 htmlattrs['class'] = 'TD-%s' % c.replace(".","_") 1183 self.opentag('td', htmlattrs) 1184 self.purgedata() 1185 1186 def e_table_table_cell(self, tag, attrs): 1187 """ End a table cell """ 1188 self.writedata() 1189 self.closetag('td') 1190 self.purgedata() 1191 1192 def s_table_table_column(self, tag, attrs): 1193 """ Start a table column """ 1194 c = attrs.get((TABLENS,'style-name'), None) 1195 repeated = int(attrs.get((TABLENS,'number-columns-repeated'), 1)) 1196 htmlattrs = {} 1197 if c: 1198 htmlattrs['class'] = "TC-%s" % c.replace(".","_") 1199 for x in range(repeated): 1200 self.emptytag('col', htmlattrs) 1201 self.purgedata() 1202 1203 def s_table_table_row(self, tag, attrs): 1204 """ Start a table row """ 1205 # FIXME: table:number-rows-repeated 1206 c = attrs.get((TABLENS,'style-name'), None) 1207 htmlattrs = {} 1208 if c: 1209 htmlattrs['class'] = "TR-%s" % c.replace(".","_") 1210 self.opentag('tr', htmlattrs) 1211 self.purgedata() 1212 1213 def e_table_table_row(self, tag, attrs): 1214 """ End a table row """ 1215 self.writedata() 1216 self.closetag('tr') 1217 self.purgedata() 1218 1219 def s_text_a(self, tag, attrs): 1220 """ Anchors start """ 1221 self.writedata() 1222 href = attrs[(XLINKNS,"href")].split("|")[0] 1223 if href[:1] == "#": # Changed by Kovid 1224 href = "#" + self.get_anchor(href[1:]) 1225 self.opentag('a', {'href':href}) 1226 self.purgedata() 1227 1228 def e_text_a(self, tag, attrs): 1229 """ End an anchor or bookmark reference """ 1230 self.writedata() 1231 self.closetag('a', False) 1232 self.purgedata() 1233 1234 def s_text_bookmark(self, tag, attrs): 1235 """ Bookmark definition """ 1236 name = attrs[(TEXTNS,'name')] 1237 html_id = self.get_anchor(name) 1238 self.writedata() 1239 self.opentag('span', {'id':html_id}) 1240 self.closetag('span', False) 1241 self.purgedata() 
1242 1243 def s_text_bookmark_ref(self, tag, attrs): 1244 """ Bookmark reference """ 1245 name = attrs[(TEXTNS,'ref-name')] 1246 html_id = "#" + self.get_anchor(name) 1247 self.writedata() 1248 self.opentag('a', {'href':html_id}) 1249 self.purgedata() 1250 1251 def s_text_h(self, tag, attrs): 1252 """ Headings start """ 1253 level = int(attrs[(TEXTNS,'outline-level')]) 1254 if level > 6: 1255 level = 6 # Heading levels go only to 6 in XHTML 1256 if level < 1: 1257 level = 1 1258 self.headinglevels[level] = self.headinglevels[level] + 1 1259 name = self.classname(attrs) 1260 for x in range(level + 1,10): 1261 self.headinglevels[x] = 0 1262 special = special_styles.get("P-"+name) 1263 if special or not self.generate_css: 1264 self.opentag('h%s' % level) 1265 else: 1266 self.opentag('h%s' % level, {'class':"P-%s" % name}) 1267 self.purgedata() 1268 1269 def e_text_h(self, tag, attrs): 1270 """ Headings end 1271 Side-effect: If there is no title in the metadata, then it is taken 1272 from the first heading of any level. 
1273 """ 1274 self.writedata() 1275 level = int(attrs[(TEXTNS,'outline-level')]) 1276 if level > 6: 1277 level = 6 # Heading levels go only to 6 in XHTML 1278 if level < 1: 1279 level = 1 1280 lev = self.headinglevels[1:level+1] 1281 outline = '.'.join(map(str,lev)) 1282 heading = ''.join(self.data) 1283 if self.title == '': 1284 self.title = heading 1285 # Changed by Kovid 1286 tail = ''.join(self.data) 1287 anchor = self.get_anchor("%s.%s" % (outline, tail)) 1288 anchor2 = self.get_anchor(tail) # Added by kovid to fix #7506 1289 self.opentag('a', {'id': anchor}) 1290 self.closetag('a', False) 1291 self.opentag('a', {'id': anchor2}) 1292 self.closetag('a', False) 1293 self.closetag('h%s' % level) 1294 self.purgedata() 1295 1296 def s_text_line_break(self, tag, attrs): 1297 """ Force a line break (<br/>) """ 1298 self.writedata() 1299 self.emptytag('br') 1300 self.purgedata() 1301 1302 def s_text_list(self, tag, attrs): 1303 """ Start a list (<ul> or <ol>) 1304 To know which level we're at, we have to count the number 1305 of <text:list> elements on the tagstack. 1306 """ 1307 name = attrs.get((TEXTNS,'style-name')) 1308 continue_numbering = attrs.get((TEXTNS, 'continue-numbering')) == 'true' 1309 continue_list = attrs.get((TEXTNS, 'continue-list')) 1310 list_id = attrs.get(('http://www.w3.org/XML/1998/namespace', 'id')) 1311 level = self.tagstack.count_tags(tag) + 1 1312 if name: 1313 name = name.replace(".","_") 1314 else: 1315 # FIXME: If a list is contained in a table cell or text box, 1316 # the list level must return to 1, even though the table or 1317 # textbox itself may be nested within another list. 
1318 name = self.tagstack.rfindattr((TEXTNS,'style-name')) 1319 list_class = "%s_%d" % (name, level) 1320 tag_name = self.listtypes.get(list_class,'ul') 1321 number_class = tag_name + list_class 1322 if list_id: 1323 self.list_id_map[list_id] = number_class 1324 if continue_list: 1325 if continue_list in self.list_id_map: 1326 tglc = self.list_id_map[continue_list] 1327 self.list_number_map[number_class] = self.list_number_map[tglc] 1328 else: 1329 self.list_number_map.pop(number_class, None) 1330 else: 1331 if not continue_numbering: 1332 self.list_number_map.pop(number_class, None) 1333 self.list_class_stack.append(number_class) 1334 attrs = {} 1335 if tag_name == 'ol' and self.list_number_map[number_class] != 1: 1336 attrs = {'start': unicode_type(self.list_number_map[number_class])} 1337 if self.generate_css: 1338 attrs['class'] = list_class 1339 self.opentag('%s' % tag_name, attrs) 1340 self.purgedata() 1341 1342 def e_text_list(self, tag, attrs): 1343 """ End a list """ 1344 self.writedata() 1345 if self.list_class_stack: 1346 self.list_class_stack.pop() 1347 name = attrs.get((TEXTNS,'style-name')) 1348 level = self.tagstack.count_tags(tag) + 1 1349 if name: 1350 name = name.replace(".","_") 1351 else: 1352 # FIXME: If a list is contained in a table cell or text box, 1353 # the list level must return to 1, even though the table or 1354 # textbox itself may be nested within another list. 
1355 name = self.tagstack.rfindattr((TEXTNS,'style-name')) 1356 list_class = "%s_%d" % (name, level) 1357 self.closetag(self.listtypes.get(list_class,'ul')) 1358 self.purgedata() 1359 1360 def s_text_list_item(self, tag, attrs): 1361 """ Start list item """ 1362 number_class = self.list_class_stack[-1] if self.list_class_stack else None 1363 if number_class: 1364 self.list_number_map[number_class] += 1 1365 self.opentag('li') 1366 self.purgedata() 1367 1368 def e_text_list_item(self, tag, attrs): 1369 """ End list item """ 1370 self.writedata() 1371 self.closetag('li') 1372 self.purgedata() 1373 1374 def s_text_list_level_style_bullet(self, tag, attrs): 1375 """ CSS doesn't have the ability to set the glyph 1376 to a particular character, so we just go through 1377 the available glyphs 1378 """ 1379 name = self.tagstack.rfindattr((STYLENS,'name')) 1380 level = attrs[(TEXTNS,'level')] 1381 self.prevstyle = self.currentstyle 1382 list_class = "%s_%s" % (name, level) 1383 self.listtypes[list_class] = 'ul' 1384 self.currentstyle = ".%s_%s" % (name.replace(".","_"), level) 1385 self.stylestack.append(self.currentstyle) 1386 self.styledict[self.currentstyle] = {} 1387 1388 level = int(level) 1389 listtype = ("square", "disc", "circle")[level % 3] 1390 self.styledict[self.currentstyle][('','list-style-type')] = listtype 1391 1392 def e_text_list_level_style_bullet(self, tag, attrs): 1393 self.currentstyle = self.prevstyle 1394 del self.prevstyle 1395 1396 def s_text_list_level_style_number(self, tag, attrs): 1397 name = self.tagstack.stackparent()[(STYLENS,'name')] 1398 level = attrs[(TEXTNS,'level')] 1399 num_format = attrs.get((STYLENS,'num-format'),"1") 1400 start_value = attrs.get((TEXTNS, 'start-value'), '1') 1401 list_class = "%s_%s" % (name, level) 1402 self.prevstyle = self.currentstyle 1403 self.currentstyle = ".%s_%s" % (name.replace(".","_"), level) 1404 if start_value != '1': 1405 self.list_starts[self.currentstyle] = start_value 1406 
self.listtypes[list_class] = 'ol' 1407 self.stylestack.append(self.currentstyle) 1408 self.styledict[self.currentstyle] = {} 1409 if num_format == "1": 1410 listtype = "decimal" 1411 elif num_format == "I": 1412 listtype = "upper-roman" 1413 elif num_format == "i": 1414 listtype = "lower-roman" 1415 elif num_format == "A": 1416 listtype = "upper-alpha" 1417 elif num_format == "a": 1418 listtype = "lower-alpha" 1419 else: 1420 listtype = "decimal" 1421 self.styledict[self.currentstyle][('','list-style-type')] = listtype 1422 1423 def e_text_list_level_style_number(self, tag, attrs): 1424 self.currentstyle = self.prevstyle 1425 del self.prevstyle 1426 1427 def s_text_note(self, tag, attrs): 1428 self.writedata() 1429 self.purgedata() 1430 self.currentnote = self.currentnote + 1 1431 self.notedict[self.currentnote] = {} 1432 self.notebody = [] 1433 1434 def e_text_note(self, tag, attrs): 1435 pass 1436 1437 def collectnote(self,s): 1438 if s != '': 1439 self.notebody.append(s) 1440 1441 def s_text_note_body(self, tag, attrs): 1442 self._orgwfunc = self._wfunc 1443 self._wfunc = self.collectnote 1444 1445 def e_text_note_body(self, tag, attrs): 1446 self._wfunc = self._orgwfunc 1447 self.notedict[self.currentnote]['body'] = ''.join(self.notebody) 1448 self.notebody = '' 1449 del self._orgwfunc 1450 1451 def e_text_note_citation(self, tag, attrs): 1452 # Changed by Kovid to improve formatting and enable backlinks 1453 mark = ''.join(self.data) 1454 self.notedict[self.currentnote]['citation'] = mark 1455 self.opentag('sup') 1456 self.opentag('a', { 1457 'href': "#footnote-%s" % self.currentnote, 1458 'class': 'citation', 1459 'id':'citation-%s' % self.currentnote 1460 }) 1461# self.writeout( escape(mark) ) 1462 # Since HTML only knows about endnotes, there is too much risk that the 1463 # marker is reused in the source. 
Therefore we force numeric markers 1464 self.writeout(type(u'')(self.currentnote)) 1465 self.closetag('a') 1466 self.closetag('sup') 1467 1468 def s_text_p(self, tag, attrs): 1469 """ Paragraph 1470 """ 1471 htmlattrs = {} 1472 specialtag = "p" 1473 c = attrs.get((TEXTNS,'style-name'), None) 1474 if c: 1475 c = c.replace(".","_") 1476 specialtag = special_styles.get("P-"+c) 1477 if specialtag is None: 1478 specialtag = 'p' 1479 if self.generate_css: 1480 htmlattrs['class'] = "P-%s" % c 1481 self.opentag(specialtag, htmlattrs) 1482 self.purgedata() 1483 1484 def e_text_p(self, tag, attrs): 1485 """ End Paragraph 1486 """ 1487 specialtag = "p" 1488 c = attrs.get((TEXTNS,'style-name'), None) 1489 if c: 1490 c = c.replace(".","_") 1491 specialtag = special_styles.get("P-"+c) 1492 if specialtag is None: 1493 specialtag = 'p' 1494 self.writedata() 1495 if not self.data: # Added by Kovid 1496 # Give substance to empty paragraphs, as rendered by OOo 1497 self.writeout(' ') 1498 self.closetag(specialtag) 1499 self.purgedata() 1500 1501 def s_text_s(self, tag, attrs): 1502 # Changed by Kovid to fix non breaking spaces being prepended to 1503 # element instead of being part of the text flow. 1504 # We don't use an entity for the nbsp as the contents of self.data will 1505 # be escaped on writeout. 1506 """ Generate a number of spaces. We use the non breaking space for 1507 the text:s ODF element. 1508 """ 1509 try: 1510 c = int(attrs.get((TEXTNS, 'c'), 1)) 1511 except: 1512 c = 0 1513 if c > 0: 1514 self.data.append(u'\u00a0'*c) 1515 1516 def s_text_span(self, tag, attrs): 1517 """ The <text:span> element matches the <span> element in HTML. It is 1518 typically used to properties of the text. 1519 """ 1520 self.writedata() 1521 c = attrs.get((TEXTNS,'style-name'), None) 1522 htmlattrs = {} 1523 # Changed by Kovid to handle inline special styles defined on <text:span> tags. 1524 # Apparently LibreOffice does this. 
1525 special = 'span' 1526 if c: 1527 c = c.replace(".","_") 1528 special = special_styles.get("S-"+c) 1529 if special is None: 1530 special = 'span' 1531 if self.generate_css: 1532 htmlattrs['class'] = "S-%s" % c 1533 1534 self.opentag(special, htmlattrs) 1535 self.purgedata() 1536 1537 def e_text_span(self, tag, attrs): 1538 """ End the <text:span> """ 1539 self.writedata() 1540 c = attrs.get((TEXTNS,'style-name'), None) 1541 # Changed by Kovid to handle inline special styles defined on <text:span> tags. 1542 # Apparently LibreOffice does this. 1543 special = 'span' 1544 if c: 1545 c = c.replace(".","_") 1546 special = special_styles.get("S-"+c) 1547 if special is None: 1548 special = 'span' 1549 1550 self.closetag(special, False) 1551 self.purgedata() 1552 1553 def s_text_tab(self, tag, attrs): 1554 """ Move to the next tabstop. We ignore this in HTML 1555 """ 1556 self.writedata() 1557 self.writeout(' ') 1558 self.purgedata() 1559 1560 def s_text_x_source(self, tag, attrs): 1561 """ Various indexes and tables of contents. We ignore those. 1562 """ 1563 self.writedata() 1564 self.purgedata() 1565 self.s_ignorexml(tag, attrs) 1566 1567 def e_text_x_source(self, tag, attrs): 1568 """ Various indexes and tables of contents. We ignore those. 1569 """ 1570 self.writedata() 1571 self.purgedata() 1572 1573 # ----------------------------------------------------------------------------- 1574 # 1575 # Reading the file 1576 # 1577 # ----------------------------------------------------------------------------- 1578 1579 def load(self, odffile): 1580 """ Loads a document into the parser and parses it. 1581 The argument can either be a filename or a document in memory. 
1582 """ 1583 self.lines = [] 1584 self._wfunc = self._wlines 1585 if isinstance(odffile, (bytes, type(u''))) or hasattr(odffile, 'read'): # Added by Kovid 1586 self.document = load(odffile) 1587 else: 1588 self.document = odffile 1589 self._walknode(self.document.topnode) 1590 1591 def _walknode(self, node): 1592 if node.nodeType == Node.ELEMENT_NODE: 1593 self.startElementNS(node.qname, node.tagName, node.attributes) 1594 for c in node.childNodes: 1595 self._walknode(c) 1596 self.endElementNS(node.qname, node.tagName) 1597 if node.nodeType == Node.TEXT_NODE or node.nodeType == Node.CDATA_SECTION_NODE: 1598 self.characters(type(u'')(node)) 1599 1600 def odf2xhtml(self, odffile): 1601 """ Load a file and return the XHTML 1602 """ 1603 self.load(odffile) 1604 return self.xhtml() 1605 1606 def _wlines(self,s): 1607 if s: 1608 self.lines.append(s) 1609 1610 def xhtml(self): 1611 """ Returns the xhtml 1612 """ 1613 return ''.join(self.lines) 1614 1615 def _writecss(self, s): 1616 if s: 1617 self._csslines.append(s) 1618 1619 def _writenothing(self, s): 1620 pass 1621 1622 def css(self): 1623 """ Returns the CSS content """ 1624 self._csslines = [] 1625 self._wfunc = self._writecss 1626 self.generate_stylesheet() 1627 res = ''.join(self._csslines) 1628 self._wfunc = self._wlines 1629 del self._csslines 1630 return res 1631 1632 def save(self, outputfile, addsuffix=False): 1633 """ Save the HTML under the filename. 
1634 If the filename is '-' then save to stdout 1635 We have the last style filename in self.stylefilename 1636 """ 1637 if outputfile == '-': 1638 import sys # Added by Kovid 1639 outputfp = sys.stdout 1640 else: 1641 if addsuffix: 1642 outputfile = outputfile + ".html" 1643 outputfp = open(outputfile, "wb") 1644 outputfp.write(self.xhtml().encode('us-ascii','xmlcharrefreplace')) 1645 outputfp.close() 1646 1647 1648class ODF2XHTMLembedded(ODF2XHTML): 1649 1650 """ The ODF2XHTML parses an ODF file and produces XHTML""" 1651 1652 def __init__(self, lines, generate_css=True, embedable=False): 1653 self._resetobject() 1654 self.lines = lines 1655 1656 # Tags 1657 self.generate_css = generate_css 1658 self.elements = { 1659# (DCNS, 'title'): (self.s_processcont, self.e_dc_title), 1660# (DCNS, 'language'): (self.s_processcont, self.e_dc_contentlanguage), 1661# (DCNS, 'creator'): (self.s_processcont, self.e_dc_metatag), 1662# (DCNS, 'description'): (self.s_processcont, self.e_dc_metatag), 1663# (DCNS, 'date'): (self.s_processcont, self.e_dc_metatag), 1664 (DRAWNS, 'frame'): (self.s_draw_frame, self.e_draw_frame), 1665 (DRAWNS, 'image'): (self.s_draw_image, None), 1666 (DRAWNS, 'fill-image'): (self.s_draw_fill_image, None), 1667 (DRAWNS, "layer-set"):(self.s_ignorexml, None), 1668 (DRAWNS, 'page'): (self.s_draw_page, self.e_draw_page), 1669 (DRAWNS, 'object'): (self.s_draw_object, None), 1670 (DRAWNS, 'object-ole'): (self.s_draw_object_ole, None), 1671 (DRAWNS, 'text-box'): (self.s_draw_textbox, self.e_draw_textbox), 1672# (METANS, 'creation-date'):(self.s_processcont, self.e_dc_metatag), 1673# (METANS, 'generator'):(self.s_processcont, self.e_dc_metatag), 1674# (METANS, 'initial-creator'): (self.s_processcont, self.e_dc_metatag), 1675# (METANS, 'keyword'): (self.s_processcont, self.e_dc_metatag), 1676 (NUMBERNS, "boolean-style"):(self.s_ignorexml, None), 1677 (NUMBERNS, "currency-style"):(self.s_ignorexml, None), 1678 (NUMBERNS, "date-style"):(self.s_ignorexml, None), 
1679 (NUMBERNS, "number-style"):(self.s_ignorexml, None), 1680 (NUMBERNS, "text-style"):(self.s_ignorexml, None), 1681# (OFFICENS, "automatic-styles"):(self.s_office_automatic_styles, None), 1682# (OFFICENS, "document-content"):(self.s_office_document_content, self.e_office_document_content), 1683 (OFFICENS, "forms"):(self.s_ignorexml, None), 1684# (OFFICENS, "master-styles"):(self.s_office_master_styles, None), 1685 (OFFICENS, "meta"):(self.s_ignorecont, None), 1686# (OFFICENS, "presentation"):(self.s_office_presentation, self.e_office_presentation), 1687# (OFFICENS, "spreadsheet"):(self.s_office_spreadsheet, self.e_office_spreadsheet), 1688# (OFFICENS, "styles"):(self.s_office_styles, None), 1689# (OFFICENS, "text"):(self.s_office_text, self.e_office_text), 1690 (OFFICENS, "scripts"):(self.s_ignorexml, None), 1691 (PRESENTATIONNS, "notes"):(self.s_ignorexml, None), 1692# (STYLENS, "default-page-layout"):(self.s_style_default_page_layout, self.e_style_page_layout), 1693# (STYLENS, "default-page-layout"):(self.s_ignorexml, None), 1694# (STYLENS, "default-style"):(self.s_style_default_style, self.e_style_default_style), 1695# (STYLENS, "drawing-page-properties"):(self.s_style_handle_properties, None), 1696# (STYLENS, "font-face"):(self.s_style_font_face, None), 1697# (STYLENS, "footer"):(self.s_style_footer, self.e_style_footer), 1698# (STYLENS, "footer-style"):(self.s_style_footer_style, None), 1699# (STYLENS, "graphic-properties"):(self.s_style_handle_properties, None), 1700# (STYLENS, "handout-master"):(self.s_ignorexml, None), 1701# (STYLENS, "header"):(self.s_style_header, self.e_style_header), 1702# (STYLENS, "header-footer-properties"):(self.s_style_handle_properties, None), 1703# (STYLENS, "header-style"):(self.s_style_header_style, None), 1704# (STYLENS, "master-page"):(self.s_style_master_page, None), 1705# (STYLENS, "page-layout-properties"):(self.s_style_handle_properties, None), 1706# (STYLENS, "page-layout"):(self.s_style_page_layout, 
self.e_style_page_layout), 1707# (STYLENS, "page-layout"):(self.s_ignorexml, None), 1708# (STYLENS, "paragraph-properties"):(self.s_style_handle_properties, None), 1709# (STYLENS, "style"):(self.s_style_style, self.e_style_style), 1710# (STYLENS, "table-cell-properties"):(self.s_style_handle_properties, None), 1711# (STYLENS, "table-column-properties"):(self.s_style_handle_properties, None), 1712# (STYLENS, "table-properties"):(self.s_style_handle_properties, None), 1713# (STYLENS, "text-properties"):(self.s_style_handle_properties, None), 1714 (SVGNS, 'desc'): (self.s_ignorexml, None), 1715 (TABLENS, 'covered-table-cell'): (self.s_ignorexml, None), 1716 (TABLENS, 'table-cell'): (self.s_table_table_cell, self.e_table_table_cell), 1717 (TABLENS, 'table-column'): (self.s_table_table_column, None), 1718 (TABLENS, 'table-row'): (self.s_table_table_row, self.e_table_table_row), 1719 (TABLENS, 'table'): (self.s_table_table, self.e_table_table), 1720 (TEXTNS, 'a'): (self.s_text_a, self.e_text_a), 1721 (TEXTNS, "alphabetical-index-source"):(self.s_text_x_source, self.e_text_x_source), 1722 (TEXTNS, "bibliography-configuration"):(self.s_ignorexml, None), 1723 (TEXTNS, "bibliography-source"):(self.s_text_x_source, self.e_text_x_source), 1724 (TEXTNS, 'h'): (self.s_text_h, self.e_text_h), 1725 (TEXTNS, "illustration-index-source"):(self.s_text_x_source, self.e_text_x_source), 1726 (TEXTNS, 'line-break'):(self.s_text_line_break, None), 1727 (TEXTNS, "linenumbering-configuration"):(self.s_ignorexml, None), 1728 (TEXTNS, "list"):(self.s_text_list, self.e_text_list), 1729 (TEXTNS, "list-item"):(self.s_text_list_item, self.e_text_list_item), 1730 (TEXTNS, "list-level-style-bullet"):(self.s_text_list_level_style_bullet, self.e_text_list_level_style_bullet), 1731 (TEXTNS, "list-level-style-number"):(self.s_text_list_level_style_number, self.e_text_list_level_style_number), 1732 (TEXTNS, "list-style"):(None, None), 1733 (TEXTNS, "note"):(self.s_text_note, None), 1734 (TEXTNS, 
"note-body"):(self.s_text_note_body, self.e_text_note_body), 1735 (TEXTNS, "note-citation"):(None, self.e_text_note_citation), 1736 (TEXTNS, "notes-configuration"):(self.s_ignorexml, None), 1737 (TEXTNS, "object-index-source"):(self.s_text_x_source, self.e_text_x_source), 1738 (TEXTNS, 'p'): (self.s_text_p, self.e_text_p), 1739 (TEXTNS, 's'): (self.s_text_s, None), 1740 (TEXTNS, 'span'): (self.s_text_span, self.e_text_span), 1741 (TEXTNS, 'tab'): (self.s_text_tab, None), 1742 (TEXTNS, "table-index-source"):(self.s_text_x_source, self.e_text_x_source), 1743 (TEXTNS, "table-of-content-source"):(self.s_text_x_source, self.e_text_x_source), 1744 (TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source), 1745 (TEXTNS, "page-number"):(None, None), 1746 } 1747