1#!/usr/local/bin/python3.8
2# -*- coding: utf-8 -*-
3# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
4#
5# This library is free software; you can redistribute it and/or
6# modify it under the terms of the GNU Lesser General Public
7# License as published by the Free Software Foundation; either
8# version 2.1 of the License, or (at your option) any later version.
9#
10# This library is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13# Lesser General Public License for more details.
14#
15# You should have received a copy of the GNU Lesser General Public
16# License along with this library; if not, write to the Free Software
17# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18#
19# Contributor(s):
20#
21# import pdb
22# pdb.set_trace()
23
24from collections import defaultdict
25from xml.sax import handler
26from xml.sax.saxutils import escape, quoteattr
27from xml.dom import Node
28
29from .opendocument import load
30
31from .namespaces import ANIMNS, CHARTNS, CONFIGNS, DCNS, DR3DNS, DRAWNS, FONS, \
32  FORMNS, MATHNS, METANS, NUMBERNS, OFFICENS, PRESENTATIONNS, SCRIPTNS, \
33  SMILNS, STYLENS, SVGNS, TABLENS, TEXTNS, XLINKNS
34from polyglot.builtins import unicode_type
35
# This branch never executes; it only mentions these imported namespace
# constants, presumably so that static checkers do not report them as
# unused imports -- TODO confirm intent.
if False:  # Added by Kovid
    DR3DNS, MATHNS, CHARTNS, CONFIGNS, ANIMNS, FORMNS, SMILNS, SCRIPTNS
38
39# Handling of styles
40#
41# First there are font face declarations. These set up a font style that will be
42# referenced from a text-property. The declaration describes the font making
43# it possible for the application to find a similar font should the system not
44# have that particular one. The StyleToCSS stores these attributes to be used
45# for the CSS2 font declaration.
46#
47# Then there are default-styles. These set defaults for various style types:
48#  "text", "paragraph", "section", "ruby", "table", "table-column", "table-row",
49#  "table-cell", "graphic", "presentation", "drawing-page", "chart".
# Since CSS2 can't refer to another style, ODF2XHTML adds these to all
# styles unless overridden.
52#
53# The real styles are declared in the <style:style> element. They have a
54# family referring to the default-styles, and may have a parent style.
55#
# Styles have scope. The same name can be used for both paragraph and
# character styles, etc. Since CSS2 has no scope, we use a prefix. (Not elegant.)
# In ODF a style can have a parent, and these parents can be chained.
59
60
class StyleToCSS:

    """ Holds the rules that convert ODF style properties to CSS2.

        convert_styles() looks each (namespace, name) property up in
        self.ruleconversions and lets the matching c_* method write the
        corresponding CSS declarations into an output dict. The object also
        keeps the font declarations (fontdict) and the fill images
        (fillimages) that some of those conversions need.

        Every c_* method has the signature (ruleset, sdict, rule, val):
        ruleset is the complete dict of properties being converted, sdict is
        the CSS dict being built, rule is the (namespace, name) key and val
        is its value.
    """

    # ODF generic font family -> CSS2 generic font family.
    # CSS2: serif, sans-serif, cursive, fantasy, monospace
    # ODF: roman, swiss, modern, decorative, script, system
    _GENERIC_FONTS = {
        "roman": "serif",
        "swiss": "sans-serif",
        "modern": "monospace",
        "decorative": "sans-serif",
        "script": "monospace",
        "system": "serif",
    }

    def __init__(self):
        # Font declarations: font name -> (family, CSS generic family)
        self.fontdict = {}

        # Fill-images from presentations for backgrounds: name -> href
        self.fillimages = {}

        # (namespace, attribute name) -> method that emits the CSS for it
        self.ruleconversions = {
            (DRAWNS, 'fill-image-name'): self.c_drawfillimage,
            (FONS, "background-color"): self.c_fo,
            (FONS, "border"): self.c_fo,
            (FONS, "border-bottom"): self.c_fo,
            (FONS, "border-left"): self.c_fo,
            (FONS, "border-right"): self.c_fo,
            (FONS, "border-top"): self.c_fo,
            (FONS, "break-after"): self.c_break,  # Added by Kovid
            (FONS, "break-before"): self.c_break,  # Added by Kovid
            (FONS, "color"): self.c_fo,
            (FONS, "font-family"): self.c_fo,
            (FONS, "font-size"): self.c_fo,
            (FONS, "font-style"): self.c_fo,
            (FONS, "font-variant"): self.c_fo,
            (FONS, "font-weight"): self.c_fo,
            (FONS, "line-height"): self.c_fo,
            (FONS, "margin"): self.c_fo,
            (FONS, "margin-bottom"): self.c_fo,
            (FONS, "margin-left"): self.c_fo,
            (FONS, "margin-right"): self.c_fo,
            (FONS, "margin-top"): self.c_fo,
            (FONS, "min-height"): self.c_fo,
            (FONS, "padding"): self.c_fo,
            (FONS, "padding-bottom"): self.c_fo,
            (FONS, "padding-left"): self.c_fo,
            (FONS, "padding-right"): self.c_fo,
            (FONS, "padding-top"): self.c_fo,
            (FONS, "page-width"): self.c_page_width,
            (FONS, "page-height"): self.c_page_height,
            (FONS, "text-align"): self.c_text_align,
            (FONS, "text-indent"): self.c_fo,
            (TABLENS, 'border-model'): self.c_border_model,
            (STYLENS, 'column-width'): self.c_width,
            (STYLENS, "font-name"): self.c_fn,
            (STYLENS, 'horizontal-pos'): self.c_hp,
            (STYLENS, 'text-position'): self.c_text_position,
            (STYLENS, 'text-line-through-style'): self.c_text_line_through_style,
            (STYLENS, 'text-underline-style'): self.c_text_underline_style,
            (STYLENS, 'width'): self.c_width,
            # FIXME Should do style:vertical-pos here
        }

    def save_font(self, name, family, generic):
        """ It is possible that the HTML browser doesn't know how to
            show a particular font. Fortunately ODF provides generic fallbacks.
            Unfortunately they are not the same as CSS2.
            This method stores the font family and its CSS2 fallback in
            self.fontdict under the key `name`. An unknown (or missing)
            generic family falls back to "sans-serif".
        """
        htmlgeneric = self._GENERIC_FONTS.get(generic, "sans-serif")
        self.fontdict[name] = (family, htmlgeneric)

    @staticmethod
    def _add_text_decoration(sdict, decoration):
        """ Add `decoration` to sdict['text-decoration'], keeping any
            decoration that is already there, so that underline and
            line-through can be combined on the same style.
        """
        parts = [p for p in sdict.get('text-decoration', '').split() if p != 'none']
        if decoration not in parts:
            parts.append(decoration)
        sdict['text-decoration'] = ' '.join(parts)

    def c_drawfillimage(self, ruleset, sdict, rule, val):
        """ Fill a figure with an image. Since CSS doesn't let you resize images
            this should really be implemented as an absolutely positioned <img>
            with a width and a height.
            A fill image name that was never registered in self.fillimages is
            skipped instead of aborting the conversion with a KeyError.
        """
        image = self.fillimages.get(val)
        if image is not None:
            sdict['background-image'] = "url('%s')" % image

    def c_fo(self, ruleset, sdict, rule, val):
        """ XSL formatting attributes: the attribute name is used unchanged
            as the CSS property name.
        """
        selector = rule[1]
        sdict[selector] = val

    def c_break(self, ruleset, sdict, rule, val):  # Added by Kovid
        """ Convert break-before/break-after into the CSS2 page-break-*
            property. Values not in the table below become 'auto'.
        """
        css_property = 'page-' + rule[1]
        values = {'auto': 'auto', 'column': 'always', 'page': 'always',
                  'even-page': 'left', 'odd-page': 'right',
                  'inherit': 'inherit'}
        sdict[css_property] = values.get(val, 'auto')

    def c_border_model(self, ruleset, sdict, rule, val):
        """ Convert to CSS2 border model """
        if val == 'collapsing':
            sdict['border-collapse'] = 'collapse'
        else:
            sdict['border-collapse'] = 'separate'

    def c_width(self, ruleset, sdict, rule, val):
        """ Set width of box """
        sdict['width'] = val

    def c_text_align(self, ruleset, sdict, rule, align):
        """ Text align: "start" and "end" become "left" and "right", any
            other value is passed through.
        """
        if align == "start":
            align = "left"
        if align == "end":
            align = "right"
        sdict['text-align'] = align

    def c_fn(self, ruleset, sdict, rule, fontstyle):
        """ Generate the CSS font family
            A generic font can be found in two ways. In a <style:font-face>
            element or as a font-family-generic attribute in text-properties.
        """
        generic = ruleset.get((STYLENS, 'font-family-generic'))
        if generic is not None:
            self.save_font(fontstyle, fontstyle, generic)
        # Fonts never declared fall back to their own name plus 'serif'
        family, htmlgeneric = self.fontdict.get(fontstyle, (fontstyle, 'serif'))
        sdict['font-family'] = '%s, %s' % (family, htmlgeneric)

    def c_text_position(self, ruleset, sdict, rule, tp):
        """ Text position. This is used e.g. to make superscript and subscript
            This attribute can have one or two values.

            The first value must be present and specifies the vertical
            text position as a percentage that relates to the current font
            height or it takes one of the values sub or super. Negative
            percentages or the sub value place the text below the
            baseline. Positive percentages or the super value place
            the text above the baseline. If sub or super is specified,
            the application can choose an appropriate text position.

            The second value is optional and specifies the font height
            as a percentage that relates to the current font-height. If
            this value is not specified, an appropriate font height is
            used. Although this value may change the font height that
            is displayed, it never changes the current font height that
            is used for additional calculations.
        """
        textpos = tp.split(' ')
        if len(textpos) == 2 and textpos[0] != "0%":
            # Bug in OpenOffice. If vertical-align is 0% - ignore the text size.
            sdict['font-size'] = textpos[1]
        if textpos[0] == "super":
            sdict['vertical-align'] = "33%"
        elif textpos[0] == "sub":
            sdict['vertical-align'] = "-33%"
        else:
            sdict['vertical-align'] = textpos[0]

    def c_hp(self, ruleset, sdict, rule, hpos):
        """ Convert style:horizontal-pos, together with the style:wrap value
            found in the same ruleset, into float / position / margin
            declarations.
        """
        # FIXME: Frames wrap-style defaults to 'parallel', graphics to 'none'.
        # It is properly set in the parent-styles, but the program doesn't
        # collect the information.
        wrap = ruleset.get((STYLENS, 'wrap'), 'parallel')
        # Can have: from-left, left, center, right, from-inside, inside, outside
        if hpos == "center":
            sdict['margin-left'] = "auto"
            sdict['margin-right'] = "auto"
        # else:
        #     # force it to be *something* then delete it
        #     sdict['margin-left'] = sdict['margin-right'] = ''
        #     del sdict['margin-left'], sdict['margin-right']

        if hpos in ("right", "outside"):
            if wrap in ("left", "parallel", "dynamic"):
                sdict['float'] = "right"
            elif wrap == "run-through":
                sdict['position'] = "absolute"  # Simulate run-through
                sdict['top'] = "0"
                sdict['right'] = "0"
            else:  # No wrapping
                sdict['margin-left'] = "auto"
                sdict['margin-right'] = "0px"
        elif hpos in ("left", "inside"):
            if wrap in ("right", "parallel", "dynamic"):
                sdict['float'] = "left"
            elif wrap == "run-through":
                sdict['position'] = "absolute"  # Simulate run-through
                sdict['top'] = "0"
                sdict['left'] = "0"
            else:  # No wrapping
                sdict['margin-left'] = "0px"
                sdict['margin-right'] = "auto"
        elif hpos in ("from-left", "from-inside"):
            if wrap in ("right", "parallel"):
                sdict['float'] = "left"
            else:
                sdict['position'] = "relative"  # No wrapping
                if (SVGNS, 'x') in ruleset:
                    sdict['left'] = ruleset[(SVGNS, 'x')]

    def c_page_width(self, ruleset, sdict, rule, val):
        """ Set width of box
            HTML doesn't really have a page-width. It is always 100% of the browser width
        """
        sdict['width'] = val

    def c_text_underline_style(self, ruleset, sdict, rule, val):
        """ Set underline decoration for any underline style other than
            "none". An existing text-decoration (e.g. line-through) is kept.
        """
        if val and val != "none":
            self._add_text_decoration(sdict, "underline")

    def c_text_line_through_style(self, ruleset, sdict, rule, val):
        """ Set line-through decoration for any line-through style other
            than "none". An existing text-decoration (e.g. underline) is kept.
        """
        if val and val != "none":
            self._add_text_decoration(sdict, "line-through")

    def c_page_height(self, ruleset, sdict, rule, val):
        """ Set height of box """
        sdict['height'] = val

    def convert_styles(self, ruleset):
        """ Convert a dict of ODF properties into a dict of CSS declarations.
            Rule is a tuple of (namespace, name). If the namespace is '' then
            it is already CSS2 and is copied unchanged. Properties without an
            entry in self.ruleconversions are ignored.
        """
        sdict = {}
        for rule, val in ruleset.items():
            if rule[0] == '':
                sdict[rule[1]] = val
                continue
            method = self.ruleconversions.get(rule)
            if method:
                method(ruleset, sdict, rule, val)
        return sdict
299
300
class TagStack:

    """ Stack of (tag, attrs) pairs. Entries are appended by push() and
        removed from the same end by pop().
    """

    def __init__(self):
        self.stack = []

    def push(self, tag, attrs):
        """ Put a (tag, attrs) pair on top of the stack """
        entry = (tag, attrs)
        self.stack.append(entry)

    def pop(self):
        """ Remove the top entry and return it as a (tag, attrs) tuple """
        return self.stack.pop()

    def stackparent(self):
        """ Return the attrs of the top entry without removing it """
        return self.stack[-1][1]

    def rfindattr(self, attr):
        """ Return the value of `attr` from the first entry that has it, or
            None when no entry does. The scan starts at the bottom of the
            stack (the entry pushed first).
            NOTE(review): the name suggests a search from the top of the
            stack; confirm against the callers which order is intended.
        """
        found = (attrs[attr] for _tag, attrs in self.stack if attr in attrs)
        return next(found, None)

    def count_tags(self, tag):
        """ Count the entries on the stack whose tag equals `tag` """
        return sum(1 for entry_tag, _attrs in self.stack if entry_tag == tag)
330
331
# Prefixed style name -> HTML element name. Keys start with "S-" or "P-"
# followed by the ODF style name ("_20_" stands in for a space in those
# names). The lookup site is not part of this section of the file;
# presumably a matching style is rendered with the listed element instead
# of a generic one -- TODO confirm.
special_styles = {
    'S-Emphasis': 'em',
    'S-Citation': 'cite',
    'S-Strong_20_Emphasis': 'strong',
    'S-Variable': 'var',
    'S-Definition': 'dfn',
    'S-Teletype': 'tt',
    'P-Heading_20_1': 'h1',
    'P-Heading_20_2': 'h2',
    'P-Heading_20_3': 'h3',
    'P-Heading_20_4': 'h4',
    'P-Heading_20_5': 'h5',
    'P-Heading_20_6': 'h6',
    # 'P-Caption': 'caption',
    'P-Addressee': 'address',
    # 'P-List_20_Heading': 'dt',
    # 'P-List_20_Contents': 'dd',
    'P-Preformatted_20_Text': 'pre',
    # 'P-Table_20_Heading': 'th',
    # 'P-Table_20_Contents': 'td',
    # 'P-Text_20_body': 'p',
}
354
355# -----------------------------------------------------------------------------
356#
357# ODFCONTENTHANDLER
358#
359# -----------------------------------------------------------------------------
360
361
362class ODF2XHTML(handler.ContentHandler):
363
364    """ The ODF2XHTML parses an ODF file and produces XHTML"""
365
366    def __init__(self, generate_css=True, embedable=False):
367        # Tags
368        self.generate_css = generate_css
369        self.frame_stack = []
370        self.list_number_map = defaultdict(lambda : 1)
371        self.list_id_map = {}
372        self.list_class_stack = []
373        self.elements = {
374        (DCNS, 'title'): (self.s_processcont, self.e_dc_title),
375        (DCNS, 'language'): (self.s_processcont, self.e_dc_contentlanguage),
376        (DCNS, 'creator'): (self.s_processcont, self.e_dc_creator),
377        (DCNS, 'description'): (self.s_processcont, self.e_dc_metatag),
378        (DCNS, 'date'): (self.s_processcont, self.e_dc_metatag),
379        (DRAWNS, 'custom-shape'): (self.s_custom_shape, self.e_custom_shape),
380        (DRAWNS, 'frame'): (self.s_draw_frame, self.e_draw_frame),
381        (DRAWNS, 'image'): (self.s_draw_image, None),
382        (DRAWNS, 'fill-image'): (self.s_draw_fill_image, None),
383        (DRAWNS, "layer-set"):(self.s_ignorexml, None),
384        (DRAWNS, 'object'): (self.s_draw_object, None),
385        (DRAWNS, 'object-ole'): (self.s_draw_object_ole, None),
386        (DRAWNS, 'page'): (self.s_draw_page, self.e_draw_page),
387        (DRAWNS, 'text-box'): (self.s_draw_textbox, self.e_draw_textbox),
388        (METANS, 'creation-date'):(self.s_processcont, self.e_dc_metatag),
389        (METANS, 'generator'):(self.s_processcont, self.e_dc_metatag),
390        (METANS, 'initial-creator'): (self.s_processcont, self.e_dc_metatag),
391        (METANS, 'keyword'): (self.s_processcont, self.e_dc_metatag),
392        (NUMBERNS, "boolean-style"):(self.s_ignorexml, None),
393        (NUMBERNS, "currency-style"):(self.s_ignorexml, None),
394        (NUMBERNS, "date-style"):(self.s_ignorexml, None),
395        (NUMBERNS, "number-style"):(self.s_ignorexml, None),
396        (NUMBERNS, "text-style"):(self.s_ignorexml, None),
397        (OFFICENS, "annotation"):(self.s_ignorexml, None),
398        (OFFICENS, "automatic-styles"):(self.s_office_automatic_styles, None),
399        (OFFICENS, "document"):(self.s_office_document_content, self.e_office_document_content),
400        (OFFICENS, "document-content"):(self.s_office_document_content, self.e_office_document_content),
401        (OFFICENS, "forms"):(self.s_ignorexml, None),
402        (OFFICENS, "master-styles"):(self.s_office_master_styles, None),
403        (OFFICENS, "meta"):(self.s_ignorecont, None),
404        (OFFICENS, "presentation"):(self.s_office_presentation, self.e_office_presentation),
405        (OFFICENS, "spreadsheet"):(self.s_office_spreadsheet, self.e_office_spreadsheet),
406        (OFFICENS, "styles"):(self.s_office_styles, None),
407        (OFFICENS, "text"):(self.s_office_text, self.e_office_text),
408        (OFFICENS, "scripts"):(self.s_ignorexml, None),
409        (OFFICENS, "settings"):(self.s_ignorexml, None),
410        (PRESENTATIONNS, "notes"):(self.s_ignorexml, None),
411#       (STYLENS, "default-page-layout"):(self.s_style_default_page_layout, self.e_style_page_layout),
412        (STYLENS, "default-page-layout"):(self.s_ignorexml, None),
413        (STYLENS, "default-style"):(self.s_style_default_style, self.e_style_default_style),
414        (STYLENS, "drawing-page-properties"):(self.s_style_handle_properties, None),
415        (STYLENS, "font-face"):(self.s_style_font_face, None),
416#       (STYLENS, "footer"):(self.s_style_footer, self.e_style_footer),
417#       (STYLENS, "footer-style"):(self.s_style_footer_style, None),
418        (STYLENS, "graphic-properties"):(self.s_style_handle_properties, None),
419        (STYLENS, "handout-master"):(self.s_ignorexml, None),
420#       (STYLENS, "header"):(self.s_style_header, self.e_style_header),
421#       (STYLENS, "header-footer-properties"):(self.s_style_handle_properties, None),
422#       (STYLENS, "header-style"):(self.s_style_header_style, None),
423        (STYLENS, "master-page"):(self.s_style_master_page, None),
424        (STYLENS, "page-layout-properties"):(self.s_style_handle_properties, None),
425        (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout),
426#       (STYLENS, "page-layout"):(self.s_ignorexml, None),
427        (STYLENS, "paragraph-properties"):(self.s_style_handle_properties, None),
428        (STYLENS, "style"):(self.s_style_style, self.e_style_style),
429        (STYLENS, "table-cell-properties"):(self.s_style_handle_properties, None),
430        (STYLENS, "table-column-properties"):(self.s_style_handle_properties, None),
431        (STYLENS, "table-properties"):(self.s_style_handle_properties, None),
432        (STYLENS, "text-properties"):(self.s_style_handle_properties, None),
433        (SVGNS, 'desc'): (self.s_ignorexml, None),
434        (TABLENS, 'covered-table-cell'): (self.s_ignorexml, None),
435        (TABLENS, 'table-cell'): (self.s_table_table_cell, self.e_table_table_cell),
436        (TABLENS, 'table-column'): (self.s_table_table_column, None),
437        (TABLENS, 'table-row'): (self.s_table_table_row, self.e_table_table_row),
438        (TABLENS, 'table'): (self.s_table_table, self.e_table_table),
439        (TEXTNS, 'a'): (self.s_text_a, self.e_text_a),
440        (TEXTNS, "alphabetical-index-source"):(self.s_text_x_source, self.e_text_x_source),
441        (TEXTNS, "bibliography-configuration"):(self.s_ignorexml, None),
442        (TEXTNS, "bibliography-source"):(self.s_text_x_source, self.e_text_x_source),
443        (TEXTNS, 'bookmark'): (self.s_text_bookmark, None),
444        (TEXTNS, 'bookmark-start'): (self.s_text_bookmark, None),
445        (TEXTNS, 'reference-mark-start'): (self.s_text_bookmark, None),  # Added by Kovid
446        (TEXTNS, 'bookmark-ref'): (self.s_text_bookmark_ref, self.e_text_a),
447        (TEXTNS, 'reference-ref'): (self.s_text_bookmark_ref, self.e_text_a),  # Added by Kovid
448        (TEXTNS, 'bookmark-ref-start'): (self.s_text_bookmark_ref, None),
449        (TEXTNS, 'h'): (self.s_text_h, self.e_text_h),
450        (TEXTNS, "illustration-index-source"):(self.s_text_x_source, self.e_text_x_source),
451        (TEXTNS, 'line-break'):(self.s_text_line_break, None),
452        (TEXTNS, "linenumbering-configuration"):(self.s_ignorexml, None),
453        (TEXTNS, "list"):(self.s_text_list, self.e_text_list),
454        (TEXTNS, "list-item"):(self.s_text_list_item, self.e_text_list_item),
455        (TEXTNS, "list-level-style-bullet"):(self.s_text_list_level_style_bullet, self.e_text_list_level_style_bullet),
456        (TEXTNS, "list-level-style-number"):(self.s_text_list_level_style_number, self.e_text_list_level_style_number),
457        (TEXTNS, "list-style"):(None, None),
458        (TEXTNS, "note"):(self.s_text_note, None),
459        (TEXTNS, "note-body"):(self.s_text_note_body, self.e_text_note_body),
460        (TEXTNS, "note-citation"):(None, self.e_text_note_citation),
461        (TEXTNS, "notes-configuration"):(self.s_ignorexml, None),
462        (TEXTNS, "object-index-source"):(self.s_text_x_source, self.e_text_x_source),
463        (TEXTNS, 'p'): (self.s_text_p, self.e_text_p),
464        (TEXTNS, 's'): (self.s_text_s, None),
465        (TEXTNS, 'span'): (self.s_text_span, self.e_text_span),
466        (TEXTNS, 'tab'): (self.s_text_tab, None),
467        (TEXTNS, "table-index-source"):(self.s_text_x_source, self.e_text_x_source),
468        (TEXTNS, "table-of-content-source"):(self.s_text_x_source, self.e_text_x_source),
469        (TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source),
470        }
471        if embedable:
472            self.make_embedable()
473        self._resetobject()
474
475    def set_plain(self):
476        """ Tell the parser to not generate CSS """
477        self.generate_css = False
478
479    def set_embedable(self):
480        """ Tells the converter to only output the parts inside the <body>"""
481        self.elements[(OFFICENS, u"text")] = (None,None)
482        self.elements[(OFFICENS, u"spreadsheet")] = (None,None)
483        self.elements[(OFFICENS, u"presentation")] = (None,None)
484        self.elements[(OFFICENS, u"document-content")] = (None,None)
485
486    def add_style_file(self, stylefilename, media=None):
487        """ Add a link to an external style file.
488            Also turns of the embedding of styles in the HTML
489        """
490        self.use_internal_css = False
491        self.stylefilename = stylefilename
492        if media:
493            self.metatags.append('<link rel="stylesheet" type="text/css" href="%s" media="%s"/>\n' % (stylefilename,media))
494        else:
495            self.metatags.append('<link rel="stylesheet" type="text/css" href="%s"/>\n' % (stylefilename))
496
497    def _resetfootnotes(self):
498        # Footnotes and endnotes
499        self.notedict = {}
500        self.currentnote = 0
501        self.notebody = ''
502
503    def _resetobject(self):
504        self.lines = []
505        self._wfunc = self._wlines
506        self.xmlfile = ''
507        self.title = ''
508        self.language = ''
509        self.creator = ''
510        self.data = []
511        self.tagstack = TagStack()
512        self.htmlstack = []
513        self.pstack = []
514        self.processelem = True
515        self.processcont = True
516        self.listtypes = {}
517        self.headinglevels = [0, 0,0,0,0,0, 0,0,0,0,0]  # level 0 to 10
518        self.use_internal_css = True
519        self.cs = StyleToCSS()
520        self.anchors = {}
521
522        # Style declarations
523        self.stylestack = []
524        self.styledict = {}
525        self.currentstyle = None
526        self.list_starts = {}
527
528        self._resetfootnotes()
529
530        # Tags from meta.xml
531        self.metatags = []
532
533    def writeout(self, s):
534        if s != '':
535            self._wfunc(s)
536
537    def writedata(self):
538        d = ''.join(self.data)
539        if d != '':
540            self.writeout(escape(d))
541
542    def opentag(self, tag, attrs={}, block=False):
543        """ Create an open HTML tag """
544        self.htmlstack.append((tag,attrs,block))
545        a = []
546        for key,val in attrs.items():
547            a.append('''%s=%s''' % (key, quoteattr(val)))
548        if len(a) == 0:
549            self.writeout("<%s>" % tag)
550        else:
551            self.writeout("<%s %s>" % (tag, " ".join(a)))
552        if block:
553            self.writeout("\n")
554
555    def closetag(self, tag, block=True):
556        """ Close an open HTML tag """
557        self.htmlstack.pop()
558        self.writeout("</%s>" % tag)
559        if block:
560            self.writeout("\n")
561
562    def emptytag(self, tag, attrs={}):
563        a = []
564        for key,val in attrs.items():
565            a.append('''%s=%s''' % (key, quoteattr(val)))
566        self.writeout("<%s %s/>\n" % (tag, " ".join(a)))
567
568# --------------------------------------------------
569# Interface to parser
570# --------------------------------------------------
571    def characters(self, data):
572        if self.processelem and self.processcont:
573            self.data.append(data)
574
575    def startElementNS(self, tag, qname, attrs):
576        self.pstack.append((self.processelem, self.processcont))
577        if self.processelem:
578            method = self.elements.get(tag, (None, None))[0]
579            if method:
580                self.handle_starttag(tag, method, attrs)
581            else:
582                self.unknown_starttag(tag,attrs)
583        self.tagstack.push(tag, attrs)
584
585    def endElementNS(self, tag, qname):
586        stag, attrs = self.tagstack.pop()
587        if self.processelem:
588            method = self.elements.get(tag, (None, None))[1]
589            if method:
590                self.handle_endtag(tag, attrs, method)
591            else:
592                self.unknown_endtag(tag, attrs)
593        self.processelem, self.processcont = self.pstack.pop()
594
595# --------------------------------------------------
596    def handle_starttag(self, tag, method, attrs):
597        method(tag,attrs)
598
599    def handle_endtag(self, tag, attrs, method):
600        method(tag, attrs)
601
602    def unknown_starttag(self, tag, attrs):
603        pass
604
605    def unknown_endtag(self, tag, attrs):
606        pass
607
608    def s_ignorexml(self, tag, attrs):
609        """ Ignore this xml element and all children of it
610            It will automatically stop ignoring
611        """
612        self.processelem = False
613
614    def s_ignorecont(self, tag, attrs):
615        """ Stop processing the text nodes """
616        self.processcont = False
617
618    def s_processcont(self, tag, attrs):
619        """ Start processing the text nodes """
620        self.processcont = True
621
622    def classname(self, attrs):
623        """ Generate a class name from a style name """
624        c = attrs.get((TEXTNS,'style-name'),'')
625        c = c.replace(".","_")
626        return c
627
628    def get_anchor(self, name):
629        """ Create a unique anchor id for a href name """
630        if name not in self.anchors:
631            # Changed by Kovid
632            self.anchors[name] = "anchor%d" % (len(self.anchors) + 1)
633        return self.anchors.get(name)
634
635    def purgedata(self):
636        self.data = []
637
638# -----------------------------------------------------------------------------
639#
640# Handle meta data
641#
642# -----------------------------------------------------------------------------
643    def e_dc_title(self, tag, attrs):
644        """ Get the title from the meta data and create a HTML <title>
645        """
646        self.title = ''.join(self.data)
647        # self.metatags.append('<title>%s</title>\n' % escape(self.title))
648        self.data = []
649
650    def e_dc_metatag(self, tag, attrs):
651        """ Any other meta data is added as a <meta> element
652        """
653        self.metatags.append('<meta name="%s" content=%s/>\n' % (tag[1], quoteattr(''.join(self.data))))
654        self.data = []
655
656    def e_dc_contentlanguage(self, tag, attrs):
657        """ Set the content language. Identifies the targeted audience
658        """
659        self.language = ''.join(self.data)
660        self.metatags.append('<meta http-equiv="content-language" content="%s"/>\n' % escape(self.language))
661        self.data = []
662
663    def e_dc_creator(self, tag, attrs):
664        """ Set the content creator. Identifies the targeted audience
665        """
666        self.creator = ''.join(self.data)
667        self.metatags.append('<meta http-equiv="creator" content="%s"/>\n' % escape(self.creator))
668        self.data = []
669
670    def s_custom_shape(self, tag, attrs):
671        """ A <draw:custom-shape> is made into a <div> in HTML which is then styled
672        """
673        anchor_type = attrs.get((TEXTNS,'anchor-type'),'notfound')
674        htmltag = 'div'
675        name = "G-" + attrs.get((DRAWNS,'style-name'), "")
676        if name == 'G-':
677            name = "PR-" + attrs.get((PRESENTATIONNS,'style-name'), "")
678        name = name.replace(".","_")
679        if anchor_type == "paragraph":
680            style = 'position:absolute;'
681        elif anchor_type == 'char':
682            style = "position:absolute;"
683        elif anchor_type == 'as-char':
684            htmltag = 'div'
685            style = ''
686        else:
687            style = "position: absolute;"
688        if (SVGNS,"width") in attrs:
689            style = style + "width:" + attrs[(SVGNS,"width")] + ";"
690        if (SVGNS,"height") in attrs:
691            style = style + "height:" +  attrs[(SVGNS,"height")] + ";"
692        if (SVGNS,"x") in attrs:
693            style = style + "left:" +  attrs[(SVGNS,"x")] + ";"
694        if (SVGNS,"y") in attrs:
695            style = style + "top:" +  attrs[(SVGNS,"y")] + ";"
696        if self.generate_css:
697            self.opentag(htmltag, {'class': name, 'style': style})
698        else:
699            self.opentag(htmltag)
700
701    def e_custom_shape(self, tag, attrs):
702        """ End the <draw:frame>
703        """
704        self.closetag('div')
705
706    def s_draw_frame(self, tag, attrs):
707        """ A <draw:frame> is made into a <div> in HTML which is then styled
708        """
709        self.frame_stack.append([])
710        anchor_type = attrs.get((TEXTNS,'anchor-type'),'notfound')
711        htmltag = 'div'
712        name = "G-" + attrs.get((DRAWNS,'style-name'), "")
713        if name == 'G-':
714            name = "PR-" + attrs.get((PRESENTATIONNS,'style-name'), "")
715        name = name.replace(".","_")
716        if anchor_type == "paragraph":
717            style = 'position:relative;'
718        elif anchor_type == 'char':
719            style = "position:relative;"
720        elif anchor_type == 'as-char':
721            htmltag = 'div'
722            style = ''
723        else:
724            style = "position:absolute;"
725        if (SVGNS,"width") in attrs:
726            style = style + "width:" + attrs[(SVGNS,"width")] + ";"
727        if (SVGNS,"height") in attrs:
728            style = style + "height:" +  attrs[(SVGNS,"height")] + ";"
729        if (SVGNS,"x") in attrs:
730            style = style + "left:" +  attrs[(SVGNS,"x")] + ";"
731        if (SVGNS,"y") in attrs:
732            style = style + "top:" +  attrs[(SVGNS,"y")] + ";"
733        if self.generate_css:
734            self.opentag(htmltag, {'class': name, 'style': style})
735        else:
736            self.opentag(htmltag)
737
738    def e_draw_frame(self, tag, attrs):
739        """ End the <draw:frame>
740        """
741        self.closetag('div')
742        self.frame_stack.pop()
743
744    def s_draw_fill_image(self, tag, attrs):
745        name = attrs.get((DRAWNS,'name'), "NoName")
746        imghref = attrs[(XLINKNS,"href")]
747        imghref = self.rewritelink(imghref)
748        self.cs.fillimages[name] = imghref
749
750    def rewritelink(self, imghref):
751        """ Intended to be overloaded if you don't store your pictures
752            in a Pictures subfolder
753        """
754        return imghref
755
756    def s_draw_image(self, tag, attrs):
757        """ A <draw:image> becomes an <img/> element
758        """
759        if self.frame_stack:
760            if self.frame_stack[-1]:
761                return
762            self.frame_stack[-1].append('img')
763        parent = self.tagstack.stackparent()
764        anchor_type = parent.get((TEXTNS,'anchor-type'))
765        imghref = attrs[(XLINKNS,"href")]
766        imghref = self.rewritelink(imghref)
767        htmlattrs = {'alt':"", 'src':imghref}
768        if self.generate_css:
769            if anchor_type != "char":
770                htmlattrs['style'] = "display: block;"
771        self.emptytag('img', htmlattrs)
772
773    def s_draw_object(self, tag, attrs):
774        """ A <draw:object> is embedded object in the document (e.g. spreadsheet in presentation).
775        """
776        return  # Added by Kovid
777        objhref = attrs[(XLINKNS,"href")]
778        # Remove leading "./": from "./Object 1" to "Object 1"
779#       objhref = objhref [2:]
780
781        # Not using os.path.join since it fails to find the file on Windows.
782#       objcontentpath = '/'.join([objhref, 'content.xml'])
783
784        for c in self.document.childnodes:
785            if c.folder == objhref:
786                self._walknode(c.topnode)
787
788    def s_draw_object_ole(self, tag, attrs):
789        """ A <draw:object-ole> is embedded OLE object in the document (e.g. MS Graph).
790        """
791        try:
792            class_id = attrs[(DRAWNS,"class-id")]
793        except KeyError:  # Added by Kovid to ignore <draw> without the right
794            return       # attributes
795        if class_id and class_id.lower() == "00020803-0000-0000-c000-000000000046":  # Microsoft Graph 97 Chart
796            tagattrs = {'name':'object_ole_graph', 'class':'ole-graph'}
797            self.opentag('a', tagattrs)
798            self.closetag('a', tagattrs)
799
800    def s_draw_page(self, tag, attrs):
801        """ A <draw:page> is a slide in a presentation. We use a <fieldset> element in HTML.
802            Therefore if you convert a ODP file, you get a series of <fieldset>s.
803            Override this for your own purpose.
804        """
805        name = attrs.get((DRAWNS,'name'), "NoName")
806        stylename = attrs.get((DRAWNS,'style-name'), "")
807        stylename = stylename.replace(".","_")
808        masterpage = attrs.get((DRAWNS,'master-page-name'),"")
809        masterpage = masterpage.replace(".","_")
810        if self.generate_css:
811            self.opentag('fieldset', {'class':"DP-%s MP-%s" % (stylename, masterpage)})
812        else:
813            self.opentag('fieldset')
814        self.opentag('legend')
815        self.writeout(escape(name))
816        self.closetag('legend')
817
818    def e_draw_page(self, tag, attrs):
819        self.closetag('fieldset')
820
821    def s_draw_textbox(self, tag, attrs):
822        style = ''
823        if (FONS,"min-height") in attrs:
824            style = style + "min-height:" +  attrs[(FONS,"min-height")] + ";"
825        self.opentag('div')
826#       self.opentag('div', {'style': style})
827
828    def e_draw_textbox(self, tag, attrs):
829        """ End the <draw:text-box>
830        """
831        self.closetag('div')
832
833    def html_body(self, tag, attrs):
834        self.writedata()
835        if self.generate_css and self.use_internal_css:
836            self.opentag('style', {'type':"text/css"}, True)
837            self.writeout('/*<![CDATA[*/\n')
838            self.generate_stylesheet()
839            self.writeout('/*]]>*/\n')
840            self.closetag('style')
841        self.purgedata()
842        self.closetag('head')
843        self.opentag('body', block=True)
844
    # Baseline CSS that generate_stylesheet() writes ahead of the rules
    # derived from the document's own styles.
    # background-color: white removed by Kovid for #9118
    # Specifying an explicit bg color prevents ebook readers
    # from successfully inverting colors
    # Added styling for endnotes (the notes-header and dl.notes rules)
    default_styles = """
img { width: 100%; height: 100%; }
* { padding: 0; margin: 0; }
body { margin: 0 1em; }
ol, ul { padding-left: 2em; }
a.citation { text-decoration: none }
h1.notes-header { page-break-before: always }
dl.notes dt { font-size: large }
dl.notes dt a { text-decoration: none }
dl.notes dd { page-break-after: always }
dl.notes dd:last-of-type { page-break-after: avoid }
"""
861
862    def generate_stylesheet(self):
863        for name in self.stylestack:
864            styles = self.styledict.get(name)
865            # Preload with the family's default style
866            if '__style-family' in styles and styles['__style-family'] in self.styledict:
867                familystyle = self.styledict[styles['__style-family']].copy()
868                del styles['__style-family']
869                for style, val in styles.items():
870                    familystyle[style] = val
871                styles = familystyle
872            # Resolve the remaining parent styles
873            while '__parent-style-name' in styles and styles['__parent-style-name'] in self.styledict:
874                parentstyle = self.styledict[styles['__parent-style-name']].copy()
875                del styles['__parent-style-name']
876                for style, val in styles.items():
877                    parentstyle[style] = val
878                styles = parentstyle
879            self.styledict[name] = styles
880        # Write the styles to HTML
881        self.writeout(self.default_styles)
882        # Changed by Kovid to not write out endless copies of the same style
883        css_styles = {}
884        for name in self.stylestack:
885            styles = self.styledict.get(name)
886            css2 = tuple(self.cs.convert_styles(styles).items())
887            if css2 in css_styles:
888                css_styles[css2].append(name)
889            else:
890                css_styles[css2] = [name]
891
892        def filter_margins(css2):
893            names = {k for k, v in css2}
894            ignore = set()
895            if {'margin-left', 'margin-right', 'margin-top',
896                    'margin-bottom'}.issubset(names):
897                # These come from XML and we cannot preserve XML attribute
898                # order so we assume that margin is to be overridden See
899                # https://bugs.launchpad.net/calibre/+bug/941134 and
900                # https://bugs.launchpad.net/calibre/+bug/1002702
901                ignore.add('margin')
902            css2 = sorted(css2, key=lambda x:{'margin':0}.get(x[0], 1))
903            for k, v in css2:
904                if k not in ignore:
905                    yield k, v
906
907        for css2, names in css_styles.items():
908            self.writeout("%s {\n" % ', '.join(names))
909            for style, val in filter_margins(css2):
910                self.writeout("\t%s: %s;\n" % (style, val))
911            self.writeout("}\n")
912
913    def generate_footnotes(self):
914        if self.currentnote == 0:
915            return
916        # Changed by Kovid to improve endnote functionality
917        self.opentag('h1', {'class':'notes-header'})
918        self.writeout(_('Notes'))
919        self.closetag('h1')
920        self.opentag('dl', {'class':'notes'})
921        for key in range(1,self.currentnote+1):
922            note = self.notedict[key]
923#       for key,note in self.notedict.items():
924            self.opentag('dt', {'id':"footnote-%d" % key})
925#           self.opentag('sup')
926#           self.writeout(escape(note['citation']))
927#           self.closetag('sup', False)
928            self.writeout('[')
929            self.opentag('a', {'href': "#citation-%d" % key})
930            self.writeout("←%d" % key)
931            self.closetag('a')
932            self.writeout(']\xa0')
933            self.closetag('dt')
934            self.opentag('dd')
935            self.writeout(note['body'])
936            self.closetag('dd')
937        self.closetag('dl')
938
939    def s_office_automatic_styles(self, tag, attrs):
940        if self.xmlfile == 'styles.xml':
941            self.autoprefix = "A"
942        else:
943            self.autoprefix = ""
944
945    def s_office_document_content(self, tag, attrs):
946        """ First tag in the content.xml file"""
947        self.writeout('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ')
948        self.writeout('"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n')
949        self.opentag('html', {'xmlns':"http://www.w3.org/1999/xhtml"}, True)
950        self.opentag('head', block=True)
951        self.emptytag('meta', {'http-equiv':"Content-Type", 'content':"text/html;charset=UTF-8"})
952        for metaline in self.metatags:
953            self.writeout(metaline)
954        self.writeout('<title>%s</title>\n' % escape(self.title))
955
956    def e_office_document_content(self, tag, attrs):
957        """ Last tag """
958        self.closetag('html')
959
960    def s_office_master_styles(self, tag, attrs):
961        """ """
962
963    def s_office_presentation(self, tag, attrs):
964        """ For some odd reason, OpenOffice Impress doesn't define a default-style
965            for the 'paragraph'. We therefore force a standard when we see
966            it is a presentation
967        """
968        self.styledict['p'] = {(FONS,u'font-size'): u"24pt"}
969        self.styledict['presentation'] = {(FONS,u'font-size'): u"24pt"}
970        self.html_body(tag, attrs)
971
972    def e_office_presentation(self, tag, attrs):
973        self.generate_footnotes()
974        self.closetag('body')
975
976    def s_office_spreadsheet(self, tag, attrs):
977        self.html_body(tag, attrs)
978
979    def e_office_spreadsheet(self, tag, attrs):
980        self.generate_footnotes()
981        self.closetag('body')
982
983    def s_office_styles(self, tag, attrs):
984        self.autoprefix = ""
985
986    def s_office_text(self, tag, attrs):
987        """ OpenDocument text """
988        self.styledict['frame'] = {(STYLENS,'wrap'): u'parallel'}
989        self.html_body(tag, attrs)
990
991    def e_office_text(self, tag, attrs):
992        self.generate_footnotes()
993        self.closetag('body')
994
995    def s_style_handle_properties(self, tag, attrs):
996        """ Copy all attributes to a struct.
997            We will later convert them to CSS2
998        """
999        if self.currentstyle is None:  # Added by Kovid
1000            return
1001
1002        for key,attr in attrs.items():
1003            self.styledict[self.currentstyle][key] = attr
1004
    # Maps an ODF style:family value to the styledict key under which that
    # family's default style is stored (the name of the matching HTML
    # element where there is one). Callers fall back to 'unknown' for
    # families that are not listed.
    familymap = {'frame':'frame', 'paragraph':'p', 'presentation':'presentation',
        'text':'span','section':'div',
        'table':'table','table-cell':'td','table-column':'col',
        'table-row':'tr','graphic':'graphic'}
1009
1010    def s_style_default_style(self, tag, attrs):
1011        """ A default style is like a style on an HTML tag
1012        """
1013        family = attrs[(STYLENS,'family')]
1014        htmlfamily = self.familymap.get(family,'unknown')
1015        self.currentstyle = htmlfamily
1016#       self.stylestack.append(self.currentstyle)
1017        self.styledict[self.currentstyle] = {}
1018
1019    def e_style_default_style(self, tag, attrs):
1020        self.currentstyle = None
1021
1022    def s_style_font_face(self, tag, attrs):
1023        """ It is possible that the HTML browser doesn't know how to
1024            show a particular font. Luckily ODF provides generic fallbacks
1025            Unfortunately they are not the same as CSS2.
1026            CSS2: serif, sans-serif, cursive, fantasy, monospace
1027            ODF: roman, swiss, modern, decorative, script, system
1028        """
1029        name = attrs[(STYLENS,"name")]
1030        family = attrs[(SVGNS,"font-family")]
1031        generic = attrs.get((STYLENS,'font-family-generic'),"")
1032        self.cs.save_font(name, family, generic)
1033
1034    def s_style_footer(self, tag, attrs):
1035        self.opentag('div', {'id':"footer"})
1036        self.purgedata()
1037
1038    def e_style_footer(self, tag, attrs):
1039        self.writedata()
1040        self.closetag('div')
1041        self.purgedata()
1042
1043    def s_style_footer_style(self, tag, attrs):
1044        self.currentstyle = "@print #footer"
1045        self.stylestack.append(self.currentstyle)
1046        self.styledict[self.currentstyle] = {}
1047
1048    def s_style_header(self, tag, attrs):
1049        self.opentag('div', {'id':"header"})
1050        self.purgedata()
1051
1052    def e_style_header(self, tag, attrs):
1053        self.writedata()
1054        self.closetag('div')
1055        self.purgedata()
1056
1057    def s_style_header_style(self, tag, attrs):
1058        self.currentstyle = "@print #header"
1059        self.stylestack.append(self.currentstyle)
1060        self.styledict[self.currentstyle] = {}
1061
1062    def s_style_default_page_layout(self, tag, attrs):
1063        """ Collect the formatting for the default page layout style.
1064        """
1065        self.currentstyle = "@page"
1066        self.stylestack.append(self.currentstyle)
1067        self.styledict[self.currentstyle] = {}
1068
1069    def s_style_page_layout(self, tag, attrs):
1070        """ Collect the formatting for the page layout style.
1071            This won't work in CSS 2.1, as page identifiers are not allowed.
1072            It is legal in CSS3, but the rest of the application doesn't specify when to use what page layout
1073        """
1074        name = attrs[(STYLENS,'name')]
1075        name = name.replace(".","_")
1076        self.currentstyle = ".PL-" + name
1077        self.stylestack.append(self.currentstyle)
1078        self.styledict[self.currentstyle] = {}
1079
1080    def e_style_page_layout(self, tag, attrs):
1081        """ End this style
1082        """
1083        self.currentstyle = None
1084
1085    def s_style_master_page(self, tag, attrs):
1086        """ Collect the formatting for the page layout style.
1087        """
1088        name = attrs[(STYLENS,'name')]
1089        name = name.replace(".","_")
1090
1091        self.currentstyle = ".MP-" + name
1092        self.stylestack.append(self.currentstyle)
1093        self.styledict[self.currentstyle] = {('','position'):'relative'}
1094        # Then load the pagelayout style if we find it
1095        pagelayout = attrs.get((STYLENS,'page-layout-name'), None)
1096        if pagelayout:
1097            pagelayout = ".PL-" + pagelayout
1098            if pagelayout in self.styledict:
1099                styles = self.styledict[pagelayout]
1100                for style, val in styles.items():
1101                    self.styledict[self.currentstyle][style] = val
1102            else:
1103                self.styledict[self.currentstyle]['__parent-style-name'] = pagelayout
1104        self.s_ignorexml(tag, attrs)
1105
    # Short prefixes for class selectors: maps an ODF style:family value
    # to the prefix put in front of the style name in generated CSS class
    # names. s_style_style falls back to 'X' for families not listed here.
    _familyshort = {'drawing-page':'DP', 'paragraph':'P', 'presentation':'PR',
        'text':'S', 'section':'D',
         'table':'T', 'table-cell':'TD', 'table-column':'TC',
         'table-row':'TR', 'graphic':'G'}
1111
1112    def s_style_style(self, tag, attrs):
1113        """ Collect the formatting for the style.
1114            Styles have scope. The same name can be used for both paragraph and
1115            character styles Since CSS has no scope we use a prefix. (Not elegant)
1116            In ODF a style can have a parent, these parents can be chained.
1117            We may not have encountered the parent yet, but if we have, we resolve it.
1118        """
1119        name = attrs[(STYLENS,'name')]
1120        name = name.replace(".","_")
1121        family = attrs[(STYLENS,'family')]
1122        htmlfamily = self.familymap.get(family,'unknown')
1123        sfamily = self._familyshort.get(family,'X')
1124        name = "%s%s-%s" % (self.autoprefix, sfamily, name)
1125        parent = attrs.get((STYLENS,'parent-style-name'))
1126        self.currentstyle = special_styles.get(name,"."+name)
1127        self.stylestack.append(self.currentstyle)
1128        if self.currentstyle not in self.styledict:
1129            self.styledict[self.currentstyle] = {}
1130
1131        self.styledict[self.currentstyle]['__style-family'] = htmlfamily
1132
1133        # Then load the parent style if we find it
1134        if parent:
1135            parent = parent.replace(".", "_")
1136            parent = "%s-%s" % (sfamily, parent)
1137            parent = special_styles.get(parent, "."+parent)
1138            if parent in self.styledict:
1139                styles = self.styledict[parent]
1140                for style, val in styles.items():
1141                    self.styledict[self.currentstyle][style] = val
1142            else:
1143                self.styledict[self.currentstyle]['__parent-style-name'] = parent
1144
1145    def e_style_style(self, tag, attrs):
1146        """ End this style
1147        """
1148        self.currentstyle = None
1149
1150    def s_table_table(self, tag, attrs):
1151        """ Start a table
1152        """
1153        c = attrs.get((TABLENS,'style-name'), None)
1154        if c and self.generate_css:
1155            c = c.replace(".","_")
1156            self.opentag('table',{'class': "T-%s" % c})
1157        else:
1158            self.opentag('table')
1159        self.purgedata()
1160
1161    def e_table_table(self, tag, attrs):
1162        """ End a table
1163        """
1164        self.writedata()
1165        self.closetag('table')
1166        self.purgedata()
1167
1168    def s_table_table_cell(self, tag, attrs):
1169        """ Start a table cell """
1170        # FIXME: number-columns-repeated § 8.1.3
1171        # repeated = int(attrs.get( (TABLENS,'number-columns-repeated'), 1))
1172        htmlattrs = {}
1173        rowspan = attrs.get((TABLENS,'number-rows-spanned'))
1174        if rowspan:
1175            htmlattrs['rowspan'] = rowspan
1176        colspan = attrs.get((TABLENS,'number-columns-spanned'))
1177        if colspan:
1178            htmlattrs['colspan'] = colspan
1179
1180        c = attrs.get((TABLENS,'style-name'))
1181        if c:
1182            htmlattrs['class'] = 'TD-%s' % c.replace(".","_")
1183        self.opentag('td', htmlattrs)
1184        self.purgedata()
1185
1186    def e_table_table_cell(self, tag, attrs):
1187        """ End a table cell """
1188        self.writedata()
1189        self.closetag('td')
1190        self.purgedata()
1191
1192    def s_table_table_column(self, tag, attrs):
1193        """ Start a table column """
1194        c = attrs.get((TABLENS,'style-name'), None)
1195        repeated = int(attrs.get((TABLENS,'number-columns-repeated'), 1))
1196        htmlattrs = {}
1197        if c:
1198            htmlattrs['class'] = "TC-%s" % c.replace(".","_")
1199        for x in range(repeated):
1200            self.emptytag('col', htmlattrs)
1201        self.purgedata()
1202
1203    def s_table_table_row(self, tag, attrs):
1204        """ Start a table row """
1205        # FIXME: table:number-rows-repeated
1206        c = attrs.get((TABLENS,'style-name'), None)
1207        htmlattrs = {}
1208        if c:
1209            htmlattrs['class'] = "TR-%s" % c.replace(".","_")
1210        self.opentag('tr', htmlattrs)
1211        self.purgedata()
1212
1213    def e_table_table_row(self, tag, attrs):
1214        """ End a table row """
1215        self.writedata()
1216        self.closetag('tr')
1217        self.purgedata()
1218
1219    def s_text_a(self, tag, attrs):
1220        """ Anchors start """
1221        self.writedata()
1222        href = attrs[(XLINKNS,"href")].split("|")[0]
1223        if href[:1] == "#":  # Changed by Kovid
1224            href = "#" + self.get_anchor(href[1:])
1225        self.opentag('a', {'href':href})
1226        self.purgedata()
1227
1228    def e_text_a(self, tag, attrs):
1229        """ End an anchor or bookmark reference """
1230        self.writedata()
1231        self.closetag('a', False)
1232        self.purgedata()
1233
1234    def s_text_bookmark(self, tag, attrs):
1235        """ Bookmark definition """
1236        name = attrs[(TEXTNS,'name')]
1237        html_id = self.get_anchor(name)
1238        self.writedata()
1239        self.opentag('span', {'id':html_id})
1240        self.closetag('span', False)
1241        self.purgedata()
1242
1243    def s_text_bookmark_ref(self, tag, attrs):
1244        """ Bookmark reference """
1245        name = attrs[(TEXTNS,'ref-name')]
1246        html_id = "#" + self.get_anchor(name)
1247        self.writedata()
1248        self.opentag('a', {'href':html_id})
1249        self.purgedata()
1250
1251    def s_text_h(self, tag, attrs):
1252        """ Headings start """
1253        level = int(attrs[(TEXTNS,'outline-level')])
1254        if level > 6:
1255            level = 6  # Heading levels go only to 6 in XHTML
1256        if level < 1:
1257            level = 1
1258        self.headinglevels[level] = self.headinglevels[level] + 1
1259        name = self.classname(attrs)
1260        for x in range(level + 1,10):
1261            self.headinglevels[x] = 0
1262        special = special_styles.get("P-"+name)
1263        if special or not self.generate_css:
1264            self.opentag('h%s' % level)
1265        else:
1266            self.opentag('h%s' % level, {'class':"P-%s" % name})
1267        self.purgedata()
1268
1269    def e_text_h(self, tag, attrs):
1270        """ Headings end
1271            Side-effect: If there is no title in the metadata, then it is taken
1272            from the first heading of any level.
1273        """
1274        self.writedata()
1275        level = int(attrs[(TEXTNS,'outline-level')])
1276        if level > 6:
1277            level = 6  # Heading levels go only to 6 in XHTML
1278        if level < 1:
1279            level = 1
1280        lev = self.headinglevels[1:level+1]
1281        outline = '.'.join(map(str,lev))
1282        heading = ''.join(self.data)
1283        if self.title == '':
1284            self.title = heading
1285        # Changed by Kovid
1286        tail = ''.join(self.data)
1287        anchor = self.get_anchor("%s.%s" % (outline, tail))
1288        anchor2 = self.get_anchor(tail)  # Added by kovid to fix #7506
1289        self.opentag('a', {'id': anchor})
1290        self.closetag('a', False)
1291        self.opentag('a', {'id': anchor2})
1292        self.closetag('a', False)
1293        self.closetag('h%s' % level)
1294        self.purgedata()
1295
1296    def s_text_line_break(self, tag, attrs):
1297        """ Force a line break (<br/>) """
1298        self.writedata()
1299        self.emptytag('br')
1300        self.purgedata()
1301
1302    def s_text_list(self, tag, attrs):
1303        """ Start a list (<ul> or <ol>)
1304            To know which level we're at, we have to count the number
1305            of <text:list> elements on the tagstack.
1306        """
1307        name = attrs.get((TEXTNS,'style-name'))
1308        continue_numbering = attrs.get((TEXTNS, 'continue-numbering')) == 'true'
1309        continue_list = attrs.get((TEXTNS, 'continue-list'))
1310        list_id = attrs.get(('http://www.w3.org/XML/1998/namespace', 'id'))
1311        level = self.tagstack.count_tags(tag) + 1
1312        if name:
1313            name = name.replace(".","_")
1314        else:
1315            # FIXME: If a list is contained in a table cell or text box,
1316            # the list level must return to 1, even though the table or
1317            # textbox itself may be nested within another list.
1318            name = self.tagstack.rfindattr((TEXTNS,'style-name'))
1319        list_class = "%s_%d" % (name, level)
1320        tag_name = self.listtypes.get(list_class,'ul')
1321        number_class = tag_name + list_class
1322        if list_id:
1323            self.list_id_map[list_id] = number_class
1324        if continue_list:
1325            if continue_list in self.list_id_map:
1326                tglc = self.list_id_map[continue_list]
1327                self.list_number_map[number_class] = self.list_number_map[tglc]
1328            else:
1329                self.list_number_map.pop(number_class, None)
1330        else:
1331            if not continue_numbering:
1332                self.list_number_map.pop(number_class, None)
1333        self.list_class_stack.append(number_class)
1334        attrs = {}
1335        if tag_name == 'ol' and self.list_number_map[number_class] != 1:
1336            attrs = {'start': unicode_type(self.list_number_map[number_class])}
1337        if self.generate_css:
1338            attrs['class'] = list_class
1339        self.opentag('%s' % tag_name, attrs)
1340        self.purgedata()
1341
1342    def e_text_list(self, tag, attrs):
1343        """ End a list """
1344        self.writedata()
1345        if self.list_class_stack:
1346            self.list_class_stack.pop()
1347        name = attrs.get((TEXTNS,'style-name'))
1348        level = self.tagstack.count_tags(tag) + 1
1349        if name:
1350            name = name.replace(".","_")
1351        else:
1352            # FIXME: If a list is contained in a table cell or text box,
1353            # the list level must return to 1, even though the table or
1354            # textbox itself may be nested within another list.
1355            name = self.tagstack.rfindattr((TEXTNS,'style-name'))
1356        list_class = "%s_%d" % (name, level)
1357        self.closetag(self.listtypes.get(list_class,'ul'))
1358        self.purgedata()
1359
1360    def s_text_list_item(self, tag, attrs):
1361        """ Start list item """
1362        number_class = self.list_class_stack[-1] if self.list_class_stack else None
1363        if number_class:
1364            self.list_number_map[number_class] += 1
1365        self.opentag('li')
1366        self.purgedata()
1367
1368    def e_text_list_item(self, tag, attrs):
1369        """ End list item """
1370        self.writedata()
1371        self.closetag('li')
1372        self.purgedata()
1373
1374    def s_text_list_level_style_bullet(self, tag, attrs):
1375        """ CSS doesn't have the ability to set the glyph
1376            to a particular character, so we just go through
1377            the available glyphs
1378        """
1379        name = self.tagstack.rfindattr((STYLENS,'name'))
1380        level = attrs[(TEXTNS,'level')]
1381        self.prevstyle = self.currentstyle
1382        list_class = "%s_%s" % (name, level)
1383        self.listtypes[list_class] = 'ul'
1384        self.currentstyle = ".%s_%s" % (name.replace(".","_"), level)
1385        self.stylestack.append(self.currentstyle)
1386        self.styledict[self.currentstyle] = {}
1387
1388        level = int(level)
1389        listtype = ("square", "disc", "circle")[level % 3]
1390        self.styledict[self.currentstyle][('','list-style-type')] = listtype
1391
1392    def e_text_list_level_style_bullet(self, tag, attrs):
1393        self.currentstyle = self.prevstyle
1394        del self.prevstyle
1395
1396    def s_text_list_level_style_number(self, tag, attrs):
1397        name = self.tagstack.stackparent()[(STYLENS,'name')]
1398        level = attrs[(TEXTNS,'level')]
1399        num_format = attrs.get((STYLENS,'num-format'),"1")
1400        start_value = attrs.get((TEXTNS, 'start-value'), '1')
1401        list_class = "%s_%s" % (name, level)
1402        self.prevstyle = self.currentstyle
1403        self.currentstyle = ".%s_%s" % (name.replace(".","_"), level)
1404        if start_value != '1':
1405            self.list_starts[self.currentstyle] = start_value
1406        self.listtypes[list_class] = 'ol'
1407        self.stylestack.append(self.currentstyle)
1408        self.styledict[self.currentstyle] = {}
1409        if num_format == "1":
1410            listtype = "decimal"
1411        elif num_format == "I":
1412            listtype = "upper-roman"
1413        elif num_format == "i":
1414            listtype = "lower-roman"
1415        elif num_format == "A":
1416            listtype = "upper-alpha"
1417        elif num_format == "a":
1418            listtype = "lower-alpha"
1419        else:
1420            listtype = "decimal"
1421        self.styledict[self.currentstyle][('','list-style-type')] = listtype
1422
1423    def e_text_list_level_style_number(self, tag, attrs):
1424        self.currentstyle = self.prevstyle
1425        del self.prevstyle
1426
1427    def s_text_note(self, tag, attrs):
1428        self.writedata()
1429        self.purgedata()
1430        self.currentnote = self.currentnote + 1
1431        self.notedict[self.currentnote] = {}
1432        self.notebody = []
1433
1434    def e_text_note(self, tag, attrs):
1435        pass
1436
1437    def collectnote(self,s):
1438        if s != '':
1439            self.notebody.append(s)
1440
1441    def s_text_note_body(self, tag, attrs):
1442        self._orgwfunc = self._wfunc
1443        self._wfunc = self.collectnote
1444
1445    def e_text_note_body(self, tag, attrs):
1446        self._wfunc = self._orgwfunc
1447        self.notedict[self.currentnote]['body'] = ''.join(self.notebody)
1448        self.notebody = ''
1449        del self._orgwfunc
1450
1451    def e_text_note_citation(self, tag, attrs):
1452        # Changed by Kovid to improve formatting and enable backlinks
1453        mark = ''.join(self.data)
1454        self.notedict[self.currentnote]['citation'] = mark
1455        self.opentag('sup')
1456        self.opentag('a', {
1457            'href': "#footnote-%s" % self.currentnote,
1458            'class': 'citation',
1459            'id':'citation-%s' % self.currentnote
1460        })
1461#        self.writeout( escape(mark) )
1462        # Since HTML only knows about endnotes, there is too much risk that the
1463        # marker is reused in the source. Therefore we force numeric markers
1464        self.writeout(type(u'')(self.currentnote))
1465        self.closetag('a')
1466        self.closetag('sup')
1467
1468    def s_text_p(self, tag, attrs):
1469        """ Paragraph
1470        """
1471        htmlattrs = {}
1472        specialtag = "p"
1473        c = attrs.get((TEXTNS,'style-name'), None)
1474        if c:
1475            c = c.replace(".","_")
1476            specialtag = special_styles.get("P-"+c)
1477            if specialtag is None:
1478                specialtag = 'p'
1479                if self.generate_css:
1480                    htmlattrs['class'] = "P-%s" % c
1481        self.opentag(specialtag, htmlattrs)
1482        self.purgedata()
1483
1484    def e_text_p(self, tag, attrs):
1485        """ End Paragraph
1486        """
1487        specialtag = "p"
1488        c = attrs.get((TEXTNS,'style-name'), None)
1489        if c:
1490            c = c.replace(".","_")
1491            specialtag = special_styles.get("P-"+c)
1492            if specialtag is None:
1493                specialtag = 'p'
1494        self.writedata()
1495        if not self.data:  # Added by Kovid
1496            # Give substance to empty paragraphs, as rendered by OOo
1497            self.writeout('&#160;')
1498        self.closetag(specialtag)
1499        self.purgedata()
1500
1501    def s_text_s(self, tag, attrs):
1502        # Changed by Kovid to fix non breaking spaces being prepended to
1503        # element instead of being part of the text flow.
1504        # We don't use an entity for the nbsp as the contents of self.data will
1505        # be escaped on writeout.
1506        """ Generate a number of spaces. We use the non breaking space for
1507        the text:s ODF element.
1508        """
1509        try:
1510            c = int(attrs.get((TEXTNS, 'c'), 1))
1511        except:
1512            c = 0
1513        if c > 0:
1514            self.data.append(u'\u00a0'*c)
1515
1516    def s_text_span(self, tag, attrs):
1517        """ The <text:span> element matches the <span> element in HTML. It is
1518            typically used to properties of the text.
1519        """
1520        self.writedata()
1521        c = attrs.get((TEXTNS,'style-name'), None)
1522        htmlattrs = {}
1523        # Changed by Kovid to handle inline special styles defined on <text:span> tags.
1524        # Apparently LibreOffice does this.
1525        special = 'span'
1526        if c:
1527            c = c.replace(".","_")
1528            special = special_styles.get("S-"+c)
1529            if special is None:
1530                special = 'span'
1531                if self.generate_css:
1532                    htmlattrs['class'] = "S-%s" % c
1533
1534        self.opentag(special, htmlattrs)
1535        self.purgedata()
1536
1537    def e_text_span(self, tag, attrs):
1538        """ End the <text:span> """
1539        self.writedata()
1540        c = attrs.get((TEXTNS,'style-name'), None)
1541        # Changed by Kovid to handle inline special styles defined on <text:span> tags.
1542        # Apparently LibreOffice does this.
1543        special = 'span'
1544        if c:
1545            c = c.replace(".","_")
1546            special = special_styles.get("S-"+c)
1547            if special is None:
1548                special = 'span'
1549
1550        self.closetag(special, False)
1551        self.purgedata()
1552
1553    def s_text_tab(self, tag, attrs):
1554        """ Move to the next tabstop. We ignore this in HTML
1555        """
1556        self.writedata()
1557        self.writeout(' ')
1558        self.purgedata()
1559
1560    def s_text_x_source(self, tag, attrs):
1561        """ Various indexes and tables of contents. We ignore those.
1562        """
1563        self.writedata()
1564        self.purgedata()
1565        self.s_ignorexml(tag, attrs)
1566
1567    def e_text_x_source(self, tag, attrs):
1568        """ Various indexes and tables of contents. We ignore those.
1569        """
1570        self.writedata()
1571        self.purgedata()
1572
1573    # -----------------------------------------------------------------------------
1574    #
1575    # Reading the file
1576    #
1577    # -----------------------------------------------------------------------------
1578
1579    def load(self, odffile):
1580        """ Loads a document into the parser and parses it.
1581            The argument can either be a filename or a document in memory.
1582        """
1583        self.lines = []
1584        self._wfunc = self._wlines
1585        if isinstance(odffile, (bytes, type(u''))) or hasattr(odffile, 'read'):  # Added by Kovid
1586            self.document = load(odffile)
1587        else:
1588            self.document = odffile
1589        self._walknode(self.document.topnode)
1590
1591    def _walknode(self, node):
1592        if node.nodeType == Node.ELEMENT_NODE:
1593            self.startElementNS(node.qname, node.tagName, node.attributes)
1594            for c in node.childNodes:
1595                self._walknode(c)
1596            self.endElementNS(node.qname, node.tagName)
1597        if node.nodeType == Node.TEXT_NODE or node.nodeType == Node.CDATA_SECTION_NODE:
1598            self.characters(type(u'')(node))
1599
1600    def odf2xhtml(self, odffile):
1601        """ Load a file and return the XHTML
1602        """
1603        self.load(odffile)
1604        return self.xhtml()
1605
1606    def _wlines(self,s):
1607        if s:
1608            self.lines.append(s)
1609
1610    def xhtml(self):
1611        """ Returns the xhtml
1612        """
1613        return ''.join(self.lines)
1614
1615    def _writecss(self, s):
1616        if s:
1617            self._csslines.append(s)
1618
1619    def _writenothing(self, s):
1620        pass
1621
1622    def css(self):
1623        """ Returns the CSS content """
1624        self._csslines = []
1625        self._wfunc = self._writecss
1626        self.generate_stylesheet()
1627        res = ''.join(self._csslines)
1628        self._wfunc = self._wlines
1629        del self._csslines
1630        return res
1631
1632    def save(self, outputfile, addsuffix=False):
1633        """ Save the HTML under the filename.
1634            If the filename is '-' then save to stdout
1635            We have the last style filename in self.stylefilename
1636        """
1637        if outputfile == '-':
1638            import sys  # Added by Kovid
1639            outputfp = sys.stdout
1640        else:
1641            if addsuffix:
1642                outputfile = outputfile + ".html"
1643            outputfp = open(outputfile, "wb")
1644        outputfp.write(self.xhtml().encode('us-ascii','xmlcharrefreplace'))
1645        outputfp.close()
1646
1647
class ODF2XHTMLembedded(ODF2XHTML):

    """ Variant of ODF2XHTML that writes into a caller-supplied list.

        The handler table set up here registers only content-level element
        handlers (drawing, table and text elements plus a few that are
        ignored). The document-, metadata- and style-level entries are
        commented out below.
    """

    def __init__(self, lines, generate_css=True, embedable=False):
        """ Set up the object state and the element handler table.

            lines        -- stored as self.lines; presumably the list that
                            output fragments get appended to (see _wlines)
            generate_css -- stored on self; consulted by handlers such as
                            s_text_p and s_text_span before they add a class
                            attribute
            embedable    -- not referenced in the visible part of this
                            constructor  # NOTE(review): confirm it is unused
        """
        self._resetobject()
        self.lines = lines

        # Tags
        # Keys are (namespace, local name); values are pairs of
        # (start handler, end handler). None presumably means that nothing
        # is called for that event -- the dispatch code is outside this view.
        self.generate_css = generate_css
        self.elements = {
#        (DCNS, 'title'): (self.s_processcont, self.e_dc_title),
#        (DCNS, 'language'): (self.s_processcont, self.e_dc_contentlanguage),
#        (DCNS, 'creator'): (self.s_processcont, self.e_dc_metatag),
#        (DCNS, 'description'): (self.s_processcont, self.e_dc_metatag),
#        (DCNS, 'date'): (self.s_processcont, self.e_dc_metatag),
        (DRAWNS, 'frame'): (self.s_draw_frame, self.e_draw_frame),
        (DRAWNS, 'image'): (self.s_draw_image, None),
        (DRAWNS, 'fill-image'): (self.s_draw_fill_image, None),
        (DRAWNS, "layer-set"):(self.s_ignorexml, None),
        (DRAWNS, 'page'): (self.s_draw_page, self.e_draw_page),
        (DRAWNS, 'object'): (self.s_draw_object, None),
        (DRAWNS, 'object-ole'): (self.s_draw_object_ole, None),
        (DRAWNS, 'text-box'): (self.s_draw_textbox, self.e_draw_textbox),
#        (METANS, 'creation-date'):(self.s_processcont, self.e_dc_metatag),
#        (METANS, 'generator'):(self.s_processcont, self.e_dc_metatag),
#        (METANS, 'initial-creator'): (self.s_processcont, self.e_dc_metatag),
#        (METANS, 'keyword'): (self.s_processcont, self.e_dc_metatag),
        (NUMBERNS, "boolean-style"):(self.s_ignorexml, None),
        (NUMBERNS, "currency-style"):(self.s_ignorexml, None),
        (NUMBERNS, "date-style"):(self.s_ignorexml, None),
        (NUMBERNS, "number-style"):(self.s_ignorexml, None),
        (NUMBERNS, "text-style"):(self.s_ignorexml, None),
#        (OFFICENS, "automatic-styles"):(self.s_office_automatic_styles, None),
#        (OFFICENS, "document-content"):(self.s_office_document_content, self.e_office_document_content),
        (OFFICENS, "forms"):(self.s_ignorexml, None),
#        (OFFICENS, "master-styles"):(self.s_office_master_styles, None),
        (OFFICENS, "meta"):(self.s_ignorecont, None),
#        (OFFICENS, "presentation"):(self.s_office_presentation, self.e_office_presentation),
#        (OFFICENS, "spreadsheet"):(self.s_office_spreadsheet, self.e_office_spreadsheet),
#        (OFFICENS, "styles"):(self.s_office_styles, None),
#        (OFFICENS, "text"):(self.s_office_text, self.e_office_text),
        (OFFICENS, "scripts"):(self.s_ignorexml, None),
        (PRESENTATIONNS, "notes"):(self.s_ignorexml, None),
# (STYLENS, "default-page-layout"):(self.s_style_default_page_layout, self.e_style_page_layout),
#        (STYLENS, "default-page-layout"):(self.s_ignorexml, None),
#        (STYLENS, "default-style"):(self.s_style_default_style, self.e_style_default_style),
#        (STYLENS, "drawing-page-properties"):(self.s_style_handle_properties, None),
#        (STYLENS, "font-face"):(self.s_style_font_face, None),
# (STYLENS, "footer"):(self.s_style_footer, self.e_style_footer),
# (STYLENS, "footer-style"):(self.s_style_footer_style, None),
#        (STYLENS, "graphic-properties"):(self.s_style_handle_properties, None),
#        (STYLENS, "handout-master"):(self.s_ignorexml, None),
# (STYLENS, "header"):(self.s_style_header, self.e_style_header),
# (STYLENS, "header-footer-properties"):(self.s_style_handle_properties, None),
# (STYLENS, "header-style"):(self.s_style_header_style, None),
#        (STYLENS, "master-page"):(self.s_style_master_page, None),
#        (STYLENS, "page-layout-properties"):(self.s_style_handle_properties, None),
# (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout),
#        (STYLENS, "page-layout"):(self.s_ignorexml, None),
#        (STYLENS, "paragraph-properties"):(self.s_style_handle_properties, None),
#        (STYLENS, "style"):(self.s_style_style, self.e_style_style),
#        (STYLENS, "table-cell-properties"):(self.s_style_handle_properties, None),
#        (STYLENS, "table-column-properties"):(self.s_style_handle_properties, None),
#        (STYLENS, "table-properties"):(self.s_style_handle_properties, None),
#        (STYLENS, "text-properties"):(self.s_style_handle_properties, None),
        (SVGNS, 'desc'): (self.s_ignorexml, None),
        (TABLENS, 'covered-table-cell'): (self.s_ignorexml, None),
        (TABLENS, 'table-cell'): (self.s_table_table_cell, self.e_table_table_cell),
        (TABLENS, 'table-column'): (self.s_table_table_column, None),
        (TABLENS, 'table-row'): (self.s_table_table_row, self.e_table_table_row),
        (TABLENS, 'table'): (self.s_table_table, self.e_table_table),
        (TEXTNS, 'a'): (self.s_text_a, self.e_text_a),
        (TEXTNS, "alphabetical-index-source"):(self.s_text_x_source, self.e_text_x_source),
        (TEXTNS, "bibliography-configuration"):(self.s_ignorexml, None),
        (TEXTNS, "bibliography-source"):(self.s_text_x_source, self.e_text_x_source),
        (TEXTNS, 'h'): (self.s_text_h, self.e_text_h),
        (TEXTNS, "illustration-index-source"):(self.s_text_x_source, self.e_text_x_source),
        (TEXTNS, 'line-break'):(self.s_text_line_break, None),
        (TEXTNS, "linenumbering-configuration"):(self.s_ignorexml, None),
        (TEXTNS, "list"):(self.s_text_list, self.e_text_list),
        (TEXTNS, "list-item"):(self.s_text_list_item, self.e_text_list_item),
        (TEXTNS, "list-level-style-bullet"):(self.s_text_list_level_style_bullet, self.e_text_list_level_style_bullet),
        (TEXTNS, "list-level-style-number"):(self.s_text_list_level_style_number, self.e_text_list_level_style_number),
        (TEXTNS, "list-style"):(None, None),
        (TEXTNS, "note"):(self.s_text_note, None),
        (TEXTNS, "note-body"):(self.s_text_note_body, self.e_text_note_body),
        (TEXTNS, "note-citation"):(None, self.e_text_note_citation),
        (TEXTNS, "notes-configuration"):(self.s_ignorexml, None),
        (TEXTNS, "object-index-source"):(self.s_text_x_source, self.e_text_x_source),
        (TEXTNS, 'p'): (self.s_text_p, self.e_text_p),
        (TEXTNS, 's'): (self.s_text_s, None),
        (TEXTNS, 'span'): (self.s_text_span, self.e_text_span),
        (TEXTNS, 'tab'): (self.s_text_tab, None),
        (TEXTNS, "table-index-source"):(self.s_text_x_source, self.e_text_x_source),
        (TEXTNS, "table-of-content-source"):(self.s_text_x_source, self.e_text_x_source),
        (TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source),
        (TEXTNS, "page-number"):(None, None),
        }
1747