1 #include <lvxml.h>
2 #include <lvtinydom.h>
3 
4 // build FB2 DOM, comment out to build HTML DOM
5 #define DOCX_FB2_DOM_STRUCTURE 1
6 //If true <title class="hx"><p>...</p></title> else <title><hx>..</hx></title>
7 #define DOCX_USE_CLASS_FOR_HEADING true
8 // comment this out to disable in-page footnotes
9 #define ODX_CRENGINE_IN_PAGE_FOOTNOTES 1
10 
/// Kind of style described by an odx_Style or by a property container.
/// Values are consecutive and start at zero, in declaration order.
enum odx_style_type {
    odx_invalid_style   = 0,
    odx_paragraph_style = 1,
    odx_character_style = 2,
    odx_table_style     = 3,
    odx_numbering_style = 4
};
18 
/// Line rule of a paragraph. odx_lineRule_auto is the value
/// odx_pPr::getLineRule() falls back to when no rule is stored.
enum odx_lineRule_type {
    odx_lineRule_atLeast = 0,
    odx_lineRule_auto    = 1,
    odx_lineRule_exact   = 2
};
24 
25 class odx_StylePropertiesGetter
26 {
27 public:
28     virtual css_length_t get(int index) const = 0;
29 };
30 
31 class odx_Style;
32 typedef LVFastRef< odx_Style > odx_StyleRef;
33 
34 class odx_ImportContext;
35 
36 template <int N>
37 class odx_StylePropertiesContainer : public odx_StylePropertiesGetter
38 {
39     odx_style_type m_styleType;
40     lString32 m_styleId;
41 public:
42     static const int PROP_COUNT = N;
43 
reset()44     virtual void reset() {
45         init();
46         m_styleId.clear();
47     }
48 
~odx_StylePropertiesContainer()49     virtual ~odx_StylePropertiesContainer() {}
50 
odx_StylePropertiesContainer(odx_style_type styleType)51     odx_StylePropertiesContainer(odx_style_type styleType) : m_styleType(styleType) {
52         init();
53     }
54 
get(int index)55     css_length_t get(int index) const {
56         if( index < N ) {
57             return m_properties[index];
58         }
59         return css_length_t(css_val_unspecified, 0);
60     }
61 
set(int index,int value)62     void set(int index, int value) {
63         if ( index < N ) {
64             m_properties[index].type = css_val_pt;
65             m_properties[index].value = value;
66         }
67     }
68 
set(int index,css_length_t & value)69     void set(int index, css_length_t& value) {
70         if ( index < N ) {
71             m_properties[index] = value;
72         }
73     }
74 
75     template<class T, typename U = void>
getValue(int index,T defaultValue)76     T getValue(int index, T defaultValue) const {
77         css_length_t property = get(index);
78         if(property.type != css_val_unspecified)
79             return (T)property.value;
80         return defaultValue;
81     }
82 
83     template<typename U>
getValue(int index,bool defaultValue)84     bool getValue(int index, bool defaultValue) const {
85         css_length_t property = get(index);
86         if(property.type != css_val_unspecified)
87             return (property.value != 0);
88         return defaultValue;
89     }
90 
combineWith(const odx_StylePropertiesGetter * other)91     void combineWith(const odx_StylePropertiesGetter* other)
92     {
93         for(int i = 0; other && i < PROP_COUNT; i++) {
94             css_length_t baseValue = other->get(i);
95             if( get(i).type == css_val_unspecified &&
96                 baseValue.type != css_val_unspecified)
97                 set(i, baseValue);
98         }
99     }
100     void setStyleId(odx_ImportContext* context, const lChar32* styleId);
101     odx_Style* getStyle(odx_ImportContext* context);
102 protected:
103     css_length_t m_properties[N];
104 private:
init()105     void init() {
106         for(int i = 0; i < N; i++) {
107             m_properties[i].type = css_val_unspecified;
108             m_properties[i].value = 0;
109         }
110     }
111 };
112 
113 
/// Slot indices of the properties held by odx_rPr.
/// odx_run_max_prop is the slot count and must stay last.
enum odx_run_properties
{
    odx_run_italic_prop = 0,
    odx_run_bold_prop,
    odx_run_underline_prop,
    odx_run_strikethrough_prop,
    odx_run_hidden_prop,
    odx_run_halign_prop,
    odx_run_valign_prop,
    odx_run_font_size_prop,
    odx_run_max_prop
};
126 
127 class odx_rPr : public odx_StylePropertiesContainer<odx_run_max_prop>
128 {
129 public:
130     odx_rPr();
131     ///properties
isBold()132     inline bool isBold() const { return getValue(odx_run_bold_prop, false); }
setBold(bool value)133     inline void setBold(bool value) { set(odx_run_bold_prop, value); }
isItalic()134     inline bool isItalic() const { return getValue(odx_run_italic_prop, false); }
setItalic(bool value)135     inline void setItalic(bool value) { set(odx_run_italic_prop, value); }
isUnderline()136     inline bool isUnderline() const { return getValue(odx_run_underline_prop, false); }
setUnderline(bool value)137     inline void setUnderline(bool value) { set(odx_run_underline_prop, value); }
isStrikeThrough()138     inline bool isStrikeThrough() const { return getValue(odx_run_strikethrough_prop, false); }
setStrikeThrough(bool value)139     inline void setStrikeThrough(bool value) { set(odx_run_strikethrough_prop, value); }
isSubScript()140     inline bool isSubScript() const { return (getVertAlign() == css_va_sub);  }
isSuperScript()141     inline bool isSuperScript() const { return (getVertAlign() == css_va_super); }
isHidden()142     inline bool isHidden() const { return getValue(odx_run_hidden_prop, false); }
setHidden(bool value)143     inline void setHidden(bool value) { set(odx_run_hidden_prop, value); }
getTextAlign()144     inline css_text_align_t getTextAlign() const {
145         return getValue(odx_run_halign_prop, css_ta_inherit);
146     }
setTextAlign(css_text_align_t value)147     inline void setTextAlign( css_text_align_t value ) { set(odx_run_halign_prop, value); }
getVertAlign()148     inline css_vertical_align_t getVertAlign() const {
149         return getValue(odx_run_valign_prop, css_va_inherit);
150     }
setVertAlign(css_vertical_align_t value)151     inline void setVertAlign(css_vertical_align_t value) { set(odx_run_valign_prop,value); }
152     lString32 getCss();
153 };
154 
/// Slot indices of the properties held by odx_pPr.
/// odx_p_max_prop is the slot count and must stay last.
enum odx_p_properties {
    odx_p_page_break_before_prop = 0,
    odx_p_keep_next_prop,
    odx_p_mirror_indents_prop,
    odx_p_halign_prop,
    odx_p_valign_prop,
    odx_p_line_rule_prop,
    odx_p_hyphenate_prop,
    odx_p_before_spacing_prop,
    odx_p_after_spacing_prop,
    odx_p_before_auto_spacing_prop,
    odx_p_after_auto_spacing_prop,
    odx_p_line_spacing_prop,
    odx_p_line_height_prop,
    odx_p_left_margin_prop,
    odx_p_right_margin_prop,
    odx_p_indent_prop,
    odx_p_hanging_prop,
    odx_p_outline_level_prop,
    odx_p_num_id_prop,
    odx_p_ilvl_prop,
    odx_p_max_prop
};
178 
179 class odx_pPr : public odx_StylePropertiesContainer<odx_p_max_prop>
180 {
181 public:
182     odx_pPr();
183 
184     ///properties
getTextAlign()185     inline css_text_align_t getTextAlign() const {
186         return getValue(odx_p_halign_prop, css_ta_inherit);
187     }
setTextAlign(css_text_align_t value)188     inline void setTextAlign( css_text_align_t value ) { set(odx_p_halign_prop, value); }
getVertAlign()189     inline css_vertical_align_t getVertAlign() const {
190         return getValue(odx_p_valign_prop, css_va_inherit);
191     }
setVertAlign(css_vertical_align_t value)192     inline void setVertAlign(css_vertical_align_t value) { set(odx_p_valign_prop, value); }
getHyphenate()193     inline css_hyphenate_t getHyphenate() const {
194         return getValue(odx_p_hyphenate_prop, css_hyph_inherit);
195     }
setHyphenate(css_hyphenate_t value)196     inline void setHyphenate( css_hyphenate_t value ) { set(odx_p_hyphenate_prop, value); }
197     // page-break-before:always
isPageBreakBefore()198     inline bool isPageBreakBefore() const { return getValue(odx_p_page_break_before_prop, false); }
setPageBreakBefore(bool value)199     inline void setPageBreakBefore(bool value) { set(odx_p_page_break_before_prop, value); }
200     // page-break-after:avoid
isKeepNext()201     inline bool isKeepNext() const { return getValue(odx_p_keep_next_prop, false); }
setKeepNext(bool value)202     inline void setKeepNext(bool value) { set(odx_p_keep_next_prop, value); }
isMirrorIndents()203     inline bool isMirrorIndents() const { return getValue(odx_p_mirror_indents_prop, false); }
setMirrorIndents(bool value)204     inline void setMirrorIndents(bool value) { set(odx_p_mirror_indents_prop, value); }
getLineRule()205     inline odx_lineRule_type getLineRule() const { return getValue(odx_p_line_rule_prop, odx_lineRule_auto); }
setLineRule(odx_lineRule_type value)206     inline void setLineRule(odx_lineRule_type value) { set(odx_p_line_rule_prop, value); }
getNumberingId()207     inline int getNumberingId() { return getValue(odx_p_num_id_prop, 0); }
getOutlineLvl()208     css_length_t getOutlineLvl() { return get(odx_p_outline_level_prop); }
getNumberingLevel()209     inline int getNumberingLevel() { return get(odx_p_ilvl_prop).value; }
210     lString32 getCss();
211 };
212 
213 class odx_ImportContext
214 {
215     LVHashTable<lString32, odx_StyleRef> m_styles;
216     odx_rPr m_rPrDefault;
217     odx_pPr m_pPrDefault;
218 protected:
219     ldomDocument* m_doc;
220 public:
odx_ImportContext(ldomDocument * doc)221     odx_ImportContext(ldomDocument* doc) : m_styles(64), m_doc(doc) { }
~odx_ImportContext()222     virtual ~odx_ImportContext() {}
223     void addStyle( odx_StyleRef style );
getStyle(lString32 id)224     odx_Style * getStyle( lString32 id ) {
225         return m_styles.get(id).get();
226     }
get_rPrDefault()227     inline odx_rPr * get_rPrDefault() { return &m_rPrDefault; }
get_pPrDefault()228     inline odx_pPr * get_pPrDefault() { return &m_pPrDefault; }
229     void setLanguage(const lChar32 *lang);
230     lString32 getListStyleCss(css_list_style_type_t listType);
231     void startDocument(ldomDocumentWriter& writer);
232     void endDocument(ldomDocumentWriter& writer);
233 };
234 
235 class odx_Style : public LVRefCounter
236 {
237     lString32 m_Name;
238     lString32 m_Id;
239     lString32 m_basedOn;
240     odx_style_type m_type;
241     odx_pPr m_pPr;
242     odx_rPr m_rPr;
243     bool m_pPrMerged;
244     bool m_rPrMerged;
245 public:
246     odx_Style();
247 
getName()248     inline lString32 getName() const { return m_Name; }
setName(const lChar32 * value)249     inline void setName(const lChar32 * value) { m_Name = value; }
250 
getId()251     inline lString32 getId() const { return m_Id; }
setId(const lChar32 * value)252     inline void setId(const lChar32 * value) { m_Id = value; }
253 
getBasedOn()254     inline lString32 getBasedOn() const { return m_basedOn; }
setBasedOn(const lChar32 * value)255     inline void setBasedOn(const lChar32 * value) { m_basedOn = value; }
256     bool isValid() const;
257 
getStyleType()258     inline odx_style_type getStyleType() const { return m_type; }
setStyleType(odx_style_type value)259     inline void setStyleType(odx_style_type value) { m_type = value; }
260     odx_Style* getBaseStyle(odx_ImportContext* context);
261     odx_pPr * get_pPr(odx_ImportContext* context);
262     odx_rPr * get_rPr(odx_ImportContext* context);
get_pPrPointer()263     inline odx_pPr * get_pPrPointer() { return &m_pPr; }
get_rPrPointer()264     inline odx_rPr * get_rPrPointer() { return &m_rPr; }
265     odx_StylePropertiesGetter* getStyleProperties(odx_ImportContext* context,
266                                                   odx_style_type styleType);
267 };
268 
269 template<int N>
setStyleId(odx_ImportContext * context,const lChar32 * styleId)270 void odx_StylePropertiesContainer<N>::setStyleId(odx_ImportContext *context, const lChar32 *styleId) {
271     m_styleId = styleId;
272     if ( !m_styleId.empty() ) {
273         odx_Style *style = context->getStyle(m_styleId);
274         if( style && (m_styleType == style->getStyleType()) ) {
275             combineWith(style->getStyleProperties(context, m_styleType));
276         }
277     }
278 }
279 
280 template<int N>
getStyle(odx_ImportContext * context)281 odx_Style *odx_StylePropertiesContainer<N>::getStyle(odx_ImportContext *context) {
282     odx_Style* ret = NULL;
283 
284     if (!m_styleId.empty() ) {
285         ret = context->getStyle(m_styleId);
286     }
287     return ret;
288 }
289 
290 /// known docx items name and identifier
291 struct item_def_t {
292     int      id;
293     const lChar32 * name;
294 };
295 
296 class xml_ElementHandler;
297 
298 class docXMLreader : public LVXMLParserCallback
299 {
300 private:
301     enum xml_doc_reader_state {
302         xml_doc_in_start,
303         xml_doc_in_xml_declaration,
304         xml_doc_in_body,
305         xml_doc_in_document
306     };
307     int m_skipTag;
308     xml_doc_reader_state m_state;
309 protected:
310     xml_ElementHandler *m_handler;
311     ldomDocumentWriter *m_writer;
312 
isSkipping()313     inline bool isSkipping()
314     {
315         return (m_skipTag != 0);
316     }
317 
skipped()318     inline void skipped()
319     {
320         m_skipTag--;
321     }
322 
323 public:
324     /// constructor
docXMLreader(ldomDocumentWriter * writer)325     docXMLreader(ldomDocumentWriter *writer) : m_skipTag(0), m_state(xml_doc_in_start),
326         m_handler(NULL), m_writer(writer)
327     {
328     }
329 
330     /// destructor
~docXMLreader()331     virtual ~docXMLreader() { }
332     /// called on parsing start
333     virtual void OnStart(LVFileFormatParser *);
334     /// called on parsing end
OnStop()335     virtual void OnStop() {  }
336 
skip()337     inline void skip()
338     {
339         m_skipTag++;
340     }
341 
342     /// called on opening tag <
343     ldomNode * OnTagOpen( const lChar32 * nsname, const lChar32 * tagname);
344 
345     /// called after > of opening tag (when entering tag body)
346     void OnTagBody();
347 
348     /// called on tag close
349     void OnTagClose( const lChar32 * nsname, const lChar32 * tagname, bool self_closing_tag=false );
350 
351     /// called on element attribute
352     void OnAttribute( const lChar32 * nsname, const lChar32 * attrname, const lChar32 * attrvalue );
353 
354     /// called on text
355     void OnText( const lChar32 * text, int len, lUInt32 flags );
356 
357     /// add named BLOB data to document
358     bool OnBlob(lString32 name, const lUInt8 * data, int size);
359 
getHandler()360     xml_ElementHandler * getHandler()
361     {
362         return m_handler;
363     }
364 
setHandler(xml_ElementHandler * a_handler)365     void setHandler(xml_ElementHandler *a_handler)
366     {
367         m_handler = a_handler;
368     }
369 
setWriter(ldomDocumentWriter * writer)370     void setWriter(ldomDocumentWriter *writer)
371     {
372         m_writer = writer;
373     }
374 };
375 
376 class xml_ElementHandler
377 {
378 protected:
379     docXMLreader * m_reader;
380     ldomDocumentWriter *m_writer;
381     xml_ElementHandler *m_savedHandler;
382     const item_def_t *m_children;
383     int m_element;
384     int m_state;
385 protected:
xml_ElementHandler(docXMLreader * reader,ldomDocumentWriter * writer,int element,const struct item_def_t * children)386     xml_ElementHandler(docXMLreader * reader, ldomDocumentWriter *writer,
387                        int element, const struct item_def_t *children) :
388         m_reader(reader), m_writer(writer), m_children(children), m_element(element),
389         m_state(element)
390     {
391     }
~xml_ElementHandler()392     virtual ~xml_ElementHandler() {}
parseTagName(const lChar32 * tagname)393     virtual int parseTagName(const lChar32 *tagname) {
394         if(m_children)
395             return parse_name(m_children, tagname);
396         return -1;
397     }
398 public:
399     static int parse_name(const struct item_def_t *tags, const lChar32 * nameValue);
400     static void parse_int(const lChar32 * attrValue, css_length_t & result);
401     void setChildrenInfo(const struct item_def_t *tags);
402     ldomNode * handleTagOpen(const lChar32 * nsname, const lChar32 * tagname);
403     virtual ldomNode * handleTagOpen(int tagId);
handleAttribute(const lChar32 * nsname,const lChar32 * attrname,const lChar32 * attrvalue)404     void handleAttribute(const lChar32 * nsname, const lChar32 * attrname, const lChar32 * attrvalue)
405     {
406         CR_UNUSED(nsname);
407 
408         handleAttribute(attrname, attrvalue);
409     }
handleAttribute(const lChar32 * attrname,const lChar32 * attrvalue)410     virtual void handleAttribute(const lChar32 * attrname, const lChar32 * attrvalue) {
411         CR_UNUSED2(attrname, attrvalue);
412     }
handleTagBody()413     virtual void handleTagBody() {}
handleText(const lChar32 * text,int len,lUInt32 flags)414     virtual void handleText( const lChar32 * text, int len, lUInt32 flags ) {
415         CR_UNUSED3(text,len,flags);
416     }
handleTagClose(const lChar32 * nsname,const lChar32 * tagname)417     virtual void handleTagClose( const lChar32 * nsname, const lChar32 * tagname )
418     {
419         CR_UNUSED2(nsname, tagname);
420 
421         if(m_state == m_element)
422             stop();
423         else
424             m_state = m_element;
425     }
426     virtual void start();
427     virtual void stop();
428     virtual void reset();
429 };
430 
431 class xml_SkipElementHandler : public xml_ElementHandler
432 {
433 public:
xml_SkipElementHandler(docXMLreader * reader,ldomDocumentWriter * writer,int element)434     xml_SkipElementHandler(docXMLreader * reader, ldomDocumentWriter *writer,
435                            int element) : xml_ElementHandler(reader, writer, element, NULL) {}
skipElement(int element)436     void skipElement(int element) {
437         m_state = element;
438         start();
439     }
440 };
441 
442 class odx_styleTagsHandler
443 {
444     lString32 m_styleTags;
445     int styleTagPos(lChar32 ch);
446 protected:
447     const lChar32 * getStyleTagName( lChar32 ch );
448     void closeStyleTag( lChar32 ch, ldomDocumentWriter *writer);
449     void openStyleTag(lChar32 ch, ldomDocumentWriter *writer);
450 public:
odx_styleTagsHandler()451     odx_styleTagsHandler() {}
452     void openStyleTags(odx_rPr* runProps, ldomDocumentWriter *writer);
453     void closeStyleTags(odx_rPr* runProps, ldomDocumentWriter *writer);
454     void closeStyleTags(ldomDocumentWriter *writer);
455 };
456 
457 class odx_titleHandler
458 {
459 public:
460     odx_titleHandler(ldomDocumentWriter *writer, bool useClassName=false) :
m_writer(writer)461         m_writer(writer), m_titleLevel(), m_useClassName(useClassName) {}
~odx_titleHandler()462     virtual ~odx_titleHandler() {}
463     virtual ldomNode* onBodyStart();
464     virtual void onTitleStart(int level, bool noSection = false);
465     virtual void onTitleEnd();
onBodyEnd()466     virtual void onBodyEnd() {}
useClassForTitle()467     bool useClassForTitle() { return m_useClassName; }
468 protected:
469     ldomDocumentWriter *m_writer;
470     int m_titleLevel;
471     bool m_useClassName;
472 };
473 
474 class odx_fb2TitleHandler : public odx_titleHandler
475 {
476 public:
odx_fb2TitleHandler(ldomDocumentWriter * writer,bool useClassName)477     odx_fb2TitleHandler(ldomDocumentWriter *writer, bool useClassName) :
478         odx_titleHandler(writer, useClassName), m_hasTitle(false)
479     {}
480     ldomNode* onBodyStart();
481     void onTitleStart(int level, bool noSection = false);
482     void onTitleEnd();
483 private:
484     void makeSection(int startIndex);
485     void openSection(int level);
486     void closeSection(int level);
487 private:
488     ldomNode *m_section;
489     bool m_hasTitle;
490 };
491