1 #include <lvxml.h>
2 #include <lvtinydom.h>
3
4 // build FB2 DOM, comment out to build HTML DOM
5 #define DOCX_FB2_DOM_STRUCTURE 1
// If true, headings use <title class="hx"><p>...</p></title>; otherwise <title><hx>...</hx></title>
7 #define DOCX_USE_CLASS_FOR_HEADING true
8 // comment this out to disable in-page footnotes
9 #define ODX_CRENGINE_IN_PAGE_FOOTNOTES 1
10
/// Category of a style. Stored by odx_Style (m_type) and by
/// odx_StylePropertiesContainer (m_styleType); the two are compared in
/// odx_StylePropertiesContainer::setStyleId().
/// The enumerators keep the values they had when implicitly numbered.
enum odx_style_type {
    odx_invalid_style   = 0,
    odx_paragraph_style = 1,
    odx_character_style = 2,
    odx_table_style     = 3,
    odx_numbering_style = 4
};
18
/// Line rule of a paragraph, as returned by odx_pPr::getLineRule()
/// (which falls back to odx_lineRule_auto when the property is unset).
/// The enumerators keep the values they had when implicitly numbered.
enum odx_lineRule_type {
    odx_lineRule_atLeast = 0,
    odx_lineRule_auto    = 1,
    odx_lineRule_exact   = 2
};
24
25 class odx_StylePropertiesGetter
26 {
27 public:
28 virtual css_length_t get(int index) const = 0;
29 };
30
31 class odx_Style;
32 typedef LVFastRef< odx_Style > odx_StyleRef;
33
34 class odx_ImportContext;
35
36 template <int N>
37 class odx_StylePropertiesContainer : public odx_StylePropertiesGetter
38 {
39 odx_style_type m_styleType;
40 lString32 m_styleId;
41 public:
42 static const int PROP_COUNT = N;
43
reset()44 virtual void reset() {
45 init();
46 m_styleId.clear();
47 }
48
~odx_StylePropertiesContainer()49 virtual ~odx_StylePropertiesContainer() {}
50
odx_StylePropertiesContainer(odx_style_type styleType)51 odx_StylePropertiesContainer(odx_style_type styleType) : m_styleType(styleType) {
52 init();
53 }
54
get(int index)55 css_length_t get(int index) const {
56 if( index < N ) {
57 return m_properties[index];
58 }
59 return css_length_t(css_val_unspecified, 0);
60 }
61
set(int index,int value)62 void set(int index, int value) {
63 if ( index < N ) {
64 m_properties[index].type = css_val_pt;
65 m_properties[index].value = value;
66 }
67 }
68
set(int index,css_length_t & value)69 void set(int index, css_length_t& value) {
70 if ( index < N ) {
71 m_properties[index] = value;
72 }
73 }
74
75 template<class T, typename U = void>
getValue(int index,T defaultValue)76 T getValue(int index, T defaultValue) const {
77 css_length_t property = get(index);
78 if(property.type != css_val_unspecified)
79 return (T)property.value;
80 return defaultValue;
81 }
82
83 template<typename U>
getValue(int index,bool defaultValue)84 bool getValue(int index, bool defaultValue) const {
85 css_length_t property = get(index);
86 if(property.type != css_val_unspecified)
87 return (property.value != 0);
88 return defaultValue;
89 }
90
combineWith(const odx_StylePropertiesGetter * other)91 void combineWith(const odx_StylePropertiesGetter* other)
92 {
93 for(int i = 0; other && i < PROP_COUNT; i++) {
94 css_length_t baseValue = other->get(i);
95 if( get(i).type == css_val_unspecified &&
96 baseValue.type != css_val_unspecified)
97 set(i, baseValue);
98 }
99 }
100 void setStyleId(odx_ImportContext* context, const lChar32* styleId);
101 odx_Style* getStyle(odx_ImportContext* context);
102 protected:
103 css_length_t m_properties[N];
104 private:
init()105 void init() {
106 for(int i = 0; i < N; i++) {
107 m_properties[i].type = css_val_unspecified;
108 m_properties[i].value = 0;
109 }
110 }
111 };
112
113
/// Slot indices of the property table held by odx_rPr.
/// odx_run_max_prop is not a property: it is the slot count and serves as
/// the template argument of odx_rPr's base class.
/// The enumerators keep the values they had when implicitly numbered.
enum odx_run_properties
{
    odx_run_italic_prop        = 0,
    odx_run_bold_prop          = 1,
    odx_run_underline_prop     = 2,
    odx_run_strikethrough_prop = 3,
    odx_run_hidden_prop        = 4,
    odx_run_halign_prop        = 5,
    odx_run_valign_prop        = 6,
    odx_run_font_size_prop     = 7,
    odx_run_max_prop           = 8
};
126
127 class odx_rPr : public odx_StylePropertiesContainer<odx_run_max_prop>
128 {
129 public:
130 odx_rPr();
131 ///properties
isBold()132 inline bool isBold() const { return getValue(odx_run_bold_prop, false); }
setBold(bool value)133 inline void setBold(bool value) { set(odx_run_bold_prop, value); }
isItalic()134 inline bool isItalic() const { return getValue(odx_run_italic_prop, false); }
setItalic(bool value)135 inline void setItalic(bool value) { set(odx_run_italic_prop, value); }
isUnderline()136 inline bool isUnderline() const { return getValue(odx_run_underline_prop, false); }
setUnderline(bool value)137 inline void setUnderline(bool value) { set(odx_run_underline_prop, value); }
isStrikeThrough()138 inline bool isStrikeThrough() const { return getValue(odx_run_strikethrough_prop, false); }
setStrikeThrough(bool value)139 inline void setStrikeThrough(bool value) { set(odx_run_strikethrough_prop, value); }
isSubScript()140 inline bool isSubScript() const { return (getVertAlign() == css_va_sub); }
isSuperScript()141 inline bool isSuperScript() const { return (getVertAlign() == css_va_super); }
isHidden()142 inline bool isHidden() const { return getValue(odx_run_hidden_prop, false); }
setHidden(bool value)143 inline void setHidden(bool value) { set(odx_run_hidden_prop, value); }
getTextAlign()144 inline css_text_align_t getTextAlign() const {
145 return getValue(odx_run_halign_prop, css_ta_inherit);
146 }
setTextAlign(css_text_align_t value)147 inline void setTextAlign( css_text_align_t value ) { set(odx_run_halign_prop, value); }
getVertAlign()148 inline css_vertical_align_t getVertAlign() const {
149 return getValue(odx_run_valign_prop, css_va_inherit);
150 }
setVertAlign(css_vertical_align_t value)151 inline void setVertAlign(css_vertical_align_t value) { set(odx_run_valign_prop,value); }
152 lString32 getCss();
153 };
154
/// Slot indices of the property table held by odx_pPr.
/// odx_p_max_prop is not a property: it is the slot count and serves as the
/// template argument of odx_pPr's base class.
/// Numbering starts at 0 and increases by one per enumerator, exactly as it
/// did with fully implicit numbering.
enum odx_p_properties {
    odx_p_page_break_before_prop = 0,
    odx_p_keep_next_prop,            // 1
    odx_p_mirror_indents_prop,       // 2
    odx_p_halign_prop,               // 3
    odx_p_valign_prop,               // 4
    odx_p_line_rule_prop,            // 5
    odx_p_hyphenate_prop,            // 6
    odx_p_before_spacing_prop,       // 7
    odx_p_after_spacing_prop,        // 8
    odx_p_before_auto_spacing_prop,  // 9
    odx_p_after_auto_spacing_prop,   // 10
    odx_p_line_spacing_prop,         // 11
    odx_p_line_height_prop,          // 12
    odx_p_left_margin_prop,          // 13
    odx_p_right_margin_prop,         // 14
    odx_p_indent_prop,               // 15
    odx_p_hanging_prop,              // 16
    odx_p_outline_level_prop,        // 17
    odx_p_num_id_prop,               // 18
    odx_p_ilvl_prop,                 // 19
    odx_p_max_prop                   // 20: number of slots
};
178
179 class odx_pPr : public odx_StylePropertiesContainer<odx_p_max_prop>
180 {
181 public:
182 odx_pPr();
183
184 ///properties
getTextAlign()185 inline css_text_align_t getTextAlign() const {
186 return getValue(odx_p_halign_prop, css_ta_inherit);
187 }
setTextAlign(css_text_align_t value)188 inline void setTextAlign( css_text_align_t value ) { set(odx_p_halign_prop, value); }
getVertAlign()189 inline css_vertical_align_t getVertAlign() const {
190 return getValue(odx_p_valign_prop, css_va_inherit);
191 }
setVertAlign(css_vertical_align_t value)192 inline void setVertAlign(css_vertical_align_t value) { set(odx_p_valign_prop, value); }
getHyphenate()193 inline css_hyphenate_t getHyphenate() const {
194 return getValue(odx_p_hyphenate_prop, css_hyph_inherit);
195 }
setHyphenate(css_hyphenate_t value)196 inline void setHyphenate( css_hyphenate_t value ) { set(odx_p_hyphenate_prop, value); }
197 // page-break-before:always
isPageBreakBefore()198 inline bool isPageBreakBefore() const { return getValue(odx_p_page_break_before_prop, false); }
setPageBreakBefore(bool value)199 inline void setPageBreakBefore(bool value) { set(odx_p_page_break_before_prop, value); }
200 // page-break-after:avoid
isKeepNext()201 inline bool isKeepNext() const { return getValue(odx_p_keep_next_prop, false); }
setKeepNext(bool value)202 inline void setKeepNext(bool value) { set(odx_p_keep_next_prop, value); }
isMirrorIndents()203 inline bool isMirrorIndents() const { return getValue(odx_p_mirror_indents_prop, false); }
setMirrorIndents(bool value)204 inline void setMirrorIndents(bool value) { set(odx_p_mirror_indents_prop, value); }
getLineRule()205 inline odx_lineRule_type getLineRule() const { return getValue(odx_p_line_rule_prop, odx_lineRule_auto); }
setLineRule(odx_lineRule_type value)206 inline void setLineRule(odx_lineRule_type value) { set(odx_p_line_rule_prop, value); }
getNumberingId()207 inline int getNumberingId() { return getValue(odx_p_num_id_prop, 0); }
getOutlineLvl()208 css_length_t getOutlineLvl() { return get(odx_p_outline_level_prop); }
getNumberingLevel()209 inline int getNumberingLevel() { return get(odx_p_ilvl_prop).value; }
210 lString32 getCss();
211 };
212
213 class odx_ImportContext
214 {
215 LVHashTable<lString32, odx_StyleRef> m_styles;
216 odx_rPr m_rPrDefault;
217 odx_pPr m_pPrDefault;
218 protected:
219 ldomDocument* m_doc;
220 public:
odx_ImportContext(ldomDocument * doc)221 odx_ImportContext(ldomDocument* doc) : m_styles(64), m_doc(doc) { }
~odx_ImportContext()222 virtual ~odx_ImportContext() {}
223 void addStyle( odx_StyleRef style );
getStyle(lString32 id)224 odx_Style * getStyle( lString32 id ) {
225 return m_styles.get(id).get();
226 }
get_rPrDefault()227 inline odx_rPr * get_rPrDefault() { return &m_rPrDefault; }
get_pPrDefault()228 inline odx_pPr * get_pPrDefault() { return &m_pPrDefault; }
229 void setLanguage(const lChar32 *lang);
230 lString32 getListStyleCss(css_list_style_type_t listType);
231 void startDocument(ldomDocumentWriter& writer);
232 void endDocument(ldomDocumentWriter& writer);
233 };
234
235 class odx_Style : public LVRefCounter
236 {
237 lString32 m_Name;
238 lString32 m_Id;
239 lString32 m_basedOn;
240 odx_style_type m_type;
241 odx_pPr m_pPr;
242 odx_rPr m_rPr;
243 bool m_pPrMerged;
244 bool m_rPrMerged;
245 public:
246 odx_Style();
247
getName()248 inline lString32 getName() const { return m_Name; }
setName(const lChar32 * value)249 inline void setName(const lChar32 * value) { m_Name = value; }
250
getId()251 inline lString32 getId() const { return m_Id; }
setId(const lChar32 * value)252 inline void setId(const lChar32 * value) { m_Id = value; }
253
getBasedOn()254 inline lString32 getBasedOn() const { return m_basedOn; }
setBasedOn(const lChar32 * value)255 inline void setBasedOn(const lChar32 * value) { m_basedOn = value; }
256 bool isValid() const;
257
getStyleType()258 inline odx_style_type getStyleType() const { return m_type; }
setStyleType(odx_style_type value)259 inline void setStyleType(odx_style_type value) { m_type = value; }
260 odx_Style* getBaseStyle(odx_ImportContext* context);
261 odx_pPr * get_pPr(odx_ImportContext* context);
262 odx_rPr * get_rPr(odx_ImportContext* context);
get_pPrPointer()263 inline odx_pPr * get_pPrPointer() { return &m_pPr; }
get_rPrPointer()264 inline odx_rPr * get_rPrPointer() { return &m_rPr; }
265 odx_StylePropertiesGetter* getStyleProperties(odx_ImportContext* context,
266 odx_style_type styleType);
267 };
268
269 template<int N>
setStyleId(odx_ImportContext * context,const lChar32 * styleId)270 void odx_StylePropertiesContainer<N>::setStyleId(odx_ImportContext *context, const lChar32 *styleId) {
271 m_styleId = styleId;
272 if ( !m_styleId.empty() ) {
273 odx_Style *style = context->getStyle(m_styleId);
274 if( style && (m_styleType == style->getStyleType()) ) {
275 combineWith(style->getStyleProperties(context, m_styleType));
276 }
277 }
278 }
279
280 template<int N>
getStyle(odx_ImportContext * context)281 odx_Style *odx_StylePropertiesContainer<N>::getStyle(odx_ImportContext *context) {
282 odx_Style* ret = NULL;
283
284 if (!m_styleId.empty() ) {
285 ret = context->getStyle(m_styleId);
286 }
287 return ret;
288 }
289
290 /// known docx items name and identifier
291 struct item_def_t {
292 int id;
293 const lChar32 * name;
294 };
295
// Forward declaration: docXMLreader stores a pointer to its current handler,
// and the handler class itself is defined after the reader.
class xml_ElementHandler;
297
298 class docXMLreader : public LVXMLParserCallback
299 {
300 private:
301 enum xml_doc_reader_state {
302 xml_doc_in_start,
303 xml_doc_in_xml_declaration,
304 xml_doc_in_body,
305 xml_doc_in_document
306 };
307 int m_skipTag;
308 xml_doc_reader_state m_state;
309 protected:
310 xml_ElementHandler *m_handler;
311 ldomDocumentWriter *m_writer;
312
isSkipping()313 inline bool isSkipping()
314 {
315 return (m_skipTag != 0);
316 }
317
skipped()318 inline void skipped()
319 {
320 m_skipTag--;
321 }
322
323 public:
324 /// constructor
docXMLreader(ldomDocumentWriter * writer)325 docXMLreader(ldomDocumentWriter *writer) : m_skipTag(0), m_state(xml_doc_in_start),
326 m_handler(NULL), m_writer(writer)
327 {
328 }
329
330 /// destructor
~docXMLreader()331 virtual ~docXMLreader() { }
332 /// called on parsing start
333 virtual void OnStart(LVFileFormatParser *);
334 /// called on parsing end
OnStop()335 virtual void OnStop() { }
336
skip()337 inline void skip()
338 {
339 m_skipTag++;
340 }
341
342 /// called on opening tag <
343 ldomNode * OnTagOpen( const lChar32 * nsname, const lChar32 * tagname);
344
345 /// called after > of opening tag (when entering tag body)
346 void OnTagBody();
347
348 /// called on tag close
349 void OnTagClose( const lChar32 * nsname, const lChar32 * tagname, bool self_closing_tag=false );
350
351 /// called on element attribute
352 void OnAttribute( const lChar32 * nsname, const lChar32 * attrname, const lChar32 * attrvalue );
353
354 /// called on text
355 void OnText( const lChar32 * text, int len, lUInt32 flags );
356
357 /// add named BLOB data to document
358 bool OnBlob(lString32 name, const lUInt8 * data, int size);
359
getHandler()360 xml_ElementHandler * getHandler()
361 {
362 return m_handler;
363 }
364
setHandler(xml_ElementHandler * a_handler)365 void setHandler(xml_ElementHandler *a_handler)
366 {
367 m_handler = a_handler;
368 }
369
setWriter(ldomDocumentWriter * writer)370 void setWriter(ldomDocumentWriter *writer)
371 {
372 m_writer = writer;
373 }
374 };
375
376 class xml_ElementHandler
377 {
378 protected:
379 docXMLreader * m_reader;
380 ldomDocumentWriter *m_writer;
381 xml_ElementHandler *m_savedHandler;
382 const item_def_t *m_children;
383 int m_element;
384 int m_state;
385 protected:
xml_ElementHandler(docXMLreader * reader,ldomDocumentWriter * writer,int element,const struct item_def_t * children)386 xml_ElementHandler(docXMLreader * reader, ldomDocumentWriter *writer,
387 int element, const struct item_def_t *children) :
388 m_reader(reader), m_writer(writer), m_children(children), m_element(element),
389 m_state(element)
390 {
391 }
~xml_ElementHandler()392 virtual ~xml_ElementHandler() {}
parseTagName(const lChar32 * tagname)393 virtual int parseTagName(const lChar32 *tagname) {
394 if(m_children)
395 return parse_name(m_children, tagname);
396 return -1;
397 }
398 public:
399 static int parse_name(const struct item_def_t *tags, const lChar32 * nameValue);
400 static void parse_int(const lChar32 * attrValue, css_length_t & result);
401 void setChildrenInfo(const struct item_def_t *tags);
402 ldomNode * handleTagOpen(const lChar32 * nsname, const lChar32 * tagname);
403 virtual ldomNode * handleTagOpen(int tagId);
handleAttribute(const lChar32 * nsname,const lChar32 * attrname,const lChar32 * attrvalue)404 void handleAttribute(const lChar32 * nsname, const lChar32 * attrname, const lChar32 * attrvalue)
405 {
406 CR_UNUSED(nsname);
407
408 handleAttribute(attrname, attrvalue);
409 }
handleAttribute(const lChar32 * attrname,const lChar32 * attrvalue)410 virtual void handleAttribute(const lChar32 * attrname, const lChar32 * attrvalue) {
411 CR_UNUSED2(attrname, attrvalue);
412 }
handleTagBody()413 virtual void handleTagBody() {}
handleText(const lChar32 * text,int len,lUInt32 flags)414 virtual void handleText( const lChar32 * text, int len, lUInt32 flags ) {
415 CR_UNUSED3(text,len,flags);
416 }
handleTagClose(const lChar32 * nsname,const lChar32 * tagname)417 virtual void handleTagClose( const lChar32 * nsname, const lChar32 * tagname )
418 {
419 CR_UNUSED2(nsname, tagname);
420
421 if(m_state == m_element)
422 stop();
423 else
424 m_state = m_element;
425 }
426 virtual void start();
427 virtual void stop();
428 virtual void reset();
429 };
430
431 class xml_SkipElementHandler : public xml_ElementHandler
432 {
433 public:
xml_SkipElementHandler(docXMLreader * reader,ldomDocumentWriter * writer,int element)434 xml_SkipElementHandler(docXMLreader * reader, ldomDocumentWriter *writer,
435 int element) : xml_ElementHandler(reader, writer, element, NULL) {}
skipElement(int element)436 void skipElement(int element) {
437 m_state = element;
438 start();
439 }
440 };
441
442 class odx_styleTagsHandler
443 {
444 lString32 m_styleTags;
445 int styleTagPos(lChar32 ch);
446 protected:
447 const lChar32 * getStyleTagName( lChar32 ch );
448 void closeStyleTag( lChar32 ch, ldomDocumentWriter *writer);
449 void openStyleTag(lChar32 ch, ldomDocumentWriter *writer);
450 public:
odx_styleTagsHandler()451 odx_styleTagsHandler() {}
452 void openStyleTags(odx_rPr* runProps, ldomDocumentWriter *writer);
453 void closeStyleTags(odx_rPr* runProps, ldomDocumentWriter *writer);
454 void closeStyleTags(ldomDocumentWriter *writer);
455 };
456
457 class odx_titleHandler
458 {
459 public:
460 odx_titleHandler(ldomDocumentWriter *writer, bool useClassName=false) :
m_writer(writer)461 m_writer(writer), m_titleLevel(), m_useClassName(useClassName) {}
~odx_titleHandler()462 virtual ~odx_titleHandler() {}
463 virtual ldomNode* onBodyStart();
464 virtual void onTitleStart(int level, bool noSection = false);
465 virtual void onTitleEnd();
onBodyEnd()466 virtual void onBodyEnd() {}
useClassForTitle()467 bool useClassForTitle() { return m_useClassName; }
468 protected:
469 ldomDocumentWriter *m_writer;
470 int m_titleLevel;
471 bool m_useClassName;
472 };
473
474 class odx_fb2TitleHandler : public odx_titleHandler
475 {
476 public:
odx_fb2TitleHandler(ldomDocumentWriter * writer,bool useClassName)477 odx_fb2TitleHandler(ldomDocumentWriter *writer, bool useClassName) :
478 odx_titleHandler(writer, useClassName), m_hasTitle(false)
479 {}
480 ldomNode* onBodyStart();
481 void onTitleStart(int level, bool noSection = false);
482 void onTitleEnd();
483 private:
484 void makeSection(int startIndex);
485 void openSection(int level);
486 void closeSection(int level);
487 private:
488 ldomNode *m_section;
489 bool m_hasTitle;
490 };
491