1 #include "../include/docxfmt.h"
2 #include "../include/lvtinydom.h"
3 #include "../include/fb2def.h"
4 #include "../include/lvopc.h"
5 #include "../include/crlog.h"
6 #include "odxutil.h"
7 
// Token-pasting helpers used by the element tables below.
// DOCX_TAG_NAME(x)  -> docx_el_x_name  (name constant of element x)
#define DOCX_TAG_NAME(itm)  docx_el_##itm##_name
// DOCX_TAG_ID(x)    -> docx_el_x       (enumerator of element x)
#define DOCX_TAG_ID(itm)    docx_el_##itm
// DOCX_TAG_CHILD(x) -> { id, name } initializer for one item_def_t entry
#define DOCX_TAG_CHILD(itm) { DOCX_TAG_ID(itm), DOCX_TAG_NAME(itm) }
// Terminating entry that closes every item_def_t table
#define DOCX_LAST_ITEM      { -1, NULL }
12 
13 static const lChar32* const docx_DocumentContentType   = U"application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml";
14 static const lChar32* const docx_NumberingContentType  = U"application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml";
15 static const lChar32* const docx_StylesContentType     = U"application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml";
16 static const lChar32* const docx_ImageRelationship     = U"http://schemas.openxmlformats.org/officeDocument/2006/relationships/image";
17 static const lChar32* const docx_HyperlinkRelationship = U"http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink";
18 static const lChar32* const docx_FootNotesRelationShip = U"http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes";
19 static const lChar32* const docx_EndNotesRelationShip  = U"http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes";
20 
21 enum {
22 #define DOCX_NUM_FMT(itm)
23 #define DOCX_TAG(itm) 	DOCX_TAG_ID(itm),
24     docx_el_NULL = 0,
25     #include "docxdtd.inc"
26     docx_el_MAX_ID
27 };
28 
29 #define DOCX_NUM_FMT(itm)
30 #define DOCX_TAG(itm) static const lChar32 * const DOCX_TAG_NAME(itm) = U ## #itm;
31     #include "docxdtd.inc"
32 
33 const struct item_def_t styles_elements[] = {
34     DOCX_TAG_CHILD(styles),
35     DOCX_TAG_CHILD(style),
36     DOCX_TAG_CHILD(docDefaults),
37     DOCX_LAST_ITEM
38 };
39 
/// Multi-level kind stored in docxAbstractNum::m_multilevel.
enum docx_multilevel_type {
    docx_hybrid_multilevel,   // value 0
    docx_multilevel,          // value 1
    docx_singlelevel          // value 2
};
45 
46 #define DOCX_NUM_FMT(itm) docx_numFormat_##itm ,
47 #define DOCX_TAG(itm)
48 enum docx_numFormat_type {
49     #include "docxdtd.inc"
50     docx_numFormat_MAX_ID
51 };
52 
/// Level suffix kinds; lvlSuff_attr_values maps the attribute strings
/// "tab", "space" and "nothing" onto these values.
enum docx_LevelSuffix_type {
    docx_level_suffix_tab,      // "tab"
    docx_level_suffix_space,    // "space"
    docx_level_suffix_nothing   // "nothing"
};
58 
59 const struct item_def_t style_elements[] = {
60     DOCX_TAG_CHILD(name),
61     DOCX_TAG_CHILD(basedOn),
62     DOCX_TAG_CHILD(pPr),
63     DOCX_TAG_CHILD(rPr),
64     DOCX_TAG_CHILD(tblPr),
65     DOCX_TAG_CHILD(trPr),
66     DOCX_TAG_CHILD(tcPr),
67     DOCX_LAST_ITEM
68 };
69 
70 const struct item_def_t docDefaults_elements[] = {
71     DOCX_TAG_CHILD(pPr),
72     DOCX_TAG_CHILD(pPrDefault),
73     DOCX_TAG_CHILD(rPrDefault),
74     DOCX_TAG_CHILD(rPr),
75     DOCX_LAST_ITEM
76 };
77 
78 const struct item_def_t rPr_elements[] = {
79     DOCX_TAG_CHILD(b),
80     DOCX_TAG_CHILD(i),
81     DOCX_TAG_CHILD(color),
82     DOCX_TAG_CHILD(jc),
83     DOCX_TAG_CHILD(lang),
84     DOCX_TAG_CHILD(rFonts),
85     DOCX_TAG_CHILD(rStyle),
86     DOCX_TAG_CHILD(u),
87     DOCX_TAG_CHILD(vertAlign),
88     DOCX_TAG_CHILD(sz),
89     DOCX_TAG_CHILD(vanish),
90     DOCX_TAG_CHILD(strike),
91     DOCX_LAST_ITEM
92 };
93 
94 const struct item_def_t numPr_elements[] = {
95     DOCX_TAG_CHILD(ilvl),
96     DOCX_TAG_CHILD(numId),
97     DOCX_LAST_ITEM
98 };
99 
100 const struct item_def_t pPr_elements[] = {
101     DOCX_TAG_CHILD(pageBreakBefore),
102     DOCX_TAG_CHILD(keepNext),
103     DOCX_TAG_CHILD(pStyle),
104     DOCX_TAG_CHILD(jc),
105     DOCX_TAG_CHILD(spacing),
106     DOCX_TAG_CHILD(numPr),
107     DOCX_TAG_CHILD(textAlignment),
108     DOCX_TAG_CHILD(ind),
109     DOCX_TAG_CHILD(suppressAutoHyphens),
110 //    DOCX_TAG_CHILD(rPr), don't care about Paragraph merker formatting
111     DOCX_TAG_CHILD(outlineLvl),
112     DOCX_LAST_ITEM
113 };
114 
115 const struct item_def_t p_elements[] = {
116     DOCX_TAG_CHILD(r),
117     DOCX_TAG_CHILD(pPr),
118     DOCX_TAG_CHILD(hyperlink),
119     DOCX_TAG_CHILD(bookmarkStart),
120     DOCX_LAST_ITEM
121 };
122 
123 const struct item_def_t r_elements[] = {
124     DOCX_TAG_CHILD(br),
125     DOCX_TAG_CHILD(t),
126     DOCX_TAG_CHILD(tab),
127     DOCX_TAG_CHILD(drawing),
128     DOCX_TAG_CHILD(rPr),
129     DOCX_TAG_CHILD(footnoteReference),
130     DOCX_TAG_CHILD(footnoteRef),
131     DOCX_TAG_CHILD(endnoteReference),
132     DOCX_TAG_CHILD(endnoteRef),
133     DOCX_TAG_CHILD(fldChar),
134     DOCX_TAG_CHILD(instrText),
135     DOCX_LAST_ITEM
136 };
137 
138 const struct item_def_t hyperlink_elements[] = {
139     DOCX_TAG_CHILD(r),
140     DOCX_LAST_ITEM
141 };
142 
143 const struct item_def_t drawing_elements[] = {
144     DOCX_TAG_CHILD(blipFill),
145     DOCX_TAG_CHILD(blip),
146     DOCX_TAG_CHILD(graphic),
147     DOCX_TAG_CHILD(graphicData),
148     DOCX_TAG_CHILD(inline),
149     DOCX_TAG_CHILD(anchor),
150     DOCX_TAG_CHILD(pic),
151     DOCX_LAST_ITEM
152 };
153 
154 const struct item_def_t tbl_elements[] = {
155     DOCX_TAG_CHILD(bookmarkStart),
156     DOCX_TAG_CHILD(tblPr),
157     DOCX_TAG_CHILD(tblGrid),
158     DOCX_TAG_CHILD(tcPr),
159     DOCX_TAG_CHILD(gridCol),
160     DOCX_TAG_CHILD(gridSpan),
161     DOCX_TAG_CHILD(tr),
162     DOCX_TAG_CHILD(tc),
163     DOCX_TAG_CHILD(p),
164     DOCX_TAG_CHILD(vMerge),
165     DOCX_LAST_ITEM
166 };
167 
168 const struct item_def_t lvl_elements[] = {
169     DOCX_TAG_CHILD(isLgl),
170     DOCX_TAG_CHILD(lvlJc),
171     DOCX_TAG_CHILD(lvlRestart),
172     DOCX_TAG_CHILD(lvlText),
173     DOCX_TAG_CHILD(numFmt),
174     DOCX_TAG_CHILD(pPr),
175     DOCX_TAG_CHILD(pStyle),
176     DOCX_TAG_CHILD(rPr),
177     DOCX_TAG_CHILD(start),
178     DOCX_TAG_CHILD(suff),
179     DOCX_LAST_ITEM
180 };
181 
182 const struct item_def_t numbering_elements[] = {
183     DOCX_TAG_CHILD(numbering),
184     DOCX_TAG_CHILD(abstractNum),
185     DOCX_TAG_CHILD(num),
186     DOCX_LAST_ITEM
187 };
188 
189 const struct item_def_t abstractNum_elements[] = {
190     DOCX_TAG_CHILD(lvl),
191     DOCX_LAST_ITEM
192 };
193 
194 const struct item_def_t num_elements[] = {
195     DOCX_TAG_CHILD(abstractNumId),
196     DOCX_TAG_CHILD(lvlOverride),
197     DOCX_LAST_ITEM
198 };
199 
200 
201 const struct item_def_t document_elements[] = {
202     DOCX_TAG_CHILD(document),
203     DOCX_TAG_CHILD(body),
204     DOCX_TAG_CHILD(p),
205     DOCX_TAG_CHILD(tbl),
206     DOCX_LAST_ITEM
207 };
208 
209 const struct item_def_t footnotes_elements[] = {
210     DOCX_TAG_CHILD(footnotes),
211     DOCX_TAG_CHILD(footnote),
212     DOCX_TAG_CHILD(endnotes),
213     DOCX_TAG_CHILD(endnote),
214     DOCX_TAG_CHILD(p),
215     DOCX_LAST_ITEM
216 };
217 
218 const struct item_def_t no_elements[] = {
219     DOCX_LAST_ITEM
220 };
221 
222 const struct item_def_t jc_attr_values[] = {
223     { css_ta_left, U"left"},
224     { css_ta_right, U"right" },
225     { css_ta_center, U"center" },
226     { css_ta_justify, U"both" },
227     DOCX_LAST_ITEM
228 };
229 
230 const struct item_def_t vertAlign_attr_values[] = {
231     { css_va_baseline, U"baseline"},
232     { css_va_super, U"superscript" },
233     { css_va_sub, U"subscript" },
234     DOCX_LAST_ITEM
235 };
236 
237 const struct item_def_t textAlignment_attr_values[] = {
238     { css_va_inherit, U"auto" },
239     { css_va_baseline, U"baseline"},
240     { css_va_bottom, U"bottom"},
241     { css_va_middle, U"center" },
242     { css_va_top, U"top" },
243     DOCX_LAST_ITEM
244 };
245 
246 const struct item_def_t lineRule_attr_values[] = {
247     { odx_lineRule_atLeast, U"atLeast" },
248     { odx_lineRule_auto, U"auto"},
249     { odx_lineRule_exact, U"exact"},
250     DOCX_LAST_ITEM
251 };
252 
253 const struct item_def_t styleType_attr_values[] = {
254     { odx_paragraph_style, U"paragraph" },
255     { odx_character_style, U"character"},
256     { odx_numbering_style, U"numbering"},
257     { odx_table_style, U"table"},
258     DOCX_LAST_ITEM
259 };
260 
261 const struct item_def_t lvlSuff_attr_values[] = {
262     { docx_level_suffix_tab, U"tab" },
263     { docx_level_suffix_space, U"space" },
264     { docx_level_suffix_nothing, U"nothing" },
265     DOCX_LAST_ITEM
266 };
267 
268 #define DOCX_TAG(itm)
269 #define DOCX_NUM_FMT(itm) { docx_numFormat_##itm , U ## #itm },
270 const struct item_def_t numFmt_attr_values[] = {
271     #include "docxdtd.inc"
272     DOCX_LAST_ITEM
273 };
274 
DetectDocXFormat(LVStreamRef stream)275 bool DetectDocXFormat( LVStreamRef stream )
276 {
277     LVContainerRef m_arc = LVOpenArchieve( stream );
278     if ( m_arc.isNull() )
279         return false; // not a ZIP archive
280 
281     OpcPackage package(m_arc);
282 
283     return package.partExist(package.getContentPartName(docx_DocumentContentType));
284 }
285 
286 class docxImportContext;
287 
288 class docxNumLevel : public LVRefCounter
289 {
290 private:
291     bool m_isLgl;
292     css_text_align_t m_lvlJc;
293     css_length_t m_ilvl;
294     css_length_t m_lvlRestart;
295     lString32 m_lvlText;
296     bool m_lvlTextNull;
297     docx_numFormat_type m_lvlNumFormat;
298     odx_pPr m_pPr;
299     odx_rPr m_rPr;
300     lString32 m_pStyle;
301     css_length_t m_lvlStart;
302     docx_LevelSuffix_type m_suffix;
303 public:
304     docxNumLevel();
~docxNumLevel()305     virtual ~docxNumLevel() {}
306     void reset();
307     ///properties
isLgl() const308     inline bool isLgl() const { return m_isLgl; }
setLgl(bool value)309     inline void setLgl(bool value) { m_isLgl = value; }
310 
getLevelAlign() const311     inline css_text_align_t getLevelAlign() const { return m_lvlJc; }
setLevelAlign(css_text_align_t value)312     inline void setLevelAlign( css_text_align_t value ) { m_lvlJc = value; }
getLevel() const313     inline css_length_t getLevel() const { return m_ilvl; }
setLevel(const css_length_t & value)314     inline void setLevel(const css_length_t &value) { m_ilvl = value; }
getLevelRestart() const315     inline css_length_t getLevelRestart() const { return m_lvlRestart; }
setLevelRestart(const css_length_t & value)316     inline void setLevelRestart(const css_length_t &value) { m_lvlRestart = value; }
getLevelText() const317     inline lString32 getLevelText() const { return m_lvlText; }
setLevelText(const lString32 value)318     inline void setLevelText(const lString32 value) { m_lvlText = value; }
getLevelTextNull() const319     inline bool getLevelTextNull() const { return m_lvlTextNull; }
setLevelTextNull(const bool value)320     inline void setLevelTextNull(const bool value) { m_lvlTextNull = value; }
getNumberFormat() const321     inline docx_numFormat_type getNumberFormat() const { return m_lvlNumFormat; }
setNumberFormat(const docx_numFormat_type value)322     inline void setNumberFormat(const docx_numFormat_type value) { m_lvlNumFormat = value; }
getReferencedStyleId() const323     inline lString32 getReferencedStyleId() const { return m_pStyle; }
setReferencedStyleId(const lString32 value)324     inline void setReferencedStyleId(const lString32 value) { m_pStyle = value; }
getLevelStart() const325     inline css_length_t getLevelStart() const { return m_lvlStart; }
setLevelStart(const css_length_t & value)326     inline void setLevelStart(const css_length_t &value) { m_lvlStart = value; }
getLevelSuffix() const327     inline docx_LevelSuffix_type getLevelSuffix() const { return m_suffix; }
setLevelSuffix(const docx_LevelSuffix_type value)328     inline void setLevelSuffix(const docx_LevelSuffix_type value) { m_suffix = value; }
get_rPr()329     inline odx_rPr * get_rPr() { return &m_rPr; }
get_pPr()330     inline odx_pPr * get_pPr() { return &m_pPr; }
331     css_list_style_type_t getListType() const;
332 };
333 
334 typedef LVFastRef< docxNumLevel > docxNumLevelRef;
335 
336 class docxAbstractNum : public LVRefCounter
337 {
338 private:
339     docx_multilevel_type m_multilevel;
340     css_length_t m_abstractNumId;
341     LVHashTable<lUInt32, docxNumLevelRef> m_levels;
342 public:
343     docxAbstractNum();
344     docxNumLevel* getLevel(int level);
345     void addLevel(docxNumLevelRef docxLevel);
setId(int id)346     void setId(int id) { m_abstractNumId.value = id; m_abstractNumId.type = css_val_in; }
getId()347     int getId() { return m_abstractNumId.value; }
~docxAbstractNum()348     virtual ~docxAbstractNum() {}
349     void reset();
350 };
351 
352 typedef LVFastRef< docxAbstractNum > docxAbstractNumRef;
353 
354 class docxNum : public LVRefCounter
355 {
356 private:
357     css_length_t m_id;
358     css_length_t m_abstractNumId;
359     LVHashTable<lUInt32, docxNumLevelRef> m_overrides;
360 public:
docxNum()361     docxNum() : m_id(css_val_unspecified, 0), m_abstractNumId(css_val_unspecified, 0),
362         m_overrides(10) {
363     }
364     const docxAbstractNumRef getBase(docxImportContext &context) const;
setId(int id)365     void setId(int id) { m_id.value = id; m_id.type = css_val_in; }
getId() const366     int getId() const { return m_id.value; }
setBaseId(int id)367     void setBaseId(int id) { m_abstractNumId.value = id; m_abstractNumId.type = css_val_in; }
getBaseId() const368     int getBaseId() const { return m_abstractNumId.value; }
369     void overrideLevel(docxNumLevelRef docxLevel);
370     docxNumLevel* getDocxLevel(docxImportContext &context, int level);
371     bool isValid() const;
372     void reset();
373 };
374 
375 typedef LVFastRef< docxNum > docxNumRef;
376 
377 class docxImportContext : public odx_ImportContext
378 {
379 private:
380     LVHashTable<lUInt32, docxAbstractNumRef> m_abstractNumbers;
381     LVHashTable<lUInt32, docxNumRef> m_Numbers;
382     LVArray<css_list_style_type_t> m_ListLevels;
383     OpcPartRef m_docPart;
384     OpcPartRef m_relatedPart;
385     OpcPackage* m_package;
386 public:
387     docxImportContext(OpcPackage *package, ldomDocument * doc);
388     virtual ~docxImportContext();
389     void addNum( docxNumRef num );
390     void addAbstractNum(docxAbstractNumRef abstractNum );
getNum(lUInt32 id)391     docxNumRef getNum(lUInt32 id) { return m_Numbers.get(id); }
getAbstractNum(lUInt32 id)392     docxAbstractNumRef getAbstractNum(lUInt32 id) { return m_abstractNumbers.get(id); }
getImageTarget(lString32 id)393     lString32 getImageTarget(lString32 id) {
394         return getRelationTarget(docx_ImageRelationship, id);
395     }
getLinkTarget(lString32 id)396     lString32 getLinkTarget(lString32 id) {
397         return getRelationTarget(docx_HyperlinkRelationship, id);
398     }
getRelationTarget(const lChar32 * const relationType,lString32 id)399     lString32 getRelationTarget(const lChar32 * const relationType, lString32 id) {
400         if ( !m_relatedPart.isNull() )
401             return m_relatedPart->getRelatedPartName(relationType, id);
402         return m_docPart->getRelatedPartName(relationType, id);
403     }
404     LVStreamRef openContentPart(const lChar32 * const contentType);
405     LVStreamRef openRelatedPart(const lChar32 * const relationshipType);
406     void closeRelatedPart();
407     void openList(int level, int numid, ldomDocumentWriter *writer);
408     void closeList(int level, ldomDocumentWriter *writer);
getListLevel()409     inline int getListLevel() { return m_ListLevels.length(); }
isInList()410     inline bool isInList() { return m_ListLevels.length() != 0; }
411     lString32 m_footNoteId;
412     int m_footNoteCount;
413     int m_endNoteCount;
414     bool m_inField;
415     ldomNode *m_linkNode;
416     odx_Style* m_pStyle;
417 private:
418     lString32 getListStyle(css_list_style_type_t listType);
419 };
420 
421 class docx_ElementHandler : public xml_ElementHandler
422 {
423 protected:
424     docxImportContext *m_importContext;
425 protected:
426     static bool parse_OnOff_attribute(const lChar32 * attrValue);
427     void generateLink(const lChar32 * target, const lChar32 * type, const lChar32 *text);
docx_ElementHandler(docXMLreader * reader,ldomDocumentWriter * writer,docxImportContext * context,int element,const struct item_def_t * children)428     docx_ElementHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context,
429                         int element, const struct item_def_t *children) : xml_ElementHandler(reader, writer, element, children),
430         m_importContext(context)
431     {
432     }
~docx_ElementHandler()433     virtual ~docx_ElementHandler() {}
434 };
435 
436 class docx_rPrHandler : public docx_ElementHandler
437 {
438 private:
439     odx_rPr *m_rPr;
440 public:
docx_rPrHandler(docXMLreader * reader,ldomDocumentWriter * writer,docxImportContext * context)441     docx_rPrHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context) :
442         docx_ElementHandler(reader, writer, context, docx_el_rPr, rPr_elements), m_rPr(NULL)
443     {
444     }
445     ldomNode * handleTagOpen(int tagId);
446     void handleAttribute(const lChar32 * attrname, const lChar32 * attrvalue);
447     void start(odx_rPr *rPr);
448     void reset();
449 };
450 
451 class docx_drawingHandler  : public docx_ElementHandler
452 {
453 private:
454     int m_level;
455 public:
docx_drawingHandler(docXMLreader * reader,ldomDocumentWriter * writer,docxImportContext * context)456     docx_drawingHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context) :
457         docx_ElementHandler(reader, writer, context, docx_el_drawing, drawing_elements), m_level(0)
458     {
459     }
460     ldomNode * handleTagOpen(int tagId);
461     void handleAttribute(const lChar32 * attrname, const lChar32 * attrvalue);
462     void handleTagClose( const lChar32 * nsname, const lChar32 * tagname );
reset()463     void reset() { m_level = 1; }
464 };
465 
466 class docx_pHandler;
467 class docx_rHandler : public docx_ElementHandler
468 {
469 private:
470     odx_rPr m_rPr;
471     docx_pHandler* m_pHandler;
472     docx_rPrHandler m_rPrHandler;
473     lString32 m_footnoteId;
474     lString32 m_instruction;
475     docx_drawingHandler m_drawingHandler;
476     bool m_content;
477 private:
478     void handleInstruction(lString32& instruction, lString32 parameters);
479 public:
docx_rHandler(docXMLreader * reader,ldomDocumentWriter * writer,docxImportContext * context,docx_pHandler * pHandler)480     docx_rHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context, docx_pHandler* pHandler) :
481         docx_ElementHandler(reader, writer, context, docx_el_r, r_elements), m_pHandler(pHandler),
482         m_rPrHandler(reader, writer, context),
483         m_drawingHandler(reader, writer, context),
484         m_content(false)
485     {
486     }
487     ldomNode * handleTagOpen(int tagId);
488     void handleAttribute(const lChar32 * attrname, const lChar32 * attrvalue);
489     void handleText( const lChar32 * text, int len, lUInt32 flags );
490     void handleTagClose( const lChar32 * nsname, const lChar32 * tagname );
491     void reset();
492 };
493 
494 class docx_pPrHandler : public docx_ElementHandler
495 {
496 private:
497     odx_pPr *m_pPr;
498 public:
docx_pPrHandler(docXMLreader * reader,ldomDocumentWriter * writer,docxImportContext * context)499     docx_pPrHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context) :
500         docx_ElementHandler(reader, writer, context, docx_el_pPr, pPr_elements), m_pPr(NULL)
501     {
502     }
503     ldomNode * handleTagOpen(int tagId);
504     void handleAttribute(const lChar32 * attrname, const lChar32 * attrvalue);
505     void handleTagClose( const lChar32 * nsname, const lChar32 * tagname );
506     void start(odx_pPr *pPr);
507     void reset();
508 };
509 
510 class docx_hyperlinkHandler  : public docx_ElementHandler
511 {
512     docx_rHandler m_rHandler;
513     lString32 m_target;
514     int m_runCount;
515 public:
docx_hyperlinkHandler(docXMLreader * reader,ldomDocumentWriter * writer,docxImportContext * context,docx_pHandler * pHandler)516     docx_hyperlinkHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context, docx_pHandler* pHandler) :
517         docx_ElementHandler(reader, writer, context, docx_el_hyperlink, hyperlink_elements),
518         m_rHandler(reader, writer, context, pHandler), m_runCount(0)
519     {
520     }
521     ldomNode * handleTagOpen(int tagId);
522     void handleAttribute(const lChar32 * attrname, const lChar32 * attrvalue);
523     void handleTagClose( const lChar32 * nsname, const lChar32 * tagname );
reset()524     void reset() { m_target.clear(); m_rHandler.reset(); m_runCount = 0; }
525 };
526 
527 class docx_documentHandler;
528 
529 class docx_pHandler : public docx_ElementHandler, public odx_styleTagsHandler
530 {
531 private:
532     docx_pPrHandler m_pPrHandler;
533     odx_pPr m_pPr;
534     docx_rHandler m_rHandler;
535     odx_titleHandler* m_titleHandler;
536     docx_hyperlinkHandler m_hyperlinkHandler;
537     int m_runCount;
538     bool m_inTitle;
539 public:
docx_pHandler(docXMLreader * reader,ldomDocumentWriter * writer,docxImportContext * context,odx_titleHandler * p_documentHandler)540     docx_pHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context, odx_titleHandler* p_documentHandler) :
541         docx_ElementHandler(reader, writer, context, docx_el_p, p_elements),
542         m_pPrHandler(reader, writer, context),
543         m_rHandler(reader, writer, context, this),
544         m_titleHandler(p_documentHandler),
545         m_hyperlinkHandler(reader, writer, context, this), m_runCount(0), m_inTitle(false)
546     {
547     }
548     ldomNode * handleTagOpen(int tagId);
549     void handleAttribute(const lChar32 * attrname, const lChar32 * attrvalue);
550     void handleTagClose( const lChar32 * nsname, const lChar32 * tagname );
551     void reset();
552 };
553 
554 struct docx_row_span_info {
555     ldomNode *column;
556     int rows;
docx_row_span_infodocx_row_span_info557     docx_row_span_info() : column(NULL), rows(1) {}
docx_row_span_infodocx_row_span_info558     docx_row_span_info(ldomNode *column) : column(column), rows(1) {}
559 };
560 
561 class docx_tblHandler : public docx_ElementHandler
562 {
563 private:
564     LVArray<int> m_levels;
565     LVArray<docx_row_span_info> m_rowSpaninfo;
566     int m_rowCount;
567     odx_titleHandler m_titleHandler;
568     docx_pHandler m_pHandler;
569     xml_SkipElementHandler m_skipHandler;
570     xml_ElementHandler* m_pHandler_;
571     int m_colSpan;
572     int m_column;
573     int m_columnCount;
574     enum vMergeState_tyep {
575         VMERGE_NONE,
576         VMERGE_RESET,
577         VMERGE_CONTINUE
578     };
579     int m_vMergeState;
580     void endRowSpan(int column);
581 public:
docx_tblHandler(docXMLreader * reader,ldomDocumentWriter * writer,docxImportContext * context,odx_titleHandler * titleHandler)582     docx_tblHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context, odx_titleHandler* titleHandler) :
583         docx_ElementHandler(reader, writer, context, docx_el_tbl, tbl_elements),
584         m_rowCount(0), m_titleHandler(writer, titleHandler->useClassForTitle()),
585         m_pHandler(reader, writer, context, &m_titleHandler),
586         m_skipHandler(reader, writer, docx_el_p), m_pHandler_(NULL), m_colSpan(1),
587         m_column(0), m_columnCount(0), m_vMergeState(VMERGE_NONE)
588     {
589     }
590     ldomNode * handleTagOpen(int tagId);
591     void handleAttribute(const lChar32 * attrname, const lChar32 * attrvalue);
592     void handleTagClose( const lChar32 * nsname, const lChar32 * tagname );
593     void reset();
594 };
595 
596 class docx_footnotesHandler : public docx_ElementHandler
597 {
598 private:
599     bool m_normal;
600     int m_pCount;
601     docx_pHandler paragraphHandler;
602 private:
isEndNote()603     bool isEndNote() { return m_element == docx_el_endnotes; }
604 public:
docx_footnotesHandler(docXMLreader * reader,ldomDocumentWriter * writer,docxImportContext * context,int element)605     docx_footnotesHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context, int element) :
606         docx_ElementHandler(reader, writer, context, element, footnotes_elements), m_normal(), m_pCount(),
607         paragraphHandler(reader, writer, context, NULL)
608     {
609     }
610     ldomNode * handleTagOpen(int tagId);
611     void handleAttribute(const lChar32 * attrname, const lChar32 * attrvalue);
612     void handleTagClose( const lChar32 * nsname, const lChar32 * tagname );
613 };
614 
615 class docx_documentHandler : public docx_ElementHandler
616 {
617 private:
618     docx_pHandler paragraphHandler;
619     docx_tblHandler m_tableHandler;
620 protected:
621     odx_titleHandler* m_titleHandler;
622 public:
docx_documentHandler(docXMLreader * reader,ldomDocumentWriter * writer,docxImportContext * context,odx_titleHandler * titleHandler)623     docx_documentHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context, odx_titleHandler* titleHandler) :
624         docx_ElementHandler(reader, writer, context, docx_el_document, document_elements),
625         paragraphHandler(reader, writer, context, titleHandler),
626         m_tableHandler(reader, writer, context, titleHandler), m_titleHandler(titleHandler)
627     {
628     }
629     ldomNode * handleTagOpen(int tagId);
630     void handleAttribute(const lChar32 * nsname, const lChar32 * attrname, const lChar32 * attrvalue);
631     void handleTagClose( const lChar32 * nsname, const lChar32 * tagname );
632 };
633 
634 class docx_styleHandler : public docx_ElementHandler
635 {
636 private:
637     docx_pPrHandler m_pPrHandler;
638     docx_rPrHandler m_rPrHandler;
639     odx_StyleRef m_styleRef;
640     odx_Style *m_style;
641 public:
642     /// constructor
docx_styleHandler(docXMLreader * reader,ldomDocumentWriter * writer,docxImportContext * context)643     docx_styleHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context) :
644         docx_ElementHandler(reader, writer, context, docx_el_style, style_elements),
645         m_pPrHandler(reader, writer, context),
646         m_rPrHandler(reader, writer, context)
647     {
648     }
649     ldomNode * handleTagOpen(int tagId);
650     void handleAttribute(const lChar32 * attrname, const lChar32 * attrvalue);
651     void handleTagClose( const lChar32 * nsname, const lChar32 * tagname );
652     void start();
653 };
654 
655 class docx_stylesHandler : public docx_ElementHandler
656 {
657 private:
658     docx_styleHandler m_styleHandler;
659     docx_pPrHandler m_pPrHandler;
660     docx_rPrHandler m_rPrHandler;
661 public:
662     /// constructor
docx_stylesHandler(docXMLreader * reader,ldomDocumentWriter * writer,docxImportContext * context)663     docx_stylesHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context) :
664         docx_ElementHandler(reader, writer, context, docx_el_styles, styles_elements),
665         m_styleHandler(reader, writer, context),
666         m_pPrHandler(reader, writer, context),
667         m_rPrHandler(reader, writer, context)
668     {
669     }
670     /// destructor
671     ldomNode * handleTagOpen(int tagId);
672     void handleTagClose( const lChar32 * nsname, const lChar32 * tagname );
673     void reset();
674 };
675 
676 class docx_lvlHandler : public docx_ElementHandler
677 {
678 private:
679     docxNumLevel *m_lvl;
680     docx_pPrHandler m_pPrHandler;
681     docx_rPrHandler m_rPrHandler;
682 public:
683     /// constructor
docx_lvlHandler(docXMLreader * reader,ldomDocumentWriter * writer,docxImportContext * context)684     docx_lvlHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context) :
685         docx_ElementHandler(reader, writer, context, docx_el_lvl, lvl_elements),
686         m_pPrHandler(reader, writer, context),
687         m_rPrHandler(reader, writer, context)
688     {
689     }
start(docxNumLevel * level)690     void start(docxNumLevel* level) {
691         m_lvl = level;
692         docx_ElementHandler::start();
693     }
694     void handleAttribute(const lChar32 * attrname, const lChar32 * attrvalue);
695     ldomNode * handleTagOpen(int tagId);
696     void reset();
697 };
698 
699 class docx_numHandler : public docx_ElementHandler
700 {
701     docx_lvlHandler m_lvlHandler;
702     docxNumRef m_numRef;
703     docxNumLevelRef m_levelRef;
704 public:
docx_numHandler(docXMLreader * reader,ldomDocumentWriter * writer,docxImportContext * context)705     docx_numHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context) :
706         docx_ElementHandler(reader, writer, context, docx_el_num, num_elements),
707         m_lvlHandler(reader, writer, context)
708     {
709     }
710     void handleAttribute(const lChar32 * attrname, const lChar32 * attrvalue);
711     ldomNode * handleTagOpen(int tagId);
712     void handleTagClose( const lChar32 * nsname, const lChar32 * tagname );
713     void start();
714 };
715 
716 class docx_abstractNumHandler : public docx_ElementHandler
717 {
718     docx_lvlHandler m_lvlHandler;
719     docxNumLevelRef m_levelRef;
720     docxAbstractNumRef m_abstractNumRef;
721 public:
docx_abstractNumHandler(docXMLreader * reader,ldomDocumentWriter * writer,docxImportContext * context)722     docx_abstractNumHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context) :
723         docx_ElementHandler(reader, writer, context, docx_el_abstractNum, abstractNum_elements),
724         m_lvlHandler(reader, writer, context)
725     {
726     }
727     void handleAttribute(const lChar32 * attrname, const lChar32 * attrvalue);
728     ldomNode * handleTagOpen(int tagId);
729     void handleTagClose( const lChar32 * nsname, const lChar32 * tagname );
730     void start();
731 };
732 
733 class docx_numberingHandler : public docx_ElementHandler
734 {
735 private:
736     docx_numHandler m_numHandler;
737     docx_abstractNumHandler m_abstractNumHandler;
738 public:
739     /// constructor
docx_numberingHandler(docXMLreader * reader,ldomDocumentWriter * writer,docxImportContext * context)740     docx_numberingHandler(docXMLreader * reader, ldomDocumentWriter *writer, docxImportContext *context) :
741         docx_ElementHandler(reader, writer, context, docx_el_numbering, numbering_elements),
742         m_numHandler(reader, writer, context),
743         m_abstractNumHandler(reader, writer, context)
744     {
745     }
746     ldomNode * handleTagOpen(int tagId);
747     void handleTagClose( const lChar32 * nsname, const lChar32 * tagname );
748 };
749 
750 
docxNumLevel()751 docxNumLevel::docxNumLevel() :
752     m_isLgl(false), m_lvlJc(css_ta_inherit), m_ilvl(css_val_unspecified, 0),
753     m_lvlRestart(css_val_unspecified, 0), m_lvlTextNull(false), m_lvlNumFormat(docx_numFormat_ordinal),
754     m_lvlStart(css_val_unspecified, 0), m_suffix(docx_level_suffix_space)
755 {
756 }
757 
reset()758 void docxNumLevel::reset()
759 {
760     m_isLgl = false;
761     m_lvlJc = css_ta_inherit;
762     m_ilvl.type = css_val_unspecified;
763     m_lvlRestart.type = css_val_unspecified;
764     m_lvlText.clear();
765     m_lvlTextNull = false;
766     m_lvlNumFormat = docx_numFormat_ordinal;
767     m_pPr.reset();
768     m_rPr.reset();
769     m_pStyle.clear();
770     m_lvlStart.type = css_val_unspecified;
771     m_suffix = docx_level_suffix_space;
772 }
773 
getListType() const774 css_list_style_type_t docxNumLevel::getListType() const
775 {
776     if(m_isLgl)
777         return css_lst_decimal;
778     switch(m_lvlNumFormat) {
779     case docx_numFormat_lowerLetter:
780         return css_lst_lower_alpha;
781     case docx_numFormat_lowerRoman:
782         return css_lst_lower_roman;
783     case docx_numFormat_upperLetter:
784         return css_lst_upper_alpha;
785     case docx_numFormat_upperRoman:
786         return css_lst_upper_roman;
787     case docx_numFormat_bullet:
788         if ( getLevelText() == U"\xf0a7" )
789             return css_lst_square;
790         return css_lst_disc;
791     case docx_numFormat_decimal:
792         return css_lst_decimal;
793     default:
794         return css_lst_none;
795     }
796 }
797 
parse_OnOff_attribute(const lChar32 * attrValue)798 bool docx_ElementHandler::parse_OnOff_attribute(const lChar32 * attrValue)
799 {
800     if ( !lStr_cmp(attrValue, "1") || !lStr_cmp(attrValue, "on") || !lStr_cmp(attrValue, "true") )
801         return true;
802     return false;
803 }
804 
generateLink(const lChar32 * target,const lChar32 * type,const lChar32 * text)805 void docx_ElementHandler::generateLink(const lChar32 *target, const lChar32 *type, const lChar32 *text)
806 {
807     m_writer->OnTagOpen(U"", U"a");
808     m_writer->OnAttribute(U"", U"href", target );
809     if(type)
810         m_writer->OnAttribute(U"", U"type", type);
811     // Add classic role=doc-noteref attribute to allow popup/in-page footnotes
812     m_writer->OnAttribute(U"", U"role", U"doc-noteref");
813     m_writer->OnTagBody();
814 #ifndef ODX_CRENGINE_IN_PAGE_FOOTNOTES
815     if( !lStr_cmp(type, "note") ) {
816         // For footnotes (but not endnotes), wrap in <sup> (to get the
817         // same effect as the following in docx.css:
818         //   a[type="note"] { vertical-align: super; font-size: 70%; }
819         m_writer->OnTagOpen(U"", U"sup");
820         m_writer->OnTagBody();
821     }
822 #endif
823     lString32 t(text);
824     m_writer->OnText(t.c_str(), t.length(), 0);
825 #ifndef ODX_CRENGINE_IN_PAGE_FOOTNOTES
826     if( !lStr_cmp(type, "note") ) {
827         m_writer->OnTagClose(U"", U"sup");
828     }
829 #endif
830     m_writer->OnTagClose(U"", U"a");
831 }
832 
handleTagOpen(int tagId)833 ldomNode * docx_rPrHandler::handleTagOpen(int tagId)
834 {
835     m_state = tagId;
836     switch(tagId) {
837     case docx_el_b:
838         m_rPr->setBold(true);
839         break;
840     case docx_el_i:
841         m_rPr->setItalic(true);
842         break;
843     case docx_el_u:
844         m_rPr->setUnderline(true);
845         break;
846     case docx_el_vanish:
847         m_rPr->setHidden(true);
848         break;
849     case docx_el_strike:
850         m_rPr->setStrikeThrough(true);
851         break;
852     default:
853         break;
854     }
855     return NULL;
856 }
857 
handleAttribute(const lChar32 * attrname,const lChar32 * attrvalue)858 void docx_rPrHandler::handleAttribute(const lChar32 * attrname, const lChar32 * attrvalue)
859 {
860     int attr_value;
861     switch(m_state) {
862     case docx_el_lang:
863         if( !lStr_cmp(attrname, "val") ) {
864             if( m_rPr == m_importContext->get_rPrDefault() ) {
865                 m_importContext->setLanguage(attrvalue);
866             }
867         }
868         break;
869     case docx_el_color:
870         // todo
871         break;
872     case docx_el_b:
873         if( !lStr_cmp(attrname, "val") )
874             m_rPr->setBold(parse_OnOff_attribute( attrvalue ));
875         break;
876     case docx_el_i:
877         if( !lStr_cmp(attrname, "val") )
878             m_rPr->setItalic(parse_OnOff_attribute( attrvalue ));
879         break;
880     case docx_el_u:
881         if( !lStr_cmp(attrname, "val") )
882             m_rPr->setUnderline( lStr_cmp(attrvalue, "none") != 0);
883         break;
884     case docx_el_jc:
885         if( !lStr_cmp(attrname, "val") ) {
886             attr_value = parse_name(jc_attr_values, attrvalue);
887             if(attr_value != -1)
888                 m_rPr->setTextAlign((css_text_align_t)attr_value);
889         }
890         break;
891     case docx_el_rFonts:
892         //todo
893         break;
894     case docx_el_rStyle:
895         m_rPr->setStyleId(m_importContext, attrvalue);
896         break;
897     case docx_el_strike:
898         if( !lStr_cmp(attrname, "val") )
899             m_rPr->setStrikeThrough(parse_OnOff_attribute(attrvalue));
900         break;
901     case docx_el_vertAlign:
902         if( !lStr_cmp(attrname, "val") ) {
903             attr_value = parse_name(vertAlign_attr_values, attrvalue);
904             if(attr_value != -1)
905                 m_rPr->setVertAlign((css_vertical_align_t)attr_value);
906         }
907         break;
908     case docx_el_sz:
909         //todo
910         break;
911     case docx_el_vanish:
912         if ( !lStr_cmp(attrname, "val") )
913             m_rPr->setHidden(parse_OnOff_attribute(attrvalue));
914         break;
915     default:
916         break;
917     }
918 }
919 
reset()920 void docx_rPrHandler::reset()
921 {
922     m_state = m_element;
923     if (m_rPr)
924         m_rPr->reset();
925 }
926 
start(odx_rPr * const rPr)927 void docx_rPrHandler::start(odx_rPr * const rPr)
928 {
929     m_rPr = rPr;
930     docx_ElementHandler::start();
931 }
932 
handleInstruction(lString32 & instruction,lString32 parameters)933 void docx_rHandler::handleInstruction(lString32 &instruction, lString32 parameters)
934 {
935     if( instruction == cs32("REF") || instruction == cs32("NOTEREF") || instruction == cs32("PAGEREF") ) {
936         lString32 argument, switches;
937         if( parameters.split2( cs32(" "), argument, switches) && !argument.empty() )
938         {
939             m_importContext->m_linkNode = m_writer->OnTagOpen(U"", U"a");
940             lString32 target = U"#";
941             target  << argument;
942             m_writer->OnAttribute(U"", U"href", target.c_str());
943             m_writer->OnTagBody();
944         }
945     }
946 }
947 
handleTagOpen(int tagId)948 ldomNode *docx_rHandler::handleTagOpen(int tagId)
949 {
950     switch(tagId) {
951     case docx_el_br:
952     case docx_el_t:
953     case docx_el_tab:
954         if( !m_content ) {
955             if( m_importContext->m_pStyle )
956                 m_rPr.combineWith(m_importContext->m_pStyle->get_rPr(m_importContext));
957             m_rPr.combineWith(m_importContext->get_rPrDefault());
958             m_pHandler->closeStyleTags(&m_rPr, m_writer);
959             m_pHandler->openStyleTags(&m_rPr, m_writer);
960             m_content = true;
961         }
962         m_state = tagId;
963         break;
964     case docx_el_rPr:
965         m_rPrHandler.start(&m_rPr);
966         break;
967     case docx_el_footnoteRef:
968     case docx_el_endnoteRef:
969         m_state = tagId;
970         break;
971     case docx_el_drawing:
972         m_drawingHandler.start();
973         break;
974     case docx_el_footnoteReference:
975     case docx_el_endnoteReference:
976         m_footnoteId.clear();
977     default:
978         m_state = tagId;
979         break;
980     }
981     return NULL;
982 }
983 
handleAttribute(const lChar32 * attrname,const lChar32 * attrvalue)984 void docx_rHandler::handleAttribute(const lChar32 *attrname, const lChar32 *attrvalue)
985 {
986     if( (docx_el_footnoteReference == m_state || docx_el_endnoteReference == m_state) &&
987        !lStr_cmp(attrname, "id") ) {
988         m_footnoteId = attrvalue;
989     }
990     if( docx_el_fldChar == m_state && !lStr_cmp(attrname, "fldCharType") ) {
991         if( !lStr_cmp(attrvalue, "begin") ) {
992             m_importContext->m_inField = true;
993         } else if( !lStr_cmp(attrvalue, "end") ) {
994             if( m_importContext->m_linkNode ) {
995                 m_writer->OnTagClose(U"", U"a");
996                 m_importContext->m_linkNode = NULL;
997             }
998             m_importContext->m_inField = false;
999         }
1000     }
1001 }
1002 
handleText(const lChar32 * text,int len,lUInt32 flags)1003 void docx_rHandler::handleText(const lChar32 *text, int len, lUInt32 flags)
1004 {
1005     switch(m_state) {
1006     case docx_el_t:
1007         m_writer->OnText(text, len, flags);
1008         break;
1009     case docx_el_instrText:
1010         m_instruction = text;
1011         break;
1012     default:
1013         break;
1014     }
1015 }
1016 
handleTagClose(const lChar32 * nsname,const lChar32 * tagname)1017 void docx_rHandler::handleTagClose(const lChar32 *nsname, const lChar32 *tagname)
1018 {
1019     lChar32 nobsp = 0x00A0;
1020     CR_UNUSED2(nsname, tagname);
1021 
1022     switch(m_state) {
1023     case docx_el_br:
1024         m_writer->OnTagOpenAndClose(U"", U"br");
1025         m_state = docx_el_r;
1026         break;
1027     case docx_el_r:
1028         stop();
1029         break;
1030     case docx_el_tab:
1031         m_writer->OnText(&nobsp, 1, 0);
1032         m_state = docx_el_r;
1033         break;
1034     case docx_el_footnoteReference:
1035         if( !m_footnoteId.empty() ) {
1036             m_importContext->m_footNoteCount++;
1037             lString32 target = U"#n_";
1038             target  << m_footnoteId;
1039             generateLink(target.c_str(), U"note", m_footnoteId.c_str());
1040         }
1041         m_state = docx_el_r;
1042         break;
1043     case docx_el_instrText:
1044         if( m_importContext->m_inField ) {
1045             m_instruction.trim();
1046             if ( !m_instruction.empty() ) {
1047                 lString32 instruction, parameters;
1048                 if ( m_instruction.split2(cs32(" "), instruction, parameters) )
1049                     handleInstruction(instruction, parameters);
1050             }
1051         }
1052         m_state = docx_el_r;
1053         break;
1054     case docx_el_endnoteReference:
1055         if( !m_footnoteId.empty() ) {
1056             m_importContext->m_endNoteCount++;
1057             lString32 target = U"#c_";
1058             target  << m_footnoteId;
1059             generateLink(target.c_str(), U"comment", m_footnoteId.c_str());
1060         }
1061         m_state = docx_el_r;
1062         break;
1063     case docx_el_footnoteRef:
1064     case docx_el_endnoteRef:
1065         if(!m_importContext->m_footNoteId.empty()) {
1066             m_writer->OnTagOpen(U"", U"sup");
1067             m_writer->OnTagBody();
1068             m_writer->OnText(m_importContext->m_footNoteId.c_str(), m_importContext->m_footNoteId.length(), 0);
1069             m_writer->OnTagClose(U"", U"sup");
1070         }
1071     default:
1072         m_state = docx_el_r;
1073         break;
1074     }
1075 }
1076 
reset()1077 void docx_rHandler::reset()
1078 {
1079     m_rPrHandler.reset();
1080     m_state = docx_el_r;
1081     m_content = false;
1082 }
1083 
handleTagOpen(int tagId)1084 ldomNode * docx_pPrHandler::handleTagOpen(int tagId)
1085 {
1086     switch(tagId) {
1087     case docx_el_rPr:
1088         break;
1089     case docx_el_numPr:
1090         m_state = tagId;
1091         setChildrenInfo(numPr_elements);
1092         break;
1093     case docx_el_pageBreakBefore:
1094         m_state = tagId;
1095         m_pPr->setPageBreakBefore(true);
1096         break;
1097     case docx_el_keepNext:
1098         m_state = tagId;
1099         m_pPr->setKeepNext(true);
1100         break;
1101     case docx_el_mirrorIndents:
1102         m_state = tagId;
1103         m_pPr->setMirrorIndents(true);
1104         break;
1105     case docx_el_suppressAutoHyphens:
1106         m_pPr->setHyphenate(css_hyph_none);
1107         //fallthrough
1108     default:
1109         m_state = tagId;
1110     }
1111     return NULL;
1112 }
1113 
handleAttribute(const lChar32 * attrname,const lChar32 * attrvalue)1114 void docx_pPrHandler::handleAttribute(const lChar32 * attrname, const lChar32 * attrvalue)
1115 {
1116     switch(m_state) {
1117     case docx_el_pStyle:
1118         if( !lStr_cmp(attrname, "val") ) {
1119              m_pPr->setStyleId(m_importContext, attrvalue);
1120         }
1121         break;
1122     case docx_el_jc:
1123         if( !lStr_cmp(attrname, "val") ) {
1124             int attr_value = parse_name(jc_attr_values, attrvalue);
1125             if(attr_value != -1)
1126                 m_pPr->setTextAlign((css_text_align_t)attr_value);
1127         }
1128         break;
1129     case docx_el_spacing:
1130         if( !lStr_cmp(attrname, "line") ) {
1131             css_length_t val;
1132             parse_int(attrvalue, val);
1133             m_pPr->set(odx_p_line_spacing_prop, val);
1134         } else if( !lStr_cmp(attrname, "lineRule") ) {
1135             int attr_value = parse_name(lineRule_attr_values, attrvalue);
1136             if( -1 != attr_value )
1137                 m_pPr->setLineRule((odx_lineRule_type)attr_value);
1138         } else if ( !lStr_cmp(attrname, "afterAutospacing") ) {
1139             m_pPr->set(odx_p_after_auto_spacing_prop, parse_OnOff_attribute(attrvalue));
1140         } else if ( !lStr_cmp(attrname, "beforeAutospacing") ) {
1141             m_pPr->set(odx_p_before_auto_spacing_prop, parse_OnOff_attribute(attrvalue));
1142         } else {
1143             //todo
1144         }
1145         break;
1146     case docx_el_textAlignment:
1147         if( !lStr_cmp(attrname, "val") ) {
1148             int attr_value = parse_name(textAlignment_attr_values, attrvalue);
1149             if(attr_value != -1)
1150                 m_pPr->setVertAlign((css_vertical_align_t)attr_value);
1151         }
1152         break;
1153     case docx_el_ind:
1154         //todo
1155         break;
1156     case docx_el_ilvl:
1157         if( !lStr_cmp(attrname, "val") ) {
1158             css_length_t val;
1159             parse_int(attrvalue, val);
1160             m_pPr->set(odx_p_ilvl_prop, val.value);
1161         }
1162         break;
1163     case docx_el_numId:
1164         if( !lStr_cmp(attrname, "val") ) {
1165             css_length_t val;
1166             parse_int(attrvalue, val);
1167             m_pPr->set(odx_p_num_id_prop, val);
1168         }
1169         break;
1170     case docx_el_outlineLvl:
1171         if( !lStr_cmp(attrname, "val") ) {
1172             css_length_t val;
1173             parse_int(attrvalue, val);
1174             m_pPr->set(odx_p_outline_level_prop, val);
1175         }
1176         break;
1177     case docx_el_pageBreakBefore:
1178         if( !lStr_cmp(attrname, "val") )
1179             m_pPr->setPageBreakBefore(parse_OnOff_attribute(attrvalue));
1180         break;
1181     case docx_el_keepNext:
1182         if( !lStr_cmp(attrname, "val") )
1183             m_pPr->setKeepNext(parse_OnOff_attribute(attrvalue));
1184         break;
1185     case docx_el_mirrorIndents:
1186         if( !lStr_cmp(attrname, "val") )
1187             m_pPr->setMirrorIndents(parse_OnOff_attribute(attrvalue));
1188         break;
1189     case docx_el_suppressAutoHyphens:
1190         if( !lStr_cmp(attrname, "val") && !parse_OnOff_attribute(attrvalue) )
1191             m_pPr->setHyphenate(css_hyph_auto);
1192         break;
1193     default:
1194         break;
1195     }
1196 }
1197 
handleTagClose(const lChar32 * nsname,const lChar32 * tagname)1198 void docx_pPrHandler::handleTagClose( const lChar32 * nsname, const lChar32 * tagname )
1199 {
1200     switch(m_state) {
1201     case docx_el_ilvl:
1202     case docx_el_numId:
1203         m_state = docx_el_numPr;
1204         break;
1205     case docx_el_numPr:
1206         setChildrenInfo(pPr_elements);
1207         //falltrrough
1208     default:
1209         docx_ElementHandler::handleTagClose(nsname, tagname);
1210         break;
1211     }
1212 }
1213 
reset()1214 void docx_pPrHandler::reset()
1215 {
1216     if(m_pPr)
1217         m_pPr->reset();
1218 }
1219 
start(odx_pPr * pPr)1220 void docx_pPrHandler::start(odx_pPr *pPr)
1221 {
1222     m_pPr = pPr;
1223     docx_ElementHandler::start();
1224 }
1225 
handleTagOpen(int tagId)1226 ldomNode * docx_pHandler::handleTagOpen(int tagId)
1227 {
1228     switch(tagId) {
1229     case docx_el_r:
1230     case docx_el_hyperlink:
1231         if ( 0 == m_runCount ) {
1232             m_pPr.combineWith(m_importContext->get_pPrDefault());
1233             css_length_t outlineLevel = m_pPr.getOutlineLvl();
1234             m_importContext->m_pStyle = m_pPr.getStyle(m_importContext);
1235             if ( outlineLevel.type != css_val_unspecified ) {
1236                 m_inTitle = true;
1237             }
1238             int numId = m_pPr.getNumberingId();
1239             if( numId != 0 && !m_inTitle ) {
1240                 int level = m_pPr.getNumberingLevel() + 1;
1241                 if( level > m_importContext->getListLevel() )
1242                     m_importContext->openList(level, numId, m_writer);
1243                 else if( level < m_importContext->getListLevel() )
1244                     m_importContext->closeList(level, m_writer);
1245                 else
1246                     m_writer->OnTagClose(U"", U"li");
1247                 m_writer->OnTagOpen(U"", U"li");
1248             } else {
1249                 if( m_importContext->isInList() )
1250                     m_importContext->closeList(0, m_writer);
1251                 if( m_inTitle )
1252                     m_titleHandler->onTitleStart(outlineLevel.value + 1);
1253                 else
1254                     m_writer->OnTagOpen(U"", U"p");
1255             }
1256             lString32 style = m_pPr.getCss();
1257             if( !style.empty() )
1258                 m_writer->OnAttribute(U"", U"style", style.c_str());
1259             m_writer->OnTagBody();
1260         }
1261         if(docx_el_r == tagId)
1262             m_rHandler.start();
1263         else
1264             m_hyperlinkHandler.start();
1265         m_runCount++;
1266         break;
1267     case docx_el_bookmarkStart:
1268         m_state = tagId;
1269         break;
1270         break;
1271     case docx_el_pPr:
1272         m_pPrHandler.start(&m_pPr);
1273         break;
1274     default:
1275         m_state = tagId;
1276         break;
1277     }
1278     return NULL;
1279 }
1280 
handleAttribute(const lChar32 * attrname,const lChar32 * attrvalue)1281 void docx_pHandler::handleAttribute(const lChar32 *attrname, const lChar32 *attrvalue)
1282 {
1283     if( docx_el_bookmarkStart == m_state && !lStr_cmp(attrname, "name") ) {
1284         m_writer->OnTagOpen(U"", U"a");
1285         m_writer->OnAttribute(U"", U"id", attrvalue);
1286         m_writer->OnTagBody();
1287         m_writer->OnTagClose(U"", U"a");
1288     }
1289 }
1290 
handleTagClose(const lChar32 * nsname,const lChar32 * tagname)1291 void docx_pHandler::handleTagClose( const lChar32 * nsname, const lChar32 * tagname )
1292 {
1293     CR_UNUSED2(nsname, tagname);
1294 
1295     switch(m_state) {
1296     case docx_el_p:
1297         closeStyleTags(m_writer);
1298         if( m_pPr.getNumberingId() == 0 ) {
1299             if( !m_inTitle ) {
1300                 m_writer->OnTagClose(U"", U"p");
1301             }
1302         }
1303         stop();
1304         if( m_inTitle ) {
1305             m_inTitle = false;
1306             m_titleHandler->onTitleEnd();
1307         }
1308         break;
1309     default:
1310         m_state = docx_el_p;
1311         break;
1312     }
1313 }
1314 
reset()1315 void docx_pHandler::reset()
1316 {
1317     m_pPrHandler.reset();
1318     m_rHandler.reset();
1319     m_state = docx_el_p;
1320     m_runCount = 0;
1321 }
1322 
handleTagOpen(int tagId)1323 ldomNode * docx_documentHandler::handleTagOpen(int tagId)
1324 {
1325     if( tagId != docx_el_p && m_importContext->isInList() )
1326         m_importContext->closeList(0, m_writer);
1327     switch(tagId) {
1328     case docx_el_p:
1329         paragraphHandler.start();
1330         break;
1331     case docx_el_tbl:
1332         m_tableHandler.start();
1333         break;
1334     case docx_el_body:
1335         m_titleHandler->onBodyStart();
1336         m_writer->OnTagBody();
1337         //fallthrough
1338     default:
1339         m_state = tagId;
1340         break;
1341     }
1342     return NULL;
1343 }
1344 
handleAttribute(const lChar32 * nsname,const lChar32 * attrname,const lChar32 * attrvalue)1345 void docx_documentHandler::handleAttribute(const lChar32 * nsname, const lChar32 * attrname, const lChar32 * attrvalue)
1346 {
1347     if (m_state == docx_el_document && !lStr_cmp(nsname, "xmlns") )
1348         CRLog::debug("namespace declaration %s:%s",  LCSTR(attrname), LCSTR(attrvalue));
1349 }
1350 
handleTagClose(const lChar32 * nsname,const lChar32 * tagname)1351 void docx_documentHandler::handleTagClose( const lChar32 * nsname, const lChar32 * tagname )
1352 {
1353     switch(m_state) {
1354     case docx_el_body:
1355         m_titleHandler->onBodyEnd();
1356         m_writer->OnTagClose(nsname, tagname);
1357         break;
1358     default:
1359         break;
1360     }
1361 }
1362 
handleTagOpen(int tagId)1363 ldomNode * docx_styleHandler::handleTagOpen(int tagId)
1364 {
1365     switch(tagId) {
1366     case docx_el_pPr:
1367         m_pPrHandler.start(m_style->get_pPrPointer());
1368         break;
1369     case docx_el_rPr:
1370         m_rPrHandler.start(m_style->get_rPrPointer());
1371         break;
1372     case docx_el_tblPr:
1373     case docx_el_trPr:
1374     case docx_el_tcPr:
1375         m_state = tagId;
1376         break;
1377     default:
1378         m_state = tagId;
1379         break;
1380     }
1381     return NULL;
1382 }
1383 
handleAttribute(const lChar32 * attrname,const lChar32 * attrvalue)1384 void docx_styleHandler::handleAttribute(const lChar32 * attrname, const lChar32 * attrvalue)
1385 {
1386     switch(m_state) {
1387     case docx_el_style:
1388         if ( !lStr_cmp(attrname, "type") ) {
1389             int attr_value = parse_name(styleType_attr_values, attrvalue);
1390             if( -1 != attr_value )
1391                 m_style->setStyleType((odx_style_type)attr_value);
1392         } else if ( !lStr_cmp(attrname, "styleId") ) {
1393             m_style->setId(attrvalue);
1394         }
1395         break;
1396     case docx_el_name:
1397         if ( !lStr_cmp(attrname, "val") )
1398             m_style->setName(attrvalue);
1399         break;
1400     case docx_el_basedOn:
1401         if ( !lStr_cmp(attrname, "val") )
1402             m_style->setBasedOn(attrvalue);
1403         break;
1404     case docx_el_pPr:
1405         break;
1406     case docx_el_rPr:
1407         break;
1408     }
1409 }
1410 
handleTagClose(const lChar32 * nsname,const lChar32 * tagname)1411 void docx_styleHandler::handleTagClose( const lChar32 * nsname, const lChar32 * tagname )
1412 {
1413     CR_UNUSED2(nsname, tagname);
1414 
1415     switch(m_state) {
1416     case docx_el_style:
1417         if ( m_style->isValid() )
1418             m_importContext->addStyle(m_styleRef);
1419         stop();
1420         break;
1421     default:
1422         m_state = docx_el_style;
1423         break;
1424     }
1425 }
1426 
start()1427 void docx_styleHandler::start()
1428 {
1429     docx_ElementHandler::start();
1430     m_styleRef = odx_StyleRef( new odx_Style );
1431     m_style = m_styleRef.get();
1432     m_state = docx_el_style;
1433 }
1434 
handleTagOpen(int tagId)1435 ldomNode * docx_stylesHandler::handleTagOpen(int tagId)
1436 {
1437     switch(tagId) {
1438     case docx_el_pPr:
1439         m_pPrHandler.start(m_importContext->get_pPrDefault());
1440         break;
1441     case docx_el_rPr:
1442         m_rPrHandler.start(m_importContext->get_rPrDefault());
1443         break;
1444     case docx_el_style:
1445         m_styleHandler.start();
1446         break;
1447     case docx_el_docDefaults:
1448         setChildrenInfo(docDefaults_elements);
1449         //falltrough
1450     default:
1451         m_state = tagId;
1452         break;
1453     }
1454     return NULL;
1455 }
1456 
handleTagClose(const lChar32 * nsname,const lChar32 * tagname)1457 void docx_stylesHandler::handleTagClose( const lChar32 * nsname, const lChar32 * tagname )
1458 {
1459     switch(m_state) {
1460     case docx_el_rPrDefault:
1461     case docx_el_pPrDefault:
1462         m_state = docx_el_docDefaults;
1463         break;
1464     case docx_el_docDefaults:
1465         setChildrenInfo(styles_elements);
1466         //fallthrough
1467     case docx_el_style:
1468         m_state = docx_el_styles;
1469         break;
1470     case docx_el_styles:
1471         stop();
1472         break;
1473     default:
1474         CRLog::error("Unexpected tag(%s:%)", nsname, tagname);
1475         break;
1476     }
1477 }
1478 
reset()1479 void docx_stylesHandler::reset()
1480 {
1481     m_styleHandler.reset();
1482     m_state = docx_el_NULL;
1483 }
1484 
parseStyles(docxImportContext * importContext)1485 bool parseStyles(docxImportContext *importContext)
1486 {
1487     LVStreamRef m_stream = importContext->openContentPart(docx_StylesContentType);
1488     if ( m_stream.isNull() )
1489         return false;
1490 
1491     docXMLreader docReader(NULL);
1492     docx_stylesHandler stylesHandler(&docReader, NULL, importContext);
1493     docReader.setHandler(&stylesHandler);
1494 
1495     LVXMLParser parser(m_stream, &docReader);
1496 
1497     if ( !parser.Parse() )
1498         return false;
1499     return true;
1500 }
1501 
parseNumbering(docxImportContext * importContext)1502 bool parseNumbering(docxImportContext *importContext)
1503 {
1504     LVStreamRef m_stream = importContext->openContentPart(docx_NumberingContentType);
1505     if ( m_stream.isNull() )
1506         return false;
1507 
1508     docXMLreader docReader(NULL);
1509     docx_numberingHandler numberingHandler(&docReader, NULL, importContext);
1510     docReader.setHandler(&numberingHandler);
1511 
1512     LVXMLParser parser(m_stream, &docReader);
1513 
1514     if ( !parser.Parse() )
1515         return false;
1516     return true;
1517 }
1518 
parseFootnotes(ldomDocumentWriter & writer,docxImportContext & context,int element)1519 void parseFootnotes(ldomDocumentWriter& writer, docxImportContext& context, int element)
1520 {
1521     LVStreamRef m_stream;
1522 
1523     if( element == docx_el_footnotes )
1524         m_stream = context.openRelatedPart(docx_FootNotesRelationShip);
1525     else
1526         m_stream = context.openRelatedPart(docx_EndNotesRelationShip);
1527 
1528     if ( !m_stream.isNull() ) {
1529         docXMLreader docReader(&writer);
1530         docx_footnotesHandler footnotesHandler(&docReader, &writer, &context, element);
1531         docReader.setHandler(&footnotesHandler);
1532 
1533         LVXMLParser parser(m_stream, &docReader);
1534 
1535         if(parser.Parse())
1536 #ifdef ODX_CRENGINE_IN_PAGE_FOOTNOTES
1537             writer.OnTagClose(U"", docx_el_body_name);
1538 #else
1539             // We didn't add <body name=notes> to not trigger crengine auto-in-page-foonotes
1540             // mechanism, so we can tweak them with style tweaks. We used a simple <div> instead.
1541             writer.OnTagClose(U"", U"div");
1542 #endif
1543     }
1544     context.closeRelatedPart();
1545 }
1546 
ImportDocXDocument(LVStreamRef stream,ldomDocument * doc,LVDocViewCallback * progressCallback,CacheLoadingCallback * formatCallback)1547 bool ImportDocXDocument( LVStreamRef stream, ldomDocument * doc, LVDocViewCallback * progressCallback, CacheLoadingCallback * formatCallback )
1548 {
1549     LVContainerRef arc = LVOpenArchieve( stream );
1550     if ( arc.isNull() )
1551         return false; // not a ZIP archive
1552 
1553     doc->setContainer(arc);
1554     OpcPackage package(arc);
1555 
1556     docxImportContext importContext(&package, doc);
1557 
1558     package.readCoreProperties(doc->getProps());
1559 
1560 #if BUILD_LITE!=1
1561     if ( doc->openFromCache(formatCallback) ) {
1562         if ( progressCallback ) {
1563             progressCallback->OnLoadFileEnd( );
1564         }
1565         return true;
1566     }
1567 #endif
1568 
1569     parseNumbering(&importContext);
1570 
1571     if ( !parseStyles(&importContext) )
1572         return false;
1573 
1574     LVStreamRef m_stream = importContext.openContentPart(docx_DocumentContentType);
1575     if ( m_stream.isNull() )
1576         return false;
1577 
1578     ldomDocumentWriter writer(doc);
1579     docXMLreader docReader(&writer);
1580 
1581     importContext.startDocument(writer);
1582 
1583 #ifdef DOCX_FB2_DOM_STRUCTURE
1584     //Two options when dealing with titles: (FB2|HTML)
1585     odx_fb2TitleHandler titleHandler(&writer, DOCX_USE_CLASS_FOR_HEADING); //<section><title>..</title></section>
1586 #else
1587     odx_titleHandler titleHandler(&writer);  //<hx>..</hx>
1588 #endif
1589     docx_documentHandler documentHandler(&docReader, &writer, &importContext, &titleHandler);
1590     docReader.setHandler(&documentHandler);
1591 
1592     LVXMLParser parser(m_stream, &docReader);
1593 
1594     if ( !parser.Parse() )
1595         return false;
1596 
1597     if(importContext.m_footNoteCount > 0) {
1598         parseFootnotes(writer, importContext, docx_el_footnotes);
1599     }
1600     if(importContext.m_endNoteCount > 0) {
1601         parseFootnotes(writer, importContext, docx_el_endnotes);
1602     }
1603 
1604     importContext.endDocument(writer);
1605     writer.OnStop();
1606 
1607     if ( progressCallback ) {
1608         progressCallback->OnLoadFileEnd( );
1609         doc->compact();
1610         doc->dumpStatistics();
1611     }
1612     return true;
1613 }
1614 
docxImportContext(OpcPackage * package,ldomDocument * doc)1615 docxImportContext::docxImportContext(OpcPackage *package, ldomDocument *doc) :
1616     odx_ImportContext(doc), m_abstractNumbers(16),
1617     m_Numbers(16), m_footNoteCount(0), m_endNoteCount(0),
1618     m_inField(false), m_linkNode(NULL), m_pStyle(NULL),
1619     m_package(package)
1620 {
1621 }
1622 
~docxImportContext()1623 docxImportContext::~docxImportContext()
1624 {
1625 }
1626 
addNum(docxNumRef num)1627 void docxImportContext::addNum(docxNumRef num)
1628 {
1629     if ( !num.isNull() ) {
1630         m_Numbers.set(num->getId(), num);
1631     }
1632 }
1633 
addAbstractNum(docxAbstractNumRef abstractNum)1634 void docxImportContext::addAbstractNum(docxAbstractNumRef abstractNum)
1635 {
1636     if ( !abstractNum.isNull() ) {
1637         m_abstractNumbers.set(abstractNum->getId(), abstractNum);
1638     }
1639 }
1640 
openContentPart(const lChar32 * const contentType)1641 LVStreamRef docxImportContext::openContentPart(const lChar32 * const contentType)
1642 {
1643     m_docPart = m_package->getContentPart(contentType);
1644     if( !m_docPart.isNull() ) {
1645         return m_docPart->open();
1646     }
1647     return LVStreamRef();
1648 }
1649 
openRelatedPart(const lChar32 * const relationshipType)1650 LVStreamRef docxImportContext::openRelatedPart(const lChar32 * const relationshipType)
1651 {
1652     if ( !m_docPart.isNull() ) {
1653         m_relatedPart = m_docPart->getRelatedPart(relationshipType);
1654         if ( !m_relatedPart.isNull())
1655             return m_relatedPart->open();
1656     }
1657     return LVStreamRef();
1658 }
1659 
closeRelatedPart()1660 void docxImportContext::closeRelatedPart()
1661 {
1662     if ( !m_relatedPart.isNull() ) {
1663         m_relatedPart.Clear();
1664     }
1665 }
1666 
openList(int level,int numid,ldomDocumentWriter * writer)1667 void docxImportContext::openList(int level, int numid, ldomDocumentWriter *writer)
1668 {
1669     const docxNumRef num = getNum(numid);
1670 
1671     for(int i = getListLevel(); i < level; i++) {
1672         const docxNumLevel* listLevel = NULL;
1673         css_list_style_type_t listType = css_lst_disc;
1674         if ( !num.isNull() )
1675             listLevel = num->getDocxLevel(const_cast<docxImportContext&>(*this), level - 1);
1676         if (listLevel)
1677             listType = listLevel->getListType();
1678         writer->OnTagOpen(U"", U"ol");
1679         m_ListLevels.add(listType);
1680         writer->OnAttribute(U"", U"style", getListStyleCss(listType).c_str());
1681         writer->OnTagBody();
1682         if ( i != level - 1 )
1683             writer->OnTagOpenNoAttr(U"", U"li");
1684     }
1685 }
1686 
closeList(int level,ldomDocumentWriter * writer)1687 void docxImportContext::closeList(int level, ldomDocumentWriter *writer)
1688 {
1689     for(int i = getListLevel(); i > level; i--) {
1690         writer->OnTagClose(U"", U"li");
1691         writer->OnTagClose(U"", U"ol");
1692         m_ListLevels.remove(getListLevel() - 1);
1693     }
1694 }
1695 
handleTagOpen(int tagId)1696 ldomNode * docx_lvlHandler::handleTagOpen(int tagId)
1697 {
1698     switch(tagId) {
1699     case docx_el_pPr:
1700         m_pPrHandler.start(m_lvl->get_pPr());
1701         break;
1702     case docx_el_rPr:
1703         m_rPrHandler.start(m_lvl->get_rPr());
1704         break;
1705     case docx_el_isLgl:
1706         m_lvl->setLgl(true);
1707         //fallthrough
1708     case docx_el_lvlJc:
1709     case docx_el_lvlRestart:
1710     case docx_el_lvlText:
1711     case docx_el_numFmt:
1712     case docx_el_pStyle:
1713     case docx_el_start:
1714     default:
1715         m_state = tagId;
1716         break;
1717     }
1718     return NULL;
1719 }
1720 
handleAttribute(const lChar32 * attrname,const lChar32 * attrvalue)1721 void docx_lvlHandler::handleAttribute(const lChar32 *attrname, const lChar32 *attrvalue)
1722 {
1723     css_length_t result;
1724 
1725     if( !lStr_cmp(attrname, "val") ) {
1726         int attr_value;
1727 
1728         switch(m_state) {
1729         case docx_el_pStyle:
1730             m_lvl->setReferencedStyleId(attrvalue);
1731             break;
1732         case docx_el_lvlJc:
1733             attr_value = parse_name(jc_attr_values, attrvalue);
1734             if(attr_value != -1)
1735                 m_lvl->setLevelAlign((css_text_align_t)attr_value);
1736             break;
1737         case docx_el_isLgl:
1738             m_lvl->setLgl(parse_OnOff_attribute( attrvalue ));
1739             break;
1740         case docx_el_lvlRestart:
1741             parse_int(attrvalue, result);
1742             m_lvl->setLevelRestart(result);
1743             break;
1744         case docx_el_lvlText:
1745             m_lvl->setLevelText(attrvalue);
1746             break;
1747         case docx_el_numFmt:
1748             attr_value = parse_name(numFmt_attr_values, attrvalue);
1749             if( -1 != attr_value )
1750                 m_lvl->setNumberFormat((docx_numFormat_type)attr_value);
1751             break;
1752         case docx_el_start:
1753             parse_int(attrvalue, result);
1754             m_lvl->setLevelStart(result);
1755             break;
1756         case docx_el_suff:
1757             attr_value = parse_name(lvlSuff_attr_values, attrvalue);
1758             if( -1 != attr_value )
1759                 m_lvl->setLevelSuffix((docx_LevelSuffix_type)attr_value);
1760             break;
1761         }
1762     } else if( !lStr_cmp(attrname, "ilvl") ) {
1763         // m_state should be docx_el_lvl
1764         parse_int(attrvalue, result);
1765         m_lvl->setLevel(result);
1766     } else if( !lStr_cmp(attrname, "null") ) {
1767         // m_state should be docx_el_lvl
1768         m_lvl->setLevelTextNull(parse_OnOff_attribute( attrvalue ));
1769     }
1770 }
1771 
reset()1772 void docx_lvlHandler::reset()
1773 {
1774     m_rPrHandler.reset();
1775     m_pPrHandler.reset();
1776     if(m_lvl)
1777         m_lvl->reset();
1778 }
1779 
handleTagOpen(int tagId)1780 ldomNode *docx_footnotesHandler::handleTagOpen(int tagId)
1781 {
1782     switch(tagId) {
1783     case docx_el_p:
1784         if( m_normal && !m_importContext->m_footNoteId.empty() ) {
1785             if( m_pCount == 0 ) {
1786                 m_writer->OnTagOpen(U"", U"section");
1787                 lString32 id = isEndNote() ? U"c_" : U"n_";
1788                 id << m_importContext->m_footNoteId.c_str();
1789                 m_writer->OnAttribute(U"", U"id", id.c_str());
1790                 m_writer->OnAttribute(U"", U"role", isEndNote() ? U"doc-rearnote" : U"doc-footnote");
1791                 m_writer->OnTagBody();
1792             }
1793             paragraphHandler.start();
1794         } else {
1795             m_state = tagId;
1796         }
1797         m_pCount++;
1798         break;
1799     case docx_el_footnote:
1800     case docx_el_endnote:
1801         m_normal = true;
1802         m_importContext->m_footNoteId.clear();
1803         m_pCount = 0;
1804         m_state = tagId;
1805         break;
1806     case docx_el_footnotes:
1807     case docx_el_endnotes:
1808 #ifdef ODX_CRENGINE_IN_PAGE_FOOTNOTES
1809         m_writer->OnTagOpen(U"", docx_el_body_name);
1810         if(isEndNote()) {
1811             m_writer->OnAttribute(U"", U"name", U"comments");
1812             m_writer->OnTagBody();
1813             m_writer->OnTagOpen(U"", U"subtitle");
1814             m_writer->OnTagBody();
1815             m_writer->OnText(U"* * *", 5, 0);
1816             m_writer->OnTagClose(U"", U"subtitle");
1817         } else {
1818             m_writer->OnAttribute(U"", U"name", U"notes");
1819             m_writer->OnTagBody();
1820         }
1821 #else
1822         // We don't add <body name=notes> to not trigger crengine auto-in-page-foonotes
1823         // mechanism, so we can tweak them with style tweaks. We use a simple <div> instead.
1824         m_writer->OnTagOpen(U"", U"div");
1825         m_writer->OnAttribute(U"", U"style", U"page-break-before: always");
1826         m_writer->OnTagBody();
1827 #endif
1828         //fallthrough
1829     default:
1830         m_state = tagId;
1831         break;
1832     }
1833     return NULL;
1834 }
1835 
handleAttribute(const lChar32 * attrname,const lChar32 * attrvalue)1836 void docx_footnotesHandler::handleAttribute(const lChar32 *attrname, const lChar32 *attrvalue)
1837 {
1838     switch(m_state) {
1839     case docx_el_footnote:
1840     case docx_el_endnote:
1841         if( !lStr_cmp(attrname, "type") ) {
1842             if( lStr_cmp(attrvalue, "normal") )
1843                 m_normal = false;
1844         } else if( !lStr_cmp(attrname, "id") )
1845             m_importContext->m_footNoteId.append(attrvalue);
1846         break;
1847     default:
1848         break;
1849     }
1850 }
1851 
handleTagClose(const lChar32 * nsname,const lChar32 * tagname)1852 void docx_footnotesHandler::handleTagClose(const lChar32 *nsname, const lChar32 *tagname)
1853 {
1854     switch (m_state) {
1855     case docx_el_p:
1856         m_state = isEndNote() ? docx_el_endnote : docx_el_footnote;
1857         break;
1858     case docx_el_endnote:
1859     case docx_el_footnote:
1860         m_writer->OnTagClose(U"", U"section");
1861     default:
1862         docx_ElementHandler::handleTagClose(nsname, tagname);
1863         break;
1864     }
1865 }
1866 
handleTagOpen(int tagId)1867 ldomNode *docx_hyperlinkHandler::handleTagOpen(int tagId)
1868 {
1869     switch(tagId) {
1870     case docx_el_r:
1871         if ( !m_target.empty() && 0 == m_runCount ) {
1872             m_writer->OnTagOpen(U"", U"a");
1873             m_writer->OnAttribute(U"", U"href", m_target.c_str());
1874             m_writer->OnTagBody();
1875         }
1876         m_runCount++;
1877         m_rHandler.start();
1878         break;
1879     default:
1880         m_state = tagId;
1881         break;
1882     }
1883     return NULL;
1884 }
1885 
handleAttribute(const lChar32 * attrname,const lChar32 * attrvalue)1886 void docx_hyperlinkHandler::handleAttribute(const lChar32 *attrname, const lChar32 *attrvalue)
1887 {
1888     if( docx_el_hyperlink == m_state) {
1889         if ( !lStr_cmp(attrname, "id") ) {
1890             m_target = m_importContext->getLinkTarget(lString32(attrvalue));
1891         } else if (!lStr_cmp(attrname, "anchor") && m_target.empty()) {
1892             m_target = cs32("#") + lString32(attrvalue);
1893         }
1894     }
1895 }
1896 
handleTagClose(const lChar32 * nsname,const lChar32 * tagname)1897 void docx_hyperlinkHandler::handleTagClose(const lChar32 *nsname, const lChar32 *tagname)
1898 {
1899     switch (m_state) {
1900     case docx_el_hyperlink:
1901         if ( !m_target.empty() ) {
1902             m_writer->OnTagClose(U"", U"a");
1903         }
1904     default:
1905         docx_ElementHandler::handleTagClose(nsname, tagname);
1906         break;
1907     }
1908 }
1909 
handleTagOpen(int tagId)1910 ldomNode *docx_drawingHandler::handleTagOpen(int tagId)
1911 {
1912     m_level++;
1913     m_state = tagId;
1914     return NULL;
1915 }
1916 
handleAttribute(const lChar32 * attrname,const lChar32 * attrvalue)1917 void docx_drawingHandler::handleAttribute(const lChar32 *attrname, const lChar32 *attrvalue)
1918 {
1919     if( m_state == docx_el_blip && !lStr_cmp(attrname, "embed") ) {
1920         lString32 imgPath = m_importContext->getImageTarget(lString32(attrvalue));
1921         if( !imgPath.empty() ) {
1922             m_writer->OnTagOpen(U"", U"img");
1923             m_writer->OnAttribute(U"", U"src",  imgPath.c_str());
1924             m_writer->OnTagBody();
1925             m_writer->OnTagClose(U"", U"img", true);
1926         }
1927     }
1928 }
1929 
handleTagClose(const lChar32 * nsname,const lChar32 * tagname)1930 void docx_drawingHandler::handleTagClose(const lChar32 *nsname, const lChar32 *tagname)
1931 {
1932     CR_UNUSED2(nsname, tagname);
1933 
1934     if(m_level <= 1)
1935         stop();
1936     m_level--;
1937 }
1938 
endRowSpan(int column)1939 void docx_tblHandler::endRowSpan(int column)
1940 {
1941     docx_row_span_info rowSpan = m_rowSpaninfo[column];
1942     if( rowSpan.rows > 1 ) {
1943         CRLog::warn("Row span on column: %d, end: %d", column, rowSpan.rows);
1944         if( rowSpan.column ) {
1945             rowSpan.column->setAttributeValue(LXML_NS_NONE,
1946                                               rowSpan.column->getDocument()->getAttrNameIndex(U"rowspan"),
1947                                               lString32::itoa(rowSpan.rows).c_str());
1948         } else {
1949             CRLog::error("No column node");
1950         }
1951     }
1952 }
1953 
handleTagOpen(int tagId)1954 ldomNode *docx_tblHandler::handleTagOpen(int tagId)
1955 {
1956     bool elementHandled = false;
1957     switch(tagId) {
1958     case docx_el_p:
1959         m_pHandler_->start();
1960         elementHandled = true;
1961         break;
1962     case docx_el_tc:
1963         m_colSpan = 1;
1964         CRLog::warn("Column: %d", m_column);
1965         m_vMergeState = VMERGE_NONE;
1966         break;
1967     case docx_el_vMerge:
1968         m_vMergeState = VMERGE_CONTINUE;
1969         break;
1970     case docx_el_tr:
1971         m_column = 0;
1972         m_writer->OnTagOpenNoAttr(U"", U"tr");
1973         break;
1974     default:
1975         break;
1976     }
1977     if( !elementHandled ) {
1978         m_state = tagId;
1979         m_levels.add(tagId);
1980     }
1981     return NULL;
1982 }
1983 
handleAttribute(const lChar32 * attrname,const lChar32 * attrvalue)1984 void docx_tblHandler::handleAttribute(const lChar32 *attrname, const lChar32 *attrvalue)
1985 {
1986     if( m_state == docx_el_gridSpan && !lStr_cmp( attrname, "val" ) ) {
1987         m_colSpan = lString32(attrvalue).atoi();
1988     } else if( m_state == docx_el_vMerge && !lStr_cmp( attrname, "val" ) ) {
1989         if( !lStr_cmp( attrvalue, "restart" ) )
1990             m_vMergeState = VMERGE_RESET;
1991     }
1992 }
1993 
handleTagClose(const lChar32 * nsname,const lChar32 * tagname)1994 void docx_tblHandler::handleTagClose(const lChar32 *nsname, const lChar32 *tagname)
1995 {
1996     CR_UNUSED2(nsname, tagname);
1997 
1998     if( !m_levels.empty() ) {
1999         switch(m_state) {
2000         case docx_el_tblPr:
2001             m_writer->OnTagOpenNoAttr(U"", U"table");
2002             break;
2003         case docx_el_tr:
2004             m_writer->OnTagClose(U"", U"tr");
2005             m_rowCount++;
2006             break;
2007         case docx_el_tc:
2008             m_column++;
2009             if( m_pHandler_ == &m_pHandler )
2010                 m_writer->OnTagClose(U"", U"td");
2011             break;
2012         case docx_el_gridCol:
2013             m_columnCount++;
2014             break;
2015         case docx_el_tblGrid:
2016             if( m_columnCount )
2017                 m_rowSpaninfo.reserve(m_columnCount);
2018             break;
2019         case docx_el_tcPr:
2020             if( VMERGE_NONE == m_vMergeState || VMERGE_RESET == m_vMergeState) {
2021                 m_pHandler_ = &m_pHandler;
2022                 ldomNode *columnNode = m_writer->OnTagOpen(U"", U"td");
2023                 for(int i = 0; i < m_colSpan; i++) {
2024                     if( m_column + i >= m_columnCount )
2025                         break; // shouldn't happen
2026                     endRowSpan(m_column + i);
2027                 }
2028                 m_rowSpaninfo[m_column] = docx_row_span_info(columnNode);
2029                 if( m_colSpan > 1)
2030                     m_writer->OnAttribute(U"", U"colspan", lString32::itoa(m_colSpan).c_str() );
2031                 m_writer->OnTagBody();
2032             } else if ( VMERGE_CONTINUE == m_vMergeState ) {
2033                 m_pHandler_ = &m_skipHandler;
2034                 m_rowSpaninfo[m_column].rows++;
2035             }
2036             m_column += m_colSpan - 1;
2037             break;
2038         default:
2039             break;
2040         }
2041         m_levels.erase(m_levels.length() - 1, 1);
2042         if( !m_levels.empty() ) {
2043             m_state = m_levels[m_levels.length() - 1];
2044         } else {
2045             m_state = docx_el_tbl;
2046         }
2047     } else {
2048         for(int i = 0; i < m_columnCount; i++) {
2049             endRowSpan(i);
2050         }
2051         m_writer->OnTagClose(U"", U"table");
2052         stop();
2053     }
2054 
2055 }
2056 
reset()2057 void docx_tblHandler::reset()
2058 {
2059     m_levels.clear();
2060     m_rowSpaninfo.clear();
2061     m_rowCount = 0;
2062     m_columnCount = 0;
2063 }
2064 
handleTagOpen(int tagId)2065 ldomNode *docx_numberingHandler::handleTagOpen(int tagId)
2066 {
2067     switch(tagId) {
2068     case docx_el_abstractNum:
2069         m_abstractNumHandler.start();
2070         break;
2071     case docx_el_num:
2072         m_numHandler.start();
2073         break;
2074     default:
2075         m_state = tagId;
2076     }
2077     return NULL;
2078 }
2079 
handleTagClose(const lChar32 * nsname,const lChar32 * tagname)2080 void docx_numberingHandler::handleTagClose(const lChar32 *nsname, const lChar32 *tagname)
2081 {
2082     switch(m_state) {
2083     case docx_el_num:
2084     case docx_el_abstractNum:
2085         m_state = docx_el_numbering;
2086         break;
2087     case docx_el_numbering:
2088         stop();
2089         break;
2090     default:
2091         CRLog::error("Unexpected tag(%s:%)", nsname, tagname);
2092         break;
2093     }
2094 }
2095 
handleTagOpen(int tagId)2096 ldomNode *docx_abstractNumHandler::handleTagOpen(int tagId)
2097 {
2098     switch(tagId) {
2099     case docx_el_lvl:
2100         if ( !m_levelRef.isNull() )
2101             m_abstractNumRef->addLevel( m_levelRef );
2102         m_levelRef = docxNumLevelRef( new docxNumLevel );
2103         m_lvlHandler.start( m_levelRef.get() );
2104         break;
2105     default:
2106         m_state = tagId;
2107     }
2108     return NULL;
2109 }
2110 
handleAttribute(const lChar32 * attrname,const lChar32 * attrvalue)2111 void docx_abstractNumHandler::handleAttribute(const lChar32 * attrname, const lChar32 * attrvalue)
2112 {
2113     switch(m_state) {
2114     case docx_el_abstractNum:
2115         if ( !lStr_cmp(attrname, "abstractNumId") )
2116             m_abstractNumRef->setId(lString32(attrvalue).atoi());
2117         break;
2118     default:
2119         break;
2120     }
2121 }
2122 
handleTagClose(const lChar32 * nsname,const lChar32 * tagname)2123 void docx_abstractNumHandler::handleTagClose(const lChar32 *nsname, const lChar32 *tagname)
2124 {
2125     CR_UNUSED2(nsname, tagname);
2126 
2127     switch(m_state) {
2128     case docx_el_abstractNum:
2129         if ( !m_levelRef.isNull() )
2130             m_abstractNumRef->addLevel( m_levelRef );
2131         if ( !m_abstractNumRef.isNull() )
2132             m_importContext->addAbstractNum( m_abstractNumRef );
2133         stop();
2134         break;
2135     default:
2136         m_state = docx_el_abstractNum;
2137         break;
2138     }
2139 }
2140 
start()2141 void docx_abstractNumHandler::start()
2142 {
2143     m_abstractNumRef = docxAbstractNumRef( new docxAbstractNum );
2144     docx_ElementHandler::start();
2145 }
2146 
handleAttribute(const lChar32 * attrname,const lChar32 * attrvalue)2147 void docx_numHandler::handleAttribute(const lChar32 *attrname, const lChar32 *attrvalue)
2148 {
2149     switch(m_state) {
2150     case docx_el_num:
2151         if ( !lStr_cmp(attrname, "numId") )
2152             m_numRef->setId( lString32(attrvalue).atoi() );
2153         break;
2154     case docx_el_abstractNumId:
2155         if ( !lStr_cmp(attrname, "val") )
2156             m_numRef->setBaseId( lString32(attrvalue).atoi() );
2157         break;
2158     default:
2159         break;
2160     }
2161 }
2162 
handleTagOpen(int tagId)2163 ldomNode *docx_numHandler::handleTagOpen(int tagId)
2164 {
2165     switch(tagId) {
2166     case docx_el_lvl:
2167         if ( !m_levelRef.isNull() )
2168             m_numRef->overrideLevel( m_levelRef );
2169         m_levelRef = docxNumLevelRef( new docxNumLevel );
2170         m_lvlHandler.start( m_levelRef.get() );
2171         break;
2172     default:
2173         m_state = tagId;
2174     }
2175     return NULL;
2176 }
2177 
handleTagClose(const lChar32 * nsname,const lChar32 * tagname)2178 void docx_numHandler::handleTagClose(const lChar32 *nsname, const lChar32 *tagname)
2179 {
2180     CR_UNUSED2(nsname, tagname);
2181 
2182     switch(m_state) {
2183     case docx_el_num:
2184         if ( !m_levelRef.isNull() )
2185             m_numRef->overrideLevel( m_levelRef );
2186         if ( m_numRef->isValid() )
2187             m_importContext->addNum( m_numRef );
2188         stop();
2189         break;
2190     default:
2191         m_state = docx_el_num;
2192         break;
2193     }
2194 }
2195 
start()2196 void docx_numHandler::start()
2197 {
2198     m_numRef = docxNumRef( new docxNum );
2199     docx_ElementHandler::start();
2200 }
2201 
getBase(docxImportContext & context) const2202 const docxAbstractNumRef docxNum::getBase(docxImportContext &context) const
2203 {
2204     return context.getAbstractNum(getBaseId());
2205 }
2206 
overrideLevel(docxNumLevelRef docxLevel)2207 void docxNum::overrideLevel(docxNumLevelRef docxLevel)
2208 {
2209     if( !docxLevel.isNull() )
2210         m_overrides.set(docxLevel->getLevel().value, docxLevel);
2211 }
2212 
getDocxLevel(docxImportContext & context,int level)2213 docxNumLevel *docxNum::getDocxLevel(docxImportContext &context, int level)
2214 {
2215     docxNumLevelRef levelRef = m_overrides.get(level);
2216     if( !levelRef.isNull() )
2217         return levelRef.get();
2218     docxAbstractNumRef baseRef = getBase(context);
2219     if( !baseRef.isNull() )
2220         return baseRef->getLevel(level);
2221     return NULL;
2222 }
2223 
isValid() const2224 bool docxNum::isValid() const
2225 {
2226     return (m_id.type != css_val_unspecified
2227             && m_abstractNumId.type != css_val_unspecified);
2228 }
2229 
reset()2230 void docxNum::reset()
2231 {
2232     m_id.type = css_val_unspecified;
2233     m_abstractNumId.type = css_val_unspecified;
2234     m_overrides.clear();
2235 }
2236 
addLevel(docxNumLevelRef docxLevel)2237 void docxAbstractNum::addLevel(docxNumLevelRef docxLevel)
2238 {
2239     m_levels.set(docxLevel->getLevel().value, docxLevel);
2240 }
2241 
docxAbstractNum()2242 docxAbstractNum::docxAbstractNum() : m_multilevel(docx_singlelevel),
2243     m_abstractNumId(css_val_unspecified, 0), m_levels(10)
2244 {
2245 }
2246 
getLevel(int level)2247 docxNumLevel *docxAbstractNum::getLevel(int level)
2248 {
2249     return m_levels.get(level).get();
2250 }
2251 
reset()2252 void docxAbstractNum::reset()
2253 {
2254     m_levels.clear();
2255 }
2256