1 /* -*- mode: C++; tab-width: 4; c-basic-offset: 4; -*- */
2
3 /* AbiWord
4 * Copyright (C) 1998 AbiSource, Inc.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 * 02110-1301 USA.
20 */
21
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include "ut_types.h"
26 #include "ut_assert.h"
27 #include "ut_debugmsg.h"
28 #include "ut_path.h"
29 #include "ut_string.h"
30 #include "ut_std_string.h"
31 #include "xap_App.h"
32 #include "xap_Frame.h"
33 #include "ie_impexp_DocBook.h"
34 #include "ie_imp_DocBook.h"
35 #include "ie_impGraphic.h"
36 #include "ie_types.h"
37 #include "fg_GraphicRaster.h"
38 #include "pd_Document.h"
39 #include "ut_growbuf.h"
40 #include "ut_png.h"
41
42 /*****************************************************************/
43 /*****************************************************************/
44
IE_Imp_DocBook_Sniffer(const char * _name)45 IE_Imp_DocBook_Sniffer::IE_Imp_DocBook_Sniffer (const char * _name) :
46 IE_ImpSniffer(_name)
47 {
48 //
49 }
50
51 // supported suffixes
52 static IE_SuffixConfidence IE_Imp_DocBook_Sniffer__SuffixConfidence[] = {
53 { "dbk", UT_CONFIDENCE_PERFECT },
54 { "xml", UT_CONFIDENCE_PERFECT },
55 { "", UT_CONFIDENCE_ZILCH }
56 };
57
getSuffixConfidence()58 const IE_SuffixConfidence * IE_Imp_DocBook_Sniffer::getSuffixConfidence ()
59 {
60 return IE_Imp_DocBook_Sniffer__SuffixConfidence;
61 }
62
recognizeContents(const char * szBuf,UT_uint32)63 UT_Confidence_t IE_Imp_DocBook_Sniffer::recognizeContents(const char * szBuf,
64 UT_uint32 /*iNumbytes*/)
65 {
66 // TODO: scan the first few lines
67
68 if(strstr(szBuf, "PUBLIC \"-//OASIS//DTD DocBook XML") == NULL)
69 return UT_CONFIDENCE_ZILCH;
70
71 return UT_CONFIDENCE_PERFECT;
72 }
73
constructImporter(PD_Document * pDocument,IE_Imp ** ppie)74 UT_Error IE_Imp_DocBook_Sniffer::constructImporter(PD_Document * pDocument,
75 IE_Imp ** ppie)
76 {
77 IE_Imp_DocBook * p = new IE_Imp_DocBook(pDocument);
78 *ppie = p;
79 return UT_OK;
80 }
81
getDlgLabels(const char ** pszDesc,const char ** pszSuffixList,IEFileType * ft)82 bool IE_Imp_DocBook_Sniffer::getDlgLabels(const char ** pszDesc,
83 const char ** pszSuffixList,
84 IEFileType * ft)
85 {
86 *pszDesc = "DocBook (.dbk, .xml)";
87 *pszSuffixList = "*.dbk; *.xml";
88 *ft = getFileType();
89 return true;
90 }
91
92 /*****************************************************************/
93 /*****************************************************************/
94
~IE_Imp_DocBook()95 IE_Imp_DocBook::~IE_Imp_DocBook()
96 {
97 //DELETEP(m_TableHelperStack);
98 }
99
IE_Imp_DocBook(PD_Document * pDocument)100 IE_Imp_DocBook::IE_Imp_DocBook(PD_Document * pDocument)
101 : IE_Imp_XML(pDocument, false),
102 m_iCurListID(AUTO_LIST_RESERVED),
103 m_iBlockDepth(0),
104 m_iDataDepth(0),
105 m_iListDepth(0),
106 m_iFootnotes(0),
107 m_iImages(0),
108 m_iSectionDepth(0),
109 m_iTitleDepth(0), // this counter doesn't include 'sections' like prefaces or dedications
110 m_iNoteID(-1),
111 m_utvTitles(7,1,false),
112 m_bMustAddTitle(false),
113 m_bRequiredBlock(false),
114 m_bWroteBold(false),
115 m_bWroteEntryPara(false),
116 m_bInFrame(false),
117 m_bInIndex(false),
118 m_bInMath(false),
119 m_bInMeta(false),
120 m_bInNote(false),
121 m_bInTable(false),
122 m_bInTOC(false),
123 m_bReadBook(false)
124 {
125 // m_TableHelperStack(new IE_Imp_TableHelperStack());
126 for(int i = 0; i < 7; i++)
127 {
128 m_utvTitles.addItem((fl_AutoNum *)NULL);
129 }
130 }
131
132 /*****************************************************************/
133 /*****************************************************************/
134
135
// Pairs a name with an integer type code.
// NOTE(review): nothing in the visible part of this file uses this struct
// (the token table is declared with xmlToIdMapping); it may be a leftover.
struct _TokenTable
{
	const char *	m_name;		// name string
	int				m_type;		// integer type code for that name
};
141
142 static struct xmlToIdMapping s_Tokens[] =
143 {
144 { "abbrev", TT_ABBREVIATION },
145 { "abstract", TT_ABSTRACT },
146 { "ackno", TT_ACKNO },
147 { "acronym", TT_ACRONYM },
148 { "alt", TT_ALT },
149 { "anchor", TT_BOOKMARK },
150 { "appendix", TT_APPENDIX },
151 { "appendixinfo", TT_APPENDIXINFO },
152 { "application", TT_APPLICATION },
153 { "area", TT_AREA },
154 { "areaset", TT_AREASET },
155 { "areaspec", TT_AREASPEC },
156 { "article", TT_ARTICLE },
157 { "articleinfo", TT_ARTICLEINFO },
158 { "artpagenums", TT_ARTPAGENUMS },
159 { "attribution", TT_ATTRIBUTION },
160 { "author", TT_AUTHOR },
161 { "authorblurb", TT_AUTHORBLURB },
162 { "authorgroup", TT_AUTHORGROUP },
163 { "authorinitials", TT_AUTHORINITIALS },
164 { "beginpage", TT_PAGEBREAK },
165 { "bibliocoverage", TT_BIBLIOCOVERAGE },
166 { "bibliodiv", TT_BIBLIODIV },
167 { "biblioentry", TT_BIBLIOENTRY },
168 { "bibliography", TT_BIBLIOGRAPHY },
169 { "bibliomisc", TT_BIBLIOMISC },
170 { "bibliomixed", TT_BIBLIOMIXED },
171 { "bibliomset", TT_BIBLIOMSET },
172 { "bibliorelation", TT_BIBLIORELATION },
173 { "biblioset", TT_BIBLIOSET },
174 { "bibliosource", TT_BIBLIOSOURCE },
175 { "blockquote", TT_BLOCKQUOTE },
176 { "book", TT_DOCUMENT },
177 { "bookinfo", TT_BOOKINFO },
178 { "bridgehead", TT_BRIDGEHEAD },
179 { "chapter", TT_CHAPTER },
180 { "chapterinfo", TT_CHAPTERINFO },
181 { "citetitle", TT_CITETITLE },
182 { "cmdsynopsis", TT_CMDSYNOPSIS },
183 { "col", TT_COL },
184 { "collab", TT_COLLAB },
185 { "collabname", TT_COLLABNAME },
186 { "colophon", TT_COLOPHON },
187 { "colspec", TT_COLSPEC },
188 { "command", TT_COMMAND },
189 { "copyright", TT_COPYRIGHT },
190 { "date", TT_DATE },
191 { "dedication", TT_DEDICATION },
192 { "edition", TT_EDITION },
193 { "editor", TT_EDITOR },
194 { "email", TT_EMAIL },
195 { "emphasis", TT_EMPHASIS },
196 { "entry", TT_ENTRY },
197 { "entrytbl", TT_ENTRYTBL },
198 { "epigraph", TT_EPIGRAPH },
199 { "equation", TT_EQUATION },
200 { "figure", TT_FIGURE },
201 { "firstname", TT_FIRSTNAME },
202 { "footnote", TT_FOOTNOTE },
203 { "footnoteref", TT_FOOTNOTEREF },
204 { "formalpara", TT_FORMALPARA },
205 { "funcsynopsis", TT_FUNCSYNOPSIS },
206 { "glossary", TT_GLOSSARY },
207 { "graphic", TT_GRAPHIC },
208 { "graphicco", TT_GRAPHICCO },
209 { "holder", TT_HOLDER },
210 { "honorific", TT_HONORIFIC },
211 { "imagedata", TT_IMAGEDATA },
212 { "imageobject", TT_IMAGEOBJECT },
213 { "imageobjectco", TT_IMAGEOBJECTCO },
214 { "index", TT_INDEX },
215 { "indexdiv", TT_INDEXDIV },
216 { "indexentry", TT_INDEXENTRY },
217 { "indexinfo", TT_INDEXINFO },
218 { "indexterm", TT_INDEXTERM },
219 { "informalequation", TT_EQUATION },
220 { "informalfigure", TT_FIGURE },
221 { "informaltable", TT_TABLE },
222 { "inlineequation", TT_EQUATION },
223 { "inlinemediaobject",TT_MEDIAOBJECT },
224 { "invpartnumber", TT_INVPARTNUMBER },
225 { "issuenum", TT_ISSUENUM },
226 { "itemizedlist", TT_ITEMIZEDLIST },
227 { "itermset", TT_ITERMSET },
228 { "keyword", TT_KEYWORD },
229 { "keywordset", TT_KEYWORDSET },
230 { "legalnotice", TT_LEGALNOTICE },
231 { "lineage", TT_LINEAGE },
232 { "link", TT_LINK },
233 { "listitem", TT_LISTITEM },
234 { "literallayout", TT_PLAINTEXT },
235 { "mediaobject", TT_MEDIAOBJECT },
236 { "mediaobjectco", TT_MEDIAOBJECTCO },
237 { "orderedlist", TT_ORDEREDLIST },
238 { "othername", TT_OTHERNAME },
239 { "para", TT_BLOCK },
240 { "part", TT_PART },
241 { "partinfo", TT_PARTINFO },
242 { "partintro", TT_PARTINTRO },
243 { "phrase", TT_PHRASE },
244 { "preface", TT_PREFACE },
245 { "prefaceinfo", TT_PREFACEINFO },
246 { "primary", TT_PRIMARY },
247 { "primaryie", TT_PRIMARYIE },
248 { "printhistory", TT_PRINTHISTORY },
249 { "productname", TT_PRODUCTNAME },
250 { "productnumber", TT_PRODUCTNUMBER },
251 { "programlisting", TT_PLAINTEXT },
252 { "pubdate", TT_PUBDATE },
253 { "publisher", TT_PUBLISHER },
254 { "publishername", TT_PUBLISHERNAME },
255 { "quote", TT_QUOTE },
256 { "refentry", TT_REFENTRY },
257 { "revdescription", TT_REVDESCRIPTION },
258 { "revhistory", TT_REVHISTORY },
259 { "revision", TT_REVISION },
260 { "revnumber", TT_REVNUMBER },
261 { "revremark", TT_REVREMARK },
262 { "row", TT_ROW },
263 { "screen", TT_SCREEN },
264 { "screeninfo", TT_SCREENINFO },
265 { "screenshot", TT_SCREENSHOT },
266 { "secondary", TT_SECONDARY },
267 { "secondaryie", TT_SECONDARYIE },
268 { "sect1", TT_SECTION },
269 { "sect1info", TT_SECTIONINFO },
270 { "sect2", TT_SECTION },
271 { "sect2info", TT_SECTIONINFO },
272 { "sect3", TT_SECTION },
273 { "sect3info", TT_SECTIONINFO },
274 { "sect4", TT_SECTION },
275 { "sect4info", TT_SECTIONINFO },
276 { "sect5", TT_SECTION },
277 { "sect5info", TT_SECTIONINFO },
278 { "section", TT_SECTION },
279 { "sectioninfo", TT_SECTIONINFO },
280 { "see", TT_SEE },
281 { "seealso", TT_SEEALSO },
282 { "seeie", TT_SEEIE },
283 { "seg", TT_SEG },
284 { "seglistitem", TT_SEGLISTITEM },
285 { "segmentedlist", TT_SEGMENTEDLIST },
286 { "set", TT_SET },
287 { "setindex", TT_INDEX },
288 { "sgmltag", TT_SGMLTAG },
289 { "simpara", TT_BLOCK },
290 { "subject", TT_SUBJECT },
291 { "subjectset", TT_SUBJECTSET },
292 { "subjectterm", TT_SUBJECTTERM },
293 { "subscript", TT_SUBSCRIPT },
294 { "superscript", TT_SUPERSCRIPT },
295 { "surname", TT_SURNAME },
296 { "synopsis", TT_SYNOPSIS },
297 { "table", TT_TABLE },
298 { "tbody", TT_TBODY },
299 { "tertiary", TT_TERTIARY },
300 { "tertiaryie", TT_TERTIARYIE },
301 { "textobject", TT_TEXTOBJECT },
302 { "tfoot", TT_TFOOT },
303 { "tgroup", TT_TGROUP },
304 { "thead", TT_THEAD },
305 { "tip", TT_TIP },
306 { "title", TT_TITLE },
307 { "toc", TT_TOC },
308 { "tocback", TT_TOCBACK },
309 { "tocchap", TT_TOCCHAP },
310 { "tocfront", TT_TOCFRONT },
311 { "toclevel1", TT_TOCLEVEL1 },
312 { "toclevel2", TT_TOCLEVEL2 },
313 { "toclevel3", TT_TOCLEVEL3 },
314 { "toclevel4", TT_TOCLEVEL4 },
315 { "toclevel5", TT_TOCLEVEL5 },
316 { "tocpart", TT_TOCPART },
317 { "ulink", TT_ULINK },
318 { "variablelist", TT_VARIABLELIST },
319 { "varname", TT_VARNAME },
320 { "videodata", TT_VIDEODATA },
321 { "videoobject", TT_VIDEOOBJECT },
322 { "volumenum", TT_VOLUMENUM },
323 { "xref", TT_LINK },
324 { "year", TT_YEAR }
325 };
326
// Number of entries in s_Tokens, computed at compile time from the array size.
#define TokenTableSize	(sizeof(s_Tokens) / sizeof(*s_Tokens))
328
329 /*****************************************************************/
330 /*****************************************************************/
331
// Parser guard macros.  They read m_parseState / m_error from the enclosing
// scope and, on failure, leave the enclosing function with a bare "return;",
// so they are only usable inside functions returning void.

// True when the current parse state equals ps.
#define X_TestParseState(ps)	(m_parseState == (ps))

// Require parse state ps; otherwise log, set m_error to
// UT_IE_BOGUSDOCUMENT and return.
#define X_VerifyParseState(ps)												\
	do																		\
	{																		\
		if (!X_TestParseState(ps))											\
		{																	\
			UT_DEBUGMSG(("DOM: X_VerifyParseState failed: %s\n", #ps));	\
			m_error = UT_IE_BOGUSDOCUMENT;									\
			return;															\
		}																	\
	} while (0)

// Require condition b about the document; otherwise log, set m_error to
// UT_IE_BOGUSDOCUMENT and return.
#define X_CheckDocument(b)													\
	do																		\
	{																		\
		if (!(b))															\
		{																	\
			UT_DEBUGMSG(("DOM: X_CheckDocument failed: %s\n", #b));		\
			m_error = UT_IE_BOGUSDOCUMENT;									\
			return;															\
		}																	\
	} while (0)

// Require v to be true; otherwise log, set m_error to UT_ERROR and return.
#define X_CheckError(v)														\
	do																		\
	{																		\
		if (!(v))															\
		{																	\
			UT_DEBUGMSG(("DOM: X_CheckError failed: %s\n", #v));			\
			m_error = UT_ERROR;												\
			return;															\
		}																	\
	} while (0)

// Return immediately (after logging) when an earlier step already set m_error.
#define X_EatIfAlreadyError()												\
	do																		\
	{																		\
		if (m_error)														\
		{																	\
			UT_DEBUGMSG(("Already failed...\n"));							\
			return;															\
		}																	\
	} while (0)
350
// Heading levels: a chapter is level 1, nested sections follow at 2 through 7.
#define CHAPTER_HEADING		1
#define SECTION1_HEADING	2
#define SECTION2_HEADING	3
#define SECTION3_HEADING	4
#define SECTION4_HEADING	5
#define SECTION5_HEADING	6
#define SECTION6_HEADING	7
358
359 /*****************************************************************/
360 /*****************************************************************/
361
startElement(const gchar * name,const gchar ** atts)362 void IE_Imp_DocBook::startElement(const gchar *name,
363 const gchar **atts)
364 {
365 UT_DEBUGMSG(("DocBook import: startElement: %s\n", name));
366
367 // xml parser keeps running until buffer consumed
368 X_EatIfAlreadyError();
369
370 UT_uint32 tokenIndex = _mapNameToToken (name, s_Tokens, TokenTableSize);
371 bool bPush = true;
372
373 if (m_bMustAddTitle)
374 createTitle ();
375 m_bMustAddTitle = false;
376
377 switch (tokenIndex)
378 {
379 case TT_SET:
380 {
381 X_VerifyParseState(_PS_Init);
382 break;
383 }
384
385 case TT_ARTICLE:
386 case TT_DOCUMENT:
387 {
388 /* starts the document */
389 X_VerifyParseState(_PS_Init);
390 m_parseState = _PS_Doc;
391 X_CheckError(appendStrux(PTX_Section,static_cast<const gchar **>(NULL)));
392
393 m_iSectionDepth = 0; /* not in a section, nor a chapter */
394
395 if(tokenIndex == TT_ARTICLE)
396 m_iSectionDepth = 1;
397
398 break;
399 }
400
401 case TT_PART:
402 {
403 X_VerifyParseState(_PS_Doc);
404 X_CheckError(tagTop() == TT_DOCUMENT);
405 break;
406 }
407
408 case TT_CHAPTER:
409 {
410 X_VerifyParseState(_PS_Doc);
411 X_CheckError (m_iSectionDepth == 0);
412 m_iSectionDepth = 1;
413 m_iTitleDepth = 1;
414 /* we'll have to add a number */
415 m_bMustNumber = true;
416
417 break;
418 }
419
420 case TT_SECTION:
421 {
422 if(m_iBlockDepth > 0)
423 m_parseState = _PS_Sec;
424
425 X_CheckError((m_parseState ==_PS_Doc) || (m_parseState ==_PS_Sec));
426 // we must at least be in a chapter or article
427
428 m_parseState = _PS_Sec;
429 X_CheckError (m_iSectionDepth > 0);
430 m_iSectionDepth++;
431 m_iTitleDepth++;
432
433 const gchar *p_val = NULL;
434 p_val = _getXMLPropValue(static_cast<const gchar *>("role"), atts);
435 m_bMustNumber = false;
436
437 if(!p_val)
438 break;
439
440 if(!strcmp(p_val,"abi-frame"))
441 {
442 X_CheckError(appendStrux(PTX_SectionFrame, NULL));
443
444 m_iTitleDepth--;
445 m_bInFrame = true;
446 }
447 else if(!strcmp(p_val,"abi-toc"))
448 {
449 m_iTitleDepth--;
450 m_bInTOC = true; //so we can ignore the TOC title
451 }
452 else if(!strcmp(p_val,"numbered")) // for backward compatibility
453 {
454 m_bMustNumber = true;
455 }
456 else
457 {
458 m_sectionRole = p_val;
459 }
460
461 break;
462 }
463
464 case TT_ACKNO:
465 {
466 X_CheckError(tagTop() == TT_ARTICLE);
467 m_parseState = _PS_Sec;
468 break;
469 }
470
471 case TT_REFENTRY:
472 {
473 X_VerifyParseState(_PS_Sec);
474 break;
475 }
476
477 case TT_REFSYNOPSISDIV:
478 {
479 X_VerifyParseState(_PS_Sec);
480 X_CheckError(tagTop() == TT_REFENTRY);
481 m_parseState = _PS_Block;
482 m_iBlockDepth++;
483 break;
484 }
485
486 case TT_FUNCSYNOPSIS:
487 {
488 X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_Sec));
489 requireBlock();
490 break;
491 }
492
493 case TT_CMDSYNOPSIS:
494 case TT_EPIGRAPH:
495 {
496 X_CheckError(m_iSectionDepth > 0);
497 m_parseState = _PS_Block;
498 m_iBlockDepth++;
499 break;
500 }
501
502 case TT_BIBLIOGRAPHY:
503 case TT_GLOSSARY:
504 {
505 X_CheckError((m_parseState == _PS_Sec) || (m_parseState == _PS_Doc));
506 m_parseState = _PS_Sec;
507 m_iSectionDepth++;
508 break;
509 }
510
511 case TT_BIBLIODIV:
512 {
513 X_VerifyParseState(_PS_Sec);
514 X_CheckError(tagTop() == TT_BIBLIOGRAPHY);
515 m_parseState = _PS_Block;
516 m_iBlockDepth++;
517 break;
518 }
519 case TT_BIBLIOMIXED:
520 {
521 X_VerifyParseState(_PS_Sec);
522 m_parseState = _PS_MetaData;
523 m_bInMeta = true;
524 break;
525 }
526
527 case TT_BIBLIOMSET:
528 case TT_BIBLIOSET:
529 {
530 X_VerifyParseState(_PS_MetaData);
531 m_bInMeta = true;
532 break;
533 }
534
535 case TT_BIBLIOENTRY:
536 {
537 X_VerifyParseState(_PS_Sec);
538 m_parseState = _PS_MetaData;
539 m_bInMeta = true;
540 break;
541 }
542
543 case TT_APPENDIX:
544 case TT_COLOPHON:
545 case TT_DEDICATION:
546 case TT_PARTINTRO:
547 case TT_PREFACE:
548 {
549 X_VerifyParseState(_PS_Doc);
550 m_parseState = _PS_Sec;
551 m_iSectionDepth++;
552 break;
553 }
554
555 // Metadata elements:
556
557 case TT_APPENDIXINFO:
558 {
559 X_VerifyParseState(_PS_Sec);
560 X_CheckError(tagTop() == TT_APPENDIX);
561 m_parseState = _PS_MetaData;
562 m_bInMeta = true;
563 break;
564 }
565
566 case TT_PREFACEINFO:
567 {
568 X_VerifyParseState(_PS_Sec);
569 X_CheckError(tagTop() == TT_PREFACE);
570 m_parseState = _PS_MetaData;
571 m_bInMeta = true;
572 break;
573 }
574
575 case TT_CHAPTERINFO:
576 {
577 X_VerifyParseState(_PS_Doc);
578 X_CheckError(tagTop() == TT_CHAPTER);
579 m_parseState = _PS_MetaData;
580 m_bInMeta = true;
581 break;
582 }
583
584 case TT_ARTICLEINFO:
585 {
586 X_VerifyParseState(_PS_Doc);
587 X_CheckError(tagTop() == TT_ARTICLE);
588 m_parseState = _PS_MetaData;
589 m_bInMeta = true;
590 break;
591 }
592
593 case TT_PARTINFO:
594 {
595 X_VerifyParseState(_PS_Doc);
596 X_CheckError(tagTop() == TT_PART);
597 m_parseState = _PS_MetaData;
598 m_bInMeta = true;
599 break;
600 }
601
602 case TT_BOOKINFO:
603 {
604 X_VerifyParseState(_PS_Doc);
605 X_CheckError(tagTop() == TT_DOCUMENT);
606 m_parseState = _PS_MetaData;
607 m_bInMeta = true;
608 break;
609 }
610
611 case TT_SECTIONINFO:
612 {
613 X_VerifyParseState(_PS_Sec);
614 m_parseState = _PS_MetaData;
615 m_bInMeta = true;
616 break;
617 }
618
619 case TT_ARTPAGENUMS:
620 case TT_BIBLIOCOVERAGE:
621 case TT_BIBLIOMISC:
622 case TT_BIBLIORELATION:
623 case TT_BIBLIOSOURCE:
624 case TT_COLLAB:
625 case TT_COPYRIGHT:
626 case TT_EDITION:
627 case TT_ISSUENUM:
628 case TT_ITERMSET:
629 case TT_KEYWORDSET:
630 case TT_LEGALNOTICE:
631 case TT_PRINTHISTORY:
632 case TT_PUBDATE:
633 case TT_PUBLISHER:
634 case TT_SUBJECTSET:
635 case TT_VOLUMENUM:
636 {
637 X_VerifyParseState(_PS_MetaData);
638 m_parseState = _PS_Meta;
639 break;
640 }
641
642 case TT_AUTHOR:
643 {
644 X_CheckError((m_parseState == _PS_MetaData) || (m_parseState == _PS_Block));
645 break;
646 }
647
648 case TT_APPLICATION:
649 case TT_COMMAND:
650 {
651 X_CheckError((m_parseState == _PS_Meta) || (m_parseState == _PS_Block));
652 break;
653 }
654
655 case TT_AUTHORGROUP:
656 case TT_EDITOR:
657 case TT_INVPARTNUMBER:
658 {
659 X_VerifyParseState(_PS_MetaData);
660 break;
661 }
662
663 case TT_HOLDER:
664 case TT_YEAR:
665 {
666 X_VerifyParseState(_PS_Meta);
667 X_CheckError(tagTop() == TT_COPYRIGHT);
668 break;
669 }
670
671 case TT_DATE:
672 {
673 X_CheckError((m_parseState == _PS_Meta) || (m_parseState == _PS_Revision));
674 break;
675 }
676
677 case TT_AUTHORBLURB:
678 {
679 X_CheckError((m_parseState == _PS_Sec) || (m_parseState == _PS_MetaData) || (m_parseState == _PS_Block));
680 m_parseState = _PS_Sec;
681 break;
682 }
683
684 case TT_AUTHORINITIALS:
685 {
686 X_CheckError((m_parseState == _PS_Meta) || (m_parseState == _PS_MetaData) || (m_parseState == _PS_Block) || (m_parseState == _PS_Revision));
687 break;
688 }
689
690 case TT_FIRSTNAME:
691 case TT_HONORIFIC:
692 case TT_LINEAGE:
693 case TT_OTHERNAME:
694 case TT_SURNAME:
695 {
696 X_CheckError((m_parseState == _PS_Meta) || (m_parseState == _PS_MetaData) || (m_parseState == _PS_Block));
697 break;
698 }
699
700 case TT_PUBLISHERNAME:
701 {
702 X_CheckError((m_parseState == _PS_MetaData) || (m_parseState == _PS_Meta));
703 m_parseState = _PS_Meta;
704 break;
705 }
706
707 case TT_COLLABNAME:
708 case TT_KEYWORD:
709 case TT_SUBJECT:
710 case TT_SUBJECTTERM:
711 {
712 X_VerifyParseState(_PS_Meta);
713 break;
714 }
715
716 case TT_ABSTRACT:
717 {
718 X_CheckError((m_parseState ==_PS_MetaData) || (m_iSectionDepth > 0));
719
720 if(m_parseState == _PS_MetaData)
721 m_parseState = _PS_Meta;
722 else
723 m_parseState = _PS_Sec;
724
725 break;
726 }
727
728 case TT_FORMALPARA:
729 {
730 X_CheckError((m_parseState ==_PS_List) || (m_parseState ==_PS_Sec));
731 m_parseState = _PS_Sec;
732 break;
733 }
734
735 case TT_BLOCK:
736 {
737 X_CheckError((m_iSectionDepth > 0) || (m_parseState == _PS_Meta));
738
739 if(m_bInTOC)
740 break;
741
742 if(m_parseState != _PS_Meta)
743 {
744 m_parseState = _PS_Block;
745
746 X_CheckError(appendStrux(PTX_Block, NULL));
747
748 m_iBlockDepth++;
749 }
750 else
751 {
752 bPush = false;
753 }
754 break;
755 }
756
757 case TT_BRIDGEHEAD:
758 {
759 X_VerifyParseState(_PS_Sec);
760 m_parseState = _PS_Block;
761 m_iBlockDepth++;
762
763 const gchar *buf[3];
764 buf[2] = NULL;
765
766 const gchar *p_val = NULL;
767 p_val = _getXMLPropValue(static_cast<const gchar *>("renderas"), atts);
768 gchar style_att[10] = "Heading a";
769
770 if(p_val)
771 {
772 if(!strcmp(p_val, "sect1") || !strcmp(p_val, "sect2") || !strcmp(p_val, "sect3") || !strcmp(p_val, "sect4"))
773 {
774 char num = p_val[4];
775 style_att[8] = num;
776 }
777 else if(!strcmp(p_val, "sect5"))
778 {
779 char num = '4';
780 style_att[8] = num; // Heading 5 doesn't exist in AbiWord; use Heading 4 instead
781 }
782 else // renderas could be "other"
783 {
784 char num = '1';
785 style_att[8] = num; // default to Heading 1
786 }
787 }
788 else
789 {
790 char num = '1';
791 style_att[8] = num;
792 }
793
794 X_CheckError(appendStrux(PTX_Block, NULL));
795 buf[0] = PT_STYLE_ATTRIBUTE_NAME;
796 buf[1] = g_strdup(style_att);
797 X_CheckError(appendFmt(const_cast<const gchar **>(buf)));
798 FREEP(buf[1]);
799
800 break;
801 }
802
803 case TT_ITEMIZEDLIST:
804 case TT_ORDEREDLIST:
805 case TT_SEGMENTEDLIST:
806 case TT_VARIABLELIST:
807 {
808 X_CheckError((m_parseState == _PS_Sec) || (m_parseState == _PS_Block) || (m_parseState == _PS_List)); //nested lists are possible
809 m_parseState = _PS_ListSec;
810 m_iListDepth++;
811 break;
812 }
813
814 case TT_SEGLISTITEM:
815 {
816 X_VerifyParseState(_PS_ListSec);
817 X_CheckError(tagTop() == TT_SEGMENTEDLIST);
818 m_parseState = _PS_List;
819 break;
820 }
821
822 case TT_SEG:
823 {
824 X_VerifyParseState(_PS_List);
825 X_CheckError(tagTop() == TT_SEGLISTITEM);
826 m_parseState = _PS_Block;
827 m_iBlockDepth++;
828 break;
829 }
830
831 case TT_LISTITEM:
832 {
833 X_VerifyParseState(_PS_ListSec);
834 m_parseState = _PS_List;
835 break;
836 }
837
838 case TT_BLOCKQUOTE:
839 {
840 X_VerifyParseState(_PS_Sec);
841 m_parseState = _PS_Block;
842 m_iBlockDepth++;
843
844 const gchar *buf[3];
845 buf[0] = PT_STYLE_ATTRIBUTE_NAME;
846 buf[1] = "Block Text";
847 buf[2] = NULL;
848
849 X_CheckError(appendStrux(PTX_Block, NULL));
850 X_CheckError(appendFmt(const_cast<const gchar **>(buf)));
851 break;
852 }
853
854 case TT_PLAINTEXT:
855 {
856 m_parseState = _PS_Block;
857 m_iBlockDepth++;
858
859 const gchar *buf[3];
860 buf[0] = PT_STYLE_ATTRIBUTE_NAME;
861 buf[1] = "Plain Text";
862 buf[2] = NULL;
863
864 X_CheckError(appendStrux(PTX_Block, const_cast<const gchar **>(buf)));
865 m_bWhiteSignificant = true;
866 break;
867 }
868
869 case TT_PHRASE:
870 {
871 X_VerifyParseState(_PS_Block);
872
873 const gchar *p_val = NULL;
874 p_val = _getXMLPropValue(static_cast<const gchar *>("role"), atts);
875
876 const gchar *buf[7];
877 buf[0] = NULL;
878 buf[1] = NULL;
879 buf[2] = NULL;
880 buf[3] = NULL;
881 buf[4] = NULL;
882 buf[5] = NULL;
883 buf[6] = NULL;
884
885 if(p_val)
886 {
887 if(!strcmp(p_val, "strong"))
888 {
889 buf[0] = PT_PROPS_ATTRIBUTE_NAME;
890 buf[1] = "font-weight:bold";
891 X_CheckError(_pushInlineFmt(const_cast<const gchar **>(buf)));
892 X_CheckError(appendFmt(&m_vecInlineFmt));
893 m_bWroteBold = true;
894 }
895 else //possible field
896 {
897 if(!strcmp(p_val, "footnote_ref"))
898 {
899 break; //handled with TT_FOOTNOTEREF
900 }
901 else if(!strcmp(p_val, "footnote_anchor"))
902 {
903 X_CheckError(m_bInNote);
904 UT_UTF8String noteID;
905 UT_UTF8String_sprintf(noteID,"%i",m_iNoteID);
906
907 buf[2] = "footnote-id";
908 buf[3] = (gchar*)g_strdup(noteID.utf8_str());
909 buf[4] = PT_PROPS_ATTRIBUTE_NAME;
910 buf[5] = "text-position:superscript";
911 }
912 buf[0] = PT_TYPE_ATTRIBUTE_NAME;
913 buf[1] = (gchar*)p_val;
914
915 X_CheckError(appendObject(PTO_Field,const_cast<const gchar **>(buf)));
916 m_parseState = _PS_Field;
917 FREEP(buf[3]);
918 }
919 }
920
921 break;
922 }
923
924 case TT_EMPHASIS:
925 case TT_SUPERSCRIPT:
926 case TT_SUBSCRIPT:
927 {
928 X_VerifyParseState(_PS_Block);
929
930 const gchar *buf[3];
931 buf[0] = PT_PROPS_ATTRIBUTE_NAME;
932 buf[1] = NULL;
933 buf[2] = NULL;
934
935 switch(tokenIndex)
936 {
937 case TT_EMPHASIS:
938 buf[1] = "font-style:italic";
939 break;
940 case TT_SUPERSCRIPT:
941 buf[1] = "text-position:superscript";
942 break;
943 case TT_SUBSCRIPT:
944 buf[1] = "text-position:subscript";
945 break;
946 default:
947 UT_ASSERT_HARMLESS(UT_SHOULD_NOT_HAPPEN);
948 break;
949 }
950
951 X_CheckError(_pushInlineFmt(const_cast<const gchar **>(buf)));
952 X_CheckError(appendFmt(&m_vecInlineFmt));
953 break;
954 }
955
956 case TT_TITLE:
957 {
958 X_CheckError ((m_parseState == _PS_Doc) || (m_parseState == _PS_Sec) ||
959 (m_parseState == _PS_MetaData) || (m_parseState == _PS_Block) ||
960 (m_parseState == _PS_DataSec) || (m_parseState == _PS_Init) ||
961 (m_parseState == _PS_Table) || (m_parseState == _PS_ListSec));
962
963 m_bTitleAdded = false;
964 m_bMustAddTitle = true;
965
966 if ((m_parseState == _PS_Table) || (m_parseState == _PS_ListSec) || (m_parseState == _PS_Cell) ||
967 (m_parseState == _PS_Init) || (m_parseState == _PS_Cell) || (m_bInTOC))
968 {
969 m_bMustAddTitle = false;
970 }
971 if(((m_iSectionDepth == 1) || (m_iSectionDepth == 0)) && (m_parseState == _PS_Doc) && (m_iTitleDepth == 0))
972 {
973 m_bMustAddTitle = false; // this is for <book> and <article> titles
974 }
975 else if(m_parseState == _PS_MetaData)
976 {
977 m_bMustAddTitle = false;
978 m_parseState = _PS_Meta;
979 }
980
981 break;
982 }
983
984 case TT_PAGEBREAK:
985 {
986 X_CheckError((m_parseState == _PS_Block) || (m_iSectionDepth > 0));
987
988 if(m_parseState == _PS_Block)
989 {
990 UT_UCSChar ucs = UCS_FF;
991 appendSpan(&ucs,1);
992 }
993 else
994 {
995 bPush = false;
996 }
997
998 break;
999 }
1000
1001 case TT_QUOTE:
1002 {
1003 X_CheckError((m_parseState == _PS_Block) || (m_parseState = _PS_Meta));
1004 if(m_parseState == _PS_Block)
1005 {
1006 UT_UCSChar ucs = UCS_LDBLQUOTE;
1007 appendSpan(&ucs,1);
1008 }
1009 else
1010 {
1011 UT_ASSERT_HARMLESS(UT_TODO);
1012 }
1013 break;
1014 }
1015
1016 case TT_CITETITLE:
1017 case TT_PRODUCTNAME:
1018 case TT_PRODUCTNUMBER:
1019 {
1020 X_CheckError ((m_parseState == _PS_Block) || (m_parseState == _PS_MetaData));
1021 break;
1022 }
1023
1024 case TT_ABBREVIATION:
1025 case TT_ACRONYM:
1026 case TT_VARNAME:
1027 {
1028 X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_MetaData));
1029 break;
1030 }
1031
1032 case TT_ULINK: //an external link
1033 {
1034 const gchar *buf[3];
1035 buf[2] = NULL;
1036
1037 const gchar *p_val = NULL;
1038 p_val = _getXMLPropValue(static_cast<const gchar *>("url"), atts);
1039
1040 if(p_val)
1041 {
1042 buf[0] = "xlink:href";
1043 buf[1] = (gchar*)p_val;
1044 X_CheckError(appendObject(PTO_Hyperlink, const_cast<const gchar **>(buf)));
1045 }
1046 else
1047 {
1048 UT_ASSERT_HARMLESS(UT_TODO);
1049 }
1050
1051 break;
1052 }
1053
1054 case TT_LINK: //an internal link
1055 {
1056 const gchar *buf[3];
1057 buf[2] = NULL;
1058
1059 const gchar *p_val = NULL;
1060 p_val = _getXMLPropValue(static_cast<const gchar *>("linkend"), atts);
1061
1062 if(p_val)
1063 {
1064 UT_UTF8String link = "#";
1065 link += p_val;
1066
1067 buf[0] = "xlink:href";
1068 buf[1] = (gchar*)link.utf8_str();
1069 X_CheckError(appendObject(PTO_Hyperlink, const_cast<const gchar **>(buf)));
1070 }
1071 else
1072 {
1073 UT_ASSERT_HARMLESS(UT_TODO);
1074 }
1075 break;
1076 }
1077
1078 case TT_ATTRIBUTION:
1079 case TT_EMAIL:
1080 case TT_SGMLTAG:
1081 {
1082 X_VerifyParseState(_PS_Block);
1083 break;
1084 }
1085
1086 case TT_BOOKMARK:
1087 {
1088 X_VerifyParseState(_PS_Block);
1089 const gchar *buf[5];
1090 buf[4] = NULL;
1091
1092 const gchar *p_val = NULL;
1093 p_val = _getXMLPropValue(static_cast<const gchar *>("id"), atts);
1094
1095 if(p_val)
1096 {
1097 buf[0] = PT_TYPE_ATTRIBUTE_NAME;
1098 buf[1] = "start";
1099 buf[2] = PT_NAME_ATTRIBUTE_NAME;
1100 buf[3] = (gchar*)p_val;
1101 X_CheckError(appendObject(PTO_Bookmark, const_cast<const gchar **>(buf)));
1102 buf[1] = "end";
1103 X_CheckError(appendObject(PTO_Bookmark, const_cast<const gchar **>(buf)));
1104 }
1105 else
1106 {
1107 UT_ASSERT_HARMLESS(UT_TODO);
1108 }
1109 break;
1110 }
1111
1112 case TT_FOOTNOTE:
1113 {
1114 X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_Cell));
1115 m_parseState = _PS_Sec;
1116 m_bInNote = true;
1117 m_iFootnotes++;
1118
1119 const gchar *buf[3];
1120 UT_UTF8String noteID;
1121
1122 if(m_iNoteID == -1)
1123 {
1124 UT_UTF8String id;
1125
1126 m_iNoteID = m_iFootnotes;
1127 UT_UTF8String_sprintf(id,"%i",m_iNoteID);
1128
1129 const gchar *ref[7];
1130 ref[0] = PT_TYPE_ATTRIBUTE_NAME;
1131 ref[1] = "footnote_ref";
1132 ref[2] = "footnote-id";
1133 ref[3] = (gchar*)g_strdup(id.utf8_str());
1134 ref[4] = PT_PROPS_ATTRIBUTE_NAME;
1135 ref[5] = "text-position:superscript";
1136 ref[6] = NULL;
1137 X_CheckError(appendObject(PTO_Field,const_cast<const gchar **>(ref)));
1138 FREEP(ref[3]);
1139 }
1140
1141 UT_UTF8String_sprintf(noteID,"%i",m_iNoteID);
1142 buf[0] = "footnote-id";
1143 buf[1] = (gchar*)g_strdup(noteID.utf8_str());
1144 buf[2] = NULL;
1145
1146 X_CheckError(appendStrux(PTX_SectionFootnote,const_cast<const gchar **>(buf)));
1147 FREEP(buf[1]);
1148
1149 break;
1150 }
1151
1152 case TT_FOOTNOTEREF:
1153 {
1154 X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_Field) || (m_parseState == _PS_Cell));
1155 m_parseState = _PS_Field;
1156
1157 const gchar *buf[7];
1158 buf[0] = PT_TYPE_ATTRIBUTE_NAME;
1159 buf[1] = "footnote_ref";
1160 buf[2] = NULL;
1161 buf[3] = NULL;
1162 buf[4] = NULL;
1163 buf[5] = NULL;
1164 buf[6] = NULL;
1165
1166 const gchar *p_val = NULL;
1167 p_val = _getXMLPropValue(static_cast<const gchar *>("linkend"), atts);
1168
1169 if(p_val)
1170 {
1171 if(strstr(p_val,"footnote-id-") != NULL)
1172 {
1173 p_val += 12;
1174 }
1175 else
1176 {
1177 p_val = "-1";
1178 }
1179 m_iNoteID = atoi(p_val);
1180
1181 buf[2] = "footnote-id";
1182 buf[3] = (gchar*)p_val;
1183 buf[4] = PT_PROPS_ATTRIBUTE_NAME;
1184 buf[5] = "text-position:superscript";
1185 buf[6] = NULL;
1186 X_CheckError(appendObject(PTO_Field,const_cast<const gchar **>(buf)));
1187 }
1188 else
1189 {
1190 UT_ASSERT_HARMLESS(UT_TODO);
1191 }
1192 break;
1193 }
1194
1195 // TOC elements:
1196
1197 case TT_TOC:
1198 {
1199 X_VerifyParseState(_PS_Sec);
1200 requireBlock();
1201
1202 X_CheckError(appendStrux(PTX_SectionTOC, NULL));
1203
1204 m_bInTOC = true;
1205 break;
1206 }
1207
1208 case TT_TOCBACK:
1209 case TT_TOCCHAP:
1210 case TT_TOCFRONT:
1211 case TT_TOCLEVEL1:
1212 case TT_TOCLEVEL2:
1213 case TT_TOCLEVEL3:
1214 case TT_TOCLEVEL4:
1215 case TT_TOCLEVEL5:
1216 case TT_TOCPART:
1217 {
1218 X_CheckError(m_bInTOC);
1219 break;
1220 }
1221
1222 // Index elements (not imported):
1223
1224 case TT_INDEX:
1225 {
1226 X_CheckError((m_parseState == _PS_Sec) || (m_parseState == _PS_Doc));
1227 break;
1228 }
1229
1230 case TT_INDEXTERM:
1231 {
1232 UT_ASSERT_HARMLESS(UT_TODO);
1233 break;
1234 }
1235
1236 case TT_PRIMARY:
1237 case TT_SECONDARY:
1238 case TT_SEE:
1239 case TT_SEEALSO:
1240 case TT_TERTIARY:
1241 {
1242 X_CheckError(tagTop() == TT_INDEXTERM);
1243 break;
1244 }
1245
1246 case TT_INDEXDIV:
1247 case TT_INDEXENTRY:
1248 case TT_INDEXINFO:
1249 case TT_PRIMARYIE:
1250 case TT_SECONDARYIE:
1251 case TT_SEEIE:
1252 case TT_TERTIARYIE:
1253 {
1254 X_CheckError(m_bInIndex);
1255 break;
1256 }
1257
1258 // Table elements:
1259
1260 case TT_TABLE:
1261 {
1262 if((m_parseState == _PS_Doc) && (tagTop() == TT_ARTICLE))
1263 {
1264 m_parseState = _PS_Sec;
1265 }
1266
1267 X_CheckError((m_parseState == _PS_Sec) || (m_parseState == _PS_List));
1268 m_parseState = _PS_Table;
1269 //X_CheckError(m_TableHelperStack->OpenTable (getDoc(),static_cast<const char *>(NULL)));
1270 X_CheckError(appendStrux(PTX_SectionTable, NULL));
1271
1272 m_bInTable = true;
1273
1274 break;
1275 }
1276
1277 case TT_ENTRY: //table cell
1278 {
1279 X_VerifyParseState(_PS_Table);
1280 m_parseState = _PS_Cell;
1281
1282 //TODO: use the table helper
1283 X_CheckError(appendStrux(PTX_SectionCell, NULL));
1284
1285 break;
1286 }
1287
1288 case TT_ENTRYTBL: //nested table
1289 {
1290 X_VerifyParseState(_PS_Table);
1291 X_CheckError(appendStrux(PTX_SectionCell,static_cast<const gchar **>(NULL)));
1292 requireBlock();
1293
1294 X_CheckError(appendStrux(PTX_SectionTable, NULL));
1295
1296 m_parseState = _PS_Table; //requireBlock() will reset this
1297 break;
1298 }
1299
1300 case TT_COL:
1301 case TT_COLSPEC:
1302 case TT_ROW:
1303 case TT_TBODY:
1304 case TT_TFOOT:
1305 case TT_TGROUP:
1306 case TT_THEAD:
1307 {
1308 X_VerifyParseState(_PS_Table); //just make sure these elements are in a table
1309 break;
1310 }
1311
1312 // Revision elements:
1313
1314 case TT_REVHISTORY:
1315 {
1316 X_VerifyParseState(_PS_MetaData);
1317 m_parseState = _PS_RevisionSec;
1318 break;
1319 }
1320
1321 case TT_REVISION:
1322 {
1323 X_VerifyParseState(_PS_RevisionSec);
1324 m_parseState = _PS_Revision;
1325 break;
1326 }
1327
1328 case TT_REVREMARK:
1329 case TT_REVDESCRIPTION:
1330 case TT_REVNUMBER:
1331 {
1332 //TODO: import revisions
1333 X_VerifyParseState(_PS_Revision);
1334 break;
1335 }
1336
1337 case TT_EQUATION:
1338 {
1339 X_VerifyParseState(_PS_Block);
1340 m_bInMath = true;
1341 break;
1342 }
1343
1344 case TT_ALT:
1345 {
1346 X_CheckError(m_bInMath);
1347 break;
1348 }
1349
1350 case TT_TEXTOBJECT:
1351 {
1352 X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_DataSec));
1353 m_parseState = _PS_Block;
1354 m_iBlockDepth++;
1355 break;
1356 }
1357
1358 case TT_GRAPHICCO:
1359 {
1360 X_CheckError((m_parseState == _PS_Sec) || (m_parseState == _PS_Block));
1361 requireBlock();
1362 m_parseState = _PS_Block;
1363 break;
1364 }
1365
1366 case TT_MEDIAOBJECTCO:
1367 {
1368 X_CheckError((m_parseState == _PS_Sec) || (m_parseState == _PS_Block) || (m_parseState == _PS_DataSec));
1369 requireBlock();
1370 m_parseState = _PS_DataSec;
1371 m_iDataDepth++;
1372 break;
1373 }
1374
1375 case TT_AREA:
1376 {
1377 X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_DataSec));
1378 break;
1379 }
1380
1381 case TT_AREASET:
1382 {
1383 X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_DataSec));
1384 X_CheckError(tagTop() == TT_AREASPEC);
1385 break;
1386 }
1387
1388 case TT_AREASPEC:
1389 {
1390 X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_DataSec));
1391 break;
1392 }
1393
1394 case TT_SCREENSHOT:
1395 {
1396 if(m_parseState == _PS_Meta)
1397 return; //don't handle images embedded in metadata
1398
1399 requireBlock();
1400 X_VerifyParseState(_PS_Block);
1401 m_parseState = _PS_DataSec;
1402 m_iDataDepth++;
1403 break;
1404 }
1405
1406 case TT_SCREEN:
1407 case TT_SYNOPSIS:
1408 {
1409 X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_ListSec) || (m_parseState == _PS_List) || (m_parseState == _PS_Sec));
1410 requireBlock();
1411 m_parseState = _PS_Block;
1412 break;
1413 }
1414
1415 case TT_SCREENINFO:
1416 {
1417 X_VerifyParseState(_PS_DataSec);
1418 X_CheckError(tagTop() == TT_SCREENSHOT);
1419 m_parseState = _PS_Meta;
1420 m_iDataDepth++;
1421 break;
1422 }
1423
1424 case TT_GRAPHIC:
1425 {
1426 if(m_parseState == _PS_Meta)
1427 return; //don't handle images embedded in metadata
1428
1429 requireBlock();
1430 X_CheckError ((m_parseState == _PS_Block) || (m_parseState == _PS_DataSec) || (m_bInMath));
1431 m_parseState = _PS_DataItem;
1432 m_iDataDepth++;
1433
1434 const gchar *p_val = NULL;
1435 p_val = _getXMLPropValue(static_cast<const gchar *>("fileref"), atts);
1436
1437 if(p_val)
1438 createImage (p_val, atts);
1439
1440 break;
1441 }
1442
1443 case TT_FIGURE:
1444 {
1445 requireBlock();
1446 X_VerifyParseState(_PS_Block);
1447 m_parseState = _PS_DataSec;
1448 m_iDataDepth++;
1449 break;
1450 }
1451
1452 case TT_VIDEOOBJECT:
1453 {
1454 X_VerifyParseState(_PS_DataSec);
1455 m_parseState = _PS_DataSec;
1456 m_iDataDepth++;
1457 break;
1458 }
1459
1460 case TT_VIDEODATA:
1461 {
1462 X_CheckError(tagTop() == TT_VIDEOOBJECT);
1463 X_VerifyParseState(_PS_DataSec);
1464 m_parseState = _PS_DataItem;
1465 m_iDataDepth++;
1466 break;
1467 }
1468
1469 case TT_MEDIAOBJECT:
1470 {
1471 requireBlock();
1472 X_CheckError ((m_parseState == _PS_Meta) || (m_parseState == _PS_Block) || (m_parseState == _PS_DataSec));
1473 m_parseState = _PS_DataSec;
1474 m_iDataDepth++;
1475 break;
1476 }
1477
1478 case TT_IMAGEOBJECT:
1479 {
1480 X_CheckError ((m_parseState == _PS_Meta) || (m_parseState == _PS_Block) || (m_parseState == _PS_DataSec));
1481 m_parseState = _PS_DataSec;
1482 m_iDataDepth++;
1483 break;
1484 }
1485
1486 case TT_IMAGEDATA:
1487 {
1488 X_CheckError(tagTop() == TT_IMAGEOBJECT);
1489 m_parseState = _PS_DataItem;
1490 m_iDataDepth++;
1491
1492 const gchar *p_val = NULL;
1493 p_val = _getXMLPropValue(static_cast<const gchar *>("fileref"), atts);
1494
1495 if(p_val)
1496 createImage (p_val, atts);
1497
1498 break;
1499 }
1500
1501 case TT_OTHER:
1502 default:
1503 UT_ASSERT_HARMLESS(UT_TODO);
1504 UT_DEBUGMSG(("Unknown or knowingly unhandled tag [%s]\n",name));
1505 break;
1506 }
1507
1508 if(bPush)
1509 {
1510 m_utnsTagStack.push(tokenIndex);
1511 xxx_UT_DEBUGMSG(("Pushing %d onto stack\n",tokenIndex));
1512 }
1513 }
1514
endElement(const gchar * name)1515 void IE_Imp_DocBook::endElement(const gchar *name)
1516 {
1517
1518 UT_DEBUGMSG(("DocBook import: endElement: %s\n", name));
1519
1520 // xml parser keeps running until buffer consumed
1521 X_EatIfAlreadyError();
1522
1523 UT_uint32 tokenIndex = _mapNameToToken (name, s_Tokens, TokenTableSize);
1524 bool bPop = true;
1525
1526 switch (tokenIndex)
1527 {
1528 case TT_SET:
1529 {
1530 X_VerifyParseState(_PS_Init);
1531 break;
1532 }
1533
1534 case TT_DOCUMENT:
1535 {
1536 X_VerifyParseState(_PS_Doc);
1537 m_parseState = _PS_Init;
1538 // we've already imported a <book>, so we don't want to set any more metadata (a <set> can
1539 // contain more than one <book>)
1540 m_bReadBook = true;
1541 break;
1542 }
1543
1544 case TT_PART:
1545 {
1546 X_VerifyParseState(_PS_Doc);
1547 break;
1548 }
1549
1550 case TT_ARTICLE:
1551 {
1552 X_CheckError((m_parseState == _PS_Sec) || (m_parseState == _PS_Doc));
1553 m_parseState = _PS_Doc;
1554 m_iSectionDepth = 0;
1555 break;
1556 }
1557
1558 case TT_CHAPTER:
1559 {
1560 X_VerifyParseState(_PS_Sec);
1561 m_parseState = _PS_Doc;
1562 m_iSectionDepth = 0;
1563 m_iTitleDepth = 0;
1564 m_bMustNumber = false;
1565 break;
1566 }
1567
1568 case TT_SECTION:
1569 {
1570 if(m_iBlockDepth > 0) //can happen if requireBlock() was used
1571 {
1572 m_parseState = _PS_Sec;
1573 m_iBlockDepth = 0;
1574 }
1575
1576 X_VerifyParseState(_PS_Sec);
1577 m_iSectionDepth--;
1578
1579 if(!m_bInTOC && !m_bInFrame)
1580 m_iTitleDepth--;
1581
1582 if(m_iSectionDepth == 0)
1583 m_parseState = _PS_Doc;
1584
1585 if(m_bInFrame)
1586 {
1587 X_CheckError(appendStrux(PTX_EndFrame,NULL));
1588 m_bInFrame = false;
1589 }
1590 else if(m_bInTOC)
1591 {
1592 m_bInTOC = false;
1593 }
1594
1595 m_bMustNumber = false;
1596 break;
1597 }
1598
1599 case TT_ACKNO:
1600 {
1601 X_VerifyParseState(_PS_Sec);
1602 m_parseState = _PS_Doc;
1603 break;
1604 }
1605
1606 case TT_REFENTRY:
1607 {
1608 X_VerifyParseState(_PS_Sec);
1609 break;
1610 }
1611
1612 case TT_FUNCSYNOPSIS:
1613 {
1614 X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_Sec));
1615
1616 if(m_bRequiredBlock)
1617 {
1618 m_bRequiredBlock = false;
1619 m_iBlockDepth--;
1620 }
1621
1622 if(m_iBlockDepth == 0)
1623 m_parseState = _PS_Sec;
1624
1625 break;
1626 }
1627
1628 case TT_CMDSYNOPSIS:
1629 {
1630 X_VerifyParseState(_PS_Block);
1631 m_iBlockDepth--;
1632
1633 if(m_iBlockDepth == 0)
1634 m_parseState = _PS_Sec;
1635
1636 break;
1637 }
1638
1639 case TT_EPIGRAPH:
1640 case TT_REFSYNOPSISDIV:
1641 {
1642 X_VerifyParseState(_PS_Block);
1643 m_parseState = _PS_Sec;
1644 m_iBlockDepth--;
1645 break;
1646 }
1647
1648 case TT_BIBLIOGRAPHY:
1649 case TT_GLOSSARY:
1650 {
1651 X_CheckError((m_parseState == _PS_Sec) || (m_parseState == _PS_Doc));
1652 m_iSectionDepth--;
1653
1654 if(m_iSectionDepth == 0)
1655 m_parseState = _PS_Doc;
1656
1657 break;
1658 }
1659
1660 case TT_BIBLIODIV:
1661 {
1662 X_VerifyParseState(_PS_Sec);
1663 m_iBlockDepth--;
1664 break;
1665 }
1666
1667 case TT_BIBLIOMIXED:
1668 {
1669 X_VerifyParseState(_PS_MetaData);
1670 m_bInMeta = false;
1671 m_parseState = _PS_Sec;
1672 break;
1673 }
1674
1675 case TT_BIBLIOENTRY:
1676 {
1677 X_VerifyParseState(_PS_MetaData);
1678 m_bInMeta = false;
1679 m_parseState = _PS_Sec;
1680
1681 break;
1682 }
1683
1684 case TT_APPENDIX:
1685 case TT_COLOPHON:
1686 case TT_DEDICATION:
1687 case TT_PARTINTRO:
1688 case TT_PREFACE:
1689 {
1690 X_VerifyParseState(_PS_Sec);
1691 m_parseState = _PS_Doc;
1692 m_iSectionDepth--;
1693 break;
1694 }
1695
1696 // Metadata elements:
1697
1698 case TT_APPENDIXINFO:
1699 case TT_ARTICLEINFO:
1700 case TT_BOOKINFO:
1701 case TT_CHAPTERINFO:
1702 case TT_PARTINFO:
1703 case TT_PREFACEINFO:
1704 {
1705 X_VerifyParseState(_PS_MetaData);
1706 m_parseState = _PS_Doc;
1707 m_bInMeta = false;
1708 break;
1709 }
1710
1711 case TT_SECTIONINFO:
1712 {
1713 X_VerifyParseState(_PS_MetaData);
1714 m_parseState = _PS_Sec;
1715 m_bInMeta = false;
1716 break;
1717 }
1718
1719 case TT_ARTPAGENUMS:
1720 case TT_BIBLIOCOVERAGE:
1721 case TT_BIBLIOMISC:
1722 case TT_BIBLIORELATION:
1723 case TT_BIBLIOSOURCE:
1724 case TT_COLLAB:
1725 case TT_COPYRIGHT:
1726 case TT_EDITION:
1727 case TT_ISSUENUM:
1728 case TT_ITERMSET:
1729 case TT_KEYWORDSET:
1730 case TT_LEGALNOTICE:
1731 case TT_PRINTHISTORY:
1732 case TT_PUBDATE:
1733 case TT_PUBLISHERNAME:
1734 case TT_SUBJECTSET:
1735 case TT_VOLUMENUM:
1736 {
1737 X_VerifyParseState(_PS_Meta);
1738 m_parseState = _PS_MetaData;
1739 break;
1740 }
1741
1742 case TT_AUTHORGROUP:
1743 case TT_BIBLIOMSET:
1744 case TT_BIBLIOSET:
1745 case TT_EDITOR:
1746 case TT_INVPARTNUMBER:
1747 case TT_PUBLISHER:
1748 {
1749 X_VerifyParseState(_PS_MetaData);
1750 break;
1751 }
1752
1753 case TT_AUTHOR:
1754 {
1755 X_CheckError((m_parseState == _PS_MetaData) || (m_parseState == _PS_Block));
1756 break;
1757 }
1758
1759 case TT_APPLICATION:
1760 case TT_COMMAND:
1761 {
1762 X_CheckError((m_parseState == _PS_Meta) || (m_parseState == _PS_Block));
1763 break;
1764 }
1765
1766 case TT_DATE:
1767 {
1768 X_CheckError((m_parseState == _PS_Meta) || (m_parseState == _PS_Revision));
1769 break;
1770 }
1771
1772 case TT_AUTHORBLURB:
1773 {
1774 X_VerifyParseState(_PS_Sec);
1775
1776 if(m_bInMeta)
1777 m_parseState = _PS_MetaData;
1778 else if(m_iBlockDepth > 0)
1779 m_parseState = _PS_Block;
1780
1781 break;
1782 }
1783
1784 case TT_AUTHORINITIALS:
1785 {
1786 X_CheckError((m_parseState == _PS_Meta) || (m_parseState == _PS_MetaData) || (m_parseState == _PS_Block) || (m_parseState == _PS_Revision));
1787 break;
1788 }
1789
1790 case TT_FIRSTNAME:
1791 case TT_HONORIFIC:
1792 case TT_LINEAGE:
1793 case TT_OTHERNAME:
1794 case TT_SURNAME:
1795 {
1796 X_CheckError((m_parseState == _PS_Meta) || (m_parseState == _PS_MetaData) || (m_parseState == _PS_Block));
1797 break;
1798 }
1799
1800 case TT_COLLABNAME:
1801 case TT_HOLDER:
1802 case TT_KEYWORD:
1803 case TT_SUBJECT:
1804 case TT_SUBJECTTERM:
1805 case TT_YEAR:
1806 {
1807 X_VerifyParseState(_PS_Meta);
1808 break;
1809 }
1810
1811 case TT_ABSTRACT:
1812 {
1813 X_CheckError((m_parseState ==_PS_Meta) || (m_iSectionDepth > 0));
1814
1815 if(m_parseState == _PS_Meta)
1816 m_parseState = _PS_MetaData;
1817
1818 break;
1819 }
1820
1821 case TT_ITEMIZEDLIST:
1822 case TT_ORDEREDLIST:
1823 case TT_SEGMENTEDLIST:
1824 case TT_VARIABLELIST:
1825 {
1826 X_VerifyParseState(_PS_ListSec);
1827 m_iListDepth--;
1828
1829 if(m_iListDepth > 0) //nested lists are possible
1830 m_parseState = _PS_List;
1831 else if(m_iBlockDepth > 0)
1832 m_parseState = _PS_Block;
1833 else if(m_iListDepth == 0)
1834 m_parseState = _PS_Sec;
1835
1836 break;
1837 }
1838
1839 case TT_LISTITEM:
1840 case TT_SEGLISTITEM:
1841 {
1842 X_VerifyParseState(_PS_List);
1843 m_parseState = _PS_ListSec;
1844 break;
1845 }
1846
1847 case TT_SEG:
1848 {
1849 X_VerifyParseState(_PS_Block);
1850 m_parseState = _PS_List;
1851 m_iBlockDepth--;
1852 break;
1853 }
1854
1855 case TT_BRIDGEHEAD:
1856 {
1857 UT_ASSERT_HARMLESS(m_lenCharDataSeen==0);
1858
1859 X_VerifyParseState(_PS_Block);
1860 m_parseState = _PS_Sec;
1861 m_iBlockDepth--;
1862
1863 X_CheckDocument(_getInlineDepth()==0);
1864 _popInlineFmt();
1865 X_CheckError(appendFmt(&m_vecInlineFmt));
1866
1867 break;
1868 }
1869
1870 case TT_BLOCKQUOTE:
1871 {
1872 UT_ASSERT_HARMLESS(m_lenCharDataSeen==0);
1873
1874 X_VerifyParseState(_PS_Block);
1875 m_parseState = _PS_Sec;
1876 m_iBlockDepth--;
1877
1878 X_CheckDocument(_getInlineDepth()==0);
1879 _popInlineFmt();
1880 X_CheckError(appendFmt(&m_vecInlineFmt));
1881
1882 break;
1883 }
1884
1885 case TT_PLAINTEXT:
1886 {
1887 UT_ASSERT_HARMLESS(m_lenCharDataSeen==0);
1888
1889 X_VerifyParseState(_PS_Block);
1890 m_iBlockDepth--;
1891
1892 if(m_bInTable)
1893 m_parseState = _PS_Cell;
1894 else if(m_iBlockDepth == 0)
1895 m_parseState = _PS_Sec;
1896 else if(m_iDataDepth)
1897 m_parseState = _PS_DataSec;
1898 else
1899 m_parseState = _PS_Block;
1900
1901 m_bWhiteSignificant = false;
1902 break;
1903 }
1904
1905 case TT_FORMALPARA:
1906 {
1907 X_CheckError((m_parseState ==_PS_List) || (m_parseState ==_PS_Sec));
1908
1909 if(m_iListDepth > 0)
1910 m_parseState = _PS_List;
1911 else
1912 m_parseState = _PS_Sec;
1913
1914 break;
1915 }
1916
1917 case TT_BLOCK:
1918 {
1919 UT_ASSERT_HARMLESS(m_lenCharDataSeen==0);
1920
1921 if(m_bInTOC)
1922 break;
1923
1924 if(m_parseState != _PS_Meta)
1925 {
1926 X_CheckError((m_parseState == _PS_Cell) || (m_parseState == _PS_Block));
1927 X_CheckDocument(_getInlineDepth()==0);
1928
1929 m_iBlockDepth--;
1930
1931 if(m_bInTable)
1932 m_parseState = _PS_Cell;
1933 else if(m_iListDepth > 0)
1934 m_parseState = _PS_List;
1935 else if(m_iDataDepth > 0)
1936 m_parseState = _PS_DataSec;
1937 else if(m_iBlockDepth == 0)
1938 m_parseState = _PS_Sec;
1939 }
1940 else
1941 {
1942 bPop = false;
1943 }
1944 break;
1945 }
1946
1947 case TT_PHRASE:
1948 {
1949 X_CheckError((m_parseState ==_PS_Field) || (m_parseState ==_PS_Block) || (m_parseState == _PS_Cell));
1950
1951 if(m_parseState ==_PS_Field)
1952 m_parseState = _PS_Block;
1953
1954 if(m_bWroteBold)
1955 {
1956 m_bWroteBold = false;
1957 X_CheckDocument(_getInlineDepth()>0);
1958 _popInlineFmt();
1959 X_CheckError(appendFmt(&m_vecInlineFmt));
1960 }
1961
1962 break;
1963 }
1964
1965 case TT_EMPHASIS:
1966 case TT_SUPERSCRIPT:
1967 case TT_SUBSCRIPT:
1968 {
1969 UT_ASSERT_HARMLESS(m_lenCharDataSeen==0);
1970
1971 X_VerifyParseState(_PS_Block);
1972 X_CheckDocument(_getInlineDepth()>0);
1973 _popInlineFmt();
1974 X_CheckError(appendFmt(&m_vecInlineFmt));
1975
1976 break;
1977 }
1978
1979 case TT_TITLE:
1980 {
1981 if (m_bTitleAdded)
1982 {
1983 UT_ASSERT_HARMLESS(m_lenCharDataSeen==0);
1984 X_VerifyParseState(_PS_Block);
1985 m_parseState = _PS_Sec;
1986 X_CheckDocument(_getInlineDepth()==0);
1987 }
1988 else if(m_parseState == _PS_DataSec)
1989 {
1990 m_parseState = _PS_Block;
1991 }
1992 else if(m_parseState == _PS_Meta)
1993 {
1994 m_parseState = _PS_MetaData;
1995 }
1996 else if(m_parseState == _PS_Block)
1997 {
1998 m_parseState = _PS_Sec;
1999 }
2000
2001 m_bTitleAdded = false;
2002 m_bMustAddTitle = false;
2003 break;
2004 }
2005
2006 case TT_ATTRIBUTION:
2007 case TT_SGMLTAG:
2008 {
2009 X_VerifyParseState(_PS_Block);
2010 break;
2011 }
2012
2013 case TT_EMAIL: // an email address
2014 {
2015 X_VerifyParseState(_PS_Block);
2016 X_CheckError(appendObject(PTO_Hyperlink, NULL));
2017 break;
2018 }
2019
2020 case TT_BOOKMARK:
2021 {
2022 /* all has been taken care of in startElement. */
2023 break;
2024 }
2025
2026 case TT_LINK:
2027 case TT_ULINK:
2028 /* end of the link */
2029 {
2030 UT_ASSERT_HARMLESS(m_lenCharDataSeen==0);
2031 X_CheckError(appendObject(PTO_Hyperlink, NULL));
2032 }
2033 break;
2034
2035 case TT_PAGEBREAK:
2036 {
2037 X_CheckError((m_parseState == _PS_Block) || (m_iSectionDepth > 0));
2038 break;
2039 }
2040
2041 case TT_QUOTE:
2042 {
2043 X_CheckError((m_parseState == _PS_Block) || (m_parseState = _PS_Meta));
2044 if(m_parseState == _PS_Block)
2045 {
2046 UT_UCSChar ucs = UCS_RDBLQUOTE;
2047 appendSpan(&ucs,1);
2048 }
2049 else
2050 {
2051 UT_ASSERT_HARMLESS(UT_TODO);
2052 }
2053 break;
2054 }
2055
2056 case TT_CITETITLE:
2057 case TT_PRODUCTNAME:
2058 case TT_PRODUCTNUMBER:
2059 {
2060 X_CheckError ((m_parseState == _PS_Block) || (m_parseState == _PS_MetaData));
2061 break;
2062 }
2063
2064 case TT_ABBREVIATION:
2065 case TT_ACRONYM:
2066 case TT_VARNAME:
2067 {
2068 X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_MetaData));
2069 break;
2070 }
2071
2072 case TT_FOOTNOTE:
2073 {
2074 X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_Cell));
2075 X_CheckError(appendStrux(PTX_EndFootnote,static_cast<const gchar **>(NULL)));
2076
2077 if(m_bInTable)
2078 m_parseState = _PS_Cell;
2079 else
2080 m_parseState = _PS_Block;
2081
2082 m_bInNote = false;
2083 m_iNoteID = -1;
2084 break;
2085 }
2086
2087 case TT_FOOTNOTEREF:
2088 {
2089 X_VerifyParseState(_PS_Field);
2090
2091 if(m_bInTable)
2092 m_parseState = _PS_Cell;
2093 else
2094 m_parseState = _PS_Block;
2095
2096 break;
2097 }
2098
2099 // TOC elements:
2100
2101 case TT_TOC:
2102 {
2103 if(m_bRequiredBlock)
2104 {
2105 m_bRequiredBlock = false;
2106 m_iBlockDepth--;
2107 m_parseState = _PS_Sec;
2108 }
2109
2110 X_VerifyParseState(_PS_Sec);
2111 X_CheckError(appendStrux(PTX_EndTOC,static_cast<const gchar **>(NULL)));
2112 X_CheckError(appendStrux(PTX_Block,static_cast<const gchar **>(NULL)));
2113 break;
2114 }
2115
2116 case TT_TOCBACK:
2117 case TT_TOCCHAP:
2118 case TT_TOCFRONT:
2119 case TT_TOCLEVEL1:
2120 case TT_TOCLEVEL2:
2121 case TT_TOCLEVEL3:
2122 case TT_TOCLEVEL4:
2123 case TT_TOCLEVEL5:
2124 case TT_TOCPART:
2125 {
2126 X_CheckError(m_bInTOC);
2127 break;
2128 }
2129
2130 // Index elements (not imported):
2131
2132 case TT_INDEX:
2133 {
2134 X_CheckError((m_parseState == _PS_Sec) || (m_parseState == _PS_Doc));
2135 m_bInIndex = false;
2136 break;
2137 }
2138
2139 case TT_INDEXTERM:
2140 case TT_PRIMARY:
2141 case TT_SECONDARY:
2142 case TT_SEE:
2143 case TT_SEEALSO:
2144 case TT_TERTIARY:
2145 {
2146 UT_ASSERT_HARMLESS(UT_TODO);
2147 break;
2148 }
2149
2150 case TT_INDEXDIV:
2151 case TT_INDEXENTRY:
2152 case TT_INDEXINFO:
2153 case TT_PRIMARYIE:
2154 case TT_SECONDARYIE:
2155 case TT_SEEIE:
2156 case TT_TERTIARYIE:
2157 {
2158 X_CheckError(m_bInIndex);
2159 break;
2160 }
2161
2162 // Table elements:
2163
2164 case TT_TABLE:
2165 {
2166 X_VerifyParseState(_PS_Table);
2167 X_CheckError(appendStrux(PTX_EndTable,static_cast<const gchar **>(NULL)));
2168
2169 if(m_bRequiredBlock)
2170 {
2171 m_bRequiredBlock = false;
2172 m_iBlockDepth--;
2173 }
2174
2175 if(m_iListDepth > 0)
2176 m_parseState = _PS_List;
2177 else
2178 m_parseState = _PS_Sec;
2179
2180 m_bInTable = false;
2181 break;
2182 }
2183
2184 case TT_ENTRY: //table cell
2185 {
2186 if(m_bWroteEntryPara)
2187 {
2188 m_parseState = _PS_Cell;
2189 m_iBlockDepth--;
2190 m_bWroteEntryPara = false;
2191 }
2192
2193 X_VerifyParseState(_PS_Cell);
2194 X_CheckError(appendStrux(PTX_EndCell,NULL));
2195 m_parseState = _PS_Table;
2196
2197 break;
2198 }
2199
2200 case TT_ENTRYTBL: //nested table
2201 {
2202 X_VerifyParseState(_PS_Table);
2203 X_CheckError(appendStrux(PTX_EndTable,NULL));
2204 X_CheckError(appendStrux(PTX_EndCell,NULL));
2205
2206 if(m_bRequiredBlock)
2207 {
2208 m_bRequiredBlock = false;
2209 m_iBlockDepth--;
2210 }
2211
2212 break;
2213 }
2214
2215 case TT_COL:
2216 case TT_COLSPEC:
2217 case TT_ROW:
2218 case TT_TBODY:
2219 case TT_TFOOT:
2220 case TT_TGROUP:
2221 case TT_THEAD:
2222 {
2223 X_VerifyParseState(_PS_Table);
2224 break;
2225 }
2226
2227 // Revision elements:
2228
2229 case TT_REVHISTORY:
2230 {
2231 X_VerifyParseState(_PS_RevisionSec);
2232 m_parseState = _PS_MetaData;
2233 break;
2234 }
2235
2236 case TT_REVISION:
2237 {
2238 X_VerifyParseState(_PS_Revision);
2239 m_parseState = _PS_RevisionSec;
2240 break;
2241 }
2242
2243 case TT_REVREMARK:
2244 case TT_REVDESCRIPTION:
2245 case TT_REVNUMBER:
2246 {
2247 //TODO: import revisions
2248 X_VerifyParseState(_PS_Revision);
2249 break;
2250 }
2251
2252 case TT_EQUATION:
2253 {
2254 X_VerifyParseState(_PS_Block);
2255 m_bInMath = false;
2256 break;
2257 }
2258
2259 case TT_ALT:
2260 {
2261 X_CheckError(m_bInMath);
2262 break;
2263 }
2264
2265 case TT_TEXTOBJECT:
2266 {
2267 if(m_iDataDepth > 0)
2268 m_parseState = _PS_DataSec;
2269
2270 break;
2271 }
2272
2273 case TT_GRAPHICCO:
2274 {
2275 X_VerifyParseState(_PS_Block);
2276
2277 if(m_bRequiredBlock)
2278 {
2279 m_bRequiredBlock = false;
2280 m_iBlockDepth--;
2281 }
2282
2283 if(m_iBlockDepth == 0)
2284 m_parseState = _PS_Sec;
2285
2286 break;
2287 }
2288
2289 case TT_MEDIAOBJECTCO:
2290 {
2291 X_VerifyParseState(_PS_DataSec);
2292 m_iDataDepth--;
2293
2294 if(m_bRequiredBlock)
2295 {
2296 m_bRequiredBlock = false;
2297 m_iBlockDepth--;
2298 }
2299
2300 if(m_iDataDepth == 0)
2301 m_parseState = _PS_Sec;
2302
2303 break;
2304 }
2305
2306 case TT_AREA:
2307 case TT_AREASET:
2308 case TT_AREASPEC:
2309 {
2310 X_CheckError((m_parseState == _PS_Block) || (m_parseState == _PS_DataSec));
2311 break;
2312 }
2313
2314 case TT_SCREENSHOT:
2315 {
2316 X_VerifyParseState(_PS_DataSec);
2317 m_parseState = _PS_Block;
2318 m_iDataDepth--;
2319
2320 if(m_bRequiredBlock)
2321 {
2322 m_bRequiredBlock = false;
2323 m_iBlockDepth--;
2324 }
2325
2326 if(m_iDataDepth > 0)
2327 m_parseState = _PS_DataSec;
2328 else if(m_iBlockDepth)
2329 m_parseState = _PS_Block;
2330 else
2331 m_parseState = _PS_Sec;
2332
2333 break;
2334 }
2335
2336 case TT_SCREEN:
2337 case TT_SYNOPSIS:
2338 {
2339 X_VerifyParseState(_PS_Block);
2340
2341 if(m_bRequiredBlock)
2342 {
2343 m_bRequiredBlock = false;
2344 m_iBlockDepth--;
2345 }
2346
2347 if(m_iBlockDepth == 0)
2348 m_parseState = _PS_Sec;
2349
2350 break;
2351 }
2352
2353 case TT_SCREENINFO:
2354 {
2355 X_VerifyParseState(_PS_Meta);
2356 m_parseState = _PS_DataSec;
2357 m_iDataDepth--;
2358 break;
2359 }
2360
2361 case TT_GRAPHIC:
2362 {
2363 X_VerifyParseState(_PS_DataItem);
2364 m_iDataDepth--;
2365
2366 if(m_bRequiredBlock)
2367 {
2368 m_bRequiredBlock = false;
2369 m_iBlockDepth--;
2370 }
2371
2372 if(m_iDataDepth > 0)
2373 m_parseState = _PS_DataSec;
2374 else
2375 m_parseState = _PS_Block;
2376
2377 break;
2378 }
2379
2380 case TT_FIGURE:
2381 {
2382 X_VerifyParseState(_PS_DataSec);
2383 m_iDataDepth--;
2384
2385 if(m_bRequiredBlock)
2386 {
2387 m_bRequiredBlock = false;
2388 m_iBlockDepth--;
2389 }
2390
2391 if(m_iBlockDepth == 0)
2392 m_parseState = _PS_Sec;
2393 else if(m_iDataDepth == 0)
2394 m_parseState = _PS_Block;
2395
2396 break;
2397 }
2398
2399 case TT_VIDEOOBJECT:
2400 {
2401 X_VerifyParseState(_PS_DataSec);
2402 m_iDataDepth--;
2403 break;
2404 }
2405
2406 case TT_VIDEODATA:
2407 {
2408 X_VerifyParseState(_PS_DataItem);
2409 m_parseState = _PS_DataSec;
2410 m_iDataDepth--;
2411 break;
2412 }
2413
2414 case TT_MEDIAOBJECT:
2415 {
2416 X_VerifyParseState(_PS_DataSec);
2417 m_iDataDepth--;
2418
2419 if(m_bRequiredBlock)
2420 {
2421 m_bRequiredBlock = false;
2422 m_iBlockDepth--;
2423 }
2424
2425 if(m_iListDepth > 0)
2426 m_parseState = _PS_List;
2427 else if(m_bInMath)
2428 m_parseState = _PS_Block;
2429 else if(m_iDataDepth == 0)
2430 m_parseState = _PS_Sec;
2431
2432 break;
2433 }
2434
2435 case TT_IMAGEOBJECT:
2436 {
2437 X_VerifyParseState(_PS_DataSec);
2438 m_iDataDepth--;
2439 break;
2440 }
2441
2442 case TT_IMAGEDATA:
2443 {
2444 X_VerifyParseState(_PS_DataItem);
2445 m_parseState = _PS_DataSec;
2446 m_iDataDepth--;
2447 break;
2448 }
2449
2450 case TT_OTHER:
2451 default:
2452 UT_DEBUGMSG(("Unknown or intentionally unhandled end tag [%s]\n",name));
2453 break;
2454 }
2455
2456 if(bPop)
2457 {
2458 UT_uint32 i = 0;
2459
2460 m_utnsTagStack.pop((UT_sint32*)&i);
2461 xxx_UT_DEBUGMSG(("Popping %d off of stack\n",i));
2462
2463 if(i != tokenIndex) {
2464 UT_DEBUGMSG(("DocBook: Parse error!\n"));
2465 }
2466 }
2467 }
2468
2469
2470 /*
2471 * we redefine this function, so that it intercepts title
2472 * if a title has to been added, we first check that there is something to add
2473 * if so, we add the heading
2474 * if not, we skip the heading.
2475 * then, it calls its parent's charData.
2476 */
charData(const gchar * s,int len)2477 void IE_Imp_DocBook::charData(const gchar *s, int len)
2478 {
2479 if (m_bMustAddTitle && (len > 0))
2480 {
2481 createTitle();
2482 }
2483 else if((m_parseState == _PS_Meta) && m_bReadBook)
2484 {
2485 return; //only set metadata once
2486 }
2487 else if((m_parseState == _PS_Meta) && (len > 0))
2488 {
2489 std::string metaProp, updatedProp = "";
2490
2491 switch(tagTop())
2492 {
2493 case TT_TITLE:
2494 {
2495 getDoc()->setMetaDataProp("dc.title",s);
2496 break;
2497 }
2498
2499 case TT_AUTHOR:
2500 {
2501 getDoc()->setMetaDataProp("dc.creator",s);
2502 break;
2503 }
2504
2505 case TT_FIRSTNAME:
2506 case TT_LINEAGE:
2507 case TT_OTHERNAME:
2508 case TT_SURNAME:
2509 {
2510 UT_ASSERT_HARMLESS(UT_TODO);
2511 break;
2512 }
2513
2514 case TT_LEGALNOTICE:
2515 {
2516 getDoc()->setMetaDataProp("dc.rights",s);
2517 break;
2518 }
2519
2520 case TT_PUBLISHERNAME:
2521 {
2522 getDoc()->setMetaDataProp("dc.publisher",s);
2523 break;
2524 }
2525
2526 case TT_COLLABNAME:
2527 {
2528 getDoc()->setMetaDataProp("dc.contributor",s);
2529 break;
2530 }
2531
2532 case TT_SUBJECTTERM:
2533 {
2534 getDoc()->setMetaDataProp("dc.subject",s);
2535 break;
2536 }
2537
2538 case TT_KEYWORD:
2539 {
2540 if(getDoc()->getMetaDataProp (PD_META_KEY_KEYWORDS, metaProp) && metaProp.size())
2541 {
2542 updatedProp = metaProp;
2543 updatedProp += " "; //space the keywords
2544 }
2545 updatedProp += s;
2546 getDoc()->setMetaDataProp("abiword.keywords",updatedProp);
2547 break;
2548 }
2549
2550 case TT_ABSTRACT:
2551 {
2552 getDoc()->setMetaDataProp("dc.description",s);
2553 break;
2554 }
2555
2556 case TT_BIBLIOSOURCE:
2557 {
2558 getDoc()->setMetaDataProp("dc.source",s);
2559 break;
2560 }
2561
2562 case TT_BIBLIOCOVERAGE:
2563 {
2564 getDoc()->setMetaDataProp("dc.coverage",s);
2565 break;
2566 }
2567
2568 case TT_BIBLIORELATION:
2569 {
2570 getDoc()->setMetaDataProp("dc.relation",s);
2571 break;
2572 }
2573
2574 case TT_APPLICATION:
2575 case TT_ARTPAGENUMS:
2576 case TT_AUTHORGROUP:
2577 case TT_COPYRIGHT:
2578 case TT_EDITION:
2579 case TT_ISSUENUM:
2580 case TT_KEYWORDSET:
2581 case TT_PRINTHISTORY:
2582 case TT_PUBDATE:
2583 case TT_VOLUMENUM:
2584 case TT_YEAR:
2585 {
2586 break; //these can be safely ignored
2587 }
2588
2589 default:
2590 {
2591 UT_ASSERT_HARMLESS(UT_TODO);
2592 UT_DEBUGMSG(("Unhandled metadata in docbook importer: %d\n",tagTop()));
2593 break;
2594 }
2595 }
2596 }
2597 else if((m_parseState == _PS_Cell) && (len > 0))
2598 {
2599 requireBlock();
2600 }
2601 else if((m_parseState == _PS_Field) || m_bInTOC)
2602 {
2603 return; //ignore field text since it should be regenerated
2604 }
2605 else if(m_parseState == _PS_MetaData)
2606 {
2607 return;
2608 }
2609 else if((m_parseState == _PS_Block) && (len >0))
2610 {
2611 if(tagTop() == TT_EMAIL)
2612 {
2613 const gchar *buf[3];
2614 buf[2] = NULL;
2615
2616 UT_UTF8String link = "mailto:";
2617 link += s;
2618
2619 buf[0] = "xlink:href";
2620 buf[1] = (gchar*)link.utf8_str();
2621 X_CheckError(appendObject(PTO_Hyperlink, const_cast<const gchar **>(buf)));
2622 }
2623 }
2624
2625 IE_Imp_XML :: charData (s, len);
2626 }
2627 /*****************************************************************************/
2628
2629
2630 /*****************************************************************************/
2631 /*
2632 * creates a new title in the doc.
2633 */
createTitle(void)2634 void IE_Imp_DocBook :: createTitle (void)
2635 {
2636 UT_return_if_fail(m_iTitleDepth > 0);
2637
2638 if (m_parseState == _PS_DataSec)
2639 {
2640 UT_ASSERT_HARMLESS(UT_TODO);
2641 return;
2642 }
2643
2644 m_parseState = _PS_Block;
2645
2646 /* list of attributes */
2647 const gchar *buf[11];
2648 memset(buf, 0, sizeof(buf));
2649
2650 if(m_iTitleDepth > m_utvTitles.getItemCount())
2651 {
2652 m_utvTitles.addItem((fl_AutoNum *)NULL);
2653 }
2654
2655 bool foundStyle = false;
2656
2657 if(m_sectionRole.length())
2658 {
2659 foundStyle = true;
2660 if(!strcmp(m_sectionRole.utf8_str(), "Heading 1") || !strcmp(m_sectionRole.utf8_str(), "Heading 2") ||
2661 !strcmp(m_sectionRole.utf8_str(), "Heading 3") || !strcmp(m_sectionRole.utf8_str(), "Heading 4") ||
2662 !strcmp(m_sectionRole.utf8_str(), "Section Heading"))
2663 {
2664 buf[1] = g_strdup(m_sectionRole.utf8_str());
2665 }
2666 else if(!strcmp(m_sectionRole.utf8_str(), "Numbered Heading 1") || !strcmp(m_sectionRole.utf8_str(), "Numbered Heading 2") ||
2667 !strcmp(m_sectionRole.utf8_str(), "Numbered Heading 3") || !strcmp(m_sectionRole.utf8_str(), "Chapter Heading"))
2668 {
2669 buf[1] = g_strdup(m_sectionRole.utf8_str());
2670 m_bMustNumber = true;
2671 }
2672 else
2673 {
2674 foundStyle = false;
2675 }
2676 }
2677
2678 if(!foundStyle)
2679 {
2680 switch (m_iTitleDepth)
2681 {
2682 case CHAPTER_HEADING:
2683 {
2684 /* we must add a chapter heading */
2685 buf[1] = "Chapter Heading";
2686 break;
2687 }
2688
2689 case SECTION1_HEADING:
2690 {
2691 /* we must add a section heading */
2692 buf[1] = "Section Heading";
2693 break;
2694 }
2695
2696 case SECTION2_HEADING:
2697 {
2698 /* we must add a heading 1 */
2699 if (m_bMustNumber)
2700 {
2701 buf[1] = "Numbered Heading 1";
2702 }
2703 else
2704 {
2705 buf[1] = "Heading 1";
2706 }
2707 break;
2708 }
2709
2710 case SECTION3_HEADING:
2711 {
2712 /* we must add a heading 2 */
2713 if (m_bMustNumber)
2714 {
2715 buf[1] = "Numbered Heading 2";
2716 }
2717 else
2718 {
2719 buf[1] = "Heading 2";
2720 }
2721 break;
2722 }
2723
2724 case SECTION4_HEADING:
2725 {
2726 /* we must add a heading 3 */
2727 if (m_bMustNumber)
2728 {
2729 buf[1] = "Numbered Heading 3";
2730 }
2731 else
2732 {
2733 buf[1] = "Heading 3";
2734 }
2735 break;
2736 }
2737
2738 case SECTION5_HEADING:
2739 case SECTION6_HEADING:
2740 default:
2741 {
2742 if (m_bMustNumber)
2743 {
2744 buf[1] = "Numbered Heading 3"; //there's no Numbered Heading 4
2745 }
2746 else
2747 {
2748 buf[1] = "Heading 4";
2749 }
2750 break;
2751 }
2752 }
2753 }
2754
2755 if (m_bMustNumber)
2756 {
2757 /*
2758 * we must add a numbered heading; that means that we must put
2759 * it into a list
2760 */
2761 /* deletes previous lists of same level and above */
2762 for (UT_sint32 i = (m_iTitleDepth - 1); i < m_utvTitles.getItemCount(); i++)
2763 {
2764 if (i == 0) //always keep the first chapter title
2765 continue;
2766
2767 fl_AutoNum * temp = m_utvTitles.getNthItem(i);
2768 DELETEP(temp);
2769 }
2770 buf[8] = PT_PROPS_ATTRIBUTE_NAME;
2771
2772 if((m_utvTitles.getNthItem(m_iTitleDepth-1) == NULL))
2773 {
2774 // if a list doesn't exist at this depth, create it
2775 createList();
2776 buf[9] = "start-value:1; list-style:Numbered List";
2777 }
2778 else
2779 {
2780 buf[9] = "list-style:Numbered List";
2781 }
2782
2783 /* ok now it's created, we should add the id and the parent id */
2784
2785 buf[2] = PT_LEVEL_ATTRIBUTE_NAME;
2786
2787 UT_UTF8String val;
2788
2789 if(m_utvTitles[m_iTitleDepth - 1])
2790 UT_UTF8String_sprintf (val, "%d", m_utvTitles[m_iTitleDepth - 1]->getLevel());
2791 else
2792 val = "1";
2793
2794 buf[3] = (gchar *)g_strdup(val.utf8_str());
2795 buf[4] = PT_LISTID_ATTRIBUTE_NAME;
2796
2797 if(m_utvTitles[m_iTitleDepth - 1])
2798 UT_UTF8String_sprintf (val, "%d", m_utvTitles[m_iTitleDepth - 1]->getID());
2799 else
2800 UT_UTF8String_sprintf (val, "%d", ++m_iCurListID);
2801
2802 buf[5] = (gchar *)g_strdup(val.utf8_str());
2803 buf[6] = PT_PARENTID_ATTRIBUTE_NAME;
2804
2805 if(m_utvTitles[m_iTitleDepth - 1])
2806 UT_UTF8String_sprintf (val, "%d", m_utvTitles[m_iTitleDepth - 1]->getParentID());
2807 else
2808 val = "0";
2809
2810 buf[7] = (gchar *)g_strdup(val.utf8_str());
2811
2812 }
2813
2814 buf[0] = PT_STYLE_ATTRIBUTE_NAME;
2815
2816 if(buf[1] == NULL) //preventive code for appendStrux() below
2817 buf[0] = NULL;
2818
2819 X_CheckError(appendStrux(PTX_Block, const_cast<const gchar **>(buf)));
2820 if (m_bMustNumber)
2821 {
2822 /* adds field */
2823 const gchar * buf2 [3];
2824 buf2[0] = PT_TYPE_ATTRIBUTE_NAME;
2825 buf2[1] = "list_label";
2826 buf2[2] = NULL;
2827
2828 X_CheckError ( appendObject (PTO_Field, const_cast<const gchar **>(buf2)));
2829 X_CheckError ( appendFmt (const_cast<const gchar **>(buf2)));
2830 UT_UCSChar ucs = UCS_TAB;
2831 appendSpan(&ucs,1);
2832 _popInlineFmt();
2833 }
2834 X_CheckError ( appendFmt (static_cast<const gchar **>(NULL)));
2835
2836 m_bMustAddTitle = false;
2837 m_bTitleAdded = true;
2838
2839 if(foundStyle)
2840 FREEP(buf[1]);
2841
2842 FREEP(buf[3]);
2843 FREEP(buf[5]);
2844 FREEP(buf[7]);
2845 }
2846 /*****************************************************************************/
2847
2848
2849 /*****************************************************************************/
2850 /*
 * this function creates a new list in the document and inserts it into the
 * list table
2853 */
createList(void)2854 void IE_Imp_DocBook :: createList (void)
2855 {
2856 UT_return_if_fail(m_iTitleDepth);
2857
2858 int pid = 0;
2859
2860 if (m_iTitleDepth > 1)
2861 {
2862 for (int i = (m_iTitleDepth - 2); i >= 0; i--)
2863 {
2864 /* retrieves parent id, if available */
2865 if (m_utvTitles[i])
2866 {
2867 pid = m_utvTitles [i] -> getID ();
2868 break;
2869 }
2870 }
2871 }
2872
2873 const gchar * lDelim = "";
2874
2875 if(m_iTitleDepth == 1)
2876 lDelim = "Chapter %L.";
2877 else if(m_iTitleDepth == 2)
2878 lDelim = "Section %L.";
2879 else
2880 lDelim = "%L.";
2881
2882 /* creates the new list */
2883 fl_AutoNum *an = new fl_AutoNum (
2884 m_iCurListID,
2885 pid,
2886 NUMBERED_LIST,
2887 1,
2888 (const gchar *)lDelim,
2889 (const gchar *)"",
2890 getDoc (),
2891 NULL
2892 );
2893 getDoc()->addList(an);
2894
2895 /* register it in the vector */
2896 if(m_utvTitles.setNthItem((m_iTitleDepth - 1), an, NULL) == -1)
2897 {
2898 UT_ASSERT_HARMLESS(UT_SHOULD_NOT_HAPPEN);
2899 }
2900
2901 /* increment the id counter, so that it is unique */
2902 m_iCurListID++;
2903 }
2904 /*****************************************************************************/
2905
2906 /*****************************************************************************/
2907 /*
2908 * this function creates a new image in the document
2909 */
createImage(const char * name,const gchar ** atts)2910 void IE_Imp_DocBook::createImage(const char *name, const gchar **atts)
2911 {
2912 char * relative_file = UT_go_url_resolve_relative(m_szFileName, name);
2913 if(!relative_file)
2914 return;
2915
2916 UT_UTF8String filename(relative_file);
2917 g_free(relative_file);
2918
2919 FG_Graphic * pfg = 0;
2920 if (IE_ImpGraphic::loadGraphic (filename.utf8_str(), IEGFT_Unknown, &pfg) != UT_OK)
2921 return;
2922
2923 const UT_ByteBuf * pBB = pfg->getBuffer();
2924 X_CheckError(pBB);
2925
2926 std::string dataid = UT_std_string_sprintf ("image%u", static_cast<unsigned int>(m_iImages++));
2927
2928 X_CheckError (getDoc()->createDataItem (dataid.c_str(), false, pBB,
2929 pfg->getMimeType(), NULL));
2930
2931 const gchar *buf[5];
2932 buf[0] = "dataid";
2933 buf[1] = (gchar*)dataid.c_str();
2934 buf[2] = NULL;
2935 buf[4] = NULL;
2936
2937 UT_UTF8String props;
2938 const gchar *p_val = NULL;
2939
2940 p_val = _getXMLPropValue(static_cast<const gchar *>("depth"), atts);
2941
2942 if(p_val)
2943 {
2944 props = "height:";
2945 props+= p_val;
2946 }
2947
2948 p_val = _getXMLPropValue(static_cast<const gchar *>("width"), atts);
2949
2950 if(p_val)
2951 {
2952 if(props.length()) //the image might not have a depth attribute
2953 props+= "; ";
2954
2955 props+= "width:";
2956 props+= p_val;
2957 }
2958
2959 if(props.length())
2960 {
2961 buf[2] = PT_PROPS_ATTRIBUTE_NAME;
2962 buf[3] = (gchar*)props.utf8_str();
2963 }
2964
2965 X_CheckError(appendObject(PTO_Image, const_cast<const gchar **>(buf)));
2966 DELETEP(pfg);
2967 }
2968 /*****************************************************************************/
2969
tagTop(void)2970 UT_uint32 IE_Imp_DocBook::tagTop(void)
2971 {
2972 UT_sint32 i = 0;
2973
2974 if (m_utnsTagStack.viewTop (i))
2975 return (UT_uint32)i;
2976 return 0;
2977 }
2978
requireBlock(void)2979 void IE_Imp_DocBook::requireBlock(void)
2980 {
2981 if(!m_iBlockDepth)
2982 {
2983 m_iBlockDepth = 1;
2984 X_CheckError(appendStrux(PTX_Block,static_cast<const gchar **>(NULL)));
2985
2986 if(m_parseState == _PS_Cell)
2987 m_bWroteEntryPara = true;
2988 else
2989 m_bRequiredBlock = true;
2990
2991 m_parseState = _PS_Block;
2992 }
2993 }
2994