1 /* -*- mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: t -*- */
2
3 /* AbiWord
4 * Copyright (C) 1998-2000 AbiSource, Inc.
5 * Copyright (C) 2001 Dom Lachowicz <dominicl@seas.upenn.edu>
6 * Copyright (C) 2001-2003 Tomas Frydrych
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version 2
11 * of the License, or (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 * 02110-1301 USA.
22 */
23
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include "ut_locale.h"
28
29 #include <zlib.h>
30
31 #include "wv.h"
32
33 #include "ut_string_class.h"
34 #include "ut_string.h"
35 #include "ut_bytebuf.h"
36 #include "ut_units.h"
37 #include "ut_math.h"
38 #include "ut_assert.h"
39 #include "ut_debugmsg.h"
40 #include "ut_stack.h"
41
42 #include "xap_App.h"
43 #include "xap_Frame.h"
44 #include "xap_EncodingManager.h"
45 #include "xap_DialogFactory.h"
46 #include "xap_Dlg_Password.h"
47
48 #include "fg_Graphic.h"
49 #include "fg_GraphicRaster.h"
50 #include "fg_GraphicVector.h"
51
52 #include "pd_Document.h"
53
54 #include "ie_impexp_MsWord_97.h"
55 #include "ie_imp_MsWord_97.h"
56 #include "ie_impGraphic.h"
57
58 #include "ap_Strings.h"
59 #include "ap_Dialog_Id.h"
60
61 #include "pf_Frag_Strux.h"
62 #include "pt_PieceTable.h"
63 #include "pd_Style.h"
64
65 #include "fp_PageSize.h"
66
67 #include "ut_Language.h"
68
69 #include <gsf/gsf-infile.h>
70 #include <gsf/gsf-infile-msole.h>
71 #include <gsf/gsf-msole-utils.h>
72 #include <gsf/gsf-docprop-vector.h>
73 #include <gsf/gsf-meta-names.h>
74
75 #ifdef DEBUG
76 #define IE_IMP_MSWORD_DUMP
77 #include "ie_imp_MsWord_dump.h"
78 #undef IE_IMP_MSWORD_DUMP
79 #endif
80
// Leave the enclosing function with the status 1 as soon as the checked
// expression evaluates to false; does nothing otherwise.
#define X_CheckError(v)      \
	do {                     \
		if (!(v))            \
			return 1;        \
	} while (0)

// Support for older images (<= Word95) is compiled in while this is
// defined; #undef it to disable that support.
#define SUPPORTS_OLD_IMAGES 1
85
86 //#define BIDI_DEBUG
87 //
88 // Forward decls. to wv's callbacks
89 //
90 static int charProc (wvParseStruct *ps, U16 eachchar, U8 chartype, U16 lid);
91 static int specCharProc (wvParseStruct *ps, U16 eachchar, CHP* achp);
92 static int eleProc (wvParseStruct *ps, wvTag tag, void *props, int dirty);
93 static int docProc (wvParseStruct *ps, wvTag tag);
94
95 /*!
96 Translates MS numerical id's for standard styles into our names
97 The style names that have been commented out are those that do not
98 have currently a localised equivalent in AW
99 */
s_translateStyleId(UT_uint32 id)100 static const gchar * s_translateStyleId(UT_uint32 id)
101 {
102 if(id >= 4094)
103 {
104 return NULL;
105 }
106
107 // The style names that have been commented out are those that do
108 // not currently have a localised equivalent in AW
109 switch(id)
110 {
111 case 0: return "Normal";
112 case 1: return "Heading 1";
113 case 2: return "Heading 2";
114 case 3: return "Heading 3";
115 case 4: return "Heading 4";
116 case 5: return NULL /*"Heading 5"*/;
117 case 6: return NULL /*"Heading 6"*/;
118 case 7: return NULL /*"Heading 7"*/;
119 case 8: return NULL /*"Heading 8"*/;
120 case 9: return NULL /*"Heading 9"*/;
121 case 10: return NULL /*"Index 1"*/; /* Really a dup of 92? */
122 case 11: return NULL /*"Index 2"*/;
123 case 12: return NULL /*"Index 3"*/;
124 case 13: return NULL /*"Index 4"*/;
125 case 14: return NULL /*"Index 5"*/;
126 case 15: return NULL /*"Index 6"*/;
127 case 16: return NULL /*"Index 7"*/;
128 case 17: return NULL /*"Index 8"*/;
129 case 18: return NULL /*"Index 9"*/;
130 case 19: return NULL /*"Contents 1"*/; /* Handled by insertTOC? */
131 case 20: return NULL /*"Contents 2"*/; /* Handled by insertTOC? */
132 case 21: return NULL /*"Contents 3"*/; /* Handled by insertTOC? */
133 case 22: return NULL /*"Contents 4"*/; /* Handled by insertTOC? */
134 case 23: return NULL /*"TOC 5"*/; /* See Contents above for these five as well */
135 case 24: return NULL /*"TOC 6"*/;
136 case 25: return NULL /*"TOC 7"*/;
137 case 26: return NULL /*"TOC 8"*/;
138 case 27: return NULL /*"TOC 9"*/;
139 case 28: return NULL /*"Normal Indent"*/;
140 case 29: return "Footnote Text";
141 case 30: return NULL /*"Comment Text"*/;
142 case 31: return NULL /*"Header"*/;
143 case 32: return NULL /*"Footer"*/;
144 case 33: return NULL /*"Index Heading"*/;
145 case 34: return NULL /*"Caption"*/;
146 case 35: return NULL /*"Table of Figures"*/;
147 case 36: return NULL /*"Envelope Address"*/;
148 case 37: return NULL /*"Envelope Return"*/;
149 case 38: return "Footnote Reference";
150 case 39: return NULL /*"Comment Reference"*/;
151 case 40: return NULL /*"Line Number"*/;
152 case 41: return NULL /*"Page Number"*/;
153 case 42: return "Endnote Reference";
154 case 43: return "Endnote Text";
155 case 44: return NULL /*"Index of Authorities"*/;
156 case 45: return NULL /*"Macro Text"*/;
157 case 46: return NULL /*"TOA Heading"*/;
158 case 47: return NULL /*"List"*/; //WARNING: beginPara appears to handle arbitrary lists via _mapDocToAbiList*
159 case 48: return "Bulleted List";
160 case 49: return "Numbered List";
161 case 50: return NULL /*"List 2"*/;
162 case 51: return NULL /*"List 3"*/;
163 case 52: return NULL /*"List 4"*/;
164 case 53: return NULL /*"List 5"*/;
165 case 54: return NULL /*"List Bullet 2"*/;
166 case 55: return NULL /*"List Bullet 3"*/;
167 case 56: return NULL /*"List Bullet 4"*/;
168 case 57: return NULL /*"List Bullet 5"*/;
169 case 58: return NULL /*"List Number 2"*/;
170 case 59: return NULL /*"List Number 3"*/;
171 case 60: return NULL /*"List Number 4"*/;
172 case 61: return NULL /*"List Number 5"*/;
173 case 62: return NULL /*"Title"*/;
174 case 63: return NULL /*"Closing"*/;
175 case 64: return NULL /*"Signature"*/;
176 case 65: return NULL /*"Default Paragraph Font"*/;
177 case 66: return NULL /*"Body Text"*/;
178 case 67: return NULL /*"Body Text Indent"*/;
179 case 68: return NULL /*"List Continue"*/;
180 case 69: return NULL /*"List Continue 2"*/;
181 case 70: return NULL /*"List Continue 3"*/;
182 case 71: return NULL /*"List Continue 4"*/;
183 case 72: return NULL /*"List Continue 5"*/;
184 case 73: return NULL /*"Message Header"*/;
185 case 74: return NULL /*"Subtitle"*/;
186 case 75: return NULL /*"Salutation"*/;
187 case 76: return NULL /*"Date"*/;
188 case 77: return NULL /*"Body Text First Indent"*/;
189 case 78: return NULL /*"Body Text First Indent 2"*/;
190 case 79: return NULL /*"Note Heading"*/;
191 case 80: return NULL /*"Body Text 2"*/;
192 case 81: return NULL /*"Body Text 3"*/;
193 case 82: return NULL /*"Body Text Indent 2"*/;
194 case 83: return NULL /*"Body Text Indent 3"*/;
195 case 84: return "Block Text";
196 case 85: return NULL /*"Hyperlink"*/;
197 case 86: return NULL /*"FollowedHyperlink"*/;
198 case 87: return NULL /*"Strong"*/;
199 case 88: return NULL /*"Emphasis"*/;
200 case 89: return NULL /*"Document Map"*/;
201 case 90: return "Plain Text"; /* Really a dup of 109? */
202 case 91: return NULL /*"Email Signature"*/;
203 case 92: return NULL /*"Index 1"*/; /* Really a dup of 10? */
204 case 93: return NULL /*"List Bullet"*/;
205 case 94: return NULL /*"Normal (Web)"*/;
206 case 95: return NULL /*"HTML Acronym"*/;
207 case 96: return NULL /*"HTML Address"*/;
208 case 97: return NULL /*"HTML Cite"*/;
209 case 98: return NULL /*"HTML Code"*/;
210 case 99: return NULL /*"HTML Definition"*/;
211 case 100: return NULL /*"HTML Keyboard"*/;
212 case 101: return NULL /*"HTML Preformatted"*/;
213 case 102: return NULL /*"HTML Sample"*/;
214 case 103: return NULL /*"HTML Typewriter"*/;
215 case 104: return NULL /*"HTML Variable"*/;
216 case 105: return NULL /*"Table Normal"*/;
217 case 106: return NULL /*"Comment Subject"*/;
218 case 107: return NULL /*"No List"*/;
219 case 108: return NULL /*"Index Heading"*/;
220 case 109: return "Plain Text"; /* Really a dup of 90? */
221 case 110: return NULL /*"Hyperlink"*/;
222 case 111: return NULL /*"FollowedHyperlink"*/;
223 case 112: return "Numbered List"; /* Was EnumList, really a dup of 49? Closer than nothing anyway*/
224 case 115: return NULL /*"Balloon Text"*/;
225
226 case 153: return NULL /*"Table of Authorities"*/;
227 case 154: return NULL /*"Grille du tableau" in fr_FR*/;
228
229 default:
230 UT_DEBUGMSG(("Unknown style Id [%d]; Please submit this document with a bug report!\n", id));
231 // Would be nice if we had a UT_USERMSG or something to put up a prompt (with a
232 // don't display again option) with the message in normal mode, OutputMsg or silent
233 // in commandline or docserver mode, etc. Because it is the users, not the
234 // developers who will have such alien documents. -MG
235
236 UT_ASSERT_HARMLESS(UT_SHOULD_NOT_HAPPEN);
237 return NULL;
238 }
239 return NULL;
240 }
241
242 /*!
243 Strip characters that would confuse either the xml parser or our
244 property parser; caller is responsible to g_free the returned pointer
245 */
s_stripDangerousChars(const char * s)246 static char * s_stripDangerousChars(const char *s)
247 {
248 UT_uint32 j, k;
249 if(!s)
250 return NULL;
251
252 char * t = (char*) g_try_malloc(strlen(s)+1);
253 UT_return_val_if_fail(t,NULL);
254
255 for(j = 0, k = 0; j < strlen(s); )
256 {
257 if(s[j] < ' ' && s[j] >= 0 && s[j] != '\t' && s[j] != '\n' && s[j] != '\r')
258 {
259 j++;
260 }
261 else
262 {
263 switch(s[j])
264 {
265 default:
266 t[k++] = s[j++];
267 break;
268
269 // characters that would confuse the
270 // xml parser or our own property parser
271 case '<':
272 case '>':
273 case ':':
274 case ';':
275 case '&':
276 case '\"':
277 j++;
278 break;
279 }
280 }
281 }
282
283 t[k] = 0;
284
285 return t;
286 }
287
s_convert_to_utf8(const wvParseStruct * ps,const char * s)288 static char * s_convert_to_utf8 (const wvParseStruct *ps, const char *s)
289 {
290 // strangely wv seems to return an UTF-8 string despite a specified codepage
291 // so we must ensure it is UTF-8. This is time consuming. :-(
292 // If it is UTF-8 we just g_strdup() it.
293 // See bug 13229.
294 if (s == NULL)
295 return NULL;
296 if(g_utf8_validate(s, -1, NULL)) {
297 return g_strdup(s);
298 }
299 const char * encoding = NULL;
300 char fallback = '?';
301 encoding = wvLIDToCodePageConverter(ps->fib.lid);
302 return g_convert_with_fallback(s, -1, "UTF-8", encoding, &fallback, NULL, NULL, NULL);
303 }
304
305 //
306 // DOC uses an unsigned int color index
307 //
308 typedef UT_uint32 Doc_Color_t;
309
310 //
311 // A mapping between Word's colors and Abi's RGB color scheme;
312 // if you add colors, _make sure_ to increase the '16' in
313 // sMapIcoToColor() below
314 //
315 static Doc_Color_t word_colors [][3] = {
316 {0x00, 0x00, 0x00}, /* black */
317 {0x00, 0x00, 0xff}, /* blue */
318 {0x00, 0xff, 0xff}, /* cyan */
319 {0x00, 0xff, 0x00}, /* green */
320 {0xff, 0x00, 0xff}, /* magenta */
321 {0xff, 0x00, 0x00}, /* red */
322 {0xff, 0xff, 0x00}, /* yellow */
323 {0xff, 0xff, 0xff}, /* white */
324 {0x00, 0x00, 0x80}, /* dark blue */
325 {0x00, 0x80, 0x80}, /* dark cyan */
326 {0x00, 0x80, 0x00}, /* dark green */
327 {0x80, 0x00, 0x80}, /* dark magenta */
328 {0x80, 0x00, 0x00}, /* dark red */
329 {0x80, 0x80, 0x00}, /* dark yellow */
330 {0x80, 0x80, 0x80}, /* dark gray */
331 {0xc0, 0xc0, 0xc0}, /* light gray */
332 };
333
sMapIcoToColor(UT_uint16 ico,bool bForeground)334 static UT_String sMapIcoToColor (UT_uint16 ico, bool bForeground)
335 {
336 // need to handle the automatic colour 0; see bug 10261 for bounds-check
337 if((!ico && bForeground) || (ico > 16))
338 {
339 ico = 1; //black
340 }
341 else if(!ico && !bForeground)
342 {
343 ico = 8; //white
344 }
345
346 return UT_String_sprintf("%02x%02x%02x",
347 word_colors[ico-1][0],
348 word_colors[ico-1][1],
349 word_colors[ico-1][2]);
350 }
351
//
// Ids of the Word fields this importer knows about; they are used later
// for mapping field names (see s_Tokens). F_OTHER stands for any field
// without an id of its own.
//
typedef enum {
	F_TIME = 0,
	F_DATE,
	F_EDITTIME,
	F_AUTHOR,
	F_PAGE,
	F_NUMCHARS,
	F_NUMPAGES,
	F_NUMWORDS,
	F_FILENAME,
	F_HYPERLINK,
	F_PAGEREF,
	F_EMBED,
	F_TOC,
	F_DateTimePicture,
	F_TOC_FROM_RANGE,
	F_DATEINAME,
	F_SPEICHERDAT,
	F_MERGEFIELD,
	F_OTHER
} Doc_Field_t;
376
377 struct field
378 {
379 UT_UCS2Char command [FLD_SIZE];
380 UT_UCS2Char argument [FLD_SIZE];
381 UT_UCS2Char *fieldWhich;
382 UT_sint32 fieldI;
383 char * fieldC;
384 UT_sint32 fieldRet;
385 Doc_Field_t type;
386 };
387
388
389 //
390 // A mapping between DOC's field names and our given IDs
391 //
392 typedef struct
393 {
394 const char * m_name;
395 Doc_Field_t m_id;
396 } Doc_Field_Mapping_t;
397
398 /*
399 * This next bit of code enables us to import many of Word's fields
400 */
401
402 static Doc_Field_Mapping_t s_Tokens[] =
403 {
404 {"TIME", F_TIME},
405 {"EDITTIME", F_EDITTIME},
406 {"DATE", F_DATE},
407 {"date", F_DATE},
408 {"DATEINAME", F_DATE}, // F_DATEINAME
409 {"SPEICHERDAT", F_DATE}, // F_SPEICHERDAT
410 {"\\@", F_DateTimePicture},
411
412 {"FILENAME", F_FILENAME},
413 {"\\filename", F_FILENAME},
414 {"PAGE", F_PAGE},
415 {"\\*Arabisch",F_PAGE},
416 {"NUMCHARS", F_NUMCHARS},
417 {"NUMPAGES", F_NUMPAGES},
418 {"NUMWORDS", F_NUMWORDS},
419 {"MERGEFIELD", F_MERGEFIELD},
420 // these below aren't handled by AbiWord, but they're known about
421 {"HYPERLINK", F_HYPERLINK},
422 {"PAGEREF", F_PAGEREF},
423 {"EMBED", F_EMBED},
424 {"TOC", F_TOC},
425 {"\\o", F_TOC_FROM_RANGE},
426 {"AUTHOR", F_AUTHOR},
427
428 { "*", F_OTHER}
429 };
430
431 #define FieldMappingSize (sizeof(s_Tokens)/sizeof(s_Tokens[0]))
432
433 static Doc_Field_t
s_mapNameToField(const char * name)434 s_mapNameToField (const char * name)
435 {
436 for (unsigned int k = 0; k < FieldMappingSize; k++)
437 {
438 // field names can be sometimes in lower-case
439 if (!g_ascii_strcasecmp(s_Tokens[k].m_name,name))
440 return s_Tokens[k].m_id;
441 }
442 return F_OTHER;
443 }
444
445 #undef FieldMappingSize
446
447 static const char *
s_mapPageIdToString(UT_uint16 id)448 s_mapPageIdToString (UT_uint16 id)
449 {
450 // TODO: make me way better when we determine code names
451
452 switch (id)
453 {
454 case 0:
455 case 1:
456 return "Letter";
457 case 5: return "Legal";
458 case 7: return NULL; //"Executive";
459 case 9: return "A4";
460 case 11: return "A5";
461 case 13: return "Folio";
462 case 14: return NULL; // in Word this is "B5" but the size
463 // does not correspond to AW's B5
464 case 20: return "Envelope No10";
465 case 27: return "DL Envelope";
466 case 28: return "C5";
467 case 34: return "B5"; // in Word this is B5 Envelope ...
468 case 37: return NULL; //"Monarch Envelope";
469
470 case 0xffff:
471 // this is a value that wv uses to indicate that page size
472 // is customised, just return NULL
473 return NULL;
474
475 default:
476 UT_DEBUGMSG(("Unknow page size: please submit this document with a bug report\n"));
477 UT_ASSERT_HARMLESS( 0 );
478 return 0;
479 }
480 }
481
/*!
    Numerical formats of Word lists. There are more of them than the five
    the MS documentation mentions, so what has been found out so far is
    collected here (later we will move it to some better place).
*/
typedef enum
{
	WLNF_INVALID         = -1,

	WLNF_EUROPEAN_ARABIC =  0,
	WLNF_UPPER_ROMAN     =  1,
	WLNF_LOWER_ROMAN     =  2,
	WLNF_UPPER_LETTER    =  3,
	WLNF_LOWER_LETTER    =  4,
	WLNF_ORDINAL         =  5,

	WLNF_BULLETS         = 23,
	WLNF_HEBREW_NUMBERS  = 45
} MSWordListIdType;
499
500 typedef struct{
501 UT_uint32 listId;
502 UT_uint32 level;
503 } ListIdLevelPair;
504
505 /*!
506 * Map msword list enums back to abi's
507 */
508 static const char *
s_mapDocToAbiListId(MSWordListIdType id)509 s_mapDocToAbiListId (MSWordListIdType id)
510 {
511 switch (id)
512 {
513 case WLNF_UPPER_ROMAN: // upper roman
514 return "4";
515
516 case WLNF_LOWER_ROMAN: // lower roman
517 return "3";
518
519 case WLNF_UPPER_LETTER: // upper letter
520 return "2";
521
522 case WLNF_LOWER_LETTER: // lower letter
523 return "1";
524
525 case WLNF_BULLETS: // bullet list
526 return "5";
527
528 case WLNF_HEBREW_NUMBERS:
529 return "129";
530
531 case WLNF_EUROPEAN_ARABIC:
532 case WLNF_ORDINAL: // ordinal
533 default:
534 return "0";
535 }
536 }
537
538 /*!
539 * form AW list deliminator string
540 */
s_mapDocToAbiListDelim(UT_uint16 * pStr,UT_uint32 iLen,UT_UTF8String & sDelim)541 static void s_mapDocToAbiListDelim (UT_uint16 * pStr, UT_uint32 iLen, UT_UTF8String &sDelim)
542 {
543 // the Word format string looks like this
544 // prefix '\0' suffix
545 // and the '\0' represents the location of the list number/bullet
546 UT_uint16 * pPfx = NULL;
547 UT_uint16 * pSfx = NULL;
548
549 if(iLen && *pStr)
550 pPfx = pStr;
551
552 UT_sint32 i;
553 for(i = 0; i < (UT_sint32)iLen - 1; i++)
554 {
555 if(pStr[i] == 0)
556 {
557 pSfx = pStr + i + 1;
558 break;
559 }
560 }
561
562 UT_UTF8String sUtf8Pfx;
563 UT_UTF8String sUtf8Sfx;
564
565 i= 0;
566 while(pPfx && *pPfx && i < (UT_sint32)iLen)
567 {
568 UT_UCS4Char c = *pPfx;
569 sUtf8Pfx.appendUCS4(&c,1);
570 i++;
571 pPfx++;
572 }
573
574 i++; // move past the '\0' divider
575 while(pSfx && *pSfx && i < (UT_sint32)iLen)
576 {
577 UT_UCS4Char c = *pSfx;
578 sUtf8Sfx.appendUCS4(&c,1);
579 i++;
580 pSfx++;
581 }
582
583 sDelim = sUtf8Pfx;
584 sDelim += "%L";
585 sDelim += sUtf8Sfx;
586 }
587
588 /*!
589 * Map msword list enums back to abi's list styles
590 */
591 static const char *
s_mapDocToAbiListStyle(MSWordListIdType id)592 s_mapDocToAbiListStyle (MSWordListIdType id)
593 {
594 switch (id)
595 {
596 case WLNF_UPPER_ROMAN: // upper roman
597 return "Upper Roman List";
598
599 case WLNF_LOWER_ROMAN: // lower roman
600 return "Lower Roman List";
601
602 case WLNF_UPPER_LETTER: // upper letter
603 return "Upper Case List";
604
605 case WLNF_LOWER_LETTER: // lower letter
606 return "Lower Case List";
607
608 case WLNF_BULLETS: // bullet list
609 return "Bullet List";
610
611 case WLNF_EUROPEAN_ARABIC:
612 case WLNF_ORDINAL: // ordinal
613 default:
614 return "Numbered List";
615 }
616 }
617
618 /*!
619 * Map msword list enums back to abi's field font for that given style
620 */
621 static const char *
s_fieldFontForListStyle(MSWordListIdType id)622 s_fieldFontForListStyle (MSWordListIdType id)
623 {
624 switch (id)
625 {
626 case WLNF_UPPER_ROMAN: // upper roman
627 return "NULL";
628
629 case WLNF_LOWER_ROMAN: // lower roman
630 return "NULL";
631
632 case WLNF_UPPER_LETTER: // upper letter
633 return "Times New Roman";
634
635 case WLNF_LOWER_LETTER: // lower letter
636 return "Times New Roman";
637
638 case WLNF_BULLETS: // bullet list
639 UT_DEBUGMSG(("Fieldfont set to symbol \n"));
640 return "NULL";
641
642 case WLNF_EUROPEAN_ARABIC:
643 case WLNF_ORDINAL: // ordinal
644 return "Times New Roman";
645
646 default:
647 UT_DEBUGMSG(("unknown list type %d field-font set to Times New Roman \n",id));
648 return "Times New Roman";
649 }
650 }
651
652 #if 0
653
// MS Word uses the language codes as explicit overrides when treating
// weak characters; this function translates a language id to the
// overriding direction
657 static bool s_isLanguageRTL(short unsigned int lid)
658 {
659 const char * s = wvLIDToLangConverter (lid);
660 UT_Language l;
661 return (UTLANG_RTL == l.getOrderFromProperty(s));
662 }
663
664 static FootnoteType s_convertNoteType(UT_uint32 t)
665 {
666 return FOOTNOTE_TYPE_NUMERIC;
667 }
668
669 #endif
670
671 /****************************************************************************/
672 /****************************************************************************/
673
IE_Imp_MsWord_97_Sniffer()674 IE_Imp_MsWord_97_Sniffer::IE_Imp_MsWord_97_Sniffer ()
675 : IE_ImpSniffer(IE_IMPEXPNAME_MSWORD97)
676 {
677 //
678 }
679
680 // supported suffixes
681 static IE_SuffixConfidence IE_Imp_MsWord_97_Sniffer__SuffixConfidence[] = {
682 { "doc", UT_CONFIDENCE_PERFECT },
683 { "dot", UT_CONFIDENCE_PERFECT },
684 { "", UT_CONFIDENCE_ZILCH }
685 };
686
getSuffixConfidence()687 const IE_SuffixConfidence * IE_Imp_MsWord_97_Sniffer::getSuffixConfidence ()
688 {
689 return IE_Imp_MsWord_97_Sniffer__SuffixConfidence;
690 }
691
692 // supported mimetypes
693 static IE_MimeConfidence IE_Imp_MsWord_97_Sniffer__MimeConfidence[] = {
694 { IE_MIME_MATCH_FULL, IE_MIMETYPE_MSWord, UT_CONFIDENCE_GOOD },
695 { IE_MIME_MATCH_FULL, "application/vnd.ms-word", UT_CONFIDENCE_GOOD },
696 { IE_MIME_MATCH_FULL, "text/doc", UT_CONFIDENCE_GOOD }, // or is it? [TODO: check!]
697 { IE_MIME_MATCH_BOGUS, "", UT_CONFIDENCE_ZILCH }
698 };
699
getMimeConfidence()700 const IE_MimeConfidence * IE_Imp_MsWord_97_Sniffer::getMimeConfidence ()
701 {
702 return IE_Imp_MsWord_97_Sniffer__MimeConfidence;
703 }
704
recognizeContents(GsfInput * input)705 UT_Confidence_t IE_Imp_MsWord_97_Sniffer::recognizeContents (GsfInput * input)
706 {
707 GsfInfile * ole;
708
709 ole = gsf_infile_msole_new (input, NULL);
710
711 // invokes the old recognizeContents below, in hopes of identifying
712 // pre-OLE files
713 if (!ole)
714 return IE_ImpSniffer::recognizeContents (input);
715
716 UT_Confidence_t confidence = UT_CONFIDENCE_ZILCH;
717 GsfInput * stream = gsf_infile_child_by_name (ole, "WordDocument");
718 if (stream)
719 {
720 g_object_unref (G_OBJECT (stream));
721 confidence = UT_CONFIDENCE_PERFECT;
722 }
723
724 g_object_unref (G_OBJECT (ole));
725
726 return confidence;
727 }
728
recognizeContents(const char * szBuf,UT_uint32 iNumbytes)729 UT_Confidence_t IE_Imp_MsWord_97_Sniffer::recognizeContents (const char * szBuf,
730 UT_uint32 iNumbytes)
731 {
732 const char * magic = 0;
733 int magicoffset = 0;
734
735 magic = "Microsoft Word 6.0 Document";
736 magicoffset = 2080;
737 if (iNumbytes > (magicoffset + strlen (magic)))
738 {
739 if (!strncmp (szBuf + magicoffset, magic, strlen (magic)))
740 {
741 return UT_CONFIDENCE_PERFECT;
742 }
743 }
744
745 magic = "Documento Microsoft Word 6";
746 magicoffset = 2080;
747 if (iNumbytes > (magicoffset + strlen (magic)))
748 {
749 if (!strncmp(szBuf + magicoffset, magic, strlen (magic)))
750 {
751 return UT_CONFIDENCE_PERFECT;
752 }
753 }
754
755 magic = "MSWordDoc";
756 magicoffset = 2112;
757 if (iNumbytes > (magicoffset + strlen (magic)))
758 {
759 if (!strncmp (szBuf + magicoffset, magic, strlen (magic)))
760 {
761 return UT_CONFIDENCE_PERFECT;
762 }
763 }
764
765 // ok, that didn't work, we'll try to dig through the OLE stream
766 if (iNumbytes > 8)
767 {
768 // this code is too generic - also picks up .wri documents
769 if (szBuf[0] == static_cast<char>(0x31)
770 && static_cast< unsigned char>(szBuf[1]) == static_cast< unsigned char>(0xbe)
771 && szBuf[2] == static_cast<char>(0)
772 && szBuf[3] == static_cast<char>(0))
773 {
774 return UT_CONFIDENCE_SOSO; //POOR
775 }
776
777 // this identifies staroffice dox as well
778 if (static_cast< unsigned char>(szBuf[0]) == static_cast<unsigned char>(0xd0)
779 && static_cast< unsigned char>(szBuf[1]) == static_cast<unsigned char>(0xcf)
780 && szBuf[2] == static_cast<char>(0x11)
781 && static_cast< unsigned char>(szBuf[3]) == static_cast<unsigned char>(0xe0)
782 && static_cast< unsigned char>(szBuf[4]) == static_cast<unsigned char>(0xa1)
783 && static_cast< unsigned char>(szBuf[5]) == static_cast<unsigned char>(0xb1)
784 && szBuf[6] == static_cast<char>(0x1a)
785 && static_cast< unsigned char>(szBuf[7]) == static_cast<unsigned char>(0xe1))
786 {
787 return UT_CONFIDENCE_SOSO; // POOR
788 }
789
790 if (szBuf[0] == 'P' && szBuf[1] == 'O' &&
791 szBuf[2] == '^' && szBuf[3] == 'Q' && szBuf[4] == '`')
792 {
793 return UT_CONFIDENCE_POOR;
794 }
795 if (static_cast< unsigned char>(szBuf[0]) == static_cast<unsigned char>(0xfe)
796 && szBuf[1] == static_cast<char>(0x37)
797 && szBuf[2] == static_cast<char>(0)
798 && szBuf[3] == static_cast<char>(0x23))
799 {
800 return UT_CONFIDENCE_POOR;
801 }
802
803 /* WinWord 2 */
804 if (static_cast< unsigned char>(szBuf[0]) == static_cast<unsigned char>(0xdb)
805 && static_cast< unsigned char>(szBuf[1]) == static_cast<unsigned char>(0xa5)
806 && szBuf[2] == static_cast<char>(0x2d)
807 && szBuf[3] == static_cast<char>(0))
808 {
809 return UT_CONFIDENCE_PERFECT;
810 }
811 }
812 return UT_CONFIDENCE_ZILCH;
813 }
814
constructImporter(PD_Document * pDocument,IE_Imp ** ppie)815 UT_Error IE_Imp_MsWord_97_Sniffer::constructImporter (PD_Document * pDocument,
816 IE_Imp ** ppie)
817 {
818 IE_Imp_MsWord_97 * p = new IE_Imp_MsWord_97(pDocument);
819 *ppie = p;
820 return UT_OK;
821 }
822
getDlgLabels(const char ** pszDesc,const char ** pszSuffixList,IEFileType * ft)823 bool IE_Imp_MsWord_97_Sniffer::getDlgLabels (const char ** pszDesc,
824 const char ** pszSuffixList,
825 IEFileType * ft)
826 {
827 *pszDesc = "Microsoft Word (.doc, .dot)";
828 *pszSuffixList = "*.doc; *.dot";
829 *ft = getFileType();
830 return true;
831 }
832
833 /****************************************************************************/
834 /****************************************************************************/
835
// Buffer sizes; the values have been chosen arbitrarily.
#define DOC_TEXTRUN_SIZE    2048
#define DOC_PROPBUFFER_SIZE 1024
839
~IE_Imp_MsWord_97()840 IE_Imp_MsWord_97::~IE_Imp_MsWord_97()
841 {
842 if(m_pBookmarks)
843 {
844 // g_free the names from the bookmarks
845 for(UT_uint32 i = 0; i < m_iBookmarksCount; i++)
846 {
847 // make sure we do not delete any name twice
848 if(m_pBookmarks[i].name && m_pBookmarks[i].start)
849 {
850 delete[] m_pBookmarks[i].name;
851 m_pBookmarks[i].name = NULL;
852 }
853 }
854 delete [] m_pBookmarks;
855 }
856
857 UT_VECTOR_PURGEALL(ListIdLevelPair *, m_vLists);
858 UT_VECTOR_PURGEALL(emObject *, m_vecEmObjects);
859 UT_VECTOR_PURGEALL(textboxPos *, m_vecTextboxPos);
860
861 DELETEPV(m_pTextboxes);
862 DELETEPV(m_pFootnotes);
863 DELETEPV(m_pEndnotes);
864 DELETEPV(m_pHeaders);
865 }
866
IE_Imp_MsWord_97(PD_Document * pDocument)867 IE_Imp_MsWord_97::IE_Imp_MsWord_97(PD_Document * pDocument)
868 : IE_Imp (pDocument),
869 m_nSections(0),
870 m_bSetPageSize(false),
871 m_bIsLower(false),
872 m_bInSect(false),
873 m_bInPara(false),
874 m_bLTRCharContext(true),
875 m_bLTRParaContext(true),
876 m_iOverrideIssued(UT_BIDI_UNSET),
877 m_bBidiMode(false),
878 m_bInLink(false),
879 m_pBookmarks(NULL),
880 m_iBookmarksCount(0),
881 m_pFootnotes(NULL),
882 m_iFootnotesCount(0),
883 m_pEndnotes(NULL),
884 m_iEndnotesCount(0),
885 m_pTextboxes(NULL),
886 m_iTextboxCount(0),
887 m_iMSWordListId(0),
888 m_bEncounteredRevision(false),
889 m_bInTable(false),
890 m_iRowsRemaining(0),
891 m_iCellsRemaining(0),
892 m_iCurrentRow(0),
893 m_iCurrentCell(0),
894 m_bRowOpen(false),
895 m_bCellOpen(false),
896 m_iFootnotesStart(0xffffffff),
897 m_iFootnotesEnd(0xffffffff),
898 m_iEndnotesStart(0xffffffff),
899 m_iEndnotesEnd(0xffffffff),
900 m_iNextFNote(0),
901 m_iNextENote(0),
902 m_bInFNotes(false),
903 m_bInENotes(false),
904 m_pNotesEndSection(NULL),
905 m_pHeaders(NULL),
906 m_iHeadersCount(0),
907 m_iHeadersStart(0xffffffff),
908 m_iHeadersEnd(0xffffffff),
909 m_iCurrentHeader(0),
910 m_bInHeaders(false),
911 m_iCurrentSectId(0),
912 m_iAnnotationsStart(0xffffffff),
913 m_iAnnotationsEnd(0xffffffff),
914 m_iMacrosStart(0xffffffff),
915 m_iMacrosEnd(0xffffffff),
916 m_iTextStart(0xffffffff),
917 m_iTextEnd(0xffffffff),
918 m_bPageBreakPending(false),
919 m_bLineBreakPending(false),
920 m_bSymbolFont(false),
921 m_dim(DIM_IN),
922 m_iLeft(0),
923 m_iRight(0),
924 m_iTextboxesStart(0xffffffff),
925 m_iTextboxesEnd(0xffffffff),
926 m_iNextTextbox(0),
927 m_iPrevHeaderPosition(0xffffffff),
928 m_bEvenOddHeaders(false),
929 m_bInTOC(false),
930 m_bTOCsupported(false),
931 m_bInTextboxes(false),
932 m_pTextboxEndSection(NULL),
933 m_iLeftCellPos(0),
934 m_iLastAppendedHeader(0xffffffff)
935 {
936 for(UT_uint32 i = 0; i < 9; i++)
937 m_iListIdIncrement[i] = 0;
938 m_vecTextboxPos.clear();
939 }
940
941 /****************************************************************************/
942 /****************************************************************************/
943
// Free the OLE data of the local parse struct `ps` and return `code`
// from the enclosing function.
#define ErrCleanupAndExit(code)      \
	do {                             \
		wvOLEFree (&ps);             \
		return(code);                \
	} while(0)

// Ask for a password through the frame that last had the focus.
#define GetPassword() _getPassword ( XAP_App::getApp()->getLastFocussedFrame() )

// Report message x through the frame that last had the focus, if there is
// one. NOTE(review): the only _errorMessage() visible in this part of the
// file sits inside an #if 0 block -- confirm before using this macro.
#define ErrorMessage(x)                                                   \
	do {                                                                  \
		XAP_Frame *_pFrame = XAP_App::getApp()->getLastFocussedFrame();   \
		if ( _pFrame )                                                    \
			_errorMessage (_pFrame, (x));                                 \
	} while (0)
949
_getPassword(XAP_Frame * pFrame)950 static UT_UTF8String _getPassword (XAP_Frame * pFrame)
951 {
952 UT_UTF8String password ( "" );
953
954 if ( pFrame )
955 {
956 pFrame->raise ();
957
958 XAP_DialogFactory * pDialogFactory
959 = (XAP_DialogFactory *)(pFrame->getDialogFactory());
960
961 XAP_Dialog_Password * pDlg = static_cast<XAP_Dialog_Password*>(pDialogFactory->requestDialog(XAP_DIALOG_ID_PASSWORD));
962 UT_return_val_if_fail(pDlg, password);
963
964 pDlg->runModal (pFrame);
965
966 XAP_Dialog_Password::tAnswer ans = pDlg->getAnswer();
967 bool bOK = (ans == XAP_Dialog_Password::a_OK);
968
969 if (bOK)
970 password = pDlg->getPassword ();
971
972 pDialogFactory->releaseDialog(pDlg);
973 }
974
975 return password;
976 }
977
978 #if 0
979 static void _errorMessage (XAP_Frame * pFrame, int id)
980 {
981 UT_return_if_fail(pFrame);
982
983 const XAP_StringSet * pSS = XAP_App::getApp ()->getStringSet ();
984
985 const char * text = pSS->getValue (id, pFrame->getApp()->getDefaultEncoding()).c_str();
986
987 pFrame->showMessageBox (text, XAP_Dialog_MessageBox::b_O,
988 XAP_Dialog_MessageBox::a_OK);
989 }
990 #endif
991
992 static const struct {
993 const char * metadata_key;
994 const char * abi_metadata_name;
995 } metadata_names[] = {
996 { GSF_META_NAME_TITLE, PD_META_KEY_TITLE },
997 { GSF_META_NAME_DESCRIPTION, PD_META_KEY_DESCRIPTION },
998 { GSF_META_NAME_SUBJECT, PD_META_KEY_SUBJECT },
999 { GSF_META_NAME_DATE_MODIFIED, PD_META_KEY_DATE_LAST_CHANGED },
1000 { GSF_META_NAME_DATE_CREATED, PD_META_KEY_DATE },
1001 { GSF_META_NAME_KEYWORDS, PD_META_KEY_KEYWORDS },
1002 { GSF_META_NAME_LANGUAGE, PD_META_KEY_LANGUAGE },
1003 { GSF_META_NAME_REVISION_COUNT, NULL },
1004 { GSF_META_NAME_EDITING_DURATION, NULL },
1005 { GSF_META_NAME_TABLE_COUNT, NULL },
1006 { GSF_META_NAME_IMAGE_COUNT, NULL },
1007 { GSF_META_NAME_OBJECT_COUNT, NULL },
1008 { GSF_META_NAME_PAGE_COUNT, NULL },
1009 { GSF_META_NAME_PARAGRAPH_COUNT, NULL },
1010 { GSF_META_NAME_WORD_COUNT, NULL },
1011 { GSF_META_NAME_CHARACTER_COUNT, NULL },
1012 { GSF_META_NAME_CELL_COUNT, NULL },
1013 { GSF_META_NAME_SPREADSHEET_COUNT, NULL },
1014 { GSF_META_NAME_CREATOR, PD_META_KEY_CREATOR },
1015 { GSF_META_NAME_TEMPLATE, NULL },
1016 { GSF_META_NAME_LAST_SAVED_BY, NULL },
1017 { GSF_META_NAME_LAST_PRINTED, NULL },
1018 { GSF_META_NAME_SECURITY, NULL },
1019 { GSF_META_NAME_CATEGORY, NULL },
1020 { GSF_META_NAME_PRESENTATION_FORMAT, NULL },
1021 { GSF_META_NAME_THUMBNAIL, NULL },
1022 { GSF_META_NAME_GENERATOR, PD_META_KEY_GENERATOR },
1023 { GSF_META_NAME_LINE_COUNT, NULL },
1024 { GSF_META_NAME_SLIDE_COUNT, NULL },
1025 { GSF_META_NAME_NOTE_COUNT, NULL },
1026 { GSF_META_NAME_HIDDEN_SLIDE_COUNT, NULL },
1027 { GSF_META_NAME_MM_CLIP_COUNT, NULL },
1028 { GSF_META_NAME_BYTE_COUNT, NULL },
1029 { GSF_META_NAME_SCALE, NULL },
1030 { GSF_META_NAME_HEADING_PAIRS, NULL },
1031 { GSF_META_NAME_DOCUMENT_PARTS, NULL },
1032 { GSF_META_NAME_MANAGER, PD_META_KEY_CONTRIBUTOR },
1033 { GSF_META_NAME_COMPANY, PD_META_KEY_PUBLISHER },
1034 { GSF_META_NAME_LINKS_DIRTY, NULL },
1035 { GSF_META_NAME_MSOLE_UNKNOWN_17, NULL },
1036 { GSF_META_NAME_MSOLE_UNKNOWN_18, NULL },
1037 { GSF_META_NAME_MSOLE_UNKNOWN_19, NULL },
1038 { GSF_META_NAME_MSOLE_UNKNOWN_20, NULL },
1039 { GSF_META_NAME_MSOLE_UNKNOWN_21, NULL },
1040 { GSF_META_NAME_MSOLE_UNKNOWN_22, NULL },
1041 { GSF_META_NAME_MSOLE_UNKNOWN_23, NULL },
1042 { GSF_META_NAME_DICTIONARY, NULL },
1043 { GSF_META_NAME_LOCALE_SYSTEM_DEFAULT, NULL },
1044 { GSF_META_NAME_CASE_SENSITIVE, NULL }
1045 };
// number of entries in the metadata_names table; bounds the linear scan
// done in cb_print_property()
static const gsize nr_metadata_names = G_N_ELEMENTS(metadata_names);
1047
// Context bundle handed to cb_print_property() through the user-data
// pointer of gsf_doc_meta_data_foreach().
struct DocAndLid
{
	PD_Document *doc;	// document that receives the metadata properties
	int lid;			// language id; used to choose the source codepage of string values
};
1053
1054 static void
cb_print_property(char const * name,GsfDocProp const * prop,DocAndLid * doc)1055 cb_print_property (char const *name, GsfDocProp const *prop, DocAndLid * doc)
1056 {
1057 GValue const *val = gsf_doc_prop_get_val (prop);
1058
1059 if (! VAL_IS_GSF_DOCPROP_VECTOR ((GValue *)val)) {
1060
1061 // just scan over the table. consider optimizing if we really care to.
1062 for(gsize i = 0; i < nr_metadata_names; i++) {
1063 if(strcmp(metadata_names[i].metadata_key, name) == 0) {
1064 char const * abi_metadata_name = metadata_names[i].abi_metadata_name;
1065
1066 if(abi_metadata_name != NULL) {
1067 const char * encoding = NULL;
1068 if (doc->lid >> 8 != 0x04) {
1069 // header is not utf8 encoded
1070 encoding = wvLIDToCodePageConverter(doc->lid);
1071 }
1072 char *tmp;
1073
1074 if (G_VALUE_HOLDS(val, G_TYPE_STRING))
1075 {
1076 // special-case strings. it seems that g_value_get_string()
1077 // and g_strdup_value_contents() may return different things
1078 // check with document from bug 11148
1079 const char * contents = g_value_get_string(val);
1080
1081 if (encoding && *encoding)
1082 {
1083 tmp = g_convert_with_fallback(contents, -1, (gchar*)"UTF-8", encoding, (gchar*)"?", NULL, NULL, NULL);
1084 }
1085 else
1086 {
1087 tmp = g_strdup(contents);
1088 }
1089
1090 }
1091 else
1092 {
1093 // coerce into a string
1094 tmp = g_strdup_value_contents(val);
1095 }
1096
1097 char * meta = tmp;
1098 // strip beginning and ending quotes
1099 if(meta && strcmp(meta,"\"\"")) { // ignore '""' props
1100 if(meta[0] == '"')
1101 meta++;
1102 int len = strlen(meta);
1103 if ((len > 0) && meta[len - 1] == '"') {
1104 meta[len - 1] = '\0';
1105 }
1106 if (*meta) {
1107 doc->doc->setMetaDataProp(abi_metadata_name, meta);
1108 }
1109 }
1110 g_free (tmp);
1111 }
1112 }
1113 }
1114 }
1115 }
1116
print_summary_stream(GsfInfile * msole,const char * stream_name,int lid,PD_Document * doc)1117 static void print_summary_stream (GsfInfile * msole,
1118 const char * stream_name,
1119 int lid,
1120 PD_Document * doc)
1121 {
1122 GsfInput * stream = gsf_infile_child_by_name (msole, stream_name);
1123 if (stream != NULL) {
1124 GsfDocMetaData *meta_data = gsf_doc_meta_data_new ();
1125 GError *err = NULL;
1126
1127 err = gsf_msole_metadata_read (stream, meta_data);
1128 if (err != NULL) {
1129 g_warning ("Error getting metadata for %s: %s", stream_name, err->message);
1130 g_error_free (err);
1131 err = NULL;
1132 } else {
1133 DocAndLid dil;
1134
1135 dil.doc = doc;
1136 dil.lid = lid;
1137 gsf_doc_meta_data_foreach (meta_data,
1138 (GHFunc) cb_print_property, &dil);
1139 }
1140
1141 g_object_unref (meta_data);
1142 g_object_unref (G_OBJECT (stream));
1143 }
1144 }
1145
_handleMetaData(wvParseStruct * ps)1146 void IE_Imp_MsWord_97::_handleMetaData(wvParseStruct *ps)
1147 {
1148 print_summary_stream (GSF_INFILE(ps->ole_file), "\05SummaryInformation", ps->fib.lid, getDoc());
1149 print_summary_stream (GSF_INFILE(ps->ole_file), "\05DocumentSummaryInformation", ps->fib.lid, getDoc());
1150 }
1151
_loadFile(GsfInput * fp)1152 UT_Error IE_Imp_MsWord_97::_loadFile(GsfInput * fp)
1153 {
1154 wvParseStruct ps;
1155
1156 int ret = wvInitParser_gsf(&ps, fp);
1157 const char * password = NULL;
1158
1159 if (ret & 0x8000) /* Password protected? */
1160 {
1161 UT_UTF8String pass (GetPassword());
1162 if ( pass.size () != 0 )
1163 password = pass.utf8_str();
1164
1165 if ((ret & 0x7fff) == WORD8)
1166 {
1167 ret = 0;
1168 if (password == NULL)
1169 {
1170 //ErrorMessage(AP_STRING_ID_WORD_PassRequired);
1171 ErrCleanupAndExit(UT_IE_PROTECTED);
1172 }
1173 else
1174 {
1175 wvSetPassword (password, &ps);
1176 if (wvDecrypt97 (&ps))
1177 {
1178 //ErrorMessage(AP_STRING_ID_WORD_PassInvalid);
1179 ErrCleanupAndExit(UT_IE_PROTECTED);
1180 }
1181 }
1182 }
1183 else if (((ret & 0x7fff) == WORD7) || ((ret & 0x7fff) == WORD6))
1184 {
1185 ret = 0;
1186 if (password == NULL)
1187 {
1188 //ErrorMessage(AP_STRING_ID_WORD_PassRequired);
1189 ErrCleanupAndExit(UT_IE_PROTECTED);
1190 }
1191 else
1192 {
1193 wvSetPassword (password, &ps);
1194 if (wvDecrypt95 (&ps))
1195 {
1196 //("Incorrect Password\n"));
1197 ErrCleanupAndExit(UT_IE_PROTECTED);
1198 }
1199 }
1200 }
1201 }
1202
1203 if (ret) {
1204 ErrCleanupAndExit(UT_IE_BOGUSDOCUMENT);
1205 }
1206
1207 // register ourself as the userData
1208 ps.userData = this;
1209
1210 // register callbacks
1211 wvSetElementHandler (&ps, eleProc);
1212 wvSetCharHandler (&ps, charProc);
1213 wvSetSpecialCharHandler(&ps, specCharProc);
1214 wvSetDocumentHandler (&ps, docProc);
1215
1216 // need to init doc props
1217 if(!getLoadStylesOnly())
1218 getDoc()->setAttrProp(NULL);
1219
1220 _handleMetaData(&ps);
1221 wvText(&ps);
1222
1223 if(getLoadStylesOnly()) {
1224 wvOLEFree(&ps);
1225 return UT_OK;
1226 }
1227
1228 wvOLEFree(&ps);
1229
1230 // We can't be in a good state if we didn't add any sections!
1231 if (m_nSections == 0)
1232 return UT_IE_BOGUSDOCUMENT;
1233
1234 return UT_OK;
1235 }
1236
/*!
 * Writes the pending text run (m_pTextRun) into the document and clears it.
 *
 * Does nothing if the run is empty. Before appending, a default section
 * and/or a default block are appended if the importer is not currently
 * inside one, and any bookmark objects queued in m_vecEmObjects are emitted
 * into the freshly created block.
 *
 * In bidi mode the run is split into spans so that neutral characters can
 * carry an explicit dir-override property (see the long comment below).
 *
 * If appending a format or span fails the function returns early and the
 * text run is NOT cleared.
 */
void IE_Imp_MsWord_97::_flush ()
{
	if(!m_pTextRun.size())
		return;

	// we've got to ensure that we're inside of a section & paragraph
	if (!m_bInSect)
	{
		// append a blank default section - assume it works
		UT_DEBUGMSG(("#TF: _flush: appending default section\n"));
		_appendStrux(PTX_Section, NULL);
		m_bInSect = true;
		m_nSections++;
	}

	// if the last fragment is a strux other than a block or the end of a
	// footnote/endnote, text cannot follow it directly: force a new block
	pf_Frag * pF = getDoc()->getLastFrag();
	if (pF && pF->getType() == pf_Frag::PFT_Strux) {
		pf_Frag_Strux * pFS = (pf_Frag_Strux*)pF;
		if ((pFS->getStruxType() != PTX_Block) && (pFS->getStruxType() != PTX_EndFootnote) && (pFS->getStruxType() != PTX_EndEndnote))
			m_bInPara = false;
	}

	if(!m_bInPara)
	{
		// append a blank default paragraph - assume it works
		UT_DEBUGMSG(("#TF: _flush: appending default block\n"));
		_appendStrux(PTX_Block, NULL);
		m_bInPara = true;

		// emit the objects that _insertBookmark() queued while no block
		// was available; each queued object is deleted once handled
		emObject * pObject = NULL;
		if(m_vecEmObjects.getItemCount() > 0)
		{
			UT_sint32 i =0;
			for(i=0;i< m_vecEmObjects.getItemCount(); i++)
			{
				pObject = m_vecEmObjects.getNthItem(i);
				const gchar* propsArray[5];
				if(pObject->objType == PTO_Bookmark)
				{
					propsArray[0] = static_cast<const gchar *>("name");
					propsArray[1] = static_cast<const gchar *>(pObject->props1.c_str());
					propsArray[2] = static_cast<const gchar *>("type");
					propsArray[3] = static_cast<const gchar *>(pObject->props2.c_str());
					propsArray[4] = static_cast<const gchar *>(NULL);
					_appendObject (PTO_Bookmark, propsArray);
				}
				else
				{
					UT_DEBUGMSG(("MSWord 97 _flush: Object not handled \n"));
					UT_ASSERT_HARMLESS(UT_SHOULD_NOT_HAPPEN);
				}
				delete pObject;
			}
			m_vecEmObjects.clear();
		}
	}

	// NOTE(review): always true here because of the early return at the top
	if (m_pTextRun.size())
	{
		// bidi adjustments for neutrals
		//
		// We have a problem in bidi documents caused by the fact that
		// Word does not use the Unicode bidi algorithm, but rather one of
		// its own, which adds keyboard language to the equation. We get
		// around this by issuing an explicit direction override on the
		// neutral characters. We do it here in the _flush() function
		// because when we have both left and right context available
		// for these characters we can tell if the override is
		// superfluous, which it is most of the time; omitting the
		// superfluous overrides allows us to import documents in a
		// manner that will make them feel more like native AW docs.
		// (This does not get rid of all the unnecessary overrides, for
		// that we would need to have the text of an entire paragraph)
		//
		// It goes without saying that it would be highly desirable to be
		// able to determine at the start if a document is pure LTR (as
		// we do in the RTF importer), since that would save us lot of
		// extra processing
		// Tomas, May 8, 2003

		if(m_bBidiMode)
		{
			const gchar* pProps = "props";
			UT_String prop_basic = m_charProps;

			// three variants of the char props: without override, with an
			// LTR override and with an RTL override
			UT_String prop_ltr = prop_basic;
			UT_String prop_rtl = prop_basic;

			if(prop_basic.size())
			{
				prop_ltr += ";";
				prop_rtl += ";";
			}
			else
			{
				// if the char props are empty, we need replace them
				// with the following to avoid asserts in PP_AttrProp
				prop_basic = "dir-override:";
			}


			prop_ltr += "dir-override:ltr";
			prop_rtl += "dir-override:rtl";

			const gchar rev[] ="revision";

			// attribute list: "props" + value, optionally followed by
			// "revision" + value, NULL terminated
			const gchar* propsArray[5];
			propsArray[0] = pProps;
			propsArray[1] = prop_basic.c_str();
			propsArray[2] = NULL;
			propsArray[3] = NULL;
			propsArray[4] = NULL;

			UT_uint32 iEmptyAttrib = 2;

			if(m_charRevs.size())
			{
				propsArray[iEmptyAttrib++] = &rev[0];
				propsArray[iEmptyAttrib++] = m_charRevs.c_str();
			}

			const UT_UCS4Char * p;
			const UT_UCS4Char * pStart = m_pTextRun.ucs4_str();
			UT_uint32 iLen = m_pTextRun.size();

			// iOverride: the override currently in effect for the span
			//            that starts at iLast
			// cLastType/cType/cNextType: bidi types of the previous,
			//            current and next character
			UT_BidiCharType iOverride = UT_BIDI_UNSET, cType, cLastType = UT_BIDI_UNSET, cNextType;
			UT_uint32 iLast = 0;
			UT_UCS4Char c = *pStart;

			cType = UT_bidiGetCharType(c);

			for(UT_uint32 i = 0; i < iLen; i++)
			{
				// look one character ahead; there is no next type at the
				// end of the run
				if(i < iLen - 1 )
				{
					c = *(pStart+i+1);
					cNextType = UT_bidiGetCharType(c);
				}
				else
				{
					cNextType = UT_BIDI_UNSET;
				}


				if(UT_BIDI_IS_NEUTRAL(cType))
				{
					// a neutral needs an override unless it is already
					// covered by one, or both neighbours have the
					// direction of the current character context
					if(m_bLTRCharContext
					   && iOverride != UT_BIDI_LTR
					   && (cLastType != UT_BIDI_LTR || cNextType != UT_BIDI_LTR))
					{
						// emit the text collected so far with the old props
						if(i - iLast > 0)
						{
							p = pStart + iLast;
							if(!_appendFmt(propsArray))
								return;

							if(!_appendSpan(p, i - iLast))
								return;
						}
						iOverride = UT_BIDI_LTR;
						propsArray[1] = prop_ltr.c_str();
						iLast = i;
					}
					else if(!m_bLTRCharContext
					   && iOverride != UT_BIDI_RTL
					   && (cLastType != UT_BIDI_RTL || cNextType != UT_BIDI_RTL))
					{
						// emit the text collected so far with the old props
						if(i - iLast > 0)
						{
							p = pStart + iLast;
							if(!_appendFmt(propsArray))
								return;

							if(!_appendSpan(p, i - iLast))
								return;
						}
						iOverride = UT_BIDI_RTL;
						propsArray[1] = prop_rtl.c_str();
						iLast = i;
					}
				}
				else
				{
					// strong character; if we previously issued an override,
					// we need to cancel it
					if(iOverride != static_cast<UT_uint32>(UT_BIDI_UNSET))
					{
						if(i - iLast > 0)
						{
							p = pStart + iLast;
							if(!_appendFmt(propsArray))
								return;

							if(!_appendSpan(p, i - iLast))
								return;
						}
						iOverride = UT_BIDI_UNSET;
						propsArray[1] = prop_basic.c_str();
						iLast = i;
					}
				}

				// slide the three-character window forward
				cLastType = cType;
				cType = cNextType;
			}

			// insert what is left over
			if(iLen - iLast > 0)
			{
				p = pStart + iLast;
				if(!_appendFmt(propsArray))
					return;

				if(!_appendSpan(p, iLen - iLast))
					return;
			}
		}
		else
		{
			// non-bidi document, just do it the easy way
			if (!_appendSpan(m_pTextRun.ucs4_str(), m_pTextRun.size()))
			{
				UT_DEBUGMSG(("DOM: error appending text run\n"));
				return;
			}
		}

		m_pTextRun.clear();
	}
}
1466
_appendChar(UT_UCSChar ch)1467 void IE_Imp_MsWord_97::_appendChar (UT_UCSChar ch)
1468 {
1469 if (m_bInTable) {
1470 switch (ch) {
1471 case 7: // eat tab characters
1472 return;
1473 case 30: // ??
1474 ch = '-';
1475 break;
1476 }
1477 }
1478
1479 if ( m_bIsLower )
1480 ch = UT_UCS4_tolower ( ch );
1481 m_pTextRun += ch;
1482 }
1483
1484 /****************************************************************************/
1485 /****************************************************************************/
1486
s_cmp_bookmarks_qsort(const void * a,const void * b)1487 static int s_cmp_bookmarks_qsort(const void * a, const void * b)
1488 {
1489 const bookmark * A = static_cast<const bookmark *>(a);
1490 const bookmark * B = static_cast<const bookmark *>(b);
1491
1492 if(A->pos != B->pos)
1493 return (A->pos - B->pos);
1494 else
1495 // for bookmarks with identical position we want any start bookmarks to be
1496 // before end bookmarks.
1497 return static_cast<UT_sint32>(B->start) - static_cast<UT_sint32>(A->start);
1498 }
1499
s_cmp_bookmarks_bsearch(const void * a,const void * b)1500 static int s_cmp_bookmarks_bsearch(const void * a, const void * b)
1501 {
1502 UT_uint32 A = *static_cast<const UT_uint32 *>(a);
1503 const bookmark * B = static_cast<const bookmark *>(b);
1504
1505 return (A - B->pos);
1506 }
1507
_getBookmarkName(const wvParseStruct * ps,UT_uint32 pos)1508 gchar * IE_Imp_MsWord_97::_getBookmarkName(const wvParseStruct * ps, UT_uint32 pos)
1509 {
1510 gchar *str;
1511 UT_UTF8String sUTF8;
1512
1513 if(ps->Sttbfbkmk.extendedflag == 0xFFFF)
1514 {
1515 // 16 bit stuff
1516 const UT_UCS2Char * p = static_cast<const UT_UCS2Char *>(ps->Sttbfbkmk.u16strings[pos]);
1517 if(p) {
1518 UT_uint32 len = UT_UCS2_strlen(p);
1519 sUTF8.clear();
1520 sUTF8.appendUCS2(p, len);
1521
1522 str = new gchar[sUTF8.byteLength()+1];
1523 strcpy(str, sUTF8.utf8_str());
1524 } else
1525 str = NULL;
1526 }
1527 else
1528 {
1529 // 8 bit stuff
1530 // there is a bug in wv, and the table gets incorrectly retrieved
1531 // if it contains 8-bit strings
1532 if(ps->Sttbfbkmk.s8strings[pos])
1533 {
1534 UT_uint32 len = strlen(ps->Sttbfbkmk.s8strings[pos]);
1535 str = new gchar[len + 1];
1536 UT_uint32 i = 0;
1537 for(i = 0; i < len; i++)
1538 str[i] = ps->Sttbfbkmk.s8strings[pos][i];
1539 str[i] = 0;
1540 }
1541 else
1542 str = NULL;
1543 }
1544
1545 return str;
1546 }
1547
_docProc(wvParseStruct * ps,UT_uint32 tag)1548 int IE_Imp_MsWord_97::_docProc (wvParseStruct * ps, UT_uint32 tag)
1549 {
1550 // flush out any pending character data
1551 this->_flush ();
1552
1553 switch (static_cast<wvTag>(tag))
1554 {
1555 case DOCBEGIN:
1556
1557 // test the bidi nature of this document
1558 #ifdef BIDI_DEBUG
1559 m_bBidiMode = wvIsBidiDocument(ps);
1560 UT_DEBUGMSG(("IE_Imp_MsWord_97::_docProc: complex %d, bidi %d\n",
1561 ps->fib.fComplex,m_bBidiMode));
1562 #else
1563 // for now we will assume that all documents are bidi
1564 // documents (Tomas, Apr 12, 2003)
1565
1566 m_bBidiMode = false;
1567 #endif
1568
1569 m_bEvenOddHeaders = (ps->dop.fFacingPages != 0);
1570
1571 // import styles
1572 _handleStyleSheet(ps);
1573
1574 if(getLoadStylesOnly())
1575 return 1;
1576
1577 // deal with bookmarks
1578 _handleBookmarks(ps);
1579
1580 // deal with footnotes and endnotes, headers
1581 // first, get the doc offsets of the foot/endnote text
1582 // (We are interested in the offset of this in the document,
1583 // not in the data stream; therefore, we do not add
1584 // ps->fib.fcMin for the simple doc
1585 // Tthere are some strange docs around that have invalid
1586 // values for the end of endnote section (e.g. the doc from
1587 // bug 3283); that's what the if's are about.
1588 m_iTextStart = 0;
1589 m_iTextEnd = ps->fib.ccpText;
1590 if(m_iTextEnd == 0xffffffff)
1591 m_iTextEnd = m_iTextStart;
1592
1593 m_iFootnotesStart = m_iTextEnd;
1594 m_iFootnotesEnd = m_iFootnotesStart + ps->fib.ccpFtn;
1595 if(m_iFootnotesEnd == 0xffffffff)
1596 m_iFootnotesEnd = m_iFootnotesStart;
1597
1598 m_iHeadersStart = m_iFootnotesEnd;
1599 m_iHeadersEnd = m_iHeadersStart + ps->fib.ccpHdr;
1600 if(m_iHeadersEnd == 0xffffffff)
1601 m_iHeadersEnd = m_iHeadersStart;
1602
1603 m_iMacrosStart = m_iHeadersEnd;
1604 m_iMacrosEnd = m_iMacrosStart + ps->fib.ccpMcr;
1605 if(m_iMacrosEnd == 0xffffffff)
1606 m_iMacrosEnd = m_iMacrosStart;
1607
1608 m_iAnnotationsStart = m_iMacrosEnd;
1609 m_iAnnotationsEnd = m_iAnnotationsStart + ps->fib.ccpAtn;
1610 if(m_iAnnotationsEnd == 0xffffffff)
1611 m_iAnnotationsEnd = m_iAnnotationsStart;
1612
1613 m_iEndnotesStart = m_iAnnotationsEnd;
1614 m_iEndnotesEnd = m_iEndnotesStart + ps->fib.ccpEdn;
1615 if(m_iEndnotesEnd == 0xffffffff)
1616 m_iEndnotesEnd = m_iEndnotesStart;
1617
1618 m_iTextboxesStart = m_iEndnotesEnd;
1619 m_iTextboxesEnd = m_iTextboxesStart + ps->fib.ccpTxbx;
1620 UT_DEBUGMSG(("Size of all text in all textboxes %d \n", ps->fib.ccpTxbx));
1621
1622 if(m_iTextboxesEnd == 0xffffffff)
1623 m_iTextboxesEnd = m_iTextboxesStart;
1624 UT_DEBUGMSG((" Found %d Positioned TextBoxes \n",ps->nooffspa));
1625 // now retrieve the note info ...
1626 _handleNotes(ps);
1627 _handleHeaders(ps);
1628 _handleTextBoxes(ps);
1629
1630 if(m_iAnnotationsEnd != m_iAnnotationsStart)
1631 {
1632 UT_DEBUGMSG(("Annotations of length %d in this doc \n",m_iAnnotationsEnd - m_iAnnotationsStart));
1633 }
1634 UT_DEBUGMSG(("Fnotes [%d,%d], Enotes [%d,%d]\n",
1635 m_iFootnotesStart, m_iFootnotesEnd, m_iEndnotesStart, m_iEndnotesEnd));
1636
1637 ///////////////////////////////////////////////////////////////////////////////
1638 // Set various revision states
1639 //
1640 // unlike Word:
1641 //
1642 // * we do not differentiate between screen and print: we
1643 // print whatever is on screen
1644 //
1645 // * if show revisions is off, Word shows what the
1646 // document looks like _after_ the last revision; by
1647 // default we show what it looked _before_ first
1648 // revision; we can show the post-revision state by
1649 // setting the view id to PD_MAX_REVISION
1650 //
1651 // * we currently do not handle the fLockRev parameter
1652 {
1653 bool bShow = ps->dop.fRMView == 1 || ps->dop.fRMPrint == 1;
1654
1655 getDoc()->setShowRevisions(bShow);
1656
1657 if(!bShow)
1658 {
1659 getDoc()->setShowRevisionId(PD_MAX_REVISION);
1660 }
1661
1662 getDoc()->setMarkRevisions(ps->dop.fRevMarking == 1);
1663 }
1664
1665 break;
1666
1667 case DOCEND:
1668 // we want to clean up fmt marks
1669 getDoc()->purgeFmtMarks();
1670 break;
1671 default:
1672 break;
1673 }
1674
1675 return 0;
1676 }
1677
_insertBookmark(bookmark * bm)1678 bool IE_Imp_MsWord_97::_insertBookmark(bookmark * bm)
1679 {
1680 // first of all flush what is in the buffers
1681 this->_flush();
1682 bool error = false;
1683
1684 const gchar* propsArray[5];
1685 propsArray[0] = static_cast<const gchar *>("name");
1686 propsArray[1] = static_cast<const gchar *>(bm->name);
1687 propsArray[2] = static_cast<const gchar *>("type");
1688 propsArray[4] = 0;
1689
1690 if(bm->start)
1691 propsArray[3] = static_cast<const gchar *>("start");
1692 else
1693 propsArray[3] = static_cast<const gchar *>("end");
1694
1695 if(m_bInTable && !m_bCellOpen)
1696 {
1697 emObject * pObject = new emObject;
1698 pObject->props1 = propsArray[1];
1699 pObject->objType = PTO_Bookmark;
1700 pObject->props2 = propsArray[3];
1701 m_vecEmObjects.addItem(pObject);
1702 }
1703 else
1704 {
1705 //
1706 // Bookmarks need to be preceded by Blocks
1707 //
1708 pf_Frag * pf = getDoc()->getLastFrag();
1709 while(pf && pf->getType() != pf_Frag::PFT_Strux)
1710 {
1711 pf = pf->getPrev();
1712 }
1713 if(pf && (pf->getType() == pf_Frag::PFT_Strux) )
1714 {
1715 pf_Frag_Strux * pfs = static_cast<pf_Frag_Strux *>(pf);
1716 if(pfs->getStruxType() != PTX_Block)
1717 {
1718 getDoc()->appendStrux(PTX_Block, NULL);
1719 }
1720 }
1721 else if( pf == NULL)
1722 {
1723 getDoc()->appendStrux(PTX_Block, NULL);
1724 }
1725
1726 if (!_appendObject (PTO_Bookmark, propsArray))
1727 {
1728 UT_DEBUGMSG (("Could not append bookmark object\n"));
1729 error = true;
1730 }
1731 }
1732 return error;
1733 }
1734
_insertBookmarkIfAppropriate(UT_uint32 iDocPosition)1735 bool IE_Imp_MsWord_97::_insertBookmarkIfAppropriate(UT_uint32 iDocPosition)
1736 {
1737 //now search for position iDocPosition in our bookmark list;
1738 bookmark * bm, * lastBm;
1739 if (m_iBookmarksCount == 0) {
1740 bm = static_cast<bookmark*>(NULL);
1741 }
1742 else {
1743 bm = static_cast<bookmark*>( bsearch(static_cast<const void *>(&iDocPosition),
1744 m_pBookmarks, m_iBookmarksCount, sizeof(bookmark),
1745 s_cmp_bookmarks_bsearch));
1746 }
1747 bool error = false;
1748 if(bm)
1749 {
1750 // there is a bookmark at the current position
1751 // first make sure the returned bookmark is the first one at this position
1752 while(bm > m_pBookmarks && (bm - 1)->pos == iDocPosition)
1753 bm--;
1754
1755 lastBm = &m_pBookmarks[m_iBookmarksCount];
1756
1757 while(bm < lastBm && bm->pos == iDocPosition)
1758 error |= _insertBookmark(bm++);
1759 }
1760 return error;
1761 }
1762
/*!
 * Character callback invoked by wv for every ordinary character.
 *
 * Emits any pending page/line break, lets the header, note and textbox
 * handlers and the bookmark/note inserters act on the current position,
 * converts the character to Unicode if needed, handles Word's control
 * characters (breaks, paragraph end, field markers) and finally adds the
 * character to the pending text run.
 *
 * \param ps        parser state
 * \param eachchar  the character
 * \param chartype  non-zero if the character needs codepage conversion
 * \param lid       language id used for that conversion
 * \return always 0
 */
int IE_Imp_MsWord_97::_charProc (wvParseStruct *ps, U16 eachchar, U8 chartype, U16 lid)
{
	// make sure we are not past the end of the document ...
	// this can happen with some complex documents
	if(ps->currentcp >= m_iTextboxesEnd)
	{
		UT_DEBUGMSG(("IE_Imp_MsWord_97::_charProc: processing past end of document !!! %d \n",ps->currentcp ));
		return 0;
	}

	// reset the page break tracker
	if(m_bPageBreakPending)
	{
		// we have a page break pending, and being here means that it
		// was not a section break; we have to append it first and
		// then continue normal processing
		this->_appendChar (UCS_FF);
		m_bPageBreakPending = false;
	}

	// reset the line break tracker
	if(m_bLineBreakPending)
	{
		// we have a line break pending
		this->_appendChar (UCS_LF);
		m_bLineBreakPending = false;
	}

	// each of these handlers can veto further processing of this character
	if(!_handleHeadersText(ps->currentcp,true))
		return 0;
	if(!_handleNotesText(ps->currentcp))
		return 0;
	if(!_handleTextboxesText(ps->currentcp))
		return 0;

	// insert any required bookmarks, but only if we are not in a
	// field ...
	if(!ps->fieldstate)
		_insertBookmarkIfAppropriate(ps->currentcp);

	// if a note was inserted at this position the character is consumed
	if(_insertNoteIfAppropriate(ps->currentcp,eachchar))
		return 0;

	// convert incoming character to unicode
	if (chartype)
		eachchar = wvHandleCodePage(eachchar, lid);

	switch (eachchar)
	{

	case 11: // forced line break
		eachchar = UCS_LF;
		break;

	case 12: // page or section break
		this->_flush ();
		//eachchar = UCS_FF;
		// we will not append page breaks to the buffer, only mark it
		// as pending append; that will allow us later to decide if we
		// should or should not append it (we want to remove any page
		// break that is at the end of a section)
		m_bPageBreakPending = true;
		return 0;

	case 13: // end of paragraph
		this->_flush();
		// see bug 9370
		// <delackner> aaah actually, Cocoa's writer is *definitely* broken
		// <delackner> ms word thinks the second para is part of the first, but broken with a non-paragraph-breaking-line-break
		// so we'll treat this like msword does
		m_bLineBreakPending = true;
		return 0;

	case 14: // column break
		eachchar = UCS_VTAB;
		break;

	case 19: // field begin
		this->_flush ();
		ps->fieldstate++;
		ps->fieldmiddle = 0;
		this->_fieldProc (ps, eachchar, chartype, lid);
		return 0;

	case 20: // field separator; some docs have spurious 0x14's in
			 // them, see bug 3745
		if (ps->fieldstate)
		{
			this->_fieldProc (ps, eachchar, chartype, lid);
			ps->fieldmiddle = 1;
		}
		return 0;

	case 21: // field end; ignored unless a field is actually open
		if (ps->fieldstate)
		{
			ps->fieldstate--;
			ps->fieldmiddle = 0;
			this->_fieldProc (ps, eachchar, chartype, lid);
		}
		return 0;
	}

	// i'm not sure if this is needed any more
	// yes, it is, for instance hyperlinks need it
	if (ps->fieldstate)
	{
		xxx_UT_DEBUGMSG(("DOM: fieldstate\n"));
		// a non-zero result means the field code consumed the character
		if(this->_fieldProc (ps, eachchar, chartype, lid))
		{
			return 0;
		}
	}

	// take care of any oddities in Microsoft's character encoding
	if (chartype == 1 && eachchar == 146)
		eachchar = 39; // apostrophe

	// symbol fonts: only the low byte of the character code is kept
	if(m_bSymbolFont)
	{
		eachchar &= 0x00ff;
	}

	// see bug 9370. we probably got a char 13, but no open paragraph.
	if(!m_bInPara) {
		this->_appendChar (UCS_LF);
		_flush();
	}

	this->_appendChar (static_cast<UT_UCSChar>(eachchar));

	return 0;
}
1896
/*!
 * Special-character callback invoked by wv.
 *
 * Handles symbol characters (0x28), field markers (19/20/21), pre-Word97
 * pictures (0x01) and Word 97+ drawn objects (0x08). For the latter a
 * positioned frame (image and/or textbox) is appended to the document.
 *
 * \param ps        parser state
 * \param eachchar  the special character code
 * \param achp      character properties in effect for the character
 * \return 0 in all cases except after a frame has been appended for a 0x08
 *         object, where the code returns true (i.e. 1)
 */
int IE_Imp_MsWord_97::_specCharProc (wvParseStruct *ps, U16 eachchar, CHP *achp)
{
	// make sure we are not past the end of the document ...
	// this can happen with some complex documents
	if(ps->currentcp >= m_iTextboxesEnd)
	{
		UT_DEBUGMSG(("IE_Imp_MsWord_97::_specCharProc: processing past end of document !!!\n"));
		return 0;
	}

	Blip blip;
	long pos;
	FSPA * fspa;
	//FDOA * fdoa;
#ifdef SUPPORTS_OLD_IMAGES
	wvStream *fil;
	PICF picf;
#endif

	// each of these handlers can veto further processing of this character
	if(!_handleHeadersText(ps->currentcp,true))
		return 0;

	if(!_handleNotesText(ps->currentcp))
		return 0;

	if(!_handleTextboxesText(ps->currentcp))
		return 0;

	// insert any required bookmarks, but only if we are not in a
	// field ...
	if(!ps->fieldstate)
		_insertBookmarkIfAppropriate(ps->currentcp);

	if(_insertNoteIfAppropriate(ps->currentcp,0))
		return 0;

	if(eachchar == 0x28)
	{
		// this is a symbol; the font is identified by achp->ftcSym and the char code is
		// achp->xchSym
		this->_appendChar(achp->xchSym);
		return 0;
	}

	//
	// This next bit of code is to handle fields
	//

	switch (eachchar)
	{

	case 19: // field begin
		this->_flush ();
		ps->fieldstate++;
		ps->fieldmiddle = 0;
		this->_fieldProc (ps, eachchar, 0, 0x400);
		return 0;

	case 20: // field separator
		if (achp->fOle2)
		{
			UT_DEBUGMSG(("Field has an associated embedded OLE object\n"));
		}
		ps->fieldmiddle = 1;
		this->_fieldProc (ps, eachchar, 0, 0x400);
		return 0;

	case 21: // field end
		// NOTE(review): unlike _charProc, fieldstate is decremented here
		// without checking that a field is open - confirm this cannot
		// underflow on malformed input
		ps->fieldstate--;
		ps->fieldmiddle = 0;
		this->_fieldProc (ps, eachchar, 0, 0x400);
		return 0;

	}

	/* it seems some fields characters slip through here which tricks
	 * the import into thinking it has an image with it really does
	 * not. this catches special characters in a field
	 */
	if (ps->fieldstate) {
		if (this->_fieldProc(ps, eachchar, 0, 0x400))
			return 0;
	}

	//
	// This next bit of code is to handle OLE2 embedded objects and images
	//

	switch (eachchar)
	{
	case 0x01: // Older ( < Word97) image, currently not handled very well
		if (achp->fOle2) {
			UT_DEBUGMSG(("embedded OLE2 component. currently unsupported"));
			return 0;
		}

		// remember where we are in the data stream so that it can be
		// restored after the picture has been read
		pos = wvStream_tell(ps->data);

#ifdef SUPPORTS_OLD_IMAGES
		UT_DEBUGMSG(("Pre W97 Image format.\n"));
		wvStream_goto(ps->data, achp->fcPic_fcObj_lTagObj);

		if (1 == wvGetPICF(wvQuerySupported(&ps->fib, NULL), &picf,
						   ps->data) && NULL != picf.rgb)
		{
			fil = picf.rgb;

			if (wv0x01(&blip, fil, picf.lcb - picf.cbHeader))
			{
				this->_handleImage(&blip, picf.mx * picf.dxaGoal / 1000, picf.my * picf.dyaGoal / 1000, picf.dyaCropTop, picf.dyaCropBottom, picf.dxaCropLeft, picf.dxaCropRight);
			}
			else
			{
				UT_DEBUGMSG(("Dom: no graphic data\n"));
			}

			wvStream_goto(ps->data, pos);

			return 0;
		}
		else
		{
			// NOTE(review): the stream position is not restored on this path
			UT_DEBUGMSG(("Couldn't import graphic!\n"));
			return 0;
		}
#else
		UT_DEBUGMSG(("DOM: 0x01 graphics support is disabled at the moment\n"));
		wvStream_goto(ps->data, pos);

		return 0;
#endif
		break;
	case 0x08: // Word 97, 2000, XP image
		if (wvQuerySupported(&ps->fib, NULL) >= WORD8) // sanity check
		{
			if (ps->nooffspa > 0)
			{

				// look up the shape anchor that belongs to this position
				fspa = wvGetFSPAFromCP(ps->currentcp, ps->fspa,
									   ps->fspapos, ps->nooffspa);

				if(!fspa)
				{
					UT_DEBUGMSG(("No fspa! Panic and Insanity Abounds!\n"));
					return 0;
				}
				UT_DEBUGMSG(("Found a psfa! \n"));
				// bounding box of the shape; the raw values are divided by
				// 1440 and later written out with the unit "in"
				double dLeft,dRight,dTop,dBottom = 0.0;
				dLeft = static_cast<double>(fspa->xaLeft)/1440.0;
				dRight = static_cast<double>(fspa->xaRight)/1440.0;
				dTop = static_cast<double>(fspa->yaTop)/1440.0;
				dBottom = static_cast<double>(fspa->yaBottom)/1440.0;
				UT_DEBUGMSG(("Left %f Right %f Top %f Bottom %f \n",dLeft,dRight,dTop,dBottom));
				UT_DEBUGMSG(("spid %d cTxbx %d \n",fspa->spid,fspa->cTxbx));
				UT_DEBUGMSG(("fHdr %d bx %d by %d wr %d wrk %d fRcaSimple %d fBelowText %d fAnchorLock %d \n",fspa->fHdr,fspa->bx,fspa->by,fspa->wr,fspa->wrk,fspa->fRcaSimple,fspa->fBelowText,fspa->fAnchorLock));
				UT_String sImageName;
				// set when the shape carries an image that was imported
				// successfully
				bool bPositionObject = false;
				if (wv0x08(&blip, fspa->spid, ps))
				{
					//
					// FIXME! Put some code in here to make this use Sectionframes!!
					//
					UT_DEBUGMSG(("!!!!Found a blip in a fspa!!!!!!!!!! \n"));
					if(UT_OK == this->_handlePositionedImage(&blip, sImageName))
						bPositionObject = true;
				}
				bool isTextBox = false;
				UT_uint32 textOff = 0;
				UT_uint32 i;
				escherstruct item;
				FSPContainer *answer = NULL;

				// load the drawing data and search it for the container of
				// this shape id; item must be released with
				// wvReleaseEscher() on every path below
				UT_DEBUGMSG(("IE_Imp_MsWord_97:: escher: ps->fib.fcDggInfo %d ps->fib.lcbDggInfo %d \n", ps->fib.fcDggInfo,ps->fib.lcbDggInfo));
				wvGetEscher (&item, ps->fib.fcDggInfo, ps->fib.lcbDggInfo, ps->tablefd,
							 ps->mainfd);
				for (i = 0; i < item.dgcontainer.no_spgrcontainer; i++)
				{
					answer = wvFindSPID (&(item.dgcontainer.spgrcontainer[i]), fspa->spid);
					if (answer)
					{
						break;
					}
				}
				if(answer != NULL)
				{
					// the shape is a textbox if it has a client textbox
					// record with a text id
					ClientTextbox cTextBox = answer->clienttextbox;
					if(cTextBox.textid != NULL)
					{
						isTextBox = true;
						textOff = *cTextBox.textid;
						UT_DEBUGMSG(("Found a Text box! text offset is.. %d \n",textOff));
					}
					// passing struct to format parameter. WTF?
					xxx_UT_DEBUGMSG((" clienttextbox %x clientdata %x \n",answer->clienttextbox,answer->clientdata));
				}
				if(isTextBox || bPositionObject)
				{
					//		  if(answer != NULL)
					// {
					// attribute list for the frame strux: optionally the
					// image data id, then "props" and its value
					const char * atts[] = {NULL,NULL,NULL,NULL,NULL,NULL};
					if(bPositionObject && sImageName.size())
					{
						atts[0] = PT_STRUX_IMAGE_DATAID;
						atts[1] = sImageName.c_str();
						atts[2] = "props";
					}
					else
					{
						atts[0] = "props";
					}
					UT_String sProp;
					UT_String sProps;
					UT_String sVal;
					sProps.clear();
					sProps = "frame-type:";
					if(isTextBox)
					{
						sProps += "textbox; ";
					}
					else
					{
						sProps += "image; ";
					}
					// vertical anchoring of the frame, taken from fspa->by
					sProps += "position-to:";
					if(fspa->by ==2)
					{
						sVal = "block-above-text; ";
					}
					else if(fspa->by ==0)
					{
						sVal = "column-above-text; ";
					}
					else if(fspa->by ==1)
					{
						sVal = "page-above-text; "; // should be page-above-text
					}
					sProps += sVal;
					// text wrapping, taken from fspa->wr and fBelowText
					sProps += "wrap-mode:";
					if(fspa->wr == 3)
					{
						sVal = "above-text; ";
					}
					else
					{
						sVal = "wrapped-both; ";
					}
					if(fspa->fBelowText == 1 && fspa->wr == 3)
					{
						UT_DEBUGMSG(("Set Below Text \n"));
						sVal = "below-text; ";
					}
					sProps += sVal;
					// NOTE(review): the "%f" formatting below presumably
					// depends on the current numeric locale - confirm that
					// a "C" locale is in effect here
					sProps += "xpos:";
					UT_String_sprintf(sVal,"%f",dLeft);
					sVal += "in; ";

					sProps += sVal;
					sProps += "ypos:";
					UT_String_sprintf(sVal,"%f",dTop);
					sVal += "in; ";

					sProps += sVal;
					sProps += "frame-col-xpos:";
					UT_String_sprintf(sVal,"%f",dLeft);
					sVal += "in; ";

					sProps += sVal;
					sProps += "frame-col-ypos:";
					UT_String_sprintf(sVal,"%f",dTop);
					sVal += "in; ";
					sProps += sVal;

					sProps += "frame-width:";
					UT_String_sprintf(sVal,"%f",dRight-dLeft);
					sVal += "in; ";
					sProps += sVal;

					UT_DEBUGMSG(("Inserting Frame of width %s \n",sVal.c_str()));
					sProps += "frame-height:";
					UT_String_sprintf(sVal,"%f",dBottom-dTop);
					sVal += "in";
					sProps += sVal;
					//
					// Turn off the borders.
					//
					if(bPositionObject && !isTextBox)
					{
						sProp = "top-style";
						sVal = "none";
						UT_String_setProperty(sProps,sProp,sVal);
						sProp = "right-style";
						UT_String_setProperty(sProps,sProp,sVal);
						sProp = "left-style";
						UT_String_setProperty(sProps,sProp,sVal);
						sProp = "bot-style";
						UT_String_setProperty(sProps,sProp,sVal);
					}
					// the props value goes right after whichever slot holds
					// the "props" key
					// NOTE(review): if bPositionObject is set but sImageName
					// is empty, "props" is in atts[0] while the value is
					// written to atts[3] - confirm this cannot happen
					if(bPositionObject)
					{
						atts[3] = sProps.c_str();
					}
					else
					{
						atts[1] = sProps.c_str();
					}
					_appendStrux(PTX_SectionFrame,atts);
					_appendStrux(PTX_EndFrame,atts);
					if(isTextBox)
					{
						// remember the frame (keyed by the shape id) in
						// m_vecTextboxPos
						textboxPos * pPos = new textboxPos;
						pPos->lid = fspa->spid;
						PT_DocPosition posEnd =0;
						getDoc()->getBounds(true,posEnd); // clean frags!

						pPos->endFrame = getDoc()->getLastFrag();
						m_vecTextboxPos.addItem(pPos);
					}
					wvReleaseEscher (&item);
					// NOTE(review): returns true (1) from an int function,
					// while every other path returns 0
					return true;
				}
				wvReleaseEscher (&item);
			}
			else
			{
				xxx_UT_DEBUGMSG(("nooffspa was <= 0 -- ignoring"));
			}
		}
		else
		{
			UT_DEBUGMSG(("pre Word8 0x08 graphic -- unsupported at the moment"));
			/*fdoa =*/ wvGetFDOAFromCP(ps->currentcp, NULL, ps->fdoapos,
								   ps->nooffdoa);

			// TODO: do something with the data in this fdoa someday...
		}

		return 0;
	}

	return 0;
}
2238
_beginComment(wvParseStruct *,UT_uint32,void *,int)2239 int IE_Imp_MsWord_97::_beginComment(wvParseStruct * /*ps*/, UT_uint32 /*tag*/,
2240 void * /*props*/, int /*dirty*/)
2241 {
2242 UT_DEBUGMSG(("DOM: begin comment\n"));
2243 return 0;
2244 }
2245
_endComment(wvParseStruct *,UT_uint32,void *,int)2246 int IE_Imp_MsWord_97::_endComment(wvParseStruct * /*ps*/, UT_uint32 /*tag*/,
2247 void * /*props*/, int /*dirty*/)
2248 {
2249 UT_DEBUGMSG(("DOM: begin comment\n"));
2250 return 0;
2251 }
2252
2253
_eleProc(wvParseStruct * ps,UT_uint32 tag,void * props,int dirty)2254 int IE_Imp_MsWord_97::_eleProc(wvParseStruct *ps, UT_uint32 tag,
2255 void *props, int dirty)
2256 {
2257 // make sure we are not past the end of the document ...
2258 // this can happen with some complex documents
2259 if(ps->currentcp >= m_iTextboxesEnd)
2260 {
2261 UT_DEBUGMSG(("IE_Imp_MsWord_97::_eleProc: processing past end of document !!! %d \n",ps->currentcp >= m_iTextboxesEnd));
2262 return 0;
2263 }
2264
2265 //
2266 // Marshall these off to the correct handlers
2267 //
2268
2269 switch (static_cast<wvTag>(tag))
2270 {
2271
2272 case SECTIONBEGIN:
2273 return _beginSect (ps, tag, props, dirty);
2274
2275 case SECTIONEND:
2276 return _endSect (ps, tag, props, dirty);
2277
2278 case PARABEGIN:
2279 return _beginPara (ps, tag, props, dirty);
2280
2281 case PARAEND:
2282 return _endPara (ps, tag, props, dirty);
2283
2284 case CHARPROPBEGIN:
2285 return _beginChar (ps, tag, props, dirty);
2286
2287 case CHARPROPEND:
2288 return _endChar (ps, tag, props, dirty);
2289
2290 case COMMENTBEGIN:
2291 return _beginComment (ps, tag, props, dirty);
2292
2293 case COMMENTEND:
2294 return _endComment (ps, tag, props, dirty);
2295
2296 default:
2297 UT_ASSERT_NOT_REACHED();
2298
2299 }
2300
2301 return 0;
2302 }
2303
2304 /****************************************************************************/
2305 /****************************************************************************/
2306
_beginSect(wvParseStruct *,UT_uint32,void * prop,int)2307 int IE_Imp_MsWord_97::_beginSect (wvParseStruct * /*ps*/, UT_uint32 /*tag*/,
2308 void *prop, int /*dirty*/)
2309 {
2310 SEP * asep = static_cast <SEP *>(prop);
2311
2312 const gchar * propsArray[15];
2313 UT_String propBuffer;
2314 UT_String props;
2315
2316 // flush any character runs
2317 this->_flush ();
2318
2319 m_iCurrentSectId++;
2320
2321 // first we need to deal with page size, because setting page size
2322 // resets all margins to the AW defaults
2323 // Sevior: Only do this ONCE!!! Abiword can only handle one page size.
2324 if(!m_bSetPageSize)
2325 {
2326 // all of this data is related to Abi's <pagesize> tag
2327 m_bSetPageSize = true;
2328 double page_width = 0.0;
2329 double page_height = 0.0;
2330 double page_scale = 1.0;
2331
2332 if (asep->dmOrientPage == 1)
2333 getDoc()->m_docPageSize.setLandscape ();
2334 else
2335 getDoc()->m_docPageSize.setPortrait ();
2336
2337 page_width = asep->xaPage / 1440.0;
2338 page_height = asep->yaPage / 1440.0;
2339
2340 // PROBLEM: there are two separate and independent page sizes
2341 // given to us, one by the explicit width and height and one
2342 // by the requested paper size, and we need to decide which
2343 // one we should follow. There are three scenarios
2344 // (1) the explicit size and paper match
2345 // (2) the explicit size and paper do not match
2346 // (a) the explicit size is the Word default (Letter)
2347 // (b) the explicit size is something else than the defaults
2348 //
2349 // In case (1) we use the requested paper. Case (2a) happens
2350 // when the user changes the page size by requesting a
2351 // different paper size but does not touch the width and
2352 // height controls -- we use the paper size. Case (2b) happens
2353 // when the user changes size by the with and height controls;
2354 // the paper request stored is the one that was in place
2355 // before the manual adjustment and is no longer valid, so we
2356 // use the explicit width and height.
2357
2358 // decide if the explicit width and height are valid, i.e., if
2359 // they contain the Word defaults the paper request has to be
2360 // 0 (Letter)
2361 bool bDoNotUseSize = (asep->xaPage == 12240 &&
2362 asep->yaPage == 15840 &&
2363 asep->dmPaperReq != 0);
2364
2365
2366 xxx_UT_DEBUGMSG(("DOM: pagesize: landscape: %d, width: %f, height: %f, paper-type: %d\n",
2367 asep->dmOrientPage, page_width, page_height, asep->dmPaperReq));
2368
2369 // map paper to AW page size name string ...
2370 const char * paper_name = s_mapPageIdToString (asep->dmPaperReq);
2371
2372 // check if the paper name is valid (i.e., there is a match
2373 // between the name and the sizes; if not, we use only the sizes
2374 bool bPaperNameValid = (paper_name != NULL);
2375
2376 if(bPaperNameValid)
2377 {
2378 // construct an instance of fp_PageSize for this paper
2379 // request; we will use this to verify whether its
2380 // dimensions match those stored in the explicit width and
2381 // height but also we will determine appropriate units to
2382 // be used (i.e., we want to use inches for Letter but
2383 // metric units for A4, etc.)
2384 fp_PageSize PageSize(paper_name);
2385
2386 // if we know that the explicit size is not valid, we do
2387 // not need any further checking
2388 if(!bDoNotUseSize)
2389 {
2390 // in order to minimize effect of rounding errors, we are
2391 // better doing the comparison in the twipses; the MS
2392 // values suffer from rounding (?) error which is quite
2393 // significant, so we will round to the second least
2394 // significant digit
2395
2396 double w = PageSize.Width(DIM_IN) * 1440.0;
2397 double h = PageSize.Height(DIM_IN) * 1440.0;
2398
2399 UT_uint32 iPaperW10 = ((UT_uint32) w)/10 + (((UT_uint32) w)%10 >= 5 ? 1 : 0);
2400 UT_uint32 iPaperH10 = ((UT_uint32) h)/10 + (((UT_uint32) h)%10 >= 5 ? 1 : 0);
2401
2402 UT_uint32 iPageW10 = asep->xaPage/10 + (asep->xaPage%10 >= 5 ? 1 : 0);
2403 UT_uint32 iPageH10 = asep->yaPage/10 + (asep->yaPage%10 >= 5 ? 1 : 0);
2404
2405 if(iPageW10 != iPaperW10 ||
2406 iPageH10 != iPaperH10)
2407 {
2408 bPaperNameValid = false;
2409 }
2410 }
2411
2412 // if we are to use the paper name, then get the
2413 // dimensions to be used ...
2414 if(bPaperNameValid)
2415 {
2416 m_dim = PageSize.getDims();
2417 }
2418 }
2419
2420 if (bPaperNameValid)
2421 {
2422 getDoc()->m_docPageSize.Set (paper_name);
2423 }
2424 else
2425 {
2426 getDoc()->m_docPageSize.Set ("Custom");
2427 getDoc()->m_docPageSize.Set (page_width, page_height, DIM_IN);
2428 getDoc()->m_docPageSize.setScale(page_scale);
2429 }
2430 } // end of page size stuff
2431
2432 if(asep->fBidi)
2433 {
2434 // this is an RTL section, set dominant direction to rtl
2435 props += "dom-dir:rtl;";
2436 }
2437 else
2438 {
2439 // this is an LTR section, we want to set the direction
2440 // explicitely so that we do not end up with wrong default
2441 props += "dom-dir:ltr;";
2442 }
2443
2444
2445 if(asep->fPgnRestart)
2446 {
2447 // set to 1 when page numbering should be restarted at the beginning of this section
2448 props += "section-restart:1;";
2449 }
2450
2451 // user specified starting page number
2452 UT_String_sprintf(propBuffer, "section-restart-value:%d;", asep->pgnStart);
2453 props += propBuffer;
2454
2455 // columns
2456 if (asep->ccolM1) {
2457 // number of columns
2458 UT_String_sprintf(propBuffer,"columns:%d;", (asep->ccolM1+1));
2459 props += propBuffer;
2460
2461 // columns gap
2462 UT_String_sprintf(propBuffer,"column-gap:%s;",
2463 UT_convertInchesToDimensionString(m_dim,
2464 (static_cast<double>(asep->dxaColumns) / 1440)));
2465 props += propBuffer;
2466 }
2467
2468 // draw a vertical line between columns
2469 if (asep->fLBetween == 1)
2470 {
2471 props += "column-line:on;";
2472 }
2473
2474 // space after section (gutter)
2475 UT_String_sprintf(propBuffer,"section-space-after:%s;",
2476 UT_convertInchesToDimensionString(m_dim,
2477 (static_cast<double>(asep->dzaGutter) / 1440)));
2478 props += propBuffer;
2479
2480 //
2481 // TODO: section breaks
2482 //
2483
2484 // page-margin-left
2485 UT_String_sprintf(propBuffer, "page-margin-left:%s;",
2486 UT_convertInchesToDimensionString(m_dim,
2487 (static_cast<double>(asep->dxaLeft) / 1440)));
2488 props += propBuffer;
2489
2490 // page-margin-right
2491 UT_String_sprintf(propBuffer, "page-margin-right:%s;",
2492 UT_convertInchesToDimensionString(m_dim,
2493 (static_cast<double>(asep->dxaRight) / 1440)));
2494 props += propBuffer;
2495
2496 // page-margin-top
2497 UT_String_sprintf(propBuffer, "page-margin-top:%s;",
2498 UT_convertInchesToDimensionString(m_dim,
2499 (static_cast<double>(asep->dyaTop) / 1440)));
2500 props += propBuffer;
2501
2502 // page-margin-bottom
2503 UT_String_sprintf(propBuffer, "page-margin-bottom:%s;",
2504 UT_convertInchesToDimensionString(m_dim,
2505 (static_cast<double>(asep->dyaBottom)/1440)));
2506 props += propBuffer;
2507
2508 // page-margin-header
2509 UT_String_sprintf(propBuffer, "page-margin-header:%s;",
2510 UT_convertInchesToDimensionString(m_dim,
2511 (static_cast<double>(asep->dyaHdrTop)/1440)));
2512 props += propBuffer;
2513
2514 // page-margin-footer (word's footer is measured from the bottom
2515 // edge of the page -- contrary to the docs -- our's from the
2516 // bottom margin of the page)
2517 double dFooter = static_cast<double>(asep->dyaBottom) - static_cast<double>(asep->dyaHdrBottom);
2518 if(dFooter < 0)
2519 {
2520 dFooter = -dFooter;
2521 }
2522 dFooter = dFooter/1440.;
2523 UT_String_sprintf(propBuffer, "page-margin-footer:%s",
2524 UT_convertInchesToDimensionString(m_dim,dFooter));
2525 props += propBuffer;
2526 xxx_UT_DEBUGMSG (("DOM:SEVIOR the section properties are: '%s'\n", props.c_str()));
2527
2528
2529 propsArray[0] = static_cast<const gchar *>("props");
2530 propsArray[1] = static_cast<const gchar *>(props.c_str());
2531
2532 UT_uint32 iOff = 2;
2533
2534 // headers/footers
2535 UT_String id[6];
2536 UT_uint32 iId = 0;
2537
2538 // see _handleHeaders() on the contents of the m_pHeaders array,
2539 // it will make this maths clear (m_iCurrentSectId is 1-based
2540 // indx)
2541 // For each section in the document they are six headers/footers;
2542 // each of these can be in 3 states:
2543 // length > 2: proper header, use it
2544 // length == 2: empty header; no header to be inserted
2545 // length == 0: use header from the previous section
2546
2547 if ((m_iCurrentSectId - 1)*6 + 6 < m_iHeadersCount)
2548 {
2549 // there are headers defined for this section
2550 UT_uint32 i = 6 + (m_iCurrentSectId - 1)*6;
2551 UT_uint32 j = i + 6;
2552 UT_sint32 k;
2553
2554 for( ; i < j && i < m_iHeadersCount; i++)
2555 {
2556 // skip any unsupported or empty headers
2557 if(m_pHeaders[i].type == HF_Unsupported || m_pHeaders[i].len == 2)
2558 {
2559 continue;
2560 }
2561
2562 // if this is a first page hdr/ftr we only use it if appropriate
2563 if( (m_pHeaders[i].type == HF_HeaderFirst && !asep->fTitlePage)
2564 || (m_pHeaders[i].type == HF_FooterFirst && !asep->fTitlePage))
2565 {
2566 // we want to change the type to unsupported to stop it from being
2567 // inserted into the document
2568 m_pHeaders[i].type = HF_Unsupported;
2569 continue;
2570 }
2571
2572 k = i;
2573 #if 0
2574 // For now this code is going to be disabled, since a
2575 // present AW sections cannot share headers, and this type
2576 // of a header needs to be replaced by a physical copy of
2577 // the previous meaningul header
2578 if(m_pHeaders[i].len == 0)
2579 {
2580 // this is the case where the section is to use the
2581 // header of a previous section -- scroll back until
2582 // we find one
2583 k -= 6;
2584 bool bContinue = false;
2585
2586 while(k > 5)
2587 {
2588 if(m_pHeaders[k].len == 2)
2589 {
2590 // found empty header
2591 bContinue = true;
2592 break;
2593 }
2594 else if(m_pHeaders[k].len == 0)
2595 {
2596 // try one section ahead
2597 k -= 6;
2598 }
2599 else
2600 {
2601 // found a meaningful header
2602 break;
2603 }
2604 }
2605
2606 if(bContinue || k < 6)
2607 {
2608 continue;
2609 }
2610 }
2611 #endif
2612 switch(m_pHeaders[k].type)
2613 {
2614 case HF_HeaderEven:
2615 propsArray[iOff++] = "header-even";
2616 break;
2617 case HF_FooterEven:
2618 propsArray[iOff++] = "footer-even";
2619 break;
2620 case HF_HeaderOdd:
2621 propsArray[iOff++] = "header";
2622 break;
2623 case HF_FooterOdd:
2624 propsArray[iOff++] = "footer";
2625 break;
2626 case HF_HeaderFirst:
2627 propsArray[iOff++] = "header-first";
2628 break;
2629 case HF_FooterFirst:
2630 propsArray[iOff++] = "footer-first";
2631 break;
2632 default:
2633 UT_ASSERT_HARMLESS(UT_NOT_REACHED);
2634 }
2635
2636 UT_String_sprintf(id[iId],"%d",m_pHeaders[k].pid);
2637 propsArray[iOff++] = id[iId++].c_str();
2638 }
2639 }
2640
2641 propsArray[iOff++] = 0;
2642 UT_return_val_if_fail(iOff <= sizeof(propsArray), 1);
2643
2644
2645 if (!_appendStrux(PTX_Section, static_cast<const gchar **>(&propsArray[0])))
2646 {
2647 UT_DEBUGMSG (("DOM: error appending section props!\n"));
2648 return 1;
2649 }
2650
2651 // increment our section count
2652 m_bInSect = true;
2653 m_bInPara = false; // reset paragraph status
2654 m_nSections++;
2655
2656 // TODO: we need to do some work on Headers/Footers
2657
2658 /*
2659 * break codes:
2660 * 0 No break
2661 * 1 New column
2662 * 2 New page
2663 * 3 Even page
2664 * 4 Odd page
2665 */
2666
2667 // if (asep->bkc > 1 && m_nSections > 1) // don't apply on the 1st page
2668 if (m_nSections > 1) // don't apply on the 1st page
2669 {
2670 // new sections always need a block
2671 if (!_appendStrux(PTX_Block, static_cast<const gchar **>(NULL)))
2672 {
2673 UT_DEBUGMSG (("DOM: error appending new block\n"));
2674 return 1;
2675 }
2676 m_bInPara = true;
2677
2678 UT_UCSChar ucs = UCS_FF;
2679 switch (asep->bkc) {
2680 case 1:
2681 ucs = UCS_VTAB;
2682 X_CheckError(_appendSpan(&ucs,1));
2683 break;
2684
2685 case 2:
2686 X_CheckError(_appendSpan(&ucs,1));
2687 break;
2688
2689 case 3: // TODO: handle me better (not even)
2690 X_CheckError(_appendSpan(&ucs,1));
2691 break;
2692
2693 case 4: // TODO: handle me better (not odd)
2694 X_CheckError(_appendSpan(&ucs,1));
2695 break;
2696
2697 case 0:
2698 default:
2699 break;
2700 }
2701 }
2702
2703 return 0;
2704 }
2705
2706 // this function is called from _handleHeadersText() with meaningless
2707 // parameters; if you want to make use of any of the parameters here,
2708 // make sure it will work with NULLs, etc.
_endSect(wvParseStruct *,UT_uint32,void *,int)2709 int IE_Imp_MsWord_97::_endSect (wvParseStruct * /* ps */ , UT_uint32 /* tag */ ,
2710 void * /* prop */, int /* dirty */ )
2711 {
2712 #if 0
2713 // if we're at the end of a section, we need to check for a section mark
2714 // at the end of our character stream and remove it (to prevent page breaks
2715 // between sections)
2716
2717 // this does not work -- if we are at the end of a section we have
2718 // already flushed the buffer in _endPara()
2719 if (m_pTextRun.size() &&
2720 m_pTextRun[m_pTextRun.size()-1] == UCS_FF)
2721 {
2722 m_pTextRun[m_pTextRun.size()-1] = 0;
2723 }
2724 #endif
2725
2726 // we never appended a paragraph inside of this section. we're naughty. correct that here.
2727 if (!m_bInPara && !m_bInTextboxes)
2728 _appendStrux(PTX_Block, NULL);
2729
2730 // if there is a pending page break it belongs to the section and
2731 // is to be removed, we just need to set the tracker to false
2732 m_bPageBreakPending = false;
2733 m_bLineBreakPending = false;
2734
2735 m_bInSect = false;
2736 m_bInPara = false; // reset paragraph status
2737 return 0;
2738 }
2739
_beginPara(wvParseStruct * ps,UT_uint32,void * prop,int)2740 int IE_Imp_MsWord_97::_beginPara (wvParseStruct *ps, UT_uint32 /*tag*/,
2741 void *prop, int /*dirty*/)
2742 {
2743
2744 // if in a header of unsupported type, just return
2745 // the +1 is to account for the fact that ps->currentcp applies to the previous
2746 // char position ...
2747 if(_ignorePosition(ps->currentcp + 1))
2748 return 0;
2749
2750 PAP *apap = static_cast <PAP *>(prop);
2751
2752 // the header/footnote/endnote sections are special; because the
2753 // parser treats them as a continuation of the document, we end up
2754 // here before we get chance to handle the change from main doc to
2755 // these sections -- we want the paragraph properties assembled
2756 // for future use, but we do not want the strux actually inserted
2757 bool bDoNotInsertStrux = (ps->currentcp == m_iFootnotesStart ||
2758 ps->currentcp == m_iEndnotesStart ||
2759 ps->currentcp == m_iHeadersStart);
2760
2761 // the end of endnotes/fnotes/headers and all other subsections in
2762 // the main stream always contains a paragraph marker; we do not
2763 // want it to insert strux on those
2764 if((ps->currentcp == m_iTextEnd - 1 && m_iTextEnd > m_iTextStart) ||
2765 //(ps->currentcp == m_iTextEnd - 2 && m_iTextEnd > m_iTextStart) ||
2766 (ps->currentcp == m_iFootnotesEnd - 1 && m_iFootnotesEnd > m_iFootnotesStart) ||
2767 (ps->currentcp == m_iEndnotesEnd - 1 && m_iEndnotesEnd > m_iEndnotesStart) ||
2768 (ps->currentcp == m_iHeadersEnd - 1 && m_iHeadersEnd > m_iHeadersStart) ||
2769 (ps->currentcp == m_iAnnotationsEnd - 1 && m_iAnnotationsEnd > m_iAnnotationsStart) ||
2770 (ps->currentcp == m_iMacrosStart - 1 && m_iMacrosEnd > m_iMacrosStart) ||
2771 (ps->currentcp == m_iTextboxesStart - 1 && m_iTextboxesEnd > m_iTextboxesStart))
2772 {
2773 bDoNotInsertStrux = true;
2774 }
2775 bool bInHdrFtr = false;
2776 if((ps->currentcp+1 >= m_iHeadersStart) && (ps->currentcp < m_iHeadersEnd))
2777 {
2778 bInHdrFtr = true;
2779 }
2780 bool bInTextboxes = false;
2781 if((ps->currentcp+1 >= m_iTextboxesStart) && (ps->currentcp < m_iTextboxesEnd))
2782 {
2783 bInTextboxes = true;
2784 }
2785 // at the end of each f/enote is a superflous paragraph marker
2786 // which we do not want imported
2787 if(m_bInFNotes && m_iNextFNote < m_iFootnotesCount && m_pFootnotes &&
2788 m_pFootnotes[m_iNextFNote].txt_pos + m_pFootnotes[m_iNextFNote].txt_len - 1 >= ps->currentcp)
2789 {
2790 bDoNotInsertStrux = true;
2791 }
2792
2793 if(m_bInENotes && m_iNextENote < m_iEndnotesCount && m_pEndnotes &&
2794 m_pEndnotes[m_iNextENote].txt_pos + m_pEndnotes[m_iNextENote].txt_len - 1 >= ps->currentcp)
2795 {
2796 bDoNotInsertStrux = true;
2797 }
2798
2799
2800 // the header section requires even more special care; since we
2801 // need to insert the HdrFtr strux for each header before we can
2802 // insert the block, we do not want a strux inserted at the start
2803 // position of a header; furthermore, each header ends with a
2804 // superfluous paragraph marker
2805 if(m_bInHeaders &&
2806 ((m_iCurrentHeader < m_iHeadersCount && m_pHeaders &&
2807 (m_pHeaders[m_iCurrentHeader].pos == ps->currentcp ||
2808 m_pHeaders[m_iCurrentHeader].pos + m_pHeaders[m_iCurrentHeader].len - 1 <= ps->currentcp))
2809 || m_iCurrentHeader == m_iHeadersCount))
2810 {
2811 //start a new header section
2812 bDoNotInsertStrux = true;
2813 }
2814
2815 {
2816 if (apap->fInTable)
2817 {
2818 // we have to call this unconditionally, since m_bInHeaders set does not mean that
2819 // the HdrFtr strux for this section has been inserted.
2820 _handleHeadersText(ps->currentcp +1, false);
2821 _handleTextboxesText(ps->currentcp+1);
2822 if (!m_bInTable)
2823 {
2824 m_bInTable = true;
2825 _table_open();
2826 //
2827 // Fill Column positions
2828 //
2829 UT_sint32 i= 0;
2830 for(i=0;i < ps->nocellbounds; i++)
2831 {
2832 if(ps->cellbounds)
2833 {
2834 UT_sint32 pos = ps->cellbounds[i];
2835 m_vecColumnPositions.addItem(pos);
2836 }
2837 }
2838 }
2839
2840 if (ps->endcell)
2841 {
2842 ps->endcell = 0;
2843 _cell_close();
2844 if (m_iCellsRemaining > 0)
2845 {
2846 m_iCellsRemaining--;
2847 if (m_iCellsRemaining == 0)
2848 {
2849 _row_close();
2850 }
2851 }
2852 }
2853
2854 _row_open(ps);
2855
2856 // determine column spans
2857 if (!m_bCellOpen)
2858 {
2859 m_vecColumnSpansForCurrentRow.clear();
2860
2861 xxx_UT_DEBUGMSG(("Number of cell bounds in New row %d \n",ps->nocellbounds));
2862 UT_sint32 column =1;
2863 UT_sint32 i =0;
2864 UT_sint32 posLeft = 0;
2865 UT_sint32 posRight =0;
2866 if (ps->cellbounds)
2867 posLeft = ps->cellbounds[0];
2868 for (column = 1; column < ps->nocellbounds; column++)
2869 {
2870 int span = 0;
2871 posRight = apap->ptap.rgdxaCenter[column];
2872 xxx_UT_DEBUGMSG(("column %d posLeft %d posRight %d \n",column,posLeft,posRight));
2873 for (i = 0; i < ps->nocellbounds; i++)
2874 {
2875 if (ps->cellbounds[i] >= posLeft && ps->cellbounds[i] < posRight)
2876 {
2877 span++;
2878 }
2879 else if (ps->cellbounds[i] >= posRight)
2880 {
2881 break;
2882 }
2883 }
2884 xxx_UT_DEBUGMSG(("COlumn %d has span %d \n",column,span));
2885 m_vecColumnSpansForCurrentRow.addItem(span);
2886 posLeft = posRight;
2887 }
2888 }
2889
2890 _cell_open(ps, apap);
2891
2892 if (m_iCellsRemaining == 0) {
2893 m_iCellsRemaining = apap->ptap.itcMac + 1;
2894 }
2895
2896 if (m_iRowsRemaining == 0) {
2897 m_iRowsRemaining = ps->norows;
2898 }
2899
2900 m_iRowsRemaining--;
2901 }
2902 else if (m_bInTable) {
2903 m_bInTable = false;
2904 _table_close(ps, apap);
2905 }
2906 }
2907
2908
2909 // first, flush any character data in any open runs
2910 // only flush if we are really inserting the strux (so that we can
2911 // remove any superfluous characters at ends of secitons,
2912 // e.g. page breaks)
2913 if(!bDoNotInsertStrux)
2914 {
2915 this->_flush ();
2916 }
2917
2918 if (apap->fTtp)
2919 {
2920 m_bInPara = true;
2921 xxx_UT_DEBUGMSG(("m_bInPara set true here -1 \n"));
2922 return 0;
2923 }
2924
2925 if (apap->fBidi == 1)
2926 {
2927 m_bLTRParaContext = false;
2928 } else
2929 {
2930 m_bLTRParaContext = true;
2931 }
2932
2933 m_bBidiMode = false;
2934
2935 // break before paragraph?
2936 if (apap->fPageBreakBefore)
2937 {
2938 // TODO: this should really set a property in
2939 // TODO: in the paragraph, instead; but this
2940 // TODO: gives a similar effect for now.
2941 // TOOD: when it is handled properly the code needs to be
2942 // moved into _generateParaProps()
2943 UT_DEBUGMSG(("_beginPara: appending default block\n"));
2944 _appendStrux(PTX_Block, NULL);
2945 UT_UCSChar ucs = UCS_FF;
2946 _appendSpan(&ucs,1);
2947 }
2948
2949 m_charProps.clear();
2950 m_charStyle.clear();
2951 m_paraProps.clear();
2952 m_paraStyle.clear();
2953 _generateParaProps(m_paraProps, apap, ps);
2954
2955 //props, level, listid, parentid, style, NULL
2956 const gchar * propsArray[11];
2957
2958 /* lists */
2959 UT_uint32 myListId = 0;
2960 UT_uint32 iAWListId = UT_UID_INVALID;
2961 UT_String szListId, szParentId, szLevel, szStartValue, szNumberProps;
2962
2963 // all lists have ilfo set; some lists can be 'customised' by
2964 // having the number field removed (see bug 3622) -- they are
2965 // still lists in Word, but do not look like it, and we will not
2966 // treat them as lists (Tomas, May 26, 2003)
2967 if(apap->ilfo && apap->linfo.numberstr)
2968 {
2969 UT_uint32 j;
2970 // if we are in a new list, then do some clean up first and remember the list id
2971 if(m_iMSWordListId != apap->linfo.id)
2972 {
2973 m_iMSWordListId = apap->linfo.id;
2974
2975 for(UT_uint32 i = 0; i < 9; i++)
2976 m_iListIdIncrement[i] = 0;
2977
2978 UT_VECTOR_PURGEALL(ListIdLevelPair *, m_vLists);
2979 m_vLists.clear();
2980 }
2981
2982 // a hack -- see the note on myListId below
2983 myListId = apap->linfo.id;
2984 myListId += apap->linfo.format;
2985 myListId += apap->ilvl;
2986
2987 /*
2988 IMPORTANT the list sutff is found in several different
2989 places:
2990
2991 apap->ilvl - the level of this list (0-8)
2992
2993 myListId - the id of this list, we need this to know to which list this
2994 paragraph belongs; unfortunately, there seem to be some cases where separate
2995 lists *share* the same id, for instance when two lists, of different formatting,
2996 are separated by only empty paragraphs. As a hack, I have added the format number
2997 to the list id, so gaining different id for different formattings (it is not foolproof,
2998 for if id1 + format1 == id2 + format2 then we get two lists joined, but the probability
2999 of that should be small). Further problem is that in AW, list id refers to the set of
3000 list elements on the same level, while in Word the id is that of the entire list. The
3001 easiest way to tranform the Word id to AW id is to add the level to the id, which
3002 is what has been done above
3003
3004 apap->linfo.start - the stating number of this entire list;
3005
3006 apap->linfo.numberstr - the actual number string to display (XCHAR *); we probably need
3007 this to work out the number separator, since there does not seem
3008 to be any reference to this anywhere
3009
3010 apap->linfo.numberstr_size - length of the number string
3011
3012 apap->linfo.format - number format (see the enum below)
3013
3014 apap->linfo.align - number alignment [0: lft, 1: rght, 2: cntr]
3015
3016 apap->linfo.ixchFollow - what character stands between the number and the para
3017 [0:= tab, 1: spc, 2: none]
3018 */
3019
3020 // If a given list id has already been defined, appending a new list with
3021 // same values will have a harmless effect
3022
3023
3024 // we will use this to keep track of how many entries of given level we have had
3025 // every time we get here, we increase the counter for all levels lower than ours
3026 // then we will add the counter for our level to myListId; this way subsections of
3027 // the list separated by a higher level list entry will have different id's
3028
3029
3030 for(j = apap->ilvl + 1; j < 9; j++)
3031 m_iListIdIncrement[j]++;
3032
3033 myListId += m_iListIdIncrement[apap->ilvl];
3034
3035 // see if this id is already in our map
3036 UT_sint32 k;
3037 for(k = 0; k < m_vListIdMap.getItemCount(); k+=2)
3038 {
3039 if((UT_uint32)m_vListIdMap.getNthItem(k) == myListId)
3040 {
3041 iAWListId = m_vListIdMap.getNthItem(k+1);
3042 break;
3043 }
3044 }
3045
3046 if(iAWListId == UT_UID_INVALID)
3047 {
3048 iAWListId = getDoc()->getUID(UT_UniqueId::List);
3049 UT_ASSERT_HARMLESS(iAWListId != UT_UID_INVALID);
3050
3051 m_vListIdMap.addItem(myListId);
3052 m_vListIdMap.addItem(iAWListId);
3053 }
3054
3055
3056 const gchar * list_atts[15];
3057 UT_uint32 iOffset = 0;
3058 UT_String propBuffer;
3059
3060 // list id number
3061 list_atts[iOffset++] = "id";
3062 UT_String_sprintf(propBuffer, "%d", iAWListId);
3063 szListId = propBuffer;
3064 list_atts[iOffset++] = szListId.c_str();
3065
3066
3067 // parent id
3068 list_atts[iOffset++] = "parentid";
3069
3070 // we will search backward our list vector for the first entry
3071 // that has a lower level than we and that will be our parent
3072 UT_uint32 myParentID = 0;
3073 for(UT_sint32 n = m_vLists.getItemCount(); n > 0; n--)
3074 {
3075 ListIdLevelPair * llp = (ListIdLevelPair *)(m_vLists.getNthItem(n - 1));
3076 if(llp->level < apap->ilvl)
3077 {
3078 myParentID = llp->listId;
3079 break;
3080 }
3081 }
3082 UT_String_sprintf(propBuffer, "%d", myParentID);
3083 szParentId = propBuffer;
3084 list_atts[iOffset++] = szParentId.c_str();
3085
3086 // list type
3087 list_atts[iOffset++] = "type";
3088 list_atts[iOffset++] = s_mapDocToAbiListId (static_cast<MSWordListIdType>(apap->linfo.format));
3089
3090 // start value
3091 list_atts[iOffset++] = "start-value";
3092 UT_String_sprintf(propBuffer, "%d", apap->linfo.start);
3093 szStartValue = propBuffer;
3094 list_atts[iOffset++] = szStartValue.c_str();
3095
3096 // list delimiter
3097 UT_UTF8String sDelim;
3098 s_mapDocToAbiListDelim (apap->linfo.numberstr,apap->linfo.numberstr_size,sDelim);
3099 list_atts[iOffset++] = "list-delim";
3100
3101 char * t = s_stripDangerousChars(sDelim.utf8_str());
3102 UT_String sDlm = t;
3103 FREEP(t);
3104 list_atts[iOffset++] = sDlm.c_str();
3105
3106 list_atts[iOffset++] = "level";
3107 UT_String_sprintf(propBuffer, "%d", apap->ilvl + 1); // Word level starts at 0, Abi's at 1
3108 szLevel = propBuffer;
3109 list_atts[iOffset++] = szLevel.c_str();
3110
3111 // generate character props for the number
3112 // TODO -- the properties represented by apap->linfo.chp need
3113 // to be applied to the list number/bulet. For now, I am going
3114 // to translate these into a regular props string and attach
3115 // them to the list attributes, but they need to be passed
3116 // somehow down to the number field (may need a dedicated
3117 // _generateListCharProps() for this
3118 // Tomas, May 12, 2003
3119 _generateCharProps(szNumberProps, &apap->linfo.chp, ps);
3120 list_atts[iOffset++] = "props";
3121 list_atts[iOffset++] = szNumberProps.c_str();
3122
3123 // NULL
3124 list_atts[iOffset++] = 0;
3125 UT_return_val_if_fail( iOffset <= sizeof(list_atts)/sizeof(gchar *), 1 );
3126
3127 // now add this to our vector of lists
3128 ListIdLevelPair * llp = new ListIdLevelPair;
3129 llp->listId = iAWListId;
3130 llp->level = apap->ilvl;
3131 m_vLists.addItem(static_cast<void*>(llp));
3132
3133 getDoc()->appendList(list_atts);
3134 UT_DEBUGMSG(("DOM: appended a list\n"));
3135
3136 // TODO: merge in list properties and such here with the variable 'props',
3137 // such as list-style, field-font, ...
3138
3139 // start-value
3140 // Need to put the ";" back in the para string.
3141 //
3142 m_paraProps[m_paraProps.size() - 1] = ';';
3143 m_paraProps += "start-value:";
3144 m_paraProps += szStartValue;
3145 m_paraProps += ";";
3146
3147 // list style
3148 m_paraProps += "list-style:";
3149 m_paraProps += s_mapDocToAbiListStyle (static_cast<MSWordListIdType>(apap->linfo.format));
3150 m_paraProps += ";";
3151
3152 // field-font
3153 m_paraProps += "field-font:";
3154 m_paraProps += s_fieldFontForListStyle (static_cast<MSWordListIdType>(apap->linfo.format));
3155 } // end of list-related code
3156
3157 // props
3158 UT_uint32 i = 0;
3159 propsArray[i++] = static_cast<const gchar *>("props");
3160 propsArray[i++] = static_cast<const gchar *>(m_paraProps.c_str());
3161
3162
3163 // level, or 0 for default, normal level
3164 if (myListId > 0)
3165 {
3166 propsArray[i++] = "level";
3167 propsArray[i++] = szLevel.c_str();
3168 propsArray[i++] = "listid";
3169 propsArray[i++] = szListId.c_str();
3170 propsArray[i++] = "parentid";
3171 propsArray[i++] = szParentId.c_str();
3172 }
3173
3174 // handle style
3175 // TODO from wv we get the style props expanded and applied to the
3176 // characters in the paragraph (i.e., part of the CHP structure);
3177 // we need to be able to tell to wv not to do this expansion
3178 if(apap->stylename[0])
3179 {
3180 const STD * pSTD = ps->stsh.std;
3181 UT_uint32 iCount = ps->stsh.Stshi.cstd;
3182
3183 if(apap->istd != istdNil && apap->istd < iCount)
3184 {
3185 propsArray[i++] = "style";
3186
3187 char * t = NULL;
3188 const gchar * pName = NULL;
3189 if(pSTD)
3190 pName = s_translateStyleId(pSTD[apap->istd].sti);
3191
3192 if(pName)
3193 {
3194 m_paraStyle = pName;
3195 }
3196 else if(pSTD)
3197 {
3198 m_paraStyle = t = s_convert_to_utf8(ps,pSTD[apap->istd].xstzName);
3199 }
3200
3201 FREEP(t);
3202 propsArray[i++] = m_paraStyle.c_str();
3203 }
3204
3205 }
3206
3207 // NULL
3208 propsArray[i] = 0;
3209
3210 if (!m_bInSect && !bDoNotInsertStrux)
3211 {
3212 // check for should-be-impossible case
3213 UT_ASSERT_NOT_REACHED();
3214 _appendStrux(PTX_Section, NULL);
3215 m_bInSect = true ;
3216 }
3217
3218 if(!bDoNotInsertStrux)
3219 {
3220 xxx_UT_DEBUGMSG(("_beginPara: pos %d [text ends %d]\n", ps->currentcp, m_iFootnotesStart));
3221
3222 if (!_appendStrux(PTX_Block, static_cast<const gchar **>(&propsArray[0])))
3223 {
3224 UT_DEBUGMSG(("DOM: error appending paragraph block\n"));
3225 return 1;
3226 }
3227 m_bInPara = true;
3228 }
3229
3230 if (myListId > 0 && !bDoNotInsertStrux)
3231 {
3232 // TODO: honor more props
3233 const gchar *list_field_fmt[5];
3234 list_field_fmt[0] = "type";
3235 list_field_fmt[1] = "list_label";
3236 list_field_fmt[2] = "props";
3237 list_field_fmt[3] = "text-decoration:none";
3238 list_field_fmt[4] = 0;
3239 _appendObject(PTO_Field, static_cast<const gchar**>(&list_field_fmt[0]));
3240 m_bInPara = true;
3241
3242 // the character following the list label - 0=tab, 1=space, 2=none
3243 if(apap->linfo.ixchFollow == 0) // tab
3244 {
3245 const gchar* attribs[3] = {"props","text-decoration:none",NULL};
3246 getDoc()->appendFmt(attribs);
3247 UT_UCSChar tab = UCS_TAB;
3248 _appendSpan(&tab, 1);
3249 }
3250 else if(apap->linfo.ixchFollow == 1) // space
3251 {
3252 const gchar* attribs[3] = {"props","text-decoration:none",NULL};
3253 getDoc()->appendFmt(attribs);
3254 UT_UCSChar space = UCS_SPACE;
3255 _appendSpan(&space, 1);
3256 }
3257 // else none
3258 }
3259
3260 return 0;
3261 }
3262
_endPara(wvParseStruct *,UT_uint32,void *,int)3263 int IE_Imp_MsWord_97::_endPara (wvParseStruct * /*ps*/, UT_uint32 /*tag*/,
3264 void * /*prop*/, int /*dirty*/)
3265 {
3266 xxx_UT_DEBUGMSG(("#DOM: _endPara\n"));
3267 // have to flush here, otherwise flushing later on will result in
3268 // an empty paragraph being inserted
3269
3270 this->_flush ();
3271 m_bInPara = false;
3272 m_bLineBreakPending = false;
3273
3274 return 0;
3275 }
3276
_beginChar(wvParseStruct * ps,UT_uint32,void * prop,int)3277 int IE_Imp_MsWord_97::_beginChar (wvParseStruct *ps, UT_uint32 /*tag*/,
3278 void *prop, int /*dirty*/)
3279 {
3280 // if in a header of unsupported type, just return
3281 // the +1 is to account for the fact that ps->currentcp applies to the previous
3282 // char position ...
3283 if(_ignorePosition(ps->currentcp + 1))
3284 return 0;
3285
3286 // the header/footnote/endnote sections are special; because the
3287 // parser treats them as a continuation of the document, we end up
3288 // here before we get chance to handle the change from main doc to
3289 // these sections -- we want the char properties assembled
3290 // for future use, but we do not want them actually appended
3291 bool bDoNotAppendFmt = (ps->currentcp == m_iFootnotesStart ||
3292 ps->currentcp == m_iEndnotesStart ||
3293 ps->currentcp == m_iHeadersStart);
3294
3295 // the end of endnotes/fnotes/headers and all other subsections in
3296 // the main stream always contain a paragraph marker; we do not
3297 // want it to append fmt on those
3298 if((ps->currentcp == m_iTextEnd - 1 && m_iTextEnd > m_iTextStart) ||
3299 (ps->currentcp == m_iTextEnd - 2 && m_iTextEnd > m_iTextStart) ||
3300 (ps->currentcp == m_iFootnotesEnd - 1 && m_iFootnotesEnd > m_iFootnotesStart) ||
3301 (ps->currentcp == m_iEndnotesEnd - 1 && m_iEndnotesEnd > m_iEndnotesStart) ||
3302 (ps->currentcp == m_iHeadersEnd - 1 && m_iHeadersEnd > m_iHeadersStart) ||
3303 (ps->currentcp == m_iAnnotationsEnd - 1 && m_iAnnotationsEnd > m_iAnnotationsStart) ||
3304 (ps->currentcp == m_iMacrosStart - 1 && m_iMacrosEnd > m_iMacrosStart))
3305 {
3306 bDoNotAppendFmt = true;
3307 }
3308
3309
3310 // at the end of each f/enote is a superflous paragraph marker
3311 // which we do not want imported
3312 if(m_bInFNotes && m_iNextFNote < m_iFootnotesCount && m_pFootnotes &&
3313 m_pFootnotes[m_iNextFNote].txt_pos + m_pFootnotes[m_iNextFNote].txt_len - 1 >= ps->currentcp)
3314 {
3315 bDoNotAppendFmt = true;
3316 }
3317
3318 if(m_bInENotes && m_iNextENote < m_iEndnotesCount && m_pEndnotes &&
3319 m_pEndnotes[m_iNextENote].txt_pos + m_pEndnotes[m_iNextENote].txt_len - 1 >= ps->currentcp)
3320 {
3321 bDoNotAppendFmt = true;
3322 }
3323
3324 // the header section requires even more special care; since we
3325 // need to insert the HdrFtr strux for each header before we can
3326 // insert the block, we do not want a strux and fmt inserted at the start
3327 // position of a header; furthermore, each header ends with a
3328 // superfluous paragraph marker
3329 if(m_bInHeaders &&
3330 ((m_iCurrentHeader < m_iHeadersCount && m_pHeaders &&
3331 (m_pHeaders[m_iCurrentHeader].pos == ps->currentcp ||
3332 m_pHeaders[m_iCurrentHeader].pos + m_pHeaders[m_iCurrentHeader].len - 1 <= ps->currentcp))
3333 || m_iCurrentHeader == m_iHeadersCount))
3334 {
3335 //start a new header section
3336 bDoNotAppendFmt = true;
3337 }
3338
3339 // flush any data in our character runs
3340 // if we are not really appending, then do not flush, so that we
3341 // are not prevented from removing superflous page breaks at the
3342 // end of section
3343 if(!bDoNotAppendFmt)
3344 {
3345 this->_flush ();
3346 }
3347
3348
3349 CHP *achp = static_cast <CHP *>(prop);
3350
3351 const gchar * propsArray[7];
3352 UT_uint32 propsOffset = 0;
3353
3354 m_charProps.clear();
3355 m_charStyle.clear();
3356
3357 UT_uint32 iFontType = 0;
3358 if(achp->xchSym && ps->fonts.ffn)
3359 {
3360 // inserting a symbol char ...
3361 iFontType = ps->fonts.ffn[achp->ftcSym].chs;
3362 }
3363 else if(ps->fonts.ffn && (achp->ftcAscii < ps->fonts.nostrings))
3364 {
3365 iFontType = ps->fonts.ffn[achp->ftcAscii].chs;
3366 }
3367
3368 if(iFontType == 0)
3369 m_bSymbolFont = false;
3370 else if(iFontType == 2)
3371 m_bSymbolFont = true;
3372 else
3373 {
3374 xxx_UT_DEBUGMSG(("IE_Imp_MsWord_97::_beginChar: unknow font encoding %d\n",
3375 ps->fonts.ffn[achp->ftcAscii].chs));
3376 m_bSymbolFont = false;
3377 }
3378
3379 memset (propsArray, 0, sizeof(propsArray));
3380
3381 _generateCharProps(m_charProps, achp, ps);
3382
3383 if (!achp->fBidi)
3384 m_bLTRCharContext = true;
3385 else
3386 m_bLTRCharContext = false;
3387
3388 // we enter bidi mode if we encounter a character
3389 // formatting inconsistent with the base direction of the
3390 // paragraph; once in bidi mode, we have to stay there
3391 // until the end of the current pragraph
3392 m_bBidiMode = m_bBidiMode || (m_bLTRCharContext ^ m_bLTRParaContext);
3393
3394 propsArray[propsOffset++] = static_cast<const gchar *>("props");
3395 propsArray[propsOffset++] = static_cast<const gchar *>(m_charProps.c_str());
3396
3397 if(!m_bEncounteredRevision && (achp->fRMark || achp->fRMarkDel))
3398 {
3399 // revision "hack" - add a single revision for all revisioned text
3400 UT_UCS4String revisionStr ("msword_revisioned_text");
3401 getDoc()->addRevision(1, revisionStr.ucs4_str(), revisionStr.size(), 0, 0);
3402 m_bEncounteredRevision = true;
3403 }
3404
3405 if (achp->fRMark)
3406 {
3407 propsArray[propsOffset++] = static_cast<const gchar *>("revision");
3408 m_charRevs = "1";
3409 propsArray[propsOffset++] = m_charRevs.c_str();
3410 }
3411 else if (achp->fRMarkDel)
3412 {
3413 propsArray[propsOffset++] = static_cast<const gchar *>("revision");
3414 m_charRevs = "-1";
3415 propsArray[propsOffset++] = m_charRevs.c_str();
3416 }
3417 else
3418 m_charRevs.clear();
3419
3420
3421 if(achp->stylename[0])
3422 {
3423 const STD * pSTD = ps->stsh.std;
3424 UT_uint32 iCount = ps->stsh.Stshi.cstd;
3425
3426 if(achp->istd != istdNil && achp->istd < iCount)
3427 {
3428 propsArray[propsOffset++] = static_cast<const gchar *>("style");
3429 char * t = NULL;
3430 const gchar * pName = s_translateStyleId(pSTD[achp->istd].sti);
3431
3432 if(pName)
3433 {
3434 m_charStyle = pName;
3435 }
3436 else
3437 {
3438 m_charStyle = t = s_convert_to_utf8(ps,pSTD[achp->istd].xstzName);
3439 }
3440
3441 FREEP(t);
3442 propsArray[propsOffset++] = m_charStyle.c_str();
3443 }
3444 }
3445
3446 // woah - major error here
3447 if(!m_bInSect && !bDoNotAppendFmt)
3448 {
3449 UT_ASSERT_NOT_REACHED();
3450 _appendStrux(PTX_Section, NULL);
3451 m_bInSect = true ;
3452 }
3453
3454 if(!m_bInPara && !bDoNotAppendFmt)
3455 {
3456 UT_ASSERT_NOT_REACHED();
3457 _appendStrux(PTX_Block, NULL);
3458 m_bInPara = true ;
3459 }
3460
3461 if(!bDoNotAppendFmt)
3462 {
3463 if (!_appendFmt(static_cast<const gchar **>(&propsArray[0])))
3464 {
3465 UT_DEBUGMSG(("DOM: error appending character formatting\n"));
3466 return 1;
3467 }
3468 }
3469
3470 return 0;
3471 }
3472
_endChar(wvParseStruct *,UT_uint32,void *,int)3473 int IE_Imp_MsWord_97::_endChar (wvParseStruct * /*ps*/, UT_uint32 /*tag*/,
3474 void * /*prop*/, int /*dirty*/)
3475 {
3476 // nothing is needed here
3477 return 0;
3478 }
3479
3480 /****************************************************************************/
3481 /****************************************************************************/
3482
_fieldProc(wvParseStruct * ps,U16 eachchar,U8 chartype,U16 lid)3483 int IE_Imp_MsWord_97::_fieldProc (wvParseStruct *ps, U16 eachchar,
3484 U8 chartype, U16 lid)
3485 {
3486 xxx_UT_DEBUGMSG(("DOM: fieldProc: %c %x\n", static_cast<char>(eachchar),
3487 static_cast<int>(eachchar)));
3488
3489 //
3490 // The majority of this code has just been ripped out of wv/field.c
3491 //
3492 field * f = NULL;
3493 UT_sint32 iRet = 1;
3494
3495 if (eachchar == 0x13) // beginning of a field
3496 {
3497 if(m_stackField.getDepth() > 0)
3498 {
3499 // see what kind of field we are in
3500 m_stackField.viewTop((void**)&f);
3501 UT_return_val_if_fail(f,0);
3502
3503 switch(f->type)
3504 {
3505 case F_TOC:
3506 case F_TOC_FROM_RANGE:
3507 if(_isTOCsupported(f))
3508 {
3509 break;
3510 }
3511
3512 // for unsuported TOCs fall through ...
3513
3514 case F_HYPERLINK:
3515 // for these fields we want to dump into the
3516 // document anything in the argument
3517 {
3518 f->argument[f->fieldI] = 0;
3519 UT_UCS2Char * a = f->argument;
3520
3521 if(*a == 0x14)
3522 {
3523 a++;
3524 }
3525
3526 while(*a)
3527 {
3528 this->_appendChar(*a++);
3529 }
3530 this->_flush();
3531
3532 f->argument[0] = 0;
3533 f->fieldI = 0;
3534 }
3535 break;
3536
3537 default:
3538 break;
3539 }
3540
3541 }
3542
3543 try
3544 {
3545 f = new field;
3546 }
3547 catch(...)
3548 {
3549 f = NULL;
3550 }
3551
3552 UT_return_val_if_fail(f,0);
3553 f->fieldWhich = f->command;
3554 f->command[0] = 0;
3555 f->argument[0] = 0;
3556 f->fieldI = 0;
3557 f->fieldRet = 1;
3558 f->type = F_OTHER;
3559 m_stackField.push((void*)f);
3560 }
3561 else if (eachchar == 0x14) // field trigger
3562 {
3563 m_stackField.viewTop((void**)&f);
3564 UT_return_val_if_fail(f,0);
3565
3566 f->command[f->fieldI] = 0;
3567 f->fieldC = wvWideStrToMB (f->command);
3568
3569 if (this->_handleCommandField(f->fieldC))
3570 f->fieldRet = 1;
3571 else
3572 f->fieldRet = 0;
3573
3574 wvFree(f->fieldC);
3575 f->fieldWhich = f->argument;
3576 f->fieldI = 0;
3577 }
3578 if(!f)
3579 {
3580 m_stackField.viewTop((void**)&f);
3581 }
3582
3583 UT_return_val_if_fail(f,0);
3584
3585 if (f->fieldI >= FLD_SIZE)
3586 {
3587 UT_DEBUGMSG(("DOM: Something completely absurd in the fields implementation!\n"));
3588 UT_ASSERT_NOT_REACHED();
3589 return 1;
3590 }
3591
3592 if (!f->fieldWhich) {
3593 UT_DEBUGMSG(("DOM: _fieldProc - 'which' is null\n"));
3594 UT_ASSERT_NOT_REACHED();
3595 return 1;
3596 }
3597
3598 if (chartype)
3599 f->fieldWhich[f->fieldI] = wvHandleCodePage(eachchar, lid);
3600 else
3601 f->fieldWhich[f->fieldI] = eachchar;
3602
3603 f->fieldI++;
3604
3605 if (eachchar == 0x15) // end of field marker
3606 {
3607 f->fieldWhich[f->fieldI] = 0;
3608 //I do not think we should convert this -- this is the field value
3609 //displayed in the document; in most cases we do not need it, as we
3610 //calulate it ourselves, but for instance for hyperlinks this is the
3611 //the text to which the link is tied
3612 //m_fieldA = wvWideStrToMB (m_argument);
3613 f->fieldC = wvWideStrToMB (f->command);
3614 _handleFieldEnd (f->fieldC, ps->currentcp);
3615 wvFree (f->fieldC);
3616 iRet = f->fieldRet;
3617
3618 m_stackField.pop((void**)&f);
3619 UT_return_val_if_fail(f,0);
3620 delete f;
3621 }
3622 return iRet;
3623 }
3624
_handleFieldEnd(char * command,UT_uint32)3625 bool IE_Imp_MsWord_97::_handleFieldEnd (char *command, UT_uint32 /*iDocPosition*/)
3626 {
3627 Doc_Field_t tokenIndex = F_OTHER;
3628 char *token;
3629 field * f = NULL;
3630 m_stackField.viewTop((void**)&f);
3631 UT_return_val_if_fail(f, true);
3632
3633 if (*command != 0x13)
3634 {
3635 UT_DEBUGMSG (("field did not begin with 0x13\n"));
3636 return true;
3637 }
3638
3639 if(m_bInTOC && m_bTOCsupported && ( f->type == F_TOC
3640 || f->type == F_TOC_FROM_RANGE))
3641 {
3642 // end of TOC field in a supported TOC; we do nothing, since the field has already
3643 // been processed in _handleFieldCommand()
3644 m_bInTOC = false;
3645 m_bTOCsupported = false;
3646 return _insertTOC(f);
3647 }
3648
3649 if(m_bInTOC && m_bTOCsupported)
3650 {
3651 // end of some non-TOC field inside supported TOC; just return
3652 return true;
3653 }
3654
3655 command++;
3656 token = strtok (command, "\t, ");
3657
3658 while(token)
3659 {
3660 tokenIndex = s_mapNameToField (token);
3661 switch (tokenIndex)
3662 {
3663 case F_MERGEFIELD:
3664 {
3665 const gchar* atts[5];
3666 atts[0] = "type";
3667 atts[1] = "mail_merge";
3668 atts[2] = "param";
3669 atts[3] = 0;
3670 atts[4] = 0;
3671
3672 token = strtok (NULL, "\"\" ");
3673
3674 UT_return_val_if_fail(f->argument[f->fieldI - 1] == 0x15, false);
3675
3676 f->argument[f->fieldI - 1] = 0;
3677 UT_UCS2Char * a = f->argument;
3678
3679 UT_UTF8String param;
3680
3681 if(*a == 0x14)
3682 {
3683 a++;
3684 }
3685
3686 while(*a)
3687 {
3688 if (!((171 == *a) || (187 == *a))) {
3689 // @argument looks like <<FieldName>>.
3690 // strip off the '<<' (171) and '>>' (187)
3691 param.appendUCS2(a, 1);
3692 }
3693
3694 a++;
3695 }
3696
3697 atts[3] = param.utf8_str();
3698
3699 if (!_appendObject (PTO_Field, static_cast<const gchar**>(&atts[0])))
3700 {
3701 UT_DEBUGMSG(("Dom: couldn't append field (type = '%s')\n", atts[1]));
3702 }
3703 }
3704 break;
3705
3706 case F_HYPERLINK:
3707 {
3708 token = strtok (NULL, "\"\" ");
3709 UT_return_val_if_fail(f->argument[f->fieldI - 1] == 0x15, false);
3710
3711 f->argument[f->fieldI - 1] = 0;
3712 UT_UCS2Char * a = f->argument;
3713
3714 if(*a == 0x14)
3715 {
3716 a++;
3717 }
3718
3719 while(*a)
3720 {
3721 this->_appendChar(*a++);
3722 }
3723 this->_flush();
3724
3725 if(!m_bInPara)
3726 {
3727 _appendStrux(PTX_Block, NULL);
3728 m_bInPara = true ;
3729 }
3730
3731 _appendObject(PTO_Hyperlink,NULL);
3732 m_bInLink = false;
3733 break;
3734 }
3735 case F_TOC:
3736 case F_TOC_FROM_RANGE:
3737 // we only get here for unsupported TOC types, in which case we dump the field
3738 // result (not ideal, since often the PAGEREF fields inside the TOC have not been
3739 // updated before save and so we get 'bookmark not found' instead of page numbers,
3740 // but it is better than nothing at all)
3741
3742 {
3743 token = strtok (NULL, "\"\" ");
3744 UT_return_val_if_fail(f->argument[f->fieldI - 1] == 0x15, false);
3745
3746 f->argument[f->fieldI - 1] = 0;
3747 UT_UCS2Char * a = f->argument;
3748
3749 if(*a == 0x14)
3750 {
3751 a++;
3752 }
3753
3754 while(*a)
3755 {
3756 this->_appendChar(*a++);
3757 }
3758 this->_flush();
3759 }
3760
3761 break;
3762
3763 default:
3764 break;
3765 }
3766
3767 token = strtok (NULL, "\t, ");
3768 }
3769 return false;
3770 }
3771
3772 /*!
3773 Word has several different toc tables (TOC, TOA, indexes); at the moment we only
3774 support TOC and even than only if it is based on heading styles
3775 */
_isTOCsupported(field * f)3776 bool IE_Imp_MsWord_97::_isTOCsupported(field *f)
3777 {
3778 UT_return_val_if_fail(f,false);
3779
3780 if( f->type != F_TOC
3781 && f->type != F_TOC_FROM_RANGE
3782 )
3783 {
3784 return false;
3785 }
3786
3787 bool bRet = true;
3788 char * command = wvWideStrToMB (f->command);
3789 UT_DEBUGMSG(("IE_Imp_MsWord_97::_isTOCsupported: command %s\n", command));
3790
3791 char * params = NULL;
3792
3793 if(f->type == F_TOC)
3794 {
3795 params = command + 5;
3796 }
3797 else if(f->type == F_TOC_FROM_RANGE)
3798 {
3799 params = command + 4;
3800 }
3801
3802 // we only support the heading based TOC for now
3803 char * t = strstr(params, "\\o");
3804
3805 if(!t)
3806 t = strstr(params, "\\t");
3807
3808 if(!t)
3809 {
3810 bRet = false;
3811 goto finish;
3812 }
3813
3814 finish:
3815 FREEP(command);
3816 return bRet;
3817 }
3818
3819
3820
3821 /*!
3822 returns true if the TOC has been handled, false if the TOC type is unsupported
3823 */
3824
3825 /* Does this handle the contents styles indirectly via inserting the TOC as new and
3826 letting the default/initial pt code handle it like new rather than actually importing it? */
3827
_insertTOC(field * f)3828 bool IE_Imp_MsWord_97::_insertTOC(field *f)
3829 {
3830 UT_return_val_if_fail(f,false);
3831 bool bRet = true;
3832 bool bSupported = false;
3833
3834 UT_sint32 i = 0, i1 = 0, i2 = 0;
3835 char * t = NULL, * t1 = NULL, * t2 = NULL;
3836 UT_UTF8String sProps = "toc-has-heading:0;", sTemp, sLeader;
3837
3838 const gchar * attrs [3] = {"props", NULL, NULL};
3839
3840 char * command = wvWideStrToMB (f->command);
3841 UT_DEBUGMSG(("IE_Imp_MsWord_97::_insertTOC: command %s\n", command));
3842
3843 char * params = NULL;
3844
3845 if(f->type == F_TOC)
3846 {
3847 params = command + 5;
3848 }
3849 else if(f->type == F_TOC_FROM_RANGE)
3850 {
3851 params = command + 4;
3852 }
3853 else
3854 {
3855 bRet = false;
3856 goto finish;
3857 }
3858
3859 if((t = strstr(params, "\\p")))
3860 {
3861 // this defines the leader, we parse it first, before we mess up the command
3862 t1 = strchr(t, '\"');
3863 if(t1)
3864 {
3865 t1++;
3866
3867 // AW can only use one of the chars (there are up to 5), we will take the first
3868 switch(*t1)
3869 {
3870 default: // not sure, we will treat this as a dot
3871 case '.': sLeader += "dot"; break;
3872 case '-': sLeader += "hyphen"; break;
3873 case '_': sLeader += "underline"; break;
3874 case ' ': sLeader += "none"; break;
3875 }
3876 }
3877 }
3878
3879 if((t = strstr(params, "\\b")))
3880 {
3881 // a bookmark restricts the range from which the TOC is built
3882 t1 = strchr(t, '\"');
3883 if(t1)
3884 {
3885 t1++;
3886
3887 t2 = strchr(t1, '\"');
3888
3889 char c = *t2;
3890 *t2 = 0;
3891
3892 sProps += "toc-range-bookmark:";
3893 sProps += t1;
3894 sProps += ";";
3895
3896 *t2 = c; // restore the string
3897 }
3898 }
3899
3900 if((t = strstr(params, "\\o")))
3901 {
3902 // heading-based TOC
3903 // \o param specifies a range of headings to use, e.g., \o "2-4"
3904 bSupported = true;
3905
3906 t = strchr(t, '\"');
3907
3908 if(!t)
3909 {
3910 bRet = false;
3911 goto finish;
3912 }
3913
3914 t++;
3915
3916 i1 = atoi(t);
3917
3918 if(!i1)
3919 {
3920 bRet = false;
3921 goto finish;
3922 }
3923
3924 t1 = strchr(t, '-');
3925 t2 = strchr(t, '\"');
3926
3927 t = UT_MIN(t1, t2);
3928
3929 if(!t)
3930 {
3931 bRet = false;
3932 goto finish;
3933 }
3934
3935 i2 = 0;
3936 if(*t == '\"')
3937 {
3938 i2 = i1;
3939 }
3940 else
3941 {
3942 UT_ASSERT_HARMLESS( *t == '-');
3943 t++;
3944 i2 = atoi(t);
3945 }
3946
3947 if(!i2)
3948 {
3949 bRet = false;
3950 goto finish;
3951 }
3952 // now create our TOC attr/props
3953 //
3954 // * we do not need to set the source styles, because the Heading
3955 // styles are the AW default
3956 //
3957 // * we do have to set the dest styles
3958 //
3959 // * I am not sure what to do about toc-id: the AW FV_Fiew::cmdInsertTOC() does not specify the
3960 // id, so neither will we
3961 //
3962 // AW currently only uses the first 4 Heading styles, but we will implement this for all 9
3963 // to avoid future work
3964
3965 for(i = 1; i < i1; ++i)
3966 {
3967 UT_UTF8String_sprintf(sTemp, "toc-source-style%d:nonexistentstyle;", i);
3968 sProps += sTemp;
3969 }
3970
3971 UT_sint32 iMin = UT_MIN(i2+1,10);
3972
3973 for(i = i1; i < iMin; ++i)
3974 {
3975 UT_UTF8String_sprintf(sTemp, "toc-dest-style%d:TOC %d", i, i);
3976 sProps += sTemp;
3977 sProps += ";";
3978
3979 if(sLeader.size())
3980 {
3981 UT_UTF8String_sprintf(sTemp, "toc-tab-leader%d:", i);
3982 sProps += sTemp;
3983 sProps += sLeader;
3984 sProps += ";";
3985 }
3986 }
3987
3988 for(i = iMin; i < 10; ++i)
3989 {
3990 UT_UTF8String_sprintf(sTemp, "toc-dest-style%d:nonexistentstyle", i);
3991 sProps += sTemp;
3992 sProps += ";";
3993 }
3994 }
3995
3996 // the \t and \o switches can be used simultaneously
3997 // if both switches define the same level, we are unable to handle that; we will used the style
3998 // in the \t switch (it is easier since the parsing of the \t parameter is destructive)
3999 if ((t = strstr(params, "\\t")))
4000 {
4001 // style-based toc, the params have the format
4002 // \t "style,level,style,level ..."
4003 bSupported = true;
4004 t1 = strchr(t, '\"');
4005 if(!t1)
4006 {
4007 bRet = false;
4008 goto finish;
4009 }
4010
4011 char * end = strchr(t1+1, '\"');
4012
4013 while(t1 && t1 < end)
4014 {
4015 t1++;
4016 t2 = strchr(t1, ',');
4017 if(!t2)
4018 {
4019 bRet = false;
4020 goto finish;
4021 }
4022
4023 *t2 = 0;
4024
4025 sTemp = t1; // style name
4026
4027 t1 = t2 + 1; // style level
4028 t2 = strchr(t1, ',');
4029
4030 if(t2)
4031 t2 = UT_MIN(t2,end);
4032 else
4033 t2 = end;
4034
4035 *t2 = 0;
4036
4037 sProps += "toc-source-style";
4038 sProps += t1;
4039 sProps += ":";
4040 sProps += sTemp;
4041 sProps += ";";
4042
4043 sProps += "toc-dest-style";
4044 sProps += t1;
4045 sProps += ":TOC ";
4046 sProps += t1;
4047 sProps += ";";
4048
4049 if(sLeader.size())
4050 {
4051 sProps += "toc-tab-leader";
4052 sProps += t1;
4053 sProps += ":";
4054 sProps += sLeader;
4055 sProps += ";";
4056 }
4057
4058 t1 = t2;
4059 }
4060 }
4061
4062 if(!bSupported)
4063 {
4064 bRet = false;
4065 goto finish;
4066 }
4067
4068 // remove trailing semicolon (screws up property parser)
4069 {
4070 sTemp = sProps;
4071 const char * c = sTemp.utf8_str();
4072 if(c[strlen(c)-1] == ';')
4073 {
4074 sProps.assign(c, strlen(c)-1);
4075 }
4076 }
4077
4078 attrs[1] = sProps.utf8_str();
4079
4080 if(!m_bInPara)
4081 {
4082 _appendStrux(PTX_Block, NULL);
4083 m_bInPara = true ;
4084 }
4085
4086 _appendStrux(PTX_SectionTOC, attrs);
4087 _appendStrux(PTX_EndTOC, NULL);
4088
4089 finish:
4090 FREEP(command);
4091 return bRet;
4092 }
4093
4094
_handleCommandField(char * command)4095 bool IE_Imp_MsWord_97::_handleCommandField (char *command)
4096 {
4097 // if we are currently inside a supported TOC, just return
4098 if(m_bInTOC && m_bTOCsupported)
4099 return true;
4100
4101 Doc_Field_t tokenIndex = F_OTHER;
4102 char *token = NULL;
4103 field * f = NULL;
4104 m_stackField.viewTop((void**)&f);
4105 UT_return_val_if_fail(f,true);
4106 bool bTypeSet = false;
4107
4108 xxx_UT_DEBUGMSG(("DOM: handleCommandField '%s'\n", command));
4109
4110 const gchar* atts[5];
4111 atts[0] = "type";
4112 atts[1] = 0;
4113 atts[2] = 0;
4114 atts[3] = 0;
4115 atts[4] = 0;
4116
4117 if (*command != 0x13)
4118 {
4119 UT_DEBUGMSG(("DOM: field did not begin with 0x13\n"));
4120 return true;
4121 }
4122
4123 //first skip the 0x13
4124 command++;
4125 token = strtok(command, "\t, ");
4126
4127 while(token)
4128 {
4129 tokenIndex = s_mapNameToField (token);
4130 if(!bTypeSet)
4131 {
4132 f->type = tokenIndex;
4133 bTypeSet = true;
4134 }
4135
4136 switch (tokenIndex)
4137 {
4138 case F_EDITTIME:
4139 case F_TIME:
4140 atts[1] = "time";
4141 break;
4142
4143 case F_DateTimePicture:
4144 //seems similar to a creation date
4145 atts[1] = "meta_date";
4146 break;
4147
4148 case F_DATE:
4149 atts[1] = "date";
4150 break;
4151
4152 case F_PAGE:
4153 atts[1] = "page_number";
4154 break;
4155
4156 case F_NUMCHARS:
4157 atts[1] = "char_count";
4158 break;
4159
4160 case F_NUMPAGES:
4161 atts[1] = "page_count";
4162 break;
4163
4164 case F_NUMWORDS:
4165 atts[1] = "word_count";
4166 break;
4167
4168 case F_FILENAME:
4169 atts[1] = "file_name";
4170 break;
4171
4172 case F_PAGEREF:
4173 token = strtok (NULL, "\"\" ");
4174 atts[1] = "page_ref";
4175 atts[2] = "param";
4176 if(token)
4177 atts[3] = static_cast<const gchar *>(token);
4178 else
4179 atts[3] = "no_bookmark_given";
4180 break;
4181
4182 case F_HYPERLINK:
4183 {
4184 const gchar *new_atts[3];
4185 token = strtok (NULL, "\"\" ");
4186
4187 if(token) {
4188 // hyperlink or hyperlink to bookmark
4189 new_atts[0] = "xlink:href";
4190 UT_String href;
4191 if ( !strcmp(token, "\\l") )
4192 {
4193 token = strtok (NULL, "\"\" ");
4194 href = "#";
4195 href += token;
4196 }
4197 else
4198 {
4199 href = token;
4200 }
4201 new_atts[1] = href.c_str();
4202 new_atts[2] = 0;
4203 this->_flush();
4204
4205 if(!m_bInPara)
4206 {
4207 _appendStrux(PTX_Block, NULL);
4208 m_bInPara = true ;
4209 }
4210
4211 if(m_bInLink)
4212 {
4213 UT_ASSERT_HARMLESS(UT_SHOULD_NOT_HAPPEN);
4214 _appendObject(PTO_Hyperlink, NULL);
4215 m_bInLink = false;
4216 }
4217
4218 _appendObject(PTO_Hyperlink, new_atts);
4219 m_bInLink = true;
4220 }
4221 return true;
4222 }
4223
4224 case F_TOC: // for the toc fields we will
4225 case F_TOC_FROM_RANGE: // insert the field result for now
4226 UT_DEBUGMSG(("TOC field encountered\n"));
4227 m_bInTOC = true;
4228 m_bTOCsupported = _isTOCsupported(f);
4229
4230 default:
4231 // unhandled field type
4232 token = strtok(NULL, "\t, ");
4233 continue;
4234 }
4235
4236
4237 this->_flush();
4238
4239 if(!m_bInPara)
4240 {
4241 _appendStrux(PTX_Block, NULL);
4242 m_bInPara = true ;
4243 }
4244
4245 if (!_appendObject (PTO_Field, static_cast<const gchar**>(&atts[0])))
4246 {
4247 UT_DEBUGMSG(("Dom: couldn't append field (type = '%s')\n", atts[1]));
4248 }
4249
4250 token = strtok(NULL, "\t, ");
4251 }
4252
4253 return true;
4254 }
4255
// Coarse classification of the image data held in a blip; see s_determineImageType()
// for the mapping from blip types.
typedef enum {
	MSWord_UnknownImage,	// no blip, or a blip type that is not handled
	MSWord_VectorImage,		// metafile data (EMF, WMF, PICT)
	MSWord_RasterImage		// bitmap data (JPEG, PNG, DIB)
} MSWord_ImageType;
4261
s_determineImageType(Blip * b)4262 static MSWord_ImageType s_determineImageType ( Blip * b )
4263 {
4264 if ( !b )
4265 return MSWord_UnknownImage;
4266
4267 switch ( b->type )
4268 {
4269 case msoblipEMF:
4270 case msoblipWMF:
4271 case msoblipPICT:
4272 return MSWord_VectorImage;
4273
4274 case msoblipJPEG:
4275 case msoblipPNG:
4276 case msoblipDIB:
4277 return MSWord_RasterImage;
4278
4279 case msoblipERROR:
4280 case msoblipUNKNOWN:
4281 default:
4282 return MSWord_UnknownImage;
4283 }
4284 }
4285
s_determineIEGFT(Blip * b)4286 static IEGraphicFileType s_determineIEGFT ( Blip * b )
4287 {
4288 if ( !b )
4289 return IEGFT_Unknown;
4290
4291 switch ( b->type )
4292 {
4293 case msoblipEMF:
4294 return IEGFT_EMF;
4295 case msoblipWMF:
4296 return IEGFT_WMF;
4297
4298 case msoblipJPEG:
4299 return IEGFT_JPEG;
4300 case msoblipPNG:
4301 return IEGFT_PNG;
4302 case msoblipDIB:
4303 return IEGFT_DIB;
4304
4305 case msoblipPICT:
4306 case msoblipERROR:
4307 case msoblipUNKNOWN:
4308 default:
4309 return IEGFT_Unknown;
4310 }
4311 }
4312
4313
4314
_handleImage(Blip * b,long width,long height,long cropt,long cropb,long cropl,long cropr)4315 UT_Error IE_Imp_MsWord_97::_handleImage (Blip * b, long width, long height, long cropt, long cropb, long cropl, long cropr)
4316 {
4317 FG_Graphic* pFG = 0;
4318 UT_Error error = UT_OK;
4319 const UT_ByteBuf * buf = 0;
4320
4321 UT_String propBuffer;
4322 UT_String propsName;
4323
4324 // suck the data into the ByteBuffer
4325
4326 MSWord_ImageType imgType = s_determineImageType ( b );
4327 IEGraphicFileType iegft = s_determineIEGFT( b );
4328
4329 wvStream *pwv;
4330 bool decompress = false;
4331
4332 if ( imgType == MSWord_RasterImage )
4333 {
4334 pwv = b->blip.bitmap.m_pvBits;
4335
4336 }
4337 else if ( imgType == MSWord_VectorImage )
4338 {
4339 pwv = b->blip.metafile.m_pvBits;
4340 decompress = (b->blip.metafile.m_fCompression == msocompressionDeflate);
4341 }
4342 else
4343 {
4344 UT_DEBUGMSG(("UNKNOWN IMAGE TYPE!!"));
4345 return UT_ERROR;
4346 }
4347
4348 size_t size = wvStream_size (pwv);
4349 char *data = new char[size];
4350 wvStream_rewind(pwv);
4351 wvStream_read(data,size,sizeof(char),pwv);
4352
4353 UT_ByteBuf pictData;
4354 if (decompress)
4355 {
4356
4357 unsigned long uncomprLen, comprLen;
4358 comprLen = size;
4359 uncomprLen = b->blip.metafile.m_cb;
4360 Bytef *uncompr = new Bytef[uncomprLen];
4361 int err = uncompress (uncompr, &uncomprLen, reinterpret_cast<const unsigned char *>(data), comprLen);
4362 if (err != Z_OK)
4363 {
4364 UT_DEBUGMSG(("Could not uncompress image\n"));
4365 DELETEP(uncompr);
4366 goto Cleanup;
4367 }
4368 pictData.append(reinterpret_cast<const UT_Byte*>(uncompr), uncomprLen);
4369 DELETEPV(uncompr);
4370 }
4371 else
4372 {
4373 pictData.append(reinterpret_cast<const UT_Byte*>(data), size);
4374 }
4375
4376 delete [] data;
4377
4378 if(!pictData.getPointer(0))
4379 error = UT_ERROR;
4380 else
4381 error = IE_ImpGraphic::loadGraphic (pictData, iegft, &pFG);
4382
4383 if ((error != UT_OK) || !pFG)
4384 {
4385 UT_DEBUGMSG(("Could not import graphic\n"));
4386 goto Cleanup;
4387 }
4388
4389 buf = pFG->getBuffer();
4390
4391 if (!buf)
4392 {
4393 // i don't think that this could ever happen, but...
4394 UT_DEBUGMSG(("Could not convert to PNG\n"));
4395 error = UT_ERROR;
4396 goto Cleanup;
4397 }
4398
4399 //
4400 // This next bit of code will set up our properties based on the image attributes
4401 //
4402
4403 {
4404 UT_LocaleTransactor t(LC_NUMERIC, "C");
4405 UT_String_sprintf(propBuffer, "width:%fin; height:%fin; cropt:%fin; cropb:%fin; cropl:%fin; cropr:%fin",
4406 static_cast<double>(width) / static_cast<double>(1440),
4407 static_cast<double>(height) / static_cast<double>(1440),
4408 static_cast<double>(cropt) / static_cast<double>(1440),
4409 static_cast<double>(cropb) / static_cast<double>(1440),
4410 static_cast<double>(cropl) / static_cast<double>(1440),
4411 static_cast<double>(cropr) / static_cast<double>(1440));
4412 }
4413
4414 UT_String_sprintf(propsName, "%d", getDoc()->getUID(UT_UniqueId::Image));
4415
4416 const gchar* propsArray[5];
4417 propsArray[0] = "props";
4418 propsArray[1] = propBuffer.c_str();
4419 propsArray[2] = "dataid";
4420 propsArray[3] = propsName.c_str();
4421 propsArray[4] = 0;
4422
4423 if (!_ensureInBlock())
4424 {
4425 UT_DEBUGMSG (("_ensureInBlock() failed\n"));
4426 error = UT_ERROR;
4427 goto Cleanup;
4428 }
4429
4430 if (!_appendObject (PTO_Image, propsArray))
4431 {
4432 UT_DEBUGMSG (("Could not create append object\n"));
4433 error = UT_ERROR;
4434 goto Cleanup;
4435 }
4436
4437 if (!getDoc()->createDataItem(propsName.c_str(), false,
4438 buf, pFG->getMimeType(), NULL))
4439 {
4440 UT_DEBUGMSG (("Could not create data item\n"));
4441 // the mimetype is sunk anyway
4442 error = UT_ERROR;
4443 goto Cleanup;
4444 }
4445
4446 Cleanup:
4447 DELETEP(pFG);
4448
4449 return error;
4450 }
4451
4452
4453
/*!
 * This method imports an image that can later be used as an embedded object.
 * The Blip pointer b contains the MS Word data we use to create the image.
 * Note: this comment used to describe "width" and "height" arguments (the width
 * and height of the object in inches); the method takes no such arguments.
 * The routine returns the name of the data item it creates in the
 * UT_String sImageName.
 */
_handlePositionedImage(Blip * b,UT_String & sImageName)4461 UT_Error IE_Imp_MsWord_97::_handlePositionedImage (Blip * b, UT_String & sImageName)
4462 {
4463 FG_Graphic* pFG = 0;
4464 UT_Error error = UT_OK;
4465 const UT_ByteBuf * buf = 0;
4466
4467 // suck the data into the ByteBuffer
4468
4469 MSWord_ImageType imgType = s_determineImageType ( b );
4470
4471 wvStream *pwv;
4472 bool decompress = false;
4473
4474 if ( imgType == MSWord_RasterImage )
4475 {
4476 pwv = b->blip.bitmap.m_pvBits;
4477
4478 }
4479 else if ( imgType == MSWord_VectorImage )
4480 {
4481 pwv = b->blip.metafile.m_pvBits;
4482 decompress = (b->blip.metafile.m_fCompression == msocompressionDeflate);
4483 }
4484 else
4485 {
4486 UT_DEBUGMSG(("UNKNOWN IMAGE TYPE!!"));
4487 return UT_ERROR;
4488 }
4489
4490 size_t size = wvStream_size (pwv);
4491 char *data = new char[size];
4492 wvStream_rewind(pwv);
4493 wvStream_read(data,size,sizeof(char),pwv);
4494
4495 UT_ByteBuf pictData;
4496
4497 if (decompress)
4498 {
4499
4500 unsigned long uncomprLen, comprLen;
4501 comprLen = size;
4502 uncomprLen = b->blip.metafile.m_cb;
4503 Bytef *uncompr = new Bytef[uncomprLen];
4504 int err = uncompress (uncompr, &uncomprLen, reinterpret_cast<const unsigned char *>(data), comprLen);
4505 if (err != Z_OK)
4506 {
4507 UT_DEBUGMSG(("Could not uncompress image\n"));
4508 DELETEP(uncompr);
4509 goto Cleanup;
4510 }
4511 pictData.append(reinterpret_cast<const UT_Byte*>(uncompr), uncomprLen);
4512 DELETEPV(uncompr);
4513 }
4514 else
4515 {
4516 pictData.append(reinterpret_cast<const UT_Byte*>(data), size);
4517 }
4518
4519 delete [] data;
4520
4521 if(!pictData.getPointer(0))
4522 error = UT_ERROR;
4523 else
4524 error = IE_ImpGraphic::loadGraphic (pictData, IEGFT_Unknown, &pFG);
4525
4526 if ((error != UT_OK) || !pFG)
4527 {
4528 UT_DEBUGMSG(("Could not import graphic\n"));
4529 goto Cleanup;
4530 }
4531
4532 // TODO: can we get back a vector graphic?
4533 buf = pFG->getBuffer();
4534
4535 if (!buf)
4536 {
4537 // i don't think that this could ever happen, but...
4538 UT_DEBUGMSG(("Could not convert to PNG\n"));
4539 error = UT_ERROR;
4540 goto Cleanup;
4541 }
4542
4543 UT_String_sprintf(sImageName, "%d", getDoc()->getUID(UT_UniqueId::Image));
4544
4545 if (!getDoc()->createDataItem(sImageName.c_str(), false,
4546 buf, pFG->getMimeType(), NULL))
4547 {
4548 UT_DEBUGMSG (("Could not create data item\n"));
4549 error = UT_ERROR;
4550 goto Cleanup;
4551 }
4552
4553 Cleanup:
4554 DELETEP(pFG);
4555
4556 return error;
4557 }
4558
4559 /****************************************************************************/
4560 /****************************************************************************/
4561
4562 //
4563 // wv callbacks to marshall data back to our importer class
4564 //
4565
charProc(wvParseStruct * ps,U16 eachchar,U8 chartype,U16 lid)4566 static int charProc (wvParseStruct *ps, U16 eachchar, U8 chartype, U16 lid)
4567 {
4568 IE_Imp_MsWord_97 * pDocReader = static_cast <IE_Imp_MsWord_97 *> (ps->userData);
4569 return pDocReader->_charProc (ps, eachchar, chartype, lid);
4570 }
4571
specCharProc(wvParseStruct * ps,U16 eachchar,CHP * achp)4572 static int specCharProc (wvParseStruct *ps, U16 eachchar, CHP* achp)
4573 {
4574 IE_Imp_MsWord_97 * pDocReader = static_cast <IE_Imp_MsWord_97 *> (ps->userData);
4575 return pDocReader->_specCharProc (ps, eachchar, achp);
4576 }
4577
eleProc(wvParseStruct * ps,wvTag tag,void * props,int dirty)4578 static int eleProc (wvParseStruct *ps, wvTag tag, void *props, int dirty)
4579 {
4580 IE_Imp_MsWord_97 * pDocReader = static_cast <IE_Imp_MsWord_97 *> (ps->userData);
4581 return pDocReader->_eleProc (ps, tag, props, dirty);
4582 }
4583
docProc(wvParseStruct * ps,wvTag tag)4584 static int docProc (wvParseStruct *ps, wvTag tag)
4585 {
4586 IE_Imp_MsWord_97 * pDocReader = static_cast <IE_Imp_MsWord_97 *> (ps->userData);
4587 return pDocReader->_docProc (ps, tag);
4588 }
4589
4590
4591 //--------------------------------------------------------------------------/
4592 //--------------------------------------------------------------------------/
4593
_table_open()4594 void IE_Imp_MsWord_97::_table_open ()
4595 {
4596 m_iCurrentRow = 0;
4597 m_iCurrentCell = 0;
4598
4599 // _appendStrux(PTX_Block, NULL); // Don't need/want this after 27/3/2005
4600 _appendStrux(PTX_SectionTable, NULL);
4601 m_vecColumnWidths.clear();
4602 m_bRowOpen = false;
4603 m_bCellOpen = false;
4604 m_bInPara = false;
4605 #ifdef DEBUG
4606 static UT_sint32 sTableCount = 0;
4607 sTableCount++;
4608 #endif
4609 UT_DEBUGMSG(("\n<TABLE> [%d]", sTableCount));
4610
4611 }
4612
4613 //--------------------------------------------------------------------------/
4614 //--------------------------------------------------------------------------/
4615
4616 /*!
4617 * Exand a vector with zeros to make room for a new value
4618 */
setNumberVector(UT_NumberVector & vec,UT_sint32 i,UT_sint32 val)4619 void IE_Imp_MsWord_97::setNumberVector(UT_NumberVector & vec, UT_sint32 i, UT_sint32 val)
4620 {
4621 while(i > static_cast<UT_sint32>(vec.size() +1))
4622 {
4623 vec.addItem(0);
4624 }
4625 vec.addItem(val); // we are sure that it will be appened at index i
4626 }
4627
4628 /*!
4629 * This method parses the vector of MsColSpans held by m_vecColumnWidths
4630 * and fills the vector colWidths with the widths of the individual columns.
4631 *
4632 * We do this because MSWord provides the widths of column spans, and in
4633 * some cases you can get a table with no row fully partitioned into
4634 * individual cells.
4635 */
_build_ColumnWidths(UT_NumberVector & colWidths)4636 bool IE_Imp_MsWord_97::_build_ColumnWidths(UT_NumberVector & colWidths)
4637 {
4638
4639 // OK handle the easy cases first and find the maximum value of iRight
4640
4641 UT_sint32 iMaxRight = 0;
4642 UT_sint32 i = 0;
4643 UT_sint32 iLeft,iRight = 0;
4644 UT_sint32 iSize = static_cast<UT_sint32>(m_vecColumnWidths.size());
4645 for(i=0; i< iSize;i++)
4646 {
4647 MsColSpan * pSpan = reinterpret_cast<MsColSpan *>(m_vecColumnWidths.getNthItem(i));
4648 iLeft = pSpan->iLeft;
4649 iRight = pSpan->iRight;
4650 if(iMaxRight < iRight)
4651 {
4652 iMaxRight = iRight;
4653 }
4654 if((iLeft + 1) == iRight)
4655 {
4656 setNumberVector(colWidths,iLeft,pSpan->width);
4657 xxx_UT_DEBUGMSG(("_build_ColumnWidths Initial set: Left %d Width %d \n",iLeft,colWidths[iLeft]));
4658 }
4659 }
4660 //
4661 // Look to see if we're finished now.
4662 //
4663 if((colWidths.size() == iMaxRight) && _isVectorFull(colWidths))
4664 {
4665 return true;
4666 }
4667 if(colWidths.size() < iMaxRight)
4668 {
4669 setNumberVector(colWidths,iMaxRight -1,0);
4670 }
4671 //
4672 // OK Now the hard part. Procede by scanning through the m_vecColWidths,
4673 // Looking for spans, at each span we look to see if we can break the span
4674 // into smaller pieces by subtracting a single span width.
4675 //
4676 // When we have a single column span we insert it in colWidths if colWidths
4677 // is empty at that point.
4678 //
4679 // We continue until colWidths is completely full.
4680 //
4681 UT_uint32 iLoop = 0;
4682 while(iLoop < 1000 && !_isVectorFull(colWidths))
4683 {
4684 for(i=0; i<static_cast<UT_sint32>(m_vecColumnWidths.size()); i++)
4685 {
4686 MsColSpan * pSpan = reinterpret_cast<MsColSpan *>(m_vecColumnWidths.getNthItem(i));
4687 iLeft = pSpan->iLeft;
4688 iRight = pSpan->iRight;
4689 xxx_UT_DEBUGMSG(("Loop %d iLeft %d,iRight %d colWidth[iLeft] %d colWidth[iRight-1] %d\n",iLoop,iLeft,iRight,colWidths[iLeft],colWidths[iRight -1]));
4690 if(iMaxRight < iRight)
4691 {
4692 iMaxRight = iRight;
4693 }
4694 if(((iLeft + 1) == iRight) && (colWidths[iLeft] == 0))
4695 {
4696 setNumberVector(colWidths,iLeft,pSpan->width);
4697 }
4698 else if((iLeft + 1) < iRight)
4699 {
4700 if(colWidths[iLeft] > 0)
4701 {
4702 if(!findMatchSpan(iLeft+1,iRight))
4703 {
4704 MsColSpan * pNewSpan = new MsColSpan();
4705 pNewSpan->iLeft = iLeft+1;
4706 pNewSpan->iRight = iRight;
4707 pNewSpan->width = pSpan->width - colWidths[iLeft];
4708 m_vecColumnWidths.addItem(pNewSpan);
4709 }
4710 }
4711 else if(colWidths[iRight - 1] > 0)
4712 {
4713 if(!findMatchSpan(iLeft,iRight-1))
4714 {
4715 MsColSpan * pNewSpan = new MsColSpan();
4716 pNewSpan->iLeft = iLeft;
4717 pNewSpan->iRight = iRight-1;
4718 pNewSpan->width = pSpan->width - colWidths[iRight-1];
4719 m_vecColumnWidths.addItem(pNewSpan);
4720 }
4721 }
4722 //
4723 // OK now look to see if we can fragment this by substracting a span of more
4724 // than one column from either end.
4725 //
4726 else
4727 {
4728 UT_sint32 k =0;
4729 for(k=0; k<static_cast<UT_sint32>(m_vecColumnWidths.size()); k++)
4730 {
4731 MsColSpan * pMulSpan = m_vecColumnWidths.getNthItem(i);
4732 UT_sint32 iMulLeft = pMulSpan->iLeft;
4733 UT_sint32 iMulRight = pMulSpan->iRight;
4734 if(iMulLeft == iLeft && iMulRight < iRight)
4735 {
4736 //
4737 // Make a new span fragment out of the bit greater than MulRight if one doesn't
4738 // exist
4739 //
4740 if(!findMatchSpan(iMulRight+1,iRight))
4741 {
4742 MsColSpan * pNewSpan = new MsColSpan();
4743 pNewSpan->iLeft = iMulRight+1;
4744 pNewSpan->iRight = iRight;
4745 pNewSpan->width = pSpan->width - pMulSpan->width;
4746 m_vecColumnWidths.addItem(pNewSpan);
4747 }
4748
4749 }
4750 else if (iMulLeft > iLeft && iMulRight == iRight)
4751 {
4752 //
4753 // Make a new span fragment out of the bit less than MulLeft
4754 //
4755 if(!findMatchSpan(iLeft,iMulLeft))
4756 {
4757 MsColSpan * pNewSpan = new MsColSpan();
4758 pNewSpan->iLeft = iLeft;
4759 pNewSpan->iRight = iMulLeft;
4760 pNewSpan->width = pSpan->width - pMulSpan->width;
4761 m_vecColumnWidths.addItem(pNewSpan);
4762 }
4763 }
4764 }
4765 }
4766 }
4767 }
4768 iLoop++;
4769 UT_ASSERT_HARMLESS(0);
4770 }
4771 UT_ASSERT_HARMLESS(iLoop < 1000);
4772 return (iLoop < 1000);
4773 }
4774
4775 /*!
4776 * Returns true if a span in the m_vecColumnWidths span matches the left, right
4777 * values given
4778 */
findMatchSpan(UT_sint32 iLeft,UT_sint32 iRight)4779 bool IE_Imp_MsWord_97::findMatchSpan(UT_sint32 iLeft,UT_sint32 iRight)
4780 {
4781 UT_sint32 i =0;
4782 for(i=0; i< static_cast<UT_sint32>(m_vecColumnWidths.size());i++)
4783 {
4784 MsColSpan * pSpan = m_vecColumnWidths.getNthItem(i);
4785 if(pSpan->iLeft == iLeft && pSpan->iRight == iRight)
4786 {
4787 return true;
4788 }
4789 }
4790 return false;
4791 }
4792
4793 /*!
4794 * Returns false if any element in the vector is non-zero
4795 */
_isVectorFull(UT_NumberVector & vec)4796 bool IE_Imp_MsWord_97::_isVectorFull(UT_NumberVector & vec)
4797 {
4798 UT_sint32 i = 0;
4799 for(i=0;i< vec.size() ; i++)
4800 {
4801 xxx_UT_DEBUGMSG(("isVectorFull i %d val %d \n",i,vec[i]));
4802 if( vec[i] == 0)
4803 {
4804 return false;
4805 break;
4806 }
4807 }
4808 return true;
4809 }
4810
_table_close(const wvParseStruct *,const PAP * apap)4811 void IE_Imp_MsWord_97::_table_close (const wvParseStruct * /*ps*/, const PAP *apap)
4812 {
4813 _cell_close();
4814 _row_close();
4815
4816 UT_String props("table-column-props:");
4817 UT_String propBuffer;
4818
4819 if (m_vecColumnWidths.size() > 0)
4820 {
4821 // build column width properties string
4822 UT_NumberVector colWidths;
4823 //
4824 // Some tables maybe too complicated for my simple algorithim to work out
4825 //
4826 if(_build_ColumnWidths(colWidths))
4827 {
4828
4829 for (UT_sint32 i = 0; i < colWidths.size(); i++)
4830 {
4831 UT_String_sprintf(propBuffer,"%s/",
4832 UT_convertInchesToDimensionString(m_dim,
4833 (static_cast<double>(colWidths.getNthItem(i)))/1440.0));
4834
4835 props += propBuffer;
4836 }
4837 }
4838
4839 props += "; ";
4840 //
4841 // FIXME: Put in left position here!!!!
4842 //
4843 UT_String_sprintf(propBuffer,"table-column-leftpos:%s; ",
4844 UT_convertInchesToDimensionString(m_dim,
4845 (static_cast<double>(m_iLeftCellPos)/1440.0)));
4846 props += propBuffer;
4847 UT_VECTOR_PURGEALL(MsColSpan *,m_vecColumnWidths);
4848 m_vecColumnWidths.clear ();
4849 }
4850
4851 props += "table-line-ignore:0; table-line-type:1; table-line-thickness:0.8pt;";
4852 if(apap->ptap.dxaGapHalf > 0)
4853 {
4854 props += UT_String_sprintf("table-col-spacing:%din", (2 * apap->ptap.dxaGapHalf)/ 1440);
4855 }
4856 else
4857 {
4858 props += "table-col-spacing:0.03in";
4859 }
4860 // apply properties
4861 PT_DocPosition posEnd =0;
4862 getDoc()->getBounds(true,posEnd); // clean frags!
4863 pf_Frag_Strux* sdh = getDoc()->getLastStruxOfType(PTX_SectionTable);
4864 getDoc()->changeStruxAttsNoUpdate(sdh,"props",props.c_str());
4865
4866 // end-of-table
4867 _appendStrux(PTX_EndTable, NULL);
4868 m_bInPara = false ;
4869
4870 UT_DEBUGMSG(("\n</TABLE>\n"));
4871 }
4872
4873 //--------------------------------------------------------------------------/
4874 //--------------------------------------------------------------------------/
4875
_row_open(const wvParseStruct * ps)4876 void IE_Imp_MsWord_97::_row_open (const wvParseStruct *ps)
4877 {
4878 if (m_bRowOpen)
4879 return;
4880
4881 if (m_iCurrentRow > ps->norows) {
4882 //UT_ASSERT(m_iCurrentRow <= ps->norows);
4883 return;
4884 }
4885
4886 m_bRowOpen = true;
4887 m_iCurrentRow++;
4888 xxx_UT_DEBUGMSG(("imp_MsWord: _row_open: Last Left %d Last Right %d \n",m_iLeft,m_iRight));
4889 m_iCurrentCell = 0;
4890 m_iLeft = 0;
4891 m_iRight = 0;
4892 xxx_UT_DEBUGMSG(("\n\t<ROW:%d>", m_iCurrentRow));
4893 }
4894
4895 //--------------------------------------------------------------------------/
4896 //--------------------------------------------------------------------------/
4897
_row_close()4898 void IE_Imp_MsWord_97::_row_close ()
4899 {
4900 if (m_bRowOpen) {
4901 xxx_UT_DEBUGMSG(("\t</ROW>"));
4902 }
4903 m_bRowOpen = false;
4904 }
4905
4906 //--------------------------------------------------------------------------/
4907 //--------------------------------------------------------------------------/
4908
// Line styles, from fp_TableContainer.h
enum
{
	LS_OFF = 0,		// no line style: no line is drawn
	LS_NORMAL = 1	// a normal solid line
};

/*!
 * Maps an MS Word border line type onto one of the LS_* line styles.
 * For now every line type, including 0 and 1, maps to LS_NORMAL.
 */
static int
sConvertLineStyle (short lineType)
{
	int iStyle;

	switch (lineType)
	{
		case 0:
		case 1:
			iStyle = LS_NORMAL;
			break;

		// TODO: more cases here
		default:
			iStyle = LS_NORMAL;
			break;
	}

	return iStyle;
}
4930
/*!
 * Converts a border line width given in eighths of a unit into a fractional
 * width (the original note calls the unit a pixel).
 *
 * \param x line width in 1/8 units; the value 255 yields a width of 0
 * \return x / 8 as a double, or 0 when x is 255
 */
static double
brc_to_pixel (int x)
{
	return (x == 255) ? 0. : x / 8.;
}
4939
_cell_open(const wvParseStruct * ps,const PAP * apap)4940 void IE_Imp_MsWord_97::_cell_open (const wvParseStruct *ps, const PAP *apap)
4941 {
4942 if (m_bCellOpen || apap->fTtp)
4943 return;
4944
4945 if (!m_bRowOpen || m_iCurrentRow > ps->norows) {
4946 //UT_ASSERT(m_bRowOpen || m_iCurrentRow <= ps->norows);
4947 return;
4948 }
4949
4950 UT_Vector columnWidths;
4951 UT_sint32 vspan = 0;
4952 UT_String propBuffer;
4953
4954 const gchar* propsArray[3];
4955 propsArray[0] = static_cast<const gchar*>("props");
4956 propsArray[1] = "";
4957 propsArray[2] = NULL;
4958
4959
4960 #if 0
4961 if(m_iCurrentCell >= apap->ptap.itcMac)
4962 {
4963 // this happens when the row contains no cell definitions; we
4964 // need to insert a dummy cell into our row
4965 goto do_insert;
4966 }
4967 #endif
4968
4969 // add a new cell
4970 m_bCellOpen = true;
4971 if(m_iCurrentCell == 0)
4972 {
4973 //
4974 // Scan the differences in centers for this row so we can work out the column
4975 // widths of the table eventually.
4976 //
4977 m_iLeftCellPos = 0;
4978 UT_sint32 iLeft, iRight, i;
4979 m_iLeftCellPos = ps->cellbounds[0];
4980 for(i = 0; i < ps->nocellbounds-1; i++)
4981 {
4982 iLeft = i;
4983 iRight = i+1;
4984 UT_sint32 width = ps->cellbounds[iRight] - ps->cellbounds[iLeft];
4985 if (width <= 0)
4986 break;
4987 MsColSpan * pSpan = new MsColSpan();
4988 pSpan->iLeft = iLeft;
4989 pSpan->iRight = iRight;
4990 pSpan->width = width;
4991 xxx_UT_DEBUGMSG(("MsImport iLeft %d iRight %d width %d \n",iLeft,iRight,width));
4992 m_vecColumnWidths.addItem(pSpan);
4993 }
4994 }
4995
4996 if (ps->vmerges && ps->vmerges[m_iCurrentRow - 1])
4997 vspan = ps->vmerges[m_iCurrentRow - 1][m_iCurrentCell];
4998
4999 if (vspan > 0)
5000 vspan--;
5001
5002 m_iRight = m_iLeft + m_vecColumnSpansForCurrentRow.getNthItem(m_iCurrentCell);
5003 if(m_iRight == m_iLeft)
5004 {
5005 m_iRight++;
5006 }
5007 xxx_UT_DEBUGMSG(("MSWord Import: iLeft %d iRight %d m_iCurrentCell %d \n",m_iLeft,m_iRight,m_iCurrentCell));
5008 UT_return_if_fail(vspan >= 0);
5009 UT_String_sprintf(propBuffer,
5010 "left-attach:%d; right-attach:%d; top-attach:%d; bot-attach:%d; ",
5011 m_iLeft,
5012 m_iRight,
5013 m_iCurrentRow - 1,
5014 m_iCurrentRow + vspan
5015 );
5016
5017 if(apap->ptap.dyaRowHeight < 0)
5018 {
5019 // absolute height
5020 double dHin = -(apap->ptap.dyaRowHeight/1440);
5021 propBuffer += UT_String_sprintf("height:%fin;",dHin);
5022 }
5023 else if(apap->ptap.dyaRowHeight > 0)
5024 {
5025 // at-least height -- I do not think we support this for now
5026 // double dHin = -(apap->ptap.dyaRowHeight/1440);
5027 // propBuffer += UT_String_sprintf("height:%fin;",dHin);
5028 }
5029 else
5030 {
5031 // auto height, do nothing
5032 }
5033
5034 propBuffer += UT_String_sprintf("color:%s;", sMapIcoToColor(apap->ptap.rgshd[m_iCurrentCell].icoFore, true).c_str());
5035 propBuffer += UT_String_sprintf("background-color:%s;", sMapIcoToColor(apap->ptap.rgshd[m_iCurrentCell].icoBack, false).c_str());
5036 // so long as it's not the "auto" color
5037 if (apap->ptap.rgshd[m_iCurrentCell].icoBack != 0)
5038 propBuffer += "bg-style:1;";
5039
5040 {
5041 UT_LocaleTransactor t(LC_NUMERIC, "C");
5042 propBuffer += UT_String_sprintf("top-color:%s; top-thickness:%fpt; top-style:%d;",
5043 sMapIcoToColor(apap->ptap.rgtc[m_iCurrentCell].brcTop.ico, true).c_str(),
5044 brc_to_pixel(apap->ptap.rgtc[m_iCurrentCell].brcTop.dptLineWidth),
5045 sConvertLineStyle(apap->ptap.rgtc[m_iCurrentCell].brcTop.brcType));
5046 propBuffer += UT_String_sprintf("left-color:%s; left-thickness:%fpx; left-style:%d;",
5047 sMapIcoToColor(apap->ptap.rgtc[m_iCurrentCell].brcLeft.ico, true).c_str(),
5048 brc_to_pixel(apap->ptap.rgtc[m_iCurrentCell].brcLeft.dptLineWidth),
5049 sConvertLineStyle(apap->ptap.rgtc[m_iCurrentCell].brcLeft.brcType));
5050 propBuffer += UT_String_sprintf("bot-color:%s; bot-thickness:%fpx; bot-style:%d;",
5051 sMapIcoToColor(apap->ptap.rgtc[m_iCurrentCell].brcBottom.ico, true).c_str(),
5052 brc_to_pixel(apap->ptap.rgtc[m_iCurrentCell].brcBottom.dptLineWidth),
5053 sConvertLineStyle(apap->ptap.rgtc[m_iCurrentCell].brcBottom.brcType));
5054 propBuffer += UT_String_sprintf("right-color:%s; right-thickness:%fpx; right-style:%d",
5055 sMapIcoToColor(apap->ptap.rgtc[m_iCurrentCell].brcRight.ico, true).c_str(),
5056 brc_to_pixel(apap->ptap.rgtc[m_iCurrentCell].brcRight.dptLineWidth),
5057 sConvertLineStyle(apap->ptap.rgtc[m_iCurrentCell].brcRight.brcType));
5058 }
5059 xxx_UT_DEBUGMSG(("propbuffer: %s \n",propBuffer.c_str()));
5060
5061 propsArray[1] = propBuffer.c_str();
5062
5063 // do_insert:
5064 _appendStrux(PTX_SectionCell, propsArray);
5065 m_bInPara = false;
5066 m_iCurrentCell++;
5067 m_iLeft = m_iRight;
5068 xxx_UT_DEBUGMSG(("\t<CELL:%d:%d>", static_cast<int>(m_vecColumnSpansForCurrentRow.getNthItem(m_iCurrentCell - 1)), ps->vmerges[m_iCurrentRow - 1][m_iCurrentCell - 1]));
5069 }
5070
5071 //--------------------------------------------------------------------------/
5072 //--------------------------------------------------------------------------/
5073
_cell_close()5074 void IE_Imp_MsWord_97::_cell_close ()
5075 {
5076 if (!m_bCellOpen)
5077 return;
5078
5079 m_bCellOpen = false;
5080 _appendStrux(PTX_EndCell, NULL);
5081 m_bInPara = false ;
5082
5083 xxx_UT_DEBUGMSG(("</CELL>"));
5084 }
5085
5086
_generateCharProps(UT_String & s,const CHP * achp,wvParseStruct * ps)5087 void IE_Imp_MsWord_97::_generateCharProps(UT_String &s, const CHP * achp, wvParseStruct *ps)
5088 {
5089 UT_String propBuffer;
5090
5091 // set char tolower if fSmallCaps && fLowerCase
5092 if ( achp->fSmallCaps && achp->fLowerCase )
5093 m_bIsLower = true;
5094 else
5095 m_bIsLower = false;
5096
5097 // set language based the lid - TODO: do we want to handle -none- differently?
5098 s += "lang:";
5099
5100 unsigned short iLid = 0;
5101 // I am not sure how the various lids are supposed to work, but
5102 // achp->fBidi does not mean that the lidBidi is set ...
5103 if (achp->fBidi)
5104 {
5105 iLid = achp->lidBidi;
5106 }
5107 else if(ps->fib.fFarEast)
5108 {
5109 iLid = achp->lidFE;
5110 }
5111 else
5112 {
5113 iLid = achp->lid;
5114 }
5115
5116
5117 // if we do not have meaningful lid, try default ...
5118 if(!iLid)
5119 iLid = achp->lidDefault;
5120
5121 s += wvLIDToLangConverter (iLid);
5122 s += ";";
5123
5124 // decide best codepage based on the lid (as lang code above)
5125 UT_String codepage;
5126 if (achp->fBidi)
5127 codepage = wvLIDToCodePageConverter (achp->lidBidi);
5128 else if (!ps->fib.fFarEast)
5129 codepage = wvLIDToCodePageConverter (achp->lidDefault);
5130 else
5131 codepage = wvLIDToCodePageConverter (achp->lidFE);
5132
5133 // watch out for codepage 0 = unicode
5134 const char * pNUE = XAP_EncodingManager::get_instance()->getNativeUnicodeEncodingName();
5135
5136 if (codepage == "CP0")
5137 codepage = pNUE;
5138
5139 // if this is the first codepage we've seen, use it.
5140 // if we see more than one different codepage in a document, use unicode.
5141 if (!getDoc()->getEncodingName())
5142 getDoc()->setEncodingName(codepage.c_str());
5143 else if (getDoc()->getEncodingName() != codepage)
5144 getDoc()->setEncodingName(pNUE);
5145
5146 // bold text
5147 bool fBold = (achp->fBidi ? achp->fBoldBidi : achp->fBold);
5148 if (fBold) {
5149 s += "font-weight:bold;";
5150 }
5151
5152 // italic text
5153 bool fItalic = (achp->fBidi ? achp->fItalicBidi : achp->fItalic);
5154 if (fItalic) {
5155 s += "font-style:italic;";
5156 }
5157
5158 // foreground color
5159 U8 ico = (achp->fBidi ? achp->icoBidi : achp->ico);
5160 if (ico) {
5161 UT_String_sprintf(propBuffer, "color:%s;",
5162 sMapIcoToColor(ico, true).c_str());
5163 s += propBuffer;
5164 }
5165
5166 // background color
5167 ico = achp->shd.icoBack;
5168 if (ico) {
5169 if (!achp->fHighlight) {
5170 // HACK: We don't support borders and shading yet, so it seems safe to use the background
5171 // color as a substitute when there's no true highlight color (see the doc from Bug 6432)
5172 UT_String_sprintf(propBuffer, "bgcolor:%s;",
5173 sMapIcoToColor(ico, false).c_str());
5174 } else {
5175 // Note: This property won't be rendered until we have borders and shading support
5176 UT_String_sprintf(propBuffer, "background-color:%s;",
5177 sMapIcoToColor(ico, false).c_str());
5178 }
5179 s += propBuffer;
5180 }
5181
5182
5183 // underline and strike-through
5184 if (achp->fStrike || achp->kul) {
5185 s += "text-decoration:";
5186 if ((achp->fStrike || achp->fDStrike) && achp->kul) {
5187 s += "underline line-through;";
5188 } else if (achp->kul) {
5189 s += "underline;";
5190 } else {
5191 s += "line-through;";
5192 }
5193 }
5194
5195 // background color
5196 if (achp->fHighlight) {
5197 UT_String_sprintf(propBuffer,"bgcolor:%s;",
5198 sMapIcoToColor(achp->icoHighlight, false).c_str());
5199 s += propBuffer;
5200 }
5201
5202 // superscript && subscript
5203 if (achp->iss == 1) {
5204 s += "text-position: superscript;";
5205 } else if (achp->iss == 2) {
5206 s += "text-position: subscript;";
5207 }
5208
5209 if (achp->fVanish)
5210 {
5211 s += "display:none;";
5212 }
5213
5214 // font size (hps is half-points)
5215 // I have seen a bidi doc that had hpsBidi == 0, and the actual size in hps
5216 U16 hps = (achp->fBidi && achp->hpsBidi ? achp->hpsBidi : achp->hps);
5217 UT_String_sprintf(propBuffer,
5218 "font-size:%dpt;", (int)(hps/2));
5219 s += propBuffer;
5220
5221 // font family
5222 char *fname;
5223
5224 // if the FarEast flag is set, use the FarEast font,
5225 // otherwise, we'll use the ASCII font.
5226 if(achp->xchSym)
5227 {
5228 fname = wvGetFontnameFromCode(&ps->fonts, achp->ftcSym);
5229 }
5230 else if (achp->fBidi)
5231 {
5232 fname = wvGetFontnameFromCode(&ps->fonts, achp->ftcBidi);
5233 }
5234 else if (!ps->fib.fFarEast)
5235 {
5236 fname = wvGetFontnameFromCode(&ps->fonts, achp->ftcAscii);
5237 }
5238 else
5239 {
5240 fname = wvGetFontnameFromCode(&ps->fonts, achp->ftcFE);
5241 }
5242
5243 // there are times when we should use the third, Other font,
5244 // and the logic to know when somehow depends on the
5245 // character sets or encoding types? it's in the docs.
5246
5247 UT_ASSERT_HARMLESS(fname != NULL);
5248 xxx_UT_DEBUGMSG(("font-family = %s\n", fname));
5249
5250 s += "font-family:";
5251
5252 if(fname)
5253 s += fname;
5254 else
5255 s += "Times New Roman";
5256 FREEP(fname);
5257 }
5258
_generateParaProps(UT_String & s,const PAP * apap,wvParseStruct *)5259 void IE_Imp_MsWord_97::_generateParaProps(UT_String &s, const PAP * apap, wvParseStruct * /*ps*/)
5260 {
5261 UT_String propBuffer;
5262
5263 // DOM TODO: i think that this is right
5264 if (apap->fBidi == 1)
5265 {
5266 s += "dom-dir:rtl;";
5267 }
5268 else
5269 {
5270 s += "dom-dir:ltr;";
5271 }
5272
5273 // paragraph alignment/justification
5274 switch(apap->jc)
5275 {
5276 case 0:
5277 s += "text-align:left;";
5278 break;
5279 case 1:
5280 s += "text-align:center;";
5281 break;
5282 case 2:
5283 s += "text-align:right;";
5284 break;
5285 case 3:
5286 s += "text-align:justify;";
5287 break;
5288 case 4:
5289 /* this type of justification is of unknown purpose and is
5290 * undocumented , but it shows up in asian documents so someone
5291 * should be able to tell me what it is someday
5292 */
5293 s += "text-align:justify;";
5294 break;
5295 }
5296
5297 // keep paragraph together?
5298 if (apap->fKeep) {
5299 s += "keep-together:yes;";
5300 }
5301
5302 // keep with next paragraph?
5303 if (apap->fKeepFollow) {
5304 s += "keep-with-next:yes;";
5305 }
5306
5307 // widowed/orphaned lines
5308 if (!apap->fWidowControl) {
5309 // these AbiWord properties give the same effect
5310 s += "orphans:0;widows:0;";
5311 }
5312
5313 // line spacing (single-spaced, double-spaced, etc.)
5314 if (apap->lspd.fMultLinespace) {
5315 UT_String_sprintf(propBuffer,
5316 "line-height:%s;",
5317 UT_convertToDimensionlessString( (static_cast<double>(apap->lspd.dyaLine) / 240), "1.1"));
5318 s += propBuffer;
5319 } else {
5320 // TODO: handle exact line heights
5321 }
5322
5323 //
5324 // margins
5325 //
5326
5327 // margin-right
5328 if (apap->dxaRight) {
5329 UT_String_sprintf(propBuffer,
5330 "margin-right:%s;",
5331 UT_convertInchesToDimensionString(m_dim, (static_cast<double>(apap->dxaRight) / 1440)));
5332 s += propBuffer;
5333 }
5334
5335 // margin-left
5336 if (apap->dxaLeft) {
5337 UT_String_sprintf(propBuffer,
5338 "margin-left:%s;",
5339 UT_convertInchesToDimensionString(m_dim, (static_cast<double>(apap->dxaLeft) / 1440)));
5340 s += propBuffer;
5341 }
5342
5343 // margin-left first line (indent)
5344 if (apap->dxaLeft1) {
5345 UT_String_sprintf(propBuffer,
5346 "text-indent:%s;",
5347 UT_convertInchesToDimensionString(m_dim, (static_cast<double>(apap->dxaLeft1) / 1440)));
5348 s += propBuffer;
5349 }
5350
5351 // margin-top
5352 if (apap->dyaBefore) {
5353 UT_String_sprintf(propBuffer,
5354 "margin-top:%dpt;", (apap->dyaBefore / 20));
5355 s += propBuffer;
5356 }
5357
5358 // margin-bottom
5359 if (apap->dyaAfter) {
5360 UT_String_sprintf(propBuffer,
5361 "margin-bottom:%dpt;", (apap->dyaAfter / 20));
5362 s += propBuffer;
5363 }
5364
5365 // tab stops
5366 if (apap->itbdMac) {
5367 propBuffer += "tabstops:";
5368
5369 for (int iTab = 0; iTab < apap->itbdMac; iTab++) {
5370 propBuffer += UT_String_sprintf("%s/",
5371 UT_convertInchesToDimensionString(m_dim,
5372 ((static_cast<double>(apap->rgdxaTab[iTab])) / 1440)));
5373
5374 switch (apap->rgtbd[iTab].jc) {
5375 case 1:
5376 propBuffer += "C,";
5377 break;
5378 case 2:
5379 propBuffer += "R,";
5380 break;
5381 case 3:
5382 propBuffer += "D,";
5383 break;
5384 case 4:
5385 propBuffer += "B,";
5386 break;
5387 case 0:
5388 default:
5389 propBuffer += "L,";
5390 break;
5391 }
5392 }
5393 // replace final comma with a semi-colon
5394 propBuffer[propBuffer.size()-1] = ';';
5395 s += propBuffer;
5396 }
5397
5398 // foreground color
5399 U8 ico = apap->shd.icoFore;
5400 if (ico) {
5401 UT_String_sprintf(propBuffer, "color:%s;",
5402 sMapIcoToColor(ico, true).c_str());
5403 s += propBuffer;
5404 }
5405
5406 // background color
5407 ico = apap->shd.icoBack;
5408 if (ico) {
5409 UT_String_sprintf(propBuffer, "background-color:%s;",
5410 sMapIcoToColor(ico, false).c_str());
5411 s += propBuffer;
5412 }
5413
5414 // remove the trailing semi-colon
5415 s [s.size()-1] = 0;
5416
5417 }
5418
5419
5420 /*! imports a stylesheet from our document */
5421
5422 #define PT_MAX_ATTRIBUTES 8
_handleStyleSheet(const wvParseStruct * ps)5423 void IE_Imp_MsWord_97::_handleStyleSheet(const wvParseStruct *ps)
5424 {
5425 UT_uint32 iCount = ps->stsh.Stshi.cstd;
5426 // UT_uint16 iBase = ps->stsh.Stshi.cbSTDBaseInFile;
5427
5428 const gchar * attribs[PT_MAX_ATTRIBUTES*2 + 1];
5429 UT_uint32 iOffset = 0;
5430
5431 const STD * pSTD = ps->stsh.std;
5432 const STD * pSTDBase = pSTD;
5433 UT_String props;
5434 char * s = NULL;
5435 char * b = NULL;
5436 char * f = NULL;
5437
5438 UT_return_if_fail(pSTD != NULL);
5439
5440 for(UT_uint32 i = 0; i < iCount; i++, pSTD++)
5441 {
5442 iOffset = 0;
5443
5444 if(!pSTD->xstzName)
5445 {
5446 continue;
5447 }
5448
5449 if(pSTD->cupx <= 1)
5450 {
5451 continue;
5452 }
5453
5454 //UT_DEBUGMSG(("Style name: [%s], id: %d\n", pSTD->xstzName, pSTD->sti));
5455
5456 attribs[iOffset++] = PT_NAME_ATTRIBUTE_NAME;
5457
5458 // make sure we use standard names for standard styles
5459 const gchar * pName = s_translateStyleId(pSTD->sti);
5460
5461 if(pName)
5462 {
5463 attribs[iOffset++] = pName;
5464 }
5465 else
5466 {
5467 s = s_convert_to_utf8(ps, pSTD->xstzName);
5468 attribs[iOffset++] = s;
5469 }
5470
5471 UT_DEBUGMSG(("Style name: [%s], id: %d\n", attribs[iOffset-1], pSTD->sti));
5472
5473
5474 attribs[iOffset++] = PT_TYPE_ATTRIBUTE_NAME;
5475 if(pSTD->sgc == sgcChp)
5476 {
5477 attribs[iOffset++] = "C";
5478 }
5479 else
5480 {
5481 attribs[iOffset++] = "P";
5482
5483 // also handle the followed-by, since that only applies to
5484 // paragraph style
5485 if(pSTD->istdNext != istdNil && pSTD->istdNext<iCount)
5486 {
5487 attribs[iOffset++] = PT_FOLLOWEDBY_ATTRIBUTE_NAME;
5488 const char * t = s_translateStyleId(pSTD->istdNext);
5489 if(!t)
5490 {
5491 t = f = s_convert_to_utf8(ps,(pSTDBase + pSTD->istdNext)->xstzName);
5492 }
5493 attribs[iOffset++] = t;
5494 }
5495 }
5496
5497 if(pSTD->istdBase != istdNil)
5498 {
5499 attribs[iOffset++] = PT_BASEDON_ATTRIBUTE_NAME;
5500 const char * t = s_translateStyleId(pSTD->istdBase);
5501 if(!t)
5502 t = b = s_convert_to_utf8(ps,(pSTDBase + pSTD->istdBase)->xstzName);
5503 attribs[iOffset++] = t;
5504 }
5505
5506 // now we want to generate props
5507 props.clear();
5508
5509 wvParseStruct * PS = const_cast<wvParseStruct *>(ps);
5510
5511 CHP achp;
5512 wvInitCHPFromIstd(&achp, (U16)i, &(PS->stsh));
5513 _generateCharProps(props,&achp,PS);
5514
5515 if(props.size())
5516 {
5517 props += ";";
5518 }
5519
5520 PAP apap;
5521 wvInitPAPFromIstd (&apap, (U16)i, &(PS->stsh));
5522 _generateParaProps(props,&apap,PS);
5523
5524 // remove trailing semicolon
5525 if(props[props.size()-1] == ';')
5526 {
5527 props[props.size()-1] = 0;
5528 }
5529
5530 xxx_UT_DEBUGMSG(("Style props: %s\n", props.c_str()));
5531
5532 if(props.size())
5533 {
5534 attribs[iOffset++] = PT_PROPS_ATTRIBUTE_NAME;
5535 attribs[iOffset++] = props.c_str();
5536 }
5537
5538 attribs[iOffset] = NULL;
5539
5540 PD_Style * pStyle = NULL;
5541 if(getDoc()->getStyle(pSTD->xstzName, &pStyle))
5542 {
5543 xxx_UT_DEBUGMSG(("Redefining style %s\n", pSTD->xstzName));
5544 pStyle->addAttributes(attribs);
5545 pStyle->getBasedOn();
5546 pStyle->getFollowedBy();
5547 }
5548 else
5549 {
5550 getDoc()->appendStyle(attribs);
5551 }
5552
5553 FREEP(s);
5554 FREEP(b);
5555 FREEP(f);
5556 }
5557 }
5558
_handleBookmarks(const wvParseStruct * ps)5559 int IE_Imp_MsWord_97::_handleBookmarks(const wvParseStruct *ps)
5560 {
5561 UT_uint32 i,j;
5562
5563 if(m_pBookmarks)
5564 {
5565 for(i = 0; i < m_iBookmarksCount; i++)
5566 {
5567 if(m_pBookmarks[i].name && m_pBookmarks[i].start)
5568 {
5569 delete []m_pBookmarks[i].name;
5570 m_pBookmarks[i].name = NULL;
5571 }
5572 }
5573 delete [] m_pBookmarks;
5574 }
5575 BKF *bkf;
5576 BKL *bkl;
5577 U32 *posf, *posl, nobkf, nobkl;
5578
5579 if(!wvGetBKF_PLCF (&bkf, &posf, &nobkf, ps->fib.fcPlcfbkf, ps->fib.lcbPlcfbkf, ps->tablefd))
5580 {
5581 m_iBookmarksCount = nobkf;
5582 }
5583 else
5584 m_iBookmarksCount = 0;
5585
5586 if(!wvGetBKL_PLCF (&bkl, &posl, &nobkl, ps->fib.fcPlcfbkl, ps->fib.lcbPlcfbkl, ps->fib.fcPlcfbkf, ps->fib.lcbPlcfbkf, ps->tablefd))
5587 {
5588 m_iBookmarksCount += nobkl;
5589 }
5590 else
5591 {
5592 if(m_iBookmarksCount > 0)
5593 {
5594 //g_free the bkf and posf
5595 wvFree(bkf);
5596 wvFree(posf);
5597 m_iBookmarksCount = 0;
5598 }
5599 }
5600 UT_return_val_if_fail(nobkl == nobkf, 0);
5601 if(m_iBookmarksCount > 0)
5602 {
5603 try
5604 {
5605 m_pBookmarks = new bookmark[m_iBookmarksCount];
5606 }
5607 catch(...)
5608 {
5609 m_pBookmarks = NULL;
5610 }
5611
5612 UT_return_val_if_fail(m_pBookmarks, 0);
5613 for(i = 0; i < nobkf; i++)
5614 {
5615 m_pBookmarks[i].name = _getBookmarkName(ps, i);
5616 m_pBookmarks[i].pos = posf[i];
5617 m_pBookmarks[i].start = true;
5618 }
5619
5620 for(j = i; j < nobkl + i; j++)
5621 {
5622 // since the name is shared with the start of the bookmark,
5623 // we reuse it
5624 UT_sint32 iBkf = static_cast<UT_sint32>(bkl[j-i].ibkf) < 0 ? nobkl + static_cast<UT_sint32>(bkl[j-i].ibkf) : bkl[j-i].ibkf;
5625 m_pBookmarks[j].name = m_pBookmarks[iBkf].name;
5626 m_pBookmarks[j].pos = posl[j - i];
5627 m_pBookmarks[j].start = false;
5628 }
5629 // g_free bkf, bkl, posf, posl
5630 wvFree(bkf);
5631 wvFree(bkl);
5632 wvFree(posf);
5633 wvFree(posl);
5634
5635 //now sort the bookmarks by position
5636 qsort(static_cast<void*>(m_pBookmarks),
5637 m_iBookmarksCount, sizeof(bookmark),
5638 s_cmp_bookmarks_qsort);
5639
5640 #ifdef DEBUG
5641 for(UT_uint32 k = 0; k < m_iBookmarksCount; k++)
5642 {
5643 UT_DEBUGMSG(("Bookmark: name [%s], pos %d, start %d\n",
5644 m_pBookmarks[k].name,m_pBookmarks[k].pos,m_pBookmarks[k].start));
5645 }
5646
5647 #endif
5648 }
5649 return 0;
5650 }
5651
_handleNotes(const wvParseStruct * ps)5652 void IE_Imp_MsWord_97::_handleNotes(const wvParseStruct *ps)
5653 {
5654 UT_uint32 i;
5655
5656 DELETEPV(m_pFootnotes);
5657 DELETEPV(m_pEndnotes);
5658
5659 m_iFootnotesCount = 0;
5660 m_iEndnotesCount = 0;
5661 UT_uint32 *pPLCF_ref = NULL;
5662 UT_uint32 *pPLCF_txt = NULL;
5663
5664 bool bNoteError = false;
5665
5666 if(ps->fib.lcbPlcffndTxt)
5667 {
5668 /* the docs say -1, but that is an error */
5669 m_iFootnotesCount = ps->fib.lcbPlcffndTxt/4 - 2;
5670 try
5671 {
5672 m_pFootnotes = new footnote[m_iFootnotesCount];
5673 }
5674 catch(...)
5675 {
5676 m_pFootnotes = NULL;
5677 }
5678
5679 UT_return_if_fail(m_pFootnotes);
5680
5681 // this is really quite straight forward; we retrieve the PLCF
5682 // chunks that describe the references/text of the footnotes, and
5683 // then use those to init our footnote stucts
5684 // for n footnotes the reference PLCF is a sequnce of (n+1) doc
5685 // positions (UT_uint32) followed by n type flags (UT_uint16)
5686 // the text PLCF is a sequence of n+2 positions (UT_uint32) of the footnote
5687 // text in its data stream
5688 if(wvGetPLCF((void **) &pPLCF_ref, ps->fib.fcPlcffndRef, ps->fib.lcbPlcffndRef, ps->tablefd))
5689 {
5690 bNoteError = true;
5691 }
5692
5693 if(!bNoteError &&
5694 wvGetPLCF((void **) &pPLCF_txt, ps->fib.fcPlcffndTxt, ps->fib.lcbPlcffndTxt, ps->tablefd))
5695 {
5696 wvFree(pPLCF_ref);
5697 bNoteError = true;
5698 }
5699
5700 if(!bNoteError)
5701 {
5702 UT_return_if_fail(pPLCF_ref && pPLCF_txt);
5703 for(i = 0; i < m_iFootnotesCount; i++)
5704 {
5705 m_pFootnotes[i].ref_pos = pPLCF_ref[i];
5706 m_pFootnotes[i].txt_pos = pPLCF_txt[i] + m_iFootnotesStart;
5707 m_pFootnotes[i].txt_len = pPLCF_txt[i+1] - pPLCF_txt[i];
5708 // idx is an index of int16.
5709 size_t idx = 2 * (m_iFootnotesCount + 1) + i;
5710 // If you hit this assert, congratulation, you found a buggy file
5711 //
5712 UT_ASSERT(idx * 2 < ps->fib.lcbPlcffndRef);
5713 if (idx * 2 >= ps->fib.lcbPlcffndRef) {
5714 bNoteError = true;
5715 // We are done with the footnotes here.
5716 // This is as graceful as it can be.
5717 m_iFootnotesCount--;
5718 break;
5719 }
5720 UT_uint32 iType = ((UT_uint16*)pPLCF_ref)[idx];
5721 m_pFootnotes[i].type = iType;
5722 m_pFootnotes[i].pid = getDoc()->getUID(UT_UniqueId::Footnote);
5723 UT_DEBUGMSG(("IE_Imp_MsWord_97::_handleNotes: fnote %d, rpos %d, tpos %d, type %d\n",
5724 i, m_pFootnotes[i].ref_pos, m_pFootnotes[i].txt_pos, iType));
5725 }
5726
5727 wvFree(pPLCF_ref);
5728 wvFree(pPLCF_txt);
5729 }
5730
5731 // next, deal footnote formatting matters
5732 const gchar * props[] = {"document-footnote-type", NULL,
5733 "document-footnote-initial", NULL,
5734 "document-footnote-restart-section", NULL,
5735 "document-footnote-restart-page", NULL,
5736 NULL};
5737
5738 switch(ps->dop.rncFtn)
5739 {
5740 case 0:
5741 props[5] = "0";
5742 props[7] = "0";
5743 break;
5744 case 1:
5745 props[5] = "1";
5746 props[7] = "0";
5747 break;
5748 case 2:
5749 props[5] = "0";
5750 props[7] = "1";
5751 break;
5752 default:
5753 UT_ASSERT_HARMLESS(UT_NOT_REACHED);
5754 }
5755
5756 UT_String number;
5757 UT_String_sprintf(number, "%d", ps->dop.nFtn);
5758 props[3] = number.c_str();
5759
5760 switch(ps->dop.nfcFtnRef)
5761 {
5762 case 0:
5763 props[1] = "numeric";
5764 break;
5765 case 1:
5766 props[1] = "upper-roman";
5767 break;
5768 case 2:
5769 props[1] = "lower-roman";
5770 break;
5771 case 3:
5772 props[1] = "upper";
5773 break;
5774 case 4:
5775 props[1] = "lower";
5776 break;
5777 default:
5778 UT_ASSERT_HARMLESS(UT_NOT_REACHED);
5779 props[1] = "";
5780 break;
5781 }
5782
5783 getDoc()->setProperties(&props[0]);
5784 }
5785
5786 if(ps->fib.lcbPlcfendTxt)
5787 {
5788 m_iEndnotesCount = ps->fib.lcbPlcfendTxt/4 - 2;
5789 try
5790 {
5791 m_pEndnotes = new footnote[m_iEndnotesCount];
5792 }
5793 catch(...)
5794 {
5795 m_pEndnotes = NULL;
5796 }
5797
5798 UT_return_if_fail(m_pEndnotes);
5799
5800 bNoteError = false;
5801 if(wvGetPLCF((void **) &pPLCF_ref, ps->fib.fcPlcfendRef, ps->fib.lcbPlcfendRef, ps->tablefd))
5802 {
5803 bNoteError = true;
5804 }
5805
5806 if(!bNoteError &&
5807 wvGetPLCF((void **) &pPLCF_txt, ps->fib.fcPlcfendTxt, ps->fib.lcbPlcfendTxt, ps->tablefd))
5808 {
5809 wvFree(pPLCF_ref);
5810 bNoteError = true;
5811 }
5812
5813 if(!bNoteError)
5814 {
5815 UT_return_if_fail(pPLCF_ref && pPLCF_txt);
5816 for(i = 0; i < m_iEndnotesCount; i++)
5817 {
5818 m_pEndnotes[i].ref_pos = pPLCF_ref[i];
5819 m_pEndnotes[i].txt_pos = pPLCF_txt[i] + m_iEndnotesStart;
5820 m_pEndnotes[i].txt_len = pPLCF_txt[i+1] - pPLCF_txt[i];
5821 // idx is an index of int16.
5822 size_t idx = 2 * (m_iEndnotesCount + 1) + i;
5823 // If you hit this assert, congratulation, you found a buggy file
5824 //
5825 UT_ASSERT(idx * 2 < ps->fib.lcbPlcfendRef);
5826 if (idx * 2 >= ps->fib.lcbPlcfendRef) {
5827 bNoteError = true;
5828 // We are done with the endnotes here.
5829 // This is as graceful as it can be.
5830 m_iEndnotesCount--;
5831 break;
5832 }
5833 UT_uint32 iType = ((UT_uint16*)pPLCF_ref)[idx];
5834 m_pEndnotes[i].type = iType;
5835 m_pEndnotes[i].pid = getDoc()->getUID(UT_UniqueId::Endnote);
5836 UT_DEBUGMSG(("IE_Imp_MsWord_97::_handleNotes: enote %d, rpos %d, tpos %d, type %d\n",
5837 i, m_pEndnotes[i].ref_pos, m_pEndnotes[i].txt_pos, iType));
5838 }
5839
5840 wvFree(pPLCF_ref);
5841 wvFree(pPLCF_txt);
5842 }
5843 // next, deal endnote formatting matters
5844 const gchar * props[] = {"document-endnote-type", NULL,
5845 "document-endnote-initial", NULL,
5846 "document-endnote-restart-section", NULL,
5847 "document-endnote-restart-page", NULL,
5848 "document-endnote-place-endsection",NULL,
5849 "document-endnote-place-enddoc", NULL,
5850 NULL};
5851
5852 switch(ps->dop.rncEdn)
5853 {
5854 case 0:
5855 props[5] = "0";
5856 props[7] = "0";
5857 break;
5858 case 1:
5859 props[5] = "1";
5860 props[7] = "0";
5861 break;
5862 case 2:
5863 props[5] = "0";
5864 props[7] = "1";
5865 break;
5866
5867 default:
5868 UT_ASSERT_HARMLESS(UT_NOT_REACHED);
5869 }
5870
5871 UT_String number;
5872 UT_String_sprintf(number, "%d", ps->dop.nEdn);
5873 props[3] = number.c_str();
5874
5875 switch(ps->dop.nfcEdnRef)
5876 {
5877 case 0:
5878 props[1] = "numeric";
5879 break;
5880 case 1:
5881 props[1] = "upper-roman";
5882 break;
5883 case 2:
5884 props[1] = "lower-roman";
5885 break;
5886 case 3:
5887 props[1] = "upper";
5888 break;
5889 case 4:
5890 props[1] = "lower";
5891 break;
5892
5893 default:
5894 UT_ASSERT_HARMLESS(UT_NOT_REACHED);
5895
5896 }
5897
5898 switch(ps->dop.epc)
5899 {
5900 case 0:
5901 props[9] = "1";
5902 props[11] = "0";
5903 break;
5904 case 3:
5905 props[9] = "0";
5906 props[11] = "1";
5907 break;
5908 default:
5909 UT_ASSERT_HARMLESS(UT_NOT_REACHED);
5910
5911 }
5912
5913 getDoc()->setProperties(&props[0]);
5914 }
5915 }
5916
_handleTextBoxes(const wvParseStruct * ps)5917 void IE_Imp_MsWord_97::_handleTextBoxes(const wvParseStruct *ps)
5918 {
5919 UT_uint32 *pPLCF_dgg = NULL;
5920 UT_uint32 *pPLCF_txt = NULL;
5921
5922 DELETEPV(m_pTextboxes);
5923
5924 bool bTextboxError = false;
5925 m_iTextboxCount = 0;
5926 UT_sint32 i = 0;
5927 if(ps->fib.ccpTxbx > 0)
5928 {
5929 m_iTextboxCount = ps->nooffspa;
5930 m_pTextboxes = new textbox [m_iTextboxCount];
5931
5932
5933 // this is really quite straight forward; we retrieve the PLCF
5934 // chunks that describe the references/text of the textboxes, and
5935 // then use those to init our textbox stucts
5936 // for n textboxes the reference PLCF is a sequnce of (n+1) doc
5937 // positions (UT_uint32) followed by n type flags (UT_uint16)
5938 // the text PLCF is a sequence of n+2 positions (UT_uint32) of the
5939 // textbox
5940 // text in its data stream
5941
5942 // This appears to be identical to how footnotes/endnotes are handled.
5943
5944 if(wvGetPLCF((void **) &pPLCF_dgg, ps->fib.fcDggInfo, ps->fib.lcbDggInfo, ps->tablefd))
5945 {
5946 bTextboxError = true;
5947 }
5948
5949 UT_DEBUGMSG(("IE_Imp_MsWord_97::_handleTextBoxes: ps->fib.fcDggInfo %d ps->fib.lcbDggInfo %d \n", ps->fib.fcDggInfo,ps->fib.lcbDggInfo));
5950
5951 UT_DEBUGMSG(("IE_Imp_MsWord_97::_handleTextBoxes: Text size %d bytes\n", ps->fib.ccpTxbx));
5952
5953 UT_DEBUGMSG(("IE_Imp_MsWord_97::_handleTextBoxes: fib.lid %d \n", ps->fib.lid));
5954 if(!bTextboxError &&
5955 wvGetPLCF((void **) &pPLCF_txt, ps->fib.fcPlcftxbxTxt, ps->fib.lcbPlcftxbxTxt, ps->tablefd))
5956 {
5957 bTextboxError = true;
5958 }
5959 if(!bTextboxError)
5960 {
5961 UT_return_if_fail(pPLCF_dgg && pPLCF_txt);
5962 for(i = 0; i < m_iTextboxCount; i++)
5963 {
5964 m_pTextboxes[i].ref_pos = pPLCF_dgg[i];
5965 m_pTextboxes[i].txt_pos = pPLCF_txt[i] + m_iTextboxesStart;
5966 m_pTextboxes[i].txt_len = pPLCF_txt[i+1] - pPLCF_txt[i];
5967 UT_DEBUGMSG(("IE_Imp_MsWord_97::_handleTextbox: Tbox %d, rpos %d, tpos %d len %d \n",
5968 i, m_pTextboxes[i].ref_pos, m_pTextboxes[i].txt_pos,m_pTextboxes[i].txt_len));
5969 }
5970
5971 wvFree(pPLCF_dgg);
5972 wvFree(pPLCF_txt);
5973
5974 }
5975 }
5976
5977 }
5978
5979 /*!
5980 Determines whether footnote is to be inserted at present document
5981 position, and if so takes care of inserting the reference marker,
5982 note section and anchor marker.
5983
5984 returns true if a note was successfully inserted, false otherwise;
5985 if the return value is true, the caller should ignore the present character
5986
5987 we will take advantage of the notes being in document order, so we
5988 can just remember the last note we inserted, rather than having to
5989 search through the list
5990
5991 */
_insertNoteIfAppropriate(UT_uint32 iDocPosition,UT_UCS4Char c)5992 bool IE_Imp_MsWord_97::_insertNoteIfAppropriate(UT_uint32 iDocPosition, UT_UCS4Char c)
5993 {
5994 if(m_bInFNotes || m_bInENotes)
5995 return false;
5996
5997 bool res = false;
5998 //now search for position iDocPosition in our footnnote list;
5999 if(!m_pFootnotes || m_iFootnotesCount == 0 || m_iNextFNote >= m_iFootnotesCount)
6000 {
6001 goto endnotes;
6002 }
6003
6004 if(m_pFootnotes[m_iNextFNote].ref_pos == iDocPosition)
6005 {
6006 res |= _insertFootnote(m_pFootnotes + m_iNextFNote++,c);
6007 }
6008
6009 endnotes:
6010 if(!m_pEndnotes || m_iEndnotesCount == 0 || m_iNextENote >= m_iEndnotesCount)
6011 {
6012 goto finish;
6013 }
6014
6015 if(m_pEndnotes[m_iNextENote].ref_pos == iDocPosition)
6016 {
6017 res |= _insertEndnote(m_pEndnotes + m_iNextENote++,c);
6018 }
6019
6020
6021 finish:
6022 return res;
6023 }
6024
6025 /* returns true on successful insertion of the reference marker */
_insertFootnote(const footnote * f,UT_UCS4Char c)6026 bool IE_Imp_MsWord_97::_insertFootnote(const footnote * f, UT_UCS4Char c)
6027 {
6028 UT_return_val_if_fail(f, true);
6029 xxx_UT_DEBUGMSG(("IE_Imp_MsWord_97::_insertFootnote: pos: %d, pid %d\n", f->ref_pos, f->pid));
6030
6031 this->_flush();
6032
6033 bool res = true;
6034 const gchar * attribsS[3] ={"footnote-id",NULL,NULL};
6035 const gchar* attribsR[9] = {"type", "footnote_ref", "footnote-id",
6036 NULL, NULL, NULL, NULL, NULL, NULL};
6037 UT_uint32 iOffR = 3;
6038
6039 UT_String footpid;
6040 UT_String_sprintf(footpid,"%i",f->pid);
6041 attribsS[1] = footpid.c_str();
6042
6043 // for attribsR we need to set props and style in order to
6044 // preserve any formating set by a previous call to _beginChar()
6045 attribsR[iOffR++] = footpid.c_str();
6046 attribsR[iOffR++] = "props";
6047 attribsR[iOffR++] = m_charProps.c_str();
6048 if(!m_charStyle.empty())
6049 {
6050 attribsR[iOffR++] = "style";
6051 attribsR[iOffR++] = m_charStyle.c_str();
6052 }
6053
6054 UT_return_val_if_fail( iOffR <= sizeof(attribsR)/sizeof(gchar*), false );
6055
6056 if(f->type)
6057 {
6058 // auto-generated reference -- insert a field
6059 res &= _appendObject(PTO_Field, attribsR);
6060 }
6061 else
6062 {
6063 // manually-inserted marker, we need to issue the character
6064 // TODO -- in word the marker can consist of several
6065 // characters, but I have no idea how Word knows how many;
6066 // we at least need to reset the character formatting again
6067 // after we have inserted the footnote section
6068 res &= _appendSpan(&c,1);
6069 }
6070
6071 _appendStrux(PTX_SectionFootnote,attribsS);
6072 _appendStrux(PTX_EndFootnote,NULL);
6073
6074 if(!f->type)
6075 {
6076 // set the formatting to whatever it was, in case the footnote
6077 // marker is longer than one character
6078 _appendFmt(&attribsR[0]);
6079 }
6080
6081 return res;
6082 }
6083
_insertEndnote(const footnote * f,UT_UCS4Char c)6084 bool IE_Imp_MsWord_97::_insertEndnote(const footnote * f, UT_UCS4Char c)
6085 {
6086 UT_return_val_if_fail(f, true);
6087 xxx_UT_DEBUGMSG(("IE_Imp_MsWord_97::_insertEndnote: pos: %d, pid %d\n", f->ref_pos, f->pid));
6088
6089 this->_flush();
6090
6091 bool res = true;
6092 const gchar * attribsS[3] ={"endnote-id",NULL,NULL};
6093 const gchar* attribsR[9] = {"type", "endnote_ref", "endnote-id",
6094 NULL, NULL, NULL, NULL, NULL, NULL};
6095 UT_uint32 iOffR = 3;
6096
6097 UT_String footpid;
6098 UT_String_sprintf(footpid,"%i",f->pid);
6099 attribsS[1] = footpid.c_str();
6100
6101 // for attribsR we need to set props and style in order to
6102 // preserve any formating set by a previous call to _beginChar()
6103 attribsR[iOffR++] = footpid.c_str();
6104 attribsR[iOffR++] = "props";
6105 attribsR[iOffR++] = m_charProps.c_str();
6106 attribsR[iOffR++] = "style";
6107 attribsR[iOffR++] = m_charStyle.c_str();
6108
6109 UT_return_val_if_fail(iOffR <= sizeof(attribsR)/sizeof(gchar*), false);
6110
6111 if(f->type)
6112 {
6113 // auto-generated reference -- insert a field
6114 res &= _appendObject(PTO_Field, attribsR);
6115 }
6116 else
6117 {
6118 // manually-inserted marker, we need to issue the character
6119 // TODO -- in word the marker can consist of several
6120 // characters, but I have no idea how Word knows how many;
6121 // we at least need to reset the character formatting again
6122 // after we have inserted the footnote section
6123 res &= _appendSpan(&c,1);
6124 }
6125
6126 _appendStrux(PTX_SectionEndnote,attribsS);
6127 _appendStrux(PTX_EndEndnote,NULL);
6128
6129 if(!f->type)
6130 {
6131 // set the formatting to whatever it was, in case the footnote
6132 // marker is longer than one character
6133 _appendFmt(&attribsR[0]);
6134 }
6135
6136 return res;
6137 }
6138
6139
6140 /*!
6141 This function makes sure that the insert is happening at the
6142 correct place if we are in a segment which belongs to one of the
6143 set of notes (foonotes & endnote, in future also annotations).
6144
6145 \parameter UT_uint32 iDocPosition: character position in the Word
6146 document stream
6147 \return returns false if the present character is to be skipped,
6148 true otherwise
6149 */
_handleNotesText(UT_uint32 iDocPosition)6150 bool IE_Imp_MsWord_97::_handleNotesText(UT_uint32 iDocPosition)
6151 {
6152 if(iDocPosition >= m_iFootnotesStart && iDocPosition < m_iFootnotesEnd)
6153 {
6154 // upon entry into the footnote-land, we will need to search for
6155 // the first footnote section in our document, note that we are
6156 // in a footnote section, note at what doc position the current
6157 // footnote will end, and then let things run until we reach
6158 // the end of the note; then we need to search for the next
6159 // doc section, etc.
6160
6161 // if the footnote marker is auto-generated, we need to remove
6162 // the special character from the stream (happens
6163 // automatically)
6164
6165 // when in a footnote section, all the functions that normally
6166 // use append methods will need to use insert methods instead
6167
6168 if(!m_bInFNotes)
6169 {
6170 xxx_UT_DEBUGMSG(("In footnote territory: pos %d\n", iDocPosition));
6171 m_bInFNotes = true;
6172 m_bInHeaders = false;
6173
6174 // we will reuse the m_iNextFNote variable, noting it
6175 // refers to the CURRENT footnote
6176 m_iNextFNote = 0;
6177 _findNextFNoteSection();
6178 _endSect(NULL,0,NULL,0);
6179 m_bInSect = true;
6180 }
6181
6182 // the current footnote will end at pos
6183 // f.txt_pos + f.txt_len,
6184 if( m_iNextFNote < m_iFootnotesCount && iDocPosition == m_pFootnotes[m_iNextFNote].txt_pos +
6185 m_pFootnotes[m_iNextFNote].txt_len)
6186 {
6187 m_iNextFNote++;
6188
6189 // after the last footnote there is an extra paragraph
6190 // marker that is still a part of the footnote section --
6191 // we do not want that marker imported
6192 if(m_iNextFNote < m_iFootnotesCount)
6193 _findNextFNoteSection();
6194 else
6195 {
6196 UT_DEBUGMSG(("End of footnotes marker at pos %d\n", iDocPosition));
6197 return false;
6198 }
6199 }
6200
6201 // if this is the first character in a footnote, insert the reference
6202 if(iDocPosition == m_pFootnotes[m_iNextFNote].txt_pos)
6203 {
6204 const gchar* attribsA[] = {"type", "footnote_anchor",
6205 "footnote-id", NULL,
6206 "props", NULL,
6207 "style", NULL,
6208 NULL};
6209
6210 const gchar * attribsB[] = {"props", NULL,
6211 "style", NULL,
6212 NULL};
6213
6214 UT_String footpid;
6215 UT_String_sprintf(footpid,"%i",m_pFootnotes[m_iNextFNote].pid);
6216 attribsA[3] = footpid.c_str();
6217 attribsA[5] = m_charProps.c_str();
6218 attribsA[7] = m_charStyle.c_str();
6219
6220 attribsB[1] = m_paraProps.c_str();
6221 attribsB[3] = m_paraStyle.c_str();
6222
6223 _appendStrux(PTX_Block,attribsB);
6224 m_bInPara = true;
6225
6226 if(m_pFootnotes[m_iNextFNote].type)
6227 {
6228 _appendObject(PTO_Field, attribsA);
6229 return false;
6230 }
6231 return true;
6232 }
6233
6234 // do not return !!!
6235 xxx_UT_DEBUGMSG(("In footnote %d, on pos %d\n", m_iNextFNote, iDocPosition));
6236 }
6237 else if(m_bInFNotes)
6238 {
6239 m_bInFNotes = false;
6240 xxx_UT_DEBUGMSG(("Leaving footnote territory\n"));
6241 // move to the end of the do end of the document ...
6242
6243 // do not return !!!
6244 }
6245
6246 if(iDocPosition >= m_iEndnotesStart && iDocPosition < m_iEndnotesEnd)
6247 {
6248 if(!m_bInENotes)
6249 {
6250 xxx_UT_DEBUGMSG(("In endnote territory: pos %d\n", iDocPosition));
6251 m_bInENotes = true;
6252 m_bInHeaders = false;
6253 m_iNextENote = 0;
6254 _findNextENoteSection();
6255 _endSect(NULL,0,NULL,0);
6256 m_bInSect = true;
6257 }
6258
6259 if( m_iNextENote < m_iEndnotesCount && iDocPosition == m_pEndnotes[m_iNextENote].txt_pos +
6260 m_pEndnotes[m_iNextENote].txt_len)
6261 {
6262 m_iNextENote++;
6263
6264 // after the last endnote there is an extra paragraph
6265 // marker that is still a part of the endnote section --
6266 // we do not want that marker imported
6267 if(m_iNextENote < m_iEndnotesCount)
6268 _findNextENoteSection();
6269 else
6270 {
6271 xxx_UT_DEBUGMSG(("End of endnotes marker at pos %d\n", iDocPosition));
6272 return false;
6273 }
6274 }
6275
6276 // if this is the first character in an endnote, insert the anchor
6277 if( m_iNextENote < m_iEndnotesCount && iDocPosition == m_pEndnotes[m_iNextENote].txt_pos)
6278 {
6279 const gchar * attribsA[] = {"type", "endnote_anchor",
6280 "endnote-id", NULL,
6281 "props", NULL,
6282 "style", NULL,
6283 NULL};
6284
6285 const gchar * attribsB[] = {"props", NULL,
6286 "style", NULL,
6287 NULL};
6288
6289 UT_String footpid;
6290 UT_String_sprintf(footpid,"%i",m_pEndnotes[m_iNextENote].pid);
6291 attribsA[3] = footpid.c_str();
6292 attribsA[5] = m_charProps.c_str();
6293 attribsA[7] = m_charStyle.c_str();
6294
6295 attribsB[1] = m_paraProps.c_str();
6296 attribsB[3] = m_paraStyle.c_str();
6297
6298 _appendStrux(PTX_Block,attribsB);
6299 m_bInPara = true;
6300
6301 if(m_pEndnotes[m_iNextENote].type)
6302 {
6303 _appendObject(PTO_Field, attribsA);
6304 return false;
6305 }
6306 return true;
6307 }
6308
6309 xxx_UT_DEBUGMSG(("In endnote %d, on pos %d\n", m_iNextENote, iDocPosition));
6310 // do not return !!!
6311 }
6312 else if(m_bInENotes)
6313 {
6314 m_bInENotes = false;
6315 xxx_UT_DEBUGMSG(("Leaving endnote territory\n"));
6316 // move to the end of the document ...
6317
6318 // do not return !!!
6319 }
6320
6321 // we only return here, so that the code above could be extended
6322 // for handly annotations by simply copy/paste
6323 return true;
6324 }
6325
6326
6327
6328 /*!
6329 This function makes sure that the insert is happening at the
6330 correct place if we are in a segment which belongs to one of the
6331 set of Textboxes
6332
6333 \parameter UT_uint32 iDocPosition: character position in the Word
6334 document stream
6335 \return returns false if the present character is to be skipped,
6336 true otherwise
6337 */
_handleTextboxesText(UT_uint32 iDocPosition)6338 bool IE_Imp_MsWord_97::_handleTextboxesText(UT_uint32 iDocPosition)
6339 {
6340 if(iDocPosition >= m_iTextboxesStart && iDocPosition < m_iTextboxesEnd)
6341 {
6342 // upon entry into the Textland-land, we will need to search for
6343 // the first Textbox section in our document, note that we are
6344 // in a Textbox section, note at what doc position the current
6345 // textbox will end, and then let things run until we reach
6346 // the end of the textbox; then we need to search for the next
6347 // doc section, etc.
6348
6349
6350 // when in a Text box section, all the functions that normally
6351 // use append methods will need to use insert methods instead
6352
6353 if(!m_bInTextboxes)
6354 {
6355 UT_DEBUGMSG(("In Textbox territory: pos %d\n", iDocPosition));
6356 m_bInTextboxes = true;
6357 m_bInFNotes = false;
6358 m_bInHeaders = false;
6359
6360 // we will reuse the m_iNextTextbox variable, noting it
6361 // refers to the CURRENT textbox
6362
6363 m_iNextTextbox = 0;
6364 _findNextTextboxSection();
6365 _endSect(NULL,0,NULL,0);
6366 m_bInSect = true;
6367 }
6368
6369 // the current footnote will end at pos
6370 // f.txt_pos + f.txt_len,
6371 if( m_iNextTextbox < m_iTextboxCount && iDocPosition == m_pTextboxes[m_iNextTextbox].txt_pos +
6372 m_pTextboxes[m_iNextTextbox].txt_len)
6373 {
6374 m_iNextTextbox++;
6375
6376 // after the last footnote there is an extra paragraph
6377 // marker that is still a part of the footnote section --
6378 // we do not want that marker imported
6379 if(m_iNextTextbox < m_iTextboxCount)
6380 _findNextTextboxSection();
6381 else
6382 {
6383 UT_DEBUGMSG(("End of Textbox marker at pos %d\n", iDocPosition));
6384 return false;
6385 }
6386 }
6387
6388 // if(iDocPosition == m_pTextboxes[m_iNextTextbox].txt_pos)
6389 // {
6390 // const gchar * attribsB[] = {"props", NULL,
6391 // "style", NULL,
6392 // NULL};
6393
6394 // attribsB[1] = m_paraProps.c_str();
6395 // attribsB[3] = m_paraStyle.c_str();
6396
6397 // _appendStrux(PTX_Block,attribsB);
6398 // m_bInPara = true;
6399 // return true;
6400 // }
6401
6402 xxx_UT_DEBUGMSG(("In Textbox %d, on pos %d\n", m_iNextTextbox, iDocPosition));
6403 }
6404 else if(m_bInTextboxes)
6405 {
6406 m_bInTextboxes = false;
6407 UT_DEBUGMSG(("Leaving Textbox territory\n"));
6408 }
6409
6410 return true;
6411 }
6412
_findNextFNoteSection()6413 bool IE_Imp_MsWord_97::_findNextFNoteSection()
6414 {
6415 if(!m_iNextFNote)
6416 {
6417 // move to the start of the doc first
6418 m_pNotesEndSection = NULL;
6419 }
6420
6421 if(m_pNotesEndSection)
6422 {
6423 // move to the next fragment
6424 m_pNotesEndSection = m_pNotesEndSection->getNext();
6425 UT_return_val_if_fail(m_pNotesEndSection, false);
6426 }
6427
6428
6429 m_pNotesEndSection = getDoc()->findFragOfType(pf_Frag::PFT_Strux,
6430 (UT_sint32)PTX_EndFootnote,
6431 m_pNotesEndSection);
6432
6433 if(!m_pNotesEndSection)
6434 {
6435 xxx_UT_DEBUGMSG(("Error: footnote section not found!!!\n"));
6436 return false;
6437 }
6438
6439 return true;
6440 }
6441
6442
6443 ///////////////////////////////////////////////////////////////////////
6444 /*!
6445 * s_cmp_lids This function is used to sort the textboxPos lids in order
6446 * of their lid values. This matches the order of the text sort in the
6447 * in the out-of-stream table.
6448 * Used by theqsort method on UT_Vector.
6449 \param const void * P1 - pointer to a textboxPos pointer
6450 \param const void * P2 - pointer to a textboxPos pointer
6451 \returns -ve if sz1 < sz2, 0 if sz1 == sz2, +ve if sz1 > sz2
6452 */
s_cmp_lids(const void * P1,const void * P2)6453 static UT_sint32 s_cmp_lids(const void * P1, const void * P2)
6454 {
6455 const textboxPos ** pP1 = (const textboxPos **) P1;
6456 const textboxPos ** pP2 = (const textboxPos **) P2;
6457 UT_uint32 lid1 = (*pP1)->lid;
6458 UT_uint32 lid2 = (*pP2)->lid;
6459 return static_cast<UT_sint32>(lid1) - static_cast<UT_sint32>(lid2);
6460 }
6461
_findNextTextboxSection()6462 bool IE_Imp_MsWord_97::_findNextTextboxSection()
6463 {
6464 if(m_iNextTextbox == 0)
6465 {
6466 // move to the start of the doc first
6467 m_pTextboxEndSection = NULL;
6468 m_vecTextboxPos.qsort(s_cmp_lids);
6469
6470 }
6471 if(m_iNextTextbox >= m_vecTextboxPos.getItemCount())
6472 {
6473 UT_DEBUGMSG(("Error: Textbox section not found!!!\n"));
6474 return false;
6475 }
6476
6477 textboxPos * pPos = m_vecTextboxPos.getNthItem(m_iNextTextbox);
6478 m_pTextboxEndSection = pPos->endFrame;
6479
6480 if(!m_pTextboxEndSection)
6481 {
6482 UT_DEBUGMSG(("Error: Textbox section not found!!!\n"));
6483 return false;
6484 }
6485
6486 return true;
6487 }
6488
_findNextENoteSection()6489 bool IE_Imp_MsWord_97::_findNextENoteSection()
6490 {
6491 if(!m_iNextENote)
6492 {
6493 // move to the start of the doc first
6494 m_pNotesEndSection = NULL;
6495 }
6496
6497 if(m_pNotesEndSection)
6498 {
6499 // move to the next fragment
6500 m_pNotesEndSection = m_pNotesEndSection->getNext();
6501 UT_return_val_if_fail(m_pNotesEndSection, false);
6502 }
6503
6504 m_pNotesEndSection = getDoc()->findFragOfType(pf_Frag::PFT_Strux,
6505 (UT_sint32)PTX_EndEndnote,
6506 m_pNotesEndSection);
6507
6508 if(!m_pNotesEndSection)
6509 {
6510 UT_DEBUGMSG(("Error: endnote section not found!!!\n"));
6511 return false;
6512 }
6513
6514 return true;
6515 }
6516
_shouldUseInsert() const6517 bool IE_Imp_MsWord_97::_shouldUseInsert() const
6518 {
6519 return ((m_bInFNotes || m_bInENotes) && !m_bInHeaders && !m_bInTextboxes);
6520 }
6521
_ensureInBlock()6522 bool IE_Imp_MsWord_97::_ensureInBlock()
6523 {
6524
6525 bool bret = true;
6526
6527 pf_Frag * pf = getDoc()->getLastFrag();
6528 while(pf && pf->getType() != pf_Frag::PFT_Strux)
6529 {
6530 pf = pf->getPrev();
6531 }
6532 if(pf && (pf->getType() == pf_Frag::PFT_Strux) )
6533 {
6534 pf_Frag_Strux * pfs = static_cast<pf_Frag_Strux *>(pf);
6535 if(pfs->getStruxType() != PTX_Block)
6536 {
6537 bret = _appendStrux(PTX_Block, NULL);
6538 if (bret) m_bInPara = true;
6539 }
6540 }
6541 else if( pf == NULL)
6542 {
6543 bret = _appendStrux(PTX_Block, NULL);
6544 if (bret) m_bInPara = true;
6545 }
6546
6547 return bret;
6548 }
6549
_appendStrux(PTStruxType pts,const gchar ** attributes)6550 bool IE_Imp_MsWord_97::_appendStrux(PTStruxType pts, const gchar ** attributes)
6551 {
6552 if(pts == PTX_SectionFrame)
6553 {
6554 UT_DEBUGMSG(("Appending Frame \n"));
6555 }
6556 if(pts == PTX_EndFrame)
6557 {
6558 UT_DEBUGMSG(("Appending EndFrame \n"));
6559 }
6560 if(m_bInHeaders)
6561 {
6562 return _appendStruxHdrFtr(pts, attributes);
6563 }
6564 else if(_shouldUseInsert() && m_pNotesEndSection)
6565 {
6566 return getDoc()->insertStruxBeforeFrag(m_pNotesEndSection, pts, attributes);
6567 }
6568 else if(m_bInTextboxes && m_pTextboxEndSection)
6569 {
6570 if(pts == PTX_Block)
6571 {
6572 xxx_UT_DEBUGMSG(("Insert block in Text box \n"));
6573 }
6574 return getDoc()->insertStruxBeforeFrag(m_pTextboxEndSection, pts, attributes);
6575 }
6576 if(pts == PTX_SectionFrame)
6577 {
6578 // Make sure any pending text is flushed
6579 _flush();
6580
6581 //
6582 // Text boxes need to be preceded by Blocks
6583 //
6584 pf_Frag * pf = getDoc()->getLastFrag();
6585 while(pf && pf->getType() != pf_Frag::PFT_Strux)
6586 {
6587 pf = pf->getPrev();
6588 }
6589 if(pf && (pf->getType() == pf_Frag::PFT_Strux) )
6590 {
6591 pf_Frag_Strux * pfs = static_cast<pf_Frag_Strux *>(pf);
6592 if(pfs->getStruxType() != PTX_Block)
6593 {
6594 getDoc()->appendStrux(PTX_Block, NULL);
6595 }
6596 }
6597 else if( pf == NULL)
6598 {
6599 getDoc()->appendStrux(PTX_Block, NULL);
6600 }
6601 }
6602 return getDoc()->appendStrux(pts, attributes);
6603 }
6604
_appendObject(PTObjectType pto,const gchar ** attributes)6605 bool IE_Imp_MsWord_97::_appendObject(PTObjectType pto, const gchar ** attributes)
6606 {
6607 if(m_bInHeaders)
6608 {
6609 return _appendObjectHdrFtr(pto, attributes);
6610 }
6611 else if(_shouldUseInsert() && m_pNotesEndSection)
6612 {
6613 return getDoc()->insertObjectBeforeFrag(m_pNotesEndSection, pto, attributes);
6614 }
6615 else if(m_bInTextboxes && m_pTextboxEndSection)
6616 {
6617 return getDoc()->insertObjectBeforeFrag(m_pTextboxEndSection, pto, attributes);
6618 }
6619 if(!m_bInPara)
6620 {
6621 _appendStrux(PTX_Block, NULL);
6622 m_bInPara = true;
6623 }
6624 return getDoc()->appendObject(pto, attributes);
6625 }
6626
_appendSpan(const UT_UCSChar * p,UT_uint32 length)6627 bool IE_Imp_MsWord_97::_appendSpan(const UT_UCSChar * p, UT_uint32 length)
6628 {
6629 if(m_bInHeaders)
6630 {
6631 return _appendSpanHdrFtr(p, length);
6632 }
6633 else if(_shouldUseInsert() && m_pNotesEndSection)
6634 {
6635 return getDoc()->insertSpanBeforeFrag(m_pNotesEndSection, p, length);
6636 }
6637 else if(m_bInTextboxes && m_pTextboxEndSection)
6638 {
6639 return getDoc()->insertSpanBeforeFrag(m_pTextboxEndSection, p, length);
6640 }
6641 return getDoc()->appendSpan(p, length);
6642 }
6643
_appendFmt(const gchar ** attributes)6644 bool IE_Imp_MsWord_97::_appendFmt(const gchar ** attributes)
6645 {
6646 // no special processing required, this only changes m_loading in
6647 // the PT
6648 return getDoc()->appendFmt(attributes);
6649 }
6650
/*!
   The append*HdrFtr() methods below are needed because in AW headers
   cannot be shared among sections; in contrast, in Word one header
   can be used by a chain of sections. We get around this by
   duplicating that one header for each section that uses it. Since
   we cannot wind back through the data stream, we have to duplicate
   each shared header as we go, using the info stored in the current
   header's d struct.
*/
_appendStruxHdrFtr(PTStruxType pts,const gchar ** attributes)6660 bool IE_Imp_MsWord_97::_appendStruxHdrFtr(PTStruxType pts, const gchar ** attributes)
6661 {
6662 UT_return_val_if_fail(m_bInHeaders,false);
6663 UT_return_val_if_fail(m_iCurrentHeader < m_iHeadersCount,false);
6664 UT_DEBUGMSG(("Inserting strux of type %d in HdrFtr %d\n",pts,m_iCurrentHeader));
6665 UT_ASSERT(m_bInSect);
6666 bool bRet = true;
6667 for(UT_sint32 i = 0; i < m_pHeaders[m_iCurrentHeader].d.frag.getItemCount(); i++)
6668 {
6669 pf_Frag * pF = (pf_Frag*) m_pHeaders[m_iCurrentHeader].d.frag.getNthItem(i);
6670 UT_return_val_if_fail(pF,false);
6671 UT_DEBUGMSG(("Inserting strux of type %d in Dirivative HdrFtr \n",pts));
6672
6673 bRet &= getDoc()->insertStruxBeforeFrag(pF, pts, attributes);
6674 }
6675
6676 bRet &= getDoc()->appendStrux(pts, attributes);
6677 if(pts != PTX_Block)
6678 {
6679 xxx_UT_DEBUGMSG(("m_bInPara set false here -1 \n"));
6680 m_bInPara = false;
6681 }
6682 else
6683 {
6684 m_bInPara = true;
6685 }
6686 return bRet;
6687 }
6688
_appendObjectHdrFtr(PTObjectType pto,const gchar ** attributes)6689 bool IE_Imp_MsWord_97::_appendObjectHdrFtr(PTObjectType pto, const gchar ** attributes)
6690 {
6691 UT_return_val_if_fail(m_bInHeaders,false);
6692 UT_return_val_if_fail(m_iCurrentHeader < m_iHeadersCount,false);
6693
6694 bool bRet = true;
6695
6696 for(UT_sint32 i = 0; i < m_pHeaders[m_iCurrentHeader].d.frag.getItemCount(); i++)
6697 {
6698 pf_Frag * pF = (pf_Frag*) m_pHeaders[m_iCurrentHeader].d.frag.getNthItem(i);
6699 UT_return_val_if_fail(pF,false);
6700 if(!m_bInPara)
6701 {
6702 bRet &= getDoc()->insertStruxBeforeFrag(pF, PTX_Block, NULL);
6703 }
6704 bRet &= getDoc()->insertObjectBeforeFrag(pF, pto, attributes);
6705 }
6706 if(!m_bInPara)
6707 {
6708 m_bInPara = true;
6709 bRet &= getDoc()->appendStrux(PTX_Block, NULL);
6710 }
6711 bRet &= getDoc()->appendObject(pto, attributes);
6712 return bRet;
6713 }
6714
_appendSpanHdrFtr(const UT_UCSChar * p,UT_uint32 length)6715 bool IE_Imp_MsWord_97::_appendSpanHdrFtr(const UT_UCSChar * p, UT_uint32 length)
6716 {
6717 UT_return_val_if_fail(m_bInHeaders,false);
6718 UT_return_val_if_fail(m_iCurrentHeader < m_iHeadersCount,false);
6719
6720 bool bRet = true;
6721
6722 for(UT_sint32 i = 0; i < m_pHeaders[m_iCurrentHeader].d.frag.getItemCount(); i++)
6723 {
6724 pf_Frag * pF = (pf_Frag*) m_pHeaders[m_iCurrentHeader].d.frag.getNthItem(i);
6725 UT_return_val_if_fail(pF,false);
6726 if(!m_bInPara)
6727 {
6728 bRet &= getDoc()->insertStruxBeforeFrag(pF, PTX_Block, NULL);
6729 }
6730
6731 bRet &= getDoc()->insertSpanBeforeFrag(pF, p, length);
6732 }
6733 if(!m_bInPara)
6734 {
6735 m_bInPara = true;
6736 bRet &= getDoc()->appendStrux(PTX_Block, NULL);
6737 }
6738 bRet &= getDoc()->appendSpan(p, length);
6739 return bRet;
6740 }
6741
6742
_handleHeaders(const wvParseStruct * ps)6743 void IE_Imp_MsWord_97::_handleHeaders(const wvParseStruct *ps)
6744 {
6745 UT_uint32 i, k;
6746
6747 DELETEPV(m_pHeaders);
6748
6749 m_iHeadersCount = 0;
6750 UT_uint32 *pPLCF_txt = NULL;
6751
6752 /*
6753 The header/footer PLCF in Word 97+ is organised as follows:
6754
6755 indx | function
6756 -------------------------------------------------------------------------------
6757 0-5: document wide settings
6758 -------------------------------------------------------------------------------
6759 0 | footnote separator
6760 1 | footnote continuation separator (i.e., continued on next page)
6761 2 | document-wide footnote continuation notice (i.e., continued
6762 from previous page)
6763 3-5 | as above for endnotes
6764 -------------------------------------------------------------------------------
6765 now for i-th section in document (i >= 0)
6766 -------------------------------------------------------------------------------
6767 i+6 | header even pages
6768 i+7 | header odd pages
6769 i+8 | footer even pages
6770 i+9 | footer odd pages
6771 i+10 | header first page
6772 i+11 | footer first page
6773 -------------------------------------------------------------------------------
6774 according to the docs now should come the foot/endnote
6775 separators but they do not -- those settings appear to be
6776 document wide only ...
6777 -------------------------------------------------------------------------------
6778 i+12 - i+17 | as the document wide footnote/endnote separators above
6779
6780 NB: the record for the last section in the document may be
6781 incomplete, i.e., for n sections m_iHeadersCount <= 6 + 12*n.
6782
6783 The even headers are only applied if ps->dop.fFacingPages is set
6784 */
6785
6786 bool bHeaderError = false;
6787
6788 if(ps->fib.lcbPlcfhdd)
6789 {
6790 /* the docs are ambiguous, at one place saying the PLCF
6791 contains n+2 entries, another n+1; I think the former is correct*/
6792 m_iHeadersCount = ps->fib.lcbPlcfhdd/4 - 2;
6793 try
6794 {
6795 m_pHeaders = new header[m_iHeadersCount];
6796 }
6797 catch(...)
6798 {
6799 m_pHeaders = NULL;
6800 }
6801
6802 UT_return_if_fail(m_pHeaders);
6803
6804 // this is really quite straight forward; we retrieve the PLCF
6805 // which is a sequence of n+2 positions (UT_uint32) of the
6806 // header text in its data stream
6807 if(wvGetPLCF((void **) &pPLCF_txt, ps->fib.fcPlcfhdd, ps->fib.lcbPlcfhdd, ps->tablefd))
6808 {
6809 bHeaderError = true;
6810 }
6811
6812 if(!bHeaderError)
6813 {
6814 UT_return_if_fail(pPLCF_txt);
6815 for(i = 0; i < m_iHeadersCount; i++)
6816 {
6817 m_pHeaders[i].pos = pPLCF_txt[i] + m_iHeadersStart;
6818 m_pHeaders[i].len = pPLCF_txt[i+1] - pPLCF_txt[i];
6819 m_pHeaders[i].pid = getDoc()->getUID(UT_UniqueId::HeaderFtr);
6820
6821 UT_DEBUGMSG(("Header %d has pid %d \n",i,m_pHeaders[i].pid));
6822 if(i < 6)
6823 {
6824 // document wide footnote/endnote separators
6825 m_pHeaders[i].type = HF_Unsupported;
6826 }
6827 else
6828 {
6829 switch((i-6)%6)
6830 {
6831 case 0:
6832 if(m_bEvenOddHeaders)
6833 m_pHeaders[i].type = HF_HeaderEven;
6834 else
6835 m_pHeaders[i].type = HF_Unsupported;
6836 break;
6837 case 1:
6838 m_pHeaders[i].type = HF_HeaderOdd;
6839 break;
6840 case 2:
6841 if(m_bEvenOddHeaders)
6842 m_pHeaders[i].type = HF_FooterEven;
6843 else
6844 m_pHeaders[i].type = HF_Unsupported;
6845 break;
6846 case 3:
6847 m_pHeaders[i].type = HF_FooterOdd;
6848 break;
6849 case 4:
6850 m_pHeaders[i].type = HF_HeaderFirst;
6851 break;
6852 case 5:
6853 m_pHeaders[i].type = HF_FooterFirst;
6854 break;
6855
6856 default:
6857 m_pHeaders[i].type = HF_Unsupported;
6858 }
6859
6860 UT_DEBUGMSG(("Header no. %d, pos %d, len %d\n",
6861 i,m_pHeaders[i].pos,m_pHeaders[i].len));
6862
6863 #if 1
6864 // this code is here because in AW we currently cannot
6865 // share headers between sections
6866 if(m_pHeaders[i].type != HF_Unsupported && m_pHeaders[i].len == 0)
6867 {
6868 // this is the case where the section is to use the
6869 // header of a previous section -- scroll back until
6870 // we find one
6871 k = i - 6;
6872 bool bContinue = false;
6873
6874 while(k > 5)
6875 {
6876 if(m_pHeaders[k].len == 2)
6877 {
6878 // found empty header
6879 // set the type of the present header unsupported, so it does not
6880 // get referenced
6881 m_pHeaders[i].type = HF_Unsupported;
6882 bContinue = true;
6883 break;
6884 }
6885 else if(m_pHeaders[k].len == 0)
6886 {
6887 // try one section ahead
6888 k -= 6;
6889 }
6890 else
6891 {
6892 // found a meaningful header
6893 break;
6894 }
6895 }
6896
6897 if(bContinue || k < 6)
6898 {
6899 // did not find any meaningful headers, set the type to unsupported, so
6900 // that it does not get referenced
6901 //
6902 // we do not want to do this to the first page hdr/ftr,
6903 // because in this case len == 0 can mean the header should be
6904 // empty but present (this is determined by asep->fTitlePage
6905 if(m_pHeaders[i].type != HF_HeaderFirst && m_pHeaders[i].type != HF_FooterFirst)
6906 m_pHeaders[i].type = HF_Unsupported;
6907
6908 continue;
6909 }
6910
6911 // so we have found a meaningful header k that is to
6912 // be used in place of header i; we add header
6913 // i to k's d-struct
6914
6915 m_pHeaders[k].d.hdr.addItem((void*)(m_pHeaders+i));
6916 }
6917 #endif
6918 }
6919 }
6920
6921 wvFree(pPLCF_txt);
6922 }
6923 }
6924 }
6925
6926 /*!
6927 A helper function that inserts the header/ftr section
6928 */
_insertHeaderSection(bool bDoBlockIns)6929 bool IE_Imp_MsWord_97::_insertHeaderSection(bool bDoBlockIns)
6930 {
6931 // need to insert our header/footer section, preserving
6932 // any existing formatting ...
6933
6934 // we need to be able to insert some 0-length headers
6935 if(m_pHeaders[m_iCurrentHeader].type != HF_Unsupported /*&& m_pHeaders[m_iCurrentHeader].len > 2*/)
6936 {
6937 UT_uint32 iOff = 0;
6938 const gchar * attribsB[] = {NULL, NULL,
6939 NULL, NULL,
6940 NULL};
6941 if(m_iCurrentHeader == m_iLastAppendedHeader)
6942 {
6943 return false;
6944 }
6945 m_iLastAppendedHeader = m_iCurrentHeader;
6946 if(m_paraProps.size())
6947 {
6948 attribsB[iOff++] = "props";
6949 attribsB[iOff++] = m_paraProps.c_str();
6950 }
6951
6952 if(m_paraStyle.size())
6953 {
6954 attribsB[iOff++] = "style";
6955 attribsB[iOff++] = m_paraStyle.c_str();
6956 }
6957
6958 const gchar * attribsC[] = {NULL, NULL,
6959 NULL, NULL,
6960 NULL};
6961 iOff = 0;
6962 if(m_charProps.size())
6963 {
6964 attribsC[iOff++] = "props";
6965 attribsC[iOff++] = m_charProps.c_str();
6966 }
6967
6968 if(m_charStyle.size())
6969 {
6970 attribsC[iOff++] = "style";
6971 attribsC[iOff++] = m_charStyle.c_str();
6972 }
6973
6974 const gchar * attribsS[] = {"type", NULL,
6975 "id", NULL,
6976 NULL};
6977
6978 UT_String id;
6979 UT_String_sprintf(id,"%d",m_pHeaders[m_iCurrentHeader].pid);
6980 attribsS[3] = id.c_str();
6981 UT_DEBUGMSG(("Appending Current Header %d pid %s \n",m_iCurrentHeader,id.c_str()));
6982 switch(m_pHeaders[m_iCurrentHeader].type)
6983 {
6984 case HF_HeaderEven:
6985 attribsS[1] = "header-even";
6986 break;
6987 case HF_FooterEven:
6988 attribsS[1] = "footer-even";
6989 break;
6990 case HF_HeaderOdd:
6991 attribsS[1] = "header";
6992 break;
6993 case HF_FooterOdd:
6994 attribsS[1] = "footer";
6995 break;
6996 case HF_HeaderFirst:
6997 attribsS[1] = "header-first";
6998 break;
6999 case HF_FooterFirst:
7000 attribsS[1] = "footer-first";
7001 break;
7002 default:
7003 UT_ASSERT_HARMLESS(UT_NOT_REACHED);
7004 }
7005
7006 // we use the document methods, not the importer methods intentionally
7007 UT_DEBUGMSG(("Direct Appending HdrFtr in MSWord_import \n"));
7008 if(!m_bInPara)
7009 {
7010 getDoc()->appendStrux(PTX_Block, NULL);
7011 m_bInPara = true;
7012 }
7013 getDoc()->appendStrux(PTX_SectionHdrFtr, attribsS);
7014 m_bInSect = true;
7015 m_bInHeaders = true;
7016
7017 if(bDoBlockIns)
7018 {
7019 getDoc()->appendStrux(PTX_Block, attribsB);
7020 m_bInPara = true;
7021 _appendFmt(attribsC);
7022 }
7023
7024 // now we insert the same for any derivative headers
7025 // ...
7026 for (UT_sint32 i = 0; i < m_pHeaders[m_iCurrentHeader].d.hdr.getItemCount(); i++)
7027 {
7028 header * pH = (header*)m_pHeaders[m_iCurrentHeader].d.hdr.getNthItem(i);
7029 UT_return_val_if_fail(pH, true);
7030
7031 // skip any unsupported headers (we set the type to unsupported when we find
7032 // out that it is not used by the section to which it belongs)
7033
7034 if(pH->type == HF_Unsupported)
7035 {
7036 continue;
7037 }
7038
7039 UT_String_sprintf(id,"%d",pH->pid);
7040 attribsS[3] = id.c_str();
7041
7042 switch(pH->type)
7043 {
7044 case HF_HeaderEven:
7045 attribsS[1] = "header-even";
7046 break;
7047 case HF_FooterEven:
7048 attribsS[1] = "footer-even";
7049 break;
7050 case HF_HeaderOdd:
7051 attribsS[1] = "header";
7052 break;
7053 case HF_FooterOdd:
7054 attribsS[1] = "footer";
7055 break;
7056 case HF_HeaderFirst:
7057 attribsS[1] = "header-first";
7058 break;
7059 case HF_FooterFirst:
7060 attribsS[1] = "footer-first";
7061 break;
7062 default:
7063 UT_ASSERT_HARMLESS(UT_NOT_REACHED);
7064 }
7065 UT_DEBUGMSG(("Appending Dirivative HdrFtr in MSWord_import \n"));
7066
7067 getDoc()->appendStrux(PTX_SectionHdrFtr, attribsS);
7068 m_bInHeaders = true;
7069
7070 // we need to remember the HdrFtr fragment for
7071 // later ...
7072 pf_Frag * pF = getDoc()->getLastFrag();
7073 UT_return_val_if_fail(pF && pF->getType() == pf_Frag::PFT_Strux, true);
7074
7075 pf_Frag_Strux * pFS = (pf_Frag_Strux*)pF;
7076 UT_return_val_if_fail(pFS->getStruxType() == PTX_SectionHdrFtr, true);
7077
7078 m_pHeaders[m_iCurrentHeader].d.frag.addItem((void*)pF);
7079
7080 if(bDoBlockIns)
7081 {
7082 getDoc()->appendStrux(PTX_Block, attribsB);
7083 getDoc()->appendFmt(attribsC);
7084 }
7085 }
7086
7087 return true;
7088 }
7089 else
7090 {
7091 // just gobble the character ...
7092 m_bInHeaders = true;
7093 return false;
7094 }
7095
7096 return false;
7097 }
7098
7099
7100
7101 /*!
7102 This function makes sure that the insert is happening at the
7103 correct place if we are in the header segment.
7104
7105 \parameter UT_uint32 iDocPosition: character position in the Word
7106 document stream
7107 \return returns false if the present character is to be skipped,
7108 true otherwise
7109 */
_handleHeadersText(UT_uint32 iDocPosition,bool bDoBlockIns)7110 bool IE_Imp_MsWord_97::_handleHeadersText(UT_uint32 iDocPosition,bool bDoBlockIns)
7111 {
7112 if(iDocPosition == m_iPrevHeaderPosition)
7113 {
7114 return true;
7115 }
7116
7117 if(iDocPosition == m_iHeadersEnd)
7118 {
7119 m_iCurrentHeader++;
7120
7121 if(m_iCurrentHeader < m_iHeadersCount)
7122 {
7123 // this is the case where we reached the end of the header segment, but still have
7124 // some headers in our header array left.
7125 // if we have any headers other than unsupported, we have to insert them as empty
7126
7127 for(; m_iCurrentHeader < m_iHeadersCount; m_iCurrentHeader++)
7128 {
7129 if(m_pHeaders[m_iCurrentHeader].type != HF_Unsupported)
7130 _insertHeaderSection(bDoBlockIns);
7131 }
7132 }
7133 }
7134
7135 if(iDocPosition >= m_iHeadersStart && iDocPosition < m_iHeadersEnd)
7136 {
7137 m_iPrevHeaderPosition = iDocPosition;
7138
7139 // upon entry into the header-land, we will need to search for
7140 // the first header/footer section in our document, note that we are
7141 // in a header section, note at what doc position the current
7142 // header will end, and then let things run until we reach
7143 // the end of the header; then we need to search for the next
7144 // doc section, etc.
7145
7146 // when we scroll through 0-length headers, we need to remember where we started,
7147 // so we can insert the hdr section later
7148 bool bScrolledHeader = false;
7149 UT_uint32 iOrigHeader = 0;
7150
7151 if(!m_bInHeaders)
7152 {
7153 UT_DEBUGMSG(("In headers territory: pos %d\n", iDocPosition));
7154 m_bInENotes = false;
7155 m_bInFNotes = false;
7156
7157 m_iCurrentHeader = 0;
7158
7159 // we need to close of any open section
7160 if(m_bInSect)
7161 {
7162 _endSect(NULL,0,NULL,0);
7163 }
7164
7165 // some headers can be 0-length, skip them ... (0-length: len <=2)
7166 while(m_iCurrentHeader < m_iHeadersCount && m_pHeaders[m_iCurrentHeader].len <= 2)
7167 {
7168 bScrolledHeader = true;
7169 m_iCurrentHeader++;
7170 }
7171
7172 m_bInHeaders = true;
7173 }
7174 xxx_UT_DEBUGMSG(("CurrentHeader %d HeaderCount %d \n",m_iCurrentHeader,m_iHeadersCount));
7175 if (m_iCurrentHeader < m_iHeadersCount) {
7176 if(iDocPosition == m_pHeaders[m_iCurrentHeader].pos +
7177 m_pHeaders[m_iCurrentHeader].len)
7178 {
7179 // new header, time to move on ...
7180 m_iCurrentHeader++;
7181 iOrigHeader = m_iCurrentHeader;
7182
7183 // some headers can be 0-length, skip them ... (0-length: len <=2)
7184 // some 0-length headers we are actually interested in; the 0-length
7185 // headers we do not care about should already be marked as HF_Unsupported
7186 while(m_iCurrentHeader < m_iHeadersCount && m_pHeaders[m_iCurrentHeader].type == HF_Unsupported
7187 /*m_pHeaders[m_iCurrentHeader].len <= 2*/)
7188 {
7189 bScrolledHeader = true;
7190 m_iCurrentHeader++;
7191 }
7192
7193 // after the last header there is an extra paragraph
7194 // marker that is still a part of the header section --
7195 // we do not want that marker imported
7196 if(m_iCurrentHeader == m_iHeadersCount)
7197 {
7198 UT_DEBUGMSG(("End of header marker at pos %d\n", iDocPosition));
7199 return false;
7200 }
7201
7202 // do not return, processing needs to continue ...
7203 }
7204 xxx_UT_DEBUGMSG(("iDocPosition %d m_pHeaders[m_iCurrentHeader].pos %d \n",iDocPosition,m_pHeaders[m_iCurrentHeader].pos));
7205 if((bScrolledHeader && m_pHeaders[iOrigHeader].pos == iDocPosition) ||
7206 (!bScrolledHeader && iDocPosition == m_pHeaders[m_iCurrentHeader].pos))
7207 {
7208 return _insertHeaderSection(bDoBlockIns);
7209 }
7210 }
7211 else
7212 {
7213 UT_DEBUGMSG(("DOM: bad header joo joo\n"));
7214 return false;
7215 }
7216
7217 // if we got this far, we are somwhere inside the header, just
7218 // process the character in a normal way
7219 return (m_pHeaders[m_iCurrentHeader].type != HF_Unsupported);
7220 }
7221
7222 return true;
7223 }
7224
7225 /*
7226 this function returns true if stuff at given position is to be ingored
7227 For example, the doc might contain headers in it that are not used ...
7228 */
_ignorePosition(UT_uint32 iDocPos)7229 bool IE_Imp_MsWord_97::_ignorePosition(UT_uint32 iDocPos)
7230 {
7231 if(m_bInTOC && m_bTOCsupported)
7232 return true;
7233
7234 if(m_bInHeaders && m_iCurrentHeader < m_iHeadersCount && m_pHeaders)
7235 {
7236 if( m_pHeaders[m_iCurrentHeader].type == HF_Unsupported
7237 || iDocPos < m_pHeaders[m_iCurrentHeader].pos)
7238 {
7239 return true;
7240 }
7241 }
7242
7243 return false;
7244 }
7245