1 /* -*- mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: t -*- */
2 
3 /* AbiWord
4  * Copyright (C) 1998-2000 AbiSource, Inc.
5  * Copyright (C) 2001 Dom Lachowicz <dominicl@seas.upenn.edu>
6  * Copyright (C) 2001-2003 Tomas Frydrych
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License
10  * as published by the Free Software Foundation; either version 2
11  * of the License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21  * 02110-1301 USA.
22  */
23 
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include "ut_locale.h"
28 
29 #include <zlib.h>
30 
31 #include "wv.h"
32 
33 #include "ut_string_class.h"
34 #include "ut_string.h"
35 #include "ut_bytebuf.h"
36 #include "ut_units.h"
37 #include "ut_math.h"
38 #include "ut_assert.h"
39 #include "ut_debugmsg.h"
40 #include "ut_stack.h"
41 
42 #include "xap_App.h"
43 #include "xap_Frame.h"
44 #include "xap_EncodingManager.h"
45 #include "xap_DialogFactory.h"
46 #include "xap_Dlg_Password.h"
47 
48 #include "fg_Graphic.h"
49 #include "fg_GraphicRaster.h"
50 #include "fg_GraphicVector.h"
51 
52 #include "pd_Document.h"
53 
54 #include "ie_impexp_MsWord_97.h"
55 #include "ie_imp_MsWord_97.h"
56 #include "ie_impGraphic.h"
57 
58 #include "ap_Strings.h"
59 #include "ap_Dialog_Id.h"
60 
61 #include "pf_Frag_Strux.h"
62 #include "pt_PieceTable.h"
63 #include "pd_Style.h"
64 
65 #include "fp_PageSize.h"
66 
67 #include "ut_Language.h"
68 
69 #include <gsf/gsf-infile.h>
70 #include <gsf/gsf-infile-msole.h>
71 #include <gsf/gsf-msole-utils.h>
72 #include <gsf/gsf-docprop-vector.h>
73 #include <gsf/gsf-meta-names.h>
74 
75 #ifdef DEBUG
76 #define IE_IMP_MSWORD_DUMP
77 #include "ie_imp_MsWord_dump.h"
78 #undef IE_IMP_MSWORD_DUMP
79 #endif
80 
// Evaluates v and, when it is false, returns 1 from the function in which
// the macro is used.
#define X_CheckError(v) 		do { if (!(v)) return 1; } while (0)

// undef this to disable support for older images (<= Word95)
#define SUPPORTS_OLD_IMAGES 1

//#define BIDI_DEBUG
//
// Forward declarations of the callbacks that are handed to wv
//
static int charProc (wvParseStruct *ps, U16 eachchar, U8 chartype, U16 lid);
static int specCharProc (wvParseStruct *ps, U16 eachchar, CHP* achp);
static int eleProc (wvParseStruct *ps, wvTag tag, void *props, int dirty);
static int docProc (wvParseStruct *ps, wvTag tag);
94 
95 /*!
96     Translates MS numerical id's for standard styles into our names
97 	The style names that have been commented out are those that do not
98     have currently a localised equivalent in AW
99 */
s_translateStyleId(UT_uint32 id)100 static const gchar * s_translateStyleId(UT_uint32 id)
101 {
102 	if(id >= 4094)
103 	{
104 		return NULL;
105 	}
106 
107 	// The style names that have been commented out are those that do
108 	// not currently have a localised equivalent in AW
109 	switch(id)
110 	{
111 		case 0:  return "Normal";
112 		case 1:  return "Heading 1";
113 		case 2:  return "Heading 2";
114 		case 3:  return "Heading 3";
115 		case 4:  return "Heading 4";
116 		case 5:  return NULL /*"Heading 5"*/;
117 		case 6:  return NULL /*"Heading 6"*/;
118 		case 7:  return NULL /*"Heading 7"*/;
119 		case 8:  return NULL /*"Heading 8"*/;
120 		case 9:  return NULL /*"Heading 9"*/;
121 		case 10: return NULL /*"Index 1"*/;  /* Really a dup of 92? */
122 		case 11: return NULL /*"Index 2"*/;
123 		case 12: return NULL /*"Index 3"*/;
124 		case 13: return NULL /*"Index 4"*/;
125 		case 14: return NULL /*"Index 5"*/;
126 		case 15: return NULL /*"Index 6"*/;
127 		case 16: return NULL /*"Index 7"*/;
128 		case 17: return NULL /*"Index 8"*/;
129 		case 18: return NULL /*"Index 9"*/;
130 		case 19: return NULL /*"Contents 1"*/; /* Handled by insertTOC? */
131 		case 20: return NULL /*"Contents 2"*/; /* Handled by insertTOC? */
132 		case 21: return NULL /*"Contents 3"*/; /* Handled by insertTOC? */
133 		case 22: return NULL /*"Contents 4"*/; /* Handled by insertTOC? */
134 		case 23: return NULL /*"TOC 5"*/; /* See Contents above for these five as well */
135 		case 24: return NULL /*"TOC 6"*/;
136 		case 25: return NULL /*"TOC 7"*/;
137 		case 26: return NULL /*"TOC 8"*/;
138 		case 27: return NULL /*"TOC 9"*/;
139 		case 28: return NULL /*"Normal Indent"*/;
140 		case 29: return "Footnote Text";
141 		case 30: return NULL /*"Comment Text"*/;
142 		case 31: return NULL /*"Header"*/;
143 		case 32: return NULL /*"Footer"*/;
144 		case 33: return NULL /*"Index Heading"*/;
145 		case 34: return NULL /*"Caption"*/;
146 		case 35: return NULL /*"Table of Figures"*/;
147 		case 36: return NULL /*"Envelope Address"*/;
148 		case 37: return NULL /*"Envelope Return"*/;
149 		case 38: return "Footnote Reference";
150 		case 39: return NULL /*"Comment Reference"*/;
151 		case 40: return NULL /*"Line Number"*/;
152 		case 41: return NULL /*"Page Number"*/;
153 		case 42: return "Endnote Reference";
154 		case 43: return "Endnote Text";
155 		case 44: return NULL /*"Index of Authorities"*/;
156 		case 45: return NULL /*"Macro Text"*/;
157 		case 46: return NULL /*"TOA Heading"*/;
158 		case 47: return NULL /*"List"*/;   //WARNING: beginPara appears to handle arbitrary lists via _mapDocToAbiList*
159 		case 48: return "Bulleted List";
160 		case 49: return "Numbered List";
161 		case 50: return NULL /*"List 2"*/;
162 		case 51: return NULL /*"List 3"*/;
163 		case 52: return NULL /*"List 4"*/;
164 		case 53: return NULL /*"List 5"*/;
165 		case 54: return NULL /*"List Bullet 2"*/;
166 		case 55: return NULL /*"List Bullet 3"*/;
167 		case 56: return NULL /*"List Bullet 4"*/;
168 		case 57: return NULL /*"List Bullet 5"*/;
169 		case 58: return NULL /*"List Number 2"*/;
170 		case 59: return NULL /*"List Number 3"*/;
171 		case 60: return NULL /*"List Number 4"*/;
172 		case 61: return NULL /*"List Number 5"*/;
173 		case 62: return NULL /*"Title"*/;
174 		case 63: return NULL /*"Closing"*/;
175 		case 64: return NULL /*"Signature"*/;
176 		case 65: return NULL /*"Default Paragraph Font"*/;
177 		case 66: return NULL /*"Body Text"*/;
178 		case 67: return NULL /*"Body Text Indent"*/;
179 		case 68: return NULL /*"List Continue"*/;
180 		case 69: return NULL /*"List Continue 2"*/;
181 		case 70: return NULL /*"List Continue 3"*/;
182 		case 71: return NULL /*"List Continue 4"*/;
183 		case 72: return NULL /*"List Continue 5"*/;
184 		case 73: return NULL /*"Message Header"*/;
185 		case 74: return NULL /*"Subtitle"*/;
186 		case 75: return NULL /*"Salutation"*/;
187 		case 76: return NULL /*"Date"*/;
188 		case 77: return NULL /*"Body Text First Indent"*/;
189 		case 78: return NULL /*"Body Text First Indent 2"*/;
190 		case 79: return NULL /*"Note Heading"*/;
191 		case 80: return NULL /*"Body Text 2"*/;
192 		case 81: return NULL /*"Body Text 3"*/;
193 		case 82: return NULL /*"Body Text Indent 2"*/;
194 		case 83: return NULL /*"Body Text Indent 3"*/;
195 		case 84: return "Block Text";
196 		case 85: return NULL /*"Hyperlink"*/;
197 		case 86: return NULL /*"FollowedHyperlink"*/;
198 		case 87: return NULL /*"Strong"*/;
199 		case 88: return NULL /*"Emphasis"*/;
200 		case 89: return NULL /*"Document Map"*/;
201 		case 90: return "Plain Text"; /* Really a dup of 109? */
202 		case 91: return NULL /*"Email Signature"*/;
203 	    case 92: return NULL /*"Index 1"*/;  /* Really a dup of 10? */
204 	    case 93: return NULL /*"List Bullet"*/;
205 		case 94: return NULL /*"Normal (Web)"*/;
206 		case 95: return NULL /*"HTML Acronym"*/;
207 		case 96: return NULL /*"HTML Address"*/;
208 		case 97: return NULL /*"HTML Cite"*/;
209 		case 98: return NULL /*"HTML Code"*/;
210 		case 99: return NULL /*"HTML Definition"*/;
211 		case 100: return NULL /*"HTML Keyboard"*/;
212 		case 101: return NULL /*"HTML Preformatted"*/;
213 		case 102: return NULL /*"HTML Sample"*/;
214 		case 103: return NULL /*"HTML Typewriter"*/;
215 		case 104: return NULL /*"HTML Variable"*/;
216 		case 105: return NULL /*"Table Normal"*/;
217     	case 106: return NULL /*"Comment Subject"*/;
218 		case 107: return NULL /*"No List"*/;
219     	case 108: return NULL /*"Index Heading"*/;
220 	    case 109: return "Plain Text";  /* Really a dup of 90? */
221 	    case 110: return NULL /*"Hyperlink"*/;
222 	    case 111: return NULL /*"FollowedHyperlink"*/;
223     	case 112: return "Numbered List"; /* Was EnumList, really a dup of 49? Closer than nothing anyway*/
224     	case 115: return NULL /*"Balloon Text"*/;
225 
226 		case 153: return NULL /*"Table of Authorities"*/;
227 		case 154: return NULL /*"Grille du tableau" in fr_FR*/;
228 
229 		default:
230 			UT_DEBUGMSG(("Unknown style Id [%d]; Please submit this document with a bug report!\n", id));
231 			// Would be nice if we had a UT_USERMSG or something to put up a prompt (with a
232 			// don't display again option) with the message in normal mode, OutputMsg or silent
233 			// in commandline or docserver mode, etc.  Because it is the users, not the
234 			// developers who will have such alien documents.  -MG
235 
236 			UT_ASSERT_HARMLESS(UT_SHOULD_NOT_HAPPEN);
237 			return NULL;
238 	}
239 	return NULL;
240 }
241 
242 /*!
243     Strip characters that would confuse either the xml parser or our
244     property parser; caller is responsible to g_free the returned pointer
245 */
s_stripDangerousChars(const char * s)246 static char * s_stripDangerousChars(const char *s)
247 {
248 	UT_uint32 j, k;
249 	if(!s)
250 		return NULL;
251 
252 	char * t = (char*) g_try_malloc(strlen(s)+1);
253 	UT_return_val_if_fail(t,NULL);
254 
255 	for(j = 0, k = 0; j < strlen(s); )
256 	{
257 	    if(s[j] < ' ' && s[j] >= 0 && s[j] != '\t' && s[j] != '\n' && s[j] != '\r')
258 	    {
259 	        j++;
260 	    }
261 	    else
262 	    {
263 		switch(s[j])
264 		{
265 			default:
266 				t[k++] = s[j++];
267 				break;
268 
269 				// characters that would confuse the
270 				// xml parser or our own property parser
271 			case '<':
272 			case '>':
273 			case ':':
274 			case ';':
275 			case '&':
276 			case '\"':
277 				j++;
278 				break;
279 		}
280 	     }
281 	}
282 
283 	t[k] = 0;
284 
285 	return t;
286 }
287 
s_convert_to_utf8(const wvParseStruct * ps,const char * s)288 static char * s_convert_to_utf8 (const wvParseStruct *ps, const char *s)
289 {
290 	// strangely wv seems to return an UTF-8 string despite a specified codepage
291 	// so we must ensure it is UTF-8. This is time consuming. :-(
292 	// If it is UTF-8 we just g_strdup() it.
293 	// See bug 13229.
294 	if (s == NULL)
295 		return NULL;
296 	if(g_utf8_validate(s, -1, NULL)) {
297 		return g_strdup(s);
298 	}
299 	const char * encoding = NULL;
300 	char fallback = '?';
301 	encoding = wvLIDToCodePageConverter(ps->fib.lid);
302 	return g_convert_with_fallback(s, -1, "UTF-8", encoding, &fallback, NULL, NULL, NULL);
303 }
304 
305 //
306 // DOC uses an unsigned int color index
307 //
308 typedef UT_uint32 Doc_Color_t;
309 
310 //
311 // A mapping between Word's colors and Abi's RGB color scheme;
312 // if you add colors, _make sure_ to increase the '16' in
313 // sMapIcoToColor() below
314 //
315 static Doc_Color_t word_colors [][3] = {
316 	{0x00, 0x00, 0x00}, /* black */
317 	{0x00, 0x00, 0xff}, /* blue */
318 	{0x00, 0xff, 0xff}, /* cyan */
319 	{0x00, 0xff, 0x00}, /* green */
320 	{0xff, 0x00, 0xff}, /* magenta */
321 	{0xff, 0x00, 0x00}, /* red */
322 	{0xff, 0xff, 0x00}, /* yellow */
323 	{0xff, 0xff, 0xff}, /* white */
324 	{0x00, 0x00, 0x80}, /* dark blue */
325 	{0x00, 0x80, 0x80}, /* dark cyan */
326 	{0x00, 0x80, 0x00}, /* dark green */
327 	{0x80, 0x00, 0x80}, /* dark magenta */
328 	{0x80, 0x00, 0x00}, /* dark red */
329 	{0x80, 0x80, 0x00}, /* dark yellow */
330 	{0x80, 0x80, 0x80}, /* dark gray */
331 	{0xc0, 0xc0, 0xc0}, /* light gray */
332 };
333 
sMapIcoToColor(UT_uint16 ico,bool bForeground)334 static UT_String sMapIcoToColor (UT_uint16 ico, bool bForeground)
335 {
336 	// need to handle the automatic colour 0; see bug 10261 for bounds-check
337 	if((!ico && bForeground) || (ico > 16))
338 	{
339 		ico = 1;  //black
340 	}
341 	else if(!ico && !bForeground)
342 	{
343 		ico = 8;  //white
344 	}
345 
346 	return UT_String_sprintf("%02x%02x%02x",
347 							 word_colors[ico-1][0],
348 							 word_colors[ico-1][1],
349 							 word_colors[ico-1][2]);
350 }
351 
//
// Field Ids that are useful later for mapping
//
// The s_Tokens table in this file pairs field names with these ids. Note
// that the table maps the "DATEINAME" and "SPEICHERDAT" names to F_DATE
// rather than to F_DATEINAME / F_SPEICHERDAT. F_OTHER is what
// s_mapNameToField() returns for a name it does not find.
//
typedef enum {
	F_TIME,
	F_DATE,
	F_EDITTIME,
	F_AUTHOR,
	F_PAGE,
	F_NUMCHARS,
	F_NUMPAGES,
	F_NUMWORDS,
	F_FILENAME,
	F_HYPERLINK,
	F_PAGEREF,
	F_EMBED,
	F_TOC,
	F_DateTimePicture,
	F_TOC_FROM_RANGE,
	F_DATEINAME,
	F_SPEICHERDAT,
	F_MERGEFIELD,
	F_OTHER
} Doc_Field_t;
376 
// State kept for one Word field.
// NOTE(review): the code that fills and reads this struct is not in this
// part of the file; the member notes below are read off the declarations
// only - confirm against the users of the struct.
struct field
{
	UT_UCS2Char command [FLD_SIZE];   // buffer of FLD_SIZE UCS-2 characters
	UT_UCS2Char argument [FLD_SIZE];  // second buffer of the same size
	UT_UCS2Char *fieldWhich;          // presumably points into one of the two buffers above - TODO confirm
	UT_sint32	fieldI;
	char *		fieldC;
	UT_sint32   fieldRet;
	Doc_Field_t type;                 // one of the Doc_Field_t ids
};
387 
388 
//
// A mapping between DOC's field names and our given IDs
//
typedef struct
{
	const char * m_name;  // field name as text
	Doc_Field_t m_id;     // id the name translates to
} Doc_Field_Mapping_t;
397 
/*
 * This next bit of code enables us to import many of Word's fields
 *
 * s_Tokens is searched front to back by s_mapNameToField() with a
 * case-insensitive comparison, so the first matching row wins.
 */

static Doc_Field_Mapping_t s_Tokens[] =
{
	{"TIME",	   F_TIME},
	{"EDITTIME",   F_EDITTIME},
	{"DATE",	   F_DATE},
	{"date",	   F_DATE},
	{"DATEINAME",      F_DATE}, // F_DATEINAME
	{"SPEICHERDAT",    F_DATE}, // F_SPEICHERDAT
	{"\\@", 	   F_DateTimePicture},

	{"FILENAME",   F_FILENAME},
	{"\\filename", F_FILENAME},
	{"PAGE",	   F_PAGE},
	{"\\*Arabisch",F_PAGE},
	{"NUMCHARS",   F_NUMCHARS},
	{"NUMPAGES",   F_NUMPAGES},
	{"NUMWORDS",   F_NUMWORDS},
	{"MERGEFIELD", F_MERGEFIELD},
	// these below aren't handled by AbiWord, but they're known about
	{"HYPERLINK",  F_HYPERLINK},
	{"PAGEREF",    F_PAGEREF},
	{"EMBED",	   F_EMBED},
	{"TOC", 	   F_TOC},
	{"\\o", 	   F_TOC_FROM_RANGE},
	{"AUTHOR",	   F_AUTHOR},

	// a name consisting of a single asterisk maps to F_OTHER
	{ "*",		   F_OTHER}
};
430 
431 #define FieldMappingSize (sizeof(s_Tokens)/sizeof(s_Tokens[0]))
432 
433 static Doc_Field_t
s_mapNameToField(const char * name)434 s_mapNameToField (const char * name)
435 {
436 	for (unsigned int k = 0; k < FieldMappingSize; k++)
437 	{
438 		// field names can be sometimes in lower-case
439 		if (!g_ascii_strcasecmp(s_Tokens[k].m_name,name))
440 			return s_Tokens[k].m_id;
441 	}
442 	return F_OTHER;
443 }
444 
445 #undef FieldMappingSize
446 
447 static const char *
s_mapPageIdToString(UT_uint16 id)448 s_mapPageIdToString (UT_uint16 id)
449 {
450 	// TODO: make me way better when we determine code names
451 
452 	switch (id)
453 	{
454 		case 0:
455 		case 1:
456 			return "Letter";
457 		case 5:  return "Legal";
458 		case 7:  return NULL; //"Executive";
459 		case 9:  return "A4";
460 		case 11: return "A5";
461 		case 13: return "Folio";
462 		case 14: return NULL; // in Word this is "B5" but the size
463 							  // does not correspond to AW's B5
464 		case 20: return "Envelope No10";
465 		case 27: return "DL Envelope";
466 		case 28: return "C5";
467 		case 34: return "B5"; // in Word this is B5 Envelope ...
468 		case 37: return NULL; //"Monarch Envelope";
469 
470 		case 0xffff:
471 			// this is a value that wv uses to indicate that page size
472 			// is customised, just return NULL
473 			return NULL;
474 
475 		default:
476 			UT_DEBUGMSG(("Unknow page size: please submit this document with a bug report\n"));
477 			UT_ASSERT_HARMLESS( 0 );
478 			return 0;
479 	}
480 }
481 
/*!
  Numerical formats of Word lists. There are more of them than the five
  that the MS documentation mentions, so what has been found out so far is
  collected here (later we will move it to some better place).
*/
typedef enum
{
  WLNF_INVALID		   = -1,
  WLNF_EUROPEAN_ARABIC = 0,
  WLNF_UPPER_ROMAN	   = 1,
  WLNF_LOWER_ROMAN	   = 2,
  WLNF_UPPER_LETTER    = 3,
  WLNF_LOWER_LETTER    = 4,
  WLNF_ORDINAL		   = 5,
  WLNF_BULLETS		   = 23,
  WLNF_HEBREW_NUMBERS  = 45
} MSWordListIdType;
499 
// Pairs a list id with a level. Instances are stored in m_vLists and
// purged by the importer's destructor.
typedef struct{
  UT_uint32 listId;
  UT_uint32 level;
} ListIdLevelPair;
504 
505 /*!
506  * Map msword list enums back to abi's
507  */
508 static const char *
s_mapDocToAbiListId(MSWordListIdType id)509 s_mapDocToAbiListId (MSWordListIdType id)
510 {
511   switch (id)
512 	{
513 	case WLNF_UPPER_ROMAN: // upper roman
514 	  return "4";
515 
516 	case WLNF_LOWER_ROMAN: // lower roman
517 	  return "3";
518 
519 	case WLNF_UPPER_LETTER: // upper letter
520 	  return "2";
521 
522 	case WLNF_LOWER_LETTER: // lower letter
523 	  return "1";
524 
525 	case WLNF_BULLETS: // bullet list
526 	  return "5";
527 
528 	case WLNF_HEBREW_NUMBERS:
529 	  return "129";
530 
531 	case WLNF_EUROPEAN_ARABIC:
532 	case WLNF_ORDINAL: // ordinal
533 	default:
534 	  return "0";
535 	}
536 }
537 
/*!
 * Form the AW list delimiter string from Word's list format string.
 *
 * \param pStr   Word format string
 * \param iLen   number of 16-bit units available in pStr
 * \param sDelim receives prefix + "%L" + suffix, both parts converted to
 *               UTF-8
 */
static void s_mapDocToAbiListDelim (UT_uint16 * pStr, UT_uint32 iLen, UT_UTF8String &sDelim)
{
	// the Word format string looks like this
	//    prefix '\0' suffix
	// and the '\0' represents the location of the list number/bullet
	UT_uint16 * pPfx = NULL;
	UT_uint16 * pSfx = NULL;

	// there is a prefix only if the string is non-empty and does not
	// start with the '\0' placeholder
	if(iLen && *pStr)
		pPfx = pStr;

	// the suffix begins right after the first '\0'; the last unit is not
	// examined because nothing could follow it
	UT_sint32 i;
	for(i = 0; i < (UT_sint32)iLen - 1; i++)
	{
		if(pStr[i] == 0)
		{
			pSfx = pStr + i + 1;
			break;
		}
	}

	UT_UTF8String sUtf8Pfx;
	UT_UTF8String sUtf8Sfx;

	// from here on i counts the units consumed, so that the two copy
	// loops together never go beyond iLen
	i= 0;
	while(pPfx && *pPfx && i < (UT_sint32)iLen)
	{
		UT_UCS4Char c = *pPfx;
		sUtf8Pfx.appendUCS4(&c,1);
		i++;
		pPfx++;
	}

	i++; // move past the '\0' divider
	while(pSfx && *pSfx && i < (UT_sint32)iLen)
	{
		UT_UCS4Char c = *pSfx;
		sUtf8Sfx.appendUCS4(&c,1);
		i++;
		pSfx++;
	}

	// "%L" takes the place of the '\0' placeholder
	sDelim = sUtf8Pfx;
	sDelim += "%L";
	sDelim += sUtf8Sfx;
}
587 
588 /*!
589  * Map msword list enums back to abi's list styles
590  */
591 static const char *
s_mapDocToAbiListStyle(MSWordListIdType id)592 s_mapDocToAbiListStyle (MSWordListIdType id)
593 {
594   switch (id)
595 	{
596 	case WLNF_UPPER_ROMAN: // upper roman
597 	  return "Upper Roman List";
598 
599 	case WLNF_LOWER_ROMAN: // lower roman
600 	  return "Lower Roman List";
601 
602 	case WLNF_UPPER_LETTER: // upper letter
603 	  return "Upper Case List";
604 
605 	case WLNF_LOWER_LETTER: // lower letter
606 	  return "Lower Case List";
607 
608 	case WLNF_BULLETS: // bullet list
609 	  return "Bullet List";
610 
611 	case WLNF_EUROPEAN_ARABIC:
612 	case WLNF_ORDINAL: // ordinal
613 	default:
614 	  return "Numbered List";
615 	}
616 }
617 
618 /*!
619  * Map msword list enums back to abi's field font for that given style
620  */
621 static const char *
s_fieldFontForListStyle(MSWordListIdType id)622 s_fieldFontForListStyle (MSWordListIdType id)
623 {
624   switch (id)
625 	{
626 	case WLNF_UPPER_ROMAN: // upper roman
627 	  return "NULL";
628 
629 	case WLNF_LOWER_ROMAN: // lower roman
630 	  return "NULL";
631 
632 	case WLNF_UPPER_LETTER: // upper letter
633 	  return "Times New Roman";
634 
635 	case WLNF_LOWER_LETTER: // lower letter
636 	  return "Times New Roman";
637 
638 	case WLNF_BULLETS: // bullet list
639 		UT_DEBUGMSG(("Fieldfont set to symbol \n"));
640 	  return "NULL";
641 
642 	case WLNF_EUROPEAN_ARABIC:
643 	case WLNF_ORDINAL: // ordinal
644 		return "Times New Roman";
645 
646 	default:
647 		UT_DEBUGMSG(("unknown list type %d field-font set to Times New Roman \n",id));
648 	  return "Times New Roman";
649 	}
650 }
651 
652 #if 0
653 
// MS Word uses the language codes as explicit overrides when treating
// weak characters; this function translates a language id to the
// corresponding override direction
657 static bool s_isLanguageRTL(short unsigned int lid)
658 {
659 	const char * s = wvLIDToLangConverter (lid);
660 	UT_Language l;
661 	return (UTLANG_RTL == l.getOrderFromProperty(s));
662 }
663 
664 static FootnoteType s_convertNoteType(UT_uint32 t)
665 {
666 	return 	FOOTNOTE_TYPE_NUMERIC;
667 }
668 
669 #endif
670 
671 /****************************************************************************/
672 /****************************************************************************/
673 
IE_Imp_MsWord_97_Sniffer()674 IE_Imp_MsWord_97_Sniffer::IE_Imp_MsWord_97_Sniffer ()
675 	: IE_ImpSniffer(IE_IMPEXPNAME_MSWORD97)
676 {
677 	//
678 }
679 
// supported suffixes
// NOTE(review): the final row with the empty suffix looks like the end
// marker of the list - confirm against the code that walks the table.
static IE_SuffixConfidence IE_Imp_MsWord_97_Sniffer__SuffixConfidence[] = {
	{ "doc", 	UT_CONFIDENCE_PERFECT 	},
	{ "dot", 	UT_CONFIDENCE_PERFECT 	},
	{ "", 	UT_CONFIDENCE_ZILCH 	}
};

/*!
 * \return the table of file suffixes this sniffer claims, together with
 *         the confidence attached to each
 */
const IE_SuffixConfidence * IE_Imp_MsWord_97_Sniffer::getSuffixConfidence ()
{
	return IE_Imp_MsWord_97_Sniffer__SuffixConfidence;
}
691 
// supported mimetypes
// NOTE(review): the final IE_MIME_MATCH_BOGUS row looks like the end
// marker of the list - confirm against the code that walks the table.
static IE_MimeConfidence IE_Imp_MsWord_97_Sniffer__MimeConfidence[] = {
	{ IE_MIME_MATCH_FULL, 	IE_MIMETYPE_MSWord, 		UT_CONFIDENCE_GOOD 	},
	{ IE_MIME_MATCH_FULL, 	"application/vnd.ms-word",	UT_CONFIDENCE_GOOD 	},
	{ IE_MIME_MATCH_FULL, 	"text/doc", 				UT_CONFIDENCE_GOOD 	}, // or is it? [TODO: check!]
	{ IE_MIME_MATCH_BOGUS, 	"", 						UT_CONFIDENCE_ZILCH }
};

/*!
 * \return the table of mime types this sniffer claims, together with the
 *         confidence attached to each
 */
const IE_MimeConfidence * IE_Imp_MsWord_97_Sniffer::getMimeConfidence ()
{
	return IE_Imp_MsWord_97_Sniffer__MimeConfidence;
}
704 
recognizeContents(GsfInput * input)705 UT_Confidence_t IE_Imp_MsWord_97_Sniffer::recognizeContents (GsfInput * input)
706 {
707 	GsfInfile * ole;
708 
709 	ole = gsf_infile_msole_new (input, NULL);
710 
711 	// invokes the old recognizeContents below, in hopes of identifying
712 	// pre-OLE files
713 	if (!ole)
714 		return IE_ImpSniffer::recognizeContents (input);
715 
716 	UT_Confidence_t confidence = UT_CONFIDENCE_ZILCH;
717 	GsfInput * stream = gsf_infile_child_by_name (ole, "WordDocument");
718 	if (stream)
719 		{
720 			g_object_unref (G_OBJECT (stream));
721 			confidence = UT_CONFIDENCE_PERFECT;
722 		}
723 
724 	g_object_unref (G_OBJECT (ole));
725 
726 	return confidence;
727 }
728 
recognizeContents(const char * szBuf,UT_uint32 iNumbytes)729 UT_Confidence_t IE_Imp_MsWord_97_Sniffer::recognizeContents (const char * szBuf,
730 															 UT_uint32 iNumbytes)
731 {
732 	const char * magic	= 0;
733 	int magicoffset = 0;
734 
735 	magic = "Microsoft Word 6.0 Document";
736 	magicoffset = 2080;
737 	if (iNumbytes > (magicoffset + strlen (magic)))
738 	{
739 		if (!strncmp (szBuf + magicoffset, magic, strlen (magic)))
740 		{
741 			return UT_CONFIDENCE_PERFECT;
742 		}
743 	}
744 
745 	magic = "Documento Microsoft Word 6";
746 	magicoffset = 2080;
747 	if (iNumbytes > (magicoffset + strlen (magic)))
748 	{
749 		if (!strncmp(szBuf + magicoffset, magic, strlen (magic)))
750 		{
751 			return UT_CONFIDENCE_PERFECT;
752 		}
753 	}
754 
755 	magic = "MSWordDoc";
756 	magicoffset = 2112;
757 	if (iNumbytes > (magicoffset + strlen (magic)))
758 	{
759 		if (!strncmp (szBuf + magicoffset, magic, strlen (magic)))
760 		{
761 			return UT_CONFIDENCE_PERFECT;
762 		}
763 	}
764 
765 	// ok, that didn't work, we'll try to dig through the OLE stream
766 	if (iNumbytes > 8)
767 	{
768 	        // this code is too generic - also picks up .wri documents
769 		if (szBuf[0] == static_cast<char>(0x31)
770 			&& static_cast< unsigned char>(szBuf[1]) == static_cast< unsigned char>(0xbe)
771 			&&  szBuf[2] == static_cast<char>(0)
772 			&& szBuf[3] == static_cast<char>(0))
773 		{
774 		  return UT_CONFIDENCE_SOSO; //POOR
775 		}
776 
777 		// this identifies staroffice dox as well
778 		if (static_cast< unsigned char>(szBuf[0]) == static_cast<unsigned char>(0xd0)
779 			&& static_cast< unsigned char>(szBuf[1]) == static_cast<unsigned char>(0xcf)
780 			&& szBuf[2] == static_cast<char>(0x11)
781 			&& static_cast< unsigned char>(szBuf[3]) == static_cast<unsigned char>(0xe0)
782 			&& static_cast< unsigned char>(szBuf[4]) == static_cast<unsigned char>(0xa1)
783 			&& static_cast< unsigned char>(szBuf[5]) == static_cast<unsigned char>(0xb1)
784 			&& szBuf[6] == static_cast<char>(0x1a)
785 			&& static_cast< unsigned char>(szBuf[7]) == static_cast<unsigned char>(0xe1))
786 		{
787 		  return UT_CONFIDENCE_SOSO; // POOR
788 		}
789 
790 		if (szBuf[0] == 'P' && szBuf[1] == 'O' &&
791 			szBuf[2] == '^' && szBuf[3] == 'Q' && szBuf[4] == '`')
792 		{
793 			return UT_CONFIDENCE_POOR;
794 		}
795 		if (static_cast< unsigned char>(szBuf[0]) == static_cast<unsigned char>(0xfe)
796 			&& szBuf[1] == static_cast<char>(0x37)
797 			&& szBuf[2] == static_cast<char>(0)
798 			&& szBuf[3] == static_cast<char>(0x23))
799 		{
800 			return UT_CONFIDENCE_POOR;
801 		}
802 
803 		/* WinWord 2 */
804 		if (static_cast< unsigned char>(szBuf[0]) == static_cast<unsigned char>(0xdb)
805 			&& static_cast< unsigned char>(szBuf[1]) == static_cast<unsigned char>(0xa5)
806 			&& szBuf[2] == static_cast<char>(0x2d)
807 			&& szBuf[3] == static_cast<char>(0))
808 		{
809 			return UT_CONFIDENCE_PERFECT;
810 		}
811 	}
812 	return UT_CONFIDENCE_ZILCH;
813 }
814 
constructImporter(PD_Document * pDocument,IE_Imp ** ppie)815 UT_Error IE_Imp_MsWord_97_Sniffer::constructImporter (PD_Document * pDocument,
816 													  IE_Imp ** ppie)
817 {
818 	IE_Imp_MsWord_97 * p = new IE_Imp_MsWord_97(pDocument);
819 	*ppie = p;
820 	return UT_OK;
821 }
822 
getDlgLabels(const char ** pszDesc,const char ** pszSuffixList,IEFileType * ft)823 bool	IE_Imp_MsWord_97_Sniffer::getDlgLabels (const char ** pszDesc,
824 												const char ** pszSuffixList,
825 												IEFileType * ft)
826 {
827 	*pszDesc = "Microsoft Word (.doc, .dot)";
828 	*pszSuffixList = "*.doc; *.dot";
829 	*ft = getFileType();
830 	return true;
831 }
832 
833 /****************************************************************************/
834 /****************************************************************************/
835 
836 // just buffer sizes, arbitrarily chosen
837 #define DOC_TEXTRUN_SIZE 2048
838 #define DOC_PROPBUFFER_SIZE 1024
839 
~IE_Imp_MsWord_97()840 IE_Imp_MsWord_97::~IE_Imp_MsWord_97()
841 {
842 	if(m_pBookmarks)
843 	{
844 		// g_free the names from the bookmarks
845 		for(UT_uint32 i = 0; i < m_iBookmarksCount; i++)
846 		{
847 			// make sure we do not delete any name twice
848 			if(m_pBookmarks[i].name && m_pBookmarks[i].start)
849 			{
850 			   delete[] m_pBookmarks[i].name;
851 			   m_pBookmarks[i].name = NULL;
852 			}
853 		}
854 		delete [] m_pBookmarks;
855 	}
856 
857 	UT_VECTOR_PURGEALL(ListIdLevelPair *, m_vLists);
858 	UT_VECTOR_PURGEALL(emObject *, m_vecEmObjects);
859 	UT_VECTOR_PURGEALL(textboxPos *, m_vecTextboxPos);
860 
861 	DELETEPV(m_pTextboxes);
862 	DELETEPV(m_pFootnotes);
863 	DELETEPV(m_pEndnotes);
864 	DELETEPV(m_pHeaders);
865 }
866 
/*!
 * Construct an importer for the given document.
 *
 * Counters start at 0, pointers at NULL, and the various start/end
 * positions at 0xffffffff. The nine entries of m_iListIdIncrement are
 * zeroed and m_vecTextboxPos is emptied in the body.
 * NOTE(review): 0xffffffff presumably stands for "position not known
 * yet" - confirm against the code that sets these members.
 *
 * \param pDocument document passed on to the IE_Imp base class
 */
IE_Imp_MsWord_97::IE_Imp_MsWord_97(PD_Document * pDocument)
  : IE_Imp (pDocument),
	m_nSections(0),
	m_bSetPageSize(false),
	m_bIsLower(false),
	m_bInSect(false),
	m_bInPara(false),
	// bidi state
	m_bLTRCharContext(true),
	m_bLTRParaContext(true),
	m_iOverrideIssued(UT_BIDI_UNSET),
	m_bBidiMode(false),
	m_bInLink(false),
	// arrays released again in the destructor, with their counts
	m_pBookmarks(NULL),
	m_iBookmarksCount(0),
	m_pFootnotes(NULL),
	m_iFootnotesCount(0),
	m_pEndnotes(NULL),
	m_iEndnotesCount(0),
	m_pTextboxes(NULL),
	m_iTextboxCount(0),
    m_iMSWordListId(0),
    m_bEncounteredRevision(false),
	// table state
    m_bInTable(false),
	m_iRowsRemaining(0),
    m_iCellsRemaining(0),
    m_iCurrentRow(0),
    m_iCurrentCell(0),
    m_bRowOpen(false),
	m_bCellOpen(false),
	// footnote / endnote state
	m_iFootnotesStart(0xffffffff),
	m_iFootnotesEnd(0xffffffff),
	m_iEndnotesStart(0xffffffff),
	m_iEndnotesEnd(0xffffffff),
	m_iNextFNote(0),
	m_iNextENote(0),
	m_bInFNotes(false),
	m_bInENotes(false),
	m_pNotesEndSection(NULL),
	// header state
	m_pHeaders(NULL),
	m_iHeadersCount(0),
	m_iHeadersStart(0xffffffff),
	m_iHeadersEnd(0xffffffff),
	m_iCurrentHeader(0),
	m_bInHeaders(false),
	m_iCurrentSectId(0),
	// annotation, macro and text positions
	m_iAnnotationsStart(0xffffffff),
	m_iAnnotationsEnd(0xffffffff),
	m_iMacrosStart(0xffffffff),
	m_iMacrosEnd(0xffffffff),
	m_iTextStart(0xffffffff),
	m_iTextEnd(0xffffffff),
	m_bPageBreakPending(false),
    m_bLineBreakPending(false),
	m_bSymbolFont(false),
	m_dim(DIM_IN),
	m_iLeft(0),
	m_iRight(0),
	// textbox state
	m_iTextboxesStart(0xffffffff),
	m_iTextboxesEnd(0xffffffff),
	m_iNextTextbox(0),
	m_iPrevHeaderPosition(0xffffffff),
	m_bEvenOddHeaders(false),
	m_bInTOC(false),
	m_bTOCsupported(false),
	m_bInTextboxes(false),
	m_pTextboxEndSection(NULL),
	m_iLeftCellPos(0),
	m_iLastAppendedHeader(0xffffffff)
{
  // zero all nine entries of the list id increment array
  for(UT_uint32 i = 0; i < 9; i++)
	  m_iListIdIncrement[i] = 0;
  m_vecTextboxPos.clear();
}
940 
941 /****************************************************************************/
942 /****************************************************************************/
943 
// Calls wvOLEFree() on a variable named 'ps', which must be in scope where
// the macro is used, and then returns 'code' from the enclosing function.
#define ErrCleanupAndExit(code)  do {wvOLEFree (&ps); return(code);} while(0)

// Asks for a password via _getPassword(), using the application's last
// focussed frame.
#define GetPassword() _getPassword ( XAP_App::getApp()->getLastFocussedFrame() )

// Passes x to _errorMessage() together with the last focussed frame; does
// nothing when there is no such frame.
// NOTE(review): the only _errorMessage() definition in this part of the
// file is inside an "#if 0" region, so using this macro would presumably
// fail to compile - confirm.
#define ErrorMessage(x) do { XAP_Frame *_pFrame = XAP_App::getApp()->getLastFocussedFrame(); if ( _pFrame ) _errorMessage (_pFrame, (x)); } while (0)
949 
_getPassword(XAP_Frame * pFrame)950 static UT_UTF8String _getPassword (XAP_Frame * pFrame)
951 {
952   UT_UTF8String password ( "" );
953 
954   if ( pFrame )
955     {
956       pFrame->raise ();
957 
958       XAP_DialogFactory * pDialogFactory
959 		  = (XAP_DialogFactory *)(pFrame->getDialogFactory());
960 
961       XAP_Dialog_Password * pDlg = static_cast<XAP_Dialog_Password*>(pDialogFactory->requestDialog(XAP_DIALOG_ID_PASSWORD));
962       UT_return_val_if_fail(pDlg, password);
963 
964       pDlg->runModal (pFrame);
965 
966       XAP_Dialog_Password::tAnswer ans = pDlg->getAnswer();
967       bool bOK = (ans == XAP_Dialog_Password::a_OK);
968 
969       if (bOK)
970 		  password = pDlg->getPassword ();
971 
972       pDialogFactory->releaseDialog(pDlg);
973     }
974 
975   return password;
976 }
977 
978 #if 0
979 static void _errorMessage (XAP_Frame * pFrame, int id)
980 {
981   UT_return_if_fail(pFrame);
982 
983   const XAP_StringSet * pSS = XAP_App::getApp ()->getStringSet ();
984 
985   const char * text = pSS->getValue (id, pFrame->getApp()->getDefaultEncoding()).c_str();
986 
987   pFrame->showMessageBox (text, XAP_Dialog_MessageBox::b_O,
988 						  XAP_Dialog_MessageBox::a_OK);
989 }
990 #endif
991 
// Table pairing gsf metadata names with AbiWord metadata keys. A NULL
// abi_metadata_name marks a gsf name for which no AbiWord key is given
// here.
// NOTE(review): the code that consults this table is not in this part of
// the file; presumably entries with a NULL key are not imported - confirm.
static const struct {
  const char * metadata_key;       // gsf metadata name (GSF_META_NAME_*)
  const char * abi_metadata_name;  // AbiWord key (PD_META_KEY_*) or NULL
} metadata_names[] = {
  { GSF_META_NAME_TITLE, PD_META_KEY_TITLE },
  { GSF_META_NAME_DESCRIPTION, PD_META_KEY_DESCRIPTION },
  { GSF_META_NAME_SUBJECT, PD_META_KEY_SUBJECT },
  { GSF_META_NAME_DATE_MODIFIED, PD_META_KEY_DATE_LAST_CHANGED },
  { GSF_META_NAME_DATE_CREATED, PD_META_KEY_DATE },
  { GSF_META_NAME_KEYWORDS, PD_META_KEY_KEYWORDS },
  { GSF_META_NAME_LANGUAGE, PD_META_KEY_LANGUAGE },
  { GSF_META_NAME_REVISION_COUNT, NULL },
  { GSF_META_NAME_EDITING_DURATION, NULL },
  { GSF_META_NAME_TABLE_COUNT, NULL },
  { GSF_META_NAME_IMAGE_COUNT, NULL },
  { GSF_META_NAME_OBJECT_COUNT, NULL },
  { GSF_META_NAME_PAGE_COUNT, NULL },
  { GSF_META_NAME_PARAGRAPH_COUNT, NULL },
  { GSF_META_NAME_WORD_COUNT, NULL },
  { GSF_META_NAME_CHARACTER_COUNT, NULL },
  { GSF_META_NAME_CELL_COUNT, NULL },
  { GSF_META_NAME_SPREADSHEET_COUNT, NULL },
  { GSF_META_NAME_CREATOR, PD_META_KEY_CREATOR },
  { GSF_META_NAME_TEMPLATE, NULL },
  { GSF_META_NAME_LAST_SAVED_BY, NULL },
  { GSF_META_NAME_LAST_PRINTED, NULL },
  { GSF_META_NAME_SECURITY, NULL },
  { GSF_META_NAME_CATEGORY, NULL },
  { GSF_META_NAME_PRESENTATION_FORMAT, NULL },
  { GSF_META_NAME_THUMBNAIL, NULL },
  { GSF_META_NAME_GENERATOR, PD_META_KEY_GENERATOR },
  { GSF_META_NAME_LINE_COUNT, NULL },
  { GSF_META_NAME_SLIDE_COUNT, NULL },
  { GSF_META_NAME_NOTE_COUNT, NULL },
  { GSF_META_NAME_HIDDEN_SLIDE_COUNT, NULL },
  { GSF_META_NAME_MM_CLIP_COUNT, NULL },
  { GSF_META_NAME_BYTE_COUNT, NULL },
  { GSF_META_NAME_SCALE, NULL },
  { GSF_META_NAME_HEADING_PAIRS, NULL },
  { GSF_META_NAME_DOCUMENT_PARTS, NULL },
  { GSF_META_NAME_MANAGER, PD_META_KEY_CONTRIBUTOR },
  { GSF_META_NAME_COMPANY, PD_META_KEY_PUBLISHER },
  { GSF_META_NAME_LINKS_DIRTY, NULL },
  { GSF_META_NAME_MSOLE_UNKNOWN_17, NULL },
  { GSF_META_NAME_MSOLE_UNKNOWN_18, NULL },
  { GSF_META_NAME_MSOLE_UNKNOWN_19, NULL },
  { GSF_META_NAME_MSOLE_UNKNOWN_20, NULL },
  { GSF_META_NAME_MSOLE_UNKNOWN_21, NULL },
  { GSF_META_NAME_MSOLE_UNKNOWN_22, NULL },
  { GSF_META_NAME_MSOLE_UNKNOWN_23, NULL },
  { GSF_META_NAME_DICTIONARY, NULL },
  { GSF_META_NAME_LOCALE_SYSTEM_DEFAULT, NULL },
  { GSF_META_NAME_CASE_SENSITIVE, NULL }
};
// number of rows in metadata_names
static const gsize nr_metadata_names = G_N_ELEMENTS(metadata_names);
1047 
// User data handed to cb_print_property() through gsf_doc_meta_data_foreach().
struct DocAndLid
{
	PD_Document *doc;	// document that receives the metadata properties
	int lid;			// language id of the file (taken from ps->fib.lid by the caller)
};
1053 
1054 static void
cb_print_property(char const * name,GsfDocProp const * prop,DocAndLid * doc)1055 cb_print_property (char const *name, GsfDocProp const *prop, DocAndLid * doc)
1056 {
1057   GValue const *val = gsf_doc_prop_get_val  (prop);
1058 
1059   if (! VAL_IS_GSF_DOCPROP_VECTOR ((GValue *)val)) {
1060 
1061 	  // just scan over the table. consider optimizing if we really care to.
1062 	  for(gsize i = 0; i < nr_metadata_names; i++) {
1063 		  if(strcmp(metadata_names[i].metadata_key, name) == 0) {
1064 			  char const * abi_metadata_name = metadata_names[i].abi_metadata_name;
1065 
1066 			  if(abi_metadata_name != NULL) {
1067 				  const char * encoding = NULL;
1068 				  if (doc->lid >> 8 != 0x04) {
1069 					// header is not utf8 encoded
1070 				  	encoding = wvLIDToCodePageConverter(doc->lid);
1071 				  }
1072 				  char *tmp;
1073 
1074 				  if (G_VALUE_HOLDS(val, G_TYPE_STRING))
1075 					  {
1076 						  // special-case strings. it seems that g_value_get_string()
1077 						  // and g_strdup_value_contents() may return different things
1078 						  // check with document from bug 11148
1079 						  const char * contents = g_value_get_string(val);
1080 
1081 						  if (encoding && *encoding)
1082 							  {
1083 								  tmp = g_convert_with_fallback(contents, -1, (gchar*)"UTF-8", encoding, (gchar*)"?", NULL, NULL, NULL);
1084 							  }
1085 						  else
1086 							  {
1087 								  tmp = g_strdup(contents);
1088 							  }
1089 
1090 					  }
1091 				  else
1092 					  {
1093 						  // coerce into a string
1094 						  tmp = g_strdup_value_contents(val);
1095 					  }
1096 
1097 				  char * meta = tmp;
1098 				  // strip beginning and ending quotes
1099 				  if(meta && strcmp(meta,"\"\"")) { // ignore '""' props
1100 					  if(meta[0] == '"')
1101 						  meta++;
1102 					  int len = strlen(meta);
1103 					  if ((len > 0) && meta[len - 1] == '"') {
1104 						  meta[len - 1] = '\0';
1105 					  }
1106 					  if (*meta) {
1107 						  doc->doc->setMetaDataProp(abi_metadata_name, meta);
1108 					  }
1109 				  }
1110 				  g_free (tmp);
1111 			  }
1112 		  }
1113 	  }
1114   }
1115 }
1116 
print_summary_stream(GsfInfile * msole,const char * stream_name,int lid,PD_Document * doc)1117 static void print_summary_stream (GsfInfile * msole,
1118 								  const char * stream_name,
1119 								  int lid,
1120 								  PD_Document * doc)
1121 {
1122   GsfInput * stream = gsf_infile_child_by_name (msole, stream_name);
1123   if (stream != NULL) {
1124     GsfDocMetaData *meta_data = gsf_doc_meta_data_new ();
1125     GError    *err = NULL;
1126 
1127     err = gsf_msole_metadata_read (stream, meta_data);
1128     if (err != NULL) {
1129       g_warning ("Error getting metadata for %s: %s", stream_name, err->message);
1130       g_error_free (err);
1131       err = NULL;
1132     } else {
1133 		DocAndLid dil;
1134 
1135 		dil.doc = doc;
1136 		dil.lid = lid;
1137 		gsf_doc_meta_data_foreach (meta_data,
1138 								   (GHFunc) cb_print_property, &dil);
1139     }
1140 
1141     g_object_unref (meta_data);
1142     g_object_unref (G_OBJECT (stream));
1143   }
1144 }
1145 
_handleMetaData(wvParseStruct * ps)1146 void IE_Imp_MsWord_97::_handleMetaData(wvParseStruct *ps)
1147 {
1148 	print_summary_stream (GSF_INFILE(ps->ole_file), "\05SummaryInformation", ps->fib.lid, getDoc());
1149 	print_summary_stream (GSF_INFILE(ps->ole_file), "\05DocumentSummaryInformation", ps->fib.lid, getDoc());
1150 }
1151 
_loadFile(GsfInput * fp)1152 UT_Error IE_Imp_MsWord_97::_loadFile(GsfInput * fp)
1153 {
1154   wvParseStruct ps;
1155 
1156   int ret = wvInitParser_gsf(&ps, fp);
1157   const char * password = NULL;
1158 
1159   if (ret & 0x8000)		/* Password protected? */
1160     {
1161       UT_UTF8String pass (GetPassword());
1162       if ( pass.size () != 0 )
1163 		  password = pass.utf8_str();
1164 
1165       if ((ret & 0x7fff) == WORD8)
1166 	{
1167 	  ret = 0;
1168 	  if (password == NULL)
1169 	    {
1170 			//ErrorMessage(AP_STRING_ID_WORD_PassRequired);
1171 	      ErrCleanupAndExit(UT_IE_PROTECTED);
1172 	    }
1173 	  else
1174 	    {
1175 	      wvSetPassword (password, &ps);
1176 	      if (wvDecrypt97 (&ps))
1177 		{
1178 			//ErrorMessage(AP_STRING_ID_WORD_PassInvalid);
1179 		  ErrCleanupAndExit(UT_IE_PROTECTED);
1180 		}
1181 	    }
1182 	}
1183       else if (((ret & 0x7fff) == WORD7) || ((ret & 0x7fff) == WORD6))
1184 	{
1185 	  ret = 0;
1186 	  if (password == NULL)
1187 	    {
1188 			//ErrorMessage(AP_STRING_ID_WORD_PassRequired);
1189 	      ErrCleanupAndExit(UT_IE_PROTECTED);
1190 	    }
1191 	  else
1192 	    {
1193 	      wvSetPassword (password, &ps);
1194 	      if (wvDecrypt95 (&ps))
1195 		{
1196 		  //("Incorrect Password\n"));
1197 		  ErrCleanupAndExit(UT_IE_PROTECTED);
1198 		}
1199 	    }
1200 	}
1201     }
1202 
1203   if (ret) {
1204     ErrCleanupAndExit(UT_IE_BOGUSDOCUMENT);
1205   }
1206 
1207   // register ourself as the userData
1208   ps.userData = this;
1209 
1210   // register callbacks
1211   wvSetElementHandler (&ps, eleProc);
1212   wvSetCharHandler (&ps, charProc);
1213   wvSetSpecialCharHandler(&ps, specCharProc);
1214   wvSetDocumentHandler (&ps, docProc);
1215 
1216   // need to init doc props
1217   if(!getLoadStylesOnly())
1218 	  getDoc()->setAttrProp(NULL);
1219 
1220   _handleMetaData(&ps);
1221   wvText(&ps);
1222 
1223   if(getLoadStylesOnly()) {
1224     wvOLEFree(&ps);
1225     return UT_OK;
1226   }
1227 
1228   wvOLEFree(&ps);
1229 
1230   // We can't be in a good state if we didn't add any sections!
1231   if (m_nSections == 0)
1232     return UT_IE_BOGUSDOCUMENT;
1233 
1234   return UT_OK;
1235 }
1236 
// Write the buffered text run (m_pTextRun) to the document.
//
// Does nothing when the buffer is empty. Otherwise it first makes sure there
// is a section and a paragraph to put the text into (appending default ones
// if needed, and emitting any bookmark objects that were queued in
// m_vecEmObjects), then appends the text. In bidi mode the run is split so
// that neutral characters can be given an explicit direction override.
//
// The buffer is cleared only when everything was appended; on the early
// return paths (a failed _appendFmt()/_appendSpan()) it is left untouched.
void IE_Imp_MsWord_97::_flush ()
{
  if(!m_pTextRun.size())
	return;

  // we've got to ensure that we're inside of a section & paragraph
  if (!m_bInSect)
	{
	  // append a blank default section - assume it works
	  UT_DEBUGMSG(("#TF: _flush: appending default section\n"));
	  _appendStrux(PTX_Section, NULL);
	  m_bInSect = true;
	  m_nSections++;
	}

  // if the document currently ends in a strux other than a block or an
  // end-of-footnote/endnote marker, we cannot be inside a paragraph
  pf_Frag * pF = getDoc()->getLastFrag();
  if (pF && pF->getType() == pf_Frag::PFT_Strux) {
	  pf_Frag_Strux * pFS = (pf_Frag_Strux*)pF;
	  if ((pFS->getStruxType() != PTX_Block) && (pFS->getStruxType() != PTX_EndFootnote) && (pFS->getStruxType() != PTX_EndEndnote))
		  m_bInPara = false;
  }

  if(!m_bInPara)
  {
	  // append a blank default paragraph - assume it works
	  UT_DEBUGMSG(("#TF: _flush: appending default block\n"));
	  _appendStrux(PTX_Block, NULL);
	  m_bInPara = true;
	  // now that there is a block, emit the objects that were queued while
	  // no block was available (see _insertBookmark()), then drop the queue
	  emObject * pObject = NULL;
	  if(m_vecEmObjects.getItemCount() > 0)
	  {
		  UT_sint32 i =0;
		  for(i=0;i< m_vecEmObjects.getItemCount(); i++)
		  {
			  pObject = m_vecEmObjects.getNthItem(i);
			  const gchar* propsArray[5];
			  if(pObject->objType == PTO_Bookmark)
			  {
				  propsArray[0] = static_cast<const gchar *>("name");
				  propsArray[1] = static_cast<const gchar *>(pObject->props1.c_str());
				  propsArray[2] = static_cast<const gchar *>("type");
				  propsArray[3] = static_cast<const gchar *>(pObject->props2.c_str());
				  propsArray[4] = static_cast<const gchar *>(NULL);
				  _appendObject (PTO_Bookmark, propsArray);
			  }
			  else
			  {
				  UT_DEBUGMSG(("MSWord 97 _flush: Object not handled \n"));
				  UT_ASSERT_HARMLESS(UT_SHOULD_NOT_HAPPEN);
			  }
			  delete pObject;
		  }
		  m_vecEmObjects.clear();
	  }
  }

  if (m_pTextRun.size())
  {
	  // bidi adjustments for neutrals
	  //
	  // We have a problem in bidi documents caused by the fact that
	  // Word does not use the Unicode bidi algorithm, but rather one of
	  // its own, which adds keyboard language to the equation. We get
	  // around this by issuing an explicit direction override on the
	  // neutral characters. We do it here in the _flush() function
	  // because when we have both left and right context available
	  // for these characters we can tell if the override is
	  // superfluous, which it is most of the time; omitting the
	  // superfluous overrides allows us to import documents in a
	  // manner that will make them feel more like native AW docs.
	  // (This does not get rid of all the unnecessary overrides, for
	  // that we would need to have the text of an entire paragraph)
	  //
	  // It goes without saying that it would be highly desirable to be
	  // able to determine at the start if a document is pure LTR (as
	  // we do in the RTF importer), since that would save us lot of
	  // extra processing
	  // Tomas, May 8, 2003

	  if(m_bBidiMode)
	  {
		  // three variants of the character properties: without override,
		  // with an LTR override and with an RTL override
		  const gchar* pProps = "props";
		  UT_String prop_basic = m_charProps;

		  UT_String prop_ltr = prop_basic;
		  UT_String prop_rtl = prop_basic;

		  if(prop_basic.size())
		  {
			  prop_ltr += ";";
			  prop_rtl += ";";
		  }
		  else
		  {
			  // if the char props are empty, we need replace them
			  // with the following to avoid asserts in PP_AttrProp
			  prop_basic = "dir-override:";
		  }


		  prop_ltr += "dir-override:ltr";
		  prop_rtl += "dir-override:rtl";

		  const gchar rev[] ="revision";

		  // attribute list for _appendFmt(): "props" plus, if present, the
		  // revision attribute; slot 1 is swapped as the override changes
		  const gchar* propsArray[5];
		  propsArray[0] = pProps;
		  propsArray[1] = prop_basic.c_str();
		  propsArray[2] = NULL;
		  propsArray[3] = NULL;
		  propsArray[4] = NULL;

		  UT_uint32 iEmptyAttrib = 2;

		  if(m_charRevs.size())
		  {
			  propsArray[iEmptyAttrib++] = &rev[0];
			  propsArray[iEmptyAttrib++] = m_charRevs.c_str();
		  }

		  const UT_UCS4Char * p;
		  const UT_UCS4Char * pStart = m_pTextRun.ucs4_str();
		  UT_uint32 iLen = m_pTextRun.size();

		  // iOverride: override currently in force; cType/cLastType/cNextType:
		  // bidi type of the current, previous and following character;
		  // iLast: index of the first character not yet written out
		  UT_BidiCharType iOverride = UT_BIDI_UNSET, cType, cLastType = UT_BIDI_UNSET, cNextType;
		  UT_uint32 iLast = 0;
		  UT_UCS4Char c = *pStart;

		  cType = UT_bidiGetCharType(c);

		  for(UT_uint32 i = 0; i < iLen; i++)
		  {
			  // look one character ahead (iLen is non-zero here, so
			  // iLen - 1 cannot wrap around)
			  if(i < iLen - 1 )
			  {
				  c = *(pStart+i+1);
				  cNextType = UT_bidiGetCharType(c);
			  }
			  else
			  {
				  cNextType = UT_BIDI_UNSET;
			  }


			  if(UT_BIDI_IS_NEUTRAL(cType))
			  {
				  // a neutral needs an override unless both of its
				  // neighbours already have the direction of the context
				  if(m_bLTRCharContext
					 && iOverride != UT_BIDI_LTR
					 && (cLastType != UT_BIDI_LTR || cNextType != UT_BIDI_LTR))
				  {
					  // write out what precedes this character with the
					  // old properties, then switch to the LTR override
					  if(i - iLast > 0)
					  {
						  p = pStart + iLast;
						  if(!_appendFmt(propsArray))
							  return;

						  if(!_appendSpan(p, i - iLast))
							  return;
					  }
					  iOverride = UT_BIDI_LTR;
					  propsArray[1] = prop_ltr.c_str();
					  iLast = i;
				  }
				  else if(!m_bLTRCharContext
						  && iOverride != UT_BIDI_RTL
						  && (cLastType != UT_BIDI_RTL || cNextType != UT_BIDI_RTL))
				  {
					  // same as above, for the RTL override
					  if(i - iLast > 0)
					  {
						  p = pStart + iLast;
						  if(!_appendFmt(propsArray))
							  return;

						  if(!_appendSpan(p, i - iLast))
							  return;
					  }
					  iOverride = UT_BIDI_RTL;
					  propsArray[1] = prop_rtl.c_str();
					  iLast = i;
				  }
			  }
			  else
			  {
				  // strong character; if we previously issued an override,
				  // we need to cancel it
				  if(iOverride != static_cast<UT_uint32>(UT_BIDI_UNSET))
				  {
					  if(i - iLast > 0)
					  {
						  p = pStart + iLast;
						  if(!_appendFmt(propsArray))
							  return;

						  if(!_appendSpan(p, i - iLast))
							  return;
					  }
					  iOverride = UT_BIDI_UNSET;
					  propsArray[1] = prop_basic.c_str();
					  iLast = i;
				  }
			  }

			  cLastType = cType;
			  cType = cNextType;
		  }

		  // insert what is left over
		  if(iLen - iLast > 0)
		  {
			  p = pStart + iLast;
			  if(!_appendFmt(propsArray))
				  return;

			  if(!_appendSpan(p, iLen - iLast))
				  return;
		  }
	  }
	  else
	  {
		  // non-bidi document, just do it the easy way
		  if (!_appendSpan(m_pTextRun.ucs4_str(), m_pTextRun.size()))
		  {
			  UT_DEBUGMSG(("DOM: error appending text run\n"));
			  return;
		  }
	  }

	  m_pTextRun.clear();
  }
}
1466 
_appendChar(UT_UCSChar ch)1467 void IE_Imp_MsWord_97::_appendChar (UT_UCSChar ch)
1468 {
1469   if (m_bInTable) {
1470     switch (ch) {
1471     case 7:			// eat tab characters
1472       return;
1473     case 30:		// ??
1474       ch = '-';
1475 		  break;
1476     }
1477   }
1478 
1479   if ( m_bIsLower )
1480     ch = UT_UCS4_tolower ( ch );
1481   m_pTextRun += ch;
1482 }
1483 
1484 /****************************************************************************/
1485 /****************************************************************************/
1486 
s_cmp_bookmarks_qsort(const void * a,const void * b)1487 static int s_cmp_bookmarks_qsort(const void * a, const void * b)
1488 {
1489 	const bookmark * A = static_cast<const bookmark *>(a);
1490 	const bookmark * B = static_cast<const bookmark *>(b);
1491 
1492 	if(A->pos != B->pos)
1493 		return (A->pos - B->pos);
1494 	else
1495 		// for bookmarks with identical position we want any start bookmarks to be
1496 		// before end bookmarks.
1497 		return static_cast<UT_sint32>(B->start) - static_cast<UT_sint32>(A->start);
1498 }
1499 
s_cmp_bookmarks_bsearch(const void * a,const void * b)1500 static int s_cmp_bookmarks_bsearch(const void * a, const void * b)
1501 {
1502 	UT_uint32 A = *static_cast<const UT_uint32 *>(a);
1503 	const bookmark * B = static_cast<const bookmark *>(b);
1504 
1505 	return (A - B->pos);
1506 }
1507 
_getBookmarkName(const wvParseStruct * ps,UT_uint32 pos)1508 gchar * IE_Imp_MsWord_97::_getBookmarkName(const wvParseStruct * ps, UT_uint32 pos)
1509 {
1510 	gchar *str;
1511 	UT_UTF8String sUTF8;
1512 
1513 	if(ps->Sttbfbkmk.extendedflag == 0xFFFF)
1514 	{
1515 		// 16 bit stuff
1516 		const UT_UCS2Char * p = static_cast<const UT_UCS2Char *>(ps->Sttbfbkmk.u16strings[pos]);
1517 		if(p) {
1518 		  UT_uint32 len  = UT_UCS2_strlen(p);
1519 		  sUTF8.clear();
1520 		  sUTF8.appendUCS2(p, len);
1521 
1522 		  str = new gchar[sUTF8.byteLength()+1];
1523 		  strcpy(str, sUTF8.utf8_str());
1524 		} else
1525 		  str = NULL;
1526 	}
1527 	else
1528 	{
1529 		// 8 bit stuff
1530 		// there is a bug in wv, and the table gets incorrectly retrieved
1531 		// if it contains 8-bit strings
1532 		if(ps->Sttbfbkmk.s8strings[pos])
1533 		{
1534 			UT_uint32 len = strlen(ps->Sttbfbkmk.s8strings[pos]);
1535 			str = new gchar[len + 1];
1536 			UT_uint32 i = 0;
1537 			for(i = 0; i < len; i++)
1538 				str[i] = ps->Sttbfbkmk.s8strings[pos][i];
1539 			str[i] = 0;
1540 		}
1541 		else
1542 			str = NULL;
1543 	}
1544 
1545 	return str;
1546 }
1547 
_docProc(wvParseStruct * ps,UT_uint32 tag)1548 int IE_Imp_MsWord_97::_docProc (wvParseStruct * ps, UT_uint32 tag)
1549 {
1550 	// flush out any pending character data
1551 	this->_flush ();
1552 
1553 	switch (static_cast<wvTag>(tag))
1554 	{
1555 	case DOCBEGIN:
1556 
1557 		// test the bidi nature of this document
1558 #ifdef BIDI_DEBUG
1559 		m_bBidiMode = wvIsBidiDocument(ps);
1560 		UT_DEBUGMSG(("IE_Imp_MsWord_97::_docProc: complex %d, bidi %d\n",
1561 					 ps->fib.fComplex,m_bBidiMode));
1562 #else
1563 		// for now we will assume that all documents are bidi
1564 		// documents (Tomas, Apr 12, 2003)
1565 
1566 		m_bBidiMode = false;
1567 #endif
1568 
1569 		m_bEvenOddHeaders = (ps->dop.fFacingPages != 0);
1570 
1571 		// import styles
1572 		_handleStyleSheet(ps);
1573 
1574 		if(getLoadStylesOnly())
1575 			return 1;
1576 
1577 		// deal with bookmarks
1578 		_handleBookmarks(ps);
1579 
1580 		// deal with footnotes and endnotes, headers
1581 		// first, get the doc offsets of the foot/endnote text
1582 		// (We are interested in the offset of this in the document,
1583 		// not in the data stream; therefore, we do not add
1584 		// ps->fib.fcMin for the simple doc
1585 		// Tthere are some strange docs around that have invalid
1586 		// values for the end of endnote section (e.g. the doc from
1587 		// bug 3283); that's what the if's are about.
1588 		m_iTextStart      = 0;
1589 		m_iTextEnd        = ps->fib.ccpText;
1590 		if(m_iTextEnd == 0xffffffff)
1591 			m_iTextEnd = m_iTextStart;
1592 
1593 		m_iFootnotesStart = m_iTextEnd;
1594 		m_iFootnotesEnd   = m_iFootnotesStart + ps->fib.ccpFtn;
1595 		if(m_iFootnotesEnd == 0xffffffff)
1596 			m_iFootnotesEnd = m_iFootnotesStart;
1597 
1598 		m_iHeadersStart   = m_iFootnotesEnd;
1599 		m_iHeadersEnd     = m_iHeadersStart + ps->fib.ccpHdr;
1600 		if(m_iHeadersEnd == 0xffffffff)
1601 			m_iHeadersEnd = m_iHeadersStart;
1602 
1603 		m_iMacrosStart    = m_iHeadersEnd;
1604 		m_iMacrosEnd      = m_iMacrosStart + ps->fib.ccpMcr;
1605 		if(m_iMacrosEnd == 0xffffffff)
1606 			m_iMacrosEnd = m_iMacrosStart;
1607 
1608 		m_iAnnotationsStart = m_iMacrosEnd;
1609 		m_iAnnotationsEnd = m_iAnnotationsStart + ps->fib.ccpAtn;
1610 		if(m_iAnnotationsEnd == 0xffffffff)
1611 			m_iAnnotationsEnd = m_iAnnotationsStart;
1612 
1613 		m_iEndnotesStart  = m_iAnnotationsEnd;
1614 		m_iEndnotesEnd    = m_iEndnotesStart + ps->fib.ccpEdn;
1615 		if(m_iEndnotesEnd == 0xffffffff)
1616 			m_iEndnotesEnd = m_iEndnotesStart;
1617 
1618 		m_iTextboxesStart = m_iEndnotesEnd;
1619 		m_iTextboxesEnd = m_iTextboxesStart + ps->fib.ccpTxbx;
1620 		UT_DEBUGMSG(("Size of all text in all textboxes %d \n", ps->fib.ccpTxbx));
1621 
1622 		if(m_iTextboxesEnd == 0xffffffff)
1623 			m_iTextboxesEnd = m_iTextboxesStart;
1624 		UT_DEBUGMSG(("  Found %d Positioned TextBoxes \n",ps->nooffspa));
1625 		// now retrieve the note info ...
1626 		_handleNotes(ps);
1627 		_handleHeaders(ps);
1628 		_handleTextBoxes(ps);
1629 
1630 		if(m_iAnnotationsEnd != m_iAnnotationsStart)
1631 			{
1632 				UT_DEBUGMSG(("Annotations of length %d in this doc \n",m_iAnnotationsEnd - m_iAnnotationsStart));
1633 			}
1634 		UT_DEBUGMSG(("Fnotes [%d,%d], Enotes [%d,%d]\n",
1635 					 m_iFootnotesStart, m_iFootnotesEnd, m_iEndnotesStart, m_iEndnotesEnd));
1636 
1637 		///////////////////////////////////////////////////////////////////////////////
1638 		// Set various revision states
1639 		//
1640 		// unlike Word:
1641 		//
1642 		//     * we do not differentiate between screen and print: we
1643 		//       print whatever is on screen
1644 		//
1645 		//     * if show revisions is off, Word shows what the
1646 		//       document looks like _after_ the last revision; by
1647 		//       default we show what it looked _before_ first
1648 		//       revision; we can show the post-revision state by
1649 		//       setting the view id to PD_MAX_REVISION
1650 		//
1651 		//     * we currently do not handle the fLockRev parameter
1652 		{
1653 			bool bShow = ps->dop.fRMView == 1 || ps->dop.fRMPrint == 1;
1654 
1655 			getDoc()->setShowRevisions(bShow);
1656 
1657 			if(!bShow)
1658 			{
1659 				getDoc()->setShowRevisionId(PD_MAX_REVISION);
1660 			}
1661 
1662 			getDoc()->setMarkRevisions(ps->dop.fRevMarking == 1);
1663 		}
1664 
1665 		break;
1666 
1667 	case DOCEND:
1668 		// we want to clean up fmt marks
1669 		getDoc()->purgeFmtMarks();
1670 		break;
1671 	default:
1672 		break;
1673 	}
1674 
1675 	return 0;
1676 }
1677 
_insertBookmark(bookmark * bm)1678 bool IE_Imp_MsWord_97::_insertBookmark(bookmark * bm)
1679 {
1680 	// first of all flush what is in the buffers
1681 	this->_flush();
1682 	bool error = false;
1683 
1684 	const gchar* propsArray[5];
1685 	propsArray[0] = static_cast<const gchar *>("name");
1686 	propsArray[1] = static_cast<const gchar *>(bm->name);
1687 	propsArray[2] = static_cast<const gchar *>("type");
1688 	propsArray[4] = 0;
1689 
1690 	if(bm->start)
1691 		propsArray[3] = static_cast<const gchar *>("start");
1692 	else
1693 		propsArray[3] = static_cast<const gchar *>("end");
1694 
1695 	if(m_bInTable && !m_bCellOpen)
1696 	{
1697 		emObject * pObject = new emObject;
1698 		pObject->props1 = propsArray[1];
1699 		pObject->objType = PTO_Bookmark;
1700 		pObject->props2 = propsArray[3];
1701 		m_vecEmObjects.addItem(pObject);
1702 	}
1703 	else
1704 	{
1705 //
1706 // Bookmarks need to be preceded by Blocks
1707 //
1708 		pf_Frag * pf = getDoc()->getLastFrag();
1709 		while(pf && pf->getType() != pf_Frag::PFT_Strux)
1710 		{
1711 			pf = pf->getPrev();
1712 		}
1713 		if(pf && (pf->getType() == pf_Frag::PFT_Strux) )
1714 		{
1715 			pf_Frag_Strux * pfs = static_cast<pf_Frag_Strux *>(pf);
1716 			if(pfs->getStruxType() != PTX_Block)
1717 			{
1718 				getDoc()->appendStrux(PTX_Block, NULL);
1719 			}
1720 		}
1721 		else if( pf == NULL)
1722 		{
1723 			getDoc()->appendStrux(PTX_Block, NULL);
1724 		}
1725 
1726 		if (!_appendObject (PTO_Bookmark, propsArray))
1727 		{
1728 			UT_DEBUGMSG (("Could not append bookmark object\n"));
1729 			error = true;
1730 		}
1731 	}
1732 	return error;
1733 }
1734 
_insertBookmarkIfAppropriate(UT_uint32 iDocPosition)1735 bool IE_Imp_MsWord_97::_insertBookmarkIfAppropriate(UT_uint32 iDocPosition)
1736 {
1737 	//now search for position iDocPosition in our bookmark list;
1738 	bookmark * bm, * lastBm;
1739 	if (m_iBookmarksCount == 0) {
1740 		bm = static_cast<bookmark*>(NULL);
1741 	}
1742 	else {
1743 		bm = static_cast<bookmark*>( bsearch(static_cast<const void *>(&iDocPosition),
1744 				m_pBookmarks, m_iBookmarksCount, sizeof(bookmark),
1745 				s_cmp_bookmarks_bsearch));
1746 	}
1747 	bool error = false;
1748 	if(bm)
1749 	{
1750 	   // there is a bookmark at the current position
1751 	   // first make sure the returned bookmark is the first one at this position
1752 	   while(bm > m_pBookmarks && (bm - 1)->pos == iDocPosition)
1753 		   bm--;
1754 
1755 	   lastBm = &m_pBookmarks[m_iBookmarksCount];
1756 
1757 	   while(bm < lastBm && bm->pos == iDocPosition)
1758 		  error |= _insertBookmark(bm++);
1759 	}
1760 	return error;
1761 }
1762 
// Character callback of the wv parser: handles one character of the text.
//
// ps       - parser state (current position, field state)
// eachchar - the character
// chartype - non-zero when the character still has to be converted to
//            unicode using the code page for lid
// lid      - language id used for that conversion
//
// Control characters (line/page/paragraph/column breaks and the field
// markers 19-21) are handled specially; everything else ends up in the
// pending text run. Always returns 0.
int IE_Imp_MsWord_97::_charProc (wvParseStruct *ps, U16 eachchar, U8 chartype, U16 lid)
{
	// make sure we are not past the end of the document ...
	// this can happen with some complex documents
	if(ps->currentcp >= m_iTextboxesEnd)
	{
		UT_DEBUGMSG(("IE_Imp_MsWord_97::_charProc: processing past end of document !!! %d \n",ps->currentcp ));
		return 0;
	}

	// reset the page break tracker
	if(m_bPageBreakPending)
	{
		// we have a page break pending, and being here means that it
		// was not a section break; we have to append it first and
		// then continue normal processing
		this->_appendChar (UCS_FF);
		m_bPageBreakPending = false;
	}

	// reset the line break tracker
	if(m_bLineBreakPending)
	{
		// we have a line break pending
		this->_appendChar (UCS_LF);
		m_bLineBreakPending = false;
	}

	// give the header, note and textbox handlers a chance to act on the
	// current position; if any of them returns false the character is dropped
	if(!_handleHeadersText(ps->currentcp,true))
		return 0;
	if(!_handleNotesText(ps->currentcp))
		return 0;
	if(!_handleTextboxesText(ps->currentcp))
		return 0;

	// insert any required bookmarks, but only if we are not in a
	// field ...
	if(!ps->fieldstate)
		_insertBookmarkIfAppropriate(ps->currentcp);

	// if a note was inserted at this position the character is consumed
	if(_insertNoteIfAppropriate(ps->currentcp,eachchar))
		return 0;

	// convert incoming character to unicode
	if (chartype)
		eachchar = wvHandleCodePage(eachchar, lid);

	switch (eachchar)
	{

	case 11: // forced line break
		eachchar = UCS_LF;
		break;

	case 12: // page or section break
		this->_flush ();
		//eachchar = UCS_FF;
		// we will not append page breaks to the buffer, only mark it
		// as pending append; that will allow us later to decide if we
		// should or should not append it (we want to remove any page
		// break that is at an end of a section)
		m_bPageBreakPending = true;
		return 0;

	case 13: // end of paragraph
	  this->_flush();
	  // see bug 9370
	  // <delackner> aaah actually, Cocoa's writer is *definitely* broken
	  // <delackner> ms word thinks the second para is part of the first, but broken with a non-paragraph-breaking-line-break
	  // so we'll treat this like msword does
	  m_bLineBreakPending = true;
	  return 0;

	case 14: // column break
		eachchar = UCS_VTAB;
		break;

	case 19: // field begin
		this->_flush ();
		ps->fieldstate++;
		ps->fieldmiddle = 0;
		this->_fieldProc (ps, eachchar, chartype, lid);
		return 0;

	case 20: // field separator; some docs have spurious 0x14's in
			 // them, see bug 3745
		if (ps->fieldstate)
		{
			this->_fieldProc (ps, eachchar, chartype, lid);
			ps->fieldmiddle = 1;
		}
		return 0;

	case 21: // field end
		// ignored when no field is open, so that fieldstate cannot be
		// decremented below zero here
		if (ps->fieldstate)
		{
			ps->fieldstate--;
			ps->fieldmiddle = 0;
			this->_fieldProc (ps, eachchar, chartype, lid);
		}
		return 0;
	}

	// i'm not sure if this is needed any more
	// yes, it is, for instance hyperlinks need it
	if (ps->fieldstate)
	{
		xxx_UT_DEBUGMSG(("DOM: fieldstate\n"));
		// a non-zero result means the field code consumed the character
		if(this->_fieldProc (ps, eachchar, chartype, lid))
		{
			return 0;
		}
	}

	// take care of any oddities in Microsoft's character encoding
	if (chartype == 1 && eachchar == 146)
		eachchar = 39; // apostrophe

	// for symbol fonts only the low byte of the character is kept
	if(m_bSymbolFont)
	{
		eachchar &= 0x00ff;
	}

	// see bug 9370. we probably got a char 13, but no open paragraph.
	if(!m_bInPara) {
	  this->_appendChar (UCS_LF);
	  _flush();
	}

	this->_appendChar (static_cast<UT_UCSChar>(eachchar));

	return 0;
}
1896 
_specCharProc(wvParseStruct * ps,U16 eachchar,CHP * achp)1897 int IE_Imp_MsWord_97::_specCharProc (wvParseStruct *ps, U16 eachchar, CHP *achp)
1898 {
1899 	// make sure we are not past the end of the document ...
1900 	// this can happen with some complex documents
1901 	if(ps->currentcp >= m_iTextboxesEnd)
1902 	{
1903 		UT_DEBUGMSG(("IE_Imp_MsWord_97::_specCharProc: processing past end of document !!!\n"));
1904 		return 0;
1905 	}
1906 
1907 	Blip blip;
1908 	long pos;
1909 	FSPA * fspa;
1910 	//FDOA * fdoa;
1911 #ifdef SUPPORTS_OLD_IMAGES
1912 	wvStream *fil;
1913 	PICF picf;
1914 #endif
1915 
1916 	if(!_handleHeadersText(ps->currentcp,true))
1917 		return 0;
1918 
1919 	if(!_handleNotesText(ps->currentcp))
1920 		return 0;
1921 
1922 	if(!_handleTextboxesText(ps->currentcp))
1923 		return 0;
1924 
1925 	// insert any required bookmarks, but only if we are not in a
1926 	// field ...
1927 	if(!ps->fieldstate)
1928 		_insertBookmarkIfAppropriate(ps->currentcp);
1929 
1930 	if(_insertNoteIfAppropriate(ps->currentcp,0))
1931 		return 0;
1932 
1933 	if(eachchar == 0x28)
1934 	{
1935 		// this is a symbol; the font is identified by achp->ftcSym and the char code is
1936 		// achp->xchSym
1937 		this->_appendChar(achp->xchSym);
1938 		return 0;
1939 	}
1940 
1941 	//
1942 	// This next bit of code is to handle fields
1943 	//
1944 
1945 	switch (eachchar)
1946 	{
1947 
1948 	case 19: // field begin
1949 		this->_flush ();
1950 		ps->fieldstate++;
1951 		ps->fieldmiddle = 0;
1952 		this->_fieldProc (ps, eachchar, 0, 0x400);
1953 		return 0;
1954 
1955 	case 20: // field separator
1956 		if (achp->fOle2)
1957 		{
1958 			UT_DEBUGMSG(("Field has an associated embedded OLE object\n"));
1959 		}
1960 		ps->fieldmiddle = 1;
1961 		this->_fieldProc (ps, eachchar, 0, 0x400);
1962 		return 0;
1963 
1964 	case 21: // field end
1965 		ps->fieldstate--;
1966 		ps->fieldmiddle = 0;
1967 		this->_fieldProc (ps, eachchar, 0, 0x400);
1968 		return 0;
1969 
1970 	}
1971 
1972 	/* it seems some fields characters slip through here which tricks
1973 	 * the import into thinking it has an image with it really does
1974 	 * not. this catches special characters in a field
1975 	 */
1976 	if (ps->fieldstate) {
1977 		if (this->_fieldProc(ps, eachchar, 0, 0x400))
1978 			return 0;
1979 	}
1980 
1981 	//
1982 	// This next bit of code is to handle OLE2 embedded objects and images
1983 	//
1984 
1985 	switch (eachchar)
1986 	{
1987 	case 0x01: // Older ( < Word97) image, currently not handled very well
1988 		if (achp->fOle2) {
1989 			UT_DEBUGMSG(("embedded OLE2 component. currently unsupported"));
1990 			return 0;
1991 		}
1992 
1993 		pos = wvStream_tell(ps->data);
1994 
1995 #ifdef SUPPORTS_OLD_IMAGES
1996 		UT_DEBUGMSG(("Pre W97 Image format.\n"));
1997 		wvStream_goto(ps->data, achp->fcPic_fcObj_lTagObj);
1998 
1999 		if (1 == wvGetPICF(wvQuerySupported(&ps->fib, NULL), &picf,
2000 						   ps->data) && NULL != picf.rgb)
2001 		{
2002 			fil = picf.rgb;
2003 
2004 			if (wv0x01(&blip, fil, picf.lcb - picf.cbHeader))
2005 			{
2006 				this->_handleImage(&blip, picf.mx * picf.dxaGoal / 1000, picf.my * picf.dyaGoal / 1000, picf.dyaCropTop, picf.dyaCropBottom, picf.dxaCropLeft, picf.dxaCropRight);
2007 			}
2008 			else
2009 			{
2010 				UT_DEBUGMSG(("Dom: no graphic data\n"));
2011 			}
2012 
2013 			wvStream_goto(ps->data, pos);
2014 
2015 			return 0;
2016 		}
2017 		else
2018 		{
2019 			UT_DEBUGMSG(("Couldn't import graphic!\n"));
2020 			return 0;
2021 		}
2022 #else
2023 		UT_DEBUGMSG(("DOM: 0x01 graphics support is disabled at the moment\n"));
2024 		wvStream_goto(ps->data, pos);
2025 
2026 		return 0;
2027 #endif
2028 		break;
2029 	case 0x08: // Word 97, 2000, XP image
2030 		if (wvQuerySupported(&ps->fib, NULL) >= WORD8) // sanity check
2031 		{
2032 			if (ps->nooffspa > 0)
2033 			{
2034 
2035 				fspa = wvGetFSPAFromCP(ps->currentcp, ps->fspa,
2036 									   ps->fspapos, ps->nooffspa);
2037 
2038 				if(!fspa)
2039 				{
2040 					UT_DEBUGMSG(("No fspa! Panic and Insanity Abounds!\n"));
2041 					return 0;
2042 				}
2043 				UT_DEBUGMSG(("Found a psfa! \n"));
2044 				double dLeft,dRight,dTop,dBottom = 0.0;
2045 				dLeft = static_cast<double>(fspa->xaLeft)/1440.0;
2046 				dRight = static_cast<double>(fspa->xaRight)/1440.0;
2047 				dTop = static_cast<double>(fspa->yaTop)/1440.0;
2048 				dBottom = static_cast<double>(fspa->yaBottom)/1440.0;
2049 				UT_DEBUGMSG(("Left %f Right %f Top %f Bottom %f \n",dLeft,dRight,dTop,dBottom));
2050 				UT_DEBUGMSG(("spid %d cTxbx %d \n",fspa->spid,fspa->cTxbx));
2051 				UT_DEBUGMSG(("fHdr %d bx %d by %d wr %d wrk %d fRcaSimple %d fBelowText %d fAnchorLock %d \n",fspa->fHdr,fspa->bx,fspa->by,fspa->wr,fspa->wrk,fspa->fRcaSimple,fspa->fBelowText,fspa->fAnchorLock));
2052 				UT_String sImageName;
2053 				bool bPositionObject = false;
2054 				if (wv0x08(&blip, fspa->spid, ps))
2055 				{
2056 //
2057 // FIXME! Put some code in here to make this use Sectionframes!!
2058 //
2059 					UT_DEBUGMSG(("!!!!Found a blip in a fspa!!!!!!!!!! \n"));
2060 					if(UT_OK == this->_handlePositionedImage(&blip, sImageName))
2061 					   bPositionObject = true;
2062 				}
2063 				bool isTextBox = false;
2064 				UT_uint32 textOff = 0;
2065 				UT_uint32 i;
2066 				escherstruct item;
2067 				FSPContainer *answer = NULL;
2068 
2069 				UT_DEBUGMSG(("IE_Imp_MsWord_97:: escher: ps->fib.fcDggInfo %d ps->fib.lcbDggInfo %d \n", ps->fib.fcDggInfo,ps->fib.lcbDggInfo));
2070 				wvGetEscher (&item, ps->fib.fcDggInfo, ps->fib.lcbDggInfo, ps->tablefd,
2071 							 ps->mainfd);
2072 				for (i = 0; i < item.dgcontainer.no_spgrcontainer; i++)
2073 				{
2074 					answer = wvFindSPID (&(item.dgcontainer.spgrcontainer[i]), fspa->spid);
2075 					if (answer)
2076 					{
2077 						break;
2078 					}
2079 				}
2080 				if(answer != NULL)
2081 				{
2082 					ClientTextbox cTextBox = answer->clienttextbox;
2083 					if(cTextBox.textid != NULL)
2084 					{
2085 						isTextBox = true;
2086 						textOff = *cTextBox.textid;
2087 						UT_DEBUGMSG(("Found a Text box! text offset is.. %d \n",textOff));
2088 					}
2089                     // passing struct to format parameter. WTF?
2090 					xxx_UT_DEBUGMSG((" clienttextbox %x clientdata %x \n",answer->clienttextbox,answer->clientdata));
2091 				}
2092 				if(isTextBox || bPositionObject)
2093 				{
2094 //				if(answer != NULL)
2095 //				{
2096 					const char * atts[] = {NULL,NULL,NULL,NULL,NULL,NULL};
2097 					if(bPositionObject && sImageName.size())
2098 					{
2099 					  atts[0] =  PT_STRUX_IMAGE_DATAID;
2100 					  atts[1] = sImageName.c_str();
2101 					  atts[2] = "props";
2102 					}
2103 					else
2104 					{
2105 					  atts[0] = "props";
2106 					}
2107 					UT_String sProp;
2108 					UT_String sProps;
2109 					UT_String sVal;
2110 					sProps.clear();
2111 					sProps = "frame-type:";
2112 					if(isTextBox)
2113 					{
2114 					  sProps += "textbox; ";
2115 					}
2116 					else
2117 					{
2118 					  sProps += "image; ";
2119 					}
2120 					sProps += "position-to:";
2121 					if(fspa->by ==2)
2122 					{
2123 						sVal = "block-above-text; ";
2124 					}
2125 					else if(fspa->by ==0)
2126 					{
2127 						sVal = "column-above-text; ";
2128 					}
2129 					else if(fspa->by ==1)
2130 					{
2131 						sVal = "page-above-text; "; // should be page-above-text
2132 					}
2133 					sProps += sVal;
2134 					sProps += "wrap-mode:";
2135 					if(fspa->wr == 3)
2136 					{
2137 					  sVal = "above-text; ";
2138 					}
2139 					else
2140 					{
2141 						sVal = "wrapped-both; ";
2142 					}
2143        					if(fspa->fBelowText == 1 && fspa->wr == 3)
2144 				        {
2145 					     UT_DEBUGMSG(("Set Below Text \n"));
2146 					     sVal = "below-text; ";
2147 					}
2148 					sProps += sVal;
2149 					sProps += "xpos:";
2150 					UT_String_sprintf(sVal,"%f",dLeft);
2151 					sVal += "in; ";
2152 
2153 					sProps += sVal;
2154 					sProps += "ypos:";
2155 					UT_String_sprintf(sVal,"%f",dTop);
2156 					sVal += "in; ";
2157 
2158 					sProps += sVal;
2159 					sProps += "frame-col-xpos:";
2160 					UT_String_sprintf(sVal,"%f",dLeft);
2161 					sVal += "in; ";
2162 
2163 					sProps += sVal;
2164 					sProps += "frame-col-ypos:";
2165 					UT_String_sprintf(sVal,"%f",dTop);
2166 					sVal += "in; ";
2167 					sProps += sVal;
2168 
2169 					sProps += "frame-width:";
2170 					UT_String_sprintf(sVal,"%f",dRight-dLeft);
2171 					sVal += "in; ";
2172 					sProps += sVal;
2173 
2174 					UT_DEBUGMSG(("Inserting Frame of width %s \n",sVal.c_str()));
2175 					sProps += "frame-height:";
2176 					UT_String_sprintf(sVal,"%f",dBottom-dTop);
2177 					sVal += "in";
2178 					sProps += sVal;
2179 //
2180 // Turn off the borders.
2181 //
2182 					if(bPositionObject && !isTextBox)
2183 					{
2184 					  sProp = "top-style";
2185 					  sVal = "none";
2186 					  UT_String_setProperty(sProps,sProp,sVal);
2187 					  sProp = "right-style";
2188 					  UT_String_setProperty(sProps,sProp,sVal);
2189 					  sProp = "left-style";
2190 					  UT_String_setProperty(sProps,sProp,sVal);
2191 					  sProp = "bot-style";
2192 					  UT_String_setProperty(sProps,sProp,sVal);
2193 					}
2194 					if(bPositionObject)
2195 					{
2196 					  atts[3] = sProps.c_str();
2197 					}
2198 					else
2199 					{
2200 					  atts[1] = sProps.c_str();
2201 					}
2202 					_appendStrux(PTX_SectionFrame,atts);
2203 					_appendStrux(PTX_EndFrame,atts);
2204 					if(isTextBox)
2205 					{
2206 					  textboxPos * pPos = new textboxPos;
2207 					  pPos->lid = fspa->spid;
2208 					  PT_DocPosition posEnd =0;
2209 					  getDoc()->getBounds(true,posEnd); // clean frags!
2210 
2211 					  pPos->endFrame = getDoc()->getLastFrag();
2212 					  m_vecTextboxPos.addItem(pPos);
2213 					}
2214 					wvReleaseEscher (&item);
2215 					return true;
2216 				}
2217 				wvReleaseEscher (&item);
2218 			}
2219 			else
2220 			{
2221 				xxx_UT_DEBUGMSG(("nooffspa was <= 0 -- ignoring"));
2222 			}
2223 		}
2224 		else
2225 		{
2226 			UT_DEBUGMSG(("pre Word8 0x08 graphic -- unsupported at the moment"));
2227 			/*fdoa =*/ wvGetFDOAFromCP(ps->currentcp, NULL, ps->fdoapos,
2228 								   ps->nooffdoa);
2229 
2230 			// TODO: do something with the data in this fdoa someday...
2231 		}
2232 
2233 		return 0;
2234 	}
2235 
2236 	return 0;
2237 }
2238 
_beginComment(wvParseStruct *,UT_uint32,void *,int)2239 int IE_Imp_MsWord_97::_beginComment(wvParseStruct * /*ps*/, UT_uint32 /*tag*/,
2240 					void * /*props*/, int /*dirty*/)
2241 {
2242   UT_DEBUGMSG(("DOM: begin comment\n"));
2243   return 0;
2244 }
2245 
_endComment(wvParseStruct *,UT_uint32,void *,int)2246 int IE_Imp_MsWord_97::_endComment(wvParseStruct * /*ps*/, UT_uint32 /*tag*/,
2247 				  void * /*props*/, int /*dirty*/)
2248 {
2249   UT_DEBUGMSG(("DOM: begin comment\n"));
2250   return 0;
2251 }
2252 
2253 
_eleProc(wvParseStruct * ps,UT_uint32 tag,void * props,int dirty)2254 int IE_Imp_MsWord_97::_eleProc(wvParseStruct *ps, UT_uint32 tag,
2255 							   void *props, int dirty)
2256 {
2257 	// make sure we are not past the end of the document ...
2258 	// this can happen with some complex documents
2259 	if(ps->currentcp >= m_iTextboxesEnd)
2260 	{
2261 		UT_DEBUGMSG(("IE_Imp_MsWord_97::_eleProc: processing past end of document !!! %d \n",ps->currentcp >= m_iTextboxesEnd));
2262 		return 0;
2263 	}
2264 
2265 	//
2266 	// Marshall these off to the correct handlers
2267 	//
2268 
2269 	switch (static_cast<wvTag>(tag))
2270 	{
2271 
2272 	case SECTIONBEGIN:
2273 		return _beginSect (ps, tag, props, dirty);
2274 
2275 	case SECTIONEND:
2276 		return _endSect (ps, tag, props, dirty);
2277 
2278 	case PARABEGIN:
2279 		return _beginPara (ps, tag, props, dirty);
2280 
2281 	case PARAEND:
2282 		return _endPara (ps, tag, props, dirty);
2283 
2284 	case CHARPROPBEGIN:
2285 		return _beginChar (ps, tag, props, dirty);
2286 
2287 	case CHARPROPEND:
2288 		return _endChar (ps, tag, props, dirty);
2289 
2290 	case COMMENTBEGIN:
2291 	  return _beginComment (ps, tag, props, dirty);
2292 
2293 	case COMMENTEND:
2294 	  return _endComment (ps, tag, props, dirty);
2295 
2296 	default:
2297 	  UT_ASSERT_NOT_REACHED();
2298 
2299 	}
2300 
2301 	return 0;
2302 }
2303 
2304 /****************************************************************************/
2305 /****************************************************************************/
2306 
_beginSect(wvParseStruct *,UT_uint32,void * prop,int)2307 int IE_Imp_MsWord_97::_beginSect (wvParseStruct * /*ps*/, UT_uint32 /*tag*/,
2308 				  void *prop, int /*dirty*/)
2309 {
2310 	SEP * asep = static_cast <SEP *>(prop);
2311 
2312 	const gchar * propsArray[15];
2313 	UT_String propBuffer;
2314 	UT_String props;
2315 
2316 	// flush any character runs
2317 	this->_flush ();
2318 
2319 	m_iCurrentSectId++;
2320 
2321 	// first we need to deal with page size, because setting page size
2322 	// resets all margins to the AW defaults
2323 	// Sevior: Only do this ONCE!!! Abiword can only handle one page size.
2324 	if(!m_bSetPageSize)
2325 	{
2326 		// all of this data is related to Abi's <pagesize> tag
2327 		m_bSetPageSize = true;
2328 		double page_width  = 0.0;
2329 		double page_height = 0.0;
2330 		double page_scale  = 1.0;
2331 
2332 		if (asep->dmOrientPage == 1)
2333 			getDoc()->m_docPageSize.setLandscape ();
2334 		else
2335 			getDoc()->m_docPageSize.setPortrait ();
2336 
2337 		page_width = asep->xaPage / 1440.0;
2338 		page_height = asep->yaPage / 1440.0;
2339 
2340 		// PROBLEM: there are two separate and independent page sizes
2341 		// given to us, one by the explicit width and height and one
2342 		// by the requested paper size, and we need to decide which
2343 		// one we should follow. There are three scenarios
2344 		//   (1) the explicit size and paper match
2345 		//   (2) the explicit size and paper do not match
2346 		//       (a) the explicit size is the Word default (Letter)
2347 		//       (b) the explicit size is something else than the defaults
2348 		//
2349 		// In case (1) we use the requested paper. Case (2a) happens
2350 		// when the user changes the page size by requesting a
2351 		// different paper size but does not touch the width and
2352 		// height controls -- we use the paper size. Case (2b) happens
2353 		// when the user changes size by the with and height controls;
2354 		// the paper request stored is the one that was in place
2355 		// before the manual adjustment and is no longer valid, so we
2356 		// use the explicit width and height.
2357 
2358 		// decide if the explicit width and height are valid, i.e., if
2359 		// they contain the Word defaults the paper request has to be
2360 		// 0 (Letter)
2361 		bool bDoNotUseSize = (asep->xaPage == 12240 &&
2362 							  asep->yaPage == 15840 &&
2363 							  asep->dmPaperReq != 0);
2364 
2365 
2366 		xxx_UT_DEBUGMSG(("DOM: pagesize: landscape: %d, width: %f, height: %f, paper-type: %d\n",
2367 					 asep->dmOrientPage, page_width, page_height, asep->dmPaperReq));
2368 
2369 		// map paper to AW page size name string ...
2370 		const char * paper_name = s_mapPageIdToString (asep->dmPaperReq);
2371 
2372 		// check if the paper name is valid (i.e., there is a match
2373 		// between the name and the sizes; if not, we use only the sizes
2374 		bool bPaperNameValid = (paper_name != NULL);
2375 
2376 		if(bPaperNameValid)
2377 		{
2378 			// construct an instance of fp_PageSize for this paper
2379 			// request; we will use this to verify whether its
2380 			// dimensions match those stored in the explicit width and
2381 			// height but also we will determine appropriate units to
2382 			// be used (i.e., we want to use inches for Letter but
2383 			// metric units for A4, etc.)
2384 			fp_PageSize PageSize(paper_name);
2385 
2386 			// if we know that the explicit size is not valid, we do
2387 			// not need any further checking
2388 			if(!bDoNotUseSize)
2389 			{
2390 				// in order to minimize effect of rounding errors, we are
2391 				// better doing the comparison in the twipses; the MS
2392 				// values suffer from rounding (?) error which is quite
2393 				// significant, so we will round to the second least
2394 				// significant digit
2395 
2396 				double w = PageSize.Width(DIM_IN) * 1440.0;
2397 				double h = PageSize.Height(DIM_IN) * 1440.0;
2398 
2399 				UT_uint32 iPaperW10 = ((UT_uint32) w)/10 + (((UT_uint32) w)%10 >= 5 ? 1 : 0);
2400 				UT_uint32 iPaperH10 = ((UT_uint32) h)/10 + (((UT_uint32) h)%10 >= 5 ? 1 : 0);
2401 
2402 				UT_uint32 iPageW10 = asep->xaPage/10 + (asep->xaPage%10 >= 5 ? 1 : 0);
2403 				UT_uint32 iPageH10 = asep->yaPage/10 + (asep->yaPage%10 >= 5 ? 1 : 0);
2404 
2405 				if(iPageW10 != iPaperW10 ||
2406 				   iPageH10 != iPaperH10)
2407 				{
2408 					bPaperNameValid = false;
2409 				}
2410 			}
2411 
2412 			// if we are to use the paper name, then get the
2413 			// dimensions to be used ...
2414 			if(bPaperNameValid)
2415 			{
2416 				m_dim = PageSize.getDims();
2417 			}
2418 		}
2419 
2420 		if (bPaperNameValid)
2421 		{
2422 			getDoc()->m_docPageSize.Set (paper_name);
2423 		}
2424 		else
2425 		{
2426 			getDoc()->m_docPageSize.Set ("Custom");
2427 			getDoc()->m_docPageSize.Set (page_width, page_height, DIM_IN);
2428 			getDoc()->m_docPageSize.setScale(page_scale);
2429 		}
2430 	} // end of page size stuff
2431 
2432 	if(asep->fBidi)
2433 	{
2434 		// this is an RTL section, set dominant direction to rtl
2435 		props += "dom-dir:rtl;";
2436 	}
2437 	else
2438 	{
2439 		// this is an LTR section, we want to set the direction
2440 		// explicitely so that we do not end up with wrong default
2441 		props += "dom-dir:ltr;";
2442 	}
2443 
2444 
2445 	if(asep->fPgnRestart)
2446 	{
2447 		// set to 1 when page numbering should be restarted at the beginning of this section
2448 		props += "section-restart:1;";
2449 	}
2450 
2451 	// user specified starting page number
2452 	UT_String_sprintf(propBuffer, "section-restart-value:%d;", asep->pgnStart);
2453 	props += propBuffer;
2454 
2455 	// columns
2456 	if (asep->ccolM1) {
2457 		// number of columns
2458 		UT_String_sprintf(propBuffer,"columns:%d;", (asep->ccolM1+1));
2459 		props += propBuffer;
2460 
2461 		// columns gap
2462 		UT_String_sprintf(propBuffer,"column-gap:%s;",
2463 			UT_convertInchesToDimensionString(m_dim,
2464 											  (static_cast<double>(asep->dxaColumns) / 1440)));
2465 		props += propBuffer;
2466 	}
2467 
2468 	// draw a vertical line between columns
2469 	if (asep->fLBetween == 1)
2470 	{
2471 		props += "column-line:on;";
2472 	}
2473 
2474 	// space after section (gutter)
2475 	UT_String_sprintf(propBuffer,"section-space-after:%s;",
2476 			UT_convertInchesToDimensionString(m_dim,
2477 											  (static_cast<double>(asep->dzaGutter) / 1440)));
2478 	props += propBuffer;
2479 
2480 	//
2481 	// TODO: section breaks
2482 	//
2483 
2484 	// page-margin-left
2485 	UT_String_sprintf(propBuffer, "page-margin-left:%s;",
2486 			UT_convertInchesToDimensionString(m_dim,
2487 											  (static_cast<double>(asep->dxaLeft) / 1440)));
2488 	props += propBuffer;
2489 
2490 	// page-margin-right
2491 	UT_String_sprintf(propBuffer, "page-margin-right:%s;",
2492 			UT_convertInchesToDimensionString(m_dim,
2493 											  (static_cast<double>(asep->dxaRight) / 1440)));
2494 	props += propBuffer;
2495 
2496 	// page-margin-top
2497 	UT_String_sprintf(propBuffer, "page-margin-top:%s;",
2498 			UT_convertInchesToDimensionString(m_dim,
2499 											  (static_cast<double>(asep->dyaTop) / 1440)));
2500 	props += propBuffer;
2501 
2502 	// page-margin-bottom
2503 	UT_String_sprintf(propBuffer, "page-margin-bottom:%s;",
2504 			UT_convertInchesToDimensionString(m_dim,
2505 											  (static_cast<double>(asep->dyaBottom)/1440)));
2506 	props += propBuffer;
2507 
2508 	// page-margin-header
2509 	UT_String_sprintf(propBuffer, "page-margin-header:%s;",
2510 			UT_convertInchesToDimensionString(m_dim,
2511 											  (static_cast<double>(asep->dyaHdrTop)/1440)));
2512 	props += propBuffer;
2513 
2514 	// page-margin-footer (word's footer is measured from the bottom
2515 	// edge of the page -- contrary to the docs -- our's from the
2516 	// bottom margin of the page)
2517 	double dFooter = static_cast<double>(asep->dyaBottom) - static_cast<double>(asep->dyaHdrBottom);
2518 	if(dFooter < 0)
2519 	{
2520 		dFooter = -dFooter;
2521 	}
2522 	dFooter = dFooter/1440.;
2523 	UT_String_sprintf(propBuffer, "page-margin-footer:%s",
2524 					  UT_convertInchesToDimensionString(m_dim,dFooter));
2525 	props += propBuffer;
2526 	xxx_UT_DEBUGMSG (("DOM:SEVIOR the section properties are: '%s'\n", props.c_str()));
2527 
2528 
2529 	propsArray[0] = static_cast<const gchar *>("props");
2530 	propsArray[1] = static_cast<const gchar *>(props.c_str());
2531 
2532 	UT_uint32 iOff = 2;
2533 
2534 	// headers/footers
2535 	UT_String id[6];
2536 	UT_uint32 iId = 0;
2537 
2538 	// see _handleHeaders() on the contents of the m_pHeaders array,
2539 	// it will make this maths clear (m_iCurrentSectId is 1-based
2540 	// indx)
2541 	// For each section in the document they are six headers/footers;
2542 	// each of these can be in 3 states:
2543 	//      length  > 2: proper header, use it
2544 	//      length == 2: empty header; no header to be inserted
2545 	//      length == 0: use header from the previous section
2546 
2547 	if ((m_iCurrentSectId - 1)*6 + 6 < m_iHeadersCount)
2548 	{
2549 		// there are headers defined for this section
2550 		UT_uint32 i = 6 + (m_iCurrentSectId - 1)*6;
2551 		UT_uint32 j = i + 6;
2552 		UT_sint32 k;
2553 
2554 		for( ; i < j && i < m_iHeadersCount; i++)
2555 		{
2556 			// skip any unsupported or empty headers
2557 			if(m_pHeaders[i].type == HF_Unsupported || m_pHeaders[i].len == 2)
2558 			{
2559 				continue;
2560 			}
2561 
2562 			// if this is a first page hdr/ftr we only use it if appropriate
2563 			if(   (m_pHeaders[i].type == HF_HeaderFirst && !asep->fTitlePage)
2564 			   || (m_pHeaders[i].type == HF_FooterFirst && !asep->fTitlePage))
2565 			{
2566 				// we want to change the type to unsupported to stop it from being
2567 				// inserted into the document
2568 				m_pHeaders[i].type = HF_Unsupported;
2569 				continue;
2570 			}
2571 
2572 			k = i;
2573 #if 0
2574 			// For now this code is going to be disabled, since a
2575 			// present AW sections cannot share headers, and this type
2576 			// of a header needs to be replaced by a physical copy of
2577 			// the previous meaningul header
2578 			if(m_pHeaders[i].len == 0)
2579 			{
2580 				// this is the case where the section is to use the
2581 				// header of a previous section -- scroll back until
2582 				// we find one
2583 				k -= 6;
2584 				bool bContinue = false;
2585 
2586 				while(k > 5)
2587 				{
2588 					if(m_pHeaders[k].len == 2)
2589 					{
2590 						// found empty header
2591 						bContinue = true;
2592 						break;
2593 					}
2594 					else if(m_pHeaders[k].len == 0)
2595 					{
2596 						// try one section ahead
2597 						k -= 6;
2598 					}
2599 					else
2600 					{
2601 						// found a meaningful header
2602 						break;
2603 					}
2604 				}
2605 
2606 				if(bContinue || k < 6)
2607 				{
2608 					continue;
2609 				}
2610 			}
2611 #endif
2612 			switch(m_pHeaders[k].type)
2613 			{
2614 				case HF_HeaderEven:
2615 					propsArray[iOff++] = "header-even";
2616 					break;
2617 				case HF_FooterEven:
2618 					propsArray[iOff++] = "footer-even";
2619 					break;
2620 				case HF_HeaderOdd:
2621 					propsArray[iOff++] = "header";
2622 					break;
2623 				case HF_FooterOdd:
2624 					propsArray[iOff++] = "footer";
2625 					break;
2626 				case HF_HeaderFirst:
2627 					propsArray[iOff++] = "header-first";
2628 					break;
2629 				case HF_FooterFirst:
2630 					propsArray[iOff++] = "footer-first";
2631 					break;
2632 				default:
2633 					UT_ASSERT_HARMLESS(UT_NOT_REACHED);
2634 			}
2635 
2636 			UT_String_sprintf(id[iId],"%d",m_pHeaders[k].pid);
2637 			propsArray[iOff++] = id[iId++].c_str();
2638 		}
2639 	}
2640 
2641 	propsArray[iOff++] = 0;
2642 	UT_return_val_if_fail(iOff <= sizeof(propsArray), 1);
2643 
2644 
2645 	if (!_appendStrux(PTX_Section, static_cast<const gchar **>(&propsArray[0])))
2646 	{
2647 		UT_DEBUGMSG (("DOM: error appending section props!\n"));
2648 		return 1;
2649 	}
2650 
2651 	// increment our section count
2652 	m_bInSect = true;
2653 	m_bInPara = false; // reset paragraph status
2654 	m_nSections++;
2655 
2656 	// TODO: we need to do some work on Headers/Footers
2657 
2658 	/*
2659 	 * break codes:
2660 	 * 0 No break
2661 	 * 1 New column
2662 	 * 2 New page
2663 	 * 3 Even page
2664 	 * 4 Odd page
2665 	 */
2666 
2667 	//	if (asep->bkc > 1 && m_nSections > 1) // don't apply on the 1st page
2668 	if (m_nSections > 1) // don't apply on the 1st page
2669 	{
2670 		// new sections always need a block
2671 		if (!_appendStrux(PTX_Block, static_cast<const gchar **>(NULL)))
2672 		{
2673 			UT_DEBUGMSG (("DOM: error appending new block\n"));
2674 			return 1;
2675 		}
2676 		m_bInPara = true;
2677 
2678 		UT_UCSChar ucs = UCS_FF;
2679 		switch (asep->bkc) {
2680 			case 1:
2681 				ucs = UCS_VTAB;
2682 				X_CheckError(_appendSpan(&ucs,1));
2683 				break;
2684 
2685 			case 2:
2686 				X_CheckError(_appendSpan(&ucs,1));
2687 				break;
2688 
2689 			case 3: // TODO: handle me better (not even)
2690 				X_CheckError(_appendSpan(&ucs,1));
2691 				break;
2692 
2693 			case 4: // TODO: handle me better (not odd)
2694 				X_CheckError(_appendSpan(&ucs,1));
2695 				break;
2696 
2697 			case 0:
2698 			default:
2699 				break;
2700 		}
2701 	}
2702 
2703 	return 0;
2704 }
2705 
2706 // this function is called from _handleHeadersText() with meaningless
2707 // parameters; if you want to make use of any of the parameters here,
2708 // make sure it will work with NULLs, etc.
_endSect(wvParseStruct *,UT_uint32,void *,int)2709 int IE_Imp_MsWord_97::_endSect (wvParseStruct * /* ps */ , UT_uint32  /* tag */ ,
2710 								void * /* prop */, int /* dirty */ )
2711 {
2712 #if 0
2713 	// if we're at the end of a section, we need to check for a section mark
2714 	// at the end of our character stream and remove it (to prevent page breaks
2715 	// between sections)
2716 
2717 	// this does not work -- if we are at the end of a section we have
2718 	// already flushed the buffer in _endPara()
2719 	if (m_pTextRun.size() &&
2720 		m_pTextRun[m_pTextRun.size()-1] == UCS_FF)
2721 	  {
2722 		m_pTextRun[m_pTextRun.size()-1] = 0;
2723 	  }
2724 #endif
2725 
2726 	// we never appended a paragraph inside of this section. we're naughty. correct that here.
2727 	if (!m_bInPara  && !m_bInTextboxes)
2728 		_appendStrux(PTX_Block, NULL);
2729 
2730 	// if there is a pending page break it belongs to the section and
2731 	// is to be removed, we just need to set the tracker to false
2732 	m_bPageBreakPending = false;
2733 	m_bLineBreakPending = false;
2734 
2735 	m_bInSect = false;
2736 	m_bInPara = false; // reset paragraph status
2737 	return 0;
2738 }
2739 
_beginPara(wvParseStruct * ps,UT_uint32,void * prop,int)2740 int IE_Imp_MsWord_97::_beginPara (wvParseStruct *ps, UT_uint32 /*tag*/,
2741 				  void *prop, int /*dirty*/)
2742 {
2743 
2744 	// if in a header of unsupported type, just return
2745 	// the +1 is to account for the fact that ps->currentcp applies to the previous
2746 	// char position ...
2747 	if(_ignorePosition(ps->currentcp + 1))
2748 		return 0;
2749 
2750 	PAP *apap = static_cast <PAP *>(prop);
2751 
2752 	// the header/footnote/endnote sections are special; because the
2753 	// parser treats them as a continuation of the document, we end up
2754 	// here before we get chance to handle the change from main doc to
2755 	// these sections -- we want the paragraph properties assembled
2756 	// for future use, but we do not want the strux actually inserted
2757 	bool bDoNotInsertStrux = (ps->currentcp == m_iFootnotesStart ||
2758 							  ps->currentcp == m_iEndnotesStart  ||
2759 							  ps->currentcp == m_iHeadersStart);
2760 
2761 	// the end of endnotes/fnotes/headers and all other subsections in
2762 	// the main stream always contains a paragraph marker; we do not
2763 	// want it to insert strux on those
2764 	if((ps->currentcp == m_iTextEnd - 1 && m_iTextEnd > m_iTextStart)                ||
2765 	   //(ps->currentcp == m_iTextEnd - 2 && m_iTextEnd > m_iTextStart)                ||
2766 	   (ps->currentcp == m_iFootnotesEnd - 1 && m_iFootnotesEnd > m_iFootnotesStart) ||
2767 	   (ps->currentcp == m_iEndnotesEnd - 1  && m_iEndnotesEnd > m_iEndnotesStart)   ||
2768 	   (ps->currentcp == m_iHeadersEnd - 1 && m_iHeadersEnd > m_iHeadersStart)       ||
2769 	   (ps->currentcp == m_iAnnotationsEnd - 1 && m_iAnnotationsEnd > m_iAnnotationsStart) ||
2770 	   (ps->currentcp == m_iMacrosStart - 1 && m_iMacrosEnd > m_iMacrosStart) ||
2771 	   (ps->currentcp == m_iTextboxesStart - 1 && m_iTextboxesEnd > m_iTextboxesStart))
2772 	{
2773 		bDoNotInsertStrux  = true;
2774 	}
2775 	bool bInHdrFtr = false;
2776 	if((ps->currentcp+1 >= m_iHeadersStart) && (ps->currentcp < m_iHeadersEnd))
2777 	{
2778 		bInHdrFtr = true;
2779 	}
2780 	bool bInTextboxes = false;
2781 	if((ps->currentcp+1 >= m_iTextboxesStart) && (ps->currentcp < m_iTextboxesEnd))
2782 	{
2783 		bInTextboxes = true;
2784 	}
2785 	// at the end of each f/enote is a superflous paragraph marker
2786 	// which we do not want imported
2787 	if(m_bInFNotes && m_iNextFNote < m_iFootnotesCount && m_pFootnotes &&
2788 	   m_pFootnotes[m_iNextFNote].txt_pos + m_pFootnotes[m_iNextFNote].txt_len - 1 >= ps->currentcp)
2789 	{
2790 		bDoNotInsertStrux = true;
2791 	}
2792 
2793 	if(m_bInENotes && m_iNextENote < m_iEndnotesCount && m_pEndnotes &&
2794 	   m_pEndnotes[m_iNextENote].txt_pos + m_pEndnotes[m_iNextENote].txt_len - 1 >= ps->currentcp)
2795 	{
2796 		bDoNotInsertStrux = true;
2797 	}
2798 
2799 
2800 	// the header section requires even more special care; since we
2801 	// need to insert the HdrFtr strux for each header before we can
2802 	// insert the block, we do not want a strux inserted at the start
2803 	// position of a header; furthermore, each header ends with a
2804 	// superfluous paragraph marker
2805 	if(m_bInHeaders &&
2806 	   ((m_iCurrentHeader < m_iHeadersCount && m_pHeaders &&
2807 	   (m_pHeaders[m_iCurrentHeader].pos == ps->currentcp ||
2808 		m_pHeaders[m_iCurrentHeader].pos + m_pHeaders[m_iCurrentHeader].len - 1 <= ps->currentcp))
2809 		|| m_iCurrentHeader == m_iHeadersCount))
2810 	{
2811 		//start a new header section
2812 		bDoNotInsertStrux = true;
2813 	}
2814 
2815 	{
2816 	  if (apap->fInTable)
2817 	  {
2818 		  // we have to call this unconditionally, since m_bInHeaders set does not mean that
2819 		  // the HdrFtr strux for this section has been inserted.
2820 		  _handleHeadersText(ps->currentcp +1, false);
2821 		  _handleTextboxesText(ps->currentcp+1);
2822 		  if (!m_bInTable)
2823 		  {
2824 			  m_bInTable = true;
2825 			  _table_open();
2826 //
2827 // Fill Column positions
2828 //
2829 			  UT_sint32 i= 0;
2830 			  for(i=0;i < ps->nocellbounds; i++)
2831 			  {
2832 				  if(ps->cellbounds)
2833 				  {
2834 					  UT_sint32 pos = ps->cellbounds[i];
2835 					  m_vecColumnPositions.addItem(pos);
2836 				  }
2837 			  }
2838 		  }
2839 
2840 		  if (ps->endcell)
2841 		  {
2842 			  ps->endcell = 0;
2843 			  _cell_close();
2844 			  if (m_iCellsRemaining > 0)
2845 			  {
2846 				  m_iCellsRemaining--;
2847 				  if (m_iCellsRemaining == 0)
2848 				  {
2849 					  _row_close();
2850 				  }
2851 			  }
2852 		  }
2853 
2854 	    _row_open(ps);
2855 
2856 	    // determine column spans
2857 	    if (!m_bCellOpen)
2858 		{
2859 			m_vecColumnSpansForCurrentRow.clear();
2860 
2861 			xxx_UT_DEBUGMSG(("Number of cell bounds in New row %d \n",ps->nocellbounds));
2862 			UT_sint32 column =1;
2863 			UT_sint32 i =0;
2864 			UT_sint32 posLeft = 0;
2865 			UT_sint32 posRight =0;
2866 			if (ps->cellbounds)
2867 				posLeft = ps->cellbounds[0];
2868 			for (column = 1; column < ps->nocellbounds; column++)
2869 			{
2870 				int span = 0;
2871 				posRight = apap->ptap.rgdxaCenter[column];
2872 				xxx_UT_DEBUGMSG(("column %d posLeft %d posRight %d \n",column,posLeft,posRight));
2873 				for (i = 0; i < ps->nocellbounds; i++)
2874 				{
2875 					if (ps->cellbounds[i] >= posLeft && ps->cellbounds[i] < posRight)
2876 					{
2877 						span++;
2878 					}
2879 					else if (ps->cellbounds[i] >= posRight)
2880 					{
2881 						break;
2882 					}
2883 				}
2884 				xxx_UT_DEBUGMSG(("COlumn %d has span %d \n",column,span));
2885 				m_vecColumnSpansForCurrentRow.addItem(span);
2886 				posLeft = posRight;
2887 			}
2888 	    }
2889 
2890 	    _cell_open(ps, apap);
2891 
2892 	    if (m_iCellsRemaining == 0) {
2893 	      m_iCellsRemaining = apap->ptap.itcMac + 1;
2894 	    }
2895 
2896 	    if (m_iRowsRemaining == 0) {
2897 	      m_iRowsRemaining = ps->norows;
2898 	    }
2899 
2900 	    m_iRowsRemaining--;
2901 	  }
2902 	  else if (m_bInTable) {
2903 	    m_bInTable = false;
2904 	    _table_close(ps, apap);
2905 	  }
2906 	}
2907 
2908 
2909 	// first, flush any character data in any open runs
2910 	// only flush if we are really inserting the strux (so that we can
2911 	// remove any superfluous characters at ends of secitons,
2912 	// e.g. page breaks)
2913 	if(!bDoNotInsertStrux)
2914 	{
2915 		this->_flush ();
2916 	}
2917 
2918 	if (apap->fTtp)
2919 	  {
2920 	    m_bInPara = true;
2921 		xxx_UT_DEBUGMSG(("m_bInPara set true here -1 \n"));
2922 	    return 0;
2923 	  }
2924 
2925 	if (apap->fBidi == 1)
2926 	{
2927 		m_bLTRParaContext = false;
2928 	} else
2929 	{
2930 		m_bLTRParaContext = true;
2931 	}
2932 
2933 	m_bBidiMode = false;
2934 
2935 	// break before paragraph?
2936 	if (apap->fPageBreakBefore)
2937 	{
2938 		// TODO: this should really set a property in
2939 		// TODO: in the paragraph, instead; but this
2940 		// TODO: gives a similar effect for now.
2941 		// TOOD: when it is handled properly the code needs to be
2942 		// moved into _generateParaProps()
2943 		UT_DEBUGMSG(("_beginPara: appending default block\n"));
2944 		_appendStrux(PTX_Block, NULL);
2945 		UT_UCSChar ucs = UCS_FF;
2946 		_appendSpan(&ucs,1);
2947 	}
2948 
2949 	m_charProps.clear();
2950 	m_charStyle.clear();
2951 	m_paraProps.clear();
2952 	m_paraStyle.clear();
2953 	_generateParaProps(m_paraProps, apap, ps);
2954 
2955 	//props, level, listid, parentid, style, NULL
2956 	const gchar * propsArray[11];
2957 
2958 	/* lists */
2959 	UT_uint32 myListId = 0;
2960 	UT_uint32 iAWListId = UT_UID_INVALID;
2961 	UT_String szListId, szParentId, szLevel, szStartValue, szNumberProps;
2962 
2963 	// all lists have ilfo set; some lists can be 'customised' by
2964 	// having the number field removed (see bug 3622) -- they are
2965 	// still lists in Word, but do not look like it, and we will not
2966 	// treat them as lists (Tomas, May 26, 2003)
2967 	if(apap->ilfo && apap->linfo.numberstr)
2968 	{
2969 		UT_uint32 j;
2970 		// if we are in a new list, then do some clean up first and remember the list id
2971 		if(m_iMSWordListId != apap->linfo.id)
2972 		{
2973 			m_iMSWordListId = apap->linfo.id;
2974 
2975 			for(UT_uint32 i = 0; i < 9; i++)
2976 				m_iListIdIncrement[i] = 0;
2977 
2978 			UT_VECTOR_PURGEALL(ListIdLevelPair *, m_vLists);
2979 			m_vLists.clear();
2980 		}
2981 
2982 		// a hack -- see the note on myListId below
2983 		myListId = apap->linfo.id;
2984 		myListId += apap->linfo.format;
2985 		myListId += apap->ilvl;
2986 
2987 		/*
2988 		  IMPORTANT the list sutff is found in several different
2989 		  places:
2990 
2991 		  apap->ilvl - the level of this list (0-8)
2992 
2993 		  myListId - the id of this list, we need this to know to which list this
2994 		  paragraph belongs; unfortunately, there seem to be some cases where separate
2995 		  lists *share* the same id, for instance when two lists, of different formatting,
2996 		  are separated by only empty paragraphs. As a hack, I have added the format number
2997 		  to the list id, so gaining different id for different formattings (it is not foolproof,
2998 		  for if id1 + format1 == id2 + format2 then we get two lists joined, but the probability
2999 		  of that should be small). Further problem is that in AW, list id refers to the set of
3000 		  list elements on the same level, while in Word the id is that of the entire list. The
3001 		  easiest way to tranform the Word id to AW id is to add the level to the id, which
3002 		  is what has been done above
3003 
3004 		  apap->linfo.start - the stating number of this entire list;
3005 
3006 		  apap->linfo.numberstr - the actual number string to display (XCHAR *); we probably need
3007 		  this to work out the number separator, since there does not seem
3008 		  to be any reference to this anywhere
3009 
3010 		  apap->linfo.numberstr_size - length of the number string
3011 
3012 		  apap->linfo.format - number format (see the enum below)
3013 
3014 		  apap->linfo.align	- number alignment [0: lft, 1: rght, 2: cntr]
3015 
3016 		  apap->linfo.ixchFollow - what character stands between the number and the para
3017 		  [0:= tab, 1: spc, 2: none]
3018 		*/
3019 
3020 		// If a given list id has already been defined, appending a new list with
3021 		// same values will have a harmless effect
3022 
3023 
3024 		// we will use this to keep track of how many entries of given level we have had
3025 		// every time we get here, we increase the counter for all levels lower than ours
3026 		// then we will add the counter for our level to myListId; this way subsections of
3027 		// the list separated by a higher level list entry will have different id's
3028 
3029 
3030 		for(j = apap->ilvl + 1; j < 9; j++)
3031 			m_iListIdIncrement[j]++;
3032 
3033 		myListId += m_iListIdIncrement[apap->ilvl];
3034 
3035 		// see if this id is already in our map
3036 		UT_sint32 k;
3037 		for(k = 0; k < m_vListIdMap.getItemCount(); k+=2)
3038 		{
3039 			if((UT_uint32)m_vListIdMap.getNthItem(k) == myListId)
3040 			{
3041 				iAWListId = m_vListIdMap.getNthItem(k+1);
3042 				break;
3043 			}
3044 		}
3045 
3046 		if(iAWListId == UT_UID_INVALID)
3047 		{
3048 			iAWListId = getDoc()->getUID(UT_UniqueId::List);
3049 			UT_ASSERT_HARMLESS(iAWListId != UT_UID_INVALID);
3050 
3051 			m_vListIdMap.addItem(myListId);
3052 			m_vListIdMap.addItem(iAWListId);
3053 		}
3054 
3055 
3056 		const gchar * list_atts[15];
3057 		UT_uint32 iOffset = 0;
3058 		UT_String propBuffer;
3059 
3060 		// list id number
3061 		list_atts[iOffset++] = "id";
3062 		UT_String_sprintf(propBuffer, "%d", iAWListId);
3063 		szListId = propBuffer;
3064 		list_atts[iOffset++] = szListId.c_str();
3065 
3066 
3067 		// parent id
3068 		list_atts[iOffset++] = "parentid";
3069 
3070 		// we will search backward our list vector for the first entry
3071 		// that has a lower level than we and that will be our parent
3072 		UT_uint32 myParentID = 0;
3073 		for(UT_sint32 n = m_vLists.getItemCount(); n > 0; n--)
3074 		{
3075 			ListIdLevelPair * llp = (ListIdLevelPair *)(m_vLists.getNthItem(n - 1));
3076 			if(llp->level < apap->ilvl)
3077 			{
3078 				myParentID = llp->listId;
3079 				break;
3080 			}
3081 		}
3082 		UT_String_sprintf(propBuffer, "%d", myParentID);
3083 		szParentId = propBuffer;
3084 		list_atts[iOffset++] = szParentId.c_str();
3085 
3086 		// list type
3087 		list_atts[iOffset++] = "type";
3088 		list_atts[iOffset++] = s_mapDocToAbiListId (static_cast<MSWordListIdType>(apap->linfo.format));
3089 
3090 		// start value
3091 		list_atts[iOffset++] = "start-value";
3092 		UT_String_sprintf(propBuffer, "%d", apap->linfo.start);
3093 		szStartValue = propBuffer;
3094 		list_atts[iOffset++] = szStartValue.c_str();
3095 
3096 		// list delimiter
3097 		UT_UTF8String sDelim;
3098 		s_mapDocToAbiListDelim (apap->linfo.numberstr,apap->linfo.numberstr_size,sDelim);
3099 		list_atts[iOffset++] = "list-delim";
3100 
3101 		char * t = s_stripDangerousChars(sDelim.utf8_str());
3102 		UT_String sDlm = t;
3103 		FREEP(t);
3104 		list_atts[iOffset++] = sDlm.c_str();
3105 
3106 		list_atts[iOffset++] = "level";
3107 		UT_String_sprintf(propBuffer, "%d", apap->ilvl + 1); // Word level starts at 0, Abi's at 1
3108 		szLevel = propBuffer;
3109 		list_atts[iOffset++] = szLevel.c_str();
3110 
3111 		// generate character props for the number
3112 		// TODO -- the properties represented by apap->linfo.chp need
3113 		// to be applied to the list number/bulet. For now, I am going
3114 		// to translate these into a regular props string and attach
3115 		// them to the list attributes, but they need to be passed
3116 		// somehow down to the number field (may need a dedicated
3117 		// _generateListCharProps() for this
3118 		// Tomas, May 12, 2003
3119 		_generateCharProps(szNumberProps, &apap->linfo.chp, ps);
3120 		list_atts[iOffset++] = "props";
3121 		list_atts[iOffset++] = szNumberProps.c_str();
3122 
3123 		// NULL
3124 		list_atts[iOffset++] = 0;
3125 		UT_return_val_if_fail( iOffset <=  sizeof(list_atts)/sizeof(gchar *), 1 );
3126 
3127 		// now add this to our vector of lists
3128 		ListIdLevelPair * llp = new ListIdLevelPair;
3129 		llp->listId = iAWListId;
3130 		llp->level = apap->ilvl;
3131 		m_vLists.addItem(static_cast<void*>(llp));
3132 
3133 		getDoc()->appendList(list_atts);
3134 		UT_DEBUGMSG(("DOM: appended a list\n"));
3135 
3136 		// TODO: merge in list properties and such here with the variable 'props',
3137 		// such as list-style, field-font, ...
3138 
3139 		// start-value
3140 		// Need to put the ";" back in the para string.
3141 		//
3142 		m_paraProps[m_paraProps.size() - 1] = ';';
3143 		m_paraProps += "start-value:";
3144 		m_paraProps += szStartValue;
3145 		m_paraProps += ";";
3146 
3147 		// list style
3148 		m_paraProps += "list-style:";
3149 		m_paraProps += s_mapDocToAbiListStyle (static_cast<MSWordListIdType>(apap->linfo.format));
3150 		m_paraProps += ";";
3151 
3152 		// field-font
3153 		m_paraProps += "field-font:";
3154 		m_paraProps += s_fieldFontForListStyle (static_cast<MSWordListIdType>(apap->linfo.format));
3155 	} // end of list-related code
3156 
3157  	// props
3158 	UT_uint32 i = 0;
3159 	propsArray[i++] = static_cast<const gchar *>("props");
3160 	propsArray[i++] = static_cast<const gchar *>(m_paraProps.c_str());
3161 
3162 
3163 	// level, or 0 for default, normal level
3164 	if (myListId > 0)
3165 	{
3166 		propsArray[i++] = "level";
3167 		propsArray[i++] = szLevel.c_str();
3168 		propsArray[i++] = "listid";
3169 		propsArray[i++] = szListId.c_str();
3170 		propsArray[i++] = "parentid";
3171 		propsArray[i++] = szParentId.c_str();
3172 	}
3173 
3174 	// handle style
3175 	// TODO from wv we get the style props expanded and applied to the
3176 	// characters in the paragraph (i.e., part of the CHP structure);
3177 	// we need to be able to tell to wv not to do this expansion
3178 	if(apap->stylename[0])
3179 	{
3180 		const STD * pSTD = ps->stsh.std;
3181 		UT_uint32 iCount = ps->stsh.Stshi.cstd;
3182 
3183 		if(apap->istd != istdNil && apap->istd < iCount)
3184 		{
3185 			propsArray[i++] = "style";
3186 
3187 			char * t = NULL;
3188 			const gchar * pName = NULL;
3189 			if(pSTD)
3190 				pName = s_translateStyleId(pSTD[apap->istd].sti);
3191 
3192 			if(pName)
3193 			{
3194 				m_paraStyle = pName;
3195 			}
3196 			else if(pSTD)
3197 			{
3198 				m_paraStyle = t = s_convert_to_utf8(ps,pSTD[apap->istd].xstzName);
3199 			}
3200 
3201 			FREEP(t);
3202 			propsArray[i++] = m_paraStyle.c_str();
3203 		}
3204 
3205 	}
3206 
3207 	// NULL
3208 	propsArray[i] = 0;
3209 
3210 	if (!m_bInSect && !bDoNotInsertStrux)
3211 	{
3212 		// check for should-be-impossible case
3213 		UT_ASSERT_NOT_REACHED();
3214 		_appendStrux(PTX_Section, NULL);
3215 		m_bInSect = true ;
3216 	}
3217 
3218 	if(!bDoNotInsertStrux)
3219 	{
3220 		xxx_UT_DEBUGMSG(("_beginPara: pos %d [text ends %d]\n", ps->currentcp, m_iFootnotesStart));
3221 
3222 		if (!_appendStrux(PTX_Block, static_cast<const gchar **>(&propsArray[0])))
3223 		{
3224 			UT_DEBUGMSG(("DOM: error appending paragraph block\n"));
3225 			return 1;
3226 		}
3227 		m_bInPara = true;
3228 	}
3229 
3230 	if (myListId > 0 && !bDoNotInsertStrux)
3231 	  {
3232 		// TODO: honor more props
3233 		const gchar *list_field_fmt[5];
3234 		list_field_fmt[0] = "type";
3235 		list_field_fmt[1] = "list_label";
3236 		list_field_fmt[2] = "props";
3237 		list_field_fmt[3] = "text-decoration:none";
3238 		list_field_fmt[4] = 0;
3239 		_appendObject(PTO_Field, static_cast<const gchar**>(&list_field_fmt[0]));
3240 		m_bInPara = true;
3241 
3242 		// the character following the list label - 0=tab, 1=space, 2=none
3243 		if(apap->linfo.ixchFollow == 0) // tab
3244 		{
3245 		        const gchar* attribs[3] = {"props","text-decoration:none",NULL};
3246 			getDoc()->appendFmt(attribs);
3247 			UT_UCSChar tab = UCS_TAB;
3248 			_appendSpan(&tab, 1);
3249 		}
3250 		else if(apap->linfo.ixchFollow == 1) // space
3251 		{
3252 		        const gchar* attribs[3] = {"props","text-decoration:none",NULL};
3253 			getDoc()->appendFmt(attribs);
3254 			UT_UCSChar space = UCS_SPACE;
3255 			_appendSpan(&space, 1);
3256 		}
3257 		// else none
3258 	  }
3259 
3260 	return 0;
3261 }
3262 
_endPara(wvParseStruct *,UT_uint32,void *,int)3263 int IE_Imp_MsWord_97::_endPara (wvParseStruct * /*ps*/, UT_uint32 /*tag*/,
3264 								void * /*prop*/, int /*dirty*/)
3265 {
3266 	xxx_UT_DEBUGMSG(("#DOM: _endPara\n"));
3267 	// have to flush here, otherwise flushing later on will result in
3268 	// an empty paragraph being inserted
3269 
3270 	this->_flush ();
3271 	m_bInPara = false;
3272 	m_bLineBreakPending = false;
3273 
3274 	return 0;
3275 }
3276 
_beginChar(wvParseStruct * ps,UT_uint32,void * prop,int)3277 int IE_Imp_MsWord_97::_beginChar (wvParseStruct *ps, UT_uint32 /*tag*/,
3278 								  void *prop, int /*dirty*/)
3279 {
3280 	// if in a header of unsupported type, just return
3281 	// the +1 is to account for the fact that ps->currentcp applies to the previous
3282 	// char position ...
3283 	if(_ignorePosition(ps->currentcp + 1))
3284 		return 0;
3285 
3286 	// the header/footnote/endnote sections are special; because the
3287 	// parser treats them as a continuation of the document, we end up
3288 	// here before we get chance to handle the change from main doc to
3289 	// these sections -- we want the char properties assembled
3290 	// for future use, but we do not want them actually appended
3291 	bool bDoNotAppendFmt = (ps->currentcp == m_iFootnotesStart ||
3292 							  ps->currentcp == m_iEndnotesStart  ||
3293 							  ps->currentcp == m_iHeadersStart);
3294 
3295 	// the end of endnotes/fnotes/headers and all other subsections in
3296 	// the main stream always contain a paragraph marker; we do not
3297 	// want it to append fmt on those
3298 	if((ps->currentcp == m_iTextEnd - 1 && m_iTextEnd > m_iTextStart)                ||
3299 	   (ps->currentcp == m_iTextEnd - 2 && m_iTextEnd > m_iTextStart)                ||
3300 	   (ps->currentcp == m_iFootnotesEnd - 1 && m_iFootnotesEnd > m_iFootnotesStart) ||
3301 	   (ps->currentcp == m_iEndnotesEnd - 1  && m_iEndnotesEnd > m_iEndnotesStart)   ||
3302 	   (ps->currentcp == m_iHeadersEnd - 1 && m_iHeadersEnd > m_iHeadersStart)       ||
3303 	   (ps->currentcp == m_iAnnotationsEnd - 1 && m_iAnnotationsEnd > m_iAnnotationsStart) ||
3304 	   (ps->currentcp == m_iMacrosStart - 1 && m_iMacrosEnd > m_iMacrosStart))
3305 	{
3306 		bDoNotAppendFmt  = true;
3307 	}
3308 
3309 
3310 	// at the end of each f/enote is a superflous paragraph marker
3311 	// which we do not want imported
3312 	if(m_bInFNotes && m_iNextFNote < m_iFootnotesCount && m_pFootnotes &&
3313 	   m_pFootnotes[m_iNextFNote].txt_pos + m_pFootnotes[m_iNextFNote].txt_len - 1 >= ps->currentcp)
3314 	{
3315 		bDoNotAppendFmt = true;
3316 	}
3317 
3318 	if(m_bInENotes && m_iNextENote < m_iEndnotesCount && m_pEndnotes &&
3319 	   m_pEndnotes[m_iNextENote].txt_pos + m_pEndnotes[m_iNextENote].txt_len - 1 >= ps->currentcp)
3320 	{
3321 		bDoNotAppendFmt = true;
3322 	}
3323 
3324 	// the header section requires even more special care; since we
3325 	// need to insert the HdrFtr strux for each header before we can
3326 	// insert the block, we do not want a strux and fmt inserted at the start
3327 	// position of a header; furthermore, each header ends with a
3328 	// superfluous paragraph marker
3329 	if(m_bInHeaders &&
3330 	   ((m_iCurrentHeader < m_iHeadersCount && m_pHeaders &&
3331 	   (m_pHeaders[m_iCurrentHeader].pos == ps->currentcp ||
3332 		m_pHeaders[m_iCurrentHeader].pos + m_pHeaders[m_iCurrentHeader].len - 1 <= ps->currentcp))
3333 	   || m_iCurrentHeader == m_iHeadersCount))
3334 	{
3335 		//start a new header section
3336 		bDoNotAppendFmt = true;
3337 	}
3338 
3339 	// flush any data in our character runs
3340 	// if we are not really appending, then do not flush, so that we
3341 	// are not prevented from removing superflous page breaks at the
3342 	// end of section
3343 	if(!bDoNotAppendFmt)
3344 	{
3345 		this->_flush ();
3346 	}
3347 
3348 
3349 	CHP *achp = static_cast <CHP *>(prop);
3350 
3351 	const gchar * propsArray[7];
3352 	UT_uint32 propsOffset = 0;
3353 
3354 	m_charProps.clear();
3355 	m_charStyle.clear();
3356 
3357 	UT_uint32 iFontType = 0;
3358 	if(achp->xchSym && ps->fonts.ffn)
3359 	{
3360 		// inserting a symbol char ...
3361 		iFontType = ps->fonts.ffn[achp->ftcSym].chs;
3362 	}
3363 	else if(ps->fonts.ffn && (achp->ftcAscii < ps->fonts.nostrings))
3364 	{
3365 		iFontType = ps->fonts.ffn[achp->ftcAscii].chs;
3366 	}
3367 
3368 	if(iFontType == 0)
3369 		m_bSymbolFont = false;
3370 	else if(iFontType == 2)
3371 		m_bSymbolFont = true;
3372 	else
3373 	{
3374 		xxx_UT_DEBUGMSG(("IE_Imp_MsWord_97::_beginChar: unknow font encoding %d\n",
3375 					 ps->fonts.ffn[achp->ftcAscii].chs));
3376 		m_bSymbolFont = false;
3377 	}
3378 
3379 	memset (propsArray, 0, sizeof(propsArray));
3380 
3381 	_generateCharProps(m_charProps, achp, ps);
3382 
3383 	if (!achp->fBidi)
3384 		m_bLTRCharContext = true;
3385 	else
3386 		m_bLTRCharContext = false;
3387 
3388 	// we enter bidi mode if we encounter a character
3389 	// formatting inconsistent with the base direction of the
3390 	// paragraph; once in bidi mode, we have to stay there
3391 	// until the end of the current pragraph
3392 	m_bBidiMode = m_bBidiMode || (m_bLTRCharContext ^ m_bLTRParaContext);
3393 
3394 	propsArray[propsOffset++] = static_cast<const gchar *>("props");
3395 	propsArray[propsOffset++] = static_cast<const gchar *>(m_charProps.c_str());
3396 
3397 	if(!m_bEncounteredRevision && (achp->fRMark || achp->fRMarkDel))
3398 	{
3399 		// revision "hack" - add a single revision for all revisioned text
3400 		UT_UCS4String revisionStr ("msword_revisioned_text");
3401 		getDoc()->addRevision(1, revisionStr.ucs4_str(), revisionStr.size(), 0, 0);
3402 		m_bEncounteredRevision = true;
3403 	}
3404 
3405 	if (achp->fRMark)
3406 	{
3407 	    propsArray[propsOffset++] = static_cast<const gchar *>("revision");
3408 		m_charRevs = "1";
3409 	    propsArray[propsOffset++] = m_charRevs.c_str();
3410 	}
3411 	else if (achp->fRMarkDel)
3412 	{
3413 	    propsArray[propsOffset++] = static_cast<const gchar *>("revision");
3414 		m_charRevs = "-1";
3415 	    propsArray[propsOffset++] = m_charRevs.c_str();
3416 	}
3417 	else
3418 		m_charRevs.clear();
3419 
3420 
3421 	if(achp->stylename[0])
3422 	{
3423 		const STD * pSTD = ps->stsh.std;
3424 		UT_uint32 iCount = ps->stsh.Stshi.cstd;
3425 
3426 		if(achp->istd != istdNil && achp->istd < iCount)
3427 		{
3428 			propsArray[propsOffset++] = static_cast<const gchar *>("style");
3429 			char * t = NULL;
3430 			const gchar * pName = s_translateStyleId(pSTD[achp->istd].sti);
3431 
3432 			if(pName)
3433 			{
3434 				m_charStyle = pName;
3435 			}
3436 			else
3437 			{
3438 				m_charStyle = t = s_convert_to_utf8(ps,pSTD[achp->istd].xstzName);
3439 			}
3440 
3441 			FREEP(t);
3442 			propsArray[propsOffset++] = m_charStyle.c_str();
3443 		}
3444 	}
3445 
3446 	// woah - major error here
3447 	if(!m_bInSect && !bDoNotAppendFmt)
3448 	{
3449 		UT_ASSERT_NOT_REACHED();
3450 		_appendStrux(PTX_Section, NULL);
3451 		m_bInSect = true ;
3452 	}
3453 
3454 	if(!m_bInPara && !bDoNotAppendFmt)
3455 	{
3456 		UT_ASSERT_NOT_REACHED();
3457 		_appendStrux(PTX_Block, NULL);
3458 		m_bInPara = true ;
3459 	}
3460 
3461 	if(!bDoNotAppendFmt)
3462 	{
3463 		if (!_appendFmt(static_cast<const gchar **>(&propsArray[0])))
3464 		{
3465 			UT_DEBUGMSG(("DOM: error appending character formatting\n"));
3466 			return 1;
3467 		}
3468 	}
3469 
3470 	return 0;
3471 }
3472 
_endChar(wvParseStruct *,UT_uint32,void *,int)3473 int IE_Imp_MsWord_97::_endChar (wvParseStruct * /*ps*/, UT_uint32 /*tag*/,
3474 								void * /*prop*/, int /*dirty*/)
3475 {
3476 	// nothing is needed here
3477 	return 0;
3478 }
3479 
3480 /****************************************************************************/
3481 /****************************************************************************/
3482 
_fieldProc(wvParseStruct * ps,U16 eachchar,U8 chartype,U16 lid)3483 int IE_Imp_MsWord_97::_fieldProc (wvParseStruct *ps, U16 eachchar,
3484 								  U8 chartype, U16 lid)
3485 {
3486 	xxx_UT_DEBUGMSG(("DOM: fieldProc: %c %x\n", static_cast<char>(eachchar),
3487 					 static_cast<int>(eachchar)));
3488 
3489 	//
3490 	// The majority of this code has just been ripped out of wv/field.c
3491 	//
3492 	field * f = NULL;
3493 	UT_sint32 iRet = 1;
3494 
3495 	if (eachchar == 0x13) // beginning of a field
3496 	{
3497 		if(m_stackField.getDepth() > 0)
3498 		{
3499 			// see what kind of field we are in
3500 			m_stackField.viewTop((void**)&f);
3501 			UT_return_val_if_fail(f,0);
3502 
3503 			switch(f->type)
3504 			{
3505 				case F_TOC:
3506 				case F_TOC_FROM_RANGE:
3507 					if(_isTOCsupported(f))
3508 					{
3509 						break;
3510 					}
3511 
3512 					// for unsuported TOCs fall through ...
3513 
3514 				case F_HYPERLINK:
3515 					// for these fields we want to dump into the
3516 					// document anything in the argument
3517 					{
3518 						f->argument[f->fieldI] = 0;
3519 						UT_UCS2Char * a = f->argument;
3520 
3521 						if(*a == 0x14)
3522 						{
3523 							a++;
3524 						}
3525 
3526 						while(*a)
3527 						{
3528 							this->_appendChar(*a++);
3529 						}
3530 						this->_flush();
3531 
3532 						f->argument[0] = 0;
3533 						f->fieldI = 0;
3534 					}
3535 					break;
3536 
3537 				default:
3538 					break;
3539 			}
3540 
3541 		}
3542 
3543 		try
3544 		{
3545 			f = new field;
3546 		}
3547 		catch(...)
3548 		{
3549 			f = NULL;
3550 		}
3551 
3552 		UT_return_val_if_fail(f,0);
3553 		f->fieldWhich = f->command;
3554 		f->command[0] = 0;
3555 		f->argument[0] = 0;
3556 		f->fieldI = 0;
3557 		f->fieldRet = 1;
3558 		f->type = F_OTHER;
3559 		m_stackField.push((void*)f);
3560 	}
3561 	else if (eachchar == 0x14) // field trigger
3562 	{
3563 		m_stackField.viewTop((void**)&f);
3564 		UT_return_val_if_fail(f,0);
3565 
3566 		f->command[f->fieldI] = 0;
3567 		f->fieldC = wvWideStrToMB (f->command);
3568 
3569 		if (this->_handleCommandField(f->fieldC))
3570 			f->fieldRet = 1;
3571 		else
3572 			f->fieldRet = 0;
3573 
3574 		wvFree(f->fieldC);
3575 		f->fieldWhich = f->argument;
3576 		f->fieldI = 0;
3577 	}
3578 	if(!f)
3579 	{
3580 		m_stackField.viewTop((void**)&f);
3581 	}
3582 
3583 	UT_return_val_if_fail(f,0);
3584 
3585 	if (f->fieldI >= FLD_SIZE)
3586 	{
3587 		UT_DEBUGMSG(("DOM: Something completely absurd in the fields implementation!\n"));
3588 		UT_ASSERT_NOT_REACHED();
3589 		return 1;
3590 	}
3591 
3592 	if (!f->fieldWhich) {
3593 		UT_DEBUGMSG(("DOM: _fieldProc - 'which' is null\n"));
3594 		UT_ASSERT_NOT_REACHED();
3595 		return 1;
3596 	}
3597 
3598 	if (chartype)
3599 		f->fieldWhich[f->fieldI] = wvHandleCodePage(eachchar, lid);
3600 	else
3601 		f->fieldWhich[f->fieldI] = eachchar;
3602 
3603 	f->fieldI++;
3604 
3605 	if (eachchar == 0x15) // end of field marker
3606 	{
3607 		f->fieldWhich[f->fieldI] = 0;
3608 		//I do not think we should convert this -- this is the field value
3609 		//displayed in the document; in most cases we do not need it, as we
3610 		//calulate it ourselves, but for instance for hyperlinks this is the
3611 		//the text to which the link is tied
3612 		//m_fieldA = wvWideStrToMB (m_argument);
3613 		f->fieldC = wvWideStrToMB (f->command);
3614 		_handleFieldEnd (f->fieldC, ps->currentcp);
3615 		wvFree (f->fieldC);
3616 		iRet = f->fieldRet;
3617 
3618 		m_stackField.pop((void**)&f);
3619 		UT_return_val_if_fail(f,0);
3620 		delete f;
3621 	}
3622 	return iRet;
3623 }
3624 
_handleFieldEnd(char * command,UT_uint32)3625 bool IE_Imp_MsWord_97::_handleFieldEnd (char *command, UT_uint32 /*iDocPosition*/)
3626 {
3627 	Doc_Field_t tokenIndex = F_OTHER;
3628 	char *token;
3629 	field * f = NULL;
3630 	m_stackField.viewTop((void**)&f);
3631 	UT_return_val_if_fail(f, true);
3632 
3633 	if (*command != 0x13)
3634 	{
3635 		UT_DEBUGMSG (("field did not begin with 0x13\n"));
3636 		return true;
3637 	}
3638 
3639 	if(m_bInTOC && m_bTOCsupported && (   f->type == F_TOC
3640 									   || f->type == F_TOC_FROM_RANGE))
3641 	{
3642 		// end of TOC field in a supported TOC; we do nothing, since the field has already
3643 		// been processed in _handleFieldCommand()
3644 		m_bInTOC = false;
3645 		m_bTOCsupported = false;
3646 		return _insertTOC(f);
3647 	}
3648 
3649 	if(m_bInTOC && m_bTOCsupported)
3650 	{
3651 		// end of some non-TOC field inside supported TOC; just return
3652 		return true;
3653 	}
3654 
3655 	command++;
3656 	token = strtok (command, "\t, ");
3657 
3658 	while(token)
3659 	{
3660 		tokenIndex = s_mapNameToField (token);
3661 		switch (tokenIndex)
3662 		{
3663 		    case F_MERGEFIELD:
3664 			{
3665 				const gchar* atts[5];
3666 				atts[0] = "type";
3667 				atts[1] = "mail_merge";
3668 				atts[2] = "param";
3669 				atts[3] = 0;
3670 				atts[4] = 0;
3671 
3672 				token = strtok (NULL, "\"\" ");
3673 
3674 				UT_return_val_if_fail(f->argument[f->fieldI - 1] == 0x15, false);
3675 
3676 				f->argument[f->fieldI - 1] = 0;
3677 				UT_UCS2Char * a = f->argument;
3678 
3679 				UT_UTF8String param;
3680 
3681 				if(*a == 0x14)
3682 					{
3683 						a++;
3684 					}
3685 
3686 				while(*a)
3687 					{
3688 						if (!((171 == *a) || (187 == *a))) {
3689 							// @argument looks like <<FieldName>>.
3690 							// strip off the '<<' (171) and '>>' (187)
3691 							param.appendUCS2(a, 1);
3692 						}
3693 
3694 						a++;
3695 					}
3696 
3697 				atts[3] = param.utf8_str();
3698 
3699 				if (!_appendObject (PTO_Field, static_cast<const gchar**>(&atts[0])))
3700 					{
3701 						UT_DEBUGMSG(("Dom: couldn't append field (type = '%s')\n", atts[1]));
3702 					}
3703 			}
3704 			break;
3705 
3706 			case F_HYPERLINK:
3707 				{
3708 					token = strtok (NULL, "\"\" ");
3709 					UT_return_val_if_fail(f->argument[f->fieldI - 1] == 0x15, false);
3710 
3711 					f->argument[f->fieldI - 1] = 0;
3712 					UT_UCS2Char * a = f->argument;
3713 
3714 					if(*a == 0x14)
3715 					{
3716 						a++;
3717 					}
3718 
3719 					while(*a)
3720 					{
3721 						this->_appendChar(*a++);
3722 					}
3723 					this->_flush();
3724 
3725 					if(!m_bInPara)
3726 					{
3727 						_appendStrux(PTX_Block, NULL);
3728 						m_bInPara = true ;
3729 					}
3730 
3731 					_appendObject(PTO_Hyperlink,NULL);
3732 					m_bInLink = false;
3733 					break;
3734 				}
3735 			case F_TOC:
3736 			case F_TOC_FROM_RANGE:
3737 				// we only get here for unsupported TOC types, in which case we dump the field
3738 				// result (not ideal, since often the PAGEREF fields inside the TOC have not been
3739 				// updated before save and so we get 'bookmark not found' instead of page numbers,
3740 				// but it is better than nothing at all)
3741 
3742 				{
3743 					token = strtok (NULL, "\"\" ");
3744 					UT_return_val_if_fail(f->argument[f->fieldI - 1] == 0x15, false);
3745 
3746 					f->argument[f->fieldI - 1] = 0;
3747 					UT_UCS2Char * a = f->argument;
3748 
3749 					if(*a == 0x14)
3750 					{
3751 						a++;
3752 					}
3753 
3754 					while(*a)
3755 					{
3756 						this->_appendChar(*a++);
3757 					}
3758 					this->_flush();
3759 				}
3760 
3761 				break;
3762 
3763 			default:
3764 				break;
3765 		}
3766 
3767 		token = strtok (NULL, "\t, ");
3768 	}
3769 	return false;
3770 }
3771 
3772 /*!
    Word has several different toc tables (TOC, TOA, indexes); at the moment we only
    support TOC and even then only if it is based on heading styles
3775 */
_isTOCsupported(field * f)3776 bool IE_Imp_MsWord_97::_isTOCsupported(field *f)
3777 {
3778 	UT_return_val_if_fail(f,false);
3779 
3780 	if(   f->type != F_TOC
3781 	   && f->type != F_TOC_FROM_RANGE
3782 	  )
3783 	{
3784 		return false;
3785 	}
3786 
3787 	bool bRet = true;
3788 	char * command = wvWideStrToMB (f->command);
3789 	UT_DEBUGMSG(("IE_Imp_MsWord_97::_isTOCsupported: command %s\n", command));
3790 
3791 	char * params = NULL;
3792 
3793 	if(f->type == F_TOC)
3794 	{
3795 		params = command + 5;
3796 	}
3797 	else if(f->type == F_TOC_FROM_RANGE)
3798 	{
3799 		params = command + 4;
3800 	}
3801 
3802 	// we only support the heading based TOC for now
3803 	char * t = strstr(params, "\\o");
3804 
3805 	if(!t)
3806 		t = strstr(params, "\\t");
3807 
3808 	if(!t)
3809 	{
3810 		bRet = false;
3811 		goto finish;
3812 	}
3813 
3814  finish:
3815 	FREEP(command);
3816 	return bRet;
3817 }
3818 
3819 
3820 
3821 /*!
3822    returns true if the TOC has been handled, false if the TOC type is unsupported
3823 */
3824 
3825 /* Does this handle the contents styles indirectly via inserting the TOC as new and
3826 	letting the default/initial pt code handle it like new rather than actually importing it? */
3827 
_insertTOC(field * f)3828 bool IE_Imp_MsWord_97::_insertTOC(field *f)
3829 {
3830 	UT_return_val_if_fail(f,false);
3831 	bool bRet = true;
3832 	bool bSupported = false;
3833 
3834 	UT_sint32 i = 0, i1 = 0, i2 = 0;
3835 	char * t = NULL, * t1 = NULL, * t2 = NULL;
3836 	UT_UTF8String sProps = "toc-has-heading:0;", sTemp, sLeader;
3837 
3838 	const gchar * attrs [3] = {"props", NULL, NULL};
3839 
3840 	char * command = wvWideStrToMB (f->command);
3841 	UT_DEBUGMSG(("IE_Imp_MsWord_97::_insertTOC: command %s\n", command));
3842 
3843 	char * params = NULL;
3844 
3845 	if(f->type == F_TOC)
3846 	{
3847 		params = command + 5;
3848 	}
3849 	else if(f->type == F_TOC_FROM_RANGE)
3850 	{
3851 		params = command + 4;
3852 	}
3853 	else
3854 	{
3855 		bRet = false;
3856 		goto finish;
3857 	}
3858 
3859 	if((t = strstr(params, "\\p")))
3860 	{
3861 		// this defines the leader, we parse it first, before we mess up the command
3862 		t1 = strchr(t, '\"');
3863 		if(t1)
3864 		{
3865 			t1++;
3866 
3867 			// AW can only use one of the chars (there are up to 5), we will take the first
3868 			switch(*t1)
3869 			{
3870 				default: // not sure, we will treat this as a dot
3871 				case '.': sLeader += "dot";       break;
3872 				case '-': sLeader += "hyphen";    break;
3873 				case '_': sLeader += "underline"; break;
3874 				case ' ': sLeader += "none"; break;
3875 			}
3876 		}
3877 	}
3878 
3879 	if((t = strstr(params, "\\b")))
3880 	{
3881 		// a bookmark restricts the range from which the TOC is built
3882 		t1 = strchr(t, '\"');
3883 		if(t1)
3884 		{
3885 			t1++;
3886 
3887 			t2 = strchr(t1, '\"');
3888 
3889 			char c = *t2;
3890 			*t2 = 0;
3891 
3892 			sProps += "toc-range-bookmark:";
3893 			sProps += t1;
3894 			sProps += ";";
3895 
3896 			*t2 = c; // restore the string
3897 		}
3898 	}
3899 
3900 	if((t = strstr(params, "\\o")))
3901 	{
3902 		// heading-based TOC
3903 		// \o param specifies a range of headings to use, e.g., \o "2-4"
3904 		bSupported = true;
3905 
3906 		t = strchr(t, '\"');
3907 
3908 		if(!t)
3909 		{
3910 			bRet = false;
3911 			goto finish;
3912 		}
3913 
3914 		t++;
3915 
3916 		i1 = atoi(t);
3917 
3918 		if(!i1)
3919 		{
3920 			bRet = false;
3921 			goto finish;
3922 		}
3923 
3924 		t1 = strchr(t, '-');
3925 		t2 = strchr(t, '\"');
3926 
3927 		t = UT_MIN(t1, t2);
3928 
3929 		if(!t)
3930 		{
3931 			bRet = false;
3932 			goto finish;
3933 		}
3934 
3935 		i2 = 0;
3936 		if(*t == '\"')
3937 		{
3938 			i2 = i1;
3939 		}
3940 		else
3941 		{
3942 			UT_ASSERT_HARMLESS( *t == '-');
3943 			t++;
3944 			i2 = atoi(t);
3945 		}
3946 
3947 		if(!i2)
3948 		{
3949 			bRet = false;
3950 			goto finish;
3951 		}
3952 		// now create our TOC attr/props
3953 		//
3954 		// * we do not need to set the source styles, because the Heading
3955 		//   styles are the AW default
3956 		//
3957 		// * we do have to set the dest styles
3958 		//
3959 		// * I am not sure what to do about toc-id: the AW FV_Fiew::cmdInsertTOC() does not specify the
3960 		//   id, so neither will we
3961 		//
3962 		// AW currently only uses the first 4 Heading styles, but we will implement this for all 9
3963 		// to avoid future work
3964 
3965 		for(i = 1; i < i1; ++i)
3966 		{
3967 			UT_UTF8String_sprintf(sTemp, "toc-source-style%d:nonexistentstyle;", i);
3968 			sProps += sTemp;
3969 		}
3970 
3971 		UT_sint32 iMin = UT_MIN(i2+1,10);
3972 
3973 		for(i = i1; i < iMin; ++i)
3974 		{
3975 			UT_UTF8String_sprintf(sTemp, "toc-dest-style%d:TOC %d", i, i);
3976 			sProps += sTemp;
3977 			sProps += ";";
3978 
3979 			if(sLeader.size())
3980 			{
3981 				UT_UTF8String_sprintf(sTemp, "toc-tab-leader%d:", i);
3982 				sProps += sTemp;
3983 				sProps += sLeader;
3984 				sProps += ";";
3985 			}
3986 		}
3987 
3988 		for(i = iMin; i < 10; ++i)
3989 		{
3990 			UT_UTF8String_sprintf(sTemp, "toc-dest-style%d:nonexistentstyle", i);
3991 			sProps += sTemp;
3992 			sProps += ";";
3993 		}
3994 	}
3995 
3996 	// the \t and \o switches can be used simultaneously
3997 	// if both switches define the same level, we are unable to handle that; we will used the style
3998 	// in the \t switch (it is easier since the parsing of the \t parameter is destructive)
3999 	if ((t = strstr(params, "\\t")))
4000 	{
4001 		// style-based toc, the params have the format
4002 		// \t "style,level,style,level ..."
4003 		bSupported = true;
4004 		t1 = strchr(t, '\"');
4005 		if(!t1)
4006 		{
4007 			bRet = false;
4008 			goto finish;
4009 		}
4010 
4011 		char * end = strchr(t1+1, '\"');
4012 
4013 		while(t1 && t1 < end)
4014 		{
4015 			t1++;
4016 			t2 = strchr(t1, ',');
4017 			if(!t2)
4018 			{
4019 				bRet = false;
4020 				goto finish;
4021 			}
4022 
4023 			*t2 = 0;
4024 
4025 			sTemp = t1; // style name
4026 
4027 			t1 = t2 + 1; // style level
4028 			t2 = strchr(t1, ',');
4029 
4030 			if(t2)
4031 				t2 = UT_MIN(t2,end);
4032 			else
4033 				t2 = end;
4034 
4035 			*t2 = 0;
4036 
4037 			sProps += "toc-source-style";
4038 			sProps += t1;
4039 			sProps += ":";
4040 			sProps += sTemp;
4041 			sProps += ";";
4042 
4043 			sProps += "toc-dest-style";
4044 			sProps += t1;
4045 			sProps += ":TOC ";
4046 			sProps += t1;
4047 			sProps += ";";
4048 
4049 			if(sLeader.size())
4050 			{
4051 				sProps += "toc-tab-leader";
4052 				sProps += t1;
4053 				sProps += ":";
4054 				sProps += sLeader;
4055 				sProps += ";";
4056 			}
4057 
4058 			t1 = t2;
4059 		}
4060 	}
4061 
4062 	if(!bSupported)
4063 	{
4064 		bRet = false;
4065 		goto finish;
4066 	}
4067 
4068 	// remove trailing semicolon (screws up property parser)
4069 	{
4070 		sTemp = sProps;
4071 		const char * c = sTemp.utf8_str();
4072 		if(c[strlen(c)-1] == ';')
4073 		{
4074 			sProps.assign(c, strlen(c)-1);
4075 		}
4076 	}
4077 
4078 	attrs[1] = sProps.utf8_str();
4079 
4080 	if(!m_bInPara)
4081 	{
4082 		_appendStrux(PTX_Block, NULL);
4083 		m_bInPara = true ;
4084 	}
4085 
4086 	_appendStrux(PTX_SectionTOC, attrs);
4087 	_appendStrux(PTX_EndTOC, NULL);
4088 
4089  finish:
4090 	FREEP(command);
4091 	return bRet;
4092 }
4093 
4094 
/*!
 * Interpret the instruction text of the field currently on top of
 * m_stackField and append the matching AbiWord object to the document.
 *
 * \param command field instruction text; it must begin with the 0x13
 *                field-begin marker. The buffer is tokenised in place with
 *                strtok(), so its contents are modified.
 * \return always true (every exit path below returns true)
 *
 * The type of the first token is stored in the field record. Tokens that map
 * to a known simple field append a PTO_Field object; a HYPERLINK token appends
 * a PTO_Hyperlink object and ends processing; TOC tokens only update the TOC
 * state flags. Unrecognised tokens are skipped.
 */
bool IE_Imp_MsWord_97::_handleCommandField (char *command)
{
	// if we are currently inside a supported TOC, just return
	if(m_bInTOC && m_bTOCsupported)
		return true;

	Doc_Field_t tokenIndex = F_OTHER;
	char *token = NULL;
	field * f = NULL;
	m_stackField.viewTop((void**)&f);
	UT_return_val_if_fail(f,true);
	// set once the first token has been classified and stored in f->type
	bool bTypeSet = false;

	xxx_UT_DEBUGMSG(("DOM: handleCommandField '%s'\n", command));

	// attribute list for PTO_Field: "type" <name> [ "param" <value> ], NULL terminated
	const gchar* atts[5];
	atts[0] = "type";
	atts[1] = 0;
	atts[2] = 0;
	atts[3] = 0;
	atts[4] = 0;

	if (*command != 0x13)
	{
		UT_DEBUGMSG(("DOM: field did not begin with 0x13\n"));
		return true;
	}

	//first skip the 0x13
	command++;
	token = strtok(command, "\t, ");

	while(token)
	{
		tokenIndex = s_mapNameToField (token);
		if(!bTypeSet)
		{
			// only the first token determines the type recorded for the field
			f->type = tokenIndex;
			bTypeSet = true;
		}

		switch (tokenIndex)
		{
			case F_EDITTIME:
			case F_TIME:
				atts[1] = "time";
				break;

			case F_DateTimePicture:
				//seems similar to a creation date
				atts[1] = "meta_date";
				break;

			case F_DATE:
				atts[1] = "date";
				break;

			case F_PAGE:
				atts[1] = "page_number";
				break;

			case F_NUMCHARS:
				atts[1] = "char_count";
				break;

			case F_NUMPAGES:
				atts[1] = "page_count";
				break;

			case F_NUMWORDS:
				atts[1] = "word_count";
				break;

			case F_FILENAME:
				atts[1] = "file_name";
				break;

			case F_PAGEREF:
				// the next token (quotes and spaces stripped) is the bookmark name
				token = strtok (NULL, "\"\" ");
				atts[1] = "page_ref";
				atts[2] = "param";
				if(token)
					atts[3] = static_cast<const gchar *>(token);
				else
					atts[3] = "no_bookmark_given";
				break;

			case F_HYPERLINK:
				{
					const gchar *new_atts[3];
					// the next token is either the target or the \l switch
					token = strtok (NULL, "\"\" ");

					if(token) {
					  // hyperlink or hyperlink to bookmark
					  new_atts[0] = "xlink:href";
					  UT_String href;
					  if ( !strcmp(token, "\\l") )
					    {
					      // \l: the following token names a bookmark, so build "#name"
					      // NOTE(review): strtok() may return NULL here if nothing
					      // follows the switch -- confirm UT_String tolerates that
					      token = strtok (NULL, "\"\" ");
					      href = "#";
					      href += token;
					    }
					  else
					    {
					      href = token;
					    }
					  new_atts[1] = href.c_str();
					  new_atts[2] = 0;
					  this->_flush();

					  if(!m_bInPara)
					    {
					      _appendStrux(PTX_Block, NULL);
					      m_bInPara = true ;
					    }

					  if(m_bInLink)
					    {
					      // a link is still open: append an attribute-less
					      // hyperlink object first, then start the new one
					      UT_ASSERT_HARMLESS(UT_SHOULD_NOT_HAPPEN);
					      _appendObject(PTO_Hyperlink, NULL);
					      m_bInLink = false;
					    }

					  _appendObject(PTO_Hyperlink, new_atts);
					  m_bInLink = true;
					}
					// hyperlink handling ends processing of this command
					return true;
				}

			case F_TOC:             // for the toc fields we will
			case F_TOC_FROM_RANGE:  // insert the field result for now
				UT_DEBUGMSG(("TOC field encountered\n"));
				m_bInTOC = true;
				m_bTOCsupported = _isTOCsupported(f);
				// no break: control falls through to the default case, so no
				// PTO_Field is appended for the TOC token

			default:
				// unhandled field type
				token = strtok(NULL, "\t, ");
				// 'continue' applies to the enclosing while loop, skipping the
				// field insertion code below
				continue;
		}


		// only reached for the simple field types that set atts[1] above
		this->_flush();

		if(!m_bInPara)
		{
			_appendStrux(PTX_Block, NULL);
			m_bInPara = true ;
		}

		if (!_appendObject (PTO_Field, static_cast<const gchar**>(&atts[0])))
		{
			UT_DEBUGMSG(("Dom: couldn't append field (type = '%s')\n", atts[1]));
		}

		token = strtok(NULL, "\t, ");
	}

	return true;
}
4255 
// Coarse classification of a blip's picture data, as returned by
// s_determineImageType().
typedef enum {
  MSWord_UnknownImage,	// NULL blip, or a blip type that is not recognised
  MSWord_VectorImage,	// EMF, WMF or PICT
  MSWord_RasterImage	// JPEG, PNG or DIB
} MSWord_ImageType;
4261 
s_determineImageType(Blip * b)4262 static MSWord_ImageType s_determineImageType ( Blip * b )
4263 {
4264   if ( !b )
4265 	return MSWord_UnknownImage;
4266 
4267   switch ( b->type )
4268 	{
4269 	case msoblipEMF:
4270 	case msoblipWMF:
4271 	case msoblipPICT:
4272 	  return MSWord_VectorImage;
4273 
4274 	case msoblipJPEG:
4275 	case msoblipPNG:
4276 	case msoblipDIB:
4277 	  return MSWord_RasterImage;
4278 
4279 	case msoblipERROR:
4280 	case msoblipUNKNOWN:
4281 	default:
4282 	  return MSWord_UnknownImage;
4283 	}
4284 }
4285 
s_determineIEGFT(Blip * b)4286 static IEGraphicFileType s_determineIEGFT ( Blip * b )
4287 {
4288 	if ( !b )
4289 		return IEGFT_Unknown;
4290 
4291 	switch ( b->type )
4292 	{
4293 	case msoblipEMF:
4294 		return IEGFT_EMF;
4295 	case msoblipWMF:
4296 		return IEGFT_WMF;
4297 
4298 	case msoblipJPEG:
4299 		return IEGFT_JPEG;
4300 	case msoblipPNG:
4301 		return IEGFT_PNG;
4302 	case msoblipDIB:
4303 		return IEGFT_DIB;
4304 
4305 	case msoblipPICT:
4306 	case msoblipERROR:
4307 	case msoblipUNKNOWN:
4308 	default:
4309 		return IEGFT_Unknown;
4310 	}
4311 }
4312 
4313 
4314 
_handleImage(Blip * b,long width,long height,long cropt,long cropb,long cropl,long cropr)4315 UT_Error IE_Imp_MsWord_97::_handleImage (Blip * b, long width, long height, long cropt, long cropb, long cropl, long cropr)
4316 {
4317 	FG_Graphic* pFG		= 0;
4318 	UT_Error error		= UT_OK;
4319 	const UT_ByteBuf * buf		= 0;
4320 
4321 	UT_String propBuffer;
4322 	UT_String propsName;
4323 
4324 	// suck the data into the ByteBuffer
4325 
4326 	MSWord_ImageType imgType = s_determineImageType ( b );
4327 	IEGraphicFileType iegft = s_determineIEGFT( b );
4328 
4329 	wvStream *pwv;
4330 	bool decompress = false;
4331 
4332 	if ( imgType == MSWord_RasterImage )
4333 	{
4334 		pwv = b->blip.bitmap.m_pvBits;
4335 
4336 	}
4337 	else if ( imgType == MSWord_VectorImage )
4338 	{
4339 		pwv = b->blip.metafile.m_pvBits;
4340 		decompress = (b->blip.metafile.m_fCompression == msocompressionDeflate);
4341 	}
4342 	else
4343 	{
4344 		UT_DEBUGMSG(("UNKNOWN IMAGE TYPE!!"));
4345 		return UT_ERROR;
4346 	}
4347 
4348 	size_t size = wvStream_size (pwv);
4349 	char *data = new char[size];
4350 	wvStream_rewind(pwv);
4351 	wvStream_read(data,size,sizeof(char),pwv);
4352 
4353 	UT_ByteBuf pictData;
4354 	if (decompress)
4355 	{
4356 
4357 		unsigned long uncomprLen, comprLen;
4358 		comprLen = size;
4359 		uncomprLen = b->blip.metafile.m_cb;
4360 		Bytef *uncompr = new Bytef[uncomprLen];
4361 		int err = uncompress (uncompr, &uncomprLen, reinterpret_cast<const unsigned char *>(data), comprLen);
4362 		if (err != Z_OK)
4363 		{
4364 			UT_DEBUGMSG(("Could not uncompress image\n"));
4365 			DELETEP(uncompr);
4366 			goto Cleanup;
4367 		}
4368 		pictData.append(reinterpret_cast<const UT_Byte*>(uncompr), uncomprLen);
4369 		DELETEPV(uncompr);
4370 	}
4371 	else
4372 	{
4373 		pictData.append(reinterpret_cast<const UT_Byte*>(data), size);
4374 	}
4375 
4376 	delete [] data;
4377 
4378 	if(!pictData.getPointer(0))
4379 		error =  UT_ERROR;
4380 	else
4381 		error = IE_ImpGraphic::loadGraphic (pictData, iegft, &pFG);
4382 
4383 	if ((error != UT_OK) || !pFG)
4384 	{
4385 		UT_DEBUGMSG(("Could not import graphic\n"));
4386 		goto Cleanup;
4387 	}
4388 
4389 	buf = pFG->getBuffer();
4390 
4391 	if (!buf)
4392 	{
4393 		// i don't think that this could ever happen, but...
4394 		UT_DEBUGMSG(("Could not convert to PNG\n"));
4395 		error = UT_ERROR;
4396 		goto Cleanup;
4397 	}
4398 
4399 	//
4400 	// This next bit of code will set up our properties based on the image attributes
4401 	//
4402 
4403 	{
4404 		UT_LocaleTransactor t(LC_NUMERIC, "C");
4405 		UT_String_sprintf(propBuffer, "width:%fin; height:%fin; cropt:%fin; cropb:%fin; cropl:%fin; cropr:%fin",
4406 						  static_cast<double>(width) / static_cast<double>(1440),
4407 						  static_cast<double>(height) / static_cast<double>(1440),
4408 						  static_cast<double>(cropt) / static_cast<double>(1440),
4409 						  static_cast<double>(cropb) / static_cast<double>(1440),
4410 						  static_cast<double>(cropl) / static_cast<double>(1440),
4411 						  static_cast<double>(cropr) / static_cast<double>(1440));
4412 	}
4413 
4414 	UT_String_sprintf(propsName, "%d", getDoc()->getUID(UT_UniqueId::Image));
4415 
4416 	const gchar* propsArray[5];
4417 	propsArray[0] = "props";
4418 	propsArray[1] = propBuffer.c_str();
4419 	propsArray[2] = "dataid";
4420 	propsArray[3] = propsName.c_str();
4421 	propsArray[4] = 0;
4422 
4423 	if (!_ensureInBlock())
4424 	{
4425 		UT_DEBUGMSG (("_ensureInBlock() failed\n"));
4426 		error = UT_ERROR;
4427 		goto Cleanup;
4428 	}
4429 
4430 	if (!_appendObject (PTO_Image, propsArray))
4431 	{
4432 		UT_DEBUGMSG (("Could not create append object\n"));
4433 		error = UT_ERROR;
4434 		goto Cleanup;
4435 	}
4436 
4437 	if (!getDoc()->createDataItem(propsName.c_str(), false,
4438 								  buf, pFG->getMimeType(), NULL))
4439 	{
4440 		UT_DEBUGMSG (("Could not create data item\n"));
4441 		// the mimetype is sunk anyway
4442 		error = UT_ERROR;
4443 		goto Cleanup;
4444 	}
4445 
4446 Cleanup:
4447 	DELETEP(pFG);
4448 
4449 	return error;
4450 }
4451 
4452 
4453 
4454 /*!
4455  * This method imports an image that can be later used as an embedded object.
4456  * The Blip pointer p contains the MS Word data we use to create the image
4457  * "width" and "height" are the width and height of the object in inches.
4458  * The routine returns the name of the data-item it creates is in the
4459  * UT_UTF8String sImageName
4460  */
_handlePositionedImage(Blip * b,UT_String & sImageName)4461 UT_Error IE_Imp_MsWord_97::_handlePositionedImage (Blip * b, UT_String & sImageName)
4462 {
4463 	FG_Graphic* pFG		= 0;
4464 	UT_Error error		= UT_OK;
4465 	const UT_ByteBuf * buf		= 0;
4466 
4467   // suck the data into the ByteBuffer
4468 
4469   MSWord_ImageType imgType = s_determineImageType ( b );
4470 
4471   wvStream *pwv;
4472   bool decompress = false;
4473 
4474   if ( imgType == MSWord_RasterImage )
4475 	{
4476 	  pwv = b->blip.bitmap.m_pvBits;
4477 
4478 	}
4479   else if ( imgType == MSWord_VectorImage )
4480 	{
4481 	  pwv = b->blip.metafile.m_pvBits;
4482 	  decompress = (b->blip.metafile.m_fCompression == msocompressionDeflate);
4483 	}
4484   else
4485 	{
4486 	  UT_DEBUGMSG(("UNKNOWN IMAGE TYPE!!"));
4487 	  return UT_ERROR;
4488 	}
4489 
4490   size_t size = wvStream_size (pwv);
4491   char *data = new char[size];
4492   wvStream_rewind(pwv);
4493   wvStream_read(data,size,sizeof(char),pwv);
4494 
4495   UT_ByteBuf pictData;
4496 
4497   if (decompress)
4498   {
4499 
4500     unsigned long uncomprLen, comprLen;
4501     comprLen = size;
4502     uncomprLen = b->blip.metafile.m_cb;
4503     Bytef *uncompr = new Bytef[uncomprLen];
4504     int err = uncompress (uncompr, &uncomprLen, reinterpret_cast<const unsigned char *>(data), comprLen);
4505     if (err != Z_OK)
4506       {
4507 	UT_DEBUGMSG(("Could not uncompress image\n"));
4508         DELETEP(uncompr);
4509 	goto Cleanup;
4510       }
4511       pictData.append(reinterpret_cast<const UT_Byte*>(uncompr), uncomprLen);
4512       DELETEPV(uncompr);
4513   }
4514   else
4515   {
4516     pictData.append(reinterpret_cast<const UT_Byte*>(data), size);
4517   }
4518 
4519   delete [] data;
4520 
4521   if(!pictData.getPointer(0))
4522 	  error =  UT_ERROR;
4523   else
4524 	  error = IE_ImpGraphic::loadGraphic (pictData, IEGFT_Unknown, &pFG);
4525 
4526   if ((error != UT_OK) || !pFG)
4527 	{
4528 	  UT_DEBUGMSG(("Could not import graphic\n"));
4529 	  goto Cleanup;
4530 	}
4531 
4532   // TODO: can we get back a vector graphic?
4533   buf = pFG->getBuffer();
4534 
4535   if (!buf)
4536 	{
4537 	  // i don't think that this could ever happen, but...
4538 	  UT_DEBUGMSG(("Could not convert to PNG\n"));
4539 	  error = UT_ERROR;
4540 	  goto Cleanup;
4541 	}
4542 
4543   UT_String_sprintf(sImageName, "%d", getDoc()->getUID(UT_UniqueId::Image));
4544 
4545   if (!getDoc()->createDataItem(sImageName.c_str(), false,
4546                                 buf, pFG->getMimeType(), NULL))
4547 	{
4548 	  UT_DEBUGMSG (("Could not create data item\n"));
4549 	  error = UT_ERROR;
4550 	  goto Cleanup;
4551 	}
4552 
4553  Cleanup:
4554   DELETEP(pFG);
4555 
4556   return error;
4557 }
4558 
4559 /****************************************************************************/
4560 /****************************************************************************/
4561 
4562 //
4563 // wv callbacks to marshall data back to our importer class
4564 //
4565 
charProc(wvParseStruct * ps,U16 eachchar,U8 chartype,U16 lid)4566 static int charProc (wvParseStruct *ps, U16 eachchar, U8 chartype, U16 lid)
4567 {
4568 	IE_Imp_MsWord_97 * pDocReader = static_cast <IE_Imp_MsWord_97 *> (ps->userData);
4569 	return pDocReader->_charProc (ps, eachchar, chartype, lid);
4570 }
4571 
specCharProc(wvParseStruct * ps,U16 eachchar,CHP * achp)4572 static int specCharProc (wvParseStruct *ps, U16 eachchar, CHP* achp)
4573 {
4574 	IE_Imp_MsWord_97 * pDocReader = static_cast <IE_Imp_MsWord_97 *> (ps->userData);
4575 	return pDocReader->_specCharProc (ps, eachchar, achp);
4576 }
4577 
eleProc(wvParseStruct * ps,wvTag tag,void * props,int dirty)4578 static int eleProc (wvParseStruct *ps, wvTag tag, void *props, int dirty)
4579 {
4580 	IE_Imp_MsWord_97 * pDocReader = static_cast <IE_Imp_MsWord_97 *> (ps->userData);
4581 	return pDocReader->_eleProc (ps, tag, props, dirty);
4582 }
4583 
docProc(wvParseStruct * ps,wvTag tag)4584 static int docProc (wvParseStruct *ps, wvTag tag)
4585 {
4586 	IE_Imp_MsWord_97 * pDocReader = static_cast <IE_Imp_MsWord_97 *> (ps->userData);
4587 	return pDocReader->_docProc (ps, tag);
4588 }
4589 
4590 
4591 //--------------------------------------------------------------------------/
4592 //--------------------------------------------------------------------------/
4593 
_table_open()4594 void IE_Imp_MsWord_97::_table_open ()
4595 {
4596   m_iCurrentRow = 0;
4597   m_iCurrentCell = 0;
4598 
4599   //  _appendStrux(PTX_Block, NULL); // Don't need/want this after 27/3/2005
4600   _appendStrux(PTX_SectionTable, NULL);
4601   m_vecColumnWidths.clear();
4602   m_bRowOpen = false;
4603   m_bCellOpen = false;
4604   m_bInPara = false;
4605 #ifdef DEBUG
4606   static UT_sint32 sTableCount = 0;
4607   sTableCount++;
4608 #endif
4609   UT_DEBUGMSG(("\n<TABLE> [%d]", sTableCount));
4610 
4611 }
4612 
4613 //--------------------------------------------------------------------------/
4614 //--------------------------------------------------------------------------/
4615 
4616 /*!
4617  * Exand a vector with zeros to make room for a new value
4618  */
setNumberVector(UT_NumberVector & vec,UT_sint32 i,UT_sint32 val)4619 void IE_Imp_MsWord_97::setNumberVector(UT_NumberVector & vec, UT_sint32 i, UT_sint32 val)
4620 {
4621 	while(i > static_cast<UT_sint32>(vec.size() +1))
4622 	{
4623 		vec.addItem(0);
4624 	}
4625 	vec.addItem(val); // we are sure that it will be appened at index i
4626 }
4627 
/*!
 * This method parses the vector of MsColSpans held by m_vecColumnWidths
 * and fills the vector colWidths with the widths of the individual columns.
 *
 * We do this because MSWord provides the widths of column spans, and in
 * some cases you can get a table with no row fully partitioned into
 * individual cells.
 *
 * \param colWidths receives the column widths; entries still unknown are 0
 * \return true if colWidths contains no zero entry after at most 1000
 *         refinement passes, false otherwise
 *
 * Spans derived during refinement are allocated here and added to
 * m_vecColumnWidths.
 */
bool IE_Imp_MsWord_97::_build_ColumnWidths(UT_NumberVector & colWidths)
{

// OK handle the easy cases first and find the maximum value of iRight

	UT_sint32 iMaxRight = 0;
	UT_sint32 i = 0;
	UT_sint32 iLeft,iRight = 0;
	UT_sint32 iSize = static_cast<UT_sint32>(m_vecColumnWidths.size());
	for(i=0; i< iSize;i++)
	{
		MsColSpan * pSpan = reinterpret_cast<MsColSpan *>(m_vecColumnWidths.getNthItem(i));
		iLeft = pSpan->iLeft;
		iRight = pSpan->iRight;
		if(iMaxRight < iRight)
		{
			iMaxRight = iRight;
		}
		// a span covering exactly one column gives that column's width directly
		if((iLeft + 1) == iRight)
		{
			setNumberVector(colWidths,iLeft,pSpan->width);
			xxx_UT_DEBUGMSG(("_build_ColumnWidths Initial set: Left %d Width %d \n",iLeft,colWidths[iLeft]));
		}
	}
//
// Look to see if we're finished now.
//
	if((colWidths.size() == iMaxRight) && _isVectorFull(colWidths))
	{
		return true;
	}
	// fewer entries than columns: extend colWidths via setNumberVector
	if(colWidths.size() < iMaxRight)
	{
		setNumberVector(colWidths,iMaxRight -1,0);
	}
//
// OK Now the hard part. Proceed by scanning through the m_vecColumnWidths,
// Looking for spans, at each span we look to see if we can break the span
// into smaller pieces by subtracting a single span width.
//
// When we have a single column span we insert it in colWidths if colWidths
// is empty at that point.
//
// We continue until colWidths is completely full.
//
	UT_uint32 iLoop = 0;
	// the pass counter bounds the refinement at 1000 iterations
	while(iLoop < 1000 && !_isVectorFull(colWidths))
	{
		// size() is re-read on every iteration, so spans added below are
		// visited within the same pass
		for(i=0; i<static_cast<UT_sint32>(m_vecColumnWidths.size()); i++)
		{
			MsColSpan * pSpan = reinterpret_cast<MsColSpan *>(m_vecColumnWidths.getNthItem(i));
			iLeft = pSpan->iLeft;
			iRight = pSpan->iRight;
			xxx_UT_DEBUGMSG(("Loop %d iLeft %d,iRight %d colWidth[iLeft] %d colWidth[iRight-1] %d\n",iLoop,iLeft,iRight,colWidths[iLeft],colWidths[iRight -1]));
			if(iMaxRight < iRight)
			{
				iMaxRight = iRight;
			}
			if(((iLeft + 1) == iRight) && (colWidths[iLeft] == 0))
			{
				// NOTE(review): setNumberVector() appends rather than
				// overwriting an existing entry -- confirm this fills iLeft
				setNumberVector(colWidths,iLeft,pSpan->width);
			}
			else if((iLeft + 1) < iRight)
			{
				// multi-column span: try to peel a known column off one end
				if(colWidths[iLeft] > 0)
				{
					// leftmost column known: the remainder is [iLeft+1, iRight)
					if(!findMatchSpan(iLeft+1,iRight))
					{
						MsColSpan * pNewSpan = new MsColSpan();
						pNewSpan->iLeft = iLeft+1;
						pNewSpan->iRight = iRight;
						pNewSpan->width = pSpan->width - colWidths[iLeft];
						m_vecColumnWidths.addItem(pNewSpan);
					}
				}
				else if(colWidths[iRight - 1] > 0)
				{
					// rightmost column known: the remainder is [iLeft, iRight-1)
					if(!findMatchSpan(iLeft,iRight-1))
					{
						MsColSpan * pNewSpan = new MsColSpan();
						pNewSpan->iLeft = iLeft;
						pNewSpan->iRight = iRight-1;
						pNewSpan->width = pSpan->width - colWidths[iRight-1];
						m_vecColumnWidths.addItem(pNewSpan);
					}
				}
//
// OK now look to see if we can fragment this by subtracting a span of more
// than one column from either end.
//
				else
				{
					UT_sint32 k =0;
					for(k=0; k<static_cast<UT_sint32>(m_vecColumnWidths.size()); k++)
					{
						// NOTE(review): this fetches item i, not item k, so
						// pMulSpan is always the same span as pSpan and neither
						// condition below can be true -- confirm whether
						// getNthItem(k) was intended
						MsColSpan * pMulSpan = m_vecColumnWidths.getNthItem(i);
						UT_sint32 iMulLeft = pMulSpan->iLeft;
						UT_sint32 iMulRight = pMulSpan->iRight;
						if(iMulLeft == iLeft && iMulRight < iRight)
						{
//
// Make a new span fragment out of the bit greater than MulRight if one doesn't
// exist
//
							if(!findMatchSpan(iMulRight+1,iRight))
							{
								MsColSpan * pNewSpan = new MsColSpan();
								pNewSpan->iLeft = iMulRight+1;
								pNewSpan->iRight = iRight;
								pNewSpan->width = pSpan->width - pMulSpan->width;
								m_vecColumnWidths.addItem(pNewSpan);
							}

						}
						else if (iMulLeft > iLeft && iMulRight == iRight)
						{
//
// Make a new span fragment out of the bit less than MulLeft
//
							if(!findMatchSpan(iLeft,iMulLeft))
							{
								MsColSpan * pNewSpan = new MsColSpan();
								pNewSpan->iLeft = iLeft;
								pNewSpan->iRight = iMulLeft;
								pNewSpan->width = pSpan->width - pMulSpan->width;
								m_vecColumnWidths.addItem(pNewSpan);
							}
						}
					}
				}
			}
		}
		iLoop++;
		// evaluated at the end of every refinement pass; presumably meant to
		// flag in debug builds that the slow path was taken -- TODO confirm
		UT_ASSERT_HARMLESS(0);
	}
	UT_ASSERT_HARMLESS(iLoop < 1000);
	return (iLoop < 1000);
}
4774 
4775 /*!
4776  * Returns true if a span in the m_vecColumnWidths span matches the left, right
4777  * values given
4778  */
findMatchSpan(UT_sint32 iLeft,UT_sint32 iRight)4779 bool IE_Imp_MsWord_97::findMatchSpan(UT_sint32 iLeft,UT_sint32 iRight)
4780 {
4781 	UT_sint32 i =0;
4782 	for(i=0; i< static_cast<UT_sint32>(m_vecColumnWidths.size());i++)
4783 	{
4784 		MsColSpan * pSpan = m_vecColumnWidths.getNthItem(i);
4785 		if(pSpan->iLeft == iLeft && pSpan->iRight == iRight)
4786 		{
4787 			return true;
4788 		}
4789 	}
4790 	return false;
4791 }
4792 
4793 /*!
4794  * Returns false if any element in the vector is non-zero
4795  */
_isVectorFull(UT_NumberVector & vec)4796 bool IE_Imp_MsWord_97::_isVectorFull(UT_NumberVector & vec)
4797 {
4798 	UT_sint32 i = 0;
4799 	for(i=0;i< vec.size() ; i++)
4800 	{
4801 		xxx_UT_DEBUGMSG(("isVectorFull i %d val %d \n",i,vec[i]));
4802 		if( vec[i] == 0)
4803 		{
4804 			return false;
4805 			break;
4806 		}
4807 	}
4808 	return true;
4809 }
4810 
_table_close(const wvParseStruct *,const PAP * apap)4811 void IE_Imp_MsWord_97::_table_close (const wvParseStruct * /*ps*/, const PAP *apap)
4812 {
4813   _cell_close();
4814   _row_close();
4815 
4816   UT_String props("table-column-props:");
4817   UT_String propBuffer;
4818 
4819   if (m_vecColumnWidths.size() > 0)
4820   {
4821 	  // build column width properties string
4822 	  UT_NumberVector colWidths;
4823 //
4824 // Some tables maybe too complicated for my simple algorithim to work out
4825 //
4826 	  if(_build_ColumnWidths(colWidths))
4827 	  {
4828 
4829 		  for (UT_sint32 i = 0; i < colWidths.size(); i++)
4830 		  {
4831 			  UT_String_sprintf(propBuffer,"%s/",
4832 							UT_convertInchesToDimensionString(m_dim,
4833 															  (static_cast<double>(colWidths.getNthItem(i)))/1440.0));
4834 
4835 			  props += propBuffer;
4836 		  }
4837 	  }
4838 
4839 	  props += "; ";
4840 //
4841 // FIXME: Put in left position here!!!!
4842 //
4843 	  UT_String_sprintf(propBuffer,"table-column-leftpos:%s; ",
4844 							UT_convertInchesToDimensionString(m_dim,
4845 															  (static_cast<double>(m_iLeftCellPos)/1440.0)));
4846 	  props += propBuffer;
4847 	  UT_VECTOR_PURGEALL(MsColSpan *,m_vecColumnWidths);
4848 	  m_vecColumnWidths.clear ();
4849   }
4850 
4851   props += "table-line-ignore:0; table-line-type:1; table-line-thickness:0.8pt;";
4852   if(apap->ptap.dxaGapHalf > 0)
4853   {
4854 	  props += UT_String_sprintf("table-col-spacing:%din", (2 * apap->ptap.dxaGapHalf)/ 1440);
4855   }
4856   else
4857   {
4858 	  props += "table-col-spacing:0.03in";
4859   }
4860   // apply properties
4861   PT_DocPosition posEnd =0;
4862   getDoc()->getBounds(true,posEnd); // clean frags!
4863   pf_Frag_Strux* sdh = getDoc()->getLastStruxOfType(PTX_SectionTable);
4864   getDoc()->changeStruxAttsNoUpdate(sdh,"props",props.c_str());
4865 
4866   // end-of-table
4867   _appendStrux(PTX_EndTable, NULL);
4868   m_bInPara = false ;
4869 
4870   UT_DEBUGMSG(("\n</TABLE>\n"));
4871 }
4872 
4873 //--------------------------------------------------------------------------/
4874 //--------------------------------------------------------------------------/
4875 
_row_open(const wvParseStruct * ps)4876 void IE_Imp_MsWord_97::_row_open (const wvParseStruct *ps)
4877 {
4878   if (m_bRowOpen)
4879     return;
4880 
4881   if (m_iCurrentRow > ps->norows) {
4882 	  //UT_ASSERT(m_iCurrentRow <= ps->norows);
4883 	  return;
4884   }
4885 
4886   m_bRowOpen = true;
4887   m_iCurrentRow++;
4888   xxx_UT_DEBUGMSG(("imp_MsWord: _row_open: Last Left %d Last Right %d \n",m_iLeft,m_iRight));
4889   m_iCurrentCell = 0;
4890   m_iLeft = 0;
4891   m_iRight = 0;
4892   xxx_UT_DEBUGMSG(("\n\t<ROW:%d>", m_iCurrentRow));
4893 }
4894 
4895 //--------------------------------------------------------------------------/
4896 //--------------------------------------------------------------------------/
4897 
_row_close()4898 void IE_Imp_MsWord_97::_row_close ()
4899 {
4900   if (m_bRowOpen) {
4901     xxx_UT_DEBUGMSG(("\t</ROW>"));
4902   }
4903   m_bRowOpen = false;
4904 }
4905 
4906 //--------------------------------------------------------------------------/
4907 //--------------------------------------------------------------------------/
4908 
// line style values, taken from fp_TableContainer.h
enum
{
  LS_OFF = 0,	        // no line style: nothing is drawn
  LS_NORMAL = 1 	// an ordinary solid line
};

/*!
 * Translate a Word border line type into one of the LS_* line styles.
 * At present every line type is mapped to LS_NORMAL.
 */
static int
sConvertLineStyle (short lineType)
{
  // the two line types handled explicitly so far
  if (lineType == 0 || lineType == 1)
    return LS_NORMAL;

  // TODO: more cases here
  return LS_NORMAL;
}
4930 
/*!
 * Convert a border line width, given in eighths of a unit, into a fractional
 * width. The value 255 is treated as "no width" and yields 0.
 */
static double
brc_to_pixel (int x)
{
  return (x == 255) ? 0. : x/8.;
}
4939 
_cell_open(const wvParseStruct * ps,const PAP * apap)4940 void IE_Imp_MsWord_97::_cell_open (const wvParseStruct *ps, const PAP *apap)
4941 {
4942   if (m_bCellOpen || apap->fTtp)
4943     return;
4944 
4945   if (!m_bRowOpen || m_iCurrentRow > ps->norows) {
4946 	  //UT_ASSERT(m_bRowOpen || m_iCurrentRow <= ps->norows);
4947 	  return;
4948   }
4949 
4950   UT_Vector columnWidths;
4951   UT_sint32 vspan = 0;
4952   UT_String propBuffer;
4953 
4954   const gchar* propsArray[3];
4955   propsArray[0] = static_cast<const gchar*>("props");
4956   propsArray[1] = "";
4957   propsArray[2] = NULL;
4958 
4959 
4960 #if 0
4961   if(m_iCurrentCell >= apap->ptap.itcMac)
4962   {
4963 	  // this happens when the row contains no cell definitions; we
4964 	  // need to insert a dummy cell into our row
4965 	  goto do_insert;
4966   }
4967 #endif
4968 
4969   // add a new cell
4970   m_bCellOpen = true;
4971   if(m_iCurrentCell == 0)
4972   {
4973 //
4974 // Scan the differences in centers for this row so we can work out the column
4975 // widths of the table eventually.
4976 //
4977 	  m_iLeftCellPos = 0;
4978 	  UT_sint32 iLeft, iRight, i;
4979 	  m_iLeftCellPos = ps->cellbounds[0];
4980 	  for(i = 0; i < ps->nocellbounds-1; i++)
4981 	  {
4982 		  iLeft = i;
4983 		  iRight = i+1;
4984 		  UT_sint32 width = ps->cellbounds[iRight] - ps->cellbounds[iLeft];
4985 		  if (width <= 0)
4986 			  break;
4987 		  MsColSpan * pSpan = new MsColSpan();
4988 		  pSpan->iLeft = iLeft;
4989 		  pSpan->iRight = iRight;
4990 		  pSpan->width = width;
4991 		  xxx_UT_DEBUGMSG(("MsImport iLeft %d  iRight %d width  %d \n",iLeft,iRight,width));
4992 		  m_vecColumnWidths.addItem(pSpan);
4993 	  }
4994   }
4995 
4996   if (ps->vmerges && ps->vmerges[m_iCurrentRow - 1])
4997     vspan = ps->vmerges[m_iCurrentRow - 1][m_iCurrentCell];
4998 
4999   if (vspan > 0)
5000     vspan--;
5001 
5002   m_iRight = m_iLeft + m_vecColumnSpansForCurrentRow.getNthItem(m_iCurrentCell);
5003   if(m_iRight == m_iLeft)
5004   {
5005 	  m_iRight++;
5006   }
5007   xxx_UT_DEBUGMSG(("MSWord Import:  iLeft %d iRight %d m_iCurrentCell %d \n",m_iLeft,m_iRight,m_iCurrentCell));
5008   UT_return_if_fail(vspan >= 0);
5009   UT_String_sprintf(propBuffer,
5010 		    "left-attach:%d; right-attach:%d; top-attach:%d; bot-attach:%d; ",
5011 		    m_iLeft,
5012 		    m_iRight,
5013 		    m_iCurrentRow - 1,
5014 		    m_iCurrentRow + vspan
5015 		    );
5016 
5017   if(apap->ptap.dyaRowHeight < 0)
5018   {
5019 	  // absolute height
5020 	  double dHin = -(apap->ptap.dyaRowHeight/1440);
5021 	  propBuffer += UT_String_sprintf("height:%fin;",dHin);
5022   }
5023   else if(apap->ptap.dyaRowHeight > 0)
5024   {
5025 	  // at-least height -- I do not think we support this for now
5026 	  // double dHin = -(apap->ptap.dyaRowHeight/1440);
5027 	  // propBuffer += UT_String_sprintf("height:%fin;",dHin);
5028   }
5029   else
5030   {
5031 	  // auto height, do nothing
5032   }
5033 
5034   propBuffer += UT_String_sprintf("color:%s;", sMapIcoToColor(apap->ptap.rgshd[m_iCurrentCell].icoFore, true).c_str());
5035   propBuffer += UT_String_sprintf("background-color:%s;", sMapIcoToColor(apap->ptap.rgshd[m_iCurrentCell].icoBack, false).c_str());
5036   // so long as it's not the "auto" color
5037   if (apap->ptap.rgshd[m_iCurrentCell].icoBack != 0)
5038     propBuffer += "bg-style:1;";
5039 
5040   {
5041 	  UT_LocaleTransactor t(LC_NUMERIC, "C");
5042 	  propBuffer += UT_String_sprintf("top-color:%s; top-thickness:%fpt; top-style:%d;",
5043 									  sMapIcoToColor(apap->ptap.rgtc[m_iCurrentCell].brcTop.ico, true).c_str(),
5044 									  brc_to_pixel(apap->ptap.rgtc[m_iCurrentCell].brcTop.dptLineWidth),
5045 									  sConvertLineStyle(apap->ptap.rgtc[m_iCurrentCell].brcTop.brcType));
5046 	  propBuffer += UT_String_sprintf("left-color:%s; left-thickness:%fpx; left-style:%d;",
5047 									  sMapIcoToColor(apap->ptap.rgtc[m_iCurrentCell].brcLeft.ico, true).c_str(),
5048 									  brc_to_pixel(apap->ptap.rgtc[m_iCurrentCell].brcLeft.dptLineWidth),
5049 									  sConvertLineStyle(apap->ptap.rgtc[m_iCurrentCell].brcLeft.brcType));
5050 	  propBuffer += UT_String_sprintf("bot-color:%s; bot-thickness:%fpx; bot-style:%d;",
5051 									  sMapIcoToColor(apap->ptap.rgtc[m_iCurrentCell].brcBottom.ico, true).c_str(),
5052 									  brc_to_pixel(apap->ptap.rgtc[m_iCurrentCell].brcBottom.dptLineWidth),
5053 									  sConvertLineStyle(apap->ptap.rgtc[m_iCurrentCell].brcBottom.brcType));
5054 	  propBuffer += UT_String_sprintf("right-color:%s; right-thickness:%fpx; right-style:%d",
5055 									  sMapIcoToColor(apap->ptap.rgtc[m_iCurrentCell].brcRight.ico, true).c_str(),
5056 									  brc_to_pixel(apap->ptap.rgtc[m_iCurrentCell].brcRight.dptLineWidth),
5057 									  sConvertLineStyle(apap->ptap.rgtc[m_iCurrentCell].brcRight.brcType));
5058   }
5059   xxx_UT_DEBUGMSG(("propbuffer: %s \n",propBuffer.c_str()));
5060 
5061   propsArray[1] = propBuffer.c_str();
5062 
5063   // do_insert:
5064   _appendStrux(PTX_SectionCell, propsArray);
5065   m_bInPara = false;
5066   m_iCurrentCell++;
5067   m_iLeft = m_iRight;
5068   xxx_UT_DEBUGMSG(("\t<CELL:%d:%d>", static_cast<int>(m_vecColumnSpansForCurrentRow.getNthItem(m_iCurrentCell - 1)), ps->vmerges[m_iCurrentRow - 1][m_iCurrentCell - 1]));
5069 }
5070 
5071 //--------------------------------------------------------------------------/
5072 //--------------------------------------------------------------------------/
5073 
_cell_close()5074 void IE_Imp_MsWord_97::_cell_close ()
5075 {
5076   if (!m_bCellOpen)
5077     return;
5078 
5079   m_bCellOpen = false;
5080   _appendStrux(PTX_EndCell, NULL);
5081   m_bInPara = false ;
5082 
5083   xxx_UT_DEBUGMSG(("</CELL>"));
5084 }
5085 
5086 
_generateCharProps(UT_String & s,const CHP * achp,wvParseStruct * ps)5087 void IE_Imp_MsWord_97::_generateCharProps(UT_String &s, const CHP * achp, wvParseStruct *ps)
5088 {
5089 	UT_String propBuffer;
5090 
5091 	// set char tolower if fSmallCaps && fLowerCase
5092 	if ( achp->fSmallCaps && achp->fLowerCase )
5093 		m_bIsLower = true;
5094 	else
5095 		m_bIsLower = false;
5096 
5097 	// set language based the lid - TODO: do we want to handle -none- differently?
5098 	s += "lang:";
5099 
5100 	unsigned short iLid = 0;
5101 	// I am not sure how the various lids are supposed to work, but
5102 	// achp->fBidi does not mean that the lidBidi is set ...
5103 	if (achp->fBidi)
5104 	{
5105 		iLid = achp->lidBidi;
5106 	}
5107 	else if(ps->fib.fFarEast)
5108 	{
5109 		iLid = achp->lidFE;
5110 	}
5111 	else
5112 	{
5113 		iLid = achp->lid;
5114 	}
5115 
5116 
5117 	// if we do not have meaningful lid, try default ...
5118 	if(!iLid)
5119 		iLid = achp->lidDefault;
5120 
5121 	s += wvLIDToLangConverter (iLid);
5122 	s += ";";
5123 
5124 	// decide best codepage based on the lid (as lang code above)
5125 	UT_String codepage;
5126 	if (achp->fBidi)
5127 		codepage = wvLIDToCodePageConverter (achp->lidBidi);
5128 	else if (!ps->fib.fFarEast)
5129 		codepage = wvLIDToCodePageConverter (achp->lidDefault);
5130 	else
5131 		codepage = wvLIDToCodePageConverter (achp->lidFE);
5132 
5133 	// watch out for codepage 0 = unicode
5134 	const char * pNUE = XAP_EncodingManager::get_instance()->getNativeUnicodeEncodingName();
5135 
5136 	if (codepage == "CP0")
5137 		codepage = pNUE;
5138 
5139 	// if this is the first codepage we've seen, use it.
5140 	// if we see more than one different codepage in a document, use unicode.
5141 	if (!getDoc()->getEncodingName())
5142 		getDoc()->setEncodingName(codepage.c_str());
5143 	else if (getDoc()->getEncodingName() != codepage)
5144 		getDoc()->setEncodingName(pNUE);
5145 
5146 	// bold text
5147 	bool fBold = (achp->fBidi ? achp->fBoldBidi : achp->fBold);
5148 	if (fBold) {
5149 		s += "font-weight:bold;";
5150 	}
5151 
5152 	// italic text
5153 	bool fItalic = (achp->fBidi ? achp->fItalicBidi : achp->fItalic);
5154 	if (fItalic) {
5155 		s += "font-style:italic;";
5156 	}
5157 
5158 	// foreground color
5159 	U8 ico = (achp->fBidi ? achp->icoBidi : achp->ico);
5160 	if (ico) {
5161 		UT_String_sprintf(propBuffer, "color:%s;",
5162 						  sMapIcoToColor(ico, true).c_str());
5163 		s += propBuffer;
5164 	}
5165 
5166 	// background color
5167 	ico = achp->shd.icoBack;
5168 	if (ico) {
5169 		if (!achp->fHighlight) {
5170 			// HACK: We don't support borders and shading yet, so it seems safe to use the background
5171 			// color as a substitute when there's no true highlight color (see the doc from Bug 6432)
5172 			UT_String_sprintf(propBuffer, "bgcolor:%s;",
5173 							  sMapIcoToColor(ico, false).c_str());
5174 		} else {
5175 			// Note: This property won't be rendered until we have borders and shading support
5176 			UT_String_sprintf(propBuffer, "background-color:%s;",
5177 							  sMapIcoToColor(ico, false).c_str());
5178 		}
5179 		s += propBuffer;
5180 	}
5181 
5182 
5183 	// underline and strike-through
5184 	if (achp->fStrike || achp->kul) {
5185 		s += "text-decoration:";
5186 		if ((achp->fStrike || achp->fDStrike) && achp->kul) {
5187 			s += "underline line-through;";
5188 		} else if (achp->kul) {
5189 			s += "underline;";
5190 		} else {
5191 			s += "line-through;";
5192 		}
5193 	}
5194 
5195 	// background color
5196 	if (achp->fHighlight) {
5197 		UT_String_sprintf(propBuffer,"bgcolor:%s;",
5198 						  sMapIcoToColor(achp->icoHighlight, false).c_str());
5199 		s += propBuffer;
5200 	}
5201 
5202 	// superscript && subscript
5203 	if (achp->iss == 1) {
5204 		s += "text-position: superscript;";
5205 	} else if (achp->iss == 2) {
5206 		s += "text-position: subscript;";
5207 	}
5208 
5209 	if (achp->fVanish)
5210 	{
5211 	    s += "display:none;";
5212 	}
5213 
5214 	// font size (hps is half-points)
5215 	// I have seen a bidi doc that had hpsBidi == 0, and the actual size in hps
5216 	U16 hps = (achp->fBidi &&  achp->hpsBidi ? achp->hpsBidi : achp->hps);
5217 	UT_String_sprintf(propBuffer,
5218 					  "font-size:%dpt;", (int)(hps/2));
5219 	s += propBuffer;
5220 
5221 	// font family
5222 	char *fname;
5223 
5224 	// if the FarEast flag is set, use the FarEast font,
5225 	// otherwise, we'll use the ASCII font.
5226 	if(achp->xchSym)
5227 	{
5228 		fname = wvGetFontnameFromCode(&ps->fonts, achp->ftcSym);
5229 	}
5230 	else if (achp->fBidi)
5231 	{
5232 		fname = wvGetFontnameFromCode(&ps->fonts, achp->ftcBidi);
5233 	}
5234 	else if (!ps->fib.fFarEast)
5235 	{
5236 		fname = wvGetFontnameFromCode(&ps->fonts, achp->ftcAscii);
5237 	}
5238 	else
5239 	{
5240 		fname = wvGetFontnameFromCode(&ps->fonts, achp->ftcFE);
5241 	}
5242 
5243 	// there are times when we should use the third, Other font,
5244 	// and the logic to know when somehow depends on the
5245 	// character sets or encoding types? it's in the docs.
5246 
5247 	UT_ASSERT_HARMLESS(fname != NULL);
5248 	xxx_UT_DEBUGMSG(("font-family = %s\n", fname));
5249 
5250 	s += "font-family:";
5251 
5252 	if(fname)
5253 		s += fname;
5254 	else
5255 		s += "Times New Roman";
5256 	FREEP(fname);
5257 }
5258 
_generateParaProps(UT_String & s,const PAP * apap,wvParseStruct *)5259 void IE_Imp_MsWord_97::_generateParaProps(UT_String &s, const PAP * apap, wvParseStruct * /*ps*/)
5260 {
5261 	UT_String propBuffer;
5262 
5263 	// DOM TODO: i think that this is right
5264 	if (apap->fBidi == 1)
5265 	{
5266 		s += "dom-dir:rtl;";
5267 	}
5268 	else
5269 	{
5270 		s += "dom-dir:ltr;";
5271 	}
5272 
5273 	// paragraph alignment/justification
5274 	switch(apap->jc)
5275 	{
5276 		case 0:
5277 			s += "text-align:left;";
5278 			break;
5279 		case 1:
5280 			s += "text-align:center;";
5281 			break;
5282 		case 2:
5283 			s += "text-align:right;";
5284 			break;
5285 		case 3:
5286 			s += "text-align:justify;";
5287 			break;
5288 		case 4:
5289 			/* this type of justification is of unknown purpose and is
5290 			 * undocumented , but it shows up in asian documents so someone
5291 			 * should be able to tell me what it is someday
5292 			 */
5293 			s += "text-align:justify;";
5294 			break;
5295 	}
5296 
5297 	// keep paragraph together?
5298 	if (apap->fKeep) {
5299 		s += "keep-together:yes;";
5300 	}
5301 
5302 	// keep with next paragraph?
5303 	if (apap->fKeepFollow) {
5304 		s += "keep-with-next:yes;";
5305 	}
5306 
5307 	// widowed/orphaned lines
5308 	if (!apap->fWidowControl) {
5309 		// these AbiWord properties give the same effect
5310 		s += "orphans:0;widows:0;";
5311 	}
5312 
5313 	// line spacing (single-spaced, double-spaced, etc.)
5314 	if (apap->lspd.fMultLinespace) {
5315 		UT_String_sprintf(propBuffer,
5316 						  "line-height:%s;",
5317 						  UT_convertToDimensionlessString( (static_cast<double>(apap->lspd.dyaLine) / 240), "1.1"));
5318 		s += propBuffer;
5319 	} else {
5320 		// TODO: handle exact line heights
5321 	}
5322 
5323 	//
5324 	// margins
5325 	//
5326 
5327 	// margin-right
5328 	if (apap->dxaRight) {
5329 		UT_String_sprintf(propBuffer,
5330 						  "margin-right:%s;",
5331 						  UT_convertInchesToDimensionString(m_dim, (static_cast<double>(apap->dxaRight) / 1440)));
5332 		s += propBuffer;
5333 	}
5334 
5335 	// margin-left
5336 	if (apap->dxaLeft) {
5337 		UT_String_sprintf(propBuffer,
5338 						  "margin-left:%s;",
5339 						  UT_convertInchesToDimensionString(m_dim, (static_cast<double>(apap->dxaLeft) / 1440)));
5340 		s += propBuffer;
5341 	}
5342 
5343 	// margin-left first line (indent)
5344 	if (apap->dxaLeft1) {
5345 		UT_String_sprintf(propBuffer,
5346 						  "text-indent:%s;",
5347 						  UT_convertInchesToDimensionString(m_dim, (static_cast<double>(apap->dxaLeft1) / 1440)));
5348 		s += propBuffer;
5349 	}
5350 
5351 	// margin-top
5352 	if (apap->dyaBefore) {
5353 		UT_String_sprintf(propBuffer,
5354 						  "margin-top:%dpt;", (apap->dyaBefore / 20));
5355 		s += propBuffer;
5356 	}
5357 
5358 	// margin-bottom
5359 	if (apap->dyaAfter) {
5360 		UT_String_sprintf(propBuffer,
5361 						  "margin-bottom:%dpt;", (apap->dyaAfter / 20));
5362 		s += propBuffer;
5363 	}
5364 
5365 	// tab stops
5366 	if (apap->itbdMac) {
5367 		propBuffer += "tabstops:";
5368 
5369 		for (int iTab = 0; iTab < apap->itbdMac; iTab++) {
5370 			propBuffer += UT_String_sprintf("%s/",
5371 						UT_convertInchesToDimensionString(m_dim,
5372 										((static_cast<double>(apap->rgdxaTab[iTab])) / 1440)));
5373 
5374 			switch (apap->rgtbd[iTab].jc) {
5375 				case 1:
5376 					propBuffer += "C,";
5377 					break;
5378 				case 2:
5379 					propBuffer += "R,";
5380 					break;
5381 				case 3:
5382 					propBuffer += "D,";
5383 					break;
5384 				case 4:
5385 					propBuffer += "B,";
5386 					break;
5387 				case 0:
5388 				default:
5389 					propBuffer += "L,";
5390 					break;
5391 			}
5392 		}
5393 		// replace final comma with a semi-colon
5394 		propBuffer[propBuffer.size()-1] = ';';
5395 		s += propBuffer;
5396 	}
5397 
5398 	// foreground color
5399 	U8 ico = apap->shd.icoFore;
5400 	if (ico) {
5401 		UT_String_sprintf(propBuffer, "color:%s;",
5402 						  sMapIcoToColor(ico, true).c_str());
5403 		s += propBuffer;
5404 	}
5405 
5406 	// background color
5407 	ico = apap->shd.icoBack;
5408 	if (ico) {
5409 		UT_String_sprintf(propBuffer, "background-color:%s;",
5410 						  sMapIcoToColor(ico, false).c_str());
5411 		s += propBuffer;
5412 	}
5413 
5414 	// remove the trailing semi-colon
5415 	s [s.size()-1] = 0;
5416 
5417 }
5418 
5419 
5420 /*! imports a stylesheet from our document */
5421 
5422 #define PT_MAX_ATTRIBUTES 8
_handleStyleSheet(const wvParseStruct * ps)5423 void IE_Imp_MsWord_97::_handleStyleSheet(const wvParseStruct *ps)
5424 {
5425 	UT_uint32 iCount = ps->stsh.Stshi.cstd;
5426 //	UT_uint16 iBase  = ps->stsh.Stshi.cbSTDBaseInFile;
5427 
5428 	const gchar * attribs[PT_MAX_ATTRIBUTES*2 + 1];
5429 	UT_uint32 iOffset = 0;
5430 
5431 	const STD * pSTD = ps->stsh.std;
5432 	const STD * pSTDBase = pSTD;
5433 	UT_String props;
5434 	char * s = NULL;
5435 	char * b = NULL;
5436 	char * f = NULL;
5437 
5438 	UT_return_if_fail(pSTD != NULL);
5439 
5440 	for(UT_uint32 i = 0; i < iCount; i++, pSTD++)
5441 	{
5442 		iOffset = 0;
5443 
5444 		if(!pSTD->xstzName)
5445 		{
5446 			continue;
5447 		}
5448 
5449 		if(pSTD->cupx <= 1)
5450 		{
5451 			continue;
5452 		}
5453 
5454 		//UT_DEBUGMSG(("Style name: [%s], id: %d\n", pSTD->xstzName, pSTD->sti));
5455 
5456 		attribs[iOffset++] = PT_NAME_ATTRIBUTE_NAME;
5457 
5458 		// make sure we use standard names for standard styles
5459 		const gchar * pName = s_translateStyleId(pSTD->sti);
5460 
5461 		if(pName)
5462 		{
5463 			attribs[iOffset++] = pName;
5464 		}
5465 		else
5466 		{
5467 			s = s_convert_to_utf8(ps, pSTD->xstzName);
5468 			attribs[iOffset++] = s;
5469 		}
5470 
5471 		UT_DEBUGMSG(("Style name: [%s], id: %d\n", attribs[iOffset-1], pSTD->sti));
5472 
5473 
5474 		attribs[iOffset++] = PT_TYPE_ATTRIBUTE_NAME;
5475 		if(pSTD->sgc == sgcChp)
5476 		{
5477 			attribs[iOffset++] = "C";
5478 		}
5479 		else
5480 		{
5481 			attribs[iOffset++] = "P";
5482 
5483 			// also handle the followed-by, since that only applies to
5484 			// paragraph style
5485 			if(pSTD->istdNext != istdNil && pSTD->istdNext<iCount)
5486 			{
5487 				attribs[iOffset++] = PT_FOLLOWEDBY_ATTRIBUTE_NAME;
5488 				const char * t = s_translateStyleId(pSTD->istdNext);
5489 				if(!t)
5490 				{
5491 					t = f = s_convert_to_utf8(ps,(pSTDBase + pSTD->istdNext)->xstzName);
5492 				}
5493 				attribs[iOffset++] = t;
5494 			}
5495 		}
5496 
5497 		if(pSTD->istdBase != istdNil)
5498 		{
5499 			attribs[iOffset++] = PT_BASEDON_ATTRIBUTE_NAME;
5500 			const char * t = s_translateStyleId(pSTD->istdBase);
5501 			if(!t)
5502 				t = b = s_convert_to_utf8(ps,(pSTDBase + pSTD->istdBase)->xstzName);
5503 			attribs[iOffset++] = t;
5504 		}
5505 
5506 		// now we want to generate props
5507 		props.clear();
5508 
5509 		wvParseStruct * PS = const_cast<wvParseStruct *>(ps);
5510 
5511 		CHP achp;
5512 		wvInitCHPFromIstd(&achp, (U16)i, &(PS->stsh));
5513 		_generateCharProps(props,&achp,PS);
5514 
5515 		if(props.size())
5516 		{
5517 			props += ";";
5518 		}
5519 
5520 		PAP apap;
5521 		wvInitPAPFromIstd (&apap, (U16)i, &(PS->stsh));
5522 		_generateParaProps(props,&apap,PS);
5523 
5524 		// remove trailing semicolon
5525 		if(props[props.size()-1] == ';')
5526 		{
5527 			props[props.size()-1] = 0;
5528 		}
5529 
5530 		xxx_UT_DEBUGMSG(("Style props: %s\n", props.c_str()));
5531 
5532 		if(props.size())
5533 		{
5534 			attribs[iOffset++] = PT_PROPS_ATTRIBUTE_NAME;
5535 			attribs[iOffset++] = props.c_str();
5536 		}
5537 
5538 		attribs[iOffset] = NULL;
5539 
5540 		PD_Style * pStyle = NULL;
5541 		if(getDoc()->getStyle(pSTD->xstzName, &pStyle))
5542 		{
5543 			xxx_UT_DEBUGMSG(("Redefining style %s\n", pSTD->xstzName));
5544 			pStyle->addAttributes(attribs);
5545 			pStyle->getBasedOn();
5546 			pStyle->getFollowedBy();
5547 		}
5548 		else
5549 		{
5550 			getDoc()->appendStyle(attribs);
5551 		}
5552 
5553 		FREEP(s);
5554 		FREEP(b);
5555 		FREEP(f);
5556 	}
5557 }
5558 
_handleBookmarks(const wvParseStruct * ps)5559 int IE_Imp_MsWord_97::_handleBookmarks(const wvParseStruct *ps)
5560 {
5561 	UT_uint32 i,j;
5562 
5563 	if(m_pBookmarks)
5564 	{
5565 		for(i = 0; i < m_iBookmarksCount; i++)
5566 		{
5567 			if(m_pBookmarks[i].name && m_pBookmarks[i].start)
5568 			{
5569 				delete []m_pBookmarks[i].name;
5570 				m_pBookmarks[i].name = NULL;
5571 			}
5572 		}
5573 		delete [] m_pBookmarks;
5574 	}
5575 	BKF *bkf;
5576 	BKL *bkl;
5577 	U32 *posf, *posl, nobkf, nobkl;
5578 
5579 	if(!wvGetBKF_PLCF (&bkf, &posf, &nobkf, ps->fib.fcPlcfbkf, ps->fib.lcbPlcfbkf, ps->tablefd))
5580 	{
5581 		m_iBookmarksCount = nobkf;
5582 	}
5583 	else
5584 		m_iBookmarksCount = 0;
5585 
5586 	if(!wvGetBKL_PLCF (&bkl, &posl, &nobkl, ps->fib.fcPlcfbkl, ps->fib.lcbPlcfbkl, ps->fib.fcPlcfbkf, ps->fib.lcbPlcfbkf, ps->tablefd))
5587 	{
5588 		m_iBookmarksCount += nobkl;
5589 	}
5590 	else
5591 	{
5592 		if(m_iBookmarksCount > 0)
5593 		{
5594 			//g_free the bkf and posf
5595 			wvFree(bkf);
5596 			wvFree(posf);
5597 			m_iBookmarksCount = 0;
5598 		}
5599 	}
5600 	UT_return_val_if_fail(nobkl == nobkf, 0);
5601 	if(m_iBookmarksCount > 0)
5602 	{
5603 		try
5604 		{
5605 			m_pBookmarks = new bookmark[m_iBookmarksCount];
5606 		}
5607 		catch(...)
5608 		{
5609 			m_pBookmarks = NULL;
5610 		}
5611 
5612 		UT_return_val_if_fail(m_pBookmarks, 0);
5613 		for(i = 0; i < nobkf; i++)
5614 		{
5615 			m_pBookmarks[i].name = _getBookmarkName(ps, i);
5616 			m_pBookmarks[i].pos  = posf[i];
5617 			m_pBookmarks[i].start = true;
5618 		}
5619 
5620 		for(j = i; j < nobkl + i; j++)
5621 		{
5622 			// since the name is shared with the start of the bookmark,
5623 			// we reuse it
5624 			UT_sint32 iBkf = static_cast<UT_sint32>(bkl[j-i].ibkf) < 0 ? nobkl + static_cast<UT_sint32>(bkl[j-i].ibkf) : bkl[j-i].ibkf;
5625 			m_pBookmarks[j].name = m_pBookmarks[iBkf].name;
5626 			m_pBookmarks[j].pos  = posl[j - i];
5627 			m_pBookmarks[j].start = false;
5628 		}
5629 		// g_free bkf, bkl, posf, posl
5630 		wvFree(bkf);
5631 		wvFree(bkl);
5632 		wvFree(posf);
5633 		wvFree(posl);
5634 
5635 		//now sort the bookmarks by position
5636 		qsort(static_cast<void*>(m_pBookmarks),
5637 			  m_iBookmarksCount, sizeof(bookmark),
5638 			  s_cmp_bookmarks_qsort);
5639 
5640 #ifdef DEBUG
5641 		for(UT_uint32 k = 0; k < m_iBookmarksCount; k++)
5642 		{
5643 			UT_DEBUGMSG(("Bookmark: name [%s], pos %d, start %d\n",
5644 						 m_pBookmarks[k].name,m_pBookmarks[k].pos,m_pBookmarks[k].start));
5645 		}
5646 
5647 #endif
5648 	}
5649 	return 0;
5650 }
5651 
_handleNotes(const wvParseStruct * ps)5652 void IE_Imp_MsWord_97::_handleNotes(const wvParseStruct *ps)
5653 {
5654 	UT_uint32 i;
5655 
5656 	DELETEPV(m_pFootnotes);
5657 	DELETEPV(m_pEndnotes);
5658 
5659 	m_iFootnotesCount = 0;
5660 	m_iEndnotesCount = 0;
5661 	UT_uint32 *pPLCF_ref = NULL;
5662 	UT_uint32 *pPLCF_txt = NULL;
5663 
5664 	bool bNoteError = false;
5665 
5666 	if(ps->fib.lcbPlcffndTxt)
5667 	{
5668 		/* the docs say -1, but that is an error */
5669 		m_iFootnotesCount = ps->fib.lcbPlcffndTxt/4 - 2;
5670 		try
5671 		{
5672 			m_pFootnotes = new footnote[m_iFootnotesCount];
5673 		}
5674 		catch(...)
5675 		{
5676 			m_pFootnotes = NULL;
5677 		}
5678 
5679 		UT_return_if_fail(m_pFootnotes);
5680 
5681 		// this is really quite straight forward; we retrieve the PLCF
5682 		// chunks that describe the references/text of the footnotes, and
5683 		// then use those to init our footnote stucts
5684 		// for n footnotes the reference PLCF is a sequnce of (n+1) doc
5685 		// positions (UT_uint32) followed by n type flags (UT_uint16)
5686 		// the text PLCF is a sequence of n+2 positions (UT_uint32) of the footnote
5687 		// text in its data stream
5688 		if(wvGetPLCF((void **) &pPLCF_ref, ps->fib.fcPlcffndRef, ps->fib.lcbPlcffndRef, ps->tablefd))
5689 		{
5690 			bNoteError = true;
5691 		}
5692 
5693 		if(!bNoteError &&
5694 		   wvGetPLCF((void **) &pPLCF_txt, ps->fib.fcPlcffndTxt, ps->fib.lcbPlcffndTxt, ps->tablefd))
5695 		{
5696 			wvFree(pPLCF_ref);
5697 			bNoteError = true;
5698 		}
5699 
5700 		if(!bNoteError)
5701 		{
5702 			UT_return_if_fail(pPLCF_ref && pPLCF_txt);
5703 			for(i = 0; i < m_iFootnotesCount; i++)
5704 			{
5705 				m_pFootnotes[i].ref_pos = pPLCF_ref[i];
5706 				m_pFootnotes[i].txt_pos = pPLCF_txt[i] + m_iFootnotesStart;
5707 				m_pFootnotes[i].txt_len = pPLCF_txt[i+1] - pPLCF_txt[i];
5708 				// idx is an index of int16.
5709 				size_t idx = 2 * (m_iFootnotesCount + 1) + i;
5710 				// If you hit this assert, congratulation, you found a buggy file
5711 				//
5712 				UT_ASSERT(idx * 2 < ps->fib.lcbPlcffndRef);
5713 				if (idx * 2 >= ps->fib.lcbPlcffndRef) {
5714 					bNoteError = true;
5715 					// We are done with the footnotes here.
5716 					// This is as graceful as it can be.
5717 					m_iFootnotesCount--;
5718 					break;
5719 				}
5720 				UT_uint32 iType = ((UT_uint16*)pPLCF_ref)[idx];
5721 				m_pFootnotes[i].type = iType;
5722 				m_pFootnotes[i].pid = getDoc()->getUID(UT_UniqueId::Footnote);
5723 				UT_DEBUGMSG(("IE_Imp_MsWord_97::_handleNotes: fnote %d, rpos %d, tpos %d, type %d\n",
5724 							 i, m_pFootnotes[i].ref_pos, m_pFootnotes[i].txt_pos, iType));
5725 			}
5726 
5727 			wvFree(pPLCF_ref);
5728 			wvFree(pPLCF_txt);
5729 		}
5730 
5731 		// next, deal footnote formatting matters
5732 		const gchar * props[] = {"document-footnote-type",            NULL,
5733 									"document-footnote-initial",         NULL,
5734 									"document-footnote-restart-section", NULL,
5735 									"document-footnote-restart-page",    NULL,
5736 		                            NULL};
5737 
5738 		switch(ps->dop.rncFtn)
5739 		{
5740 			case 0:
5741 				props[5] = "0";
5742 				props[7] = "0";
5743 				break;
5744 			case 1:
5745 				props[5] = "1";
5746 				props[7] = "0";
5747 				break;
5748 			case 2:
5749 				props[5] = "0";
5750 				props[7] = "1";
5751 				break;
5752 			default:
5753 				UT_ASSERT_HARMLESS(UT_NOT_REACHED);
5754 		}
5755 
5756 		UT_String number;
5757 		UT_String_sprintf(number, "%d", ps->dop.nFtn);
5758 		props[3] = number.c_str();
5759 
5760 		switch(ps->dop.nfcFtnRef)
5761 		{
5762 			case 0:
5763 				props[1] = "numeric";
5764 				break;
5765 			case 1:
5766 				props[1] = "upper-roman";
5767 				break;
5768 			case 2:
5769 				props[1] = "lower-roman";
5770 				break;
5771 			case 3:
5772 				props[1] = "upper";
5773 				break;
5774 			case 4:
5775 				props[1] = "lower";
5776 				break;
5777 			default:
5778 				UT_ASSERT_HARMLESS(UT_NOT_REACHED);
5779 				props[1] = "";
5780 				break;
5781 		}
5782 
5783 		getDoc()->setProperties(&props[0]);
5784 	}
5785 
5786 	if(ps->fib.lcbPlcfendTxt)
5787 	{
5788 		m_iEndnotesCount  = ps->fib.lcbPlcfendTxt/4 - 2;
5789 		try
5790 		{
5791 			m_pEndnotes  = new footnote[m_iEndnotesCount];
5792 		}
5793 		catch(...)
5794 		{
5795 			m_pEndnotes = NULL;
5796 		}
5797 
5798 		UT_return_if_fail(m_pEndnotes);
5799 
5800 		bNoteError = false;
5801 		if(wvGetPLCF((void **) &pPLCF_ref, ps->fib.fcPlcfendRef, ps->fib.lcbPlcfendRef, ps->tablefd))
5802 		{
5803 			bNoteError = true;
5804 		}
5805 
5806 		if(!bNoteError &&
5807 		   wvGetPLCF((void **) &pPLCF_txt, ps->fib.fcPlcfendTxt, ps->fib.lcbPlcfendTxt, ps->tablefd))
5808 		{
5809 			wvFree(pPLCF_ref);
5810 			bNoteError = true;
5811 		}
5812 
5813 		if(!bNoteError)
5814 		{
5815 			UT_return_if_fail(pPLCF_ref && pPLCF_txt);
5816 			for(i = 0; i < m_iEndnotesCount; i++)
5817 			{
5818 				m_pEndnotes[i].ref_pos = pPLCF_ref[i];
5819 				m_pEndnotes[i].txt_pos = pPLCF_txt[i] + m_iEndnotesStart;
5820 				m_pEndnotes[i].txt_len = pPLCF_txt[i+1] - pPLCF_txt[i];
5821 				// idx is an index of int16.
5822 				size_t idx = 2 * (m_iEndnotesCount + 1) + i;
5823 				// If you hit this assert, congratulation, you found a buggy file
5824 				//
5825 				UT_ASSERT(idx * 2 < ps->fib.lcbPlcfendRef);
5826 				if (idx * 2 >= ps->fib.lcbPlcfendRef) {
5827 					bNoteError = true;
5828 					// We are done with the endnotes here.
5829 					// This is as graceful as it can be.
5830 					m_iEndnotesCount--;
5831 					break;
5832 				}
5833 				UT_uint32 iType = ((UT_uint16*)pPLCF_ref)[idx];
5834 				m_pEndnotes[i].type = iType;
5835 				m_pEndnotes[i].pid = getDoc()->getUID(UT_UniqueId::Endnote);
5836 				UT_DEBUGMSG(("IE_Imp_MsWord_97::_handleNotes: enote %d, rpos %d, tpos %d, type %d\n",
5837 							 i, m_pEndnotes[i].ref_pos, m_pEndnotes[i].txt_pos, iType));
5838 			}
5839 
5840 			wvFree(pPLCF_ref);
5841 			wvFree(pPLCF_txt);
5842 		}
5843 		// next, deal endnote formatting matters
5844 		const gchar * props[] = {"document-endnote-type",            NULL,
5845 									"document-endnote-initial",         NULL,
5846 									"document-endnote-restart-section", NULL,
5847 									"document-endnote-restart-page",    NULL,
5848 									"document-endnote-place-endsection",NULL,
5849 									"document-endnote-place-enddoc",    NULL,
5850 		                            NULL};
5851 
5852 		switch(ps->dop.rncEdn)
5853 		{
5854 			case 0:
5855 				props[5] = "0";
5856 				props[7] = "0";
5857 				break;
5858 			case 1:
5859 				props[5] = "1";
5860 				props[7] = "0";
5861 				break;
5862 			case 2:
5863 				props[5] = "0";
5864 				props[7] = "1";
5865 				break;
5866 
5867 			default:
5868 				UT_ASSERT_HARMLESS(UT_NOT_REACHED);
5869 		}
5870 
5871 		UT_String number;
5872 		UT_String_sprintf(number, "%d", ps->dop.nEdn);
5873 		props[3] = number.c_str();
5874 
5875 		switch(ps->dop.nfcEdnRef)
5876 		{
5877 			case 0:
5878 				props[1] = "numeric";
5879 				break;
5880 			case 1:
5881 				props[1] = "upper-roman";
5882 				break;
5883 			case 2:
5884 				props[1] = "lower-roman";
5885 				break;
5886 			case 3:
5887 				props[1] = "upper";
5888 				break;
5889 			case 4:
5890 				props[1] = "lower";
5891 				break;
5892 
5893 			default:
5894 				UT_ASSERT_HARMLESS(UT_NOT_REACHED);
5895 
5896 		}
5897 
5898 		switch(ps->dop.epc)
5899 		{
5900 			case 0:
5901 				props[9]  = "1";
5902 				props[11] = "0";
5903 				break;
5904 			case 3:
5905 				props[9]  = "0";
5906 				props[11] = "1";
5907 				break;
5908 			default:
5909 				UT_ASSERT_HARMLESS(UT_NOT_REACHED);
5910 
5911 		}
5912 
5913 		getDoc()->setProperties(&props[0]);
5914 	}
5915 }
5916 
_handleTextBoxes(const wvParseStruct * ps)5917 void IE_Imp_MsWord_97::_handleTextBoxes(const wvParseStruct *ps)
5918 {
5919 	UT_uint32 *pPLCF_dgg = NULL;
5920 	UT_uint32 *pPLCF_txt = NULL;
5921 
5922 	DELETEPV(m_pTextboxes);
5923 
5924 	bool bTextboxError = false;
5925 	m_iTextboxCount = 0;
5926 	UT_sint32 i = 0;
5927 	if(ps->fib.ccpTxbx > 0)
5928 	{
5929 		m_iTextboxCount = ps->nooffspa;
5930 		m_pTextboxes = new textbox [m_iTextboxCount];
5931 
5932 
5933 		// this is really quite straight forward; we retrieve the PLCF
5934 		// chunks that describe the references/text of the textboxes, and
5935 		// then use those to init our textbox stucts
5936 		// for n textboxes the reference PLCF is a sequnce of (n+1) doc
5937 		// positions (UT_uint32) followed by n type flags (UT_uint16)
5938 		// the text PLCF is a sequence of n+2 positions (UT_uint32) of the
5939         // textbox
5940 		// text in its data stream
5941 
5942 // This appears to be identical to how footnotes/endnotes are handled.
5943 
5944 		if(wvGetPLCF((void **) &pPLCF_dgg, ps->fib.fcDggInfo, ps->fib.lcbDggInfo, ps->tablefd))
5945 		{
5946 			bTextboxError = true;
5947 		}
5948 
5949 		UT_DEBUGMSG(("IE_Imp_MsWord_97::_handleTextBoxes: ps->fib.fcDggInfo %d ps->fib.lcbDggInfo %d \n", ps->fib.fcDggInfo,ps->fib.lcbDggInfo));
5950 
5951 		UT_DEBUGMSG(("IE_Imp_MsWord_97::_handleTextBoxes: Text size %d bytes\n", ps->fib.ccpTxbx));
5952 
5953 		UT_DEBUGMSG(("IE_Imp_MsWord_97::_handleTextBoxes: fib.lid %d \n", ps->fib.lid));
5954 		if(!bTextboxError &&
5955 		   wvGetPLCF((void **) &pPLCF_txt, ps->fib.fcPlcftxbxTxt, ps->fib.lcbPlcftxbxTxt, ps->tablefd))
5956 		{
5957 			bTextboxError = true;
5958 		}
5959 		if(!bTextboxError)
5960 		{
5961 			UT_return_if_fail(pPLCF_dgg && pPLCF_txt);
5962 			for(i = 0; i < m_iTextboxCount; i++)
5963 			{
5964 				m_pTextboxes[i].ref_pos = pPLCF_dgg[i];
5965 				m_pTextboxes[i].txt_pos = pPLCF_txt[i] + m_iTextboxesStart;
5966 				m_pTextboxes[i].txt_len = pPLCF_txt[i+1] - pPLCF_txt[i];
5967 				UT_DEBUGMSG(("IE_Imp_MsWord_97::_handleTextbox: Tbox %d, rpos %d, tpos %d len %d \n",
5968 							 i, m_pTextboxes[i].ref_pos, m_pTextboxes[i].txt_pos,m_pTextboxes[i].txt_len));
5969 			}
5970 
5971 			wvFree(pPLCF_dgg);
5972 			wvFree(pPLCF_txt);
5973 
5974 		}
5975 	}
5976 
5977 }
5978 
5979 /*!
5980    Determines whether footnote is to be inserted at present document
5981    position, and if so takes care of inserting the reference marker,
5982    note section and anchor marker.
5983 
5984    returns true if a note was successfully inserted, false otherwise;
5985    if the return value is true, the caller should ignore the present character
5986 
5987    we will take advantage of the notes being in document order, so we
5988    can just remember the last note we inserted, rather than having to
5989    search through the list
5990 
5991 */
_insertNoteIfAppropriate(UT_uint32 iDocPosition,UT_UCS4Char c)5992 bool IE_Imp_MsWord_97::_insertNoteIfAppropriate(UT_uint32 iDocPosition, UT_UCS4Char c)
5993 {
5994 	if(m_bInFNotes || m_bInENotes)
5995 		return false;
5996 
5997 	bool res = false;
5998 	//now search for position iDocPosition in our footnnote list;
5999 	if(!m_pFootnotes || m_iFootnotesCount == 0 || m_iNextFNote >= m_iFootnotesCount)
6000 	{
6001 		goto endnotes;
6002 	}
6003 
6004 	if(m_pFootnotes[m_iNextFNote].ref_pos == iDocPosition)
6005 	{
6006 		res |= _insertFootnote(m_pFootnotes + m_iNextFNote++,c);
6007 	}
6008 
6009  endnotes:
6010 	if(!m_pEndnotes || m_iEndnotesCount == 0 || m_iNextENote >= m_iEndnotesCount)
6011 	{
6012 		goto finish;
6013 	}
6014 
6015 	if(m_pEndnotes[m_iNextENote].ref_pos == iDocPosition)
6016 	{
6017 		res |= _insertEndnote(m_pEndnotes + m_iNextENote++,c);
6018 	}
6019 
6020 
6021  finish:
6022 	return res;
6023 }
6024 
6025 /* returns true on successful insertion of the reference marker */
_insertFootnote(const footnote * f,UT_UCS4Char c)6026 bool IE_Imp_MsWord_97::_insertFootnote(const footnote * f, UT_UCS4Char c)
6027 {
6028 	UT_return_val_if_fail(f, true);
6029 	xxx_UT_DEBUGMSG(("IE_Imp_MsWord_97::_insertFootnote: pos: %d, pid %d\n", f->ref_pos, f->pid));
6030 
6031 	this->_flush();
6032 
6033 	bool res = true;
6034 	const gchar * attribsS[3] ={"footnote-id",NULL,NULL};
6035 	const gchar* attribsR[9] = {"type", "footnote_ref", "footnote-id",
6036 								   NULL, NULL, NULL, NULL, NULL, NULL};
6037 	UT_uint32 iOffR = 3;
6038 
6039 	UT_String footpid;
6040 	UT_String_sprintf(footpid,"%i",f->pid);
6041 	attribsS[1] = footpid.c_str();
6042 
6043 	// for attribsR we need to set props and style in order to
6044 	// preserve any formating set by a previous call to _beginChar()
6045 	attribsR[iOffR++] = footpid.c_str();
6046 	attribsR[iOffR++] = "props";
6047 	attribsR[iOffR++] = m_charProps.c_str();
6048 	if(!m_charStyle.empty())
6049 	{
6050 		attribsR[iOffR++] = "style";
6051 		attribsR[iOffR++] = m_charStyle.c_str();
6052 	}
6053 
6054 	UT_return_val_if_fail( iOffR <= sizeof(attribsR)/sizeof(gchar*), false );
6055 
6056 	if(f->type)
6057 	{
6058 		// auto-generated reference -- insert a field
6059 		res &= _appendObject(PTO_Field, attribsR);
6060 	}
6061 	else
6062 	{
6063 		// manually-inserted marker, we need to issue the character
6064 		// TODO -- in word the marker can consist of several
6065 		// characters, but I have no idea how Word knows how many;
6066 		// we at least need to reset the character formatting again
6067 		// after we have inserted the footnote section
6068 		res &= _appendSpan(&c,1);
6069 	}
6070 
6071 	_appendStrux(PTX_SectionFootnote,attribsS);
6072 	_appendStrux(PTX_EndFootnote,NULL);
6073 
6074 	if(!f->type)
6075 	{
6076 		// set the formatting to whatever it was, in case the footnote
6077 		// marker is longer than one character
6078 		_appendFmt(&attribsR[0]);
6079 	}
6080 
6081 	return res;
6082 }
6083 
_insertEndnote(const footnote * f,UT_UCS4Char c)6084 bool IE_Imp_MsWord_97::_insertEndnote(const footnote * f, UT_UCS4Char c)
6085 {
6086 	UT_return_val_if_fail(f, true);
6087 	xxx_UT_DEBUGMSG(("IE_Imp_MsWord_97::_insertEndnote: pos: %d, pid %d\n", f->ref_pos, f->pid));
6088 
6089 	this->_flush();
6090 
6091 	bool res = true;
6092 	const gchar * attribsS[3] ={"endnote-id",NULL,NULL};
6093 	const gchar* attribsR[9] = {"type", "endnote_ref", "endnote-id",
6094 								   NULL, NULL, NULL, NULL, NULL, NULL};
6095 	UT_uint32 iOffR = 3;
6096 
6097 	UT_String footpid;
6098 	UT_String_sprintf(footpid,"%i",f->pid);
6099 	attribsS[1] = footpid.c_str();
6100 
6101 	// for attribsR we need to set props and style in order to
6102 	// preserve any formating set by a previous call to _beginChar()
6103 	attribsR[iOffR++] = footpid.c_str();
6104 	attribsR[iOffR++] = "props";
6105 	attribsR[iOffR++] = m_charProps.c_str();
6106 	attribsR[iOffR++] = "style";
6107 	attribsR[iOffR++] = m_charStyle.c_str();
6108 
6109 	UT_return_val_if_fail(iOffR <= sizeof(attribsR)/sizeof(gchar*), false);
6110 
6111 	if(f->type)
6112 	{
6113 		// auto-generated reference -- insert a field
6114 		res &= _appendObject(PTO_Field, attribsR);
6115 	}
6116 	else
6117 	{
6118 		// manually-inserted marker, we need to issue the character
6119 		// TODO -- in word the marker can consist of several
6120 		// characters, but I have no idea how Word knows how many;
6121 		// we at least need to reset the character formatting again
6122 		// after we have inserted the footnote section
6123 		res &= _appendSpan(&c,1);
6124 	}
6125 
6126 	_appendStrux(PTX_SectionEndnote,attribsS);
6127 	_appendStrux(PTX_EndEndnote,NULL);
6128 
6129 	if(!f->type)
6130 	{
6131 		// set the formatting to whatever it was, in case the footnote
6132 		// marker is longer than one character
6133 		_appendFmt(&attribsR[0]);
6134 	}
6135 
6136 	return res;
6137 }
6138 
6139 
6140 /*!
6141     This function makes sure that the insert is happening at the
6142     correct place if we are in a segment which belongs to one of the
    set of notes (footnotes & endnotes, in future also annotations).
6144 
6145     \parameter UT_uint32 iDocPosition: character position in the Word
6146                                        document stream
6147     \return returns false if the present character is to be skipped,
6148             true otherwise
6149 */
_handleNotesText(UT_uint32 iDocPosition)6150 bool IE_Imp_MsWord_97::_handleNotesText(UT_uint32 iDocPosition)
6151 {
6152 	if(iDocPosition >= m_iFootnotesStart && iDocPosition < m_iFootnotesEnd)
6153 	{
6154 		// upon entry into the footnote-land, we will need to search for
6155 		// the first footnote section in our document, note that we are
6156 		// in a footnote section, note at what doc position the current
6157 		// footnote will end, and then let things run until we reach
6158 		// the end of the note; then we need to search for the next
6159 		// doc section, etc.
6160 
6161 		// if the footnote marker is auto-generated, we need to remove
6162 		// the special character from the stream (happens
6163 		// automatically)
6164 
6165 		// when in a footnote section, all the functions that normally
6166 		// use append methods will need to use insert methods instead
6167 
6168 		if(!m_bInFNotes)
6169 		{
6170 			xxx_UT_DEBUGMSG(("In footnote territory: pos %d\n", iDocPosition));
6171 			m_bInFNotes = true;
6172 			m_bInHeaders = false;
6173 
6174 			// we will reuse the m_iNextFNote variable, noting it
6175 			// refers to the CURRENT footnote
6176 			m_iNextFNote = 0;
6177 			_findNextFNoteSection();
6178 			_endSect(NULL,0,NULL,0);
6179 			m_bInSect = true;
6180 		}
6181 
6182 		// the current footnote will end at pos
6183 		// f.txt_pos + f.txt_len,
6184 		if( m_iNextFNote < m_iFootnotesCount && iDocPosition == m_pFootnotes[m_iNextFNote].txt_pos +
6185 		                                                        m_pFootnotes[m_iNextFNote].txt_len)
6186 		{
6187 			m_iNextFNote++;
6188 
6189 			// after the last footnote there is an extra paragraph
6190 			// marker that is still a part of the footnote section --
6191 			// we do not want that marker imported
6192 			if(m_iNextFNote < m_iFootnotesCount)
6193 				_findNextFNoteSection();
6194 			else
6195 			{
6196 				UT_DEBUGMSG(("End of footnotes marker at pos %d\n", iDocPosition));
6197 				return false;
6198 			}
6199 		}
6200 
6201 		// if this is the first character in a footnote, insert the reference
6202 		if(iDocPosition == m_pFootnotes[m_iNextFNote].txt_pos)
6203 		{
6204 			const gchar* attribsA[] = {"type", "footnote_anchor",
6205 										   "footnote-id", NULL,
6206 										   "props",       NULL,
6207 										   "style",       NULL,
6208 										   NULL};
6209 
6210 			const gchar * attribsB[] = {"props", NULL,
6211 											"style", NULL,
6212 											NULL};
6213 
6214 			UT_String footpid;
6215 			UT_String_sprintf(footpid,"%i",m_pFootnotes[m_iNextFNote].pid);
6216 			attribsA[3] = footpid.c_str();
6217 			attribsA[5] = m_charProps.c_str();
6218 			attribsA[7] = m_charStyle.c_str();
6219 
6220 			attribsB[1] = m_paraProps.c_str();
6221 			attribsB[3] = m_paraStyle.c_str();
6222 
6223 			_appendStrux(PTX_Block,attribsB);
6224 			m_bInPara = true;
6225 
6226 			if(m_pFootnotes[m_iNextFNote].type)
6227 			{
6228 				_appendObject(PTO_Field, attribsA);
6229 				return false;
6230 			}
6231 			return true;
6232 		}
6233 
6234 		// do not return !!!
6235 		xxx_UT_DEBUGMSG(("In footnote %d, on pos %d\n", m_iNextFNote, iDocPosition));
6236 	}
6237 	else if(m_bInFNotes)
6238 	{
6239 		m_bInFNotes = false;
6240 		xxx_UT_DEBUGMSG(("Leaving footnote territory\n"));
6241 		// move to the end of the do end of the document ...
6242 
6243 		// do not return !!!
6244 	}
6245 
6246 	if(iDocPosition >= m_iEndnotesStart && iDocPosition < m_iEndnotesEnd)
6247 	{
6248 		if(!m_bInENotes)
6249 		{
6250 			xxx_UT_DEBUGMSG(("In endnote territory: pos %d\n", iDocPosition));
6251 			m_bInENotes = true;
6252 			m_bInHeaders = false;
6253 			m_iNextENote = 0;
6254 			_findNextENoteSection();
6255 			_endSect(NULL,0,NULL,0);
6256 			m_bInSect = true;
6257 		}
6258 
6259 		if( m_iNextENote < m_iEndnotesCount && iDocPosition == m_pEndnotes[m_iNextENote].txt_pos +
6260 		                   m_pEndnotes[m_iNextENote].txt_len)
6261 		{
6262 			m_iNextENote++;
6263 
6264 			// after the last endnote there is an extra paragraph
6265 			// marker that is still a part of the endnote section --
6266 			// we do not want that marker imported
6267 			if(m_iNextENote < m_iEndnotesCount)
6268 				_findNextENoteSection();
6269 			else
6270 			{
6271 				xxx_UT_DEBUGMSG(("End of endnotes marker at pos %d\n", iDocPosition));
6272 				return false;
6273 			}
6274 		}
6275 
6276 		// if this is the first character in an endnote, insert the anchor
6277 		if( m_iNextENote < m_iEndnotesCount && iDocPosition == m_pEndnotes[m_iNextENote].txt_pos)
6278 		{
6279 			const gchar * attribsA[] = {"type", "endnote_anchor",
6280 										   "endnote-id", NULL,
6281 										   "props",       NULL,
6282 										   "style",       NULL,
6283 										   NULL};
6284 
6285 			const gchar * attribsB[] = {"props", NULL,
6286 										   "style", NULL,
6287 										   NULL};
6288 
6289 			UT_String footpid;
6290 			UT_String_sprintf(footpid,"%i",m_pEndnotes[m_iNextENote].pid);
6291 			attribsA[3] = footpid.c_str();
6292 			attribsA[5] = m_charProps.c_str();
6293 			attribsA[7] = m_charStyle.c_str();
6294 
6295 			attribsB[1] = m_paraProps.c_str();
6296 			attribsB[3] = m_paraStyle.c_str();
6297 
6298 			_appendStrux(PTX_Block,attribsB);
6299 			m_bInPara = true;
6300 
6301 			if(m_pEndnotes[m_iNextENote].type)
6302 			{
6303 				_appendObject(PTO_Field, attribsA);
6304 				return false;
6305 			}
6306 			return true;
6307 		}
6308 
6309 		xxx_UT_DEBUGMSG(("In endnote %d, on pos %d\n", m_iNextENote, iDocPosition));
6310 		// do not return !!!
6311 	}
6312 	else if(m_bInENotes)
6313 	{
6314 		m_bInENotes = false;
6315 		xxx_UT_DEBUGMSG(("Leaving endnote territory\n"));
6316 		// move to the end of the document ...
6317 
6318 		// do not return !!!
6319 	}
6320 
6321 	// we only return here, so that the code above could be extended
6322 	// for handly annotations by simply copy/paste
6323 	return true;
6324 }
6325 
6326 
6327 
6328 /*!
6329     This function makes sure that the insert is happening at the
6330     correct place if we are in a segment which belongs to one of the
6331     set of Textboxes
6332 
6333     \parameter UT_uint32 iDocPosition: character position in the Word
6334                                        document stream
6335     \return returns false if the present character is to be skipped,
6336             true otherwise
6337 */
_handleTextboxesText(UT_uint32 iDocPosition)6338 bool IE_Imp_MsWord_97::_handleTextboxesText(UT_uint32 iDocPosition)
6339 {
6340 	if(iDocPosition >= m_iTextboxesStart && iDocPosition < m_iTextboxesEnd)
6341 	{
6342 		// upon entry into the Textland-land, we will need to search for
6343 		// the first Textbox section in our document, note that we are
6344 		// in a Textbox section, note at what doc position the current
6345 		// textbox will end, and then let things run until we reach
6346 		// the end of the textbox; then we need to search for the next
6347 		// doc section, etc.
6348 
6349 
6350 		// when in a Text box section, all the functions that normally
6351 		// use append methods will need to use insert methods instead
6352 
6353 		if(!m_bInTextboxes)
6354 		{
6355 			UT_DEBUGMSG(("In Textbox territory: pos %d\n", iDocPosition));
6356 			m_bInTextboxes = true;
6357 			m_bInFNotes = false;
6358 			m_bInHeaders = false;
6359 
6360 			// we will reuse the m_iNextTextbox variable, noting it
6361 			// refers to the CURRENT textbox
6362 
6363 			m_iNextTextbox = 0;
6364 			_findNextTextboxSection();
6365 			_endSect(NULL,0,NULL,0);
6366 			m_bInSect = true;
6367 		}
6368 
6369 		// the current footnote will end at pos
6370 		// f.txt_pos + f.txt_len,
6371 		if( m_iNextTextbox < m_iTextboxCount && iDocPosition == m_pTextboxes[m_iNextTextbox].txt_pos +
6372 		                   m_pTextboxes[m_iNextTextbox].txt_len)
6373 		{
6374 			m_iNextTextbox++;
6375 
6376 			// after the last footnote there is an extra paragraph
6377 			// marker that is still a part of the footnote section --
6378 			// we do not want that marker imported
6379 			if(m_iNextTextbox < m_iTextboxCount)
6380 				_findNextTextboxSection();
6381 			else
6382 			{
6383 				UT_DEBUGMSG(("End of Textbox marker at pos %d\n", iDocPosition));
6384 				return false;
6385 			}
6386 		}
6387 
6388 // 		if(iDocPosition == m_pTextboxes[m_iNextTextbox].txt_pos)
6389 // 		{
6390 // 			const gchar * attribsB[] = {"props", NULL,
6391 // 											"style", NULL,
6392 // 											NULL};
6393 
6394 // 			attribsB[1] = m_paraProps.c_str();
6395 // 			attribsB[3] = m_paraStyle.c_str();
6396 
6397 // 			_appendStrux(PTX_Block,attribsB);
6398 // 			m_bInPara = true;
6399 // 			return true;
6400 // 		}
6401 
6402 		xxx_UT_DEBUGMSG(("In Textbox %d, on pos %d\n", m_iNextTextbox, iDocPosition));
6403 	}
6404 	else if(m_bInTextboxes)
6405 	{
6406 		m_bInTextboxes = false;
6407 		UT_DEBUGMSG(("Leaving Textbox territory\n"));
6408 	}
6409 
6410 	return true;
6411 }
6412 
_findNextFNoteSection()6413 bool IE_Imp_MsWord_97::_findNextFNoteSection()
6414 {
6415 	if(!m_iNextFNote)
6416 	{
6417 		// move to the start of the doc first
6418 		m_pNotesEndSection = NULL;
6419 	}
6420 
6421 	if(m_pNotesEndSection)
6422 	{
6423 		// move to the next fragment
6424 		m_pNotesEndSection = m_pNotesEndSection->getNext();
6425 		UT_return_val_if_fail(m_pNotesEndSection, false);
6426 	}
6427 
6428 
6429 	m_pNotesEndSection = getDoc()->findFragOfType(pf_Frag::PFT_Strux,
6430 												  (UT_sint32)PTX_EndFootnote,
6431 												  m_pNotesEndSection);
6432 
6433 	if(!m_pNotesEndSection)
6434 	{
6435 		xxx_UT_DEBUGMSG(("Error: footnote section not found!!!\n"));
6436 		return false;
6437 	}
6438 
6439 	return true;
6440 }
6441 
6442 
6443 ///////////////////////////////////////////////////////////////////////
6444 /*!
 * s_cmp_lids This function is used to sort the textboxPos lids in order
 * of their lid values. This matches the order of the text sort in
 * the out-of-stream table.
 * Used by the qsort method on UT_Vector.
\param const void * P1  - pointer to a textboxPos pointer
\param const void * P2  - pointer to a textboxPos pointer
\returns -ve if lid1 < lid2, 0 if lid1 == lid2, +ve if lid1 > lid2
6452 */
s_cmp_lids(const void * P1,const void * P2)6453 static UT_sint32 s_cmp_lids(const void * P1, const void * P2)
6454 {
6455 	const textboxPos ** pP1 = (const textboxPos **) P1;
6456 	const textboxPos ** pP2 = (const textboxPos **) P2;
6457 	UT_uint32 lid1 = (*pP1)->lid;
6458 	UT_uint32 lid2 = (*pP2)->lid;
6459 	return static_cast<UT_sint32>(lid1) - static_cast<UT_sint32>(lid2);
6460 }
6461 
_findNextTextboxSection()6462 bool IE_Imp_MsWord_97::_findNextTextboxSection()
6463 {
6464 	if(m_iNextTextbox == 0)
6465 	{
6466 		// move to the start of the doc first
6467 		m_pTextboxEndSection = NULL;
6468 		m_vecTextboxPos.qsort(s_cmp_lids);
6469 
6470 	}
6471 	if(m_iNextTextbox >= m_vecTextboxPos.getItemCount())
6472 	{
6473 		UT_DEBUGMSG(("Error: Textbox section not found!!!\n"));
6474 		return false;
6475 	}
6476 
6477 	textboxPos * pPos = m_vecTextboxPos.getNthItem(m_iNextTextbox);
6478 	m_pTextboxEndSection = pPos->endFrame;
6479 
6480 	if(!m_pTextboxEndSection)
6481 	{
6482 		UT_DEBUGMSG(("Error: Textbox section not found!!!\n"));
6483 		return false;
6484 	}
6485 
6486 	return true;
6487 }
6488 
_findNextENoteSection()6489 bool IE_Imp_MsWord_97::_findNextENoteSection()
6490 {
6491 	if(!m_iNextENote)
6492 	{
6493 		// move to the start of the doc first
6494 		m_pNotesEndSection = NULL;
6495 	}
6496 
6497 	if(m_pNotesEndSection)
6498 	{
6499 		// move to the next fragment
6500 		m_pNotesEndSection = m_pNotesEndSection->getNext();
6501 		UT_return_val_if_fail(m_pNotesEndSection, false);
6502 	}
6503 
6504 	m_pNotesEndSection = getDoc()->findFragOfType(pf_Frag::PFT_Strux,
6505 												  (UT_sint32)PTX_EndEndnote,
6506 												  m_pNotesEndSection);
6507 
6508 	if(!m_pNotesEndSection)
6509 	{
6510 		UT_DEBUGMSG(("Error: endnote section not found!!!\n"));
6511 		return false;
6512 	}
6513 
6514 	return true;
6515 }
6516 
_shouldUseInsert() const6517 bool IE_Imp_MsWord_97::_shouldUseInsert() const
6518 {
6519 	return ((m_bInFNotes || m_bInENotes) && !m_bInHeaders && !m_bInTextboxes);
6520 }
6521 
_ensureInBlock()6522 bool IE_Imp_MsWord_97::_ensureInBlock()
6523 {
6524 
6525   bool bret = true;
6526 
6527   pf_Frag * pf = getDoc()->getLastFrag();
6528   while(pf && pf->getType() != pf_Frag::PFT_Strux)
6529     {
6530       pf = pf->getPrev();
6531     }
6532     if(pf && (pf->getType() == pf_Frag::PFT_Strux) )
6533     {
6534       pf_Frag_Strux * pfs = static_cast<pf_Frag_Strux *>(pf);
6535       if(pfs->getStruxType() != PTX_Block)
6536       {
6537         bret = _appendStrux(PTX_Block, NULL);
6538 	if (bret) m_bInPara = true;
6539       }
6540     }
6541     else if( pf == NULL)
6542     {
6543       bret = _appendStrux(PTX_Block, NULL);
6544       if (bret) m_bInPara = true;
6545     }
6546 
6547     return bret;
6548 }
6549 
_appendStrux(PTStruxType pts,const gchar ** attributes)6550 bool IE_Imp_MsWord_97::_appendStrux(PTStruxType pts, const gchar ** attributes)
6551 {
6552 	if(pts == PTX_SectionFrame)
6553 	{
6554 		UT_DEBUGMSG(("Appending Frame \n"));
6555 	}
6556 	if(pts == PTX_EndFrame)
6557 	{
6558 		UT_DEBUGMSG(("Appending EndFrame \n"));
6559 	}
6560 	if(m_bInHeaders)
6561 	{
6562 		return _appendStruxHdrFtr(pts, attributes);
6563 	}
6564 	else if(_shouldUseInsert() && m_pNotesEndSection)
6565 	{
6566 		return getDoc()->insertStruxBeforeFrag(m_pNotesEndSection, pts, attributes);
6567 	}
6568 	else if(m_bInTextboxes && m_pTextboxEndSection)
6569 	{
6570 		if(pts == PTX_Block)
6571 		{
6572 			xxx_UT_DEBUGMSG(("Insert block in Text box \n"));
6573 		}
6574 		return getDoc()->insertStruxBeforeFrag(m_pTextboxEndSection, pts, attributes);
6575 	}
6576 	if(pts == PTX_SectionFrame)
6577 	{
6578 //		Make sure any pending text is flushed
6579 		_flush();
6580 
6581 //
6582 // Text boxes need to be preceded by Blocks
6583 //
6584 		pf_Frag * pf = getDoc()->getLastFrag();
6585 		while(pf && pf->getType() != pf_Frag::PFT_Strux)
6586 		{
6587 			pf = pf->getPrev();
6588 		}
6589 		if(pf && (pf->getType() == pf_Frag::PFT_Strux) )
6590 		{
6591 			pf_Frag_Strux * pfs = static_cast<pf_Frag_Strux *>(pf);
6592 			if(pfs->getStruxType() != PTX_Block)
6593 			{
6594 				getDoc()->appendStrux(PTX_Block, NULL);
6595 			}
6596 		}
6597 		else if( pf == NULL)
6598 		{
6599 			getDoc()->appendStrux(PTX_Block, NULL);
6600 		}
6601 	}
6602 	return getDoc()->appendStrux(pts, attributes);
6603 }
6604 
_appendObject(PTObjectType pto,const gchar ** attributes)6605 bool IE_Imp_MsWord_97::_appendObject(PTObjectType pto, const gchar ** attributes)
6606 {
6607 	if(m_bInHeaders)
6608 	{
6609 		return _appendObjectHdrFtr(pto, attributes);
6610 	}
6611 	else if(_shouldUseInsert() && m_pNotesEndSection)
6612 	{
6613 		return getDoc()->insertObjectBeforeFrag(m_pNotesEndSection, pto, attributes);
6614 	}
6615 	else if(m_bInTextboxes && m_pTextboxEndSection)
6616 	{
6617 		return getDoc()->insertObjectBeforeFrag(m_pTextboxEndSection, pto, attributes);
6618 	}
6619 	if(!m_bInPara)
6620 	{
6621 	  _appendStrux(PTX_Block, NULL);
6622 	  m_bInPara = true;
6623 	}
6624 	return getDoc()->appendObject(pto, attributes);
6625 }
6626 
_appendSpan(const UT_UCSChar * p,UT_uint32 length)6627 bool IE_Imp_MsWord_97::_appendSpan(const UT_UCSChar * p, UT_uint32 length)
6628 {
6629 	if(m_bInHeaders)
6630 	{
6631 		return _appendSpanHdrFtr(p, length);
6632 	}
6633 	else if(_shouldUseInsert() && m_pNotesEndSection)
6634 	{
6635 		return getDoc()->insertSpanBeforeFrag(m_pNotesEndSection, p, length);
6636 	}
6637 	else if(m_bInTextboxes && m_pTextboxEndSection)
6638 	{
6639 		return getDoc()->insertSpanBeforeFrag(m_pTextboxEndSection, p, length);
6640 	}
6641 	return getDoc()->appendSpan(p, length);
6642 }
6643 
_appendFmt(const gchar ** attributes)6644 bool IE_Imp_MsWord_97::_appendFmt(const gchar ** attributes)
6645 {
6646 	// no special processing required, this only changes m_loading in
6647 	// the PT
6648 	return getDoc()->appendFmt(attributes);
6649 }
6650 
6651 /*!
6652     The append*HdrFtr() methods below are needed because in AW headers
6653     cannot be shared among sections; in contrast in Word one header
6654     can be used by a chain of sections. We get around it by
6655     duplicating that one header for each section that uses it. Since
    we cannot wind back through the data stream we have to duplicate
6657     each shared header as we go using the info stored in the current
6658     header's d struct.
6659 */
_appendStruxHdrFtr(PTStruxType pts,const gchar ** attributes)6660 bool IE_Imp_MsWord_97::_appendStruxHdrFtr(PTStruxType pts, const gchar ** attributes)
6661 {
6662 	UT_return_val_if_fail(m_bInHeaders,false);
6663 	UT_return_val_if_fail(m_iCurrentHeader < m_iHeadersCount,false);
6664 	UT_DEBUGMSG(("Inserting strux of type %d in HdrFtr %d\n",pts,m_iCurrentHeader));
6665 	UT_ASSERT(m_bInSect);
6666 	bool bRet = true;
6667 	for(UT_sint32 i = 0; i < m_pHeaders[m_iCurrentHeader].d.frag.getItemCount(); i++)
6668 	{
6669 		pf_Frag * pF = (pf_Frag*) m_pHeaders[m_iCurrentHeader].d.frag.getNthItem(i);
6670 		UT_return_val_if_fail(pF,false);
6671 		UT_DEBUGMSG(("Inserting strux of type %d in Dirivative HdrFtr \n",pts));
6672 
6673 		bRet &= getDoc()->insertStruxBeforeFrag(pF, pts, attributes);
6674 	}
6675 
6676 	bRet &= getDoc()->appendStrux(pts, attributes);
6677 	if(pts != PTX_Block)
6678 	{
6679 		xxx_UT_DEBUGMSG(("m_bInPara set false here -1 \n"));
6680 		m_bInPara = false;
6681 	}
6682 	else
6683 	{
6684 		m_bInPara = true;
6685 	}
6686 	return bRet;
6687 }
6688 
_appendObjectHdrFtr(PTObjectType pto,const gchar ** attributes)6689 bool IE_Imp_MsWord_97::_appendObjectHdrFtr(PTObjectType pto, const gchar ** attributes)
6690 {
6691 	UT_return_val_if_fail(m_bInHeaders,false);
6692 	UT_return_val_if_fail(m_iCurrentHeader < m_iHeadersCount,false);
6693 
6694 	bool bRet = true;
6695 
6696 	for(UT_sint32 i = 0; i < m_pHeaders[m_iCurrentHeader].d.frag.getItemCount(); i++)
6697 	{
6698 		pf_Frag * pF = (pf_Frag*) m_pHeaders[m_iCurrentHeader].d.frag.getNthItem(i);
6699 		UT_return_val_if_fail(pF,false);
6700 		if(!m_bInPara)
6701 		{
6702 			bRet &= getDoc()->insertStruxBeforeFrag(pF, PTX_Block, NULL);
6703 		}
6704 		bRet &= getDoc()->insertObjectBeforeFrag(pF, pto, attributes);
6705 	}
6706 	if(!m_bInPara)
6707 	{
6708 		m_bInPara = true;
6709 		bRet &= getDoc()->appendStrux(PTX_Block, NULL);
6710 	}
6711 	bRet &= getDoc()->appendObject(pto, attributes);
6712 	return bRet;
6713 }
6714 
_appendSpanHdrFtr(const UT_UCSChar * p,UT_uint32 length)6715 bool IE_Imp_MsWord_97::_appendSpanHdrFtr(const UT_UCSChar * p, UT_uint32 length)
6716 {
6717 	UT_return_val_if_fail(m_bInHeaders,false);
6718 	UT_return_val_if_fail(m_iCurrentHeader < m_iHeadersCount,false);
6719 
6720 	bool bRet = true;
6721 
6722 	for(UT_sint32 i = 0; i < m_pHeaders[m_iCurrentHeader].d.frag.getItemCount(); i++)
6723 	{
6724 		pf_Frag * pF = (pf_Frag*) m_pHeaders[m_iCurrentHeader].d.frag.getNthItem(i);
6725 		UT_return_val_if_fail(pF,false);
6726 		if(!m_bInPara)
6727 		{
6728 			bRet &= getDoc()->insertStruxBeforeFrag(pF, PTX_Block, NULL);
6729 		}
6730 
6731 		bRet &= getDoc()->insertSpanBeforeFrag(pF, p, length);
6732 	}
6733 	if(!m_bInPara)
6734 	{
6735 		m_bInPara = true;
6736 		bRet &= getDoc()->appendStrux(PTX_Block, NULL);
6737 	}
6738 	bRet &= getDoc()->appendSpan(p, length);
6739 	return bRet;
6740 }
6741 
6742 
_handleHeaders(const wvParseStruct * ps)6743 void IE_Imp_MsWord_97::_handleHeaders(const wvParseStruct *ps)
6744 {
6745 	UT_uint32 i, k;
6746 
6747 	DELETEPV(m_pHeaders);
6748 
6749 	m_iHeadersCount = 0;
6750 	UT_uint32 *pPLCF_txt = NULL;
6751 
6752 	/*
6753 	   The header/footer PLCF in Word 97+ is organised as follows:
6754 
6755 	   indx         |  function
6756 	   -------------------------------------------------------------------------------
6757 	   0-5: document wide settings
6758 	   -------------------------------------------------------------------------------
6759 	    0           |  footnote separator
6760 	    1           |  footnote continuation separator (i.e., continued on next page)
6761 	    2           |  document-wide footnote continuation notice (i.e., continued
6762                    	   from previous page)
6763 	   3-5          |  as above for endnotes
6764        -------------------------------------------------------------------------------
6765 	   now for i-th section in document (i >= 0)
6766 	   -------------------------------------------------------------------------------
6767 	   i+6          |  header even pages
6768 	   i+7          |  header odd  pages
6769 	   i+8          |  footer even pages
6770 	   i+9          |  footer odd  pages
6771 	   i+10         |  header first page
6772 	   i+11         |  footer first page
6773 	   -------------------------------------------------------------------------------
6774        according to the docs now should come the foot/endnote
6775 	   separators but they do not -- those settings appear to be
6776 	   document wide only ...
6777 	   -------------------------------------------------------------------------------
6778 	   i+12 - i+17  |  as the document wide footnote/endnote separators above
6779 
6780 	   NB: the record for the last section in the document may be
6781 	       incomplete, i.e., for n sections  m_iHeadersCount <= 6 + 12*n.
6782 
6783 	   The even headers are only applied if ps->dop.fFacingPages is set
6784 	*/
6785 
6786 	bool bHeaderError = false;
6787 
6788 	if(ps->fib.lcbPlcfhdd)
6789 	{
6790 		/* the docs are ambiguous, at one place saying the PLCF
6791 		   contains n+2 entries, another n+1; I think the former is correct*/
6792 		m_iHeadersCount = ps->fib.lcbPlcfhdd/4 - 2;
6793 		try
6794 		{
6795 			m_pHeaders = new header[m_iHeadersCount];
6796 		}
6797 		catch(...)
6798 		{
6799 			m_pHeaders = NULL;
6800 		}
6801 
6802 		UT_return_if_fail(m_pHeaders);
6803 
6804 		// this is really quite straight forward; we retrieve the PLCF
6805 		// which is a sequence of n+2 positions (UT_uint32) of the
6806 		// header text in its data stream
6807 		if(wvGetPLCF((void **) &pPLCF_txt, ps->fib.fcPlcfhdd, ps->fib.lcbPlcfhdd, ps->tablefd))
6808 		{
6809 			bHeaderError = true;
6810 		}
6811 
6812 		if(!bHeaderError)
6813 		{
6814 			UT_return_if_fail(pPLCF_txt);
6815 			for(i = 0; i < m_iHeadersCount; i++)
6816 			{
6817 				m_pHeaders[i].pos = pPLCF_txt[i] + m_iHeadersStart;
6818 				m_pHeaders[i].len = pPLCF_txt[i+1] - pPLCF_txt[i];
6819 				m_pHeaders[i].pid = getDoc()->getUID(UT_UniqueId::HeaderFtr);
6820 
6821 				UT_DEBUGMSG(("Header %d has pid %d \n",i,m_pHeaders[i].pid));
6822 				if(i < 6)
6823 				{
6824 					// document wide footnote/endnote separators
6825 					m_pHeaders[i].type = HF_Unsupported;
6826 				}
6827 				else
6828 				{
6829 					switch((i-6)%6)
6830 					{
6831 						case 0:
6832 							if(m_bEvenOddHeaders)
6833 								m_pHeaders[i].type = HF_HeaderEven;
6834 							else
6835 								m_pHeaders[i].type = HF_Unsupported;
6836 							break;
6837 						case 1:
6838 							m_pHeaders[i].type = HF_HeaderOdd;
6839 							break;
6840 						case 2:
6841 							if(m_bEvenOddHeaders)
6842 								m_pHeaders[i].type = HF_FooterEven;
6843 							else
6844 								m_pHeaders[i].type = HF_Unsupported;
6845 							break;
6846 						case 3:
6847 							m_pHeaders[i].type = HF_FooterOdd;
6848 							break;
6849 						case 4:
6850 							m_pHeaders[i].type = HF_HeaderFirst;
6851 							break;
6852 						case 5:
6853 							m_pHeaders[i].type = HF_FooterFirst;
6854 							break;
6855 
6856 						default:
6857 							m_pHeaders[i].type = HF_Unsupported;
6858 					}
6859 
6860 					UT_DEBUGMSG(("Header no. %d, pos %d, len %d\n",
6861 								 i,m_pHeaders[i].pos,m_pHeaders[i].len));
6862 
6863 #if 1
6864 					// this code is here because in AW we currently cannot
6865 					// share headers between sections
6866 					if(m_pHeaders[i].type != HF_Unsupported && m_pHeaders[i].len == 0)
6867 					{
6868 						// this is the case where the section is to use the
6869 						// header of a previous section -- scroll back until
6870 						// we find one
6871 						k = i - 6;
6872 						bool bContinue = false;
6873 
6874 						while(k > 5)
6875 						{
6876 							if(m_pHeaders[k].len == 2)
6877 							{
6878 								// found empty header
6879 								// set the type of the present header unsupported, so it does not
6880 								// get referenced
6881 								m_pHeaders[i].type = HF_Unsupported;
6882 								bContinue = true;
6883 								break;
6884 							}
6885 							else if(m_pHeaders[k].len == 0)
6886 							{
6887 								// try one section ahead
6888 								k -= 6;
6889 							}
6890 							else
6891 							{
6892 								// found a meaningful header
6893 								break;
6894 							}
6895 						}
6896 
6897 						if(bContinue || k < 6)
6898 						{
6899 							// did not find any meaningful headers, set the type to unsupported, so
6900 							// that it does not get referenced
6901 							//
6902 							// we do not want to do this to the first page hdr/ftr,
6903 							// because in this case len == 0 can mean the header should be
6904 							// empty but present (this is determined by asep->fTitlePage
6905 							if(m_pHeaders[i].type != HF_HeaderFirst && m_pHeaders[i].type != HF_FooterFirst)
6906 								m_pHeaders[i].type = HF_Unsupported;
6907 
6908 							continue;
6909 						}
6910 
6911 						// so we have found a meaningful header k that is to
6912 						// be used in place of header i; we add header
6913 						// i to k's d-struct
6914 
6915 						m_pHeaders[k].d.hdr.addItem((void*)(m_pHeaders+i));
6916 					}
6917 #endif
6918 				}
6919 			}
6920 
6921 			wvFree(pPLCF_txt);
6922 		}
6923 	}
6924 }
6925 
6926 /*!
6927     A helper function that inserts the header/ftr section
6928 */
_insertHeaderSection(bool bDoBlockIns)6929 bool IE_Imp_MsWord_97::_insertHeaderSection(bool bDoBlockIns)
6930 {
6931 	// need to insert our header/footer section, preserving
6932 	// any existing formatting ...
6933 
6934 	// we need to be able to insert some 0-length headers
6935 	if(m_pHeaders[m_iCurrentHeader].type != HF_Unsupported /*&& m_pHeaders[m_iCurrentHeader].len > 2*/)
6936 	{
6937 		UT_uint32 iOff = 0;
6938 		const gchar * attribsB[] = {NULL, NULL,
6939 									   NULL, NULL,
6940 									   NULL};
6941 		if(m_iCurrentHeader == m_iLastAppendedHeader)
6942 		{
6943 			return false;
6944 		}
6945 		m_iLastAppendedHeader = m_iCurrentHeader;
6946 		if(m_paraProps.size())
6947 		{
6948 			attribsB[iOff++] = "props";
6949 			attribsB[iOff++] = m_paraProps.c_str();
6950 		}
6951 
6952 		if(m_paraStyle.size())
6953 		{
6954 			attribsB[iOff++] = "style";
6955 			attribsB[iOff++] = m_paraStyle.c_str();
6956 		}
6957 
6958 		const gchar * attribsC[] = {NULL, NULL,
6959 									   NULL, NULL,
6960 									   NULL};
6961 		iOff = 0;
6962 		if(m_charProps.size())
6963 		{
6964 			attribsC[iOff++] = "props";
6965 			attribsC[iOff++] = m_charProps.c_str();
6966 		}
6967 
6968 		if(m_charStyle.size())
6969 		{
6970 			attribsC[iOff++] = "style";
6971 			attribsC[iOff++] = m_charStyle.c_str();
6972 		}
6973 
6974 		const gchar * attribsS[] = {"type", NULL,
6975 									   "id",   NULL,
6976 									   NULL};
6977 
6978 		UT_String id;
6979 		UT_String_sprintf(id,"%d",m_pHeaders[m_iCurrentHeader].pid);
6980 		attribsS[3] = id.c_str();
6981 		UT_DEBUGMSG(("Appending Current Header %d pid %s \n",m_iCurrentHeader,id.c_str()));
6982 		switch(m_pHeaders[m_iCurrentHeader].type)
6983 		{
6984 			case HF_HeaderEven:
6985 				attribsS[1] = "header-even";
6986 				break;
6987 			case HF_FooterEven:
6988 				attribsS[1] = "footer-even";
6989 				break;
6990 			case HF_HeaderOdd:
6991 				attribsS[1] = "header";
6992 				break;
6993 			case HF_FooterOdd:
6994 				attribsS[1] = "footer";
6995 				break;
6996 			case HF_HeaderFirst:
6997 				attribsS[1] = "header-first";
6998 				break;
6999 			case HF_FooterFirst:
7000 				attribsS[1] = "footer-first";
7001 				break;
7002 			default:
7003 				UT_ASSERT_HARMLESS(UT_NOT_REACHED);
7004 		}
7005 
7006 		// we use the document methods, not the importer methods intentionally
7007 		UT_DEBUGMSG(("Direct Appending HdrFtr in MSWord_import \n"));
7008 		if(!m_bInPara)
7009 		{
7010 			getDoc()->appendStrux(PTX_Block, NULL);
7011 			m_bInPara = true;
7012 		}
7013 		getDoc()->appendStrux(PTX_SectionHdrFtr, attribsS);
7014 		m_bInSect = true;
7015 		m_bInHeaders = true;
7016 
7017 		if(bDoBlockIns)
7018 		{
7019 			getDoc()->appendStrux(PTX_Block, attribsB);
7020 			m_bInPara = true;
7021 			_appendFmt(attribsC);
7022 		}
7023 
7024 		// now we insert the same for any derivative headers
7025 		// ...
7026 		for (UT_sint32 i = 0; i < m_pHeaders[m_iCurrentHeader].d.hdr.getItemCount(); i++)
7027 		{
7028 			header * pH = (header*)m_pHeaders[m_iCurrentHeader].d.hdr.getNthItem(i);
7029 			UT_return_val_if_fail(pH, true);
7030 
7031 			// skip any unsupported headers (we set the type to unsupported when we find
7032 			// out that it is not used by the section to which it belongs)
7033 
7034 			if(pH->type == HF_Unsupported)
7035 			{
7036 				continue;
7037 			}
7038 
7039 			UT_String_sprintf(id,"%d",pH->pid);
7040 			attribsS[3] = id.c_str();
7041 
7042 			switch(pH->type)
7043 			{
7044 				case HF_HeaderEven:
7045 					attribsS[1] = "header-even";
7046 					break;
7047 				case HF_FooterEven:
7048 					attribsS[1] = "footer-even";
7049 					break;
7050 				case HF_HeaderOdd:
7051 					attribsS[1] = "header";
7052 					break;
7053 				case HF_FooterOdd:
7054 					attribsS[1] = "footer";
7055 					break;
7056 				case HF_HeaderFirst:
7057 					attribsS[1] = "header-first";
7058 					break;
7059 				case HF_FooterFirst:
7060 					attribsS[1] = "footer-first";
7061 					break;
7062 				default:
7063 					UT_ASSERT_HARMLESS(UT_NOT_REACHED);
7064 			}
7065 			UT_DEBUGMSG(("Appending Dirivative HdrFtr in MSWord_import \n"));
7066 
7067 			getDoc()->appendStrux(PTX_SectionHdrFtr, attribsS);
7068 			m_bInHeaders = true;
7069 
7070 			// we need to remember the HdrFtr fragment for
7071 			// later ...
7072 			pf_Frag * pF = getDoc()->getLastFrag();
7073 			UT_return_val_if_fail(pF && pF->getType() == pf_Frag::PFT_Strux, true);
7074 
7075 			pf_Frag_Strux * pFS = (pf_Frag_Strux*)pF;
7076 			UT_return_val_if_fail(pFS->getStruxType() == PTX_SectionHdrFtr, true);
7077 
7078 			m_pHeaders[m_iCurrentHeader].d.frag.addItem((void*)pF);
7079 
7080 			if(bDoBlockIns)
7081 			{
7082 				getDoc()->appendStrux(PTX_Block, attribsB);
7083 				getDoc()->appendFmt(attribsC);
7084 			}
7085 		}
7086 
7087 		return true;
7088 	}
7089 	else
7090 	{
7091 		// just gobble the character ...
7092 		m_bInHeaders = true;
7093 		return false;
7094 	}
7095 
7096 	return false;
7097 }
7098 
7099 
7100 
7101 /*!
7102     This function makes sure that the insert is happening at the
7103     correct place if we are in the header segment.
7104 
7105     \parameter UT_uint32 iDocPosition: character position in the Word
7106                                        document stream
7107     \return returns false if the present character is to be skipped,
7108             true otherwise
7109 */
_handleHeadersText(UT_uint32 iDocPosition,bool bDoBlockIns)7110 bool IE_Imp_MsWord_97::_handleHeadersText(UT_uint32 iDocPosition,bool bDoBlockIns)
7111 {
7112 	if(iDocPosition == m_iPrevHeaderPosition)
7113 	{
7114 		return true;
7115 	}
7116 
7117 	if(iDocPosition == m_iHeadersEnd)
7118 	{
7119 		m_iCurrentHeader++;
7120 
7121 		if(m_iCurrentHeader < m_iHeadersCount)
7122 		{
7123 			// this is the case where we reached the end of the header segment, but still have
7124 			// some headers in our header array left.
7125 			// if we have any headers other than unsupported, we have to insert them as empty
7126 
7127 			for(; m_iCurrentHeader < m_iHeadersCount; m_iCurrentHeader++)
7128 			{
7129 				if(m_pHeaders[m_iCurrentHeader].type != HF_Unsupported)
7130 					_insertHeaderSection(bDoBlockIns);
7131 			}
7132 		}
7133 	}
7134 
7135 	if(iDocPosition >= m_iHeadersStart && iDocPosition < m_iHeadersEnd)
7136 	{
7137 		m_iPrevHeaderPosition = iDocPosition;
7138 
7139 		// upon entry into the header-land, we will need to search for
7140 		// the first header/footer section in our document, note that we are
7141 		// in a header section, note at what doc position the current
7142 		// header will end, and then let things run until we reach
7143 		// the end of the header; then we need to search for the next
7144 		// doc section, etc.
7145 
7146 		// when we scroll through 0-length headers, we need to remember where we started,
7147 		// so we can insert the hdr section later
7148 		bool bScrolledHeader = false;
7149 		UT_uint32 iOrigHeader = 0;
7150 
7151 		if(!m_bInHeaders)
7152 		{
7153 			UT_DEBUGMSG(("In headers territory: pos %d\n", iDocPosition));
7154 			m_bInENotes = false;
7155 			m_bInFNotes = false;
7156 
7157 			m_iCurrentHeader = 0;
7158 
7159 			// we need to close of any open section
7160 			if(m_bInSect)
7161 			{
7162 				_endSect(NULL,0,NULL,0);
7163 			}
7164 
7165 			// some headers can be 0-length, skip them ... (0-length:  len <=2)
7166 			while(m_iCurrentHeader < m_iHeadersCount && m_pHeaders[m_iCurrentHeader].len <= 2)
7167 			{
7168 				bScrolledHeader = true;
7169 				m_iCurrentHeader++;
7170 			}
7171 
7172 			m_bInHeaders = true;
7173 		}
7174 		xxx_UT_DEBUGMSG(("CurrentHeader %d HeaderCount %d \n",m_iCurrentHeader,m_iHeadersCount));
7175 		if (m_iCurrentHeader < m_iHeadersCount) {
7176 			if(iDocPosition == m_pHeaders[m_iCurrentHeader].pos +
7177 			   m_pHeaders[m_iCurrentHeader].len)
7178 			{
7179 				// new header, time to move on ...
7180 				m_iCurrentHeader++;
7181 				iOrigHeader = m_iCurrentHeader;
7182 
7183 				// some headers can be 0-length, skip them ... (0-length:  len <=2)
7184 				// some 0-length headers we are actually interested in; the 0-length
7185 				// headers we do not care about should already be marked as HF_Unsupported
7186 				while(m_iCurrentHeader < m_iHeadersCount && m_pHeaders[m_iCurrentHeader].type == HF_Unsupported
7187 					  /*m_pHeaders[m_iCurrentHeader].len <= 2*/)
7188 				{
7189 					bScrolledHeader = true;
7190 					m_iCurrentHeader++;
7191 				}
7192 
7193 				// after the last header there is an extra paragraph
7194 				// marker that is still a part of the header section --
7195 				// we do not want that marker imported
7196 				if(m_iCurrentHeader ==  m_iHeadersCount)
7197 				{
7198 					UT_DEBUGMSG(("End of header marker at pos %d\n", iDocPosition));
7199 					return false;
7200 				}
7201 
7202 				// do not return, processing needs to continue ...
7203 			}
7204 			xxx_UT_DEBUGMSG(("iDocPosition %d m_pHeaders[m_iCurrentHeader].pos %d \n",iDocPosition,m_pHeaders[m_iCurrentHeader].pos));
7205 			if((bScrolledHeader && m_pHeaders[iOrigHeader].pos == iDocPosition) ||
7206 			   (!bScrolledHeader && iDocPosition == m_pHeaders[m_iCurrentHeader].pos))
7207 			{
7208 				return _insertHeaderSection(bDoBlockIns);
7209 			}
7210 		}
7211 		else
7212 		{
7213 			UT_DEBUGMSG(("DOM: bad header joo joo\n"));
7214 			return false;
7215 		}
7216 
7217 		// if we got this far, we are somwhere inside the header, just
7218 		// process the character in a normal way
7219 		return (m_pHeaders[m_iCurrentHeader].type != HF_Unsupported);
7220 	}
7221 
7222 	return true;
7223 }
7224 
7225 /*
7226    this function returns true if stuff at given position is to be ingored
7227    For example, the doc might contain headers in it that are not used ...
7228  */
_ignorePosition(UT_uint32 iDocPos)7229 bool IE_Imp_MsWord_97::_ignorePosition(UT_uint32 iDocPos)
7230 {
7231 	if(m_bInTOC && m_bTOCsupported)
7232 		return true;
7233 
7234 	if(m_bInHeaders && m_iCurrentHeader < m_iHeadersCount && m_pHeaders)
7235 	{
7236 		if(   m_pHeaders[m_iCurrentHeader].type == HF_Unsupported
7237 		   || iDocPos < m_pHeaders[m_iCurrentHeader].pos)
7238 		{
7239 			return true;
7240 		}
7241 	}
7242 
7243 	return false;
7244 }
7245