1 /************************************************************************/
2 /*									*/
3 /*  Manage the actual string content while reading an RTF document.	*/
4 /*									*/
5 /************************************************************************/
6 
7 #   include	"docRtfConfig.h"
8 
9 #   include	<stdlib.h>
10 #   include	<string.h>
11 #   include	<stdio.h>
12 #   include	<ctype.h>
13 
14 #   include	<appDebugon.h>
15 
16 #   include	<uniUtf8.h>
17 
18 #   include	"docRtfReaderImpl.h"
19 #   include	<docParaString.h>
20 #   include	<textConverter.h>
21 #   include	<textConverterImpl.h>
22 #   include	<docParaParticules.h>
23 
24 /************************************************************************/
25 /*									*/
26 /*  Handle text..							*/
27 /*									*/
28 /*  1)  Ignore it.							*/
29 /*  2)  Refuse it.							*/
30 /*  3)  Save it for later use. (Convert it to UTF-8 on the fly).	*/
31 /*									*/
32 /************************************************************************/
33 
34 /*  1  */
/*
 * Text handler that discards its input.
 *
 * None of the arguments are inspected; the call always succeeds.
 *
 * Returns 0.
 */
int docRtfIgnoreText(RtfReader *rr, const char *text, int len)
{
    return 0;
}
39 
40 /*  2  */
/*
 * Text handler that rejects any text.
 *
 * None of the arguments are inspected. A debug trace is emitted and the
 * call fails unconditionally.
 *
 * Returns -1.
 */
int docRtfRefuseText(RtfReader *rr, const char *text, int len)
{
    LDEB(1);

    return -1;
}
45 
46 /************************************************************************/
47 /*									*/
48 /*  Save text: It is not encoded.					*/
49 /*									*/
50 /************************************************************************/
51 
/*
 * Text handler that appends the bytes to the saved-text buffer of the
 * current reading state, without any character set conversion.
 *
 * rr:   the reader; rr->rrcState->rrsSavedText receives the bytes.
 * text: the bytes to append.
 * len:  the number of bytes in text.
 *
 * Returns 0 on success, -1 if the bytes could not be appended.
 */
int docRtfSaveRawBytes(RtfReader *rr, const char *text, int len)
{
    MemoryBuffer *saved = &(rr->rrcState->rrsSavedText);

    if (utilMemoryBufferAppendBytes(saved, (const unsigned char *)text, len)) {
        LDEB(len);
        return -1;
    }

    return 0;
}
64 
65 /************************************************************************/
66 /*									*/
67 /*  Convert text from some encoding to UTF-8.				*/
68 /*									*/
69 /************************************************************************/
70 
docRtfSaveBytes(void * vmb,int offset,const char * bytes,int count)71 static int docRtfSaveBytes(	void *		vmb,
72 				int		offset,
73 				const char *	bytes,
74 				int		count )
75     {
76     MemoryBuffer *	mb= (MemoryBuffer *)vmb;
77 
78     if  ( utilMemoryBufferAppendBytes( mb,
79 				    (const unsigned char *)bytes, count ) )
80 	{ LDEB(mb->mbSize); return -1;	}
81 
82     return count;
83     }
84 
85 /************************************************************************/
86 /*									*/
87 /*  Save text: It is in the document encoding.				*/
88 /*									*/
89 /************************************************************************/
90 
/*
 * Text handler for text in the document encoding: run it through the
 * RTF text converter of the reader, with the saved-text buffer of the
 * current reading state as the target of the conversion.
 *
 * rr:   the reader.
 * text: the bytes as they were found in the input.
 * len:  the number of bytes in text.
 *
 * Returns 0 on success, -1 if the converter reports a negative result.
 */
int docRtfSaveDocEncodedText(RtfReader *rr, const char *text, int len)
{
    RtfReadingState *rrs = rr->rrcState;
    int consumed = 0;   /* set by the converter; not used afterwards */
    int upto;

    /* The current buffer size is passed along as the position argument. */
    upto = textConverterConvertToUtf8(rr->rrRtfTextConverter,
                                      (void *)&(rrs->rrsSavedText),
                                      &consumed,
                                      rrs->rrsSavedText.mbSize,
                                      text, len);
    if (upto < 0) {
        LDEB(upto);
        return -1;
    }

    return 0;
}
108 
109 /************************************************************************/
110 
/*
 * Prepare the two text converters of an RTF reader.
 *
 * The RTF text converter gets DOC_RTF_AnsiCharsetName as its native
 * encoding and docRtfSaveBytes() as its produce callback. The text
 * converter for paragraph text is configured by
 * docParaSetupTextConverter().
 */
void docRtfReadSetupTextConverters(RtfReader *rr)
{
    /* Converter for text that is collected in the saved-text buffer. */
    textConverterSetNativeEncodingName(rr->rrRtfTextConverter,
                                       DOC_RTF_AnsiCharsetName);
    textConverterSetProduce(rr->rrRtfTextConverter, docRtfSaveBytes);

    /* Converter for text that goes into paragraphs. */
    docParaSetupTextConverter(rr->rrTextTextConverter);
}
120 
121 /************************************************************************/
122 /*									*/
123 /*  Store the text bytes that we collected from the rtf file in some	*/
124 /*  location. Use realloc() to resize the target location and flush	*/
125 /*  the collected text bytes.						*/
126 /*									*/
127 /************************************************************************/
128 
/*
 * Move the text collected in the saved-text buffer of the current
 * reading state into a NUL terminated, heap allocated string.
 *
 * pTarget:         in: the current allocation, or NULL. out: the
 *                  (re)allocated string. It is resized with realloc(),
 *                  so the caller owns it and must free() it.
 * pSize:           out: the length of the string, not counting the
 *                  terminating NUL.
 * rr:              the reader that holds the saved text.
 * removeSemicolon: if non-zero, a single trailing ';' is stripped.
 *
 * If no text was saved, *pSize is set to 0 and *pTarget is not touched.
 * On success the saved-text buffer is emptied.
 *
 * Returns 0 on success, -1 if the allocation fails. In that case
 * *pTarget and the saved-text buffer are left as they were.
 */
int docRtfStoreSavedText(char **pTarget, int *pSize, RtfReader *rr,
                         int removeSemicolon)
{
    RtfReadingState *rrs = rr->rrcState;
    char *fresh;
    int size;

    if (utilMemoryBufferIsEmpty(&(rrs->rrsSavedText))) {
        *pSize = 0;
        return 0;
    }

    size = rrs->rrsSavedText.mbSize;

    /* One extra byte for the terminator. */
    fresh = (char *)realloc(*pTarget, size + 1);
    if (!fresh) {
        LXDEB(size,fresh);
        return -1;
    }

    memcpy(fresh, rrs->rrsSavedText.mbBytes, size);
    fresh[size] = '\0';

    if (removeSemicolon && size > 0 && fresh[size - 1] == ';') {
        size--;
        fresh[size] = '\0';
    }

    utilEmptyMemoryBuffer(&(rrs->rrsSavedText));

    *pTarget = fresh;
    *pSize = size;

    return 0;
}
161 
/*
 * Replace the contents of a memory buffer with the text that was saved
 * in the current reading state of the reader.
 *
 * mb:              the buffer that receives the text.
 * rr:              the reader that holds the saved text. Its saved-text
 *                  buffer is flushed by docRtfStoreSavedText().
 * removeSemicolon: if non-zero, a single trailing ';' is stripped.
 *
 * Returns 0 on success, -1 on failure.
 *
 * The temporary copy of the text is released on every path. Previously
 * it leaked when utilMemoryBufferSetBytes() failed.
 */
int docRtfMemoryBufferSetText(MemoryBuffer *mb, RtfReader *rr,
                              int removeSemicolon)
{
    int rval = 0;
    char *text = (char *)0;
    int size = 0;

    if (docRtfStoreSavedText(&text, &size, rr, removeSemicolon)) {
        LDEB(1);
        rval = -1;
        goto ready;
    }

    /* With no saved text, text remains NULL and size is 0. */
    if (utilMemoryBufferSetBytes(mb, (const unsigned char *)text, size)) {
        LDEB(1);
        rval = -1;
        goto ready;
    }

ready:

    /* free() accepts a null pointer, so no guard is needed. */
    free(text);

    return rval;
}
180 
181 /************************************************************************/
182 /*									*/
183 /*  Append saved text to a memory buffer.				*/
184 /*									*/
185 /************************************************************************/
186 
/*
 * Append the text that was saved in the current reading state of the
 * reader to a memory buffer. A trailing semicolon is not removed.
 *
 * mb: the buffer to which the text is appended.
 * rr: the reader that holds the saved text. Its saved-text buffer is
 *     flushed by docRtfStoreSavedText().
 *
 * Returns 0 on success, -1 on failure.
 *
 * The temporary copy of the text is released on every path. Previously
 * it leaked when utilMemoryBufferAppendBytes() failed.
 */
int docRtfMemoryBufferAppendText(MemoryBuffer *mb, RtfReader *rr)
{
    const int removeSemicolon = 0;

    int rval = 0;
    char *text = (char *)0;
    int size = 0;

    if (docRtfStoreSavedText(&text, &size, rr, removeSemicolon)) {
        LDEB(1);
        rval = -1;
        goto ready;
    }

    /* With no saved text, text remains NULL and size is 0. */
    if (utilMemoryBufferAppendBytes(mb, (const unsigned char *)text, size)) {
        LDEB(1);
        rval = -1;
        goto ready;
    }

ready:

    /* free() accepts a null pointer, so no guard is needed. */
    free(text);

    return rval;
}
205 
206 /************************************************************************/
207 
/*
 * Select the native encoding of the paragraph text converter.
 *
 * If the reading state has a text charset (rrsTextCharset >= 0), the
 * encoding name is looked up with docGetEncodingName(). If there is no
 * charset, or the lookup yields no name, the native encoding name of
 * the RTF text converter is used instead.
 *
 * Returns 0; this function has no failure path.
 */
static int docRtfReadAdaptToFontEncoding(RtfReader *rr, RtfReadingState *rrs)
{
    const char *name = (const char *)0;

    if (rrs->rrsTextCharset >= 0) {
        name = docGetEncodingName(rr->rrDocument,
                                  &(rrs->rrsTextAttribute),
                                  rrs->rrsTextCharset);
    }

    if (!name) {
        name = rr->rrRtfTextConverter->tcNativeEncodingName;
    }

    textConverterSetNativeEncodingName(rr->rrTextTextConverter, name);

    return 0;
}
227 
228 /************************************************************************/
229 /*									*/
230 /*  Insert particules from the input in the document.			*/
231 /*									*/
232 /************************************************************************/
233 
/*
 * Text handler that inserts text from the input into the current
 * paragraph of the document.
 *
 * rr:   the reader.
 * text: the bytes as they were found in the input.
 * len:  the number of bytes in text.
 *
 * Nothing is done inside an ignored group. Otherwise:
 *  - The paragraph properties are applied when the paragraph is still
 *    empty, or when the text directly follows a paragraph head field.
 *  - The paragraph text converter is set to the encoding of the
 *    current font. A failure of that step is only traced.
 *  - Pending text shading changes are refreshed.
 *  - The text is appended and the 'after noteref' and 'after paragraph
 *    head field' flags of the reader are cleared.
 *
 * Returns 0 on success, -1 on failure.
 */
int docRtfTextParticule(RtfReader *rr, const char *text, int len)
{
    RtfReadingState *rrs = rr->rrcState;
    BufferDocument *bd = rr->rrDocument;
    BufferItem *paraNode;

    if (rr->rrcInIgnoredGroup) {
        return 0;
    }

    paraNode = docRtfGetParaNode(rr);
    if (!paraNode) {
        XDEB(paraNode);
        return -1;
    }

    if (docParaStrlen(paraNode) == 0 || rr->rrAfterParaHeadField) {
        if (docRtfAdaptToParaProperties(paraNode, bd, rrs,
                                        rr->rrParagraphBreakOverride)) {
            LDEB(1);
            return -1;
        }
    }

    /* Best effort: trace a failure, but insert the text anyway. */
    if (docRtfReadAdaptToFontEncoding(rr, rrs)) {
        LDEB(1);
    }

    if (rrs->rrsTextShadingChanged) {
        docRtfRefreshTextShading(rr, rrs);
    }

    if (docParaAppendText(bd, paraNode, &(rrs->rrsTextAttribute),
                          rr->rrTextTextConverter, text, len)) {
        LDEB(1);
        return -1;
    }

    rr->rrcAfterNoteref = 0;
    rr->rrAfterParaHeadField = 0;

    return 0;
}
272 
273 /************************************************************************/
274 /*									*/
275 /*  Handle an explicit unicode.						*/
276 /*  Special characters.							*/
277 /*									*/
278 /************************************************************************/
279 
/*
 * Insert a single unicode value at the place where the reader is
 * currently collecting text.
 *
 * rcw: the control word; only used in debug traces.
 * arg: the unicode value. Negative values are shifted up by 65536
 *      (presumably because they were written as signed 16 bit
 *      numbers; confirm against the RTF specification).
 * rr:  the reader.
 *
 * Depending on the value and on the text handler that is installed in
 * rr->rrcAddParticule:
 *  - Values in the private use range 0xE000..0xF8FF: only the low byte
 *    is kept and saved as document encoded text. (HACK inherited from
 *    the original implementation.)
 *  - Values that cannot be encoded as UTF-8: traced and skipped.
 *  - Handler docRtfSaveRawBytes: traced and skipped.
 *  - Handler docRtfTextParticule: the UTF-8 bytes are appended to the
 *    current paragraph with the current text attribute.
 *  - Any other handler: the UTF-8 bytes are appended to the saved-text
 *    buffer of the reading state.
 *
 * Returns 0 on success (including the skipped cases), -1 on failure.
 */
static int docRtfTextUnicodeValue(const RtfControlWord *rcw, int arg,
                                  RtfReader *rr)
{
    RtfReadingState *rrs = rr->rrcState;

    char bytes[7];
    int count;

    BufferItem *paraNode;
    BufferDocument *bd;
    int textAttributeNumber;
    int shift = 0;
    int at;

    if (arg < 0) {
        arg += 65536;
    }

    /* Dirty HACK: Only use low byte of characters in the unicode private range */
    if (arg >= 0xE000 && arg <= 0xF8FF) {
        bytes[0] = arg & 0xff;
        bytes[1] = '\0';

        return docRtfSaveDocEncodedText(rr, (char *)bytes, 1);
    }

    count = uniPutUtf8(bytes, arg);
    if (count < 1) {
        LDEB(count);
        return 0;
    }

    if (rr->rrcAddParticule == docRtfSaveRawBytes) {
        XXDEB(rr->rrcAddParticule,docRtfSaveRawBytes);
        return 0;
    }

    /* Not building paragraph text: collect the bytes as saved text. */
    if (rr->rrcAddParticule != docRtfTextParticule) {
        if (utilMemoryBufferAppendBytes(&(rrs->rrsSavedText),
                                        (unsigned char *)bytes, count)) {
            LDEB(count);
            return -1;
        }

        return 0;
    }

    /* Building paragraph text: append at the end of the paragraph. */
    paraNode = docRtfGetParaNode(rr);
    bd = rr->rrDocument;

    if (!paraNode) {
        SXDEB(rcw->rcwWord,paraNode);
        return -1;
    }

    if (rrs->rrsTextShadingChanged) {
        docRtfRefreshTextShading(rr, rrs);
    }

    textAttributeNumber = docTextAttributeNumber(bd, &(rrs->rrsTextAttribute));
    if (textAttributeNumber < 0) {
        LDEB(textAttributeNumber);
        return -1;
    }

    at = docParaStrlen(paraNode);

    /* An empty range at the end of the string: a pure insertion. */
    if (docParaStringReplace(&shift, paraNode, at, at,
                             (char *)bytes, count)) {
        LDEB(docParaStrlen(paraNode));
        return -1;
    }

    if (docParaDivideAppendedText(paraNode, textAttributeNumber,
                                  at, at + count)) {
        LLDEB(count,paraNode->biParaParticuleCount);
        return -1;
    }

    return 0;
}
345 
/*
 * Control word handler for an explicit unicode value.
 *
 * rcw: the control word.
 * arg: the unicode value as it was found in the input.
 * rr:  the reader.
 *
 * The value is inserted with docRtfTextUnicodeValue(). On success, the
 * number of bytes to skip after a unicode value is reset to the
 * 'bytes per unicode' setting of the current reading state.
 *
 * Returns 0 on success, -1 if the value could not be inserted.
 */
int docRtfTextUnicode(const RtfControlWord *rcw, int arg, RtfReader *rr)
{
    RtfReadingState *rrs = rr->rrcState;

    if (docRtfTextUnicodeValue(rcw, arg, rr)) {
        SXDEB(rcw->rcwWord,arg);
        return -1;
    }

    rrs->rrsUnicodeBytesToSkip = rrs->rrsBytesPerUnicode;

    return 0;
}
358 
/*
 * Control word handler for a special character.
 *
 * rcw: the control word. Its rcwID is passed to
 *      docRtfTextUnicodeValue() as the unicode value to insert.
 * arg: only used in the debug trace.
 * rr:  the reader.
 *
 * Returns 0 on success, -1 if the character could not be inserted.
 */
int docRtfTextSpecialChar(const RtfControlWord *rcw, int arg, RtfReader *rr)
{
    /* docRtfTextParticule() adjusts level */

    if (docRtfTextUnicodeValue(rcw, rcw->rcwID, rr)) {
        SXDEB(rcw->rcwWord,arg);
        return -1;
    }

    return 0;
}
370 
/*
 * Control word handler for special particules: tabs, line breaks,
 * footnote separators, optional hyphens, direction marks and page or
 * column breaks.
 *
 * rcw: the control word; rcw->rcwID is the kind of particule.
 * arg: not used.
 * rr:  the reader.
 *
 * Nothing is done inside an ignored group. Pending text shading
 * changes are refreshed before the particule is handled.
 *
 * A page or column break is turned into a paragraph break override
 * when no override is set yet (the value is -1) and the paragraph is
 * still empty, or the break directly follows a paragraph head field.
 * Otherwise it is saved as a particule, like the other kinds.
 *
 * For all known kinds, the 'after noteref' and 'after paragraph head
 * field' flags of the reader are cleared. Unknown kinds are traced and
 * otherwise ignored.
 *
 * Returns 0 on success, -1 on failure.
 */
int docRtfTextSpecialParticule(const RtfControlWord *rcw, int arg,
                               RtfReader *rr)
{
    RtfReadingState *rrs = rr->rrcState;
    BufferItem *paraNode;

    if (rr->rrcInIgnoredGroup > 0) {
        return 0;
    }

    if (rrs->rrsTextShadingChanged) {
        docRtfRefreshTextShading(rr, rrs);
    }

    paraNode = docRtfGetParaNode(rr);
    if (!paraNode) {
        SXDEB(rcw->rcwWord,paraNode);
        return -1;
    }

    switch (rcw->rcwID) {
    case DOCkindTAB:
    case DOCkindLINEBREAK:
    case DOCkindCHFTNSEP:
    case DOCkindCHFTNSEPC:
    case DOCkindOPT_HYPH:
    case DOCkindLTR_MARK:
    case DOCkindRTL_MARK:
        if (docSaveSpecialParticule(rr->rrDocument, paraNode,
                                    &(rrs->rrsTextAttribute), rcw->rcwID)) {
            LDEB(1);
            return -1;
        }

        rr->rrcAfterNoteref = 0;
        rr->rrAfterParaHeadField = 0;
        break;

    case DOCkindPAGEBREAK:
    case DOCkindCOLUMNBREAK:
        if (rr->rrParagraphBreakOverride == -1 &&
            (docParaStrlen(paraNode) == 0 || rr->rrAfterParaHeadField)) {
            /* At the head of the paragraph: remember it as an override. */
            if (rcw->rcwID == DOCkindPAGEBREAK) {
                rr->rrParagraphBreakOverride = DOCibkPAGE;
            } else {
                rr->rrParagraphBreakOverride = DOCibkCOL;
            }
        } else if (docSaveSpecialParticule(rr->rrDocument, paraNode,
                                           &(rrs->rrsTextAttribute),
                                           rcw->rcwID)) {
            LDEB(1);
            return -1;
        }

        rr->rrcAfterNoteref = 0;
        rr->rrAfterParaHeadField = 0;
        break;

    default:
        SLDEB(rcw->rcwWord,rcw->rcwID);
        break;
    }

    return 0;
}
440 
/*
 * Control word handler for bidirectional marks.
 *
 * The control word is accepted, but nothing is done with it. None of
 * the arguments are inspected.
 *
 * Returns 0.
 */
int docRtfTextBidiMark(const RtfControlWord *rcw, int arg, RtfReader *rr)
{
    /*SDEB(rcw->rcwWord);*/

    return 0;
}
448