1 /************************************************************************/
2 /* */
3 /* Manage the actual string content while reading an RTF document. */
4 /* */
5 /************************************************************************/
6
7 # include "docRtfConfig.h"
8
9 # include <stdlib.h>
10 # include <string.h>
11 # include <stdio.h>
12 # include <ctype.h>
13
14 # include <appDebugon.h>
15
16 # include <uniUtf8.h>
17
18 # include "docRtfReaderImpl.h"
19 # include <docParaString.h>
20 # include <textConverter.h>
21 # include <textConverterImpl.h>
22 # include <docParaParticules.h>
23
24 /************************************************************************/
25 /* */
26 /* Handle text.. */
27 /* */
28 /* 1) Ignore it. */
29 /* 2) Refuse it. */
30 /* 3) Save it for later use. (Convert it to UTF-8 on the fly). */
31 /* */
32 /************************************************************************/
33
34 /* 1 */
/*  Text handler that accepts the text and discards it.			*/
/*  None of the arguments is used. Always returns 0.			*/
int docRtfIgnoreText(	RtfReader *	rr,
			const char *	text,
			int		len )
    {
    (void)rr;
    (void)text;
    (void)len;

    return 0;
    }
39
40 /* 2 */
/*  Text handler that rejects any text.					*/
/*  None of the arguments is used. Reports with LDEB() and always	*/
/*  returns -1.								*/
int docRtfRefuseText(	RtfReader *	rr,
			const char *	text,
			int		len )
    {
    (void)rr;
    (void)text;
    (void)len;

    LDEB(1);
    return -1;
    }
45
46 /************************************************************************/
47 /* */
48 /* Save text: It is not encoded. */
49 /* */
50 /************************************************************************/
51
/*  Append len bytes of text, unconverted, to the saved text of the	*/
/*  current reading state.						*/
/*  Returns 0 on success, -1 if the bytes could not be appended.	*/
int docRtfSaveRawBytes(	RtfReader *	rr,
			const char *	text,
			int		len )
    {
    RtfReadingState *	state= rr->rrcState;

    if  ( ! utilMemoryBufferAppendBytes( &(state->rrsSavedText),
				    (const unsigned char *)text, len ) )
	{ return 0;	}

    LDEB(len);
    return -1;
    }
64
65 /************************************************************************/
66 /* */
67 /* Convert text from some encoding to UTF-8. */
68 /* */
69 /************************************************************************/
70
docRtfSaveBytes(void * vmb,int offset,const char * bytes,int count)71 static int docRtfSaveBytes( void * vmb,
72 int offset,
73 const char * bytes,
74 int count )
75 {
76 MemoryBuffer * mb= (MemoryBuffer *)vmb;
77
78 if ( utilMemoryBufferAppendBytes( mb,
79 (const unsigned char *)bytes, count ) )
80 { LDEB(mb->mbSize); return -1; }
81
82 return count;
83 }
84
85 /************************************************************************/
86 /* */
87 /* Save text: It is in the document encoding. */
88 /* */
89 /************************************************************************/
90
/*  Run len bytes of text through the RTF text converter of the	*/
/*  reader. The converter receives the saved text buffer of the	*/
/*  current reading state as its target and the current size of that	*/
/*  buffer as the offset.						*/
/*  Returns 0 on success, -1 if the conversion reports a negative	*/
/*  result.								*/
int docRtfSaveDocEncodedText(	RtfReader *	rr,
				const char *	text,
				int		len )
    {
    RtfReadingState *	state= rr->rrcState;
    int			consumed= 0;
    int			upto;

    upto= textConverterConvertToUtf8( rr->rrRtfTextConverter,
				    (void *)&(state->rrsSavedText),
				    &consumed,
				    state->rrsSavedText.mbSize, text, len );
    if  ( upto >= 0 )
	{ return 0;	}

    LDEB(upto);
    return -1;
    }
108
109 /************************************************************************/
110
/*  Prepare both text converters of the reader:			*/
/*  - The RTF text converter gets DOC_RTF_AnsiCharsetName as its	*/
/*    native encoding and docRtfSaveBytes() as its producer.		*/
/*  - The text converter for paragraph text is set up by		*/
/*    docParaSetupTextConverter().					*/
void docRtfReadSetupTextConverters(	RtfReader *	rr )
    {
    /*  Converter for text that is collected in the saved text buffer */
    textConverterSetNativeEncodingName( rr->rrRtfTextConverter,
						DOC_RTF_AnsiCharsetName );
    textConverterSetProduce( rr->rrRtfTextConverter, docRtfSaveBytes );

    /*  Converter for text that goes into paragraphs */
    docParaSetupTextConverter( rr->rrTextTextConverter );

    return;
    }
120
121 /************************************************************************/
122 /* */
123 /* Store the text bytes that we collected from the rtf file in some */
124 /* location. Use realloc() to resize the target location and flush */
125 /* the collected text bytes. */
126 /* */
127 /************************************************************************/
128
/*  Move the saved text of the current reading state to *pTarget.	*/
/*									*/
/*  pTarget:	  In: a pointer that may be passed to realloc() (may	*/
/*		  be NULL). Out: the resized, NUL terminated copy.	*/
/*  pSize:	  Out: the number of bytes before the terminator.	*/
/*  removeSemicolon: If set, a single trailing ';' is cut off.		*/
/*									*/
/*  If no text was saved, only *pSize is set (to 0): *pTarget is not	*/
/*  touched. Otherwise the saved text buffer is emptied afterwards.	*/
/*  Returns 0 on success, -1 if realloc() fails. In that case neither	*/
/*  *pTarget nor the saved text is modified.				*/
int docRtfStoreSavedText(	char **		pTarget,
				int *		pSize,
				RtfReader *	rr,
				int		removeSemicolon )
    {
    RtfReadingState *	state= rr->rrcState;

    char *		fresh;
    int			size;

    if  ( utilMemoryBufferIsEmpty( &(state->rrsSavedText) ) )
	{
	*pSize= 0;
	return 0;
	}

    size= state->rrsSavedText.mbSize;

    /*  One extra byte for the terminator */
    fresh= (char *)realloc( *pTarget, size+ 1 );
    if  ( ! fresh )
	{ LXDEB(size,fresh); return -1;	}

    memcpy( fresh, state->rrsSavedText.mbBytes, size );
    fresh[size]= '\0';

    if  ( removeSemicolon && size > 0 )
	{
	/*  Overwrite the trailing semicolon with a terminator */
	if  ( fresh[size- 1] == ';' )
	    { size--; fresh[size]= '\0';	}
	}

    utilEmptyMemoryBuffer( &(state->rrsSavedText) );

    *pSize= size;
    *pTarget= fresh;

    return 0;
    }
161
/*  Replace the contents of mb with the text that was saved in the	*/
/*  current reading state of the reader.				*/
/*									*/
/*  mb:		     The buffer that receives the text.			*/
/*  rr:		     The reader that holds the saved text.		*/
/*  removeSemicolon: Passed on to docRtfStoreSavedText().		*/
/*									*/
/*  Returns 0 on success, -1 on failure.				*/
/*									*/
/*  The intermediate copy that docRtfStoreSavedText() allocates is	*/
/*  freed on every path, including the one where setting the bytes in	*/
/*  mb fails. (That path used to leak the copy.)			*/
int docRtfMemoryBufferSetText(	MemoryBuffer *	mb,
				RtfReader *	rr,
				int		removeSemicolon )
    {
    int			rval= 0;

    char *		text= (char *)0;
    int			size= 0;

    if  ( docRtfStoreSavedText( &text, &size, rr, removeSemicolon ) )
	{ LDEB(1); rval= -1; goto ready;	}

    if  ( utilMemoryBufferSetBytes( mb, (const unsigned char *)text, size ) )
	{ LDEB(1); rval= -1; goto ready;	}

  ready:

    /*  free() accepts a null pointer: no guard needed */
    free( text );

    return rval;
    }
180
181 /************************************************************************/
182 /* */
183 /* Append saved text to a memory buffer. */
184 /* */
185 /************************************************************************/
186
/*  Append the text that was saved in the current reading state of	*/
/*  the reader to mb. A trailing semicolon is never removed.		*/
/*									*/
/*  mb:		The buffer that the text is appended to.		*/
/*  rr:		The reader that holds the saved text.			*/
/*									*/
/*  Returns 0 on success, -1 on failure.				*/
/*									*/
/*  The intermediate copy that docRtfStoreSavedText() allocates is	*/
/*  freed on every path, including the one where appending to mb	*/
/*  fails. (That path used to leak the copy.)				*/
int docRtfMemoryBufferAppendText(	MemoryBuffer *	mb,
					RtfReader *	rr )
    {
    const int		removeSemicolon= 0;

    int			rval= 0;

    char *		text= (char *)0;
    int			size= 0;

    if  ( docRtfStoreSavedText( &text, &size, rr, removeSemicolon ) )
	{ LDEB(1); rval= -1; goto ready;	}

    if  ( utilMemoryBufferAppendBytes( mb,
				(const unsigned char *)text, size ) )
	{ LDEB(1); rval= -1; goto ready;	}

  ready:

    /*  free() accepts a null pointer: no guard needed */
    free( text );

    return rval;
    }
205
206 /************************************************************************/
207
/*  Select the native encoding of the converter for paragraph text.	*/
/*									*/
/*  If the reading state has a non-negative text charset, the		*/
/*  encoding name is looked up with docGetEncodingName(). If that	*/
/*  yields nothing, or the charset is negative, the native encoding	*/
/*  name of the RTF text converter is used instead.			*/
/*									*/
/*  Always returns 0.							*/
static int docRtfReadAdaptToFontEncoding(
				RtfReader *		rr,
				RtfReadingState *	rrs )
    {
    const char *	fromCharset= (const char *)0;

    if  ( rrs->rrsTextCharset >= 0 )
	{
	fromCharset= docGetEncodingName( rr->rrDocument,
			&(rrs->rrsTextAttribute), rrs->rrsTextCharset );
	}

    textConverterSetNativeEncodingName( rr->rrTextTextConverter,
		fromCharset ? fromCharset :
			rr->rrRtfTextConverter->tcNativeEncodingName );

    return 0;
    }
227
228 /************************************************************************/
229 /* */
230 /* Insert particules from the input in the document. */
231 /* */
232 /************************************************************************/
233
/*  Text handler that appends len bytes of text to the current		*/
/*  paragraph of the document.						*/
/*									*/
/*  Inside an ignored group nothing happens and 0 is returned.		*/
/*  Returns 0 on success, -1 if there is no paragraph node, if the	*/
/*  paragraph properties cannot be adapted or if the text cannot be	*/
/*  appended.								*/
int docRtfTextParticule(	RtfReader *	rr,
				const char *	text,
				int		len )
    {
    RtfReadingState *	state;
    BufferDocument *	document;
    BufferItem *	paraNode;

    if  ( rr->rrcInIgnoredGroup )
	{ return 0;	}

    state= rr->rrcState;
    document= rr->rrDocument;

    paraNode= docRtfGetParaNode( rr );
    if  ( ! paraNode )
	{ XDEB(paraNode); return -1;	}

    /*  The paragraph properties are only adapted while the paragraph */
    /*  is still empty, or directly after a paragraph head field.     */
    if  ( ( docParaStrlen( paraNode ) == 0	||
	    rr->rrAfterParaHeadField		)			&&
	  docRtfAdaptToParaProperties( paraNode, document, state,
					rr->rrParagraphBreakOverride )	)
	{ LDEB(1); return -1;	}

    /*  A failure here is reported, but it does not stop the insert */
    if  ( docRtfReadAdaptToFontEncoding( rr, state ) )
	{ LDEB(1);	}

    if  ( state->rrsTextShadingChanged )
	{ docRtfRefreshTextShading( rr, state );	}

    if  ( docParaAppendText( document, paraNode,
				&(state->rrsTextAttribute),
				rr->rrTextTextConverter, text, len ) )
	{ LDEB(1); return -1;	}

    /*  Real text was inserted: reset both 'directly after' flags */
    rr->rrAfterParaHeadField= 0;
    rr->rrcAfterNoteref= 0;

    return 0;
    }
272
273 /************************************************************************/
274 /* */
275 /* Handle an explicit unicode. */
276 /* Special characters. */
277 /* */
278 /************************************************************************/
279
/*  Insert the character with unicode value arg at the current		*/
/*  position of the reader.						*/
/*									*/
/*  1)  Negative values are shifted up by 65536.			*/
/*  2)  Values in E000..F8FF: only the low byte is kept and it is	*/
/*	saved as document encoded text.					*/
/*  3)  All other values are emitted as UTF-8 with uniPutUtf8(). If	*/
/*	that yields no bytes, this is reported and 0 is returned.	*/
/*  4)  If the reader collects raw bytes, the character is reported	*/
/*	and dropped.							*/
/*  5)  If the reader does not insert text particules, the bytes are	*/
/*	appended to the saved text of the reading state.		*/
/*  6)  Otherwise the bytes are appended to the paragraph string and	*/
/*	docParaDivideAppendedText() is called for the appended range.	*/
/*									*/
/*  Returns 0 on success (or when the character is dropped), -1 on	*/
/*  failure.								*/
/*									*/
/*  NOTE(review): (2) always goes to the saved text, whatever		*/
/*  rr->rrcAddParticule is, and (6) does not look at			*/
/*  rr->rrcInIgnoredGroup. Confirm that both are intended.		*/
static int docRtfTextUnicodeValue(	const RtfControlWord *	rcw,
					int			arg,
					RtfReader *		rr )
    {
    RtfReadingState *	state= rr->rrcState;

    char		bytes[7];
    int			count;

    BufferItem *	paraNode;
    BufferDocument *	document;
    int			textAttributeNumber;
    int			stroff;
    int			stroffShift= 0;

    /*  1  */
    if  ( arg < 0 )
	{ arg += 65536;	}

    /*  2: Dirty HACK: Only use low byte of characters in the */
    /*  unicode private range */
    if  ( arg >= 0xE000 && arg <= 0xF8FF )
	{
	bytes[0]= arg & 0xff;
	bytes[1]= '\0';

	return docRtfSaveDocEncodedText( rr, (char *)bytes, 1 );
	}

    /*  3  */
    count= uniPutUtf8( bytes, arg );
    if  ( count < 1 )
	{ LDEB(count); return 0;	}

    /*  4  */
    if  ( rr->rrcAddParticule == docRtfSaveRawBytes )
	{ XXDEB(rr->rrcAddParticule,docRtfSaveRawBytes); return 0; }

    /*  5  */
    if  ( rr->rrcAddParticule != docRtfTextParticule )
	{
	if  ( utilMemoryBufferAppendBytes( &(state->rrsSavedText),
					(unsigned char *)bytes, count ) )
	    { LDEB(count); return -1;	}

	return 0;
	}

    /*  6  */
    paraNode= docRtfGetParaNode( rr );
    document= rr->rrDocument;

    if  ( ! paraNode )
	{ SXDEB(rcw->rcwWord,paraNode); return -1;	}

    if  ( state->rrsTextShadingChanged )
	{ docRtfRefreshTextShading( rr, state );	}

    textAttributeNumber= docTextAttributeNumber( document,
					    &(state->rrsTextAttribute) );
    if  ( textAttributeNumber < 0 )
	{ LDEB(textAttributeNumber); return -1;	}

    /*  Replace the empty range at the end of the paragraph string */
    stroff= docParaStrlen( paraNode );

    if  ( docParaStringReplace( &stroffShift, paraNode, stroff, stroff,
						(char *)bytes, count ) )
	{ LDEB(docParaStrlen(paraNode)); return -1;	}

    if  ( docParaDivideAppendedText( paraNode, textAttributeNumber,
						stroff, stroff+ count ) )
	{ LLDEB(count,paraNode->biParaParticuleCount); return -1; }

    return 0;
    }
345
/*  Handle a control word that carries an explicit unicode value in	*/
/*  arg: insert the character, then set the number of bytes to skip	*/
/*  in the reading state to its bytes-per-unicode count. (Presumably	*/
/*  the fallback bytes that follow the control word in the input:	*/
/*  confirm against the tokenizer.)					*/
/*  Returns 0 on success, -1 if the character cannot be inserted. In	*/
/*  that case the skip count is left alone.				*/
int docRtfTextUnicode(		const RtfControlWord *	rcw,
				int			arg,
				RtfReader *		rr )
    {
    RtfReadingState *	state= rr->rrcState;

    if  ( ! docRtfTextUnicodeValue( rcw, arg, rr ) )
	{
	state->rrsUnicodeBytesToSkip= state->rrsBytesPerUnicode;
	return 0;
	}

    SXDEB(rcw->rcwWord,arg);
    return -1;
    }
358
/*  Handle a control word that stands for one fixed character: the	*/
/*  rcwID of the control word is used as the unicode value to		*/
/*  insert. The arg value only appears in the failure report.		*/
/*  Returns 0 on success, -1 if the character cannot be inserted.	*/
int docRtfTextSpecialChar(	const RtfControlWord *	rcw,
				int			arg,
				RtfReader *		rr )
    {
    /* docRtfTextParticule() adjusts level */

    if  ( ! docRtfTextUnicodeValue( rcw, rcw->rcwID, rr ) )
	{ return 0;	}

    SXDEB(rcw->rcwWord,arg);
    return -1;
    }
370
/*  Handle a control word that inserts a special particule (tab,	*/
/*  line break, page break, ...) in the current paragraph. The kind	*/
/*  of particule is the rcwID of the control word.			*/
/*									*/
/*  Inside an ignored group nothing happens and 0 is returned.		*/
/*									*/
/*  A page or column break that arrives while no paragraph break	*/
/*  override is set (-1) and the paragraph is still empty, or the	*/
/*  reader is directly after a paragraph head field, is not inserted:	*/
/*  it is remembered in rr->rrParagraphBreakOverride instead.		*/
/*									*/
/*  An unknown kind is reported and otherwise ignored.			*/
/*  Returns 0 on success, -1 if there is no paragraph node or the	*/
/*  particule cannot be saved.						*/
int docRtfTextSpecialParticule(	const RtfControlWord *	rcw,
				int			arg,
				RtfReader *		rr )
    {
    RtfReadingState *	state= rr->rrcState;
    BufferItem *	paraNode;

    if  ( rr->rrcInIgnoredGroup > 0 )
	{ return 0;	}

    if  ( state->rrsTextShadingChanged )
	{ docRtfRefreshTextShading( rr, state );	}

    paraNode= docRtfGetParaNode( rr );
    if  ( ! paraNode )
	{ SXDEB(rcw->rcwWord,paraNode); return -1;	}

    switch( rcw->rcwID )
	{
	case DOCkindPAGEBREAK:
	case DOCkindCOLUMNBREAK:
	    if  ( rr->rrParagraphBreakOverride == -1	&&
		  ( docParaStrlen( paraNode ) == 0	||
		    rr->rrAfterParaHeadField		)	)
		{
		/*  Turn the break into a property of the paragraph */
		if  ( rcw->rcwID == DOCkindPAGEBREAK )
		    { rr->rrParagraphBreakOverride= DOCibkPAGE;	}
		else{ rr->rrParagraphBreakOverride= DOCibkCOL;	}

		break;
		}

	    /*  Not at the head of the paragraph: save the break */
	    /*  as an ordinary special particule. FALLTHROUGH */
	case DOCkindTAB:
	case DOCkindLINEBREAK:
	case DOCkindCHFTNSEP:
	case DOCkindCHFTNSEPC:
	case DOCkindOPT_HYPH:
	case DOCkindLTR_MARK:
	case DOCkindRTL_MARK:
	    if  ( docSaveSpecialParticule( rr->rrDocument, paraNode,
				&(state->rrsTextAttribute), rcw->rcwID ) )
		{ LDEB(1); return -1;	}

	    break;

	default:
	    /*  Unknown kind: report it and leave the flags alone */
	    SLDEB(rcw->rcwWord,rcw->rcwID);
	    return 0;
	}

    /*  Something was handled: reset both 'directly after' flags */
    rr->rrcAfterNoteref= 0;
    rr->rrAfterParaHeadField= 0;

    return 0;
    }
440
/*  Handler for bidi mark control words: the control word is accepted	*/
/*  and nothing is done. None of the arguments is used.		*/
/*  Always returns 0.							*/
int docRtfTextBidiMark(		const RtfControlWord *	rcw,
				int			arg,
				RtfReader *		rr )
    {
    (void)rcw;
    (void)arg;
    (void)rr;

    return 0;
    }
448