1 /************************************************************************/
2 /* */
3 /* Manage the actual string content while writing an RTF document. */
4 /* */
5 /************************************************************************/
6
7 # include "docRtfConfig.h"
8
9 # include <stdio.h>
10 # include <ctype.h>
11
12 # include <appDebugon.h>
13
14 # include <utilMatchFont.h>
15 # include <uniUtf8.h>
16 # include <textConverter.h>
17 # include <textConverterImpl.h>
18
19 # include "docRtfWriterImpl.h"
20 # include "docRtfFlags.h"
21 # include "docRtfTextConverter.h"
22
23 /************************************************************************/
24 /* */
25 /* Emit a string to the RTF output stream. */
26 /* */
27 /************************************************************************/
28
docRtfEscapeString(void * vrw,int produced,const char * ss,int n)29 static int docRtfEscapeString( void * vrw,
30 int produced, /* ignored */
31 const char * ss,
32 int n )
33 {
34 RtfWriter * rw= (RtfWriter *)vrw;
35 const unsigned char * us= (const unsigned char *)ss;
36 SimpleOutputStream * sos= rw->rwSosOut;
37 int i;
38 int addSpace= 0;
39
40 if ( n == 0 )
41 { return n; }
42
43 switch( rw->rwcAfter )
44 {
45 case RTFafterTAG:
46 if ( ss[0] == ' ' ||
47 ss[0] == '-' ||
48 ss[0] == '_' ||
49 isalnum( ss[0] ) )
50 { addSpace= 1; }
51 break;
52
53 case RTFafterARG:
54 if ( ss[0] == ' ' || isdigit( ss[0] ) )
55 { addSpace= 1; }
56 break;
57
58 case RTFafterTEXT:
59 break;
60
61 default:
62 CDEB(rw->rwcAfter); return -1;
63 }
64
65
66 if ( addSpace )
67 {
68 if ( sioOutPutByte( ' ', rw->rwSosOut ) < 0 )
69 { LDEB(1); return -1; }
70
71 rw->rwCol += 1;
72 }
73 rw->rwcAfter= RTFafterTEXT;
74
75 i= 0;
76 while( i < n )
77 {
78 int c= *us;
79
80 switch( c )
81 {
82 case '{': case '\\': case '}':
83 if ( sioOutPutByte( '\\', sos ) < 0 )
84 { LDEB(1); return -1; }
85 if ( sioOutPutByte( c, sos ) < 0 )
86 { LDEB(1); return -1; }
87 rw->rwCol += 2;
88 break;
89 default:
90 if ( c < 32 || c > 127 )
91 {
92 static const char xdigs[]= "0123456789abcdef";
93
94 if ( sioOutPutByte( '\\', sos ) < 0 )
95 { LDEB(1); return -1; }
96 if ( sioOutPutByte( '\'', sos ) < 0 )
97 { LDEB(1); return -1; }
98 if ( sioOutPutByte( xdigs[ ( c >> 4 ) & 0x0f ], sos ) < 0 )
99 { LDEB(1); return -1; }
100 if ( sioOutPutByte( xdigs[ ( c >> 0 ) & 0x0f ], sos ) < 0 )
101 { LDEB(1); return -1; }
102 rw->rwCol += 4;
103 }
104 else{
105 if ( sioOutPutByte( c, sos ) < 0 )
106 { LDEB(1); return -1; }
107 rw->rwCol += 1;
108 }
109 break;
110 }
111
112 i++; us++;
113 }
114
115 return n;
116 }
117
118 /************************************************************************/
119 /* */
120 /* Emit a unicode (UTF-16) character as a tag. */
121 /* */
122 /************************************************************************/
123
/*
 *  Write one 16 bit unicode value as a "u" tag with a numeric argument,
 *  followed by a single '?' as the replacement character.
 *
 *  rw		The writer to emit to.
 *  symbol	The unicode value. Values in the range 32768..65535 are
 *		written as the corresponding negative number (the value
 *		minus 65536); all other values are written unchanged.
 *
 *  Returns 0 on success and -1 if the replacement character could not
 *  be written. In that case docRtfEscapeString() has already reported
 *  the failure.
 */
static int docRtfEmitUnicode( RtfWriter *  rw,
                              int          symbol )
{
    if ( symbol > 32767 && symbol < 65536 )
    { symbol -= 65536; }

    docRtfWriteArgTag( rw, "u", symbol );

    /*  Do not report success when the fallback byte was not written. */
    if ( docRtfEscapeString( (void *)rw, 0, "?", 1 ) < 0 )
    { return -1; }

    return 0;
}
135
136 /************************************************************************/
137 /* */
138 /* Emit a UTF-8 string in some legacy encoding. */
139 /* The string might very well contain unicodes that cannot be */
140 /* represented in the legacy encoding. The encoder stops on those */
141 /* bytes and we emit them in \u1234 format. */
142 /* */
143 /************************************************************************/
144
/*
 *  Write n bytes of UTF-8 text through a text converter. Where the
 *  converter stops before the end of the input, the character at that
 *  position is decoded with uniGetUtf8() and either the font is
 *  switched (font encoded text only) or the character is emitted as a
 *  unicode tag by docRtfEmitUnicode().
 *
 *  rw		The writer to emit to.
 *  tc		The converter to use.
 *  fontEncoded	If set, first try to switch to a charset/font number
 *		reported by docRtfWriteGetCharset() before falling back to
 *		a unicode tag.
 *  ss		The UTF-8 bytes. They need not be NUL terminated.
 *  n		The number of bytes at ss.
 *
 *  Returns 0 on success and -1 if the converter fails, if the input
 *  cannot be decoded, if the input ends in the middle of a character,
 *  or if a unicode tag cannot be written.
 */
static int docRtfWriteEncodedString( RtfWriter *      rw,
                                     TextConverter *  tc,
                                     int              fontEncoded,
                                     const char *     ss,
                                     int              n )
{
    int produced= 0;

    while( n > 0 )
    {
        int             consumed= 0;
        unsigned short  symbol;

        produced= textConverterConvertFromUtf8( tc, (void *)rw,
                                        &consumed, produced, ss, n );
        if ( produced < 0 )
        { LDEB(produced); return -1; }

        ss += consumed; n -= consumed;

        /*  Everything was converted: Ready. */
        if ( n <= 0 )
        { break; }

        consumed= uniGetUtf8( &symbol, ss );
        if ( consumed < 1 )
        { LDEB(consumed); return -1; }

        /*
         *  The character extends beyond the n bytes that we were
         *  given: The symbol was (partially) decoded from bytes that
         *  are not part of the input. Refuse it rather than emitting
         *  it and letting n become negative.
         */
        if ( consumed > n )
        { LDEB(consumed); return -1; }

        if ( fontEncoded )
        {
            BufferDocument *      bd= rw->rwDocument;
            TextAttribute *       ta= &(rw->rwTextAttribute);
            const DocumentFont *  df= docRtfGetCurrentFont( bd, ta );

            if ( df )
            {
                int charset= FONTcharsetDEFAULT;
                int fontNumber= docRtfWriteGetCharset( rw, &charset,
                                                        df, symbol );

                if ( fontNumber >= 0 && rw->rwTextCharset != charset )
                {
                    const char * encodingName;

                    docRtfWriteArgTag( rw, "f", fontNumber );

                    /*
                     *  NOTE(review): The encoding is set on
                     *  rw->rwTextTextConverter and not on tc. The only
                     *  caller in this file that sets fontEncoded passes
                     *  that same converter as tc.
                     */
                    encodingName= utilGetEncodingName( df->dfName,
                                                        charset );
                    textConverterSetNativeEncodingName(
                                        rw->rwTextTextConverter,
                                        encodingName );
                    rw->rwTextCharset= charset;

                    /*
                     *  Retry the same character with the new encoding.
                     *  Nothing is skipped: ss and n are unchanged. If
                     *  the converter stops here again and the same
                     *  charset is reported, the test above fails and
                     *  the character is emitted as a unicode tag.
                     */
                    continue;
                }
            }
        }

        if ( docRtfEmitUnicode( rw, symbol ) < 0 )
        { LDEB(symbol); return -1; }

        ss += consumed; n -= consumed;
    }

    return 0;
}
210
/*
 *  Write n bytes of UTF-8 text using the converter of the RTF document
 *  itself (rw->rwRtfTextConverter). The text is not font encoded, so
 *  no attempt is made to switch fonts for characters that the
 *  converter stops on.
 *
 *  The result of docRtfWriteEncodedString() is not reported to the
 *  caller.
 */
void docRtfWriteDocEncodedString( RtfWriter *   rw,
                                  const char *  ss,
                                  int           n )
{
    (void)docRtfWriteEncodedString( rw, rw->rwRtfTextConverter,
                                    0 /* fontEncoded */, ss, n );
}
220
/*
 *  Write n bytes of UTF-8 text in the encoding that belongs to the
 *  current text attribute and charset of the writer.
 *
 *  If the RTFflagUNENCODED save flag is set, the text is written with
 *  docRtfWriteDocEncodedString() instead. Otherwise the encoding name
 *  is looked up with docGetEncodingName(); if there is none, the
 *  native encoding name of rw->rwRtfTextConverter is used. The name is
 *  set on rw->rwTextTextConverter before the text is converted.
 *
 *  The result of docRtfWriteEncodedString() is not reported to the
 *  caller.
 */
void docRtfWriteFontEncodedString( RtfWriter *   rw,
                                   const char *  ss,
                                   int           n )
{
    if ( rw->rwSaveFlags & RTFflagUNENCODED )
    {
        docRtfWriteDocEncodedString( rw, ss, n );
    }
    else
    {
        const char * name= docGetEncodingName( rw->rwDocument,
                                        &(rw->rwTextAttribute),
                                        rw->rwTextCharset );

        /*  No encoding for this font: Use that of the document. */
        if ( ! name )
        { name= rw->rwRtfTextConverter->tcNativeEncodingName; }

        textConverterSetNativeEncodingName( rw->rwTextTextConverter, name );

        docRtfWriteEncodedString( rw, rw->rwTextTextConverter,
                                  1 /* fontEncoded */, ss, n );
    }
}
244
/*
 *  Write n bytes to the output without any character set conversion.
 *  The bytes are still escaped for RTF by docRtfEscapeString(). Its
 *  result is not reported to the caller.
 */
void docRtfWriteRawBytes( RtfWriter *   rw,
                          const char *  ss,
                          int           n )
{
    void * const  through= (void *)rw;
    const int     produced= 0;  /* ignored by docRtfEscapeString() */

    (void)docRtfEscapeString( through, produced, ss, n );
}
251
252
253 /************************************************************************/
254
/*
 *  Prepare the two text converters of an RtfWriter for writing:
 *
 *  - rw->rwRtfTextConverter gets DOC_RTF_AnsiCharsetName as its native
 *    encoding name.
 *  - Both rw->rwRtfTextConverter and rw->rwTextTextConverter get
 *    docRtfEscapeString() as the function that receives their output.
 */
void docRtfWriteSetupTextConverters( RtfWriter * rw )
{
    textConverterSetNativeEncodingName( rw->rwRtfTextConverter,
                                        DOC_RTF_AnsiCharsetName );

    /*  All converted bytes are escaped on their way to the output. */
    textConverterSetProduce( rw->rwRtfTextConverter,
                             docRtfEscapeString );
    textConverterSetProduce( rw->rwTextTextConverter,
                             docRtfEscapeString );
}
264
265