1 /************************************************************************/
2 /*									*/
/*  Manage the actual string content while writing an RTF document.	*/
4 /*									*/
5 /************************************************************************/
6 
7 #   include	"docRtfConfig.h"
8 
9 #   include	<stdio.h>
10 #   include	<ctype.h>
11 
12 #   include	<appDebugon.h>
13 
14 #   include	<utilMatchFont.h>
15 #   include	<uniUtf8.h>
16 #   include	<textConverter.h>
17 #   include	<textConverterImpl.h>
18 
19 #   include	"docRtfWriterImpl.h"
20 #   include	"docRtfFlags.h"
21 #   include	"docRtfTextConverter.h"
22 
23 /************************************************************************/
24 /*									*/
25 /*  Emit a string to the RTF output stream.				*/
26 /*									*/
27 /************************************************************************/
28 
docRtfEscapeString(void * vrw,int produced,const char * ss,int n)29 static int docRtfEscapeString(	void *			vrw,
30 				int			produced, /* ignored */
31 				const char *		ss,
32 				int			n )
33     {
34     RtfWriter *			rw= (RtfWriter *)vrw;
35     const unsigned char *	us= (const unsigned char *)ss;
36     SimpleOutputStream *	sos= rw->rwSosOut;
37     int				i;
38     int				addSpace= 0;
39 
40     if  ( n == 0 )
41 	{ return n;	}
42 
43     switch( rw->rwcAfter )
44 	{
45 	case RTFafterTAG:
46 	    if  ( ss[0] == ' '		||
47 		  ss[0] == '-'		||
48 		  ss[0] == '_'		||
49 		  isalnum( ss[0] )	)
50 		{ addSpace= 1;	}
51 	    break;
52 
53 	case RTFafterARG:
54 	    if  ( ss[0] == ' ' || isdigit( ss[0] ) )
55 		{ addSpace= 1;	}
56 	    break;
57 
58 	case RTFafterTEXT:
59 	    break;
60 
61 	default:
62 	    CDEB(rw->rwcAfter); return -1;
63 	}
64 
65 
66     if  ( addSpace )
67 	{
68 	if  ( sioOutPutByte( ' ', rw->rwSosOut ) < 0 )
69 	    { LDEB(1); return -1;	}
70 
71 	rw->rwCol += 1;
72 	}
73     rw->rwcAfter= RTFafterTEXT;
74 
75     i= 0;
76     while( i < n )
77 	{
78 	int		c= *us;
79 
80 	switch( c )
81 	    {
82 	    case '{': case '\\': case '}':
83 		if  ( sioOutPutByte( '\\', sos ) < 0 )
84 		    { LDEB(1); return -1;	}
85 		if  ( sioOutPutByte( c, sos ) < 0 )
86 		    { LDEB(1); return -1;	}
87 		rw->rwCol += 2;
88 		break;
89 	    default:
90 		if  ( c < 32 || c > 127 )
91 		    {
92 		    static const char	xdigs[]= "0123456789abcdef";
93 
94 		    if  ( sioOutPutByte( '\\', sos ) < 0 )
95 			{ LDEB(1); return -1;	}
96 		    if  ( sioOutPutByte( '\'', sos ) < 0 )
97 			{ LDEB(1); return -1;	}
98 		    if  ( sioOutPutByte( xdigs[ ( c >> 4 ) & 0x0f ], sos ) < 0 )
99 			{ LDEB(1); return -1;	}
100 		    if  ( sioOutPutByte( xdigs[ ( c >> 0 ) & 0x0f ], sos ) < 0 )
101 			{ LDEB(1); return -1;	}
102 		    rw->rwCol += 4;
103 		    }
104 		else{
105 		    if  ( sioOutPutByte( c, sos ) < 0 )
106 			{ LDEB(1); return -1;	}
107 		    rw->rwCol += 1;
108 		    }
109 		break;
110 	    }
111 
112 	i++; us++;
113 	}
114 
115     return n;
116     }
117 
118 /************************************************************************/
119 /*									*/
120 /*  Emit a unicode (UTF-16) character as a tag.				*/
121 /*									*/
122 /************************************************************************/
123 
/*  Write the symbol as a \u tag, followed by a '?'.			*/
/*									*/
/*  1)  Values from 32768 upto 65535 are written as the negative	*/
/*	number that has the same 16 bit representation.			*/
/*  2)  The '?' follows the tag as ordinary text. (NOTE: In RTF this	*/
/*	is the substitute for readers that do not know \u.)		*/
/*									*/
/*  Returns 0 on success, -1 if the '?' could not be written.		*/

static int docRtfEmitUnicode(		RtfWriter *		rw,
					int			symbol )
    {
    /*  1  */
    if  ( symbol > 32767 && symbol < 65536 )
	{ symbol -= 65536;	}

    docRtfWriteArgTag( rw, "u", symbol );

    /*  2  */
    if  ( docRtfEscapeString( (void *)rw, 0, "?", 1 ) < 0 )
	{ return -1;	}

    return 0;
    }
135 
136 /************************************************************************/
137 /*									*/
/*  Emit a UTF-8 string in some legacy encoding.			*/
139 /*  The string might very well contain unicodes that cannot be		*/
140 /*  represented in the legacy encoding. The encoder stops on those	*/
141 /*  bytes and we emit them in \u1234 format.				*/
142 /*									*/
143 /************************************************************************/
144 
/*  Write the n bytes of the UTF-8 string ss through converter tc.	*/
/*									*/
/*  fontEncoded: If set, try to switch to another charset of the	*/
/*		current font before falling back to a \u tag.		*/
/*									*/
/*  1)  Let the converter emit as much as it can. It reports how many	*/
/*	bytes it consumed.						*/
/*  2)  Input left: decode the symbol where the converter stopped.	*/
/*  3)  With font encoding: look for a charset of the current font	*/
/*	for the symbol. If it differs from the current one, emit an	*/
/*	\f tag, switch the encoding of the text converter and retry	*/
/*	the conversion without consuming the symbol.			*/
/*  4)  Otherwise emit the symbol as a \u tag and step over it.	*/
/*									*/
/*  Returns 0 on success, -1 on failure.				*/

static int docRtfWriteEncodedString(	RtfWriter *		rw,
					TextConverter *		tc,
					int			fontEncoded,
					const char *		ss,
					int			n )
    {
    int		produced= 0;

    while( n > 0 )
	{
	int		consumed= 0;
	unsigned short	symbol;

	/*  1  */
	produced= textConverterConvertFromUtf8( tc, (void *)rw,
						&consumed, produced, ss, n );
	if  ( produced < 0 )
	    { LDEB(produced); return -1;	}

	ss += consumed; n -= consumed;

	if  ( n <= 0 )
	    { break;	}

	/*  2  */
	consumed= uniGetUtf8( &symbol, ss );
	if  ( consumed < 1 )
	    { LDEB(consumed); return -1;	}

	/*  3  */
	if  ( fontEncoded )
	    {
	    BufferDocument *		bd= rw->rwDocument;
	    const DocumentFont *	df;
	    TextAttribute *		ta= &(rw->rwTextAttribute);

	    df= docRtfGetCurrentFont( bd, ta );
	    if  ( df )
		{
		int		fontNumber;
		int		charset= FONTcharsetDEFAULT;
		const char *	encodingName;

		fontNumber= docRtfWriteGetCharset( rw, &charset,
								df, symbol );
		if  ( fontNumber >= 0 && rw->rwTextCharset != charset )
		    {
		    docRtfWriteArgTag( rw, "f", fontNumber );

		    encodingName= utilGetEncodingName( df->dfName,
								charset );
		    textConverterSetNativeEncodingName(
				rw->rwTextTextConverter,
				encodingName );
		    rw->rwTextCharset= charset;

		    /*  Retry the same input in the new charset  */
		    continue;
		    }
		}
	    }

	/*  4  */
	if  ( docRtfEmitUnicode( rw, symbol ) < 0 )
	    { LDEB(symbol); return -1;	}

	ss += consumed; n -= consumed;
	}

    return 0;
    }
210 
/*  Write the n bytes of the UTF-8 string ss through the converter	*/
/*  of the writer for the RTF document itself (rwRtfTextConverter).	*/
/*  No attempt is made to switch fonts: The font encoding flag is	*/
/*  passed as 0. The result of the write is not reported.		*/

void docRtfWriteDocEncodedString(	RtfWriter *		rw,
					const char *		ss,
					int			n )
    {
    const int	useFontEncoding= 0;

    (void)docRtfWriteEncodedString( rw, rw->rwRtfTextConverter,
						useFontEncoding, ss, n );

    return;
    }
220 
/*  Write the n bytes of the UTF-8 string ss in the encoding that	*/
/*  belongs to the current text attribute and charset.			*/
/*									*/
/*  1)  With the RTFflagUNENCODED save flag, delegate to the document	*/
/*	encoded variant.						*/
/*  2)  Ask the document for the encoding name. If there is none, use	*/
/*	the native encoding name of the RTF text converter.		*/
/*  3)  Configure the text converter and write with font encoding	*/
/*	enabled. The result of the write is not reported.		*/

void docRtfWriteFontEncodedString(	RtfWriter *		rw,
					const char *		ss,
					int			n )
    {
    const char *	name;

    /*  1  */
    if  ( rw->rwSaveFlags & RTFflagUNENCODED )
	{
	docRtfWriteDocEncodedString( rw, ss, n );
	return;
	}

    /*  2  */
    name= docGetEncodingName( rw->rwDocument,
				&(rw->rwTextAttribute), rw->rwTextCharset );
    if  ( name == (const char *)0 )
	{ name= rw->rwRtfTextConverter->tcNativeEncodingName;	}

    /*  3  */
    textConverterSetNativeEncodingName( rw->rwTextTextConverter, name );

    (void)docRtfWriteEncodedString( rw, rw->rwTextTextConverter,
							1, ss, n );

    return;
    }
244 
/*  Write the n bytes at ss without any encoding conversion: They	*/
/*  are only escaped by docRtfEscapeString(). The result of the write	*/
/*  is not reported.							*/

void docRtfWriteRawBytes(	RtfWriter *		rw,
				const char *		ss,
				int			n )
    {
    const int	producedSoFar= 0;	/*  ignored by the callee  */

    (void)docRtfEscapeString( (void *)rw, producedSoFar, ss, n );

    return;
    }
251 
252 
253 /************************************************************************/
254 
/*  Prepare the two text converters of the writer.			*/
/*									*/
/*  1)  The RTF converter gets DOC_RTF_AnsiCharsetName as its native	*/
/*	encoding.							*/
/*  2)  Both converters deliver their output to docRtfEscapeString().	*/

void docRtfWriteSetupTextConverters(	RtfWriter *	rw )
    {
    /*  1  */
    textConverterSetNativeEncodingName( rw->rwRtfTextConverter,
						DOC_RTF_AnsiCharsetName );

    /*  2  */
    textConverterSetProduce( rw->rwRtfTextConverter,
						docRtfEscapeString );
    textConverterSetProduce( rw->rwTextTextConverter,
						docRtfEscapeString );

    return;
    }
264 
265