1 /************************************************************************/
2 /*									*/
3 /*  Read a plain text file into a BufferDocument			*/
4 /*									*/
5 /************************************************************************/
6 
7 #   include	"docRtfConfig.h"
8 
9 #   include	<stdio.h>
10 #   include	<ctype.h>
11 
12 #   include	<appDebugon.h>
13 
14 #   include	<sioMemory.h>
15 #   include	<docBuf.h>
16 #   include	<docNodeTree.h>
17 #   include	<docParaString.h>
18 #   include	<docParaParticules.h>
19 #   include	<textConverter.h>
20 #   include	<textConverterImpl.h>
21 #   include	<sioUtf8.h>
22 #   include	"docPlainReadWrite.h"
23 
24 /************************************************************************/
25 /*									*/
26 /*  Read a document as a whole.						*/
27 /*									*/
28 /************************************************************************/
29 
30 typedef struct PlainReadingContext
31     {
32     TextAttribute	prcTextAttribute;
33     BufferItem *	prcParaNode;
34     int			prcHasOpenEnd;
35     int			prcLongestParagraph;
36     MemoryBuffer	prcCollected;
37 
38     TextConverter	prcTextConverter;
39     } PlainReadingContext;
40 
/*  Bring a reading context in its initial state: No paragraph read	*/
/*  yet, no open end, longest paragraph 0, and all embedded objects	*/
/*  initialised. The text converter is additionally set up through	*/
/*  docParaSetupTextConverter().					*/
static void docPlainInitReadingContext( PlainReadingContext *	prc )
    {
    TextConverter * const	tc= &(prc->prcTextConverter);

    /*  Plain bookkeeping fields.  */
    prc->prcParaNode= (BufferItem *)0;
    prc->prcHasOpenEnd= 0;
    prc->prcLongestParagraph= 0;

    /*  Embedded objects with their own initialisers.  */
    utilInitTextAttribute( &(prc->prcTextAttribute) );
    utilInitMemoryBuffer( &(prc->prcCollected) );

    /*  The converter must be initialised before it is set up.  */
    textInitTextConverter( tc );
    docParaSetupTextConverter( tc );

    return;
    }
53 
/*  Release the resources that are held by a reading context: the	*/
/*  scratch buffer and the text converter. The context itself is not	*/
/*  freed.								*/
static void docPlainCleanReadingContext( PlainReadingContext *	prc )
    {
    MemoryBuffer * const	collected= &(prc->prcCollected);
    TextConverter * const	tc= &(prc->prcTextConverter);

    /*  No clean call is made for prcTextAttribute.  */

    utilCleanMemoryBuffer( collected );
    textCleanTextConverter( tc );

    return;
    }
61 
/*  Read the contents of one line of input into a paragraph.		*/
/*									*/
/*  bd		The document that is built.				*/
/*  paraNode	The paragraph that receives the text.			*/
/*  c		The first character of the line: It has already been	*/
/*		read by the caller.					*/
/*  sis		The stream to read the rest of the line from.		*/
/*  prc		Reading context: Holds the text attribute, the scratch	*/
/*		buffer and the text converter. prcHasOpenEnd is set to	*/
/*		0 if the line ended in a newline and to 1 if the input	*/
/*		ended first.						*/
/*									*/
/*  Returns 0 on success, -1 on failure.				*/
/*									*/
/*  1)  End of input: the line was not terminated by a newline.	*/
/*  2)  End of line. This test is made before anything is collected,	*/
/*	such that a newline that immediately follows the start of the	*/
/*	line, a tab or a form feed ends the paragraph, rather than	*/
/*	being inserted as text.						*/
/*  3)  Form feed: Only at the beginning of the paragraph, it results	*/
/*	in a page break. Elsewhere, it is ignored.			*/
/*  4)  Tab: Insert a tab particule.					*/
/*  5)  Collect a run of text up to the next newline, tab or the end	*/
/*	of the input. Carriage returns are dropped.			*/
/*  6)  Append the run to the paragraph.				*/
/*  7)  HACK: Remove an empty leading span if it is followed by other	*/
/*	particules.							*/
static int docPlainReadParaContents(	BufferDocument *	bd,
					BufferItem *		paraNode,
					int			c,
					SimpleInputStream *	sis,
					PlainReadingContext *	prc )
    {
    int				rval= 0;
    SimpleOutputStream *	sos= (SimpleOutputStream *)0;

    utilEmptyMemoryBuffer( &(prc->prcCollected) );

    for (;;)
	{
	int		done;

	/*  1  */
	if  ( c == EOF )
	    { prc->prcHasOpenEnd= 1; break;	}

	/*  2  */
	if  ( c == '\n' )
	    { prc->prcHasOpenEnd= 0; break;	}

	/*  3  */
	if  ( c == '\f' )
	    {
	    if  ( docParaStrlen( paraNode ) == 0 )
		{ paraNode->biParaBreakKind= DOCibkPAGE;	}

	    c= sioInGetUtf8( sis );
	    continue;
	    }

	/*  4  */
	if  ( c == '\t' )
	    {
	    if  ( docSaveSpecialParticule( bd, paraNode,
				    &(prc->prcTextAttribute), DOCkindTAB ) )
		{ LDEB(docParaStrlen(paraNode)); rval= -1; goto ready;	}

	    c= sioInGetUtf8( sis );
	    continue;
	    }

	/*  5  */
	utilEmptyMemoryBuffer( &(prc->prcCollected) );
	sos= sioOutMemoryOpen( &(prc->prcCollected) );
	if  ( ! sos )
	    { XDEB(sos); rval= -1; goto ready;	}
	done= 0;

	for (;;)
	    {
	    if  ( c != '\r' )
		{
		done++;
		if  ( sioOutPutUtf8( c, sos ) < 0 )
		    { LCDEB(c,c); rval= -1; goto ready;	}
		}

	    c= sioInGetUtf8( sis );
	    if  ( c == EOF || c == '\n' || c == '\t' )
		{ break;	}
	    }

	/*  Close before the bytes are retrieved from the buffer.  */
	sioOutClose( sos ); sos= (SimpleOutputStream *)0;

	/*  6  */
	if  ( done > 0 )
	    {
	    const unsigned char *	bytes;
	    int				size;

	    bytes= utilMemoryBufferGetBytes( &size, &(prc->prcCollected) );

	    if  ( docParaAppendText( bd, paraNode, &(prc->prcTextAttribute),
			    &(prc->prcTextConverter), (char *)bytes, size ) )
		{ LDEB(size); rval= -1; goto ready;	}
	    }

	/*  The character that ended the run is handled at the top	*/
	/*  of the loop.						*/
	}

    /*  7  HACK: fix first paragraph of document!	*/
    if  ( paraNode->biParaParticuleCount > 1			&&
	  paraNode->biParaParticules[0].tpKind == DOCkindSPAN	&&
	  paraNode->biParaParticules[0].tpStrlen == 0		)
	{ docDeleteParticules( paraNode, 0, 1 );	}

  ready:

    /*  Only still open if we bailed out while collecting a run.  */
    if  ( sos )
	{ sioOutClose( sos );	}

    return rval;
    }
159 
160 /************************************************************************/
161 /*									*/
162 /*  Read a paragraph of text.						*/
163 /*									*/
164 /*  1)  Read the first byte.						*/
165 /*									*/
166 /************************************************************************/
167 
/*  Read one line of input into a paragraph of the document.		*/
/*									*/
/*  bd		The document that is built.				*/
/*  sis		The stream to read from.				*/
/*  prc		Reading context. prcParaNode is set to the paragraph	*/
/*		that was read into, prcLongestParagraph is updated.	*/
/*									*/
/*  Returns 0 if a paragraph was read, 1 if the input was exhausted	*/
/*  before anything could be read, and -1 on failure.			*/
/*									*/
/*  2)  The first line goes to the paragraph at the head of the	*/
/*	document. Give up if the head cannot be found: the position	*/
/*	is not usable in that case.					*/
/*  3)  Subsequent lines get a fresh paragraph that is inserted	*/
/*	in the parent of the previous one. (Position -1.)		*/
/*  4)  Make sure that the paragraph holds at least one particule.	*/
static int docPlainReadParagraph(	BufferDocument *	bd,
					SimpleInputStream *	sis,
					PlainReadingContext *	prc )
    {
    int			c;
    BufferItem *	paraNode;

    /*  1  */
    c= sioInGetUtf8( sis );
    if  ( c == EOF )
	{ return 1;	}

    if  ( ! prc->prcParaNode )
	{
	DocumentPosition	dp;

	/*  2  */
	if  ( docDocumentHead( &dp, bd ) )
	    { LDEB(1); return -1;	}

	paraNode= dp.dpNode;
	}
    else{
	/*  3  */
	paraNode= docInsertNode( bd, prc->prcParaNode->biParent,
							    -1, DOClevPARA );
	if  ( ! paraNode )
	    { XDEB(paraNode); return -1;	}
	}

    prc->prcParaNode= paraNode;

    if  ( docPlainReadParaContents( bd, paraNode, c, sis, prc ) )
	{ LDEB(1); return -1;	}

    /*  4  */
    if  ( paraNode->biParaParticuleCount == 0 )
	{
	int	textAttrNr;

	textAttrNr= docTextAttributeNumber( bd, &(prc->prcTextAttribute) );

	if  ( ! docInsertTextParticule( paraNode, 0, 0, 0,
						    DOCkindSPAN, textAttrNr ) )
	    { LDEB( paraNode->biParaParticuleCount); return -1;	}
	}

    if  ( prc->prcLongestParagraph < docParaStrlen( paraNode ) )
	{ prc->prcLongestParagraph=  docParaStrlen( paraNode );	}

    return 0;
    }
218 
219 /************************************************************************/
220 /*									*/
221 /*  Read a plain text file.						*/
222 /*									*/
223 /*  1)  Assume 12 cpi and courier. ( 12 cpi .. 6pt wide .. 10 pt high. )*/
224 /*	But make it one less: 9 pt to match the rfc1234.txt make up.	*/
225 /*	Now if we want 80 characters on a line we need 80*6=480 pt. on	*/
226 /*	a line. A4 is 595 points wide, so we have 115 points left for	*/
227 /*	the margins. Round to 2/3 inch= 48 pt= 960 twips.		*/
228 /*  2)  Tab every 8 characters. 12 cpi .. 8/12 inch= 960 twips.		*/
229 /*									*/
230 /************************************************************************/
231 
docPlainReadFile(SimpleInputStream * sis,int * pMxL,const DocumentGeometry * dgPaper)232 BufferDocument * docPlainReadFile(
233 			SimpleInputStream *		sis,
234 			int *				pMxL,
235 			const DocumentGeometry *	dgPaper )
236     {
237     BufferDocument *	rval= (BufferDocument *)0;
238     BufferDocument *	bd;
239     DocumentGeometry	dgDoc= *dgPaper;
240     /*
241     const char *	lc_ctype= nl_langinfo( CODESET );
242     */
243     const char *	lc_ctype= "UTF-8";
244     const char *	font= "Courier";
245 
246     PlainReadingContext	prc;
247 
248     docPlainInitReadingContext( &prc );
249 
250     dgDoc.dgLeftMarginTwips= 960;
251     dgDoc.dgRightMarginTwips= 960;
252 
253     if  ( lc_ctype )
254 	{
255 	textConverterSetNativeEncodingName(
256 					&(prc.prcTextConverter), lc_ctype );
257 	}
258 
259     bd= docNewFile( &(prc.prcTextAttribute),
260 				    font, 2* 9,
261 				    (PostScriptFontList *)0, &dgDoc );
262     if  ( ! bd )
263 	{ XDEB(bd); return bd;	}
264 
265     bd->bdProperties.dpTabIntervalTwips= 960;
266 
267     for (;;)
268 	{
269 	int	res;
270 
271 	res= docPlainReadParagraph( bd, sis, &prc );
272 
273 	if  ( res > 0 )
274 	    { break;	}
275 	if  ( res < 0 )
276 	    { LDEB(res); goto ready; }
277 	}
278 
279     *pMxL= prc.prcLongestParagraph;
280     bd->bdProperties.dpHasOpenEnd= prc.prcHasOpenEnd;
281     rval= bd; bd= (BufferDocument *)0; /* steal */
282 
283   ready:
284 
285     docPlainCleanReadingContext( &prc );
286     if  ( bd )
287 	{ docFreeDocument( bd );	}
288 
289     return rval;
290     }
291