1 /************************************************************************/
2 /* */
3 /* Read a plain text file into a BufferDocument */
4 /* */
5 /************************************************************************/
6
7 # include "docRtfConfig.h"
8
9 # include <stdio.h>
10 # include <ctype.h>
11
12 # include <appDebugon.h>
13
14 # include <sioMemory.h>
15 # include <docBuf.h>
16 # include <docNodeTree.h>
17 # include <docParaString.h>
18 # include <docParaParticules.h>
19 # include <textConverter.h>
20 # include <textConverterImpl.h>
21 # include <sioUtf8.h>
22 # include "docPlainReadWrite.h"
23
24 /************************************************************************/
25 /* */
26 /* Read a document as a whole. */
27 /* */
28 /************************************************************************/
29
30 typedef struct PlainReadingContext
31 {
32 TextAttribute prcTextAttribute;
33 BufferItem * prcParaNode;
34 int prcHasOpenEnd;
35 int prcLongestParagraph;
36 MemoryBuffer prcCollected;
37
38 TextConverter prcTextConverter;
39 } PlainReadingContext;
40
/************************************************************************/
/*                                                                      */
/*  Initialise a reading context.                                       */
/*                                                                      */
/************************************************************************/

static void docPlainInitReadingContext( PlainReadingContext * prc )
{
    TextConverter *converter = &(prc->prcTextConverter);

    /* Plain members: no paragraph yet, nothing measured, no open end. */
    prc->prcLongestParagraph = 0;
    prc->prcHasOpenEnd = 0;
    prc->prcParaNode = (BufferItem *)0;

    /* Members that have their own initialisation routine. */
    utilInitTextAttribute( &(prc->prcTextAttribute) );
    utilInitMemoryBuffer( &(prc->prcCollected) );

    /* The converter is initialised first and configured afterwards. */
    textInitTextConverter( converter );
    docParaSetupTextConverter( converter );
}
53
/************************************************************************/
/*                                                                      */
/*  Release the resources held by a reading context.                    */
/*                                                                      */
/************************************************************************/

static void docPlainCleanReadingContext( PlainReadingContext * prc )
{
    MemoryBuffer *collected = &(prc->prcCollected);
    TextConverter *converter = &(prc->prcTextConverter);

    /* No clean call is made for prcTextAttribute. */

    utilCleanMemoryBuffer( collected );
    textCleanTextConverter( converter );
}
61
/************************************************************************/
/*                                                                      */
/*  Read the contents of one paragraph.                                 */
/*                                                                      */
/*  c is the first character: the caller has already read it.           */
/*  Returns 0 on success, -1 on failure.                                */
/*                                                                      */
/************************************************************************/

static int docPlainReadParaContents( BufferDocument * bd,
                                BufferItem * paraNode,
                                int c,
                                SimpleInputStream * sis,
                                PlainReadingContext * prc )
{
    int rval= 0;
    SimpleOutputStream * sos= (SimpleOutputStream *)0;

    utilEmptyMemoryBuffer( &(prc->prcCollected) );

    /*
     *  Every pass through the loop starts with a character that has
     *  been read but not yet handled. All decisions about the end of
     *  the paragraph are taken here, so they apply in the same way
     *  after text, after a tab and after a form feed.
     */
    for (;;)
    {
        int done= 0;

        /* The input ended without a terminating newline. */
        if ( c == EOF )
        { prc->prcHasOpenEnd= 1; break; }

        /* The newline terminates the paragraph. It is never stored */
        /* as text: not for an empty line and not after a tab.      */
        if ( c == '\n' )
        { prc->prcHasOpenEnd= 0; break; }

        if ( c == '\f' )
        {
            /* Only a form feed before any content starts a new    */
            /* page. Inside the paragraph it is ignored.           */
            if ( docParaStrlen( paraNode ) == 0 )
            { paraNode->biParaBreakKind= DOCibkPAGE; }

            c= sioInGetUtf8( sis );
            continue;
        }

        if ( c == '\t' )
        {
            if ( docSaveSpecialParticule( bd, paraNode,
                            &(prc->prcTextAttribute), DOCkindTAB ) )
            { LDEB(docParaStrlen(paraNode)); rval= -1; goto ready; }

            c= sioInGetUtf8( sis );
            continue;
        }

        /* Collect a run of ordinary text in the scratch buffer. */
        utilEmptyMemoryBuffer( &(prc->prcCollected) );
        sos= sioOutMemoryOpen( &(prc->prcCollected) );
        if ( ! sos )
        { XDEB(sos); rval= -1; goto ready; }

        do
        {
            /* Carriage returns are dropped. */
            if ( c != '\r' )
            {
                if ( sioOutPutUtf8( c, sos ) < 0 )
                { LCDEB(c,c); rval= -1; goto ready; }

                done++;
            }

            c= sioInGetUtf8( sis );
        }
        while ( c != EOF && c != '\n' && c != '\t' && c != '\f' );

        sioOutClose( sos ); sos= (SimpleOutputStream *)0;

        /* A run that only held carriage returns adds nothing. */
        if ( done > 0 )
        {
            const unsigned char * bytes;
            int size;

            bytes= utilMemoryBufferGetBytes( &size, &(prc->prcCollected) );

            if ( docParaAppendText( bd, paraNode, &(prc->prcTextAttribute),
                        &(prc->prcTextConverter), (char *)bytes, size ) )
            { LDEB(size); rval= -1; goto ready; }
        }
    }

    /* HACK: fix first paragraph of document! Drop a leading empty */
    /* span if other particules follow it.                         */
    if ( paraNode->biParaParticuleCount > 1 &&
         paraNode->biParaParticules[0].tpKind == DOCkindSPAN &&
         paraNode->biParaParticules[0].tpStrlen == 0 )
    { docDeleteParticules( paraNode, 0, 1 ); }

  ready:

    /* Only still open if we left the text run through an error. */
    if ( sos )
    { sioOutClose( sos ); }

    return rval;
}
159
160 /************************************************************************/
161 /* */
162 /* Read a paragraph of text. */
163 /* */
164 /* 1) Read the first byte. */
165 /* */
166 /************************************************************************/
167
docPlainReadParagraph(BufferDocument * bd,SimpleInputStream * sis,PlainReadingContext * prc)168 static int docPlainReadParagraph( BufferDocument * bd,
169 SimpleInputStream * sis,
170 PlainReadingContext * prc )
171 {
172 int c;
173 BufferItem * paraNode;
174
175 int textAttrNr;
176
177 /* 1 */
178 c= sioInGetUtf8( sis );
179 if ( c == EOF )
180 { return 1; }
181
182 if ( ! prc->prcParaNode )
183 {
184 DocumentPosition dp;
185
186 if ( docDocumentHead( &dp, bd ) )
187 { LDEB(1); }
188
189 paraNode= dp.dpNode;
190 prc->prcParaNode= paraNode;
191 }
192 else{
193 paraNode= docInsertNode( bd, prc->prcParaNode->biParent,
194 -1, DOClevPARA );
195 if ( ! paraNode )
196 { XDEB(paraNode); return -1; }
197
198 prc->prcParaNode= paraNode;
199 }
200
201 if ( docPlainReadParaContents( bd, paraNode, c, sis, prc ) )
202 { LDEB(1); return -1; }
203
204 if ( paraNode->biParaParticuleCount == 0 )
205 {
206 textAttrNr= docTextAttributeNumber( bd, &(prc->prcTextAttribute) );
207
208 if ( ! docInsertTextParticule( paraNode, 0, 0, 0,
209 DOCkindSPAN, textAttrNr ) )
210 { LDEB( paraNode->biParaParticuleCount); return -1; }
211 }
212
213 if ( prc->prcLongestParagraph < docParaStrlen( paraNode ) )
214 { prc->prcLongestParagraph= docParaStrlen( paraNode ); }
215
216 return 0;
217 }
218
219 /************************************************************************/
220 /* */
221 /* Read a plain text file. */
222 /* */
223 /* 1) Assume 12 cpi and courier. ( 12 cpi .. 6pt wide .. 10 pt high. )*/
224 /* But make it one less: 9 pt to match the rfc1234.txt make up. */
225 /* Now if we want 80 characters on a line we need 80*6=480 pt. on */
226 /* a line. A4 is 595 points wide, so we have 115 points left for */
227 /* the margins. Round to 3/4 inch= 48 pt= 960 twips. */
228 /* 2) Tab every 8 characters. 12 cpi .. 8/12 inch= 960 twips. */
229 /* */
230 /************************************************************************/
231
docPlainReadFile(SimpleInputStream * sis,int * pMxL,const DocumentGeometry * dgPaper)232 BufferDocument * docPlainReadFile(
233 SimpleInputStream * sis,
234 int * pMxL,
235 const DocumentGeometry * dgPaper )
236 {
237 BufferDocument * rval= (BufferDocument *)0;
238 BufferDocument * bd;
239 DocumentGeometry dgDoc= *dgPaper;
240 /*
241 const char * lc_ctype= nl_langinfo( CODESET );
242 */
243 const char * lc_ctype= "UTF-8";
244 const char * font= "Courier";
245
246 PlainReadingContext prc;
247
248 docPlainInitReadingContext( &prc );
249
250 dgDoc.dgLeftMarginTwips= 960;
251 dgDoc.dgRightMarginTwips= 960;
252
253 if ( lc_ctype )
254 {
255 textConverterSetNativeEncodingName(
256 &(prc.prcTextConverter), lc_ctype );
257 }
258
259 bd= docNewFile( &(prc.prcTextAttribute),
260 font, 2* 9,
261 (PostScriptFontList *)0, &dgDoc );
262 if ( ! bd )
263 { XDEB(bd); return bd; }
264
265 bd->bdProperties.dpTabIntervalTwips= 960;
266
267 for (;;)
268 {
269 int res;
270
271 res= docPlainReadParagraph( bd, sis, &prc );
272
273 if ( res > 0 )
274 { break; }
275 if ( res < 0 )
276 { LDEB(res); goto ready; }
277 }
278
279 *pMxL= prc.prcLongestParagraph;
280 bd->bdProperties.dpHasOpenEnd= prc.prcHasOpenEnd;
281 rval= bd; bd= (BufferDocument *)0; /* steal */
282
283 ready:
284
285 docPlainCleanReadingContext( &prc );
286 if ( bd )
287 { docFreeDocument( bd ); }
288
289 return rval;
290 }
291