1 /*   (C) Copyright 2000, 2001, 2002, 2003, 2004, 2005 Stijn van Dongen
2  *   (C) Copyright 2006, 2007, 2008, 2009  Stijn van Dongen
3  *
4  * This file is part of tingea.  You can redistribute and/or modify tingea
5  * under the terms of the GNU General Public License; either version 3 of the
6  * License or (at your option) any later version.  You should have received a
7  * copy of the GPL along with tingea, in the file COPYING.
8 */
9 
10 /* STATUS:
11  *    usable:  yes
12  *    tested:  yes, stress-tested in zoem and mcl
13  *    ad hoc:  somewhat
14  *    quirks:  probably a few
15  *    support: limited
16  *
17  * AIMS
18  *    -  Provide convenient and efficient wrappers for reading lines, files, searching.
19  *    -  Within these wrappers, account bytes and lines read.
20  *    It is explicitly not an aim to be an all-encompassing interface, wrapping
21  *    everything provided by stdio.h.  The type is not opaque and you are
22  *    encouraged to inspect its fp member.
23  *    -  The open modes are inspected to infer some knowledge,
24  *       then passed on directly to fopen.
25  *    -  File "-" is interpreted as either STDIN or STDOUT depending on the open mode.
26  *
27  * BUGS
28  *    -  buffer framework is not fully implemented:
29  *          mcxIOexpectNum and mcxIOexpectReal ignore buffer.
30  *    -  Should incorporate more (f)error checking.
31  *
32  * TODO:
33  *    -  document interfaces.
34  *    -  document which routines corrupt the counts.
35  *    -  make sure that buffer treats \0 bytes correctly. Should be
36  *       pretty close.
37  *    -  buffered reads (problematic: mcxIOexpectNum and friends).
38  *    -  design {reset,close} framework, esp related to usr member.
39  *    ?  support for pipes
40 */
41 
42 #ifndef tingea_file_h
43 #define tingea_file_h
44 
45 #include <stdio.h>
46 #include <sys/types.h>
47 
48 #include "ting.h"
49 #include "types.h"
50 
51 
52 /* The thing below seems a reasonable test for seekability.
53  * Let's agree that the main thing is the encapsulation.
54  *
55 */
56 
/* Evaluates to nonzero when fseek(fp, 0, SEEK_CUR) succeeds (returns 0),
 * zero otherwise.  The test is not passive: it calls fseek on the stream,
 * and per the C standard a successful fseek clears the end-of-file
 * indicator and undoes the effect of any preceding ungetc.
*/
57 #define mcxFPisSeekable(fp) (!fseek(fp, 0, SEEK_CUR))
58 
59 /* Possibly more stringent check:
60  * int had_error  = ferror(file);
61  * long curpos    = ftell(file);
62  * bool seekable  = (curpos != -1L && fseek(file, curpos, SEEK_SET) == 0);
63  * if (!had_error)
64  * clearerr(file);
65 */
66 
67 
68 
69 /*  **************************************************************************
70  * *
71  **            Implementation notes.
72  *
73  *
74  *    This is meant to be a lightweight layer for file operations.
75  *    It is so lightweight that the pivotal data structure is not hidden.
76  *
77  *    Basic usage:
78  *       mcxIO* xf = mcxIOnew(somestr, "r");
79  *       mcxIOopen(xf, EXIT_ON_FAIL);
80  *
81  *
82  *    Searching:
83  *       mcxIOfind(xf, pattern, ON_FAIL)
84  *
85  *
86  *    Reading lines:
87  *       mcxIOreadLine(xf, txt, mode)
88  *       modes (or'ed bits):
89  *          MCX_READLINE_CHOMP
90  *          MCX_READLINE_SKIP_EMPTY
91  *          MCX_READLINE_PAR  (read a paragraph)
92  *          MCX_READLINE_BSC  (backslash continues line)
93  *          MCX_READLINE_DOT  (single dot on single line ends paragraph)
94  *    Reading files:
95  *       mcxIOreadFile(xf, txt)
96  *
97  *
98  *    Reading bytes:
99  *       int c = mcxIOstep(xf)
100  *       mcxIOstepback(c, xf)
101  *
102  *       These keep track of byte count, line count, and offset within line.
103  *
104  *
105  *    Reset attributes for file name object - change name or mode.
106  *       mcxIOrenew(xf, name, mode)
107  *
108  *
109  *    There are some more small utility functions.
110  *
111     **************************************************************************
112    *
113   *
114  *
115  * TODO:
116  *    much todo about everything.
117  *
118  *    mcxIOdiscardLine
119  *    mcxIOskipSpace
120  *       Change to instance of sth more general.
121  *
122 */
123 
124 
/* Field accessors for a pointer xf to an mcxIO.  The argument is
 * parenthesized so that any pointer-valued expression (e.g. &io or *xfpp)
 * expands correctly.
 *
 *    mcxIOateof(xf)    the ateof member.
 *    mcxIOstdio(xf)    the stdio member.
 *    mcxIOlc(xf)       line count as a long: lc, plus one when lo is nonzero.
 *                      NOTE: evaluates xf twice; do not pass an expression
 *                      with side effects.
*/
#define mcxIOateof(xf)  ((xf)->ateof)
#define mcxIOstdio(xf)  ((xf)->stdio)
#define mcxIOlc(xf)     ((long) (((xf)->lc) + ((xf)->lo ? 1 : 0)))
                              /* this also takes care of EOF
                               * not preceded by a newline
                              */
131 
132 /* As long as you did not use mcxIOopen, feel free to do anything with the fn
133  * member, especially right after mcxIOnew.
134 */
135 
/* The pivotal, deliberately non-opaque stream descriptor: file name, open
 * mode, the underlying FILE*, read accounting (lines, line offset, bytes),
 * an optional buffer, and a user object with optional reset/free callbacks.
*/
136 typedef struct
137 {  mcxTing*       fn       /*    file name; "-" means STDIN or STDOUT depending on mode */
138 ;  char*          mode     /*    open mode; inspected, then passed on to fopen */
139 ;  FILE*          fp       /*    underlying stdio stream; callers may inspect it */
140 ;  dim            lc       /*    line count        */
141 ;  dim            lo       /*    line offset       */
142 ;  dim            lo_      /*    line offset backup, only valid when lo == 0 */
143 ;  dim            bc       /*    byte count        */
144 ;  int            ateof    /*    read via mcxIOateof; presumably nonzero once EOF was hit - confirm */
145 ;  int            stdio    /*    read via mcxIOstdio; presumably nonzero for a standard stream - confirm */
146 ;  mcxTing*       buffer   /*    e.g. when tryCookie fails and unseekable stream */
147 ;  dim            buffer_consumed   /* presumably the number of buffer bytes already handed out - confirm */
148 ;  void*          usr      /*    user object       */
149 ;  mcxstatus    (*usr_reset)(void*)    /*  function to reset user object */
150 ;  void         (*usr_free)(void*)     /*  function to free user object  */
151 ;
152 }  mcxIO    ;
153 
154 
155 /*
156  *    mcxIOrenew does *not* support callback for resetting the usr object
157 */
158 
/* Reset the attributes of a file name object: change its name or its mode.
 * NOTE(review): whether a NULL xf creates a new object, and whether a NULL
 * name or mode leaves that attribute untouched, cannot be told from this
 * header - confirm in the implementation.
*/
159 mcxIO* mcxIOrenew
160 (  mcxIO*         xf
161 ,  const char*    name
162 ,  const char*    mode
163 )  ;
164 
165 
/* Create an mcxIO for file `name` with open mode `mode`.  The stream is
 * opened in a separate step; basic usage is mcxIOnew followed by mcxIOopen.
 * The name "-" is interpreted as STDIN or STDOUT depending on the mode.
*/
166 mcxIO* mcxIOnew
167 (  const char*    name
168 ,  const char*    mode
169 )  ;
170 
171 
/* Open the stream described by xf.  The open mode is inspected to infer
 * some knowledge and then passed on directly to fopen.  ON_FAIL selects the
 * failure policy (the basic usage example passes EXIT_ON_FAIL).
*/
172 mcxstatus mcxIOopen
173 (  mcxIO*         xf
174 ,  mcxOnFail      ON_FAIL
175 )  ;
176 
177 
/* Same signature as mcxIOopen.
 * NOTE(review): not described in this header; presumably opens xf only when
 * it has not been opened yet - confirm in the implementation.
*/
178 mcxstatus mcxIOtestOpen
179 (  mcxIO*         xf
180 ,  mcxOnFail      ON_FAIL
181 )  ;
182 
183 
184 /*
185  *    mcxIOfree does *not* support callback for freeing the usr object
186 */
187 
188 void mcxIOfree
189 (  mcxIO**  xf
190 )  ;
191 
192 
/* Untyped variant taking a void*.
 * NOTE(review): the parameter name xfpp suggests it is the address of an
 * mcxIO* (as taken by mcxIOfree), for use where a generic void* callback is
 * required - confirm in the implementation.
*/
193 void mcxIOfree_v
194 (  void*  xfpp
195 )  ;
196 
/* NOTE(review): not described in this header; it takes the mcxIO itself
 * rather than its address, so presumably it releases what xf holds without
 * freeing xf - confirm in the implementation.
*/
197 void mcxIOrelease
198 (  mcxIO*   xf
199 )  ;
200 
201 
/* Report a problem concerning stream xf.
 * NOTE(review): presumably complainer names the reporting routine and
 * complaint is the message text - confirm in the implementation.
*/
202 void mcxIOerr
203 (  mcxIO*   xf
204 ,  const char     *complainer
205 ,  const char     *complaint
206 )  ;
207 
208 
209 /* Currently, for stdin/stdout/stderr clearerr is issued if necessary.
210  * This makes e.g. repeated reads from STDIN possible.
211  *
212  * usr_reset is called if present.
213 */
214 mcxstatus mcxIOclose
215 (  mcxIO       *xf
216 )  ;
217 
218 
/* NOTE(review): not described in this header.  The {reset,close} framework
 * is listed as a TODO in the file header; presumably this resets the
 * accounting state of xf - confirm in the implementation.
*/
219 mcxstatus mcxIOreset
220 (  mcxIO       *xf
221 )  ;
222 
223 
/* Read a file through xf into fileTxt.
 * Known limitation stated in this header: this routine does not access
 * xf->buffer, so bytes stored there after a failed cookie test are not seen.
*/
224 mcxstatus  mcxIOreadFile
225 (  mcxIO       *xf
226 ,  mcxTing     *fileTxt
227 )  ;
228 
229 
/* Mode bits for mcxIOreadLine; distinct powers of two, to be combined. */
230 #define MCX_READLINE_DEFAULT      0     /* no mode bit set */
231 #define MCX_READLINE_CHOMP        1     /* presumably: strip the line terminator - confirm */
232 #define MCX_READLINE_SKIP_EMPTY   2     /* presumably: skip empty lines - confirm */
233 #define MCX_READLINE_PAR          4     /* read a paragraph */
234 #define MCX_READLINE_BSC          8     /* backslash continues line */
235 #define MCX_READLINE_DOT          16    /* single dot on single line ends paragraph */
236 
237 
/* Read a line (or a paragraph, depending on flags) from xf into lineTxt.
 * flags is a combination of the MCX_READLINE_* bits.  According to the notes
 * in this header this routine does access xf->buffer.
*/
238 mcxstatus  mcxIOreadLine
239 (  mcxIO       *xf
240 ,  mcxTing     *lineTxt
241 ,  mcxbits     flags
242 )  ;
243 
244 
/* NOTE(review): not described in this header.  By name and signature it
 * presumably appends up to sz bytes read from xf to dst; the meaning of the
 * ofs return value and of flags must be confirmed in the implementation.
*/
245 ofs mcxIOappendChunk
246 (  mcxIO        *xf
247 ,  mcxTing      *dst
248 ,  dim          sz
249 ,  mcxbits      flags
250 )  ;
251 
252 
253 /* Returns the number of bytes that could be discarded.
254 */
255 dim mcxIOdiscardLine
256 (  mcxIO       *xf
257 )  ;
258 
259 
260 /* Returns the number of bytes that could be discarded.
261  * ONLY keeps the xf->bc counter up to date.
262 */
263 dim mcxIOdiscard
264 (  mcxIO       *xf
265 ,  dim         amount
266 )  ;
267 
268 
269 /* OK to call this after mcxIOnew, before mcxIOopen */
270 
271 mcxstatus mcxIOnewName
272 (  mcxIO*    xf
273 ,  const char* newname
274 )  ;
275 
276 
277 /* OK to call this after mcxIOnew, before mcxIOopen */
278 
279 mcxstatus mcxIOappendName
280 (  mcxIO*    xf
281 ,  const char* suffix
282 )  ;
283 
284 
/* Read the next byte from xf; usage: int c = mcxIOstep(xf).
 * Keeps track of byte count, line count, and offset within line.
 * According to the notes in this header this routine does access xf->buffer.
*/
285 int mcxIOstep
286 (  mcxIO*    xf
287 )  ;
288 
289 
/* Counterpart of mcxIOstep; usage: mcxIOstepback(c, xf).  Byte count, line
 * count, and offset within line are kept up to date.
 * NOTE(review): presumably pushes c back onto the stream; the meaning of
 * the return value is not shown in this header - confirm.
*/
290 int mcxIOstepback
291 (  int c
292 ,  mcxIO*    xf
293 )  ;
294 
295 
/* NOTE(review): not described in this header; presumably reports the
 * current position of xf (its line/offset/byte accounting) on channel -
 * confirm in the implementation.
*/
296 void mcxIOpos
297 (  mcxIO*   xf
298 ,  FILE*    channel
299 )  ;
300 
301 
/* Takes no arguments and returns nothing.
 * NOTE(review): not described in this header; presumably lists the
 * available paragraph/readline modes - confirm in the implementation.
*/
302 void mcxIOlistParmodes
303 (  void
304 )  ;
305 
306 
307 /*
308  *    Returns count of trailing characters in str not matching.
309 */
310 
311 int mcxIOexpect
312 (  mcxIO*         xf
313 ,  const char*    str
314 ,  mcxOnFail      ON_FAIL
315 )  ;
316 
/* Expect a real number on xf; dblp presumably receives the parsed value.
 * Known limitation stated in this header: this routine ignores xf->buffer.
*/
317 mcxstatus mcxIOexpectReal
318 (  mcxIO*         xf
319 ,  double*        dblp
320 ,  mcxOnFail      ON_FAIL
321 )  ;
322 
/* Expect an integer on xf; lngp presumably receives the parsed value.
 * Known limitation stated in this header: this routine ignores xf->buffer.
*/
323 mcxstatus mcxIOexpectNum
324 (  mcxIO*         xf
325 ,  long*          lngp
326 ,  mcxOnFail      ON_FAIL
327 )  ;
328 
329 
330 /*
331  *    Returns next non-white space char,
332  *    which is pushed back onto stream after reading.
333 */
334 
335 int mcxIOskipSpace
336 (  mcxIO*        xf
337 )  ;
338 
339 
340 /*
341  *    Purpose: find str in file. If str is found file pointer is set at the end
342  *    of match (fgetc or mcxIOstep would retrieve the next byte), otherwise,
343  *    the stream is at EOF.
344  *
345  *    Internally this uses Boyer Moore Horspool (bmh) search.
346  *    It processes the stream with fgetc, so the input file need not be
347  *    seekable. This means that finding is relatively slow.
348  *
349  *    An improvement would be to implement faster input munging for seekable
350  *    streams, (using reads of size pagesize) and then reposition the stream
351  *    after searching.
352  *
353 */
354 
355 mcxstatus mcxIOfind
356 (  mcxIO*         xf
357 ,  const char*    str
358 ,  mcxOnFail      ON_FAIL
359 )  ;
360 
361 
362 /*
363  *    NOTE
364  *       When the cookie is not found this routine does the following:
365  *       1) It tries to fseek to the point of departure
366  *       2) If that fails, it stores the bytes it could not rewind
367  *             in xfin->buffer
368  *
369  *    +  mcxIOstep
370  *    +  mcxIOfind
371  *    +  mcxIOskipSpace
372  *    +  mcxIOexpect
373  *    +  mcxIOreadLine
374  *
375  *    will access this buffer, but certain other routines will not, e.g.
376  *
377  *    -  mcxIOreadFile
378  *    -  mcxIOexpectNum
379  *    -  mcxIOexpectReal
380  *    -  all stdio routines (fread, fgetc)
381  *
382  *    For all mcxIO routines this is an open bug.
383  *
384 */
385 
386 mcxbool mcxIOtryCookie
387 (  mcxIO*        xfin
388 ,  const unsigned char abcd[4]
389 )  ;
390 
/* Output-side counterpart of mcxIOtryCookie; abcd is a four-byte cookie.
 * NOTE(review): presumably writes the four bytes to xfout and returns true
 * on success - confirm in the implementation.
*/
391 mcxbool mcxIOwriteCookie
392 (  mcxIO*        xfout
393 ,  const unsigned char abcd[4]
394 )  ;
395 
396 
397 
398 #endif
399 
400