1 /* sgmlio.c -
2 IO functions for core parser.
3
4 Written by James Clark (jjc@jclark.com).
5 */
6
7 /* SGML must see a file in which records start with RS and end with
8 RE, and EOFCHAR (Ctl-Z) is present at the end. This module must
9 supply these characters if they are not naturally present in the
10 file. SGML will open two files at a time: when an entity is
11 nested, the new file is opened before closing the old in order to
12 make sure the open is successful. If it is, the original open file
13 is closed temporarily (IOPEND); when the stack is popped, the new
14 file is closed and the original file is re-opened (IOCONT). SGML
15 will check error returns for the initial open of a file and all
16 reads, and for re-openings when the stack is popped, but not for
17 closes. Returning <0 indicates an error; 0 or more is a successful
18 operation, except for IOREAD where the return value is the number
19 of characters read, and must exceed 0 to be successful. The first
20 READ must always be successful, and normally consists of just
21 priming the buffer with EOBCHAR (or RS EOBCHAR). SGMLIO must
22 assure that there is an EOBCHAR at the end of each block read,
23 except for the last block of the entity, which must have an
24 EOFCHAR.
25
26 SGML views an entity as a contiguous whole, without regard to its
27 actual form of storage. SGMLIO supports entities that are
28 equivalent to a single file of one or more records, or to a
29 concatenation of files.
30 */
31
32 /* Uses only stream I/O. This module should be portable to most ANSI
33 systems. */
34 /* We try to ensure that if an IO operation fails, then errno will contain
35 a meaningful value (although it may be zero.) */
36
37 #include "config.h"
38 #ifdef HAVE_O_NOINHERIT
39 #include <fcntl.h>
40 #include <io.h>
41 #endif /* HAVE_O_NOINHERIT */
42
43 #include "sgmlaux.h" /* Include files for auxiliary functions.. */
44
45 #ifdef HAVE_O_NOINHERIT
46 #define FOPENR(file) nifopen(file)
47 FILE *nifopen P((char *));
48 #else /* not HAVE_O_NOINHERIT */
49 #define FOPENR(file) fopen((file), "r")
50 #endif /* not HAVE_O_NOINHERIT */
51
52 struct iofcb { /* I/O file control block. */
53 FILE *fp; /* File handle. */
54 fpos_t off; /* Offset in file of current read block. */
55 char *next; /* Next file (NULL if no more). */
56 char *file; /* Current file (no length byte). */
57 int pendoff; /* Offset into line when file suspended. */
58 char bol; /* Non-zero if currently at beginning of line. */
59 char first; /* Non-zero if the first read. */
60 char wasbol; /* Non-zero if current block was at beginning of line. */
61 char canseek;
62 UNCH *pendbuf; /* Saved partial buffer for suspended file
63 that can't be closed and reopened. */
64 };
65
66 static char *lastfile; /* The name of the last file closed. */
67 static int bufsize; /* Size of buffer passed to ioread(). */
68 static char ismagic[256]; /* Table of magic chars that need to be prefixed
69 by DELNONCH. */
70 static int stdinused = 0;
71
72 static char *nextstr P((char *)); /* Iterate over list of strings. */
73 static FILE *openfile P((char *, char *));
74 static int closefile P((FILE *));
75 static int isreg P((FILE *));
76
ioinit(swp)77 VOID ioinit(swp)
78 struct switches *swp;
79 {
80 ismagic[EOBCHAR] = 1;
81 ismagic[EOFCHAR] = 1;
82 ismagic[EOS] = 1;
83 ismagic[(UNCH)DELNONCH] = 1;
84 ismagic[(UNCH)GENRECHAR] = 1;
85 bufsize = swp->swbufsz;
86 }
87
ioopen(id,pp)88 int ioopen(id, pp)
89 UNIV id;
90 UNIV *pp;
91 {
92 struct iofcb *f;
93 char *s;
94 errno = 0;
95 if (!id)
96 return -1;
97 s = id;
98 if (!*s)
99 return -1;
100 f = (struct iofcb *)rmalloc((UNS)sizeof(struct iofcb));
101 f->file = s;
102 f->next = nextstr(s);
103 errno = 0;
104 f->fp = openfile(f->file, &f->canseek);
105 f->bol = 1;
106 f->first = 1;
107 f->pendbuf = 0;
108 *pp = (UNIV)f;
109 return f->fp ? 1 : -1;
110 }
111
ioclose(p)112 VOID ioclose(p)
113 UNIV p;
114 {
115 struct iofcb *f = (struct iofcb *)p;
116 if (f->fp)
117 closefile(f->fp);
118 lastfile = f->file;
119 frem((UNIV)f);
120 }
121
iopend(p,off,buf)122 VOID iopend(p, off, buf)
123 UNIV p;
124 int off;
125 UNCH *buf;
126 {
127 struct iofcb *f = (struct iofcb *)p;
128 if (!f->canseek) {
129 UNCH *s;
130 for (s = buf + off; *s != EOFCHAR && *s != EOBCHAR; s++)
131 ;
132 s++;
133 f->pendbuf = (UNCH *)rmalloc((UNS)(s - buf - off));
134 memcpy((UNIV)f->pendbuf, (UNIV)(buf + off), (UNS)(s - buf - off));
135 return;
136 }
137 f->bol = 0;
138 if (f->wasbol) {
139 if (off == 0)
140 f->bol = 1;
141 else
142 off--;
143 }
144 f->pendoff = off;
145 if (f->fp) {
146 fclose(f->fp);
147 f->fp = 0;
148 }
149 }
150
iocont(p)151 int iocont(p)
152 UNIV p;
153 {
154 struct iofcb *f = (struct iofcb *)p;
155 int c = EOF;
156 int off = f->pendoff;
157
158 if (!f->canseek)
159 return 0;
160
161 errno = 0;
162 f->fp = FOPENR(f->file);
163 if (!f->fp)
164 return -1;
165 if (fsetpos(f->fp, &f->off))
166 return -1;
167 while (--off >= 0) {
168 c = getc(f->fp);
169 if (c != EOF && ismagic[c])
170 off--;
171 }
172 if (c == '\n')
173 f->bol = 1;
174 if (ferror(f->fp))
175 return -1;
176 return 0;
177 }
178
179 /* Return -1 on error, otherwise the number of bytes read. The
180 strategy is to concatenate the files, insert a RS at the beginning of
181 each line, and change each '\n' into a RE. The returned data
182 shouldn't cross a file boundary, otherwise error messages might be
183 inaccurate. The first read must always succeed. */
184
ioread(p,buf,newfilep)185 int ioread(p, buf, newfilep)
186 UNIV p;
187 UNCH *buf;
188 int *newfilep;
189 {
190 int i = 0;
191 struct iofcb *f = (struct iofcb *)p;
192 FILE *fp;
193 int c;
194
195 *newfilep = 0;
196 if (f->first) {
197 buf[i] = EOBCHAR;
198 f->first = 0;
199 return 1;
200 }
201 if (f->pendbuf) {
202 for (i = 0;
203 (buf[i] = f->pendbuf[i]) != EOBCHAR && buf[i] != EOFCHAR;
204 i++)
205 ;
206 frem((UNIV)f->pendbuf);
207 f->pendbuf = 0;
208 return i + 1;
209 }
210 fp = f->fp;
211 for (;;) {
212 errno = 0;
213 if (f->canseek && fgetpos(fp, &f->off))
214 f->canseek = 0;
215 errno = 0;
216 c = getc(fp);
217 if (c != EOF)
218 break;
219 if (ferror(fp))
220 return -1;
221 if (closefile(fp) == EOF)
222 return -1;
223 if (!f->next){
224 f->fp = 0;
225 buf[0] = EOFCHAR;
226 return 1;
227 }
228 f->file = f->next;
229 f->next = nextstr(f->next);
230 *newfilep = 1;
231 errno = 0;
232 fp = f->fp = openfile(f->file, &f->canseek);
233 if (!fp)
234 return -1;
235 f->bol = 1;
236 }
237 if (f->bol) {
238 f->bol = 0;
239 buf[i++] = RSCHAR;
240 f->wasbol = 1;
241 }
242 else
243 f->wasbol = 0;
244 errno = 0;
245 for (;;) {
246 if (c == '\n') {
247 f->bol = 1;
248 buf[i++] = RECHAR;
249 break;
250 }
251 if (ismagic[c]) {
252 buf[i++] = DELNONCH;
253 buf[i++] = SHIFTNON(c);
254 }
255 else
256 buf[i++] = c;
257 if (i >= bufsize - 2)
258 break;
259 c = getc(fp);
260 if (c == EOF) {
261 if (ferror(fp))
262 return -1;
263 /* This is in the middle of a line. */
264 break;
265 }
266 }
267 buf[i++] = EOBCHAR;
268 return i;
269 }
270
/* Step to the string that follows p in a list of NUL-terminated
   strings stored back to back.  Returns a pointer to that string, or 0
   if it is empty, which marks the end of the list. */
static char *nextstr(p)
char *p;
{
     char *scan = p;

     /* Walk to the terminator of the current string, then past it. */
     while (*scan != '\0')
          scan++;
     scan++;
     return *scan != '\0' ? scan : 0;
}
277
278 /* Return the filename associated with p. If p is NULL, return the filename
279 of the last file closed. */
280
ioflid(p)281 char *ioflid(p)
282 UNIV p;
283 {
284 if (!p)
285 return lastfile;
286 return ((struct iofcb *)p)->file;
287 }
288
289 static
openfile(name,seekp)290 FILE *openfile(name, seekp)
291 char *name;
292 char *seekp;
293 {
294 FILE *fp;
295 if (strcmp(name, STDINNAME) == 0) {
296 if (stdinused)
297 return 0;
298 stdinused = 1;
299 *seekp = 0;
300 return stdin;
301 }
302 fp = FOPENR(name);
303 if (fp)
304 *seekp = isreg(fp);
305 return fp;
306 }
307
308 /* Return -1 on error, 0 otherwise. */
309
310 static
closefile(fp)311 int closefile(fp)
312 FILE *fp;
313 {
314 if (fp == stdin) {
315 stdinused = 0;
316 clearerr(fp);
317 return 0;
318 }
319 else
320 return fclose(fp);
321 }
322
323 #ifdef HAVE_O_NOINHERIT
324
325 /* This is the same as fopen(name, "r") except that it tells DOS that
326 the file descriptor should not be inherited by child processes. */
327
nifopen(name)328 FILE *nifopen(name)
329 char *name;
330 {
331 int fd = open(name, O_RDONLY|O_NOINHERIT|O_TEXT);
332 if (fd < 0)
333 return 0;
334 return fdopen(fd, "r");
335 }
336
337 #endif /* HAVE_O_NOINHERIT */
338
339 #ifdef HAVE_SYS_STAT_H
340
341 #include <sys/types.h>
342 #include <sys/stat.h>
343
344 #ifndef S_ISREG
345 #ifdef S_IFMT
346 #ifdef S_IFREG
347 #define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
348 #endif /* S_IFREG */
349 #endif /* S_IFMT */
350 #endif /* not S_ISREG */
351
352 #endif /* HAVE_SYS_STAT_H */
353
/* Tell whether fp might be associated with a regular file: non-zero
   if so, 0 otherwise.  The answer is "yes" whenever it cannot be
   determined (no S_ISREG, or fstat() fails).  We check this because on
   many Unix systems lseek() will succeed on a (pseudo-)terminal
   although terminals aren't seekable in the way we need. */

static
int isreg(fp)
FILE *fp;
{
     int regular = 1;
#ifdef S_ISREG
     struct stat st;

     /* This assumes that a system that has S_ISREG will also have
        fstat() and fileno(). */
     if (fstat(fileno(fp), &st) == 0)
          regular = S_ISREG(st.st_mode);
#endif /* S_ISREG */
     return regular;
}
373
374
375 /*
376 Local Variables:
377 c-indent-level: 5
378 c-continued-statement-offset: 5
379 c-brace-offset: -5
380 c-argdecl-indent: 0
381 c-label-offset: -5
382 comment-column: 30
383 End:
384 */
385