1 /* sgmlio.c -
2    IO functions for core parser.
3 
4    Written by James Clark (jjc@jclark.com).
5 */
6 
7 /* SGML must see a file in which records start with RS and end with
8    RE, and EOFCHAR (Ctl-Z) is present at the end.  This module must
9    supply these characters if they are not naturally present in the
10    file.  SGML will open two files at a time: when an entity is
11    nested, the new file is opened before closing the old in order to
12    make sure the open is successful. If it is, the original open file
13    is closed temporarily (IOPEND); when the stack is popped, the new
14    file is closed and the original file is re-opened (IOCONT). SGML
15    will check error returns for the initial open of a file and all
16    reads, and for re-openings when the stack is popped, but not for
17    closes.  Returning <0 indicates an error; 0 or more is a successful
18    operation, except for IOREAD where the return value is the number
19    of characters read, and must exceed 0 to be successful.  The first
20    READ must always be successful, and normally consists of just
21    priming the buffer with EOBCHAR (or RS EOBCHAR).  SGMLIO must
22    assure that there is an EOBCHAR at the end of each block read,
23    except for the last block of the entity, which must have an
24    EOFCHAR.
25 
26    SGML views an entity as a contiguous whole, without regard to its
27    actual form of storage.  SGMLIO supports entities that are
28    equivalent to a single file of one or more records, or to a
29    concatenation of files.
30 */
31 
32 /* Uses only stream I/O.  This module should be portable to most ANSI
33    systems. */
/* We try to ensure that if an IO operation fails, then errno will contain
   a meaningful value (although it may be zero). */
36 
37 #include "config.h"
38 #ifdef HAVE_O_NOINHERIT
39 #include <fcntl.h>
40 #include <io.h>
41 #endif /* HAVE_O_NOINHERIT */
42 
43 #include "sgmlaux.h"          /* Include files for auxiliary functions.. */
44 
45 #ifdef HAVE_O_NOINHERIT
46 #define FOPENR(file) nifopen(file)
47 FILE *nifopen P((char *));
48 #else /* not HAVE_O_NOINHERIT */
49 #define FOPENR(file) fopen((file), "r")
50 #endif /* not HAVE_O_NOINHERIT */
51 
52 struct iofcb {                /* I/O file control block. */
53      FILE *fp;		      /* File handle. */
54      fpos_t off;              /* Offset in file of current read block. */
55      char *next;              /* Next file (NULL if no more). */
56      char *file;              /* Current file (no length byte). */
57      int pendoff;	      /* Offset into line when file suspended. */
58      char bol;	              /* Non-zero if currently at beginning of line. */
59      char first;	      /* Non-zero if the first read.  */
60      char wasbol;	      /* Non-zero if current block was at beginning of line. */
61      char canseek;
62      UNCH *pendbuf;	      /* Saved partial buffer for suspended file
63 				 that can't be closed and reopened. */
64 };
65 
66 static char *lastfile;	      /* The name of the last file closed. */
67 static int bufsize;	      /* Size of buffer passed to ioread(). */
68 static char ismagic[256];     /* Table of magic chars that need to be prefixed
69 				 by DELNONCH. */
70 static int stdinused = 0;
71 
72 static char *nextstr P((char *)); /* Iterate over list of strings. */
73 static FILE *openfile P((char *, char *));
74 static int closefile P((FILE *));
75 static int isreg P((FILE *));
76 
ioinit(swp)77 VOID ioinit(swp)
78 struct switches *swp;
79 {
80      ismagic[EOBCHAR] = 1;
81      ismagic[EOFCHAR] = 1;
82      ismagic[EOS] = 1;
83      ismagic[(UNCH)DELNONCH] = 1;
84      ismagic[(UNCH)GENRECHAR] = 1;
85      bufsize = swp->swbufsz;
86 }
87 
ioopen(id,pp)88 int ioopen(id, pp)
89 UNIV id;
90 UNIV *pp;
91 {
92      struct iofcb *f;
93      char *s;
94      errno = 0;
95      if (!id)
96 	  return -1;
97      s = id;
98      if (!*s)
99 	  return -1;
100      f = (struct iofcb *)rmalloc((UNS)sizeof(struct iofcb));
101      f->file = s;
102      f->next = nextstr(s);
103      errno = 0;
104      f->fp = openfile(f->file, &f->canseek);
105      f->bol = 1;
106      f->first = 1;
107      f->pendbuf = 0;
108      *pp = (UNIV)f;
109      return f->fp ? 1 : -1;
110 }
111 
ioclose(p)112 VOID ioclose(p)
113 UNIV p;
114 {
115      struct iofcb *f = (struct iofcb *)p;
116      if (f->fp)
117 	  closefile(f->fp);
118      lastfile = f->file;
119      frem((UNIV)f);
120 }
121 
iopend(p,off,buf)122 VOID iopend(p, off, buf)
123 UNIV p;
124 int off;
125 UNCH *buf;
126 {
127      struct iofcb *f = (struct iofcb *)p;
128      if (!f->canseek) {
129 	  UNCH *s;
130 	  for (s = buf + off; *s != EOFCHAR && *s != EOBCHAR; s++)
131 	       ;
132 	  s++;
133 	  f->pendbuf = (UNCH *)rmalloc((UNS)(s - buf - off));
134 	  memcpy((UNIV)f->pendbuf, (UNIV)(buf + off), (UNS)(s - buf - off));
135 	  return;
136      }
137      f->bol = 0;
138      if (f->wasbol) {
139 	  if (off == 0)
140 	       f->bol = 1;
141 	  else
142 	       off--;
143      }
144      f->pendoff = off;
145      if (f->fp) {
146 	  fclose(f->fp);
147 	  f->fp = 0;
148      }
149 }
150 
iocont(p)151 int iocont(p)
152 UNIV p;
153 {
154      struct iofcb *f = (struct iofcb *)p;
155      int c = EOF;
156      int off = f->pendoff;
157 
158      if (!f->canseek)
159 	  return 0;
160 
161      errno = 0;
162      f->fp = FOPENR(f->file);
163      if (!f->fp)
164 	  return -1;
165      if (fsetpos(f->fp, &f->off))
166 	  return -1;
167      while (--off >= 0) {
168 	  c = getc(f->fp);
169 	  if (c != EOF && ismagic[c])
170 	       off--;
171      }
172      if (c == '\n')
173 	  f->bol = 1;
174      if (ferror(f->fp))
175 	  return -1;
176      return 0;
177 }
178 
179 /* Return -1 on error, otherwise the number of bytes read.  The
180 strategy is to concatenate the files, insert a RS at the beginning of
181 each line, and change each '\n' into a RE.  The returned data
182 shouldn't cross a file boundary, otherwise error messages might be
183 inaccurate.  The first read must always succeed. */
184 
ioread(p,buf,newfilep)185 int ioread(p, buf, newfilep)
186 UNIV p;
187 UNCH *buf;
188 int *newfilep;
189 {
190      int i = 0;
191      struct iofcb *f = (struct iofcb *)p;
192      FILE *fp;
193      int c;
194 
195      *newfilep = 0;
196      if (f->first) {
197 	  buf[i] = EOBCHAR;
198 	  f->first = 0;
199 	  return 1;
200      }
201      if (f->pendbuf) {
202 	  for (i = 0;
203 	       (buf[i] = f->pendbuf[i]) != EOBCHAR && buf[i] != EOFCHAR;
204 	       i++)
205 	       ;
206 	  frem((UNIV)f->pendbuf);
207 	  f->pendbuf = 0;
208 	  return i + 1;
209      }
210      fp = f->fp;
211      for (;;) {
212 	  errno = 0;
213 	  if (f->canseek && fgetpos(fp, &f->off))
214 	       f->canseek = 0;
215 	  errno = 0;
216 	  c = getc(fp);
217 	  if (c != EOF)
218 	       break;
219 	  if (ferror(fp))
220 	       return -1;
221 	  if (closefile(fp) == EOF)
222 	       return -1;
223 	  if (!f->next){
224 	       f->fp = 0;
225 	       buf[0] = EOFCHAR;
226 	       return 1;
227 	  }
228 	  f->file = f->next;
229 	  f->next = nextstr(f->next);
230 	  *newfilep = 1;
231 	  errno = 0;
232 	  fp = f->fp = openfile(f->file, &f->canseek);
233 	  if (!fp)
234 	       return -1;
235 	  f->bol = 1;
236      }
237      if (f->bol) {
238 	  f->bol = 0;
239 	  buf[i++] = RSCHAR;
240 	  f->wasbol = 1;
241      }
242      else
243 	  f->wasbol = 0;
244      errno = 0;
245      for (;;) {
246 	  if (c == '\n') {
247 	       f->bol = 1;
248 	       buf[i++] = RECHAR;
249 	       break;
250 	  }
251 	  if (ismagic[c]) {
252 	       buf[i++] = DELNONCH;
253 	       buf[i++] = SHIFTNON(c);
254 	  }
255 	  else
256 	       buf[i++] = c;
257 	  if (i >= bufsize - 2)
258 	       break;
259 	  c = getc(fp);
260 	  if (c == EOF) {
261 	       if (ferror(fp))
262 		    return -1;
263 	       /* This is in the middle of a line. */
264 	       break;
265 	  }
266      }
267      buf[i++] = EOBCHAR;
268      return i;
269 }
270 
/* Step to the string stored immediately after the NUL that terminates
   p.  Return a pointer to it, or 0 if that string is empty (which marks
   the end of the list). */
static char *nextstr(p)
char *p;
{
     char *following = p + strlen(p) + 1;

     if (*following == '\0')
          return 0;
     return following;
}
277 
278 /* Return the filename associated with p.  If p is NULL, return the filename
279 of the last file closed. */
280 
ioflid(p)281 char *ioflid(p)
282 UNIV p;
283 {
284      if (!p)
285 	  return lastfile;
286      return ((struct iofcb *)p)->file;
287 }
288 
289 static
openfile(name,seekp)290 FILE *openfile(name, seekp)
291 char *name;
292 char *seekp;
293 {
294      FILE *fp;
295      if (strcmp(name, STDINNAME) == 0) {
296 	  if (stdinused)
297 	       return 0;
298 	  stdinused = 1;
299 	  *seekp = 0;
300 	  return stdin;
301      }
302      fp = FOPENR(name);
303      if (fp)
304 	  *seekp = isreg(fp);
305      return fp;
306 }
307 
308 /* Return -1 on error, 0 otherwise. */
309 
310 static
closefile(fp)311 int closefile(fp)
312 FILE *fp;
313 {
314      if (fp == stdin) {
315 	  stdinused = 0;
316 	  clearerr(fp);
317 	  return 0;
318      }
319      else
320 	  return fclose(fp);
321 }
322 
323 #ifdef HAVE_O_NOINHERIT
324 
325 /* This is the same as fopen(name, "r") except that it tells DOS that
326 the file descriptor should not be inherited by child processes.  */
327 
nifopen(name)328 FILE *nifopen(name)
329 char *name;
330 {
331      int fd = open(name, O_RDONLY|O_NOINHERIT|O_TEXT);
332      if (fd < 0)
333 	  return 0;
334      return fdopen(fd, "r");
335 }
336 
337 #endif /* HAVE_O_NOINHERIT */
338 
339 #ifdef HAVE_SYS_STAT_H
340 
341 #include <sys/types.h>
342 #include <sys/stat.h>
343 
344 #ifndef S_ISREG
345 #ifdef S_IFMT
346 #ifdef S_IFREG
347 #define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
348 #endif /* S_IFREG */
349 #endif /* S_IFMT */
350 #endif /* not S_ISREG */
351 
352 #endif /* HAVE_SYS_STAT_H */
353 
/* Tell whether fp might be associated with a regular file: non-zero if
it might be, 0 if it is known not to be.  We check this because on many
Unix systems lseek() will succeed on a (pseudo-)terminal although
terminals aren't seekable in the way we need.  When the file type
cannot be determined (no S_ISREG, or fstat() fails) the answer is 1. */

static
int isreg(fp)
FILE *fp;
{
#ifdef S_ISREG
     struct stat info;

     /* This assumes that a system that has S_ISREG will also have
        fstat() and fileno(). */
     if (fstat(fileno(fp), &info) != 0)
          return 1;
     return S_ISREG(info.st_mode);
#else /* not S_ISREG */
     return 1;
#endif /* not S_ISREG */
}
373 
374 
375 /*
376 Local Variables:
377 c-indent-level: 5
378 c-continued-statement-offset: 5
379 c-brace-offset: -5
380 c-argdecl-indent: 0
381 c-label-offset: -5
382 comment-column: 30
383 End:
384 */
385