1 /********************************************
2 fin.c
3 copyright 2008-2014,2018.  Thomas E. Dickey
4 copyright 1991-1995,1996.  Michael D. Brennan
5 
6 This is a source file for mawk, an implementation of
7 the AWK programming language.
8 
9 Mawk is distributed without warranty under the terms of
10 the GNU General Public License, version 2, 1991.
11 ********************************************/
12 
13 /*
14  * $MawkId: fin.c,v 1.44 2018/11/15 00:31:57 tom Exp $
15  */
16 
17 /* fin.c */
18 
19 #include "mawk.h"
20 #include "fin.h"
21 #include "memory.h"
22 #include "bi_vars.h"
23 #include "field.h"
24 #include "symtype.h"
25 #include "scan.h"
26 
27 #ifdef	  HAVE_FCNTL_H
28 #include <fcntl.h>
29 #endif
30 
31 /* This file handles input files.  Opening, closing,
32    buffering and (most important) splitting files into
33    records, FINgets().
34 */
35 
36 static FIN *next_main(int);
37 static char *enlarge_fin_buffer(FIN *);
38 int is_cmdline_assign(char *);	/* also used by init */
39 
40 /* this is how we mark EOF on main_fin  */
41 static char dead_buff = 0;
42 static FIN dead_main =
43 {0, (FILE *) 0, &dead_buff, &dead_buff, &dead_buff,
44  1, EOF_FLAG};
45 
46 static void
free_fin_data(FIN * fin)47 free_fin_data(FIN * fin)
48 {
49     if (fin != &dead_main) {
50 	zfree(fin->buff, (size_t) (fin->nbuffs * BUFFSZ + 1));
51 	ZFREE(fin);
52     }
53 }
54 
55 /* convert file-descriptor to FIN*.
56    It's the main stream if main_flag is set
57 */
58 FIN *
FINdopen(int fd,int main_flag)59 FINdopen(int fd, int main_flag)
60 {
61     FIN *fin = ZMALLOC(FIN);
62 
63     fin->fd = fd;
64     fin->flags = main_flag ? (MAIN_FLAG | START_FLAG) : START_FLAG;
65     fin->buffp = fin->buff = (char *) zmalloc((size_t) BUFFSZ + 1);
66     fin->limit = fin->buffp;
67     fin->nbuffs = 1;
68     fin->buff[0] = 0;
69 
70     if ((isatty(fd) && rs_shadow.type == SEP_CHAR && rs_shadow.c == '\n')
71 	|| interactive_flag) {
72 	/* interactive, i.e., line buffer this file */
73 	if (fd == 0) {
74 	    fin->fp = stdin;
75 	} else if (!(fin->fp = fdopen(fd, "r"))) {
76 	    errmsg(errno, "fdopen failed");
77 	    free_fin_data(fin);
78 	    mawk_exit(2);
79 	}
80     } else {
81 	fin->fp = (FILE *) 0;
82     }
83 
84     return fin;
85 }
86 
87 /* open a FIN* by filename.
88    It's the main stream if main_flag is set.
89    Recognizes "-" as stdin.
90 */
91 
92 FIN *
FINopen(char * filename,int main_flag)93 FINopen(char *filename, int main_flag)
94 {
95     FIN *result = 0;
96     int fd;
97     int oflag = O_RDONLY;
98 
99 #if USE_BINMODE
100     int bm = binmode() & 1;
101     if (bm)
102 	oflag |= O_BINARY;
103 #endif
104 
105     TRACE(("FINopen(%s)\n", filename));
106     if ((filename[0] == '-' && filename[1] == 0) ||
107 	(filename[0] == '/' && !strcmp(filename, "/dev/stdin"))) {
108 #if USE_BINMODE
109 	if (bm)
110 	    setmode(0, O_BINARY);
111 #endif
112 	result = FINdopen(0, main_flag);
113     } else if ((fd = open(filename, oflag, 0)) != -1) {
114 	result = FINdopen(fd, main_flag);
115     }
116     return result;
117 }
118 
119 /* frees the buffer and fd, but leaves FIN structure until
120    the user calls close() */
121 
122 void
FINsemi_close(FIN * fin)123 FINsemi_close(FIN * fin)
124 {
125     static char dead = 0;
126 
127     if (fin->buff != &dead) {
128 	zfree(fin->buff, (size_t) (fin->nbuffs * BUFFSZ + 1));
129 
130 	if (fin->fd) {
131 	    if (fin->fp)
132 		fclose(fin->fp);
133 	    else
134 		close(fin->fd);
135 	}
136 
137 	fin->flags |= EOF_FLAG;
138 	fin->limit =
139 	    fin->buff =
140 	    fin->buffp = &dead;	/* marks it semi_closed */
141     }
142     /* else was already semi_closed */
143 }
144 
145 /* user called close() on input file */
146 void
FINclose(FIN * fin)147 FINclose(FIN * fin)
148 {
149     FINsemi_close(fin);
150     ZFREE(fin);
151 }
152 
153 /* return one input record as determined by RS,
154    from input file (FIN)  fin
155 */
156 
157 char *
FINgets(FIN * fin,size_t * len_p)158 FINgets(FIN * fin, size_t *len_p)
159 {
160     char *p;
161     char *q = 0;
162     size_t match_len;
163     size_t r;
164 
165   restart:
166 
167     if ((p = fin->buffp) >= fin->limit) {	/* need a refill */
168 	if (fin->flags & EOF_FLAG) {
169 	    if (fin->flags & MAIN_FLAG) {
170 		fin = next_main(0);
171 		goto restart;
172 	    } else {
173 		*len_p = 0;
174 		return (char *) 0;
175 	    }
176 	}
177 
178 	if (fin->fp) {
179 	    /* line buffering */
180 	    if (!fgets(fin->buff, BUFFSZ + 1, fin->fp)) {
181 		fin->flags |= EOF_FLAG;
182 		fin->buff[0] = 0;
183 		fin->buffp = fin->buff;
184 		fin->limit = fin->buffp;
185 		goto restart;	/* might be main_fin */
186 	    } else {		/* return this line */
187 		/* find eol */
188 		p = fin->buff;
189 		while (*p != '\n' && *p != 0)
190 		    p++;
191 
192 		*p = 0;
193 		*len_p = (unsigned) (p - fin->buff);
194 		fin->buffp = p;
195 		fin->limit = fin->buffp + strlen(fin->buffp);
196 		return fin->buff;
197 	    }
198 	} else {
199 	    /* block buffering */
200 	    r = fillbuff(fin->fd, fin->buff, (size_t) (fin->nbuffs * BUFFSZ));
201 	    if (r == 0) {
202 		fin->flags |= EOF_FLAG;
203 		fin->buffp = fin->buff;
204 		fin->limit = fin->buffp;
205 		goto restart;	/* might be main */
206 	    } else if (r < fin->nbuffs * BUFFSZ) {
207 		fin->flags |= EOF_FLAG;
208 	    }
209 
210 	    fin->limit = fin->buff + r;
211 	    p = fin->buffp = fin->buff;
212 
213 	    if (fin->flags & START_FLAG) {
214 		fin->flags &= ~START_FLAG;
215 		if (rs_shadow.type == SEP_MLR) {
216 		    /* trim blank lines from front of file */
217 		    while (*p == '\n')
218 			p++;
219 		    fin->buffp = p;
220 		    if (p >= fin->limit)
221 			goto restart;
222 		}
223 	    }
224 	}
225     }
226 
227   retry:
228 
229     switch (rs_shadow.type) {
230     case SEP_CHAR:
231 	q = memchr(p, rs_shadow.c, (size_t) (fin->limit - p));
232 	match_len = 1;
233 	break;
234 
235     case SEP_STR:
236 	q = str_str(p,
237 		    (size_t) (fin->limit - p),
238 		    ((STRING *) rs_shadow.ptr)->str,
239 		    match_len = ((STRING *) rs_shadow.ptr)->len);
240 	break;
241 
242     case SEP_MLR:
243     case SEP_RE:
244 	q = re_pos_match(p, (size_t) (fin->limit - p), rs_shadow.ptr,
245 			 &match_len,
246 			 (p != fin->buff) ||
247 			 (fin->flags & FIN_FLAG));
248 	/* if the match is at the end, there might still be
249 	   more to match in the file */
250 	if (q && q[match_len] == 0 && !(fin->flags & EOF_FLAG)) {
251 	    TRACE(("re_pos_match cancelled\n"));
252 	    q = (char *) 0;
253 	}
254 	break;
255 
256     default:
257 	bozo("type of rs_shadow");
258     }
259 
260     if (q) {
261 	/* the easy and normal case */
262 	*q = 0;
263 	*len_p = (unsigned) (q - p);
264 	fin->buffp = q + match_len;
265 	return p;
266     }
267 
268     if (fin->flags & EOF_FLAG) {
269 	/* last line without a record terminator */
270 	*len_p = r = (unsigned) (fin->limit - p);
271 	fin->buffp = p + r;
272 
273 	if (rs_shadow.type == SEP_MLR && fin->buffp[-1] == '\n'
274 	    && r != 0) {
275 	    (*len_p)--;
276 	    *--fin->buffp = 0;
277 	    fin->limit--;
278 	}
279 	return p;
280     }
281 
282     if (p == fin->buff) {
283 	/* current record is too big for the input buffer, grow buffer */
284 	p = enlarge_fin_buffer(fin);
285     } else {
286 	/* move a partial line to front of buffer and try again */
287 	size_t rr;
288 	size_t amount = (size_t) (fin->limit - p);
289 	size_t blocks = fin->nbuffs * BUFFSZ;
290 
291 	fin->flags |= FIN_FLAG;
292 	r = amount;
293 	if (blocks < r) {
294 	    fin->flags |= EOF_FLAG;
295 	    return 0;
296 	}
297 
298 	p = (char *) memmove(fin->buff, p, r);
299 	q = p + r;
300 	rr = blocks - r;
301 
302 	if ((r = fillbuff(fin->fd, q, rr)) < rr) {
303 	    fin->flags |= EOF_FLAG;
304 	    fin->limit = fin->buff + amount + r;
305 	}
306     }
307     goto retry;
308 }
309 
310 static char *
enlarge_fin_buffer(FIN * fin)311 enlarge_fin_buffer(FIN * fin)
312 {
313     size_t r;
314     size_t oldsize = fin->nbuffs * BUFFSZ + 1;
315     size_t limit = (size_t) (fin->limit - fin->buff);
316 
317 #ifdef  MSDOS
318     /* I'm not sure this can really happen:
319        avoid "16bit wrap" */
320     if (fin->nbuffs == MAX_BUFFS) {
321 	errmsg(0, "out of input buffer space");
322 	mawk_exit(2);
323     }
324 #endif
325 
326     fin->buffp =
327 	fin->buff = (char *) zrealloc(fin->buff, oldsize, oldsize + BUFFSZ);
328     fin->nbuffs++;
329 
330     r = fillbuff(fin->fd, fin->buff + (oldsize - 1), (size_t) BUFFSZ);
331     if (r < BUFFSZ)
332 	fin->flags |= EOF_FLAG;
333 
334     fin->limit = fin->buff + limit + r;
335     return fin->buff;
336 }
337 
/*--------
  Read up to size bytes from fd into target, looping until the request
  is satisfied or read() reports end of file.

  target must be big enough to hold size + 1 chars;
  on exit the back of the data read is zero terminated.

  Returns the number of bytes stored, which is less than size only
  when end of file was reached.  A read that was interrupted by a
  signal (EINTR) is retried; any other read error is reported and
  ends the program through mawk_exit(2).
 *--------------*/
size_t
fillbuff(int fd, char *target, size_t size)
{
    size_t entry_size = size;

    while (size != 0) {
        int r = (int) read(fd, target, size);

        if (r < 0) {
#ifdef EINTR
            if (errno == EINTR)
                continue;	/* no data was transferred: just retry */
#endif
            errmsg(errno, "read error");
            mawk_exit(2);
            break;		/* only reached if mawk_exit() returns */
        }

        if (r == 0)
            break;		/* end of file */

        target += r;
        size -= (size_t) r;
    }

    *target = 0;
    return entry_size - size;
}
367 
/* main_fin is the handle for the main input stream;
   it is 0 until the stream has been opened (open_main() and
   next_main() set it) */

FIN *main_fin;
ARRAY Argv;			/* to the user this is ARGV  */
static double argi = 1.0;	/* index of next ARGV[argi] to try to open */
374 
375 static void
set_main_to_stdin(void)376 set_main_to_stdin(void)
377 {
378     cell_destroy(FILENAME);
379     FILENAME->type = C_STRING;
380     FILENAME->ptr = (PTR) new_STRING("-");
381     cell_destroy(FNR);
382     FNR->type = C_DOUBLE;
383     FNR->dval = 0.0;
384     rt_fnr = 0;
385     main_fin = FINdopen(0, 1);
386 }
387 
388 /* this gets called once to get the input stream going.
389    It is called after the execution of the BEGIN block
390    unless there is a getline inside BEGIN {}
391 */
392 void
open_main(void)393 open_main(void)
394 {
395     CELL argc;
396 
397 #if USE_BINMODE
398     int k = binmode();
399 
400     if (k & 1)
401 	setmode(0, O_BINARY);
402     if (k & 2) {
403 	setmode(1, O_BINARY);
404 	setmode(2, O_BINARY);
405     }
406 #endif
407 
408     cellcpy(&argc, ARGC);
409     if (argc.type != C_DOUBLE)
410 	cast1_to_d(&argc);
411 
412     if (argc.dval == 1.0)
413 	set_main_to_stdin();
414     else
415 	next_main(1);
416 }
417 
418 /* get the next command line file open */
419 static FIN *
next_main(int open_flag)420 next_main(int open_flag)	/* called by open_main() if on */
421 {
422     register CELL *cp;
423     CELL argc;			/* copy of ARGC */
424     CELL c_argi;		/* cell copy of argi */
425     CELL argval;		/* copy of ARGV[c_argi] */
426     int failed = 1;
427 
428     argval.type = C_NOINIT;
429     c_argi.type = C_DOUBLE;
430 
431     if (main_fin) {
432 	FINclose(main_fin);
433 	main_fin = 0;
434     }
435     /* FILENAME and FNR don't change unless we really open
436        a new file */
437 
438     /* make a copy of ARGC to avoid side effect */
439     if (cellcpy(&argc, ARGC)->type != C_DOUBLE)
440 	cast1_to_d(&argc);
441 
442     while (argi < argc.dval) {
443 	c_argi.dval = argi;
444 	argi += 1.0;
445 
446 	if (!(cp = array_find(Argv, &c_argi, NO_CREATE)))
447 	    continue;		/* its deleted */
448 
449 	/* make a copy so we can cast w/o side effect */
450 	cell_destroy(&argval);
451 	cp = cellcpy(&argval, cp);
452 	if (cp->type < C_STRING)
453 	    cast1_to_s(cp);
454 	if (string(cp)->len == 0) {
455 	    /* file argument is "" */
456 	    cell_destroy(cp);
457 	    continue;
458 	}
459 
460 	/* it might be a command line assignment */
461 	if (is_cmdline_assign(string(cp)->str)) {
462 	    continue;
463 	}
464 
465 	/* try to open it -- we used to continue on failure,
466 	   but posix says we should quit */
467 	if (!(main_fin = FINopen(string(cp)->str, 1))) {
468 	    errmsg(errno, "cannot open %s", string(cp)->str);
469 	    mawk_exit(2);
470 	}
471 
472 	/* success -- set FILENAME and FNR */
473 	cell_destroy(FILENAME);
474 	cellcpy(FILENAME, cp);
475 	cell_destroy(cp);
476 	cell_destroy(FNR);
477 	FNR->type = C_DOUBLE;
478 	FNR->dval = 0.0;
479 	rt_fnr = 0;
480 
481 	failed = 0;
482 	break;
483     }
484 
485     if (failed) {
486 	cell_destroy(&argval);
487 
488 	if (open_flag) {
489 	    /* all arguments were null or assignment */
490 	    set_main_to_stdin();
491 	} else {
492 	    main_fin = &dead_main;
493 	    /* since MAIN_FLAG is not set, FINgets won't call next_main() */
494 	}
495     }
496 
497     return main_fin;
498 }
499 
500 int
is_cmdline_assign(char * s)501 is_cmdline_assign(char *s)
502 {
503     register char *p;
504     int c;
505     SYMTAB *stp;
506     CELL *cp = 0;
507     size_t len;
508     CELL cell;			/* used if command line assign to pseudo field */
509     CELL *fp = (CELL *) 0;	/* ditto */
510     size_t length;
511 
512     if (scan_code[*(unsigned char *) s] != SC_IDCHAR)
513 	return 0;
514 
515     p = s + 1;
516     while ((c = scan_code[*(unsigned char *) p]) == SC_IDCHAR
517 	   || c == SC_DIGIT)
518 	p++;
519 
520     if (*p != '=')
521 	return 0;
522 
523     *p = 0;
524     stp = find(s);
525 
526     switch (stp->type) {
527     case ST_NONE:
528 	stp->type = ST_VAR;
529 	stp->stval.cp = cp = ZMALLOC(CELL);
530 	break;
531 
532     case ST_VAR:
533     case ST_NR:		/* !! no one will do this */
534 	cp = stp->stval.cp;
535 	cell_destroy(cp);
536 	break;
537 
538     case ST_FIELD:
539 	/* must be pseudo field */
540 	fp = stp->stval.cp;
541 	cp = &cell;
542 	break;
543 
544     default:
545 	rt_error(
546 		    "cannot command line assign to %s\n\ttype clash or keyword"
547 		    ,s);
548     }
549 
550     /* we need to keep ARGV[i] intact */
551     *p++ = '=';
552     len = strlen(p) + 1;
553     /* posix says escape sequences are on from command line */
554     p = rm_escape(strcpy((char *) zmalloc(len), p), &length);
555     cp->ptr = (PTR) new_STRING1(p, length);
556     zfree(p, len);
557     check_strnum(cp);		/* sets cp->type */
558     if (fp)			/* move it from cell to pfield[] */
559     {
560 	field_assign(fp, cp);
561 	free_STRING(string(cp));
562     }
563     return 1;
564 }
565 
#ifdef NO_LEAKS
/*
 * Leak-check support: release the main input stream's storage, if any,
 * and forget the handle.
 */
void
fin_leaks(void)
{
    TRACE(("fin_leaks\n"));

    if (!main_fin)
        return;

    free_fin_data(main_fin);
    main_fin = 0;
}
#endif
577