1 /********************************************
2 fin.c
3 copyright 2008-2014,2018. Thomas E. Dickey
4 copyright 1991-1995,1996. Michael D. Brennan
5
6 This is a source file for mawk, an implementation of
7 the AWK programming language.
8
9 Mawk is distributed without warranty under the terms of
10 the GNU General Public License, version 2, 1991.
11 ********************************************/
12
13 /*
14 * $MawkId: fin.c,v 1.44 2018/11/15 00:31:57 tom Exp $
15 */
16
17 /* fin.c */
18
19 #include "mawk.h"
20 #include "fin.h"
21 #include "memory.h"
22 #include "bi_vars.h"
23 #include "field.h"
24 #include "symtype.h"
25 #include "scan.h"
26
27 #ifdef HAVE_FCNTL_H
28 #include <fcntl.h>
29 #endif
30
31 /* This file handles input files. Opening, closing,
32 buffering and (most important) splitting files into
33 records, FINgets().
34 */
35
36 static FIN *next_main(int);
37 static char *enlarge_fin_buffer(FIN *);
38 int is_cmdline_assign(char *); /* also used by init */
39
40 /* this is how we mark EOF on main_fin */
41 static char dead_buff = 0;
42 static FIN dead_main =
43 {0, (FILE *) 0, &dead_buff, &dead_buff, &dead_buff,
44 1, EOF_FLAG};
45
46 static void
free_fin_data(FIN * fin)47 free_fin_data(FIN * fin)
48 {
49 if (fin != &dead_main) {
50 zfree(fin->buff, (size_t) (fin->nbuffs * BUFFSZ + 1));
51 ZFREE(fin);
52 }
53 }
54
55 /* convert file-descriptor to FIN*.
56 It's the main stream if main_flag is set
57 */
58 FIN *
FINdopen(int fd,int main_flag)59 FINdopen(int fd, int main_flag)
60 {
61 FIN *fin = ZMALLOC(FIN);
62
63 fin->fd = fd;
64 fin->flags = main_flag ? (MAIN_FLAG | START_FLAG) : START_FLAG;
65 fin->buffp = fin->buff = (char *) zmalloc((size_t) BUFFSZ + 1);
66 fin->limit = fin->buffp;
67 fin->nbuffs = 1;
68 fin->buff[0] = 0;
69
70 if ((isatty(fd) && rs_shadow.type == SEP_CHAR && rs_shadow.c == '\n')
71 || interactive_flag) {
72 /* interactive, i.e., line buffer this file */
73 if (fd == 0) {
74 fin->fp = stdin;
75 } else if (!(fin->fp = fdopen(fd, "r"))) {
76 errmsg(errno, "fdopen failed");
77 free_fin_data(fin);
78 mawk_exit(2);
79 }
80 } else {
81 fin->fp = (FILE *) 0;
82 }
83
84 return fin;
85 }
86
87 /* open a FIN* by filename.
88 It's the main stream if main_flag is set.
89 Recognizes "-" as stdin.
90 */
91
92 FIN *
FINopen(char * filename,int main_flag)93 FINopen(char *filename, int main_flag)
94 {
95 FIN *result = 0;
96 int fd;
97 int oflag = O_RDONLY;
98
99 #if USE_BINMODE
100 int bm = binmode() & 1;
101 if (bm)
102 oflag |= O_BINARY;
103 #endif
104
105 TRACE(("FINopen(%s)\n", filename));
106 if ((filename[0] == '-' && filename[1] == 0) ||
107 (filename[0] == '/' && !strcmp(filename, "/dev/stdin"))) {
108 #if USE_BINMODE
109 if (bm)
110 setmode(0, O_BINARY);
111 #endif
112 result = FINdopen(0, main_flag);
113 } else if ((fd = open(filename, oflag, 0)) != -1) {
114 result = FINdopen(fd, main_flag);
115 }
116 return result;
117 }
118
119 /* frees the buffer and fd, but leaves FIN structure until
120 the user calls close() */
121
122 void
FINsemi_close(FIN * fin)123 FINsemi_close(FIN * fin)
124 {
125 static char dead = 0;
126
127 if (fin->buff != &dead) {
128 zfree(fin->buff, (size_t) (fin->nbuffs * BUFFSZ + 1));
129
130 if (fin->fd) {
131 if (fin->fp)
132 fclose(fin->fp);
133 else
134 close(fin->fd);
135 }
136
137 fin->flags |= EOF_FLAG;
138 fin->limit =
139 fin->buff =
140 fin->buffp = &dead; /* marks it semi_closed */
141 }
142 /* else was already semi_closed */
143 }
144
145 /* user called close() on input file */
146 void
FINclose(FIN * fin)147 FINclose(FIN * fin)
148 {
149 FINsemi_close(fin);
150 ZFREE(fin);
151 }
152
153 /* return one input record as determined by RS,
154 from input file (FIN) fin
155 */
156
157 char *
FINgets(FIN * fin,size_t * len_p)158 FINgets(FIN * fin, size_t *len_p)
159 {
160 char *p;
161 char *q = 0;
162 size_t match_len;
163 size_t r;
164
165 restart:
166
167 if ((p = fin->buffp) >= fin->limit) { /* need a refill */
168 if (fin->flags & EOF_FLAG) {
169 if (fin->flags & MAIN_FLAG) {
170 fin = next_main(0);
171 goto restart;
172 } else {
173 *len_p = 0;
174 return (char *) 0;
175 }
176 }
177
178 if (fin->fp) {
179 /* line buffering */
180 if (!fgets(fin->buff, BUFFSZ + 1, fin->fp)) {
181 fin->flags |= EOF_FLAG;
182 fin->buff[0] = 0;
183 fin->buffp = fin->buff;
184 fin->limit = fin->buffp;
185 goto restart; /* might be main_fin */
186 } else { /* return this line */
187 /* find eol */
188 p = fin->buff;
189 while (*p != '\n' && *p != 0)
190 p++;
191
192 *p = 0;
193 *len_p = (unsigned) (p - fin->buff);
194 fin->buffp = p;
195 fin->limit = fin->buffp + strlen(fin->buffp);
196 return fin->buff;
197 }
198 } else {
199 /* block buffering */
200 r = fillbuff(fin->fd, fin->buff, (size_t) (fin->nbuffs * BUFFSZ));
201 if (r == 0) {
202 fin->flags |= EOF_FLAG;
203 fin->buffp = fin->buff;
204 fin->limit = fin->buffp;
205 goto restart; /* might be main */
206 } else if (r < fin->nbuffs * BUFFSZ) {
207 fin->flags |= EOF_FLAG;
208 }
209
210 fin->limit = fin->buff + r;
211 p = fin->buffp = fin->buff;
212
213 if (fin->flags & START_FLAG) {
214 fin->flags &= ~START_FLAG;
215 if (rs_shadow.type == SEP_MLR) {
216 /* trim blank lines from front of file */
217 while (*p == '\n')
218 p++;
219 fin->buffp = p;
220 if (p >= fin->limit)
221 goto restart;
222 }
223 }
224 }
225 }
226
227 retry:
228
229 switch (rs_shadow.type) {
230 case SEP_CHAR:
231 q = memchr(p, rs_shadow.c, (size_t) (fin->limit - p));
232 match_len = 1;
233 break;
234
235 case SEP_STR:
236 q = str_str(p,
237 (size_t) (fin->limit - p),
238 ((STRING *) rs_shadow.ptr)->str,
239 match_len = ((STRING *) rs_shadow.ptr)->len);
240 break;
241
242 case SEP_MLR:
243 case SEP_RE:
244 q = re_pos_match(p, (size_t) (fin->limit - p), rs_shadow.ptr,
245 &match_len,
246 (p != fin->buff) ||
247 (fin->flags & FIN_FLAG));
248 /* if the match is at the end, there might still be
249 more to match in the file */
250 if (q && q[match_len] == 0 && !(fin->flags & EOF_FLAG)) {
251 TRACE(("re_pos_match cancelled\n"));
252 q = (char *) 0;
253 }
254 break;
255
256 default:
257 bozo("type of rs_shadow");
258 }
259
260 if (q) {
261 /* the easy and normal case */
262 *q = 0;
263 *len_p = (unsigned) (q - p);
264 fin->buffp = q + match_len;
265 return p;
266 }
267
268 if (fin->flags & EOF_FLAG) {
269 /* last line without a record terminator */
270 *len_p = r = (unsigned) (fin->limit - p);
271 fin->buffp = p + r;
272
273 if (rs_shadow.type == SEP_MLR && fin->buffp[-1] == '\n'
274 && r != 0) {
275 (*len_p)--;
276 *--fin->buffp = 0;
277 fin->limit--;
278 }
279 return p;
280 }
281
282 if (p == fin->buff) {
283 /* current record is too big for the input buffer, grow buffer */
284 p = enlarge_fin_buffer(fin);
285 } else {
286 /* move a partial line to front of buffer and try again */
287 size_t rr;
288 size_t amount = (size_t) (fin->limit - p);
289 size_t blocks = fin->nbuffs * BUFFSZ;
290
291 fin->flags |= FIN_FLAG;
292 r = amount;
293 if (blocks < r) {
294 fin->flags |= EOF_FLAG;
295 return 0;
296 }
297
298 p = (char *) memmove(fin->buff, p, r);
299 q = p + r;
300 rr = blocks - r;
301
302 if ((r = fillbuff(fin->fd, q, rr)) < rr) {
303 fin->flags |= EOF_FLAG;
304 fin->limit = fin->buff + amount + r;
305 }
306 }
307 goto retry;
308 }
309
310 static char *
enlarge_fin_buffer(FIN * fin)311 enlarge_fin_buffer(FIN * fin)
312 {
313 size_t r;
314 size_t oldsize = fin->nbuffs * BUFFSZ + 1;
315 size_t limit = (size_t) (fin->limit - fin->buff);
316
317 #ifdef MSDOS
318 /* I'm not sure this can really happen:
319 avoid "16bit wrap" */
320 if (fin->nbuffs == MAX_BUFFS) {
321 errmsg(0, "out of input buffer space");
322 mawk_exit(2);
323 }
324 #endif
325
326 fin->buffp =
327 fin->buff = (char *) zrealloc(fin->buff, oldsize, oldsize + BUFFSZ);
328 fin->nbuffs++;
329
330 r = fillbuff(fin->fd, fin->buff + (oldsize - 1), (size_t) BUFFSZ);
331 if (r < BUFFSZ)
332 fin->flags |= EOF_FLAG;
333
334 fin->limit = fin->buff + limit + r;
335 return fin->buff;
336 }
337
/*--------
 * Read up to size bytes from fd into target, repeating read() until the
 * request is satisfied or read() returns 0.
 *
 * target must be big enough to hold size + 1 chars; on exit the byte
 * after the last one read is set to 0.
 *
 * Returns the number of bytes actually read.  A read() result of -1 is
 * reported with errmsg() and followed by mawk_exit(2).
 *--------------*/
size_t
fillbuff(int fd, char *target, size_t size)
{
    size_t remaining = size;
    int got;

    while (remaining != 0) {
        got = (int) read(fd, target, remaining);

        if (got == -1) {
            errmsg(errno, "read error");
            mawk_exit(2);
            /* only reached if mawk_exit() were to return */
            break;
        }
        if (got == 0)
            break;

        target += got;
        remaining -= (unsigned) got;
    }

    *target = 0;
    return (size_t) (size - remaining);
}
367
368 /* main_fin is a handle to the main input stream
369 == 0 never been opened */
370
371 FIN *main_fin;
372 ARRAY Argv; /* to the user this is ARGV */
373 static double argi = 1.0; /* index of next ARGV[argi] to try to open */
374
375 static void
set_main_to_stdin(void)376 set_main_to_stdin(void)
377 {
378 cell_destroy(FILENAME);
379 FILENAME->type = C_STRING;
380 FILENAME->ptr = (PTR) new_STRING("-");
381 cell_destroy(FNR);
382 FNR->type = C_DOUBLE;
383 FNR->dval = 0.0;
384 rt_fnr = 0;
385 main_fin = FINdopen(0, 1);
386 }
387
388 /* this gets called once to get the input stream going.
389 It is called after the execution of the BEGIN block
390 unless there is a getline inside BEGIN {}
391 */
392 void
open_main(void)393 open_main(void)
394 {
395 CELL argc;
396
397 #if USE_BINMODE
398 int k = binmode();
399
400 if (k & 1)
401 setmode(0, O_BINARY);
402 if (k & 2) {
403 setmode(1, O_BINARY);
404 setmode(2, O_BINARY);
405 }
406 #endif
407
408 cellcpy(&argc, ARGC);
409 if (argc.type != C_DOUBLE)
410 cast1_to_d(&argc);
411
412 if (argc.dval == 1.0)
413 set_main_to_stdin();
414 else
415 next_main(1);
416 }
417
418 /* get the next command line file open */
419 static FIN *
next_main(int open_flag)420 next_main(int open_flag) /* called by open_main() if on */
421 {
422 register CELL *cp;
423 CELL argc; /* copy of ARGC */
424 CELL c_argi; /* cell copy of argi */
425 CELL argval; /* copy of ARGV[c_argi] */
426 int failed = 1;
427
428 argval.type = C_NOINIT;
429 c_argi.type = C_DOUBLE;
430
431 if (main_fin) {
432 FINclose(main_fin);
433 main_fin = 0;
434 }
435 /* FILENAME and FNR don't change unless we really open
436 a new file */
437
438 /* make a copy of ARGC to avoid side effect */
439 if (cellcpy(&argc, ARGC)->type != C_DOUBLE)
440 cast1_to_d(&argc);
441
442 while (argi < argc.dval) {
443 c_argi.dval = argi;
444 argi += 1.0;
445
446 if (!(cp = array_find(Argv, &c_argi, NO_CREATE)))
447 continue; /* its deleted */
448
449 /* make a copy so we can cast w/o side effect */
450 cell_destroy(&argval);
451 cp = cellcpy(&argval, cp);
452 if (cp->type < C_STRING)
453 cast1_to_s(cp);
454 if (string(cp)->len == 0) {
455 /* file argument is "" */
456 cell_destroy(cp);
457 continue;
458 }
459
460 /* it might be a command line assignment */
461 if (is_cmdline_assign(string(cp)->str)) {
462 continue;
463 }
464
465 /* try to open it -- we used to continue on failure,
466 but posix says we should quit */
467 if (!(main_fin = FINopen(string(cp)->str, 1))) {
468 errmsg(errno, "cannot open %s", string(cp)->str);
469 mawk_exit(2);
470 }
471
472 /* success -- set FILENAME and FNR */
473 cell_destroy(FILENAME);
474 cellcpy(FILENAME, cp);
475 cell_destroy(cp);
476 cell_destroy(FNR);
477 FNR->type = C_DOUBLE;
478 FNR->dval = 0.0;
479 rt_fnr = 0;
480
481 failed = 0;
482 break;
483 }
484
485 if (failed) {
486 cell_destroy(&argval);
487
488 if (open_flag) {
489 /* all arguments were null or assignment */
490 set_main_to_stdin();
491 } else {
492 main_fin = &dead_main;
493 /* since MAIN_FLAG is not set, FINgets won't call next_main() */
494 }
495 }
496
497 return main_fin;
498 }
499
500 int
is_cmdline_assign(char * s)501 is_cmdline_assign(char *s)
502 {
503 register char *p;
504 int c;
505 SYMTAB *stp;
506 CELL *cp = 0;
507 size_t len;
508 CELL cell; /* used if command line assign to pseudo field */
509 CELL *fp = (CELL *) 0; /* ditto */
510 size_t length;
511
512 if (scan_code[*(unsigned char *) s] != SC_IDCHAR)
513 return 0;
514
515 p = s + 1;
516 while ((c = scan_code[*(unsigned char *) p]) == SC_IDCHAR
517 || c == SC_DIGIT)
518 p++;
519
520 if (*p != '=')
521 return 0;
522
523 *p = 0;
524 stp = find(s);
525
526 switch (stp->type) {
527 case ST_NONE:
528 stp->type = ST_VAR;
529 stp->stval.cp = cp = ZMALLOC(CELL);
530 break;
531
532 case ST_VAR:
533 case ST_NR: /* !! no one will do this */
534 cp = stp->stval.cp;
535 cell_destroy(cp);
536 break;
537
538 case ST_FIELD:
539 /* must be pseudo field */
540 fp = stp->stval.cp;
541 cp = &cell;
542 break;
543
544 default:
545 rt_error(
546 "cannot command line assign to %s\n\ttype clash or keyword"
547 ,s);
548 }
549
550 /* we need to keep ARGV[i] intact */
551 *p++ = '=';
552 len = strlen(p) + 1;
553 /* posix says escape sequences are on from command line */
554 p = rm_escape(strcpy((char *) zmalloc(len), p), &length);
555 cp->ptr = (PTR) new_STRING1(p, length);
556 zfree(p, len);
557 check_strnum(cp); /* sets cp->type */
558 if (fp) /* move it from cell to pfield[] */
559 {
560 field_assign(fp, cp);
561 free_STRING(string(cp));
562 }
563 return 1;
564 }
565
#ifdef NO_LEAKS
/*
 * Leak-checking support: release the memory still held by the main
 * input stream, if any.  Only the memory is freed (see free_fin_data());
 * the descriptor is not closed here.
 */
void
fin_leaks(void)
{
    TRACE(("fin_leaks\n"));

    if (!main_fin)
        return;

    free_fin_data(main_fin);
    main_fin = 0;
}
#endif
577