xref: /openbsd/usr.bin/awk/lib.c (revision fb60ec6a)
1 /*	$OpenBSD: lib.c,v 1.59 2024/08/03 21:12:16 millert Exp $	*/
2 /****************************************************************
3 Copyright (C) Lucent Technologies 1997
4 All Rights Reserved
5 
6 Permission to use, copy, modify, and distribute this software and
7 its documentation for any purpose and without fee is hereby
8 granted, provided that the above copyright notice appear in all
9 copies and that both that the copyright notice and this
10 permission notice and warranty disclaimer appear in supporting
11 documentation, and that the name Lucent Technologies or any of
12 its entities not be used in advertising or publicity pertaining
13 to distribution of the software without specific, written prior
14 permission.
15 
16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 THIS SOFTWARE.
24 ****************************************************************/
25 
26 #define DEBUG
27 #include <stdio.h>
28 #include <string.h>
29 #include <strings.h>
30 #include <ctype.h>
31 #include <errno.h>
32 #include <stdlib.h>
33 #include <stdarg.h>
34 #include <limits.h>
35 #include <math.h>
36 #include "awk.h"
37 
/*
 * File-scope state shared by the record and field routines below.
 */
38 char	EMPTY[] = { '\0' };	/* shared empty string; initial `file` and sval of cleaned fields */
39 FILE	*infile	= NULL;	/* current input stream; NULL makes getrec open the next argument */
40 bool	innew;		/* true = infile has not been read by readrec */
41 char	*file	= EMPTY;	/* current input argument, set by getrec from getargv(argno) */
42 char	*record;	/* buffer behind $0; allocated in recinit */
43 int	recsize	= RECSIZE;	/* size handed to adjbuf together with record (see recbld) */
44 char	*fields;	/* buffer holding the text of the split fields (see fldbld) */
45 int	fieldssize = RECSIZE;	/* record length `fields` is sized for; fldbld reallocates beyond it */
46 
47 Cell	**fldtab;	/* pointers to Cells */
48 static size_t	len_inputFS = 0;	/* allocated size of inputFS, maintained by savefs */
49 static char	*inputFS = NULL; /* FS at time of input, for field splitting */
50 
51 #define	MAXFLD	2	/* initial value of nfields */
52 int	nfields	= MAXFLD;	/* last allocated slot for $i */
53 
54 bool	donefld;	/* true = implies rec broken into fields */
55 bool	donerec;	/* true = record is valid (no flds have changed) */
56 
57 int	lastfld	= 0;	/* last used field */
58 int	argno	= 1;	/* current input argument number */
59 extern	Awkfloat *ARGC;
60 
/* Templates copied into fldtab[0] (recinit) and fldtab[i] (makefields). */
61 static Cell dollar0 = { OCELL, CFLD, NULL, EMPTY, 0.0, REC|STR|DONTFREE, NULL, NULL };
62 static Cell dollar1 = { OCELL, CFLD, NULL, EMPTY, 0.0, FLD|STR|DONTFREE, NULL, NULL };
63 
recinit(unsigned int n)64 void recinit(unsigned int n)
65 {
66 	if ( (record = (char *) malloc(n)) == NULL
67 	  || (fields = (char *) malloc(n+1)) == NULL
68 	  || (fldtab = (Cell **) calloc(nfields+2, sizeof(*fldtab))) == NULL
69 	  || (fldtab[0] = (Cell *) malloc(sizeof(**fldtab))) == NULL)
70 		FATAL("out of space for $0 and fields");
71 	*record = '\0';
72 	*fldtab[0] = dollar0;
73 	fldtab[0]->sval = record;
74 	fldtab[0]->nval = tostring("0");
75 	makefields(1, nfields);
76 }
77 
makefields(int n1,int n2)78 void makefields(int n1, int n2)		/* create $n1..$n2 inclusive */
79 {
80 	char temp[50];
81 	int i;
82 
83 	for (i = n1; i <= n2; i++) {
84 		fldtab[i] = (Cell *) malloc(sizeof(**fldtab));
85 		if (fldtab[i] == NULL)
86 			FATAL("out of space in makefields %d", i);
87 		*fldtab[i] = dollar1;
88 		snprintf(temp, sizeof(temp), "%d", i);
89 		fldtab[i]->nval = tostring(temp);
90 	}
91 }
92 
initgetrec(void)93 void initgetrec(void)
94 {
95 	int i;
96 	char *p;
97 
98 	for (i = 1; i < *ARGC; i++) {
99 		p = getargv(i); /* find 1st real filename */
100 		if (p == NULL || *p == '\0') {  /* deleted or zapped */
101 			argno++;
102 			continue;
103 		}
104 		if (!isclvar(p)) {
105 			setsval(lookup("FILENAME", symtab), p);
106 			return;
107 		}
108 		setclvar(p);	/* a commandline assignment before filename */
109 		argno++;
110 	}
111 	infile = stdin;		/* no filenames, so use stdin */
112 	innew = true;
113 }
114 
115 /*
116  * POSIX specifies that fields are supposed to be evaluated as if they were
117  * split using the value of FS at the time that the record's value ($0) was
118  * read.
119  *
120  * Since field-splitting is done lazily, we save the current value of FS
121  * whenever a new record is read in (implicitly or via getline), or when
122  * a new value is assigned to $0.
123  */
savefs(void)124 void savefs(void)
125 {
126 	size_t len = strlen(getsval(fsloc));
127 	if (len >= len_inputFS) {
128 		len_inputFS = len + 1;
129 		inputFS = (char *) realloc(inputFS, len_inputFS);
130 		if (inputFS == NULL)
131 			FATAL("field separator %.10s... is too long", *FS);
132 	}
133 	if (strlcpy(inputFS, *FS, len_inputFS) >= len_inputFS)
134 		FATAL("field separator %.10s... is too long", *FS);
135 }
136 
/* true until getrec has run initgetrec() once */
137 static bool firsttime = true;
138 
/*
 * getrec - read the next input record into *pbuf.
 *
 * Input files are taken from ARGV in order, starting at argno; deleted
 * or empty arguments are skipped and var=value arguments are applied as
 * assignments.  An argument of "-" selects stdin.  Failure to open a
 * file is fatal.
 *
 * pbuf, pbufsize: in/out buffer and its size; readrec may replace them.
 * isrecord: when true the record becomes $0: fldtab[0] is pointed at the
 *	buffer, the fields are marked as not yet split and the current FS
 *	is saved for later splitting.
 *
 * Returns 1 when a record was read (NR and FNR are incremented), or 0
 * at the end of all input, in which case the first byte of the buffer
 * is restored and *pbufsize is set back to its value on entry.
 */
getrec(char ** pbuf,int * pbufsize,bool isrecord)139 int getrec(char **pbuf, int *pbufsize, bool isrecord)	/* get next input record */
140 {			/* note: cares whether buf == record */
141 	int c;
142 	char *buf = *pbuf;
143 	uschar saveb0;
144 	int bufsize = *pbufsize, savebufsize = bufsize;
145 
146 	if (firsttime) {
147 		firsttime = false;
148 		initgetrec();
149 	}
150 	DPRINTF("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
151 		*RS, *FS, *ARGC, *FILENAME);
	/* remember the first byte so it can be put back if nothing is read */
152 	saveb0 = buf[0];
153 	buf[0] = 0;
154 	while (argno < *ARGC || infile == stdin) {
155 		DPRINTF("argno=%d, file=|%s|\n", argno, file);
156 		if (infile == NULL) {	/* have to open a new file */
157 			file = getargv(argno);
158 			if (file == NULL || *file == '\0') {	/* deleted or zapped */
159 				argno++;
160 				continue;
161 			}
162 			if (isclvar(file)) {	/* a var=value arg */
163 				setclvar(file);
164 				argno++;
165 				continue;
166 			}
167 			*FILENAME = file;
168 			DPRINTF("opening file %s\n", file);
169 			if (*file == '-' && *(file+1) == '\0')
170 				infile = stdin;
171 			else if ((infile = fopen(file, "r")) == NULL)
172 				FATAL("can't open file %s", file);
173 			innew = true;
			/* FNR restarts for every newly opened file */
174 			setfval(fnrloc, 0.0);
175 		}
176 		c = readrec(&buf, &bufsize, infile, innew);
177 		if (innew)
178 			innew = false;
179 		if (c != 0 || buf[0] != '\0') {	/* normal record */
180 			if (isrecord) {
181 				double result;
182 
183 				if (freeable(fldtab[0]))
184 					xfree(fldtab[0]->sval);
185 				fldtab[0]->sval = buf;	/* buf == record */
186 				fldtab[0]->tval = REC | STR | DONTFREE;
187 				if (is_number(fldtab[0]->sval, & result)) {
188 					fldtab[0]->fval = result;
189 					fldtab[0]->tval |= NUM;
190 				}
				/* fields are split lazily; remember FS as of now */
191 				donefld = false;
192 				donerec = true;
193 				savefs();
194 			}
195 			setfval(nrloc, nrloc->fval+1);
196 			setfval(fnrloc, fnrloc->fval+1);
197 			*pbuf = buf;
198 			*pbufsize = bufsize;
199 			return 1;
200 		}
201 		/* EOF arrived on this file; set up next */
202 		if (infile != stdin)
203 			fclose(infile);
204 		infile = NULL;
205 		argno++;
206 	}
207 	buf[0] = saveb0;
208 	*pbuf = buf;
209 	*pbufsize = savebufsize;
210 	return 0;	/* true end of file */
211 }
212 
nextfile(void)213 void nextfile(void)
214 {
215 	if (infile != NULL && infile != stdin)
216 		fclose(infile);
217 	infile = NULL;
218 	argno++;
219 }
220 
221 extern int readcsvrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag);
222 
/*
 * readrec - read one record from inf into *pbuf.
 *
 * Three modes, chosen in this order:
 *   - CSV set: the record is read by readcsvrec();
 *   - RS longer than one character: RS is compiled with makedfa() and
 *     the record is located with fnematch();
 *   - otherwise RS is a single separator character, or empty, in which
 *     case records are separated by blank lines: leading newlines are
 *     skipped and two newlines in a row end the record.
 *
 * The buffer is grown with adjbuf() as needed; failure is fatal.
 * *pbuf and *pbufsize are updated on return.  newflag says that inf has
 * not been read before; it is passed to readcsvrec() and selects whether
 * the regex branch overrides pfa->initstat for the match.
 *
 * Returns 0 only when nothing was read at end of file, 1 otherwise.
 */
readrec(char ** pbuf,int * pbufsize,FILE * inf,bool newflag)223 int readrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag)	/* read one record into buf */
224 {
225 	int sep, c, isrec; // POTENTIAL BUG? isrec is a macro in awk.h
226 	char *rr = *pbuf, *buf = *pbuf;
227 	int bufsize = *pbufsize;
228 	char *rs = getsval(rsloc);
229 
230 	if (CSV) {
231 		c = readcsvrec(&buf, &bufsize, inf, newflag);
232 		isrec = (c == EOF && rr == buf) ? false : true;
233 	} else if (*rs && rs[1]) {
234 		bool found;
235 
236 		memset(buf, 0, bufsize);
237 		fa *pfa = makedfa(rs, 1);
238 		if (newflag)
239 			found = fnematch(pfa, inf, &buf, &bufsize, recsize);
240 		else {
			/* not the first read of this file: match with initstat forced to 2 */
241 			int tempstat = pfa->initstat;
242 			pfa->initstat = 2;
243 			found = fnematch(pfa, inf, &buf, &bufsize, recsize);
244 			pfa->initstat = tempstat;
245 		}
		/* NOTE(review): presumably cuts the record where the separator match begins -- confirm against fnematch */
246 		if (found)
247 			setptr(patbeg, '\0');
248 		isrec = (found == 0 && *buf == '\0') ? false : true;
249 	} else {
250 		if ((sep = *rs) == 0) {
251 			sep = '\n';
252 			while ((c=getc(inf)) == '\n' && c != EOF)	/* skip leading \n's */
253 				;
254 			if (c != EOF)
255 				ungetc(c, inf);
256 		}
257 		for (rr = buf; ; ) {
258 			for (; (c=getc(inf)) != sep && c != EOF; ) {
259 				if (rr-buf+1 > bufsize)
260 					if (!adjbuf(&buf, &bufsize, 1+rr-buf,
261 					    recsize, &rr, "readrec 1"))
262 						FATAL("input record `%.30s...' too long", buf);
263 				*rr++ = c;
264 			}
			/* single-character RS, or EOF: the record is complete */
265 			if (*rs == sep || c == EOF)
266 				break;
267 			if ((c = getc(inf)) == '\n' || c == EOF)	/* 2 in a row */
268 				break;
			/* lone newline in paragraph mode: keep it and the next character */
269 			if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr,
270 			    "readrec 2"))
271 				FATAL("input record `%.30s...' too long", buf);
272 			*rr++ = '\n';
273 			*rr++ = c;
274 		}
275 		if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
276 			FATAL("input record `%.30s...' too long", buf);
277 		*rr = 0;
278 		isrec = (c == EOF && rr == buf) ? false : true;
279 	}
280 	*pbuf = buf;
281 	*pbufsize = bufsize;
282 	DPRINTF("readrec saw <%s>, returns %d\n", buf, isrec);
283 	return isrec;
284 }
285 
286 
287 /*******************
288  * loose ends here:
289  *   \r\n should become \n
290  *   what about bare \r?  Excel uses that for embedded newlines
291  *   can't have "" in unquoted fields, according to RFC 4180
292 */
293 
readcsvrec(char ** pbuf,int * pbufsize,FILE * inf,bool newflag)294 int readcsvrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* csv can have \n's */
295 {			/* so read a complete record that might be multiple lines */
296 	int sep, c;
297 	char *rr = *pbuf, *buf = *pbuf;
298 	int bufsize = *pbufsize;
299 	bool in_quote = false;
300 
301 	sep = '\n'; /* the only separator; have to skip over \n embedded in "..." */
302 	rr = buf;
303 	while ((c = getc(inf)) != EOF) {
304 		if (c == sep) {
305 			if (! in_quote)
306 				break;
307 			if (rr > buf && rr[-1] == '\r')	// remove \r if was \r\n
308 				rr--;
309 		}
310 
311 		if (rr-buf+1 > bufsize)
312 			if (!adjbuf(&buf, &bufsize, 1+rr-buf,
313 			    recsize, &rr, "readcsvrec 1"))
314 				FATAL("input record `%.30s...' too long", buf);
315 		*rr++ = c;
316 		if (c == '"')
317 			in_quote = ! in_quote;
318  	}
319 	if (c == '\n' && rr > buf && rr[-1] == '\r') 	// remove \r if was \r\n
320 		rr--;
321 
322 	if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readcsvrec 4"))
323 		FATAL("input record `%.30s...' too long", buf);
324 	*rr = 0;
325 	*pbuf = buf;
326 	*pbufsize = bufsize;
327 	DPRINTF("readcsvrec saw <%s>, returns %d\n", buf, c);
328 	return c;
329 }
330 
getargv(int n)331 char *getargv(int n)	/* get ARGV[n] */
332 {
333 	Array *ap;
334 	Cell *x;
335 	char *s, temp[50];
336 	extern Cell *ARGVcell;
337 
338 	ap = (Array *)ARGVcell->sval;
339 	snprintf(temp, sizeof(temp), "%d", n);
340 	if (lookup(temp, ap) == NULL)
341 		return NULL;
342 	x = setsymtab(temp, "", 0.0, STR, ap);
343 	s = getsval(x);
344 	DPRINTF("getargv(%d) returns |%s|\n", n, s);
345 	return s;
346 }
347 
setclvar(char * s)348 void setclvar(char *s)	/* set var=value from s */
349 {
350 	char *e, *p;
351 	Cell *q;
352 	double result;
353 
354 /* commit f3d9187d4e0f02294fb1b0e31152070506314e67 broke T.argv test */
355 /* I don't understand why it was changed. */
356 
357 	for (p=s; *p != '='; p++)
358 		;
359 	e = p;
360 	*p++ = 0;
361 	p = qstring(p, '\0');
362 	q = setsymtab(s, p, 0.0, STR, symtab);
363 	setsval(q, p);
364 	if (is_number(q->sval, & result)) {
365 		q->fval = result;
366 		q->tval |= NUM;
367 	}
368 	DPRINTF("command line set %s to |%s|\n", s, p);
369 	free(p);
370 	*e = '=';
371 }
372 
373 
/*
 * fldbld - split $0 into $1..$NF unless that has already been done.
 *
 * The separator used is inputFS, the value of FS saved by savefs().
 * One of five strategies is chosen:
 *   - not CSV and inputFS longer than one character: regular
 *     expression splitting via refldbld();
 *   - not CSV and inputFS is a single space: fields are runs of
 *     characters other than blank, tab and newline;
 *   - CSV: comma-separated fields, with "..." quoting and "" standing
 *     for a literal quote inside a quoted field;
 *   - empty inputFS: every character (u8_nextlen() bytes) is a field,
 *     stored in its own tostring() copy;
 *   - any other single character: split on that character, and on
 *     newline as well when RS is empty.
 * Except in the empty-FS case the field text is stored back to back in
 * fields[], each field followed by '\0'.
 *
 * Afterwards cells left over from the previous record are cleaned,
 * lastfld and NF are set, fields that look numeric are flagged NUM,
 * and donefld/donerec are set.
 */
fldbld(void)374 void fldbld(void)	/* create fields from current record */
375 {
376 	/* this relies on having fields[] the same length as $0 */
377 	/* the fields are all stored in this one array with \0's */
378 	/* possibly with a final trailing \0 not associated with any field */
379 	char *r, *fr, sep;
380 	Cell *p;
381 	int i, j, n;
382 
383 	if (donefld)
384 		return;
385 	if (!isstr(fldtab[0]))
386 		getsval(fldtab[0]);
387 	r = fldtab[0]->sval;
388 	n = strlen(r);
389 	if (n > fieldssize) {
390 		xfree(fields);
391 		if ((fields = (char *) malloc(n+2)) == NULL) /* possibly 2 final \0s */
392 			FATAL("out of space for fields in fldbld %d", n);
393 		fieldssize = n;
394 	}
395 	fr = fields;
396 	i = 0;	/* number of fields accumulated here */
397 	if (inputFS == NULL)	/* make sure we have a copy of FS */
398 		savefs();
399 	if (!CSV && strlen(inputFS) > 1) {	/* it's a regular expression */
400 		i = refldbld(r, inputFS);
401 	} else if (!CSV && (sep = *inputFS) == ' ') {	/* default whitespace */
402 		for (i = 0; ; ) {
403 			while (*r == ' ' || *r == '\t' || *r == '\n')
404 				r++;
405 			if (*r == 0)
406 				break;
407 			i++;
408 			if (i > nfields)
409 				growfldtab(i);
410 			if (freeable(fldtab[i]))
411 				xfree(fldtab[i]->sval);
412 			fldtab[i]->sval = fr;
413 			fldtab[i]->tval = FLD | STR | DONTFREE;
414 			do
415 				*fr++ = *r++;
416 			while (*r != ' ' && *r != '\t' && *r != '\n' && *r != '\0');
417 			*fr++ = 0;
418 		}
419 		*fr = 0;
420 	} else if (CSV) {	/* CSV processing.  no error handling */
421 		if (*r != 0) {
422 			for (;;) {
423 				i++;
424 				if (i > nfields)
425 					growfldtab(i);
426 				if (freeable(fldtab[i]))
427 					xfree(fldtab[i]->sval);
428 				fldtab[i]->sval = fr;
429 				fldtab[i]->tval = FLD | STR | DONTFREE;
430 				if (*r == '"' ) { /* start of "..." */
431 					for (r++ ; *r != '\0'; ) {
432 						if (*r == '"' && r[1] != '\0' && r[1] == '"') {
433 							r += 2; /* doubled quote */
434 							*fr++ = '"';
435 						} else if (*r == '"' && (r[1] == '\0' || r[1] == ',')) {
436 							r++; /* skip over closing quote */
437 							break;
438 						} else {
439 							*fr++ = *r++;
440 						}
441 					}
442 					*fr++ = 0;
443 				} else {	/* unquoted field */
444 					while (*r != ',' && *r != '\0')
445 						*fr++ = *r++;
446 					*fr++ = 0;
447 				}
				/* step over the comma; stop at the end of the record */
448 				if (*r++ == 0)
449 					break;
450 
451 			}
452 		}
453 		*fr = 0;
454 	} else if ((sep = *inputFS) == 0) {	/* new: FS="" => 1 char/field */
455 		for (i = 0; *r != '\0'; ) {
456 			char buf[10];
457 			i++;
458 			if (i > nfields)
459 				growfldtab(i);
460 			if (freeable(fldtab[i]))
461 				xfree(fldtab[i]->sval);
462 			n = u8_nextlen(r);
463 			for (j = 0; j < n; j++)
464 				buf[j] = *r++;
465 			buf[j] = '\0';
			/* private copy, so DONTFREE is not set for this field */
466 			fldtab[i]->sval = tostring(buf);
467 			fldtab[i]->tval = FLD | STR;
468 		}
469 		*fr = 0;
470 	} else if (*r != 0) {	/* if 0, it's a null field */
471 		/* subtle case: if length(FS) == 1 && length(RS) > 0
472 		 * \n is NOT a field separator (cf awk book 61,84).
473 		 * this variable is tested in the inner while loop.
474 		 */
475 		int rtest = '\n';  /* normal case */
476 		if (strlen(*RS) > 0)
477 			rtest = '\0';
478 		for (;;) {
479 			i++;
480 			if (i > nfields)
481 				growfldtab(i);
482 			if (freeable(fldtab[i]))
483 				xfree(fldtab[i]->sval);
484 			fldtab[i]->sval = fr;
485 			fldtab[i]->tval = FLD | STR | DONTFREE;
486 			while (*r != sep && *r != rtest && *r != '\0')	/* \n is always a separator */
487 				*fr++ = *r++;
488 			*fr++ = 0;
489 			if (*r++ == 0)
490 				break;
491 		}
492 		*fr = 0;
493 	}
494 	if (i > nfields)
495 		FATAL("record `%.30s...' has too many fields; can't happen", r);
496 	cleanfld(i+1, lastfld);	/* clean out junk from previous record */
497 	lastfld = i;
498 	donefld = true;
499 	for (j = 1; j <= lastfld; j++) {
500 		double result;
501 
502 		p = fldtab[j];
503 		if(is_number(p->sval, & result)) {
504 			p->fval = result;
505 			p->tval |= NUM;
506 		}
507 	}
508 	setfval(nfloc, (Awkfloat) lastfld);
509 	donerec = true; /* restore */
510 	if (dbg) {
511 		for (j = 0; j <= lastfld; j++) {
512 			p = fldtab[j];
513 			printf("field %d (%s): |%s|\n", j, p->nval, p->sval);
514 		}
515 	}
516 }
517 
cleanfld(int n1,int n2)518 void cleanfld(int n1, int n2)	/* clean out fields n1 .. n2 inclusive */
519 {				/* nvals remain intact */
520 	Cell *p;
521 	int i;
522 
523 	for (i = n1; i <= n2; i++) {
524 		p = fldtab[i];
525 		if (freeable(p))
526 			xfree(p->sval);
527 		p->sval = EMPTY,
528 		p->tval = FLD | STR | DONTFREE;
529 	}
530 }
531 
newfld(int n)532 void newfld(int n)	/* add field n after end of existing lastfld */
533 {
534 	if (n > nfields)
535 		growfldtab(n);
536 	cleanfld(lastfld+1, n);
537 	lastfld = n;
538 	setfval(nfloc, (Awkfloat) n);
539 }
540 
setlastfld(int n)541 void setlastfld(int n)	/* set lastfld cleaning fldtab cells if necessary */
542 {
543 	if (n < 0)
544 		FATAL("cannot set NF to a negative value");
545 	if (n > nfields)
546 		growfldtab(n);
547 
548 	if (lastfld < n)
549 	    cleanfld(lastfld+1, n);
550 	else
551 	    cleanfld(n+1, lastfld);
552 
553 	lastfld = n;
554 }
555 
fieldadr(int n)556 Cell *fieldadr(int n)	/* get nth field */
557 {
558 	if (n < 0)
559 		FATAL("trying to access out of range field %d", n);
560 	if (n > nfields)	/* fields after NF are empty */
561 		growfldtab(n);	/* but does not increase NF */
562 	return(fldtab[n]);
563 }
564 
growfldtab(int n)565 void growfldtab(int n)	/* make new fields up to at least $n */
566 {
567 	int nf = 2 * nfields;
568 	size_t s;
569 
570 	if (n > nf)
571 		nf = n;
572 	s = (nf+1) * (sizeof (struct Cell *));  /* freebsd: how much do we need? */
573 	if (s / sizeof(struct Cell *) - 1 == (size_t)nf) /* didn't overflow */
574 		fldtab = (Cell **) realloc(fldtab, s);
575 	else					/* overflow sizeof int */
576 		xfree(fldtab);	/* make it null */
577 	if (fldtab == NULL)
578 		FATAL("out of space creating %d fields", nf);
579 	makefields(nfields+1, nf);
580 	nfields = nf;
581 }
582 
/*
 * refldbld - split rec into fields using the regular expression fs.
 *
 * fs is compiled with makedfa().  Each match found by nematch() ends
 * the current field: the text before the match is copied into fields[]
 * and scanning resumes after the match.  The text left after the last
 * match becomes the final field.  Every copy is checked against the
 * space remaining in fields[]; running out is fatal.
 *
 * Returns the number of fields built, or 0 for an empty record.
 *
 * NOTE(review): patbeg and patlen are taken to describe the most recent
 * match made by nematch() -- confirm in b.c.
 */
refldbld(const char * rec,const char * fs)583 int refldbld(const char *rec, const char *fs)	/* build fields from reg expr in FS */
584 {
585 	/* this relies on having fields[] the same length as $0 */
586 	/* the fields are all stored in this one array with \0's */
587 	char *fr;
588 	int i, tempstat, n;
589 	fa *pfa;
590 
591 	n = strlen(rec);
592 	if (n > fieldssize) {
593 		xfree(fields);
594 		if ((fields = (char *) malloc(n+1)) == NULL)
595 			FATAL("out of space for fields in refldbld %d", n);
596 		fieldssize = n;
597 	}
598 	fr = fields;
599 	*fr = '\0';
600 	if (*rec == '\0')
601 		return 0;
602 	pfa = makedfa(fs, 1);
603 	DPRINTF("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs);
	/* saved so the initial state can be restored after the last field */
604 	tempstat = pfa->initstat;
605 	for (i = 1; ; i++) {
		/* bytes still unused in fields[], counting the terminator slot */
606 		const size_t fss_rem = fields + fieldssize + 1 - fr;
607 		if (i > nfields)
608 			growfldtab(i);
609 		if (freeable(fldtab[i]))
610 			xfree(fldtab[i]->sval);
611 		fldtab[i]->tval = FLD | STR | DONTFREE;
612 		fldtab[i]->sval = fr;
613 		DPRINTF("refldbld: i=%d\n", i);
614 		if (nematch(pfa, rec)) {
615 			const size_t reclen = patbeg - rec;
616 			pfa->initstat = 2;	/* horrible coupling to b.c */
617 			DPRINTF("match %s (%d chars)\n", patbeg, patlen);
618 			if (reclen >= fss_rem)
619 				FATAL("out of space for fields in refldbld");
620 			memcpy(fr, rec, reclen);
621 			fr += reclen;
622 			*fr++ = '\0';
623 			rec = patbeg + patlen;
624 		} else {
625 			DPRINTF("no match %s\n", rec);
626 			if (strlcpy(fr, rec, fss_rem) >= fss_rem)
627 				FATAL("out of space for fields in refldbld");
628 			pfa->initstat = tempstat;
629 			break;
630 		}
631 	}
632 	return i;
633 }
634 
/*
 * recbld - rebuild $0 from $1..$NF when the record is out of date.
 *
 * Does nothing if donerec is already set.  Otherwise the string values
 * of the fields are concatenated into record, with the string value of
 * OFS between consecutive fields.  record is grown with adjbuf() as
 * needed, which also keeps the write pointer r valid; failure is fatal.
 * Finally fldtab[0] is pointed at record and donerec is set.
 */
recbld(void)635 void recbld(void)	/* create $0 from $1..$NF if necessary */
636 {
637 	int i;
638 	char *r, *p;
639 	char *sep = getsval(ofsloc);
640 
641 	if (donerec)
642 		return;
643 	r = record;
644 	for (i = 1; i <= *NF; i++) {
645 		p = getsval(fldtab[i]);
646 		if (!adjbuf(&record, &recsize, 1+strlen(p)+r-record, recsize, &r, "recbld 1"))
647 			FATAL("created $0 `%.30s...' too long", record);
		/* copy the field; r is left on the terminating NUL */
648 		while ((*r = *p++) != 0)
649 			r++;
650 		if (i < *NF) {
651 			if (!adjbuf(&record, &recsize, 2+strlen(sep)+r-record, recsize, &r, "recbld 2"))
652 				FATAL("created $0 `%.30s...' too long", record);
653 			for (p = sep; (*r = *p++) != 0; )
654 				r++;
655 		}
656 	}
657 	if (!adjbuf(&record, &recsize, 2+r-record, recsize, &r, "recbld 3"))
658 		FATAL("built giant record `%.30s...'", record);
659 	*r = '\0';
660 	DPRINTF("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]);
661 
662 	if (freeable(fldtab[0]))
663 		xfree(fldtab[0]->sval);
664 	fldtab[0]->tval = REC | STR | DONTFREE;
665 	fldtab[0]->sval = record;
666 
667 	DPRINTF("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]);
668 	DPRINTF("recbld = |%s|\n", record);
669 	donerec = true;
670 }
671 
int	errorflag	= 0;	/* set to 2 by SYNTAX() once an error has been reported */

/*
 * yyerror - report the message s as a syntax error via SYNTAX().
 */
void yyerror(const char *s)
{
	SYNTAX("%s", s);
}
678 
SYNTAX(const char * fmt,...)679 void SYNTAX(const char *fmt, ...)
680 {
681 	extern char *cmdname, *curfname;
682 	static int been_here = 0;
683 	va_list varg;
684 
685 	if (been_here++ > 2)
686 		return;
687 	fprintf(stderr, "%s: ", cmdname);
688 	va_start(varg, fmt);
689 	vfprintf(stderr, fmt, varg);
690 	va_end(varg);
691 	fprintf(stderr, " at source line %d", lineno);
692 	if (curfname != NULL)
693 		fprintf(stderr, " in function %s", curfname);
694 	if (compile_time == COMPILING && cursource() != NULL)
695 		fprintf(stderr, " source file %s", cursource());
696 	fprintf(stderr, "\n");
697 	errorflag = 2;
698 	eprint();
699 }
700 
extern int bracecnt, brackcnt, parencnt;

/*
 * bracecheck - report unbalanced {}, [] and () once.
 *
 * On the first call only, the rest of the input is read through
 * bclass() to finish the bracket counts, and each count is then
 * reported by bcheck2().
 */
void bracecheck(void)
{
	static int beenhere = 0;
	int ch;

	if (beenhere++ != 0)
		return;
	for (ch = input(); ch != EOF && ch != '\0'; ch = input())
		bclass(ch);
	bcheck2(bracecnt, '{', '}');
	bcheck2(brackcnt, '[', ']');
	bcheck2(parencnt, '(', ')');
}
716 
/*
 * bcheck2 - report an unbalanced bracket count on stderr.
 *
 * n is the nesting count: positive means closers (c2) are missing,
 * negative means there are extra closers, zero prints nothing.
 * c1 is accepted but not used.
 */
void bcheck2(int n, int c1, int c2)
{
	if (n == 0)
		return;

	if (n > 0) {
		if (n == 1)
			fprintf(stderr, "\tmissing %c\n", c2);
		else
			fprintf(stderr, "\t%d missing %c's\n", n, c2);
	} else {
		if (n == -1)
			fprintf(stderr, "\textra %c\n", c2);
		else
			fprintf(stderr, "\t%d extra %c's\n", -n, c2);
	}
}
728 
FATAL(const char * fmt,...)729 void FATAL(const char *fmt, ...)
730 {
731 	extern char *cmdname;
732 	va_list varg;
733 
734 	fflush(stdout);
735 	fprintf(stderr, "%s: ", cmdname);
736 	va_start(varg, fmt);
737 	vfprintf(stderr, fmt, varg);
738 	va_end(varg);
739 	error();
740 	if (dbg > 1)		/* core dump if serious debugging on */
741 		abort();
742 	exit(2);
743 }
744 
/*
 * WARNING - print a formatted warning and carry on.
 *
 * stdout is flushed, then the message is written to stderr prefixed
 * with the command name and followed by the location details from
 * error().
 */
void WARNING(const char *fmt, ...)
{
	extern char *cmdname;
	va_list ap;

	fflush(stdout);
	fprintf(stderr, "%s: ", cmdname);

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);

	error();
}
757 
error(void)758 void error(void)
759 {
760 	extern Node *curnode;
761 
762 	fprintf(stderr, "\n");
763 	if (compile_time != ERROR_PRINTING) {
764 		if (NR && *NR > 0) {
765 			fprintf(stderr, " input record number %d", (int) (*FNR));
766 			if (strcmp(*FILENAME, "-") != 0)
767 				fprintf(stderr, ", file %s", *FILENAME);
768 			fprintf(stderr, "\n");
769 		}
770 		if (curnode)
771 			fprintf(stderr, " source line number %d", curnode->lineno);
772 		else if (lineno)
773 			fprintf(stderr, " source line number %d", lineno);
774 		if (compile_time == COMPILING && cursource() != NULL)
775 			fprintf(stderr, " source file %s", cursource());
776 		fprintf(stderr, "\n");
777 		eprint();
778 	}
779 }
780 
/*
 * eprint - print the source text around a compile-time error.
 *
 * Does nothing unless compile_time is COMPILING, this is the first
 * call, and ebuf is not empty (ebuf != ep).  Otherwise it finds the
 * start of the line that ends at ep, prints that line with the last
 * blank-delimited token wrapped in " >>> " and " <<< ", and then echoes
 * the rest of the source line from input(), passing each character to
 * bclass() so the bracket counts stay current.  ep is reset to ebuf
 * before returning.
 */
eprint(void)781 void eprint(void)	/* try to print context around error */
782 {
783 	char *p, *q;
784 	int c;
785 	static int been_here = 0;
786 	extern char ebuf[], *ep;
787 
788 	if (compile_time != COMPILING || been_here++ > 0 || ebuf == ep)
789 		return;
	/* walk back from the last stored character to the start of its line */
790 	p = ep - 1;
791 	if (p > ebuf && *p == '\n')
792 		p--;
793 	for ( ; p > ebuf && *p != '\n' && *p != '\0'; p--)
794 		;
795 	while (*p == '\n')
796 		p++;
797 	fprintf(stderr, " context is\n\t");
	/* q: the last blank, tab or newline before ep, or just below p if none */
798 	for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--)
799 		;
800 	for ( ; p < q; p++)
801 		if (*p)
802 			putc(*p, stderr);
803 	fprintf(stderr, " >>> ");
804 	for ( ; p < ep; p++)
805 		if (*p)
806 			putc(*p, stderr);
807 	fprintf(stderr, " <<< ");
808 	if (*ep)
809 		while ((c = input()) != '\n' && c != '\0' && c != EOF) {
810 			putc(c, stderr);
811 			bclass(c);
812 		}
813 	putc('\n', stderr);
814 	ep = ebuf;
815 }
816 
bclass(int c)817 void bclass(int c)
818 {
819 	switch (c) {
820 	case '{': bracecnt++; break;
821 	case '}': bracecnt--; break;
822 	case '[': brackcnt++; break;
823 	case ']': brackcnt--; break;
824 	case '(': parencnt++; break;
825 	case ')': parencnt--; break;
826 	}
827 }
828 
/*
 * errcheck - inspect errno after a math library call.
 *
 * x is the value the call produced and s names the function for the
 * warning text.  When errno is EDOM or ERANGE it is cleared, a warning
 * is issued and 1 is returned in place of x; otherwise x is returned
 * unchanged.
 */
double errcheck(double x, const char *s)
{
	const int err = errno;

	if (err != EDOM && err != ERANGE)
		return x;

	errno = 0;
	if (err == EDOM)
		WARNING("%s argument out of domain", s);
	else
		WARNING("%s result out of range", s);
	return 1;
}
843 
/*
 * isclvar - is s of the form var=something?
 *
 * var must start with a letter or underscore and continue with
 * letters, digits or underscores, and must be followed directly by
 * '='.  Returns 1 if so, 0 otherwise.
 */
int isclvar(const char *s)
{
	const char *p = s;

	if (*p != '_' && !isalpha((unsigned char)*p))
		return 0;
	/* skip over the identifier characters */
	while (*p != '\0' && (*p == '_' || isalnum((unsigned char)*p)))
		p++;
	return *p == '=' && p > s;
}
855 
856 /* strtod is supposed to be a proper test of what's a valid number */
857 /* appears to be broken in gcc on linux: thinks 0x123 is a valid FP number */
858 /* wrong: violates 4.10.1.4 of ansi C standard */
859 
860 /* well, not quite. As of C99, hex floating point is allowed. so this is
861  * a bit of a mess. We work around the mess by checking for a hexadecimal
862  * value and disallowing it. Similarly, we now follow gawk and allow only
863  * +nan, -nan, +inf, and -inf for NaN and infinity values.
864  */
865 
866 /*
867  * This routine now has a more complicated interface, the main point
868  * being to avoid the double conversion of a string to double, and
869  * also to convey out, if requested, the information that the numeric
870  * value was a leading string or is all of the string. The latter bit
871  * is used in getfval().
872  */
873 
/*
 * is_valid_number - decide whether s looks like a number and convert it.
 *
 * s: the string to examine; leading white space is skipped.
 * trailing_stuff_ok: when true, text after the number does not make the
 *	string invalid.
 * no_trailing: if not NULL, set to true when only white space follows
 *	the number, false otherwise (also false on every failure path).
 * result: if not NULL, receives the converted value whenever strtod()
 *	converted something within range, even if the function then
 *	returns false because of trailing text.
 *
 * Hexadecimal input is rejected, with or without a leading sign, and
 * NaN and infinity are accepted only in the signed forms +nan, -nan,
 * +inf and -inf.  Values that make strtod() report ERANGE are rejected.
 *
 * Returns true when s is a number under these rules.
 */
bool is_valid_number(const char *s, bool trailing_stuff_ok,
			bool *no_trailing, double *result)
{
	double r;
	char *ep;
	const char *unsigned_part;
	bool is_nan = false;
	bool is_inf = false;

	if (no_trailing)
		*no_trailing = false;

	while (isspace((unsigned char)*s))
		s++;

	/*
	 * no hex floating point, sorry.  Look past an optional sign so
	 * that "-0x10" is rejected just like "0x10"; otherwise the digit
	 * test below lets it through and strtod() converts it.
	 */
	unsigned_part = (s[0] == '+' || s[0] == '-') ? s + 1 : s;
	if (unsigned_part[0] == '0' && tolower((unsigned char)unsigned_part[1]) == 'x')
		return false;

	/* allow +nan, -nan, +inf, -inf, any other letter, no */
	if (s[0] == '+' || s[0] == '-') {
		is_nan = (strncasecmp(s+1, "nan", 3) == 0);
		is_inf = (strncasecmp(s+1, "inf", 3) == 0);
		if ((is_nan || is_inf)
		    && (isspace((unsigned char)s[4]) || s[4] == '\0'))
			goto convert;
		else if (! isdigit((unsigned char)s[1]) && s[1] != '.')
			return false;
	}
	else if (! isdigit((unsigned char)s[0]) && s[0] != '.')
		return false;

convert:
	errno = 0;
	r = strtod(s, &ep);
	if (ep == s || errno == ERANGE)
		return false;

	/* make sure "-nan" really carries the sign bit */
	if (isnan(r) && s[0] == '-' && signbit(r) == 0)
		r = -r;

	if (result != NULL)
		*result = r;

	/*
	 * check for trailing stuff
	 */
	while (isspace((unsigned char)*ep))
		ep++;

	if (no_trailing != NULL)
		*no_trailing = (*ep == '\0');

	/* return true if found the end, or trailing stuff is allowed */
	return *ep == '\0' || trailing_stuff_ok;
}
932