xref: /dragonfly/contrib/awk/lib.c (revision ed569bc2)
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14 
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
25 #define DEBUG
26 #include <stdio.h>
27 #include <string.h>
28 #include <strings.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <stdlib.h>
32 #include <stdarg.h>
33 #include <limits.h>
34 #include <math.h>
35 #include "awk.h"
36 
37 extern int u8_nextlen(const char *s);
38 
char	EMPTY[] = { '\0' };	/* shared empty string, used as a placeholder value */
FILE	*infile	= NULL;	/* current input stream; NULL when a new one must be opened */
bool	innew;		/* true = infile has not been read by readrec */
char	*file	= EMPTY;	/* name of the input file currently being read */
char	*record;	/* buffer that holds $0 */
int	recsize	= RECSIZE;	/* size passed to adjbuf when record is grown */
char	*fields;	/* one buffer holding all field strings, separated by \0's */
int	fieldssize = RECSIZE;	/* compared with strlen($0) to decide when fields must be reallocated */

Cell	**fldtab;	/* pointers to Cells */
static size_t	len_inputFS = 0;	/* bytes allocated for inputFS */
static char	*inputFS = NULL; /* FS at time of input, for field splitting */

#define	MAXFLD	2
int	nfields	= MAXFLD;	/* last allocated slot for $i */

bool	donefld;	/* true = implies rec broken into fields */
bool	donerec;	/* true = record is valid (no flds have changed) */

int	lastfld	= 0;	/* last used field */
int	argno	= 1;	/* current input argument number */
extern	Awkfloat *ARGC;

/* templates copied into newly created $0 and $i cells */
static Cell dollar0 = { OCELL, CFLD, NULL, EMPTY, 0.0, REC|STR|DONTFREE, NULL, NULL };
static Cell dollar1 = { OCELL, CFLD, NULL, EMPTY, 0.0, FLD|STR|DONTFREE, NULL, NULL };

64 
recinit(unsigned int n)65 void recinit(unsigned int n)
66 {
67 	if ( (record = (char *) malloc(n)) == NULL
68 	  || (fields = (char *) malloc(n+1)) == NULL
69 	  || (fldtab = (Cell **) calloc(nfields+2, sizeof(*fldtab))) == NULL
70 	  || (fldtab[0] = (Cell *) malloc(sizeof(**fldtab))) == NULL)
71 		FATAL("out of space for $0 and fields");
72 	*record = '\0';
73 	*fldtab[0] = dollar0;
74 	fldtab[0]->sval = record;
75 	fldtab[0]->nval = tostring("0");
76 	makefields(1, nfields);
77 }
78 
makefields(int n1,int n2)79 void makefields(int n1, int n2)		/* create $n1..$n2 inclusive */
80 {
81 	char temp[50];
82 	int i;
83 
84 	for (i = n1; i <= n2; i++) {
85 		fldtab[i] = (Cell *) malloc(sizeof(**fldtab));
86 		if (fldtab[i] == NULL)
87 			FATAL("out of space in makefields %d", i);
88 		*fldtab[i] = dollar1;
89 		snprintf(temp, sizeof(temp), "%d", i);
90 		fldtab[i]->nval = tostring(temp);
91 	}
92 }
93 
initgetrec(void)94 void initgetrec(void)
95 {
96 	int i;
97 	char *p;
98 
99 	for (i = 1; i < *ARGC; i++) {
100 		p = getargv(i); /* find 1st real filename */
101 		if (p == NULL || *p == '\0') {  /* deleted or zapped */
102 			argno++;
103 			continue;
104 		}
105 		if (!isclvar(p)) {
106 			setsval(lookup("FILENAME", symtab), p);
107 			return;
108 		}
109 		setclvar(p);	/* a commandline assignment before filename */
110 		argno++;
111 	}
112 	infile = stdin;		/* no filenames, so use stdin */
113 	innew = true;
114 }
115 
116 /*
117  * POSIX specifies that fields are supposed to be evaluated as if they were
118  * split using the value of FS at the time that the record's value ($0) was
119  * read.
120  *
121  * Since field-splitting is done lazily, we save the current value of FS
122  * whenever a new record is read in (implicitly or via getline), or when
123  * a new value is assigned to $0.
124  */
savefs(void)125 void savefs(void)
126 {
127 	size_t len;
128 	if ((len = strlen(getsval(fsloc))) < len_inputFS) {
129 		strcpy(inputFS, *FS);	/* for subsequent field splitting */
130 		return;
131 	}
132 
133 	len_inputFS = len + 1;
134 	inputFS = (char *) realloc(inputFS, len_inputFS);
135 	if (inputFS == NULL)
136 		FATAL("field separator %.10s... is too long", *FS);
137 	memcpy(inputFS, *FS, len_inputFS);
138 }
139 
140 static bool firsttime = true;
141 
getrec(char ** pbuf,int * pbufsize,bool isrecord)142 int getrec(char **pbuf, int *pbufsize, bool isrecord)	/* get next input record */
143 {			/* note: cares whether buf == record */
144 	int c;
145 	char *buf = *pbuf;
146 	uschar saveb0;
147 	int bufsize = *pbufsize, savebufsize = bufsize;
148 
149 	if (firsttime) {
150 		firsttime = false;
151 		initgetrec();
152 	}
153 	DPRINTF("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
154 		*RS, *FS, *ARGC, *FILENAME);
155 	saveb0 = buf[0];
156 	buf[0] = 0;
157 	while (argno < *ARGC || infile == stdin) {
158 		DPRINTF("argno=%d, file=|%s|\n", argno, file);
159 		if (infile == NULL) {	/* have to open a new file */
160 			file = getargv(argno);
161 			if (file == NULL || *file == '\0') {	/* deleted or zapped */
162 				argno++;
163 				continue;
164 			}
165 			if (isclvar(file)) {	/* a var=value arg */
166 				setclvar(file);
167 				argno++;
168 				continue;
169 			}
170 			*FILENAME = file;
171 			DPRINTF("opening file %s\n", file);
172 			if (*file == '-' && *(file+1) == '\0')
173 				infile = stdin;
174 			else if ((infile = fopen(file, "r")) == NULL)
175 				FATAL("can't open file %s", file);
176 			innew = true;
177 			setfval(fnrloc, 0.0);
178 		}
179 		c = readrec(&buf, &bufsize, infile, innew);
180 		if (innew)
181 			innew = false;
182 		if (c != 0 || buf[0] != '\0') {	/* normal record */
183 			if (isrecord) {
184 				double result;
185 
186 				if (freeable(fldtab[0]))
187 					xfree(fldtab[0]->sval);
188 				fldtab[0]->sval = buf;	/* buf == record */
189 				fldtab[0]->tval = REC | STR | DONTFREE;
190 				if (is_number(fldtab[0]->sval, & result)) {
191 					fldtab[0]->fval = result;
192 					fldtab[0]->tval |= NUM;
193 				}
194 				donefld = false;
195 				donerec = true;
196 				savefs();
197 			}
198 			setfval(nrloc, nrloc->fval+1);
199 			setfval(fnrloc, fnrloc->fval+1);
200 			*pbuf = buf;
201 			*pbufsize = bufsize;
202 			return 1;
203 		}
204 		/* EOF arrived on this file; set up next */
205 		if (infile != stdin)
206 			fclose(infile);
207 		infile = NULL;
208 		argno++;
209 	}
210 	buf[0] = saveb0;
211 	*pbuf = buf;
212 	*pbufsize = savebufsize;
213 	return 0;	/* true end of file */
214 }
215 
nextfile(void)216 void nextfile(void)
217 {
218 	if (infile != NULL && infile != stdin)
219 		fclose(infile);
220 	infile = NULL;
221 	argno++;
222 }
223 
224 extern int readcsvrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag);
225 
readrec(char ** pbuf,int * pbufsize,FILE * inf,bool newflag)226 int readrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag)	/* read one record into buf */
227 {
228 	int sep, c, isrec; // POTENTIAL BUG? isrec is a macro in awk.h
229 	char *rr = *pbuf, *buf = *pbuf;
230 	int bufsize = *pbufsize;
231 	char *rs = getsval(rsloc);
232 
233 	if (CSV) {
234 		c = readcsvrec(pbuf, pbufsize, inf, newflag);
235 		isrec = (c == EOF && rr == buf) ? false : true;
236 	} else if (*rs && rs[1]) {
237 		bool found;
238 
239 		memset(buf, 0, bufsize);
240 		fa *pfa = makedfa(rs, 1);
241 		if (newflag)
242 			found = fnematch(pfa, inf, &buf, &bufsize, recsize);
243 		else {
244 			int tempstat = pfa->initstat;
245 			pfa->initstat = 2;
246 			found = fnematch(pfa, inf, &buf, &bufsize, recsize);
247 			pfa->initstat = tempstat;
248 		}
249 		if (found)
250 			setptr(patbeg, '\0');
251 		isrec = (found == 0 && *buf == '\0') ? false : true;
252 
253 	} else {
254 		if ((sep = *rs) == 0) {
255 			sep = '\n';
256 			while ((c=getc(inf)) == '\n' && c != EOF)	/* skip leading \n's */
257 				;
258 			if (c != EOF)
259 				ungetc(c, inf);
260 		}
261 		for (rr = buf; ; ) {
262 			for (; (c=getc(inf)) != sep && c != EOF; ) {
263 				if (rr-buf+1 > bufsize)
264 					if (!adjbuf(&buf, &bufsize, 1+rr-buf,
265 					    recsize, &rr, "readrec 1"))
266 						FATAL("input record `%.30s...' too long", buf);
267 				*rr++ = c;
268 			}
269 			if (*rs == sep || c == EOF)
270 				break;
271 			if ((c = getc(inf)) == '\n' || c == EOF)	/* 2 in a row */
272 				break;
273 			if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr,
274 			    "readrec 2"))
275 				FATAL("input record `%.30s...' too long", buf);
276 			*rr++ = '\n';
277 			*rr++ = c;
278 		}
279 		if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3"))
280 			FATAL("input record `%.30s...' too long", buf);
281 		*rr = 0;
282 		isrec = (c == EOF && rr == buf) ? false : true;
283 	}
284 	*pbuf = buf;
285 	*pbufsize = bufsize;
286 	DPRINTF("readrec saw <%s>, returns %d\n", buf, isrec);
287 	return isrec;
288 }
289 
290 
291 /*******************
292  * loose ends here:
293  *   \r\n should become \n
294  *   what about bare \r?  Excel uses that for embedded newlines
295  *   can't have "" in unquoted fields, according to RFC 4180
296 */
297 
298 
readcsvrec(char ** pbuf,int * pbufsize,FILE * inf,bool newflag)299 int readcsvrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* csv can have \n's */
300 {			/* so read a complete record that might be multiple lines */
301 	int sep, c;
302 	char *rr = *pbuf, *buf = *pbuf;
303 	int bufsize = *pbufsize;
304 	bool in_quote = false;
305 
306 	sep = '\n'; /* the only separator; have to skip over \n embedded in "..." */
307 	rr = buf;
308 	while ((c = getc(inf)) != EOF) {
309 		if (c == sep) {
310 			if (! in_quote)
311 				break;
312 			if (rr > buf && rr[-1] == '\r')	// remove \r if was \r\n
313 				rr--;
314 		}
315 
316 		if (rr-buf+1 > bufsize)
317 			if (!adjbuf(&buf, &bufsize, 1+rr-buf,
318 			    recsize, &rr, "readcsvrec 1"))
319 				FATAL("input record `%.30s...' too long", buf);
320 		*rr++ = c;
321 		if (c == '"')
322 			in_quote = ! in_quote;
323  	}
324 	if (c == '\n' && rr > buf && rr[-1] == '\r') 	// remove \r if was \r\n
325 		rr--;
326 
327 	if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readcsvrec 4"))
328 		FATAL("input record `%.30s...' too long", buf);
329 	*rr = 0;
330 	*pbuf = buf;
331 	*pbufsize = bufsize;
332 	DPRINTF("readcsvrec saw <%s>, returns %d\n", buf, c);
333 	return c;
334 }
335 
getargv(int n)336 char *getargv(int n)	/* get ARGV[n] */
337 {
338 	Cell *x;
339 	char *s, temp[50];
340 	extern Array *ARGVtab;
341 
342 	snprintf(temp, sizeof(temp), "%d", n);
343 	if (lookup(temp, ARGVtab) == NULL)
344 		return NULL;
345 	x = setsymtab(temp, "", 0.0, STR, ARGVtab);
346 	s = getsval(x);
347 	DPRINTF("getargv(%d) returns |%s|\n", n, s);
348 	return s;
349 }
350 
setclvar(char * s)351 void setclvar(char *s)	/* set var=value from s */
352 {
353 	char *e, *p;
354 	Cell *q;
355 	double result;
356 
357 /* commit f3d9187d4e0f02294fb1b0e31152070506314e67 broke T.argv test */
358 /* I don't understand why it was changed. */
359 
360 	for (p=s; *p != '='; p++)
361 		;
362 	e = p;
363 	*p++ = 0;
364 	p = qstring(p, '\0');
365 	q = setsymtab(s, p, 0.0, STR, symtab);
366 	setsval(q, p);
367 	if (is_number(q->sval, & result)) {
368 		q->fval = result;
369 		q->tval |= NUM;
370 	}
371 	DPRINTF("command line set %s to |%s|\n", s, p);
372 	free(p);
373 	*e = '=';
374 }
375 
376 
fldbld(void)377 void fldbld(void)	/* create fields from current record */
378 {
379 	/* this relies on having fields[] the same length as $0 */
380 	/* the fields are all stored in this one array with \0's */
381 	/* possibly with a final trailing \0 not associated with any field */
382 	char *r, *fr, sep;
383 	Cell *p;
384 	int i, j, n;
385 
386 	if (donefld)
387 		return;
388 	if (!isstr(fldtab[0]))
389 		getsval(fldtab[0]);
390 	r = fldtab[0]->sval;
391 	n = strlen(r);
392 	if (n > fieldssize) {
393 		xfree(fields);
394 		if ((fields = (char *) malloc(n+2)) == NULL) /* possibly 2 final \0s */
395 			FATAL("out of space for fields in fldbld %d", n);
396 		fieldssize = n;
397 	}
398 	fr = fields;
399 	i = 0;	/* number of fields accumulated here */
400 	if (inputFS == NULL)	/* make sure we have a copy of FS */
401 		savefs();
402 	if (!CSV && strlen(inputFS) > 1) {	/* it's a regular expression */
403 		i = refldbld(r, inputFS);
404 	} else if (!CSV && (sep = *inputFS) == ' ') {	/* default whitespace */
405 		for (i = 0; ; ) {
406 			while (*r == ' ' || *r == '\t' || *r == '\n')
407 				r++;
408 			if (*r == 0)
409 				break;
410 			i++;
411 			if (i > nfields)
412 				growfldtab(i);
413 			if (freeable(fldtab[i]))
414 				xfree(fldtab[i]->sval);
415 			fldtab[i]->sval = fr;
416 			fldtab[i]->tval = FLD | STR | DONTFREE;
417 			do
418 				*fr++ = *r++;
419 			while (*r != ' ' && *r != '\t' && *r != '\n' && *r != '\0');
420 			*fr++ = 0;
421 		}
422 		*fr = 0;
423 	} else if (CSV) {	/* CSV processing.  no error handling */
424 		if (*r != 0) {
425 			for (;;) {
426 				i++;
427 				if (i > nfields)
428 					growfldtab(i);
429 				if (freeable(fldtab[i]))
430 					xfree(fldtab[i]->sval);
431 				fldtab[i]->sval = fr;
432 				fldtab[i]->tval = FLD | STR | DONTFREE;
433 				if (*r == '"' ) { /* start of "..." */
434 					for (r++ ; *r != '\0'; ) {
435 						if (*r == '"' && r[1] != '\0' && r[1] == '"') {
436 							r += 2; /* doubled quote */
437 							*fr++ = '"';
438 						} else if (*r == '"' && (r[1] == '\0' || r[1] == ',')) {
439 							r++; /* skip over closing quote */
440 							break;
441 						} else {
442 							*fr++ = *r++;
443 						}
444 					}
445 					*fr++ = 0;
446 				} else {	/* unquoted field */
447 					while (*r != ',' && *r != '\0')
448 						*fr++ = *r++;
449 					*fr++ = 0;
450 				}
451 				if (*r++ == 0)
452 					break;
453 
454 			}
455 		}
456 		*fr = 0;
457 	} else if ((sep = *inputFS) == 0) {	/* new: FS="" => 1 char/field */
458 		for (i = 0; *r != '\0'; ) {
459 			char buf[10];
460 			i++;
461 			if (i > nfields)
462 				growfldtab(i);
463 			if (freeable(fldtab[i]))
464 				xfree(fldtab[i]->sval);
465 			n = u8_nextlen(r);
466 			for (j = 0; j < n; j++)
467 				buf[j] = *r++;
468 			buf[j] = '\0';
469 			fldtab[i]->sval = tostring(buf);
470 			fldtab[i]->tval = FLD | STR;
471 		}
472 		*fr = 0;
473 	} else if (*r != 0) {	/* if 0, it's a null field */
474 		/* subtle case: if length(FS) == 1 && length(RS > 0)
475 		 * \n is NOT a field separator (cf awk book 61,84).
476 		 * this variable is tested in the inner while loop.
477 		 */
478 		int rtest = '\n';  /* normal case */
479 		if (strlen(*RS) > 0)
480 			rtest = '\0';
481 		for (;;) {
482 			i++;
483 			if (i > nfields)
484 				growfldtab(i);
485 			if (freeable(fldtab[i]))
486 				xfree(fldtab[i]->sval);
487 			fldtab[i]->sval = fr;
488 			fldtab[i]->tval = FLD | STR | DONTFREE;
489 			while (*r != sep && *r != rtest && *r != '\0')	/* \n is always a separator */
490 				*fr++ = *r++;
491 			*fr++ = 0;
492 			if (*r++ == 0)
493 				break;
494 		}
495 		*fr = 0;
496 	}
497 	if (i > nfields)
498 		FATAL("record `%.30s...' has too many fields; can't happen", r);
499 	cleanfld(i+1, lastfld);	/* clean out junk from previous record */
500 	lastfld = i;
501 	donefld = true;
502 	for (j = 1; j <= lastfld; j++) {
503 		double result;
504 
505 		p = fldtab[j];
506 		if(is_number(p->sval, & result)) {
507 			p->fval = result;
508 			p->tval |= NUM;
509 		}
510 	}
511 	setfval(nfloc, (Awkfloat) lastfld);
512 	donerec = true; /* restore */
513 	if (dbg) {
514 		for (j = 0; j <= lastfld; j++) {
515 			p = fldtab[j];
516 			printf("field %d (%s): |%s|\n", j, p->nval, p->sval);
517 		}
518 	}
519 }
520 
cleanfld(int n1,int n2)521 void cleanfld(int n1, int n2)	/* clean out fields n1 .. n2 inclusive */
522 {				/* nvals remain intact */
523 	Cell *p;
524 	int i;
525 
526 	for (i = n1; i <= n2; i++) {
527 		p = fldtab[i];
528 		if (freeable(p))
529 			xfree(p->sval);
530 		p->sval = EMPTY,
531 		p->tval = FLD | STR | DONTFREE;
532 	}
533 }
534 
newfld(int n)535 void newfld(int n)	/* add field n after end of existing lastfld */
536 {
537 	if (n > nfields)
538 		growfldtab(n);
539 	cleanfld(lastfld+1, n);
540 	lastfld = n;
541 	setfval(nfloc, (Awkfloat) n);
542 }
543 
setlastfld(int n)544 void setlastfld(int n)	/* set lastfld cleaning fldtab cells if necessary */
545 {
546 	if (n < 0)
547 		FATAL("cannot set NF to a negative value");
548 	if (n > nfields)
549 		growfldtab(n);
550 
551 	if (lastfld < n)
552 	    cleanfld(lastfld+1, n);
553 	else
554 	    cleanfld(n+1, lastfld);
555 
556 	lastfld = n;
557 }
558 
fieldadr(int n)559 Cell *fieldadr(int n)	/* get nth field */
560 {
561 	if (n < 0)
562 		FATAL("trying to access out of range field %d", n);
563 	if (n > nfields)	/* fields after NF are empty */
564 		growfldtab(n);	/* but does not increase NF */
565 	return(fldtab[n]);
566 }
567 
growfldtab(int n)568 void growfldtab(int n)	/* make new fields up to at least $n */
569 {
570 	int nf = 2 * nfields;
571 	size_t s;
572 
573 	if (n > nf)
574 		nf = n;
575 	s = (nf+1) * (sizeof (struct Cell *));  /* freebsd: how much do we need? */
576 	if (s / sizeof(struct Cell *) - 1 == (size_t)nf) /* didn't overflow */
577 		fldtab = (Cell **) realloc(fldtab, s);
578 	else					/* overflow sizeof int */
579 		xfree(fldtab);	/* make it null */
580 	if (fldtab == NULL)
581 		FATAL("out of space creating %d fields", nf);
582 	makefields(nfields+1, nf);
583 	nfields = nf;
584 }
585 
refldbld(const char * rec,const char * fs)586 int refldbld(const char *rec, const char *fs)	/* build fields from reg expr in FS */
587 {
588 	/* this relies on having fields[] the same length as $0 */
589 	/* the fields are all stored in this one array with \0's */
590 	char *fr;
591 	int i, tempstat, n;
592 	fa *pfa;
593 
594 	n = strlen(rec);
595 	if (n > fieldssize) {
596 		xfree(fields);
597 		if ((fields = (char *) malloc(n+1)) == NULL)
598 			FATAL("out of space for fields in refldbld %d", n);
599 		fieldssize = n;
600 	}
601 	fr = fields;
602 	*fr = '\0';
603 	if (*rec == '\0')
604 		return 0;
605 	pfa = makedfa(fs, 1);
606 	DPRINTF("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs);
607 	tempstat = pfa->initstat;
608 	for (i = 1; ; i++) {
609 		if (i > nfields)
610 			growfldtab(i);
611 		if (freeable(fldtab[i]))
612 			xfree(fldtab[i]->sval);
613 		fldtab[i]->tval = FLD | STR | DONTFREE;
614 		fldtab[i]->sval = fr;
615 		DPRINTF("refldbld: i=%d\n", i);
616 		if (nematch(pfa, rec)) {
617 			pfa->initstat = 2;	/* horrible coupling to b.c */
618 			DPRINTF("match %s (%d chars)\n", patbeg, patlen);
619 			strncpy(fr, rec, patbeg-rec);
620 			fr += patbeg - rec + 1;
621 			*(fr-1) = '\0';
622 			rec = patbeg + patlen;
623 		} else {
624 			DPRINTF("no match %s\n", rec);
625 			strcpy(fr, rec);
626 			pfa->initstat = tempstat;
627 			break;
628 		}
629 	}
630 	return i;
631 }
632 
recbld(void)633 void recbld(void)	/* create $0 from $1..$NF if necessary */
634 {
635 	int i;
636 	char *r, *p;
637 	char *sep = getsval(ofsloc);
638 
639 	if (donerec)
640 		return;
641 	r = record;
642 	for (i = 1; i <= *NF; i++) {
643 		p = getsval(fldtab[i]);
644 		if (!adjbuf(&record, &recsize, 1+strlen(p)+r-record, recsize, &r, "recbld 1"))
645 			FATAL("created $0 `%.30s...' too long", record);
646 		while ((*r = *p++) != 0)
647 			r++;
648 		if (i < *NF) {
649 			if (!adjbuf(&record, &recsize, 2+strlen(sep)+r-record, recsize, &r, "recbld 2"))
650 				FATAL("created $0 `%.30s...' too long", record);
651 			for (p = sep; (*r = *p++) != 0; )
652 				r++;
653 		}
654 	}
655 	if (!adjbuf(&record, &recsize, 2+r-record, recsize, &r, "recbld 3"))
656 		FATAL("built giant record `%.30s...'", record);
657 	*r = '\0';
658 	DPRINTF("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]);
659 
660 	if (freeable(fldtab[0]))
661 		xfree(fldtab[0]->sval);
662 	fldtab[0]->tval = REC | STR | DONTFREE;
663 	fldtab[0]->sval = record;
664 
665 	DPRINTF("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]);
666 	DPRINTF("recbld = |%s|\n", record);
667 	donerec = true;
668 }
669 
670 int	errorflag	= 0;
671 
yyerror(const char * s)672 void yyerror(const char *s)
673 {
674 	SYNTAX("%s", s);
675 }
676 
SYNTAX(const char * fmt,...)677 void SYNTAX(const char *fmt, ...)
678 {
679 	extern char *cmdname, *curfname;
680 	static int been_here = 0;
681 	va_list varg;
682 
683 	if (been_here++ > 2)
684 		return;
685 	fprintf(stderr, "%s: ", cmdname);
686 	va_start(varg, fmt);
687 	vfprintf(stderr, fmt, varg);
688 	va_end(varg);
689 	fprintf(stderr, " at source line %d", lineno);
690 	if (curfname != NULL)
691 		fprintf(stderr, " in function %s", curfname);
692 	if (compile_time == COMPILING && cursource() != NULL)
693 		fprintf(stderr, " source file %s", cursource());
694 	fprintf(stderr, "\n");
695 	errorflag = 2;
696 	eprint();
697 }
698 
extern int bracecnt, brackcnt, parencnt;

/*
 * bracecheck: read the rest of the input, counting brackets with bclass,
 * then report any imbalance of {}, [] and ().  Only the first call does
 * anything.
 */
void bracecheck(void)
{
	static int beenhere = 0;
	int ch;

	if (beenhere++)
		return;
	for (;;) {
		ch = input();
		if (ch == EOF || ch == '\0')
			break;
		bclass(ch);
	}
	bcheck2(bracecnt, '{', '}');
	bcheck2(brackcnt, '[', ']');
	bcheck2(parencnt, '(', ')');
}
714 
/*
 * bcheck2: report on stderr an imbalance of n for one kind of bracket.
 * A positive n means closing characters (c2) are missing, a negative n
 * means there are too many; nothing is printed for 0.  c1 is not used.
 */
void bcheck2(int n, int c1, int c2)
{
	if (n == 0)
		return;

	if (n > 0) {
		if (n == 1)
			fprintf(stderr, "\tmissing %c\n", c2);
		else
			fprintf(stderr, "\t%d missing %c's\n", n, c2);
	} else {
		if (n == -1)
			fprintf(stderr, "\textra %c\n", c2);
		else
			fprintf(stderr, "\t%d extra %c's\n", -n, c2);
	}
}
726 
FATAL(const char * fmt,...)727 void FATAL(const char *fmt, ...)
728 {
729 	extern char *cmdname;
730 	va_list varg;
731 
732 	fflush(stdout);
733 	fprintf(stderr, "%s: ", cmdname);
734 	va_start(varg, fmt);
735 	vfprintf(stderr, fmt, varg);
736 	va_end(varg);
737 	error();
738 	if (dbg > 1)		/* core dump if serious debugging on */
739 		abort();
740 	exit(2);
741 }
742 
/*
 * WARNING: print a printf-style message on stderr, prefixed with the
 * command name and followed by the location details from error().
 * Unlike FATAL it returns to the caller.
 */
void WARNING(const char *fmt, ...)
{
	extern char *cmdname;
	va_list ap;

	fflush(stdout);		/* keep ordinary output ahead of the message */
	fprintf(stderr, "%s: ", cmdname);

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);

	error();
}
755 
error()756 void error()
757 {
758 	extern Node *curnode;
759 
760 	fprintf(stderr, "\n");
761 	if (compile_time != ERROR_PRINTING) {
762 		if (NR && *NR > 0) {
763 			fprintf(stderr, " input record number %d", (int) (*FNR));
764 			if (strcmp(*FILENAME, "-") != 0)
765 				fprintf(stderr, ", file %s", *FILENAME);
766 			fprintf(stderr, "\n");
767 		}
768 		if (curnode)
769 			fprintf(stderr, " source line number %d", curnode->lineno);
770 		else if (lineno)
771 			fprintf(stderr, " source line number %d", lineno);
772 		if (compile_time == COMPILING && cursource() != NULL)
773 			fprintf(stderr, " source file %s", cursource());
774 		fprintf(stderr, "\n");
775 		eprint();
776 	}
777 }
778 
eprint(void)779 void eprint(void)	/* try to print context around error */
780 {
781 	char *p, *q;
782 	int c;
783 	static int been_here = 0;
784 	extern char ebuf[], *ep;
785 
786 	if (compile_time != COMPILING || been_here++ > 0 || ebuf == ep)
787 		return;
788 	if (ebuf == ep)
789 		return;
790 	p = ep - 1;
791 	if (p > ebuf && *p == '\n')
792 		p--;
793 	for ( ; p > ebuf && *p != '\n' && *p != '\0'; p--)
794 		;
795 	while (*p == '\n')
796 		p++;
797 	fprintf(stderr, " context is\n\t");
798 	for (q=ep-1; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--)
799 		;
800 	for ( ; p < q; p++)
801 		if (*p)
802 			putc(*p, stderr);
803 	fprintf(stderr, " >>> ");
804 	for ( ; p < ep; p++)
805 		if (*p)
806 			putc(*p, stderr);
807 	fprintf(stderr, " <<< ");
808 	if (*ep)
809 		while ((c = input()) != '\n' && c != '\0' && c != EOF) {
810 			putc(c, stderr);
811 			bclass(c);
812 		}
813 	putc('\n', stderr);
814 	ep = ebuf;
815 }
816 
bclass(int c)817 void bclass(int c)
818 {
819 	switch (c) {
820 	case '{': bracecnt++; break;
821 	case '}': bracecnt--; break;
822 	case '[': brackcnt++; break;
823 	case ']': brackcnt--; break;
824 	case '(': parencnt++; break;
825 	case ')': parencnt--; break;
826 	}
827 }
828 
/*
 * errcheck: check errno after a math function has produced x.
 *
 * s is the function's name, used in the warning.  On EDOM or ERANGE a
 * warning is issued, errno is cleared and 1 is returned; otherwise x is
 * returned unchanged.
 */
double errcheck(double x, const char *s)
{
	switch (errno) {
	case EDOM:
		errno = 0;
		WARNING("%s argument out of domain", s);
		return 1;
	case ERANGE:
		errno = 0;
		WARNING("%s result out of range", s);
		return 1;
	default:
		return x;
	}
}
843 
/*
 * isclvar: does s have the form var=something?
 *
 * var must start with a letter or underscore and continue with letters,
 * digits or underscores.  Returns 1 if so, 0 otherwise.
 */
int isclvar(const char *s)	/* is s of form var=something ? */
{
	const char *os = s;

	/*
	 * The <ctype.h> functions are only defined for values representable
	 * as unsigned char (or EOF); a plain char may be negative for bytes
	 * above 0x7f, so convert before calling them.
	 */
	if (!isalpha((unsigned char) *s) && *s != '_')
		return 0;
	for ( ; *s; s++)
		if (!(isalnum((unsigned char) *s) || *s == '_'))
			break;
	return *s == '=' && s > os;
}
855 
856 /* strtod is supposed to be a proper test of what's a valid number */
857 /* appears to be broken in gcc on linux: thinks 0x123 is a valid FP number */
858 /* wrong: violates 4.10.1.4 of ansi C standard */
859 
860 /* well, not quite. As of C99, hex floating point is allowed. so this is
861  * a bit of a mess. We work around the mess by checking for a hexadecimal
862  * value and disallowing it. Similarly, we now follow gawk and allow only
863  * +nan, -nan, +inf, and -inf for NaN and infinity values.
864  */
865 
866 /*
867  * This routine now has a more complicated interface, the main point
868  * being to avoid the double conversion of a string to double, and
869  * also to convey out, if requested, the information that the numeric
870  * value was a leading string or is all of the string. The latter bit
871  * is used in getfval().
872  */
873 
/*
 * is_valid_number: decide whether s looks like a number and convert it.
 *
 * s                  string to examine; leading white space is skipped
 * trailing_stuff_ok  accept a number followed by other characters
 * no_trailing        if not NULL, set to true when nothing but white space
 *                    follows the number, false otherwise
 * result             if not NULL, receives the converted value
 *
 * Hexadecimal input is rejected, with or without a sign.  NaN and
 * infinity are accepted only in the signed forms +nan, -nan, +inf and
 * -inf.  Returns true when s is accepted as a number.
 */
bool is_valid_number(const char *s, bool trailing_stuff_ok,
			bool *no_trailing, double *result)
{
	double r;
	char *ep;
	const char *body;	/* s without its optional sign */
	bool is_nan = false;
	bool is_inf = false;

	if (no_trailing != NULL)
		*no_trailing = false;

	/* <ctype.h> functions need unsigned char values; plain char may be negative */
	while (isspace((unsigned char) *s))
		s++;

	body = (s[0] == '+' || s[0] == '-') ? s + 1 : s;

	/* no hex floating point, sorry; strtod would also take it after a sign */
	if (body[0] == '0' && tolower((unsigned char) body[1]) == 'x')
		return false;

	/* allow +nan, -nan, +inf, -inf, any other letter, no */
	if (body != s) {
		is_nan = (strncasecmp(body, "nan", 3) == 0);
		is_inf = (strncasecmp(body, "inf", 3) == 0);
	}
	if ((is_nan || is_inf)
	    && (isspace((unsigned char) s[4]) || s[4] == '\0')) {
		/* a signed nan or inf on its own: let strtod convert it */
	} else if (! isdigit((unsigned char) body[0]) && body[0] != '.') {
		return false;
	}

	errno = 0;
	r = strtod(s, &ep);
	if (ep == s || errno == ERANGE)
		return false;

	/* make sure -nan really carries the sign */
	if (isnan(r) && s[0] == '-' && signbit(r) == 0)
		r = -r;

	if (result != NULL)
		*result = r;

	/*
	 * check for trailing stuff
	 */
	while (isspace((unsigned char) *ep))
		ep++;

	if (no_trailing != NULL)
		*no_trailing = (*ep == '\0');

	/* return true if found the end, or trailing stuff is allowed */
	return *ep == '\0' || trailing_stuff_ok;
}
932