xref: /original-bsd/contrib/awk.research/lib.c (revision 333da485)
1 /****************************************************************
2 Copyright (C) AT&T 1993
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name of AT&T or any of its entities
11 not be used in advertising or publicity pertaining to
12 distribution of the software without specific, written prior
13 permission.
14 
15 AT&T DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL AT&T OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
25 #define DEBUG
26 #include <stdio.h>
27 #include <string.h>
28 #include <ctype.h>
29 #include <errno.h>
30 #include <stdlib.h>
31 #include "awk.h"
32 #include "y.tab.h"
33 
34 #define	getfval(p)	(((p)->tval & (ARR|FLD|REC|NUM)) == NUM ? (p)->fval : r_getfval(p))
35 #define	getsval(p)	(((p)->tval & (ARR|FLD|REC|STR)) == STR ? (p)->sval : r_getsval(p))
36 
37 FILE	*infile	= NULL;
38 uchar	*file	= (uchar*) "";
39 int	recsize	= RECSIZE;
40 uchar	*recdata;
41 uchar	*record;
42 uchar	*fields;
43 Cell	*fldtab;
44 
45 #define	MAXFLD	200
46 int	nfields	= MAXFLD;	/* can be set from commandline in main */
47 
48 int	donefld;	/* 1 = implies rec broken into fields */
49 int	donerec;	/* 1 = record is valid (no flds have changed) */
50 
51 int	maxfld	= 0;	/* last used field */
52 int	argno	= 1;	/* current input argument number */
53 extern	Awkfloat *ARGC;
54 
55 void recinit(unsigned int n)
56 {
57 	static Cell dollar0 = {
58 	    OCELL, CFLD, (uchar*) "$0", /*recdata*/0, 0.0, REC|STR|DONTFREE };
59 	static Cell dollar1 = {
60 	    OCELL, CFLD, NULL, (uchar*) "", 0.0, FLD|STR|DONTFREE };
61 	int i;
62 
63 	record = recdata = (uchar *) malloc(n);
64 	fields = (uchar *) malloc(n);
65 	fldtab = (Cell *) malloc(nfields * sizeof(Cell));
66 	if (recdata == NULL || fields == NULL || fldtab == NULL)
67 		ERROR "out of space for $0 and fields" FATAL;
68 	fldtab[0] = dollar0;
69 	fldtab[0].sval = recdata;
70 	for (i = 1; i < nfields; i++)
71 		fldtab[i] = dollar1;
72 }
73 
74 void initgetrec(void)
75 {
76 	int i;
77 	uchar *p;
78 
79 	for (i = 1; i < *ARGC; i++) {
80 		if (!isclvar(p = getargv(i))) {	/* find 1st real filename */
81 			setsval(lookup("FILENAME", symtab), getargv(i));
82 			return;
83 		}
84 		setclvar(p);	/* a commandline assignment before filename */
85 		argno++;
86 	}
87 	infile = stdin;		/* no filenames, so use stdin */
88 }
89 
90 getrec(uchar *buf)	/* get next input record from whatever source */
91 {			/* note: tests whether buf == record */
92 	int c;
93 	static int firsttime = 1;
94 
95 	if (firsttime) {
96 		firsttime = 0;
97 		initgetrec();
98 	}
99 	dprintf( ("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n",
100 		*RS, *FS, *ARGC, *FILENAME) );
101 	donefld = 0;
102 	donerec = 1;
103 	buf[0] = 0;
104 	while (argno < *ARGC || infile == stdin) {
105 		dprintf( ("argno=%d, file=|%s|\n", argno, file) );
106 		if (infile == NULL) {	/* have to open a new file */
107 			file = getargv(argno);
108 			if (*file == '\0') {	/* it's been zapped */
109 				argno++;
110 				continue;
111 			}
112 			if (isclvar(file)) {	/* a var=value arg */
113 				setclvar(file);
114 				argno++;
115 				continue;
116 			}
117 			*FILENAME = file;
118 			dprintf( ("opening file %s\n", file) );
119 			if (*file == '-' && *(file+1) == '\0')
120 				infile = stdin;
121 			else if ((infile = fopen((char *)file, "r")) == NULL)
122 				ERROR "can't open file %s", file FATAL;
123 			setfval(fnrloc, 0.0);
124 		}
125 		c = readrec(buf, recsize, infile);
126 		if (c != 0 || buf[0] != '\0') {	/* normal record */
127 			if (buf == record) {
128 				if (!(recloc->tval & DONTFREE))
129 					xfree(recloc->sval);
130 				recloc->sval = record;
131 				recloc->tval = REC | STR | DONTFREE;
132 				if (is_a_number(recloc->sval)) {
133 					recloc->fval = atof(recloc->sval);
134 					recloc->tval |= NUM;
135 				}
136 			}
137 			setfval(nrloc, nrloc->fval+1);
138 			setfval(fnrloc, fnrloc->fval+1);
139 			return 1;
140 		}
141 		/* EOF arrived on this file; set up next */
142 		if (infile != stdin)
143 			fclose(infile);
144 		infile = NULL;
145 		argno++;
146 	}
147 	return 0;	/* true end of file */
148 }
149 
150 readrec(uchar *buf, int bufsize, FILE *inf)	/* read one record into buf */
151 {
152 	register int sep, c;
153 	register uchar *rr;
154 	register int nrr;
155 
156 	if ((sep = **RS) == 0) {
157 		sep = '\n';
158 		while ((c=getc(inf)) == '\n' && c != EOF)	/* skip leading \n's */
159 			;
160 		if (c != EOF)
161 			ungetc(c, inf);
162 	}
163 	for (rr = buf, nrr = bufsize; ; ) {
164 		for (; (c=getc(inf)) != sep && c != EOF; *rr++ = c)
165 			if (--nrr < 0)
166 				ERROR "input record `%.30s...' too long; try -mr n", buf FATAL;
167 		if (**RS == sep || c == EOF)
168 			break;
169 		if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */
170 			break;
171 		*rr++ = '\n';
172 		*rr++ = c;
173 	}
174 	if (rr > buf + bufsize)
175 		ERROR "input record `%.30s...' too long; try -mr n", buf FATAL;
176 	*rr = 0;
177 	dprintf( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) );
178 	return c == EOF && rr == buf ? 0 : 1;
179 }
180 
181 uchar *getargv(int n)	/* get ARGV[n] */
182 {
183 	Cell *x;
184 	uchar *s, temp[10];
185 	extern Array *ARGVtab;
186 
187 	sprintf((char *)temp, "%d", n);
188 	x = setsymtab(temp, "", 0.0, STR, ARGVtab);
189 	s = getsval(x);
190 	dprintf( ("getargv(%d) returns |%s|\n", n, s) );
191 	return s;
192 }
193 
194 void setclvar(uchar *s)	/* set var=value from s */
195 {
196 	uchar *p;
197 	Cell *q;
198 
199 	for (p=s; *p != '='; p++)
200 		;
201 	*p++ = 0;
202 	p = qstring(p, '\0');
203 	q = setsymtab(s, p, 0.0, STR, symtab);
204 	setsval(q, p);
205 	if (is_a_number(q->sval)) {
206 		q->fval = atof(q->sval);
207 		q->tval |= NUM;
208 	}
209 	dprintf( ("command line set %s to |%s|\n", s, p) );
210 }
211 
212 
213 void fldbld(void)	/* create fields from current record */
214 {
215 	register uchar *r, *fr, sep;
216 	Cell *p;
217 	int i;
218 
219 	if (donefld)
220 		return;
221 	if (!(recloc->tval & STR))
222 		getsval(recloc);
223 	r = recloc->sval;
224 	fr = fields;
225 	i = 0;	/* number of fields accumulated here */
226 	if (strlen(*FS) > 1) {	/* it's a regular expression */
227 		i = refldbld(r, *FS);
228 	} else if ((sep = **FS) == ' ') {	/* default whitespace */
229 		for (i = 0; ; ) {
230 			while (*r == ' ' || *r == '\t' || *r == '\n')
231 				r++;
232 			if (*r == 0)
233 				break;
234 			i++;
235 			if (i >= nfields)
236 				break;
237 			if (!(fldtab[i].tval & DONTFREE))
238 				xfree(fldtab[i].sval);
239 			fldtab[i].sval = fr;
240 			fldtab[i].tval = FLD | STR | DONTFREE;
241 			do
242 				*fr++ = *r++;
243 			while (*r != ' ' && *r != '\t' && *r != '\n' && *r != '\0');
244 			*fr++ = 0;
245 		}
246 		*fr = 0;
247 	} else if (*r != 0) {	/* if 0, it's a null field */
248 		for (;;) {
249 			i++;
250 			if (i >= nfields)
251 				break;
252 			if (!(fldtab[i].tval & DONTFREE))
253 				xfree(fldtab[i].sval);
254 			fldtab[i].sval = fr;
255 			fldtab[i].tval = FLD | STR | DONTFREE;
256 			while (*r != sep && *r != '\n' && *r != '\0')	/* \n is always a separator */
257 				*fr++ = *r++;
258 			*fr++ = 0;
259 			if (*r++ == 0)
260 				break;
261 		}
262 		*fr = 0;
263 	}
264 	if (i >= nfields)
265 		ERROR "record `%.30s...' has too many fields; try -mf n", record FATAL;
266 	/* clean out junk from previous record */
267 	cleanfld(i, maxfld);
268 	maxfld = i;
269 	donefld = 1;
270 	for (p = fldtab+1; p <= fldtab+maxfld; p++) {
271 		if(is_a_number(p->sval)) {
272 			p->fval = atof(p->sval);
273 			p->tval |= NUM;
274 		}
275 	}
276 	setfval(nfloc, (Awkfloat) maxfld);
277 	if (dbg)
278 		for (p = fldtab; p <= fldtab+maxfld; p++)
279 			printf("field %d: |%s|\n", p-fldtab, p->sval);
280 }
281 
282 void cleanfld(int n1, int n2)	/* clean out fields n1..n2 inclusive */
283 {
284 	static uchar *nullstat = (uchar *) "";
285 	register Cell *p, *q;
286 
287 	for (p = &fldtab[n2], q = &fldtab[n1]; p > q; p--) {
288 		if (!(p->tval & DONTFREE))
289 			xfree(p->sval);
290 		p->tval = FLD | STR | DONTFREE;
291 		p->sval = nullstat;
292 	}
293 }
294 
295 void newfld(int n)	/* add field n (after end) */
296 {
297 	if (n >= nfields)
298 		ERROR "creating too many fields (%d); try -mf n", n, record FATAL;
299 	cleanfld(maxfld, n);
300 	maxfld = n;
301 	setfval(nfloc, (Awkfloat) n);
302 }
303 
304 refldbld(uchar *rec, uchar *fs)	/* build fields from reg expr in FS */
305 {
306 	uchar *fr;
307 	int i, tempstat;
308 	fa *pfa;
309 
310 	fr = fields;
311 	*fr = '\0';
312 	if (*rec == '\0')
313 		return 0;
314 	pfa = makedfa(fs, 1);
315 	dprintf( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) );
316 	tempstat = pfa->initstat;
317 	for (i = 1; i < nfields; i++) {
318 		if (!(fldtab[i].tval & DONTFREE))
319 			xfree(fldtab[i].sval);
320 		fldtab[i].tval = FLD | STR | DONTFREE;
321 		fldtab[i].sval = fr;
322 		dprintf( ("refldbld: i=%d\n", i) );
323 		if (nematch(pfa, rec)) {
324 			pfa->initstat = 2;	/* horrible coupling */
325 			dprintf( ("match %s (%d chars)\n", patbeg, patlen) );
326 			strncpy(fr, rec, patbeg-rec);
327 			fr += patbeg - rec + 1;
328 			*(fr-1) = '\0';
329 			rec = patbeg + patlen;
330 		} else {
331 			dprintf( ("no match %s\n", rec) );
332 			strcpy(fr, rec);
333 			pfa->initstat = tempstat;
334 			break;
335 		}
336 	}
337 	return i;
338 }
339 
340 void recbld(void)	/* create $0 from $1..$NF if necessary */
341 {
342 	register int i;
343 	register uchar *r, *p;
344 	static uchar *rec = 0;
345 
346 	if (donerec == 1)
347 		return;
348 	if (rec == 0) {
349 		rec = (uchar *) malloc(recsize);
350 		if (rec == 0)
351 			ERROR "out of space building $0, record size %d", recsize FATAL;
352 	}
353 	r = rec;
354 	for (i = 1; i <= *NF; i++) {
355 		p = getsval(&fldtab[i]);
356 		while (r < rec+recsize-1 && (*r = *p++))
357 			r++;
358 		if (i < *NF)
359 			for (p = *OFS; r < rec+recsize-1 && (*r = *p++); )
360 				r++;
361 	}
362 	if (r > rec + recsize - 1)
363 		ERROR "built giant record `%.30s...'; try -mr n", record FATAL;
364 	*r = '\0';
365 	dprintf( ("in recbld FS=%o, recloc=%o\n", **FS, recloc) );
366 	recloc->tval = REC | STR | DONTFREE;
367 	recloc->sval = record = rec;
368 	dprintf( ("in recbld FS=%o, recloc=%o\n", **FS, recloc) );
369 	dprintf( ("recbld = |%s|\n", record) );
370 	donerec = 1;
371 }
372 
373 Cell *fieldadr(int n)
374 {
375 	if (n < 0 || n >= nfields)
376 		ERROR "trying to access field %d; try -mf n", n FATAL;
377 	return(&fldtab[n]);
378 }
379 
int	errorflag = 0;	/* yyerror() sets this to 2 */
char	errbuf[200];	/* NOTE(review): presumably the buffer the ERROR macro formats into; confirm in awk.h */
382 
383 void yyerror(uchar *s)
384 {
385 	extern uchar *cmdname, *curfname;
386 	static int been_here = 0;
387 
388 	if (been_here++ > 2)
389 		return;
390 	fprintf(stderr, "%s: %s", cmdname, s);
391 	fprintf(stderr, " at source line %d", lineno);
392 	if (curfname != NULL)
393 		fprintf(stderr, " in function %s", curfname);
394 	fprintf(stderr, "\n");
395 	errorflag = 2;
396 	eprint();
397 }
398 
399 void fpecatch(int n)
400 {
401 	ERROR "floating point exception %d", n FATAL;
402 }
403 
extern int bracecnt, brackcnt, parencnt;

/*
 * bracecheck: report unbalanced braces, brackets and parentheses.
 *
 * Runs only on its first call.  It reads the remaining input with
 * lex_input() until EOF or a NUL character, passing each character to
 * bclass(), and then calls bcheck2() for each of the three counters.
 */
void bracecheck(void)
{
	static int beenhere = 0;
	int c;

	if (beenhere++ != 0)
		return;
	for (;;) {
		c = lex_input();
		if (c == EOF || c == '\0')
			break;
		bclass(c);
	}
	bcheck2(bracecnt, '{', '}');
	bcheck2(brackcnt, '[', ']');
	bcheck2(parencnt, '(', ')');
}
419 
/*
 * bcheck2: print a message on stderr describing the imbalance n.
 *
 * A positive n is reported as that many missing c2 characters, a
 * negative n as that many extra ones; zero prints nothing.  c1 is
 * accepted but not used.
 */
void bcheck2(int n, int c1, int c2)
{
	if (n == 0)
		return;

	if (n > 0) {
		if (n == 1)
			fprintf(stderr, "\tmissing %c\n", c2);
		else
			fprintf(stderr, "\t%d missing %c's\n", n, c2);
	} else {
		if (n == -1)
			fprintf(stderr, "\textra %c\n", c2);
		else
			fprintf(stderr, "\t%d extra %c's\n", -n, c2);
	}
}
431 
432 void error(int f, char *s)
433 {
434 	extern Node *curnode;
435 	extern uchar *cmdname;
436 
437 	fflush(stdout);
438 	fprintf(stderr, "%s: ", cmdname);
439 	fprintf(stderr, "%s", s);
440 	fprintf(stderr, "\n");
441 	if (compile_time != 2 && NR && *NR > 0) {
442 		fprintf(stderr, " input record number %g", *FNR);
443 		if (strcmp(*FILENAME, "-") != 0)
444 			fprintf(stderr, ", file %s", *FILENAME);
445 		fprintf(stderr, "\n");
446 	}
447 	if (compile_time != 2 && curnode)
448 		fprintf(stderr, " source line number %d\n", curnode->lineno);
449 	else if (compile_time != 2 && lineno)
450 		fprintf(stderr, " source line number %d\n", lineno);
451 	eprint();
452 	if (f) {
453 		if (dbg > 1)		/* core dump if serious debugging on */
454 			abort();
455 		exit(2);
456 	}
457 }
458 
/*
 * eprint: try to print context around error.
 *
 * The whole body is compiled out with #if 0, so this function currently
 * does nothing.  The disabled code prints the source text around the
 * error position, bracketing the offending part with >>> and <<<.
 */
void eprint(void)
{
#if 0
	extern uchar ebuf[], *ep;
	static int been_here = 0;
	uchar *p, *q;
	int c;

	if (compile_time == 2 || compile_time == 0 || been_here++ > 0)
		return;

	/* back up to the start of the line */
	p = ep - 1;
	if (p > ebuf && *p == '\n')
		p--;
	while (p > ebuf && *p != '\n' && *p != '\0')
		p--;
	while (*p == '\n')
		p++;

	fprintf(stderr, " context is\n\t");

	/* back up from the end to the nearest blank, tab or newline */
	q = ep - 1;
	while (q >= p && *q != ' ' && *q != '\t' && *q != '\n')
		q--;

	while (p < q) {
		if (*p)
			putc(*p, stderr);
		p++;
	}
	fprintf(stderr, " >>> ");
	while (p < ep) {
		if (*p)
			putc(*p, stderr);
		p++;
	}
	fprintf(stderr, " <<< ");

	if (*ep) {
		while ((c = input()) != '\n' && c != '\0' && c != EOF) {
			putc(c, stderr);
			bclass(c);
		}
	}
	putc('\n', stderr);
	ep = ebuf;
#endif
}
496 
497 void bclass(int c)
498 {
499 	switch (c) {
500 	case '{': bracecnt++; break;
501 	case '}': bracecnt--; break;
502 	case '[': brackcnt++; break;
503 	case ']': brackcnt--; break;
504 	case '(': parencnt++; break;
505 	case ')': parencnt--; break;
506 	}
507 }
508 
509 double errcheck(double x, uchar *s)
510 {
511 	extern int errno;
512 
513 	if (errno == EDOM) {
514 		errno = 0;
515 		ERROR "%s argument out of domain", s WARNING;
516 		x = 1;
517 	} else if (errno == ERANGE) {
518 		errno = 0;
519 		ERROR "%s result out of range", s WARNING;
520 		x = 1;
521 	}
522 	return x;
523 }
524 
/*
 * isclvar: is s of form var=something ?
 *
 * Returns 1 when s starts with a letter or underscore, continues with
 * letters, digits or underscores, and that name is followed by '=' that
 * is not itself followed by another '='.  Returns 0 otherwise.
 */
int isclvar(uchar *s)
{
	uchar *os = s;

	if (!isalpha(*s) && *s != '_')
		return 0;
	for ( ; *s; s++)
		if (!(isalnum(*s) || *s == '_'))
			break;
	return *s == '=' && s > os && *(s+1) != '=';
}
536 
#define	MAXEXPON	38	/* maximum exponent for fp number. should be IEEE */

/*
 * is_a_number: does the string s look like a decimal number?
 *
 * Accepted form: optional blanks, tabs and newlines, an optional sign,
 * digits with an optional decimal point (at least one digit before or
 * after the point), an optional exponent, then optional trailing
 * blanks, tabs and newlines and nothing else.  The exponent is 'e' or
 * 'E', an optional sign and one or two digits; a two-digit exponent
 * must be below MAXEXPON.
 *
 * Returns 1 if s has that form, 0 otherwise (including for an empty or
 * all-blank string).
 */
int is_a_number(uchar *s)
{
	int d1, d2;	/* saw digits before / after the point */
	int point;	/* saw a decimal point */
	uchar *es;	/* start of the exponent digits */

	d1 = d2 = point = 0;
	while (*s == ' ' || *s == '\t' || *s == '\n')
		s++;
	if (*s == '\0')
		return(0);	/* empty stuff isn't number */
	if (*s == '+' || *s == '-')
		s++;
	if (!isdigit(*s) && *s != '.')
		return(0);
	if (isdigit(*s)) {
		do {
			d1++;
			s++;
		} while (isdigit(*s));
	}
	if (*s == '.') {
		point++;
		s++;
	}
	if (isdigit(*s)) {
		d2++;
		do {
			s++;
		} while (isdigit(*s));
	}
	if (!(d1 || (point && d2)))
		return(0);
	if (*s == 'e' || *s == 'E') {
		s++;
		if (*s == '+' || *s == '-')
			s++;
		if (!isdigit(*s))
			return(0);
		es = s;
		do {
			s++;
		} while (isdigit(*s));
		if (s - es > 2)
			return(0);
		else if (s - es == 2 && (int)(10 * (*es-'0') + *(es+1)-'0') >= MAXEXPON)
			return(0);
	}
	while (*s == ' ' || *s == '\t' || *s == '\n')
		s++;
	if (*s == '\0')
		return(1);
	else
		return(0);
}
593 }
594