xref: /minix/external/historical/nawk/dist/tran.c (revision 84d9c625)
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14 
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
25 #if HAVE_NBTOOL_CONFIG_H
26 #include "nbtool_config.h"
27 #endif
28 
29 #define	DEBUG
30 #include <stdio.h>
31 #include <math.h>
32 #include <ctype.h>
33 #include <string.h>
34 #include <stdlib.h>
35 #include "awk.h"
36 #include "awkgram.h"
37 
/* Hash-table tuning knobs used by setsymtab() and rehash(). */
38 #define	FULLTAB	2	/* rehash once nelem exceeds FULLTAB * size */
39 #define	GROWTAB 4	/* rehash multiplies the bucket count by this factor */
40 
41 Array	*symtab;	/* main symbol table */
42 
/*
 * Each pointer below is aimed at the sval (strings) or fval (numbers) slot
 * of the matching built-in variable's Cell by syminit() / arginit().
 */
43 char	**FS;		/* field separator; cell is created holding " " */
44 char	**RS;		/* record separator; cell is created holding "\n" */
45 char	**OFS;		/* output field sep */
46 char	**ORS;		/* output record sep */
47 char	**OFMT;		/* output format for numbers */
48 char	**CONVFMT;	/* format for conversions in getsval */
49 Awkfloat *NF;		/* number of fields in current record */
50 Awkfloat *NR;		/* number of current record */
51 Awkfloat *FNR;		/* number of current record in current file */
52 char	**FILENAME;	/* current filename argument */
53 Awkfloat *ARGC;		/* number of arguments from command line */
54 char	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
55 Awkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
56 Awkfloat *RLENGTH;	/* length of same */
57 
/* Cells of selected built-ins, kept so they can be recognised by address. */
58 Cell	*fsloc;		/* FS */
59 Cell	*nrloc;		/* NR */
60 Cell	*nfloc;		/* NF */
61 Cell	*fnrloc;	/* FNR */
62 Array	*ARGVtab;	/* symbol table containing ARGV[...] */
63 Array	*ENVtab;	/* symbol table containing ENVIRON[...] */
64 Cell	*rstartloc;	/* RSTART */
65 Cell	*rlengthloc;	/* RLENGTH */
66 Cell	*symtabloc;	/* SYMTAB; its sval is the main symbol table itself */
67 
68 Cell	*nullloc;	/* a guaranteed empty cell */
69 Node	*nullnode;	/* zero&null, converted into a node for comparisons */
70 Cell	*literal0;	/* cell for the constant "0" (created in syminit) */
71 
/* Defined in another file; presumably the table of field cells -- confirm. */
72 extern Cell **fldtab;
73 
74 static void
setfree(Cell * vp)75 setfree(Cell *vp)
76 {
77 	if (&vp->sval == FS || &vp->sval == RS ||
78 	    &vp->sval == OFS || &vp->sval == ORS ||
79 	    &vp->sval == OFMT || &vp->sval == CONVFMT ||
80 	    &vp->sval == FILENAME || &vp->sval == SUBSEP)
81 		vp->tval |= DONTFREE;
82 	else
83 		vp->tval &= ~DONTFREE;
84 }
85 
syminit(void)86 void syminit(void)	/* initialize symbol table with builtin vars */
87 {
88 	literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
89 	/* this is used for if(x)... tests: */
90 	nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
91 	nullnode = celltonode(nullloc, CCON);
92 
93 	fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
94 	FS = &fsloc->sval;
95 	RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
96 	OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
97 	ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
98 	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
99 	CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
100 	FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
101 	nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
102 	NF = &nfloc->fval;
103 	nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
104 	NR = &nrloc->fval;
105 	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
106 	FNR = &fnrloc->fval;
107 	SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
108 	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
109 	RSTART = &rstartloc->fval;
110 	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
111 	RLENGTH = &rlengthloc->fval;
112 	symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
113 	symtabloc->sval = (char *) symtab;
114 }
115 
arginit(int ac,char ** av)116 void arginit(int ac, char **av)	/* set up ARGV and ARGC */
117 {
118 	Cell *cp;
119 	int i;
120 	char temp[50];
121 
122 	ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
123 	cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
124 	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
125 	cp->sval = (char *) ARGVtab;
126 	for (i = 0; i < ac; i++) {
127 		snprintf(temp, sizeof(temp), "%d", i);
128 		if (is_number(*av))
129 			setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
130 		else
131 			setsymtab(temp, *av, 0.0, STR, ARGVtab);
132 		av++;
133 	}
134 }
135 
envinit(char ** envp)136 void envinit(char **envp)	/* set up ENVIRON variable */
137 {
138 	Cell *cp;
139 	char *p;
140 
141 	cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
142 	ENVtab = makesymtab(NSYMTAB);
143 	cp->sval = (char *) ENVtab;
144 	for ( ; *envp; envp++) {
145 		if ((p = strchr(*envp, '=')) == NULL)
146 			continue;
147 		if( p == *envp ) /* no left hand side name in env string */
148 			continue;
149 		*p++ = 0;	/* split into two strings at = */
150 		if (is_number(p))
151 			setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
152 		else
153 			setsymtab(*envp, p, 0.0, STR, ENVtab);
154 		p[-1] = '=';	/* restore in case env is passed down to a shell */
155 	}
156 }
157 
makesymtab(int n)158 Array *makesymtab(int n)	/* make a new symbol table */
159 {
160 	Array *ap;
161 	Cell **tp;
162 
163 	ap = malloc(sizeof(*ap));
164 	tp = calloc(n, sizeof(*tp));
165 	if (ap == NULL || tp == NULL)
166 		FATAL("out of space in makesymtab");
167 	ap->nelem = 0;
168 	ap->size = n;
169 	ap->tab = tp;
170 	return(ap);
171 }
172 
/*
 * Free the symbol table held by array cell ap: every element (its name,
 * its string value when freeable, and the cell), the bucket vector and the
 * Array header.  Does nothing if ap is not an array or holds no table.
 * ap->sval itself is left untouched.
 */
void freesymtab(Cell *ap)
{
	Array *table;
	int slot;

	if (!isarr(ap))
		return;
	if ((table = (Array *) ap->sval) == NULL)
		return;

	for (slot = 0; slot < table->size; slot++) {
		Cell *cur = table->tab[slot];

		while (cur != NULL) {
			Cell *rest = cur->cnext;	/* fetch before cur is freed */

			xfree(cur->nval);
			if (freeable(cur))
				xfree(cur->sval);
			free(cur);
			table->nelem--;
			cur = rest;
		}
		table->tab[slot] = 0;
	}
	if (table->nelem != 0)
		WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
	free(table->tab);
	free(table);
}
200 
/*
 * Remove element s from the array held by ap (i.e. delete ap["s"]).
 * Only the first cell whose name equals s is unlinked and freed;
 * nothing happens when there is no such element.
 */
void freeelem(Cell *ap, const char *s)
{
	Array *table = (Array *) ap->sval;
	Cell **link = &table->tab[hash(s, table->size)];
	Cell *victim;

	/* walk the chain through the link that points at each cell */
	for ( ; (victim = *link) != NULL; link = &victim->cnext) {
		if (strcmp(s, victim->nval) != 0)
			continue;
		*link = victim->cnext;		/* unlink: works for head and middle */
		if (freeable(victim))
			xfree(victim->sval);
		free(victim->nval);
		free(victim);
		table->nelem--;
		return;
	}
}
223 
setsymtab(const char * n,const char * s,Awkfloat f,unsigned t,Array * tp)224 Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp)
225 {
226 	int h;
227 	Cell *p;
228 
229 	if (n == NULL)
230 		n = "";
231 
232 	if ((p = lookup(n, tp)) != NULL) {
233 		   dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
234 			p, NN(p->nval), NN(p->sval), p->fval, p->tval) );
235 		return(p);
236 	}
237 	p = malloc(sizeof(*p));
238 	if (p == NULL)
239 		FATAL("out of space for symbol table at %s", n);
240 	p->nval = tostring(n);
241 	p->sval = s ? tostring(s) : tostring("");
242 	p->fval = f;
243 	p->tval = t;
244 	p->csub = CUNK;
245 	p->ctype = OCELL;
246 	tp->nelem++;
247 	if (tp->nelem > FULLTAB * tp->size)
248 		rehash(tp);
249 	h = hash(n, tp->size);
250 	p->cnext = tp->tab[h];
251 	tp->tab[h] = p;
252 	   dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
253 		p, p->nval, p->sval, p->fval, p->tval) );
254 	return(p);
255 }
256 
/*
 * Hash string s into a bucket index in [0, n).
 * Multiply-by-31 accumulation in unsigned arithmetic (wraps on overflow).
 */
int hash(const char *s, int n)
{
	unsigned acc = 0;
	const char *p;

	for (p = s; *p != '\0'; p++)
		acc = 31 * acc + *p;
	return acc % n;
}
265 
/*
 * Grow tp's bucket vector by a factor of GROWTAB and redistribute every
 * cell into it.  If the larger vector cannot be allocated the table is
 * left exactly as it was.
 */
void rehash(Array *tp)
{
	int newsize = GROWTAB * tp->size;
	Cell **newtab = calloc(newsize, sizeof(*newtab));
	int old;

	if (newtab == NULL)	/* can't do it, but can keep running. */
		return;		/* someone else will run out later. */

	for (old = 0; old < tp->size; old++) {
		Cell *cur = tp->tab[old];

		while (cur != NULL) {
			Cell *rest = cur->cnext;	/* save before relinking */
			int slot = hash(cur->nval, newsize);

			cur->cnext = newtab[slot];
			newtab[slot] = cur;
			cur = rest;
		}
	}
	free(tp->tab);
	tp->tab = newtab;
	tp->size = newsize;
}
287 
lookup(const char * s,Array * tp)288 Cell *lookup(const char *s, Array *tp)	/* look for s in tp */
289 {
290 	Cell *p;
291 	int h;
292 
293 	h = hash(s, tp->size);
294 	for (p = tp->tab[h]; p != NULL; p = p->cnext)
295 		if (strcmp(s, p->nval) == 0)
296 			return(p);	/* found it */
297 	return(NULL);			/* not found */
298 }
299 
/*
 * Assign the number f to cell vp and return the stored value.
 * Assigning to a field invalidates $0 (creating the field if it lies
 * beyond NF), assigning to NF invalidates $0 and adjusts the last field,
 * and assigning to the record invalidates the fields.  Any previous
 * freeable string value is released and the cell is marked numeric only.
 */
Awkfloat setfval(Cell *vp, Awkfloat f)
{
	f += 0.0;		/* normalise negative zero to positive zero */
	if ((vp->tval & (NUM | STR)) == 0)
		funnyvar(vp, "assign to");

	if (isfld(vp)) {
		int fieldnum;

		donerec = 0;	/* mark $0 invalid */
		fieldnum = atoi(vp->nval);
		if (fieldnum > *NF)
			newfld(fieldnum);
		dprintf( ("setting field %d to %g\n", fieldnum, f) );
	} else if (&vp->fval == NF) {
		donerec = 0;	/* mark $0 invalid */
		setlastfld(f);
		dprintf( ("setting NF to %g\n", f) );
	} else if (isrec(vp)) {
		donefld = 0;	/* mark $1... invalid */
		donerec = 1;
	}

	if (freeable(vp))
		xfree(vp->sval); /* free any previous string */
	vp->tval &= ~STR;	/* mark string invalid */
	vp->tval |= NUM;	/* mark number ok */
	if (f == 0)		/* matches both signed zeros; store +0 */
		f = 0;
	dprintf( ("setfval %p: %s = %g, t=%o\n", vp, NN(vp->nval), f, vp->tval) );
	vp->fval = f;
	return f;
}
330 
/*
 * Report an attempt to use vp as a scalar when it is not one.
 *   rw - verb phrase describing the attempted access, used in the message
 * Arrays and functions go through FATAL; anything else only gets a
 * WARNING describing the cell.
 */
void funnyvar(Cell *vp, const char *rw)
{
	const char *name = vp->nval;

	if (isarr(vp))
		FATAL("can't %s %s; it's an array name.", rw, name);
	if ((vp->tval & FCN) != 0)
		FATAL("can't %s %s; it's a function.", rw, name);
	WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
		vp, name, vp->sval, vp->fval, vp->tval);
}
340 
/*
 * Assign a copy of string s (NULL is treated as "") to cell vp and return
 * the stored copy.  Assigning to a field invalidates $0 (creating the field
 * if it lies beyond NF); assigning to the record invalidates the fields.
 * The cell is marked string only, and when it is NF the new value is also
 * converted to a number and used to adjust the last field.
 */
char *setsval(Cell *vp, const char *s)
{
	char *copy;

	dprintf( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n",
		vp, NN(vp->nval), s, vp->tval, donerec, donefld) );
	if ((vp->tval & (NUM | STR)) == 0)
		funnyvar(vp, "assign to");

	if (isfld(vp)) {
		int fieldnum;

		donerec = 0;	/* mark $0 invalid */
		fieldnum = atoi(vp->nval);
		if (fieldnum > *NF)
			newfld(fieldnum);
		dprintf( ("setting field %d to %s (%p)\n", fieldnum, s, s) );
	} else if (isrec(vp)) {
		donefld = 0;	/* mark $1... invalid */
		donerec = 1;
	}

	/* copy before freeing the old value, in case it's self-assign */
	copy = tostring(s != NULL ? s : "");
	if (freeable(vp))
		xfree(vp->sval);
	vp->tval &= ~NUM;
	vp->tval |= STR;
	setfree(vp);
	dprintf( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n",
		vp, NN(vp->nval), copy,copy, vp->tval, donerec, donefld) );

	vp->sval = copy;
	if (&vp->fval == NF) {
		Awkfloat count;

		donerec = 0;	/* mark $0 invalid */
		count = getfval(vp);
		setlastfld(count);
		dprintf( ("setting NF to %g\n", count) );
	}

	return vp->sval;
}
380 
getfval(Cell * vp)381 Awkfloat getfval(Cell *vp)	/* get float val of a Cell */
382 {
383 	if ((vp->tval & (NUM | STR)) == 0)
384 		funnyvar(vp, "read value of");
385 	if (isfld(vp) && donefld == 0)
386 		fldbld();
387 	else if (isrec(vp) && donerec == 0)
388 		recbld();
389 	if (!isnum(vp)) {	/* not a number */
390 		vp->fval = atof(vp->sval);	/* best guess */
391 		if (is_number(vp->sval) && !(vp->tval&CON))
392 			vp->tval |= NUM;	/* make NUM only sparingly */
393 	}
394 	   dprintf( ("getfval %p: %s = %g, t=%o\n",
395 		vp, NN(vp->nval), vp->fval, vp->tval) );
396 	return(vp->fval);
397 }
398 
/*
 * Return the string value of cell vp, converting from its number if the
 * cell is not currently a string.
 *   fmt - points at the format string used for non-integral values
 * Integral values are always formatted with "%.30g".  The converted text
 * is stored in the cell, which is then also flagged STR.
 */
static char *get_str_val(Cell *vp, char **fmt)
{
	if ((vp->tval & (NUM | STR)) == 0)
		funnyvar(vp, "read value of");

	if (isfld(vp) && donefld == 0) {
		fldbld();
	} else if (isrec(vp) && donerec == 0) {
		recbld();
	}

	if (isstr(vp) == 0) {
		char text[100];
		double whole;
		const char *format;

		if (freeable(vp))
			xfree(vp->sval);
		/* pick the format only after the old string has been released */
		if (modf(vp->fval, &whole) == 0)	/* it's integral */
			format = "%.30g";
		else
			format = *fmt;
		snprintf(text, sizeof(text), format, vp->fval);
		vp->sval = tostring(text);
		vp->tval |= STR;
		setfree(vp);
	}
	dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n",
		vp, NN(vp->nval), vp->sval, vp->sval, vp->tval) );
	return vp->sval;
}
425 
/* Return the string value of vp, converting numbers with CONVFMT. */
char *getsval(Cell *vp)
{
	char *text = get_str_val(vp, CONVFMT);

	return text;
}
430 
/* Return the string value of vp for printing, converting numbers with OFMT. */
char *getpssval(Cell *vp)
{
	char *text = get_str_val(vp, OFMT);

	return text;
}
435 
436 
/*
 * Return a freshly allocated copy of string s.
 * Calls FATAL when the copy cannot be allocated.
 */
char *tostring(const char *s)
{
	char *copy = strdup(s);

	if (copy == NULL)
		FATAL("out of space in tostring on %s", s);
	return copy;
}
446 
/*
 * Concatenate the string values of a and b.
 * The result is entered in the main symbol table as a string constant
 * whose name and value are both the concatenated text; that cell is
 * returned.
 */
Cell *catstr(Cell *a, Cell *b)
{
	char *left = getsval(a);
	char *right = getsval(b);
	size_t leftlen = strlen(left);
	size_t rightlen = strlen(right);
	char *joined = malloc(leftlen + rightlen + 1);
	Cell *result;

	if (joined == NULL)
		FATAL("out of space concatenating %s and %s", left, right);
	memcpy(joined, left, leftlen);
	memcpy(joined + leftlen, right, rightlen + 1);	/* includes the NUL */
	result = setsymtab(joined, joined, 0.0, CON|STR|DONTFREE, symtab);
	free(joined);		/* setsymtab made its own copies */
	return result;
}
462 
/*
 * Copy string s into a freshly allocated buffer of at least n bytes.
 *   s - NUL-terminated string to copy
 *   n - requested buffer size in bytes
 * The buffer is never smaller than strlen(s)+1, so a too-small n can no
 * longer make the copy run past the end of the allocation.
 * Calls FATAL when the buffer cannot be allocated.
 */
char *tostringN(const char *s, size_t n)
{
	size_t need = strlen(s) + 1;	/* text plus terminating NUL */
	char *p;

	if (n < need)
		n = need;
	p = malloc(n);
	if (p == NULL)
		FATAL("out of space in tostring on %s", s);
	memcpy(p, s, need);
	return(p);
}
473 
qstring(const char * is,int delim)474 char *qstring(const char *is, int delim)	/* collect string up to next delim */
475 {
476 	const char *os = is;
477 	int c, n;
478 	const uschar *s = (const uschar *) is;
479 	uschar *buf, *bp;
480 
481 	if ((buf = malloc(strlen(is)+3)) == NULL)
482 		FATAL( "out of space in qstring(%s)", s);
483 	for (bp = buf; (c = *s) != delim; s++) {
484 		if (c == '\n')
485 			SYNTAX( "newline in string %.20s...", os );
486 		else if (c != '\\')
487 			*bp++ = c;
488 		else {	/* \something */
489 			c = *++s;
490 			if (c == 0) {	/* \ at end */
491 				*bp++ = '\\';
492 				break;	/* for loop */
493 			}
494 			switch (c) {
495 			case '\\':	*bp++ = '\\'; break;
496 			case 'n':	*bp++ = '\n'; break;
497 			case 't':	*bp++ = '\t'; break;
498 			case 'b':	*bp++ = '\b'; break;
499 			case 'f':	*bp++ = '\f'; break;
500 			case 'r':	*bp++ = '\r'; break;
501 			default:
502 				if (!isdigit(c)) {
503 					*bp++ = c;
504 					break;
505 				}
506 				n = c - '0';
507 				if (isdigit(s[1])) {
508 					n = 8 * n + *++s - '0';
509 					if (isdigit(s[1]))
510 						n = 8 * n + *++s - '0';
511 				}
512 				*bp++ = n;
513 				break;
514 			}
515 		}
516 	}
517 	*bp++ = 0;
518 	return (char *) buf;
519 }
520