xref: /original-bsd/contrib/awk.research/tran.c (revision 6066c21e)
1 /****************************************************************
2 Copyright (C) AT&T 1993
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name of AT&T or any of its entities
11 not be used in advertising or publicity pertaining to
12 distribution of the software without specific, written prior
13 permission.
14 
15 AT&T DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL AT&T OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
25 #define	DEBUG
26 #include <stdio.h>
27 #include <math.h>
28 #include <ctype.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include "awk.h"
32 #include "y.tab.h"
33 
34 #define	FULLTAB	2	/* rehash when table gets this x full */
35 #define	GROWTAB 4	/* grow table by this factor */
36 
37 Array	*symtab;	/* main symbol table */
38 
39 uchar	**FS;		/* initial field sep */
40 uchar	**RS;		/* initial record sep */
41 uchar	**OFS;		/* output field sep */
42 uchar	**ORS;		/* output record sep */
43 uchar	**OFMT;		/* output format for numbers */
44 uchar	**CONVFMT;	/* format for conversions in getsval */
45 Awkfloat *NF;		/* number of fields in current record */
46 Awkfloat *NR;		/* number of current record */
47 Awkfloat *FNR;		/* number of current record in current file */
48 uchar	**FILENAME;	/* current filename argument */
49 Awkfloat *ARGC;		/* number of arguments from command line */
50 uchar	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
51 Awkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
52 Awkfloat *RLENGTH;	/* length of same */
53 
54 Cell	*recloc;	/* location of record */
55 Cell	*nrloc;		/* NR */
56 Cell	*nfloc;		/* NF */
57 Cell	*fnrloc;	/* FNR */
58 Array	*ARGVtab;	/* symbol table containing ARGV[...] */
59 Array	*ENVtab;	/* symbol table containing ENVIRON[...] */
60 Cell	*rstartloc;	/* RSTART */
61 Cell	*rlengthloc;	/* RLENGTH */
62 Cell	*symtabloc;	/* SYMTAB */
63 
64 Cell	*nullloc;	/* a guaranteed empty cell */
65 Node	*nullnode;	/* zero&null, converted into a node for comparisons */
66 
67 extern Cell *fldtab;
68 
69 void syminit(void)	/* initialize symbol table with builtin vars */
70 {
71 	setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
72 	/* this is used for if(x)... tests: */
73 	nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
74 	nullnode = valtonode(nullloc, CCON);
75 
76 	/* recloc = setsymtab("$0", record, 0.0, REC|STR|DONTFREE, symtab); */
77 	/* has been done elsewhere */
78 	recloc = &fldtab[0];
79 	FS = &setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab)->sval;
80 	RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
81 	OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
82 	ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
83 	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
84 	CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
85 	FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
86 	nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
87 	NF = &nfloc->fval;
88 	nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
89 	NR = &nrloc->fval;
90 	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
91 	FNR = &fnrloc->fval;
92 	SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
93 	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
94 	RSTART = &rstartloc->fval;
95 	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
96 	RLENGTH = &rlengthloc->fval;
97 	symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
98 	symtabloc->sval = (uchar *) symtab;
99 }
100 
101 void arginit(int ac, uchar *av[])	/* set up ARGV and ARGC */
102 {
103 	Cell *cp;
104 	int i;
105 	uchar temp[5];
106 
107 	ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
108 	cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
109 	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
110 	cp->sval = (uchar *) ARGVtab;
111 	for (i = 0; i < ac; i++) {
112 		sprintf((char *)temp, "%d", i);
113 		if (is_a_number(*av))
114 			setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
115 		else
116 			setsymtab(temp, *av, 0.0, STR, ARGVtab);
117 		av++;
118 	}
119 }
120 
121 void envinit(uchar **envp)	/* set up ENVIRON variable */
122 {
123 	Cell *cp;
124 	uchar *p;
125 
126 	cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
127 	ENVtab = makesymtab(NSYMTAB);
128 	cp->sval = (uchar *) ENVtab;
129 	for ( ; *envp; envp++) {
130 		if ((p = (uchar *) strchr((char *) *envp, '=')) == NULL)
131 			continue;
132 		*p++ = 0;	/* split into two strings at = */
133 		if (is_a_number(p))
134 			setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
135 		else
136 			setsymtab(*envp, p, 0.0, STR, ENVtab);
137 		p[-1] = '=';	/* restore in case env is passed down to a shell */
138 	}
139 }
140 
141 Array *makesymtab(int n)	/* make a new symbol table */
142 {
143 	Array *ap;
144 	Cell **tp;
145 
146 	ap = (Array *) malloc(sizeof(Array));
147 	tp = (Cell **) calloc(n, sizeof(Cell *));
148 	if (ap == NULL || tp == NULL)
149 		ERROR "out of space in makesymtab" FATAL;
150 	ap->nelem = 0;
151 	ap->size = n;
152 	ap->tab = tp;
153 	return(ap);
154 }
155 
156 void freesymtab(Cell *ap)	/* free a symbol table */
157 {
158 	Cell *cp, *temp;
159 	Array *tp;
160 	int i;
161 
162 	if (!isarr(ap))
163 		return;
164 	tp = (Array *) ap->sval;
165 	if (tp == NULL)
166 		return;
167 	for (i = 0; i < tp->size; i++) {
168 		for (cp = tp->tab[i]; cp != NULL; cp = temp) {
169 			xfree(cp->nval);
170 			if (freeable(cp))
171 				xfree(cp->sval);
172 			temp = cp->cnext;	/* avoids freeing then using */
173 			free((char *) cp);
174 		}
175 	}
176 	free((char *) (tp->tab));
177 	free((char *) tp);
178 }
179 
180 void freeelem(Cell *ap, uchar *s)	/* free elem s from ap (i.e., ap["s"] */
181 {
182 	Array *tp;
183 	Cell *p, *prev = NULL;
184 	int h;
185 
186 	tp = (Array *) ap->sval;
187 	h = hash(s, tp->size);
188 	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
189 		if (strcmp((char *) s, (char *) p->nval) == 0) {
190 			if (prev == NULL)	/* 1st one */
191 				tp->tab[h] = p->cnext;
192 			else			/* middle somewhere */
193 				prev->cnext = p->cnext;
194 			if (freeable(p))
195 				xfree(p->sval);
196 			free(p->nval);
197 			free((char *) p);
198 			tp->nelem--;
199 			return;
200 		}
201 }
202 
203 Cell *setsymtab(uchar *n, uchar *s, Awkfloat f, unsigned t, Array *tp)
204 {
205 	register int h;
206 	register Cell *p;
207 
208 	if (n != NULL && (p = lookup(n, tp)) != NULL) {
209 		dprintf( ("setsymtab found %o: n=%s s=\"%s\" f=%g t=%o\n",
210 			p, p->nval, p->sval, p->fval, p->tval) );
211 		return(p);
212 	}
213 	p = (Cell *) malloc(sizeof(Cell));
214 	if (p == NULL)
215 		ERROR "out of space for symbol table at %s", n FATAL;
216 	p->nval = tostring(n);
217 	p->sval = s ? tostring(s) : tostring("");
218 	p->fval = f;
219 	p->tval = t;
220 	tp->nelem++;
221 	if (tp->nelem > FULLTAB * tp->size)
222 		rehash(tp);
223 	h = hash(n, tp->size);
224 	p->cnext = tp->tab[h];
225 	tp->tab[h] = p;
226 	dprintf( ("setsymtab set %o: n=%s s=\"%s\" f=%g t=%o\n",
227 		p, p->nval, p->sval, p->fval, p->tval) );
228 	return(p);
229 }
230 
231 hash(uchar *s, int n)	/* form hash value for string s */
232 {
233 	register unsigned hashval;
234 
235 	for (hashval = 0; *s != '\0'; s++)
236 		hashval = (*s + 31 * hashval);
237 	return hashval % n;
238 }
239 
240 void rehash(Array *tp)	/* rehash items in small table into big one */
241 {
242 	int i, nh, nsz;
243 	Cell *cp, *op, **np;
244 
245 	nsz = GROWTAB * tp->size;
246 	np = (Cell **) calloc(nsz, sizeof(Cell *));
247 	if (np == NULL)		/* can't do it, but can keep running. */
248 		return;		/* someone else will run out later. */
249 	for (i = 0; i < tp->size; i++) {
250 		for (cp = tp->tab[i]; cp; cp = op) {
251 			op = cp->cnext;
252 			nh = hash(cp->nval, nsz);
253 			cp->cnext = np[nh];
254 			np[nh] = cp;
255 		}
256 	}
257 	free((char *) (tp->tab));
258 	tp->tab = np;
259 	tp->size = nsz;
260 }
261 
262 Cell *lookup(uchar *s, Array *tp)	/* look for s in tp */
263 {
264 	register Cell *p, *prev = NULL;
265 	int h;
266 
267 	h = hash(s, tp->size);
268 	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
269 		if (strcmp((char *) s, (char *) p->nval) == 0)
270 			return(p);	/* found it */
271 	return(NULL);			/* not found */
272 }
273 
274 Awkfloat setfval(Cell *vp, Awkfloat f)	/* set float val of a Cell */
275 {
276 	if ((vp->tval & (NUM | STR)) == 0)
277 		funnyvar(vp, "assign to");
278 	if (vp->tval & FLD) {
279 		donerec = 0;	/* mark $0 invalid */
280 		if (vp-fldtab > *NF)
281 			newfld(vp-fldtab);
282 		dprintf( ("setting field %d to %g\n", vp-fldtab, f) );
283 	} else if (vp->tval & REC) {
284 		donefld = 0;	/* mark $1... invalid */
285 		donerec = 1;
286 	}
287 	vp->tval &= ~STR;	/* mark string invalid */
288 	vp->tval |= NUM;	/* mark number ok */
289 	dprintf( ("setfval %o: %s = %g, t=%o\n", vp, vp->nval, f, vp->tval) );
290 	return vp->fval = f;
291 }
292 
293 void funnyvar(Cell *vp, char *rw)
294 {
295 	if (vp->tval & ARR)
296 		ERROR "can't %s %s; it's an array name.", rw, vp->nval FATAL;
297 	if (vp->tval & FCN)
298 		ERROR "can't %s %s; it's a function.", rw, vp->nval FATAL;
299 	ERROR "funny variable %o: n=%s s=\"%s\" f=%g t=%o",
300 		vp, vp->nval, vp->sval, vp->fval, vp->tval WARNING;
301 }
302 
303 uchar *setsval(Cell *vp, uchar *s)	/* set string val of a Cell */
304 {
305 	if ((vp->tval & (NUM | STR)) == 0)
306 		funnyvar(vp, "assign to");
307 	if (vp->tval & FLD) {
308 		donerec = 0;	/* mark $0 invalid */
309 		if (vp-fldtab > *NF)
310 			newfld(vp-fldtab);
311 		dprintf( ("setting field %d to %s\n", vp-fldtab, s) );
312 	} else if (vp->tval & REC) {
313 		donefld = 0;	/* mark $1... invalid */
314 		donerec = 1;
315 	}
316 	vp->tval &= ~NUM;
317 	vp->tval |= STR;
318 	if (freeable(vp))
319 		xfree(vp->sval);
320 	vp->tval &= ~DONTFREE;
321 	dprintf( ("setsval %o: %s = \"%s\", t=%o\n", vp, vp->nval, s, vp->tval) );
322 	return(vp->sval = tostring(s));
323 }
324 
325 Awkfloat r_getfval(Cell *vp)	/* get float val of a Cell */
326 {
327 	if ((vp->tval & (NUM | STR)) == 0)
328 		funnyvar(vp, "read value of");
329 	if ((vp->tval & FLD) && donefld == 0)
330 		fldbld();
331 	else if ((vp->tval & REC) && donerec == 0)
332 		recbld();
333 	if (!isnum(vp)) {	/* not a number */
334 		vp->fval = atof(vp->sval);	/* best guess */
335 		if (is_a_number(vp->sval) && !(vp->tval&CON))
336 			vp->tval |= NUM;	/* make NUM only sparingly */
337 	}
338 	dprintf( ("getfval %o: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) );
339 	return(vp->fval);
340 }
341 
342 uchar *r_getsval(Cell *vp)	/* get string val of a Cell */
343 {
344 	uchar s[100];
345 	double dtemp;
346 
347 	if ((vp->tval & (NUM | STR)) == 0)
348 		funnyvar(vp, "read value of");
349 	if ((vp->tval & FLD) && donefld == 0)
350 		fldbld();
351 	else if ((vp->tval & REC) && donerec == 0)
352 		recbld();
353 	if ((vp->tval & STR) == 0) {
354 		if (!(vp->tval&DONTFREE))
355 			xfree(vp->sval);
356 		if (modf(vp->fval, &dtemp) == 0)	/* it's integral */
357 			sprintf((char *)s, "%.20g", vp->fval);
358 		else
359 			sprintf((char *)s, (char *)*CONVFMT, vp->fval);
360 		vp->sval = tostring(s);
361 		vp->tval &= ~DONTFREE;
362 		vp->tval |= STR;
363 	}
364 	dprintf( ("getsval %o: %s = \"%s\", t=%o\n", vp, vp->nval, vp->sval, vp->tval) );
365 	return(vp->sval);
366 }
367 
368 uchar *tostring(uchar *s)	/* make a copy of string s */
369 {
370 	register uchar *p;
371 
372 	p = (uchar *) malloc(strlen((char *) s)+1);
373 	if (p == NULL)
374 		ERROR "out of space in tostring on %s", s FATAL;
375 	strcpy((char *) p, (char *) s);
376 	return(p);
377 }
378 
379 uchar *qstring(uchar *s, int delim)	/* collect string up to next delim */
380 {
381 	uchar *q;
382 	int c, n;
383 
384 	for (q = cbuf; (c = *s) != delim; s++) {
385 		if (q >= cbuf + CBUFLEN - 1)
386 			ERROR "string %.10s... too long", cbuf SYNTAX;
387 		else if (c == '\n')
388 			ERROR "newline in string %.10s...", cbuf SYNTAX;
389 		else if (c != '\\')
390 			*q++ = c;
391 		else	/* \something */
392 			switch (c = *++s) {
393 			case '\\':	*q++ = '\\'; break;
394 			case 'n':	*q++ = '\n'; break;
395 			case 't':	*q++ = '\t'; break;
396 			case 'b':	*q++ = '\b'; break;
397 			case 'f':	*q++ = '\f'; break;
398 			case 'r':	*q++ = '\r'; break;
399 			default:
400 				if (!isdigit(c)) {
401 					*q++ = c;
402 					break;
403 				}
404 				n = c - '0';
405 				if (isdigit(s[1])) {
406 					n = 8 * n + *++s - '0';
407 					if (isdigit(s[1]))
408 						n = 8 * n + *++s - '0';
409 				}
410 				*q++ = n;
411 				break;
412 			}
413 	}
414 	*q = '\0';
415 	return cbuf;
416 }
417