xref: /illumos-gate/usr/src/cmd/awk/tran.c (revision 55381082)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  */
27 
28 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
29 /*	  All Rights Reserved  	*/
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #define	DEBUG
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <ctype.h>
37 #include <string.h>
38 #include "awk.h"
39 #include "y.tab.h"
40 
41 #define	FULLTAB	2	/* rehash when table gets this x full */
42 #define	GROWTAB 4	/* grow table by this factor */
43 
44 Array	*symtab;	/* main symbol table */
45 
46 uchar	**FS;		/* initial field sep */
47 uchar	**RS;		/* initial record sep */
48 uchar	**OFS;		/* output field sep */
49 uchar	**ORS;		/* output record sep */
50 uchar	**OFMT;		/* output format for numbers */
51 Awkfloat *NF;		/* number of fields in current record */
52 Awkfloat *NR;		/* number of current record */
53 Awkfloat *FNR;		/* number of current record in current file */
54 uchar	**FILENAME;	/* current filename argument */
55 Awkfloat *ARGC;		/* number of arguments from command line */
56 uchar	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
57 Awkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
58 Awkfloat *RLENGTH;	/* length of same */
59 
60 Cell	*recloc;	/* location of record */
61 Cell	*nrloc;		/* NR */
62 Cell	*nfloc;		/* NF */
63 Cell	*fnrloc;	/* FNR */
64 Array	*ARGVtab;	/* symbol table containing ARGV[...] */
65 Array	*ENVtab;	/* symbol table containing ENVIRON[...] */
66 Cell	*rstartloc;	/* RSTART */
67 Cell	*rlengthloc;	/* RLENGTH */
68 Cell	*symtabloc;	/* SYMTAB */
69 
70 Cell	*nullloc;
71 Node	*nullnode;	/* zero&null, converted into a node for comparisons */
72 
73 static	void	rehash(Array *);
74 
75 void
76 syminit(void)
77 {
78 	Cell	*p;
79 
80 	init_buf(&recdata, &record_size, LINE_INCR);
81 	record = recdata;
82 
83 	p = getfld(0);
84 	/* initialize $0 */
85 	p->nval = (uchar*) "$0";
86 	p->sval = recdata;
87 	p->tval = REC|STR|DONTFREE;
88 
89 	symtab = makesymtab(NSYMTAB);
90 	(void) setsymtab((uchar *)"0", (uchar *)"0", 0.0,
91 	    NUM|STR|CON|DONTFREE, symtab);
92 	/* this is used for if(x)... tests: */
93 	nullloc = setsymtab((uchar *)"$zero&null", (uchar *)"", 0.0,
94 	    NUM|STR|CON|DONTFREE, symtab);
95 	nullnode = valtonode(nullloc, CCON);
96 	recloc = getfld(0);
97 	FS = &setsymtab((uchar *)"FS", (uchar *)" ", 0.0,
98 	    STR|DONTFREE, symtab)->sval;
99 	RS = &setsymtab((uchar *)"RS", (uchar *)"\n", 0.0,
100 	    STR|DONTFREE, symtab)->sval;
101 	OFS = &setsymtab((uchar *)"OFS", (uchar *)" ", 0.0,
102 	    STR|DONTFREE, symtab)->sval;
103 	ORS = &setsymtab((uchar *)"ORS", (uchar *)"\n", 0.0,
104 	    STR|DONTFREE, symtab)->sval;
105 	OFMT = &setsymtab((uchar *)"OFMT", (uchar *)"%.6g", 0.0,
106 	    STR|DONTFREE, symtab)->sval;
107 	FILENAME = &setsymtab((uchar *)"FILENAME", (uchar *)"-", 0.0,
108 	    STR|DONTFREE, symtab)->sval;
109 	nfloc = setsymtab((uchar *)"NF", (uchar *)"", 0.0, NUM, symtab);
110 	NF = &nfloc->fval;
111 	nrloc = setsymtab((uchar *)"NR", (uchar *)"", 0.0, NUM, symtab);
112 	NR = &nrloc->fval;
113 	fnrloc = setsymtab((uchar *)"FNR", (uchar *)"", 0.0, NUM, symtab);
114 	FNR = &fnrloc->fval;
115 	SUBSEP = &setsymtab((uchar *)"SUBSEP", (uchar *)"\034", 0.0,
116 	    STR|DONTFREE, symtab)->sval;
117 	rstartloc = setsymtab((uchar *)"RSTART", (uchar *)"", 0.0,
118 	    NUM, symtab);
119 	RSTART = &rstartloc->fval;
120 	rlengthloc = setsymtab((uchar *)"RLENGTH", (uchar *)"", 0.0,
121 	    NUM, symtab);
122 	RLENGTH = &rlengthloc->fval;
123 	symtabloc = setsymtab((uchar *)"SYMTAB", (uchar *)"", 0.0, ARR, symtab);
124 	symtabloc->sval = (uchar *)symtab;
125 }
126 
127 void
128 arginit(int ac, uchar *av[])
129 {
130 	Cell *cp;
131 	int i;
132 	uchar temp[11];
133 
134 	/* first make FILENAME first real argument */
135 	for (i = 1; i < ac; i++) {
136 		if (!isclvar(av[i])) {
137 			(void) setsval(lookup((uchar *)"FILENAME", symtab),
138 			    av[i]);
139 			break;
140 		}
141 	}
142 	ARGC = &setsymtab((uchar *)"ARGC", (uchar *)"", (Awkfloat)ac,
143 	    NUM, symtab)->fval;
144 	cp = setsymtab((uchar *)"ARGV", (uchar *)"", 0.0, ARR, symtab);
145 	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
146 	cp->sval = (uchar *) ARGVtab;
147 	for (i = 0; i < ac; i++) {
148 		(void) sprintf((char *)temp, "%d", i);
149 		if (is_number(*av)) {
150 			(void) setsymtab(temp, *av, atof((const char *)*av),
151 			    STR|NUM, ARGVtab);
152 		} else {
153 			(void) setsymtab(temp, *av, 0.0, STR, ARGVtab);
154 		}
155 		av++;
156 	}
157 }
158 
159 void
160 envinit(uchar *envp[])
161 {
162 	Cell *cp;
163 	uchar *p;
164 
165 	cp = setsymtab((uchar *)"ENVIRON", (uchar *)"", 0.0, ARR, symtab);
166 	ENVtab = makesymtab(NSYMTAB);
167 	cp->sval = (uchar *) ENVtab;
168 	for (; *envp; envp++) {
169 		if ((p = (uchar *)strchr((char *)*envp, '=')) == NULL)
170 			continue;
171 		*p++ = 0;	/* split into two strings at = */
172 		if (is_number(p)) {
173 			(void) setsymtab(*envp, p, atof((const char *)p),
174 			    STR|NUM, ENVtab);
175 		} else {
176 			(void) setsymtab(*envp, p, 0.0, STR, ENVtab);
177 		}
178 		/* restore in case env is passed down to a shell */
179 		p[-1] = '=';
180 	}
181 }
182 
183 Array *
184 makesymtab(int n)
185 {
186 	Array *ap;
187 	Cell **tp;
188 
189 	ap = (Array *)malloc(sizeof (Array));
190 	tp = (Cell **)calloc(n, sizeof (Cell *));
191 	if (ap == NULL || tp == NULL)
192 		ERROR "out of space in makesymtab" FATAL;
193 	ap->nelem = 0;
194 	ap->size = n;
195 	ap->tab = tp;
196 	return (ap);
197 }
198 
199 void
200 freesymtab(Cell *ap)	/* free symbol table */
201 {
202 	Cell *cp, *next;
203 	Array *tp;
204 	int i;
205 
206 	if (!isarr(ap))
207 		return;
208 	/*LINTED align*/
209 	tp = (Array *)ap->sval;
210 	if (tp == NULL)
211 		return;
212 	for (i = 0; i < tp->size; i++) {
213 		for (cp = tp->tab[i]; cp != NULL; cp = next) {
214 			next = cp->cnext;
215 			xfree(cp->nval);
216 			if (freeable(cp))
217 				xfree(cp->sval);
218 			free(cp);
219 		}
220 	}
221 	free(tp->tab);
222 	free(tp);
223 }
224 
225 void
226 freeelem(Cell *ap, uchar *s)		/* free elem s from ap (i.e., ap["s"] */
227 {
228 	Array *tp;
229 	Cell *p, *prev = NULL;
230 	int h;
231 
232 	/*LINTED align*/
233 	tp = (Array *)ap->sval;
234 	h = hash(s, tp->size);
235 	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
236 		if (strcmp((char *)s, (char *)p->nval) == 0) {
237 			if (prev == NULL)	/* 1st one */
238 				tp->tab[h] = p->cnext;
239 			else			/* middle somewhere */
240 				prev->cnext = p->cnext;
241 			if (freeable(p))
242 				xfree(p->sval);
243 			free(p->nval);
244 			free(p);
245 			tp->nelem--;
246 			return;
247 		}
248 }
249 
250 Cell *
251 setsymtab(uchar *n, uchar *s, Awkfloat f, unsigned int t, Array *tp)
252 {
253 	register int h;
254 	register Cell *p;
255 
256 	if (n != NULL && (p = lookup(n, tp)) != NULL) {
257 		dprintf(("setsymtab found %p: n=%s", (void *)p, p->nval));
258 		dprintf((" s=\"%s\" f=%g t=%p\n",
259 		    p->sval, p->fval, (void *)p->tval));
260 		return (p);
261 	}
262 	p = (Cell *)malloc(sizeof (Cell));
263 	if (p == NULL)
264 		ERROR "symbol table overflow at %s", n FATAL;
265 	p->nval = tostring(n);
266 	p->sval = s ? tostring(s) : tostring((uchar *)"");
267 	p->fval = f;
268 	p->tval = t;
269 	p->csub = 0;
270 
271 	tp->nelem++;
272 	if (tp->nelem > FULLTAB * tp->size)
273 		rehash(tp);
274 	h = hash(n, tp->size);
275 	p->cnext = tp->tab[h];
276 	tp->tab[h] = p;
277 	dprintf(("setsymtab set %p: n=%s", (void *)p, p->nval));
278 	dprintf((" s=\"%s\" f=%g t=%p\n", p->sval, p->fval, (void *)p->tval));
279 	return (p);
280 }
281 
282 int
283 hash(uchar *s, int n)	/* form hash value for string s */
284 {
285 	register unsigned hashval;
286 
287 	for (hashval = 0; *s != '\0'; s++)
288 		hashval = (*s + 31 * hashval);
289 	return (hashval % n);
290 }
291 
292 static void
293 rehash(Array *tp)	/* rehash items in small table into big one */
294 {
295 	int i, nh, nsz;
296 	Cell *cp, *op, **np;
297 
298 	nsz = GROWTAB * tp->size;
299 	np = (Cell **)calloc(nsz, sizeof (Cell *));
300 	if (np == NULL)
301 		ERROR "out of space in rehash" FATAL;
302 	for (i = 0; i < tp->size; i++) {
303 		for (cp = tp->tab[i]; cp; cp = op) {
304 			op = cp->cnext;
305 			nh = hash(cp->nval, nsz);
306 			cp->cnext = np[nh];
307 			np[nh] = cp;
308 		}
309 	}
310 	free(tp->tab);
311 	tp->tab = np;
312 	tp->size = nsz;
313 }
314 
315 Cell *
316 lookup(uchar *s, Array *tp)	/* look for s in tp */
317 {
318 	register Cell *p;
319 	int h;
320 
321 	h = hash(s, tp->size);
322 	for (p = tp->tab[h]; p != NULL; p = p->cnext) {
323 		if (strcmp((char *)s, (char *)p->nval) == 0)
324 			return (p);	/* found it */
325 	}
326 	return (NULL);			/* not found */
327 }
328 
329 Awkfloat
330 setfval(Cell *vp, Awkfloat f)
331 {
332 	int	i;
333 
334 	if ((vp->tval & (NUM | STR)) == 0)
335 		funnyvar(vp, "assign to");
336 	if (vp->tval & FLD) {
337 		donerec = 0;	/* mark $0 invalid */
338 		i = fldidx(vp);
339 		if (i > *NF)
340 			newfld(i);
341 		dprintf(("setting field %d to %g\n", i, f));
342 	} else if (vp->tval & REC) {
343 		donefld = 0;	/* mark $1... invalid */
344 		donerec = 1;
345 	}
346 	vp->tval &= ~STR;	/* mark string invalid */
347 	vp->tval |= NUM;	/* mark number ok */
348 	dprintf(("setfval %p: %s = %g, t=%p\n", (void *)vp,
349 	    vp->nval ? vp->nval : (unsigned char *)"NULL",
350 	    f, (void *)vp->tval));
351 	return (vp->fval = f);
352 }
353 
354 void
355 funnyvar(Cell *vp, char *rw)
356 {
357 	if (vp->tval & ARR)
358 		ERROR "can't %s %s; it's an array name.", rw, vp->nval FATAL;
359 	if (vp->tval & FCN)
360 		ERROR "can't %s %s; it's a function.", rw, vp->nval FATAL;
361 	ERROR "funny variable %o: n=%s s=\"%s\" f=%g t=%o",
362 	    vp, vp->nval, vp->sval, vp->fval, vp->tval CONT;
363 }
364 
365 uchar *
366 setsval(Cell *vp, uchar *s)
367 {
368 	int	i;
369 
370 	if ((vp->tval & (NUM | STR)) == 0)
371 		funnyvar(vp, "assign to");
372 	if (vp->tval & FLD) {
373 		donerec = 0;	/* mark $0 invalid */
374 		i = fldidx(vp);
375 		if (i > *NF)
376 			newfld(i);
377 		dprintf(("setting field %d to %s\n", i, s));
378 	} else if (vp->tval & REC) {
379 		donefld = 0;	/* mark $1... invalid */
380 		donerec = 1;
381 	}
382 	vp->tval &= ~NUM;
383 	vp->tval |= STR;
384 	if (freeable(vp))
385 		xfree(vp->sval);
386 	vp->tval &= ~DONTFREE;
387 	dprintf(("setsval %p: %s = \"%s\", t=%p\n",
388 	    (void *)vp,
389 	    vp->nval ? (char *)vp->nval : "",
390 	    s,
391 	    (void *)(vp->tval ? (char *)vp->tval : "")));
392 	return (vp->sval = tostring(s));
393 }
394 
395 Awkfloat
396 r_getfval(Cell *vp)
397 {
398 	if ((vp->tval & (NUM | STR)) == 0)
399 		funnyvar(vp, "read value of");
400 	if ((vp->tval & FLD) && donefld == 0)
401 		fldbld();
402 	else if ((vp->tval & REC) && donerec == 0)
403 		recbld();
404 	if (!isnum(vp)) {	/* not a number */
405 		vp->fval = atof((const char *)vp->sval);	/* best guess */
406 		if (is_number(vp->sval) && !(vp->tval&CON))
407 			vp->tval |= NUM;	/* make NUM only sparingly */
408 	}
409 	dprintf(("getfval %p: %s = %g, t=%p\n",
410 	    (void *)vp, vp->nval, vp->fval, (void *)vp->tval));
411 	return (vp->fval);
412 }
413 
414 uchar *
415 r_getsval(Cell *vp)
416 {
417 	uchar s[256];
418 
419 	if ((vp->tval & (NUM | STR)) == 0)
420 		funnyvar(vp, "read value of");
421 	if ((vp->tval & FLD) && donefld == 0)
422 		fldbld();
423 	else if ((vp->tval & REC) && donerec == 0)
424 		recbld();
425 	if ((vp->tval & STR) == 0) {
426 		if (!(vp->tval&DONTFREE))
427 			xfree(vp->sval);
428 		if ((long long)vp->fval == vp->fval) {
429 			(void) snprintf((char *)s, sizeof (s),
430 			    "%.20g", vp->fval);
431 		} else {
432 			/*LINTED*/
433 			(void) snprintf((char *)s, sizeof (s),
434 			    (char *)*OFMT, vp->fval);
435 		}
436 		vp->sval = tostring(s);
437 		vp->tval &= ~DONTFREE;
438 		vp->tval |= STR;
439 	}
440 	dprintf(("getsval %p: %s = \"%s\", t=%p\n",
441 	    (void *)vp,
442 	    vp->nval ? (char *)vp->nval : "",
443 	    vp->sval ? (char *)vp->sval : "",
444 	    (void *)vp->tval));
445 	return (vp->sval);
446 }
447 
448 uchar *
449 tostring(uchar *s)
450 {
451 	register uchar *p;
452 
453 	p = (uchar *)malloc(strlen((char *)s)+1);
454 	if (p == NULL)
455 		ERROR "out of space in tostring on %s", s FATAL;
456 	(void) strcpy((char *)p, (char *)s);
457 	return (p);
458 }
459 
460 uchar *
461 qstring(uchar *s, int delim)	/* collect string up to delim */
462 {
463 	uchar *cbuf, *ret;
464 	int c, n;
465 	size_t	cbufsz, cnt;
466 
467 	init_buf(&cbuf, &cbufsz, LINE_INCR);
468 
469 	for (cnt = 0; (c = *s) != delim; s++) {
470 		if (c == '\n') {
471 			ERROR "newline in string %.10s...", cbuf SYNTAX;
472 		} else if (c != '\\') {
473 			expand_buf(&cbuf, &cbufsz, cnt);
474 			cbuf[cnt++] = c;
475 		} else {	/* \something */
476 			expand_buf(&cbuf, &cbufsz, cnt);
477 			switch (c = *++s) {
478 			case '\\':	cbuf[cnt++] = '\\'; break;
479 			case 'n':	cbuf[cnt++] = '\n'; break;
480 			case 't':	cbuf[cnt++] = '\t'; break;
481 			case 'b':	cbuf[cnt++] = '\b'; break;
482 			case 'f':	cbuf[cnt++] = '\f'; break;
483 			case 'r':	cbuf[cnt++] = '\r'; break;
484 			default:
485 				if (!isdigit(c)) {
486 					cbuf[cnt++] = c;
487 					break;
488 				}
489 				n = c - '0';
490 				if (isdigit(s[1])) {
491 					n = 8 * n + *++s - '0';
492 					if (isdigit(s[1]))
493 						n = 8 * n + *++s - '0';
494 				}
495 				cbuf[cnt++] = n;
496 				break;
497 			}
498 		}
499 	}
500 	cbuf[cnt] = '\0';
501 	ret = tostring(cbuf);
502 	free(cbuf);
503 	return (ret);
504 }
505