1 /*
2    Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002.
3 
4    Sccsid @(#)tran.c	1.16 (gritter) 2/4/05>
5  */
6 /* UNIX(R) Regular Expression Tools
7 
8    Copyright (C) 2001 Caldera International, Inc.
9 
10    This program is free software; you can redistribute it and/or modify
11    it under the terms of the GNU General Public License as published by
12    the Free Software Foundation; either version 2 of the License, or
13    (at your option) any later version.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to:
22        Free Software Foundation, Inc.
23        59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24 */
25 /*		copyright	"%c%" 	*/
26 
27 /*	from unixsrc:usr/src/common/cmd/awk/tran.c /main/uw7_nj/1	*/
28 /*	from RCS Header: tran.c 1.2 91/06/25 	*/
29 
30 
31 #define	DEBUG
32 #include <stdio.h>
33 #include <ctype.h>
34 #include <string.h>
35 #include "awk.h"
36 #include "y.tab.h"
37 #include <pfmt.h>
38 
39 #undef	RS
40 
41 #define	FULLTAB	2	/* rehash when table gets this x full */
42 #define	GROWTAB 4	/* grow table by this factor */
43 
/*
 * Global interpreter state.  symtab and most of the pointers below are
 * filled in by syminit(); ARGC and ARGVtab are set by arginit() and
 * ENVtab by envinit().
 */
Array	*symtab;	/* main symbol table */

/*
 * Each of the following points at the sval (strings) or fval (numbers)
 * member of the symbol table cell for the built-in variable of the
 * same name, so the current value can be read without a lookup.
 */
unsigned char	**FS;		/* initial field sep */
unsigned char	**RS;		/* initial record sep */
unsigned char	**OFS;		/* output field sep */
unsigned char	**ORS;		/* output record sep */
unsigned char	**OFMT;		/* output format for numbers */
unsigned char	**CONVFMT;	/* generic format for numbers->strings */
Awkfloat *NF;		/* number of fields in current record */
Awkfloat *NR;		/* number of current record */
Awkfloat *FNR;		/* number of current record in current file */
unsigned char	**FILENAME;	/* current filename argument */
Awkfloat *ARGC;		/* number of arguments from command line */
unsigned char	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
Awkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
Awkfloat *RLENGTH;	/* length of same */

/* The cells themselves, for code that needs more than the value. */
Cell	*recloc;	/* location of record */
Cell	*nrloc;		/* NR */
Cell	*nfloc;		/* NF */
Cell	*fsloc;		/* FS */
Cell	*fnrloc;	/* FNR */
Array	*ARGVtab;	/* symbol table containing ARGV[...] */
Array	*ENVtab;	/* symbol table containing ENVIRON[...] */
Cell	*rstartloc;	/* RSTART */
Cell	*rlengthloc;	/* RLENGTH */
Cell	*symtabloc;	/* SYMTAB */

Cell	*nullloc;	/* cell of the "$zero&null" constant entered by syminit() */
Node	*nullnode;	/* zero&null, converted into a node for comparisons */
74 
75 extern Cell **fldtab;
76 static int hash(register unsigned char *s, int n);
77 static void rehash(Array *tp);
78 
/*
 * Message identifier and default text passed to gettxt() by setfval()
 * and setsval() when the target cell is neither a number nor a string.
 */
static	const	char
	assigntovid[] = ":80",
	assigntov[] = "assign to";

/*
 * Message strings defined without `static`.  readvofid/readvof are the
 * gettxt() pair used by r_getfval() and r_getsval(); outofspace and
 * nlstring are ":id:text" format strings handed to error() and
 * vyyerror() respectively.
 */
const char
	readvofid[] = ":81",
	readvof[] = "read value of",
	outofspace[] = ":82:Out of space in %s",
	nlstring[] = ":83:Newline in string %.10s ...";
88 
syminit(void)89 void syminit(void)
90 {
91 	symtab = makesymtab(NSYMTAB);
92 	setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
93 	/* this is used for if(x)... tests: */
94 	nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
95 	nullnode = valtonode(nullloc, CCON);
96 	/* recloc = setsymtab("$0", record, 0.0, REC|STR|DONTFREE, symtab); */
97 	recloc = fldtab[0];
98 	fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
99 	FS = &fsloc->sval;
100 	RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
101 	OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
102 	ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
103 	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
104 	CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
105 	FILENAME = &setsymtab("FILENAME", "-", 0.0, STR|DONTFREE, symtab)->sval;
106 	nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
107 	NF = &nfloc->fval;
108 	nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
109 	NR = &nrloc->fval;
110 	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
111 	FNR = &fnrloc->fval;
112 	SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
113 	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
114 	RSTART = &rstartloc->fval;
115 	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
116 	RLENGTH = &rlengthloc->fval;
117 	symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
118 	symtabloc->sval = (unsigned char *) symtab;
119 }
120 
arginit(int ac,unsigned char ** av)121 void arginit(int ac, unsigned char **av)
122 {
123 	Cell *cp;
124 	int i;
125 	unsigned char temp[25];
126 
127 	for (i = 1; i < ac; i++)	/* first make FILENAME first real argument */
128 		if (!isclvar(av[i])) {
129 			setsval(lookup("FILENAME", symtab), av[i]);
130 			break;
131 		}
132 	ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
133 	cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
134 	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
135 	cp->sval = (unsigned char *) ARGVtab;
136 	for (i = 0; i < ac; i++) {
137 		snprintf((char *)temp, sizeof temp, "%d", i);
138 		setsymtab(temp, *av, 0.0, STR|CANBENUM, ARGVtab);
139 		av++;
140 	}
141 }
142 
envinit(unsigned char ** envp)143 void envinit(unsigned char **envp)
144 {
145 	Cell *cp;
146 	unsigned char *p;
147 
148 	cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
149 	ENVtab = makesymtab(NSYMTAB);
150 	cp->sval = (unsigned char *) ENVtab;
151 	for ( ; *envp; envp++) {
152 		if ((p = (unsigned char *) strchr((char *) *envp, '=')) == NULL)	/* index() on bsd */
153 			continue;
154 		*p++ = 0;	/* split into two strings at = */
155 		setsymtab(*envp, p, 0.0, STR|CANBENUM, ENVtab);
156 		p[-1] = '=';	/* restore in case env is passed down to a shell */
157 	}
158 }
159 
makesymtab(int n)160 Array *makesymtab(int n)
161 {
162 	Array *ap;
163 	Cell **tp;
164 
165 	ap = (Array *) malloc(sizeof(Array));
166 	tp = (Cell **) calloc(n, sizeof(Cell *));
167 	if (ap == NULL || tp == NULL)
168 		error(MM_ERROR, outofspace, "makesymtab");
169 	ap->nelem = 0;
170 	ap->size = n;
171 	ap->tab = tp;
172 	return(ap);
173 }
174 
/*
 * Free the symbol table held in the sval of array cell ap: every
 * cell in every bucket, then the bucket vector, then the table.
 * Does nothing if ap is not an array or holds no table.
 */
void freesymtab(Cell *ap)
{
	Array *tp;
	Cell *cp, *next;
	int bucket;

	if (!isarr(ap))
		return;
	tp = (Array *) ap->sval;
	if (tp == NULL)
		return;

	for (bucket = 0; bucket < tp->size; bucket++) {
		cp = tp->tab[bucket];
		while (cp != NULL) {
			next = cp->cnext;	/* fetch the successor before cp is freed */
			xfree(cp->nval);
			if (freeable(cp))
				xfree(cp->sval);
			free(cp);
			cp = next;
		}
	}
	free(tp->tab);
	free(tp);
}
198 
/*
 * Free element s from the array held by ap (i.e., ap["s"]).
 * Does nothing if no element with that name exists.
 */
void freeelem(Cell *ap, unsigned char *s)
{
	Array *tp = (Array *) ap->sval;
	int h = hash(s, tp->size);
	Cell **link;	/* the pointer that currently leads to the cell under test */
	Cell *p;

	for (link = &tp->tab[h]; (p = *link) != NULL; link = &p->cnext) {
		if (strcmp((char *) s, (char *) p->nval) != 0)
			continue;
		*link = p->cnext;	/* unlink, whether first in the bucket or not */
		if (freeable(p))
			xfree(p->sval);
		free(p->nval);
		free(p);
		tp->nelem--;
		return;
	}
}
222 
ssetsymtab(unsigned char * n,unsigned char * s,Awkfloat f,unsigned t,Array * tp)223 Cell *ssetsymtab(unsigned char *n, unsigned char *s, Awkfloat f,
224 		unsigned t, Array *tp)
225 {
226 	register int h;
227 	register Cell *p;
228 
229 	if (n != NULL && (p = lookup(n, tp)) != NULL) {
230 		dprintf( ("setsymtab found %lo: n=%s", (long)p, p->nval) );
231 		dprintf( (" s=\"%s\" f=%g t=%o\n", p->sval? p->sval : tostring(""), p->fval, p->tval) );
232 		return(p);
233 	}
234 	p = (Cell *) malloc(sizeof(Cell));
235 	if (p == NULL)
236 		error(MM_ERROR, ":84:Symbol table overflow at %s", n);
237 	p->nval = tostring(n);
238 	p->sval = s ? tostring(s) : tostring("");
239 	p->fval = f;
240 	p->tval = t & ~CANBENUM;
241 	p->csub = 0;
242 	if (t & CANBENUM)
243 		(void)is2number(0, p);
244 	tp->nelem++;
245 	if (tp->nelem > FULLTAB * tp->size)
246 		rehash(tp);
247 	h = hash(n, tp->size);
248 	p->cnext = tp->tab[h];
249 	tp->tab[h] = p;
250 	dprintf( ("setsymtab set %lo: n=%s", (long)p, p->nval) );
251 	dprintf( (" s=\"%s\" f=%g t=%o\n", p->sval? p->sval : tostring(""), p->fval, p->tval) );
252 	return(p);
253 }
254 
/*
 * Form the hash value for string s: fold the bytes into an unsigned
 * accumulator (multiply by 31, add the byte) and reduce modulo the
 * table size n.
 */
static int hash(register unsigned char *s, int n)
{
	register unsigned acc = 0;

	while (*s != '\0') {
		acc = 31 * acc + *s;
		s++;
	}
	return acc % n;
}
264 
/*
 * Move every cell of tp into a new bucket vector GROWTAB times the
 * current size, then release the old vector.  The cells themselves
 * are relinked, not copied.
 */
static void rehash(Array *tp)
{
	int newsize = GROWTAB * tp->size;
	Cell **newtab = (Cell **) calloc(newsize, sizeof(Cell *));
	Cell *cp, *next;
	int bucket, slot;

	if (newtab == NULL)
		error(MM_ERROR, outofspace, "rehash");

	for (bucket = 0; bucket < tp->size; bucket++) {
		cp = tp->tab[bucket];
		while (cp != NULL) {
			next = cp->cnext;	/* cnext is overwritten below */
			slot = hash(cp->nval, newsize);
			cp->cnext = newtab[slot];
			newtab[slot] = cp;
			cp = next;
		}
	}
	free(tp->tab);
	tp->tab = newtab;
	tp->size = newsize;
}
286 
slookup(register unsigned char * s,Array * tp)287 Cell *slookup(register unsigned char *s, Array *tp)	/* look for s in tp */
288 {
289 	register Cell *p, *prev = NULL;
290 	int h;
291 
292 	h = hash(s, tp->size);
293 	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
294 		if (strcmp((char *) s, (char *) p->nval) == 0)
295 			return(p);	/* found it */
296 	return(NULL);			/* not found */
297 }
298 
/*
 * Assign the number f to cell vp and return f.
 *
 * A cell that is neither NUM nor STR is reported through funnyvar().
 * Assigning to a field marks the record as needing a rebuild
 * (donerec = 0) and calls newfld() when the field index is beyond NF;
 * assigning to the record marks the fields invalid (donefld = 0).
 * Afterwards the cell is flagged NUM and its STR flag is cleared.
 */
Awkfloat setfval(register Cell *vp, Awkfloat f)
{
	if ((vp->tval & (NUM | STR)) == 0)
		funnyvar(vp, (char *)gettxt(assigntovid, assigntov));
	if (vp->tval & FLD) {
		int n;
		donerec = 0;	/* mark $0 invalid */
		/* find the field's index by locating its cell in fldtab */
		for (n = 0; vp != fldtab[n]; n++);
		if (n > *NF)
			newfld(n);
		dprintf( ("setting field %d to %g\n", n, f) );
	} else if (vp->tval & REC) {
		donefld = 0;	/* mark $1... invalid */
		donerec = 1;
	}
	vp->tval &= ~STR;	/* mark string invalid */
	vp->tval |= NUM;	/* mark number ok */
	/* use a literal as the fallback; a tostring("") copy would never be freed */
	dprintf( ("setfval %lo: %s = %g, t=%o\n", (long)vp, vp->nval ? vp->nval : (unsigned char *)"", f, vp->tval) );
	return vp->fval = f;
}
319 
/*
 * Report an attempt to use cell vp as a scalar when it is not one.
 *
 * rw is the verb phrase inserted into the message (the callers in
 * this file pass the "assign to" / "read value of" texts).  Arrays
 * and functions get a specific message; anything else is reported
 * as a "funny variable" with a dump of the cell.
 */
void funnyvar(Cell *vp, char *rw)
{
	if (vp->tval & ARR)
		error(MM_ERROR, ":85:Cannot %s %s; it's an array name.",
			rw, vp->nval);
	if (vp->tval & FCN)
		error(MM_ERROR, ":86:Cannot %s %s; it's a function.",
			rw, vp->nval);
	/*
	 * The cell address is converted to long and printed with %lo, as
	 * the debug traces in this file do; passing the pointer itself
	 * for %o does not match the conversion's expected argument type.
	 */
	error(MM_ERROR, ":87:Funny variable %lo: n=%s s=\"%s\" f=%g t=%o",
		(long)vp, vp->nval, vp->sval, vp->fval, vp->tval);
}
331 
setsval(register Cell * vp,unsigned char * s)332 unsigned char *setsval(register Cell *vp, unsigned char *s)
333 {
334 	if ((vp->tval & (NUM | STR)) == 0)
335 		funnyvar(vp, (char *)gettxt(assigntovid, assigntov));
336 	if (vp->tval & FLD) {
337 		int n;
338 		donerec = 0;	/* mark $0 invalid */
339 		for (n = 0; vp != fldtab[n]; n++);
340 		if (n > *NF)
341 			newfld(n);
342 		dprintf( ("setting field %d to %s\n", n, s) );
343 	} else if (vp->tval & REC) {
344 		donefld = 0;	/* mark $1... invalid */
345 		donerec = 1;
346 	} else if (vp == fsloc && donefld == 0) {
347 		/*
348 		* Because POSIX.2 requires that awk act as if it always
349 		* splits the current input line immediately after reading,
350 		* we force it to be split into fields just before a change
351 		* to FS if we haven't needed to do so yet.
352 		*/
353 		fldbld();
354 	}
355 	vp->tval &= ~NUM;
356 	vp->tval |= STR;
357 	s = tostring(s); /* moved to here since "s" can be "vp->sval" */
358 	if (freeable(vp))
359 		xfree(vp->sval);
360 	if (vp->tval & REC) {
361 		/*
362 		 * Make sure that recsize is large enough to build
363 		 * fields afterwards.
364 		 */
365 		unsigned char *os = s;
366 
367 		s = makerec(s, strlen((char *)s) + 1);
368 		free(os);
369 	} else
370 		vp->tval &= ~DONTFREE;
371 	dprintf( ("setsval %lo: %s = \"%s\", t=%o\n", (long)vp, vp->nval, s, vp->tval) );
372 	return(vp->sval = s);
373 }
374 
r_getfval(register Cell * vp)375 Awkfloat r_getfval(register Cell *vp)
376 {
377 	/* if (vp->tval & ARR)
378 		ERROR "Illegal reference to array %s", vp->nval FATAL;
379 		return 0.0; */
380 	if ((vp->tval & (NUM | STR)) == 0)
381 		funnyvar(vp, (char *)gettxt(readvofid, readvof));
382 	if ((vp->tval & FLD) && donefld == 0)
383 		fldbld();
384 	else if ((vp->tval & REC) && donerec == 0)
385 		recbld();
386 	if (!isnum(vp)) {	/* not marked as a number */
387 		vp->fval = awk_atof((char *)vp->sval);	/* best guess */
388 		if (is2number(vp->sval, 0) && !(vp->tval&CON))
389 			vp->tval |= NUM;	/* make NUM only sparingly */
390 	}
391 	dprintf( ("getfval %lo: %s = %g, t=%o\n", (long)vp, vp->nval, vp->fval, vp->tval) );
392 	return(vp->fval);
393 }
394 
r_getsval(register Cell * vp)395 unsigned char *r_getsval(register Cell *vp)
396 {
397 	unsigned char s[100];
398 
399 	/* if (vp->tval & ARR)
400 		ERROR "Illegal reference to array %s",
401 			vp->nval FATAL;
402 		return ""; */
403 	if ((vp->tval & (NUM | STR)) == 0)
404 		funnyvar(vp, (char *)gettxt(readvofid, readvof));
405 	if ((vp->tval & FLD) && donefld == 0)
406 		fldbld();
407 	else if ((vp->tval & REC) && donerec == 0)
408 		recbld();
409 	if ((vp->tval & STR) == 0) {
410 		if (!(vp->tval&DONTFREE))
411 			xfree(vp->sval);
412 		if ((long)vp->fval == vp->fval) {
413 			snprintf((char *)s, sizeof s, "%ld", (long)vp->fval);
414 			vp->tval |= STR;
415 		} else {
416 			snprintf((char *)s, sizeof s,
417 					(char *)(posix ? *CONVFMT : *OFMT),
418 					vp->fval);
419 			/*
420 			* In case CONVFMT is changed by the program,
421 			* we leave the string value uncached for non-
422 			* integer numeric constants.  Ugh.
423 			*/
424 			if (!(vp->tval & CON))
425 				vp->tval |= STR;
426 		}
427 		vp->sval = tostring(s);
428 		vp->tval &= ~DONTFREE;
429 	}
430 	dprintf( ("getsval %lo: %s = \"%s\", t=%o\n", (long)vp, vp->nval ? vp->nval : tostring(""), vp->sval ? vp->sval : tostring(""), vp->tval) );
431 	return(vp->sval);
432 }
433 
stostring(register const unsigned char * s)434 unsigned char *stostring(register const unsigned char *s)
435 {
436 	register unsigned char *p;
437 
438 	p = malloc(strlen((char *) s)+1);
439 	if (p == NULL)
440 		error(MM_ERROR, ":88:Out of space in tostring on %s", s);
441 	strcpy((char *) p, (char *) s);
442 	return(p);
443 }
444 
qstring(unsigned char * s,int delim)445 unsigned char *qstring(unsigned char *s, int delim)
446 	/* collect string up to delim */
447 {
448 	unsigned char *q;
449 	int c, n;
450 
451 	for (q = cbuf; (c = *s) != delim; s++) {
452 		if (q >= cbuf + CBUFLEN - 1)
453 			vyyerror(":89:String %.10s ... too long", cbuf);
454 		else if (c == '\n')
455 			vyyerror(nlstring, cbuf);
456 		else if (c != '\\')
457 			*q++ = c;
458 		else	/* \something */
459 			switch (c = *++s) {
460 			case '\\':	*q++ = '\\'; break;
461 			case 'n':	*q++ = '\n'; break;
462 			case 't':	*q++ = '\t'; break;
463 			case 'b':	*q++ = '\b'; break;
464 			case 'f':	*q++ = '\f'; break;
465 			case 'r':	*q++ = '\r'; break;
466 			default:
467 				if (!isdigit(c)) {
468 					*q++ = c;
469 					break;
470 				}
471 				n = c - '0';
472 				if (isdigit(s[1])) {
473 					n = 8 * n + *++s - '0';
474 					if (isdigit(s[1]))
475 						n = 8 * n + *++s - '0';
476 				}
477 				*q++ = n;
478 				break;
479 			}
480 	}
481 	*q = '\0';
482 	return cbuf;
483 }
484