1 /*
2 Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002.
3
4 Sccsid @(#)tran.c 1.16 (gritter) 2/4/05>
5 */
6 /* UNIX(R) Regular Expression Tools
7
8 Copyright (C) 2001 Caldera International, Inc.
9
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2 of the License, or
13 (at your option) any later version.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to:
22 Free Software Foundation, Inc.
23 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25 /* copyright "%c%" */
26
27 /* from unixsrc:usr/src/common/cmd/awk/tran.c /main/uw7_nj/1 */
28 /* from RCS Header: tran.c 1.2 91/06/25 */
29
30
31 #define DEBUG
32 #include <stdio.h>
33 #include <ctype.h>
34 #include <string.h>
35 #include "awk.h"
36 #include "y.tab.h"
37 #include <pfmt.h>
38
39 #undef RS
40
41 #define FULLTAB 2 /* rehash when table gets this x full */
42 #define GROWTAB 4 /* grow table by this factor */
43
44 Array *symtab; /* main symbol table */
45
46 unsigned char **FS; /* initial field sep */
47 unsigned char **RS; /* initial record sep */
48 unsigned char **OFS; /* output field sep */
49 unsigned char **ORS; /* output record sep */
50 unsigned char **OFMT; /* output format for numbers */
51 unsigned char **CONVFMT; /* generic format for numbers->strings */
52 Awkfloat *NF; /* number of fields in current record */
53 Awkfloat *NR; /* number of current record */
54 Awkfloat *FNR; /* number of current record in current file */
55 unsigned char **FILENAME; /* current filename argument */
56 Awkfloat *ARGC; /* number of arguments from command line */
57 unsigned char **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */
58 Awkfloat *RSTART; /* start of re matched with ~; origin 1 (!) */
59 Awkfloat *RLENGTH; /* length of same */
60
61 Cell *recloc; /* location of record */
62 Cell *nrloc; /* NR */
63 Cell *nfloc; /* NF */
64 Cell *fsloc; /* FS */
65 Cell *fnrloc; /* FNR */
66 Array *ARGVtab; /* symbol table containing ARGV[...] */
67 Array *ENVtab; /* symbol table containing ENVIRON[...] */
68 Cell *rstartloc; /* RSTART */
69 Cell *rlengthloc; /* RLENGTH */
70 Cell *symtabloc; /* SYMTAB */
71
72 Cell *nullloc;
73 Node *nullnode; /* zero&null, converted into a node for comparisons */
74
75 extern Cell **fldtab;
76 static int hash(register unsigned char *s, int n);
77 static void rehash(Array *tp);
78
79 static const char
80 assigntovid[] = ":80",
81 assigntov[] = "assign to";
82
83 const char
84 readvofid[] = ":81",
85 readvof[] = "read value of",
86 outofspace[] = ":82:Out of space in %s",
87 nlstring[] = ":83:Newline in string %.10s ...";
88
syminit(void)89 void syminit(void)
90 {
91 symtab = makesymtab(NSYMTAB);
92 setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
93 /* this is used for if(x)... tests: */
94 nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
95 nullnode = valtonode(nullloc, CCON);
96 /* recloc = setsymtab("$0", record, 0.0, REC|STR|DONTFREE, symtab); */
97 recloc = fldtab[0];
98 fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
99 FS = &fsloc->sval;
100 RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
101 OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
102 ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
103 OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
104 CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
105 FILENAME = &setsymtab("FILENAME", "-", 0.0, STR|DONTFREE, symtab)->sval;
106 nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
107 NF = &nfloc->fval;
108 nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
109 NR = &nrloc->fval;
110 fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
111 FNR = &fnrloc->fval;
112 SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
113 rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
114 RSTART = &rstartloc->fval;
115 rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
116 RLENGTH = &rlengthloc->fval;
117 symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
118 symtabloc->sval = (unsigned char *) symtab;
119 }
120
arginit(int ac,unsigned char ** av)121 void arginit(int ac, unsigned char **av)
122 {
123 Cell *cp;
124 int i;
125 unsigned char temp[25];
126
127 for (i = 1; i < ac; i++) /* first make FILENAME first real argument */
128 if (!isclvar(av[i])) {
129 setsval(lookup("FILENAME", symtab), av[i]);
130 break;
131 }
132 ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
133 cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
134 ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */
135 cp->sval = (unsigned char *) ARGVtab;
136 for (i = 0; i < ac; i++) {
137 snprintf((char *)temp, sizeof temp, "%d", i);
138 setsymtab(temp, *av, 0.0, STR|CANBENUM, ARGVtab);
139 av++;
140 }
141 }
142
envinit(unsigned char ** envp)143 void envinit(unsigned char **envp)
144 {
145 Cell *cp;
146 unsigned char *p;
147
148 cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
149 ENVtab = makesymtab(NSYMTAB);
150 cp->sval = (unsigned char *) ENVtab;
151 for ( ; *envp; envp++) {
152 if ((p = (unsigned char *) strchr((char *) *envp, '=')) == NULL) /* index() on bsd */
153 continue;
154 *p++ = 0; /* split into two strings at = */
155 setsymtab(*envp, p, 0.0, STR|CANBENUM, ENVtab);
156 p[-1] = '='; /* restore in case env is passed down to a shell */
157 }
158 }
159
makesymtab(int n)160 Array *makesymtab(int n)
161 {
162 Array *ap;
163 Cell **tp;
164
165 ap = (Array *) malloc(sizeof(Array));
166 tp = (Cell **) calloc(n, sizeof(Cell *));
167 if (ap == NULL || tp == NULL)
168 error(MM_ERROR, outofspace, "makesymtab");
169 ap->nelem = 0;
170 ap->size = n;
171 ap->tab = tp;
172 return(ap);
173 }
174
/*
 * freesymtab - release the table behind array cell ap.
 *
 * Does nothing when ap is not an array or has no table.  Otherwise every
 * cell in every chain is released (name always, string value only when
 * freeable()), followed by the slot vector and the table header.
 * ap->sval itself is not changed here.
 */
void freesymtab(Cell *ap)
{
	Array *table;
	Cell *cell, *following;
	int slot;

	if (!isarr(ap))
		return;
	table = (Array *) ap->sval;
	if (table == NULL)
		return;

	for (slot = 0; slot < table->size; slot++) {
		cell = table->tab[slot];
		while (cell != NULL) {
			following = cell->cnext;	/* fetch before the cell is gone */
			xfree(cell->nval);
			if (freeable(cell)) {
				xfree(cell->sval);
			}
			free(cell);
			cell = following;
		}
	}
	free(table->tab);
	free(table);
}
198
/*
 * freeelem - remove element s from array cell ap, i.e. delete ap["s"].
 *
 * The first cell in the hash chain whose name equals s is unlinked and
 * released and the element count is decremented.  Nothing happens when
 * no cell has that name.
 */
void freeelem(Cell *ap, unsigned char *s)
{
	Array *table = (Array *) ap->sval;
	Cell **link, *victim;

	/* link always addresses the pointer that leads to the current cell */
	link = &table->tab[hash(s, table->size)];
	while ((victim = *link) != NULL) {
		if (strcmp((char *) s, (char *) victim->nval) == 0) {
			*link = victim->cnext;	/* unlink; works for head and middle */
			if (freeable(victim)) {
				xfree(victim->sval);
			}
			free(victim->nval);
			free(victim);
			table->nelem--;
			return;
		}
		link = &victim->cnext;
	}
}
222
ssetsymtab(unsigned char * n,unsigned char * s,Awkfloat f,unsigned t,Array * tp)223 Cell *ssetsymtab(unsigned char *n, unsigned char *s, Awkfloat f,
224 unsigned t, Array *tp)
225 {
226 register int h;
227 register Cell *p;
228
229 if (n != NULL && (p = lookup(n, tp)) != NULL) {
230 dprintf( ("setsymtab found %lo: n=%s", (long)p, p->nval) );
231 dprintf( (" s=\"%s\" f=%g t=%o\n", p->sval? p->sval : tostring(""), p->fval, p->tval) );
232 return(p);
233 }
234 p = (Cell *) malloc(sizeof(Cell));
235 if (p == NULL)
236 error(MM_ERROR, ":84:Symbol table overflow at %s", n);
237 p->nval = tostring(n);
238 p->sval = s ? tostring(s) : tostring("");
239 p->fval = f;
240 p->tval = t & ~CANBENUM;
241 p->csub = 0;
242 if (t & CANBENUM)
243 (void)is2number(0, p);
244 tp->nelem++;
245 if (tp->nelem > FULLTAB * tp->size)
246 rehash(tp);
247 h = hash(n, tp->size);
248 p->cnext = tp->tab[h];
249 tp->tab[h] = p;
250 dprintf( ("setsymtab set %lo: n=%s", (long)p, p->nval) );
251 dprintf( (" s=\"%s\" f=%g t=%o\n", p->sval? p->sval : tostring(""), p->fval, p->tval) );
252 return(p);
253 }
254
/*
 * hash - map string s to a slot number in the range 0 .. n-1.
 *
 * The bytes are folded left to right as acc = acc * 31 + byte in
 * unsigned arithmetic; the result is that value modulo n.
 */
static int hash(register unsigned char *s, int n)
{
	register unsigned acc = 0;

	while (*s != '\0')
		acc = acc * 31 + *s++;
	return acc % n;
}
264
/*
 * rehash - enlarge table tp to GROWTAB times its size and move every
 * cell to the slot its name hashes to under the new size.
 *
 * Cells are relinked, not copied.  The old slot vector is released.
 * An allocation failure is reported through error(MM_ERROR, ...).
 */
static void rehash(Array *tp)
{
	int newsize = GROWTAB * tp->size;
	Cell **newtab = calloc(newsize, sizeof *newtab);
	Cell *cell, *following;
	int slot, target;

	if (newtab == NULL)
		error(MM_ERROR, outofspace, "rehash");

	for (slot = 0; slot < tp->size; slot++) {
		cell = tp->tab[slot];
		while (cell != NULL) {
			following = cell->cnext;	/* remember before relinking */
			target = hash(cell->nval, newsize);
			cell->cnext = newtab[target];
			newtab[target] = cell;
			cell = following;
		}
	}

	free(tp->tab);
	tp->tab = newtab;
	tp->size = newsize;
}
286
slookup(register unsigned char * s,Array * tp)287 Cell *slookup(register unsigned char *s, Array *tp) /* look for s in tp */
288 {
289 register Cell *p, *prev = NULL;
290 int h;
291
292 h = hash(s, tp->size);
293 for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
294 if (strcmp((char *) s, (char *) p->nval) == 0)
295 return(p); /* found it */
296 return(NULL); /* not found */
297 }
298
/*
 * setfval - store number f in cell vp and return it.
 *
 * A cell that is neither NUM nor STR is handed to funnyvar().  Storing
 * into a field marks $0 as out of date and calls newfld() when the field
 * index is beyond NF; storing into the record marks the fields as out of
 * date.  Afterwards the cell is NUM and no longer STR.
 */
Awkfloat setfval(register Cell *vp, Awkfloat f)
{
	if (!(vp->tval & (NUM | STR)))
		funnyvar(vp, (char *)gettxt(assigntovid, assigntov));

	if (vp->tval & FLD) {
		int idx = 0;

		donerec = 0;	/* mark $0 invalid */
		while (fldtab[idx] != vp)	/* which field is this? */
			idx++;
		if (idx > *NF)
			newfld(idx);
		dprintf( ("setting field %d to %g\n", idx, f) );
	} else if (vp->tval & REC) {
		donefld = 0;	/* mark $1... invalid */
		donerec = 1;
	}

	/* string no longer valid, number is */
	vp->tval = (vp->tval & ~STR) | NUM;
	dprintf( ("setfval %lo: %s = %g, t=%o\n", (long)vp, vp->nval ? vp->nval : tostring(""), f, vp->tval) );
	vp->fval = f;
	return vp->fval;
}
319
/*
 * funnyvar - report an attempt to use cell vp as a plain variable.
 *
 * vp: the offending cell; rw: text describing the attempted operation
 * (the callers in this file pass "assign to" or "read value of").
 * Array and function names get their own message; everything else is
 * reported with a dump of the cell.
 *
 * The dump keeps the catalogue text ":87:..." unchanged.  Its first
 * conversion is %o, which takes an unsigned int, so the cell address is
 * explicitly narrowed instead of passing a raw pointer (a pointer
 * argument under %o is undefined behaviour).  Name and string value are
 * replaced by "" when they are NULL, because %s must not receive NULL.
 */
void funnyvar(Cell *vp, char *rw)
{
	if (vp->tval & ARR)
		error(MM_ERROR, ":85:Cannot %s %s; it's an array name.",
			rw, vp->nval);
	if (vp->tval & FCN)
		error(MM_ERROR, ":86:Cannot %s %s; it's a function.",
			rw, vp->nval);
	error(MM_ERROR, ":87:Funny variable %o: n=%s s=\"%s\" f=%g t=%o",
		(unsigned)(unsigned long)vp,
		vp->nval ? (char *)vp->nval : "",
		vp->sval ? (char *)vp->sval : "",
		vp->fval, vp->tval);
}
331
setsval(register Cell * vp,unsigned char * s)332 unsigned char *setsval(register Cell *vp, unsigned char *s)
333 {
334 if ((vp->tval & (NUM | STR)) == 0)
335 funnyvar(vp, (char *)gettxt(assigntovid, assigntov));
336 if (vp->tval & FLD) {
337 int n;
338 donerec = 0; /* mark $0 invalid */
339 for (n = 0; vp != fldtab[n]; n++);
340 if (n > *NF)
341 newfld(n);
342 dprintf( ("setting field %d to %s\n", n, s) );
343 } else if (vp->tval & REC) {
344 donefld = 0; /* mark $1... invalid */
345 donerec = 1;
346 } else if (vp == fsloc && donefld == 0) {
347 /*
348 * Because POSIX.2 requires that awk act as if it always
349 * splits the current input line immediately after reading,
350 * we force it to be split into fields just before a change
351 * to FS if we haven't needed to do so yet.
352 */
353 fldbld();
354 }
355 vp->tval &= ~NUM;
356 vp->tval |= STR;
357 s = tostring(s); /* moved to here since "s" can be "vp->sval" */
358 if (freeable(vp))
359 xfree(vp->sval);
360 if (vp->tval & REC) {
361 /*
362 * Make sure that recsize is large enough to build
363 * fields afterwards.
364 */
365 unsigned char *os = s;
366
367 s = makerec(s, strlen((char *)s) + 1);
368 free(os);
369 } else
370 vp->tval &= ~DONTFREE;
371 dprintf( ("setsval %lo: %s = \"%s\", t=%o\n", (long)vp, vp->nval, s, vp->tval) );
372 return(vp->sval = s);
373 }
374
r_getfval(register Cell * vp)375 Awkfloat r_getfval(register Cell *vp)
376 {
377 /* if (vp->tval & ARR)
378 ERROR "Illegal reference to array %s", vp->nval FATAL;
379 return 0.0; */
380 if ((vp->tval & (NUM | STR)) == 0)
381 funnyvar(vp, (char *)gettxt(readvofid, readvof));
382 if ((vp->tval & FLD) && donefld == 0)
383 fldbld();
384 else if ((vp->tval & REC) && donerec == 0)
385 recbld();
386 if (!isnum(vp)) { /* not marked as a number */
387 vp->fval = awk_atof((char *)vp->sval); /* best guess */
388 if (is2number(vp->sval, 0) && !(vp->tval&CON))
389 vp->tval |= NUM; /* make NUM only sparingly */
390 }
391 dprintf( ("getfval %lo: %s = %g, t=%o\n", (long)vp, vp->nval, vp->fval, vp->tval) );
392 return(vp->fval);
393 }
394
r_getsval(register Cell * vp)395 unsigned char *r_getsval(register Cell *vp)
396 {
397 unsigned char s[100];
398
399 /* if (vp->tval & ARR)
400 ERROR "Illegal reference to array %s",
401 vp->nval FATAL;
402 return ""; */
403 if ((vp->tval & (NUM | STR)) == 0)
404 funnyvar(vp, (char *)gettxt(readvofid, readvof));
405 if ((vp->tval & FLD) && donefld == 0)
406 fldbld();
407 else if ((vp->tval & REC) && donerec == 0)
408 recbld();
409 if ((vp->tval & STR) == 0) {
410 if (!(vp->tval&DONTFREE))
411 xfree(vp->sval);
412 if ((long)vp->fval == vp->fval) {
413 snprintf((char *)s, sizeof s, "%ld", (long)vp->fval);
414 vp->tval |= STR;
415 } else {
416 snprintf((char *)s, sizeof s,
417 (char *)(posix ? *CONVFMT : *OFMT),
418 vp->fval);
419 /*
420 * In case CONVFMT is changed by the program,
421 * we leave the string value uncached for non-
422 * integer numeric constants. Ugh.
423 */
424 if (!(vp->tval & CON))
425 vp->tval |= STR;
426 }
427 vp->sval = tostring(s);
428 vp->tval &= ~DONTFREE;
429 }
430 dprintf( ("getsval %lo: %s = \"%s\", t=%o\n", (long)vp, vp->nval ? vp->nval : tostring(""), vp->sval ? vp->sval : tostring(""), vp->tval) );
431 return(vp->sval);
432 }
433
stostring(register const unsigned char * s)434 unsigned char *stostring(register const unsigned char *s)
435 {
436 register unsigned char *p;
437
438 p = malloc(strlen((char *) s)+1);
439 if (p == NULL)
440 error(MM_ERROR, ":88:Out of space in tostring on %s", s);
441 strcpy((char *) p, (char *) s);
442 return(p);
443 }
444
qstring(unsigned char * s,int delim)445 unsigned char *qstring(unsigned char *s, int delim)
446 /* collect string up to delim */
447 {
448 unsigned char *q;
449 int c, n;
450
451 for (q = cbuf; (c = *s) != delim; s++) {
452 if (q >= cbuf + CBUFLEN - 1)
453 vyyerror(":89:String %.10s ... too long", cbuf);
454 else if (c == '\n')
455 vyyerror(nlstring, cbuf);
456 else if (c != '\\')
457 *q++ = c;
458 else /* \something */
459 switch (c = *++s) {
460 case '\\': *q++ = '\\'; break;
461 case 'n': *q++ = '\n'; break;
462 case 't': *q++ = '\t'; break;
463 case 'b': *q++ = '\b'; break;
464 case 'f': *q++ = '\f'; break;
465 case 'r': *q++ = '\r'; break;
466 default:
467 if (!isdigit(c)) {
468 *q++ = c;
469 break;
470 }
471 n = c - '0';
472 if (isdigit(s[1])) {
473 n = 8 * n + *++s - '0';
474 if (isdigit(s[1]))
475 n = 8 * n + *++s - '0';
476 }
477 *q++ = n;
478 break;
479 }
480 }
481 *q = '\0';
482 return cbuf;
483 }
484