1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24
25 #define DEBUG
26 #include <stdio.h>
27 #include <ctype.h>
28 #include <errno.h>
29 #include <wctype.h>
30 #include <fcntl.h>
31 #include <setjmp.h>
32 #include <limits.h>
33 #include <math.h>
34 #include <string.h>
35 #include <stdlib.h>
36 #include <time.h>
37 #include <sys/types.h>
38 #include <sys/wait.h>
39 #include "awk.h"
40 #include "awkgram.tab.h"
41
42
43 static void stdinit(void);
44 static void flush_all(void);
45 static char *wide_char_to_byte_str(int rune, size_t *outlen);
46
/*
 * tempfree(x): hand cell x to tfree() when istemp(x) holds; otherwise do
 * nothing.  The macro form is the one that is compiled.  The function form
 * in the #else branch is a debugging variant that also warns when an OCELL
 * has a csub outside the range CUNK..CFREE.
 */
#if 1
#define tempfree(x) \
	do { \
		if (istemp(x)) \
			tfree(x); \
	} while (/*CONSTCOND*/0)
#else
void tempfree(Cell *p)
{
	const int csub_out_of_range = (p->csub < CUNK || p->csub > CFREE);

	if (p->ctype == OCELL && csub_out_of_range) {
		WARNING("bad csub %d in Cell %d %s",
			p->csub, p->ctype, p->sval);
	}
	if (istemp(p))
		tfree(p);
}
#endif
59
60 /* do we really need these? */
61 /* #ifdef _NFILE */
62 /* #ifndef FOPEN_MAX */
63 /* #define FOPEN_MAX _NFILE */
64 /* #endif */
65 /* #endif */
66 /* */
67 /* #ifndef FOPEN_MAX */
68 /* #define FOPEN_MAX 40 */ /* max number of open files */
69 /* #endif */
70 /* */
71 /* #ifndef RAND_MAX */
72 /* #define RAND_MAX 32767 */ /* all that ansi guarantees */
73 /* #endif */
74
75 jmp_buf env;
76 extern int pairstack[];
77 extern Awkfloat srand_seed;
78
79 Node *winner = NULL; /* root of parse tree */
80 Cell *tmps; /* free temporary cells for execution */
81
82 static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL };
83 Cell *True = &truecell;
84 static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL };
85 Cell *False = &falsecell;
86 static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL };
87 Cell *jbreak = &breakcell;
88 static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL };
89 Cell *jcont = &contcell;
90 static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL };
91 Cell *jnext = &nextcell;
92 static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL };
93 Cell *jnextfile = &nextfilecell;
94 static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL };
95 Cell *jexit = &exitcell;
96 static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL };
97 Cell *jret = &retcell;
98 static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
99
100 Node *curnode = NULL; /* the node being executed, for debugging */
101
102 /* buffer memory management */
/*
 * adjbuf: make sure a managed buffer holds at least minlen bytes.
 *
 * pbuf:    address of pointer to buffer being managed
 * psiz:    address of buffer size variable
 * minlen:  minimum length of buffer needed
 * quantum: buffer size quantum; when non-zero the new size is rounded up
 *          to the next multiple of it
 * pbptr:   address of movable pointer into buffer, or 0 if none; it is
 *          re-based onto the new buffer after a successful realloc
 * whatrtn: name of the calling routine if failure should cause fatal
 *          error, or NULL to get a 0 return instead
 *
 * return 0 for realloc failure (or a size that cannot be represented),
 * !=0 for success.  On failure *pbuf and *psiz are left untouched.
 */
int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
	const char *whatrtn)
{
	char *tbuf;
	int rminlen, boff;

	if (minlen <= *psiz)
		return 1;		/* already large enough */

	rminlen = quantum ? minlen % quantum : 0;
	boff = pbptr ? *pbptr - *pbuf : 0;

	/* round up to next multiple of quantum */
	if (rminlen) {
		int pad = quantum - rminlen;

		/* rounding must not overflow int: treat it like an allocation failure */
		if (pad > 0 && minlen > INT_MAX - pad) {
			if (whatrtn)
				FATAL("out of memory in %s", whatrtn);
			return 0;
		}
		minlen += pad;
	}

	tbuf = (char *) realloc(*pbuf, minlen);
	/* whatrtn is allowed to be NULL, so it must not reach %s unguarded */
	DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", NN(whatrtn), *psiz, minlen, (void*)*pbuf, (void*)tbuf);
	if (tbuf == NULL) {
		if (whatrtn)
			FATAL("out of memory in %s", whatrtn);
		return 0;
	}

	*pbuf = tbuf;
	*psiz = minlen;
	if (pbptr)
		*pbptr = tbuf + boff;
	return 1;
}
136
/*
 * run: execution of the parse tree starts here.
 * Calls stdinit(), then execute() on the tree rooted at a, then closeall().
 */
void run(Node *a)
{
	stdinit();
	(void) execute(a);	/* result cell is not used */
	closeall();
}
144
execute(Node * u)145 Cell *execute(Node *u) /* execute a node of the parse tree */
146 {
147 Cell *(*proc)(Node **, int);
148 Cell *x;
149 Node *a;
150
151 if (u == NULL)
152 return(True);
153 for (a = u; ; a = a->nnext) {
154 curnode = a;
155 if (isvalue(a)) {
156 x = (Cell *) (a->narg[0]);
157 if (isfld(x) && !donefld)
158 fldbld();
159 else if (isrec(x) && !donerec)
160 recbld();
161 return(x);
162 }
163 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */
164 FATAL("illegal statement");
165 proc = proctab[a->nobj-FIRSTTOKEN];
166 x = (*proc)(a->narg, a->nobj);
167 if (isfld(x) && !donefld)
168 fldbld();
169 else if (isrec(x) && !donerec)
170 recbld();
171 if (isexpr(a))
172 return(x);
173 if (isjump(x))
174 return(x);
175 if (a->nnext == NULL)
176 return(x);
177 tempfree(x);
178 }
179 }
180
181
program(Node ** a,int n)182 Cell *program(Node **a, int n) /* execute an awk program */
183 { /* a[0] = BEGIN, a[1] = body, a[2] = END */
184 Cell *x;
185
186 if (setjmp(env) != 0)
187 goto ex;
188 if (a[0]) { /* BEGIN */
189 x = execute(a[0]);
190 if (isexit(x))
191 return(True);
192 if (isjump(x))
193 FATAL("illegal break, continue, next or nextfile from BEGIN");
194 tempfree(x);
195 }
196 if (a[1] || a[2])
197 while (getrec(&record, &recsize, true) > 0) {
198 x = execute(a[1]);
199 if (isexit(x))
200 break;
201 tempfree(x);
202 }
203 ex:
204 if (setjmp(env) != 0) /* handles exit within END */
205 goto ex1;
206 if (a[2]) { /* END */
207 x = execute(a[2]);
208 if (isbreak(x) || isnext(x) || iscont(x))
209 FATAL("illegal break, continue, next or nextfile from END");
210 tempfree(x);
211 }
212 ex1:
213 return(True);
214 }
215
216 struct Frame { /* stack frame for awk function calls */
217 int nargs; /* number of arguments in this call */
218 Cell *fcncell; /* pointer to Cell for function */
219 Cell **args; /* pointer to array of arguments after execute */
220 Cell *retval; /* return value */
221 };
222
223 #define NARGS 50 /* max args in a call */
224
225 struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */
226 int nframe = 0; /* number of frames allocated */
227 struct Frame *frp = NULL; /* frame pointer. bottom level unused */
228
call(Node ** a,int n)229 Cell *call(Node **a, int n) /* function call. very kludgy and fragile */
230 {
231 static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
232 int i, ncall, ndef;
233 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
234 Node *x;
235 Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */
236 Cell *y, *z, *fcn;
237 char *s;
238
239 fcn = execute(a[0]); /* the function itself */
240 s = fcn->nval;
241 if (!isfcn(fcn))
242 FATAL("calling undefined function %s", s);
243 if (frame == NULL) {
244 frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame));
245 if (frame == NULL)
246 FATAL("out of space for stack frames calling %s", s);
247 }
248 for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */
249 ncall++;
250 ndef = (int) fcn->fval; /* args in defn */
251 DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame));
252 if (ncall > ndef)
253 WARNING("function %s called with %d args, uses only %d",
254 s, ncall, ndef);
255 if (ncall + ndef > NARGS)
256 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
257 for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */
258 DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame));
259 y = execute(x);
260 oargs[i] = y;
261 DPRINTF("args[%d]: %s %f <%s>, t=%o\n",
262 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval);
263 if (isfcn(y))
264 FATAL("can't use function %s as argument in %s", y->nval, s);
265 if (isarr(y))
266 args[i] = y; /* arrays by ref */
267 else
268 args[i] = copycell(y);
269 tempfree(y);
270 }
271 for ( ; i < ndef; i++) { /* add null args for ones not provided */
272 args[i] = gettemp();
273 *args[i] = newcopycell;
274 }
275 frp++; /* now ok to up frame */
276 if (frp >= frame + nframe) {
277 int dfp = frp - frame; /* old index */
278 frame = (struct Frame *) realloc(frame, (nframe += 100) * sizeof(*frame));
279 if (frame == NULL)
280 FATAL("out of space for stack frames in %s", s);
281 frp = frame + dfp;
282 }
283 frp->fcncell = fcn;
284 frp->args = args;
285 frp->nargs = ndef; /* number defined with (excess are locals) */
286 frp->retval = gettemp();
287
288 DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame));
289 y = execute((Node *)(fcn->sval)); /* execute body */
290 DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame));
291
292 for (i = 0; i < ndef; i++) {
293 Cell *t = frp->args[i];
294 if (isarr(t)) {
295 if (t->csub == CCOPY) {
296 if (i >= ncall) {
297 freesymtab(t);
298 t->csub = CTEMP;
299 tempfree(t);
300 } else {
301 oargs[i]->tval = t->tval;
302 oargs[i]->tval &= ~(STR|NUM|DONTFREE);
303 oargs[i]->sval = t->sval;
304 tempfree(t);
305 }
306 }
307 } else if (t != y) { /* kludge to prevent freeing twice */
308 t->csub = CTEMP;
309 tempfree(t);
310 } else if (t == y && t->csub == CCOPY) {
311 t->csub = CTEMP;
312 tempfree(t);
313 freed = 1;
314 }
315 }
316 tempfree(fcn);
317 if (isexit(y) || isnext(y))
318 return y;
319 if (freed == 0) {
320 tempfree(y); /* don't free twice! */
321 }
322 z = frp->retval; /* return value */
323 DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval);
324 frp--;
325 return(z);
326 }
327
/*
 * copycell: make a copy of a cell in a temp.
 * The copy drops the CON, FLD and REC type bits, shares x's nval pointer,
 * and is tagged CCOPY.  When x is a string its sval is duplicated with
 * tostring() and the copy owns it (DONTFREE cleared); otherwise DONTFREE
 * is set.  fval is copied as is.
 */
Cell *copycell(Cell *x)
{
	Cell *dup = gettemp();

	/* copy is not constant or field */
	dup->tval = x->tval & ~(CON|FLD|REC);
	dup->csub = CCOPY;	/* prevents freeing until call is over */
	dup->nval = x->nval;	/* BUG? */
	dup->fval = x->fval;

	if (isstr(x) /* || x->ctype == OCELL */) {
		dup->sval = tostring(x->sval);
		dup->tval &= ~DONTFREE;
	} else {
		dup->tval |= DONTFREE;
	}
	return dup;
}
346
arg(Node ** a,int n)347 Cell *arg(Node **a, int n) /* nth argument of a function */
348 {
349
350 n = ptoi(a[0]); /* argument number, counting from 0 */
351 DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs);
352 if (n+1 > frp->nargs)
353 FATAL("argument #%d of function %s was not supplied",
354 n+1, frp->fcncell->nval);
355 return frp->args[n];
356 }
357
jump(Node ** a,int n)358 Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */
359 {
360 Cell *y;
361
362 switch (n) {
363 case EXIT:
364 if (a[0] != NULL) {
365 y = execute(a[0]);
366 errorflag = (int) getfval(y);
367 tempfree(y);
368 }
369 longjmp(env, 1);
370 case RETURN:
371 if (a[0] != NULL) {
372 y = execute(a[0]);
373 if ((y->tval & (STR|NUM)) == (STR|NUM)) {
374 setsval(frp->retval, getsval(y));
375 frp->retval->fval = getfval(y);
376 frp->retval->tval |= NUM;
377 }
378 else if (y->tval & STR)
379 setsval(frp->retval, getsval(y));
380 else if (y->tval & NUM)
381 setfval(frp->retval, getfval(y));
382 else /* can't happen */
383 FATAL("bad type variable %d", y->tval);
384 tempfree(y);
385 }
386 return(jret);
387 case NEXT:
388 return(jnext);
389 case NEXTFILE:
390 nextfile();
391 return(jnextfile);
392 case BREAK:
393 return(jbreak);
394 case CONTINUE:
395 return(jcont);
396 default: /* can't happen */
397 FATAL("illegal jump type %d", n);
398 }
399 return 0; /* not reached */
400 }
401
awkgetline(Node ** a,int n)402 Cell *awkgetline(Node **a, int n) /* get next line from specific input */
403 { /* a[0] is variable, a[1] is operator, a[2] is filename */
404 Cell *r, *x;
405 extern Cell **fldtab;
406 FILE *fp;
407 char *buf;
408 int bufsize = recsize;
409 int mode;
410 bool newflag;
411 double result;
412
413 if ((buf = (char *) malloc(bufsize)) == NULL)
414 FATAL("out of memory in getline");
415
416 fflush(stdout); /* in case someone is waiting for a prompt */
417 r = gettemp();
418 if (a[1] != NULL) { /* getline < file */
419 x = execute(a[2]); /* filename */
420 mode = ptoi(a[1]);
421 if (mode == '|') /* input pipe */
422 mode = LE; /* arbitrary flag */
423 fp = openfile(mode, getsval(x), &newflag);
424 tempfree(x);
425 if (fp == NULL)
426 n = -1;
427 else
428 n = readrec(&buf, &bufsize, fp, newflag);
429 if (n <= 0) {
430 ;
431 } else if (a[0] != NULL) { /* getline var <file */
432 x = execute(a[0]);
433 setsval(x, buf);
434 if (is_number(x->sval, & result)) {
435 x->fval = result;
436 x->tval |= NUM;
437 }
438 tempfree(x);
439 } else { /* getline <file */
440 setsval(fldtab[0], buf);
441 if (is_number(fldtab[0]->sval, & result)) {
442 fldtab[0]->fval = result;
443 fldtab[0]->tval |= NUM;
444 }
445 }
446 } else { /* bare getline; use current input */
447 if (a[0] == NULL) /* getline */
448 n = getrec(&record, &recsize, true);
449 else { /* getline var */
450 n = getrec(&buf, &bufsize, false);
451 if (n > 0) {
452 x = execute(a[0]);
453 setsval(x, buf);
454 if (is_number(x->sval, & result)) {
455 x->fval = result;
456 x->tval |= NUM;
457 }
458 tempfree(x);
459 }
460 }
461 }
462 setfval(r, (Awkfloat) n);
463 free(buf);
464 return r;
465 }
466
getnf(Node ** a,int n)467 Cell *getnf(Node **a, int n) /* get NF */
468 {
469 if (!donefld)
470 fldbld();
471 return (Cell *) a[0];
472 }
473
474 static char *
makearraystring(Node * p,const char * func)475 makearraystring(Node *p, const char *func)
476 {
477 char *buf;
478 int bufsz = recsize;
479 size_t blen;
480
481 if ((buf = (char *) malloc(bufsz)) == NULL) {
482 FATAL("%s: out of memory", func);
483 }
484
485 blen = 0;
486 buf[blen] = '\0';
487
488 for (; p; p = p->nnext) {
489 Cell *x = execute(p); /* expr */
490 char *s = getsval(x);
491 size_t seplen = strlen(getsval(subseploc));
492 size_t nsub = p->nnext ? seplen : 0;
493 size_t slen = strlen(s);
494 size_t tlen = blen + slen + nsub;
495
496 if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) {
497 FATAL("%s: out of memory %s[%s...]",
498 func, x->nval, buf);
499 }
500 memcpy(buf + blen, s, slen);
501 if (nsub) {
502 memcpy(buf + blen + slen, *SUBSEP, nsub);
503 }
504 buf[tlen] = '\0';
505 blen = tlen;
506 tempfree(x);
507 }
508 return buf;
509 }
510
array(Node ** a,int n)511 Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
512 {
513 Cell *x, *z;
514 char *buf;
515
516 x = execute(a[0]); /* Cell* for symbol table */
517 buf = makearraystring(a[1], __func__);
518 if (!isarr(x)) {
519 DPRINTF("making %s into an array\n", NN(x->nval));
520 if (freeable(x))
521 xfree(x->sval);
522 x->tval &= ~(STR|NUM|DONTFREE);
523 x->tval |= ARR;
524 x->sval = (char *) makesymtab(NSYMTAB);
525 }
526 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
527 z->ctype = OCELL;
528 z->csub = CVAR;
529 tempfree(x);
530 free(buf);
531 return(z);
532 }
533
awkdelete(Node ** a,int n)534 Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
535 {
536 Cell *x;
537
538 x = execute(a[0]); /* Cell* for symbol table */
539 if (x == symtabloc) {
540 FATAL("cannot delete SYMTAB or its elements");
541 }
542 if (!isarr(x))
543 return True;
544 if (a[1] == NULL) { /* delete the elements, not the table */
545 freesymtab(x);
546 x->tval &= ~STR;
547 x->tval |= ARR;
548 x->sval = (char *) makesymtab(NSYMTAB);
549 } else {
550 char *buf = makearraystring(a[1], __func__);
551 freeelem(x, buf);
552 free(buf);
553 }
554 tempfree(x);
555 return True;
556 }
557
intest(Node ** a,int n)558 Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
559 {
560 Cell *ap, *k;
561 char *buf;
562
563 ap = execute(a[1]); /* array name */
564 if (!isarr(ap)) {
565 DPRINTF("making %s into an array\n", ap->nval);
566 if (freeable(ap))
567 xfree(ap->sval);
568 ap->tval &= ~(STR|NUM|DONTFREE);
569 ap->tval |= ARR;
570 ap->sval = (char *) makesymtab(NSYMTAB);
571 }
572 buf = makearraystring(a[0], __func__);
573 k = lookup(buf, (Array *) ap->sval);
574 tempfree(ap);
575 free(buf);
576 if (k == NULL)
577 return(False);
578 else
579 return(True);
580 }
581
582
583 /* ======== utf-8 code ========== */
584
585 /*
586 * Awk strings can contain ascii, random 8-bit items (eg Latin-1),
587 * or utf-8. u8_isutf tests whether a string starts with a valid
588 * utf-8 sequence, and returns 0 if not (e.g., high bit set).
589 * u8_nextlen returns length of next valid sequence, which is
590 * 1 for ascii, 2..4 for utf-8, or 1 for high bit non-utf.
591 * u8_strlen returns length of string in valid utf-8 sequences
592 * and/or high-bit bytes. Conversion functions go between byte
593 * number and character number.
594 *
595 * In theory, this behaves the same as before for non-utf8 bytes.
596 *
597 * Limited checking! This is a potential security hole.
598 */
599
600 /* is s the beginning of a valid utf-8 string? */
601 /* return length 1..4 if yes, 0 if no */
u8_isutf(const char * s)602 int u8_isutf(const char *s)
603 {
604 int n, ret;
605 unsigned char c;
606
607 c = s[0];
608 if (c < 128 || awk_mb_cur_max == 1)
609 return 1; /* what if it's 0? */
610
611 n = strlen(s);
612 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
613 ret = 2; /* 110xxxxx 10xxxxxx */
614 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
615 && (s[2] & 0xC0) == 0x80) {
616 ret = 3; /* 1110xxxx 10xxxxxx 10xxxxxx */
617 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
618 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
619 ret = 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
620 } else {
621 ret = 0;
622 }
623 return ret;
624 }
625
626 /* Convert (prefix of) utf8 string to utf-32 rune. */
627 /* Sets *rune to the value, returns the length. */
628 /* No error checking: watch out. */
u8_rune(int * rune,const char * s)629 int u8_rune(int *rune, const char *s)
630 {
631 int n, ret;
632 unsigned char c;
633
634 c = s[0];
635 if (c < 128 || awk_mb_cur_max == 1) {
636 *rune = c;
637 return 1;
638 }
639
640 n = strlen(s);
641 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
642 *rune = ((c & 0x1F) << 6) | (s[1] & 0x3F); /* 110xxxxx 10xxxxxx */
643 ret = 2;
644 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
645 && (s[2] & 0xC0) == 0x80) {
646 *rune = ((c & 0xF) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
647 /* 1110xxxx 10xxxxxx 10xxxxxx */
648 ret = 3;
649 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
650 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
651 *rune = ((c & 0x7) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
652 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
653 ret = 4;
654 } else {
655 *rune = c;
656 ret = 1;
657 }
658 return ret; /* returns one byte if sequence doesn't look like utf */
659 }
660
/*
 * u8_nextlen: return length of next sequence.
 * That is 1 for ascii or for a byte that u8_isutf() rejects, and 2..4 for
 * a sequence that u8_isutf() accepts.
 */
int u8_nextlen(const char *s)
{
	const int len = u8_isutf(s);

	return (len == 0) ? 1 : len;
}
671
672 /* return number of utf characters or single non-utf bytes */
u8_strlen(const char * s)673 int u8_strlen(const char *s)
674 {
675 int i, len, n, totlen;
676 unsigned char c;
677
678 n = strlen(s);
679 totlen = 0;
680 for (i = 0; i < n; i += len) {
681 c = s[i];
682 if (c < 128 || awk_mb_cur_max == 1) {
683 len = 1;
684 } else {
685 len = u8_nextlen(&s[i]);
686 }
687 totlen++;
688 if (i > n)
689 FATAL("bad utf count [%s] n=%d i=%d\n", s, n, i);
690 }
691 return totlen;
692 }
693
/*
 * u8_char2byte: convert utf-8 char number in a string to its byte offset.
 *
 * Skips charnum sequences from the start of s and returns how many bytes
 * that was.  The walk stops at the terminating NUL, so a charnum larger
 * than the number of characters in s yields the byte length of s instead
 * of reading past the end of the string.  A charnum of 0 or less yields 0.
 */
int u8_char2byte(const char *s, int charnum)
{
	int bytenum = 0;

	while (charnum > 0 && *s != '\0') {
		int n = u8_nextlen(s);

		s += n;
		bytenum += n;
		charnum--;
	}
	return bytenum;
}
708
/*
 * u8_byte2char: convert byte offset in s to the utf-8 char number that
 * starts there.
 *
 * Counts one for every sequence whose first byte is at an offset
 * <= bytenum, so offset 0 maps to 1.  Returns -1 when bytenum is beyond
 * the length of s, and 0 for a negative bytenum.
 */
int u8_byte2char(const char *s, int bytenum)
{
	int charnum = 0;	/* BUG: what origin? */
			/* should be 0 to match start==0 which means no match */
	int pos = 0;
	int limit = strlen(s);

	if (bytenum > limit) {
		return -1;	/* ??? */
	}
	while (pos <= bytenum) {
		pos += u8_nextlen(s + pos);
		charnum++;
	}
	return charnum;
}
726
/* runetochar() adapted from rune.c in the Plan 9 distribution */
728
/* bit layout constants for utf-8 encoding */
enum
{
	Runeerror = 128, /* from somewhere else */
	Runemax = 0x10FFFF,	/* largest code point that is encoded */

	Bit1 = 7,
	Bitx = 6,
	Bit2 = 5,
	Bit3 = 4,
	Bit4 = 3,
	Bit5 = 2,

	T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
	Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
	T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
	T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
	T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
	T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */

	Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */
	Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */
	Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */
	Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0001 1111 1111 1111 1111 1111 */

	Maskx = (1<<Bitx)-1, /* 0011 1111 */
	Testx = Maskx ^ 0xFF, /* 1100 0000 */

};

/*
 * runetochar: encode code point c as utf-8 into str.
 * Returns the number of bytes written (1..4); str must have room for 4.
 * No terminating NUL is written.
 *
 * A value above Runemax is replaced by U+FFFD (REPLACEMENT CHARACTER)
 * and encoded in three bytes as EF BF BD.  Substituting Runeerror (128)
 * at that point would emit E0 82 80, an overlong and therefore invalid
 * encoding of U+0080, because the two-byte range has already been
 * passed.  Values <= Rune1, negative ones included, are stored as a
 * single byte.
 */
int runetochar(char *str, int c)
{
	enum { Replacement = 0xFFFD };

	/* one character sequence 00000-0007F => 00-7F */
	if (c <= Rune1) {
		str[0] = c;
		return 1;
	}

	/* two character sequence 00080-007FF => T2 Tx */
	if (c <= Rune2) {
		str[0] = T2 | (c >> 1*Bitx);
		str[1] = Tx | (c & Maskx);
		return 2;
	}

	/* out of range: substitute a value that really needs three bytes */
	if (c > Runemax)
		c = Replacement;

	/* three character sequence 00800-0FFFF => T3 Tx Tx */
	if (c <= Rune3) {
		str[0] = T3 | (c >> 2*Bitx);
		str[1] = Tx | ((c >> 1*Bitx) & Maskx);
		str[2] = Tx | (c & Maskx);
		return 3;
	}

	/* four character sequence 010000-10FFFF => T4 Tx Tx Tx */
	str[0] = T4 | (c >> 3*Bitx);
	str[1] = Tx | ((c >> 2*Bitx) & Maskx);
	str[2] = Tx | ((c >> 1*Bitx) & Maskx);
	str[3] = Tx | (c & Maskx);
	return 4;
}
790
791
792 /* ========== end of utf8 code =========== */
793
794
795
matchop(Node ** a,int n)796 Cell *matchop(Node **a, int n) /* ~ and match() */
797 {
798 Cell *x, *y, *z;
799 char *s, *t;
800 int i;
801 int cstart, cpatlen, len;
802 fa *pfa;
803 int (*mf)(fa *, const char *) = match, mode = 0;
804
805 if (n == MATCHFCN) {
806 mf = pmatch;
807 mode = 1;
808 }
809 x = execute(a[1]); /* a[1] = target text */
810 s = getsval(x);
811 if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */
812 i = (*mf)((fa *) a[2], s);
813 else {
814 y = execute(a[2]); /* a[2] = regular expr */
815 t = getsval(y);
816 pfa = makedfa(t, mode);
817 i = (*mf)(pfa, s);
818 tempfree(y);
819 }
820 z = x;
821 if (n == MATCHFCN) {
822 int start = patbeg - s + 1; /* origin 1 */
823 if (patlen < 0) {
824 start = 0; /* not found */
825 } else {
826 cstart = u8_byte2char(s, start-1);
827 cpatlen = 0;
828 for (i = 0; i < patlen; i += len) {
829 len = u8_nextlen(patbeg+i);
830 cpatlen++;
831 }
832
833 start = cstart;
834 patlen = cpatlen;
835 }
836
837 setfval(rstartloc, (Awkfloat) start);
838 setfval(rlengthloc, (Awkfloat) patlen);
839 x = gettemp();
840 x->tval = NUM;
841 x->fval = start;
842 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
843 x = True;
844 else
845 x = False;
846
847 tempfree(z);
848 return x;
849 }
850
851
boolop(Node ** a,int n)852 Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */
853 {
854 Cell *x, *y;
855 int i;
856
857 x = execute(a[0]);
858 i = istrue(x);
859 tempfree(x);
860 switch (n) {
861 case BOR:
862 if (i) return(True);
863 y = execute(a[1]);
864 i = istrue(y);
865 tempfree(y);
866 if (i) return(True);
867 else return(False);
868 case AND:
869 if ( !i ) return(False);
870 y = execute(a[1]);
871 i = istrue(y);
872 tempfree(y);
873 if (i) return(True);
874 else return(False);
875 case NOT:
876 if (i) return(False);
877 else return(True);
878 default: /* can't happen */
879 FATAL("unknown boolean operator %d", n);
880 }
881 return 0; /*NOTREACHED*/
882 }
883
relop(Node ** a,int n)884 Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */
885 {
886 int i;
887 Cell *x, *y;
888 Awkfloat j;
889 bool x_is_nan, y_is_nan;
890
891 x = execute(a[0]);
892 y = execute(a[1]);
893 x_is_nan = isnan(x->fval);
894 y_is_nan = isnan(y->fval);
895 if (x->tval&NUM && y->tval&NUM) {
896 if ((x_is_nan || y_is_nan) && n != NE)
897 return(False);
898 j = x->fval - y->fval;
899 i = j<0? -1: (j>0? 1: 0);
900 } else {
901 i = strcmp(getsval(x), getsval(y));
902 }
903 tempfree(x);
904 tempfree(y);
905 switch (n) {
906 case LT: if (i<0) return(True);
907 else return(False);
908 case LE: if (i<=0) return(True);
909 else return(False);
910 case NE: if (x_is_nan && y_is_nan) return(True);
911 else if (i!=0) return(True);
912 else return(False);
913 case EQ: if (i == 0) return(True);
914 else return(False);
915 case GE: if (i>=0) return(True);
916 else return(False);
917 case GT: if (i>0) return(True);
918 else return(False);
919 default: /* can't happen */
920 FATAL("unknown relational operator %d", n);
921 }
922 return 0; /*NOTREACHED*/
923 }
924
/*
 * tfree: free a tempcell.
 * Releases the cell's string value when freeable(a) holds, then pushes the
 * cell onto the front of the tmps free list.  Pushing the cell that is
 * already at the head of the list is a fatal error.
 */
void tfree(Cell *a)
{
	if (freeable(a)) {
		DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval);
		xfree(a->sval);
	}

	if (a == tmps) {
		FATAL("tempcell list is curdled");
	}

	a->cnext = tmps;
	tmps = a;
}
936
gettemp(void)937 Cell *gettemp(void) /* get a tempcell */
938 { int i;
939 Cell *x;
940
941 if (!tmps) {
942 tmps = (Cell *) calloc(100, sizeof(*tmps));
943 if (!tmps)
944 FATAL("out of space for temporaries");
945 for (i = 1; i < 100; i++)
946 tmps[i-1].cnext = &tmps[i];
947 tmps[i-1].cnext = NULL;
948 }
949 x = tmps;
950 tmps = x->cnext;
951 *x = tempcell;
952 return(x);
953 }
954
indirect(Node ** a,int n)955 Cell *indirect(Node **a, int n) /* $( a[0] ) */
956 {
957 Awkfloat val;
958 Cell *x;
959 int m;
960 char *s;
961
962 x = execute(a[0]);
963 val = getfval(x); /* freebsd: defend against super large field numbers */
964 if ((Awkfloat)INT_MAX < val)
965 FATAL("trying to access out of range field %s", x->nval);
966 m = (int) val;
967 if (m == 0 && !is_number(s = getsval(x), NULL)) /* suspicion! */
968 FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
969 /* BUG: can x->nval ever be null??? */
970 tempfree(x);
971 x = fieldadr(m);
972 x->ctype = OCELL; /* BUG? why are these needed? */
973 x->csub = CFLD;
974 return(x);
975 }
976
substr(Node ** a,int nnn)977 Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
978 {
979 int k, m, n;
980 int mb, nb;
981 char *s;
982 int temp;
983 Cell *x, *y, *z = NULL;
984
985 x = execute(a[0]);
986 y = execute(a[1]);
987 if (a[2] != NULL)
988 z = execute(a[2]);
989 s = getsval(x);
990 k = u8_strlen(s) + 1;
991 if (k <= 1) {
992 tempfree(x);
993 tempfree(y);
994 if (a[2] != NULL) {
995 tempfree(z);
996 }
997 x = gettemp();
998 setsval(x, "");
999 return(x);
1000 }
1001 m = (int) getfval(y);
1002 if (m <= 0)
1003 m = 1;
1004 else if (m > k)
1005 m = k;
1006 tempfree(y);
1007 if (a[2] != NULL) {
1008 n = (int) getfval(z);
1009 tempfree(z);
1010 } else
1011 n = k - 1;
1012 if (n < 0)
1013 n = 0;
1014 else if (n > k - m)
1015 n = k - m;
1016 /* m is start, n is length from there */
1017 DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s);
1018 y = gettemp();
1019 mb = u8_char2byte(s, m-1); /* byte offset of start char in s */
1020 nb = u8_char2byte(s, m-1+n); /* byte offset of end+1 char in s */
1021
1022 temp = s[nb]; /* with thanks to John Linderman */
1023 s[nb] = '\0';
1024 setsval(y, s + mb);
1025 s[nb] = temp;
1026 tempfree(x);
1027 return(y);
1028 }
1029
sindex(Node ** a,int nnn)1030 Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */
1031 {
1032 Cell *x, *y, *z;
1033 char *s1, *s2, *p1, *p2, *q;
1034 Awkfloat v = 0.0;
1035
1036 x = execute(a[0]);
1037 s1 = getsval(x);
1038 y = execute(a[1]);
1039 s2 = getsval(y);
1040
1041 z = gettemp();
1042 for (p1 = s1; *p1 != '\0'; p1++) {
1043 for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++)
1044 continue;
1045 if (*p2 == '\0') {
1046 /* v = (Awkfloat) (p1 - s1 + 1); origin 1 */
1047
1048 /* should be a function: used in match() as well */
1049 int i, len;
1050 v = 0;
1051 for (i = 0; i < p1-s1+1; i += len) {
1052 len = u8_nextlen(s1+i);
1053 v++;
1054 }
1055 break;
1056 }
1057 }
1058 tempfree(x);
1059 tempfree(y);
1060 setfval(z, v);
1061 return(z);
1062 }
1063
/*
 * has_utf8: return 1 if s contains any utf-8 (2 bytes or more) character,
 * i.e. if u8_nextlen() reports a length above 1 anywhere while stepping
 * through the string; return 0 otherwise.
 */
int has_utf8(char *s)
{
	while (*s != 0) {
		int n = u8_nextlen(s);

		if (n > 1)
			return 1;
		s += n;
	}
	return 0;
}
1075
1076 #define MAXNUMSIZE 50
1077
format(char ** pbuf,int * pbufsize,const char * s,Node * a)1078 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */
1079 {
1080 char *fmt;
1081 char *p, *t;
1082 const char *os;
1083 Cell *x;
1084 int flag = 0, n;
1085 int fmtwd; /* format width */
1086 int fmtsz = recsize;
1087 char *buf = *pbuf;
1088 int bufsize = *pbufsize;
1089 #define FMTSZ(a) (fmtsz - ((a) - fmt))
1090 #define BUFSZ(a) (bufsize - ((a) - buf))
1091
1092 static bool first = true;
1093 static bool have_a_format = false;
1094
1095 if (first) {
1096 char xbuf[100];
1097
1098 snprintf(xbuf, sizeof(xbuf), "%a", 42.0);
1099 have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0);
1100 first = false;
1101 }
1102
1103 os = s;
1104 p = buf;
1105 if ((fmt = (char *) malloc(fmtsz)) == NULL)
1106 FATAL("out of memory in format()");
1107 while (*s) {
1108 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1");
1109 if (*s != '%') {
1110 *p++ = *s++;
1111 continue;
1112 }
1113 if (*(s+1) == '%') {
1114 *p++ = '%';
1115 s += 2;
1116 continue;
1117 }
1118 fmtwd = atoi(s+1);
1119 if (fmtwd < 0)
1120 fmtwd = -fmtwd;
1121 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
1122 for (t = fmt; (*t++ = *s) != '\0'; s++) {
1123 if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
1124 FATAL("format item %.30s... ran format() out of memory", os);
1125 /* Ignore size specifiers */
1126 if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */
1127 t--;
1128 continue;
1129 }
1130 if (isalpha((uschar)*s))
1131 break;
1132 if (*s == '$') {
1133 FATAL("'$' not permitted in awk formats");
1134 }
1135 if (*s == '*') {
1136 if (a == NULL) {
1137 FATAL("not enough args in printf(%s)", os);
1138 }
1139 x = execute(a);
1140 a = a->nnext;
1141 snprintf(t - 1, FMTSZ(t - 1),
1142 "%d", fmtwd=(int) getfval(x));
1143 if (fmtwd < 0)
1144 fmtwd = -fmtwd;
1145 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
1146 t = fmt + strlen(fmt);
1147 tempfree(x);
1148 }
1149 }
1150 *t = '\0';
1151 if (fmtwd < 0)
1152 fmtwd = -fmtwd;
1153 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
1154 switch (*s) {
1155 case 'a': case 'A':
1156 if (have_a_format)
1157 flag = *s;
1158 else
1159 flag = 'f';
1160 break;
1161 case 'f': case 'e': case 'g': case 'E': case 'G':
1162 flag = 'f';
1163 break;
1164 case 'd': case 'i': case 'o': case 'x': case 'X': case 'u':
1165 flag = (*s == 'd' || *s == 'i') ? 'd' : 'u';
1166 *(t-1) = 'j';
1167 *t = *s;
1168 *++t = '\0';
1169 break;
1170 case 's':
1171 flag = 's';
1172 break;
1173 case 'c':
1174 flag = 'c';
1175 break;
1176 default:
1177 WARNING("weird printf conversion %s", fmt);
1178 flag = '?';
1179 break;
1180 }
1181 if (a == NULL)
1182 FATAL("not enough args in printf(%s)", os);
1183 x = execute(a);
1184 a = a->nnext;
1185 n = MAXNUMSIZE;
1186 if (fmtwd > n)
1187 n = fmtwd;
1188 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
1189 switch (flag) {
1190 case '?':
1191 snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */
1192 t = getsval(x);
1193 n = strlen(t);
1194 if (fmtwd > n)
1195 n = fmtwd;
1196 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
1197 p += strlen(p);
1198 snprintf(p, BUFSZ(p), "%s", t);
1199 break;
1200 case 'a':
1201 case 'A':
1202 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
1203 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
1204 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
1205
1206 case 's': {
1207 t = getsval(x);
1208 n = strlen(t);
1209 /* if simple format or no utf-8 in the string, sprintf works */
1210 if (!has_utf8(t) || strcmp(fmt,"%s") == 0) {
1211 if (fmtwd > n)
1212 n = fmtwd;
1213 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
1214 FATAL("huge string/format (%d chars) in printf %.30s..." \
1215 " ran format() out of memory", n, t);
1216 snprintf(p, BUFSZ(p), fmt, t);
1217 break;
1218 }
1219
1220 /* get here if string has utf-8 chars and fmt is not plain %s */
1221 /* "%-w.ps", where -, w and .p are all optional */
1222 /* '0' before the w is a flag character */
1223 /* fmt points at % */
1224 int ljust = 0, wid = 0, prec = n, pad = 0;
1225 char *f = fmt+1;
1226 if (f[0] == '-') {
1227 ljust = 1;
1228 f++;
1229 }
1230 // flags '0' and '+' are recognized but skipped
1231 if (f[0] == '0') {
1232 f++;
1233 if (f[0] == '+')
1234 f++;
1235 }
1236 if (f[0] == '+') {
1237 f++;
1238 if (f[0] == '0')
1239 f++;
1240 }
1241 if (isdigit(f[0])) { /* there is a wid */
1242 wid = strtol(f, &f, 10);
1243 }
1244 if (f[0] == '.') { /* there is a .prec */
1245 prec = strtol(++f, &f, 10);
1246 }
1247 if (prec > u8_strlen(t))
1248 prec = u8_strlen(t);
1249 pad = wid>prec ? wid - prec : 0; // has to be >= 0
1250 int i, k, n;
1251
1252 if (ljust) { // print prec chars from t, then pad blanks
1253 n = u8_char2byte(t, prec);
1254 for (k = 0; k < n; k++) {
1255 //putchar(t[k]);
1256 *p++ = t[k];
1257 }
1258 for (i = 0; i < pad; i++) {
1259 //printf(" ");
1260 *p++ = ' ';
1261 }
1262 } else { // print pad blanks, then prec chars from t
1263 for (i = 0; i < pad; i++) {
1264 //printf(" ");
1265 *p++ = ' ';
1266 }
1267 n = u8_char2byte(t, prec);
1268 for (k = 0; k < n; k++) {
1269 //putchar(t[k]);
1270 *p++ = t[k];
1271 }
1272 }
1273 *p = 0;
1274 break;
1275 }
1276
1277 case 'c': {
1278 /*
1279 * If a numeric value is given, awk should just turn
1280 * it into a character and print it:
1281 * BEGIN { printf("%c\n", 65) }
1282 * prints "A".
1283 *
1284 * But what if the numeric value is > 128 and
1285 * represents a valid Unicode code point?!? We do
1286 * our best to convert it back into UTF-8. If we
1287 * can't, we output the encoding of the Unicode
1288 * "invalid character", 0xFFFD.
1289 */
1290 if (isnum(x)) {
1291 int charval = (int) getfval(x);
1292
1293 if (charval != 0) {
1294 if (charval < 128 || awk_mb_cur_max == 1)
1295 snprintf(p, BUFSZ(p), fmt, charval);
1296 else {
1297 // possible unicode character
1298 size_t count;
1299 char *bs = wide_char_to_byte_str(charval, &count);
1300
1301 if (bs == NULL) { // invalid character
1302 // use unicode invalid character, 0xFFFD
1303 static char invalid_char[] = "\357\277\275";
1304 bs = invalid_char;
1305 count = 3;
1306 }
1307 t = bs;
1308 n = count;
1309 goto format_percent_c;
1310 }
1311 } else {
1312 *p++ = '\0'; /* explicit null byte */
1313 *p = '\0'; /* next output will start here */
1314 }
1315 break;
1316 }
1317 t = getsval(x);
1318 n = u8_nextlen(t);
1319 format_percent_c:
1320 if (n < 2) { /* not utf8 */
1321 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
1322 break;
1323 }
1324
1325 // utf8 character, almost same song and dance as for %s
1326 int ljust = 0, wid = 0, prec = n, pad = 0;
1327 char *f = fmt+1;
1328 if (f[0] == '-') {
1329 ljust = 1;
1330 f++;
1331 }
1332 // flags '0' and '+' are recognized but skipped
1333 if (f[0] == '0') {
1334 f++;
1335 if (f[0] == '+')
1336 f++;
1337 }
1338 if (f[0] == '+') {
1339 f++;
1340 if (f[0] == '0')
1341 f++;
1342 }
1343 if (isdigit(f[0])) { /* there is a wid */
1344 wid = strtol(f, &f, 10);
1345 }
1346 if (f[0] == '.') { /* there is a .prec */
1347 prec = strtol(++f, &f, 10);
1348 }
1349 if (prec > 1) // %c --> only one character
1350 prec = 1;
1351 pad = wid>prec ? wid - prec : 0; // has to be >= 0
1352 int i;
1353
1354 if (ljust) { // print one char from t, then pad blanks
1355 for (i = 0; i < n; i++)
1356 *p++ = t[i];
1357 for (i = 0; i < pad; i++) {
1358 //printf(" ");
1359 *p++ = ' ';
1360 }
1361 } else { // print pad blanks, then prec chars from t
1362 for (i = 0; i < pad; i++) {
1363 //printf(" ");
1364 *p++ = ' ';
1365 }
1366 for (i = 0; i < n; i++)
1367 *p++ = t[i];
1368 }
1369 *p = 0;
1370 break;
1371 }
1372 default:
1373 FATAL("can't happen: bad conversion %c in format()", flag);
1374 }
1375
1376 tempfree(x);
1377 p += strlen(p);
1378 s++;
1379 }
1380 *p = '\0';
1381 free(fmt);
1382 for ( ; a; a = a->nnext) { /* evaluate any remaining args */
1383 x = execute(a);
1384 tempfree(x);
1385 }
1386 *pbuf = buf;
1387 *pbufsize = bufsize;
1388 return p - buf;
1389 }
1390
awksprintf(Node ** a,int n)1391 Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */
1392 {
1393 Cell *x;
1394 Node *y;
1395 char *buf;
1396 int bufsz=3*recsize;
1397
1398 if ((buf = (char *) malloc(bufsz)) == NULL)
1399 FATAL("out of memory in awksprintf");
1400 y = a[0]->nnext;
1401 x = execute(a[0]);
1402 if (format(&buf, &bufsz, getsval(x), y) == -1)
1403 FATAL("sprintf string %.30s... too long. can't happen.", buf);
1404 tempfree(x);
1405 x = gettemp();
1406 x->sval = buf;
1407 x->tval = STR;
1408 return(x);
1409 }
1410
awkprintf(Node ** a,int n)1411 Cell *awkprintf(Node **a, int n) /* printf */
1412 { /* a[0] is list of args, starting with format string */
1413 /* a[1] is redirection operator, a[2] is redirection file */
1414 FILE *fp;
1415 Cell *x;
1416 Node *y;
1417 char *buf;
1418 int len;
1419 int bufsz=3*recsize;
1420
1421 if ((buf = (char *) malloc(bufsz)) == NULL)
1422 FATAL("out of memory in awkprintf");
1423 y = a[0]->nnext;
1424 x = execute(a[0]);
1425 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
1426 FATAL("printf string %.30s... too long. can't happen.", buf);
1427 tempfree(x);
1428 if (a[1] == NULL) {
1429 /* fputs(buf, stdout); */
1430 fwrite(buf, len, 1, stdout);
1431 if (ferror(stdout))
1432 FATAL("write error on stdout");
1433 } else {
1434 fp = redirect(ptoi(a[1]), a[2]);
1435 /* fputs(buf, fp); */
1436 fwrite(buf, len, 1, fp);
1437 fflush(fp);
1438 if (ferror(fp))
1439 FATAL("write error on %s", filename(fp));
1440 }
1441 free(buf);
1442 return(True);
1443 }
1444
arith(Node ** a,int n)1445 Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */
1446 {
1447 Awkfloat i, j = 0;
1448 double v;
1449 Cell *x, *y, *z;
1450
1451 x = execute(a[0]);
1452 i = getfval(x);
1453 tempfree(x);
1454 if (n != UMINUS && n != UPLUS) {
1455 y = execute(a[1]);
1456 j = getfval(y);
1457 tempfree(y);
1458 }
1459 z = gettemp();
1460 switch (n) {
1461 case ADD:
1462 i += j;
1463 break;
1464 case MINUS:
1465 i -= j;
1466 break;
1467 case MULT:
1468 i *= j;
1469 break;
1470 case DIVIDE:
1471 if (j == 0)
1472 FATAL("division by zero");
1473 i /= j;
1474 break;
1475 case MOD:
1476 if (j == 0)
1477 FATAL("division by zero in mod");
1478 modf(i/j, &v);
1479 i = i - j * v;
1480 break;
1481 case UMINUS:
1482 i = -i;
1483 break;
1484 case UPLUS: /* handled by getfval(), above */
1485 break;
1486 case POWER:
1487 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */
1488 i = ipow(i, (int) j);
1489 else {
1490 errno = 0;
1491 i = errcheck(pow(i, j), "pow");
1492 }
1493 break;
1494 default: /* can't happen */
1495 FATAL("illegal arithmetic operator %d", n);
1496 }
1497 setfval(z, i);
1498 return(z);
1499 }
1500
/*
 * ipow: x**n by repeated squaring.  ought to be done by pow, but isn't always.
 * Returns 1 when n <= 0.
 *
 * The bits of n are consumed from the most significant downwards, so
 * the multiplications happen in the same order as in the halving
 * recursion (result for n/2 first, then squared, then times x if odd).
 */
double ipow(double x, int n)
{
	double acc = 1;
	int bit;

	if (n <= 0)
		return 1;

	/* locate the highest set bit of n */
	for (bit = 0; (n >> bit) > 1; bit++)
		;

	for ( ; bit >= 0; bit--) {
		if ((n >> bit) & 1)
			acc = x * acc * acc;
		else
			acc = acc * acc;
	}
	return acc;
}
1513
incrdecr(Node ** a,int n)1514 Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */
1515 {
1516 Cell *x, *z;
1517 int k;
1518 Awkfloat xf;
1519
1520 x = execute(a[0]);
1521 xf = getfval(x);
1522 k = (n == PREINCR || n == POSTINCR) ? 1 : -1;
1523 if (n == PREINCR || n == PREDECR) {
1524 setfval(x, xf + k);
1525 return(x);
1526 }
1527 z = gettemp();
1528 setfval(z, xf);
1529 setfval(x, xf + k);
1530 tempfree(x);
1531 return(z);
1532 }
1533
/*
 * assign: plain and compound assignment.
 *   a[0]  left-hand side (the cell that receives the value)
 *   a[1]  right-hand side
 *   n     ASSIGN, or one of ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
 * Returns the left-hand cell.  Division or modulus by zero is fatal.
 */
Cell *assign(Node **a, int n)	/* a[0] = a[1], a[0] += a[1], etc. */
{		/* this is subtle; don't muck with it. */
	Cell *x, *y;
	Awkfloat xf, yf;
	double v;

	/* note the order: the right-hand side is evaluated before the left */
	y = execute(a[1]);
	x = execute(a[0]);
	if (n == ASSIGN) {	/* ordinary assignment */
		if (x == y && !(x->tval & (FLD|REC)) && x != nfloc)
			;	/* self-assignment: leave alone unless it's a field or NF */
		else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
			/* source is both string and number: copy both views */
			yf = getfval(y);	/* fetched before setsval() touches x */
			setsval(x, getsval(y));
			x->fval = yf;
			x->tval |= NUM;
		}
		else if (isstr(y))
			setsval(x, getsval(y));
		else if (isnum(y))
			setfval(x, getfval(y));
		else
			funnyvar(y, "read value of");
		tempfree(y);
		return(x);
	}
	/* compound assignment: combine the two numeric values, store in x */
	xf = getfval(x);
	yf = getfval(y);
	switch (n) {
	case ADDEQ:
		xf += yf;
		break;
	case SUBEQ:
		xf -= yf;
		break;
	case MULTEQ:
		xf *= yf;
		break;
	case DIVEQ:
		if (yf == 0)
			FATAL("division by zero in /=");
		xf /= yf;
		break;
	case MODEQ:
		if (yf == 0)
			FATAL("division by zero in %%=");
		/* v receives the integral part of the quotient */
		modf(xf/yf, &v);
		xf = xf - yf * v;
		break;
	case POWEQ:
		if (yf >= 0 && modf(yf, &v) == 0.0)	/* pos integer exponent */
			xf = ipow(xf, (int) yf);
		else {
			/* clear errno so errcheck() sees only what pow() reports */
			errno = 0;
			xf = errcheck(pow(xf, yf), "pow");
		}
		break;
	default:
		FATAL("illegal assignment operator %d", n);
		break;
	}
	tempfree(y);
	setfval(x, xf);
	return(x);
}
1599
cat(Node ** a,int q)1600 Cell *cat(Node **a, int q) /* a[0] cat a[1] */
1601 {
1602 Cell *x, *y, *z;
1603 int n1, n2;
1604 char *s = NULL;
1605 int ssz = 0;
1606
1607 x = execute(a[0]);
1608 n1 = strlen(getsval(x));
1609 adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1");
1610 memcpy(s, x->sval, n1);
1611
1612 tempfree(x);
1613
1614 y = execute(a[1]);
1615 n2 = strlen(getsval(y));
1616 adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
1617 memcpy(s + n1, y->sval, n2);
1618 s[n1 + n2] = '\0';
1619
1620 tempfree(y);
1621
1622 z = gettemp();
1623 z->sval = s;
1624 z->tval = STR;
1625
1626 return(z);
1627 }
1628
pastat(Node ** a,int n)1629 Cell *pastat(Node **a, int n) /* a[0] { a[1] } */
1630 {
1631 Cell *x;
1632
1633 if (a[0] == NULL)
1634 x = execute(a[1]);
1635 else {
1636 x = execute(a[0]);
1637 if (istrue(x)) {
1638 tempfree(x);
1639 x = execute(a[1]);
1640 }
1641 }
1642 return x;
1643 }
1644
dopa2(Node ** a,int n)1645 Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */
1646 {
1647 Cell *x;
1648 int pair;
1649
1650 pair = ptoi(a[3]);
1651 if (pairstack[pair] == 0) {
1652 x = execute(a[0]);
1653 if (istrue(x))
1654 pairstack[pair] = 1;
1655 tempfree(x);
1656 }
1657 if (pairstack[pair] == 1) {
1658 x = execute(a[1]);
1659 if (istrue(x))
1660 pairstack[pair] = 0;
1661 tempfree(x);
1662 x = execute(a[2]);
1663 return(x);
1664 }
1665 return(False);
1666 }
1667
split(Node ** a,int nnn)1668 Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
1669 {
1670 Cell *x = NULL, *y, *ap;
1671 const char *s, *origs, *t;
1672 const char *fs = NULL;
1673 char *origfs = NULL;
1674 int sep;
1675 char temp, num[50];
1676 int n, tempstat, arg3type;
1677 int j;
1678 double result;
1679
1680 y = execute(a[0]); /* source string */
1681 origs = s = strdup(getsval(y));
1682 tempfree(y);
1683 arg3type = ptoi(a[3]);
1684 if (a[2] == NULL) { /* BUG: CSV should override implicit fs but not explicit */
1685 fs = getsval(fsloc);
1686 } else if (arg3type == STRING) { /* split(str,arr,"string") */
1687 x = execute(a[2]);
1688 fs = origfs = strdup(getsval(x));
1689 tempfree(x);
1690 } else if (arg3type == REGEXPR) {
1691 fs = "(regexpr)"; /* split(str,arr,/regexpr/) */
1692 } else {
1693 FATAL("illegal type of split");
1694 }
1695 sep = *fs;
1696 ap = execute(a[1]); /* array name */
1697 /* BUG 7/26/22: this appears not to reset array: see C1/asplit */
1698 freesymtab(ap);
1699 DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
1700 ap->tval &= ~STR;
1701 ap->tval |= ARR;
1702 ap->sval = (char *) makesymtab(NSYMTAB);
1703
1704 n = 0;
1705 if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) {
1706 /* split(s, a, //); have to arrange that it looks like empty sep */
1707 arg3type = 0;
1708 fs = "";
1709 sep = 0;
1710 }
1711 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */
1712 fa *pfa;
1713 if (arg3type == REGEXPR) { /* it's ready already */
1714 pfa = (fa *) a[2];
1715 } else {
1716 pfa = makedfa(fs, 1);
1717 }
1718 if (nematch(pfa,s)) {
1719 tempstat = pfa->initstat;
1720 pfa->initstat = 2;
1721 do {
1722 n++;
1723 snprintf(num, sizeof(num), "%d", n);
1724 temp = *patbeg;
1725 setptr(patbeg, '\0');
1726 if (is_number(s, & result))
1727 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1728 else
1729 setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1730 setptr(patbeg, temp);
1731 s = patbeg + patlen;
1732 if (*(patbeg+patlen-1) == '\0' || *s == '\0') {
1733 n++;
1734 snprintf(num, sizeof(num), "%d", n);
1735 setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
1736 pfa->initstat = tempstat;
1737 goto spdone;
1738 }
1739 } while (nematch(pfa,s));
1740 pfa->initstat = tempstat; /* bwk: has to be here to reset */
1741 /* cf gsub and refldbld */
1742 }
1743 n++;
1744 snprintf(num, sizeof(num), "%d", n);
1745 if (is_number(s, & result))
1746 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1747 else
1748 setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1749 spdone:
1750 pfa = NULL;
1751
1752 } else if (a[2] == NULL && CSV) { /* CSV only if no explicit separator */
1753 char *newt = (char *) malloc(strlen(s)); /* for building new string; reuse for each field */
1754 for (;;) {
1755 char *fr = newt;
1756 n++;
1757 if (*s == '"' ) { /* start of "..." */
1758 for (s++ ; *s != '\0'; ) {
1759 if (*s == '"' && s[1] != '\0' && s[1] == '"') {
1760 s += 2; /* doubled quote */
1761 *fr++ = '"';
1762 } else if (*s == '"' && (s[1] == '\0' || s[1] == ',')) {
1763 s++; /* skip over closing quote */
1764 break;
1765 } else {
1766 *fr++ = *s++;
1767 }
1768 }
1769 *fr++ = 0;
1770 } else { /* unquoted field */
1771 while (*s != ',' && *s != '\0')
1772 *fr++ = *s++;
1773 *fr++ = 0;
1774 }
1775 snprintf(num, sizeof(num), "%d", n);
1776 if (is_number(newt, &result))
1777 setsymtab(num, newt, result, STR|NUM, (Array *) ap->sval);
1778 else
1779 setsymtab(num, newt, 0.0, STR, (Array *) ap->sval);
1780 if (*s++ == '\0')
1781 break;
1782 }
1783 free(newt);
1784
1785 } else if (!CSV && sep == ' ') { /* usual case: split on white space */
1786 for (n = 0; ; ) {
1787 #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
1788 while (ISWS(*s))
1789 s++;
1790 if (*s == '\0')
1791 break;
1792 n++;
1793 t = s;
1794 do
1795 s++;
1796 while (*s != '\0' && !ISWS(*s));
1797 temp = *s;
1798 setptr(s, '\0');
1799 snprintf(num, sizeof(num), "%d", n);
1800 if (is_number(t, & result))
1801 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1802 else
1803 setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1804 setptr(s, temp);
1805 if (*s != '\0')
1806 s++;
1807 }
1808
1809 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */
1810 for (n = 0; *s != '\0'; s += u8_nextlen(s)) {
1811 char buf[10];
1812 n++;
1813 snprintf(num, sizeof(num), "%d", n);
1814
1815 for (j = 0; j < u8_nextlen(s); j++) {
1816 buf[j] = s[j];
1817 }
1818 buf[j] = '\0';
1819
1820 if (isdigit((uschar)buf[0]))
1821 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
1822 else
1823 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
1824 }
1825
1826 } else if (*s != '\0') { /* some random single character */
1827 for (;;) {
1828 n++;
1829 t = s;
1830 while (*s != sep && *s != '\n' && *s != '\0')
1831 s++;
1832 temp = *s;
1833 setptr(s, '\0');
1834 snprintf(num, sizeof(num), "%d", n);
1835 if (is_number(t, & result))
1836 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1837 else
1838 setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1839 setptr(s, temp);
1840 if (*s++ == '\0')
1841 break;
1842 }
1843 }
1844 tempfree(ap);
1845 xfree(origs);
1846 xfree(origfs);
1847 x = gettemp();
1848 x->tval = NUM;
1849 x->fval = n;
1850 return(x);
1851 }
1852
condexpr(Node ** a,int n)1853 Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */
1854 {
1855 Cell *x;
1856
1857 x = execute(a[0]);
1858 if (istrue(x)) {
1859 tempfree(x);
1860 x = execute(a[1]);
1861 } else {
1862 tempfree(x);
1863 x = execute(a[2]);
1864 }
1865 return(x);
1866 }
1867
ifstat(Node ** a,int n)1868 Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */
1869 {
1870 Cell *x;
1871
1872 x = execute(a[0]);
1873 if (istrue(x)) {
1874 tempfree(x);
1875 x = execute(a[1]);
1876 } else if (a[2] != NULL) {
1877 tempfree(x);
1878 x = execute(a[2]);
1879 }
1880 return(x);
1881 }
1882
whilestat(Node ** a,int n)1883 Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */
1884 {
1885 Cell *x;
1886
1887 for (;;) {
1888 x = execute(a[0]);
1889 if (!istrue(x))
1890 return(x);
1891 tempfree(x);
1892 x = execute(a[1]);
1893 if (isbreak(x)) {
1894 x = True;
1895 return(x);
1896 }
1897 if (isnext(x) || isexit(x) || isret(x))
1898 return(x);
1899 tempfree(x);
1900 }
1901 }
1902
dostat(Node ** a,int n)1903 Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */
1904 {
1905 Cell *x;
1906
1907 for (;;) {
1908 x = execute(a[0]);
1909 if (isbreak(x))
1910 return True;
1911 if (isnext(x) || isexit(x) || isret(x))
1912 return(x);
1913 tempfree(x);
1914 x = execute(a[1]);
1915 if (!istrue(x))
1916 return(x);
1917 tempfree(x);
1918 }
1919 }
1920
forstat(Node ** a,int n)1921 Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */
1922 {
1923 Cell *x;
1924
1925 x = execute(a[0]);
1926 tempfree(x);
1927 for (;;) {
1928 if (a[1]!=NULL) {
1929 x = execute(a[1]);
1930 if (!istrue(x)) return(x);
1931 else tempfree(x);
1932 }
1933 x = execute(a[3]);
1934 if (isbreak(x)) /* turn off break */
1935 return True;
1936 if (isnext(x) || isexit(x) || isret(x))
1937 return(x);
1938 tempfree(x);
1939 x = execute(a[2]);
1940 tempfree(x);
1941 }
1942 }
1943
instat(Node ** a,int n)1944 Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */
1945 {
1946 Cell *x, *vp, *arrayp, *cp, *ncp;
1947 Array *tp;
1948 int i;
1949
1950 vp = execute(a[0]);
1951 arrayp = execute(a[1]);
1952 if (!isarr(arrayp)) {
1953 return True;
1954 }
1955 tp = (Array *) arrayp->sval;
1956 tempfree(arrayp);
1957 for (i = 0; i < tp->size; i++) { /* this routine knows too much */
1958 for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
1959 setsval(vp, cp->nval);
1960 ncp = cp->cnext;
1961 x = execute(a[2]);
1962 if (isbreak(x)) {
1963 tempfree(vp);
1964 return True;
1965 }
1966 if (isnext(x) || isexit(x) || isret(x)) {
1967 tempfree(vp);
1968 return(x);
1969 }
1970 tempfree(x);
1971 }
1972 }
1973 return True;
1974 }
1975
nawk_convert(const char * s,int (* fun_c)(int),wint_t (* fun_wc)(wint_t))1976 static char *nawk_convert(const char *s, int (*fun_c)(int),
1977 wint_t (*fun_wc)(wint_t))
1978 {
1979 char *buf = NULL;
1980 char *pbuf = NULL;
1981 const char *ps = NULL;
1982 size_t n = 0;
1983 wchar_t wc;
1984 const size_t sz = awk_mb_cur_max;
1985 int unused;
1986
1987 if (sz == 1) {
1988 buf = tostring(s);
1989
1990 for (pbuf = buf; *pbuf; pbuf++)
1991 *pbuf = fun_c((uschar)*pbuf);
1992
1993 return buf;
1994 } else {
1995 /* upper/lower character may be shorter/longer */
1996 buf = tostringN(s, strlen(s) * sz + 1);
1997
1998 (void) mbtowc(NULL, NULL, 0); /* reset internal state */
1999 /*
2000 * Reset internal state here too.
2001 * Assign result to avoid a compiler warning. (Casting to void
2002 * doesn't work.)
2003 * Increment said variable to avoid a different warning.
2004 */
2005 unused = wctomb(NULL, L'\0');
2006 unused++;
2007
2008 ps = s;
2009 pbuf = buf;
2010 while (n = mbtowc(&wc, ps, sz),
2011 n > 0 && n != (size_t)-1 && n != (size_t)-2)
2012 {
2013 ps += n;
2014
2015 n = wctomb(pbuf, fun_wc(wc));
2016 if (n == (size_t)-1)
2017 FATAL("illegal wide character %s", s);
2018
2019 pbuf += n;
2020 }
2021
2022 *pbuf = '\0';
2023
2024 if (n)
2025 FATAL("illegal byte sequence %s", s);
2026
2027 return buf;
2028 }
2029 }
2030
#ifdef __DJGPP__
/*
 * Local replacements used only when building with DJGPP.  They convert
 * values below 256 with the byte-oriented toupper()/tolower() and pass
 * every other value through unchanged.
 */
static wint_t towupper(wint_t wc)
{
	if (!(wc >= 0 && wc < 256))
		return wc;

	return toupper(wc & 0xFF);
}

static wint_t towlower(wint_t wc)
{
	if (!(wc >= 0 && wc < 256))
		return wc;

	return tolower(wc & 0xFF);
}
#endif
2048
/* nawk_toupper: newly allocated upper-case copy of s, via nawk_convert(). */
static char *nawk_toupper(const char *s)
{
	char *upper = nawk_convert(s, toupper, towupper);

	return upper;
}
2053
/* nawk_tolower: newly allocated lower-case copy of s, via nawk_convert(). */
static char *nawk_tolower(const char *s)
{
	char *lower = nawk_convert(s, tolower, towlower);

	return lower;
}
2058
2059
2060
bltin(Node ** a,int n)2061 Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */
2062 {
2063 Cell *x, *y;
2064 Awkfloat u;
2065 int t;
2066 Awkfloat tmp;
2067 char *buf;
2068 Node *nextarg;
2069 FILE *fp;
2070 int status = 0;
2071 int estatus = 0;
2072
2073 t = ptoi(a[0]);
2074 x = execute(a[1]);
2075 nextarg = a[1]->nnext;
2076 switch (t) {
2077 case FLENGTH:
2078 if (isarr(x))
2079 u = ((Array *) x->sval)->nelem; /* GROT. should be function*/
2080 else
2081 u = u8_strlen(getsval(x));
2082 break;
2083 case FLOG:
2084 errno = 0;
2085 u = errcheck(log(getfval(x)), "log");
2086 break;
2087 case FINT:
2088 modf(getfval(x), &u); break;
2089 case FEXP:
2090 errno = 0;
2091 u = errcheck(exp(getfval(x)), "exp");
2092 break;
2093 case FSQRT:
2094 errno = 0;
2095 u = errcheck(sqrt(getfval(x)), "sqrt");
2096 break;
2097 case FSIN:
2098 u = sin(getfval(x)); break;
2099 case FCOS:
2100 u = cos(getfval(x)); break;
2101 case FATAN:
2102 if (nextarg == NULL) {
2103 WARNING("atan2 requires two arguments; returning 1.0");
2104 u = 1.0;
2105 } else {
2106 y = execute(a[1]->nnext);
2107 u = atan2(getfval(x), getfval(y));
2108 tempfree(y);
2109 nextarg = nextarg->nnext;
2110 }
2111 break;
2112 case FSYSTEM:
2113 fflush(stdout); /* in case something is buffered already */
2114 estatus = status = system(getsval(x));
2115 if (status != -1) {
2116 if (WIFEXITED(status)) {
2117 estatus = WEXITSTATUS(status);
2118 } else if (WIFSIGNALED(status)) {
2119 estatus = WTERMSIG(status) + 256;
2120 #ifdef WCOREDUMP
2121 if (WCOREDUMP(status))
2122 estatus += 256;
2123 #endif
2124 } else /* something else?!? */
2125 estatus = 0;
2126 }
2127 /* else estatus was set to -1 */
2128 u = estatus;
2129 break;
2130 case FRAND:
2131 /* random() returns numbers in [0..2^31-1]
2132 * in order to get a number in [0, 1), divide it by 2^31
2133 */
2134 u = (Awkfloat) random() / (0x7fffffffL + 0x1UL);
2135 break;
2136 case FSRAND:
2137 if (isrec(x)) /* no argument provided */
2138 u = time((time_t *)0);
2139 else
2140 u = getfval(x);
2141 tmp = u;
2142 srandom((unsigned long) u);
2143 u = srand_seed;
2144 srand_seed = tmp;
2145 break;
2146 case FTOUPPER:
2147 case FTOLOWER:
2148 if (t == FTOUPPER)
2149 buf = nawk_toupper(getsval(x));
2150 else
2151 buf = nawk_tolower(getsval(x));
2152 tempfree(x);
2153 x = gettemp();
2154 setsval(x, buf);
2155 free(buf);
2156 return x;
2157 case FFLUSH:
2158 if (isrec(x) || strlen(getsval(x)) == 0) {
2159 flush_all(); /* fflush() or fflush("") -> all */
2160 u = 0;
2161 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
2162 u = EOF;
2163 else
2164 u = fflush(fp);
2165 break;
2166 default: /* can't happen */
2167 FATAL("illegal function type %d", t);
2168 break;
2169 }
2170 tempfree(x);
2171 x = gettemp();
2172 setfval(x, u);
2173 if (nextarg != NULL) {
2174 WARNING("warning: function has too many arguments");
2175 for ( ; nextarg; nextarg = nextarg->nnext) {
2176 y = execute(nextarg);
2177 tempfree(y);
2178 }
2179 }
2180 return(x);
2181 }
2182
printstat(Node ** a,int n)2183 Cell *printstat(Node **a, int n) /* print a[0] */
2184 {
2185 Node *x;
2186 Cell *y;
2187 FILE *fp;
2188
2189 if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */
2190 fp = stdout;
2191 else
2192 fp = redirect(ptoi(a[1]), a[2]);
2193 for (x = a[0]; x != NULL; x = x->nnext) {
2194 y = execute(x);
2195 fputs(getpssval(y), fp);
2196 tempfree(y);
2197 if (x->nnext == NULL)
2198 fputs(getsval(orsloc), fp);
2199 else
2200 fputs(getsval(ofsloc), fp);
2201 }
2202 if (a[1] != NULL)
2203 fflush(fp);
2204 if (ferror(fp))
2205 FATAL("write error on %s", filename(fp));
2206 return(True);
2207 }
2208
nullproc(Node ** a,int n)2209 Cell *nullproc(Node **a, int n)
2210 {
2211 return 0;
2212 }
2213
2214
/*
 * redirect: set up all i/o redirections.
 * Evaluates b to get the target name and opens it through openfile()
 * with redirection operator a.  Failure to open is fatal.
 */
FILE *redirect(int a, Node *b)
{
	Cell *target = execute(b);
	char *path = getsval(target);
	FILE *stream = openfile(a, path, NULL);

	if (stream == NULL)
		FATAL("can't open file %s", path);
	tempfree(target);
	return stream;
}
2229
/*
 * Table of the streams this program has open, one entry per file or pipe.
 * openfile() treats an entry whose fp is NULL as a free slot.
 */
struct files {
	FILE	*fp;		/* the open stream; NULL when the slot is unused */
	const char	*fname;	/* name the stream was opened under; closefile() resets it to NULL */
	int	mode;	/* '|', 'a', 'w' => LE/LT, GT */
} *files;

size_t nfiles;		/* number of entries allocated in files[] */
2237
stdinit(void)2238 static void stdinit(void) /* in case stdin, etc., are not constants */
2239 {
2240 nfiles = FOPEN_MAX;
2241 files = (struct files *) calloc(nfiles, sizeof(*files));
2242 if (files == NULL)
2243 FATAL("can't allocate file memory for %zu files", nfiles);
2244 files[0].fp = stdin;
2245 files[0].fname = tostring("/dev/stdin");
2246 files[0].mode = LT;
2247 files[1].fp = stdout;
2248 files[1].fname = tostring("/dev/stdout");
2249 files[1].mode = GT;
2250 files[2].fp = stderr;
2251 files[2].fname = tostring("/dev/stderr");
2252 files[2].mode = GT;
2253 }
2254
openfile(int a,const char * us,bool * pnewflag)2255 FILE *openfile(int a, const char *us, bool *pnewflag)
2256 {
2257 const char *s = us;
2258 size_t i;
2259 int m;
2260 FILE *fp = NULL;
2261
2262 if (*s == '\0')
2263 FATAL("null file name in print or getline");
2264 for (i = 0; i < nfiles; i++)
2265 if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
2266 (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
2267 a == FFLUSH)) {
2268 if (pnewflag)
2269 *pnewflag = false;
2270 return files[i].fp;
2271 }
2272 if (a == FFLUSH) /* didn't find it, so don't create it! */
2273 return NULL;
2274
2275 for (i = 0; i < nfiles; i++)
2276 if (files[i].fp == NULL)
2277 break;
2278 if (i >= nfiles) {
2279 struct files *nf;
2280 size_t nnf = nfiles + FOPEN_MAX;
2281 nf = (struct files *) realloc(files, nnf * sizeof(*nf));
2282 if (nf == NULL)
2283 FATAL("cannot grow files for %s and %zu files", s, nnf);
2284 memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
2285 nfiles = nnf;
2286 files = nf;
2287 }
2288 fflush(stdout); /* force a semblance of order */
2289 m = a;
2290 if (a == GT) {
2291 fp = fopen(s, "w");
2292 } else if (a == APPEND) {
2293 fp = fopen(s, "a");
2294 m = GT; /* so can mix > and >> */
2295 } else if (a == '|') { /* output pipe */
2296 fp = popen(s, "w");
2297 } else if (a == LE) { /* input pipe */
2298 fp = popen(s, "r");
2299 } else if (a == LT) { /* getline <file */
2300 fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */
2301 } else /* can't happen */
2302 FATAL("illegal redirection %d", a);
2303 if (fp != NULL) {
2304 files[i].fname = tostring(s);
2305 files[i].fp = fp;
2306 files[i].mode = m;
2307 if (pnewflag)
2308 *pnewflag = true;
2309 if (fp != stdin && fp != stdout && fp != stderr)
2310 (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
2311 }
2312 return fp;
2313 }
2314
filename(FILE * fp)2315 const char *filename(FILE *fp)
2316 {
2317 size_t i;
2318
2319 for (i = 0; i < nfiles; i++)
2320 if (fp == files[i].fp)
2321 return files[i].fname;
2322 return "???";
2323 }
2324
closefile(Node ** a,int n)2325 Cell *closefile(Node **a, int n)
2326 {
2327 Cell *x;
2328 size_t i;
2329 bool stat;
2330
2331 x = execute(a[0]);
2332 getsval(x);
2333 stat = true;
2334 for (i = 0; i < nfiles; i++) {
2335 if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0)
2336 continue;
2337 if (files[i].mode == GT || files[i].mode == '|')
2338 fflush(files[i].fp);
2339 if (ferror(files[i].fp)) {
2340 if ((files[i].mode == GT && files[i].fp != stderr)
2341 || files[i].mode == '|')
2342 FATAL("write error on %s", files[i].fname);
2343 else
2344 WARNING("i/o error occurred on %s", files[i].fname);
2345 }
2346 if (files[i].fp == stdin || files[i].fp == stdout ||
2347 files[i].fp == stderr)
2348 stat = freopen("/dev/null", "r+", files[i].fp) == NULL;
2349 else if (files[i].mode == '|' || files[i].mode == LE)
2350 stat = pclose(files[i].fp) == -1;
2351 else
2352 stat = fclose(files[i].fp) == EOF;
2353 if (stat)
2354 WARNING("i/o error occurred closing %s", files[i].fname);
2355 xfree(files[i].fname);
2356 files[i].fname = NULL; /* watch out for ref thru this */
2357 files[i].fp = NULL;
2358 break;
2359 }
2360 tempfree(x);
2361 x = gettemp();
2362 setfval(x, (Awkfloat) (stat ? -1 : 0));
2363 return(x);
2364 }
2365
closeall(void)2366 void closeall(void)
2367 {
2368 size_t i;
2369 bool stat = false;
2370
2371 for (i = 0; i < nfiles; i++) {
2372 if (! files[i].fp)
2373 continue;
2374 if (files[i].mode == GT || files[i].mode == '|')
2375 fflush(files[i].fp);
2376 if (ferror(files[i].fp)) {
2377 if ((files[i].mode == GT && files[i].fp != stderr)
2378 || files[i].mode == '|')
2379 FATAL("write error on %s", files[i].fname);
2380 else
2381 WARNING("i/o error occurred on %s", files[i].fname);
2382 }
2383 if (files[i].fp == stdin || files[i].fp == stdout ||
2384 files[i].fp == stderr)
2385 continue;
2386 if (files[i].mode == '|' || files[i].mode == LE)
2387 stat = pclose(files[i].fp) == -1;
2388 else
2389 stat = fclose(files[i].fp) == EOF;
2390 if (stat)
2391 WARNING("i/o error occurred while closing %s", files[i].fname);
2392 }
2393 }
2394
flush_all(void)2395 static void flush_all(void)
2396 {
2397 size_t i;
2398
2399 for (i = 0; i < nfiles; i++)
2400 if (files[i].fp)
2401 fflush(files[i].fp);
2402 }
2403
/*
 * Declared here for dosub(), which calls it with the output position
 * and the replacement-text position when it meets a backslash in the
 * replacement string.
 */
void backsub(char **pb_ptr, const char **sptr_ptr);
2405
/*
 * dosub - common implementation of sub() and gsub().
 *
 *   a[0], a[1]  pattern: when a[0] is NULL, a[1] is used directly as an
 *               already-compiled fa; otherwise a[1] is executed and its
 *               string value is compiled with makedfa().
 *   a[2]        replacement string expression.
 *   a[3]        expression yielding the cell whose string is searched and,
 *               if anything matched, overwritten with the result.
 *   subop       SUB (act on the first match only) or GSUB (act on every
 *               match); any other value is reported with FATAL.
 *
 * Returns a temporary NUM cell whose value is the match counter m.
 *
 * The output is assembled in a malloc'ed buffer that is only allocated
 * once the first match is found; if there is no match the source cell is
 * left untouched.  patbeg and patlen are not declared in this function;
 * they are presumably set by pmatch() to describe the current match
 * (TODO confirm).
 */
Cell *dosub(Node **a, int subop)	/* sub and gsub */
{
	fa *pfa;
	int tempstat;		/* saved pfa->initstat; only valid once buf != NULL */
	char *repl;		/* private copy of the replacement text; freed below */
	Cell *x;

	char *buf = NULL;	/* output buffer; NULL until the first match */
	char *pb = NULL;	/* next write position inside buf */
	int bufsz = recsize;

	const char *r, *s;
	const char *start;	/* where the next search begins in the source string */
	const char *noempty = NULL;	/* empty match disallowed here */
	size_t m = 0;			/* match count */
	size_t whichm;			/* which match to select, 0 = global */
	int mtype;			/* match type */

	if (a[0] == NULL) {	/* 0 => a[1] is already-compiled regexpr */
		pfa = (fa *) a[1];
	} else {
		x = execute(a[1]);
		pfa = makedfa(getsval(x), 1);
		tempfree(x);
	}

	x = execute(a[2]);	/* replacement string */
	repl = tostring(getsval(x));
	tempfree(x);

	switch (subop) {
	case SUB:
		whichm = 1;	/* only the first match is selected */
		x = execute(a[3]);	/* source string */
		break;
	case GSUB:
		whichm = 0;	/* 0 selects every match */
		x = execute(a[3]);	/* source string */
		break;
	default:
		/* NOTE(review): x and whichm stay unset here; relies on FATAL not returning */
		FATAL("dosub: unrecognized subop: %d", subop);
	}

	start = getsval(x);
	while (pmatch(pfa, start)) {
		if (buf == NULL) {
			/* first match: allocate the output buffer lazily */
			if ((pb = buf = (char *) malloc(bufsz)) == NULL)
				FATAL("out of memory in dosub");
			/*
			 * Save the automaton's initial state and force it to 2 for
			 * the remaining searches; it is restored after the loop.
			 * NOTE(review): presumably this keeps later searches from
			 * being treated as starting at the beginning of the string
			 * -- confirm against pmatch().
			 */
			tempstat = pfa->initstat;
			pfa->initstat = 2;
		}

		/* match types */
		#define	MT_IGNORE  0  /* unselected or invalid */
		#define MT_INSERT  1  /* selected, empty */
		#define MT_REPLACE 2  /* selected, not empty */

		/* an empty match just after replacement is invalid */

		if (patbeg == noempty && patlen == 0) {
			mtype = MT_IGNORE;	/* invalid, not counted */
		} else if (whichm == ++m || whichm == 0) {
			/* m is incremented here for every valid match, selected or not */
			mtype = patlen ? MT_REPLACE : MT_INSERT;
		} else {
			mtype = MT_IGNORE;	/* unselected, but counted */
		}

		/* leading text: */
		/* copy the unmatched bytes between the search start and the match */
		if (patbeg > start) {
			adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - start),
				recsize, &pb, "dosub");
			s = start;
			while (s < patbeg)
				*pb++ = *s++;
		}

		if (mtype == MT_IGNORE)
			goto matching_text;	/* skip replacement text */

		/* replacement text: expand repl, handling backslashes and & */
		r = repl;
		while (*r != 0) {
			/* request 5 spare bytes up front; backsub() writes at most 2 */
			adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "dosub");
			if (*r == '\\') {
				backsub(&pb, &r);
			} else if (*r == '&') {
				/* unescaped & stands for the matched text */
				r++;
				adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize,
					&pb, "dosub");
				for (s = patbeg; s < patbeg+patlen; )
					*pb++ = *s++;
			} else {
				*pb++ = *r++;
			}
		}

matching_text:
		/*
		 * A replaced match contributes none of its own text; a match at
		 * the terminating NUL has no text to copy.
		 */
		if (mtype == MT_REPLACE || *patbeg == '\0')
			goto next_search;	/* skip matching text */

		/*
		 * For an empty match, copy one character so the next search makes
		 * progress.  NOTE(review): u8_nextlen() presumably returns the
		 * byte length of the UTF-8 character at patbeg -- confirm.
		 */
		if (patlen == 0)
			patlen = u8_nextlen(patbeg);
		adjbuf(&buf, &bufsz, (pb-buf) + patlen, recsize, &pb, "dosub");
		s = patbeg;
		while (s < patbeg + patlen)
			*pb++ = *s++;

next_search:
		/* patlen may have been widened above for an empty match */
		start = patbeg + patlen;
		/* stop after the selected match (sub) or at end of string */
		if (m == whichm || *patbeg == '\0')
			break;
		/* remember the position just past a replacement; an empty match
		 * found exactly there is rejected at the top of the loop */
		if (mtype == MT_REPLACE)
			noempty = start;

		#undef MT_IGNORE
		#undef MT_INSERT
		#undef MT_REPLACE
	}

	xfree(repl);

	if (buf != NULL) {
		/* at least one match was found: finish the result and store it */
		pfa->initstat = tempstat;

		/* trailing text */
		adjbuf(&buf, &bufsz, 1+strlen(start)+pb-buf, 0, &pb, "dosub");
		while ((*pb++ = *start++) != '\0')	/* copies the NUL too */
			;

		setsval(x, buf);
		free(buf);
	}

	tempfree(x);
	/* result: a fresh temporary numeric cell holding the match count */
	x = gettemp();
	x->tval = NUM;
	x->fval = m;
	return x;
}
2544
/*
 * backsub - translate one backslash sequence of a sub/gsub replacement.
 *
 * On entry (*sptr_ptr)[0] is expected to be a backslash.  The translated
 * bytes (at most two) are stored through *pb_ptr, and both pointers are
 * advanced past what was written and what was consumed:
 *
 *	input		written		consumed
 *	\\\&		\&		4
 *	\\&		\		2   (the & is left for the caller)
 *	\\ + other	\		2   when POSIXLY_CORRECT is set
 *	\\ + other	\\		2   otherwise
 *	\&		&		2
 *	\ + other	\		1
 *
 * The POSIXLY_CORRECT environment variable is looked up once, on the
 * first call, and the answer is cached for later calls.
 */
void backsub(char **pb_ptr, const char **sptr_ptr)
{
	static int posix_mode = -1;	/* -1: not looked up yet, else 0 or 1 */
	const char *in = *sptr_ptr;
	char *out = *pb_ptr;

	if (posix_mode < 0)
		posix_mode = (getenv("POSIXLY_CORRECT") != NULL);

	if (in[1] == '&') {
		/* escaped ampersand: emit it literally */
		*out++ = '&';
		in += 2;
	} else if (in[1] != '\\') {
		/* backslash before anything else: keep the backslash only */
		*out++ = in[0];
		in += 1;
	} else if (in[2] == '\\' && in[3] == '&') {
		/* three backslashes and &: literal backslash plus literal & */
		*out++ = '\\';
		*out++ = '&';
		in += 4;
	} else if (in[2] == '&') {
		/* two backslashes and &: one backslash, & stays unconsumed */
		*out++ = '\\';
		in += 2;
	} else if (posix_mode) {
		/* two backslashes collapse to one */
		*out++ = in[1];
		in += 2;
	} else {
		/* two backslashes are copied through unchanged */
		*out++ = in[0];
		*out++ = in[1];
		in += 2;
	}

	*sptr_ptr = in;
	*pb_ptr = out;
}
2581
/*
 * wide_char_to_byte_str - encode a code point as a UTF-8 byte string.
 *
 * rune    code point to encode; values outside 0..0x10FFFF are rejected.
 * outlen  receives the number of encoded bytes (1 to 4), not counting the
 *         terminating NUL.  It is left untouched when NULL is returned.
 *
 * Returns a pointer to a NUL-terminated static buffer that is overwritten
 * by the next call, or NULL if rune is out of range.
 */
static char *wide_char_to_byte_str(int rune, size_t *outlen)
{
	static char utf8[5];	/* up to 4 encoded bytes plus the NUL */
	size_t nbytes;
	size_t k;

	if (rune < 0 || rune > 0x10FFFF)
		return NULL;

	memset(utf8, 0, sizeof(utf8));

	/* pick the sequence length and build the lead byte */
	if (rune < 0x80) {
		nbytes = 1;			/* 0xxxxxxx */
		utf8[0] = rune;
	} else if (rune < 0x800) {
		nbytes = 2;			/* 110xxxxx 10xxxxxx */
		utf8[0] = 0xC0 | (rune >> 6);
	} else if (rune < 0x10000) {
		nbytes = 3;			/* 1110xxxx 10xxxxxx 10xxxxxx */
		utf8[0] = 0xE0 | (rune >> 12);
	} else {
		nbytes = 4;			/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		utf8[0] = 0xF0 | (rune >> 18);
	}

	/* continuation bytes carry 6 bits each; fill them last to first */
	for (k = nbytes - 1; k > 0; k--) {
		utf8[k] = 0x80 | (rune & 0x3F);
		rune >>= 6;
	}

	utf8[nbytes] = '\0';
	*outlen = nbytes;

	return utf8;
}
2619