1 /* $OpenBSD: run.c,v 1.88 2024/06/04 14:40:46 millert Exp $ */
2 /****************************************************************
3 Copyright (C) Lucent Technologies 1997
4 All Rights Reserved
5
6 Permission to use, copy, modify, and distribute this software and
7 its documentation for any purpose and without fee is hereby
8 granted, provided that the above copyright notice appear in all
9 copies and that both that the copyright notice and this
10 permission notice and warranty disclaimer appear in supporting
11 documentation, and that the name Lucent Technologies or any of
12 its entities not be used in advertising or publicity pertaining
13 to distribution of the software without specific, written prior
14 permission.
15
16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 THIS SOFTWARE.
24 ****************************************************************/
25
26 #define DEBUG
27 #include <stdio.h>
28 #include <ctype.h>
29 #include <errno.h>
30 #include <wctype.h>
31 #include <fcntl.h>
32 #include <setjmp.h>
33 #include <limits.h>
34 #include <math.h>
35 #include <string.h>
36 #include <stdlib.h>
37 #include <time.h>
38 #include <sys/types.h>
39 #include <sys/wait.h>
40 #include "awk.h"
41 #include "awkgram.tab.h"
42
43
/*
 * Forward declarations of file-local helpers.  Their definitions are not in
 * this part of the file; stdinit() is called from run() below.
 */
static void stdinit(void);
static void flush_all(void);
static char *wide_char_to_byte_str(int rune, size_t *outlen);
47
/*
 * tempfree(x): release cell x through tfree(), but only when istemp(x) is
 * true.  The macro form is the one that gets compiled (#if 1).  The function
 * form in the #else branch is a disabled debugging variant that additionally
 * warns when an OCELL has a csub outside CUNK..CFREE before releasing it.
 */
#if 1
#define tempfree(x)	do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0)
#else
void tempfree(Cell *p) {
	if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) {
		WARNING("bad csub %d in Cell %d %s",
			p->csub, p->ctype, p->sval);
	}
	if (istemp(p))
		tfree(p);
}
#endif
60
61 /* do we really need these? */
62 /* #ifdef _NFILE */
63 /* #ifndef FOPEN_MAX */
64 /* #define FOPEN_MAX _NFILE */
65 /* #endif */
66 /* #endif */
67 /* */
68 /* #ifndef FOPEN_MAX */
69 /* #define FOPEN_MAX 40 */ /* max number of open files */
70 /* #endif */
71 /* */
72 /* #ifndef RAND_MAX */
73 /* #define RAND_MAX 32767 */ /* all that ansi guarantees */
74 /* #endif */
75
jmp_buf env;			/* set in program(); jump() longjmps here for "exit" */
extern	int	pairstack[];
extern	Awkfloat	srand_seed;

Node	*winner = NULL;	/* root of parse tree */
Cell	*tmps;		/* free temporary cells for execution */

/*
 * Shared, statically allocated result cells.  The boolean operators return
 * True/False, and jump() returns the j* cells to signal break, continue,
 * next, nextfile and return to the enclosing statement.
 */
static Cell	truecell	={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL };
Cell	*True	= &truecell;
static Cell	falsecell	={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*False	= &falsecell;
static Cell	breakcell	={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jbreak	= &breakcell;
static Cell	contcell	={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jcont	= &contcell;
static Cell	nextcell	={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jnext	= &nextcell;
static Cell	nextfilecell	={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jnextfile	= &nextfilecell;
static Cell	exitcell	={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jexit	= &exitcell;
static Cell	retcell		={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jret	= &retcell;
/* template copied into every cell handed out by gettemp() */
static Cell	tempcell	={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };

Node	*curnode = NULL;	/* the node being executed, for debugging */
102
103 /* buffer memory management */
/*
 * adjbuf: make sure a managed buffer can hold at least minlen bytes.
 *
 * pbuf:    address of pointer to buffer being managed
 * psiz:    address of buffer size variable
 * minlen:  minimum length of buffer needed
 * quantum: buffer size quantum (0 means no rounding)
 * pbptr:   address of movable pointer into buffer, or 0 if none; it is
 *          re-based onto the new buffer when the buffer moves
 * whatrtn: name of the calling routine if failure should cause fatal error
 *
 * Nothing happens when the buffer is already large enough.  Otherwise the
 * request is rounded up to the next multiple of quantum and the buffer is
 * realloc'ed.  A request whose rounded size would not fit in an int is
 * treated exactly like a failed allocation.
 *
 * return 0 for realloc failure, !=0 for success
 */
int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
	const char *whatrtn)
{
	if (minlen > *psiz) {
		char *tbuf;
		int rminlen = quantum ? minlen % quantum : 0;
		int boff = pbptr ? *pbptr - *pbuf : 0;
		/* round up to next multiple of quantum */
		if (rminlen) {
			int pad = quantum - rminlen;

			/* rounding must not overflow int (undefined behavior) */
			if (pad > 0 && minlen > INT_MAX - pad) {
				if (whatrtn)
					FATAL("out of memory in %s", whatrtn);
				return 0;
			}
			minlen += pad;
		}
		tbuf = (char *) realloc(*pbuf, minlen);
		DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)tbuf);
		if (tbuf == NULL) {
			if (whatrtn)
				FATAL("out of memory in %s", whatrtn);
			return 0;
		}
		*pbuf = tbuf;
		*psiz = minlen;
		if (pbptr)
			*pbptr = tbuf + boff;
	}
	return 1;
}
137
/*
 * run: entry point for executing a parse tree.  Calls stdinit(), executes
 * the tree rooted at a, then calls closeall().
 */
void run(Node *a)
{
	stdinit();
	execute(a);
	closeall();
}
145
execute(Node * u)146 Cell *execute(Node *u) /* execute a node of the parse tree */
147 {
148 Cell *(*proc)(Node **, int);
149 Cell *x;
150 Node *a;
151
152 if (u == NULL)
153 return(True);
154 for (a = u; ; a = a->nnext) {
155 curnode = a;
156 if (isvalue(a)) {
157 x = (Cell *) (a->narg[0]);
158 if (isfld(x) && !donefld)
159 fldbld();
160 else if (isrec(x) && !donerec)
161 recbld();
162 return(x);
163 }
164 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */
165 FATAL("illegal statement");
166 proc = proctab[a->nobj-FIRSTTOKEN];
167 x = (*proc)(a->narg, a->nobj);
168 if (isfld(x) && !donefld)
169 fldbld();
170 else if (isrec(x) && !donerec)
171 recbld();
172 if (isexpr(a))
173 return(x);
174 if (isjump(x))
175 return(x);
176 if (a->nnext == NULL)
177 return(x);
178 tempfree(x);
179 }
180 }
181
182
/*
 * program: execute an awk program.
 * a[0] = BEGIN, a[1] = body, a[2] = END
 *
 * Control flow relies on setjmp(env): jump() performs longjmp(env, 1) for
 * "exit".  The first setjmp covers BEGIN and the main loop, so an exit there
 * lands at ex: and the END action still runs.  The second setjmp is
 * installed before END so that an exit inside END lands at ex1: and returns.
 * Always returns True.
 */
Cell *program(Node **a, int n)	/* execute an awk program */
{				/* a[0] = BEGIN, a[1] = body, a[2] = END */
	Cell *x;

	if (setjmp(env) != 0)	/* exit in BEGIN or in the main loop */
		goto ex;
	if (a[0]) {		/* BEGIN */
		x = execute(a[0]);
		if (isexit(x))
			return(True);
		if (isjump(x))
			FATAL("illegal break, continue, next or nextfile from BEGIN");
		tempfree(x);
	}
	/* input is read only when there is a body or an END action */
	if (a[1] || a[2])
		while (getrec(&record, &recsize, true) > 0) {
			x = execute(a[1]);
			if (isexit(x))
				break;
			tempfree(x);
		}
  ex:
	if (setjmp(env) != 0)	/* handles exit within END */
		goto ex1;
	if (a[2]) {		/* END */
		x = execute(a[2]);
		if (isbreak(x) || isnext(x) || iscont(x))
			FATAL("illegal break, continue, next or nextfile from END");
		tempfree(x);
	}
  ex1:
	return(True);
}
216
/*
 * One Frame is pushed by call() for every active awk function invocation;
 * arg() and jump() read the current frame through frp.
 */
struct Frame {	/* stack frame for awk function calls */
	int nargs;	/* number of arguments in this call */
	Cell *fcncell;	/* pointer to Cell for function */
	Cell **args;	/* pointer to array of arguments after execute */
	Cell *retval;	/* return value */
};

#define	NARGS	50	/* max args in a call */

/* the frame stack is allocated and grown by call(), 100 frames at a time */
struct Frame *frame = NULL;	/* base of stack frames; dynamically allocated */
int	nframe = 0;		/* number of frames allocated */
struct Frame *frp = NULL;	/* frame pointer. bottom level unused */
229
/*
 * call: invoke a user-defined awk function.
 * a[0] evaluates to the function's Cell, a[1] is the list of call arguments.
 *
 * Steps, in order:
 *   1. evaluate a[0]; FATAL unless it is a function;
 *   2. allocate the frame stack on first use (100 frames);
 *   3. count the call arguments, warn when more are passed than defined,
 *      and FATAL when ncall + ndef exceeds NARGS;
 *   4. evaluate each argument: arrays are passed by reference, everything
 *      else is copied with copycell();
 *   5. fill parameters that were not supplied with fresh CCOPY temporaries;
 *   6. push a frame (growing the stack by 100 when full) and execute the
 *      body, whose Node pointer is stored in fcn->sval;
 *   7. release the argument cells; a parameter that the callee turned into
 *      an array is either freed (when it was not supplied) or has its
 *      table handed back to the caller's cell;
 *   8. pop the frame and return the frame's retval cell.
 *
 * NOTE(review): when the body yields an exit or next jump, the function
 * returns that jump cell before reaching frp--, so the frame is not popped
 * on that path.
 */
Cell *call(Node **a, int n)	/* function call.  very kludgy and fragile */
{
	static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
	int i, ncall, ndef;
	int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
	Node *x;
	Cell *args[NARGS], *oargs[NARGS];	/* BUG: fixed size arrays */
	Cell *y, *z, *fcn;
	char *s;

	fcn = execute(a[0]);	/* the function itself */
	s = fcn->nval;
	if (!isfcn(fcn))
		FATAL("calling undefined function %s", s);
	if (frame == NULL) {
		frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame));
		if (frame == NULL)
			FATAL("out of space for stack frames calling %s", s);
	}
	for (ncall = 0, x = a[1]; x != NULL; x = x->nnext)	/* args in call */
		ncall++;
	ndef = (int) fcn->fval;			/* args in defn */
	DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame));
	if (ncall > ndef)
		WARNING("function %s called with %d args, uses only %d",
			s, ncall, ndef);
	if (ncall + ndef > NARGS)
		FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
	for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) {	/* get call args */
		DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame));
		y = execute(x);
		oargs[i] = y;	/* remember the caller's cell for array hand-back below */
		DPRINTF("args[%d]: %s %f <%s>, t=%o\n",
			i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval);
		if (isfcn(y))
			FATAL("can't use function %s as argument in %s", y->nval, s);
		if (isarr(y))
			args[i] = y;	/* arrays by ref */
		else
			args[i] = copycell(y);
		tempfree(y);
	}
	for ( ; i < ndef; i++) {	/* add null args for ones not provided */
		args[i] = gettemp();
		*args[i] = newcopycell;
	}
	frp++;	/* now ok to up frame */
	if (frp >= frame + nframe) {
		int dfp = frp - frame;	/* old index */
		frame = (struct Frame *) reallocarray(frame, (nframe += 100), sizeof(*frame));
		if (frame == NULL)
			FATAL("out of space for stack frames in %s", s);
		frp = frame + dfp;	/* re-base onto the possibly moved stack */
	}
	frp->fcncell = fcn;
	frp->args = args;
	frp->nargs = ndef;	/* number defined with (excess are locals) */
	frp->retval = gettemp();

	DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame));
	y = execute((Node *)(fcn->sval));	/* execute body */
	DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame));

	for (i = 0; i < ndef; i++) {
		Cell *t = frp->args[i];
		if (isarr(t)) {
			if (t->csub == CCOPY) {
				if (i >= ncall) {
					/* unsupplied parameter used as a local array */
					freesymtab(t);
					t->csub = CTEMP;
					tempfree(t);
				} else {
					/* supplied scalar became an array: give it to the caller */
					oargs[i]->tval = t->tval;
					oargs[i]->tval &= ~(STR|NUM|DONTFREE);
					oargs[i]->sval = t->sval;
					tempfree(t);
				}
			}
		} else if (t != y) {	/* kludge to prevent freeing twice */
			t->csub = CTEMP;
			tempfree(t);
		} else if (t == y && t->csub == CCOPY) {
			t->csub = CTEMP;
			tempfree(t);
			freed = 1;
		}
	}
	tempfree(fcn);
	if (isexit(y) || isnext(y))
		return y;
	if (freed == 0) {
		tempfree(y);	/* don't free twice! */
	}
	z = frp->retval;			/* return value */
	DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval);
	frp--;
	return(z);
}
328
/*
 * copycell: duplicate cell x into a fresh temporary.
 *
 * The copy drops the CON, FLD and REC type bits, is tagged CCOPY so that it
 * is not freed until the call is over, and shares x's name pointer.  A
 * string value is duplicated with tostring() (and becomes freeable);
 * otherwise the copy is marked DONTFREE.  The numeric value is copied as is.
 */
Cell *copycell(Cell *x)
{
	Cell *dup = gettemp();

	/* copy is not constant or field */
	dup->tval = x->tval & ~(CON|FLD|REC);
	dup->csub = CCOPY;	/* prevents freeing until call is over */
	dup->nval = x->nval;	/* BUG? */
	dup->fval = x->fval;

	if (!isstr(x)) {
		dup->tval |= DONTFREE;
		return dup;
	}
	dup->sval = tostring(x->sval);
	dup->tval &= ~DONTFREE;
	return dup;
}
347
arg(Node ** a,int n)348 Cell *arg(Node **a, int n) /* nth argument of a function */
349 {
350
351 n = ptoi(a[0]); /* argument number, counting from 0 */
352 DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs);
353 if (n+1 > frp->nargs)
354 FATAL("argument #%d of function %s was not supplied",
355 n+1, frp->fcncell->nval);
356 return frp->args[n];
357 }
358
jump(Node ** a,int n)359 Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */
360 {
361 Cell *y;
362
363 switch (n) {
364 case EXIT:
365 if (a[0] != NULL) {
366 y = execute(a[0]);
367 errorflag = (int) getfval(y);
368 tempfree(y);
369 }
370 longjmp(env, 1);
371 case RETURN:
372 if (a[0] != NULL) {
373 y = execute(a[0]);
374 if ((y->tval & (STR|NUM)) == (STR|NUM)) {
375 setsval(frp->retval, getsval(y));
376 frp->retval->fval = getfval(y);
377 frp->retval->tval |= NUM;
378 }
379 else if (y->tval & STR)
380 setsval(frp->retval, getsval(y));
381 else if (y->tval & NUM)
382 setfval(frp->retval, getfval(y));
383 else /* can't happen */
384 FATAL("bad type variable %d", y->tval);
385 tempfree(y);
386 }
387 return(jret);
388 case NEXT:
389 return(jnext);
390 case NEXTFILE:
391 nextfile();
392 return(jnextfile);
393 case BREAK:
394 return(jbreak);
395 case CONTINUE:
396 return(jcont);
397 default: /* can't happen */
398 FATAL("illegal jump type %d", n);
399 }
400 return 0; /* not reached */
401 }
402
awkgetline(Node ** a,int n)403 Cell *awkgetline(Node **a, int n) /* get next line from specific input */
404 { /* a[0] is variable, a[1] is operator, a[2] is filename */
405 Cell *r, *x;
406 extern Cell **fldtab;
407 FILE *fp;
408 char *buf;
409 int bufsize = recsize;
410 int mode;
411 bool newflag;
412 double result;
413
414 if ((buf = (char *) malloc(bufsize)) == NULL)
415 FATAL("out of memory in getline");
416
417 fflush(stdout); /* in case someone is waiting for a prompt */
418 r = gettemp();
419 if (a[1] != NULL) { /* getline < file */
420 x = execute(a[2]); /* filename */
421 mode = ptoi(a[1]);
422 if (mode == '|') /* input pipe */
423 mode = LE; /* arbitrary flag */
424 fp = openfile(mode, getsval(x), &newflag);
425 tempfree(x);
426 if (fp == NULL)
427 n = -1;
428 else
429 n = readrec(&buf, &bufsize, fp, newflag);
430 if (n <= 0) {
431 ;
432 } else if (a[0] != NULL) { /* getline var <file */
433 x = execute(a[0]);
434 setsval(x, buf);
435 if (is_number(x->sval, & result)) {
436 x->fval = result;
437 x->tval |= NUM;
438 }
439 tempfree(x);
440 } else { /* getline <file */
441 setsval(fldtab[0], buf);
442 if (is_number(fldtab[0]->sval, & result)) {
443 fldtab[0]->fval = result;
444 fldtab[0]->tval |= NUM;
445 }
446 }
447 } else { /* bare getline; use current input */
448 if (a[0] == NULL) /* getline */
449 n = getrec(&record, &recsize, true);
450 else { /* getline var */
451 n = getrec(&buf, &bufsize, false);
452 if (n > 0) {
453 x = execute(a[0]);
454 setsval(x, buf);
455 if (is_number(x->sval, & result)) {
456 x->fval = result;
457 x->tval |= NUM;
458 }
459 tempfree(x);
460 }
461 }
462 }
463 setfval(r, (Awkfloat) n);
464 free(buf);
465 return r;
466 }
467
getnf(Node ** a,int n)468 Cell *getnf(Node **a, int n) /* get NF */
469 {
470 if (!donefld)
471 fldbld();
472 return (Cell *) a[0];
473 }
474
475 static char *
makearraystring(Node * p,const char * func)476 makearraystring(Node *p, const char *func)
477 {
478 char *buf;
479 int bufsz = recsize;
480 size_t blen;
481
482 if ((buf = (char *) malloc(bufsz)) == NULL) {
483 FATAL("%s: out of memory", func);
484 }
485
486 blen = 0;
487 buf[blen] = '\0';
488
489 for (; p; p = p->nnext) {
490 Cell *x = execute(p); /* expr */
491 char *s = getsval(x);
492 size_t seplen = strlen(getsval(subseploc));
493 size_t nsub = p->nnext ? seplen : 0;
494 size_t slen = strlen(s);
495 size_t tlen = blen + slen + nsub;
496
497 if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) {
498 FATAL("%s: out of memory %s[%s...]",
499 func, x->nval, buf);
500 }
501 memcpy(buf + blen, s, slen);
502 if (nsub) {
503 memcpy(buf + blen + slen, *SUBSEP, nsub);
504 }
505 buf[tlen] = '\0';
506 blen = tlen;
507 tempfree(x);
508 }
509 return buf;
510 }
511
array(Node ** a,int n)512 Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
513 {
514 Cell *x, *z;
515 char *buf;
516
517 x = execute(a[0]); /* Cell* for symbol table */
518 buf = makearraystring(a[1], __func__);
519 if (!isarr(x)) {
520 DPRINTF("making %s into an array\n", NN(x->nval));
521 if (freeable(x))
522 xfree(x->sval);
523 x->tval &= ~(STR|NUM|DONTFREE);
524 x->tval |= ARR;
525 x->sval = (char *) makesymtab(NSYMTAB);
526 }
527 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
528 z->ctype = OCELL;
529 z->csub = CVAR;
530 tempfree(x);
531 free(buf);
532 return(z);
533 }
534
awkdelete(Node ** a,int n)535 Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */
536 {
537 Cell *x;
538
539 x = execute(a[0]); /* Cell* for symbol table */
540 if (x == symtabloc) {
541 FATAL("cannot delete SYMTAB or its elements");
542 }
543 if (!isarr(x))
544 return True;
545 if (a[1] == NULL) { /* delete the elements, not the table */
546 freesymtab(x);
547 x->tval &= ~STR;
548 x->tval |= ARR;
549 x->sval = (char *) makesymtab(NSYMTAB);
550 } else {
551 char *buf = makearraystring(a[1], __func__);
552 freeelem(x, buf);
553 free(buf);
554 }
555 tempfree(x);
556 return True;
557 }
558
intest(Node ** a,int n)559 Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
560 {
561 Cell *ap, *k;
562 char *buf;
563
564 ap = execute(a[1]); /* array name */
565 if (!isarr(ap)) {
566 DPRINTF("making %s into an array\n", ap->nval);
567 if (freeable(ap))
568 xfree(ap->sval);
569 ap->tval &= ~(STR|NUM|DONTFREE);
570 ap->tval |= ARR;
571 ap->sval = (char *) makesymtab(NSYMTAB);
572 }
573 buf = makearraystring(a[0], __func__);
574 k = lookup(buf, (Array *) ap->sval);
575 tempfree(ap);
576 free(buf);
577 if (k == NULL)
578 return(False);
579 else
580 return(True);
581 }
582
583
584 /* ======== utf-8 code ========== */
585
586 /*
587 * Awk strings can contain ascii, random 8-bit items (eg Latin-1),
588 * or utf-8. u8_isutf tests whether a string starts with a valid
589 * utf-8 sequence, and returns 0 if not (e.g., high bit set).
590 * u8_nextlen returns length of next valid sequence, which is
591 * 1 for ascii, 2..4 for utf-8, or 1 for high bit non-utf.
592 * u8_strlen returns length of string in valid utf-8 sequences
593 * and/or high-bit bytes. Conversion functions go between byte
594 * number and character number.
595 *
596 * In theory, this behaves the same as before for non-utf8 bytes.
597 *
598 * Limited checking! This is a potential security hole.
599 */
600
601 /* is s the beginning of a valid utf-8 string? */
602 /* return length 1..4 if yes, 0 if no */
u8_isutf(const char * s)603 static int u8_isutf(const char *s)
604 {
605 int ret;
606 unsigned char c;
607
608 c = s[0];
609 if (c < 128 || awk_mb_cur_max == 1) {
610 ret = 1; /* what if it's 0? */
611 } else if (((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
612 ret = 2; /* 110xxxxx 10xxxxxx */
613 } else if (((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
614 && (s[2] & 0xC0) == 0x80) {
615 ret = 3; /* 1110xxxx 10xxxxxx 10xxxxxx */
616 } else if (((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
617 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
618 ret = 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
619 } else {
620 ret = 0;
621 }
622 return ret;
623 }
624
625 /* Convert (prefix of) utf8 string to utf-32 rune. */
626 /* Sets *rune to the value, returns the length. */
627 /* No error checking: watch out. */
u8_rune(int * rune,const char * s)628 int u8_rune(int *rune, const char *s)
629 {
630 int n, ret;
631 unsigned char c;
632
633 c = s[0];
634 if (c < 128 || awk_mb_cur_max == 1) {
635 *rune = c;
636 return 1;
637 }
638
639 n = strlen(s);
640 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
641 *rune = ((c & 0x1F) << 6) | (s[1] & 0x3F); /* 110xxxxx 10xxxxxx */
642 ret = 2;
643 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
644 && (s[2] & 0xC0) == 0x80) {
645 *rune = ((c & 0xF) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
646 /* 1110xxxx 10xxxxxx 10xxxxxx */
647 ret = 3;
648 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
649 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
650 *rune = ((c & 0x7) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
651 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
652 ret = 4;
653 } else {
654 *rune = c;
655 ret = 1;
656 }
657 return ret; /* returns one byte if sequence doesn't look like utf */
658 }
659
/*
 * u8_nextlen: return length of next sequence: 1 for ascii or random bytes,
 * 2..4 for valid utf8.  Never returns 0, so callers can always advance.
 */
int u8_nextlen(const char *s)
{
	int seqlen = u8_isutf(s);

	return seqlen != 0 ? seqlen : 1;
}
670
671 /* return number of utf characters or single non-utf bytes */
u8_strlen(const char * s)672 static int u8_strlen(const char *s)
673 {
674 int i, len, n, totlen;
675 unsigned char c;
676
677 n = strlen(s);
678 totlen = 0;
679 for (i = 0; i < n; i += len) {
680 c = s[i];
681 if (c < 128 || awk_mb_cur_max == 1) {
682 len = 1;
683 } else {
684 len = u8_nextlen(&s[i]);
685 }
686 totlen++;
687 if (i > n)
688 FATAL("bad utf count [%s] n=%d i=%d\n", s, n, i);
689 }
690 return totlen;
691 }
692
/*
 * u8_char2byte: convert utf-8 char number in a string to its byte offset.
 *
 * Skips charnum characters from the start of s and returns the number of
 * bytes skipped.  The walk stops at the terminating NUL, so a charnum larger
 * than the number of characters yields the byte length of s instead of
 * reading past the end of the string.
 */
static int u8_char2byte(const char *s, int charnum)
{
	int n;
	int bytenum = 0;

	while (charnum > 0 && *s != '\0') {
		n = u8_nextlen(s);
		s += n;
		bytenum += n;
		charnum--;
	}
	return bytenum;
}
707
/*
 * u8_byte2char: convert byte offset in s to the utf-8 char number that
 * starts there.  The result counts from 1: offset 0 maps to character 1.
 * Returns -1 when bytenum is beyond the end of the string.
 */
static int u8_byte2char(const char *s, int bytenum)
{
	int pos = 0;
	int charnum = 0;	/* BUG: what origin? */
		/* should be 0 to match start==0 which means no match */
	int total = strlen(s);

	if (bytenum > total) {
		return -1;	/* ??? */
	}
	while (pos <= bytenum) {
		pos += u8_nextlen(s+pos);
		charnum++;
	}
	return charnum;
}
725
/* runetochar() adapted from rune.c in the Plan 9 distribution */

/*
 * Constants for the utf-8 encoder.  BitN is the number of payload bits in
 * the lead byte of an N-byte sequence, Bitx the payload bits of a
 * continuation byte.  TN / Tx are the corresponding tag bits, RuneN the
 * largest value that fits in an N-byte sequence.
 */
enum
{
	Runeerror = 128, /* from somewhere else */
	Runemax = 0x10FFFF,

	Bit1 = 7,
	Bitx = 6,
	Bit2 = 5,
	Bit3 = 4,
	Bit4 = 3,
	Bit5 = 2,

	T1 = 0x00,		/* 0000 0000 */
	Tx = 0x80,		/* 1000 0000 */
	T2 = 0xC0,		/* 1100 0000 */
	T3 = 0xE0,		/* 1110 0000 */
	T4 = 0xF0,		/* 1111 0000 */
	T5 = 0xF8,		/* 1111 1000 */

	Rune1 = 0x7F,		/* 0000 0000 0000 0000 0111 1111 */
	Rune2 = 0x7FF,		/* 0000 0000 0000 0111 1111 1111 */
	Rune3 = 0xFFFF,		/* 0000 0000 1111 1111 1111 1111 */
	Rune4 = 0x1FFFFF,	/* 0001 1111 1111 1111 1111 1111 */

	Maskx = 0x3F,		/* 0011 1111 */
	Testx = 0xC0,		/* 1100 0000 */

};

/*
 * runetochar: encode rune c as utf-8 into str, which needs room for up to
 * four bytes; no terminating NUL is written.  Returns the number of bytes
 * stored.
 *
 *   c <= 0x7F      1 byte (this also catches negative values)
 *   c <= 0x7FF     2 bytes
 *   c <= 0xFFFF    3 bytes
 *   otherwise      4 bytes
 *
 * A value above Runemax is replaced by Runeerror before it is encoded.
 */
int runetochar(char *str, int c)
{
	int len, i;
	int lead;

	/* one character sequence 00000-0007F => 00-7F */
	if (c <= Rune1) {
		str[0] = c;
		return 1;
	}

	if (c <= Rune2) {
		/* two character sequence 00080-007FF => T2 Tx */
		len = 2;
		lead = T2;
	} else {
		if (c > Runemax)
			c = Runeerror;
		if (c <= Rune3) {
			/* three character sequence 00800-0FFFF => T3 Tx Tx */
			len = 3;
			lead = T3;
		} else {
			/* four character sequence 010000-1FFFFF => T4 Tx Tx Tx */
			len = 4;
			lead = T4;
		}
	}

	/* fill the continuation bytes from the last one backwards */
	for (i = len - 1; i > 0; i--) {
		str[i] = Tx | (c & Maskx);
		c >>= Bitx;
	}
	str[0] = lead | c;
	return len;
}
789
790
791 /* ========== end of utf8 code =========== */
792
793
794
matchop(Node ** a,int n)795 Cell *matchop(Node **a, int n) /* ~ and match() */
796 {
797 Cell *x, *y, *z;
798 char *s, *t;
799 int i;
800 int cstart, cpatlen, len;
801 fa *pfa;
802 int (*mf)(fa *, const char *) = match, mode = 0;
803
804 if (n == MATCHFCN) {
805 mf = pmatch;
806 mode = 1;
807 }
808 x = execute(a[1]); /* a[1] = target text */
809 s = getsval(x);
810 if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */
811 i = (*mf)((fa *) a[2], s);
812 else {
813 y = execute(a[2]); /* a[2] = regular expr */
814 t = getsval(y);
815 pfa = makedfa(t, mode);
816 i = (*mf)(pfa, s);
817 tempfree(y);
818 }
819 z = x;
820 if (n == MATCHFCN) {
821 int start = patbeg - s + 1; /* origin 1 */
822 if (patlen < 0) {
823 start = 0; /* not found */
824 } else {
825 cstart = u8_byte2char(s, start-1);
826 cpatlen = 0;
827 for (i = 0; i < patlen; i += len) {
828 len = u8_nextlen(patbeg+i);
829 cpatlen++;
830 }
831
832 start = cstart;
833 patlen = cpatlen;
834 }
835
836 setfval(rstartloc, (Awkfloat) start);
837 setfval(rlengthloc, (Awkfloat) patlen);
838 x = gettemp();
839 x->tval = NUM;
840 x->fval = start;
841 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
842 x = True;
843 else
844 x = False;
845
846 tempfree(z);
847 return x;
848 }
849
850
boolop(Node ** a,int n)851 Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */
852 {
853 Cell *x, *y;
854 int i;
855
856 x = execute(a[0]);
857 i = istrue(x);
858 tempfree(x);
859 switch (n) {
860 case BOR:
861 if (i) return(True);
862 y = execute(a[1]);
863 i = istrue(y);
864 tempfree(y);
865 if (i) return(True);
866 else return(False);
867 case AND:
868 if ( !i ) return(False);
869 y = execute(a[1]);
870 i = istrue(y);
871 tempfree(y);
872 if (i) return(True);
873 else return(False);
874 case NOT:
875 if (i) return(False);
876 else return(True);
877 default: /* can't happen */
878 FATAL("unknown boolean operator %d", n);
879 }
880 return 0; /*NOTREACHED*/
881 }
882
relop(Node ** a,int n)883 Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */
884 {
885 int i;
886 Cell *x, *y;
887 Awkfloat j;
888 bool x_is_nan, y_is_nan;
889
890 x = execute(a[0]);
891 y = execute(a[1]);
892 x_is_nan = isnan(x->fval);
893 y_is_nan = isnan(y->fval);
894 if (x->tval&NUM && y->tval&NUM) {
895 if ((x_is_nan || y_is_nan) && n != NE)
896 return(False);
897 j = x->fval - y->fval;
898 i = j<0? -1: (j>0? 1: 0);
899 } else {
900 i = strcmp(getsval(x), getsval(y));
901 }
902 tempfree(x);
903 tempfree(y);
904 switch (n) {
905 case LT: if (i<0) return(True);
906 else return(False);
907 case LE: if (i<=0) return(True);
908 else return(False);
909 case NE: if (x_is_nan && y_is_nan) return(True);
910 else if (i!=0) return(True);
911 else return(False);
912 case EQ: if (i == 0) return(True);
913 else return(False);
914 case GE: if (i>=0) return(True);
915 else return(False);
916 case GT: if (i>0) return(True);
917 else return(False);
918 default: /* can't happen */
919 FATAL("unknown relational operator %d", n);
920 }
921 return 0; /*NOTREACHED*/
922 }
923
/*
 * tfree: free a tempcell.  Releases the cell's string when it is freeable,
 * then pushes the cell onto the front of the tmps free list.  Freeing the
 * cell that is already at the head of the list is fatal.
 */
void tfree(Cell *a)
{
	if (freeable(a)) {
		DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval);
		xfree(a->sval);
	}
	if (tmps == a) {
		FATAL("tempcell list is curdled");
	}
	a->cnext = tmps;
	tmps = a;
}
935
gettemp(void)936 Cell *gettemp(void) /* get a tempcell */
937 { int i;
938 Cell *x;
939
940 if (!tmps) {
941 tmps = (Cell *) calloc(100, sizeof(*tmps));
942 if (!tmps)
943 FATAL("out of space for temporaries");
944 for (i = 1; i < 100; i++)
945 tmps[i-1].cnext = &tmps[i];
946 tmps[i-1].cnext = NULL;
947 }
948 x = tmps;
949 tmps = x->cnext;
950 *x = tempcell;
951 return(x);
952 }
953
indirect(Node ** a,int n)954 Cell *indirect(Node **a, int n) /* $( a[0] ) */
955 {
956 Awkfloat val;
957 Cell *x;
958 int m;
959 char *s;
960
961 x = execute(a[0]);
962 val = getfval(x); /* freebsd: defend against super large field numbers */
963 if ((Awkfloat)INT_MAX < val)
964 FATAL("trying to access out of range field %s", x->nval);
965 m = (int) val;
966 if (m == 0 && !is_number(s = getsval(x), NULL)) /* suspicion! */
967 FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
968 /* BUG: can x->nval ever be null??? */
969 tempfree(x);
970 x = fieldadr(m);
971 x->ctype = OCELL; /* BUG? why are these needed? */
972 x->csub = CFLD;
973 return(x);
974 }
975
substr(Node ** a,int nnn)976 Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
977 {
978 int k, m, n;
979 int mb, nb;
980 char *s;
981 int temp;
982 Cell *x, *y, *z = NULL;
983
984 x = execute(a[0]);
985 y = execute(a[1]);
986 if (a[2] != NULL)
987 z = execute(a[2]);
988 s = getsval(x);
989 k = u8_strlen(s) + 1;
990 if (k <= 1) {
991 tempfree(x);
992 tempfree(y);
993 if (a[2] != NULL) {
994 tempfree(z);
995 }
996 x = gettemp();
997 setsval(x, "");
998 return(x);
999 }
1000 m = (int) getfval(y);
1001 if (m <= 0)
1002 m = 1;
1003 else if (m > k)
1004 m = k;
1005 tempfree(y);
1006 if (a[2] != NULL) {
1007 n = (int) getfval(z);
1008 tempfree(z);
1009 } else
1010 n = k - 1;
1011 if (n < 0)
1012 n = 0;
1013 else if (n > k - m)
1014 n = k - m;
1015 /* m is start, n is length from there */
1016 DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s);
1017 y = gettemp();
1018 mb = u8_char2byte(s, m-1); /* byte offset of start char in s */
1019 nb = mb + u8_char2byte(&s[mb], n); /* byte offset of end+1 char in s */
1020
1021 temp = s[nb]; /* with thanks to John Linderman */
1022 s[nb] = '\0';
1023 setsval(y, s + mb);
1024 s[nb] = temp;
1025 tempfree(x);
1026 return(y);
1027 }
1028
sindex(Node ** a,int nnn)1029 Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */
1030 {
1031 Cell *x, *y, *z;
1032 char *s1, *s2, *p1, *p2, *q;
1033 Awkfloat v = 0.0;
1034
1035 x = execute(a[0]);
1036 s1 = getsval(x);
1037 y = execute(a[1]);
1038 s2 = getsval(y);
1039
1040 z = gettemp();
1041 for (p1 = s1; *p1 != '\0'; p1++) {
1042 for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++)
1043 continue;
1044 if (*p2 == '\0') {
1045 /* v = (Awkfloat) (p1 - s1 + 1); origin 1 */
1046
1047 /* should be a function: used in match() as well */
1048 int i, len;
1049 v = 0;
1050 for (i = 0; i < p1-s1+1; i += len) {
1051 len = u8_nextlen(s1+i);
1052 v++;
1053 }
1054 break;
1055 }
1056 }
1057 tempfree(x);
1058 tempfree(y);
1059 setfval(z, v);
1060 return(z);
1061 }
1062
/*
 * has_utf8: return 1 if s contains any utf-8 (2 bytes or more) character,
 * 0 otherwise.  The scan stops at the first multi-byte sequence.
 */
static int has_utf8(char *s)
{
	while (*s != 0) {
		int step = u8_nextlen(s);

		if (step > 1)
			return 1;
		s += step;
	}
	return 0;
}
1074
1075 #define MAXNUMSIZE 50
1076
format(char ** pbuf,int * pbufsize,const char * s,Node * a)1077 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */
1078 {
1079 char *fmt;
1080 char *p, *t;
1081 const char *os;
1082 Cell *x;
1083 int flag = 0, n;
1084 int fmtwd; /* format width */
1085 int fmtsz = recsize;
1086 char *buf = *pbuf;
1087 int bufsize = *pbufsize;
1088 #define FMTSZ(a) (fmtsz - ((a) - fmt))
1089 #define BUFSZ(a) (bufsize - ((a) - buf))
1090
1091 static bool first = true;
1092 static bool have_a_format = false;
1093
1094 if (first) {
1095 char xbuf[100];
1096
1097 snprintf(xbuf, sizeof(xbuf), "%a", 42.0);
1098 have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0);
1099 first = false;
1100 }
1101
1102 os = s;
1103 p = buf;
1104 if ((fmt = (char *) malloc(fmtsz)) == NULL)
1105 FATAL("out of memory in format()");
1106 while (*s) {
1107 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1");
1108 if (*s != '%') {
1109 *p++ = *s++;
1110 continue;
1111 }
1112 if (*(s+1) == '%') {
1113 *p++ = '%';
1114 s += 2;
1115 continue;
1116 }
1117 fmtwd = atoi(s+1);
1118 if (fmtwd < 0)
1119 fmtwd = -fmtwd;
1120 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
1121 for (t = fmt; (*t++ = *s) != '\0'; s++) {
1122 if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
1123 FATAL("format item %.30s... ran format() out of memory", os);
1124 /* Ignore size specifiers */
1125 if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */
1126 t--;
1127 continue;
1128 }
1129 if (isalpha((uschar)*s))
1130 break;
1131 if (*s == '$') {
1132 FATAL("'$' not permitted in awk formats");
1133 }
1134 if (*s == '*') {
1135 if (a == NULL) {
1136 FATAL("not enough args in printf(%s)", os);
1137 }
1138 x = execute(a);
1139 a = a->nnext;
1140 snprintf(t - 1, FMTSZ(t - 1),
1141 "%d", fmtwd=(int) getfval(x));
1142 if (fmtwd < 0)
1143 fmtwd = -fmtwd;
1144 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
1145 t = fmt + strlen(fmt);
1146 tempfree(x);
1147 }
1148 }
1149 *t = '\0';
1150 if (fmtwd < 0)
1151 fmtwd = -fmtwd;
1152 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
1153 switch (*s) {
1154 case 'a': case 'A':
1155 if (have_a_format)
1156 flag = *s;
1157 else
1158 flag = 'f';
1159 break;
1160 case 'f': case 'e': case 'g': case 'E': case 'G':
1161 flag = 'f';
1162 break;
1163 case 'd': case 'i': case 'o': case 'x': case 'X': case 'u':
1164 flag = (*s == 'd' || *s == 'i') ? 'd' : 'u';
1165 *(t-1) = 'j';
1166 *t = *s;
1167 *++t = '\0';
1168 break;
1169 case 's':
1170 flag = 's';
1171 break;
1172 case 'c':
1173 flag = 'c';
1174 break;
1175 default:
1176 WARNING("weird printf conversion %s", fmt);
1177 flag = '?';
1178 break;
1179 }
1180 if (a == NULL)
1181 FATAL("not enough args in printf(%s)", os);
1182 x = execute(a);
1183 a = a->nnext;
1184 n = MAXNUMSIZE;
1185 if (fmtwd > n)
1186 n = fmtwd;
1187 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
1188 switch (flag) {
1189 case '?':
1190 snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */
1191 t = getsval(x);
1192 n = strlen(t);
1193 if (fmtwd > n)
1194 n = fmtwd;
1195 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
1196 p += strlen(p);
1197 snprintf(p, BUFSZ(p), "%s", t);
1198 break;
1199 case 'a':
1200 case 'A':
1201 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
1202 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
1203 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
1204
1205 case 's': {
1206 t = getsval(x);
1207 n = strlen(t);
1208 /* if simple format or no utf-8 in the string, sprintf works */
1209 if (!has_utf8(t) || strcmp(fmt,"%s") == 0) {
1210 if (fmtwd > n)
1211 n = fmtwd;
1212 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
1213 FATAL("huge string/format (%d chars) in printf %.30s..." \
1214 " ran format() out of memory", n, t);
1215 snprintf(p, BUFSZ(p), fmt, t);
1216 break;
1217 }
1218
1219 /* get here if string has utf-8 chars and fmt is not plain %s */
1220 /* "%-w.ps", where -, w and .p are all optional */
1221 /* '0' before the w is a flag character */
1222 /* fmt points at % */
1223 int ljust = 0, wid = 0, prec = n, pad = 0;
1224 char *f = fmt+1;
1225 if (f[0] == '-') {
1226 ljust = 1;
1227 f++;
1228 }
1229 // flags '0' and '+' are recognized but skipped
1230 if (f[0] == '0') {
1231 f++;
1232 if (f[0] == '+')
1233 f++;
1234 }
1235 if (f[0] == '+') {
1236 f++;
1237 if (f[0] == '0')
1238 f++;
1239 }
1240 if (isdigit((uschar)f[0])) { /* there is a wid */
1241 wid = strtol(f, &f, 10);
1242 }
1243 if (f[0] == '.') { /* there is a .prec */
1244 prec = strtol(++f, &f, 10);
1245 }
1246 if (prec > u8_strlen(t))
1247 prec = u8_strlen(t);
1248 pad = wid>prec ? wid - prec : 0; // has to be >= 0
1249 int i, precb;
1250
1251 if (ljust) { // print prec chars from t, then pad blanks
1252 precb = u8_char2byte(t, prec);
1253 for (i = 0; i < precb; i++) {
1254 //putchar(t[i]);
1255 *p++ = t[i];
1256 }
1257 for (i = 0; i < pad; i++) {
1258 //printf(" ");
1259 *p++ = ' ';
1260 }
1261 } else { // print pad blanks, then prec chars from t
1262 for (i = 0; i < pad; i++) {
1263 //printf(" ");
1264 *p++ = ' ';
1265 }
1266 precb = u8_char2byte(t, prec);
1267 for (i = 0; i < precb; i++) {
1268 //putchar(t[i]);
1269 *p++ = t[i];
1270 }
1271 }
1272 *p = 0;
1273 break;
1274 }
1275
1276 case 'c': {
1277 /*
1278 * If a numeric value is given, awk should just turn
1279 * it into a character and print it:
1280 * BEGIN { printf("%c\n", 65) }
1281 * prints "A".
1282 *
1283 * But what if the numeric value is > 128 and
1284 * represents a valid Unicode code point?!? We do
1285 * our best to convert it back into UTF-8. If we
1286 * can't, we output the encoding of the Unicode
1287 * "invalid character", 0xFFFD.
1288 */
1289 if (isnum(x)) {
1290 int charval = (int) getfval(x);
1291
1292 if (charval != 0) {
1293 if (charval < 128 || awk_mb_cur_max == 1)
1294 snprintf(p, BUFSZ(p), fmt, charval);
1295 else {
1296 // possible unicode character
1297 size_t count;
1298 char *bs = wide_char_to_byte_str(charval, &count);
1299
1300 if (bs == NULL) { // invalid character
1301 // use unicode invalid character, 0xFFFD
1302 static char invalid_char[] = "\357\277\275";
1303 bs = invalid_char;
1304 count = 3;
1305 }
1306 t = bs;
1307 n = count;
1308 goto format_percent_c;
1309 }
1310 } else {
1311 *p++ = '\0'; /* explicit null byte */
1312 *p = '\0'; /* next output will start here */
1313 }
1314 break;
1315 }
1316 t = getsval(x);
1317 n = u8_nextlen(t);
1318 format_percent_c:
1319 if (n < 2) { /* not utf8 */
1320 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
1321 break;
1322 }
1323
1324 // utf8 character, almost same song and dance as for %s
1325 int ljust = 0, wid = 0, prec = n, pad = 0;
1326 char *f = fmt+1;
1327 if (f[0] == '-') {
1328 ljust = 1;
1329 f++;
1330 }
1331 // flags '0' and '+' are recognized but skipped
1332 if (f[0] == '0') {
1333 f++;
1334 if (f[0] == '+')
1335 f++;
1336 }
1337 if (f[0] == '+') {
1338 f++;
1339 if (f[0] == '0')
1340 f++;
1341 }
1342 if (isdigit((uschar)f[0])) { /* there is a wid */
1343 wid = strtol(f, &f, 10);
1344 }
1345 if (f[0] == '.') { /* there is a .prec */
1346 prec = strtol(++f, &f, 10);
1347 }
1348 if (prec > 1) // %c --> only one character
1349 prec = 1;
1350 pad = wid>prec ? wid - prec : 0; // has to be >= 0
1351 int i;
1352
1353 if (ljust) { // print one char from t, then pad blanks
1354 for (i = 0; i < n; i++)
1355 *p++ = t[i];
1356 for (i = 0; i < pad; i++) {
1357 //printf(" ");
1358 *p++ = ' ';
1359 }
1360 } else { // print pad blanks, then prec chars from t
1361 for (i = 0; i < pad; i++) {
1362 //printf(" ");
1363 *p++ = ' ';
1364 }
1365 for (i = 0; i < n; i++)
1366 *p++ = t[i];
1367 }
1368 *p = 0;
1369 break;
1370 }
1371 default:
1372 FATAL("can't happen: bad conversion %c in format()", flag);
1373 }
1374
1375 tempfree(x);
1376 p += strlen(p);
1377 s++;
1378 }
1379 *p = '\0';
1380 free(fmt);
1381 for ( ; a; a = a->nnext) { /* evaluate any remaining args */
1382 x = execute(a);
1383 tempfree(x);
1384 }
1385 *pbuf = buf;
1386 *pbufsize = bufsize;
1387 return p - buf;
1388 }
1389
awksprintf(Node ** a,int n)1390 Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */
1391 {
1392 Cell *x;
1393 Node *y;
1394 char *buf;
1395 int bufsz=3*recsize;
1396
1397 if ((buf = (char *) malloc(bufsz)) == NULL)
1398 FATAL("out of memory in awksprintf");
1399 y = a[0]->nnext;
1400 x = execute(a[0]);
1401 if (format(&buf, &bufsz, getsval(x), y) == -1)
1402 FATAL("sprintf string %.30s... too long. can't happen.", buf);
1403 tempfree(x);
1404 x = gettemp();
1405 x->sval = buf;
1406 x->tval = STR;
1407 return(x);
1408 }
1409
awkprintf(Node ** a,int n)1410 Cell *awkprintf(Node **a, int n) /* printf */
1411 { /* a[0] is list of args, starting with format string */
1412 /* a[1] is redirection operator, a[2] is redirection file */
1413 FILE *fp;
1414 Cell *x;
1415 Node *y;
1416 char *buf;
1417 int len;
1418 int bufsz=3*recsize;
1419
1420 if ((buf = (char *) malloc(bufsz)) == NULL)
1421 FATAL("out of memory in awkprintf");
1422 y = a[0]->nnext;
1423 x = execute(a[0]);
1424 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
1425 FATAL("printf string %.30s... too long. can't happen.", buf);
1426 tempfree(x);
1427 if (a[1] == NULL) {
1428 /* fputs(buf, stdout); */
1429 fwrite(buf, len, 1, stdout);
1430 if (ferror(stdout))
1431 FATAL("write error on stdout");
1432 } else {
1433 fp = redirect(ptoi(a[1]), a[2]);
1434 /* fputs(buf, fp); */
1435 fwrite(buf, len, 1, fp);
1436 fflush(fp);
1437 if (ferror(fp))
1438 FATAL("write error on %s", filename(fp));
1439 }
1440 free(buf);
1441 return(True);
1442 }
1443
arith(Node ** a,int n)1444 Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */
1445 {
1446 Awkfloat i, j = 0;
1447 double v;
1448 Cell *x, *y, *z;
1449
1450 x = execute(a[0]);
1451 i = getfval(x);
1452 tempfree(x);
1453 if (n != UMINUS && n != UPLUS) {
1454 y = execute(a[1]);
1455 j = getfval(y);
1456 tempfree(y);
1457 }
1458 z = gettemp();
1459 switch (n) {
1460 case ADD:
1461 i += j;
1462 break;
1463 case MINUS:
1464 i -= j;
1465 break;
1466 case MULT:
1467 i *= j;
1468 break;
1469 case DIVIDE:
1470 if (j == 0)
1471 FATAL("division by zero");
1472 i /= j;
1473 break;
1474 case MOD:
1475 if (j == 0)
1476 FATAL("division by zero in mod");
1477 modf(i/j, &v);
1478 i = i - j * v;
1479 break;
1480 case UMINUS:
1481 i = -i;
1482 break;
1483 case UPLUS: /* handled by getfval(), above */
1484 break;
1485 case POWER:
1486 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */
1487 i = ipow(i, (int) j);
1488 else {
1489 errno = 0;
1490 i = errcheck(pow(i, j), "pow");
1491 }
1492 break;
1493 default: /* can't happen */
1494 FATAL("illegal arithmetic operator %d", n);
1495 }
1496 setfval(z, i);
1497 return(z);
1498 }
1499
/*
 * ipow - raise x to the integer power n by repeated squaring.
 *
 * Returns 1 for n <= 0.  Ought to be done by pow, but isn't always.
 */
double ipow(double x, int n)
{
	if (n <= 0)
		return 1;

	const double half = ipow(x, n / 2);

	/* odd exponents need one extra factor of x */
	return (n % 2 != 0) ? x * half * half : half * half;
}
1512
incrdecr(Node ** a,int n)1513 Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */
1514 {
1515 Cell *x, *z;
1516 int k;
1517 Awkfloat xf;
1518
1519 x = execute(a[0]);
1520 xf = getfval(x);
1521 k = (n == PREINCR || n == POSTINCR) ? 1 : -1;
1522 if (n == PREINCR || n == PREDECR) {
1523 setfval(x, xf + k);
1524 return(x);
1525 }
1526 z = gettemp();
1527 setfval(z, xf);
1528 setfval(x, xf + k);
1529 tempfree(x);
1530 return(z);
1531 }
1532
assign(Node ** a,int n)1533 Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */
1534 { /* this is subtle; don't muck with it. */
1535 Cell *x, *y;
1536 Awkfloat xf, yf;
1537 double v;
1538
1539 y = execute(a[1]);
1540 x = execute(a[0]);
1541 if (n == ASSIGN) { /* ordinary assignment */
1542 if (x == y && !(x->tval & (FLD|REC)) && x != nfloc)
1543 ; /* self-assignment: leave alone unless it's a field or NF */
1544 else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
1545 yf = getfval(y);
1546 setsval(x, getsval(y));
1547 x->fval = yf;
1548 x->tval |= NUM;
1549 }
1550 else if (isstr(y))
1551 setsval(x, getsval(y));
1552 else if (isnum(y))
1553 setfval(x, getfval(y));
1554 else
1555 funnyvar(y, "read value of");
1556 tempfree(y);
1557 return(x);
1558 }
1559 xf = getfval(x);
1560 yf = getfval(y);
1561 switch (n) {
1562 case ADDEQ:
1563 xf += yf;
1564 break;
1565 case SUBEQ:
1566 xf -= yf;
1567 break;
1568 case MULTEQ:
1569 xf *= yf;
1570 break;
1571 case DIVEQ:
1572 if (yf == 0)
1573 FATAL("division by zero in /=");
1574 xf /= yf;
1575 break;
1576 case MODEQ:
1577 if (yf == 0)
1578 FATAL("division by zero in %%=");
1579 modf(xf/yf, &v);
1580 xf = xf - yf * v;
1581 break;
1582 case POWEQ:
1583 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */
1584 xf = ipow(xf, (int) yf);
1585 else {
1586 errno = 0;
1587 xf = errcheck(pow(xf, yf), "pow");
1588 }
1589 break;
1590 default:
1591 FATAL("illegal assignment operator %d", n);
1592 break;
1593 }
1594 tempfree(y);
1595 setfval(x, xf);
1596 return(x);
1597 }
1598
cat(Node ** a,int q)1599 Cell *cat(Node **a, int q) /* a[0] cat a[1] */
1600 {
1601 Cell *x, *y, *z;
1602 int n1, n2;
1603 char *s = NULL;
1604 int ssz = 0;
1605
1606 x = execute(a[0]);
1607 n1 = strlen(getsval(x));
1608 adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1");
1609 memcpy(s, x->sval, n1);
1610
1611 tempfree(x);
1612
1613 y = execute(a[1]);
1614 n2 = strlen(getsval(y));
1615 adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
1616 memcpy(s + n1, y->sval, n2);
1617 s[n1 + n2] = '\0';
1618
1619 tempfree(y);
1620
1621 z = gettemp();
1622 z->sval = s;
1623 z->tval = STR;
1624
1625 return(z);
1626 }
1627
pastat(Node ** a,int n)1628 Cell *pastat(Node **a, int n) /* a[0] { a[1] } */
1629 {
1630 Cell *x;
1631
1632 if (a[0] == NULL)
1633 x = execute(a[1]);
1634 else {
1635 x = execute(a[0]);
1636 if (istrue(x)) {
1637 tempfree(x);
1638 x = execute(a[1]);
1639 }
1640 }
1641 return x;
1642 }
1643
dopa2(Node ** a,int n)1644 Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */
1645 {
1646 Cell *x;
1647 int pair;
1648
1649 pair = ptoi(a[3]);
1650 if (pairstack[pair] == 0) {
1651 x = execute(a[0]);
1652 if (istrue(x))
1653 pairstack[pair] = 1;
1654 tempfree(x);
1655 }
1656 if (pairstack[pair] == 1) {
1657 x = execute(a[1]);
1658 if (istrue(x))
1659 pairstack[pair] = 0;
1660 tempfree(x);
1661 x = execute(a[2]);
1662 return(x);
1663 }
1664 return(False);
1665 }
1666
split(Node ** a,int nnn)1667 Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
1668 {
1669 Cell *x = NULL, *y, *ap;
1670 const char *s, *origs, *t;
1671 const char *fs = NULL;
1672 char *origfs = NULL;
1673 int sep;
1674 char temp, num[50];
1675 int j, n, tempstat, arg3type;
1676 double result;
1677
1678 y = execute(a[0]); /* source string */
1679 origs = s = strdup(getsval(y));
1680 if (s == NULL)
1681 FATAL("out of space in split");
1682 tempfree(y);
1683 arg3type = ptoi(a[3]);
1684 if (a[2] == NULL) { /* BUG: CSV should override implicit fs but not explicit */
1685 fs = getsval(fsloc);
1686 } else if (arg3type == STRING) { /* split(str,arr,"string") */
1687 x = execute(a[2]);
1688 fs = origfs = strdup(getsval(x));
1689 if (fs == NULL)
1690 FATAL("out of space in split");
1691 tempfree(x);
1692 } else if (arg3type == REGEXPR) {
1693 fs = "(regexpr)"; /* split(str,arr,/regexpr/) */
1694 } else {
1695 FATAL("illegal type of split");
1696 }
1697 sep = *fs;
1698 ap = execute(a[1]); /* array name */
1699 /* BUG 7/26/22: this appears not to reset array: see C1/asplit */
1700 freesymtab(ap);
1701 DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
1702 ap->tval &= ~STR;
1703 ap->tval |= ARR;
1704 ap->sval = (char *) makesymtab(NSYMTAB);
1705
1706 n = 0;
1707 if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) {
1708 /* split(s, a, //); have to arrange that it looks like empty sep */
1709 arg3type = 0;
1710 fs = "";
1711 sep = 0;
1712 }
1713 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */
1714 fa *pfa;
1715 if (arg3type == REGEXPR) { /* it's ready already */
1716 pfa = (fa *) a[2];
1717 } else {
1718 pfa = makedfa(fs, 1);
1719 }
1720 if (nematch(pfa,s)) {
1721 tempstat = pfa->initstat;
1722 pfa->initstat = 2;
1723 do {
1724 n++;
1725 snprintf(num, sizeof(num), "%d", n);
1726 temp = *patbeg;
1727 setptr(patbeg, '\0');
1728 if (is_number(s, & result))
1729 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1730 else
1731 setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1732 setptr(patbeg, temp);
1733 s = patbeg + patlen;
1734 if (*(patbeg+patlen-1) == '\0' || *s == '\0') {
1735 n++;
1736 snprintf(num, sizeof(num), "%d", n);
1737 setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
1738 pfa->initstat = tempstat;
1739 goto spdone;
1740 }
1741 } while (nematch(pfa,s));
1742 pfa->initstat = tempstat; /* bwk: has to be here to reset */
1743 /* cf gsub and refldbld */
1744 }
1745 n++;
1746 snprintf(num, sizeof(num), "%d", n);
1747 if (is_number(s, & result))
1748 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1749 else
1750 setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1751 spdone:
1752 pfa = NULL;
1753
1754 } else if (a[2] == NULL && CSV) { /* CSV only if no explicit separator */
1755 char *newt = (char *) malloc(strlen(s)); /* for building new string; reuse for each field */
1756 for (;;) {
1757 char *fr = newt;
1758 n++;
1759 if (*s == '"' ) { /* start of "..." */
1760 for (s++ ; *s != '\0'; ) {
1761 if (*s == '"' && s[1] != '\0' && s[1] == '"') {
1762 s += 2; /* doubled quote */
1763 *fr++ = '"';
1764 } else if (*s == '"' && (s[1] == '\0' || s[1] == ',')) {
1765 s++; /* skip over closing quote */
1766 break;
1767 } else {
1768 *fr++ = *s++;
1769 }
1770 }
1771 *fr++ = 0;
1772 } else { /* unquoted field */
1773 while (*s != ',' && *s != '\0')
1774 *fr++ = *s++;
1775 *fr++ = 0;
1776 }
1777 snprintf(num, sizeof(num), "%d", n);
1778 if (is_number(newt, &result))
1779 setsymtab(num, newt, result, STR|NUM, (Array *) ap->sval);
1780 else
1781 setsymtab(num, newt, 0.0, STR, (Array *) ap->sval);
1782 if (*s++ == '\0')
1783 break;
1784 }
1785 free(newt);
1786
1787 } else if (!CSV && sep == ' ') { /* usual case: split on white space */
1788 for (n = 0; ; ) {
1789 #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
1790 while (ISWS(*s))
1791 s++;
1792 if (*s == '\0')
1793 break;
1794 n++;
1795 t = s;
1796 do
1797 s++;
1798 while (*s != '\0' && !ISWS(*s));
1799 temp = *s;
1800 setptr(s, '\0');
1801 snprintf(num, sizeof(num), "%d", n);
1802 if (is_number(t, & result))
1803 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1804 else
1805 setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1806 setptr(s, temp);
1807 if (*s != '\0')
1808 s++;
1809 }
1810
1811 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */
1812 for (n = 0; *s != '\0'; s += u8_nextlen(s)) {
1813 char buf[10];
1814 n++;
1815 snprintf(num, sizeof(num), "%d", n);
1816
1817 for (j = 0; j < u8_nextlen(s); j++) {
1818 buf[j] = s[j];
1819 }
1820 buf[j] = '\0';
1821
1822 if (isdigit((uschar)buf[0]))
1823 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
1824 else
1825 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
1826 }
1827
1828 } else if (*s != '\0') { /* some random single character */
1829 for (;;) {
1830 n++;
1831 t = s;
1832 while (*s != sep && *s != '\0')
1833 s++;
1834 temp = *s;
1835 setptr(s, '\0');
1836 snprintf(num, sizeof(num), "%d", n);
1837 if (is_number(t, & result))
1838 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1839 else
1840 setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1841 setptr(s, temp);
1842 if (*s++ == '\0')
1843 break;
1844 }
1845 }
1846 tempfree(ap);
1847 xfree(origs);
1848 xfree(origfs);
1849 x = gettemp();
1850 x->tval = NUM;
1851 x->fval = n;
1852 return(x);
1853 }
1854
condexpr(Node ** a,int n)1855 Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */
1856 {
1857 Cell *x;
1858
1859 x = execute(a[0]);
1860 if (istrue(x)) {
1861 tempfree(x);
1862 x = execute(a[1]);
1863 } else {
1864 tempfree(x);
1865 x = execute(a[2]);
1866 }
1867 return(x);
1868 }
1869
ifstat(Node ** a,int n)1870 Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */
1871 {
1872 Cell *x;
1873
1874 x = execute(a[0]);
1875 if (istrue(x)) {
1876 tempfree(x);
1877 x = execute(a[1]);
1878 } else if (a[2] != NULL) {
1879 tempfree(x);
1880 x = execute(a[2]);
1881 }
1882 return(x);
1883 }
1884
whilestat(Node ** a,int n)1885 Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */
1886 {
1887 Cell *x;
1888
1889 for (;;) {
1890 x = execute(a[0]);
1891 if (!istrue(x))
1892 return(x);
1893 tempfree(x);
1894 x = execute(a[1]);
1895 if (isbreak(x)) {
1896 x = True;
1897 return(x);
1898 }
1899 if (isnext(x) || isexit(x) || isret(x))
1900 return(x);
1901 tempfree(x);
1902 }
1903 }
1904
dostat(Node ** a,int n)1905 Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */
1906 {
1907 Cell *x;
1908
1909 for (;;) {
1910 x = execute(a[0]);
1911 if (isbreak(x))
1912 return True;
1913 if (isnext(x) || isexit(x) || isret(x))
1914 return(x);
1915 tempfree(x);
1916 x = execute(a[1]);
1917 if (!istrue(x))
1918 return(x);
1919 tempfree(x);
1920 }
1921 }
1922
forstat(Node ** a,int n)1923 Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */
1924 {
1925 Cell *x;
1926
1927 x = execute(a[0]);
1928 tempfree(x);
1929 for (;;) {
1930 if (a[1]!=NULL) {
1931 x = execute(a[1]);
1932 if (!istrue(x)) return(x);
1933 else tempfree(x);
1934 }
1935 x = execute(a[3]);
1936 if (isbreak(x)) /* turn off break */
1937 return True;
1938 if (isnext(x) || isexit(x) || isret(x))
1939 return(x);
1940 tempfree(x);
1941 x = execute(a[2]);
1942 tempfree(x);
1943 }
1944 }
1945
instat(Node ** a,int n)1946 Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */
1947 {
1948 Cell *x, *vp, *arrayp, *cp, *ncp;
1949 Array *tp;
1950 int i;
1951
1952 vp = execute(a[0]);
1953 arrayp = execute(a[1]);
1954 if (!isarr(arrayp)) {
1955 return True;
1956 }
1957 tp = (Array *) arrayp->sval;
1958 tempfree(arrayp);
1959 for (i = 0; i < tp->size; i++) { /* this routine knows too much */
1960 for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
1961 setsval(vp, cp->nval);
1962 ncp = cp->cnext;
1963 x = execute(a[2]);
1964 if (isbreak(x)) {
1965 tempfree(vp);
1966 return True;
1967 }
1968 if (isnext(x) || isexit(x) || isret(x)) {
1969 tempfree(vp);
1970 return(x);
1971 }
1972 tempfree(x);
1973 }
1974 }
1975 return True;
1976 }
1977
nawk_convert(const char * s,int (* fun_c)(int),wint_t (* fun_wc)(wint_t))1978 static char *nawk_convert(const char *s, int (*fun_c)(int),
1979 wint_t (*fun_wc)(wint_t))
1980 {
1981 char *buf = NULL;
1982 char *pbuf = NULL;
1983 const char *ps = NULL;
1984 size_t n = 0;
1985 wchar_t wc;
1986 const size_t sz = awk_mb_cur_max;
1987
1988 if (sz == 1) {
1989 buf = tostring(s);
1990
1991 for (pbuf = buf; *pbuf; pbuf++)
1992 *pbuf = fun_c((uschar)*pbuf);
1993
1994 return buf;
1995 } else {
1996 /* upper/lower character may be shorter/longer */
1997 buf = tostringN(s, strlen(s) * sz + 1);
1998
1999 /* reset internal state */
2000 if (mbtowc(NULL, NULL, 0) == -1 || wctomb(NULL, L'\0') == -1)
2001 FATAL("unable to reset character conversion state");
2002
2003 ps = s;
2004 pbuf = buf;
2005 while (n = mbtowc(&wc, ps, sz),
2006 n > 0 && n != (size_t)-1 && n != (size_t)-2)
2007 {
2008 ps += n;
2009
2010 n = wctomb(pbuf, fun_wc(wc));
2011 if (n == (size_t)-1)
2012 FATAL("illegal wide character %s", s);
2013
2014 pbuf += n;
2015 }
2016
2017 *pbuf = '\0';
2018
2019 if (n)
2020 FATAL("illegal byte sequence %s", s);
2021
2022 return buf;
2023 }
2024 }
2025
#ifdef __DJGPP__
/*
 * Replacement towupper()/towlower() compiled only when __DJGPP__ is
 * defined: values in [0, 256) are mapped through toupper()/tolower(),
 * everything else is returned unchanged.
 */
static wint_t towupper(wint_t wc)
{
	if (wc < 0 || wc >= 256)
		return wc;

	return toupper(wc & 0xFF);
}

static wint_t towlower(wint_t wc)
{
	if (wc < 0 || wc >= 256)
		return wc;

	return tolower(wc & 0xFF);
}
#endif
2043
/* nawk_toupper - newly allocated upper-cased copy of s, via nawk_convert(). */
static char *nawk_toupper(const char *s)
{
	char *upper = nawk_convert(s, toupper, towupper);

	return upper;
}
2048
/* nawk_tolower - newly allocated lower-cased copy of s, via nawk_convert(). */
static char *nawk_tolower(const char *s)
{
	char *lower = nawk_convert(s, tolower, towlower);

	return lower;
}
2053
bltin(Node ** a,int n)2054 Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */
2055 {
2056 Cell *x, *y;
2057 Awkfloat u = 0;
2058 int t, sz;
2059 Awkfloat tmp;
2060 char *buf, *fmt;
2061 Node *nextarg;
2062 FILE *fp;
2063 int status = 0;
2064 time_t tv;
2065 struct tm *tm, tmbuf;
2066 int estatus = 0;
2067
2068 t = ptoi(a[0]);
2069 x = execute(a[1]);
2070 nextarg = a[1]->nnext;
2071 switch (t) {
2072 case FLENGTH:
2073 if (isarr(x))
2074 u = ((Array *) x->sval)->nelem; /* GROT. should be function*/
2075 else
2076 u = u8_strlen(getsval(x));
2077 break;
2078 case FLOG:
2079 errno = 0;
2080 u = errcheck(log(getfval(x)), "log");
2081 break;
2082 case FINT:
2083 modf(getfval(x), &u); break;
2084 case FEXP:
2085 errno = 0;
2086 u = errcheck(exp(getfval(x)), "exp");
2087 break;
2088 case FSQRT:
2089 errno = 0;
2090 u = errcheck(sqrt(getfval(x)), "sqrt");
2091 break;
2092 case FSIN:
2093 u = sin(getfval(x)); break;
2094 case FCOS:
2095 u = cos(getfval(x)); break;
2096 case FATAN:
2097 if (nextarg == NULL) {
2098 WARNING("atan2 requires two arguments; returning 1.0");
2099 u = 1.0;
2100 } else {
2101 y = execute(a[1]->nnext);
2102 u = atan2(getfval(x), getfval(y));
2103 tempfree(y);
2104 nextarg = nextarg->nnext;
2105 }
2106 break;
2107 case FCOMPL:
2108 u = ~((int)getfval(x));
2109 break;
2110 case FAND:
2111 if (nextarg == 0) {
2112 WARNING("and requires two arguments; returning 0");
2113 u = 0;
2114 break;
2115 }
2116 y = execute(a[1]->nnext);
2117 u = ((int)getfval(x)) & ((int)getfval(y));
2118 tempfree(y);
2119 nextarg = nextarg->nnext;
2120 break;
2121 case FFOR:
2122 if (nextarg == 0) {
2123 WARNING("or requires two arguments; returning 0");
2124 u = 0;
2125 break;
2126 }
2127 y = execute(a[1]->nnext);
2128 u = ((int)getfval(x)) | ((int)getfval(y));
2129 tempfree(y);
2130 nextarg = nextarg->nnext;
2131 break;
2132 case FXOR:
2133 if (nextarg == 0) {
2134 WARNING("xor requires two arguments; returning 0");
2135 u = 0;
2136 break;
2137 }
2138 y = execute(a[1]->nnext);
2139 u = ((int)getfval(x)) ^ ((int)getfval(y));
2140 tempfree(y);
2141 nextarg = nextarg->nnext;
2142 break;
2143 case FLSHIFT:
2144 if (nextarg == 0) {
2145 WARNING("lshift requires two arguments; returning 0");
2146 u = 0;
2147 break;
2148 }
2149 y = execute(a[1]->nnext);
2150 u = ((int)getfval(x)) << ((int)getfval(y));
2151 tempfree(y);
2152 nextarg = nextarg->nnext;
2153 break;
2154 case FRSHIFT:
2155 if (nextarg == 0) {
2156 WARNING("rshift requires two arguments; returning 0");
2157 u = 0;
2158 break;
2159 }
2160 y = execute(a[1]->nnext);
2161 u = ((int)getfval(x)) >> ((int)getfval(y));
2162 tempfree(y);
2163 nextarg = nextarg->nnext;
2164 break;
2165 case FSYSTEM:
2166 fflush(stdout); /* in case something is buffered already */
2167 estatus = status = system(getsval(x));
2168 if (status != -1) {
2169 if (WIFEXITED(status)) {
2170 estatus = WEXITSTATUS(status);
2171 } else if (WIFSIGNALED(status)) {
2172 estatus = WTERMSIG(status) + 256;
2173 #ifdef WCOREDUMP
2174 if (WCOREDUMP(status))
2175 estatus += 256;
2176 #endif
2177 } else /* something else?!? */
2178 estatus = 0;
2179 }
2180 /* else estatus was set to -1 */
2181 u = estatus;
2182 break;
2183 case FRAND:
2184 /* random() returns numbers in [0..2^31-1]
2185 * in order to get a number in [0, 1), divide it by 2^31
2186 */
2187 u = (Awkfloat) random() / (0x7fffffffL + 0x1UL);
2188 break;
2189 case FSRAND:
2190 if (isrec(x)) { /* no argument provided */
2191 u = time(NULL);
2192 tmp = u;
2193 srandom((unsigned int) u);
2194 } else {
2195 u = getfval(x);
2196 tmp = u;
2197 srandom_deterministic((unsigned int) u);
2198 }
2199 u = srand_seed;
2200 srand_seed = tmp;
2201 break;
2202 case FTOUPPER:
2203 case FTOLOWER:
2204 if (t == FTOUPPER)
2205 buf = nawk_toupper(getsval(x));
2206 else
2207 buf = nawk_tolower(getsval(x));
2208 tempfree(x);
2209 x = gettemp();
2210 setsval(x, buf);
2211 free(buf);
2212 return x;
2213 case FFLUSH:
2214 if (isrec(x) || strlen(getsval(x)) == 0) {
2215 flush_all(); /* fflush() or fflush("") -> all */
2216 u = 0;
2217 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
2218 u = EOF;
2219 else
2220 u = fflush(fp);
2221 break;
2222 case FMKTIME:
2223 memset(&tmbuf, 0, sizeof(tmbuf));
2224 tm = &tmbuf;
2225 t = sscanf(getsval(x), "%d %d %d %d %d %d %d",
2226 &tm->tm_year, &tm->tm_mon, &tm->tm_mday, &tm->tm_hour,
2227 &tm->tm_min, &tm->tm_sec, &tm->tm_isdst);
2228 switch (t) {
2229 case 6:
2230 tm->tm_isdst = -1; /* let mktime figure it out */
2231 /* FALLTHROUGH */
2232 case 7:
2233 tm->tm_year -= 1900;
2234 tm->tm_mon--;
2235 u = mktime(tm);
2236 break;
2237 default:
2238 u = -1;
2239 break;
2240 }
2241 break;
2242 case FSYSTIME:
2243 u = time((time_t *) 0);
2244 break;
2245 case FSTRFTIME:
2246 /* strftime([format [,timestamp]]) */
2247 if (nextarg) {
2248 y = execute(nextarg);
2249 nextarg = nextarg->nnext;
2250 tv = (time_t) getfval(y);
2251 tempfree(y);
2252 } else
2253 tv = time((time_t *) 0);
2254 tm = localtime(&tv);
2255 if (tm == NULL)
2256 FATAL("bad time %ld", (long)tv);
2257
2258 if (isrec(x)) {
2259 /* format argument not provided, use default */
2260 fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
2261 } else
2262 fmt = tostring(getsval(x));
2263
2264 sz = 32;
2265 buf = NULL;
2266 do {
2267 if ((buf = (char *) reallocarray(buf, 2, sz)) == NULL)
2268 FATAL("out of memory in strftime");
2269 sz *= 2;
2270 } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0');
2271
2272 y = gettemp();
2273 setsval(y, buf);
2274 free(fmt);
2275 free(buf);
2276
2277 return y;
2278 default: /* can't happen */
2279 FATAL("illegal function type %d", t);
2280 break;
2281 }
2282 tempfree(x);
2283 x = gettemp();
2284 setfval(x, u);
2285 if (nextarg != NULL) {
2286 WARNING("warning: function has too many arguments");
2287 for ( ; nextarg; nextarg = nextarg->nnext) {
2288 y = execute(nextarg);
2289 tempfree(y);
2290 }
2291 }
2292 return(x);
2293 }
2294
printstat(Node ** a,int n)2295 Cell *printstat(Node **a, int n) /* print a[0] */
2296 {
2297 Node *x;
2298 Cell *y;
2299 FILE *fp;
2300
2301 if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */
2302 fp = stdout;
2303 else
2304 fp = redirect(ptoi(a[1]), a[2]);
2305 for (x = a[0]; x != NULL; x = x->nnext) {
2306 y = execute(x);
2307 fputs(getpssval(y), fp);
2308 tempfree(y);
2309 if (x->nnext == NULL)
2310 fputs(getsval(orsloc), fp);
2311 else
2312 fputs(getsval(ofsloc), fp);
2313 }
2314 if (a[1] != NULL)
2315 fflush(fp);
2316 if (ferror(fp))
2317 FATAL("write error on %s", filename(fp));
2318 return(True);
2319 }
2320
nullproc(Node ** a,int n)2321 Cell *nullproc(Node **a, int n)
2322 {
2323 return 0;
2324 }
2325
2326
/*
 * redirect - set up an i/o redirection.
 *
 * a is the redirection operator and b the expression naming the target.
 * Returns the stream from openfile(); failure to open is FATAL.
 */
FILE *redirect(int a, Node *b)
{
	Cell *namecell = execute(b);
	char *path = getsval(namecell);
	FILE *stream = openfile(a, path, NULL);

	if (stream == NULL)
		FATAL("can't open file %s", path);
	tempfree(namecell);
	return stream;
}
2341
/*
 * Table of streams opened through openfile().  A slot whose fp is NULL
 * is free.  stdinit() allocates the table and fills slots 0-2 with
 * stdin, stdout and stderr.
 */
struct files {
	FILE	*fp;		/* open stream, or NULL when the slot is unused */
	const char	*fname;	/* allocated copy of the name; released in closefile() */
	int	mode;	/* '|', 'a', 'w' => LE/LT, GT */
			/* openfile() stores GT for APPEND so > and >> can be mixed */
} *files;

/* number of slots in files[]; starts at FOPEN_MAX and grows by FOPEN_MAX in openfile() */
size_t nfiles;
2349
stdinit(void)2350 static void stdinit(void) /* in case stdin, etc., are not constants */
2351 {
2352 nfiles = FOPEN_MAX;
2353 files = (struct files *) calloc(nfiles, sizeof(*files));
2354 if (files == NULL)
2355 FATAL("can't allocate file memory for %zu files", nfiles);
2356 files[0].fp = stdin;
2357 files[0].fname = tostring("/dev/stdin");
2358 files[0].mode = LT;
2359 files[1].fp = stdout;
2360 files[1].fname = tostring("/dev/stdout");
2361 files[1].mode = GT;
2362 files[2].fp = stderr;
2363 files[2].fname = tostring("/dev/stderr");
2364 files[2].mode = GT;
2365 }
2366
openfile(int a,const char * us,bool * pnewflag)2367 FILE *openfile(int a, const char *us, bool *pnewflag)
2368 {
2369 const char *s = us;
2370 size_t i;
2371 int m;
2372 FILE *fp = NULL;
2373
2374 if (*s == '\0')
2375 FATAL("null file name in print or getline");
2376 for (i = 0; i < nfiles; i++)
2377 if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
2378 (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
2379 a == FFLUSH)) {
2380 if (pnewflag)
2381 *pnewflag = false;
2382 return files[i].fp;
2383 }
2384 if (a == FFLUSH) /* didn't find it, so don't create it! */
2385 return NULL;
2386
2387 for (i = 0; i < nfiles; i++)
2388 if (files[i].fp == NULL)
2389 break;
2390 if (i >= nfiles) {
2391 struct files *nf;
2392 size_t nnf = nfiles + FOPEN_MAX;
2393 nf = (struct files *) reallocarray(files, nnf, sizeof(*nf));
2394 if (nf == NULL)
2395 FATAL("cannot grow files for %s and %zu files", s, nnf);
2396 memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
2397 nfiles = nnf;
2398 files = nf;
2399 }
2400 fflush(stdout); /* force a semblance of order */
2401 m = a;
2402 if (a == GT) {
2403 fp = fopen(s, "w");
2404 } else if (a == APPEND) {
2405 fp = fopen(s, "a");
2406 m = GT; /* so can mix > and >> */
2407 } else if (a == '|') { /* output pipe */
2408 fp = popen(s, "w");
2409 } else if (a == LE) { /* input pipe */
2410 fp = popen(s, "r");
2411 } else if (a == LT) { /* getline <file */
2412 fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */
2413 } else /* can't happen */
2414 FATAL("illegal redirection %d", a);
2415 if (fp != NULL) {
2416 files[i].fname = tostring(s);
2417 files[i].fp = fp;
2418 files[i].mode = m;
2419 if (pnewflag)
2420 *pnewflag = true;
2421 if (fp != stdin && fp != stdout && fp != stderr)
2422 (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
2423 }
2424 return fp;
2425 }
2426
filename(FILE * fp)2427 const char *filename(FILE *fp)
2428 {
2429 size_t i;
2430
2431 for (i = 0; i < nfiles; i++)
2432 if (fp == files[i].fp)
2433 return files[i].fname;
2434 return "???";
2435 }
2436
closefile(Node ** a,int n)2437 Cell *closefile(Node **a, int n)
2438 {
2439 Cell *x;
2440 size_t i;
2441 bool stat;
2442
2443 x = execute(a[0]);
2444 getsval(x);
2445 stat = true;
2446 for (i = 0; i < nfiles; i++) {
2447 if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0)
2448 continue;
2449 if (files[i].mode == GT || files[i].mode == '|')
2450 fflush(files[i].fp);
2451 if (ferror(files[i].fp)) {
2452 if ((files[i].mode == GT && files[i].fp != stderr)
2453 || files[i].mode == '|')
2454 FATAL("write error on %s", files[i].fname);
2455 else
2456 WARNING("i/o error occurred on %s", files[i].fname);
2457 }
2458 if (files[i].fp == stdin || files[i].fp == stdout ||
2459 files[i].fp == stderr)
2460 stat = freopen("/dev/null", "r+", files[i].fp) == NULL;
2461 else if (files[i].mode == '|' || files[i].mode == LE)
2462 stat = pclose(files[i].fp) == -1;
2463 else
2464 stat = fclose(files[i].fp) == EOF;
2465 if (stat)
2466 WARNING("i/o error occurred closing %s", files[i].fname);
2467 xfree(files[i].fname);
2468 files[i].fname = NULL; /* watch out for ref thru this */
2469 files[i].fp = NULL;
2470 break;
2471 }
2472 tempfree(x);
2473 x = gettemp();
2474 setfval(x, (Awkfloat) (stat ? -1 : 0));
2475 return(x);
2476 }
2477
closeall(void)2478 void closeall(void)
2479 {
2480 size_t i;
2481 bool stat = false;
2482
2483 for (i = 0; i < nfiles; i++) {
2484 if (! files[i].fp)
2485 continue;
2486 if (files[i].mode == GT || files[i].mode == '|')
2487 fflush(files[i].fp);
2488 if (ferror(files[i].fp)) {
2489 if ((files[i].mode == GT && files[i].fp != stderr)
2490 || files[i].mode == '|')
2491 FATAL("write error on %s", files[i].fname);
2492 else
2493 WARNING("i/o error occurred on %s", files[i].fname);
2494 }
2495 if (files[i].fp == stdin || files[i].fp == stdout ||
2496 files[i].fp == stderr)
2497 continue;
2498 if (files[i].mode == '|' || files[i].mode == LE)
2499 stat = pclose(files[i].fp) == -1;
2500 else
2501 stat = fclose(files[i].fp) == EOF;
2502 if (stat)
2503 WARNING("i/o error occurred while closing %s", files[i].fname);
2504 }
2505 }
2506
flush_all(void)2507 static void flush_all(void)
2508 {
2509 size_t i;
2510
2511 for (i = 0; i < nfiles; i++)
2512 if (files[i].fp)
2513 fflush(files[i].fp);
2514 }
2515
/* forward declaration: backsub() is called by dosub() and gensub() but defined after them */
void backsub(char **pb_ptr, const char **sptr_ptr);
2517
/*
 * dosub - implement sub() and gsub().
 *
 * a[0] == NULL means a[1] is an already-compiled regular expression;
 * otherwise a[1] is evaluated and its string value compiled with makedfa().
 * a[2] is the replacement text and a[3] the cell that is searched and,
 * when at least one match was found, overwritten with the result.
 * subop is SUB (only the first match is replaced) or GSUB (all matches);
 * any other value is fatal.
 *
 * Returns a temporary numeric Cell holding m, the number of counted matches.
 *
 * NOTE(review): patbeg/patlen are globals that are presumably set by
 * pmatch() to the start and length of the current match -- confirm there.
 * adjbuf() presumably grows buf to at least the requested size and keeps
 * pb pointing at the same offset -- confirm against its definition.
 */
Cell *dosub(Node **a, int subop)        /* sub and gsub */
{
	fa *pfa;
	int tempstat = 0;		/* saved pfa->initstat, restored after the loop */
	char *repl;			/* private copy of the replacement text */
	Cell *x;

	char *buf = NULL;		/* output buffer; allocated on the first match */
	char *pb = NULL;		/* next write position inside buf */
	int bufsz = recsize;

	const char *r, *s;
	const char *start;		/* where the next search begins */
	const char *noempty = NULL;     /* empty match disallowed here */
	size_t m = 0;                   /* match count */
	size_t whichm = 0;              /* which match to select, 0 = global */
	int mtype;                      /* match type */

	if (a[0] == NULL) {	/* 0 => a[1] is already-compiled regexpr */
		pfa = (fa *) a[1];
	} else {
		x = execute(a[1]);
		pfa = makedfa(getsval(x), 1);
		tempfree(x);
	}

	x = execute(a[2]);	/* replacement string */
	repl = tostring(getsval(x));	/* copied, so it outlives the temp cell */
	tempfree(x);

	switch (subop) {
	case SUB:
		whichm = 1;	/* replace only the first match */
		x = execute(a[3]);    /* source string */
		break;
	case GSUB:
		whichm = 0;	/* replace every match */
		x = execute(a[3]);    /* source string */
		break;
	default:
		FATAL("dosub: unrecognized subop: %d", subop);
	}

	start = getsval(x);
	while (pmatch(pfa, start)) {
		if (buf == NULL) {
			/* first match: allocate the output buffer lazily */
			if ((pb = buf = (char *) malloc(bufsz)) == NULL)
				FATAL("out of memory in dosub");
			/*
			 * Save the initial state and use state 2 for the
			 * remaining searches.  NOTE(review): the meaning of
			 * state 2 is defined by the regex engine -- confirm.
			 */
			tempstat = pfa->initstat;
			pfa->initstat = 2;
		}

		/* match types */
		#define	MT_IGNORE  0  /* unselected or invalid */
		#define MT_INSERT  1  /* selected, empty */
		#define MT_REPLACE 2  /* selected, not empty */

		/* an empty match just after replacement is invalid */

		if (patbeg == noempty && patlen == 0) {
			mtype = MT_IGNORE;	/* invalid, not counted */
		} else if (whichm == ++m || whichm == 0) {
			mtype = patlen ? MT_REPLACE : MT_INSERT;
		} else {
			mtype = MT_IGNORE;	/* unselected, but counted */
		}

		/* leading text: */
		if (patbeg > start) {
			adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - start),
				recsize, &pb, "dosub");
			s = start;
			while (s < patbeg)
				*pb++ = *s++;
		}

		if (mtype == MT_IGNORE)
			goto matching_text;  /* skip replacement text */

		/* emit the replacement, expanding '&' and backslash sequences */
		r = repl;
		while (*r != 0) {
			adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "dosub");
			if (*r == '\\') {
				backsub(&pb, &r);
			} else if (*r == '&') {
				/* unescaped '&' stands for the matched text */
				r++;
				adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize,
					&pb, "dosub");
				for (s = patbeg; s < patbeg+patlen; )
					*pb++ = *s++;
			} else {
				*pb++ = *r++;
			}
		}

matching_text:
		/* a replaced match, or a match at the terminating NUL, is not copied */
		if (mtype == MT_REPLACE || *patbeg == '\0')
			goto next_search;  /* skip matching text */

		/*
		 * An empty match must still advance the search: copy the next
		 * u8_nextlen(patbeg) bytes through unchanged.
		 */
		if (patlen == 0)
			patlen = u8_nextlen(patbeg);
		adjbuf(&buf, &bufsz, (pb-buf) + patlen, recsize, &pb, "dosub");
		s = patbeg;
		while (s < patbeg + patlen)
			*pb++ = *s++;

next_search:
		start = patbeg + patlen;
		/* stop once the selected match was handled or the input is exhausted */
		if (m == whichm || *patbeg == '\0')
			break;
		if (mtype == MT_REPLACE)
			noempty = start;	/* forbid an empty match right here */

		#undef MT_IGNORE
		#undef MT_INSERT
		#undef MT_REPLACE
	}

	xfree(repl);

	if (buf != NULL) {	/* at least one match was found */
		pfa->initstat = tempstat;

		/* trailing text */
		adjbuf(&buf, &bufsz, 1+strlen(start)+pb-buf, 0, &pb, "dosub");
		while ((*pb++ = *start++) != '\0')
			;

		setsval(x, buf);	/* store the result back into the source cell */
		free(buf);
	}

	tempfree(x);
	x = gettemp();
	x->tval = NUM;
	x->fval = m;
	return x;
}
2656
/*
 * gensub - implement gensub(): substitute without modifying the source.
 *
 * a[0] == 0 means a[1] is an already-compiled regular expression;
 * otherwise a[1] is evaluated and compiled with makedfa().  a[2] is the
 * replacement text, a[3] selects which match to replace (a string
 * starting with 'g' or 'G' means all matches, otherwise a 1-based
 * number, values below 1 being treated as 1) and a[4] is the source
 * string.  nnn is unused.
 *
 * Returns a copy of the source cell marked CTEMP; when the pattern
 * matched, its string value is set to the substituted text.  A
 * replacement containing a backslash followed by a digit is fatal.
 *
 * NOTE(review): patbeg/patlen are globals that are presumably set by
 * pmatch() to the start and length of the current match -- confirm there.
 */
Cell *gensub(Node **a, int nnn)	/* global selective substitute */
	/* XXX incomplete - doesn't support backreferences \0 ... \9 */
{
	Cell *x, *y, *res, *h;
	char *rptr;		/* replacement text */
	const char *sptr;
	char *buf, *pb;		/* output buffer and write position */
	const char *t, *q;	/* t: current position in the source string */
	fa *pfa;
	int mflag, tempstat, num, whichm;
	int bufsz = recsize;

	if ((buf = (char *) malloc(bufsz)) == NULL)
		FATAL("out of memory in gensub");
	mflag = 0;	/* if mflag == 0, can replace empty string */
	num = 0;	/* number of matches seen so far */
	x = execute(a[4]);	/* source string */
	t = getsval(x);
	res = copycell(x);	/* target string - initially copy of source */
	res->csub = CTEMP;	/* result values are temporary */
	if (a[0] == 0)		/* 0 => a[1] is already-compiled regexpr */
		pfa = (fa *) a[1];	/* regular expression */
	else {
		y = execute(a[1]);
		pfa = makedfa(getsval(y), 1);
		tempfree(y);
	}
	y = execute(a[2]);	/* replacement string */
	h = execute(a[3]);	/* which matches should be replaced */
	sptr = getsval(h);
	if (sptr[0] == 'g' || sptr[0] == 'G')
		whichm = -1;	/* negative: replace every match */
	else {
		/*
		 * The specified number is index of replacement, starting
		 * from 1. GNU awk treats index lower than 0 same as
		 * 1, we do same for compatibility.
		 */
		whichm = (int) getfval(h) - 1;	/* convert to 0-based */
		if (whichm < 0)
			whichm = 0;
	}
	tempfree(h);

	if (pmatch(pfa, t)) {
		char *sl;

		/* saved here, restored at done: below */
		tempstat = pfa->initstat;
		pfa->initstat = 2;
		pb = buf;
		rptr = getsval(y);
		/*
		 * XXX if there are any backreferences in subst string,
		 * complain now.
		 */
		for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
			if (strchr("0123456789", sl[1])) {
				FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
			}
		}

		do {
			/* a specific match was requested and this is not it */
			if (whichm >= 0 && whichm != num) {
				num++;
				adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");

				/* copy the part of string up to and including
				 * match to output buffer */
				while (t < patbeg + patlen)
					*pb++ = *t++;
				continue;	/* jumps to the pmatch() test of the do-while */
			}

			if (patlen == 0 && *patbeg != 0) {	/* matched empty string */
				if (mflag == 0) {	/* can replace empty */
					num++;
					/* emit the replacement text */
					sptr = rptr;
					while (*sptr != 0) {
						adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
						if (*sptr == '\\') {
							backsub(&pb, &sptr);
						} else if (*sptr == '&') {
							/* unescaped '&' stands for the matched text */
							sptr++;
							adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
							for (q = patbeg; q < patbeg+patlen; )
								*pb++ = *q++;
						} else
							*pb++ = *sptr++;
					}
				}
				if (*t == 0)	/* at end */
					goto done;
				/* copy one source byte so the search advances */
				adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
				*pb++ = *t++;
				if (pb > buf + bufsz)	/* BUG: not sure of this test */
					FATAL("gensub result0 %.30s too big; can't happen", buf);
				mflag = 0;
			}
			else {	/* matched nonempty string */
				num++;
				/* copy the text between t and the match */
				sptr = t;
				adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
				while (sptr < patbeg)
					*pb++ = *sptr++;
				/* emit the replacement text */
				sptr = rptr;
				while (*sptr != 0) {
					adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
					if (*sptr == '\\') {
						backsub(&pb, &sptr);
					} else if (*sptr == '&') {
						sptr++;
						adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
						for (q = patbeg; q < patbeg+patlen; )
							*pb++ = *q++;
					} else
						*pb++ = *sptr++;
				}
				t = patbeg + patlen;	/* resume after the match */
				if (patlen == 0 || *t == 0 || *(t-1) == 0)
					goto done;
				if (pb > buf + bufsz)
					FATAL("gensub result1 %.30s too big; can't happen", buf);
				mflag = 1;	/* an empty match right after this one is not replaced */
			}
		} while (pmatch(pfa,t));
		/* no further match: copy the rest of the source, including its NUL */
		sptr = t;
		adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
		while ((*pb++ = *sptr++) != 0)
			;
	done:	if (pb > buf + bufsz)
			FATAL("gensub result2 %.30s too big; can't happen", buf);
		*pb = '\0';
		setsval(res, buf);
		pfa->initstat = tempstat;
	}
	tempfree(x);
	tempfree(y);
	free(buf);
	return(res);
}
2797
backsub(char ** pb_ptr,const char ** sptr_ptr)2798 void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */
2799 { /* sptr[0] == '\\' */
2800 char *pb = *pb_ptr;
2801 const char *sptr = *sptr_ptr;
2802
2803 if (sptr[1] == '\\') {
2804 if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */
2805 *pb++ = '\\';
2806 *pb++ = '&';
2807 sptr += 4;
2808 } else if (sptr[2] == '&') { /* \\& -> \ + matched */
2809 *pb++ = '\\';
2810 sptr += 2;
2811 } else if (do_posix) { /* \\x -> \x */
2812 sptr++;
2813 *pb++ = *sptr++;
2814 } else { /* \\x -> \\x */
2815 *pb++ = *sptr++;
2816 *pb++ = *sptr++;
2817 }
2818 } else if (sptr[1] == '&') { /* literal & */
2819 sptr++;
2820 *pb++ = *sptr++;
2821 } else /* literal \ */
2822 *pb++ = *sptr++;
2823
2824 *pb_ptr = pb;
2825 *sptr_ptr = sptr;
2826 }
2827
/*
 * wide_char_to_byte_str - encode one Unicode code point as UTF-8.
 *
 * rune:   the code point to encode.
 * outlen: receives the number of encoded bytes, not counting the
 *         terminating NUL; set to 0 when the code point is rejected.
 *
 * Returns a pointer to a static NUL-terminated buffer that is
 * overwritten by the next call, or NULL when rune has no valid UTF-8
 * encoding: negative values, values above U+10FFFF, and the UTF-16
 * surrogate range U+D800..U+DFFF (which RFC 3629 forbids in UTF-8).
 */
static char *wide_char_to_byte_str(int rune, size_t *outlen)
{
	static char buf[5];
	size_t len = 0;

	if (rune < 0 || rune > 0x10FFFF ||
	    (rune >= 0xD800 && rune <= 0xDFFF)) {
		*outlen = 0;	/* never leave the caller's length unset */
		return NULL;
	}

	memset(buf, 0, sizeof(buf));

	if (rune <= 0x0000007F) {
		/* 0xxxxxxx */
		buf[len++] = (char) rune;
	} else if (rune <= 0x000007FF) {
		/* 110xxxxx 10xxxxxx */
		buf[len++] = (char) (0xC0 | (rune >> 6));
		buf[len++] = (char) (0x80 | (rune & 0x3F));
	} else if (rune <= 0x0000FFFF) {
		/* 1110xxxx 10xxxxxx 10xxxxxx */
		buf[len++] = (char) (0xE0 | (rune >> 12));
		buf[len++] = (char) (0x80 | ((rune >> 6) & 0x3F));
		buf[len++] = (char) (0x80 | (rune & 0x3F));
	} else {
		/* 0x00010000 - 0x10FFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		buf[len++] = (char) (0xF0 | (rune >> 18));
		buf[len++] = (char) (0x80 | ((rune >> 12) & 0x3F));
		buf[len++] = (char) (0x80 | ((rune >> 6) & 0x3F));
		buf[len++] = (char) (0x80 | (rune & 0x3F));
	}

	*outlen = len;
	buf[len] = '\0';

	return buf;
}
2865