1 /* $OpenBSD: run.c,v 1.80 2023/10/28 22:38:22 millert Exp $ */ 2 /**************************************************************** 3 Copyright (C) Lucent Technologies 1997 4 All Rights Reserved 5 6 Permission to use, copy, modify, and distribute this software and 7 its documentation for any purpose and without fee is hereby 8 granted, provided that the above copyright notice appear in all 9 copies and that both that the copyright notice and this 10 permission notice and warranty disclaimer appear in supporting 11 documentation, and that the name Lucent Technologies or any of 12 its entities not be used in advertising or publicity pertaining 13 to distribution of the software without specific, written prior 14 permission. 15 16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 23 THIS SOFTWARE. 24 ****************************************************************/ 25 26 #define DEBUG 27 #include <stdio.h> 28 #include <ctype.h> 29 #include <errno.h> 30 #include <wctype.h> 31 #include <fcntl.h> 32 #include <setjmp.h> 33 #include <limits.h> 34 #include <math.h> 35 #include <string.h> 36 #include <stdlib.h> 37 #include <time.h> 38 #include <sys/types.h> 39 #include <sys/wait.h> 40 #include "awk.h" 41 #include "awkgram.tab.h" 42 43 44 static void stdinit(void); 45 static void flush_all(void); 46 static char *wide_char_to_byte_str(int rune, size_t *outlen); 47 48 #if 1 49 #define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0) 50 #else 51 void tempfree(Cell *p) { 52 if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) { 53 WARNING("bad csub %d in Cell %d %s", 54 p->csub, p->ctype, p->sval); 55 } 56 if (istemp(p)) 57 tfree(p); 58 } 59 #endif 60 61 /* do we really need these? */ 62 /* #ifdef _NFILE */ 63 /* #ifndef FOPEN_MAX */ 64 /* #define FOPEN_MAX _NFILE */ 65 /* #endif */ 66 /* #endif */ 67 /* */ 68 /* #ifndef FOPEN_MAX */ 69 /* #define FOPEN_MAX 40 */ /* max number of open files */ 70 /* #endif */ 71 /* */ 72 /* #ifndef RAND_MAX */ 73 /* #define RAND_MAX 32767 */ /* all that ansi guarantees */ 74 /* #endif */ 75 76 jmp_buf env; 77 extern int pairstack[]; 78 extern Awkfloat srand_seed; 79 80 Node *winner = NULL; /* root of parse tree */ 81 Cell *tmps; /* free temporary cells for execution */ 82 83 static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL }; 84 Cell *True = &truecell; 85 static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL }; 86 Cell *False = &falsecell; 87 static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL }; 88 Cell *jbreak = &breakcell; 89 static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL }; 90 Cell *jcont = &contcell; 91 static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL }; 92 Cell *jnext = &nextcell; 93 static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL }; 94 Cell *jnextfile = &nextfilecell; 95 static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL }; 96 Cell *jexit = &exitcell; 97 static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL }; 98 Cell *jret = &retcell; 99 static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 100 101 Node *curnode = NULL; /* the node being executed, for debugging */ 102 103 /* buffer memory management */ 104 int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, 105 const char *whatrtn) 106 /* pbuf: address of pointer to buffer being managed 107 * psiz: address of buffer size variable 108 * minlen: minimum length of buffer needed 109 * quantum: buffer size quantum 110 * pbptr: address of movable pointer into buffer, or 0 if none 111 * whatrtn: name of the calling routine if failure should cause fatal error 112 * 113 * return 0 for realloc failure, !=0 for success 114 */ 115 { 116 if (minlen > *psiz) { 117 char *tbuf; 118 int rminlen = quantum ? minlen % quantum : 0; 119 int boff = pbptr ? *pbptr - *pbuf : 0; 120 /* round up to next multiple of quantum */ 121 if (rminlen) 122 minlen += quantum - rminlen; 123 tbuf = (char *) realloc(*pbuf, minlen); 124 DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)tbuf); 125 if (tbuf == NULL) { 126 if (whatrtn) 127 FATAL("out of memory in %s", whatrtn); 128 return 0; 129 } 130 *pbuf = tbuf; 131 *psiz = minlen; 132 if (pbptr) 133 *pbptr = tbuf + boff; 134 } 135 return 1; 136 } 137 138 void run(Node *a) /* execution of parse tree starts here */ 139 { 140 141 stdinit(); 142 execute(a); 143 closeall(); 144 } 145 146 Cell *execute(Node *u) /* execute a node of the parse tree */ 147 { 148 Cell *(*proc)(Node **, int); 149 Cell *x; 150 Node *a; 151 152 if (u == NULL) 153 return(True); 154 for (a = u; ; a = a->nnext) { 155 curnode = a; 156 if (isvalue(a)) { 157 x = (Cell *) (a->narg[0]); 158 if (isfld(x) && !donefld) 159 fldbld(); 160 else if (isrec(x) && !donerec) 161 recbld(); 162 return(x); 163 } 164 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */ 165 FATAL("illegal statement"); 166 proc = proctab[a->nobj-FIRSTTOKEN]; 167 x = (*proc)(a->narg, a->nobj); 168 if (isfld(x) && !donefld) 169 fldbld(); 170 else if (isrec(x) && !donerec) 171 recbld(); 172 if (isexpr(a)) 173 return(x); 174 if (isjump(x)) 175 return(x); 176 if (a->nnext == NULL) 177 return(x); 178 tempfree(x); 179 } 180 } 181 182 183 Cell *program(Node **a, int n) /* execute an awk program */ 184 { /* a[0] = BEGIN, a[1] = body, a[2] = END */ 185 Cell *x; 186 187 if (setjmp(env) != 0) 188 goto ex; 189 if (a[0]) { /* BEGIN */ 190 x = execute(a[0]); 191 if (isexit(x)) 192 return(True); 193 if (isjump(x)) 194 FATAL("illegal break, continue, next or nextfile from BEGIN"); 195 tempfree(x); 196 } 197 if (a[1] || a[2]) 198 while (getrec(&record, &recsize, true) > 0) { 199 x = execute(a[1]); 200 if (isexit(x)) 201 break; 202 tempfree(x); 203 } 204 ex: 205 if (setjmp(env) != 0) /* handles exit within END */ 206 goto ex1; 207 if (a[2]) { /* END */ 208 x = execute(a[2]); 209 if (isbreak(x) || isnext(x) || iscont(x)) 210 FATAL("illegal break, continue, next or nextfile from END"); 211 tempfree(x); 212 } 213 ex1: 214 return(True); 215 } 216 217 struct Frame { /* stack frame for awk function calls */ 218 int nargs; /* number of arguments in this call */ 219 Cell *fcncell; /* pointer to Cell for function */ 220 Cell **args; /* pointer to array of arguments after execute */ 221 Cell *retval; /* return value */ 222 }; 223 224 #define NARGS 50 /* max args in a call */ 225 226 struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ 227 int nframe = 0; /* number of frames allocated */ 228 struct Frame *frp = NULL; /* frame pointer. bottom level unused */ 229 230 Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ 231 { 232 static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 233 int i, ncall, ndef; 234 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ 235 Node *x; 236 Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */ 237 Cell *y, *z, *fcn; 238 char *s; 239 240 fcn = execute(a[0]); /* the function itself */ 241 s = fcn->nval; 242 if (!isfcn(fcn)) 243 FATAL("calling undefined function %s", s); 244 if (frame == NULL) { 245 frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame)); 246 if (frame == NULL) 247 FATAL("out of space for stack frames calling %s", s); 248 } 249 for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ 250 ncall++; 251 ndef = (int) fcn->fval; /* args in defn */ 252 DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)); 253 if (ncall > ndef) 254 WARNING("function %s called with %d args, uses only %d", 255 s, ncall, ndef); 256 if (ncall + ndef > NARGS) 257 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); 258 for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */ 259 DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)); 260 y = execute(x); 261 oargs[i] = y; 262 DPRINTF("args[%d]: %s %f <%s>, t=%o\n", 263 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval); 264 if (isfcn(y)) 265 FATAL("can't use function %s as argument in %s", y->nval, s); 266 if (isarr(y)) 267 args[i] = y; /* arrays by ref */ 268 else 269 args[i] = copycell(y); 270 tempfree(y); 271 } 272 for ( ; i < ndef; i++) { /* add null args for ones not provided */ 273 args[i] = gettemp(); 274 *args[i] = newcopycell; 275 } 276 frp++; /* now ok to up frame */ 277 if (frp >= frame + nframe) { 278 int dfp = frp - frame; /* old index */ 279 frame = (struct Frame *) reallocarray(frame, (nframe += 100), sizeof(*frame)); 280 if (frame == NULL) 281 FATAL("out of space for stack frames in %s", s); 282 frp = frame + dfp; 283 } 284 frp->fcncell = fcn; 285 frp->args = args; 286 frp->nargs = ndef; /* number defined with (excess are locals) */ 287 frp->retval = gettemp(); 288 289 DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame)); 290 y = execute((Node *)(fcn->sval)); /* execute body */ 291 DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame)); 292 293 for (i = 0; i < ndef; i++) { 294 Cell *t = frp->args[i]; 295 if (isarr(t)) { 296 if (t->csub == CCOPY) { 297 if (i >= ncall) { 298 freesymtab(t); 299 t->csub = CTEMP; 300 tempfree(t); 301 } else { 302 oargs[i]->tval = t->tval; 303 oargs[i]->tval &= ~(STR|NUM|DONTFREE); 304 oargs[i]->sval = t->sval; 305 tempfree(t); 306 } 307 } 308 } else if (t != y) { /* kludge to prevent freeing twice */ 309 t->csub = CTEMP; 310 tempfree(t); 311 } else if (t == y && t->csub == CCOPY) { 312 t->csub = CTEMP; 313 tempfree(t); 314 freed = 1; 315 } 316 } 317 tempfree(fcn); 318 if (isexit(y) || isnext(y)) 319 return y; 320 if (freed == 0) { 321 tempfree(y); /* don't free twice! */ 322 } 323 z = frp->retval; /* return value */ 324 DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval); 325 frp--; 326 return(z); 327 } 328 329 Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ 330 { 331 Cell *y; 332 333 /* copy is not constant or field */ 334 335 y = gettemp(); 336 y->tval = x->tval & ~(CON|FLD|REC); 337 y->csub = CCOPY; /* prevents freeing until call is over */ 338 y->nval = x->nval; /* BUG? */ 339 if (isstr(x) /* || x->ctype == OCELL */) { 340 y->sval = tostring(x->sval); 341 y->tval &= ~DONTFREE; 342 } else 343 y->tval |= DONTFREE; 344 y->fval = x->fval; 345 return y; 346 } 347 348 Cell *arg(Node **a, int n) /* nth argument of a function */ 349 { 350 351 n = ptoi(a[0]); /* argument number, counting from 0 */ 352 DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs); 353 if (n+1 > frp->nargs) 354 FATAL("argument #%d of function %s was not supplied", 355 n+1, frp->fcncell->nval); 356 return frp->args[n]; 357 } 358 359 Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ 360 { 361 Cell *y; 362 363 switch (n) { 364 case EXIT: 365 if (a[0] != NULL) { 366 y = execute(a[0]); 367 errorflag = (int) getfval(y); 368 tempfree(y); 369 } 370 longjmp(env, 1); 371 case RETURN: 372 if (a[0] != NULL) { 373 y = execute(a[0]); 374 if ((y->tval & (STR|NUM)) == (STR|NUM)) { 375 setsval(frp->retval, getsval(y)); 376 frp->retval->fval = getfval(y); 377 frp->retval->tval |= NUM; 378 } 379 else if (y->tval & STR) 380 setsval(frp->retval, getsval(y)); 381 else if (y->tval & NUM) 382 setfval(frp->retval, getfval(y)); 383 else /* can't happen */ 384 FATAL("bad type variable %d", y->tval); 385 tempfree(y); 386 } 387 return(jret); 388 case NEXT: 389 return(jnext); 390 case NEXTFILE: 391 nextfile(); 392 return(jnextfile); 393 case BREAK: 394 return(jbreak); 395 case CONTINUE: 396 return(jcont); 397 default: /* can't happen */ 398 FATAL("illegal jump type %d", n); 399 } 400 return 0; /* not reached */ 401 } 402 403 Cell *awkgetline(Node **a, int n) /* get next line from specific input */ 404 { /* a[0] is variable, a[1] is operator, a[2] is filename */ 405 Cell *r, *x; 406 extern Cell **fldtab; 407 FILE *fp; 408 char *buf; 409 int bufsize = recsize; 410 int mode; 411 bool newflag; 412 double result; 413 414 if ((buf = (char *) malloc(bufsize)) == NULL) 415 FATAL("out of memory in getline"); 416 417 fflush(stdout); /* in case someone is waiting for a prompt */ 418 r = gettemp(); 419 if (a[1] != NULL) { /* getline < file */ 420 x = execute(a[2]); /* filename */ 421 mode = ptoi(a[1]); 422 if (mode == '|') /* input pipe */ 423 mode = LE; /* arbitrary flag */ 424 fp = openfile(mode, getsval(x), &newflag); 425 tempfree(x); 426 if (fp == NULL) 427 n = -1; 428 else 429 n = readrec(&buf, &bufsize, fp, newflag); 430 if (n <= 0) { 431 ; 432 } else if (a[0] != NULL) { /* getline var <file */ 433 x = execute(a[0]); 434 setsval(x, buf); 435 if (is_number(x->sval, & result)) { 436 x->fval = result; 437 x->tval |= NUM; 438 } 439 tempfree(x); 440 } else { /* getline <file */ 441 setsval(fldtab[0], buf); 442 if (is_number(fldtab[0]->sval, & result)) { 443 fldtab[0]->fval = result; 444 fldtab[0]->tval |= NUM; 445 } 446 } 447 } else { /* bare getline; use current input */ 448 if (a[0] == NULL) /* getline */ 449 n = getrec(&record, &recsize, true); 450 else { /* getline var */ 451 n = getrec(&buf, &bufsize, false); 452 if (n > 0) { 453 x = execute(a[0]); 454 setsval(x, buf); 455 if (is_number(x->sval, & result)) { 456 x->fval = result; 457 x->tval |= NUM; 458 } 459 tempfree(x); 460 } 461 } 462 } 463 setfval(r, (Awkfloat) n); 464 free(buf); 465 return r; 466 } 467 468 Cell *getnf(Node **a, int n) /* get NF */ 469 { 470 if (!donefld) 471 fldbld(); 472 return (Cell *) a[0]; 473 } 474 475 static char * 476 makearraystring(Node *p, const char *func) 477 { 478 char *buf; 479 int bufsz = recsize; 480 size_t blen; 481 482 if ((buf = (char *) malloc(bufsz)) == NULL) { 483 FATAL("%s: out of memory", func); 484 } 485 486 blen = 0; 487 buf[blen] = '\0'; 488 489 for (; p; p = p->nnext) { 490 Cell *x = execute(p); /* expr */ 491 char *s = getsval(x); 492 size_t seplen = strlen(getsval(subseploc)); 493 size_t nsub = p->nnext ? seplen : 0; 494 size_t slen = strlen(s); 495 size_t tlen = blen + slen + nsub; 496 497 if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) { 498 FATAL("%s: out of memory %s[%s...]", 499 func, x->nval, buf); 500 } 501 memcpy(buf + blen, s, slen); 502 if (nsub) { 503 memcpy(buf + blen + slen, *SUBSEP, nsub); 504 } 505 buf[tlen] = '\0'; 506 blen = tlen; 507 tempfree(x); 508 } 509 return buf; 510 } 511 512 Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 513 { 514 Cell *x, *z; 515 char *buf; 516 517 x = execute(a[0]); /* Cell* for symbol table */ 518 buf = makearraystring(a[1], __func__); 519 if (!isarr(x)) { 520 DPRINTF("making %s into an array\n", NN(x->nval)); 521 if (freeable(x)) 522 xfree(x->sval); 523 x->tval &= ~(STR|NUM|DONTFREE); 524 x->tval |= ARR; 525 x->sval = (char *) makesymtab(NSYMTAB); 526 } 527 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval); 528 z->ctype = OCELL; 529 z->csub = CVAR; 530 tempfree(x); 531 free(buf); 532 return(z); 533 } 534 535 Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 536 { 537 Cell *x; 538 539 x = execute(a[0]); /* Cell* for symbol table */ 540 if (x == symtabloc) { 541 FATAL("cannot delete SYMTAB or its elements"); 542 } 543 if (!isarr(x)) 544 return True; 545 if (a[1] == NULL) { /* delete the elements, not the table */ 546 freesymtab(x); 547 x->tval &= ~STR; 548 x->tval |= ARR; 549 x->sval = (char *) makesymtab(NSYMTAB); 550 } else { 551 char *buf = makearraystring(a[1], __func__); 552 freeelem(x, buf); 553 free(buf); 554 } 555 tempfree(x); 556 return True; 557 } 558 559 Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ 560 { 561 Cell *ap, *k; 562 char *buf; 563 564 ap = execute(a[1]); /* array name */ 565 if (!isarr(ap)) { 566 DPRINTF("making %s into an array\n", ap->nval); 567 if (freeable(ap)) 568 xfree(ap->sval); 569 ap->tval &= ~(STR|NUM|DONTFREE); 570 ap->tval |= ARR; 571 ap->sval = (char *) makesymtab(NSYMTAB); 572 } 573 buf = makearraystring(a[0], __func__); 574 k = lookup(buf, (Array *) ap->sval); 575 tempfree(ap); 576 free(buf); 577 if (k == NULL) 578 return(False); 579 else 580 return(True); 581 } 582 583 584 /* ======== utf-8 code ========== */ 585 586 /* 587 * Awk strings can contain ascii, random 8-bit items (eg Latin-1), 588 * or utf-8. u8_isutf tests whether a string starts with a valid 589 * utf-8 sequence, and returns 0 if not (e.g., high bit set). 590 * u8_nextlen returns length of next valid sequence, which is 591 * 1 for ascii, 2..4 for utf-8, or 1 for high bit non-utf. 592 * u8_strlen returns length of string in valid utf-8 sequences 593 * and/or high-bit bytes. Conversion functions go between byte 594 * number and character number. 595 * 596 * In theory, this behaves the same as before for non-utf8 bytes. 597 * 598 * Limited checking! This is a potential security hole. 599 */ 600 601 /* is s the beginning of a valid utf-8 string? */ 602 /* return length 1..4 if yes, 0 if no */ 603 int u8_isutf(const char *s) 604 { 605 int n, ret; 606 unsigned char c; 607 608 c = s[0]; 609 if (c < 128 || awk_mb_cur_max == 1) 610 return 1; /* what if it's 0? */ 611 612 n = strlen(s); 613 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) { 614 ret = 2; /* 110xxxxx 10xxxxxx */ 615 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80 616 && (s[2] & 0xC0) == 0x80) { 617 ret = 3; /* 1110xxxx 10xxxxxx 10xxxxxx */ 618 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80 619 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) { 620 ret = 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ 621 } else { 622 ret = 0; 623 } 624 return ret; 625 } 626 627 /* Convert (prefix of) utf8 string to utf-32 rune. */ 628 /* Sets *rune to the value, returns the length. */ 629 /* No error checking: watch out. */ 630 int u8_rune(int *rune, const char *s) 631 { 632 int n, ret; 633 unsigned char c; 634 635 c = s[0]; 636 if (c < 128 || awk_mb_cur_max == 1) { 637 *rune = c; 638 return 1; 639 } 640 641 n = strlen(s); 642 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) { 643 *rune = ((c & 0x1F) << 6) | (s[1] & 0x3F); /* 110xxxxx 10xxxxxx */ 644 ret = 2; 645 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80 646 && (s[2] & 0xC0) == 0x80) { 647 *rune = ((c & 0xF) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F); 648 /* 1110xxxx 10xxxxxx 10xxxxxx */ 649 ret = 3; 650 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80 651 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) { 652 *rune = ((c & 0x7) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F); 653 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ 654 ret = 4; 655 } else { 656 *rune = c; 657 ret = 1; 658 } 659 return ret; /* returns one byte if sequence doesn't look like utf */ 660 } 661 662 /* return length of next sequence: 1 for ascii or random, 2..4 for valid utf8 */ 663 int u8_nextlen(const char *s) 664 { 665 int len; 666 667 len = u8_isutf(s); 668 if (len == 0) 669 len = 1; 670 return len; 671 } 672 673 /* return number of utf characters or single non-utf bytes */ 674 int u8_strlen(const char *s) 675 { 676 int i, len, n, totlen; 677 unsigned char c; 678 679 n = strlen(s); 680 totlen = 0; 681 for (i = 0; i < n; i += len) { 682 c = s[i]; 683 if (c < 128 || awk_mb_cur_max == 1) { 684 len = 1; 685 } else { 686 len = u8_nextlen(&s[i]); 687 } 688 totlen++; 689 if (i > n) 690 FATAL("bad utf count [%s] n=%d i=%d\n", s, n, i); 691 } 692 return totlen; 693 } 694 695 /* convert utf-8 char number in a string to its byte offset */ 696 int u8_char2byte(const char *s, int charnum) 697 { 698 int n; 699 int bytenum = 0; 700 701 while (charnum > 0) { 702 n = u8_nextlen(s); 703 s += n; 704 bytenum += n; 705 charnum--; 706 } 707 return bytenum; 708 } 709 710 /* convert byte offset in s to utf-8 char number that starts there */ 711 int u8_byte2char(const char *s, int bytenum) 712 { 713 int i, len, b; 714 int charnum = 0; /* BUG: what origin? */ 715 /* should be 0 to match start==0 which means no match */ 716 717 b = strlen(s); 718 if (bytenum > b) { 719 return -1; /* ??? */ 720 } 721 for (i = 0; i <= bytenum; i += len) { 722 len = u8_nextlen(s+i); 723 charnum++; 724 } 725 return charnum; 726 } 727 728 /* runetochar() adapted from rune.c in the Plan 9 distributione */ 729 730 enum 731 { 732 Runeerror = 128, /* from somewhere else */ 733 Runemax = 0x10FFFF, 734 735 Bit1 = 7, 736 Bitx = 6, 737 Bit2 = 5, 738 Bit3 = 4, 739 Bit4 = 3, 740 Bit5 = 2, 741 742 T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ 743 Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ 744 T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ 745 T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ 746 T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ 747 T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */ 748 749 Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */ 750 Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */ 751 Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */ 752 Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0011 1111 1111 1111 1111 1111 */ 753 754 Maskx = (1<<Bitx)-1, /* 0011 1111 */ 755 Testx = Maskx ^ 0xFF, /* 1100 0000 */ 756 757 }; 758 759 int runetochar(char *str, int c) 760 { 761 /* one character sequence 00000-0007F => 00-7F */ 762 if (c <= Rune1) { 763 str[0] = c; 764 return 1; 765 } 766 767 /* two character sequence 00080-007FF => T2 Tx */ 768 if (c <= Rune2) { 769 str[0] = T2 | (c >> 1*Bitx); 770 str[1] = Tx | (c & Maskx); 771 return 2; 772 } 773 774 /* three character sequence 00800-0FFFF => T3 Tx Tx */ 775 if (c > Runemax) 776 c = Runeerror; 777 if (c <= Rune3) { 778 str[0] = T3 | (c >> 2*Bitx); 779 str[1] = Tx | ((c >> 1*Bitx) & Maskx); 780 str[2] = Tx | (c & Maskx); 781 return 3; 782 } 783 784 /* four character sequence 010000-1FFFFF => T4 Tx Tx Tx */ 785 str[0] = T4 | (c >> 3*Bitx); 786 str[1] = Tx | ((c >> 2*Bitx) & Maskx); 787 str[2] = Tx | ((c >> 1*Bitx) & Maskx); 788 str[3] = Tx | (c & Maskx); 789 return 4; 790 } 791 792 793 /* ========== end of utf8 code =========== */ 794 795 796 797 Cell *matchop(Node **a, int n) /* ~ and match() */ 798 { 799 Cell *x, *y; 800 char *s, *t; 801 int i; 802 int cstart, cpatlen, len; 803 fa *pfa; 804 int (*mf)(fa *, const char *) = match, mode = 0; 805 806 if (n == MATCHFCN) { 807 mf = pmatch; 808 mode = 1; 809 } 810 x = execute(a[1]); /* a[1] = target text */ 811 s = getsval(x); 812 if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */ 813 i = (*mf)((fa *) a[2], s); 814 else { 815 y = execute(a[2]); /* a[2] = regular expr */ 816 t = getsval(y); 817 pfa = makedfa(t, mode); 818 i = (*mf)(pfa, s); 819 tempfree(y); 820 } 821 tempfree(x); 822 if (n == MATCHFCN) { 823 int start = patbeg - s + 1; /* origin 1 */ 824 if (patlen < 0) { 825 start = 0; /* not found */ 826 } else { 827 cstart = u8_byte2char(s, start-1); 828 cpatlen = 0; 829 for (i = 0; i < patlen; i += len) { 830 len = u8_nextlen(patbeg+i); 831 cpatlen++; 832 } 833 834 start = cstart; 835 patlen = cpatlen; 836 } 837 838 setfval(rstartloc, (Awkfloat) start); 839 setfval(rlengthloc, (Awkfloat) patlen); 840 x = gettemp(); 841 x->tval = NUM; 842 x->fval = start; 843 return x; 844 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0)) 845 return(True); 846 else 847 return(False); 848 } 849 850 851 Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */ 852 { 853 Cell *x, *y; 854 int i; 855 856 x = execute(a[0]); 857 i = istrue(x); 858 tempfree(x); 859 switch (n) { 860 case BOR: 861 if (i) return(True); 862 y = execute(a[1]); 863 i = istrue(y); 864 tempfree(y); 865 if (i) return(True); 866 else return(False); 867 case AND: 868 if ( !i ) return(False); 869 y = execute(a[1]); 870 i = istrue(y); 871 tempfree(y); 872 if (i) return(True); 873 else return(False); 874 case NOT: 875 if (i) return(False); 876 else return(True); 877 default: /* can't happen */ 878 FATAL("unknown boolean operator %d", n); 879 } 880 return 0; /*NOTREACHED*/ 881 } 882 883 Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */ 884 { 885 int i; 886 Cell *x, *y; 887 Awkfloat j; 888 bool x_is_nan, y_is_nan; 889 890 x = execute(a[0]); 891 y = execute(a[1]); 892 x_is_nan = isnan(x->fval); 893 y_is_nan = isnan(y->fval); 894 if (x->tval&NUM && y->tval&NUM) { 895 if ((x_is_nan || y_is_nan) && n != NE) 896 return(False); 897 j = x->fval - y->fval; 898 i = j<0? -1: (j>0? 1: 0); 899 } else { 900 i = strcmp(getsval(x), getsval(y)); 901 } 902 tempfree(x); 903 tempfree(y); 904 switch (n) { 905 case LT: if (i<0) return(True); 906 else return(False); 907 case LE: if (i<=0) return(True); 908 else return(False); 909 case NE: if (x_is_nan && y_is_nan) return(True); 910 else if (i!=0) return(True); 911 else return(False); 912 case EQ: if (i == 0) return(True); 913 else return(False); 914 case GE: if (i>=0) return(True); 915 else return(False); 916 case GT: if (i>0) return(True); 917 else return(False); 918 default: /* can't happen */ 919 FATAL("unknown relational operator %d", n); 920 } 921 return 0; /*NOTREACHED*/ 922 } 923 924 void tfree(Cell *a) /* free a tempcell */ 925 { 926 if (freeable(a)) { 927 DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval); 928 xfree(a->sval); 929 } 930 if (a == tmps) 931 FATAL("tempcell list is curdled"); 932 a->cnext = tmps; 933 tmps = a; 934 } 935 936 Cell *gettemp(void) /* get a tempcell */ 937 { int i; 938 Cell *x; 939 940 if (!tmps) { 941 tmps = (Cell *) calloc(100, sizeof(*tmps)); 942 if (!tmps) 943 FATAL("out of space for temporaries"); 944 for (i = 1; i < 100; i++) 945 tmps[i-1].cnext = &tmps[i]; 946 tmps[i-1].cnext = NULL; 947 } 948 x = tmps; 949 tmps = x->cnext; 950 *x = tempcell; 951 return(x); 952 } 953 954 Cell *indirect(Node **a, int n) /* $( a[0] ) */ 955 { 956 Awkfloat val; 957 Cell *x; 958 int m; 959 char *s; 960 961 x = execute(a[0]); 962 val = getfval(x); /* freebsd: defend against super large field numbers */ 963 if ((Awkfloat)INT_MAX < val) 964 FATAL("trying to access out of range field %s", x->nval); 965 m = (int) val; 966 if (m == 0 && !is_number(s = getsval(x), NULL)) /* suspicion! */ 967 FATAL("illegal field $(%s), name \"%s\"", s, x->nval); 968 /* BUG: can x->nval ever be null??? */ 969 tempfree(x); 970 x = fieldadr(m); 971 x->ctype = OCELL; /* BUG? why are these needed? */ 972 x->csub = CFLD; 973 return(x); 974 } 975 976 Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ 977 { 978 int k, m, n; 979 int mb, nb; 980 char *s; 981 int temp; 982 Cell *x, *y, *z = NULL; 983 984 x = execute(a[0]); 985 y = execute(a[1]); 986 if (a[2] != NULL) 987 z = execute(a[2]); 988 s = getsval(x); 989 k = u8_strlen(s) + 1; 990 if (k <= 1) { 991 tempfree(x); 992 tempfree(y); 993 if (a[2] != NULL) { 994 tempfree(z); 995 } 996 x = gettemp(); 997 setsval(x, ""); 998 return(x); 999 } 1000 m = (int) getfval(y); 1001 if (m <= 0) 1002 m = 1; 1003 else if (m > k) 1004 m = k; 1005 tempfree(y); 1006 if (a[2] != NULL) { 1007 n = (int) getfval(z); 1008 tempfree(z); 1009 } else 1010 n = k - 1; 1011 if (n < 0) 1012 n = 0; 1013 else if (n > k - m) 1014 n = k - m; 1015 /* m is start, n is length from there */ 1016 DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s); 1017 y = gettemp(); 1018 mb = u8_char2byte(s, m-1); /* byte offset of start char in s */ 1019 nb = u8_char2byte(s, m-1+n); /* byte offset of end+1 char in s */ 1020 1021 temp = s[nb]; /* with thanks to John Linderman */ 1022 s[nb] = '\0'; 1023 setsval(y, s + mb); 1024 s[nb] = temp; 1025 tempfree(x); 1026 return(y); 1027 } 1028 1029 Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ 1030 { 1031 Cell *x, *y, *z; 1032 char *s1, *s2, *p1, *p2, *q; 1033 Awkfloat v = 0.0; 1034 1035 x = execute(a[0]); 1036 s1 = getsval(x); 1037 y = execute(a[1]); 1038 s2 = getsval(y); 1039 1040 z = gettemp(); 1041 for (p1 = s1; *p1 != '\0'; p1++) { 1042 for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) 1043 continue; 1044 if (*p2 == '\0') { 1045 /* v = (Awkfloat) (p1 - s1 + 1); origin 1 */ 1046 1047 /* should be a function: used in match() as well */ 1048 int i, len; 1049 v = 0; 1050 for (i = 0; i < p1-s1+1; i += len) { 1051 len = u8_nextlen(s1+i); 1052 v++; 1053 } 1054 break; 1055 } 1056 } 1057 tempfree(x); 1058 tempfree(y); 1059 setfval(z, v); 1060 return(z); 1061 } 1062 1063 int has_utf8(char *s) /* return 1 if s contains any utf-8 (2 bytes or more) character */ 1064 { 1065 int n; 1066 1067 for (n = 0; *s != 0; s += n) { 1068 n = u8_nextlen(s); 1069 if (n > 1) 1070 return 1; 1071 } 1072 return 0; 1073 } 1074 1075 #define MAXNUMSIZE 50 1076 1077 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */ 1078 { 1079 char *fmt; 1080 char *p, *t; 1081 const char *os; 1082 Cell *x; 1083 int flag = 0, n; 1084 int fmtwd; /* format width */ 1085 int fmtsz = recsize; 1086 char *buf = *pbuf; 1087 int bufsize = *pbufsize; 1088 #define FMTSZ(a) (fmtsz - ((a) - fmt)) 1089 #define BUFSZ(a) (bufsize - ((a) - buf)) 1090 1091 static bool first = true; 1092 static bool have_a_format = false; 1093 1094 if (first) { 1095 char xbuf[100]; 1096 1097 snprintf(xbuf, sizeof(xbuf), "%a", 42.0); 1098 have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0); 1099 first = false; 1100 } 1101 1102 os = s; 1103 p = buf; 1104 if ((fmt = (char *) malloc(fmtsz)) == NULL) 1105 FATAL("out of memory in format()"); 1106 while (*s) { 1107 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1"); 1108 if (*s != '%') { 1109 *p++ = *s++; 1110 continue; 1111 } 1112 if (*(s+1) == '%') { 1113 *p++ = '%'; 1114 s += 2; 1115 continue; 1116 } 1117 fmtwd = atoi(s+1); 1118 if (fmtwd < 0) 1119 fmtwd = -fmtwd; 1120 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2"); 1121 for (t = fmt; (*t++ = *s) != '\0'; s++) { 1122 if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3")) 1123 FATAL("format item %.30s... ran format() out of memory", os); 1124 /* Ignore size specifiers */ 1125 if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */ 1126 t--; 1127 continue; 1128 } 1129 if (isalpha((uschar)*s)) 1130 break; 1131 if (*s == '$') { 1132 FATAL("'$' not permitted in awk formats"); 1133 } 1134 if (*s == '*') { 1135 if (a == NULL) { 1136 FATAL("not enough args in printf(%s)", os); 1137 } 1138 x = execute(a); 1139 a = a->nnext; 1140 snprintf(t - 1, FMTSZ(t - 1), 1141 "%d", fmtwd=(int) getfval(x)); 1142 if (fmtwd < 0) 1143 fmtwd = -fmtwd; 1144 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); 1145 t = fmt + strlen(fmt); 1146 tempfree(x); 1147 } 1148 } 1149 *t = '\0'; 1150 if (fmtwd < 0) 1151 fmtwd = -fmtwd; 1152 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4"); 1153 switch (*s) { 1154 case 'a': case 'A': 1155 if (have_a_format) 1156 flag = *s; 1157 else 1158 flag = 'f'; 1159 break; 1160 case 'f': case 'e': case 'g': case 'E': case 'G': 1161 flag = 'f'; 1162 break; 1163 case 'd': case 'i': case 'o': case 'x': case 'X': case 'u': 1164 flag = (*s == 'd' || *s == 'i') ? 'd' : 'u'; 1165 *(t-1) = 'j'; 1166 *t = *s; 1167 *++t = '\0'; 1168 break; 1169 case 's': 1170 flag = 's'; 1171 break; 1172 case 'c': 1173 flag = 'c'; 1174 break; 1175 default: 1176 WARNING("weird printf conversion %s", fmt); 1177 flag = '?'; 1178 break; 1179 } 1180 if (a == NULL) 1181 FATAL("not enough args in printf(%s)", os); 1182 x = execute(a); 1183 a = a->nnext; 1184 n = MAXNUMSIZE; 1185 if (fmtwd > n) 1186 n = fmtwd; 1187 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5"); 1188 switch (flag) { 1189 case '?': 1190 snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */ 1191 t = getsval(x); 1192 n = strlen(t); 1193 if (fmtwd > n) 1194 n = fmtwd; 1195 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6"); 1196 p += strlen(p); 1197 snprintf(p, BUFSZ(p), "%s", t); 1198 break; 1199 case 'a': 1200 case 'A': 1201 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break; 1202 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break; 1203 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break; 1204 1205 case 's': { 1206 t = getsval(x); 1207 n = strlen(t); 1208 /* if simple format or no utf-8 in the string, sprintf works */ 1209 if (!has_utf8(t) || strcmp(fmt,"%s") == 0) { 1210 if (fmtwd > n) 1211 n = fmtwd; 1212 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7")) 1213 FATAL("huge string/format (%d chars) in printf %.30s..." \ 1214 " ran format() out of memory", n, t); 1215 snprintf(p, BUFSZ(p), fmt, t); 1216 break; 1217 } 1218 1219 /* get here if string has utf-8 chars and fmt is not plain %s */ 1220 /* "%-w.ps", where -, w and .p are all optional */ 1221 /* '0' before the w is a flag character */ 1222 /* fmt points at % */ 1223 int ljust = 0, wid = 0, prec = n, pad = 0; 1224 char *f = fmt+1; 1225 if (f[0] == '-') { 1226 ljust = 1; 1227 f++; 1228 } 1229 // flags '0' and '+' are recognized but skipped 1230 if (f[0] == '0') { 1231 f++; 1232 if (f[0] == '+') 1233 f++; 1234 } 1235 if (f[0] == '+') { 1236 f++; 1237 if (f[0] == '0') 1238 f++; 1239 } 1240 if (isdigit((uschar)f[0])) { /* there is a wid */ 1241 wid = strtol(f, &f, 10); 1242 } 1243 if (f[0] == '.') { /* there is a .prec */ 1244 prec = strtol(++f, &f, 10); 1245 } 1246 if (prec > u8_strlen(t)) 1247 prec = u8_strlen(t); 1248 pad = wid>prec ? wid - prec : 0; // has to be >= 0 1249 int i, k, n; 1250 1251 if (ljust) { // print prec chars from t, then pad blanks 1252 n = u8_char2byte(t, prec); 1253 for (k = 0; k < n; k++) { 1254 //putchar(t[k]); 1255 *p++ = t[k]; 1256 } 1257 for (i = 0; i < pad; i++) { 1258 //printf(" "); 1259 *p++ = ' '; 1260 } 1261 } else { // print pad blanks, then prec chars from t 1262 for (i = 0; i < pad; i++) { 1263 //printf(" "); 1264 *p++ = ' '; 1265 } 1266 n = u8_char2byte(t, prec); 1267 for (k = 0; k < n; k++) { 1268 //putchar(t[k]); 1269 *p++ = t[k]; 1270 } 1271 } 1272 *p = 0; 1273 break; 1274 } 1275 1276 case 'c': { 1277 /* 1278 * If a numeric value is given, awk should just turn 1279 * it into a character and print it: 1280 * BEGIN { printf("%c\n", 65) } 1281 * prints "A". 1282 * 1283 * But what if the numeric value is > 128 and 1284 * represents a valid Unicode code point?!? We do 1285 * our best to convert it back into UTF-8. If we 1286 * can't, we output the encoding of the Unicode 1287 * "invalid character", 0xFFFD. 1288 */ 1289 if (isnum(x)) { 1290 int charval = (int) getfval(x); 1291 1292 if (charval != 0) { 1293 if (charval < 128 || awk_mb_cur_max == 1) 1294 snprintf(p, BUFSZ(p), fmt, charval); 1295 else { 1296 // possible unicode character 1297 size_t count; 1298 char *bs = wide_char_to_byte_str(charval, &count); 1299 1300 if (bs == NULL) { // invalid character 1301 // use unicode invalid character, 0xFFFD 1302 bs = "\357\277\275"; 1303 count = 3; 1304 } 1305 t = bs; 1306 n = count; 1307 goto format_percent_c; 1308 } 1309 } else { 1310 *p++ = '\0'; /* explicit null byte */ 1311 *p = '\0'; /* next output will start here */ 1312 } 1313 break; 1314 } 1315 t = getsval(x); 1316 n = u8_nextlen(t); 1317 format_percent_c: 1318 if (n < 2) { /* not utf8 */ 1319 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]); 1320 break; 1321 } 1322 1323 // utf8 character, almost same song and dance as for %s 1324 int ljust = 0, wid = 0, prec = n, pad = 0; 1325 char *f = fmt+1; 1326 if (f[0] == '-') { 1327 ljust = 1; 1328 f++; 1329 } 1330 // flags '0' and '+' are recognized but skipped 1331 if (f[0] == '0') { 1332 f++; 1333 if (f[0] == '+') 1334 f++; 1335 } 1336 if (f[0] == '+') { 1337 f++; 1338 if (f[0] == '0') 1339 f++; 1340 } 1341 if (isdigit((uschar)f[0])) { /* there is a wid */ 1342 wid = strtol(f, &f, 10); 1343 } 1344 if (f[0] == '.') { /* there is a .prec */ 1345 prec = strtol(++f, &f, 10); 1346 } 1347 if (prec > 1) // %c --> only one character 1348 prec = 1; 1349 pad = wid>prec ? wid - prec : 0; // has to be >= 0 1350 int i; 1351 1352 if (ljust) { // print one char from t, then pad blanks 1353 for (i = 0; i < n; i++) 1354 *p++ = t[i]; 1355 for (i = 0; i < pad; i++) { 1356 //printf(" "); 1357 *p++ = ' '; 1358 } 1359 } else { // print pad blanks, then prec chars from t 1360 for (i = 0; i < pad; i++) { 1361 //printf(" "); 1362 *p++ = ' '; 1363 } 1364 for (i = 0; i < n; i++) 1365 *p++ = t[i]; 1366 } 1367 *p = 0; 1368 break; 1369 } 1370 default: 1371 FATAL("can't happen: bad conversion %c in format()", flag); 1372 } 1373 1374 tempfree(x); 1375 p += strlen(p); 1376 s++; 1377 } 1378 *p = '\0'; 1379 free(fmt); 1380 for ( ; a; a = a->nnext) { /* evaluate any remaining args */ 1381 x = execute(a); 1382 tempfree(x); 1383 } 1384 *pbuf = buf; 1385 *pbufsize = bufsize; 1386 return p - buf; 1387 } 1388 1389 Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */ 1390 { 1391 Cell *x; 1392 Node *y; 1393 char *buf; 1394 int bufsz=3*recsize; 1395 1396 if ((buf = (char *) malloc(bufsz)) == NULL) 1397 FATAL("out of memory in awksprintf"); 1398 y = a[0]->nnext; 1399 x = execute(a[0]); 1400 if (format(&buf, &bufsz, getsval(x), y) == -1) 1401 FATAL("sprintf string %.30s... too long. can't happen.", buf); 1402 tempfree(x); 1403 x = gettemp(); 1404 x->sval = buf; 1405 x->tval = STR; 1406 return(x); 1407 } 1408 1409 Cell *awkprintf(Node **a, int n) /* printf */ 1410 { /* a[0] is list of args, starting with format string */ 1411 /* a[1] is redirection operator, a[2] is redirection file */ 1412 FILE *fp; 1413 Cell *x; 1414 Node *y; 1415 char *buf; 1416 int len; 1417 int bufsz=3*recsize; 1418 1419 if ((buf = (char *) malloc(bufsz)) == NULL) 1420 FATAL("out of memory in awkprintf"); 1421 y = a[0]->nnext; 1422 x = execute(a[0]); 1423 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1) 1424 FATAL("printf string %.30s... too long. can't happen.", buf); 1425 tempfree(x); 1426 if (a[1] == NULL) { 1427 /* fputs(buf, stdout); */ 1428 fwrite(buf, len, 1, stdout); 1429 if (ferror(stdout)) 1430 FATAL("write error on stdout"); 1431 } else { 1432 fp = redirect(ptoi(a[1]), a[2]); 1433 /* fputs(buf, fp); */ 1434 fwrite(buf, len, 1, fp); 1435 fflush(fp); 1436 if (ferror(fp)) 1437 FATAL("write error on %s", filename(fp)); 1438 } 1439 free(buf); 1440 return(True); 1441 } 1442 1443 Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */ 1444 { 1445 Awkfloat i, j = 0; 1446 double v; 1447 Cell *x, *y, *z; 1448 1449 x = execute(a[0]); 1450 i = getfval(x); 1451 tempfree(x); 1452 if (n != UMINUS && n != UPLUS) { 1453 y = execute(a[1]); 1454 j = getfval(y); 1455 tempfree(y); 1456 } 1457 z = gettemp(); 1458 switch (n) { 1459 case ADD: 1460 i += j; 1461 break; 1462 case MINUS: 1463 i -= j; 1464 break; 1465 case MULT: 1466 i *= j; 1467 break; 1468 case DIVIDE: 1469 if (j == 0) 1470 FATAL("division by zero"); 1471 i /= j; 1472 break; 1473 case MOD: 1474 if (j == 0) 1475 FATAL("division by zero in mod"); 1476 modf(i/j, &v); 1477 i = i - j * v; 1478 break; 1479 case UMINUS: 1480 i = -i; 1481 break; 1482 case UPLUS: /* handled by getfval(), above */ 1483 break; 1484 case POWER: 1485 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ 1486 i = ipow(i, (int) j); 1487 else { 1488 errno = 0; 1489 i = errcheck(pow(i, j), "pow"); 1490 } 1491 break; 1492 default: /* can't happen */ 1493 FATAL("illegal arithmetic operator %d", n); 1494 } 1495 setfval(z, i); 1496 return(z); 1497 } 1498 1499 double ipow(double x, int n) /* x**n. ought to be done by pow, but isn't always */ 1500 { 1501 double v; 1502 1503 if (n <= 0) 1504 return 1; 1505 v = ipow(x, n/2); 1506 if (n % 2 == 0) 1507 return v * v; 1508 else 1509 return x * v * v; 1510 } 1511 1512 Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */ 1513 { 1514 Cell *x, *z; 1515 int k; 1516 Awkfloat xf; 1517 1518 x = execute(a[0]); 1519 xf = getfval(x); 1520 k = (n == PREINCR || n == POSTINCR) ? 1 : -1; 1521 if (n == PREINCR || n == PREDECR) { 1522 setfval(x, xf + k); 1523 return(x); 1524 } 1525 z = gettemp(); 1526 setfval(z, xf); 1527 setfval(x, xf + k); 1528 tempfree(x); 1529 return(z); 1530 } 1531 1532 Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ 1533 { /* this is subtle; don't muck with it. */ 1534 Cell *x, *y; 1535 Awkfloat xf, yf; 1536 double v; 1537 1538 y = execute(a[1]); 1539 x = execute(a[0]); 1540 if (n == ASSIGN) { /* ordinary assignment */ 1541 if (x == y && !(x->tval & (FLD|REC)) && x != nfloc) 1542 ; /* self-assignment: leave alone unless it's a field or NF */ 1543 else if ((y->tval & (STR|NUM)) == (STR|NUM)) { 1544 setsval(x, getsval(y)); 1545 x->fval = getfval(y); 1546 x->tval |= NUM; 1547 } 1548 else if (isstr(y)) 1549 setsval(x, getsval(y)); 1550 else if (isnum(y)) 1551 setfval(x, getfval(y)); 1552 else 1553 funnyvar(y, "read value of"); 1554 tempfree(y); 1555 return(x); 1556 } 1557 xf = getfval(x); 1558 yf = getfval(y); 1559 switch (n) { 1560 case ADDEQ: 1561 xf += yf; 1562 break; 1563 case SUBEQ: 1564 xf -= yf; 1565 break; 1566 case MULTEQ: 1567 xf *= yf; 1568 break; 1569 case DIVEQ: 1570 if (yf == 0) 1571 FATAL("division by zero in /="); 1572 xf /= yf; 1573 break; 1574 case MODEQ: 1575 if (yf == 0) 1576 FATAL("division by zero in %%="); 1577 modf(xf/yf, &v); 1578 xf = xf - yf * v; 1579 break; 1580 case POWEQ: 1581 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */ 1582 xf = ipow(xf, (int) yf); 1583 else { 1584 errno = 0; 1585 xf = errcheck(pow(xf, yf), "pow"); 1586 } 1587 break; 1588 default: 1589 FATAL("illegal assignment operator %d", n); 1590 break; 1591 } 1592 tempfree(y); 1593 setfval(x, xf); 1594 return(x); 1595 } 1596 1597 Cell *cat(Node **a, int q) /* a[0] cat a[1] */ 1598 { 1599 Cell *x, *y, *z; 1600 int n1, n2; 1601 char *s = NULL; 1602 int ssz = 0; 1603 1604 x = execute(a[0]); 1605 n1 = strlen(getsval(x)); 1606 adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1"); 1607 memcpy(s, x->sval, n1); 1608 1609 tempfree(x); 1610 1611 y = execute(a[1]); 1612 n2 = strlen(getsval(y)); 1613 adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2"); 1614 memcpy(s + n1, y->sval, n2); 1615 s[n1 + n2] = '\0'; 1616 1617 tempfree(y); 1618 1619 z = gettemp(); 1620 z->sval = s; 1621 z->tval = STR; 1622 1623 return(z); 1624 } 1625 1626 Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ 1627 { 1628 Cell *x; 1629 1630 if (a[0] == NULL) 1631 x = execute(a[1]); 1632 else { 1633 x = execute(a[0]); 1634 if (istrue(x)) { 1635 tempfree(x); 1636 x = execute(a[1]); 1637 } 1638 } 1639 return x; 1640 } 1641 1642 Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ 1643 { 1644 Cell *x; 1645 int pair; 1646 1647 pair = ptoi(a[3]); 1648 if (pairstack[pair] == 0) { 1649 x = execute(a[0]); 1650 if (istrue(x)) 1651 pairstack[pair] = 1; 1652 tempfree(x); 1653 } 1654 if (pairstack[pair] == 1) { 1655 x = execute(a[1]); 1656 if (istrue(x)) 1657 pairstack[pair] = 0; 1658 tempfree(x); 1659 x = execute(a[2]); 1660 return(x); 1661 } 1662 return(False); 1663 } 1664 1665 Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ 1666 { 1667 Cell *x = NULL, *y, *ap; 1668 const char *s, *origs, *t; 1669 const char *fs = NULL; 1670 char *origfs = NULL; 1671 int sep; 1672 char temp, num[50]; 1673 int j, n, tempstat, arg3type; 1674 double result; 1675 1676 y = execute(a[0]); /* source string */ 1677 origs = s = strdup(getsval(y)); 1678 if (s == NULL) 1679 FATAL("out of space in split"); 1680 tempfree(y); 1681 arg3type = ptoi(a[3]); 1682 if (a[2] == NULL) { /* BUG: CSV should override implicit fs but not explicit */ 1683 fs = getsval(fsloc); 1684 } else if (arg3type == STRING) { /* split(str,arr,"string") */ 1685 x = execute(a[2]); 1686 fs = origfs = strdup(getsval(x)); 1687 if (fs == NULL) 1688 FATAL("out of space in split"); 1689 tempfree(x); 1690 } else if (arg3type == REGEXPR) { 1691 fs = "(regexpr)"; /* split(str,arr,/regexpr/) */ 1692 } else { 1693 FATAL("illegal type of split"); 1694 } 1695 sep = *fs; 1696 ap = execute(a[1]); /* array name */ 1697 /* BUG 7/26/22: this appears not to reset array: see C1/asplit */ 1698 freesymtab(ap); 1699 DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs); 1700 ap->tval &= ~STR; 1701 ap->tval |= ARR; 1702 ap->sval = (char *) makesymtab(NSYMTAB); 1703 1704 n = 0; 1705 if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) { 1706 /* split(s, a, //); have to arrange that it looks like empty sep */ 1707 arg3type = 0; 1708 fs = ""; 1709 sep = 0; 1710 } 1711 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */ 1712 fa *pfa; 1713 if (arg3type == REGEXPR) { /* it's ready already */ 1714 pfa = (fa *) a[2]; 1715 } else { 1716 pfa = makedfa(fs, 1); 1717 } 1718 if (nematch(pfa,s)) { 1719 tempstat = pfa->initstat; 1720 pfa->initstat = 2; 1721 do { 1722 n++; 1723 snprintf(num, sizeof(num), "%d", n); 1724 temp = *patbeg; 1725 setptr(patbeg, '\0'); 1726 if (is_number(s, & result)) 1727 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 1728 else 1729 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1730 setptr(patbeg, temp); 1731 s = patbeg + patlen; 1732 if (*(patbeg+patlen-1) == '\0' || *s == '\0') { 1733 n++; 1734 snprintf(num, sizeof(num), "%d", n); 1735 setsymtab(num, "", 0.0, STR, (Array *) ap->sval); 1736 pfa->initstat = tempstat; 1737 goto spdone; 1738 } 1739 } while (nematch(pfa,s)); 1740 pfa->initstat = tempstat; /* bwk: has to be here to reset */ 1741 /* cf gsub and refldbld */ 1742 } 1743 n++; 1744 snprintf(num, sizeof(num), "%d", n); 1745 if (is_number(s, & result)) 1746 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 1747 else 1748 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1749 spdone: 1750 pfa = NULL; 1751 1752 } else if (a[2] == NULL && CSV) { /* CSV only if no explicit separator */ 1753 char *newt = (char *) malloc(strlen(s)); /* for building new string; reuse for each field */ 1754 for (;;) { 1755 char *fr = newt; 1756 n++; 1757 if (*s == '"' ) { /* start of "..." */ 1758 for (s++ ; *s != '\0'; ) { 1759 if (*s == '"' && s[1] != '\0' && s[1] == '"') { 1760 s += 2; /* doubled quote */ 1761 *fr++ = '"'; 1762 } else if (*s == '"' && (s[1] == '\0' || s[1] == ',')) { 1763 s++; /* skip over closing quote */ 1764 break; 1765 } else { 1766 *fr++ = *s++; 1767 } 1768 } 1769 *fr++ = 0; 1770 } else { /* unquoted field */ 1771 while (*s != ',' && *s != '\0') 1772 *fr++ = *s++; 1773 *fr++ = 0; 1774 } 1775 snprintf(num, sizeof(num), "%d", n); 1776 if (is_number(newt, &result)) 1777 setsymtab(num, newt, result, STR|NUM, (Array *) ap->sval); 1778 else 1779 setsymtab(num, newt, 0.0, STR, (Array *) ap->sval); 1780 if (*s++ == '\0') 1781 break; 1782 } 1783 free(newt); 1784 1785 } else if (!CSV && sep == ' ') { /* usual case: split on white space */ 1786 for (n = 0; ; ) { 1787 #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') 1788 while (ISWS(*s)) 1789 s++; 1790 if (*s == '\0') 1791 break; 1792 n++; 1793 t = s; 1794 do 1795 s++; 1796 while (*s != '\0' && !ISWS(*s)); 1797 temp = *s; 1798 setptr(s, '\0'); 1799 snprintf(num, sizeof(num), "%d", n); 1800 if (is_number(t, & result)) 1801 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 1802 else 1803 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1804 setptr(s, temp); 1805 if (*s != '\0') 1806 s++; 1807 } 1808 1809 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ 1810 for (n = 0; *s != '\0'; s += u8_nextlen(s)) { 1811 char buf[10]; 1812 n++; 1813 snprintf(num, sizeof(num), "%d", n); 1814 1815 for (j = 0; j < u8_nextlen(s); j++) { 1816 buf[j] = s[j]; 1817 } 1818 buf[j] = '\0'; 1819 1820 if (isdigit((uschar)buf[0])) 1821 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); 1822 else 1823 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); 1824 } 1825 1826 } else if (*s != '\0') { /* some random single character */ 1827 for (;;) { 1828 n++; 1829 t = s; 1830 while (*s != sep && *s != '\n' && *s != '\0') 1831 s++; 1832 temp = *s; 1833 setptr(s, '\0'); 1834 snprintf(num, sizeof(num), "%d", n); 1835 if (is_number(t, & result)) 1836 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 1837 else 1838 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1839 setptr(s, temp); 1840 if (*s++ == '\0') 1841 break; 1842 } 1843 } 1844 tempfree(ap); 1845 xfree(origs); 1846 xfree(origfs); 1847 x = gettemp(); 1848 x->tval = NUM; 1849 x->fval = n; 1850 return(x); 1851 } 1852 1853 Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */ 1854 { 1855 Cell *x; 1856 1857 x = execute(a[0]); 1858 if (istrue(x)) { 1859 tempfree(x); 1860 x = execute(a[1]); 1861 } else { 1862 tempfree(x); 1863 x = execute(a[2]); 1864 } 1865 return(x); 1866 } 1867 1868 Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ 1869 { 1870 Cell *x; 1871 1872 x = execute(a[0]); 1873 if (istrue(x)) { 1874 tempfree(x); 1875 x = execute(a[1]); 1876 } else if (a[2] != NULL) { 1877 tempfree(x); 1878 x = execute(a[2]); 1879 } 1880 return(x); 1881 } 1882 1883 Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */ 1884 { 1885 Cell *x; 1886 1887 for (;;) { 1888 x = execute(a[0]); 1889 if (!istrue(x)) 1890 return(x); 1891 tempfree(x); 1892 x = execute(a[1]); 1893 if (isbreak(x)) { 1894 x = True; 1895 return(x); 1896 } 1897 if (isnext(x) || isexit(x) || isret(x)) 1898 return(x); 1899 tempfree(x); 1900 } 1901 } 1902 1903 Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */ 1904 { 1905 Cell *x; 1906 1907 for (;;) { 1908 x = execute(a[0]); 1909 if (isbreak(x)) 1910 return True; 1911 if (isnext(x) || isexit(x) || isret(x)) 1912 return(x); 1913 tempfree(x); 1914 x = execute(a[1]); 1915 if (!istrue(x)) 1916 return(x); 1917 tempfree(x); 1918 } 1919 } 1920 1921 Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ 1922 { 1923 Cell *x; 1924 1925 x = execute(a[0]); 1926 tempfree(x); 1927 for (;;) { 1928 if (a[1]!=NULL) { 1929 x = execute(a[1]); 1930 if (!istrue(x)) return(x); 1931 else tempfree(x); 1932 } 1933 x = execute(a[3]); 1934 if (isbreak(x)) /* turn off break */ 1935 return True; 1936 if (isnext(x) || isexit(x) || isret(x)) 1937 return(x); 1938 tempfree(x); 1939 x = execute(a[2]); 1940 tempfree(x); 1941 } 1942 } 1943 1944 Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ 1945 { 1946 Cell *x, *vp, *arrayp, *cp, *ncp; 1947 Array *tp; 1948 int i; 1949 1950 vp = execute(a[0]); 1951 arrayp = execute(a[1]); 1952 if (!isarr(arrayp)) { 1953 return True; 1954 } 1955 tp = (Array *) arrayp->sval; 1956 tempfree(arrayp); 1957 for (i = 0; i < tp->size; i++) { /* this routine knows too much */ 1958 for (cp = tp->tab[i]; cp != NULL; cp = ncp) { 1959 setsval(vp, cp->nval); 1960 ncp = cp->cnext; 1961 x = execute(a[2]); 1962 if (isbreak(x)) { 1963 tempfree(vp); 1964 return True; 1965 } 1966 if (isnext(x) || isexit(x) || isret(x)) { 1967 tempfree(vp); 1968 return(x); 1969 } 1970 tempfree(x); 1971 } 1972 } 1973 return True; 1974 } 1975 1976 static char *nawk_convert(const char *s, int (*fun_c)(int), 1977 wint_t (*fun_wc)(wint_t)) 1978 { 1979 char *buf = NULL; 1980 char *pbuf = NULL; 1981 const char *ps = NULL; 1982 size_t n = 0; 1983 wchar_t wc; 1984 const size_t sz = awk_mb_cur_max; 1985 int unused; 1986 1987 if (sz == 1) { 1988 buf = tostring(s); 1989 1990 for (pbuf = buf; *pbuf; pbuf++) 1991 *pbuf = fun_c((uschar)*pbuf); 1992 1993 return buf; 1994 } else { 1995 /* upper/lower character may be shorter/longer */ 1996 buf = tostringN(s, strlen(s) * sz + 1); 1997 1998 (void) mbtowc(NULL, NULL, 0); /* reset internal state */ 1999 /* 2000 * Reset internal state here too. 2001 * Assign result to avoid a compiler warning. (Casting to void 2002 * doesn't work.) 2003 * Increment said variable to avoid a different warning. 2004 */ 2005 unused = wctomb(NULL, L'\0'); 2006 unused++; 2007 2008 ps = s; 2009 pbuf = buf; 2010 while (n = mbtowc(&wc, ps, sz), 2011 n > 0 && n != (size_t)-1 && n != (size_t)-2) 2012 { 2013 ps += n; 2014 2015 n = wctomb(pbuf, fun_wc(wc)); 2016 if (n == (size_t)-1) 2017 FATAL("illegal wide character %s", s); 2018 2019 pbuf += n; 2020 } 2021 2022 *pbuf = '\0'; 2023 2024 if (n) 2025 FATAL("illegal byte sequence %s", s); 2026 2027 return buf; 2028 } 2029 } 2030 2031 #ifdef __DJGPP__ 2032 static wint_t towupper(wint_t wc) 2033 { 2034 if (wc >= 0 && wc < 256) 2035 return toupper(wc & 0xFF); 2036 2037 return wc; 2038 } 2039 2040 static wint_t towlower(wint_t wc) 2041 { 2042 if (wc >= 0 && wc < 256) 2043 return tolower(wc & 0xFF); 2044 2045 return wc; 2046 } 2047 #endif 2048 2049 static char *nawk_toupper(const char *s) 2050 { 2051 return nawk_convert(s, toupper, towupper); 2052 } 2053 2054 static char *nawk_tolower(const char *s) 2055 { 2056 return nawk_convert(s, tolower, towlower); 2057 } 2058 2059 Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ 2060 { 2061 Cell *x, *y; 2062 Awkfloat u; 2063 int t, sz; 2064 Awkfloat tmp; 2065 char *buf, *fmt; 2066 Node *nextarg; 2067 FILE *fp; 2068 int status = 0; 2069 time_t tv; 2070 struct tm *tm, tmbuf; 2071 2072 t = ptoi(a[0]); 2073 x = execute(a[1]); 2074 nextarg = a[1]->nnext; 2075 switch (t) { 2076 case FLENGTH: 2077 if (isarr(x)) 2078 u = ((Array *) x->sval)->nelem; /* GROT. should be function*/ 2079 else 2080 u = u8_strlen(getsval(x)); 2081 break; 2082 case FLOG: 2083 errno = 0; 2084 u = errcheck(log(getfval(x)), "log"); 2085 break; 2086 case FINT: 2087 modf(getfval(x), &u); break; 2088 case FEXP: 2089 errno = 0; 2090 u = errcheck(exp(getfval(x)), "exp"); 2091 break; 2092 case FSQRT: 2093 errno = 0; 2094 u = errcheck(sqrt(getfval(x)), "sqrt"); 2095 break; 2096 case FSIN: 2097 u = sin(getfval(x)); break; 2098 case FCOS: 2099 u = cos(getfval(x)); break; 2100 case FATAN: 2101 if (nextarg == NULL) { 2102 WARNING("atan2 requires two arguments; returning 1.0"); 2103 u = 1.0; 2104 } else { 2105 y = execute(a[1]->nnext); 2106 u = atan2(getfval(x), getfval(y)); 2107 tempfree(y); 2108 nextarg = nextarg->nnext; 2109 } 2110 break; 2111 case FCOMPL: 2112 u = ~((int)getfval(x)); 2113 break; 2114 case FAND: 2115 if (nextarg == 0) { 2116 WARNING("and requires two arguments; returning 0"); 2117 u = 0; 2118 break; 2119 } 2120 y = execute(a[1]->nnext); 2121 u = ((int)getfval(x)) & ((int)getfval(y)); 2122 tempfree(y); 2123 nextarg = nextarg->nnext; 2124 break; 2125 case FFOR: 2126 if (nextarg == 0) { 2127 WARNING("or requires two arguments; returning 0"); 2128 u = 0; 2129 break; 2130 } 2131 y = execute(a[1]->nnext); 2132 u = ((int)getfval(x)) | ((int)getfval(y)); 2133 tempfree(y); 2134 nextarg = nextarg->nnext; 2135 break; 2136 case FXOR: 2137 if (nextarg == 0) { 2138 WARNING("xor requires two arguments; returning 0"); 2139 u = 0; 2140 break; 2141 } 2142 y = execute(a[1]->nnext); 2143 u = ((int)getfval(x)) ^ ((int)getfval(y)); 2144 tempfree(y); 2145 nextarg = nextarg->nnext; 2146 break; 2147 case FLSHIFT: 2148 if (nextarg == 0) { 2149 WARNING("lshift requires two arguments; returning 0"); 2150 u = 0; 2151 break; 2152 } 2153 y = execute(a[1]->nnext); 2154 u = ((int)getfval(x)) << ((int)getfval(y)); 2155 tempfree(y); 2156 nextarg = nextarg->nnext; 2157 break; 2158 case FRSHIFT: 2159 if (nextarg == 0) { 2160 WARNING("rshift requires two arguments; returning 0"); 2161 u = 0; 2162 break; 2163 } 2164 y = execute(a[1]->nnext); 2165 u = ((int)getfval(x)) >> ((int)getfval(y)); 2166 tempfree(y); 2167 nextarg = nextarg->nnext; 2168 break; 2169 case FSYSTEM: 2170 fflush(stdout); /* in case something is buffered already */ 2171 status = system(getsval(x)); 2172 u = status; 2173 if (status != -1) { 2174 if (WIFEXITED(status)) { 2175 u = WEXITSTATUS(status); 2176 } else if (WIFSIGNALED(status)) { 2177 u = WTERMSIG(status) + 256; 2178 #ifdef WCOREDUMP 2179 if (WCOREDUMP(status)) 2180 u += 256; 2181 #endif 2182 } else /* something else?!? */ 2183 u = 0; 2184 } 2185 break; 2186 case FRAND: 2187 /* random() returns numbers in [0..2^31-1] 2188 * in order to get a number in [0, 1), divide it by 2^31 2189 */ 2190 u = (Awkfloat) random() / (0x7fffffffL + 0x1UL); 2191 break; 2192 case FSRAND: 2193 if (isrec(x)) { /* no argument provided */ 2194 u = time(NULL); 2195 tmp = u; 2196 srandom((unsigned int) u); 2197 } else { 2198 u = getfval(x); 2199 tmp = u; 2200 srandom_deterministic((unsigned int) u); 2201 } 2202 u = srand_seed; 2203 srand_seed = tmp; 2204 break; 2205 case FTOUPPER: 2206 case FTOLOWER: 2207 if (t == FTOUPPER) 2208 buf = nawk_toupper(getsval(x)); 2209 else 2210 buf = nawk_tolower(getsval(x)); 2211 tempfree(x); 2212 x = gettemp(); 2213 setsval(x, buf); 2214 free(buf); 2215 return x; 2216 case FFLUSH: 2217 if (isrec(x) || strlen(getsval(x)) == 0) { 2218 flush_all(); /* fflush() or fflush("") -> all */ 2219 u = 0; 2220 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL) 2221 u = EOF; 2222 else 2223 u = fflush(fp); 2224 break; 2225 case FMKTIME: 2226 memset(&tmbuf, 0, sizeof(tmbuf)); 2227 tm = &tmbuf; 2228 t = sscanf(getsval(x), "%d %d %d %d %d %d %d", 2229 &tm->tm_year, &tm->tm_mon, &tm->tm_mday, &tm->tm_hour, 2230 &tm->tm_min, &tm->tm_sec, &tm->tm_isdst); 2231 switch (t) { 2232 case 6: 2233 tm->tm_isdst = -1; /* let mktime figure it out */ 2234 /* FALLTHROUGH */ 2235 case 7: 2236 tm->tm_year -= 1900; 2237 tm->tm_mon--; 2238 u = mktime(tm); 2239 break; 2240 default: 2241 u = -1; 2242 break; 2243 } 2244 break; 2245 case FSYSTIME: 2246 u = time((time_t *) 0); 2247 break; 2248 case FSTRFTIME: 2249 /* strftime([format [,timestamp]]) */ 2250 if (nextarg) { 2251 y = execute(nextarg); 2252 nextarg = nextarg->nnext; 2253 tv = (time_t) getfval(y); 2254 tempfree(y); 2255 } else 2256 tv = time((time_t *) 0); 2257 tm = localtime(&tv); 2258 if (tm == NULL) 2259 FATAL("bad time %ld", (long)tv); 2260 2261 if (isrec(x)) { 2262 /* format argument not provided, use default */ 2263 fmt = tostring("%a %b %d %H:%M:%S %Z %Y"); 2264 } else 2265 fmt = tostring(getsval(x)); 2266 2267 sz = 32; 2268 buf = NULL; 2269 do { 2270 if ((buf = (char *) reallocarray(buf, 2, sz)) == NULL) 2271 FATAL("out of memory in strftime"); 2272 sz *= 2; 2273 } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0'); 2274 2275 y = gettemp(); 2276 setsval(y, buf); 2277 free(fmt); 2278 free(buf); 2279 2280 return y; 2281 default: /* can't happen */ 2282 FATAL("illegal function type %d", t); 2283 break; 2284 } 2285 tempfree(x); 2286 x = gettemp(); 2287 setfval(x, u); 2288 if (nextarg != NULL) { 2289 WARNING("warning: function has too many arguments"); 2290 for ( ; nextarg; nextarg = nextarg->nnext) { 2291 y = execute(nextarg); 2292 tempfree(y); 2293 } 2294 } 2295 return(x); 2296 } 2297 2298 Cell *printstat(Node **a, int n) /* print a[0] */ 2299 { 2300 Node *x; 2301 Cell *y; 2302 FILE *fp; 2303 2304 if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */ 2305 fp = stdout; 2306 else 2307 fp = redirect(ptoi(a[1]), a[2]); 2308 for (x = a[0]; x != NULL; x = x->nnext) { 2309 y = execute(x); 2310 fputs(getpssval(y), fp); 2311 tempfree(y); 2312 if (x->nnext == NULL) 2313 fputs(getsval(orsloc), fp); 2314 else 2315 fputs(getsval(ofsloc), fp); 2316 } 2317 if (a[1] != NULL) 2318 fflush(fp); 2319 if (ferror(fp)) 2320 FATAL("write error on %s", filename(fp)); 2321 return(True); 2322 } 2323 2324 Cell *nullproc(Node **a, int n) 2325 { 2326 return 0; 2327 } 2328 2329 2330 FILE *redirect(int a, Node *b) /* set up all i/o redirections */ 2331 { 2332 FILE *fp; 2333 Cell *x; 2334 char *fname; 2335 2336 x = execute(b); 2337 fname = getsval(x); 2338 fp = openfile(a, fname, NULL); 2339 if (fp == NULL) 2340 FATAL("can't open file %s", fname); 2341 tempfree(x); 2342 return fp; 2343 } 2344 2345 struct files { 2346 FILE *fp; 2347 const char *fname; 2348 int mode; /* '|', 'a', 'w' => LE/LT, GT */ 2349 } *files; 2350 2351 size_t nfiles; 2352 2353 static void stdinit(void) /* in case stdin, etc., are not constants */ 2354 { 2355 nfiles = FOPEN_MAX; 2356 files = (struct files *) calloc(nfiles, sizeof(*files)); 2357 if (files == NULL) 2358 FATAL("can't allocate file memory for %zu files", nfiles); 2359 files[0].fp = stdin; 2360 files[0].fname = tostring("/dev/stdin"); 2361 files[0].mode = LT; 2362 files[1].fp = stdout; 2363 files[1].fname = tostring("/dev/stdout"); 2364 files[1].mode = GT; 2365 files[2].fp = stderr; 2366 files[2].fname = tostring("/dev/stderr"); 2367 files[2].mode = GT; 2368 } 2369 2370 FILE *openfile(int a, const char *us, bool *pnewflag) 2371 { 2372 const char *s = us; 2373 size_t i; 2374 int m; 2375 FILE *fp = NULL; 2376 2377 if (*s == '\0') 2378 FATAL("null file name in print or getline"); 2379 for (i = 0; i < nfiles; i++) 2380 if (files[i].fname && strcmp(s, files[i].fname) == 0 && 2381 (a == files[i].mode || (a==APPEND && files[i].mode==GT) || 2382 a == FFLUSH)) { 2383 if (pnewflag) 2384 *pnewflag = false; 2385 return files[i].fp; 2386 } 2387 if (a == FFLUSH) /* didn't find it, so don't create it! */ 2388 return NULL; 2389 2390 for (i = 0; i < nfiles; i++) 2391 if (files[i].fp == NULL) 2392 break; 2393 if (i >= nfiles) { 2394 struct files *nf; 2395 size_t nnf = nfiles + FOPEN_MAX; 2396 nf = (struct files *) reallocarray(files, nnf, sizeof(*nf)); 2397 if (nf == NULL) 2398 FATAL("cannot grow files for %s and %zu files", s, nnf); 2399 memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf)); 2400 nfiles = nnf; 2401 files = nf; 2402 } 2403 fflush(stdout); /* force a semblance of order */ 2404 m = a; 2405 if (a == GT) { 2406 fp = fopen(s, "w"); 2407 } else if (a == APPEND) { 2408 fp = fopen(s, "a"); 2409 m = GT; /* so can mix > and >> */ 2410 } else if (a == '|') { /* output pipe */ 2411 fp = popen(s, "w"); 2412 } else if (a == LE) { /* input pipe */ 2413 fp = popen(s, "r"); 2414 } else if (a == LT) { /* getline <file */ 2415 fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */ 2416 } else /* can't happen */ 2417 FATAL("illegal redirection %d", a); 2418 if (fp != NULL) { 2419 files[i].fname = tostring(s); 2420 files[i].fp = fp; 2421 files[i].mode = m; 2422 if (pnewflag) 2423 *pnewflag = true; 2424 if (fp != stdin && fp != stdout && fp != stderr) 2425 (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC); 2426 } 2427 return fp; 2428 } 2429 2430 const char *filename(FILE *fp) 2431 { 2432 size_t i; 2433 2434 for (i = 0; i < nfiles; i++) 2435 if (fp == files[i].fp) 2436 return files[i].fname; 2437 return "???"; 2438 } 2439 2440 Cell *closefile(Node **a, int n) 2441 { 2442 Cell *x; 2443 size_t i; 2444 bool stat; 2445 2446 x = execute(a[0]); 2447 getsval(x); 2448 stat = true; 2449 for (i = 0; i < nfiles; i++) { 2450 if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0) 2451 continue; 2452 if (files[i].mode == GT || files[i].mode == '|') 2453 fflush(files[i].fp); 2454 if (ferror(files[i].fp)) { 2455 if ((files[i].mode == GT && files[i].fp != stderr) 2456 || files[i].mode == '|') 2457 FATAL("write error on %s", files[i].fname); 2458 else 2459 WARNING("i/o error occurred on %s", files[i].fname); 2460 } 2461 if (files[i].fp == stdin || files[i].fp == stdout || 2462 files[i].fp == stderr) 2463 stat = freopen("/dev/null", "r+", files[i].fp) == NULL; 2464 else if (files[i].mode == '|' || files[i].mode == LE) 2465 stat = pclose(files[i].fp) == -1; 2466 else 2467 stat = fclose(files[i].fp) == EOF; 2468 if (stat) 2469 WARNING("i/o error occurred closing %s", files[i].fname); 2470 xfree(files[i].fname); 2471 files[i].fname = NULL; /* watch out for ref thru this */ 2472 files[i].fp = NULL; 2473 break; 2474 } 2475 tempfree(x); 2476 x = gettemp(); 2477 setfval(x, (Awkfloat) (stat ? -1 : 0)); 2478 return(x); 2479 } 2480 2481 void closeall(void) 2482 { 2483 size_t i; 2484 bool stat = false; 2485 2486 for (i = 0; i < nfiles; i++) { 2487 if (! files[i].fp) 2488 continue; 2489 if (files[i].mode == GT || files[i].mode == '|') 2490 fflush(files[i].fp); 2491 if (ferror(files[i].fp)) { 2492 if ((files[i].mode == GT && files[i].fp != stderr) 2493 || files[i].mode == '|') 2494 FATAL("write error on %s", files[i].fname); 2495 else 2496 WARNING("i/o error occurred on %s", files[i].fname); 2497 } 2498 if (files[i].fp == stdin || files[i].fp == stdout || 2499 files[i].fp == stderr) 2500 continue; 2501 if (files[i].mode == '|' || files[i].mode == LE) 2502 stat = pclose(files[i].fp) == -1; 2503 else 2504 stat = fclose(files[i].fp) == EOF; 2505 if (stat) 2506 WARNING("i/o error occurred while closing %s", files[i].fname); 2507 } 2508 } 2509 2510 static void flush_all(void) 2511 { 2512 size_t i; 2513 2514 for (i = 0; i < nfiles; i++) 2515 if (files[i].fp) 2516 fflush(files[i].fp); 2517 } 2518 2519 void backsub(char **pb_ptr, const char **sptr_ptr); 2520 2521 Cell *sub(Node **a, int nnn) /* substitute command */ 2522 { 2523 const char *sptr, *q; 2524 Cell *x, *y, *result; 2525 char *t, *buf, *pb; 2526 fa *pfa; 2527 int bufsz = recsize; 2528 2529 if ((buf = (char *) malloc(bufsz)) == NULL) 2530 FATAL("out of memory in sub"); 2531 x = execute(a[3]); /* target string */ 2532 t = getsval(x); 2533 if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ 2534 pfa = (fa *) a[1]; /* regular expression */ 2535 else { 2536 y = execute(a[1]); 2537 pfa = makedfa(getsval(y), 1); 2538 tempfree(y); 2539 } 2540 y = execute(a[2]); /* replacement string */ 2541 result = False; 2542 if (pmatch(pfa, t)) { 2543 sptr = t; 2544 adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub"); 2545 pb = buf; 2546 while (sptr < patbeg) 2547 *pb++ = *sptr++; 2548 sptr = getsval(y); 2549 while (*sptr != '\0') { 2550 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub"); 2551 if (*sptr == '\\') { 2552 backsub(&pb, &sptr); 2553 } else if (*sptr == '&') { 2554 sptr++; 2555 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub"); 2556 for (q = patbeg; q < patbeg+patlen; ) 2557 *pb++ = *q++; 2558 } else 2559 *pb++ = *sptr++; 2560 } 2561 *pb = '\0'; 2562 if (pb > buf + bufsz) 2563 FATAL("sub result1 %.30s too big; can't happen", buf); 2564 sptr = patbeg + patlen; 2565 if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) { 2566 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub"); 2567 while ((*pb++ = *sptr++) != '\0') 2568 continue; 2569 } 2570 if (pb > buf + bufsz) 2571 FATAL("sub result2 %.30s too big; can't happen", buf); 2572 setsval(x, buf); /* BUG: should be able to avoid copy */ 2573 result = True; 2574 } 2575 tempfree(x); 2576 tempfree(y); 2577 free(buf); 2578 return result; 2579 } 2580 2581 Cell *gsub(Node **a, int nnn) /* global substitute */ 2582 { 2583 Cell *x, *y; 2584 char *rptr, *pb; 2585 const char *q, *t, *sptr; 2586 char *buf; 2587 fa *pfa; 2588 int mflag, tempstat, num; 2589 int bufsz = recsize; 2590 int charlen = 0; 2591 2592 if ((buf = (char *) malloc(bufsz)) == NULL) 2593 FATAL("out of memory in gsub"); 2594 mflag = 0; /* if mflag == 0, can replace empty string */ 2595 num = 0; 2596 x = execute(a[3]); /* target string */ 2597 t = getsval(x); 2598 if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ 2599 pfa = (fa *) a[1]; /* regular expression */ 2600 else { 2601 y = execute(a[1]); 2602 pfa = makedfa(getsval(y), 1); 2603 tempfree(y); 2604 } 2605 y = execute(a[2]); /* replacement string */ 2606 if (pmatch(pfa, t)) { 2607 tempstat = pfa->initstat; 2608 pfa->initstat = 2; 2609 pb = buf; 2610 rptr = getsval(y); 2611 do { 2612 if (patlen == 0 && *patbeg != '\0') { /* matched empty string */ 2613 if (mflag == 0) { /* can replace empty */ 2614 num++; 2615 sptr = rptr; 2616 while (*sptr != '\0') { 2617 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); 2618 if (*sptr == '\\') { 2619 backsub(&pb, &sptr); 2620 } else if (*sptr == '&') { 2621 sptr++; 2622 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); 2623 for (q = patbeg; q < patbeg+patlen; ) 2624 *pb++ = *q++; 2625 } else 2626 *pb++ = *sptr++; 2627 } 2628 } 2629 if (*t == '\0') /* at end */ 2630 goto done; 2631 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub"); 2632 charlen = u8_nextlen(t); 2633 while (charlen-- > 0) 2634 *pb++ = *t++; 2635 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2636 FATAL("gsub result0 %.30s too big; can't happen", buf); 2637 mflag = 0; 2638 } 2639 else { /* matched nonempty string */ 2640 num++; 2641 sptr = t; 2642 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub"); 2643 while (sptr < patbeg) 2644 *pb++ = *sptr++; 2645 sptr = rptr; 2646 while (*sptr != '\0') { 2647 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); 2648 if (*sptr == '\\') { 2649 backsub(&pb, &sptr); 2650 } else if (*sptr == '&') { 2651 sptr++; 2652 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); 2653 for (q = patbeg; q < patbeg+patlen; ) 2654 *pb++ = *q++; 2655 } else 2656 *pb++ = *sptr++; 2657 } 2658 t = patbeg + patlen; 2659 if (patlen == 0 || *t == '\0' || *(t-1) == '\0') 2660 goto done; 2661 if (pb > buf + bufsz) 2662 FATAL("gsub result1 %.30s too big; can't happen", buf); 2663 mflag = 1; 2664 } 2665 } while (pmatch(pfa,t)); 2666 sptr = t; 2667 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub"); 2668 while ((*pb++ = *sptr++) != '\0') 2669 continue; 2670 done: if (pb < buf + bufsz) 2671 *pb = '\0'; 2672 else if (*(pb-1) != '\0') 2673 FATAL("gsub result2 %.30s truncated; can't happen", buf); 2674 setsval(x, buf); /* BUG: should be able to avoid copy + free */ 2675 pfa->initstat = tempstat; 2676 } 2677 tempfree(x); 2678 tempfree(y); 2679 x = gettemp(); 2680 x->tval = NUM; 2681 x->fval = num; 2682 free(buf); 2683 return(x); 2684 } 2685 2686 Cell *gensub(Node **a, int nnn) /* global selective substitute */ 2687 /* XXX incomplete - doesn't support backreferences \0 ... \9 */ 2688 { 2689 Cell *x, *y, *res, *h; 2690 char *rptr; 2691 const char *sptr; 2692 char *buf, *pb; 2693 const char *t, *q; 2694 fa *pfa; 2695 int mflag, tempstat, num, whichm; 2696 int bufsz = recsize; 2697 2698 if ((buf = malloc(bufsz)) == NULL) 2699 FATAL("out of memory in gensub"); 2700 mflag = 0; /* if mflag == 0, can replace empty string */ 2701 num = 0; 2702 x = execute(a[4]); /* source string */ 2703 t = getsval(x); 2704 res = copycell(x); /* target string - initially copy of source */ 2705 res->csub = CTEMP; /* result values are temporary */ 2706 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ 2707 pfa = (fa *) a[1]; /* regular expression */ 2708 else { 2709 y = execute(a[1]); 2710 pfa = makedfa(getsval(y), 1); 2711 tempfree(y); 2712 } 2713 y = execute(a[2]); /* replacement string */ 2714 h = execute(a[3]); /* which matches should be replaced */ 2715 sptr = getsval(h); 2716 if (sptr[0] == 'g' || sptr[0] == 'G') 2717 whichm = -1; 2718 else { 2719 /* 2720 * The specified number is index of replacement, starting 2721 * from 1. GNU awk treats index lower than 0 same as 2722 * 1, we do same for compatibility. 2723 */ 2724 whichm = (int) getfval(h) - 1; 2725 if (whichm < 0) 2726 whichm = 0; 2727 } 2728 tempfree(h); 2729 2730 if (pmatch(pfa, t)) { 2731 char *sl; 2732 2733 tempstat = pfa->initstat; 2734 pfa->initstat = 2; 2735 pb = buf; 2736 rptr = getsval(y); 2737 /* 2738 * XXX if there are any backreferences in subst string, 2739 * complain now. 2740 */ 2741 for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) { 2742 if (strchr("0123456789", sl[1])) { 2743 FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr); 2744 } 2745 } 2746 2747 do { 2748 if (whichm >= 0 && whichm != num) { 2749 num++; 2750 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub"); 2751 2752 /* copy the part of string up to and including 2753 * match to output buffer */ 2754 while (t < patbeg + patlen) 2755 *pb++ = *t++; 2756 continue; 2757 } 2758 2759 if (patlen == 0 && *patbeg != 0) { /* matched empty string */ 2760 if (mflag == 0) { /* can replace empty */ 2761 num++; 2762 sptr = rptr; 2763 while (*sptr != 0) { 2764 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2765 if (*sptr == '\\') { 2766 backsub(&pb, &sptr); 2767 } else if (*sptr == '&') { 2768 sptr++; 2769 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2770 for (q = patbeg; q < patbeg+patlen; ) 2771 *pb++ = *q++; 2772 } else 2773 *pb++ = *sptr++; 2774 } 2775 } 2776 if (*t == 0) /* at end */ 2777 goto done; 2778 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub"); 2779 *pb++ = *t++; 2780 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2781 FATAL("gensub result0 %.30s too big; can't happen", buf); 2782 mflag = 0; 2783 } 2784 else { /* matched nonempty string */ 2785 num++; 2786 sptr = t; 2787 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub"); 2788 while (sptr < patbeg) 2789 *pb++ = *sptr++; 2790 sptr = rptr; 2791 while (*sptr != 0) { 2792 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2793 if (*sptr == '\\') { 2794 backsub(&pb, &sptr); 2795 } else if (*sptr == '&') { 2796 sptr++; 2797 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2798 for (q = patbeg; q < patbeg+patlen; ) 2799 *pb++ = *q++; 2800 } else 2801 *pb++ = *sptr++; 2802 } 2803 t = patbeg + patlen; 2804 if (patlen == 0 || *t == 0 || *(t-1) == 0) 2805 goto done; 2806 if (pb > buf + bufsz) 2807 FATAL("gensub result1 %.30s too big; can't happen", buf); 2808 mflag = 1; 2809 } 2810 } while (pmatch(pfa,t)); 2811 sptr = t; 2812 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub"); 2813 while ((*pb++ = *sptr++) != 0) 2814 ; 2815 done: if (pb > buf + bufsz) 2816 FATAL("gensub result2 %.30s too big; can't happen", buf); 2817 *pb = '\0'; 2818 setsval(res, buf); 2819 pfa->initstat = tempstat; 2820 } 2821 tempfree(x); 2822 tempfree(y); 2823 free(buf); 2824 return(res); 2825 } 2826 2827 void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ 2828 { /* sptr[0] == '\\' */ 2829 char *pb = *pb_ptr; 2830 const char *sptr = *sptr_ptr; 2831 2832 if (sptr[1] == '\\') { 2833 if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ 2834 *pb++ = '\\'; 2835 *pb++ = '&'; 2836 sptr += 4; 2837 } else if (sptr[2] == '&') { /* \\& -> \ + matched */ 2838 *pb++ = '\\'; 2839 sptr += 2; 2840 } else if (do_posix) { /* \\x -> \x */ 2841 sptr++; 2842 *pb++ = *sptr++; 2843 } else { /* \\x -> \\x */ 2844 *pb++ = *sptr++; 2845 *pb++ = *sptr++; 2846 } 2847 } else if (sptr[1] == '&') { /* literal & */ 2848 sptr++; 2849 *pb++ = *sptr++; 2850 } else /* literal \ */ 2851 *pb++ = *sptr++; 2852 2853 *pb_ptr = pb; 2854 *sptr_ptr = sptr; 2855 } 2856 2857 static char *wide_char_to_byte_str(int rune, size_t *outlen) 2858 { 2859 static char buf[5]; 2860 int len; 2861 2862 if (rune < 0 || rune > 0x10FFFF) 2863 return NULL; 2864 2865 memset(buf, 0, sizeof(buf)); 2866 2867 len = 0; 2868 if (rune <= 0x0000007F) { 2869 buf[len++] = rune; 2870 } else if (rune <= 0x000007FF) { 2871 // 110xxxxx 10xxxxxx 2872 buf[len++] = 0xC0 | (rune >> 6); 2873 buf[len++] = 0x80 | (rune & 0x3F); 2874 } else if (rune <= 0x0000FFFF) { 2875 // 1110xxxx 10xxxxxx 10xxxxxx 2876 buf[len++] = 0xE0 | (rune >> 12); 2877 buf[len++] = 0x80 | ((rune >> 6) & 0x3F); 2878 buf[len++] = 0x80 | (rune & 0x3F); 2879 2880 } else { 2881 // 0x00010000 - 0x10FFFF 2882 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 2883 buf[len++] = 0xF0 | (rune >> 18); 2884 buf[len++] = 0x80 | ((rune >> 12) & 0x3F); 2885 buf[len++] = 0x80 | ((rune >> 6) & 0x3F); 2886 buf[len++] = 0x80 | (rune & 0x3F); 2887 } 2888 2889 *outlen = len; 2890 buf[len++] = '\0'; 2891 2892 return buf; 2893 } 2894