1 /* $OpenBSD: run.c,v 1.84 2024/01/25 16:40:51 millert Exp $ */ 2 /**************************************************************** 3 Copyright (C) Lucent Technologies 1997 4 All Rights Reserved 5 6 Permission to use, copy, modify, and distribute this software and 7 its documentation for any purpose and without fee is hereby 8 granted, provided that the above copyright notice appear in all 9 copies and that both that the copyright notice and this 10 permission notice and warranty disclaimer appear in supporting 11 documentation, and that the name Lucent Technologies or any of 12 its entities not be used in advertising or publicity pertaining 13 to distribution of the software without specific, written prior 14 permission. 15 16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 23 THIS SOFTWARE. 
****************************************************************/

#define DEBUG
#include <stdio.h>
#include <ctype.h>
#include <errno.h>
#include <wctype.h>
#include <fcntl.h>
#include <setjmp.h>
#include <limits.h>
#include <math.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include "awk.h"
#include "awkgram.tab.h"


/* forward declarations of file-local helpers */
static void stdinit(void);
static void flush_all(void);
static char *wide_char_to_byte_str(int rune, size_t *outlen);

/*
 * tempfree(x): hand cell x to tfree() only when istemp(x) is true;
 * any other cell is left untouched.  The #else branch is a disabled
 * function variant that additionally warns when an OCELL has a csub
 * outside the CUNK..CFREE range.
 */
#if 1
#define tempfree(x)	do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0)
#else
void tempfree(Cell *p) {
	if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) {
		WARNING("bad csub %d in Cell %d %s",
			p->csub, p->ctype, p->sval);
	}
	if (istemp(p))
		tfree(p);
}
#endif

/* do we really need these? */
/* #ifdef _NFILE */
/* #ifndef FOPEN_MAX */
/* #define FOPEN_MAX _NFILE */
/* #endif */
/* #endif */
/*  */
/* #ifndef FOPEN_MAX */
/* #define	FOPEN_MAX	40 */	/* max number of open files */
/* #endif */
/*  */
/* #ifndef RAND_MAX */
/* #define RAND_MAX	32767 */	/* all that ansi guarantees */
/* #endif */

jmp_buf env;		/* setjmp target used by program(); jump() longjmps here on exit */
extern	int	pairstack[];
extern	Awkfloat	srand_seed;

Node	*winner = NULL;	/* root of parse tree */
Cell	*tmps;		/* free temporary cells for execution */

/*
 * Statically allocated sentinel cells.  True/False are the shared
 * boolean results; the j* pointers are the OJUMP markers that jump()
 * returns for break, continue, next, nextfile and return.
 */
static Cell	truecell	={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL };
Cell	*True	= &truecell;
static Cell	falsecell	={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*False	= &falsecell;
static Cell	breakcell	={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jbreak	= &breakcell;
static Cell	contcell	={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jcont	= &contcell;
static Cell	nextcell	={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jnext	= &nextcell;
static Cell	nextfilecell	={ OJUMP,
JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL }; 94 Cell *jnextfile = &nextfilecell; 95 static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL }; 96 Cell *jexit = &exitcell; 97 static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL }; 98 Cell *jret = &retcell; 99 static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 100 101 Node *curnode = NULL; /* the node being executed, for debugging */ 102 103 /* buffer memory management */ 104 int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, 105 const char *whatrtn) 106 /* pbuf: address of pointer to buffer being managed 107 * psiz: address of buffer size variable 108 * minlen: minimum length of buffer needed 109 * quantum: buffer size quantum 110 * pbptr: address of movable pointer into buffer, or 0 if none 111 * whatrtn: name of the calling routine if failure should cause fatal error 112 * 113 * return 0 for realloc failure, !=0 for success 114 */ 115 { 116 if (minlen > *psiz) { 117 char *tbuf; 118 int rminlen = quantum ? minlen % quantum : 0; 119 int boff = pbptr ? 
*pbptr - *pbuf : 0; 120 /* round up to next multiple of quantum */ 121 if (rminlen) 122 minlen += quantum - rminlen; 123 tbuf = (char *) realloc(*pbuf, minlen); 124 DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)tbuf); 125 if (tbuf == NULL) { 126 if (whatrtn) 127 FATAL("out of memory in %s", whatrtn); 128 return 0; 129 } 130 *pbuf = tbuf; 131 *psiz = minlen; 132 if (pbptr) 133 *pbptr = tbuf + boff; 134 } 135 return 1; 136 } 137 138 void run(Node *a) /* execution of parse tree starts here */ 139 { 140 141 stdinit(); 142 execute(a); 143 closeall(); 144 } 145 146 Cell *execute(Node *u) /* execute a node of the parse tree */ 147 { 148 Cell *(*proc)(Node **, int); 149 Cell *x; 150 Node *a; 151 152 if (u == NULL) 153 return(True); 154 for (a = u; ; a = a->nnext) { 155 curnode = a; 156 if (isvalue(a)) { 157 x = (Cell *) (a->narg[0]); 158 if (isfld(x) && !donefld) 159 fldbld(); 160 else if (isrec(x) && !donerec) 161 recbld(); 162 return(x); 163 } 164 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */ 165 FATAL("illegal statement"); 166 proc = proctab[a->nobj-FIRSTTOKEN]; 167 x = (*proc)(a->narg, a->nobj); 168 if (isfld(x) && !donefld) 169 fldbld(); 170 else if (isrec(x) && !donerec) 171 recbld(); 172 if (isexpr(a)) 173 return(x); 174 if (isjump(x)) 175 return(x); 176 if (a->nnext == NULL) 177 return(x); 178 tempfree(x); 179 } 180 } 181 182 183 Cell *program(Node **a, int n) /* execute an awk program */ 184 { /* a[0] = BEGIN, a[1] = body, a[2] = END */ 185 Cell *x; 186 187 if (setjmp(env) != 0) 188 goto ex; 189 if (a[0]) { /* BEGIN */ 190 x = execute(a[0]); 191 if (isexit(x)) 192 return(True); 193 if (isjump(x)) 194 FATAL("illegal break, continue, next or nextfile from BEGIN"); 195 tempfree(x); 196 } 197 if (a[1] || a[2]) 198 while (getrec(&record, &recsize, true) > 0) { 199 x = execute(a[1]); 200 if (isexit(x)) 201 break; 202 tempfree(x); 203 } 204 ex: 205 if (setjmp(env) != 0) /* handles exit within END */ 
goto ex1;
	if (a[2]) {		/* END */
		x = execute(a[2]);
		if (isbreak(x) || isnext(x) || iscont(x))
			FATAL("illegal break, continue, next or nextfile from END");
		tempfree(x);
	}
  ex1:
	return(True);
}

struct Frame {	/* stack frame for awk function calls */
	int nargs;	/* number of arguments in this call */
	Cell *fcncell;	/* pointer to Cell for function */
	Cell **args;	/* pointer to array of arguments after execute */
	Cell *retval;	/* return value */
};

#define	NARGS	50	/* max args in a call */

struct Frame *frame = NULL;	/* base of stack frames; dynamically allocated */
int	nframe = 0;		/* number of frames allocated */
struct Frame *frp = NULL;	/* frame pointer. bottom level unused */

/*
 * Call a user-defined function.  a[0] evaluates to the function cell,
 * a[1] is the list of actual-argument expressions.  Scalars are passed
 * as CCOPY copies, arrays by reference; parameters the caller did not
 * supply get fresh empty CCOPY cells.  The frame stack grows in steps
 * of 100.  Returns the frame's retval cell, or the jump cell itself
 * when the body ended in exit/next.
 */
Cell *call(Node **a, int n)	/* function call.  very kludgy and fragile */
{
	static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
	int i, ncall, ndef;
	int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
	Node *x;
	Cell *args[NARGS], *oargs[NARGS];	/* BUG: fixed size arrays */
	Cell *y, *z, *fcn;
	char *s;

	fcn = execute(a[0]);	/* the function itself */
	s = fcn->nval;
	if (!isfcn(fcn))
		FATAL("calling undefined function %s", s);
	if (frame == NULL) {
		frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame));
		if (frame == NULL)
			FATAL("out of space for stack frames calling %s", s);
	}
	for (ncall = 0, x = a[1]; x != NULL; x = x->nnext)	/* args in call */
		ncall++;
	ndef = (int) fcn->fval;			/* args in defn */
	DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame));
	if (ncall > ndef)
		WARNING("function %s called with %d args, uses only %d",
			s, ncall, ndef);
	/* both args[] and oargs[] are indexed up to max(ncall, ndef)-1 below */
	if (ncall + ndef > NARGS)
		FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
	for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) {	/* get call args */
		DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame));
		y = execute(x);
		oargs[i] = y;
		DPRINTF("args[%d]: %s %f <%s>, t=%o\n",
			i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval);
		if (isfcn(y))
			FATAL("can't use function %s as argument in %s", y->nval, s);
		if (isarr(y))
			args[i] = y;	/* arrays by ref */
		else
			args[i] = copycell(y);
		tempfree(y);
	}
	for ( ; i < ndef; i++) {	/* add null args for ones not provided */
		args[i] = gettemp();
		*args[i] = newcopycell;
	}
	frp++;	/* now ok to up frame */
	if (frp >= frame + nframe) {
		int dfp = frp - frame;	/* old index */
		frame = (struct Frame *) reallocarray(frame, (nframe += 100), sizeof(*frame));
		if (frame == NULL)
			FATAL("out of space for stack frames in %s", s);
		frp = frame + dfp;	/* re-base: the frame array may have moved */
	}
	frp->fcncell = fcn;
	frp->args = args;
	frp->nargs = ndef;	/* number defined with (excess are locals) */
	frp->retval = gettemp();

	DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame));
	y = execute((Node *)(fcn->sval));	/* execute body */
	DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame));

	/* release the parameter cells; y (the body's result) may alias one of them */
	for (i = 0; i < ndef; i++) {
		Cell *t = frp->args[i];
		if (isarr(t)) {
			if (t->csub == CCOPY) {
				if (i >= ncall) {
					/* local that the body turned into an array */
					freesymtab(t);
					t->csub = CTEMP;
					tempfree(t);
				} else {
					/* scalar copy became an array: pass the table back to the caller's cell */
					oargs[i]->tval = t->tval;
					oargs[i]->tval &= ~(STR|NUM|DONTFREE);
					oargs[i]->sval = t->sval;
					tempfree(t);
				}
			}
		} else if (t != y) {	/* kludge to prevent freeing twice */
			t->csub = CTEMP;
			tempfree(t);
		} else if (t == y && t->csub == CCOPY) {
			t->csub = CTEMP;
			tempfree(t);
			freed = 1;
		}
	}
	tempfree(fcn);
	/* NOTE(review): this early return leaves frp un-decremented -- confirm it is only reached on paths that never resume the caller */
	if (isexit(y) || isnext(y))
		return y;
	if (freed == 0) {
		tempfree(y);	/* don't free twice! */
	}
	z = frp->retval;			/* return value */
	DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval);
	frp--;
	return(z);
}

/*
 * Return a fresh temp cell holding a copy of x with the CON, FLD and
 * REC type bits cleared.  String values are duplicated with tostring();
 * otherwise the copy is marked DONTFREE.  The copy is tagged CCOPY.
 */
Cell *copycell(Cell *x)	/* make a copy of a cell in a temp */
{
	Cell *y;

	/* copy is not constant or field */

	y = gettemp();
	y->tval = x->tval & ~(CON|FLD|REC);
	y->csub = CCOPY;	/* prevents freeing until call is over */
	y->nval = x->nval;	/* BUG? */
	if (isstr(x) /* || x->ctype == OCELL */) {
		y->sval = tostring(x->sval);
		y->tval &= ~DONTFREE;
	} else
		y->tval |= DONTFREE;
	y->fval = x->fval;
	return y;
}

/* a[0] encodes the parameter index; the incoming n is ignored and overwritten */
Cell *arg(Node **a, int n)	/* nth argument of a function */
{

	n = ptoi(a[0]);	/* argument number, counting from 0 */
	DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs);
	if (n+1 > frp->nargs)
		FATAL("argument #%d of function %s was not supplied",
			n+1, frp->fcncell->nval);
	return frp->args[n];
}

/*
 * n selects the statement kind.  EXIT stores the optional status in
 * errorflag and longjmps to env, so it never returns.  RETURN copies
 * the optional value into the current frame's retval.  Every other
 * kind just returns its sentinel cell (NEXTFILE calls nextfile() first).
 */
Cell *jump(Node **a, int n)	/* break, continue, next, nextfile, return */
{
	Cell *y;

	switch (n) {
	case EXIT:
		if (a[0] != NULL) {
			y = execute(a[0]);
			errorflag = (int) getfval(y);
			tempfree(y);
		}
		longjmp(env, 1);
	case RETURN:
		if (a[0] != NULL) {
			y = execute(a[0]);
			if ((y->tval & (STR|NUM)) == (STR|NUM)) {
				/* value is both string and number: keep both views */
				setsval(frp->retval, getsval(y));
				frp->retval->fval = getfval(y);
				frp->retval->tval |= NUM;
			}
			else if (y->tval & STR)
				setsval(frp->retval, getsval(y));
			else if (y->tval & NUM)
				setfval(frp->retval, getfval(y));
			else		/* can't happen */
				FATAL("bad type variable %d", y->tval);
			tempfree(y);
		}
		return(jret);
	case NEXT:
		return(jnext);
	case NEXTFILE:
		nextfile();
		return(jnextfile);
	case BREAK:
		return(jbreak);
	case CONTINUE:
		return(jcont);
	default:	/* can't happen */
		FATAL("illegal jump type %d", n);
	}
	return 0;	/* not reached */
}

Cell
*awkgetline(Node **a, int n)	/* get next line from specific input */
{		/* a[0] is variable, a[1] is operator, a[2] is filename */
	/*
	 * Returns a temp cell whose numeric value is the read status n:
	 * the result of readrec()/getrec(), or -1 when the file or pipe
	 * could not be opened.  The line is stored in a[0] if given,
	 * otherwise in fldtab[0] (file form) or the current record
	 * (bare getline).  A stored line that is_number() accepts is
	 * also given its numeric value.
	 */
	Cell *r, *x;
	extern Cell **fldtab;
	FILE *fp;
	char *buf;
	int bufsize = recsize;
	int mode;
	bool newflag;
	double result;

	if ((buf = (char *) malloc(bufsize)) == NULL)
		FATAL("out of memory in getline");

	fflush(stdout);	/* in case someone is waiting for a prompt */
	r = gettemp();
	if (a[1] != NULL) {		/* getline < file */
		x = execute(a[2]);		/* filename */
		mode = ptoi(a[1]);
		if (mode == '|')		/* input pipe */
			mode = LE;	/* arbitrary flag */
		fp = openfile(mode, getsval(x), &newflag);
		tempfree(x);
		if (fp == NULL)
			n = -1;
		else
			n = readrec(&buf, &bufsize, fp, newflag);
		if (n <= 0) {
			;	/* EOF or error: leave the target untouched */
		} else if (a[0] != NULL) {	/* getline var <file */
			x = execute(a[0]);
			setsval(x, buf);
			if (is_number(x->sval, & result)) {
				x->fval = result;
				x->tval |= NUM;
			}
			tempfree(x);
		} else {			/* getline <file */
			setsval(fldtab[0], buf);
			if (is_number(fldtab[0]->sval, & result)) {
				fldtab[0]->fval = result;
				fldtab[0]->tval |= NUM;
			}
		}
	} else {			/* bare getline; use current input */
		if (a[0] == NULL)	/* getline */
			n = getrec(&record, &recsize, true);
		else {			/* getline var */
			n = getrec(&buf, &bufsize, false);
			if (n > 0) {
				x = execute(a[0]);
				setsval(x, buf);
				if (is_number(x->sval, & result)) {
					x->fval = result;
					x->tval |= NUM;
				}
				tempfree(x);
			}
		}
	}
	setfval(r, (Awkfloat) n);
	free(buf);
	return r;
}

/* make sure the fields have been split, then hand back the cell stored in a[0] */
Cell *getnf(Node **a, int n)	/* get NF */
{
	if (!donefld)
		fldbld();
	return (Cell *) a[0];
}

/*
 * Evaluate the subscript expression list p and join the string values
 * into one malloc'ed key, inserting the current subscript separator
 * between consecutive subscripts.  The caller frees the result.
 * func is the caller's name, used only in error messages.
 */
static char *
makearraystring(Node *p, const char *func)
{
	char *buf;
	int bufsz = recsize;
	size_t blen;

	if ((buf = (char *) malloc(bufsz)) == NULL) {
		FATAL("%s: out of memory", func);
	}

	blen = 0;
	buf[blen] = '\0';

	for (; p; p = p->nnext) {
		Cell *x = execute(p);	/* expr */
		char *s = getsval(x);
		/* re-read the separator each round: evaluating a subscript may have changed it */
		size_t seplen = strlen(getsval(subseploc));
		size_t nsub = p->nnext ? seplen : 0;	/* no separator after the last subscript */
		size_t slen = strlen(s);
		size_t tlen = blen + slen + nsub;

		if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) {
			FATAL("%s: out of memory %s[%s...]",
			    func, x->nval, buf);
		}
		memcpy(buf + blen, s, slen);
		if (nsub) {
			memcpy(buf + blen + slen, *SUBSEP, nsub);
		}
		buf[tlen] = '\0';
		blen = tlen;
		tempfree(x);
	}
	return buf;
}

/*
 * Look up, creating it if necessary, the element named by the
 * subscript list.  A cell that is not yet an array is converted to an
 * empty array first.  Returns the element's cell.
 */
Cell *array(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
{
	Cell *x, *z;
	char *buf;

	x = execute(a[0]);	/* Cell* for symbol table */
	buf = makearraystring(a[1], __func__);
	if (!isarr(x)) {
		DPRINTF("making %s into an array\n", NN(x->nval));
		if (freeable(x))
			xfree(x->sval);
		x->tval &= ~(STR|NUM|DONTFREE);
		x->tval |= ARR;
		x->sval = (char *) makesymtab(NSYMTAB);
	}
	z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
	z->ctype = OCELL;
	z->csub = CVAR;
	tempfree(x);
	free(buf);
	return(z);
}

/*
 * delete arr (a[1] == NULL) replaces the whole table with a fresh
 * empty one; delete arr[subs] removes a single element.  Deleting from
 * something that is not an array is a silent no-op.  Always returns True.
 */
Cell *awkdelete(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
{
	Cell *x;

	x = execute(a[0]);	/* Cell* for symbol table */
	if (x == symtabloc) {
		FATAL("cannot delete SYMTAB or its elements");
	}
	if (!isarr(x))
		return True;
	if (a[1] == NULL) {	/* delete the elements, not the table */
		freesymtab(x);
		x->tval &= ~STR;
		x->tval |= ARR;
		x->sval = (char *) makesymtab(NSYMTAB);
	} else {
		char *buf = makearraystring(a[1], __func__);
		freeelem(x, buf);
		free(buf);
	}
	tempfree(x);
	return True;
}

/*
 * Membership test "(subs) in arr": True when the joined subscript key
 * exists in the array.  A cell that is not yet an array is converted
 * to an empty array first.
 */
Cell *intest(Node **a, int n)	/* a[0] is index (list), a[1] is symtab */
{
	Cell *ap, *k;
	char *buf;

	ap = execute(a[1]);	/* array name */
	if (!isarr(ap))
{ 566 DPRINTF("making %s into an array\n", ap->nval); 567 if (freeable(ap)) 568 xfree(ap->sval); 569 ap->tval &= ~(STR|NUM|DONTFREE); 570 ap->tval |= ARR; 571 ap->sval = (char *) makesymtab(NSYMTAB); 572 } 573 buf = makearraystring(a[0], __func__); 574 k = lookup(buf, (Array *) ap->sval); 575 tempfree(ap); 576 free(buf); 577 if (k == NULL) 578 return(False); 579 else 580 return(True); 581 } 582 583 584 /* ======== utf-8 code ========== */ 585 586 /* 587 * Awk strings can contain ascii, random 8-bit items (eg Latin-1), 588 * or utf-8. u8_isutf tests whether a string starts with a valid 589 * utf-8 sequence, and returns 0 if not (e.g., high bit set). 590 * u8_nextlen returns length of next valid sequence, which is 591 * 1 for ascii, 2..4 for utf-8, or 1 for high bit non-utf. 592 * u8_strlen returns length of string in valid utf-8 sequences 593 * and/or high-bit bytes. Conversion functions go between byte 594 * number and character number. 595 * 596 * In theory, this behaves the same as before for non-utf8 bytes. 597 * 598 * Limited checking! This is a potential security hole. 599 */ 600 601 /* is s the beginning of a valid utf-8 string? */ 602 /* return length 1..4 if yes, 0 if no */ 603 int u8_isutf(const char *s) 604 { 605 int n, ret; 606 unsigned char c; 607 608 c = s[0]; 609 if (c < 128 || awk_mb_cur_max == 1) 610 return 1; /* what if it's 0? */ 611 612 n = strlen(s); 613 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) { 614 ret = 2; /* 110xxxxx 10xxxxxx */ 615 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80 616 && (s[2] & 0xC0) == 0x80) { 617 ret = 3; /* 1110xxxx 10xxxxxx 10xxxxxx */ 618 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80 619 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) { 620 ret = 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ 621 } else { 622 ret = 0; 623 } 624 return ret; 625 } 626 627 /* Convert (prefix of) utf8 string to utf-32 rune. 
*/ 628 /* Sets *rune to the value, returns the length. */ 629 /* No error checking: watch out. */ 630 int u8_rune(int *rune, const char *s) 631 { 632 int n, ret; 633 unsigned char c; 634 635 c = s[0]; 636 if (c < 128 || awk_mb_cur_max == 1) { 637 *rune = c; 638 return 1; 639 } 640 641 n = strlen(s); 642 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) { 643 *rune = ((c & 0x1F) << 6) | (s[1] & 0x3F); /* 110xxxxx 10xxxxxx */ 644 ret = 2; 645 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80 646 && (s[2] & 0xC0) == 0x80) { 647 *rune = ((c & 0xF) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F); 648 /* 1110xxxx 10xxxxxx 10xxxxxx */ 649 ret = 3; 650 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80 651 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) { 652 *rune = ((c & 0x7) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F); 653 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ 654 ret = 4; 655 } else { 656 *rune = c; 657 ret = 1; 658 } 659 return ret; /* returns one byte if sequence doesn't look like utf */ 660 } 661 662 /* return length of next sequence: 1 for ascii or random, 2..4 for valid utf8 */ 663 int u8_nextlen(const char *s) 664 { 665 int len; 666 667 len = u8_isutf(s); 668 if (len == 0) 669 len = 1; 670 return len; 671 } 672 673 /* return number of utf characters or single non-utf bytes */ 674 int u8_strlen(const char *s) 675 { 676 int i, len, n, totlen; 677 unsigned char c; 678 679 n = strlen(s); 680 totlen = 0; 681 for (i = 0; i < n; i += len) { 682 c = s[i]; 683 if (c < 128 || awk_mb_cur_max == 1) { 684 len = 1; 685 } else { 686 len = u8_nextlen(&s[i]); 687 } 688 totlen++; 689 if (i > n) 690 FATAL("bad utf count [%s] n=%d i=%d\n", s, n, i); 691 } 692 return totlen; 693 } 694 695 /* convert utf-8 char number in a string to its byte offset */ 696 int u8_char2byte(const char *s, int charnum) 697 { 698 int n; 699 int bytenum = 0; 700 701 while (charnum > 0) { 702 n = u8_nextlen(s); 703 s += n; 704 
bytenum += n; 705 charnum--; 706 } 707 return bytenum; 708 } 709 710 /* convert byte offset in s to utf-8 char number that starts there */ 711 int u8_byte2char(const char *s, int bytenum) 712 { 713 int i, len, b; 714 int charnum = 0; /* BUG: what origin? */ 715 /* should be 0 to match start==0 which means no match */ 716 717 b = strlen(s); 718 if (bytenum > b) { 719 return -1; /* ??? */ 720 } 721 for (i = 0; i <= bytenum; i += len) { 722 len = u8_nextlen(s+i); 723 charnum++; 724 } 725 return charnum; 726 } 727 728 /* runetochar() adapted from rune.c in the Plan 9 distributione */ 729 730 enum 731 { 732 Runeerror = 128, /* from somewhere else */ 733 Runemax = 0x10FFFF, 734 735 Bit1 = 7, 736 Bitx = 6, 737 Bit2 = 5, 738 Bit3 = 4, 739 Bit4 = 3, 740 Bit5 = 2, 741 742 T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ 743 Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ 744 T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ 745 T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ 746 T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ 747 T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */ 748 749 Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */ 750 Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */ 751 Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */ 752 Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0011 1111 1111 1111 1111 1111 */ 753 754 Maskx = (1<<Bitx)-1, /* 0011 1111 */ 755 Testx = Maskx ^ 0xFF, /* 1100 0000 */ 756 757 }; 758 759 int runetochar(char *str, int c) 760 { 761 /* one character sequence 00000-0007F => 00-7F */ 762 if (c <= Rune1) { 763 str[0] = c; 764 return 1; 765 } 766 767 /* two character sequence 00080-007FF => T2 Tx */ 768 if (c <= Rune2) { 769 str[0] = T2 | (c >> 1*Bitx); 770 str[1] = Tx | (c & Maskx); 771 return 2; 772 } 773 774 /* three character sequence 00800-0FFFF => T3 Tx Tx */ 775 if (c > Runemax) 776 c = Runeerror; 777 if (c <= Rune3) { 778 str[0] = T3 | (c >> 2*Bitx); 779 str[1] = Tx | ((c >> 1*Bitx) & Maskx); 780 str[2] = Tx | 
(c & Maskx);
		return 3;
	}

	/* four character sequence 010000-1FFFFF => T4 Tx Tx Tx */
	str[0] = T4 | (c >> 3*Bitx);
	str[1] = Tx | ((c >> 2*Bitx) & Maskx);
	str[2] = Tx | ((c >> 1*Bitx) & Maskx);
	str[3] = Tx | (c & Maskx);
	return 4;
}


/* ========== end of utf8 code =========== */



/*
 * n is MATCH, NOTMATCH or MATCHFCN.  For the operators the result is
 * True/False.  For match() the result is a new numeric temp holding
 * the 1-origin character position of the match (0 if none), and the
 * cells at rstartloc/rlengthloc are set to the position and length,
 * both counted in characters rather than bytes.
 */
Cell *matchop(Node **a, int n)	/* ~ and match() */
{
	Cell *x, *y, *z;
	char *s, *t;
	int i;
	int cstart, cpatlen, len;
	fa *pfa;
	int (*mf)(fa *, const char *) = match, mode = 0;

	if (n == MATCHFCN) {
		mf = pmatch;
		mode = 1;
	}
	x = execute(a[1]);	/* a[1] = target text */
	s = getsval(x);
	if (a[0] == NULL)	/* a[0] == 0: already-compiled reg expr */
		i = (*mf)((fa *) a[2], s);
	else {
		y = execute(a[2]);	/* a[2] = regular expr */
		t = getsval(y);
		pfa = makedfa(t, mode);
		i = (*mf)(pfa, s);
		tempfree(y);
	}
	z = x;	/* remember the target cell; x is reused for the result */
	if (n == MATCHFCN) {
		int start = patbeg - s + 1; /* origin 1 */
		if (patlen < 0) {
			start = 0; /* not found */
		} else {
			/* convert the byte offset and byte length of the match to characters */
			cstart = u8_byte2char(s, start-1);
			cpatlen = 0;
			for (i = 0; i < patlen; i += len) {
				len = u8_nextlen(patbeg+i);
				cpatlen++;
			}

			start = cstart;
			patlen = cpatlen;
		}

		setfval(rstartloc, (Awkfloat) start);
		setfval(rlengthloc, (Awkfloat) patlen);
		x = gettemp();
		x->tval = NUM;
		x->fval = start;
	} else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
		x = True;
	else
		x = False;

	tempfree(z);
	return x;
}


/* a[1] is only evaluated when a[0] does not already decide the result */
Cell *boolop(Node **a, int n)	/* a[0] || a[1], a[0] && a[1], !a[0] */
{
	Cell *x, *y;
	int i;

	x = execute(a[0]);
	i = istrue(x);
	tempfree(x);
	switch (n) {
	case BOR:
		if (i) return(True);
		y = execute(a[1]);
		i = istrue(y);
		tempfree(y);
		if (i) return(True);
		else return(False);
	case AND:
		if ( !i ) return(False);
		y = execute(a[1]);
		i =
istrue(y); 873 tempfree(y); 874 if (i) return(True); 875 else return(False); 876 case NOT: 877 if (i) return(False); 878 else return(True); 879 default: /* can't happen */ 880 FATAL("unknown boolean operator %d", n); 881 } 882 return 0; /*NOTREACHED*/ 883 } 884 885 Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */ 886 { 887 int i; 888 Cell *x, *y; 889 Awkfloat j; 890 bool x_is_nan, y_is_nan; 891 892 x = execute(a[0]); 893 y = execute(a[1]); 894 x_is_nan = isnan(x->fval); 895 y_is_nan = isnan(y->fval); 896 if (x->tval&NUM && y->tval&NUM) { 897 if ((x_is_nan || y_is_nan) && n != NE) 898 return(False); 899 j = x->fval - y->fval; 900 i = j<0? -1: (j>0? 1: 0); 901 } else { 902 i = strcmp(getsval(x), getsval(y)); 903 } 904 tempfree(x); 905 tempfree(y); 906 switch (n) { 907 case LT: if (i<0) return(True); 908 else return(False); 909 case LE: if (i<=0) return(True); 910 else return(False); 911 case NE: if (x_is_nan && y_is_nan) return(True); 912 else if (i!=0) return(True); 913 else return(False); 914 case EQ: if (i == 0) return(True); 915 else return(False); 916 case GE: if (i>=0) return(True); 917 else return(False); 918 case GT: if (i>0) return(True); 919 else return(False); 920 default: /* can't happen */ 921 FATAL("unknown relational operator %d", n); 922 } 923 return 0; /*NOTREACHED*/ 924 } 925 926 void tfree(Cell *a) /* free a tempcell */ 927 { 928 if (freeable(a)) { 929 DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval); 930 xfree(a->sval); 931 } 932 if (a == tmps) 933 FATAL("tempcell list is curdled"); 934 a->cnext = tmps; 935 tmps = a; 936 } 937 938 Cell *gettemp(void) /* get a tempcell */ 939 { int i; 940 Cell *x; 941 942 if (!tmps) { 943 tmps = (Cell *) calloc(100, sizeof(*tmps)); 944 if (!tmps) 945 FATAL("out of space for temporaries"); 946 for (i = 1; i < 100; i++) 947 tmps[i-1].cnext = &tmps[i]; 948 tmps[i-1].cnext = NULL; 949 } 950 x = tmps; 951 tmps = x->cnext; 952 *x = tempcell; 953 return(x); 954 } 955 956 Cell *indirect(Node **a, int n) 
/* $( a[0] ) */ 957 { 958 Awkfloat val; 959 Cell *x; 960 int m; 961 char *s; 962 963 x = execute(a[0]); 964 val = getfval(x); /* freebsd: defend against super large field numbers */ 965 if ((Awkfloat)INT_MAX < val) 966 FATAL("trying to access out of range field %s", x->nval); 967 m = (int) val; 968 if (m == 0 && !is_number(s = getsval(x), NULL)) /* suspicion! */ 969 FATAL("illegal field $(%s), name \"%s\"", s, x->nval); 970 /* BUG: can x->nval ever be null??? */ 971 tempfree(x); 972 x = fieldadr(m); 973 x->ctype = OCELL; /* BUG? why are these needed? */ 974 x->csub = CFLD; 975 return(x); 976 } 977 978 Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ 979 { 980 int k, m, n; 981 int mb, nb; 982 char *s; 983 int temp; 984 Cell *x, *y, *z = NULL; 985 986 x = execute(a[0]); 987 y = execute(a[1]); 988 if (a[2] != NULL) 989 z = execute(a[2]); 990 s = getsval(x); 991 k = u8_strlen(s) + 1; 992 if (k <= 1) { 993 tempfree(x); 994 tempfree(y); 995 if (a[2] != NULL) { 996 tempfree(z); 997 } 998 x = gettemp(); 999 setsval(x, ""); 1000 return(x); 1001 } 1002 m = (int) getfval(y); 1003 if (m <= 0) 1004 m = 1; 1005 else if (m > k) 1006 m = k; 1007 tempfree(y); 1008 if (a[2] != NULL) { 1009 n = (int) getfval(z); 1010 tempfree(z); 1011 } else 1012 n = k - 1; 1013 if (n < 0) 1014 n = 0; 1015 else if (n > k - m) 1016 n = k - m; 1017 /* m is start, n is length from there */ 1018 DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s); 1019 y = gettemp(); 1020 mb = u8_char2byte(s, m-1); /* byte offset of start char in s */ 1021 nb = u8_char2byte(s, m-1+n); /* byte offset of end+1 char in s */ 1022 1023 temp = s[nb]; /* with thanks to John Linderman */ 1024 s[nb] = '\0'; 1025 setsval(y, s + mb); 1026 s[nb] = temp; 1027 tempfree(x); 1028 return(y); 1029 } 1030 1031 Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ 1032 { 1033 Cell *x, *y, *z; 1034 char *s1, *s2, *p1, *p2, *q; 1035 Awkfloat v = 0.0; 1036 1037 x = execute(a[0]); 1038 s1 = getsval(x); 1039 y = execute(a[1]); 1040 s2 
= getsval(y); 1041 1042 z = gettemp(); 1043 for (p1 = s1; *p1 != '\0'; p1++) { 1044 for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) 1045 continue; 1046 if (*p2 == '\0') { 1047 /* v = (Awkfloat) (p1 - s1 + 1); origin 1 */ 1048 1049 /* should be a function: used in match() as well */ 1050 int i, len; 1051 v = 0; 1052 for (i = 0; i < p1-s1+1; i += len) { 1053 len = u8_nextlen(s1+i); 1054 v++; 1055 } 1056 break; 1057 } 1058 } 1059 tempfree(x); 1060 tempfree(y); 1061 setfval(z, v); 1062 return(z); 1063 } 1064 1065 int has_utf8(char *s) /* return 1 if s contains any utf-8 (2 bytes or more) character */ 1066 { 1067 int n; 1068 1069 for (n = 0; *s != 0; s += n) { 1070 n = u8_nextlen(s); 1071 if (n > 1) 1072 return 1; 1073 } 1074 return 0; 1075 } 1076 1077 #define MAXNUMSIZE 50 1078 1079 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */ 1080 { 1081 char *fmt; 1082 char *p, *t; 1083 const char *os; 1084 Cell *x; 1085 int flag = 0, n; 1086 int fmtwd; /* format width */ 1087 int fmtsz = recsize; 1088 char *buf = *pbuf; 1089 int bufsize = *pbufsize; 1090 #define FMTSZ(a) (fmtsz - ((a) - fmt)) 1091 #define BUFSZ(a) (bufsize - ((a) - buf)) 1092 1093 static bool first = true; 1094 static bool have_a_format = false; 1095 1096 if (first) { 1097 char xbuf[100]; 1098 1099 snprintf(xbuf, sizeof(xbuf), "%a", 42.0); 1100 have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0); 1101 first = false; 1102 } 1103 1104 os = s; 1105 p = buf; 1106 if ((fmt = (char *) malloc(fmtsz)) == NULL) 1107 FATAL("out of memory in format()"); 1108 while (*s) { 1109 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1"); 1110 if (*s != '%') { 1111 *p++ = *s++; 1112 continue; 1113 } 1114 if (*(s+1) == '%') { 1115 *p++ = '%'; 1116 s += 2; 1117 continue; 1118 } 1119 fmtwd = atoi(s+1); 1120 if (fmtwd < 0) 1121 fmtwd = -fmtwd; 1122 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2"); 1123 for (t = fmt; (*t++ = *s) != '\0'; s++) { 1124 if 
(!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3")) 1125 FATAL("format item %.30s... ran format() out of memory", os); 1126 /* Ignore size specifiers */ 1127 if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */ 1128 t--; 1129 continue; 1130 } 1131 if (isalpha((uschar)*s)) 1132 break; 1133 if (*s == '$') { 1134 FATAL("'$' not permitted in awk formats"); 1135 } 1136 if (*s == '*') { 1137 if (a == NULL) { 1138 FATAL("not enough args in printf(%s)", os); 1139 } 1140 x = execute(a); 1141 a = a->nnext; 1142 snprintf(t - 1, FMTSZ(t - 1), 1143 "%d", fmtwd=(int) getfval(x)); 1144 if (fmtwd < 0) 1145 fmtwd = -fmtwd; 1146 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); 1147 t = fmt + strlen(fmt); 1148 tempfree(x); 1149 } 1150 } 1151 *t = '\0'; 1152 if (fmtwd < 0) 1153 fmtwd = -fmtwd; 1154 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4"); 1155 switch (*s) { 1156 case 'a': case 'A': 1157 if (have_a_format) 1158 flag = *s; 1159 else 1160 flag = 'f'; 1161 break; 1162 case 'f': case 'e': case 'g': case 'E': case 'G': 1163 flag = 'f'; 1164 break; 1165 case 'd': case 'i': case 'o': case 'x': case 'X': case 'u': 1166 flag = (*s == 'd' || *s == 'i') ? 
'd' : 'u'; 1167 *(t-1) = 'j'; 1168 *t = *s; 1169 *++t = '\0'; 1170 break; 1171 case 's': 1172 flag = 's'; 1173 break; 1174 case 'c': 1175 flag = 'c'; 1176 break; 1177 default: 1178 WARNING("weird printf conversion %s", fmt); 1179 flag = '?'; 1180 break; 1181 } 1182 if (a == NULL) 1183 FATAL("not enough args in printf(%s)", os); 1184 x = execute(a); 1185 a = a->nnext; 1186 n = MAXNUMSIZE; 1187 if (fmtwd > n) 1188 n = fmtwd; 1189 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5"); 1190 switch (flag) { 1191 case '?': 1192 snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */ 1193 t = getsval(x); 1194 n = strlen(t); 1195 if (fmtwd > n) 1196 n = fmtwd; 1197 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6"); 1198 p += strlen(p); 1199 snprintf(p, BUFSZ(p), "%s", t); 1200 break; 1201 case 'a': 1202 case 'A': 1203 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break; 1204 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break; 1205 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break; 1206 1207 case 's': { 1208 t = getsval(x); 1209 n = strlen(t); 1210 /* if simple format or no utf-8 in the string, sprintf works */ 1211 if (!has_utf8(t) || strcmp(fmt,"%s") == 0) { 1212 if (fmtwd > n) 1213 n = fmtwd; 1214 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7")) 1215 FATAL("huge string/format (%d chars) in printf %.30s..." 
\ 1216 " ran format() out of memory", n, t); 1217 snprintf(p, BUFSZ(p), fmt, t); 1218 break; 1219 } 1220 1221 /* get here if string has utf-8 chars and fmt is not plain %s */ 1222 /* "%-w.ps", where -, w and .p are all optional */ 1223 /* '0' before the w is a flag character */ 1224 /* fmt points at % */ 1225 int ljust = 0, wid = 0, prec = n, pad = 0; 1226 char *f = fmt+1; 1227 if (f[0] == '-') { 1228 ljust = 1; 1229 f++; 1230 } 1231 // flags '0' and '+' are recognized but skipped 1232 if (f[0] == '0') { 1233 f++; 1234 if (f[0] == '+') 1235 f++; 1236 } 1237 if (f[0] == '+') { 1238 f++; 1239 if (f[0] == '0') 1240 f++; 1241 } 1242 if (isdigit((uschar)f[0])) { /* there is a wid */ 1243 wid = strtol(f, &f, 10); 1244 } 1245 if (f[0] == '.') { /* there is a .prec */ 1246 prec = strtol(++f, &f, 10); 1247 } 1248 if (prec > u8_strlen(t)) 1249 prec = u8_strlen(t); 1250 pad = wid>prec ? wid - prec : 0; // has to be >= 0 1251 int i, k, n; 1252 1253 if (ljust) { // print prec chars from t, then pad blanks 1254 n = u8_char2byte(t, prec); 1255 for (k = 0; k < n; k++) { 1256 //putchar(t[k]); 1257 *p++ = t[k]; 1258 } 1259 for (i = 0; i < pad; i++) { 1260 //printf(" "); 1261 *p++ = ' '; 1262 } 1263 } else { // print pad blanks, then prec chars from t 1264 for (i = 0; i < pad; i++) { 1265 //printf(" "); 1266 *p++ = ' '; 1267 } 1268 n = u8_char2byte(t, prec); 1269 for (k = 0; k < n; k++) { 1270 //putchar(t[k]); 1271 *p++ = t[k]; 1272 } 1273 } 1274 *p = 0; 1275 break; 1276 } 1277 1278 case 'c': { 1279 /* 1280 * If a numeric value is given, awk should just turn 1281 * it into a character and print it: 1282 * BEGIN { printf("%c\n", 65) } 1283 * prints "A". 1284 * 1285 * But what if the numeric value is > 128 and 1286 * represents a valid Unicode code point?!? We do 1287 * our best to convert it back into UTF-8. If we 1288 * can't, we output the encoding of the Unicode 1289 * "invalid character", 0xFFFD. 
1290 */ 1291 if (isnum(x)) { 1292 int charval = (int) getfval(x); 1293 1294 if (charval != 0) { 1295 if (charval < 128 || awk_mb_cur_max == 1) 1296 snprintf(p, BUFSZ(p), fmt, charval); 1297 else { 1298 // possible unicode character 1299 size_t count; 1300 char *bs = wide_char_to_byte_str(charval, &count); 1301 1302 if (bs == NULL) { // invalid character 1303 // use unicode invalid character, 0xFFFD 1304 static char invalid_char[] = "\357\277\275"; 1305 bs = invalid_char; 1306 count = 3; 1307 } 1308 t = bs; 1309 n = count; 1310 goto format_percent_c; 1311 } 1312 } else { 1313 *p++ = '\0'; /* explicit null byte */ 1314 *p = '\0'; /* next output will start here */ 1315 } 1316 break; 1317 } 1318 t = getsval(x); 1319 n = u8_nextlen(t); 1320 format_percent_c: 1321 if (n < 2) { /* not utf8 */ 1322 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]); 1323 break; 1324 } 1325 1326 // utf8 character, almost same song and dance as for %s 1327 int ljust = 0, wid = 0, prec = n, pad = 0; 1328 char *f = fmt+1; 1329 if (f[0] == '-') { 1330 ljust = 1; 1331 f++; 1332 } 1333 // flags '0' and '+' are recognized but skipped 1334 if (f[0] == '0') { 1335 f++; 1336 if (f[0] == '+') 1337 f++; 1338 } 1339 if (f[0] == '+') { 1340 f++; 1341 if (f[0] == '0') 1342 f++; 1343 } 1344 if (isdigit((uschar)f[0])) { /* there is a wid */ 1345 wid = strtol(f, &f, 10); 1346 } 1347 if (f[0] == '.') { /* there is a .prec */ 1348 prec = strtol(++f, &f, 10); 1349 } 1350 if (prec > 1) // %c --> only one character 1351 prec = 1; 1352 pad = wid>prec ? 
wid - prec : 0; // has to be >= 0 1353 int i; 1354 1355 if (ljust) { // print one char from t, then pad blanks 1356 for (i = 0; i < n; i++) 1357 *p++ = t[i]; 1358 for (i = 0; i < pad; i++) { 1359 //printf(" "); 1360 *p++ = ' '; 1361 } 1362 } else { // print pad blanks, then prec chars from t 1363 for (i = 0; i < pad; i++) { 1364 //printf(" "); 1365 *p++ = ' '; 1366 } 1367 for (i = 0; i < n; i++) 1368 *p++ = t[i]; 1369 } 1370 *p = 0; 1371 break; 1372 } 1373 default: 1374 FATAL("can't happen: bad conversion %c in format()", flag); 1375 } 1376 1377 tempfree(x); 1378 p += strlen(p); 1379 s++; 1380 } 1381 *p = '\0'; 1382 free(fmt); 1383 for ( ; a; a = a->nnext) { /* evaluate any remaining args */ 1384 x = execute(a); 1385 tempfree(x); 1386 } 1387 *pbuf = buf; 1388 *pbufsize = bufsize; 1389 return p - buf; 1390 } 1391 1392 Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */ 1393 { 1394 Cell *x; 1395 Node *y; 1396 char *buf; 1397 int bufsz=3*recsize; 1398 1399 if ((buf = (char *) malloc(bufsz)) == NULL) 1400 FATAL("out of memory in awksprintf"); 1401 y = a[0]->nnext; 1402 x = execute(a[0]); 1403 if (format(&buf, &bufsz, getsval(x), y) == -1) 1404 FATAL("sprintf string %.30s... too long. can't happen.", buf); 1405 tempfree(x); 1406 x = gettemp(); 1407 x->sval = buf; 1408 x->tval = STR; 1409 return(x); 1410 } 1411 1412 Cell *awkprintf(Node **a, int n) /* printf */ 1413 { /* a[0] is list of args, starting with format string */ 1414 /* a[1] is redirection operator, a[2] is redirection file */ 1415 FILE *fp; 1416 Cell *x; 1417 Node *y; 1418 char *buf; 1419 int len; 1420 int bufsz=3*recsize; 1421 1422 if ((buf = (char *) malloc(bufsz)) == NULL) 1423 FATAL("out of memory in awkprintf"); 1424 y = a[0]->nnext; 1425 x = execute(a[0]); 1426 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1) 1427 FATAL("printf string %.30s... too long. 
can't happen.", buf); 1428 tempfree(x); 1429 if (a[1] == NULL) { 1430 /* fputs(buf, stdout); */ 1431 fwrite(buf, len, 1, stdout); 1432 if (ferror(stdout)) 1433 FATAL("write error on stdout"); 1434 } else { 1435 fp = redirect(ptoi(a[1]), a[2]); 1436 /* fputs(buf, fp); */ 1437 fwrite(buf, len, 1, fp); 1438 fflush(fp); 1439 if (ferror(fp)) 1440 FATAL("write error on %s", filename(fp)); 1441 } 1442 free(buf); 1443 return(True); 1444 } 1445 1446 Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */ 1447 { 1448 Awkfloat i, j = 0; 1449 double v; 1450 Cell *x, *y, *z; 1451 1452 x = execute(a[0]); 1453 i = getfval(x); 1454 tempfree(x); 1455 if (n != UMINUS && n != UPLUS) { 1456 y = execute(a[1]); 1457 j = getfval(y); 1458 tempfree(y); 1459 } 1460 z = gettemp(); 1461 switch (n) { 1462 case ADD: 1463 i += j; 1464 break; 1465 case MINUS: 1466 i -= j; 1467 break; 1468 case MULT: 1469 i *= j; 1470 break; 1471 case DIVIDE: 1472 if (j == 0) 1473 FATAL("division by zero"); 1474 i /= j; 1475 break; 1476 case MOD: 1477 if (j == 0) 1478 FATAL("division by zero in mod"); 1479 modf(i/j, &v); 1480 i = i - j * v; 1481 break; 1482 case UMINUS: 1483 i = -i; 1484 break; 1485 case UPLUS: /* handled by getfval(), above */ 1486 break; 1487 case POWER: 1488 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ 1489 i = ipow(i, (int) j); 1490 else { 1491 errno = 0; 1492 i = errcheck(pow(i, j), "pow"); 1493 } 1494 break; 1495 default: /* can't happen */ 1496 FATAL("illegal arithmetic operator %d", n); 1497 } 1498 setfval(z, i); 1499 return(z); 1500 } 1501 1502 double ipow(double x, int n) /* x**n. ought to be done by pow, but isn't always */ 1503 { 1504 double v; 1505 1506 if (n <= 0) 1507 return 1; 1508 v = ipow(x, n/2); 1509 if (n % 2 == 0) 1510 return v * v; 1511 else 1512 return x * v * v; 1513 } 1514 1515 Cell *incrdecr(Node **a, int n) /* a[0]++, etc. 
*/ 1516 { 1517 Cell *x, *z; 1518 int k; 1519 Awkfloat xf; 1520 1521 x = execute(a[0]); 1522 xf = getfval(x); 1523 k = (n == PREINCR || n == POSTINCR) ? 1 : -1; 1524 if (n == PREINCR || n == PREDECR) { 1525 setfval(x, xf + k); 1526 return(x); 1527 } 1528 z = gettemp(); 1529 setfval(z, xf); 1530 setfval(x, xf + k); 1531 tempfree(x); 1532 return(z); 1533 } 1534 1535 Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ 1536 { /* this is subtle; don't muck with it. */ 1537 Cell *x, *y; 1538 Awkfloat xf, yf; 1539 double v; 1540 1541 y = execute(a[1]); 1542 x = execute(a[0]); 1543 if (n == ASSIGN) { /* ordinary assignment */ 1544 if (x == y && !(x->tval & (FLD|REC)) && x != nfloc) 1545 ; /* self-assignment: leave alone unless it's a field or NF */ 1546 else if ((y->tval & (STR|NUM)) == (STR|NUM)) { 1547 yf = getfval(y); 1548 setsval(x, getsval(y)); 1549 x->fval = yf; 1550 x->tval |= NUM; 1551 } 1552 else if (isstr(y)) 1553 setsval(x, getsval(y)); 1554 else if (isnum(y)) 1555 setfval(x, getfval(y)); 1556 else 1557 funnyvar(y, "read value of"); 1558 tempfree(y); 1559 return(x); 1560 } 1561 xf = getfval(x); 1562 yf = getfval(y); 1563 switch (n) { 1564 case ADDEQ: 1565 xf += yf; 1566 break; 1567 case SUBEQ: 1568 xf -= yf; 1569 break; 1570 case MULTEQ: 1571 xf *= yf; 1572 break; 1573 case DIVEQ: 1574 if (yf == 0) 1575 FATAL("division by zero in /="); 1576 xf /= yf; 1577 break; 1578 case MODEQ: 1579 if (yf == 0) 1580 FATAL("division by zero in %%="); 1581 modf(xf/yf, &v); 1582 xf = xf - yf * v; 1583 break; 1584 case POWEQ: 1585 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */ 1586 xf = ipow(xf, (int) yf); 1587 else { 1588 errno = 0; 1589 xf = errcheck(pow(xf, yf), "pow"); 1590 } 1591 break; 1592 default: 1593 FATAL("illegal assignment operator %d", n); 1594 break; 1595 } 1596 tempfree(y); 1597 setfval(x, xf); 1598 return(x); 1599 } 1600 1601 Cell *cat(Node **a, int q) /* a[0] cat a[1] */ 1602 { 1603 Cell *x, *y, *z; 1604 int n1, n2; 1605 char *s = 
NULL; 1606 int ssz = 0; 1607 1608 x = execute(a[0]); 1609 n1 = strlen(getsval(x)); 1610 adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1"); 1611 memcpy(s, x->sval, n1); 1612 1613 tempfree(x); 1614 1615 y = execute(a[1]); 1616 n2 = strlen(getsval(y)); 1617 adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2"); 1618 memcpy(s + n1, y->sval, n2); 1619 s[n1 + n2] = '\0'; 1620 1621 tempfree(y); 1622 1623 z = gettemp(); 1624 z->sval = s; 1625 z->tval = STR; 1626 1627 return(z); 1628 } 1629 1630 Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ 1631 { 1632 Cell *x; 1633 1634 if (a[0] == NULL) 1635 x = execute(a[1]); 1636 else { 1637 x = execute(a[0]); 1638 if (istrue(x)) { 1639 tempfree(x); 1640 x = execute(a[1]); 1641 } 1642 } 1643 return x; 1644 } 1645 1646 Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ 1647 { 1648 Cell *x; 1649 int pair; 1650 1651 pair = ptoi(a[3]); 1652 if (pairstack[pair] == 0) { 1653 x = execute(a[0]); 1654 if (istrue(x)) 1655 pairstack[pair] = 1; 1656 tempfree(x); 1657 } 1658 if (pairstack[pair] == 1) { 1659 x = execute(a[1]); 1660 if (istrue(x)) 1661 pairstack[pair] = 0; 1662 tempfree(x); 1663 x = execute(a[2]); 1664 return(x); 1665 } 1666 return(False); 1667 } 1668 1669 Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ 1670 { 1671 Cell *x = NULL, *y, *ap; 1672 const char *s, *origs, *t; 1673 const char *fs = NULL; 1674 char *origfs = NULL; 1675 int sep; 1676 char temp, num[50]; 1677 int j, n, tempstat, arg3type; 1678 double result; 1679 1680 y = execute(a[0]); /* source string */ 1681 origs = s = strdup(getsval(y)); 1682 if (s == NULL) 1683 FATAL("out of space in split"); 1684 tempfree(y); 1685 arg3type = ptoi(a[3]); 1686 if (a[2] == NULL) { /* BUG: CSV should override implicit fs but not explicit */ 1687 fs = getsval(fsloc); 1688 } else if (arg3type == STRING) { /* split(str,arr,"string") */ 1689 x = execute(a[2]); 1690 fs = origfs = strdup(getsval(x)); 1691 if (fs == NULL) 1692 FATAL("out of space in split"); 1693 
tempfree(x); 1694 } else if (arg3type == REGEXPR) { 1695 fs = "(regexpr)"; /* split(str,arr,/regexpr/) */ 1696 } else { 1697 FATAL("illegal type of split"); 1698 } 1699 sep = *fs; 1700 ap = execute(a[1]); /* array name */ 1701 /* BUG 7/26/22: this appears not to reset array: see C1/asplit */ 1702 freesymtab(ap); 1703 DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs); 1704 ap->tval &= ~STR; 1705 ap->tval |= ARR; 1706 ap->sval = (char *) makesymtab(NSYMTAB); 1707 1708 n = 0; 1709 if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) { 1710 /* split(s, a, //); have to arrange that it looks like empty sep */ 1711 arg3type = 0; 1712 fs = ""; 1713 sep = 0; 1714 } 1715 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */ 1716 fa *pfa; 1717 if (arg3type == REGEXPR) { /* it's ready already */ 1718 pfa = (fa *) a[2]; 1719 } else { 1720 pfa = makedfa(fs, 1); 1721 } 1722 if (nematch(pfa,s)) { 1723 tempstat = pfa->initstat; 1724 pfa->initstat = 2; 1725 do { 1726 n++; 1727 snprintf(num, sizeof(num), "%d", n); 1728 temp = *patbeg; 1729 setptr(patbeg, '\0'); 1730 if (is_number(s, & result)) 1731 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 1732 else 1733 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1734 setptr(patbeg, temp); 1735 s = patbeg + patlen; 1736 if (*(patbeg+patlen-1) == '\0' || *s == '\0') { 1737 n++; 1738 snprintf(num, sizeof(num), "%d", n); 1739 setsymtab(num, "", 0.0, STR, (Array *) ap->sval); 1740 pfa->initstat = tempstat; 1741 goto spdone; 1742 } 1743 } while (nematch(pfa,s)); 1744 pfa->initstat = tempstat; /* bwk: has to be here to reset */ 1745 /* cf gsub and refldbld */ 1746 } 1747 n++; 1748 snprintf(num, sizeof(num), "%d", n); 1749 if (is_number(s, & result)) 1750 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 1751 else 1752 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1753 spdone: 1754 pfa = NULL; 1755 1756 } else if (a[2] == NULL && CSV) { /* CSV only if no explicit separator */ 
1757 char *newt = (char *) malloc(strlen(s)); /* for building new string; reuse for each field */ 1758 for (;;) { 1759 char *fr = newt; 1760 n++; 1761 if (*s == '"' ) { /* start of "..." */ 1762 for (s++ ; *s != '\0'; ) { 1763 if (*s == '"' && s[1] != '\0' && s[1] == '"') { 1764 s += 2; /* doubled quote */ 1765 *fr++ = '"'; 1766 } else if (*s == '"' && (s[1] == '\0' || s[1] == ',')) { 1767 s++; /* skip over closing quote */ 1768 break; 1769 } else { 1770 *fr++ = *s++; 1771 } 1772 } 1773 *fr++ = 0; 1774 } else { /* unquoted field */ 1775 while (*s != ',' && *s != '\0') 1776 *fr++ = *s++; 1777 *fr++ = 0; 1778 } 1779 snprintf(num, sizeof(num), "%d", n); 1780 if (is_number(newt, &result)) 1781 setsymtab(num, newt, result, STR|NUM, (Array *) ap->sval); 1782 else 1783 setsymtab(num, newt, 0.0, STR, (Array *) ap->sval); 1784 if (*s++ == '\0') 1785 break; 1786 } 1787 free(newt); 1788 1789 } else if (!CSV && sep == ' ') { /* usual case: split on white space */ 1790 for (n = 0; ; ) { 1791 #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') 1792 while (ISWS(*s)) 1793 s++; 1794 if (*s == '\0') 1795 break; 1796 n++; 1797 t = s; 1798 do 1799 s++; 1800 while (*s != '\0' && !ISWS(*s)); 1801 temp = *s; 1802 setptr(s, '\0'); 1803 snprintf(num, sizeof(num), "%d", n); 1804 if (is_number(t, & result)) 1805 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 1806 else 1807 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1808 setptr(s, temp); 1809 if (*s != '\0') 1810 s++; 1811 } 1812 1813 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ 1814 for (n = 0; *s != '\0'; s += u8_nextlen(s)) { 1815 char buf[10]; 1816 n++; 1817 snprintf(num, sizeof(num), "%d", n); 1818 1819 for (j = 0; j < u8_nextlen(s); j++) { 1820 buf[j] = s[j]; 1821 } 1822 buf[j] = '\0'; 1823 1824 if (isdigit((uschar)buf[0])) 1825 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); 1826 else 1827 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); 1828 } 1829 1830 } else if (*s != '\0') { 
/* some random single character */ 1831 for (;;) { 1832 n++; 1833 t = s; 1834 while (*s != sep && *s != '\n' && *s != '\0') 1835 s++; 1836 temp = *s; 1837 setptr(s, '\0'); 1838 snprintf(num, sizeof(num), "%d", n); 1839 if (is_number(t, & result)) 1840 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 1841 else 1842 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1843 setptr(s, temp); 1844 if (*s++ == '\0') 1845 break; 1846 } 1847 } 1848 tempfree(ap); 1849 xfree(origs); 1850 xfree(origfs); 1851 x = gettemp(); 1852 x->tval = NUM; 1853 x->fval = n; 1854 return(x); 1855 } 1856 1857 Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */ 1858 { 1859 Cell *x; 1860 1861 x = execute(a[0]); 1862 if (istrue(x)) { 1863 tempfree(x); 1864 x = execute(a[1]); 1865 } else { 1866 tempfree(x); 1867 x = execute(a[2]); 1868 } 1869 return(x); 1870 } 1871 1872 Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ 1873 { 1874 Cell *x; 1875 1876 x = execute(a[0]); 1877 if (istrue(x)) { 1878 tempfree(x); 1879 x = execute(a[1]); 1880 } else if (a[2] != NULL) { 1881 tempfree(x); 1882 x = execute(a[2]); 1883 } 1884 return(x); 1885 } 1886 1887 Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */ 1888 { 1889 Cell *x; 1890 1891 for (;;) { 1892 x = execute(a[0]); 1893 if (!istrue(x)) 1894 return(x); 1895 tempfree(x); 1896 x = execute(a[1]); 1897 if (isbreak(x)) { 1898 x = True; 1899 return(x); 1900 } 1901 if (isnext(x) || isexit(x) || isret(x)) 1902 return(x); 1903 tempfree(x); 1904 } 1905 } 1906 1907 Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */ 1908 { 1909 Cell *x; 1910 1911 for (;;) { 1912 x = execute(a[0]); 1913 if (isbreak(x)) 1914 return True; 1915 if (isnext(x) || isexit(x) || isret(x)) 1916 return(x); 1917 tempfree(x); 1918 x = execute(a[1]); 1919 if (!istrue(x)) 1920 return(x); 1921 tempfree(x); 1922 } 1923 } 1924 1925 Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ 1926 { 1927 Cell *x; 1928 1929 x = execute(a[0]); 1930 tempfree(x); 1931 
for (;;) { 1932 if (a[1]!=NULL) { 1933 x = execute(a[1]); 1934 if (!istrue(x)) return(x); 1935 else tempfree(x); 1936 } 1937 x = execute(a[3]); 1938 if (isbreak(x)) /* turn off break */ 1939 return True; 1940 if (isnext(x) || isexit(x) || isret(x)) 1941 return(x); 1942 tempfree(x); 1943 x = execute(a[2]); 1944 tempfree(x); 1945 } 1946 } 1947 1948 Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ 1949 { 1950 Cell *x, *vp, *arrayp, *cp, *ncp; 1951 Array *tp; 1952 int i; 1953 1954 vp = execute(a[0]); 1955 arrayp = execute(a[1]); 1956 if (!isarr(arrayp)) { 1957 return True; 1958 } 1959 tp = (Array *) arrayp->sval; 1960 tempfree(arrayp); 1961 for (i = 0; i < tp->size; i++) { /* this routine knows too much */ 1962 for (cp = tp->tab[i]; cp != NULL; cp = ncp) { 1963 setsval(vp, cp->nval); 1964 ncp = cp->cnext; 1965 x = execute(a[2]); 1966 if (isbreak(x)) { 1967 tempfree(vp); 1968 return True; 1969 } 1970 if (isnext(x) || isexit(x) || isret(x)) { 1971 tempfree(vp); 1972 return(x); 1973 } 1974 tempfree(x); 1975 } 1976 } 1977 return True; 1978 } 1979 1980 static char *nawk_convert(const char *s, int (*fun_c)(int), 1981 wint_t (*fun_wc)(wint_t)) 1982 { 1983 char *buf = NULL; 1984 char *pbuf = NULL; 1985 const char *ps = NULL; 1986 size_t n = 0; 1987 wchar_t wc; 1988 const size_t sz = awk_mb_cur_max; 1989 int unused; 1990 1991 if (sz == 1) { 1992 buf = tostring(s); 1993 1994 for (pbuf = buf; *pbuf; pbuf++) 1995 *pbuf = fun_c((uschar)*pbuf); 1996 1997 return buf; 1998 } else { 1999 /* upper/lower character may be shorter/longer */ 2000 buf = tostringN(s, strlen(s) * sz + 1); 2001 2002 (void) mbtowc(NULL, NULL, 0); /* reset internal state */ 2003 /* 2004 * Reset internal state here too. 2005 * Assign result to avoid a compiler warning. (Casting to void 2006 * doesn't work.) 2007 * Increment said variable to avoid a different warning. 
2008 */ 2009 unused = wctomb(NULL, L'\0'); 2010 unused++; 2011 2012 ps = s; 2013 pbuf = buf; 2014 while (n = mbtowc(&wc, ps, sz), 2015 n > 0 && n != (size_t)-1 && n != (size_t)-2) 2016 { 2017 ps += n; 2018 2019 n = wctomb(pbuf, fun_wc(wc)); 2020 if (n == (size_t)-1) 2021 FATAL("illegal wide character %s", s); 2022 2023 pbuf += n; 2024 } 2025 2026 *pbuf = '\0'; 2027 2028 if (n) 2029 FATAL("illegal byte sequence %s", s); 2030 2031 return buf; 2032 } 2033 } 2034 2035 #ifdef __DJGPP__ 2036 static wint_t towupper(wint_t wc) 2037 { 2038 if (wc >= 0 && wc < 256) 2039 return toupper(wc & 0xFF); 2040 2041 return wc; 2042 } 2043 2044 static wint_t towlower(wint_t wc) 2045 { 2046 if (wc >= 0 && wc < 256) 2047 return tolower(wc & 0xFF); 2048 2049 return wc; 2050 } 2051 #endif 2052 2053 static char *nawk_toupper(const char *s) 2054 { 2055 return nawk_convert(s, toupper, towupper); 2056 } 2057 2058 static char *nawk_tolower(const char *s) 2059 { 2060 return nawk_convert(s, tolower, towlower); 2061 } 2062 2063 Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ 2064 { 2065 Cell *x, *y; 2066 Awkfloat u; 2067 int t, sz; 2068 Awkfloat tmp; 2069 char *buf, *fmt; 2070 Node *nextarg; 2071 FILE *fp; 2072 int status = 0; 2073 time_t tv; 2074 struct tm *tm, tmbuf; 2075 int estatus = 0; 2076 2077 t = ptoi(a[0]); 2078 x = execute(a[1]); 2079 nextarg = a[1]->nnext; 2080 switch (t) { 2081 case FLENGTH: 2082 if (isarr(x)) 2083 u = ((Array *) x->sval)->nelem; /* GROT. 
should be function*/ 2084 else 2085 u = u8_strlen(getsval(x)); 2086 break; 2087 case FLOG: 2088 errno = 0; 2089 u = errcheck(log(getfval(x)), "log"); 2090 break; 2091 case FINT: 2092 modf(getfval(x), &u); break; 2093 case FEXP: 2094 errno = 0; 2095 u = errcheck(exp(getfval(x)), "exp"); 2096 break; 2097 case FSQRT: 2098 errno = 0; 2099 u = errcheck(sqrt(getfval(x)), "sqrt"); 2100 break; 2101 case FSIN: 2102 u = sin(getfval(x)); break; 2103 case FCOS: 2104 u = cos(getfval(x)); break; 2105 case FATAN: 2106 if (nextarg == NULL) { 2107 WARNING("atan2 requires two arguments; returning 1.0"); 2108 u = 1.0; 2109 } else { 2110 y = execute(a[1]->nnext); 2111 u = atan2(getfval(x), getfval(y)); 2112 tempfree(y); 2113 nextarg = nextarg->nnext; 2114 } 2115 break; 2116 case FCOMPL: 2117 u = ~((int)getfval(x)); 2118 break; 2119 case FAND: 2120 if (nextarg == 0) { 2121 WARNING("and requires two arguments; returning 0"); 2122 u = 0; 2123 break; 2124 } 2125 y = execute(a[1]->nnext); 2126 u = ((int)getfval(x)) & ((int)getfval(y)); 2127 tempfree(y); 2128 nextarg = nextarg->nnext; 2129 break; 2130 case FFOR: 2131 if (nextarg == 0) { 2132 WARNING("or requires two arguments; returning 0"); 2133 u = 0; 2134 break; 2135 } 2136 y = execute(a[1]->nnext); 2137 u = ((int)getfval(x)) | ((int)getfval(y)); 2138 tempfree(y); 2139 nextarg = nextarg->nnext; 2140 break; 2141 case FXOR: 2142 if (nextarg == 0) { 2143 WARNING("xor requires two arguments; returning 0"); 2144 u = 0; 2145 break; 2146 } 2147 y = execute(a[1]->nnext); 2148 u = ((int)getfval(x)) ^ ((int)getfval(y)); 2149 tempfree(y); 2150 nextarg = nextarg->nnext; 2151 break; 2152 case FLSHIFT: 2153 if (nextarg == 0) { 2154 WARNING("lshift requires two arguments; returning 0"); 2155 u = 0; 2156 break; 2157 } 2158 y = execute(a[1]->nnext); 2159 u = ((int)getfval(x)) << ((int)getfval(y)); 2160 tempfree(y); 2161 nextarg = nextarg->nnext; 2162 break; 2163 case FRSHIFT: 2164 if (nextarg == 0) { 2165 WARNING("rshift requires two arguments; returning 
0"); 2166 u = 0; 2167 break; 2168 } 2169 y = execute(a[1]->nnext); 2170 u = ((int)getfval(x)) >> ((int)getfval(y)); 2171 tempfree(y); 2172 nextarg = nextarg->nnext; 2173 break; 2174 case FSYSTEM: 2175 fflush(stdout); /* in case something is buffered already */ 2176 estatus = status = system(getsval(x)); 2177 if (status != -1) { 2178 if (WIFEXITED(status)) { 2179 estatus = WEXITSTATUS(status); 2180 } else if (WIFSIGNALED(status)) { 2181 estatus = WTERMSIG(status) + 256; 2182 #ifdef WCOREDUMP 2183 if (WCOREDUMP(status)) 2184 estatus += 256; 2185 #endif 2186 } else /* something else?!? */ 2187 estatus = 0; 2188 } 2189 /* else estatus was set to -1 */ 2190 u = estatus; 2191 break; 2192 case FRAND: 2193 /* random() returns numbers in [0..2^31-1] 2194 * in order to get a number in [0, 1), divide it by 2^31 2195 */ 2196 u = (Awkfloat) random() / (0x7fffffffL + 0x1UL); 2197 break; 2198 case FSRAND: 2199 if (isrec(x)) { /* no argument provided */ 2200 u = time(NULL); 2201 tmp = u; 2202 srandom((unsigned int) u); 2203 } else { 2204 u = getfval(x); 2205 tmp = u; 2206 srandom_deterministic((unsigned int) u); 2207 } 2208 u = srand_seed; 2209 srand_seed = tmp; 2210 break; 2211 case FTOUPPER: 2212 case FTOLOWER: 2213 if (t == FTOUPPER) 2214 buf = nawk_toupper(getsval(x)); 2215 else 2216 buf = nawk_tolower(getsval(x)); 2217 tempfree(x); 2218 x = gettemp(); 2219 setsval(x, buf); 2220 free(buf); 2221 return x; 2222 case FFLUSH: 2223 if (isrec(x) || strlen(getsval(x)) == 0) { 2224 flush_all(); /* fflush() or fflush("") -> all */ 2225 u = 0; 2226 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL) 2227 u = EOF; 2228 else 2229 u = fflush(fp); 2230 break; 2231 case FMKTIME: 2232 memset(&tmbuf, 0, sizeof(tmbuf)); 2233 tm = &tmbuf; 2234 t = sscanf(getsval(x), "%d %d %d %d %d %d %d", 2235 &tm->tm_year, &tm->tm_mon, &tm->tm_mday, &tm->tm_hour, 2236 &tm->tm_min, &tm->tm_sec, &tm->tm_isdst); 2237 switch (t) { 2238 case 6: 2239 tm->tm_isdst = -1; /* let mktime figure it out */ 2240 
/* FALLTHROUGH */ 2241 case 7: 2242 tm->tm_year -= 1900; 2243 tm->tm_mon--; 2244 u = mktime(tm); 2245 break; 2246 default: 2247 u = -1; 2248 break; 2249 } 2250 break; 2251 case FSYSTIME: 2252 u = time((time_t *) 0); 2253 break; 2254 case FSTRFTIME: 2255 /* strftime([format [,timestamp]]) */ 2256 if (nextarg) { 2257 y = execute(nextarg); 2258 nextarg = nextarg->nnext; 2259 tv = (time_t) getfval(y); 2260 tempfree(y); 2261 } else 2262 tv = time((time_t *) 0); 2263 tm = localtime(&tv); 2264 if (tm == NULL) 2265 FATAL("bad time %ld", (long)tv); 2266 2267 if (isrec(x)) { 2268 /* format argument not provided, use default */ 2269 fmt = tostring("%a %b %d %H:%M:%S %Z %Y"); 2270 } else 2271 fmt = tostring(getsval(x)); 2272 2273 sz = 32; 2274 buf = NULL; 2275 do { 2276 if ((buf = (char *) reallocarray(buf, 2, sz)) == NULL) 2277 FATAL("out of memory in strftime"); 2278 sz *= 2; 2279 } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0'); 2280 2281 y = gettemp(); 2282 setsval(y, buf); 2283 free(fmt); 2284 free(buf); 2285 2286 return y; 2287 default: /* can't happen */ 2288 FATAL("illegal function type %d", t); 2289 break; 2290 } 2291 tempfree(x); 2292 x = gettemp(); 2293 setfval(x, u); 2294 if (nextarg != NULL) { 2295 WARNING("warning: function has too many arguments"); 2296 for ( ; nextarg; nextarg = nextarg->nnext) { 2297 y = execute(nextarg); 2298 tempfree(y); 2299 } 2300 } 2301 return(x); 2302 } 2303 2304 Cell *printstat(Node **a, int n) /* print a[0] */ 2305 { 2306 Node *x; 2307 Cell *y; 2308 FILE *fp; 2309 2310 if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */ 2311 fp = stdout; 2312 else 2313 fp = redirect(ptoi(a[1]), a[2]); 2314 for (x = a[0]; x != NULL; x = x->nnext) { 2315 y = execute(x); 2316 fputs(getpssval(y), fp); 2317 tempfree(y); 2318 if (x->nnext == NULL) 2319 fputs(getsval(orsloc), fp); 2320 else 2321 fputs(getsval(ofsloc), fp); 2322 } 2323 if (a[1] != NULL) 2324 fflush(fp); 2325 if (ferror(fp)) 2326 FATAL("write error on %s", 
filename(fp)); 2327 return(True); 2328 } 2329 2330 Cell *nullproc(Node **a, int n) 2331 { 2332 return 0; 2333 } 2334 2335 2336 FILE *redirect(int a, Node *b) /* set up all i/o redirections */ 2337 { 2338 FILE *fp; 2339 Cell *x; 2340 char *fname; 2341 2342 x = execute(b); 2343 fname = getsval(x); 2344 fp = openfile(a, fname, NULL); 2345 if (fp == NULL) 2346 FATAL("can't open file %s", fname); 2347 tempfree(x); 2348 return fp; 2349 } 2350 2351 struct files { 2352 FILE *fp; 2353 const char *fname; 2354 int mode; /* '|', 'a', 'w' => LE/LT, GT */ 2355 } *files; 2356 2357 size_t nfiles; 2358 2359 static void stdinit(void) /* in case stdin, etc., are not constants */ 2360 { 2361 nfiles = FOPEN_MAX; 2362 files = (struct files *) calloc(nfiles, sizeof(*files)); 2363 if (files == NULL) 2364 FATAL("can't allocate file memory for %zu files", nfiles); 2365 files[0].fp = stdin; 2366 files[0].fname = tostring("/dev/stdin"); 2367 files[0].mode = LT; 2368 files[1].fp = stdout; 2369 files[1].fname = tostring("/dev/stdout"); 2370 files[1].mode = GT; 2371 files[2].fp = stderr; 2372 files[2].fname = tostring("/dev/stderr"); 2373 files[2].mode = GT; 2374 } 2375 2376 FILE *openfile(int a, const char *us, bool *pnewflag) 2377 { 2378 const char *s = us; 2379 size_t i; 2380 int m; 2381 FILE *fp = NULL; 2382 2383 if (*s == '\0') 2384 FATAL("null file name in print or getline"); 2385 for (i = 0; i < nfiles; i++) 2386 if (files[i].fname && strcmp(s, files[i].fname) == 0 && 2387 (a == files[i].mode || (a==APPEND && files[i].mode==GT) || 2388 a == FFLUSH)) { 2389 if (pnewflag) 2390 *pnewflag = false; 2391 return files[i].fp; 2392 } 2393 if (a == FFLUSH) /* didn't find it, so don't create it! 
*/ 2394 return NULL; 2395 2396 for (i = 0; i < nfiles; i++) 2397 if (files[i].fp == NULL) 2398 break; 2399 if (i >= nfiles) { 2400 struct files *nf; 2401 size_t nnf = nfiles + FOPEN_MAX; 2402 nf = (struct files *) reallocarray(files, nnf, sizeof(*nf)); 2403 if (nf == NULL) 2404 FATAL("cannot grow files for %s and %zu files", s, nnf); 2405 memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf)); 2406 nfiles = nnf; 2407 files = nf; 2408 } 2409 fflush(stdout); /* force a semblance of order */ 2410 m = a; 2411 if (a == GT) { 2412 fp = fopen(s, "w"); 2413 } else if (a == APPEND) { 2414 fp = fopen(s, "a"); 2415 m = GT; /* so can mix > and >> */ 2416 } else if (a == '|') { /* output pipe */ 2417 fp = popen(s, "w"); 2418 } else if (a == LE) { /* input pipe */ 2419 fp = popen(s, "r"); 2420 } else if (a == LT) { /* getline <file */ 2421 fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */ 2422 } else /* can't happen */ 2423 FATAL("illegal redirection %d", a); 2424 if (fp != NULL) { 2425 files[i].fname = tostring(s); 2426 files[i].fp = fp; 2427 files[i].mode = m; 2428 if (pnewflag) 2429 *pnewflag = true; 2430 if (fp != stdin && fp != stdout && fp != stderr) 2431 (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC); 2432 } 2433 return fp; 2434 } 2435 2436 const char *filename(FILE *fp) 2437 { 2438 size_t i; 2439 2440 for (i = 0; i < nfiles; i++) 2441 if (fp == files[i].fp) 2442 return files[i].fname; 2443 return "???"; 2444 } 2445 2446 Cell *closefile(Node **a, int n) 2447 { 2448 Cell *x; 2449 size_t i; 2450 bool stat; 2451 2452 x = execute(a[0]); 2453 getsval(x); 2454 stat = true; 2455 for (i = 0; i < nfiles; i++) { 2456 if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0) 2457 continue; 2458 if (files[i].mode == GT || files[i].mode == '|') 2459 fflush(files[i].fp); 2460 if (ferror(files[i].fp)) { 2461 if ((files[i].mode == GT && files[i].fp != stderr) 2462 || files[i].mode == '|') 2463 FATAL("write error on %s", files[i].fname); 2464 else 2465 WARNING("i/o error 
occurred on %s", files[i].fname); 2466 } 2467 if (files[i].fp == stdin || files[i].fp == stdout || 2468 files[i].fp == stderr) 2469 stat = freopen("/dev/null", "r+", files[i].fp) == NULL; 2470 else if (files[i].mode == '|' || files[i].mode == LE) 2471 stat = pclose(files[i].fp) == -1; 2472 else 2473 stat = fclose(files[i].fp) == EOF; 2474 if (stat) 2475 WARNING("i/o error occurred closing %s", files[i].fname); 2476 xfree(files[i].fname); 2477 files[i].fname = NULL; /* watch out for ref thru this */ 2478 files[i].fp = NULL; 2479 break; 2480 } 2481 tempfree(x); 2482 x = gettemp(); 2483 setfval(x, (Awkfloat) (stat ? -1 : 0)); 2484 return(x); 2485 } 2486 2487 void closeall(void) 2488 { 2489 size_t i; 2490 bool stat = false; 2491 2492 for (i = 0; i < nfiles; i++) { 2493 if (! files[i].fp) 2494 continue; 2495 if (files[i].mode == GT || files[i].mode == '|') 2496 fflush(files[i].fp); 2497 if (ferror(files[i].fp)) { 2498 if ((files[i].mode == GT && files[i].fp != stderr) 2499 || files[i].mode == '|') 2500 FATAL("write error on %s", files[i].fname); 2501 else 2502 WARNING("i/o error occurred on %s", files[i].fname); 2503 } 2504 if (files[i].fp == stdin || files[i].fp == stdout || 2505 files[i].fp == stderr) 2506 continue; 2507 if (files[i].mode == '|' || files[i].mode == LE) 2508 stat = pclose(files[i].fp) == -1; 2509 else 2510 stat = fclose(files[i].fp) == EOF; 2511 if (stat) 2512 WARNING("i/o error occurred while closing %s", files[i].fname); 2513 } 2514 } 2515 2516 static void flush_all(void) 2517 { 2518 size_t i; 2519 2520 for (i = 0; i < nfiles; i++) 2521 if (files[i].fp) 2522 fflush(files[i].fp); 2523 } 2524 2525 void backsub(char **pb_ptr, const char **sptr_ptr); 2526 2527 Cell *dosub(Node **a, int subop) /* sub and gsub */ 2528 { 2529 fa *pfa; 2530 int tempstat; 2531 char *repl; 2532 Cell *x; 2533 2534 char *buf = NULL; 2535 char *pb = NULL; 2536 int bufsz = recsize; 2537 2538 const char *r, *s; 2539 const char *start; 2540 const char *noempty = NULL; /* empty match 
disallowed here */ 2541 size_t m = 0; /* match count */ 2542 size_t whichm; /* which match to select, 0 = global */ 2543 int mtype; /* match type */ 2544 2545 if (a[0] == NULL) { /* 0 => a[1] is already-compiled regexpr */ 2546 pfa = (fa *) a[1]; 2547 } else { 2548 x = execute(a[1]); 2549 pfa = makedfa(getsval(x), 1); 2550 tempfree(x); 2551 } 2552 2553 x = execute(a[2]); /* replacement string */ 2554 repl = tostring(getsval(x)); 2555 tempfree(x); 2556 2557 switch (subop) { 2558 case SUB: 2559 whichm = 1; 2560 x = execute(a[3]); /* source string */ 2561 break; 2562 case GSUB: 2563 whichm = 0; 2564 x = execute(a[3]); /* source string */ 2565 break; 2566 default: 2567 FATAL("dosub: unrecognized subop: %d", subop); 2568 } 2569 2570 start = getsval(x); 2571 while (pmatch(pfa, start)) { 2572 if (buf == NULL) { 2573 if ((pb = buf = (char *) malloc(bufsz)) == NULL) 2574 FATAL("out of memory in dosub"); 2575 tempstat = pfa->initstat; 2576 pfa->initstat = 2; 2577 } 2578 2579 /* match types */ 2580 #define MT_IGNORE 0 /* unselected or invalid */ 2581 #define MT_INSERT 1 /* selected, empty */ 2582 #define MT_REPLACE 2 /* selected, not empty */ 2583 2584 /* an empty match just after replacement is invalid */ 2585 2586 if (patbeg == noempty && patlen == 0) { 2587 mtype = MT_IGNORE; /* invalid, not counted */ 2588 } else if (whichm == ++m || whichm == 0) { 2589 mtype = patlen ? 
MT_REPLACE : MT_INSERT; 2590 } else { 2591 mtype = MT_IGNORE; /* unselected, but counted */ 2592 } 2593 2594 /* leading text: */ 2595 if (patbeg > start) { 2596 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - start), 2597 recsize, &pb, "dosub"); 2598 s = start; 2599 while (s < patbeg) 2600 *pb++ = *s++; 2601 } 2602 2603 if (mtype == MT_IGNORE) 2604 goto matching_text; /* skip replacement text */ 2605 2606 r = repl; 2607 while (*r != 0) { 2608 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "dosub"); 2609 if (*r == '\\') { 2610 backsub(&pb, &r); 2611 } else if (*r == '&') { 2612 r++; 2613 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, 2614 &pb, "dosub"); 2615 for (s = patbeg; s < patbeg+patlen; ) 2616 *pb++ = *s++; 2617 } else { 2618 *pb++ = *r++; 2619 } 2620 } 2621 2622 matching_text: 2623 if (mtype == MT_REPLACE || *patbeg == '\0') 2624 goto next_search; /* skip matching text */ 2625 2626 if (patlen == 0) 2627 patlen = u8_nextlen(patbeg); 2628 adjbuf(&buf, &bufsz, (pb-buf) + patlen, recsize, &pb, "dosub"); 2629 s = patbeg; 2630 while (s < patbeg + patlen) 2631 *pb++ = *s++; 2632 2633 next_search: 2634 start = patbeg + patlen; 2635 if (m == whichm || *patbeg == '\0') 2636 break; 2637 if (mtype == MT_REPLACE) 2638 noempty = start; 2639 2640 #undef MT_IGNORE 2641 #undef MT_INSERT 2642 #undef MT_REPLACE 2643 } 2644 2645 xfree(repl); 2646 2647 if (buf != NULL) { 2648 pfa->initstat = tempstat; 2649 2650 /* trailing text */ 2651 adjbuf(&buf, &bufsz, 1+strlen(start)+pb-buf, 0, &pb, "dosub"); 2652 while ((*pb++ = *start++) != '\0') 2653 ; 2654 2655 setsval(x, buf); 2656 free(buf); 2657 } 2658 2659 tempfree(x); 2660 x = gettemp(); 2661 x->tval = NUM; 2662 x->fval = m; 2663 return x; 2664 } 2665 2666 Cell *gensub(Node **a, int nnn) /* global selective substitute */ 2667 /* XXX incomplete - doesn't support backreferences \0 ... 
\9 */ 2668 { 2669 Cell *x, *y, *res, *h; 2670 char *rptr; 2671 const char *sptr; 2672 char *buf, *pb; 2673 const char *t, *q; 2674 fa *pfa; 2675 int mflag, tempstat, num, whichm; 2676 int bufsz = recsize; 2677 2678 if ((buf = (char *) malloc(bufsz)) == NULL) 2679 FATAL("out of memory in gensub"); 2680 mflag = 0; /* if mflag == 0, can replace empty string */ 2681 num = 0; 2682 x = execute(a[4]); /* source string */ 2683 t = getsval(x); 2684 res = copycell(x); /* target string - initially copy of source */ 2685 res->csub = CTEMP; /* result values are temporary */ 2686 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ 2687 pfa = (fa *) a[1]; /* regular expression */ 2688 else { 2689 y = execute(a[1]); 2690 pfa = makedfa(getsval(y), 1); 2691 tempfree(y); 2692 } 2693 y = execute(a[2]); /* replacement string */ 2694 h = execute(a[3]); /* which matches should be replaced */ 2695 sptr = getsval(h); 2696 if (sptr[0] == 'g' || sptr[0] == 'G') 2697 whichm = -1; 2698 else { 2699 /* 2700 * The specified number is index of replacement, starting 2701 * from 1. GNU awk treats index lower than 0 same as 2702 * 1, we do same for compatibility. 2703 */ 2704 whichm = (int) getfval(h) - 1; 2705 if (whichm < 0) 2706 whichm = 0; 2707 } 2708 tempfree(h); 2709 2710 if (pmatch(pfa, t)) { 2711 char *sl; 2712 2713 tempstat = pfa->initstat; 2714 pfa->initstat = 2; 2715 pb = buf; 2716 rptr = getsval(y); 2717 /* 2718 * XXX if there are any backreferences in subst string, 2719 * complain now. 
2720 */ 2721 for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) { 2722 if (strchr("0123456789", sl[1])) { 2723 FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr); 2724 } 2725 } 2726 2727 do { 2728 if (whichm >= 0 && whichm != num) { 2729 num++; 2730 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub"); 2731 2732 /* copy the part of string up to and including 2733 * match to output buffer */ 2734 while (t < patbeg + patlen) 2735 *pb++ = *t++; 2736 continue; 2737 } 2738 2739 if (patlen == 0 && *patbeg != 0) { /* matched empty string */ 2740 if (mflag == 0) { /* can replace empty */ 2741 num++; 2742 sptr = rptr; 2743 while (*sptr != 0) { 2744 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2745 if (*sptr == '\\') { 2746 backsub(&pb, &sptr); 2747 } else if (*sptr == '&') { 2748 sptr++; 2749 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2750 for (q = patbeg; q < patbeg+patlen; ) 2751 *pb++ = *q++; 2752 } else 2753 *pb++ = *sptr++; 2754 } 2755 } 2756 if (*t == 0) /* at end */ 2757 goto done; 2758 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub"); 2759 *pb++ = *t++; 2760 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2761 FATAL("gensub result0 %.30s too big; can't happen", buf); 2762 mflag = 0; 2763 } 2764 else { /* matched nonempty string */ 2765 num++; 2766 sptr = t; 2767 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub"); 2768 while (sptr < patbeg) 2769 *pb++ = *sptr++; 2770 sptr = rptr; 2771 while (*sptr != 0) { 2772 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2773 if (*sptr == '\\') { 2774 backsub(&pb, &sptr); 2775 } else if (*sptr == '&') { 2776 sptr++; 2777 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2778 for (q = patbeg; q < patbeg+patlen; ) 2779 *pb++ = *q++; 2780 } else 2781 *pb++ = *sptr++; 2782 } 2783 t = patbeg + patlen; 2784 if (patlen == 0 || *t == 0 || *(t-1) == 0) 2785 goto done; 2786 if (pb > buf + bufsz) 2787 
FATAL("gensub result1 %.30s too big; can't happen", buf); 2788 mflag = 1; 2789 } 2790 } while (pmatch(pfa,t)); 2791 sptr = t; 2792 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub"); 2793 while ((*pb++ = *sptr++) != 0) 2794 ; 2795 done: if (pb > buf + bufsz) 2796 FATAL("gensub result2 %.30s too big; can't happen", buf); 2797 *pb = '\0'; 2798 setsval(res, buf); 2799 pfa->initstat = tempstat; 2800 } 2801 tempfree(x); 2802 tempfree(y); 2803 free(buf); 2804 return(res); 2805 } 2806 2807 void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ 2808 { /* sptr[0] == '\\' */ 2809 char *pb = *pb_ptr; 2810 const char *sptr = *sptr_ptr; 2811 2812 if (sptr[1] == '\\') { 2813 if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ 2814 *pb++ = '\\'; 2815 *pb++ = '&'; 2816 sptr += 4; 2817 } else if (sptr[2] == '&') { /* \\& -> \ + matched */ 2818 *pb++ = '\\'; 2819 sptr += 2; 2820 } else if (do_posix) { /* \\x -> \x */ 2821 sptr++; 2822 *pb++ = *sptr++; 2823 } else { /* \\x -> \\x */ 2824 *pb++ = *sptr++; 2825 *pb++ = *sptr++; 2826 } 2827 } else if (sptr[1] == '&') { /* literal & */ 2828 sptr++; 2829 *pb++ = *sptr++; 2830 } else /* literal \ */ 2831 *pb++ = *sptr++; 2832 2833 *pb_ptr = pb; 2834 *sptr_ptr = sptr; 2835 } 2836 2837 static char *wide_char_to_byte_str(int rune, size_t *outlen) 2838 { 2839 static char buf[5]; 2840 int len; 2841 2842 if (rune < 0 || rune > 0x10FFFF) 2843 return NULL; 2844 2845 memset(buf, 0, sizeof(buf)); 2846 2847 len = 0; 2848 if (rune <= 0x0000007F) { 2849 buf[len++] = rune; 2850 } else if (rune <= 0x000007FF) { 2851 // 110xxxxx 10xxxxxx 2852 buf[len++] = 0xC0 | (rune >> 6); 2853 buf[len++] = 0x80 | (rune & 0x3F); 2854 } else if (rune <= 0x0000FFFF) { 2855 // 1110xxxx 10xxxxxx 10xxxxxx 2856 buf[len++] = 0xE0 | (rune >> 12); 2857 buf[len++] = 0x80 | ((rune >> 6) & 0x3F); 2858 buf[len++] = 0x80 | (rune & 0x3F); 2859 2860 } else { 2861 // 0x00010000 - 0x10FFFF 2862 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 2863 
buf[len++] = 0xF0 | (rune >> 18); 2864 buf[len++] = 0x80 | ((rune >> 12) & 0x3F); 2865 buf[len++] = 0x80 | ((rune >> 6) & 0x3F); 2866 buf[len++] = 0x80 | (rune & 0x3F); 2867 } 2868 2869 *outlen = len; 2870 buf[len++] = '\0'; 2871 2872 return buf; 2873 } 2874