1 /*- 2 * Copyright (c) 1992 Diomidis Spinellis. 3 * Copyright (c) 1992, 1993, 1994 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Diomidis Spinellis of Imperial College, University of London. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)process.c 8.6 (Berkeley) 4/20/94 34 * $FreeBSD: src/usr.bin/sed/process.c,v 1.49 2008/02/09 09:12:02 dwmalone Exp $ 35 * $DragonFly: src/usr.bin/sed/process.c,v 1.6 2008/04/08 13:23:38 swildner Exp $ 36 */ 37 38 #include <sys/types.h> 39 #include <sys/stat.h> 40 #include <sys/ioctl.h> 41 #include <sys/uio.h> 42 43 #include <ctype.h> 44 #include <err.h> 45 #include <errno.h> 46 #include <fcntl.h> 47 #include <limits.h> 48 #include <regex.h> 49 #include <stdio.h> 50 #include <stdlib.h> 51 #include <string.h> 52 #include <unistd.h> 53 #include <wchar.h> 54 #include <wctype.h> 55 56 #include "defs.h" 57 #include "extern.h" 58 59 static SPACE HS, PS, SS, YS; 60 #define pd PS.deleted 61 #define ps PS.space 62 #define psl PS.len 63 #define hs HS.space 64 #define hsl HS.len 65 66 static __inline int applies(struct s_command *); 67 static void do_tr(struct s_tr *); 68 static void flush_appends(void); 69 static void lputs(char *, size_t); 70 static __inline int regexec_e(regex_t *, const char *, int, int, size_t); 71 static void regsub(SPACE *, char *, char *); 72 static int substitute(struct s_command *); 73 74 struct s_appends *appends; /* Array of pointers to strings to append. */ 75 static int appendx; /* Index into appends array. */ 76 int appendnum; /* Size of appends array. */ 77 78 static int lastaddr; /* Set by applies if last address of a range. */ 79 static int sdone; /* If any substitutes since last line input. */ 80 /* Iov structure for 'w' commands. */ 81 static regex_t *defpreg; 82 size_t maxnsub; 83 regmatch_t *match; 84 85 #define OUT() do {fwrite(ps, 1, psl, outfile); fputc('\n', outfile);} while (0) 86 87 void 88 process(void) 89 { 90 struct s_command *cp; 91 SPACE tspace; 92 size_t oldpsl = 0; 93 char *p; 94 95 p = NULL; 96 97 for (linenum = 0; mf_fgets(&PS, REPLACE);) { 98 pd = 0; 99 top: 100 cp = prog; 101 redirect: 102 while (cp != NULL) { 103 if (!applies(cp)) { 104 cp = cp->next; 105 continue; 106 } 107 switch (cp->code) { 108 case '{': 109 cp = cp->u.c; 110 goto redirect; 111 case 'a': 112 if (appendx >= appendnum) 113 if ((appends = realloc(appends, 114 sizeof(struct s_appends) * 115 (appendnum *= 2))) == NULL) 116 err(1, "realloc"); 117 appends[appendx].type = AP_STRING; 118 appends[appendx].s = cp->t; 119 appends[appendx].len = strlen(cp->t); 120 appendx++; 121 break; 122 case 'b': 123 cp = cp->u.c; 124 goto redirect; 125 case 'c': 126 pd = 1; 127 psl = 0; 128 if (cp->a2 == NULL || lastaddr || lastline()) 129 (void)fprintf(outfile, "%s", cp->t); 130 break; 131 case 'd': 132 pd = 1; 133 goto new; 134 case 'D': 135 if (pd) 136 goto new; 137 if (psl == 0 || 138 (p = memchr(ps, '\n', psl)) == NULL) { 139 pd = 1; 140 goto new; 141 } else { 142 psl -= (p + 1) - ps; 143 memmove(ps, p + 1, psl); 144 goto top; 145 } 146 case 'g': 147 cspace(&PS, hs, hsl, REPLACE); 148 break; 149 case 'G': 150 cspace(&PS, "\n", 1, APPEND); 151 cspace(&PS, hs, hsl, APPEND); 152 break; 153 case 'h': 154 cspace(&HS, ps, psl, REPLACE); 155 break; 156 case 'H': 157 cspace(&HS, "\n", 1, APPEND); 158 cspace(&HS, ps, psl, APPEND); 159 break; 160 case 'i': 161 (void)fprintf(outfile, "%s", cp->t); 162 break; 163 case 'l': 164 lputs(ps, psl); 165 break; 166 case 'n': 167 if (!nflag && !pd) 168 OUT(); 169 flush_appends(); 170 if (!mf_fgets(&PS, REPLACE)) 171 exit(0); 172 pd = 0; 173 break; 174 case 'N': 175 flush_appends(); 176 cspace(&PS, "\n", 1, APPEND); 177 if (!mf_fgets(&PS, APPEND)) 178 exit(0); 179 break; 180 case 'p': 181 if (pd) 182 break; 183 OUT(); 184 break; 185 case 'P': 186 if (pd) 187 break; 188 if ((p = memchr(ps, '\n', psl)) != NULL) { 189 oldpsl = psl; 190 psl = p - ps; 191 } 192 OUT(); 193 if (p != NULL) 194 psl = oldpsl; 195 break; 196 case 'q': 197 if (!nflag && !pd) 198 OUT(); 199 flush_appends(); 200 exit(0); 201 case 'r': 202 if (appendx >= appendnum) 203 if ((appends = realloc(appends, 204 sizeof(struct s_appends) * 205 (appendnum *= 2))) == NULL) 206 err(1, "realloc"); 207 appends[appendx].type = AP_FILE; 208 appends[appendx].s = cp->t; 209 appends[appendx].len = strlen(cp->t); 210 appendx++; 211 break; 212 case 's': 213 sdone |= substitute(cp); 214 break; 215 case 't': 216 if (sdone) { 217 sdone = 0; 218 cp = cp->u.c; 219 goto redirect; 220 } 221 break; 222 case 'w': 223 if (pd) 224 break; 225 if (cp->u.fd == -1 && (cp->u.fd = open(cp->t, 226 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 227 DEFFILEMODE)) == -1) 228 err(1, "%s", cp->t); 229 if (write(cp->u.fd, ps, psl) != (ssize_t)psl || 230 write(cp->u.fd, "\n", 1) != 1) 231 err(1, "%s", cp->t); 232 break; 233 case 'x': 234 /* 235 * If the hold space is null, make it empty 236 * but not null. Otherwise the pattern space 237 * will become null after the swap, which is 238 * an abnormal condition. 239 */ 240 if (hs == NULL) 241 cspace(&HS, "", 0, REPLACE); 242 tspace = PS; 243 PS = HS; 244 HS = tspace; 245 break; 246 case 'y': 247 if (pd || psl == 0) 248 break; 249 do_tr(cp->u.y); 250 break; 251 case ':': 252 case '}': 253 break; 254 case '=': 255 (void)fprintf(outfile, "%lu\n", linenum); 256 } 257 cp = cp->next; 258 } /* for all cp */ 259 260 new: if (!nflag && !pd) 261 OUT(); 262 flush_appends(); 263 } /* for all lines */ 264 } 265 266 /* 267 * TRUE if the address passed matches the current program state 268 * (lastline, linenumber, ps). 269 */ 270 #define MATCH(a) \ 271 ((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \ 272 (a)->type == AT_LINE ? linenum == (a)->u.l : lastline()) 273 274 /* 275 * Return TRUE if the command applies to the current line. Sets the inrange 276 * flag to process ranges. Interprets the non-select (``!'') flag. 277 */ 278 static __inline int 279 applies(struct s_command *cp) 280 { 281 int r; 282 283 lastaddr = 0; 284 if (cp->a1 == NULL && cp->a2 == NULL) 285 r = 1; 286 else if (cp->a2) 287 if (cp->inrange) { 288 if (MATCH(cp->a2)) { 289 cp->inrange = 0; 290 lastaddr = 1; 291 r = 1; 292 } else if (cp->a2->type == AT_LINE && 293 linenum > cp->a2->u.l) { 294 /* 295 * We missed the 2nd address due to a branch, 296 * so just close the range and return false. 297 */ 298 cp->inrange = 0; 299 r = 0; 300 } else 301 r = 1; 302 } else if (MATCH(cp->a1)) { 303 /* 304 * If the second address is a number less than or 305 * equal to the line number first selected, only 306 * one line shall be selected. 307 * -- POSIX 1003.2 308 */ 309 if (cp->a2->type == AT_LINE && 310 linenum >= cp->a2->u.l) 311 lastaddr = 1; 312 else 313 cp->inrange = 1; 314 r = 1; 315 } else 316 r = 0; 317 else 318 r = MATCH(cp->a1); 319 return (cp->nonsel ? ! r : r); 320 } 321 322 /* 323 * Reset the sed processor to its initial state. 324 */ 325 void 326 resetstate(void) 327 { 328 struct s_command *cp; 329 330 /* 331 * Reset all inrange markers. 332 */ 333 for (cp = prog; cp; cp = cp->code == '{' ? cp->u.c : cp->next) 334 if (cp->a2) 335 cp->inrange = 0; 336 337 /* 338 * Clear out the hold space. 339 */ 340 cspace(&HS, "", 0, REPLACE); 341 } 342 343 /* 344 * substitute -- 345 * Do substitutions in the pattern space. Currently, we build a 346 * copy of the new pattern space in the substitute space structure 347 * and then swap them. 348 */ 349 static int 350 substitute(struct s_command *cp) 351 { 352 SPACE tspace; 353 regex_t *re; 354 regoff_t re_off, slen; 355 int lastempty, n; 356 char *s; 357 358 s = ps; 359 re = cp->u.s->re; 360 if (re == NULL) { 361 if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) { 362 linenum = cp->u.s->linenum; 363 errx(1, "%lu: %s: \\%u not defined in the RE", 364 linenum, fname, cp->u.s->maxbref); 365 } 366 } 367 if (!regexec_e(re, s, 0, 0, psl)) 368 return (0); 369 370 SS.len = 0; /* Clean substitute space. */ 371 slen = psl; 372 n = cp->u.s->n; 373 lastempty = 1; 374 375 switch (n) { 376 case 0: /* Global */ 377 do { 378 if (lastempty || match[0].rm_so != match[0].rm_eo) { 379 /* Locate start of replaced string. */ 380 re_off = match[0].rm_so; 381 /* Copy leading retained string. */ 382 cspace(&SS, s, re_off, APPEND); 383 /* Add in regular expression. */ 384 regsub(&SS, s, cp->u.s->new); 385 } 386 387 /* Move past this match. */ 388 if (match[0].rm_so != match[0].rm_eo) { 389 s += match[0].rm_eo; 390 slen -= match[0].rm_eo; 391 lastempty = 0; 392 } else { 393 if (match[0].rm_so < slen) 394 cspace(&SS, s + match[0].rm_so, 1, 395 APPEND); 396 s += match[0].rm_so + 1; 397 slen -= match[0].rm_so + 1; 398 lastempty = 1; 399 } 400 } while (slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, slen)); 401 /* Copy trailing retained string. */ 402 if (slen > 0) 403 cspace(&SS, s, slen, APPEND); 404 break; 405 default: /* Nth occurrence */ 406 while (--n) { 407 if (match[0].rm_eo == match[0].rm_so) 408 match[0].rm_eo = match[0].rm_so + 1; 409 s += match[0].rm_eo; 410 slen -= match[0].rm_eo; 411 if (slen < 0) 412 return (0); 413 if (!regexec_e(re, s, REG_NOTBOL, 0, slen)) 414 return (0); 415 } 416 /* FALLTHROUGH */ 417 case 1: /* 1st occurrence */ 418 /* Locate start of replaced string. */ 419 re_off = match[0].rm_so + (s - ps); 420 /* Copy leading retained string. */ 421 cspace(&SS, ps, re_off, APPEND); 422 /* Add in regular expression. */ 423 regsub(&SS, s, cp->u.s->new); 424 /* Copy trailing retained string. */ 425 s += match[0].rm_eo; 426 slen -= match[0].rm_eo; 427 cspace(&SS, s, slen, APPEND); 428 break; 429 } 430 431 /* 432 * Swap the substitute space and the pattern space, and make sure 433 * that any leftover pointers into stdio memory get lost. 434 */ 435 tspace = PS; 436 PS = SS; 437 SS = tspace; 438 SS.space = SS.back; 439 440 /* Handle the 'p' flag. */ 441 if (cp->u.s->p) 442 OUT(); 443 444 /* Handle the 'w' flag. */ 445 if (cp->u.s->wfile && !pd) { 446 if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile, 447 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1) 448 err(1, "%s", cp->u.s->wfile); 449 if (write(cp->u.s->wfd, ps, psl) != (ssize_t)psl || 450 write(cp->u.s->wfd, "\n", 1) != 1) 451 err(1, "%s", cp->u.s->wfile); 452 } 453 return (1); 454 } 455 456 /* 457 * do_tr -- 458 * Perform translation ('y' command) in the pattern space. 459 */ 460 static void 461 do_tr(struct s_tr *y) 462 { 463 SPACE tmp; 464 char c, *p; 465 size_t clen, left; 466 int i; 467 468 if (MB_CUR_MAX == 1) { 469 /* 470 * Single-byte encoding: perform in-place translation 471 * of the pattern space. 472 */ 473 for (p = ps; p < &ps[psl]; p++) 474 *p = y->bytetab[(u_char)*p]; 475 } else { 476 /* 477 * Multi-byte encoding: perform translation into the 478 * translation space, then swap the translation and 479 * pattern spaces. 480 */ 481 /* Clean translation space. */ 482 YS.len = 0; 483 for (p = ps, left = psl; left > 0; p += clen, left -= clen) { 484 if ((c = y->bytetab[(u_char)*p]) != '\0') { 485 cspace(&YS, &c, 1, APPEND); 486 clen = 1; 487 continue; 488 } 489 for (i = 0; i < y->nmultis; i++) 490 if (left >= y->multis[i].fromlen && 491 memcmp(p, y->multis[i].from, 492 y->multis[i].fromlen) == 0) 493 break; 494 if (i < y->nmultis) { 495 cspace(&YS, y->multis[i].to, 496 y->multis[i].tolen, APPEND); 497 clen = y->multis[i].fromlen; 498 } else { 499 cspace(&YS, p, 1, APPEND); 500 clen = 1; 501 } 502 } 503 /* Swap the translation space and the pattern space. */ 504 tmp = PS; 505 PS = YS; 506 YS = tmp; 507 YS.space = YS.back; 508 } 509 } 510 511 /* 512 * Flush append requests. Always called before reading a line, 513 * therefore it also resets the substitution done (sdone) flag. 514 */ 515 static void 516 flush_appends(void) 517 { 518 FILE *f; 519 int count, i; 520 char buf[8 * 1024]; 521 522 for (i = 0; i < appendx; i++) 523 switch (appends[i].type) { 524 case AP_STRING: 525 fwrite(appends[i].s, sizeof(char), appends[i].len, 526 outfile); 527 break; 528 case AP_FILE: 529 /* 530 * Read files probably shouldn't be cached. Since 531 * it's not an error to read a non-existent file, 532 * it's possible that another program is interacting 533 * with the sed script through the filesystem. It 534 * would be truly bizarre, but possible. It's probably 535 * not that big a performance win, anyhow. 536 */ 537 if ((f = fopen(appends[i].s, "r")) == NULL) 538 break; 539 while ((count = fread(buf, sizeof(char), sizeof(buf), f))) 540 (void)fwrite(buf, sizeof(char), count, outfile); 541 (void)fclose(f); 542 break; 543 } 544 if (ferror(outfile)) 545 errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO)); 546 appendx = sdone = 0; 547 } 548 549 static void 550 lputs(char *s, size_t len) 551 { 552 static const char escapes[] = "\\\a\b\f\r\t\v"; 553 int c, col, width; 554 const char *p; 555 struct winsize win; 556 static int termwidth = -1; 557 size_t clen, i; 558 wchar_t wc; 559 mbstate_t mbs; 560 561 if (outfile != stdout) 562 termwidth = 60; 563 if (termwidth == -1) { 564 if ((p = getenv("COLUMNS")) && *p != '\0') 565 termwidth = atoi(p); 566 else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 && 567 win.ws_col > 0) 568 termwidth = win.ws_col; 569 else 570 termwidth = 60; 571 } 572 if (termwidth <= 0) 573 termwidth = 1; 574 575 memset(&mbs, 0, sizeof(mbs)); 576 col = 0; 577 while (len != 0) { 578 clen = mbrtowc(&wc, s, len, &mbs); 579 if (clen == 0) 580 clen = 1; 581 if (clen == (size_t)-1 || clen == (size_t)-2) { 582 wc = (unsigned char)*s; 583 clen = 1; 584 memset(&mbs, 0, sizeof(mbs)); 585 } 586 if (wc == '\n') { 587 if (col + 1 >= termwidth) 588 fprintf(outfile, "\\\n"); 589 fputc('$', outfile); 590 fputc('\n', outfile); 591 col = 0; 592 } else if (iswprint(wc)) { 593 width = wcwidth(wc); 594 if (col + width >= termwidth) { 595 fprintf(outfile, "\\\n"); 596 col = 0; 597 } 598 fwrite(s, 1, clen, outfile); 599 col += width; 600 } else if (wc != L'\0' && (c = wctob(wc)) != EOF && 601 (p = strchr(escapes, c)) != NULL) { 602 if (col + 2 >= termwidth) { 603 fprintf(outfile, "\\\n"); 604 col = 0; 605 } 606 fprintf(outfile, "\\%c", "\\abfrtv"[p - escapes]); 607 col += 2; 608 } else { 609 if (col + 4 * clen >= (unsigned)termwidth) { 610 fprintf(outfile, "\\\n"); 611 col = 0; 612 } 613 for (i = 0; i < clen; i++) 614 fprintf(outfile, "\\%03o", 615 (int)(unsigned char)s[i]); 616 col += 4 * clen; 617 } 618 s += clen; 619 len -= clen; 620 } 621 if (col + 1 >= termwidth) 622 fprintf(outfile, "\\\n"); 623 (void)fputc('$', outfile); 624 (void)fputc('\n', outfile); 625 if (ferror(outfile)) 626 errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO)); 627 } 628 629 static __inline int 630 regexec_e(regex_t *preg, const char *string, int eflags, int nomatch, 631 size_t slen) 632 { 633 int eval; 634 635 if (preg == NULL) { 636 if (defpreg == NULL) 637 errx(1, "first RE may not be empty"); 638 } else 639 defpreg = preg; 640 641 /* Set anchors */ 642 match[0].rm_so = 0; 643 match[0].rm_eo = slen; 644 645 eval = regexec(defpreg, string, 646 nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND); 647 switch(eval) { 648 case 0: 649 return (1); 650 case REG_NOMATCH: 651 return (0); 652 } 653 errx(1, "RE error: %s", strregerror(eval, defpreg)); 654 /* NOTREACHED */ 655 } 656 657 /* 658 * regsub - perform substitutions after a regexp match 659 * Based on a routine by Henry Spencer 660 */ 661 static void 662 regsub(SPACE *sp, char *string, char *src) 663 { 664 int len, no; 665 char c, *dst; 666 667 #define NEEDSP(reqlen) \ 668 /* XXX What is the +1 for? */ \ 669 if (sp->len + (reqlen) + 1 >= sp->blen) { \ 670 sp->blen += (reqlen) + 1024; \ 671 if ((sp->space = sp->back = realloc(sp->back, sp->blen)) \ 672 == NULL) \ 673 err(1, "realloc"); \ 674 dst = sp->space + sp->len; \ 675 } 676 677 dst = sp->space + sp->len; 678 while ((c = *src++) != '\0') { 679 if (c == '&') 680 no = 0; 681 else if (c == '\\' && isdigit((unsigned char)*src)) 682 no = *src++ - '0'; 683 else 684 no = -1; 685 if (no < 0) { /* Ordinary character. */ 686 if (c == '\\' && (*src == '\\' || *src == '&')) 687 c = *src++; 688 NEEDSP(1); 689 *dst++ = c; 690 ++sp->len; 691 } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) { 692 len = match[no].rm_eo - match[no].rm_so; 693 NEEDSP(len); 694 memmove(dst, string + match[no].rm_so, len); 695 dst += len; 696 sp->len += len; 697 } 698 } 699 NEEDSP(1); 700 *dst = '\0'; 701 } 702 703 /* 704 * cspace -- 705 * Concatenate space: append the source space to the destination space, 706 * allocating new space as necessary. 707 */ 708 void 709 cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag) 710 { 711 size_t tlen; 712 713 /* Make sure SPACE has enough memory and ramp up quickly. */ 714 tlen = sp->len + len + 1; 715 if (tlen > sp->blen) { 716 sp->blen = tlen + 1024; 717 if ((sp->space = sp->back = realloc(sp->back, sp->blen)) == 718 NULL) 719 err(1, "realloc"); 720 } 721 722 if (spflag == REPLACE) 723 sp->len = 0; 724 725 memmove(sp->space + sp->len, p, len); 726 727 sp->space[sp->len += len] = '\0'; 728 } 729 730 /* 731 * Close all cached opened files and report any errors 732 */ 733 void 734 cfclose(struct s_command *cp, struct s_command *end) 735 { 736 737 for (; cp != end; cp = cp->next) 738 switch(cp->code) { 739 case 's': 740 if (cp->u.s->wfd != -1 && close(cp->u.s->wfd)) 741 err(1, "%s", cp->u.s->wfile); 742 cp->u.s->wfd = -1; 743 break; 744 case 'w': 745 if (cp->u.fd != -1 && close(cp->u.fd)) 746 err(1, "%s", cp->t); 747 cp->u.fd = -1; 748 break; 749 case '{': 750 cfclose(cp->u.c, cp->next); 751 break; 752 } 753 } 754