1 /*- 2 * Copyright (c) 1992 Diomidis Spinellis. 3 * Copyright (c) 1992 The Regents of the University of California. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Diomidis Spinellis of Imperial College, University of London. 8 * 9 * %sccs.include.redist.c% 10 */ 11 12 #ifndef lint 13 static char sccsid[] = "@(#)process.c 5.13 (Berkeley) 01/27/93"; 14 #endif /* not lint */ 15 16 #include <sys/types.h> 17 #include <sys/stat.h> 18 #include <sys/ioctl.h> 19 #include <sys/uio.h> 20 21 #include <ctype.h> 22 #include <errno.h> 23 #include <fcntl.h> 24 #include <limits.h> 25 #include <regex.h> 26 #include <stdio.h> 27 #include <stdlib.h> 28 #include <string.h> 29 #include <unistd.h> 30 31 #include "defs.h" 32 #include "extern.h" 33 34 static SPACE HS, PS, SS; 35 #define pd PS.deleted 36 #define ps PS.space 37 #define psl PS.len 38 #define hs HS.space 39 #define hsl HS.len 40 41 static inline int applies __P((struct s_command *)); 42 static void flush_appends __P((void)); 43 static void lputs __P((char *)); 44 static inline int regexec_e __P((regex_t *, const char *, int, int, size_t)); 45 static void regsub __P((SPACE *, char *, char *)); 46 static int substitute __P((struct s_command *)); 47 48 struct s_appends *appends; /* Array of pointers to strings to append. */ 49 static int appendx; /* Index into appends array. */ 50 int appendnum; /* Size of appends array. */ 51 52 static int lastaddr; /* Set by applies if last address of a range. */ 53 static int sdone; /* If any substitutes since last line input. */ 54 /* Iov structure for 'w' commands. */ 55 static struct iovec iov[2] = { NULL, 0, "\n", 1 }; 56 57 static regex_t *defpreg; 58 size_t maxnsub; 59 regmatch_t *match, startend; 60 61 #define OUT(s) { fwrite(s, sizeof(u_char), psl, stdout); putchar('\n'); } 62 63 void 64 process() 65 { 66 struct s_command *cp; 67 SPACE tspace; 68 size_t len; 69 int r; 70 char oldc, *p; 71 72 for (linenum = 0; mf_fgets(&PS, REPLACE);) { 73 pd = 0; 74 cp = prog; 75 redirect: 76 while (cp != NULL) { 77 if (!applies(cp)) { 78 cp = cp->next; 79 continue; 80 } 81 switch (cp->code) { 82 case '{': 83 cp = cp->u.c; 84 goto redirect; 85 case 'a': 86 if (appendx >= appendnum) 87 appends = xrealloc(appends, 88 sizeof(struct s_appends) * 89 (appendnum *= 2)); 90 appends[appendx].type = AP_STRING; 91 appends[appendx].s = cp->t; 92 appends[appendx].len = strlen(cp->t); 93 appendx++; 94 break; 95 case 'b': 96 cp = cp->u.c; 97 goto redirect; 98 case 'c': 99 pd = 1; 100 psl = 0; 101 if (cp->a2 == NULL || lastaddr) 102 (void)printf("%s", cp->t); 103 break; 104 case 'd': 105 pd = 1; 106 goto new; 107 case 'D': 108 if (pd) 109 goto new; 110 if ((p = strnchr(ps, '\n', psl)) == NULL) 111 pd = 1; 112 else { 113 psl -= (p - ps) - 1; 114 memmove(ps, p + 1, psl); 115 } 116 goto new; 117 case 'g': 118 cspace(&PS, hs, hsl, REPLACE); 119 break; 120 case 'G': 121 cspace(&PS, hs, hsl, APPENDNL); 122 break; 123 case 'h': 124 cspace(&HS, ps, psl, REPLACE); 125 break; 126 case 'H': 127 cspace(&HS, ps, psl, APPENDNL); 128 break; 129 case 'i': 130 (void)printf("%s", cp->t); 131 break; 132 case 'l': 133 lputs(ps); 134 break; 135 case 'n': 136 if (!nflag && !pd) 137 OUT(ps) 138 flush_appends(); 139 r = mf_fgets(&PS, REPLACE); 140 #ifdef HISTORIC_PRACTICE 141 if (!r) 142 exit(0); 143 #endif 144 pd = 0; 145 break; 146 case 'N': 147 flush_appends(); 148 if (!mf_fgets(&PS, APPENDNL)) { 149 if (!nflag && !pd) 150 OUT(ps) 151 exit(0); 152 } 153 break; 154 case 'p': 155 if (pd) 156 break; 157 OUT(ps) 158 break; 159 case 'P': 160 if (pd) 161 break; 162 if ((p = strnchr(ps, '\n', psl)) != NULL) { 163 oldc = *p; 164 *p = '\0'; 165 } 166 OUT(ps) 167 if (p != NULL) 168 *p = oldc; 169 break; 170 case 'q': 171 if (!nflag && !pd) 172 OUT(ps) 173 flush_appends(); 174 exit(0); 175 case 'r': 176 if (appendx >= appendnum) 177 appends = xrealloc(appends, 178 sizeof(struct s_appends) * 179 (appendnum *= 2)); 180 appends[appendx].type = AP_FILE; 181 appends[appendx].s = cp->t; 182 appends[appendx].len = strlen(cp->t); 183 appendx++; 184 break; 185 case 's': 186 sdone |= substitute(cp); 187 break; 188 case 't': 189 if (sdone) { 190 sdone = 0; 191 cp = cp->u.c; 192 goto redirect; 193 } 194 break; 195 case 'w': 196 if (pd) 197 break; 198 if (cp->u.fd == -1 && (cp->u.fd = open(cp->t, 199 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 200 DEFFILEMODE)) == -1) 201 err(FATAL, "%s: %s\n", 202 cp->t, strerror(errno)); 203 iov[0].iov_base = ps; 204 iov[0].iov_len = psl; 205 if (writev(cp->u.fd, iov, 2) != psl + 1) 206 err(FATAL, "%s: %s\n", 207 cp->t, strerror(errno)); 208 break; 209 case 'x': 210 if (hs == NULL) 211 cspace(&HS, "", 0, REPLACE); 212 tspace = PS; 213 PS = HS; 214 HS = tspace; 215 break; 216 case 'y': 217 if (pd) 218 break; 219 for (p = ps, len = psl; len--; ++p) 220 *p = cp->u.y[*p]; 221 break; 222 case ':': 223 case '}': 224 break; 225 case '=': 226 (void)printf("%lu\n", linenum); 227 } 228 cp = cp->next; 229 } /* for all cp */ 230 231 new: if (!nflag && !pd) 232 OUT(ps) 233 flush_appends(); 234 } /* for all lines */ 235 } 236 237 /* 238 * TRUE if the address passed matches the current program state 239 * (lastline, linenumber, ps). 240 */ 241 #define MATCH(a) \ 242 (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \ 243 (a)->type == AT_LINE ? linenum == (a)->u.l : lastline 244 245 /* 246 * Return TRUE if the command applies to the current line. Sets the inrange 247 * flag to process ranges. Interprets the non-select (``!'') flag. 248 */ 249 static inline int 250 applies(cp) 251 struct s_command *cp; 252 { 253 int r; 254 255 lastaddr = 0; 256 if (cp->a1 == NULL && cp->a2 == NULL) 257 r = 1; 258 else if (cp->a2) 259 if (cp->inrange) { 260 if (MATCH(cp->a2)) { 261 cp->inrange = 0; 262 lastaddr = 1; 263 } 264 r = 1; 265 } else if (MATCH(cp->a1)) { 266 /* 267 * If the second address is a number less than or 268 * equal to the line number first selected, only 269 * one line shall be selected. 270 * -- POSIX 1003.2 271 */ 272 if (cp->a2->type == AT_LINE && 273 linenum >= cp->a2->u.l) 274 lastaddr = 1; 275 else 276 cp->inrange = 1; 277 r = 1; 278 } else 279 r = 0; 280 else 281 r = MATCH(cp->a1); 282 return (cp->nonsel ? ! r : r); 283 } 284 285 /* 286 * substitute -- 287 * Do substitutions in the pattern space. Currently, we build a 288 * copy of the new pattern space in the substitute space structure 289 * and then swap them. 290 */ 291 static int 292 substitute(cp) 293 struct s_command *cp; 294 { 295 SPACE tspace; 296 regex_t *re; 297 size_t re_off, slen; 298 int n; 299 char *s; 300 301 s = ps; 302 re = cp->u.s->re; 303 if (re == NULL) { 304 if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) { 305 linenum = cp->u.s->linenum; 306 err(COMPILE, "\\%d not defined in the RE", 307 cp->u.s->maxbref); 308 } 309 } 310 if (!regexec_e(re, s, 0, 0, psl)) 311 return (0); 312 313 SS.len = 0; /* Clean substitute space. */ 314 slen = psl; 315 n = cp->u.s->n; 316 switch (n) { 317 case 0: /* Global */ 318 do { 319 /* Locate start of replaced string. */ 320 re_off = match[0].rm_so; 321 /* Copy leading retained string. */ 322 cspace(&SS, s, re_off, APPEND); 323 /* Add in regular expression. */ 324 regsub(&SS, s, cp->u.s->new); 325 /* Move past this match. */ 326 s += match[0].rm_eo; 327 slen -= match[0].rm_eo; 328 } while(regexec_e(re, s, REG_NOTBOL, 0, slen)); 329 /* Copy trailing retained string. */ 330 cspace(&SS, s, slen, APPEND); 331 break; 332 default: /* Nth occurrence */ 333 while (--n) { 334 s += match[0].rm_eo; 335 slen -= match[0].rm_eo; 336 if (!regexec_e(re, s, REG_NOTBOL, 0, slen)) 337 return (0); 338 } 339 /* FALLTHROUGH */ 340 case 1: /* 1st occurrence */ 341 /* Locate start of replaced string. */ 342 re_off = match[0].rm_so + (s - ps); 343 /* Copy leading retained string. */ 344 cspace(&SS, ps, re_off, APPEND); 345 /* Add in regular expression. */ 346 regsub(&SS, s, cp->u.s->new); 347 /* Copy trailing retained string. */ 348 s += match[0].rm_eo; 349 slen -= match[0].rm_eo; 350 cspace(&SS, s, slen, APPEND); 351 break; 352 } 353 354 /* 355 * Swap the substitute space and the pattern space, and make sure 356 * that any leftover pointers into stdio memory get lost. 357 */ 358 tspace = PS; 359 PS = SS; 360 SS = tspace; 361 SS.space = SS.back; 362 363 /* Handle the 'p' flag. */ 364 if (cp->u.s->p) 365 OUT(ps) 366 367 /* Handle the 'w' flag. */ 368 if (cp->u.s->wfile && !pd) { 369 if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile, 370 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1) 371 err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno)); 372 iov[0].iov_base = ps; 373 iov[0].iov_len = psl; 374 if (writev(cp->u.s->wfd, iov, 2) != psl + 1) 375 err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno)); 376 } 377 return (1); 378 } 379 380 /* 381 * Flush append requests. Always called before reading a line, 382 * therefore it also resets the substitution done (sdone) flag. 383 */ 384 static void 385 flush_appends() 386 { 387 FILE *f; 388 int count, i; 389 char buf[8 * 1024]; 390 391 for (i = 0; i < appendx; i++) 392 switch (appends[i].type) { 393 case AP_STRING: 394 fwrite(appends[i].s, sizeof(char), appends[i].len, 395 stdout); 396 break; 397 case AP_FILE: 398 /* 399 * Read files probably shouldn't be cached. Since 400 * it's not an error to read a non-existent file, 401 * it's possible that another program is interacting 402 * with the sed script through the file system. It 403 * would be truly bizarre, but possible. It's probably 404 * not that big a performance win, anyhow. 405 */ 406 if ((f = fopen(appends[i].s, "r")) == NULL) 407 break; 408 while (count = fread(buf, sizeof(char), sizeof(buf), 409 f)) 410 (void)fwrite(buf, sizeof(char), count, stdout); 411 (void)fclose(f); 412 break; 413 } 414 if (ferror(stdout)) 415 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO)); 416 appendx = sdone = 0; 417 } 418 419 static void 420 lputs(s) 421 register char *s; 422 { 423 register int count; 424 register char *escapes, *p; 425 struct winsize win; 426 static int termwidth = -1; 427 428 if (termwidth == -1) 429 if (p = getenv("COLUMNS")) 430 termwidth = atoi(p); 431 else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 && 432 win.ws_col > 0) 433 termwidth = win.ws_col; 434 else 435 termwidth = 60; 436 437 for (count = 0; *s; ++s) { 438 if (count >= termwidth) { 439 (void)printf("\\\n"); 440 count = 0; 441 } 442 if (isascii(*s) && isprint(*s) && *s != '\\') { 443 (void)putchar(*s); 444 count++; 445 } else { 446 escapes = "\\\a\b\f\n\r\t\v"; 447 (void)putchar('\\'); 448 if (p = strchr(escapes, *s)) { 449 (void)putchar("\\abfnrtv"[p - escapes]); 450 count += 2; 451 } else { 452 (void)printf("%03o", (u_char)*s); 453 count += 4; 454 } 455 } 456 } 457 (void)putchar('$'); 458 (void)putchar('\n'); 459 if (ferror(stdout)) 460 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO)); 461 } 462 463 static inline int 464 regexec_e(preg, string, eflags, nomatch, slen) 465 regex_t *preg; 466 const char *string; 467 int eflags, nomatch; 468 size_t slen; 469 { 470 int eval; 471 472 /* So we can work with binary files */ 473 startend.rm_so = 0; 474 startend.rm_eo = slen; 475 match[0] = startend; 476 477 478 eflags |= REG_STARTEND; 479 480 if (preg == NULL) { 481 if (defpreg == NULL) 482 err(FATAL, "first RE may not be empty"); 483 } else 484 defpreg = preg; 485 486 eval = regexec(defpreg, string, 487 nomatch ? 0 : maxnsub + 1, match, eflags); 488 switch(eval) { 489 case 0: 490 return (1); 491 case REG_NOMATCH: 492 return (0); 493 } 494 err(FATAL, "RE error: %s", strregerror(eval, defpreg)); 495 /* NOTREACHED */ 496 } 497 498 /* 499 * regsub - perform substitutions after a regexp match 500 * Based on a routine by Henry Spencer 501 */ 502 static void 503 regsub(sp, string, src) 504 SPACE *sp; 505 char *string, *src; 506 { 507 register int len, no; 508 register char c, *dst; 509 510 #define NEEDSP(reqlen) \ 511 if (sp->len >= sp->blen - (reqlen) - 1) { \ 512 sp->blen += (reqlen) + 1024; \ 513 sp->space = sp->back = xrealloc(sp->back, sp->blen); \ 514 dst = sp->space + sp->len; \ 515 } 516 517 dst = sp->space + sp->len; 518 while ((c = *src++) != '\0') { 519 if (c == '&') 520 no = 0; 521 else if (c == '\\' && isdigit(*src)) 522 no = *src++ - '0'; 523 else 524 no = -1; 525 if (no < 0) { /* Ordinary character. */ 526 if (c == '\\' && (*src == '\\' || *src == '&')) 527 c = *src++; 528 NEEDSP(1); 529 *dst++ = c; 530 ++sp->len; 531 } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) { 532 len = match[no].rm_eo - match[no].rm_so; 533 NEEDSP(len); 534 memmove(dst, string + match[no].rm_so, len); 535 dst += len; 536 sp->len += len; 537 } 538 } 539 NEEDSP(1); 540 *dst = '\0'; 541 } 542 543 /* 544 * aspace -- 545 * Append the source space to the destination space, allocating new 546 * space as necessary. 547 */ 548 void 549 cspace(sp, p, len, spflag) 550 SPACE *sp; 551 char *p; 552 size_t len; 553 enum e_spflag spflag; 554 { 555 size_t tlen; 556 557 /* 558 * Make sure SPACE has enough memory and ramp up quickly. Appends 559 * need two extra bytes, one for the newline, one for a terminating 560 * NULL. 561 */ 562 tlen = sp->len + len + (spflag == APPENDNL ? 2 : 1); 563 if (tlen > sp->blen) { 564 sp->blen = tlen + 1024; 565 sp->space = sp->back = xrealloc(sp->back, sp->blen); 566 } 567 568 if (spflag == APPENDNL) 569 sp->space[sp->len++] = '\n'; 570 else if (spflag == REPLACE) 571 sp->len = 0; 572 573 memmove(sp->space + sp->len, p, len); 574 575 sp->space[sp->len += len] = '\0'; 576 } 577 578 /* 579 * Close all cached opened files and report any errors 580 */ 581 void 582 cfclose(cp, end) 583 register struct s_command *cp, *end; 584 { 585 586 for (; cp != end; cp = cp->next) 587 switch(cp->code) { 588 case 's': 589 if (cp->u.s->wfd != -1 && close(cp->u.s->wfd)) 590 err(FATAL, 591 "%s: %s", cp->u.s->wfile, strerror(errno)); 592 cp->u.s->wfd = -1; 593 break; 594 case 'w': 595 if (cp->u.fd != -1 && close(cp->u.fd)) 596 err(FATAL, "%s: %s", cp->t, strerror(errno)); 597 cp->u.fd = -1; 598 break; 599 case '{': 600 cfclose(cp->u.c, cp->next); 601 break; 602 } 603 } 604