1 /*- 2 * Copyright (c) 1992 Diomidis Spinellis. 3 * Copyright (c) 1992 The Regents of the University of California. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Diomidis Spinellis of Imperial College, University of London. 8 * 9 * %sccs.include.redist.c% 10 */ 11 12 #ifndef lint 13 static char sccsid[] = "@(#)process.c 5.8 (Berkeley) 08/30/92"; 14 #endif /* not lint */ 15 16 #include <sys/types.h> 17 #include <sys/stat.h> 18 #include <sys/ioctl.h> 19 #include <sys/uio.h> 20 21 #include <ctype.h> 22 #include <errno.h> 23 #include <fcntl.h> 24 #include <limits.h> 25 #include <regex.h> 26 #include <stdio.h> 27 #include <stdlib.h> 28 #include <string.h> 29 #include <unistd.h> 30 31 #include "defs.h" 32 #include "extern.h" 33 34 static SPACE HS, PS, SS; 35 #define pd PS.deleted 36 #define ps PS.space 37 #define psl PS.len 38 #define hs HS.space 39 #define hsl HS.len 40 41 static inline int applies __P((struct s_command *)); 42 static void flush_appends __P((void)); 43 static void lputs __P((char *)); 44 static inline int regexec_e __P((regex_t *, const char *, int, int)); 45 static void regsub __P((SPACE *, char *, char *)); 46 static int substitute __P((struct s_command *)); 47 48 struct s_appends *appends; /* Array of pointers to strings to append. */ 49 static int appendx; /* Index into appends array. */ 50 int appendnum; /* Size of appends array. */ 51 52 static int lastaddr; /* Set by applies if last address of a range. */ 53 static int sdone; /* If any substitutes since last line input. */ 54 /* Iov structure for 'w' commands. */ 55 static struct iovec iov[2] = { NULL, 0, "\n", 1 }; 56 57 static regex_t *defpreg; 58 size_t maxnsub; 59 regmatch_t *match; 60 61 void 62 process() 63 { 64 struct s_command *cp; 65 SPACE tspace; 66 size_t len; 67 int r; 68 char oldc, *p; 69 70 for (linenum = 0; mf_fgets(&PS, REPLACE);) { 71 pd = 0; 72 cp = prog; 73 redirect: 74 while (cp != NULL) { 75 if (!applies(cp)) { 76 cp = cp->next; 77 continue; 78 } 79 switch (cp->code) { 80 case '{': 81 cp = cp->u.c; 82 goto redirect; 83 case 'a': 84 if (appendx >= appendnum) 85 appends = xrealloc(appends, 86 sizeof(struct s_appends) * 87 (appendnum *= 2)); 88 appends[appendx].type = AP_STRING; 89 appends[appendx].s = cp->t; 90 appendx++; 91 break; 92 case 'b': 93 cp = cp->u.c; 94 goto redirect; 95 case 'c': 96 pd = 1; 97 psl = 0; 98 if (cp->a2 == NULL || lastaddr) 99 (void)printf("%s", cp->t); 100 break; 101 case 'd': 102 pd = 1; 103 goto new; 104 case 'D': 105 if (pd) 106 goto new; 107 if ((p = strchr(ps, '\n')) == NULL) 108 pd = 1; 109 else { 110 psl -= (p - ps) - 1; 111 memmove(ps, p + 1, psl); 112 } 113 goto new; 114 case 'g': 115 cspace(&PS, hs, hsl, REPLACE); 116 break; 117 case 'G': 118 cspace(&PS, hs, hsl, APPENDNL); 119 break; 120 case 'h': 121 cspace(&HS, ps, psl, REPLACE); 122 break; 123 case 'H': 124 cspace(&HS, ps, psl, APPENDNL); 125 break; 126 case 'i': 127 (void)printf("%s", cp->t); 128 break; 129 case 'l': 130 lputs(ps); 131 break; 132 case 'n': 133 if (!nflag && !pd) 134 (void)printf("%s\n", ps); 135 flush_appends(); 136 r = mf_fgets(&PS, REPLACE); 137 #ifdef HISTORIC_PRACTICE 138 if (!r) 139 exit(0); 140 #endif 141 pd = 0; 142 break; 143 case 'N': 144 flush_appends(); 145 if (!mf_fgets(&PS, APPENDNL)) { 146 if (!nflag && !pd) 147 (void)printf("%s\n", ps); 148 exit(0); 149 } 150 break; 151 case 'p': 152 if (pd) 153 break; 154 (void)printf("%s\n", ps); 155 break; 156 case 'P': 157 if (pd) 158 break; 159 if ((p = strchr(ps, '\n')) != NULL) { 160 oldc = *p; 161 *p = '\0'; 162 } 163 (void)printf("%s\n", ps); 164 if (p != NULL) 165 *p = oldc; 166 break; 167 case 'q': 168 if (!nflag && !pd) 169 (void)printf("%s\n", ps); 170 flush_appends(); 171 exit(0); 172 case 'r': 173 if (appendx >= appendnum) 174 appends = xrealloc(appends, 175 sizeof(struct s_appends) * 176 (appendnum *= 2)); 177 appends[appendx].type = AP_FILE; 178 appends[appendx].s = cp->t; 179 appendx++; 180 break; 181 case 's': 182 sdone = substitute(cp); 183 break; 184 case 't': 185 if (sdone) { 186 sdone = 0; 187 cp = cp->u.c; 188 goto redirect; 189 } 190 break; 191 case 'w': 192 if (pd) 193 break; 194 if (cp->u.fd == -1 && (cp->u.fd = open(cp->t, 195 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 196 DEFFILEMODE)) == -1) 197 err(FATAL, "%s: %s\n", 198 cp->t, strerror(errno)); 199 iov[0].iov_base = ps; 200 iov[0].iov_len = psl; 201 if (writev(cp->u.fd, iov, 2) != psl + 1) 202 err(FATAL, "%s: %s\n", 203 cp->t, strerror(errno)); 204 break; 205 case 'x': 206 tspace = PS; 207 PS = HS; 208 HS = tspace; 209 break; 210 case 'y': 211 if (pd) 212 break; 213 for (p = ps, len = psl; len--; ++p) 214 *p = cp->u.y[*p]; 215 break; 216 case ':': 217 case '}': 218 break; 219 case '=': 220 (void)printf("%lu\n", linenum); 221 } 222 cp = cp->next; 223 } /* for all cp */ 224 225 new: if (!nflag && !pd) 226 (void)printf("%s\n", ps); 227 flush_appends(); 228 } /* for all lines */ 229 } 230 231 /* 232 * TRUE if the address passed matches the current program state 233 * (lastline, linenumber, ps). 234 */ 235 #define MATCH(a) \ 236 (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1) : \ 237 (a)->type == AT_LINE ? linenum == (a)->u.l : lastline 238 239 /* 240 * Return TRUE if the command applies to the current line. Sets the inrange 241 * flag to process ranges. Interprets the non-select (``!'') flag. 242 */ 243 static inline int 244 applies(cp) 245 struct s_command *cp; 246 { 247 int r; 248 249 lastaddr = 0; 250 if (cp->a1 == NULL && cp->a2 == NULL) 251 r = 1; 252 else if (cp->a2) 253 if (cp->inrange) { 254 if (MATCH(cp->a2)) { 255 cp->inrange = 0; 256 lastaddr = 1; 257 } 258 r = 1; 259 } else if (MATCH(cp->a1)) { 260 /* 261 * If the second address is a number less than or 262 * equal to the line number first selected, only 263 * one line shall be selected. 264 * -- POSIX 1003.2 265 */ 266 if (cp->a2->type == AT_LINE && 267 linenum >= cp->a2->u.l) 268 lastaddr = 1; 269 else 270 cp->inrange = 1; 271 r = 1; 272 } else 273 r = 0; 274 else 275 r = MATCH(cp->a1); 276 return (cp->nonsel ? ! r : r); 277 } 278 279 /* 280 * substitute -- 281 * Do substitutions in the pattern space. Currently, we build a 282 * copy of the new pattern space in the substitute space structure 283 * and then swap them. 284 */ 285 static int 286 substitute(cp) 287 struct s_command *cp; 288 { 289 SPACE tspace; 290 regex_t *re; 291 size_t re_off; 292 int n; 293 char *endp, *s; 294 295 s = ps; 296 re = cp->u.s->re; 297 if (re == NULL) { 298 if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) { 299 linenum = cp->u.s->linenum; 300 err(COMPILE, "\\%d not defined in the RE", 301 cp->u.s->maxbref); 302 } 303 } 304 if (!regexec_e(re, s, 0, 0)) 305 return (0); 306 307 SS.len = 0; /* Clean substitute space. */ 308 n = cp->u.s->n; 309 switch (n) { 310 case 0: /* Global */ 311 do { 312 /* Locate start of replaced string. */ 313 re_off = match[0].rm_so; 314 /* Locate end of replaced string + 1. */ 315 endp = s + match[0].rm_eo; 316 /* Copy leading retained string. */ 317 cspace(&SS, s, re_off, APPEND); 318 /* Add in regular expression. */ 319 regsub(&SS, s, cp->u.s->new); 320 /* Move past this match. */ 321 s += match[0].rm_eo; 322 } while(regexec_e(re, s, REG_NOTBOL, 0)); 323 /* Copy trailing retained string. */ 324 cspace(&SS, s, strlen(s), APPEND); 325 break; 326 default: /* Nth occurrence */ 327 while (--n) { 328 s += match[0].rm_eo; 329 if (!regexec_e(re, s, REG_NOTBOL, 0)) 330 return (0); 331 } 332 /* FALLTHROUGH */ 333 case 1: /* 1st occurrence */ 334 /* Locate start of replaced string. */ 335 re_off = match[0].rm_so + (s - ps); 336 /* Copy leading retained string. */ 337 cspace(&SS, ps, re_off, APPEND); 338 /* Add in regular expression. */ 339 regsub(&SS, s, cp->u.s->new); 340 /* Copy trailing retained string. */ 341 s += match[0].rm_eo; 342 cspace(&SS, s, strlen(s), APPEND); 343 break; 344 } 345 346 /* 347 * Swap the substitute space and the pattern space, and make sure 348 * that any leftover pointers into stdio memory get lost. 349 */ 350 tspace = PS; 351 PS = SS; 352 SS = tspace; 353 SS.space = SS.back; 354 355 /* Handle the 'p' flag. */ 356 if (cp->u.s->p) 357 (void)printf("%s\n", ps); 358 359 /* Handle the 'w' flag. */ 360 if (cp->u.s->wfile && !pd) { 361 if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile, 362 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1) 363 err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno)); 364 iov[0].iov_base = ps; 365 iov[0].iov_len = psl; 366 if (writev(cp->u.s->wfd, iov, 2) != psl + 1) 367 err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno)); 368 } 369 return (1); 370 } 371 372 /* 373 * Flush append requests. Always called before reading a line, 374 * therefore it also resets the substitution done (sdone) flag. 375 */ 376 static void 377 flush_appends() 378 { 379 FILE *f; 380 int count, i; 381 char buf[8 * 1024]; 382 383 for (i = 0; i < appendx; i++) 384 switch (appends[i].type) { 385 case AP_STRING: 386 (void)printf("%s", appends[i].s); 387 break; 388 case AP_FILE: 389 /* 390 * Read files probably shouldn't be cached. Since 391 * it's not an error to read a non-existent file, 392 * it's possible that another program is interacting 393 * with the sed script through the file system. It 394 * would be truly bizarre, but possible. It's probably 395 * not that big a performance win, anyhow. 396 */ 397 if ((f = fopen(appends[i].s, "r")) == NULL) 398 break; 399 while (count = fread(buf, 1, sizeof(buf), f)) 400 (void)fwrite(buf, 1, count, stdout); 401 (void)fclose(f); 402 break; 403 } 404 if (ferror(stdout)) 405 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO)); 406 appendx = 0; 407 sdone = 0; 408 } 409 410 static void 411 lputs(s) 412 register char *s; 413 { 414 register int count; 415 register char *escapes, *p; 416 struct winsize win; 417 static int termwidth = -1; 418 419 if (termwidth == -1) 420 if (p = getenv("COLUMNS")) 421 termwidth = atoi(p); 422 else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 && 423 win.ws_col > 0) 424 termwidth = win.ws_col; 425 else 426 termwidth = 60; 427 428 for (count = 0; *s; ++s) { 429 if (count >= termwidth) { 430 (void)printf("\\\n"); 431 count = 0; 432 } 433 if (isascii(*s) && isprint(*s) && *s != '\\') { 434 (void)putchar(*s); 435 count++; 436 } else { 437 escapes = "\\\a\b\f\n\r\t\v"; 438 (void)putchar('\\'); 439 if (p = strchr(escapes, *s)) { 440 (void)putchar("\\abfnrtv"[p - escapes]); 441 count += 2; 442 } else { 443 (void)printf("%03o", (u_char)*s); 444 count += 4; 445 } 446 } 447 } 448 (void)putchar('$'); 449 (void)putchar('\n'); 450 if (ferror(stdout)) 451 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO)); 452 } 453 454 static inline int 455 regexec_e(preg, string, eflags, nomatch) 456 regex_t *preg; 457 const char *string; 458 int eflags, nomatch; 459 { 460 int eval; 461 462 if (preg == NULL) { 463 if (defpreg == NULL) 464 err(FATAL, "first RE may not be empty"); 465 } else 466 defpreg = preg; 467 468 eval = regexec(defpreg, string, 469 nomatch ? 0 : maxnsub + 1, match, eflags); 470 switch(eval) { 471 case 0: 472 return (1); 473 case REG_NOMATCH: 474 return (0); 475 } 476 err(FATAL, "RE error: %s", strregerror(eval, defpreg)); 477 /* NOTREACHED */ 478 } 479 480 /* 481 * regsub - perform substitutions after a regexp match 482 * Based on a routine by Henry Spencer 483 */ 484 static void 485 regsub(sp, string, src) 486 SPACE *sp; 487 char *string, *src; 488 { 489 register int len, no; 490 register char c, *dst; 491 492 #define NEEDSP(reqlen) \ 493 if (sp->len >= sp->blen - (reqlen) - 1) { \ 494 sp->blen += (reqlen) + 1024; \ 495 sp->space = sp->back = xrealloc(sp->back, sp->blen); \ 496 dst = sp->space + sp->len; \ 497 } 498 499 dst = sp->space + sp->len; 500 while ((c = *src++) != '\0') { 501 if (c == '&') 502 no = 0; 503 else if (c == '\\' && isdigit(*src)) 504 no = *src++ - '0'; 505 else 506 no = -1; 507 if (no < 0) { /* Ordinary character. */ 508 if (c == '\\' && (*src == '\\' || *src == '&')) 509 c = *src++; 510 NEEDSP(1); 511 *dst++ = c; 512 ++sp->len; 513 } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) { 514 len = match[no].rm_eo - match[no].rm_so; 515 NEEDSP(len); 516 memmove(dst, string + match[no].rm_so, len); 517 dst += len; 518 sp->len += len; 519 } 520 } 521 NEEDSP(1); 522 *dst = '\0'; 523 } 524 525 /* 526 * aspace -- 527 * Append the source space to the destination space, allocating new 528 * space as necessary. 529 */ 530 void 531 cspace(sp, p, len, spflag) 532 SPACE *sp; 533 char *p; 534 size_t len; 535 enum e_spflag spflag; 536 { 537 size_t tlen; 538 539 /* 540 * Make sure SPACE has enough memory and ramp up quickly. Appends 541 * need two extra bytes, one for the newline, one for a terminating 542 * NULL. 543 */ 544 tlen = sp->len + len + spflag == APPENDNL ? 2 : 1; 545 if (tlen > sp->blen) { 546 sp->blen = tlen + 1024; 547 sp->space = sp->back = xrealloc(sp->back, sp->blen); 548 } 549 550 if (spflag == APPENDNL) 551 sp->space[sp->len++] = '\n'; 552 else if (spflag == REPLACE) 553 sp->len = 0; 554 555 memmove(sp->space + sp->len, p, len); 556 sp->space[sp->len += len] = '\0'; 557 } 558 559 /* 560 * Close all cached opened files and report any errors 561 */ 562 void 563 cfclose(cp, end) 564 register struct s_command *cp, *end; 565 { 566 567 for (; cp != end; cp = cp->next) 568 switch(cp->code) { 569 case 's': 570 if (cp->u.s->wfd != -1 && close(cp->u.s->wfd)) 571 err(FATAL, 572 "%s: %s", cp->u.s->wfile, strerror(errno)); 573 cp->u.s->wfd = -1; 574 break; 575 case 'w': 576 if (cp->u.fd != -1 && close(cp->u.fd)) 577 err(FATAL, "%s: %s", cp->t, strerror(errno)); 578 cp->u.fd = -1; 579 break; 580 case '{': 581 cfclose(cp->u.c, cp->next); 582 break; 583 } 584 } 585