1 /*- 2 * Copyright (c) 1992 Diomidis Spinellis. 3 * Copyright (c) 1992, 1993, 1994 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Diomidis Spinellis of Imperial College, University of London. 8 * 9 * %sccs.include.redist.c% 10 */ 11 12 #ifndef lint 13 static char sccsid[] = "@(#)process.c 8.6 (Berkeley) 04/20/94"; 14 #endif /* not lint */ 15 16 #include <sys/types.h> 17 #include <sys/stat.h> 18 #include <sys/ioctl.h> 19 #include <sys/uio.h> 20 21 #include <ctype.h> 22 #include <errno.h> 23 #include <fcntl.h> 24 #include <limits.h> 25 #include <regex.h> 26 #include <stdio.h> 27 #include <stdlib.h> 28 #include <string.h> 29 #include <unistd.h> 30 31 #include "defs.h" 32 #include "extern.h" 33 34 static SPACE HS, PS, SS; 35 #define pd PS.deleted 36 #define ps PS.space 37 #define psl PS.len 38 #define hs HS.space 39 #define hsl HS.len 40 41 static inline int applies __P((struct s_command *)); 42 static void flush_appends __P((void)); 43 static void lputs __P((char *)); 44 static inline int regexec_e __P((regex_t *, const char *, int, int, size_t)); 45 static void regsub __P((SPACE *, char *, char *)); 46 static int substitute __P((struct s_command *)); 47 48 struct s_appends *appends; /* Array of pointers to strings to append. */ 49 static int appendx; /* Index into appends array. */ 50 int appendnum; /* Size of appends array. */ 51 52 static int lastaddr; /* Set by applies if last address of a range. */ 53 static int sdone; /* If any substitutes since last line input. */ 54 /* Iov structure for 'w' commands. */ 55 static regex_t *defpreg; 56 size_t maxnsub; 57 regmatch_t *match; 58 59 #define OUT(s) { fwrite(s, sizeof(u_char), psl, stdout); } 60 61 void 62 process() 63 { 64 struct s_command *cp; 65 SPACE tspace; 66 size_t len; 67 char oldc, *p; 68 69 for (linenum = 0; mf_fgets(&PS, REPLACE);) { 70 pd = 0; 71 cp = prog; 72 redirect: 73 while (cp != NULL) { 74 if (!applies(cp)) { 75 cp = cp->next; 76 continue; 77 } 78 switch (cp->code) { 79 case '{': 80 cp = cp->u.c; 81 goto redirect; 82 case 'a': 83 if (appendx >= appendnum) 84 appends = xrealloc(appends, 85 sizeof(struct s_appends) * 86 (appendnum *= 2)); 87 appends[appendx].type = AP_STRING; 88 appends[appendx].s = cp->t; 89 appends[appendx].len = strlen(cp->t); 90 appendx++; 91 break; 92 case 'b': 93 cp = cp->u.c; 94 goto redirect; 95 case 'c': 96 pd = 1; 97 psl = 0; 98 if (cp->a2 == NULL || lastaddr) 99 (void)printf("%s", cp->t); 100 break; 101 case 'd': 102 pd = 1; 103 goto new; 104 case 'D': 105 if (pd) 106 goto new; 107 if ((p = memchr(ps, '\n', psl)) == NULL) 108 pd = 1; 109 else { 110 psl -= (p - ps) + 1; 111 memmove(ps, p + 1, psl); 112 } 113 goto new; 114 case 'g': 115 cspace(&PS, hs, hsl, REPLACE); 116 break; 117 case 'G': 118 cspace(&PS, hs, hsl, 0); 119 break; 120 case 'h': 121 cspace(&HS, ps, psl, REPLACE); 122 break; 123 case 'H': 124 cspace(&HS, ps, psl, 0); 125 break; 126 case 'i': 127 (void)printf("%s", cp->t); 128 break; 129 case 'l': 130 lputs(ps); 131 break; 132 case 'n': 133 if (!nflag && !pd) 134 OUT(ps) 135 flush_appends(); 136 if (!mf_fgets(&PS, REPLACE)) 137 exit(0); 138 pd = 0; 139 break; 140 case 'N': 141 flush_appends(); 142 if (!mf_fgets(&PS, 0)) { 143 if (!nflag && !pd) 144 OUT(ps) 145 exit(0); 146 } 147 break; 148 case 'p': 149 if (pd) 150 break; 151 OUT(ps) 152 break; 153 case 'P': 154 if (pd) 155 break; 156 if ((p = memchr(ps, '\n', psl)) != NULL) { 157 oldc = *p; 158 *p = '\0'; 159 } 160 OUT(ps) 161 if (p != NULL) 162 *p = oldc; 163 break; 164 case 'q': 165 if (!nflag && !pd) 166 OUT(ps) 167 flush_appends(); 168 exit(0); 169 case 'r': 170 if (appendx >= appendnum) 171 appends = xrealloc(appends, 172 sizeof(struct s_appends) * 173 (appendnum *= 2)); 174 appends[appendx].type = AP_FILE; 175 appends[appendx].s = cp->t; 176 appends[appendx].len = strlen(cp->t); 177 appendx++; 178 break; 179 case 's': 180 sdone |= substitute(cp); 181 break; 182 case 't': 183 if (sdone) { 184 sdone = 0; 185 cp = cp->u.c; 186 goto redirect; 187 } 188 break; 189 case 'w': 190 if (pd) 191 break; 192 if (cp->u.fd == -1 && (cp->u.fd = open(cp->t, 193 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 194 DEFFILEMODE)) == -1) 195 err(FATAL, "%s: %s\n", 196 cp->t, strerror(errno)); 197 if (write(cp->u.fd, ps, psl) != psl) 198 err(FATAL, "%s: %s\n", 199 cp->t, strerror(errno)); 200 break; 201 case 'x': 202 if (hs == NULL) 203 cspace(&HS, "", 0, REPLACE); 204 tspace = PS; 205 PS = HS; 206 HS = tspace; 207 break; 208 case 'y': 209 if (pd) 210 break; 211 for (p = ps, len = psl; --len; ++p) 212 *p = cp->u.y[*p]; 213 break; 214 case ':': 215 case '}': 216 break; 217 case '=': 218 (void)printf("%lu\n", linenum); 219 } 220 cp = cp->next; 221 } /* for all cp */ 222 223 new: if (!nflag && !pd) 224 OUT(ps) 225 flush_appends(); 226 } /* for all lines */ 227 } 228 229 /* 230 * TRUE if the address passed matches the current program state 231 * (lastline, linenumber, ps). 232 */ 233 #define MATCH(a) \ 234 (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \ 235 (a)->type == AT_LINE ? linenum == (a)->u.l : lastline 236 237 /* 238 * Return TRUE if the command applies to the current line. Sets the inrange 239 * flag to process ranges. Interprets the non-select (``!'') flag. 240 */ 241 static inline int 242 applies(cp) 243 struct s_command *cp; 244 { 245 int r; 246 247 lastaddr = 0; 248 if (cp->a1 == NULL && cp->a2 == NULL) 249 r = 1; 250 else if (cp->a2) 251 if (cp->inrange) { 252 if (MATCH(cp->a2)) { 253 cp->inrange = 0; 254 lastaddr = 1; 255 } 256 r = 1; 257 } else if (MATCH(cp->a1)) { 258 /* 259 * If the second address is a number less than or 260 * equal to the line number first selected, only 261 * one line shall be selected. 262 * -- POSIX 1003.2 263 */ 264 if (cp->a2->type == AT_LINE && 265 linenum >= cp->a2->u.l) 266 lastaddr = 1; 267 else 268 cp->inrange = 1; 269 r = 1; 270 } else 271 r = 0; 272 else 273 r = MATCH(cp->a1); 274 return (cp->nonsel ? ! r : r); 275 } 276 277 /* 278 * substitute -- 279 * Do substitutions in the pattern space. Currently, we build a 280 * copy of the new pattern space in the substitute space structure 281 * and then swap them. 282 */ 283 static int 284 substitute(cp) 285 struct s_command *cp; 286 { 287 SPACE tspace; 288 regex_t *re; 289 size_t re_off, slen; 290 int lastempty, n; 291 char *s; 292 293 s = ps; 294 re = cp->u.s->re; 295 if (re == NULL) { 296 if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) { 297 linenum = cp->u.s->linenum; 298 err(COMPILE, "\\%d not defined in the RE", 299 cp->u.s->maxbref); 300 } 301 } 302 if (!regexec_e(re, s, 0, 0, psl)) 303 return (0); 304 305 SS.len = 0; /* Clean substitute space. */ 306 slen = psl; 307 n = cp->u.s->n; 308 lastempty = 1; 309 310 switch (n) { 311 case 0: /* Global */ 312 do { 313 if (lastempty || match[0].rm_so != match[0].rm_eo) { 314 /* Locate start of replaced string. */ 315 re_off = match[0].rm_so; 316 /* Copy leading retained string. */ 317 cspace(&SS, s, re_off, APPEND); 318 /* Add in regular expression. */ 319 regsub(&SS, s, cp->u.s->new); 320 } 321 322 /* Move past this match. */ 323 if (match[0].rm_so != match[0].rm_eo) { 324 s += match[0].rm_eo; 325 slen -= match[0].rm_eo; 326 lastempty = 0; 327 } else { 328 if (match[0].rm_so == 0) 329 cspace(&SS, 330 s, match[0].rm_so + 1, APPEND); 331 else 332 cspace(&SS, 333 s + match[0].rm_so, 1, APPEND); 334 s += match[0].rm_so + 1; 335 slen -= match[0].rm_so + 1; 336 lastempty = 1; 337 } 338 } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen)); 339 /* Copy trailing retained string. */ 340 if (slen > 0) 341 cspace(&SS, s, slen, APPEND); 342 break; 343 default: /* Nth occurrence */ 344 while (--n) { 345 s += match[0].rm_eo; 346 slen -= match[0].rm_eo; 347 if (!regexec_e(re, s, REG_NOTBOL, 0, slen)) 348 return (0); 349 } 350 /* FALLTHROUGH */ 351 case 1: /* 1st occurrence */ 352 /* Locate start of replaced string. */ 353 re_off = match[0].rm_so + (s - ps); 354 /* Copy leading retained string. */ 355 cspace(&SS, ps, re_off, APPEND); 356 /* Add in regular expression. */ 357 regsub(&SS, s, cp->u.s->new); 358 /* Copy trailing retained string. */ 359 s += match[0].rm_eo; 360 slen -= match[0].rm_eo; 361 cspace(&SS, s, slen, APPEND); 362 break; 363 } 364 365 /* 366 * Swap the substitute space and the pattern space, and make sure 367 * that any leftover pointers into stdio memory get lost. 368 */ 369 tspace = PS; 370 PS = SS; 371 SS = tspace; 372 SS.space = SS.back; 373 374 /* Handle the 'p' flag. */ 375 if (cp->u.s->p) 376 OUT(ps) 377 378 /* Handle the 'w' flag. */ 379 if (cp->u.s->wfile && !pd) { 380 if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile, 381 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1) 382 err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno)); 383 if (write(cp->u.s->wfd, ps, psl) != psl) 384 err(FATAL, "%s: %s\n", cp->u.s->wfile, strerror(errno)); 385 } 386 return (1); 387 } 388 389 /* 390 * Flush append requests. Always called before reading a line, 391 * therefore it also resets the substitution done (sdone) flag. 392 */ 393 static void 394 flush_appends() 395 { 396 FILE *f; 397 int count, i; 398 char buf[8 * 1024]; 399 400 for (i = 0; i < appendx; i++) 401 switch (appends[i].type) { 402 case AP_STRING: 403 fwrite(appends[i].s, sizeof(char), appends[i].len, 404 stdout); 405 break; 406 case AP_FILE: 407 /* 408 * Read files probably shouldn't be cached. Since 409 * it's not an error to read a non-existent file, 410 * it's possible that another program is interacting 411 * with the sed script through the file system. It 412 * would be truly bizarre, but possible. It's probably 413 * not that big a performance win, anyhow. 414 */ 415 if ((f = fopen(appends[i].s, "r")) == NULL) 416 break; 417 while (count = fread(buf, sizeof(char), sizeof(buf), f)) 418 (void)fwrite(buf, sizeof(char), count, stdout); 419 (void)fclose(f); 420 break; 421 } 422 if (ferror(stdout)) 423 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO)); 424 appendx = sdone = 0; 425 } 426 427 static void 428 lputs(s) 429 register char *s; 430 { 431 register int count; 432 register char *escapes, *p; 433 struct winsize win; 434 static int termwidth = -1; 435 436 if (termwidth == -1) 437 if (p = getenv("COLUMNS")) 438 termwidth = atoi(p); 439 else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 && 440 win.ws_col > 0) 441 termwidth = win.ws_col; 442 else 443 termwidth = 60; 444 445 for (count = 0; *s; ++s) { 446 if (count >= termwidth) { 447 (void)printf("\\\n"); 448 count = 0; 449 } 450 if (isascii(*s) && isprint(*s) && *s != '\\') { 451 (void)putchar(*s); 452 count++; 453 } else { 454 escapes = "\\\a\b\f\n\r\t\v"; 455 (void)putchar('\\'); 456 if (p = strchr(escapes, *s)) { 457 (void)putchar("\\abfnrtv"[p - escapes]); 458 count += 2; 459 } else { 460 (void)printf("%03o", *(u_char *)s); 461 count += 4; 462 } 463 } 464 } 465 (void)putchar('$'); 466 (void)putchar('\n'); 467 if (ferror(stdout)) 468 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO)); 469 } 470 471 static inline int 472 regexec_e(preg, string, eflags, nomatch, slen) 473 regex_t *preg; 474 const char *string; 475 int eflags, nomatch; 476 size_t slen; 477 { 478 int eval; 479 480 if (preg == NULL) { 481 if (defpreg == NULL) 482 err(FATAL, "first RE may not be empty"); 483 } else 484 defpreg = preg; 485 486 /* Set anchors, discounting trailing newline (if any). */ 487 if (slen > 0 && string[slen - 1] == '\n') 488 slen--; 489 match[0].rm_so = 0; 490 match[0].rm_eo = slen; 491 492 eval = regexec(defpreg, string, 493 nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND); 494 switch(eval) { 495 case 0: 496 return (1); 497 case REG_NOMATCH: 498 return (0); 499 } 500 err(FATAL, "RE error: %s", strregerror(eval, defpreg)); 501 /* NOTREACHED */ 502 } 503 504 /* 505 * regsub - perform substitutions after a regexp match 506 * Based on a routine by Henry Spencer 507 */ 508 static void 509 regsub(sp, string, src) 510 SPACE *sp; 511 char *string, *src; 512 { 513 register int len, no; 514 register char c, *dst; 515 516 #define NEEDSP(reqlen) \ 517 if (sp->len >= sp->blen - (reqlen) - 1) { \ 518 sp->blen += (reqlen) + 1024; \ 519 sp->space = sp->back = xrealloc(sp->back, sp->blen); \ 520 dst = sp->space + sp->len; \ 521 } 522 523 dst = sp->space + sp->len; 524 while ((c = *src++) != '\0') { 525 if (c == '&') 526 no = 0; 527 else if (c == '\\' && isdigit(*src)) 528 no = *src++ - '0'; 529 else 530 no = -1; 531 if (no < 0) { /* Ordinary character. */ 532 if (c == '\\' && (*src == '\\' || *src == '&')) 533 c = *src++; 534 NEEDSP(1); 535 *dst++ = c; 536 ++sp->len; 537 } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) { 538 len = match[no].rm_eo - match[no].rm_so; 539 NEEDSP(len); 540 memmove(dst, string + match[no].rm_so, len); 541 dst += len; 542 sp->len += len; 543 } 544 } 545 NEEDSP(1); 546 *dst = '\0'; 547 } 548 549 /* 550 * aspace -- 551 * Append the source space to the destination space, allocating new 552 * space as necessary. 553 */ 554 void 555 cspace(sp, p, len, spflag) 556 SPACE *sp; 557 char *p; 558 size_t len; 559 enum e_spflag spflag; 560 { 561 size_t tlen; 562 563 /* Make sure SPACE has enough memory and ramp up quickly. */ 564 tlen = sp->len + len + 1; 565 if (tlen > sp->blen) { 566 sp->blen = tlen + 1024; 567 sp->space = sp->back = xrealloc(sp->back, sp->blen); 568 } 569 570 if (spflag == REPLACE) 571 sp->len = 0; 572 573 memmove(sp->space + sp->len, p, len); 574 575 sp->space[sp->len += len] = '\0'; 576 } 577 578 /* 579 * Close all cached opened files and report any errors 580 */ 581 void 582 cfclose(cp, end) 583 register struct s_command *cp, *end; 584 { 585 586 for (; cp != end; cp = cp->next) 587 switch(cp->code) { 588 case 's': 589 if (cp->u.s->wfd != -1 && close(cp->u.s->wfd)) 590 err(FATAL, 591 "%s: %s", cp->u.s->wfile, strerror(errno)); 592 cp->u.s->wfd = -1; 593 break; 594 case 'w': 595 if (cp->u.fd != -1 && close(cp->u.fd)) 596 err(FATAL, "%s: %s", cp->t, strerror(errno)); 597 cp->u.fd = -1; 598 break; 599 case '{': 600 cfclose(cp->u.c, cp->next); 601 break; 602 } 603 } 604