1 /* Copyright (c) 1980 Regents of the University of California */ 2 static char *sccsid = "@(#)ex_re.c 5.1 08/20/80"; 3 #include "ex.h" 4 #include "ex_re.h" 5 6 /* 7 * Global, substitute and regular expressions. 8 * Very similar to ed, with some re extensions and 9 * confirmed substitute. 10 */ 11 global(k) 12 bool k; 13 { 14 register char *gp; 15 register int c; 16 register line *a1; 17 char globuf[GBSIZE], *Cwas; 18 int lines = lineDOL(); 19 int oinglobal = inglobal; 20 char *oglobp = globp; 21 22 Cwas = Command; 23 /* 24 * States of inglobal: 25 * 0: ordinary - not in a global command. 26 * 1: text coming from some buffer, not tty. 27 * 2: like 1, but the source of the buffer is a global command. 28 * Hence you're only in a global command if inglobal==2. This 29 * strange sounding convention is historically derived from 30 * everybody simulating a global command. 31 */ 32 if (inglobal==2) 33 error("Global within global@not allowed"); 34 markDOT(); 35 setall(); 36 nonzero(); 37 if (skipend()) 38 error("Global needs re|Missing regular expression for global"); 39 c = getchar(); 40 ignore(compile(c, 1)); 41 savere(scanre); 42 gp = globuf; 43 while ((c = getchar()) != '\n') { 44 switch (c) { 45 46 case EOF: 47 c = '\n'; 48 goto brkwh; 49 50 case '\\': 51 c = getchar(); 52 switch (c) { 53 54 case '\\': 55 ungetchar(c); 56 break; 57 58 case '\n': 59 break; 60 61 default: 62 *gp++ = '\\'; 63 break; 64 } 65 break; 66 } 67 *gp++ = c; 68 if (gp >= &globuf[GBSIZE - 2]) 69 error("Global command too long"); 70 } 71 brkwh: 72 ungetchar(c); 73 out: 74 newline(); 75 *gp++ = c; 76 *gp++ = 0; 77 saveall(); 78 inglobal = 2; 79 for (a1 = one; a1 <= dol; a1++) { 80 *a1 &= ~01; 81 if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k) 82 *a1 |= 01; 83 } 84 /* should use gdelete from ed to avoid n**2 here on g/.../d */ 85 if (inopen) 86 inopen = -1; 87 for (a1 = one; a1 <= dol; a1++) { 88 if (*a1 & 01) { 89 *a1 &= ~01; 90 dot = a1; 91 globp = globuf; 92 commands(1, 1); 93 a1 = zero; 94 } 95 } 96 globp = oglobp; 97 inglobal = oinglobal; 98 endline = 1; 99 Command = Cwas; 100 netchHAD(lines); 101 setlastchar(EOF); 102 if (inopen) { 103 ungetchar(EOF); 104 inopen = 1; 105 } 106 } 107 108 bool cflag; 109 int scount, slines, stotal; 110 111 substitute(c) 112 int c; 113 { 114 register line *addr; 115 register int n; 116 int gsubf; 117 118 gsubf = compsub(c); 119 if(FIXUNDO) 120 save12(), undkind = UNDCHANGE; 121 stotal = 0; 122 slines = 0; 123 for (addr = addr1; addr <= addr2; addr++) { 124 scount = 0; 125 if (dosubcon(0, addr) == 0) 126 continue; 127 if (gsubf) { 128 #ifdef notdef 129 /* 130 * should check but loc2 is already munged. 131 * This needs a fancier check later. 132 */ 133 if (loc1 == loc2) 134 error("substitution loop"); 135 #endif 136 while (*loc2) 137 if (dosubcon(1, addr) == 0) 138 break; 139 } 140 if (scount) { 141 stotal += scount; 142 slines++; 143 putmark(addr); 144 n = append(getsub, addr); 145 addr += n; 146 addr2 += n; 147 } 148 } 149 if (stotal == 0 && !inglobal && !cflag) 150 error("Fail|Substitute pattern match failed"); 151 snote(stotal, slines); 152 return (stotal); 153 } 154 155 compsub(ch) 156 { 157 register int seof, c, uselastre; 158 static int gsubf; 159 160 if (!value(EDCOMPATIBLE)) 161 gsubf = cflag = 0; 162 uselastre = 0; 163 switch (ch) { 164 165 case 's': 166 ignore(skipwh()); 167 seof = getchar(); 168 if (endcmd(seof) || any(seof, "gcr")) { 169 ungetchar(seof); 170 goto redo; 171 } 172 if (isalpha(seof) || isdigit(seof)) 173 error("Substitute needs re|Missing regular expression for substitute"); 174 seof = compile(seof, 1); 175 uselastre = 1; 176 comprhs(seof); 177 gsubf = 0; 178 cflag = 0; 179 break; 180 181 case '~': 182 uselastre = 1; 183 /* fall into ... */ 184 case '&': 185 redo: 186 if (re.Expbuf[0] == 0) 187 error("No previous re|No previous regular expression"); 188 if (subre.Expbuf[0] == 0) 189 error("No previous substitute re|No previous substitute to repeat"); 190 break; 191 } 192 for (;;) { 193 c = getchar(); 194 switch (c) { 195 196 case 'g': 197 gsubf = !gsubf; 198 continue; 199 200 case 'c': 201 cflag = !cflag; 202 continue; 203 204 case 'r': 205 uselastre = 1; 206 continue; 207 208 default: 209 ungetchar(c); 210 setcount(); 211 newline(); 212 if (uselastre) 213 savere(subre); 214 else 215 resre(subre); 216 return (gsubf); 217 } 218 } 219 } 220 221 comprhs(seof) 222 int seof; 223 { 224 register char *rp, *orp; 225 register int c; 226 char orhsbuf[LBSIZE / 2]; 227 228 rp = rhsbuf; 229 CP(orhsbuf, rp); 230 for (;;) { 231 c = getchar(); 232 if (c == seof) 233 break; 234 switch (c) { 235 236 case '\\': 237 c = getchar(); 238 if (c == EOF) { 239 ungetchar(c); 240 break; 241 } 242 if (value(MAGIC)) { 243 /* 244 * When "magic", \& turns into a plain &, 245 * and all other chars work fine quoted. 246 */ 247 if (c != '&') 248 c |= QUOTE; 249 break; 250 } 251 magic: 252 if (c == '~') { 253 for (orp = orhsbuf; *orp; *rp++ = *orp++) 254 if (rp >= &rhsbuf[LBSIZE / 2 + 1]) 255 goto toobig; 256 continue; 257 } 258 c |= QUOTE; 259 break; 260 261 case '\n': 262 case EOF: 263 if (!(globp && globp[0])) { 264 ungetchar(c); 265 goto endrhs; 266 } 267 268 case '~': 269 case '&': 270 if (value(MAGIC)) 271 goto magic; 272 break; 273 } 274 if (rp >= &rhsbuf[LBSIZE / 2 - 1]) 275 toobig: 276 error("Replacement pattern too long@- limit 256 characters"); 277 *rp++ = c; 278 } 279 endrhs: 280 *rp++ = 0; 281 } 282 283 getsub() 284 { 285 register char *p; 286 287 if ((p = linebp) == 0) 288 return (EOF); 289 strcLIN(p); 290 linebp = 0; 291 return (0); 292 } 293 294 dosubcon(f, a) 295 bool f; 296 line *a; 297 { 298 299 if (execute(f, a) == 0) 300 return (0); 301 if (confirmed(a)) { 302 dosub(); 303 scount++; 304 } 305 return (1); 306 } 307 308 confirmed(a) 309 line *a; 310 { 311 register int c, ch; 312 313 if (cflag == 0) 314 return (1); 315 pofix(); 316 pline(lineno(a)); 317 if (inopen) 318 putchar('\n' | QUOTE); 319 c = column(loc1 - 1); 320 ugo(c - 1 + (inopen ? 1 : 0), ' '); 321 ugo(column(loc2 - 1) - c, '^'); 322 flush(); 323 ch = c = getkey(); 324 again: 325 if (c == '\r') 326 c = '\n'; 327 if (inopen) 328 putchar(c), flush(); 329 if (c != '\n' && c != EOF) { 330 c = getkey(); 331 goto again; 332 } 333 noteinp(); 334 return (ch == 'y'); 335 } 336 337 getch() 338 { 339 char c; 340 341 if (read(2, &c, 1) != 1) 342 return (EOF); 343 return (c & TRIM); 344 } 345 346 ugo(cnt, with) 347 int with; 348 int cnt; 349 { 350 351 if (cnt > 0) 352 do 353 putchar(with); 354 while (--cnt > 0); 355 } 356 357 int casecnt; 358 bool destuc; 359 360 dosub() 361 { 362 register char *lp, *sp, *rp; 363 int c; 364 365 lp = linebuf; 366 sp = genbuf; 367 rp = rhsbuf; 368 while (lp < loc1) 369 *sp++ = *lp++; 370 casecnt = 0; 371 while (c = *rp++) { 372 if (c & QUOTE) 373 switch (c & TRIM) { 374 375 case '&': 376 sp = place(sp, loc1, loc2); 377 if (sp == 0) 378 goto ovflo; 379 continue; 380 381 case 'l': 382 casecnt = 1; 383 destuc = 0; 384 continue; 385 386 case 'L': 387 casecnt = LBSIZE; 388 destuc = 0; 389 continue; 390 391 case 'u': 392 casecnt = 1; 393 destuc = 1; 394 continue; 395 396 case 'U': 397 casecnt = LBSIZE; 398 destuc = 1; 399 continue; 400 401 case 'E': 402 case 'e': 403 casecnt = 0; 404 continue; 405 } 406 if (c < 0 && (c &= TRIM) >= '1' && c < nbra + '1') { 407 sp = place(sp, braslist[c - '1'], braelist[c - '1']); 408 if (sp == 0) 409 goto ovflo; 410 continue; 411 } 412 if (casecnt) 413 *sp++ = fixcase(c & TRIM); 414 else 415 *sp++ = c & TRIM; 416 if (sp >= &genbuf[LBSIZE]) 417 ovflo: 418 error("Line overflow@in substitute"); 419 } 420 lp = loc2; 421 loc2 = sp + (linebuf - genbuf); 422 while (*sp++ = *lp++) 423 if (sp >= &genbuf[LBSIZE]) 424 goto ovflo; 425 strcLIN(genbuf); 426 } 427 428 fixcase(c) 429 register int c; 430 { 431 432 if (casecnt == 0) 433 return (c); 434 casecnt--; 435 if (destuc) { 436 if (islower(c)) 437 c = toupper(c); 438 } else 439 if (isupper(c)) 440 c = tolower(c); 441 return (c); 442 } 443 444 char * 445 place(sp, l1, l2) 446 register char *sp, *l1, *l2; 447 { 448 449 while (l1 < l2) { 450 *sp++ = fixcase(*l1++); 451 if (sp >= &genbuf[LBSIZE]) 452 return (0); 453 } 454 return (sp); 455 } 456 457 snote(total, lines) 458 register int total, lines; 459 { 460 461 if (!notable(total)) 462 return; 463 printf(mesg("%d subs|%d substitutions"), total); 464 if (lines != 1 && lines != total) 465 printf(" on %d lines", lines); 466 noonl(); 467 flush(); 468 } 469 470 compile(eof, oknl) 471 int eof; 472 int oknl; 473 { 474 register int c; 475 register char *ep; 476 char *lastep; 477 char bracket[NBRA], *bracketp, *rhsp; 478 int cclcnt; 479 480 if (isalpha(eof) || isdigit(eof)) 481 error("Regular expressions cannot be delimited by letters or digits"); 482 ep = expbuf; 483 c = getchar(); 484 if (eof == '\\') 485 switch (c) { 486 487 case '/': 488 case '?': 489 if (scanre.Expbuf[0] == 0) 490 error("No previous scan re|No previous scanning regular expression"); 491 resre(scanre); 492 return (c); 493 494 case '&': 495 if (subre.Expbuf[0] == 0) 496 error("No previous substitute re|No previous substitute regular expression"); 497 resre(subre); 498 return (c); 499 500 default: 501 error("Badly formed re|Regular expression \\ must be followed by / or ?"); 502 } 503 if (c == eof || c == '\n' || c == EOF) { 504 if (*ep == 0) 505 error("No previous re|No previous regular expression"); 506 if (c == '\n' && oknl == 0) 507 error("Missing closing delimiter@for regular expression"); 508 if (c != eof) 509 ungetchar(c); 510 return (eof); 511 } 512 bracketp = bracket; 513 nbra = 0; 514 circfl = 0; 515 if (c == '^') { 516 c = getchar(); 517 circfl++; 518 } 519 ungetchar(c); 520 for (;;) { 521 if (ep >= &expbuf[ESIZE - 2]) 522 complex: 523 cerror("Re too complex|Regular expression too complicated"); 524 c = getchar(); 525 if (c == eof || c == EOF) { 526 if (bracketp != bracket) 527 cerror("Unmatched \\(|More \\('s than \\)'s in regular expression"); 528 *ep++ = CEOFC; 529 if (c == EOF) 530 ungetchar(c); 531 return (eof); 532 } 533 if (value(MAGIC)) { 534 if (c != '*' || ep == expbuf) 535 lastep = ep; 536 } else 537 if (c != '\\' || peekchar() != '*' || ep == expbuf) 538 lastep = ep; 539 switch (c) { 540 541 case '\\': 542 c = getchar(); 543 switch (c) { 544 545 case '(': 546 if (nbra >= NBRA) 547 cerror("Awash in \\('s!|Too many \\('d subexressions in a regular expression"); 548 *bracketp++ = nbra; 549 *ep++ = CBRA; 550 *ep++ = nbra++; 551 continue; 552 553 case ')': 554 if (bracketp <= bracket) 555 cerror("Extra \\)|More \\)'s than \\('s in regular expression"); 556 *ep++ = CKET; 557 *ep++ = *--bracketp; 558 continue; 559 560 case '<': 561 *ep++ = CBRC; 562 continue; 563 564 case '>': 565 *ep++ = CLET; 566 continue; 567 } 568 if (value(MAGIC) == 0) 569 magic: 570 switch (c) { 571 572 case '.': 573 *ep++ = CDOT; 574 continue; 575 576 case '~': 577 rhsp = rhsbuf; 578 while (*rhsp) { 579 if (*rhsp & QUOTE) { 580 c = *rhsp & TRIM; 581 if (c == '&') 582 error("Replacement pattern contains &@- cannot use in re"); 583 if (c >= '1' && c <= '9') 584 error("Replacement pattern contains \\d@- cannot use in re"); 585 } 586 if (ep >= &expbuf[ESIZE-2]) 587 goto complex; 588 *ep++ = CCHR; 589 *ep++ = *rhsp++ & TRIM; 590 } 591 continue; 592 593 case '*': 594 if (ep == expbuf) 595 break; 596 if (*lastep == CBRA || *lastep == CKET) 597 cerror("Illegal *|Can't * a \\( ... \\) in regular expression"); 598 if (*lastep == CCHR && (lastep[1] & QUOTE)) 599 cerror("Illegal *|Can't * a \\n in regular expression"); 600 *lastep |= STAR; 601 continue; 602 603 case '[': 604 *ep++ = CCL; 605 *ep++ = 0; 606 cclcnt = 1; 607 c = getchar(); 608 if (c == '^') { 609 c = getchar(); 610 ep[-2] = NCCL; 611 } 612 if (c == ']') 613 cerror("Bad character class|Empty character class '[]' or '[^]' cannot match"); 614 while (c != ']') { 615 if (c == '\\' && any(peekchar(), "]-^\\")) 616 c = getchar() | QUOTE; 617 if (c == '\n' || c == EOF) 618 cerror("Missing ]"); 619 *ep++ = c; 620 cclcnt++; 621 if (ep >= &expbuf[ESIZE]) 622 goto complex; 623 c = getchar(); 624 } 625 lastep[1] = cclcnt; 626 continue; 627 } 628 if (c == EOF) { 629 ungetchar(EOF); 630 c = '\\'; 631 goto defchar; 632 } 633 *ep++ = CCHR; 634 if (c == '\n') 635 cerror("No newlines in re's|Can't escape newlines into regular expressions"); 636 /* 637 if (c < '1' || c > NBRA + '1') { 638 */ 639 *ep++ = c; 640 continue; 641 /* 642 } 643 c -= '1'; 644 if (c >= nbra) 645 cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s"); 646 *ep++ = c | QUOTE; 647 continue; 648 */ 649 650 case '\n': 651 if (oknl) { 652 ungetchar(c); 653 *ep++ = CEOFC; 654 return (eof); 655 } 656 cerror("Badly formed re|Missing closing delimiter for regular expression"); 657 658 case '$': 659 if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') { 660 *ep++ = CDOL; 661 continue; 662 } 663 goto defchar; 664 665 case '.': 666 case '~': 667 case '*': 668 case '[': 669 if (value(MAGIC)) 670 goto magic; 671 defchar: 672 default: 673 *ep++ = CCHR; 674 *ep++ = c; 675 continue; 676 } 677 } 678 } 679 680 cerror(s) 681 char *s; 682 { 683 684 expbuf[0] = 0; 685 error(s); 686 } 687 688 same(a, b) 689 register int a, b; 690 { 691 692 return (a == b || value(IGNORECASE) && 693 ((islower(a) && toupper(a) == b) || (islower(b) && toupper(b) == a))); 694 } 695 696 char *locs; 697 698 execute(gf, addr) 699 line *addr; 700 { 701 register char *p1, *p2; 702 register int c; 703 704 if (gf) { 705 if (circfl) 706 return (0); 707 locs = p1 = loc2; 708 } else { 709 if (addr == zero) 710 return (0); 711 p1 = linebuf; 712 getline(*addr); 713 locs = 0; 714 } 715 p2 = expbuf; 716 if (circfl) { 717 loc1 = p1; 718 return (advance(p1, p2)); 719 } 720 /* fast check for first character */ 721 if (*p2 == CCHR) { 722 c = p2[1]; 723 do { 724 if (c != *p1 && (!value(IGNORECASE) || 725 !((islower(c) && toupper(c) == *p1) || 726 (islower(*p1) && toupper(*p1) == c)))) 727 continue; 728 if (advance(p1, p2)) { 729 loc1 = p1; 730 return (1); 731 } 732 } while (*p1++); 733 return (0); 734 } 735 /* regular algorithm */ 736 do { 737 if (advance(p1, p2)) { 738 loc1 = p1; 739 return (1); 740 } 741 } while (*p1++); 742 return (0); 743 } 744 745 #define uletter(c) (isalpha(c) || c == '_') 746 747 advance(lp, ep) 748 register char *lp, *ep; 749 { 750 register char *curlp; 751 char *sp, *sp1; 752 int c; 753 754 for (;;) switch (*ep++) { 755 756 case CCHR: 757 /* useless 758 if (*ep & QUOTE) { 759 c = *ep++ & TRIM; 760 sp = braslist[c]; 761 sp1 = braelist[c]; 762 while (sp < sp1) { 763 if (!same(*sp, *lp)) 764 return (0); 765 sp++, lp++; 766 } 767 continue; 768 } 769 */ 770 if (!same(*ep, *lp)) 771 return (0); 772 ep++, lp++; 773 continue; 774 775 case CDOT: 776 if (*lp++) 777 continue; 778 return (0); 779 780 case CDOL: 781 if (*lp == 0) 782 continue; 783 return (0); 784 785 case CEOFC: 786 loc2 = lp; 787 return (1); 788 789 case CCL: 790 if (cclass(ep, *lp++, 1)) { 791 ep += *ep; 792 continue; 793 } 794 return (0); 795 796 case NCCL: 797 if (cclass(ep, *lp++, 0)) { 798 ep += *ep; 799 continue; 800 } 801 return (0); 802 803 case CBRA: 804 braslist[*ep++] = lp; 805 continue; 806 807 case CKET: 808 braelist[*ep++] = lp; 809 continue; 810 811 case CDOT|STAR: 812 curlp = lp; 813 while (*lp++) 814 continue; 815 goto star; 816 817 case CCHR|STAR: 818 curlp = lp; 819 while (same(*lp, *ep)) 820 lp++; 821 lp++; 822 ep++; 823 goto star; 824 825 case CCL|STAR: 826 case NCCL|STAR: 827 curlp = lp; 828 while (cclass(ep, *lp++, ep[-1] == (CCL|STAR))) 829 continue; 830 ep += *ep; 831 goto star; 832 star: 833 do { 834 lp--; 835 if (lp == locs) 836 break; 837 if (advance(lp, ep)) 838 return (1); 839 } while (lp > curlp); 840 return (0); 841 842 case CBRC: 843 if (lp == expbuf) 844 continue; 845 if ((isdigit(*lp) || uletter(*lp)) && !uletter(lp[-1]) && !isdigit(lp[-1])) 846 continue; 847 return (0); 848 849 case CLET: 850 if (!uletter(*lp) && !isdigit(*lp)) 851 continue; 852 return (0); 853 854 default: 855 error("Re internal error"); 856 } 857 } 858 859 cclass(set, c, af) 860 register char *set; 861 register int c; 862 int af; 863 { 864 register int n; 865 866 if (c == 0) 867 return (0); 868 if (value(IGNORECASE) && isupper(c)) 869 c = tolower(c); 870 n = *set++; 871 while (--n) 872 if (n > 2 && set[1] == '-') { 873 if (c >= (set[0] & TRIM) && c <= (set[2] & TRIM)) 874 return (af); 875 set += 3; 876 n -= 2; 877 } else 878 if ((*set++ & TRIM) == c) 879 return (af); 880 return (!af); 881 } 882