1 /* Copyright (c) 1980 Regents of the University of California */ 2 static char *sccsid = "@(#)ex_re.c 6.2 10/23/80"; 3 #include "ex.h" 4 #include "ex_re.h" 5 6 /* 7 * Global, substitute and regular expressions. 8 * Very similar to ed, with some re extensions and 9 * confirmed substitute. 10 */ 11 global(k) 12 bool k; 13 { 14 register char *gp; 15 register int c; 16 register line *a1; 17 char globuf[GBSIZE], *Cwas; 18 int lines = lineDOL(); 19 int oinglobal = inglobal; 20 char *oglobp = globp; 21 22 Cwas = Command; 23 /* 24 * States of inglobal: 25 * 0: ordinary - not in a global command. 26 * 1: text coming from some buffer, not tty. 27 * 2: like 1, but the source of the buffer is a global command. 28 * Hence you're only in a global command if inglobal==2. This 29 * strange sounding convention is historically derived from 30 * everybody simulating a global command. 31 */ 32 if (inglobal==2) 33 error("Global within global@not allowed"); 34 markDOT(); 35 setall(); 36 nonzero(); 37 if (skipend()) 38 error("Global needs re|Missing regular expression for global"); 39 c = getchar(); 40 ignore(compile(c, 1)); 41 savere(scanre); 42 gp = globuf; 43 while ((c = getchar()) != '\n') { 44 switch (c) { 45 46 case EOF: 47 c = '\n'; 48 goto brkwh; 49 50 case '\\': 51 c = getchar(); 52 switch (c) { 53 54 case '\\': 55 ungetchar(c); 56 break; 57 58 case '\n': 59 break; 60 61 default: 62 *gp++ = '\\'; 63 break; 64 } 65 break; 66 } 67 *gp++ = c; 68 if (gp >= &globuf[GBSIZE - 2]) 69 error("Global command too long"); 70 } 71 brkwh: 72 ungetchar(c); 73 out: 74 newline(); 75 *gp++ = c; 76 *gp++ = 0; 77 saveall(); 78 inglobal = 2; 79 for (a1 = one; a1 <= dol; a1++) { 80 *a1 &= ~01; 81 if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k) 82 *a1 |= 01; 83 } 84 #ifdef notdef 85 /* 86 * This code is commented out for now. The problem is that we don't 87 * fix up the undo area the way we should. Basically, I think what has 88 * to be done is to copy the undo area down (since we shrunk everything) 89 * and move the various pointers into it down too. I will do this later 90 * when I have time. (Mark, 10-20-80) 91 */ 92 /* 93 * Special case: g/.../d (avoid n^2 algorithm) 94 */ 95 if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') { 96 gdelete(); 97 return; 98 } 99 #endif 100 if (inopen) 101 inopen = -1; 102 /* 103 * Now for each marked line, set dot there and do the commands. 104 * Note the n^2 behavior here for lots of lines matching. 105 * This is really needed: in some cases you could delete lines, 106 * causing a marked line to be moved before a1 and missed if 107 * we didn't restart at zero each time. 108 */ 109 for (a1 = one; a1 <= dol; a1++) { 110 if (*a1 & 01) { 111 *a1 &= ~01; 112 dot = a1; 113 globp = globuf; 114 commands(1, 1); 115 a1 = zero; 116 } 117 } 118 globp = oglobp; 119 inglobal = oinglobal; 120 endline = 1; 121 Command = Cwas; 122 netchHAD(lines); 123 setlastchar(EOF); 124 if (inopen) { 125 ungetchar(EOF); 126 inopen = 1; 127 } 128 } 129 130 /* 131 * gdelete: delete inside a global command. Handles the 132 * special case g/r.e./d. All lines to be deleted have 133 * already been marked. Squeeze the remaining lines together. 134 * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/, 135 * and g/r.e./.,/r.e.2/d are not treated specially. There is no 136 * good reason for this except the question: where to you draw the line? 137 */ 138 gdelete() 139 { 140 register line *a1, *a2, *a3; 141 142 a3 = dol; 143 /* find first marked line. can skip all before it */ 144 for (a1=zero; (*a1&01)==0; a1++) 145 if (a1>=a3) 146 return; 147 /* copy down unmarked lines, compacting as we go. */ 148 for (a2=a1+1; a2<=a3;) { 149 if (*a2&01) { 150 a2++; /* line is marked, skip it */ 151 dot = a1; /* dot left after line deletion */ 152 } else 153 *a1++ = *a2++; /* unmarked, copy it */ 154 } 155 dol = a1-1; 156 if (dot>dol) 157 dot = dol; 158 change(); 159 } 160 161 bool cflag; 162 int scount, slines, stotal; 163 164 substitute(c) 165 int c; 166 { 167 register line *addr; 168 register int n; 169 int gsubf, hopcount = 0; 170 171 gsubf = compsub(c); 172 if(FIXUNDO) 173 save12(), undkind = UNDCHANGE; 174 stotal = 0; 175 slines = 0; 176 for (addr = addr1; addr <= addr2; addr++) { 177 scount = 0; 178 if (dosubcon(0, addr) == 0) 179 continue; 180 if (gsubf) { 181 /* 182 * The loop can happen from s/\</&/g 183 * but we don't want to break other, reasonable cases. 184 */ 185 while (*loc2) { 186 if (++hopcount > sizeof linebuf) 187 error("substitution loop"); 188 if (dosubcon(1, addr) == 0) 189 break; 190 } 191 } 192 if (scount) { 193 stotal += scount; 194 slines++; 195 putmark(addr); 196 n = append(getsub, addr); 197 addr += n; 198 addr2 += n; 199 } 200 } 201 if (stotal == 0 && !inglobal && !cflag) 202 error("Fail|Substitute pattern match failed"); 203 snote(stotal, slines); 204 return (stotal); 205 } 206 207 compsub(ch) 208 { 209 register int seof, c, uselastre; 210 static int gsubf; 211 212 if (!value(EDCOMPATIBLE)) 213 gsubf = cflag = 0; 214 uselastre = 0; 215 switch (ch) { 216 217 case 's': 218 ignore(skipwh()); 219 seof = getchar(); 220 if (endcmd(seof) || any(seof, "gcr")) { 221 ungetchar(seof); 222 goto redo; 223 } 224 if (isalpha(seof) || isdigit(seof)) 225 error("Substitute needs re|Missing regular expression for substitute"); 226 seof = compile(seof, 1); 227 uselastre = 1; 228 comprhs(seof); 229 gsubf = 0; 230 cflag = 0; 231 break; 232 233 case '~': 234 uselastre = 1; 235 /* fall into ... */ 236 case '&': 237 redo: 238 if (re.Expbuf[0] == 0) 239 error("No previous re|No previous regular expression"); 240 if (subre.Expbuf[0] == 0) 241 error("No previous substitute re|No previous substitute to repeat"); 242 break; 243 } 244 for (;;) { 245 c = getchar(); 246 switch (c) { 247 248 case 'g': 249 gsubf = !gsubf; 250 continue; 251 252 case 'c': 253 cflag = !cflag; 254 continue; 255 256 case 'r': 257 uselastre = 1; 258 continue; 259 260 default: 261 ungetchar(c); 262 setcount(); 263 newline(); 264 if (uselastre) 265 savere(subre); 266 else 267 resre(subre); 268 return (gsubf); 269 } 270 } 271 } 272 273 comprhs(seof) 274 int seof; 275 { 276 register char *rp, *orp; 277 register int c; 278 char orhsbuf[RHSSIZE]; 279 280 rp = rhsbuf; 281 CP(orhsbuf, rp); 282 for (;;) { 283 c = getchar(); 284 if (c == seof) 285 break; 286 switch (c) { 287 288 case '\\': 289 c = getchar(); 290 if (c == EOF) { 291 ungetchar(c); 292 break; 293 } 294 if (value(MAGIC)) { 295 /* 296 * When "magic", \& turns into a plain &, 297 * and all other chars work fine quoted. 298 */ 299 if (c != '&') 300 c |= QUOTE; 301 break; 302 } 303 magic: 304 if (c == '~') { 305 for (orp = orhsbuf; *orp; *rp++ = *orp++) 306 if (rp >= &rhsbuf[RHSSIZE - 1]) 307 goto toobig; 308 continue; 309 } 310 c |= QUOTE; 311 break; 312 313 case '\n': 314 case EOF: 315 if (!(globp && globp[0])) { 316 ungetchar(c); 317 goto endrhs; 318 } 319 320 case '~': 321 case '&': 322 if (value(MAGIC)) 323 goto magic; 324 break; 325 } 326 if (rp >= &rhsbuf[RHSSIZE - 1]) { 327 toobig: 328 *rp = 0; 329 error("Replacement pattern too long@- limit 256 characters"); 330 } 331 *rp++ = c; 332 } 333 endrhs: 334 *rp++ = 0; 335 } 336 337 getsub() 338 { 339 register char *p; 340 341 if ((p = linebp) == 0) 342 return (EOF); 343 strcLIN(p); 344 linebp = 0; 345 return (0); 346 } 347 348 dosubcon(f, a) 349 bool f; 350 line *a; 351 { 352 353 if (execute(f, a) == 0) 354 return (0); 355 if (confirmed(a)) { 356 dosub(); 357 scount++; 358 } 359 return (1); 360 } 361 362 confirmed(a) 363 line *a; 364 { 365 register int c, ch; 366 367 if (cflag == 0) 368 return (1); 369 pofix(); 370 pline(lineno(a)); 371 if (inopen) 372 putchar('\n' | QUOTE); 373 c = column(loc1 - 1); 374 ugo(c - 1 + (inopen ? 1 : 0), ' '); 375 ugo(column(loc2 - 1) - c, '^'); 376 flush(); 377 ch = c = getkey(); 378 again: 379 if (c == '\r') 380 c = '\n'; 381 if (inopen) 382 putchar(c), flush(); 383 if (c != '\n' && c != EOF) { 384 c = getkey(); 385 goto again; 386 } 387 noteinp(); 388 return (ch == 'y'); 389 } 390 391 getch() 392 { 393 char c; 394 395 if (read(2, &c, 1) != 1) 396 return (EOF); 397 return (c & TRIM); 398 } 399 400 ugo(cnt, with) 401 int with; 402 int cnt; 403 { 404 405 if (cnt > 0) 406 do 407 putchar(with); 408 while (--cnt > 0); 409 } 410 411 int casecnt; 412 bool destuc; 413 414 dosub() 415 { 416 register char *lp, *sp, *rp; 417 int c; 418 419 lp = linebuf; 420 sp = genbuf; 421 rp = rhsbuf; 422 while (lp < loc1) 423 *sp++ = *lp++; 424 casecnt = 0; 425 while (c = *rp++) { 426 if (c & QUOTE) 427 switch (c & TRIM) { 428 429 case '&': 430 sp = place(sp, loc1, loc2); 431 if (sp == 0) 432 goto ovflo; 433 continue; 434 435 case 'l': 436 casecnt = 1; 437 destuc = 0; 438 continue; 439 440 case 'L': 441 casecnt = LBSIZE; 442 destuc = 0; 443 continue; 444 445 case 'u': 446 casecnt = 1; 447 destuc = 1; 448 continue; 449 450 case 'U': 451 casecnt = LBSIZE; 452 destuc = 1; 453 continue; 454 455 case 'E': 456 case 'e': 457 casecnt = 0; 458 continue; 459 } 460 if (c < 0 && (c &= TRIM) >= '1' && c < nbra + '1') { 461 sp = place(sp, braslist[c - '1'], braelist[c - '1']); 462 if (sp == 0) 463 goto ovflo; 464 continue; 465 } 466 if (casecnt) 467 *sp++ = fixcase(c & TRIM); 468 else 469 *sp++ = c & TRIM; 470 if (sp >= &genbuf[LBSIZE]) 471 ovflo: 472 error("Line overflow@in substitute"); 473 } 474 lp = loc2; 475 loc2 = sp + (linebuf - genbuf); 476 while (*sp++ = *lp++) 477 if (sp >= &genbuf[LBSIZE]) 478 goto ovflo; 479 strcLIN(genbuf); 480 } 481 482 fixcase(c) 483 register int c; 484 { 485 486 if (casecnt == 0) 487 return (c); 488 casecnt--; 489 if (destuc) { 490 if (islower(c)) 491 c = toupper(c); 492 } else 493 if (isupper(c)) 494 c = tolower(c); 495 return (c); 496 } 497 498 char * 499 place(sp, l1, l2) 500 register char *sp, *l1, *l2; 501 { 502 503 while (l1 < l2) { 504 *sp++ = fixcase(*l1++); 505 if (sp >= &genbuf[LBSIZE]) 506 return (0); 507 } 508 return (sp); 509 } 510 511 snote(total, lines) 512 register int total, lines; 513 { 514 515 if (!notable(total)) 516 return; 517 printf(mesg("%d subs|%d substitutions"), total); 518 if (lines != 1 && lines != total) 519 printf(" on %d lines", lines); 520 noonl(); 521 flush(); 522 } 523 524 compile(eof, oknl) 525 int eof; 526 int oknl; 527 { 528 register int c; 529 register char *ep; 530 char *lastep; 531 char bracket[NBRA], *bracketp, *rhsp; 532 int cclcnt; 533 534 if (isalpha(eof) || isdigit(eof)) 535 error("Regular expressions cannot be delimited by letters or digits"); 536 ep = expbuf; 537 c = getchar(); 538 if (eof == '\\') 539 switch (c) { 540 541 case '/': 542 case '?': 543 if (scanre.Expbuf[0] == 0) 544 error("No previous scan re|No previous scanning regular expression"); 545 resre(scanre); 546 return (c); 547 548 case '&': 549 if (subre.Expbuf[0] == 0) 550 error("No previous substitute re|No previous substitute regular expression"); 551 resre(subre); 552 return (c); 553 554 default: 555 error("Badly formed re|Regular expression \\ must be followed by / or ?"); 556 } 557 if (c == eof || c == '\n' || c == EOF) { 558 if (*ep == 0) 559 error("No previous re|No previous regular expression"); 560 if (c == '\n' && oknl == 0) 561 error("Missing closing delimiter@for regular expression"); 562 if (c != eof) 563 ungetchar(c); 564 return (eof); 565 } 566 bracketp = bracket; 567 nbra = 0; 568 circfl = 0; 569 if (c == '^') { 570 c = getchar(); 571 circfl++; 572 } 573 ungetchar(c); 574 for (;;) { 575 if (ep >= &expbuf[ESIZE - 2]) 576 complex: 577 cerror("Re too complex|Regular expression too complicated"); 578 c = getchar(); 579 if (c == eof || c == EOF) { 580 if (bracketp != bracket) 581 cerror("Unmatched \\(|More \\('s than \\)'s in regular expression"); 582 *ep++ = CEOFC; 583 if (c == EOF) 584 ungetchar(c); 585 return (eof); 586 } 587 if (value(MAGIC)) { 588 if (c != '*' || ep == expbuf) 589 lastep = ep; 590 } else 591 if (c != '\\' || peekchar() != '*' || ep == expbuf) 592 lastep = ep; 593 switch (c) { 594 595 case '\\': 596 c = getchar(); 597 switch (c) { 598 599 case '(': 600 if (nbra >= NBRA) 601 cerror("Awash in \\('s!|Too many \\('d subexressions in a regular expression"); 602 *bracketp++ = nbra; 603 *ep++ = CBRA; 604 *ep++ = nbra++; 605 continue; 606 607 case ')': 608 if (bracketp <= bracket) 609 cerror("Extra \\)|More \\)'s than \\('s in regular expression"); 610 *ep++ = CKET; 611 *ep++ = *--bracketp; 612 continue; 613 614 case '<': 615 *ep++ = CBRC; 616 continue; 617 618 case '>': 619 *ep++ = CLET; 620 continue; 621 } 622 if (value(MAGIC) == 0) 623 magic: 624 switch (c) { 625 626 case '.': 627 *ep++ = CDOT; 628 continue; 629 630 case '~': 631 rhsp = rhsbuf; 632 while (*rhsp) { 633 if (*rhsp & QUOTE) { 634 c = *rhsp & TRIM; 635 if (c == '&') 636 error("Replacement pattern contains &@- cannot use in re"); 637 if (c >= '1' && c <= '9') 638 error("Replacement pattern contains \\d@- cannot use in re"); 639 } 640 if (ep >= &expbuf[ESIZE-2]) 641 goto complex; 642 *ep++ = CCHR; 643 *ep++ = *rhsp++ & TRIM; 644 } 645 continue; 646 647 case '*': 648 if (ep == expbuf) 649 break; 650 if (*lastep == CBRA || *lastep == CKET) 651 cerror("Illegal *|Can't * a \\( ... \\) in regular expression"); 652 if (*lastep == CCHR && (lastep[1] & QUOTE)) 653 cerror("Illegal *|Can't * a \\n in regular expression"); 654 *lastep |= STAR; 655 continue; 656 657 case '[': 658 *ep++ = CCL; 659 *ep++ = 0; 660 cclcnt = 1; 661 c = getchar(); 662 if (c == '^') { 663 c = getchar(); 664 ep[-2] = NCCL; 665 } 666 if (c == ']') 667 cerror("Bad character class|Empty character class '[]' or '[^]' cannot match"); 668 while (c != ']') { 669 if (c == '\\' && any(peekchar(), "]-^\\")) 670 c = getchar() | QUOTE; 671 if (c == '\n' || c == EOF) 672 cerror("Missing ]"); 673 *ep++ = c; 674 cclcnt++; 675 if (ep >= &expbuf[ESIZE]) 676 goto complex; 677 c = getchar(); 678 } 679 lastep[1] = cclcnt; 680 continue; 681 } 682 if (c == EOF) { 683 ungetchar(EOF); 684 c = '\\'; 685 goto defchar; 686 } 687 *ep++ = CCHR; 688 if (c == '\n') 689 cerror("No newlines in re's|Can't escape newlines into regular expressions"); 690 /* 691 if (c < '1' || c > NBRA + '1') { 692 */ 693 *ep++ = c; 694 continue; 695 /* 696 } 697 c -= '1'; 698 if (c >= nbra) 699 cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s"); 700 *ep++ = c | QUOTE; 701 continue; 702 */ 703 704 case '\n': 705 if (oknl) { 706 ungetchar(c); 707 *ep++ = CEOFC; 708 return (eof); 709 } 710 cerror("Badly formed re|Missing closing delimiter for regular expression"); 711 712 case '$': 713 if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') { 714 *ep++ = CDOL; 715 continue; 716 } 717 goto defchar; 718 719 case '.': 720 case '~': 721 case '*': 722 case '[': 723 if (value(MAGIC)) 724 goto magic; 725 defchar: 726 default: 727 *ep++ = CCHR; 728 *ep++ = c; 729 continue; 730 } 731 } 732 } 733 734 cerror(s) 735 char *s; 736 { 737 738 expbuf[0] = 0; 739 error(s); 740 } 741 742 same(a, b) 743 register int a, b; 744 { 745 746 return (a == b || value(IGNORECASE) && 747 ((islower(a) && toupper(a) == b) || (islower(b) && toupper(b) == a))); 748 } 749 750 char *locs; 751 752 execute(gf, addr) 753 line *addr; 754 { 755 register char *p1, *p2; 756 register int c; 757 758 if (gf) { 759 if (circfl) 760 return (0); 761 locs = p1 = loc2; 762 } else { 763 if (addr == zero) 764 return (0); 765 p1 = linebuf; 766 getline(*addr); 767 locs = 0; 768 } 769 p2 = expbuf; 770 if (circfl) { 771 loc1 = p1; 772 return (advance(p1, p2)); 773 } 774 /* fast check for first character */ 775 if (*p2 == CCHR) { 776 c = p2[1]; 777 do { 778 if (c != *p1 && (!value(IGNORECASE) || 779 !((islower(c) && toupper(c) == *p1) || 780 (islower(*p1) && toupper(*p1) == c)))) 781 continue; 782 if (advance(p1, p2)) { 783 loc1 = p1; 784 return (1); 785 } 786 } while (*p1++); 787 return (0); 788 } 789 /* regular algorithm */ 790 do { 791 if (advance(p1, p2)) { 792 loc1 = p1; 793 return (1); 794 } 795 } while (*p1++); 796 return (0); 797 } 798 799 #define uletter(c) (isalpha(c) || c == '_') 800 801 advance(lp, ep) 802 register char *lp, *ep; 803 { 804 register char *curlp; 805 char *sp, *sp1; 806 int c; 807 808 for (;;) switch (*ep++) { 809 810 case CCHR: 811 /* useless 812 if (*ep & QUOTE) { 813 c = *ep++ & TRIM; 814 sp = braslist[c]; 815 sp1 = braelist[c]; 816 while (sp < sp1) { 817 if (!same(*sp, *lp)) 818 return (0); 819 sp++, lp++; 820 } 821 continue; 822 } 823 */ 824 if (!same(*ep, *lp)) 825 return (0); 826 ep++, lp++; 827 continue; 828 829 case CDOT: 830 if (*lp++) 831 continue; 832 return (0); 833 834 case CDOL: 835 if (*lp == 0) 836 continue; 837 return (0); 838 839 case CEOFC: 840 loc2 = lp; 841 return (1); 842 843 case CCL: 844 if (cclass(ep, *lp++, 1)) { 845 ep += *ep; 846 continue; 847 } 848 return (0); 849 850 case NCCL: 851 if (cclass(ep, *lp++, 0)) { 852 ep += *ep; 853 continue; 854 } 855 return (0); 856 857 case CBRA: 858 braslist[*ep++] = lp; 859 continue; 860 861 case CKET: 862 braelist[*ep++] = lp; 863 continue; 864 865 case CDOT|STAR: 866 curlp = lp; 867 while (*lp++) 868 continue; 869 goto star; 870 871 case CCHR|STAR: 872 curlp = lp; 873 while (same(*lp, *ep)) 874 lp++; 875 lp++; 876 ep++; 877 goto star; 878 879 case CCL|STAR: 880 case NCCL|STAR: 881 curlp = lp; 882 while (cclass(ep, *lp++, ep[-1] == (CCL|STAR))) 883 continue; 884 ep += *ep; 885 goto star; 886 star: 887 do { 888 lp--; 889 if (lp == locs) 890 break; 891 if (advance(lp, ep)) 892 return (1); 893 } while (lp > curlp); 894 return (0); 895 896 case CBRC: 897 if (lp == expbuf) 898 continue; 899 if ((isdigit(*lp) || uletter(*lp)) && !uletter(lp[-1]) && !isdigit(lp[-1])) 900 continue; 901 return (0); 902 903 case CLET: 904 if (!uletter(*lp) && !isdigit(*lp)) 905 continue; 906 return (0); 907 908 default: 909 error("Re internal error"); 910 } 911 } 912 913 cclass(set, c, af) 914 register char *set; 915 register int c; 916 int af; 917 { 918 register int n; 919 920 if (c == 0) 921 return (0); 922 if (value(IGNORECASE) && isupper(c)) 923 c = tolower(c); 924 n = *set++; 925 while (--n) 926 if (n > 2 && set[1] == '-') { 927 if (c >= (set[0] & TRIM) && c <= (set[2] & TRIM)) 928 return (af); 929 set += 3; 930 n -= 2; 931 } else 932 if ((*set++ & TRIM) == c) 933 return (af); 934 return (!af); 935 } 936