1 /* Copyright (c) 1981 Regents of the University of California */ 2 static char *sccsid = "@(#)ex_re.c 7.3 06/18/83"; 3 #include "ex.h" 4 #include "ex_re.h" 5 6 /* 7 * Global, substitute and regular expressions. 8 * Very similar to ed, with some re extensions and 9 * confirmed substitute. 10 */ 11 global(k) 12 bool k; 13 { 14 register char *gp; 15 register int c; 16 register line *a1; 17 char globuf[GBSIZE], *Cwas; 18 int lines = lineDOL(); 19 int oinglobal = inglobal; 20 char *oglobp = globp; 21 22 Cwas = Command; 23 /* 24 * States of inglobal: 25 * 0: ordinary - not in a global command. 26 * 1: text coming from some buffer, not tty. 27 * 2: like 1, but the source of the buffer is a global command. 28 * Hence you're only in a global command if inglobal==2. This 29 * strange sounding convention is historically derived from 30 * everybody simulating a global command. 31 */ 32 if (inglobal==2) 33 error("Global within global@not allowed"); 34 markDOT(); 35 setall(); 36 nonzero(); 37 if (skipend()) 38 error("Global needs re|Missing regular expression for global"); 39 c = getchar(); 40 ignore(compile(c, 1)); 41 savere(scanre); 42 gp = globuf; 43 while ((c = getchar()) != '\n') { 44 switch (c) { 45 46 case EOF: 47 c = '\n'; 48 goto brkwh; 49 50 case '\\': 51 c = getchar(); 52 switch (c) { 53 54 case '\\': 55 ungetchar(c); 56 break; 57 58 case '\n': 59 break; 60 61 default: 62 *gp++ = '\\'; 63 break; 64 } 65 break; 66 } 67 *gp++ = c; 68 if (gp >= &globuf[GBSIZE - 2]) 69 error("Global command too long"); 70 } 71 brkwh: 72 ungetchar(c); 73 out: 74 newline(); 75 *gp++ = c; 76 *gp++ = 0; 77 saveall(); 78 inglobal = 2; 79 for (a1 = one; a1 <= dol; a1++) { 80 *a1 &= ~01; 81 if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k) 82 *a1 |= 01; 83 } 84 #ifdef notdef 85 /* 86 * This code is commented out for now. The problem is that we don't 87 * fix up the undo area the way we should. Basically, I think what has 88 * to be done is to copy the undo area down (since we shrunk everything) 89 * and move the various pointers into it down too. I will do this later 90 * when I have time. (Mark, 10-20-80) 91 */ 92 /* 93 * Special case: g/.../d (avoid n^2 algorithm) 94 */ 95 if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') { 96 gdelete(); 97 return; 98 } 99 #endif 100 if (inopen) 101 inopen = -1; 102 /* 103 * Now for each marked line, set dot there and do the commands. 104 * Note the n^2 behavior here for lots of lines matching. 105 * This is really needed: in some cases you could delete lines, 106 * causing a marked line to be moved before a1 and missed if 107 * we didn't restart at zero each time. 108 */ 109 for (a1 = one; a1 <= dol; a1++) { 110 if (*a1 & 01) { 111 *a1 &= ~01; 112 dot = a1; 113 globp = globuf; 114 commands(1, 1); 115 a1 = zero; 116 } 117 } 118 globp = oglobp; 119 inglobal = oinglobal; 120 endline = 1; 121 Command = Cwas; 122 netchHAD(lines); 123 setlastchar(EOF); 124 if (inopen) { 125 ungetchar(EOF); 126 inopen = 1; 127 } 128 } 129 130 /* 131 * gdelete: delete inside a global command. Handles the 132 * special case g/r.e./d. All lines to be deleted have 133 * already been marked. Squeeze the remaining lines together. 134 * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/, 135 * and g/r.e./.,/r.e.2/d are not treated specially. There is no 136 * good reason for this except the question: where to you draw the line? 137 */ 138 gdelete() 139 { 140 register line *a1, *a2, *a3; 141 142 a3 = dol; 143 /* find first marked line. can skip all before it */ 144 for (a1=zero; (*a1&01)==0; a1++) 145 if (a1>=a3) 146 return; 147 /* copy down unmarked lines, compacting as we go. */ 148 for (a2=a1+1; a2<=a3;) { 149 if (*a2&01) { 150 a2++; /* line is marked, skip it */ 151 dot = a1; /* dot left after line deletion */ 152 } else 153 *a1++ = *a2++; /* unmarked, copy it */ 154 } 155 dol = a1-1; 156 if (dot>dol) 157 dot = dol; 158 change(); 159 } 160 161 bool cflag; 162 int scount, slines, stotal; 163 164 substitute(c) 165 int c; 166 { 167 register line *addr; 168 register int n; 169 int gsubf, hopcount; 170 171 gsubf = compsub(c); 172 if(FIXUNDO) 173 save12(), undkind = UNDCHANGE; 174 stotal = 0; 175 slines = 0; 176 for (addr = addr1; addr <= addr2; addr++) { 177 scount = hopcount = 0; 178 if (dosubcon(0, addr) == 0) 179 continue; 180 if (gsubf) { 181 /* 182 * The loop can happen from s/\</&/g 183 * but we don't want to break other, reasonable cases. 184 */ 185 while (*loc2) { 186 if (++hopcount > sizeof linebuf) 187 error("substitution loop"); 188 if (dosubcon(1, addr) == 0) 189 break; 190 } 191 } 192 if (scount) { 193 stotal += scount; 194 slines++; 195 putmark(addr); 196 n = append(getsub, addr); 197 addr += n; 198 addr2 += n; 199 } 200 } 201 if (stotal == 0 && !inglobal && !cflag) 202 error("Fail|Substitute pattern match failed"); 203 snote(stotal, slines); 204 return (stotal); 205 } 206 207 compsub(ch) 208 { 209 register int seof, c, uselastre; 210 static int gsubf; 211 212 if (!value(EDCOMPATIBLE)) 213 gsubf = cflag = 0; 214 uselastre = 0; 215 switch (ch) { 216 217 case 's': 218 ignore(skipwh()); 219 seof = getchar(); 220 if (endcmd(seof) || any(seof, "gcr")) { 221 ungetchar(seof); 222 goto redo; 223 } 224 if (isalpha(seof) || isdigit(seof)) 225 error("Substitute needs re|Missing regular expression for substitute"); 226 seof = compile(seof, 1); 227 uselastre = 1; 228 comprhs(seof); 229 gsubf = 0; 230 cflag = 0; 231 break; 232 233 case '~': 234 uselastre = 1; 235 /* fall into ... */ 236 case '&': 237 redo: 238 if (re.Expbuf[0] == 0) 239 error("No previous re|No previous regular expression"); 240 if (subre.Expbuf[0] == 0) 241 error("No previous substitute re|No previous substitute to repeat"); 242 break; 243 } 244 for (;;) { 245 c = getchar(); 246 switch (c) { 247 248 case 'g': 249 gsubf = !gsubf; 250 continue; 251 252 case 'c': 253 cflag = !cflag; 254 continue; 255 256 case 'r': 257 uselastre = 1; 258 continue; 259 260 default: 261 ungetchar(c); 262 setcount(); 263 newline(); 264 if (uselastre) 265 savere(subre); 266 else 267 resre(subre); 268 return (gsubf); 269 } 270 } 271 } 272 273 comprhs(seof) 274 int seof; 275 { 276 register char *rp, *orp; 277 register int c; 278 char orhsbuf[RHSSIZE]; 279 280 rp = rhsbuf; 281 CP(orhsbuf, rp); 282 for (;;) { 283 c = getchar(); 284 if (c == seof) 285 break; 286 switch (c) { 287 288 case '\\': 289 c = getchar(); 290 if (c == EOF) { 291 ungetchar(c); 292 break; 293 } 294 if (value(MAGIC)) { 295 /* 296 * When "magic", \& turns into a plain &, 297 * and all other chars work fine quoted. 298 */ 299 if (c != '&') 300 c |= QUOTE; 301 break; 302 } 303 magic: 304 if (c == '~') { 305 for (orp = orhsbuf; *orp; *rp++ = *orp++) 306 if (rp >= &rhsbuf[RHSSIZE - 1]) 307 goto toobig; 308 continue; 309 } 310 c |= QUOTE; 311 break; 312 313 case '\n': 314 case EOF: 315 if (!(globp && globp[0])) { 316 ungetchar(c); 317 goto endrhs; 318 } 319 320 case '~': 321 case '&': 322 if (value(MAGIC)) 323 goto magic; 324 break; 325 } 326 if (rp >= &rhsbuf[RHSSIZE - 1]) { 327 toobig: 328 *rp = 0; 329 error("Replacement pattern too long@- limit 256 characters"); 330 } 331 *rp++ = c; 332 } 333 endrhs: 334 *rp++ = 0; 335 } 336 337 getsub() 338 { 339 register char *p; 340 341 if ((p = linebp) == 0) 342 return (EOF); 343 strcLIN(p); 344 linebp = 0; 345 return (0); 346 } 347 348 dosubcon(f, a) 349 bool f; 350 line *a; 351 { 352 353 if (execute(f, a) == 0) 354 return (0); 355 if (confirmed(a)) { 356 dosub(); 357 scount++; 358 } 359 return (1); 360 } 361 362 confirmed(a) 363 line *a; 364 { 365 register int c, ch; 366 367 if (cflag == 0) 368 return (1); 369 pofix(); 370 pline(lineno(a)); 371 if (inopen) 372 putchar('\n' | QUOTE); 373 c = column(loc1 - 1); 374 ugo(c - 1 + (inopen ? 1 : 0), ' '); 375 ugo(column(loc2 - 1) - c, '^'); 376 flush(); 377 ch = c = getkey(); 378 again: 379 if (c == '\r') 380 c = '\n'; 381 if (inopen) 382 putchar(c), flush(); 383 if (c != '\n' && c != EOF) { 384 c = getkey(); 385 goto again; 386 } 387 noteinp(); 388 return (ch == 'y'); 389 } 390 391 getch() 392 { 393 char c; 394 395 if (read(2, &c, 1) != 1) 396 return (EOF); 397 return (c & TRIM); 398 } 399 400 ugo(cnt, with) 401 int with; 402 int cnt; 403 { 404 405 if (cnt > 0) 406 do 407 putchar(with); 408 while (--cnt > 0); 409 } 410 411 int casecnt; 412 bool destuc; 413 414 dosub() 415 { 416 register char *lp, *sp, *rp; 417 int c; 418 419 lp = linebuf; 420 sp = genbuf; 421 rp = rhsbuf; 422 while (lp < loc1) 423 *sp++ = *lp++; 424 casecnt = 0; 425 while (c = *rp++) { 426 /* ^V <return> from vi to split lines */ 427 if (c == '\r') 428 c = '\n'; 429 430 if (c & QUOTE) 431 switch (c & TRIM) { 432 433 case '&': 434 sp = place(sp, loc1, loc2); 435 if (sp == 0) 436 goto ovflo; 437 continue; 438 439 case 'l': 440 casecnt = 1; 441 destuc = 0; 442 continue; 443 444 case 'L': 445 casecnt = LBSIZE; 446 destuc = 0; 447 continue; 448 449 case 'u': 450 casecnt = 1; 451 destuc = 1; 452 continue; 453 454 case 'U': 455 casecnt = LBSIZE; 456 destuc = 1; 457 continue; 458 459 case 'E': 460 case 'e': 461 casecnt = 0; 462 continue; 463 } 464 if (c < 0 && (c &= TRIM) >= '1' && c < nbra + '1') { 465 sp = place(sp, braslist[c - '1'], braelist[c - '1']); 466 if (sp == 0) 467 goto ovflo; 468 continue; 469 } 470 if (casecnt) 471 *sp++ = fixcase(c & TRIM); 472 else 473 *sp++ = c & TRIM; 474 if (sp >= &genbuf[LBSIZE]) 475 ovflo: 476 error("Line overflow@in substitute"); 477 } 478 lp = loc2; 479 loc2 = sp + (linebuf - genbuf); 480 while (*sp++ = *lp++) 481 if (sp >= &genbuf[LBSIZE]) 482 goto ovflo; 483 strcLIN(genbuf); 484 } 485 486 fixcase(c) 487 register int c; 488 { 489 490 if (casecnt == 0) 491 return (c); 492 casecnt--; 493 if (destuc) { 494 if (islower(c)) 495 c = toupper(c); 496 } else 497 if (isupper(c)) 498 c = tolower(c); 499 return (c); 500 } 501 502 char * 503 place(sp, l1, l2) 504 register char *sp, *l1, *l2; 505 { 506 507 while (l1 < l2) { 508 *sp++ = fixcase(*l1++); 509 if (sp >= &genbuf[LBSIZE]) 510 return (0); 511 } 512 return (sp); 513 } 514 515 snote(total, lines) 516 register int total, lines; 517 { 518 519 if (!notable(total)) 520 return; 521 printf(mesg("%d subs|%d substitutions"), total); 522 if (lines != 1 && lines != total) 523 printf(" on %d lines", lines); 524 noonl(); 525 flush(); 526 } 527 528 compile(eof, oknl) 529 int eof; 530 int oknl; 531 { 532 register int c; 533 register char *ep; 534 char *lastep; 535 char bracket[NBRA], *bracketp, *rhsp; 536 int cclcnt; 537 538 if (isalpha(eof) || isdigit(eof)) 539 error("Regular expressions cannot be delimited by letters or digits"); 540 ep = expbuf; 541 c = getchar(); 542 if (eof == '\\') 543 switch (c) { 544 545 case '/': 546 case '?': 547 if (scanre.Expbuf[0] == 0) 548 error("No previous scan re|No previous scanning regular expression"); 549 resre(scanre); 550 return (c); 551 552 case '&': 553 if (subre.Expbuf[0] == 0) 554 error("No previous substitute re|No previous substitute regular expression"); 555 resre(subre); 556 return (c); 557 558 default: 559 error("Badly formed re|Regular expression \\ must be followed by / or ?"); 560 } 561 if (c == eof || c == '\n' || c == EOF) { 562 if (*ep == 0) 563 error("No previous re|No previous regular expression"); 564 if (c == '\n' && oknl == 0) 565 error("Missing closing delimiter@for regular expression"); 566 if (c != eof) 567 ungetchar(c); 568 return (eof); 569 } 570 bracketp = bracket; 571 nbra = 0; 572 circfl = 0; 573 if (c == '^') { 574 c = getchar(); 575 circfl++; 576 } 577 ungetchar(c); 578 for (;;) { 579 if (ep >= &expbuf[ESIZE - 2]) 580 complex: 581 cerror("Re too complex|Regular expression too complicated"); 582 c = getchar(); 583 if (c == eof || c == EOF) { 584 if (bracketp != bracket) 585 cerror("Unmatched \\(|More \\('s than \\)'s in regular expression"); 586 *ep++ = CEOFC; 587 if (c == EOF) 588 ungetchar(c); 589 return (eof); 590 } 591 if (value(MAGIC)) { 592 if (c != '*' || ep == expbuf) 593 lastep = ep; 594 } else 595 if (c != '\\' || peekchar() != '*' || ep == expbuf) 596 lastep = ep; 597 switch (c) { 598 599 case '\\': 600 c = getchar(); 601 switch (c) { 602 603 case '(': 604 if (nbra >= NBRA) 605 cerror("Awash in \\('s!|Too many \\('d subexressions in a regular expression"); 606 *bracketp++ = nbra; 607 *ep++ = CBRA; 608 *ep++ = nbra++; 609 continue; 610 611 case ')': 612 if (bracketp <= bracket) 613 cerror("Extra \\)|More \\)'s than \\('s in regular expression"); 614 *ep++ = CKET; 615 *ep++ = *--bracketp; 616 continue; 617 618 case '<': 619 *ep++ = CBRC; 620 continue; 621 622 case '>': 623 *ep++ = CLET; 624 continue; 625 } 626 if (value(MAGIC) == 0) 627 magic: 628 switch (c) { 629 630 case '.': 631 *ep++ = CDOT; 632 continue; 633 634 case '~': 635 rhsp = rhsbuf; 636 while (*rhsp) { 637 if (*rhsp & QUOTE) { 638 c = *rhsp & TRIM; 639 if (c == '&') 640 error("Replacement pattern contains &@- cannot use in re"); 641 if (c >= '1' && c <= '9') 642 error("Replacement pattern contains \\d@- cannot use in re"); 643 } 644 if (ep >= &expbuf[ESIZE-2]) 645 goto complex; 646 *ep++ = CCHR; 647 *ep++ = *rhsp++ & TRIM; 648 } 649 continue; 650 651 case '*': 652 if (ep == expbuf) 653 break; 654 if (*lastep == CBRA || *lastep == CKET) 655 cerror("Illegal *|Can't * a \\( ... \\) in regular expression"); 656 if (*lastep == CCHR && (lastep[1] & QUOTE)) 657 cerror("Illegal *|Can't * a \\n in regular expression"); 658 *lastep |= STAR; 659 continue; 660 661 case '[': 662 *ep++ = CCL; 663 *ep++ = 0; 664 cclcnt = 1; 665 c = getchar(); 666 if (c == '^') { 667 c = getchar(); 668 ep[-2] = NCCL; 669 } 670 if (c == ']') 671 cerror("Bad character class|Empty character class '[]' or '[^]' cannot match"); 672 while (c != ']') { 673 if (c == '\\' && any(peekchar(), "]-^\\")) 674 c = getchar() | QUOTE; 675 if (c == '\n' || c == EOF) 676 cerror("Missing ]"); 677 *ep++ = c; 678 cclcnt++; 679 if (ep >= &expbuf[ESIZE]) 680 goto complex; 681 c = getchar(); 682 } 683 lastep[1] = cclcnt; 684 continue; 685 } 686 if (c == EOF) { 687 ungetchar(EOF); 688 c = '\\'; 689 goto defchar; 690 } 691 *ep++ = CCHR; 692 if (c == '\n') 693 cerror("No newlines in re's|Can't escape newlines into regular expressions"); 694 /* 695 if (c < '1' || c > NBRA + '1') { 696 */ 697 *ep++ = c; 698 continue; 699 /* 700 } 701 c -= '1'; 702 if (c >= nbra) 703 cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s"); 704 *ep++ = c | QUOTE; 705 continue; 706 */ 707 708 case '\n': 709 if (oknl) { 710 ungetchar(c); 711 *ep++ = CEOFC; 712 return (eof); 713 } 714 cerror("Badly formed re|Missing closing delimiter for regular expression"); 715 716 case '$': 717 if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') { 718 *ep++ = CDOL; 719 continue; 720 } 721 goto defchar; 722 723 case '.': 724 case '~': 725 case '*': 726 case '[': 727 if (value(MAGIC)) 728 goto magic; 729 defchar: 730 default: 731 *ep++ = CCHR; 732 *ep++ = c; 733 continue; 734 } 735 } 736 } 737 738 cerror(s) 739 char *s; 740 { 741 742 expbuf[0] = 0; 743 error(s); 744 } 745 746 same(a, b) 747 register int a, b; 748 { 749 750 return (a == b || value(IGNORECASE) && 751 ((islower(a) && toupper(a) == b) || (islower(b) && toupper(b) == a))); 752 } 753 754 char *locs; 755 756 execute(gf, addr) 757 line *addr; 758 { 759 register char *p1, *p2; 760 register int c; 761 762 if (gf) { 763 if (circfl) 764 return (0); 765 locs = p1 = loc2; 766 } else { 767 if (addr == zero) 768 return (0); 769 p1 = linebuf; 770 getline(*addr); 771 locs = 0; 772 } 773 p2 = expbuf; 774 if (circfl) { 775 loc1 = p1; 776 return (advance(p1, p2)); 777 } 778 /* fast check for first character */ 779 if (*p2 == CCHR) { 780 c = p2[1]; 781 do { 782 if (c != *p1 && (!value(IGNORECASE) || 783 !((islower(c) && toupper(c) == *p1) || 784 (islower(*p1) && toupper(*p1) == c)))) 785 continue; 786 if (advance(p1, p2)) { 787 loc1 = p1; 788 return (1); 789 } 790 } while (*p1++); 791 return (0); 792 } 793 /* regular algorithm */ 794 do { 795 if (advance(p1, p2)) { 796 loc1 = p1; 797 return (1); 798 } 799 } while (*p1++); 800 return (0); 801 } 802 803 #define uletter(c) (isalpha(c) || c == '_') 804 805 advance(lp, ep) 806 register char *lp, *ep; 807 { 808 register char *curlp; 809 char *sp, *sp1; 810 int c; 811 812 for (;;) switch (*ep++) { 813 814 case CCHR: 815 /* useless 816 if (*ep & QUOTE) { 817 c = *ep++ & TRIM; 818 sp = braslist[c]; 819 sp1 = braelist[c]; 820 while (sp < sp1) { 821 if (!same(*sp, *lp)) 822 return (0); 823 sp++, lp++; 824 } 825 continue; 826 } 827 */ 828 if (!same(*ep, *lp)) 829 return (0); 830 ep++, lp++; 831 continue; 832 833 case CDOT: 834 if (*lp++) 835 continue; 836 return (0); 837 838 case CDOL: 839 if (*lp == 0) 840 continue; 841 return (0); 842 843 case CEOFC: 844 loc2 = lp; 845 return (1); 846 847 case CCL: 848 if (cclass(ep, *lp++, 1)) { 849 ep += *ep; 850 continue; 851 } 852 return (0); 853 854 case NCCL: 855 if (cclass(ep, *lp++, 0)) { 856 ep += *ep; 857 continue; 858 } 859 return (0); 860 861 case CBRA: 862 braslist[*ep++] = lp; 863 continue; 864 865 case CKET: 866 braelist[*ep++] = lp; 867 continue; 868 869 case CDOT|STAR: 870 curlp = lp; 871 while (*lp++) 872 continue; 873 goto star; 874 875 case CCHR|STAR: 876 curlp = lp; 877 while (same(*lp, *ep)) 878 lp++; 879 lp++; 880 ep++; 881 goto star; 882 883 case CCL|STAR: 884 case NCCL|STAR: 885 curlp = lp; 886 while (cclass(ep, *lp++, ep[-1] == (CCL|STAR))) 887 continue; 888 ep += *ep; 889 goto star; 890 star: 891 do { 892 lp--; 893 if (lp == locs) 894 break; 895 if (advance(lp, ep)) 896 return (1); 897 } while (lp > curlp); 898 return (0); 899 900 case CBRC: 901 if (lp == linebuf) 902 continue; 903 if ((isdigit(*lp) || uletter(*lp)) && !uletter(lp[-1]) && !isdigit(lp[-1])) 904 continue; 905 return (0); 906 907 case CLET: 908 if (!uletter(*lp) && !isdigit(*lp)) 909 continue; 910 return (0); 911 912 default: 913 error("Re internal error"); 914 } 915 } 916 917 cclass(set, c, af) 918 register char *set; 919 register int c; 920 int af; 921 { 922 register int n; 923 924 if (c == 0) 925 return (0); 926 if (value(IGNORECASE) && isupper(c)) 927 c = tolower(c); 928 n = *set++; 929 while (--n) 930 if (n > 2 && set[1] == '-') { 931 if (c >= (set[0] & TRIM) && c <= (set[2] & TRIM)) 932 return (af); 933 set += 3; 934 n -= 2; 935 } else 936 if ((*set++ & TRIM) == c) 937 return (af); 938 return (!af); 939 } 940