1 /*- 2 * Copyright (c) 1980, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * %sccs.include.proprietary.c% 6 */ 7 8 #ifndef lint 9 static char sccsid[] = "@(#)ex_re.c 8.1 (Berkeley) 06/09/93"; 10 #endif /* not lint */ 11 12 #include "ex.h" 13 #include "ex_re.h" 14 15 /* 16 * Global, substitute and regular expressions. 17 * Very similar to ed, with some re extensions and 18 * confirmed substitute. 19 */ 20 global(k) 21 bool k; 22 { 23 register char *gp; 24 register int c; 25 register line *a1; 26 char globuf[GBSIZE], *Cwas; 27 int lines = lineDOL(); 28 int oinglobal = inglobal; 29 char *oglobp = globp; 30 31 Cwas = Command; 32 /* 33 * States of inglobal: 34 * 0: ordinary - not in a global command. 35 * 1: text coming from some buffer, not tty. 36 * 2: like 1, but the source of the buffer is a global command. 37 * Hence you're only in a global command if inglobal==2. This 38 * strange sounding convention is historically derived from 39 * everybody simulating a global command. 40 */ 41 if (inglobal==2) 42 error("Global within global@not allowed"); 43 markDOT(); 44 setall(); 45 nonzero(); 46 if (skipend()) 47 error("Global needs re|Missing regular expression for global"); 48 c = ex_getchar(); 49 ignore(compile(c, 1)); 50 savere(scanre); 51 gp = globuf; 52 while ((c = ex_getchar()) != '\n') { 53 switch (c) { 54 55 case EOF: 56 c = '\n'; 57 goto brkwh; 58 59 case '\\': 60 c = ex_getchar(); 61 switch (c) { 62 63 case '\\': 64 ungetchar(c); 65 break; 66 67 case '\n': 68 break; 69 70 default: 71 *gp++ = '\\'; 72 break; 73 } 74 break; 75 } 76 *gp++ = c; 77 if (gp >= &globuf[GBSIZE - 2]) 78 error("Global command too long"); 79 } 80 brkwh: 81 ungetchar(c); 82 newline(); 83 *gp++ = c; 84 *gp++ = 0; 85 saveall(); 86 inglobal = 2; 87 for (a1 = one; a1 <= dol; a1++) { 88 *a1 &= ~01; 89 if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k) 90 *a1 |= 01; 91 } 92 #ifdef notdef 93 /* 94 * This code is commented out for now. The problem is that we don't 95 * fix up the undo area the way we should. Basically, I think what has 96 * to be done is to copy the undo area down (since we shrunk everything) 97 * and move the various pointers into it down too. I will do this later 98 * when I have time. (Mark, 10-20-80) 99 */ 100 /* 101 * Special case: g/.../d (avoid n^2 algorithm) 102 */ 103 if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') { 104 gdelete(); 105 return; 106 } 107 #endif 108 if (inopen) 109 inopen = -1; 110 /* 111 * Now for each marked line, set dot there and do the commands. 112 * Note the n^2 behavior here for lots of lines matching. 113 * This is really needed: in some cases you could delete lines, 114 * causing a marked line to be moved before a1 and missed if 115 * we didn't restart at zero each time. 116 */ 117 for (a1 = one; a1 <= dol; a1++) { 118 if (*a1 & 01) { 119 *a1 &= ~01; 120 dot = a1; 121 globp = globuf; 122 commands(1, 1); 123 a1 = zero; 124 } 125 } 126 globp = oglobp; 127 inglobal = oinglobal; 128 endline = 1; 129 Command = Cwas; 130 netchHAD(lines); 131 setlastchar(EOF); 132 if (inopen) { 133 ungetchar(EOF); 134 inopen = 1; 135 } 136 } 137 138 /* 139 * gdelete: delete inside a global command. Handles the 140 * special case g/r.e./d. All lines to be deleted have 141 * already been marked. Squeeze the remaining lines together. 142 * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/, 143 * and g/r.e./.,/r.e.2/d are not treated specially. There is no 144 * good reason for this except the question: where to you draw the line? 145 */ 146 gdelete() 147 { 148 register line *a1, *a2, *a3; 149 150 a3 = dol; 151 /* find first marked line. can skip all before it */ 152 for (a1=zero; (*a1&01)==0; a1++) 153 if (a1>=a3) 154 return; 155 /* copy down unmarked lines, compacting as we go. */ 156 for (a2=a1+1; a2<=a3;) { 157 if (*a2&01) { 158 a2++; /* line is marked, skip it */ 159 dot = a1; /* dot left after line deletion */ 160 } else 161 *a1++ = *a2++; /* unmarked, copy it */ 162 } 163 dol = a1-1; 164 if (dot>dol) 165 dot = dol; 166 change(); 167 } 168 169 bool cflag; 170 int scount, slines, stotal; 171 172 substitute(c) 173 int c; 174 { 175 register line *addr; 176 register int n; 177 int gsubf, hopcount; 178 179 gsubf = compsub(c); 180 if(FIXUNDO) 181 save12(), undkind = UNDCHANGE; 182 stotal = 0; 183 slines = 0; 184 for (addr = addr1; addr <= addr2; addr++) { 185 scount = hopcount = 0; 186 if (dosubcon(0, addr) == 0) 187 continue; 188 if (gsubf) { 189 /* 190 * The loop can happen from s/\</&/g 191 * but we don't want to break other, reasonable cases. 192 */ 193 while (*loc2) { 194 if (++hopcount > sizeof linebuf) 195 error("substitution loop"); 196 if (dosubcon(1, addr) == 0) 197 break; 198 } 199 } 200 if (scount) { 201 stotal += scount; 202 slines++; 203 putmark(addr); 204 n = append(getsub, addr); 205 addr += n; 206 addr2 += n; 207 } 208 } 209 if (stotal == 0 && !inglobal && !cflag) 210 error("Fail|Substitute pattern match failed"); 211 snote(stotal, slines); 212 return (stotal); 213 } 214 215 compsub(ch) 216 { 217 register int seof, c, uselastre; 218 static int gsubf; 219 220 if (!value(EDCOMPATIBLE)) 221 gsubf = cflag = 0; 222 uselastre = 0; 223 switch (ch) { 224 225 case 's': 226 ignore(skipwh()); 227 seof = ex_getchar(); 228 if (endcmd(seof) || any(seof, "gcr")) { 229 ungetchar(seof); 230 goto redo; 231 } 232 if (isalpha(seof) || isdigit(seof)) 233 error("Substitute needs re|Missing regular expression for substitute"); 234 seof = compile(seof, 1); 235 uselastre = 1; 236 comprhs(seof); 237 gsubf = 0; 238 cflag = 0; 239 break; 240 241 case '~': 242 uselastre = 1; 243 /* fall into ... */ 244 case '&': 245 redo: 246 if (re.Expbuf[0] == 0) 247 error("No previous re|No previous regular expression"); 248 if (subre.Expbuf[0] == 0) 249 error("No previous substitute re|No previous substitute to repeat"); 250 break; 251 } 252 for (;;) { 253 c = ex_getchar(); 254 switch (c) { 255 256 case 'g': 257 gsubf = !gsubf; 258 continue; 259 260 case 'c': 261 cflag = !cflag; 262 continue; 263 264 case 'r': 265 uselastre = 1; 266 continue; 267 268 default: 269 ungetchar(c); 270 setcount(); 271 newline(); 272 if (uselastre) 273 savere(subre); 274 else 275 resre(subre); 276 return (gsubf); 277 } 278 } 279 } 280 281 comprhs(seof) 282 int seof; 283 { 284 register char *rp, *orp; 285 register int c; 286 char orhsbuf[RHSSIZE]; 287 288 rp = rhsbuf; 289 CP(orhsbuf, rp); 290 for (;;) { 291 c = ex_getchar(); 292 if (c == seof) 293 break; 294 switch (c) { 295 296 case '\\': 297 c = ex_getchar(); 298 if (c == EOF) { 299 ungetchar(c); 300 break; 301 } 302 if (value(MAGIC)) { 303 /* 304 * When "magic", \& turns into a plain &, 305 * and all other chars work fine quoted. 306 */ 307 if (c != '&') 308 c |= QUOTE; 309 break; 310 } 311 magic: 312 if (c == '~') { 313 for (orp = orhsbuf; *orp; *rp++ = *orp++) 314 if (rp >= &rhsbuf[RHSSIZE - 1]) 315 goto toobig; 316 continue; 317 } 318 c |= QUOTE; 319 break; 320 321 case '\n': 322 case EOF: 323 if (!(globp && globp[0])) { 324 ungetchar(c); 325 goto endrhs; 326 } 327 328 case '~': 329 case '&': 330 if (value(MAGIC)) 331 goto magic; 332 break; 333 } 334 if (rp >= &rhsbuf[RHSSIZE - 1]) { 335 toobig: 336 *rp = 0; 337 error("Replacement pattern too long@- limit 256 characters"); 338 } 339 *rp++ = c; 340 } 341 endrhs: 342 *rp++ = 0; 343 } 344 345 getsub() 346 { 347 register char *p; 348 349 if ((p = linebp) == 0) 350 return (EOF); 351 strcLIN(p); 352 linebp = 0; 353 return (0); 354 } 355 356 dosubcon(f, a) 357 bool f; 358 line *a; 359 { 360 361 if (execute(f, a) == 0) 362 return (0); 363 if (confirmed(a)) { 364 dosub(); 365 scount++; 366 } 367 return (1); 368 } 369 370 confirmed(a) 371 line *a; 372 { 373 register int c, ch; 374 375 if (cflag == 0) 376 return (1); 377 pofix(); 378 pline(lineno(a)); 379 if (inopen) 380 ex_putchar('\n' | QUOTE); 381 c = column(loc1 - 1); 382 ugo(c - 1 + (inopen ? 1 : 0), ' '); 383 ugo(column(loc2 - 1) - c, '^'); 384 flush(); 385 ch = c = getkey(); 386 again: 387 if (c == '\r') 388 c = '\n'; 389 if (inopen) 390 ex_putchar(c), flush(); 391 if (c != '\n' && c != EOF) { 392 c = getkey(); 393 goto again; 394 } 395 noteinp(); 396 return (ch == 'y'); 397 } 398 399 getch() 400 { 401 char c; 402 403 if (read(2, &c, 1) != 1) 404 return (EOF); 405 return (c & TRIM); 406 } 407 408 ugo(cnt, with) 409 int with; 410 int cnt; 411 { 412 413 if (cnt > 0) 414 do 415 ex_putchar(with); 416 while (--cnt > 0); 417 } 418 419 int casecnt; 420 bool destuc; 421 422 dosub() 423 { 424 register char *lp, *sp, *rp; 425 int c; 426 427 lp = linebuf; 428 sp = genbuf; 429 rp = rhsbuf; 430 while (lp < loc1) 431 *sp++ = *lp++; 432 casecnt = 0; 433 while (c = *rp++) { 434 /* ^V <return> from vi to split lines */ 435 if (c == '\r') 436 c = '\n'; 437 438 if (c & QUOTE) 439 switch (c & TRIM) { 440 441 case '&': 442 sp = place(sp, loc1, loc2); 443 if (sp == 0) 444 goto ovflo; 445 continue; 446 447 case 'l': 448 casecnt = 1; 449 destuc = 0; 450 continue; 451 452 case 'L': 453 casecnt = LBSIZE; 454 destuc = 0; 455 continue; 456 457 case 'u': 458 casecnt = 1; 459 destuc = 1; 460 continue; 461 462 case 'U': 463 casecnt = LBSIZE; 464 destuc = 1; 465 continue; 466 467 case 'E': 468 case 'e': 469 casecnt = 0; 470 continue; 471 } 472 if (c < 0 && (c &= TRIM) >= '1' && c < nbra + '1') { 473 sp = place(sp, braslist[c - '1'], braelist[c - '1']); 474 if (sp == 0) 475 goto ovflo; 476 continue; 477 } 478 if (casecnt) 479 *sp++ = fixcase(c & TRIM); 480 else 481 *sp++ = c & TRIM; 482 if (sp >= &genbuf[LBSIZE]) 483 ovflo: 484 error("Line overflow@in substitute"); 485 } 486 lp = loc2; 487 loc2 = sp + (linebuf - genbuf); 488 while (*sp++ = *lp++) 489 if (sp >= &genbuf[LBSIZE]) 490 goto ovflo; 491 strcLIN(genbuf); 492 } 493 494 fixcase(c) 495 register int c; 496 { 497 498 if (casecnt == 0) 499 return (c); 500 casecnt--; 501 if (destuc) { 502 if (islower(c)) 503 c = toupper(c); 504 } else 505 if (isupper(c)) 506 c = tolower(c); 507 return (c); 508 } 509 510 char * 511 place(sp, l1, l2) 512 register char *sp, *l1, *l2; 513 { 514 515 while (l1 < l2) { 516 *sp++ = fixcase(*l1++); 517 if (sp >= &genbuf[LBSIZE]) 518 return (0); 519 } 520 return (sp); 521 } 522 523 snote(total, lines) 524 register int total, lines; 525 { 526 527 if (!notable(total)) 528 return; 529 ex_printf(mesg("%d subs|%d substitutions"), total); 530 if (lines != 1 && lines != total) 531 ex_printf(" on %d lines", lines); 532 noonl(); 533 flush(); 534 } 535 536 compile(eof, oknl) 537 int eof; 538 int oknl; 539 { 540 register int c; 541 register char *ep; 542 char *lastep; 543 char bracket[NBRA], *bracketp, *rhsp; 544 int cclcnt; 545 546 if (isalpha(eof) || isdigit(eof)) 547 error("Regular expressions cannot be delimited by letters or digits"); 548 ep = expbuf; 549 c = ex_getchar(); 550 if (eof == '\\') 551 switch (c) { 552 553 case '/': 554 case '?': 555 if (scanre.Expbuf[0] == 0) 556 error("No previous scan re|No previous scanning regular expression"); 557 resre(scanre); 558 return (c); 559 560 case '&': 561 if (subre.Expbuf[0] == 0) 562 error("No previous substitute re|No previous substitute regular expression"); 563 resre(subre); 564 return (c); 565 566 default: 567 error("Badly formed re|Regular expression \\ must be followed by / or ?"); 568 } 569 if (c == eof || c == '\n' || c == EOF) { 570 if (*ep == 0) 571 error("No previous re|No previous regular expression"); 572 if (c == '\n' && oknl == 0) 573 error("Missing closing delimiter@for regular expression"); 574 if (c != eof) 575 ungetchar(c); 576 return (eof); 577 } 578 bracketp = bracket; 579 nbra = 0; 580 circfl = 0; 581 if (c == '^') { 582 c = ex_getchar(); 583 circfl++; 584 } 585 ungetchar(c); 586 for (;;) { 587 if (ep >= &expbuf[ESIZE - 2]) 588 complex: 589 cerror("Re too complex|Regular expression too complicated"); 590 c = ex_getchar(); 591 if (c == eof || c == EOF) { 592 if (bracketp != bracket) 593 cerror("Unmatched \\(|More \\('s than \\)'s in regular expression"); 594 *ep++ = CEOFC; 595 if (c == EOF) 596 ungetchar(c); 597 return (eof); 598 } 599 if (value(MAGIC)) { 600 if (c != '*' || ep == expbuf) 601 lastep = ep; 602 } else 603 if (c != '\\' || peekchar() != '*' || ep == expbuf) 604 lastep = ep; 605 switch (c) { 606 607 case '\\': 608 c = ex_getchar(); 609 switch (c) { 610 611 case '(': 612 if (nbra >= NBRA) 613 cerror("Awash in \\('s!|Too many \\('d subexressions in a regular expression"); 614 *bracketp++ = nbra; 615 *ep++ = CBRA; 616 *ep++ = nbra++; 617 continue; 618 619 case ')': 620 if (bracketp <= bracket) 621 cerror("Extra \\)|More \\)'s than \\('s in regular expression"); 622 *ep++ = CKET; 623 *ep++ = *--bracketp; 624 continue; 625 626 case '<': 627 *ep++ = CBRC; 628 continue; 629 630 case '>': 631 *ep++ = CLET; 632 continue; 633 } 634 if (value(MAGIC) == 0) 635 magic: 636 switch (c) { 637 638 case '.': 639 *ep++ = CDOT; 640 continue; 641 642 case '~': 643 rhsp = rhsbuf; 644 while (*rhsp) { 645 if (*rhsp & QUOTE) { 646 c = *rhsp & TRIM; 647 if (c == '&') 648 error("Replacement pattern contains &@- cannot use in re"); 649 if (c >= '1' && c <= '9') 650 error("Replacement pattern contains \\d@- cannot use in re"); 651 } 652 if (ep >= &expbuf[ESIZE-2]) 653 goto complex; 654 *ep++ = CCHR; 655 *ep++ = *rhsp++ & TRIM; 656 } 657 continue; 658 659 case '*': 660 if (ep == expbuf) 661 break; 662 if (*lastep == CBRA || *lastep == CKET) 663 cerror("Illegal *|Can't * a \\( ... \\) in regular expression"); 664 if (*lastep == CCHR && (lastep[1] & QUOTE)) 665 cerror("Illegal *|Can't * a \\n in regular expression"); 666 *lastep |= STAR; 667 continue; 668 669 case '[': 670 *ep++ = CCL; 671 *ep++ = 0; 672 cclcnt = 1; 673 c = ex_getchar(); 674 if (c == '^') { 675 c = ex_getchar(); 676 ep[-2] = NCCL; 677 } 678 if (c == ']') 679 cerror("Bad character class|Empty character class '[]' or '[^]' cannot match"); 680 while (c != ']') { 681 if (c == '\\' && any(peekchar(), "]-^\\")) 682 c = ex_getchar() | QUOTE; 683 if (c == '\n' || c == EOF) 684 cerror("Missing ]"); 685 *ep++ = c; 686 cclcnt++; 687 if (ep >= &expbuf[ESIZE]) 688 goto complex; 689 c = ex_getchar(); 690 } 691 lastep[1] = cclcnt; 692 continue; 693 } 694 if (c == EOF) { 695 ungetchar(EOF); 696 c = '\\'; 697 goto defchar; 698 } 699 *ep++ = CCHR; 700 if (c == '\n') 701 cerror("No newlines in re's|Can't escape newlines into regular expressions"); 702 /* 703 if (c < '1' || c > NBRA + '1') { 704 */ 705 *ep++ = c; 706 continue; 707 /* 708 } 709 c -= '1'; 710 if (c >= nbra) 711 cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s"); 712 *ep++ = c | QUOTE; 713 continue; 714 */ 715 716 case '\n': 717 if (oknl) { 718 ungetchar(c); 719 *ep++ = CEOFC; 720 return (eof); 721 } 722 cerror("Badly formed re|Missing closing delimiter for regular expression"); 723 724 case '$': 725 if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') { 726 *ep++ = CDOL; 727 continue; 728 } 729 goto defchar; 730 731 case '.': 732 case '~': 733 case '*': 734 case '[': 735 if (value(MAGIC)) 736 goto magic; 737 defchar: 738 default: 739 *ep++ = CCHR; 740 *ep++ = c; 741 continue; 742 } 743 } 744 } 745 746 cerror(s) 747 char *s; 748 { 749 750 expbuf[0] = 0; 751 error(s); 752 } 753 754 same(a, b) 755 register int a, b; 756 { 757 758 return (a == b || value(IGNORECASE) && 759 ((islower(a) && toupper(a) == b) || (islower(b) && toupper(b) == a))); 760 } 761 762 char *locs; 763 764 /* VARARGS1 */ 765 execute(gf, addr) 766 line *addr; 767 { 768 register char *p1, *p2; 769 register int c; 770 771 if (gf) { 772 if (circfl) 773 return (0); 774 locs = p1 = loc2; 775 } else { 776 if (addr == zero) 777 return (0); 778 p1 = linebuf; 779 getline(*addr); 780 locs = 0; 781 } 782 p2 = expbuf; 783 if (circfl) { 784 loc1 = p1; 785 return (advance(p1, p2)); 786 } 787 /* fast check for first character */ 788 if (*p2 == CCHR) { 789 c = p2[1]; 790 do { 791 if (c != *p1 && (!value(IGNORECASE) || 792 !((islower(c) && toupper(c) == *p1) || 793 (islower(*p1) && toupper(*p1) == c)))) 794 continue; 795 if (advance(p1, p2)) { 796 loc1 = p1; 797 return (1); 798 } 799 } while (*p1++); 800 return (0); 801 } 802 /* regular algorithm */ 803 do { 804 if (advance(p1, p2)) { 805 loc1 = p1; 806 return (1); 807 } 808 } while (*p1++); 809 return (0); 810 } 811 812 #define uletter(c) (isalpha(c) || c == '_') 813 814 advance(lp, ep) 815 register char *lp, *ep; 816 { 817 register char *curlp; 818 819 for (;;) switch (*ep++) { 820 821 case CCHR: 822 /* useless 823 if (*ep & QUOTE) { 824 c = *ep++ & TRIM; 825 sp = braslist[c]; 826 sp1 = braelist[c]; 827 while (sp < sp1) { 828 if (!same(*sp, *lp)) 829 return (0); 830 sp++, lp++; 831 } 832 continue; 833 } 834 */ 835 if (!same(*ep, *lp)) 836 return (0); 837 ep++, lp++; 838 continue; 839 840 case CDOT: 841 if (*lp++) 842 continue; 843 return (0); 844 845 case CDOL: 846 if (*lp == 0) 847 continue; 848 return (0); 849 850 case CEOFC: 851 loc2 = lp; 852 return (1); 853 854 case CCL: 855 if (cclass(ep, *lp++, 1)) { 856 ep += *ep; 857 continue; 858 } 859 return (0); 860 861 case NCCL: 862 if (cclass(ep, *lp++, 0)) { 863 ep += *ep; 864 continue; 865 } 866 return (0); 867 868 case CBRA: 869 braslist[*ep++] = lp; 870 continue; 871 872 case CKET: 873 braelist[*ep++] = lp; 874 continue; 875 876 case CDOT|STAR: 877 curlp = lp; 878 while (*lp++) 879 continue; 880 goto star; 881 882 case CCHR|STAR: 883 curlp = lp; 884 while (same(*lp, *ep)) 885 lp++; 886 lp++; 887 ep++; 888 goto star; 889 890 case CCL|STAR: 891 case NCCL|STAR: 892 curlp = lp; 893 while (cclass(ep, *lp++, ep[-1] == (CCL|STAR))) 894 continue; 895 ep += *ep; 896 goto star; 897 star: 898 do { 899 lp--; 900 if (lp == locs) 901 break; 902 if (advance(lp, ep)) 903 return (1); 904 } while (lp > curlp); 905 return (0); 906 907 case CBRC: 908 if (lp == linebuf) 909 continue; 910 if ((isdigit(*lp) || uletter(*lp)) && !uletter(lp[-1]) && !isdigit(lp[-1])) 911 continue; 912 return (0); 913 914 case CLET: 915 if (!uletter(*lp) && !isdigit(*lp)) 916 continue; 917 return (0); 918 919 default: 920 error("Re internal error"); 921 } 922 } 923 924 cclass(set, c, af) 925 register char *set; 926 register int c; 927 int af; 928 { 929 register int n; 930 931 if (c == 0) 932 return (0); 933 if (value(IGNORECASE) && isupper(c)) 934 c = tolower(c); 935 n = *set++; 936 while (--n) 937 if (n > 2 && set[1] == '-') { 938 if (c >= (set[0] & TRIM) && c <= (set[2] & TRIM)) 939 return (af); 940 set += 3; 941 n -= 2; 942 } else 943 if ((*set++ & TRIM) == c) 944 return (af); 945 return (!af); 946 } 947