1 /* 2 * Copyright (c) 1980 Regents of the University of California. 3 * All rights reserved. The Berkeley software License Agreement 4 * specifies the terms and conditions for redistribution. 5 */ 6 7 #ifndef lint 8 static char *sccsid = "@(#)ex_re.c 7.6 (Berkeley) 03/09/87"; 9 #endif not lint 10 11 #include "ex.h" 12 #include "ex_re.h" 13 14 /* 15 * Global, substitute and regular expressions. 16 * Very similar to ed, with some re extensions and 17 * confirmed substitute. 18 */ 19 global(k) 20 bool k; 21 { 22 register char *gp; 23 register int c; 24 register line *a1; 25 char globuf[GBSIZE], *Cwas; 26 int lines = lineDOL(); 27 int oinglobal = inglobal; 28 char *oglobp = globp; 29 30 Cwas = Command; 31 /* 32 * States of inglobal: 33 * 0: ordinary - not in a global command. 34 * 1: text coming from some buffer, not tty. 35 * 2: like 1, but the source of the buffer is a global command. 36 * Hence you're only in a global command if inglobal==2. This 37 * strange sounding convention is historically derived from 38 * everybody simulating a global command. 39 */ 40 if (inglobal==2) 41 error("Global within global@not allowed"); 42 markDOT(); 43 setall(); 44 nonzero(); 45 if (skipend()) 46 error("Global needs re|Missing regular expression for global"); 47 c = ex_getchar(); 48 ignore(compile(c, 1)); 49 savere(scanre); 50 gp = globuf; 51 while ((c = ex_getchar()) != '\n') { 52 switch (c) { 53 54 case EOF: 55 c = '\n'; 56 goto brkwh; 57 58 case '\\': 59 c = ex_getchar(); 60 switch (c) { 61 62 case '\\': 63 ungetchar(c); 64 break; 65 66 case '\n': 67 break; 68 69 default: 70 *gp++ = '\\'; 71 break; 72 } 73 break; 74 } 75 *gp++ = c; 76 if (gp >= &globuf[GBSIZE - 2]) 77 error("Global command too long"); 78 } 79 brkwh: 80 ungetchar(c); 81 newline(); 82 *gp++ = c; 83 *gp++ = 0; 84 saveall(); 85 inglobal = 2; 86 for (a1 = one; a1 <= dol; a1++) { 87 *a1 &= ~01; 88 if (a1 >= addr1 && a1 <= addr2 && execute(0, a1) == k) 89 *a1 |= 01; 90 } 91 #ifdef notdef 92 /* 93 * This code is commented out for now. The problem is that we don't 94 * fix up the undo area the way we should. Basically, I think what has 95 * to be done is to copy the undo area down (since we shrunk everything) 96 * and move the various pointers into it down too. I will do this later 97 * when I have time. (Mark, 10-20-80) 98 */ 99 /* 100 * Special case: g/.../d (avoid n^2 algorithm) 101 */ 102 if (globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') { 103 gdelete(); 104 return; 105 } 106 #endif 107 if (inopen) 108 inopen = -1; 109 /* 110 * Now for each marked line, set dot there and do the commands. 111 * Note the n^2 behavior here for lots of lines matching. 112 * This is really needed: in some cases you could delete lines, 113 * causing a marked line to be moved before a1 and missed if 114 * we didn't restart at zero each time. 115 */ 116 for (a1 = one; a1 <= dol; a1++) { 117 if (*a1 & 01) { 118 *a1 &= ~01; 119 dot = a1; 120 globp = globuf; 121 commands(1, 1); 122 a1 = zero; 123 } 124 } 125 globp = oglobp; 126 inglobal = oinglobal; 127 endline = 1; 128 Command = Cwas; 129 netchHAD(lines); 130 setlastchar(EOF); 131 if (inopen) { 132 ungetchar(EOF); 133 inopen = 1; 134 } 135 } 136 137 /* 138 * gdelete: delete inside a global command. Handles the 139 * special case g/r.e./d. All lines to be deleted have 140 * already been marked. Squeeze the remaining lines together. 141 * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/, 142 * and g/r.e./.,/r.e.2/d are not treated specially. There is no 143 * good reason for this except the question: where to you draw the line? 144 */ 145 gdelete() 146 { 147 register line *a1, *a2, *a3; 148 149 a3 = dol; 150 /* find first marked line. can skip all before it */ 151 for (a1=zero; (*a1&01)==0; a1++) 152 if (a1>=a3) 153 return; 154 /* copy down unmarked lines, compacting as we go. */ 155 for (a2=a1+1; a2<=a3;) { 156 if (*a2&01) { 157 a2++; /* line is marked, skip it */ 158 dot = a1; /* dot left after line deletion */ 159 } else 160 *a1++ = *a2++; /* unmarked, copy it */ 161 } 162 dol = a1-1; 163 if (dot>dol) 164 dot = dol; 165 change(); 166 } 167 168 bool cflag; 169 int scount, slines, stotal; 170 171 substitute(c) 172 int c; 173 { 174 register line *addr; 175 register int n; 176 int gsubf, hopcount; 177 178 gsubf = compsub(c); 179 if(FIXUNDO) 180 save12(), undkind = UNDCHANGE; 181 stotal = 0; 182 slines = 0; 183 for (addr = addr1; addr <= addr2; addr++) { 184 scount = hopcount = 0; 185 if (dosubcon(0, addr) == 0) 186 continue; 187 if (gsubf) { 188 /* 189 * The loop can happen from s/\</&/g 190 * but we don't want to break other, reasonable cases. 191 */ 192 while (*loc2) { 193 if (++hopcount > sizeof linebuf) 194 error("substitution loop"); 195 if (dosubcon(1, addr) == 0) 196 break; 197 } 198 } 199 if (scount) { 200 stotal += scount; 201 slines++; 202 putmark(addr); 203 n = append(getsub, addr); 204 addr += n; 205 addr2 += n; 206 } 207 } 208 if (stotal == 0 && !inglobal && !cflag) 209 error("Fail|Substitute pattern match failed"); 210 snote(stotal, slines); 211 return (stotal); 212 } 213 214 compsub(ch) 215 { 216 register int seof, c, uselastre; 217 static int gsubf; 218 219 if (!value(EDCOMPATIBLE)) 220 gsubf = cflag = 0; 221 uselastre = 0; 222 switch (ch) { 223 224 case 's': 225 ignore(skipwh()); 226 seof = ex_getchar(); 227 if (endcmd(seof) || any(seof, "gcr")) { 228 ungetchar(seof); 229 goto redo; 230 } 231 if (isalpha(seof) || isdigit(seof)) 232 error("Substitute needs re|Missing regular expression for substitute"); 233 seof = compile(seof, 1); 234 uselastre = 1; 235 comprhs(seof); 236 gsubf = 0; 237 cflag = 0; 238 break; 239 240 case '~': 241 uselastre = 1; 242 /* fall into ... */ 243 case '&': 244 redo: 245 if (re.Expbuf[0] == 0) 246 error("No previous re|No previous regular expression"); 247 if (subre.Expbuf[0] == 0) 248 error("No previous substitute re|No previous substitute to repeat"); 249 break; 250 } 251 for (;;) { 252 c = ex_getchar(); 253 switch (c) { 254 255 case 'g': 256 gsubf = !gsubf; 257 continue; 258 259 case 'c': 260 cflag = !cflag; 261 continue; 262 263 case 'r': 264 uselastre = 1; 265 continue; 266 267 default: 268 ungetchar(c); 269 setcount(); 270 newline(); 271 if (uselastre) 272 savere(subre); 273 else 274 resre(subre); 275 return (gsubf); 276 } 277 } 278 } 279 280 comprhs(seof) 281 int seof; 282 { 283 register char *rp, *orp; 284 register int c; 285 char orhsbuf[RHSSIZE]; 286 287 rp = rhsbuf; 288 CP(orhsbuf, rp); 289 for (;;) { 290 c = ex_getchar(); 291 if (c == seof) 292 break; 293 switch (c) { 294 295 case '\\': 296 c = ex_getchar(); 297 if (c == EOF) { 298 ungetchar(c); 299 break; 300 } 301 if (value(MAGIC)) { 302 /* 303 * When "magic", \& turns into a plain &, 304 * and all other chars work fine quoted. 305 */ 306 if (c != '&') 307 c |= QUOTE; 308 break; 309 } 310 magic: 311 if (c == '~') { 312 for (orp = orhsbuf; *orp; *rp++ = *orp++) 313 if (rp >= &rhsbuf[RHSSIZE - 1]) 314 goto toobig; 315 continue; 316 } 317 c |= QUOTE; 318 break; 319 320 case '\n': 321 case EOF: 322 if (!(globp && globp[0])) { 323 ungetchar(c); 324 goto endrhs; 325 } 326 327 case '~': 328 case '&': 329 if (value(MAGIC)) 330 goto magic; 331 break; 332 } 333 if (rp >= &rhsbuf[RHSSIZE - 1]) { 334 toobig: 335 *rp = 0; 336 error("Replacement pattern too long@- limit 256 characters"); 337 } 338 *rp++ = c; 339 } 340 endrhs: 341 *rp++ = 0; 342 } 343 344 getsub() 345 { 346 register char *p; 347 348 if ((p = linebp) == 0) 349 return (EOF); 350 strcLIN(p); 351 linebp = 0; 352 return (0); 353 } 354 355 dosubcon(f, a) 356 bool f; 357 line *a; 358 { 359 360 if (execute(f, a) == 0) 361 return (0); 362 if (confirmed(a)) { 363 dosub(); 364 scount++; 365 } 366 return (1); 367 } 368 369 confirmed(a) 370 line *a; 371 { 372 register int c, ch; 373 374 if (cflag == 0) 375 return (1); 376 pofix(); 377 pline(lineno(a)); 378 if (inopen) 379 ex_putchar('\n' | QUOTE); 380 c = column(loc1 - 1); 381 ugo(c - 1 + (inopen ? 1 : 0), ' '); 382 ugo(column(loc2 - 1) - c, '^'); 383 flush(); 384 ch = c = getkey(); 385 again: 386 if (c == '\r') 387 c = '\n'; 388 if (inopen) 389 ex_putchar(c), flush(); 390 if (c != '\n' && c != EOF) { 391 c = getkey(); 392 goto again; 393 } 394 noteinp(); 395 return (ch == 'y'); 396 } 397 398 getch() 399 { 400 char c; 401 402 if (read(2, &c, 1) != 1) 403 return (EOF); 404 return (c & TRIM); 405 } 406 407 ugo(cnt, with) 408 int with; 409 int cnt; 410 { 411 412 if (cnt > 0) 413 do 414 ex_putchar(with); 415 while (--cnt > 0); 416 } 417 418 int casecnt; 419 bool destuc; 420 421 dosub() 422 { 423 register char *lp, *sp, *rp; 424 int c; 425 426 lp = linebuf; 427 sp = genbuf; 428 rp = rhsbuf; 429 while (lp < loc1) 430 *sp++ = *lp++; 431 casecnt = 0; 432 while (c = *rp++) { 433 /* ^V <return> from vi to split lines */ 434 if (c == '\r') 435 c = '\n'; 436 437 if (c & QUOTE) 438 switch (c & TRIM) { 439 440 case '&': 441 sp = place(sp, loc1, loc2); 442 if (sp == 0) 443 goto ovflo; 444 continue; 445 446 case 'l': 447 casecnt = 1; 448 destuc = 0; 449 continue; 450 451 case 'L': 452 casecnt = LBSIZE; 453 destuc = 0; 454 continue; 455 456 case 'u': 457 casecnt = 1; 458 destuc = 1; 459 continue; 460 461 case 'U': 462 casecnt = LBSIZE; 463 destuc = 1; 464 continue; 465 466 case 'E': 467 case 'e': 468 casecnt = 0; 469 continue; 470 } 471 if (c < 0 && (c &= TRIM) >= '1' && c < nbra + '1') { 472 sp = place(sp, braslist[c - '1'], braelist[c - '1']); 473 if (sp == 0) 474 goto ovflo; 475 continue; 476 } 477 if (casecnt) 478 *sp++ = fixcase(c & TRIM); 479 else 480 *sp++ = c & TRIM; 481 if (sp >= &genbuf[LBSIZE]) 482 ovflo: 483 error("Line overflow@in substitute"); 484 } 485 lp = loc2; 486 loc2 = sp + (linebuf - genbuf); 487 while (*sp++ = *lp++) 488 if (sp >= &genbuf[LBSIZE]) 489 goto ovflo; 490 strcLIN(genbuf); 491 } 492 493 fixcase(c) 494 register int c; 495 { 496 497 if (casecnt == 0) 498 return (c); 499 casecnt--; 500 if (destuc) { 501 if (islower(c)) 502 c = toupper(c); 503 } else 504 if (isupper(c)) 505 c = tolower(c); 506 return (c); 507 } 508 509 char * 510 place(sp, l1, l2) 511 register char *sp, *l1, *l2; 512 { 513 514 while (l1 < l2) { 515 *sp++ = fixcase(*l1++); 516 if (sp >= &genbuf[LBSIZE]) 517 return (0); 518 } 519 return (sp); 520 } 521 522 snote(total, lines) 523 register int total, lines; 524 { 525 526 if (!notable(total)) 527 return; 528 ex_printf(mesg("%d subs|%d substitutions"), total); 529 if (lines != 1 && lines != total) 530 ex_printf(" on %d lines", lines); 531 noonl(); 532 flush(); 533 } 534 535 compile(eof, oknl) 536 int eof; 537 int oknl; 538 { 539 register int c; 540 register char *ep; 541 char *lastep; 542 char bracket[NBRA], *bracketp, *rhsp; 543 int cclcnt; 544 545 if (isalpha(eof) || isdigit(eof)) 546 error("Regular expressions cannot be delimited by letters or digits"); 547 ep = expbuf; 548 c = ex_getchar(); 549 if (eof == '\\') 550 switch (c) { 551 552 case '/': 553 case '?': 554 if (scanre.Expbuf[0] == 0) 555 error("No previous scan re|No previous scanning regular expression"); 556 resre(scanre); 557 return (c); 558 559 case '&': 560 if (subre.Expbuf[0] == 0) 561 error("No previous substitute re|No previous substitute regular expression"); 562 resre(subre); 563 return (c); 564 565 default: 566 error("Badly formed re|Regular expression \\ must be followed by / or ?"); 567 } 568 if (c == eof || c == '\n' || c == EOF) { 569 if (*ep == 0) 570 error("No previous re|No previous regular expression"); 571 if (c == '\n' && oknl == 0) 572 error("Missing closing delimiter@for regular expression"); 573 if (c != eof) 574 ungetchar(c); 575 return (eof); 576 } 577 bracketp = bracket; 578 nbra = 0; 579 circfl = 0; 580 if (c == '^') { 581 c = ex_getchar(); 582 circfl++; 583 } 584 ungetchar(c); 585 for (;;) { 586 if (ep >= &expbuf[ESIZE - 2]) 587 complex: 588 cerror("Re too complex|Regular expression too complicated"); 589 c = ex_getchar(); 590 if (c == eof || c == EOF) { 591 if (bracketp != bracket) 592 cerror("Unmatched \\(|More \\('s than \\)'s in regular expression"); 593 *ep++ = CEOFC; 594 if (c == EOF) 595 ungetchar(c); 596 return (eof); 597 } 598 if (value(MAGIC)) { 599 if (c != '*' || ep == expbuf) 600 lastep = ep; 601 } else 602 if (c != '\\' || peekchar() != '*' || ep == expbuf) 603 lastep = ep; 604 switch (c) { 605 606 case '\\': 607 c = ex_getchar(); 608 switch (c) { 609 610 case '(': 611 if (nbra >= NBRA) 612 cerror("Awash in \\('s!|Too many \\('d subexressions in a regular expression"); 613 *bracketp++ = nbra; 614 *ep++ = CBRA; 615 *ep++ = nbra++; 616 continue; 617 618 case ')': 619 if (bracketp <= bracket) 620 cerror("Extra \\)|More \\)'s than \\('s in regular expression"); 621 *ep++ = CKET; 622 *ep++ = *--bracketp; 623 continue; 624 625 case '<': 626 *ep++ = CBRC; 627 continue; 628 629 case '>': 630 *ep++ = CLET; 631 continue; 632 } 633 if (value(MAGIC) == 0) 634 magic: 635 switch (c) { 636 637 case '.': 638 *ep++ = CDOT; 639 continue; 640 641 case '~': 642 rhsp = rhsbuf; 643 while (*rhsp) { 644 if (*rhsp & QUOTE) { 645 c = *rhsp & TRIM; 646 if (c == '&') 647 error("Replacement pattern contains &@- cannot use in re"); 648 if (c >= '1' && c <= '9') 649 error("Replacement pattern contains \\d@- cannot use in re"); 650 } 651 if (ep >= &expbuf[ESIZE-2]) 652 goto complex; 653 *ep++ = CCHR; 654 *ep++ = *rhsp++ & TRIM; 655 } 656 continue; 657 658 case '*': 659 if (ep == expbuf) 660 break; 661 if (*lastep == CBRA || *lastep == CKET) 662 cerror("Illegal *|Can't * a \\( ... \\) in regular expression"); 663 if (*lastep == CCHR && (lastep[1] & QUOTE)) 664 cerror("Illegal *|Can't * a \\n in regular expression"); 665 *lastep |= STAR; 666 continue; 667 668 case '[': 669 *ep++ = CCL; 670 *ep++ = 0; 671 cclcnt = 1; 672 c = ex_getchar(); 673 if (c == '^') { 674 c = ex_getchar(); 675 ep[-2] = NCCL; 676 } 677 if (c == ']') 678 cerror("Bad character class|Empty character class '[]' or '[^]' cannot match"); 679 while (c != ']') { 680 if (c == '\\' && any(peekchar(), "]-^\\")) 681 c = ex_getchar() | QUOTE; 682 if (c == '\n' || c == EOF) 683 cerror("Missing ]"); 684 *ep++ = c; 685 cclcnt++; 686 if (ep >= &expbuf[ESIZE]) 687 goto complex; 688 c = ex_getchar(); 689 } 690 lastep[1] = cclcnt; 691 continue; 692 } 693 if (c == EOF) { 694 ungetchar(EOF); 695 c = '\\'; 696 goto defchar; 697 } 698 *ep++ = CCHR; 699 if (c == '\n') 700 cerror("No newlines in re's|Can't escape newlines into regular expressions"); 701 /* 702 if (c < '1' || c > NBRA + '1') { 703 */ 704 *ep++ = c; 705 continue; 706 /* 707 } 708 c -= '1'; 709 if (c >= nbra) 710 cerror("Bad \\n|\\n in regular expression with n greater than the number of \\('s"); 711 *ep++ = c | QUOTE; 712 continue; 713 */ 714 715 case '\n': 716 if (oknl) { 717 ungetchar(c); 718 *ep++ = CEOFC; 719 return (eof); 720 } 721 cerror("Badly formed re|Missing closing delimiter for regular expression"); 722 723 case '$': 724 if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') { 725 *ep++ = CDOL; 726 continue; 727 } 728 goto defchar; 729 730 case '.': 731 case '~': 732 case '*': 733 case '[': 734 if (value(MAGIC)) 735 goto magic; 736 defchar: 737 default: 738 *ep++ = CCHR; 739 *ep++ = c; 740 continue; 741 } 742 } 743 } 744 745 cerror(s) 746 char *s; 747 { 748 749 expbuf[0] = 0; 750 error(s); 751 } 752 753 same(a, b) 754 register int a, b; 755 { 756 757 return (a == b || value(IGNORECASE) && 758 ((islower(a) && toupper(a) == b) || (islower(b) && toupper(b) == a))); 759 } 760 761 char *locs; 762 763 /* VARARGS1 */ 764 execute(gf, addr) 765 line *addr; 766 { 767 register char *p1, *p2; 768 register int c; 769 770 if (gf) { 771 if (circfl) 772 return (0); 773 locs = p1 = loc2; 774 } else { 775 if (addr == zero) 776 return (0); 777 p1 = linebuf; 778 getline(*addr); 779 locs = 0; 780 } 781 p2 = expbuf; 782 if (circfl) { 783 loc1 = p1; 784 return (advance(p1, p2)); 785 } 786 /* fast check for first character */ 787 if (*p2 == CCHR) { 788 c = p2[1]; 789 do { 790 if (c != *p1 && (!value(IGNORECASE) || 791 !((islower(c) && toupper(c) == *p1) || 792 (islower(*p1) && toupper(*p1) == c)))) 793 continue; 794 if (advance(p1, p2)) { 795 loc1 = p1; 796 return (1); 797 } 798 } while (*p1++); 799 return (0); 800 } 801 /* regular algorithm */ 802 do { 803 if (advance(p1, p2)) { 804 loc1 = p1; 805 return (1); 806 } 807 } while (*p1++); 808 return (0); 809 } 810 811 #define uletter(c) (isalpha(c) || c == '_') 812 813 advance(lp, ep) 814 register char *lp, *ep; 815 { 816 register char *curlp; 817 818 for (;;) switch (*ep++) { 819 820 case CCHR: 821 /* useless 822 if (*ep & QUOTE) { 823 c = *ep++ & TRIM; 824 sp = braslist[c]; 825 sp1 = braelist[c]; 826 while (sp < sp1) { 827 if (!same(*sp, *lp)) 828 return (0); 829 sp++, lp++; 830 } 831 continue; 832 } 833 */ 834 if (!same(*ep, *lp)) 835 return (0); 836 ep++, lp++; 837 continue; 838 839 case CDOT: 840 if (*lp++) 841 continue; 842 return (0); 843 844 case CDOL: 845 if (*lp == 0) 846 continue; 847 return (0); 848 849 case CEOFC: 850 loc2 = lp; 851 return (1); 852 853 case CCL: 854 if (cclass(ep, *lp++, 1)) { 855 ep += *ep; 856 continue; 857 } 858 return (0); 859 860 case NCCL: 861 if (cclass(ep, *lp++, 0)) { 862 ep += *ep; 863 continue; 864 } 865 return (0); 866 867 case CBRA: 868 braslist[*ep++] = lp; 869 continue; 870 871 case CKET: 872 braelist[*ep++] = lp; 873 continue; 874 875 case CDOT|STAR: 876 curlp = lp; 877 while (*lp++) 878 continue; 879 goto star; 880 881 case CCHR|STAR: 882 curlp = lp; 883 while (same(*lp, *ep)) 884 lp++; 885 lp++; 886 ep++; 887 goto star; 888 889 case CCL|STAR: 890 case NCCL|STAR: 891 curlp = lp; 892 while (cclass(ep, *lp++, ep[-1] == (CCL|STAR))) 893 continue; 894 ep += *ep; 895 goto star; 896 star: 897 do { 898 lp--; 899 if (lp == locs) 900 break; 901 if (advance(lp, ep)) 902 return (1); 903 } while (lp > curlp); 904 return (0); 905 906 case CBRC: 907 if (lp == linebuf) 908 continue; 909 if ((isdigit(*lp) || uletter(*lp)) && !uletter(lp[-1]) && !isdigit(lp[-1])) 910 continue; 911 return (0); 912 913 case CLET: 914 if (!uletter(*lp) && !isdigit(*lp)) 915 continue; 916 return (0); 917 918 default: 919 error("Re internal error"); 920 } 921 } 922 923 cclass(set, c, af) 924 register char *set; 925 register int c; 926 int af; 927 { 928 register int n; 929 930 if (c == 0) 931 return (0); 932 if (value(IGNORECASE) && isupper(c)) 933 c = tolower(c); 934 n = *set++; 935 while (--n) 936 if (n > 2 && set[1] == '-') { 937 if (c >= (set[0] & TRIM) && c <= (set[2] & TRIM)) 938 return (af); 939 set += 3; 940 n -= 2; 941 } else 942 if ((*set++ & TRIM) == c) 943 return (af); 944 return (!af); 945 } 946