1 /* $OpenBSD: ex_subst.c,v 1.22 2015/01/16 06:40:14 deraadt Exp $ */ 2 3 /*- 4 * Copyright (c) 1992, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * Copyright (c) 1992, 1993, 1994, 1995, 1996 7 * Keith Bostic. All rights reserved. 8 * 9 * See the LICENSE file for redistribution information. 10 */ 11 12 #include "config.h" 13 14 #include <sys/queue.h> 15 #include <sys/time.h> 16 17 #include <bitstring.h> 18 #include <ctype.h> 19 #include <errno.h> 20 #include <limits.h> 21 #include <stdio.h> 22 #include <stdlib.h> 23 #include <string.h> 24 #include <unistd.h> 25 26 #include "../common/common.h" 27 #include "../vi/vi.h" 28 29 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b)) 30 31 #define SUB_FIRST 0x01 /* The 'r' flag isn't reasonable. */ 32 #define SUB_MUSTSETR 0x02 /* The 'r' flag is required. */ 33 34 static int re_conv(SCR *, char **, size_t *, int *); 35 static int re_cscope_conv(SCR *, char **, size_t *, int *); 36 static int re_sub(SCR *, char *, char **, size_t *, size_t *, regmatch_t [10]); 37 static int re_tag_conv(SCR *, char **, size_t *, int *); 38 static int s(SCR *, EXCMD *, char *, regex_t *, u_int); 39 40 /* 41 * ex_s -- 42 * [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]] 43 * 44 * Substitute on lines matching a pattern. 45 * 46 * PUBLIC: int ex_s(SCR *, EXCMD *); 47 */ 48 int 49 ex_s(SCR *sp, EXCMD *cmdp) 50 { 51 regex_t *re; 52 size_t blen, len; 53 u_int flags; 54 int delim; 55 char *bp, *ptrn, *rep, *p, *t; 56 57 /* 58 * Skip leading white space. 59 * 60 * !!! 61 * Historic vi allowed any non-alphanumeric to serve as the 62 * substitution command delimiter. 63 * 64 * !!! 65 * If the arguments are empty, it's the same as &, i.e. we 66 * repeat the last substitution. 
67 */ 68 if (cmdp->argc == 0) 69 goto subagain; 70 for (p = cmdp->argv[0]->bp, 71 len = cmdp->argv[0]->len; len > 0; --len, ++p) { 72 if (!isblank(*p)) 73 break; 74 } 75 if (len == 0) 76 subagain: return (ex_subagain(sp, cmdp)); 77 78 delim = *p++; 79 if (isalnum(delim) || delim == '\\') 80 return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR)); 81 82 /* 83 * !!! 84 * The full-blown substitute command reset the remembered 85 * state of the 'c' and 'g' suffices. 86 */ 87 sp->c_suffix = sp->g_suffix = 0; 88 89 /* 90 * Get the pattern string, toss escaping characters. 91 * 92 * !!! 93 * Historic vi accepted any of the following forms: 94 * 95 * :s/abc/def/ change "abc" to "def" 96 * :s/abc/def change "abc" to "def" 97 * :s/abc/ delete "abc" 98 * :s/abc delete "abc" 99 * 100 * QUOTING NOTE: 101 * 102 * Only toss an escaping character if it escapes a delimiter. 103 * This means that "s/A/\\\\f" replaces "A" with "\\f". It 104 * would be nice to be more regular, i.e. for each layer of 105 * escaping a single escaping character is removed, but that's 106 * not how the historic vi worked. 107 */ 108 for (ptrn = t = p;;) { 109 if (p[0] == '\0' || p[0] == delim) { 110 if (p[0] == delim) 111 ++p; 112 /* 113 * !!! 114 * Nul terminate the pattern string -- it's passed 115 * to regcomp which doesn't understand anything else. 116 */ 117 *t = '\0'; 118 break; 119 } 120 if (p[0] == '\\') { 121 if (p[1] == delim) 122 ++p; 123 else if (p[1] == '\\') 124 *t++ = *p++; 125 } 126 *t++ = *p++; 127 } 128 129 /* 130 * If the pattern string is empty, use the last RE (not just the 131 * last substitution RE). 132 */ 133 if (*ptrn == '\0') { 134 if (sp->re == NULL) { 135 ex_emsg(sp, NULL, EXM_NOPREVRE); 136 return (1); 137 } 138 139 /* Re-compile the RE if necessary. */ 140 if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, 141 sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH)) 142 return (1); 143 flags = 0; 144 } else { 145 /* 146 * !!! 147 * Compile the RE. 
Historic practice is that substitutes set 148 * the search direction as well as both substitute and search 149 * RE's. We compile the RE twice, as we don't want to bother 150 * ref counting the pattern string and (opaque) structure. 151 */ 152 if (re_compile(sp, ptrn, t - ptrn, 153 &sp->re, &sp->re_len, &sp->re_c, RE_C_SEARCH)) 154 return (1); 155 if (re_compile(sp, ptrn, t - ptrn, 156 &sp->subre, &sp->subre_len, &sp->subre_c, RE_C_SUBST)) 157 return (1); 158 159 flags = SUB_FIRST; 160 sp->searchdir = FORWARD; 161 } 162 re = &sp->re_c; 163 164 /* 165 * Get the replacement string. 166 * 167 * The special character & (\& if O_MAGIC not set) matches the 168 * entire RE. No handling of & is required here, it's done by 169 * re_sub(). 170 * 171 * The special character ~ (\~ if O_MAGIC not set) inserts the 172 * previous replacement string into this replacement string. 173 * Count ~'s to figure out how much space we need. We could 174 * special case nonexistent last patterns or whether or not 175 * O_MAGIC is set, but it's probably not worth the effort. 176 * 177 * QUOTING NOTE: 178 * 179 * Only toss an escaping character if it escapes a delimiter or 180 * if O_MAGIC is set and it escapes a tilde. 181 * 182 * !!! 183 * If the entire replacement pattern is "%", then use the last 184 * replacement pattern. This semantic was added to vi in System 185 * V and then percolated elsewhere, presumably around the time 186 * that it was added to their version of ed(1). 187 */ 188 if (p[0] == '\0' || p[0] == delim) { 189 if (p[0] == delim) 190 ++p; 191 if (sp->repl != NULL) 192 free(sp->repl); 193 sp->repl = NULL; 194 sp->repl_len = 0; 195 } else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim)) 196 p += p[1] == delim ? 
2 : 1; 197 else { 198 for (rep = p, len = 0; 199 p[0] != '\0' && p[0] != delim; ++p, ++len) 200 if (p[0] == '~') 201 len += sp->repl_len; 202 GET_SPACE_RET(sp, bp, blen, len); 203 for (t = bp, len = 0, p = rep;;) { 204 if (p[0] == '\0' || p[0] == delim) { 205 if (p[0] == delim) 206 ++p; 207 break; 208 } 209 if (p[0] == '\\') { 210 if (p[1] == delim) 211 ++p; 212 else if (p[1] == '\\') { 213 *t++ = *p++; 214 ++len; 215 } else if (p[1] == '~') { 216 ++p; 217 if (!O_ISSET(sp, O_MAGIC)) 218 goto tilde; 219 } 220 } else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)) { 221 tilde: ++p; 222 memcpy(t, sp->repl, sp->repl_len); 223 t += sp->repl_len; 224 len += sp->repl_len; 225 continue; 226 } 227 *t++ = *p++; 228 ++len; 229 } 230 if ((sp->repl_len = len) != 0) { 231 if (sp->repl != NULL) 232 free(sp->repl); 233 if ((sp->repl = malloc(len)) == NULL) { 234 msgq(sp, M_SYSERR, NULL); 235 FREE_SPACE(sp, bp, blen); 236 return (1); 237 } 238 memcpy(sp->repl, bp, len); 239 } 240 FREE_SPACE(sp, bp, blen); 241 } 242 return (s(sp, cmdp, p, re, flags)); 243 } 244 245 /* 246 * ex_subagain -- 247 * [line [,line]] & [cgr] [count] [#lp]] 248 * 249 * Substitute using the last substitute RE and replacement pattern. 250 * 251 * PUBLIC: int ex_subagain(SCR *, EXCMD *); 252 */ 253 int 254 ex_subagain(SCR *sp, EXCMD *cmdp) 255 { 256 if (sp->subre == NULL) { 257 ex_emsg(sp, NULL, EXM_NOPREVRE); 258 return (1); 259 } 260 if (!F_ISSET(sp, SC_RE_SUBST) && re_compile(sp, 261 sp->subre, sp->subre_len, NULL, NULL, &sp->subre_c, RE_C_SUBST)) 262 return (1); 263 return (s(sp, 264 cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0)); 265 } 266 267 /* 268 * ex_subtilde -- 269 * [line [,line]] ~ [cgr] [count] [#lp]] 270 * 271 * Substitute using the last RE and last substitute replacement pattern. 
272 * 273 * PUBLIC: int ex_subtilde(SCR *, EXCMD *); 274 */ 275 int 276 ex_subtilde(SCR *sp, EXCMD *cmdp) 277 { 278 if (sp->re == NULL) { 279 ex_emsg(sp, NULL, EXM_NOPREVRE); 280 return (1); 281 } 282 if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, 283 sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH)) 284 return (1); 285 return (s(sp, 286 cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0)); 287 } 288 289 /* 290 * s -- 291 * Do the substitution. This stuff is *really* tricky. There are lots of 292 * special cases, and general nastiness. Don't mess with it unless you're 293 * pretty confident. 294 * 295 * The nasty part of the substitution is what happens when the replacement 296 * string contains newlines. It's a bit tricky -- consider the information 297 * that has to be retained for "s/f\(o\)o/^M\1^M\1/". The solution here is 298 * to build a set of newline offsets which we use to break the line up later, 299 * when the replacement is done. Don't change it unless you're *damned* 300 * confident. 
 */

/*
 * NEEDNEWLINE --
 *	Make sure sp->newl can take one more newline offset.  When newl_cnt
 *	has caught up with newl_len the array is grown by 25 slots.  If the
 *	reallocation leaves sp->newl NULL, newl_len is zeroed and the
 *	*enclosing function* returns 1.
 */
#define	NEEDNEWLINE(sp) {						\
	if ((sp)->newl_len == (sp)->newl_cnt) {				\
		(sp)->newl_len += 25;					\
		REALLOCARRAY((sp), (sp)->newl, size_t *,		\
		    (sp)->newl_len, sizeof(size_t));			\
		if ((sp)->newl == NULL) {				\
			(sp)->newl_len = 0;				\
			return (1);					\
		}							\
	}								\
}

/*
 * BUILD --
 *	Append len bytes starting at l to the build buffer.  Relies on the
 *	enclosing scope's lb (buffer), lbclen (bytes used) and lblen (bytes
 *	allocated), growing lb when the new bytes would not fit.  If lb is
 *	NULL after the reallocation, lbclen is zeroed and the *enclosing
 *	function* returns 1.
 */
#define	BUILD(sp, l, len) {						\
	if (lbclen + (len) > lblen) {					\
		lblen += MAXIMUM(lbclen + (len), 256);			\
		REALLOC((sp), lb, char *, lblen);			\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
	}								\
	memcpy(lb + lbclen, (l), (len));				\
	lbclen += (len);						\
}

/*
 * NEEDSP --
 *	Like the growth half of BUILD: make room for len more bytes in lb
 *	without copying anything.  When the buffer had to be reallocated,
 *	pnt is re-pointed at lb + lbclen so the caller's write cursor follows
 *	the (possibly moved) buffer.
 */
#define	NEEDSP(sp, len, pnt) {						\
	if (lbclen + (len) > lblen) {					\
		lblen += MAXIMUM(lbclen + (len), 256);			\
		REALLOC((sp), lb, char *, lblen);			\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
		(pnt) = lb + lbclen;					\
	}								\
}

/*
 * Parameters of s():
 *	sp	screen; its c_suffix/g_suffix, cursor, newl and rptlines
 *		fields are read and updated here.
 *	cmdp	command; addr1/addr2 give the line range (rewritten when a
 *		count is supplied) and flagoff is adjusted by '+' and '-'.
 *	s	remaining argument text holding the flags and count, or NULL
 *		when there is none.  The same variable is later reused as the
 *		pointer to the text of the line being processed.
 *	re	compiled RE to match with; replaced by &sp->re_c when the 'r'
 *		flag is given.
 *	flags	SUB_FIRST makes the 'r' flag an error, SUB_MUSTSETR makes it
 *		mandatory.
 *
 * Returns 0 on success and 1 on error.
 */
static int
s(SCR *sp, EXCMD *cmdp, char *s, regex_t *re, u_int flags)
{
	EVENT ev;
	MARK from, to;
	TEXTH tiq;
	recno_t elno, lno, slno;
	regmatch_t match[10];
	size_t blen, cnt, last, lbclen, lblen, len, llen;
	size_t offset, saved_offset, scno;
	int lflag, nflag, pflag, rflag;
	int didsub, do_eol_match, eflags, empty_ok, eval;
	int linechanged, matched, quit, rval;
	unsigned long ul;
	char *bp, *lb;

	NEEDFILE(sp, cmdp);

	/* Remember the starting cursor so we can tell later if it moved. */
	slno = sp->lno;
	scno = sp->cno;

	/*
	 * !!!
	 * Historically, the 'g' and 'c' suffices were always toggled as flags,
	 * so ":s/A/B/" was the same as ":s/A/B/ccgg".  If O_EDCOMPATIBLE was
	 * not set, they were initialized to 0 for all substitute commands.  If
	 * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
	 * specified substitute/replacement patterns (see ex_s()).
	 */
	if (!O_ISSET(sp, O_EDCOMPATIBLE))
		sp->c_suffix = sp->g_suffix = 0;

	/*
	 * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
	 * it only displayed the last change.  I'd disallow them, but they are
	 * useful in combination with the [v]global commands.  In the current
	 * model the problem is combining them with the 'c' flag -- the screen
	 * would have to flip back and forth between the confirm screen and the
	 * ex print screen, which would be pretty awful.  We do display all
	 * changes, though, for what that's worth.
	 *
	 * !!!
	 * Historic vi was fairly strict about the order of "options", the
	 * count, and "flags".  I'm somewhat fuzzy on the difference between
	 * options and flags, anyway, so this is a simpler approach, and we
	 * just take them in whatever order the user gives them.  (The ex
	 * usage statement doesn't reflect this.)
	 */
	lflag = nflag = pflag = rflag = 0;
	if (s == NULL)
		goto noargs;
	for (lno = OOBLNO; *s != '\0'; ++s)
		switch (*s) {
		case ' ':
		case '\t':
			continue;
		case '+':
			++cmdp->flagoff;
			break;
		case '-':
			--cmdp->flagoff;
			break;
		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			/* Only one count is allowed. */
			if (lno != OOBLNO)
				goto usage;
			errno = 0;
			if ((ul = strtoul(s, &s, 10)) >= UINT_MAX)
				errno = ERANGE;
			/*
			 * NOTE(review): strtoul() leaves s on the first
			 * character after the digits and the loop's ++s then
			 * steps past it.  Only the end-of-string case is
			 * corrected here, so a flag character immediately
			 * following the count appears to be skipped without
			 * being examined -- confirm.
			 */
			if (*s == '\0')		/* Loop increment correction. */
				--s;
			if (errno == ERANGE) {
				if (ul >= UINT_MAX)
					msgq(sp, M_ERR, "153|Count overflow");
				else
					msgq(sp, M_SYSERR, NULL);
				return (1);
			}
			lno = (recno_t)ul;
			/*
			 * In historic vi, the count was inclusive from the
			 * second address.
			 */
			cmdp->addr1.lno = cmdp->addr2.lno;
			cmdp->addr2.lno += lno - 1;
			/* Clamp the end of the range to the last line. */
			if (!db_exist(sp, cmdp->addr2.lno) &&
			    db_last(sp, &cmdp->addr2.lno))
				return (1);
			break;
		case '#':
			nflag = 1;
			break;
		case 'c':
			sp->c_suffix = !sp->c_suffix;

			/*
			 * Ex text structure initialization.
			 *
			 * NOTE(review): this is the only place tiq is
			 * initialized.  If c_suffix is already set on entry
			 * (the reset above is skipped under O_EDCOMPATIBLE)
			 * and no 'c' flag is given, the ex confirmation path
			 * below would use tiq uninitialized -- confirm.
			 */
			if (F_ISSET(sp, SC_EX)) {
				memset(&tiq, 0, sizeof(TEXTH));
				TAILQ_INIT(&tiq);
			}
			break;
		case 'g':
			sp->g_suffix = !sp->g_suffix;
			break;
		case 'l':
			lflag = 1;
			break;
		case 'p':
			pflag = 1;
			break;
		case 'r':
			if (LF_ISSET(SUB_FIRST)) {
				msgq(sp, M_ERR,
		    "155|Regular expression specified; r flag meaningless");
				return (1);
			}
			if (!F_ISSET(sp, SC_RE_SEARCH)) {
				ex_emsg(sp, NULL, EXM_NOPREVRE);
				return (1);
			}
			/* Match with the last search RE instead. */
			rflag = 1;
			re = &sp->re_c;
			break;
		default:
			goto usage;
		}

	if (*s != '\0' || (!rflag && LF_ISSET(SUB_MUSTSETR))) {
usage:		ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
		return (1);
	}

noargs:	if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) {
		msgq(sp, M_ERR,
"156|The #, l and p flags may not be combined with the c flag in vi mode");
		return (1);
	}

	/*
	 * bp:		if interactive, line cache
	 * blen:	if interactive, line cache length
	 * lb:		build buffer pointer.
	 * lbclen:	current length of built buffer.
	 * lblen:	length of build buffer.
	 */
	bp = lb = NULL;
	blen = lbclen = lblen = 0;

	/* For each line... */
	for (matched = quit = 0, lno = cmdp->addr1.lno,
	    elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {

		/* Someone's unhappy, time to stop. */
		if (INTERRUPTED(sp))
			break;

		/* Get the line. */
		if (db_get(sp, lno, DBG_FATAL, &s, &llen))
			goto err;

		/*
		 * Make a local copy if doing confirmation -- when calling
		 * the confirm routine we're likely to lose the cached copy.
		 */
		if (sp->c_suffix) {
			if (bp == NULL) {
				GET_SPACE_RET(sp, bp, blen, llen);
			} else
				ADD_SPACE_RET(sp, bp, blen, llen);
			memcpy(bp, s, llen);
			s = bp;
		}

		/* Start searching from the beginning. */
		offset = 0;
		len = llen;

		/* Reset the build buffer offset. */
		lbclen = 0;

		/* Reset empty match flag. */
		empty_ok = 1;

		/*
		 * We don't want to have to do a setline if the line didn't
		 * change -- keep track of whether or not this line changed.
		 * If doing confirmations, don't want to keep setting the
		 * line if change is refused -- keep track of substitutions.
		 */
		didsub = linechanged = 0;

		/* New line, do an EOL match. */
		do_eol_match = 1;

		/* It's not nul terminated, but we pretend it is. */
		eflags = REG_STARTEND;

		/*
		 * The search area is from s + offset to the EOL.
		 *
		 * Generally, match[0].rm_so is the offset of the start
		 * of the match from the start of the search, and offset
		 * is the offset of the start of the last search.
		 */
nextmatch:	match[0].rm_so = 0;
		match[0].rm_eo = len;

		/* Get the next match. */
		eval = regexec(re, (char *)s + offset, 10, match, eflags);

		/*
		 * There wasn't a match or if there was an error, deal with
		 * it.  If there was a previous match in this line, resolve
		 * the changes into the database.  Otherwise, just move on.
		 */
		if (eval == REG_NOMATCH)
			goto endmatch;
		if (eval != 0) {
			re_error(sp, eval, re);
			goto err;
		}
		matched = 1;

		/* Only the first search can match an anchored expression. */
		eflags |= REG_NOTBOL;

		/*
		 * !!!
		 * It's possible to match 0-length strings -- for example, the
		 * command s;a*;X;, when matched against the string "aabb" will
		 * result in "XbXbX", i.e. the matches are "aa", the space
		 * between the b's and the space between the b's and the end of
		 * the string.  There is a similar space between the beginning
		 * of the string and the a's.  The rule that we use (because vi
		 * historically used it) is that any 0-length match, occurring
		 * immediately after a match, is ignored.  Otherwise, the above
		 * example would have resulted in "XXbXbX".  Another example is
		 * incorrectly using " *" to replace groups of spaces with one
		 * space.
		 *
		 * The way we do this is that if we just had a successful match,
		 * the starting offset does not skip characters, and the match
		 * is empty, ignore the match and move forward.  If there's no
		 * more characters in the string, we were attempting to match
		 * after the last character, so quit.
		 */
		if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) {
			empty_ok = 1;
			if (len == 0)
				goto endmatch;
			BUILD(sp, s + offset, 1)
			++offset;
			--len;
			goto nextmatch;
		}

		/* Confirm change. */
		if (sp->c_suffix) {
			/*
			 * Set the cursor position for confirmation.  Note,
			 * if we matched on a '$', the cursor may be past
			 * the end of line.
			 */
			from.lno = to.lno = lno;
			from.cno = match[0].rm_so + offset;
			to.cno = match[0].rm_eo + offset;
			/*
			 * Both ex and vi have to correct for a change before
			 * the first character in the line.
			 */
			if (llen == 0)
				from.cno = to.cno = 0;
			if (F_ISSET(sp, SC_VI)) {
				/*
				 * Only vi has to correct for a change after
				 * the last character in the line.
				 *
				 * XXX
				 * It would be nice to change the vi code so
				 * that we could display a cursor past EOL.
				 */
				if (to.cno >= llen)
					to.cno = llen - 1;
				if (from.cno >= llen)
					from.cno = llen - 1;

				sp->lno = from.lno;
				sp->cno = from.cno;
				if (vs_refresh(sp, 1))
					goto err;

				vs_update(sp, msg_cat(sp,
				    "169|Confirm change? [n]", NULL), NULL);

				if (v_event_get(sp, &ev, 0, 0))
					goto err;
				switch (ev.e_event) {
				case E_CHARACTER:
					break;
				case E_EOF:
				case E_ERR:
				case E_INTERRUPT:
					goto lquit;
				default:
					v_event_err(sp, &ev);
					goto lquit;
				}
			} else {
				if (ex_print(sp, cmdp, &from, &to, 0) ||
				    ex_scprint(sp, &from, &to))
					goto lquit;
				if (ex_txt(sp, &tiq, 0, TXT_CR))
					goto err;
				/* The answer is the first byte typed. */
				ev.e_c = TAILQ_FIRST(&tiq)->lb[0];
			}

			switch (ev.e_c) {
			case CH_YES:
				break;
			default:
			case CH_NO:
				/*
				 * Refused: copy everything up to the end of
				 * the match through unchanged.
				 */
				didsub = 0;
				BUILD(sp, s +offset, match[0].rm_eo);
				goto skip;
			case CH_QUIT:
				/* Set the quit/interrupted flags. */
lquit:				quit = 1;
				F_SET(sp->gp, G_INTERRUPTED);

				/*
				 * Resolve any changes, then return to (and
				 * exit from) the main loop.
				 */
				goto endmatch;
			}
		}

		/*
		 * Set the cursor to the last position changed, converting
		 * from 1-based to 0-based.
		 */
		sp->lno = lno;
		sp->cno = match[0].rm_so;

		/* Copy the bytes before the match into the build buffer. */
		BUILD(sp, s + offset, match[0].rm_so);

		/* Substitute the matching bytes. */
		didsub = 1;
		if (re_sub(sp, s + offset, &lb, &lbclen, &lblen, match))
			goto err;

		/* Set the change flag so we know this line was modified. */
		linechanged = 1;

		/* Move past the matched bytes. */
skip:		offset += match[0].rm_eo;
		len -= match[0].rm_eo;

		/* A match cannot be followed by an empty pattern. */
		empty_ok = 0;

		/*
		 * If doing a global change with confirmation, we have to
		 * update the screen.  The basic idea is to store the line
		 * so the screen update routines can find it, and restart.
		 */
		if (didsub && sp->c_suffix && sp->g_suffix) {
			/*
			 * The new search offset will be the end of the
			 * modified line.
			 */
			saved_offset = lbclen;

			/* Copy the rest of the line. */
			if (len)
				BUILD(sp, s + offset, len)

			/* Set the new offset. */
			offset = saved_offset;

			/* Store inserted lines, adjusting the build buffer. */
			last = 0;
			if (sp->newl_cnt) {
				for (cnt = 0;
				    cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
					if (db_insert(sp, lno,
					    lb + last, sp->newl[cnt] - last))
						goto err;
					last = sp->newl[cnt] + 1;
					++sp->rptlines[L_ADDED];
				}
				lbclen -= last;
				offset -= last;
				sp->newl_cnt = 0;
			}

			/* Store and retrieve the line. */
			if (db_set(sp, lno, lb + last, lbclen))
				goto err;
			if (db_get(sp, lno, DBG_FATAL, &s, &llen))
				goto err;
			ADD_SPACE_RET(sp, bp, blen, llen)
			memcpy(bp, s, llen);
			s = bp;
			len = llen - offset;

			/* Restart the build. */
			lbclen = 0;
			BUILD(sp, s, offset);

			/*
			 * If we haven't already done the after-the-string
			 * match, do one.  Set REG_NOTEOL so the '$' pattern
			 * only matches once.
			 */
			if (!do_eol_match)
				goto endmatch;
			if (offset == len) {
				do_eol_match = 0;
				eflags |= REG_NOTEOL;
			}
			goto nextmatch;
		}

		/*
		 * If it's a global:
		 *
		 * If at the end of the string, do a test for the after
		 * the string match.  Set REG_NOTEOL so the '$' pattern
		 * only matches once.
		 */
		if (sp->g_suffix && do_eol_match) {
			if (len == 0) {
				do_eol_match = 0;
				eflags |= REG_NOTEOL;
			}
			goto nextmatch;
		}

endmatch:	if (!linechanged)
			continue;

		/* Copy any remaining bytes into the build buffer. */
		if (len)
			BUILD(sp, s + offset, len)

		/* Store inserted lines, adjusting the build buffer. */
		last = 0;
		if (sp->newl_cnt) {
			for (cnt = 0;
			    cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
				if (db_insert(sp,
				    lno, lb + last, sp->newl[cnt] - last))
					goto err;
				last = sp->newl[cnt] + 1;
				++sp->rptlines[L_ADDED];
			}
			lbclen -= last;
			sp->newl_cnt = 0;
		}

		/* Store the changed line. */
		if (db_set(sp, lno, lb + last, lbclen))
			goto err;

		/* Update changed line counter. */
		if (sp->rptlchange != lno) {
			sp->rptlchange = lno;
			++sp->rptlines[L_CHANGED];
		}

		/*
		 * !!!
		 * Display as necessary.  Historic practice is to only
		 * display the last line of a line split into multiple
		 * lines.
		 */
		if (lflag || nflag || pflag) {
			from.lno = to.lno = lno;
			from.cno = to.cno = 0;
			if (lflag)
				(void)ex_print(sp, cmdp, &from, &to, E_C_LIST);
			if (nflag)
				(void)ex_print(sp, cmdp, &from, &to, E_C_HASH);
			if (pflag)
				(void)ex_print(sp, cmdp, &from, &to, E_C_PRINT);
		}
	}

	/*
	 * !!!
	 * Historically, vi attempted to leave the cursor at the same place if
	 * the substitution was done at the current cursor position.  Otherwise
	 * it moved it to the first non-blank of the last line changed.  There
	 * were some problems: for example, :s/$/foo/ with the cursor on the
	 * last character of the line left the cursor on the last character, or
	 * the & command with multiple occurrences of the matching string in the
	 * line usually left the cursor in a fairly random position.
	 *
	 * We try to do the same thing, with the exception that if the user is
	 * doing substitution with confirmation, we move to the last line about
	 * which the user was consulted, as opposed to the last line that they
	 * actually changed.  This prevents a screen flash if the user doesn't
	 * change many of the possible lines.
	 */
	if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
		sp->cno = 0;
		(void)nonblank(sp, sp->lno, &sp->cno);
	}

	/*
	 * If not in a global command, and nothing matched, say so.
	 * Else, if none of the lines displayed, put something up.
	 */
	rval = 0;
	if (!matched) {
		if (!F_ISSET(sp, SC_EX_GLOBAL)) {
			msgq(sp, M_ERR, "157|No match found");
			goto err;
		}
	} else if (!lflag && !nflag && !pflag)
		F_SET(cmdp, E_AUTOPRINT);

	/* The err label is only reachable by goto; normal flow skips it. */
	if (0) {
err:		rval = 1;
	}

	if (bp != NULL)
		FREE_SPACE(sp, bp, blen);
	if (lb != NULL)
		free(lb);
	return (rval);
}

/*
 * re_compile --
 *	Compile the RE.
 *
 *	ptrn/plen	pattern text and its length.
 *	ptrnp/lenp	when ptrnp is non-NULL the pattern is first converted
 *			(re_cscope_conv, re_tag_conv or re_conv, chosen by
 *			flags), any previous *ptrnp is freed, and a
 *			nul-terminated copy of the converted pattern is stored
 *			in *ptrnp with its length in *lenp (if lenp is
 *			non-NULL).  When ptrnp is NULL, ptrn is handed to
 *			regcomp() unchanged.
 *	rep		where the compiled RE is stored.
 *	flags		RE_C_SEARCH/RE_C_SUBST select which saved compiled RE
 *			is released beforehand and which SC_RE_* flag is set
 *			on success; RE_C_CSCOPE/RE_C_TAG select the conversion
 *			and bypass the option-derived regcomp flags;
 *			RE_C_SILENT suppresses the error message.
 *
 *	Returns 0 on success and 1 on failure.
 *
 * PUBLIC: int re_compile(SCR *,
 * PUBLIC:     char *, size_t, char **, size_t *, regex_t *, u_int);
 */
int
re_compile(SCR *sp, char *ptrn, size_t plen, char **ptrnp, size_t *lenp,
    regex_t *rep, u_int flags)
{
	size_t len;
	int reflags, replaced, rval;
	char *p;

	/* Set RE flags. */
	reflags = 0;
	if (!LF_ISSET(RE_C_CSCOPE | RE_C_TAG)) {
		if (O_ISSET(sp, O_EXTENDED))
			reflags |= REG_EXTENDED;
		if (O_ISSET(sp, O_IGNORECASE))
			reflags |= REG_ICASE;
		/* O_ICLOWER: ignore case only if no upper-case is present. */
		if (O_ISSET(sp, O_ICLOWER)) {
			for (p = ptrn, len = plen; len > 0; ++p, --len)
				if (isupper(*p))
					break;
			if (len == 0)
				reflags |= REG_ICASE;
		}
	}

	/* If we're replacing a saved value, clear the old one. */
	if (LF_ISSET(RE_C_SEARCH) && F_ISSET(sp, SC_RE_SEARCH)) {
		regfree(&sp->re_c);
		F_CLR(sp, SC_RE_SEARCH);
	}
	if (LF_ISSET(RE_C_SUBST) && F_ISSET(sp, SC_RE_SUBST)) {
		regfree(&sp->subre_c);
		F_CLR(sp, SC_RE_SUBST);
	}

	/*
	 * If we're saving the string, it's a pattern we haven't seen before,
	 * so convert the vi-style RE's to POSIX 1003.2 RE's.  Save a copy for
	 * later recompilation.  Free any previously saved value.
	 */
	if (ptrnp != NULL) {
		if (LF_ISSET(RE_C_CSCOPE)) {
			if (re_cscope_conv(sp, &ptrn, &plen, &replaced))
				return (1);
			/*
			 * XXX
			 * Currently, the match-any-<blank> expression used in
			 * re_cscope_conv() requires extended RE's.  This may
			 * not be right or safe.
			 */
			reflags |= REG_EXTENDED;
		} else if (LF_ISSET(RE_C_TAG)) {
			if (re_tag_conv(sp, &ptrn, &plen, &replaced))
				return (1);
		} else
			if (re_conv(sp, &ptrn, &plen, &replaced))
				return (1);

		/* Discard previous pattern. */
		if (*ptrnp != NULL) {
			free(*ptrnp);
			*ptrnp = NULL;
		}
		if (lenp != NULL)
			*lenp = plen;

		/*
		 * Copy the string into allocated memory.
		 *
		 * XXX
		 * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
		 * for now.  There's just no other solution.
		 */
		MALLOC(sp, *ptrnp, char *, plen + 1);
		if (*ptrnp != NULL) {
			memcpy(*ptrnp, ptrn, plen);
			(*ptrnp)[plen] = '\0';
		}

		/* Free up conversion-routine-allocated memory. */
		if (replaced)
			FREE_SPACE(sp, ptrn, 0);

		/* Report the allocation failure only after the cleanup. */
		if (*ptrnp == NULL)
			return (1);

		ptrn = *ptrnp;
	}

	/*
	 * XXX
	 * Regcomp isn't 8-bit clean, so we just lost if the pattern
	 * contained a nul.  Bummer!
	 */
	if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
		if (!LF_ISSET(RE_C_SILENT))
			re_error(sp, rval, rep);
		return (1);
	}

	/* Record which saved RE now holds a valid compiled form. */
	if (LF_ISSET(RE_C_SEARCH))
		F_SET(sp, SC_RE_SEARCH);
	if (LF_ISSET(RE_C_SUBST))
		F_SET(sp, SC_RE_SUBST);

	return (0);
}

/*
 * re_conv --
 *	Convert vi's regular expressions into something that the
 *	POSIX 1003.2 RE functions can handle.
 *
 * There are two conversions we make to make vi's RE's (specifically
 * the global, search, and substitute patterns) work with POSIX RE's.
 * We assume that \<ptrn\> does "word" searches, which is non-standard
 * but supported by most regexp libraries.
997 * 998 * 1: If O_MAGIC is not set, strip backslashes from the magic character 999 * set (.[*~) that have them, and add them to the ones that don't. 1000 * 2: If O_MAGIC is not set, the string "\~" is replaced with the text 1001 * from the last substitute command's replacement string. If O_MAGIC 1002 * is set, it's the string "~". 1003 * 1004 * !!!/XXX 1005 * This doesn't exactly match the historic behavior of vi because we do 1006 * the ~ substitution before calling the RE engine, so magic characters 1007 * in the replacement string will be expanded by the RE engine, and they 1008 * weren't historically. It's a bug. 1009 */ 1010 static int 1011 re_conv(SCR *sp, char **ptrnp, size_t *plenp, int *replacedp) 1012 { 1013 size_t blen, len, needlen; 1014 int magic; 1015 char *bp, *p, *t; 1016 1017 /* 1018 * First pass through, we figure out how much space we'll need. 1019 * We do it in two passes, on the grounds that most of the time 1020 * the user is doing a search and won't have magic characters. 1021 * That way we can skip most of the memory allocation and copies. 
1022 */ 1023 magic = 0; 1024 for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len) 1025 switch (*p) { 1026 case '\\': 1027 if (len > 1) { 1028 --len; 1029 switch (*++p) { 1030 case '~': 1031 if (!O_ISSET(sp, O_MAGIC)) { 1032 magic = 1; 1033 needlen += sp->repl_len; 1034 } 1035 break; 1036 case '.': 1037 case '[': 1038 case '*': 1039 if (!O_ISSET(sp, O_MAGIC)) { 1040 magic = 1; 1041 needlen += 1; 1042 } 1043 break; 1044 default: 1045 needlen += 2; 1046 } 1047 } else 1048 needlen += 1; 1049 break; 1050 case '~': 1051 if (O_ISSET(sp, O_MAGIC)) { 1052 magic = 1; 1053 needlen += sp->repl_len; 1054 } 1055 break; 1056 case '.': 1057 case '[': 1058 case '*': 1059 if (!O_ISSET(sp, O_MAGIC)) { 1060 magic = 1; 1061 needlen += 2; 1062 } 1063 break; 1064 default: 1065 needlen += 1; 1066 break; 1067 } 1068 1069 if (!magic) { 1070 *replacedp = 0; 1071 return (0); 1072 } 1073 1074 /* Get enough memory to hold the final pattern. */ 1075 *replacedp = 1; 1076 GET_SPACE_RET(sp, bp, blen, needlen); 1077 1078 for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len) 1079 switch (*p) { 1080 case '\\': 1081 if (len > 1) { 1082 --len; 1083 switch (*++p) { 1084 case '~': 1085 if (O_ISSET(sp, O_MAGIC)) 1086 *t++ = '~'; 1087 else { 1088 memcpy(t, 1089 sp->repl, sp->repl_len); 1090 t += sp->repl_len; 1091 } 1092 break; 1093 case '.': 1094 case '[': 1095 case '*': 1096 if (O_ISSET(sp, O_MAGIC)) 1097 *t++ = '\\'; 1098 *t++ = *p; 1099 break; 1100 default: 1101 *t++ = '\\'; 1102 *t++ = *p; 1103 } 1104 } else 1105 *t++ = '\\'; 1106 break; 1107 case '~': 1108 if (O_ISSET(sp, O_MAGIC)) { 1109 memcpy(t, sp->repl, sp->repl_len); 1110 t += sp->repl_len; 1111 } else 1112 *t++ = '~'; 1113 break; 1114 case '.': 1115 case '[': 1116 case '*': 1117 if (!O_ISSET(sp, O_MAGIC)) 1118 *t++ = '\\'; 1119 *t++ = *p; 1120 break; 1121 default: 1122 *t++ = *p; 1123 break; 1124 } 1125 1126 *ptrnp = bp; 1127 *plenp = t - bp; 1128 return (0); 1129 } 1130 1131 /* 1132 * re_tag_conv -- 1133 * Convert a tags 
search path into something that the POSIX 1134 * 1003.2 RE functions can handle. 1135 */ 1136 static int 1137 re_tag_conv(SCR *sp, char **ptrnp, size_t *plenp, int *replacedp) 1138 { 1139 size_t blen, len; 1140 int lastdollar; 1141 char *bp, *p, *t; 1142 1143 len = *plenp; 1144 1145 /* Max memory usage is 2 times the length of the string. */ 1146 *replacedp = 1; 1147 GET_SPACE_RET(sp, bp, blen, len * 2); 1148 1149 p = *ptrnp; 1150 t = bp; 1151 1152 /* If the last character is a '/' or '?', we just strip it. */ 1153 if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?')) 1154 --len; 1155 1156 /* If the next-to-last or last character is a '$', it's magic. */ 1157 if (len > 0 && p[len - 1] == '$') { 1158 --len; 1159 lastdollar = 1; 1160 } else 1161 lastdollar = 0; 1162 1163 /* If the first character is a '/' or '?', we just strip it. */ 1164 if (len > 0 && (p[0] == '/' || p[0] == '?')) { 1165 ++p; 1166 --len; 1167 } 1168 1169 /* If the first or second character is a '^', it's magic. */ 1170 if (p[0] == '^') { 1171 *t++ = *p++; 1172 --len; 1173 } 1174 1175 /* 1176 * Escape every other magic character we can find, meanwhile stripping 1177 * the backslashes ctags inserts when escaping the search delimiter 1178 * characters. 1179 */ 1180 for (; len > 0; --len) { 1181 if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) { 1182 ++p; 1183 --len; 1184 } else if (strchr("^.[]$*", p[0])) 1185 *t++ = '\\'; 1186 *t++ = *p++; 1187 if (len == 0) 1188 break; 1189 } 1190 if (lastdollar) 1191 *t++ = '$'; 1192 1193 *ptrnp = bp; 1194 *plenp = t - bp; 1195 return (0); 1196 } 1197 1198 /* 1199 * re_cscope_conv -- 1200 * Convert a cscope search path into something that the POSIX 1201 * 1003.2 RE functions can handle. 
1202 */ 1203 static int 1204 re_cscope_conv(SCR *sp, char **ptrnp, size_t *plenp, int *replacedp) 1205 { 1206 size_t blen, len, nspaces; 1207 char *bp, *p, *t; 1208 1209 /* 1210 * Each space in the source line printed by cscope represents an 1211 * arbitrary sequence of spaces, tabs, and comments. 1212 */ 1213 #define CSCOPE_RE_SPACE "([ \t]|/\\*([^*]|\\*/)*\\*/)*" 1214 for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len) 1215 if (*p == ' ') 1216 ++nspaces; 1217 1218 /* 1219 * Allocate plenty of space: 1220 * the string, plus potential escaping characters; 1221 * nspaces + 2 copies of CSCOPE_RE_SPACE; 1222 * ^, $, nul terminator characters. 1223 */ 1224 *replacedp = 1; 1225 len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3; 1226 GET_SPACE_RET(sp, bp, blen, len); 1227 1228 p = *ptrnp; 1229 t = bp; 1230 1231 *t++ = '^'; 1232 memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1); 1233 t += sizeof(CSCOPE_RE_SPACE) - 1; 1234 1235 for (len = *plenp; len > 0; ++p, --len) 1236 if (*p == ' ') { 1237 memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1); 1238 t += sizeof(CSCOPE_RE_SPACE) - 1; 1239 } else { 1240 if (strchr("\\^.[]$*+?()|{}", *p)) 1241 *t++ = '\\'; 1242 *t++ = *p; 1243 } 1244 1245 memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1); 1246 t += sizeof(CSCOPE_RE_SPACE) - 1; 1247 *t++ = '$'; 1248 1249 *ptrnp = bp; 1250 *plenp = t - bp; 1251 return (0); 1252 } 1253 1254 /* 1255 * re_error -- 1256 * Report a regular expression error. 1257 * 1258 * PUBLIC: void re_error(SCR *, int, regex_t *); 1259 */ 1260 void 1261 re_error(SCR *sp, int errcode, regex_t *preg) 1262 { 1263 size_t s; 1264 char *oe; 1265 1266 s = regerror(errcode, preg, "", 0); 1267 if ((oe = malloc(s)) == NULL) 1268 msgq(sp, M_SYSERR, NULL); 1269 else { 1270 (void)regerror(errcode, preg, oe, s); 1271 msgq(sp, M_ERR, "RE error: %s", oe); 1272 free(oe); 1273 } 1274 } 1275 1276 /* 1277 * re_sub -- 1278 * Do the substitution for a regular expression. 
1279 */ 1280 static int 1281 re_sub(SCR *sp, char *ip, char **lbp, size_t *lbclenp, size_t *lblenp, 1282 regmatch_t match[10]) 1283 { 1284 enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv; 1285 size_t lbclen, lblen; /* Local copies. */ 1286 size_t mlen; /* Match length. */ 1287 size_t rpl; /* Remaining replacement length. */ 1288 char *rp; /* Replacement pointer. */ 1289 int ch; 1290 int no; /* Match replacement offset. */ 1291 char *p, *t; /* Buffer pointers. */ 1292 char *lb; /* Local copies. */ 1293 1294 lb = *lbp; /* Get local copies. */ 1295 lbclen = *lbclenp; 1296 lblen = *lblenp; 1297 1298 /* 1299 * QUOTING NOTE: 1300 * 1301 * There are some special sequences that vi provides in the 1302 * replacement patterns. 1303 * & string the RE matched (\& if nomagic set) 1304 * \# n-th regular subexpression 1305 * \E end \U, \L conversion 1306 * \e end \U, \L conversion 1307 * \l convert the next character to lower-case 1308 * \L convert to lower-case, until \E, \e, or end of replacement 1309 * \u convert the next character to upper-case 1310 * \U convert to upper-case, until \E, \e, or end of replacement 1311 * 1312 * Otherwise, since this is the lowest level of replacement, discard 1313 * all escaping characters. This (hopefully) matches historic practice. 
1314 */ 1315 #define OUTCH(ch, nltrans) { \ 1316 CHAR_T __ch = (ch); \ 1317 u_int __value = KEY_VAL(sp, __ch); \ 1318 if ((nltrans) && (__value == K_CR || __value == K_NL)) { \ 1319 NEEDNEWLINE(sp); \ 1320 sp->newl[sp->newl_cnt++] = lbclen; \ 1321 } else if (conv != C_NOTSET) { \ 1322 switch (conv) { \ 1323 case C_ONELOWER: \ 1324 conv = C_NOTSET; \ 1325 /* FALLTHROUGH */ \ 1326 case C_LOWER: \ 1327 if (isupper(__ch)) \ 1328 __ch = tolower(__ch); \ 1329 break; \ 1330 case C_ONEUPPER: \ 1331 conv = C_NOTSET; \ 1332 /* FALLTHROUGH */ \ 1333 case C_UPPER: \ 1334 if (islower(__ch)) \ 1335 __ch = toupper(__ch); \ 1336 break; \ 1337 default: \ 1338 abort(); \ 1339 } \ 1340 } \ 1341 NEEDSP(sp, 1, p); \ 1342 *p++ = __ch; \ 1343 ++lbclen; \ 1344 } 1345 conv = C_NOTSET; 1346 for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) { 1347 switch (ch = *rp++) { 1348 case '&': 1349 if (O_ISSET(sp, O_MAGIC)) { 1350 no = 0; 1351 goto subzero; 1352 } 1353 break; 1354 case '\\': 1355 if (rpl == 0) 1356 break; 1357 --rpl; 1358 switch (ch = *rp) { 1359 case '&': 1360 ++rp; 1361 if (!O_ISSET(sp, O_MAGIC)) { 1362 no = 0; 1363 goto subzero; 1364 } 1365 break; 1366 case '0': case '1': case '2': case '3': case '4': 1367 case '5': case '6': case '7': case '8': case '9': 1368 no = *rp++ - '0'; 1369 subzero: if (match[no].rm_so == -1 || 1370 match[no].rm_eo == -1) 1371 break; 1372 mlen = match[no].rm_eo - match[no].rm_so; 1373 for (t = ip + match[no].rm_so; mlen--; ++t) 1374 OUTCH(*t, 0); 1375 continue; 1376 case 'e': 1377 case 'E': 1378 ++rp; 1379 conv = C_NOTSET; 1380 continue; 1381 case 'l': 1382 ++rp; 1383 conv = C_ONELOWER; 1384 continue; 1385 case 'L': 1386 ++rp; 1387 conv = C_LOWER; 1388 continue; 1389 case 'u': 1390 ++rp; 1391 conv = C_ONEUPPER; 1392 continue; 1393 case 'U': 1394 ++rp; 1395 conv = C_UPPER; 1396 continue; 1397 default: 1398 ++rp; 1399 break; 1400 } 1401 } 1402 OUTCH(ch, 1); 1403 } 1404 1405 *lbp = lb; /* Update caller's information. 
*/ 1406 *lbclenp = lbclen; 1407 *lblenp = lblen; 1408 return (0); 1409 } 1410