1 /* $OpenBSD: re_search.c,v 1.36 2021/04/22 19:50:55 lum Exp $ */ 2 3 /* This file is in the public domain. */ 4 5 /* 6 * regular expression search commands for Mg 7 * 8 * This file contains functions to implement several of gnuemacs's regular 9 * expression functions for Mg. Several of the routines below are just minor 10 * re-arrangements of Mg's non-regular expression search functions. Some of 11 * them are similar in structure to the original MicroEMACS, others are 12 * modifications of Rich Ellison's code. Peter Newton re-wrote about half of 13 * them from scratch. 14 */ 15 16 #ifdef REGEX 17 #include <sys/queue.h> 18 #include <sys/types.h> 19 #include <regex.h> 20 #include <signal.h> 21 #include <stdio.h> 22 #include <string.h> 23 24 #include "def.h" 25 #include "macro.h" 26 27 #define SRCH_BEGIN (0) /* search sub-codes */ 28 #define SRCH_FORW (-1) 29 #define SRCH_BACK (-2) 30 #define SRCH_NOPR (-3) 31 #define SRCH_ACCM (-4) 32 #define SRCH_MARK (-5) 33 34 #define RE_NMATCH 10 /* max number of matches */ 35 #define REPLEN 256 /* max length of replacement string */ 36 37 char re_pat[NPAT]; /* regex pattern */ 38 int re_srch_lastdir = SRCH_NOPR; /* last search flags */ 39 int casefoldsearch = TRUE; /* does search ignore case? */ 40 41 static int re_doreplace(RSIZE, char *); 42 static int re_forwsrch(void); 43 static int re_backsrch(void); 44 static int re_readpattern(char *); 45 static int killmatches(int); 46 static int countmatches(int); 47 48 /* 49 * Search forward. 50 * Get a search string from the user and search for it starting at ".". If 51 * found, move "." to just after the matched characters. display does all 52 * the hard stuff. If not found, it just prints a message. 53 */ 54 /* ARGSUSED */ 55 int 56 re_forwsearch(int f, int n) 57 { 58 int s; 59 60 if ((s = re_readpattern("RE Search")) != TRUE) 61 return (s); 62 if (re_forwsrch() == FALSE) { 63 dobeep(); 64 ewprintf("Search failed: \"%s\"", re_pat); 65 return (FALSE); 66 } 67 re_srch_lastdir = SRCH_FORW; 68 return (TRUE); 69 } 70 71 /* 72 * Reverse search. 73 * Get a search string from the user, and search, starting at "." 74 * and proceeding toward the front of the buffer. If found "." is left 75 * pointing at the first character of the pattern [the last character that 76 * was matched]. 77 */ 78 /* ARGSUSED */ 79 int 80 re_backsearch(int f, int n) 81 { 82 int s; 83 84 if ((s = re_readpattern("RE Search backward")) != TRUE) 85 return (s); 86 if (re_backsrch() == FALSE) { 87 dobeep(); 88 ewprintf("Search failed: \"%s\"", re_pat); 89 return (FALSE); 90 } 91 re_srch_lastdir = SRCH_BACK; 92 return (TRUE); 93 } 94 95 /* 96 * Search again, using the same search string and direction as the last search 97 * command. The direction has been saved in "srch_lastdir", so you know which 98 * way to go. 99 * 100 * XXX: This code has problems -- some incompatibility(?) with extend.c causes 101 * match to fail when it should not. 102 */ 103 /* ARGSUSED */ 104 int 105 re_searchagain(int f, int n) 106 { 107 if (re_srch_lastdir == SRCH_NOPR) { 108 dobeep(); 109 ewprintf("No last search"); 110 return (FALSE); 111 } 112 if (re_srch_lastdir == SRCH_FORW) { 113 if (re_forwsrch() == FALSE) { 114 dobeep(); 115 ewprintf("Search failed: \"%s\"", re_pat); 116 return (FALSE); 117 } 118 return (TRUE); 119 } 120 if (re_srch_lastdir == SRCH_BACK) 121 if (re_backsrch() == FALSE) { 122 dobeep(); 123 ewprintf("Search failed: \"%s\"", re_pat); 124 return (FALSE); 125 } 126 127 return (TRUE); 128 } 129 130 /* Compiled regex goes here-- changed only when new pattern read */ 131 static regex_t regex_buff; 132 static regmatch_t regex_match[RE_NMATCH]; 133 134 /* 135 * Re-Query Replace. 136 * Replace strings selectively. Does a search and replace operation. 137 */ 138 /* ARGSUSED */ 139 int 140 re_queryrepl(int f, int n) 141 { 142 int rcnt = 0; /* replacements made so far */ 143 int plen, s; /* length of found string */ 144 char news[NPAT]; /* replacement string */ 145 146 if ((s = re_readpattern("RE Query replace")) != TRUE) 147 return (s); 148 if (eread("Query replace %s with: ", news, NPAT, 149 EFNUL | EFNEW | EFCR, re_pat) == NULL) 150 return (ABORT); 151 ewprintf("Query replacing %s with %s:", re_pat, news); 152 153 /* 154 * Search forward repeatedly, checking each time whether to insert 155 * or not. The "!" case makes the check always true, so it gets put 156 * into a tighter loop for efficiency. 157 */ 158 while (re_forwsrch() == TRUE) { 159 retry: 160 update(CMODE); 161 switch (getkey(FALSE)) { 162 case ' ': 163 plen = regex_match[0].rm_eo - regex_match[0].rm_so; 164 if (re_doreplace((RSIZE)plen, news) == FALSE) 165 return (FALSE); 166 rcnt++; 167 break; 168 169 case '.': 170 plen = regex_match[0].rm_eo - regex_match[0].rm_so; 171 if (re_doreplace((RSIZE)plen, news) == FALSE) 172 return (FALSE); 173 rcnt++; 174 goto stopsearch; 175 176 case CCHR('G'): /* ^G */ 177 (void)ctrlg(FFRAND, 0); 178 goto stopsearch; 179 case CCHR('['): /* ESC */ 180 case '`': 181 goto stopsearch; 182 case '!': 183 do { 184 plen = regex_match[0].rm_eo - regex_match[0].rm_so; 185 if (re_doreplace((RSIZE)plen, news) == FALSE) 186 return (FALSE); 187 rcnt++; 188 } while (re_forwsrch() == TRUE); 189 goto stopsearch; 190 191 case CCHR('?'): /* To not replace */ 192 break; 193 194 default: 195 ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit"); 196 goto retry; 197 } 198 } 199 200 stopsearch: 201 curwp->w_rflag |= WFFULL; 202 update(CMODE); 203 if (!inmacro) { 204 if (rcnt == 0) 205 ewprintf("(No replacements done)"); 206 else if (rcnt == 1) 207 ewprintf("(1 replacement done)"); 208 else 209 ewprintf("(%d replacements done)", rcnt); 210 } 211 return (TRUE); 212 } 213 214 int 215 re_repl(int f, int n) 216 { 217 int rcnt = 0; /* replacements made so far */ 218 int plen, s; /* length of found string */ 219 char news[NPAT]; /* replacement string */ 220 221 if ((s = re_readpattern("RE Replace")) != TRUE) 222 return (s); 223 if (eread("Replace %s with: ", news, NPAT, 224 EFNUL | EFNEW | EFCR, re_pat) == NULL) 225 return (ABORT); 226 227 while (re_forwsrch() == TRUE) { 228 plen = regex_match[0].rm_eo - regex_match[0].rm_so; 229 if (re_doreplace((RSIZE)plen, news) == FALSE) 230 return (FALSE); 231 rcnt++; 232 } 233 234 curwp->w_rflag |= WFFULL; 235 update(CMODE); 236 if (!inmacro) 237 ewprintf("(%d replacement(s) done)", rcnt); 238 239 return(TRUE); 240 } 241 242 /* 243 * Routine re_doreplace calls lreplace to make replacements needed by 244 * re_query replace. Its reason for existence is to deal with \1, \2. etc. 245 * plen: length to remove 246 * st: replacement string 247 */ 248 static int 249 re_doreplace(RSIZE plen, char *st) 250 { 251 int j, k, s, more, num, state; 252 struct line *clp; 253 char repstr[REPLEN]; 254 255 clp = curwp->w_dotp; 256 more = TRUE; 257 j = 0; 258 state = 0; 259 num = 0; 260 261 /* The following FSA parses the replacement string */ 262 while (more) { 263 switch (state) { 264 case 0: 265 if (*st == '\\') { 266 st++; 267 state = 1; 268 } else if (*st == '\0') 269 more = FALSE; 270 else { 271 repstr[j] = *st; 272 j++; 273 if (j >= REPLEN) 274 return (FALSE); 275 st++; 276 } 277 break; 278 case 1: 279 if (*st >= '0' && *st <= '9') { 280 num = *st - '0'; 281 st++; 282 state = 2; 283 } else if (*st == '\0') 284 more = FALSE; 285 else { 286 repstr[j] = *st; 287 j++; 288 if (j >= REPLEN) 289 return (FALSE); 290 st++; 291 state = 0; 292 } 293 break; 294 case 2: 295 if (*st >= '0' && *st <= '9') { 296 num = 10 * num + *st - '0'; 297 st++; 298 } else { 299 if (num >= RE_NMATCH) 300 return (FALSE); 301 k = regex_match[num].rm_eo - regex_match[num].rm_so; 302 if (j + k >= REPLEN) 303 return (FALSE); 304 bcopy(&(clp->l_text[regex_match[num].rm_so]), 305 &repstr[j], k); 306 j += k; 307 if (*st == '\0') 308 more = FALSE; 309 if (*st == '\\') { 310 st++; 311 state = 1; 312 } else { 313 repstr[j] = *st; 314 j++; 315 if (j >= REPLEN) 316 return (FALSE); 317 st++; 318 state = 0; 319 } 320 } 321 break; 322 } /* switch (state) */ 323 } /* while (more) */ 324 325 repstr[j] = '\0'; 326 s = lreplace(plen, repstr); 327 return (s); 328 } 329 330 /* 331 * This routine does the real work of a forward search. The pattern is 332 * sitting in the external variable "pat". If found, dot is updated, the 333 * window system is notified of the change, and TRUE is returned. If the 334 * string isn't found, FALSE is returned. 335 */ 336 static int 337 re_forwsrch(void) 338 { 339 int re_flags, tbo, tdotline, error; 340 struct line *clp; 341 342 clp = curwp->w_dotp; 343 tbo = curwp->w_doto; 344 tdotline = curwp->w_dotline; 345 346 if (tbo == clp->l_used) 347 /* 348 * Don't start matching past end of line -- must move to 349 * beginning of next line, unless line is empty or at 350 * end of file. 351 */ 352 if (clp != curbp->b_headp && llength(clp) != 0) { 353 clp = lforw(clp); 354 tdotline++; 355 tbo = 0; 356 } 357 /* 358 * Note this loop does not process the last line, but this editor 359 * always makes the last line empty so this is good. 360 */ 361 while (clp != (curbp->b_headp)) { 362 re_flags = REG_STARTEND; 363 if (tbo != 0) 364 re_flags |= REG_NOTBOL; 365 regex_match[0].rm_so = tbo; 366 regex_match[0].rm_eo = llength(clp); 367 error = regexec(®ex_buff, ltext(clp) ? ltext(clp) : "", 368 RE_NMATCH, regex_match, re_flags); 369 if (error != 0) { 370 clp = lforw(clp); 371 tdotline++; 372 tbo = 0; 373 } else { 374 curwp->w_doto = regex_match[0].rm_eo; 375 curwp->w_dotp = clp; 376 curwp->w_dotline = tdotline; 377 curwp->w_rflag |= WFMOVE; 378 return (TRUE); 379 } 380 } 381 return (FALSE); 382 } 383 384 /* 385 * This routine does the real work of a backward search. The pattern is sitting 386 * in the external variable "re_pat". If found, dot is updated, the window 387 * system is notified of the change, and TRUE is returned. If the string isn't 388 * found, FALSE is returned. 389 */ 390 static int 391 re_backsrch(void) 392 { 393 struct line *clp; 394 int tbo, tdotline; 395 regmatch_t lastmatch; 396 397 clp = curwp->w_dotp; 398 tbo = curwp->w_doto; 399 tdotline = curwp->w_dotline; 400 401 /* Start search one position to the left of dot */ 402 tbo = tbo - 1; 403 if (tbo < 0) { 404 /* must move up one line */ 405 clp = lback(clp); 406 tdotline--; 407 tbo = llength(clp); 408 } 409 410 /* 411 * Note this loop does not process the last line, but this editor 412 * always makes the last line empty so this is good. 413 */ 414 while (clp != (curbp->b_headp)) { 415 regex_match[0].rm_so = 0; 416 regex_match[0].rm_eo = llength(clp); 417 lastmatch.rm_so = -1; 418 /* 419 * Keep searching until we don't match any longer. Assumes a 420 * non-match does not modify the regex_match array. We have to 421 * do this character-by-character after the first match since 422 * POSIX regexps don't give you a way to do reverse matches. 423 */ 424 while (!regexec(®ex_buff, ltext(clp) ? ltext(clp) : "", 425 RE_NMATCH, regex_match, REG_STARTEND) && 426 regex_match[0].rm_so <= tbo) { 427 memcpy(&lastmatch, ®ex_match[0], sizeof(regmatch_t)); 428 regex_match[0].rm_so++; 429 regex_match[0].rm_eo = llength(clp); 430 } 431 if (lastmatch.rm_so == -1) { 432 clp = lback(clp); 433 tdotline--; 434 tbo = llength(clp); 435 } else { 436 memcpy(®ex_match[0], &lastmatch, sizeof(regmatch_t)); 437 curwp->w_doto = regex_match[0].rm_so; 438 curwp->w_dotp = clp; 439 curwp->w_dotline = tdotline; 440 curwp->w_rflag |= WFMOVE; 441 return (TRUE); 442 } 443 } 444 return (FALSE); 445 } 446 447 /* 448 * Read a pattern. 449 * Stash it in the external variable "re_pat". The "pat" is 450 * not updated if the user types in an empty line. If the user typed 451 * an empty line, and there is no old pattern, it is an error. 452 * Display the old pattern, in the style of Jeff Lomicka. There is 453 * some do-it-yourself control expansion. 454 */ 455 static int 456 re_readpattern(char *re_prompt) 457 { 458 static int dofree = 0; 459 int flags, error, s; 460 char tpat[NPAT], *rep; 461 462 if (re_pat[0] == '\0') 463 rep = eread("%s: ", tpat, NPAT, EFNEW | EFCR, re_prompt); 464 else 465 rep = eread("%s (default %s): ", tpat, NPAT, 466 EFNUL | EFNEW | EFCR, re_prompt, re_pat); 467 if (rep == NULL) 468 return (ABORT); 469 if (rep[0] != '\0') { 470 /* New pattern given */ 471 (void)strlcpy(re_pat, tpat, sizeof(re_pat)); 472 if (casefoldsearch) 473 flags = REG_EXTENDED | REG_ICASE; 474 else 475 flags = REG_EXTENDED; 476 if (dofree) 477 regfree(®ex_buff); 478 error = regcomp(®ex_buff, re_pat, flags); 479 if (error != 0) { 480 char message[256]; 481 regerror(error, ®ex_buff, message, sizeof(message)); 482 dobeep(); 483 ewprintf("Regex Error: %s", message); 484 re_pat[0] = '\0'; 485 return (FALSE); 486 } 487 dofree = 1; 488 s = TRUE; 489 } else if (rep[0] == '\0' && re_pat[0] != '\0') 490 /* Just using old pattern */ 491 s = TRUE; 492 else 493 s = FALSE; 494 return (s); 495 } 496 497 /* 498 * Cause case to not matter in searches. This is the default. If called 499 * with argument cause case to matter. 500 */ 501 /* ARGSUSED*/ 502 int 503 setcasefold(int f, int n) 504 { 505 if (f & FFARG) { 506 casefoldsearch = FALSE; 507 ewprintf("Case-fold-search unset"); 508 } else { 509 casefoldsearch = TRUE; 510 ewprintf("Case-fold-search set"); 511 } 512 513 /* 514 * Invalidate the regular expression pattern since I'm too lazy to 515 * recompile it. 516 */ 517 re_pat[0] = '\0'; 518 return (TRUE); 519 } 520 521 /* 522 * Delete all lines after dot that contain a string matching regex. 523 */ 524 /* ARGSUSED */ 525 int 526 delmatchlines(int f, int n) 527 { 528 int s; 529 530 if ((s = re_readpattern("Flush lines (containing match for regexp)")) 531 != TRUE) 532 return (s); 533 534 s = killmatches(TRUE); 535 return (s); 536 } 537 538 /* 539 * Delete all lines after dot that don't contain a string matching regex. 540 */ 541 /* ARGSUSED */ 542 int 543 delnonmatchlines(int f, int n) 544 { 545 int s; 546 547 if ((s = re_readpattern("Keep lines (containing match for regexp)")) 548 != TRUE) 549 return (s); 550 551 s = killmatches(FALSE); 552 return (s); 553 } 554 555 /* 556 * This function does the work of deleting matching lines. 557 */ 558 static int 559 killmatches(int cond) 560 { 561 int s, error; 562 int count = 0; 563 struct line *clp; 564 565 clp = curwp->w_dotp; 566 if (curwp->w_doto == llength(clp)) 567 /* Consider dot on next line */ 568 clp = lforw(clp); 569 570 while (clp != (curbp->b_headp)) { 571 /* see if line matches */ 572 regex_match[0].rm_so = 0; 573 regex_match[0].rm_eo = llength(clp); 574 error = regexec(®ex_buff, ltext(clp) ? ltext(clp) : "", 575 RE_NMATCH, regex_match, REG_STARTEND); 576 577 /* Delete line when appropriate */ 578 if ((cond == FALSE && error) || (cond == TRUE && !error)) { 579 curwp->w_doto = 0; 580 curwp->w_dotp = clp; 581 count++; 582 s = ldelete(llength(clp) + 1, KNONE); 583 clp = curwp->w_dotp; 584 curwp->w_rflag |= WFMOVE; 585 if (s == FALSE) 586 return (FALSE); 587 } else 588 clp = lforw(clp); 589 } 590 591 ewprintf("%d line(s) deleted", count); 592 if (count > 0) 593 curwp->w_rflag |= WFMOVE; 594 595 return (TRUE); 596 } 597 598 /* 599 * Count lines matching regex. 600 */ 601 /* ARGSUSED */ 602 int 603 cntmatchlines(int f, int n) 604 { 605 int s; 606 607 if ((s = re_readpattern("Count lines (matching regexp)")) != TRUE) 608 return (s); 609 s = countmatches(TRUE); 610 611 return (s); 612 } 613 614 /* 615 * Count lines that fail to match regex. 616 */ 617 /* ARGSUSED */ 618 int 619 cntnonmatchlines(int f, int n) 620 { 621 int s; 622 623 if ((s = re_readpattern("Count lines (not matching regexp)")) != TRUE) 624 return (s); 625 s = countmatches(FALSE); 626 627 return (s); 628 } 629 630 /* 631 * This function does the work of counting matching lines. 632 */ 633 int 634 countmatches(int cond) 635 { 636 int error; 637 int count = 0; 638 struct line *clp; 639 640 clp = curwp->w_dotp; 641 if (curwp->w_doto == llength(clp)) 642 /* Consider dot on next line */ 643 clp = lforw(clp); 644 645 while (clp != (curbp->b_headp)) { 646 /* see if line matches */ 647 regex_match[0].rm_so = 0; 648 regex_match[0].rm_eo = llength(clp); 649 error = regexec(®ex_buff, ltext(clp) ? ltext(clp) : "", 650 RE_NMATCH, regex_match, REG_STARTEND); 651 652 /* Count line when appropriate */ 653 if ((cond == FALSE && error) || (cond == TRUE && !error)) 654 count++; 655 clp = lforw(clp); 656 } 657 658 if (cond) 659 ewprintf("Number of lines matching: %d", count); 660 else 661 ewprintf("Number of lines not matching: %d", count); 662 663 return (TRUE); 664 } 665 #endif /* REGEX */ 666