/*	$OpenBSD: re_search.c,v 1.35 2020/07/22 13:29:05 tb Exp $	*/

/* This file is in the public domain. */

/*
 *	regular expression search commands for Mg
 *
 * This file contains functions to implement several of gnuemacs's regular
 * expression functions for Mg.  Several of the routines below are just minor
 * re-arrangements of Mg's non-regular expression search functions.  Some of
 * them are similar in structure to the original MicroEMACS, others are
 * modifications of Rich Ellison's code.  Peter Newton re-wrote about half of
 * them from scratch.
 */

#ifdef REGEX
#include <sys/queue.h>
#include <sys/types.h>
#include <regex.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>

#include "def.h"
#include "macro.h"

#define SRCH_BEGIN	(0)		/* search sub-codes */
#define SRCH_FORW	(-1)
#define SRCH_BACK	(-2)
#define SRCH_NOPR	(-3)
#define SRCH_ACCM	(-4)
#define SRCH_MARK	(-5)

#define RE_NMATCH	10		/* max number of matches */
#define REPLEN		256		/* max length of replacement string */

char	re_pat[NPAT];			/* regex pattern */
int	re_srch_lastdir = SRCH_NOPR;	/* last search flags */
int	casefoldsearch = TRUE;		/* does search ignore case? */

static int	 re_doreplace(RSIZE, char *);
static int	 re_forwsrch(void);
static int	 re_backsrch(void);
static int	 re_readpattern(char *);
static int	 killmatches(int);
static int	 countmatches(int);

/*
 * Search forward.
 * Get a search string from the user and search for it starting at ".".  If
 * found, move "." to just after the matched characters.  display does all
 * the hard stuff.  If not found, it just prints a message.
 *
 * Returns TRUE on a match (and records SRCH_FORW in re_srch_lastdir),
 * FALSE when the search fails, or whatever re_readpattern() returned when
 * it did not return TRUE.  The arguments f and n are not used.
 */
/* ARGSUSED */
int
re_forwsearch(int f, int n)
{
	int	s;

	if ((s = re_readpattern("RE Search")) != TRUE)
		return (s);
	if (re_forwsrch() == FALSE) {
		dobeep();
		ewprintf("Search failed: \"%s\"", re_pat);
		return (FALSE);
	}
	re_srch_lastdir = SRCH_FORW;
	return (TRUE);
}

/*
 * Reverse search.
 * Get a search string from the user, and search, starting at "."
 * and proceeding toward the front of the buffer.  If found "." is left
 * pointing at the first character of the pattern [the last character that
 * was matched].
 *
 * Returns TRUE on a match (and records SRCH_BACK in re_srch_lastdir),
 * FALSE when the search fails, or whatever re_readpattern() returned when
 * it did not return TRUE.  The arguments f and n are not used.
 */
/* ARGSUSED */
int
re_backsearch(int f, int n)
{
	int	s;

	if ((s = re_readpattern("RE Search backward")) != TRUE)
		return (s);
	if (re_backsrch() == FALSE) {
		dobeep();
		ewprintf("Search failed: \"%s\"", re_pat);
		return (FALSE);
	}
	re_srch_lastdir = SRCH_BACK;
	return (TRUE);
}

/*
 * Search again, using the same search string and direction as the last search
 * command.  The direction has been saved in "re_srch_lastdir", so you know
 * which way to go.
 *
 * Returns FALSE (after beeping and printing a message) when there is no
 * previous search or when the repeated search fails; TRUE otherwise.  The
 * arguments f and n are not used.
 *
 * XXX: This code has problems -- some incompatibility(?) with extend.c causes
 * match to fail when it should not.
 */
/* ARGSUSED */
int
re_searchagain(int f, int n)
{
	if (re_srch_lastdir == SRCH_NOPR) {
		dobeep();
		ewprintf("No last search");
		return (FALSE);
	}
	if (re_srch_lastdir == SRCH_FORW) {
		if (re_forwsrch() == FALSE) {
			dobeep();
			ewprintf("Search failed: \"%s\"", re_pat);
			return (FALSE);
		}
		return (TRUE);
	}
	if (re_srch_lastdir == SRCH_BACK) {
		if (re_backsrch() == FALSE) {
			dobeep();
			ewprintf("Search failed: \"%s\"", re_pat);
			return (FALSE);
		}
	}

	return (TRUE);
}

/* Compiled regex goes here-- changed only when new pattern read */
static regex_t		regex_buff;
static regmatch_t	regex_match[RE_NMATCH];

/*
 * Re-Query Replace.
 *	Replace strings selectively.  Does a search and replace operation.
 *
 * After each forward match one key is read and acted upon:
 *	<SP>		replace this match and continue
 *	.		replace this match and stop
 *	!		replace this and every remaining match without asking
 *	CCHR('?')	leave this match alone and continue
 *	^G		call ctrlg() and stop
 *	ESC, `		stop
 *	anything else	print a help line and read another key
 *
 * Returns ABORT if reading the replacement string fails, FALSE if a
 * replacement could not be carried out, whatever re_readpattern() returned
 * when it did not return TRUE, and TRUE otherwise.  The arguments f and n
 * are not used.
 */
/* ARGSUSED */
int
re_queryrepl(int f, int n)
{
	int	rcnt = 0;		/* replacements made so far	*/
	int	plen, s;		/* length of found string	*/
	char	news[NPAT];		/* replacement string		*/

	if ((s = re_readpattern("RE Query replace")) != TRUE)
		return (s);
	if (eread("Query replace %s with: ", news, NPAT,
	    EFNUL | EFNEW | EFCR, re_pat) == NULL)
		return (ABORT);
	ewprintf("Query replacing %s with %s:", re_pat, news);

	/*
	 * Search forward repeatedly, checking each time whether to insert
	 * or not.  The "!" case makes the check always true, so it gets put
	 * into a tighter loop for efficiency.
	 */
	while (re_forwsrch() == TRUE) {
retry:
		update(CMODE);
		switch (getkey(FALSE)) {
		case ' ':
			plen = regex_match[0].rm_eo - regex_match[0].rm_so;
			if (re_doreplace((RSIZE)plen, news) == FALSE)
				return (FALSE);
			rcnt++;
			break;

		case '.':
			plen = regex_match[0].rm_eo - regex_match[0].rm_so;
			if (re_doreplace((RSIZE)plen, news) == FALSE)
				return (FALSE);
			rcnt++;
			goto stopsearch;

		case CCHR('G'):				/* ^G */
			(void)ctrlg(FFRAND, 0);
			goto stopsearch;
		case CCHR('['):				/* ESC */
		case '`':
			goto stopsearch;
		case '!':
			do {
				plen = regex_match[0].rm_eo -
				    regex_match[0].rm_so;
				if (re_doreplace((RSIZE)plen, news) == FALSE)
					return (FALSE);
				rcnt++;
			} while (re_forwsrch() == TRUE);
			goto stopsearch;

		case CCHR('?'):				/* To not replace */
			break;

		default:
			ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit");
			goto retry;
		}
	}

stopsearch:
	curwp->w_rflag |= WFFULL;
	update(CMODE);
	if (!inmacro) {
		if (rcnt == 0)
			ewprintf("(No replacements done)");
		else if (rcnt == 1)
			ewprintf("(1 replacement done)");
		else
			ewprintf("(%d replacements done)", rcnt);
	}
	return (TRUE);
}

/*
 * Routine re_doreplace calls lreplace to make replacements needed by
 * re_query replace.  Its reason for existence is to deal with \1, \2. etc.
 *  plen: length to remove
 *  st:   replacement string
 *
 * The replacement string is expanded into a local buffer of REPLEN bytes:
 * a backslash followed by digits is replaced by the text of that
 * sub-match (taken from regex_match[] and the line at dot), a backslash
 * followed by any other character yields that character, and a trailing
 * lone backslash is dropped.
 *
 * Returns FALSE if the expansion does not fit in the buffer or a
 * sub-match number is RE_NMATCH or larger; otherwise returns the result
 * of lreplace().
 */
static int
re_doreplace(RSIZE plen, char *st)
{
	int		 j, k, s, more, num, state;
	struct line	*clp;
	char		 repstr[REPLEN];

	clp = curwp->w_dotp;
	more = TRUE;
	j = 0;
	state = 0;
	num = 0;

	/* The following FSA parses the replacement string */
	while (more) {
		switch (state) {
		case 0:
			/* plain text */
			if (*st == '\\') {
				st++;
				state = 1;
			} else if (*st == '\0')
				more = FALSE;
			else {
				repstr[j] = *st;
				j++;
				if (j >= REPLEN)
					return (FALSE);
				st++;
			}
			break;
		case 1:
			/* just seen a backslash */
			if (*st >= '0' && *st <= '9') {
				num = *st - '0';
				st++;
				state = 2;
			} else if (*st == '\0')
				more = FALSE;
			else {
				repstr[j] = *st;
				j++;
				if (j >= REPLEN)
					return (FALSE);
				st++;
				state = 0;
			}
			break;
		case 2:
			/* collecting the digits of a sub-match number */
			if (*st >= '0' && *st <= '9') {
				num = 10 * num + *st - '0';
				/*
				 * Reject as soon as the number is out of
				 * range; more digits can only make it larger
				 * and would eventually overflow the int.
				 */
				if (num >= RE_NMATCH)
					return (FALSE);
				st++;
			} else {
				if (num >= RE_NMATCH)
					return (FALSE);
				k = regex_match[num].rm_eo -
				    regex_match[num].rm_so;
				if (j + k >= REPLEN)
					return (FALSE);
				/*
				 * Nothing to copy for an empty or unset
				 * sub-match; skipping also avoids forming
				 * an address from an rm_so of -1.
				 */
				if (k > 0)
					memcpy(&repstr[j],
					    &(clp->l_text[regex_match[num].rm_so]),
					    k);
				j += k;
				if (*st == '\0')
					more = FALSE;
				else if (*st == '\\') {
					st++;
					state = 1;
				} else {
					repstr[j] = *st;
					j++;
					if (j >= REPLEN)
						return (FALSE);
					st++;
					state = 0;
				}
			}
			break;
		}		/* switch (state) */
	}			/* while (more)   */

	repstr[j] = '\0';
	s = lreplace(plen, repstr);
	return (s);
}

/*
 * This routine does the real work of a forward search.  The compiled
 * pattern is sitting in "regex_buff".  If found, dot is updated (to the
 * end of the match), the window system is notified of the change, and
 * TRUE is returned.  If the string isn't found, FALSE is returned.
 * On success regex_match[] holds the match offsets within the line.
 */
static int
re_forwsrch(void)
{
	int		 re_flags, tbo, tdotline, error;
	struct line	*clp;

	clp = curwp->w_dotp;
	tbo = curwp->w_doto;
	tdotline = curwp->w_dotline;

	if (tbo == clp->l_used) {
		/*
		 * Don't start matching past end of line -- must move to
		 * beginning of next line, unless line is empty or at
		 * end of file.
		 */
		if (clp != curbp->b_headp && llength(clp) != 0) {
			clp = lforw(clp);
			tdotline++;
			tbo = 0;
		}
	}
	/*
	 * Note this loop does not process the last line, but this editor
	 * always makes the last line empty so this is good.
	 */
	while (clp != (curbp->b_headp)) {
		re_flags = REG_STARTEND;
		/* A search starting mid-line must not let ^ match there. */
		if (tbo != 0)
			re_flags |= REG_NOTBOL;
		regex_match[0].rm_so = tbo;
		regex_match[0].rm_eo = llength(clp);
		error = regexec(&regex_buff, ltext(clp) ? ltext(clp) : "",
		    RE_NMATCH, regex_match, re_flags);
		if (error != 0) {
			clp = lforw(clp);
			tdotline++;
			tbo = 0;
		} else {
			curwp->w_doto = regex_match[0].rm_eo;
			curwp->w_dotp = clp;
			curwp->w_dotline = tdotline;
			curwp->w_rflag |= WFMOVE;
			return (TRUE);
		}
	}
	return (FALSE);
}

/*
 * This routine does the real work of a backward search.  The compiled
 * pattern is sitting in "regex_buff".  If found, dot is updated (to the
 * start of the match), the window system is notified of the change, and
 * TRUE is returned.  If the string isn't found, FALSE is returned.
 */
static int
re_backsrch(void)
{
	struct line	*clp;
	int		 tbo, tdotline, re_flags;
	regmatch_t	 lastmatch;

	clp = curwp->w_dotp;
	tbo = curwp->w_doto;
	tdotline = curwp->w_dotline;

	/* Start search one position to the left of dot */
	tbo = tbo - 1;
	if (tbo < 0) {
		/* must move up one line */
		clp = lback(clp);
		tdotline--;
		tbo = llength(clp);
	}

	/*
	 * Walk toward the front of the buffer one line at a time; the loop
	 * ends when the buffer's header line is reached.
	 */
	while (clp != (curbp->b_headp)) {
		regex_match[0].rm_so = 0;
		regex_match[0].rm_eo = llength(clp);
		lastmatch.rm_so = -1;
		re_flags = REG_STARTEND;
		/*
		 * Keep searching until we don't match any longer.  Assumes a
		 * non-match does not modify the regex_match array.  We have to
		 * do this character-by-character after the first match since
		 * POSIX regexps don't give you a way to do reverse matches.
		 */
		while (!regexec(&regex_buff, ltext(clp) ? ltext(clp) : "",
		    RE_NMATCH, regex_match, re_flags) &&
		    regex_match[0].rm_so <= tbo) {
			lastmatch = regex_match[0];
			regex_match[0].rm_so++;
			regex_match[0].rm_eo = llength(clp);
			/*
			 * Every retry starts past column 0, so ^ must not
			 * match at the new starting offset (same rule as
			 * re_forwsrch).
			 */
			re_flags = REG_STARTEND | REG_NOTBOL;
		}
		if (lastmatch.rm_so == -1) {
			clp = lback(clp);
			tdotline--;
			tbo = llength(clp);
		} else {
			regex_match[0] = lastmatch;
			curwp->w_doto = regex_match[0].rm_so;
			curwp->w_dotp = clp;
			curwp->w_dotline = tdotline;
			curwp->w_rflag |= WFMOVE;
			return (TRUE);
		}
	}
	return (FALSE);
}

/*
 * Read a pattern.
 * Stash it in the external variable "re_pat" and compile it into
 * "regex_buff".  Neither is updated if the user types in an empty line.
 * If the user typed an empty line, and there is no old pattern, it is an
 * error.  Display the old pattern, in the style of Jeff Lomicka.  There is
 * some do-it-yourself control expansion.
 *
 * Returns ABORT if eread() returns NULL, TRUE if a pattern (new or old) is
 * available, and FALSE if there is no pattern or the new one fails to
 * compile.  A compile failure clears re_pat and forgets the last search
 * direction, because regex_buff no longer holds a usable regex.
 */
static int
re_readpattern(char *re_prompt)
{
	static int	dofree = 0;	/* regex_buff holds a compiled regex */
	int		flags, error, s;
	char		tpat[NPAT], *rep;

	if (re_pat[0] == '\0')
		rep = eread("%s: ", tpat, NPAT, EFNEW | EFCR, re_prompt);
	else
		rep = eread("%s (default %s): ", tpat, NPAT,
		    EFNUL | EFNEW | EFCR, re_prompt, re_pat);
	if (rep == NULL)
		return (ABORT);
	if (rep[0] != '\0') {
		/* New pattern given */
		(void)strlcpy(re_pat, tpat, sizeof(re_pat));
		if (casefoldsearch)
			flags = REG_EXTENDED | REG_ICASE;
		else
			flags = REG_EXTENDED;
		if (dofree) {
			regfree(&regex_buff);
			/* Do not free again should the compile below fail. */
			dofree = 0;
		}
		error = regcomp(&regex_buff, re_pat, flags);
		if (error != 0) {
			char	message[256];
			regerror(error, &regex_buff, message, sizeof(message));
			dobeep();
			ewprintf("Regex Error: %s", message);
			re_pat[0] = '\0';
			/* Keep re_searchagain() away from the dead regex. */
			re_srch_lastdir = SRCH_NOPR;
			return (FALSE);
		}
		dofree = 1;
		s = TRUE;
	} else if (rep[0] == '\0' && re_pat[0] != '\0')
		/* Just using old pattern */
		s = TRUE;
	else
		s = FALSE;
	return (s);
}

/*
 * Cause case to not matter in searches.  This is the default.  If called
 * with argument cause case to matter.  Always returns TRUE; n is not used.
 */
/* ARGSUSED*/
int
setcasefold(int f, int n)
{
	if (f & FFARG) {
		casefoldsearch = FALSE;
		ewprintf("Case-fold-search unset");
	} else {
		casefoldsearch = TRUE;
		ewprintf("Case-fold-search set");
	}

	/*
	 * Invalidate the regular expression pattern since I'm too lazy to
	 * recompile it.
	 */
	re_pat[0] = '\0';
	return (TRUE);
}

/*
 * Delete all lines after dot that contain a string matching regex.
 * Returns the result of killmatches(), or of re_readpattern() when that
 * did not return TRUE.  The arguments f and n are not used.
 */
/* ARGSUSED */
int
delmatchlines(int f, int n)
{
	int	s;

	if ((s = re_readpattern("Flush lines (containing match for regexp)"))
	    != TRUE)
		return (s);

	s = killmatches(TRUE);
	return (s);
}

/*
 * Delete all lines after dot that don't contain a string matching regex.
 * Returns the result of killmatches(), or of re_readpattern() when that
 * did not return TRUE.  The arguments f and n are not used.
 */
/* ARGSUSED */
int
delnonmatchlines(int f, int n)
{
	int	s;

	if ((s = re_readpattern("Keep lines (containing match for regexp)"))
	    != TRUE)
		return (s);

	s = killmatches(FALSE);
	return (s);
}

/*
 * This function does the work of deleting matching lines.
 *  cond: TRUE deletes lines that match the compiled regex, FALSE deletes
 *        lines that do not.
 *
 * Starts at the line holding dot, or at the following line when dot is at
 * the end of its line, and runs until the buffer's header line.  Reports
 * the number of deleted lines.  Returns FALSE as soon as ldelete() fails,
 * TRUE otherwise.
 */
static int
killmatches(int cond)
{
	int		 s, error;
	int		 count = 0;
	struct line	*clp;

	clp = curwp->w_dotp;
	if (curwp->w_doto == llength(clp))
		/* Consider dot on next line */
		clp = lforw(clp);

	while (clp != (curbp->b_headp)) {
		/* see if line matches */
		regex_match[0].rm_so = 0;
		regex_match[0].rm_eo = llength(clp);
		error = regexec(&regex_buff, ltext(clp) ? ltext(clp) : "",
		    RE_NMATCH, regex_match, REG_STARTEND);

		/* Delete line when appropriate */
		if ((cond == FALSE && error) || (cond == TRUE && !error)) {
			curwp->w_doto = 0;
			curwp->w_dotp = clp;
			count++;
			/* length + 1: presumably also takes the newline */
			s = ldelete(llength(clp) + 1, KNONE);
			/* continue from wherever the deletion left dot */
			clp = curwp->w_dotp;
			curwp->w_rflag |= WFMOVE;
			if (s == FALSE)
				return (FALSE);
		} else
			clp = lforw(clp);
	}

	ewprintf("%d line(s) deleted", count);
	if (count > 0)
		curwp->w_rflag |= WFMOVE;

	return (TRUE);
}

/*
 * Count lines matching regex.
 * Returns the result of countmatches(), or of re_readpattern() when that
 * did not return TRUE.  The arguments f and n are not used.
 */
/* ARGSUSED */
int
cntmatchlines(int f, int n)
{
	int	s;

	if ((s = re_readpattern("Count lines (matching regexp)")) != TRUE)
		return (s);
	s = countmatches(TRUE);

	return (s);
}

/*
 * Count lines that fail to match regex.
 * Returns the result of countmatches(), or of re_readpattern() when that
 * did not return TRUE.  The arguments f and n are not used.
 */
/* ARGSUSED */
int
cntnonmatchlines(int f, int n)
{
	int	s;

	if ((s = re_readpattern("Count lines (not matching regexp)")) != TRUE)
		return (s);
	s = countmatches(FALSE);

	return (s);
}

/*
 * This function does the work of counting matching lines.
 *  cond: TRUE counts lines that match the compiled regex, FALSE counts
 *        lines that do not.
 *
 * Starts at the line holding dot, or at the following line when dot is at
 * the end of its line, and runs until the buffer's header line.  The
 * buffer and dot are not modified.  Prints the count and returns TRUE.
 */
static int
countmatches(int cond)
{
	int		 error;
	int		 count = 0;
	struct line	*clp;

	clp = curwp->w_dotp;
	if (curwp->w_doto == llength(clp))
		/* Consider dot on next line */
		clp = lforw(clp);

	while (clp != (curbp->b_headp)) {
		/* see if line matches */
		regex_match[0].rm_so = 0;
		regex_match[0].rm_eo = llength(clp);
		error = regexec(&regex_buff, ltext(clp) ? ltext(clp) : "",
		    RE_NMATCH, regex_match, REG_STARTEND);

		/* Count line when appropriate */
		if ((cond == FALSE && error) || (cond == TRUE && !error))
			count++;
		clp = lforw(clp);
	}

	if (cond)
		ewprintf("Number of lines matching: %d", count);
	else
		ewprintf("Number of lines not matching: %d", count);

	return (TRUE);
}
#endif	/* REGEX */