/* $OpenBSD: re_search.c,v 1.32 2017/05/30 07:05:22 florian Exp $ */

/* This file is in the public domain. */

/*
 * regular expression search commands for Mg
 *
 * This file contains functions to implement several of gnuemacs's regular
 * expression functions for Mg.  Several of the routines below are just minor
 * re-arrangements of Mg's non-regular expression search functions.  Some of
 * them are similar in structure to the original MicroEMACS, others are
 * modifications of Rich Ellison's code.  Peter Newton re-wrote about half of
 * them from scratch.
 */

#ifdef REGEX
#include <sys/queue.h>
#include <sys/types.h>
#include <regex.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>

#include "def.h"
#include "macro.h"

#define SRCH_BEGIN	(0)		/* search sub-codes */
#define SRCH_FORW	(-1)
#define SRCH_BACK	(-2)
#define SRCH_NOPR	(-3)
#define SRCH_ACCM	(-4)
#define SRCH_MARK	(-5)

#define RE_NMATCH	10		/* max number of matches */
#define REPLEN		256		/* max length of replacement string */

char	re_pat[NPAT];			/* regex pattern */
int	re_srch_lastdir = SRCH_NOPR;	/* last search flags */
int	casefoldsearch = TRUE;		/* does search ignore case? */

static int	 re_doreplace(RSIZE, char *);
static int	 re_forwsrch(void);
static int	 re_backsrch(void);
static int	 re_readpattern(char *);
static int	 killmatches(int);
static int	 countmatches(int);

/*
 * Search forward.
 * Get a search string from the user and search for it starting at ".".  If
 * found, move "." to just after the matched characters.  display does all
 * the hard stuff.  If not found, it just prints a message.
 */
/* ARGSUSED */
int
re_forwsearch(int f, int n)
{
	int	s;

	if ((s = re_readpattern("RE Search")) != TRUE)
		return (s);
	if (re_forwsrch() == FALSE) {
		dobeep();
		ewprintf("Search failed: \"%s\"", re_pat);
		return (FALSE);
	}
	re_srch_lastdir = SRCH_FORW;
	return (TRUE);
}

/*
 * Reverse search.
 * Get a search string from the user, and search, starting at "."
 * and proceeding toward the front of the buffer.  If found "." is left
 * pointing at the first character of the pattern [the last character that
 * was matched].
 */
/* ARGSUSED */
int
re_backsearch(int f, int n)
{
	int	s;

	if ((s = re_readpattern("RE Search backward")) != TRUE)
		return (s);
	if (re_backsrch() == FALSE) {
		dobeep();
		ewprintf("Search failed: \"%s\"", re_pat);
		return (FALSE);
	}
	re_srch_lastdir = SRCH_BACK;
	return (TRUE);
}

/*
 * Search again, using the same search string and direction as the last search
 * command.  The direction has been saved in "re_srch_lastdir", so you know
 * which way to go.  Returns FALSE (after beeping) when there is no previous
 * search or the search fails, TRUE otherwise.
 *
 * XXX: This code has problems -- some incompatibility(?) with extend.c causes
 * match to fail when it should not.
 */
/* ARGSUSED */
int
re_searchagain(int f, int n)
{
	if (re_srch_lastdir == SRCH_NOPR) {
		dobeep();
		ewprintf("No last search");
		return (FALSE);
	}
	if (re_srch_lastdir == SRCH_FORW) {
		if (re_forwsrch() == FALSE) {
			dobeep();
			ewprintf("Search failed: \"%s\"", re_pat);
			return (FALSE);
		}
		return (TRUE);
	}
	if (re_srch_lastdir == SRCH_BACK) {
		if (re_backsrch() == FALSE) {
			dobeep();
			ewprintf("Search failed: \"%s\"", re_pat);
			return (FALSE);
		}
	}

	return (TRUE);
}

/* Compiled regex goes here-- changed only when new pattern read */
static regex_t		regex_buff;
static regmatch_t	regex_match[RE_NMATCH];

/*
 * Re-Query Replace.
 * Replace strings selectively.  Does a search and replace operation.
 * Reads the pattern and the replacement text, then visits every forward
 * match and asks for a key:
 *	<SP>	replace this match and continue
 *	.	replace this match and stop
 *	!	replace this and all remaining matches without asking
 *	<DEL>	skip this match
 *	^G, <ESC>, `	stop
 * Returns ABORT if the replacement prompt is aborted, FALSE if a
 * replacement fails, otherwise TRUE (or whatever re_readpattern returned
 * when no pattern was obtained).
 */
/* ARGSUSED */
int
re_queryrepl(int f, int n)
{
	int	rcnt = 0;		/* replacements made so far	*/
	int	plen, s;		/* length of found string	*/
	char	news[NPAT];		/* replacement string		*/

	if ((s = re_readpattern("RE Query replace")) != TRUE)
		return (s);
	if (eread("Query replace %s with: ", news, NPAT,
	    EFNUL | EFNEW | EFCR, re_pat) == NULL)
		return (ABORT);
	ewprintf("Query replacing %s with %s:", re_pat, news);

	/*
	 * Search forward repeatedly, checking each time whether to insert
	 * or not.  The "!" case makes the check always true, so it gets put
	 * into a tighter loop for efficiency.
	 */
	while (re_forwsrch() == TRUE) {
retry:
		update(CMODE);
		switch (getkey(FALSE)) {
		case ' ':
			plen = regex_match[0].rm_eo - regex_match[0].rm_so;
			if (re_doreplace((RSIZE)plen, news) == FALSE)
				return (FALSE);
			rcnt++;
			break;

		case '.':
			plen = regex_match[0].rm_eo - regex_match[0].rm_so;
			if (re_doreplace((RSIZE)plen, news) == FALSE)
				return (FALSE);
			rcnt++;
			goto stopsearch;

		case CCHR('G'):				/* ^G */
			(void)ctrlg(FFRAND, 0);
			goto stopsearch;
		case CCHR('['):				/* ESC */
		case '`':
			goto stopsearch;
		case '!':
			do {
				plen = regex_match[0].rm_eo -
				    regex_match[0].rm_so;
				if (re_doreplace((RSIZE)plen, news) == FALSE)
					return (FALSE);
				rcnt++;
			} while (re_forwsrch() == TRUE);
			goto stopsearch;

		case CCHR('?'):				/* To not replace */
			break;

		default:
			ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit");
			goto retry;
		}
	}

stopsearch:
	curwp->w_rflag |= WFFULL;
	update(CMODE);
	if (!inmacro) {
		if (rcnt == 0)
			ewprintf("(No replacements done)");
		else if (rcnt == 1)
			ewprintf("(1 replacement done)");
		else
			ewprintf("(%d replacements done)", rcnt);
	}
	return (TRUE);
}

/*
 * Routine re_doreplace calls lreplace to make replacements needed by
 * re_query replace.  Its reason for existence is to deal with \1, \2. etc.
 *  plen: length to remove
 *  st:   replacement string
 *
 * The replacement text is expanded into a local buffer of REPLEN bytes:
 * "\N" is replaced by the text of sub-match N of the last successful
 * regexec() on the current line, "\c" for any other character c yields c,
 * and a trailing lone backslash is dropped.  Returns FALSE if the expansion
 * does not fit in the buffer or N is not below RE_NMATCH; otherwise returns
 * the result of lreplace().
 */
static int
re_doreplace(RSIZE plen, char *st)
{
	int		 j, k, s, more, num, state;
	struct line	*clp;
	char		 repstr[REPLEN];

	clp = curwp->w_dotp;
	more = TRUE;
	j = 0;
	state = 0;
	num = 0;

	/* The following FSA parses the replacement string */
	while (more) {
		switch (state) {
		case 0:		/* copying literal text */
			if (*st == '\\') {
				st++;
				state = 1;
			} else if (*st == '\0')
				more = FALSE;
			else {
				repstr[j] = *st;
				j++;
				if (j >= REPLEN)
					return (FALSE);
				st++;
			}
			break;
		case 1:		/* character right after a backslash */
			if (*st >= '0' && *st <= '9') {
				num = *st - '0';
				st++;
				state = 2;
			} else if (*st == '\0')
				more = FALSE;
			else {
				repstr[j] = *st;
				j++;
				if (j >= REPLEN)
					return (FALSE);
				st++;
				state = 0;
			}
			break;
		case 2:		/* further digits of a \N reference */
			if (*st >= '0' && *st <= '9') {
				num = 10 * num + *st - '0';
				/*
				 * Reject as soon as the index is out of
				 * range: a long digit run must not be able
				 * to overflow num into a negative index.
				 */
				if (num >= RE_NMATCH)
					return (FALSE);
				st++;
			} else {
				if (num >= RE_NMATCH)
					return (FALSE);
				k = regex_match[num].rm_eo -
				    regex_match[num].rm_so;
				if (j + k >= REPLEN)
					return (FALSE);
				/*
				 * A group that did not take part in the
				 * match has rm_so == rm_eo == -1, i.e. a
				 * length of zero; skip the copy so that no
				 * pointer before l_text is ever formed.
				 */
				if (k > 0) {
					memcpy(&repstr[j],
					    &clp->l_text[regex_match[num].rm_so],
					    k);
					j += k;
				}
				if (*st == '\0')
					/* the terminator is not data */
					more = FALSE;
				else if (*st == '\\') {
					st++;
					state = 1;
				} else {
					repstr[j] = *st;
					j++;
					if (j >= REPLEN)
						return (FALSE);
					st++;
					state = 0;
				}
			}
			break;
		}		/* switch (state) */
	}			/* while (more)   */

	repstr[j] = '\0';
	s = lreplace(plen, repstr);
	return (s);
}

/*
 * This routine does the real work of a forward search.  The compiled
 * pattern is sitting in "regex_buff".  If found, dot is moved to the end
 * of the match, the window system is notified of the change, and TRUE is
 * returned.  If the string isn't found, FALSE is returned.
 */
static int
re_forwsrch(void)
{
	int		 re_flags, tbo, tdotline, error;
	struct line	*clp;

	clp = curwp->w_dotp;
	tbo = curwp->w_doto;
	tdotline = curwp->w_dotline;

	if (tbo == clp->l_used) {
		/*
		 * Don't start matching past end of line -- must move to
		 * beginning of next line, unless at end of file.
		 */
		if (clp != curbp->b_headp) {
			clp = lforw(clp);
			tdotline++;
			tbo = 0;
		}
	}
	/*
	 * Note this loop does not process the last line, but this editor
	 * always makes the last line empty so this is good.
	 */
	while (clp != (curbp->b_headp)) {
		/*
		 * REG_STARTEND only moves where the string starts; unless
		 * REG_NOTBOL is given too, '^' would match at the start
		 * offset even when that is in the middle of the line.
		 */
		re_flags = REG_STARTEND;
		if (tbo > 0)
			re_flags |= REG_NOTBOL;
		regex_match[0].rm_so = tbo;
		regex_match[0].rm_eo = llength(clp);
		error = regexec(&regex_buff, ltext(clp), RE_NMATCH,
		    regex_match, re_flags);
		if (error != 0) {
			clp = lforw(clp);
			tdotline++;
			tbo = 0;
		} else {
			curwp->w_doto = regex_match[0].rm_eo;
			curwp->w_dotp = clp;
			curwp->w_dotline = tdotline;
			curwp->w_rflag |= WFMOVE;
			return (TRUE);
		}
	}
	return (FALSE);
}

/*
 * This routine does the real work of a backward search.  The compiled
 * pattern is sitting in "regex_buff".  If found, dot is moved to the start
 * of the match, the window system is notified of the change, and TRUE is
 * returned.  If the string isn't found, FALSE is returned.
 */
static int
re_backsrch(void)
{
	struct line	*clp;
	int		 re_flags, tbo, tdotline;
	regmatch_t	 lastmatch;

	clp = curwp->w_dotp;
	tbo = curwp->w_doto;
	tdotline = curwp->w_dotline;

	/* Start search one position to the left of dot */
	tbo = tbo - 1;
	if (tbo < 0) {
		/* must move up one line */
		clp = lback(clp);
		tdotline--;
		tbo = llength(clp);
	}

	/*
	 * Note this loop does not process the last line, but this editor
	 * always makes the last line empty so this is good.
	 */
	while (clp != (curbp->b_headp)) {
		regex_match[0].rm_so = 0;
		regex_match[0].rm_eo = llength(clp);
		lastmatch.rm_so = -1;
		/*
		 * Keep searching until we don't match any longer.  Assumes a
		 * non-match does not modify the regex_match array.  We have to
		 * do this character-by-character after the first match since
		 * POSIX regexps don't give you a way to do reverse matches.
		 */
		for (;;) {
			/*
			 * A retry that starts inside the line must not let
			 * '^' match at the retry offset.
			 */
			re_flags = REG_STARTEND;
			if (regex_match[0].rm_so > 0)
				re_flags |= REG_NOTBOL;
			if (regexec(&regex_buff, ltext(clp), RE_NMATCH,
			    regex_match, re_flags) != 0 ||
			    regex_match[0].rm_so >= tbo)
				break;
			lastmatch = regex_match[0];
			regex_match[0].rm_so++;
			regex_match[0].rm_eo = llength(clp);
		}
		if (lastmatch.rm_so == -1) {
			clp = lback(clp);
			tdotline--;
			tbo = llength(clp);
		} else {
			regex_match[0] = lastmatch;
			curwp->w_doto = regex_match[0].rm_so;
			curwp->w_dotp = clp;
			curwp->w_dotline = tdotline;
			curwp->w_rflag |= WFMOVE;
			return (TRUE);
		}
	}
	return (FALSE);
}

/*
 * Read a pattern.
 * Stash it in the external variable "re_pat" and compile it into
 * "regex_buff".  "re_pat" is not updated if the user types in an empty
 * line.  If the user typed an empty line, and there is no old pattern, it
 * is an error.  Display the old pattern, in the style of Jeff Lomicka.
 * There is some do-it-yourself control expansion.
 *
 * Returns TRUE when a usable pattern is available, FALSE when there is
 * none or the new one fails to compile, ABORT when the prompt is aborted.
 */
static int
re_readpattern(char *re_prompt)
{
	static int	dofree = 0;	/* regex_buff holds a compiled regex */
	int		flags, error, s;
	char		tpat[NPAT], *rep;

	if (re_pat[0] == '\0')
		rep = eread("%s: ", tpat, NPAT, EFNEW | EFCR, re_prompt);
	else
		rep = eread("%s: (default %s) ", tpat, NPAT,
		    EFNUL | EFNEW | EFCR, re_prompt, re_pat);
	if (rep == NULL)
		return (ABORT);
	if (rep[0] != '\0') {
		/* New pattern given */
		(void)strlcpy(re_pat, tpat, sizeof(re_pat));
		if (casefoldsearch)
			flags = REG_EXTENDED | REG_ICASE;
		else
			flags = REG_EXTENDED;
		if (dofree) {
			regfree(&regex_buff);
			/* nothing to free again until regcomp succeeds */
			dofree = 0;
		}
		error = regcomp(&regex_buff, re_pat, flags);
		if (error != 0) {
			char	message[256];

			regerror(error, &regex_buff, message, sizeof(message));
			dobeep();
			ewprintf("Regex Error: %s", message);
			re_pat[0] = '\0';
			/*
			 * regex_buff is unusable now; make sure a later
			 * "search again" does not run regexec() on it.
			 */
			re_srch_lastdir = SRCH_NOPR;
			return (FALSE);
		}
		dofree = 1;
		s = TRUE;
	} else if (re_pat[0] != '\0')
		/* Just using old pattern */
		s = TRUE;
	else
		s = FALSE;
	return (s);
}

/*
 * Cause case to not matter in searches.  This is the default.  If called
 * with argument cause case to matter.
 */
/* ARGSUSED*/
int
setcasefold(int f, int n)
{
	if (f & FFARG) {
		casefoldsearch = FALSE;
		ewprintf("Case-fold-search unset");
	} else {
		casefoldsearch = TRUE;
		ewprintf("Case-fold-search set");
	}

	/*
	 * Invalidate the regular expression pattern since I'm too lazy to
	 * recompile it.
	 */
	re_pat[0] = '\0';
	return (TRUE);
}

/*
 * Delete all lines after dot that contain a string matching regex.
 */
/* ARGSUSED */
int
delmatchlines(int f, int n)
{
	int	s;

	if ((s = re_readpattern("Flush lines (containing match for regexp)"))
	    != TRUE)
		return (s);

	s = killmatches(TRUE);
	return (s);
}

/*
 * Delete all lines after dot that don't contain a string matching regex.
 */
/* ARGSUSED */
int
delnonmatchlines(int f, int n)
{
	int	s;

	if ((s = re_readpattern("Keep lines (containing match for regexp)"))
	    != TRUE)
		return (s);

	s = killmatches(FALSE);
	return (s);
}

/*
 * This function does the work of deleting matching lines.
 * cond == TRUE deletes the lines that match the compiled pattern,
 * cond == FALSE deletes the lines that do not.  Scanning starts at the
 * line holding dot, or at the following line when dot is at end of line.
 * Returns FALSE as soon as a deletion fails, TRUE otherwise.
 */
static int
killmatches(int cond)
{
	int		 s, error;
	int		 count = 0;
	struct line	*clp;

	clp = curwp->w_dotp;
	if (curwp->w_doto == llength(clp))
		/* Consider dot on next line */
		clp = lforw(clp);

	while (clp != (curbp->b_headp)) {
		/* see if line matches */
		regex_match[0].rm_so = 0;
		regex_match[0].rm_eo = llength(clp);
		error = regexec(&regex_buff, ltext(clp), RE_NMATCH,
		    regex_match, REG_STARTEND);

		/* Delete line when appropriate */
		if ((cond == FALSE && error) || (cond == TRUE && !error)) {
			curwp->w_doto = 0;
			curwp->w_dotp = clp;
			count++;
			s = ldelete(llength(clp) + 1, KNONE);
			/* continue from wherever the deletion left dot */
			clp = curwp->w_dotp;
			curwp->w_rflag |= WFMOVE;
			if (s == FALSE)
				return (FALSE);
		} else
			clp = lforw(clp);
	}

	ewprintf("%d line(s) deleted", count);
	if (count > 0)
		curwp->w_rflag |= WFMOVE;

	return (TRUE);
}

/*
 * Count lines matching regex.
 */
/* ARGSUSED */
int
cntmatchlines(int f, int n)
{
	int	s;

	if ((s = re_readpattern("Count lines (matching regexp)")) != TRUE)
		return (s);
	s = countmatches(TRUE);

	return (s);
}

/*
 * Count lines that fail to match regex.
 */
/* ARGSUSED */
int
cntnonmatchlines(int f, int n)
{
	int	s;

	if ((s = re_readpattern("Count lines (not matching regexp)")) != TRUE)
		return (s);
	s = countmatches(FALSE);

	return (s);
}

/*
 * This function does the work of counting matching lines.
 * cond == TRUE counts the lines that match the compiled pattern,
 * cond == FALSE counts the lines that do not.  Scanning starts at the
 * line holding dot, or at the following line when dot is at end of line.
 * The result is reported in the echo line; always returns TRUE.
 */
static int
countmatches(int cond)
{
	int		 error;
	int		 count = 0;
	struct line	*clp;

	clp = curwp->w_dotp;
	if (curwp->w_doto == llength(clp))
		/* Consider dot on next line */
		clp = lforw(clp);

	while (clp != (curbp->b_headp)) {
		/* see if line matches */
		regex_match[0].rm_so = 0;
		regex_match[0].rm_eo = llength(clp);
		error = regexec(&regex_buff, ltext(clp), RE_NMATCH,
		    regex_match, REG_STARTEND);

		/* Count line when appropriate */
		if ((cond == FALSE && error) || (cond == TRUE && !error))
			count++;
		clp = lforw(clp);
	}

	if (cond)
		ewprintf("Number of lines matching: %d", count);
	else
		ewprintf("Number of lines not matching: %d", count);

	return (TRUE);
}
#endif	/* REGEX */