/*	$OpenBSD: re_search.c,v 1.29 2013/12/20 18:44:13 florian Exp $	*/

/* This file is in the public domain. */

/*
 *	regular expression search commands for Mg
 *
 * This file contains functions to implement several of gnuemacs's regular
 * expression functions for Mg.  Several of the routines below are just minor
 * re-arrangements of Mg's non-regular expression search functions.  Some of
 * them are similar in structure to the original MicroEMACS, others are
 * modifications of Rich Ellison's code.  Peter Newton re-wrote about half of
 * them from scratch.
 */

#ifdef REGEX
#include "def.h"

#include <sys/types.h>
#include <regex.h>

#include "macro.h"

#define SRCH_BEGIN	(0)		/* search sub-codes */
#define SRCH_FORW	(-1)
#define SRCH_BACK	(-2)
#define SRCH_NOPR	(-3)
#define SRCH_ACCM	(-4)
#define SRCH_MARK	(-5)

#define RE_NMATCH	10		/* max number of matches */
#define REPLEN		256		/* max length of replacement string */

char	re_pat[NPAT];			/* regex pattern */
int	re_srch_lastdir = SRCH_NOPR;	/* last search flags */
int	casefoldsearch = TRUE;		/* does search ignore case? */

static int	 re_doreplace(RSIZE, char *);
static int	 re_forwsrch(void);
static int	 re_backsrch(void);
static int	 re_readpattern(char *);
static int	 killmatches(int);
static int	 countmatches(int);

/*
 * Search forward.
 * Get a search string from the user and search for it starting at ".".  If
 * found, move "." to just after the matched characters.  display does all
 * the hard stuff.  If not found, it just prints a message.
 *
 * Returns TRUE on a match, FALSE when nothing matched, or the non-TRUE
 * status of re_readpattern() when no usable pattern was obtained.
 * On success the direction is remembered for re_searchagain().
 */
/* ARGSUSED */
int
re_forwsearch(int f, int n)
{
	int	s;

	if ((s = re_readpattern("RE Search")) != TRUE)
		return (s);
	if (re_forwsrch() == FALSE) {
		ewprintf("Search failed: \"%s\"", re_pat);
		return (FALSE);
	}
	re_srch_lastdir = SRCH_FORW;
	return (TRUE);
}

/*
 * Reverse search.
 * Get a search string from the user, and search, starting at "."
 * and proceeding toward the front of the buffer.  If found "." is left
 * pointing at the first character of the pattern [the last character that
 * was matched].
 *
 * Return values mirror re_forwsearch(); on success the direction is
 * remembered for re_searchagain().
 */
/* ARGSUSED */
int
re_backsearch(int f, int n)
{
	int	s;

	if ((s = re_readpattern("RE Search backward")) != TRUE)
		return (s);
	if (re_backsrch() == FALSE) {
		ewprintf("Search failed: \"%s\"", re_pat);
		return (FALSE);
	}
	re_srch_lastdir = SRCH_BACK;
	return (TRUE);
}

/*
 * Search again, using the same search string and direction as the last search
 * command.  The direction has been saved in "re_srch_lastdir", so you know
 * which way to go.
 *
 * Returns FALSE when there was no previous search or the search fails,
 * TRUE otherwise.
 *
 * XXX: This code has problems -- some incompatibility(?) with extend.c causes
 * match to fail when it should not.
 */
/* ARGSUSED */
int
re_searchagain(int f, int n)
{
	if (re_srch_lastdir == SRCH_NOPR) {
		ewprintf("No last search");
		return (FALSE);
	}
	if (re_srch_lastdir == SRCH_FORW) {
		if (re_forwsrch() == FALSE) {
			ewprintf("Search failed: \"%s\"", re_pat);
			return (FALSE);
		}
		return (TRUE);
	}
	if (re_srch_lastdir == SRCH_BACK) {
		if (re_backsrch() == FALSE) {
			ewprintf("Search failed: \"%s\"", re_pat);
			return (FALSE);
		}
	}

	return (TRUE);
}

/* Compiled regex goes here-- changed only when new pattern read */
static regex_t		regex_buff;
static regmatch_t	regex_match[RE_NMATCH];

/*
 * Re-Query Replace.
 *	Replace strings selectively.  Does a search and replace operation.
 *
 * Keys accepted at each match:
 *	' '		replace and continue
 *	'.'		replace and stop
 *	'!'		replace this and every remaining match
 *	CCHR('?')	skip this match (shown to the user as <DEL>)
 *	^G		call ctrlg() and stop
 *	ESC, '`'	stop
 * Any other key prints a help line and asks again.
 *
 * Returns ABORT if reading the replacement string is aborted, FALSE if a
 * replacement fails, the status of re_readpattern() when no pattern was
 * obtained, and TRUE otherwise.
 */
/* ARGSUSED */
int
re_queryrepl(int f, int n)
{
	int	rcnt = 0;		/* replacements made so far	*/
	int	plen, s;		/* length of found string	*/
	char	news[NPAT];		/* replacement string		*/

	if ((s = re_readpattern("RE Query replace")) != TRUE)
		return (s);
	if (eread("Query replace %s with: ", news, NPAT,
	    EFNUL | EFNEW | EFCR, re_pat) == NULL)
		return (ABORT);
	ewprintf("Query replacing %s with %s:", re_pat, news);

	/*
	 * Search forward repeatedly, checking each time whether to insert
	 * or not.  The "!" case makes the check always true, so it gets put
	 * into a tighter loop for efficiency.
	 */
	while (re_forwsrch() == TRUE) {
retry:
		update(CMODE);
		switch (getkey(FALSE)) {
		case ' ':
			plen = regex_match[0].rm_eo - regex_match[0].rm_so;
			if (re_doreplace((RSIZE)plen, news) == FALSE)
				return (FALSE);
			rcnt++;
			break;

		case '.':
			plen = regex_match[0].rm_eo - regex_match[0].rm_so;
			if (re_doreplace((RSIZE)plen, news) == FALSE)
				return (FALSE);
			rcnt++;
			goto stopsearch;

		case CCHR('G'):				/* ^G */
			(void)ctrlg(FFRAND, 0);
			goto stopsearch;
		case CCHR('['):				/* ESC */
		case '`':
			goto stopsearch;
		case '!':
			do {
				plen = regex_match[0].rm_eo -
				    regex_match[0].rm_so;
				if (re_doreplace((RSIZE)plen, news) == FALSE)
					return (FALSE);
				rcnt++;
			} while (re_forwsrch() == TRUE);
			goto stopsearch;

		case CCHR('?'):				/* To not replace */
			break;

		default:
			ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit");
			goto retry;
		}
	}

stopsearch:
	curwp->w_rflag |= WFFULL;
	update(CMODE);
	if (!inmacro) {
		if (rcnt == 0)
			ewprintf("(No replacements done)");
		else if (rcnt == 1)
			ewprintf("(1 replacement done)");
		else
			ewprintf("(%d replacements done)", rcnt);
	}
	return (TRUE);
}

/*
 * Routine re_doreplace calls lreplace to make replacements needed by
 * re_query replace.  Its reason for existence is to deal with \1, \2. etc.
 *  plen: length to remove
 *  st:   replacement string
 *
 * A backslash followed by digits is replaced by the text of that numbered
 * sub-match taken from the current line; a backslash followed by any other
 * character inserts that character literally; a trailing backslash is
 * dropped.  Returns FALSE if the expanded text does not fit in REPLEN bytes
 * or the sub-match number is not below RE_NMATCH, otherwise the result of
 * lreplace().
 */
static int
re_doreplace(RSIZE plen, char *st)
{
	int		 j, k, s, more, num, state;
	struct line	*clp;
	char		 repstr[REPLEN];

	clp = curwp->w_dotp;
	more = TRUE;
	j = 0;
	state = 0;
	num = 0;

	/* The following FSA parses the replacement string */
	while (more) {
		switch (state) {
		case 0:		/* ordinary text */
			if (*st == '\\') {
				st++;
				state = 1;
			} else if (*st == '\0')
				more = FALSE;
			else {
				repstr[j] = *st;
				j++;
				if (j >= REPLEN)
					return (FALSE);
				st++;
			}
			break;
		case 1:		/* just saw a backslash */
			if (*st >= '0' && *st <= '9') {
				num = *st - '0';
				st++;
				state = 2;
			} else if (*st == '\0')
				more = FALSE;
			else {
				repstr[j] = *st;
				j++;
				if (j >= REPLEN)
					return (FALSE);
				st++;
				state = 0;
			}
			break;
		case 2:		/* collecting a sub-match number */
			if (*st >= '0' && *st <= '9') {
				num = 10 * num + (*st - '0');
				/*
				 * Reject as soon as the number is out of
				 * range; it can only grow, and stopping here
				 * keeps a long digit run from overflowing.
				 */
				if (num >= RE_NMATCH)
					return (FALSE);
				st++;
			} else {
				if (num >= RE_NMATCH)
					return (FALSE);
				k = regex_match[num].rm_eo -
				    regex_match[num].rm_so;
				if (j + k >= REPLEN)
					return (FALSE);
				/* k is 0 for a sub-expression that did not match */
				if (k > 0)
					memcpy(&repstr[j],
					    &(clp->l_text[regex_match[num].rm_so]),
					    k);
				j += k;
				if (*st == '\0')
					more = FALSE;
				else if (*st == '\\') {
					st++;
					state = 1;
				} else {
					repstr[j] = *st;
					j++;
					if (j >= REPLEN)
						return (FALSE);
					st++;
					state = 0;
				}
			}
			break;
		}		/* switch (state) */
	}			/* while (more)   */

	repstr[j] = '\0';
	s = lreplace(plen, repstr);
	return (s);
}

/*
 * This routine does the real work of a forward search.  The compiled form
 * of the pattern "re_pat" is sitting in "regex_buff".  If found, dot is
 * updated (to just past the match), the window system is notified of the
 * change, and TRUE is returned.  If the string isn't found, FALSE is
 * returned.  The match offsets are left in "regex_match".
 */
static int
re_forwsrch(void)
{
	int		 tbo, tdotline, error;
	struct line	*clp;

	clp = curwp->w_dotp;
	tbo = curwp->w_doto;
	tdotline = curwp->w_dotline;

	/*
	 * Don't start matching past end of line -- must move to
	 * beginning of next line, unless at end of file.
	 */
	if (tbo == clp->l_used && clp != curbp->b_headp) {
		clp = lforw(clp);
		tdotline++;
		tbo = 0;
	}
	/*
	 * Note this loop does not process the last line, but this editor
	 * always makes the last line empty so this is good.
	 */
	while (clp != (curbp->b_headp)) {
		/* REG_STARTEND: search only [tbo, end of line) */
		regex_match[0].rm_so = tbo;
		regex_match[0].rm_eo = llength(clp);
		error = regexec(&regex_buff, ltext(clp), RE_NMATCH,
		    regex_match, REG_STARTEND);
		if (error != 0) {
			clp = lforw(clp);
			tdotline++;
			tbo = 0;
		} else {
			curwp->w_doto = regex_match[0].rm_eo;
			curwp->w_dotp = clp;
			curwp->w_dotline = tdotline;
			curwp->w_rflag |= WFMOVE;
			return (TRUE);
		}
	}
	return (FALSE);
}

/*
 * This routine does the real work of a backward search.  The compiled form
 * of the pattern "re_pat" is sitting in "regex_buff".  If found, dot is
 * updated (to the start of the match), the window system is notified of the
 * change, and TRUE is returned.  If the string isn't found, FALSE is
 * returned.
 */
static int
re_backsrch(void)
{
	struct line	*clp;
	int		 tbo, tdotline;
	regmatch_t	 lastmatch;

	clp = curwp->w_dotp;
	tbo = curwp->w_doto;
	tdotline = curwp->w_dotline;

	/* Start search one position to the left of dot */
	tbo = tbo - 1;
	if (tbo < 0) {
		/* must move up one line */
		clp = lback(clp);
		tdotline--;
		tbo = llength(clp);
	}

	/*
	 * Note this loop does not process the last line, but this editor
	 * always makes the last line empty so this is good.
	 */
	while (clp != (curbp->b_headp)) {
		regex_match[0].rm_so = 0;
		regex_match[0].rm_eo = llength(clp);
		lastmatch.rm_so = -1;
		/*
		 * Keep searching until we don't match any longer.  Assumes a
		 * non-match does not modify the regex_match array.  We have to
		 * do this character-by-character after the first match since
		 * POSIX regexps don't give you a way to do reverse matches.
		 */
		while (!regexec(&regex_buff, ltext(clp), RE_NMATCH,
		    regex_match, REG_STARTEND) &&
		    regex_match[0].rm_so < tbo) {
			lastmatch = regex_match[0];
			regex_match[0].rm_so++;
			regex_match[0].rm_eo = llength(clp);
		}
		if (lastmatch.rm_so == -1) {
			/* nothing on this line; try the one above */
			clp = lback(clp);
			tdotline--;
			tbo = llength(clp);
		} else {
			regex_match[0] = lastmatch;
			curwp->w_doto = regex_match[0].rm_so;
			curwp->w_dotp = clp;
			curwp->w_dotline = tdotline;
			curwp->w_rflag |= WFMOVE;
			return (TRUE);
		}
	}
	return (FALSE);
}

/*
 * Read a pattern.
 * Stash it in the external variable "re_pat".  The "re_pat" is
 * not updated if the user types in an empty line.  If the user typed
 * an empty line, and there is no old pattern, it is an error.
 * Display the old pattern, in the style of Jeff Lomicka.  There is
 * some do-it-yourself control expansion.
 *
 * A new pattern is compiled into "regex_buff" (case-insensitively when
 * "casefoldsearch" is set).  Returns ABORT if the read is aborted, FALSE if
 * the pattern does not compile or no pattern is available, TRUE otherwise.
 */
static int
re_readpattern(char *prompt)
{
	static int	dofree = 0;	/* regex_buff holds a compiled regex */
	int		flags, error, s;
	char		tpat[NPAT], *rep;

	if (re_pat[0] == '\0')
		rep = eread("%s: ", tpat, NPAT, EFNEW | EFCR, prompt);
	else
		rep = eread("%s: (default %s) ", tpat, NPAT,
		    EFNUL | EFNEW | EFCR, prompt, re_pat);
	if (rep == NULL)
		return (ABORT);
	if (rep[0] != '\0') {
		/* New pattern given */
		(void)strlcpy(re_pat, tpat, sizeof(re_pat));
		if (casefoldsearch)
			flags = REG_EXTENDED | REG_ICASE;
		else
			flags = REG_EXTENDED;
		if (dofree) {
			regfree(&regex_buff);
			dofree = 0;
		}
		error = regcomp(&regex_buff, re_pat, flags);
		if (error != 0) {
			char	message[256];

			regerror(error, &regex_buff, message, sizeof(message));
			ewprintf("Regex Error: %s", message);
			re_pat[0] = '\0';
			/*
			 * regex_buff no longer holds a compiled pattern, so
			 * re_searchagain() must not try to reuse it.
			 */
			re_srch_lastdir = SRCH_NOPR;
			return (FALSE);
		}
		dofree = 1;
		s = TRUE;
	} else if (re_pat[0] != '\0')
		/* Just using old pattern */
		s = TRUE;
	else
		s = FALSE;
	return (s);
}

/*
 * Cause case to not matter in searches.  This is the default.  If called
 * with argument cause case to matter.
 */
/* ARGSUSED*/
int
setcasefold(int f, int n)
{
	if (f & FFARG) {
		casefoldsearch = FALSE;
		ewprintf("Case-fold-search unset");
	} else {
		casefoldsearch = TRUE;
		ewprintf("Case-fold-search set");
	}

	/*
	 * Invalidate the regular expression pattern since I'm too lazy to
	 * recompile it.
	 */
	re_pat[0] = '\0';
	return (TRUE);
}

/*
 * Delete all lines after dot that contain a string matching regex.
 */
/* ARGSUSED */
int
delmatchlines(int f, int n)
{
	int	s;

	if ((s = re_readpattern("Flush lines (containing match for regexp)"))
	    != TRUE)
		return (s);

	s = killmatches(TRUE);
	return (s);
}

/*
 * Delete all lines after dot that don't contain a string matching regex.
 */
/* ARGSUSED */
int
delnonmatchlines(int f, int n)
{
	int	s;

	if ((s = re_readpattern("Keep lines (containing match for regexp)"))
	    != TRUE)
		return (s);

	s = killmatches(FALSE);
	return (s);
}

/*
 * This function does the work of deleting matching lines.
 * With cond TRUE, lines that match are deleted; with cond FALSE, lines that
 * do not match are deleted.  Scanning starts at dot's line, or at the next
 * line when dot sits at the end of its line.  Returns FALSE if ldelete()
 * fails, TRUE otherwise.
 */
static int
killmatches(int cond)
{
	int		 s, error;
	int		 count = 0;
	struct line	*clp;

	clp = curwp->w_dotp;
	if (curwp->w_doto == llength(clp))
		/* Consider dot on next line */
		clp = lforw(clp);

	while (clp != (curbp->b_headp)) {
		/* see if line matches */
		regex_match[0].rm_so = 0;
		regex_match[0].rm_eo = llength(clp);
		error = regexec(&regex_buff, ltext(clp), RE_NMATCH,
		    regex_match, REG_STARTEND);

		/* Delete line when appropriate */
		if ((cond == FALSE && error) || (cond == TRUE && !error)) {
			curwp->w_doto = 0;
			curwp->w_dotp = clp;
			count++;
			/* + 1 also removes the line's newline */
			s = ldelete(llength(clp) + 1, KNONE);
			/* continue from wherever dot is after the delete */
			clp = curwp->w_dotp;
			curwp->w_rflag |= WFMOVE;
			if (s == FALSE)
				return (FALSE);
		} else
			clp = lforw(clp);
	}

	ewprintf("%d line(s) deleted", count);
	if (count > 0)
		curwp->w_rflag |= WFMOVE;

	return (TRUE);
}

/*
 * Count lines matching regex.
 */
/* ARGSUSED */
int
cntmatchlines(int f, int n)
{
	int	s;

	if ((s = re_readpattern("Count lines (matching regexp)")) != TRUE)
		return (s);
	s = countmatches(TRUE);

	return (s);
}

/*
 * Count lines that fail to match regex.
 */
/* ARGSUSED */
int
cntnonmatchlines(int f, int n)
{
	int	s;

	if ((s = re_readpattern("Count lines (not matching regexp)")) != TRUE)
		return (s);
	s = countmatches(FALSE);

	return (s);
}

/*
 * This function does the work of counting matching lines.
 * With cond TRUE, matching lines are counted; with cond FALSE, non-matching
 * lines are counted.  Scanning starts at dot's line, or at the next line
 * when dot sits at the end of its line.  The count is reported with
 * ewprintf(); the function always returns TRUE.
 */
static int
countmatches(int cond)
{
	int		 error;
	int		 count = 0;
	struct line	*clp;

	clp = curwp->w_dotp;
	if (curwp->w_doto == llength(clp))
		/* Consider dot on next line */
		clp = lforw(clp);

	while (clp != (curbp->b_headp)) {
		/* see if line matches */
		regex_match[0].rm_so = 0;
		regex_match[0].rm_eo = llength(clp);
		error = regexec(&regex_buff, ltext(clp), RE_NMATCH,
		    regex_match, REG_STARTEND);

		/* Count line when appropriate */
		if ((cond == FALSE && error) || (cond == TRUE && !error))
			count++;
		clp = lforw(clp);
	}

	if (cond)
		ewprintf("Number of lines matching: %d", count);
	else
		ewprintf("Number of lines not matching: %d", count);

	return (TRUE);
}
#endif	/* REGEX */