1 /* $OpenBSD: re_search.c,v 1.28 2013/09/24 13:29:51 jasper Exp $ */ 2 3 /* This file is in the public domain. */ 4 5 /* 6 * regular expression search commands for Mg 7 * 8 * This file contains functions to implement several of gnuemacs's regular 9 * expression functions for Mg. Several of the routines below are just minor 10 * re-arrangements of Mg's non-regular expression search functions. Some of 11 * them are similar in structure to the original MicroEMACS, others are 12 * modifications of Rich Ellison's code. Peter Newton re-wrote about half of 13 * them from scratch. 14 */ 15 16 #ifdef REGEX 17 #include "def.h" 18 19 #include <sys/types.h> 20 #include <regex.h> 21 22 #include "macro.h" 23 24 #define SRCH_BEGIN (0) /* search sub-codes */ 25 #define SRCH_FORW (-1) 26 #define SRCH_BACK (-2) 27 #define SRCH_NOPR (-3) 28 #define SRCH_ACCM (-4) 29 #define SRCH_MARK (-5) 30 31 #define RE_NMATCH 10 /* max number of matches */ 32 #define REPLEN 256 /* max length of replacement string */ 33 34 char re_pat[NPAT]; /* regex pattern */ 35 int re_srch_lastdir = SRCH_NOPR; /* last search flags */ 36 int casefoldsearch = TRUE; /* does search ignore case? */ 37 38 static int re_doreplace(RSIZE, char *); 39 static int re_forwsrch(void); 40 static int re_backsrch(void); 41 static int re_readpattern(char *); 42 static int killmatches(int); 43 static int countmatches(int); 44 45 /* 46 * Search forward. 47 * Get a search string from the user and search for it starting at ".". If 48 * found, move "." to just after the matched characters. display does all 49 * the hard stuff. If not found, it just prints a message. 50 */ 51 /* ARGSUSED */ 52 int 53 re_forwsearch(int f, int n) 54 { 55 int s; 56 57 if ((s = re_readpattern("RE Search")) != TRUE) 58 return (s); 59 if (re_forwsrch() == FALSE) { 60 ewprintf("Search failed: \"%s\"", re_pat); 61 return (FALSE); 62 } 63 re_srch_lastdir = SRCH_FORW; 64 return (TRUE); 65 } 66 67 /* 68 * Reverse search. 69 * Get a search string from the user, and search, starting at "." 70 * and proceeding toward the front of the buffer. If found "." is left 71 * pointing at the first character of the pattern [the last character that 72 * was matched]. 73 */ 74 /* ARGSUSED */ 75 int 76 re_backsearch(int f, int n) 77 { 78 int s; 79 80 if ((s = re_readpattern("RE Search backward")) != TRUE) 81 return (s); 82 if (re_backsrch() == FALSE) { 83 ewprintf("Search failed: \"%s\"", re_pat); 84 return (FALSE); 85 } 86 re_srch_lastdir = SRCH_BACK; 87 return (TRUE); 88 } 89 90 /* 91 * Search again, using the same search string and direction as the last search 92 * command. The direction has been saved in "srch_lastdir", so you know which 93 * way to go. 94 * 95 * XXX: This code has problems -- some incompatibility(?) with extend.c causes 96 * match to fail when it should not. 97 */ 98 /* ARGSUSED */ 99 int 100 re_searchagain(int f, int n) 101 { 102 if (re_srch_lastdir == SRCH_NOPR) { 103 ewprintf("No last search"); 104 return (FALSE); 105 } 106 if (re_srch_lastdir == SRCH_FORW) { 107 if (re_forwsrch() == FALSE) { 108 ewprintf("Search failed: \"%s\"", re_pat); 109 return (FALSE); 110 } 111 return (TRUE); 112 } 113 if (re_srch_lastdir == SRCH_BACK) 114 if (re_backsrch() == FALSE) { 115 ewprintf("Search failed: \"%s\"", re_pat); 116 return (FALSE); 117 } 118 119 return (TRUE); 120 } 121 122 /* Compiled regex goes here-- changed only when new pattern read */ 123 static regex_t regex_buff; 124 static regmatch_t regex_match[RE_NMATCH]; 125 126 /* 127 * Re-Query Replace. 128 * Replace strings selectively. Does a search and replace operation. 129 */ 130 /* ARGSUSED */ 131 int 132 re_queryrepl(int f, int n) 133 { 134 int rcnt = 0; /* replacements made so far */ 135 int plen, s; /* length of found string */ 136 char news[NPAT]; /* replacement string */ 137 138 if ((s = re_readpattern("RE Query replace")) != TRUE) 139 return (s); 140 if (eread("Query replace %s with: ", news, NPAT, 141 EFNUL | EFNEW | EFCR, re_pat) == NULL) 142 return (ABORT); 143 ewprintf("Query replacing %s with %s:", re_pat, news); 144 145 /* 146 * Search forward repeatedly, checking each time whether to insert 147 * or not. The "!" case makes the check always true, so it gets put 148 * into a tighter loop for efficiency. 149 */ 150 while (re_forwsrch() == TRUE) { 151 retry: 152 update(CMODE); 153 switch (getkey(FALSE)) { 154 case ' ': 155 plen = regex_match[0].rm_eo - regex_match[0].rm_so; 156 if (re_doreplace((RSIZE)plen, news) == FALSE) 157 return (FALSE); 158 rcnt++; 159 break; 160 161 case '.': 162 plen = regex_match[0].rm_eo - regex_match[0].rm_so; 163 if (re_doreplace((RSIZE)plen, news) == FALSE) 164 return (FALSE); 165 rcnt++; 166 goto stopsearch; 167 168 case CCHR('G'): /* ^G */ 169 (void)ctrlg(FFRAND, 0); 170 goto stopsearch; 171 case CCHR('['): /* ESC */ 172 case '`': 173 goto stopsearch; 174 case '!': 175 do { 176 plen = regex_match[0].rm_eo - regex_match[0].rm_so; 177 if (re_doreplace((RSIZE)plen, news) == FALSE) 178 return (FALSE); 179 rcnt++; 180 } while (re_forwsrch() == TRUE); 181 goto stopsearch; 182 183 case CCHR('?'): /* To not replace */ 184 break; 185 186 default: 187 ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit"); 188 goto retry; 189 } 190 } 191 192 stopsearch: 193 curwp->w_rflag |= WFFULL; 194 update(CMODE); 195 if (!inmacro) { 196 if (rcnt == 0) 197 ewprintf("(No replacements done)"); 198 else if (rcnt == 1) 199 ewprintf("(1 replacement done)"); 200 else 201 ewprintf("(%d replacements done)", rcnt); 202 } 203 return (TRUE); 204 } 205 206 /* 207 * Routine re_doreplace calls lreplace to make replacements needed by 208 * re_query replace. Its reason for existence is to deal with \1, \2. etc. 209 * plen: length to remove 210 * st: replacement string 211 */ 212 static int 213 re_doreplace(RSIZE plen, char *st) 214 { 215 int j, k, s, more, num, state; 216 struct line *clp; 217 char repstr[REPLEN]; 218 219 clp = curwp->w_dotp; 220 more = TRUE; 221 j = 0; 222 state = 0; 223 num = 0; 224 225 /* The following FSA parses the replacement string */ 226 while (more) { 227 switch (state) { 228 case 0: 229 if (*st == '\\') { 230 st++; 231 state = 1; 232 } else if (*st == '\0') 233 more = FALSE; 234 else { 235 repstr[j] = *st; 236 j++; 237 if (j >= REPLEN) 238 return (FALSE); 239 st++; 240 } 241 break; 242 case 1: 243 if (*st >= '0' && *st <= '9') { 244 num = *st - '0'; 245 st++; 246 state = 2; 247 } else if (*st == '\0') 248 more = FALSE; 249 else { 250 repstr[j] = *st; 251 j++; 252 if (j >= REPLEN) 253 return (FALSE); 254 st++; 255 state = 0; 256 } 257 break; 258 case 2: 259 if (*st >= '0' && *st <= '9') { 260 num = 10 * num + *st - '0'; 261 st++; 262 } else { 263 if (num >= RE_NMATCH) 264 return (FALSE); 265 k = regex_match[num].rm_eo - regex_match[num].rm_so; 266 if (j + k >= REPLEN) 267 return (FALSE); 268 bcopy(&(clp->l_text[regex_match[num].rm_so]), 269 &repstr[j], k); 270 j += k; 271 if (*st == '\0') 272 more = FALSE; 273 if (*st == '\\') { 274 st++; 275 state = 1; 276 } else { 277 repstr[j] = *st; 278 j++; 279 if (j >= REPLEN) 280 return (FALSE); 281 st++; 282 state = 0; 283 } 284 } 285 break; 286 } /* switch (state) */ 287 } /* while (more) */ 288 289 repstr[j] = '\0'; 290 s = lreplace(plen, repstr); 291 return (s); 292 } 293 294 /* 295 * This routine does the real work of a forward search. The pattern is 296 * sitting in the external variable "pat". If found, dot is updated, the 297 * window system is notified of the change, and TRUE is returned. If the 298 * string isn't found, FALSE is returned. 299 */ 300 static int 301 re_forwsrch(void) 302 { 303 int tbo, error; 304 struct line *clp; 305 306 clp = curwp->w_dotp; 307 tbo = curwp->w_doto; 308 309 if (tbo == clp->l_used) 310 /* 311 * Don't start matching past end of line -- must move to 312 * beginning of next line, unless at end of file. 313 */ 314 if (clp != curbp->b_headp) { 315 clp = lforw(clp); 316 tbo = 0; 317 } 318 /* 319 * Note this loop does not process the last line, but this editor 320 * always makes the last line empty so this is good. 321 */ 322 while (clp != (curbp->b_headp)) { 323 regex_match[0].rm_so = tbo; 324 regex_match[0].rm_eo = llength(clp); 325 error = regexec(®ex_buff, ltext(clp), RE_NMATCH, regex_match, 326 REG_STARTEND); 327 if (error != 0) { 328 clp = lforw(clp); 329 tbo = 0; 330 } else { 331 curwp->w_doto = regex_match[0].rm_eo; 332 curwp->w_dotp = clp; 333 curwp->w_rflag |= WFMOVE; 334 return (TRUE); 335 } 336 } 337 return (FALSE); 338 } 339 340 /* 341 * This routine does the real work of a backward search. The pattern is sitting 342 * in the external variable "re_pat". If found, dot is updated, the window 343 * system is notified of the change, and TRUE is returned. If the string isn't 344 * found, FALSE is returned. 345 */ 346 static int 347 re_backsrch(void) 348 { 349 struct line *clp; 350 int tbo; 351 regmatch_t lastmatch; 352 353 clp = curwp->w_dotp; 354 tbo = curwp->w_doto; 355 356 /* Start search one position to the left of dot */ 357 tbo = tbo - 1; 358 if (tbo < 0) { 359 /* must move up one line */ 360 clp = lback(clp); 361 tbo = llength(clp); 362 } 363 364 /* 365 * Note this loop does not process the last line, but this editor 366 * always makes the last line empty so this is good. 367 */ 368 while (clp != (curbp->b_headp)) { 369 regex_match[0].rm_so = 0; 370 regex_match[0].rm_eo = llength(clp); 371 lastmatch.rm_so = -1; 372 /* 373 * Keep searching until we don't match any longer. Assumes a 374 * non-match does not modify the regex_match array. We have to 375 * do this character-by-character after the first match since 376 * POSIX regexps don't give you a way to do reverse matches. 377 */ 378 while (!regexec(®ex_buff, ltext(clp), RE_NMATCH, regex_match, 379 REG_STARTEND) && regex_match[0].rm_so < tbo) { 380 memcpy(&lastmatch, ®ex_match[0], sizeof(regmatch_t)); 381 regex_match[0].rm_so++; 382 regex_match[0].rm_eo = llength(clp); 383 } 384 if (lastmatch.rm_so == -1) { 385 clp = lback(clp); 386 tbo = llength(clp); 387 } else { 388 memcpy(®ex_match[0], &lastmatch, sizeof(regmatch_t)); 389 curwp->w_doto = regex_match[0].rm_so; 390 curwp->w_dotp = clp; 391 curwp->w_rflag |= WFMOVE; 392 return (TRUE); 393 } 394 } 395 return (FALSE); 396 } 397 398 /* 399 * Read a pattern. 400 * Stash it in the external variable "re_pat". The "pat" is 401 * not updated if the user types in an empty line. If the user typed 402 * an empty line, and there is no old pattern, it is an error. 403 * Display the old pattern, in the style of Jeff Lomicka. There is 404 * some do-it-yourself control expansion. 405 */ 406 static int 407 re_readpattern(char *prompt) 408 { 409 static int dofree = 0; 410 int flags, error, s; 411 char tpat[NPAT], *rep; 412 413 if (re_pat[0] == '\0') 414 rep = eread("%s: ", tpat, NPAT, EFNEW | EFCR, prompt); 415 else 416 rep = eread("%s: (default %s) ", tpat, NPAT, 417 EFNUL | EFNEW | EFCR, prompt, re_pat); 418 if (rep == NULL) 419 return (ABORT); 420 if (rep[0] != '\0') { 421 /* New pattern given */ 422 (void)strlcpy(re_pat, tpat, sizeof(re_pat)); 423 if (casefoldsearch) 424 flags = REG_EXTENDED | REG_ICASE; 425 else 426 flags = REG_EXTENDED; 427 if (dofree) 428 regfree(®ex_buff); 429 error = regcomp(®ex_buff, re_pat, flags); 430 if (error != 0) { 431 char message[256]; 432 regerror(error, ®ex_buff, message, sizeof(message)); 433 ewprintf("Regex Error: %s", message); 434 re_pat[0] = '\0'; 435 return (FALSE); 436 } 437 dofree = 1; 438 s = TRUE; 439 } else if (rep[0] == '\0' && re_pat[0] != '\0') 440 /* Just using old pattern */ 441 s = TRUE; 442 else 443 s = FALSE; 444 return (s); 445 } 446 447 /* 448 * Cause case to not matter in searches. This is the default. If called 449 * with argument cause case to matter. 450 */ 451 /* ARGSUSED*/ 452 int 453 setcasefold(int f, int n) 454 { 455 if (f & FFARG) { 456 casefoldsearch = FALSE; 457 ewprintf("Case-fold-search unset"); 458 } else { 459 casefoldsearch = TRUE; 460 ewprintf("Case-fold-search set"); 461 } 462 463 /* 464 * Invalidate the regular expression pattern since I'm too lazy to 465 * recompile it. 466 */ 467 re_pat[0] = '\0'; 468 return (TRUE); 469 } 470 471 /* 472 * Delete all lines after dot that contain a string matching regex. 473 */ 474 /* ARGSUSED */ 475 int 476 delmatchlines(int f, int n) 477 { 478 int s; 479 480 if ((s = re_readpattern("Flush lines (containing match for regexp)")) 481 != TRUE) 482 return (s); 483 484 s = killmatches(TRUE); 485 return (s); 486 } 487 488 /* 489 * Delete all lines after dot that don't contain a string matching regex. 490 */ 491 /* ARGSUSED */ 492 int 493 delnonmatchlines(int f, int n) 494 { 495 int s; 496 497 if ((s = re_readpattern("Keep lines (containing match for regexp)")) 498 != TRUE) 499 return (s); 500 501 s = killmatches(FALSE); 502 return (s); 503 } 504 505 /* 506 * This function does the work of deleting matching lines. 507 */ 508 static int 509 killmatches(int cond) 510 { 511 int s, error; 512 int count = 0; 513 struct line *clp; 514 515 clp = curwp->w_dotp; 516 if (curwp->w_doto == llength(clp)) 517 /* Consider dot on next line */ 518 clp = lforw(clp); 519 520 while (clp != (curbp->b_headp)) { 521 /* see if line matches */ 522 regex_match[0].rm_so = 0; 523 regex_match[0].rm_eo = llength(clp); 524 error = regexec(®ex_buff, ltext(clp), RE_NMATCH, regex_match, 525 REG_STARTEND); 526 527 /* Delete line when appropriate */ 528 if ((cond == FALSE && error) || (cond == TRUE && !error)) { 529 curwp->w_doto = 0; 530 curwp->w_dotp = clp; 531 count++; 532 s = ldelete(llength(clp) + 1, KNONE); 533 clp = curwp->w_dotp; 534 curwp->w_rflag |= WFMOVE; 535 if (s == FALSE) 536 return (FALSE); 537 } else 538 clp = lforw(clp); 539 } 540 541 ewprintf("%d line(s) deleted", count); 542 if (count > 0) 543 curwp->w_rflag |= WFMOVE; 544 545 return (TRUE); 546 } 547 548 /* 549 * Count lines matching regex. 550 */ 551 /* ARGSUSED */ 552 int 553 cntmatchlines(int f, int n) 554 { 555 int s; 556 557 if ((s = re_readpattern("Count lines (matching regexp)")) != TRUE) 558 return (s); 559 s = countmatches(TRUE); 560 561 return (s); 562 } 563 564 /* 565 * Count lines that fail to match regex. 566 */ 567 /* ARGSUSED */ 568 int 569 cntnonmatchlines(int f, int n) 570 { 571 int s; 572 573 if ((s = re_readpattern("Count lines (not matching regexp)")) != TRUE) 574 return (s); 575 s = countmatches(FALSE); 576 577 return (s); 578 } 579 580 /* 581 * This function does the work of counting matching lines. 582 */ 583 int 584 countmatches(int cond) 585 { 586 int error; 587 int count = 0; 588 struct line *clp; 589 590 clp = curwp->w_dotp; 591 if (curwp->w_doto == llength(clp)) 592 /* Consider dot on next line */ 593 clp = lforw(clp); 594 595 while (clp != (curbp->b_headp)) { 596 /* see if line matches */ 597 regex_match[0].rm_so = 0; 598 regex_match[0].rm_eo = llength(clp); 599 error = regexec(®ex_buff, ltext(clp), RE_NMATCH, regex_match, 600 REG_STARTEND); 601 602 /* Count line when appropriate */ 603 if ((cond == FALSE && error) || (cond == TRUE && !error)) 604 count++; 605 clp = lforw(clp); 606 } 607 608 if (cond) 609 ewprintf("Number of lines matching: %d", count); 610 else 611 ewprintf("Number of lines not matching: %d", count); 612 613 return (TRUE); 614 } 615 #endif /* REGEX */ 616