1 /* $OpenBSD: re_search.c,v 1.37 2023/03/08 04:43:11 guenther Exp $ */
2
3 /* This file is in the public domain. */
4
5 /*
6 * regular expression search commands for Mg
7 *
8 * This file contains functions to implement several of gnuemacs's regular
9 * expression functions for Mg. Several of the routines below are just minor
10 * re-arrangements of Mg's non-regular expression search functions. Some of
11 * them are similar in structure to the original MicroEMACS, others are
12 * modifications of Rich Ellison's code. Peter Newton re-wrote about half of
13 * them from scratch.
14 */
15
16 #ifdef REGEX
17 #include <sys/queue.h>
18 #include <sys/types.h>
19 #include <regex.h>
20 #include <signal.h>
21 #include <stdio.h>
22 #include <string.h>
23
24 #include "def.h"
25 #include "macro.h"
26
/*
 * Search sub-codes.  In the code visible in this file only SRCH_NOPR,
 * SRCH_FORW and SRCH_BACK are used, as values of re_srch_lastdir.
 */
#define SRCH_BEGIN	(0)			/* search sub-codes */
#define SRCH_FORW	(-1)
#define SRCH_BACK	(-2)
#define SRCH_NOPR	(-3)
#define SRCH_ACCM	(-4)
#define SRCH_MARK	(-5)

/*
 * RE_NMATCH sizes the regex_match[] array handed to regexec(); entry 0 is
 * used as the whole match and re_doreplace() rejects back-reference numbers
 * >= RE_NMATCH.  REPLEN sizes the expansion buffer in re_doreplace().
 */
#define RE_NMATCH	10		/* max number of matches */
#define REPLEN		256		/* max length of replacement string */

/*
 * re_pat holds the text of the current pattern; an empty string means
 * "no pattern" and makes re_readpattern() insist on a new one.
 * re_srch_lastdir records the direction for re_searchagain().
 * casefoldsearch selects REG_ICASE when a pattern is compiled.
 */
char	re_pat[NPAT];			/* regex pattern */
int	re_srch_lastdir = SRCH_NOPR;	/* last search flags */
int	casefoldsearch = TRUE;		/* does search ignore case? */

/* Helpers private to this file. */
static int	 re_doreplace(RSIZE, char *);
static int	 re_forwsrch(void);
static int	 re_backsrch(void);
static int	 re_readpattern(char *);
static int	 killmatches(int);
static int	 countmatches(int);
47
48 /*
49 * Search forward.
50 * Get a search string from the user and search for it starting at ".". If
51 * found, move "." to just after the matched characters. display does all
52 * the hard stuff. If not found, it just prints a message.
53 */
54 int
re_forwsearch(int f,int n)55 re_forwsearch(int f, int n)
56 {
57 int s;
58
59 if ((s = re_readpattern("RE Search")) != TRUE)
60 return (s);
61 if (re_forwsrch() == FALSE) {
62 dobeep();
63 ewprintf("Search failed: \"%s\"", re_pat);
64 return (FALSE);
65 }
66 re_srch_lastdir = SRCH_FORW;
67 return (TRUE);
68 }
69
70 /*
71 * Reverse search.
72 * Get a search string from the user, and search, starting at "."
73 * and proceeding toward the front of the buffer. If found "." is left
74 * pointing at the first character of the pattern [the last character that
75 * was matched].
76 */
77 int
re_backsearch(int f,int n)78 re_backsearch(int f, int n)
79 {
80 int s;
81
82 if ((s = re_readpattern("RE Search backward")) != TRUE)
83 return (s);
84 if (re_backsrch() == FALSE) {
85 dobeep();
86 ewprintf("Search failed: \"%s\"", re_pat);
87 return (FALSE);
88 }
89 re_srch_lastdir = SRCH_BACK;
90 return (TRUE);
91 }
92
93 /*
94 * Search again, using the same search string and direction as the last search
95 * command. The direction has been saved in "srch_lastdir", so you know which
96 * way to go.
97 *
98 * XXX: This code has problems -- some incompatibility(?) with extend.c causes
99 * match to fail when it should not.
100 */
101 int
re_searchagain(int f,int n)102 re_searchagain(int f, int n)
103 {
104 if (re_srch_lastdir == SRCH_NOPR) {
105 dobeep();
106 ewprintf("No last search");
107 return (FALSE);
108 }
109 if (re_srch_lastdir == SRCH_FORW) {
110 if (re_forwsrch() == FALSE) {
111 dobeep();
112 ewprintf("Search failed: \"%s\"", re_pat);
113 return (FALSE);
114 }
115 return (TRUE);
116 }
117 if (re_srch_lastdir == SRCH_BACK)
118 if (re_backsrch() == FALSE) {
119 dobeep();
120 ewprintf("Search failed: \"%s\"", re_pat);
121 return (FALSE);
122 }
123
124 return (TRUE);
125 }
126
/*
 * Compiled regex goes here-- changed only when new pattern read
 * (re_readpattern() is the only place in this file that calls regcomp()).
 */
static regex_t		regex_buff;
/*
 * Offsets filled in by the most recent regexec() call; entry 0 is used as
 * the whole match, higher entries feed the \N expansion in re_doreplace().
 */
static regmatch_t	regex_match[RE_NMATCH];
130
131 /*
132 * Re-Query Replace.
133 * Replace strings selectively. Does a search and replace operation.
134 */
135 int
re_queryrepl(int f,int n)136 re_queryrepl(int f, int n)
137 {
138 int rcnt = 0; /* replacements made so far */
139 int plen, s; /* length of found string */
140 char news[NPAT]; /* replacement string */
141
142 if ((s = re_readpattern("RE Query replace")) != TRUE)
143 return (s);
144 if (eread("Query replace %s with: ", news, NPAT,
145 EFNUL | EFNEW | EFCR, re_pat) == NULL)
146 return (ABORT);
147 ewprintf("Query replacing %s with %s:", re_pat, news);
148
149 /*
150 * Search forward repeatedly, checking each time whether to insert
151 * or not. The "!" case makes the check always true, so it gets put
152 * into a tighter loop for efficiency.
153 */
154 while (re_forwsrch() == TRUE) {
155 retry:
156 update(CMODE);
157 switch (getkey(FALSE)) {
158 case ' ':
159 plen = regex_match[0].rm_eo - regex_match[0].rm_so;
160 if (re_doreplace((RSIZE)plen, news) == FALSE)
161 return (FALSE);
162 rcnt++;
163 break;
164
165 case '.':
166 plen = regex_match[0].rm_eo - regex_match[0].rm_so;
167 if (re_doreplace((RSIZE)plen, news) == FALSE)
168 return (FALSE);
169 rcnt++;
170 goto stopsearch;
171
172 case CCHR('G'): /* ^G */
173 (void)ctrlg(FFRAND, 0);
174 goto stopsearch;
175 case CCHR('['): /* ESC */
176 case '`':
177 goto stopsearch;
178 case '!':
179 do {
180 plen = regex_match[0].rm_eo - regex_match[0].rm_so;
181 if (re_doreplace((RSIZE)plen, news) == FALSE)
182 return (FALSE);
183 rcnt++;
184 } while (re_forwsrch() == TRUE);
185 goto stopsearch;
186
187 case CCHR('?'): /* To not replace */
188 break;
189
190 default:
191 ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit");
192 goto retry;
193 }
194 }
195
196 stopsearch:
197 curwp->w_rflag |= WFFULL;
198 update(CMODE);
199 if (!inmacro) {
200 if (rcnt == 0)
201 ewprintf("(No replacements done)");
202 else if (rcnt == 1)
203 ewprintf("(1 replacement done)");
204 else
205 ewprintf("(%d replacements done)", rcnt);
206 }
207 return (TRUE);
208 }
209
210 int
re_repl(int f,int n)211 re_repl(int f, int n)
212 {
213 int rcnt = 0; /* replacements made so far */
214 int plen, s; /* length of found string */
215 char news[NPAT]; /* replacement string */
216
217 if ((s = re_readpattern("RE Replace")) != TRUE)
218 return (s);
219 if (eread("Replace %s with: ", news, NPAT,
220 EFNUL | EFNEW | EFCR, re_pat) == NULL)
221 return (ABORT);
222
223 while (re_forwsrch() == TRUE) {
224 plen = regex_match[0].rm_eo - regex_match[0].rm_so;
225 if (re_doreplace((RSIZE)plen, news) == FALSE)
226 return (FALSE);
227 rcnt++;
228 }
229
230 curwp->w_rflag |= WFFULL;
231 update(CMODE);
232 if (!inmacro)
233 ewprintf("(%d replacement(s) done)", rcnt);
234
235 return(TRUE);
236 }
237
238 /*
239 * Routine re_doreplace calls lreplace to make replacements needed by
240 * re_query replace. Its reason for existence is to deal with \1, \2. etc.
241 * plen: length to remove
242 * st: replacement string
243 */
244 static int
re_doreplace(RSIZE plen,char * st)245 re_doreplace(RSIZE plen, char *st)
246 {
247 int j, k, s, more, num, state;
248 struct line *clp;
249 char repstr[REPLEN];
250
251 clp = curwp->w_dotp;
252 more = TRUE;
253 j = 0;
254 state = 0;
255 num = 0;
256
257 /* The following FSA parses the replacement string */
258 while (more) {
259 switch (state) {
260 case 0:
261 if (*st == '\\') {
262 st++;
263 state = 1;
264 } else if (*st == '\0')
265 more = FALSE;
266 else {
267 repstr[j] = *st;
268 j++;
269 if (j >= REPLEN)
270 return (FALSE);
271 st++;
272 }
273 break;
274 case 1:
275 if (*st >= '0' && *st <= '9') {
276 num = *st - '0';
277 st++;
278 state = 2;
279 } else if (*st == '\0')
280 more = FALSE;
281 else {
282 repstr[j] = *st;
283 j++;
284 if (j >= REPLEN)
285 return (FALSE);
286 st++;
287 state = 0;
288 }
289 break;
290 case 2:
291 if (*st >= '0' && *st <= '9') {
292 num = 10 * num + *st - '0';
293 st++;
294 } else {
295 if (num >= RE_NMATCH)
296 return (FALSE);
297 k = regex_match[num].rm_eo - regex_match[num].rm_so;
298 if (j + k >= REPLEN)
299 return (FALSE);
300 bcopy(&(clp->l_text[regex_match[num].rm_so]),
301 &repstr[j], k);
302 j += k;
303 if (*st == '\0')
304 more = FALSE;
305 if (*st == '\\') {
306 st++;
307 state = 1;
308 } else {
309 repstr[j] = *st;
310 j++;
311 if (j >= REPLEN)
312 return (FALSE);
313 st++;
314 state = 0;
315 }
316 }
317 break;
318 } /* switch (state) */
319 } /* while (more) */
320
321 repstr[j] = '\0';
322 s = lreplace(plen, repstr);
323 return (s);
324 }
325
326 /*
327 * This routine does the real work of a forward search. The pattern is
328 * sitting in the external variable "pat". If found, dot is updated, the
329 * window system is notified of the change, and TRUE is returned. If the
330 * string isn't found, FALSE is returned.
331 */
332 static int
re_forwsrch(void)333 re_forwsrch(void)
334 {
335 int re_flags, tbo, tdotline, error;
336 struct line *clp;
337
338 clp = curwp->w_dotp;
339 tbo = curwp->w_doto;
340 tdotline = curwp->w_dotline;
341
342 if (tbo == clp->l_used)
343 /*
344 * Don't start matching past end of line -- must move to
345 * beginning of next line, unless line is empty or at
346 * end of file.
347 */
348 if (clp != curbp->b_headp && llength(clp) != 0) {
349 clp = lforw(clp);
350 tdotline++;
351 tbo = 0;
352 }
353 /*
354 * Note this loop does not process the last line, but this editor
355 * always makes the last line empty so this is good.
356 */
357 while (clp != (curbp->b_headp)) {
358 re_flags = REG_STARTEND;
359 if (tbo != 0)
360 re_flags |= REG_NOTBOL;
361 regex_match[0].rm_so = tbo;
362 regex_match[0].rm_eo = llength(clp);
363 error = regexec(®ex_buff, ltext(clp) ? ltext(clp) : "",
364 RE_NMATCH, regex_match, re_flags);
365 if (error != 0) {
366 clp = lforw(clp);
367 tdotline++;
368 tbo = 0;
369 } else {
370 curwp->w_doto = regex_match[0].rm_eo;
371 curwp->w_dotp = clp;
372 curwp->w_dotline = tdotline;
373 curwp->w_rflag |= WFMOVE;
374 return (TRUE);
375 }
376 }
377 return (FALSE);
378 }
379
380 /*
381 * This routine does the real work of a backward search. The pattern is sitting
382 * in the external variable "re_pat". If found, dot is updated, the window
383 * system is notified of the change, and TRUE is returned. If the string isn't
384 * found, FALSE is returned.
385 */
386 static int
re_backsrch(void)387 re_backsrch(void)
388 {
389 struct line *clp;
390 int tbo, tdotline;
391 regmatch_t lastmatch;
392
393 clp = curwp->w_dotp;
394 tbo = curwp->w_doto;
395 tdotline = curwp->w_dotline;
396
397 /* Start search one position to the left of dot */
398 tbo = tbo - 1;
399 if (tbo < 0) {
400 /* must move up one line */
401 clp = lback(clp);
402 tdotline--;
403 tbo = llength(clp);
404 }
405
406 /*
407 * Note this loop does not process the last line, but this editor
408 * always makes the last line empty so this is good.
409 */
410 while (clp != (curbp->b_headp)) {
411 regex_match[0].rm_so = 0;
412 regex_match[0].rm_eo = llength(clp);
413 lastmatch.rm_so = -1;
414 /*
415 * Keep searching until we don't match any longer. Assumes a
416 * non-match does not modify the regex_match array. We have to
417 * do this character-by-character after the first match since
418 * POSIX regexps don't give you a way to do reverse matches.
419 */
420 while (!regexec(®ex_buff, ltext(clp) ? ltext(clp) : "",
421 RE_NMATCH, regex_match, REG_STARTEND) &&
422 regex_match[0].rm_so <= tbo) {
423 memcpy(&lastmatch, ®ex_match[0], sizeof(regmatch_t));
424 regex_match[0].rm_so++;
425 regex_match[0].rm_eo = llength(clp);
426 }
427 if (lastmatch.rm_so == -1) {
428 clp = lback(clp);
429 tdotline--;
430 tbo = llength(clp);
431 } else {
432 memcpy(®ex_match[0], &lastmatch, sizeof(regmatch_t));
433 curwp->w_doto = regex_match[0].rm_so;
434 curwp->w_dotp = clp;
435 curwp->w_dotline = tdotline;
436 curwp->w_rflag |= WFMOVE;
437 return (TRUE);
438 }
439 }
440 return (FALSE);
441 }
442
443 /*
444 * Read a pattern.
445 * Stash it in the external variable "re_pat". The "pat" is
446 * not updated if the user types in an empty line. If the user typed
447 * an empty line, and there is no old pattern, it is an error.
448 * Display the old pattern, in the style of Jeff Lomicka. There is
449 * some do-it-yourself control expansion.
450 */
451 static int
re_readpattern(char * re_prompt)452 re_readpattern(char *re_prompt)
453 {
454 static int dofree = 0;
455 int flags, error, s;
456 char tpat[NPAT], *rep;
457
458 if (re_pat[0] == '\0')
459 rep = eread("%s: ", tpat, NPAT, EFNEW | EFCR, re_prompt);
460 else
461 rep = eread("%s (default %s): ", tpat, NPAT,
462 EFNUL | EFNEW | EFCR, re_prompt, re_pat);
463 if (rep == NULL)
464 return (ABORT);
465 if (rep[0] != '\0') {
466 /* New pattern given */
467 (void)strlcpy(re_pat, tpat, sizeof(re_pat));
468 if (casefoldsearch)
469 flags = REG_EXTENDED | REG_ICASE;
470 else
471 flags = REG_EXTENDED;
472 if (dofree)
473 regfree(®ex_buff);
474 error = regcomp(®ex_buff, re_pat, flags);
475 if (error != 0) {
476 char message[256];
477 regerror(error, ®ex_buff, message, sizeof(message));
478 dobeep();
479 ewprintf("Regex Error: %s", message);
480 re_pat[0] = '\0';
481 return (FALSE);
482 }
483 dofree = 1;
484 s = TRUE;
485 } else if (rep[0] == '\0' && re_pat[0] != '\0')
486 /* Just using old pattern */
487 s = TRUE;
488 else
489 s = FALSE;
490 return (s);
491 }
492
493 /*
494 * Cause case to not matter in searches. This is the default. If called
495 * with argument cause case to matter.
496 */
497 int
setcasefold(int f,int n)498 setcasefold(int f, int n)
499 {
500 if (f & FFARG) {
501 casefoldsearch = FALSE;
502 ewprintf("Case-fold-search unset");
503 } else {
504 casefoldsearch = TRUE;
505 ewprintf("Case-fold-search set");
506 }
507
508 /*
509 * Invalidate the regular expression pattern since I'm too lazy to
510 * recompile it.
511 */
512 re_pat[0] = '\0';
513 return (TRUE);
514 }
515
516 /*
517 * Delete all lines after dot that contain a string matching regex.
518 */
519 int
delmatchlines(int f,int n)520 delmatchlines(int f, int n)
521 {
522 int s;
523
524 if ((s = re_readpattern("Flush lines (containing match for regexp)"))
525 != TRUE)
526 return (s);
527
528 s = killmatches(TRUE);
529 return (s);
530 }
531
532 /*
533 * Delete all lines after dot that don't contain a string matching regex.
534 */
535 int
delnonmatchlines(int f,int n)536 delnonmatchlines(int f, int n)
537 {
538 int s;
539
540 if ((s = re_readpattern("Keep lines (containing match for regexp)"))
541 != TRUE)
542 return (s);
543
544 s = killmatches(FALSE);
545 return (s);
546 }
547
548 /*
549 * This function does the work of deleting matching lines.
550 */
551 static int
killmatches(int cond)552 killmatches(int cond)
553 {
554 int s, error;
555 int count = 0;
556 struct line *clp;
557
558 clp = curwp->w_dotp;
559 if (curwp->w_doto == llength(clp))
560 /* Consider dot on next line */
561 clp = lforw(clp);
562
563 while (clp != (curbp->b_headp)) {
564 /* see if line matches */
565 regex_match[0].rm_so = 0;
566 regex_match[0].rm_eo = llength(clp);
567 error = regexec(®ex_buff, ltext(clp) ? ltext(clp) : "",
568 RE_NMATCH, regex_match, REG_STARTEND);
569
570 /* Delete line when appropriate */
571 if ((cond == FALSE && error) || (cond == TRUE && !error)) {
572 curwp->w_doto = 0;
573 curwp->w_dotp = clp;
574 count++;
575 s = ldelete(llength(clp) + 1, KNONE);
576 clp = curwp->w_dotp;
577 curwp->w_rflag |= WFMOVE;
578 if (s == FALSE)
579 return (FALSE);
580 } else
581 clp = lforw(clp);
582 }
583
584 ewprintf("%d line(s) deleted", count);
585 if (count > 0)
586 curwp->w_rflag |= WFMOVE;
587
588 return (TRUE);
589 }
590
591 /*
592 * Count lines matching regex.
593 */
594 int
cntmatchlines(int f,int n)595 cntmatchlines(int f, int n)
596 {
597 int s;
598
599 if ((s = re_readpattern("Count lines (matching regexp)")) != TRUE)
600 return (s);
601 s = countmatches(TRUE);
602
603 return (s);
604 }
605
606 /*
607 * Count lines that fail to match regex.
608 */
609 int
cntnonmatchlines(int f,int n)610 cntnonmatchlines(int f, int n)
611 {
612 int s;
613
614 if ((s = re_readpattern("Count lines (not matching regexp)")) != TRUE)
615 return (s);
616 s = countmatches(FALSE);
617
618 return (s);
619 }
620
621 /*
622 * This function does the work of counting matching lines.
623 */
624 int
countmatches(int cond)625 countmatches(int cond)
626 {
627 int error;
628 int count = 0;
629 struct line *clp;
630
631 clp = curwp->w_dotp;
632 if (curwp->w_doto == llength(clp))
633 /* Consider dot on next line */
634 clp = lforw(clp);
635
636 while (clp != (curbp->b_headp)) {
637 /* see if line matches */
638 regex_match[0].rm_so = 0;
639 regex_match[0].rm_eo = llength(clp);
640 error = regexec(®ex_buff, ltext(clp) ? ltext(clp) : "",
641 RE_NMATCH, regex_match, REG_STARTEND);
642
643 /* Count line when appropriate */
644 if ((cond == FALSE && error) || (cond == TRUE && !error))
645 count++;
646 clp = lforw(clp);
647 }
648
649 if (cond)
650 ewprintf("Number of lines matching: %d", count);
651 else
652 ewprintf("Number of lines not matching: %d", count);
653
654 return (TRUE);
655 }
656 #endif /* REGEX */
657