xref: /openbsd/usr.bin/mg/re_search.c (revision 5b133f3f)
1 /*	$OpenBSD: re_search.c,v 1.37 2023/03/08 04:43:11 guenther Exp $	*/
2 
3 /* This file is in the public domain. */
4 
/*
 *	regular expression search commands for Mg
 *
 * This file contains functions to implement several of GNU Emacs's regular
 * expression functions for Mg.  Several of the routines below are just minor
 * re-arrangements of Mg's non-regular expression search functions.  Some of
 * them are similar in structure to the original MicroEMACS, others are
 * modifications of Rich Ellison's code.  Peter Newton re-wrote about half of
 * them from scratch.
 */
15 
16 #ifdef REGEX
17 #include <sys/queue.h>
18 #include <sys/types.h>
19 #include <regex.h>
20 #include <signal.h>
21 #include <stdio.h>
22 #include <string.h>
23 
24 #include "def.h"
25 #include "macro.h"
26 
27 #define SRCH_BEGIN	(0)		/* search sub-codes		    */
28 #define SRCH_FORW	(-1)
29 #define SRCH_BACK	(-2)
30 #define SRCH_NOPR	(-3)
31 #define SRCH_ACCM	(-4)
32 #define SRCH_MARK	(-5)
33 
34 #define RE_NMATCH	10		/* max number of matches	    */
35 #define REPLEN		256		/* max length of replacement string */
36 
37 char	re_pat[NPAT];			/* regex pattern		    */
38 int	re_srch_lastdir = SRCH_NOPR;	/* last search flags		    */
39 int	casefoldsearch = TRUE;		/* does search ignore case?	    */
40 
41 static int	 re_doreplace(RSIZE, char *);
42 static int	 re_forwsrch(void);
43 static int	 re_backsrch(void);
44 static int	 re_readpattern(char *);
45 static int	 killmatches(int);
46 static int	 countmatches(int);
47 
48 /*
49  * Search forward.
50  * Get a search string from the user and search for it starting at ".".  If
51  * found, move "." to just after the matched characters.  display does all
52  * the hard stuff.  If not found, it just prints a message.
53  */
54 int
re_forwsearch(int f,int n)55 re_forwsearch(int f, int n)
56 {
57 	int	s;
58 
59 	if ((s = re_readpattern("RE Search")) != TRUE)
60 		return (s);
61 	if (re_forwsrch() == FALSE) {
62 		dobeep();
63 		ewprintf("Search failed: \"%s\"", re_pat);
64 		return (FALSE);
65 	}
66 	re_srch_lastdir = SRCH_FORW;
67 	return (TRUE);
68 }
69 
70 /*
71  * Reverse search.
72  * Get a search string from the user, and search, starting at "."
73  * and proceeding toward the front of the buffer. If found "." is left
74  * pointing at the first character of the pattern [the last character that
75  * was matched].
76  */
77 int
re_backsearch(int f,int n)78 re_backsearch(int f, int n)
79 {
80 	int	s;
81 
82 	if ((s = re_readpattern("RE Search backward")) != TRUE)
83 		return (s);
84 	if (re_backsrch() == FALSE) {
85 		dobeep();
86 		ewprintf("Search failed: \"%s\"", re_pat);
87 		return (FALSE);
88 	}
89 	re_srch_lastdir = SRCH_BACK;
90 	return (TRUE);
91 }
92 
93 /*
94  * Search again, using the same search string and direction as the last search
95  * command.  The direction has been saved in "srch_lastdir", so you know which
96  * way to go.
97  *
98  * XXX: This code has problems -- some incompatibility(?) with extend.c causes
99  * match to fail when it should not.
100  */
101 int
re_searchagain(int f,int n)102 re_searchagain(int f, int n)
103 {
104 	if (re_srch_lastdir == SRCH_NOPR) {
105 		dobeep();
106 		ewprintf("No last search");
107 		return (FALSE);
108 	}
109 	if (re_srch_lastdir == SRCH_FORW) {
110 		if (re_forwsrch() == FALSE) {
111 			dobeep();
112 			ewprintf("Search failed: \"%s\"", re_pat);
113 			return (FALSE);
114 		}
115 		return (TRUE);
116 	}
117 	if (re_srch_lastdir == SRCH_BACK)
118 		if (re_backsrch() == FALSE) {
119 			dobeep();
120 			ewprintf("Search failed: \"%s\"", re_pat);
121 			return (FALSE);
122 		}
123 
124 	return (TRUE);
125 }
126 
127 /* Compiled regex goes here-- changed only when new pattern read */
128 static regex_t		regex_buff;
129 static regmatch_t	regex_match[RE_NMATCH];
130 
131 /*
132  * Re-Query Replace.
133  *	Replace strings selectively.  Does a search and replace operation.
134  */
135 int
re_queryrepl(int f,int n)136 re_queryrepl(int f, int n)
137 {
138 	int	rcnt = 0;		/* replacements made so far	*/
139 	int	plen, s;		/* length of found string	*/
140 	char	news[NPAT];		/* replacement string		*/
141 
142 	if ((s = re_readpattern("RE Query replace")) != TRUE)
143 		return (s);
144 	if (eread("Query replace %s with: ", news, NPAT,
145 	    EFNUL | EFNEW | EFCR, re_pat) == NULL)
146 		return (ABORT);
147 	ewprintf("Query replacing %s with %s:", re_pat, news);
148 
149 	/*
150 	 * Search forward repeatedly, checking each time whether to insert
151 	 * or not.  The "!" case makes the check always true, so it gets put
152 	 * into a tighter loop for efficiency.
153 	 */
154 	while (re_forwsrch() == TRUE) {
155 retry:
156 		update(CMODE);
157 		switch (getkey(FALSE)) {
158 		case ' ':
159 			plen = regex_match[0].rm_eo - regex_match[0].rm_so;
160 			if (re_doreplace((RSIZE)plen, news) == FALSE)
161 				return (FALSE);
162 			rcnt++;
163 			break;
164 
165 		case '.':
166 			plen = regex_match[0].rm_eo - regex_match[0].rm_so;
167 			if (re_doreplace((RSIZE)plen, news) == FALSE)
168 				return (FALSE);
169 			rcnt++;
170 			goto stopsearch;
171 
172 		case CCHR('G'):				/* ^G */
173 			(void)ctrlg(FFRAND, 0);
174 			goto stopsearch;
175 		case CCHR('['):				/* ESC */
176 		case '`':
177 			goto stopsearch;
178 		case '!':
179 			do {
180 				plen = regex_match[0].rm_eo - regex_match[0].rm_so;
181 				if (re_doreplace((RSIZE)plen, news) == FALSE)
182 					return (FALSE);
183 				rcnt++;
184 			} while (re_forwsrch() == TRUE);
185 			goto stopsearch;
186 
187 		case CCHR('?'):				/* To not replace */
188 			break;
189 
190 		default:
191 			ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit");
192 			goto retry;
193 		}
194 	}
195 
196 stopsearch:
197 	curwp->w_rflag |= WFFULL;
198 	update(CMODE);
199 	if (!inmacro) {
200 		if (rcnt == 0)
201 			ewprintf("(No replacements done)");
202 		else if (rcnt == 1)
203 			ewprintf("(1 replacement done)");
204 		else
205 			ewprintf("(%d replacements done)", rcnt);
206 	}
207 	return (TRUE);
208 }
209 
210 int
re_repl(int f,int n)211 re_repl(int f, int n)
212 {
213 	int     rcnt = 0;		/* replacements made so far     */
214 	int     plen, s;		/* length of found string       */
215 	char    news[NPAT];		/* replacement string           */
216 
217 	if ((s = re_readpattern("RE Replace")) != TRUE)
218 		return (s);
219 	if (eread("Replace %s with: ", news, NPAT,
220 	    EFNUL | EFNEW | EFCR, re_pat) == NULL)
221                 return (ABORT);
222 
223 	while (re_forwsrch() == TRUE) {
224 		plen = regex_match[0].rm_eo - regex_match[0].rm_so;
225 		if (re_doreplace((RSIZE)plen, news) == FALSE)
226 			return (FALSE);
227 		rcnt++;
228 	}
229 
230 	curwp->w_rflag |= WFFULL;
231 	update(CMODE);
232 	if (!inmacro)
233 		ewprintf("(%d replacement(s) done)", rcnt);
234 
235 	return(TRUE);
236 }
237 
238 /*
239  * Routine re_doreplace calls lreplace to make replacements needed by
240  * re_query replace.  Its reason for existence is to deal with \1, \2. etc.
241  *  plen: length to remove
242  *  st:   replacement string
243  */
244 static int
re_doreplace(RSIZE plen,char * st)245 re_doreplace(RSIZE plen, char *st)
246 {
247 	int	 j, k, s, more, num, state;
248 	struct line	*clp;
249 	char	 repstr[REPLEN];
250 
251 	clp = curwp->w_dotp;
252 	more = TRUE;
253 	j = 0;
254 	state = 0;
255 	num = 0;
256 
257 	/* The following FSA parses the replacement string */
258 	while (more) {
259 		switch (state) {
260 		case 0:
261 			if (*st == '\\') {
262 				st++;
263 				state = 1;
264 			} else if (*st == '\0')
265 				more = FALSE;
266 			else {
267 				repstr[j] = *st;
268 				j++;
269 				if (j >= REPLEN)
270 					return (FALSE);
271 				st++;
272 			}
273 			break;
274 		case 1:
275 			if (*st >= '0' && *st <= '9') {
276 				num = *st - '0';
277 				st++;
278 				state = 2;
279 			} else if (*st == '\0')
280 				more = FALSE;
281 			else {
282 				repstr[j] = *st;
283 				j++;
284 				if (j >= REPLEN)
285 					return (FALSE);
286 				st++;
287 				state = 0;
288 			}
289 			break;
290 		case 2:
291 			if (*st >= '0' && *st <= '9') {
292 				num = 10 * num + *st - '0';
293 				st++;
294 			} else {
295 				if (num >= RE_NMATCH)
296 					return (FALSE);
297 				k = regex_match[num].rm_eo - regex_match[num].rm_so;
298 				if (j + k >= REPLEN)
299 					return (FALSE);
300 				bcopy(&(clp->l_text[regex_match[num].rm_so]),
301 				    &repstr[j], k);
302 				j += k;
303 				if (*st == '\0')
304 					more = FALSE;
305 				if (*st == '\\') {
306 					st++;
307 					state = 1;
308 				} else {
309 					repstr[j] = *st;
310 					j++;
311 					if (j >= REPLEN)
312 						return (FALSE);
313 					st++;
314 					state = 0;
315 				}
316 			}
317 			break;
318 		}		/* switch (state) */
319 	}			/* while (more)   */
320 
321 	repstr[j] = '\0';
322 	s = lreplace(plen, repstr);
323 	return (s);
324 }
325 
326 /*
327  * This routine does the real work of a forward search.  The pattern is
328  * sitting in the external variable "pat".  If found, dot is updated, the
329  * window system is notified of the change, and TRUE is returned.  If the
330  * string isn't found, FALSE is returned.
331  */
332 static int
re_forwsrch(void)333 re_forwsrch(void)
334 {
335 	int	 	 re_flags, tbo, tdotline, error;
336 	struct line	*clp;
337 
338 	clp = curwp->w_dotp;
339 	tbo = curwp->w_doto;
340 	tdotline = curwp->w_dotline;
341 
342 	if (tbo == clp->l_used)
343 		/*
344 		 * Don't start matching past end of line -- must move to
345 		 * beginning of next line, unless line is empty or at
346 		 * end of file.
347 		 */
348 		if (clp != curbp->b_headp && llength(clp) != 0) {
349 			clp = lforw(clp);
350 			tdotline++;
351 			tbo = 0;
352 		}
353 	/*
354 	 * Note this loop does not process the last line, but this editor
355 	 * always makes the last line empty so this is good.
356 	 */
357 	while (clp != (curbp->b_headp)) {
358 		re_flags = REG_STARTEND;
359 		if (tbo != 0)
360 			re_flags |= REG_NOTBOL;
361 		regex_match[0].rm_so = tbo;
362 		regex_match[0].rm_eo = llength(clp);
363 		error = regexec(&regex_buff, ltext(clp) ? ltext(clp) : "",
364 		    RE_NMATCH, regex_match, re_flags);
365 		if (error != 0) {
366 			clp = lforw(clp);
367 			tdotline++;
368 			tbo = 0;
369 		} else {
370 			curwp->w_doto = regex_match[0].rm_eo;
371 			curwp->w_dotp = clp;
372 			curwp->w_dotline = tdotline;
373 			curwp->w_rflag |= WFMOVE;
374 			return (TRUE);
375 		}
376 	}
377 	return (FALSE);
378 }
379 
380 /*
381  * This routine does the real work of a backward search.  The pattern is sitting
382  * in the external variable "re_pat".  If found, dot is updated, the window
383  * system is notified of the change, and TRUE is returned.  If the string isn't
384  * found, FALSE is returned.
385  */
386 static int
re_backsrch(void)387 re_backsrch(void)
388 {
389 	struct line		*clp;
390 	int		 tbo, tdotline;
391 	regmatch_t	 lastmatch;
392 
393 	clp = curwp->w_dotp;
394 	tbo = curwp->w_doto;
395 	tdotline = curwp->w_dotline;
396 
397 	/* Start search one position to the left of dot */
398 	tbo = tbo - 1;
399 	if (tbo < 0) {
400 		/* must move up one line */
401 		clp = lback(clp);
402 		tdotline--;
403 		tbo = llength(clp);
404 	}
405 
406 	/*
407 	 * Note this loop does not process the last line, but this editor
408 	 * always makes the last line empty so this is good.
409 	 */
410 	while (clp != (curbp->b_headp)) {
411 		regex_match[0].rm_so = 0;
412 		regex_match[0].rm_eo = llength(clp);
413 		lastmatch.rm_so = -1;
414 		/*
415 		 * Keep searching until we don't match any longer.  Assumes a
416 		 * non-match does not modify the regex_match array.  We have to
417 		 * do this character-by-character after the first match since
418 		 * POSIX regexps don't give you a way to do reverse matches.
419 		 */
420 		while (!regexec(&regex_buff, ltext(clp) ? ltext(clp) : "",
421 		    RE_NMATCH, regex_match, REG_STARTEND) &&
422 		    regex_match[0].rm_so <= tbo) {
423 			memcpy(&lastmatch, &regex_match[0], sizeof(regmatch_t));
424 			regex_match[0].rm_so++;
425 			regex_match[0].rm_eo = llength(clp);
426 		}
427 		if (lastmatch.rm_so == -1) {
428 			clp = lback(clp);
429 			tdotline--;
430 			tbo = llength(clp);
431 		} else {
432 			memcpy(&regex_match[0], &lastmatch, sizeof(regmatch_t));
433 			curwp->w_doto = regex_match[0].rm_so;
434 			curwp->w_dotp = clp;
435 			curwp->w_dotline = tdotline;
436 			curwp->w_rflag |= WFMOVE;
437 			return (TRUE);
438 		}
439 	}
440 	return (FALSE);
441 }
442 
443 /*
444  * Read a pattern.
445  * Stash it in the external variable "re_pat". The "pat" is
446  * not updated if the user types in an empty line. If the user typed
447  * an empty line, and there is no old pattern, it is an error.
448  * Display the old pattern, in the style of Jeff Lomicka. There is
449  * some do-it-yourself control expansion.
450  */
451 static int
re_readpattern(char * re_prompt)452 re_readpattern(char *re_prompt)
453 {
454 	static int	dofree = 0;
455 	int		flags, error, s;
456 	char		tpat[NPAT], *rep;
457 
458 	if (re_pat[0] == '\0')
459 		rep = eread("%s: ", tpat, NPAT, EFNEW | EFCR, re_prompt);
460 	else
461 		rep = eread("%s (default %s): ", tpat, NPAT,
462 		    EFNUL | EFNEW | EFCR, re_prompt, re_pat);
463 	if (rep == NULL)
464 		return (ABORT);
465 	if (rep[0] != '\0') {
466 		/* New pattern given */
467 		(void)strlcpy(re_pat, tpat, sizeof(re_pat));
468 		if (casefoldsearch)
469 			flags = REG_EXTENDED | REG_ICASE;
470 		else
471 			flags = REG_EXTENDED;
472 		if (dofree)
473 			regfree(&regex_buff);
474 		error = regcomp(&regex_buff, re_pat, flags);
475 		if (error != 0) {
476 			char	message[256];
477 			regerror(error, &regex_buff, message, sizeof(message));
478 			dobeep();
479 			ewprintf("Regex Error: %s", message);
480 			re_pat[0] = '\0';
481 			return (FALSE);
482 		}
483 		dofree = 1;
484 		s = TRUE;
485 	} else if (rep[0] == '\0' && re_pat[0] != '\0')
486 		/* Just using old pattern */
487 		s = TRUE;
488 	else
489 		s = FALSE;
490 	return (s);
491 }
492 
493 /*
494  * Cause case to not matter in searches.  This is the default.	If called
495  * with argument cause case to matter.
496  */
497 int
setcasefold(int f,int n)498 setcasefold(int f, int n)
499 {
500 	if (f & FFARG) {
501 		casefoldsearch = FALSE;
502 		ewprintf("Case-fold-search unset");
503 	} else {
504 		casefoldsearch = TRUE;
505 		ewprintf("Case-fold-search set");
506 	}
507 
508 	/*
509 	 * Invalidate the regular expression pattern since I'm too lazy to
510 	 * recompile it.
511 	 */
512 	re_pat[0] = '\0';
513 	return (TRUE);
514 }
515 
516 /*
517  * Delete all lines after dot that contain a string matching regex.
518  */
519 int
delmatchlines(int f,int n)520 delmatchlines(int f, int n)
521 {
522 	int	s;
523 
524 	if ((s = re_readpattern("Flush lines (containing match for regexp)"))
525 	    != TRUE)
526 		return (s);
527 
528 	s = killmatches(TRUE);
529 	return (s);
530 }
531 
532 /*
533  * Delete all lines after dot that don't contain a string matching regex.
534  */
535 int
delnonmatchlines(int f,int n)536 delnonmatchlines(int f, int n)
537 {
538 	int	s;
539 
540 	if ((s = re_readpattern("Keep lines (containing match for regexp)"))
541 	    != TRUE)
542 		return (s);
543 
544 	s = killmatches(FALSE);
545 	return (s);
546 }
547 
548 /*
549  * This function does the work of deleting matching lines.
550  */
551 static int
killmatches(int cond)552 killmatches(int cond)
553 {
554 	int	 s, error;
555 	int	 count = 0;
556 	struct line	*clp;
557 
558 	clp = curwp->w_dotp;
559 	if (curwp->w_doto == llength(clp))
560 		/* Consider dot on next line */
561 		clp = lforw(clp);
562 
563 	while (clp != (curbp->b_headp)) {
564 		/* see if line matches */
565 		regex_match[0].rm_so = 0;
566 		regex_match[0].rm_eo = llength(clp);
567 		error = regexec(&regex_buff, ltext(clp) ? ltext(clp) : "",
568 		    RE_NMATCH, regex_match, REG_STARTEND);
569 
570 		/* Delete line when appropriate */
571 		if ((cond == FALSE && error) || (cond == TRUE && !error)) {
572 			curwp->w_doto = 0;
573 			curwp->w_dotp = clp;
574 			count++;
575 			s = ldelete(llength(clp) + 1, KNONE);
576 			clp = curwp->w_dotp;
577 			curwp->w_rflag |= WFMOVE;
578 			if (s == FALSE)
579 				return (FALSE);
580 		} else
581 			clp = lforw(clp);
582 	}
583 
584 	ewprintf("%d line(s) deleted", count);
585 	if (count > 0)
586 		curwp->w_rflag |= WFMOVE;
587 
588 	return (TRUE);
589 }
590 
591 /*
592  * Count lines matching regex.
593  */
594 int
cntmatchlines(int f,int n)595 cntmatchlines(int f, int n)
596 {
597 	int	s;
598 
599 	if ((s = re_readpattern("Count lines (matching regexp)")) != TRUE)
600 		return (s);
601 	s = countmatches(TRUE);
602 
603 	return (s);
604 }
605 
606 /*
607  * Count lines that fail to match regex.
608  */
609 int
cntnonmatchlines(int f,int n)610 cntnonmatchlines(int f, int n)
611 {
612 	int	s;
613 
614 	if ((s = re_readpattern("Count lines (not matching regexp)")) != TRUE)
615 		return (s);
616 	s = countmatches(FALSE);
617 
618 	return (s);
619 }
620 
621 /*
622  * This function does the work of counting matching lines.
623  */
624 int
countmatches(int cond)625 countmatches(int cond)
626 {
627 	int	 error;
628 	int	 count = 0;
629 	struct line	*clp;
630 
631 	clp = curwp->w_dotp;
632 	if (curwp->w_doto == llength(clp))
633 		/* Consider dot on next line */
634 		clp = lforw(clp);
635 
636 	while (clp != (curbp->b_headp)) {
637 		/* see if line matches */
638 		regex_match[0].rm_so = 0;
639 		regex_match[0].rm_eo = llength(clp);
640 		error = regexec(&regex_buff, ltext(clp) ? ltext(clp) : "",
641 		    RE_NMATCH, regex_match, REG_STARTEND);
642 
643 		/* Count line when appropriate */
644 		if ((cond == FALSE && error) || (cond == TRUE && !error))
645 			count++;
646 		clp = lforw(clp);
647 	}
648 
649 	if (cond)
650 		ewprintf("Number of lines matching: %d", count);
651 	else
652 		ewprintf("Number of lines not matching: %d", count);
653 
654 	return (TRUE);
655 }
656 #endif	/* REGEX */
657