xref: /openbsd/usr.bin/vi/common/search.c (revision 4cfece93)
1 /*	$OpenBSD: search.c,v 1.14 2016/08/14 21:47:16 guenther Exp $	*/
2 
3 /*-
4  * Copyright (c) 1992, 1993, 1994
5  *	The Regents of the University of California.  All rights reserved.
6  * Copyright (c) 1992, 1993, 1994, 1995, 1996
7  *	Keith Bostic.  All rights reserved.
8  *
9  * See the LICENSE file for redistribution information.
10  */
11 
12 #include "config.h"
13 
14 #include <sys/types.h>
15 #include <sys/queue.h>
16 
17 #include <bitstring.h>
18 #include <ctype.h>
19 #include <errno.h>
20 #include <limits.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <unistd.h>
25 
26 #include "common.h"
27 
28 typedef enum { S_EMPTY, S_EOF, S_NOPREV, S_NOTFOUND, S_SOF, S_WRAP } smsg_t;
29 
30 static void	search_msg(SCR *, smsg_t);
31 static int	search_init(SCR *, dir_t, char *, size_t, char **, u_int);
32 
33 /*
34  * search_init --
35  *	Set up a search.
36  */
37 static int
38 search_init(SCR *sp, dir_t dir, char *ptrn, size_t plen, char **epp,
39     u_int flags)
40 {
41 	recno_t lno;
42 	int delim;
43 	char *p, *t;
44 
45 	/* If the file is empty, it's a fast search. */
46 	if (sp->lno <= 1) {
47 		if (db_last(sp, &lno))
48 			return (1);
49 		if (lno == 0) {
50 			if (LF_ISSET(SEARCH_MSG))
51 				search_msg(sp, S_EMPTY);
52 			return (1);
53 		}
54 	}
55 
56 	if (LF_ISSET(SEARCH_PARSE)) {		/* Parse the string. */
57 		/*
58 		 * Use the saved pattern if no pattern specified, or if only
59 		 * one or two delimiter characters specified.
60 		 *
61 		 * !!!
62 		 * Historically, only the pattern itself was saved, vi didn't
63 		 * preserve addressing or delta information.
64 		 */
65 		if (ptrn == NULL)
66 			goto prev;
67 		if (plen == 1) {
68 			if (epp != NULL)
69 				*epp = ptrn + 1;
70 			goto prev;
71 		}
72 		if (ptrn[0] == ptrn[1]) {
73 			if (epp != NULL)
74 				*epp = ptrn + 2;
75 
76 			/* Complain if we don't have a previous pattern. */
77 prev:			if (sp->re == NULL) {
78 				search_msg(sp, S_NOPREV);
79 				return (1);
80 			}
81 			/* Re-compile the search pattern if necessary. */
82 			if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
83 			    sp->re, sp->re_len, NULL, NULL, &sp->re_c,
84 			    RE_C_SEARCH |
85 			    (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT)))
86 				return (1);
87 
88 			/* Set the search direction. */
89 			if (LF_ISSET(SEARCH_SET))
90 				sp->searchdir = dir;
91 			return (0);
92 		}
93 
94 		/*
95 		 * Set the delimiter, and move forward to the terminating
96 		 * delimiter, handling escaped delimiters.
97 		 *
98 		 * QUOTING NOTE:
99 		 * Only discard an escape character if it escapes a delimiter.
100 		 */
101 		for (delim = *ptrn, p = t = ++ptrn;; *t++ = *p++) {
102 			if (--plen == 0 || p[0] == delim) {
103 				if (plen != 0)
104 					++p;
105 				break;
106 			}
107 			if (plen > 1 && p[0] == '\\' && p[1] == delim) {
108 				++p;
109 				--plen;
110 			}
111 		}
112 		if (epp != NULL)
113 			*epp = p;
114 
115 		plen = t - ptrn;
116 	}
117 
118 	/* Compile the RE. */
119 	if (re_compile(sp, ptrn, plen, &sp->re, &sp->re_len, &sp->re_c,
120 	    RE_C_SEARCH |
121 	    (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT) |
122 	    (LF_ISSET(SEARCH_TAG) ? RE_C_TAG : 0)))
123 		return (1);
124 
125 	/* Set the search direction. */
126 	if (LF_ISSET(SEARCH_SET))
127 		sp->searchdir = dir;
128 
129 	return (0);
130 }
131 
132 /*
133  * f_search --
134  *	Do a forward search.
135  *
136  * PUBLIC: int f_search(SCR *, MARK *, MARK *, char *, size_t, char **, u_int);
137  */
138 int
139 f_search(SCR *sp, MARK *fm, MARK *rm, char *ptrn, size_t plen, char **eptrn,
140     u_int flags)
141 {
142 	busy_t btype;
143 	recno_t lno;
144 	regmatch_t match[1];
145 	size_t coff, len;
146 	int cnt, eval, rval, wrapped = 0;
147 	char *l;
148 
149 	if (search_init(sp, FORWARD, ptrn, plen, eptrn, flags))
150 		return (1);
151 
152 	if (LF_ISSET(SEARCH_FILE)) {
153 		lno = 1;
154 		coff = 0;
155 	} else {
156 		if (db_get(sp, fm->lno, DBG_FATAL, &l, &len))
157 			return (1);
158 		lno = fm->lno;
159 
160 		/*
161 		 * If doing incremental search, start searching at the previous
162 		 * column, so that we search a minimal distance and still match
163 		 * special patterns, e.g., \< for beginning of a word.
164 		 *
165 		 * Otherwise, start searching immediately after the cursor.  If
166 		 * at the end of the line, start searching on the next line.
167 		 * This is incompatible (read bug fix) with the historic vi --
168 		 * searches for the '$' pattern never moved forward, and the
169 		 * "-t foo" didn't work if the 'f' was the first character in
170 		 * the file.
171 		 */
172 		if (LF_ISSET(SEARCH_INCR)) {
173 			if ((coff = fm->cno) != 0)
174 				--coff;
175 		} else if (fm->cno + 1 >= len) {
176 			coff = 0;
177 			lno = fm->lno + 1;
178 			if (db_get(sp, lno, 0, &l, &len)) {
179 				if (!O_ISSET(sp, O_WRAPSCAN)) {
180 					if (LF_ISSET(SEARCH_MSG))
181 						search_msg(sp, S_EOF);
182 					return (1);
183 				}
184 				lno = 1;
185 				wrapped = 1;
186 			}
187 		} else
188 			coff = fm->cno + 1;
189 	}
190 
191 	btype = BUSY_ON;
192 	for (cnt = INTERRUPT_CHECK, rval = 1;; ++lno, coff = 0) {
193 		if (cnt-- == 0) {
194 			if (INTERRUPTED(sp))
195 				break;
196 			if (LF_ISSET(SEARCH_MSG)) {
197 				search_busy(sp, btype);
198 				btype = BUSY_UPDATE;
199 			}
200 			cnt = INTERRUPT_CHECK;
201 		}
202 		if ((wrapped && lno > fm->lno) || db_get(sp, lno, 0, &l, &len)) {
203 			if (wrapped) {
204 				if (LF_ISSET(SEARCH_MSG))
205 					search_msg(sp, S_NOTFOUND);
206 				break;
207 			}
208 			if (!O_ISSET(sp, O_WRAPSCAN)) {
209 				if (LF_ISSET(SEARCH_MSG))
210 					search_msg(sp, S_EOF);
211 				break;
212 			}
213 			lno = 0;
214 			wrapped = 1;
215 			continue;
216 		}
217 
218 		/* If already at EOL, just keep going. */
219 		if (len != 0 && coff == len)
220 			continue;
221 
222 		/* Set the termination. */
223 		match[0].rm_so = coff;
224 		match[0].rm_eo = len;
225 
226 		/* Search the line. */
227 		eval = regexec(&sp->re_c, l, 1, match,
228 		    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | REG_STARTEND);
229 		if (eval == REG_NOMATCH)
230 			continue;
231 		if (eval != 0) {
232 			if (LF_ISSET(SEARCH_MSG))
233 				re_error(sp, eval, &sp->re_c);
234 			else
235 				(void)sp->gp->scr_bell(sp);
236 			break;
237 		}
238 
239 		/* Warn if the search wrapped. */
240 		if (wrapped && LF_ISSET(SEARCH_WMSG))
241 			search_msg(sp, S_WRAP);
242 
243 		rm->lno = lno;
244 		rm->cno = match[0].rm_so;
245 
246 		/*
247 		 * If a change command, it's possible to move beyond the end
248 		 * of a line.  Historic vi generally got this wrong (e.g. try
249 		 * "c?$<cr>").  Not all that sure this gets it right, there
250 		 * are lots of strange cases.
251 		 */
252 		if (!LF_ISSET(SEARCH_EOL) && rm->cno >= len)
253 			rm->cno = len != 0 ? len - 1 : 0;
254 
255 		rval = 0;
256 		break;
257 	}
258 
259 	if (LF_ISSET(SEARCH_MSG))
260 		search_busy(sp, BUSY_OFF);
261 	return (rval);
262 }
263 
264 /*
265  * b_search --
266  *	Do a backward search.
267  *
268  * PUBLIC: int b_search(SCR *, MARK *, MARK *, char *, size_t, char **, u_int);
269  */
270 int
271 b_search(SCR *sp, MARK *fm, MARK *rm, char *ptrn, size_t plen, char **eptrn,
272     u_int flags)
273 {
274 	busy_t btype;
275 	recno_t lno;
276 	regmatch_t match[1];
277 	size_t coff, last, len;
278 	int cnt, eval, rval, wrapped;
279 	char *l;
280 
281 	if (search_init(sp, BACKWARD, ptrn, plen, eptrn, flags))
282 		return (1);
283 
284 	/*
285 	 * If doing incremental search, set the "starting" position past the
286 	 * current column, so that we search a minimal distance and still
287 	 * match special patterns, e.g., \> for the end of a word.  This is
288 	 * safe when the cursor is at the end of a line because we only use
289 	 * it for comparison with the location of the match.
290 	 *
291 	 * Otherwise, start searching immediately before the cursor.  If in
292 	 * the first column, start search on the previous line.
293 	 */
294 	if (LF_ISSET(SEARCH_INCR)) {
295 		lno = fm->lno;
296 		coff = fm->cno + 1;
297 	} else {
298 		if (fm->cno == 0) {
299 			if (fm->lno == 1 && !O_ISSET(sp, O_WRAPSCAN)) {
300 				if (LF_ISSET(SEARCH_MSG))
301 					search_msg(sp, S_SOF);
302 				return (1);
303 			}
304 			lno = fm->lno - 1;
305 		} else
306 			lno = fm->lno;
307 		coff = fm->cno;
308 	}
309 
310 	btype = BUSY_ON;
311 	for (cnt = INTERRUPT_CHECK, rval = 1, wrapped = 0;; --lno, coff = 0) {
312 		if (cnt-- == 0) {
313 			if (INTERRUPTED(sp))
314 				break;
315 			if (LF_ISSET(SEARCH_MSG)) {
316 				search_busy(sp, btype);
317 				btype = BUSY_UPDATE;
318 			}
319 			cnt = INTERRUPT_CHECK;
320 		}
321 		if ((wrapped && lno < fm->lno) || lno == 0) {
322 			if (wrapped) {
323 				if (LF_ISSET(SEARCH_MSG))
324 					search_msg(sp, S_NOTFOUND);
325 				break;
326 			}
327 			if (!O_ISSET(sp, O_WRAPSCAN)) {
328 				if (LF_ISSET(SEARCH_MSG))
329 					search_msg(sp, S_SOF);
330 				break;
331 			}
332 			if (db_last(sp, &lno))
333 				break;
334 			if (lno == 0) {
335 				if (LF_ISSET(SEARCH_MSG))
336 					search_msg(sp, S_EMPTY);
337 				break;
338 			}
339 			++lno;
340 			wrapped = 1;
341 			continue;
342 		}
343 
344 		if (db_get(sp, lno, 0, &l, &len))
345 			break;
346 
347 		/* Set the termination. */
348 		match[0].rm_so = 0;
349 		match[0].rm_eo = len;
350 
351 		/* Search the line. */
352 		eval = regexec(&sp->re_c, l, 1, match,
353 		    (match[0].rm_eo == len ? 0 : REG_NOTEOL) | REG_STARTEND);
354 		if (eval == REG_NOMATCH)
355 			continue;
356 		if (eval != 0) {
357 			if (LF_ISSET(SEARCH_MSG))
358 				re_error(sp, eval, &sp->re_c);
359 			else
360 				(void)sp->gp->scr_bell(sp);
361 			break;
362 		}
363 
364 		/* Check for a match starting past the cursor. */
365 		if (coff != 0 && match[0].rm_so >= coff)
366 			continue;
367 
368 		/* Warn if the search wrapped. */
369 		if (wrapped && LF_ISSET(SEARCH_WMSG))
370 			search_msg(sp, S_WRAP);
371 
372 		/*
373 		 * We now have the first match on the line.  Step through the
374 		 * line character by character until find the last acceptable
375 		 * match.  This is painful, we need a better interface to regex
376 		 * to make this work.
377 		 */
378 		for (;;) {
379 			last = match[0].rm_so++;
380 			if (match[0].rm_so >= len)
381 				break;
382 			match[0].rm_eo = len;
383 			eval = regexec(&sp->re_c, l, 1, match,
384 			    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) |
385 			    REG_STARTEND);
386 			if (eval == REG_NOMATCH)
387 				break;
388 			if (eval != 0) {
389 				if (LF_ISSET(SEARCH_MSG))
390 					re_error(sp, eval, &sp->re_c);
391 				else
392 					(void)sp->gp->scr_bell(sp);
393 				goto err;
394 			}
395 			if (coff && match[0].rm_so >= coff)
396 				break;
397 		}
398 		rm->lno = lno;
399 
400 		/* See comment in f_search(). */
401 		if (!LF_ISSET(SEARCH_EOL) && last >= len)
402 			rm->cno = len != 0 ? len - 1 : 0;
403 		else
404 			rm->cno = last;
405 		rval = 0;
406 		break;
407 	}
408 
409 err:	if (LF_ISSET(SEARCH_MSG))
410 		search_busy(sp, BUSY_OFF);
411 	return (rval);
412 }
413 
414 /*
415  * search_msg --
416  *	Display one of the search messages.
417  */
418 static void
419 search_msg(SCR *sp, smsg_t msg)
420 {
421 	switch (msg) {
422 	case S_EMPTY:
423 		msgq(sp, M_ERR, "File empty; nothing to search");
424 		break;
425 	case S_EOF:
426 		msgq(sp, M_ERR,
427 		    "Reached end-of-file without finding the pattern");
428 		break;
429 	case S_NOPREV:
430 		msgq(sp, M_ERR, "No previous search pattern");
431 		break;
432 	case S_NOTFOUND:
433 		msgq(sp, M_ERR, "Pattern not found");
434 		break;
435 	case S_SOF:
436 		msgq(sp, M_ERR,
437 		    "Reached top-of-file without finding the pattern");
438 		break;
439 	case S_WRAP:
440 		msgq(sp, M_ERR, "Search wrapped");
441 		break;
442 	default:
443 		abort();
444 	}
445 }
446 
447 /*
448  * search_busy --
449  *	Put up the busy searching message.
450  *
451  * PUBLIC: void search_busy(SCR *, busy_t);
452  */
453 void
454 search_busy(SCR *sp, busy_t btype)
455 {
456 	sp->gp->scr_busy(sp, "Searching...", btype);
457 }
458