xref: /minix/external/bsd/nvi/dist/common/search.c (revision 84d9c625)
1 /*	$NetBSD: search.c,v 1.2 2013/11/22 15:52:05 christos Exp $ */
2 /*-
3  * Copyright (c) 1992, 1993, 1994
4  *	The Regents of the University of California.  All rights reserved.
5  * Copyright (c) 1992, 1993, 1994, 1995, 1996
6  *	Keith Bostic.  All rights reserved.
7  *
8  * See the LICENSE file for redistribution information.
9  */
10 
11 #include "config.h"
12 
13 #ifndef lint
14 static const char sccsid[] = "Id: search.c,v 10.31 2001/06/25 15:19:12 skimo Exp  (Berkeley) Date: 2001/06/25 15:19:12 ";
15 #endif /* not lint */
16 
17 #include <sys/types.h>
18 #include <sys/queue.h>
19 
20 #include <bitstring.h>
21 #include <ctype.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <unistd.h>
28 
29 #include "common.h"
30 
31 typedef enum { S_EMPTY, S_EOF, S_NOPREV, S_NOTFOUND, S_SOF, S_WRAP } smsg_t;
32 
33 static void	search_msg __P((SCR *, smsg_t));
34 static int	search_init __P((SCR *, dir_t, CHAR_T *, size_t, CHAR_T **, u_int));
35 
36 /*
37  * search_init --
38  *	Set up a search.
39  */
40 static int
41 search_init(SCR *sp, dir_t dir, CHAR_T *ptrn, size_t plen, CHAR_T **epp, u_int flags)
42 {
43 	db_recno_t lno;
44 	int delim;
45 	CHAR_T *p, *t;
46 
47 	/* If the file is empty, it's a fast search. */
48 	if (sp->lno <= 1) {
49 		if (db_last(sp, &lno))
50 			return (1);
51 		if (lno == 0) {
52 			if (LF_ISSET(SEARCH_MSG))
53 				search_msg(sp, S_EMPTY);
54 			return (1);
55 		}
56 	}
57 
58 	if (LF_ISSET(SEARCH_PARSE)) {		/* Parse the string. */
59 		/*
60 		 * Use the saved pattern if no pattern specified, or if only
61 		 * one or two delimiter characters specified.
62 		 *
63 		 * !!!
64 		 * Historically, only the pattern itself was saved, vi didn't
65 		 * preserve addressing or delta information.
66 		 */
67 		if (ptrn == NULL)
68 			goto prev;
69 		if (plen == 1) {
70 			if (epp != NULL)
71 				*epp = ptrn + 1;
72 			goto prev;
73 		}
74 		if (ptrn[0] == ptrn[1]) {
75 			if (epp != NULL)
76 				*epp = ptrn + 2;
77 
78 			/* Complain if we don't have a previous pattern. */
79 prev:			if (sp->re == NULL) {
80 				search_msg(sp, S_NOPREV);
81 				return (1);
82 			}
83 			/* Re-compile the search pattern if necessary. */
84 			if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
85 			    sp->re, sp->re_len, NULL, NULL, &sp->re_c,
86 			    SEARCH_CSEARCH | SEARCH_MSG))
87 				return (1);
88 
89 			/* Set the search direction. */
90 			if (LF_ISSET(SEARCH_SET))
91 				sp->searchdir = dir;
92 			return (0);
93 		}
94 
95 		/*
96 		 * Set the delimiter, and move forward to the terminating
97 		 * delimiter, handling escaped delimiters.
98 		 *
99 		 * QUOTING NOTE:
100 		 * Only discard an escape character if it escapes a delimiter.
101 		 */
102 		for (delim = *ptrn, p = t = ++ptrn;; *t++ = *p++) {
103 			if (--plen == 0 || p[0] == delim) {
104 				if (plen != 0)
105 					++p;
106 				break;
107 			}
108 			if (plen > 1 && p[0] == '\\' && p[1] == delim) {
109 				++p;
110 				--plen;
111 			}
112 		}
113 		if (epp != NULL)
114 			*epp = p;
115 
116 		plen = t - ptrn;
117 	}
118 
119 	/* Compile the RE. */
120 	if (re_compile(sp, ptrn, plen, &sp->re, &sp->re_len, &sp->re_c,
121 	    SEARCH_CSEARCH | LF_ISSET(SEARCH_CSCOPE | SEARCH_EXTEND |
122 	    SEARCH_IC | SEARCH_LITERAL | SEARCH_MSG | SEARCH_TAG)))
123 		return (1);
124 
125 	/* Set the search direction. */
126 	if (LF_ISSET(SEARCH_SET))
127 		sp->searchdir = dir;
128 
129 	return (0);
130 }
131 
132 /*
133  * f_search --
134  *	Do a forward search.
135  *
136  * PUBLIC: int f_search __P((SCR *,
137  * PUBLIC:    MARK *, MARK *, CHAR_T *, size_t, CHAR_T **, u_int));
138  */
139 int
140 f_search(SCR *sp, MARK *fm, MARK *rm, CHAR_T *ptrn, size_t plen, CHAR_T **eptrn, u_int flags)
141 {
142 	busy_t btype;
143 	db_recno_t lno;
144 	regmatch_t match[1];
145 	size_t coff, len;
146 	int cnt, eval, rval, wrapped;
147 	CHAR_T *l;
148 
149 	if (search_init(sp, FORWARD, ptrn, plen, eptrn, flags))
150 		return (1);
151 
152 	/* Figure out if we're going to wrap. */
153 	if (!LF_ISSET(SEARCH_NOOPT) && O_ISSET(sp, O_WRAPSCAN))
154 		LF_SET(SEARCH_WRAP);
155 
156 	if (LF_ISSET(SEARCH_FIRST)) {
157 		lno = 1;
158 		coff = 0;
159 	} else {
160 		if (db_get(sp, fm->lno, DBG_FATAL, &l, &len))
161 			return (1);
162 		lno = fm->lno;
163 
164 		/*
165 		 * If doing incremental search, start searching at the previous
166 		 * column, so that we search a minimal distance and still match
167 		 * special patterns, e.g., \< for beginning of a word.
168 		 *
169 		 * Otherwise, start searching immediately after the cursor.  If
170 		 * at the end of the line, start searching on the next line.
171 		 * This is incompatible (read bug fix) with the historic vi --
172 		 * searches for the '$' pattern never moved forward, and the
173 		 * "-t foo" didn't work if the 'f' was the first character in
174 		 * the file.
175 		 */
176 		if (LF_ISSET(SEARCH_INCR)) {
177 			if ((coff = fm->cno) != 0)
178 				--coff;
179 		} else if (fm->cno + 1 >= len) {
180 			coff = 0;
181 			lno = fm->lno + 1;
182 			if (db_get(sp, lno, 0, &l, &len)) {
183 				if (!LF_ISSET(SEARCH_WRAP)) {
184 					if (LF_ISSET(SEARCH_MSG))
185 						search_msg(sp, S_EOF);
186 					return (1);
187 				}
188 				lno = 1;
189 			}
190 		} else
191 			coff = fm->cno + 1;
192 	}
193 
194 	btype = BUSY_ON;
195 	for (cnt = INTERRUPT_CHECK, rval = 1, wrapped = 0;; ++lno, coff = 0) {
196 		if (cnt-- == 0) {
197 			if (INTERRUPTED(sp))
198 				break;
199 			if (LF_ISSET(SEARCH_MSG)) {
200 				search_busy(sp, btype);
201 				btype = BUSY_UPDATE;
202 			}
203 			cnt = INTERRUPT_CHECK;
204 		}
205 		if ((wrapped && lno > fm->lno) ||
206 		    db_get(sp, lno, 0, &l, &len)) {
207 			if (wrapped) {
208 				if (LF_ISSET(SEARCH_MSG))
209 					search_msg(sp, S_NOTFOUND);
210 				break;
211 			}
212 			if (!LF_ISSET(SEARCH_WRAP)) {
213 				if (LF_ISSET(SEARCH_MSG))
214 					search_msg(sp, S_EOF);
215 				break;
216 			}
217 			lno = 0;
218 			wrapped = 1;
219 			continue;
220 		}
221 
222 		/* If already at EOL, just keep going. */
223 		if (len != 0 && coff == len)
224 			continue;
225 
226 		/* Set the termination. */
227 		match[0].rm_so = coff;
228 		match[0].rm_eo = len;
229 
230 #if defined(DEBUG) && 0
231 		vtrace(sp, "F search: %lu from %u to %u\n",
232 		    lno, coff, len != 0 ? len - 1 : len);
233 #endif
234 		/* Search the line. */
235 		eval = regexec(&sp->re_c, l, 1, match,
236 		    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | REG_STARTEND);
237 		if (eval == REG_NOMATCH)
238 			continue;
239 		if (eval != 0) {
240 			if (LF_ISSET(SEARCH_MSG))
241 				re_error(sp, eval, &sp->re_c);
242 			else
243 				(void)sp->gp->scr_bell(sp);
244 			break;
245 		}
246 
247 		/* Warn if the search wrapped. */
248 		if (wrapped && LF_ISSET(SEARCH_WMSG))
249 			search_msg(sp, S_WRAP);
250 
251 #if defined(DEBUG) && 0
252 		vtrace(sp, "F search: %qu to %qu\n",
253 		    match[0].rm_so, match[0].rm_eo);
254 #endif
255 		rm->lno = lno;
256 		rm->cno = match[0].rm_so;
257 
258 		/*
259 		 * If a change command, it's possible to move beyond the end
260 		 * of a line.  Historic vi generally got this wrong (e.g. try
261 		 * "c?$<cr>").  Not all that sure this gets it right, there
262 		 * are lots of strange cases.
263 		 */
264 		if (!LF_ISSET(SEARCH_EOL) && rm->cno >= len)
265 			rm->cno = len != 0 ? len - 1 : 0;
266 
267 		rval = 0;
268 		break;
269 	}
270 
271 	if (LF_ISSET(SEARCH_MSG))
272 		search_busy(sp, BUSY_OFF);
273 	return (rval);
274 }
275 
276 /*
277  * b_search --
278  *	Do a backward search.
279  *
280  * PUBLIC: int b_search __P((SCR *,
281  * PUBLIC:    MARK *, MARK *, CHAR_T *, size_t, CHAR_T **, u_int));
282  */
283 int
284 b_search(SCR *sp, MARK *fm, MARK *rm, CHAR_T *ptrn, size_t plen, CHAR_T **eptrn, u_int flags)
285 {
286 	busy_t btype;
287 	db_recno_t lno;
288 	regmatch_t match[1];
289 	size_t coff, last, len;
290 	int cnt, eval, rval, wrapped;
291 	CHAR_T *l;
292 
293 	if (search_init(sp, BACKWARD, ptrn, plen, eptrn, flags))
294 		return (1);
295 
296 	/* Figure out if we're going to wrap. */
297 	if (!LF_ISSET(SEARCH_NOOPT) && O_ISSET(sp, O_WRAPSCAN))
298 		LF_SET(SEARCH_WRAP);
299 
300 	/*
301 	 * If doing incremental search, set the "starting" position past the
302 	 * current column, so that we search a minimal distance and still
303 	 * match special patterns, e.g., \> for the end of a word.  This is
304 	 * safe when the cursor is at the end of a line because we only use
305 	 * it for comparison with the location of the match.
306 	 *
307 	 * Otherwise, start searching immediately before the cursor.  If in
308 	 * the first column, start search on the previous line.
309 	 */
310 	if (LF_ISSET(SEARCH_INCR)) {
311 		lno = fm->lno;
312 		coff = fm->cno + 1;
313 	} else {
314 		if (fm->cno == 0) {
315 			if (fm->lno == 1 && !LF_ISSET(SEARCH_WRAP)) {
316 				if (LF_ISSET(SEARCH_MSG))
317 					search_msg(sp, S_SOF);
318 				return (1);
319 			}
320 			lno = fm->lno - 1;
321 		} else
322 			lno = fm->lno;
323 		coff = fm->cno;
324 	}
325 
326 	btype = BUSY_ON;
327 	for (cnt = INTERRUPT_CHECK, rval = 1, wrapped = 0;; --lno, coff = 0) {
328 		if (cnt-- == 0) {
329 			if (INTERRUPTED(sp))
330 				break;
331 			if (LF_ISSET(SEARCH_MSG)) {
332 				search_busy(sp, btype);
333 				btype = BUSY_UPDATE;
334 			}
335 			cnt = INTERRUPT_CHECK;
336 		}
337 		if ((wrapped && lno < fm->lno) || lno == 0) {
338 			if (wrapped) {
339 				if (LF_ISSET(SEARCH_MSG))
340 					search_msg(sp, S_NOTFOUND);
341 				break;
342 			}
343 			if (!LF_ISSET(SEARCH_WRAP)) {
344 				if (LF_ISSET(SEARCH_MSG))
345 					search_msg(sp, S_SOF);
346 				break;
347 			}
348 			if (db_last(sp, &lno))
349 				break;
350 			if (lno == 0) {
351 				if (LF_ISSET(SEARCH_MSG))
352 					search_msg(sp, S_EMPTY);
353 				break;
354 			}
355 			++lno;
356 			wrapped = 1;
357 			continue;
358 		}
359 
360 		if (db_get(sp, lno, 0, &l, &len))
361 			break;
362 
363 		/* Set the termination. */
364 		match[0].rm_so = 0;
365 		match[0].rm_eo = len;
366 
367 #if defined(DEBUG) && 0
368 		vtrace(sp,
369 		    "B search: %lu from 0 to %qu\n", lno, match[0].rm_eo);
370 #endif
371 		/* Search the line. */
372 		eval = regexec(&sp->re_c, l, 1, match,
373 		    ((size_t)match[0].rm_eo == len ? 0 : REG_NOTEOL) | REG_STARTEND);
374 		if (eval == REG_NOMATCH)
375 			continue;
376 		if (eval != 0) {
377 			if (LF_ISSET(SEARCH_MSG))
378 				re_error(sp, eval, &sp->re_c);
379 			else
380 				(void)sp->gp->scr_bell(sp);
381 			break;
382 		}
383 
384 		/* Check for a match starting past the cursor. */
385 		if (coff != 0 && (size_t)match[0].rm_so >= coff)
386 			continue;
387 
388 		/* Warn if the search wrapped. */
389 		if (wrapped && LF_ISSET(SEARCH_WMSG))
390 			search_msg(sp, S_WRAP);
391 
392 #if defined(DEBUG) && 0
393 		vtrace(sp, "B found: %qu to %qu\n",
394 		    match[0].rm_so, match[0].rm_eo);
395 #endif
396 		/*
397 		 * We now have the first match on the line.  Step through the
398 		 * line character by character until find the last acceptable
399 		 * match.  This is painful, we need a better interface to regex
400 		 * to make this work.
401 		 */
402 		for (;;) {
403 			last = match[0].rm_so++;
404 			if ((size_t)match[0].rm_so >= len)
405 				break;
406 			match[0].rm_eo = len;
407 			eval = regexec(&sp->re_c, l, 1, match,
408 			    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) |
409 			    REG_STARTEND);
410 			if (eval == REG_NOMATCH)
411 				break;
412 			if (eval != 0) {
413 				if (LF_ISSET(SEARCH_MSG))
414 					re_error(sp, eval, &sp->re_c);
415 				else
416 					(void)sp->gp->scr_bell(sp);
417 				goto err;
418 			}
419 			if (coff && (size_t)match[0].rm_so >= coff)
420 				break;
421 		}
422 		rm->lno = lno;
423 
424 		/* See comment in f_search(). */
425 		if (!LF_ISSET(SEARCH_EOL) && last >= len)
426 			rm->cno = len != 0 ? len - 1 : 0;
427 		else
428 			rm->cno = last;
429 		rval = 0;
430 		break;
431 	}
432 
433 err:	if (LF_ISSET(SEARCH_MSG))
434 		search_busy(sp, BUSY_OFF);
435 	return (rval);
436 }
437 
438 /*
439  * search_msg --
440  *	Display one of the search messages.
441  */
442 static void
443 search_msg(SCR *sp, smsg_t msg)
444 {
445 	switch (msg) {
446 	case S_EMPTY:
447 		msgq(sp, M_ERR, "072|File empty; nothing to search");
448 		break;
449 	case S_EOF:
450 		msgq(sp, M_ERR,
451 		    "073|Reached end-of-file without finding the pattern");
452 		break;
453 	case S_NOPREV:
454 		msgq(sp, M_ERR, "074|No previous search pattern");
455 		break;
456 	case S_NOTFOUND:
457 		msgq(sp, M_ERR, "075|Pattern not found");
458 		break;
459 	case S_SOF:
460 		msgq(sp, M_ERR,
461 		    "076|Reached top-of-file without finding the pattern");
462 		break;
463 	case S_WRAP:
464 		msgq(sp, M_ERR, "077|Search wrapped");
465 		break;
466 	default:
467 		abort();
468 	}
469 }
470 
471 /*
472  * search_busy --
473  *	Put up the busy searching message.
474  *
475  * PUBLIC: void search_busy __P((SCR *, busy_t));
476  */
477 void
478 search_busy(SCR *sp, busy_t btype)
479 {
480 	sp->gp->scr_busy(sp, "078|Searching...", btype);
481 }
482