xref: /netbsd/external/bsd/nvi/dist/common/search.c (revision 19f88ca9)
1 /*	$NetBSD: search.c,v 1.5 2023/02/13 23:08:43 gutteridge Exp $ */
2 /*-
3  * Copyright (c) 1992, 1993, 1994
4  *	The Regents of the University of California.  All rights reserved.
5  * Copyright (c) 1992, 1993, 1994, 1995, 1996
6  *	Keith Bostic.  All rights reserved.
7  *
8  * See the LICENSE file for redistribution information.
9  */
10 
11 #include "config.h"
12 
13 #include <sys/cdefs.h>
14 #if 0
15 #ifndef lint
16 static const char sccsid[] = "Id: search.c,v 10.31 2001/06/25 15:19:12 skimo Exp  (Berkeley) Date: 2001/06/25 15:19:12 ";
17 #endif /* not lint */
18 #else
19 __RCSID("$NetBSD: search.c,v 1.5 2023/02/13 23:08:43 gutteridge Exp $");
20 #endif
21 
22 #include <sys/types.h>
23 #include <sys/queue.h>
24 
25 #include <bitstring.h>
26 #include <ctype.h>
27 #include <errno.h>
28 #include <limits.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33 
34 #include "common.h"
35 
36 typedef enum { S_EMPTY, S_EOF, S_NOPREV, S_NOTFOUND, S_SOF, S_WRAP } smsg_t;
37 
38 static void	search_msg __P((SCR *, smsg_t));
39 static int	search_init __P((SCR *, dir_t, CHAR_T *, size_t, CHAR_T **, u_int));
40 
41 /*
42  * search_init --
43  *	Set up a search.
44  */
45 static int
search_init(SCR * sp,dir_t dir,CHAR_T * ptrn,size_t plen,CHAR_T ** epp,u_int flags)46 search_init(SCR *sp, dir_t dir, CHAR_T *ptrn, size_t plen, CHAR_T **epp, u_int flags)
47 {
48 	db_recno_t lno;
49 	int delim;
50 	CHAR_T *p, *t;
51 
52 	/* If the file is empty, it's a fast search. */
53 	if (sp->lno <= 1) {
54 		if (db_last(sp, &lno))
55 			return (1);
56 		if (lno == 0) {
57 			if (LF_ISSET(SEARCH_MSG))
58 				search_msg(sp, S_EMPTY);
59 			return (1);
60 		}
61 	}
62 
63 	if (LF_ISSET(SEARCH_PARSE)) {		/* Parse the string. */
64 		/*
65 		 * Use the saved pattern if no pattern specified, or if only
66 		 * one or two delimiter characters specified.
67 		 *
68 		 * !!!
69 		 * Historically, only the pattern itself was saved, vi didn't
70 		 * preserve addressing or delta information.
71 		 */
72 		if (ptrn == NULL)
73 			goto prev;
74 		if (plen == 1) {
75 			if (epp != NULL)
76 				*epp = ptrn + 1;
77 			goto prev;
78 		}
79 		if (ptrn[0] == ptrn[1]) {
80 			if (epp != NULL)
81 				*epp = ptrn + 2;
82 
83 			/* Complain if we don't have a previous pattern. */
84 prev:			if (sp->re == NULL) {
85 				search_msg(sp, S_NOPREV);
86 				return (1);
87 			}
88 			/* Re-compile the search pattern if necessary. */
89 			if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
90 			    sp->re, sp->re_len, NULL, NULL, &sp->re_c,
91 			    SEARCH_CSEARCH | SEARCH_MSG))
92 				return (1);
93 
94 			/* Set the search direction. */
95 			if (LF_ISSET(SEARCH_SET))
96 				sp->searchdir = dir;
97 			return (0);
98 		}
99 
100 		/*
101 		 * Set the delimiter, and move forward to the terminating
102 		 * delimiter, handling escaped delimiters.
103 		 *
104 		 * QUOTING NOTE:
105 		 * Only discard an escape character if it escapes a delimiter.
106 		 */
107 		for (delim = *ptrn, p = t = ++ptrn;; *t++ = *p++) {
108 			if (--plen == 0 || p[0] == delim) {
109 				if (plen != 0)
110 					++p;
111 				break;
112 			}
113 			if (plen > 1 && p[0] == '\\') {
114 				if(p[1] == delim) {
115 					++p;
116 					--plen;
117 				} else if(p[1] == '\\') {
118 					*t++ = *p++;
119 					--plen;
120 				}
121 			}
122 		}
123 		if (epp != NULL)
124 			*epp = p;
125 
126 		plen = t - ptrn;
127 	}
128 
129 	/* Compile the RE. */
130 	if (re_compile(sp, ptrn, plen, &sp->re, &sp->re_len, &sp->re_c,
131 	    SEARCH_CSEARCH | LF_ISSET(SEARCH_CSCOPE | SEARCH_EXTEND |
132 	    SEARCH_IC | SEARCH_LITERAL | SEARCH_MSG | SEARCH_TAG)))
133 		return (1);
134 
135 	/* Set the search direction. */
136 	if (LF_ISSET(SEARCH_SET))
137 		sp->searchdir = dir;
138 
139 	return (0);
140 }
141 
142 /*
143  * f_search --
144  *	Do a forward search.
145  *
146  * PUBLIC: int f_search __P((SCR *,
147  * PUBLIC:    MARK *, MARK *, CHAR_T *, size_t, CHAR_T **, u_int));
148  */
149 int
f_search(SCR * sp,MARK * fm,MARK * rm,CHAR_T * ptrn,size_t plen,CHAR_T ** eptrn,u_int flags)150 f_search(SCR *sp, MARK *fm, MARK *rm, CHAR_T *ptrn, size_t plen, CHAR_T **eptrn, u_int flags)
151 {
152 	busy_t btype;
153 	db_recno_t lno;
154 	regmatch_t match[1];
155 	size_t coff, len;
156 	int cnt, eval, rval, wrapped = 0;
157 	CHAR_T *l;
158 
159 	if (search_init(sp, FORWARD, ptrn, plen, eptrn, flags))
160 		return (1);
161 
162 	/* Figure out if we're going to wrap. */
163 	if (!LF_ISSET(SEARCH_NOOPT) && O_ISSET(sp, O_WRAPSCAN))
164 		LF_SET(SEARCH_WRAP);
165 
166 	if (LF_ISSET(SEARCH_FIRST)) {
167 		lno = 1;
168 		coff = 0;
169 	} else {
170 		if (db_get(sp, fm->lno, DBG_FATAL, &l, &len))
171 			return (1);
172 		lno = fm->lno;
173 
174 		/*
175 		 * If doing incremental search, start searching at the previous
176 		 * column, so that we search a minimal distance and still match
177 		 * special patterns, e.g., \< for beginning of a word.
178 		 *
179 		 * Otherwise, start searching immediately after the cursor.  If
180 		 * at the end of the line, start searching on the next line.
181 		 * This is incompatible (read bug fix) with the historic vi --
182 		 * searches for the '$' pattern never moved forward, and the
183 		 * "-t foo" didn't work if the 'f' was the first character in
184 		 * the file.
185 		 */
186 		if (LF_ISSET(SEARCH_INCR)) {
187 			if ((coff = fm->cno) != 0)
188 				--coff;
189 		} else if (fm->cno + 1 >= len) {
190 			coff = 0;
191 			lno = fm->lno + 1;
192 			if (db_get(sp, lno, 0, &l, &len)) {
193 				if (!LF_ISSET(SEARCH_WRAP)) {
194 					if (LF_ISSET(SEARCH_MSG))
195 						search_msg(sp, S_EOF);
196 					return (1);
197 				}
198 				lno = 1;
199 				wrapped = 1;
200 			}
201 		} else
202 			coff = fm->cno + 1;
203 	}
204 
205 	btype = BUSY_ON;
206 	for (cnt = INTERRUPT_CHECK, rval = 1;; ++lno, coff = 0) {
207 		if (cnt-- == 0) {
208 			if (INTERRUPTED(sp))
209 				break;
210 			if (LF_ISSET(SEARCH_MSG)) {
211 				search_busy(sp, btype);
212 				btype = BUSY_UPDATE;
213 			}
214 			cnt = INTERRUPT_CHECK;
215 		}
216 		if ((wrapped && lno > fm->lno) ||
217 		    db_get(sp, lno, 0, &l, &len)) {
218 			if (wrapped) {
219 				if (LF_ISSET(SEARCH_MSG))
220 					search_msg(sp, S_NOTFOUND);
221 				break;
222 			}
223 			if (!LF_ISSET(SEARCH_WRAP)) {
224 				if (LF_ISSET(SEARCH_MSG))
225 					search_msg(sp, S_EOF);
226 				break;
227 			}
228 			lno = 0;
229 			wrapped = 1;
230 			continue;
231 		}
232 
233 		/* If already at EOL, just keep going. */
234 		if (len != 0 && coff == len)
235 			continue;
236 
237 		/* Set the termination. */
238 		match[0].rm_so = coff;
239 		match[0].rm_eo = len;
240 
241 #if defined(DEBUG) && 0
242 		vtrace(sp, "F search: %lu from %u to %u\n",
243 		    lno, coff, len != 0 ? len - 1 : len);
244 #endif
245 		/* Search the line. */
246 		eval = regexec(&sp->re_c, l, 1, match,
247 		    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | REG_STARTEND);
248 		if (eval == REG_NOMATCH)
249 			continue;
250 		if (eval != 0) {
251 			if (LF_ISSET(SEARCH_MSG))
252 				re_error(sp, eval, &sp->re_c);
253 			else
254 				(void)sp->gp->scr_bell(sp);
255 			break;
256 		}
257 
258 		/* Warn if the search wrapped. */
259 		if (wrapped && LF_ISSET(SEARCH_WMSG))
260 			search_msg(sp, S_WRAP);
261 
262 #if defined(DEBUG) && 0
263 		vtrace(sp, "F search: %qu to %qu\n",
264 		    match[0].rm_so, match[0].rm_eo);
265 #endif
266 		rm->lno = lno;
267 		rm->cno = match[0].rm_so;
268 
269 		/*
270 		 * If a change command, it's possible to move beyond the end
271 		 * of a line.  Historic vi generally got this wrong (e.g. try
272 		 * "c?$<cr>").  Not all that sure this gets it right, there
273 		 * are lots of strange cases.
274 		 */
275 		if (!LF_ISSET(SEARCH_EOL) && rm->cno >= len)
276 			rm->cno = len != 0 ? len - 1 : 0;
277 
278 		rval = 0;
279 		break;
280 	}
281 
282 	if (LF_ISSET(SEARCH_MSG))
283 		search_busy(sp, BUSY_OFF);
284 	return (rval);
285 }
286 
287 /*
288  * b_search --
289  *	Do a backward search.
290  *
291  * PUBLIC: int b_search __P((SCR *,
292  * PUBLIC:    MARK *, MARK *, CHAR_T *, size_t, CHAR_T **, u_int));
293  */
294 int
b_search(SCR * sp,MARK * fm,MARK * rm,CHAR_T * ptrn,size_t plen,CHAR_T ** eptrn,u_int flags)295 b_search(SCR *sp, MARK *fm, MARK *rm, CHAR_T *ptrn, size_t plen, CHAR_T **eptrn, u_int flags)
296 {
297 	busy_t btype;
298 	db_recno_t lno;
299 	regmatch_t match[1];
300 	size_t coff, last, len;
301 	int cnt, eval, rval, wrapped;
302 	CHAR_T *l;
303 
304 	if (search_init(sp, BACKWARD, ptrn, plen, eptrn, flags))
305 		return (1);
306 
307 	/* Figure out if we're going to wrap. */
308 	if (!LF_ISSET(SEARCH_NOOPT) && O_ISSET(sp, O_WRAPSCAN))
309 		LF_SET(SEARCH_WRAP);
310 
311 	/*
312 	 * If doing incremental search, set the "starting" position past the
313 	 * current column, so that we search a minimal distance and still
314 	 * match special patterns, e.g., \> for the end of a word.  This is
315 	 * safe when the cursor is at the end of a line because we only use
316 	 * it for comparison with the location of the match.
317 	 *
318 	 * Otherwise, start searching immediately before the cursor.  If in
319 	 * the first column, start search on the previous line.
320 	 */
321 	if (LF_ISSET(SEARCH_INCR)) {
322 		lno = fm->lno;
323 		coff = fm->cno + 1;
324 	} else {
325 		if (fm->cno == 0) {
326 			if (fm->lno == 1 && !LF_ISSET(SEARCH_WRAP)) {
327 				if (LF_ISSET(SEARCH_MSG))
328 					search_msg(sp, S_SOF);
329 				return (1);
330 			}
331 			lno = fm->lno - 1;
332 		} else
333 			lno = fm->lno;
334 		coff = fm->cno;
335 	}
336 
337 	btype = BUSY_ON;
338 	for (cnt = INTERRUPT_CHECK, rval = 1, wrapped = 0;; --lno, coff = 0) {
339 		if (cnt-- == 0) {
340 			if (INTERRUPTED(sp))
341 				break;
342 			if (LF_ISSET(SEARCH_MSG)) {
343 				search_busy(sp, btype);
344 				btype = BUSY_UPDATE;
345 			}
346 			cnt = INTERRUPT_CHECK;
347 		}
348 		if ((wrapped && lno < fm->lno) || lno == 0) {
349 			if (wrapped) {
350 				if (LF_ISSET(SEARCH_MSG))
351 					search_msg(sp, S_NOTFOUND);
352 				break;
353 			}
354 			if (!LF_ISSET(SEARCH_WRAP)) {
355 				if (LF_ISSET(SEARCH_MSG))
356 					search_msg(sp, S_SOF);
357 				break;
358 			}
359 			if (db_last(sp, &lno))
360 				break;
361 			if (lno == 0) {
362 				if (LF_ISSET(SEARCH_MSG))
363 					search_msg(sp, S_EMPTY);
364 				break;
365 			}
366 			++lno;
367 			wrapped = 1;
368 			continue;
369 		}
370 
371 		if (db_get(sp, lno, 0, &l, &len))
372 			break;
373 
374 		/* Set the termination. */
375 		match[0].rm_so = 0;
376 		match[0].rm_eo = len;
377 
378 #if defined(DEBUG) && 0
379 		vtrace(sp,
380 		    "B search: %lu from 0 to %qu\n", lno, match[0].rm_eo);
381 #endif
382 		/* Search the line. */
383 		eval = regexec(&sp->re_c, l, 1, match,
384 		    ((size_t)match[0].rm_eo == len ? 0 : REG_NOTEOL) | REG_STARTEND);
385 		if (eval == REG_NOMATCH)
386 			continue;
387 		if (eval != 0) {
388 			if (LF_ISSET(SEARCH_MSG))
389 				re_error(sp, eval, &sp->re_c);
390 			else
391 				(void)sp->gp->scr_bell(sp);
392 			break;
393 		}
394 
395 		/* Check for a match starting past the cursor. */
396 		if (coff != 0 && (size_t)match[0].rm_so >= coff)
397 			continue;
398 
399 		/* Warn if the search wrapped. */
400 		if (wrapped && LF_ISSET(SEARCH_WMSG))
401 			search_msg(sp, S_WRAP);
402 
403 #if defined(DEBUG) && 0
404 		vtrace(sp, "B found: %qu to %qu\n",
405 		    match[0].rm_so, match[0].rm_eo);
406 #endif
407 		/*
408 		 * We now have the first match on the line.  Step through the
409 		 * line character by character until find the last acceptable
410 		 * match.  This is painful, we need a better interface to regex
411 		 * to make this work.
412 		 */
413 		for (;;) {
414 			last = match[0].rm_so++;
415 			if ((size_t)match[0].rm_so >= len)
416 				break;
417 			match[0].rm_eo = len;
418 			eval = regexec(&sp->re_c, l, 1, match,
419 			    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) |
420 			    REG_STARTEND);
421 			if (eval == REG_NOMATCH)
422 				break;
423 			if (eval != 0) {
424 				if (LF_ISSET(SEARCH_MSG))
425 					re_error(sp, eval, &sp->re_c);
426 				else
427 					(void)sp->gp->scr_bell(sp);
428 				goto err;
429 			}
430 			if (coff && (size_t)match[0].rm_so >= coff)
431 				break;
432 		}
433 		rm->lno = lno;
434 
435 		/* See comment in f_search(). */
436 		if (!LF_ISSET(SEARCH_EOL) && last >= len)
437 			rm->cno = len != 0 ? len - 1 : 0;
438 		else
439 			rm->cno = last;
440 		rval = 0;
441 		break;
442 	}
443 
444 err:	if (LF_ISSET(SEARCH_MSG))
445 		search_busy(sp, BUSY_OFF);
446 	return (rval);
447 }
448 
449 /*
450  * search_msg --
451  *	Display one of the search messages.
452  */
453 static void
search_msg(SCR * sp,smsg_t msg)454 search_msg(SCR *sp, smsg_t msg)
455 {
456 	switch (msg) {
457 	case S_EMPTY:
458 		msgq(sp, M_ERR, "072|File empty; nothing to search");
459 		break;
460 	case S_EOF:
461 		msgq(sp, M_ERR,
462 		    "073|Reached end-of-file without finding the pattern");
463 		break;
464 	case S_NOPREV:
465 		msgq(sp, M_ERR, "074|No previous search pattern");
466 		break;
467 	case S_NOTFOUND:
468 		msgq(sp, M_ERR, "075|Pattern not found");
469 		break;
470 	case S_SOF:
471 		msgq(sp, M_ERR,
472 		    "076|Reached top-of-file without finding the pattern");
473 		break;
474 	case S_WRAP:
475 		msgq(sp, M_ERR, "077|Search wrapped");
476 		break;
477 	default:
478 		abort();
479 	}
480 }
481 
482 /*
483  * search_busy --
484  *	Put up the busy searching message.
485  *
486  * PUBLIC: void search_busy __P((SCR *, busy_t));
487  */
488 void
search_busy(SCR * sp,busy_t btype)489 search_busy(SCR *sp, busy_t btype)
490 {
491 	sp->gp->scr_busy(sp, "078|Searching...", btype);
492 }
493