1 /* @(#)search.c 1.30 09/07/09 Copyright 1984-2009 J. Schilling */
2 #include <schily/mconfig.h>
3 #ifndef lint
4 static UConst char sccsid[] =
5 "@(#)search.c 1.30 09/07/09 Copyright 1984-2009 J. Schilling";
6 #endif
7 /*
8 * Search functions for VED
9 *
10 * Copyright (c) 1984-2009 J. Schilling
11 */
12 /*
13 * The contents of this file are subject to the terms of the
14 * Common Development and Distribution License, Version 1.0 only
15 * (the "License"). You may not use this file except in compliance
16 * with the License.
17 *
18 * See the file CDDL.Schily.txt in this distribution for details.
19 * A copy of the CDDL is also available via the Internet at
20 * http://www.opensource.org/licenses/cddl1.txt
21 *
22 * When distributing Covered Code, include this CDDL HEADER in each
23 * file and include the License file CDDL.Schily.txt from this distribution.
24 */
25
26 /*
27 * This are the low level support routines to search forwards or backwards
28 * through the buffer. High level interfaces are found in searchcmds.c and
29 * movedot.c
30 *
31 * The package implements two levels of of optimazation:
32 * If the pattern does not contain special characters of the pattern matcher
33 * simplesearch() and simplereverse() are used. If the pattern in addition
34 * is a one byte pattern, searchchar() and srevchar() are used.
35 * All other search operations are handled directly inside search()
36 * and reverse() by using the pattern matcher.
37 *
38 * All routines return a value > eof to indicate that the requested object
39 * could not be found. Some routines decrement/incremet the value 'begin' so
40 * we must return eof+2 to be different from all possible 'correct' values.
41 * If the pattern is bad, eof+3 is returned to indicate that no 'not found'
42 * message should be displayed.
43 *
44 * Return values:
45 * eof+2 Pattern not found
46 * eof+3 Bad Pattern
47 * other Pattern found
48 */
49 #include "ved.h"
50 #include "buffer.h"
51 #include <schily/patmatch.h>
52
53 #define SEARCHSIZE 4096
54
55 EXPORT BOOL issimple __PR((Uchar* pattern, int length));
56 LOCAL epos_t searchchar __PR((ewin_t *wp, epos_t begin, int ch));
57 EXPORT epos_t srevchar __PR((ewin_t *wp, epos_t begin, int ch));
58 EXPORT epos_t search __PR((ewin_t *wp, epos_t begin, Uchar* pattern, int patsize, BOOL domark));
59 EXPORT epos_t reverse __PR((ewin_t *wp, epos_t begin, Uchar* pattern, int patsize, BOOL domark));
60 LOCAL BOOL simplesearch __PR((Uchar* searchstr, int searchlen, Uchar* pattern, int patlen, Uchar** beginp, Uchar** endp));
61 LOCAL BOOL simplereverse __PR((Uchar* searchstr, int searchlen, Uchar* pattern, int patlen, Uchar** beginp, Uchar** endp));
62 LOCAL epos_t searchdone __PR((ewin_t *wp, epos_t start, epos_t ret, BOOL domark));
63
64 /*
65 * Check if the pattern contains special characters of the pattern matcher.
66 * Do not look for characters that may be used only inside of a character set.
67 */
68 EXPORT BOOL
issimple(pattern,length)69 issimple(pattern, length)
70 register Uchar *pattern;
71 register int length;
72 {
73 while (length-- > 0) {
74 switch (*pattern++) {
75
76 case ALT: case REP: case NIL:
77 case STAR: case LBRACK: case RBRACK:
78 case LCLASS: case RCLASS: case QUOTE:
79 case ANY: case START: case END:
80 return (FALSE);
81 }
82 }
83 return (TRUE);
84 }
85
86 /*
87 * Search forwards for a one character 'pattern'.
88 */
89 LOCAL epos_t
searchchar(wp,begin,ch)90 searchchar(wp, begin, ch)
91 ewin_t *wp;
92 epos_t begin;
93 Uchar ch;
94 {
95 register headr_t *linkp;
96 register Uchar *stuff;
97 register int linksize;
98 headr_t *passlinkp;
99 int pos;
100
101 findpos(wp, begin, &passlinkp, &pos);
102 linkp = passlinkp;
103 linksize = linkp->size - pos;
104 stuff = linkp->cont + pos;
105
106 for (;;) {
107 while (linksize--) {
108 if (*stuff++ == ch)
109 return (begin);
110 begin++;
111 }
112 if ((linkp = linkp->next) == NULL)
113 return (wp->eof+2);
114 readybuffer(wp, linkp);
115 linksize = linkp->size;
116 stuff = linkp->cont;
117 }
118 }
119
120 /*
121 * Search backwards for a one character 'pattern'.
122 */
123 EXPORT epos_t
srevchar(wp,begin,ch)124 srevchar(wp, begin, ch)
125 ewin_t *wp;
126 epos_t begin;
127 Uchar ch;
128 {
129 register headr_t *linkp;
130 register Uchar *stuff;
131 register int linksize;
132 headr_t *passlinkp;
133 int pos;
134
135 findpos(wp, begin, &passlinkp, &pos);
136 linkp = passlinkp;
137 linksize = pos + 1;
138 stuff = linkp->cont + pos;
139
140 for (;;) {
141 while (linksize--) {
142 if (*stuff-- == ch)
143 return (begin);
144 begin--;
145 }
146 if ((linkp = linkp->prev) == NULL)
147 return (wp->eof+2);
148 readybuffer(wp, linkp);
149 linksize = linkp->size;
150 stuff = linkp->cont + linkp->size - 1;
151 }
152 }
153
154 /*
155 * Search forwards for 'pattern' doing the appropriate optimizations.
156 */
157 EXPORT epos_t
search(wp,begin,pattern,patsize,domark)158 search(wp, begin, pattern, patsize, domark)
159 ewin_t *wp;
160 epos_t begin;
161 Uchar *pattern;
162 int patsize;
163 BOOL domark;
164 {
165 Uchar searchstr[SEARCHSIZE + 1];
166 int aux[SEARCHSIZE]; /* Nobody should be able to type such*/
167 int state[SEARCHSIZE+1]; /* a long pattern - avoid malloc() */
168 int alt;
169 int start;
170 int readsize;
171 int firsttime;
172 int midline;
173 int nl;
174 Uchar *strstart;
175 Uchar *place;
176 epos_t pos;
177
178 if (begin >= wp->eof)
179 return (wp->eof+2);
180 if (patsize == 0)
181 return (wp->eof+2);
182
183 if (wp->magic == FALSE || issimple(pattern, patsize)) {
184 /*
185 * Search without the pattern matcher.
186 */
187 if (patsize == 1) { /* single character pattern */
188 if ((pos = searchchar(wp, begin, *pattern)) < wp->eof)
189 return (searchdone(wp, pos, pos+1, domark));
190 else
191 return (wp->eof+2);
192 }
193
194 /* while (readsize = extr_line(begin, searchstr, SEARCHSIZE)) {*/
195 while ((readsize = extract(wp, begin, searchstr, SEARCHSIZE)) != 0) {
196 if (simplesearch(searchstr, readsize,
197 pattern, patsize, &strstart, &place)) {
198
199 return (searchdone(wp, begin+
200 (int)(strstart-searchstr),
201 begin+(int)(place-searchstr),
202 domark));
203 }
204 begin += readsize;
205 #ifdef _use_extr_line_
206 /*extr_line*/
207 if (begin < wp->eof && searchstr[readsize-1] != '\n') {
208 #else
209 if (begin < wp->eof) {
210 #endif
211 begin -= patsize - 1;
212 }
213 }
214 } else {
215 /*
216 * First compile the pattern.
217 */
218 if (patsize > sizeof (aux)/sizeof (aux[0])) {
219 writeerr(wp, "PATTERN TOO LONG");
220 return (wp->eof+3);
221 }
222 alt = patcompile(pattern, patsize, aux);
223 if (alt == 0) {
224 writeerr(wp, "BAD PATTERN");
225 return (wp->eof+3);
226 }
227 /*
228 * Check if we start searching at the beginning of a line.
229 */
230 midline = 0;
231 nl = 1;
232 if (begin != 0L &&
233 extract(wp, begin-1, searchstr, 1) && searchstr[0] != '\n') {
234 nl = 0; /* No newline found one left to the cursor */
235 }
236
237 /*
238 * Then start searching...
239 */
240 for (firsttime = 1;
241 (readsize = extr_line(wp, begin, C searchstr, SEARCHSIZE)) != 0;
242 firsttime = 0, midline = 0) {
243 /*
244 * If the current line does not begin with a new-line
245 * we are in touble. In theory, we need to disable '^'
246 * (begin of line) but this is only a silly approach.
247 */
248 if (!nl)
249 midline++;
250 nl = 0;
251 if (searchstr[readsize-1] == '\n') {
252 nl++;
253 /* XXX Ist das richtig ??? */
254 /* searchstr[readsize-1] = '\0';*/
255 }
256 for (start = 0; start < readsize; start++)
257 if ((place = UC patmatch(pattern, aux,
258 UC (searchstr - midline),
259 start + midline,
260 readsize + midline, alt, state)) != NULL)
261 if (!firsttime || place > searchstr) {
262 /*
263 * Only found if the cursor moved.
264 */
265 return (searchdone(wp, begin+start,
266 begin+(int)(place-searchstr),
267 domark));
268 }
269 begin += readsize;
270 if (begin < wp->eof && !nl)
271 begin -= patsize - 1;
272 }
273 }
274 return (wp->eof+2);
275 }
276
277 /*
278 * Search backwards for 'pattern' doing the appropriate optimizations.
279 * Decrement 'begin' first to make sure we will not find the same place twice.
280 */
281 EXPORT epos_t
reverse(wp,begin,pattern,patsize,domark)282 reverse(wp, begin, pattern, patsize, domark)
283 ewin_t *wp;
284 epos_t begin;
285 Uchar *pattern;
286 int patsize;
287 BOOL domark;
288 {
289 Uchar searchstr[SEARCHSIZE + 1];
290 int aux[SEARCHSIZE]; /* Nobody should be able to type such*/
291 int state[SEARCHSIZE+1]; /* a long pattern - avoid malloc() */
292 int alt;
293 int start;
294 int readsize;
295 int firsttime;
296 Uchar *strstart;
297 Uchar *place;
298 Uchar *bestplace;
299 epos_t pos = wp->dot;
300
301 begin--;
302 if (begin <= 0)
303 return (wp->eof+2);
304 if (patsize == 0)
305 return (wp->eof+2);
306 if (begin > wp->eof) /* OK JS */
307 return (wp->eof+2);
308
309 if (wp->magic == FALSE || issimple(pattern, patsize)) {
310 /*
311 * Search without the pattern matcher.
312 */
313 if (patsize == 1) { /* single character pattern */
314 if ((pos = srevchar(wp, begin-1, *pattern)) < wp->eof)
315 /* if ((pos = srevchar(wp, begin, *pattern)) < wp->eof)*/
316 return (searchdone(wp, pos, pos+1, domark));
317 else
318 return (wp->eof+2);
319 }
320
321 /* while (readsize = retractline(begin, searchstr, SEARCHSIZE)) {*/
322 while ((readsize = extract(wp, max(0, begin-SEARCHSIZE), searchstr, (int)min(begin, SEARCHSIZE))) != 0) {
323 begin -= readsize;
324 if (begin < 0)
325 begin = 0;
326 if (simplereverse(searchstr, readsize,
327 pattern, patsize, &strstart, &place))
328 return (searchdone(wp, begin+(int)(strstart-searchstr),
329 begin+(int)(place-searchstr), domark));
330 if (begin > 0)
331 begin += patsize - 1;
332 }
333 } else {
334 /*
335 * First compile the pattern.
336 */
337 if (patsize > sizeof (aux)/sizeof (aux[0])) {
338 writeerr(wp, "PATTERN TOO LONG");
339 return (wp->eof+3);
340 }
341 alt = patcompile(pattern, patsize, aux);
342 if (alt == 0) {
343 writeerr(wp, "BAD PATTERN");
344 return (wp->eof+3);
345 }
346
347 /*
348 * Then start searching...
349 */
350 ++begin;
351 for (firsttime = 1;
352 (readsize = retractline(wp, begin, C searchstr, SEARCHSIZE)) != 0;
353 firsttime = 0) {
354 begin -= readsize;
355 /* XXX Ist das richtig ??? */
356 /* if (searchstr[readsize-1] == '\n')*/
357 /* searchstr[readsize-1] = '\0';*/
358 for (start = 0, bestplace = 0; start < readsize; ++start)
359 if ((place = UC patmatch(pattern, aux, searchstr,
360 start, readsize, alt, state)) != NULL)
361 if (!firsttime ||
362 place - searchstr < readsize)
363 /*
364 * Only found if the cursor moved.
365 */
366 if (place > bestplace) {
367 /*
368 * Use the longest
369 * possible match.
370 */
371 bestplace = place;
372 pos = start;
373 }
374 if (bestplace)
375 return (searchdone(wp, begin+pos,
376 begin+(int)(bestplace-searchstr), domark));
377 }
378 }
379 return (wp->eof+2);
380 }
381
382 /*
383 * Search forwards for a simple pattern.
384 * If the pattern could be found, *beginp amd *endp are set.
385 * beginp points to the first character that matches,
386 * endp points to character the after the last matching character.
387 */
388 LOCAL BOOL
simplesearch(searchstr,searchlen,pattern,patlen,beginp,endp)389 simplesearch(searchstr, searchlen, pattern, patlen, beginp, endp)
390 Uchar *searchstr; /* buffer to be searched */
391 int searchlen; /* length of buffer to be searched */
392 Uchar *pattern; /* search pattern */
393 int patlen; /* length of search pattern */
394 Uchar **beginp; /* start 'return' value */
395 Uchar **endp; /* end 'return' value */
396 {
397 register Uchar *str;
398 register Uchar *pat;
399 register Uchar *strstart;
400 register Uchar *patstart;
401 register int n;
402 register int len;
403 register int plen;
404
405 len = searchlen;
406 plen = patlen;
407 strstart = searchstr;
408 patstart = pattern;
409 while (len-- >= plen) {
410 for (str = strstart, pat = patstart, n = 0;
411 *str == *pat && n < plen;
412 str++, pat++, n++) {
413 ;
414 }
415 if (n == plen) { /* Found */
416 *beginp = strstart;
417 *endp = str;
418 return (TRUE);
419 }
420 strstart++;
421 }
422 return (FALSE);
423 }
424
425 /*
426 * Search backwards for a simple pattern.
427 * If the pattern could be found, *beginp amd *endp are set.
428 * beginp points to the first character that matches,
429 * endp points to character the after the last matching character.
430 */
431 LOCAL BOOL
simplereverse(searchstr,searchlen,pattern,patlen,beginp,endp)432 simplereverse(searchstr, searchlen, pattern, patlen, beginp, endp)
433 Uchar *searchstr; /* buffer to be searched */
434 register int searchlen; /* length of buffer to be searched */
435 Uchar *pattern; /* search pattern */
436 int patlen; /* length of search pattern */
437 Uchar **beginp; /* start 'return' value */
438 Uchar **endp; /* end 'return' value */
439 {
440 register Uchar *str;
441 register Uchar *pat;
442 register Uchar *searchpos;
443 register Uchar *strstart;
444 register Uchar *patstart;
445 register int n;
446 register int plen;
447
448 plen = patlen;
449 strstart = searchstr;
450 searchpos = strstart + searchlen - plen + 1;
451 patstart = pattern;
452 while (--searchpos >= strstart) {
453 for (str = searchpos, pat = patstart, n = 0;
454 *str == *pat && n < plen;
455 str++, pat++, n++)
456 ;
457 if (n == plen) { /* Found */
458 *beginp = searchpos;
459 *endp = str;
460 return (TRUE);
461 }
462 }
463 return (FALSE);
464 }
465
466 /*
467 * Return the end of the found pattern and set a mark at the start of the
468 * found pattern if wanted.
469 */
470 LOCAL epos_t
searchdone(wp,start,ret,domark)471 searchdone(wp, start, ret, domark)
472 ewin_t *wp;
473 epos_t start;
474 epos_t ret;
475 BOOL domark;
476 {
477 if (domark)
478 setmark(wp, start);
479 return (ret);
480 }
481