1 /*
2  * sary - a suffix array library
3  *
4  * $Id: searcher.c,v 1.1.1.1 2004/06/11 18:57:27 satoru-t Exp $
5  *
6  * Copyright (C) 2000  Satoru Takabayashi <satoru@namazu.org>
7  * All rights reserved.
8  *
9  * This library is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Library General Public
11  * License as published by the Free Software Foundation; either
12  * version 2 of the License, or (at your option) any later version.
13  *
14  * This library is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
17  * Library General Public License for more details.
18  *
19  * You should have received a copy of the GNU Library General Public
20  * License along with this library; if not, write to the
21  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
22  * Boston, MA 02111-1307, USA.
23  */
24 
25 #include "config.h"
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <glib.h>
30 #include <errno.h>
31 #include <ctype.h>
32 #include <sary.h>
33 
34 /*
35  * SarySearcher stands for Suffix Array Searcher.
36  */
37 
38 typedef	gboolean	(*SearchFunc) (SarySearcher *searcher,
39 				       const gchar *pattern,
40 				       SaryInt len,
41 				       SaryInt offset,
42 				       SaryInt range);
43 struct _SarySearcher {
44     SaryInt     len;    /* number of index points */
45     SaryText    *text;
46     SaryMmap    *array;
47     SaryInt  	*first;
48     SaryInt     *last;
49     SaryInt     *cursor;
50     SaryInt	*allocated_data;
51     gboolean    is_sorted;
52     gboolean    is_allocated;
53     SaryPattern	pattern;
54     SaryCache	*cache;
55     SearchFunc  search;
56 };
57 
58 typedef gchar* (*SeekFunc)(const gchar *cursor,
59 			   const gchar *sentinel,
60 			   gconstpointer data);
61 typedef struct {
62     SeekFunc		seek_backward;
63     SeekFunc		seek_forward;
64     gconstpointer	backward_data;
65     gconstpointer	forward_data;
66 } Seeker;
67 
68 typedef struct {
69     const gchar *str;
70     SaryInt len;
71 } Tag;
72 
73 typedef struct {
74     gchar **patterns;
75     gint  npatterns;
76 } Patterns;
77 
78 static gchar *		peek_next_occurrence	(SarySearcher *searcher);
79 static void		init_searcher_states	(SarySearcher *searcher,
80 						 gboolean first_time);
81 static gboolean		search 			(SarySearcher *searcher,
82 						 const gchar *pattern,
83 						 SaryInt len,
84 						 SaryInt offset,
85 						 SaryInt range);
86 static inline gint	bsearchcmp		(gconstpointer searcher_ptr,
87 					 	 gconstpointer obj_ptr);
88 static inline gint	qsortcmp		(gconstpointer ptr1,
89 						 gconstpointer ptr2);
90 static inline gint	qsortscmp		(gconstpointer ptr1,
91                                                  gconstpointer ptr2);
92 static Patterns*	patterns_new		(gchar **patterns,
93                                                  gint npatterns);
94 static void		patterns_destroy	(Patterns *pat);
95 static void		patterns_sort		(Patterns *pat);
96 static gboolean		cache_search		(SarySearcher *searcher,
97 						 const gchar *pattern,
98 						 SaryInt len,
99 						 SaryInt offset,
100 						 SaryInt range);
101 static GArray*		icase_search		(SarySearcher *searcher,
102 						 gchar *pattern,
103 						 SaryInt len,
104 						 SaryInt step,
105 						 GArray *result);
106 static gint		expand_letter		(gint *cand, gint c);
107 static void		assign_range		(SarySearcher *searcher,
108 						 SaryInt *occurences,
109 						 SaryInt len);
110 static gchar*		get_next_region		(SarySearcher *searcher,
111 						 Seeker *seeker,
112 						 SaryInt *len);
113 static gchar*		join_subsequent_region	(SarySearcher *searcher,
114 						 Seeker *seeker,
115 						 gchar *tail);
116 static gchar*		get_region		(const gchar *head,
117 						 const gchar *eof,
118 						 SaryInt len);
119 static gchar*		seek_lines_backward	(const gchar *cursor,
120 						 const gchar *bof,
121 						 gconstpointer n_ptr);
122 static gchar*		seek_lines_forward 	(const gchar *cursor,
123 						 const gchar *eof,
124 						 gconstpointer n_ptr);
125 static gchar*		seek_tag_backward	(const gchar *cursor,
126 						 const gchar *eof,
127 						 gconstpointer tag_ptr);
128 static gchar*		seek_tag_forward	(const gchar *cursor,
129 						 const gchar *eof,
130 						 gconstpointer tag_ptr);
131 static gboolean		has_prev_as_prefix	(const gchar *prev,
132                                                  const gchar *cur);
133 
134 SarySearcher *
sary_searcher_new(const gchar * file_name)135 sary_searcher_new (const gchar *file_name)
136 {
137     SarySearcher *searcher;
138     gchar *array_name = g_strconcat(file_name, ".ary", NULL);
139 
140     searcher = sary_searcher_new2(file_name, array_name);
141     g_free(array_name);
142     return searcher;
143 }
144 
145 SarySearcher *
sary_searcher_new2(const gchar * file_name,const gchar * array_name)146 sary_searcher_new2 (const gchar *file_name, const gchar *array_name)
147 {
148     SarySearcher *searcher = g_new(SarySearcher, 1);
149 
150     searcher->text = sary_text_new(file_name);
151     if (searcher->text == NULL) {
152 	return NULL;
153     }
154 
155     searcher->array = sary_mmap(array_name, "r");
156     if (searcher->array == NULL) {
157 	return NULL;
158     }
159 
160     searcher->len    = searcher->array->len / sizeof(SaryInt);
161     searcher->search = search;
162     searcher->cache  = NULL;
163 
164     init_searcher_states(searcher, TRUE);
165 
166     return searcher;
167 }
168 
169 void
sary_searcher_destroy(SarySearcher * searcher)170 sary_searcher_destroy (SarySearcher *searcher)
171 {
172     sary_text_destroy(searcher->text);
173     sary_cache_destroy(searcher->cache);
174     sary_munmap(searcher->array);
175 
176     g_free(searcher->allocated_data);
177     g_free(searcher);
178 }
179 
180 gboolean
sary_searcher_search(SarySearcher * searcher,const gchar * pattern)181 sary_searcher_search (SarySearcher *searcher, const gchar *pattern)
182 {
183     return sary_searcher_search2(searcher, pattern, strlen(pattern));
184 }
185 
186 gboolean
sary_searcher_search2(SarySearcher * searcher,const gchar * pattern,SaryInt len)187 sary_searcher_search2 (SarySearcher *searcher,
188                        const gchar *pattern,
189                        SaryInt len)
190 {
191     g_assert(searcher != NULL);
192     init_searcher_states(searcher, FALSE);
193 
194     /*
195      * Search the full range of the suffix array.
196      */
197     return searcher->search(searcher, pattern, len, 0, searcher->len);
198 }
199 
200 gboolean
sary_searcher_multi_search(SarySearcher * searcher,gchar ** patterns,gint npatterns)201 sary_searcher_multi_search (SarySearcher *searcher,
202                             gchar **patterns,
203                             gint npatterns)
204 {
205     gint i;
206     GArray *occurences = g_array_new(FALSE, FALSE, sizeof(SaryInt));
207     Patterns *pat = patterns_new(patterns, npatterns);
208     gboolean first_time = TRUE, result;
209 
210     g_assert(searcher != NULL);
211     init_searcher_states(searcher, FALSE);
212 
213     patterns_sort(pat);
214     for (i = 0; i < pat->npatterns; i++) {
215         /*
216          * If the previous pattern is "a" and the current
217          * one is "ab", we can skip the current one because
218          * the previous results include all results for the
219          * current one.
220          */
221         if (first_time || !has_prev_as_prefix(pat->patterns[i - 1],
222                                               pat->patterns[i]))
223         {
224             if (sary_searcher_search(searcher, pat->patterns[i])) {
225                 SaryInt len = sary_searcher_count_occurrences(searcher);
226 		g_array_append_vals(occurences, searcher->first, len);
227             }
228             first_time = FALSE;
229         }
230     }
231     patterns_destroy(pat);
232 
233     if (occurences->len == 0) { /* no pattern found */
234         result = FALSE;
235     } else {
236         searcher->is_allocated = TRUE;
237         searcher->allocated_data = (SaryInt *)occurences->data;
238         assign_range(searcher, searcher->allocated_data, occurences->len);
239         result = TRUE;
240     }
241     g_array_free(occurences, FALSE); /* don't free the data */
242     return result;
243 }
244 
245 gboolean
sary_searcher_isearch(SarySearcher * searcher,const gchar * pattern,SaryInt len)246 sary_searcher_isearch (SarySearcher *searcher,
247                        const gchar *pattern,
248                        SaryInt len)
249 {
250     SaryInt offset, range;
251     gboolean result;
252 
253     g_assert(searcher->pattern.skip <= len &&
254 	     searcher->is_sorted == FALSE);
255 
256     if (searcher->pattern.skip == 0) { /* the first time */
257 	init_searcher_states(searcher, FALSE);
258 	offset = 0;
259 	range  = searcher->len;
260     } else {
261 	offset = (gconstpointer)searcher->first - searcher->array->map;
262 	range  = sary_searcher_count_occurrences(searcher);
263     }
264 
265     /*
266      * Search the range of the previous search results.
267      * Don't use sary_searcher_sort_occurrences together.
268      */
269     result = searcher->search(searcher, pattern, len, offset, range);
270     searcher->pattern.skip = len;
271     return result;
272 }
273 
274 gboolean
sary_searcher_icase_search(SarySearcher * searcher,const gchar * pattern)275 sary_searcher_icase_search (SarySearcher *searcher, const gchar *pattern)
276 {
277     return sary_searcher_icase_search2(searcher, pattern, strlen(pattern));
278 }
279 
280 gboolean
sary_searcher_icase_search2(SarySearcher * searcher,const gchar * pattern,SaryInt len)281 sary_searcher_icase_search2 (SarySearcher *searcher,
282                              const gchar *pattern,
283                              SaryInt len)
284 {
285     gboolean result;
286     GArray *occurences;
287     gchar *tmppat;
288 
289     g_assert(len >= 0);
290     init_searcher_states(searcher, FALSE);
291 
292     if (len == 0) { /* match all occurrences */
293 	return sary_searcher_isearch(searcher, pattern, len);
294     }
295 
296     tmppat = g_new(gchar, len);  /* for modifications in icase_search. */
297     g_memmove(tmppat, pattern, len);
298 
299     occurences = g_array_new(FALSE, FALSE, sizeof(SaryInt));
300     occurences = icase_search(searcher, tmppat, len, 0, occurences);
301 
302     if (occurences->len == 0) { /* not found */
303 	result = FALSE;
304     } else {
305 	searcher->is_allocated   = TRUE;
306 	searcher->allocated_data = (SaryInt *)occurences->data;
307 	assign_range(searcher, searcher->allocated_data, occurences->len);
308 	result = TRUE;
309     }
310 
311     g_free(tmppat);
312     g_array_free(occurences, FALSE); /* don't free the data */
313 
314     return result;
315 }
316 
317 void
sary_searcher_isearch_reset(SarySearcher * searcher)318 sary_searcher_isearch_reset (SarySearcher *searcher)
319 {
320     searcher->pattern.skip = 0;
321 }
322 
323 SaryText *
sary_searcher_get_text(SarySearcher * searcher)324 sary_searcher_get_text (SarySearcher *searcher)
325 {
326     return searcher->text;
327 }
328 
329 SaryMmap *
sary_searcher_get_array(SarySearcher * searcher)330 sary_searcher_get_array (SarySearcher *searcher)
331 {
332     return searcher->array;
333 }
334 
335 gchar *
sary_searcher_get_next_line(SarySearcher * searcher)336 sary_searcher_get_next_line (SarySearcher *searcher)
337 {
338     /*
339      * This function is only a special case of
340      * sary_searcher_get_next_context_lines().  If
341      * sary_searcher_sort_occurrences() is used and the patttern
342      * occurs more than once in the same line, duplicated
343      * lines will never returned because of the work of
344      * join_subsequent_lines().
345      */
346     return sary_searcher_get_next_context_lines(searcher, 0, 0);
347 }
348 
349 gchar *
sary_searcher_get_next_line2(SarySearcher * searcher,SaryInt * len)350 sary_searcher_get_next_line2 (SarySearcher *searcher, SaryInt *len)
351 {
352     return sary_searcher_get_next_context_lines2(searcher, 0, 0, len);
353 }
354 
355 /*
356  * Act like GNU grep -A -B -C. Subsequent lines are joined
357  * not to print duplicated lines if occurrences are sorted.
358  */
359 
360 gchar *
sary_searcher_get_next_context_lines(SarySearcher * searcher,SaryInt backward,SaryInt forward)361 sary_searcher_get_next_context_lines (SarySearcher *searcher,
362 			       SaryInt backward,
363 			       SaryInt forward)
364 {
365     gchar *head, *eof;
366     SaryInt len;
367 
368     eof  = sary_text_get_eof(searcher->text);
369     head = sary_searcher_get_next_context_lines2(searcher, backward,
370                                                  forward, &len);
371 
372     return get_region(head, eof, len);
373 }
374 
375 /*
376  * Act like GNU grep -A -B -C. Subsequent lines are joined
377  * not to print duplicated lines if occurrences are sorted.
378  */
379 
380 gchar *
sary_searcher_get_next_context_lines2(SarySearcher * searcher,SaryInt backward,SaryInt forward,SaryInt * len)381 sary_searcher_get_next_context_lines2 (SarySearcher *searcher,
382 				SaryInt backward,
383 				SaryInt forward,
384 				SaryInt *len)
385 {
386     Seeker seeker;
387     g_assert(backward >= 0 && forward >=0);
388 
389     seeker.seek_backward = seek_lines_backward;
390     seeker.seek_forward  = seek_lines_forward;
391     seeker.backward_data = &backward;
392     seeker.forward_data  = &forward;
393 
394     return get_next_region(searcher, &seeker, len);
395 }
396 
397 gchar *
sary_searcher_get_next_tagged_region(SarySearcher * searcher,const gchar * start_tag,const gchar * end_tag)398 sary_searcher_get_next_tagged_region (SarySearcher *searcher,
399 			       const gchar *start_tag,
400 			       const gchar *end_tag)
401 {
402     gchar *head, *eof;
403     SaryInt len;
404     SaryInt start_tag_len, end_tag_len;
405 
406     start_tag_len = strlen(start_tag);
407     end_tag_len   = strlen(end_tag);
408 
409     eof  = sary_text_get_eof(searcher->text);
410     head = sary_searcher_get_next_tagged_region2(searcher,
411                                                  start_tag, start_tag_len,
412                                                  end_tag, end_tag_len, &len);
413     return get_region(head, eof, len);
414 }
415 
416 gchar *
sary_searcher_get_next_tagged_region2(SarySearcher * searcher,const gchar * start_tag,SaryInt start_tag_len,const gchar * end_tag,SaryInt end_tag_len,SaryInt * len)417 sary_searcher_get_next_tagged_region2 (SarySearcher *searcher,
418                                        const gchar *start_tag,
419                                        SaryInt start_tag_len,
420                                        const gchar *end_tag,
421                                        SaryInt end_tag_len,
422                                        SaryInt *len)
423 {
424     Seeker seeker;
425     Tag start, end;
426 
427     g_assert(start_tag != NULL && end_tag != NULL);
428     g_assert(start_tag_len >= 0 && end_tag_len >= 0);
429 
430     start.str = start_tag;
431     start.len = start_tag_len;
432     end.str   = end_tag;
433     end.len   = end_tag_len;
434 
435     seeker.seek_backward = seek_tag_backward;
436     seeker.seek_forward  = seek_tag_forward;
437     seeker.backward_data = &start;
438     seeker.forward_data  = &end;
439 
440     return get_next_region(searcher, &seeker, len);
441 }
442 
443 /*
444  * Return the text object whose cursor points to the
445  * position of the occurrence in the text. This function is
446  * useful for low-level text processing with sary_text_*
447  * functions.
448  */
449 
450 SaryText *
sary_searcher_get_next_occurrence(SarySearcher * searcher)451 sary_searcher_get_next_occurrence (SarySearcher *searcher)
452 {
453     gchar *occurrence;
454 
455     if (searcher->cursor > searcher->last) {
456 	return NULL;
457     }
458 
459     occurrence = sary_i_text(searcher->text, searcher->cursor);
460     sary_text_set_cursor(searcher->text, occurrence);
461     searcher->cursor++;
462 
463     return searcher->text;
464 }
465 
466 SaryInt
sary_searcher_get_next_position(SarySearcher * searcher)467 sary_searcher_get_next_position (SarySearcher *searcher)
468 {
469     SaryInt position;
470 
471     if (searcher->cursor > searcher->last) {
472         return -1;
473     }
474 
475     position =  GINT_FROM_BE(*(searcher->cursor));
476     searcher->cursor++;
477     return position;
478 }
479 
480 SaryInt
sary_searcher_count_occurrences(SarySearcher * searcher)481 sary_searcher_count_occurrences (SarySearcher *searcher)
482 {
483     return searcher->last - searcher->first + 1;
484 }
485 
486 void
sary_searcher_sort_occurrences(SarySearcher * searcher)487 sary_searcher_sort_occurrences (SarySearcher *searcher)
488 {
489     SaryInt len;
490 
491     len = sary_searcher_count_occurrences(searcher);
492 
493     if (searcher->is_allocated == FALSE) {
494 	searcher->allocated_data = g_new(SaryInt, len);
495 	g_memmove(searcher->allocated_data,
496 		  searcher->first, len * sizeof(SaryInt));
497 	searcher->is_allocated = TRUE;
498     }
499 
500     qsort(searcher->allocated_data, len, sizeof(SaryInt), qsortcmp);
501     assign_range(searcher, searcher->allocated_data, len);
502     searcher->is_sorted = TRUE;
503 }
504 
505 void
sary_searcher_enable_cache(SarySearcher * searcher)506 sary_searcher_enable_cache (SarySearcher *searcher)
507 {
508     searcher->cache  = sary_cache_new();
509     searcher->search = cache_search;
510 }
511 
512 static gchar *
peek_next_occurrence(SarySearcher * searcher)513 peek_next_occurrence (SarySearcher *searcher)
514 {
515     gchar *occurrence;
516 
517     if (searcher->cursor > searcher->last) {
518 	return NULL;
519     }
520 
521     occurrence = sary_i_text(searcher->text, searcher->cursor);
522     return occurrence;
523 }
524 
525 static void
init_searcher_states(SarySearcher * searcher,gboolean first_time)526 init_searcher_states (SarySearcher *searcher, gboolean first_time)
527 {
528     if (!first_time) {
529 	g_free(searcher->allocated_data);
530     }
531     searcher->allocated_data = NULL;
532     searcher->is_allocated   = FALSE;
533     searcher->is_sorted      = FALSE;
534     searcher->first     = NULL;
535     searcher->last      = NULL;
536     searcher->cursor    = NULL;
537     searcher->pattern.skip = 0;
538 }
539 
540 static gboolean
search(SarySearcher * searcher,const gchar * pattern,SaryInt len,SaryInt offset,SaryInt range)541 search (SarySearcher *searcher,
542 	const gchar *pattern,
543 	SaryInt len,
544 	SaryInt offset,
545 	SaryInt range)
546 {
547     SaryInt *first, *last;
548     SaryInt next_low, next_high;
549 
550     g_assert(len >= 0);
551 
552     if (searcher->array->map == NULL) {  /* 0-length (empty) file */
553 	return FALSE;
554     }
555 
556     searcher->pattern.str = (gchar *)pattern;
557     searcher->pattern.len = len;
558 
559     first = (SaryInt *)sary_bsearch_first(searcher,
560 					  searcher->array->map + offset,
561 					  range, sizeof(SaryInt),
562 					  &next_low, &next_high,
563 					  bsearchcmp);
564     if (first == NULL) {
565 	return FALSE;
566     }
567 
568     last  = (SaryInt *)sary_bsearch_last(searcher,
569 					 searcher->array->map + offset,
570 					 range, sizeof(SaryInt),
571 					 next_low, next_high,
572 					 bsearchcmp);
573     g_assert(last != NULL);
574 
575     searcher->first   = first;
576     searcher->last    = last;
577     searcher->cursor  = first;
578 
579     return TRUE;
580 }
581 
582 static inline gint
bsearchcmp(gconstpointer sary_searcher_ptr,gconstpointer obj_ptr)583 bsearchcmp (gconstpointer sary_searcher_ptr, gconstpointer obj_ptr)
584 {
585     gint len1, len2, skip;
586     SarySearcher *searcher  = (SarySearcher *)sary_searcher_ptr;
587     gchar *eof  = sary_text_get_eof(searcher->text);
588     gchar *pos = sary_i_text(searcher->text, obj_ptr);
589 
590     skip = searcher->pattern.skip;
591     len1 = searcher->pattern.len - skip;
592     len2 = eof - pos - skip;
593     if (len2 < 0) {
594 	len2 = 0;
595     }
596 
597     return memcmp(searcher->pattern.str + skip, pos + skip, MIN(len1, len2));
598 }
599 
600 static inline gint
qsortcmp(gconstpointer ptr1,gconstpointer ptr2)601 qsortcmp (gconstpointer ptr1, gconstpointer ptr2)
602 {
603     SaryInt occurrence1 = GINT_FROM_BE(*(SaryInt *)ptr1);
604     SaryInt occurrence2 = GINT_FROM_BE(*(SaryInt *)ptr2);
605 
606     if (occurrence1 < occurrence2) {
607 	return -1;
608     } else if (occurrence1 == occurrence2) {
609 	return 0;
610     } else {
611 	return 1;
612     }
613 }
614 
615 static inline gint
qsortscmp(gconstpointer ptr1,gconstpointer ptr2)616 qsortscmp (gconstpointer ptr1, gconstpointer ptr2)
617 {
618     const gchar *str1 = *(const gchar **)ptr1;
619     const gchar *str2 = *(const gchar **)ptr2;
620 
621     return strcmp(str1, str2);
622 }
623 
624 static gboolean
cache_search(SarySearcher * searcher,const gchar * pattern,SaryInt len,SaryInt offset,SaryInt range)625 cache_search (SarySearcher *searcher,
626 	      const gchar *pattern,
627 	      SaryInt len,
628 	      SaryInt offset,
629 	      SaryInt range)
630 {
631     SaryResult *cache;
632 
633     if ((cache = sary_cache_get(searcher->cache, pattern, len)) != NULL) {
634 	searcher->first   = cache->first;
635 	searcher->last    = cache->last;
636 	searcher->cursor  = cache->first;
637 	return TRUE;
638     } else {
639 	gboolean result = search(searcher, pattern, len, offset, range);
640 	if (result == TRUE) {
641 	    sary_cache_add(searcher->cache,
642 			   sary_i_text(searcher->text, searcher->first), len,
643 			   searcher->first, searcher->last);
644 	}
645 	return result;
646     }
647     g_assert_not_reached();
648 }
649 
650 static GArray *
icase_search(SarySearcher * searcher,gchar * pattern,SaryInt len,SaryInt step,GArray * result)651 icase_search (SarySearcher *searcher,
652 	      gchar *pattern,
653 	      SaryInt len,
654 	      SaryInt step,
655 	      GArray *result)
656 {
657     gint cand[2], ncand; /* candidates and the number of candidates */
658     gint i;
659 
660     ncand = expand_letter(cand, (guchar)pattern[step]);
661     for (i = 0; i < ncand; i++) {
662 	SaryInt *orig_first = searcher->first;
663 	SaryInt *orig_last  = searcher->last;
664 
665 	pattern[step] = cand[i];
666 	if (sary_searcher_isearch(searcher, pattern, step + 1)) {
667 	    if (step + 1 < len) {
668 		result = icase_search(searcher, pattern,
669                                       len, step + 1, result);
670 	    } else if (step + 1 == len) {
671 		g_array_append_vals(result, searcher->first,
672 				    sary_searcher_count_occurrences(searcher));
673 	    } else {
674 		g_assert_not_reached();
675 	    }
676 	}
677 	searcher->first = orig_first;
678 	searcher->last  = orig_last;
679 	searcher->pattern.skip--;
680     }
681 
682     return result;
683 }
684 
685 static gint
expand_letter(gint * cand,gint c)686 expand_letter (gint *cand, gint c)
687 {
688     if (isalpha(c)) {
689 	/*
690 	 * To preserve lexicographical order, do toupper first.
691 	 * Assume 'A' < 'a'.
692 	 */
693 	cand[0] = toupper(c);
694 	cand[1] = tolower(c);
695 	return 2;
696     } else {
697 	cand[0] = c;
698 	return 1;
699     }
700 }
701 
702 static void
assign_range(SarySearcher * searcher,SaryInt * occurences,SaryInt len)703 assign_range (SarySearcher *searcher, SaryInt *occurences, SaryInt len)
704 {
705     searcher->first  = occurences;
706     searcher->cursor = occurences;
707     searcher->last   = occurences + len - 1;
708 }
709 
710 static gchar *
get_next_region(SarySearcher * searcher,Seeker * seeker,SaryInt * len)711 get_next_region (SarySearcher *searcher, Seeker *seeker, SaryInt *len)
712 {
713     gchar *bof, *eof, *cursor;
714     gchar *head, *tail;
715 
716     if (searcher->cursor > searcher->last) {
717 	return NULL;
718     }
719 
720     bof    = sary_text_get_bof(searcher->text);
721     eof    = sary_text_get_eof(searcher->text);
722     cursor = sary_i_text(searcher->text, searcher->cursor);
723 
724     head   = seeker->seek_backward(cursor, bof, seeker->backward_data);
725     tail   = seeker->seek_forward(cursor, eof, seeker->forward_data);
726 
727     searcher->cursor++; /* Must be called before join_subsequent_region. */
728     if (searcher->is_sorted == TRUE) {
729 	tail = join_subsequent_region(searcher, seeker, tail);
730     }
731 
732     *len = tail - head;
733     return head;
734 }
735 
736 static gchar *
join_subsequent_region(SarySearcher * searcher,Seeker * seeker,gchar * tail)737 join_subsequent_region (SarySearcher *searcher, Seeker *seeker, gchar *tail)
738 {
739     gchar *bof = sary_text_get_bof(searcher->text);
740     gchar *eof = sary_text_get_eof(searcher->text);
741 
742     do {
743 	gchar *next, *next_head;
744 
745 	next = peek_next_occurrence(searcher);
746 	if (next == NULL) {
747 	    break;
748 	}
749 	next_head = seeker->seek_backward(next, bof, seeker->backward_data);
750 	if (next_head < tail) {
751 	    sary_searcher_get_next_occurrence(searcher);  /* skip */
752 	    tail = seeker->seek_forward(next, eof, seeker->forward_data);
753 	} else {
754 	    break;
755 	}
756     } while (1);
757 
758 
759     return tail;
760 }
761 
762 static gchar *
get_region(const gchar * head,const gchar * eof,SaryInt len)763 get_region (const gchar *head, const gchar *eof, SaryInt len)
764 {
765     if (head == NULL) {
766 	return NULL;
767     } else {
768 	return sary_str_get_region(head, eof, len);
769     }
770 }
771 
772 static gchar *
seek_lines_backward(const gchar * cursor,const gchar * bof,gconstpointer n_ptr)773 seek_lines_backward (const gchar *cursor,
774 		     const gchar *bof,
775 		     gconstpointer n_ptr)
776 {
777     SaryInt n = *(gint *)n_ptr;
778     return sary_str_seek_lines_backward(cursor, bof, n);
779 }
780 
781 static gchar *
seek_lines_forward(const gchar * cursor,const gchar * eof,gconstpointer n_ptr)782 seek_lines_forward (const gchar *cursor,
783 		    const gchar *eof,
784 		    gconstpointer n_ptr)
785 {
786     SaryInt n = *(gint *)n_ptr;
787     return sary_str_seek_lines_forward(cursor, eof, n);
788 }
789 
790 static gchar *
seek_tag_backward(const gchar * cursor,const gchar * bof,gconstpointer tag_ptr)791 seek_tag_backward (const gchar *cursor,
792 		   const gchar *bof,
793 		   gconstpointer tag_ptr)
794 {
795     Tag *tag = (Tag *)tag_ptr;
796     return sary_str_seek_pattern_backward2(cursor, bof, tag->str, tag->len);
797 }
798 
799 static gchar *
seek_tag_forward(const gchar * cursor,const gchar * eof,gconstpointer tag_ptr)800 seek_tag_forward (const gchar *cursor,
801 		  const gchar *eof,
802 		  gconstpointer tag_ptr)
803 {
804     Tag *tag = (Tag *)tag_ptr;
805     return sary_str_seek_pattern_forward2(cursor, eof, tag->str, tag->len);
806 }
807 
808 
809 static Patterns *
patterns_new(gchar ** patterns,gint npatterns)810 patterns_new (gchar **patterns, gint npatterns)
811 {
812     gint i;
813     Patterns *pat = malloc(sizeof(Patterns));
814 
815     pat->patterns  = g_new(gchar *, npatterns);
816     pat->npatterns = npatterns;
817     for (i = 0; i < npatterns; i++) {
818         pat->patterns[i] = g_strdup(patterns[i]);
819     }
820     return pat;
821 }
822 
823 static void
patterns_sort(Patterns * pat)824 patterns_sort (Patterns *pat)
825 {
826     qsort(pat->patterns, pat->npatterns, sizeof(gchar *), qsortscmp);
827 }
828 
829 static void
patterns_destroy(Patterns * pat)830 patterns_destroy (Patterns *pat)
831 {
832     int i;
833     for (i = 0; i < pat->npatterns; i++) {
834         g_free(pat->patterns[i]);
835     }
836     g_free(pat);
837 }
838 
839 static gboolean
has_prev_as_prefix(const gchar * prev,const gchar * cur)840 has_prev_as_prefix (const gchar *prev, const gchar *cur)
841 {
842     if (strncmp(prev, cur, strlen(prev)) == 0) {
843         return TRUE;
844     } else {
845         return FALSE;
846     }
847 }
848 
849