1 /*
2 * sary - a suffix array library
3 *
4 * $Id: searcher.c,v 1.1.1.1 2004/06/11 18:57:27 satoru-t Exp $
5 *
6 * Copyright (C) 2000 Satoru Takabayashi <satoru@namazu.org>
7 * All rights reserved.
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Library General Public
11 * License as published by the Free Software Foundation; either
12 * version 2 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Library General Public License for more details.
18 *
19 * You should have received a copy of the GNU Library General Public
20 * License along with this library; if not, write to the
21 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
22 * Boston, MA 02111-1307, USA.
23 */
24
25 #include "config.h"
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <glib.h>
30 #include <errno.h>
31 #include <ctype.h>
32 #include <sary.h>
33
34 /*
35 * SarySearcher stands for Suffix Array Searcher.
36 */
37
38 typedef gboolean (*SearchFunc) (SarySearcher *searcher,
39 const gchar *pattern,
40 SaryInt len,
41 SaryInt offset,
42 SaryInt range);
43 struct _SarySearcher {
44 SaryInt len; /* number of index points */
45 SaryText *text;
46 SaryMmap *array;
47 SaryInt *first;
48 SaryInt *last;
49 SaryInt *cursor;
50 SaryInt *allocated_data;
51 gboolean is_sorted;
52 gboolean is_allocated;
53 SaryPattern pattern;
54 SaryCache *cache;
55 SearchFunc search;
56 };
57
58 typedef gchar* (*SeekFunc)(const gchar *cursor,
59 const gchar *sentinel,
60 gconstpointer data);
61 typedef struct {
62 SeekFunc seek_backward;
63 SeekFunc seek_forward;
64 gconstpointer backward_data;
65 gconstpointer forward_data;
66 } Seeker;
67
68 typedef struct {
69 const gchar *str;
70 SaryInt len;
71 } Tag;
72
73 typedef struct {
74 gchar **patterns;
75 gint npatterns;
76 } Patterns;
77
78 static gchar * peek_next_occurrence (SarySearcher *searcher);
79 static void init_searcher_states (SarySearcher *searcher,
80 gboolean first_time);
81 static gboolean search (SarySearcher *searcher,
82 const gchar *pattern,
83 SaryInt len,
84 SaryInt offset,
85 SaryInt range);
86 static inline gint bsearchcmp (gconstpointer searcher_ptr,
87 gconstpointer obj_ptr);
88 static inline gint qsortcmp (gconstpointer ptr1,
89 gconstpointer ptr2);
90 static inline gint qsortscmp (gconstpointer ptr1,
91 gconstpointer ptr2);
92 static Patterns* patterns_new (gchar **patterns,
93 gint npatterns);
94 static void patterns_destroy (Patterns *pat);
95 static void patterns_sort (Patterns *pat);
96 static gboolean cache_search (SarySearcher *searcher,
97 const gchar *pattern,
98 SaryInt len,
99 SaryInt offset,
100 SaryInt range);
101 static GArray* icase_search (SarySearcher *searcher,
102 gchar *pattern,
103 SaryInt len,
104 SaryInt step,
105 GArray *result);
106 static gint expand_letter (gint *cand, gint c);
107 static void assign_range (SarySearcher *searcher,
108 SaryInt *occurences,
109 SaryInt len);
110 static gchar* get_next_region (SarySearcher *searcher,
111 Seeker *seeker,
112 SaryInt *len);
113 static gchar* join_subsequent_region (SarySearcher *searcher,
114 Seeker *seeker,
115 gchar *tail);
116 static gchar* get_region (const gchar *head,
117 const gchar *eof,
118 SaryInt len);
119 static gchar* seek_lines_backward (const gchar *cursor,
120 const gchar *bof,
121 gconstpointer n_ptr);
122 static gchar* seek_lines_forward (const gchar *cursor,
123 const gchar *eof,
124 gconstpointer n_ptr);
125 static gchar* seek_tag_backward (const gchar *cursor,
126 const gchar *eof,
127 gconstpointer tag_ptr);
128 static gchar* seek_tag_forward (const gchar *cursor,
129 const gchar *eof,
130 gconstpointer tag_ptr);
131 static gboolean has_prev_as_prefix (const gchar *prev,
132 const gchar *cur);
133
134 SarySearcher *
sary_searcher_new(const gchar * file_name)135 sary_searcher_new (const gchar *file_name)
136 {
137 SarySearcher *searcher;
138 gchar *array_name = g_strconcat(file_name, ".ary", NULL);
139
140 searcher = sary_searcher_new2(file_name, array_name);
141 g_free(array_name);
142 return searcher;
143 }
144
145 SarySearcher *
sary_searcher_new2(const gchar * file_name,const gchar * array_name)146 sary_searcher_new2 (const gchar *file_name, const gchar *array_name)
147 {
148 SarySearcher *searcher = g_new(SarySearcher, 1);
149
150 searcher->text = sary_text_new(file_name);
151 if (searcher->text == NULL) {
152 return NULL;
153 }
154
155 searcher->array = sary_mmap(array_name, "r");
156 if (searcher->array == NULL) {
157 return NULL;
158 }
159
160 searcher->len = searcher->array->len / sizeof(SaryInt);
161 searcher->search = search;
162 searcher->cache = NULL;
163
164 init_searcher_states(searcher, TRUE);
165
166 return searcher;
167 }
168
169 void
sary_searcher_destroy(SarySearcher * searcher)170 sary_searcher_destroy (SarySearcher *searcher)
171 {
172 sary_text_destroy(searcher->text);
173 sary_cache_destroy(searcher->cache);
174 sary_munmap(searcher->array);
175
176 g_free(searcher->allocated_data);
177 g_free(searcher);
178 }
179
180 gboolean
sary_searcher_search(SarySearcher * searcher,const gchar * pattern)181 sary_searcher_search (SarySearcher *searcher, const gchar *pattern)
182 {
183 return sary_searcher_search2(searcher, pattern, strlen(pattern));
184 }
185
186 gboolean
sary_searcher_search2(SarySearcher * searcher,const gchar * pattern,SaryInt len)187 sary_searcher_search2 (SarySearcher *searcher,
188 const gchar *pattern,
189 SaryInt len)
190 {
191 g_assert(searcher != NULL);
192 init_searcher_states(searcher, FALSE);
193
194 /*
195 * Search the full range of the suffix array.
196 */
197 return searcher->search(searcher, pattern, len, 0, searcher->len);
198 }
199
200 gboolean
sary_searcher_multi_search(SarySearcher * searcher,gchar ** patterns,gint npatterns)201 sary_searcher_multi_search (SarySearcher *searcher,
202 gchar **patterns,
203 gint npatterns)
204 {
205 gint i;
206 GArray *occurences = g_array_new(FALSE, FALSE, sizeof(SaryInt));
207 Patterns *pat = patterns_new(patterns, npatterns);
208 gboolean first_time = TRUE, result;
209
210 g_assert(searcher != NULL);
211 init_searcher_states(searcher, FALSE);
212
213 patterns_sort(pat);
214 for (i = 0; i < pat->npatterns; i++) {
215 /*
216 * If the previous pattern is "a" and the current
217 * one is "ab", we can skip the current one because
218 * the previous results include all results for the
219 * current one.
220 */
221 if (first_time || !has_prev_as_prefix(pat->patterns[i - 1],
222 pat->patterns[i]))
223 {
224 if (sary_searcher_search(searcher, pat->patterns[i])) {
225 SaryInt len = sary_searcher_count_occurrences(searcher);
226 g_array_append_vals(occurences, searcher->first, len);
227 }
228 first_time = FALSE;
229 }
230 }
231 patterns_destroy(pat);
232
233 if (occurences->len == 0) { /* no pattern found */
234 result = FALSE;
235 } else {
236 searcher->is_allocated = TRUE;
237 searcher->allocated_data = (SaryInt *)occurences->data;
238 assign_range(searcher, searcher->allocated_data, occurences->len);
239 result = TRUE;
240 }
241 g_array_free(occurences, FALSE); /* don't free the data */
242 return result;
243 }
244
245 gboolean
sary_searcher_isearch(SarySearcher * searcher,const gchar * pattern,SaryInt len)246 sary_searcher_isearch (SarySearcher *searcher,
247 const gchar *pattern,
248 SaryInt len)
249 {
250 SaryInt offset, range;
251 gboolean result;
252
253 g_assert(searcher->pattern.skip <= len &&
254 searcher->is_sorted == FALSE);
255
256 if (searcher->pattern.skip == 0) { /* the first time */
257 init_searcher_states(searcher, FALSE);
258 offset = 0;
259 range = searcher->len;
260 } else {
261 offset = (gconstpointer)searcher->first - searcher->array->map;
262 range = sary_searcher_count_occurrences(searcher);
263 }
264
265 /*
266 * Search the range of the previous search results.
267 * Don't use sary_searcher_sort_occurrences together.
268 */
269 result = searcher->search(searcher, pattern, len, offset, range);
270 searcher->pattern.skip = len;
271 return result;
272 }
273
274 gboolean
sary_searcher_icase_search(SarySearcher * searcher,const gchar * pattern)275 sary_searcher_icase_search (SarySearcher *searcher, const gchar *pattern)
276 {
277 return sary_searcher_icase_search2(searcher, pattern, strlen(pattern));
278 }
279
280 gboolean
sary_searcher_icase_search2(SarySearcher * searcher,const gchar * pattern,SaryInt len)281 sary_searcher_icase_search2 (SarySearcher *searcher,
282 const gchar *pattern,
283 SaryInt len)
284 {
285 gboolean result;
286 GArray *occurences;
287 gchar *tmppat;
288
289 g_assert(len >= 0);
290 init_searcher_states(searcher, FALSE);
291
292 if (len == 0) { /* match all occurrences */
293 return sary_searcher_isearch(searcher, pattern, len);
294 }
295
296 tmppat = g_new(gchar, len); /* for modifications in icase_search. */
297 g_memmove(tmppat, pattern, len);
298
299 occurences = g_array_new(FALSE, FALSE, sizeof(SaryInt));
300 occurences = icase_search(searcher, tmppat, len, 0, occurences);
301
302 if (occurences->len == 0) { /* not found */
303 result = FALSE;
304 } else {
305 searcher->is_allocated = TRUE;
306 searcher->allocated_data = (SaryInt *)occurences->data;
307 assign_range(searcher, searcher->allocated_data, occurences->len);
308 result = TRUE;
309 }
310
311 g_free(tmppat);
312 g_array_free(occurences, FALSE); /* don't free the data */
313
314 return result;
315 }
316
317 void
sary_searcher_isearch_reset(SarySearcher * searcher)318 sary_searcher_isearch_reset (SarySearcher *searcher)
319 {
320 searcher->pattern.skip = 0;
321 }
322
323 SaryText *
sary_searcher_get_text(SarySearcher * searcher)324 sary_searcher_get_text (SarySearcher *searcher)
325 {
326 return searcher->text;
327 }
328
329 SaryMmap *
sary_searcher_get_array(SarySearcher * searcher)330 sary_searcher_get_array (SarySearcher *searcher)
331 {
332 return searcher->array;
333 }
334
335 gchar *
sary_searcher_get_next_line(SarySearcher * searcher)336 sary_searcher_get_next_line (SarySearcher *searcher)
337 {
338 /*
339 * This function is only a special case of
340 * sary_searcher_get_next_context_lines(). If
341 * sary_searcher_sort_occurrences() is used and the patttern
342 * occurs more than once in the same line, duplicated
343 * lines will never returned because of the work of
344 * join_subsequent_lines().
345 */
346 return sary_searcher_get_next_context_lines(searcher, 0, 0);
347 }
348
349 gchar *
sary_searcher_get_next_line2(SarySearcher * searcher,SaryInt * len)350 sary_searcher_get_next_line2 (SarySearcher *searcher, SaryInt *len)
351 {
352 return sary_searcher_get_next_context_lines2(searcher, 0, 0, len);
353 }
354
355 /*
356 * Act like GNU grep -A -B -C. Subsequent lines are joined
357 * not to print duplicated lines if occurrences are sorted.
358 */
359
360 gchar *
sary_searcher_get_next_context_lines(SarySearcher * searcher,SaryInt backward,SaryInt forward)361 sary_searcher_get_next_context_lines (SarySearcher *searcher,
362 SaryInt backward,
363 SaryInt forward)
364 {
365 gchar *head, *eof;
366 SaryInt len;
367
368 eof = sary_text_get_eof(searcher->text);
369 head = sary_searcher_get_next_context_lines2(searcher, backward,
370 forward, &len);
371
372 return get_region(head, eof, len);
373 }
374
375 /*
376 * Act like GNU grep -A -B -C. Subsequent lines are joined
377 * not to print duplicated lines if occurrences are sorted.
378 */
379
380 gchar *
sary_searcher_get_next_context_lines2(SarySearcher * searcher,SaryInt backward,SaryInt forward,SaryInt * len)381 sary_searcher_get_next_context_lines2 (SarySearcher *searcher,
382 SaryInt backward,
383 SaryInt forward,
384 SaryInt *len)
385 {
386 Seeker seeker;
387 g_assert(backward >= 0 && forward >=0);
388
389 seeker.seek_backward = seek_lines_backward;
390 seeker.seek_forward = seek_lines_forward;
391 seeker.backward_data = &backward;
392 seeker.forward_data = &forward;
393
394 return get_next_region(searcher, &seeker, len);
395 }
396
397 gchar *
sary_searcher_get_next_tagged_region(SarySearcher * searcher,const gchar * start_tag,const gchar * end_tag)398 sary_searcher_get_next_tagged_region (SarySearcher *searcher,
399 const gchar *start_tag,
400 const gchar *end_tag)
401 {
402 gchar *head, *eof;
403 SaryInt len;
404 SaryInt start_tag_len, end_tag_len;
405
406 start_tag_len = strlen(start_tag);
407 end_tag_len = strlen(end_tag);
408
409 eof = sary_text_get_eof(searcher->text);
410 head = sary_searcher_get_next_tagged_region2(searcher,
411 start_tag, start_tag_len,
412 end_tag, end_tag_len, &len);
413 return get_region(head, eof, len);
414 }
415
416 gchar *
sary_searcher_get_next_tagged_region2(SarySearcher * searcher,const gchar * start_tag,SaryInt start_tag_len,const gchar * end_tag,SaryInt end_tag_len,SaryInt * len)417 sary_searcher_get_next_tagged_region2 (SarySearcher *searcher,
418 const gchar *start_tag,
419 SaryInt start_tag_len,
420 const gchar *end_tag,
421 SaryInt end_tag_len,
422 SaryInt *len)
423 {
424 Seeker seeker;
425 Tag start, end;
426
427 g_assert(start_tag != NULL && end_tag != NULL);
428 g_assert(start_tag_len >= 0 && end_tag_len >= 0);
429
430 start.str = start_tag;
431 start.len = start_tag_len;
432 end.str = end_tag;
433 end.len = end_tag_len;
434
435 seeker.seek_backward = seek_tag_backward;
436 seeker.seek_forward = seek_tag_forward;
437 seeker.backward_data = &start;
438 seeker.forward_data = &end;
439
440 return get_next_region(searcher, &seeker, len);
441 }
442
443 /*
444 * Return the text object whose cursor points to the
445 * position of the occurrence in the text. This function is
446 * useful for low-level text processing with sary_text_*
447 * functions.
448 */
449
450 SaryText *
sary_searcher_get_next_occurrence(SarySearcher * searcher)451 sary_searcher_get_next_occurrence (SarySearcher *searcher)
452 {
453 gchar *occurrence;
454
455 if (searcher->cursor > searcher->last) {
456 return NULL;
457 }
458
459 occurrence = sary_i_text(searcher->text, searcher->cursor);
460 sary_text_set_cursor(searcher->text, occurrence);
461 searcher->cursor++;
462
463 return searcher->text;
464 }
465
466 SaryInt
sary_searcher_get_next_position(SarySearcher * searcher)467 sary_searcher_get_next_position (SarySearcher *searcher)
468 {
469 SaryInt position;
470
471 if (searcher->cursor > searcher->last) {
472 return -1;
473 }
474
475 position = GINT_FROM_BE(*(searcher->cursor));
476 searcher->cursor++;
477 return position;
478 }
479
480 SaryInt
sary_searcher_count_occurrences(SarySearcher * searcher)481 sary_searcher_count_occurrences (SarySearcher *searcher)
482 {
483 return searcher->last - searcher->first + 1;
484 }
485
486 void
sary_searcher_sort_occurrences(SarySearcher * searcher)487 sary_searcher_sort_occurrences (SarySearcher *searcher)
488 {
489 SaryInt len;
490
491 len = sary_searcher_count_occurrences(searcher);
492
493 if (searcher->is_allocated == FALSE) {
494 searcher->allocated_data = g_new(SaryInt, len);
495 g_memmove(searcher->allocated_data,
496 searcher->first, len * sizeof(SaryInt));
497 searcher->is_allocated = TRUE;
498 }
499
500 qsort(searcher->allocated_data, len, sizeof(SaryInt), qsortcmp);
501 assign_range(searcher, searcher->allocated_data, len);
502 searcher->is_sorted = TRUE;
503 }
504
505 void
sary_searcher_enable_cache(SarySearcher * searcher)506 sary_searcher_enable_cache (SarySearcher *searcher)
507 {
508 searcher->cache = sary_cache_new();
509 searcher->search = cache_search;
510 }
511
512 static gchar *
peek_next_occurrence(SarySearcher * searcher)513 peek_next_occurrence (SarySearcher *searcher)
514 {
515 gchar *occurrence;
516
517 if (searcher->cursor > searcher->last) {
518 return NULL;
519 }
520
521 occurrence = sary_i_text(searcher->text, searcher->cursor);
522 return occurrence;
523 }
524
525 static void
init_searcher_states(SarySearcher * searcher,gboolean first_time)526 init_searcher_states (SarySearcher *searcher, gboolean first_time)
527 {
528 if (!first_time) {
529 g_free(searcher->allocated_data);
530 }
531 searcher->allocated_data = NULL;
532 searcher->is_allocated = FALSE;
533 searcher->is_sorted = FALSE;
534 searcher->first = NULL;
535 searcher->last = NULL;
536 searcher->cursor = NULL;
537 searcher->pattern.skip = 0;
538 }
539
540 static gboolean
search(SarySearcher * searcher,const gchar * pattern,SaryInt len,SaryInt offset,SaryInt range)541 search (SarySearcher *searcher,
542 const gchar *pattern,
543 SaryInt len,
544 SaryInt offset,
545 SaryInt range)
546 {
547 SaryInt *first, *last;
548 SaryInt next_low, next_high;
549
550 g_assert(len >= 0);
551
552 if (searcher->array->map == NULL) { /* 0-length (empty) file */
553 return FALSE;
554 }
555
556 searcher->pattern.str = (gchar *)pattern;
557 searcher->pattern.len = len;
558
559 first = (SaryInt *)sary_bsearch_first(searcher,
560 searcher->array->map + offset,
561 range, sizeof(SaryInt),
562 &next_low, &next_high,
563 bsearchcmp);
564 if (first == NULL) {
565 return FALSE;
566 }
567
568 last = (SaryInt *)sary_bsearch_last(searcher,
569 searcher->array->map + offset,
570 range, sizeof(SaryInt),
571 next_low, next_high,
572 bsearchcmp);
573 g_assert(last != NULL);
574
575 searcher->first = first;
576 searcher->last = last;
577 searcher->cursor = first;
578
579 return TRUE;
580 }
581
582 static inline gint
bsearchcmp(gconstpointer sary_searcher_ptr,gconstpointer obj_ptr)583 bsearchcmp (gconstpointer sary_searcher_ptr, gconstpointer obj_ptr)
584 {
585 gint len1, len2, skip;
586 SarySearcher *searcher = (SarySearcher *)sary_searcher_ptr;
587 gchar *eof = sary_text_get_eof(searcher->text);
588 gchar *pos = sary_i_text(searcher->text, obj_ptr);
589
590 skip = searcher->pattern.skip;
591 len1 = searcher->pattern.len - skip;
592 len2 = eof - pos - skip;
593 if (len2 < 0) {
594 len2 = 0;
595 }
596
597 return memcmp(searcher->pattern.str + skip, pos + skip, MIN(len1, len2));
598 }
599
600 static inline gint
qsortcmp(gconstpointer ptr1,gconstpointer ptr2)601 qsortcmp (gconstpointer ptr1, gconstpointer ptr2)
602 {
603 SaryInt occurrence1 = GINT_FROM_BE(*(SaryInt *)ptr1);
604 SaryInt occurrence2 = GINT_FROM_BE(*(SaryInt *)ptr2);
605
606 if (occurrence1 < occurrence2) {
607 return -1;
608 } else if (occurrence1 == occurrence2) {
609 return 0;
610 } else {
611 return 1;
612 }
613 }
614
615 static inline gint
qsortscmp(gconstpointer ptr1,gconstpointer ptr2)616 qsortscmp (gconstpointer ptr1, gconstpointer ptr2)
617 {
618 const gchar *str1 = *(const gchar **)ptr1;
619 const gchar *str2 = *(const gchar **)ptr2;
620
621 return strcmp(str1, str2);
622 }
623
624 static gboolean
cache_search(SarySearcher * searcher,const gchar * pattern,SaryInt len,SaryInt offset,SaryInt range)625 cache_search (SarySearcher *searcher,
626 const gchar *pattern,
627 SaryInt len,
628 SaryInt offset,
629 SaryInt range)
630 {
631 SaryResult *cache;
632
633 if ((cache = sary_cache_get(searcher->cache, pattern, len)) != NULL) {
634 searcher->first = cache->first;
635 searcher->last = cache->last;
636 searcher->cursor = cache->first;
637 return TRUE;
638 } else {
639 gboolean result = search(searcher, pattern, len, offset, range);
640 if (result == TRUE) {
641 sary_cache_add(searcher->cache,
642 sary_i_text(searcher->text, searcher->first), len,
643 searcher->first, searcher->last);
644 }
645 return result;
646 }
647 g_assert_not_reached();
648 }
649
650 static GArray *
icase_search(SarySearcher * searcher,gchar * pattern,SaryInt len,SaryInt step,GArray * result)651 icase_search (SarySearcher *searcher,
652 gchar *pattern,
653 SaryInt len,
654 SaryInt step,
655 GArray *result)
656 {
657 gint cand[2], ncand; /* candidates and the number of candidates */
658 gint i;
659
660 ncand = expand_letter(cand, (guchar)pattern[step]);
661 for (i = 0; i < ncand; i++) {
662 SaryInt *orig_first = searcher->first;
663 SaryInt *orig_last = searcher->last;
664
665 pattern[step] = cand[i];
666 if (sary_searcher_isearch(searcher, pattern, step + 1)) {
667 if (step + 1 < len) {
668 result = icase_search(searcher, pattern,
669 len, step + 1, result);
670 } else if (step + 1 == len) {
671 g_array_append_vals(result, searcher->first,
672 sary_searcher_count_occurrences(searcher));
673 } else {
674 g_assert_not_reached();
675 }
676 }
677 searcher->first = orig_first;
678 searcher->last = orig_last;
679 searcher->pattern.skip--;
680 }
681
682 return result;
683 }
684
685 static gint
expand_letter(gint * cand,gint c)686 expand_letter (gint *cand, gint c)
687 {
688 if (isalpha(c)) {
689 /*
690 * To preserve lexicographical order, do toupper first.
691 * Assume 'A' < 'a'.
692 */
693 cand[0] = toupper(c);
694 cand[1] = tolower(c);
695 return 2;
696 } else {
697 cand[0] = c;
698 return 1;
699 }
700 }
701
702 static void
assign_range(SarySearcher * searcher,SaryInt * occurences,SaryInt len)703 assign_range (SarySearcher *searcher, SaryInt *occurences, SaryInt len)
704 {
705 searcher->first = occurences;
706 searcher->cursor = occurences;
707 searcher->last = occurences + len - 1;
708 }
709
710 static gchar *
get_next_region(SarySearcher * searcher,Seeker * seeker,SaryInt * len)711 get_next_region (SarySearcher *searcher, Seeker *seeker, SaryInt *len)
712 {
713 gchar *bof, *eof, *cursor;
714 gchar *head, *tail;
715
716 if (searcher->cursor > searcher->last) {
717 return NULL;
718 }
719
720 bof = sary_text_get_bof(searcher->text);
721 eof = sary_text_get_eof(searcher->text);
722 cursor = sary_i_text(searcher->text, searcher->cursor);
723
724 head = seeker->seek_backward(cursor, bof, seeker->backward_data);
725 tail = seeker->seek_forward(cursor, eof, seeker->forward_data);
726
727 searcher->cursor++; /* Must be called before join_subsequent_region. */
728 if (searcher->is_sorted == TRUE) {
729 tail = join_subsequent_region(searcher, seeker, tail);
730 }
731
732 *len = tail - head;
733 return head;
734 }
735
736 static gchar *
join_subsequent_region(SarySearcher * searcher,Seeker * seeker,gchar * tail)737 join_subsequent_region (SarySearcher *searcher, Seeker *seeker, gchar *tail)
738 {
739 gchar *bof = sary_text_get_bof(searcher->text);
740 gchar *eof = sary_text_get_eof(searcher->text);
741
742 do {
743 gchar *next, *next_head;
744
745 next = peek_next_occurrence(searcher);
746 if (next == NULL) {
747 break;
748 }
749 next_head = seeker->seek_backward(next, bof, seeker->backward_data);
750 if (next_head < tail) {
751 sary_searcher_get_next_occurrence(searcher); /* skip */
752 tail = seeker->seek_forward(next, eof, seeker->forward_data);
753 } else {
754 break;
755 }
756 } while (1);
757
758
759 return tail;
760 }
761
762 static gchar *
get_region(const gchar * head,const gchar * eof,SaryInt len)763 get_region (const gchar *head, const gchar *eof, SaryInt len)
764 {
765 if (head == NULL) {
766 return NULL;
767 } else {
768 return sary_str_get_region(head, eof, len);
769 }
770 }
771
772 static gchar *
seek_lines_backward(const gchar * cursor,const gchar * bof,gconstpointer n_ptr)773 seek_lines_backward (const gchar *cursor,
774 const gchar *bof,
775 gconstpointer n_ptr)
776 {
777 SaryInt n = *(gint *)n_ptr;
778 return sary_str_seek_lines_backward(cursor, bof, n);
779 }
780
781 static gchar *
seek_lines_forward(const gchar * cursor,const gchar * eof,gconstpointer n_ptr)782 seek_lines_forward (const gchar *cursor,
783 const gchar *eof,
784 gconstpointer n_ptr)
785 {
786 SaryInt n = *(gint *)n_ptr;
787 return sary_str_seek_lines_forward(cursor, eof, n);
788 }
789
790 static gchar *
seek_tag_backward(const gchar * cursor,const gchar * bof,gconstpointer tag_ptr)791 seek_tag_backward (const gchar *cursor,
792 const gchar *bof,
793 gconstpointer tag_ptr)
794 {
795 Tag *tag = (Tag *)tag_ptr;
796 return sary_str_seek_pattern_backward2(cursor, bof, tag->str, tag->len);
797 }
798
799 static gchar *
seek_tag_forward(const gchar * cursor,const gchar * eof,gconstpointer tag_ptr)800 seek_tag_forward (const gchar *cursor,
801 const gchar *eof,
802 gconstpointer tag_ptr)
803 {
804 Tag *tag = (Tag *)tag_ptr;
805 return sary_str_seek_pattern_forward2(cursor, eof, tag->str, tag->len);
806 }
807
808
809 static Patterns *
patterns_new(gchar ** patterns,gint npatterns)810 patterns_new (gchar **patterns, gint npatterns)
811 {
812 gint i;
813 Patterns *pat = malloc(sizeof(Patterns));
814
815 pat->patterns = g_new(gchar *, npatterns);
816 pat->npatterns = npatterns;
817 for (i = 0; i < npatterns; i++) {
818 pat->patterns[i] = g_strdup(patterns[i]);
819 }
820 return pat;
821 }
822
823 static void
patterns_sort(Patterns * pat)824 patterns_sort (Patterns *pat)
825 {
826 qsort(pat->patterns, pat->npatterns, sizeof(gchar *), qsortscmp);
827 }
828
829 static void
patterns_destroy(Patterns * pat)830 patterns_destroy (Patterns *pat)
831 {
832 int i;
833 for (i = 0; i < pat->npatterns; i++) {
834 g_free(pat->patterns[i]);
835 }
836 g_free(pat);
837 }
838
839 static gboolean
has_prev_as_prefix(const gchar * prev,const gchar * cur)840 has_prev_as_prefix (const gchar *prev, const gchar *cur)
841 {
842 if (strncmp(prev, cur, strlen(prev)) == 0) {
843 return TRUE;
844 } else {
845 return FALSE;
846 }
847 }
848
849