1 /* Copyright(C) 2004-2007 Brazil
2 
3   This library is free software; you can redistribute it and/or
4   modify it under the terms of the GNU Lesser General Public
5   License as published by the Free Software Foundation; either
6   version 2.1 of the License, or (at your option) any later version.
7 
8   This library is distributed in the hope that it will be useful,
9   but WITHOUT ANY WARRANTY; without even the implied warranty of
10   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11   Lesser General Public License for more details.
12 
13   You should have received a copy of the GNU Lesser General Public
14   License along with this library; if not, write to the Free Software
15   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16 */
17 #include "senna_in.h"
18 #include <stdio.h>
19 #include <string.h>
20 #include "sym.h"
21 #include "inv.h"
22 #include "str.h"
23 #include "set.h"
24 #include "lex.h"
25 #include "cache.h"
26 #include "store.h"
27 #include "ctx.h"
28 #include "com.h"
29 
30 /* sen_index */
31 
32 inline static int
build_flags(int flags)33 build_flags(int flags)
34 {
35   if (flags & SEN_INDEX_ENABLE_SUFFIX_SEARCH) {
36     return flags | SEN_SYM_WITH_SIS;
37   } else if (flags & SEN_INDEX_DISABLE_SUFFIX_SEARCH) {
38     return flags & ~SEN_SYM_WITH_SIS;
39   } else {   /* default */
40     switch (flags & SEN_INDEX_TOKENIZER_MASK) {
41     case SEN_INDEX_MORPH_ANALYSE :
42       return flags | SEN_SYM_WITH_SIS;
43     case SEN_INDEX_NGRAM :
44       return flags & ~SEN_SYM_WITH_SIS;
45     case SEN_INDEX_DELIMITED :
46       return flags & ~SEN_SYM_WITH_SIS;
47     default :
48       return flags & ~SEN_SYM_WITH_SIS;
49     }
50   }
51 }
52 
/* Thin public wrapper: forwards to sen_inv_expire().  What is actually
   expired is decided there, not in this file. */
void
sen_index_expire(void)
{
  sen_inv_expire();
}
58 
59 sen_index *
sen_index_create(const char * path,int key_size,int flags,int initial_n_segments,sen_encoding encoding)60 sen_index_create(const char *path, int key_size,
61                  int flags, int initial_n_segments, sen_encoding encoding)
62 {
63   sen_index *i;
64   char buffer[PATH_MAX];
65   if (!path) { SEN_LOG(sen_log_warning, "sen_index_create: invalid argument"); return NULL; }
66   if (initial_n_segments == 0) { initial_n_segments = SENNA_DEFAULT_INITIAL_N_SEGMENTS; }
67   if (encoding == sen_enc_default) { encoding = sen_gctx.encoding; }
68   if (strlen(path) > PATH_MAX - 4) {
69     SEN_LOG(sen_log_warning, "sen_index_create: too long index path (%s)", path);
70     return NULL;
71   }
72   if (!(i = SEN_GMALLOC(sizeof(sen_index)))) { return NULL; }
73   SEN_LOG(sen_log_notice, "creating '%s' encoding=%s initial_n_segments=%d",
74           path, sen_enctostr(encoding), initial_n_segments);
75   strcpy(buffer, path);
76   strcat(buffer, ".SEN");
77   i->foreign_flags = 0;
78   if ((i->keys = sen_sym_create(buffer, key_size, (flags & 0x70000), sen_enc_none))) {
79     strcpy(buffer, path);
80     strcat(buffer, ".SEN.l");
81     if ((i->lexicon = sen_sym_create(buffer, 0, build_flags(flags), encoding))) {
82       strcpy(buffer, path);
83       strcat(buffer, ".SEN.i");
84       if ((i->inv = sen_inv_create(buffer, i->lexicon, initial_n_segments))) {
85         if ((flags & SEN_INDEX_WITH_VGRAM)) {
86           strcpy(buffer, path);
87           strcat(buffer, ".SEN.v");
88           i->vgram= sen_vgram_create(buffer);
89         } else {
90           i->vgram = NULL;
91         }
92         if (!(flags & SEN_INDEX_WITH_VGRAM) || i->vgram) {
93           SEN_LOG(sen_log_notice, "index created (%s) flags=%x", path, i->lexicon->flags);
94           return i;
95         }
96         sen_inv_close(i->inv);
97       }
98       sen_sym_close(i->lexicon);
99     }
100     sen_sym_close(i->keys);
101   }
102   SEN_GFREE(i);
103   return NULL;
104 }
105 
106 sen_index *
sen_index_open(const char * path)107 sen_index_open(const char *path)
108 {
109   sen_index *i;
110   char buffer[PATH_MAX];
111   if (!path) { SEN_LOG(sen_log_warning, "sen_index_open: invalid argument"); return NULL; }
112   if (strlen(path) > PATH_MAX - 4) {
113     SEN_LOG(sen_log_warning, "sen_index_open: too long index path (%s)", path);
114     return NULL;
115   }
116   if (!(i = SEN_GMALLOC(sizeof(sen_index)))) { return NULL; }
117   strcpy(buffer, path);
118   strcat(buffer, ".SEN");
119   i->foreign_flags = 0;
120   if ((i->keys = sen_sym_open(buffer))) {
121     strcpy(buffer, path);
122     strcat(buffer, ".SEN.l");
123     if ((i->lexicon = sen_sym_open(buffer))) {
124       strcpy(buffer, path);
125       strcat(buffer, ".SEN.i");
126       if ((i->inv = sen_inv_open(buffer, i->lexicon))) {
127         if ((i->lexicon->flags & SEN_INDEX_WITH_VGRAM)) {
128           strcpy(buffer, path);
129           strcat(buffer, ".SEN.v");
130           i->vgram = sen_vgram_open(buffer);
131         } else {
132           i->vgram = NULL;
133         }
134         if (!(i->lexicon->flags & SEN_INDEX_WITH_VGRAM) || i->vgram) {
135           SEN_LOG(sen_log_notice, "index opened (%p:%s) flags=%x", i, path, i->lexicon->flags);
136           return i;
137         }
138         sen_inv_close(i->inv);
139       }
140       sen_sym_close(i->lexicon);
141     }
142     sen_sym_close(i->keys);
143   }
144   SEN_GFREE(i);
145   return NULL;
146 }
147 
/* Bits of sen_index.foreign_flags.  A set bit means the corresponding
   component was supplied by the caller, so sen_index_close() does not
   close it. */
#define FOREIGN_KEY     1
#define FOREIGN_LEXICON 2
150 
151 sen_index *
sen_index_create_with_keys(const char * path,sen_sym * keys,int flags,int initial_n_segments,sen_encoding encoding)152 sen_index_create_with_keys(const char *path, sen_sym *keys,
153                            int flags, int initial_n_segments, sen_encoding encoding)
154 {
155   sen_index *i;
156   char buffer[PATH_MAX];
157   if (!path || !keys) {
158     SEN_LOG(sen_log_warning, "sen_index_create_with_keys: invalid argument");
159     return NULL;
160   }
161   if (initial_n_segments == 0) { initial_n_segments = SENNA_DEFAULT_INITIAL_N_SEGMENTS; }
162   if (encoding == sen_enc_default) { encoding = sen_gctx.encoding; }
163   if (strlen(path) > PATH_MAX - 4) {
164     SEN_LOG(sen_log_warning, "too long index path (%s)", path);
165     return NULL;
166   }
167   if (!(i = SEN_GMALLOC(sizeof(sen_index)))) { return NULL; }
168   SEN_LOG(sen_log_notice, "creating '%s' encoding=%s initial_n_segments=%d",
169           path, sen_enctostr(encoding), initial_n_segments);
170   i->keys = keys;
171   i->foreign_flags = FOREIGN_KEY;
172   strcpy(buffer, path);
173   strcat(buffer, ".SEN.l");
174   if ((i->lexicon = sen_sym_create(buffer, 0, build_flags(flags), encoding))) {
175     strcpy(buffer, path);
176     strcat(buffer, ".SEN.i");
177     if ((i->inv = sen_inv_create(buffer, i->lexicon, initial_n_segments))) {
178       if ((flags & SEN_INDEX_WITH_VGRAM)) {
179         strcpy(buffer, path);
180         strcat(buffer, ".SEN.v");
181         i->vgram= sen_vgram_create(buffer);
182       } else {
183         i->vgram = NULL;
184       }
185       if (!(flags & SEN_INDEX_WITH_VGRAM) || i->vgram) {
186         SEN_LOG(sen_log_notice, "index created (%s) flags=%x", path, i->lexicon->flags);
187         return i;
188       }
189       sen_inv_close(i->inv);
190     }
191     sen_sym_close(i->lexicon);
192   }
193   SEN_GFREE(i);
194   return NULL;
195 }
196 
197 sen_index *
sen_index_open_with_keys(const char * path,sen_sym * keys)198 sen_index_open_with_keys(const char *path, sen_sym *keys)
199 {
200   sen_index *i;
201   char buffer[PATH_MAX];
202   if (!path || !keys) {
203     SEN_LOG(sen_log_warning, "sen_index_open_with_keys: invalid argument");
204     return NULL;
205   }
206   if (strlen(path) > PATH_MAX - 4) {
207     SEN_LOG(sen_log_warning, "too long index path (%s)", path);
208     return NULL;
209   }
210   if (!(i = SEN_GMALLOC(sizeof(sen_index)))) { return NULL; }
211   i->keys = keys;
212   i->foreign_flags = FOREIGN_KEY;
213   strcpy(buffer, path);
214   strcat(buffer, ".SEN.l");
215   if ((i->lexicon = sen_sym_open(buffer))) {
216     strcpy(buffer, path);
217     strcat(buffer, ".SEN.i");
218     if ((i->inv = sen_inv_open(buffer, i->lexicon))) {
219       if ((i->lexicon->flags & SEN_INDEX_WITH_VGRAM)) {
220         strcpy(buffer, path);
221         strcat(buffer, ".SEN.v");
222         i->vgram = sen_vgram_open(buffer);
223       } else {
224         i->vgram = NULL;
225       }
226       if(!(i->lexicon->flags & SEN_INDEX_WITH_VGRAM) || i->vgram) {
227         SEN_LOG(sen_log_notice, "index opened (%p:%s) flags=%x", i, path, i->lexicon->flags);
228         return i;
229       }
230       sen_inv_close(i->inv);
231     }
232     sen_sym_close(i->lexicon);
233   }
234   SEN_GFREE(i);
235   return NULL;
236 }
237 
238 sen_index *
sen_index_create_with_keys_lexicon(const char * path,sen_sym * keys,sen_sym * lexicon,int initial_n_segments)239 sen_index_create_with_keys_lexicon(const char *path, sen_sym *keys, sen_sym *lexicon,
240                                    int initial_n_segments)
241 {
242   sen_index *i;
243   if (!keys || !path || !lexicon) {
244     SEN_LOG(sen_log_warning, "sen_index_create_with_keys_lexicon: invalid argument");
245     return NULL;
246   }
247   if (initial_n_segments == 0) { initial_n_segments = SENNA_DEFAULT_INITIAL_N_SEGMENTS; }
248   if (!(i = SEN_GMALLOC(sizeof(sen_index)))) { return NULL; }
249   SEN_LOG(sen_log_notice, "creating '%s' encoding=%s initial_n_segments=%d",
250           path, sen_enctostr(lexicon->encoding), initial_n_segments);
251   i->keys = keys;
252   i->lexicon = lexicon;
253   i->foreign_flags = FOREIGN_KEY|FOREIGN_LEXICON;
254   i->vgram = NULL;
255   if ((i->inv = sen_inv_create(path, i->lexicon, initial_n_segments))) {
256     SEN_LOG(sen_log_notice, "index created (%s) flags=%x", path, i->lexicon->flags);
257     return i;
258   }
259   SEN_GFREE(i);
260   return NULL;
261 }
262 
263 sen_index *
sen_index_open_with_keys_lexicon(const char * path,sen_sym * keys,sen_sym * lexicon)264 sen_index_open_with_keys_lexicon(const char *path, sen_sym *keys, sen_sym *lexicon)
265 {
266   sen_index *i;
267   if (!keys || !path || !lexicon) {
268     SEN_LOG(sen_log_warning, "sen_index_open_with_keys_lexicon: invalid argument");
269     return NULL;
270   }
271   if (!(i = SEN_GMALLOC(sizeof(sen_index)))) { return NULL; }
272   i->keys = keys;
273   i->lexicon = lexicon;
274   i->foreign_flags = FOREIGN_KEY|FOREIGN_LEXICON;
275   i->vgram = NULL;
276   if ((i->inv = sen_inv_open(path, i->lexicon))) {
277     SEN_LOG(sen_log_notice, "index opened (%p:%s) flags=%x", i, path, i->lexicon->flags);
278     return i;
279   }
280   SEN_GFREE(i);
281   return NULL;
282 }
283 
284 sen_rc
sen_index_close(sen_index * i)285 sen_index_close(sen_index *i)
286 {
287   if (!i) { return sen_invalid_argument; }
288   if (!(i->foreign_flags & FOREIGN_KEY)) { sen_sym_close(i->keys); }
289   if (!(i->foreign_flags & FOREIGN_LEXICON)) { sen_sym_close(i->lexicon); }
290   sen_inv_close(i->inv);
291   if (i->vgram) { sen_vgram_close(i->vgram); }
292   SEN_GFREE(i);
293   return sen_success;
294 }
295 
296 sen_rc
sen_index_remove(const char * path)297 sen_index_remove(const char *path)
298 {
299   sen_rc rc;
300   char buffer[PATH_MAX];
301   if (!path || strlen(path) > PATH_MAX - 8) { return sen_invalid_argument; }
302   snprintf(buffer, PATH_MAX, "%s.SEN", path);
303   if ((rc = sen_sym_remove(buffer))) { goto exit; }
304   snprintf(buffer, PATH_MAX, "%s.SEN.i", path);
305   if ((rc = sen_inv_remove(buffer))) { goto exit; }
306   snprintf(buffer, PATH_MAX, "%s.SEN.l", path);
307   if ((rc = sen_sym_remove(buffer))) { goto exit; }
308   snprintf(buffer, PATH_MAX, "%s.SEN.v", path);
309   sen_io_remove(buffer); // sen_vgram_remove
310 exit :
311   return rc;
312 }
313 
314 sen_rc
sen_index_rename(const char * old_name,const char * new_name)315 sen_index_rename(const char *old_name, const char *new_name)
316 {
317   char old_buffer[PATH_MAX];
318   char new_buffer[PATH_MAX];
319   if (!old_name || strlen(old_name) > PATH_MAX - 8) { return sen_invalid_argument; }
320   if (!new_name || strlen(new_name) > PATH_MAX - 8) { return sen_invalid_argument; }
321   snprintf(old_buffer, PATH_MAX, "%s.SEN", old_name);
322   snprintf(new_buffer, PATH_MAX, "%s.SEN", new_name);
323   sen_io_rename(old_buffer, new_buffer);
324   snprintf(old_buffer, PATH_MAX, "%s.SEN.i", old_name);
325   snprintf(new_buffer, PATH_MAX, "%s.SEN.i", new_name);
326   sen_io_rename(old_buffer, new_buffer);
327   snprintf(old_buffer, PATH_MAX, "%s.SEN.i.c", old_name);
328   snprintf(new_buffer, PATH_MAX, "%s.SEN.i.c", new_name);
329   sen_io_rename(old_buffer, new_buffer);
330   snprintf(old_buffer, PATH_MAX, "%s.SEN.l", old_name);
331   snprintf(new_buffer, PATH_MAX, "%s.SEN.l", new_name);
332   sen_io_rename(old_buffer, new_buffer);
333   snprintf(old_buffer, PATH_MAX, "%s.SEN.v", old_name);
334   snprintf(new_buffer, PATH_MAX, "%s.SEN.v", new_name);
335   sen_io_rename(old_buffer, new_buffer);
336   return sen_success;
337 }
338 
339 sen_rc
sen_index_info(sen_index * i,int * key_size,int * flags,int * initial_n_segments,sen_encoding * encoding,unsigned * nrecords_keys,unsigned * file_size_keys,unsigned * nrecords_lexicon,unsigned * file_size_lexicon,unsigned long long * inv_seg_size,unsigned long long * inv_chunk_size)340 sen_index_info(sen_index *i, int *key_size, int *flags,
341                int *initial_n_segments, sen_encoding *encoding,
342                unsigned *nrecords_keys, unsigned *file_size_keys,
343                unsigned *nrecords_lexicon, unsigned *file_size_lexicon,
344                unsigned long long *inv_seg_size, unsigned long long *inv_chunk_size)
345 {
346   sen_rc rc = sen_success;
347 
348   if (!i) { return sen_invalid_argument; }
349   if (key_size) { *key_size = i->keys->key_size; }
350   if (flags) { *flags = i->lexicon->flags & ~SEN_SYM_WITH_SIS; }
351   if (initial_n_segments) { *initial_n_segments = sen_inv_initial_n_segments(i->inv); }
352   if (encoding) { *encoding = i->lexicon->encoding; }
353   if (nrecords_keys || file_size_keys) {
354     if ((rc = sen_sym_info(i->keys, NULL, NULL, NULL, nrecords_keys, file_size_keys))) { return rc; }
355   }
356   if (nrecords_lexicon || file_size_lexicon) {
357     if ((rc = sen_sym_info(i->lexicon, NULL, NULL, NULL, nrecords_lexicon, file_size_lexicon))) { return rc; }
358   }
359   if (inv_seg_size || inv_chunk_size) {
360     uint64_t seg_size, chunk_size;
361 
362     rc = sen_inv_info(i->inv, &seg_size, &chunk_size);
363 
364     if (inv_seg_size) {
365       *inv_seg_size = seg_size;
366     }
367 
368     if (inv_chunk_size) {
369       *inv_chunk_size = chunk_size;
370     }
371 
372     if (rc != sen_success) {
373       return rc;
374     }
375   }
376   return sen_success;
377 }
378 
379 sen_rc
sen_index_lock(sen_index * i,int timeout)380 sen_index_lock(sen_index *i, int timeout)
381 {
382   if (!i) { return sen_invalid_argument; }
383   return sen_sym_lock(i->keys, timeout);
384 }
385 
386 sen_rc
sen_index_unlock(sen_index * i)387 sen_index_unlock(sen_index *i)
388 {
389   if (!i) { return sen_invalid_argument; }
390   return sen_sym_unlock(i->keys);
391 }
392 
393 sen_rc
sen_index_clear_lock(sen_index * i)394 sen_index_clear_lock(sen_index *i)
395 {
396   if (!i) { return sen_invalid_argument; }
397   return sen_sym_clear_lock(i->keys);
398 }
399 
400 int
sen_index_path(sen_index * i,char * pathbuf,int bufsize)401 sen_index_path(sen_index *i, char *pathbuf, int bufsize)
402 {
403   const char *invpath;
404   int pathsize;
405   if (!i) {
406     SEN_LOG(sen_log_warning, "sen_index_path: invalid argument");
407     return sen_invalid_argument;
408   }
409   invpath = sen_io_path(i->lexicon->io);
410   pathsize = strlen(invpath) - 5;
411   if (bufsize >= pathsize && pathbuf) {
412     memcpy(pathbuf, invpath, pathsize - 1);
413     pathbuf[pathsize - 1] = '\0';
414   }
415   return pathsize;
416 }
417 
418 sen_rc
sen_index_upd(sen_index * i,const void * key,const char * oldvalue,unsigned int oldvalue_len,const char * newvalue,unsigned int newvalue_len)419 sen_index_upd(sen_index *i, const void *key,
420               const char *oldvalue, unsigned int oldvalue_len,
421               const char *newvalue, unsigned int newvalue_len)
422 {
423   sen_id rid;
424   sen_rc rc = sen_invalid_argument;
425   if (!i || !key) {
426     SEN_LOG(sen_log_warning, "sen_index_upd: invalid argument");
427     return sen_invalid_argument;
428   }
429   if ((rc = sen_index_lock(i, -1))) {
430     SEN_LOG(sen_log_crit, "sen_index_upd: index lock failed");
431     return rc;
432   }
433   if (oldvalue && *oldvalue) {
434     if (!(rid = sen_sym_at(i->keys, key))) {
435       SEN_LOG(sen_log_error, "del : (%x) (invalid key)", key);
436       goto exit;
437     }
438   } else if (newvalue && *newvalue) {
439     if (!(rid = sen_sym_get(i->keys, key))) { goto exit; }
440   } else {
441     goto exit;
442   }
443   rc = sen_inv_upd(i->inv, rid, i->vgram, oldvalue, oldvalue_len, newvalue, newvalue_len);
444 exit :
445   sen_index_unlock(i);
446   return rc;
447 }
448 
/* Value sen_index_del() stores for a key through sen_sym_pocket_set();
   presumably it marks the record as deleted -- confirm against the
   readers of the pocket value. */
#define DELETE_FLAG 1
450 
451 sen_rc
sen_index_del(sen_index * i,const void * key)452 sen_index_del(sen_index *i, const void *key)
453 {
454   sen_id rid;
455   if (!i || !key) { SEN_LOG(sen_log_warning, "sen_index_del: invalid argument"); return sen_invalid_argument; }
456   rid = sen_sym_at(i->keys, key);
457   if (!rid) { return sen_invalid_argument; }
458   return sen_sym_pocket_set(i->keys, rid, DELETE_FLAG);
459 }
460 
/* Number of entries by which sen_values_add() grows the values array.
   It must stay a power of two: the growth test there is
   n_values & (INITIAL_VALUE_SIZE - 1). */
#define INITIAL_VALUE_SIZE 1024
462 
463 sen_values *
sen_values_open(void)464 sen_values_open(void)
465 {
466   sen_ctx *ctx = &sen_gctx; /* todo : replace it with the local ctx */
467   sen_values *v = SEN_MALLOC(sizeof(sen_values));
468   if (v) {
469     v->n_values = 0;
470     v->values = NULL;
471   }
472   return v;
473 }
474 
475 sen_rc
sen_values_close(sen_values * v)476 sen_values_close(sen_values *v)
477 {
478   sen_ctx *ctx = &sen_gctx; /* todo : replace it with the local ctx */
479   if (!v) { return sen_invalid_argument; }
480   if (v->values) { SEN_FREE(v->values); }
481   SEN_FREE(v);
482   return sen_success;
483 }
484 
485 sen_rc
sen_values_add(sen_values * v,const char * str,unsigned int str_len,unsigned int weight)486 sen_values_add(sen_values *v, const char *str, unsigned int str_len, unsigned int weight)
487 {
488   sen_ctx *ctx = &sen_gctx; /* todo : replace it with the local ctx */
489   sen_value *vp;
490   if (!v || !str) { SEN_LOG(sen_log_warning, "sen_values_add: invalid argument"); return sen_invalid_argument; }
491   if (!(v->n_values & (INITIAL_VALUE_SIZE - 1))) {
492     vp = SEN_REALLOC(v->values, sizeof(sen_value) * (v->n_values + INITIAL_VALUE_SIZE));
493     SEN_LOG(sen_log_debug, "expanded values to %d,%p", v->n_values + INITIAL_VALUE_SIZE, vp);
494     if (!vp) { return sen_memory_exhausted; }
495     v->values = vp;
496   }
497   vp = &v->values[v->n_values];
498   vp->str = str;
499   vp->str_len = str_len;
500   vp->weight = weight;
501   v->n_values++;
502   return sen_success;
503 }
504 
505 sen_rc
sen_index_update(sen_index * i,const void * key,unsigned int section,sen_values * oldvalues,sen_values * newvalues)506 sen_index_update(sen_index *i, const void *key, unsigned int section,
507                  sen_values *oldvalues, sen_values *newvalues)
508 {
509   sen_id rid;
510   sen_rc rc = sen_invalid_argument;
511   if (!i || !key) {
512     SEN_LOG(sen_log_warning, "sen_index_update: invalid argument");
513     return rc;
514   }
515   if ((rc = sen_index_lock(i, -1))) {
516     SEN_LOG(sen_log_crit, "sen_index_update: index lock failed");
517     return rc;
518   }
519   if (newvalues) {
520     if (!(rid = sen_sym_get(i->keys, key))) { goto exit; }
521   } else {
522     if (!(rid = sen_sym_at(i->keys, key))) { goto exit; }
523   }
524   rc = sen_inv_update(i->inv, rid, i->vgram, section, oldvalues, newvalues);
525 exit :
526   sen_index_unlock(i);
527   return rc;
528 }
529 
530 /* select */
531 
532 sen_rc
sen_index_similar_search(sen_index * i,const char * string,unsigned int string_len,sen_records * r,sen_sel_operator op,sen_select_optarg * optarg)533 sen_index_similar_search(sen_index *i, const char *string,
534                          unsigned int string_len, sen_records *r,
535                          sen_sel_operator op, sen_select_optarg *optarg)
536 {
537   sen_rc rc;
538   if (!i || !string || !r || !optarg) { return sen_invalid_argument; }
539   r->keys = i->keys;
540   optarg->max_size = sen_sym_size(i->keys) * sizeof(int);
541   rc = sen_inv_similar_search(i->inv, string, string_len, r, op, optarg);
542   sen_records_cursor_clear(r);
543   return rc;
544 }
545 
/* Modes of sen_index_term_extract(), selected through
   optarg->max_interval.  With TERM_EXTRACT_EACH_POST the result records
   are sorted after extraction.  TERM_EXTRACT_EACH_TERM is not referenced
   in this file. */
#define TERM_EXTRACT_EACH_POST 0
#define TERM_EXTRACT_EACH_TERM 1
548 
549 sen_rc
sen_index_term_extract(sen_index * i,const char * string,unsigned int string_len,sen_records * r,sen_sel_operator op,sen_select_optarg * optarg)550 sen_index_term_extract(sen_index *i, const char *string,
551                        unsigned int string_len, sen_records *r,
552                        sen_sel_operator op, sen_select_optarg *optarg)
553 {
554   sen_rc rc;
555   r->keys = i->keys;
556   rc = sen_inv_term_extract(i->inv, string, string_len, r, op, optarg);
557   sen_records_cursor_clear(r);
558   if (!rc && optarg->max_interval == TERM_EXTRACT_EACH_POST) {
559     sen_sort_optarg opt;
560     opt.mode = sen_sort_ascending;
561     opt.compar = NULL;
562     opt.compar_arg = (void *)(intptr_t)r->key_size;
563     sen_records_sort(r, 10000, &opt); /* todo : why 10000? */
564   }
565   return rc;
566 }
567 
568 sen_rc
sen_index_select(sen_index * i,const char * string,unsigned int string_len,sen_records * r,sen_sel_operator op,sen_select_optarg * optarg)569 sen_index_select(sen_index *i, const char *string, unsigned int string_len,
570                  sen_records *r, sen_sel_operator op, sen_select_optarg *optarg)
571 {
572   sen_rc rc;
573   if (!r || !i) { return sen_invalid_argument; }
574   r->keys = i->keys;
575   if (optarg) { optarg->max_size = sen_sym_size(i->keys) * sizeof(int); }
576   rc = sen_inv_select(i->inv, string, string_len, r, op, optarg);
577   sen_records_cursor_clear(r);
578   return rc;
579 }
580 
581 sen_records *
sen_index_sel(sen_index * i,const char * string,unsigned int string_len)582 sen_index_sel(sen_index *i, const char *string, unsigned int string_len)
583 {
584   sen_records *r;
585   if (!i) { return NULL; }
586   r = sen_inv_sel(i->inv, string, string_len);
587   if (r) { r->keys = i->keys; }
588   return r;
589 }
590 
#ifdef USE_QUERY_ABORT
/*
 * Registers `cb` and its argument `arg` on the inverted index through
 * sen_inv_set_abort_callback().  Compiled only when USE_QUERY_ABORT is
 * defined.  A NULL index is ignored, in line with the NULL checks of the
 * other sen_index_* entry points.
 */
void
sen_index_set_abort_callback(sen_index *i, int (*cb)(void*), void *arg)
{
  if (!i) { return; }
  sen_inv_set_abort_callback(i->inv, cb, arg);
}
#endif /* USE_QUERY_ABORT */
598 
599 /* sen_records_heap class */
600 
/* Binary heap of record sets, ordered by each set's current record. */
struct _sen_records_heap {
  int n_entries;       /* number of record sets currently stored in bins */
  int n_bins;          /* allocated capacity of bins */
  sen_records **bins;  /* heap array; bins[0] is the head */
  int limit;           /* stored by sen_records_heap_open(); not read in this file */
  int curr;            /* incremented on each sen_records_heap_next() call that finds entries */
  int dir;             /* 1 for ascending order, -1 otherwise */
  /* optional comparator; when NULL, int32 values inside the records are compared */
  int (*compar)(sen_records *, sen_recordh *, sen_records *, sen_recordh *, void *);
  void *compar_arg;    /* comparator argument, or the int32 offset when compar is NULL */
};
611 
612 inline static int
records_heap_cmp(sen_records_heap * h,sen_records * r1,sen_records * r2)613 records_heap_cmp(sen_records_heap *h, sen_records *r1, sen_records *r2)
614 {
615   sen_recordh *rh1 = (sen_recordh *)sen_records_curr_rec(r1);
616   sen_recordh *rh2 = (sen_recordh *)sen_records_curr_rec(r2);
617   if (!h->compar) {
618     int off1, off2;
619     if (h->compar_arg == (void *)-1) {
620       off1 = (r1->key_size) / sizeof(int32_t);
621       off2 = (r2->key_size) / sizeof(int32_t);
622     } else {
623       off1 = off2 = (int)(intptr_t)h->compar_arg;
624     }
625     return (((int32_t *)(rh2))[off2] - ((int32_t *)(rh1))[off1]) * h->dir > 0;
626   }
627   return h->compar(r1, rh1, r2, rh2, h->compar_arg) * h->dir > 0;
628 }
629 
630 sen_records_heap *
sen_records_heap_open(int size,int limit,sen_sort_optarg * optarg)631 sen_records_heap_open(int size, int limit, sen_sort_optarg *optarg)
632 {
633   sen_ctx *ctx = &sen_gctx; /* todo : replace it with the local ctx */
634   sen_records_heap *h = SEN_MALLOC(sizeof(sen_records_heap));
635   if (!h) { return NULL; }
636   h->bins = SEN_MALLOC(sizeof(sen_records *) * size);
637   if (!h->bins) {
638     SEN_FREE(h);
639     return NULL;
640   }
641   h->n_entries = 0;
642   h->n_bins = size;
643   h->limit = limit;
644   h->curr = 0;
645   if (optarg) {
646     h->dir = (optarg->mode == sen_sort_ascending) ? 1 : -1;
647     h->compar = optarg->compar;
648     h->compar_arg = optarg->compar_arg;
649   } else {
650     h->dir = -1;
651     h->compar = NULL;
652     h->compar_arg = (void *) -1;
653   }
654   return h;
655 }
656 
657 sen_rc
sen_records_heap_add(sen_records_heap * h,sen_records * r)658 sen_records_heap_add(sen_records_heap *h, sen_records *r)
659 {
660   sen_ctx *ctx = &sen_gctx; /* todo : replace it with the local ctx */
661   if (h->n_entries >= h->n_bins) {
662     int size = h->n_bins * 2;
663     sen_records **bins = SEN_REALLOC(h->bins, sizeof(sen_records *) * size);
664     // sen_log("expanded sen_records_heap to %d,%p", size, bins);
665     if (!bins) { return sen_memory_exhausted; }
666     h->n_bins = size;
667     h->bins = bins;
668   }
669   if (!sen_records_next(r, NULL, 0, NULL)) {
670     sen_records_close(r);
671     return sen_internal_error;
672   }
673   {
674     int n, n2;
675     sen_records *r2;
676     n = h->n_entries++;
677     while (n) {
678       n2 = (n - 1) >> 1;
679       r2 = h->bins[n2];
680       if (records_heap_cmp(h, r, r2)) { break; }
681       h->bins[n] = r2;
682       n = n2;
683     }
684     h->bins[n] = r;
685   }
686   return sen_success;
687 }
688 
689 int
sen_records_heap_next(sen_records_heap * h)690 sen_records_heap_next(sen_records_heap *h)
691 {
692   if (!h || !h->n_entries) { return 0; }
693   {
694     sen_records *r = h->bins[0];
695     if (!sen_records_next(r, NULL, 0, NULL)) {
696       sen_records_close(r);
697       r = h->bins[0] = h->bins[--h->n_entries];
698     }
699     {
700       int n = 0, m = h->n_entries;
701       if (m > 1) {
702         for (;;) {
703           int n1 = n * 2 + 1;
704           int n2 = n1 + 1;
705           sen_records *r1 = n1 < m ? h->bins[n1] : NULL;
706           sen_records *r2 = n2 < m ? h->bins[n2] : NULL;
707           if (r1 && records_heap_cmp(h, r, r1)) {
708             if (r2 && records_heap_cmp(h, r, r2) && records_heap_cmp(h, r1, r2)) {
709               h->bins[n] = r2;
710               n = n2;
711             } else {
712               h->bins[n] = r1;
713               n = n1;
714             }
715           } else {
716             if (r2 && records_heap_cmp(h, r, r2)) {
717               h->bins[n] = r2;
718               n = n2;
719             } else {
720               h->bins[n] = r;
721               break;
722             }
723           }
724         }
725       }
726       h->curr++;
727       return m;
728     }
729   }
730 }
731 
732 sen_records *
sen_records_heap_head(sen_records_heap * h)733 sen_records_heap_head(sen_records_heap *h)
734 {
735   return h->n_entries ? h->bins[0] : NULL;
736 }
737 
738 sen_rc
sen_records_heap_close(sen_records_heap * h)739 sen_records_heap_close(sen_records_heap *h)
740 {
741   sen_ctx *ctx = &sen_gctx; /* todo : replace it with the local ctx */
742   int i;
743   if (!h) { return sen_invalid_argument; }
744   for (i = h->n_entries; i--;) { sen_records_close(h->bins[i]); }
745   SEN_FREE(h->bins);
746   SEN_FREE(h);
747   return sen_success;
748 }
749 
750 /* todo : config_path will be disappeared */
751 sen_rc
sen_info(char ** version,char ** configure_options,char ** config_path,sen_encoding * default_encoding,unsigned int * initial_n_segments,unsigned int * partial_match_threshold)752 sen_info(char **version,
753          char **configure_options,
754          char **config_path,
755          sen_encoding *default_encoding,
756          unsigned int *initial_n_segments,
757          unsigned int *partial_match_threshold)
758 {
759   if (version) {
760     *version = PACKAGE_VERSION;
761   }
762   if (configure_options) {
763     *configure_options = CONFIGURE_OPTIONS;
764   }
765   if (default_encoding) {
766     *default_encoding = sen_gctx.encoding;
767   }
768   if (initial_n_segments) {
769     *initial_n_segments = SENNA_DEFAULT_INITIAL_N_SEGMENTS;
770   }
771   if (partial_match_threshold) {
772     *partial_match_threshold = SENNA_DEFAULT_QUERY_ESCALATION_THRESHOLD;
773   }
774   return sen_success;
775 }
776