1 /* Copyright(C) 2004-2007 Brazil
2
3 This library is free software; you can redistribute it and/or
4 modify it under the terms of the GNU Lesser General Public
5 License as published by the Free Software Foundation; either
6 version 2.1 of the License, or (at your option) any later version.
7
8 This library is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 Lesser General Public License for more details.
12
13 You should have received a copy of the GNU Lesser General Public
14 License along with this library; if not, write to the Free Software
15 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 */
17 #include "senna_in.h"
18 #include <stdio.h>
19 #include <string.h>
20 #include "sym.h"
21 #include "inv.h"
22 #include "str.h"
23 #include "set.h"
24 #include "lex.h"
25 #include "cache.h"
26 #include "store.h"
27 #include "ctx.h"
28 #include "com.h"
29
30 /* sen_index */
31
32 inline static int
build_flags(int flags)33 build_flags(int flags)
34 {
35 if (flags & SEN_INDEX_ENABLE_SUFFIX_SEARCH) {
36 return flags | SEN_SYM_WITH_SIS;
37 } else if (flags & SEN_INDEX_DISABLE_SUFFIX_SEARCH) {
38 return flags & ~SEN_SYM_WITH_SIS;
39 } else { /* default */
40 switch (flags & SEN_INDEX_TOKENIZER_MASK) {
41 case SEN_INDEX_MORPH_ANALYSE :
42 return flags | SEN_SYM_WITH_SIS;
43 case SEN_INDEX_NGRAM :
44 return flags & ~SEN_SYM_WITH_SIS;
45 case SEN_INDEX_DELIMITED :
46 return flags & ~SEN_SYM_WITH_SIS;
47 default :
48 return flags & ~SEN_SYM_WITH_SIS;
49 }
50 }
51 }
52
/*
 * Public entry point that simply forwards to sen_inv_expire().
 * Takes no arguments and reports no status.
 */
void
sen_index_expire(void)
{
  /* the index layer keeps no expirable state of its own in this file */
  sen_inv_expire();
}
58
59 sen_index *
sen_index_create(const char * path,int key_size,int flags,int initial_n_segments,sen_encoding encoding)60 sen_index_create(const char *path, int key_size,
61 int flags, int initial_n_segments, sen_encoding encoding)
62 {
63 sen_index *i;
64 char buffer[PATH_MAX];
65 if (!path) { SEN_LOG(sen_log_warning, "sen_index_create: invalid argument"); return NULL; }
66 if (initial_n_segments == 0) { initial_n_segments = SENNA_DEFAULT_INITIAL_N_SEGMENTS; }
67 if (encoding == sen_enc_default) { encoding = sen_gctx.encoding; }
68 if (strlen(path) > PATH_MAX - 4) {
69 SEN_LOG(sen_log_warning, "sen_index_create: too long index path (%s)", path);
70 return NULL;
71 }
72 if (!(i = SEN_GMALLOC(sizeof(sen_index)))) { return NULL; }
73 SEN_LOG(sen_log_notice, "creating '%s' encoding=%s initial_n_segments=%d",
74 path, sen_enctostr(encoding), initial_n_segments);
75 strcpy(buffer, path);
76 strcat(buffer, ".SEN");
77 i->foreign_flags = 0;
78 if ((i->keys = sen_sym_create(buffer, key_size, (flags & 0x70000), sen_enc_none))) {
79 strcpy(buffer, path);
80 strcat(buffer, ".SEN.l");
81 if ((i->lexicon = sen_sym_create(buffer, 0, build_flags(flags), encoding))) {
82 strcpy(buffer, path);
83 strcat(buffer, ".SEN.i");
84 if ((i->inv = sen_inv_create(buffer, i->lexicon, initial_n_segments))) {
85 if ((flags & SEN_INDEX_WITH_VGRAM)) {
86 strcpy(buffer, path);
87 strcat(buffer, ".SEN.v");
88 i->vgram= sen_vgram_create(buffer);
89 } else {
90 i->vgram = NULL;
91 }
92 if (!(flags & SEN_INDEX_WITH_VGRAM) || i->vgram) {
93 SEN_LOG(sen_log_notice, "index created (%s) flags=%x", path, i->lexicon->flags);
94 return i;
95 }
96 sen_inv_close(i->inv);
97 }
98 sen_sym_close(i->lexicon);
99 }
100 sen_sym_close(i->keys);
101 }
102 SEN_GFREE(i);
103 return NULL;
104 }
105
106 sen_index *
sen_index_open(const char * path)107 sen_index_open(const char *path)
108 {
109 sen_index *i;
110 char buffer[PATH_MAX];
111 if (!path) { SEN_LOG(sen_log_warning, "sen_index_open: invalid argument"); return NULL; }
112 if (strlen(path) > PATH_MAX - 4) {
113 SEN_LOG(sen_log_warning, "sen_index_open: too long index path (%s)", path);
114 return NULL;
115 }
116 if (!(i = SEN_GMALLOC(sizeof(sen_index)))) { return NULL; }
117 strcpy(buffer, path);
118 strcat(buffer, ".SEN");
119 i->foreign_flags = 0;
120 if ((i->keys = sen_sym_open(buffer))) {
121 strcpy(buffer, path);
122 strcat(buffer, ".SEN.l");
123 if ((i->lexicon = sen_sym_open(buffer))) {
124 strcpy(buffer, path);
125 strcat(buffer, ".SEN.i");
126 if ((i->inv = sen_inv_open(buffer, i->lexicon))) {
127 if ((i->lexicon->flags & SEN_INDEX_WITH_VGRAM)) {
128 strcpy(buffer, path);
129 strcat(buffer, ".SEN.v");
130 i->vgram = sen_vgram_open(buffer);
131 } else {
132 i->vgram = NULL;
133 }
134 if (!(i->lexicon->flags & SEN_INDEX_WITH_VGRAM) || i->vgram) {
135 SEN_LOG(sen_log_notice, "index opened (%p:%s) flags=%x", i, path, i->lexicon->flags);
136 return i;
137 }
138 sen_inv_close(i->inv);
139 }
140 sen_sym_close(i->lexicon);
141 }
142 sen_sym_close(i->keys);
143 }
144 SEN_GFREE(i);
145 return NULL;
146 }
147
/* Bits of sen_index.foreign_flags.  A set bit marks a table that was supplied
   by the caller; sen_index_close() does not close such a table. */
#define FOREIGN_KEY 1      /* i->keys was passed in by the caller */
#define FOREIGN_LEXICON 2  /* i->lexicon was passed in by the caller */
150
151 sen_index *
sen_index_create_with_keys(const char * path,sen_sym * keys,int flags,int initial_n_segments,sen_encoding encoding)152 sen_index_create_with_keys(const char *path, sen_sym *keys,
153 int flags, int initial_n_segments, sen_encoding encoding)
154 {
155 sen_index *i;
156 char buffer[PATH_MAX];
157 if (!path || !keys) {
158 SEN_LOG(sen_log_warning, "sen_index_create_with_keys: invalid argument");
159 return NULL;
160 }
161 if (initial_n_segments == 0) { initial_n_segments = SENNA_DEFAULT_INITIAL_N_SEGMENTS; }
162 if (encoding == sen_enc_default) { encoding = sen_gctx.encoding; }
163 if (strlen(path) > PATH_MAX - 4) {
164 SEN_LOG(sen_log_warning, "too long index path (%s)", path);
165 return NULL;
166 }
167 if (!(i = SEN_GMALLOC(sizeof(sen_index)))) { return NULL; }
168 SEN_LOG(sen_log_notice, "creating '%s' encoding=%s initial_n_segments=%d",
169 path, sen_enctostr(encoding), initial_n_segments);
170 i->keys = keys;
171 i->foreign_flags = FOREIGN_KEY;
172 strcpy(buffer, path);
173 strcat(buffer, ".SEN.l");
174 if ((i->lexicon = sen_sym_create(buffer, 0, build_flags(flags), encoding))) {
175 strcpy(buffer, path);
176 strcat(buffer, ".SEN.i");
177 if ((i->inv = sen_inv_create(buffer, i->lexicon, initial_n_segments))) {
178 if ((flags & SEN_INDEX_WITH_VGRAM)) {
179 strcpy(buffer, path);
180 strcat(buffer, ".SEN.v");
181 i->vgram= sen_vgram_create(buffer);
182 } else {
183 i->vgram = NULL;
184 }
185 if (!(flags & SEN_INDEX_WITH_VGRAM) || i->vgram) {
186 SEN_LOG(sen_log_notice, "index created (%s) flags=%x", path, i->lexicon->flags);
187 return i;
188 }
189 sen_inv_close(i->inv);
190 }
191 sen_sym_close(i->lexicon);
192 }
193 SEN_GFREE(i);
194 return NULL;
195 }
196
197 sen_index *
sen_index_open_with_keys(const char * path,sen_sym * keys)198 sen_index_open_with_keys(const char *path, sen_sym *keys)
199 {
200 sen_index *i;
201 char buffer[PATH_MAX];
202 if (!path || !keys) {
203 SEN_LOG(sen_log_warning, "sen_index_open_with_keys: invalid argument");
204 return NULL;
205 }
206 if (strlen(path) > PATH_MAX - 4) {
207 SEN_LOG(sen_log_warning, "too long index path (%s)", path);
208 return NULL;
209 }
210 if (!(i = SEN_GMALLOC(sizeof(sen_index)))) { return NULL; }
211 i->keys = keys;
212 i->foreign_flags = FOREIGN_KEY;
213 strcpy(buffer, path);
214 strcat(buffer, ".SEN.l");
215 if ((i->lexicon = sen_sym_open(buffer))) {
216 strcpy(buffer, path);
217 strcat(buffer, ".SEN.i");
218 if ((i->inv = sen_inv_open(buffer, i->lexicon))) {
219 if ((i->lexicon->flags & SEN_INDEX_WITH_VGRAM)) {
220 strcpy(buffer, path);
221 strcat(buffer, ".SEN.v");
222 i->vgram = sen_vgram_open(buffer);
223 } else {
224 i->vgram = NULL;
225 }
226 if(!(i->lexicon->flags & SEN_INDEX_WITH_VGRAM) || i->vgram) {
227 SEN_LOG(sen_log_notice, "index opened (%p:%s) flags=%x", i, path, i->lexicon->flags);
228 return i;
229 }
230 sen_inv_close(i->inv);
231 }
232 sen_sym_close(i->lexicon);
233 }
234 SEN_GFREE(i);
235 return NULL;
236 }
237
238 sen_index *
sen_index_create_with_keys_lexicon(const char * path,sen_sym * keys,sen_sym * lexicon,int initial_n_segments)239 sen_index_create_with_keys_lexicon(const char *path, sen_sym *keys, sen_sym *lexicon,
240 int initial_n_segments)
241 {
242 sen_index *i;
243 if (!keys || !path || !lexicon) {
244 SEN_LOG(sen_log_warning, "sen_index_create_with_keys_lexicon: invalid argument");
245 return NULL;
246 }
247 if (initial_n_segments == 0) { initial_n_segments = SENNA_DEFAULT_INITIAL_N_SEGMENTS; }
248 if (!(i = SEN_GMALLOC(sizeof(sen_index)))) { return NULL; }
249 SEN_LOG(sen_log_notice, "creating '%s' encoding=%s initial_n_segments=%d",
250 path, sen_enctostr(lexicon->encoding), initial_n_segments);
251 i->keys = keys;
252 i->lexicon = lexicon;
253 i->foreign_flags = FOREIGN_KEY|FOREIGN_LEXICON;
254 i->vgram = NULL;
255 if ((i->inv = sen_inv_create(path, i->lexicon, initial_n_segments))) {
256 SEN_LOG(sen_log_notice, "index created (%s) flags=%x", path, i->lexicon->flags);
257 return i;
258 }
259 SEN_GFREE(i);
260 return NULL;
261 }
262
263 sen_index *
sen_index_open_with_keys_lexicon(const char * path,sen_sym * keys,sen_sym * lexicon)264 sen_index_open_with_keys_lexicon(const char *path, sen_sym *keys, sen_sym *lexicon)
265 {
266 sen_index *i;
267 if (!keys || !path || !lexicon) {
268 SEN_LOG(sen_log_warning, "sen_index_open_with_keys_lexicon: invalid argument");
269 return NULL;
270 }
271 if (!(i = SEN_GMALLOC(sizeof(sen_index)))) { return NULL; }
272 i->keys = keys;
273 i->lexicon = lexicon;
274 i->foreign_flags = FOREIGN_KEY|FOREIGN_LEXICON;
275 i->vgram = NULL;
276 if ((i->inv = sen_inv_open(path, i->lexicon))) {
277 SEN_LOG(sen_log_notice, "index opened (%p:%s) flags=%x", i, path, i->lexicon->flags);
278 return i;
279 }
280 SEN_GFREE(i);
281 return NULL;
282 }
283
284 sen_rc
sen_index_close(sen_index * i)285 sen_index_close(sen_index *i)
286 {
287 if (!i) { return sen_invalid_argument; }
288 if (!(i->foreign_flags & FOREIGN_KEY)) { sen_sym_close(i->keys); }
289 if (!(i->foreign_flags & FOREIGN_LEXICON)) { sen_sym_close(i->lexicon); }
290 sen_inv_close(i->inv);
291 if (i->vgram) { sen_vgram_close(i->vgram); }
292 SEN_GFREE(i);
293 return sen_success;
294 }
295
296 sen_rc
sen_index_remove(const char * path)297 sen_index_remove(const char *path)
298 {
299 sen_rc rc;
300 char buffer[PATH_MAX];
301 if (!path || strlen(path) > PATH_MAX - 8) { return sen_invalid_argument; }
302 snprintf(buffer, PATH_MAX, "%s.SEN", path);
303 if ((rc = sen_sym_remove(buffer))) { goto exit; }
304 snprintf(buffer, PATH_MAX, "%s.SEN.i", path);
305 if ((rc = sen_inv_remove(buffer))) { goto exit; }
306 snprintf(buffer, PATH_MAX, "%s.SEN.l", path);
307 if ((rc = sen_sym_remove(buffer))) { goto exit; }
308 snprintf(buffer, PATH_MAX, "%s.SEN.v", path);
309 sen_io_remove(buffer); // sen_vgram_remove
310 exit :
311 return rc;
312 }
313
314 sen_rc
sen_index_rename(const char * old_name,const char * new_name)315 sen_index_rename(const char *old_name, const char *new_name)
316 {
317 char old_buffer[PATH_MAX];
318 char new_buffer[PATH_MAX];
319 if (!old_name || strlen(old_name) > PATH_MAX - 8) { return sen_invalid_argument; }
320 if (!new_name || strlen(new_name) > PATH_MAX - 8) { return sen_invalid_argument; }
321 snprintf(old_buffer, PATH_MAX, "%s.SEN", old_name);
322 snprintf(new_buffer, PATH_MAX, "%s.SEN", new_name);
323 sen_io_rename(old_buffer, new_buffer);
324 snprintf(old_buffer, PATH_MAX, "%s.SEN.i", old_name);
325 snprintf(new_buffer, PATH_MAX, "%s.SEN.i", new_name);
326 sen_io_rename(old_buffer, new_buffer);
327 snprintf(old_buffer, PATH_MAX, "%s.SEN.i.c", old_name);
328 snprintf(new_buffer, PATH_MAX, "%s.SEN.i.c", new_name);
329 sen_io_rename(old_buffer, new_buffer);
330 snprintf(old_buffer, PATH_MAX, "%s.SEN.l", old_name);
331 snprintf(new_buffer, PATH_MAX, "%s.SEN.l", new_name);
332 sen_io_rename(old_buffer, new_buffer);
333 snprintf(old_buffer, PATH_MAX, "%s.SEN.v", old_name);
334 snprintf(new_buffer, PATH_MAX, "%s.SEN.v", new_name);
335 sen_io_rename(old_buffer, new_buffer);
336 return sen_success;
337 }
338
339 sen_rc
sen_index_info(sen_index * i,int * key_size,int * flags,int * initial_n_segments,sen_encoding * encoding,unsigned * nrecords_keys,unsigned * file_size_keys,unsigned * nrecords_lexicon,unsigned * file_size_lexicon,unsigned long long * inv_seg_size,unsigned long long * inv_chunk_size)340 sen_index_info(sen_index *i, int *key_size, int *flags,
341 int *initial_n_segments, sen_encoding *encoding,
342 unsigned *nrecords_keys, unsigned *file_size_keys,
343 unsigned *nrecords_lexicon, unsigned *file_size_lexicon,
344 unsigned long long *inv_seg_size, unsigned long long *inv_chunk_size)
345 {
346 sen_rc rc = sen_success;
347
348 if (!i) { return sen_invalid_argument; }
349 if (key_size) { *key_size = i->keys->key_size; }
350 if (flags) { *flags = i->lexicon->flags & ~SEN_SYM_WITH_SIS; }
351 if (initial_n_segments) { *initial_n_segments = sen_inv_initial_n_segments(i->inv); }
352 if (encoding) { *encoding = i->lexicon->encoding; }
353 if (nrecords_keys || file_size_keys) {
354 if ((rc = sen_sym_info(i->keys, NULL, NULL, NULL, nrecords_keys, file_size_keys))) { return rc; }
355 }
356 if (nrecords_lexicon || file_size_lexicon) {
357 if ((rc = sen_sym_info(i->lexicon, NULL, NULL, NULL, nrecords_lexicon, file_size_lexicon))) { return rc; }
358 }
359 if (inv_seg_size || inv_chunk_size) {
360 uint64_t seg_size, chunk_size;
361
362 rc = sen_inv_info(i->inv, &seg_size, &chunk_size);
363
364 if (inv_seg_size) {
365 *inv_seg_size = seg_size;
366 }
367
368 if (inv_chunk_size) {
369 *inv_chunk_size = chunk_size;
370 }
371
372 if (rc != sen_success) {
373 return rc;
374 }
375 }
376 return sen_success;
377 }
378
379 sen_rc
sen_index_lock(sen_index * i,int timeout)380 sen_index_lock(sen_index *i, int timeout)
381 {
382 if (!i) { return sen_invalid_argument; }
383 return sen_sym_lock(i->keys, timeout);
384 }
385
386 sen_rc
sen_index_unlock(sen_index * i)387 sen_index_unlock(sen_index *i)
388 {
389 if (!i) { return sen_invalid_argument; }
390 return sen_sym_unlock(i->keys);
391 }
392
393 sen_rc
sen_index_clear_lock(sen_index * i)394 sen_index_clear_lock(sen_index *i)
395 {
396 if (!i) { return sen_invalid_argument; }
397 return sen_sym_clear_lock(i->keys);
398 }
399
400 int
sen_index_path(sen_index * i,char * pathbuf,int bufsize)401 sen_index_path(sen_index *i, char *pathbuf, int bufsize)
402 {
403 const char *invpath;
404 int pathsize;
405 if (!i) {
406 SEN_LOG(sen_log_warning, "sen_index_path: invalid argument");
407 return sen_invalid_argument;
408 }
409 invpath = sen_io_path(i->lexicon->io);
410 pathsize = strlen(invpath) - 5;
411 if (bufsize >= pathsize && pathbuf) {
412 memcpy(pathbuf, invpath, pathsize - 1);
413 pathbuf[pathsize - 1] = '\0';
414 }
415 return pathsize;
416 }
417
418 sen_rc
sen_index_upd(sen_index * i,const void * key,const char * oldvalue,unsigned int oldvalue_len,const char * newvalue,unsigned int newvalue_len)419 sen_index_upd(sen_index *i, const void *key,
420 const char *oldvalue, unsigned int oldvalue_len,
421 const char *newvalue, unsigned int newvalue_len)
422 {
423 sen_id rid;
424 sen_rc rc = sen_invalid_argument;
425 if (!i || !key) {
426 SEN_LOG(sen_log_warning, "sen_index_upd: invalid argument");
427 return sen_invalid_argument;
428 }
429 if ((rc = sen_index_lock(i, -1))) {
430 SEN_LOG(sen_log_crit, "sen_index_upd: index lock failed");
431 return rc;
432 }
433 if (oldvalue && *oldvalue) {
434 if (!(rid = sen_sym_at(i->keys, key))) {
435 SEN_LOG(sen_log_error, "del : (%x) (invalid key)", key);
436 goto exit;
437 }
438 } else if (newvalue && *newvalue) {
439 if (!(rid = sen_sym_get(i->keys, key))) { goto exit; }
440 } else {
441 goto exit;
442 }
443 rc = sen_inv_upd(i->inv, rid, i->vgram, oldvalue, oldvalue_len, newvalue, newvalue_len);
444 exit :
445 sen_index_unlock(i);
446 return rc;
447 }
448
/* Value that sen_index_del() stores through sen_sym_pocket_set() for the
   record being deleted. */
#define DELETE_FLAG 1
450
451 sen_rc
sen_index_del(sen_index * i,const void * key)452 sen_index_del(sen_index *i, const void *key)
453 {
454 sen_id rid;
455 if (!i || !key) { SEN_LOG(sen_log_warning, "sen_index_del: invalid argument"); return sen_invalid_argument; }
456 rid = sen_sym_at(i->keys, key);
457 if (!rid) { return sen_invalid_argument; }
458 return sen_sym_pocket_set(i->keys, rid, DELETE_FLAG);
459 }
460
/* Number of sen_value slots by which sen_values_add() grows the array.
   sen_values_add() tests n_values & (INITIAL_VALUE_SIZE - 1), so this must
   stay a power of two. */
#define INITIAL_VALUE_SIZE 1024
462
463 sen_values *
sen_values_open(void)464 sen_values_open(void)
465 {
466 sen_ctx *ctx = &sen_gctx; /* todo : replace it with the local ctx */
467 sen_values *v = SEN_MALLOC(sizeof(sen_values));
468 if (v) {
469 v->n_values = 0;
470 v->values = NULL;
471 }
472 return v;
473 }
474
475 sen_rc
sen_values_close(sen_values * v)476 sen_values_close(sen_values *v)
477 {
478 sen_ctx *ctx = &sen_gctx; /* todo : replace it with the local ctx */
479 if (!v) { return sen_invalid_argument; }
480 if (v->values) { SEN_FREE(v->values); }
481 SEN_FREE(v);
482 return sen_success;
483 }
484
485 sen_rc
sen_values_add(sen_values * v,const char * str,unsigned int str_len,unsigned int weight)486 sen_values_add(sen_values *v, const char *str, unsigned int str_len, unsigned int weight)
487 {
488 sen_ctx *ctx = &sen_gctx; /* todo : replace it with the local ctx */
489 sen_value *vp;
490 if (!v || !str) { SEN_LOG(sen_log_warning, "sen_values_add: invalid argument"); return sen_invalid_argument; }
491 if (!(v->n_values & (INITIAL_VALUE_SIZE - 1))) {
492 vp = SEN_REALLOC(v->values, sizeof(sen_value) * (v->n_values + INITIAL_VALUE_SIZE));
493 SEN_LOG(sen_log_debug, "expanded values to %d,%p", v->n_values + INITIAL_VALUE_SIZE, vp);
494 if (!vp) { return sen_memory_exhausted; }
495 v->values = vp;
496 }
497 vp = &v->values[v->n_values];
498 vp->str = str;
499 vp->str_len = str_len;
500 vp->weight = weight;
501 v->n_values++;
502 return sen_success;
503 }
504
505 sen_rc
sen_index_update(sen_index * i,const void * key,unsigned int section,sen_values * oldvalues,sen_values * newvalues)506 sen_index_update(sen_index *i, const void *key, unsigned int section,
507 sen_values *oldvalues, sen_values *newvalues)
508 {
509 sen_id rid;
510 sen_rc rc = sen_invalid_argument;
511 if (!i || !key) {
512 SEN_LOG(sen_log_warning, "sen_index_update: invalid argument");
513 return rc;
514 }
515 if ((rc = sen_index_lock(i, -1))) {
516 SEN_LOG(sen_log_crit, "sen_index_update: index lock failed");
517 return rc;
518 }
519 if (newvalues) {
520 if (!(rid = sen_sym_get(i->keys, key))) { goto exit; }
521 } else {
522 if (!(rid = sen_sym_at(i->keys, key))) { goto exit; }
523 }
524 rc = sen_inv_update(i->inv, rid, i->vgram, section, oldvalues, newvalues);
525 exit :
526 sen_index_unlock(i);
527 return rc;
528 }
529
530 /* select */
531
532 sen_rc
sen_index_similar_search(sen_index * i,const char * string,unsigned int string_len,sen_records * r,sen_sel_operator op,sen_select_optarg * optarg)533 sen_index_similar_search(sen_index *i, const char *string,
534 unsigned int string_len, sen_records *r,
535 sen_sel_operator op, sen_select_optarg *optarg)
536 {
537 sen_rc rc;
538 if (!i || !string || !r || !optarg) { return sen_invalid_argument; }
539 r->keys = i->keys;
540 optarg->max_size = sen_sym_size(i->keys) * sizeof(int);
541 rc = sen_inv_similar_search(i->inv, string, string_len, r, op, optarg);
542 sen_records_cursor_clear(r);
543 return rc;
544 }
545
/* Modes of sen_index_term_extract(), carried in optarg->max_interval.
   Results are sorted only in TERM_EXTRACT_EACH_POST mode. */
#define TERM_EXTRACT_EACH_POST 0
#define TERM_EXTRACT_EACH_TERM 1  /* not referenced in the visible part of this file */
548
549 sen_rc
sen_index_term_extract(sen_index * i,const char * string,unsigned int string_len,sen_records * r,sen_sel_operator op,sen_select_optarg * optarg)550 sen_index_term_extract(sen_index *i, const char *string,
551 unsigned int string_len, sen_records *r,
552 sen_sel_operator op, sen_select_optarg *optarg)
553 {
554 sen_rc rc;
555 r->keys = i->keys;
556 rc = sen_inv_term_extract(i->inv, string, string_len, r, op, optarg);
557 sen_records_cursor_clear(r);
558 if (!rc && optarg->max_interval == TERM_EXTRACT_EACH_POST) {
559 sen_sort_optarg opt;
560 opt.mode = sen_sort_ascending;
561 opt.compar = NULL;
562 opt.compar_arg = (void *)(intptr_t)r->key_size;
563 sen_records_sort(r, 10000, &opt); /* todo : why 10000? */
564 }
565 return rc;
566 }
567
568 sen_rc
sen_index_select(sen_index * i,const char * string,unsigned int string_len,sen_records * r,sen_sel_operator op,sen_select_optarg * optarg)569 sen_index_select(sen_index *i, const char *string, unsigned int string_len,
570 sen_records *r, sen_sel_operator op, sen_select_optarg *optarg)
571 {
572 sen_rc rc;
573 if (!r || !i) { return sen_invalid_argument; }
574 r->keys = i->keys;
575 if (optarg) { optarg->max_size = sen_sym_size(i->keys) * sizeof(int); }
576 rc = sen_inv_select(i->inv, string, string_len, r, op, optarg);
577 sen_records_cursor_clear(r);
578 return rc;
579 }
580
581 sen_records *
sen_index_sel(sen_index * i,const char * string,unsigned int string_len)582 sen_index_sel(sen_index *i, const char *string, unsigned int string_len)
583 {
584 sen_records *r;
585 if (!i) { return NULL; }
586 r = sen_inv_sel(i->inv, string, string_len);
587 if (r) { r->keys = i->keys; }
588 return r;
589 }
590
#ifdef USE_QUERY_ABORT
/*
 * Install an abort callback on the index's inverted index.
 * cb and arg are forwarded unchanged to sen_inv_set_abort_callback().
 * A NULL index is ignored, in line with the NULL checks of the other
 * sen_index_* entry points.
 */
void
sen_index_set_abort_callback(sen_index *i, int (*cb)(void*), void *arg)
{
  if (!i) { return; }
  sen_inv_set_abort_callback(i->inv, cb, arg);
}
#endif /* USE_QUERY_ABORT */
598
599 /* sen_records_heap class */
600
/* Binary heap of sen_records cursors, ordered by the record each cursor is
   currently positioned on (see records_heap_cmp). */
struct _sen_records_heap {
  int n_entries;       /* number of cursors currently stored in bins */
  int n_bins;          /* allocated capacity of bins */
  sen_records **bins;  /* heap array; bins[0] is the head */
  int limit;           /* stored by sen_records_heap_open; not read in the visible code */
  int curr;            /* incremented by sen_records_heap_next on a non-empty heap */
  int dir;             /* 1 for sen_sort_ascending, -1 otherwise */
  /* optional user comparator; when NULL an int32 field of the record is compared */
  int (*compar)(sen_records *, sen_recordh *, sen_records *, sen_recordh *, void *);
  /* passed to compar; without compar it is the int32 offset of the compared
     field, or (void *)-1 to use key_size / sizeof(int32_t) of each record set */
  void *compar_arg;
};
611
612 inline static int
records_heap_cmp(sen_records_heap * h,sen_records * r1,sen_records * r2)613 records_heap_cmp(sen_records_heap *h, sen_records *r1, sen_records *r2)
614 {
615 sen_recordh *rh1 = (sen_recordh *)sen_records_curr_rec(r1);
616 sen_recordh *rh2 = (sen_recordh *)sen_records_curr_rec(r2);
617 if (!h->compar) {
618 int off1, off2;
619 if (h->compar_arg == (void *)-1) {
620 off1 = (r1->key_size) / sizeof(int32_t);
621 off2 = (r2->key_size) / sizeof(int32_t);
622 } else {
623 off1 = off2 = (int)(intptr_t)h->compar_arg;
624 }
625 return (((int32_t *)(rh2))[off2] - ((int32_t *)(rh1))[off1]) * h->dir > 0;
626 }
627 return h->compar(r1, rh1, r2, rh2, h->compar_arg) * h->dir > 0;
628 }
629
630 sen_records_heap *
sen_records_heap_open(int size,int limit,sen_sort_optarg * optarg)631 sen_records_heap_open(int size, int limit, sen_sort_optarg *optarg)
632 {
633 sen_ctx *ctx = &sen_gctx; /* todo : replace it with the local ctx */
634 sen_records_heap *h = SEN_MALLOC(sizeof(sen_records_heap));
635 if (!h) { return NULL; }
636 h->bins = SEN_MALLOC(sizeof(sen_records *) * size);
637 if (!h->bins) {
638 SEN_FREE(h);
639 return NULL;
640 }
641 h->n_entries = 0;
642 h->n_bins = size;
643 h->limit = limit;
644 h->curr = 0;
645 if (optarg) {
646 h->dir = (optarg->mode == sen_sort_ascending) ? 1 : -1;
647 h->compar = optarg->compar;
648 h->compar_arg = optarg->compar_arg;
649 } else {
650 h->dir = -1;
651 h->compar = NULL;
652 h->compar_arg = (void *) -1;
653 }
654 return h;
655 }
656
657 sen_rc
sen_records_heap_add(sen_records_heap * h,sen_records * r)658 sen_records_heap_add(sen_records_heap *h, sen_records *r)
659 {
660 sen_ctx *ctx = &sen_gctx; /* todo : replace it with the local ctx */
661 if (h->n_entries >= h->n_bins) {
662 int size = h->n_bins * 2;
663 sen_records **bins = SEN_REALLOC(h->bins, sizeof(sen_records *) * size);
664 // sen_log("expanded sen_records_heap to %d,%p", size, bins);
665 if (!bins) { return sen_memory_exhausted; }
666 h->n_bins = size;
667 h->bins = bins;
668 }
669 if (!sen_records_next(r, NULL, 0, NULL)) {
670 sen_records_close(r);
671 return sen_internal_error;
672 }
673 {
674 int n, n2;
675 sen_records *r2;
676 n = h->n_entries++;
677 while (n) {
678 n2 = (n - 1) >> 1;
679 r2 = h->bins[n2];
680 if (records_heap_cmp(h, r, r2)) { break; }
681 h->bins[n] = r2;
682 n = n2;
683 }
684 h->bins[n] = r;
685 }
686 return sen_success;
687 }
688
689 int
sen_records_heap_next(sen_records_heap * h)690 sen_records_heap_next(sen_records_heap *h)
691 {
692 if (!h || !h->n_entries) { return 0; }
693 {
694 sen_records *r = h->bins[0];
695 if (!sen_records_next(r, NULL, 0, NULL)) {
696 sen_records_close(r);
697 r = h->bins[0] = h->bins[--h->n_entries];
698 }
699 {
700 int n = 0, m = h->n_entries;
701 if (m > 1) {
702 for (;;) {
703 int n1 = n * 2 + 1;
704 int n2 = n1 + 1;
705 sen_records *r1 = n1 < m ? h->bins[n1] : NULL;
706 sen_records *r2 = n2 < m ? h->bins[n2] : NULL;
707 if (r1 && records_heap_cmp(h, r, r1)) {
708 if (r2 && records_heap_cmp(h, r, r2) && records_heap_cmp(h, r1, r2)) {
709 h->bins[n] = r2;
710 n = n2;
711 } else {
712 h->bins[n] = r1;
713 n = n1;
714 }
715 } else {
716 if (r2 && records_heap_cmp(h, r, r2)) {
717 h->bins[n] = r2;
718 n = n2;
719 } else {
720 h->bins[n] = r;
721 break;
722 }
723 }
724 }
725 }
726 h->curr++;
727 return m;
728 }
729 }
730 }
731
732 sen_records *
sen_records_heap_head(sen_records_heap * h)733 sen_records_heap_head(sen_records_heap *h)
734 {
735 return h->n_entries ? h->bins[0] : NULL;
736 }
737
738 sen_rc
sen_records_heap_close(sen_records_heap * h)739 sen_records_heap_close(sen_records_heap *h)
740 {
741 sen_ctx *ctx = &sen_gctx; /* todo : replace it with the local ctx */
742 int i;
743 if (!h) { return sen_invalid_argument; }
744 for (i = h->n_entries; i--;) { sen_records_close(h->bins[i]); }
745 SEN_FREE(h->bins);
746 SEN_FREE(h);
747 return sen_success;
748 }
749
750 /* todo : config_path will be disappeared */
751 sen_rc
sen_info(char ** version,char ** configure_options,char ** config_path,sen_encoding * default_encoding,unsigned int * initial_n_segments,unsigned int * partial_match_threshold)752 sen_info(char **version,
753 char **configure_options,
754 char **config_path,
755 sen_encoding *default_encoding,
756 unsigned int *initial_n_segments,
757 unsigned int *partial_match_threshold)
758 {
759 if (version) {
760 *version = PACKAGE_VERSION;
761 }
762 if (configure_options) {
763 *configure_options = CONFIGURE_OPTIONS;
764 }
765 if (default_encoding) {
766 *default_encoding = sen_gctx.encoding;
767 }
768 if (initial_n_segments) {
769 *initial_n_segments = SENNA_DEFAULT_INITIAL_N_SEGMENTS;
770 }
771 if (partial_match_threshold) {
772 *partial_match_threshold = SENNA_DEFAULT_QUERY_ESCALATION_THRESHOLD;
773 }
774 return sen_success;
775 }
776