/* Copyright(C) 2004 Brazil This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include "senna_in.h" #include #include #include "ctx.h" #include "set.h" #define INITIAL_N_ENTRIES 256U #define INITIAL_INDEX_SIZE 256U #define GARBAGE ((entry *) 1) #define STEP(x) (((x) >> 2) | 0x1010101) typedef struct _sen_set_element entry; typedef struct _sen_set_element_str entry_str; #define SEN_SET_STRHASH(e) (((entry_str *)(e))->key) inline static entry * entry_new(sen_set *set) { entry *e; if (set->garbages) { e = set->garbages; set->garbages = *((entry **)e); memset(e, 0, set->entry_size); } else { SEN_ARRAY_NEXT(&set->a, e); } return e; } sen_rc sen_set_init(sen_ctx *ctx, sen_set *set, uint32_t key_size, uint32_t value_size, uint32_t init_size) { uint32_t entry_size, n, mod; for (n = INITIAL_INDEX_SIZE; n < init_size; n *= 2); switch (key_size) { case 0 : entry_size = (intptr_t)(&((entry_str *)0)->dummy) + value_size; break; case sizeof(uint32_t) : entry_size = (intptr_t)(&((entry *)0)->dummy) + value_size; break; default : entry_size = (intptr_t)(&((entry *)0)->dummy) + key_size + value_size; break; } if ((mod = entry_size % sizeof(intptr_t))) { entry_size += sizeof(intptr_t) - mod; } memset(set, 0, sizeof(sen_set)); set->ctx = ctx; set->key_size = key_size; set->value_size = value_size; set->entry_size = entry_size; set->max_offset = n - 1; sen_array_init(&set->a, ctx, entry_size, SEN_ARRAY_CLEAR); return (set->index = SEN_CALLOC(n * sizeof(entry *))) ? sen_success : sen_memory_exhausted; } sen_rc sen_set_fin(sen_set * set) { uint32_t i; sen_ctx *ctx; if (!set || !set->index) { return sen_invalid_argument; } ctx = set->ctx; if (!set->key_size) { entry *e, **sp; for (i = set->max_offset + 1, sp = set->index; i; i--, sp++) { e = *sp; if (!e || (e == GARBAGE)) { continue; } if (SEN_SET_STRKEY(e)) { SEN_FREE(SEN_SET_STRKEY(e)); } } } /* for (i = 0; i <= SEN_SET_MAX_CHUNK; i++) { if (set->chunks[i]) { SEN_FREE(set->chunks[i]); } } */ sen_array_fin(&set->a); SEN_FREE(set->index); return sen_success; } /* for compatibility */ sen_set * sen_set_open(uint32_t key_size, uint32_t value_size, uint32_t init_size) { sen_set *set; sen_ctx *ctx = &sen_gctx; /* todo : replace it with the local ctx */ if (!(set = SEN_MALLOC(sizeof(sen_set)))) { return NULL; } if (sen_set_init(ctx, set, key_size, value_size, init_size)) { SEN_FREE(set); return NULL; } return set; } sen_rc sen_set_close(sen_set *set) { sen_rc rc; if (set) { sen_ctx *ctx = set->ctx; rc = sen_set_fin(set); SEN_FREE(set); } else { rc = sen_invalid_argument; } return rc; } sen_rc sen_set_reset(sen_set * set, uint32_t ne) { uint32_t i, j, m, n, s; entry **index, *e, **sp, **dp; sen_ctx *ctx = set->ctx; if (!ne) { ne = set->n_entries * 2; } if (ne > INT_MAX) { return sen_memory_exhausted; } for (n = INITIAL_INDEX_SIZE; n <= ne; n *= 2); if (!(index = SEN_CALLOC(n * sizeof(entry *)))) { return sen_memory_exhausted; } m = n - 1; if (set->index) { if (set->key_size) { for (j = set->max_offset + 1, sp = set->index; j; j--, sp++) { e = *sp; if (!e || (e == GARBAGE)) { continue; } for (i = e->key, s = STEP(i); ; i += s) { dp = index + (i & m); if (!*dp) { break; } } *dp = e; } } else { for (j = set->max_offset + 1, sp = set->index; j; j--, sp++) { e = *sp; if (!e || (e == GARBAGE)) { continue; } for (i = SEN_SET_STRHASH(e), s = STEP(i); ; i += s) { dp = index + (i & m); if (!*dp) { break; } } *dp = e; } } } { entry **i0 = set->index; set->index = index; set->max_offset = m; set->n_garbages = 0; if (i0) { SEN_FREE(i0); } } return sen_success; } sen_rc sen_set_info(sen_set *set, unsigned *key_size, unsigned *value_size, unsigned *n_entries) { if (!set) { return sen_invalid_argument; } if (key_size) { *key_size = set->key_size; } if (value_size) { *value_size = set->value_size; } if (n_entries) { *n_entries = set->n_entries; } return sen_success; } inline static uint32_t str_hash(const unsigned char *p) { uint32_t r; for (r = 0; *p; p++) { r = (r * 1021) + *p; } return r; } inline static uint32_t bin_hash(const uint8_t *p, uint32_t length) { uint32_t r; for (r = 0; length--; p++) { r = (r * 1021) + *p; } return r; } sen_set_eh * sen_set_int_at(sen_set *set, const uint32_t *key, void **value) { entry *e, **ep, **index = set->index; uint32_t h = *key, i, m = set->max_offset, s = STEP(h); if (index) { for (i = h; ep = index + (i & m), (e = *ep); i += s) { if (e == GARBAGE) { continue; } if (e->key == h) { if (value) { *value = e->dummy; } return ep; } } } return NULL; } sen_set_eh * sen_set_str_at(sen_set *set, const char *key, void **value) { entry *e, **ep, **index = set->index; uint32_t h = str_hash((unsigned char *)key), i, m = set->max_offset, s = STEP(h); if (index) { for (i = h; ep = index + (i & m), (e = *ep); i += s) { if (e == GARBAGE) { continue; } if (SEN_SET_STRHASH(e) == h && !strcmp(key, SEN_SET_STRKEY(e))) { if (value) { *value = SEN_SET_STRVAL(e); } return (sen_set_eh *) ep; } } } return NULL; } sen_set_eh * sen_set_bin_at(sen_set *set, const void *key, void **value) { entry *e, **ep, **index = set->index; uint32_t h = bin_hash(key, set->key_size), i, m = set->max_offset, s = STEP(h); if (index) { for (i = h; ep = index + (i & m), (e = *ep); i += s) { if (e == GARBAGE) { continue; } if (e->key == h && !memcmp(key, e->dummy, set->key_size)) { if (value) { *value = SEN_SET_BINVAL(e, set); } return ep; } } } return NULL; } sen_set_eh * sen_set_at(sen_set *set, const void *key, void **value) { if (set->arrayp) { if (sen_set_reset(set, 0)) { return NULL; } // set->curr_entry = 0; set->arrayp = 0; } switch (set->key_size) { case 0 : return sen_set_str_at(set, key, value); case sizeof(uint32_t) : return sen_set_int_at(set, key, value); default : return sen_set_bin_at(set, key, value); } } inline static sen_set_eh * sen_set_int_get(sen_set *set, const uint32_t *key, void **value) { static int _ncalls = 0, _ncolls = 0; entry *e, **ep, **np = NULL, **index = set->index; uint32_t h = *key, i, m = set->max_offset, s = STEP(h); _ncalls++; if (!index) { return NULL; } for (i = h; ep = index + (i & m), e = *ep; i += s) { if (e == GARBAGE) { if (!np) { np = ep; } } else { if (e->key == h) { goto exit; } } if (!(++_ncolls % 1000000) && (_ncolls > _ncalls)) { if (_ncolls < 0 || _ncalls < 0) { _ncolls = 0; _ncalls = 0; } else { SEN_LOG(sen_log_notice, "total:%d collisions:%d nentries:%d maxoffset:%d", _ncalls, _ncolls, set->n_entries, set->max_offset); } } } if (np) { set->n_garbages--; ep = np; } if (!(e = entry_new(set))) { return NULL; } e->key = h; *ep = e; set->n_entries++; exit : if (value) { *value = e->dummy; } return ep; } inline static sen_set_eh * sen_set_str_get(sen_set *set, const char *key, void **value) { sen_ctx *ctx = set->ctx; entry *e, **ep, **np = NULL, **index = set->index; uint32_t h = str_hash((unsigned char *)key), i, m = set->max_offset, s = STEP(h); if (!index) { return NULL; } for (i = h; ep = index + (i & m), e = *ep; i += s) { if (e == GARBAGE) { if (!np) { np = ep; } } else { if (SEN_SET_STRHASH(e) == h && !strcmp(key, SEN_SET_STRKEY(e))) { goto exit; } } } { char *keybuf = SEN_STRDUP(key); if (!keybuf) { return NULL; } if (!(e = entry_new(set))) { SEN_FREE(keybuf); return NULL; } SEN_SET_STRHASH(e) = h; SEN_SET_STRKEY(e) = keybuf; if (np) { set->n_garbages--; ep = np; } *ep = e; set->n_entries++; } exit : if (value) { *value = SEN_SET_STRVAL(e); } return ep; } inline static sen_set_eh * sen_set_bin_get(sen_set *set, const void *key, void **value) { entry *e, **ep, **np = NULL, **index = set->index; uint32_t h = bin_hash(key, set->key_size), i, m = set->max_offset, s = STEP(h); if (!index) { return NULL; } for (i = h; ep = index + (i & m), e = *ep; i += s) { if (e == GARBAGE) { if (!np) { np = ep; } } else { if (e->key == h && !memcmp(key, e->dummy, set->key_size)) { goto exit; } } } if (np) { set->n_garbages--; ep = np; } if (!(e = entry_new(set))) { return NULL; } e->key = h; memcpy(e->dummy, key, set->key_size); *ep = e; set->n_entries++; exit : if (value) { *value = &e->dummy[set->key_size]; } return ep; } sen_set_eh * sen_set_get(sen_set *set, const void *key, void **value) { if (!set) { return NULL; } if (set->arrayp) { if (sen_set_reset(set, 0)) { return NULL; } // set->curr_entry = 0; set->arrayp = 0; } else if ((set->n_entries + set->n_garbages) * 2 > set->max_offset) { sen_set_reset(set, 0); } switch (set->key_size) { case 0 : return sen_set_str_get(set, key, value); case sizeof(uint32_t) : return sen_set_int_get(set, key, value); default : return sen_set_bin_get(set, key, value); } } sen_rc sen_set_del(sen_set *set, sen_set_eh *ep) { entry *e; sen_ctx *ctx = set->ctx; if (!set || !ep || !*ep) { return sen_invalid_argument; } e = *ep; *ep = GARBAGE; if (!set->key_size) { SEN_FREE(SEN_SET_STRKEY(e)); } *((entry **)e) = set->garbages; set->garbages = e; set->n_entries--; set->n_garbages++; return sen_success; } sen_set_cursor * sen_set_cursor_open(sen_set *set) { sen_set_cursor *c; sen_ctx *ctx = set->ctx; if (!set || !set->index) { return NULL; } if (!(c = SEN_MALLOC(sizeof(sen_set_cursor)))) { return NULL; } c->set = set; c->index = set->index; c->curr = set->index; c->rest = set->max_offset + 1; return c; } sen_set_eh * sen_set_cursor_next(sen_set_cursor *c, void **key, void **value) { uint32_t i; entry *e, **ep; if (!c || !c->rest) { return NULL; } if (c->index != c->set->index) { SEN_LOG(sen_log_alert, "sen_reset invoked during sen_set_cursor is opened!"); return NULL; } for (i = c->rest, ep = c->curr;;i--, ep++) { if (!i) { c->rest = 0; return NULL; } e = *ep; if (e && e != GARBAGE) { break; } } switch (c->set->key_size) { case 0 : if (key) { *key = SEN_SET_STRKEY(e); } if (value) { *value = SEN_SET_STRVAL(e); } break; case sizeof(uint32_t) : if (key) { *key = SEN_SET_INTKEY(e); } if (value) { *value = SEN_SET_INTVAL(e); } break; default : if (key) { *key = SEN_SET_BINKEY(e); } if (value) { *value = SEN_SET_BINVAL(e, c->set); } break; } c->curr = ep + 1; c->rest = i - 1; return ep; } sen_rc sen_set_cursor_close(sen_set_cursor *cursor) { sen_ctx *ctx = cursor->set->ctx; SEN_FREE(cursor); return sen_success; } inline static void swap(entry **a, entry **b) { entry *c = *a; *a = *b; *b = c; } #define INT_OFFSET_VAL(x) (((int32_t *)(x))[offset] * dir) inline static entry ** pack_int(sen_set *set, entry **res, int offset, int dir) { uint32_t i, n, m = set->max_offset; int32_t ck; entry **head, **tail, *e, *c; for (i = m >> 1;; i = (i + 1) & m) { if ((c = set->index[i]) && (c != GARBAGE)) { break; } } head = res; n = set->n_entries - 1; tail = res + n; ck = INT_OFFSET_VAL(c); while (n--) { for (;;) { i = (i + 1) & m; if ((e = set->index[i]) && (e != GARBAGE)) { break; } } if (INT_OFFSET_VAL(e) < ck) { *head++ = e; } else { *tail-- = e; } } *head = c; return set->n_entries > 2 ? head : NULL; } inline static entry ** part_int(entry **b, entry **e, int offset, int dir) { int32_t ck; intptr_t d = e - b; entry **c; if (INT_OFFSET_VAL(*b) > INT_OFFSET_VAL(*e)) { swap(b, e); } if (d < 2) { return NULL; } c = b + (d >> 1); if (INT_OFFSET_VAL(*b) > INT_OFFSET_VAL(*c)) { swap(b, c); } else { if (INT_OFFSET_VAL(*c) > INT_OFFSET_VAL(*e)) { swap(c, e); } } if (d < 3) { return NULL; } b++; ck = INT_OFFSET_VAL(*c); swap(b, c); c = b; for (;;) { while (INT_OFFSET_VAL(*++b) < ck) ; while (INT_OFFSET_VAL(*--e) > ck) ; if (b >= e) { break; } swap(b, e); } swap(c, e); return e; } static void _sort_int(entry **head, entry **tail, int limit, int offset, int dir) { intptr_t rest; entry **c; if (head < tail && (c = part_int(head, tail, offset, dir))) { rest = limit - 1 - (c - head); _sort_int(head, c - 1, limit, offset, dir); if (rest > 0) { _sort_int(c + 1, tail, (int)rest, offset, dir); } } } inline static void sort_int(sen_set *set, entry **res, int limit, int offset, int dir) { entry **c = pack_int(set, res, offset, dir); if (c) { intptr_t rest = limit - 1 - (c - res); _sort_int(res, c - 1, limit, offset, dir); if (rest > 0 ) { _sort_int(c + 1, res + set->n_entries - 1, (int)rest, offset, dir); } } } inline static entry ** pack_func(sen_set *set, entry **res, int(*func)(sen_set *, entry **, sen_set *, entry **, void *), void *arg, int dir) { uint32_t i, n, m = set->max_offset; entry **head, **tail, *e, *c; for (i = m >> 1;; i = (i + 1) & m) { if ((c = set->index[i]) && (c != GARBAGE)) { break; } } head = res; n = set->n_entries - 1; tail = res + n; while (n--) { for (;;) { i = (i + 1) & m; if ((e = set->index[i]) && (e != GARBAGE)) { break; } } if (func(set, &e, set, &c, arg) * dir < 0) { *head++ = e; } else { *tail-- = e; } } *head = c; return set->n_entries > 2 ? head : NULL; } inline static entry ** part_func(entry **b, entry **e, int(*func)(sen_set *, entry **, sen_set *, entry **, void *), void *arg, sen_set *set, int dir) { intptr_t d = e - b; entry **c; if (func(set, b, set, e, arg) * dir > 0) { swap(b, e); } if (d < 2) { return NULL; } c = b + (d >> 1); if (func(set, b, set, c, arg) * dir > 0) { swap(b, c); } else { if (func(set, c, set, e, arg) * dir > 0) { swap(c, e); } } if (d < 3) { return NULL; } b++; swap(b, c); c = b; for (;;) { while (func(set, ++b, set, c, arg) * dir < 0) ; while (func(set, --e, set, c, arg) * dir > 0) ; if (b >= e) { break; } swap(b, e); } swap(c, e); return e; } static void _sort_func(entry **head, entry **tail, int limit, int(*func)(sen_set *, entry **, sen_set *, entry **, void *), void *arg, sen_set *set, int dir) { entry **c; if (head < tail && (c = part_func(head, tail, func, arg, set, dir))) { intptr_t rest = limit - 1 - (c - head); _sort_func(head, c - 1, limit, func, arg, set, dir); if (rest > 0) { _sort_func(c + 1, tail, (int)rest, func, arg, set, dir); } } } inline static void sort_func(sen_set *set, entry **res, int limit, int(*func)(sen_set *, entry **, sen_set *, entry **, void *), void *arg, int dir) { entry **c = pack_func(set, res, func, arg, dir); if (c) { intptr_t rest = limit - 1 - (c - res); _sort_func(res, c - 1, limit, func, arg, set, dir); if (rest > 0 ) { _sort_func(c + 1, res + set->n_entries - 1, (int)rest, func, arg, set, dir); } } } inline static int func_str(sen_set *sa, entry **a, sen_set *sb, entry **b, void *arg) { return strcmp(SEN_SET_STRKEY(*a), SEN_SET_STRKEY(*b)); } inline static int func_bin(sen_set *sa, entry **a, sen_set *sb, entry **b, void *arg) { return memcmp(SEN_SET_BINKEY(*a), SEN_SET_BINKEY(*b), (uintptr_t)arg); } sen_set_eh * sen_set_sort(sen_set *set, int limit, sen_set_sort_optarg *optarg) { sen_ctx *ctx = set->ctx; entry **res; int dir = 1; if (!set || !set->index) { SEN_LOG(sen_log_warning, "sen_set_sort: invalid argument !"); return NULL; } if (!set->n_entries) { return NULL; } if (!(res = SEN_MALLOC(sizeof(entry *) * set->n_entries))) { SEN_LOG(sen_log_alert, "allocation of entries failed on sen_set_sort !"); return NULL; } if (limit <= 0) { limit += set->n_entries; if (limit <= 0) { SEN_LOG(sen_log_alert, "limit is too small in sen_set_sort !"); return NULL; } } if (limit > set->n_entries) { limit = set->n_entries; } set->limit = limit; if (optarg) { dir = (optarg->mode == sen_sort_ascending) ? 1 : -1; if (optarg->compar) { sort_func(set, res, limit, optarg->compar, optarg->compar_arg, dir); goto exit; } else if (optarg->compar_arg) { sort_int(set, res, limit, ((intptr_t)optarg->compar_arg) / sizeof(int32_t), dir); goto exit; } } switch (set->key_size) { case 0 : sort_func(set, res, limit, func_str, NULL, dir); break; case sizeof(uint32_t) : sort_int(set, res, limit, 0, dir); break; default : sort_func(set, res, limit, func_bin, (void *)(intptr_t)set->key_size, dir); break; } exit : return res; } sen_rc sen_set_element_info(sen_set *set, const sen_set_eh *e, void **key, void **value) { if (!set || !e) { return sen_invalid_argument; } switch (set->key_size) { case 0 : if (key) { *key = SEN_SET_STRKEY(*e); } if (value) { *value = SEN_SET_STRVAL(*e); } break; case sizeof(uint32_t) : if (key) { *key = SEN_SET_INTKEY(*e); } if (value) { *value = SEN_SET_INTVAL(*e); } break; default : if (key) { *key = SEN_SET_BINKEY(*e); } if (value) { *value = SEN_SET_BINVAL(*e, set); } break; } return sen_success; } sen_set * sen_set_union(sen_set *a, sen_set *b) { void *key, *va, *vb; entry *e, **ep; uint32_t i, key_size = a->key_size, value_size = a->value_size; if (key_size != b->key_size || value_size != b->value_size) { return NULL; } for (i = b->n_entries, ep = b->index; i; ep++) { if ((e = *ep) && e != GARBAGE) { switch (key_size) { case 0 : key = SEN_SET_STRKEY(e); vb = SEN_SET_STRVAL(e); break; case sizeof(uint32_t) : key = SEN_SET_INTKEY(e); vb = SEN_SET_INTVAL(e); break; default : key = SEN_SET_BINKEY(e); vb = &e->dummy[key_size]; break; } if (sen_set_at(a, key, &va)) { /* do copy of merge? */ } else { if (sen_set_get(a, key, &va)) { memcpy(va, vb, value_size); } } i--; } } return a; } sen_set * sen_set_subtract(sen_set *a, sen_set *b) { void *key; entry *e, **ep, **dp; uint32_t i, key_size = a->key_size; if (key_size != b->key_size) { return NULL; } for (i = b->n_entries, ep = b->index; i; ep++) { if ((e = *ep) && e != GARBAGE) { switch (key_size) { case 0 : key = SEN_SET_STRKEY(e); break; case sizeof(uint32_t) : key = SEN_SET_INTKEY(e); break; default : key = SEN_SET_BINKEY(e); break; } if ((dp = sen_set_at(a, key, NULL))) { sen_set_del(a, dp); } i--; } } return a; } sen_set * sen_set_intersect(sen_set *a, sen_set *b) { void *key; entry *e, **ep; uint32_t i, key_size = a->key_size; if (key_size != b->key_size) { return NULL; } for (i = a->n_entries, ep = a->index; i; ep++) { if ((e = *ep) && e != GARBAGE) { switch (key_size) { case 0 : key = SEN_SET_STRKEY(e); break; case sizeof(uint32_t) : key = SEN_SET_INTKEY(e); break; default : key = SEN_SET_BINKEY(e); break; } if (!sen_set_at(b, key, NULL)) { sen_set_del(a, ep); } i--; } } return a; } int sen_set_difference(sen_set *a, sen_set *b) { void *key; entry *e, **ep, **dp; uint32_t count = 0, i, key_size = a->key_size; if (key_size != b->key_size) { return -1; } for (i = a->n_entries, ep = a->index; i; ep++) { if ((e = *ep) && e != GARBAGE) { switch (key_size) { case 0 : key = SEN_SET_STRKEY(e); break; case sizeof(uint32_t) : key = SEN_SET_INTKEY(e); break; default : key = SEN_SET_BINKEY(e); break; } if ((dp = sen_set_at(b, key, NULL))) { sen_set_del(b, dp); sen_set_del(a, ep); count++; } i--; } } return count; } sen_rc sen_set_array_init(sen_set *set, uint32_t size) { // sen_ctx *ctx = set->ctx; SEN_ASSERT(!set->n_entries); SEN_ASSERT(!set->garbages); set->arrayp = 1; /* set->curr_entry = 0; if (set->chunks[SEN_SET_MAX_CHUNK]) { SEN_FREE(set->chunks[SEN_SET_MAX_CHUNK]); } if (!(set->chunks[SEN_SET_MAX_CHUNK] = SEN_CALLOC(set->entry_size * size))) { return sen_memory_exhausted; } */ return sen_set_reset(set, size); } /* rset : sets with subrecs */ inline static int rec_unit_size(sen_rec_unit unit, int rec_size) { switch (unit) { case sen_rec_document : return sizeof(sen_id); case sen_rec_section : return sizeof(sen_id) + sizeof(int); case sen_rec_position : return sizeof(sen_id) + sizeof(int) + sizeof(int); default : return rec_size; } } sen_rc sen_rset_init(sen_ctx *ctx, sen_set *set, sen_rec_unit record_unit, int record_size, sen_rec_unit subrec_unit, int subrec_size, unsigned int max_n_subrecs) { sen_rc rc; record_size = rec_unit_size(record_unit, record_size); subrec_size = rec_unit_size(subrec_unit, subrec_size); if (record_unit != sen_rec_userdef && subrec_unit != sen_rec_userdef) { subrec_size -= record_size; } if (!set) { return sen_invalid_argument; } if (record_size < 0) { return sen_invalid_argument; } if ((rc = sen_set_init(ctx, set, record_size, SEN_RSET_SCORE_SIZE + sizeof(int) + max_n_subrecs * (SEN_RSET_SCORE_SIZE + subrec_size), 0))) { return rc; } set->record_unit = record_unit; set->subrec_unit = subrec_unit; set->record_size = record_size; set->subrec_size = subrec_size; set->max_n_subrecs = max_n_subrecs; return rc; } inline static void subrecs_push(byte *subrecs, int size, int n_subrecs, int score, void *body, int dir) { byte *v; int *c2; int n = n_subrecs - 1, n2; while (n) { n2 = (n - 1) >> 1; c2 = SEN_RSET_SUBRECS_NTH(subrecs,size,n2); if (SEN_RSET_SUBRECS_CMP(score, *c2, dir)) { break; } SEN_RSET_SUBRECS_COPY(subrecs,size,n,c2); n = n2; } v = subrecs + n * (size + SEN_RSET_SCORE_SIZE); *((int *)v) = score; memcpy(v + SEN_RSET_SCORE_SIZE, body, size); } inline static void subrecs_replace_min(byte *subrecs, int size, int n_subrecs, int score, void *body, int dir) { byte *v; int n = 0, n1, n2, *c1, *c2; for (;;) { n1 = n * 2 + 1; n2 = n1 + 1; c1 = n1 < n_subrecs ? SEN_RSET_SUBRECS_NTH(subrecs,size,n1) : NULL; c2 = n2 < n_subrecs ? SEN_RSET_SUBRECS_NTH(subrecs,size,n2) : NULL; if (c1 && SEN_RSET_SUBRECS_CMP(score, *c1, dir)) { if (c2 && SEN_RSET_SUBRECS_CMP(score, *c2, dir) && SEN_RSET_SUBRECS_CMP(*c1, *c2, dir)) { SEN_RSET_SUBRECS_COPY(subrecs,size,n,c2); n = n2; } else { SEN_RSET_SUBRECS_COPY(subrecs,size,n,c1); n = n1; } } else { if (c2 && SEN_RSET_SUBRECS_CMP(score, *c2, dir)) { SEN_RSET_SUBRECS_COPY(subrecs,size,n,c2); n = n2; } else { break; } } } v = subrecs + n * (size + SEN_RSET_SCORE_SIZE); memcpy(v, &score, SEN_RSET_SCORE_SIZE); memcpy(v + SEN_RSET_SCORE_SIZE, body, size); } void sen_rset_add_subrec(sen_set *s, sen_rset_recinfo *ri, int score, void *body, int dir) { int limit = s->max_n_subrecs; ri->score += score; ri->n_subrecs += 1; if (limit) { int ssize = s->subrec_size; int n_subrecs = SEN_RSET_N_SUBRECS(ri); if (limit < n_subrecs) { if (SEN_RSET_SUBRECS_CMP(score, *ri->subrecs, dir)) { subrecs_replace_min(ri->subrecs, ssize, limit, score, body, dir); } } else { subrecs_push(ri->subrecs, ssize, n_subrecs, score, body, dir); } } } sen_set * sen_rset_group(sen_set *s, int limit, sen_group_optarg *optarg) { sen_ctx *ctx = s->ctx; sen_set *g; sen_rset_recinfo *ri; sen_rec_unit unit; sen_set_cursor *c; const sen_recordh *rh; byte *key, *ekey, *gkey = NULL; int funcp, dir; unsigned int rsize; if (!s || !s->index) { return NULL; } if (optarg) { unit = sen_rec_userdef; rsize = optarg->key_size; funcp = optarg->func ? 1 : 0; dir = (optarg->mode == sen_sort_ascending) ? -1 : 1; } else { unit = sen_rec_document; rsize = rec_unit_size(unit, sizeof(sen_id)); funcp = 0; dir = 1; } if (funcp) { gkey = SEN_MALLOC(rsize ? rsize : 8192); if (!gkey) { SEN_LOG(sen_log_alert, "allocation for gkey failed !"); return NULL; } } else { if (s->record_size <= rsize) { return NULL; } } if (!(c = sen_set_cursor_open(s))) { SEN_LOG(sen_log_alert, "sen_set_cursor_open on sen_set_group failed !"); if (gkey) { SEN_FREE(gkey); } return NULL; } if (!(g = SEN_MALLOC(sizeof(sen_set)))) { sen_set_cursor_close(c); if (gkey) { SEN_FREE(gkey); } return NULL; } if (sen_rset_init(ctx, g, unit, rsize, s->record_unit, s->record_size, limit)) { SEN_LOG(sen_log_alert, "sen_rset_init in sen_set_group failed !"); sen_set_cursor_close(c); SEN_FREE(g); if (gkey) { SEN_FREE(gkey); } return NULL; } while ((rh = sen_set_cursor_next(c, (void **)&key, (void **)&ri))) { if (funcp) { if (optarg->func(s, rh, gkey, optarg->func_arg)) { continue; } ekey = key; } else { gkey = key; ekey = key + rsize; } { sen_rset_recinfo *gri; if (sen_set_get(g, gkey, (void **)&gri)) { sen_rset_add_subrec(g, gri, ri->score, ekey, dir); } } } sen_set_cursor_close(c); if (funcp) { SEN_FREE(gkey); } return g; } sen_rc sen_rset_subrec_info(sen_set *s, const sen_recordh *rh, int index, sen_id *rid, int *section, int *pos, int *score, void **subrec) { sen_rc rc; sen_rset_posinfo *pi; sen_rset_recinfo *ri; int *p, unit_size = SEN_RSET_SCORE_SIZE + s->subrec_size; if (!s || !rh || index < 0) { return sen_invalid_argument; } if ((unsigned int)index >= s->max_n_subrecs) { return sen_invalid_argument; } rc = sen_set_element_info(s, rh, (void **)&pi, (void **)&ri); if (rc) { return rc; } if (index >= ri->n_subrecs) { return sen_invalid_argument; } p = (int *)(ri->subrecs + index * unit_size); if (score) { *score = p[0]; } if (subrec) { *subrec = &p[1]; } switch (s->record_unit) { case sen_rec_document : if (rid) { *rid = pi->rid; } if (section) { *section = (s->subrec_unit != sen_rec_userdef) ? p[1] : 0; } if (pos) { *pos = (s->subrec_unit == sen_rec_position) ? p[2] : 0; } break; case sen_rec_section : if (rid) { *rid = pi->rid; } if (section) { *section = pi->sid; } if (pos) { *pos = (s->subrec_unit == sen_rec_position) ? p[1] : 0; } break; default : pi = (sen_rset_posinfo *)&p[1]; switch (s->subrec_unit) { case sen_rec_document : if (rid) { *rid = pi->rid; } if (section) { *section = 0; } if (pos) { *pos = 0; } break; case sen_rec_section : if (rid) { *rid = pi->rid; } if (section) { *section = pi->sid; } if (pos) { *pos = 0; } break; case sen_rec_position : if (rid) { *rid = pi->rid; } if (section) { *section = pi->sid; } if (pos) { *pos = pi->pos; } break; default : if (rid) { *rid = 0; } if (section) { *section = 0; } if (pos) { *pos = 0; } break; } break; } return sen_success; } /* sen_records : sets with keys or/and cursor */ sen_records * sen_records_open(sen_rec_unit record_unit, sen_rec_unit subrec_unit, unsigned int max_n_subrecs) { sen_records *r; if (!(r = SEN_GMALLOC(sizeof(sen_records)))) { return NULL; } if (sen_rset_init(&sen_gctx, r, record_unit, 0, subrec_unit, 0, max_n_subrecs)) { SEN_GFREE(r); return NULL; } return r; } void sen_records_cursor_clear(sen_records *r) { sen_ctx *ctx = r->ctx; if (r->sorted) { SEN_FREE(r->sorted); r->sorted = NULL; } if (r->cursor) { sen_set_cursor_close(r->cursor); r->cursor = NULL; } r->curr_rec = NULL; } sen_rc sen_records_close(sen_records *r) { sen_ctx *ctx; if (!r) { return sen_invalid_argument; } ctx = r->ctx; if (r->curr_rec) { sen_id *rid; sen_rset_recinfo *ri; if (!sen_set_element_info(r, r->curr_rec, (void **)&rid, (void **)&ri)) { SEN_LOG(sen_log_debug, "curr_rec: %d:%d", *rid, ri->score); // sen_log("sen_record_next=%d,%d", *rid, *(_sen_sym_key(r->keys, *rid))); } } sen_records_cursor_clear(r); return sen_set_close(r); } sen_rc sen_records_reopen(sen_records *r, sen_rec_unit record_unit, sen_rec_unit subrec_unit, unsigned int max_n_subrecs) { sen_ctx *ctx; int record_size = rec_unit_size(record_unit, 0); int subrec_size = rec_unit_size(subrec_unit, 0); if (!r || record_size < 0 || (max_n_subrecs && subrec_size <= record_size)) { return sen_invalid_argument; } ctx = r->ctx; sen_records_cursor_clear(r); sen_set_fin(r); return sen_rset_init(ctx, r, record_unit, 0, subrec_unit, 0, max_n_subrecs); } sen_rc sen_records_sort(sen_records *r, int limit, sen_sort_optarg *optarg) { if (!r) { return sen_invalid_argument; } sen_records_cursor_clear(r); if (optarg) { r->sorted = sen_set_sort(r, limit, optarg); } else { sen_set_sort_optarg arg; arg.mode = sen_sort_descending; arg.compar = NULL; arg.compar_arg = (void *)(intptr_t)r->key_size; r->sorted = sen_set_sort(r, limit, &arg); } return r->sorted ? sen_success : sen_internal_error; } sen_rc sen_records_rewind(sen_records *r) { if (!r) { return sen_invalid_argument; } if (r->sorted) { r->curr_rec = NULL; return sen_success; } else { sen_records_cursor_clear(r); r->cursor = sen_set_cursor_open(r); return r->cursor ? sen_success : sen_internal_error; } } int sen_records_next(sen_records *r, void *keybuf, int bufsize, int *score) { if (!r) { return 0; } if (r->sorted) { if (r->curr_rec) { if (r->curr_rec - r->sorted >= r->limit - 1) { return 0; } r->curr_rec++; } else { if (r->limit > 0) { r->curr_rec = r->sorted; } } } else { if (!r->cursor && sen_records_rewind(r)) { return 0; } r->curr_rec = sen_set_cursor_next(r->cursor, NULL, NULL); } if (r->curr_rec) { sen_id *rid; sen_rset_recinfo *ri; if (!sen_set_element_info(r, r->curr_rec, (void **)&rid, (void **)&ri)) { if (score) { *score = ri->score; } if (r->record_unit != sen_rec_userdef) { return r->keys ? sen_sym_key(r->keys, *rid, keybuf, bufsize) : r->record_size; } else { if (r->record_size <= bufsize) { memcpy(keybuf, rid, r->record_size); } return r->record_size; } } } return 0; } int sen_records_find(sen_records *r, const void *key) { sen_id rid; sen_rset_recinfo *ri; if (!r || !r->keys || r->record_unit != sen_rec_document) { return 0; } if (!(rid = sen_sym_at(r->keys, key))) { return 0; } if (!sen_set_at(r, &rid, (void **) &ri)) { return 0; } return ri->score; } int sen_records_nhits(sen_records *r) { if (!r) { return 0; } return r->n_entries; } int sen_records_curr_score(sen_records *r) { sen_rset_recinfo *ri; if (!r || !r->curr_rec) { return 0; } if (sen_set_element_info(r, r->curr_rec, NULL, (void **)&ri)) { return 0; } return ri->score; } int sen_records_curr_key(sen_records *r, void *keybuf, int bufsize) { sen_id *rid; if (!r || !r->curr_rec) { return 0; } if (sen_set_element_info(r, r->curr_rec, (void **)&rid, NULL)) { return 0; } if (r->record_unit != sen_rec_userdef) { return sen_sym_key(r->keys, *rid, keybuf, bufsize); } else { if (r->record_size <= bufsize) { memcpy(keybuf, rid, r->record_size); } return r->record_size; } } const sen_recordh * sen_records_curr_rec(sen_records *r) { if (!r) { return NULL; } return r->curr_rec; } sen_records * sen_records_union(sen_records *a, sen_records *b) { if (!a || !b) { return NULL; } if (a->keys != b->keys) { return NULL; } if (!sen_set_union(a, b)) { return NULL; } sen_records_close(b); sen_records_cursor_clear(a); return a; } sen_records * sen_records_subtract(sen_records *a, sen_records *b) { if (!a || !b) { return NULL; } if (a->keys != b->keys) { return NULL; } if (!sen_set_subtract(a, b)) { return NULL; } sen_records_close(b); sen_records_cursor_clear(a); return a; } sen_records * sen_records_intersect(sen_records *a, sen_records *b) { if (!a || !b) { return NULL; } if (a->keys != b->keys) { return NULL; } if (a->n_entries > b->n_entries) { sen_set c; memcpy(&c, a, sizeof(sen_records)); memcpy(a, b, sizeof(sen_records)); memcpy(b, &c, sizeof(sen_records)); } if (!sen_set_intersect(a, b)) { return NULL; } sen_records_close(b); sen_records_cursor_clear(a); return a; } int sen_records_difference(sen_records *a, sen_records *b) { int count; if (!a || !b) { return -1; } if (a->keys != b->keys) { return -1; } if ((count = sen_set_difference(a, b)) >= 0) { sen_records_cursor_clear(a); sen_records_cursor_clear(b); } return count; } #define SUBRECS_CMP(a,b,dir) (((a) - (b))*(dir) > 0) #define SUBRECS_NTH(subrecs,size,n) ((int *)(subrecs + n * (size + SEN_RSET_SCORE_SIZE))) #define SUBRECS_COPY(subrecs,size,n,src) \ (memcpy(subrecs + n * (size + SEN_RSET_SCORE_SIZE), src, size + SEN_RSET_SCORE_SIZE)) sen_rc sen_records_group(sen_records *r, int limit, sen_group_optarg *optarg) { sen_ctx *ctx; sen_set *g; if (!(g = sen_rset_group(r, limit, optarg))) { return sen_internal_error; } ctx = r->ctx; sen_records_cursor_clear(r); sen_set_fin(r); memcpy(r, g, sizeof(sen_set)); SEN_FREE(g); return sen_success; } const sen_recordh * sen_records_at(sen_records *r, const void *key, unsigned section, unsigned pos, int *score, int *n_subrecs) { sen_rset_recinfo *ri; const sen_recordh *res; if (!r || !r->keys) { return NULL; } if (r->record_unit == sen_rec_userdef) { res = r->curr_rec = sen_set_at(r, key, (void **) &ri); } else { sen_rset_posinfo pi; if (!(pi.rid = sen_sym_at(r->keys, key))) { return NULL; } pi.sid = section; pi.pos = pos; res = r->curr_rec = sen_set_at(r, &pi, (void **) &ri); } if (res) { if (score) { *score = ri->score; } if (n_subrecs) { *n_subrecs = ri->n_subrecs; } } return res; } sen_rc sen_record_info(sen_records *r, const sen_recordh *rh, void *keybuf, int bufsize, int *keysize, int *section, int *pos, int *score, int *n_subrecs) { sen_rc rc; sen_rset_posinfo *pi; sen_rset_recinfo *ri; if (!r || !rh) { return sen_invalid_argument; } rc = sen_set_element_info(r, rh, (void **)&pi, (void **)&ri); if (rc) { return rc; } switch (r->record_unit) { case sen_rec_document : if ((keybuf && bufsize) || keysize) { int len = sen_sym_key(r->keys, pi->rid, keybuf, bufsize); if (keysize) { *keysize = len; } } if (section) { *section = 0; } if (pos) { *pos = 0; } break; case sen_rec_section : if ((keybuf && bufsize) || keysize) { int len = sen_sym_key(r->keys, pi->rid, keybuf, bufsize); if (keysize) { *keysize = len; } } if (section) { *section = pi->sid; } if (pos) { *pos = 0; } break; case sen_rec_position : if ((keybuf && bufsize) || keysize) { int len = sen_sym_key(r->keys, pi->rid, keybuf, bufsize); if (keysize) { *keysize = len; } } if (section) { *section = pi->sid; } if (pos) { *pos = pi->pos; } break; case sen_rec_userdef : if ((keybuf && bufsize) || keysize) { unsigned int len = r->record_size; if (!len) { len = (unsigned int)strlen((const char *)pi) + 1; } if ((unsigned int)bufsize >= len) { memcpy(keybuf, pi, len); } if (keysize) { *keysize = len; } } if (section) { *section = 0; } if (pos) { *pos = 0; } break; default : return sen_invalid_format; break; } if (score) { *score = ri->score; } if (n_subrecs) { *n_subrecs = ri->n_subrecs; } return sen_success; } sen_rc sen_record_subrec_info(sen_records *r, const sen_recordh *rh, int index, void *keybuf, int bufsize, int *keysize, int *section, int *pos, int *score) { sen_rc rc; sen_id rid; void *subrec; if ((rc = sen_rset_subrec_info(r, rh, index, &rid, section, pos, score, &subrec))) { return rc; } if ((keybuf && bufsize) || keysize) { if (rid) { int len = sen_sym_key(r->keys, rid, keybuf, bufsize); if (keysize) { *keysize = len; } } else if (r->record_unit == sen_rec_userdef && r->subrec_unit == sen_rec_userdef) { int len = (int) r->subrec_size; if (!len) { len = (unsigned int)strlen((char *)subrec) + 1; } if ((unsigned int)bufsize >= len) { memcpy(keybuf, subrec, len); } if (keysize) { *keysize = len; } } } return sen_success; }