1 /* Copyright(C) 2004 Brazil
2
3 This library is free software; you can redistribute it and/or
4 modify it under the terms of the GNU Lesser General Public
5 License as published by the Free Software Foundation; either
6 version 2.1 of the License, or (at your option) any later version.
7
8 This library is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 Lesser General Public License for more details.
12
13 You should have received a copy of the GNU Lesser General Public
14 License along with this library; if not, write to the Free Software
15 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 */
17 #include "senna_in.h"
18 #include <fcntl.h>
19 #include <string.h>
20 #include <sys/stat.h>
21 #include "sym.h"
22
23 /* v08 functions */
24
25 sen_sym *sen_sym_create08(const char *path, uint32_t key_size,
26 uint32_t flags, sen_encoding encoding);
27 sen_sym *sen_sym_open08(const char *path);
28 sen_id sen_sym_get08(sen_sym *sym, const void *key);
29 sen_id sen_sym_at08(sen_sym *sym, const void *key);
30 sen_set *sen_sym_prefix_search08(sen_sym *sym, const void *key);
31 sen_set *sen_sym_suffix_search08(sen_sym *sym, const void *key);
32 sen_id sen_sym_common_prefix_search08(sen_sym *sym, const void *key);
33 sen_rc sen_sym_del08(sen_sym *sym, const void *key);
34 const char *_sen_sym_key08(sen_sym *sym, sen_id id);
35 int sen_sym_key08(sen_sym *sym, sen_id id, void *keybuf, int bufsize);
36 int sen_sym_pocket_get08(sen_sym *sym, sen_id id);
37 sen_rc sen_sym_pocket_set08(sen_sym *sym, sen_id id, unsigned int value);
38 int sen_sym_del_with_sis08(sen_sym *sym, sen_id id,
39 int(*func)(sen_id, void *), void *func_arg);
40
41 #define SEN_SYM_HEADER_SIZE 0x10000
42 #define SEN_IDSTR "SENNA:SYM:01.00"
43 #define SEN_SYM_DELETED (SEN_SYM_MAX_ID + 1)
44
45 #define SEN_SYM_SEGMENT_SIZE 0x400000
46 #define W_OF_KEY_IN_A_SEGMENT 22
47 #define W_OF_PAT_IN_A_SEGMENT 18
48 #define W_OF_SIS_IN_A_SEGMENT 19
49 #define KEY_MASK_IN_A_SEGMENT 0x3fffff
50 #define PAT_MASK_IN_A_SEGMENT 0x3ffff
51 #define SIS_MASK_IN_A_SEGMENT 0x7ffff
52 #define SEG_NOT_ASSIGNED 0xffff
53
54 struct sen_sym_header {
55 char idstr[16];
56 uint32_t flags;
57 sen_encoding encoding;
58 uint32_t key_size;
59 uint32_t nrecords;
60 uint32_t curr_rec;
61 int32_t curr_key;
62 int32_t curr_del;
63 int32_t curr_del2;
64 int32_t curr_del3;
65 uint32_t lock;
66 uint32_t reserved[18];
67 uint16_t keyarray[SEN_SYM_MAX_SEGMENT];
68 uint16_t patarray[SEN_SYM_MAX_SEGMENT];
69 uint16_t sisarray[SEN_SYM_MAX_SEGMENT];
70 sen_sym_delinfo delinfos[SEN_SYM_NDELINFOS];
71 sen_id garbages[(SEN_SYM_MAX_KEY_LENGTH + 1) / 8];
72 };
73
74 typedef struct {
75 sen_id r;
76 sen_id l;
77 uint32_t key;
78 uint16_t check;
79 /* deleting : 1, immediate : 1, pocket : 14 */
80 uint16_t bitfield;
81 } pat_node;
82
83 #define PAT_DELETING 1
84 #define PAT_IMMEDIATE 2
85 #define PAT_POCKET_MAX 0x3fff
86 #define PAT_DEL(x) ((x)->bitfield & PAT_DELETING)
87 #define PAT_IMD(x) ((x)->bitfield & PAT_IMMEDIATE)
88 #define PAT_PKT(x) ((x)->bitfield >> 2)
89 #define SET_PAT_DEL(x,v) ((x)->bitfield = ((x)->bitfield & 0xfffe) | (v))
90 #define SET_PAT_IMD(x,v) ((x)->bitfield = ((x)->bitfield & 0xfffd) | (v))
91 #define SET_PAT_PKT(x,v) ((x)->bitfield = ((x)->bitfield & 3) | ((v) << 2))
92
93 typedef struct {
94 sen_id children;
95 sen_id sibling;
96 } sis_node;
97
98 enum {
99 segment_empty = 0,
100 segment_key,
101 segment_pat,
102 segment_sis
103 };
104
105 /* bit operation */
106
107 /*
108 inline static uint32_t
109 nth_bit(const byte *key, uint32_t n)
110 {
111 return key[n >> 3] & (0x80 >> (n & 7));
112 }
113 */
114
115 #define nth_bit(key,n) (((key)[(n)>>3]) & (0x80 >> ((n) & 7)))
116
117 /* segment operation */
118
119 inline static sen_rc
segment_new(sen_sym * sym,int segtype)120 segment_new(sen_sym *sym, int segtype)
121 {
122 int i;
123 uint16_t seg = 0, k = 0, p = 0, s = 0;
124 char used[SEN_SYM_MAX_SEGMENT];
125 memset(used, 0, SEN_SYM_MAX_SEGMENT);
126 for (i = 0; i < SEN_SYM_MAX_SEGMENT; i++) {
127 if ((seg = sym->header->keyarray[i]) != SEG_NOT_ASSIGNED) {
128 k = i + 1; used[seg] = 1;
129 }
130 if ((seg = sym->header->patarray[i]) != SEG_NOT_ASSIGNED) {
131 p = i + 1; used[seg] = 1;
132 }
133 if ((seg = sym->header->sisarray[i]) != SEG_NOT_ASSIGNED) {
134 s = i + 1; used[seg] = 1;
135 }
136 }
137 for (seg = 0; used[seg];) {
138 if (++seg == SEN_SYM_MAX_SEGMENT) { return sen_memory_exhausted; }
139 }
140 switch (segtype) {
141 case segment_key :
142 sym->header->keyarray[k] = seg;
143 sym->header->curr_key = k * SEN_SYM_SEGMENT_SIZE;
144 break;
145 case segment_pat :
146 sym->header->patarray[p] = seg;
147 break;
148 case segment_sis :
149 sym->header->sisarray[s] = seg;
150 break;
151 }
152 return sen_success;
153 }
154
155 #define SEG_LOAD(io,segno,addr)\
156 {\
157 if (segno < SEN_SYM_MAX_SEGMENT) {\
158 SEN_IO_SEG_REF(io, segno, addr);\
159 if (addr) { SEN_IO_SEG_UNREF(io, segno); };\
160 }\
161 }
162
163 /* patricia array operation */
164
165 inline static pat_node *
pat_at(sen_sym * sym,sen_id id)166 pat_at(sen_sym *sym, sen_id id)
167 {
168 uint16_t lseg, pseg;
169 void **p;
170 if (id > SEN_SYM_MAX_ID) { return NULL; }
171 lseg = id >> W_OF_PAT_IN_A_SEGMENT;
172 p = &sym->pataddrs[lseg];
173 if (!*p) {
174 if ((pseg = sym->header->patarray[lseg]) == SEG_NOT_ASSIGNED) { return NULL; }
175 SEG_LOAD(sym->io, pseg, *p);
176 if (!*p) { return NULL; }
177 }
178 return (pat_node *)(((byte *)*p) + ((id & PAT_MASK_IN_A_SEGMENT) * sizeof(pat_node)));
179 }
180
181 #define PAT_AT(sym,id,n) do {\
182 void **p_;\
183 sen_id id_ = (id);\
184 uint16_t lseg_ = id_ >> W_OF_PAT_IN_A_SEGMENT;\
185 if (id_ > SEN_SYM_MAX_ID) { (n) = NULL; break; }\
186 p_ = &(sym)->pataddrs[lseg_];\
187 if (!*p_) {\
188 uint16_t pseg_ = (sym)->header->patarray[lseg_];\
189 if (pseg_ == SEG_NOT_ASSIGNED) { (n) = NULL; break; }\
190 SEG_LOAD((sym)->io, pseg_, *p_);\
191 if (!*p_) { (n) = NULL; break; }\
192 }\
193 (n) = (pat_node *)(((byte *)*p_) + ((id_ & PAT_MASK_IN_A_SEGMENT) * sizeof(pat_node)));\
194 } while(0)
195
196 inline static pat_node *
pat_get(sen_sym * sym,sen_id id)197 pat_get(sen_sym *sym, sen_id id)
198 {
199 uint16_t lseg, pseg;
200 void **p;
201 if (id > SEN_SYM_MAX_ID) { return NULL; }
202 lseg = id >> W_OF_PAT_IN_A_SEGMENT;
203 p = &sym->pataddrs[lseg];
204 if (!*p) {
205 while ((pseg = sym->header->patarray[lseg]) == SEG_NOT_ASSIGNED) {
206 segment_new(sym, segment_pat);
207 }
208 SEG_LOAD(sym->io, pseg, *p);
209 if (!*p) { return NULL; }
210 }
211 return (pat_node *)(((byte *)*p) + ((id & PAT_MASK_IN_A_SEGMENT) * sizeof(pat_node)));
212 }
213
214 inline static pat_node *
pat_node_new(sen_sym * sym,sen_id * id)215 pat_node_new(sen_sym *sym, sen_id *id)
216 {
217 uint32_t n = sym->header->curr_rec + 1;
218 pat_node *res;
219 if (n > SEN_SYM_MAX_ID) { return NULL; }
220 if ((res = pat_get(sym, n))) {
221 sym->header->curr_rec = n;
222 sym->header->nrecords++;
223 }
224 if (id) { *id = n; }
225 return res;
226 }
227
228 /* sis operation */
229
230 inline static sis_node *
sis_at(sen_sym * sym,sen_id id)231 sis_at(sen_sym *sym, sen_id id)
232 {
233 uint16_t lseg, pseg;
234 void **p;
235 if (id > SEN_SYM_MAX_ID) { return NULL; }
236 lseg = id >> W_OF_SIS_IN_A_SEGMENT;
237 p = &sym->sisaddrs[lseg];
238 if (!*p) {
239 if ((pseg = sym->header->sisarray[lseg]) == SEG_NOT_ASSIGNED) { return NULL; }
240 SEG_LOAD(sym->io, pseg, *p);
241 if (!*p) { return NULL; }
242 }
243 return (sis_node *)(((byte *)*p) + ((id & SIS_MASK_IN_A_SEGMENT) * sizeof(sis_node)));
244 }
245
246 inline static sis_node *
sis_get(sen_sym * sym,sen_id id)247 sis_get(sen_sym *sym, sen_id id)
248 {
249 uint16_t lseg, pseg;
250 void **p;
251 if (id > SEN_SYM_MAX_ID) { return NULL; }
252 lseg = id >> W_OF_SIS_IN_A_SEGMENT;
253 p = &sym->sisaddrs[lseg];
254 if (!*p) {
255 while ((pseg = sym->header->sisarray[lseg]) == SEG_NOT_ASSIGNED) {
256 segment_new(sym, segment_sis);
257 }
258 SEG_LOAD(sym->io, pseg, *p);
259 if (!*p) { return NULL; }
260 }
261 return (sis_node *)(((byte *)*p) + ((id & SIS_MASK_IN_A_SEGMENT) * sizeof(sis_node)));
262 }
263
264 #define MAX_LEVEL 16
265
266 static void
sis_collect(sen_sym * sym,sen_set * h,sen_id id,uint32_t level)267 sis_collect(sen_sym *sym, sen_set *h, sen_id id, uint32_t level)
268 {
269 uint32_t *offset;
270 sis_node *sl = sis_at(sym, id);
271 if (sl) {
272 sen_id sid = sl->children;
273 while (sid && sid != id) {
274 if (sen_set_get(h, &sid, (void **) &offset)) {
275 *offset = level;
276 if (level < MAX_LEVEL) { sis_collect(sym, h, sid, level + 1); }
277 if (!(sl = sis_at(sym, sid))) { break; }
278 sid = sl->sibling;
279 } else {
280 /* todo : must be handled */
281 }
282 }
283 }
284 }
285
286 /* key operation */
287
288 #define KEY_AT(sym,pos,ptr) \
289 {\
290 uint16_t lseg, pseg;\
291 void **p;\
292 lseg = pos >> W_OF_KEY_IN_A_SEGMENT;\
293 p = &sym->keyaddrs[lseg];\
294 if (*p) {\
295 ptr = ((byte *)*p) + (pos & KEY_MASK_IN_A_SEGMENT);\
296 } else {\
297 if ((pseg = sym->header->keyarray[lseg]) == SEG_NOT_ASSIGNED) {\
298 ptr = NULL;\
299 } else {\
300 SEG_LOAD(sym->io, pseg, *p);\
301 ptr = *p ? ((byte *)*p) + (pos & KEY_MASK_IN_A_SEGMENT) : NULL;\
302 }\
303 }\
304 }
305
306 inline static uint32_t
key_put(sen_sym * sym,const uint8_t * key,int len)307 key_put(sen_sym *sym, const uint8_t *key, int len)
308 {
309 int lpos;
310 uint32_t res = sym->header->curr_key;
311 if (len >= SEN_SYM_SEGMENT_SIZE) { return 0; }
312 lpos = res & KEY_MASK_IN_A_SEGMENT;
313 if (lpos + len > SEN_SYM_SEGMENT_SIZE || !lpos) {
314 if (segment_new(sym, segment_key)) { return 0; }
315 res = sym->header->curr_key;
316 }
317 {
318 uint8_t *dest;
319 KEY_AT(sym, res, dest);
320 if (!dest) { return 0; }
321 memcpy(dest, key, len);
322 }
323 sym->header->curr_key += len;
324 return res;
325 }
326
327 inline static uint8_t *
pat_node_get_key(sen_sym * sym,pat_node * n)328 pat_node_get_key(sen_sym *sym, pat_node *n)
329 {
330 if (PAT_IMD(n)) {
331 return (uint8_t *) &n->key;
332 } else {
333 uint8_t *res;
334 KEY_AT(sym, n->key, res);
335 return res;
336 }
337 }
338
339 inline static sen_rc
pat_node_set_key(sen_sym * sym,pat_node * n,const uint8_t * key,unsigned int len)340 pat_node_set_key(sen_sym *sym, pat_node *n, const uint8_t *key, unsigned int len)
341 {
342 if (!key || !len) { return sen_invalid_argument; }
343 if (len <= sizeof(uint32_t)) {
344 SET_PAT_IMD(n, PAT_IMMEDIATE);
345 memcpy(&n->key, key, len);
346 } else {
347 SET_PAT_IMD(n, 0);
348 n->key = key_put(sym, key, len);
349 }
350 return sen_success;
351 }
352
353 /* delinfo operation */
354
355 #define DL_EMPTY 0x00000000
356 #define DL_PHASE1 0x00000001
357 #define DL_PHASE2 0x00000002
358 #define DL_STMASK 0x00000003
359 #define DL_SHARED 0x80000000
360 #define DL_LD(x) (((x)->bitfield >> 2) & SEN_SYM_MAX_ID)
361 #define DL_ST(x) ((x)->bitfield & DL_STMASK)
362 #define DL_SH(x) ((x)->bitfield & DL_SHARED)
363 #define SET_DL_LD(x,v) ((x)->bitfield = ((x)->bitfield & (DL_STMASK|DL_SHARED)) | \
364 (((v) & SEN_SYM_MAX_ID) << 2))
365 #define SET_DL_ST(x,v) ((x)->bitfield = ((x)->bitfield & ~DL_STMASK) | ((v) & DL_STMASK))
366 #define SET_DL_SH(x,v) ((x)->bitfield = ((x)->bitfield & ~DL_SHARED) | ((v) & DL_SHARED))
367
368 inline static sen_sym_delinfo *
delinfo_search(sen_sym * sym,sen_id id)369 delinfo_search(sen_sym *sym, sen_id id)
370 {
371 int i;
372 sen_sym_delinfo *di;
373 for (i = (sym->header->curr_del2) & SEN_SYM_MDELINFOS;
374 i != sym->header->curr_del;
375 i = (i + 1) & SEN_SYM_MDELINFOS) {
376 di = &sym->header->delinfos[i];
377 if (DL_ST(di) != DL_PHASE1) { continue; }
378 if (DL_LD(di) == id) { return di; }
379 if (di->d == id) { return di; }
380 }
381 return NULL;
382 }
383
384 inline static sen_rc
delinfo_turn_2(sen_sym * sym,sen_sym_delinfo * di)385 delinfo_turn_2(sen_sym *sym, sen_sym_delinfo *di)
386 {
387 sen_id d, *p = NULL;
388 pat_node *ln, *dn;
389 // sen_log("delinfo_turn_2> di->d=%d di->ld=%d stat=%d", di->d, DL_LD(di), DL_ST(di));
390 if (DL_ST(di) != DL_PHASE1) { return sen_success; }
391 PAT_AT(sym, DL_LD(di), ln);
392 if (!ln) { return sen_invalid_argument; }
393 if (!(d = di->d)) { return sen_invalid_argument; }
394 PAT_AT(sym, d, dn);
395 if (!dn) { return sen_invalid_argument; }
396 SET_PAT_DEL(ln, 0);
397 SET_PAT_DEL(dn, 0);
398 {
399 sen_id r, *p0;
400 pat_node *rn;
401 int c;
402 size_t len = sym->key_size * 8;
403 const uint8_t *key = pat_node_get_key(sym, dn);
404 if (!key) { return sen_invalid_argument; }
405 if (!len) { len = (strlen((char *)key) + 1) * 8; }
406 PAT_AT(sym, 0, rn);
407 c = -1;
408 p0 = &rn->r;
409 while ((r = *p0)) {
410 if (r == d) {
411 p = p0;
412 break;
413 }
414 PAT_AT(sym, r, rn);
415 if (!rn) { return sen_invalid_format; }
416 if (rn->check <= c || len <= rn->check) { break; }
417 c = rn->check;
418 p0 = nth_bit((uint8_t *)key, c) ? &rn->r : &rn->l;
419 }
420 }
421 if (p) {
422 ln->check = dn->check;
423 ln->r = dn->r;
424 ln->l = dn->l;
425 *p = DL_LD(di);
426 } else {
427 /* debug */
428 int j;
429 sen_id dd;
430 sen_sym_delinfo *ddi;
431 SEN_LOG(sen_log_debug, "failed to find d=%d", d);
432 for (j = (sym->header->curr_del2 + 1) & SEN_SYM_MDELINFOS;
433 j != sym->header->curr_del;
434 j = (j + 1) & SEN_SYM_MDELINFOS) {
435 ddi = &sym->header->delinfos[j];
436 if (DL_ST(ddi) != DL_PHASE1) { continue; }
437 PAT_AT(sym, DL_LD(ddi), ln);
438 if (!ln) { continue; }
439 if (!(dd = ddi->d)) { continue; }
440 if (d == DL_LD(ddi)) {
441 SEN_LOG(sen_log_debug, "found!!!, d(%d) become ld of (%d)", d, dd);
442 }
443 }
444 /* debug */
445 }
446 SET_DL_ST(di, DL_PHASE2);
447 di->d = d;
448 // sen_log("delinfo_turn_2< di->d=%d di->ld=%d", di->d, DL_LD(di));
449 return sen_success;
450 }
451
452 inline static sen_rc
delinfo_turn_3(sen_sym * sym,sen_sym_delinfo * di)453 delinfo_turn_3(sen_sym *sym, sen_sym_delinfo *di)
454 {
455 pat_node *dn;
456 uint32_t size;
457 if (DL_ST(di) != DL_PHASE2) { return sen_success; }
458 PAT_AT(sym, di->d, dn);
459 if (!dn) { return sen_invalid_argument; }
460 if (DL_SH(di)) {
461 SET_PAT_IMD(dn, PAT_IMMEDIATE);
462 size = 0;
463 } else {
464 if (sym->key_size) {
465 size = sym->key_size;
466 } else {
467 // size = dn->check = (uint16_t)(strlen(key) + 1);
468 if (PAT_IMD(dn)) {
469 size = 0;
470 } else {
471 const uint8_t *key = pat_node_get_key(sym, dn);
472 if (!key) { return sen_invalid_argument; }
473 size = strlen((char *)key) + 1;
474 }
475 }
476 }
477 SET_DL_ST(di, DL_EMPTY);
478 dn->r = SEN_SYM_DELETED;
479 dn->l = sym->header->garbages[size];
480 sym->header->garbages[size] = di->d;
481 return sen_success;
482 }
483
484 inline static sen_sym_delinfo *
delinfo_new(sen_sym * sym)485 delinfo_new(sen_sym *sym)
486 {
487 sen_sym_delinfo *res = &sym->header->delinfos[sym->header->curr_del];
488 uint32_t n = (sym->header->curr_del + 1) & SEN_SYM_MDELINFOS;
489 int gap = ((n + SEN_SYM_NDELINFOS - sym->header->curr_del2) & SEN_SYM_MDELINFOS)
490 - (SEN_SYM_NDELINFOS / 2);
491 while (gap-- > 0) {
492 if (delinfo_turn_2(sym, &sym->header->delinfos[sym->header->curr_del2])) {
493 SEN_LOG(sen_log_crit, "d2 failed: %d", DL_LD(&sym->header->delinfos[sym->header->curr_del2]));
494 }
495 sym->header->curr_del2 = (sym->header->curr_del2 + 1) & SEN_SYM_MDELINFOS;
496 }
497 if (n == sym->header->curr_del3) {
498 if (delinfo_turn_3(sym, &sym->header->delinfos[sym->header->curr_del3])) {
499 SEN_LOG(sen_log_crit, "d3 failed: %d", DL_LD(&sym->header->delinfos[sym->header->curr_del3]));
500 }
501 sym->header->curr_del3 = (sym->header->curr_del3 + 1) & SEN_SYM_MDELINFOS;
502 }
503 sym->header->curr_del = n;
504 return res;
505 }
506
507 /* sym operation */
508
509 sen_sym *
sen_sym_create(const char * path,uint32_t key_size,uint32_t flags,sen_encoding encoding)510 sen_sym_create(const char *path, uint32_t key_size,
511 uint32_t flags, sen_encoding encoding)
512 {
513 int i;
514 sen_io *io;
515 sen_sym *sym;
516 pat_node *node0;
517 struct sen_sym_header *header;
518 ERRCLR(NULL);
519 if (flags & 0x70000) {
520 return sen_sym_create08(path, key_size, flags, encoding);
521 }
522 io = sen_io_create(path, SEN_SYM_HEADER_SIZE, SEN_SYM_SEGMENT_SIZE,
523 SEN_SYM_MAX_SEGMENT, sen_io_auto, SEN_SYM_MAX_SEGMENT);
524 if (!io) { return NULL; }
525 if (encoding == sen_enc_default) { encoding = sen_gctx.encoding; }
526 header = sen_io_header(io);
527 memcpy(header->idstr, SEN_IDSTR, 16);
528 header->flags = flags;
529 header->encoding = encoding;
530 header->key_size = key_size;
531 header->nrecords = 0;
532 header->curr_rec = 0;
533 header->curr_key = -1;
534 header->curr_del = 0;
535 header->curr_del2 = 0;
536 header->curr_del3 = 0;
537 header->lock = 0;
538 for (i = 0; i < SEN_SYM_MAX_SEGMENT; i++) {
539 header->keyarray[i] = SEG_NOT_ASSIGNED;
540 header->patarray[i] = SEG_NOT_ASSIGNED;
541 header->sisarray[i] = SEG_NOT_ASSIGNED;
542 }
543 if (!(sym = SEN_GMALLOC(sizeof(sen_sym)))) {
544 sen_io_close(io);
545 return NULL;
546 }
547 sym->v08p = 0;
548 sym->io = io;
549 sym->header = header;
550 sym->key_size = key_size;
551 sym->encoding = encoding;
552 sym->flags = flags;
553 sym->lock = &header->lock;
554 for (i = 0; i < SEN_SYM_MAX_SEGMENT; i++) {
555 sym->keyaddrs[i] = NULL;
556 sym->pataddrs[i] = NULL;
557 sym->sisaddrs[i] = NULL;
558 }
559 if (!(node0 = pat_get(sym, 0))) {
560 sen_io_close(io);
561 SEN_GFREE(sym);
562 return NULL;
563 }
564 node0->r = 0;
565 node0->l = 0;
566 node0->key = 0;
567 return sym;
568 }
569
570 sen_sym *
sen_sym_open(const char * path)571 sen_sym_open(const char *path)
572 {
573 int i;
574 sen_io *io;
575 sen_sym *sym;
576 pat_node *r0;
577 struct sen_sym_header *header;
578 ERRCLR(NULL);
579 io = sen_io_open(path, sen_io_auto, 8192);
580 if (!io) { return NULL; }
581 header = sen_io_header(io);
582 if (memcmp(header->idstr, SEN_IDSTR, 16)) {
583 SEN_LOG(sen_log_notice, "sym_idstr (%s)", header->idstr);
584 sen_io_close(io);
585 return sen_sym_open08(path);
586 }
587 if (!(sym = SEN_GMALLOC(sizeof(sen_sym)))) {
588 sen_io_close(io);
589 return NULL;
590 }
591 sym->v08p = 0;
592 sym->io = io;
593 sym->header = header;
594 sym->key_size = header->key_size;
595 sym->encoding = header->encoding;
596 sym->flags = header->flags;
597 sym->lock = &header->lock;
598 for (i = 0; i < SEN_SYM_MAX_SEGMENT; i++) {
599 sym->keyaddrs[i] = NULL;
600 sym->pataddrs[i] = NULL;
601 sym->sisaddrs[i] = NULL;
602 }
603 PAT_AT(sym, 0, r0);
604 if (!r0) {
605 sen_io_close(io);
606 SEN_GFREE(sym);
607 return NULL;
608 }
609 return sym;
610 }
611
612 sen_rc
sen_sym_close(sen_sym * sym)613 sen_sym_close(sen_sym *sym)
614 {
615 sen_rc rc;
616 if (!sym) { return sen_invalid_argument; }
617 rc = sen_io_close(sym->io);
618 SEN_GFREE(sym);
619 return rc;
620 }
621
622 sen_rc
sen_sym_remove(const char * path)623 sen_sym_remove(const char *path)
624 {
625 ERRCLR(NULL);
626 if (!path) { return sen_invalid_argument; }
627 return sen_io_remove(path);
628 }
629
630 inline static sen_id
_sen_sym_get(sen_sym * sym,const uint8_t * key,uint32_t * new,uint32_t * lkey)631 _sen_sym_get(sen_sym *sym, const uint8_t *key, uint32_t *new, uint32_t *lkey)
632 {
633 sen_id r, r0, *p0, *p1 = NULL;
634 pat_node *rn, *rn0;
635 int c = -1, c0 = -1, c1 = -1, len;
636 size_t size = sym->key_size;
637 *new = 0;
638 if (!size) { size = strlen((char *)key) + 1; }
639 len = (int)size * 8;
640 if (len > SEN_SYM_MAX_KEY_LENGTH) { return SEN_SYM_NIL; }
641 PAT_AT(sym, 0, rn0);
642 p0 = &rn0->r;
643 if (*p0) {
644 char xor, mask;
645 const uint8_t *s, *d;
646 for (;;) {
647 if (!(r0 = *p0)) {
648 if (!(s = pat_node_get_key(sym, rn0))) { return 0; }
649 break;
650 }
651 PAT_AT(sym, r0, rn0);
652 if (!rn0) { return 0; }
653 if (c0 < rn0->check && rn0->check < len) {
654 c1 = c0; c0 = rn0->check;
655 p1 = p0; p0 = nth_bit(key, c0) ? &rn0->r : &rn0->l;
656 } else {
657 if (!(s = pat_node_get_key(sym, rn0))) { return 0; }
658 if (rn0->check < len && !memcmp(s, key, size)) { return r0; }
659 break;
660 }
661 }
662 for (c = 0, d = key; *s == *d; c += 8, s++, d++);
663 for (xor = *s ^ *d, mask = 0x80; !(xor & mask); mask >>= 1, c++);
664 /* for test
665 e if (r0 && r0 != sen_sym_at(sym, pat_node_get_key(sym, rn0))) {
666 SEN_LOG(sen_log_debug, "deleted node is used as ld %d(%d:%s) %d", r0, PAT_DEL(rn0), pat_node_get_key(sym, rn0), sen_sym_at(sym, pat_node_get_key(sym, rn0)));
667 }
668 */
669 if (c == c0 && !*p0) {
670 if (c < len - 1) { c++; }
671 } else {
672 if (c < c0) {
673 if (c > c1) {
674 p0 = p1;
675 } else {
676 PAT_AT(sym, 0, rn0);
677 p0 = &rn0->r;
678 while ((r0 = *p0)) {
679 PAT_AT(sym, r0, rn0);
680 if (!rn0) { return 0; }
681 if (c < rn0->check) { break; }
682 p0 = nth_bit(key, rn0->check) ? &rn0->r : &rn0->l;
683 }
684 }
685 }
686 }
687 if (c >= len) { return 0; }
688 } else {
689 c = len - 1;
690 }
691 {
692 size_t size2 = size > sizeof(uint32_t) ? size : 0;
693 if (*lkey && size2) {
694 if (sym->header->garbages[0]) {
695 r = sym->header->garbages[0];
696 sym->header->nrecords++;
697 PAT_AT(sym, r, rn);
698 if (!rn) { return 0; }
699 sym->header->garbages[0] = rn->l;
700 } else {
701 if (!(rn = pat_node_new(sym, &r))) { return 0; }
702 }
703 SET_PAT_IMD(rn, 0);
704 rn->key = *lkey;
705 } else {
706 if (sym->header->garbages[size2]) {
707 uint8_t *keybuf;
708 r = sym->header->garbages[size2];
709 sym->header->nrecords++;
710 PAT_AT(sym, r, rn);
711 if (!rn) { return 0; }
712 sym->header->garbages[size2] = rn->l;
713 if (!(keybuf = pat_node_get_key(sym, rn))) { return 0; }
714 memcpy(keybuf, key, size);
715 } else {
716 if (!(rn = pat_node_new(sym, &r))) { return 0; }
717 pat_node_set_key(sym, rn, key, (unsigned int)size);
718 }
719 *lkey = rn->key;
720 }
721 }
722 rn->check = c;
723 SET_PAT_DEL(rn, 0);
724 SET_PAT_PKT(rn, 0);
725 if (nth_bit(key, c)) {
726 rn->r = r;
727 rn->l = *p0;
728 } else {
729 rn->r = *p0;
730 rn->l = r;
731 }
732 *p0 = r;
733 *new = 1;
734 return r;
735 }
736
737 inline static int
chop(sen_sym * sym,const char ** key,uint32_t * lkey)738 chop(sen_sym *sym, const char **key, uint32_t *lkey)
739 {
740 size_t len = sen_str_charlen(*key, sym->encoding);
741 if (len) {
742 *lkey += len;
743 *key += len;
744 return **key;
745 } else {
746 return 0;
747 }
748 }
749
750 sen_id
sen_sym_get(sen_sym * sym,const void * key)751 sen_sym_get(sen_sym *sym, const void *key)
752 {
753 uint32_t new, lkey = 0;
754 sen_id r0;
755 if (!sym || !key) { return SEN_SYM_NIL; }
756 if (sym->v08p) { return sen_sym_get08(sym, key); }
757 r0 = _sen_sym_get(sym, (uint8_t *)key, &new, &lkey);
758 if (r0 && (sym->flags & SEN_SYM_WITH_SIS) &&
759 (*((uint8_t *)key) & 0x80)) { // todo: refine!!
760 sis_node *sl, *sr;
761 sen_id l = r0, r;
762 if (new && (sl = sis_get(sym, l))) {
763 const char *sis = key;
764 sl->children = l;
765 sl->sibling = 0;
766 while (chop(sym, &sis, &lkey)) {
767 if (!(*sis & 0x80)) { break; }
768 if (!(r = _sen_sym_get(sym, (uint8_t *)sis, &new, &lkey))) { break; }
769 if (!(sr = sis_get(sym, r))) { break; }
770 if (new) {
771 sl->sibling = r;
772 sr->children = l;
773 sr->sibling = 0;
774 } else {
775 sl->sibling = sr->children;
776 sr->children = l;
777 break;
778 }
779 l = r;
780 sl = sr;
781 }
782 }
783 }
784 return r0;
785 }
786
787 sen_id
sen_sym_at(sen_sym * sym,const void * key)788 sen_sym_at(sen_sym *sym, const void *key)
789 {
790 sen_id r;
791 pat_node *rn;
792 int c = -1;
793 size_t len;
794 if (!sym || !key) { return SEN_SYM_NIL; }
795 if (sym->v08p) { return sen_sym_at08(sym, key); }
796 len = (sym->key_size ? sym->key_size : strlen(key) + 1) * 8;
797 PAT_AT(sym, 0, rn);
798 for (r = rn->r; r; r = nth_bit((uint8_t *)key, c) ? rn->r : rn->l) {
799 PAT_AT(sym, r, rn);
800 if (!rn) { break; /* corrupt? */ }
801 if (len <= rn->check) { break; }
802 if (rn->check <= c) {
803 const uint8_t *k = pat_node_get_key(sym, rn);
804 if (!k) { break; }
805 if (!memcmp(k, key, len >> 3)) { return r; }
806 break;
807 }
808 c = rn->check;
809 }
810 return SEN_SYM_NIL;
811 }
812
813 sen_id
sen_sym_nextid(sen_sym * sym,const void * key)814 sen_sym_nextid(sen_sym *sym, const void *key)
815 {
816 sen_id r = SEN_SYM_NIL;
817 if (sym && key) {
818 size_t size = sym->key_size;
819 if (!size) { size = strlen((char *)key) + 1; }
820 if (!(r = sym->header->garbages[size > sizeof(uint32_t) ? size : 0])) {
821 r = sym->header->curr_rec + 1;
822 }
823 }
824 return r;
825 }
826
827 static void
get_tc(sen_sym * sym,sen_set * h,pat_node * rn,unsigned int len)828 get_tc(sen_sym *sym, sen_set *h, pat_node *rn, unsigned int len)
829 {
830 sen_id id;
831 pat_node *node;
832 id = rn->r;
833 if (id) {
834 PAT_AT(sym, id, node);
835 if (node) {
836 if (node->check > rn->check) {
837 get_tc(sym, h, node, len);
838 } else {
839 sen_set_get(h, &id, NULL);
840 }
841 }
842 }
843 id = rn->l;
844 if (id) {
845 PAT_AT(sym, id, node);
846 if (node) {
847 if (node->check > rn->check) {
848 get_tc(sym, h, node, len);
849 } else {
850 sen_set_get(h, &id, NULL);
851 }
852 }
853 }
854 }
855
856 sen_rc
sen_sym_prefix_search_with_set(sen_sym * sym,const void * key,sen_set * h)857 sen_sym_prefix_search_with_set(sen_sym *sym, const void *key, sen_set *h)
858 {
859 int c = -1;
860 const uint8_t *k;
861 size_t len = strlen(key) * 8;
862 sen_id r;
863 pat_node *rn;
864 PAT_AT(sym, 0, rn);
865 r = rn->r;
866 while (r) {
867 PAT_AT(sym, r, rn);
868 if (!rn) { return sen_invalid_format; }
869 if (c < rn->check && len > rn->check) {
870 c = rn->check;
871 r = nth_bit((uint8_t *)key, c) ? rn->r : rn->l;
872 continue;
873 }
874 if (!(k = pat_node_get_key(sym, rn))) { break; }
875 if (!memcmp(k, key, len >> 3)) {
876 if (rn->check >= len) {
877 get_tc(sym, h, rn, (unsigned int)len);
878 } else {
879 sen_set_get(h, &r, NULL);
880 }
881 return sen_success;
882 }
883 break;
884 }
885 return sen_end_of_data;
886 }
887
888 sen_set *
sen_sym_prefix_search(sen_sym * sym,const void * key)889 sen_sym_prefix_search(sen_sym *sym, const void *key)
890 {
891 sen_set *h;
892 ERRCLR(NULL);
893 if (!sym || !key || sym->key_size) { return NULL; } /* only for string */
894 if (sym->v08p) { return sen_sym_prefix_search08(sym, key); }
895 if ((h = sen_set_open(sizeof(sen_id), 0, 0))) {
896 if (sen_sym_prefix_search_with_set(sym, key, h)) {
897 sen_set_close(h);
898 h = NULL;
899 }
900 }
901 return h;
902 }
903
904 sen_rc
sen_sym_suffix_search_with_set(sen_sym * sym,const void * key,sen_set * h)905 sen_sym_suffix_search_with_set(sen_sym *sym, const void *key, sen_set *h)
906 {
907 sen_id r;
908 uint32_t *offset;
909 if ((r = sen_sym_at(sym, key)) && sen_set_get(h, &r, (void **) &offset)) {
910 *offset = 0;
911 if (sym->flags & SEN_SYM_WITH_SIS) { sis_collect(sym, h, r, 1); }
912 return sen_success;
913 }
914 return sen_internal_error;
915 }
916
917 sen_set *
sen_sym_suffix_search(sen_sym * sym,const void * key)918 sen_sym_suffix_search(sen_sym *sym, const void *key)
919 {
920 sen_set *h;
921 ERRCLR(NULL);
922 if (!sym || !key || sym->key_size) { return NULL; }
923 if (sym->v08p) { return sen_sym_suffix_search08(sym, key); }
924 if ((h = sen_set_open(sizeof(sen_id), sizeof(uint32_t), 0))) {
925 if (sen_sym_suffix_search_with_set(sym, key, h)) {
926 sen_set_close(h);
927 h = NULL;
928 }
929 }
930 return h;
931 }
932
933 sen_id
sen_sym_common_prefix_search(sen_sym * sym,const void * key)934 sen_sym_common_prefix_search(sen_sym *sym, const void *key)
935 {
936 pat_node *rn, *rnl;
937 sen_id r, rl, r2 = SEN_SYM_NIL;
938 int c = -1, cl, len = 0, n, len2 = 8, nth_bit;
939 if (!sym || !key || sym->key_size) { return SEN_SYM_NIL; } /* only for string */
940 if (sym->v08p) { return sen_sym_common_prefix_search08(sym, key); }
941 // len = (strlen(key) + 1) * 8;
942 PAT_AT(sym, 0, rn);
943 for (r = rn->r; r;) {
944 PAT_AT(sym, r, rn);
945 if (!rn) { break; /* corrupt? */ }
946 // if (len <= rn->check) { break; }
947 if (rn->check <= c) {
948 uint8_t *p = pat_node_get_key(sym, rn);
949 if (!p) { break; }
950 if (!memcmp(p, key, strlen((char *)p))) { return r; }
951 break;
952 }
953 c = rn->check;
954 n = c >> 3;
955 while (len < n) { if (!((char *)key)[len++]) { goto exit; } }
956 nth_bit = ((uint8_t *)key)[n] & (0x80 >> (c & 7));
957 if (nth_bit) {
958 if (len2 <= c) {
959 len2 = (n + 1) * 8;
960 for (cl = c, rl = rn->l; rl; cl = rnl->check, rl = rnl->l) {
961 PAT_AT(sym, rl, rnl);
962 if (!rnl) { break; /* corrupt? */ }
963 if (len2 <= rnl->check) { break; }
964 if (rnl->check <= cl) {
965 uint8_t *p = pat_node_get_key(sym, rnl);
966 if (!p) { break; }
967 if (!p[n] && !memcmp(p, key, n)) { r2 = rl; }
968 break;
969 }
970 }
971 }
972 r = rn->r;
973 } else {
974 r = rn->l;
975 }
976 }
977 exit :
978 return r2;
979 }
980
981 inline static sen_rc
_sen_sym_del(sen_sym * sym,const char * key,int shared)982 _sen_sym_del(sen_sym *sym, const char *key, int shared)
983 {
984 sen_sym_delinfo *di;
985 uint8_t direction;
986 pat_node *rn, *rn0 = NULL, *rno;
987 int c, c0 = -1;
988 size_t len = sym->key_size * 8;
989 sen_id r, otherside, *p, *p0 = NULL;
990 if (!key) { return sen_invalid_argument; }
991 if (!len) { len = (strlen(key) + 1) * 8; }
992 di = delinfo_new(sym); /* must be called before find rn */
993 SET_DL_SH(di, shared ? DL_SHARED : 0);
994 PAT_AT(sym, 0, rn);
995 c = -1;
996 p = &rn->r;
997 for (;;) {
998 if (!(r = *p)) { return sen_invalid_argument; }
999 PAT_AT(sym, r, rn);
1000 if (!rn) { return sen_invalid_format; }
1001 if (len <= rn->check) { return sen_invalid_argument; }
1002 if (c >= rn->check) {
1003 const uint8_t *k = pat_node_get_key(sym, rn);
1004 if (!k) { return sen_invalid_argument; }
1005 if (!memcmp(k, key, len >> 3)) {
1006 break; /* found */
1007 } else { return sen_invalid_argument; }
1008 }
1009 c0 = c; c = rn->check;
1010 p0 = p; p = nth_bit((uint8_t *)key, c) ? &rn->r : &rn->l;
1011 rn0 = rn;
1012 }
1013 direction = (rn0->r == r);
1014 otherside = direction ? rn0->l : rn0->r;
1015 if (rn == rn0) {
1016 SET_DL_ST(di, DL_PHASE2);
1017 di->d = r;
1018 if (otherside) {
1019 PAT_AT(sym, otherside, rno);
1020 if (rno && c0 < rno->check && rno->check <= c) {
1021 if (!delinfo_search(sym, otherside)) {
1022 SEN_LOG(sen_log_error, "no delinfo found %d", otherside);
1023 }
1024 rno->check = 0;
1025 }
1026 }
1027 *p0 = otherside;
1028 } else {
1029 sen_sym_delinfo *ldi = NULL, *ddi = NULL;
1030 if (PAT_DEL(rn)) { ldi = delinfo_search(sym, r); }
1031 if (PAT_DEL(rn0)) { ddi = delinfo_search(sym, *p0); }
1032 if (ldi) {
1033 SET_PAT_DEL(rn, 0);
1034 SET_DL_ST(di, DL_PHASE2);
1035 if (ddi) {
1036 SET_PAT_DEL(rn0, 0);
1037 SET_DL_ST(ddi, DL_PHASE2);
1038 if (ddi == ldi) {
1039 if (r != DL_LD(ddi)) {
1040 SEN_LOG(sen_log_error, "r(%d) != DL_LD(ddi)(%d)", r, DL_LD(ddi));
1041 }
1042 di->d = r;
1043 } else {
1044 SET_DL_LD(ldi, DL_LD(ddi));
1045 di->d = r;
1046 }
1047 } else {
1048 SET_PAT_DEL(rn0, PAT_DELETING);
1049 SET_DL_LD(ldi, *p0);
1050 di->d = r;
1051 }
1052 } else {
1053 SET_PAT_DEL(rn, PAT_DELETING);
1054 if (ddi) {
1055 if (ddi->d != *p0) {
1056 SEN_LOG(sen_log_error, "ddi->d(%d) != *p0(%d)", ddi->d, *p0);
1057 }
1058 SET_PAT_DEL(rn0, 0);
1059 SET_DL_ST(ddi, DL_PHASE2);
1060 SET_DL_ST(di, DL_PHASE1);
1061 SET_DL_LD(di, DL_LD(ddi));
1062 di->d = r;
1063 /*
1064 SET_PAT_DEL(rn0, 0);
1065 ddi->d = r;
1066 SET_DL_ST(di, DL_PHASE2);
1067 di->d = *p0;
1068 */
1069 } else {
1070 SET_PAT_DEL(rn0, PAT_DELETING);
1071 SET_DL_ST(di, DL_PHASE1);
1072 SET_DL_LD(di, *p0);
1073 di->d = r;
1074 // sen_log("sym_del d=%d ld=%d stat=%d", r, *p0, DL_PHASE1);
1075 }
1076 }
1077 if (*p0 == otherside) {
1078 rn0->check = 0;
1079 } else {
1080 if (otherside) {
1081 PAT_AT(sym, otherside, rno);
1082 if (rno && c0 < rno->check && rno->check <= c) {
1083 if (!delinfo_search(sym, otherside)) {
1084 SEN_LOG(sen_log_error, "no delinfo found %d", otherside);
1085 }
1086 rno->check = 0;
1087 }
1088 }
1089 *p0 = otherside;
1090 }
1091 }
1092 sym->header->nrecords--;
1093 return sen_success;
1094 }
1095
1096 sen_rc
sen_sym_del(sen_sym * sym,const void * key)1097 sen_sym_del(sen_sym *sym, const void *key)
1098 {
1099 if (!sym) { return sen_invalid_argument; }
1100 if (sym->v08p) { return sen_sym_del08(sym, key); }
1101 if (sym->flags & SEN_SYM_WITH_SIS) {
1102 SEN_LOG(sen_log_warning, "use sen_sym_del_with_sis for sym with SEN_SYM_WITH_SIS");
1103 }
1104 if (sym->flags & SEN_INDEX_SHARED_LEXICON) {
1105 sen_id id = sen_sym_at(sym, key);
1106 if (id) {
1107 pat_node *rn;
1108 PAT_AT(sym, id, rn);
1109 if (rn && rn->bitfield >= 1 << 2) { return sen_success; }
1110 }
1111 }
1112 return _sen_sym_del(sym, key, 0);
1113 }
1114
1115 uint32_t
sen_sym_size(sen_sym * sym)1116 sen_sym_size(sen_sym *sym)
1117 {
1118 if (!sym) { return sen_invalid_argument; }
1119 return sym->header->nrecords;
1120 }
1121
1122 const char *
_sen_sym_key(sen_sym * sym,sen_id id)1123 _sen_sym_key(sen_sym *sym, sen_id id)
1124 {
1125 pat_node *node;
1126 if (sym->v08p) { return _sen_sym_key08(sym, id); }
1127 PAT_AT(sym, id, node);
1128 if (!node) { return NULL; }
1129 return (const char *) pat_node_get_key(sym, node);
1130 }
1131
1132 int
sen_sym_key(sen_sym * sym,sen_id id,void * keybuf,int bufsize)1133 sen_sym_key(sen_sym *sym, sen_id id, void *keybuf, int bufsize)
1134 {
1135 int len;
1136 uint8_t *key;
1137 pat_node *node;
1138 if (!sym) { return sen_invalid_argument; }
1139 if (sym->v08p) { return sen_sym_key08(sym, id, keybuf, bufsize); }
1140 PAT_AT(sym, id, node);
1141 if (!node) { return 0; }
1142 if (!(key = pat_node_get_key(sym, node))) { return 0; }
1143 if (!(len = sym->key_size)) { len = (int)(strlen((char *)key) + 1); }
1144 if (keybuf && bufsize >= len) { memcpy(keybuf, key, len); }
1145 return len;
1146 }
1147
1148 int
sen_sym_pocket_get(sen_sym * sym,sen_id id)1149 sen_sym_pocket_get(sen_sym *sym, sen_id id)
1150 {
1151 pat_node *node;
1152 if (!sym) { return -1; }
1153 if (sym->v08p) { return sen_sym_pocket_get08(sym, id); }
1154 PAT_AT(sym, id, node);
1155 if (!node) { return -1; }
1156 return PAT_PKT(node);
1157 }
1158
1159 sen_rc
sen_sym_pocket_set(sen_sym * sym,sen_id id,unsigned int value)1160 sen_sym_pocket_set(sen_sym *sym, sen_id id, unsigned int value)
1161 {
1162 pat_node *node;
1163 if (!sym) { return sen_invalid_argument; }
1164 if (sym->v08p) { return sen_sym_pocket_set08(sym, id, value); }
1165 PAT_AT(sym, id, node);
1166 if (value > PAT_POCKET_MAX || !node) { return sen_invalid_argument; }
1167 SET_PAT_PKT(node, value);
1168 return sen_success;
1169 }
1170
1171 sen_rc
sen_sym_pocket_incr(sen_sym * sym,sen_id id)1172 sen_sym_pocket_incr(sen_sym *sym, sen_id id)
1173 {
1174 pat_node *node;
1175 if (!sym) { return sen_invalid_argument; }
1176 if (sym->v08p) { return sen_invalid_argument; }
1177 PAT_AT(sym, id, node);
1178 if (!node) { return sen_invalid_argument; }
1179 node->bitfield += (1 << 2);
1180 if (node->bitfield < (1 << 2)) {
1181 SEN_LOG(sen_log_error, "sen_sym_pocket_incr failed %d", node->bitfield);
1182 return sen_invalid_format;
1183 }
1184 return sen_success;
1185 }
1186
1187 sen_rc
sen_sym_pocket_decr(sen_sym * sym,sen_id id)1188 sen_sym_pocket_decr(sen_sym *sym, sen_id id)
1189 {
1190 pat_node *node;
1191 if (!sym) { return sen_invalid_argument; }
1192 if (sym->v08p) { return sen_invalid_argument; }
1193 PAT_AT(sym, id, node);
1194 if (!node) { return sen_invalid_argument; }
1195 if (node->bitfield < (1 << 2)) {
1196 SEN_LOG(sen_log_error, "sen_sym_pocket_decr failed %d", node->bitfield);
1197 return sen_invalid_format;
1198 }
1199 node->bitfield -= (1 << 2);
1200 return sen_success;
1201 }
1202
1203 sen_rc
sen_sym_info(sen_sym * sym,int * key_size,unsigned * flags,sen_encoding * encoding,unsigned * nrecords,unsigned * file_size)1204 sen_sym_info(sen_sym *sym, int *key_size, unsigned *flags,
1205 sen_encoding *encoding, unsigned *nrecords, unsigned *file_size)
1206 {
1207 ERRCLR(NULL);
1208 if (!sym) { return sen_invalid_argument; }
1209 if (key_size) { *key_size = sym->key_size; }
1210 if (flags) { *flags = sym->flags; }
1211 if (encoding) { *encoding = sym->encoding; }
1212 if (nrecords) { *nrecords = sym->header->nrecords; }
1213 if (file_size) {
1214 sen_rc rc;
1215 off_t tmp = 0;
1216 if ((rc = sen_io_size(sym->io, &tmp))) {
1217 return rc;
1218 }
1219 *file_size = (unsigned) tmp; /* FIXME: inappropriate cast */
1220 }
1221 return sen_success;
1222 }
1223
1224 int
sen_sym_del_with_sis(sen_sym * sym,sen_id id,int (* func)(sen_id,void *),void * func_arg)1225 sen_sym_del_with_sis(sen_sym *sym, sen_id id,
1226 int(*func)(sen_id, void *), void *func_arg)
1227 {
1228 int level = 0, shared;
1229 const char *key = NULL, *_key;
1230 sis_node *sp, *ss = NULL, *si = sis_at(sym, id);
1231 if (sym->v08p) { return sen_sym_del_with_sis08(sym, id, func, func_arg); }
1232 while (id) {
1233 if (sym->flags & SEN_INDEX_SHARED_LEXICON) {
1234 pat_node *rn;
1235 PAT_AT(sym, id, rn);
1236 if (rn && rn->bitfield >= 1 << 2) { break; }
1237 }
1238 if ((si && si->children && si->children != id) || !func(id, func_arg)) { break; }
1239 //todo : if ERRP { break; }
1240 if (!(_key = _sen_sym_key(sym, id))) { break; }
1241 if (_key == key) {
1242 shared = 1;
1243 } else {
1244 key = _key;
1245 shared = 0;
1246 }
1247 _sen_sym_del(sym, key, shared);
1248 if (si) {
1249 sen_id *p, sid;
1250 uint32_t lkey = 0;
1251 if ((*key & 0x80) && chop(sym, &key, &lkey)) {
1252 if ((sid = sen_sym_at(sym, key)) && (ss = sis_at(sym, sid))) {
1253 for (p = &ss->children; *p && *p != sid; p = &sp->sibling) {
1254 if (*p == id) {
1255 *p = si->sibling;
1256 break;
1257 }
1258 if (!(sp = sis_at(sym, *p))) { break; }
1259 }
1260 }
1261 } else {
1262 sid = SEN_SYM_NIL;
1263 }
1264 si->sibling = 0;
1265 si->children = 0;
1266 id = sid;
1267 si = ss;
1268 } else {
1269 id = SEN_SYM_NIL;
1270 }
1271 level++;
1272 }
1273 if (level) {
1274 uint32_t lkey = 0;
1275 while (id && key) {
1276 if (_sen_sym_key(sym, id) != key) { break; }
1277 {
1278 pat_node *rn;
1279 PAT_AT(sym, id, rn);
1280 if (!rn) { break; }
1281 if (lkey) {
1282 rn->key = lkey;
1283 } else {
1284 pat_node_set_key(sym, rn, (uint8_t *)key, (unsigned int)(strlen(key) + 1));
1285 lkey = rn->key;
1286 }
1287 }
1288 if (!((*key & 0x80) && chop(sym, &key, &lkey))) { break; }
1289 id = sen_sym_at(sym, key);
1290 }
1291 }
1292 return level;
1293 }
1294
1295 sen_id
sen_sym_next(sen_sym * sym,sen_id id)1296 sen_sym_next(sen_sym *sym, sen_id id)
1297 {
1298 while (++id <= sym->header->curr_rec) {
1299 if (id == sen_sym_at(sym, _sen_sym_key(sym, id))) {
1300 return id;
1301 }
1302 }
1303 return SEN_SYM_NIL;
1304 }
1305
1306 sen_id
sen_sym_curr_id(sen_sym * sym)1307 sen_sym_curr_id(sen_sym *sym)
1308 {
1309 return sym->header->curr_rec;
1310 }
1311
1312 sen_rc
sen_sym_lock(sen_sym * sym,int timeout)1313 sen_sym_lock(sen_sym *sym, int timeout)
1314 {
1315 static int _ncalls = 0, _ncolls = 0;
1316 uint32_t count;
1317 _ncalls++;
1318 for (count = 0;; count++) {
1319 uint32_t lock;
1320 SEN_ATOMIC_ADD_EX(sym->lock, 1, lock);
1321 if (lock) {
1322 SEN_ATOMIC_ADD_EX(sym->lock, -1, lock);
1323 if (!timeout || (timeout > 0 && timeout == count)) { break; }
1324 if (!(++_ncolls % 1000000) && (_ncolls > _ncalls)) {
1325 if (_ncolls < 0 || _ncalls < 0) {
1326 _ncolls = 0; _ncalls = 0;
1327 } else {
1328 SEN_LOG(sen_log_notice, "sym(%p) collisions(%d/%d)", sym, _ncolls, _ncalls);
1329 }
1330 }
1331 usleep(1000);
1332 continue;
1333 }
1334 return sen_success;
1335 }
1336 return sen_other_error;
1337 }
1338
1339 sen_rc
sen_sym_unlock(sen_sym * sym)1340 sen_sym_unlock(sen_sym *sym)
1341 {
1342 uint32_t lock;
1343 SEN_ATOMIC_ADD_EX(sym->lock, -1, lock);
1344 return sen_success;
1345 }
1346
1347 sen_rc
sen_sym_clear_lock(sen_sym * sym)1348 sen_sym_clear_lock(sen_sym *sym)
1349 {
1350 *sym->lock = 0;
1351 return sen_success;
1352 }
1353
1354 int
sen_sym_scan(sen_sym * sym,const char * str,unsigned int str_len,sen_sym_scan_hit * sh,unsigned int sh_size,const char ** rest)1355 sen_sym_scan(sen_sym *sym, const char *str, unsigned int str_len,
1356 sen_sym_scan_hit *sh, unsigned int sh_size, const char **rest)
1357 {
1358 int n = 0;
1359 sen_id tid;
1360 if (sym->flags & SEN_INDEX_NORMALIZE) {
1361 sen_nstr *nstr = sen_nstr_open(str, str_len, sym->encoding, SEN_STR_WITH_CHECKS);
1362 if (nstr) {
1363 int16_t *cp = nstr->checks;
1364 unsigned int offset = 0, offset0 = 0;
1365 const char *sp = nstr->norm, *se = nstr->norm + nstr->norm_blen;
1366 while (n < sh_size) {
1367 if ((tid = sen_sym_common_prefix_search(sym, sp))) {
1368 int len = strlen(_sen_sym_key(sym, tid));
1369 sh[n].id = tid;
1370 sh[n].offset = (*cp > 0) ? offset : offset0;
1371 while (len--) {
1372 if (*cp > 0) { offset0 = offset; offset += *cp; }
1373 sp++; cp++;
1374 }
1375 sh[n].length = offset - sh[n].offset;
1376 n++;
1377 } else {
1378 if (*cp > 0) { offset0 = offset; offset += *cp; }
1379 do {
1380 sp++; cp++;
1381 } while (sp < se && !*cp);
1382 }
1383 if (se <= sp) { offset = str_len; break; }
1384 }
1385 if (rest) { *rest = nstr->orig + offset; }
1386 sen_nstr_close(nstr);
1387 } else {
1388 n = -1;
1389 if (rest) { *rest = str; }
1390 }
1391 } else {
1392 int len;
1393 const char *sp, *se = str + str_len;
1394 for (sp = str; sp < se && n < sh_size; sp += len) {
1395 if ((tid = sen_sym_common_prefix_search(sym, sp))) {
1396 len = strlen(_sen_sym_key(sym, tid));
1397 sh[n].id = tid;
1398 sh[n].offset = sp - str;
1399 sh[n].length = len;
1400 n++;
1401 } else {
1402 len = sen_str_charlen_nonnull(sp, se, sym->encoding);
1403 }
1404 if (!len) { break; }
1405 }
1406 if (rest) { *rest = sp; }
1407 }
1408 return n;
1409 }
1410
1411 #define INITIAL_SIZE 512
1412
1413 inline static void
push(sen_sym_cursor * c,sen_id id,uint16_t check)1414 push(sen_sym_cursor *c, sen_id id, uint16_t check)
1415 {
1416 sen_ctx *ctx = c->ctx;
1417 sen_sym_cursor_entry *se;
1418 if (c->size <= c->sp) {
1419 if (c->ss) {
1420 uint32_t size = c->size * 4;
1421 sen_sym_cursor_entry *ss = SEN_REALLOC(c->ss, size);
1422 if (!ss) { return; /* give up */ }
1423 c->ss = ss;
1424 c->size = size;
1425 } else {
1426 if (!(c->ss = SEN_MALLOC(sizeof(sen_sym_cursor_entry) * INITIAL_SIZE))) {
1427 return; /* give up */
1428 }
1429 c->size = INITIAL_SIZE;
1430 }
1431 }
1432 se = &c->ss[c->sp++];
1433 se->id = id;
1434 se->check = check;
1435 }
1436
1437 inline static sen_sym_cursor_entry *
pop(sen_sym_cursor * c)1438 pop(sen_sym_cursor *c)
1439 {
1440 return c->sp ? &c->ss[--c->sp] : NULL;
1441 }
1442
1443 sen_id
sen_sym_cursor_next(sen_sym_cursor * c)1444 sen_sym_cursor_next(sen_sym_cursor *c)
1445 {
1446 pat_node *node;
1447 sen_sym_cursor_entry *se;
1448 while ((se = pop(c))) {
1449 sen_id id = se->id;
1450 uint16_t check = se->check;
1451 for (;;) {
1452 if (id) {
1453 PAT_AT(c->sym, id, node);
1454 if (node) {
1455 if (node->check > check) {
1456 if (c->flags & SEN_SYM_ASCENDING) {
1457 push(c, node->r, node->check);
1458 id = node->l;
1459 } else {
1460 push(c, node->l, node->check);
1461 id = node->r;
1462 }
1463 check = node->check;
1464 continue;
1465 } else {
1466 if (id == c->limit) { c->sp = 0; }
1467 return id;
1468 }
1469 }
1470 } else {
1471 break;
1472 }
1473 }
1474 }
1475 return SEN_SYM_NIL;
1476 }
1477
1478 void
sen_sym_cursor_close(sen_sym_cursor * c)1479 sen_sym_cursor_close(sen_sym_cursor *c)
1480 {
1481 sen_ctx *ctx = c->ctx;
1482 if (c->ss) { SEN_FREE(c->ss); }
1483 SEN_FREE(c);
1484 }
1485
1486 inline static int
bitcmp(const void * s1,const void * s2,int offset,int length)1487 bitcmp(const void *s1, const void *s2, int offset, int length)
1488 {
1489 int r, rest = length + (offset & 7) - 8, bl = offset >> 3, mask = 0xff >> (offset & 7);
1490 unsigned char *a = (unsigned char *)s1 + bl, *b = (unsigned char *)s2 + bl;
1491 if (rest <= 0) {
1492 mask &= 0xff << -rest;
1493 return (*a & mask) - (*b & mask);
1494 }
1495 if ((r = (*a & mask) - (*b & mask))) { return r; }
1496 a++; b++;
1497 if ((bl = rest >> 3)) {
1498 if ((r = memcmp(a, b, bl))) { return r; }
1499 a += bl; b += bl;
1500 }
1501 mask = 0xff << (8 - (rest & 7));
1502 return (*a & mask) - (*b & mask);
1503 }
1504
1505 inline static sen_rc
set_cursor_ascend(sen_sym * sym,sen_sym_cursor * c,const void * key,int flags)1506 set_cursor_ascend(sen_sym *sym, sen_sym_cursor *c, const void *key, int flags)
1507 {
1508 sen_id id;
1509 pat_node *node;
1510 const uint8_t *k;
1511 int r, check = -1, len, c2;
1512 if (!(len = sym->key_size * 8)) { len = (strlen(key) + 1) * 8; }
1513 PAT_AT(sym, 0, node);
1514 for (id = node->r; id;) {
1515 PAT_AT(sym, id, node);
1516 if (!node) { return sen_invalid_format; }
1517 if (node->check <= check) {
1518 if (!(k = pat_node_get_key(sym, node))) { return sen_invalid_format; }
1519 if (sym->key_size) {
1520 if (flags & SEN_SYM_GT) {
1521 if (memcmp(key, k, sym->key_size) < 0) { push(c, id, check); }
1522 } else {
1523 if (memcmp(key, k, sym->key_size) <= 0) { push(c, id, check); }
1524 }
1525 } else {
1526 int l = (strlen(k) + 1) * 8;
1527 if (l == len) {
1528 if (flags & SEN_SYM_GT) {
1529 if (memcmp(key, k, l >> 3) < 0) { push(c, id, check); }
1530 } else {
1531 if (memcmp(key, k, l >> 3) <= 0) { push(c, id, check); }
1532 }
1533 } else if (l < len) {
1534 if (memcmp(key, k, l >> 3) < 0) { push(c, id, check); }
1535 } else {
1536 if (memcmp(key, k, len >> 3) <= 0) { push(c, id, check); }
1537 }
1538 }
1539 break;
1540 }
1541 c2 = len < node->check ? len : node->check;
1542 if (check + 1 < c2) {
1543 if (!(k = pat_node_get_key(sym, node))) { return sen_invalid_format; }
1544 if ((r = bitcmp(key, k, check + 1, c2 - (check + 1)))) {
1545 if (r < 0) {
1546 push(c, node->r, node->check);
1547 push(c, node->l, node->check);
1548 }
1549 break;
1550 }
1551 }
1552 if (len <= node->check) { break; }
1553 check = node->check;
1554 if (nth_bit((uint8_t *)key, check)) {
1555 id = node->r;
1556 } else {
1557 push(c, node->r, node->check);
1558 id = node->l;
1559 }
1560 }
1561 return sen_success;
1562 }
1563
1564 inline static sen_rc
set_cursor_descend(sen_sym * sym,sen_sym_cursor * c,const void * key,int flags)1565 set_cursor_descend(sen_sym *sym, sen_sym_cursor *c, const void *key, int flags)
1566 {
1567 sen_id id;
1568 pat_node *node;
1569 const uint8_t *k;
1570 int r, check = -1, len, c2;
1571 if (!(len = sym->key_size * 8)) { len = (strlen(key) + 1) * 8; }
1572 PAT_AT(sym, 0, node);
1573 for (id = node->r; id;) {
1574 PAT_AT(sym, id, node);
1575 if (!node) { return sen_invalid_format; }
1576 if (node->check <= check) {
1577 if (!(k = pat_node_get_key(sym, node))) { return sen_invalid_format; }
1578 if (sym->key_size) {
1579 if (flags & SEN_SYM_LT) {
1580 if (memcmp(key, k, sym->key_size) > 0) { push(c, id, check); }
1581 } else {
1582 if (memcmp(key, k, sym->key_size) >= 0) { push(c, id, check); }
1583 }
1584 } else {
1585 int l = (strlen(k) + 1) * 8;
1586 if (l <= len) {
1587 if ((flags & SEN_SYM_LT) && l == len) {
1588 if (memcmp(key, k, l >> 3) > 0) { push(c, id, check); }
1589 } else {
1590 if (memcmp(key, k, l >> 3) >= 0) { push(c, id, check); }
1591 }
1592 } else {
1593 if (memcmp(key, k, len >> 3) > 0) { push(c, id, check); }
1594 }
1595 }
1596 break;
1597 }
1598 c2 = len < node->check ? len : node->check;
1599 if (check + 1 < c2) {
1600 if (!(k = pat_node_get_key(sym, node))) { return sen_invalid_format; }
1601 if ((r = bitcmp(key, k, check + 1, c2 - (check + 1)))) {
1602 if (r >= 0) {
1603 push(c, node->l, node->check);
1604 push(c, node->r, node->check);
1605 }
1606 break;
1607 }
1608 }
1609 if (len <= node->check) {
1610 push(c, node->l, node->check);
1611 push(c, node->r, node->check);
1612 break;
1613 }
1614 check = node->check;
1615 if (nth_bit((uint8_t *)key, check)) {
1616 push(c, node->l, node->check);
1617 id = node->r;
1618 } else {
1619 id = node->l;
1620 }
1621 }
1622 return sen_success;
1623 }
1624
1625 sen_sym_cursor *
sen_sym_cursor_open(sen_sym * sym,sen_ctx * ctx,const void * min,const void * max,int flags)1626 sen_sym_cursor_open(sen_sym *sym, sen_ctx *ctx,
1627 const void *min, const void *max, int flags)
1628 {
1629 sen_id id;
1630 pat_node *node;
1631 sen_sym_cursor *c;
1632 if (!sym || !ctx) { return NULL; }
1633 if (!(c = SEN_MALLOCN(sen_sym_cursor, 1))) { return NULL; }
1634 c->sym = sym;
1635 c->ctx = ctx;
1636 c->size = 0;
1637 c->sp = 0;
1638 c->ss = NULL;
1639 c->limit = 0;
1640 if (flags & SEN_SYM_ASCENDING) {
1641 if (max) {
1642 set_cursor_descend(sym, c, max, flags);
1643 c->flags = SEN_SYM_DESCENDING;
1644 c->limit = sen_sym_cursor_next(c);
1645 c->sp = 0;
1646 }
1647 if (min) {
1648 set_cursor_ascend(sym, c, min, flags);
1649 } else {
1650 PAT_AT(sym, 0, node);
1651 if (!node) {
1652 sen_sym_cursor_close(c);
1653 return NULL;
1654 }
1655 if ((id = node->r)) {
1656 PAT_AT(sym, id, node);
1657 if (node) {
1658 push(c, node->r, node->check);
1659 push(c, node->l, node->check);
1660 }
1661 }
1662 }
1663 } else {
1664 if (min) {
1665 set_cursor_ascend(sym, c, min, flags);
1666 c->flags = SEN_SYM_ASCENDING;
1667 c->limit = sen_sym_cursor_next(c);
1668 c->sp = 0;
1669 }
1670 if (max) {
1671 set_cursor_descend(sym, c, max, flags);
1672 } else {
1673 PAT_AT(sym, 0, node);
1674 if (!node) {
1675 sen_sym_cursor_close(c);
1676 return NULL;
1677 }
1678 if ((id = node->r)) {
1679 PAT_AT(sym, id, node);
1680 if (node) {
1681 push(c, node->l, node->check);
1682 push(c, node->r, node->check);
1683 }
1684 }
1685 }
1686 }
1687 c->flags = flags;
1688 return c;
1689 }
1690