1 /* Copyright(C) 2004 Brazil
2
3 This library is free software; you can redistribute it and/or
4 modify it under the terms of the GNU Lesser General Public
5 License as published by the Free Software Foundation; either
6 version 2.1 of the License, or (at your option) any later version.
7
8 This library is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 Lesser General Public License for more details.
12
13 You should have received a copy of the GNU Lesser General Public
14 License along with this library; if not, write to the Free Software
15 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 */
17 #include "senna_in.h"
18 #include <fcntl.h>
19 #include <stdio.h> /* for debug */
20 #include <string.h>
21 #include <sys/stat.h>
22 #include "sym.h"
23
24 #define SEN_SYM_HEADER_SIZE 0x10000
25 #define SEN_IDSTR "SENNA:SYM:00.00"
26 #define SEN_SYM_DELETED (SEN_SYM_MAX_ID + 1)
27
28 #define SEN_SYM_SEGMENT_SIZE 0x400000
29 #define W_OF_KEY_IN_A_SEGMENT 22
30 #define W_OF_PAT_IN_A_SEGMENT 18
31 #define W_OF_SIS_IN_A_SEGMENT 19
32 #define KEY_MASK_IN_A_SEGMENT 0x3fffff
33 #define PAT_MASK_IN_A_SEGMENT 0x3ffff
34 #define SIS_MASK_IN_A_SEGMENT 0x7ffff
35
36 struct sen_sym_header {
37 char idstr[16];
38 uint32_t flags;
39 sen_encoding encoding;
40 uint32_t key_size;
41 uint32_t nrecords;
42 uint32_t curr_rec;
43 int32_t curr_key;
44 int32_t curr_del;
45 int32_t curr_del2;
46 int32_t curr_del3;
47 uint8_t segments[SEN_SYM_MAX_SEGMENT];
48 sen_sym_delinfo delinfos[SEN_SYM_NDELINFOS];
49 sen_id garbages[(SEN_SYM_MAX_KEY_LENGTH + 1) / 8];
50 uint32_t lock;
51 };
52
53 typedef struct {
54 sen_id r;
55 sen_id l;
56 uint32_t key;
57 uint16_t check;
58 /* deleting : 1, immediate : 1, pocket : 14 */
59 uint16_t bitfield;
60 } pat_node;
61
62 typedef struct {
63 int32_t segno;
64 void *addr;
65 } sen_sym_seginfo;
66
67 typedef struct {
68 uint8_t v08p;
69 sen_io *io;
70 struct sen_sym_header *header;
71 uint32_t flags;
72 sen_encoding encoding;
73 uint32_t key_size;
74 uint32_t nref;
75 uint32_t *lock;
76 sen_sym_seginfo keyarray[SEN_SYM_MAX_SEGMENT];
77 sen_sym_seginfo patarray[SEN_SYM_MAX_SEGMENT];
78 sen_sym_seginfo sisarray[SEN_SYM_MAX_SEGMENT];
79 }
80 sen_sym08;
81
82 #define SYM ((sen_sym08 *)sym)
83
84 #define PAT_DELETING 1
85 #define PAT_IMMEDIATE 2
86 #define PAT_POCKET_MAX 0x3fff
87 #define PAT_DEL(x) ((x)->bitfield & PAT_DELETING)
88 #define PAT_IMD(x) ((x)->bitfield & PAT_IMMEDIATE)
89 #define PAT_PKT(x) ((x)->bitfield >> 2)
90 #define SET_PAT_DEL(x,v) ((x)->bitfield = ((x)->bitfield & 0xfffe) | (v))
91 #define SET_PAT_IMD(x,v) ((x)->bitfield = ((x)->bitfield & 0xfffd) | (v))
92 #define SET_PAT_PKT(x,v) ((x)->bitfield = ((x)->bitfield & 3) | ((v) << 2))
93
94 typedef struct {
95 sen_id children;
96 sen_id sibling;
97 } sis_node;
98
99 enum {
100 segment_empty = 0,
101 segment_key,
102 segment_pat,
103 segment_sis
104 };
105
106 /* bit operation */
107
108 inline static uint32_t
nth_bit(const uint8_t * key,uint32_t n)109 nth_bit(const uint8_t *key, uint32_t n)
110 {
111 return key[n >> 3] & (0x80 >> (n & 7));
112 }
113
114 /* segment operation */
115
116 inline static sen_rc
load_all_segments(sen_sym08 * sym)117 load_all_segments(sen_sym08 *sym)
118 {
119 sen_rc rc = sen_success;
120 int seg, nkey = 0, npat = 0, nsis = 0, empty = 0;
121 for (seg = 0; seg < SEN_SYM_MAX_SEGMENT; seg++) {
122 switch (sym->header->segments[seg]) {
123 case segment_empty :
124 empty++;
125 break;
126 case segment_key :
127 sym->keyarray[nkey++].segno = seg;
128 break;
129 case segment_pat :
130 sym->patarray[npat++].segno = seg;
131 break;
132 case segment_sis :
133 sym->sisarray[nsis++].segno = seg;
134 break;
135 default :
136 rc = sen_invalid_format;
137 break;
138 }
139 }
140 return rc;
141 }
142
143 inline static sen_rc
segment_new(sen_sym08 * sym,int segtype)144 segment_new(sen_sym08 *sym, int segtype)
145 {
146 int seg = 0, type, n = 0;
147 while ((type = sym->header->segments[seg])) {
148 if (type == segtype) { n++; }
149 if (++seg >= SEN_SYM_MAX_SEGMENT) { return sen_memory_exhausted; }
150 }
151 sym->header->segments[seg] = segtype;
152 switch (segtype) {
153 case segment_key :
154 sym->keyarray[n].segno = seg;
155 sym->header->curr_key = n * SEN_SYM_SEGMENT_SIZE;
156 break;
157 case segment_pat :
158 sym->patarray[n].segno = seg;
159 break;
160 case segment_sis :
161 sym->sisarray[n].segno = seg;
162 break;
163 }
164 return sen_success;
165 }
166
167 #define SEG_LOAD(io,si)\
168 {\
169 int32_t segno = si->segno;\
170 if (0 <= segno && segno < SEN_SYM_MAX_SEGMENT) {\
171 SEN_IO_SEG_REF(io, segno, si->addr);\
172 SEN_IO_SEG_UNREF(io, segno);\
173 }\
174 }
175
176 /* patricia array operation */
177
178 inline static pat_node *
pat_at(sen_sym08 * sym,sen_id id)179 pat_at(sen_sym08 *sym, sen_id id)
180 {
181 sen_sym_seginfo *si;
182 if (id > SEN_SYM_MAX_ID) { return NULL; }
183 si = &sym->patarray[id >> W_OF_PAT_IN_A_SEGMENT];
184 if (!si->addr) {
185 if (si->segno == -1) { load_all_segments(sym); }
186 SEG_LOAD(sym->io, si);
187 if (!si->addr) { return NULL; }
188 }
189 return
190 (pat_node *)(((byte *)si->addr) + ((id & PAT_MASK_IN_A_SEGMENT) * sizeof(pat_node)));
191 }
192
193 inline static pat_node *
pat_get(sen_sym08 * sym,sen_id id)194 pat_get(sen_sym08 *sym, sen_id id)
195 {
196 sen_sym_seginfo *si;
197 if (id > SEN_SYM_MAX_ID) { return NULL; }
198 si = &sym->patarray[id >> W_OF_PAT_IN_A_SEGMENT];
199 if (!si->addr) {
200 if (si->segno == -1) { load_all_segments(sym); }
201 while (si->segno == -1) {
202 if (segment_new(sym, segment_pat)) { return NULL; }
203 }
204 SEG_LOAD(sym->io, si);
205 if (!si->addr) { return NULL; }
206 }
207 return
208 (pat_node *)(((byte *)si->addr) + ((id & PAT_MASK_IN_A_SEGMENT) * sizeof(pat_node)));
209 }
210
211 inline static pat_node *
pat_node_new(sen_sym08 * sym,sen_id * id)212 pat_node_new(sen_sym08 *sym, sen_id *id)
213 {
214 uint32_t n = sym->header->curr_rec + 1;
215 pat_node *res;
216 if (n > SEN_SYM_MAX_ID) { return NULL; }
217 if ((res = pat_get(sym, n))) {
218 sym->header->curr_rec = n;
219 sym->header->nrecords++;
220 }
221 if (id) { *id = n; }
222 return res;
223 }
224
225 /* sis operation */
226
227 inline static sis_node *
sis_at(sen_sym08 * sym,sen_id id)228 sis_at(sen_sym08 *sym, sen_id id)
229 {
230 sen_sym_seginfo *si;
231 if (id > SEN_SYM_MAX_ID) { return NULL; }
232 si = &sym->sisarray[id >> W_OF_SIS_IN_A_SEGMENT];
233 if (!si->addr) {
234 if (si->segno == -1) { load_all_segments(sym); }
235 SEG_LOAD(sym->io, si);
236 if (!si->addr) { return NULL; }
237 }
238 return
239 (sis_node *)(((byte *)si->addr) + ((id & SIS_MASK_IN_A_SEGMENT) * sizeof(sis_node)));
240 }
241
242 inline static sis_node *
sis_get(sen_sym08 * sym,sen_id id)243 sis_get(sen_sym08 *sym, sen_id id)
244 {
245 sen_sym_seginfo *si;
246 if (id == SEN_SYM_NIL || SEN_SYM_MAX_ID < id) { return NULL; }
247 si = &sym->sisarray[id >> W_OF_SIS_IN_A_SEGMENT];
248 if (!si->addr) {
249 if (si->segno == -1) { load_all_segments(sym); }
250 while (si->segno == -1) {
251 if (segment_new(sym, segment_sis)) { return NULL; }
252 }
253 SEG_LOAD(sym->io, si);
254 if (!si->addr) { return NULL; }
255 }
256 return
257 (sis_node *)(((byte *)si->addr) + ((id & SIS_MASK_IN_A_SEGMENT) * sizeof(sis_node)));
258 }
259
260 #define MAX_LEVEL 16
261
262 static void
sis_collect(sen_sym08 * sym,sen_set * h,sen_id id,uint32_t level)263 sis_collect(sen_sym08 *sym, sen_set *h, sen_id id, uint32_t level)
264 {
265 uint32_t *offset;
266 sis_node *sl = sis_at(sym, id);
267 if (sl) {
268 sen_id sid = sl->children;
269 while (sid && sid != id) {
270 sen_set_get(h, &sid, (void **) &offset);
271 *offset = level;
272 if (level < MAX_LEVEL) { sis_collect(sym, h, sid, level + 1); }
273 if (!(sl = sis_at(sym, sid))) { break; }
274 sid = sl->sibling;
275 }
276 }
277 }
278
279 /* key operation */
280
281 #define KEY_AT(sym,pos,ptr) \
282 {\
283 sen_sym_seginfo *si = &sym->keyarray[pos >> W_OF_KEY_IN_A_SEGMENT];\
284 if (si->addr) {\
285 ptr = (char *)(((byte *)si->addr) + (pos & KEY_MASK_IN_A_SEGMENT));\
286 } else {\
287 if (si->segno == -1) { load_all_segments(sym); }\
288 SEG_LOAD(sym->io, si);\
289 ptr = si->addr\
290 ? (char *)(((byte *)si->addr) + (pos & KEY_MASK_IN_A_SEGMENT))\
291 : NULL;\
292 }\
293 }
294
295 inline static uint32_t
key_put(sen_sym08 * sym,const uint8_t * key,int len)296 key_put(sen_sym08 *sym, const uint8_t *key, int len)
297 {
298 int lpos;
299 uint32_t res = sym->header->curr_key;
300 if (len >= SEN_SYM_SEGMENT_SIZE) { return 0; }
301 lpos = res & KEY_MASK_IN_A_SEGMENT;
302 if (lpos + len > SEN_SYM_SEGMENT_SIZE || !lpos) {
303 if (segment_new(sym, segment_key)) { return 0; }
304 res = sym->header->curr_key;
305 }
306 {
307 char *dest;
308 KEY_AT(sym, res, dest);
309 if (!dest) { return 0; }
310 memcpy(dest, key, len);
311 }
312 sym->header->curr_key += len;
313 return res;
314 }
315
316 inline static char *
pat_node_get_key(sen_sym08 * sym,pat_node * n)317 pat_node_get_key(sen_sym08 *sym, pat_node *n)
318 {
319 if (PAT_IMD(n)) {
320 return (char *)&n->key;
321 } else {
322 char *res;
323 KEY_AT(sym, n->key, res);
324 return res;
325 }
326 }
327
328 inline static sen_rc
pat_node_set_key(sen_sym08 * sym,pat_node * n,const uint8_t * key,unsigned int len)329 pat_node_set_key(sen_sym08 *sym, pat_node *n, const uint8_t *key, unsigned int len)
330 {
331 if (!key || !len) { return sen_invalid_argument; }
332 if (len <= sizeof(uint32_t)) {
333 SET_PAT_IMD(n, PAT_IMMEDIATE);
334 memcpy(&n->key, key, len);
335 } else {
336 SET_PAT_IMD(n, 0);
337 n->key = key_put(sym, key, len);
338 }
339 return sen_success;
340 }
341
342 /* delinfo operation */
343
344 #define DL_EMPTY 0x00000000
345 #define DL_PHASE1 0x00000001
346 #define DL_PHASE2 0x00000002
347 #define DL_STMASK 0x00000003
348 #define DL_SHARED 0x80000000
349 #define DL_LD(x) (((x)->bitfield >> 2) & SEN_SYM_MAX_ID)
350 #define DL_ST(x) ((x)->bitfield & DL_STMASK)
351 #define DL_SH(x) ((x)->bitfield & DL_SHARED)
352 #define SET_DL_LD(x,v) ((x)->bitfield = ((x)->bitfield & (DL_STMASK|DL_SHARED)) | \
353 (((v) & SEN_SYM_MAX_ID) << 2))
354 #define SET_DL_ST(x,v) ((x)->bitfield = ((x)->bitfield & ~DL_STMASK) | ((v) & DL_STMASK))
355 #define SET_DL_SH(x,v) ((x)->bitfield = ((x)->bitfield & ~DL_SHARED) | ((v) & DL_SHARED))
356
357 inline static sen_sym_delinfo *
delinfo_search(sen_sym08 * sym,sen_id id)358 delinfo_search(sen_sym08 *sym, sen_id id)
359 {
360 int i;
361 sen_sym_delinfo *di;
362 for (i = (sym->header->curr_del2) & SEN_SYM_MDELINFOS;
363 i != sym->header->curr_del;
364 i = (i + 1) & SEN_SYM_MDELINFOS) {
365 di = &sym->header->delinfos[i];
366 if (DL_ST(di) != DL_PHASE1) { continue; }
367 if (DL_LD(di) == id) { return di; }
368 if (di->d == id) { return di; }
369 }
370 return NULL;
371 }
372
373 inline static sen_rc
delinfo_turn_2(sen_sym08 * sym,sen_sym_delinfo * di)374 delinfo_turn_2(sen_sym08 *sym, sen_sym_delinfo *di)
375 {
376 sen_id d, *p = NULL;
377 pat_node *ln, *dn;
378 // sen_log("delinfo_turn_2> di->d=%d di->ld=%d stat=%d", di->d, DL_LD(di), DL_ST(di));
379 if (DL_ST(di) != DL_PHASE1) { return sen_success; }
380 if (!(ln = pat_at(sym, DL_LD(di)))) { return sen_invalid_argument; }
381 if (!(d = di->d)) { return sen_invalid_argument; }
382 if (!(dn = pat_at(sym, d))) { return sen_invalid_argument; }
383 SET_PAT_DEL(ln, 0);
384 SET_PAT_DEL(dn, 0);
385 {
386 sen_id r, *p0;
387 pat_node *rn;
388 int c;
389 size_t len = sym->key_size * 8;
390 const char *key = pat_node_get_key(sym, dn);
391 if (!key) { return sen_invalid_argument; }
392 if (!len) { len = (strlen(key) + 1) * 8; }
393 rn = pat_at(sym, 0);
394 c = -1;
395 p0 = &rn->r;
396 while ((r = *p0)) {
397 if (r == d) {
398 p = p0;
399 break;
400 }
401 if (!(rn = pat_at(sym, r))) { return sen_invalid_format; }
402 if (rn->check <= c || len <= rn->check) { break; }
403 c = rn->check;
404 p0 = nth_bit((uint8_t *)key, c) ? &rn->r : &rn->l;
405 }
406 }
407 if (p) {
408 ln->check = dn->check;
409 ln->r = dn->r;
410 ln->l = dn->l;
411 *p = DL_LD(di);
412 } else {
413 /* debug */
414 int j;
415 sen_id dd;
416 sen_sym_delinfo *ddi;
417 SEN_LOG(sen_log_debug, "failed to find d=%d", d);
418 for (j = (sym->header->curr_del2 + 1) & SEN_SYM_MDELINFOS;
419 j != sym->header->curr_del;
420 j = (j + 1) & SEN_SYM_MDELINFOS) {
421 ddi = &sym->header->delinfos[j];
422 if (DL_ST(ddi) != DL_PHASE1) { continue; }
423 if (!(ln = pat_at(sym, DL_LD(ddi)))) { continue; }
424 if (!(dd = ddi->d)) { continue; }
425 if (d == DL_LD(ddi)) {
426 SEN_LOG(sen_log_debug, "found!!!, d(%d) become ld of (%d)", d, dd);
427 }
428 }
429 /* debug */
430 }
431 SET_DL_ST(di, DL_PHASE2);
432 di->d = d;
433 // sen_log("delinfo_turn_2< di->d=%d di->ld=%d", di->d, DL_LD(di));
434 return sen_success;
435 }
436
437 inline static sen_rc
delinfo_turn_3(sen_sym08 * sym,sen_sym_delinfo * di)438 delinfo_turn_3(sen_sym08 *sym, sen_sym_delinfo *di)
439 {
440 pat_node *dn;
441 uint32_t size;
442 if (DL_ST(di) != DL_PHASE2) { return sen_success; }
443 if (!(dn = pat_at(sym, di->d))) { return sen_invalid_argument; }
444 if (DL_SH(di)) {
445 SET_PAT_IMD(dn, PAT_IMMEDIATE);
446 size = 0;
447 } else {
448 if (sym->key_size) {
449 size = sym->key_size;
450 } else {
451 // size = dn->check = (uint16_t)(strlen(key) + 1);
452 if (PAT_IMD(dn)) {
453 size = 0;
454 } else {
455 const char *key = pat_node_get_key(sym, dn);
456 if (!key) { return sen_invalid_argument; }
457 size = strlen(key) + 1;
458 }
459 }
460 }
461 SET_DL_ST(di, DL_EMPTY);
462 dn->r = SEN_SYM_DELETED;
463 dn->l = sym->header->garbages[size];
464 sym->header->garbages[size] = di->d;
465 return sen_success;
466 }
467
468 inline static sen_sym_delinfo *
delinfo_new(sen_sym08 * sym)469 delinfo_new(sen_sym08 *sym)
470 {
471 sen_sym_delinfo *res = &sym->header->delinfos[sym->header->curr_del];
472 uint32_t n = (sym->header->curr_del + 1) & SEN_SYM_MDELINFOS;
473 int gap = ((n + SEN_SYM_NDELINFOS - sym->header->curr_del2) & SEN_SYM_MDELINFOS)
474 - (SEN_SYM_NDELINFOS / 2);
475 while (gap-- > 0) {
476 if (delinfo_turn_2(sym, &sym->header->delinfos[sym->header->curr_del2])) {
477 SEN_LOG(sen_log_crit, "d2 failed: %d", DL_LD(&sym->header->delinfos[sym->header->curr_del2]));
478 }
479 sym->header->curr_del2 = (sym->header->curr_del2 + 1) & SEN_SYM_MDELINFOS;
480 }
481 if (n == sym->header->curr_del3) {
482 if (delinfo_turn_3(sym, &sym->header->delinfos[sym->header->curr_del3])) {
483 SEN_LOG(sen_log_crit, "d3 failed: %d", DL_LD(&sym->header->delinfos[sym->header->curr_del3]));
484 }
485 sym->header->curr_del3 = (sym->header->curr_del3 + 1) & SEN_SYM_MDELINFOS;
486 }
487 sym->header->curr_del = n;
488 return res;
489 }
490
491 /* sym operation */
492
493 sen_sym *
sen_sym_create08(const char * path,uint32_t key_size,uint32_t flags,sen_encoding encoding)494 sen_sym_create08(const char *path, uint32_t key_size,
495 uint32_t flags, sen_encoding encoding)
496 {
497 int i;
498 sen_io *io;
499 sen_sym08 *sym = NULL;
500 pat_node *node0;
501 struct sen_sym_header *header;
502 io = sen_io_create(path, SEN_SYM_HEADER_SIZE, SEN_SYM_SEGMENT_SIZE,
503 SEN_SYM_MAX_SEGMENT, sen_io_auto, SEN_SYM_MAX_SEGMENT);
504 if (!io) { return NULL; }
505 header = sen_io_header(io);
506 memcpy(header->idstr, SEN_IDSTR, 16);
507 header->flags = flags;
508 header->encoding = encoding;
509 header->key_size = key_size;
510 header->nrecords = 0;
511 header->curr_rec = 0;
512 header->curr_key = -1;
513 header->curr_del = 0;
514 header->curr_del2 = 0;
515 header->curr_del3 = 0;
516 header->lock = 0;
517 for (i = 0; i < SEN_SYM_MAX_SEGMENT; i++) {
518 header->segments[i] = segment_empty;
519 }
520 if (!(sym = SEN_GMALLOC(sizeof(sen_sym08)))) {
521 sen_io_close(io);
522 return NULL;
523 }
524 sym->v08p = 1;
525 sym->io = io;
526 sym->header = header;
527 sym->key_size = key_size;
528 sym->encoding = encoding;
529 sym->flags = flags;
530 sym->lock = &header->lock;
531 for (i = 0; i < SEN_SYM_MAX_SEGMENT; i++) {
532 sym->keyarray[i].segno = -1;
533 sym->keyarray[i].addr = NULL;
534 sym->patarray[i].segno = -1;
535 sym->patarray[i].addr = NULL;
536 sym->sisarray[i].segno = -1;
537 sym->sisarray[i].addr = NULL;
538 }
539 node0 = pat_get(sym, 0);
540 node0->r = 0;
541 node0->l = 0;
542 node0->key = 0;
543 return (sen_sym *)sym;
544 }
545
546 sen_sym *
sen_sym_open08(const char * path)547 sen_sym_open08(const char *path)
548 {
549 int i;
550 sen_io *io;
551 sen_sym08 *sym = NULL;
552 struct sen_sym_header *header;
553 io = sen_io_open(path, sen_io_auto, 8192);
554 if (!io) { return NULL; }
555 header = sen_io_header(io);
556 /*
557 if (memcmp(header->idstr, SEN_IDSTR, 16)) {
558 puts("id not match");
559 sen_io_close(io);
560 return NULL;
561 }
562 */
563 if (!(sym = SEN_GMALLOC(sizeof(sen_sym08)))) {
564 puts("malloc failed");
565 sen_io_close(io);
566 return NULL;
567 }
568 sym->v08p = 1;
569 sym->io = io;
570 sym->header = header;
571 sym->key_size = header->key_size;
572 sym->encoding = header->encoding;
573 sym->flags = header->flags;
574 sym->lock = &header->lock;
575 for (i = 0; i < SEN_SYM_MAX_SEGMENT; i++) {
576 sym->keyarray[i].segno = -1;
577 sym->keyarray[i].addr = NULL;
578 sym->patarray[i].segno = -1;
579 sym->patarray[i].addr = NULL;
580 sym->sisarray[i].segno = -1;
581 sym->sisarray[i].addr = NULL;
582 }
583 load_all_segments(sym);
584 return (sen_sym *)sym;
585 }
586
587 inline static sen_id
_sen_sym_get(sen_sym08 * sym,const uint8_t * key,uint32_t * new,uint32_t * lkey)588 _sen_sym_get(sen_sym08 *sym, const uint8_t *key, uint32_t *new, uint32_t *lkey)
589 {
590 sen_id r, r0, *p0, *p1 = NULL;
591 pat_node *rn, *rn0;
592 int c = -1, c0 = -1, c1 = -1, len;
593 size_t size = sym->key_size;
594 *new = 0;
595 if (!key) { return SEN_SYM_NIL; }
596 if (!size) { size = strlen((char *)key) + 1; }
597 len = (int)size * 8;
598 if (len > SEN_SYM_MAX_KEY_LENGTH) { return SEN_SYM_NIL; }
599 rn0 = pat_at(sym, 0); p0 = &rn0->r;
600 if (*p0) {
601 char xor, mask;
602 const char *s, *d;
603 while ((r0 = *p0)) {
604 if (!(rn0 = pat_at(sym, r0))) { return 0; }
605 if (c0 < rn0->check && rn0->check < len) {
606 c1 = c0; c0 = rn0->check;
607 p1 = p0; p0 = nth_bit(key, c0) ? &rn0->r : &rn0->l;
608 } else {
609 if (rn0->check < len && !memcmp(pat_node_get_key(sym, rn0), key, size)) {
610 return r0;
611 }
612 break;
613 }
614 }
615 for (c = 0, s = pat_node_get_key(sym, rn0), d = (char *)key; *s == *d; c += 8, s++, d++);
616 for (xor = *s ^ *d, mask = 0x80; !(xor & mask); mask >>= 1, c++);
617
618 if (c == c0 && !*p0) {
619 if (c < len - 1) { c++; }
620 } else {
621 if (c < c0) {
622 if (c > c1) {
623 p0 = p1;
624 } else {
625 rn0 = pat_at(sym, 0); p0 = &rn0->r;
626 while ((r0 = *p0)) {
627 if (!(rn0 = pat_at(sym, r0))) { return 0; }
628 if (c < rn0->check) { break; }
629 p0 = nth_bit(key, rn0->check) ? &rn0->r : &rn0->l;
630 }
631 }
632 }
633 }
634 if (c >= len) { return 0; }
635 } else {
636 c = (len - 1 > SEN_SYM_MAX_KEY_LENGTH) ? SEN_SYM_MAX_KEY_LENGTH : len - 1;
637 }
638 {
639 size_t size2 = size > sizeof(uint32_t) ? size : 0;
640 if (*lkey && size2) {
641 if (sym->header->garbages[0]) {
642 r = sym->header->garbages[0];
643 sym->header->nrecords++;
644 rn = pat_at(sym, r);
645 sym->header->garbages[0] = rn->l;
646 } else {
647 if (!(rn = pat_node_new(sym, &r))) { return 0; }
648 }
649 SET_PAT_IMD(rn, 0);
650 rn->key = *lkey;
651 } else {
652 if (sym->header->garbages[size2]) {
653 r = sym->header->garbages[size2];
654 sym->header->nrecords++;
655 rn = pat_at(sym, r);
656 sym->header->garbages[size2] = rn->l;
657 memcpy(pat_node_get_key(sym, rn), key, size);
658 } else {
659 if (!(rn = pat_node_new(sym, &r))) { return 0; }
660 pat_node_set_key(sym, rn, key, (unsigned int)size);
661 }
662 *lkey = rn->key;
663 }
664 }
665 rn->check = c;
666 SET_PAT_DEL(rn, 0);
667 SET_PAT_PKT(rn, 0);
668 if (nth_bit(key, c)) {
669 rn->r = r;
670 rn->l = *p0;
671 } else {
672 rn->r = *p0;
673 rn->l = r;
674 }
675 *p0 = r;
676 *new = 1;
677 return r;
678 }
679
680 inline static int
chop(sen_sym08 * sym,const char ** key,uint32_t * lkey)681 chop(sen_sym08 *sym, const char **key, uint32_t *lkey)
682 {
683 size_t len = sen_str_charlen(*key, sym->encoding);
684 if (len) {
685 *lkey += len;
686 *key += len;
687 return **key;
688 } else {
689 return 0;
690 }
691 }
692
693 sen_id
sen_sym_get08(sen_sym * sym,const void * key)694 sen_sym_get08(sen_sym *sym, const void *key)
695 {
696 uint32_t new, lkey = 0;
697 sen_id r0;
698 r0 = _sen_sym_get(SYM, (uint8_t *)key, &new, &lkey);
699 if ((SYM->flags & SEN_SYM_WITH_SIS) && (*((uint8_t *)key) & 0x80)) { // todo: refine!!
700 sis_node *sl, *sr;
701 sen_id l = r0, r;
702 if (new && (sl = sis_get(SYM, l))) {
703 const char *sis = key;
704 sl->children = l;
705 sl->sibling = 0;
706 while (chop(SYM, &sis, &lkey)) {
707 if (!(*sis & 0x80)) { break; }
708 r = _sen_sym_get(SYM, (uint8_t *)sis, &new, &lkey);
709 if (!(sr = sis_get(SYM, r))) { break; }
710 if (new) {
711 sl->sibling = r;
712 sr->children = l;
713 sr->sibling = 0;
714 } else {
715 sl->sibling = sr->children;
716 sr->children = l;
717 break;
718 }
719 l = r;
720 sl = sr;
721 }
722 }
723 }
724 return r0;
725 }
726
727 sen_id
sen_sym_at08(sen_sym * sym,const void * key)728 sen_sym_at08(sen_sym *sym, const void *key)
729 {
730 sen_id r;
731 pat_node *rn;
732 int c = -1;
733 size_t len = SYM->key_size * 8;
734 if (!key) { return SEN_SYM_NIL; }
735 if (!len) { len = (strlen(key) + 1) * 8; }
736 for (r = pat_at(SYM, 0)->r; r; r = nth_bit((uint8_t *)key, c) ? rn->r : rn->l) {
737 if (!(rn = pat_at(SYM, r))) { break; /* corrupt? */ }
738 if (len <= rn->check) { break; }
739 if (rn->check <= c) {
740 if (!memcmp(pat_node_get_key(SYM, rn), key, len >> 3)) { return r; }
741 break;
742 }
743 c = rn->check;
744 }
745 return SEN_SYM_NIL;
746 }
747
748 static void
get_tc(sen_sym08 * sym,sen_set * h,pat_node * rn,unsigned int len)749 get_tc(sen_sym08 *sym, sen_set *h, pat_node *rn, unsigned int len)
750 {
751 sen_id id;
752 pat_node *node;
753 id = rn->r;
754 if (id && (node = pat_at(sym, id))) {
755 if (node->check > rn->check) {
756 sen_set_get(h, &id, NULL);
757 get_tc(sym, h, node, len);
758 } else if (node->check < len) {
759 sen_set_get(h, &id, NULL);
760 }
761 }
762 id = rn->l;
763 if (id && (node = pat_at(sym, id))) {
764 if (node->check > rn->check) {
765 sen_set_get(h, &id, NULL);
766 get_tc(sym, h, node, len);
767 } else if (node->check < len) {
768 sen_set_get(h, &id, NULL);
769 }
770 }
771 }
772
773 sen_set *
sen_sym_prefix_search08(sen_sym * sym,const void * key)774 sen_sym_prefix_search08(sen_sym *sym, const void *key)
775 {
776 int c;
777 size_t len;
778 sen_id r;
779 pat_node *rn;
780 if (!key || SYM->key_size) { return NULL; } /* only for string */
781 len = strlen(key) * 8;
782 rn = pat_at(SYM, 0);
783 c = -1;
784 r = rn->r;
785 while (r) {
786 if (!(rn = pat_at(SYM, r))) { return NULL; }
787 if (c < rn->check && len > rn->check) {
788 c = rn->check;
789 r = nth_bit((uint8_t *)key, c) ? rn->r : rn->l;
790 continue;
791 }
792 if (!memcmp(pat_node_get_key(SYM, rn), key, len >> 3)) {
793 sen_set *h = sen_set_open(sizeof(sen_id), 0, 0);
794 SEN_ASSERT(h);
795 // if (!h) { return NULL; }
796 sen_set_get(h, &r, NULL);
797 if (rn->check >= len) { get_tc(SYM, h, rn, (unsigned int)len); }
798 return h;
799 }
800 break;
801 }
802 return NULL;
803 }
804
805 sen_set *
sen_sym_suffix_search08(sen_sym * sym,const void * key)806 sen_sym_suffix_search08(sen_sym *sym, const void *key)
807 {
808 sen_id r;
809 uint32_t *offset;
810 if (!key || SYM->key_size) { return NULL; }
811 if ((r = sen_sym_at08(sym, key))) {
812 sen_set *h = sen_set_open(sizeof(sen_id), sizeof(uint32_t), 0);
813 SEN_ASSERT(h);
814 // if (!h) { return NULL; }
815 sen_set_get(h, &r, (void **) &offset);
816 *offset = 0;
817 if (SYM->flags & SEN_SYM_WITH_SIS) { sis_collect(SYM, h, r, 1); }
818 return h;
819 }
820 return NULL;
821 }
822
823 sen_id
sen_sym_common_prefix_search08(sen_sym * sym,const void * key)824 sen_sym_common_prefix_search08(sen_sym *sym, const void *key)
825 {
826 pat_node *rn, *rnl;
827 sen_id r, rl, r2 = SEN_SYM_NIL;
828 int c = -1, cl, len = 0, n, len2 = 8, nth_bit;
829 if (!key) { return SEN_SYM_NIL; }
830 if (SYM->key_size) { return SEN_SYM_NIL; /* only for string */ }
831 // len = (strlen(key) + 1) * 8;
832 for (r = pat_at(SYM, 0)->r; r;) {
833 if (!(rn = pat_at(SYM, r))) { break; /* corrupt? */ }
834 // if (len <= rn->check) { break; }
835 if (rn->check <= c) {
836 char *p = pat_node_get_key(SYM, rn);
837 if (!memcmp(p, key, strlen(p))) { return r; }
838 break;
839 }
840 c = rn->check;
841 n = c >> 3;
842 while (len < n) { if (!((char *)key)[len++]) { goto exit; } }
843 nth_bit = ((uint8_t *)key)[n] & (0x80 >> (c & 7));
844 if (nth_bit) {
845 if (len2 <= c) {
846 len2 = (n + 1) * 8;
847 for (cl = c, rl = rn->l; rl; cl = rnl->check, rl = rnl->l) {
848 if (!(rnl = pat_at(SYM, rl))) { break; /* corrupt? */ }
849 if (len2 <= rnl->check) { break; }
850 if (rnl->check <= cl) {
851 char *p = pat_node_get_key(SYM, rnl);
852 if (!p[n] && !memcmp(p, key, n)) { r2 = rl; }
853 break;
854 }
855 }
856 }
857 r = rn->r;
858 } else {
859 r = rn->l;
860 }
861 }
862 exit :
863 return r2;
864 }
865
866 inline static sen_rc
_sen_sym_del(sen_sym08 * sym,const char * key,int shared)867 _sen_sym_del(sen_sym08 *sym, const char *key, int shared)
868 {
869 sen_sym_delinfo *di;
870 uint8_t direction;
871 pat_node *rn, *rn0 = NULL, *rno;
872 int c, c0 = -1;
873 size_t len = sym->key_size * 8;
874 sen_id r, otherside, *p, *p0 = NULL;
875 if (!key) { return sen_invalid_argument; }
876 if (!len) { len = (strlen(key) + 1) * 8; }
877 di = delinfo_new(sym); /* must be called before find rn */
878 SET_DL_SH(di, shared ? DL_SHARED : 0);
879 rn = pat_at(sym, 0);
880 c = -1;
881 p = &rn->r;
882 for (;;) {
883 if (!(r = *p)) { return sen_invalid_argument; }
884 if (!(rn = pat_at(sym, r))) { return sen_invalid_format; }
885 if (len <= rn->check) { return sen_invalid_argument; }
886 if (c >= rn->check) {
887 if (!memcmp(pat_node_get_key(sym, rn), key, len >> 3)) {
888 break; /* found */
889 } else { return sen_invalid_argument; }
890 }
891 c0 = c; c = rn->check;
892 p0 = p; p = nth_bit((uint8_t *)key, c) ? &rn->r : &rn->l;
893 rn0 = rn;
894 }
895 direction = (rn0->r == r);
896 otherside = direction ? rn0->l : rn0->r;
897 if (rn == rn0) {
898 SET_DL_ST(di, DL_PHASE2);
899 di->d = r;
900 if (otherside) {
901 rno = pat_at(sym, otherside);
902 if (rno && c0 < rno->check && rno->check <= c) {
903 if (!delinfo_search(sym, otherside)) {
904 SEN_LOG(sen_log_error, "no delinfo found %d", otherside);
905 }
906 rno->check = 0;
907 }
908 }
909 *p0 = otherside;
910 } else {
911 sen_sym_delinfo *ldi = NULL, *ddi = NULL;
912 if (PAT_DEL(rn)) { ldi = delinfo_search(sym, r); }
913 if (PAT_DEL(rn0)) { ddi = delinfo_search(sym, *p0); }
914 if (ldi) {
915 SET_PAT_DEL(rn, 0);
916 SET_DL_ST(di, DL_PHASE2);
917 if (ddi) {
918 SET_PAT_DEL(rn0, 0);
919 SET_DL_ST(ddi, DL_PHASE2);
920 if (ddi == ldi) {
921 if (r != DL_LD(ddi)) {
922 SEN_LOG(sen_log_error, "r(%d) != DL_LD(ddi)(%d)", r, DL_LD(ddi));
923 }
924 di->d = r;
925 } else {
926 SET_DL_LD(ldi, DL_LD(ddi));
927 di->d = r;
928 }
929 } else {
930 SET_PAT_DEL(rn0, PAT_DELETING);
931 SET_DL_LD(ldi, *p0);
932 di->d = r;
933 }
934 } else {
935 SET_PAT_DEL(rn, PAT_DELETING);
936 if (ddi) {
937 if (ddi->d != *p0) {
938 SEN_LOG(sen_log_error, "ddi->d(%d) != *p0(%d)", ddi->d, *p0);
939 }
940 SET_PAT_DEL(rn0, 0);
941 SET_DL_ST(ddi, DL_PHASE2);
942 SET_DL_ST(di, DL_PHASE1);
943 SET_DL_LD(di, DL_LD(ddi));
944 di->d = r;
945 /*
946 SET_PAT_DEL(rn0, 0);
947 ddi->d = r;
948 SET_DL_ST(di, DL_PHASE2);
949 di->d = *p0;
950 */
951 } else {
952 SET_PAT_DEL(rn0, PAT_DELETING);
953 SET_DL_ST(di, DL_PHASE1);
954 SET_DL_LD(di, *p0);
955 di->d = r;
956 // sen_log("sym_del d=%d ld=%d stat=%d", r, *p0, DL_PHASE1);
957 }
958 }
959 if (*p0 == otherside) {
960 rn0->check = 0;
961 } else {
962 if (otherside) {
963 rno = pat_at(sym, otherside);
964 if (rno && c0 < rno->check && rno->check <= c) {
965 if (!delinfo_search(sym, otherside)) {
966 SEN_LOG(sen_log_error, "no delinfo found %d", otherside);
967 }
968 rno->check = 0;
969 }
970 }
971 *p0 = otherside;
972 }
973 }
974 sym->header->nrecords--;
975 return sen_success;
976 }
977
978 sen_rc
sen_sym_del08(sen_sym * sym,const void * key)979 sen_sym_del08(sen_sym *sym, const void *key)
980 {
981 return _sen_sym_del(SYM, key, 0);
982 }
983
984 const char *
_sen_sym_key08(sen_sym * sym,sen_id id)985 _sen_sym_key08(sen_sym *sym, sen_id id)
986 {
987 pat_node *node = pat_at(SYM, id);
988 if (!node) { return NULL; }
989 return pat_node_get_key(SYM, node);
990 }
991
992 int
sen_sym_key08(sen_sym * sym,sen_id id,void * keybuf,int bufsize)993 sen_sym_key08(sen_sym *sym, sen_id id, void *keybuf, int bufsize)
994 {
995 int len;
996 char *key;
997 pat_node *node;
998 if (!(node = pat_at(SYM, id))) { return 0; }
999 if (!(key = pat_node_get_key(SYM, node))) { return 0; }
1000 if (!(len = SYM->key_size)) { len = (int)(strlen(key) + 1); }
1001 if (keybuf && bufsize >= len) { memcpy(keybuf, key, len); }
1002 return len;
1003 }
1004
1005 int
sen_sym_pocket_get08(sen_sym * sym,sen_id id)1006 sen_sym_pocket_get08(sen_sym *sym, sen_id id)
1007 {
1008 pat_node *node = pat_at(SYM, id);
1009 if (!node) { return -1; }
1010 return PAT_PKT(node);
1011 }
1012
1013 sen_rc
sen_sym_pocket_set08(sen_sym * sym,sen_id id,unsigned int value)1014 sen_sym_pocket_set08(sen_sym *sym, sen_id id, unsigned int value)
1015 {
1016 pat_node *node = pat_at(SYM, id);
1017 if (value > PAT_POCKET_MAX || !node) { return sen_invalid_argument; }
1018 SET_PAT_PKT(node, value);
1019 return sen_success;
1020 }
1021
1022 int
sen_sym_del_with_sis08(sen_sym * sym,sen_id id,int (* func)(sen_id,void *),void * func_arg)1023 sen_sym_del_with_sis08(sen_sym *sym, sen_id id,
1024 int(*func)(sen_id, void *), void *func_arg)
1025 {
1026 int level = 0, shared;
1027 const char *key = NULL, *_key;
1028 sis_node *sp, *ss = NULL, *si = sis_at(SYM, id);
1029 while (id) {
1030 if ((si && si->children && si->children != id) || !func(id, func_arg)) { break; }
1031 if (!(_key = _sen_sym_key08(sym, id))) { break; }
1032 if (_key == key) {
1033 shared = 1;
1034 } else {
1035 key = _key;
1036 shared = 0;
1037 }
1038 _sen_sym_del(SYM, key, shared);
1039 if (si) {
1040 sen_id *p, sid;
1041 uint32_t lkey = 0;
1042 if ((*key & 0x80) && chop(SYM, &key, &lkey)) {
1043 if ((sid = sen_sym_at08(sym, key)) && (ss = sis_at(SYM, sid))) {
1044 for (p = &ss->children; *p && *p != sid; p = &sp->sibling) {
1045 if (*p == id) {
1046 *p = si->sibling;
1047 break;
1048 }
1049 sp = sis_at(SYM, *p);
1050 }
1051 }
1052 } else {
1053 sid = SEN_SYM_NIL;
1054 }
1055 si->sibling = 0;
1056 si->children = 0;
1057 id = sid;
1058 si = ss;
1059 } else {
1060 id = SEN_SYM_NIL;
1061 }
1062 level++;
1063 }
1064 if (level) {
1065 uint32_t lkey = 0;
1066 while (id && key) {
1067 if (_sen_sym_key08(sym, id) != key) { break; }
1068 {
1069 pat_node *rn = pat_at(SYM, id);
1070 if (lkey) {
1071 rn->key = lkey;
1072 } else {
1073 pat_node_set_key(SYM, rn, (uint8_t *)key, (unsigned int)(strlen(key) + 1));
1074 lkey = rn->key;
1075 }
1076 }
1077 if (!((*key & 0x80) && chop(SYM, &key, &lkey))) { break; }
1078 id = sen_sym_at08(sym, key);
1079 }
1080 }
1081 return level;
1082 }
1083