1 /* Copyright(C) 2004 Brazil
2 
3   This library is free software; you can redistribute it and/or
4   modify it under the terms of the GNU Lesser General Public
5   License as published by the Free Software Foundation; either
6   version 2.1 of the License, or (at your option) any later version.
7 
8   This library is distributed in the hope that it will be useful,
9   but WITHOUT ANY WARRANTY; without even the implied warranty of
10   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11   Lesser General Public License for more details.
12 
13   You should have received a copy of the GNU Lesser General Public
14   License along with this library; if not, write to the Free Software
15   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16 */
17 #include "senna_in.h"
18 #include <fcntl.h>
19 #include <stdio.h> /* for debug */
20 #include <string.h>
21 #include <sys/stat.h>
22 #include "sym.h"
23 
24 #define SEN_SYM_HEADER_SIZE 0x10000
25 #define SEN_IDSTR "SENNA:SYM:00.00"
26 #define SEN_SYM_DELETED (SEN_SYM_MAX_ID + 1)
27 
28 #define SEN_SYM_SEGMENT_SIZE 0x400000
29 #define W_OF_KEY_IN_A_SEGMENT 22
30 #define W_OF_PAT_IN_A_SEGMENT 18
31 #define W_OF_SIS_IN_A_SEGMENT 19
32 #define KEY_MASK_IN_A_SEGMENT 0x3fffff
33 #define PAT_MASK_IN_A_SEGMENT 0x3ffff
34 #define SIS_MASK_IN_A_SEGMENT 0x7ffff
35 
36 struct sen_sym_header {
37   char idstr[16];
38   uint32_t flags;
39   sen_encoding encoding;
40   uint32_t key_size;
41   uint32_t nrecords;
42   uint32_t curr_rec;
43   int32_t curr_key;
44   int32_t curr_del;
45   int32_t curr_del2;
46   int32_t curr_del3;
47   uint8_t segments[SEN_SYM_MAX_SEGMENT];
48   sen_sym_delinfo delinfos[SEN_SYM_NDELINFOS];
49   sen_id garbages[(SEN_SYM_MAX_KEY_LENGTH + 1) / 8];
50   uint32_t lock;
51 };
52 
53 typedef struct {
54   sen_id r;
55   sen_id l;
56   uint32_t key;
57   uint16_t check;
58   /* deleting : 1, immediate : 1, pocket : 14 */
59   uint16_t bitfield;
60 } pat_node;
61 
62 typedef struct {
63   int32_t segno;
64   void *addr;
65 } sen_sym_seginfo;
66 
67 typedef struct {
68   uint8_t v08p;
69   sen_io *io;
70   struct sen_sym_header *header;
71   uint32_t flags;
72   sen_encoding encoding;
73   uint32_t key_size;
74   uint32_t nref;
75   uint32_t *lock;
76   sen_sym_seginfo keyarray[SEN_SYM_MAX_SEGMENT];
77   sen_sym_seginfo patarray[SEN_SYM_MAX_SEGMENT];
78   sen_sym_seginfo sisarray[SEN_SYM_MAX_SEGMENT];
79 }
80 sen_sym08;
81 
82 #define SYM ((sen_sym08 *)sym)
83 
84 #define PAT_DELETING 1
85 #define PAT_IMMEDIATE 2
86 #define PAT_POCKET_MAX 0x3fff
87 #define PAT_DEL(x) ((x)->bitfield & PAT_DELETING)
88 #define PAT_IMD(x) ((x)->bitfield & PAT_IMMEDIATE)
89 #define PAT_PKT(x) ((x)->bitfield >> 2)
90 #define SET_PAT_DEL(x,v) ((x)->bitfield = ((x)->bitfield & 0xfffe) | (v))
91 #define SET_PAT_IMD(x,v) ((x)->bitfield = ((x)->bitfield & 0xfffd) | (v))
92 #define SET_PAT_PKT(x,v) ((x)->bitfield = ((x)->bitfield & 3) | ((v) << 2))
93 
94 typedef struct {
95   sen_id children;
96   sen_id sibling;
97 } sis_node;
98 
99 enum {
100   segment_empty = 0,
101   segment_key,
102   segment_pat,
103   segment_sis
104 };
105 
106 /* bit operation */
107 
108 inline static uint32_t
nth_bit(const uint8_t * key,uint32_t n)109 nth_bit(const uint8_t *key, uint32_t n)
110 {
111   return key[n >> 3] & (0x80 >> (n & 7));
112 }
113 
114 /* segment operation */
115 
116 inline static sen_rc
load_all_segments(sen_sym08 * sym)117 load_all_segments(sen_sym08 *sym)
118 {
119   sen_rc rc = sen_success;
120   int seg, nkey = 0, npat = 0, nsis = 0, empty = 0;
121   for (seg = 0; seg < SEN_SYM_MAX_SEGMENT; seg++) {
122     switch (sym->header->segments[seg]) {
123     case segment_empty :
124       empty++;
125       break;
126     case segment_key :
127       sym->keyarray[nkey++].segno = seg;
128       break;
129     case segment_pat :
130       sym->patarray[npat++].segno = seg;
131       break;
132     case segment_sis :
133       sym->sisarray[nsis++].segno = seg;
134       break;
135     default :
136       rc = sen_invalid_format;
137       break;
138     }
139   }
140   return rc;
141 }
142 
143 inline static sen_rc
segment_new(sen_sym08 * sym,int segtype)144 segment_new(sen_sym08 *sym, int segtype)
145 {
146   int seg = 0, type, n = 0;
147   while ((type = sym->header->segments[seg])) {
148     if (type == segtype) { n++; }
149     if (++seg >= SEN_SYM_MAX_SEGMENT) { return sen_memory_exhausted; }
150   }
151   sym->header->segments[seg] = segtype;
152   switch (segtype) {
153   case segment_key :
154     sym->keyarray[n].segno = seg;
155     sym->header->curr_key = n * SEN_SYM_SEGMENT_SIZE;
156     break;
157   case segment_pat :
158     sym->patarray[n].segno = seg;
159     break;
160   case segment_sis :
161     sym->sisarray[n].segno = seg;
162     break;
163   }
164   return sen_success;
165 }
166 
167 #define SEG_LOAD(io,si)\
168 {\
169   int32_t segno = si->segno;\
170   if (0 <= segno && segno < SEN_SYM_MAX_SEGMENT) {\
171     SEN_IO_SEG_REF(io, segno, si->addr);\
172     SEN_IO_SEG_UNREF(io, segno);\
173   }\
174 }
175 
176 /* patricia array operation */
177 
178 inline static pat_node *
pat_at(sen_sym08 * sym,sen_id id)179 pat_at(sen_sym08 *sym, sen_id id)
180 {
181   sen_sym_seginfo *si;
182   if (id > SEN_SYM_MAX_ID) { return NULL; }
183   si = &sym->patarray[id >> W_OF_PAT_IN_A_SEGMENT];
184   if (!si->addr) {
185     if (si->segno == -1) { load_all_segments(sym); }
186     SEG_LOAD(sym->io, si);
187     if (!si->addr) { return NULL; }
188   }
189   return
190     (pat_node *)(((byte *)si->addr) + ((id & PAT_MASK_IN_A_SEGMENT) * sizeof(pat_node)));
191 }
192 
193 inline static pat_node *
pat_get(sen_sym08 * sym,sen_id id)194 pat_get(sen_sym08 *sym, sen_id id)
195 {
196   sen_sym_seginfo *si;
197   if (id > SEN_SYM_MAX_ID) { return NULL; }
198   si = &sym->patarray[id >> W_OF_PAT_IN_A_SEGMENT];
199   if (!si->addr) {
200     if (si->segno == -1) { load_all_segments(sym); }
201     while (si->segno == -1) {
202       if (segment_new(sym, segment_pat)) { return NULL; }
203     }
204     SEG_LOAD(sym->io, si);
205     if (!si->addr) { return NULL; }
206   }
207   return
208     (pat_node *)(((byte *)si->addr) + ((id & PAT_MASK_IN_A_SEGMENT) * sizeof(pat_node)));
209 }
210 
211 inline static pat_node *
pat_node_new(sen_sym08 * sym,sen_id * id)212 pat_node_new(sen_sym08 *sym, sen_id *id)
213 {
214   uint32_t n = sym->header->curr_rec + 1;
215   pat_node *res;
216   if (n > SEN_SYM_MAX_ID) { return NULL; }
217   if ((res = pat_get(sym, n))) {
218     sym->header->curr_rec = n;
219     sym->header->nrecords++;
220   }
221   if (id) { *id = n; }
222   return res;
223 }
224 
225 /* sis operation */
226 
227 inline static sis_node *
sis_at(sen_sym08 * sym,sen_id id)228 sis_at(sen_sym08 *sym, sen_id id)
229 {
230   sen_sym_seginfo *si;
231   if (id > SEN_SYM_MAX_ID) { return NULL; }
232   si = &sym->sisarray[id >> W_OF_SIS_IN_A_SEGMENT];
233   if (!si->addr) {
234     if (si->segno == -1) { load_all_segments(sym); }
235     SEG_LOAD(sym->io, si);
236     if (!si->addr) { return NULL; }
237   }
238   return
239     (sis_node *)(((byte *)si->addr) + ((id & SIS_MASK_IN_A_SEGMENT) * sizeof(sis_node)));
240 }
241 
242 inline static sis_node *
sis_get(sen_sym08 * sym,sen_id id)243 sis_get(sen_sym08 *sym, sen_id id)
244 {
245   sen_sym_seginfo *si;
246   if (id == SEN_SYM_NIL || SEN_SYM_MAX_ID < id) { return NULL; }
247   si = &sym->sisarray[id >> W_OF_SIS_IN_A_SEGMENT];
248   if (!si->addr) {
249     if (si->segno == -1) { load_all_segments(sym); }
250     while (si->segno == -1) {
251       if (segment_new(sym, segment_sis)) { return NULL; }
252     }
253     SEG_LOAD(sym->io, si);
254     if (!si->addr) { return NULL; }
255   }
256   return
257     (sis_node *)(((byte *)si->addr) + ((id & SIS_MASK_IN_A_SEGMENT) * sizeof(sis_node)));
258 }
259 
260 #define MAX_LEVEL 16
261 
262 static void
sis_collect(sen_sym08 * sym,sen_set * h,sen_id id,uint32_t level)263 sis_collect(sen_sym08 *sym, sen_set *h, sen_id id, uint32_t level)
264 {
265   uint32_t *offset;
266   sis_node *sl = sis_at(sym, id);
267   if (sl) {
268     sen_id sid = sl->children;
269     while (sid && sid != id) {
270       sen_set_get(h, &sid, (void **) &offset);
271       *offset = level;
272       if (level < MAX_LEVEL) { sis_collect(sym, h, sid, level + 1); }
273       if (!(sl = sis_at(sym, sid))) { break; }
274       sid = sl->sibling;
275     }
276   }
277 }
278 
279 /* key operation */
280 
281 #define KEY_AT(sym,pos,ptr) \
282 {\
283   sen_sym_seginfo *si = &sym->keyarray[pos >> W_OF_KEY_IN_A_SEGMENT];\
284   if (si->addr) {\
285     ptr = (char *)(((byte *)si->addr) + (pos & KEY_MASK_IN_A_SEGMENT));\
286   } else {\
287     if (si->segno == -1) { load_all_segments(sym); }\
288     SEG_LOAD(sym->io, si);\
289     ptr = si->addr\
290       ? (char *)(((byte *)si->addr) + (pos & KEY_MASK_IN_A_SEGMENT))\
291       : NULL;\
292   }\
293 }
294 
295 inline static uint32_t
key_put(sen_sym08 * sym,const uint8_t * key,int len)296 key_put(sen_sym08 *sym, const uint8_t *key, int len)
297 {
298   int lpos;
299   uint32_t res = sym->header->curr_key;
300   if (len >= SEN_SYM_SEGMENT_SIZE) { return 0; }
301   lpos = res & KEY_MASK_IN_A_SEGMENT;
302   if (lpos + len > SEN_SYM_SEGMENT_SIZE || !lpos) {
303     if (segment_new(sym, segment_key)) { return 0; }
304     res = sym->header->curr_key;
305   }
306   {
307     char *dest;
308     KEY_AT(sym, res, dest);
309     if (!dest) { return 0; }
310     memcpy(dest, key, len);
311   }
312   sym->header->curr_key += len;
313   return res;
314 }
315 
316 inline static char *
pat_node_get_key(sen_sym08 * sym,pat_node * n)317 pat_node_get_key(sen_sym08 *sym, pat_node *n)
318 {
319   if (PAT_IMD(n)) {
320     return (char *)&n->key;
321   } else {
322     char *res;
323     KEY_AT(sym, n->key, res);
324     return res;
325   }
326 }
327 
328 inline static sen_rc
pat_node_set_key(sen_sym08 * sym,pat_node * n,const uint8_t * key,unsigned int len)329 pat_node_set_key(sen_sym08 *sym, pat_node *n, const uint8_t *key, unsigned int len)
330 {
331   if (!key || !len) { return sen_invalid_argument; }
332   if (len <= sizeof(uint32_t)) {
333     SET_PAT_IMD(n, PAT_IMMEDIATE);
334     memcpy(&n->key, key, len);
335   } else {
336     SET_PAT_IMD(n, 0);
337     n->key = key_put(sym, key, len);
338   }
339   return sen_success;
340 }
341 
342 /* delinfo operation */
343 
344 #define DL_EMPTY  0x00000000
345 #define DL_PHASE1 0x00000001
346 #define DL_PHASE2 0x00000002
347 #define DL_STMASK 0x00000003
348 #define DL_SHARED 0x80000000
349 #define DL_LD(x) (((x)->bitfield >> 2) & SEN_SYM_MAX_ID)
350 #define DL_ST(x) ((x)->bitfield & DL_STMASK)
351 #define DL_SH(x) ((x)->bitfield & DL_SHARED)
352 #define SET_DL_LD(x,v) ((x)->bitfield = ((x)->bitfield & (DL_STMASK|DL_SHARED)) | \
353                                         (((v) & SEN_SYM_MAX_ID) << 2))
354 #define SET_DL_ST(x,v) ((x)->bitfield = ((x)->bitfield & ~DL_STMASK) | ((v) & DL_STMASK))
355 #define SET_DL_SH(x,v) ((x)->bitfield = ((x)->bitfield & ~DL_SHARED) | ((v) & DL_SHARED))
356 
357 inline static sen_sym_delinfo *
delinfo_search(sen_sym08 * sym,sen_id id)358 delinfo_search(sen_sym08 *sym, sen_id id)
359 {
360   int i;
361   sen_sym_delinfo *di;
362   for (i = (sym->header->curr_del2) & SEN_SYM_MDELINFOS;
363        i != sym->header->curr_del;
364        i = (i + 1) & SEN_SYM_MDELINFOS) {
365     di = &sym->header->delinfos[i];
366     if (DL_ST(di) != DL_PHASE1) { continue; }
367     if (DL_LD(di) == id) { return di; }
368     if (di->d == id) { return di; }
369   }
370   return NULL;
371 }
372 
373 inline static sen_rc
delinfo_turn_2(sen_sym08 * sym,sen_sym_delinfo * di)374 delinfo_turn_2(sen_sym08 *sym, sen_sym_delinfo *di)
375 {
376   sen_id d, *p = NULL;
377   pat_node *ln, *dn;
378   // sen_log("delinfo_turn_2> di->d=%d di->ld=%d stat=%d", di->d, DL_LD(di), DL_ST(di));
379   if (DL_ST(di) != DL_PHASE1) { return sen_success; }
380   if (!(ln = pat_at(sym, DL_LD(di)))) { return sen_invalid_argument; }
381   if (!(d = di->d)) { return sen_invalid_argument; }
382   if (!(dn = pat_at(sym, d))) { return sen_invalid_argument; }
383   SET_PAT_DEL(ln, 0);
384   SET_PAT_DEL(dn, 0);
385   {
386     sen_id r, *p0;
387     pat_node *rn;
388     int c;
389     size_t len = sym->key_size * 8;
390     const char *key = pat_node_get_key(sym, dn);
391     if (!key) { return sen_invalid_argument; }
392     if (!len) { len = (strlen(key) + 1) * 8; }
393     rn = pat_at(sym, 0);
394     c = -1;
395     p0 = &rn->r;
396     while ((r = *p0)) {
397       if (r == d) {
398         p = p0;
399         break;
400       }
401       if (!(rn = pat_at(sym, r))) { return sen_invalid_format; }
402       if (rn->check <= c || len <= rn->check) { break; }
403       c = rn->check;
404       p0 = nth_bit((uint8_t *)key, c) ? &rn->r : &rn->l;
405     }
406   }
407   if (p) {
408     ln->check = dn->check;
409     ln->r = dn->r;
410     ln->l = dn->l;
411     *p = DL_LD(di);
412   } else {
413     /* debug */
414     int j;
415     sen_id dd;
416     sen_sym_delinfo *ddi;
417     SEN_LOG(sen_log_debug, "failed to find d=%d", d);
418     for (j = (sym->header->curr_del2 + 1) & SEN_SYM_MDELINFOS;
419          j != sym->header->curr_del;
420          j = (j + 1) & SEN_SYM_MDELINFOS) {
421       ddi = &sym->header->delinfos[j];
422       if (DL_ST(ddi) != DL_PHASE1) { continue; }
423       if (!(ln = pat_at(sym, DL_LD(ddi)))) { continue; }
424       if (!(dd = ddi->d)) { continue; }
425       if (d == DL_LD(ddi)) {
426         SEN_LOG(sen_log_debug, "found!!!, d(%d) become ld of (%d)", d, dd);
427       }
428     }
429     /* debug */
430   }
431   SET_DL_ST(di, DL_PHASE2);
432   di->d = d;
433   // sen_log("delinfo_turn_2< di->d=%d di->ld=%d", di->d, DL_LD(di));
434   return sen_success;
435 }
436 
437 inline static sen_rc
delinfo_turn_3(sen_sym08 * sym,sen_sym_delinfo * di)438 delinfo_turn_3(sen_sym08 *sym, sen_sym_delinfo *di)
439 {
440   pat_node *dn;
441   uint32_t size;
442   if (DL_ST(di) != DL_PHASE2) { return sen_success; }
443   if (!(dn = pat_at(sym, di->d))) { return sen_invalid_argument; }
444   if (DL_SH(di)) {
445     SET_PAT_IMD(dn, PAT_IMMEDIATE);
446     size = 0;
447   } else {
448     if (sym->key_size) {
449       size = sym->key_size;
450     } else {
451       // size = dn->check = (uint16_t)(strlen(key) + 1);
452       if (PAT_IMD(dn)) {
453         size = 0;
454       } else {
455         const char *key = pat_node_get_key(sym, dn);
456         if (!key) { return sen_invalid_argument; }
457         size = strlen(key) + 1;
458       }
459     }
460   }
461   SET_DL_ST(di, DL_EMPTY);
462   dn->r = SEN_SYM_DELETED;
463   dn->l = sym->header->garbages[size];
464   sym->header->garbages[size] = di->d;
465   return sen_success;
466 }
467 
468 inline static sen_sym_delinfo *
delinfo_new(sen_sym08 * sym)469 delinfo_new(sen_sym08 *sym)
470 {
471   sen_sym_delinfo *res = &sym->header->delinfos[sym->header->curr_del];
472   uint32_t n = (sym->header->curr_del + 1) & SEN_SYM_MDELINFOS;
473   int gap = ((n + SEN_SYM_NDELINFOS - sym->header->curr_del2) & SEN_SYM_MDELINFOS)
474             - (SEN_SYM_NDELINFOS / 2);
475   while (gap-- > 0) {
476     if (delinfo_turn_2(sym, &sym->header->delinfos[sym->header->curr_del2])) {
477       SEN_LOG(sen_log_crit, "d2 failed: %d", DL_LD(&sym->header->delinfos[sym->header->curr_del2]));
478     }
479     sym->header->curr_del2 = (sym->header->curr_del2 + 1) & SEN_SYM_MDELINFOS;
480   }
481   if (n == sym->header->curr_del3) {
482     if (delinfo_turn_3(sym, &sym->header->delinfos[sym->header->curr_del3])) {
483       SEN_LOG(sen_log_crit, "d3 failed: %d", DL_LD(&sym->header->delinfos[sym->header->curr_del3]));
484     }
485     sym->header->curr_del3 = (sym->header->curr_del3 + 1) & SEN_SYM_MDELINFOS;
486   }
487   sym->header->curr_del = n;
488   return res;
489 }
490 
491 /* sym operation */
492 
493 sen_sym *
sen_sym_create08(const char * path,uint32_t key_size,uint32_t flags,sen_encoding encoding)494 sen_sym_create08(const char *path, uint32_t key_size,
495                uint32_t flags, sen_encoding encoding)
496 {
497   int i;
498   sen_io *io;
499   sen_sym08 *sym = NULL;
500   pat_node *node0;
501   struct sen_sym_header *header;
502   io = sen_io_create(path, SEN_SYM_HEADER_SIZE, SEN_SYM_SEGMENT_SIZE,
503                      SEN_SYM_MAX_SEGMENT, sen_io_auto, SEN_SYM_MAX_SEGMENT);
504   if (!io) { return NULL; }
505   header = sen_io_header(io);
506   memcpy(header->idstr, SEN_IDSTR, 16);
507   header->flags = flags;
508   header->encoding = encoding;
509   header->key_size = key_size;
510   header->nrecords = 0;
511   header->curr_rec = 0;
512   header->curr_key = -1;
513   header->curr_del = 0;
514   header->curr_del2 = 0;
515   header->curr_del3 = 0;
516   header->lock = 0;
517   for (i = 0; i < SEN_SYM_MAX_SEGMENT; i++) {
518     header->segments[i] = segment_empty;
519   }
520   if (!(sym = SEN_GMALLOC(sizeof(sen_sym08)))) {
521     sen_io_close(io);
522     return NULL;
523   }
524   sym->v08p = 1;
525   sym->io = io;
526   sym->header = header;
527   sym->key_size = key_size;
528   sym->encoding = encoding;
529   sym->flags = flags;
530   sym->lock = &header->lock;
531   for (i = 0; i < SEN_SYM_MAX_SEGMENT; i++) {
532     sym->keyarray[i].segno = -1;
533     sym->keyarray[i].addr = NULL;
534     sym->patarray[i].segno = -1;
535     sym->patarray[i].addr = NULL;
536     sym->sisarray[i].segno = -1;
537     sym->sisarray[i].addr = NULL;
538   }
539   node0 = pat_get(sym, 0);
540   node0->r = 0;
541   node0->l = 0;
542   node0->key = 0;
543   return (sen_sym *)sym;
544 }
545 
546 sen_sym *
sen_sym_open08(const char * path)547 sen_sym_open08(const char *path)
548 {
549   int i;
550   sen_io *io;
551   sen_sym08 *sym = NULL;
552   struct sen_sym_header *header;
553   io = sen_io_open(path, sen_io_auto, 8192);
554   if (!io) { return NULL; }
555   header = sen_io_header(io);
556   /*
557   if (memcmp(header->idstr, SEN_IDSTR, 16)) {
558     puts("id not match");
559     sen_io_close(io);
560     return NULL;
561   }
562   */
563   if (!(sym = SEN_GMALLOC(sizeof(sen_sym08)))) {
564     puts("malloc failed");
565     sen_io_close(io);
566     return NULL;
567   }
568   sym->v08p = 1;
569   sym->io = io;
570   sym->header = header;
571   sym->key_size = header->key_size;
572   sym->encoding = header->encoding;
573   sym->flags = header->flags;
574   sym->lock = &header->lock;
575   for (i = 0; i < SEN_SYM_MAX_SEGMENT; i++) {
576     sym->keyarray[i].segno = -1;
577     sym->keyarray[i].addr = NULL;
578     sym->patarray[i].segno = -1;
579     sym->patarray[i].addr = NULL;
580     sym->sisarray[i].segno = -1;
581     sym->sisarray[i].addr = NULL;
582   }
583   load_all_segments(sym);
584   return (sen_sym *)sym;
585 }
586 
587 inline static sen_id
_sen_sym_get(sen_sym08 * sym,const uint8_t * key,uint32_t * new,uint32_t * lkey)588 _sen_sym_get(sen_sym08 *sym, const uint8_t *key, uint32_t *new, uint32_t *lkey)
589 {
590   sen_id r, r0, *p0, *p1 = NULL;
591   pat_node *rn, *rn0;
592   int c = -1, c0 = -1, c1 = -1, len;
593   size_t size = sym->key_size;
594   *new = 0;
595   if (!key) { return SEN_SYM_NIL; }
596   if (!size) { size = strlen((char *)key) + 1; }
597   len = (int)size * 8;
598   if (len > SEN_SYM_MAX_KEY_LENGTH) { return SEN_SYM_NIL; }
599   rn0 = pat_at(sym, 0); p0 = &rn0->r;
600   if (*p0) {
601     char xor, mask;
602     const char *s, *d;
603     while ((r0 = *p0)) {
604       if (!(rn0 = pat_at(sym, r0))) { return 0; }
605       if (c0 < rn0->check && rn0->check < len) {
606         c1 = c0; c0 = rn0->check;
607         p1 = p0; p0 = nth_bit(key, c0) ? &rn0->r : &rn0->l;
608       } else {
609         if (rn0->check < len && !memcmp(pat_node_get_key(sym, rn0), key, size)) {
610           return r0;
611         }
612         break;
613       }
614     }
615     for (c = 0, s = pat_node_get_key(sym, rn0), d = (char *)key; *s == *d; c += 8, s++, d++);
616     for (xor = *s ^ *d, mask = 0x80; !(xor & mask); mask >>= 1, c++);
617 
618     if (c == c0 && !*p0) {
619       if (c < len - 1) { c++; }
620     } else {
621       if (c < c0) {
622         if (c > c1) {
623           p0 = p1;
624         } else {
625           rn0 = pat_at(sym, 0); p0 = &rn0->r;
626           while ((r0 = *p0)) {
627             if (!(rn0 = pat_at(sym, r0))) { return 0; }
628             if (c < rn0->check) { break; }
629             p0 = nth_bit(key, rn0->check) ? &rn0->r : &rn0->l;
630           }
631         }
632       }
633     }
634     if (c >= len) { return 0; }
635   } else {
636     c = (len - 1 > SEN_SYM_MAX_KEY_LENGTH) ? SEN_SYM_MAX_KEY_LENGTH : len - 1;
637   }
638   {
639     size_t size2 = size > sizeof(uint32_t) ? size : 0;
640     if (*lkey && size2) {
641       if (sym->header->garbages[0]) {
642         r = sym->header->garbages[0];
643         sym->header->nrecords++;
644         rn = pat_at(sym, r);
645         sym->header->garbages[0] = rn->l;
646       } else {
647         if (!(rn = pat_node_new(sym, &r))) { return 0; }
648       }
649       SET_PAT_IMD(rn, 0);
650       rn->key = *lkey;
651     } else {
652       if (sym->header->garbages[size2]) {
653         r = sym->header->garbages[size2];
654         sym->header->nrecords++;
655         rn = pat_at(sym, r);
656         sym->header->garbages[size2] = rn->l;
657         memcpy(pat_node_get_key(sym, rn), key, size);
658       } else {
659         if (!(rn = pat_node_new(sym, &r))) { return 0; }
660         pat_node_set_key(sym, rn, key, (unsigned int)size);
661       }
662       *lkey = rn->key;
663     }
664   }
665   rn->check = c;
666   SET_PAT_DEL(rn, 0);
667   SET_PAT_PKT(rn, 0);
668   if (nth_bit(key, c)) {
669     rn->r = r;
670     rn->l = *p0;
671   } else {
672     rn->r = *p0;
673     rn->l = r;
674   }
675   *p0 = r;
676   *new = 1;
677   return r;
678 }
679 
680 inline static int
chop(sen_sym08 * sym,const char ** key,uint32_t * lkey)681 chop(sen_sym08 *sym, const char **key, uint32_t *lkey)
682 {
683   size_t len = sen_str_charlen(*key, sym->encoding);
684   if (len) {
685     *lkey += len;
686     *key += len;
687     return **key;
688   } else {
689     return 0;
690   }
691 }
692 
693 sen_id
sen_sym_get08(sen_sym * sym,const void * key)694 sen_sym_get08(sen_sym *sym, const void *key)
695 {
696   uint32_t new, lkey = 0;
697   sen_id r0;
698   r0 = _sen_sym_get(SYM, (uint8_t *)key, &new, &lkey);
699   if ((SYM->flags & SEN_SYM_WITH_SIS) && (*((uint8_t *)key) & 0x80)) { // todo: refine!!
700     sis_node *sl, *sr;
701     sen_id l = r0, r;
702     if (new && (sl = sis_get(SYM, l))) {
703       const char *sis = key;
704       sl->children = l;
705       sl->sibling = 0;
706       while (chop(SYM, &sis, &lkey)) {
707         if (!(*sis & 0x80)) { break; }
708         r = _sen_sym_get(SYM, (uint8_t *)sis, &new, &lkey);
709         if (!(sr = sis_get(SYM, r))) { break; }
710         if (new) {
711           sl->sibling = r;
712           sr->children = l;
713           sr->sibling = 0;
714         } else {
715           sl->sibling = sr->children;
716           sr->children = l;
717           break;
718         }
719         l = r;
720         sl = sr;
721       }
722     }
723   }
724   return r0;
725 }
726 
727 sen_id
sen_sym_at08(sen_sym * sym,const void * key)728 sen_sym_at08(sen_sym *sym, const void *key)
729 {
730   sen_id r;
731   pat_node *rn;
732   int c = -1;
733   size_t len = SYM->key_size * 8;
734   if (!key) { return SEN_SYM_NIL; }
735   if (!len) { len = (strlen(key) + 1) * 8; }
736   for (r = pat_at(SYM, 0)->r; r; r = nth_bit((uint8_t *)key, c) ? rn->r : rn->l) {
737     if (!(rn = pat_at(SYM, r))) { break; /* corrupt? */ }
738     if (len <= rn->check) { break; }
739     if (rn->check <= c) {
740       if (!memcmp(pat_node_get_key(SYM, rn), key, len >> 3)) { return r; }
741       break;
742     }
743     c = rn->check;
744   }
745   return SEN_SYM_NIL;
746 }
747 
748 static void
get_tc(sen_sym08 * sym,sen_set * h,pat_node * rn,unsigned int len)749 get_tc(sen_sym08 *sym, sen_set *h, pat_node *rn, unsigned int len)
750 {
751   sen_id id;
752   pat_node *node;
753   id = rn->r;
754   if (id && (node = pat_at(sym, id))) {
755     if (node->check > rn->check) {
756       sen_set_get(h, &id, NULL);
757       get_tc(sym, h, node, len);
758     } else if (node->check < len) {
759       sen_set_get(h, &id, NULL);
760     }
761   }
762   id = rn->l;
763   if (id && (node = pat_at(sym, id))) {
764     if (node->check > rn->check) {
765       sen_set_get(h, &id, NULL);
766       get_tc(sym, h, node, len);
767     } else if (node->check < len) {
768       sen_set_get(h, &id, NULL);
769     }
770   }
771 }
772 
773 sen_set *
sen_sym_prefix_search08(sen_sym * sym,const void * key)774 sen_sym_prefix_search08(sen_sym *sym, const void *key)
775 {
776   int c;
777   size_t len;
778   sen_id r;
779   pat_node *rn;
780   if (!key || SYM->key_size) { return NULL; } /* only for string */
781   len = strlen(key) * 8;
782   rn = pat_at(SYM, 0);
783   c = -1;
784   r = rn->r;
785   while (r) {
786     if (!(rn = pat_at(SYM, r))) { return NULL; }
787     if (c < rn->check && len > rn->check) {
788       c = rn->check;
789       r = nth_bit((uint8_t *)key, c) ? rn->r : rn->l;
790       continue;
791     }
792     if (!memcmp(pat_node_get_key(SYM, rn), key, len >> 3)) {
793       sen_set *h = sen_set_open(sizeof(sen_id), 0, 0);
794       SEN_ASSERT(h);
795 //      if (!h) { return NULL; }
796       sen_set_get(h, &r, NULL);
797       if (rn->check >= len) { get_tc(SYM, h, rn, (unsigned int)len); }
798       return h;
799     }
800     break;
801   }
802   return NULL;
803 }
804 
805 sen_set *
sen_sym_suffix_search08(sen_sym * sym,const void * key)806 sen_sym_suffix_search08(sen_sym *sym, const void *key)
807 {
808   sen_id r;
809   uint32_t *offset;
810   if (!key || SYM->key_size) { return NULL; }
811   if ((r = sen_sym_at08(sym, key))) {
812     sen_set *h = sen_set_open(sizeof(sen_id), sizeof(uint32_t), 0);
813     SEN_ASSERT(h);
814 //    if (!h) { return NULL; }
815     sen_set_get(h, &r, (void **) &offset);
816     *offset = 0;
817     if (SYM->flags & SEN_SYM_WITH_SIS) { sis_collect(SYM, h, r, 1); }
818     return h;
819   }
820   return NULL;
821 }
822 
823 sen_id
sen_sym_common_prefix_search08(sen_sym * sym,const void * key)824 sen_sym_common_prefix_search08(sen_sym *sym, const void *key)
825 {
826   pat_node *rn, *rnl;
827   sen_id r, rl, r2 = SEN_SYM_NIL;
828   int c = -1, cl, len = 0, n, len2 = 8, nth_bit;
829   if (!key) { return SEN_SYM_NIL; }
830   if (SYM->key_size) { return SEN_SYM_NIL; /* only for string */ }
831   // len = (strlen(key) + 1) * 8;
832   for (r = pat_at(SYM, 0)->r; r;) {
833     if (!(rn = pat_at(SYM, r))) { break; /* corrupt? */ }
834     // if (len <= rn->check) { break; }
835     if (rn->check <= c) {
836       char *p = pat_node_get_key(SYM, rn);
837       if (!memcmp(p, key, strlen(p))) { return r; }
838       break;
839     }
840     c = rn->check;
841     n = c >> 3;
842     while (len < n) { if (!((char *)key)[len++]) { goto exit; } }
843     nth_bit = ((uint8_t *)key)[n] & (0x80 >> (c & 7));
844     if (nth_bit) {
845       if (len2 <= c) {
846         len2 = (n + 1) * 8;
847         for (cl = c, rl = rn->l; rl; cl = rnl->check, rl = rnl->l) {
848           if (!(rnl = pat_at(SYM, rl))) { break; /* corrupt? */ }
849           if (len2 <= rnl->check) { break; }
850           if (rnl->check <= cl) {
851             char *p = pat_node_get_key(SYM, rnl);
852             if (!p[n] && !memcmp(p, key, n)) { r2 = rl; }
853             break;
854           }
855         }
856       }
857       r = rn->r;
858     } else {
859       r = rn->l;
860     }
861   }
862 exit :
863   return r2;
864 }
865 
866 inline static sen_rc
_sen_sym_del(sen_sym08 * sym,const char * key,int shared)867 _sen_sym_del(sen_sym08 *sym, const char *key, int shared)
868 {
869   sen_sym_delinfo *di;
870   uint8_t direction;
871   pat_node *rn, *rn0 = NULL, *rno;
872   int c, c0 = -1;
873   size_t len = sym->key_size * 8;
874   sen_id r, otherside, *p, *p0 = NULL;
875   if (!key) { return sen_invalid_argument; }
876   if (!len) { len = (strlen(key) + 1) * 8; }
877   di = delinfo_new(sym); /* must be called before find rn */
878   SET_DL_SH(di, shared ? DL_SHARED : 0);
879   rn = pat_at(sym, 0);
880   c = -1;
881   p = &rn->r;
882   for (;;) {
883     if (!(r = *p)) { return sen_invalid_argument; }
884     if (!(rn = pat_at(sym, r))) { return sen_invalid_format; }
885     if (len <= rn->check) { return sen_invalid_argument; }
886     if (c >= rn->check) {
887       if (!memcmp(pat_node_get_key(sym, rn), key, len >> 3)) {
888         break; /* found */
889       } else {  return sen_invalid_argument; }
890     }
891     c0 = c; c = rn->check;
892     p0 = p; p = nth_bit((uint8_t *)key, c) ? &rn->r : &rn->l;
893     rn0 = rn;
894   }
895   direction = (rn0->r == r);
896   otherside = direction ? rn0->l : rn0->r;
897   if (rn == rn0) {
898     SET_DL_ST(di, DL_PHASE2);
899     di->d = r;
900     if (otherside) {
901       rno = pat_at(sym, otherside);
902       if (rno && c0 < rno->check && rno->check <= c) {
903         if (!delinfo_search(sym, otherside)) {
904           SEN_LOG(sen_log_error, "no delinfo found %d", otherside);
905         }
906         rno->check = 0;
907       }
908     }
909     *p0 = otherside;
910   } else {
911     sen_sym_delinfo *ldi = NULL, *ddi = NULL;
912     if (PAT_DEL(rn)) { ldi = delinfo_search(sym, r); }
913     if (PAT_DEL(rn0)) { ddi = delinfo_search(sym, *p0); }
914     if (ldi) {
915       SET_PAT_DEL(rn, 0);
916       SET_DL_ST(di, DL_PHASE2);
917       if (ddi) {
918         SET_PAT_DEL(rn0, 0);
919         SET_DL_ST(ddi, DL_PHASE2);
920         if (ddi == ldi) {
921           if (r != DL_LD(ddi)) {
922             SEN_LOG(sen_log_error, "r(%d) != DL_LD(ddi)(%d)", r, DL_LD(ddi));
923           }
924           di->d = r;
925         } else {
926           SET_DL_LD(ldi, DL_LD(ddi));
927           di->d = r;
928         }
929       } else {
930         SET_PAT_DEL(rn0, PAT_DELETING);
931         SET_DL_LD(ldi, *p0);
932         di->d = r;
933       }
934     } else {
935       SET_PAT_DEL(rn, PAT_DELETING);
936       if (ddi) {
937         if (ddi->d != *p0) {
938           SEN_LOG(sen_log_error, "ddi->d(%d) != *p0(%d)", ddi->d, *p0);
939         }
940         SET_PAT_DEL(rn0, 0);
941         SET_DL_ST(ddi, DL_PHASE2);
942         SET_DL_ST(di, DL_PHASE1);
943         SET_DL_LD(di, DL_LD(ddi));
944         di->d = r;
945         /*
946         SET_PAT_DEL(rn0, 0);
947         ddi->d = r;
948         SET_DL_ST(di, DL_PHASE2);
949         di->d = *p0;
950         */
951       } else {
952         SET_PAT_DEL(rn0, PAT_DELETING);
953         SET_DL_ST(di, DL_PHASE1);
954         SET_DL_LD(di, *p0);
955         di->d = r;
956         // sen_log("sym_del d=%d ld=%d stat=%d", r, *p0, DL_PHASE1);
957       }
958     }
959     if (*p0 == otherside) {
960       rn0->check = 0;
961     } else {
962       if (otherside) {
963         rno = pat_at(sym, otherside);
964         if (rno && c0 < rno->check && rno->check <= c) {
965           if (!delinfo_search(sym, otherside)) {
966             SEN_LOG(sen_log_error, "no delinfo found %d", otherside);
967           }
968           rno->check = 0;
969         }
970       }
971       *p0 = otherside;
972     }
973   }
974   sym->header->nrecords--;
975   return sen_success;
976 }
977 
978 sen_rc
sen_sym_del08(sen_sym * sym,const void * key)979 sen_sym_del08(sen_sym *sym, const void *key)
980 {
981   return _sen_sym_del(SYM, key, 0);
982 }
983 
984 const char *
_sen_sym_key08(sen_sym * sym,sen_id id)985 _sen_sym_key08(sen_sym *sym, sen_id id)
986 {
987   pat_node *node = pat_at(SYM, id);
988   if (!node) { return NULL; }
989   return pat_node_get_key(SYM, node);
990 }
991 
992 int
sen_sym_key08(sen_sym * sym,sen_id id,void * keybuf,int bufsize)993 sen_sym_key08(sen_sym *sym, sen_id id, void *keybuf, int bufsize)
994 {
995   int len;
996   char *key;
997   pat_node *node;
998   if (!(node = pat_at(SYM, id))) { return 0; }
999   if (!(key = pat_node_get_key(SYM, node))) { return 0; }
1000   if (!(len = SYM->key_size)) { len = (int)(strlen(key) + 1); }
1001   if (keybuf && bufsize >= len) { memcpy(keybuf, key, len); }
1002   return len;
1003 }
1004 
1005 int
sen_sym_pocket_get08(sen_sym * sym,sen_id id)1006 sen_sym_pocket_get08(sen_sym *sym, sen_id id)
1007 {
1008   pat_node *node = pat_at(SYM, id);
1009   if (!node) { return -1; }
1010   return PAT_PKT(node);
1011 }
1012 
1013 sen_rc
sen_sym_pocket_set08(sen_sym * sym,sen_id id,unsigned int value)1014 sen_sym_pocket_set08(sen_sym *sym, sen_id id, unsigned int value)
1015 {
1016   pat_node *node = pat_at(SYM, id);
1017   if (value > PAT_POCKET_MAX || !node) { return sen_invalid_argument; }
1018   SET_PAT_PKT(node, value);
1019   return sen_success;
1020 }
1021 
1022 int
sen_sym_del_with_sis08(sen_sym * sym,sen_id id,int (* func)(sen_id,void *),void * func_arg)1023 sen_sym_del_with_sis08(sen_sym *sym, sen_id id,
1024                      int(*func)(sen_id, void *), void *func_arg)
1025 {
1026   int level = 0, shared;
1027   const char *key = NULL, *_key;
1028   sis_node *sp, *ss = NULL, *si = sis_at(SYM, id);
1029   while (id) {
1030     if ((si && si->children && si->children != id) || !func(id, func_arg)) { break; }
1031     if (!(_key = _sen_sym_key08(sym, id))) { break; }
1032     if (_key == key) {
1033       shared = 1;
1034     } else {
1035       key = _key;
1036       shared = 0;
1037     }
1038     _sen_sym_del(SYM, key, shared);
1039     if (si) {
1040       sen_id *p, sid;
1041       uint32_t lkey = 0;
1042       if ((*key & 0x80) && chop(SYM, &key, &lkey)) {
1043         if ((sid = sen_sym_at08(sym, key)) && (ss = sis_at(SYM, sid))) {
1044           for (p = &ss->children; *p && *p != sid; p = &sp->sibling) {
1045             if (*p == id) {
1046               *p = si->sibling;
1047               break;
1048             }
1049             sp = sis_at(SYM, *p);
1050           }
1051         }
1052       } else {
1053         sid = SEN_SYM_NIL;
1054       }
1055       si->sibling = 0;
1056       si->children = 0;
1057       id = sid;
1058       si = ss;
1059     } else {
1060       id = SEN_SYM_NIL;
1061     }
1062     level++;
1063   }
1064   if (level) {
1065     uint32_t lkey = 0;
1066     while (id && key) {
1067       if (_sen_sym_key08(sym, id) != key) { break; }
1068       {
1069         pat_node *rn = pat_at(SYM, id);
1070         if (lkey) {
1071           rn->key = lkey;
1072         } else {
1073           pat_node_set_key(SYM, rn, (uint8_t *)key, (unsigned int)(strlen(key) + 1));
1074           lkey = rn->key;
1075         }
1076       }
1077       if (!((*key & 0x80) && chop(SYM, &key, &lkey))) { break; }
1078       id = sen_sym_at08(sym, key);
1079     }
1080   }
1081   return level;
1082 }
1083