1 /* Copyright(C) 2004 Brazil
2 
3   This library is free software; you can redistribute it and/or
4   modify it under the terms of the GNU Lesser General Public
5   License as published by the Free Software Foundation; either
6   version 2.1 of the License, or (at your option) any later version.
7 
8   This library is distributed in the hope that it will be useful,
9   but WITHOUT ANY WARRANTY; without even the implied warranty of
10   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11   Lesser General Public License for more details.
12 
13   You should have received a copy of the GNU Lesser General Public
14   License along with this library; if not, write to the Free Software
15   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16 */
17 #include "senna_in.h"
18 #include <fcntl.h>
19 #include <string.h>
20 #include <sys/stat.h>
21 #include "sym.h"
22 
23 /* v08 functions */
24 
25 sen_sym *sen_sym_create08(const char *path, uint32_t key_size,
26                           uint32_t flags, sen_encoding encoding);
27 sen_sym *sen_sym_open08(const char *path);
28 sen_id sen_sym_get08(sen_sym *sym, const void *key);
29 sen_id sen_sym_at08(sen_sym *sym, const void *key);
30 sen_set *sen_sym_prefix_search08(sen_sym *sym, const void *key);
31 sen_set *sen_sym_suffix_search08(sen_sym *sym, const void *key);
32 sen_id sen_sym_common_prefix_search08(sen_sym *sym, const void *key);
33 sen_rc sen_sym_del08(sen_sym *sym, const void *key);
34 const char *_sen_sym_key08(sen_sym *sym, sen_id id);
35 int sen_sym_key08(sen_sym *sym, sen_id id, void *keybuf, int bufsize);
36 int sen_sym_pocket_get08(sen_sym *sym, sen_id id);
37 sen_rc sen_sym_pocket_set08(sen_sym *sym, sen_id id, unsigned int value);
38 int sen_sym_del_with_sis08(sen_sym *sym, sen_id id,
39                            int(*func)(sen_id, void *), void *func_arg);
40 
41 #define SEN_SYM_HEADER_SIZE 0x10000
42 #define SEN_IDSTR "SENNA:SYM:01.00"
43 #define SEN_SYM_DELETED (SEN_SYM_MAX_ID + 1)
44 
45 #define SEN_SYM_SEGMENT_SIZE 0x400000
46 #define W_OF_KEY_IN_A_SEGMENT 22
47 #define W_OF_PAT_IN_A_SEGMENT 18
48 #define W_OF_SIS_IN_A_SEGMENT 19
49 #define KEY_MASK_IN_A_SEGMENT 0x3fffff
50 #define PAT_MASK_IN_A_SEGMENT 0x3ffff
51 #define SIS_MASK_IN_A_SEGMENT 0x7ffff
52 #define SEG_NOT_ASSIGNED 0xffff
53 
54 struct sen_sym_header {
55   char idstr[16];
56   uint32_t flags;
57   sen_encoding encoding;
58   uint32_t key_size;
59   uint32_t nrecords;
60   uint32_t curr_rec;
61   int32_t curr_key;
62   int32_t curr_del;
63   int32_t curr_del2;
64   int32_t curr_del3;
65   uint32_t lock;
66   uint32_t reserved[18];
67   uint16_t keyarray[SEN_SYM_MAX_SEGMENT];
68   uint16_t patarray[SEN_SYM_MAX_SEGMENT];
69   uint16_t sisarray[SEN_SYM_MAX_SEGMENT];
70   sen_sym_delinfo delinfos[SEN_SYM_NDELINFOS];
71   sen_id garbages[(SEN_SYM_MAX_KEY_LENGTH + 1) / 8];
72 };
73 
74 typedef struct {
75   sen_id r;
76   sen_id l;
77   uint32_t key;
78   uint16_t check;
79   /* deleting : 1, immediate : 1, pocket : 14 */
80   uint16_t bitfield;
81 } pat_node;
82 
83 #define PAT_DELETING 1
84 #define PAT_IMMEDIATE 2
85 #define PAT_POCKET_MAX 0x3fff
86 #define PAT_DEL(x) ((x)->bitfield & PAT_DELETING)
87 #define PAT_IMD(x) ((x)->bitfield & PAT_IMMEDIATE)
88 #define PAT_PKT(x) ((x)->bitfield >> 2)
89 #define SET_PAT_DEL(x,v) ((x)->bitfield = ((x)->bitfield & 0xfffe) | (v))
90 #define SET_PAT_IMD(x,v) ((x)->bitfield = ((x)->bitfield & 0xfffd) | (v))
91 #define SET_PAT_PKT(x,v) ((x)->bitfield = ((x)->bitfield & 3) | ((v) << 2))
92 
93 typedef struct {
94   sen_id children;
95   sen_id sibling;
96 } sis_node;
97 
98 enum {
99   segment_empty = 0,
100   segment_key,
101   segment_pat,
102   segment_sis
103 };
104 
105 /* bit operation */
106 
107 /*
108 inline static uint32_t
109 nth_bit(const byte *key, uint32_t n)
110 {
111   return key[n >> 3] & (0x80 >> (n & 7));
112 }
113 */
114 
115 #define nth_bit(key,n) (((key)[(n)>>3]) & (0x80 >> ((n) & 7)))
116 
117 /* segment operation */
118 
119 inline static sen_rc
segment_new(sen_sym * sym,int segtype)120 segment_new(sen_sym *sym, int segtype)
121 {
122   int i;
123   uint16_t seg = 0, k = 0, p = 0, s = 0;
124   char used[SEN_SYM_MAX_SEGMENT];
125   memset(used, 0, SEN_SYM_MAX_SEGMENT);
126   for (i = 0; i < SEN_SYM_MAX_SEGMENT; i++) {
127     if ((seg = sym->header->keyarray[i]) != SEG_NOT_ASSIGNED) {
128       k = i + 1; used[seg] = 1;
129     }
130     if ((seg = sym->header->patarray[i]) != SEG_NOT_ASSIGNED) {
131       p = i + 1; used[seg] = 1;
132     }
133     if ((seg = sym->header->sisarray[i]) != SEG_NOT_ASSIGNED) {
134       s = i + 1; used[seg] = 1;
135     }
136   }
137   for (seg = 0; used[seg];) {
138     if (++seg == SEN_SYM_MAX_SEGMENT) { return sen_memory_exhausted; }
139   }
140   switch (segtype) {
141   case segment_key :
142     sym->header->keyarray[k] = seg;
143     sym->header->curr_key = k * SEN_SYM_SEGMENT_SIZE;
144     break;
145   case segment_pat :
146     sym->header->patarray[p] = seg;
147     break;
148   case segment_sis :
149     sym->header->sisarray[s] = seg;
150     break;
151   }
152   return sen_success;
153 }
154 
155 #define SEG_LOAD(io,segno,addr)\
156 {\
157   if (segno < SEN_SYM_MAX_SEGMENT) {\
158     SEN_IO_SEG_REF(io, segno, addr);\
159     if (addr) { SEN_IO_SEG_UNREF(io, segno); };\
160   }\
161 }
162 
163 /* patricia array operation */
164 
165 inline static pat_node *
pat_at(sen_sym * sym,sen_id id)166 pat_at(sen_sym *sym, sen_id id)
167 {
168   uint16_t lseg, pseg;
169   void **p;
170   if (id > SEN_SYM_MAX_ID) { return NULL; }
171   lseg = id >> W_OF_PAT_IN_A_SEGMENT;
172   p = &sym->pataddrs[lseg];
173   if (!*p) {
174     if ((pseg = sym->header->patarray[lseg]) == SEG_NOT_ASSIGNED) { return NULL; }
175     SEG_LOAD(sym->io, pseg, *p);
176     if (!*p) { return NULL; }
177   }
178   return (pat_node *)(((byte *)*p) + ((id & PAT_MASK_IN_A_SEGMENT) * sizeof(pat_node)));
179 }
180 
181 #define PAT_AT(sym,id,n) do {\
182   void **p_;\
183   sen_id id_ = (id);\
184   uint16_t lseg_ = id_ >> W_OF_PAT_IN_A_SEGMENT;\
185   if (id_ > SEN_SYM_MAX_ID) { (n) = NULL; break; }\
186   p_ = &(sym)->pataddrs[lseg_];\
187   if (!*p_) {\
188     uint16_t pseg_ = (sym)->header->patarray[lseg_];\
189     if (pseg_ == SEG_NOT_ASSIGNED) { (n) = NULL; break; }\
190     SEG_LOAD((sym)->io, pseg_, *p_);\
191     if (!*p_) { (n) = NULL; break; }\
192   }\
193   (n) = (pat_node *)(((byte *)*p_) + ((id_ & PAT_MASK_IN_A_SEGMENT) * sizeof(pat_node)));\
194 } while(0)
195 
196 inline static pat_node *
pat_get(sen_sym * sym,sen_id id)197 pat_get(sen_sym *sym, sen_id id)
198 {
199   uint16_t lseg, pseg;
200   void **p;
201   if (id > SEN_SYM_MAX_ID) { return NULL; }
202   lseg = id >> W_OF_PAT_IN_A_SEGMENT;
203   p = &sym->pataddrs[lseg];
204   if (!*p) {
205     while ((pseg = sym->header->patarray[lseg]) == SEG_NOT_ASSIGNED) {
206       segment_new(sym, segment_pat);
207     }
208     SEG_LOAD(sym->io, pseg, *p);
209     if (!*p) { return NULL; }
210   }
211   return (pat_node *)(((byte *)*p) + ((id & PAT_MASK_IN_A_SEGMENT) * sizeof(pat_node)));
212 }
213 
214 inline static pat_node *
pat_node_new(sen_sym * sym,sen_id * id)215 pat_node_new(sen_sym *sym, sen_id *id)
216 {
217   uint32_t n = sym->header->curr_rec + 1;
218   pat_node *res;
219   if (n > SEN_SYM_MAX_ID) { return NULL; }
220   if ((res = pat_get(sym, n))) {
221     sym->header->curr_rec = n;
222     sym->header->nrecords++;
223   }
224   if (id) { *id = n; }
225   return res;
226 }
227 
228 /* sis operation */
229 
230 inline static sis_node *
sis_at(sen_sym * sym,sen_id id)231 sis_at(sen_sym *sym, sen_id id)
232 {
233   uint16_t lseg, pseg;
234   void **p;
235   if (id > SEN_SYM_MAX_ID) { return NULL; }
236   lseg = id >> W_OF_SIS_IN_A_SEGMENT;
237   p = &sym->sisaddrs[lseg];
238   if (!*p) {
239     if ((pseg = sym->header->sisarray[lseg]) == SEG_NOT_ASSIGNED) { return NULL; }
240     SEG_LOAD(sym->io, pseg, *p);
241     if (!*p) { return NULL; }
242   }
243   return (sis_node *)(((byte *)*p) + ((id & SIS_MASK_IN_A_SEGMENT) * sizeof(sis_node)));
244 }
245 
246 inline static sis_node *
sis_get(sen_sym * sym,sen_id id)247 sis_get(sen_sym *sym, sen_id id)
248 {
249   uint16_t lseg, pseg;
250   void **p;
251   if (id > SEN_SYM_MAX_ID) { return NULL; }
252   lseg = id >> W_OF_SIS_IN_A_SEGMENT;
253   p = &sym->sisaddrs[lseg];
254   if (!*p) {
255     while ((pseg = sym->header->sisarray[lseg]) == SEG_NOT_ASSIGNED) {
256       segment_new(sym, segment_sis);
257     }
258     SEG_LOAD(sym->io, pseg, *p);
259     if (!*p) { return NULL; }
260   }
261   return (sis_node *)(((byte *)*p) + ((id & SIS_MASK_IN_A_SEGMENT) * sizeof(sis_node)));
262 }
263 
264 #define MAX_LEVEL 16
265 
266 static void
sis_collect(sen_sym * sym,sen_set * h,sen_id id,uint32_t level)267 sis_collect(sen_sym *sym, sen_set *h, sen_id id, uint32_t level)
268 {
269   uint32_t *offset;
270   sis_node *sl = sis_at(sym, id);
271   if (sl) {
272     sen_id sid = sl->children;
273     while (sid && sid != id) {
274       if (sen_set_get(h, &sid, (void **) &offset)) {
275         *offset = level;
276         if (level < MAX_LEVEL) { sis_collect(sym, h, sid, level + 1); }
277         if (!(sl = sis_at(sym, sid))) { break; }
278         sid = sl->sibling;
279       } else {
280         /* todo : must be handled */
281       }
282     }
283   }
284 }
285 
286 /* key operation */
287 
288 #define KEY_AT(sym,pos,ptr) \
289 {\
290   uint16_t lseg, pseg;\
291   void **p;\
292   lseg = pos >> W_OF_KEY_IN_A_SEGMENT;\
293   p = &sym->keyaddrs[lseg];\
294   if (*p) {\
295     ptr = ((byte *)*p) + (pos & KEY_MASK_IN_A_SEGMENT);\
296   } else {\
297     if ((pseg = sym->header->keyarray[lseg]) == SEG_NOT_ASSIGNED) {\
298       ptr = NULL;\
299     } else {\
300       SEG_LOAD(sym->io, pseg, *p);\
301       ptr = *p ? ((byte *)*p) + (pos & KEY_MASK_IN_A_SEGMENT) : NULL;\
302     }\
303   }\
304 }
305 
306 inline static uint32_t
key_put(sen_sym * sym,const uint8_t * key,int len)307 key_put(sen_sym *sym, const uint8_t *key, int len)
308 {
309   int lpos;
310   uint32_t res = sym->header->curr_key;
311   if (len >= SEN_SYM_SEGMENT_SIZE) { return 0; }
312   lpos = res & KEY_MASK_IN_A_SEGMENT;
313   if (lpos + len > SEN_SYM_SEGMENT_SIZE || !lpos) {
314     if (segment_new(sym, segment_key)) { return 0; }
315     res = sym->header->curr_key;
316   }
317   {
318     uint8_t *dest;
319     KEY_AT(sym, res, dest);
320     if (!dest) { return 0; }
321     memcpy(dest, key, len);
322   }
323   sym->header->curr_key += len;
324   return res;
325 }
326 
327 inline static uint8_t *
pat_node_get_key(sen_sym * sym,pat_node * n)328 pat_node_get_key(sen_sym *sym, pat_node *n)
329 {
330   if (PAT_IMD(n)) {
331     return (uint8_t *) &n->key;
332   } else {
333     uint8_t *res;
334     KEY_AT(sym, n->key, res);
335     return res;
336   }
337 }
338 
339 inline static sen_rc
pat_node_set_key(sen_sym * sym,pat_node * n,const uint8_t * key,unsigned int len)340 pat_node_set_key(sen_sym *sym, pat_node *n, const uint8_t *key, unsigned int len)
341 {
342   if (!key || !len) { return sen_invalid_argument; }
343   if (len <= sizeof(uint32_t)) {
344     SET_PAT_IMD(n, PAT_IMMEDIATE);
345     memcpy(&n->key, key, len);
346   } else {
347     SET_PAT_IMD(n, 0);
348     n->key = key_put(sym, key, len);
349   }
350   return sen_success;
351 }
352 
353 /* delinfo operation */
354 
355 #define DL_EMPTY  0x00000000
356 #define DL_PHASE1 0x00000001
357 #define DL_PHASE2 0x00000002
358 #define DL_STMASK 0x00000003
359 #define DL_SHARED 0x80000000
360 #define DL_LD(x) (((x)->bitfield >> 2) & SEN_SYM_MAX_ID)
361 #define DL_ST(x) ((x)->bitfield & DL_STMASK)
362 #define DL_SH(x) ((x)->bitfield & DL_SHARED)
363 #define SET_DL_LD(x,v) ((x)->bitfield = ((x)->bitfield & (DL_STMASK|DL_SHARED)) | \
364                                         (((v) & SEN_SYM_MAX_ID) << 2))
365 #define SET_DL_ST(x,v) ((x)->bitfield = ((x)->bitfield & ~DL_STMASK) | ((v) & DL_STMASK))
366 #define SET_DL_SH(x,v) ((x)->bitfield = ((x)->bitfield & ~DL_SHARED) | ((v) & DL_SHARED))
367 
368 inline static sen_sym_delinfo *
delinfo_search(sen_sym * sym,sen_id id)369 delinfo_search(sen_sym *sym, sen_id id)
370 {
371   int i;
372   sen_sym_delinfo *di;
373   for (i = (sym->header->curr_del2) & SEN_SYM_MDELINFOS;
374        i != sym->header->curr_del;
375        i = (i + 1) & SEN_SYM_MDELINFOS) {
376     di = &sym->header->delinfos[i];
377     if (DL_ST(di) != DL_PHASE1) { continue; }
378     if (DL_LD(di) == id) { return di; }
379     if (di->d == id) { return di; }
380   }
381   return NULL;
382 }
383 
384 inline static sen_rc
delinfo_turn_2(sen_sym * sym,sen_sym_delinfo * di)385 delinfo_turn_2(sen_sym *sym, sen_sym_delinfo *di)
386 {
387   sen_id d, *p = NULL;
388   pat_node *ln, *dn;
389   // sen_log("delinfo_turn_2> di->d=%d di->ld=%d stat=%d", di->d, DL_LD(di), DL_ST(di));
390   if (DL_ST(di) != DL_PHASE1) { return sen_success; }
391   PAT_AT(sym, DL_LD(di), ln);
392   if (!ln) { return sen_invalid_argument; }
393   if (!(d = di->d)) { return sen_invalid_argument; }
394   PAT_AT(sym, d, dn);
395   if (!dn) { return sen_invalid_argument; }
396   SET_PAT_DEL(ln, 0);
397   SET_PAT_DEL(dn, 0);
398   {
399     sen_id r, *p0;
400     pat_node *rn;
401     int c;
402     size_t len = sym->key_size * 8;
403     const uint8_t *key = pat_node_get_key(sym, dn);
404     if (!key) { return sen_invalid_argument; }
405     if (!len) { len = (strlen((char *)key) + 1) * 8; }
406     PAT_AT(sym, 0, rn);
407     c = -1;
408     p0 = &rn->r;
409     while ((r = *p0)) {
410       if (r == d) {
411         p = p0;
412         break;
413       }
414       PAT_AT(sym, r, rn);
415       if (!rn) { return sen_invalid_format; }
416       if (rn->check <= c || len <= rn->check) { break; }
417       c = rn->check;
418       p0 = nth_bit((uint8_t *)key, c) ? &rn->r : &rn->l;
419     }
420   }
421   if (p) {
422     ln->check = dn->check;
423     ln->r = dn->r;
424     ln->l = dn->l;
425     *p = DL_LD(di);
426   } else {
427     /* debug */
428     int j;
429     sen_id dd;
430     sen_sym_delinfo *ddi;
431     SEN_LOG(sen_log_debug, "failed to find d=%d", d);
432     for (j = (sym->header->curr_del2 + 1) & SEN_SYM_MDELINFOS;
433          j != sym->header->curr_del;
434          j = (j + 1) & SEN_SYM_MDELINFOS) {
435       ddi = &sym->header->delinfos[j];
436       if (DL_ST(ddi) != DL_PHASE1) { continue; }
437       PAT_AT(sym, DL_LD(ddi), ln);
438       if (!ln) { continue; }
439       if (!(dd = ddi->d)) { continue; }
440       if (d == DL_LD(ddi)) {
441         SEN_LOG(sen_log_debug, "found!!!, d(%d) become ld of (%d)", d, dd);
442       }
443     }
444     /* debug */
445   }
446   SET_DL_ST(di, DL_PHASE2);
447   di->d = d;
448   // sen_log("delinfo_turn_2< di->d=%d di->ld=%d", di->d, DL_LD(di));
449   return sen_success;
450 }
451 
452 inline static sen_rc
delinfo_turn_3(sen_sym * sym,sen_sym_delinfo * di)453 delinfo_turn_3(sen_sym *sym, sen_sym_delinfo *di)
454 {
455   pat_node *dn;
456   uint32_t size;
457   if (DL_ST(di) != DL_PHASE2) { return sen_success; }
458   PAT_AT(sym, di->d, dn);
459   if (!dn) { return sen_invalid_argument; }
460   if (DL_SH(di)) {
461     SET_PAT_IMD(dn, PAT_IMMEDIATE);
462     size = 0;
463   } else {
464     if (sym->key_size) {
465       size = sym->key_size;
466     } else {
467       // size = dn->check = (uint16_t)(strlen(key) + 1);
468       if (PAT_IMD(dn)) {
469         size = 0;
470       } else {
471         const uint8_t *key = pat_node_get_key(sym, dn);
472         if (!key) { return sen_invalid_argument; }
473         size = strlen((char *)key) + 1;
474       }
475     }
476   }
477   SET_DL_ST(di, DL_EMPTY);
478   dn->r = SEN_SYM_DELETED;
479   dn->l = sym->header->garbages[size];
480   sym->header->garbages[size] = di->d;
481   return sen_success;
482 }
483 
484 inline static sen_sym_delinfo *
delinfo_new(sen_sym * sym)485 delinfo_new(sen_sym *sym)
486 {
487   sen_sym_delinfo *res = &sym->header->delinfos[sym->header->curr_del];
488   uint32_t n = (sym->header->curr_del + 1) & SEN_SYM_MDELINFOS;
489   int gap = ((n + SEN_SYM_NDELINFOS - sym->header->curr_del2) & SEN_SYM_MDELINFOS)
490             - (SEN_SYM_NDELINFOS / 2);
491   while (gap-- > 0) {
492     if (delinfo_turn_2(sym, &sym->header->delinfos[sym->header->curr_del2])) {
493       SEN_LOG(sen_log_crit, "d2 failed: %d", DL_LD(&sym->header->delinfos[sym->header->curr_del2]));
494     }
495     sym->header->curr_del2 = (sym->header->curr_del2 + 1) & SEN_SYM_MDELINFOS;
496   }
497   if (n == sym->header->curr_del3) {
498     if (delinfo_turn_3(sym, &sym->header->delinfos[sym->header->curr_del3])) {
499       SEN_LOG(sen_log_crit, "d3 failed: %d", DL_LD(&sym->header->delinfos[sym->header->curr_del3]));
500     }
501     sym->header->curr_del3 = (sym->header->curr_del3 + 1) & SEN_SYM_MDELINFOS;
502   }
503   sym->header->curr_del = n;
504   return res;
505 }
506 
507 /* sym operation */
508 
509 sen_sym *
sen_sym_create(const char * path,uint32_t key_size,uint32_t flags,sen_encoding encoding)510 sen_sym_create(const char *path, uint32_t key_size,
511                uint32_t flags, sen_encoding encoding)
512 {
513   int i;
514   sen_io *io;
515   sen_sym *sym;
516   pat_node *node0;
517   struct sen_sym_header *header;
518   ERRCLR(NULL);
519   if (flags & 0x70000) {
520     return sen_sym_create08(path, key_size, flags, encoding);
521   }
522   io = sen_io_create(path, SEN_SYM_HEADER_SIZE, SEN_SYM_SEGMENT_SIZE,
523                      SEN_SYM_MAX_SEGMENT, sen_io_auto, SEN_SYM_MAX_SEGMENT);
524   if (!io) { return NULL; }
525   if (encoding == sen_enc_default) { encoding = sen_gctx.encoding; }
526   header = sen_io_header(io);
527   memcpy(header->idstr, SEN_IDSTR, 16);
528   header->flags = flags;
529   header->encoding = encoding;
530   header->key_size = key_size;
531   header->nrecords = 0;
532   header->curr_rec = 0;
533   header->curr_key = -1;
534   header->curr_del = 0;
535   header->curr_del2 = 0;
536   header->curr_del3 = 0;
537   header->lock = 0;
538   for (i = 0; i < SEN_SYM_MAX_SEGMENT; i++) {
539     header->keyarray[i] = SEG_NOT_ASSIGNED;
540     header->patarray[i] = SEG_NOT_ASSIGNED;
541     header->sisarray[i] = SEG_NOT_ASSIGNED;
542   }
543   if (!(sym = SEN_GMALLOC(sizeof(sen_sym)))) {
544     sen_io_close(io);
545     return NULL;
546   }
547   sym->v08p = 0;
548   sym->io = io;
549   sym->header = header;
550   sym->key_size = key_size;
551   sym->encoding = encoding;
552   sym->flags = flags;
553   sym->lock = &header->lock;
554   for (i = 0; i < SEN_SYM_MAX_SEGMENT; i++) {
555     sym->keyaddrs[i] = NULL;
556     sym->pataddrs[i] = NULL;
557     sym->sisaddrs[i] = NULL;
558   }
559   if (!(node0 = pat_get(sym, 0))) {
560     sen_io_close(io);
561     SEN_GFREE(sym);
562     return NULL;
563   }
564   node0->r = 0;
565   node0->l = 0;
566   node0->key = 0;
567   return sym;
568 }
569 
570 sen_sym *
sen_sym_open(const char * path)571 sen_sym_open(const char *path)
572 {
573   int i;
574   sen_io *io;
575   sen_sym *sym;
576   pat_node *r0;
577   struct sen_sym_header *header;
578   ERRCLR(NULL);
579   io = sen_io_open(path, sen_io_auto, 8192);
580   if (!io) { return NULL; }
581   header = sen_io_header(io);
582   if (memcmp(header->idstr, SEN_IDSTR, 16)) {
583     SEN_LOG(sen_log_notice, "sym_idstr (%s)", header->idstr);
584     sen_io_close(io);
585     return sen_sym_open08(path);
586   }
587   if (!(sym = SEN_GMALLOC(sizeof(sen_sym)))) {
588     sen_io_close(io);
589     return NULL;
590   }
591   sym->v08p = 0;
592   sym->io = io;
593   sym->header = header;
594   sym->key_size = header->key_size;
595   sym->encoding = header->encoding;
596   sym->flags = header->flags;
597   sym->lock = &header->lock;
598   for (i = 0; i < SEN_SYM_MAX_SEGMENT; i++) {
599     sym->keyaddrs[i] = NULL;
600     sym->pataddrs[i] = NULL;
601     sym->sisaddrs[i] = NULL;
602   }
603   PAT_AT(sym, 0, r0);
604   if (!r0) {
605     sen_io_close(io);
606     SEN_GFREE(sym);
607     return NULL;
608   }
609   return sym;
610 }
611 
612 sen_rc
sen_sym_close(sen_sym * sym)613 sen_sym_close(sen_sym *sym)
614 {
615   sen_rc rc;
616   if (!sym) { return sen_invalid_argument; }
617   rc = sen_io_close(sym->io);
618   SEN_GFREE(sym);
619   return rc;
620 }
621 
622 sen_rc
sen_sym_remove(const char * path)623 sen_sym_remove(const char *path)
624 {
625   ERRCLR(NULL);
626   if (!path) { return sen_invalid_argument; }
627   return sen_io_remove(path);
628 }
629 
630 inline static sen_id
_sen_sym_get(sen_sym * sym,const uint8_t * key,uint32_t * new,uint32_t * lkey)631 _sen_sym_get(sen_sym *sym, const uint8_t *key, uint32_t *new, uint32_t *lkey)
632 {
633   sen_id r, r0, *p0, *p1 = NULL;
634   pat_node *rn, *rn0;
635   int c = -1, c0 = -1, c1 = -1, len;
636   size_t size = sym->key_size;
637   *new = 0;
638   if (!size) { size = strlen((char *)key) + 1; }
639   len = (int)size * 8;
640   if (len > SEN_SYM_MAX_KEY_LENGTH) { return SEN_SYM_NIL; }
641   PAT_AT(sym, 0, rn0);
642   p0 = &rn0->r;
643   if (*p0) {
644     char xor, mask;
645     const uint8_t *s, *d;
646     for (;;) {
647       if (!(r0 = *p0)) {
648         if (!(s = pat_node_get_key(sym, rn0))) { return 0; }
649         break;
650       }
651       PAT_AT(sym, r0, rn0);
652       if (!rn0) { return 0; }
653       if (c0 < rn0->check && rn0->check < len) {
654         c1 = c0; c0 = rn0->check;
655         p1 = p0; p0 = nth_bit(key, c0) ? &rn0->r : &rn0->l;
656       } else {
657         if (!(s = pat_node_get_key(sym, rn0))) { return 0; }
658         if (rn0->check < len && !memcmp(s, key, size)) { return r0; }
659         break;
660       }
661     }
662     for (c = 0, d = key; *s == *d; c += 8, s++, d++);
663     for (xor = *s ^ *d, mask = 0x80; !(xor & mask); mask >>= 1, c++);
664     /* for test
665 e    if (r0 && r0 != sen_sym_at(sym, pat_node_get_key(sym, rn0))) {
666       SEN_LOG(sen_log_debug, "deleted node is used as ld %d(%d:%s) %d", r0, PAT_DEL(rn0), pat_node_get_key(sym, rn0), sen_sym_at(sym, pat_node_get_key(sym, rn0)));
667     }
668     */
669     if (c == c0 && !*p0) {
670       if (c < len - 1) { c++; }
671     } else {
672       if (c < c0) {
673         if (c > c1) {
674           p0 = p1;
675         } else {
676           PAT_AT(sym, 0, rn0);
677           p0 = &rn0->r;
678           while ((r0 = *p0)) {
679             PAT_AT(sym, r0, rn0);
680             if (!rn0) { return 0; }
681             if (c < rn0->check) { break; }
682             p0 = nth_bit(key, rn0->check) ? &rn0->r : &rn0->l;
683           }
684         }
685       }
686     }
687     if (c >= len) { return 0; }
688   } else {
689     c = len - 1;
690   }
691   {
692     size_t size2 = size > sizeof(uint32_t) ? size : 0;
693     if (*lkey && size2) {
694       if (sym->header->garbages[0]) {
695         r = sym->header->garbages[0];
696         sym->header->nrecords++;
697         PAT_AT(sym, r, rn);
698         if (!rn) { return 0; }
699         sym->header->garbages[0] = rn->l;
700       } else {
701         if (!(rn = pat_node_new(sym, &r))) { return 0; }
702       }
703       SET_PAT_IMD(rn, 0);
704       rn->key = *lkey;
705     } else {
706       if (sym->header->garbages[size2]) {
707         uint8_t *keybuf;
708         r = sym->header->garbages[size2];
709         sym->header->nrecords++;
710         PAT_AT(sym, r, rn);
711         if (!rn) { return 0; }
712         sym->header->garbages[size2] = rn->l;
713         if (!(keybuf = pat_node_get_key(sym, rn))) { return 0; }
714         memcpy(keybuf, key, size);
715       } else {
716         if (!(rn = pat_node_new(sym, &r))) { return 0; }
717         pat_node_set_key(sym, rn, key, (unsigned int)size);
718       }
719       *lkey = rn->key;
720     }
721   }
722   rn->check = c;
723   SET_PAT_DEL(rn, 0);
724   SET_PAT_PKT(rn, 0);
725   if (nth_bit(key, c)) {
726     rn->r = r;
727     rn->l = *p0;
728   } else {
729     rn->r = *p0;
730     rn->l = r;
731   }
732   *p0 = r;
733   *new = 1;
734   return r;
735 }
736 
737 inline static int
chop(sen_sym * sym,const char ** key,uint32_t * lkey)738 chop(sen_sym *sym, const char **key, uint32_t *lkey)
739 {
740   size_t len = sen_str_charlen(*key, sym->encoding);
741   if (len) {
742     *lkey += len;
743     *key += len;
744     return **key;
745   } else {
746     return 0;
747   }
748 }
749 
750 sen_id
sen_sym_get(sen_sym * sym,const void * key)751 sen_sym_get(sen_sym *sym, const void *key)
752 {
753   uint32_t new, lkey = 0;
754   sen_id r0;
755   if (!sym || !key) { return SEN_SYM_NIL; }
756   if (sym->v08p) { return sen_sym_get08(sym, key); }
757   r0 = _sen_sym_get(sym, (uint8_t *)key, &new, &lkey);
758   if (r0 && (sym->flags & SEN_SYM_WITH_SIS) &&
759       (*((uint8_t *)key) & 0x80)) { // todo: refine!!
760     sis_node *sl, *sr;
761     sen_id l = r0, r;
762     if (new && (sl = sis_get(sym, l))) {
763       const char *sis = key;
764       sl->children = l;
765       sl->sibling = 0;
766       while (chop(sym, &sis, &lkey)) {
767         if (!(*sis & 0x80)) { break; }
768         if (!(r = _sen_sym_get(sym, (uint8_t *)sis, &new, &lkey))) { break; }
769         if (!(sr = sis_get(sym, r))) { break; }
770         if (new) {
771           sl->sibling = r;
772           sr->children = l;
773           sr->sibling = 0;
774         } else {
775           sl->sibling = sr->children;
776           sr->children = l;
777           break;
778         }
779         l = r;
780         sl = sr;
781       }
782     }
783   }
784   return r0;
785 }
786 
787 sen_id
sen_sym_at(sen_sym * sym,const void * key)788 sen_sym_at(sen_sym *sym, const void *key)
789 {
790   sen_id r;
791   pat_node *rn;
792   int c = -1;
793   size_t len;
794   if (!sym || !key) { return SEN_SYM_NIL; }
795   if (sym->v08p) { return sen_sym_at08(sym, key); }
796   len = (sym->key_size ? sym->key_size : strlen(key) + 1) * 8;
797   PAT_AT(sym, 0, rn);
798   for (r = rn->r; r; r = nth_bit((uint8_t *)key, c) ? rn->r : rn->l) {
799     PAT_AT(sym, r, rn);
800     if (!rn) { break; /* corrupt? */ }
801     if (len <= rn->check) { break; }
802     if (rn->check <= c) {
803       const uint8_t *k = pat_node_get_key(sym, rn);
804       if (!k) { break; }
805       if (!memcmp(k, key, len >> 3)) { return r; }
806       break;
807     }
808     c = rn->check;
809   }
810   return SEN_SYM_NIL;
811 }
812 
813 sen_id
sen_sym_nextid(sen_sym * sym,const void * key)814 sen_sym_nextid(sen_sym *sym, const void *key)
815 {
816   sen_id r = SEN_SYM_NIL;
817   if (sym && key) {
818     size_t size = sym->key_size;
819     if (!size) { size = strlen((char *)key) + 1; }
820     if (!(r = sym->header->garbages[size > sizeof(uint32_t) ? size : 0])) {
821       r = sym->header->curr_rec + 1;
822     }
823   }
824   return r;
825 }
826 
827 static void
get_tc(sen_sym * sym,sen_set * h,pat_node * rn,unsigned int len)828 get_tc(sen_sym *sym, sen_set *h, pat_node *rn, unsigned int len)
829 {
830   sen_id id;
831   pat_node *node;
832   id = rn->r;
833   if (id) {
834     PAT_AT(sym, id, node);
835     if (node) {
836       if (node->check > rn->check) {
837         get_tc(sym, h, node, len);
838       } else {
839         sen_set_get(h, &id, NULL);
840       }
841     }
842   }
843   id = rn->l;
844   if (id) {
845     PAT_AT(sym, id, node);
846     if (node) {
847       if (node->check > rn->check) {
848         get_tc(sym, h, node, len);
849       } else {
850         sen_set_get(h, &id, NULL);
851       }
852     }
853   }
854 }
855 
856 sen_rc
sen_sym_prefix_search_with_set(sen_sym * sym,const void * key,sen_set * h)857 sen_sym_prefix_search_with_set(sen_sym *sym, const void *key, sen_set *h)
858 {
859   int c = -1;
860   const uint8_t *k;
861   size_t len = strlen(key) * 8;
862   sen_id r;
863   pat_node *rn;
864   PAT_AT(sym, 0, rn);
865   r = rn->r;
866   while (r) {
867     PAT_AT(sym, r, rn);
868     if (!rn) { return sen_invalid_format; }
869     if (c < rn->check && len > rn->check) {
870       c = rn->check;
871       r = nth_bit((uint8_t *)key, c) ? rn->r : rn->l;
872       continue;
873     }
874     if (!(k = pat_node_get_key(sym, rn))) { break; }
875     if (!memcmp(k, key, len >> 3)) {
876       if (rn->check >= len) {
877         get_tc(sym, h, rn, (unsigned int)len);
878       } else {
879         sen_set_get(h, &r, NULL);
880       }
881       return sen_success;
882     }
883     break;
884   }
885   return sen_end_of_data;
886 }
887 
888 sen_set *
sen_sym_prefix_search(sen_sym * sym,const void * key)889 sen_sym_prefix_search(sen_sym *sym, const void *key)
890 {
891   sen_set *h;
892   ERRCLR(NULL);
893   if (!sym || !key || sym->key_size) { return NULL; } /* only for string */
894   if (sym->v08p) { return sen_sym_prefix_search08(sym, key); }
895   if ((h = sen_set_open(sizeof(sen_id), 0, 0))) {
896     if (sen_sym_prefix_search_with_set(sym, key, h)) {
897       sen_set_close(h);
898       h = NULL;
899     }
900   }
901   return h;
902 }
903 
904 sen_rc
sen_sym_suffix_search_with_set(sen_sym * sym,const void * key,sen_set * h)905 sen_sym_suffix_search_with_set(sen_sym *sym, const void *key, sen_set *h)
906 {
907   sen_id r;
908   uint32_t *offset;
909   if ((r = sen_sym_at(sym, key)) && sen_set_get(h, &r, (void **) &offset)) {
910     *offset = 0;
911     if (sym->flags & SEN_SYM_WITH_SIS) { sis_collect(sym, h, r, 1); }
912     return sen_success;
913   }
914   return sen_internal_error;
915 }
916 
917 sen_set *
sen_sym_suffix_search(sen_sym * sym,const void * key)918 sen_sym_suffix_search(sen_sym *sym, const void *key)
919 {
920   sen_set *h;
921   ERRCLR(NULL);
922   if (!sym || !key || sym->key_size) { return NULL; }
923   if (sym->v08p) { return sen_sym_suffix_search08(sym, key); }
924   if ((h = sen_set_open(sizeof(sen_id), sizeof(uint32_t), 0))) {
925     if (sen_sym_suffix_search_with_set(sym, key, h)) {
926       sen_set_close(h);
927       h = NULL;
928     }
929   }
930   return h;
931 }
932 
933 sen_id
sen_sym_common_prefix_search(sen_sym * sym,const void * key)934 sen_sym_common_prefix_search(sen_sym *sym, const void *key)
935 {
936   pat_node *rn, *rnl;
937   sen_id r, rl, r2 = SEN_SYM_NIL;
938   int c = -1, cl, len = 0, n, len2 = 8, nth_bit;
939   if (!sym || !key || sym->key_size) { return SEN_SYM_NIL; } /* only for string */
940   if (sym->v08p) { return sen_sym_common_prefix_search08(sym, key); }
941   // len = (strlen(key) + 1) * 8;
942   PAT_AT(sym, 0, rn);
943   for (r = rn->r; r;) {
944     PAT_AT(sym, r, rn);
945     if (!rn) { break; /* corrupt? */ }
946     // if (len <= rn->check) { break; }
947     if (rn->check <= c) {
948       uint8_t *p = pat_node_get_key(sym, rn);
949       if (!p) { break; }
950       if (!memcmp(p, key, strlen((char *)p))) { return r; }
951       break;
952     }
953     c = rn->check;
954     n = c >> 3;
955     while (len < n) { if (!((char *)key)[len++]) { goto exit; } }
956     nth_bit = ((uint8_t *)key)[n] & (0x80 >> (c & 7));
957     if (nth_bit) {
958       if (len2 <= c) {
959         len2 = (n + 1) * 8;
960         for (cl = c, rl = rn->l; rl; cl = rnl->check, rl = rnl->l) {
961           PAT_AT(sym, rl, rnl);
962           if (!rnl) { break; /* corrupt? */ }
963           if (len2 <= rnl->check) { break; }
964           if (rnl->check <= cl) {
965             uint8_t *p = pat_node_get_key(sym, rnl);
966             if (!p) { break; }
967             if (!p[n] && !memcmp(p, key, n)) { r2 = rl; }
968             break;
969           }
970         }
971       }
972       r = rn->r;
973     } else {
974       r = rn->l;
975     }
976   }
977 exit :
978   return r2;
979 }
980 
981 inline static sen_rc
_sen_sym_del(sen_sym * sym,const char * key,int shared)982 _sen_sym_del(sen_sym *sym, const char *key, int shared)
983 {
984   sen_sym_delinfo *di;
985   uint8_t direction;
986   pat_node *rn, *rn0 = NULL, *rno;
987   int c, c0 = -1;
988   size_t len = sym->key_size * 8;
989   sen_id r, otherside, *p, *p0 = NULL;
990   if (!key) { return sen_invalid_argument; }
991   if (!len) { len = (strlen(key) + 1) * 8; }
992   di = delinfo_new(sym); /* must be called before find rn */
993   SET_DL_SH(di, shared ? DL_SHARED : 0);
994   PAT_AT(sym, 0, rn);
995   c = -1;
996   p = &rn->r;
997   for (;;) {
998     if (!(r = *p)) { return sen_invalid_argument; }
999     PAT_AT(sym, r, rn);
1000     if (!rn) { return sen_invalid_format; }
1001     if (len <= rn->check) { return sen_invalid_argument; }
1002     if (c >= rn->check) {
1003       const uint8_t *k = pat_node_get_key(sym, rn);
1004       if (!k) { return sen_invalid_argument; }
1005       if (!memcmp(k, key, len >> 3)) {
1006         break; /* found */
1007       } else {  return sen_invalid_argument; }
1008     }
1009     c0 = c; c = rn->check;
1010     p0 = p; p = nth_bit((uint8_t *)key, c) ? &rn->r : &rn->l;
1011     rn0 = rn;
1012   }
1013   direction = (rn0->r == r);
1014   otherside = direction ? rn0->l : rn0->r;
1015   if (rn == rn0) {
1016     SET_DL_ST(di, DL_PHASE2);
1017     di->d = r;
1018     if (otherside) {
1019       PAT_AT(sym, otherside, rno);
1020       if (rno && c0 < rno->check && rno->check <= c) {
1021         if (!delinfo_search(sym, otherside)) {
1022           SEN_LOG(sen_log_error, "no delinfo found %d", otherside);
1023         }
1024         rno->check = 0;
1025       }
1026     }
1027     *p0 = otherside;
1028   } else {
1029     sen_sym_delinfo *ldi = NULL, *ddi = NULL;
1030     if (PAT_DEL(rn)) { ldi = delinfo_search(sym, r); }
1031     if (PAT_DEL(rn0)) { ddi = delinfo_search(sym, *p0); }
1032     if (ldi) {
1033       SET_PAT_DEL(rn, 0);
1034       SET_DL_ST(di, DL_PHASE2);
1035       if (ddi) {
1036         SET_PAT_DEL(rn0, 0);
1037         SET_DL_ST(ddi, DL_PHASE2);
1038         if (ddi == ldi) {
1039           if (r != DL_LD(ddi)) {
1040             SEN_LOG(sen_log_error, "r(%d) != DL_LD(ddi)(%d)", r, DL_LD(ddi));
1041           }
1042           di->d = r;
1043         } else {
1044           SET_DL_LD(ldi, DL_LD(ddi));
1045           di->d = r;
1046         }
1047       } else {
1048         SET_PAT_DEL(rn0, PAT_DELETING);
1049         SET_DL_LD(ldi, *p0);
1050         di->d = r;
1051       }
1052     } else {
1053       SET_PAT_DEL(rn, PAT_DELETING);
1054       if (ddi) {
1055         if (ddi->d != *p0) {
1056           SEN_LOG(sen_log_error, "ddi->d(%d) != *p0(%d)", ddi->d, *p0);
1057         }
1058         SET_PAT_DEL(rn0, 0);
1059         SET_DL_ST(ddi, DL_PHASE2);
1060         SET_DL_ST(di, DL_PHASE1);
1061         SET_DL_LD(di, DL_LD(ddi));
1062         di->d = r;
1063         /*
1064         SET_PAT_DEL(rn0, 0);
1065         ddi->d = r;
1066         SET_DL_ST(di, DL_PHASE2);
1067         di->d = *p0;
1068         */
1069       } else {
1070         SET_PAT_DEL(rn0, PAT_DELETING);
1071         SET_DL_ST(di, DL_PHASE1);
1072         SET_DL_LD(di, *p0);
1073         di->d = r;
1074         // sen_log("sym_del d=%d ld=%d stat=%d", r, *p0, DL_PHASE1);
1075       }
1076     }
1077     if (*p0 == otherside) {
1078       rn0->check = 0;
1079     } else {
1080       if (otherside) {
1081         PAT_AT(sym, otherside, rno);
1082         if (rno && c0 < rno->check && rno->check <= c) {
1083           if (!delinfo_search(sym, otherside)) {
1084             SEN_LOG(sen_log_error, "no delinfo found %d", otherside);
1085           }
1086           rno->check = 0;
1087         }
1088       }
1089       *p0 = otherside;
1090     }
1091   }
1092   sym->header->nrecords--;
1093   return sen_success;
1094 }
1095 
1096 sen_rc
sen_sym_del(sen_sym * sym,const void * key)1097 sen_sym_del(sen_sym *sym, const void *key)
1098 {
1099   if (!sym) { return sen_invalid_argument; }
1100   if (sym->v08p) { return sen_sym_del08(sym, key); }
1101   if (sym->flags & SEN_SYM_WITH_SIS) {
1102     SEN_LOG(sen_log_warning, "use sen_sym_del_with_sis for sym with SEN_SYM_WITH_SIS");
1103   }
1104   if (sym->flags & SEN_INDEX_SHARED_LEXICON) {
1105     sen_id id = sen_sym_at(sym, key);
1106     if (id) {
1107       pat_node *rn;
1108       PAT_AT(sym, id, rn);
1109       if (rn && rn->bitfield >= 1 << 2) { return sen_success; }
1110     }
1111   }
1112   return _sen_sym_del(sym, key, 0);
1113 }
1114 
1115 uint32_t
sen_sym_size(sen_sym * sym)1116 sen_sym_size(sen_sym *sym)
1117 {
1118   if (!sym) { return sen_invalid_argument; }
1119   return sym->header->nrecords;
1120 }
1121 
1122 const char *
_sen_sym_key(sen_sym * sym,sen_id id)1123 _sen_sym_key(sen_sym *sym, sen_id id)
1124 {
1125   pat_node *node;
1126   if (sym->v08p) { return _sen_sym_key08(sym, id); }
1127   PAT_AT(sym, id, node);
1128   if (!node) { return NULL; }
1129   return (const char *) pat_node_get_key(sym, node);
1130 }
1131 
1132 int
sen_sym_key(sen_sym * sym,sen_id id,void * keybuf,int bufsize)1133 sen_sym_key(sen_sym *sym, sen_id id, void *keybuf, int bufsize)
1134 {
1135   int len;
1136   uint8_t *key;
1137   pat_node *node;
1138   if (!sym) { return sen_invalid_argument; }
1139   if (sym->v08p) { return sen_sym_key08(sym, id, keybuf, bufsize); }
1140   PAT_AT(sym, id, node);
1141   if (!node) { return 0; }
1142   if (!(key = pat_node_get_key(sym, node))) { return 0; }
1143   if (!(len = sym->key_size)) { len = (int)(strlen((char *)key) + 1); }
1144   if (keybuf && bufsize >= len) { memcpy(keybuf, key, len); }
1145   return len;
1146 }
1147 
1148 int
sen_sym_pocket_get(sen_sym * sym,sen_id id)1149 sen_sym_pocket_get(sen_sym *sym, sen_id id)
1150 {
1151   pat_node *node;
1152   if (!sym) { return -1; }
1153   if (sym->v08p) { return sen_sym_pocket_get08(sym, id); }
1154   PAT_AT(sym, id, node);
1155   if (!node) { return -1; }
1156   return PAT_PKT(node);
1157 }
1158 
1159 sen_rc
sen_sym_pocket_set(sen_sym * sym,sen_id id,unsigned int value)1160 sen_sym_pocket_set(sen_sym *sym, sen_id id, unsigned int value)
1161 {
1162   pat_node *node;
1163   if (!sym) { return sen_invalid_argument; }
1164   if (sym->v08p) { return sen_sym_pocket_set08(sym, id, value); }
1165   PAT_AT(sym, id, node);
1166   if (value > PAT_POCKET_MAX || !node) { return sen_invalid_argument; }
1167   SET_PAT_PKT(node, value);
1168   return sen_success;
1169 }
1170 
1171 sen_rc
sen_sym_pocket_incr(sen_sym * sym,sen_id id)1172 sen_sym_pocket_incr(sen_sym *sym, sen_id id)
1173 {
1174   pat_node *node;
1175   if (!sym) { return sen_invalid_argument; }
1176   if (sym->v08p) { return sen_invalid_argument; }
1177   PAT_AT(sym, id, node);
1178   if (!node) { return sen_invalid_argument; }
1179   node->bitfield += (1 << 2);
1180   if (node->bitfield < (1 << 2)) {
1181     SEN_LOG(sen_log_error, "sen_sym_pocket_incr failed %d", node->bitfield);
1182     return sen_invalid_format;
1183   }
1184   return sen_success;
1185 }
1186 
1187 sen_rc
sen_sym_pocket_decr(sen_sym * sym,sen_id id)1188 sen_sym_pocket_decr(sen_sym *sym, sen_id id)
1189 {
1190   pat_node *node;
1191   if (!sym) { return sen_invalid_argument; }
1192   if (sym->v08p) { return sen_invalid_argument; }
1193   PAT_AT(sym, id, node);
1194   if (!node) { return sen_invalid_argument; }
1195   if (node->bitfield < (1 << 2)) {
1196     SEN_LOG(sen_log_error, "sen_sym_pocket_decr failed %d", node->bitfield);
1197     return sen_invalid_format;
1198   }
1199   node->bitfield -= (1 << 2);
1200   return sen_success;
1201 }
1202 
1203 sen_rc
sen_sym_info(sen_sym * sym,int * key_size,unsigned * flags,sen_encoding * encoding,unsigned * nrecords,unsigned * file_size)1204 sen_sym_info(sen_sym *sym, int *key_size, unsigned *flags,
1205              sen_encoding *encoding, unsigned *nrecords, unsigned *file_size)
1206 {
1207   ERRCLR(NULL);
1208   if (!sym) { return sen_invalid_argument; }
1209   if (key_size) { *key_size = sym->key_size; }
1210   if (flags) { *flags = sym->flags; }
1211   if (encoding) { *encoding = sym->encoding; }
1212   if (nrecords) { *nrecords = sym->header->nrecords; }
1213   if (file_size) {
1214     sen_rc rc;
1215     off_t tmp = 0;
1216     if ((rc = sen_io_size(sym->io, &tmp))) {
1217       return rc;
1218     }
1219     *file_size = (unsigned) tmp; /* FIXME: inappropriate cast */
1220   }
1221   return sen_success;
1222 }
1223 
1224 int
sen_sym_del_with_sis(sen_sym * sym,sen_id id,int (* func)(sen_id,void *),void * func_arg)1225 sen_sym_del_with_sis(sen_sym *sym, sen_id id,
1226                      int(*func)(sen_id, void *), void *func_arg)
1227 {
1228   int level = 0, shared;
1229   const char *key = NULL, *_key;
1230   sis_node *sp, *ss = NULL, *si = sis_at(sym, id);
1231   if (sym->v08p) { return sen_sym_del_with_sis08(sym, id, func, func_arg); }
1232   while (id) {
1233     if (sym->flags & SEN_INDEX_SHARED_LEXICON) {
1234       pat_node *rn;
1235       PAT_AT(sym, id, rn);
1236       if (rn && rn->bitfield >= 1 << 2) { break; }
1237     }
1238     if ((si && si->children && si->children != id) || !func(id, func_arg)) { break; }
1239     //todo : if ERRP { break; }
1240     if (!(_key = _sen_sym_key(sym, id))) { break; }
1241     if (_key == key) {
1242       shared = 1;
1243     } else {
1244       key = _key;
1245       shared = 0;
1246     }
1247     _sen_sym_del(sym, key, shared);
1248     if (si) {
1249       sen_id *p, sid;
1250       uint32_t lkey = 0;
1251       if ((*key & 0x80) && chop(sym, &key, &lkey)) {
1252         if ((sid = sen_sym_at(sym, key)) && (ss = sis_at(sym, sid))) {
1253           for (p = &ss->children; *p && *p != sid; p = &sp->sibling) {
1254             if (*p == id) {
1255               *p = si->sibling;
1256               break;
1257             }
1258             if (!(sp = sis_at(sym, *p))) { break; }
1259           }
1260         }
1261       } else {
1262         sid = SEN_SYM_NIL;
1263       }
1264       si->sibling = 0;
1265       si->children = 0;
1266       id = sid;
1267       si = ss;
1268     } else {
1269       id = SEN_SYM_NIL;
1270     }
1271     level++;
1272   }
1273   if (level) {
1274     uint32_t lkey = 0;
1275     while (id && key) {
1276       if (_sen_sym_key(sym, id) != key) { break; }
1277       {
1278         pat_node *rn;
1279         PAT_AT(sym, id, rn);
1280         if (!rn) { break; }
1281         if (lkey) {
1282           rn->key = lkey;
1283         } else {
1284           pat_node_set_key(sym, rn, (uint8_t *)key, (unsigned int)(strlen(key) + 1));
1285           lkey = rn->key;
1286         }
1287       }
1288       if (!((*key & 0x80) && chop(sym, &key, &lkey))) { break; }
1289       id = sen_sym_at(sym, key);
1290     }
1291   }
1292   return level;
1293 }
1294 
1295 sen_id
sen_sym_next(sen_sym * sym,sen_id id)1296 sen_sym_next(sen_sym *sym, sen_id id)
1297 {
1298   while (++id <= sym->header->curr_rec) {
1299     if (id == sen_sym_at(sym, _sen_sym_key(sym, id))) {
1300       return id;
1301     }
1302   }
1303   return SEN_SYM_NIL;
1304 }
1305 
1306 sen_id
sen_sym_curr_id(sen_sym * sym)1307 sen_sym_curr_id(sen_sym *sym)
1308 {
1309   return sym->header->curr_rec;
1310 }
1311 
1312 sen_rc
sen_sym_lock(sen_sym * sym,int timeout)1313 sen_sym_lock(sen_sym *sym, int timeout)
1314 {
1315   static int _ncalls = 0, _ncolls = 0;
1316   uint32_t count;
1317   _ncalls++;
1318   for (count = 0;; count++) {
1319     uint32_t lock;
1320     SEN_ATOMIC_ADD_EX(sym->lock, 1, lock);
1321     if (lock) {
1322       SEN_ATOMIC_ADD_EX(sym->lock, -1, lock);
1323       if (!timeout || (timeout > 0 && timeout == count)) { break; }
1324       if (!(++_ncolls % 1000000) && (_ncolls > _ncalls)) {
1325         if (_ncolls < 0 || _ncalls < 0) {
1326           _ncolls = 0; _ncalls = 0;
1327         } else {
1328           SEN_LOG(sen_log_notice, "sym(%p) collisions(%d/%d)", sym, _ncolls, _ncalls);
1329         }
1330       }
1331       usleep(1000);
1332       continue;
1333     }
1334     return sen_success;
1335   }
1336   return sen_other_error;
1337 }
1338 
1339 sen_rc
sen_sym_unlock(sen_sym * sym)1340 sen_sym_unlock(sen_sym *sym)
1341 {
1342   uint32_t lock;
1343   SEN_ATOMIC_ADD_EX(sym->lock, -1, lock);
1344   return sen_success;
1345 }
1346 
1347 sen_rc
sen_sym_clear_lock(sen_sym * sym)1348 sen_sym_clear_lock(sen_sym *sym)
1349 {
1350   *sym->lock = 0;
1351   return sen_success;
1352 }
1353 
1354 int
sen_sym_scan(sen_sym * sym,const char * str,unsigned int str_len,sen_sym_scan_hit * sh,unsigned int sh_size,const char ** rest)1355 sen_sym_scan(sen_sym *sym, const char *str, unsigned int str_len,
1356              sen_sym_scan_hit *sh, unsigned int sh_size, const char **rest)
1357 {
1358   int n = 0;
1359   sen_id tid;
1360   if (sym->flags & SEN_INDEX_NORMALIZE) {
1361     sen_nstr *nstr = sen_nstr_open(str, str_len, sym->encoding, SEN_STR_WITH_CHECKS);
1362     if (nstr) {
1363       int16_t *cp = nstr->checks;
1364       unsigned int offset = 0, offset0 = 0;
1365       const char *sp = nstr->norm, *se = nstr->norm + nstr->norm_blen;
1366       while (n < sh_size) {
1367         if ((tid = sen_sym_common_prefix_search(sym, sp))) {
1368           int len = strlen(_sen_sym_key(sym, tid));
1369           sh[n].id = tid;
1370           sh[n].offset = (*cp > 0) ? offset : offset0;
1371           while (len--) {
1372             if (*cp > 0) { offset0 = offset; offset += *cp; }
1373             sp++; cp++;
1374           }
1375           sh[n].length = offset - sh[n].offset;
1376           n++;
1377         } else {
1378           if (*cp > 0) { offset0 = offset; offset += *cp; }
1379           do {
1380             sp++; cp++;
1381           } while (sp < se && !*cp);
1382         }
1383         if (se <= sp) { offset = str_len; break; }
1384       }
1385       if (rest) { *rest = nstr->orig + offset; }
1386       sen_nstr_close(nstr);
1387     } else {
1388       n = -1;
1389       if (rest) { *rest = str; }
1390     }
1391   } else {
1392     int len;
1393     const char *sp, *se = str + str_len;
1394     for (sp = str; sp < se && n < sh_size; sp += len) {
1395       if ((tid = sen_sym_common_prefix_search(sym, sp))) {
1396         len = strlen(_sen_sym_key(sym, tid));
1397         sh[n].id = tid;
1398         sh[n].offset = sp - str;
1399         sh[n].length = len;
1400         n++;
1401       } else {
1402         len = sen_str_charlen_nonnull(sp, se, sym->encoding);
1403       }
1404       if (!len) { break; }
1405     }
1406     if (rest) { *rest = sp; }
1407   }
1408   return n;
1409 }
1410 
1411 #define INITIAL_SIZE 512
1412 
1413 inline static void
push(sen_sym_cursor * c,sen_id id,uint16_t check)1414 push(sen_sym_cursor *c, sen_id id, uint16_t check)
1415 {
1416   sen_ctx *ctx = c->ctx;
1417   sen_sym_cursor_entry *se;
1418   if (c->size <= c->sp) {
1419     if (c->ss) {
1420       uint32_t size = c->size * 4;
1421       sen_sym_cursor_entry *ss = SEN_REALLOC(c->ss, size);
1422       if (!ss) { return; /* give up */ }
1423       c->ss = ss;
1424       c->size = size;
1425     } else {
1426       if (!(c->ss = SEN_MALLOC(sizeof(sen_sym_cursor_entry) * INITIAL_SIZE))) {
1427         return; /* give up */
1428       }
1429       c->size = INITIAL_SIZE;
1430     }
1431   }
1432   se = &c->ss[c->sp++];
1433   se->id = id;
1434   se->check = check;
1435 }
1436 
1437 inline static sen_sym_cursor_entry *
pop(sen_sym_cursor * c)1438 pop(sen_sym_cursor *c)
1439 {
1440   return c->sp ? &c->ss[--c->sp] : NULL;
1441 }
1442 
1443 sen_id
sen_sym_cursor_next(sen_sym_cursor * c)1444 sen_sym_cursor_next(sen_sym_cursor *c)
1445 {
1446   pat_node *node;
1447   sen_sym_cursor_entry *se;
1448   while ((se = pop(c))) {
1449     sen_id id = se->id;
1450     uint16_t check = se->check;
1451     for (;;) {
1452       if (id) {
1453         PAT_AT(c->sym, id, node);
1454         if (node) {
1455           if (node->check > check) {
1456             if (c->flags & SEN_SYM_ASCENDING) {
1457               push(c, node->r, node->check);
1458               id = node->l;
1459             } else {
1460               push(c, node->l, node->check);
1461               id = node->r;
1462             }
1463             check = node->check;
1464             continue;
1465           } else {
1466             if (id == c->limit) { c->sp = 0; }
1467             return id;
1468           }
1469         }
1470       } else {
1471         break;
1472       }
1473     }
1474   }
1475   return SEN_SYM_NIL;
1476 }
1477 
1478 void
sen_sym_cursor_close(sen_sym_cursor * c)1479 sen_sym_cursor_close(sen_sym_cursor *c)
1480 {
1481   sen_ctx *ctx = c->ctx;
1482   if (c->ss) { SEN_FREE(c->ss); }
1483   SEN_FREE(c);
1484 }
1485 
1486 inline static int
bitcmp(const void * s1,const void * s2,int offset,int length)1487 bitcmp(const void *s1, const void *s2, int offset, int length)
1488 {
1489   int r, rest = length + (offset & 7) - 8, bl = offset >> 3, mask = 0xff >> (offset & 7);
1490   unsigned char *a = (unsigned char *)s1 + bl, *b = (unsigned char *)s2 + bl;
1491   if (rest <= 0) {
1492     mask &= 0xff << -rest;
1493     return (*a & mask) - (*b & mask);
1494   }
1495   if ((r = (*a & mask) - (*b & mask))) { return r; }
1496   a++; b++;
1497   if ((bl = rest >> 3)) {
1498     if ((r = memcmp(a, b, bl))) { return r; }
1499     a += bl; b += bl;
1500   }
1501   mask = 0xff << (8 - (rest & 7));
1502   return (*a & mask) - (*b & mask);
1503 }
1504 
1505 inline static sen_rc
set_cursor_ascend(sen_sym * sym,sen_sym_cursor * c,const void * key,int flags)1506 set_cursor_ascend(sen_sym *sym, sen_sym_cursor *c, const void *key, int flags)
1507 {
1508   sen_id id;
1509   pat_node *node;
1510   const uint8_t *k;
1511   int r, check = -1, len, c2;
1512   if (!(len = sym->key_size * 8)) { len = (strlen(key) + 1) * 8; }
1513   PAT_AT(sym, 0, node);
1514   for (id = node->r; id;) {
1515     PAT_AT(sym, id, node);
1516     if (!node) { return sen_invalid_format; }
1517     if (node->check <= check) {
1518       if (!(k = pat_node_get_key(sym, node))) { return sen_invalid_format; }
1519       if (sym->key_size) {
1520         if (flags & SEN_SYM_GT) {
1521           if (memcmp(key, k, sym->key_size) < 0) { push(c, id, check); }
1522         } else {
1523           if (memcmp(key, k, sym->key_size) <= 0) { push(c, id, check); }
1524         }
1525       } else {
1526         int l = (strlen(k) + 1) * 8;
1527         if (l == len) {
1528           if (flags & SEN_SYM_GT) {
1529             if (memcmp(key, k, l >> 3) < 0) { push(c, id, check); }
1530           } else {
1531             if (memcmp(key, k, l >> 3) <= 0) { push(c, id, check); }
1532           }
1533         } else if (l < len) {
1534           if (memcmp(key, k, l >> 3) < 0) { push(c, id, check); }
1535         } else {
1536           if (memcmp(key, k, len >> 3) <= 0) { push(c, id, check); }
1537         }
1538       }
1539       break;
1540     }
1541     c2 = len < node->check ? len : node->check;
1542     if (check + 1 < c2) {
1543       if (!(k = pat_node_get_key(sym, node))) { return sen_invalid_format; }
1544       if ((r = bitcmp(key, k, check + 1, c2 - (check + 1)))) {
1545         if (r < 0) {
1546           push(c, node->r, node->check);
1547           push(c, node->l, node->check);
1548         }
1549         break;
1550       }
1551     }
1552     if (len <= node->check) { break; }
1553     check = node->check;
1554     if (nth_bit((uint8_t *)key, check)) {
1555       id = node->r;
1556     } else {
1557       push(c, node->r, node->check);
1558       id = node->l;
1559     }
1560   }
1561   return sen_success;
1562 }
1563 
1564 inline static sen_rc
set_cursor_descend(sen_sym * sym,sen_sym_cursor * c,const void * key,int flags)1565 set_cursor_descend(sen_sym *sym, sen_sym_cursor *c, const void *key, int flags)
1566 {
1567   sen_id id;
1568   pat_node *node;
1569   const uint8_t *k;
1570   int r, check = -1, len, c2;
1571   if (!(len = sym->key_size * 8)) { len = (strlen(key) + 1) * 8; }
1572   PAT_AT(sym, 0, node);
1573   for (id = node->r; id;) {
1574     PAT_AT(sym, id, node);
1575     if (!node) { return sen_invalid_format; }
1576     if (node->check <= check) {
1577       if (!(k = pat_node_get_key(sym, node))) { return sen_invalid_format; }
1578       if (sym->key_size) {
1579         if (flags & SEN_SYM_LT) {
1580           if (memcmp(key, k, sym->key_size) > 0) { push(c, id, check); }
1581         } else {
1582           if (memcmp(key, k, sym->key_size) >= 0) { push(c, id, check); }
1583         }
1584       } else {
1585         int l = (strlen(k) + 1) * 8;
1586         if (l <= len) {
1587           if ((flags & SEN_SYM_LT) && l == len) {
1588             if (memcmp(key, k, l >> 3) > 0) { push(c, id, check); }
1589           } else {
1590             if (memcmp(key, k, l >> 3) >= 0) { push(c, id, check); }
1591           }
1592         } else {
1593           if (memcmp(key, k, len >> 3) > 0) { push(c, id, check); }
1594         }
1595       }
1596       break;
1597     }
1598     c2 = len < node->check ? len : node->check;
1599     if (check + 1 < c2) {
1600       if (!(k = pat_node_get_key(sym, node))) { return sen_invalid_format; }
1601       if ((r = bitcmp(key, k, check + 1, c2 - (check + 1)))) {
1602         if (r >= 0) {
1603           push(c, node->l, node->check);
1604           push(c, node->r, node->check);
1605         }
1606         break;
1607       }
1608     }
1609     if (len <= node->check) {
1610       push(c, node->l, node->check);
1611       push(c, node->r, node->check);
1612       break;
1613     }
1614     check = node->check;
1615     if (nth_bit((uint8_t *)key, check)) {
1616       push(c, node->l, node->check);
1617       id = node->r;
1618     } else {
1619       id = node->l;
1620     }
1621   }
1622   return sen_success;
1623 }
1624 
1625 sen_sym_cursor *
sen_sym_cursor_open(sen_sym * sym,sen_ctx * ctx,const void * min,const void * max,int flags)1626 sen_sym_cursor_open(sen_sym *sym, sen_ctx *ctx,
1627                     const void *min, const void *max, int flags)
1628 {
1629   sen_id id;
1630   pat_node *node;
1631   sen_sym_cursor *c;
1632   if (!sym || !ctx) { return NULL; }
1633   if (!(c = SEN_MALLOCN(sen_sym_cursor, 1))) { return NULL; }
1634   c->sym = sym;
1635   c->ctx = ctx;
1636   c->size = 0;
1637   c->sp = 0;
1638   c->ss = NULL;
1639   c->limit = 0;
1640   if (flags & SEN_SYM_ASCENDING) {
1641     if (max) {
1642       set_cursor_descend(sym, c, max, flags);
1643       c->flags = SEN_SYM_DESCENDING;
1644       c->limit = sen_sym_cursor_next(c);
1645       c->sp = 0;
1646     }
1647     if (min) {
1648       set_cursor_ascend(sym, c, min, flags);
1649     } else {
1650       PAT_AT(sym, 0, node);
1651       if (!node) {
1652         sen_sym_cursor_close(c);
1653         return NULL;
1654       }
1655       if ((id = node->r)) {
1656         PAT_AT(sym, id, node);
1657         if (node) {
1658           push(c, node->r, node->check);
1659           push(c, node->l, node->check);
1660         }
1661       }
1662     }
1663   } else {
1664     if (min) {
1665       set_cursor_ascend(sym, c, min, flags);
1666       c->flags = SEN_SYM_ASCENDING;
1667       c->limit = sen_sym_cursor_next(c);
1668       c->sp = 0;
1669     }
1670     if (max) {
1671       set_cursor_descend(sym, c, max, flags);
1672     } else {
1673       PAT_AT(sym, 0, node);
1674       if (!node) {
1675         sen_sym_cursor_close(c);
1676         return NULL;
1677       }
1678       if ((id = node->r)) {
1679         PAT_AT(sym, id, node);
1680         if (node) {
1681           push(c, node->l, node->check);
1682           push(c, node->r, node->check);
1683         }
1684       }
1685     }
1686   }
1687   c->flags = flags;
1688   return c;
1689 }
1690