1 /*************************************************************************************************
2  * The word database API of Tokyo Dystopia
3  *                                                               Copyright (C) 2007-2010 FAL Labs
4  * This file is part of Tokyo Dystopia.
5  * Tokyo Dystopia is free software; you can redistribute it and/or modify it under the terms of
6  * the GNU Lesser General Public License as published by the Free Software Foundation; either
7  * version 2.1 of the License or any later version.  Tokyo Dystopia is distributed in the hope
8  * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
10  * License for more details.
11  * You should have received a copy of the GNU Lesser General Public License along with Tokyo
12  * Dystopia; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
13  * Boston, MA 02111-1307 USA.
14  *************************************************************************************************/
15 
16 
17 #include "tcwdb.h"
18 #include "myconf.h"
19 
/* constants for identification, the in-memory caches and searching */
20 #define WDBMAGICDATA   "[word]"          // magic data written to / checked in the opaque region of the index
21 #define WDBIOBUFSIZ    65536             // size of an I/O buffer
22 #define WDBMAXWORDLEN  1024              // maximum length of each search word
23 #define WDBWORDUNIT    1024              // unit number of word allocation
24 #define WDBRESUNIT     1024              // unit number of result allocation
25 #define WDBCCBNUM      1048573           // bucket number of the token cache
26 #define WDBCCDEFICSIZ  (1024LL*1024*128) // default capacity of the token cache
27 #define WDBDIDSBNUM    262139            // bucket number of the deleted ID set
28 #define WDBDTKNBNUM    262139            // bucket number of the deleted token map
29 #define WDBDEFFWMMAX   2048              // default maximum number of forward matching expansions
30 #define WDBHJBNUMCO    4                 // coefficient of the bucket number for hash join
31 
/* tuning parameters handed to the underlying B+ tree index when it is opened */
32 #define WDBDEFETNUM    1000000           // default expected token number
33 #define WDBLMEMB       256               // number of members in each leaf of the index
34 #define WDBNMEMB       512               // number of members in each node of the index
35 #define WDBAPOW        9                 // alignment power of the index
36 #define WDBFPOW        11                // free block pool power of the index
37 #define WDBLSMAX       8192              // maximum size of each leaf of the index
38 #define WDBLCNUMW      64                // number of cached leaf nodes for writer
39 #define WDBLCNUMR      1024              // number of cached leaf nodes for reader
40 #define WDBNCNUM       1024              // number of cached non-leaf nodes
41 
42 
43 /* private function prototypes */
/* The definitions follow in the "private features" section of this file. */
44 static bool tcwdblockmethod(TCWDB *wdb, bool wr);   // take the method lock as writer or reader
45 static bool tcwdbunlockmethod(TCWDB *wdb);          // release the method lock
46 static bool tcwdbopenimpl(TCWDB *wdb, const char *path, int omode);   // open without locking
47 static bool tcwdbcloseimpl(TCWDB *wdb);                               // close without locking
48 static bool tcwdbputimpl(TCWDB *wdb, int64_t id, const TCLIST *words);   // queue words of a record
49 static bool tcwdboutimpl(TCWDB *wdb, int64_t id, const TCLIST *words);   // queue removal of a record
50 static uint64_t *tcwdbsearchimpl(TCWDB *wdb, const char *word, int *np); // look up one word in the index
51 static int tccmpwords(const char **a, const char **b);   // strcmp-based comparator for key arrays
52 
53 
54 
55 /*************************************************************************************************
56  * API
57  *************************************************************************************************/
58 
59 
/* Get the message string corresponding to an error code.
   `ecode' specifies the error code.
   The return value is the string that `tcbdberrmsg' yields for the code. */
const char *tcwdberrmsg(int ecode){
  const char *msg = tcbdberrmsg(ecode);
  return msg;
}
64 
65 
66 /* Create a word database object. */
tcwdbnew(void)67 TCWDB *tcwdbnew(void){
68   TCWDB *wdb = tcmalloc(sizeof(*wdb));
69   wdb->mmtx = tcmalloc(sizeof(pthread_rwlock_t));
70   if(pthread_rwlock_init(wdb->mmtx, NULL) != 0) tcmyfatal("pthread_rwlock_init failed");
71   wdb->idx = tcbdbnew();
72   if(!tcbdbsetmutex(wdb->idx)) tcmyfatal("tcbdbsetmutex failed");
73   wdb->open = false;
74   wdb->cc = NULL;
75   wdb->icsiz = WDBCCDEFICSIZ;
76   wdb->lcnum = 0;
77   wdb->dtokens = NULL;
78   wdb->dids = NULL;
79   wdb->etnum = WDBDEFETNUM;
80   wdb->opts = 0;
81   wdb->fwmmax = WDBDEFFWMMAX;
82   wdb->synccb = NULL;
83   wdb->syncopq = NULL;
84   wdb->addcb = NULL;
85   wdb->addopq = NULL;
86   return wdb;
87 }
88 
89 
90 /* Delete a word database object. */
tcwdbdel(TCWDB * wdb)91 void tcwdbdel(TCWDB *wdb){
92   assert(wdb);
93   if(wdb->open) tcwdbclose(wdb);
94   tcbdbdel(wdb->idx);
95   pthread_rwlock_destroy(wdb->mmtx);
96   tcfree(wdb->mmtx);
97   tcfree(wdb);
98 }
99 
100 
101 /* Get the last happened error code of a word database object. */
tcwdbecode(TCWDB * wdb)102 int tcwdbecode(TCWDB *wdb){
103   assert(wdb);
104   return tcbdbecode(wdb->idx);
105 }
106 
107 
108 /* Set the tuning parameters of a word database object. */
tcwdbtune(TCWDB * wdb,int64_t etnum,uint8_t opts)109 bool tcwdbtune(TCWDB *wdb, int64_t etnum, uint8_t opts){
110   assert(wdb);
111   if(!tcwdblockmethod(wdb, true)) return false;
112   if(wdb->open){
113     tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
114     tcwdbunlockmethod(wdb);
115     return false;
116   }
117   wdb->etnum = (etnum > 0) ? etnum : WDBDEFETNUM;
118   wdb->opts = opts;
119   tcwdbunlockmethod(wdb);
120   return true;
121 }
122 
123 
124 /* Set the caching parameters of a word database object. */
tcwdbsetcache(TCWDB * wdb,int64_t icsiz,int32_t lcnum)125 bool tcwdbsetcache(TCWDB *wdb, int64_t icsiz, int32_t lcnum){
126   assert(wdb);
127   if(!tcwdblockmethod(wdb, true)) return false;
128   if(wdb->open){
129     tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
130     tcwdbunlockmethod(wdb);
131     return false;
132   }
133   wdb->icsiz = (icsiz > 0) ? icsiz : WDBCCDEFICSIZ;
134   wdb->lcnum = (lcnum > 0) ? lcnum : 0;
135   tcwdbunlockmethod(wdb);
136   return true;
137 }
138 
139 
140 /* Set the maximum number of forward matching expansion of a word database object. */
tcwdbsetfwmmax(TCWDB * wdb,uint32_t fwmmax)141 bool tcwdbsetfwmmax(TCWDB *wdb, uint32_t fwmmax){
142   assert(wdb);
143   if(!tcwdblockmethod(wdb, true)) return false;
144   if(wdb->open){
145     tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
146     tcwdbunlockmethod(wdb);
147     return false;
148   }
149   wdb->fwmmax = fwmmax;
150   tcwdbunlockmethod(wdb);
151   return true;
152 }
153 
154 
155 /* Open a word database object. */
tcwdbopen(TCWDB * wdb,const char * path,int omode)156 bool tcwdbopen(TCWDB *wdb, const char *path, int omode){
157   assert(wdb && path);
158   if(!tcwdblockmethod(wdb, true)) return false;
159   if(wdb->open){
160     tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
161     tcwdbunlockmethod(wdb);
162     return false;
163   }
164   bool rv = tcwdbopenimpl(wdb, path, omode);
165   tcwdbunlockmethod(wdb);
166   return rv;
167 }
168 
169 
170 /* Close a word database object. */
tcwdbclose(TCWDB * wdb)171 bool tcwdbclose(TCWDB *wdb){
172   assert(wdb);
173   if(!tcwdblockmethod(wdb, true)) return false;
174   if(!wdb->open){
175     tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
176     tcwdbunlockmethod(wdb);
177     return false;
178   }
179   bool rv = tcwdbcloseimpl(wdb);
180   tcwdbunlockmethod(wdb);
181   return rv;
182 }
183 
184 
185 /* Store a record into a word database object. */
tcwdbput(TCWDB * wdb,int64_t id,const TCLIST * words)186 bool tcwdbput(TCWDB *wdb, int64_t id, const TCLIST *words){
187   assert(wdb && id > 0 && words);
188   if(!tcwdblockmethod(wdb, true)) return false;
189   if(!wdb->open || !wdb->cc){
190     tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
191     tcwdbunlockmethod(wdb);
192     return false;
193   }
194   if(tcidsetcheck(wdb->dids, id) && !tcwdbmemsync(wdb, 0)){
195     tcwdbunlockmethod(wdb);
196     return false;
197   }
198   bool rv = tcwdbputimpl(wdb, id, words);
199   tcwdbunlockmethod(wdb);
200   return rv;
201 }
202 
203 
204 /* Store a record with a text string into a word database object. */
tcwdbput2(TCWDB * wdb,int64_t id,const char * text,const char * delims)205 bool tcwdbput2(TCWDB *wdb, int64_t id, const char *text, const char *delims){
206   assert(wdb && id > 0 && text);
207   TCLIST *words = tcstrsplit(text, delims ? delims : WDBSPCCHARS);
208   bool rv = tcwdbput(wdb, id, words);
209   tclistdel(words);
210   return rv;
211 }
212 
213 
214 /* Remove a record of a word database object. */
tcwdbout(TCWDB * wdb,int64_t id,const TCLIST * words)215 bool tcwdbout(TCWDB *wdb, int64_t id, const TCLIST *words){
216   assert(wdb && id > 0 && words);
217   if(!tcwdblockmethod(wdb, true)) return false;
218   if(!wdb->open || !wdb->cc){
219     tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
220     tcwdbunlockmethod(wdb);
221     return false;
222   }
223   if(tcidsetcheck(wdb->dids, id)){
224     tcwdbunlockmethod(wdb);
225     return true;
226   }
227   if(tcmaprnum(wdb->cc) > 0 && !tcwdbmemsync(wdb, 0)){
228     tcwdbunlockmethod(wdb);
229     return false;
230   }
231   bool rv = tcwdboutimpl(wdb, id, words);
232   tcwdbunlockmethod(wdb);
233   return rv;
234 }
235 
236 
237 /* Remove a record with a text string of a word database object. */
tcwdbout2(TCWDB * wdb,int64_t id,const char * text,const char * delims)238 bool tcwdbout2(TCWDB *wdb, int64_t id, const char *text, const char *delims){
239   assert(wdb && id > 0 && text);
240   TCLIST *words = tcstrsplit(text, delims ? delims : WDBSPCCHARS);
241   bool rv = tcwdbout(wdb, id, words);
242   tclistdel(words);
243   return rv;
244 }
245 
246 
247 /* Search a word database. */
tcwdbsearch(TCWDB * wdb,const char * word,int * np)248 uint64_t *tcwdbsearch(TCWDB *wdb, const char *word, int *np){
249   assert(wdb && word && np);
250   if(!tcwdblockmethod(wdb, false)) return NULL;
251   if(!wdb->open){
252     tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
253     tcwdbunlockmethod(wdb);
254     return NULL;
255   }
256   if(wdb->cc && (tcmaprnum(wdb->cc) > 0 || tcmaprnum(wdb->dtokens) > 0)){
257     tcwdbunlockmethod(wdb);
258     if(!tcwdblockmethod(wdb, true)) return NULL;
259     if(!tcwdbmemsync(wdb, 0)){
260       tcwdbunlockmethod(wdb);
261       return NULL;
262     }
263     tcwdbunlockmethod(wdb);
264     if(!tcwdblockmethod(wdb, false)) return NULL;
265   }
266   uint64_t *rv = tcwdbsearchimpl(wdb, word, np);
267   tcwdbunlockmethod(wdb);
268   return rv;
269 }
270 
271 
272 /* Synchronize updated contents of a word database object with the file and the device. */
tcwdbsync(TCWDB * wdb)273 bool tcwdbsync(TCWDB *wdb){
274   assert(wdb);
275   if(!tcwdblockmethod(wdb, true)) return false;
276   if(!wdb->open || !wdb->cc){
277     tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
278     tcwdbunlockmethod(wdb);
279     return false;
280   }
281   bool err = false;
282   if(!tcwdbmemsync(wdb, 2)) err = true;
283   tcwdbunlockmethod(wdb);
284   return !err;
285 }
286 
287 
288 /* Optimize the file of a word database object. */
tcwdboptimize(TCWDB * wdb)289 bool tcwdboptimize(TCWDB *wdb){
290   assert(wdb);
291   if(!tcwdblockmethod(wdb, true)) return false;
292   if(!wdb->open || !wdb->cc){
293     tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
294     tcwdbunlockmethod(wdb);
295     return false;
296   }
297   bool err = false;
298   if(!tcwdbmemsync(wdb, 1)) err = true;
299   if(!tcbdboptimize(wdb->idx, 0, 0, 0, -1, -1, UINT8_MAX)) err = true;
300   tcwdbunlockmethod(wdb);
301   return !err;
302 }
303 
304 
305 /* Remove all records of a word database object. */
tcwdbvanish(TCWDB * wdb)306 bool tcwdbvanish(TCWDB *wdb){
307   assert(wdb);
308   if(!tcwdblockmethod(wdb, true)) return false;
309   if(!wdb->open || !wdb->cc){
310     tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
311     tcwdbunlockmethod(wdb);
312     return false;
313   }
314   bool err = false;
315   tcmapclear(wdb->cc);
316   tcmapclear(wdb->dtokens);
317   if(!tcwdbmemsync(wdb, 1)) err = true;
318   if(!tcbdbvanish(wdb->idx)) err = true;
319   tcwdbunlockmethod(wdb);
320   return !err;
321 }
322 
323 
324 /* Copy the database file of a word database object. */
tcwdbcopy(TCWDB * wdb,const char * path)325 bool tcwdbcopy(TCWDB *wdb, const char *path){
326   assert(wdb && path);
327   if(!tcwdblockmethod(wdb, false)) return false;
328   if(!wdb->open || !wdb->cc){
329     tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
330     tcwdbunlockmethod(wdb);
331     return false;
332   }
333   bool err = false;
334   if(!tcwdbmemsync(wdb, 1)) err = true;
335   if(!tcbdbcopy(wdb->idx, path)) err = true;
336   tcwdbunlockmethod(wdb);
337   return !err;
338 }
339 
340 
341 /* Get the file path of a word database object. */
tcwdbpath(TCWDB * wdb)342 const char *tcwdbpath(TCWDB *wdb){
343   assert(wdb);
344   return tcbdbpath(wdb->idx);
345 }
346 
347 
348 /* Get the number of tokens of a word database object. */
tcwdbtnum(TCWDB * wdb)349 uint64_t tcwdbtnum(TCWDB *wdb){
350   assert(wdb);
351   return tcbdbrnum(wdb->idx);
352 }
353 
354 
355 /* Get the size of the database file of a word database object. */
tcwdbfsiz(TCWDB * wdb)356 uint64_t tcwdbfsiz(TCWDB *wdb){
357   assert(wdb);
358   return tcbdbfsiz(wdb->idx);
359 }
360 
361 
362 
363 /*************************************************************************************************
364  * features for experts
365  *************************************************************************************************/
366 
367 
368 /* Set the file descriptor for debugging output. */
tcwdbsetdbgfd(TCWDB * wdb,int fd)369 void tcwdbsetdbgfd(TCWDB *wdb, int fd){
370   assert(wdb && fd >= 0);
371   tcbdbsetdbgfd(wdb->idx, fd);
372 }
373 
374 
375 /* Get the file descriptor for debugging output. */
tcwdbdbgfd(TCWDB * wdb)376 int tcwdbdbgfd(TCWDB *wdb){
377   assert(wdb);
378   return tcbdbdbgfd(wdb->idx);
379 }
380 
381 
382 /* Synchronize updating contents on memory of a word database object. */
tcwdbmemsync(TCWDB * wdb,int level)383 bool tcwdbmemsync(TCWDB *wdb, int level){
384   assert(wdb);
385   if(!wdb->open || !wdb->cc){
386     tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
387     return false;
388   }
389   bool err = false;
390   bool (*synccb)(int, int, const char *, void *) = wdb->synccb;
391   void *syncopq = wdb->syncopq;
392   bool (*addcb)(const char *, void *) = wdb->addcb;
393   void *addopq = wdb->addopq;
394   TCBDB *idx = wdb->idx;
395   TCMAP *cc = wdb->cc;
396   if(synccb && !synccb(0, 0, "started", syncopq)){
397     tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
398     return false;
399   }
400   if(tcmaprnum(cc) > 0){
401     if(synccb && !synccb(0, 0, "getting tokens", syncopq)){
402       tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
403       return false;
404     }
405     int kn;
406     const char **keys = tcmapkeys2(cc, &kn);
407     if(synccb && !synccb(kn, 0, "sorting tokens", syncopq)){
408       tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
409       tcfree(keys);
410       return false;
411     }
412     qsort(keys, kn, sizeof(*keys), (int(*)(const void *, const void *))tccmpwords);
413     for(int i = 0; i < kn; i++){
414       if(synccb && !synccb(kn, i + 1, "storing tokens", syncopq)){
415         tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
416         tcfree(keys);
417         return false;
418       }
419       const char *kbuf = keys[i];
420       int ksiz = strlen(kbuf);
421       int vsiz;
422       const char *vbuf = tcmapget(cc, kbuf, ksiz, &vsiz);
423       if(!tcbdbputcat(idx, kbuf, ksiz, vbuf, vsiz)) err = true;
424     }
425     if(addcb){
426       if(synccb && !synccb(0, 0, "storing keyword list", syncopq)){
427         tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
428         tcfree(keys);
429         return false;
430       }
431       for(int i = 0; i < kn; i++){
432         if(!addcb(keys[i], addopq)){
433           tcfree(keys);
434           return false;
435         }
436       }
437     }
438     tcfree(keys);
439     tcmapclear(cc);
440   }
441   TCMAP *dtokens = wdb->dtokens;
442   TCIDSET *dids = wdb->dids;
443   if(tcmaprnum(dtokens) > 0){
444     if(synccb && !synccb(0, 0, "getting deleted tokens", syncopq)){
445       tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
446       return false;
447     }
448     int kn;
449     const char **keys = tcmapkeys2(dtokens, &kn);
450     if(synccb && !synccb(kn, 0, "sorting deleted tokens", syncopq)){
451       tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
452       tcfree(keys);
453       return false;
454     }
455     qsort(keys, kn, sizeof(*keys), (int(*)(const void *, const void *))tccmpwords);
456     for(int i = 0; i < kn; i++){
457       if(synccb && !synccb(kn, i + 1, "storing deleted tokens", syncopq)){
458         tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
459         tcfree(keys);
460         return false;
461       }
462       const char *kbuf = keys[i];
463       int ksiz = strlen(kbuf);
464       int vsiz;
465       const char *vbuf = tcbdbget3(idx, kbuf, ksiz, &vsiz);
466       if(!vbuf) continue;
467       char *nbuf = tcmalloc(vsiz + 1);
468       char *wp = nbuf;
469       const char *pv;
470       while(vsiz > 0){
471         pv = vbuf;
472         int step;
473         uint64_t id;
474         TDREADVNUMBUF64(vbuf, id, step);
475         vbuf += step;
476         vsiz -= step;
477         if(!tcidsetcheck(dids, id)){
478           int len = vbuf - pv;
479           memcpy(wp, pv, len);
480           wp += len;
481         }
482       }
483       int nsiz = wp - nbuf;
484       if(nsiz > 0){
485         if(!tcbdbput(idx, kbuf, ksiz, nbuf, nsiz)) err = true;
486       } else {
487         if(!tcbdbout(idx, kbuf, ksiz)) err = true;
488       }
489       tcfree(nbuf);
490     }
491     tcfree(keys);
492     tcmapclear(dtokens);
493     tcidsetclear(dids);
494   }
495   if(level > 0){
496     if(synccb && !synccb(0, 0, "synchronizing database", syncopq)){
497       tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
498       return false;
499     }
500     if(!tcbdbmemsync(idx, level > 1)) err = true;
501   }
502   if(synccb && !synccb(0, 0, "finished", syncopq)){
503     tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
504     return false;
505   }
506   return !err;
507 }
508 
509 
510 /* Clear the cache of a word database object. */
tcwdbcacheclear(TCWDB * wdb)511 bool tcwdbcacheclear(TCWDB *wdb){
512   assert(wdb);
513   if(!wdb->open){
514     tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
515     return false;
516   }
517   return tcbdbcacheclear(wdb->idx);
518 }
519 
520 
521 /* Get the inode number of the database file of a word database object. */
tcwdbinode(TCWDB * wdb)522 uint64_t tcwdbinode(TCWDB *wdb){
523   assert(wdb);
524   return tcbdbinode(wdb->idx);
525 }
526 
527 
528 /* Get the modification time of the database file of a word database object. */
tcwdbmtime(TCWDB * wdb)529 time_t tcwdbmtime(TCWDB *wdb){
530   assert(wdb);
531   return tcbdbmtime(wdb->idx);
532 }
533 
534 
535 /* Get the options of a word database object. */
tcwdbopts(TCWDB * wdb)536 uint8_t tcwdbopts(TCWDB *wdb){
537   assert(wdb);
538   return tcbdbopts(wdb->idx);
539 }
540 
541 
542 /* Get the maximum number of forward matching expansion of a word database object. */
tcwdbfwmmax(TCWDB * wdb)543 uint32_t tcwdbfwmmax(TCWDB *wdb){
544   assert(wdb);
545   return wdb->fwmmax;
546 }
547 
548 
549 /* Get the number of records in the cache of a word database object. */
tcwdbcnum(TCWDB * wdb)550 uint32_t tcwdbcnum(TCWDB *wdb){
551   assert(wdb);
552   if(!wdb->cc) return 0;
553   return tcmaprnum(wdb->cc);
554 }
555 
556 
557 /* Set the callback function for sync progression of a word database object. */
tcwdbsetsynccb(TCWDB * wdb,bool (* cb)(int,int,const char *,void *),void * opq)558 void tcwdbsetsynccb(TCWDB *wdb, bool (*cb)(int, int, const char *, void *), void *opq){
559   assert(wdb);
560   wdb->synccb = cb;
561   wdb->syncopq = opq;
562 }
563 
564 
565 /* Set the callback function for word addition of a word database object. */
tcwdbsetaddcb(TCWDB * wdb,bool (* cb)(const char *,void *),void * opq)566 void tcwdbsetaddcb(TCWDB *wdb, bool (*cb)(const char *, void *), void *opq){
567   assert(wdb);
568   wdb->addcb = cb;
569   wdb->addopq = opq;
570 }
571 
572 
573 
574 /*************************************************************************************************
575  * private features
576  *************************************************************************************************/
577 
578 
579 /* Lock a method of the word database object.
580    `wdb' specifies the word database object.
581    `wr' specifies whether the lock is writer or not.
582    If successful, the return value is true, else, it is false. */
tcwdblockmethod(TCWDB * wdb,bool wr)583 static bool tcwdblockmethod(TCWDB *wdb, bool wr){
584   assert(wdb);
585   if(wr ? pthread_rwlock_wrlock(wdb->mmtx) != 0 : pthread_rwlock_rdlock(wdb->mmtx) != 0){
586     tcbdbsetecode(wdb->idx, TCETHREAD, __FILE__, __LINE__, __func__);
587     return false;
588   }
589   return true;
590 }
591 
592 
593 /* Unlock a method of the word database object.
594    `bdb' specifies the word database object.
595    If successful, the return value is true, else, it is false. */
tcwdbunlockmethod(TCWDB * wdb)596 static bool tcwdbunlockmethod(TCWDB *wdb){
597   assert(wdb);
598   if(pthread_rwlock_unlock(wdb->mmtx) != 0){
599     tcbdbsetecode(wdb->idx, TCETHREAD, __FILE__, __LINE__, __func__);
600     return false;
601   }
602   return true;
603 }
604 
605 
606 /* Open a word database object.
607    `wdb' specifies the word database object.
608    `path' specifies the path of the database file.
609    `omode' specifies the connection mode.
610    If successful, the return value is true, else, it is false. */
tcwdbopenimpl(TCWDB * wdb,const char * path,int omode)611 static bool tcwdbopenimpl(TCWDB *wdb, const char *path, int omode){
612   assert(wdb && path);
613   int bomode = BDBOREADER;
614   if(omode & WDBOWRITER){
615     bomode = BDBOWRITER;
616     if(omode & WDBOCREAT) bomode |= BDBOCREAT;
617     if(omode & WDBOTRUNC) bomode |= BDBOTRUNC;
618     int64_t bnum = (wdb->etnum / WDBLMEMB) * 2 + 1;
619     int bopts = 0;
620     if(wdb->opts & WDBTLARGE) bopts |= BDBTLARGE;
621     if(wdb->opts & WDBTDEFLATE) bopts |= BDBTDEFLATE;
622     if(wdb->opts & WDBTBZIP) bopts |= BDBTBZIP;
623     if(wdb->opts & WDBTTCBS) bopts |= BDBTTCBS;
624     if(!tcbdbtune(wdb->idx, WDBLMEMB, WDBNMEMB, bnum, WDBAPOW, WDBFPOW, bopts)) return false;
625     if(!tcbdbsetlsmax(wdb->idx, WDBLSMAX)) return false;
626   }
627   if(wdb->lcnum > 0){
628     if(!tcbdbsetcache(wdb->idx, wdb->lcnum, wdb->lcnum / 4 + 1)) return false;
629   } else {
630     if(!tcbdbsetcache(wdb->idx, (omode & WDBOWRITER) ? WDBLCNUMW : WDBLCNUMR, WDBNCNUM))
631       return false;
632   }
633   if(omode & WDBONOLCK) bomode |= BDBONOLCK;
634   if(omode & WDBOLCKNB) bomode |= BDBOLCKNB;
635   if(!tcbdbopen(wdb->idx, path, bomode)) return false;
636   if((omode & WDBOWRITER) && tcbdbrnum(wdb->idx) < 1){
637     memcpy(tcbdbopaque(wdb->idx), WDBMAGICDATA, strlen(WDBMAGICDATA));
638   } else if(!(omode & WDBONOLCK) &&
639             memcmp(tcbdbopaque(wdb->idx), WDBMAGICDATA, strlen(WDBMAGICDATA))){
640     tcbdbclose(wdb->idx);
641     tcbdbsetecode(wdb->idx, TCEMETA, __FILE__, __LINE__, __func__);
642     return 0;
643   }
644   if(omode & WDBOWRITER){
645     wdb->cc = tcmapnew2(WDBCCBNUM);
646     wdb->dtokens = tcmapnew2(WDBDTKNBNUM);
647     wdb->dids = tcidsetnew(WDBDIDSBNUM);
648   }
649   wdb->open = true;
650   return true;
651 }
652 
653 
654 /* Close a word database object.
655    `wdb' specifies the word database object.
656    If successful, the return value is true, else, it is false. */
tcwdbcloseimpl(TCWDB * wdb)657 static bool tcwdbcloseimpl(TCWDB *wdb){
658   assert(wdb);
659   bool err = false;
660   if(wdb->cc){
661     if((tcmaprnum(wdb->cc) > 0 || tcmaprnum(wdb->dtokens) > 0) && !tcwdbmemsync(wdb, 0))
662       err = true;
663     tcidsetdel(wdb->dids);
664     tcmapdel(wdb->dtokens);
665     tcmapdel(wdb->cc);
666     wdb->cc = NULL;
667   }
668   if(!tcbdbclose(wdb->idx)) err = true;
669   wdb->open = false;
670   return !err;
671 }
672 
673 
674 /* Store a record into a q-gram database object.
675    `wdb' specifies the q-gram database object.
676    `id' specifies the ID number of the record.
677    `words' specifies a list object contains the words of the record.
678    If successful, the return value is true, else, it is false. */
tcwdbputimpl(TCWDB * wdb,int64_t id,const TCLIST * words)679 static bool tcwdbputimpl(TCWDB *wdb, int64_t id, const TCLIST *words){
680   assert(wdb && id > 0 && words);
681   char idbuf[TDNUMBUFSIZ*2];
682   int idsiz;
683   TDSETVNUMBUF64(idsiz, idbuf, id);
684   TCMAP *cc = wdb->cc;
685   int wn = tclistnum(words);
686   TCMAP *uniq = tcmapnew2(wn + 1);
687   for(int i = 0; i < wn; i++){
688     int wsiz;
689     const char *word = tclistval(words, i, &wsiz);
690     if(!tcmapputkeep(uniq, word, wsiz, "", 0)) continue;
691     if(*word != '\0') tcmapputcat(cc, word, wsiz, idbuf, idsiz);
692   }
693   tcmapdel(uniq);
694   bool err = false;
695   if(tcmapmsiz(cc) >= wdb->icsiz && !tcwdbmemsync(wdb, 1)) err = true;
696   return !err;
697 }
698 
699 
700 /* Remove a record of a q-gram database object.
701    `wdb' specifies the q-gram database object.
702    `id' specifies the ID number of the record.
703    `words' specifies a list object contains the words of the record.
704    If successful, the return value is true, else, it is false. */
tcwdboutimpl(TCWDB * wdb,int64_t id,const TCLIST * words)705 static bool tcwdboutimpl(TCWDB *wdb, int64_t id, const TCLIST *words){
706   assert(wdb && id > 0 && words);
707   char idbuf[TDNUMBUFSIZ*2];
708   int idsiz;
709   TDSETVNUMBUF64(idsiz, idbuf, id);
710   TCMAP *dtokens = wdb->dtokens;
711   int wn = tclistnum(words);
712   for(int i = 0; i < wn; i++){
713     int wsiz;
714     const char *word = tclistval(words, i, &wsiz);
715     if(*word != '\0') tcmapputkeep(dtokens, word, wsiz, "", 0);
716   }
717   tcidsetmark(wdb->dids, id);
718   bool err = false;
719   if(tcmapmsiz(dtokens) >= wdb->icsiz && !tcwdbmemsync(wdb, 1)) err = true;
720   return !err;
721 }
722 
723 
724 /* Search a q-gram database.
725    `wdb' specifies the q-gram database object.
726    `word' specifies the string of the word to be matched to.
727    `np' specifies the pointer to the variable into which the number of elements of the return
728    value is assigned.
729    If successful, the return value is the pointer to an array of ID numbers of the corresponding
730    records. */
tcwdbsearchimpl(TCWDB * wdb,const char * word,int * np)731 static uint64_t *tcwdbsearchimpl(TCWDB *wdb, const char *word, int *np){
732   assert(wdb && word && np);
733   int wlen = strlen(word);
734   if(wlen > WDBMAXWORDLEN){
735     tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
736     return NULL;
737   }
738   int vsiz;
739   const char *vbuf = tcbdbget3(wdb->idx, word, wlen, &vsiz);
740   if(!vbuf){
741     vbuf = "";
742     vsiz = 0;
743   }
744   uint64_t *res = tcmalloc(WDBRESUNIT * sizeof(*res));
745   int rnum = 0;
746   int ranum = WDBRESUNIT;
747   while(vsiz > 0){
748     int step;
749     uint64_t id;
750     TDREADVNUMBUF64(vbuf, id, step);
751     vbuf += step;
752     vsiz -= step;
753     if(rnum >= ranum){
754       ranum *= 2;
755       res = tcrealloc(res, ranum * sizeof(*res));
756     }
757     res[rnum++] = id;
758   }
759   *np = rnum;
760   return res;
761 }
762 
763 
/* Compare two list elements in lexical order.
   `a' specifies the pointer to one element, which is itself a pointer to a string.
   `b' specifies the pointer to the other element.
   The return value is positive if the former is big, negative if the latter is big, 0 if both
   are equivalent. */
static int tccmpwords(const char **a, const char **b){
  assert(a && b);
  const char *lhs = *a;
  const char *rhs = *b;
  return strcmp(lhs, rhs);
}
773 
774 
775 
776 // END OF FILE
777