1 /*************************************************************************************************
2  * The simple API of Tokyo Dystopia
3  *                                                               Copyright (C) 2007-2010 FAL Labs
4  * This file is part of Tokyo Dystopia.
5  * Tokyo Dystopia is free software; you can redistribute it and/or modify it under the terms of
6  * the GNU Lesser General Public License as published by the Free Software Foundation; either
7  * version 2.1 of the License or any later version.  Tokyo Dystopia is distributed in the hope
8  * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
10  * License for more details.
11  * You should have received a copy of the GNU Lesser General Public License along with Tokyo
12  * Dystopia; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
13  * Boston, MA 02111-1307 USA.
14  *************************************************************************************************/
15 
16 
17 #include "laputa.h"
18 #include "myconf.h"
19 
20 #define JDBDIRMODE     00755             // permission of created directories
21 #define JDBIOBUFSIZ    65536             // size of an I/O buffer
22 #define JDBDEFFWMMAX   2048              // default maximum number of forward matching expansions
23 #define JDBTXDBNAME    "laputa.tch"      // name of the text database
24 #define JDBLSDBNAME    "list.tcb"        // name of the word list database
25 #define JDBTXDBMAGIC   0x4a              // magic data for identification
26 #define JDBAVGWSIZ     16                // average size of words
27 
28 #define JDBDEFERNUM    1000000           // default expected record number
29 #define JDBDEFETNUM    1000000           // default expected token number
30 #define JDBDEFIUSIZ    (1024LL*1024*512) // default file size at which writing cycles to another index
31 #define JDBTXBNUMCO    2                 // coefficient of the bucket number
32 #define JDBTXAPOW      3                 // alignment power of the text database
33 #define JDBTXFPOW      10                // free block pool power of the text database
34 #define JDBLSLMEMB     256               // members per leaf page of the word list database
35 #define JDBLSNMEMB     256               // members per non-leaf page of the word list database
36 
37 
38 /* private function prototypes */
39 static bool tcjdblockmethod(TCJDB *jdb, bool wr);  // take the method lock (writer or reader)
40 static bool tcjdbunlockmethod(TCJDB *jdb);  // release the method lock
41 static bool tcjdbsynccb(int total, int current, const char *msg, TCJDB *jdb);  // sync callback installed on each index
42 static bool tcjdbaddcb(const char *word, TCJDB *jdb);  // word addition callback installed on each index
43 static bool tcjdbopenimpl(TCJDB *jdb, const char *path, int omode);  // backs tcjdbopen
44 static bool tcjdbcloseimpl(TCJDB *jdb);  // backs tcjdbclose
45 static bool tcjdbputimpl(TCJDB *jdb, int64_t id, const TCLIST *words);  // backs tcjdbput
46 static bool tcjdboutimpl(TCJDB *jdb, int64_t id);  // backs tcjdbout
47 static char *tcjdbgetimpl(TCJDB *jdb, int64_t id);  // backs tcjdbget2
48 static uint64_t *tcjdbsearchimpl(TCJDB *jdb, const char *word, int smode, int *np);  // backs tcjdbsearch
49 static uint64_t *tcjdbsearchword(TCJDB *jdb, const char *word, int *np);
50 static uint64_t *tcjdbsearchtoken(TCJDB *jdb, const char *token, int *np);  // evaluates one term for tcjdbsearch2
51 static bool tcjdboptimizeimpl(TCJDB *jdb);  // backs tcjdboptimize
52 static bool tcjdbvanishimpl(TCJDB *jdb);  // backs tcjdbvanish
53 static bool tcjdbcopyimpl(TCJDB *jdb, const char *path);  // backs tcjdbcopy
54 
55 
56 
57 /*************************************************************************************************
58  * API
59  *************************************************************************************************/
60 
61 
/* Get the message string corresponding to an error code.
   `ecode' specifies the error code.
   The return value is whatever `tchdberrmsg' yields for the same code. */
const char *tcjdberrmsg(int ecode){
  const char *msg = tchdberrmsg(ecode);
  return msg;
}
66 
67 
68 /* Create a tagged database object. */
tcjdbnew(void)69 TCJDB *tcjdbnew(void){
70   TCJDB *jdb = tcmalloc(sizeof(*jdb));
71   jdb->mmtx = tcmalloc(sizeof(pthread_rwlock_t));
72   if(pthread_rwlock_init(jdb->mmtx, NULL) != 0) tcmyfatal("pthread_rwlock_init failed");
73   jdb->txdb = tchdbnew();
74   if(!tchdbsetmutex(jdb->txdb)) tcmyfatal("tchdbsetmutex failed");
75   jdb->lsdb = tcbdbnew();
76   TCWDB **idxs = jdb->idxs;
77   for(int i = 0; i < JDBWDBMAX; i++){
78     idxs[i] = tcwdbnew();
79     tcwdbsetsynccb(idxs[i], (bool (*)(int, int, const char *, void *))tcjdbsynccb, jdb);
80     tcwdbsetaddcb(idxs[i], (bool (*)(const char *, void *))tcjdbaddcb, jdb);
81   }
82   jdb->inum = 0;
83   jdb->cnum = 0;
84   jdb->path = NULL;
85   jdb->wmode = false;
86   jdb->wopts = 0;
87   jdb->womode = 0;
88   jdb->ernum = JDBDEFERNUM;
89   jdb->etnum = JDBDEFETNUM;
90   jdb->iusiz = JDBDEFIUSIZ;
91   jdb->opts = 0;
92   jdb->synccb = NULL;
93   jdb->syncopq = NULL;
94   jdb->exopts = 0;
95   return jdb;
96 }
97 
98 
99 /* Delete a tagged database object. */
tcjdbdel(TCJDB * jdb)100 void tcjdbdel(TCJDB *jdb){
101   assert(jdb);
102   if(jdb->path) tcjdbclose(jdb);
103   TCWDB **idxs = jdb->idxs;
104   for(int i = JDBWDBMAX - 1; i >= 0; i--){
105     tcwdbdel(idxs[i]);
106   }
107   tcbdbdel(jdb->lsdb);
108   tchdbdel(jdb->txdb);
109   pthread_rwlock_destroy(jdb->mmtx);
110   tcfree(jdb->mmtx);
111   tcfree(jdb);
112 }
113 
114 
115 /* Get the last happened error code of a tagged database object. */
tcjdbecode(TCJDB * jdb)116 int tcjdbecode(TCJDB *jdb){
117   assert(jdb);
118   return tchdbecode(jdb->txdb);
119 }
120 
121 
122 /* Set the tuning parameters of a tagged database object. */
tcjdbtune(TCJDB * jdb,int64_t ernum,int64_t etnum,int64_t iusiz,uint8_t opts)123 bool tcjdbtune(TCJDB *jdb, int64_t ernum, int64_t etnum, int64_t iusiz, uint8_t opts){
124   assert(jdb);
125   if(!tcjdblockmethod(jdb, true)) return false;
126   if(jdb->path){
127     tchdbsetecode(jdb->txdb, TCEINVALID, __FILE__, __LINE__, __func__);
128     tcjdbunlockmethod(jdb);
129     return false;
130   }
131   jdb->ernum = (ernum > 0) ? ernum : JDBDEFERNUM;
132   jdb->etnum = (etnum > 0) ? etnum : JDBDEFETNUM;
133   jdb->iusiz = (iusiz > 0) ? iusiz : JDBDEFIUSIZ;
134   jdb->opts = opts;
135   tcjdbunlockmethod(jdb);
136   return true;
137 }
138 
139 
140 /* Set the caching parameters of a tagged database object. */
tcjdbsetcache(TCJDB * jdb,int64_t icsiz,int32_t lcnum)141 bool tcjdbsetcache(TCJDB *jdb, int64_t icsiz, int32_t lcnum){
142   assert(jdb);
143   if(!tcjdblockmethod(jdb, true)) return false;
144   if(jdb->path){
145     tchdbsetecode(jdb->txdb, TCEINVALID, __FILE__, __LINE__, __func__);
146     tcjdbunlockmethod(jdb);
147     return false;
148   }
149   TCWDB **idxs = jdb->idxs;
150   for(int i = 0; i < JDBWDBMAX; i++){
151     tcwdbsetcache(idxs[i], icsiz, lcnum);
152   }
153   tcjdbunlockmethod(jdb);
154   return true;
155 }
156 
157 
158 /* Set the maximum number of forward matching expansion of a tagged database object. */
tcjdbsetfwmmax(TCJDB * jdb,uint32_t fwmmax)159 bool tcjdbsetfwmmax(TCJDB *jdb, uint32_t fwmmax){
160   assert(jdb);
161   if(!tcjdblockmethod(jdb, true)) return false;
162   if(jdb->path){
163     tchdbsetecode(jdb->txdb, TCEINVALID, __FILE__, __LINE__, __func__);
164     tcjdbunlockmethod(jdb);
165     return false;
166   }
167   TCWDB **idxs = jdb->idxs;
168   for(int i = 0; i < JDBWDBMAX; i++){
169     tcwdbsetfwmmax(idxs[i], fwmmax);
170   }
171   tcjdbunlockmethod(jdb);
172   return true;
173 }
174 
175 
176 /* Open a tagged database object. */
tcjdbopen(TCJDB * jdb,const char * path,int omode)177 bool tcjdbopen(TCJDB *jdb, const char *path, int omode){
178   assert(jdb && path);
179   if(!tcjdblockmethod(jdb, true)) return false;
180   if(jdb->path){
181     tchdbsetecode(jdb->txdb, TCEINVALID, __FILE__, __LINE__, __func__);
182     tcjdbunlockmethod(jdb);
183     return false;
184   }
185   bool rv = tcjdbopenimpl(jdb, path, omode);
186   tcjdbunlockmethod(jdb);
187   return rv;
188 }
189 
190 
191 /* Close a tagged database object. */
tcjdbclose(TCJDB * jdb)192 bool tcjdbclose(TCJDB *jdb){
193   assert(jdb);
194   if(!tcjdblockmethod(jdb, true)) return false;
195   if(!jdb->path){
196     tchdbsetecode(jdb->txdb, TCEINVALID, __FILE__, __LINE__, __func__);
197     tcjdbunlockmethod(jdb);
198     return false;
199   }
200   bool rv = tcjdbcloseimpl(jdb);
201   tcjdbunlockmethod(jdb);
202   return rv;
203 }
204 
205 
206 /* Store a record into a tagged database object. */
tcjdbput(TCJDB * jdb,int64_t id,const TCLIST * words)207 bool tcjdbput(TCJDB *jdb, int64_t id, const TCLIST *words){
208   assert(jdb && id > 0 && words);
209   if(!tcjdblockmethod(jdb, true)) return false;
210   if(!jdb->path || !jdb->wmode){
211     tchdbsetecode(jdb->txdb, TCEINVALID, __FILE__, __LINE__, __func__);
212     tcjdbunlockmethod(jdb);
213     return false;
214   }
215   bool rv = tcjdbputimpl(jdb, id, words);
216   tcjdbunlockmethod(jdb);
217   return rv;
218 }
219 
220 
221 /* Store a record with a text string into a tagged database object. */
tcjdbput2(TCJDB * jdb,int64_t id,const char * text,const char * delims)222 bool tcjdbput2(TCJDB *jdb, int64_t id, const char *text, const char *delims){
223   assert(jdb && id > 0 && text);
224   TCLIST *words = tcstrsplit(text, delims ? delims : WDBSPCCHARS);
225   bool rv = tcjdbput(jdb, id, words);
226   tclistdel(words);
227   return rv;
228 }
229 
230 
231 /* Remove a record of a tagged database object. */
tcjdbout(TCJDB * jdb,int64_t id)232 bool tcjdbout(TCJDB *jdb, int64_t id){
233   assert(jdb && id > 0);
234   if(!tcjdblockmethod(jdb, true)) return false;
235   if(!jdb->path || !jdb->wmode){
236     tchdbsetecode(jdb->txdb, TCEINVALID, __FILE__, __LINE__, __func__);
237     tcjdbunlockmethod(jdb);
238     return false;
239   }
240   bool rv = tcjdboutimpl(jdb, id);
241   tcjdbunlockmethod(jdb);
242   return rv;
243 }
244 
245 
246 /* Retrieve a record of a tagged database object. */
tcjdbget(TCJDB * jdb,int64_t id)247 TCLIST *tcjdbget(TCJDB *jdb, int64_t id){
248   assert(jdb && id > 0);
249   char *text = tcjdbget2(jdb, id);
250   if(!text) return NULL;
251   TCLIST *words = tcstrsplit(text, "\t");
252   tcfree(text);
253   return words;
254 }
255 
256 
257 /* Retrieve a record as a string of a tagged database object. */
tcjdbget2(TCJDB * jdb,int64_t id)258 char *tcjdbget2(TCJDB *jdb, int64_t id){
259   assert(jdb && id > 0);
260   if(!tcjdblockmethod(jdb, false)) return false;
261   if(!jdb->path){
262     tchdbsetecode(jdb->txdb, TCEINVALID, __FILE__, __LINE__, __func__);
263     tcjdbunlockmethod(jdb);
264     return NULL;
265   }
266   char *rv = tcjdbgetimpl(jdb, id);
267   tcjdbunlockmethod(jdb);
268   return rv;
269 }
270 
271 
272 /* Search a tagged database. */
tcjdbsearch(TCJDB * jdb,const char * word,int smode,int * np)273 uint64_t *tcjdbsearch(TCJDB *jdb, const char *word, int smode, int *np){
274   assert(jdb && word && np);
275   if(!tcjdblockmethod(jdb, false)) return false;
276   if(!jdb->path){
277     tchdbsetecode(jdb->txdb, TCEINVALID, __FILE__, __LINE__, __func__);
278     tcjdbunlockmethod(jdb);
279     return false;
280   }
281   char *nword = tcstrdup(word);
282   tctextnormalize(nword, TCTNLOWER | TCTNNOACC | TCTNSPACE);
283   uint64_t *rv = tcjdbsearchimpl(jdb, nword, smode, np);
284   tcfree(nword);
285   tcjdbunlockmethod(jdb);
286   return rv;
287 }
288 
289 
290 /* Search a tagged database with a compound expression. */
tcjdbsearch2(TCJDB * jdb,const char * expr,int * np)291 uint64_t *tcjdbsearch2(TCJDB *jdb, const char *expr, int *np){
292   assert(jdb && expr && np);
293   TCLIST *terms = tclistnew();
294   char *nexpr = tcstrdup(expr);
295   tctextnormalize(nexpr, TCTNSPACE);
296   const char *rp = nexpr;
297   while(*rp != '\0'){
298     if(*rp == ' '){
299       rp++;
300       while(*rp == ' '){
301         rp++;
302       }
303     } else if(*rp == '"'){
304       const char *pv = rp;
305       rp++;
306       while(*rp != '\0' && !(*rp == '"' && *(++rp) != '"')){
307         rp++;
308       }
309       if(*rp == '"') rp++;
310       tclistpush(terms, pv, rp - pv);
311     } else if(rp[0] == '[' && rp[1] == '['){
312       const char *pv = rp;
313       rp += 2;
314       while(*rp != '\0' && !(rp[0] == ']' && rp[1] == ']')){
315         rp++;
316       }
317       if(rp[0] == ']' && rp[1] == ']') rp += 2;
318       tclistpush(terms, pv, rp - pv);
319     } else {
320       const char *pv = rp;
321       rp++;
322       while(*rp != '\0' && *rp != ' ' && *rp != '"'){
323         rp++;
324       }
325       tclistpush(terms, pv, rp - pv);
326     }
327   }
328   tcfree(nexpr);
329   int tnum = tclistnum(terms);
330   if(tnum < 1){
331     tclistdel(terms);
332     *np = 0;
333     return tcmalloc(1);
334   }
335   if(tnum == 1){
336     uint64_t *res = tcjdbsearchtoken(jdb, tclistval2(terms, 0), np);
337     tclistdel(terms);
338     return res;
339   }
340   QDBRSET *rsets = tcmalloc(tnum * sizeof(*rsets));
341   int rsnum = 0;
342   bool sign = true;
343   int ti = 0;
344   while(ti < tnum){
345     const char *term = tclistval2(terms, ti);
346     if(!strcmp(term, "&&") || !strcmp(term, "||")){
347       sign = true;
348     } else if(!strcmp(term, "!!")){
349       sign = false;
350     } else {
351       rsets[rsnum].ids = tcjdbsearchtoken(jdb, term, &rsets[rsnum].num);
352       int rsover = 0;
353       while(ti + 2 < tnum && !strcmp(tclistval2(terms, ti + 1), "||")){
354         rsover++;
355         int ri = rsnum + rsover;
356         rsets[ri].ids = tcjdbsearchtoken(jdb, tclistval2(terms, ti + 2), &rsets[ri].num);
357         ti += 2;
358       }
359       if(rsover > 0){
360         int rnum;
361         uint64_t *res = tcqdbresunion(rsets + rsnum, rsover + 1, &rnum);
362         for(int i = 0; i <= rsover; i++){
363           tcfree(rsets[rsnum+i].ids);
364         }
365         rsets[rsnum].ids = res;
366         rsets[rsnum].num = rnum;
367       }
368       if(!sign) rsets[rsnum].num *= -1;
369       rsnum++;
370       sign = true;
371     }
372     ti++;
373   }
374   uint64_t *res;
375   int rnum;
376   while(rsnum > 1){
377     if(rsets[0].num < 0) rsets[0].num = 0;
378     int unum = 0;
379     for(int i = 1; i < rsnum; i++){
380       if(rsets[i].num < 0) break;
381       unum++;
382     }
383     if(unum > 0){
384       res = tcqdbresisect(rsets, unum + 1, &rnum);
385       for(int i = 0; i <= unum; i++){
386         tcfree(rsets[i].ids);
387       }
388       rsets[0].ids = res;
389       rsets[0].num = rnum;
390       memmove(rsets + 1, rsets + unum + 1, (rsnum - unum - 1) * sizeof(*rsets));
391       rsnum -= unum;
392     }
393     if(rsnum > 1){
394       unum = 0;
395       for(int i = 1; i < rsnum; i++){
396         if(rsets[i].num >= 0) break;
397         rsets[i].num *= -1;
398         unum++;
399       }
400       if(unum > 0){
401         res = tcqdbresdiff(rsets, unum + 1, &rnum);
402         for(int i = 0; i <= unum; i++){
403           tcfree(rsets[i].ids);
404         }
405         rsets[0].ids = res;
406         rsets[0].num = rnum;
407         memmove(rsets + 1, rsets + unum + 1, (rsnum - unum - 1) * sizeof(*rsets));
408         rsnum -= unum;
409       }
410     }
411   }
412   if(rsnum < 1){
413     res = tcmalloc(1);
414     rnum = 0;
415   } else {
416     if(!rsets[0].ids || rsets[0].num < 0) rsets[0].num = 0;
417     res = rsets[0].ids;
418     rnum = rsets[0].num;
419     rsnum--;
420   }
421   for(int i = 0; i < rsnum; i++){
422     tcfree(rsets[i].ids);
423   }
424   tcfree(rsets);
425   tclistdel(terms);
426   *np = rnum;
427   return res;
428 }
429 
430 
431 /* Initialize the iterator of a tagged database object. */
tcjdbiterinit(TCJDB * jdb)432 bool tcjdbiterinit(TCJDB *jdb){
433   assert(jdb);
434   if(!tcjdblockmethod(jdb, true)) return false;
435   if(!jdb->path){
436     tchdbsetecode(jdb->txdb, TCEINVALID, __FILE__, __LINE__, __func__);
437     tcjdbunlockmethod(jdb);
438     return false;
439   }
440   bool rv = tchdbiterinit(jdb->txdb);
441   tcjdbunlockmethod(jdb);
442   return rv;
443 }
444 
445 
446 /* Get the next ID number of the iterator of a tagged database object. */
tcjdbiternext(TCJDB * jdb)447 uint64_t tcjdbiternext(TCJDB *jdb){
448   assert(jdb);
449   if(!tcjdblockmethod(jdb, true)) return false;
450   if(!jdb->path){
451     tchdbsetecode(jdb->txdb, TCEINVALID, __FILE__, __LINE__, __func__);
452     tcjdbunlockmethod(jdb);
453     return false;
454   }
455   uint64_t rv = 0;
456   int vsiz;
457   char *vbuf = tchdbiternext(jdb->txdb, &vsiz);
458   if(vbuf){
459     TDREADVNUMBUF64(vbuf, rv, vsiz);
460     tcfree(vbuf);
461   }
462   tcjdbunlockmethod(jdb);
463   return rv;
464 }
465 
466 
467 /* Synchronize updated contents of a tagged database object with the files and the device. */
tcjdbsync(TCJDB * jdb)468 bool tcjdbsync(TCJDB *jdb){
469   assert(jdb);
470   if(!tcjdblockmethod(jdb, true)) return false;
471   if(!jdb->path || !jdb->wmode){
472     tchdbsetecode(jdb->txdb, TCEINVALID, __FILE__, __LINE__, __func__);
473     tcjdbunlockmethod(jdb);
474     return false;
475   }
476   bool rv = tcjdbmemsync(jdb, 2);
477   tcjdbunlockmethod(jdb);
478   return rv;
479 }
480 
481 
482 /* Optimize the files of a tagged database object. */
tcjdboptimize(TCJDB * jdb)483 bool tcjdboptimize(TCJDB *jdb){
484   assert(jdb);
485   if(!tcjdblockmethod(jdb, true)) return false;
486   if(!jdb->path || !jdb->wmode){
487     tchdbsetecode(jdb->txdb, TCEINVALID, __FILE__, __LINE__, __func__);
488     tcjdbunlockmethod(jdb);
489     return false;
490   }
491   bool rv = tcjdboptimizeimpl(jdb);
492   tcjdbunlockmethod(jdb);
493   return rv;
494 }
495 
496 
497 /* Remove all records of a tagged database object. */
tcjdbvanish(TCJDB * jdb)498 bool tcjdbvanish(TCJDB *jdb){
499   assert(jdb);
500   if(!tcjdblockmethod(jdb, true)) return false;
501   if(!jdb->path || !jdb->wmode){
502     tchdbsetecode(jdb->txdb, TCEINVALID, __FILE__, __LINE__, __func__);
503     tcjdbunlockmethod(jdb);
504     return false;
505   }
506   bool rv = tcjdbvanishimpl(jdb);
507   tcjdbunlockmethod(jdb);
508   return rv;
509 }
510 
511 
512 /* Copy the database directory of a tagged database object. */
tcjdbcopy(TCJDB * jdb,const char * path)513 bool tcjdbcopy(TCJDB *jdb, const char *path){
514   assert(jdb);
515   if(!tcjdblockmethod(jdb, false)) return false;
516   if(!jdb->path || !jdb->wmode){
517     tchdbsetecode(jdb->txdb, TCEINVALID, __FILE__, __LINE__, __func__);
518     tcjdbunlockmethod(jdb);
519     return false;
520   }
521   bool rv = tcjdbcopyimpl(jdb, path);
522   tcjdbunlockmethod(jdb);
523   return rv;
524 }
525 
526 
527 /* Get the directory path of a tagged database object. */
tcjdbpath(TCJDB * jdb)528 const char *tcjdbpath(TCJDB *jdb){
529   assert(jdb);
530   return jdb->path;
531 }
532 
533 
534 /* Get the number of records of a tagged database object. */
tcjdbrnum(TCJDB * jdb)535 uint64_t tcjdbrnum(TCJDB *jdb){
536   assert(jdb);
537   if(!tcjdblockmethod(jdb, false)) return false;
538   if(!jdb->path){
539     tchdbsetecode(jdb->txdb, TCEINVALID, __FILE__, __LINE__, __func__);
540     tcjdbunlockmethod(jdb);
541     return 0;
542   }
543   uint64_t rv = tchdbrnum(jdb->txdb);
544   tcjdbunlockmethod(jdb);
545   return rv;
546 }
547 
548 
549 /* Get the total size of the database files of a tagged database object. */
tcjdbfsiz(TCJDB * jdb)550 uint64_t tcjdbfsiz(TCJDB *jdb){
551   assert(jdb);
552   if(!tcjdblockmethod(jdb, false)) return false;
553   if(!jdb->path){
554     tchdbsetecode(jdb->txdb, TCEINVALID, __FILE__, __LINE__, __func__);
555     tcjdbunlockmethod(jdb);
556     return 0;
557   }
558   uint64_t rv = tchdbfsiz(jdb->txdb);
559   TCWDB **idxs = jdb->idxs;
560   uint8_t inum = jdb->inum;
561   for(int i = 0; i < inum; i++){
562     rv += tcwdbfsiz(idxs[i]);
563   }
564   tcjdbunlockmethod(jdb);
565   return rv;
566 }
567 
568 
569 
570 /*************************************************************************************************
571  * features for experts
572  *************************************************************************************************/
573 
574 
575 /* Set the file descriptor for debugging output. */
tcjdbsetdbgfd(TCJDB * jdb,int fd)576 void tcjdbsetdbgfd(TCJDB *jdb, int fd){
577   assert(jdb);
578   tchdbsetdbgfd(jdb->txdb, fd);
579   TCWDB **idxs = jdb->idxs;
580   for(int i = 0; i < JDBWDBMAX; i++){
581     tcwdbsetdbgfd(idxs[i], fd);
582   }
583 }
584 
585 
586 /* Get the file descriptor for debugging output. */
tcjdbdbgfd(TCJDB * jdb)587 int tcjdbdbgfd(TCJDB *jdb){
588   assert(jdb);
589   return tchdbdbgfd(jdb->txdb);
590 }
591 
592 
593 /* Synchronize updating contents on memory of a tagged database object. */
tcjdbmemsync(TCJDB * jdb,int level)594 bool tcjdbmemsync(TCJDB *jdb, int level){
595   assert(jdb);
596   if(!jdb->path || !jdb->wmode){
597     tchdbsetecode(jdb->txdb, TCEINVALID, __FILE__, __LINE__, __func__);
598     return false;
599   }
600   TCHDB *txdb = jdb->txdb;
601   TCBDB *lsdb = jdb->lsdb;
602   TCWDB **idxs = jdb->idxs;
603   uint8_t inum = jdb->inum;
604   char *txopq = tchdbopaque(txdb);
605   *(uint8_t *)(txopq + sizeof(uint8_t)) = inum;
606   bool err = false;
607   if(!tchdbmemsync(txdb, false)) err = true;
608   if(!tcbdbmemsync(lsdb, false)) err = true;
609   for(int i = 0; i < inum; i++){
610     if(!tcwdbmemsync(idxs[i], level)){
611       tchdbsetecode(txdb, tcwdbecode(idxs[i]), __FILE__, __LINE__, __func__);
612       err = true;
613     }
614   }
615   return !err;
616 }
617 
618 
619 /* Get the inode number of the database file of a tagged database object. */
tcjdbinode(TCJDB * jdb)620 uint64_t tcjdbinode(TCJDB *jdb){
621   assert(jdb);
622   return tchdbinode(jdb->txdb);
623 }
624 
625 
626 /* Get the modification time of the database file of a tagged database object. */
tcjdbmtime(TCJDB * jdb)627 time_t tcjdbmtime(TCJDB *jdb){
628   assert(jdb);
629   return tchdbmtime(jdb->txdb);
630 }
631 
632 
633 /* Get the options of a tagged database object. */
tcjdbopts(TCJDB * jdb)634 uint8_t tcjdbopts(TCJDB *jdb){
635   assert(jdb);
636   return jdb->opts;
637 }
638 
639 
640 /* Set the callback function for sync progression of a tagged database object. */
tcjdbsetsynccb(TCJDB * jdb,bool (* cb)(int,int,const char *,void *),void * opq)641 void tcjdbsetsynccb(TCJDB *jdb, bool (*cb)(int, int, const char *, void *), void *opq){
642   assert(jdb);
643   jdb->synccb = cb;
644   jdb->syncopq = opq;
645 }
646 
647 
648 /* Set the expert options of a tagged database object. */
tcjdbsetexopts(TCJDB * jdb,uint32_t exopts)649 void tcjdbsetexopts(TCJDB *jdb, uint32_t exopts){
650   assert(jdb);
651   jdb->exopts = exopts;
652 }
653 
654 
655 
656 /*************************************************************************************************
657  * private features
658  *************************************************************************************************/
659 
660 
661 /* Lock a method of the tagged database object.
662    `jdb' specifies the tagged database object.
663    `wr' specifies whether the lock is writer or not.
664    If successful, the return value is true, else, it is false. */
tcjdblockmethod(TCJDB * jdb,bool wr)665 static bool tcjdblockmethod(TCJDB *jdb, bool wr){
666   assert(jdb);
667   if(wr ? pthread_rwlock_wrlock(jdb->mmtx) != 0 : pthread_rwlock_rdlock(jdb->mmtx) != 0){
668     tchdbsetecode(jdb->txdb, TCETHREAD, __FILE__, __LINE__, __func__);
669     return false;
670   }
671   return true;
672 }
673 
674 
675 /* Unlock a method of the tagged database object.
676    `bdb' specifies the tagged database object.
677    If successful, the return value is true, else, it is false. */
tcjdbunlockmethod(TCJDB * jdb)678 static bool tcjdbunlockmethod(TCJDB *jdb){
679   assert(jdb);
680   if(pthread_rwlock_unlock(jdb->mmtx) != 0){
681     tchdbsetecode(jdb->txdb, TCETHREAD, __FILE__, __LINE__, __func__);
682     return false;
683   }
684   return true;
685 }
686 
687 
688 /* Call the callback for sync progression.
689    `total' specifies the number of tokens to be synchronized.
690    `current' specifies the number of processed tokens.
691    `msg' specifies the message string.
692    `jdb' specifies the tagged database object.
693    The return value is true usually, or false if the operation should be terminated. */
tcjdbsynccb(int total,int current,const char * msg,TCJDB * jdb)694 static bool tcjdbsynccb(int total, int current, const char *msg, TCJDB *jdb){
695   assert(msg && jdb);
696   bool rv = jdb->synccb ? jdb->synccb(total, current, msg, jdb->syncopq) : true;
697   if((total|current) == 0 && !strcmp(msg, QDBSYNCMSGL) &&
698      tcwdbfsiz(jdb->idxs[jdb->cnum]) >= jdb->iusiz && jdb->inum > 0){
699     TCWDB **idxs = jdb->idxs;
700     if(jdb->synccb && !jdb->synccb(total, current, "to be cycled", jdb->syncopq)) rv = false;
701     if(!tcwdbcacheclear(jdb->idxs[jdb->cnum])){
702       tchdbsetecode(jdb->txdb, tcwdbecode(jdb->idxs[jdb->cnum]), __FILE__, __LINE__, __func__);
703       rv = false;
704     }
705     int inum = jdb->inum;
706     jdb->cnum = 0;
707     uint64_t min = UINT64_MAX;
708     for(int i = 0; i < inum; i++){
709       uint64_t fsiz = tcwdbfsiz(idxs[i]);
710       if(fsiz < min){
711         jdb->cnum = i;
712         min = fsiz;
713       }
714     }
715     if(min > jdb->iusiz && inum < JDBWDBMAX) jdb->cnum = inum;
716   }
717   return rv;
718 }
719 
720 
721 /* Call the callback for word addition.
722    `word' specifies the word.
723    `jdb' specifies the tagged database object.
724    The return value is true usually, or false if the operation should be terminated. */
tcjdbaddcb(const char * word,TCJDB * jdb)725 static bool tcjdbaddcb(const char *word, TCJDB *jdb){
726   assert(word && jdb);
727   tcbdbputkeep(jdb->lsdb, word, strlen(word), "", 0);
728   return true;
729 }
730 
731 
732 /* Open a tagged database object.
733    `jdb' specifies the tagged database object.
734    `path' specifies the path of the database file.
735    `omode' specifies the connection mode.
736    If successful, the return value is true, else, it is false. */
tcjdbopenimpl(TCJDB * jdb,const char * path,int omode)737 static bool tcjdbopenimpl(TCJDB *jdb, const char *path, int omode){
738   assert(jdb && path);
739   char pbuf[strlen(path)+TDNUMBUFSIZ];
740   if(omode & JDBOWRITER){
741     if(omode & JDBOCREAT){
742       if(mkdir(path, JDBDIRMODE) == -1 && errno != EEXIST){
743         int ecode = TCEMKDIR;
744         switch(errno){
745           case EACCES: ecode = TCENOPERM; break;
746           case ENOENT: ecode = TCENOFILE; break;
747         }
748         tchdbsetecode(jdb->txdb, ecode, __FILE__, __LINE__, __func__);
749         return false;
750       }
751     }
752     if(omode & JDBOTRUNC){
753       sprintf(pbuf, "%s%c%s", path, MYPATHCHR, JDBTXDBNAME);
754       if(unlink(pbuf) == -1 && errno != ENOENT){
755         tchdbsetecode(jdb->txdb, TCEUNLINK, __FILE__, __LINE__, __func__);
756         return false;
757       }
758       sprintf(pbuf, "%s%c%s", path, MYPATHCHR, JDBLSDBNAME);
759       if(unlink(pbuf) == -1 && errno != ENOENT){
760         tchdbsetecode(jdb->txdb, TCEUNLINK, __FILE__, __LINE__, __func__);
761         return false;
762       }
763       for(int i = 0; i < JDBWDBMAX; i++){
764         sprintf(pbuf, "%s%c%04d", path, MYPATHCHR, i + 1);
765         if(unlink(pbuf) == -1 && errno != ENOENT){
766           tchdbsetecode(jdb->txdb, TCEUNLINK, __FILE__, __LINE__, __func__);
767           return false;
768         }
769       }
770     }
771   }
772   struct stat sbuf;
773   if(stat(path, &sbuf) == -1){
774     int ecode = TCEOPEN;
775     switch(errno){
776       case EACCES: ecode = TCENOPERM; break;
777       case ENOENT: ecode = TCENOFILE; break;
778     }
779     tchdbsetecode(jdb->txdb, ecode, __FILE__, __LINE__, __func__);
780     return false;
781   }
782   if(!S_ISDIR(sbuf.st_mode)){
783     tchdbsetecode(jdb->txdb, TCEMISC, __FILE__, __LINE__, __func__);
784     return false;
785   }
786   TCHDB *txdb = jdb->txdb;
787   TCBDB *lsdb = jdb->lsdb;
788   TCWDB **idxs = jdb->idxs;
789   int homode = HDBOREADER;
790   uint8_t hopts = 0;
791   int bomode = BDBOREADER;
792   uint8_t bopts = 0;
793   int womode = WDBOREADER;
794   uint8_t wopts = 0;
795   int64_t etnum = jdb->etnum;
796   int64_t iusiz = jdb->iusiz;
797   if(omode & JDBOWRITER){
798     homode = HDBOWRITER;
799     bomode = BDBOWRITER;
800     womode = WDBOWRITER;
801     if(omode & JDBOCREAT){
802       homode |= HDBOCREAT;
803       bomode |= BDBOCREAT;
804       womode |= WDBOCREAT;
805     }
806     if(omode & JDBOTRUNC){
807       homode |= HDBOTRUNC;
808       bomode |= BDBOTRUNC;
809       womode |= WDBOTRUNC;
810     }
811     int64_t bnum = jdb->ernum * JDBTXBNUMCO + 1;
812     if(jdb->opts & JDBTLARGE){
813       hopts |= HDBTLARGE;
814       bopts |= BDBTLARGE;
815       wopts |= WDBTLARGE;
816     }
817     if(jdb->opts & JDBTDEFLATE) wopts |= WDBTDEFLATE;
818     if(jdb->opts & JDBTBZIP) wopts |= WDBTBZIP;
819     if(jdb->opts & JDBTTCBS){
820       hopts |= HDBTTCBS;
821       bopts |= BDBTTCBS;
822       wopts |= WDBTTCBS;
823     }
824     if(jdb->exopts & JDBXNOTXT){
825       if(!tchdbtune(txdb, 1, 0, 0, 0)) return false;
826     } else {
827       if(!tchdbtune(txdb, bnum, JDBTXAPOW, JDBTXFPOW, hopts)) return false;
828     }
829     if(!tcbdbtune(lsdb, JDBLSLMEMB, JDBLSNMEMB, (jdb->etnum / JDBLSLMEMB) * 4, -1, -1, bopts)){
830       tchdbsetecode(txdb, tcbdbecode(lsdb), __FILE__, __LINE__, __func__);
831       return false;
832     }
833   }
834   if(omode & JDBONOLCK){
835     homode |= HDBONOLCK;
836     bomode |= BDBONOLCK;
837     womode |= WDBONOLCK;
838   }
839   if(omode & JDBOLCKNB){
840     homode |= HDBOLCKNB;
841     bomode |= BDBOLCKNB;
842     womode |= WDBOLCKNB;
843   }
844   sprintf(pbuf, "%s%c%s", path, MYPATHCHR, JDBTXDBNAME);
845   if(!tchdbopen(txdb, pbuf, homode)) return false;
846   char *txopq = tchdbopaque(txdb);
847   uint8_t magic = *(uint8_t *)txopq;
848   if(magic == 0 && (omode & JDBOWRITER)){
849     *(uint8_t *)txopq = JDBTXDBMAGIC;
850     *(uint8_t *)(txopq + sizeof(magic) + sizeof(uint8_t)) = wopts;
851     uint64_t llnum = TDHTOILL(etnum);
852     memcpy(txopq + sizeof(magic) + sizeof(uint8_t) + sizeof(wopts), &llnum, sizeof(llnum));
853     llnum = TDHTOILL(iusiz);
854     memcpy(txopq + sizeof(magic) + sizeof(uint8_t) + sizeof(wopts) + sizeof(llnum),
855            &llnum, sizeof(llnum));
856   } else {
857     wopts = *(uint8_t *)(txopq + sizeof(magic) + sizeof(uint8_t));
858     memcpy(&etnum, txopq + sizeof(magic) + sizeof(uint8_t) + sizeof(wopts), sizeof(etnum));
859     etnum = TDITOHLL(etnum);
860     memcpy(&iusiz, txopq + sizeof(magic) + sizeof(uint8_t) + sizeof(wopts) + sizeof(etnum),
861            sizeof(iusiz));
862     iusiz = TDITOHLL(iusiz);
863   }
864   sprintf(pbuf, "%s%c%s", path, MYPATHCHR, JDBLSDBNAME);
865   if(!tcbdbopen(lsdb, pbuf, bomode)) return false;
866   if(omode & JDBOWRITER){
867     for(int i = 0; i < JDBWDBMAX; i++){
868       if(!tcwdbtune(idxs[i], etnum, wopts)){
869         tchdbsetecode(txdb, tcwdbecode(idxs[i]), __FILE__, __LINE__, __func__);
870         return false;
871       }
872     }
873   }
874   jdb->opts = 0;
875   if(wopts & WDBTLARGE) jdb->opts |= WDBTLARGE;
876   if(wopts & WDBTDEFLATE) jdb->opts |= WDBTDEFLATE;
877   if(wopts & WDBTBZIP) jdb->opts |= WDBTBZIP;
878   if(wopts & WDBTTCBS) jdb->opts |= JDBTTCBS;
879   uint8_t inum;
880   memcpy(&inum, txopq + sizeof(magic), sizeof(inum));
881   if(inum > JDBWDBMAX){
882     tchdbclose(txdb);
883     tchdbsetecode(txdb, TCEMETA, __FILE__, __LINE__, __func__);
884     return false;
885   }
886   jdb->cnum = 0;
887   uint64_t min = UINT64_MAX;
888   for(int i = 0; i < inum; i++){
889     sprintf(pbuf, "%s%c%04d", path, MYPATHCHR, i + 1);
890     if(!tcwdbopen(idxs[i], pbuf, womode)){
891       tchdbclose(txdb);
892       tchdbsetecode(txdb, tcwdbecode(idxs[i]), __FILE__, __LINE__, __func__);
893       for(int j = i - 1; j >= 0; j--){
894         tcwdbclose(idxs[i]);
895       }
896       return false;
897     }
898     uint64_t fsiz = tcwdbfsiz(idxs[i]);
899     if(fsiz < min){
900       jdb->cnum = i;
901       min = fsiz;
902     }
903   }
904   jdb->inum = inum;
905   jdb->path = tcstrdup(path);
906   jdb->wmode = omode & JDBOWRITER;
907   jdb->wopts = wopts;
908   jdb->womode = womode;
909   return true;
910 }
911 
912 
913 /* Close a tagged database object.
914    `jdb' specifies the tagged database object.
915    If successful, the return value is true, else, it is false. */
tcjdbcloseimpl(TCJDB * jdb)916 static bool tcjdbcloseimpl(TCJDB *jdb){
917   assert(jdb);
918   bool err = false;
919   TCHDB *txdb = jdb->txdb;
920   TCWDB **idxs = jdb->idxs;
921   uint8_t inum = jdb->inum;
922   if(jdb->wmode){
923     char *txopq = tchdbopaque(txdb);
924     *(uint8_t *)(txopq + sizeof(uint8_t)) = inum;
925   }
926   jdb->inum = 0;
927   for(int i = 0; i < inum; i++){
928     if(!tcwdbclose(idxs[i])){
929       tchdbsetecode(txdb, tcwdbecode(idxs[i]), __FILE__, __LINE__, __func__);
930       err = true;
931     }
932   }
933   if(!tchdbclose(txdb)) err = true;
934   tcfree(jdb->path);
935   jdb->path = NULL;
936   return !err;
937 }
938 
939 
940 /* Store a record into a tagged database object.
941    `jdb' specifies the tagged database object.
942    `id' specifies the ID number of the record.
943    `words' specifies a list object contains the words of the record.
944    If successful, the return value is true, else, it is false. */
tcjdbputimpl(TCJDB * jdb,int64_t id,const TCLIST * words)945 static bool tcjdbputimpl(TCJDB *jdb, int64_t id, const TCLIST *words){
946   assert(jdb && id > 0 && words);
947   TCHDB *txdb = jdb->txdb;
948   TCWDB **idxs = jdb->idxs;
949   uint8_t inum = jdb->inum;
950   uint8_t cnum = jdb->cnum;
951   if(cnum >= inum){
952     char pbuf[strlen(jdb->path)+TDNUMBUFSIZ];
953     sprintf(pbuf, "%s%c%04d", jdb->path, MYPATHCHR, inum + 1);
954     TCWDB *nidx = idxs[inum];
955     if(!tcwdbopen(nidx, pbuf, jdb->womode | JDBOCREAT)){
956       tchdbsetecode(txdb, tcwdbecode(nidx), __FILE__, __LINE__, __func__);
957       return false;
958     }
959     jdb->cnum = jdb->inum;
960     cnum = jdb->cnum;
961     jdb->inum++;
962   }
963   char kbuf[TDNUMBUFSIZ];
964   int ksiz;
965   TDSETVNUMBUF64(ksiz, kbuf, id);
966   char stack[JDBIOBUFSIZ];
967   int vsiz = tchdbget3(txdb, kbuf, ksiz, stack, JDBIOBUFSIZ);
968   if(vsiz > 0){
969     int ocnum = tcatoi(stack);
970     if(ocnum < 0 || ocnum >= JDBWDBMAX){
971       tchdbsetecode(txdb, TCEMISC, __FILE__, __LINE__, __func__);
972       return false;
973     }
974     TCWDB *oidx = idxs[ocnum];
975     if(vsiz >= JDBIOBUFSIZ){
976       char *vbuf = tchdbget(txdb, kbuf, ksiz, &vsiz);
977       if(vbuf){
978         TCLIST *owords = tcstrsplit(vbuf, "\t");
979         tcfree(tclistshift2(owords));
980         int ownum = tclistnum(owords);
981         for(int i = 0; i < ownum; i++){
982           int wsiz;
983           char *word = (char *)tclistval(owords, i, &wsiz);
984           tctextnormalize(word, TCTNLOWER | TCTNNOACC | TCTNSPACE);
985         }
986         if(!tcwdbout(oidx, id, owords)){
987           tchdbsetecode(txdb, tcwdbecode(oidx), __FILE__, __LINE__, __func__);
988           tclistdel(owords);
989           return false;
990         }
991         tclistdel(owords);
992         tcfree(vbuf);
993       } else {
994         tchdbsetecode(txdb, TCEMISC, __FILE__, __LINE__, __func__);
995         return false;
996       }
997     } else {
998       stack[vsiz] = '\0';
999       TCLIST *owords = tcstrsplit(stack, "\t");
1000       tcfree(tclistshift2(owords));
1001       int ownum = tclistnum(owords);
1002       for(int i = 0; i < ownum; i++){
1003         int wsiz;
1004         char *word = (char *)tclistval(owords, i, &wsiz);
1005         tctextnormalize(word, TCTNLOWER | TCTNNOACC | TCTNSPACE);
1006       }
1007       if(!tcwdbout(oidx, id, owords)){
1008         tchdbsetecode(txdb, tcwdbecode(oidx), __FILE__, __LINE__, __func__);
1009         tclistdel(owords);
1010         return false;
1011       }
1012       tclistdel(owords);
1013     }
1014     if(!tchdbout(txdb, kbuf, ksiz)) return false;
1015   }
1016   int wnum = tclistnum(words);
1017   TCXSTR *xstr = tcxstrnew3(wnum * JDBAVGWSIZ + 1);
1018   TCLIST *nwords = tclistnew2(wnum);
1019   tcxstrprintf(xstr, "%d", cnum);
1020   for(int i = 0; i < wnum; i++){
1021     int wsiz;
1022     const char *word = tclistval(words, i, &wsiz);
1023     if(wsiz >= JDBIOBUFSIZ) continue;
1024     memcpy(stack, word, wsiz);
1025     stack[wsiz] = '\0';
1026     for(int j = 0; j < wsiz; j++){
1027       if(((unsigned char *)stack)[j] < ' ') stack[j] = ' ';
1028     }
1029     tcxstrcat(xstr, "\t", 1);
1030     tcxstrcat(xstr, stack, wsiz);
1031     tctextnormalize(stack, TCTNLOWER | TCTNNOACC | TCTNSPACE);
1032     if(stack[0] != '\0') tclistpush2(nwords, stack);
1033   }
1034   if(!(jdb->exopts & JDBXNOTXT) &&
1035      !tchdbputkeep(txdb, kbuf, ksiz, tcxstrptr(xstr), tcxstrsize(xstr))){
1036     return false;
1037   }
1038   TCWDB *cidx = idxs[cnum];
1039   if(!tcwdbput(cidx, id, nwords)){
1040     tchdbsetecode(txdb, tcwdbecode(cidx), __FILE__, __LINE__, __func__);
1041     tclistdel(nwords);
1042     tcxstrdel(xstr);
1043     return false;
1044   }
1045   tclistdel(nwords);
1046   tcxstrdel(xstr);
1047   return true;
1048 }
1049 
1050 
1051 /* Remove a record of a tagged database object.
1052    `jdb' specifies the tagged database object.
1053    `id' specifies the ID number of the record.
1054    If successful, the return value is true, else, it is false. */
tcjdboutimpl(TCJDB * jdb,int64_t id)1055 static bool tcjdboutimpl(TCJDB *jdb, int64_t id){
1056   TCHDB *txdb = jdb->txdb;
1057   TCWDB **idxs = jdb->idxs;
1058   char kbuf[TDNUMBUFSIZ];
1059   int ksiz;
1060   TDSETVNUMBUF64(ksiz, kbuf, id);
1061   char stack[JDBIOBUFSIZ];
1062   int vsiz = tchdbget3(txdb, kbuf, ksiz, stack, JDBIOBUFSIZ);
1063   if(vsiz > 0){
1064     int ocnum = tcatoi(stack);
1065     if(ocnum < 0 || ocnum >= JDBWDBMAX){
1066       tchdbsetecode(txdb, TCEMISC, __FILE__, __LINE__, __func__);
1067       return false;
1068     }
1069     TCWDB *oidx = idxs[ocnum];
1070     if(vsiz >= JDBIOBUFSIZ){
1071       char *vbuf = tchdbget(txdb, kbuf, ksiz, &vsiz);
1072       if(vbuf){
1073         TCLIST *owords = tcstrsplit(vbuf, "\t");
1074         tcfree(tclistshift2(owords));
1075         int ownum = tclistnum(owords);
1076         for(int i = 0; i < ownum; i++){
1077           int wsiz;
1078           char *word = (char *)tclistval(owords, i, &wsiz);
1079           tctextnormalize(word, TCTNLOWER | TCTNNOACC | TCTNSPACE);
1080         }
1081         if(!tcwdbout(oidx, id, owords)){
1082           tchdbsetecode(txdb, tcwdbecode(oidx), __FILE__, __LINE__, __func__);
1083           tclistdel(owords);
1084           return false;
1085         }
1086         tclistdel(owords);
1087         tcfree(vbuf);
1088       } else {
1089         tchdbsetecode(txdb, TCEMISC, __FILE__, __LINE__, __func__);
1090         return false;
1091       }
1092     } else {
1093       stack[vsiz] = '\0';
1094       TCLIST *owords = tcstrsplit(stack, "\t");
1095       tcfree(tclistshift2(owords));
1096       int ownum = tclistnum(owords);
1097       for(int i = 0; i < ownum; i++){
1098         int wsiz;
1099         char *word = (char *)tclistval(owords, i, &wsiz);
1100         tctextnormalize(word, TCTNLOWER | TCTNNOACC | TCTNSPACE);
1101       }
1102       if(!tcwdbout(oidx, id, owords)){
1103         tchdbsetecode(txdb, tcwdbecode(oidx), __FILE__, __LINE__, __func__);
1104         tclistdel(owords);
1105         return false;
1106       }
1107       tclistdel(owords);
1108     }
1109     if(!tchdbout(txdb, kbuf, ksiz)) return false;
1110   } else {
1111     tchdbsetecode(txdb, TCENOREC, __FILE__, __LINE__, __func__);
1112     return false;
1113   }
1114   return true;
1115 }
1116 
1117 
1118 /* Retrieve a record of a tagged database object.
1119    `jdb' specifies the tagged database object connected as a writer.
1120    `id' specifies the ID number of the record.  It should be positive.
1121    If successful, the return value is the string of the corresponding record, else, it is
1122    `NULL'. */
tcjdbgetimpl(TCJDB * jdb,int64_t id)1123 static char *tcjdbgetimpl(TCJDB *jdb, int64_t id){
1124   assert(jdb && id > 0);
1125   char kbuf[TDNUMBUFSIZ];
1126   int ksiz;
1127   TDSETVNUMBUF64(ksiz, kbuf, id);
1128   int vsiz;
1129   char *vbuf = tchdbget(jdb->txdb, kbuf, ksiz, &vsiz);
1130   if(!vbuf) return NULL;
1131   char *pv = strchr(vbuf, '\t');
1132   if(!pv){
1133     tchdbsetecode(jdb->txdb, TCEMISC, __FILE__, __LINE__, __func__);
1134     tcfree(vbuf);
1135     return NULL;
1136   }
1137   pv++;
1138   vsiz = strlen(pv);
1139   memmove(vbuf, pv, vsiz);
1140   vbuf[vsiz] = '\0';
1141   return vbuf;
1142 }
1143 
1144 
1145 /* Search a tagged database.
1146    `jdb' specifies the tagged database object.
1147    `word' specifies the string of the word to be matched to.
1148    `smode' specifies the matching mode.
1149    `np' specifies the pointer to the variable into which the number of elements of the return
1150    value is assigned.
1151    If successful, the return value is the pointer to an array of ID numbers of the corresponding
1152    records.  `NULL' is returned on failure. */
tcjdbsearchimpl(TCJDB * jdb,const char * word,int smode,int * np)1153 static uint64_t *tcjdbsearchimpl(TCJDB *jdb, const char *word, int smode, int *np){
1154   assert(jdb && word && np);
1155   TCBDB *lsdb = jdb->lsdb;
1156   TCWDB **idxs = jdb->idxs;
1157   uint8_t inum = jdb->inum;
1158   if(inum < 1){
1159     *np = 0;
1160     return tcmalloc(1);
1161   }
1162   if(smode != JDBSSUBSTR){
1163     for(int i = 0; i < inum; i++){
1164       TCWDB *idx = idxs[i];
1165       if(tcwdbcnum(idx) > 0 && !tcwdbmemsync(idx, 0)){
1166         tchdbsetecode(jdb->txdb, tcwdbecode(idx), __FILE__, __LINE__, __func__);
1167         return NULL;
1168       }
1169     }
1170   }
1171   int fwmmax = tcwdbfwmmax(idxs[0]);
1172   if(fwmmax < 1) fwmmax = 1;
1173   TCLIST *words = tclistnew();
1174   if(smode == JDBSSUBSTR){
1175     BDBCUR *cur = tcbdbcurnew(lsdb);
1176     tcbdbcurfirst(cur);
1177     int ksiz;
1178     char *kbuf;
1179     while(tclistnum(words) < fwmmax && (kbuf = tcbdbcurkey(cur, &ksiz)) != NULL){
1180       if(strstr(kbuf, word)){
1181         tclistpushmalloc(words, kbuf, ksiz);
1182       } else {
1183         tcfree(kbuf);
1184       }
1185       tcbdbcurnext(cur);
1186     }
1187     tcbdbcurdel(cur);
1188   } else if(smode == JDBSPREFIX){
1189     tclistdel(words);
1190     words = tcbdbfwmkeys2(lsdb, word, fwmmax);
1191   } else if(smode == JDBSSUFFIX){
1192     BDBCUR *cur = tcbdbcurnew(lsdb);
1193     tcbdbcurfirst(cur);
1194     int ksiz;
1195     char *kbuf;
1196     while(tclistnum(words) < fwmmax && (kbuf = tcbdbcurkey(cur, &ksiz)) != NULL){
1197       if(tcstrbwm(kbuf, word)){
1198         tclistpushmalloc(words, kbuf, ksiz);
1199       } else {
1200         tcfree(kbuf);
1201       }
1202       tcbdbcurnext(cur);
1203     }
1204     tcbdbcurdel(cur);
1205   } else {
1206     tclistpush2(words, word);
1207   }
1208   int wnum = tclistnum(words);
1209   if(wnum < 1){
1210     tclistdel(words);
1211     *np = 0;
1212     return tcmalloc(1);
1213   }
1214   uint64_t *res;
1215   if(wnum == 1){
1216     res = tcjdbsearchword(jdb, tclistval2(words, 0), np);
1217   } else {
1218     QDBRSET *rsets = tcmalloc(wnum * sizeof(*rsets));
1219     for(int i = 0; i < wnum; i++){
1220       rsets[i].ids = tcjdbsearchword(jdb, tclistval2(words, i), &rsets[i].num);
1221       if(!rsets[i].ids) rsets[i].num = 0;
1222     }
1223     res = tcqdbresunion(rsets, wnum, np);
1224     for(int i = 0; i < wnum; i++){
1225       tcfree(rsets[i].ids);
1226     }
1227     tcfree(rsets);
1228   }
1229   tclistdel(words);
1230   return res;
1231 }
1232 
1233 
1234 /* Search a tagged database for a word.
1235    `jdb' specifies the tagged database object.
1236    `word' specifies the string of the word to be matched to.
1237    `np' specifies the pointer to the variable into which the number of elements of the return
1238    value is assigned.
1239    If successful, the return value is the pointer to an array of ID numbers of the corresponding
1240    records.  `NULL' is returned on failure. */
tcjdbsearchword(TCJDB * jdb,const char * word,int * np)1241 static uint64_t *tcjdbsearchword(TCJDB *jdb, const char *word, int *np){
1242   assert(jdb && word && np);
1243   TCWDB **idxs = jdb->idxs;
1244   uint8_t inum = jdb->inum;
1245   if(inum == 1){
1246     uint64_t *res = tcwdbsearch(idxs[0], word, np);
1247     if(!res) tchdbsetecode(jdb->txdb, tcwdbecode(idxs[0]), __FILE__, __LINE__, __func__);
1248     return res;
1249   }
1250   QDBRSET rsets[inum];
1251   for(int i = 0; i < inum; i++){
1252     rsets[i].ids = tcwdbsearch(idxs[i], word, &rsets[i].num);
1253   }
1254   uint64_t *res = tcqdbresunion(rsets, inum, np);
1255   for(int i = 0; i < inum; i++){
1256     tcfree(rsets[i].ids);
1257   }
1258   return res;
1259 }
1260 
1261 
1262 /* Search a tagged database with a token expression.
1263    `jdb' specifies the tagged database object.
1264    `token' specifies the string of the token expression.
1265    `np' specifies the pointer to the variable into which the number of elements of the return
1266    value is assigned.
1267    If successful, the return value is the pointer to an array of ID numbers of the corresponding
1268    records.  `NULL' is returned on failure. */
tcjdbsearchtoken(TCJDB * jdb,const char * token,int * np)1269 static uint64_t *tcjdbsearchtoken(TCJDB *jdb, const char *token, int *np){
1270   assert(jdb && token && np);
1271   int len = strlen(token);
1272   if(*token == '"'){
1273     char *bare = tcmalloc(len + 1);
1274     char *wp = bare;
1275     const char *rp = token + 1;
1276     while(*rp != '\0'){
1277       if(rp[0] == '"'){
1278         if(rp[1] == '"'){
1279           *(wp++) = '"';
1280         }
1281       } else {
1282         *(wp++) = *rp;
1283       }
1284       rp++;
1285     }
1286     *wp = '\0';
1287     uint64_t *res = tcjdbsearch(jdb, bare, JDBSFULL, np);
1288     tcfree(bare);
1289     return res;
1290   }
1291   if(len < 4) return tcjdbsearch(jdb, token, JDBSFULL, np);
1292   if(token[0] == '[' && token[1] == '[' && token[2] == '[' && token[3] == '['){
1293     char *bare = tcmemdup(token + 4, len - 4);
1294     uint64_t *res = tcjdbsearch(jdb, bare, JDBSPREFIX, np);
1295     tcfree(bare);
1296     return res;
1297   }
1298   if(token[len-1] == ']' && token[len-2] == ']' && token[len-3] == ']' && token[len-4] == ']'){
1299     char *bare = tcmemdup(token, len - 4);
1300     uint64_t *res = tcjdbsearch(jdb, bare, JDBSSUFFIX, np);
1301     tcfree(bare);
1302     return res;
1303   }
1304   if(token[0] != '[' || token[1] != '[' || token[len-1] != ']' || token[len-2] != ']')
1305     return tcjdbsearch(jdb, token, JDBSFULL, np);
1306   len -= 4;
1307   char *bare = tcmemdup(token + 2, len);
1308   bool prefix = false;
1309   bool suffix = false;
1310   if(len > 0 && bare[0] == '*'){
1311     memmove(bare, bare + 1, len);
1312     len--;
1313     suffix = true;
1314   }
1315   if(len > 0 && bare[len-1] == '*'){
1316     bare[len-1] = '\0';
1317     len--;
1318     prefix = true;
1319   }
1320   if(len < 1){
1321     tcfree(bare);
1322     *np = 0;
1323     return tcmalloc(1);
1324   }
1325   int smode = JDBSFULL;
1326   if(prefix && suffix){
1327     smode = JDBSSUBSTR;
1328   } else if(prefix){
1329     smode = JDBSPREFIX;
1330   } else if(suffix){
1331     smode = JDBSSUFFIX;
1332   }
1333   uint64_t *res = tcjdbsearch(jdb, bare, smode, np);
1334   tcfree(bare);
1335   return res;
1336 }
1337 
1338 
1339 /* Optimize the file of a tagged database object.
1340    `jdb' specifies the tagged database object connected as a writer.
1341    If successful, the return value is true, else, it is false. */
tcjdboptimizeimpl(TCJDB * jdb)1342 static bool tcjdboptimizeimpl(TCJDB *jdb){
1343   assert(jdb);
1344   TCHDB *txdb = jdb->txdb;
1345   TCWDB **idxs = jdb->idxs;
1346   uint8_t inum = jdb->inum;
1347   bool err = false;
1348   if(!tchdboptimize(txdb, -1, -1, -1, UINT8_MAX)) err = true;
1349   for(int i = 0; i < inum; i++){
1350     if(!tcwdboptimize(idxs[i])){
1351       tchdbsetecode(txdb, tcwdbecode(idxs[i]), __FILE__, __LINE__, __func__);
1352       err = true;
1353     }
1354   }
1355   return !err;
1356 }
1357 
1358 
1359 /* Remove all records of a tagged database object.
1360    `jdb' specifies the tagged database object connected as a writer.
1361    If successful, the return value is true, else, it is false. */
tcjdbvanishimpl(TCJDB * jdb)1362 static bool tcjdbvanishimpl(TCJDB *jdb){
1363   assert(jdb);
1364   TCHDB *txdb = jdb->txdb;
1365   TCWDB **idxs = jdb->idxs;
1366   uint8_t inum = jdb->inum;
1367   bool err = false;
1368   if(!tchdbvanish(txdb)) err = true;
1369   char *txopq = tchdbopaque(txdb);
1370   *(uint8_t *)(txopq + sizeof(uint8_t) + sizeof(uint8_t)) = jdb->wopts;
1371   for(int i = 0; i < inum; i++){
1372     if(!tcwdbvanish(idxs[i])){
1373       tchdbsetecode(txdb, tcwdbecode(idxs[i]), __FILE__, __LINE__, __func__);
1374       err = true;
1375     }
1376   }
1377   return !err;
1378 }
1379 
1380 
1381 /* Copy the database directory of a tagged database object.
1382    `jdb' specifies the tagged database object.
1383    `path' specifies the path of the destination directory.
1384    If successful, the return value is true, else, it is false. */
tcjdbcopyimpl(TCJDB * jdb,const char * path)1385 static bool tcjdbcopyimpl(TCJDB *jdb, const char *path){
1386   assert(jdb && path);
1387   TCHDB *txdb = jdb->txdb;
1388   TCBDB *lsdb = jdb->lsdb;
1389   TCWDB **idxs = jdb->idxs;
1390   uint8_t inum = jdb->inum;
1391   bool err = false;
1392   if(mkdir(path, JDBDIRMODE) == -1 && errno != EEXIST){
1393     int ecode = TCEMKDIR;
1394     switch(errno){
1395       case EACCES: ecode = TCENOPERM; break;
1396       case ENOENT: ecode = TCENOFILE; break;
1397     }
1398     tchdbsetecode(txdb, ecode, __FILE__, __LINE__, __func__);
1399     return false;
1400   }
1401   char pbuf[strlen(path)+TDNUMBUFSIZ];
1402   sprintf(pbuf, "%s%c%s", path, MYPATHCHR, JDBTXDBNAME);
1403   if(!tchdbcopy(txdb, pbuf)) err = true;
1404   sprintf(pbuf, "%s%c%s", path, MYPATHCHR, JDBLSDBNAME);
1405   if(!tcbdbcopy(lsdb, pbuf)){
1406     tchdbsetecode(txdb, tcbdbecode(lsdb), __FILE__, __LINE__, __func__);
1407     err = true;
1408   }
1409   for(int i = 0; i < inum; i++){
1410     sprintf(pbuf, "%s%c%04d", path, MYPATHCHR, i + 1);
1411     if(!tcwdbcopy(idxs[i], pbuf)){
1412       tchdbsetecode(txdb, tcwdbecode(idxs[i]), __FILE__, __LINE__, __func__);
1413       err = true;
1414     }
1415   }
1416   return !err;
1417 }
1418 
1419 
1420 
1421 // END OF FILE
1422