1 /*************************************************************************************************
2 * The word database API of Tokyo Dystopia
3 * Copyright (C) 2007-2010 FAL Labs
4 * This file is part of Tokyo Dystopia.
5 * Tokyo Dystopia is free software; you can redistribute it and/or modify it under the terms of
6 * the GNU Lesser General Public License as published by the Free Software Foundation; either
7 * version 2.1 of the License or any later version. Tokyo Dystopia is distributed in the hope
8 * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
10 * License for more details.
11 * You should have received a copy of the GNU Lesser General Public License along with Tokyo
12 * Dystopia; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
13 * Boston, MA 02111-1307 USA.
14 *************************************************************************************************/
15
16
17 #include "tcwdb.h"
18 #include "myconf.h"
19
#define WDBMAGICDATA   "[word]"            // magic data written to / checked in the index opaque region
#define WDBIOBUFSIZ    65536               // size of an I/O buffer
#define WDBMAXWORDLEN  1024                // maximum length of each search word
#define WDBWORDUNIT    1024                // unit number of word allocation
#define WDBRESUNIT     1024                // unit number of result allocation
#define WDBCCBNUM      1048573             // bucket number of the token cache
#define WDBCCDEFICSIZ  (1024LL*1024*128)   // default capacity of the token cache
#define WDBDIDSBNUM    262139              // bucket number of the deleted ID set
#define WDBDTKNBNUM    262139              // bucket number of the deleted token map
#define WDBDEFFWMMAX   2048                // default maximum number of forward matching expansion
#define WDBHJBNUMCO    4                   // coefficient of the bucket number for hash join

#define WDBDEFETNUM    1000000             // default expected token number
#define WDBLMEMB       256                 // number of members in each leaf of the index
#define WDBNMEMB       512                 // number of members in each node of the index
#define WDBAPOW        9                   // alignment power of the index
#define WDBFPOW        11                  // free block pool power of the index
#define WDBLSMAX       8192                // maximum size of each leaf of the index
#define WDBLCNUMW      64                  // number of cached leaf nodes for writer
#define WDBLCNUMR      1024                // number of cached leaf nodes for reader
#define WDBNCNUM       1024                // number of cached non-leaf nodes
41
42
/* private function prototypes */
/* method-level locking around the public API (writer or reader lock on wdb->mmtx) */
static bool tcwdblockmethod(TCWDB *wdb, bool wr);
static bool tcwdbunlockmethod(TCWDB *wdb);
/* lock-free implementations invoked by the public wrappers */
static bool tcwdbopenimpl(TCWDB *wdb, const char *path, int omode);
static bool tcwdbcloseimpl(TCWDB *wdb);
static bool tcwdbputimpl(TCWDB *wdb, int64_t id, const TCLIST *words);
static bool tcwdboutimpl(TCWDB *wdb, int64_t id, const TCLIST *words);
static uint64_t *tcwdbsearchimpl(TCWDB *wdb, const char *word, int *np);
/* comparator used with qsort to order token keys */
static int tccmpwords(const char **a, const char **b);
52
53
54
55 /*************************************************************************************************
56 * API
57 *************************************************************************************************/
58
59
/* Get the message string corresponding to an error code.
   `ecode' specifies the error code.
   The return value is whatever `tcbdberrmsg' returns for that code. */
const char *tcwdberrmsg(int ecode){
  const char *msg = tcbdberrmsg(ecode);
  return msg;
}
64
65
66 /* Create a word database object. */
tcwdbnew(void)67 TCWDB *tcwdbnew(void){
68 TCWDB *wdb = tcmalloc(sizeof(*wdb));
69 wdb->mmtx = tcmalloc(sizeof(pthread_rwlock_t));
70 if(pthread_rwlock_init(wdb->mmtx, NULL) != 0) tcmyfatal("pthread_rwlock_init failed");
71 wdb->idx = tcbdbnew();
72 if(!tcbdbsetmutex(wdb->idx)) tcmyfatal("tcbdbsetmutex failed");
73 wdb->open = false;
74 wdb->cc = NULL;
75 wdb->icsiz = WDBCCDEFICSIZ;
76 wdb->lcnum = 0;
77 wdb->dtokens = NULL;
78 wdb->dids = NULL;
79 wdb->etnum = WDBDEFETNUM;
80 wdb->opts = 0;
81 wdb->fwmmax = WDBDEFFWMMAX;
82 wdb->synccb = NULL;
83 wdb->syncopq = NULL;
84 wdb->addcb = NULL;
85 wdb->addopq = NULL;
86 return wdb;
87 }
88
89
90 /* Delete a word database object. */
tcwdbdel(TCWDB * wdb)91 void tcwdbdel(TCWDB *wdb){
92 assert(wdb);
93 if(wdb->open) tcwdbclose(wdb);
94 tcbdbdel(wdb->idx);
95 pthread_rwlock_destroy(wdb->mmtx);
96 tcfree(wdb->mmtx);
97 tcfree(wdb);
98 }
99
100
101 /* Get the last happened error code of a word database object. */
tcwdbecode(TCWDB * wdb)102 int tcwdbecode(TCWDB *wdb){
103 assert(wdb);
104 return tcbdbecode(wdb->idx);
105 }
106
107
108 /* Set the tuning parameters of a word database object. */
tcwdbtune(TCWDB * wdb,int64_t etnum,uint8_t opts)109 bool tcwdbtune(TCWDB *wdb, int64_t etnum, uint8_t opts){
110 assert(wdb);
111 if(!tcwdblockmethod(wdb, true)) return false;
112 if(wdb->open){
113 tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
114 tcwdbunlockmethod(wdb);
115 return false;
116 }
117 wdb->etnum = (etnum > 0) ? etnum : WDBDEFETNUM;
118 wdb->opts = opts;
119 tcwdbunlockmethod(wdb);
120 return true;
121 }
122
123
124 /* Set the caching parameters of a word database object. */
tcwdbsetcache(TCWDB * wdb,int64_t icsiz,int32_t lcnum)125 bool tcwdbsetcache(TCWDB *wdb, int64_t icsiz, int32_t lcnum){
126 assert(wdb);
127 if(!tcwdblockmethod(wdb, true)) return false;
128 if(wdb->open){
129 tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
130 tcwdbunlockmethod(wdb);
131 return false;
132 }
133 wdb->icsiz = (icsiz > 0) ? icsiz : WDBCCDEFICSIZ;
134 wdb->lcnum = (lcnum > 0) ? lcnum : 0;
135 tcwdbunlockmethod(wdb);
136 return true;
137 }
138
139
140 /* Set the maximum number of forward matching expansion of a word database object. */
tcwdbsetfwmmax(TCWDB * wdb,uint32_t fwmmax)141 bool tcwdbsetfwmmax(TCWDB *wdb, uint32_t fwmmax){
142 assert(wdb);
143 if(!tcwdblockmethod(wdb, true)) return false;
144 if(wdb->open){
145 tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
146 tcwdbunlockmethod(wdb);
147 return false;
148 }
149 wdb->fwmmax = fwmmax;
150 tcwdbunlockmethod(wdb);
151 return true;
152 }
153
154
155 /* Open a word database object. */
tcwdbopen(TCWDB * wdb,const char * path,int omode)156 bool tcwdbopen(TCWDB *wdb, const char *path, int omode){
157 assert(wdb && path);
158 if(!tcwdblockmethod(wdb, true)) return false;
159 if(wdb->open){
160 tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
161 tcwdbunlockmethod(wdb);
162 return false;
163 }
164 bool rv = tcwdbopenimpl(wdb, path, omode);
165 tcwdbunlockmethod(wdb);
166 return rv;
167 }
168
169
170 /* Close a word database object. */
tcwdbclose(TCWDB * wdb)171 bool tcwdbclose(TCWDB *wdb){
172 assert(wdb);
173 if(!tcwdblockmethod(wdb, true)) return false;
174 if(!wdb->open){
175 tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
176 tcwdbunlockmethod(wdb);
177 return false;
178 }
179 bool rv = tcwdbcloseimpl(wdb);
180 tcwdbunlockmethod(wdb);
181 return rv;
182 }
183
184
185 /* Store a record into a word database object. */
tcwdbput(TCWDB * wdb,int64_t id,const TCLIST * words)186 bool tcwdbput(TCWDB *wdb, int64_t id, const TCLIST *words){
187 assert(wdb && id > 0 && words);
188 if(!tcwdblockmethod(wdb, true)) return false;
189 if(!wdb->open || !wdb->cc){
190 tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
191 tcwdbunlockmethod(wdb);
192 return false;
193 }
194 if(tcidsetcheck(wdb->dids, id) && !tcwdbmemsync(wdb, 0)){
195 tcwdbunlockmethod(wdb);
196 return false;
197 }
198 bool rv = tcwdbputimpl(wdb, id, words);
199 tcwdbunlockmethod(wdb);
200 return rv;
201 }
202
203
204 /* Store a record with a text string into a word database object. */
tcwdbput2(TCWDB * wdb,int64_t id,const char * text,const char * delims)205 bool tcwdbput2(TCWDB *wdb, int64_t id, const char *text, const char *delims){
206 assert(wdb && id > 0 && text);
207 TCLIST *words = tcstrsplit(text, delims ? delims : WDBSPCCHARS);
208 bool rv = tcwdbput(wdb, id, words);
209 tclistdel(words);
210 return rv;
211 }
212
213
214 /* Remove a record of a word database object. */
tcwdbout(TCWDB * wdb,int64_t id,const TCLIST * words)215 bool tcwdbout(TCWDB *wdb, int64_t id, const TCLIST *words){
216 assert(wdb && id > 0 && words);
217 if(!tcwdblockmethod(wdb, true)) return false;
218 if(!wdb->open || !wdb->cc){
219 tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
220 tcwdbunlockmethod(wdb);
221 return false;
222 }
223 if(tcidsetcheck(wdb->dids, id)){
224 tcwdbunlockmethod(wdb);
225 return true;
226 }
227 if(tcmaprnum(wdb->cc) > 0 && !tcwdbmemsync(wdb, 0)){
228 tcwdbunlockmethod(wdb);
229 return false;
230 }
231 bool rv = tcwdboutimpl(wdb, id, words);
232 tcwdbunlockmethod(wdb);
233 return rv;
234 }
235
236
237 /* Remove a record with a text string of a word database object. */
tcwdbout2(TCWDB * wdb,int64_t id,const char * text,const char * delims)238 bool tcwdbout2(TCWDB *wdb, int64_t id, const char *text, const char *delims){
239 assert(wdb && id > 0 && text);
240 TCLIST *words = tcstrsplit(text, delims ? delims : WDBSPCCHARS);
241 bool rv = tcwdbout(wdb, id, words);
242 tclistdel(words);
243 return rv;
244 }
245
246
247 /* Search a word database. */
tcwdbsearch(TCWDB * wdb,const char * word,int * np)248 uint64_t *tcwdbsearch(TCWDB *wdb, const char *word, int *np){
249 assert(wdb && word && np);
250 if(!tcwdblockmethod(wdb, false)) return NULL;
251 if(!wdb->open){
252 tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
253 tcwdbunlockmethod(wdb);
254 return NULL;
255 }
256 if(wdb->cc && (tcmaprnum(wdb->cc) > 0 || tcmaprnum(wdb->dtokens) > 0)){
257 tcwdbunlockmethod(wdb);
258 if(!tcwdblockmethod(wdb, true)) return NULL;
259 if(!tcwdbmemsync(wdb, 0)){
260 tcwdbunlockmethod(wdb);
261 return NULL;
262 }
263 tcwdbunlockmethod(wdb);
264 if(!tcwdblockmethod(wdb, false)) return NULL;
265 }
266 uint64_t *rv = tcwdbsearchimpl(wdb, word, np);
267 tcwdbunlockmethod(wdb);
268 return rv;
269 }
270
271
272 /* Synchronize updated contents of a word database object with the file and the device. */
tcwdbsync(TCWDB * wdb)273 bool tcwdbsync(TCWDB *wdb){
274 assert(wdb);
275 if(!tcwdblockmethod(wdb, true)) return false;
276 if(!wdb->open || !wdb->cc){
277 tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
278 tcwdbunlockmethod(wdb);
279 return false;
280 }
281 bool err = false;
282 if(!tcwdbmemsync(wdb, 2)) err = true;
283 tcwdbunlockmethod(wdb);
284 return !err;
285 }
286
287
288 /* Optimize the file of a word database object. */
tcwdboptimize(TCWDB * wdb)289 bool tcwdboptimize(TCWDB *wdb){
290 assert(wdb);
291 if(!tcwdblockmethod(wdb, true)) return false;
292 if(!wdb->open || !wdb->cc){
293 tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
294 tcwdbunlockmethod(wdb);
295 return false;
296 }
297 bool err = false;
298 if(!tcwdbmemsync(wdb, 1)) err = true;
299 if(!tcbdboptimize(wdb->idx, 0, 0, 0, -1, -1, UINT8_MAX)) err = true;
300 tcwdbunlockmethod(wdb);
301 return !err;
302 }
303
304
305 /* Remove all records of a word database object. */
tcwdbvanish(TCWDB * wdb)306 bool tcwdbvanish(TCWDB *wdb){
307 assert(wdb);
308 if(!tcwdblockmethod(wdb, true)) return false;
309 if(!wdb->open || !wdb->cc){
310 tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
311 tcwdbunlockmethod(wdb);
312 return false;
313 }
314 bool err = false;
315 tcmapclear(wdb->cc);
316 tcmapclear(wdb->dtokens);
317 if(!tcwdbmemsync(wdb, 1)) err = true;
318 if(!tcbdbvanish(wdb->idx)) err = true;
319 tcwdbunlockmethod(wdb);
320 return !err;
321 }
322
323
324 /* Copy the database file of a word database object. */
tcwdbcopy(TCWDB * wdb,const char * path)325 bool tcwdbcopy(TCWDB *wdb, const char *path){
326 assert(wdb && path);
327 if(!tcwdblockmethod(wdb, false)) return false;
328 if(!wdb->open || !wdb->cc){
329 tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
330 tcwdbunlockmethod(wdb);
331 return false;
332 }
333 bool err = false;
334 if(!tcwdbmemsync(wdb, 1)) err = true;
335 if(!tcbdbcopy(wdb->idx, path)) err = true;
336 tcwdbunlockmethod(wdb);
337 return !err;
338 }
339
340
341 /* Get the file path of a word database object. */
tcwdbpath(TCWDB * wdb)342 const char *tcwdbpath(TCWDB *wdb){
343 assert(wdb);
344 return tcbdbpath(wdb->idx);
345 }
346
347
348 /* Get the number of tokens of a word database object. */
tcwdbtnum(TCWDB * wdb)349 uint64_t tcwdbtnum(TCWDB *wdb){
350 assert(wdb);
351 return tcbdbrnum(wdb->idx);
352 }
353
354
355 /* Get the size of the database file of a word database object. */
tcwdbfsiz(TCWDB * wdb)356 uint64_t tcwdbfsiz(TCWDB *wdb){
357 assert(wdb);
358 return tcbdbfsiz(wdb->idx);
359 }
360
361
362
363 /*************************************************************************************************
364 * features for experts
365 *************************************************************************************************/
366
367
368 /* Set the file descriptor for debugging output. */
tcwdbsetdbgfd(TCWDB * wdb,int fd)369 void tcwdbsetdbgfd(TCWDB *wdb, int fd){
370 assert(wdb && fd >= 0);
371 tcbdbsetdbgfd(wdb->idx, fd);
372 }
373
374
375 /* Get the file descriptor for debugging output. */
tcwdbdbgfd(TCWDB * wdb)376 int tcwdbdbgfd(TCWDB *wdb){
377 assert(wdb);
378 return tcbdbdbgfd(wdb->idx);
379 }
380
381
382 /* Synchronize updating contents on memory of a word database object. */
tcwdbmemsync(TCWDB * wdb,int level)383 bool tcwdbmemsync(TCWDB *wdb, int level){
384 assert(wdb);
385 if(!wdb->open || !wdb->cc){
386 tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
387 return false;
388 }
389 bool err = false;
390 bool (*synccb)(int, int, const char *, void *) = wdb->synccb;
391 void *syncopq = wdb->syncopq;
392 bool (*addcb)(const char *, void *) = wdb->addcb;
393 void *addopq = wdb->addopq;
394 TCBDB *idx = wdb->idx;
395 TCMAP *cc = wdb->cc;
396 if(synccb && !synccb(0, 0, "started", syncopq)){
397 tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
398 return false;
399 }
400 if(tcmaprnum(cc) > 0){
401 if(synccb && !synccb(0, 0, "getting tokens", syncopq)){
402 tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
403 return false;
404 }
405 int kn;
406 const char **keys = tcmapkeys2(cc, &kn);
407 if(synccb && !synccb(kn, 0, "sorting tokens", syncopq)){
408 tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
409 tcfree(keys);
410 return false;
411 }
412 qsort(keys, kn, sizeof(*keys), (int(*)(const void *, const void *))tccmpwords);
413 for(int i = 0; i < kn; i++){
414 if(synccb && !synccb(kn, i + 1, "storing tokens", syncopq)){
415 tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
416 tcfree(keys);
417 return false;
418 }
419 const char *kbuf = keys[i];
420 int ksiz = strlen(kbuf);
421 int vsiz;
422 const char *vbuf = tcmapget(cc, kbuf, ksiz, &vsiz);
423 if(!tcbdbputcat(idx, kbuf, ksiz, vbuf, vsiz)) err = true;
424 }
425 if(addcb){
426 if(synccb && !synccb(0, 0, "storing keyword list", syncopq)){
427 tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
428 tcfree(keys);
429 return false;
430 }
431 for(int i = 0; i < kn; i++){
432 if(!addcb(keys[i], addopq)){
433 tcfree(keys);
434 return false;
435 }
436 }
437 }
438 tcfree(keys);
439 tcmapclear(cc);
440 }
441 TCMAP *dtokens = wdb->dtokens;
442 TCIDSET *dids = wdb->dids;
443 if(tcmaprnum(dtokens) > 0){
444 if(synccb && !synccb(0, 0, "getting deleted tokens", syncopq)){
445 tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
446 return false;
447 }
448 int kn;
449 const char **keys = tcmapkeys2(dtokens, &kn);
450 if(synccb && !synccb(kn, 0, "sorting deleted tokens", syncopq)){
451 tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
452 tcfree(keys);
453 return false;
454 }
455 qsort(keys, kn, sizeof(*keys), (int(*)(const void *, const void *))tccmpwords);
456 for(int i = 0; i < kn; i++){
457 if(synccb && !synccb(kn, i + 1, "storing deleted tokens", syncopq)){
458 tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
459 tcfree(keys);
460 return false;
461 }
462 const char *kbuf = keys[i];
463 int ksiz = strlen(kbuf);
464 int vsiz;
465 const char *vbuf = tcbdbget3(idx, kbuf, ksiz, &vsiz);
466 if(!vbuf) continue;
467 char *nbuf = tcmalloc(vsiz + 1);
468 char *wp = nbuf;
469 const char *pv;
470 while(vsiz > 0){
471 pv = vbuf;
472 int step;
473 uint64_t id;
474 TDREADVNUMBUF64(vbuf, id, step);
475 vbuf += step;
476 vsiz -= step;
477 if(!tcidsetcheck(dids, id)){
478 int len = vbuf - pv;
479 memcpy(wp, pv, len);
480 wp += len;
481 }
482 }
483 int nsiz = wp - nbuf;
484 if(nsiz > 0){
485 if(!tcbdbput(idx, kbuf, ksiz, nbuf, nsiz)) err = true;
486 } else {
487 if(!tcbdbout(idx, kbuf, ksiz)) err = true;
488 }
489 tcfree(nbuf);
490 }
491 tcfree(keys);
492 tcmapclear(dtokens);
493 tcidsetclear(dids);
494 }
495 if(level > 0){
496 if(synccb && !synccb(0, 0, "synchronizing database", syncopq)){
497 tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
498 return false;
499 }
500 if(!tcbdbmemsync(idx, level > 1)) err = true;
501 }
502 if(synccb && !synccb(0, 0, "finished", syncopq)){
503 tcbdbsetecode(wdb->idx, TCEMISC, __FILE__, __LINE__, __func__);
504 return false;
505 }
506 return !err;
507 }
508
509
510 /* Clear the cache of a word database object. */
tcwdbcacheclear(TCWDB * wdb)511 bool tcwdbcacheclear(TCWDB *wdb){
512 assert(wdb);
513 if(!wdb->open){
514 tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
515 return false;
516 }
517 return tcbdbcacheclear(wdb->idx);
518 }
519
520
521 /* Get the inode number of the database file of a word database object. */
tcwdbinode(TCWDB * wdb)522 uint64_t tcwdbinode(TCWDB *wdb){
523 assert(wdb);
524 return tcbdbinode(wdb->idx);
525 }
526
527
528 /* Get the modification time of the database file of a word database object. */
tcwdbmtime(TCWDB * wdb)529 time_t tcwdbmtime(TCWDB *wdb){
530 assert(wdb);
531 return tcbdbmtime(wdb->idx);
532 }
533
534
535 /* Get the options of a word database object. */
tcwdbopts(TCWDB * wdb)536 uint8_t tcwdbopts(TCWDB *wdb){
537 assert(wdb);
538 return tcbdbopts(wdb->idx);
539 }
540
541
542 /* Get the maximum number of forward matching expansion of a word database object. */
tcwdbfwmmax(TCWDB * wdb)543 uint32_t tcwdbfwmmax(TCWDB *wdb){
544 assert(wdb);
545 return wdb->fwmmax;
546 }
547
548
549 /* Get the number of records in the cache of a word database object. */
tcwdbcnum(TCWDB * wdb)550 uint32_t tcwdbcnum(TCWDB *wdb){
551 assert(wdb);
552 if(!wdb->cc) return 0;
553 return tcmaprnum(wdb->cc);
554 }
555
556
557 /* Set the callback function for sync progression of a word database object. */
tcwdbsetsynccb(TCWDB * wdb,bool (* cb)(int,int,const char *,void *),void * opq)558 void tcwdbsetsynccb(TCWDB *wdb, bool (*cb)(int, int, const char *, void *), void *opq){
559 assert(wdb);
560 wdb->synccb = cb;
561 wdb->syncopq = opq;
562 }
563
564
565 /* Set the callback function for word addition of a word database object. */
tcwdbsetaddcb(TCWDB * wdb,bool (* cb)(const char *,void *),void * opq)566 void tcwdbsetaddcb(TCWDB *wdb, bool (*cb)(const char *, void *), void *opq){
567 assert(wdb);
568 wdb->addcb = cb;
569 wdb->addopq = opq;
570 }
571
572
573
574 /*************************************************************************************************
575 * private features
576 *************************************************************************************************/
577
578
579 /* Lock a method of the word database object.
580 `wdb' specifies the word database object.
581 `wr' specifies whether the lock is writer or not.
582 If successful, the return value is true, else, it is false. */
tcwdblockmethod(TCWDB * wdb,bool wr)583 static bool tcwdblockmethod(TCWDB *wdb, bool wr){
584 assert(wdb);
585 if(wr ? pthread_rwlock_wrlock(wdb->mmtx) != 0 : pthread_rwlock_rdlock(wdb->mmtx) != 0){
586 tcbdbsetecode(wdb->idx, TCETHREAD, __FILE__, __LINE__, __func__);
587 return false;
588 }
589 return true;
590 }
591
592
593 /* Unlock a method of the word database object.
594 `bdb' specifies the word database object.
595 If successful, the return value is true, else, it is false. */
tcwdbunlockmethod(TCWDB * wdb)596 static bool tcwdbunlockmethod(TCWDB *wdb){
597 assert(wdb);
598 if(pthread_rwlock_unlock(wdb->mmtx) != 0){
599 tcbdbsetecode(wdb->idx, TCETHREAD, __FILE__, __LINE__, __func__);
600 return false;
601 }
602 return true;
603 }
604
605
606 /* Open a word database object.
607 `wdb' specifies the word database object.
608 `path' specifies the path of the database file.
609 `omode' specifies the connection mode.
610 If successful, the return value is true, else, it is false. */
tcwdbopenimpl(TCWDB * wdb,const char * path,int omode)611 static bool tcwdbopenimpl(TCWDB *wdb, const char *path, int omode){
612 assert(wdb && path);
613 int bomode = BDBOREADER;
614 if(omode & WDBOWRITER){
615 bomode = BDBOWRITER;
616 if(omode & WDBOCREAT) bomode |= BDBOCREAT;
617 if(omode & WDBOTRUNC) bomode |= BDBOTRUNC;
618 int64_t bnum = (wdb->etnum / WDBLMEMB) * 2 + 1;
619 int bopts = 0;
620 if(wdb->opts & WDBTLARGE) bopts |= BDBTLARGE;
621 if(wdb->opts & WDBTDEFLATE) bopts |= BDBTDEFLATE;
622 if(wdb->opts & WDBTBZIP) bopts |= BDBTBZIP;
623 if(wdb->opts & WDBTTCBS) bopts |= BDBTTCBS;
624 if(!tcbdbtune(wdb->idx, WDBLMEMB, WDBNMEMB, bnum, WDBAPOW, WDBFPOW, bopts)) return false;
625 if(!tcbdbsetlsmax(wdb->idx, WDBLSMAX)) return false;
626 }
627 if(wdb->lcnum > 0){
628 if(!tcbdbsetcache(wdb->idx, wdb->lcnum, wdb->lcnum / 4 + 1)) return false;
629 } else {
630 if(!tcbdbsetcache(wdb->idx, (omode & WDBOWRITER) ? WDBLCNUMW : WDBLCNUMR, WDBNCNUM))
631 return false;
632 }
633 if(omode & WDBONOLCK) bomode |= BDBONOLCK;
634 if(omode & WDBOLCKNB) bomode |= BDBOLCKNB;
635 if(!tcbdbopen(wdb->idx, path, bomode)) return false;
636 if((omode & WDBOWRITER) && tcbdbrnum(wdb->idx) < 1){
637 memcpy(tcbdbopaque(wdb->idx), WDBMAGICDATA, strlen(WDBMAGICDATA));
638 } else if(!(omode & WDBONOLCK) &&
639 memcmp(tcbdbopaque(wdb->idx), WDBMAGICDATA, strlen(WDBMAGICDATA))){
640 tcbdbclose(wdb->idx);
641 tcbdbsetecode(wdb->idx, TCEMETA, __FILE__, __LINE__, __func__);
642 return 0;
643 }
644 if(omode & WDBOWRITER){
645 wdb->cc = tcmapnew2(WDBCCBNUM);
646 wdb->dtokens = tcmapnew2(WDBDTKNBNUM);
647 wdb->dids = tcidsetnew(WDBDIDSBNUM);
648 }
649 wdb->open = true;
650 return true;
651 }
652
653
654 /* Close a word database object.
655 `wdb' specifies the word database object.
656 If successful, the return value is true, else, it is false. */
tcwdbcloseimpl(TCWDB * wdb)657 static bool tcwdbcloseimpl(TCWDB *wdb){
658 assert(wdb);
659 bool err = false;
660 if(wdb->cc){
661 if((tcmaprnum(wdb->cc) > 0 || tcmaprnum(wdb->dtokens) > 0) && !tcwdbmemsync(wdb, 0))
662 err = true;
663 tcidsetdel(wdb->dids);
664 tcmapdel(wdb->dtokens);
665 tcmapdel(wdb->cc);
666 wdb->cc = NULL;
667 }
668 if(!tcbdbclose(wdb->idx)) err = true;
669 wdb->open = false;
670 return !err;
671 }
672
673
674 /* Store a record into a q-gram database object.
675 `wdb' specifies the q-gram database object.
676 `id' specifies the ID number of the record.
677 `words' specifies a list object contains the words of the record.
678 If successful, the return value is true, else, it is false. */
tcwdbputimpl(TCWDB * wdb,int64_t id,const TCLIST * words)679 static bool tcwdbputimpl(TCWDB *wdb, int64_t id, const TCLIST *words){
680 assert(wdb && id > 0 && words);
681 char idbuf[TDNUMBUFSIZ*2];
682 int idsiz;
683 TDSETVNUMBUF64(idsiz, idbuf, id);
684 TCMAP *cc = wdb->cc;
685 int wn = tclistnum(words);
686 TCMAP *uniq = tcmapnew2(wn + 1);
687 for(int i = 0; i < wn; i++){
688 int wsiz;
689 const char *word = tclistval(words, i, &wsiz);
690 if(!tcmapputkeep(uniq, word, wsiz, "", 0)) continue;
691 if(*word != '\0') tcmapputcat(cc, word, wsiz, idbuf, idsiz);
692 }
693 tcmapdel(uniq);
694 bool err = false;
695 if(tcmapmsiz(cc) >= wdb->icsiz && !tcwdbmemsync(wdb, 1)) err = true;
696 return !err;
697 }
698
699
700 /* Remove a record of a q-gram database object.
701 `wdb' specifies the q-gram database object.
702 `id' specifies the ID number of the record.
703 `words' specifies a list object contains the words of the record.
704 If successful, the return value is true, else, it is false. */
tcwdboutimpl(TCWDB * wdb,int64_t id,const TCLIST * words)705 static bool tcwdboutimpl(TCWDB *wdb, int64_t id, const TCLIST *words){
706 assert(wdb && id > 0 && words);
707 char idbuf[TDNUMBUFSIZ*2];
708 int idsiz;
709 TDSETVNUMBUF64(idsiz, idbuf, id);
710 TCMAP *dtokens = wdb->dtokens;
711 int wn = tclistnum(words);
712 for(int i = 0; i < wn; i++){
713 int wsiz;
714 const char *word = tclistval(words, i, &wsiz);
715 if(*word != '\0') tcmapputkeep(dtokens, word, wsiz, "", 0);
716 }
717 tcidsetmark(wdb->dids, id);
718 bool err = false;
719 if(tcmapmsiz(dtokens) >= wdb->icsiz && !tcwdbmemsync(wdb, 1)) err = true;
720 return !err;
721 }
722
723
724 /* Search a q-gram database.
725 `wdb' specifies the q-gram database object.
726 `word' specifies the string of the word to be matched to.
727 `np' specifies the pointer to the variable into which the number of elements of the return
728 value is assigned.
729 If successful, the return value is the pointer to an array of ID numbers of the corresponding
730 records. */
tcwdbsearchimpl(TCWDB * wdb,const char * word,int * np)731 static uint64_t *tcwdbsearchimpl(TCWDB *wdb, const char *word, int *np){
732 assert(wdb && word && np);
733 int wlen = strlen(word);
734 if(wlen > WDBMAXWORDLEN){
735 tcbdbsetecode(wdb->idx, TCEINVALID, __FILE__, __LINE__, __func__);
736 return NULL;
737 }
738 int vsiz;
739 const char *vbuf = tcbdbget3(wdb->idx, word, wlen, &vsiz);
740 if(!vbuf){
741 vbuf = "";
742 vsiz = 0;
743 }
744 uint64_t *res = tcmalloc(WDBRESUNIT * sizeof(*res));
745 int rnum = 0;
746 int ranum = WDBRESUNIT;
747 while(vsiz > 0){
748 int step;
749 uint64_t id;
750 TDREADVNUMBUF64(vbuf, id, step);
751 vbuf += step;
752 vsiz -= step;
753 if(rnum >= ranum){
754 ranum *= 2;
755 res = tcrealloc(res, ranum * sizeof(*res));
756 }
757 res[rnum++] = id;
758 }
759 *np = rnum;
760 return res;
761 }
762
763
/* Compare two list elements in lexical order.
   `a' specifies the pointer to one element, itself a pointer to a C string.
   `b' specifies the pointer to the other element.
   The return value is positive if the former is big, negative if the latter is big, 0 if both
   are equivalent. */
static int tccmpwords(const char **a, const char **b){
  assert(a && b);
  const char *lhs = *a;
  const char *rhs = *b;
  return strcmp(lhs, rhs);
}
773
774
775
776 // END OF FILE
777