1 /*************************************************************************************************
2  * The B+ tree database API of Tokyo Cabinet
3  *                                                               Copyright (C) 2006-2012 FAL Labs
4  * This file is part of Tokyo Cabinet.
5  * Tokyo Cabinet is free software; you can redistribute it and/or modify it under the terms of
6  * the GNU Lesser General Public License as published by the Free Software Foundation; either
7  * version 2.1 of the License or any later version.  Tokyo Cabinet is distributed in the hope
8  * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
10  * License for more details.
11  * You should have received a copy of the GNU Lesser General Public License along with Tokyo
12  * Cabinet; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
13  * Boston, MA 02111-1307 USA.
14  *************************************************************************************************/
15 
16 
17 #include "tcutil.h"
18 #include "tchdb.h"
19 #include "tcbdb.h"
20 #include "myconf.h"
21 
22 #define BDBOPAQUESIZ   64                // size of using opaque field
23 #define BDBLEFTOPQSIZ  64                // size of left opaque field
24 #define BDBPAGEBUFSIZ  32768             // size of a buffer to read each page
25 #define BDBNODEIDBASE  ((1LL<<48)+1)     // base number of node ID
26 #define BDBLEVELMAX    64                // max level of B+ tree
27 #define BDBCACHEOUT    8                 // number of pages in a process of cacheout
28 
29 #define BDBDEFLMEMB    128               // default number of members in each leaf
30 #define BDBMINLMEMB    4                 // minimum number of members in each leaf
31 #define BDBDEFNMEMB    256               // default number of members in each node
32 #define BDBMINNMEMB    4                 // minimum number of members in each node
33 #define BDBDEFBNUM     32749             // default bucket number
34 #define BDBDEFAPOW     8                 // default alignment power
35 #define BDBDEFFPOW     10                // default free block pool power
36 #define BDBDEFLCNUM    1024              // default number of leaf cache
37 #define BDBDEFNCNUM    512               // default number of node cache
38 #define BDBDEFLSMAX    16384             // default maximum size of each leaf
39 #define BDBMINLSMAX    512               // minimum maximum size of each leaf
40 
41 typedef struct {                         // type of structure for a record
42   int ksiz;                              // size of the key region
43   int vsiz;                              // size of the value region
44   TCLIST *rest;                          // list of value objects
45 } BDBREC;
46 
47 typedef struct {                         // type of structure for a leaf page
48   uint64_t id;                           // ID number of the leaf
49   TCPTRLIST *recs;                       // list of records
50   int size;                              // predicted size of serialized buffer
51   uint64_t prev;                         // ID number of the previous leaf
52   uint64_t next;                         // ID number of the next leaf
53   bool dirty;                            // whether to be written back
54   bool dead;                             // whether to be removed
55 } BDBLEAF;
56 
57 typedef struct {                         // type of structure for a page index
58   uint64_t pid;                          // ID number of the referring page
59   int ksiz;                              // size of the key region
60 } BDBIDX;
61 
62 typedef struct {                         // type of structure for a node page
63   uint64_t id;                           // ID number of the node
64   uint64_t heir;                         // ID of the child before the first index
65   TCPTRLIST *idxs;                       // list of indices
66   bool dirty;                            // whether to be written back
67   bool dead;                             // whether to be removed
68 } BDBNODE;
69 
70 enum {                                   // enumeration for duplication behavior
71   BDBPDOVER,                             // overwrite an existing value
72   BDBPDKEEP,                             // keep the existing value
73   BDBPDCAT,                              // concatenate values
74   BDBPDDUP,                              // allow duplication of keys
75   BDBPDDUPB,                             // allow backward duplication
76   BDBPDADDINT,                           // add an integer
77   BDBPDADDDBL,                           // add a real number
78   BDBPDPROC                              // process by a callback function
79 };
80 
81 typedef struct {                         // type of structure for a duplication callback
82   TCPDPROC proc;                         // function pointer
83   void *op;                              // opaque pointer
84 } BDBPDPROCOP;
85 
86 
87 /* private macros */
88 #define BDBLOCKMETHOD(TC_bdb, TC_wr)                            \
89   ((TC_bdb)->mmtx ? tcbdblockmethod((TC_bdb), (TC_wr)) : true)
90 #define BDBUNLOCKMETHOD(TC_bdb)                         \
91   ((TC_bdb)->mmtx ? tcbdbunlockmethod(TC_bdb) : true)
92 #define BDBLOCKCACHE(TC_bdb)                            \
93   ((TC_bdb)->mmtx ? tcbdblockcache(TC_bdb) : true)
94 #define BDBUNLOCKCACHE(TC_bdb)                          \
95   ((TC_bdb)->mmtx ? tcbdbunlockcache(TC_bdb) : true)
96 #define BDBTHREADYIELD(TC_bdb)                          \
97   do { if((TC_bdb)->mmtx) sched_yield(); } while(false)
98 
99 
100 /* private function prototypes */
101 static void tcbdbclear(TCBDB *bdb);
102 static void tcbdbdumpmeta(TCBDB *bdb);
103 static void tcbdbloadmeta(TCBDB *bdb);
104 static BDBLEAF *tcbdbleafnew(TCBDB *bdb, uint64_t prev, uint64_t next);
105 static bool tcbdbleafcacheout(TCBDB *bdb, BDBLEAF *leaf);
106 static bool tcbdbleafsave(TCBDB *bdb, BDBLEAF *leaf);
107 static BDBLEAF *tcbdbleafload(TCBDB *bdb, uint64_t id);
108 static bool tcbdbleafcheck(TCBDB *bdb, uint64_t id);
109 static BDBLEAF *tcbdbgethistleaf(TCBDB *bdb, const char *kbuf, int ksiz, uint64_t id);
110 static bool tcbdbleafaddrec(TCBDB *bdb, BDBLEAF *leaf, int dmode,
111                             const char *kbuf, int ksiz, const char *vbuf, int vsiz);
112 static BDBLEAF *tcbdbleafdivide(TCBDB *bdb, BDBLEAF *leaf);
113 static bool tcbdbleafkill(TCBDB *bdb, BDBLEAF *leaf);
114 static BDBNODE *tcbdbnodenew(TCBDB *bdb, uint64_t heir);
115 static bool tcbdbnodecacheout(TCBDB *bdb, BDBNODE *node);
116 static bool tcbdbnodesave(TCBDB *bdb, BDBNODE *node);
117 static BDBNODE *tcbdbnodeload(TCBDB *bdb, uint64_t id);
118 static void tcbdbnodeaddidx(TCBDB *bdb, BDBNODE *node, bool order, uint64_t pid,
119                             const char *kbuf, int ksiz);
120 static bool tcbdbnodesubidx(TCBDB *bdb, BDBNODE *node, uint64_t pid);
121 static uint64_t tcbdbsearchleaf(TCBDB *bdb, const char *kbuf, int ksiz);
122 static BDBREC *tcbdbsearchrec(TCBDB *bdb, BDBLEAF *leaf, const char *kbuf, int ksiz, int *ip);
123 static void tcbdbremoverec(TCBDB *bdb, BDBLEAF *leaf, BDBREC *rec, int ri);
124 static bool tcbdbcacheadjust(TCBDB *bdb);
125 static void tcbdbcachepurge(TCBDB *bdb);
126 static bool tcbdbopenimpl(TCBDB *bdb, const char *path, int omode);
127 static bool tcbdbcloseimpl(TCBDB *bdb);
128 static bool tcbdbputimpl(TCBDB *bdb, const void *kbuf, int ksiz, const void *vbuf, int vsiz,
129                          int dmode);
130 static bool tcbdboutimpl(TCBDB *bdb, const char *kbuf, int ksiz);
131 static bool tcbdboutlist(TCBDB *bdb, const char *kbuf, int ksiz);
132 static const char *tcbdbgetimpl(TCBDB *bdb, const char *kbuf, int ksiz, int *sp);
133 static int tcbdbgetnum(TCBDB *bdb, const char *kbuf, int ksiz);
134 static TCLIST *tcbdbgetlist(TCBDB *bdb, const char *kbuf, int ksiz);
135 static bool tcbdbrangeimpl(TCBDB *bdb, const char *bkbuf, int bksiz, bool binc,
136                            const char *ekbuf, int eksiz, bool einc, int max, TCLIST *keys);
137 static bool tcbdbrangefwm(TCBDB *bdb, const char *pbuf, int psiz, int max, TCLIST *keys);
138 static bool tcbdboptimizeimpl(TCBDB *bdb, int32_t lmemb, int32_t nmemb,
139                               int64_t bnum, int8_t apow, int8_t fpow, uint8_t opts);
140 static bool tcbdbvanishimpl(TCBDB *bdb);
141 static bool tcbdblockmethod(TCBDB *bdb, bool wr);
142 static bool tcbdbunlockmethod(TCBDB *bdb);
143 static bool tcbdblockcache(TCBDB *bdb);
144 static bool tcbdbunlockcache(TCBDB *bdb);
145 static bool tcbdbcurfirstimpl(BDBCUR *cur);
146 static bool tcbdbcurlastimpl(BDBCUR *cur);
147 static bool tcbdbcurjumpimpl(BDBCUR *cur, const char *kbuf, int ksiz, bool forward);
148 static bool tcbdbcuradjust(BDBCUR *cur, bool forward);
149 static bool tcbdbcurprevimpl(BDBCUR *cur);
150 static bool tcbdbcurnextimpl(BDBCUR *cur);
151 static bool tcbdbcurputimpl(BDBCUR *cur, const char *vbuf, int vsiz, int mode);
152 static bool tcbdbcuroutimpl(BDBCUR *cur);
153 static bool tcbdbcurrecimpl(BDBCUR *cur, const char **kbp, int *ksp, const char **vbp, int *vsp);
154 static bool tcbdbforeachimpl(TCBDB *bdb, TCITER iter, void *op);
155 
156 
157 /* debugging function prototypes */
158 void tcbdbprintmeta(TCBDB *bdb);
159 void tcbdbprintleaf(TCBDB *bdb, BDBLEAF *leaf);
160 void tcbdbprintnode(TCBDB *bdb, BDBNODE *node);
161 
162 
163 
164 /*************************************************************************************************
165  * API
166  *************************************************************************************************/
167 
168 
169 /* Get the message string corresponding to an error code. */
tcbdberrmsg(int ecode)170 const char *tcbdberrmsg(int ecode){
171   return tcerrmsg(ecode);
172 }
173 
174 
175 /* Create a B+ tree database object. */
tcbdbnew(void)176 TCBDB *tcbdbnew(void){
177   TCBDB *bdb;
178   TCMALLOC(bdb, sizeof(*bdb));
179   tcbdbclear(bdb);
180   bdb->hdb = tchdbnew();
181   TCMALLOC(bdb->hist, sizeof(*bdb->hist) * BDBLEVELMAX);
182   tchdbtune(bdb->hdb, BDBDEFBNUM, BDBDEFAPOW, BDBDEFFPOW, 0);
183   tchdbsetxmsiz(bdb->hdb, 0);
184   return bdb;
185 }
186 
187 
188 /* Delete a B+ tree database object. */
tcbdbdel(TCBDB * bdb)189 void tcbdbdel(TCBDB *bdb){
190   assert(bdb);
191   if(bdb->open) tcbdbclose(bdb);
192   TCFREE(bdb->hist);
193   tchdbdel(bdb->hdb);
194   if(bdb->mmtx){
195     pthread_mutex_destroy(bdb->cmtx);
196     pthread_rwlock_destroy(bdb->mmtx);
197     TCFREE(bdb->cmtx);
198     TCFREE(bdb->mmtx);
199   }
200   TCFREE(bdb);
201 }
202 
203 
204 /* Get the last happened error code of a B+ tree database object. */
tcbdbecode(TCBDB * bdb)205 int tcbdbecode(TCBDB *bdb){
206   assert(bdb);
207   return tchdbecode(bdb->hdb);
208 }
209 
210 
211 /* Set mutual exclusion control of a B+ tree database object for threading. */
tcbdbsetmutex(TCBDB * bdb)212 bool tcbdbsetmutex(TCBDB *bdb){
213   assert(bdb);
214   if(!TCUSEPTHREAD) return true;
215   if(bdb->mmtx || bdb->open){
216     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
217     return false;
218   }
219   TCMALLOC(bdb->mmtx, sizeof(pthread_rwlock_t));
220   TCMALLOC(bdb->cmtx, sizeof(pthread_mutex_t));
221   bool err = false;
222   if(pthread_rwlock_init(bdb->mmtx, NULL) != 0) err = true;
223   if(pthread_mutex_init(bdb->cmtx, NULL) != 0) err = true;
224   if(err){
225     TCFREE(bdb->cmtx);
226     TCFREE(bdb->mmtx);
227     bdb->cmtx = NULL;
228     bdb->mmtx = NULL;
229     return false;
230   }
231   return tchdbsetmutex(bdb->hdb);
232 }
233 
234 
235 /* Set the custom comparison function of a B+ tree database object. */
tcbdbsetcmpfunc(TCBDB * bdb,TCCMP cmp,void * cmpop)236 bool tcbdbsetcmpfunc(TCBDB *bdb, TCCMP cmp, void *cmpop){
237   assert(bdb && cmp);
238   if(bdb->open){
239     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
240     return false;
241   }
242   bdb->cmp = cmp;
243   bdb->cmpop = cmpop;
244   return true;
245 }
246 
247 
248 /* Set the tuning parameters of a B+ tree database object. */
tcbdbtune(TCBDB * bdb,int32_t lmemb,int32_t nmemb,int64_t bnum,int8_t apow,int8_t fpow,uint8_t opts)249 bool tcbdbtune(TCBDB *bdb, int32_t lmemb, int32_t nmemb,
250                int64_t bnum, int8_t apow, int8_t fpow, uint8_t opts){
251   assert(bdb);
252   if(bdb->open){
253     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
254     return false;
255   }
256   bdb->lmemb = (lmemb > 0) ? tclmax(lmemb, BDBMINLMEMB) : BDBDEFLMEMB;
257   bdb->nmemb = (nmemb > 0) ? tclmax(nmemb, BDBMINNMEMB) : BDBDEFNMEMB;
258   bdb->opts = opts;
259   uint8_t hopts = 0;
260   if(opts & BDBTLARGE) hopts |= HDBTLARGE;
261   if(opts & BDBTDEFLATE) hopts |= HDBTDEFLATE;
262   if(opts & BDBTBZIP) hopts |= HDBTBZIP;
263   if(opts & BDBTTCBS) hopts |= HDBTTCBS;
264   if(opts & BDBTEXCODEC) hopts |= HDBTEXCODEC;
265   bnum = (bnum > 0) ? bnum : BDBDEFBNUM;
266   apow = (apow >= 0) ? apow : BDBDEFAPOW;
267   fpow = (fpow >= 0) ? fpow : BDBDEFFPOW;
268   return tchdbtune(bdb->hdb, bnum, apow, fpow, hopts);
269 }
270 
271 
272 /* Set the caching parameters of a B+ tree database object. */
tcbdbsetcache(TCBDB * bdb,int32_t lcnum,int32_t ncnum)273 bool tcbdbsetcache(TCBDB *bdb, int32_t lcnum, int32_t ncnum){
274   assert(bdb);
275   if(bdb->open){
276     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
277     return false;
278   }
279   if(lcnum > 0) bdb->lcnum = tclmax(lcnum, BDBLEVELMAX);
280   if(ncnum > 0) bdb->ncnum = tclmax(ncnum, BDBLEVELMAX);
281   return true;
282 }
283 
284 
285 /* Set the size of the extra mapped memory of a B+ tree database object. */
tcbdbsetxmsiz(TCBDB * bdb,int64_t xmsiz)286 bool tcbdbsetxmsiz(TCBDB *bdb, int64_t xmsiz){
287   assert(bdb);
288   if(bdb->open){
289     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
290     return false;
291   }
292   return tchdbsetxmsiz(bdb->hdb, xmsiz);
293 }
294 
295 
296 /* Set the unit step number of auto defragmentation of a B+ tree database object. */
tcbdbsetdfunit(TCBDB * bdb,int32_t dfunit)297 bool tcbdbsetdfunit(TCBDB *bdb, int32_t dfunit){
298   assert(bdb);
299   if(bdb->open){
300     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
301     return false;
302   }
303   return tchdbsetdfunit(bdb->hdb, dfunit);
304 }
305 
306 
307 /* Open a database file and connect a B+ tree database object. */
tcbdbopen(TCBDB * bdb,const char * path,int omode)308 bool tcbdbopen(TCBDB *bdb, const char *path, int omode){
309   assert(bdb && path);
310   if(!BDBLOCKMETHOD(bdb, true)) return false;
311   if(bdb->open){
312     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
313     BDBUNLOCKMETHOD(bdb);
314     return false;
315   }
316   bool rv = tcbdbopenimpl(bdb, path, omode);
317   BDBUNLOCKMETHOD(bdb);
318   return rv;
319 }
320 
321 
322 /* Close a B+ tree database object. */
tcbdbclose(TCBDB * bdb)323 bool tcbdbclose(TCBDB *bdb){
324   assert(bdb);
325   if(!BDBLOCKMETHOD(bdb, true)) return false;
326   if(!bdb->open){
327     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
328     BDBUNLOCKMETHOD(bdb);
329     return false;
330   }
331   bool rv = tcbdbcloseimpl(bdb);
332   BDBUNLOCKMETHOD(bdb);
333   return rv;
334 }
335 
336 
337 /* Store a record into a B+ tree database object. */
tcbdbput(TCBDB * bdb,const void * kbuf,int ksiz,const void * vbuf,int vsiz)338 bool tcbdbput(TCBDB *bdb, const void *kbuf, int ksiz, const void *vbuf, int vsiz){
339   assert(bdb && kbuf && ksiz >= 0 && vbuf && vsiz >= 0);
340   if(!BDBLOCKMETHOD(bdb, true)) return false;
341   if(!bdb->open || !bdb->wmode){
342     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
343     BDBUNLOCKMETHOD(bdb);
344     return false;
345   }
346   bool rv = tcbdbputimpl(bdb, kbuf, ksiz, vbuf, vsiz, BDBPDOVER);
347   BDBUNLOCKMETHOD(bdb);
348   return rv;
349 }
350 
351 
352 /* Store a string record into a B+ tree database object. */
tcbdbput2(TCBDB * bdb,const char * kstr,const char * vstr)353 bool tcbdbput2(TCBDB *bdb, const char *kstr, const char *vstr){
354   assert(bdb && kstr && vstr);
355   return tcbdbput(bdb, kstr, strlen(kstr), vstr, strlen(vstr));
356 }
357 
358 
359 /* Store a new record into a B+ tree database object. */
tcbdbputkeep(TCBDB * bdb,const void * kbuf,int ksiz,const void * vbuf,int vsiz)360 bool tcbdbputkeep(TCBDB *bdb, const void *kbuf, int ksiz, const void *vbuf, int vsiz){
361   assert(bdb && kbuf && ksiz >= 0 && vbuf && vsiz >= 0);
362   if(!BDBLOCKMETHOD(bdb, true)) return false;
363   if(!bdb->open || !bdb->wmode){
364     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
365     BDBUNLOCKMETHOD(bdb);
366     return false;
367   }
368   bool rv = tcbdbputimpl(bdb, kbuf, ksiz, vbuf, vsiz, BDBPDKEEP);
369   BDBUNLOCKMETHOD(bdb);
370   return rv;
371 }
372 
373 
374 /* Store a new string record into a B+ tree database object. */
tcbdbputkeep2(TCBDB * bdb,const char * kstr,const char * vstr)375 bool tcbdbputkeep2(TCBDB *bdb, const char *kstr, const char *vstr){
376   assert(bdb && kstr && vstr);
377   return tcbdbputkeep(bdb, kstr, strlen(kstr), vstr, strlen(vstr));
378 }
379 
380 
381 /* Concatenate a value at the end of the existing record in a B+ tree database object. */
tcbdbputcat(TCBDB * bdb,const void * kbuf,int ksiz,const void * vbuf,int vsiz)382 bool tcbdbputcat(TCBDB *bdb, const void *kbuf, int ksiz, const void *vbuf, int vsiz){
383   assert(bdb && kbuf && ksiz >= 0 && vbuf && vsiz >= 0);
384   if(!BDBLOCKMETHOD(bdb, true)) return false;
385   if(!bdb->open || !bdb->wmode){
386     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
387     BDBUNLOCKMETHOD(bdb);
388     return false;
389   }
390   bool rv = tcbdbputimpl(bdb, kbuf, ksiz, vbuf, vsiz, BDBPDCAT);
391   BDBUNLOCKMETHOD(bdb);
392   return rv;
393 }
394 
395 
396 /* Concatenate a string value at the end of the existing record in a B+ tree database object. */
tcbdbputcat2(TCBDB * bdb,const char * kstr,const char * vstr)397 bool tcbdbputcat2(TCBDB *bdb, const char *kstr, const char *vstr){
398   assert(bdb && kstr && vstr);
399   return tcbdbputcat(bdb, kstr, strlen(kstr), vstr, strlen(vstr));
400 }
401 
402 
403 /* Store a record into a B+ tree database object with allowing duplication of keys. */
tcbdbputdup(TCBDB * bdb,const void * kbuf,int ksiz,const void * vbuf,int vsiz)404 bool tcbdbputdup(TCBDB *bdb, const void *kbuf, int ksiz, const void *vbuf, int vsiz){
405   assert(bdb && kbuf && ksiz >= 0 && vbuf && vsiz >= 0);
406   if(!BDBLOCKMETHOD(bdb, true)) return false;
407   if(!bdb->open || !bdb->wmode){
408     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
409     BDBUNLOCKMETHOD(bdb);
410     return false;
411   }
412   bool rv = tcbdbputimpl(bdb, kbuf, ksiz, vbuf, vsiz, BDBPDDUP);
413   BDBUNLOCKMETHOD(bdb);
414   return rv;
415 }
416 
417 
418 /* Store a string record into a B+ tree database object with allowing duplication of keys. */
tcbdbputdup2(TCBDB * bdb,const char * kstr,const char * vstr)419 bool tcbdbputdup2(TCBDB *bdb, const char *kstr, const char *vstr){
420   assert(bdb && kstr && vstr);
421   return tcbdbputdup(bdb, kstr, strlen(kstr), vstr, strlen(vstr));
422 }
423 
424 
425 /* Store records into a B+ tree database object with allowing duplication of keys. */
tcbdbputdup3(TCBDB * bdb,const void * kbuf,int ksiz,const TCLIST * vals)426 bool tcbdbputdup3(TCBDB *bdb, const void *kbuf, int ksiz, const TCLIST *vals){
427   assert(bdb && kbuf && ksiz >= 0 && vals);
428   if(!BDBLOCKMETHOD(bdb, true)) return false;
429   if(!bdb->open || !bdb->wmode){
430     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
431     BDBUNLOCKMETHOD(bdb);
432     return false;
433   }
434   bool err = false;
435   int ln = TCLISTNUM(vals);
436   for(int i = 0; i < ln; i++){
437     const char *vbuf;
438     int vsiz;
439     TCLISTVAL(vbuf, vals, i, vsiz);
440     if(!tcbdbputimpl(bdb, kbuf, ksiz, vbuf, vsiz, BDBPDDUP)) err = true;
441   }
442   BDBUNLOCKMETHOD(bdb);
443   return !err;
444 }
445 
446 
447 /* Remove a record of a B+ tree database object. */
tcbdbout(TCBDB * bdb,const void * kbuf,int ksiz)448 bool tcbdbout(TCBDB *bdb, const void *kbuf, int ksiz){
449   assert(bdb && kbuf && ksiz >= 0);
450   if(!BDBLOCKMETHOD(bdb, true)) return false;
451   if(!bdb->open || !bdb->wmode){
452     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
453     BDBUNLOCKMETHOD(bdb);
454     return false;
455   }
456   bool rv = tcbdboutimpl(bdb, kbuf, ksiz);
457   BDBUNLOCKMETHOD(bdb);
458   return rv;
459 }
460 
461 
462 /* Remove a string record of a B+ tree database object. */
tcbdbout2(TCBDB * bdb,const char * kstr)463 bool tcbdbout2(TCBDB *bdb, const char *kstr){
464   assert(bdb && kstr);
465   return tcbdbout(bdb, kstr, strlen(kstr));
466 }
467 
468 
469 /* Remove records of a B+ tree database object. */
tcbdbout3(TCBDB * bdb,const void * kbuf,int ksiz)470 bool tcbdbout3(TCBDB *bdb, const void *kbuf, int ksiz){
471   assert(bdb && kbuf && ksiz >= 0);
472   if(!BDBLOCKMETHOD(bdb, true)) return false;
473   if(!bdb->open || !bdb->wmode){
474     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
475     BDBUNLOCKMETHOD(bdb);
476     return false;
477   }
478   bool rv = tcbdboutlist(bdb, kbuf, ksiz);
479   BDBUNLOCKMETHOD(bdb);
480   return rv;
481 }
482 
483 
484 /* Retrieve a record in a B+ tree database object. */
tcbdbget(TCBDB * bdb,const void * kbuf,int ksiz,int * sp)485 void *tcbdbget(TCBDB *bdb, const void *kbuf, int ksiz, int *sp){
486   assert(bdb && kbuf && ksiz >= 0 && sp);
487   if(!BDBLOCKMETHOD(bdb, false)) return NULL;
488   if(!bdb->open){
489     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
490     BDBUNLOCKMETHOD(bdb);
491     return NULL;
492   }
493   const char *vbuf = tcbdbgetimpl(bdb, kbuf, ksiz, sp);
494   char *rv;
495   if(vbuf){
496     TCMEMDUP(rv, vbuf, *sp);
497   } else {
498     rv = NULL;
499   }
500   bool adj = TCMAPRNUM(bdb->leafc) > bdb->lcnum || TCMAPRNUM(bdb->nodec) > bdb->ncnum;
501   BDBUNLOCKMETHOD(bdb);
502   if(adj && BDBLOCKMETHOD(bdb, true)){
503     if(!bdb->tran && !tcbdbcacheadjust(bdb)){
504       TCFREE(rv);
505       rv = NULL;
506     }
507     BDBUNLOCKMETHOD(bdb);
508   }
509   return rv;
510 }
511 
512 
513 /* Retrieve a string record in a B+ tree database object. */
tcbdbget2(TCBDB * bdb,const char * kstr)514 char *tcbdbget2(TCBDB *bdb, const char *kstr){
515   assert(bdb && kstr);
516   int vsiz;
517   return tcbdbget(bdb, kstr, strlen(kstr), &vsiz);
518 }
519 
520 
521 /* Retrieve a record in a B+ tree database object and write the value into a buffer. */
tcbdbget3(TCBDB * bdb,const void * kbuf,int ksiz,int * sp)522 const void *tcbdbget3(TCBDB *bdb, const void *kbuf, int ksiz, int *sp){
523   assert(bdb && kbuf && ksiz >= 0 && sp);
524   if(!BDBLOCKMETHOD(bdb, false)) return NULL;
525   if(!bdb->open){
526     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
527     BDBUNLOCKMETHOD(bdb);
528     return NULL;
529   }
530   const char *rv = tcbdbgetimpl(bdb, kbuf, ksiz, sp);
531   bool adj = TCMAPRNUM(bdb->leafc) > bdb->lcnum || TCMAPRNUM(bdb->nodec) > bdb->ncnum;
532   BDBUNLOCKMETHOD(bdb);
533   if(adj && BDBLOCKMETHOD(bdb, true)){
534     if(!bdb->tran && !tcbdbcacheadjust(bdb)) rv = NULL;
535     BDBUNLOCKMETHOD(bdb);
536   }
537   return rv;
538 }
539 
540 
541 /* Retrieve records in a B+ tree database object. */
tcbdbget4(TCBDB * bdb,const void * kbuf,int ksiz)542 TCLIST *tcbdbget4(TCBDB *bdb, const void *kbuf, int ksiz){
543   assert(bdb && kbuf && ksiz >= 0);
544   if(!BDBLOCKMETHOD(bdb, false)) return NULL;
545   if(!bdb->open){
546     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
547     BDBUNLOCKMETHOD(bdb);
548     return NULL;
549   }
550   TCLIST *rv = tcbdbgetlist(bdb, kbuf, ksiz);
551   bool adj = TCMAPRNUM(bdb->leafc) > bdb->lcnum || TCMAPRNUM(bdb->nodec) > bdb->ncnum;
552   BDBUNLOCKMETHOD(bdb);
553   if(adj && BDBLOCKMETHOD(bdb, true)){
554     if(!bdb->tran && !tcbdbcacheadjust(bdb)){
555       if(rv) tclistdel(rv);
556       rv = NULL;
557     }
558     BDBUNLOCKMETHOD(bdb);
559   }
560   return rv;
561 }
562 
563 
564 /* Get the number of records corresponding a key in a B+ tree database object. */
tcbdbvnum(TCBDB * bdb,const void * kbuf,int ksiz)565 int tcbdbvnum(TCBDB *bdb, const void *kbuf, int ksiz){
566   assert(bdb && kbuf && ksiz >= 0);
567   if(!BDBLOCKMETHOD(bdb, false)) return 0;
568   if(!bdb->open){
569     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
570     BDBUNLOCKMETHOD(bdb);
571     return 0;
572   }
573   int rv = tcbdbgetnum(bdb, kbuf, ksiz);
574   bool adj = TCMAPRNUM(bdb->leafc) > bdb->lcnum || TCMAPRNUM(bdb->nodec) > bdb->ncnum;
575   BDBUNLOCKMETHOD(bdb);
576   if(adj && BDBLOCKMETHOD(bdb, true)){
577     if(!bdb->tran && !tcbdbcacheadjust(bdb)) rv = 0;
578     BDBUNLOCKMETHOD(bdb);
579   }
580   return rv;
581 }
582 
583 
584 /* Get the number of records corresponding a string key in a B+ tree database object. */
tcbdbvnum2(TCBDB * bdb,const char * kstr)585 int tcbdbvnum2(TCBDB *bdb, const char *kstr){
586   assert(bdb && kstr);
587   return tcbdbvnum(bdb, kstr, strlen(kstr));
588 }
589 
590 
591 /* Get the size of the value of a record in a B+ tree database object. */
tcbdbvsiz(TCBDB * bdb,const void * kbuf,int ksiz)592 int tcbdbvsiz(TCBDB *bdb, const void *kbuf, int ksiz){
593   assert(bdb && kbuf && ksiz >= 0);
594   int vsiz;
595   if(!tcbdbget3(bdb, kbuf, ksiz, &vsiz)) return -1;
596   return vsiz;
597 }
598 
599 
600 /* Get the size of the value of a string record in a B+ tree database object. */
tcbdbvsiz2(TCBDB * bdb,const char * kstr)601 int tcbdbvsiz2(TCBDB *bdb, const char *kstr){
602   assert(bdb && kstr);
603   return tcbdbvsiz(bdb, kstr, strlen(kstr));
604 }
605 
606 
607 /* Get keys of ranged records in a B+ tree database object. */
tcbdbrange(TCBDB * bdb,const void * bkbuf,int bksiz,bool binc,const void * ekbuf,int eksiz,bool einc,int max)608 TCLIST *tcbdbrange(TCBDB *bdb, const void *bkbuf, int bksiz, bool binc,
609                    const void *ekbuf, int eksiz, bool einc, int max){
610   assert(bdb);
611   TCLIST *keys = tclistnew();
612   if(!BDBLOCKMETHOD(bdb, false)) return keys;
613   if(!bdb->open){
614     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
615     BDBUNLOCKMETHOD(bdb);
616     return keys;
617   }
618   tcbdbrangeimpl(bdb, bkbuf, bksiz, binc, ekbuf, eksiz, einc, max, keys);
619   bool adj = TCMAPRNUM(bdb->leafc) > bdb->lcnum || TCMAPRNUM(bdb->nodec) > bdb->ncnum;
620   BDBUNLOCKMETHOD(bdb);
621   if(adj && BDBLOCKMETHOD(bdb, true)){
622     tcbdbcacheadjust(bdb);
623     BDBUNLOCKMETHOD(bdb);
624   }
625   return keys;
626 }
627 
628 
629 /* Get string keys of ranged records in a B+ tree database object. */
tcbdbrange2(TCBDB * bdb,const char * bkstr,bool binc,const char * ekstr,bool einc,int max)630 TCLIST *tcbdbrange2(TCBDB *bdb, const char *bkstr, bool binc,
631                     const char *ekstr, bool einc, int max){
632   assert(bdb);
633   return tcbdbrange(bdb, bkstr, bkstr ? strlen(bkstr) : 0, binc,
634                     ekstr, ekstr ? strlen(ekstr) : 0, einc, max);
635 }
636 
637 
638 /* Get forward matching keys in a B+ tree database object. */
tcbdbfwmkeys(TCBDB * bdb,const void * pbuf,int psiz,int max)639 TCLIST *tcbdbfwmkeys(TCBDB *bdb, const void *pbuf, int psiz, int max){
640   assert(bdb && pbuf && psiz >= 0);
641   TCLIST *keys = tclistnew();
642   if(!BDBLOCKMETHOD(bdb, false)) return keys;
643   if(!bdb->open){
644     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
645     BDBUNLOCKMETHOD(bdb);
646     return keys;
647   }
648   tcbdbrangefwm(bdb, pbuf, psiz, max, keys);
649   bool adj = TCMAPRNUM(bdb->leafc) > bdb->lcnum || TCMAPRNUM(bdb->nodec) > bdb->ncnum;
650   BDBUNLOCKMETHOD(bdb);
651   if(adj && BDBLOCKMETHOD(bdb, true)){
652     tcbdbcacheadjust(bdb);
653     BDBUNLOCKMETHOD(bdb);
654   }
655   return keys;
656 }
657 
658 
659 /* Get forward matching string keys in a B+ tree database object. */
tcbdbfwmkeys2(TCBDB * bdb,const char * pstr,int max)660 TCLIST *tcbdbfwmkeys2(TCBDB *bdb, const char *pstr, int max){
661   assert(bdb && pstr);
662   return tcbdbfwmkeys(bdb, pstr, strlen(pstr), max);
663 }
664 
665 
666 /* Add an integer to a record in a B+ tree database object. */
tcbdbaddint(TCBDB * bdb,const void * kbuf,int ksiz,int num)667 int tcbdbaddint(TCBDB *bdb, const void *kbuf, int ksiz, int num){
668   assert(bdb && kbuf && ksiz >= 0);
669   if(!BDBLOCKMETHOD(bdb, true)) return INT_MIN;
670   if(!bdb->open || !bdb->wmode){
671     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
672     BDBUNLOCKMETHOD(bdb);
673     return INT_MIN;
674   }
675   bool rv = tcbdbputimpl(bdb, kbuf, ksiz, (char *)&num, sizeof(num), BDBPDADDINT);
676   BDBUNLOCKMETHOD(bdb);
677   return rv ? num : INT_MIN;
678 }
679 
680 
681 /* Add a real number to a record in a B+ tree database object. */
tcbdbadddouble(TCBDB * bdb,const void * kbuf,int ksiz,double num)682 double tcbdbadddouble(TCBDB *bdb, const void *kbuf, int ksiz, double num){
683   assert(bdb && kbuf && ksiz >= 0);
684   if(!BDBLOCKMETHOD(bdb, true)) return nan("");
685   if(!bdb->open || !bdb->wmode){
686     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
687     BDBUNLOCKMETHOD(bdb);
688     return nan("");
689   }
690   bool rv = tcbdbputimpl(bdb, kbuf, ksiz, (char *)&num, sizeof(num), BDBPDADDDBL);
691   BDBUNLOCKMETHOD(bdb);
692   return rv ? num : nan("");
693 }
694 
695 
696 /* Synchronize updated contents of a B+ tree database object with the file and the device. */
tcbdbsync(TCBDB * bdb)697 bool tcbdbsync(TCBDB *bdb){
698   assert(bdb);
699   if(!BDBLOCKMETHOD(bdb, true)) return false;
700   if(!bdb->open || !bdb->wmode || bdb->tran){
701     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
702     BDBUNLOCKMETHOD(bdb);
703     return false;
704   }
705   bool rv = tcbdbmemsync(bdb, true);
706   BDBUNLOCKMETHOD(bdb);
707   return rv;
708 }
709 
710 
711 /* Optimize the file of a B+ tree database object. */
tcbdboptimize(TCBDB * bdb,int32_t lmemb,int32_t nmemb,int64_t bnum,int8_t apow,int8_t fpow,uint8_t opts)712 bool tcbdboptimize(TCBDB *bdb, int32_t lmemb, int32_t nmemb,
713                    int64_t bnum, int8_t apow, int8_t fpow, uint8_t opts){
714   assert(bdb);
715   if(!BDBLOCKMETHOD(bdb, true)) return false;
716   if(!bdb->open || !bdb->wmode || bdb->tran){
717     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
718     BDBUNLOCKMETHOD(bdb);
719     return false;
720   }
721   BDBTHREADYIELD(bdb);
722   bool rv = tcbdboptimizeimpl(bdb, lmemb, nmemb, bnum, apow, fpow, opts);
723   BDBUNLOCKMETHOD(bdb);
724   return rv;
725 }
726 
727 
728 /* Remove all records of a B+ tree database object. */
tcbdbvanish(TCBDB * bdb)729 bool tcbdbvanish(TCBDB *bdb){
730   assert(bdb);
731   if(!BDBLOCKMETHOD(bdb, true)) return false;
732   if(!bdb->open || !bdb->wmode || bdb->tran){
733     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
734     BDBUNLOCKMETHOD(bdb);
735     return false;
736   }
737   BDBTHREADYIELD(bdb);
738   bool rv = tcbdbvanishimpl(bdb);
739   BDBUNLOCKMETHOD(bdb);
740   return rv;
741 }
742 
743 
744 /* Copy the database file of a B+ tree database object. */
tcbdbcopy(TCBDB * bdb,const char * path)745 bool tcbdbcopy(TCBDB *bdb, const char *path){
746   assert(bdb && path);
747   if(!BDBLOCKMETHOD(bdb, true)) return false;
748   if(!bdb->open){
749     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
750     BDBUNLOCKMETHOD(bdb);
751     return false;
752   }
753   BDBTHREADYIELD(bdb);
754   TCLIST *lids = tclistnew();
755   TCLIST *nids = tclistnew();
756   const char *vbuf;
757   int vsiz;
758   TCMAP *leafc = bdb->leafc;
759   tcmapiterinit(leafc);
760   while((vbuf = tcmapiternext(leafc, &vsiz)) != NULL){
761     TCLISTPUSH(lids, vbuf, vsiz);
762   }
763   TCMAP *nodec = bdb->nodec;
764   tcmapiterinit(nodec);
765   while((vbuf = tcmapiternext(nodec, &vsiz)) != NULL){
766     TCLISTPUSH(nids, vbuf, vsiz);
767   }
768   BDBUNLOCKMETHOD(bdb);
769   bool err = false;
770   int ln = TCLISTNUM(lids);
771   for(int i = 0; i < ln; i++){
772     vbuf = TCLISTVALPTR(lids, i);
773     if(BDBLOCKMETHOD(bdb, true)){
774       BDBTHREADYIELD(bdb);
775       if(bdb->open){
776         int rsiz;
777         BDBLEAF *leaf = (BDBLEAF *)tcmapget(bdb->leafc, vbuf, sizeof(leaf->id), &rsiz);
778         if(leaf && leaf->dirty && !tcbdbleafsave(bdb, leaf)) err = true;
779       } else {
780         err = true;
781       }
782       BDBUNLOCKMETHOD(bdb);
783     } else {
784       err = true;
785     }
786   }
787   ln = TCLISTNUM(nids);
788   for(int i = 0; i < ln; i++){
789     vbuf = TCLISTVALPTR(nids, i);
790     if(BDBLOCKMETHOD(bdb, true)){
791       if(bdb->open){
792         int rsiz;
793         BDBNODE *node = (BDBNODE *)tcmapget(bdb->nodec, vbuf, sizeof(node->id), &rsiz);
794         if(node && node->dirty && !tcbdbnodesave(bdb, node)) err = true;
795       } else {
796         err = true;
797       }
798       BDBUNLOCKMETHOD(bdb);
799     } else {
800       err = true;
801     }
802   }
803   tclistdel(nids);
804   tclistdel(lids);
805   if(!tcbdbtranbegin(bdb)) err = true;
806   if(BDBLOCKMETHOD(bdb, false)){
807     BDBTHREADYIELD(bdb);
808     if(!tchdbcopy(bdb->hdb, path)) err = true;
809     BDBUNLOCKMETHOD(bdb);
810   } else {
811     err = true;
812   }
813   if(!tcbdbtrancommit(bdb)) err = true;
814   return !err;
815 }
816 
817 
818 /* Begin the transaction of a B+ tree database object. */
tcbdbtranbegin(TCBDB * bdb)819 bool tcbdbtranbegin(TCBDB *bdb){
820   assert(bdb);
821   for(double wsec = 1.0 / sysconf(_SC_CLK_TCK); true; wsec *= 2){
822     if(!BDBLOCKMETHOD(bdb, true)) return false;
823     if(!bdb->open || !bdb->wmode){
824       tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
825       BDBUNLOCKMETHOD(bdb);
826       return false;
827     }
828     if(!bdb->tran) break;
829     BDBUNLOCKMETHOD(bdb);
830     if(wsec > 1.0) wsec = 1.0;
831     tcsleep(wsec);
832   }
833   if(!tcbdbmemsync(bdb, false)){
834     BDBUNLOCKMETHOD(bdb);
835     return false;
836   }
837   if(!tchdbtranbegin(bdb->hdb)){
838     BDBUNLOCKMETHOD(bdb);
839     return false;
840   }
841   bdb->tran = true;
842   TCMEMDUP(bdb->rbopaque, bdb->opaque, BDBOPAQUESIZ);
843   BDBUNLOCKMETHOD(bdb);
844   return true;
845 }
846 
847 
848 /* Commit the transaction of a B+ tree database object. */
tcbdbtrancommit(TCBDB * bdb)849 bool tcbdbtrancommit(TCBDB *bdb){
850   assert(bdb);
851   if(!BDBLOCKMETHOD(bdb, true)) return false;
852   if(!bdb->open || !bdb->wmode || !bdb->tran){
853     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
854     BDBUNLOCKMETHOD(bdb);
855     return false;
856   }
857   TCFREE(bdb->rbopaque);
858   bdb->tran = false;
859   bdb->rbopaque = NULL;
860   bool err = false;
861   if(!tcbdbmemsync(bdb, false)) err = true;
862   if(!tcbdbcacheadjust(bdb)) err = true;
863   if(err){
864     tchdbtranabort(bdb->hdb);
865   } else if(!tchdbtrancommit(bdb->hdb)){
866     err = true;
867   }
868   BDBUNLOCKMETHOD(bdb);
869   return !err;
870 }
871 
872 
873 /* Abort the transaction of a B+ tree database object. */
tcbdbtranabort(TCBDB * bdb)874 bool tcbdbtranabort(TCBDB *bdb){
875   assert(bdb);
876   if(!BDBLOCKMETHOD(bdb, true)) return false;
877   if(!bdb->open || !bdb->wmode || !bdb->tran){
878     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
879     BDBUNLOCKMETHOD(bdb);
880     return false;
881   }
882   tcbdbcachepurge(bdb);
883   memcpy(bdb->opaque, bdb->rbopaque, BDBOPAQUESIZ);
884   tcbdbloadmeta(bdb);
885   TCFREE(bdb->rbopaque);
886   bdb->tran = false;
887   bdb->rbopaque = NULL;
888   bdb->hleaf = 0;
889   bdb->lleaf = 0;
890   bdb->clock++;
891   bool err = false;
892   if(!tcbdbcacheadjust(bdb)) err = true;
893   if(!tchdbtranvoid(bdb->hdb)) err = true;
894   BDBUNLOCKMETHOD(bdb);
895   return !err;
896 }
897 
898 
899 /* Get the file path of a B+ tree database object. */
tcbdbpath(TCBDB * bdb)900 const char *tcbdbpath(TCBDB *bdb){
901   assert(bdb);
902   if(!BDBLOCKMETHOD(bdb, false)) return NULL;
903   if(!bdb->open){
904     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
905     BDBUNLOCKMETHOD(bdb);
906     return NULL;
907   }
908   const char *rv = tchdbpath(bdb->hdb);
909   BDBUNLOCKMETHOD(bdb);
910   return rv;
911 }
912 
913 
914 /* Get the number of records of a B+ tree database object. */
tcbdbrnum(TCBDB * bdb)915 uint64_t tcbdbrnum(TCBDB *bdb){
916   assert(bdb);
917   if(!BDBLOCKMETHOD(bdb, false)) return 0;
918   if(!bdb->open){
919     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
920     BDBUNLOCKMETHOD(bdb);
921     return 0;
922   }
923   uint64_t rv = bdb->rnum;
924   BDBUNLOCKMETHOD(bdb);
925   return rv;
926 }
927 
928 
929 /* Get the size of the database file of a B+ tree database object. */
tcbdbfsiz(TCBDB * bdb)930 uint64_t tcbdbfsiz(TCBDB *bdb){
931   assert(bdb);
932   if(!BDBLOCKMETHOD(bdb, false)) return 0;
933   if(!bdb->open){
934     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
935     BDBUNLOCKMETHOD(bdb);
936     return 0;
937   }
938   uint64_t rv = tchdbfsiz(bdb->hdb);
939   BDBUNLOCKMETHOD(bdb);
940   return rv;
941 }
942 
943 
944 /* Create a cursor object. */
tcbdbcurnew(TCBDB * bdb)945 BDBCUR *tcbdbcurnew(TCBDB *bdb){
946   assert(bdb);
947   BDBCUR *cur;
948   TCMALLOC(cur, sizeof(*cur));
949   cur->bdb = bdb;
950   cur->clock = 0;
951   cur->id = 0;
952   cur->kidx = 0;
953   cur->vidx = 0;
954   return cur;
955 }
956 
957 
958 /* Delete a cursor object. */
tcbdbcurdel(BDBCUR * cur)959 void tcbdbcurdel(BDBCUR *cur){
960   assert(cur);
961   TCFREE(cur);
962 }
963 
964 
965 /* Move a cursor object to the first record. */
tcbdbcurfirst(BDBCUR * cur)966 bool tcbdbcurfirst(BDBCUR *cur){
967   assert(cur);
968   TCBDB *bdb = cur->bdb;
969   if(!BDBLOCKMETHOD(bdb, false)) return false;
970   if(!bdb->open){
971     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
972     BDBUNLOCKMETHOD(bdb);
973     return false;
974   }
975   bool rv = tcbdbcurfirstimpl(cur);
976   bool adj = TCMAPRNUM(bdb->leafc) > bdb->lcnum || TCMAPRNUM(bdb->nodec) > bdb->ncnum;
977   BDBUNLOCKMETHOD(bdb);
978   if(adj && BDBLOCKMETHOD(bdb, true)){
979     if(!bdb->tran && !tcbdbcacheadjust(bdb)) rv = false;
980     BDBUNLOCKMETHOD(bdb);
981   }
982   return rv;
983 }
984 
985 
986 /* Move a cursor object to the last record. */
tcbdbcurlast(BDBCUR * cur)987 bool tcbdbcurlast(BDBCUR *cur){
988   assert(cur);
989   TCBDB *bdb = cur->bdb;
990   if(!BDBLOCKMETHOD(bdb, false)) return false;
991   if(!bdb->open){
992     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
993     BDBUNLOCKMETHOD(bdb);
994     return false;
995   }
996   bool rv = tcbdbcurlastimpl(cur);
997   bool adj = TCMAPRNUM(bdb->leafc) > bdb->lcnum || TCMAPRNUM(bdb->nodec) > bdb->ncnum;
998   BDBUNLOCKMETHOD(bdb);
999   if(adj && BDBLOCKMETHOD(bdb, true)){
1000     if(!bdb->tran && !tcbdbcacheadjust(bdb)) rv = false;
1001     BDBUNLOCKMETHOD(bdb);
1002   }
1003   return rv;
1004 }
1005 
1006 
1007 /* Move a cursor object to the front of records corresponding a key. */
tcbdbcurjump(BDBCUR * cur,const void * kbuf,int ksiz)1008 bool tcbdbcurjump(BDBCUR *cur, const void *kbuf, int ksiz){
1009   assert(cur && kbuf && ksiz >= 0);
1010   TCBDB *bdb = cur->bdb;
1011   if(!BDBLOCKMETHOD(bdb, false)) return false;
1012   if(!bdb->open){
1013     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1014     BDBUNLOCKMETHOD(bdb);
1015     return false;
1016   }
1017   bool rv = tcbdbcurjumpimpl(cur, kbuf, ksiz, true);
1018   bool adj = TCMAPRNUM(bdb->leafc) > bdb->lcnum || TCMAPRNUM(bdb->nodec) > bdb->ncnum;
1019   BDBUNLOCKMETHOD(bdb);
1020   if(adj && BDBLOCKMETHOD(bdb, true)){
1021     if(!bdb->tran && !tcbdbcacheadjust(bdb)) rv = false;
1022     BDBUNLOCKMETHOD(bdb);
1023   }
1024   return rv;
1025 }
1026 
1027 
1028 /* Move a cursor object to the front of records corresponding a key string. */
tcbdbcurjump2(BDBCUR * cur,const char * kstr)1029 bool tcbdbcurjump2(BDBCUR *cur, const char *kstr){
1030   assert(cur && kstr);
1031   return tcbdbcurjump(cur, kstr, strlen(kstr));
1032 }
1033 
1034 
1035 /* Move a cursor object to the previous record. */
tcbdbcurprev(BDBCUR * cur)1036 bool tcbdbcurprev(BDBCUR *cur){
1037   assert(cur);
1038   TCBDB *bdb = cur->bdb;
1039   if(!BDBLOCKMETHOD(bdb, false)) return false;
1040   if(!bdb->open){
1041     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1042     BDBUNLOCKMETHOD(bdb);
1043     return false;
1044   }
1045   if(cur->id < 1){
1046     tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
1047     BDBUNLOCKMETHOD(bdb);
1048     return false;
1049   }
1050   bool rv = tcbdbcurprevimpl(cur);
1051   bool adj = TCMAPRNUM(bdb->leafc) > bdb->lcnum || TCMAPRNUM(bdb->nodec) > bdb->ncnum;
1052   BDBUNLOCKMETHOD(bdb);
1053   if(adj && BDBLOCKMETHOD(bdb, true)){
1054     if(!bdb->tran && !tcbdbcacheadjust(bdb)) rv = false;
1055     BDBUNLOCKMETHOD(bdb);
1056   }
1057   return rv;
1058 }
1059 
1060 
1061 /* Move a cursor object to the next record. */
tcbdbcurnext(BDBCUR * cur)1062 bool tcbdbcurnext(BDBCUR *cur){
1063   assert(cur);
1064   TCBDB *bdb = cur->bdb;
1065   if(!BDBLOCKMETHOD(bdb, false)) return false;
1066   if(!bdb->open){
1067     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1068     BDBUNLOCKMETHOD(bdb);
1069     return false;
1070   }
1071   if(cur->id < 1){
1072     tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
1073     BDBUNLOCKMETHOD(bdb);
1074     return false;
1075   }
1076   bool rv = tcbdbcurnextimpl(cur);
1077   bool adj = TCMAPRNUM(bdb->leafc) > bdb->lcnum || TCMAPRNUM(bdb->nodec) > bdb->ncnum;
1078   BDBUNLOCKMETHOD(bdb);
1079   if(adj && BDBLOCKMETHOD(bdb, true)){
1080     if(!bdb->tran && !tcbdbcacheadjust(bdb)) rv = false;
1081     BDBUNLOCKMETHOD(bdb);
1082   }
1083   return rv;
1084 }
1085 
1086 
1087 /* Insert a record around a cursor object. */
tcbdbcurput(BDBCUR * cur,const void * vbuf,int vsiz,int cpmode)1088 bool tcbdbcurput(BDBCUR *cur, const void *vbuf, int vsiz, int cpmode){
1089   assert(cur && vbuf && vsiz >= 0);
1090   TCBDB *bdb = cur->bdb;
1091   if(!BDBLOCKMETHOD(bdb, true)) return false;
1092   if(!bdb->open || !bdb->wmode){
1093     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1094     BDBUNLOCKMETHOD(bdb);
1095     return false;
1096   }
1097   if(cur->id < 1){
1098     tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
1099     BDBUNLOCKMETHOD(bdb);
1100     return false;
1101   }
1102   bool rv = tcbdbcurputimpl(cur, vbuf, vsiz, cpmode);
1103   BDBUNLOCKMETHOD(bdb);
1104   return rv;
1105 }
1106 
1107 
1108 /* Insert a string record around a cursor object. */
tcbdbcurput2(BDBCUR * cur,const char * vstr,int cpmode)1109 bool tcbdbcurput2(BDBCUR *cur, const char *vstr, int cpmode){
1110   assert(cur && vstr);
1111   return tcbdbcurput(cur, vstr, strlen(vstr), cpmode);
1112 }
1113 
1114 
1115 /* Delete the record where a cursor object is. */
tcbdbcurout(BDBCUR * cur)1116 bool tcbdbcurout(BDBCUR *cur){
1117   assert(cur);
1118   TCBDB *bdb = cur->bdb;
1119   if(!BDBLOCKMETHOD(bdb, true)) return false;
1120   if(!bdb->open || !bdb->wmode){
1121     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1122     BDBUNLOCKMETHOD(bdb);
1123     return false;
1124   }
1125   if(cur->id < 1){
1126     tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
1127     BDBUNLOCKMETHOD(bdb);
1128     return false;
1129   }
1130   bool rv = tcbdbcuroutimpl(cur);
1131   BDBUNLOCKMETHOD(bdb);
1132   return rv;
1133 }
1134 
1135 
1136 /* Get the key of the record where the cursor object is. */
tcbdbcurkey(BDBCUR * cur,int * sp)1137 void *tcbdbcurkey(BDBCUR *cur, int *sp){
1138   assert(cur && sp);
1139   TCBDB *bdb = cur->bdb;
1140   if(!BDBLOCKMETHOD(bdb, false)) return false;
1141   if(!bdb->open){
1142     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1143     BDBUNLOCKMETHOD(bdb);
1144     return false;
1145   }
1146   if(cur->id < 1){
1147     tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
1148     BDBUNLOCKMETHOD(bdb);
1149     return false;
1150   }
1151   const char *kbuf, *vbuf;
1152   int ksiz, vsiz;
1153   char *rv;
1154   if(tcbdbcurrecimpl(cur, &kbuf, &ksiz, &vbuf, &vsiz)){
1155     TCMEMDUP(rv, kbuf, ksiz);
1156     *sp = ksiz;
1157   } else {
1158     rv = NULL;
1159   }
1160   BDBUNLOCKMETHOD(bdb);
1161   return rv;
1162 }
1163 
1164 
1165 /* Get the key string of the record where the cursor object is. */
tcbdbcurkey2(BDBCUR * cur)1166 char *tcbdbcurkey2(BDBCUR *cur){
1167   assert(cur);
1168   int ksiz;
1169   return tcbdbcurkey(cur, &ksiz);
1170 }
1171 
1172 
1173 /* Get the key of the record where the cursor object is, as a volatile buffer. */
tcbdbcurkey3(BDBCUR * cur,int * sp)1174 const void *tcbdbcurkey3(BDBCUR *cur, int *sp){
1175   assert(cur && sp);
1176   TCBDB *bdb = cur->bdb;
1177   if(!BDBLOCKMETHOD(bdb, false)) return false;
1178   if(!bdb->open){
1179     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1180     BDBUNLOCKMETHOD(bdb);
1181     return false;
1182   }
1183   if(cur->id < 1){
1184     tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
1185     BDBUNLOCKMETHOD(bdb);
1186     return false;
1187   }
1188   const char *kbuf, *vbuf;
1189   int ksiz, vsiz;
1190   const char *rv;
1191   if(tcbdbcurrecimpl(cur, &kbuf, &ksiz, &vbuf, &vsiz)){
1192     rv = kbuf;
1193     *sp = ksiz;
1194   } else {
1195     rv = NULL;
1196   }
1197   BDBUNLOCKMETHOD(bdb);
1198   return rv;
1199 }
1200 
1201 
1202 /* Get the value of the record where the cursor object is. */
tcbdbcurval(BDBCUR * cur,int * sp)1203 void *tcbdbcurval(BDBCUR *cur, int *sp){
1204   assert(cur && sp);
1205   TCBDB *bdb = cur->bdb;
1206   if(!BDBLOCKMETHOD(bdb, false)) return false;
1207   if(!bdb->open){
1208     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1209     BDBUNLOCKMETHOD(bdb);
1210     return false;
1211   }
1212   if(cur->id < 1){
1213     tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
1214     BDBUNLOCKMETHOD(bdb);
1215     return false;
1216   }
1217   const char *kbuf, *vbuf;
1218   int ksiz, vsiz;
1219   char *rv;
1220   if(tcbdbcurrecimpl(cur, &kbuf, &ksiz, &vbuf, &vsiz)){
1221     TCMEMDUP(rv, vbuf, vsiz);
1222     *sp = vsiz;
1223   } else {
1224     rv = NULL;
1225   }
1226   BDBUNLOCKMETHOD(bdb);
1227   return rv;
1228 }
1229 
1230 
1231 /* Get the value string of the record where the cursor object is. */
tcbdbcurval2(BDBCUR * cur)1232 char *tcbdbcurval2(BDBCUR *cur){
1233   assert(cur);
1234   int vsiz;
1235   return tcbdbcurval(cur, &vsiz);
1236 }
1237 
1238 
1239 /* Get the value of the record where the cursor object is, as a volatile buffer. */
tcbdbcurval3(BDBCUR * cur,int * sp)1240 const void *tcbdbcurval3(BDBCUR *cur, int *sp){
1241   assert(cur && sp);
1242   TCBDB *bdb = cur->bdb;
1243   if(!BDBLOCKMETHOD(bdb, false)) return false;
1244   if(!bdb->open){
1245     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1246     BDBUNLOCKMETHOD(bdb);
1247     return false;
1248   }
1249   if(cur->id < 1){
1250     tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
1251     BDBUNLOCKMETHOD(bdb);
1252     return false;
1253   }
1254   const char *kbuf, *vbuf;
1255   int ksiz, vsiz;
1256   const char *rv;
1257   if(tcbdbcurrecimpl(cur, &kbuf, &ksiz, &vbuf, &vsiz)){
1258     rv = vbuf;
1259     *sp = vsiz;
1260   } else {
1261     rv = NULL;
1262   }
1263   BDBUNLOCKMETHOD(bdb);
1264   return rv;
1265 }
1266 
1267 
1268 /* Get the key and the value of the record where the cursor object is. */
tcbdbcurrec(BDBCUR * cur,TCXSTR * kxstr,TCXSTR * vxstr)1269 bool tcbdbcurrec(BDBCUR *cur, TCXSTR *kxstr, TCXSTR *vxstr){
1270   assert(cur && kxstr && vxstr);
1271   TCBDB *bdb = cur->bdb;
1272   if(!BDBLOCKMETHOD(bdb, false)) return false;
1273   if(!bdb->open){
1274     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1275     BDBUNLOCKMETHOD(bdb);
1276     return false;
1277   }
1278   if(cur->id < 1){
1279     tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
1280     BDBUNLOCKMETHOD(bdb);
1281     return false;
1282   }
1283   const char *kbuf, *vbuf;
1284   int ksiz, vsiz;
1285   bool rv;
1286   if(tcbdbcurrecimpl(cur, &kbuf, &ksiz, &vbuf, &vsiz)){
1287     tcxstrclear(kxstr);
1288     TCXSTRCAT(kxstr, kbuf, ksiz);
1289     tcxstrclear(vxstr);
1290     TCXSTRCAT(vxstr, vbuf, vsiz);
1291     rv = true;
1292   } else {
1293     rv = false;
1294   }
1295   BDBUNLOCKMETHOD(bdb);
1296   return rv;
1297 }
1298 
1299 
1300 
1301 /*************************************************************************************************
1302  * features for experts
1303  *************************************************************************************************/
1304 
1305 
1306 /* Set the error code of a B+ tree database object. */
tcbdbsetecode(TCBDB * bdb,int ecode,const char * filename,int line,const char * func)1307 void tcbdbsetecode(TCBDB *bdb, int ecode, const char *filename, int line, const char *func){
1308   assert(bdb && filename && line >= 1 && func);
1309   tchdbsetecode(bdb->hdb, ecode, filename, line, func);
1310 }
1311 
1312 
1313 /* Set the file descriptor for debugging output. */
tcbdbsetdbgfd(TCBDB * bdb,int fd)1314 void tcbdbsetdbgfd(TCBDB *bdb, int fd){
1315   assert(bdb && fd >= 0);
1316   tchdbsetdbgfd(bdb->hdb, fd);
1317 }
1318 
1319 
1320 /* Get the file descriptor for debugging output. */
tcbdbdbgfd(TCBDB * bdb)1321 int tcbdbdbgfd(TCBDB *bdb){
1322   assert(bdb);
1323   return tchdbdbgfd(bdb->hdb);
1324 }
1325 
1326 
1327 /* Check whether mutual exclusion control is set to a B+ tree database object. */
tcbdbhasmutex(TCBDB * bdb)1328 bool tcbdbhasmutex(TCBDB *bdb){
1329   assert(bdb);
1330   return bdb->mmtx != NULL;
1331 }
1332 
1333 
1334 /* Synchronize updating contents on memory of a B+ tree database object. */
tcbdbmemsync(TCBDB * bdb,bool phys)1335 bool tcbdbmemsync(TCBDB *bdb, bool phys){
1336   assert(bdb);
1337   if(!bdb->open || !bdb->wmode){
1338     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1339     return false;
1340   }
1341   bool err = false;
1342   bool clk = BDBLOCKCACHE(bdb);
1343   const char *vbuf;
1344   int vsiz;
1345   TCMAP *leafc = bdb->leafc;
1346   tcmapiterinit(leafc);
1347   while((vbuf = tcmapiternext(leafc, &vsiz)) != NULL){
1348     int rsiz;
1349     BDBLEAF *leaf = (BDBLEAF *)tcmapiterval(vbuf, &rsiz);
1350     if(leaf->dirty && !tcbdbleafsave(bdb, leaf)) err = true;
1351   }
1352   TCMAP *nodec = bdb->nodec;
1353   tcmapiterinit(nodec);
1354   while((vbuf = tcmapiternext(nodec, &vsiz)) != NULL){
1355     int rsiz;
1356     BDBNODE *node = (BDBNODE *)tcmapiterval(vbuf, &rsiz);
1357     if(node->dirty && !tcbdbnodesave(bdb, node)) err = true;
1358   }
1359   if(clk) BDBUNLOCKCACHE(bdb);
1360   tcbdbdumpmeta(bdb);
1361   if(!tchdbmemsync(bdb->hdb, phys)) err = true;
1362   return !err;
1363 }
1364 
1365 
1366 /* Get the comparison function of a B+ tree database object. */
tcbdbcmpfunc(TCBDB * bdb)1367 TCCMP tcbdbcmpfunc(TCBDB *bdb){
1368   assert(bdb);
1369   return bdb->cmp;
1370 }
1371 
1372 
1373 /* Get the opaque object for the comparison function of a B+ tree database object. */
tcbdbcmpop(TCBDB * bdb)1374 void *tcbdbcmpop(TCBDB *bdb){
1375   assert(bdb);
1376   return bdb->cmpop;
1377 }
1378 
1379 
1380 /* Get the maximum number of cached leaf nodes of a B+ tree database object. */
tcbdblmemb(TCBDB * bdb)1381 uint32_t tcbdblmemb(TCBDB *bdb){
1382   assert(bdb);
1383   return bdb->lmemb;
1384 }
1385 
1386 
1387 /* Get the maximum number of cached non-leaf nodes of a B+ tree database object. */
tcbdbnmemb(TCBDB * bdb)1388 uint32_t tcbdbnmemb(TCBDB *bdb){
1389   assert(bdb);
1390   return bdb->nmemb;
1391 }
1392 
1393 
1394 /* Get the number of the leaf nodes of B+ tree database object. */
tcbdblnum(TCBDB * bdb)1395 uint64_t tcbdblnum(TCBDB *bdb){
1396   assert(bdb);
1397   if(!bdb->open){
1398     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1399     return 0;
1400   }
1401   return bdb->lnum;
1402 }
1403 
1404 
1405 /* Get the number of the non-leaf nodes of B+ tree database object. */
tcbdbnnum(TCBDB * bdb)1406 uint64_t tcbdbnnum(TCBDB *bdb){
1407   assert(bdb);
1408   if(!bdb->open){
1409     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1410     return 0;
1411   }
1412   return bdb->nnum;
1413 }
1414 
1415 
1416 /* Get the number of elements of the bucket array of a B+ tree database object. */
tcbdbbnum(TCBDB * bdb)1417 uint64_t tcbdbbnum(TCBDB *bdb){
1418   assert(bdb);
1419   if(!bdb->open){
1420     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1421     return 0;
1422   }
1423   return tchdbbnum(bdb->hdb);
1424 }
1425 
1426 
1427 /* Get the record alignment of a B+ tree database object. */
tcbdbalign(TCBDB * bdb)1428 uint32_t tcbdbalign(TCBDB *bdb){
1429   assert(bdb);
1430   if(!bdb->open){
1431     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1432     return 0;
1433   }
1434   return tchdbalign(bdb->hdb);
1435 }
1436 
1437 
1438 /* Get the maximum number of the free block pool of a B+ tree database object. */
tcbdbfbpmax(TCBDB * bdb)1439 uint32_t tcbdbfbpmax(TCBDB *bdb){
1440   assert(bdb);
1441   if(!bdb->open){
1442     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1443     return 0;
1444   }
1445   return tchdbfbpmax(bdb->hdb);
1446 }
1447 
1448 
1449 /* Get the inode number of the database file of a B+ tree database object. */
tcbdbinode(TCBDB * bdb)1450 uint64_t tcbdbinode(TCBDB *bdb){
1451   assert(bdb);
1452   if(!bdb->open){
1453     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1454     return 0;
1455   }
1456   return tchdbinode(bdb->hdb);
1457 }
1458 
1459 
1460 /* Get the modification time of the database file of a B+ tree database object. */
tcbdbmtime(TCBDB * bdb)1461 time_t tcbdbmtime(TCBDB *bdb){
1462   assert(bdb);
1463   if(!bdb->open){
1464     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1465     return 0;
1466   }
1467   return tchdbmtime(bdb->hdb);
1468 }
1469 
1470 
1471 /* Get the additional flags of a B+ tree database object. */
tcbdbflags(TCBDB * bdb)1472 uint8_t tcbdbflags(TCBDB *bdb){
1473   assert(bdb);
1474   if(!bdb->open){
1475     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1476     return 0;
1477   }
1478   return tchdbflags(bdb->hdb);
1479 }
1480 
1481 
1482 /* Get the options of a B+ tree database object. */
tcbdbopts(TCBDB * bdb)1483 uint8_t tcbdbopts(TCBDB *bdb){
1484   assert(bdb);
1485   if(!bdb->open){
1486     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1487     return 0;
1488   }
1489   return bdb->opts;
1490 }
1491 
1492 
1493 /* Get the pointer to the opaque field of a B+ tree database object. */
tcbdbopaque(TCBDB * bdb)1494 char *tcbdbopaque(TCBDB *bdb){
1495   assert(bdb);
1496   if(!bdb->open){
1497     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1498     return NULL;
1499   }
1500   return bdb->opaque + BDBOPAQUESIZ;
1501 }
1502 
1503 
1504 /* Get the number of used elements of the bucket array of a B+ tree database object. */
tcbdbbnumused(TCBDB * bdb)1505 uint64_t tcbdbbnumused(TCBDB *bdb){
1506   assert(bdb);
1507   if(!bdb->open){
1508     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1509     return 0;
1510   }
1511   return tchdbbnumused(bdb->hdb);
1512 }
1513 
1514 
1515 /* Set the maximum size of each leaf node. */
tcbdbsetlsmax(TCBDB * bdb,uint32_t lsmax)1516 bool tcbdbsetlsmax(TCBDB *bdb, uint32_t lsmax){
1517   assert(bdb);
1518   if(bdb->open){
1519     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1520     return false;
1521   }
1522   bdb->lsmax = (lsmax > 0) ? tclmax(lsmax, BDBMINLSMAX) : BDBDEFLSMAX;
1523   return true;
1524 }
1525 
1526 
1527 /* Set the capacity number of records. */
tcbdbsetcapnum(TCBDB * bdb,uint64_t capnum)1528 bool tcbdbsetcapnum(TCBDB *bdb, uint64_t capnum){
1529   assert(bdb);
1530   if(bdb->open){
1531     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1532     return false;
1533   }
1534   bdb->capnum = capnum;
1535   return true;
1536 }
1537 
1538 
1539 /* Set the custom codec functions of a B+ tree database object. */
tcbdbsetcodecfunc(TCBDB * bdb,TCCODEC enc,void * encop,TCCODEC dec,void * decop)1540 bool tcbdbsetcodecfunc(TCBDB *bdb, TCCODEC enc, void *encop, TCCODEC dec, void *decop){
1541   assert(bdb && enc && dec);
1542   if(!BDBLOCKMETHOD(bdb, true)) return false;
1543   if(bdb->open){
1544     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1545     BDBUNLOCKMETHOD(bdb);
1546     return false;
1547   }
1548   bool rv = tchdbsetcodecfunc(bdb->hdb, enc, encop, dec, decop);
1549   BDBUNLOCKMETHOD(bdb);
1550   return rv;
1551 }
1552 
1553 
1554 /* Get the unit step number of auto defragmentation of a B+ tree database object. */
tcbdbdfunit(TCBDB * bdb)1555 uint32_t tcbdbdfunit(TCBDB *bdb){
1556   assert(bdb);
1557   return tchdbdfunit(bdb->hdb);
1558 }
1559 
1560 
1561 /* Perform dynamic defragmentation of a B+ tree database object. */
tcbdbdefrag(TCBDB * bdb,int64_t step)1562 bool tcbdbdefrag(TCBDB *bdb, int64_t step){
1563   assert(bdb);
1564   if(!BDBLOCKMETHOD(bdb, false)) return false;
1565   if(!bdb->open || !bdb->wmode){
1566     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1567     BDBUNLOCKMETHOD(bdb);
1568     return false;
1569   }
1570   bool rv = tchdbdefrag(bdb->hdb, step);
1571   BDBUNLOCKMETHOD(bdb);
1572   return rv;
1573 }
1574 
1575 
1576 /* Clear the cache of a B+ tree database object. */
tcbdbcacheclear(TCBDB * bdb)1577 bool tcbdbcacheclear(TCBDB *bdb){
1578   assert(bdb);
1579   if(!BDBLOCKMETHOD(bdb, true)) return false;
1580   if(!bdb->open){
1581     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1582     BDBUNLOCKMETHOD(bdb);
1583     return false;
1584   }
1585   BDBTHREADYIELD(bdb);
1586   bool err = false;
1587   bool tran = bdb->tran;
1588   if(TCMAPRNUM(bdb->leafc) > 0){
1589     bool clk = BDBLOCKCACHE(bdb);
1590     TCMAP *leafc = bdb->leafc;
1591     tcmapiterinit(leafc);
1592     int rsiz;
1593     const void *buf;
1594     while((buf = tcmapiternext(leafc, &rsiz)) != NULL){
1595       BDBLEAF *leaf = (BDBLEAF *)tcmapiterval(buf, &rsiz);
1596       if(!(tran && leaf->dirty) && !tcbdbleafcacheout(bdb, leaf)) err = true;
1597     }
1598     if(clk) BDBUNLOCKCACHE(bdb);
1599   }
1600   if(TCMAPRNUM(bdb->nodec) > 0){
1601     bool clk = BDBLOCKCACHE(bdb);
1602     TCMAP *nodec = bdb->nodec;
1603     tcmapiterinit(nodec);
1604     int rsiz;
1605     const void *buf;
1606     while((buf = tcmapiternext(nodec, &rsiz)) != NULL){
1607       BDBNODE *node = (BDBNODE *)tcmapiterval(buf, &rsiz);
1608       if(!(tran && node->dirty) && !tcbdbnodecacheout(bdb, node)) err = true;
1609     }
1610     if(clk) BDBUNLOCKCACHE(bdb);
1611   }
1612   BDBUNLOCKMETHOD(bdb);
1613   return !err;
1614 }
1615 
1616 
1617 /* Store a new record into a B+ tree database object with backward duplication. */
tcbdbputdupback(TCBDB * bdb,const void * kbuf,int ksiz,const void * vbuf,int vsiz)1618 bool tcbdbputdupback(TCBDB *bdb, const void *kbuf, int ksiz, const void *vbuf, int vsiz){
1619   assert(bdb && kbuf && ksiz >= 0 && vbuf && vsiz >= 0);
1620   if(!BDBLOCKMETHOD(bdb, true)) return false;
1621   if(!bdb->open || !bdb->wmode){
1622     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1623     BDBUNLOCKMETHOD(bdb);
1624     return false;
1625   }
1626   bool rv = tcbdbputimpl(bdb, kbuf, ksiz, vbuf, vsiz, BDBPDDUPB);
1627   BDBUNLOCKMETHOD(bdb);
1628   return rv;
1629 }
1630 
1631 
1632 /* Store a record into a B+ tree database object with a duplication handler. */
tcbdbputproc(TCBDB * bdb,const void * kbuf,int ksiz,const void * vbuf,int vsiz,TCPDPROC proc,void * op)1633 bool tcbdbputproc(TCBDB *bdb, const void *kbuf, int ksiz, const void *vbuf, int vsiz,
1634                   TCPDPROC proc, void *op){
1635   assert(bdb && kbuf && ksiz >= 0 && proc);
1636   if(!BDBLOCKMETHOD(bdb, true)) return false;
1637   if(!bdb->open || !bdb->wmode){
1638     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1639     BDBUNLOCKMETHOD(bdb);
1640     return false;
1641   }
1642   BDBPDPROCOP procop;
1643   procop.proc = proc;
1644   procop.op = op;
1645   BDBPDPROCOP *procptr = &procop;
1646   tcgeneric_t stack[(TCNUMBUFSIZ*2)/sizeof(tcgeneric_t)+1];
1647   char *rbuf;
1648   if(ksiz <= sizeof(stack) - sizeof(procptr)){
1649     rbuf = (char *)stack;
1650   } else {
1651     TCMALLOC(rbuf, ksiz + sizeof(procptr));
1652   }
1653   char *wp = rbuf;
1654   memcpy(wp, &procptr, sizeof(procptr));
1655   wp += sizeof(procptr);
1656   memcpy(wp, kbuf, ksiz);
1657   kbuf = rbuf + sizeof(procptr);
1658   bool rv = tcbdbputimpl(bdb, kbuf, ksiz, vbuf, vsiz, BDBPDPROC);
1659   if(rbuf != (char *)stack) TCFREE(rbuf);
1660   BDBUNLOCKMETHOD(bdb);
1661   return rv;
1662 }
1663 
1664 
1665 /* Store a new string record into a B+ tree database object with backward duplication. */
tcbdbputdupback2(TCBDB * bdb,const char * kstr,const char * vstr)1666 bool tcbdbputdupback2(TCBDB *bdb, const char *kstr, const char *vstr){
1667   assert(bdb && kstr && vstr);
1668   return tcbdbputdupback(bdb, kstr, strlen(kstr), vstr, strlen(vstr));
1669 }
1670 
1671 
1672 /* Move a cursor object to the rear of records corresponding a key. */
tcbdbcurjumpback(BDBCUR * cur,const void * kbuf,int ksiz)1673 bool tcbdbcurjumpback(BDBCUR *cur, const void *kbuf, int ksiz){
1674   assert(cur && kbuf && ksiz >= 0);
1675   TCBDB *bdb = cur->bdb;
1676   if(!BDBLOCKMETHOD(bdb, false)) return false;
1677   if(!bdb->open){
1678     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1679     BDBUNLOCKMETHOD(bdb);
1680     return false;
1681   }
1682   bool rv = tcbdbcurjumpimpl(cur, kbuf, ksiz, false);
1683   BDBUNLOCKMETHOD(bdb);
1684   return rv;
1685 }
1686 
1687 
1688 /* Move a cursor object to the rear of records corresponding a key string. */
tcbdbcurjumpback2(BDBCUR * cur,const char * kstr)1689 bool tcbdbcurjumpback2(BDBCUR *cur, const char *kstr){
1690   assert(cur && kstr);
1691   return tcbdbcurjumpback(cur, kstr, strlen(kstr));
1692 }
1693 
1694 
1695 /* Process each record atomically of a B+ tree database object. */
tcbdbforeach(TCBDB * bdb,TCITER iter,void * op)1696 bool tcbdbforeach(TCBDB *bdb, TCITER iter, void *op){
1697   assert(bdb && iter);
1698   if(!BDBLOCKMETHOD(bdb, true)) return false;
1699   if(!bdb->open){
1700     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
1701     BDBUNLOCKMETHOD(bdb);
1702     return false;
1703   }
1704   BDBTHREADYIELD(bdb);
1705   bool rv = tcbdbforeachimpl(bdb, iter, op);
1706   BDBUNLOCKMETHOD(bdb);
1707   return rv;
1708 }
1709 
1710 
1711 
1712 /*************************************************************************************************
1713  * private features
1714  *************************************************************************************************/
1715 
1716 
1717 /* Clear all members.
1718    `bdb' specifies the B+ tree database object. */
tcbdbclear(TCBDB * bdb)1719 static void tcbdbclear(TCBDB *bdb){
1720   assert(bdb);
1721   bdb->mmtx = NULL;
1722   bdb->cmtx = NULL;
1723   bdb->hdb = NULL;
1724   bdb->opaque = NULL;
1725   bdb->open = false;
1726   bdb->wmode = false;
1727   bdb->lmemb = BDBDEFLMEMB;
1728   bdb->nmemb = BDBDEFNMEMB;
1729   bdb->opts = 0;
1730   bdb->root = 0;
1731   bdb->first = 0;
1732   bdb->last = 0;
1733   bdb->lnum = 0;
1734   bdb->nnum = 0;
1735   bdb->rnum = 0;
1736   bdb->leafc = NULL;
1737   bdb->nodec = NULL;
1738   bdb->cmp = NULL;
1739   bdb->cmpop = NULL;
1740   bdb->lcnum = BDBDEFLCNUM;
1741   bdb->ncnum = BDBDEFNCNUM;
1742   bdb->lsmax = BDBDEFLSMAX;
1743   bdb->lschk = 0;
1744   bdb->capnum = 0;
1745   bdb->hist = NULL;
1746   bdb->hnum = 0;
1747   bdb->hleaf = 0;
1748   bdb->lleaf = 0;
1749   bdb->tran = false;
1750   bdb->rbopaque = NULL;
1751   bdb->clock = 0;
1752   bdb->cnt_saveleaf = -1;
1753   bdb->cnt_loadleaf = -1;
1754   bdb->cnt_killleaf = -1;
1755   bdb->cnt_adjleafc = -1;
1756   bdb->cnt_savenode = -1;
1757   bdb->cnt_loadnode = -1;
1758   bdb->cnt_adjnodec = -1;
1759   TCDODEBUG(bdb->cnt_saveleaf = 0);
1760   TCDODEBUG(bdb->cnt_loadleaf = 0);
1761   TCDODEBUG(bdb->cnt_killleaf = 0);
1762   TCDODEBUG(bdb->cnt_adjleafc = 0);
1763   TCDODEBUG(bdb->cnt_savenode = 0);
1764   TCDODEBUG(bdb->cnt_loadnode = 0);
1765   TCDODEBUG(bdb->cnt_adjnodec = 0);
1766 }
1767 
1768 
1769 /* Serialize meta data into the opaque field.
1770    `bdb' specifies the B+ tree database object. */
tcbdbdumpmeta(TCBDB * bdb)1771 static void tcbdbdumpmeta(TCBDB *bdb){
1772   assert(bdb);
1773   memset(bdb->opaque, 0, 64);
1774   char *wp = bdb->opaque;
1775   if(bdb->cmp == tccmplexical){
1776     *(uint8_t *)(wp++) = 0x0;
1777   } else if(bdb->cmp == tccmpdecimal){
1778     *(uint8_t *)(wp++) = 0x1;
1779   } else if(bdb->cmp == tccmpint32){
1780     *(uint8_t *)(wp++) = 0x2;
1781   } else if(bdb->cmp == tccmpint64){
1782     *(uint8_t *)(wp++) = 0x3;
1783   } else {
1784     *(uint8_t *)(wp++) = 0xff;
1785   }
1786   wp += 7;
1787   uint32_t lnum;
1788   lnum = bdb->lmemb;
1789   lnum = TCHTOIL(lnum);
1790   memcpy(wp, &lnum, sizeof(lnum));
1791   wp += sizeof(lnum);
1792   lnum = bdb->nmemb;
1793   lnum = TCHTOIL(lnum);
1794   memcpy(wp, &lnum, sizeof(lnum));
1795   wp += sizeof(lnum);
1796   uint64_t llnum;
1797   llnum = bdb->root;
1798   llnum = TCHTOILL(llnum);
1799   memcpy(wp, &llnum, sizeof(llnum));
1800   wp += sizeof(llnum);
1801   llnum = bdb->first;
1802   llnum = TCHTOILL(llnum);
1803   memcpy(wp, &llnum, sizeof(llnum));
1804   wp += sizeof(llnum);
1805   llnum = bdb->last;
1806   llnum = TCHTOILL(llnum);
1807   memcpy(wp, &llnum, sizeof(llnum));
1808   wp += sizeof(llnum);
1809   llnum = bdb->lnum;
1810   llnum = TCHTOILL(llnum);
1811   memcpy(wp, &llnum, sizeof(llnum));
1812   wp += sizeof(llnum);
1813   llnum = bdb->nnum;
1814   llnum = TCHTOILL(llnum);
1815   memcpy(wp, &llnum, sizeof(llnum));
1816   wp += sizeof(llnum);
1817   llnum = bdb->rnum;
1818   llnum = TCHTOILL(llnum);
1819   memcpy(wp, &llnum, sizeof(llnum));
1820   wp += sizeof(llnum);
1821 }
1822 
1823 
1824 /* Deserialize meta data from the opaque field.
1825    `bdb' specifies the B+ tree database object. */
tcbdbloadmeta(TCBDB * bdb)1826 static void tcbdbloadmeta(TCBDB *bdb){
1827   const char *rp = bdb->opaque;
1828   uint8_t cnum = *(uint8_t *)(rp++);
1829   if(cnum == 0x0){
1830     bdb->cmp = tccmplexical;
1831   } else if(cnum == 0x1){
1832     bdb->cmp = tccmpdecimal;
1833   } else if(cnum == 0x2){
1834     bdb->cmp = tccmpint32;
1835   } else if(cnum == 0x3){
1836     bdb->cmp = tccmpint64;
1837   }
1838   rp += 7;
1839   uint32_t lnum;
1840   memcpy(&lnum, rp, sizeof(lnum));
1841   rp += sizeof(lnum);
1842   bdb->lmemb = TCITOHL(lnum);
1843   memcpy(&lnum, rp, sizeof(lnum));
1844   rp += sizeof(lnum);
1845   bdb->nmemb = TCITOHL(lnum);
1846   uint64_t llnum;
1847   memcpy(&llnum, rp, sizeof(llnum));
1848   bdb->root = TCITOHLL(llnum);
1849   rp += sizeof(llnum);
1850   memcpy(&llnum, rp, sizeof(llnum));
1851   bdb->first = TCITOHLL(llnum);
1852   rp += sizeof(llnum);
1853   memcpy(&llnum, rp, sizeof(llnum));
1854   bdb->last = TCITOHLL(llnum);
1855   rp += sizeof(llnum);
1856   memcpy(&llnum, rp, sizeof(llnum));
1857   bdb->lnum = TCITOHLL(llnum);
1858   rp += sizeof(llnum);
1859   memcpy(&llnum, rp, sizeof(llnum));
1860   bdb->nnum = TCITOHLL(llnum);
1861   rp += sizeof(llnum);
1862   memcpy(&llnum, rp, sizeof(llnum));
1863   bdb->rnum = TCITOHLL(llnum);
1864   rp += sizeof(llnum);
1865 }
1866 
1867 
1868 /* Create a new leaf.
1869    `bdb' specifies the B+ tree database object.
1870    `prev' specifies the ID number of the previous leaf.
1871    `next' specifies the ID number of the next leaf.
1872    The return value is the new leaf object. */
tcbdbleafnew(TCBDB * bdb,uint64_t prev,uint64_t next)1873 static BDBLEAF *tcbdbleafnew(TCBDB *bdb, uint64_t prev, uint64_t next){
1874   assert(bdb);
1875   BDBLEAF lent;
1876   lent.id = ++bdb->lnum;
1877   lent.recs = tcptrlistnew2(bdb->lmemb + 1);
1878   lent.size = 0;
1879   lent.prev = prev;
1880   lent.next = next;
1881   lent.dirty = true;
1882   lent.dead = false;
1883   tcmapputkeep(bdb->leafc, &(lent.id), sizeof(lent.id), &lent, sizeof(lent));
1884   int rsiz;
1885   return (BDBLEAF *)tcmapget(bdb->leafc, &(lent.id), sizeof(lent.id), &rsiz);
1886 }
1887 
1888 
1889 /* Remove a leaf from the cache.
1890    `bdb' specifies the B+ tree database object.
1891    `leaf' specifies the leaf object.
1892    If successful, the return value is true, else, it is false. */
tcbdbleafcacheout(TCBDB * bdb,BDBLEAF * leaf)1893 static bool tcbdbleafcacheout(TCBDB *bdb, BDBLEAF *leaf){
1894   assert(bdb && leaf);
1895   bool err = false;
1896   if(leaf->dirty && !tcbdbleafsave(bdb, leaf)) err = true;
1897   TCPTRLIST *recs = leaf->recs;
1898   int ln = TCPTRLISTNUM(recs);
1899   for(int i = 0; i < ln; i++){
1900     BDBREC *rec = TCPTRLISTVAL(recs, i);
1901     if(rec->rest) tclistdel(rec->rest);
1902     TCFREE(rec);
1903   }
1904   tcptrlistdel(recs);
1905   tcmapout(bdb->leafc, &(leaf->id), sizeof(leaf->id));
1906   return !err;
1907 }
1908 
1909 
1910 /* Save a leaf into the internal database.
1911    `bdb' specifies the B+ tree database object.
1912    `leaf' specifies the leaf object.
1913    If successful, the return value is true, else, it is false. */
tcbdbleafsave(TCBDB * bdb,BDBLEAF * leaf)1914 static bool tcbdbleafsave(TCBDB *bdb, BDBLEAF *leaf){
1915   assert(bdb && leaf);
1916   TCDODEBUG(bdb->cnt_saveleaf++);
1917   TCXSTR *rbuf = tcxstrnew3(BDBPAGEBUFSIZ);
1918   char hbuf[(sizeof(uint64_t)+1)*3];
1919   char *wp = hbuf;
1920   uint64_t llnum;
1921   int step;
1922   llnum = leaf->prev;
1923   TCSETVNUMBUF64(step, wp, llnum);
1924   wp += step;
1925   llnum = leaf->next;
1926   TCSETVNUMBUF64(step, wp, llnum);
1927   wp += step;
1928   TCXSTRCAT(rbuf, hbuf, wp - hbuf);
1929   TCPTRLIST *recs = leaf->recs;
1930   int ln = TCPTRLISTNUM(recs);
1931   for(int i = 0; i < ln; i++){
1932     BDBREC *rec = TCPTRLISTVAL(recs, i);
1933     char *dbuf = (char *)rec + sizeof(*rec);
1934     int lnum;
1935     wp = hbuf;
1936     lnum = rec->ksiz;
1937     TCSETVNUMBUF(step, wp, lnum);
1938     wp += step;
1939     lnum = rec->vsiz;
1940     TCSETVNUMBUF(step, wp, lnum);
1941     wp += step;
1942     TCLIST *rest = rec->rest;
1943     int rnum = rest ? TCLISTNUM(rest) : 0;
1944     TCSETVNUMBUF(step, wp, rnum);
1945     wp += step;
1946     TCXSTRCAT(rbuf, hbuf, wp - hbuf);
1947     TCXSTRCAT(rbuf, dbuf, rec->ksiz);
1948     TCXSTRCAT(rbuf, dbuf + rec->ksiz + TCALIGNPAD(rec->ksiz), rec->vsiz);
1949     for(int j = 0; j < rnum; j++){
1950       const char *vbuf;
1951       int vsiz;
1952       TCLISTVAL(vbuf, rest, j, vsiz);
1953       TCSETVNUMBUF(step, hbuf, vsiz);
1954       TCXSTRCAT(rbuf, hbuf, step);
1955       TCXSTRCAT(rbuf, vbuf, vsiz);
1956     }
1957   }
1958   bool err = false;
1959   step = sprintf(hbuf, "%llx", (unsigned long long)leaf->id);
1960   if(ln < 1 && !tchdbout(bdb->hdb, hbuf, step) && tchdbecode(bdb->hdb) != TCENOREC)
1961     err = true;
1962   if(!leaf->dead && !tchdbput(bdb->hdb, hbuf, step, TCXSTRPTR(rbuf), TCXSTRSIZE(rbuf)))
1963     err = true;
1964   tcxstrdel(rbuf);
1965   leaf->dirty = false;
1966   leaf->dead = false;
1967   return !err;
1968 }
1969 
1970 
1971 /* Load a leaf from the internal database.
1972    `bdb' specifies the B+ tree database object.
1973    `id' specifies the ID number of the leaf.
1974    The return value is the leaf object or `NULL' on failure. */
tcbdbleafload(TCBDB * bdb,uint64_t id)1975 static BDBLEAF *tcbdbleafload(TCBDB *bdb, uint64_t id){
1976   assert(bdb && id > 0);
1977   bool clk = BDBLOCKCACHE(bdb);
1978   int rsiz;
1979   BDBLEAF *leaf = (BDBLEAF *)tcmapget3(bdb->leafc, &id, sizeof(id), &rsiz);
1980   if(leaf){
1981     if(clk) BDBUNLOCKCACHE(bdb);
1982     return leaf;
1983   }
1984   if(clk) BDBUNLOCKCACHE(bdb);
1985   TCDODEBUG(bdb->cnt_loadleaf++);
1986   char hbuf[(sizeof(uint64_t)+1)*3];
1987   int step;
1988   step = sprintf(hbuf, "%llx", (unsigned long long)id);
1989   char *rbuf = NULL;
1990   char wbuf[BDBPAGEBUFSIZ];
1991   const char *rp = NULL;
1992   rsiz = tchdbget3(bdb->hdb, hbuf, step, wbuf, BDBPAGEBUFSIZ);
1993   if(rsiz < 1){
1994     tcbdbsetecode(bdb, TCEMISC, __FILE__, __LINE__, __func__);
1995     return false;
1996   } else if(rsiz < BDBPAGEBUFSIZ){
1997     rp = wbuf;
1998   } else {
1999     if(!(rbuf = tchdbget(bdb->hdb, hbuf, step, &rsiz))){
2000       tcbdbsetecode(bdb, TCEMISC, __FILE__, __LINE__, __func__);
2001       return false;
2002     }
2003     rp = rbuf;
2004   }
2005   BDBLEAF lent;
2006   lent.id = id;
2007   uint64_t llnum;
2008   TCREADVNUMBUF64(rp, llnum, step);
2009   lent.prev = llnum;
2010   rp += step;
2011   rsiz -= step;
2012   TCREADVNUMBUF64(rp, llnum, step);
2013   lent.next = llnum;
2014   rp += step;
2015   rsiz -= step;
2016   lent.dirty = false;
2017   lent.dead = false;
2018   lent.recs = tcptrlistnew2(bdb->lmemb + 1);
2019   lent.size = 0;
2020   bool err = false;
2021   while(rsiz >= 3){
2022     int ksiz;
2023     TCREADVNUMBUF(rp, ksiz, step);
2024     rp += step;
2025     rsiz -= step;
2026     int vsiz;
2027     TCREADVNUMBUF(rp, vsiz, step);
2028     rp += step;
2029     rsiz -= step;
2030     int rnum;
2031     TCREADVNUMBUF(rp, rnum, step);
2032     rp += step;
2033     rsiz -= step;
2034     if(rsiz < ksiz + vsiz + rnum){
2035       err = true;
2036       break;
2037     }
2038     int psiz = TCALIGNPAD(ksiz);
2039     BDBREC *nrec;
2040     TCMALLOC(nrec, sizeof(*nrec) + ksiz + psiz + vsiz + 1);
2041     char *dbuf = (char *)nrec + sizeof(*nrec);
2042     memcpy(dbuf, rp, ksiz);
2043     dbuf[ksiz] = '\0';
2044     nrec->ksiz = ksiz;
2045     rp += ksiz;
2046     rsiz -= ksiz;
2047     memcpy(dbuf + ksiz + psiz, rp, vsiz);
2048     dbuf[ksiz+psiz+vsiz] = '\0';
2049     nrec->vsiz = vsiz;
2050     rp += vsiz;
2051     rsiz -= vsiz;
2052     lent.size += ksiz;
2053     lent.size += vsiz;
2054     if(rnum > 0){
2055       nrec->rest = tclistnew2(rnum);
2056       while(rnum-- > 0 && rsiz > 0){
2057         TCREADVNUMBUF(rp, vsiz, step);
2058         rp += step;
2059         rsiz -= step;
2060         if(rsiz < vsiz){
2061           err = true;
2062           break;
2063         }
2064         TCLISTPUSH(nrec->rest, rp, vsiz);
2065         rp += vsiz;
2066         rsiz -= vsiz;
2067         lent.size += vsiz;
2068       }
2069     } else {
2070       nrec->rest = NULL;
2071     }
2072     TCPTRLISTPUSH(lent.recs, nrec);
2073   }
2074   TCFREE(rbuf);
2075   if(err || rsiz != 0){
2076     tcbdbsetecode(bdb, TCEMISC, __FILE__, __LINE__, __func__);
2077     return NULL;
2078   }
2079   clk = BDBLOCKCACHE(bdb);
2080   if(!tcmapputkeep(bdb->leafc, &(lent.id), sizeof(lent.id), &lent, sizeof(lent))){
2081     int ln = TCPTRLISTNUM(lent.recs);
2082     for(int i = 0; i < ln; i++){
2083       BDBREC *rec = TCPTRLISTVAL(lent.recs, i);
2084       if(rec->rest) tclistdel(rec->rest);
2085       TCFREE(rec);
2086     }
2087     tcptrlistdel(lent.recs);
2088   }
2089   leaf = (BDBLEAF *)tcmapget(bdb->leafc, &(lent.id), sizeof(lent.id), &rsiz);
2090   if(clk) BDBUNLOCKCACHE(bdb);
2091   return leaf;
2092 }
2093 
2094 
2095 /* Check existence of a leaf in the internal database.
2096    `bdb' specifies the B+ tree database object.
2097    `id' specifies the ID number of the leaf.
2098    The return value is true if the leaf exists, else, it is false. */
tcbdbleafcheck(TCBDB * bdb,uint64_t id)2099 static bool tcbdbleafcheck(TCBDB *bdb, uint64_t id){
2100   assert(bdb && id > 0);
2101   bool clk = BDBLOCKCACHE(bdb);
2102   int rsiz;
2103   BDBLEAF *leaf = (BDBLEAF *)tcmapget(bdb->leafc, &id, sizeof(id), &rsiz);
2104   if(clk) BDBUNLOCKCACHE(bdb);
2105   if(leaf) return true;
2106   char hbuf[(sizeof(uint64_t)+1)*3];
2107   int step = sprintf(hbuf, "%llx", (unsigned long long)id);
2108   return tchdbvsiz(bdb->hdb, hbuf, step) > 0;
2109 }
2110 
2111 
2112 /* Load the historical leaf from the internal database.
2113    `bdb' specifies the B+ tree database object.
2114    `kbuf' specifies the pointer to the region of the key.
2115    `ksiz' specifies the size of the region of the key.
2116    `id' specifies the ID number of the historical leaf.
2117    If successful, the return value is the pointer to the leaf, else, it is `NULL'. */
tcbdbgethistleaf(TCBDB * bdb,const char * kbuf,int ksiz,uint64_t id)2118 static BDBLEAF *tcbdbgethistleaf(TCBDB *bdb, const char *kbuf, int ksiz, uint64_t id){
2119   assert(bdb && kbuf && ksiz >= 0 && id > 0);
2120   BDBLEAF *leaf = tcbdbleafload(bdb, id);
2121   if(!leaf) return NULL;
2122   int ln = TCPTRLISTNUM(leaf->recs);
2123   if(ln < 2) return NULL;
2124   BDBREC *rec = TCPTRLISTVAL(leaf->recs, 0);
2125   char *dbuf = (char *)rec + sizeof(*rec);
2126   int rv;
2127   if(bdb->cmp == tccmplexical){
2128     TCCMPLEXICAL(rv, kbuf, ksiz, dbuf, rec->ksiz);
2129   } else {
2130     rv = bdb->cmp(kbuf, ksiz, dbuf, rec->ksiz, bdb->cmpop);
2131   }
2132   if(rv == 0) return leaf;
2133   if(rv < 0) return NULL;
2134   rec = TCPTRLISTVAL(leaf->recs, ln - 1);
2135   dbuf = (char *)rec + sizeof(*rec);
2136   if(bdb->cmp == tccmplexical){
2137     TCCMPLEXICAL(rv, kbuf, ksiz, dbuf, rec->ksiz);
2138   } else {
2139     rv = bdb->cmp(kbuf, ksiz, dbuf, rec->ksiz, bdb->cmpop);
2140   }
2141   if(rv <= 0 || leaf->next < 1) return leaf;
2142   return NULL;
2143 }
2144 
2145 
2146 /* Add a record to a leaf.
2147    `bdb' specifies the B+ tree database object.
2148    `leaf' specifies the leaf object.
2149    `dmode' specifies behavior when the key overlaps.
2150    `kbuf' specifies the pointer to the region of the key.
2151    `ksiz' specifies the size of the region of the key.
2152    `vbuf' specifies the pointer to the region of the value.
2153    `vsiz' specifies the size of the region of the value.
2154    If successful, the return value is true, else, it is false. */
tcbdbleafaddrec(TCBDB * bdb,BDBLEAF * leaf,int dmode,const char * kbuf,int ksiz,const char * vbuf,int vsiz)2155 static bool tcbdbleafaddrec(TCBDB *bdb, BDBLEAF *leaf, int dmode,
2156                             const char *kbuf, int ksiz, const char *vbuf, int vsiz){
2157   assert(bdb && leaf && kbuf && ksiz >= 0);
2158   TCCMP cmp = bdb->cmp;
2159   void *cmpop = bdb->cmpop;
2160   TCPTRLIST *recs = leaf->recs;
2161   int ln = TCPTRLISTNUM(recs);
2162   int left = 0;
2163   int right = ln;
2164   int i = (left + right) / 2;
2165   while(right >= left && i < ln){
2166     BDBREC *rec = TCPTRLISTVAL(recs, i);
2167     char *dbuf = (char *)rec + sizeof(*rec);
2168     int rv;
2169     if(cmp == tccmplexical){
2170       TCCMPLEXICAL(rv, kbuf, ksiz, dbuf, rec->ksiz);
2171     } else {
2172       rv = cmp(kbuf, ksiz, dbuf, rec->ksiz, cmpop);
2173     }
2174     if(rv == 0){
2175       break;
2176     } else if(rv <= 0){
2177       right = i - 1;
2178     } else {
2179       left = i + 1;
2180     }
2181     i = (left + right) / 2;
2182   }
2183   while(i < ln){
2184     BDBREC *rec = TCPTRLISTVAL(recs, i);
2185     char *dbuf = (char *)rec + sizeof(*rec);
2186     int rv;
2187     if(cmp == tccmplexical){
2188       TCCMPLEXICAL(rv, kbuf, ksiz, dbuf, rec->ksiz);
2189     } else {
2190       rv = cmp(kbuf, ksiz, dbuf, rec->ksiz, cmpop);
2191     }
2192     if(rv == 0){
2193       int psiz = TCALIGNPAD(rec->ksiz);
2194       BDBREC *orec = rec;
2195       BDBPDPROCOP *procptr;
2196       int nvsiz;
2197       char *nvbuf;
2198       switch(dmode){
2199         case BDBPDKEEP:
2200           tcbdbsetecode(bdb, TCEKEEP, __FILE__, __LINE__, __func__);
2201           return false;
2202         case BDBPDCAT:
2203           leaf->size += vsiz;
2204           TCREALLOC(rec, rec, sizeof(*rec) + rec->ksiz + psiz + rec->vsiz + vsiz + 1);
2205           if(rec != orec){
2206             tcptrlistover(recs, i, rec);
2207             dbuf = (char *)rec + sizeof(*rec);
2208           }
2209           memcpy(dbuf + rec->ksiz + psiz + rec->vsiz, vbuf, vsiz);
2210           rec->vsiz += vsiz;
2211           dbuf[rec->ksiz+psiz+rec->vsiz] = '\0';
2212           break;
2213         case BDBPDDUP:
2214           leaf->size += vsiz;
2215           if(!rec->rest) rec->rest = tclistnew2(1);
2216           TCLISTPUSH(rec->rest, vbuf, vsiz);
2217           bdb->rnum++;
2218           break;
2219         case BDBPDDUPB:
2220           leaf->size += vsiz;
2221           if(!rec->rest) rec->rest = tclistnew2(1);
2222           tclistunshift(rec->rest, dbuf + rec->ksiz + psiz, rec->vsiz);
2223           if(vsiz > rec->vsiz){
2224             TCREALLOC(rec, rec, sizeof(*rec) + rec->ksiz + psiz + vsiz + 1);
2225             if(rec != orec){
2226               tcptrlistover(recs, i, rec);
2227               dbuf = (char *)rec + sizeof(*rec);
2228             }
2229           }
2230           memcpy(dbuf + rec->ksiz + psiz, vbuf, vsiz);
2231           dbuf[rec->ksiz+psiz+vsiz] = '\0';
2232           rec->vsiz = vsiz;
2233           bdb->rnum++;
2234           break;
2235         case BDBPDADDINT:
2236           if(rec->vsiz != sizeof(int)){
2237             tcbdbsetecode(bdb, TCEKEEP, __FILE__, __LINE__, __func__);
2238             return false;
2239           }
2240           if(*(int *)vbuf == 0){
2241             *(int *)vbuf = *(int *)(dbuf + rec->ksiz + psiz);
2242             return true;
2243           }
2244           *(int *)(dbuf + rec->ksiz + psiz) += *(int *)vbuf;
2245           *(int *)vbuf = *(int *)(dbuf + rec->ksiz + psiz);
2246           break;
2247         case BDBPDADDDBL:
2248           if(rec->vsiz != sizeof(double)){
2249             tcbdbsetecode(bdb, TCEKEEP, __FILE__, __LINE__, __func__);
2250             return false;
2251           }
2252           if(*(double *)vbuf == 0.0){
2253             *(double *)vbuf = *(double *)(dbuf + rec->ksiz + psiz);
2254             return true;
2255           }
2256           *(double *)(dbuf + rec->ksiz + psiz) += *(double *)vbuf;
2257           *(double *)vbuf = *(double *)(dbuf + rec->ksiz + psiz);
2258           break;
2259         case BDBPDPROC:
2260           procptr = *(BDBPDPROCOP **)((char *)kbuf - sizeof(procptr));
2261           nvbuf = procptr->proc(dbuf + rec->ksiz + psiz, rec->vsiz, &nvsiz, procptr->op);
2262           if(nvbuf == (void *)-1){
2263             tcbdbremoverec(bdb, leaf, rec, i);
2264           } else if(nvbuf){
2265             leaf->size += nvsiz - rec->vsiz;
2266             if(nvsiz > rec->vsiz){
2267               TCREALLOC(rec, rec, sizeof(*rec) + rec->ksiz + psiz + nvsiz + 1);
2268               if(rec != orec){
2269                 tcptrlistover(recs, i, rec);
2270                 dbuf = (char *)rec + sizeof(*rec);
2271               }
2272             }
2273             memcpy(dbuf + rec->ksiz + psiz, nvbuf, nvsiz);
2274             dbuf[rec->ksiz+psiz+nvsiz] = '\0';
2275             rec->vsiz = nvsiz;
2276             TCFREE(nvbuf);
2277           } else {
2278             tcbdbsetecode(bdb, TCEKEEP, __FILE__, __LINE__, __func__);
2279             return false;
2280           }
2281           break;
2282         default:
2283           leaf->size += vsiz - rec->vsiz;
2284           if(vsiz > rec->vsiz){
2285             TCREALLOC(rec, rec, sizeof(*rec) + rec->ksiz + psiz + vsiz + 1);
2286             if(rec != orec){
2287               tcptrlistover(recs, i, rec);
2288               dbuf = (char *)rec + sizeof(*rec);
2289             }
2290           }
2291           memcpy(dbuf + rec->ksiz + psiz, vbuf, vsiz);
2292           dbuf[rec->ksiz+psiz+vsiz] = '\0';
2293           rec->vsiz = vsiz;
2294           break;
2295       }
2296       break;
2297     } else if(rv < 0){
2298       if(!vbuf){
2299         tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
2300         return false;
2301       }
2302       leaf->size += ksiz + vsiz;
2303       int psiz = TCALIGNPAD(ksiz);
2304       BDBREC *nrec;
2305       TCMALLOC(nrec, sizeof(*nrec) + ksiz + psiz + vsiz + 1);
2306       char *dbuf = (char *)nrec + sizeof(*nrec);
2307       memcpy(dbuf, kbuf, ksiz);
2308       dbuf[ksiz] = '\0';
2309       nrec->ksiz = ksiz;
2310       memcpy(dbuf + ksiz + psiz, vbuf, vsiz);
2311       dbuf[ksiz+psiz+vsiz] = '\0';
2312       nrec->vsiz = vsiz;
2313       nrec->rest = NULL;
2314       TCPTRLISTINSERT(recs, i, nrec);
2315       bdb->rnum++;
2316       break;
2317     }
2318     i++;
2319   }
2320   if(i >= ln){
2321     if(!vbuf){
2322       tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
2323       return false;
2324     }
2325     leaf->size += ksiz + vsiz;
2326     int psiz = TCALIGNPAD(ksiz);
2327     BDBREC *nrec;
2328     TCMALLOC(nrec, sizeof(*nrec) + ksiz + psiz + vsiz + 1);
2329     char *dbuf = (char *)nrec + sizeof(*nrec);
2330     memcpy(dbuf, kbuf, ksiz);
2331     dbuf[ksiz] = '\0';
2332     nrec->ksiz = ksiz;
2333     memcpy(dbuf + ksiz + psiz, vbuf, vsiz);
2334     dbuf[ksiz+psiz+vsiz] = '\0';
2335     nrec->vsiz = vsiz;
2336     nrec->rest = NULL;
2337     TCPTRLISTPUSH(recs, nrec);
2338     bdb->rnum++;
2339   }
2340   leaf->dirty = true;
2341   return true;
2342 }
2343 
2344 
2345 /* Divide a leaf into two.
2346    `bdb' specifies the B+ tree database object.
2347    `leaf' specifies the leaf object.
2348    The return value is the new leaf object or `NULL' on failure. */
tcbdbleafdivide(TCBDB * bdb,BDBLEAF * leaf)2349 static BDBLEAF *tcbdbleafdivide(TCBDB *bdb, BDBLEAF *leaf){
2350   assert(bdb && leaf);
2351   bdb->hleaf = 0;
2352   TCPTRLIST *recs = leaf->recs;
2353   int mid = TCPTRLISTNUM(recs) / 2;
2354   BDBLEAF *newleaf = tcbdbleafnew(bdb, leaf->id, leaf->next);
2355   if(newleaf->next > 0){
2356     BDBLEAF *nextleaf = tcbdbleafload(bdb, newleaf->next);
2357     if(!nextleaf) return NULL;
2358     nextleaf->prev = newleaf->id;
2359     nextleaf->dirty = true;
2360   }
2361   leaf->next = newleaf->id;
2362   leaf->dirty = true;
2363   int ln = TCPTRLISTNUM(recs);
2364   TCPTRLIST *newrecs = newleaf->recs;
2365   int nsiz = 0;
2366   for(int i = mid; i < ln; i++){
2367     BDBREC *rec = TCPTRLISTVAL(recs, i);
2368     nsiz += rec->ksiz + rec->vsiz;
2369     if(rec->rest){
2370       TCLIST *rest = rec->rest;
2371       int rnum = TCLISTNUM(rest);
2372       for(int j = 0; j < rnum; j++){
2373         nsiz += TCLISTVALSIZ(rest, j);
2374       }
2375     }
2376     TCPTRLISTPUSH(newrecs, rec);
2377   }
2378   TCPTRLISTTRUNC(recs, TCPTRLISTNUM(recs) - TCPTRLISTNUM(newrecs));
2379   leaf->size -= nsiz;
2380   newleaf->size = nsiz;
2381   return newleaf;
2382 }
2383 
2384 
2385 /* Cut off the path to a leaf and mark it dead.
2386    `bdb' specifies the B+ tree database object.
2387    `leaf' specifies the leaf object.
2388    If successful, the return value is true, else, it is false. */
tcbdbleafkill(TCBDB * bdb,BDBLEAF * leaf)2389 static bool tcbdbleafkill(TCBDB *bdb, BDBLEAF *leaf){
2390   assert(bdb && leaf);
2391   BDBNODE *node = tcbdbnodeload(bdb, bdb->hist[--bdb->hnum]);
2392   if(!node) return false;
2393   if(tcbdbnodesubidx(bdb, node, leaf->id)){
2394     TCDODEBUG(bdb->cnt_killleaf++);
2395     if(bdb->hleaf == leaf->id) bdb->hleaf = 0;
2396     if(leaf->prev > 0){
2397       BDBLEAF *tleaf = tcbdbleafload(bdb, leaf->prev);
2398       if(!tleaf) return false;
2399       tleaf->next = leaf->next;
2400       tleaf->dirty = true;
2401       if(bdb->last == leaf->id) bdb->last = leaf->prev;
2402     }
2403     if(leaf->next > 0){
2404       BDBLEAF *tleaf = tcbdbleafload(bdb, leaf->next);
2405       if(!tleaf) return false;
2406       tleaf->prev = leaf->prev;
2407       tleaf->dirty = true;
2408       if(bdb->first == leaf->id) bdb->first = leaf->next;
2409     }
2410     leaf->dead = true;
2411   }
2412   bdb->clock++;
2413   return true;
2414 }
2415 
2416 
2417 /* Create a new node.
2418    `bdb' specifies the B+ tree database object.
2419    `heir' specifies the ID of the child before the first index.
2420    The return value is the new node object. */
tcbdbnodenew(TCBDB * bdb,uint64_t heir)2421 static BDBNODE *tcbdbnodenew(TCBDB *bdb, uint64_t heir){
2422   assert(bdb && heir > 0);
2423   BDBNODE nent;
2424   nent.id = ++bdb->nnum + BDBNODEIDBASE;
2425   nent.idxs = tcptrlistnew2(bdb->nmemb + 1);
2426   nent.heir = heir;
2427   nent.dirty = true;
2428   nent.dead = false;
2429   tcmapputkeep(bdb->nodec, &(nent.id), sizeof(nent.id), &nent, sizeof(nent));
2430   int rsiz;
2431   return (BDBNODE *)tcmapget(bdb->nodec, &(nent.id), sizeof(nent.id), &rsiz);
2432 }
2433 
2434 
2435 /* Remove a node from the cache.
2436    `bdb' specifies the B+ tree database object.
2437    `node' specifies the node object.
2438    If successful, the return value is true, else, it is false. */
tcbdbnodecacheout(TCBDB * bdb,BDBNODE * node)2439 static bool tcbdbnodecacheout(TCBDB *bdb, BDBNODE *node){
2440   assert(bdb && node);
2441   bool err = false;
2442   if(node->dirty && !tcbdbnodesave(bdb, node)) err = true;
2443   TCPTRLIST *idxs = node->idxs;
2444   int ln = TCPTRLISTNUM(idxs);
2445   for(int i = 0; i < ln; i++){
2446     BDBIDX *idx = TCPTRLISTVAL(idxs, i);
2447     TCFREE(idx);
2448   }
2449   tcptrlistdel(idxs);
2450   tcmapout(bdb->nodec, &(node->id), sizeof(node->id));
2451   return !err;
2452 }
2453 
2454 
2455 /* Save a node into the internal database.
2456    `bdb' specifies the B+ tree database object.
2457    `node' specifies the node object.
2458    If successful, the return value is true, else, it is false. */
tcbdbnodesave(TCBDB * bdb,BDBNODE * node)2459 static bool tcbdbnodesave(TCBDB *bdb, BDBNODE *node){
2460   assert(bdb && node);
2461   TCDODEBUG(bdb->cnt_savenode++);
2462   TCXSTR *rbuf = tcxstrnew3(BDBPAGEBUFSIZ);
2463   char hbuf[(sizeof(uint64_t)+1)*2];
2464   uint64_t llnum;
2465   int step;
2466   llnum = node->heir;
2467   TCSETVNUMBUF64(step, hbuf, llnum);
2468   TCXSTRCAT(rbuf, hbuf, step);
2469   TCPTRLIST *idxs = node->idxs;
2470   int ln = TCPTRLISTNUM(idxs);
2471   for(int i = 0; i < ln; i++){
2472     BDBIDX *idx = TCPTRLISTVAL(idxs, i);
2473     char *ebuf = (char *)idx + sizeof(*idx);
2474     char *wp = hbuf;
2475     llnum = idx->pid;
2476     TCSETVNUMBUF64(step, wp, llnum);
2477     wp += step;
2478     uint32_t lnum = idx->ksiz;
2479     TCSETVNUMBUF(step, wp, lnum);
2480     wp += step;
2481     TCXSTRCAT(rbuf, hbuf, wp - hbuf);
2482     TCXSTRCAT(rbuf, ebuf, idx->ksiz);
2483   }
2484   bool err = false;
2485   step = sprintf(hbuf, "#%llx", (unsigned long long)(node->id - BDBNODEIDBASE));
2486   if(ln < 1 && !tchdbout(bdb->hdb, hbuf, step) && tchdbecode(bdb->hdb) != TCENOREC)
2487     err = true;
2488   if(!node->dead && !tchdbput(bdb->hdb, hbuf, step, TCXSTRPTR(rbuf), TCXSTRSIZE(rbuf)))
2489     err = true;
2490   tcxstrdel(rbuf);
2491   node->dirty = false;
2492   node->dead = false;
2493   return !err;
2494 }
2495 
2496 
2497 /* Load a node from the internal database.
2498    `bdb' specifies the B+ tree database object.
2499    `id' specifies the ID number of the node.
2500    The return value is the node object or `NULL' on failure. */
tcbdbnodeload(TCBDB * bdb,uint64_t id)2501 static BDBNODE *tcbdbnodeload(TCBDB *bdb, uint64_t id){
2502   assert(bdb && id > BDBNODEIDBASE);
2503   bool clk = BDBLOCKCACHE(bdb);
2504   int rsiz;
2505   BDBNODE *node = (BDBNODE *)tcmapget3(bdb->nodec, &id, sizeof(id), &rsiz);
2506   if(node){
2507     if(clk) BDBUNLOCKCACHE(bdb);
2508     return node;
2509   }
2510   if(clk) BDBUNLOCKCACHE(bdb);
2511   TCDODEBUG(bdb->cnt_loadnode++);
2512   char hbuf[(sizeof(uint64_t)+1)*2];
2513   int step;
2514   step = sprintf(hbuf, "#%llx", (unsigned long long)(id - BDBNODEIDBASE));
2515   char *rbuf = NULL;
2516   char wbuf[BDBPAGEBUFSIZ];
2517   const char *rp = NULL;
2518   rsiz = tchdbget3(bdb->hdb, hbuf, step, wbuf, BDBPAGEBUFSIZ);
2519   if(rsiz < 1){
2520     tcbdbsetecode(bdb, TCEMISC, __FILE__, __LINE__, __func__);
2521     return NULL;
2522   } else if(rsiz < BDBPAGEBUFSIZ){
2523     rp = wbuf;
2524   } else {
2525     if(!(rbuf = tchdbget(bdb->hdb, hbuf, step, &rsiz))){
2526       tcbdbsetecode(bdb, TCEMISC, __FILE__, __LINE__, __func__);
2527       return NULL;
2528     }
2529     rp = rbuf;
2530   }
2531   BDBNODE nent;
2532   nent.id = id;
2533   uint64_t llnum;
2534   TCREADVNUMBUF64(rp, llnum, step);
2535   nent.heir = llnum;
2536   rp += step;
2537   rsiz -= step;
2538   nent.dirty = false;
2539   nent.dead = false;
2540   nent.idxs = tcptrlistnew2(bdb->nmemb + 1);
2541   bool err = false;
2542   while(rsiz >= 2){
2543     uint64_t pid;
2544     TCREADVNUMBUF64(rp, pid, step);
2545     rp += step;
2546     rsiz -= step;
2547     int ksiz;
2548     TCREADVNUMBUF(rp, ksiz, step);
2549     rp += step;
2550     rsiz -= step;
2551     if(rsiz < ksiz){
2552       err = true;
2553       break;
2554     }
2555     BDBIDX *nidx;
2556     TCMALLOC(nidx, sizeof(*nidx) + ksiz + 1);
2557     nidx->pid = pid;
2558     char *ebuf = (char *)nidx + sizeof(*nidx);
2559     memcpy(ebuf, rp, ksiz);
2560     ebuf[ksiz] = '\0';
2561     nidx->ksiz = ksiz;
2562     rp += ksiz;
2563     rsiz -= ksiz;
2564     TCPTRLISTPUSH(nent.idxs, nidx);
2565   }
2566   TCFREE(rbuf);
2567   if(err || rsiz != 0){
2568     tcbdbsetecode(bdb, TCEMISC, __FILE__, __LINE__, __func__);
2569     return NULL;
2570   }
2571   clk = BDBLOCKCACHE(bdb);
2572   if(!tcmapputkeep(bdb->nodec, &(nent.id), sizeof(nent.id), &nent, sizeof(nent))){
2573     int ln = TCPTRLISTNUM(nent.idxs);
2574     for(int i = 0; i < ln; i++){
2575       BDBIDX *idx = TCPTRLISTVAL(nent.idxs, i);
2576       TCFREE(idx);
2577     }
2578     tcptrlistdel(nent.idxs);
2579   }
2580   node = (BDBNODE *)tcmapget(bdb->nodec, &(nent.id), sizeof(nent.id), &rsiz);
2581   if(clk) BDBUNLOCKCACHE(bdb);
2582   return node;
2583 }
2584 
2585 
2586 /* Add an index to a node.
2587    `bdb' specifies the B+ tree database object.
2588    `node' specifies the node object.
2589    `order' specifies whether the calling sequence is orderd or not.
2590    `pid' specifies the ID number of referred page.
2591    `kbuf' specifies the pointer to the region of the key.
2592    `ksiz' specifies the size of the region of the key. */
tcbdbnodeaddidx(TCBDB * bdb,BDBNODE * node,bool order,uint64_t pid,const char * kbuf,int ksiz)2593 static void tcbdbnodeaddidx(TCBDB *bdb, BDBNODE *node, bool order, uint64_t pid,
2594                             const char *kbuf, int ksiz){
2595   assert(bdb && node && pid > 0 && kbuf && ksiz >= 0);
2596   BDBIDX *nidx;
2597   TCMALLOC(nidx, sizeof(*nidx) + ksiz + 1);
2598   nidx->pid = pid;
2599   char *ebuf = (char *)nidx + sizeof(*nidx);
2600   memcpy(ebuf, kbuf, ksiz);
2601   ebuf[ksiz] = '\0';
2602   nidx->ksiz = ksiz;
2603   TCCMP cmp = bdb->cmp;
2604   void *cmpop = bdb->cmpop;
2605   TCPTRLIST *idxs = node->idxs;
2606   if(order){
2607     TCPTRLISTPUSH(idxs, nidx);
2608   } else {
2609     int ln = TCPTRLISTNUM(idxs);
2610     int left = 0;
2611     int right = ln;
2612     int i = (left + right) / 2;
2613     while(right >= left && i < ln){
2614       BDBIDX *idx = TCPTRLISTVAL(idxs, i);
2615       char *ebuf = (char *)idx + sizeof(*idx);
2616       int rv;
2617       if(cmp == tccmplexical){
2618         TCCMPLEXICAL(rv, kbuf, ksiz, ebuf, idx->ksiz);
2619       } else {
2620         rv = cmp(kbuf, ksiz, ebuf, idx->ksiz, cmpop);
2621       }
2622       if(rv == 0){
2623         break;
2624       } else if(rv <= 0){
2625         right = i - 1;
2626       } else {
2627         left = i + 1;
2628       }
2629       i = (left + right) / 2;
2630     }
2631     while(i < ln){
2632       BDBIDX *idx = TCPTRLISTVAL(idxs, i);
2633       char *ebuf = (char *)idx + sizeof(*idx);
2634       int rv;
2635       if(cmp == tccmplexical){
2636         TCCMPLEXICAL(rv, kbuf, ksiz, ebuf, idx->ksiz);
2637       } else {
2638         rv = cmp(kbuf, ksiz, ebuf, idx->ksiz, cmpop);
2639       }
2640       if(rv < 0){
2641         TCPTRLISTINSERT(idxs, i, nidx);
2642         break;
2643       }
2644       i++;
2645     }
2646     if(i >= ln) TCPTRLISTPUSH(idxs, nidx);
2647   }
2648   node->dirty = true;
2649 }
2650 
2651 
2652 /* Subtract an index from a node.
2653    `bdb' specifies the B+ tree database object.
2654    `node' specifies the node object.
2655    `pid' specifies the ID number of referred page.
2656    The return value is whether the subtraction is completed. */
tcbdbnodesubidx(TCBDB * bdb,BDBNODE * node,uint64_t pid)2657 static bool tcbdbnodesubidx(TCBDB *bdb, BDBNODE *node, uint64_t pid){
2658   assert(bdb && node && pid > 0);
2659   node->dirty = true;
2660   TCPTRLIST *idxs = node->idxs;
2661   if(node->heir == pid){
2662     if(TCPTRLISTNUM(idxs) > 0){
2663       BDBIDX *idx = tcptrlistshift(idxs);
2664       assert(idx);
2665       node->heir = idx->pid;
2666       TCFREE(idx);
2667       return true;
2668     } else if(bdb->hnum > 0){
2669       BDBNODE *pnode = tcbdbnodeload(bdb, bdb->hist[--bdb->hnum]);
2670       if(!pnode){
2671         tcbdbsetecode(bdb, TCEMISC, __FILE__, __LINE__, __func__);
2672         return false;
2673       }
2674       node->dead = true;
2675       return tcbdbnodesubidx(bdb, pnode, node->id);
2676     }
2677     node->dead = true;
2678     bdb->root = pid;
2679     while(pid > BDBNODEIDBASE){
2680       node = tcbdbnodeload(bdb, pid);
2681       if(!node){
2682         tcbdbsetecode(bdb, TCEMISC, __FILE__, __LINE__, __func__);
2683         return false;
2684       }
2685       if(node->dead){
2686         pid = node->heir;
2687         bdb->root = pid;
2688       } else {
2689         pid = 0;
2690       }
2691     }
2692     return false;
2693   }
2694   int ln = TCPTRLISTNUM(idxs);
2695   for(int i = 0; i < ln; i++){
2696     BDBIDX *idx = TCPTRLISTVAL(idxs, i);
2697     if(idx->pid == pid){
2698       TCFREE(tcptrlistremove(idxs, i));
2699       return true;
2700     }
2701   }
2702   tcbdbsetecode(bdb, TCEMISC, __FILE__, __LINE__, __func__);
2703   return false;
2704 }
2705 
2706 
2707 /* Search the leaf object corresponding to a key.
2708    `bdb' specifies the B+ tree database object.
2709    `kbuf' specifies the pointer to the region of the key.
2710    `ksiz' specifies the size of the region of the key.
2711    The return value is the ID number of the leaf object or 0 on failure. */
tcbdbsearchleaf(TCBDB * bdb,const char * kbuf,int ksiz)2712 static uint64_t tcbdbsearchleaf(TCBDB *bdb, const char *kbuf, int ksiz){
2713   assert(bdb && kbuf && ksiz >= 0);
2714   TCCMP cmp = bdb->cmp;
2715   void *cmpop = bdb->cmpop;
2716   uint64_t *hist = bdb->hist;
2717   uint64_t pid = bdb->root;
2718   int hnum = 0;
2719   bdb->hleaf = 0;
2720   while(pid > BDBNODEIDBASE){
2721     BDBNODE *node = tcbdbnodeload(bdb, pid);
2722     if(!node){
2723       tcbdbsetecode(bdb, TCEMISC, __FILE__, __LINE__, __func__);
2724       return 0;
2725     }
2726     hist[hnum++] = node->id;
2727     TCPTRLIST *idxs = node->idxs;
2728     int ln = TCPTRLISTNUM(idxs);
2729     if(ln > 0){
2730       int left = 0;
2731       int right = ln;
2732       int i = (left + right) / 2;
2733       BDBIDX *idx = NULL;
2734       while(right >= left && i < ln){
2735         idx = TCPTRLISTVAL(idxs, i);
2736         char *ebuf = (char *)idx + sizeof(*idx);
2737         int rv;
2738         if(cmp == tccmplexical){
2739           TCCMPLEXICAL(rv, kbuf, ksiz, ebuf, idx->ksiz);
2740         } else {
2741           rv = cmp(kbuf, ksiz, ebuf, idx->ksiz, cmpop);
2742         }
2743         if(rv == 0){
2744           break;
2745         } else if(rv <= 0){
2746           right = i - 1;
2747         } else {
2748           left = i + 1;
2749         }
2750         i = (left + right) / 2;
2751       }
2752       if(i > 0) i--;
2753       while(i < ln){
2754         idx = TCPTRLISTVAL(idxs, i);
2755         char *ebuf = (char *)idx + sizeof(*idx);
2756         int rv;
2757         if(cmp == tccmplexical){
2758           TCCMPLEXICAL(rv, kbuf, ksiz, ebuf, idx->ksiz);
2759         } else {
2760           rv = cmp(kbuf, ksiz, ebuf, idx->ksiz, cmpop);
2761         }
2762         if(rv < 0){
2763           if(i == 0){
2764             pid = node->heir;
2765             break;
2766           }
2767           idx = TCPTRLISTVAL(idxs, i - 1);
2768           pid = idx->pid;
2769           break;
2770         }
2771         i++;
2772       }
2773       if(i >= ln) pid = idx->pid;
2774     } else {
2775       pid = node->heir;
2776     }
2777   }
2778   if(bdb->lleaf == pid) bdb->hleaf = pid;
2779   bdb->lleaf = pid;
2780   bdb->hnum = hnum;
2781   return pid;
2782 }
2783 
2784 
2785 /* Search a record of a leaf.
2786    `bdb' specifies the B+ tree database object.
2787    `leaf' specifies the leaf object.
2788    `kbuf' specifies the pointer to the region of the key.
2789    `ksiz' specifies the size of the region of the key.
2790    `ip' specifies the pointer to a variable to fetch the index of the correspnding record.
2791    The return value is the pointer to a corresponding record or `NULL' on failure. */
tcbdbsearchrec(TCBDB * bdb,BDBLEAF * leaf,const char * kbuf,int ksiz,int * ip)2792 static BDBREC *tcbdbsearchrec(TCBDB *bdb, BDBLEAF *leaf, const char *kbuf, int ksiz, int *ip){
2793   assert(bdb && leaf && kbuf && ksiz >= 0);
2794   TCCMP cmp = bdb->cmp;
2795   void *cmpop = bdb->cmpop;
2796   TCPTRLIST *recs = leaf->recs;
2797   int ln = TCPTRLISTNUM(recs);
2798   int left = 0;
2799   int right = ln;
2800   int i = (left + right) / 2;
2801   while(right >= left && i < ln){
2802     BDBREC *rec = TCPTRLISTVAL(recs, i);
2803     char *dbuf = (char *)rec + sizeof(*rec);
2804     int rv;
2805     if(cmp == tccmplexical){
2806       TCCMPLEXICAL(rv, kbuf, ksiz, dbuf, rec->ksiz);
2807     } else {
2808       rv = cmp(kbuf, ksiz, dbuf, rec->ksiz, cmpop);
2809     }
2810     if(rv == 0){
2811       if(ip) *ip = i;
2812       return rec;
2813     } else if(rv <= 0){
2814       right = i - 1;
2815     } else {
2816       left = i + 1;
2817     }
2818     i = (left + right) / 2;
2819   }
2820   if(ip) *ip = i;
2821   return NULL;
2822 }
2823 
2824 
2825 /* Remove a record from a leaf.
2826    `bdb' specifies the B+ tree database object.
2827    `rec' specifies the record object.
2828    `ri' specifies the index of the record. */
tcbdbremoverec(TCBDB * bdb,BDBLEAF * leaf,BDBREC * rec,int ri)2829 static void tcbdbremoverec(TCBDB *bdb, BDBLEAF *leaf, BDBREC *rec, int ri){
2830   assert(bdb && leaf && rec && ri >= 0);
2831   if(rec->rest){
2832     leaf->size -= rec->vsiz;
2833     int vsiz;
2834     char *vbuf = tclistshift(rec->rest, &vsiz);
2835     int psiz = TCALIGNPAD(rec->ksiz);
2836     if(vsiz > rec->vsiz){
2837       BDBREC *orec = rec;
2838       TCREALLOC(rec, rec, sizeof(*rec) + rec->ksiz + psiz + vsiz + 1);
2839       if(rec != orec) tcptrlistover(leaf->recs, ri, rec);
2840     }
2841     char *dbuf = (char *)rec + sizeof(*rec);
2842     memcpy(dbuf + rec->ksiz + psiz, vbuf, vsiz);
2843     dbuf[rec->ksiz+psiz+vsiz] = '\0';
2844     rec->vsiz = vsiz;
2845     TCFREE(vbuf);
2846     if(TCLISTNUM(rec->rest) < 1){
2847       tclistdel(rec->rest);
2848       rec->rest = NULL;
2849     }
2850   } else {
2851     leaf->size -= rec->ksiz + rec->vsiz;
2852     TCFREE(tcptrlistremove(leaf->recs, ri));
2853   }
2854   bdb->rnum--;
2855 }
2856 
2857 
2858 /* Adjust the caches for leaves and nodes.
2859    `bdb' specifies the B+ tree database object.
2860    The return value is true if successful, else, it is false. */
tcbdbcacheadjust(TCBDB * bdb)2861 static bool tcbdbcacheadjust(TCBDB *bdb){
2862   bool err = false;
2863   if(TCMAPRNUM(bdb->leafc) > bdb->lcnum){
2864     TCDODEBUG(bdb->cnt_adjleafc++);
2865     int ecode = tchdbecode(bdb->hdb);
2866     bool clk = BDBLOCKCACHE(bdb);
2867     TCMAP *leafc = bdb->leafc;
2868     tcmapiterinit(leafc);
2869     int dnum = tclmax(TCMAPRNUM(bdb->leafc) - bdb->lcnum, BDBCACHEOUT);
2870     for(int i = 0; i < dnum; i++){
2871       int rsiz;
2872       if(!tcbdbleafcacheout(bdb, (BDBLEAF *)tcmapiterval(tcmapiternext(leafc, &rsiz), &rsiz)))
2873         err = true;
2874     }
2875     if(clk) BDBUNLOCKCACHE(bdb);
2876     if(!err && tchdbecode(bdb->hdb) != ecode)
2877       tcbdbsetecode(bdb, ecode, __FILE__, __LINE__, __func__);
2878   }
2879   if(TCMAPRNUM(bdb->nodec) > bdb->ncnum){
2880     TCDODEBUG(bdb->cnt_adjnodec++);
2881     int ecode = tchdbecode(bdb->hdb);
2882     bool clk = BDBLOCKCACHE(bdb);
2883     TCMAP *nodec = bdb->nodec;
2884     tcmapiterinit(nodec);
2885     int dnum = tclmax(TCMAPRNUM(bdb->nodec) - bdb->ncnum, BDBCACHEOUT);
2886     for(int i = 0; i < dnum; i++){
2887       int rsiz;
2888       if(!tcbdbnodecacheout(bdb, (BDBNODE *)tcmapiterval(tcmapiternext(nodec, &rsiz), &rsiz)))
2889         err = true;
2890     }
2891     if(clk) BDBUNLOCKCACHE(bdb);
2892     if(!err && tchdbecode(bdb->hdb) != ecode)
2893       tcbdbsetecode(bdb, ecode, __FILE__, __LINE__, __func__);
2894   }
2895   return !err;
2896 }
2897 
2898 
2899 /* Purge dirty pages of caches for leaves and nodes.
2900    `bdb' specifies the B+ tree database object. */
tcbdbcachepurge(TCBDB * bdb)2901 static void tcbdbcachepurge(TCBDB *bdb){
2902   bool clk = BDBLOCKCACHE(bdb);
2903   int tsiz;
2904   const char *tmp;
2905   tcmapiterinit(bdb->leafc);
2906   while((tmp = tcmapiternext(bdb->leafc, &tsiz)) != NULL){
2907     int lsiz;
2908     BDBLEAF *leaf = (BDBLEAF *)tcmapiterval(tmp, &lsiz);
2909     if(!leaf->dirty) continue;
2910     TCPTRLIST *recs = leaf->recs;
2911     int ln = TCPTRLISTNUM(recs);
2912     for(int i = 0; i < ln; i++){
2913       BDBREC *rec = TCPTRLISTVAL(recs, i);
2914       if(rec->rest) tclistdel(rec->rest);
2915       TCFREE(rec);
2916     }
2917     tcptrlistdel(recs);
2918     tcmapout(bdb->leafc, tmp, tsiz);
2919   }
2920   tcmapiterinit(bdb->nodec);
2921   while((tmp = tcmapiternext(bdb->nodec, &tsiz)) != NULL){
2922     int nsiz;
2923     BDBNODE *node = (BDBNODE *)tcmapiterval(tmp, &nsiz);
2924     if(!node->dirty) continue;
2925     TCPTRLIST *idxs = node->idxs;
2926     int ln = TCPTRLISTNUM(idxs);
2927     for(int i = 0; i < ln; i++){
2928       BDBIDX *idx = TCPTRLISTVAL(idxs, i);
2929       TCFREE(idx);
2930     }
2931     tcptrlistdel(idxs);
2932     tcmapout(bdb->nodec, tmp, tsiz);
2933   }
2934   if(clk) BDBUNLOCKCACHE(bdb);
2935 }
2936 
2937 
2938 /* Open a database file and connect a B+ tree database object.
2939    `bdb' specifies the B+ tree database object.
2940    `path' specifies the path of the internal database file.
2941    `omode' specifies the connection mode.
2942    If successful, the return value is true, else, it is false. */
tcbdbopenimpl(TCBDB * bdb,const char * path,int omode)2943 static bool tcbdbopenimpl(TCBDB *bdb, const char *path, int omode){
2944   assert(bdb && path);
2945   int homode = HDBOREADER;
2946   if(omode & BDBOWRITER){
2947     homode = HDBOWRITER;
2948     if(omode & BDBOCREAT) homode |= HDBOCREAT;
2949     if(omode & BDBOTRUNC) homode |= HDBOTRUNC;
2950     bdb->wmode = true;
2951   } else {
2952     bdb->wmode = false;
2953   }
2954   if(omode & BDBONOLCK) homode |= HDBONOLCK;
2955   if(omode & BDBOLCKNB) homode |= HDBOLCKNB;
2956   if(omode & BDBOTSYNC) homode |= HDBOTSYNC;
2957   tchdbsettype(bdb->hdb, TCDBTBTREE);
2958   if(!tchdbopen(bdb->hdb, path, homode)) return false;
2959   bdb->root = 0;
2960   bdb->first = 0;
2961   bdb->last = 0;
2962   bdb->lnum = 0;
2963   bdb->nnum = 0;
2964   bdb->rnum = 0;
2965   bdb->opaque = tchdbopaque(bdb->hdb);
2966   bdb->leafc = tcmapnew2(bdb->lcnum * 2 + 1);
2967   bdb->nodec = tcmapnew2(bdb->ncnum * 2 + 1);
2968   if(bdb->wmode && tchdbrnum(bdb->hdb) < 1){
2969     BDBLEAF *leaf = tcbdbleafnew(bdb, 0, 0);
2970     bdb->root = leaf->id;
2971     bdb->first = leaf->id;
2972     bdb->last = leaf->id;
2973     bdb->lnum = 1;
2974     bdb->nnum = 0;
2975     bdb->rnum = 0;
2976     if(!bdb->cmp){
2977       bdb->cmp = tccmplexical;
2978       bdb->cmpop = NULL;
2979     }
2980     tcbdbdumpmeta(bdb);
2981     if(!tcbdbleafsave(bdb, leaf)){
2982       tcmapdel(bdb->nodec);
2983       tcmapdel(bdb->leafc);
2984       tchdbclose(bdb->hdb);
2985       return false;
2986     }
2987   }
2988   tcbdbloadmeta(bdb);
2989   if(!bdb->cmp){
2990     tcbdbsetecode(bdb, TCEINVALID, __FILE__, __LINE__, __func__);
2991     tcmapdel(bdb->nodec);
2992     tcmapdel(bdb->leafc);
2993     tchdbclose(bdb->hdb);
2994     return false;
2995   }
2996   if(bdb->lmemb < BDBMINLMEMB || bdb->nmemb < BDBMINNMEMB ||
2997      bdb->root < 1 || bdb->first < 1 || bdb->last < 1 ||
2998      bdb->lnum < 0 || bdb->nnum < 0 || bdb->rnum < 0){
2999     tcbdbsetecode(bdb, TCEMETA, __FILE__, __LINE__, __func__);
3000     tcmapdel(bdb->nodec);
3001     tcmapdel(bdb->leafc);
3002     tchdbclose(bdb->hdb);
3003     return false;
3004   }
3005   bdb->open = true;
3006   uint8_t hopts = tchdbopts(bdb->hdb);
3007   uint8_t opts = 0;
3008   if(hopts & HDBTLARGE) opts |= BDBTLARGE;
3009   if(hopts & HDBTDEFLATE) opts |= BDBTDEFLATE;
3010   if(hopts & HDBTBZIP) opts |= BDBTBZIP;
3011   if(hopts & HDBTTCBS) opts |= BDBTTCBS;
3012   if(hopts & HDBTEXCODEC) opts |= BDBTEXCODEC;
3013   bdb->opts = opts;
3014   bdb->hleaf = 0;
3015   bdb->lleaf = 0;
3016   bdb->tran = false;
3017   bdb->rbopaque = NULL;
3018   bdb->clock = 1;
3019   return true;
3020 }
3021 
3022 
3023 /* Close a B+ tree database object.
3024    `bdb' specifies the B+ tree database object.
3025    If successful, the return value is true, else, it is false. */
tcbdbcloseimpl(TCBDB * bdb)3026 static bool tcbdbcloseimpl(TCBDB *bdb){
3027   assert(bdb);
3028   bool err = false;
3029   if(bdb->tran){
3030     tcbdbcachepurge(bdb);
3031     memcpy(bdb->opaque, bdb->rbopaque, BDBOPAQUESIZ);
3032     tcbdbloadmeta(bdb);
3033     TCFREE(bdb->rbopaque);
3034     bdb->tran = false;
3035     bdb->rbopaque = NULL;
3036     if(!tchdbtranvoid(bdb->hdb)) err = true;
3037   }
3038   bdb->open = false;
3039   const char *vbuf;
3040   int vsiz;
3041   TCMAP *leafc = bdb->leafc;
3042   tcmapiterinit(leafc);
3043   while((vbuf = tcmapiternext(leafc, &vsiz)) != NULL){
3044     if(!tcbdbleafcacheout(bdb, (BDBLEAF *)tcmapiterval(vbuf, &vsiz))) err = true;
3045   }
3046   TCMAP *nodec = bdb->nodec;
3047   tcmapiterinit(nodec);
3048   while((vbuf = tcmapiternext(nodec, &vsiz)) != NULL){
3049     if(!tcbdbnodecacheout(bdb, (BDBNODE *)tcmapiterval(vbuf, &vsiz))) err = true;
3050   }
3051   if(bdb->wmode) tcbdbdumpmeta(bdb);
3052   tcmapdel(bdb->nodec);
3053   tcmapdel(bdb->leafc);
3054   if(!tchdbclose(bdb->hdb)) err = true;
3055   return !err;
3056 }
3057 
3058 
3059 /* Store a record into a B+ tree database object.
3060    `bdb' specifies the B+ tree database object.
3061    `kbuf' specifies the pointer to the region of the key.
3062    `ksiz' specifies the size of the region of the key.
3063    `vbuf' specifies the pointer to the region of the value.
3064    `vsiz' specifies the size of the region of the value.
3065    `dmode' specifies behavior when the key overlaps.
3066    If successful, the return value is true, else, it is false. */
tcbdbputimpl(TCBDB * bdb,const void * kbuf,int ksiz,const void * vbuf,int vsiz,int dmode)3067 static bool tcbdbputimpl(TCBDB *bdb, const void *kbuf, int ksiz, const void *vbuf, int vsiz,
3068                          int dmode){
3069   assert(bdb && kbuf && ksiz >= 0);
3070   BDBLEAF *leaf = NULL;
3071   uint64_t hlid = bdb->hleaf;
3072   if(hlid < 1 || !(leaf = tcbdbgethistleaf(bdb, kbuf, ksiz, hlid))){
3073     uint64_t pid = tcbdbsearchleaf(bdb, kbuf, ksiz);
3074     if(pid < 1) return false;
3075     if(!(leaf = tcbdbleafload(bdb, pid))) return false;
3076     hlid = 0;
3077   }
3078   if(!tcbdbleafaddrec(bdb, leaf, dmode, kbuf, ksiz, vbuf, vsiz)){
3079     if(!bdb->tran) tcbdbcacheadjust(bdb);
3080     return false;
3081   }
3082   int rnum = TCPTRLISTNUM(leaf->recs);
3083   if(rnum > bdb->lmemb || (rnum > 1 && leaf->size > bdb->lsmax)){
3084     if(hlid > 0 && hlid != tcbdbsearchleaf(bdb, kbuf, ksiz)) return false;
3085     bdb->lschk = 0;
3086     BDBLEAF *newleaf = tcbdbleafdivide(bdb, leaf);
3087     if(!newleaf) return false;
3088     if(leaf->id == bdb->last) bdb->last = newleaf->id;
3089     uint64_t heir = leaf->id;
3090     uint64_t pid = newleaf->id;
3091     BDBREC *rec = TCPTRLISTVAL(newleaf->recs, 0);
3092     char *dbuf = (char *)rec + sizeof(*rec);
3093     int ksiz = rec->ksiz;
3094     char *kbuf;
3095     TCMEMDUP(kbuf, dbuf, ksiz);
3096     while(true){
3097       BDBNODE *node;
3098       if(bdb->hnum < 1){
3099         node = tcbdbnodenew(bdb, heir);
3100         tcbdbnodeaddidx(bdb, node, true, pid, kbuf, ksiz);
3101         bdb->root = node->id;
3102         TCFREE(kbuf);
3103         break;
3104       }
3105       uint64_t parent = bdb->hist[--bdb->hnum];
3106       if(!(node = tcbdbnodeload(bdb, parent))){
3107         TCFREE(kbuf);
3108         return false;
3109       }
3110       tcbdbnodeaddidx(bdb, node, false, pid, kbuf, ksiz);
3111       TCFREE(kbuf);
3112       TCPTRLIST *idxs = node->idxs;
3113       int ln = TCPTRLISTNUM(idxs);
3114       if(ln <= bdb->nmemb) break;
3115       int mid = ln / 2;
3116       BDBIDX *idx = TCPTRLISTVAL(idxs, mid);
3117       BDBNODE *newnode = tcbdbnodenew(bdb, idx->pid);
3118       heir = node->id;
3119       pid = newnode->id;
3120       char *ebuf = (char *)idx + sizeof(*idx);
3121       TCMEMDUP(kbuf, ebuf, idx->ksiz);
3122       ksiz = idx->ksiz;
3123       for(int i = mid + 1; i < ln; i++){
3124         idx = TCPTRLISTVAL(idxs, i);
3125         char *ebuf = (char *)idx + sizeof(*idx);
3126         tcbdbnodeaddidx(bdb, newnode, true, idx->pid, ebuf, idx->ksiz);
3127       }
3128       ln = TCPTRLISTNUM(newnode->idxs);
3129       for(int i = 0; i <= ln; i++){
3130         idx = tcptrlistpop(idxs);
3131         TCFREE(idx);
3132       }
3133       node->dirty = true;
3134     }
3135     if(bdb->capnum > 0 && bdb->rnum > bdb->capnum){
3136       uint64_t xnum = bdb->rnum - bdb->capnum;
3137       BDBCUR *cur = tcbdbcurnew(bdb);
3138       while((xnum--) > 0){
3139         if((cur->id < 1 || cur->clock != bdb->clock) && !tcbdbcurfirstimpl(cur)){
3140           tcbdbcurdel(cur);
3141           return false;
3142         }
3143         if(!tcbdbcuroutimpl(cur)){
3144           tcbdbcurdel(cur);
3145           return false;
3146         }
3147       }
3148       tcbdbcurdel(cur);
3149     }
3150   } else if(rnum < 1){
3151     if(hlid > 0 && hlid != tcbdbsearchleaf(bdb, kbuf, ksiz)) return false;
3152     if(bdb->hnum > 0 && !tcbdbleafkill(bdb, leaf)) return false;
3153   }
3154   if(!bdb->tran && !tcbdbcacheadjust(bdb)) return false;
3155   return true;
3156 }
3157 
3158 
3159 /* Remove a record of a B+ tree database object.
3160    `bdb' specifies the B+ tree database object.
3161    `kbuf' specifies the pointer to the region of the key.
3162    `ksiz' specifies the size of the region of the key.
3163    If successful, the return value is true, else, it is false. */
tcbdboutimpl(TCBDB * bdb,const char * kbuf,int ksiz)3164 static bool tcbdboutimpl(TCBDB *bdb, const char *kbuf, int ksiz){
3165   assert(bdb && kbuf && ksiz >= 0);
3166   BDBLEAF *leaf = NULL;
3167   uint64_t hlid = bdb->hleaf;
3168   if(hlid < 1 || !(leaf = tcbdbgethistleaf(bdb, kbuf, ksiz, hlid))){
3169     uint64_t pid = tcbdbsearchleaf(bdb, kbuf, ksiz);
3170     if(pid < 1) return false;
3171     if(!(leaf = tcbdbleafload(bdb, pid))) return false;
3172     hlid = 0;
3173   }
3174   int ri;
3175   BDBREC *rec = tcbdbsearchrec(bdb, leaf, kbuf, ksiz, &ri);
3176   if(!rec){
3177     tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
3178     return false;
3179   }
3180   tcbdbremoverec(bdb, leaf, rec, ri);
3181   leaf->dirty = true;
3182   if(TCPTRLISTNUM(leaf->recs) < 1){
3183     if(hlid > 0 && hlid != tcbdbsearchleaf(bdb, kbuf, ksiz)) return false;
3184     if(bdb->hnum > 0 && !tcbdbleafkill(bdb, leaf)) return false;
3185   }
3186   if(!bdb->tran && !tcbdbcacheadjust(bdb)) return false;
3187   return true;
3188 }
3189 
3190 
3191 /* Remove records of a B+ tree database object.
3192    `bdb' specifies the B+ tree database object.
3193    `kbuf' specifies the pointer to the region of the key.
3194    `ksiz' specifies the size of the region of the key.
3195    If successful, the return value is true, else, it is false. */
tcbdboutlist(TCBDB * bdb,const char * kbuf,int ksiz)3196 static bool tcbdboutlist(TCBDB *bdb, const char *kbuf, int ksiz){
3197   assert(bdb && kbuf && ksiz >= 0);
3198   BDBLEAF *leaf = NULL;
3199   uint64_t hlid = bdb->hleaf;
3200   if(hlid < 1 || !(leaf = tcbdbgethistleaf(bdb, kbuf, ksiz, hlid))){
3201     uint64_t pid = tcbdbsearchleaf(bdb, kbuf, ksiz);
3202     if(pid < 1) return false;
3203     if(!(leaf = tcbdbleafload(bdb, pid))) return false;
3204     hlid = 0;
3205   }
3206   int ri;
3207   BDBREC *rec = tcbdbsearchrec(bdb, leaf, kbuf, ksiz, &ri);
3208   if(!rec){
3209     tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
3210     return false;
3211   }
3212   int rnum = 1;
3213   int rsiz = rec->ksiz + rec->vsiz;
3214   if(rec->rest){
3215     TCLIST *rest = rec->rest;
3216     int ln = TCLISTNUM(rec->rest);
3217     rnum += ln;
3218     for(int i = 0; i < ln; i++){
3219       rsiz += TCLISTVALSIZ(rest, i);
3220     }
3221     tclistdel(rest);
3222   }
3223   TCFREE(tcptrlistremove(leaf->recs, ri));
3224   leaf->size -= rsiz;
3225   leaf->dirty = true;
3226   bdb->rnum -= rnum;
3227   if(TCPTRLISTNUM(leaf->recs) < 1){
3228     if(hlid > 0 && hlid != tcbdbsearchleaf(bdb, kbuf, ksiz)) return false;
3229     if(bdb->hnum > 0 && !tcbdbleafkill(bdb, leaf)) return false;
3230   }
3231   if(!bdb->tran && !tcbdbcacheadjust(bdb)) return false;
3232   return true;
3233 }
3234 
3235 
3236 /* Retrieve a record in a B+ tree database object.
3237    `bdb' specifies the B+ tree database object.
3238    `kbuf' specifies the pointer to the region of the key.
3239    `ksiz' specifies the size of the region of the key.
3240    `sp' specifies the pointer to the variable into which the size of the region of the return
3241    value is assigned.
3242    If successful, the return value is the pointer to the region of the value of the corresponding
3243    record. */
tcbdbgetimpl(TCBDB * bdb,const char * kbuf,int ksiz,int * sp)3244 static const char *tcbdbgetimpl(TCBDB *bdb, const char *kbuf, int ksiz, int *sp){
3245   assert(bdb && kbuf && ksiz >= 0 && sp);
3246   BDBLEAF *leaf = NULL;
3247   uint64_t hlid = bdb->hleaf;
3248   if(hlid < 1 || !(leaf = tcbdbgethistleaf(bdb, kbuf, ksiz, hlid))){
3249     uint64_t pid = tcbdbsearchleaf(bdb, kbuf, ksiz);
3250     if(pid < 1) return NULL;
3251     if(!(leaf = tcbdbleafload(bdb, pid))) return NULL;
3252   }
3253   BDBREC *rec = tcbdbsearchrec(bdb, leaf, kbuf, ksiz, NULL);
3254   if(!rec){
3255     tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
3256     return NULL;
3257   }
3258   *sp = rec->vsiz;
3259   return (char *)rec + sizeof(*rec) + rec->ksiz + TCALIGNPAD(rec->ksiz);
3260 }
3261 
3262 
3263 /* Get the number of records corresponding a key in a B+ tree database object.
3264    `bdb' specifies the B+ tree database object.
3265    `kbuf' specifies the pointer to the region of the key.
3266    `ksiz' specifies the size of the region of the key.
3267    If successful, the return value is the number of the corresponding records, else, it is 0. */
tcbdbgetnum(TCBDB * bdb,const char * kbuf,int ksiz)3268 static int tcbdbgetnum(TCBDB *bdb, const char *kbuf, int ksiz){
3269   assert(bdb && kbuf && ksiz >= 0);
3270   BDBLEAF *leaf = NULL;
3271   uint64_t hlid = bdb->hleaf;
3272   if(hlid < 1 || !(leaf = tcbdbgethistleaf(bdb, kbuf, ksiz, hlid))){
3273     uint64_t pid = tcbdbsearchleaf(bdb, kbuf, ksiz);
3274     if(pid < 1) return 0;
3275     if(!(leaf = tcbdbleafload(bdb, pid))) return 0;
3276   }
3277   BDBREC *rec = tcbdbsearchrec(bdb, leaf, kbuf, ksiz, NULL);
3278   if(!rec){
3279     tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
3280     return 0;
3281   }
3282   return rec->rest ? TCLISTNUM(rec->rest) + 1 : 1;
3283 }
3284 
3285 
3286 /* Retrieve records in a B+ tree database object.
3287    `bdb' specifies the B+ tree database object.
3288    `kbuf' specifies the pointer to the region of the key.
3289    `ksiz' specifies the size of the region of the key.
3290    If successful, the return value is a list object of the values of the corresponding records. */
tcbdbgetlist(TCBDB * bdb,const char * kbuf,int ksiz)3291 static TCLIST *tcbdbgetlist(TCBDB *bdb, const char *kbuf, int ksiz){
3292   assert(bdb && kbuf && ksiz >= 0);
3293   BDBLEAF *leaf = NULL;
3294   uint64_t hlid = bdb->hleaf;
3295   if(hlid < 1 || !(leaf = tcbdbgethistleaf(bdb, kbuf, ksiz, hlid))){
3296     uint64_t pid = tcbdbsearchleaf(bdb, kbuf, ksiz);
3297     if(pid < 1) return NULL;
3298     if(!(leaf = tcbdbleafload(bdb, pid))) return NULL;
3299   }
3300   BDBREC *rec = tcbdbsearchrec(bdb, leaf, kbuf, ksiz, NULL);
3301   if(!rec){
3302     tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
3303     return NULL;
3304   }
3305   TCLIST *vals;
3306   TCLIST *rest = rec->rest;
3307   if(rest){
3308     int ln = TCLISTNUM(rest);
3309     vals = tclistnew2(ln + 1);
3310     TCLISTPUSH(vals, (char *)rec + sizeof(*rec) + rec->ksiz + TCALIGNPAD(rec->ksiz), rec->vsiz);
3311     for(int i = 0; i < ln; i++){
3312       const char *vbuf;
3313       int vsiz;
3314       TCLISTVAL(vbuf, rest, i, vsiz);
3315       TCLISTPUSH(vals, vbuf, vsiz);
3316     }
3317   } else {
3318     vals = tclistnew2(1);
3319     TCLISTPUSH(vals, (char *)rec + sizeof(*rec) + rec->ksiz + TCALIGNPAD(rec->ksiz), rec->vsiz);
3320   }
3321   return vals;
3322 }
3323 
3324 
3325 /* Get keys of ranged records in a B+ tree database object.
3326    `bdb' specifies the B+ tree database object.
3327    `bkbuf' specifies the pointer to the region of the key of the beginning border.
3328    `bksiz' specifies the size of the region of the beginning key.
3329    `binc' specifies whether the beginning border is inclusive or not.
3330    `ekbuf' specifies the pointer to the region of the key of the ending border.
3331    `eksiz' specifies the size of the region of the ending key.
3332    `einc' specifies whether the ending border is inclusive or not.
3333    `max' specifies the maximum number of keys to be fetched.
3334    `keys' specifies a list object to store the result.
3335    If successful, the return value is true, else, it is false. */
tcbdbrangeimpl(TCBDB * bdb,const char * bkbuf,int bksiz,bool binc,const char * ekbuf,int eksiz,bool einc,int max,TCLIST * keys)3336 static bool tcbdbrangeimpl(TCBDB *bdb, const char *bkbuf, int bksiz, bool binc,
3337                            const char *ekbuf, int eksiz, bool einc, int max, TCLIST *keys){
3338   assert(bdb && keys);
3339   bool err = false;
3340   BDBCUR *cur = tcbdbcurnew(bdb);
3341   if(bkbuf){
3342     tcbdbcurjumpimpl(cur, bkbuf, bksiz, true);
3343   } else {
3344     tcbdbcurfirstimpl(cur);
3345   }
3346   TCCMP cmp = bdb->cmp;
3347   void *cmpop = bdb->cmpop;
3348   const char *lbuf = NULL;
3349   int lsiz = 0;
3350   while(cur->id > 0){
3351     const char *kbuf, *vbuf;
3352     int ksiz, vsiz;
3353     if(!tcbdbcurrecimpl(cur, &kbuf, &ksiz, &vbuf, &vsiz)){
3354       if(tchdbecode(bdb->hdb) != TCEINVALID && tchdbecode(bdb->hdb) != TCENOREC) err = true;
3355       break;
3356     }
3357     if(bkbuf && !binc){
3358       if(cmp(kbuf, ksiz, bkbuf, bksiz, cmpop) == 0){
3359         tcbdbcurnextimpl(cur);
3360         continue;
3361       }
3362       bkbuf = NULL;
3363     }
3364     if(ekbuf){
3365       if(einc){
3366         if(cmp(kbuf, ksiz, ekbuf, eksiz, cmpop) > 0) break;
3367       } else {
3368         if(cmp(kbuf, ksiz, ekbuf, eksiz, cmpop) >= 0) break;
3369       }
3370     }
3371     if(!lbuf || lsiz != ksiz || memcmp(kbuf, lbuf, ksiz)){
3372       TCLISTPUSH(keys, kbuf, ksiz);
3373       if(max >= 0 && TCLISTNUM(keys) >= max) break;
3374       lbuf = kbuf;
3375       lsiz = ksiz;
3376     }
3377     tcbdbcurnextimpl(cur);
3378   }
3379   tcbdbcurdel(cur);
3380   return !err;
3381 }
3382 
3383 
3384 /* Get forward matching keys in a B+ tree database object.
3385    `bdb' specifies the B+ tree database object.
3386    `pbuf' specifies the pointer to the region of the prefix.
3387    `psiz' specifies the size of the region of the prefix.
3388    `max' specifies the maximum number of keys to be fetched.
3389    `keys' specifies a list object to store the result.
3390    If successful, the return value is true, else, it is false. */
tcbdbrangefwm(TCBDB * bdb,const char * pbuf,int psiz,int max,TCLIST * keys)3391 static bool tcbdbrangefwm(TCBDB *bdb, const char *pbuf, int psiz, int max, TCLIST *keys){
3392   assert(bdb && pbuf && psiz >= 0 && keys);
3393   bool err = false;
3394   if(max < 0) max = INT_MAX;
3395   if(max < 1) return true;
3396   BDBCUR *cur = tcbdbcurnew(bdb);
3397   tcbdbcurjumpimpl(cur, pbuf, psiz, true);
3398   const char *lbuf = NULL;
3399   int lsiz = 0;
3400   while(cur->id > 0){
3401     const char *kbuf, *vbuf;
3402     int ksiz, vsiz;
3403     if(!tcbdbcurrecimpl(cur, &kbuf, &ksiz, &vbuf, &vsiz)){
3404       if(tchdbecode(bdb->hdb) != TCEINVALID && tchdbecode(bdb->hdb) != TCENOREC) err = true;
3405       break;
3406     }
3407     if(ksiz < psiz || memcmp(kbuf, pbuf, psiz)) break;
3408     if(!lbuf || lsiz != ksiz || memcmp(kbuf, lbuf, ksiz)){
3409       TCLISTPUSH(keys, kbuf, ksiz);
3410       if(TCLISTNUM(keys) >= max) break;
3411       lbuf = kbuf;
3412       lsiz = ksiz;
3413     }
3414     tcbdbcurnextimpl(cur);
3415   }
3416   tcbdbcurdel(cur);
3417   return !err;
3418 }
3419 
3420 
3421 /* Optimize the file of a B+ tree database object.
3422    `bdb' specifies the B+ tree database object.
3423    `lmemb' specifies the number of members in each leaf page.
3424    `nmemb' specifies the number of members in each non-leaf page.
3425    `bnum' specifies the number of elements of the bucket array.
3426    `apow' specifies the size of record alignment by power of 2.
3427    `fpow' specifies the maximum number of elements of the free block pool by power of 2.
3428    `opts' specifies options by bitwise-or.
3429    If successful, the return value is true, else, it is false. */
tcbdboptimizeimpl(TCBDB * bdb,int32_t lmemb,int32_t nmemb,int64_t bnum,int8_t apow,int8_t fpow,uint8_t opts)3430 static bool tcbdboptimizeimpl(TCBDB *bdb, int32_t lmemb, int32_t nmemb,
3431                               int64_t bnum, int8_t apow, int8_t fpow, uint8_t opts){
3432   assert(bdb);
3433   const char *path = tchdbpath(bdb->hdb);
3434   char *tpath = tcsprintf("%s%ctmp%c%llu", path, MYEXTCHR, MYEXTCHR, tchdbinode(bdb->hdb));
3435   TCBDB *tbdb = tcbdbnew();
3436   int dbgfd = tchdbdbgfd(bdb->hdb);
3437   if(dbgfd >= 0) tcbdbsetdbgfd(tbdb, dbgfd);
3438   tcbdbsetcmpfunc(tbdb, bdb->cmp, bdb->cmpop);
3439   TCCODEC enc, dec;
3440   void *encop, *decop;
3441   tchdbcodecfunc(bdb->hdb, &enc, &encop, &dec, &decop);
3442   if(enc && dec) tcbdbsetcodecfunc(tbdb, enc, encop, dec, decop);
3443   if(lmemb < 1) lmemb = bdb->lmemb;
3444   if(nmemb < 1) nmemb = bdb->nmemb;
3445   if(bnum < 1) bnum = tchdbrnum(bdb->hdb) * 2 + 1;
3446   if(apow < 0) apow = tclog2l(tchdbalign(bdb->hdb));
3447   if(fpow < 0) fpow = tclog2l(tchdbfbpmax(bdb->hdb));
3448   if(opts == UINT8_MAX) opts = bdb->opts;
3449   tcbdbtune(tbdb, lmemb, nmemb, bnum, apow, fpow, opts);
3450   tcbdbsetcache(tbdb, 1, 1);
3451   tcbdbsetlsmax(tbdb, bdb->lsmax);
3452   uint32_t lcnum = bdb->lcnum;
3453   uint32_t ncnum = bdb->ncnum;
3454   bdb->lcnum = BDBLEVELMAX;
3455   bdb->ncnum = BDBCACHEOUT * 2;
3456   tbdb->lcnum = BDBLEVELMAX;
3457   tbdb->ncnum = BDBCACHEOUT * 2;
3458   if(!tcbdbopen(tbdb, tpath, BDBOWRITER | BDBOCREAT | BDBOTRUNC)){
3459     tcbdbsetecode(bdb, tcbdbecode(tbdb), __FILE__, __LINE__, __func__);
3460     tcbdbdel(tbdb);
3461     TCFREE(tpath);
3462     return false;
3463   }
3464   memcpy(tcbdbopaque(tbdb), tcbdbopaque(bdb), BDBLEFTOPQSIZ);
3465   bool err = false;
3466   BDBCUR *cur = tcbdbcurnew(bdb);
3467   tcbdbcurfirstimpl(cur);
3468   const char *kbuf, *vbuf;
3469   int ksiz, vsiz;
3470   int cnt = 0;
3471   while(!err && cur->id > 0 && tcbdbcurrecimpl(cur, &kbuf, &ksiz, &vbuf, &vsiz)){
3472     if(!tcbdbputdup(tbdb, kbuf, ksiz, vbuf, vsiz)){
3473       tcbdbsetecode(bdb, tcbdbecode(tbdb), __FILE__, __LINE__, __func__);
3474       err = true;
3475     }
3476     tcbdbcurnextimpl(cur);
3477     if((++cnt % 0xf == 0) && !tcbdbcacheadjust(bdb)) err = true;
3478   }
3479   tcbdbcurdel(cur);
3480   if(!tcbdbclose(tbdb)){
3481     tcbdbsetecode(bdb, tcbdbecode(tbdb), __FILE__, __LINE__, __func__);
3482     err = true;
3483   }
3484   bdb->lcnum = lcnum;
3485   bdb->ncnum = ncnum;
3486   tcbdbdel(tbdb);
3487   if(unlink(path) == -1){
3488     tcbdbsetecode(bdb, TCEUNLINK, __FILE__, __LINE__, __func__);
3489     err = true;
3490   }
3491   if(rename(tpath, path) == -1){
3492     tcbdbsetecode(bdb, TCERENAME, __FILE__, __LINE__, __func__);
3493     err = true;
3494   }
3495   TCFREE(tpath);
3496   if(err) return false;
3497   tpath = tcstrdup(path);
3498   int omode = (tchdbomode(bdb->hdb) & ~BDBOCREAT) & ~BDBOTRUNC;
3499   if(!tcbdbcloseimpl(bdb)){
3500     TCFREE(tpath);
3501     return false;
3502   }
3503   bool rv = tcbdbopenimpl(bdb, tpath, omode);
3504   TCFREE(tpath);
3505   return rv;
3506 }
3507 
3508 
3509 /* Remove all records of a B+ tree database object.
3510    `bdb' specifies the B+ tree database object.
3511    If successful, the return value is true, else, it is false. */
tcbdbvanishimpl(TCBDB * bdb)3512 static bool tcbdbvanishimpl(TCBDB *bdb){
3513   assert(bdb);
3514   char *path = tcstrdup(tchdbpath(bdb->hdb));
3515   int omode = tchdbomode(bdb->hdb);
3516   bool err = false;
3517   if(!tcbdbcloseimpl(bdb)) err = true;
3518   if(!tcbdbopenimpl(bdb, path, BDBOTRUNC | omode)) err = true;
3519   TCFREE(path);
3520   return !err;
3521 }
3522 
3523 
3524 /* Lock a method of the B+ tree database object.
3525    `bdb' specifies the B+ tree database object.
3526    `wr' specifies whether the lock is writer or not.
3527    If successful, the return value is true, else, it is false. */
tcbdblockmethod(TCBDB * bdb,bool wr)3528 static bool tcbdblockmethod(TCBDB *bdb, bool wr){
3529   assert(bdb);
3530   if(wr ? pthread_rwlock_wrlock(bdb->mmtx) != 0 : pthread_rwlock_rdlock(bdb->mmtx) != 0){
3531     tcbdbsetecode(bdb, TCETHREAD, __FILE__, __LINE__, __func__);
3532     return false;
3533   }
3534   TCTESTYIELD();
3535   return true;
3536 }
3537 
3538 
3539 /* Unlock a method of the B+ tree database object.
3540    `bdb' specifies the B+ tree database object.
3541    If successful, the return value is true, else, it is false. */
tcbdbunlockmethod(TCBDB * bdb)3542 static bool tcbdbunlockmethod(TCBDB *bdb){
3543   assert(bdb);
3544   if(pthread_rwlock_unlock(bdb->mmtx) != 0){
3545     tcbdbsetecode(bdb, TCETHREAD, __FILE__, __LINE__, __func__);
3546     return false;
3547   }
3548   TCTESTYIELD();
3549   return true;
3550 }
3551 
3552 
3553 /* Lock the cache of the B+ tree database object.
3554    `bdb' specifies the B+ tree database object.
3555    If successful, the return value is true, else, it is false. */
tcbdblockcache(TCBDB * bdb)3556 static bool tcbdblockcache(TCBDB *bdb){
3557   assert(bdb);
3558   if(pthread_mutex_lock(bdb->cmtx) != 0){
3559     tcbdbsetecode(bdb, TCETHREAD, __FILE__, __LINE__, __func__);
3560     return false;
3561   }
3562   TCTESTYIELD();
3563   return true;
3564 }
3565 
3566 
3567 /* Unlock the cache of the B+ tree database object.
3568    `bdb' specifies the B+ tree database object.
3569    If successful, the return value is true, else, it is false. */
tcbdbunlockcache(TCBDB * bdb)3570 static bool tcbdbunlockcache(TCBDB *bdb){
3571   assert(bdb);
3572   if(pthread_mutex_unlock(bdb->cmtx) != 0){
3573     tcbdbsetecode(bdb, TCETHREAD, __FILE__, __LINE__, __func__);
3574     return false;
3575   }
3576   TCTESTYIELD();
3577   return true;
3578 }
3579 
3580 
3581 /* Move a cursor object to the first record.
3582    `cur' specifies the cursor object.
3583    If successful, the return value is true, else, it is false. */
tcbdbcurfirstimpl(BDBCUR * cur)3584 static bool tcbdbcurfirstimpl(BDBCUR *cur){
3585   assert(cur);
3586   TCBDB *bdb = cur->bdb;
3587   cur->clock = bdb->clock;
3588   cur->id = bdb->first;
3589   cur->kidx = 0;
3590   cur->vidx = 0;
3591   return tcbdbcuradjust(cur, true);
3592 }
3593 
3594 
3595 /* Move a cursor object to the last record.
3596    `cur' specifies the cursor object.
3597    If successful, the return value is true, else, it is false. */
tcbdbcurlastimpl(BDBCUR * cur)3598 static bool tcbdbcurlastimpl(BDBCUR *cur){
3599   assert(cur);
3600   TCBDB *bdb = cur->bdb;
3601   cur->clock = bdb->clock;
3602   cur->id = bdb->last;
3603   cur->kidx = INT_MAX;
3604   cur->vidx = INT_MAX;
3605   return tcbdbcuradjust(cur, false);
3606 }
3607 
3608 
3609 /* Move a cursor object to around records corresponding a key.
3610    `cur' specifies the cursor object.
3611    `kbuf' specifies the pointer to the region of the key.
3612    `ksiz' specifies the size of the region of the key.
3613    `forward' specifies whether the cursor is to be the front of records.
3614    If successful, the return value is true, else, it is false. */
tcbdbcurjumpimpl(BDBCUR * cur,const char * kbuf,int ksiz,bool forward)3615 static bool tcbdbcurjumpimpl(BDBCUR *cur, const char *kbuf, int ksiz, bool forward){
3616   assert(cur && kbuf && ksiz >= 0);
3617   TCBDB *bdb = cur->bdb;
3618   cur->clock = bdb->clock;
3619   uint64_t pid = tcbdbsearchleaf(bdb, kbuf, ksiz);
3620   if(pid < 1){
3621     cur->id = 0;
3622     cur->kidx = 0;
3623     cur->vidx = 0;
3624     return false;
3625   }
3626   BDBLEAF *leaf = tcbdbleafload(bdb, pid);
3627   if(!leaf){
3628     cur->id = 0;
3629     cur->kidx = 0;
3630     cur->vidx = 0;
3631     return false;
3632   }
3633   if(leaf->dead || TCPTRLISTNUM(leaf->recs) < 1){
3634     cur->id = pid;
3635     cur->kidx = 0;
3636     cur->vidx = 0;
3637     return forward ? tcbdbcurnextimpl(cur) : tcbdbcurprevimpl(cur);
3638   }
3639   int ri;
3640   BDBREC *rec = tcbdbsearchrec(bdb, leaf, kbuf, ksiz, &ri);
3641   if(rec){
3642     cur->id = pid;
3643     cur->kidx = ri;
3644     if(forward){
3645       cur->vidx = 0;
3646     } else {
3647       cur->vidx = rec->rest ? TCLISTNUM(rec->rest) : 0;
3648     }
3649     return true;
3650   }
3651   cur->id = leaf->id;
3652   if(ri > 0 && ri >= TCPTRLISTNUM(leaf->recs)) ri = TCPTRLISTNUM(leaf->recs) - 1;
3653   cur->kidx = ri;
3654   rec = TCPTRLISTVAL(leaf->recs, ri);
3655   char *dbuf = (char *)rec + sizeof(*rec);
3656   if(forward){
3657     int rv;
3658     if(bdb->cmp == tccmplexical){
3659       TCCMPLEXICAL(rv, kbuf, ksiz, dbuf, rec->ksiz);
3660     } else {
3661       rv = bdb->cmp(kbuf, ksiz, dbuf, rec->ksiz, bdb->cmpop);
3662     }
3663     if(rv < 0){
3664       cur->vidx = 0;
3665       return true;
3666     }
3667     cur->vidx = rec->rest ? TCLISTNUM(rec->rest) : 0;
3668     return tcbdbcurnextimpl(cur);
3669   }
3670   int rv;
3671   if(bdb->cmp == tccmplexical){
3672     TCCMPLEXICAL(rv, kbuf, ksiz, dbuf, rec->ksiz);
3673   } else {
3674     rv = bdb->cmp(kbuf, ksiz, dbuf, rec->ksiz, bdb->cmpop);
3675   }
3676   if(rv > 0){
3677     cur->vidx = rec->rest ? TCLISTNUM(rec->rest) : 0;
3678     return true;
3679   }
3680   cur->vidx = 0;
3681   return tcbdbcurprevimpl(cur);
3682 }
3683 
3684 
3685 /* Adjust a cursor object forward to the suitable record.
3686    `cur' specifies the cursor object.
3687    `forward' specifies the direction is forward or not.
3688    If successful, the return value is true, else, it is false. */
tcbdbcuradjust(BDBCUR * cur,bool forward)3689 static bool tcbdbcuradjust(BDBCUR *cur, bool forward){
3690   assert(cur);
3691   TCBDB *bdb = cur->bdb;
3692   if(cur->clock != bdb->clock){
3693     if(!tcbdbleafcheck(bdb, cur->id)){
3694       tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
3695       cur->id = 0;
3696       cur->kidx = 0;
3697       cur->vidx = 0;
3698       return false;
3699     }
3700     cur->clock = bdb->clock;
3701   }
3702   while(true){
3703     if(cur->id < 1){
3704       tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
3705       cur->id = 0;
3706       cur->kidx = 0;
3707       cur->vidx = 0;
3708       return false;
3709     }
3710     BDBLEAF *leaf = tcbdbleafload(bdb, cur->id);
3711     if(!leaf) return false;
3712     TCPTRLIST *recs = leaf->recs;
3713     int knum = TCPTRLISTNUM(recs);
3714     if(leaf->dead){
3715       if(forward){
3716         cur->id = leaf->next;
3717         cur->kidx = 0;
3718         cur->vidx = 0;
3719       } else {
3720         cur->id = leaf->prev;
3721         cur->kidx = INT_MAX;
3722         cur->vidx = INT_MAX;
3723       }
3724     } else if(cur->kidx < 0){
3725       if(forward){
3726         cur->kidx = 0;
3727         cur->vidx = 0;
3728       } else {
3729         cur->id = leaf->prev;
3730         cur->kidx = INT_MAX;
3731         cur->vidx = INT_MAX;
3732       }
3733     } else if(cur->kidx >= knum){
3734       if(forward){
3735         cur->id = leaf->next;
3736         cur->kidx = 0;
3737         cur->vidx = 0;
3738       } else {
3739         cur->kidx = knum - 1;
3740         cur->vidx = INT_MAX;
3741       }
3742     } else {
3743       BDBREC *rec = TCPTRLISTVAL(recs, cur->kidx);
3744       int vnum = rec->rest ? TCLISTNUM(rec->rest) + 1 : 1;
3745       if(cur->vidx < 0){
3746         if(forward){
3747           cur->vidx = 0;
3748         } else {
3749           cur->kidx--;
3750           cur->vidx = INT_MAX;
3751         }
3752       } else if(cur->vidx >= vnum){
3753         if(forward){
3754           cur->kidx++;
3755           cur->vidx = 0;
3756           if(cur->kidx >= knum){
3757             cur->id = leaf->next;
3758             cur->kidx = 0;
3759             cur->vidx = 0;
3760           } else {
3761             break;
3762           }
3763         } else {
3764           cur->vidx = vnum - 1;
3765           if(cur->vidx >= 0) break;
3766         }
3767       } else {
3768         break;
3769       }
3770     }
3771   }
3772   return true;
3773 }
3774 
3775 
3776 /* Move a cursor object to the previous record.
3777    `cur' specifies the cursor object.
3778    If successful, the return value is true, else, it is false. */
tcbdbcurprevimpl(BDBCUR * cur)3779 static bool tcbdbcurprevimpl(BDBCUR *cur){
3780   assert(cur);
3781   cur->vidx--;
3782   return tcbdbcuradjust(cur, false);
3783 }
3784 
3785 
3786 /* Move a cursor object to the next record.
3787    `cur' specifies the cursor object.
3788    If successful, the return value is true, else, it is false. */
tcbdbcurnextimpl(BDBCUR * cur)3789 static bool tcbdbcurnextimpl(BDBCUR *cur){
3790   assert(cur);
3791   cur->vidx++;
3792   return tcbdbcuradjust(cur, true);
3793 }
3794 
3795 
3796 /* Insert a record around a cursor object.
3797    `cur' specifies the cursor object.
3798    `vbuf' specifies the pointer to the region of the value.
3799    `vsiz' specifies the size of the region of the value.
3800    `cpmode' specifies detail adjustment.
3801    If successful, the return value is true, else, it is false. */
tcbdbcurputimpl(BDBCUR * cur,const char * vbuf,int vsiz,int cpmode)3802 static bool tcbdbcurputimpl(BDBCUR *cur, const char *vbuf, int vsiz, int cpmode){
3803   assert(cur && vbuf && vsiz >= 0);
3804   TCBDB *bdb = cur->bdb;
3805   if(cur->clock != bdb->clock){
3806     if(!tcbdbleafcheck(bdb, cur->id)){
3807       tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
3808       cur->id = 0;
3809       cur->kidx = 0;
3810       cur->vidx = 0;
3811       return false;
3812     }
3813     cur->clock = bdb->clock;
3814   }
3815   BDBLEAF *leaf = tcbdbleafload(bdb, cur->id);
3816   if(!leaf) return false;
3817   TCPTRLIST *recs = leaf->recs;
3818   if(cur->kidx >= TCPTRLISTNUM(recs)){
3819     tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
3820     return false;
3821   }
3822   BDBREC *rec = TCPTRLISTVAL(recs, cur->kidx);
3823   int vnum = rec->rest ? TCLISTNUM(rec->rest) + 1 : 1;
3824   if(cur->vidx >= vnum){
3825     tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
3826     return false;
3827   }
3828   char *dbuf = (char *)rec + sizeof(*rec);
3829   int psiz = TCALIGNPAD(rec->ksiz);
3830   BDBREC *orec = rec;
3831   switch(cpmode){
3832     case BDBCPCURRENT:
3833       if(cur->vidx < 1){
3834         leaf->size += vsiz - rec->vsiz;
3835         if(vsiz > rec->vsiz){
3836           TCREALLOC(rec, rec, sizeof(*rec) + rec->ksiz + psiz + vsiz + 1);
3837           if(rec != orec){
3838             tcptrlistover(recs, cur->kidx, rec);
3839             dbuf = (char *)rec + sizeof(*rec);
3840           }
3841         }
3842         memcpy(dbuf + rec->ksiz + psiz, vbuf, vsiz);
3843         dbuf[rec->ksiz+psiz+vsiz] = '\0';
3844         rec->vsiz = vsiz;
3845       } else {
3846         leaf->size += vsiz - TCLISTVALSIZ(rec->rest, cur->vidx - 1);
3847         tclistover(rec->rest, cur->vidx - 1, vbuf, vsiz);
3848       }
3849       break;
3850     case BDBCPBEFORE:
3851       leaf->size += vsiz;
3852       if(cur->vidx < 1){
3853         if(!rec->rest) rec->rest = tclistnew2(1);
3854         tclistunshift(rec->rest, dbuf + rec->ksiz + psiz, rec->vsiz);
3855         if(vsiz > rec->vsiz){
3856           TCREALLOC(rec, rec, sizeof(*rec) + rec->ksiz + psiz + vsiz + 1);
3857           if(rec != orec){
3858             tcptrlistover(recs, cur->kidx, rec);
3859             dbuf = (char *)rec + sizeof(*rec);
3860           }
3861         }
3862         memcpy(dbuf + rec->ksiz + psiz, vbuf, vsiz);
3863         dbuf[rec->ksiz+psiz+vsiz] = '\0';
3864         rec->vsiz = vsiz;
3865       } else {
3866         TCLISTINSERT(rec->rest, cur->vidx - 1, vbuf, vsiz);
3867       }
3868       bdb->rnum++;
3869       break;
3870     case BDBCPAFTER:
3871       leaf->size += vsiz;
3872       if(!rec->rest) rec->rest = tclistnew2(1);
3873       TCLISTINSERT(rec->rest, cur->vidx, vbuf, vsiz);
3874       cur->vidx++;
3875       bdb->rnum++;
3876       break;
3877   }
3878   leaf->dirty = true;
3879   return true;
3880 }
3881 
3882 
3883 /* Delete the record where a cursor object is.
3884    `cur' specifies the cursor object.
3885    If successful, the return value is true, else, it is false. */
tcbdbcuroutimpl(BDBCUR * cur)3886 static bool tcbdbcuroutimpl(BDBCUR *cur){
3887   assert(cur);
3888   TCBDB *bdb = cur->bdb;
3889   if(cur->clock != bdb->clock){
3890     if(!tcbdbleafcheck(bdb, cur->id)){
3891       tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
3892       cur->id = 0;
3893       cur->kidx = 0;
3894       cur->vidx = 0;
3895       return false;
3896     }
3897     cur->clock = bdb->clock;
3898   }
3899   BDBLEAF *leaf = tcbdbleafload(bdb, cur->id);
3900   if(!leaf) return false;
3901   TCPTRLIST *recs = leaf->recs;
3902   if(cur->kidx >= TCPTRLISTNUM(recs)){
3903     tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
3904     return false;
3905   }
3906   BDBREC *rec = TCPTRLISTVAL(recs, cur->kidx);
3907   char *dbuf = (char *)rec + sizeof(*rec);
3908   int vnum = rec->rest ? TCLISTNUM(rec->rest) + 1 : 1;
3909   if(cur->vidx >= vnum){
3910     tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
3911     return false;
3912   }
3913   if(rec->rest){
3914     if(cur->vidx < 1){
3915       leaf->size -= rec->vsiz;
3916       int vsiz;
3917       char *vbuf = tclistshift(rec->rest, &vsiz);
3918       int psiz = TCALIGNPAD(rec->ksiz);
3919       if(vsiz > rec->vsiz){
3920         BDBREC *orec = rec;
3921         TCREALLOC(rec, rec, sizeof(*rec) + rec->ksiz + psiz + vsiz + 1);
3922         if(rec != orec){
3923           tcptrlistover(leaf->recs, cur->kidx, rec);
3924           dbuf = (char *)rec + sizeof(*rec);
3925         }
3926       }
3927       memcpy(dbuf + rec->ksiz + psiz, vbuf, vsiz);
3928       dbuf[rec->ksiz+psiz+vsiz] = '\0';
3929       rec->vsiz = vsiz;
3930       TCFREE(vbuf);
3931     } else {
3932       int vsiz;
3933       char *vbuf = tclistremove(rec->rest, cur->vidx - 1, &vsiz);
3934       leaf->size -= vsiz;
3935       TCFREE(vbuf);
3936     }
3937     if(TCLISTNUM(rec->rest) < 1){
3938       tclistdel(rec->rest);
3939       rec->rest = NULL;
3940     }
3941   } else {
3942     leaf->size -= rec->ksiz + rec->vsiz;
3943     if(TCPTRLISTNUM(recs) < 2){
3944       uint64_t pid = tcbdbsearchleaf(bdb, dbuf, rec->ksiz);
3945       if(pid < 1) return false;
3946       if(bdb->hnum > 0){
3947         if(!(leaf = tcbdbleafload(bdb, pid))) return false;
3948         if(!tcbdbleafkill(bdb, leaf)) return false;
3949         if(leaf->next != 0){
3950           cur->id = leaf->next;
3951           cur->kidx = 0;
3952           cur->vidx = 0;
3953           cur->clock = bdb->clock;
3954         }
3955       }
3956     }
3957     TCFREE(tcptrlistremove(leaf->recs, cur->kidx));
3958   }
3959   bdb->rnum--;
3960   leaf->dirty = true;
3961   return tcbdbcuradjust(cur, true) || tchdbecode(bdb->hdb) == TCENOREC;
3962 }
3963 
3964 
3965 /* Get the key and the value of the current record of the cursor object.
3966    `cur' specifies the cursor object.
3967    `kbp' specifies the pointer to the variable into which the pointer to the region of the key is
3968    assgined.
3969    `ksp' specifies the pointer to the variable into which the size of the key region is assigned.
3970    `vbp' specifies the pointer to the variable into which the pointer to the region of the value
3971    is assgined.
3972    `vsp' specifies the pointer to the variable into which the size of the value region is
3973    assigned. */
tcbdbcurrecimpl(BDBCUR * cur,const char ** kbp,int * ksp,const char ** vbp,int * vsp)3974 static bool tcbdbcurrecimpl(BDBCUR *cur, const char **kbp, int *ksp, const char **vbp, int *vsp){
3975   assert(cur && kbp && ksp && vbp && vsp);
3976   TCBDB *bdb = cur->bdb;
3977   if(cur->clock != bdb->clock){
3978     if(!tcbdbleafcheck(bdb, cur->id)){
3979       tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
3980       cur->id = 0;
3981       cur->kidx = 0;
3982       cur->vidx = 0;
3983       return false;
3984     }
3985     cur->clock = bdb->clock;
3986   }
3987   BDBLEAF *leaf = tcbdbleafload(bdb, cur->id);
3988   if(!leaf) return false;
3989   TCPTRLIST *recs = leaf->recs;
3990   if(cur->kidx >= TCPTRLISTNUM(recs)){
3991     tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
3992     return false;
3993   }
3994   BDBREC *rec = TCPTRLISTVAL(recs, cur->kidx);
3995   char *dbuf = (char *)rec + sizeof(*rec);
3996   int vnum = rec->rest ? TCLISTNUM(rec->rest) + 1 : 1;
3997   if(cur->vidx >= vnum){
3998     tcbdbsetecode(bdb, TCENOREC, __FILE__, __LINE__, __func__);
3999     return false;
4000   }
4001   *kbp = dbuf;
4002   *ksp = rec->ksiz;
4003   if(cur->vidx > 0){
4004     *vbp = tclistval(rec->rest, cur->vidx - 1, vsp);
4005   } else {
4006     *vbp = dbuf + rec->ksiz + TCALIGNPAD(rec->ksiz);
4007     *vsp = rec->vsiz;
4008   }
4009   return true;
4010 }
4011 
4012 
4013 /* Process each record atomically of a B+ tree database object.
4014    `func' specifies the pointer to the iterator function called for each record.
4015    `op' specifies an arbitrary pointer to be given as a parameter of the iterator function.
4016    If successful, the return value is true, else, it is false. */
tcbdbforeachimpl(TCBDB * bdb,TCITER iter,void * op)4017 static bool tcbdbforeachimpl(TCBDB *bdb, TCITER iter, void *op){
4018   assert(bdb && iter);
4019   bool err = false;
4020   BDBCUR *cur = tcbdbcurnew(bdb);
4021   tcbdbcurfirstimpl(cur);
4022   const char *kbuf, *vbuf;
4023   int ksiz, vsiz;
4024   while(cur->id > 0){
4025     if(tcbdbcurrecimpl(cur, &kbuf, &ksiz, &vbuf, &vsiz)){
4026       if(!iter(kbuf, ksiz, vbuf, vsiz, op)) break;
4027       tcbdbcurnextimpl(cur);
4028       if(bdb->tran){
4029         if(cur->id > 0){
4030           BDBLEAF *leaf = tcbdbleafload(bdb, cur->id);
4031           if(!leaf){
4032             err = true;
4033             break;
4034           }
4035           if(!leaf->dirty && !tcbdbleafcacheout(bdb, leaf)){
4036             err = false;
4037             break;
4038           }
4039         }
4040       } else if(TCMAPRNUM(bdb->leafc) > bdb->lcnum && !tcbdbcacheadjust(bdb)){
4041         err = true;
4042         break;
4043       }
4044     } else {
4045       if(tchdbecode(bdb->hdb) != TCEINVALID && tchdbecode(bdb->hdb) != TCENOREC) err = true;
4046       break;
4047     }
4048   }
4049   tcbdbcurdel(cur);
4050   return !err;
4051 }
4052 
4053 
4054 
4055 /*************************************************************************************************
4056  * debugging functions
4057  *************************************************************************************************/
4058 
4059 
4060 /* Print meta data of the header into the debugging output.
4061    `bdb' specifies the B+ tree database object. */
tcbdbprintmeta(TCBDB * bdb)4062 void tcbdbprintmeta(TCBDB *bdb){
4063   assert(bdb);
4064   int dbgfd = tchdbdbgfd(bdb->hdb);
4065   if(dbgfd < 0) return;
4066   if(dbgfd == UINT16_MAX) dbgfd = 1;
4067   char buf[BDBPAGEBUFSIZ];
4068   char *wp = buf;
4069   wp += sprintf(wp, "META:");
4070   wp += sprintf(wp, " mmtx=%p", (void *)bdb->mmtx);
4071   wp += sprintf(wp, " cmtx=%p", (void *)bdb->cmtx);
4072   wp += sprintf(wp, " hdb=%p", (void *)bdb->hdb);
4073   wp += sprintf(wp, " opaque=%p", (void *)bdb->opaque);
4074   wp += sprintf(wp, " open=%d", bdb->open);
4075   wp += sprintf(wp, " wmode=%d", bdb->wmode);
4076   wp += sprintf(wp, " lmemb=%u", bdb->lmemb);
4077   wp += sprintf(wp, " nmemb=%u", bdb->nmemb);
4078   wp += sprintf(wp, " opts=%u", bdb->opts);
4079   wp += sprintf(wp, " root=%llx", (unsigned long long)bdb->root);
4080   wp += sprintf(wp, " first=%llx", (unsigned long long)bdb->first);
4081   wp += sprintf(wp, " last=%llx", (unsigned long long)bdb->last);
4082   wp += sprintf(wp, " lnum=%llu", (unsigned long long)bdb->lnum);
4083   wp += sprintf(wp, " nnum=%llu", (unsigned long long)bdb->nnum);
4084   wp += sprintf(wp, " rnum=%llu", (unsigned long long)bdb->rnum);
4085   wp += sprintf(wp, " leafc=%p", (void *)bdb->leafc);
4086   wp += sprintf(wp, " nodec=%p", (void *)bdb->nodec);
4087   wp += sprintf(wp, " cmp=%p", (void *)(intptr_t)bdb->cmp);
4088   wp += sprintf(wp, " cmpop=%p", (void *)bdb->cmpop);
4089   wp += sprintf(wp, " lcnum=%u", bdb->lcnum);
4090   wp += sprintf(wp, " ncnum=%u", bdb->ncnum);
4091   wp += sprintf(wp, " lsmax=%u", bdb->lsmax);
4092   wp += sprintf(wp, " lschk=%u", bdb->lschk);
4093   wp += sprintf(wp, " capnum=%llu", (unsigned long long)bdb->capnum);
4094   wp += sprintf(wp, " hist=%p", (void *)bdb->hist);
4095   wp += sprintf(wp, " hnum=%d", bdb->hnum);
4096   wp += sprintf(wp, " hleaf=%llu", (unsigned long long)bdb->hleaf);
4097   wp += sprintf(wp, " lleaf=%llu", (unsigned long long)bdb->lleaf);
4098   wp += sprintf(wp, " tran=%d", bdb->tran);
4099   wp += sprintf(wp, " rbopaque=%p", (void *)bdb->rbopaque);
4100   wp += sprintf(wp, " clock=%llu", (unsigned long long)bdb->clock);
4101   wp += sprintf(wp, " cnt_saveleaf=%lld", (long long)bdb->cnt_saveleaf);
4102   wp += sprintf(wp, " cnt_loadleaf=%lld", (long long)bdb->cnt_loadleaf);
4103   wp += sprintf(wp, " cnt_killleaf=%lld", (long long)bdb->cnt_killleaf);
4104   wp += sprintf(wp, " cnt_adjleafc=%lld", (long long)bdb->cnt_adjleafc);
4105   wp += sprintf(wp, " cnt_savenode=%lld", (long long)bdb->cnt_savenode);
4106   wp += sprintf(wp, " cnt_loadnode=%lld", (long long)bdb->cnt_loadnode);
4107   wp += sprintf(wp, " cnt_adjnodec=%lld", (long long)bdb->cnt_adjnodec);
4108   *(wp++) = '\n';
4109   tcwrite(dbgfd, buf, wp - buf);
4110 }
4111 
4112 
4113 /* Print records of a leaf object into the debugging output.
4114    `bdb' specifies the B+ tree database object.
4115    `leaf' specifies the leaf object. */
tcbdbprintleaf(TCBDB * bdb,BDBLEAF * leaf)4116 void tcbdbprintleaf(TCBDB *bdb, BDBLEAF *leaf){
4117   assert(bdb && leaf);
4118   int dbgfd = tchdbdbgfd(bdb->hdb);
4119   TCPTRLIST *recs = leaf->recs;
4120   if(dbgfd < 0) return;
4121   if(dbgfd == UINT16_MAX) dbgfd = 1;
4122   char buf[BDBPAGEBUFSIZ];
4123   char *wp = buf;
4124   wp += sprintf(wp, "LEAF:");
4125   wp += sprintf(wp, " id:%llx", (unsigned long long)leaf->id);
4126   wp += sprintf(wp, " size:%u", leaf->size);
4127   wp += sprintf(wp, " prev:%llx", (unsigned long long)leaf->prev);
4128   wp += sprintf(wp, " next:%llx", (unsigned long long)leaf->next);
4129   wp += sprintf(wp, " dirty:%d", leaf->dirty);
4130   wp += sprintf(wp, " dead:%d", leaf->dead);
4131   wp += sprintf(wp, " rnum:%d", TCPTRLISTNUM(recs));
4132   *(wp++) = ' ';
4133   for(int i = 0; i < TCPTRLISTNUM(recs); i++){
4134     tcwrite(dbgfd, buf, wp - buf);
4135     wp = buf;
4136     BDBREC *rec = TCPTRLISTVAL(recs, i);
4137     char *dbuf = (char *)rec + sizeof(*rec);
4138     wp += sprintf(wp, " [%s:%s]", dbuf, dbuf + rec->ksiz + TCALIGNPAD(rec->ksiz));
4139     TCLIST *rest = rec->rest;
4140     if(rest){
4141       for(int j = 0; j < TCLISTNUM(rest); j++){
4142         wp += sprintf(wp, ":%s", (char *)TCLISTVALPTR(rest, j));
4143       }
4144     }
4145   }
4146   *(wp++) = '\n';
4147   tcwrite(dbgfd, buf, wp - buf);
4148 }
4149 
4150 
4151 /* Print indices of a node object into the debugging output.
4152    `bdb' specifies the B+ tree database object.
4153    `node' specifies the node object. */
tcbdbprintnode(TCBDB * bdb,BDBNODE * node)4154 void tcbdbprintnode(TCBDB *bdb, BDBNODE *node){
4155   assert(bdb && node);
4156   int dbgfd = tchdbdbgfd(bdb->hdb);
4157   TCPTRLIST *idxs = node->idxs;
4158   if(dbgfd < 0) return;
4159   if(dbgfd == UINT16_MAX) dbgfd = 1;
4160   char buf[BDBPAGEBUFSIZ];
4161   char *wp = buf;
4162   wp += sprintf(wp, "NODE:");
4163   wp += sprintf(wp, " id:%llx", (unsigned long long)node->id);
4164   wp += sprintf(wp, " heir:%llx", (unsigned long long)node->heir);
4165   wp += sprintf(wp, " dirty:%d", node->dirty);
4166   wp += sprintf(wp, " dead:%d", node->dead);
4167   wp += sprintf(wp, " rnum:%d", TCPTRLISTNUM(idxs));
4168   *(wp++) = ' ';
4169   for(int i = 0; i < TCPTRLISTNUM(idxs); i++){
4170     tcwrite(dbgfd, buf, wp - buf);
4171     wp = buf;
4172     BDBIDX *idx = TCPTRLISTVAL(idxs, i);
4173     char *ebuf = (char *)idx + sizeof(*idx);
4174     wp += sprintf(wp, " [%llx:%s]", (unsigned long long)idx->pid, ebuf);
4175   }
4176   *(wp++) = '\n';
4177   tcwrite(dbgfd, buf, wp - buf);
4178 }
4179 
4180 
4181 
4182 // END OF FILE
4183