1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 
27 #define BTREE_KEY2ID 1
28 
29 #include <kdb/extern.h>
30 #include <kdb/btree.h>
31 #include <kfs/file.h>
32 #include <kfs/pagefile.h>
33 #include <klib/refcount.h>
34 #include <klib/btree.h>
35 #include <klib/rc.h>
36 #include <klib/text.h>
37 #include <sysalloc.h>
38 
39 #include <stdlib.h>
40 #include <string.h>
41 #include <assert.h>
42 
43 /*--------------------------------------------------------------------------
44  * KBTree
45  *  this implementation is an extremely simplified structure
46  *  meant to provide the ability to create an index for temporary use
47  */
48 #define eByteOrderTag 0x05031988
49 #define eByteOrderReverse 0x88190305
50 
51 
52 /* v3 does not store values, but stores keys in node pages */
53 typedef struct KBTreeHdr_v3 KBTreeHdr_v3;
54 struct KBTreeHdr_v3
55 {
56     /* last entry id */
57     uint32_t id_seq;
58 
59     /* key min/max */
60     uint16_t key_min, key_max;
61 
62     /* type [ 0 ] is type
63        type [ 1 ] is non-zero if comparison function was used
64        rest are for alignment */
65     uint8_t type [ 4 ];
66 
67     /* tree root */
68     uint32_t root;
69 
70     /* next to last */
71     uint32_t version;
72 
73     /* last */
74     uint32_t endian;
75 };
76 
77 struct Pager {
78     KPageFile *pager;
79     rc_t rc;
80 };
81 
PagerAlloc(Pager * self,uint32_t * newid)82 static void const *PagerAlloc(Pager *self, uint32_t *newid)
83 {
84     KPage *page = NULL;
85     self->rc = KPageFileAlloc(self->pager, &page, newid);
86     return (void const *)page;
87 }
88 
PagerUse(Pager * self,uint32_t pageid)89 static void const *PagerUse(Pager *self, uint32_t pageid)
90 {
91     KPage *page = NULL;
92     self->rc = KPageFileGet(self->pager, &page, pageid);
93     return (void const *)page;
94 }
95 
PagerAccess(Pager * self,void const * page)96 static void const *PagerAccess(Pager *self, void const *page)
97 {
98     void const *mem = NULL;
99     self->rc = KPageAccessRead((KPage const *)page, &mem, NULL);
100     return mem;
101 }
102 
PagerUpdate(Pager * self,void const * page)103 static void *PagerUpdate(Pager *self, void const *page)
104 {
105     void *mem = NULL;
106     self->rc = KPageAccessUpdate((KPage *)page, &mem, NULL);
107     return mem;
108 }
109 
PagerUnuse(Pager * self,void const * page)110 static void PagerUnuse(Pager *self, void const *page)
111 {
112     KPageRelease((KPage const *)page);
113 }
114 
115 static Pager_vt const KPageFile_vt = {
116     PagerAlloc,
117     PagerUse,
118     PagerAccess,
119     PagerUpdate,
120     PagerUnuse
121 };
122 
123 typedef struct KBTreeHdr_v3 KBTreeHdr;
124 
125 static
KBTreeReadHeader(KBTreeHdr * hdr,const KFile * f)126 rc_t KBTreeReadHeader ( KBTreeHdr *hdr, const KFile *f )
127 {
128     uint64_t eof;
129     rc_t rc = KFileSize ( f, & eof );
130     if ( rc == 0 )
131     {
132         size_t num_read;
133 
134         /* this would be an empty file */
135         if ( eof == 0 )
136         {
137             memset ( hdr, 0, sizeof * hdr );
138             return RC ( rcDB, rcTree, rcConstructing, rcData, rcNotFound );
139         }
140 
141         if ( eof < sizeof * hdr )
142             return RC ( rcDB, rcTree, rcConstructing, rcData, rcCorrupt );
143 
144         rc = KFileReadAll ( f, eof - sizeof * hdr, hdr, sizeof * hdr, & num_read );
145         if ( rc == 0 && num_read != sizeof * hdr )
146             rc = RC ( rcDB, rcTree, rcConstructing, rcData, rcInsufficient );
147         if ( rc == 0 )
148         {
149             if ( hdr -> endian != eByteOrderTag )
150             {
151                 if ( hdr -> endian == eByteOrderReverse )
152                     return RC ( rcDB, rcTree, rcConstructing, rcByteOrder, rcIncorrect );
153                 return RC ( rcDB, rcTree, rcConstructing, rcData, rcCorrupt );
154             }
155             if ( hdr -> version != 2 )
156                 return RC ( rcDB, rcTree, rcConstructing, rcHeader, rcBadVersion );
157         }
158     }
159     return rc;
160 }
161 
162 struct KBTree
163 {
164     /* file itself */
165     KFile *file;
166 
167     /* page cache layered on top */
168     Pager pgfile;
169 
170     /* "header" is stored at end */
171     KBTreeHdr hdr;
172 
173     KRefcount refcount;
174 
175     bool read_only;
176 };
177 
178 /* Whack
179  */
180 static
KBTreeWhack(KBTree * self)181 rc_t KBTreeWhack ( KBTree *self )
182 {
183     if ( self -> read_only || self -> file == NULL )
184         KPageFileRelease ( self -> pgfile.pager );
185     else
186     {
187         size_t num_writ;
188 
189         /* request page file size */
190         uint64_t eof;
191         rc_t rc = KPageFileSize ( self -> pgfile.pager, & eof, NULL, NULL );
192         if ( rc != 0 )
193             return rc;
194 
195         /* drop the page file and its cache */
196         KPageFileRelease ( self -> pgfile.pager );
197 
198         /* write header to tail */
199         rc = KFileWrite ( self -> file, eof, & self -> hdr, sizeof self -> hdr, & num_writ );
200         if ( rc == 0 && num_writ != sizeof self -> hdr )
201             rc = RC ( rcDB, rcTree, rcPersisting, rcTransfer, rcIncomplete );
202         if ( rc == 0 )
203             rc = KFileSetSize ( self -> file, eof + sizeof self -> hdr );
204         if ( rc != 0 )
205         {
206             /* TBD - can issue a warning here */
207         }
208     }
209 
210     KFileRelease ( self -> file );
211     free ( self );
212     return 0;
213 }
214 
215 
216 /* MakeRead
217  * MakeUpdate
218  *  make a b-tree object backed by supplied KFile
219  *
220  *  "backing" [ IN ] - open file with read permissions.
221  *   NB - a reference will be attached to this file.
222  *
223  *  "climit" [ IN ] - cache limit in bytes. the internal cache will
224  *   retain UP TO ( but not exceeding ) the limit specified. a value
225  *   of 0 ( zero ) will disable caching.
226  *
227  *  "cmp" [ IN, NULL OKAY ] - optional comparison callback function for opaque keys.
228  *   specific key types will use internal comparison functions. for opaque keys, a
229  *   NULL function pointer will cause ordering by size and binary comparison.
230  */
KBTreeMakeRead_1(const KBTree ** btp,const KFile * backing,size_t climit)231 LIB_EXPORT rc_t CC KBTreeMakeRead_1 ( const KBTree **btp,
232     const KFile *backing, size_t climit )
233 {
234     rc_t rc;
235 
236     if ( btp == NULL )
237         rc = RC ( rcDB, rcTree, rcConstructing, rcParam, rcNull );
238     else
239     {
240         if ( backing == NULL )
241             rc = RC ( rcDB, rcTree, rcConstructing, rcFile, rcNull );
242         else
243         {
244             KBTree *bt = malloc ( sizeof * bt );
245             if ( bt == NULL )
246                 rc = RC ( rcDB, rcTree, rcConstructing, rcMemory, rcExhausted );
247             else
248             {
249                 rc = KBTreeReadHeader ( & bt -> hdr, backing );
250                 if ( rc == 0 )
251                 {
252                     rc = KFileAddRef ( backing );
253                     if ( rc == 0 )
254                     {
255                         /* create page file */
256                         rc = KPageFileMakeRead ( ( const KPageFile** ) & bt -> pgfile.pager, backing, climit );
257                         if ( rc == 0 )
258                         {
259                             /* ready to go */
260                             bt -> file = ( KFile* ) backing;
261                             KRefcountInit ( & bt -> refcount, 1, "KBTree", "make-read", "btree" );
262                             bt -> read_only = true;
263 
264                             * btp = bt;
265                             return 0;
266                         }
267 
268                         KFileRelease ( backing );
269                     }
270                 }
271 
272                 free ( bt );
273             }
274         }
275 
276         * btp = NULL;
277     }
278 
279     return rc;
280 }
281 
282 
283 /* MakeUpdate
284  *  make a b-tree object backed by supplied KFile
285  *
286  *  "backing" [ IN ] - open file with read & write permissions.
287  *   NB - a reference will be attached to this file.
288  *
289  *  "climit" [ IN ] - cache limit in bytes. the internal cache will
290  *   retain UP TO ( but not exceeding ) the limit specified. a value
291  *   of 0 ( zero ) will disable caching.
292  *
293  *  "write_through" [ IN ] - if true, causes flushing of modified page
294  *   after its value is released
295  *
296  *  "type" [ IN ] - describes the key type ( see above )
297  *
298  *  "key_chunk_size" [ IN ] - the "chunking" ( alignment ) factor for
299  *   storing keys, rounded up to the nearest power of 2.
300  *
301  *  "value_chunk_size" [ IN ] - chunking factor for values
302  *   ( see "key_chunk_size" )
303  *
304  *  "min_key_size" [ IN ] and "max_key_size" [ IN ] - specifies the allowed
305  *   opaque key sizes. min == max implies fixed size. ignored for well
306  *   known fixed size key types.
307  *
308  *  "min_value_size" [ IN ] and "max_value_size" [ IN ] - specifies the allowed
309  *   value sizes. min == max implies fixed size.
310  *
311  *  "cmp" [ IN ] - comparison callback function for opaque keys.
312  */
KBTreeMakeUpdate_1(KBTree ** btp,KFile * backing,size_t climit)313 LIB_EXPORT rc_t CC KBTreeMakeUpdate_1 ( KBTree **btp, KFile *backing,
314     size_t climit )
315 {
316     rc_t rc;
317 
318     if ( btp == NULL )
319         rc = RC ( rcDB, rcTree, rcConstructing, rcParam, rcNull );
320     else
321     {
322         {
323             KBTree *bt = calloc ( 1,sizeof * bt );
324             if ( bt == NULL )
325                 rc = RC ( rcDB, rcTree, rcConstructing, rcMemory, rcExhausted );
326             else
327             {
328                 if ( backing == NULL || ( rc = KBTreeReadHeader ( & bt -> hdr, backing )) == 0 || GetRCState ( rc ) == rcNotFound )
329                 {
330                     /* detect empty file */
331                     if ( bt -> hdr . version == 0 )
332                     {
333                         assert ( bt -> hdr . id_seq == 0 );
334                         bt -> hdr . type [ 0 ] = 0;
335                         bt -> hdr . type [ 1 ] = 0;
336                         bt -> hdr . key_min = 0;
337                         bt -> hdr . key_max = 0;
338                         bt -> hdr . version = 3;
339                         bt -> hdr . endian = eByteOrderTag;
340                         rc = 0;
341                     }
342                     else
343                     {
344                         /* check for parameter equivalence */
345                         if ( bt -> hdr . version < 3 )
346                             rc = RC ( rcDB, rcTree, rcConstructing, rcHeader, rcBadVersion );
347                     }
348 
349                     if ( rc == 0 )
350                     {
351                         if(backing) rc = KFileAddRef ( backing );
352                         if ( rc == 0 )
353                         {
354                             /* create page file */
355                             rc = KPageFileMakeUpdate ( & bt -> pgfile.pager, backing, climit, false );
356                             if ( rc == 0 )
357                             {
358                                 /* ready to go */
359                                 bt -> file = backing;
360                                 KRefcountInit ( & bt -> refcount, 1, "KBTree", "make-update", "btree" );
361                                 bt -> read_only = false;
362 
363                                 * btp = bt;
364                                 return 0;
365                             }
366 
367                             if(backing) KFileRelease ( backing );
368                         }
369                     }
370                 }
371 
372                 free ( bt );
373             }
374         }
375 
376         * btp = NULL;
377     }
378 
379     return rc;
380 }
381 
382 
383 /* AddRef
384  * Release
385  *  ignores NULL references
386  */
KBTreeAddRef(const KBTree * self)387 LIB_EXPORT rc_t CC KBTreeAddRef ( const KBTree *self )
388 {
389     if ( self != NULL ) switch ( KRefcountAdd ( & self -> refcount, "KBTree" ) )
390     {
391     case krefOkay:
392         break;
393     default:
394         return RC ( rcDB, rcTree, rcAttaching, rcConstraint, rcViolated );
395     }
396     return 0;
397 }
398 
KBTreeRelease(const KBTree * self)399 LIB_EXPORT rc_t CC KBTreeRelease ( const KBTree *self )
400 {
401     if ( self != NULL ) switch ( KRefcountDrop ( & self -> refcount, "KBTree" ) )
402     {
403     case krefOkay:
404         break;
405     case krefWhack:
406         return KBTreeWhack ( ( KBTree* ) self );
407     default:
408         return RC ( rcDB, rcTree, rcReleasing, rcConstraint, rcViolated );
409     }
410     return 0;
411 }
412 
413 
414 /* DropBacking
415  *  used immediately prior to releasing
416  *  prevents modified pages from being flushed to disk
417  *  renders object nearly useless
418  */
KBTreeDropBacking(KBTree * self)419 LIB_EXPORT rc_t CC KBTreeDropBacking ( KBTree *self )
420 {
421     rc_t rc;
422 
423     if ( self == NULL )
424         return RC ( rcDB, rcTree, rcDetaching, rcSelf, rcNull );
425 
426     rc = KPageFileDropBacking ( self -> pgfile.pager );
427     if ( rc == 0 )
428     {
429         rc = KFileRelease ( self -> file );
430         if ( rc == 0 )
431             self -> file = NULL;
432     }
433 
434     return rc;
435 }
436 
437 
438 /* Size
439  *  returns size in bytes of file and cache
440  *
441  *  "lsize" [ OUT, NULL OKAY ] - return parameter for logical size
442  *
443  *  "fsize" [ OUT, NULL OKAY ] - return parameter for file size
444  *
445  *  "csize" [ OUT, NULL OKAY ] - return parameter for cache size
446  */
KBTreeSize(const KBTree * self,uint64_t * lsize,uint64_t * fsize,size_t * csize)447 LIB_EXPORT rc_t CC KBTreeSize ( const KBTree *self,
448     uint64_t *lsize, uint64_t *fsize, size_t *csize )
449 {
450     size_t dummysz;
451     uint64_t dummy64;
452 
453     if ( self != NULL )
454         return KPageFileSize ( self -> pgfile.pager, lsize, fsize, csize );
455 
456     if ( lsize == NULL )
457         lsize = & dummy64;
458     if ( fsize == NULL )
459         fsize = & dummy64;
460     if ( csize == NULL )
461         csize = & dummysz;
462 
463     * lsize = 0;
464     * fsize = 0;
465     * csize = 0;
466 
467     return RC ( rcDB, rcTree, rcAccessing, rcSelf, rcNull );
468 }
469 
470 
471 /* Find
472  *  searches for a match
473  *
474  *  "val" [ OUT ] - return parameter for value found
475  *   accessed via KBTreeValueAccess* described above
476  *   must be balanced with a call to KBTreeValueWhack.
477  *
478  *  "key" [ IN ] and "key_size" [ IN ] - describes an
479  *   opaque key
480  */
KBTreeFind(const KBTree * self,uint64_t * id,const void * key,size_t key_size)481 LIB_EXPORT rc_t CC KBTreeFind ( const KBTree *self, uint64_t *id,
482     const void *key, size_t key_size )
483 {
484     rc_t rc;
485 
486     if ( id == NULL )
487         rc = RC ( rcDB, rcTree, rcSelecting, rcParam, rcNull );
488     else
489     {
490         if ( self == NULL )
491             rc = RC ( rcDB, rcTree, rcSelecting, rcSelf, rcNull );
492         else if ( key_size == 0 )
493             rc = RC ( rcDB, rcTree, rcSelecting, rcParam, rcEmpty );
494         else if ( key == NULL )
495             rc = RC ( rcDB, rcTree, rcSelecting, rcParam, rcNull );
496         else if ( self -> hdr . root == 0 )
497             rc = RC ( rcDB, rcTree, rcSelecting, rcItem, rcNotFound );
498         else
499         {
500             uint32_t id32 = 0;
501             rc = BTreeFind(self->hdr.root, (Pager *)&self->pgfile, &KPageFile_vt, &id32, key, key_size);
502             if (self->pgfile.rc)
503                 rc = self->pgfile.rc;
504             *id = id32;
505         }
506     }
507 
508     return rc;
509 }
510 
511 
512 /* Entry
513  *  searches for a match or creates a new entry
514  *
515  *  "val" [ OUT ] - return parameter for value found
516  *   accessed via KBTreeValueAccess* described above
517  *   must be balanced with a call to KBTreeValueWhack.
518  *
519  *  "was_inserted" [ OUT ] - if true, the returned value was the result of an
520  *   insertion and can be guaranteed to be all 0 bits. otherwise, the returned
521  *   value will be whatever was there previously.
522  *
523  *  "alloc_size" [ IN ] - the number of value bytes to allocate upon insertion,
524  *   i.e. if the key was not found. this value must agree with the limits
525  *   specified in Make ( see above ).
526  *
527  *  "key" [ IN ] and "key_size" [ IN ] - describes an
528  *   opaque key
529  */
KBTreeEntry(KBTree * self,uint64_t * id,bool * was_inserted,const void * key,size_t key_size)530 LIB_EXPORT rc_t CC KBTreeEntry ( KBTree *self, uint64_t *id,
531     bool *was_inserted, const void *key, size_t key_size )
532 {
533     rc_t rc;
534 
535     bool dummy;
536     if ( was_inserted == NULL )
537         was_inserted = & dummy;
538     * was_inserted = false;
539 
540     if ( id == NULL )
541         rc = RC ( rcDB, rcTree, rcUpdating, rcParam, rcNull );
542     else
543     {
544         if ( self == NULL )
545             rc = RC ( rcDB, rcTree, rcUpdating, rcSelf, rcNull );
546         else if ( key_size == 0 )
547             rc = RC ( rcDB, rcTree, rcUpdating, rcParam, rcEmpty );
548         else if ( key == NULL )
549             rc = RC ( rcDB, rcTree, rcUpdating, rcParam, rcNull );
550         else
551         {
552             uint32_t id32 = *id;
553             rc = BTreeEntry(&self->hdr.root, (Pager *)&self->pgfile, &KPageFile_vt, &id32, was_inserted, key, key_size);
554             if (self->pgfile.rc)
555                 rc = self->pgfile.rc;
556             *id = id32;
557         }
558     }
559 
560     return rc;
561 }
562 
563 
564 /* ForEach
565  *  executes a function on each tree element
566  *
567  *  "reverse" [ IN ] - if true, iterate in reverse order
568  *
569  *  "f" [ IN ] and "data" [ IN, OPAQUE ] - callback function
570  */
KBTreeForEach(const KBTree * self,bool reverse,void (CC * f)(const void * key,size_t key_size,uint32_t id,void * data),void * data)571 LIB_EXPORT rc_t CC KBTreeForEach ( const KBTree *self, bool reverse, void ( CC * f ) ( const void *key, size_t key_size, uint32_t id, void *data ), void *data )
572 {
573     if ( self == NULL )
574         return RC ( rcDB, rcTree, rcVisiting, rcSelf, rcNull );
575     else if ( f == NULL )
576         return RC ( rcDB, rcTree, rcVisiting, rcFunction, rcNull );
577     else
578         return BTreeForEach(self->hdr.root, (Pager *)&self->pgfile, &KPageFile_vt, reverse, f, data);
579 }
580