1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 
27 #include <kdb/extern.h>
28 #include "index-priv.h"
29 #include "trieidx-priv.h"
30 #include <klib/ptrie.h>
31 #include <klib/text.h>
32 #include <kfs/directory.h>
33 #include <kfs/file.h>
34 #include <kfs/mmap.h>
35 #include <klib/pack.h>
36 #include <klib/rc.h>
37 #include <sysalloc.h>
38 
39 #include <stdlib.h>
40 #include <limits.h>
41 #include <stdio.h>
42 #include <string.h>
43 #include <errno.h>
44 #include <byteswap.h>
45 #include <assert.h>
46 
47 #define KTRIE_ZEROS_KPTRIE 1
48 
49 /*--------------------------------------------------------------------------
50  * KPTrieIndex_v2
51  *  persisted keymap
52  */
53 
54 
55 /* Init
56  *  opens and initializes persisted keymap structure
57  */
58 static
KPTrieIndexInitID2Ord(KPTrieIndex_v2 * self,size_t in_size,int variant,int span_bits,int elem_bits)59 rc_t KPTrieIndexInitID2Ord ( KPTrieIndex_v2 *self, size_t in_size,
60     int variant, int span_bits, int elem_bits )
61 {
62     rc_t rc;
63     union
64     {
65         uint8_t *v8;
66         uint16_t *v16;
67         uint32_t *v32;
68         uint64_t *v64;
69     } dst;
70     size_t elem_bytes = elem_bits >> 3;
71     uint32_t scount = self -> count - 1;
72 
73     assert ( self -> count != 0 );
74 
75     if ( span_bits * scount > in_size * 8 )
76         return RC ( rcDB, rcIndex, rcConstructing, rcIndex, rcCorrupt );
77 
78     dst . v8 = malloc ( self -> count * elem_bytes );
79     if ( dst . v8 == NULL )
80         rc = RC ( rcDB, rcIndex, rcConstructing, rcMemory, rcExhausted );
81     else
82     {
83         size_t usize;
84         rc = Unpack ( span_bits, elem_bits,
85             & self -> ord2node [ self -> count ], 0,
86             span_bits * scount, NULL, & dst . v8 [ elem_bytes ],
87             scount * elem_bytes, & usize );
88         if ( rc == 0 )
89         {
90             uint32_t i;
91 
92             self -> id2ord . v8 = dst . v8;
93             self -> variant = variant;
94 
95             /* integrate to simple translation */
96             switch ( variant )
97             {
98             case 1:
99                 dst . v8 [ 0 ] = 0;
100                 for ( i = 0; i < scount; ++ i )
101                     dst . v8 [ i + 1 ] += dst . v8 [ i ];
102                 break;
103             case 2:
104                 dst . v16 [ 0 ] = 0;
105                 for ( i = 0; i < scount; ++ i )
106                     dst . v16 [ i + 1 ] += dst . v16 [ i ];
107                 break;
108             case 3:
109                 dst . v32 [ 0 ] = 0;
110                 for ( i = 0; i < scount; ++ i )
111                     dst . v32 [ i + 1 ] += dst . v32 [ i ];
112                 break;
113             case 4:
114                 dst . v64 [ 0 ] = 0;
115                 for ( i = 0; i < scount; ++ i )
116                     dst . v64 [ i + 1 ] += dst . v64 [ i ];
117                 break;
118             }
119 
120             return 0;
121         }
122 
123         free ( dst . v8 );
124     }
125 
126     return rc;
127 }
128 
129 static
KPTrieIndexExtractV1Range_v2(PTNode * n,void * data)130 void CC KPTrieIndexExtractV1Range_v2 ( PTNode *n, void *data )
131 {
132     KPTrieIndex_v2 *self = data;
133 
134     /* capture node id */
135     uint32_t id;
136     assert ( n -> data . size == sizeof id );
137     memmove ( & id, n -> data . addr, sizeof id );
138 
139     /* perform min/max */
140     if ( self -> count == 0 )
141         self -> first = self -> last = id;
142     else if ( id < self -> first )
143         self -> first = id;
144     else if ( id > self -> last )
145         self -> last = id;
146 
147     ++ self -> count;
148 }
149 
150 static
KPTrieIndexInitFromV1_v2(KPTrieIndex_v2 * self,const KMMap * mm,bool byteswap)151 rc_t KPTrieIndexInitFromV1_v2 ( KPTrieIndex_v2 *self, const KMMap *mm, bool byteswap )
152 {
153     KPTrieIndex_v1 v1;
154     rc_t rc = KPTrieIndexInit_v1 ( & v1, mm, byteswap );
155     if ( rc == 0 )
156     {
157         uint32_t *ord2node;
158         uint32_t total_id, test_id;
159         uint32_t i, id, id_bits, num_ids;
160 
161         /* assume preservation of persisted projection index */
162         self -> byteswap = byteswap;
163 
164         /* hopefully we got a projection index */
165         if ( v1 . id2node == NULL )
166         {
167 #if ! KTRIE_ZEROS_KPTRIE
168             self -> count = 0;
169 #endif
170             /* need to derive first and last from trie */
171             PTrieForEach ( v1 . key2id, KPTrieIndexExtractV1Range_v2, self );
172             if ( self -> count == 0 )
173                 KPTrieIndexWhack_v1 ( & v1 );
174             else
175             {
176                 /* take trie as-is */
177                 self -> key2id = v1 . key2id;
178                 self -> maxid = self -> last;
179             }
180 
181             /* note that this assumes a span of 1 for
182                each id. there are no known uses of v1 without
183                a projection index, so this is unlikely to be important */
184 
185             return 0;
186         }
187 
188         /* take id range */
189         self -> first = v1 . first;
190         self -> last = self -> maxid = v1 . last;
191 
192         /* count comes from trie as always */
193         self -> count = PTrieCount ( v1 . key2id );
194 
195         /* detect empty trie */
196         if ( self -> count == 0 || self -> first > self -> last )
197         {
198             self -> first = self -> last = self -> maxid = 0;
199             return 0;
200         }
201 
202         /* take trie as-is */
203         self -> key2id = v1 . key2id;
204 
205         /* now decide whether to use 1-1 or sparse projection
206            if the number of slots is less than twice the number of nodes,
207            it is more efficient to store the nodes as a linear array,
208            and represent missing ids as nulls.
209          */
210         if ( ( self -> last - self -> first ) < ( ( int64_t ) self -> count << 1 ) )
211         {
212             /* take the old projection array as-is,
213                treating NULL node ids as holes */
214             self -> ord2node = v1 . id2node;
215             return 0;
216         }
217 
218         /* need to create a new projection index */
219         self -> byteswap = false;
220 
221         /* convert to sparse
222            calculate id bits - notice that
223            test_id gets left shifted so that
224            the loop is guaranteed to exit */
225         num_ids = ( uint32_t ) ( self -> last - self -> first + 1 );
226         for ( total_id = num_ids >> 1, id_bits = 1, test_id = 1;
227             test_id <= total_id;
228             ++ id_bits, test_id <<= 1 )
229             ( void ) 0;
230 
231         /* determine variant */
232         if ( id_bits <= 8 )
233         {
234             /* allocate 4 bytes for new ord2node and 1 for id2ord */
235             uint8_t *id2ord = malloc ( self -> count * 5 );
236             if ( id2ord == NULL )
237                 rc = RC ( rcDB, rcIndex, rcConstructing, rcMemory, rcExhausted );
238             else
239             {
240                 ord2node = ( uint32_t* ) & id2ord [ self -> count ];
241                 self -> ord2node = ord2node;
242                 self -> id2ord . v8 = id2ord;
243                 self -> variant = 1;
244 
245                 /* walk across v1 table, looking at each id */
246                 for ( i = id = 0; id < num_ids; ++ id )
247                 {
248                     /* detect non NULL node ids
249                        and pretend they represent a contiguous
250                        span with no holes in id space */
251                     if ( v1 . id2node [ id ] != 0 )
252                     {
253                         /* prevent overwriting */
254                         if ( i == self -> count )
255                         {
256                             rc = RC ( rcDB, rcIndex, rcConstructing, rcIndex, rcCorrupt );
257                             break;
258                         }
259 
260                         /* record id and node for slot */
261                         id2ord [ i ] = ( uint8_t ) id;
262                         ord2node [ i ] = byteswap ? bswap_32 ( v1 . id2node [ id ] ) : v1 . id2node [ id ];
263                         ++ i;
264                     }
265                 }
266             }
267         }
268         else if ( id_bits <= 16 )
269         {
270             uint16_t *id2ord = malloc ( self -> count * 6 );
271             if ( id2ord == NULL )
272                 rc = RC ( rcDB, rcIndex, rcConstructing, rcMemory, rcExhausted );
273             else
274             {
275                 ord2node = ( uint32_t* ) & id2ord [ self -> count ];
276                 self -> ord2node = ord2node;
277                 self -> id2ord . v16 = id2ord;
278                 self -> variant = 2;
279 
280                 for ( i = id = 0; id < num_ids; ++ id )
281                 {
282                     if ( v1 . id2node [ id ] != 0 )
283                     {
284                         if ( i == self -> count )
285                         {
286                             rc = RC ( rcDB, rcIndex, rcConstructing, rcIndex, rcCorrupt );
287                             break;
288                         }
289 
290                         id2ord [ i ] = ( uint16_t ) id;
291                         ord2node [ i ] = byteswap ? bswap_32 ( v1 . id2node [ id ] ) : v1 . id2node [ id ];
292                         ++ i;
293                     }
294                 }
295             }
296         }
297         else
298         {
299             uint32_t *id2ord = malloc ( self -> count * 8 );
300             if ( id2ord == NULL )
301                 rc = RC ( rcDB, rcIndex, rcConstructing, rcMemory, rcExhausted );
302             else
303             {
304                 ord2node = & id2ord [ self -> count ];
305                 self -> ord2node = ord2node;
306                 self -> id2ord . v32 = id2ord;
307                 self -> variant = 3;
308 
309                 for ( i = id = 0; id < num_ids; ++ id )
310                 {
311                     if ( v1 . id2node [ id ] != 0 )
312                     {
313                         if ( i == self -> count )
314                         {
315                             rc = RC ( rcDB, rcIndex, rcConstructing, rcIndex, rcCorrupt );
316                             break;
317                         }
318 
319                         id2ord [ i ] = id;
320                         ord2node [ i ] = byteswap ? bswap_32 ( v1 . id2node [ id ] ) : v1 . id2node [ id ];
321                         ++ i;
322                     }
323                 }
324             }
325         }
326 
327         if ( rc == 0 )
328         {
329             if ( i == self -> count )
330                 return 0;
331 
332             rc = RC ( rcDB, rcIndex, rcConstructing, rcIndex, rcCorrupt );
333         }
334 
335         KPTrieIndexWhack_v1 ( & v1 );
336     }
337 
338     return rc;
339 }
340 
KPTrieIndexInit_v2(KPTrieIndex_v2 * self,const KMMap * mm,bool byteswap)341 rc_t KPTrieIndexInit_v2 ( KPTrieIndex_v2 *self, const KMMap *mm, bool byteswap )
342 {
343     /* get size of map, assumed to be size of file */
344     size_t size;
345     rc_t rc = KMMapSize ( mm, & size );
346     if ( rc == 0 )
347     {
348         const KPTrieIndexHdr_v2 *hdr;
349 
350 #if ! KTRIE_ZEROS_KPTRIE
351         self -> mm = NULL;
352         self -> ord2node = NULL;
353         self -> id2ord . v32 = NULL;
354         self -> variant = 0;
355 #endif
356 
357         /* ignore empty file */
358         if ( size == 0 )
359         {
360 #if ! KTRIE_ZEROS_KPTRIE
361             self -> first = self -> last = self -> maxid = 0;
362             self -> key2id = NULL;
363             self -> count = 0;
364 #endif
365             return 0;
366         }
367 
368         /* have to have at least the base header */
369         if ( size < sizeof hdr -> dad )
370             return RC ( rcDB, rcIndex, rcConstructing, rcTrie, rcCorrupt );
371 
372         rc = KMMapAddrRead ( mm, ( const void** ) & hdr );
373         if ( rc == 0 )
374         {
375             uint16_t id_bits, span_bits;
376             /* recheck header size */
377             if ( size < sizeof * hdr )
378                 return RC ( rcDB, rcIndex, rcConstructing, rcTrie, rcCorrupt );
379 
380             if ( self -> byteswap )
381             {
382                 self -> first = bswap_64(hdr -> first);
383                 self -> last = self -> maxid = bswap_64(hdr -> last);
384                 id_bits = bswap_16(hdr -> id_bits);
385                 span_bits = bswap_16(hdr -> span_bits);
386             }
387             else
388             {
389                 self -> first = hdr -> first;
390                 self -> last = self -> maxid = hdr -> last;
391                 id_bits = hdr -> id_bits;
392                 span_bits = hdr -> span_bits;
393             }
394             self -> id_bits = ( uint8_t ) id_bits;
395             self -> span_bits = ( uint8_t ) span_bits;
396             self -> byteswap = byteswap;
397 
398             /* try to create the pttree */
399             rc = PTrieMakeOrig ( & self -> key2id,
400                 hdr + 1, size -= sizeof * hdr, byteswap );
401             if ( rc == 0 )
402             {
403                 size_t ptsize = PTrieSize ( self -> key2id );
404                 if ( ptsize <= size )
405                 {
406                     /* the count covers at least the number of trie nodes */
407                     self -> count = PTrieCount ( self -> key2id );
408 
409                     /* it could be stored without projection */
410                     if ( ptsize == size )
411                         return 0;
412 
413                     /* calculate remaining bytes */
414                     size -= ptsize;
415 
416                     /* there must be enough for an array of 4-byte node ids */
417                     if ( size >= ( ( size_t ) self -> count << 2 ) )
418                     {
419                         /* take the persisted array as-is */
420                         self -> ord2node = ( const uint32_t* )
421                             ( ( const char* ) ( hdr + 1 ) + ptsize );
422 
423                         /* read the count */
424                         if ( size >= 4 )
425                         {
426                             self -> count = * ( self -> ord2node ) ++;
427                             size -= 4;
428                             if ( byteswap )
429                                 self -> count = bswap_32 ( self -> count );
430                         }
431 
432                         /* determine strategy from id span and count */
433                         if ( ( self -> last - self -> first ) < ( ( int64_t ) self -> count << 1 ) )
434                         {
435                             /* must be contiguous */
436                             self -> count = ( uint32_t ) ( self -> last - self -> first + 1 );
437 
438                             /* size should be exactly this number of slots */
439                             if ( size == ( ( size_t ) self -> count << 2 ) )
440                                 return 0;
441 
442                             /* fall through to error return */
443                         }
444                         else if ( ( size == 4 && self -> count == 1 ) || size > ( ( size_t ) self -> count << 2 ) )
445                         {
446                             /* sparse */
447                             size -= ( size_t ) self -> count << 2;
448 
449                             /* unpack id map */
450                             if ( id_bits <= 8 )
451                                 rc = KPTrieIndexInitID2Ord ( self, size, 1, span_bits, 8 );
452                             else if ( id_bits <= 16 )
453                                 rc = KPTrieIndexInitID2Ord ( self, size, 2, span_bits, 16 );
454                             else if ( id_bits <= 32 )
455                                 rc = KPTrieIndexInitID2Ord ( self, size, 3, span_bits, 32 );
456                             else
457                                 rc = KPTrieIndexInitID2Ord ( self, size, 4, span_bits, 64 );
458 
459                             /* done */
460                             if ( rc == 0 )
461                                 return 0;
462 
463                             PTrieWhack ( self -> key2id ), self -> key2id = NULL;
464                             return rc;
465                         }
466                     }
467                 }
468 
469                 PTrieWhack ( self -> key2id ), self -> key2id = NULL;
470                 rc = RC ( rcDB, rcIndex, rcConstructing, rcTrie, rcCorrupt );
471             }
472         }
473     }
474 
475     return rc;
476 }
477 
KPTrieIndexInit_v3_v4(KPTrieIndex_v2 * self,const KMMap * mm,bool byteswap,bool ptorig)478 rc_t KPTrieIndexInit_v3_v4 ( KPTrieIndex_v2 *self, const KMMap *mm, bool byteswap, bool ptorig )
479 {
480     /* get size of map, assumed to be size of file */
481     size_t size;
482     rc_t rc = KMMapSize ( mm, & size );
483     if ( rc == 0 )
484     {
485         const KPTrieIndexHdr_v3 *hdr;
486 
487 #if ! KTRIE_ZEROS_KPTRIE
488         self -> mm = NULL;
489         self -> ord2node = NULL;
490         self -> id2ord . v32 = NULL;
491         self -> variant = 0;
492 #endif
493 
494         /* ignore empty file */
495         if ( size == 0 )
496         {
497 #if ! KTRIE_ZEROS_KPTRIE
498             self -> first = self -> last = self -> maxid = 0;
499             self -> key2id = NULL;
500             self -> count = 0;
501 #endif
502             return 0;
503         }
504 
505         /* have to have at least the base header */
506         if ( size < sizeof hdr -> dad )
507             return RC ( rcDB, rcIndex, rcConstructing, rcTrie, rcCorrupt );
508 
509         rc = KMMapAddrRead ( mm, ( const void** ) & hdr );
510         if ( rc == 0 )
511         {
512             uint16_t id_bits, span_bits;
513             /* recheck header size */
514             if ( size < sizeof * hdr )
515                 return RC ( rcDB, rcIndex, rcConstructing, rcTrie, rcCorrupt );
516 
517             if ( self -> byteswap )
518             {
519                 self -> first = bswap_64(hdr -> first);
520                 self -> last = self -> maxid = bswap_64(hdr -> last);
521                 id_bits = bswap_16(hdr -> id_bits);
522                 span_bits = bswap_16(hdr -> span_bits);
523             }
524             else
525             {
526                 self -> first = hdr -> first;
527                 self -> last = self -> maxid = hdr -> last;
528                 id_bits = hdr -> id_bits;
529                 span_bits = hdr -> span_bits;
530             }
531             self -> id_bits = ( uint8_t ) id_bits;
532             self -> span_bits = ( uint8_t ) span_bits;
533             self -> byteswap = byteswap;
534 
535             /* try to create the pttree */
536             rc = ( ptorig ? PTrieMakeOrig : PTrieMake )
537                 ( & self -> key2id, hdr + 1, size -= sizeof * hdr, byteswap );
538             if ( rc == 0 )
539             {
540                 size_t ptsize = PTrieSize ( self -> key2id );
541                 if ( ptsize <= size )
542                 {
543                     /* the count covers at least the number of trie nodes */
544                     self -> count = PTrieCount ( self -> key2id );
545 
546                     /* it could be stored without projection */
547                     if ( ptsize == size )
548                         return 0;
549 
550                     /* calculate remaining bytes */
551                     size -= ptsize;
552 
553                     /* there must be enough for an array of 4-byte node ids */
554                     if ( size >= ( ( size_t ) self -> count << 2 ) )
555                     {
556                         /* take the persisted array as-is */
557                         self -> ord2node = ( const uint32_t* )
558                             ( ( const char* ) ( hdr + 1 ) + ptsize );
559 
560                         /* read the count */
561                         if ( size >= 4 )
562                         {
563                             self -> count = * ( self -> ord2node ) ++;
564                             size -= 4;
565                             if ( byteswap )
566                                 self -> count = bswap_32 ( self -> count );
567                         }
568 
569                         /* determine strategy from id span and count */
570                         if ( ( self -> last - self -> first ) < ( ( int64_t ) self -> count << 1 ) )
571                         {
572                             /* must be contiguous */
573                             self -> count = ( uint32_t ) ( self -> last - self -> first + 1 );
574 
575                             /* size should be exactly this number of slots */
576                             if ( size == ( ( size_t ) self -> count << 2 ) )
577                                 return 0;
578 
579                             /* fall through to error return */
580                         }
581                         else if ( ( size == 4 && self -> count == 1 ) || size > ( ( size_t ) self -> count << 2 ) )
582                         {
583                             /* sparse */
584                             size -= ( size_t ) self -> count << 2;
585 
586                             /* unpack id map */
587                             if ( id_bits <= 8 )
588                                 rc = KPTrieIndexInitID2Ord ( self, size, 1, span_bits, 8 );
589                             else if ( id_bits <= 16 )
590                                 rc = KPTrieIndexInitID2Ord ( self, size, 2, span_bits, 16 );
591                             else if ( id_bits <= 32 )
592                                 rc = KPTrieIndexInitID2Ord ( self, size, 3, span_bits, 32 );
593                             else
594                                 rc = KPTrieIndexInitID2Ord ( self, size, 4, span_bits, 64 );
595 
596                             /* done */
597                             if ( rc == 0 )
598                                 return 0;
599 
600                             PTrieWhack ( self -> key2id ), self -> key2id = NULL;
601                             return rc;
602                         }
603                     }
604                 }
605 
606                 PTrieWhack ( self -> key2id ), self -> key2id = NULL;
607                 rc = RC ( rcDB, rcIndex, rcConstructing, rcTrie, rcCorrupt );
608             }
609         }
610     }
611 
612     return rc;
613 }
614 
615 /* Whack
616  *  closes down keymap
617  */
KPTrieIndexWhack_v2(KPTrieIndex_v2 * self)618 void KPTrieIndexWhack_v2 ( KPTrieIndex_v2 *self )
619 {
620     free ( ( void* ) self -> id2ord . v8 );
621     PTrieWhack ( self -> key2id );
622     KMMapRelease ( self -> mm );
623     memset ( self, 0, sizeof * self );
624 }
625 
KPTrieIndexID2Ord_v2(const KPTrieIndex_v2 * self,int64_t id)626 uint32_t KPTrieIndexID2Ord_v2 ( const KPTrieIndex_v2 *self, int64_t id )
627 {
628     if ( id >= self -> first && id <= self -> maxid )
629     {
630         int64_t nid;
631         uint32_t left, right, ord, count = self -> count;
632 
633         /* convert id either to a zero-based ord,
634            or else the translated id in id2ord */
635         id -= self -> first;
636 
637         /* handle type of projection */
638         switch ( self -> variant )
639         {
640         case 0:
641             /* return one-based ord */
642             return ( uint32_t ) ( id + 1 );
643 
644         case 1:
645             for ( left = 1, right = count; left <= right; )
646             {
647                 ord = ( left + right ) >> 1;
648                 nid = self -> id2ord . v8 [ ord - 1 ];
649                 if ( id == nid )
650                     return ord;
651 
652                 if ( id < nid )
653                 {
654                     right = ord - 1;
655                     continue;
656                 }
657                 if ( ord == count )
658                     return ord;
659 
660                 nid = self -> id2ord . v8 [ ord ];
661                 if ( id < nid )
662                     return ord;
663 
664                 left = ord + 1;
665             }
666             break;
667 
668         case 2:
669             for ( left = 1, right = count; left <= right; )
670             {
671                 ord = ( left + right ) >> 1;
672                 nid = self -> id2ord . v16 [ ord - 1 ];
673                 if ( id == nid )
674                     return ord;
675 
676                 if ( id < nid )
677                 {
678                     right = ord - 1;
679                     continue;
680                 }
681                 if ( ord == count )
682                     return ord;
683 
684                 nid = self -> id2ord . v16 [ ord ];
685                 if ( id < nid )
686                     return ord;
687 
688                 left = ord + 1;
689             }
690             break;
691 
692         case 3:
693             for ( left = 1, right = count; left <= right; )
694             {
695                 ord = ( left + right ) >> 1;
696                 nid = self -> id2ord . v32 [ ord - 1 ];
697                 if ( id == nid )
698                     return ord;
699 
700                 if ( id < nid )
701                 {
702                     right = ord - 1;
703                     continue;
704                 }
705                 if ( ord == count )
706                     return ord;
707 
708                 nid = self -> id2ord . v32 [ ord ];
709                 if ( id < nid )
710                     return ord;
711 
712                 left = ord + 1;
713             }
714             break;
715 
716         case 4:
717             for ( left = 1, right = count; left <= right; )
718             {
719                 ord = ( left + right ) >> 1;
720                 nid = self -> id2ord . v64 [ ord - 1 ];
721                 if ( id == nid )
722                     return ord;
723 
724                 if ( id < nid )
725                 {
726                     right = ord - 1;
727                     continue;
728                 }
729                 if ( ord == count )
730                     return ord;
731 
732                 nid = self -> id2ord . v64 [ ord ];
733                 if ( id < nid )
734                     return ord;
735 
736                 left = ord + 1;
737             }
738             break;
739         }
740     }
741     return 0;
742 }
743 
744 static
KPTrieIndexProject_v2(const KPTrieIndex_v2 * self,int64_t id,int64_t * start_id,uint32_t * span,char * key_buff,size_t buff_size,size_t * actsize)745 rc_t KPTrieIndexProject_v2 ( const KPTrieIndex_v2 *self,
746     int64_t id,
747 #if V2FIND_RETURNS_SPAN
748     int64_t *start_id, uint32_t *span,
749 #endif
750     char *key_buff, size_t buff_size, size_t *actsize )
751 {
752     uint32_t nid, ord = KPTrieIndexID2Ord_v2 ( self, id );
753     if ( ord != 0 )
754     {
755         assert ( start_id != NULL );
756         assert ( span != NULL );
757 
758         nid = self -> ord2node [ ord - 1 ];
759 
760         switch ( self -> variant )
761         {
762         case 0:
763             * start_id = id;
764             for ( ; ord < self -> count; ++ ord )
765             {
766                 if ( nid != self -> ord2node [ ord ] )
767                     break;
768             }
769             * span = ( uint32_t ) ( self -> first + ord - * start_id );
770             break;
771         case 1:
772             * start_id = self -> id2ord . v8 [ ord - 1 ];
773             * span = ( uint32_t ) ( ( ( ord == self -> count ) ?
774                 ( self -> maxid - self -> first + 1 ) : self -> id2ord . v8 [ ord ] ) - * start_id );
775             *start_id += self -> first;
776             break;
777         case 2:
778             * start_id = self -> id2ord . v16 [ ord - 1 ];
779             * span = ( uint32_t ) ( ( ( ord == self -> count ) ?
780                 ( self -> maxid - self -> first + 1 ) : self -> id2ord . v16 [ ord ] ) - * start_id );
781             *start_id += self -> first;
782             break;
783         case 3:
784             * start_id = self -> id2ord . v32 [ ord - 1 ];
785             * span = ( uint32_t ) ( ( ( ord == self -> count ) ?
786                 ( self -> maxid - self -> first + 1 ) : self -> id2ord . v32 [ ord ] ) - * start_id );
787             *start_id += self -> first;
788             break;
789         case 4:
790             * start_id = self -> id2ord . v64 [ ord - 1 ];
791             * span = ( uint32_t ) ( ( ( ord == self -> count ) ?
792                 ( self -> maxid - self -> first + 1 ) : self -> id2ord . v64 [ ord ] ) - * start_id );
793             *start_id += self -> first;
794             break;
795         }
796 
797         if ( nid != 0 )
798         {
799             rc_t rc;
800             PTNode node;
801 
802             if ( self -> byteswap )
803                 nid = bswap_32 ( nid );
804 
805             rc = PTrieGetNode ( self -> key2id, & node, nid );
806             if ( rc == 0 )
807             {
808                 const String *key;
809                 rc = PTNodeMakeKey ( & node, & key );
810                 if ( rc == 0 )
811                 {
812                     if ( actsize != NULL )
813                         * actsize = key -> size;
814                     if ( key -> size >= buff_size )
815                         rc = RC ( rcDB, rcIndex, rcProjecting, rcBuffer, rcInsufficient );
816                     else
817                         string_copy ( key_buff, buff_size, key -> addr, key -> size );
818 
819                     StringWhack ( ( String* ) key );
820                 }
821             }
822             return rc;
823         }
824     }
825 
826     return RC ( rcDB, rcIndex, rcProjecting, rcId, rcNotFound );
827 }
828 
829 /* Find
830  */
831 static
KPTrieIndexFind_v2(const KPTrieIndex_v2 * self,const char * str,int64_t * start_id,uint32_t * span,int (CC * custom_cmp)(const void * item,const PBSTNode * n,void * data),void * data,bool convertFromV1)832 rc_t KPTrieIndexFind_v2 ( const KPTrieIndex_v2 *self,
833     const char *str, int64_t *start_id,
834 #if V2FIND_RETURNS_SPAN
835     uint32_t *span,
836 #endif
837     int ( CC * custom_cmp ) ( const void *item, const PBSTNode *n, void *data ), void *data, bool convertFromV1 )
838 {
839     rc_t rc;
840 
841     /* search within in-core index */
842     if ( self -> count == 0 )
843         rc = RC ( rcDB, rcIndex, rcSelecting, rcString, rcNotFound );
844     else
845     {
846         uint32_t nid;
847         PTNode pnode;
848 
849         String key;
850         StringInitCString ( & key, str );
851 
852         nid = PTrieFind ( self -> key2id, & key, & pnode, custom_cmp, data );
853         if ( nid == 0 )
854             rc = RC ( rcDB, rcIndex, rcSelecting, rcString, rcNotFound );
855         else
856         {
857             size_t usize;
858 
859             /* detect conversion from v1 */
860             if ( convertFromV1 && self -> id_bits == 0 )
861             {
862                 /* v1 stored tree will have just a 32-bit spot id as data */
863                 uint32_t id;
864                 assert ( pnode . data . size == sizeof id );
865                 memmove ( & id, pnode . data . addr, sizeof id );
866                 * start_id = id;
867                 rc = 0;
868             }
869             else
870             {
871                 /* should be native v2 */
872                 if ( self -> id_bits > 0 )
873                 {
874                     rc = Unpack ( self -> id_bits, sizeof * start_id * 8,
875                         pnode . data . addr, 0, self -> id_bits, NULL,
876                         start_id, sizeof * start_id, & usize );
877                 }
878                 else
879                 {
880                     rc = 0;
881                 }
882                 * start_id += self -> first;
883             }
884 
885             if ( rc == 0 )
886             {
887 #if V2FIND_RETURNS_SPAN
888                 if ( self -> ord2node != NULL )
889                 {
890                     uint32_t ord = KPTrieIndexID2Ord_v2 ( self, * start_id );
891                     if ( ord == 0 )
892                         rc = RC ( rcDB, rcIndex, rcSelecting, rcId, rcNotFound );
893                     else if ( ord == self -> count )
894                         * span = ( uint32_t ) ( self -> maxid - * start_id + 1 );
895                     else switch ( self -> variant )
896                     {
897                     case 0:
898                         for ( ; ord < self -> count; ++ ord )
899                         {
900                             if ( nid != self -> ord2node [ ord ] )
901                                 break;
902                         }
903                         * span = ( uint32_t ) ( self -> first + ord - * start_id );
904                         break;
905                     case 1:
906                         * span = ( uint32_t ) ( self -> first + self -> id2ord . v8 [ ord ] - * start_id );
907                         break;
908                     case 2:
909                         * span = ( uint32_t ) ( self -> first + self -> id2ord . v16 [ ord ] - * start_id );
910                         break;
911                     case 3:
912                         * span = ( uint32_t ) ( self -> first + self -> id2ord . v32 [ ord ] - * start_id );
913                         break;
914                     case 4:
915                         * span = ( uint32_t ) ( self -> first + self -> id2ord . v64 [ ord ] - * start_id );
916                         break;
917                     }
918                 }
919                 else if ( self -> span_bits == 0 )
920                     * span = 1;
921                 else
922                 {
923                     rc = Unpack ( self -> span_bits, sizeof * span * 8,
924                         pnode . data . addr, 0, self -> id_bits, NULL,
925                         span, sizeof * span, & usize );
926                 }
927 #endif
928             }
929         }
930     }
931 
932     return rc;
933 }
934 
935 
936 /*--------------------------------------------------------------------------
937  * KTrieIndex_v2
938  */
939 
940 
941 /* whack whack */
KTrieIndexWhack_v2(KTrieIndex_v2 * self)942 void KTrieIndexWhack_v2 ( KTrieIndex_v2 *self )
943 {
944     KPTrieIndexWhack_v2 ( & self -> pt );
945 }
946 
947 /* initialize an index from file */
KTrieIndexOpen_v2(KTrieIndex_v2 * self,const KMMap * mm,bool byteswap)948 rc_t KTrieIndexOpen_v2 ( KTrieIndex_v2 *self, const KMMap *mm, bool byteswap )
949 {
950     rc_t rc;
951     uint32_t version;
952     bool ptorig = false;
953     const KDBHdr *hdr = NULL;
954 
955 #if ! KTRIE_ZEROS_KPTRIE
956 #error "KTrie is supposed to zero KPTrie"
957 #endif
958     memset ( self, 0, sizeof * self );
959 
960     /* open the prior index in persisted mode, but
961        don't load it into the core-based Trie */
962     rc = KMMapAddrRead ( mm, ( const void** ) & hdr );
963     if ( rc != 0 )
964         return rc;
965 
966     self -> pt . byteswap = byteswap;
967     version = byteswap ? bswap_32 ( hdr -> version ) : hdr -> version;
968 
969     switch ( version )
970     {
971     case 1:
972         rc = KPTrieIndexInitFromV1_v2 ( & self -> pt, mm, byteswap );
973         break;
974     case 2:
975         rc = KPTrieIndexInit_v2 ( & self -> pt, mm, byteswap );
976         break;
977     case 3:
978         ptorig = true;
979     case 4:
980         rc = KPTrieIndexInit_v3_v4 ( & self -> pt, mm, byteswap, ptorig );
981         break;
982     default:
983         rc = RC(rcDB, rcIndex, rcConstructing, rcIndex, rcBadVersion);
984         break;
985     }
986     if ( rc == 0 )
987     {
988         /* the file existed but was empty */
989         if ( self -> pt . key2id == NULL )
990         {
991             self -> pt . mm = NULL;
992             return 0;
993         }
994 
995         /* retain a reference to memory map */
996         rc = KMMapAddRef ( mm );
997         if ( rc == 0 )
998         {
999             self -> pt . mm = mm;
1000             return 0;
1001         }
1002 
1003         /* self -> pt gets whacked below */
1004     }
1005 
1006     KTrieIndexWhack_v2 ( self );
1007     return rc;
1008 }
1009 
1010 
1011 /* map key to id range */
KTrieIndexFind_v2(const KTrieIndex_v2 * self,const char * str,int64_t * start_id,uint32_t * span,int (CC * custom_cmp)(const void * item,const PBSTNode * n,void * data),void * data,bool convertFromV1)1012 rc_t KTrieIndexFind_v2 ( const KTrieIndex_v2 *self,
1013     const char *str, int64_t *start_id,
1014 #if V2FIND_RETURNS_SPAN
1015     uint32_t *span,
1016 #endif
1017     int ( CC * custom_cmp ) ( const void *item, const PBSTNode *n, void *data ), void *data, bool convertFromV1  )
1018 {
1019     /* search within persisted index */
1020     if ( self -> pt . key2id != NULL )
1021     {
1022         return KPTrieIndexFind_v2 ( & self -> pt, str, start_id,
1023 #if V2FIND_RETURNS_SPAN
1024                                     span,
1025 #endif
1026                                     custom_cmp, data, convertFromV1 );
1027     }
1028 
1029     return RC ( rcDB, rcIndex, rcSelecting, rcString, rcNotFound );
1030 }
1031 
KTrieIndexProject_v2(const KTrieIndex_v2 * self,int64_t id,int64_t * start_id,uint32_t * span,char * key_buff,size_t buff_size,size_t * actsize)1032 rc_t KTrieIndexProject_v2 ( const KTrieIndex_v2 *self,
1033     int64_t id,
1034 #if V2FIND_RETURNS_SPAN
1035      int64_t *start_id, uint32_t *span,
1036 #endif
1037     char *key_buff, size_t buff_size, size_t *actsize )
1038 {
1039     if ( self -> pt . ord2node == NULL )
1040         return RC ( rcDB, rcIndex, rcProjecting, rcId, rcNotFound );
1041 
1042     return KPTrieIndexProject_v2 ( & self -> pt, id,
1043 #if V2FIND_RETURNS_SPAN
1044        start_id, span,
1045 #endif
1046         key_buff, buff_size, actsize );
1047 }
1048