1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 
27 #include <kdb/extern.h>
28 
29 #include "windex-priv.h"
30 #include "trieidx-priv.h"
31 
32 #include <kdb/index.h>
33 #include <kfs/directory.h>
34 #include <kfs/file.h>
35 #include <kfs/md5.h>
36 #include <kfs/mmap.h>
37 #include <klib/ptrie.h>
38 #include <klib/text.h>
39 #include <klib/pack.h>
40 #include <klib/rc.h>
41 #include <os-native.h>
42 #include <sysalloc.h>
43 
44 #include <byteswap.h>
45 
46 #include <stdlib.h>
47 #include <limits.h>
48 #include <stdio.h>
49 #include <string.h>
50 #include <errno.h>
51 #include <assert.h>
52 
53 #define KTRIE_ZEROS_KPTRIE 1
54 
55 /*--------------------------------------------------------------------------
56  * KPTrieIndex_v2
57  *  persisted keymap
58  */
59 
60 
61 /* Init
62  *  opens and initializes persisted keymap structure
63  */
64 static
KPTrieIndexInitID2Ord(KPTrieIndex_v2 * self,size_t in_size,int variant,int span_bits,int elem_bits)65 rc_t KPTrieIndexInitID2Ord ( KPTrieIndex_v2 *self, size_t in_size,
66     int variant, int span_bits, int elem_bits )
67 {
68     rc_t rc;
69     union
70     {
71         uint8_t *v8;
72         uint16_t *v16;
73         uint32_t *v32;
74         uint64_t *v64;
75     } dst;
76     size_t elem_bytes = elem_bits >> 3;
77     uint32_t scount = self -> count - 1;
78 
79     assert ( self -> count != 0 );
80 
81     if ( span_bits * scount > in_size * 8 )
82         return RC ( rcDB, rcIndex, rcConstructing, rcIndex, rcCorrupt );
83 
84     dst . v8 = malloc ( self -> count * elem_bytes );
85     if ( dst . v8 == NULL )
86         rc = RC ( rcDB, rcIndex, rcConstructing, rcMemory, rcExhausted );
87     else
88     {
89         size_t usize;
90         rc = Unpack ( span_bits, elem_bits,
91             & self -> ord2node [ self -> count ], 0,
92             span_bits * scount, NULL, & dst . v8 [ elem_bytes ],
93             scount * elem_bytes, & usize );
94         if ( rc == 0 )
95         {
96             uint32_t i;
97 
98             self -> id2ord . v8 = dst . v8;
99             self -> variant = variant;
100 
101             /* integrate to simple translation */
102             switch ( variant )
103             {
104             case 1:
105                 dst . v8 [ 0 ] = 0;
106                 for ( i = 0; i < scount; ++ i )
107                     dst . v8 [ i + 1 ] += dst . v8 [ i ];
108                 break;
109             case 2:
110                 dst . v16 [ 0 ] = 0;
111                 for ( i = 0; i < scount; ++ i )
112                     dst . v16 [ i + 1 ] += dst . v16 [ i ];
113                 break;
114             case 3:
115                 dst . v32 [ 0 ] = 0;
116                 for ( i = 0; i < scount; ++ i )
117                     dst . v32 [ i + 1 ] += dst . v32 [ i ];
118                 break;
119             case 4:
120                 dst . v64 [ 0 ] = 0;
121                 for ( i = 0; i < scount; ++ i )
122                     dst . v64 [ i + 1 ] += dst . v64 [ i ];
123                 break;
124             }
125 
126             return 0;
127         }
128 
129         free ( dst . v8 );
130     }
131 
132     return rc;
133 }
134 
135 static
KPTrieIndexExtractV1Range_v2(PTNode * n,void * data)136 void CC KPTrieIndexExtractV1Range_v2 ( PTNode *n, void *data )
137 {
138     KPTrieIndex_v2 *self = data;
139 
140     /* capture node id */
141     uint32_t id;
142     assert ( n -> data . size == sizeof id );
143     memmove ( & id, n -> data . addr, sizeof id );
144 
145     /* perform min/max */
146     if ( self -> count == 0 )
147         self -> first = self -> last = id;
148     else if ( id < self -> first )
149         self -> first = id;
150     else if ( id > self -> last )
151         self -> last = id;
152 
153     ++ self -> count;
154 }
155 
156 static
KPTrieIndexInitFromV1_v2(KPTrieIndex_v2 * self,const KMMap * mm,bool byteswap)157 rc_t KPTrieIndexInitFromV1_v2 ( KPTrieIndex_v2 *self, const KMMap *mm, bool byteswap )
158 {
159     KPTrieIndex_v1 v1;
160     rc_t rc = KPTrieIndexInit_v1 ( & v1, mm, byteswap );
161     if ( rc == 0 )
162     {
163         uint32_t *ord2node;
164         uint32_t total_id, test_id;
165         uint32_t i, id, id_bits, num_ids;
166 
167         /* hopefully we got a projection index */
168         if ( v1 . id2node == NULL )
169         {
170 #if ! KTRIE_ZEROS_KPTRIE
171             self -> count = 0;
172 #endif
173             /* need to derive first and last from trie */
174             PTrieForEach ( v1 . key2id, KPTrieIndexExtractV1Range_v2, self );
175             if ( self -> count == 0 )
176                 KPTrieIndexWhack_v1 ( & v1 );
177             else
178             {
179                 /* take trie as-is */
180                 self -> key2id = v1 . key2id;
181                 self -> maxid = self -> last;
182             }
183 
184             /* note that this assumes a span of 1 for
185                each id. there are no known uses of v1 without
186                a projection index, so this is unlikely to be important */
187 
188             return 0;
189         }
190 
191         /* take id range */
192         self -> first = v1 . first;
193         self -> last = self -> maxid = v1 . last;
194 
195         /* count comes from trie as always */
196         self -> count = PTrieCount ( v1 . key2id );
197 
198         /* detect empty trie */
199         if ( self -> count == 0 || self -> first > self -> last )
200         {
201             self -> first = self -> last = self -> maxid = 0;
202             return 0;
203         }
204 
205         /* take trie as-is */
206         self -> key2id = v1 . key2id;
207 
208         /* now decide whether to use 1-1 or sparse projection */
209         if ( ( self -> last - self -> first ) < ( ( int64_t ) self -> count << 1 ) )
210         {
211             /* take the old projection array as-is,
212                treating NULL node ids as holes */
213             self -> ord2node = v1 . id2node;
214             return 0;
215         }
216 
217         /* convert to sparse
218            calculate id bits - notice that
219            total_id gets right shifted so that
220            the loop is guaranteed to exit */
221         num_ids = ( uint32_t ) ( self -> last - self -> first + 1 );
222         for ( total_id = num_ids >> 1, id_bits = 1, test_id = 1;
223             test_id <= total_id;
224             ++ id_bits, test_id <<= 1 )
225             ( void ) 0;
226 
227         /* determine variant */
228         if ( id_bits <= 8 )
229         {
230             /* allocate 4 bytes for new ord2node and 1 for id2ord */
231             uint8_t *id2ord = malloc ( self -> count * 5 );
232             if ( id2ord == NULL )
233                 rc = RC ( rcDB, rcIndex, rcConstructing, rcMemory, rcExhausted );
234             else
235             {
236                 ord2node = ( uint32_t* ) & id2ord [ self -> count ];
237                 self -> ord2node = ord2node;
238                 self -> id2ord . v8 = id2ord;
239                 self -> variant = 1;
240 
241                 /* walk across v1 table, looking at each id */
242                 for ( i = id = 0; id < num_ids; ++ id )
243                 {
244                     /* detect non NULL node ids
245                        and pretend they represent a contiguous
246                        span with no holes in id space */
247                     if ( v1 . id2node [ id ] != 0 )
248                     {
249                         /* prevent overwriting */
250                         if ( i == self -> count )
251                         {
252                             rc = RC ( rcDB, rcIndex, rcConstructing, rcIndex, rcCorrupt );
253                             break;
254                         }
255 
256                         /* record id and node for slot */
257                         id2ord [ i ] = ( uint8_t ) id;
258                         ord2node [ i ] = v1 . id2node [ id ];
259                         ++ i;
260                     }
261                 }
262             }
263         }
264         else if ( id_bits <= 16 )
265         {
266             uint16_t *id2ord = malloc ( self -> count * 6 );
267             if ( id2ord == NULL )
268                 rc = RC ( rcDB, rcIndex, rcConstructing, rcMemory, rcExhausted );
269             else
270             {
271                 ord2node = ( uint32_t* ) & id2ord [ self -> count ];
272                 self -> ord2node = ord2node;
273                 self -> id2ord . v16 = id2ord;
274                 self -> variant = 2;
275 
276                 for ( i = id = 0; id < num_ids; ++ id )
277                 {
278                     if ( v1 . id2node [ id ] != 0 )
279                     {
280                         if ( i == self -> count )
281                         {
282                             rc = RC ( rcDB, rcIndex, rcConstructing, rcIndex, rcCorrupt );
283                             break;
284                         }
285 
286                         id2ord [ i ] = ( uint16_t ) id;
287                         ord2node [ i ] = v1 . id2node [ id ];
288                         ++ i;
289                     }
290                 }
291             }
292         }
293         else
294         {
295             uint32_t *id2ord = malloc ( self -> count * 8 );
296             if ( id2ord == NULL )
297                 rc = RC ( rcDB, rcIndex, rcConstructing, rcMemory, rcExhausted );
298             else
299             {
300                 ord2node = & id2ord [ self -> count ];
301                 self -> ord2node = ord2node;
302                 self -> id2ord . v32 = id2ord;
303                 self -> variant = 3;
304 
305                 for ( i = id = 0; id < num_ids; ++ id )
306                 {
307                     if ( v1 . id2node [ id ] != 0 )
308                     {
309                         if ( i == self -> count )
310                         {
311                             rc = RC ( rcDB, rcIndex, rcConstructing, rcIndex, rcCorrupt );
312                             break;
313                         }
314 
315                         id2ord [ i ] = id;
316                         ord2node [ i ] = v1 . id2node [ id ];
317                         ++ i;
318                     }
319                 }
320             }
321         }
322 
323         if ( rc == 0 )
324         {
325             if ( i == self -> count )
326                 return 0;
327             rc = RC ( rcDB, rcIndex, rcConstructing, rcIndex, rcCorrupt );
328         }
329 
330         KPTrieIndexWhack_v1 ( & v1 );
331     }
332 
333     return rc;
334 }
335 
KPTrieIndexInit_v2(KPTrieIndex_v2 * self,const KMMap * mm,bool byteswap)336 rc_t KPTrieIndexInit_v2 ( KPTrieIndex_v2 *self, const KMMap *mm, bool byteswap )
337 {
338     /* get size of map, assumed to be size of file */
339     size_t size;
340     rc_t rc = KMMapSize ( mm, & size );
341     if ( rc == 0 )
342     {
343         const KPTrieIndexHdr_v2 *hdr;
344 
345 #if ! KTRIE_ZEROS_KPTRIE
346         self -> mm = NULL;
347         self -> ord2node = NULL;
348         self -> id2ord . v32 = NULL;
349         self -> variant = 0;
350 #endif
351 
352         /* ignore empty file */
353         if ( size == 0 )
354         {
355 #if ! KTRIE_ZEROS_KPTRIE
356             self -> first = self -> last = self -> maxid = 0;
357             self -> key2id = NULL;
358             self -> count = 0;
359 #endif
360             return 0;
361         }
362 
363         /* have to have at least the base header */
364         if ( size < sizeof hdr -> dad )
365             return RC ( rcDB, rcIndex, rcConstructing, rcTrie, rcCorrupt );
366 
367         rc = KMMapAddrRead ( mm, ( const void** ) & hdr );
368         if ( rc == 0 )
369         {
370             /* recheck header size */
371             if ( size < sizeof * hdr )
372                 return RC ( rcDB, rcIndex, rcConstructing, rcTrie, rcCorrupt );
373 
374             self -> first = hdr -> first;
375             self -> last = self -> maxid = hdr -> last;
376             self -> id_bits = ( uint8_t ) hdr -> id_bits;
377             self -> span_bits = ( uint8_t ) hdr -> span_bits;
378             self -> byteswap = byteswap;
379 
380             /* try to create the pttree */
381             rc = PTrieMakeOrig ( & self -> key2id,
382                 hdr + 1, size -= sizeof * hdr, byteswap );
383             if ( rc == 0 )
384             {
385                 size_t ptsize = PTrieSize ( self -> key2id );
386                 if ( ptsize <= size )
387                 {
388                     /* the count covers at least the number of trie nodes */
389                     self -> count = PTrieCount ( self -> key2id );
390 
391                     /* it could be stored without projection */
392                     if ( ptsize == size )
393                         return 0;
394 
395                     /* calculate remaining bytes */
396                     size -= ptsize;
397 
398                     /* there must be enough for an array of 4-byte node ids */
399                     if ( size >= ( ( size_t ) self -> count << 2 ) )
400                     {
401                         /* take the persisted array as-is */
402                         self -> ord2node = ( const uint32_t* )
403                             ( ( const char* ) ( hdr + 1 ) + ptsize );
404 
405                         /* read the count */
406                         if ( size >= 4 )
407                         {
408                             self -> count = * ( self -> ord2node ) ++;
409                             size -= 4;
410                         }
411 
412                         /* determine strategy from id span and count */
413                         if ( ( self -> last - self -> first ) < ( ( int64_t ) self -> count << 1 ) )
414                         {
415                             /* must be contiguous */
416                             self -> count = ( uint32_t ) ( self -> last - self -> first + 1 );
417 
418                             /* size should be exactly this number of slots */
419                             if ( size == ( ( size_t ) self -> count << 2 ) )
420                                 return 0;
421 
422                             /* fall through to error return */
423                         }
424                         else if ( ( size == 4 && self -> count == 1 ) || size > ( ( size_t ) self -> count << 2 ) )
425                         {
426                             /* sparse */
427                             size -= ( size_t ) self -> count << 2;
428 
429                             /* unpack id map */
430                             if ( hdr -> id_bits <= 8 )
431                                 rc = KPTrieIndexInitID2Ord ( self, size, 1, hdr -> span_bits, 8 );
432                             else if ( hdr -> id_bits <= 16 )
433                                 rc = KPTrieIndexInitID2Ord ( self, size, 2, hdr -> span_bits, 16 );
434                             else if ( hdr -> id_bits <= 32 )
435                                 rc = KPTrieIndexInitID2Ord ( self, size, 3, hdr -> span_bits, 32 );
436                             else
437                                 rc = KPTrieIndexInitID2Ord ( self, size, 4, hdr -> span_bits, 64 );
438 
439                             /* done */
440                             if ( rc == 0 )
441                                 return 0;
442 
443                             PTrieWhack ( self -> key2id ), self -> key2id = NULL;
444                             return rc;
445                         }
446                     }
447                 }
448 
449                 PTrieWhack ( self -> key2id ), self -> key2id = NULL;
450                 rc = RC ( rcDB, rcIndex, rcConstructing, rcTrie, rcCorrupt );
451             }
452         }
453     }
454 
455     return rc;
456 }
457 
KPTrieIndexInit_v3_v4(KPTrieIndex_v2 * self,const KMMap * mm,bool byteswap,bool ptorig)458 rc_t KPTrieIndexInit_v3_v4 ( KPTrieIndex_v2 *self, const KMMap *mm, bool byteswap, bool ptorig )
459 {
460     /* get size of map, assumed to be size of file */
461     size_t size;
462     rc_t rc = KMMapSize ( mm, & size );
463     if ( rc == 0 )
464     {
465         const KPTrieIndexHdr_v3 *hdr;
466 
467 #if ! KTRIE_ZEROS_KPTRIE
468         self -> mm = NULL;
469         self -> ord2node = NULL;
470         self -> id2ord . v32 = NULL;
471         self -> variant = 0;
472 #endif
473 
474         /* ignore empty file */
475         if ( size == 0 )
476         {
477 #if ! KTRIE_ZEROS_KPTRIE
478             self -> first = self -> last = self -> maxid = 0;
479             self -> key2id = NULL;
480             self -> count = 0;
481 #endif
482             return 0;
483         }
484 
485         /* have to have at least the base header */
486         if ( size < sizeof hdr -> dad )
487             return RC ( rcDB, rcIndex, rcConstructing, rcTrie, rcCorrupt );
488 
489         rc = KMMapAddrRead ( mm, ( const void** ) & hdr );
490         if ( rc == 0 )
491         {
492             /* recheck header size */
493             if ( size < sizeof * hdr )
494                 return RC ( rcDB, rcIndex, rcConstructing, rcTrie, rcCorrupt );
495 
496             self -> first = hdr -> first;
497             self -> last = self -> maxid = hdr -> last;
498             self -> id_bits = ( uint8_t ) hdr -> id_bits;
499             self -> span_bits = ( uint8_t ) hdr -> span_bits;
500             self -> byteswap = byteswap;
501 
502             /* try to create the pttree */
503             rc = ( ptorig ? PTrieMakeOrig : PTrieMake )
504                 ( & self -> key2id, hdr + 1, size -= sizeof * hdr, byteswap );
505             if ( rc == 0 )
506             {
507                 size_t ptsize = PTrieSize ( self -> key2id );
508                 if ( ptsize <= size )
509                 {
510                     /* the count covers at least the number of trie nodes */
511                     self -> count = PTrieCount ( self -> key2id );
512 
513                     /* it could be stored without projection */
514                     if ( ptsize == size )
515                         return 0;
516 
517                     /* calculate remaining bytes */
518                     size -= ptsize;
519 
520                     /* there must be enough for an array of 4-byte node ids */
521                     if ( size >= ( ( size_t ) self -> count << 2 ) )
522                     {
523                         /* take the persisted array as-is */
524                         self -> ord2node = ( const uint32_t* )
525                             ( ( const char* ) ( hdr + 1 ) + ptsize );
526 
527                         /* read the count */
528                         if ( size >= 4 )
529                         {
530                             self -> count = * ( self -> ord2node ) ++;
531                             size -= 4;
532                         }
533 
534                         /* determine strategy from id span and count */
535                         if ( ( self -> last - self -> first ) < ( ( int64_t ) self -> count << 1 ) )
536                         {
537                             /* must be contiguous */
538                             self -> count = ( uint32_t ) ( self -> last - self -> first + 1 );
539 
540                             /* size should be exactly this number of slots */
541                             if ( size == ( ( size_t ) self -> count << 2 ) )
542                                 return 0;
543 
544                             /* fall through to error return */
545                         }
546                         else if ( ( size == 4 && self -> count == 1 ) || size > ( ( size_t ) self -> count << 2 ) )
547                         {
548                             /* sparse */
549                             size -= ( size_t ) self -> count << 2;
550 
551                             /* unpack id map */
552                             if ( hdr -> id_bits <= 8 )
553                                 rc = KPTrieIndexInitID2Ord ( self, size, 1, hdr -> span_bits, 8 );
554                             else if ( hdr -> id_bits <= 16 )
555                                 rc = KPTrieIndexInitID2Ord ( self, size, 2, hdr -> span_bits, 16 );
556                             else if ( hdr -> id_bits <= 32 )
557                                 rc = KPTrieIndexInitID2Ord ( self, size, 3, hdr -> span_bits, 32 );
558                             else
559                                 rc = KPTrieIndexInitID2Ord ( self, size, 4, hdr -> span_bits, 64 );
560 
561                             /* done */
562                             if ( rc == 0 )
563                                 return 0;
564 
565                             PTrieWhack ( self -> key2id ), self -> key2id = NULL;
566                             return rc;
567                         }
568                     }
569                 }
570 
571                 PTrieWhack ( self -> key2id ), self -> key2id = NULL;
572                 rc = RC ( rcDB, rcIndex, rcConstructing, rcTrie, rcCorrupt );
573             }
574         }
575     }
576 
577     return rc;
578 }
579 
580 /* Whack
581  *  closes down keymap
582  */
KPTrieIndexWhack_v2(KPTrieIndex_v2 * self)583 void KPTrieIndexWhack_v2 ( KPTrieIndex_v2 *self )
584 {
585     free ( ( void* ) self -> id2ord . v8 );
586     PTrieWhack ( self -> key2id );
587     KMMapRelease ( self -> mm );
588     memset ( self, 0, sizeof * self );
589 }
590 
KPTrieIndexID2Ord_v2(const KPTrieIndex_v2 * self,int64_t id)591 uint32_t KPTrieIndexID2Ord_v2 ( const KPTrieIndex_v2 *self, int64_t id )
592 {
593     if ( id >= self -> first && id <= self -> maxid )
594     {
595         int64_t nid;
596         uint32_t left, right, ord, count = self -> count;
597 
598         /* convert id either to a zero-based ord,
599            or else the translated id in id2ord */
600         id -= self -> first;
601 
602         /* handle type of projection */
603         switch ( self -> variant )
604         {
605         case 0:
606             /* return one-based ord */
607             return ( uint32_t ) ( id + 1 );
608 
609         case 1:
610             for ( left = 1, right = count; left <= right; )
611             {
612                 ord = ( left + right ) >> 1;
613                 nid = self -> id2ord . v8 [ ord - 1 ];
614                 if ( id == nid )
615                     return ord;
616 
617                 if ( id < nid )
618                 {
619                     right = ord - 1;
620                     continue;
621                 }
622                 if ( ord == count )
623                     return ord;
624 
625                 nid = self -> id2ord . v8 [ ord ];
626                 if ( id < nid )
627                     return ord;
628 
629                 left = ord + 1;
630             }
631             break;
632 
633         case 2:
634             for ( left = 1, right = count; left <= right; )
635             {
636                 ord = ( left + right ) >> 1;
637                 nid = self -> id2ord . v16 [ ord - 1 ];
638                 if ( id == nid )
639                     return ord;
640 
641                 if ( id < nid )
642                 {
643                     right = ord - 1;
644                     continue;
645                 }
646                 if ( ord == count )
647                     return ord;
648 
649                 nid = self -> id2ord . v16 [ ord ];
650                 if ( id < nid )
651                     return ord;
652 
653                 left = ord + 1;
654             }
655             break;
656 
657         case 3:
658             for ( left = 1, right = count; left <= right; )
659             {
660                 ord = ( left + right ) >> 1;
661                 nid = self -> id2ord . v32 [ ord - 1 ];
662                 if ( id == nid )
663                     return ord;
664 
665                 if ( id < nid )
666                 {
667                     right = ord - 1;
668                     continue;
669                 }
670                 if ( ord == count )
671                     return ord;
672 
673                 nid = self -> id2ord . v32 [ ord ];
674                 if ( id < nid )
675                     return ord;
676 
677                 left = ord + 1;
678             }
679             break;
680 
681         case 4:
682             for ( left = 1, right = count; left <= right; )
683             {
684                 ord = ( left + right ) >> 1;
685                 nid = self -> id2ord . v64 [ ord - 1 ];
686                 if ( id == nid )
687                     return ord;
688 
689                 if ( id < nid )
690                 {
691                     right = ord - 1;
692                     continue;
693                 }
694                 if ( ord == count )
695                     return ord;
696 
697                 nid = self -> id2ord . v64 [ ord ];
698                 if ( id < nid )
699                     return ord;
700 
701                 left = ord + 1;
702             }
703             break;
704         }
705     }
706     return 0;
707 }
708 
709 static
KPTrieIndexProject_v2(const KPTrieIndex_v2 * self,int64_t id,int64_t * start_id,uint32_t * span,char * key_buff,size_t buff_size,size_t * actsize)710 rc_t KPTrieIndexProject_v2 ( const KPTrieIndex_v2 *self,
711     int64_t id,
712 #if V2FIND_RETURNS_SPAN
713     int64_t *start_id, uint32_t *span,
714 #endif
715     char *key_buff, size_t buff_size, size_t *actsize )
716 {
717     uint32_t nid, ord = KPTrieIndexID2Ord_v2 ( self, id );
718     if ( ord != 0 )
719     {
720         assert ( start_id != NULL );
721         assert ( span != NULL );
722 
723         nid = self -> ord2node [ ord - 1 ];
724 
725         switch ( self -> variant )
726         {
727         case 0:
728             * start_id = id;
729             for ( ; ord < self -> count; ++ ord )
730             {
731                 if ( nid != self -> ord2node [ ord ] )
732                     break;
733             }
734             * span = self -> first + ord - * start_id;
735             break;
736         case 1:
737             * start_id = self -> id2ord . v8 [ ord - 1 ];
738             * span = ( uint32_t ) ( ( ( ord == self -> count ) ?
739                 ( self -> maxid  - self -> first + 1 ) : self -> id2ord . v8 [ ord ] ) - * start_id );
740             *start_id += self->first;
741             break;
742         case 2:
743             * start_id = self -> id2ord . v16 [ ord - 1 ];
744             * span = ( uint32_t ) ( ( ( ord == self -> count ) ?
745                 ( self -> maxid  - self -> first + 1 ) : self -> id2ord . v16 [ ord ] ) - * start_id );
746             *start_id += self->first;
747             break;
748         case 3:
749             * start_id = self -> id2ord . v32 [ ord - 1 ];
750             * span = ( uint32_t ) ( ( ( ord == self -> count ) ?
751                 ( self -> maxid  - self -> first + 1 ) : self -> id2ord . v32 [ ord ] ) - * start_id );
752             *start_id += self->first;
753             break;
754         case 4:
755             * start_id = self -> id2ord . v64 [ ord - 1 ];
756             * span = ( uint32_t ) ( ( ( ord == self -> count ) ?
757                 ( self -> maxid  - self -> first + 1 ) : self -> id2ord . v64 [ ord ] ) - * start_id );
758             *start_id += self->first;
759             break;
760         }
761 
762         if ( nid != 0 )
763         {
764             rc_t rc;
765             PTNode node;
766 
767             if ( self -> byteswap )
768                 nid = bswap_32 ( nid );
769 
770             rc = PTrieGetNode ( self -> key2id, & node, nid );
771             if ( rc == 0 )
772             {
773                 const String *key;
774                 rc = PTNodeMakeKey ( & node, & key );
775                 if ( rc == 0 )
776                 {
777                     if (actsize)
778                         *actsize = key -> size;
779                     if ( key -> size >= buff_size )
780                         rc = RC ( rcDB, rcIndex, rcProjecting, rcBuffer, rcInsufficient );
781                     else
782                         string_copy ( key_buff, buff_size, key -> addr, key -> size );
783 
784                     StringWhack ( ( String* ) key );
785                 }
786             }
787             return rc;
788         }
789     }
790 
791     return RC ( rcDB, rcIndex, rcProjecting, rcId, rcNotFound );
792 }
793 
794 /* Find
795  */
796 static
KPTrieIndexFind_v2(const KPTrieIndex_v2 * self,const char * str,int64_t * start_id,uint32_t * span,int (CC * custom_cmp)(const void * item,const PBSTNode * n,void * data),void * data,bool convertFromV1)797 rc_t KPTrieIndexFind_v2 ( const KPTrieIndex_v2 *self,
798     const char *str, int64_t *start_id,
799 #if V2FIND_RETURNS_SPAN
800     uint32_t *span,
801 #endif
802     int ( CC * custom_cmp ) ( const void *item, const PBSTNode *n, void *data ), void *data, bool convertFromV1 )
803 {
804     rc_t rc;
805 
806     /* detect empty index */
807     if ( self -> count == 0 )
808         rc = RC ( rcDB, rcIndex, rcSelecting, rcString, rcNotFound );
809     else
810     {
811         uint32_t nid;
812         PTNode pnode;
813 
814         String key;
815         StringInitCString ( & key, str );
816 
817         /* try to find string */
818         nid = PTrieFind ( self -> key2id, & key, & pnode, custom_cmp, data );
819         if ( nid == 0 )
820             rc = RC ( rcDB, rcIndex, rcSelecting, rcString, rcNotFound );
821         else
822         {
823             size_t usize;
824 
825             /* detect conversion from v1 */
826             if ( convertFromV1 && self -> id_bits == 0 )
827             {
828                 /* v1 stored tree will have just a 32-bit spot id as data */
829                 uint32_t id;
830                 assert ( pnode . data . size == sizeof id );
831                 memmove ( & id, pnode . data . addr, sizeof id );
832                 * start_id = id;
833                 rc = 0;
834             }
835             else
836             {
837                 /* should be native v2 */
838                 rc = Unpack ( self -> id_bits, sizeof * start_id * 8,
839                     pnode . data . addr, 0, self -> id_bits, NULL,
840                     start_id, sizeof * start_id, & usize );
841                 * start_id += self -> first;
842             }
843 
844             if ( rc == 0 )
845             {
846 #if V2FIND_RETURNS_SPAN
847                 if ( self -> ord2node != NULL )
848                 {
849                     uint32_t ord = KPTrieIndexID2Ord_v2 ( self, * start_id );
850                     if ( ord == 0 )
851                         rc = RC ( rcDB, rcIndex, rcSelecting, rcId, rcNotFound );
852                     else if ( ord == self -> count )
853                         * span = self -> maxid - * start_id + 1;
854                     else switch ( self -> variant )
855                     {
856                     case 0:
857                         for ( ; ord < self -> count; ++ ord )
858                         {
859                             if ( nid != self -> ord2node [ ord ] )
860                                 break;
861                         }
862                         * span = self -> first + ord - * start_id;
863                         break;
864                     case 1:
865                         * span = self -> first + self -> id2ord . v8 [ ord ] - * start_id;
866                         break;
867                     case 2:
868                         * span = self -> first + self -> id2ord . v16 [ ord ] - * start_id;
869                         break;
870                     case 3:
871                         * span = self -> first + self -> id2ord . v32 [ ord ] - * start_id;
872                         break;
873                     case 4:
874                         * span = self -> first + self -> id2ord . v64 [ ord ] - * start_id;
875                         break;
876                     }
877                 }
878                 else if ( self -> span_bits == 0 )
879                     * span = 1;
880                 else
881                 {
882                     rc = Unpack ( self -> span_bits, sizeof * span * 8,
883                         pnode . data . addr, 0, self -> id_bits, NULL,
884                         span, sizeof * span, & usize );
885                 }
886 #endif
887             }
888         }
889     }
890 
891     return rc;
892 }
893 
894 
895 /*--------------------------------------------------------------------------
896  * KTrieIdxNode_v2
897  */
898 
899 static
KTrieIdxNodeMake_v2_s1(KTrieIdxNode_v2_s1 ** n,const String * key,int64_t id)900 rc_t KTrieIdxNodeMake_v2_s1 ( KTrieIdxNode_v2_s1 **n, const String *key, int64_t id )
901 {
902     rc_t rc = TNodeMake ( ( TNode** ) n, sizeof ** n + key -> size );
903     if ( rc != 0 )
904         rc = ResetRCContext ( rc, rcDB, rcIndex, rcInserting );
905     else
906     {
907         KTrieIdxNode_v2_s1 *node = * n;
908         string_copy ( node -> key, key -> size + 1, key -> addr, key -> size);
909         StringInit ( & node -> n . key, node -> key, key -> size, key -> len );
910         node -> start_id = id;
911     }
912 
913     return rc;
914 }
915 
916 static
KTrieIdxNodeMakeHole_v2_s1(KTrieIdxNode_v2_s1 ** n,int64_t id)917 rc_t KTrieIdxNodeMakeHole_v2_s1 ( KTrieIdxNode_v2_s1 **n, int64_t id )
918 {
919     rc_t rc = TNodeMake ( ( TNode** ) n, sizeof ** n );
920     if ( rc != 0 )
921         rc = ResetRCContext ( rc, rcDB, rcIndex, rcInserting );
922     else
923     {
924         KTrieIdxNode_v2_s1 *node = * n;
925         node -> key [ 0 ] = 0;
926         StringInit ( & node -> n . key, node -> key, 0, 0 );
927         node -> start_id = id;
928     }
929 
930     return rc;
931 }
932 
933 static
KTrieIdxNodeMake_v2_s2(KTrieIdxNode_v2_s2 ** n,const String * key,int64_t id)934 rc_t KTrieIdxNodeMake_v2_s2 ( KTrieIdxNode_v2_s2 **n, const String *key, int64_t id )
935 {
936     rc_t rc = TNodeMake ( ( TNode** ) n, sizeof ** n + key -> size );
937     if ( rc != 0 )
938         rc = ResetRCContext ( rc, rcDB, rcIndex, rcInserting );
939     else
940     {
941         KTrieIdxNode_v2_s2 *node = * n;
942         string_copy ( node -> key, key -> size + 1, key -> addr, key -> size);
943         StringInit ( & node -> n . key, node -> key, key -> size, key -> len );
944         node -> start_id = id;
945         node -> span = 1;
946     }
947     return rc;
948 }
949 
950 static
KTrieIdxNodeWhack_v2(TNode * n,void * data)951 void CC KTrieIdxNodeWhack_v2 ( TNode *n, void *data )
952 {
953     TNodeWhack ( n );
954 }
955 
956 #if 0
957 static
958 void CC KTrieIdxNodeUnlink_v2 ( TNode *n, void *data )
959 {
960     if ( TrieUnlink ( data, n ) )
961         TNodeWhack ( n );
962 }
963 #endif
964 
965 
966 /*--------------------------------------------------------------------------
967  * KTrieIndex_v2
968  */
969 
970 static
KTrieIndexID2Ord_v2(const KTrieIndex_v2 * self,int64_t id)971 uint32_t KTrieIndexID2Ord_v2 ( const KTrieIndex_v2 *self, int64_t id )
972 {
973     if ( id >= self -> first && id <= self -> last )
974     {
975         uint32_t left, right, count = self -> count;
976         for ( left = 1, right = count; left <= right; )
977         {
978             uint32_t ord = ( left + right ) >> 1;
979             const KTrieIdxNode_v2_s1 *node = self -> ord2node [ ord - 1 ];
980             if ( id == node -> start_id )
981                 return ord;
982 
983             if ( id < node -> start_id )
984             {
985                 right = ord - 1;
986                 continue;
987             }
988 
989             if ( ord == count )
990                 return ord;
991 
992             node = self -> ord2node [ ord ];
993             if ( id < node -> start_id )
994                 return ord;
995 
996             left = ord + 1;
997         }
998     }
999     return 0;
1000 }
1001 
1002 static
KTrieIndexNode2Ord_v2(const KTrieIndex_v2 * self,const KTrieIdxNode_v2_s1 * node)1003 uint32_t KTrieIndexNode2Ord_v2 ( const KTrieIndex_v2 *self, const KTrieIdxNode_v2_s1 *node )
1004 {
1005     if ( self -> ord2node != NULL )
1006         return KTrieIndexID2Ord_v2 ( self, node -> start_id );
1007     return 0;
1008 }
1009 
1010 /* KTrieIndexWrite_v2
1011  */
1012 typedef struct PersistTrieData PersistTrieData;
1013 struct PersistTrieData
1014 {
1015     uint64_t pos;
1016     KFile *f;
1017     KMD5File *fmd5;
1018     uint8_t *buffer;
1019     size_t bsize;
1020     size_t marker;
1021 
1022     int64_t first;
1023     size_t ptt_size;
1024     size_t node_data_size;
1025     uint16_t id_bits;
1026     uint16_t span_bits;
1027     rc_t rc;
1028 };
1029 
1030 static
KTrieIndexWrite_v2(void * param,const void * buffer,size_t size,size_t * num_writ)1031 rc_t CC KTrieIndexWrite_v2 ( void *param,
1032     const void *buffer, size_t size, size_t *num_writ )
1033 {
1034     PersistTrieData *pb = param;
1035     size_t total, to_write;
1036 
1037     for ( total = 0; total < size; total += to_write )
1038     {
1039         to_write = size - total;
1040         if ( pb -> marker + to_write > pb -> bsize )
1041             to_write = pb -> bsize - pb -> marker;
1042 
1043         if ( to_write > 0 )
1044         {
1045             memmove ( pb -> buffer + pb -> marker,
1046                 ( const uint8_t* ) buffer + total, to_write );
1047             pb -> marker += to_write;
1048         }
1049 
1050         if ( pb -> marker == pb -> bsize )
1051         {
1052             size_t num_flushed;
1053             pb -> rc = KFileWrite ( pb -> f, pb -> pos,
1054                 pb -> buffer, pb -> bsize, & num_flushed );
1055             if ( pb -> rc != 0 )
1056             {
1057                 * num_writ = 0;
1058                 return pb -> rc;
1059             }
1060 
1061             if ( num_flushed == 0 )
1062             {
1063                 * num_writ = total + to_write;
1064                 return pb -> rc = RC ( rcDB, rcIndex, rcPersisting, rcTransfer, rcIncomplete );
1065             }
1066 
1067             pb -> marker -= num_flushed;
1068             pb -> pos += num_flushed;
1069 
1070             if ( pb -> marker != 0 )
1071                 memmove ( pb -> buffer, pb -> buffer + num_flushed, pb -> marker );
1072         }
1073     }
1074 
1075     * num_writ = total;
1076     return 0;
1077 }
1078 
1079 /* KTrieIndexAux_v2
1080  */
1081 static
KTrieIndexAux_v2_s1(void * param,const void * node,size_t * num_writ,PTWriteFunc write,void * write_param)1082 rc_t CC KTrieIndexAux_v2_s1 ( void *param, const void *node, size_t *num_writ,
1083     PTWriteFunc write, void *write_param )
1084 {
1085     PersistTrieData *pb = param;
1086 
1087     if ( write != NULL && pb -> node_data_size != 0 )
1088     {
1089         char buffer [ 8 ];
1090         const KTrieIdxNode_v2_s1 *n = node;
1091 
1092         /* pack from 64 possible bits down to total id span */
1093         if ( pb -> id_bits != 0 )
1094         {
1095             /* store name->id mapping as a simple translation
1096                from first, because we don't have easy access to
1097                neighboring nodes for storage as 1st derivative. */
1098             uint64_t idd = n -> start_id - pb -> first;
1099 
1100             bitsz_t psize;
1101             rc_t rc = Pack ( 64, pb -> id_bits, & idd,
1102                 sizeof idd, NULL, buffer, 0, sizeof buffer * 8, & psize );
1103             if ( rc != 0 )
1104                 return rc;
1105 
1106             /* the packing should produce a single unit */
1107             if ( psize != pb -> id_bits )
1108                 return RC ( rcDB, rcIndex, rcPacking, rcData, rcCorrupt );
1109         }
1110 
1111         /* write out the node */
1112         return ( * write ) ( write_param, buffer, pb -> node_data_size, num_writ );
1113     }
1114 
1115     /* will always store an integral number of bytes */
1116     * num_writ = pb -> node_data_size;
1117     return 0;
1118 }
1119 
1120 static
KTrieIndexAux_v2_s2(void * param,const void * node,size_t * num_writ,PTWriteFunc write,void * write_param)1121 rc_t CC KTrieIndexAux_v2_s2 ( void *param, const void *node, size_t *num_writ,
1122     PTWriteFunc write, void *write_param )
1123 {
1124     PersistTrieData *pb = param;
1125 
1126     if ( write != NULL && pb -> node_data_size != 0 )
1127     {
1128         const KTrieIdxNode_v2_s2 *n = node;
1129 
1130         rc_t rc;
1131         char buffer [ 12 ];
1132         bitsz_t psize, offset;
1133 
1134         if ( pb -> id_bits == 0 )
1135             offset = 0;
1136         else
1137         {
1138             /* again store name->id mapping as a simple translation
1139                from first, but pack bits tightly */
1140             uint64_t idd = n -> start_id - pb -> first;
1141             rc = Pack ( 64, pb -> id_bits, & idd,
1142                 sizeof idd, NULL, buffer, 0, sizeof buffer * 8, & offset );
1143             if ( rc != 0 )
1144                 return rc;
1145             if ( offset != pb -> id_bits )
1146                 return RC ( rcDB, rcIndex, rcPacking, rcData, rcCorrupt );
1147         }
1148 
1149         /* now pack id span down to a minimal number of bits
1150            6/8/09 - this is known to fail because Pack hasn't been
1151            updated to start on a non-0 bit offset */
1152         if ( pb -> span_bits != 0 )
1153         {
1154             rc = Pack ( 32, pb -> span_bits, & n -> span, sizeof n -> span,
1155                 NULL, buffer, offset, sizeof buffer * 8 - offset, & psize );
1156             if ( rc != 0 )
1157                 return rc;
1158             if ( psize != pb -> span_bits )
1159                 return RC ( rcDB, rcIndex, rcPacking, rcData, rcCorrupt );
1160         }
1161 
1162         /* write out packed combination */
1163         return ( * write ) ( write_param, buffer, pb -> node_data_size, num_writ );
1164     }
1165 
1166     * num_writ = pb -> node_data_size;
1167     return 0;
1168 }
1169 
1170 /* KTrieIndexPersist_v*
1171  *  write keymap to indicated location
1172  */
1173 #if KDBINDEXVERS == 2
1174 
1175 static
KTrieIndexPersistHdr_v2(KTrieIndex_v2 * self,PersistTrieData * pb)1176 void KTrieIndexPersistHdr_v2 ( KTrieIndex_v2 *self, PersistTrieData *pb )
1177 {
1178     KPTrieIndexHdr_v2 *hdr;
1179 
1180     uint64_t total_id, test_id;
1181     uint32_t total_span, test_span;
1182 
1183     pb -> pos = 0;
1184 
1185     hdr = ( KPTrieIndexHdr_v2* ) pb -> buffer;
1186     pb -> marker = sizeof * hdr;
1187 
1188     /* stamp version header */
1189     KDBHdrInit(&hdr->dad, 2);
1190 
1191     /* store first and last ids */
1192     pb -> first = self -> first;
1193     hdr -> first = self -> first;
1194     hdr -> last = self -> last;
1195 
1196     /* calculate id bits - notice that
1197        total_id gets right shifted so that
1198        the loop is guaranteed to exit */
1199     total_id = self -> last - self -> first;
1200     if ( total_id == 0 )
1201         pb -> id_bits = 0;
1202     else for ( total_id >>= 1, pb -> id_bits = 1, test_id = 1;
1203           test_id <= total_id;
1204           ++ pb -> id_bits, test_id <<= 1 )
1205         ( void ) 0;
1206 
1207     /* if we have maintained a projection index,
1208        calculate max span now */
1209     if ( self -> ord2node != NULL )
1210     {
1211         uint32_t i, span, max_span;
1212         int64_t cur, prev = self -> first;
1213         for ( i = max_span = 1; i < self -> count; prev = cur, ++ i )
1214         {
1215             cur = self -> ord2node [ i ] -> start_id;
1216             span = ( uint32_t ) ( cur - prev );
1217             if ( span > max_span )
1218                 max_span = span;
1219         }
1220 
1221         span = ( uint32_t ) ( self -> last - prev );
1222         if ( span > max_span )
1223             max_span = span;
1224 
1225         self -> max_span = max_span;
1226     }
1227 
1228     /* calculate span bits */
1229     total_span = self -> max_span;
1230     if ( total_span == 0 )
1231         pb -> span_bits = 0;
1232     else for ( total_span >>= 1, pb -> span_bits = 1, test_span = 1;
1233           test_span <= total_span;
1234           ++ pb -> span_bits, test_span <<= 1 )
1235         ( void ) 0;
1236 
1237     /* record these as header data */
1238     hdr -> id_bits = pb -> id_bits;
1239     hdr -> span_bits = pb -> span_bits;
1240 
1241     /* zero trailing junk */
1242     hdr -> align [ 0 ] = hdr -> align [ 1 ] = 0;
1243 }
1244 
1245 #else
1246 
1247 static
KTrieIndexPersistHdr_v3_v4(KTrieIndex_v2 * self,PersistTrieData * pb)1248 void KTrieIndexPersistHdr_v3_v4 ( KTrieIndex_v2 *self, PersistTrieData *pb )
1249 {
1250     KPTrieIndexHdr_v3 *hdr;
1251 
1252     uint64_t total_id, test_id;
1253     uint32_t total_span, test_span;
1254 
1255     pb -> pos = 0;
1256 
1257     hdr = ( KPTrieIndexHdr_v3* ) pb -> buffer;
1258     pb -> marker = sizeof * hdr;
1259 
1260     /* stamp version header */
1261     KDBHdrInit(&hdr->dad.h, KDBINDEXVERS);
1262     hdr->dad.index_type = kitText;
1263 
1264     /* store first and last ids */
1265     pb -> first = self -> first;
1266     hdr -> first = self -> first;
1267     hdr -> last = self -> last;
1268 
1269     /* calculate id bits - notice that
1270        total_id gets right shifted so that
1271        the loop is guaranteed to exit */
1272     total_id = self -> last - self -> first;
1273     if ( total_id == 0 )
1274         pb -> id_bits = 0;
1275     else for ( total_id >>= 1, pb -> id_bits = 1, test_id = 1;
1276           test_id <= total_id;
1277           ++ pb -> id_bits, test_id <<= 1 )
1278         ( void ) 0;
1279 
1280     /* if we have maintained a projection index,
1281        calculate max span now */
1282     if ( self -> ord2node != NULL )
1283     {
1284         uint32_t i, span, max_span;
1285         int64_t cur, prev = self -> first;
1286         for ( i = max_span = 1; i < self -> count; prev = cur, ++ i )
1287         {
1288             cur = self -> ord2node [ i ] -> start_id;
1289             span = ( uint32_t ) ( cur - prev );
1290             if ( span > max_span )
1291                 max_span = span;
1292         }
1293 
1294         span = ( uint32_t ) ( self -> last - prev );
1295         if ( span > max_span )
1296             max_span = span;
1297 
1298         self -> max_span = max_span;
1299     }
1300 
1301     /* calculate span bits */
1302     total_span = self -> max_span;
1303     if ( total_span == 0 )
1304         pb -> span_bits = 0;
1305     else for ( total_span >>= 1, pb -> span_bits = 1, test_span = 1;
1306           test_span <= total_span;
1307           ++ pb -> span_bits, test_span <<= 1 )
1308         ( void ) 0;
1309 
1310     /* record these as header data */
1311     hdr -> id_bits = pb -> id_bits;
1312     hdr -> span_bits = pb -> span_bits;
1313 
1314     /* zero trailing junk */
1315     hdr -> align [ 0 ] = hdr -> align [ 1 ] = 0;
1316 }
1317 
1318 #endif
1319 
1320 static
KTrieIndexPersistTrie_v2(const KTrieIndex_v2 * self,PersistTrieData * pb)1321 rc_t KTrieIndexPersistTrie_v2 ( const KTrieIndex_v2 *self, PersistTrieData *pb )
1322 {
1323     rc_t rc;
1324 
1325     /* persist the trie to file,
1326        using tree-internal key storage,
1327        capture the image size in pb */
1328     if ( self -> ord2node != NULL )
1329     {
1330         pb -> node_data_size = ( pb -> id_bits + 7 ) >> 3;
1331         rc = TriePersist ( & self -> key2id, & pb -> ptt_size,
1332             false, KTrieIndexWrite_v2, pb, KTrieIndexAux_v2_s1, pb );
1333     }
1334     else
1335     {
1336         pb -> node_data_size = ( pb -> id_bits + pb -> span_bits + 7 ) >> 3;
1337         rc = TriePersist ( & self -> key2id, & pb -> ptt_size,
1338             false, KTrieIndexWrite_v2, pb, KTrieIndexAux_v2_s2, pb );
1339     }
1340 
1341     if ( rc == 0 && pb -> marker != 0 )
1342     {
1343         size_t num_writ;
1344         rc = KFileWrite ( pb -> f, pb -> pos,
1345             pb -> buffer, pb -> marker, & num_writ );
1346         if ( rc == 0 && num_writ != pb -> marker )
1347             rc = RC ( rcDB, rcIndex, rcPersisting, rcTransfer, rcIncomplete );
1348     }
1349 
1350     return rc;
1351 }
1352 
1353 
1354 static
KTrieIndexPersistProjContig_v2(const KTrieIndex_v2 * self,PersistTrieData * pb,PTrie * tt,uint32_t * ord2node)1355 rc_t KTrieIndexPersistProjContig_v2 ( const KTrieIndex_v2 *self,
1356     PersistTrieData *pb, PTrie *tt, uint32_t *ord2node )
1357 {
1358     uint32_t i, j, nid;
1359     int64_t id = self -> first;
1360     for ( i = j = nid = 0; i < self -> count; ++ id, ++ j, ++ i )
1361     {
1362         const KTrieIdxNode_v2_s1 *node = self -> ord2node [ i ];
1363 
1364         /* back fill repeats */
1365         for ( ; id < node -> start_id; ++ id )
1366             ord2node [ j ++ ] = nid;
1367 
1368         /* check for a hole in id space */
1369         if ( node -> n . key . size == 0 )
1370             nid = 0;
1371         else
1372         {
1373             PTNode pn;
1374             nid = PTrieFind ( tt, & node -> n . key, & pn, NULL, NULL );
1375             if ( nid == 0 )
1376                 return RC ( rcDB, rcIndex, rcPersisting, rcTransfer, rcIncomplete );
1377         }
1378 
1379         /* record nid for i at j */
1380         ord2node [ j ] = nid;
1381     }
1382 
1383     /* finish off trailing span */
1384     for ( ; id <= self -> last; ++ id )
1385         ord2node [ j ++ ] = nid;
1386 
1387     return 0;
1388 }
1389 
1390 static
KTrieIndexPersistProjSparse_v2(const KTrieIndex_v2 * self,PersistTrieData * pb,PTrie * tt,uint32_t * ord2node,bitsz_t * psize)1391 rc_t KTrieIndexPersistProjSparse_v2 ( const KTrieIndex_v2 *self,
1392     PersistTrieData *pb, PTrie *tt, uint32_t *ord2node, bitsz_t *psize )
1393 {
1394     uint32_t i, nid;
1395     int64_t *id2ord = ( void* ) & ord2node [ self -> count ];
1396     for ( i = 0; i < self -> count; ++ i )
1397     {
1398         const KTrieIdxNode_v2_s1 *node = self -> ord2node [ i ];
1399 
1400         /* record negated id for i - see 1st derivative below */
1401         id2ord [ i ] = - node -> start_id;
1402 
1403         /* check for a hole in id space */
1404         if ( node -> n . key . size == 0 )
1405             nid = 0;
1406         else
1407         {
1408             PTNode pn;
1409             nid = PTrieFind ( tt, & node -> n . key, & pn, NULL, NULL );
1410             if ( nid == 0 )
1411                 return RC ( rcDB, rcIndex, rcPersisting, rcTransfer, rcIncomplete );
1412         }
1413 
1414         /* record nid for i */
1415         ord2node [ i ] = nid;
1416     }
1417 
1418     /* produce first derivative of ids
1419        for any given pair, the 1st derivative is generally
1420        right - left, which is usually stored right, such that
1421        we start at the end and move left toward the start, i.e.
1422 
1423        right -= left and move left
1424 
1425        in this case, we want to eliminate the leading 0
1426        and shift everything down, so we produce the result
1427        to the left side and move right toward end, but this
1428        requires more complicated arithmetic in order to preserve
1429        right - left, i.e.
1430 
1431        left = right - left and move right
1432 
1433        to avoid this arithmetic, the ids were stored negated above
1434        which converts the operation into
1435 
1436        left -= right and move right
1437     */
1438     for ( i = 1; i < self -> count; ++ i )
1439         id2ord [ i - 1 ] -= id2ord [ i ];
1440 
1441     /* pack from 64 to span-bits */
1442     return Pack ( 64, pb -> span_bits, id2ord, ( size_t ) ( self -> count - 1 ) << 3,
1443                 NULL, id2ord, 0, ( bitsz_t ) self -> count << 6, psize );
1444 }
1445 
1446 #if KDBINDEXVERS == 2
1447 
1448 static
KTrieIndexPersistProj_v2(const KTrieIndex_v2 * self,PersistTrieData * pb)1449 rc_t KTrieIndexPersistProj_v2 ( const KTrieIndex_v2 *self, PersistTrieData *pb )
1450 {
1451     rc_t rc = 0;
1452     void * addr;
1453     size_t map_size;
1454     uint64_t file_size;
1455     size_t num_to_read;
1456     uint64_t num_ids;
1457     bool is_sparse;
1458 
1459     /* there must be something to do */
1460     if ( self -> count == 0 || self -> ord2node == NULL )
1461         return 0;
1462 
1463     /* calculate what kind of projection strategy to use:
1464        when avg ( id span ) <= 2.0, just use a straight array.
1465        otherwise, use two arrays: first for node ids and last
1466        being 1st derivative of positional start_ids.
1467 
1468        the calculation of the ratio would be
1469          num_ids = self -> last - self -> first + 1;
1470          ratio = num_ids / self -> count;
1471          if ( ratio <= 2.0 )
1472              use 1-1
1473          else
1474              use sparse
1475 
1476        by reorganizing the comparison, we get
1477          if ( num_ids <= 2 * self -> count )...
1478     */
1479     num_ids = self -> last - self -> first + 1;
1480     if ( num_ids <= ( ( uint64_t ) self -> count << 1 ) )
1481     {
1482         /* store 1-1 projection index */
1483         is_sparse = false;
1484 
1485         /* map size is 4 bytes per id */
1486         map_size = pb -> ptt_size + ( ( size_t ) num_ids << 2 );
1487     }
1488     else
1489     {
1490         /* store sparse projection index */
1491         is_sparse = true;
1492 
1493         /* map size for node ids is 4 bytes per slot */
1494         map_size = pb -> ptt_size + ( ( size_t ) self -> count << 2 );
1495 
1496         /* map size for 1st derivative ids is initially 8 bytes per slot
1497            used initially to store full ids and then reduced 1st deriv. */
1498         map_size += ( size_t ) self -> count << 3;
1499     }
1500 
1501     /* add in 4 bytes for count */
1502     map_size += 4;
1503 
1504     /* create an updatable region spanning from end of header,
1505        starting from PTrie and extending to end of projection index */
1506     addr = malloc ( map_size );
1507     if ( addr == NULL )
1508         rc = RC ( rcDB, rcIndex, rcPersisting, rcMemory, rcExhausted );
1509     else
1510     {
1511         size_t num_read;
1512 
1513         rc = KFileSize ( pb -> f, & file_size );
1514         num_to_read = file_size - sizeof ( KPTrieIndexHdr_v2 );
1515         if ( rc == 0 )
1516         {
1517             rc = KFileReadAll ( pb -> f, sizeof ( KPTrieIndexHdr_v2 ), addr,
1518 			     num_to_read, & num_read );
1519             if ( rc == 0 )
1520             {
1521                 if ( num_read != num_to_read )
1522                 {
1523                     rc = RC ( rcDB, rcIndex, rcPersisting, rcHeader, rcInsufficient );
1524                 }
1525             }
1526         }
1527         if ( rc != 0 )
1528             free ( addr );
1529     }
1530 
1531     if ( rc == 0 )
1532     {
1533         size_t num_writ;
1534         /* inflate the PTrie */
1535         PTrie *tt;
1536         rc = PTrieMakeOrig ( & tt, addr, pb -> ptt_size );
1537         if ( rc == 0 )
1538         {
1539             uint32_t *ord2node;
1540             assert ( pb -> ptt_size == PTrieSize ( tt ) );
1541             assert ( self -> count >= PTrieCount ( tt ) );
1542             ord2node = ( void* ) ( ( char* ) addr + pb -> ptt_size );
1543             assert ( ( ( size_t ) ord2node & 3 ) == 0 );
1544 
1545             /* set count */
1546             * ord2node ++ = self -> count;
1547 
1548             if ( ! is_sparse )
1549                 rc = KTrieIndexPersistProjContig_v2 ( self, pb, tt, ord2node );
1550             else
1551             {
1552                 bitsz_t psize;
1553                 rc = KTrieIndexPersistProjSparse_v2 ( self, pb, tt, ord2node, & psize );
1554                 if ( rc == 0 )
1555                 {
1556                     map_size -= ( size_t ) self -> count << 3;
1557                     map_size += ( psize + 7 ) >> 3;
1558                 }
1559             }
1560 
1561             /* done with pttree */
1562             PTrieWhack ( tt );
1563         }
1564         rc = KFileWrite ( pb -> f, file_size,
1565 			  (uint8_t*)addr + num_to_read,  map_size - num_to_read, & num_writ );
1566         if ( rc == 0  &&  num_writ != map_size - num_to_read )
1567             rc = RC ( rcDB, rcIndex, rcPersisting, rcHeader, rcInsufficient );
1568         free ( addr );
1569     }
1570 
1571     return rc;
1572 }
1573 
1574 #else
1575 
1576 static
KTrieIndexPersistProj_v3(const KTrieIndex_v2 * self,PersistTrieData * pb)1577 rc_t KTrieIndexPersistProj_v3 ( const KTrieIndex_v2 *self, PersistTrieData *pb )
1578 {
1579     rc_t rc = 0;
1580     void * addr;
1581     size_t map_size;
1582     uint64_t file_size;
1583     size_t num_to_read;
1584     uint64_t num_ids;
1585     bool is_sparse;
1586 
1587     /* there must be something to do */
1588     if ( self -> count == 0 || self -> ord2node == NULL )
1589         return 0;
1590 
1591     /* calculate what kind of projection strategy to use:
1592        when avg ( id span ) <= 2.0, just use a straight array.
1593        otherwise, use two arrays: first for node ids and last
1594        being 1st derivative of positional start_ids.
1595 
1596        the calculation of the ratio would be
1597          num_ids = self -> last - self -> first + 1;
1598          ratio = num_ids / self -> count;
1599          if ( ratio <= 2.0 )
1600              use 1-1
1601          else
1602              use sparse
1603 
1604        by reorganizing the comparison, we get
1605          if ( num_ids <= 2 * self -> count )...
1606     */
1607     num_ids = self -> last - self -> first + 1;
1608     if ( num_ids <= ( ( uint64_t ) self -> count << 1 ) )
1609     {
1610         /* store 1-1 projection index */
1611         is_sparse = false;
1612 
1613         /* map size is 4 bytes per id */
1614         map_size = pb -> ptt_size + ( ( size_t ) num_ids << 2 );
1615     }
1616     else
1617     {
1618         /* store sparse projection index */
1619         is_sparse = true;
1620 
1621         /* map size for node ids is 4 bytes per slot */
1622         map_size = pb -> ptt_size + ( ( size_t ) self -> count << 2 );
1623 
1624         /* map size for 1st derivative ids is initially 8 bytes per slot
1625            used initially to store full ids and then reduced 1st deriv. */
1626         map_size += ( size_t ) self -> count << 3;
1627     }
1628 
1629     /* add in 4 bytes for count */
1630     map_size += 4;
1631 
1632     rc = KFileSize ( pb -> f, & file_size );
1633     if ( rc == 0 )
1634     {
1635         /* create an updatable region spanning from end of header,
1636            starting from PTrie and extending to end of projection index */
1637         addr = malloc ( map_size );
1638         if ( addr == NULL )
1639             rc = RC ( rcDB, rcIndex, rcPersisting, rcMemory, rcExhausted );
1640         else
1641         {
1642             size_t num_read;
1643             num_to_read = file_size - sizeof ( KPTrieIndexHdr_v3 );
1644             rc = KFileReadAll ( pb -> f, sizeof ( KPTrieIndexHdr_v3 ),
1645                 addr, num_to_read, & num_read );
1646             if ( rc != 0 )
1647                 free ( addr );
1648             else if ( num_read != num_to_read )
1649             {
1650                 free ( addr );
1651                 rc = RC ( rcDB, rcIndex, rcPersisting, rcHeader, rcInsufficient );
1652             }
1653         }
1654     }
1655 
1656     if ( rc == 0 )
1657     {
1658         size_t num_writ;
1659         /* inflate the PTrie */
1660         PTrie *tt;
1661 #if KDBINDEXVERS > 3
1662         rc = PTrieMake ( & tt, addr, pb -> ptt_size, self -> pt . byteswap );
1663 #else
1664         rc = PTrieMakeOrig ( & tt, addr, pb -> ptt_size, self -> pt . byteswap );
1665 #endif
1666         if ( rc == 0 )
1667         {
1668             uint32_t *ord2node;
1669             assert ( pb -> ptt_size == PTrieSize ( tt ) );
1670             assert ( self -> count >= PTrieCount ( tt ) );
1671             ord2node = ( void* ) ( ( char* ) addr + pb -> ptt_size );
1672             assert ( ( ( size_t ) ord2node & 3 ) == 0 );
1673 
1674             /* set count */
1675             * ord2node ++ = self -> count;
1676 
1677             if ( ! is_sparse )
1678                 rc = KTrieIndexPersistProjContig_v2 ( self, pb, tt, ord2node );
1679             else
1680             {
1681                 bitsz_t psize;
1682                 rc = KTrieIndexPersistProjSparse_v2 ( self, pb, tt, ord2node, & psize );
1683                 if ( rc == 0 )
1684                 {
1685                     map_size -= ( size_t ) self -> count << 3;
1686                     map_size += ( psize + 7 ) >> 3;
1687                 }
1688             }
1689 
1690             /* done with pttree */
1691             PTrieWhack ( tt );
1692 
1693             if ( rc == 0 )
1694             {
1695                 rc = KFileWrite ( pb -> f, file_size,
1696                      ( uint8_t* ) addr + num_to_read,  map_size - num_to_read, & num_writ );
1697 
1698                 if ( rc == 0  &&  num_writ != map_size - num_to_read )
1699                     rc = RC ( rcDB, rcIndex, rcPersisting, rcHeader, rcInsufficient );
1700             }
1701         }
1702 
1703         free ( addr );
1704     }
1705 
1706     return rc;
1707 }
1708 
1709 #endif
1710 
1711 static
KTrieIndexCreateMD5Wrapper(KDirectory * dir,KFile ** fp,KMD5File ** wrapper,char relpath[256],const char md5_relpath[260])1712 rc_t KTrieIndexCreateMD5Wrapper ( KDirectory *dir, KFile ** fp, KMD5File ** wrapper,
1713     char relpath [ 256 ], const char md5_relpath [ 260 ] )
1714 {
1715     /* create the md5 file for read/write */
1716     KFile *f;
1717     rc_t rc = KDirectoryCreateFile ( dir, & f, true,
1718                                       0664, kcmInit, "%s", md5_relpath );
1719     if ( rc == 0 )
1720     {
1721         /* create an md5sum formatter */
1722         KMD5SumFmt *fmt;
1723         rc = KMD5SumFmtMakeUpdate ( & fmt, f );
1724         if ( rc == 0 )
1725         {
1726             int dot_pos;
1727 
1728             /* convert relative path to a leaf */
1729             char *leaf = strrchr ( relpath, '/' );
1730             if ( leaf ++ == NULL )
1731                 leaf = relpath;
1732 
1733             /* trim off ".tmp" from "leaf"
1734                so that the format string reflects final name
1735                without the need to rename later */
1736             dot_pos = strlen ( leaf ) - 4;
1737             assert ( dot_pos > 0 );
1738             assert ( strcmp ( & leaf [ dot_pos ], ".tmp" ) == 0 );
1739             leaf [ dot_pos ] = 0;
1740 
1741             /* "fmt" now owns "f" */
1742             f = NULL;
1743 
1744             /* create a file wrapper that calculates and prints md5 */
1745             rc = KMD5FileMakeWrite ( wrapper, * fp, fmt, leaf );
1746 
1747             /* "wrapper" attaches a reference to "fmt", so we have to
1748                dump our reference regardless of "rc" */
1749             KMD5SumFmtRelease ( fmt );
1750 
1751             /* restore dot */
1752             leaf [ dot_pos ] = '.';
1753 
1754             /* if we succeeded, swap the "wrapper" for input file */
1755             if ( rc == 0 )
1756             {
1757                 * fp = KMD5FileToKFile ( * wrapper );
1758                 return 0;
1759             }
1760         }
1761 
1762         /* failed */
1763         KFileRelease ( f );
1764     }
1765 
1766     return rc;
1767 }
1768 
KTrieIndexPersist_v2(const KTrieIndex_v2 * self,bool proj,KDirectory * dir,const char * path,bool use_md5)1769 rc_t KTrieIndexPersist_v2 ( const KTrieIndex_v2 *self,
1770     bool proj, KDirectory *dir, const char *path, bool use_md5 )
1771 {
1772     rc_t rc;
1773     PersistTrieData pb;
1774 
1775     assert ( self != NULL );
1776     if ( self -> count == 0 )
1777         return 0;
1778 
1779     pb . fmd5 = NULL;
1780 
1781     /** Trie may have holes in serialization due to memory alignments ***/
1782     pb . buffer = calloc ( pb . bsize = 32 * 1024, 1 );
1783     if ( pb . buffer == NULL )
1784         rc = RC ( rcDB, rcIndex, rcPersisting, rcMemory, rcExhausted );
1785     else
1786     {
1787         /* determine the name of the file:
1788            it is created under a temporary name
1789            relative to the directory provided */
1790         char tmpname [ 256 ];
1791         rc = KDirectoryResolvePath ( dir, false,
1792             tmpname, sizeof tmpname, "%s.tmp", path );
1793         if ( rc == 0 )
1794         {
1795             /* create the name of temporary md5 file */
1796             char tmpmd5name [ 260 ];
1797             sprintf ( tmpmd5name, "%s.md5", tmpname );
1798 
1799             /* create the output file under temporary name
1800                ? why does it need read/write capabilities? */
1801             rc = KDirectoryCreateFile ( dir, & pb . f,
1802                                          true, 0664, kcmInit, "%s", tmpname );
1803             if ( rc == 0 )
1804             {
1805                 /* if using md5, wrap output file */
1806                 if ( use_md5 )
1807                     rc = KTrieIndexCreateMD5Wrapper ( dir, & pb . f, & pb . fmd5, tmpname, tmpmd5name );
1808                 if ( rc == 0 )
1809                 {
1810                     /* initial size */
1811                     pb . ptt_size = 0;
1812 #if KDBINDEXVERS == 2
1813                     KTrieIndexPersistHdr_v2 ( ( KTrieIndex_v2* ) self, & pb );
1814 #else
1815                     KTrieIndexPersistHdr_v3_v4 ( ( KTrieIndex_v2* ) self, & pb );
1816 #endif
1817 
1818                     /* persist tree */
1819                     rc = KTrieIndexPersistTrie_v2 ( self, & pb );
1820                     if ( rc == 0 )
1821                     {
1822                         /* persist projection table */
1823                         if ( proj )
1824                         {
1825 #if KDBINDEXVERS == 2
1826                             rc = KTrieIndexPersistProj_v2 ( self, & pb );
1827 #else
1828                             rc = KTrieIndexPersistProj_v3 ( self, & pb );
1829 #endif
1830                         }
1831                     }
1832                 }
1833 
1834                 /* close down the file now, success or not */
1835                 KFileRelease ( pb . f );
1836                 pb . f = NULL;
1837 
1838                 /* douse buffer and mark NULL in case of later attempt */
1839                 free ( pb . buffer );
1840                 pb . buffer = NULL;
1841 
1842                 /* rename the files on success */
1843                 if ( rc == 0 )
1844                 {
1845                     /* works even if "path" is absolute */
1846                     rc = KDirectoryRename ( dir, false, tmpname, path );
1847                     if ( rc == 0 )
1848                     {
1849                         int tmplen;
1850 
1851                         /* done if this was the only file to rename */
1852                         if ( ! use_md5 )
1853                             return 0;
1854 
1855                         /* use "tmpname" as a real "md5" name */
1856                         tmplen = strlen ( tmpname );
1857                         assert ( strcmp ( & tmpname [ tmplen - 4 ], ".tmp" ) == 0 );
1858                         strcpy ( & tmpname [ tmplen - 3 ], "md5" );
1859 
1860                         /* rename md5 file and be done on success */
1861                         rc = KDirectoryRename ( dir, false, tmpmd5name, tmpname );
1862                         if ( rc == 0 )
1863                             return 0;
1864 
1865                         /* failure here means we have a good index file,
1866                            but a bad md5 file, so convert "tmpname" to the
1867                            actual name of the index file */
1868                         tmpname [ tmplen - 4 ] = 0;
1869                     }
1870                 }
1871 
1872                 /* failed, remove the output files here */
1873                 KDirectoryRemove ( dir, false, "%s", tmpname );
1874                 if ( use_md5 )
1875                     KDirectoryRemove ( dir, false, "%s", tmpmd5name );
1876             }
1877         }
1878 
1879         /* douse buffer */
1880         free ( pb . buffer );
1881     }
1882 
1883     return rc;
1884 }
1885 
1886 
1887 /* whack whack */
KTrieIndexWhack_v2(KTrieIndex_v2 * self)1888 void KTrieIndexWhack_v2 ( KTrieIndex_v2 *self )
1889 {
1890     KPTrieIndexWhack_v2 ( & self -> pt );
1891     TrieWhack ( & self -> key2id, KTrieIdxNodeWhack_v2, NULL );
1892     free ( self -> ord2node );
1893 }
1894 
1895 /* initialize an index from file - can be NULL */
KTrieIndexOpen_v2(KTrieIndex_v2 * self,const KMMap * mm,bool byteswap)1896 rc_t KTrieIndexOpen_v2 ( KTrieIndex_v2 *self, const KMMap *mm, bool byteswap )
1897 {
1898     rc_t rc;
1899     bool ptorig = false;
1900     const KDBHdr *hdr = NULL;
1901 
1902 #if ! KTRIE_ZEROS_KPTRIE
1903 #error "KTrie is supposed to zero KPTrie"
1904 #endif
1905     memset ( self, 0, sizeof * self );
1906     self -> pt . byteswap = byteswap;
1907 
1908     /* create an empty Trie index,
1909        with numeric but auto-expand character set,
1910        and a bucket size of 512, beyond which the
1911        tree will branch.
1912     */
1913     rc = TrieInit ( & self -> key2id, "0-9", 512, true );
1914     if ( rc != 0 )
1915         return rc;
1916 
1917     /* when opened for create, there will be no existing index */
1918     if ( mm == NULL )
1919         return 0;
1920 
1921     rc = KMMapAddrRead ( mm, (const void**) & hdr );
1922     if ( rc != 0 )
1923         return rc;
1924 
1925     switch ( hdr -> version )
1926     {
1927     case 1:
1928         rc = KPTrieIndexInitFromV1_v2 ( & self -> pt, mm, byteswap );
1929         break;
1930     case 2:
1931         rc = KPTrieIndexInit_v2 ( & self -> pt, mm, byteswap );
1932         break;
1933     case 3:
1934         ptorig = true;
1935     case 4:
1936         rc = KPTrieIndexInit_v3_v4 ( & self -> pt, mm, byteswap, ptorig );
1937         break;
1938     default:
1939         rc = RC(rcDB, rcIndex, rcConstructing, rcIndex, rcBadVersion);
1940     }
1941     /* open the prior index in persisted mode, but
1942        don't load it into the core-based Trie */
1943     if ( rc == 0 )
1944     {
1945         /* the file existed but was empty */
1946         if ( self -> pt . key2id == NULL )
1947         {
1948             self -> pt . mm = NULL;
1949             return 0;
1950         }
1951 
1952         /* retain a reference to memory map */
1953         rc = KMMapAddRef ( mm );
1954         if ( rc == 0 )
1955         {
1956             self -> pt . mm = mm;
1957             return 0;
1958         }
1959 
1960         /* self -> pt gets whacked below */
1961     }
1962 
1963     KTrieIndexWhack_v2 ( self );
1964     return rc;
1965 }
1966 
1967 /* KTrieIndexPopulate_v2
1968  */
1969 typedef struct KTrieIndexPopulateData_v2_s2 KTrieIndexPopulateData_v2_s2;
1970 struct KTrieIndexPopulateData_v2_s2
1971 {
1972     int64_t first;
1973 
1974     KTrieIndex_v2 *self;
1975     uint32_t count;
1976     rc_t rc;
1977 
1978     uint8_t id_bits;
1979     uint8_t span_bits;
1980 };
1981 
1982 static
KTrieIndexPopulate_v2_s2(PTNode * n,void * data)1983 bool CC KTrieIndexPopulate_v2_s2 ( PTNode *n, void *data )
1984 {
1985     const String *key;
1986     KTrieIndexPopulateData_v2_s2 *pb = data;
1987 
1988     int64_t id;
1989     size_t usize;
1990     uint32_t span;
1991 
1992     /* capture node data */
1993     assert ( n -> data . size == sizeof id );
1994     pb -> rc = Unpack ( pb -> id_bits, sizeof id * 8,
1995         n -> data . addr, 0, pb -> id_bits, NULL, & id, sizeof id, & usize );
1996     if ( pb -> rc == 0 )
1997     {
1998         pb -> rc = Unpack ( pb -> span_bits, sizeof span * 8,
1999             n -> data . addr, pb -> id_bits, pb -> span_bits, NULL, & span, sizeof span, & usize );
2000     }
2001     if ( pb -> rc != 0 )
2002         return true;
2003 
2004     pb -> rc = PTNodeMakeKey ( n, & key );
2005     if ( pb -> rc == 0 )
2006     {
2007         KTrieIdxNode_v2_s2 *node;
2008         pb -> rc = KTrieIdxNodeMake_v2_s2 ( & node, key, id + pb -> first );
2009         StringWhack ( ( String* ) key );
2010         if ( pb -> rc == 0 )
2011         {
2012             node -> span = span;
2013 
2014             pb -> rc = TrieInsert ( & pb -> self -> key2id, & node -> n );
2015             if ( pb -> rc == 0 )
2016             {
2017                 ++ pb -> count;
2018                 return false;
2019             }
2020 
2021             KTrieIdxNodeWhack_v2 ( & node -> n, NULL );
2022         }
2023     }
2024 
2025     return true;
2026 }
2027 
2028 /* KTrieIndexAttach_v2
2029  *  attach a keymap to an existing table
2030  */
2031 static
KTrieIndexPopulate_v2_s1(KTrieIndex_v2 * self,uint32_t i,int64_t idd)2032 rc_t KTrieIndexPopulate_v2_s1 ( KTrieIndex_v2 *self, uint32_t i, int64_t idd )
2033 {
2034     rc_t rc;
2035     uint32_t nid = self -> pt . ord2node [ i ];
2036 
2037     if ( i != 0 && self -> pt . ord2node [ i - 1 ] == nid )
2038         return 0;
2039 
2040     i = self -> count;
2041 
2042     if ( nid == 0 )
2043     {
2044         rc = KTrieIdxNodeMakeHole_v2_s1 ( & self -> ord2node [ i ], self -> pt . first + idd );
2045         if ( rc == 0 )
2046             self -> count = i + 1;
2047     }
2048     else
2049     {
2050         PTNode pnode;
2051         rc = PTrieGetNode ( self -> pt . key2id, & pnode, nid );
2052         if ( rc == 0 )
2053         {
2054             const String *key;
2055             rc = PTNodeMakeKey ( & pnode, & key );
2056             if ( rc == 0 )
2057             {
2058                 rc = KTrieIdxNodeMake_v2_s1 ( & self -> ord2node [ i ],
2059                     key, self -> pt . first + idd );
2060                 StringWhack ( ( String* ) key );
2061                 if ( rc == 0 )
2062                 {
2063                     rc = TrieInsert ( & self -> key2id, & self -> ord2node [ i ] -> n );
2064                     if ( rc != 0 )
2065                         KTrieIdxNodeWhack_v2 ( & self -> ord2node [ i ] -> n, NULL );
2066                     else
2067                         self -> count = i + 1;
2068                 }
2069             }
2070         }
2071     }
2072 
2073     return rc;
2074 }
2075 
KTrieIndexAttach_v2(KTrieIndex_v2 * self,bool proj)2076 rc_t KTrieIndexAttach_v2 ( KTrieIndex_v2 *self, bool proj )
2077 {
2078     rc_t rc = 0;
2079 
2080     /* if persisted index is empty, bail */
2081     if ( self -> count != 0 || self -> pt . count == 0 )
2082         return 0;
2083 
2084     /* see if we can use existing projection index */
2085     if ( proj && self -> pt . ord2node != NULL )
2086     {
2087         uint32_t i;
2088 
2089         self -> ord2node =
2090             malloc ( ( ( self -> pt . count + 4095 ) & ~ 4095 ) * sizeof self -> ord2node [ 0 ] );
2091         if ( self -> ord2node == NULL )
2092             return RC ( rcDB, rcIndex, rcUpdating, rcMemory, rcExhausted );
2093 
2094         /* we were called because our count is 0 */
2095         assert ( self -> count == 0 );
2096 
2097         /* handle variant */
2098         assert ( self -> pt . variant == 0 || self -> pt . id2ord . v8 != NULL );
2099         switch ( self -> pt . variant )
2100         {
2101         case 0:  /* 1-1 id to name */
2102             for ( rc = 0, i = 0; i < self -> pt . count && rc == 0; ++ i )
2103                 rc = KTrieIndexPopulate_v2_s1 ( self, i, i );
2104             break;
2105         case 1:  /* sparse 8-bit   */
2106             for ( rc = 0, i = 0; i < self -> pt . count && rc == 0; ++ i )
2107                 rc = KTrieIndexPopulate_v2_s1 ( self, i, self -> pt . id2ord . v8 [ i ] );
2108             break;
2109         case 2:  /* sparse 16-bit  */
2110             for ( rc = 0, i = 0; i < self -> pt . count && rc == 0; ++ i )
2111                 rc = KTrieIndexPopulate_v2_s1 ( self, i, self -> pt . id2ord . v16 [ i ] );
2112             break;
2113         case 3:  /* sparse 32-bit  */
2114             for ( rc = 0, i = 0; i < self -> pt . count && rc == 0; ++ i )
2115                 rc = KTrieIndexPopulate_v2_s1 ( self, i, self -> pt . id2ord . v32 [ i ] );
2116             break;
2117         case 4:  /* sparse 64-bit  */
2118             for ( rc = 0, i = 0; i < self -> pt . count && rc == 0; ++ i )
2119                 rc = KTrieIndexPopulate_v2_s1 ( self, i, self -> pt . id2ord . v64 [ i ] );
2120             break;
2121         }
2122 
2123         if ( rc != 0 )
2124         {
2125             for ( i = self -> count; i > 0; )
2126                 KTrieIdxNodeWhack_v2 ( & self -> ord2node [ -- i ] -> n, NULL );
2127             free ( self -> ord2node ), self -> ord2node = NULL;
2128             return rc;
2129         }
2130     }
2131     else
2132     {
2133         KTrieIndexPopulateData_v2_s2 pb;
2134         pb . first = self -> pt . first;
2135         pb . self = self;
2136         pb . count = 0;
2137         pb . rc = 0;
2138         PTrieDoUntil ( self -> pt . key2id, KTrieIndexPopulate_v2_s2, & pb );
2139         if ( pb . rc == 0 && pb . count != self -> pt . count )
2140             return RC ( rcDB, rcIndex, rcUpdating, rcIndex, rcCorrupt );
2141         self -> count = pb . count;
2142     }
2143 
2144     /* record known dimensions */
2145     self -> first = self -> pt . first;
2146     self -> last = self -> pt . last;
2147 
2148     /* should be able to drop persisted copy now */
2149     KPTrieIndexWhack_v2 ( & self -> pt );
2150 
2151     return 0;
2152 }
2153 
KTrieIndexInsert_v2(KTrieIndex_v2 * self,bool proj,const char * str,int64_t id)2154 rc_t KTrieIndexInsert_v2 ( KTrieIndex_v2 *self,
2155     bool proj, const char *str, int64_t id )
2156 {
2157     rc_t rc;
2158     String key;
2159     void *ord2node;
2160     uint32_t count;
2161 
2162 #if DISABLE_PROJ
2163     proj = false;
2164 #endif
2165 
2166     /* get the number of nodes in proj index or Trie.
2167        the persisted tree is only loaded into the in-core
2168        tree for edits ( insert/delete ), so the counts
2169        may differ. also, when in projection mode, the
2170        count refers to the number of array slots, which
2171        can be > the number of Trie nodes if there are
2172        holes in the id space. when not projecting, count
2173        is exactly the number of nodes in the Trie.
2174     */
2175     count = self -> count;
2176 
2177     /* detect first modification */
2178     if ( self -> count == 0 )
2179     {
2180         /* detect persisted data */
2181         if ( self -> pt . key2id != NULL )
2182         {
2183             /* load persisted data into core */
2184             rc = KTrieIndexAttach_v2 ( self, proj );
2185             if ( rc != 0 )
2186                 return rc;
2187 
2188             /* should have loaded everything */
2189             assert ( self -> count != 0 );
2190             count = self -> count;
2191         }
2192     }
2193 
2194     /* v2 only allows increasing ids
2195        they don't have to be contiguous,
2196        but they cannot repeat and cannot decrease */
2197     else if ( id <= self -> last )
2198         return RC ( rcDB, rcIndex, rcInserting, rcConstraint, rcViolated );
2199 
2200     /* convert key to String */
2201     StringInitCString ( & key, str );
2202 
2203     /* insertion strategy depends upon projection index */
2204     if ( proj )
2205     {
2206         KTrieIdxNode_v2_s1 *node;
2207 
2208         /* check for extension of last node */
2209         if ( count != 0 )
2210         {
2211             /* a non-zero count implies nodes in projection array */
2212             assert ( self -> ord2node != NULL );
2213 
2214             /* get last node */
2215             node = self -> ord2node [ count - 1 ];
2216             assert ( node != NULL );
2217 
2218             /* if the keys match, this is an update to the node */
2219             if ( StringEqual ( & key, & node -> n . key ) )
2220             {
2221                 /* this must be an extension of range */
2222                 if ( id != self -> last + 1 )
2223                     return RC ( rcDB, rcIndex, rcInserting, rcConstraint, rcViolated );
2224 
2225                 /* extend and done */
2226                 self -> last = id;
2227                 return 0;
2228             }
2229 
2230             /* not last node - create a hole if needed */
2231             if ( id != self -> last + 1 )
2232             {
2233                 /* extend array if needed
2234                    should never have to handle initial insert,
2235                    but would be happy to do so if asked */
2236                 if ( ( count & 4095 ) == 0 )
2237                 {
2238                     ord2node = realloc ( self -> ord2node,
2239                         ( count + 4096 ) * sizeof self -> ord2node [ 0 ] );
2240                     if ( ord2node == NULL )
2241                         return RC ( rcDB, rcIndex, rcInserting, rcMemory, rcExhausted );
2242                     self -> ord2node = ord2node;
2243                 }
2244 
2245                 /* create NULL mapping */
2246                 rc = KTrieIdxNodeMakeHole_v2_s1 ( & node, self -> last + 1 );
2247                 if ( rc != 0 )
2248                     return rc;
2249 
2250                 /* NB - this will cause count to be > num_nodes in Trie */
2251                 self -> ord2node [ count ++ ] = node;
2252             }
2253         }
2254 
2255         /* make a new mapping starting with id */
2256         rc = KTrieIdxNodeMake_v2_s1 ( & node, & key, id );
2257         if ( rc == 0 )
2258         {
2259             /* attempt insertion */
2260             rc = TrieInsertUnique ( & self -> key2id, & node -> n, NULL );
2261             if ( rc == 0 )
2262             {
2263                 /* create or extend projection array */
2264                 if ( ( count & 4095 ) == 0 )
2265                 {
2266                     ord2node = realloc ( self -> ord2node,
2267                         ( count + 4096 ) * sizeof self -> ord2node [ 0 ] );
2268                     if ( ord2node == NULL )
2269                         rc = RC ( rcDB, rcIndex, rcInserting, rcMemory, rcExhausted );
2270                     else
2271                         self -> ord2node = ord2node;
2272                 }
2273 
2274                 if ( rc == 0 )
2275                 {
2276                     /* set/extend range, detecting first insertion */
2277                     self -> last = id;
2278                     if ( count == 0 )
2279                         self -> first = id;
2280 
2281                     /* project */
2282                     self -> ord2node [ count ] = node;
2283                     self -> count = count + 1;
2284 
2285                     /* insertion complete */
2286                     return 0;
2287                 }
2288 
2289                 /* remove failed insertion */
2290                 TrieUnlink ( & self -> key2id, & node -> n );
2291             }
2292 
2293             /* clean up new node */
2294             KTrieIdxNodeWhack_v2 ( & node -> n, NULL );
2295         }
2296 
2297         /* clean up insertion of hole */
2298         if ( count != self -> count )
2299         {
2300             assert ( count - 1 == self -> count );
2301             KTrieIdxNodeWhack_v2 ( & self -> ord2node [ count - 1 ] -> n, NULL );
2302         }
2303     }
2304     else
2305     {
2306         KTrieIdxNode_v2_s2 *node;
2307 
2308         /* make a new mapping starting with id and a span of 1 */
2309         rc = KTrieIdxNodeMake_v2_s2 ( & node, & key, id );
2310         if ( rc == 0 )
2311         {
2312             /* attempt insertion */
2313             KTrieIdxNode_v2_s2 *exist;
2314             rc = TrieInsertUnique ( & self -> key2id, & node -> n, ( TNode** ) & exist );
2315             if ( rc == 0 )
2316             {
2317                 /* set/extend range, detecting first insertion */
2318                 if ( count == 0 )
2319                 {
2320                     self -> max_span = 1;
2321                     self -> first = id;
2322                 }
2323                 self -> last = id;
2324 
2325                 /* insertion complete */
2326                 self -> count = count + 1;
2327                 return 0;
2328             }
2329 
2330             /* clean up new node */
2331             KTrieIdxNodeWhack_v2 ( & node -> n, NULL );
2332 
2333             /* check existing for proper extension */
2334             if ( exist != NULL )
2335             {
2336                 if ( id == exist -> start_id + exist -> span )
2337                 {
2338                     assert ( count != 0 );
2339 
2340                     /* we already know id > last
2341                        and that it boarders the range of "exist"
2342                        so it must be last + 1 */
2343                     assert ( id - 1 == self -> last );
2344                     self -> last = id;
2345 
2346                     /* extend the span of "exist" */
2347                     ++ exist -> span;
2348                     if ( exist -> span > self -> max_span )
2349                         self -> max_span = exist -> span;
2350 
2351                     return 0;
2352                 }
2353             }
2354         }
2355     }
2356 
2357     return rc;
2358 }
2359 
2360 /* drop string from trie and all mappings */
KTrieIndexDelete_v2(KTrieIndex_v2 * self,bool proj,const char * str)2361 rc_t KTrieIndexDelete_v2 ( KTrieIndex_v2 *self, bool proj, const char *str )
2362 {
2363     rc_t rc;
2364     String key;
2365     TNode *tnode;
2366     /* uint32_t count; */
2367 
2368 #if DISABLE_PROJ
2369     proj = false;
2370 #endif
2371 
2372     /* detect first modification */
2373     /* count = self -> count; */
2374     if ( self -> count != 0 )
2375     {
2376         /* detect persisted data */
2377         if ( self -> pt . key2id != NULL )
2378         {
2379             rc = KTrieIndexAttach_v2 ( self, proj );
2380             if ( rc != 0 )
2381                 return rc;
2382         }
2383     }
2384 
2385     StringInitCString ( & key, str );
2386 
2387     /* interface states that all objects are dropped.
2388        however, this implementation only allows unique
2389        mappings to a contig range, so a simple find is sufficient */
2390     tnode = TrieFind ( & self -> key2id, & key );
2391     if ( tnode == NULL )
2392         return RC ( rcDB, rcIndex, rcRemoving, rcString, rcNotFound );
2393 
2394     /* remove from trie */
2395     TrieUnlink ( & self -> key2id, tnode );
2396 
2397     /* neutralize node in projection index */
2398     if ( proj )
2399     {
2400         KTrieIdxNode_v2_s1 *node = ( KTrieIdxNode_v2_s1* ) tnode;
2401         uint32_t ord = KTrieIndexNode2Ord_v2 ( self, node );
2402         if ( ord != 0 )
2403         {
2404             self -> ord2node [ ord - 1 ] -> n . key . size = 0;
2405             self -> ord2node [ ord - 1 ] -> n . key . len = 0;
2406             self -> ord2node [ ord - 1 ] -> key [ 0 ] = 0;
2407             return 0;
2408         }
2409     }
2410 
2411     /* whack node */
2412     KTrieIdxNodeWhack_v2 ( tnode, NULL );
2413 
2414     return 0;
2415 }
2416 
2417 /* map key to id range */
KTrieIndexFind_v2(const KTrieIndex_v2 * self,const char * str,int64_t * start_id,uint32_t * span,int (CC * custom_cmp)(const void * item,const PBSTNode * n,void * data),void * data,bool convertFromV1)2418 rc_t KTrieIndexFind_v2 ( const KTrieIndex_v2 *self,
2419     const char *str, int64_t *start_id,
2420 #if V2FIND_RETURNS_SPAN
2421     uint32_t *span,
2422 #endif
2423     int ( CC * custom_cmp ) ( const void *item, const PBSTNode *n, void *data ), void *data, bool convertFromV1 )
2424 {
2425     /* search within in-core index */
2426     if ( self -> count != 0 )
2427     {
2428         const TNode *tnode;
2429 
2430         String key;
2431         StringInitCString ( & key, str );
2432 
2433         tnode = TrieFind ( & self -> key2id, & key );
2434         if ( tnode != NULL )
2435         {
2436             if ( self -> ord2node != NULL )
2437             {
2438                 const KTrieIdxNode_v2_s1 *node = ( const KTrieIdxNode_v2_s1* ) tnode;
2439                 uint32_t ord = KTrieIndexNode2Ord_v2 ( self, node );
2440                 if ( ord == 0 )
2441                     return RC ( rcDB, rcIndex, rcSelecting, rcIndex, rcCorrupt );
2442 
2443                 * start_id = node -> start_id;
2444 #if V2FIND_RETURNS_SPAN
2445                 if ( ord == self -> count )
2446                     * span = ( uint32_t ) ( self -> last - node -> start_id + 1 );
2447                 else
2448                     * span = ( uint32_t ) ( self -> ord2node [ ord ] -> start_id - node -> start_id );
2449 #endif
2450             }
2451             else
2452             {
2453                 const KTrieIdxNode_v2_s2 *node = ( const KTrieIdxNode_v2_s2* ) tnode;
2454                 * start_id = node -> start_id;
2455 #if V2FIND_RETURNS_SPAN
2456                 * span = node -> span;
2457 #endif
2458             }
2459 
2460             return 0;
2461         }
2462     }
2463 
2464     /* search within persisted index */
2465     else if ( self -> pt . key2id != NULL )
2466     {
2467         return KPTrieIndexFind_v2 ( & self -> pt, str, start_id,
2468 #if V2FIND_RETURNS_SPAN
2469                                     span,
2470 #endif
2471                                     custom_cmp, data, convertFromV1 );
2472     }
2473 
2474     return RC ( rcDB, rcIndex, rcSelecting, rcString, rcNotFound );
2475 }
2476 
2477 /* projection index id to key-string */
2478 typedef struct KTrieIndexProjectData_v2 KTrieIndexProjectData_v2;
2479 struct KTrieIndexProjectData_v2
2480 {
2481     int64_t id;
2482     const KTrieIdxNode_v2_s2 *node;
2483 };
2484 
2485 static
KTrieIndexProjectScan_v2(TNode * n,void * data)2486 bool CC KTrieIndexProjectScan_v2 ( TNode *n, void *data )
2487 {
2488     KTrieIndexProjectData_v2 *pb = (KTrieIndexProjectData_v2 *)data;
2489     const KTrieIdxNode_v2_s2 *node = ( const KTrieIdxNode_v2_s2* ) n;
2490 
2491     if ( pb -> id >= node -> start_id &&
2492          pb -> id < node -> start_id + node -> span )
2493     {
2494         pb -> node = node;
2495         return true;
2496     }
2497 
2498     return false;
2499 }
2500 
KTrieIndexProject_v2(const KTrieIndex_v2 * self,int64_t id,int64_t * start_id,uint32_t * span,char * key_buff,size_t buff_size,size_t * actsize)2501 rc_t KTrieIndexProject_v2 ( const KTrieIndex_v2 *self,
2502     int64_t id,
2503 #if V2FIND_RETURNS_SPAN
2504      int64_t *start_id, uint32_t *span,
2505 #endif
2506     char *key_buff, size_t buff_size, size_t *actsize )
2507 {
2508     if ( self -> count != 0 )
2509     {
2510         if ( self -> ord2node != NULL )
2511         {
2512             uint32_t ord = KTrieIndexID2Ord_v2 ( self, id );
2513             if ( ord != 0 )
2514             {
2515                 const KTrieIdxNode_v2_s1 *node = self -> ord2node [ ord - 1 ];
2516 
2517                 if (actsize)
2518                     *actsize = node -> n . key . size;
2519                 if ( node -> n . key . size >= buff_size )
2520                     return RC ( rcDB, rcIndex, rcProjecting, rcBuffer, rcInsufficient );
2521                 string_copy ( key_buff, buff_size,
2522                     node -> n . key . addr, node -> n . key . size );
2523                 * start_id = node -> start_id;
2524                 * span = ( ( ord == self -> count ) ?
2525                     ( self -> last + 1 ) : ( self -> ord2node [ ord ] -> start_id ) ) - node -> start_id;
2526                 return 0;
2527             }
2528         }
2529         else
2530         {
2531             KTrieIndexProjectData_v2 pb;
2532             pb . id = id;
2533             if ( TrieDoUntil ( & self -> key2id, KTrieIndexProjectScan_v2, & pb ) )
2534             {
2535                 const KTrieIdxNode_v2_s2 *node = pb . node;
2536 
2537                 if (actsize)
2538                     *actsize = node -> n . key . size;
2539                 if ( node -> n . key . size >= buff_size )
2540                     return RC ( rcDB, rcIndex, rcProjecting, rcBuffer, rcInsufficient );
2541                 string_copy ( key_buff, buff_size,
2542                     node -> n . key . addr, node -> n . key . size );
2543                 * start_id = node -> start_id;
2544                 * span = node -> span;
2545                 return 0;
2546             }
2547         }
2548     }
2549 
2550     else if ( self -> pt . ord2node != NULL )
2551     {
2552         return KPTrieIndexProject_v2 ( & self -> pt, id,
2553 #if V2FIND_RETURNS_SPAN
2554             start_id, span,
2555 #endif
2556             key_buff, buff_size, actsize );
2557     }
2558 
2559     return RC ( rcDB, rcIndex, rcProjecting, rcId, rcNotFound );
2560 }
2561