1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 #include <kdb/extern.h>
28 #include "index-priv.h"
29 #include "trieidx-priv.h"
30 #include <klib/ptrie.h>
31 #include <klib/text.h>
32 #include <kfs/directory.h>
33 #include <kfs/file.h>
34 #include <kfs/mmap.h>
35 #include <klib/pack.h>
36 #include <klib/rc.h>
37 #include <sysalloc.h>
38
39 #include <stdlib.h>
40 #include <limits.h>
41 #include <stdio.h>
42 #include <string.h>
43 #include <errno.h>
44 #include <byteswap.h>
45 #include <assert.h>
46
47 #define KTRIE_ZEROS_KPTRIE 1
48
49 /*--------------------------------------------------------------------------
50 * KPTrieIndex_v2
51 * persisted keymap
52 */
53
54
55 /* Init
56 * opens and initializes persisted keymap structure
57 */
58 static
KPTrieIndexInitID2Ord(KPTrieIndex_v2 * self,size_t in_size,int variant,int span_bits,int elem_bits)59 rc_t KPTrieIndexInitID2Ord ( KPTrieIndex_v2 *self, size_t in_size,
60 int variant, int span_bits, int elem_bits )
61 {
62 rc_t rc;
63 union
64 {
65 uint8_t *v8;
66 uint16_t *v16;
67 uint32_t *v32;
68 uint64_t *v64;
69 } dst;
70 size_t elem_bytes = elem_bits >> 3;
71 uint32_t scount = self -> count - 1;
72
73 assert ( self -> count != 0 );
74
75 if ( span_bits * scount > in_size * 8 )
76 return RC ( rcDB, rcIndex, rcConstructing, rcIndex, rcCorrupt );
77
78 dst . v8 = malloc ( self -> count * elem_bytes );
79 if ( dst . v8 == NULL )
80 rc = RC ( rcDB, rcIndex, rcConstructing, rcMemory, rcExhausted );
81 else
82 {
83 size_t usize;
84 rc = Unpack ( span_bits, elem_bits,
85 & self -> ord2node [ self -> count ], 0,
86 span_bits * scount, NULL, & dst . v8 [ elem_bytes ],
87 scount * elem_bytes, & usize );
88 if ( rc == 0 )
89 {
90 uint32_t i;
91
92 self -> id2ord . v8 = dst . v8;
93 self -> variant = variant;
94
95 /* integrate to simple translation */
96 switch ( variant )
97 {
98 case 1:
99 dst . v8 [ 0 ] = 0;
100 for ( i = 0; i < scount; ++ i )
101 dst . v8 [ i + 1 ] += dst . v8 [ i ];
102 break;
103 case 2:
104 dst . v16 [ 0 ] = 0;
105 for ( i = 0; i < scount; ++ i )
106 dst . v16 [ i + 1 ] += dst . v16 [ i ];
107 break;
108 case 3:
109 dst . v32 [ 0 ] = 0;
110 for ( i = 0; i < scount; ++ i )
111 dst . v32 [ i + 1 ] += dst . v32 [ i ];
112 break;
113 case 4:
114 dst . v64 [ 0 ] = 0;
115 for ( i = 0; i < scount; ++ i )
116 dst . v64 [ i + 1 ] += dst . v64 [ i ];
117 break;
118 }
119
120 return 0;
121 }
122
123 free ( dst . v8 );
124 }
125
126 return rc;
127 }
128
129 static
KPTrieIndexExtractV1Range_v2(PTNode * n,void * data)130 void CC KPTrieIndexExtractV1Range_v2 ( PTNode *n, void *data )
131 {
132 KPTrieIndex_v2 *self = data;
133
134 /* capture node id */
135 uint32_t id;
136 assert ( n -> data . size == sizeof id );
137 memmove ( & id, n -> data . addr, sizeof id );
138
139 /* perform min/max */
140 if ( self -> count == 0 )
141 self -> first = self -> last = id;
142 else if ( id < self -> first )
143 self -> first = id;
144 else if ( id > self -> last )
145 self -> last = id;
146
147 ++ self -> count;
148 }
149
/* InitFromV1
 *  adapts a v1 persisted index into the in-core v2 structure.
 *
 *  the v1 trie is adopted as-is. if v1 carries a projection array
 *  ( id2node ), it is either adopted directly ( dense case: id range
 *  less than twice the node count ) or converted to the v2 sparse
 *  form: a single allocation holding "count" id offsets ( id2ord )
 *  followed by "count" 32-bit node ids ( ord2node ), with element
 *  width chosen from the number of bits needed for the id range.
 *
 *  "mm" [ IN ] - memory map over the v1 index file
 *  "byteswap" [ IN ] - true when the file has foreign byte order
 */
static
rc_t KPTrieIndexInitFromV1_v2 ( KPTrieIndex_v2 *self, const KMMap *mm, bool byteswap )
{
    KPTrieIndex_v1 v1;
    rc_t rc = KPTrieIndexInit_v1 ( & v1, mm, byteswap );
    if ( rc == 0 )
    {
        uint32_t *ord2node;
        uint32_t total_id, test_id;
        uint32_t i, id, id_bits, num_ids;

        /* assume preservation of persisted projection index */
        self -> byteswap = byteswap;

        /* hopefully we got a projection index */
        if ( v1 . id2node == NULL )
        {
#if ! KTRIE_ZEROS_KPTRIE
            self -> count = 0;
#endif
            /* need to derive first and last from trie */
            PTrieForEach ( v1 . key2id, KPTrieIndexExtractV1Range_v2, self );
            if ( self -> count == 0 )
                KPTrieIndexWhack_v1 ( & v1 );
            else
            {
                /* take trie as-is */
                self -> key2id = v1 . key2id;
                self -> maxid = self -> last;
            }

            /* note that this assumes a span of 1 for
               each id. there are no known uses of v1 without
               a projection index, so this is unlikely to be important */

            return 0;
        }

        /* take id range */
        self -> first = v1 . first;
        self -> last = self -> maxid = v1 . last;

        /* count comes from trie as always */
        self -> count = PTrieCount ( v1 . key2id );

        /* detect empty trie */
        if ( self -> count == 0 || self -> first > self -> last )
        {
            self -> first = self -> last = self -> maxid = 0;
            return 0;
        }

        /* take trie as-is */
        self -> key2id = v1 . key2id;

        /* now decide whether to use 1-1 or sparse projection
           if the number of slots is less than twice the number of nodes,
           it is more efficient to store the nodes as a linear array,
           and represent missing ids as nulls.
        */
        if ( ( self -> last - self -> first ) < ( ( int64_t ) self -> count << 1 ) )
        {
            /* take the old projection array as-is,
               treating NULL node ids as holes */
            self -> ord2node = v1 . id2node;
            return 0;
        }

        /* need to create a new projection index
           it is built in native byte order, so clear the flag */
        self -> byteswap = false;

        /* convert to sparse
           calculate id bits - notice that
           test_id gets left shifted so that
           the loop is guaranteed to exit */
        num_ids = ( uint32_t ) ( self -> last - self -> first + 1 );
        for ( total_id = num_ids >> 1, id_bits = 1, test_id = 1;
              test_id <= total_id;
              ++ id_bits, test_id <<= 1 )
            ( void ) 0;

        /* determine variant - selects the id2ord element width */
        if ( id_bits <= 8 )
        {
            /* allocate 4 bytes for new ord2node and 1 for id2ord
               ( one allocation; ord2node aliases its tail ) */
            uint8_t *id2ord = malloc ( self -> count * 5 );
            if ( id2ord == NULL )
                rc = RC ( rcDB, rcIndex, rcConstructing, rcMemory, rcExhausted );
            else
            {
                ord2node = ( uint32_t* ) & id2ord [ self -> count ];
                self -> ord2node = ord2node;
                self -> id2ord . v8 = id2ord;
                self -> variant = 1;

                /* walk across v1 table, looking at each id */
                for ( i = id = 0; id < num_ids; ++ id )
                {
                    /* detect non NULL node ids
                       and pretend they represent a contiguous
                       span with no holes in id space */
                    if ( v1 . id2node [ id ] != 0 )
                    {
                        /* prevent overwriting */
                        if ( i == self -> count )
                        {
                            rc = RC ( rcDB, rcIndex, rcConstructing, rcIndex, rcCorrupt );
                            break;
                        }

                        /* record id and node for slot */
                        id2ord [ i ] = ( uint8_t ) id;
                        ord2node [ i ] = byteswap ? bswap_32 ( v1 . id2node [ id ] ) : v1 . id2node [ id ];
                        ++ i;
                    }
                }
            }
        }
        else if ( id_bits <= 16 )
        {
            /* 2 bytes per id2ord slot plus 4 per ord2node slot */
            uint16_t *id2ord = malloc ( self -> count * 6 );
            if ( id2ord == NULL )
                rc = RC ( rcDB, rcIndex, rcConstructing, rcMemory, rcExhausted );
            else
            {
                ord2node = ( uint32_t* ) & id2ord [ self -> count ];
                self -> ord2node = ord2node;
                self -> id2ord . v16 = id2ord;
                self -> variant = 2;

                for ( i = id = 0; id < num_ids; ++ id )
                {
                    if ( v1 . id2node [ id ] != 0 )
                    {
                        if ( i == self -> count )
                        {
                            rc = RC ( rcDB, rcIndex, rcConstructing, rcIndex, rcCorrupt );
                            break;
                        }

                        id2ord [ i ] = ( uint16_t ) id;
                        ord2node [ i ] = byteswap ? bswap_32 ( v1 . id2node [ id ] ) : v1 . id2node [ id ];
                        ++ i;
                    }
                }
            }
        }
        else
        {
            /* 4 bytes per id2ord slot plus 4 per ord2node slot */
            uint32_t *id2ord = malloc ( self -> count * 8 );
            if ( id2ord == NULL )
                rc = RC ( rcDB, rcIndex, rcConstructing, rcMemory, rcExhausted );
            else
            {
                ord2node = & id2ord [ self -> count ];
                self -> ord2node = ord2node;
                self -> id2ord . v32 = id2ord;
                self -> variant = 3;

                for ( i = id = 0; id < num_ids; ++ id )
                {
                    if ( v1 . id2node [ id ] != 0 )
                    {
                        if ( i == self -> count )
                        {
                            rc = RC ( rcDB, rcIndex, rcConstructing, rcIndex, rcCorrupt );
                            break;
                        }

                        id2ord [ i ] = id;
                        ord2node [ i ] = byteswap ? bswap_32 ( v1 . id2node [ id ] ) : v1 . id2node [ id ];
                        ++ i;
                    }
                }
            }
        }

        if ( rc == 0 )
        {
            /* conversion must have filled exactly "count" slots */
            if ( i == self -> count )
                return 0;

            rc = RC ( rcDB, rcIndex, rcConstructing, rcIndex, rcCorrupt );
        }

        KPTrieIndexWhack_v1 ( & v1 );
    }

    return rc;
}
340
/* Init
 *  opens and initializes a persisted v2 keymap from its memory map.
 *
 *  v2 file layout: KPTrieIndexHdr_v2, a persisted trie, then
 *  optionally a projection section - a 32-bit count, an ord2node
 *  array of node ids and, for sparse projections, a bit-packed
 *  span map decoded by KPTrieIndexInitID2Ord.
 *
 *  NOTE: self -> byteswap is read before being assigned from the
 *  "byteswap" parameter; the caller ( KTrieIndexOpen_v2 ) seeds it
 *  before calling here.
 *
 *  "mm" [ IN ] - memory map over the whole index file
 *  "byteswap" [ IN ] - true when the file has foreign byte order
 */
rc_t KPTrieIndexInit_v2 ( KPTrieIndex_v2 *self, const KMMap *mm, bool byteswap )
{
    /* get size of map, assumed to be size of file */
    size_t size;
    rc_t rc = KMMapSize ( mm, & size );
    if ( rc == 0 )
    {
        const KPTrieIndexHdr_v2 *hdr;

#if ! KTRIE_ZEROS_KPTRIE
        self -> mm = NULL;
        self -> ord2node = NULL;
        self -> id2ord . v32 = NULL;
        self -> variant = 0;
#endif

        /* ignore empty file */
        if ( size == 0 )
        {
#if ! KTRIE_ZEROS_KPTRIE
            self -> first = self -> last = self -> maxid = 0;
            self -> key2id = NULL;
            self -> count = 0;
#endif
            return 0;
        }

        /* have to have at least the base header */
        if ( size < sizeof hdr -> dad )
            return RC ( rcDB, rcIndex, rcConstructing, rcTrie, rcCorrupt );

        rc = KMMapAddrRead ( mm, ( const void** ) & hdr );
        if ( rc == 0 )
        {
            uint16_t id_bits, span_bits;

            /* recheck header size */
            if ( size < sizeof * hdr )
                return RC ( rcDB, rcIndex, rcConstructing, rcTrie, rcCorrupt );

            /* extract id range and bit widths, swapping as needed */
            if ( self -> byteswap )
            {
                self -> first = bswap_64(hdr -> first);
                self -> last = self -> maxid = bswap_64(hdr -> last);
                id_bits = bswap_16(hdr -> id_bits);
                span_bits = bswap_16(hdr -> span_bits);
            }
            else
            {
                self -> first = hdr -> first;
                self -> last = self -> maxid = hdr -> last;
                id_bits = hdr -> id_bits;
                span_bits = hdr -> span_bits;
            }
            self -> id_bits = ( uint8_t ) id_bits;
            self -> span_bits = ( uint8_t ) span_bits;
            self -> byteswap = byteswap;

            /* try to create the pttree */
            rc = PTrieMakeOrig ( & self -> key2id,
                hdr + 1, size -= sizeof * hdr, byteswap );
            if ( rc == 0 )
            {
                size_t ptsize = PTrieSize ( self -> key2id );
                if ( ptsize <= size )
                {
                    /* the count covers at least the number of trie nodes */
                    self -> count = PTrieCount ( self -> key2id );

                    /* it could be stored without projection */
                    if ( ptsize == size )
                        return 0;

                    /* calculate remaining bytes */
                    size -= ptsize;

                    /* there must be enough for an array of 4-byte node ids */
                    if ( size >= ( ( size_t ) self -> count << 2 ) )
                    {
                        /* take the persisted array as-is */
                        self -> ord2node = ( const uint32_t* )
                            ( ( const char* ) ( hdr + 1 ) + ptsize );

                        /* read the count ( first 32-bit slot ) */
                        if ( size >= 4 )
                        {
                            self -> count = * ( self -> ord2node ) ++;
                            size -= 4;
                            if ( byteswap )
                                self -> count = bswap_32 ( self -> count );
                        }

                        /* determine strategy from id span and count */
                        if ( ( self -> last - self -> first ) < ( ( int64_t ) self -> count << 1 ) )
                        {
                            /* must be contiguous */
                            self -> count = ( uint32_t ) ( self -> last - self -> first + 1 );

                            /* size should be exactly this number of slots */
                            if ( size == ( ( size_t ) self -> count << 2 ) )
                                return 0;

                            /* fall through to error return */
                        }
                        else if ( ( size == 4 && self -> count == 1 ) || size > ( ( size_t ) self -> count << 2 ) )
                        {
                            /* sparse */
                            size -= ( size_t ) self -> count << 2;

                            /* unpack id map, element width chosen by id_bits */
                            if ( id_bits <= 8 )
                                rc = KPTrieIndexInitID2Ord ( self, size, 1, span_bits, 8 );
                            else if ( id_bits <= 16 )
                                rc = KPTrieIndexInitID2Ord ( self, size, 2, span_bits, 16 );
                            else if ( id_bits <= 32 )
                                rc = KPTrieIndexInitID2Ord ( self, size, 3, span_bits, 32 );
                            else
                                rc = KPTrieIndexInitID2Ord ( self, size, 4, span_bits, 64 );

                            /* done */
                            if ( rc == 0 )
                                return 0;

                            PTrieWhack ( self -> key2id ), self -> key2id = NULL;
                            return rc;
                        }
                    }
                }

                /* any other layout is corrupt */
                PTrieWhack ( self -> key2id ), self -> key2id = NULL;
                rc = RC ( rcDB, rcIndex, rcConstructing, rcTrie, rcCorrupt );
            }
        }
    }

    return rc;
}
477
/* Init_v3_v4
 *  opens and initializes a persisted v3/v4 keymap from its memory
 *  map. identical in structure to KPTrieIndexInit_v2, except that
 *  the header is KPTrieIndexHdr_v3 and "ptorig" selects the trie
 *  constructor: PTrieMakeOrig for v3 files, PTrieMake for v4.
 *
 *  NOTE: as in Init_v2, self -> byteswap is read before being
 *  assigned; the caller ( KTrieIndexOpen_v2 ) seeds it.
 *
 *  "mm" [ IN ] - memory map over the whole index file
 *  "byteswap" [ IN ] - true when the file has foreign byte order
 *  "ptorig" [ IN ] - true for the original persisted-trie format
 */
rc_t KPTrieIndexInit_v3_v4 ( KPTrieIndex_v2 *self, const KMMap *mm, bool byteswap, bool ptorig )
{
    /* get size of map, assumed to be size of file */
    size_t size;
    rc_t rc = KMMapSize ( mm, & size );
    if ( rc == 0 )
    {
        const KPTrieIndexHdr_v3 *hdr;

#if ! KTRIE_ZEROS_KPTRIE
        self -> mm = NULL;
        self -> ord2node = NULL;
        self -> id2ord . v32 = NULL;
        self -> variant = 0;
#endif

        /* ignore empty file */
        if ( size == 0 )
        {
#if ! KTRIE_ZEROS_KPTRIE
            self -> first = self -> last = self -> maxid = 0;
            self -> key2id = NULL;
            self -> count = 0;
#endif
            return 0;
        }

        /* have to have at least the base header */
        if ( size < sizeof hdr -> dad )
            return RC ( rcDB, rcIndex, rcConstructing, rcTrie, rcCorrupt );

        rc = KMMapAddrRead ( mm, ( const void** ) & hdr );
        if ( rc == 0 )
        {
            uint16_t id_bits, span_bits;

            /* recheck header size */
            if ( size < sizeof * hdr )
                return RC ( rcDB, rcIndex, rcConstructing, rcTrie, rcCorrupt );

            /* extract id range and bit widths, swapping as needed */
            if ( self -> byteswap )
            {
                self -> first = bswap_64(hdr -> first);
                self -> last = self -> maxid = bswap_64(hdr -> last);
                id_bits = bswap_16(hdr -> id_bits);
                span_bits = bswap_16(hdr -> span_bits);
            }
            else
            {
                self -> first = hdr -> first;
                self -> last = self -> maxid = hdr -> last;
                id_bits = hdr -> id_bits;
                span_bits = hdr -> span_bits;
            }
            self -> id_bits = ( uint8_t ) id_bits;
            self -> span_bits = ( uint8_t ) span_bits;
            self -> byteswap = byteswap;

            /* try to create the pttree - v3 uses the original format */
            rc = ( ptorig ? PTrieMakeOrig : PTrieMake )
                ( & self -> key2id, hdr + 1, size -= sizeof * hdr, byteswap );
            if ( rc == 0 )
            {
                size_t ptsize = PTrieSize ( self -> key2id );
                if ( ptsize <= size )
                {
                    /* the count covers at least the number of trie nodes */
                    self -> count = PTrieCount ( self -> key2id );

                    /* it could be stored without projection */
                    if ( ptsize == size )
                        return 0;

                    /* calculate remaining bytes */
                    size -= ptsize;

                    /* there must be enough for an array of 4-byte node ids */
                    if ( size >= ( ( size_t ) self -> count << 2 ) )
                    {
                        /* take the persisted array as-is */
                        self -> ord2node = ( const uint32_t* )
                            ( ( const char* ) ( hdr + 1 ) + ptsize );

                        /* read the count ( first 32-bit slot ) */
                        if ( size >= 4 )
                        {
                            self -> count = * ( self -> ord2node ) ++;
                            size -= 4;
                            if ( byteswap )
                                self -> count = bswap_32 ( self -> count );
                        }

                        /* determine strategy from id span and count */
                        if ( ( self -> last - self -> first ) < ( ( int64_t ) self -> count << 1 ) )
                        {
                            /* must be contiguous */
                            self -> count = ( uint32_t ) ( self -> last - self -> first + 1 );

                            /* size should be exactly this number of slots */
                            if ( size == ( ( size_t ) self -> count << 2 ) )
                                return 0;

                            /* fall through to error return */
                        }
                        else if ( ( size == 4 && self -> count == 1 ) || size > ( ( size_t ) self -> count << 2 ) )
                        {
                            /* sparse */
                            size -= ( size_t ) self -> count << 2;

                            /* unpack id map, element width chosen by id_bits */
                            if ( id_bits <= 8 )
                                rc = KPTrieIndexInitID2Ord ( self, size, 1, span_bits, 8 );
                            else if ( id_bits <= 16 )
                                rc = KPTrieIndexInitID2Ord ( self, size, 2, span_bits, 16 );
                            else if ( id_bits <= 32 )
                                rc = KPTrieIndexInitID2Ord ( self, size, 3, span_bits, 32 );
                            else
                                rc = KPTrieIndexInitID2Ord ( self, size, 4, span_bits, 64 );

                            /* done */
                            if ( rc == 0 )
                                return 0;

                            PTrieWhack ( self -> key2id ), self -> key2id = NULL;
                            return rc;
                        }
                    }
                }

                /* any other layout is corrupt */
                PTrieWhack ( self -> key2id ), self -> key2id = NULL;
                rc = RC ( rcDB, rcIndex, rcConstructing, rcTrie, rcCorrupt );
            }
        }
    }

    return rc;
}
614
615 /* Whack
616 * closes down keymap
617 */
KPTrieIndexWhack_v2(KPTrieIndex_v2 * self)618 void KPTrieIndexWhack_v2 ( KPTrieIndex_v2 *self )
619 {
620 free ( ( void* ) self -> id2ord . v8 );
621 PTrieWhack ( self -> key2id );
622 KMMapRelease ( self -> mm );
623 memset ( self, 0, sizeof * self );
624 }
625
/* ID2Ord
 *  translates a row id into a one-based ordinal into ord2node.
 *
 *  returns 0 when "id" lies outside [ first, maxid ] or no
 *  containing span is found.
 *
 *  variant 0 is a contiguous projection: the ordinal is simply the
 *  one-based offset from "first". variants 1-4 binary search the
 *  id2ord table ( 8/16/32/64-bit elements ) whose slots hold the
 *  zero-based starting offset of each span; the result is the
 *  ordinal of the span containing "id".
 */
uint32_t KPTrieIndexID2Ord_v2 ( const KPTrieIndex_v2 *self, int64_t id )
{
    if ( id >= self -> first && id <= self -> maxid )
    {
        int64_t nid;
        uint32_t left, right, ord, count = self -> count;

        /* convert id either to a zero-based ord,
           or else the translated id in id2ord */
        id -= self -> first;

        /* handle type of projection */
        switch ( self -> variant )
        {
        case 0:
            /* return one-based ord */
            return ( uint32_t ) ( id + 1 );

        case 1:
            /* search for ord such that
               start [ ord - 1 ] <= id < start [ ord ] */
            for ( left = 1, right = count; left <= right; )
            {
                ord = ( left + right ) >> 1;
                nid = self -> id2ord . v8 [ ord - 1 ];
                if ( id == nid )
                    return ord;

                if ( id < nid )
                {
                    right = ord - 1;
                    continue;
                }

                /* the final span is open-ended toward maxid */
                if ( ord == count )
                    return ord;

                /* id falls within this span if below next start */
                nid = self -> id2ord . v8 [ ord ];
                if ( id < nid )
                    return ord;

                left = ord + 1;
            }
            break;

        case 2:
            /* as case 1, with 16-bit elements */
            for ( left = 1, right = count; left <= right; )
            {
                ord = ( left + right ) >> 1;
                nid = self -> id2ord . v16 [ ord - 1 ];
                if ( id == nid )
                    return ord;

                if ( id < nid )
                {
                    right = ord - 1;
                    continue;
                }
                if ( ord == count )
                    return ord;

                nid = self -> id2ord . v16 [ ord ];
                if ( id < nid )
                    return ord;

                left = ord + 1;
            }
            break;

        case 3:
            /* as case 1, with 32-bit elements */
            for ( left = 1, right = count; left <= right; )
            {
                ord = ( left + right ) >> 1;
                nid = self -> id2ord . v32 [ ord - 1 ];
                if ( id == nid )
                    return ord;

                if ( id < nid )
                {
                    right = ord - 1;
                    continue;
                }
                if ( ord == count )
                    return ord;

                nid = self -> id2ord . v32 [ ord ];
                if ( id < nid )
                    return ord;

                left = ord + 1;
            }
            break;

        case 4:
            /* as case 1, with 64-bit elements */
            for ( left = 1, right = count; left <= right; )
            {
                ord = ( left + right ) >> 1;
                nid = self -> id2ord . v64 [ ord - 1 ];
                if ( id == nid )
                    return ord;

                if ( id < nid )
                {
                    right = ord - 1;
                    continue;
                }
                if ( ord == count )
                    return ord;

                nid = self -> id2ord . v64 [ ord ];
                if ( id < nid )
                    return ord;

                left = ord + 1;
            }
            break;
        }
    }
    return 0;
}
743
/* Project
 *  reverse-translates "id" into its key string, also returning the
 *  start id and span of consecutive ids sharing that key.
 *
 *  returns rcNotFound when the id is outside the index or maps to a
 *  hole ( zero node id ) in a contiguous projection. returns
 *  rcInsufficient when key -> size >= buff_size; "actsize" ( when
 *  not NULL ) receives the key size in either case.
 */
static
rc_t KPTrieIndexProject_v2 ( const KPTrieIndex_v2 *self,
    int64_t id,
#if V2FIND_RETURNS_SPAN
    int64_t *start_id, uint32_t *span,
#endif
    char *key_buff, size_t buff_size, size_t *actsize )
{
    uint32_t nid, ord = KPTrieIndexID2Ord_v2 ( self, id );
    if ( ord != 0 )
    {
        assert ( start_id != NULL );
        assert ( span != NULL );

        nid = self -> ord2node [ ord - 1 ];

        /* compute start id and span of this ordinal's id range */
        switch ( self -> variant )
        {
        case 0:
            /* contiguous: scan forward over repeats of this node id */
            * start_id = id;
            for ( ; ord < self -> count; ++ ord )
            {
                if ( nid != self -> ord2node [ ord ] )
                    break;
            }
            * span = ( uint32_t ) ( self -> first + ord - * start_id );
            break;
        case 1:
            /* sparse: span runs to the next slot's start offset,
               or to maxid for the final ordinal */
            * start_id = self -> id2ord . v8 [ ord - 1 ];
            * span = ( uint32_t ) ( ( ( ord == self -> count ) ?
                ( self -> maxid - self -> first + 1 ) : self -> id2ord . v8 [ ord ] ) - * start_id );
            *start_id += self -> first;
            break;
        case 2:
            * start_id = self -> id2ord . v16 [ ord - 1 ];
            * span = ( uint32_t ) ( ( ( ord == self -> count ) ?
                ( self -> maxid - self -> first + 1 ) : self -> id2ord . v16 [ ord ] ) - * start_id );
            *start_id += self -> first;
            break;
        case 3:
            * start_id = self -> id2ord . v32 [ ord - 1 ];
            * span = ( uint32_t ) ( ( ( ord == self -> count ) ?
                ( self -> maxid - self -> first + 1 ) : self -> id2ord . v32 [ ord ] ) - * start_id );
            *start_id += self -> first;
            break;
        case 4:
            * start_id = self -> id2ord . v64 [ ord - 1 ];
            * span = ( uint32_t ) ( ( ( ord == self -> count ) ?
                ( self -> maxid - self -> first + 1 ) : self -> id2ord . v64 [ ord ] ) - * start_id );
            *start_id += self -> first;
            break;
        }

        /* a zero node id marks a hole in a contiguous projection */
        if ( nid != 0 )
        {
            rc_t rc;
            PTNode node;

            if ( self -> byteswap )
                nid = bswap_32 ( nid );

            rc = PTrieGetNode ( self -> key2id, & node, nid );
            if ( rc == 0 )
            {
                const String *key;
                rc = PTNodeMakeKey ( & node, & key );
                if ( rc == 0 )
                {
                    /* report actual key size even on truncation */
                    if ( actsize != NULL )
                        * actsize = key -> size;
                    if ( key -> size >= buff_size )
                        rc = RC ( rcDB, rcIndex, rcProjecting, rcBuffer, rcInsufficient );
                    else
                        string_copy ( key_buff, buff_size, key -> addr, key -> size );

                    StringWhack ( ( String* ) key );
                }
            }
            return rc;
        }
    }

    return RC ( rcDB, rcIndex, rcProjecting, rcId, rcNotFound );
}
828
/* Find
 *  maps a key string to its starting id ( and span ) by searching
 *  the persisted trie.
 *
 *  "convertFromV1" [ IN ] - when true and id_bits == 0, node data
 *  is interpreted as a raw 32-bit v1 spot id; otherwise the id is
 *  bit-unpacked and offset by self -> first.
 */
static
rc_t KPTrieIndexFind_v2 ( const KPTrieIndex_v2 *self,
    const char *str, int64_t *start_id,
#if V2FIND_RETURNS_SPAN
    uint32_t *span,
#endif
    int ( CC * custom_cmp ) ( const void *item, const PBSTNode *n, void *data ), void *data, bool convertFromV1 )
{
    rc_t rc;

    /* search within in-core index */
    if ( self -> count == 0 )
        rc = RC ( rcDB, rcIndex, rcSelecting, rcString, rcNotFound );
    else
    {
        uint32_t nid;
        PTNode pnode;

        String key;
        StringInitCString ( & key, str );

        nid = PTrieFind ( self -> key2id, & key, & pnode, custom_cmp, data );
        if ( nid == 0 )
            rc = RC ( rcDB, rcIndex, rcSelecting, rcString, rcNotFound );
        else
        {
            size_t usize;

            /* detect conversion from v1 */
            if ( convertFromV1 && self -> id_bits == 0 )
            {
                /* v1 stored tree will have just a 32-bit spot id as data */
                uint32_t id;
                assert ( pnode . data . size == sizeof id );
                memmove ( & id, pnode . data . addr, sizeof id );
                * start_id = id;
                rc = 0;
            }
            else
            {
                /* should be native v2 */
                if ( self -> id_bits > 0 )
                {
                    rc = Unpack ( self -> id_bits, sizeof * start_id * 8,
                        pnode . data . addr, 0, self -> id_bits, NULL,
                        start_id, sizeof * start_id, & usize );
                }
                else
                {
                    rc = 0;
                }
                /* NOTE(review): when id_bits == 0 nothing has written
                   *start_id in this function before this increment -
                   looks like it relies on the caller zero-initializing
                   it; confirm */
                * start_id += self -> first;
            }

            if ( rc == 0 )
            {
#if V2FIND_RETURNS_SPAN
                /* derive the span of ids sharing this key */
                if ( self -> ord2node != NULL )
                {
                    uint32_t ord = KPTrieIndexID2Ord_v2 ( self, * start_id );
                    if ( ord == 0 )
                        rc = RC ( rcDB, rcIndex, rcSelecting, rcId, rcNotFound );
                    else if ( ord == self -> count )
                        * span = ( uint32_t ) ( self -> maxid - * start_id + 1 );
                    else switch ( self -> variant )
                    {
                    case 0:
                        /* contiguous: scan forward over repeated node ids */
                        for ( ; ord < self -> count; ++ ord )
                        {
                            if ( nid != self -> ord2node [ ord ] )
                                break;
                        }
                        * span = ( uint32_t ) ( self -> first + ord - * start_id );
                        break;
                    case 1:
                        /* sparse: span ends at next slot's start offset */
                        * span = ( uint32_t ) ( self -> first + self -> id2ord . v8 [ ord ] - * start_id );
                        break;
                    case 2:
                        * span = ( uint32_t ) ( self -> first + self -> id2ord . v16 [ ord ] - * start_id );
                        break;
                    case 3:
                        * span = ( uint32_t ) ( self -> first + self -> id2ord . v32 [ ord ] - * start_id );
                        break;
                    case 4:
                        * span = ( uint32_t ) ( self -> first + self -> id2ord . v64 [ ord ] - * start_id );
                        break;
                    }
                }
                else if ( self -> span_bits == 0 )
                    * span = 1;
                else
                {
                    /* NOTE(review): the unpack length argument is
                       id_bits ( not span_bits ) at offset 0, which
                       overlaps the packed id field - verify against
                       the v2 on-disk node layout */
                    rc = Unpack ( self -> span_bits, sizeof * span * 8,
                        pnode . data . addr, 0, self -> id_bits, NULL,
                        span, sizeof * span, & usize );
                }
#endif
            }
        }
    }

    return rc;
}
934
935
936 /*--------------------------------------------------------------------------
937 * KTrieIndex_v2
938 */
939
940
941 /* whack whack */
KTrieIndexWhack_v2(KTrieIndex_v2 * self)942 void KTrieIndexWhack_v2 ( KTrieIndex_v2 *self )
943 {
944 KPTrieIndexWhack_v2 ( & self -> pt );
945 }
946
/* Open
 *  initializes an in-core v2 index from a mapped file, dispatching
 *  on the persisted version number: v1 is converted, v2 and v3/v4
 *  are opened natively. on success ( non-empty file ) a reference
 *  to "mm" is retained; on failure the index is whacked.
 */
rc_t KTrieIndexOpen_v2 ( KTrieIndex_v2 *self, const KMMap *mm, bool byteswap )
{
    rc_t rc;
    uint32_t version;
    bool ptorig = false;
    const KDBHdr *hdr = NULL;

#if ! KTRIE_ZEROS_KPTRIE
#error "KTrie is supposed to zero KPTrie"
#endif
    memset ( self, 0, sizeof * self );

    /* open the prior index in persisted mode, but
       don't load it into the core-based Trie */
    rc = KMMapAddrRead ( mm, ( const void** ) & hdr );
    if ( rc != 0 )
        return rc;

    /* the Init routines read pt.byteswap before assigning it,
       so it must be seeded here */
    self -> pt . byteswap = byteswap;
    version = byteswap ? bswap_32 ( hdr -> version ) : hdr -> version;

    switch ( version )
    {
    case 1:
        rc = KPTrieIndexInitFromV1_v2 ( & self -> pt, mm, byteswap );
        break;
    case 2:
        rc = KPTrieIndexInit_v2 ( & self -> pt, mm, byteswap );
        break;
    case 3:
        /* v3 uses the original persisted-trie format */
        ptorig = true;
        /* fall through */
    case 4:
        rc = KPTrieIndexInit_v3_v4 ( & self -> pt, mm, byteswap, ptorig );
        break;
    default:
        rc = RC(rcDB, rcIndex, rcConstructing, rcIndex, rcBadVersion);
        break;
    }
    if ( rc == 0 )
    {
        /* the file existed but was empty */
        if ( self -> pt . key2id == NULL )
        {
            self -> pt . mm = NULL;
            return 0;
        }

        /* retain a reference to memory map */
        rc = KMMapAddRef ( mm );
        if ( rc == 0 )
        {
            self -> pt . mm = mm;
            return 0;
        }

        /* self -> pt gets whacked below */
    }

    KTrieIndexWhack_v2 ( self );
    return rc;
}
1009
1010
1011 /* map key to id range */
KTrieIndexFind_v2(const KTrieIndex_v2 * self,const char * str,int64_t * start_id,uint32_t * span,int (CC * custom_cmp)(const void * item,const PBSTNode * n,void * data),void * data,bool convertFromV1)1012 rc_t KTrieIndexFind_v2 ( const KTrieIndex_v2 *self,
1013 const char *str, int64_t *start_id,
1014 #if V2FIND_RETURNS_SPAN
1015 uint32_t *span,
1016 #endif
1017 int ( CC * custom_cmp ) ( const void *item, const PBSTNode *n, void *data ), void *data, bool convertFromV1 )
1018 {
1019 /* search within persisted index */
1020 if ( self -> pt . key2id != NULL )
1021 {
1022 return KPTrieIndexFind_v2 ( & self -> pt, str, start_id,
1023 #if V2FIND_RETURNS_SPAN
1024 span,
1025 #endif
1026 custom_cmp, data, convertFromV1 );
1027 }
1028
1029 return RC ( rcDB, rcIndex, rcSelecting, rcString, rcNotFound );
1030 }
1031
KTrieIndexProject_v2(const KTrieIndex_v2 * self,int64_t id,int64_t * start_id,uint32_t * span,char * key_buff,size_t buff_size,size_t * actsize)1032 rc_t KTrieIndexProject_v2 ( const KTrieIndex_v2 *self,
1033 int64_t id,
1034 #if V2FIND_RETURNS_SPAN
1035 int64_t *start_id, uint32_t *span,
1036 #endif
1037 char *key_buff, size_t buff_size, size_t *actsize )
1038 {
1039 if ( self -> pt . ord2node == NULL )
1040 return RC ( rcDB, rcIndex, rcProjecting, rcId, rcNotFound );
1041
1042 return KPTrieIndexProject_v2 ( & self -> pt, id,
1043 #if V2FIND_RETURNS_SPAN
1044 start_id, span,
1045 #endif
1046 key_buff, buff_size, actsize );
1047 }
1048