1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 #include <kdb/extern.h>
28
29 #include "windex-priv.h"
30 #include "trieidx-priv.h"
31
32 #include <kdb/index.h>
33 #include <kfs/directory.h>
34 #include <kfs/file.h>
35 #include <kfs/md5.h>
36 #include <kfs/mmap.h>
37 #include <klib/ptrie.h>
38 #include <klib/text.h>
39 #include <klib/pack.h>
40 #include <klib/rc.h>
41 #include <os-native.h>
42 #include <sysalloc.h>
43
44 #include <byteswap.h>
45
46 #include <stdlib.h>
47 #include <limits.h>
48 #include <stdio.h>
49 #include <string.h>
50 #include <errno.h>
51 #include <assert.h>
52
53 #define KTRIE_ZEROS_KPTRIE 1
54
55 /*--------------------------------------------------------------------------
56 * KPTrieIndex_v2
57 * persisted keymap
58 */
59
60
61 /* Init
62 * opens and initializes persisted keymap structure
63 */
static
rc_t KPTrieIndexInitID2Ord ( KPTrieIndex_v2 *self, size_t in_size,
    int variant, int span_bits, int elem_bits )
{
    rc_t rc;

    /* view the unpacked array at 8, 16, 32 or 64 bits per element,
       as selected by "variant" ( 1..4 ) */
    union
    {
        uint8_t *v8;
        uint16_t *v16;
        uint32_t *v32;
        uint64_t *v64;
    } dst;
    size_t elem_bytes = elem_bits >> 3;
    uint32_t scount = self -> count - 1;

    assert ( self -> count != 0 );

    /* the packed spans for "scount" entries must fit within
       the remaining bytes of the mapped file */
    if ( span_bits * scount > in_size * 8 )
        return RC ( rcDB, rcIndex, rcConstructing, rcIndex, rcCorrupt );

    dst . v8 = malloc ( self -> count * elem_bytes );
    if ( dst . v8 == NULL )
        rc = RC ( rcDB, rcIndex, rcConstructing, rcMemory, rcExhausted );
    else
    {
        size_t usize;

        /* packed data sit immediately after the ord2node array;
           unpack into elements 1..scount, leaving element 0 free
           for the implicit zero origin set below */
        rc = Unpack ( span_bits, elem_bits,
            & self -> ord2node [ self -> count ], 0,
            span_bits * scount, NULL, & dst . v8 [ elem_bytes ],
            scount * elem_bytes, & usize );
        if ( rc == 0 )
        {
            uint32_t i;

            self -> id2ord . v8 = dst . v8;
            self -> variant = variant;

            /* integrate to simple translation:
               the stored values are first-derivative spans, so a
               running sum converts them into zero-based id offsets
               relative to "first" */
            switch ( variant )
            {
            case 1:
                dst . v8 [ 0 ] = 0;
                for ( i = 0; i < scount; ++ i )
                    dst . v8 [ i + 1 ] += dst . v8 [ i ];
                break;
            case 2:
                dst . v16 [ 0 ] = 0;
                for ( i = 0; i < scount; ++ i )
                    dst . v16 [ i + 1 ] += dst . v16 [ i ];
                break;
            case 3:
                dst . v32 [ 0 ] = 0;
                for ( i = 0; i < scount; ++ i )
                    dst . v32 [ i + 1 ] += dst . v32 [ i ];
                break;
            case 4:
                dst . v64 [ 0 ] = 0;
                for ( i = 0; i < scount; ++ i )
                    dst . v64 [ i + 1 ] += dst . v64 [ i ];
                break;
            }

            return 0;
        }

        free ( dst . v8 );
    }

    return rc;
}
134
135 static
KPTrieIndexExtractV1Range_v2(PTNode * n,void * data)136 void CC KPTrieIndexExtractV1Range_v2 ( PTNode *n, void *data )
137 {
138 KPTrieIndex_v2 *self = data;
139
140 /* capture node id */
141 uint32_t id;
142 assert ( n -> data . size == sizeof id );
143 memmove ( & id, n -> data . addr, sizeof id );
144
145 /* perform min/max */
146 if ( self -> count == 0 )
147 self -> first = self -> last = id;
148 else if ( id < self -> first )
149 self -> first = id;
150 else if ( id > self -> last )
151 self -> last = id;
152
153 ++ self -> count;
154 }
155
/* InitFromV1
 *  opens the map as a v1 keymap and lifts it into v2 form:
 *  the trie is taken as-is, while the v1 1-1 projection array
 *  is either reused directly or converted into a sparse pairing
 *  of ( id2ord, ord2node ) arrays
 */
static
rc_t KPTrieIndexInitFromV1_v2 ( KPTrieIndex_v2 *self, const KMMap *mm, bool byteswap )
{
    KPTrieIndex_v1 v1;
    rc_t rc = KPTrieIndexInit_v1 ( & v1, mm, byteswap );
    if ( rc == 0 )
    {
        uint32_t *ord2node;
        uint32_t total_id, test_id;
        uint32_t i, id, id_bits, num_ids;

        /* hopefully we got a projection index */
        if ( v1 . id2node == NULL )
        {
#if ! KTRIE_ZEROS_KPTRIE
            self -> count = 0;
#endif
            /* need to derive first and last from trie */
            PTrieForEach ( v1 . key2id, KPTrieIndexExtractV1Range_v2, self );
            if ( self -> count == 0 )
                KPTrieIndexWhack_v1 ( & v1 );
            else
            {
                /* take trie as-is */
                self -> key2id = v1 . key2id;
                self -> maxid = self -> last;
            }

            /* note that this assumes a span of 1 for
               each id. there are no known uses of v1 without
               a projection index, so this is unlikely to be important */

            return 0;
        }

        /* take id range */
        self -> first = v1 . first;
        self -> last = self -> maxid = v1 . last;

        /* count comes from trie as always */
        self -> count = PTrieCount ( v1 . key2id );

        /* detect empty trie */
        if ( self -> count == 0 || self -> first > self -> last )
        {
            self -> first = self -> last = self -> maxid = 0;
            return 0;
        }

        /* take trie as-is */
        self -> key2id = v1 . key2id;

        /* now decide whether to use 1-1 or sparse projection:
           when ids span less than twice the node count, the
           1-1 array with NULL holes is more compact */
        if ( ( self -> last - self -> first ) < ( ( int64_t ) self -> count << 1 ) )
        {
            /* take the old projection array as-is,
               treating NULL node ids as holes */
            self -> ord2node = v1 . id2node;
            return 0;
        }

        /* convert to sparse
           calculate id bits - notice that
           total_id gets right shifted so that
           the loop is guaranteed to exit */
        num_ids = ( uint32_t ) ( self -> last - self -> first + 1 );
        for ( total_id = num_ids >> 1, id_bits = 1, test_id = 1;
              test_id <= total_id;
              ++ id_bits, test_id <<= 1 )
            ( void ) 0;

        /* determine variant: the element width of id2ord is the
           smallest of 8/16/32 bits that can hold any id offset */
        if ( id_bits <= 8 )
        {
            /* allocate 4 bytes for new ord2node and 1 for id2ord
               ( single allocation, id2ord in front ) */
            uint8_t *id2ord = malloc ( self -> count * 5 );
            if ( id2ord == NULL )
                rc = RC ( rcDB, rcIndex, rcConstructing, rcMemory, rcExhausted );
            else
            {
                ord2node = ( uint32_t* ) & id2ord [ self -> count ];
                self -> ord2node = ord2node;
                self -> id2ord . v8 = id2ord;
                self -> variant = 1;

                /* walk across v1 table, looking at each id */
                for ( i = id = 0; id < num_ids; ++ id )
                {
                    /* detect non NULL node ids
                       and pretend they represent a contiguous
                       span with no holes in id space */
                    if ( v1 . id2node [ id ] != 0 )
                    {
                        /* prevent overwriting */
                        if ( i == self -> count )
                        {
                            rc = RC ( rcDB, rcIndex, rcConstructing, rcIndex, rcCorrupt );
                            break;
                        }

                        /* record id and node for slot */
                        id2ord [ i ] = ( uint8_t ) id;
                        ord2node [ i ] = v1 . id2node [ id ];
                        ++ i;
                    }
                }
            }
        }
        else if ( id_bits <= 16 )
        {
            /* same layout at 16 bits per id2ord entry */
            uint16_t *id2ord = malloc ( self -> count * 6 );
            if ( id2ord == NULL )
                rc = RC ( rcDB, rcIndex, rcConstructing, rcMemory, rcExhausted );
            else
            {
                ord2node = ( uint32_t* ) & id2ord [ self -> count ];
                self -> ord2node = ord2node;
                self -> id2ord . v16 = id2ord;
                self -> variant = 2;

                for ( i = id = 0; id < num_ids; ++ id )
                {
                    if ( v1 . id2node [ id ] != 0 )
                    {
                        if ( i == self -> count )
                        {
                            rc = RC ( rcDB, rcIndex, rcConstructing, rcIndex, rcCorrupt );
                            break;
                        }

                        id2ord [ i ] = ( uint16_t ) id;
                        ord2node [ i ] = v1 . id2node [ id ];
                        ++ i;
                    }
                }
            }
        }
        else
        {
            /* same layout at 32 bits per id2ord entry */
            uint32_t *id2ord = malloc ( self -> count * 8 );
            if ( id2ord == NULL )
                rc = RC ( rcDB, rcIndex, rcConstructing, rcMemory, rcExhausted );
            else
            {
                ord2node = & id2ord [ self -> count ];
                self -> ord2node = ord2node;
                self -> id2ord . v32 = id2ord;
                self -> variant = 3;

                for ( i = id = 0; id < num_ids; ++ id )
                {
                    if ( v1 . id2node [ id ] != 0 )
                    {
                        if ( i == self -> count )
                        {
                            rc = RC ( rcDB, rcIndex, rcConstructing, rcIndex, rcCorrupt );
                            break;
                        }

                        id2ord [ i ] = id;
                        ord2node [ i ] = v1 . id2node [ id ];
                        ++ i;
                    }
                }
            }
        }

        /* success requires that exactly "count" slots were filled
           NOTE(review): on the corrupt path the freshly-allocated
           id2ord stays attached to "self" - presumably released by
           the caller via KPTrieIndexWhack_v2; confirm */
        if ( rc == 0 )
        {
            if ( i == self -> count )
                return 0;
            rc = RC ( rcDB, rcIndex, rcConstructing, rcIndex, rcCorrupt );
        }

        KPTrieIndexWhack_v1 ( & v1 );
    }

    return rc;
}
335
/* Init
 *  opens and initializes persisted keymap structure
 *  from a v2-format file mapped in "mm"
 */
rc_t KPTrieIndexInit_v2 ( KPTrieIndex_v2 *self, const KMMap *mm, bool byteswap )
{
    /* get size of map, assumed to be size of file */
    size_t size;
    rc_t rc = KMMapSize ( mm, & size );
    if ( rc == 0 )
    {
        const KPTrieIndexHdr_v2 *hdr;

#if ! KTRIE_ZEROS_KPTRIE
        self -> mm = NULL;
        self -> ord2node = NULL;
        self -> id2ord . v32 = NULL;
        self -> variant = 0;
#endif

        /* ignore empty file */
        if ( size == 0 )
        {
#if ! KTRIE_ZEROS_KPTRIE
            self -> first = self -> last = self -> maxid = 0;
            self -> key2id = NULL;
            self -> count = 0;
#endif
            return 0;
        }

        /* have to have at least the base header */
        if ( size < sizeof hdr -> dad )
            return RC ( rcDB, rcIndex, rcConstructing, rcTrie, rcCorrupt );

        rc = KMMapAddrRead ( mm, ( const void** ) & hdr );
        if ( rc == 0 )
        {
            /* recheck header size */
            if ( size < sizeof * hdr )
                return RC ( rcDB, rcIndex, rcConstructing, rcTrie, rcCorrupt );

            /* capture id range and packing geometry from header */
            self -> first = hdr -> first;
            self -> last = self -> maxid = hdr -> last;
            self -> id_bits = ( uint8_t ) hdr -> id_bits;
            self -> span_bits = ( uint8_t ) hdr -> span_bits;
            self -> byteswap = byteswap;

            /* try to create the pttree */
            rc = PTrieMakeOrig ( & self -> key2id,
                hdr + 1, size -= sizeof * hdr, byteswap );
            if ( rc == 0 )
            {
                size_t ptsize = PTrieSize ( self -> key2id );
                if ( ptsize <= size )
                {
                    /* the count covers at least the number of trie nodes */
                    self -> count = PTrieCount ( self -> key2id );

                    /* it could be stored without projection */
                    if ( ptsize == size )
                        return 0;

                    /* calculate remaining bytes */
                    size -= ptsize;

                    /* there must be enough for an array of 4-byte node ids */
                    if ( size >= ( ( size_t ) self -> count << 2 ) )
                    {
                        /* take the persisted array as-is */
                        self -> ord2node = ( const uint32_t* )
                            ( ( const char* ) ( hdr + 1 ) + ptsize );

                        /* read the count
                           NOTE(review): the first 32-bit slot appears
                           to hold the projected entry count - confirm
                           against the writer */
                        if ( size >= 4 )
                        {
                            self -> count = * ( self -> ord2node ) ++;
                            size -= 4;
                        }

                        /* determine strategy from id span and count */
                        if ( ( self -> last - self -> first ) < ( ( int64_t ) self -> count << 1 ) )
                        {
                            /* must be contiguous */
                            self -> count = ( uint32_t ) ( self -> last - self -> first + 1 );

                            /* size should be exactly this number of slots */
                            if ( size == ( ( size_t ) self -> count << 2 ) )
                                return 0;

                            /* fall through to error return */
                        }
                        else if ( ( size == 4 && self -> count == 1 ) || size > ( ( size_t ) self -> count << 2 ) )
                        {
                            /* sparse */
                            size -= ( size_t ) self -> count << 2;

                            /* unpack id map - element width is the
                               smallest type that can hold id_bits */
                            if ( hdr -> id_bits <= 8 )
                                rc = KPTrieIndexInitID2Ord ( self, size, 1, hdr -> span_bits, 8 );
                            else if ( hdr -> id_bits <= 16 )
                                rc = KPTrieIndexInitID2Ord ( self, size, 2, hdr -> span_bits, 16 );
                            else if ( hdr -> id_bits <= 32 )
                                rc = KPTrieIndexInitID2Ord ( self, size, 3, hdr -> span_bits, 32 );
                            else
                                rc = KPTrieIndexInitID2Ord ( self, size, 4, hdr -> span_bits, 64 );

                            /* done */
                            if ( rc == 0 )
                                return 0;

                            PTrieWhack ( self -> key2id ), self -> key2id = NULL;
                            return rc;
                        }
                    }
                }

                PTrieWhack ( self -> key2id ), self -> key2id = NULL;
                rc = RC ( rcDB, rcIndex, rcConstructing, rcTrie, rcCorrupt );
            }
        }
    }

    return rc;
}
457
/* Init
 *  opens and initializes persisted keymap structure
 *  from a v3/v4-format file mapped in "mm"
 *
 *  "ptorig" [ IN ] - when true, use the original trie format
 *  ( PTrieMakeOrig ) rather than the current one ( PTrieMake )
 *
 *  NOTE(review): apart from the header type and trie constructor,
 *  this is a line-for-line duplicate of KPTrieIndexInit_v2 -
 *  candidate for factoring into a shared helper
 */
rc_t KPTrieIndexInit_v3_v4 ( KPTrieIndex_v2 *self, const KMMap *mm, bool byteswap, bool ptorig )
{
    /* get size of map, assumed to be size of file */
    size_t size;
    rc_t rc = KMMapSize ( mm, & size );
    if ( rc == 0 )
    {
        const KPTrieIndexHdr_v3 *hdr;

#if ! KTRIE_ZEROS_KPTRIE
        self -> mm = NULL;
        self -> ord2node = NULL;
        self -> id2ord . v32 = NULL;
        self -> variant = 0;
#endif

        /* ignore empty file */
        if ( size == 0 )
        {
#if ! KTRIE_ZEROS_KPTRIE
            self -> first = self -> last = self -> maxid = 0;
            self -> key2id = NULL;
            self -> count = 0;
#endif
            return 0;
        }

        /* have to have at least the base header */
        if ( size < sizeof hdr -> dad )
            return RC ( rcDB, rcIndex, rcConstructing, rcTrie, rcCorrupt );

        rc = KMMapAddrRead ( mm, ( const void** ) & hdr );
        if ( rc == 0 )
        {
            /* recheck header size */
            if ( size < sizeof * hdr )
                return RC ( rcDB, rcIndex, rcConstructing, rcTrie, rcCorrupt );

            /* capture id range and packing geometry from header */
            self -> first = hdr -> first;
            self -> last = self -> maxid = hdr -> last;
            self -> id_bits = ( uint8_t ) hdr -> id_bits;
            self -> span_bits = ( uint8_t ) hdr -> span_bits;
            self -> byteswap = byteswap;

            /* try to create the pttree */
            rc = ( ptorig ? PTrieMakeOrig : PTrieMake )
                ( & self -> key2id, hdr + 1, size -= sizeof * hdr, byteswap );
            if ( rc == 0 )
            {
                size_t ptsize = PTrieSize ( self -> key2id );
                if ( ptsize <= size )
                {
                    /* the count covers at least the number of trie nodes */
                    self -> count = PTrieCount ( self -> key2id );

                    /* it could be stored without projection */
                    if ( ptsize == size )
                        return 0;

                    /* calculate remaining bytes */
                    size -= ptsize;

                    /* there must be enough for an array of 4-byte node ids */
                    if ( size >= ( ( size_t ) self -> count << 2 ) )
                    {
                        /* take the persisted array as-is */
                        self -> ord2node = ( const uint32_t* )
                            ( ( const char* ) ( hdr + 1 ) + ptsize );

                        /* read the count */
                        if ( size >= 4 )
                        {
                            self -> count = * ( self -> ord2node ) ++;
                            size -= 4;
                        }

                        /* determine strategy from id span and count */
                        if ( ( self -> last - self -> first ) < ( ( int64_t ) self -> count << 1 ) )
                        {
                            /* must be contiguous */
                            self -> count = ( uint32_t ) ( self -> last - self -> first + 1 );

                            /* size should be exactly this number of slots */
                            if ( size == ( ( size_t ) self -> count << 2 ) )
                                return 0;

                            /* fall through to error return */
                        }
                        else if ( ( size == 4 && self -> count == 1 ) || size > ( ( size_t ) self -> count << 2 ) )
                        {
                            /* sparse */
                            size -= ( size_t ) self -> count << 2;

                            /* unpack id map */
                            if ( hdr -> id_bits <= 8 )
                                rc = KPTrieIndexInitID2Ord ( self, size, 1, hdr -> span_bits, 8 );
                            else if ( hdr -> id_bits <= 16 )
                                rc = KPTrieIndexInitID2Ord ( self, size, 2, hdr -> span_bits, 16 );
                            else if ( hdr -> id_bits <= 32 )
                                rc = KPTrieIndexInitID2Ord ( self, size, 3, hdr -> span_bits, 32 );
                            else
                                rc = KPTrieIndexInitID2Ord ( self, size, 4, hdr -> span_bits, 64 );

                            /* done */
                            if ( rc == 0 )
                                return 0;

                            PTrieWhack ( self -> key2id ), self -> key2id = NULL;
                            return rc;
                        }
                    }
                }

                PTrieWhack ( self -> key2id ), self -> key2id = NULL;
                rc = RC ( rcDB, rcIndex, rcConstructing, rcTrie, rcCorrupt );
            }
        }
    }

    return rc;
}
579
580 /* Whack
581 * closes down keymap
582 */
void KPTrieIndexWhack_v2 ( KPTrieIndex_v2 *self )
{
    /* the three resources are independent of one another */
    PTrieWhack ( self -> key2id );
    KMMapRelease ( self -> mm );
    free ( ( void* ) self -> id2ord . v8 );

    /* leave the structure in a known-empty state */
    memset ( self, 0, sizeof * self );
}
590
/* ID2Ord
 *  translates a row id into a one-based ordinal into ord2node
 *  returns 0 when the id lies outside the index or before
 *  the first recorded entry
 */
uint32_t KPTrieIndexID2Ord_v2 ( const KPTrieIndex_v2 *self, int64_t id )
{
    if ( id >= self -> first && id <= self -> maxid )
    {
        int64_t nid;
        uint32_t left, right, ord, count = self -> count;

        /* convert id either to a zero-based ord,
           or else the translated id in id2ord */
        id -= self -> first;

        /* handle type of projection */
        switch ( self -> variant )
        {
        case 0:
            /* contiguous 1-1 projection: return one-based ord */
            return ( uint32_t ) ( id + 1 );

        case 1:
            /* sparse: binary search for the span containing "id".
               entry ord-1 gives the span's start offset; the span
               extends up to the start of entry ord ( or to the end
               of the index for the last entry ).
               cases 2-4 below repeat the identical search at wider
               id2ord element widths */
            for ( left = 1, right = count; left <= right; )
            {
                ord = ( left + right ) >> 1;
                nid = self -> id2ord . v8 [ ord - 1 ];
                if ( id == nid )
                    return ord;

                if ( id < nid )
                {
                    right = ord - 1;
                    continue;
                }
                if ( ord == count )
                    return ord;

                nid = self -> id2ord . v8 [ ord ];
                if ( id < nid )
                    return ord;

                left = ord + 1;
            }
            break;

        case 2:
            for ( left = 1, right = count; left <= right; )
            {
                ord = ( left + right ) >> 1;
                nid = self -> id2ord . v16 [ ord - 1 ];
                if ( id == nid )
                    return ord;

                if ( id < nid )
                {
                    right = ord - 1;
                    continue;
                }
                if ( ord == count )
                    return ord;

                nid = self -> id2ord . v16 [ ord ];
                if ( id < nid )
                    return ord;

                left = ord + 1;
            }
            break;

        case 3:
            for ( left = 1, right = count; left <= right; )
            {
                ord = ( left + right ) >> 1;
                nid = self -> id2ord . v32 [ ord - 1 ];
                if ( id == nid )
                    return ord;

                if ( id < nid )
                {
                    right = ord - 1;
                    continue;
                }
                if ( ord == count )
                    return ord;

                nid = self -> id2ord . v32 [ ord ];
                if ( id < nid )
                    return ord;

                left = ord + 1;
            }
            break;

        case 4:
            for ( left = 1, right = count; left <= right; )
            {
                ord = ( left + right ) >> 1;
                nid = self -> id2ord . v64 [ ord - 1 ];
                if ( id == nid )
                    return ord;

                if ( id < nid )
                {
                    right = ord - 1;
                    continue;
                }
                if ( ord == count )
                    return ord;

                nid = self -> id2ord . v64 [ ord ];
                if ( id < nid )
                    return ord;

                left = ord + 1;
            }
            break;
        }
    }
    return 0;
}
708
/* Project
 *  projects a row id back onto its key: locates the ordinal for
 *  "id", computes the span [ start_id, start_id + span ) that it
 *  belongs to, then extracts the key text from the trie node.
 *  NOTE(review): the body references start_id / span outside the
 *  #if guard, so this unit presumably compiles with
 *  V2FIND_RETURNS_SPAN enabled - confirm
 */
static
rc_t KPTrieIndexProject_v2 ( const KPTrieIndex_v2 *self,
    int64_t id,
#if V2FIND_RETURNS_SPAN
    int64_t *start_id, uint32_t *span,
#endif
    char *key_buff, size_t buff_size, size_t *actsize )
{
    uint32_t nid, ord = KPTrieIndexID2Ord_v2 ( self, id );
    if ( ord != 0 )
    {
        assert ( start_id != NULL );
        assert ( span != NULL );

        /* trie node id for this ordinal */
        nid = self -> ord2node [ ord - 1 ];

        switch ( self -> variant )
        {
        case 0:
            /* contiguous projection: scan forward while the
               same node id repeats, to find the span's extent */
            * start_id = id;
            for ( ; ord < self -> count; ++ ord )
            {
                if ( nid != self -> ord2node [ ord ] )
                    break;
            }
            * span = self -> first + ord - * start_id;
            break;
        case 1:
            /* sparse: span is the distance to the next start
               offset ( or to maxid for the last entry ) */
            * start_id = self -> id2ord . v8 [ ord - 1 ];
            * span = ( uint32_t ) ( ( ( ord == self -> count ) ?
                ( self -> maxid - self -> first + 1 ) : self -> id2ord . v8 [ ord ] ) - * start_id );
            *start_id += self->first;
            break;
        case 2:
            * start_id = self -> id2ord . v16 [ ord - 1 ];
            * span = ( uint32_t ) ( ( ( ord == self -> count ) ?
                ( self -> maxid - self -> first + 1 ) : self -> id2ord . v16 [ ord ] ) - * start_id );
            *start_id += self->first;
            break;
        case 3:
            * start_id = self -> id2ord . v32 [ ord - 1 ];
            * span = ( uint32_t ) ( ( ( ord == self -> count ) ?
                ( self -> maxid - self -> first + 1 ) : self -> id2ord . v32 [ ord ] ) - * start_id );
            *start_id += self->first;
            break;
        case 4:
            * start_id = self -> id2ord . v64 [ ord - 1 ];
            * span = ( uint32_t ) ( ( ( ord == self -> count ) ?
                ( self -> maxid - self -> first + 1 ) : self -> id2ord . v64 [ ord ] ) - * start_id );
            *start_id += self->first;
            break;
        }

        /* a zero node id marks a hole in the projection */
        if ( nid != 0 )
        {
            rc_t rc;
            PTNode node;

            if ( self -> byteswap )
                nid = bswap_32 ( nid );

            rc = PTrieGetNode ( self -> key2id, & node, nid );
            if ( rc == 0 )
            {
                const String *key;
                rc = PTNodeMakeKey ( & node, & key );
                if ( rc == 0 )
                {
                    /* report required size even on overflow */
                    if (actsize)
                        *actsize = key -> size;
                    if ( key -> size >= buff_size )
                        rc = RC ( rcDB, rcIndex, rcProjecting, rcBuffer, rcInsufficient );
                    else
                        string_copy ( key_buff, buff_size, key -> addr, key -> size );

                    StringWhack ( ( String* ) key );
                }
            }
            return rc;
        }
    }

    return RC ( rcDB, rcIndex, rcProjecting, rcId, rcNotFound );
}
793
794 /* Find
795 */
/* Find
 *  looks up "str" in the trie; on success returns the starting id
 *  of its row span in *start_id and ( when V2FIND_RETURNS_SPAN )
 *  the span length in *span
 */
static
rc_t KPTrieIndexFind_v2 ( const KPTrieIndex_v2 *self,
    const char *str, int64_t *start_id,
#if V2FIND_RETURNS_SPAN
    uint32_t *span,
#endif
    int ( CC * custom_cmp ) ( const void *item, const PBSTNode *n, void *data ), void *data, bool convertFromV1 )
{
    rc_t rc;

    /* detect empty index */
    if ( self -> count == 0 )
        rc = RC ( rcDB, rcIndex, rcSelecting, rcString, rcNotFound );
    else
    {
        uint32_t nid;
        PTNode pnode;

        String key;
        StringInitCString ( & key, str );

        /* try to find string */
        nid = PTrieFind ( self -> key2id, & key, & pnode, custom_cmp, data );
        if ( nid == 0 )
            rc = RC ( rcDB, rcIndex, rcSelecting, rcString, rcNotFound );
        else
        {
            size_t usize;

            /* detect conversion from v1 */
            if ( convertFromV1 && self -> id_bits == 0 )
            {
                /* v1 stored tree will have just a 32-bit spot id as data */
                uint32_t id;
                assert ( pnode . data . size == sizeof id );
                memmove ( & id, pnode . data . addr, sizeof id );
                * start_id = id;
                rc = 0;
            }
            else
            {
                /* should be native v2:
                   node payload holds the id offset packed in id_bits;
                   unpack and rebase onto "first" */
                rc = Unpack ( self -> id_bits, sizeof * start_id * 8,
                    pnode . data . addr, 0, self -> id_bits, NULL,
                    start_id, sizeof * start_id, & usize );
                * start_id += self -> first;
            }

            if ( rc == 0 )
            {
#if V2FIND_RETURNS_SPAN
                if ( self -> ord2node != NULL )
                {
                    /* derive span from the projection arrays */
                    uint32_t ord = KPTrieIndexID2Ord_v2 ( self, * start_id );
                    if ( ord == 0 )
                        rc = RC ( rcDB, rcIndex, rcSelecting, rcId, rcNotFound );
                    else if ( ord == self -> count )
                        * span = self -> maxid - * start_id + 1;
                    else switch ( self -> variant )
                    {
                    case 0:
                        for ( ; ord < self -> count; ++ ord )
                        {
                            if ( nid != self -> ord2node [ ord ] )
                                break;
                        }
                        * span = self -> first + ord - * start_id;
                        break;
                    case 1:
                        * span = self -> first + self -> id2ord . v8 [ ord ] - * start_id;
                        break;
                    case 2:
                        * span = self -> first + self -> id2ord . v16 [ ord ] - * start_id;
                        break;
                    case 3:
                        * span = self -> first + self -> id2ord . v32 [ ord ] - * start_id;
                        break;
                    case 4:
                        * span = self -> first + self -> id2ord . v64 [ ord ] - * start_id;
                        break;
                    }
                }
                else if ( self -> span_bits == 0 )
                    * span = 1;
                else
                {
                    /* NOTE(review): source offset 0 and source size
                       "id_bits" look suspicious here - the writer
                       ( KTrieIndexAux_v2_s2 ) packs the span at bit
                       offset id_bits with span_bits size; verify
                       against Unpack's argument contract */
                    rc = Unpack ( self -> span_bits, sizeof * span * 8,
                        pnode . data . addr, 0, self -> id_bits, NULL,
                        span, sizeof * span, & usize );
                }
#endif
            }
        }
    }

    return rc;
}
893
894
895 /*--------------------------------------------------------------------------
896 * KTrieIdxNode_v2
897 */
898
899 static
KTrieIdxNodeMake_v2_s1(KTrieIdxNode_v2_s1 ** n,const String * key,int64_t id)900 rc_t KTrieIdxNodeMake_v2_s1 ( KTrieIdxNode_v2_s1 **n, const String *key, int64_t id )
901 {
902 rc_t rc = TNodeMake ( ( TNode** ) n, sizeof ** n + key -> size );
903 if ( rc != 0 )
904 rc = ResetRCContext ( rc, rcDB, rcIndex, rcInserting );
905 else
906 {
907 KTrieIdxNode_v2_s1 *node = * n;
908 string_copy ( node -> key, key -> size + 1, key -> addr, key -> size);
909 StringInit ( & node -> n . key, node -> key, key -> size, key -> len );
910 node -> start_id = id;
911 }
912
913 return rc;
914 }
915
916 static
KTrieIdxNodeMakeHole_v2_s1(KTrieIdxNode_v2_s1 ** n,int64_t id)917 rc_t KTrieIdxNodeMakeHole_v2_s1 ( KTrieIdxNode_v2_s1 **n, int64_t id )
918 {
919 rc_t rc = TNodeMake ( ( TNode** ) n, sizeof ** n );
920 if ( rc != 0 )
921 rc = ResetRCContext ( rc, rcDB, rcIndex, rcInserting );
922 else
923 {
924 KTrieIdxNode_v2_s1 *node = * n;
925 node -> key [ 0 ] = 0;
926 StringInit ( & node -> n . key, node -> key, 0, 0 );
927 node -> start_id = id;
928 }
929
930 return rc;
931 }
932
933 static
KTrieIdxNodeMake_v2_s2(KTrieIdxNode_v2_s2 ** n,const String * key,int64_t id)934 rc_t KTrieIdxNodeMake_v2_s2 ( KTrieIdxNode_v2_s2 **n, const String *key, int64_t id )
935 {
936 rc_t rc = TNodeMake ( ( TNode** ) n, sizeof ** n + key -> size );
937 if ( rc != 0 )
938 rc = ResetRCContext ( rc, rcDB, rcIndex, rcInserting );
939 else
940 {
941 KTrieIdxNode_v2_s2 *node = * n;
942 string_copy ( node -> key, key -> size + 1, key -> addr, key -> size);
943 StringInit ( & node -> n . key, node -> key, key -> size, key -> len );
944 node -> start_id = id;
945 node -> span = 1;
946 }
947 return rc;
948 }
949
static
void CC KTrieIdxNodeWhack_v2 ( TNode *n, void *data )
{
    /* Trie teardown callback; "data" is unused */
    TNodeWhack ( n );
}
955
956 #if 0
957 static
958 void CC KTrieIdxNodeUnlink_v2 ( TNode *n, void *data )
959 {
960 if ( TrieUnlink ( data, n ) )
961 TNodeWhack ( n );
962 }
963 #endif
964
965
966 /*--------------------------------------------------------------------------
967 * KTrieIndex_v2
968 */
969
/* translate an id into a one-based ordinal into the live
   ( in-memory ) ord2node array by binary search over node
   start ids; each node covers [ start_id, next start_id ).
   returns 0 when the id is outside [ first, last ] */
static
uint32_t KTrieIndexID2Ord_v2 ( const KTrieIndex_v2 *self, int64_t id )
{
    if ( id >= self -> first && id <= self -> last )
    {
        uint32_t left, right, count = self -> count;
        for ( left = 1, right = count; left <= right; )
        {
            uint32_t ord = ( left + right ) >> 1;
            const KTrieIdxNode_v2_s1 *node = self -> ord2node [ ord - 1 ];
            if ( id == node -> start_id )
                return ord;

            if ( id < node -> start_id )
            {
                right = ord - 1;
                continue;
            }

            /* id falls after this node's start: it belongs here
               if this is the last node or the next starts later */
            if ( ord == count )
                return ord;

            node = self -> ord2node [ ord ];
            if ( id < node -> start_id )
                return ord;

            left = ord + 1;
        }
    }
    return 0;
}
1001
1002 static
KTrieIndexNode2Ord_v2(const KTrieIndex_v2 * self,const KTrieIdxNode_v2_s1 * node)1003 uint32_t KTrieIndexNode2Ord_v2 ( const KTrieIndex_v2 *self, const KTrieIdxNode_v2_s1 *node )
1004 {
1005 if ( self -> ord2node != NULL )
1006 return KTrieIndexID2Ord_v2 ( self, node -> start_id );
1007 return 0;
1008 }
1009
1010 /* KTrieIndexWrite_v2
1011 */
/* state block threaded through the persist callbacks below */
typedef struct PersistTrieData PersistTrieData;
struct PersistTrieData
{
    uint64_t pos;          /* file offset of the next flush */
    KFile *f;              /* output file */
    KMD5File *fmd5;        /* MD5 wrapper over f - not referenced in this chunk; confirm usage */
    uint8_t *buffer;       /* staging buffer */
    size_t bsize;          /* staging buffer capacity */
    size_t marker;         /* valid bytes currently held in buffer */

    int64_t first;         /* first id - node ids are stored relative to it */
    size_t ptt_size;       /* presumably size of the persisted trie - confirm */
    size_t node_data_size; /* per-node payload size in bytes */
    uint16_t id_bits;      /* bits needed to encode ( last - first ) */
    uint16_t span_bits;    /* bits needed to encode the maximum span */
    rc_t rc;               /* sticky error captured by the write callback */
};
1029
/* persist write callback: stages incoming bytes into pb -> buffer
   and flushes whole buffers to the file at pb -> pos. any error is
   also latched in pb -> rc for the caller to inspect */
static
rc_t CC KTrieIndexWrite_v2 ( void *param,
    const void *buffer, size_t size, size_t *num_writ )
{
    PersistTrieData *pb = param;
    size_t total, to_write;

    for ( total = 0; total < size; total += to_write )
    {
        /* copy as much as fits into the staging buffer */
        to_write = size - total;
        if ( pb -> marker + to_write > pb -> bsize )
            to_write = pb -> bsize - pb -> marker;

        if ( to_write > 0 )
        {
            memmove ( pb -> buffer + pb -> marker,
                ( const uint8_t* ) buffer + total, to_write );
            pb -> marker += to_write;
        }

        /* flush when full */
        if ( pb -> marker == pb -> bsize )
        {
            size_t num_flushed;
            pb -> rc = KFileWrite ( pb -> f, pb -> pos,
                pb -> buffer, pb -> bsize, & num_flushed );
            if ( pb -> rc != 0 )
            {
                * num_writ = 0;
                return pb -> rc;
            }

            /* a zero-length write cannot make progress */
            if ( num_flushed == 0 )
            {
                * num_writ = total + to_write;
                return pb -> rc = RC ( rcDB, rcIndex, rcPersisting, rcTransfer, rcIncomplete );
            }

            /* keep any unflushed remainder at the buffer start */
            pb -> marker -= num_flushed;
            pb -> pos += num_flushed;

            if ( pb -> marker != 0 )
                memmove ( pb -> buffer, pb -> buffer + num_flushed, pb -> marker );
        }
    }

    * num_writ = total;
    return 0;
}
1078
1079 /* KTrieIndexAux_v2
1080 */
/* persist auxiliary callback for s1 ( id-only ) nodes:
   emits each node's start id, packed into id_bits, as the
   node's payload. when "write" is NULL only the size is reported */
static
rc_t CC KTrieIndexAux_v2_s1 ( void *param, const void *node, size_t *num_writ,
    PTWriteFunc write, void *write_param )
{
    PersistTrieData *pb = param;

    if ( write != NULL && pb -> node_data_size != 0 )
    {
        char buffer [ 8 ];
        const KTrieIdxNode_v2_s1 *n = node;

        /* pack from 64 possible bits down to total id span */
        if ( pb -> id_bits != 0 )
        {
            /* store name->id mapping as a simple translation
               from first, because we don't have easy access to
               neighboring nodes for storage as 1st derivative. */
            uint64_t idd = n -> start_id - pb -> first;

            bitsz_t psize;
            rc_t rc = Pack ( 64, pb -> id_bits, & idd,
                sizeof idd, NULL, buffer, 0, sizeof buffer * 8, & psize );
            if ( rc != 0 )
                return rc;

            /* the packing should produce a single unit */
            if ( psize != pb -> id_bits )
                return RC ( rcDB, rcIndex, rcPacking, rcData, rcCorrupt );
        }

        /* write out the node
           NOTE(review): if id_bits == 0 while node_data_size != 0,
           "buffer" would be written uninitialized - presumably
           node_data_size is 0 whenever id_bits is 0; confirm */
        return ( * write ) ( write_param, buffer, pb -> node_data_size, num_writ );
    }

    /* will always store an integral number of bytes */
    * num_writ = pb -> node_data_size;
    return 0;
}
1119
/* persist auxiliary callback for s2 ( id + span ) nodes:
   emits the start id packed into id_bits, immediately followed
   by the span packed into span_bits, as the node's payload */
static
rc_t CC KTrieIndexAux_v2_s2 ( void *param, const void *node, size_t *num_writ,
    PTWriteFunc write, void *write_param )
{
    PersistTrieData *pb = param;

    if ( write != NULL && pb -> node_data_size != 0 )
    {
        const KTrieIdxNode_v2_s2 *n = node;

        rc_t rc;
        char buffer [ 12 ];
        bitsz_t psize, offset;

        if ( pb -> id_bits == 0 )
            offset = 0;
        else
        {
            /* again store name->id mapping as a simple translation
               from first, but pack bits tightly */
            uint64_t idd = n -> start_id - pb -> first;
            rc = Pack ( 64, pb -> id_bits, & idd,
                sizeof idd, NULL, buffer, 0, sizeof buffer * 8, & offset );
            if ( rc != 0 )
                return rc;
            if ( offset != pb -> id_bits )
                return RC ( rcDB, rcIndex, rcPacking, rcData, rcCorrupt );
        }

        /* now pack id span down to a minimal number of bits
           6/8/09 - this is known to fail because Pack hasn't been
           updated to start on a non-0 bit offset */
        if ( pb -> span_bits != 0 )
        {
            rc = Pack ( 32, pb -> span_bits, & n -> span, sizeof n -> span,
                NULL, buffer, offset, sizeof buffer * 8 - offset, & psize );
            if ( rc != 0 )
                return rc;
            if ( psize != pb -> span_bits )
                return RC ( rcDB, rcIndex, rcPacking, rcData, rcCorrupt );
        }

        /* write out packed combination */
        return ( * write ) ( write_param, buffer, pb -> node_data_size, num_writ );
    }

    /* will always store an integral number of bytes */
    * num_writ = pb -> node_data_size;
    return 0;
}
1169
1170 /* KTrieIndexPersist_v*
1171 * write keymap to indicated location
1172 */
1173 #if KDBINDEXVERS == 2
1174
/* fill out the v2 header at the start of pb -> buffer and derive
   the bit widths ( id_bits, span_bits ) used to pack node payloads */
static
void KTrieIndexPersistHdr_v2 ( KTrieIndex_v2 *self, PersistTrieData *pb )
{
    KPTrieIndexHdr_v2 *hdr;

    uint64_t total_id, test_id;
    uint32_t total_span, test_span;

    pb -> pos = 0;

    hdr = ( KPTrieIndexHdr_v2* ) pb -> buffer;
    pb -> marker = sizeof * hdr;

    /* stamp version header */
    KDBHdrInit(&hdr->dad, 2);

    /* store first and last ids */
    pb -> first = self -> first;
    hdr -> first = self -> first;
    hdr -> last = self -> last;

    /* calculate id bits - notice that
       total_id gets right shifted so that
       the loop is guaranteed to exit */
    total_id = self -> last - self -> first;
    if ( total_id == 0 )
        pb -> id_bits = 0;
    else for ( total_id >>= 1, pb -> id_bits = 1, test_id = 1;
               test_id <= total_id;
               ++ pb -> id_bits, test_id <<= 1 )
        ( void ) 0;

    /* if we have maintained a projection index,
       calculate max span now as the largest distance
       between consecutive node start ids */
    if ( self -> ord2node != NULL )
    {
        uint32_t i, span, max_span;
        int64_t cur, prev = self -> first;
        for ( i = max_span = 1; i < self -> count; prev = cur, ++ i )
        {
            cur = self -> ord2node [ i ] -> start_id;
            span = ( uint32_t ) ( cur - prev );
            if ( span > max_span )
                max_span = span;
        }

        /* the final span runs to "last" */
        span = ( uint32_t ) ( self -> last - prev );
        if ( span > max_span )
            max_span = span;

        self -> max_span = max_span;
    }

    /* calculate span bits with the same bit-length loop */
    total_span = self -> max_span;
    if ( total_span == 0 )
        pb -> span_bits = 0;
    else for ( total_span >>= 1, pb -> span_bits = 1, test_span = 1;
               test_span <= total_span;
               ++ pb -> span_bits, test_span <<= 1 )
        ( void ) 0;

    /* record these as header data */
    hdr -> id_bits = pb -> id_bits;
    hdr -> span_bits = pb -> span_bits;

    /* zero trailing junk */
    hdr -> align [ 0 ] = hdr -> align [ 1 ] = 0;
}
1244
1245 #else
1246
1247 static
KTrieIndexPersistHdr_v3_v4(KTrieIndex_v2 * self,PersistTrieData * pb)1248 void KTrieIndexPersistHdr_v3_v4 ( KTrieIndex_v2 *self, PersistTrieData *pb )
1249 {
1250 KPTrieIndexHdr_v3 *hdr;
1251
1252 uint64_t total_id, test_id;
1253 uint32_t total_span, test_span;
1254
1255 pb -> pos = 0;
1256
1257 hdr = ( KPTrieIndexHdr_v3* ) pb -> buffer;
1258 pb -> marker = sizeof * hdr;
1259
1260 /* stamp version header */
1261 KDBHdrInit(&hdr->dad.h, KDBINDEXVERS);
1262 hdr->dad.index_type = kitText;
1263
1264 /* store first and last ids */
1265 pb -> first = self -> first;
1266 hdr -> first = self -> first;
1267 hdr -> last = self -> last;
1268
1269 /* calculate id bits - notice that
1270 total_id gets right shifted so that
1271 the loop is guaranteed to exit */
1272 total_id = self -> last - self -> first;
1273 if ( total_id == 0 )
1274 pb -> id_bits = 0;
1275 else for ( total_id >>= 1, pb -> id_bits = 1, test_id = 1;
1276 test_id <= total_id;
1277 ++ pb -> id_bits, test_id <<= 1 )
1278 ( void ) 0;
1279
1280 /* if we have maintained a projection index,
1281 calculate max span now */
1282 if ( self -> ord2node != NULL )
1283 {
1284 uint32_t i, span, max_span;
1285 int64_t cur, prev = self -> first;
1286 for ( i = max_span = 1; i < self -> count; prev = cur, ++ i )
1287 {
1288 cur = self -> ord2node [ i ] -> start_id;
1289 span = ( uint32_t ) ( cur - prev );
1290 if ( span > max_span )
1291 max_span = span;
1292 }
1293
1294 span = ( uint32_t ) ( self -> last - prev );
1295 if ( span > max_span )
1296 max_span = span;
1297
1298 self -> max_span = max_span;
1299 }
1300
1301 /* calculate span bits */
1302 total_span = self -> max_span;
1303 if ( total_span == 0 )
1304 pb -> span_bits = 0;
1305 else for ( total_span >>= 1, pb -> span_bits = 1, test_span = 1;
1306 test_span <= total_span;
1307 ++ pb -> span_bits, test_span <<= 1 )
1308 ( void ) 0;
1309
1310 /* record these as header data */
1311 hdr -> id_bits = pb -> id_bits;
1312 hdr -> span_bits = pb -> span_bits;
1313
1314 /* zero trailing junk */
1315 hdr -> align [ 0 ] = hdr -> align [ 1 ] = 0;
1316 }
1317
1318 #endif
1319
1320 static
KTrieIndexPersistTrie_v2(const KTrieIndex_v2 * self,PersistTrieData * pb)1321 rc_t KTrieIndexPersistTrie_v2 ( const KTrieIndex_v2 *self, PersistTrieData *pb )
1322 {
1323 rc_t rc;
1324
1325 /* persist the trie to file,
1326 using tree-internal key storage,
1327 capture the image size in pb */
1328 if ( self -> ord2node != NULL )
1329 {
1330 pb -> node_data_size = ( pb -> id_bits + 7 ) >> 3;
1331 rc = TriePersist ( & self -> key2id, & pb -> ptt_size,
1332 false, KTrieIndexWrite_v2, pb, KTrieIndexAux_v2_s1, pb );
1333 }
1334 else
1335 {
1336 pb -> node_data_size = ( pb -> id_bits + pb -> span_bits + 7 ) >> 3;
1337 rc = TriePersist ( & self -> key2id, & pb -> ptt_size,
1338 false, KTrieIndexWrite_v2, pb, KTrieIndexAux_v2_s2, pb );
1339 }
1340
1341 if ( rc == 0 && pb -> marker != 0 )
1342 {
1343 size_t num_writ;
1344 rc = KFileWrite ( pb -> f, pb -> pos,
1345 pb -> buffer, pb -> marker, & num_writ );
1346 if ( rc == 0 && num_writ != pb -> marker )
1347 rc = RC ( rcDB, rcIndex, rcPersisting, rcTransfer, rcIncomplete );
1348 }
1349
1350 return rc;
1351 }
1352
1353
1354 static
KTrieIndexPersistProjContig_v2(const KTrieIndex_v2 * self,PersistTrieData * pb,PTrie * tt,uint32_t * ord2node)1355 rc_t KTrieIndexPersistProjContig_v2 ( const KTrieIndex_v2 *self,
1356 PersistTrieData *pb, PTrie *tt, uint32_t *ord2node )
1357 {
1358 uint32_t i, j, nid;
1359 int64_t id = self -> first;
1360 for ( i = j = nid = 0; i < self -> count; ++ id, ++ j, ++ i )
1361 {
1362 const KTrieIdxNode_v2_s1 *node = self -> ord2node [ i ];
1363
1364 /* back fill repeats */
1365 for ( ; id < node -> start_id; ++ id )
1366 ord2node [ j ++ ] = nid;
1367
1368 /* check for a hole in id space */
1369 if ( node -> n . key . size == 0 )
1370 nid = 0;
1371 else
1372 {
1373 PTNode pn;
1374 nid = PTrieFind ( tt, & node -> n . key, & pn, NULL, NULL );
1375 if ( nid == 0 )
1376 return RC ( rcDB, rcIndex, rcPersisting, rcTransfer, rcIncomplete );
1377 }
1378
1379 /* record nid for i at j */
1380 ord2node [ j ] = nid;
1381 }
1382
1383 /* finish off trailing span */
1384 for ( ; id <= self -> last; ++ id )
1385 ord2node [ j ++ ] = nid;
1386
1387 return 0;
1388 }
1389
1390 static
KTrieIndexPersistProjSparse_v2(const KTrieIndex_v2 * self,PersistTrieData * pb,PTrie * tt,uint32_t * ord2node,bitsz_t * psize)1391 rc_t KTrieIndexPersistProjSparse_v2 ( const KTrieIndex_v2 *self,
1392 PersistTrieData *pb, PTrie *tt, uint32_t *ord2node, bitsz_t *psize )
1393 {
1394 uint32_t i, nid;
1395 int64_t *id2ord = ( void* ) & ord2node [ self -> count ];
1396 for ( i = 0; i < self -> count; ++ i )
1397 {
1398 const KTrieIdxNode_v2_s1 *node = self -> ord2node [ i ];
1399
1400 /* record negated id for i - see 1st derivative below */
1401 id2ord [ i ] = - node -> start_id;
1402
1403 /* check for a hole in id space */
1404 if ( node -> n . key . size == 0 )
1405 nid = 0;
1406 else
1407 {
1408 PTNode pn;
1409 nid = PTrieFind ( tt, & node -> n . key, & pn, NULL, NULL );
1410 if ( nid == 0 )
1411 return RC ( rcDB, rcIndex, rcPersisting, rcTransfer, rcIncomplete );
1412 }
1413
1414 /* record nid for i */
1415 ord2node [ i ] = nid;
1416 }
1417
1418 /* produce first derivative of ids
1419 for any given pair, the 1st derivative is generally
1420 right - left, which is usually stored right, such that
1421 we start at the end and move left toward the start, i.e.
1422
1423 right -= left and move left
1424
1425 in this case, we want to eliminate the leading 0
1426 and shift everything down, so we produce the result
1427 to the left side and move right toward end, but this
1428 requires more complicated arithmetic in order to preserve
1429 right - left, i.e.
1430
1431 left = right - left and move right
1432
1433 to avoid this arithmetic, the ids were stored negated above
1434 which converts the operation into
1435
1436 left -= right and move right
1437 */
1438 for ( i = 1; i < self -> count; ++ i )
1439 id2ord [ i - 1 ] -= id2ord [ i ];
1440
1441 /* pack from 64 to span-bits */
1442 return Pack ( 64, pb -> span_bits, id2ord, ( size_t ) ( self -> count - 1 ) << 3,
1443 NULL, id2ord, 0, ( bitsz_t ) self -> count << 6, psize );
1444 }
1445
1446 #if KDBINDEXVERS == 2
1447
1448 static
KTrieIndexPersistProj_v2(const KTrieIndex_v2 * self,PersistTrieData * pb)1449 rc_t KTrieIndexPersistProj_v2 ( const KTrieIndex_v2 *self, PersistTrieData *pb )
1450 {
1451 rc_t rc = 0;
1452 void * addr;
1453 size_t map_size;
1454 uint64_t file_size;
1455 size_t num_to_read;
1456 uint64_t num_ids;
1457 bool is_sparse;
1458
1459 /* there must be something to do */
1460 if ( self -> count == 0 || self -> ord2node == NULL )
1461 return 0;
1462
1463 /* calculate what kind of projection strategy to use:
1464 when avg ( id span ) <= 2.0, just use a straight array.
1465 otherwise, use two arrays: first for node ids and last
1466 being 1st derivative of positional start_ids.
1467
1468 the calculation of the ratio would be
1469 num_ids = self -> last - self -> first + 1;
1470 ratio = num_ids / self -> count;
1471 if ( ratio <= 2.0 )
1472 use 1-1
1473 else
1474 use sparse
1475
1476 by reorganizing the comparison, we get
1477 if ( num_ids <= 2 * self -> count )...
1478 */
1479 num_ids = self -> last - self -> first + 1;
1480 if ( num_ids <= ( ( uint64_t ) self -> count << 1 ) )
1481 {
1482 /* store 1-1 projection index */
1483 is_sparse = false;
1484
1485 /* map size is 4 bytes per id */
1486 map_size = pb -> ptt_size + ( ( size_t ) num_ids << 2 );
1487 }
1488 else
1489 {
1490 /* store sparse projection index */
1491 is_sparse = true;
1492
1493 /* map size for node ids is 4 bytes per slot */
1494 map_size = pb -> ptt_size + ( ( size_t ) self -> count << 2 );
1495
1496 /* map size for 1st derivative ids is initially 8 bytes per slot
1497 used initially to store full ids and then reduced 1st deriv. */
1498 map_size += ( size_t ) self -> count << 3;
1499 }
1500
1501 /* add in 4 bytes for count */
1502 map_size += 4;
1503
1504 /* create an updatable region spanning from end of header,
1505 starting from PTrie and extending to end of projection index */
1506 addr = malloc ( map_size );
1507 if ( addr == NULL )
1508 rc = RC ( rcDB, rcIndex, rcPersisting, rcMemory, rcExhausted );
1509 else
1510 {
1511 size_t num_read;
1512
1513 rc = KFileSize ( pb -> f, & file_size );
1514 num_to_read = file_size - sizeof ( KPTrieIndexHdr_v2 );
1515 if ( rc == 0 )
1516 {
1517 rc = KFileReadAll ( pb -> f, sizeof ( KPTrieIndexHdr_v2 ), addr,
1518 num_to_read, & num_read );
1519 if ( rc == 0 )
1520 {
1521 if ( num_read != num_to_read )
1522 {
1523 rc = RC ( rcDB, rcIndex, rcPersisting, rcHeader, rcInsufficient );
1524 }
1525 }
1526 }
1527 if ( rc != 0 )
1528 free ( addr );
1529 }
1530
1531 if ( rc == 0 )
1532 {
1533 size_t num_writ;
1534 /* inflate the PTrie */
1535 PTrie *tt;
1536 rc = PTrieMakeOrig ( & tt, addr, pb -> ptt_size );
1537 if ( rc == 0 )
1538 {
1539 uint32_t *ord2node;
1540 assert ( pb -> ptt_size == PTrieSize ( tt ) );
1541 assert ( self -> count >= PTrieCount ( tt ) );
1542 ord2node = ( void* ) ( ( char* ) addr + pb -> ptt_size );
1543 assert ( ( ( size_t ) ord2node & 3 ) == 0 );
1544
1545 /* set count */
1546 * ord2node ++ = self -> count;
1547
1548 if ( ! is_sparse )
1549 rc = KTrieIndexPersistProjContig_v2 ( self, pb, tt, ord2node );
1550 else
1551 {
1552 bitsz_t psize;
1553 rc = KTrieIndexPersistProjSparse_v2 ( self, pb, tt, ord2node, & psize );
1554 if ( rc == 0 )
1555 {
1556 map_size -= ( size_t ) self -> count << 3;
1557 map_size += ( psize + 7 ) >> 3;
1558 }
1559 }
1560
1561 /* done with pttree */
1562 PTrieWhack ( tt );
1563 }
1564 rc = KFileWrite ( pb -> f, file_size,
1565 (uint8_t*)addr + num_to_read, map_size - num_to_read, & num_writ );
1566 if ( rc == 0 && num_writ != map_size - num_to_read )
1567 rc = RC ( rcDB, rcIndex, rcPersisting, rcHeader, rcInsufficient );
1568 free ( addr );
1569 }
1570
1571 return rc;
1572 }
1573
1574 #else
1575
1576 static
KTrieIndexPersistProj_v3(const KTrieIndex_v2 * self,PersistTrieData * pb)1577 rc_t KTrieIndexPersistProj_v3 ( const KTrieIndex_v2 *self, PersistTrieData *pb )
1578 {
1579 rc_t rc = 0;
1580 void * addr;
1581 size_t map_size;
1582 uint64_t file_size;
1583 size_t num_to_read;
1584 uint64_t num_ids;
1585 bool is_sparse;
1586
1587 /* there must be something to do */
1588 if ( self -> count == 0 || self -> ord2node == NULL )
1589 return 0;
1590
1591 /* calculate what kind of projection strategy to use:
1592 when avg ( id span ) <= 2.0, just use a straight array.
1593 otherwise, use two arrays: first for node ids and last
1594 being 1st derivative of positional start_ids.
1595
1596 the calculation of the ratio would be
1597 num_ids = self -> last - self -> first + 1;
1598 ratio = num_ids / self -> count;
1599 if ( ratio <= 2.0 )
1600 use 1-1
1601 else
1602 use sparse
1603
1604 by reorganizing the comparison, we get
1605 if ( num_ids <= 2 * self -> count )...
1606 */
1607 num_ids = self -> last - self -> first + 1;
1608 if ( num_ids <= ( ( uint64_t ) self -> count << 1 ) )
1609 {
1610 /* store 1-1 projection index */
1611 is_sparse = false;
1612
1613 /* map size is 4 bytes per id */
1614 map_size = pb -> ptt_size + ( ( size_t ) num_ids << 2 );
1615 }
1616 else
1617 {
1618 /* store sparse projection index */
1619 is_sparse = true;
1620
1621 /* map size for node ids is 4 bytes per slot */
1622 map_size = pb -> ptt_size + ( ( size_t ) self -> count << 2 );
1623
1624 /* map size for 1st derivative ids is initially 8 bytes per slot
1625 used initially to store full ids and then reduced 1st deriv. */
1626 map_size += ( size_t ) self -> count << 3;
1627 }
1628
1629 /* add in 4 bytes for count */
1630 map_size += 4;
1631
1632 rc = KFileSize ( pb -> f, & file_size );
1633 if ( rc == 0 )
1634 {
1635 /* create an updatable region spanning from end of header,
1636 starting from PTrie and extending to end of projection index */
1637 addr = malloc ( map_size );
1638 if ( addr == NULL )
1639 rc = RC ( rcDB, rcIndex, rcPersisting, rcMemory, rcExhausted );
1640 else
1641 {
1642 size_t num_read;
1643 num_to_read = file_size - sizeof ( KPTrieIndexHdr_v3 );
1644 rc = KFileReadAll ( pb -> f, sizeof ( KPTrieIndexHdr_v3 ),
1645 addr, num_to_read, & num_read );
1646 if ( rc != 0 )
1647 free ( addr );
1648 else if ( num_read != num_to_read )
1649 {
1650 free ( addr );
1651 rc = RC ( rcDB, rcIndex, rcPersisting, rcHeader, rcInsufficient );
1652 }
1653 }
1654 }
1655
1656 if ( rc == 0 )
1657 {
1658 size_t num_writ;
1659 /* inflate the PTrie */
1660 PTrie *tt;
1661 #if KDBINDEXVERS > 3
1662 rc = PTrieMake ( & tt, addr, pb -> ptt_size, self -> pt . byteswap );
1663 #else
1664 rc = PTrieMakeOrig ( & tt, addr, pb -> ptt_size, self -> pt . byteswap );
1665 #endif
1666 if ( rc == 0 )
1667 {
1668 uint32_t *ord2node;
1669 assert ( pb -> ptt_size == PTrieSize ( tt ) );
1670 assert ( self -> count >= PTrieCount ( tt ) );
1671 ord2node = ( void* ) ( ( char* ) addr + pb -> ptt_size );
1672 assert ( ( ( size_t ) ord2node & 3 ) == 0 );
1673
1674 /* set count */
1675 * ord2node ++ = self -> count;
1676
1677 if ( ! is_sparse )
1678 rc = KTrieIndexPersistProjContig_v2 ( self, pb, tt, ord2node );
1679 else
1680 {
1681 bitsz_t psize;
1682 rc = KTrieIndexPersistProjSparse_v2 ( self, pb, tt, ord2node, & psize );
1683 if ( rc == 0 )
1684 {
1685 map_size -= ( size_t ) self -> count << 3;
1686 map_size += ( psize + 7 ) >> 3;
1687 }
1688 }
1689
1690 /* done with pttree */
1691 PTrieWhack ( tt );
1692
1693 if ( rc == 0 )
1694 {
1695 rc = KFileWrite ( pb -> f, file_size,
1696 ( uint8_t* ) addr + num_to_read, map_size - num_to_read, & num_writ );
1697
1698 if ( rc == 0 && num_writ != map_size - num_to_read )
1699 rc = RC ( rcDB, rcIndex, rcPersisting, rcHeader, rcInsufficient );
1700 }
1701 }
1702
1703 free ( addr );
1704 }
1705
1706 return rc;
1707 }
1708
1709 #endif
1710
1711 static
KTrieIndexCreateMD5Wrapper(KDirectory * dir,KFile ** fp,KMD5File ** wrapper,char relpath[256],const char md5_relpath[260])1712 rc_t KTrieIndexCreateMD5Wrapper ( KDirectory *dir, KFile ** fp, KMD5File ** wrapper,
1713 char relpath [ 256 ], const char md5_relpath [ 260 ] )
1714 {
1715 /* create the md5 file for read/write */
1716 KFile *f;
1717 rc_t rc = KDirectoryCreateFile ( dir, & f, true,
1718 0664, kcmInit, "%s", md5_relpath );
1719 if ( rc == 0 )
1720 {
1721 /* create an md5sum formatter */
1722 KMD5SumFmt *fmt;
1723 rc = KMD5SumFmtMakeUpdate ( & fmt, f );
1724 if ( rc == 0 )
1725 {
1726 int dot_pos;
1727
1728 /* convert relative path to a leaf */
1729 char *leaf = strrchr ( relpath, '/' );
1730 if ( leaf ++ == NULL )
1731 leaf = relpath;
1732
1733 /* trim off ".tmp" from "leaf"
1734 so that the format string reflects final name
1735 without the need to rename later */
1736 dot_pos = strlen ( leaf ) - 4;
1737 assert ( dot_pos > 0 );
1738 assert ( strcmp ( & leaf [ dot_pos ], ".tmp" ) == 0 );
1739 leaf [ dot_pos ] = 0;
1740
1741 /* "fmt" now owns "f" */
1742 f = NULL;
1743
1744 /* create a file wrapper that calculates and prints md5 */
1745 rc = KMD5FileMakeWrite ( wrapper, * fp, fmt, leaf );
1746
1747 /* "wrapper" attaches a reference to "fmt", so we have to
1748 dump our reference regardless of "rc" */
1749 KMD5SumFmtRelease ( fmt );
1750
1751 /* restore dot */
1752 leaf [ dot_pos ] = '.';
1753
1754 /* if we succeeded, swap the "wrapper" for input file */
1755 if ( rc == 0 )
1756 {
1757 * fp = KMD5FileToKFile ( * wrapper );
1758 return 0;
1759 }
1760 }
1761
1762 /* failed */
1763 KFileRelease ( f );
1764 }
1765
1766 return rc;
1767 }
1768
KTrieIndexPersist_v2(const KTrieIndex_v2 * self,bool proj,KDirectory * dir,const char * path,bool use_md5)1769 rc_t KTrieIndexPersist_v2 ( const KTrieIndex_v2 *self,
1770 bool proj, KDirectory *dir, const char *path, bool use_md5 )
1771 {
1772 rc_t rc;
1773 PersistTrieData pb;
1774
1775 assert ( self != NULL );
1776 if ( self -> count == 0 )
1777 return 0;
1778
1779 pb . fmd5 = NULL;
1780
1781 /** Trie may have holes in serialization due to memory alignments ***/
1782 pb . buffer = calloc ( pb . bsize = 32 * 1024, 1 );
1783 if ( pb . buffer == NULL )
1784 rc = RC ( rcDB, rcIndex, rcPersisting, rcMemory, rcExhausted );
1785 else
1786 {
1787 /* determine the name of the file:
1788 it is created under a temporary name
1789 relative to the directory provided */
1790 char tmpname [ 256 ];
1791 rc = KDirectoryResolvePath ( dir, false,
1792 tmpname, sizeof tmpname, "%s.tmp", path );
1793 if ( rc == 0 )
1794 {
1795 /* create the name of temporary md5 file */
1796 char tmpmd5name [ 260 ];
1797 sprintf ( tmpmd5name, "%s.md5", tmpname );
1798
1799 /* create the output file under temporary name
1800 ? why does it need read/write capabilities? */
1801 rc = KDirectoryCreateFile ( dir, & pb . f,
1802 true, 0664, kcmInit, "%s", tmpname );
1803 if ( rc == 0 )
1804 {
1805 /* if using md5, wrap output file */
1806 if ( use_md5 )
1807 rc = KTrieIndexCreateMD5Wrapper ( dir, & pb . f, & pb . fmd5, tmpname, tmpmd5name );
1808 if ( rc == 0 )
1809 {
1810 /* initial size */
1811 pb . ptt_size = 0;
1812 #if KDBINDEXVERS == 2
1813 KTrieIndexPersistHdr_v2 ( ( KTrieIndex_v2* ) self, & pb );
1814 #else
1815 KTrieIndexPersistHdr_v3_v4 ( ( KTrieIndex_v2* ) self, & pb );
1816 #endif
1817
1818 /* persist tree */
1819 rc = KTrieIndexPersistTrie_v2 ( self, & pb );
1820 if ( rc == 0 )
1821 {
1822 /* persist projection table */
1823 if ( proj )
1824 {
1825 #if KDBINDEXVERS == 2
1826 rc = KTrieIndexPersistProj_v2 ( self, & pb );
1827 #else
1828 rc = KTrieIndexPersistProj_v3 ( self, & pb );
1829 #endif
1830 }
1831 }
1832 }
1833
1834 /* close down the file now, success or not */
1835 KFileRelease ( pb . f );
1836 pb . f = NULL;
1837
1838 /* douse buffer and mark NULL in case of later attempt */
1839 free ( pb . buffer );
1840 pb . buffer = NULL;
1841
1842 /* rename the files on success */
1843 if ( rc == 0 )
1844 {
1845 /* works even if "path" is absolute */
1846 rc = KDirectoryRename ( dir, false, tmpname, path );
1847 if ( rc == 0 )
1848 {
1849 int tmplen;
1850
1851 /* done if this was the only file to rename */
1852 if ( ! use_md5 )
1853 return 0;
1854
1855 /* use "tmpname" as a real "md5" name */
1856 tmplen = strlen ( tmpname );
1857 assert ( strcmp ( & tmpname [ tmplen - 4 ], ".tmp" ) == 0 );
1858 strcpy ( & tmpname [ tmplen - 3 ], "md5" );
1859
1860 /* rename md5 file and be done on success */
1861 rc = KDirectoryRename ( dir, false, tmpmd5name, tmpname );
1862 if ( rc == 0 )
1863 return 0;
1864
1865 /* failure here means we have a good index file,
1866 but a bad md5 file, so convert "tmpname" to the
1867 actual name of the index file */
1868 tmpname [ tmplen - 4 ] = 0;
1869 }
1870 }
1871
1872 /* failed, remove the output files here */
1873 KDirectoryRemove ( dir, false, "%s", tmpname );
1874 if ( use_md5 )
1875 KDirectoryRemove ( dir, false, "%s", tmpmd5name );
1876 }
1877 }
1878
1879 /* douse buffer */
1880 free ( pb . buffer );
1881 }
1882
1883 return rc;
1884 }
1885
1886
1887 /* whack whack */
KTrieIndexWhack_v2(KTrieIndex_v2 * self)1888 void KTrieIndexWhack_v2 ( KTrieIndex_v2 *self )
1889 {
1890 KPTrieIndexWhack_v2 ( & self -> pt );
1891 TrieWhack ( & self -> key2id, KTrieIdxNodeWhack_v2, NULL );
1892 free ( self -> ord2node );
1893 }
1894
1895 /* initialize an index from file - can be NULL */
KTrieIndexOpen_v2(KTrieIndex_v2 * self,const KMMap * mm,bool byteswap)1896 rc_t KTrieIndexOpen_v2 ( KTrieIndex_v2 *self, const KMMap *mm, bool byteswap )
1897 {
1898 rc_t rc;
1899 bool ptorig = false;
1900 const KDBHdr *hdr = NULL;
1901
1902 #if ! KTRIE_ZEROS_KPTRIE
1903 #error "KTrie is supposed to zero KPTrie"
1904 #endif
1905 memset ( self, 0, sizeof * self );
1906 self -> pt . byteswap = byteswap;
1907
1908 /* create an empty Trie index,
1909 with numeric but auto-expand character set,
1910 and a bucket size of 512, beyond which the
1911 tree will branch.
1912 */
1913 rc = TrieInit ( & self -> key2id, "0-9", 512, true );
1914 if ( rc != 0 )
1915 return rc;
1916
1917 /* when opened for create, there will be no existing index */
1918 if ( mm == NULL )
1919 return 0;
1920
1921 rc = KMMapAddrRead ( mm, (const void**) & hdr );
1922 if ( rc != 0 )
1923 return rc;
1924
1925 switch ( hdr -> version )
1926 {
1927 case 1:
1928 rc = KPTrieIndexInitFromV1_v2 ( & self -> pt, mm, byteswap );
1929 break;
1930 case 2:
1931 rc = KPTrieIndexInit_v2 ( & self -> pt, mm, byteswap );
1932 break;
1933 case 3:
1934 ptorig = true;
1935 case 4:
1936 rc = KPTrieIndexInit_v3_v4 ( & self -> pt, mm, byteswap, ptorig );
1937 break;
1938 default:
1939 rc = RC(rcDB, rcIndex, rcConstructing, rcIndex, rcBadVersion);
1940 }
1941 /* open the prior index in persisted mode, but
1942 don't load it into the core-based Trie */
1943 if ( rc == 0 )
1944 {
1945 /* the file existed but was empty */
1946 if ( self -> pt . key2id == NULL )
1947 {
1948 self -> pt . mm = NULL;
1949 return 0;
1950 }
1951
1952 /* retain a reference to memory map */
1953 rc = KMMapAddRef ( mm );
1954 if ( rc == 0 )
1955 {
1956 self -> pt . mm = mm;
1957 return 0;
1958 }
1959
1960 /* self -> pt gets whacked below */
1961 }
1962
1963 KTrieIndexWhack_v2 ( self );
1964 return rc;
1965 }
1966
1967 /* KTrieIndexPopulate_v2
1968 */
typedef struct KTrieIndexPopulateData_v2_s2 KTrieIndexPopulateData_v2_s2;
/* per-walk state for KTrieIndexPopulate_v2_s2 ( PTrieDoUntil callback ) */
struct KTrieIndexPopulateData_v2_s2
{
    /* base id of the persisted index; node ids are stored as offsets from it */
    int64_t first;

    /* in-core index being repopulated from the persisted copy */
    KTrieIndex_v2 *self;
    /* number of nodes successfully inserted so far */
    uint32_t count;
    /* first error encountered by the callback, 0 on success */
    rc_t rc;

    /* bit widths used to unpack per-node id offset and span */
    uint8_t id_bits;
    uint8_t span_bits;
};
1981
1982 static
KTrieIndexPopulate_v2_s2(PTNode * n,void * data)1983 bool CC KTrieIndexPopulate_v2_s2 ( PTNode *n, void *data )
1984 {
1985 const String *key;
1986 KTrieIndexPopulateData_v2_s2 *pb = data;
1987
1988 int64_t id;
1989 size_t usize;
1990 uint32_t span;
1991
1992 /* capture node data */
1993 assert ( n -> data . size == sizeof id );
1994 pb -> rc = Unpack ( pb -> id_bits, sizeof id * 8,
1995 n -> data . addr, 0, pb -> id_bits, NULL, & id, sizeof id, & usize );
1996 if ( pb -> rc == 0 )
1997 {
1998 pb -> rc = Unpack ( pb -> span_bits, sizeof span * 8,
1999 n -> data . addr, pb -> id_bits, pb -> span_bits, NULL, & span, sizeof span, & usize );
2000 }
2001 if ( pb -> rc != 0 )
2002 return true;
2003
2004 pb -> rc = PTNodeMakeKey ( n, & key );
2005 if ( pb -> rc == 0 )
2006 {
2007 KTrieIdxNode_v2_s2 *node;
2008 pb -> rc = KTrieIdxNodeMake_v2_s2 ( & node, key, id + pb -> first );
2009 StringWhack ( ( String* ) key );
2010 if ( pb -> rc == 0 )
2011 {
2012 node -> span = span;
2013
2014 pb -> rc = TrieInsert ( & pb -> self -> key2id, & node -> n );
2015 if ( pb -> rc == 0 )
2016 {
2017 ++ pb -> count;
2018 return false;
2019 }
2020
2021 KTrieIdxNodeWhack_v2 ( & node -> n, NULL );
2022 }
2023 }
2024
2025 return true;
2026 }
2027
2028 /* KTrieIndexAttach_v2
2029 * attach a keymap to an existing table
2030 */
2031 static
KTrieIndexPopulate_v2_s1(KTrieIndex_v2 * self,uint32_t i,int64_t idd)2032 rc_t KTrieIndexPopulate_v2_s1 ( KTrieIndex_v2 *self, uint32_t i, int64_t idd )
2033 {
2034 rc_t rc;
2035 uint32_t nid = self -> pt . ord2node [ i ];
2036
2037 if ( i != 0 && self -> pt . ord2node [ i - 1 ] == nid )
2038 return 0;
2039
2040 i = self -> count;
2041
2042 if ( nid == 0 )
2043 {
2044 rc = KTrieIdxNodeMakeHole_v2_s1 ( & self -> ord2node [ i ], self -> pt . first + idd );
2045 if ( rc == 0 )
2046 self -> count = i + 1;
2047 }
2048 else
2049 {
2050 PTNode pnode;
2051 rc = PTrieGetNode ( self -> pt . key2id, & pnode, nid );
2052 if ( rc == 0 )
2053 {
2054 const String *key;
2055 rc = PTNodeMakeKey ( & pnode, & key );
2056 if ( rc == 0 )
2057 {
2058 rc = KTrieIdxNodeMake_v2_s1 ( & self -> ord2node [ i ],
2059 key, self -> pt . first + idd );
2060 StringWhack ( ( String* ) key );
2061 if ( rc == 0 )
2062 {
2063 rc = TrieInsert ( & self -> key2id, & self -> ord2node [ i ] -> n );
2064 if ( rc != 0 )
2065 KTrieIdxNodeWhack_v2 ( & self -> ord2node [ i ] -> n, NULL );
2066 else
2067 self -> count = i + 1;
2068 }
2069 }
2070 }
2071 }
2072
2073 return rc;
2074 }
2075
KTrieIndexAttach_v2(KTrieIndex_v2 * self,bool proj)2076 rc_t KTrieIndexAttach_v2 ( KTrieIndex_v2 *self, bool proj )
2077 {
2078 rc_t rc = 0;
2079
2080 /* if persisted index is empty, bail */
2081 if ( self -> count != 0 || self -> pt . count == 0 )
2082 return 0;
2083
2084 /* see if we can use existing projection index */
2085 if ( proj && self -> pt . ord2node != NULL )
2086 {
2087 uint32_t i;
2088
2089 self -> ord2node =
2090 malloc ( ( ( self -> pt . count + 4095 ) & ~ 4095 ) * sizeof self -> ord2node [ 0 ] );
2091 if ( self -> ord2node == NULL )
2092 return RC ( rcDB, rcIndex, rcUpdating, rcMemory, rcExhausted );
2093
2094 /* we were called because our count is 0 */
2095 assert ( self -> count == 0 );
2096
2097 /* handle variant */
2098 assert ( self -> pt . variant == 0 || self -> pt . id2ord . v8 != NULL );
2099 switch ( self -> pt . variant )
2100 {
2101 case 0: /* 1-1 id to name */
2102 for ( rc = 0, i = 0; i < self -> pt . count && rc == 0; ++ i )
2103 rc = KTrieIndexPopulate_v2_s1 ( self, i, i );
2104 break;
2105 case 1: /* sparse 8-bit */
2106 for ( rc = 0, i = 0; i < self -> pt . count && rc == 0; ++ i )
2107 rc = KTrieIndexPopulate_v2_s1 ( self, i, self -> pt . id2ord . v8 [ i ] );
2108 break;
2109 case 2: /* sparse 16-bit */
2110 for ( rc = 0, i = 0; i < self -> pt . count && rc == 0; ++ i )
2111 rc = KTrieIndexPopulate_v2_s1 ( self, i, self -> pt . id2ord . v16 [ i ] );
2112 break;
2113 case 3: /* sparse 32-bit */
2114 for ( rc = 0, i = 0; i < self -> pt . count && rc == 0; ++ i )
2115 rc = KTrieIndexPopulate_v2_s1 ( self, i, self -> pt . id2ord . v32 [ i ] );
2116 break;
2117 case 4: /* sparse 64-bit */
2118 for ( rc = 0, i = 0; i < self -> pt . count && rc == 0; ++ i )
2119 rc = KTrieIndexPopulate_v2_s1 ( self, i, self -> pt . id2ord . v64 [ i ] );
2120 break;
2121 }
2122
2123 if ( rc != 0 )
2124 {
2125 for ( i = self -> count; i > 0; )
2126 KTrieIdxNodeWhack_v2 ( & self -> ord2node [ -- i ] -> n, NULL );
2127 free ( self -> ord2node ), self -> ord2node = NULL;
2128 return rc;
2129 }
2130 }
2131 else
2132 {
2133 KTrieIndexPopulateData_v2_s2 pb;
2134 pb . first = self -> pt . first;
2135 pb . self = self;
2136 pb . count = 0;
2137 pb . rc = 0;
2138 PTrieDoUntil ( self -> pt . key2id, KTrieIndexPopulate_v2_s2, & pb );
2139 if ( pb . rc == 0 && pb . count != self -> pt . count )
2140 return RC ( rcDB, rcIndex, rcUpdating, rcIndex, rcCorrupt );
2141 self -> count = pb . count;
2142 }
2143
2144 /* record known dimensions */
2145 self -> first = self -> pt . first;
2146 self -> last = self -> pt . last;
2147
2148 /* should be able to drop persisted copy now */
2149 KPTrieIndexWhack_v2 ( & self -> pt );
2150
2151 return 0;
2152 }
2153
KTrieIndexInsert_v2(KTrieIndex_v2 * self,bool proj,const char * str,int64_t id)2154 rc_t KTrieIndexInsert_v2 ( KTrieIndex_v2 *self,
2155 bool proj, const char *str, int64_t id )
2156 {
2157 rc_t rc;
2158 String key;
2159 void *ord2node;
2160 uint32_t count;
2161
2162 #if DISABLE_PROJ
2163 proj = false;
2164 #endif
2165
2166 /* get the number of nodes in proj index or Trie.
2167 the persisted tree is only loaded into the in-core
2168 tree for edits ( insert/delete ), so the counts
2169 may differ. also, when in projection mode, the
2170 count refers to the number of array slots, which
2171 can be > the number of Trie nodes if there are
2172 holes in the id space. when not projecting, count
2173 is exactly the number of nodes in the Trie.
2174 */
2175 count = self -> count;
2176
2177 /* detect first modification */
2178 if ( self -> count == 0 )
2179 {
2180 /* detect persisted data */
2181 if ( self -> pt . key2id != NULL )
2182 {
2183 /* load persisted data into core */
2184 rc = KTrieIndexAttach_v2 ( self, proj );
2185 if ( rc != 0 )
2186 return rc;
2187
2188 /* should have loaded everything */
2189 assert ( self -> count != 0 );
2190 count = self -> count;
2191 }
2192 }
2193
2194 /* v2 only allows increasing ids
2195 they don't have to be contiguous,
2196 but they cannot repeat and cannot decrease */
2197 else if ( id <= self -> last )
2198 return RC ( rcDB, rcIndex, rcInserting, rcConstraint, rcViolated );
2199
2200 /* convert key to String */
2201 StringInitCString ( & key, str );
2202
2203 /* insertion strategy depends upon projection index */
2204 if ( proj )
2205 {
2206 KTrieIdxNode_v2_s1 *node;
2207
2208 /* check for extension of last node */
2209 if ( count != 0 )
2210 {
2211 /* a non-zero count implies nodes in projection array */
2212 assert ( self -> ord2node != NULL );
2213
2214 /* get last node */
2215 node = self -> ord2node [ count - 1 ];
2216 assert ( node != NULL );
2217
2218 /* if the keys match, this is an update to the node */
2219 if ( StringEqual ( & key, & node -> n . key ) )
2220 {
2221 /* this must be an extension of range */
2222 if ( id != self -> last + 1 )
2223 return RC ( rcDB, rcIndex, rcInserting, rcConstraint, rcViolated );
2224
2225 /* extend and done */
2226 self -> last = id;
2227 return 0;
2228 }
2229
2230 /* not last node - create a hole if needed */
2231 if ( id != self -> last + 1 )
2232 {
2233 /* extend array if needed
2234 should never have to handle initial insert,
2235 but would be happy to do so if asked */
2236 if ( ( count & 4095 ) == 0 )
2237 {
2238 ord2node = realloc ( self -> ord2node,
2239 ( count + 4096 ) * sizeof self -> ord2node [ 0 ] );
2240 if ( ord2node == NULL )
2241 return RC ( rcDB, rcIndex, rcInserting, rcMemory, rcExhausted );
2242 self -> ord2node = ord2node;
2243 }
2244
2245 /* create NULL mapping */
2246 rc = KTrieIdxNodeMakeHole_v2_s1 ( & node, self -> last + 1 );
2247 if ( rc != 0 )
2248 return rc;
2249
2250 /* NB - this will cause count to be > num_nodes in Trie */
2251 self -> ord2node [ count ++ ] = node;
2252 }
2253 }
2254
2255 /* make a new mapping starting with id */
2256 rc = KTrieIdxNodeMake_v2_s1 ( & node, & key, id );
2257 if ( rc == 0 )
2258 {
2259 /* attempt insertion */
2260 rc = TrieInsertUnique ( & self -> key2id, & node -> n, NULL );
2261 if ( rc == 0 )
2262 {
2263 /* create or extend projection array */
2264 if ( ( count & 4095 ) == 0 )
2265 {
2266 ord2node = realloc ( self -> ord2node,
2267 ( count + 4096 ) * sizeof self -> ord2node [ 0 ] );
2268 if ( ord2node == NULL )
2269 rc = RC ( rcDB, rcIndex, rcInserting, rcMemory, rcExhausted );
2270 else
2271 self -> ord2node = ord2node;
2272 }
2273
2274 if ( rc == 0 )
2275 {
2276 /* set/extend range, detecting first insertion */
2277 self -> last = id;
2278 if ( count == 0 )
2279 self -> first = id;
2280
2281 /* project */
2282 self -> ord2node [ count ] = node;
2283 self -> count = count + 1;
2284
2285 /* insertion complete */
2286 return 0;
2287 }
2288
2289 /* remove failed insertion */
2290 TrieUnlink ( & self -> key2id, & node -> n );
2291 }
2292
2293 /* clean up new node */
2294 KTrieIdxNodeWhack_v2 ( & node -> n, NULL );
2295 }
2296
2297 /* clean up insertion of hole */
2298 if ( count != self -> count )
2299 {
2300 assert ( count - 1 == self -> count );
2301 KTrieIdxNodeWhack_v2 ( & self -> ord2node [ count - 1 ] -> n, NULL );
2302 }
2303 }
2304 else
2305 {
2306 KTrieIdxNode_v2_s2 *node;
2307
2308 /* make a new mapping starting with id and a span of 1 */
2309 rc = KTrieIdxNodeMake_v2_s2 ( & node, & key, id );
2310 if ( rc == 0 )
2311 {
2312 /* attempt insertion */
2313 KTrieIdxNode_v2_s2 *exist;
2314 rc = TrieInsertUnique ( & self -> key2id, & node -> n, ( TNode** ) & exist );
2315 if ( rc == 0 )
2316 {
2317 /* set/extend range, detecting first insertion */
2318 if ( count == 0 )
2319 {
2320 self -> max_span = 1;
2321 self -> first = id;
2322 }
2323 self -> last = id;
2324
2325 /* insertion complete */
2326 self -> count = count + 1;
2327 return 0;
2328 }
2329
2330 /* clean up new node */
2331 KTrieIdxNodeWhack_v2 ( & node -> n, NULL );
2332
2333 /* check existing for proper extension */
2334 if ( exist != NULL )
2335 {
2336 if ( id == exist -> start_id + exist -> span )
2337 {
2338 assert ( count != 0 );
2339
2340 /* we already know id > last
2341 and that it boarders the range of "exist"
2342 so it must be last + 1 */
2343 assert ( id - 1 == self -> last );
2344 self -> last = id;
2345
2346 /* extend the span of "exist" */
2347 ++ exist -> span;
2348 if ( exist -> span > self -> max_span )
2349 self -> max_span = exist -> span;
2350
2351 return 0;
2352 }
2353 }
2354 }
2355 }
2356
2357 return rc;
2358 }
2359
2360 /* drop string from trie and all mappings */
/* KTrieIndexDelete_v2
 *  drops the mapping for "str" from the trie and, when a projection
 *  index is maintained, neutralizes its reverse ( id -> key ) entry
 *
 *  "proj" [ IN ] - true if the projection index is being maintained
 *  "str" [ IN ] - NUL-terminated key to remove
 *
 *  returns rcNotFound if "str" is not present in the index
 */
rc_t KTrieIndexDelete_v2 ( KTrieIndex_v2 *self, bool proj, const char *str )
{
    rc_t rc;
    String key;
    TNode *tnode;
    /* uint32_t count; */

#if DISABLE_PROJ
    /* projection support compiled out - override caller's request */
    proj = false;
#endif

    /* detect first modification */
    /* count = self -> count; */
    if ( self -> count != 0 )
    {
        /* detect persisted data - realize it in-core before mutating */
        if ( self -> pt . key2id != NULL )
        {
            rc = KTrieIndexAttach_v2 ( self, proj );
            if ( rc != 0 )
                return rc;
        }
    }

    StringInitCString ( & key, str );

    /* interface states that all objects are dropped.
       however, this implementation only allows unique
       mappings to a contig range, so a simple find is sufficient */
    tnode = TrieFind ( & self -> key2id, & key );
    if ( tnode == NULL )
        return RC ( rcDB, rcIndex, rcRemoving, rcString, rcNotFound );

    /* remove from trie */
    TrieUnlink ( & self -> key2id, tnode );

    /* neutralize node in projection index:
       the node stays allocated and referenced from ord2node with an
       emptied key - presumably to keep the ordinal positions of the
       remaining entries stable ( TODO confirm against ord2node users ) */
    if ( proj )
    {
        KTrieIdxNode_v2_s1 *node = ( KTrieIdxNode_v2_s1* ) tnode;
        uint32_t ord = KTrieIndexNode2Ord_v2 ( self, node );
        if ( ord != 0 )
        {
            self -> ord2node [ ord - 1 ] -> n . key . size = 0;
            self -> ord2node [ ord - 1 ] -> n . key . len = 0;
            self -> ord2node [ ord - 1 ] -> key [ 0 ] = 0;
            /* node intentionally NOT whacked - ord2node still refers to it */
            return 0;
        }
    }

    /* whack node - nothing else refers to it */
    KTrieIdxNodeWhack_v2 ( tnode, NULL );

    return 0;
}
2416
2417 /* map key to id range */
/* KTrieIndexFind_v2
 *  maps a key string to its id range
 *
 *  "str" [ IN ] - NUL-terminated key to look up
 *  "start_id" [ OUT ] - first id of the contiguous range mapped to "str"
 *  "span" [ OUT, conditional ] - number of ids in the range
 *  "custom_cmp", "data" [ IN ] - optional comparison override, forwarded
 *  to the persisted-index search
 *  "convertFromV1" [ IN ] - forwarded to the persisted-index search
 *
 *  returns rcNotFound if "str" is not present
 */
rc_t KTrieIndexFind_v2 ( const KTrieIndex_v2 *self,
    const char *str, int64_t *start_id,
#if V2FIND_RETURNS_SPAN
    uint32_t *span,
#endif
    int ( CC * custom_cmp ) ( const void *item, const PBSTNode *n, void *data ), void *data, bool convertFromV1 )
{
    /* search within in-core index
       NB - a non-zero count implies any persisted data were already
       attached in-core, so the pt search below is not needed here */
    if ( self -> count != 0 )
    {
        const TNode *tnode;

        String key;
        StringInitCString ( & key, str );

        tnode = TrieFind ( & self -> key2id, & key );
        if ( tnode != NULL )
        {
            /* a projection array means nodes are the s1 flavor:
               span is derived from neighboring start ids */
            if ( self -> ord2node != NULL )
            {
                const KTrieIdxNode_v2_s1 *node = ( const KTrieIdxNode_v2_s1* ) tnode;
                uint32_t ord = KTrieIndexNode2Ord_v2 ( self, node );
                if ( ord == 0 )
                    return RC ( rcDB, rcIndex, rcSelecting, rcIndex, rcCorrupt );

                * start_id = node -> start_id;
#if V2FIND_RETURNS_SPAN
                /* last node extends to self -> last; otherwise the next
                   node's start_id delimits the range */
                if ( ord == self -> count )
                    * span = ( uint32_t ) ( self -> last - node -> start_id + 1 );
                else
                    * span = ( uint32_t ) ( self -> ord2node [ ord ] -> start_id - node -> start_id );
#endif
            }
            else
            {
                /* no projection - s2 nodes carry their span directly */
                const KTrieIdxNode_v2_s2 *node = ( const KTrieIdxNode_v2_s2* ) tnode;
                * start_id = node -> start_id;
#if V2FIND_RETURNS_SPAN
                * span = node -> span;
#endif
            }

            return 0;
        }
    }

    /* search within persisted index */
    else if ( self -> pt . key2id != NULL )
    {
        return KPTrieIndexFind_v2 ( & self -> pt, str, start_id,
#if V2FIND_RETURNS_SPAN
            span,
#endif
            custom_cmp, data, convertFromV1 );
    }

    return RC ( rcDB, rcIndex, rcSelecting, rcString, rcNotFound );
}
2476
/* projection index id to key-string
 *  parameter block for the TrieDoUntil scan performed by
 *  KTrieIndexProject_v2 when no projection array exists */
typedef struct KTrieIndexProjectData_v2 KTrieIndexProjectData_v2;
struct KTrieIndexProjectData_v2
{
    /* id being sought ( input to the scan ) */
    int64_t id;
    /* node whose [ start_id, start_id + span ) contains "id"
       ( output - set by KTrieIndexProjectScan_v2 on a hit ) */
    const KTrieIdxNode_v2_s2 *node;
};
2484
2485 static
KTrieIndexProjectScan_v2(TNode * n,void * data)2486 bool CC KTrieIndexProjectScan_v2 ( TNode *n, void *data )
2487 {
2488 KTrieIndexProjectData_v2 *pb = (KTrieIndexProjectData_v2 *)data;
2489 const KTrieIdxNode_v2_s2 *node = ( const KTrieIdxNode_v2_s2* ) n;
2490
2491 if ( pb -> id >= node -> start_id &&
2492 pb -> id < node -> start_id + node -> span )
2493 {
2494 pb -> node = node;
2495 return true;
2496 }
2497
2498 return false;
2499 }
2500
KTrieIndexProject_v2(const KTrieIndex_v2 * self,int64_t id,int64_t * start_id,uint32_t * span,char * key_buff,size_t buff_size,size_t * actsize)2501 rc_t KTrieIndexProject_v2 ( const KTrieIndex_v2 *self,
2502 int64_t id,
2503 #if V2FIND_RETURNS_SPAN
2504 int64_t *start_id, uint32_t *span,
2505 #endif
2506 char *key_buff, size_t buff_size, size_t *actsize )
2507 {
2508 if ( self -> count != 0 )
2509 {
2510 if ( self -> ord2node != NULL )
2511 {
2512 uint32_t ord = KTrieIndexID2Ord_v2 ( self, id );
2513 if ( ord != 0 )
2514 {
2515 const KTrieIdxNode_v2_s1 *node = self -> ord2node [ ord - 1 ];
2516
2517 if (actsize)
2518 *actsize = node -> n . key . size;
2519 if ( node -> n . key . size >= buff_size )
2520 return RC ( rcDB, rcIndex, rcProjecting, rcBuffer, rcInsufficient );
2521 string_copy ( key_buff, buff_size,
2522 node -> n . key . addr, node -> n . key . size );
2523 * start_id = node -> start_id;
2524 * span = ( ( ord == self -> count ) ?
2525 ( self -> last + 1 ) : ( self -> ord2node [ ord ] -> start_id ) ) - node -> start_id;
2526 return 0;
2527 }
2528 }
2529 else
2530 {
2531 KTrieIndexProjectData_v2 pb;
2532 pb . id = id;
2533 if ( TrieDoUntil ( & self -> key2id, KTrieIndexProjectScan_v2, & pb ) )
2534 {
2535 const KTrieIdxNode_v2_s2 *node = pb . node;
2536
2537 if (actsize)
2538 *actsize = node -> n . key . size;
2539 if ( node -> n . key . size >= buff_size )
2540 return RC ( rcDB, rcIndex, rcProjecting, rcBuffer, rcInsufficient );
2541 string_copy ( key_buff, buff_size,
2542 node -> n . key . addr, node -> n . key . size );
2543 * start_id = node -> start_id;
2544 * span = node -> span;
2545 return 0;
2546 }
2547 }
2548 }
2549
2550 else if ( self -> pt . ord2node != NULL )
2551 {
2552 return KPTrieIndexProject_v2 ( & self -> pt, id,
2553 #if V2FIND_RETURNS_SPAN
2554 start_id, span,
2555 #endif
2556 key_buff, buff_size, actsize );
2557 }
2558
2559 return RC ( rcDB, rcIndex, rcProjecting, rcId, rcNotFound );
2560 }
2561