1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 #include <kdb/extern.h>
28 #include "windex-priv.h"
29 #include "kdbfmt-priv.h"
30 #include <klib/ptrie.h>
31 #include <klib/text.h>
32 #include <kfs/directory.h>
33 #include <kfs/file.h>
34 #include <kfs/md5.h>
35 #include <kfs/mmap.h>
36 #include <klib/rc.h>
37 #include <os-native.h>
38 #include <sysalloc.h>
39
40 #include <stdlib.h>
41 #include <limits.h>
42 #include <stdio.h>
43 #include <string.h>
44 #include <errno.h>
45 #include <byteswap.h>
46 #include <assert.h>
47
48 #define DISABLE_PROJ 0
49 #define LIMIT_INSERTS 0
50
51 #if LIMIT_INSERTS
52 #define INSERT_LIMIT 100000000
53 #endif
54
55 /*--------------------------------------------------------------------------
56 * KPTrieIndex_v1
57 * persisted keymap
58 */
59
60 /* KPTrieIndexInit
61 * opens and initializes persisted keymap structure
62 */
KPTrieIndexInit_v1(KPTrieIndex_v1 * self,const KMMap * mm,bool byteswap)63 rc_t KPTrieIndexInit_v1 ( KPTrieIndex_v1 *self, const KMMap *mm, bool byteswap )
64 {
65 size_t size;
66 rc_t rc = KMMapSize ( mm, & size );
67 if ( rc == 0 )
68 {
69 const KDBHdr *hdr;
70 rc = KMMapAddrRead ( mm, ( const void** ) & hdr );
71 if ( rc == 0 )
72 {
73 /* try to create the pttree */
74 rc = PTrieMakeOrig ( & self -> key2id,
75 hdr + 1, size -= sizeof * hdr, byteswap );
76 if ( rc == 0 )
77 {
78 size_t ptsize = PTrieSize ( self -> key2id );
79 if ( ptsize <= size )
80 {
81 /* just in case */
82 self -> mm = NULL;
83
84 /* record for projection */
85 self -> byteswap = byteswap;
86
87 /* it could be stored without projection */
88 if ( ptsize == size )
89 {
90 self -> id2node = NULL;
91 self -> first = self -> last = 0;
92 return 0;
93 }
94
95 /* assume this is projection index */
96 self -> id2node = ( void* )
97 ( ( char* ) ( hdr + 1 ) + ptsize );
98 size -= ptsize;
99
100 /* it must have at least 4 bytes
101 and be 4 byte aligned */
102 if ( size >= sizeof ( uint32_t ) && ( size & 3 ) == 0 )
103 {
104 /* first entry is starting key
105 remaining entries are node ids */
106 self -> first = * self -> id2node ++;
107 size -= sizeof self -> id2node [ 0 ];
108 if ( size == 0 )
109 {
110 /* forget if empty */
111 self -> id2node = NULL;
112 self -> first = self -> last = 0;
113 return 0;
114 }
115 /* remaining entries */
116 self -> last = self -> first + ( size >> 2 ) - 1;
117 return 0;
118 }
119 }
120
121 PTrieWhack ( self -> key2id );
122 self -> key2id = NULL;
123
124 rc = RC ( rcDB, rcIndex, rcConstructing, rcTrie, rcCorrupt );
125 }
126 }
127 }
128
129 return rc;
130 }
131
132 /* KPTrieIndexWhack_v1
133 * closes down keymap
134 */
KPTrieIndexWhack_v1(KPTrieIndex_v1 * self)135 void KPTrieIndexWhack_v1 ( KPTrieIndex_v1 *self )
136 {
137 PTrieWhack ( self -> key2id );
138 KMMapRelease ( self -> mm );
139 memset ( self, 0, sizeof * self );
140 }
141
142
143 /*--------------------------------------------------------------------------
144 * KTrieIdxNode_v1
145 */
146
147 static
KTrieIdxNodeWhack_v1(TNode * n,void * data)148 void CC KTrieIdxNodeWhack_v1 ( TNode *n, void *data )
149 {
150 TNodeWhack ( n );
151 }
152
153 static
KTrieIdxNodeUnlink_v1(TNode * n,void * data)154 void CC KTrieIdxNodeUnlink_v1 ( TNode *n, void *data )
155 {
156 if ( TrieUnlink ( data, n ) )
157 TNodeWhack ( n );
158 }
159
160 static
KTrieIdxNodeCaptureID_v1(TNode * n,void * data)161 void CC KTrieIdxNodeCaptureID_v1 ( TNode *n, void *data )
162 {
163 KTrieIndex_v1 *self = data;
164 KTrieIdxNode_v1 *node = ( KTrieIdxNode_v1* ) n;
165 self -> id2node [ node -> id - self -> first ] = node;
166 }
167
168
169 /*--------------------------------------------------------------------------
170 * KTrieIndex_v1
171 */
172
173 /* KTrieIndexWrite_v1
174 */
175 typedef struct PersistTrieData PersistTrieData;
176 struct PersistTrieData
177 {
178 uint64_t pos;
179 KFile *f;
180 uint8_t *buffer;
181 size_t bsize;
182 size_t marker;
183
184 size_t ptt_size;
185 uint32_t first;
186 uint32_t last;
187 rc_t rc;
188 };
189
190 static
KTrieIndexWrite_v1(void * param,const void * buffer,size_t size,size_t * num_writ)191 rc_t CC KTrieIndexWrite_v1 ( void *param,
192 const void *buffer, size_t size, size_t *num_writ )
193 {
194 PersistTrieData *pb = param;
195 size_t total, to_write;
196
197 for ( total = 0; total < size; total += to_write )
198 {
199 to_write = size - total;
200 if ( pb -> marker + to_write > pb -> bsize )
201 to_write = pb -> bsize - pb -> marker;
202
203 if ( to_write > 0 )
204 {
205 memmove ( pb -> buffer + pb -> marker,
206 ( const uint8_t* ) buffer + total, to_write );
207 pb -> marker += to_write;
208 }
209
210 if ( pb -> marker == pb -> bsize )
211 {
212 size_t num_flushed;
213 pb -> rc = KFileWrite ( pb -> f, pb -> pos,
214 pb -> buffer, pb -> bsize, & num_flushed );
215 if ( pb -> rc != 0 )
216 {
217 * num_writ = 0;
218 return pb -> rc;
219 }
220
221 if ( num_flushed == 0 )
222 {
223 * num_writ = total + to_write;
224 return pb -> rc = RC ( rcDB, rcIndex, rcPersisting, rcTransfer, rcIncomplete );
225 }
226
227 pb -> marker -= num_flushed;
228 pb -> pos += num_flushed;
229
230 if ( pb -> marker != 0 )
231 memmove ( pb -> buffer, pb -> buffer + num_flushed, pb -> marker );
232 }
233 }
234
235 * num_writ = total;
236 return 0;
237 }
238
239 /* KTrieIndexAux_v1
240 */
241 static
KTrieIndexAux_v1(void * param,const void * node,size_t * num_writ,PTWriteFunc write,void * write_param)242 rc_t CC KTrieIndexAux_v1 ( void *param, const void *node, size_t *num_writ,
243 PTWriteFunc write, void *write_param )
244 {
245 const KTrieIdxNode_v1 *n = node;
246
247 if ( write != NULL )
248 {
249 PersistTrieData *pb = param;
250 if ( n -> id < pb -> first )
251 pb -> first = n -> id;
252 if ( n -> id > pb -> last )
253 pb -> last = n -> id;
254
255 return ( * write ) ( write_param, & n -> id, sizeof n -> id, num_writ );
256 }
257
258 * num_writ = sizeof n -> id;
259 return 0;
260 }
261
262 /* KTrieIndexPersist_v1
263 * write keymap to indicated location
264 */
265 static
KTrieIndexPersistTrie_v1(const KTrieIndex_v1 * self,PersistTrieData * pb)266 rc_t KTrieIndexPersistTrie_v1 ( const KTrieIndex_v1 *self, PersistTrieData *pb )
267 {
268 rc_t rc;
269 KDBHdr *hdr;
270
271 pb -> pos = 0;
272
273 hdr = ( KDBHdr* ) pb -> buffer;
274 KDBHdrInit ( hdr, 1 );
275 pb -> marker = sizeof * hdr;
276
277 /* persist the trie to file,
278 using tree-internal key storage,
279 capture the image size in pb */
280 rc = TriePersist ( & self -> key2id, & pb -> ptt_size,
281 false, KTrieIndexWrite_v1, pb, KTrieIndexAux_v1, pb );
282 if ( rc == 0 && pb -> marker != 0 )
283 {
284 size_t num_writ;
285 rc = KFileWrite ( pb -> f, pb -> pos,
286 pb -> buffer, pb -> marker, & num_writ );
287 if ( rc == 0 && num_writ != pb -> marker )
288 rc = RC ( rcDB, rcIndex, rcPersisting, rcTransfer, rcIncomplete );
289 }
290
291 return rc;
292 }
293
294
295 typedef struct PersistReverseData PersistReverseData;
296 struct PersistReverseData
297 {
298 PTrie *tt;
299 uint32_t *a;
300 uint32_t first;
301 uint32_t count;
302 uint32_t notfound;
303 };
304
305 static
KTrieIndexRecordNodeId_v1(TNode * node,void * data)306 void CC KTrieIndexRecordNodeId_v1 ( TNode *node, void *data )
307 {
308 PTNode pn;
309 PersistReverseData *pb = data;
310 const KTrieIdxNode_v1 *n = ( const KTrieIdxNode_v1* ) node;
311
312 /* lookup up the persisted node within image */
313 uint32_t id = PTrieFind ( pb -> tt, & n -> n . key, & pn, NULL, NULL );
314
315 /* write it into array */
316 pb -> a [ n -> id - pb -> first ] = ( uint32_t ) id;
317
318 if ( id == 0 )
319 ++ pb -> notfound;
320 else
321 ++ pb -> count;
322 }
323
324 static
KTrieIndexPersistProj_v1(const KTrieIndex_v1 * self,PersistTrieData * pb)325 rc_t KTrieIndexPersistProj_v1 ( const KTrieIndex_v1 *self, PersistTrieData *pb )
326 {
327 #if 0
328 rc_t rc;
329 KMMap *mmr;
330 size_t map_size;
331
332 /* there must be something to do */
333 if ( pb -> first > pb -> last )
334 return 0;
335
336
337 /* open a read-write map onto file
338 start at offset 0, which is the header,
339 followed by pb -> ptt_size bytes which is the tree,
340 and add on a slot for first id,
341 followed by last - first + 1 slots for id to node id map */
342 map_size = pb -> ptt_size + ( ( pb -> last - pb -> first + 2 ) << 2 );
343
344 rc = KMMapMakeRgnUpdate ( & mmr, pb -> f, sizeof ( KDBHdr ), map_size );
345 if ( rc == 0 )
346 {
347 void *addr;
348 PersistReverseData pb2;
349
350 rc = KMMapAddrUpdate ( mmr, & addr );
351 if ( rc == 0 )
352 {
353 rc = PTrieMakeOrig ( & pb2 . tt, addr, pb -> ptt_size );
354 if ( rc == 0 )
355 {
356 assert ( pb -> ptt_size == PTrieSize ( pb2 . tt ) );
357 pb2 . a = ( void* ) ( ( char* ) addr + pb -> ptt_size );
358 assert ( ( ( size_t ) pb2 . a & 3 ) == 0 );
359
360 /* record first id */
361 * pb2 . a ++ = pb -> first;
362 pb2 . first = pb -> first;
363 pb2 . count = pb2 . notfound = 0;
364
365 /* record all id to node mappings */
366 TrieForEach ( & self -> key2id, KTrieIndexRecordNodeId_v1, & pb2 );
367
368 /* check for having written ids */
369 assert ( pb2 . count == PTrieCount ( pb2 . tt ) );
370
371 /* done with pttree */
372 PTrieWhack ( pb2 . tt );
373
374 /* if there were nodes not found,
375 the initial persist was bad */
376 if ( pb2 . notfound != 0 )
377 rc = RC ( rcDB, rcIndex, rcPersisting, rcTransfer, rcIncomplete );
378 }
379 }
380
381 /* done with map - commits changes to disk */
382 KMMapRelease ( mmr );
383
384 /* truncate file to desired size */
385 KFileSetSize ( pb -> f, map_size + sizeof ( KDBHdr ) );
386 }
387
388 return rc;
389 #else
390 rc_t rc;
391 void * addr;
392 uint64_t file_size;
393 size_t num_to_read;
394 size_t map_size;
395
396 /* there must be something to do */
397 if ( pb -> first > pb -> last )
398 return 0;
399
400 /* open a read-write map onto file
401 start at offset 0, which is the header,
402 followed by pb -> ptt_size bytes which is the tree,
403 and add on a slot for first id,
404 followed by last - first + 1 slots for id to node id map */
405 map_size = pb -> ptt_size + ( ( pb -> last - pb -> first + 2 ) << 2 );
406
407 rc = KFileSize ( pb -> f, & file_size );
408 if ( rc == 0 )
409 {
410 addr = malloc ( map_size );
411 if ( addr == NULL )
412 rc = RC ( rcDB, rcIndex, rcPersisting, rcMemory, rcExhausted );
413 else
414 {
415 size_t num_read;
416 num_to_read = file_size - sizeof ( KDBHdr );
417 rc = KFileReadAll ( pb -> f, sizeof ( KDBHdr ),
418 addr, num_to_read, & num_read );
419 if ( rc != 0 )
420 free ( addr );
421 else if ( num_read != num_to_read )
422 {
423 free ( addr );
424 rc = RC ( rcDB, rcIndex, rcPersisting, rcHeader, rcInsufficient );
425 }
426 }
427 }
428
429 if ( rc == 0 )
430 {
431 size_t num_writ;
432 PersistReverseData pb2;
433
434 rc = PTrieMakeOrig ( & pb2 . tt, addr, pb -> ptt_size, false );
435 if ( rc == 0 )
436 {
437 assert ( pb -> ptt_size == PTrieSize ( pb2 . tt ) );
438 pb2 . a = ( void* ) ( ( char* ) addr + pb -> ptt_size );
439 assert ( ( ( size_t ) pb2 . a & 3 ) == 0 );
440
441 /* record first id */
442 * pb2 . a ++ = pb -> first;
443 pb2 . first = pb -> first;
444 pb2 . count = pb2 . notfound = 0;
445
446 /* record all id to node mappings */
447 TrieForEach ( & self -> key2id, KTrieIndexRecordNodeId_v1, & pb2 );
448
449 /* check for having written ids */
450 assert ( pb2 . count == PTrieCount ( pb2 . tt ) );
451
452 /* done with pttree */
453 PTrieWhack ( pb2 . tt );
454
455 /* if there were nodes not found,
456 the initial persist was bad */
457 if ( pb2 . notfound != 0 )
458 rc = RC ( rcDB, rcIndex, rcPersisting, rcTransfer, rcIncomplete );
459 else
460 {
461 rc = KFileWrite ( pb -> f, file_size, ( uint8_t* ) addr + num_to_read,
462 map_size - num_to_read, & num_writ );
463 if ( rc == 0 && num_writ != map_size - num_to_read )
464 rc = RC ( rcDB, rcIndex, rcPersisting, rcHeader, rcInsufficient );
465 }
466 }
467
468 /* done with map - commits changes to disk */
469 free ( addr );
470
471 /* truncate file to desired size */
472 KFileSetSize ( pb -> f, map_size + sizeof ( KDBHdr ) );
473 }
474
475 return rc;
476 #endif
477 }
478
KTrieIndexPersist_v1(const KTrieIndex_v1 * self,bool proj,KDirectory * dir,const char * path,bool use_md5)479 rc_t KTrieIndexPersist_v1 ( const KTrieIndex_v1 *self,
480 bool proj, KDirectory *dir, const char *path, bool use_md5 )
481 {
482 rc_t rc;
483 PersistTrieData pb;
484
485 assert ( self != NULL );
486
487 pb . buffer = malloc ( pb . bsize = 32 * 1024 );
488 if ( pb . buffer == NULL )
489 rc = RC ( rcDB, rcIndex, rcPersisting, rcMemory, rcExhausted );
490 else
491 {
492 char tmpname [ 256 ];
493 char tmpmd5name [ 256 ];
494 char md5path [ 256 ];
495 rc = KDirectoryResolvePath ( dir, false,
496 tmpname, sizeof tmpname, "%s.tmp", path );
497 if ( rc == 0 )
498 {
499 rc = KDirectoryCreateFile ( dir, & pb . f,
500 true, 0664, kcmInit, "%s", tmpname );
501
502 if (use_md5 && rc == 0 )
503 {
504 size_t tmplen = snprintf ( tmpmd5name, sizeof tmpmd5name, "%s.md5", tmpname );
505 KFile * kf;
506 KMD5SumFmt *km;
507 if ( tmplen >= sizeof ( tmpmd5name ) ) /* can't be == or no NUL */
508 {
509 rc = RC ( rcDB, rcIndex, rcPersisting, rcName, rcExcessive );
510 }
511 else
512 {
513 tmplen = snprintf ( md5path, sizeof md5path, "%s.md5", path );
514
515 if ( tmplen >= sizeof ( md5path ) ) /* can't be == or no NUL */
516 {
517 rc = RC ( rcDB, rcIndex, rcPersisting, rcName, rcExcessive );
518 }
519 else
520 {
521 rc = KDirectoryCreateFile ( dir, &kf, true, 0664,
522 kcmInit, "%s", tmpmd5name );
523 if ( rc == 0 )
524 {
525 rc = KMD5SumFmtMakeUpdate ( &km, kf );
526 if ( rc == 0 )
527 {
528 KMD5File * k5f;
529 kf = NULL;
530 rc = KMD5FileMakeWrite ( &k5f, pb . f, km, path );
531 if ( rc == 0 )
532 {
533 pb . f = KMD5FileToKFile ( k5f );
534 }
535 /* release pass or fail */
536 KMD5SumFmtRelease ( km );
537 }
538 else
539 KFileRelease ( kf );
540 }
541 else
542 KFileRelease ( kf );
543 }
544 }
545 if ( rc != 0 )
546 KFileRelease ( pb . f );
547 }
548
549 if ( rc == 0 )
550 {
551 pb . ptt_size = 0;
552 pb . first = ~ 0;
553 pb . last = 0;
554
555 rc = KTrieIndexPersistTrie_v1 ( self, & pb );
556 if ( rc == 0 )
557 {
558 if ( proj )
559 rc = KTrieIndexPersistProj_v1 ( self, & pb );
560 }
561
562 KFileRelease ( pb . f );
563 pb . f = NULL;
564 }
565 }
566
567 free ( pb . buffer );
568 pb . buffer = NULL;
569
570 if ( rc == 0 )
571 {
572 rc = KDirectoryRename ( dir, false, tmpname, path );
573 if ( rc == 0 )
574 {
575 if ( use_md5 )
576 rc = KDirectoryRename ( dir, false, tmpmd5name, md5path );
577 if ( rc == 0 )
578 {
579 /* done */
580 return 0;
581 }
582 }
583 }
584
585 /* whack temporary file */
586 KDirectoryRemove ( dir, false, "%s", tmpname );
587 if ( use_md5 )
588 KDirectoryRemove ( dir, false, "%s", tmpmd5name );
589 }
590
591 return rc;
592 }
593
594
595 /* whack whack */
KTrieIndexWhack_v1(KTrieIndex_v1 * self)596 void KTrieIndexWhack_v1 ( KTrieIndex_v1 *self )
597 {
598 KPTrieIndexWhack_v1 ( & self -> pt );
599 TrieWhack ( & self -> key2id, KTrieIdxNodeWhack_v1, NULL );
600 }
601
602 /* initialize an index from file - can be NULL */
KTrieIndexOpen_v1(KTrieIndex_v1 * self,const KMMap * mm,bool byteswap)603 rc_t KTrieIndexOpen_v1 ( KTrieIndex_v1 *self, const KMMap *mm, bool byteswap )
604 {
605 rc_t rc;
606
607 memset ( self, 0, sizeof * self );
608 rc = TrieInit ( & self -> key2id, "0-9", 512, true );
609 if ( rc != 0 )
610 return rc;
611
612 self -> first = ~ 0;
613
614 if ( mm == NULL )
615 return 0;
616
617 rc = KPTrieIndexInit_v1 ( & self -> pt, mm, byteswap );
618 if ( rc == 0 )
619 {
620 rc = KMMapAddRef ( mm );
621 if ( rc == 0 )
622 {
623 self -> pt . mm = mm;
624 return 0;
625 }
626 }
627
628 KTrieIndexWhack_v1 ( self );
629 return rc;
630 }
631
632 /* KTrieIndexPopulate_v1
633 */
634 typedef struct KTrieIndexPopulateData_v1 KTrieIndexPopulateData_v1;
635 struct KTrieIndexPopulateData_v1
636 {
637 KTrieIndex_v1 *idx;
638 uint32_t id;
639 rc_t rc;
640 };
641
642 static
KTrieIndexPopulate_v1(PTNode * n,void * data)643 bool CC KTrieIndexPopulate_v1 ( PTNode *n, void *data )
644 {
645 const String *key;
646 KTrieIndexPopulateData_v1 *pb = data;
647
648 /* capture node id */
649 uint32_t id;
650 assert ( n -> data . size == sizeof id );
651 memmove ( & id, n -> data . addr, sizeof id );
652
653 /* reject already mapped */
654 if ( id == pb -> id )
655 {
656 pb -> rc = RC ( rcDB, rcIndex, rcConstructing, rcNode, rcExists );
657 return true;
658 }
659
660 pb -> rc = PTNodeMakeKey ( n, & key );
661 if ( pb -> rc == 0 )
662 {
663 KTrieIdxNode_v1 *node;
664 pb -> rc = TNodeMake ( ( TNode** ) & node,
665 sizeof * node + key -> size );
666 if ( pb -> rc == 0 )
667 {
668 KTrieIndex_v1 *self = pb -> idx;
669
670 StringInit ( & node -> n . key, node -> key, key -> size, key -> len );
671 node -> id = id;
672 string_copy ( node -> key, key -> size + 1,
673 key -> addr, key -> size );
674
675 pb -> rc = TrieInsert ( & self -> key2id, & node -> n );
676 if ( pb -> rc == 0 )
677 {
678 free ( ( void* ) key );
679
680 /* if copying projection index */
681 if ( self -> id2node != NULL )
682 {
683 if ( self -> pt . id2node != NULL )
684 self -> id2node [ node -> id - self -> pt . first ] = node;
685 else
686 {
687 if ( node -> id < self -> first )
688 self -> first = node -> id;
689 if ( node -> id > self -> last )
690 self -> last = node -> id;
691 }
692 }
693 return 0;
694 }
695
696 TNodeWhack ( & node -> n );
697 }
698
699 StringWhack ( ( String* ) key );
700 }
701
702 return true;
703 }
704
705 /* KTrieIndexAttach_v1
706 * attach a keymap to an existing table
707 *
708 * "pkm" [ IN ] - a persisted keymap
709 */
710 static
KTrieIndexAttach_v1(KTrieIndex_v1 * self,bool proj,uint32_t id)711 rc_t KTrieIndexAttach_v1 ( KTrieIndex_v1 *self, bool proj, uint32_t id )
712 {
713 uint32_t proj_len;
714 KTrieIndexPopulateData_v1 pb;
715
716 #if LIMIT_INSERTS
717 proj_len = self -> pt . last - self -> pt . first + 1;
718 #endif
719
720 /* see if we can use existing projection index */
721 if ( proj && self -> pt . id2node != NULL )
722 {
723 /* reject if already mapped */
724 if ( id != 0 &&
725 id >= self -> pt . first &&
726 id <= self -> pt . last &&
727 self -> pt . id2node [ id - self -> pt . first ] != 0 )
728 {
729 return RC ( rcDB, rcIndex, rcUpdating, rcId, rcExists );
730 }
731
732 /* allocate index array */
733 #if LIMIT_INSERTS
734 if ( proj_len > INSERT_LIMIT )
735 return RC ( rcDB, rcIndex, rcUpdating, rcRange, rcExcessive );
736 #else
737 proj_len = self -> pt . last - self -> pt . first + 1;
738 #endif
739 proj_len = ( proj_len + 4095 ) & - 4096;
740 self -> id2node = calloc ( proj_len, sizeof self -> id2node [ 0 ] );
741 if ( self -> id2node == NULL )
742 return RC ( rcDB, rcIndex, rcUpdating, rcMemory, rcExhausted );
743
744 /* record known dimensions */
745 self -> first = self -> pt . first;
746 self -> last = self -> pt . last;
747 self -> len = proj_len;
748 }
749
750 #if LIMIT_INSERTS
751 else if ( proj_len > INSERT_LIMIT )
752 return RC ( rcDB, rcIndex, rcUpdating, rcRange, rcExcessive );
753 #endif
754
755 /* inflate persisted trie */
756 pb . idx = self;
757 pb . id = id;
758 pb . rc = 0;
759 PTrieDoUntil ( self -> pt . key2id, KTrieIndexPopulate_v1, & pb );
760
761 /* if successful but needing to add projection index */
762 if ( pb . rc == 0 && proj && self -> id2node == NULL )
763 {
764 proj_len = self -> last - self -> first + 1;
765 proj_len = ( proj_len + 4095 ) & - 4096;
766 self -> id2node = calloc ( proj_len, sizeof self -> id2node [ 0 ] );
767 if ( self -> id2node == NULL )
768 pb . rc = RC ( rcDB, rcIndex, rcUpdating, rcMemory, rcExhausted );
769 else
770 {
771 self -> len = proj_len;
772 TrieForEach ( & self -> key2id, KTrieIdxNodeCaptureID_v1, self );
773 }
774 }
775
776 if ( pb . rc == 0 )
777 KPTrieIndexWhack_v1 ( & self -> pt );
778 else if ( self -> id2node != NULL )
779 {
780 TrieForEach ( & self -> key2id,
781 KTrieIdxNodeUnlink_v1, & self -> key2id );
782 free ( self -> id2node );
783 self -> id2node = NULL;
784 self -> first = ~0;
785 self -> last = 0;
786 self -> len = 0;
787 }
788 return pb . rc;
789 }
790
791 /* insert string into trie, mapping to 32 bit id */
792 static
KTrieIndexExpandId2Node_v1(KTrieIndex_v1 * self,uint32_t range)793 rc_t KTrieIndexExpandId2Node_v1 ( KTrieIndex_v1 *self, uint32_t range )
794 {
795 KTrieIdxNode_v1 **id2node;
796 range = ( range + 4095 ) & - 4096;
797 id2node = realloc ( self -> id2node, range * sizeof id2node [ 0 ] );
798 if ( id2node == NULL )
799 return RC ( rcDB, rcIndex, rcInserting, rcMemory, rcExhausted );
800
801 self -> id2node = id2node;
802
803 #if ZERO_ID2NODE
804 /* why zero this when it is known to be invalid? */
805 memset ( id2node + self -> len, 0,
806 ( range - self -> len ) * sizeof id2node [ 0 ] );
807 #endif
808
809 self -> len = range;
810
811 return 0;
812 }
813
KTrieIndexInsert_v1(KTrieIndex_v1 * self,bool proj,const char * str,uint32_t id)814 rc_t KTrieIndexInsert_v1 ( KTrieIndex_v1 *self,
815 bool proj, const char *str, uint32_t id )
816 {
817 rc_t rc;
818
819 String key;
820 KTrieIdxNode_v1 *node;
821
822 #if DISABLE_PROJ
823 proj = false;
824 #endif
825
826 /* detect first modification */
827 if ( self -> last < self -> first )
828 {
829 /* detect persisted data */
830 if ( self -> pt . key2id != NULL )
831 {
832 rc = KTrieIndexAttach_v1 ( self, proj, id );
833 if ( rc != 0 )
834 return rc;
835 }
836
837 /* create empty projection array */
838 else if ( proj )
839 {
840 self -> id2node = malloc ( 4096 * sizeof self -> id2node [ 0 ] );
841 if ( self -> id2node == NULL )
842 return RC ( rcDB, rcIndex, rcInserting, rcMemory, rcExhausted );
843 self -> first = self -> last = id;
844 self -> len = 4096;
845 }
846 }
847
848 /* reject if already mapped */
849 else if ( self -> id2node != NULL &&
850 id >= self -> first &&
851 id <= self -> last &&
852 self -> id2node [ id - self -> first ] != NULL )
853 {
854 return RC ( rcDB, rcIndex, rcInserting, rcId, rcExists );
855 }
856 #if LIMIT_INSERTS && INSERT_LIMIT > 0
857 else if ( ( self -> last - self -> first ) >= ( INSERT_LIMIT - 1 ) )
858 {
859 return RC ( rcDB, rcIndex, rcUpdating, rcRange, rcExcessive );
860 }
861 #endif
862
863 StringInitCString ( & key, str );
864 rc = TNodeMake ( ( TNode** ) & node, sizeof * node + key . size );
865 if ( rc != 0 )
866 rc = RC ( rcDB, rcIndex, rcInserting, rcMemory, rcExhausted );
867 else
868 {
869 StringInit ( & node -> n . key, node -> key, key . size, key . len );
870 node -> id = id;
871 strcpy ( node -> key, str );
872
873 rc = TrieInsertUnique ( & self -> key2id, & node -> n, NULL );
874 if ( rc != 0 )
875 TNodeWhack ( & node -> n );
876 else if ( proj )
877 {
878 uint32_t range, offset;
879
880 if ( id < self -> first )
881 {
882 range = self -> last - id + 1;
883 if ( range > self -> len )
884 {
885 rc = KTrieIndexExpandId2Node_v1 ( self, range );
886 if ( rc != 0 )
887 {
888 TrieUnlink ( & self -> key2id, & node -> n );
889 KTrieIdxNodeWhack_v1 ( & node -> n, NULL );
890 return rc;
891 }
892 }
893
894 offset = self -> first - id;
895 memmove ( & self -> id2node [ offset ], self -> id2node,
896 ( self -> last - self -> first + 1 ) * sizeof self -> id2node [ 0 ] );
897 memset ( & self -> id2node [ 1 ], 0,
898 ( offset - 1 ) * sizeof self -> id2node [ 0 ] );
899
900 self -> first = id;
901 }
902 else if ( id > self -> last )
903 {
904 range = id - self -> first + 1;
905 if ( range > self -> len )
906 {
907 rc = KTrieIndexExpandId2Node_v1 ( self, range );
908 if ( rc != 0 )
909 {
910 TrieUnlink ( & self -> key2id, & node -> n );
911 KTrieIdxNodeWhack_v1 ( & node -> n, NULL );
912 return rc;
913 }
914 }
915
916 offset = id - 1 - self -> last;
917 if ( offset > 0 )
918 {
919 memset ( & self -> id2node [ self -> last - self -> first + 1 ],
920 0, offset * sizeof self -> id2node [ 0 ] );
921 }
922
923 self -> last = id;
924 }
925
926 assert ( self -> id2node != NULL );
927 self -> id2node [ id - self -> first ] = node;
928 }
929 }
930
931 return rc;
932 }
933
934 /* drop string from trie and all mappings */
KTrieIndexDelete_v1(KTrieIndex_v1 * self,bool proj,const char * str)935 rc_t KTrieIndexDelete_v1 ( KTrieIndex_v1 *self, bool proj, const char *str )
936 {
937 rc_t rc;
938
939 String key;
940 KTrieIdxNode_v1 *node;
941
942 #if DISABLE_PROJ
943 proj = 0;
944 #endif
945
946 /* detect first modification */
947 if ( self -> last < self -> first )
948 {
949 /* detect persisted data */
950 if ( self -> pt . key2id != NULL )
951 {
952 rc = KTrieIndexAttach_v1 ( self, proj, 0 );
953 if ( rc != 0 )
954 return rc;
955 }
956
957 /* create empty projection array */
958 else if ( proj )
959 {
960 self -> id2node = malloc ( 4096 * sizeof self -> id2node [ 0 ] );
961 if ( self -> id2node == NULL )
962 return RC ( rcDB, rcIndex, rcRemoving, rcMemory, rcExhausted );
963 self -> first = self -> last = 0;
964 self -> len = 4096;
965 }
966 }
967
968 /* interface states that all objects are dropped.
969 however, this implementation only allows unique
970 mappings, so a simple find is sufficient */
971 StringInitCString ( & key, str );
972 node = ( KTrieIdxNode_v1* ) TrieFind ( & self -> key2id, & key );
973 if ( node == NULL )
974 return RC ( rcDB, rcIndex, rcRemoving, rcString, rcNotFound );
975
976 /* drop from projection index */
977 if ( self -> id2node != NULL &&
978 node -> id >= self -> first &&
979 node -> id <= self -> last )
980 {
981 assert ( self -> id2node [ node -> id - self -> first ] == node );
982 if ( node -> id == self -> last )
983 {
984 if ( -- self -> last < self -> first )
985 {
986 free ( self -> id2node );
987 self -> id2node = NULL;
988 self -> len = 0;
989 }
990 }
991 else if ( node -> id == self -> first )
992 {
993 memmove ( self -> id2node, self -> id2node + 1,
994 ( self -> last - self -> first ) * sizeof self -> id2node [ 0 ] );
995 ++ self -> first;
996 }
997 else
998 {
999 self -> id2node [ node -> id - self -> first ] = NULL;
1000 }
1001 }
1002
1003 TrieUnlink ( & self -> key2id, & node -> n );
1004 KTrieIdxNodeWhack_v1 ( & node -> n, NULL );
1005
1006 return 0;
1007 }
1008
1009 /* map key to id ( was Key2Id ) */
KTrieIndexFind_v1(const KTrieIndex_v1 * self,const char * str,uint32_t * id,int (CC * custom_cmp)(const void * item,const PBSTNode * n,void * data),void * data)1010 rc_t KTrieIndexFind_v1 ( const KTrieIndex_v1 *self, const char *str, uint32_t *id,
1011 int ( CC * custom_cmp ) ( const void *item, const PBSTNode *n, void *data ), void * data )
1012 {
1013 String key;
1014
1015 if ( self -> last < self -> first )
1016 {
1017 if ( self -> pt . key2id != NULL )
1018 {
1019 PTNode n;
1020 uint32_t nid;
1021 StringInitCString ( & key, str );
1022 nid = PTrieFind ( self -> pt . key2id, & key, & n, custom_cmp ,data);
1023 if ( nid != 0 )
1024 {
1025 assert ( n . data . size == sizeof * id );
1026 memmove ( id, n . data . addr, sizeof * id );
1027 return 0;
1028 }
1029 }
1030 }
1031 else
1032 {
1033 const KTrieIdxNode_v1 *n;
1034
1035 StringInitCString ( & key, str );
1036 n = ( const KTrieIdxNode_v1* ) TrieFind ( & self -> key2id, & key );
1037 if ( n != NULL )
1038 {
1039 * id = n -> id;
1040 return 0;
1041 }
1042 }
1043
1044 return RC ( rcDB, rcIndex, rcSelecting, rcString, rcNotFound );
1045 }
1046
1047 /* projection index id to key-string ( was Id2Key ) */
KTrieIndexProject_v1(const KTrieIndex_v1 * self,uint32_t id,char * key_buff,size_t buff_size,size_t * actsize)1048 rc_t KTrieIndexProject_v1 ( const KTrieIndex_v1 *self,
1049 uint32_t id, char *key_buff, size_t buff_size, size_t *actsize )
1050 {
1051 if ( self -> last < self -> first )
1052 {
1053 if ( self -> pt . id2node != NULL &&
1054 id >= self -> pt . first &&
1055 id <= self -> pt . last )
1056 {
1057 PTNode n;
1058 uint32_t node = self -> pt . id2node [ id - self -> pt . first ];
1059 rc_t rc = PTrieGetNode ( self -> pt . key2id,
1060 & n, self -> pt . byteswap ? bswap_32 ( node ) : node );
1061 if ( rc == 0 )
1062 {
1063 const String *key;
1064 rc = PTNodeMakeKey ( & n, & key );
1065 if ( rc == 0 )
1066 {
1067 if (actsize)
1068 *actsize = key -> size;
1069
1070 if ( key -> size >= buff_size )
1071 rc = RC ( rcDB, rcIndex, rcProjecting, rcBuffer, rcInsufficient );
1072 else
1073 string_copy ( key_buff, buff_size, key -> addr, key -> size );
1074
1075 StringWhack ( ( String* ) key );
1076 return rc;
1077 }
1078 }
1079 }
1080 }
1081 else
1082 {
1083 if ( self -> id2node != NULL &&
1084 id >= self -> first &&
1085 id <= self -> last )
1086 {
1087 const KTrieIdxNode_v1 *n = self -> id2node [ id - self -> first ];
1088 if ( n != NULL )
1089 {
1090 if ( n -> n . key . size >= buff_size )
1091 return RC ( rcDB, rcIndex, rcProjecting, rcBuffer, rcInsufficient );
1092 string_copy ( key_buff, buff_size,
1093 n -> n . key . addr, n -> n . key . size );
1094 return 0;
1095 }
1096 }
1097 }
1098
1099 return RC ( rcDB, rcIndex, rcProjecting, rcId, rcNotFound );
1100 }
1101