1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 
27 #include <kdb/extern.h>
28 #include "windex-priv.h"
29 #include "kdbfmt-priv.h"
30 #include <klib/ptrie.h>
31 #include <klib/text.h>
32 #include <kfs/directory.h>
33 #include <kfs/file.h>
34 #include <kfs/md5.h>
35 #include <kfs/mmap.h>
36 #include <klib/rc.h>
37 #include <os-native.h>
38 #include <sysalloc.h>
39 
40 #include <stdlib.h>
41 #include <limits.h>
42 #include <stdio.h>
43 #include <string.h>
44 #include <errno.h>
45 #include <byteswap.h>
46 #include <assert.h>
47 
48 #define DISABLE_PROJ 0
49 #define LIMIT_INSERTS 0
50 
51 #if LIMIT_INSERTS
52 #define INSERT_LIMIT 100000000
53 #endif
54 
55 /*--------------------------------------------------------------------------
56  * KPTrieIndex_v1
57  *  persisted keymap
58  */
59 
60 /* KPTrieIndexInit
61  *  opens and initializes persisted keymap structure
62  */
KPTrieIndexInit_v1(KPTrieIndex_v1 * self,const KMMap * mm,bool byteswap)63 rc_t KPTrieIndexInit_v1 ( KPTrieIndex_v1 *self, const KMMap *mm, bool byteswap )
64 {
65     size_t size;
66     rc_t rc = KMMapSize ( mm, & size );
67     if ( rc == 0 )
68     {
69         const KDBHdr *hdr;
70         rc = KMMapAddrRead ( mm, ( const void** ) & hdr );
71         if ( rc == 0 )
72         {
73             /* try to create the pttree */
74             rc = PTrieMakeOrig ( & self -> key2id,
75                 hdr + 1, size -= sizeof * hdr, byteswap );
76             if ( rc == 0 )
77             {
78                 size_t ptsize = PTrieSize ( self -> key2id );
79                 if ( ptsize <= size )
80                 {
81                     /* just in case */
82                     self -> mm = NULL;
83 
84                     /* record for projection */
85                     self -> byteswap = byteswap;
86 
87                     /* it could be stored without projection */
88                     if ( ptsize == size )
89                     {
90                         self -> id2node = NULL;
91                         self -> first = self -> last = 0;
92                         return 0;
93                     }
94 
95                     /* assume this is projection index */
96                     self -> id2node = ( void* )
97                         ( ( char* ) ( hdr + 1 ) + ptsize );
98                     size -= ptsize;
99 
100                     /* it must have at least 4 bytes
101                        and be 4 byte aligned */
102                     if ( size >= sizeof ( uint32_t ) && ( size & 3 ) == 0 )
103                     {
104                         /* first entry is starting key
105                            remaining entries are node ids */
106                         self -> first = * self -> id2node ++;
107                         size -= sizeof self -> id2node [ 0 ];
108                         if ( size == 0 )
109                         {
110                             /* forget if empty */
111                             self -> id2node = NULL;
112                             self -> first = self -> last = 0;
113                             return 0;
114                         }
115                         /* remaining entries */
116                         self -> last = self -> first + ( size >> 2 ) - 1;
117                         return 0;
118                     }
119                 }
120 
121                 PTrieWhack ( self -> key2id );
122                 self -> key2id = NULL;
123 
124                 rc = RC ( rcDB, rcIndex, rcConstructing, rcTrie, rcCorrupt );
125             }
126         }
127     }
128 
129     return rc;
130 }
131 
132 /* KPTrieIndexWhack_v1
133  *  closes down keymap
134  */
KPTrieIndexWhack_v1(KPTrieIndex_v1 * self)135 void KPTrieIndexWhack_v1 ( KPTrieIndex_v1 *self )
136 {
137     PTrieWhack ( self -> key2id );
138     KMMapRelease ( self -> mm );
139     memset ( self, 0, sizeof * self );
140 }
141 
142 
143 /*--------------------------------------------------------------------------
144  * KTrieIdxNode_v1
145  */
146 
147 static
KTrieIdxNodeWhack_v1(TNode * n,void * data)148 void CC KTrieIdxNodeWhack_v1 ( TNode *n, void *data )
149 {
150     TNodeWhack ( n );
151 }
152 
153 static
KTrieIdxNodeUnlink_v1(TNode * n,void * data)154 void CC KTrieIdxNodeUnlink_v1 ( TNode *n, void *data )
155 {
156     if ( TrieUnlink ( data, n ) )
157         TNodeWhack ( n );
158 }
159 
160 static
KTrieIdxNodeCaptureID_v1(TNode * n,void * data)161 void CC KTrieIdxNodeCaptureID_v1 ( TNode *n, void *data )
162 {
163     KTrieIndex_v1 *self = data;
164     KTrieIdxNode_v1 *node = ( KTrieIdxNode_v1* ) n;
165     self -> id2node [ node -> id - self -> first ] = node;
166 }
167 
168 
169 /*--------------------------------------------------------------------------
170  * KTrieIndex_v1
171  */
172 
173 /* KTrieIndexWrite_v1
174  */
175 typedef struct PersistTrieData PersistTrieData;
176 struct PersistTrieData
177 {
178     uint64_t pos;
179     KFile *f;
180     uint8_t *buffer;
181     size_t bsize;
182     size_t marker;
183 
184     size_t ptt_size;
185     uint32_t first;
186     uint32_t last;
187     rc_t rc;
188 };
189 
190 static
KTrieIndexWrite_v1(void * param,const void * buffer,size_t size,size_t * num_writ)191 rc_t CC KTrieIndexWrite_v1 ( void *param,
192     const void *buffer, size_t size, size_t *num_writ )
193 {
194     PersistTrieData *pb = param;
195     size_t total, to_write;
196 
197     for ( total = 0; total < size; total += to_write )
198     {
199         to_write = size - total;
200         if ( pb -> marker + to_write > pb -> bsize )
201             to_write = pb -> bsize - pb -> marker;
202 
203         if ( to_write > 0 )
204         {
205             memmove ( pb -> buffer + pb -> marker,
206                 ( const uint8_t* ) buffer + total, to_write );
207             pb -> marker += to_write;
208         }
209 
210         if ( pb -> marker == pb -> bsize )
211         {
212             size_t num_flushed;
213             pb -> rc = KFileWrite ( pb -> f, pb -> pos,
214                 pb -> buffer, pb -> bsize, & num_flushed );
215             if ( pb -> rc != 0 )
216             {
217                 * num_writ = 0;
218                 return pb -> rc;
219             }
220 
221             if ( num_flushed == 0 )
222             {
223                 * num_writ = total + to_write;
224                 return pb -> rc = RC ( rcDB, rcIndex, rcPersisting, rcTransfer, rcIncomplete );
225             }
226 
227             pb -> marker -= num_flushed;
228             pb -> pos += num_flushed;
229 
230             if ( pb -> marker != 0 )
231                 memmove ( pb -> buffer, pb -> buffer + num_flushed, pb -> marker );
232         }
233     }
234 
235     * num_writ = total;
236     return 0;
237 }
238 
239 /* KTrieIndexAux_v1
240  */
241 static
KTrieIndexAux_v1(void * param,const void * node,size_t * num_writ,PTWriteFunc write,void * write_param)242 rc_t CC KTrieIndexAux_v1 ( void *param, const void *node, size_t *num_writ,
243     PTWriteFunc write, void *write_param )
244 {
245     const KTrieIdxNode_v1 *n = node;
246 
247     if ( write != NULL )
248     {
249         PersistTrieData *pb = param;
250         if ( n -> id < pb -> first )
251             pb -> first = n -> id;
252         if ( n -> id > pb -> last )
253             pb -> last = n -> id;
254 
255         return ( * write ) ( write_param, & n -> id, sizeof n -> id, num_writ );
256     }
257 
258     * num_writ = sizeof n -> id;
259     return 0;
260 }
261 
262 /* KTrieIndexPersist_v1
263  *  write keymap to indicated location
264  */
265 static
KTrieIndexPersistTrie_v1(const KTrieIndex_v1 * self,PersistTrieData * pb)266 rc_t KTrieIndexPersistTrie_v1 ( const KTrieIndex_v1 *self, PersistTrieData *pb )
267 {
268     rc_t rc;
269     KDBHdr *hdr;
270 
271     pb -> pos = 0;
272 
273     hdr = ( KDBHdr* ) pb -> buffer;
274     KDBHdrInit ( hdr, 1 );
275     pb -> marker = sizeof * hdr;
276 
277     /* persist the trie to file,
278        using tree-internal key storage,
279        capture the image size in pb */
280     rc = TriePersist ( & self -> key2id, & pb -> ptt_size,
281         false, KTrieIndexWrite_v1, pb, KTrieIndexAux_v1, pb );
282     if ( rc == 0 && pb -> marker != 0 )
283     {
284         size_t num_writ;
285         rc = KFileWrite ( pb -> f, pb -> pos,
286             pb -> buffer, pb -> marker, & num_writ );
287         if ( rc == 0 && num_writ != pb -> marker )
288             rc = RC ( rcDB, rcIndex, rcPersisting, rcTransfer, rcIncomplete );
289     }
290 
291     return rc;
292 }
293 
294 
295 typedef struct PersistReverseData PersistReverseData;
296 struct PersistReverseData
297 {
298     PTrie *tt;
299     uint32_t *a;
300     uint32_t first;
301     uint32_t count;
302     uint32_t notfound;
303 };
304 
305 static
KTrieIndexRecordNodeId_v1(TNode * node,void * data)306 void CC KTrieIndexRecordNodeId_v1 ( TNode *node, void *data )
307 {
308     PTNode pn;
309     PersistReverseData *pb = data;
310     const KTrieIdxNode_v1 *n = ( const KTrieIdxNode_v1* ) node;
311 
312     /* lookup up the persisted node within image */
313     uint32_t id = PTrieFind ( pb -> tt, & n -> n . key, & pn, NULL, NULL );
314 
315     /* write it into array */
316     pb -> a [ n -> id - pb -> first ] = ( uint32_t ) id;
317 
318     if ( id == 0 )
319         ++ pb -> notfound;
320     else
321         ++ pb -> count;
322 }
323 
324 static
KTrieIndexPersistProj_v1(const KTrieIndex_v1 * self,PersistTrieData * pb)325 rc_t KTrieIndexPersistProj_v1 ( const KTrieIndex_v1 *self, PersistTrieData *pb )
326 {
327 #if 0
328     rc_t rc;
329     KMMap *mmr;
330     size_t map_size;
331 
332     /* there must be something to do */
333     if ( pb -> first > pb -> last )
334         return 0;
335 
336 
337     /* open a read-write map onto file
338        start at offset 0, which is the header,
339        followed by pb -> ptt_size bytes which is the tree,
340        and add on a slot for first id,
341        followed by last - first + 1 slots for id to node id map */
342     map_size = pb -> ptt_size + ( ( pb -> last - pb -> first + 2 ) << 2 );
343 
344     rc = KMMapMakeRgnUpdate ( & mmr, pb -> f, sizeof ( KDBHdr ), map_size );
345     if ( rc == 0 )
346     {
347         void *addr;
348         PersistReverseData pb2;
349 
350         rc = KMMapAddrUpdate ( mmr, & addr );
351         if ( rc == 0 )
352         {
353             rc = PTrieMakeOrig ( & pb2 . tt, addr, pb -> ptt_size );
354             if ( rc == 0 )
355             {
356                 assert ( pb -> ptt_size == PTrieSize ( pb2 . tt ) );
357                 pb2 . a = ( void* ) ( ( char* ) addr + pb -> ptt_size );
358                 assert ( ( ( size_t ) pb2 . a & 3 ) == 0 );
359 
360                 /* record first id */
361                 * pb2 . a ++ = pb -> first;
362                 pb2 . first = pb -> first;
363                 pb2 . count = pb2 . notfound = 0;
364 
365                 /* record all id to node mappings */
366                 TrieForEach ( & self -> key2id, KTrieIndexRecordNodeId_v1, & pb2 );
367 
368                 /* check for having written ids */
369                 assert ( pb2 . count == PTrieCount ( pb2 . tt ) );
370 
371                 /* done with pttree */
372                 PTrieWhack ( pb2 . tt );
373 
374                 /* if there were nodes not found,
375                    the initial persist was bad */
376                 if ( pb2 . notfound != 0 )
377                     rc = RC ( rcDB, rcIndex, rcPersisting, rcTransfer, rcIncomplete );
378             }
379         }
380 
381         /* done with map - commits changes to disk */
382         KMMapRelease ( mmr );
383 
384         /* truncate file to desired size */
385         KFileSetSize ( pb -> f, map_size + sizeof ( KDBHdr ) );
386     }
387 
388     return rc;
389 #else
390     rc_t rc;
391     void * addr;
392     uint64_t file_size;
393     size_t num_to_read;
394     size_t map_size;
395 
396     /* there must be something to do */
397     if ( pb -> first > pb -> last )
398         return 0;
399 
400     /* open a read-write map onto file
401        start at offset 0, which is the header,
402        followed by pb -> ptt_size bytes which is the tree,
403        and add on a slot for first id,
404        followed by last - first + 1 slots for id to node id map */
405     map_size = pb -> ptt_size + ( ( pb -> last - pb -> first + 2 ) << 2 );
406 
407     rc = KFileSize ( pb -> f, & file_size );
408     if ( rc == 0 )
409     {
410         addr = malloc ( map_size );
411         if ( addr == NULL )
412             rc = RC ( rcDB, rcIndex, rcPersisting, rcMemory, rcExhausted );
413         else
414         {
415             size_t num_read;
416             num_to_read = file_size - sizeof ( KDBHdr );
417             rc = KFileReadAll ( pb -> f, sizeof ( KDBHdr ),
418                 addr, num_to_read, & num_read );
419             if ( rc != 0 )
420                 free ( addr );
421             else if ( num_read != num_to_read )
422             {
423                 free ( addr );
424                 rc = RC ( rcDB, rcIndex, rcPersisting, rcHeader, rcInsufficient );
425             }
426         }
427     }
428 
429     if ( rc == 0 )
430     {
431         size_t num_writ;
432         PersistReverseData pb2;
433 
434         rc = PTrieMakeOrig ( & pb2 . tt, addr, pb -> ptt_size, false );
435         if ( rc == 0 )
436         {
437             assert ( pb -> ptt_size == PTrieSize ( pb2 . tt ) );
438             pb2 . a = ( void* ) ( ( char* ) addr + pb -> ptt_size );
439             assert ( ( ( size_t ) pb2 . a & 3 ) == 0 );
440 
441             /* record first id */
442             * pb2 . a ++ = pb -> first;
443             pb2 . first = pb -> first;
444             pb2 . count = pb2 . notfound = 0;
445 
446             /* record all id to node mappings */
447             TrieForEach ( & self -> key2id, KTrieIndexRecordNodeId_v1, & pb2 );
448 
449             /* check for having written ids */
450             assert ( pb2 . count == PTrieCount ( pb2 . tt ) );
451 
452             /* done with pttree */
453             PTrieWhack ( pb2 . tt );
454 
455             /* if there were nodes not found,
456                the initial persist was bad */
457             if ( pb2 . notfound != 0 )
458                 rc = RC ( rcDB, rcIndex, rcPersisting, rcTransfer, rcIncomplete );
459             else
460             {
461                 rc = KFileWrite ( pb -> f, file_size, ( uint8_t* ) addr + num_to_read,
462                     map_size - num_to_read, & num_writ );
463                 if ( rc == 0  && num_writ != map_size - num_to_read )
464                     rc = RC ( rcDB, rcIndex, rcPersisting, rcHeader, rcInsufficient );
465             }
466         }
467 
468         /* done with map - commits changes to disk */
469         free ( addr );
470 
471         /* truncate file to desired size */
472         KFileSetSize ( pb -> f, map_size + sizeof ( KDBHdr ) );
473     }
474 
475     return rc;
476 #endif
477 }
478 
KTrieIndexPersist_v1(const KTrieIndex_v1 * self,bool proj,KDirectory * dir,const char * path,bool use_md5)479 rc_t KTrieIndexPersist_v1 ( const KTrieIndex_v1 *self,
480     bool proj, KDirectory *dir, const char *path, bool use_md5 )
481 {
482     rc_t rc;
483     PersistTrieData pb;
484 
485     assert ( self != NULL );
486 
487     pb . buffer = malloc ( pb . bsize = 32 * 1024 );
488     if ( pb . buffer == NULL )
489         rc = RC ( rcDB, rcIndex, rcPersisting, rcMemory, rcExhausted );
490     else
491     {
492         char tmpname [ 256 ];
493         char tmpmd5name [ 256 ];
494         char md5path [ 256 ];
495         rc = KDirectoryResolvePath ( dir, false,
496             tmpname, sizeof tmpname, "%s.tmp", path );
497         if ( rc == 0 )
498         {
499             rc = KDirectoryCreateFile ( dir, & pb . f,
500                                          true, 0664, kcmInit, "%s", tmpname );
501 
502 	    if (use_md5 && rc == 0 )
503 	    {
504 		size_t tmplen = snprintf ( tmpmd5name, sizeof tmpmd5name, "%s.md5", tmpname );
505 		KFile * kf;
506 		KMD5SumFmt *km;
507 		if ( tmplen >= sizeof ( tmpmd5name ) ) /* can't be == or no NUL */
508 		{
509 		    rc = RC ( rcDB, rcIndex, rcPersisting, rcName, rcExcessive );
510 		}
511 		else
512 		{
513 		    tmplen = snprintf ( md5path, sizeof md5path, "%s.md5", path );
514 
515 		    if ( tmplen >= sizeof ( md5path ) ) /* can't be == or no NUL */
516 		    {
517 			rc = RC ( rcDB, rcIndex, rcPersisting, rcName, rcExcessive );
518 		    }
519 		    else
520 		    {
521 			rc = KDirectoryCreateFile ( dir, &kf, true, 0664,
522                                          kcmInit, "%s", tmpmd5name );
523 			if ( rc == 0 )
524 			{
525 			    rc = KMD5SumFmtMakeUpdate ( &km, kf );
526 			    if ( rc == 0 )
527 			    {
528 				KMD5File * k5f;
529 				kf = NULL;
530 				rc = KMD5FileMakeWrite ( &k5f, pb . f, km, path );
531 				if ( rc == 0 )
532 				{
533 				    pb . f = KMD5FileToKFile ( k5f );
534 				}
535 				/* release pass or fail */
536 				KMD5SumFmtRelease ( km );
537 			    }
538 			    else
539 				KFileRelease ( kf );
540 			}
541 			else
542 			    KFileRelease ( kf );
543 		    }
544 		}
545 		if ( rc != 0 )
546 		    KFileRelease ( pb . f );
547 	    }
548 
549             if ( rc == 0 )
550             {
551                 pb . ptt_size = 0;
552                 pb . first = ~ 0;
553                 pb . last = 0;
554 
555                 rc = KTrieIndexPersistTrie_v1 ( self, & pb );
556                 if ( rc == 0 )
557                 {
558                     if ( proj )
559                         rc = KTrieIndexPersistProj_v1 ( self, & pb );
560                 }
561 
562                 KFileRelease ( pb . f );
563                 pb . f = NULL;
564             }
565         }
566 
567         free ( pb . buffer );
568         pb . buffer = NULL;
569 
570         if ( rc == 0 )
571         {
572             rc = KDirectoryRename ( dir, false, tmpname, path );
573             if ( rc == 0 )
574             {
575 		if ( use_md5 )
576 		    rc = KDirectoryRename ( dir, false, tmpmd5name, md5path );
577 		if ( rc == 0 )
578 		{
579 		    /* done */
580 		    return 0;
581 		}
582             }
583         }
584 
585         /* whack temporary file */
586         KDirectoryRemove ( dir, false, "%s", tmpname );
587 	if ( use_md5 )
588 	    KDirectoryRemove ( dir, false, "%s", tmpmd5name );
589     }
590 
591     return rc;
592 }
593 
594 
595 /* whack whack */
KTrieIndexWhack_v1(KTrieIndex_v1 * self)596 void KTrieIndexWhack_v1 ( KTrieIndex_v1 *self )
597 {
598     KPTrieIndexWhack_v1 ( & self -> pt );
599     TrieWhack ( & self -> key2id, KTrieIdxNodeWhack_v1, NULL );
600 }
601 
602 /* initialize an index from file - can be NULL */
KTrieIndexOpen_v1(KTrieIndex_v1 * self,const KMMap * mm,bool byteswap)603 rc_t KTrieIndexOpen_v1 ( KTrieIndex_v1 *self, const KMMap *mm, bool byteswap )
604 {
605     rc_t rc;
606 
607     memset ( self, 0, sizeof * self );
608     rc = TrieInit ( & self -> key2id, "0-9", 512, true );
609     if ( rc != 0 )
610         return rc;
611 
612     self -> first = ~ 0;
613 
614     if ( mm == NULL )
615         return 0;
616 
617     rc = KPTrieIndexInit_v1 ( & self -> pt, mm, byteswap );
618     if ( rc == 0 )
619     {
620         rc = KMMapAddRef ( mm );
621         if ( rc == 0 )
622         {
623             self -> pt . mm = mm;
624             return 0;
625         }
626     }
627 
628     KTrieIndexWhack_v1 ( self );
629     return rc;
630 }
631 
632 /* KTrieIndexPopulate_v1
633  */
634 typedef struct KTrieIndexPopulateData_v1 KTrieIndexPopulateData_v1;
635 struct KTrieIndexPopulateData_v1
636 {
637     KTrieIndex_v1 *idx;
638     uint32_t id;
639     rc_t rc;
640 };
641 
642 static
KTrieIndexPopulate_v1(PTNode * n,void * data)643 bool CC KTrieIndexPopulate_v1 ( PTNode *n, void *data )
644 {
645     const String *key;
646     KTrieIndexPopulateData_v1 *pb = data;
647 
648     /* capture node id */
649     uint32_t id;
650     assert ( n -> data . size == sizeof id );
651     memmove ( & id, n -> data . addr, sizeof id );
652 
653     /* reject already mapped */
654     if ( id == pb -> id )
655     {
656         pb -> rc = RC ( rcDB, rcIndex, rcConstructing, rcNode, rcExists );
657         return true;
658     }
659 
660     pb -> rc = PTNodeMakeKey ( n, & key );
661     if ( pb -> rc == 0 )
662     {
663         KTrieIdxNode_v1 *node;
664         pb -> rc = TNodeMake ( ( TNode** ) & node,
665             sizeof * node + key -> size );
666         if ( pb -> rc == 0 )
667         {
668             KTrieIndex_v1 *self = pb -> idx;
669 
670             StringInit ( & node -> n . key, node -> key, key -> size, key -> len );
671             node -> id = id;
672             string_copy ( node -> key, key -> size + 1,
673                 key -> addr, key -> size );
674 
675             pb -> rc = TrieInsert ( & self -> key2id, & node -> n );
676             if ( pb -> rc == 0 )
677             {
678                 free ( ( void* ) key );
679 
680                 /* if copying projection index */
681                 if ( self -> id2node != NULL )
682                 {
683                     if ( self -> pt . id2node != NULL )
684                         self -> id2node [ node -> id - self -> pt . first ] = node;
685                     else
686                     {
687                         if ( node -> id < self -> first )
688                             self -> first = node -> id;
689                         if ( node -> id > self -> last )
690                             self -> last = node -> id;
691                     }
692                 }
693                 return 0;
694             }
695 
696             TNodeWhack ( & node -> n );
697         }
698 
699         StringWhack ( ( String* ) key );
700     }
701 
702     return true;
703 }
704 
705 /* KTrieIndexAttach_v1
706  *  attach a keymap to an existing table
707  *
708  *  "pkm" [ IN ] - a persisted keymap
709  */
710 static
KTrieIndexAttach_v1(KTrieIndex_v1 * self,bool proj,uint32_t id)711 rc_t KTrieIndexAttach_v1 ( KTrieIndex_v1 *self, bool proj, uint32_t id )
712 {
713     uint32_t proj_len;
714     KTrieIndexPopulateData_v1 pb;
715 
716 #if LIMIT_INSERTS
717     proj_len = self -> pt . last - self -> pt . first + 1;
718 #endif
719 
720     /* see if we can use existing projection index */
721     if ( proj && self -> pt . id2node != NULL )
722     {
723         /* reject if already mapped */
724         if ( id != 0 &&
725              id >= self -> pt . first &&
726              id <= self -> pt . last &&
727              self -> pt . id2node [ id - self -> pt . first ] != 0 )
728         {
729             return RC ( rcDB, rcIndex, rcUpdating, rcId, rcExists );
730         }
731 
732         /* allocate index array */
733 #if LIMIT_INSERTS
734         if ( proj_len > INSERT_LIMIT )
735             return RC ( rcDB, rcIndex, rcUpdating, rcRange, rcExcessive );
736 #else
737         proj_len = self -> pt . last - self -> pt . first + 1;
738 #endif
739         proj_len = ( proj_len + 4095 ) & - 4096;
740         self -> id2node = calloc ( proj_len, sizeof self -> id2node [ 0 ] );
741         if ( self -> id2node == NULL )
742             return RC ( rcDB, rcIndex, rcUpdating, rcMemory, rcExhausted );
743 
744         /* record known dimensions */
745         self -> first = self -> pt . first;
746         self -> last = self -> pt . last;
747         self -> len = proj_len;
748     }
749 
750 #if LIMIT_INSERTS
751     else if ( proj_len > INSERT_LIMIT )
752         return RC ( rcDB, rcIndex, rcUpdating, rcRange, rcExcessive );
753 #endif
754 
755     /* inflate persisted trie */
756     pb . idx = self;
757     pb . id = id;
758     pb . rc = 0;
759     PTrieDoUntil ( self -> pt . key2id, KTrieIndexPopulate_v1, & pb );
760 
761     /* if successful but needing to add projection index */
762     if ( pb . rc == 0 && proj && self -> id2node == NULL )
763     {
764         proj_len = self -> last - self -> first + 1;
765         proj_len = ( proj_len + 4095 ) & - 4096;
766         self -> id2node = calloc ( proj_len, sizeof self -> id2node [ 0 ] );
767         if ( self -> id2node == NULL )
768             pb . rc = RC ( rcDB, rcIndex, rcUpdating, rcMemory, rcExhausted );
769         else
770         {
771             self -> len = proj_len;
772             TrieForEach ( & self -> key2id, KTrieIdxNodeCaptureID_v1, self );
773         }
774     }
775 
776     if ( pb . rc == 0 )
777         KPTrieIndexWhack_v1 ( & self -> pt );
778     else if ( self -> id2node != NULL )
779     {
780         TrieForEach ( & self -> key2id,
781             KTrieIdxNodeUnlink_v1, & self -> key2id );
782         free ( self -> id2node );
783         self -> id2node = NULL;
784         self -> first = ~0;
785         self -> last = 0;
786         self -> len = 0;
787     }
788     return pb . rc;
789 }
790 
791 /* insert string into trie, mapping to 32 bit id */
792 static
KTrieIndexExpandId2Node_v1(KTrieIndex_v1 * self,uint32_t range)793 rc_t KTrieIndexExpandId2Node_v1 ( KTrieIndex_v1 *self, uint32_t range )
794 {
795     KTrieIdxNode_v1 **id2node;
796     range = ( range + 4095 ) & - 4096;
797     id2node = realloc ( self -> id2node, range * sizeof id2node [ 0 ] );
798     if ( id2node == NULL )
799         return RC ( rcDB, rcIndex, rcInserting, rcMemory, rcExhausted );
800 
801     self -> id2node = id2node;
802 
803 #if ZERO_ID2NODE
804     /* why zero this when it is known to be invalid? */
805     memset ( id2node + self -> len, 0,
806         ( range - self -> len ) * sizeof id2node [ 0 ] );
807 #endif
808 
809     self -> len = range;
810 
811     return 0;
812 }
813 
KTrieIndexInsert_v1(KTrieIndex_v1 * self,bool proj,const char * str,uint32_t id)814 rc_t KTrieIndexInsert_v1 ( KTrieIndex_v1 *self,
815     bool proj, const char *str, uint32_t id )
816 {
817     rc_t rc;
818 
819     String key;
820     KTrieIdxNode_v1 *node;
821 
822 #if DISABLE_PROJ
823     proj = false;
824 #endif
825 
826     /* detect first modification */
827     if ( self -> last < self -> first )
828     {
829         /* detect persisted data */
830         if ( self -> pt . key2id != NULL )
831         {
832             rc = KTrieIndexAttach_v1 ( self, proj, id );
833             if ( rc != 0 )
834                 return rc;
835         }
836 
837         /* create empty projection array */
838         else if ( proj )
839         {
840             self -> id2node = malloc ( 4096 * sizeof self -> id2node [ 0 ] );
841             if ( self -> id2node == NULL )
842                 return RC ( rcDB, rcIndex, rcInserting, rcMemory, rcExhausted );
843             self -> first = self -> last = id;
844             self -> len = 4096;
845         }
846     }
847 
848     /* reject if already mapped */
849     else if ( self -> id2node != NULL &&
850               id >= self -> first &&
851               id <= self -> last &&
852               self -> id2node [ id - self -> first ] != NULL )
853     {
854         return RC ( rcDB, rcIndex, rcInserting, rcId, rcExists );
855     }
856 #if LIMIT_INSERTS && INSERT_LIMIT > 0
857     else if ( ( self -> last - self -> first ) >= ( INSERT_LIMIT - 1 ) )
858     {
859         return RC ( rcDB, rcIndex, rcUpdating, rcRange, rcExcessive );
860     }
861 #endif
862 
863     StringInitCString ( & key, str );
864     rc = TNodeMake ( ( TNode** ) & node, sizeof * node + key . size );
865     if ( rc != 0 )
866         rc = RC ( rcDB, rcIndex, rcInserting, rcMemory, rcExhausted );
867     else
868     {
869         StringInit ( & node -> n . key, node -> key, key . size, key . len );
870         node -> id = id;
871         strcpy ( node -> key, str );
872 
873         rc = TrieInsertUnique ( & self -> key2id, & node -> n, NULL );
874         if ( rc != 0 )
875             TNodeWhack ( & node -> n );
876         else if ( proj )
877         {
878             uint32_t range, offset;
879 
880             if ( id < self -> first )
881             {
882                 range = self -> last - id + 1;
883                 if ( range > self -> len )
884                 {
885                     rc = KTrieIndexExpandId2Node_v1 ( self, range );
886                     if ( rc != 0 )
887                     {
888                         TrieUnlink ( & self -> key2id, & node -> n );
889                         KTrieIdxNodeWhack_v1 ( & node -> n, NULL );
890                         return rc;
891                     }
892                 }
893 
894                 offset = self -> first - id;
895                 memmove ( & self -> id2node [ offset ], self -> id2node,
896                     ( self -> last - self -> first + 1 ) * sizeof self -> id2node [ 0 ] );
897                 memset ( & self -> id2node [ 1 ], 0,
898                     ( offset - 1 ) * sizeof self -> id2node [ 0 ] );
899 
900                 self -> first = id;
901             }
902             else if ( id > self -> last )
903             {
904                 range = id - self -> first + 1;
905                 if ( range > self -> len )
906                 {
907                     rc = KTrieIndexExpandId2Node_v1 ( self, range );
908                     if ( rc != 0 )
909                     {
910                         TrieUnlink ( & self -> key2id, & node -> n );
911                         KTrieIdxNodeWhack_v1 ( & node -> n, NULL );
912                         return rc;
913                     }
914                 }
915 
916                 offset = id - 1 - self -> last;
917                 if ( offset > 0 )
918                 {
919                     memset ( & self -> id2node [ self -> last - self -> first + 1 ],
920                         0, offset * sizeof self -> id2node [ 0 ] );
921                 }
922 
923                 self -> last = id;
924             }
925 
926             assert ( self -> id2node != NULL );
927             self -> id2node [ id - self -> first ] = node;
928         }
929     }
930 
931     return rc;
932 }
933 
934 /* drop string from trie and all mappings */
KTrieIndexDelete_v1(KTrieIndex_v1 * self,bool proj,const char * str)935 rc_t KTrieIndexDelete_v1 ( KTrieIndex_v1 *self, bool proj, const char *str )
936 {
937     rc_t rc;
938 
939     String key;
940     KTrieIdxNode_v1 *node;
941 
942 #if DISABLE_PROJ
943     proj = 0;
944 #endif
945 
946     /* detect first modification */
947     if ( self -> last < self -> first )
948     {
949         /* detect persisted data */
950         if ( self -> pt . key2id != NULL )
951         {
952             rc = KTrieIndexAttach_v1 ( self, proj, 0 );
953             if ( rc != 0 )
954                 return rc;
955         }
956 
957         /* create empty projection array */
958         else if ( proj )
959         {
960             self -> id2node = malloc ( 4096 * sizeof self -> id2node [ 0 ] );
961             if ( self -> id2node == NULL )
962                 return RC ( rcDB, rcIndex, rcRemoving, rcMemory, rcExhausted );
963             self -> first = self -> last = 0;
964             self -> len = 4096;
965         }
966     }
967 
968     /* interface states that all objects are dropped.
969        however, this implementation only allows unique
970        mappings, so a simple find is sufficient */
971     StringInitCString ( & key, str );
972     node = ( KTrieIdxNode_v1* ) TrieFind ( & self -> key2id, & key );
973     if ( node == NULL )
974         return RC ( rcDB, rcIndex, rcRemoving, rcString, rcNotFound );
975 
976     /* drop from projection index */
977     if ( self -> id2node != NULL &&
978          node -> id >= self -> first &&
979          node -> id <= self -> last )
980     {
981         assert ( self -> id2node [ node -> id - self -> first ] == node );
982         if ( node -> id == self -> last )
983         {
984             if ( -- self -> last < self -> first )
985             {
986                 free ( self -> id2node );
987                 self -> id2node = NULL;
988                 self -> len = 0;
989             }
990         }
991         else if ( node -> id == self -> first )
992         {
993             memmove ( self -> id2node, self -> id2node + 1,
994                 ( self -> last - self -> first ) * sizeof self -> id2node [ 0 ] );
995             ++ self -> first;
996         }
997         else
998         {
999             self -> id2node [ node -> id - self -> first ] = NULL;
1000         }
1001     }
1002 
1003     TrieUnlink ( & self -> key2id, & node -> n );
1004     KTrieIdxNodeWhack_v1 ( & node -> n, NULL );
1005 
1006     return 0;
1007 }
1008 
1009 /* map key to id ( was Key2Id ) */
KTrieIndexFind_v1(const KTrieIndex_v1 * self,const char * str,uint32_t * id,int (CC * custom_cmp)(const void * item,const PBSTNode * n,void * data),void * data)1010 rc_t KTrieIndexFind_v1 ( const KTrieIndex_v1 *self, const char *str, uint32_t *id,
1011     int ( CC * custom_cmp ) ( const void *item, const PBSTNode *n, void *data ), void * data )
1012 {
1013     String key;
1014 
1015     if ( self -> last < self -> first )
1016     {
1017         if ( self -> pt . key2id != NULL )
1018         {
1019             PTNode n;
1020             uint32_t nid;
1021             StringInitCString ( & key, str );
1022             nid = PTrieFind ( self -> pt . key2id, & key, & n, custom_cmp ,data);
1023             if ( nid != 0 )
1024             {
1025                 assert ( n . data . size == sizeof * id );
1026                 memmove ( id, n . data . addr, sizeof * id );
1027                 return 0;
1028             }
1029         }
1030     }
1031     else
1032     {
1033         const KTrieIdxNode_v1 *n;
1034 
1035         StringInitCString ( & key, str );
1036         n = ( const KTrieIdxNode_v1* ) TrieFind ( & self -> key2id, & key );
1037         if ( n != NULL )
1038         {
1039             * id = n -> id;
1040             return 0;
1041         }
1042     }
1043 
1044     return RC ( rcDB, rcIndex, rcSelecting, rcString, rcNotFound );
1045 }
1046 
1047 /* projection index id to key-string ( was Id2Key ) */
KTrieIndexProject_v1(const KTrieIndex_v1 * self,uint32_t id,char * key_buff,size_t buff_size,size_t * actsize)1048 rc_t KTrieIndexProject_v1 ( const KTrieIndex_v1 *self,
1049     uint32_t id, char *key_buff, size_t buff_size, size_t *actsize )
1050 {
1051     if ( self -> last < self -> first )
1052     {
1053         if ( self -> pt . id2node != NULL &&
1054              id >= self -> pt . first &&
1055              id <= self -> pt . last )
1056         {
1057             PTNode n;
1058             uint32_t node = self -> pt . id2node [ id - self -> pt . first ];
1059             rc_t rc = PTrieGetNode ( self -> pt . key2id,
1060                 & n, self -> pt . byteswap ? bswap_32 ( node ) : node );
1061             if ( rc == 0 )
1062             {
1063                 const String *key;
1064                 rc = PTNodeMakeKey ( & n, & key );
1065                 if ( rc == 0 )
1066                 {
1067                     if (actsize)
1068                         *actsize = key -> size;
1069 
1070                     if ( key -> size >= buff_size )
1071                         rc = RC ( rcDB, rcIndex, rcProjecting, rcBuffer, rcInsufficient );
1072                     else
1073                         string_copy ( key_buff, buff_size, key -> addr, key -> size );
1074 
1075                     StringWhack ( ( String* ) key );
1076                     return rc;
1077                 }
1078             }
1079         }
1080     }
1081     else
1082     {
1083         if ( self -> id2node != NULL &&
1084              id >= self -> first &&
1085              id <= self -> last )
1086         {
1087             const KTrieIdxNode_v1 *n = self -> id2node [ id - self -> first ];
1088             if ( n != NULL )
1089             {
1090                 if ( n -> n . key . size >= buff_size )
1091                     return RC ( rcDB, rcIndex, rcProjecting, rcBuffer, rcInsufficient );
1092                 string_copy ( key_buff, buff_size,
1093                     n -> n . key . addr, n -> n . key . size );
1094                 return 0;
1095             }
1096         }
1097     }
1098 
1099     return RC ( rcDB, rcIndex, rcProjecting, rcId, rcNotFound );
1100 }
1101