1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 
27 #include <kdb/extern.h>
28 
29 #include "index-priv.h"
30 #include "idstats-priv.h"
31 
32 #include <kdb/index.h>
33 #include <klib/ptrie.h>
34 #include <klib/text.h>
35 #include <klib/log.h>
36 #include <klib/rc.h>
37 #include <sysalloc.h>
38 
39 #include <stdlib.h>
40 #include <limits.h>
41 #include <stdio.h>
42 #include <string.h>
43 #include <endian.h>
44 #include <byteswap.h>
45 #include <assert.h>
46 
47 
48 /*--------------------------------------------------------------------------
49  * KPTrieIndexCCParms
50  *  consistency check parameter block
51  */
52 typedef struct KPTrieIndexCCParms_v1 KPTrieIndexCCParms_v1;
53 struct KPTrieIndexCCParms_v1
54 {
55     KIdStats stats;
56     rc_t rc;
57     const KPTrieIndex_v1 *self;
58     const KIndex *outer;
59     bool key2id;
60     bool id2key;
61     bool failed;
62 };
63 
64 /* Init
65  */
66 static
KPTrieIndexCCParmsInit_v1(KPTrieIndexCCParms_v1 * pb,const KPTrieIndex_v1 * self,const KIndex * outer,bool key2id,bool id2key)67 void KPTrieIndexCCParmsInit_v1 ( KPTrieIndexCCParms_v1 *pb,
68     const KPTrieIndex_v1 *self, const KIndex *outer, bool key2id, bool id2key )
69 {
70     KIdStatsInit ( & pb -> stats );
71     pb -> rc = 0;
72     pb -> self = self;
73     pb -> outer = outer;
74     pb -> key2id = key2id;
75     pb -> id2key = self -> id2node ? id2key : false;
76     pb -> failed = false;
77 }
78 
79 /* Whack
80  */
81 static
KPTrieIndexCCParmsWhack_v1(KPTrieIndexCCParms_v1 * pb)82 void KPTrieIndexCCParmsWhack_v1 ( KPTrieIndexCCParms_v1 *pb )
83 {
84     KIdStatsWhack ( & pb -> stats );
85 }
86 
87 /*--------------------------------------------------------------------------
88  * KPTrieIndex_v1
89  *  persisted keymap
90  */
91 
92 /* CheckConsistency
93  *  runs a consistency check as efficiently as possible
94  *
95  *  in all cases, we make use of "PTrieForEach" to visit each node
96  *  using the natural storage order. each node returned is counted,
97  *  read and inserted into a BSTree whose nodes merge adjacent and
98  *  overlapping ids into existing nodes.
99  *
100  *  if running a deep "key->id" test, then the key is first regenerated
101  *  from the node, and then used to retrieve the id via the KIndex.
102  *
103  *  if the projection index exists, the id is tested against the node
104  *  to ensure that projection works. if "id->key" is true, then use
105  *  the KIndex to produce a key and compare it against the node.
106  */
107 static
KPTrieIndexCCVisit_v1(PTNode * n,void * data)108 bool CC KPTrieIndexCCVisit_v1 ( PTNode *n, void *data )
109 {
110     KPTrieIndexCCParms_v1 *pb = data;
111     const KPTrieIndex_v1 *self = pb -> self;
112 
113     /* payload of v1 PTNode is a 32-bit spot id */
114     uint32_t id;
115     assert ( n -> data . size == sizeof id );
116     memmove ( & id, n -> data . addr, sizeof id );
117     if ( self -> byteswap )
118         id = bswap_32 ( id );
119 
120     /* record the node, row id, and range of 1 */
121     pb -> rc = KIdStatsInsert ( & pb -> stats, id, 1 );
122     if ( pb -> rc != 0 )
123     {
124         PLOGERR ( klogSys, ( klogSys, pb -> rc, "failed when examining node id $(nid) with row id $(rid)",
125                               "nid=0x%08x,rid=%u", n -> id, id ));
126         return true;
127     }
128 
129     /* if we have a projection index, test it */
130     if ( self -> id2node != NULL )
131     {
132         if ( id < self -> first || id > self -> last )
133         {
134             PLOGMSG ( klogWarn, ( klogWarn, "node id $(nid) with row id $(rid) is not within projection range of $(min_rid)..$(max_rid)",
135                                  "nid=0x%08x,rid=%u,min_rid=%u,max_rid=%u", n -> id, id, self -> first, self -> last ));
136             pb -> failed = true;
137             return false;
138         }
139         if ( self -> id2node [ id - self -> first ] != n -> id )
140         {
141             PLOGMSG ( klogWarn, ( klogWarn, "node id $(nid) with row id $(rid) does not match projection node id of $(pnid)",
142                                  "nid=0x%08x,rid=%u,pnid=0x%08x", n -> id, id, self -> id2node [ id - self -> first ] ));
143             pb -> failed = true;
144             return false;
145         }
146     }
147 
148     /* if performing deeper tests */
149     if ( pb -> key2id || pb -> id2key )
150     {
151         rc_t rc;
152         int64_t start_id;
153         uint64_t id_count;
154 
155         /* need to recover key from given node */
156         const String *orig;
157         pb -> rc = PTNodeMakeKey ( n, & orig );
158         if ( pb -> rc != 0 )
159         {
160             PLOGERR ( klogSys, ( klogSys, pb -> rc, "failed when retrieving text for node id $(nid) with row id $(rid)",
161                                  "nid=0x%08x,rid=%u", n -> id, id ));
162             return true;
163         }
164 
165         /* key->id test */
166         if ( pb -> key2id )
167         {
168             rc = KIndexFindText ( pb -> outer, orig -> addr, & start_id, & id_count, NULL, NULL );
169             if ( rc != 0 )
170             {
171                 PLOGERR ( klogWarn, ( klogWarn, rc, "failed to retrieve start id and count for key '$(key)', row id $(rid)",
172                                       "key=%S,rid=%u", orig, id ) );
173                 pb -> failed = true;
174             }
175             else if ( start_id != ( int64_t ) id || id_count != 1 )
176             {
177                 PLOGERR ( klogWarn, ( klogWarn, rc, "key '$(key)' maps to start id $(start_id), count $(id_count): expected id $(rid), count 1.",
178                                       "key=%S,rid=%u,start_id=%ld,id_count=%lu", orig, id, start_id, id_count ) );
179                 pb -> failed = true;
180             }
181         }
182 
183         /* id->key test */
184         if ( pb -> id2key )
185         {
186             char buffer [ 256 ], *key = buffer;
187             size_t key_size, bsize = sizeof buffer;
188             if ( sizeof buffer <= orig -> size )
189             {
190                 key = malloc ( bsize = orig -> size + 1 );
191                 if ( key == 0 )
192                 {
193                     pb -> rc = RC ( rcDB, rcIndex, rcValidating, rcMemory, rcExhausted );
194                     StringWhack ( ( String* ) orig );
195                     return true;
196                 }
197             }
198 
199             rc = KIndexProjectText ( pb -> outer, id, & start_id, & id_count, key, bsize, & key_size );
200             if ( rc != 0 )
201             {
202                 PLOGERR ( klogWarn, ( klogWarn, rc, "failed to retrieve key, start id and count for row id $(rid)",
203                                       "rid=%u", id ) );
204                 pb -> failed = true;
205             }
206             else
207             {
208                 if ( orig -> size != key_size || memcmp ( orig -> addr, key, key_size ) != 0 )
209                 {
210                     PLOGERR ( klogWarn, ( klogWarn, rc, "row $(rid) maps to key '$(key)': expected key '$(orig)'.",
211                                           "rid=%u,key=%.*s,orig=%S", id, ( int ) key_size, key, orig ) );
212                     pb -> failed = true;
213                 }
214                 if ( start_id != ( int64_t ) id || id_count != 1 )
215                 {
216                     PLOGERR ( klogWarn, ( klogWarn, rc, "single row $(rid) maps to start id $(start_id), count $(id_count).",
217                                           "rid=%u,id_count=%u,start_id=%ld", id, id_count, start_id ) );
218                     pb -> failed = true;
219                 }
220             }
221 
222             if ( key != buffer )
223                 free ( key );
224         }
225 
226         StringWhack ( ( String* ) orig );
227     }
228 
229     return false;
230 }
231 
KPTrieIndexCheckConsistency_v1(const KPTrieIndex_v1 * self,int64_t * start_id,uint64_t * id_range,uint64_t * num_keys,uint64_t * num_rows,uint64_t * num_holes,const KIndex * outer,bool key2id,bool id2key)232 rc_t KPTrieIndexCheckConsistency_v1 ( const KPTrieIndex_v1 *self,
233     int64_t *start_id, uint64_t *id_range, uint64_t *num_keys,
234     uint64_t *num_rows, uint64_t *num_holes,
235     const KIndex *outer, bool key2id, bool id2key )
236 {
237     rc_t rc = 0;
238     KPTrieIndexCCParms_v1 pb;
239 
240     if ( self == NULL )
241         return RC ( rcDB, rcIndex, rcValidating, rcParam, rcNull );
242 
243     if ( ( key2id || id2key ) && outer == NULL )
244         return RC ( rcDB, rcIndex, rcValidating, rcSelf, rcNull );
245 
246     KPTrieIndexCCParmsInit_v1 ( & pb, self, outer, key2id, id2key );
247     if ( PTrieDoUntil ( self -> key2id, KPTrieIndexCCVisit_v1, & pb ) )
248         rc = pb . rc;
249     else if ( pb . failed )
250         rc = RC ( rcDB, rcIndex, rcValidating, rcSelf, rcCorrupt );
251 
252     if ( start_id != NULL )
253         * start_id = pb . stats . i_min_id;
254     if ( id_range != NULL )
255         * id_range = ( uint32_t ) ( pb . stats . x_max_id - pb . stats . i_min_id );
256     if ( num_keys != NULL )
257         * num_keys = pb . stats . num_entries;
258     if ( num_rows != NULL )
259         * num_rows = pb . stats . num_ids;
260     if ( num_holes != NULL )
261         * num_holes = pb . stats . num_holes;
262 
263     KPTrieIndexCCParmsWhack_v1 ( & pb );
264     return rc;
265 }
266