1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 #include <kdb/extern.h>
28
29 #include "index-priv.h"
30 #include "idstats-priv.h"
31
32 #include <kdb/index.h>
33 #include <klib/ptrie.h>
34 #include <klib/text.h>
35 #include <klib/log.h>
36 #include <klib/rc.h>
37 #include <sysalloc.h>
38
39 #include <stdlib.h>
40 #include <limits.h>
41 #include <stdio.h>
42 #include <string.h>
43 #include <endian.h>
44 #include <byteswap.h>
45 #include <assert.h>
46
47
48 /*--------------------------------------------------------------------------
49 * KPTrieIndexCCParms
50 * consistency check parameter block
51 */
52 typedef struct KPTrieIndexCCParms_v1 KPTrieIndexCCParms_v1;
53 struct KPTrieIndexCCParms_v1
54 {
55 KIdStats stats;
56 rc_t rc;
57 const KPTrieIndex_v1 *self;
58 const KIndex *outer;
59 bool key2id;
60 bool id2key;
61 bool failed;
62 };
63
64 /* Init
65 */
66 static
KPTrieIndexCCParmsInit_v1(KPTrieIndexCCParms_v1 * pb,const KPTrieIndex_v1 * self,const KIndex * outer,bool key2id,bool id2key)67 void KPTrieIndexCCParmsInit_v1 ( KPTrieIndexCCParms_v1 *pb,
68 const KPTrieIndex_v1 *self, const KIndex *outer, bool key2id, bool id2key )
69 {
70 KIdStatsInit ( & pb -> stats );
71 pb -> rc = 0;
72 pb -> self = self;
73 pb -> outer = outer;
74 pb -> key2id = key2id;
75 pb -> id2key = self -> id2node ? id2key : false;
76 pb -> failed = false;
77 }
78
79 /* Whack
80 */
81 static
KPTrieIndexCCParmsWhack_v1(KPTrieIndexCCParms_v1 * pb)82 void KPTrieIndexCCParmsWhack_v1 ( KPTrieIndexCCParms_v1 *pb )
83 {
84 KIdStatsWhack ( & pb -> stats );
85 }
86
87 /*--------------------------------------------------------------------------
88 * KPTrieIndex_v1
89 * persisted keymap
90 */
91
92 /* CheckConsistency
93 * runs a consistency check as efficiently as possible
94 *
95 * in all cases, we make use of "PTrieForEach" to visit each node
96 * using the natural storage order. each node returned is counted,
97 * read and inserted into a BSTree whose nodes merge adjacent and
98 * overlapping ids into existing nodes.
99 *
100 * if running a deep "key->id" test, then the key is first regenerated
101 * from the node, and then used to retrieve the id via the KIndex.
102 *
103 * if the projection index exists, the id is tested against the node
104 * to ensure that projection works. if "id->key" is true, then use
105 * the KIndex to produce a key and compare it against the node.
106 */
107 static
KPTrieIndexCCVisit_v1(PTNode * n,void * data)108 bool CC KPTrieIndexCCVisit_v1 ( PTNode *n, void *data )
109 {
110 KPTrieIndexCCParms_v1 *pb = data;
111 const KPTrieIndex_v1 *self = pb -> self;
112
113 /* payload of v1 PTNode is a 32-bit spot id */
114 uint32_t id;
115 assert ( n -> data . size == sizeof id );
116 memmove ( & id, n -> data . addr, sizeof id );
117 if ( self -> byteswap )
118 id = bswap_32 ( id );
119
120 /* record the node, row id, and range of 1 */
121 pb -> rc = KIdStatsInsert ( & pb -> stats, id, 1 );
122 if ( pb -> rc != 0 )
123 {
124 PLOGERR ( klogSys, ( klogSys, pb -> rc, "failed when examining node id $(nid) with row id $(rid)",
125 "nid=0x%08x,rid=%u", n -> id, id ));
126 return true;
127 }
128
129 /* if we have a projection index, test it */
130 if ( self -> id2node != NULL )
131 {
132 if ( id < self -> first || id > self -> last )
133 {
134 PLOGMSG ( klogWarn, ( klogWarn, "node id $(nid) with row id $(rid) is not within projection range of $(min_rid)..$(max_rid)",
135 "nid=0x%08x,rid=%u,min_rid=%u,max_rid=%u", n -> id, id, self -> first, self -> last ));
136 pb -> failed = true;
137 return false;
138 }
139 if ( self -> id2node [ id - self -> first ] != n -> id )
140 {
141 PLOGMSG ( klogWarn, ( klogWarn, "node id $(nid) with row id $(rid) does not match projection node id of $(pnid)",
142 "nid=0x%08x,rid=%u,pnid=0x%08x", n -> id, id, self -> id2node [ id - self -> first ] ));
143 pb -> failed = true;
144 return false;
145 }
146 }
147
148 /* if performing deeper tests */
149 if ( pb -> key2id || pb -> id2key )
150 {
151 rc_t rc;
152 int64_t start_id;
153 uint64_t id_count;
154
155 /* need to recover key from given node */
156 const String *orig;
157 pb -> rc = PTNodeMakeKey ( n, & orig );
158 if ( pb -> rc != 0 )
159 {
160 PLOGERR ( klogSys, ( klogSys, pb -> rc, "failed when retrieving text for node id $(nid) with row id $(rid)",
161 "nid=0x%08x,rid=%u", n -> id, id ));
162 return true;
163 }
164
165 /* key->id test */
166 if ( pb -> key2id )
167 {
168 rc = KIndexFindText ( pb -> outer, orig -> addr, & start_id, & id_count, NULL, NULL );
169 if ( rc != 0 )
170 {
171 PLOGERR ( klogWarn, ( klogWarn, rc, "failed to retrieve start id and count for key '$(key)', row id $(rid)",
172 "key=%S,rid=%u", orig, id ) );
173 pb -> failed = true;
174 }
175 else if ( start_id != ( int64_t ) id || id_count != 1 )
176 {
177 PLOGERR ( klogWarn, ( klogWarn, rc, "key '$(key)' maps to start id $(start_id), count $(id_count): expected id $(rid), count 1.",
178 "key=%S,rid=%u,start_id=%ld,id_count=%lu", orig, id, start_id, id_count ) );
179 pb -> failed = true;
180 }
181 }
182
183 /* id->key test */
184 if ( pb -> id2key )
185 {
186 char buffer [ 256 ], *key = buffer;
187 size_t key_size, bsize = sizeof buffer;
188 if ( sizeof buffer <= orig -> size )
189 {
190 key = malloc ( bsize = orig -> size + 1 );
191 if ( key == 0 )
192 {
193 pb -> rc = RC ( rcDB, rcIndex, rcValidating, rcMemory, rcExhausted );
194 StringWhack ( ( String* ) orig );
195 return true;
196 }
197 }
198
199 rc = KIndexProjectText ( pb -> outer, id, & start_id, & id_count, key, bsize, & key_size );
200 if ( rc != 0 )
201 {
202 PLOGERR ( klogWarn, ( klogWarn, rc, "failed to retrieve key, start id and count for row id $(rid)",
203 "rid=%u", id ) );
204 pb -> failed = true;
205 }
206 else
207 {
208 if ( orig -> size != key_size || memcmp ( orig -> addr, key, key_size ) != 0 )
209 {
210 PLOGERR ( klogWarn, ( klogWarn, rc, "row $(rid) maps to key '$(key)': expected key '$(orig)'.",
211 "rid=%u,key=%.*s,orig=%S", id, ( int ) key_size, key, orig ) );
212 pb -> failed = true;
213 }
214 if ( start_id != ( int64_t ) id || id_count != 1 )
215 {
216 PLOGERR ( klogWarn, ( klogWarn, rc, "single row $(rid) maps to start id $(start_id), count $(id_count).",
217 "rid=%u,id_count=%u,start_id=%ld", id, id_count, start_id ) );
218 pb -> failed = true;
219 }
220 }
221
222 if ( key != buffer )
223 free ( key );
224 }
225
226 StringWhack ( ( String* ) orig );
227 }
228
229 return false;
230 }
231
KPTrieIndexCheckConsistency_v1(const KPTrieIndex_v1 * self,int64_t * start_id,uint64_t * id_range,uint64_t * num_keys,uint64_t * num_rows,uint64_t * num_holes,const KIndex * outer,bool key2id,bool id2key)232 rc_t KPTrieIndexCheckConsistency_v1 ( const KPTrieIndex_v1 *self,
233 int64_t *start_id, uint64_t *id_range, uint64_t *num_keys,
234 uint64_t *num_rows, uint64_t *num_holes,
235 const KIndex *outer, bool key2id, bool id2key )
236 {
237 rc_t rc = 0;
238 KPTrieIndexCCParms_v1 pb;
239
240 if ( self == NULL )
241 return RC ( rcDB, rcIndex, rcValidating, rcParam, rcNull );
242
243 if ( ( key2id || id2key ) && outer == NULL )
244 return RC ( rcDB, rcIndex, rcValidating, rcSelf, rcNull );
245
246 KPTrieIndexCCParmsInit_v1 ( & pb, self, outer, key2id, id2key );
247 if ( PTrieDoUntil ( self -> key2id, KPTrieIndexCCVisit_v1, & pb ) )
248 rc = pb . rc;
249 else if ( pb . failed )
250 rc = RC ( rcDB, rcIndex, rcValidating, rcSelf, rcCorrupt );
251
252 if ( start_id != NULL )
253 * start_id = pb . stats . i_min_id;
254 if ( id_range != NULL )
255 * id_range = ( uint32_t ) ( pb . stats . x_max_id - pb . stats . i_min_id );
256 if ( num_keys != NULL )
257 * num_keys = pb . stats . num_entries;
258 if ( num_rows != NULL )
259 * num_rows = pb . stats . num_ids;
260 if ( num_holes != NULL )
261 * num_holes = pb . stats . num_holes;
262
263 KPTrieIndexCCParmsWhack_v1 ( & pb );
264 return rc;
265 }
266