1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 #include <kdb/extern.h>
28
29 #include "index-priv.h"
30 #include "idstats-priv.h"
31
32 #include <kdb/index.h>
33 #include <klib/ptrie.h>
34 #include <klib/pack.h>
35 #include <klib/text.h>
36 #include <klib/log.h>
37 #include <klib/rc.h>
38 #include <sysalloc.h>
39
40 #include <stdlib.h>
41 #include <limits.h>
42 #include <stdio.h>
43 #include <string.h>
44 #include <endian.h>
45 #include <byteswap.h>
46 #include <assert.h>
47
48
49
50 /*--------------------------------------------------------------------------
51 * KPTrieIndexCCParms
52 * consistency check parameter block
53 */
54 typedef struct KPTrieIndexCCParms_v2 KPTrieIndexCCParms_v2;
55 struct KPTrieIndexCCParms_v2
56 {
57 KIdStats stats;
58 rc_t rc;
59 const KPTrieIndex_v2 *self;
60 const KIndex *outer;
61 bool key2id;
62 bool id2key;
63 bool all_ids;
64 bool convertFromV1;
65 bool failed;
66 };
67
68 /* Init
69 */
70 static
KPTrieIndexCCParmsInit_v2(KPTrieIndexCCParms_v2 * pb,const KPTrieIndex_v2 * self,const KIndex * outer,bool key2id,bool id2key,bool all_ids,bool convertFromV1)71 void KPTrieIndexCCParmsInit_v2 ( KPTrieIndexCCParms_v2 *pb,
72 const KPTrieIndex_v2 *self, const KIndex *outer, bool key2id, bool id2key, bool all_ids, bool convertFromV1 )
73 {
74 KIdStatsInit ( & pb -> stats );
75 pb -> rc = 0;
76 pb -> self = self;
77 pb -> outer = outer;
78 pb -> key2id = key2id;
79 pb -> id2key = self -> ord2node ? id2key : false;
80 pb -> all_ids = all_ids;
81 pb -> convertFromV1 = convertFromV1;
82 pb -> failed = false;
83 }
84
85 /* Whack
86 */
87 static
KPTrieIndexCCParmsWhack_v2(KPTrieIndexCCParms_v2 * pb)88 void KPTrieIndexCCParmsWhack_v2 ( KPTrieIndexCCParms_v2 *pb )
89 {
90 KIdStatsWhack ( & pb -> stats );
91 }
92
93 /*--------------------------------------------------------------------------
94 * KPTrieIndex_v2
95 * persisted keymap
96 */
97
98 /* CheckConsistency
99 * runs a consistency check as efficiently as possible
100 *
101 * in all cases, we make use of "PTrieForEach" to visit each node
102 * using the natural storage order. each node returned is counted,
103 * read and inserted into a BSTree whose nodes merge adjacent and
104 * overlapping ids into existing nodes.
105 *
106 * if running a deep "key->id" test, then the key is first regenerated
107 * from the node, and then used to retrieve the id via the KIndex.
108 *
109 * if the projection index exists, the id is tested against the node
110 * to ensure that projection works. if "id->key" is true, then use
111 * the KIndex to produce a key and compare it against the node.
112 */
113 static
KPTrieIndexCCVisit_v2(PTNode * n,void * data)114 bool CC KPTrieIndexCCVisit_v2 ( PTNode *n, void *data )
115 {
116 KPTrieIndexCCParms_v2 *pb = (KPTrieIndexCCParms_v2 *)data;
117 const KPTrieIndex_v2 *self = pb -> self;
118
119 rc_t rc;
120 int64_t id;
121 size_t usize;
122 uint64_t span;
123 uint32_t i, ord;
124
125 /* detect conversion from v1 */
126 if ( pb -> convertFromV1 && self -> id_bits == 0 )
127 {
128 uint32_t id32;
129
130 /* payload of v1 PTNode is a 32-bit spot id */
131 assert ( n -> data . size == sizeof id32 );
132 memmove ( & id32, n -> data . addr, sizeof id32 );
133 id = self -> byteswap ? bswap_32 ( id32 ) : id32;
134 }
135 else
136 {
137 /* native v2 */
138 /* TBD - should this pass n -> data . size * 8 ??? */
139 if ( self -> id_bits != 0 )
140 {
141 rc = Unpack ( self -> id_bits, sizeof id * 8,
142 n -> data . addr, 0, self -> id_bits, NULL,
143 & id, sizeof id, & usize );
144 if ( rc != 0 )
145 {
146 PLOGMSG ( klogWarn, ( klogWarn, "could not determine row id of v2 node $(nid)",
147 "nid=0x%08x", n -> id ));
148 pb -> failed = true;
149 return false;
150 }
151 }
152 else
153 {
154 id = 0;
155 }
156
157 id += self -> first;
158 }
159
160 /* convert from row-id to 1-based ordinal index */
161 ord = KPTrieIndexID2Ord_v2 ( self, id );
162 if ( ord == 0 )
163 {
164 /* 0 means not found */
165 PLOGMSG ( klogWarn, ( klogWarn, "v2 node $(nid): row id $(rid) not found in trie",
166 "nid=0x%08x,rid=%ld", n -> id, id ));
167 pb -> failed = true;
168 return false;
169 }
170
171 if ( self -> ord2node != NULL )
172 {
173 /* to calculate span of last entry, where
174 1-based "ord" == the number of nodes,
175 just find the distance between the last row-id
176 in index ( self->maxid ) and the current row-id */
177 if ( ord == self -> count )
178 span = self -> maxid - id + 1;
179 else
180 {
181 /* from here on, we will use "ord" to be the
182 ZERO-BASED index of the slot FOLLOWING the
183 one corresponding to id. we want to find the
184 row-id AFTER the current one and calculate distance */
185 switch ( self -> variant )
186 {
187 /* linear array */
188 case 0:
189 /* starting with the FOLLOWING slot,
190 count duplicate entries */
191 for ( i = ord; i < self -> count; ++ i )
192 {
193 if ( n -> id != self -> ord2node [ i ] )
194 break;
195 }
196 span = self -> first + i - id;
197 break;
198
199 /* packed ordered array of sparse row-ids from here on
200 we already have "id" for this node, so the span will
201 be the distance from NEXT id - 1-based ord is 0-based next */
202 case 1:
203 span = self -> first + self -> id2ord . v8 [ ord ] - id;
204 break;
205 case 2:
206 span = self -> first + self -> id2ord . v16 [ ord ] - id;
207 break;
208 case 3:
209 span = self -> first + self -> id2ord . v32 [ ord ] - id;
210 break;
211 case 4:
212 span = self -> first + self -> id2ord . v64 [ ord ] - id;
213 break;
214 default:
215 PLOGMSG ( klogErr, ( klogErr, "PTrie v2 index has bad variant code: $(variant)",
216 "variant=%u", self -> variant ));
217 pb -> rc = RC ( rcDB, rcIndex, rcValidating, rcIndex, rcCorrupt );
218 return true;
219 }
220 }
221 }
222 else if ( self -> span_bits == 0 )
223 span = 1;
224 else
225 {
226 /* TBD - this case is never used in practice.
227 it would be an skey without a projection index */
228 rc = Unpack ( self -> span_bits, sizeof span * 8,
229 n -> data . addr, 0, self -> id_bits, NULL,
230 & span, sizeof span, & usize );
231 if ( rc != 0 )
232 {
233 PLOGMSG ( klogWarn, ( klogWarn, "could not determine span of v2 node $(nid), row id $(rid)",
234 "nid=0x%08x,rid=%ld", n -> id, id ));
235 pb -> failed = true;
236 return false;
237 }
238 }
239
240 /* record the node, row id, span */
241 pb -> rc = KIdStatsInsert ( & pb -> stats, id, span );
242 if ( pb -> rc != 0 )
243 {
244 PLOGERR ( klogSys, ( klogSys, pb -> rc, "failed when examining node id $(nid) with row id $(rid), span $(span)",
245 "nid=0x%08x,span=%u,rid=%ld", n -> id, span, id ));
246 return true;
247 }
248
249 /* if we have a projection index, test it */
250 if ( self -> ord2node != NULL )
251 {
252 if ( id < self -> first || id > self -> last )
253 {
254 PLOGMSG ( klogWarn, ( klogWarn, "node id $(nid) with row id $(rid) is not within projection range of $(min_rid)..$(max_rid)",
255 "nid=0x%08x,rid=%ld,min_rid=%ld,max_rid=%ld",
256 n -> id, id, self -> first, self -> last ));
257 pb -> failed = true;
258 return false;
259 }
260 for ( i = 0; i < span; ++ i )
261 {
262 if ( self -> ord2node [ i + ord - 1 ] != n -> id )
263 {
264 PLOGMSG ( klogWarn, ( klogWarn, "node id $(nid) with row id $(rid) does not match projection node id of $(pnid)",
265 "nid=0x%08x,rid=%ld,pnid=0x%08x", n -> id, id + 1, self -> ord2node [ i + ord - 1 ] ));
266 pb -> failed = true;
267 return false;
268 }
269 if ( ! pb -> all_ids || self -> variant != 0 )
270 break;
271 }
272 }
273
274 /* if performing deeper tests */
275 if ( pb -> key2id || pb -> id2key )
276 {
277 int64_t start_id;
278 uint64_t id_count;
279
280 /* need to recover key from given node */
281 const String *orig;
282 pb -> rc = PTNodeMakeKey ( n, & orig );
283 if ( pb -> rc != 0 )
284 {
285 PLOGERR ( klogSys, ( klogSys, pb -> rc, "failed when retrieving text for node id $(nid) with row id $(rid)",
286 "nid=0x%08x,rid=%u", n -> id, id ));
287 return true;
288 }
289
290 /* key->id test */
291 if ( pb -> key2id )
292 {
293 rc = KIndexFindText ( pb -> outer, orig -> addr, & start_id, & id_count, NULL, NULL );
294 if ( rc != 0 )
295 {
296 PLOGERR ( klogWarn, ( klogWarn, rc, "failed to retrieve start id and count for key '$(key)', row id $(rid)",
297 "key=%S,rid=%u", orig, id ) );
298 pb -> failed = true;
299 }
300 else if ( start_id != ( int64_t ) id || id_count != span )
301 {
302 PLOGERR ( klogWarn, ( klogWarn, rc, "key '$(key)' maps to start id $(start_id), count $(id_count): expected id $(rid), count 1.",
303 "key=%S,rid=%u,start_id=%ld,id_count=%lu", orig, id, start_id, id_count ) );
304 pb -> failed = true;
305 }
306 }
307
308 /* id->key test */
309 if ( pb -> id2key )
310 {
311 char buffer [ 256 ], *key = buffer;
312 size_t key_size, bsize = sizeof buffer;
313 if ( sizeof buffer <= orig -> size )
314 {
315 key = (char *)malloc ( bsize = orig -> size + 1 );
316 if ( key == 0 )
317 {
318 pb -> rc = RC ( rcDB, rcIndex, rcValidating, rcMemory, rcExhausted );
319 StringWhack ( ( String* ) orig );
320 return true;
321 }
322 }
323
324 for ( i = 0; i < span; ++ i )
325 {
326 rc = KIndexProjectText ( pb -> outer, id + i, & start_id, & id_count, key, bsize, & key_size );
327 if ( rc != 0 )
328 {
329 PLOGERR ( klogWarn, ( klogWarn, rc, "failed to retrieve key, start id and count for row id $(rid)",
330 "rid=%u", id + i ) );
331 pb -> failed = true;
332 break;
333 }
334
335 if ( orig -> size != key_size || memcmp ( orig -> addr, key, key_size ) != 0 )
336 {
337 PLOGERR ( klogWarn, ( klogWarn, rc, "row $(rid) maps to key '$(key)': expected key '$(orig)'.",
338 "rid=%u,key=%.*s,orig=%S", id + i, ( int ) key_size, key, orig ) );
339 pb -> failed = true;
340 }
341 if ( start_id != id || id_count != ( uint64_t ) span )
342 {
343 PLOGERR ( klogWarn, ( klogWarn, rc, "row $(rid) maps to start id $(start_id), count $(id_count): expected $(row_start), $(span).",
344 "rid=%u,id_count=%lu,start_id=%ld,row_start=%ld,span=%u",
345 id, id_count, start_id, id, span ) );
346 pb -> failed = true;
347 }
348
349 if ( ! pb -> all_ids || pb -> failed )
350 break;
351 }
352
353 if ( key != buffer )
354 free ( key );
355 }
356
357 StringWhack ( ( String* ) orig );
358 }
359
360 return false;
361 }
362
KPTrieIndexCheckConsistency_v2(const KPTrieIndex_v2 * self,int64_t * start_id,uint64_t * id_range,uint64_t * num_keys,uint64_t * num_rows,uint64_t * num_holes,const KIndex * outer,bool key2id,bool id2key,bool all_ids,bool convertFromV1)363 rc_t KPTrieIndexCheckConsistency_v2 ( const KPTrieIndex_v2 *self,
364 int64_t *start_id, uint64_t *id_range, uint64_t *num_keys,
365 uint64_t *num_rows, uint64_t *num_holes,
366 const KIndex *outer, bool key2id, bool id2key, bool all_ids, bool convertFromV1 )
367 {
368 rc_t rc = 0;
369 KPTrieIndexCCParms_v2 pb;
370
371 if ( self == NULL )
372 return RC ( rcDB, rcIndex, rcValidating, rcParam, rcNull );
373
374 if ( ( key2id || id2key ) && outer == NULL )
375 return RC ( rcDB, rcIndex, rcValidating, rcSelf, rcNull );
376
377 KPTrieIndexCCParmsInit_v2 ( & pb, self, outer, key2id, id2key, all_ids, convertFromV1 );
378 if ( PTrieDoUntil ( self -> key2id, KPTrieIndexCCVisit_v2, & pb ) )
379 rc = pb . rc;
380 else if ( pb . failed )
381 rc = RC ( rcDB, rcIndex, rcValidating, rcSelf, rcCorrupt );
382
383 if ( start_id != NULL )
384 * start_id = pb . stats . i_min_id;
385 if ( id_range != NULL )
386 * id_range = pb . stats . x_max_id - pb . stats . i_min_id;
387 if ( num_keys != NULL )
388 * num_keys = pb . stats . num_entries;
389 if ( num_rows != NULL )
390 * num_rows = pb . stats . num_ids;
391 if ( num_holes != NULL )
392 * num_holes = pb . stats . num_holes;
393
394 KPTrieIndexCCParmsWhack_v2 ( & pb );
395 return rc;
396 }
397