1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 #include "caps.h"
28 #include "ctx.h"
29 #include "mem.h"
30 #include "except.h"
31 #include "status.h"
32
33 #include <kapp/main.h>
34 #include <vdb/table.h>
35 #include <vdb/cursor.h>
36 #include <vdb/vdb-priv.h>
37 #include <kproc/thread.h>
38 #include <klib/rc.h>
39
40 #include <string.h>
41
42 #if ! _DEBUGGING
43 #define USE_BGTHREAD 1
44 #endif
45
46 FILE_ENTRY ( xcheck-ref-align );
47
48
49 /*--------------------------------------------------------------------------
50 * TestReferenceCell
51 * properly sorted tables will allow both columns to be walked.
52 * each row of the REFERENCE table column MUST contain zero or more
53 * sequential integers, and the first integer MUST continue the sequence
54 * established by previous rows. therefore, the row will be fully specified
55 * by a start and stop id pair.
56 */
57 static
TestReferenceCell(const ctx_t * ctx,const VCursor * ref_curs,uint32_t align_ids_idx,const char * align_name,int64_t ref_row_id,int64_t excl_ref_last_idx)58 int64_t TestReferenceCell ( const ctx_t *ctx,
59 const VCursor *ref_curs, uint32_t align_ids_idx,
60 const char *align_name, int64_t ref_row_id, int64_t excl_ref_last_idx )
61 {
62 FUNC_ENTRY ( ctx );
63
64 const int64_t *cell;
65 uint32_t elem_bits, boff, row_len;
66
67 rc_t rc = VCursorCellDataDirect ( ref_curs, ref_row_id, align_ids_idx,
68 & elem_bits, ( const void** ) & cell, & boff, & row_len );
69 if ( rc != 0 )
70 ERROR ( rc, "VCursorCellDataDirect - failed to read row %ld from REFERENCE cursor", ref_row_id );
71 else if ( elem_bits != sizeof * cell * 8 )
72 {
73 rc = RC ( rcExe, rcIndex, rcValidating, rcSize, rcIncorrect );
74 ERROR ( rc, "VCursorCellDataDirect - elem_bits of %u reading row %ld from REFERENCE cursor", elem_bits, ref_row_id );
75 }
76 else if ( boff != 0 )
77 {
78 rc = RC ( rcExe, rcIndex, rcValidating, rcOffset, rcIncorrect );
79 ERROR ( rc, "VCursorCellDataDirect - bit offset of %u reading row %ld from REFERENCE cursor", boff, ref_row_id );
80 }
81 else
82 {
83 uint32_t i;
84 for ( i = 0; i < row_len; ++ i )
85 {
86 if ( cell [ i ] != excl_ref_last_idx + i )
87 {
88 rc = RC ( rcExe, rcIndex, rcValidating, rcId, rcIncorrect );
89 ERROR ( rc, "REFERENCE.%s_IDS.%ld: expected id %ld but found %ld",
90 align_name, ref_row_id, excl_ref_last_idx + i, cell [ i ] );
91 break;
92 }
93 }
94
95 excl_ref_last_idx += row_len;
96 }
97
98 return excl_ref_last_idx;
99 }
100
101 static
TestAlignCell(const ctx_t * ctx,const VCursor * align_curs,uint32_t ref_id_idx,const char * align_name,int64_t align_row_id,int64_t ref_row_id)102 void TestAlignCell ( const ctx_t *ctx,
103 const VCursor *align_curs, uint32_t ref_id_idx,
104 const char *align_name, int64_t align_row_id, int64_t ref_row_id )
105 {
106 FUNC_ENTRY ( ctx );
107
108 const int64_t *cell;
109 uint32_t elem_bits, boff, row_len;
110
111
112 rc_t rc = VCursorCellDataDirect ( align_curs, align_row_id, ref_id_idx,
113 & elem_bits, ( const void** ) & cell, & boff, & row_len );
114 if ( rc != 0 )
115 ERROR ( rc, "VCursorCellDataDirect - failed to read row %ld from %s cursor", align_row_id, align_name );
116 else if ( elem_bits != sizeof * cell * 8 )
117 {
118 rc = RC ( rcExe, rcIndex, rcValidating, rcSize, rcIncorrect );
119 ERROR ( rc, "VCursorCellDataDirect - elem_bits of %u reading row %ld from %s cursor", elem_bits, align_row_id, align_name );
120 }
121 else if ( boff != 0 )
122 {
123 rc = RC ( rcExe, rcIndex, rcValidating, rcOffset, rcIncorrect );
124 ERROR ( rc, "VCursorCellDataDirect - bit offset of %u reading row %ld from %s cursor", boff, align_row_id, align_name );
125 }
126 else if ( row_len != 1 )
127 {
128 rc = RC ( rcExe, rcIndex, rcValidating, rcRange, rcIncorrect );
129 ERROR ( rc, "VCursorCellDataDirect - row_len of %u reading row %ld from %s cursor", row_len, align_row_id, align_name );
130 }
131 else if ( * cell != ref_row_id )
132 {
133 rc = RC ( rcExe, rcIndex, rcValidating, rcId, rcIncorrect );
134 ERROR ( rc, "%s.REF_ID.%ld: expected id %ld but found %ld",
135 align_name, align_row_id, ref_row_id, cell [ 0 ] );
136 }
137 }
138
139 /*--------------------------------------------------------------------------
140 * CrossCheckRefAlignCols
141 * performs the cross-check
142 */
143 static
CrossCheckRefAlignCols(const ctx_t * ctx,const VCursor * ref_curs,uint32_t align_ids_idx,const VCursor * align_curs,uint32_t ref_id_idx,const char * align_name)144 void CrossCheckRefAlignCols ( const ctx_t *ctx,
145 const VCursor *ref_curs, uint32_t align_ids_idx,
146 const VCursor *align_curs, uint32_t ref_id_idx, const char *align_name )
147 {
148 FUNC_ENTRY ( ctx );
149
150 int64_t ref_row_id, excl_ref_last_id;
151 rc_t rc = VCursorIdRange ( ref_curs, 0, & ref_row_id, ( uint64_t* ) & excl_ref_last_id );
152 if ( rc != 0 )
153 INTERNAL_ERROR ( rc, "VCursorIdRange - failed to establish row range on REFERENCE cursor" );
154 else
155 {
156 int64_t align_row_id, excl_align_last_id;
157
158 excl_ref_last_id += ref_row_id;
159
160 rc = VCursorIdRange ( align_curs, 0, & align_row_id, ( uint64_t* ) & excl_align_last_id );
161 if ( rc != 0 )
162 INTERNAL_ERROR ( rc, "VCursorIdRange - failed to establish row range on %s cursor", align_name );
163 else
164 {
165 int64_t excl_last_align_idx;
166
167 excl_align_last_id += align_row_id;
168
169 for ( excl_last_align_idx = 1; ref_row_id < excl_ref_last_id; ++ ref_row_id )
170 {
171 int64_t first_align_idx = excl_last_align_idx;
172
173 /* rule for bailing out */
174 rc = Quitting ();
175 if ( rc != 0 || FAILED () )
176 break;
177
178 /* the REFERENCE id cell should be filled purely with sequential ids */
179 TRY ( excl_last_align_idx = TestReferenceCell ( ctx, ref_curs, align_ids_idx,
180 align_name, ref_row_id, excl_last_align_idx ) )
181 {
182 /* the ids must be within the range of the alignment table */
183 if ( excl_last_align_idx > excl_align_last_id )
184 {
185 rc = RC ( rcExe, rcIndex, rcValidating, rcId, rcExcessive );
186 ERROR ( rc, "REFERENCE.%s_IDS.%ld: references non-existant rows ( %ld .. %ld : max %ld )",
187 align_name, ref_row_id, first_align_idx, excl_last_align_idx, excl_align_last_id );
188 break;
189 }
190
191 /* this is more of a permanent assert */
192 if ( first_align_idx != align_row_id )
193 {
194 rc = RC ( rcExe, rcIndex, rcValidating, rcId, rcIncorrect );
195 ERROR ( rc, "REFERENCE.%s_IDS.%ld: expected id %ld but found %ld",
196 align_name, ref_row_id, first_align_idx, align_row_id );
197 break;
198 }
199
200 /* each of the rows in alignment table must point back
201 to the same row in the REFERENCE table */
202 for ( ; align_row_id < excl_last_align_idx; ++ align_row_id )
203 {
204 ON_FAIL ( TestAlignCell ( ctx, align_curs, ref_id_idx,
205 align_name, align_row_id, ref_row_id ) )
206 break;
207 }
208 }
209 }
210
211 /* at this point, we must have seen every record */
212 if ( ! FAILED () )
213 {
214 if ( ref_row_id != excl_ref_last_id )
215 {
216 rc = RC ( rcExe, rcIndex, rcValidating, rcRange, rcIncomplete );
217 ERROR ( rc, "REFERENCE.%s_IDS: scan stopped on row %ld of %ld",
218 align_name, ref_row_id, excl_ref_last_id );
219 }
220 if ( align_row_id != excl_align_last_id )
221 {
222 rc = RC ( rcExe, rcIndex, rcValidating, rcRange, rcIncomplete );
223 ERROR ( rc, "%s.REF_ID: scan stopped on row %ld of %ld",
224 align_name, align_row_id, excl_align_last_id );
225 }
226 }
227 }
228 }
229 }
230
231
232
233 /*--------------------------------------------------------------------------
234 * CrossCheckRefAlignCurs
235 * adds columns and opens cursors
236 */
237 static
CrossCheckRefAlignCurs(const ctx_t * ctx,const VCursor * ref_curs,const VCursor * align_curs,const char * align_name)238 void CrossCheckRefAlignCurs ( const ctx_t *ctx,
239 const VCursor *ref_curs, const VCursor *align_curs, const char *align_name )
240 {
241 FUNC_ENTRY ( ctx );
242
243 uint32_t align_ids_idx;
244 rc_t rc = VCursorAddColumn ( ref_curs, & align_ids_idx, "%s_IDS", align_name );
245 if ( rc != 0 )
246 INTERNAL_ERROR ( rc, "VCursorAddColumn - failed to add column '%s_IDS' to REFERENCE cursor", align_name );
247 else
248 {
249 uint32_t ref_id_idx;
250 rc = VCursorAddColumn ( align_curs, & ref_id_idx, "REF_ID" );
251 if ( rc != 0 )
252 INTERNAL_ERROR ( rc, "VCursorAddColumn - failed to add column 'REF_ID' to %s cursor", align_name );
253 else
254 {
255 rc = VCursorOpen ( ref_curs );
256 if ( rc != 0 )
257 INTERNAL_ERROR ( rc, "VCursorOpen - failed to open cursor on REFERENCE table" );
258 else
259 {
260 rc = VCursorOpen ( align_curs );
261 if ( rc != 0 )
262 INTERNAL_ERROR ( rc, "VCursorOpen - failed to open cursor on %s table", align_name );
263 else
264 {
265 rc = VCursorSetRowId ( ref_curs, 1 );
266 if ( rc != 0 )
267 INTERNAL_ERROR ( rc, "VCursorSetRowId - failed to set row-id on REFERENCE cursor" );
268 else
269 {
270 rc = VCursorSetRowId ( align_curs, 1 );
271 if ( rc != 0 )
272 INTERNAL_ERROR ( rc, "VCursorSetRowId - failed to set row-id on %s cursor", align_name );
273 else
274 {
275 CrossCheckRefAlignCols ( ctx, ref_curs, align_ids_idx, align_curs, ref_id_idx, align_name );
276 }
277 }
278 }
279 }
280 }
281 }
282 }
283
284
285 /*--------------------------------------------------------------------------
286 * CrossCheckRefAlignTbl
287 * checks REFERENCE.<name>_IDS for properly sorted form
288 * runs a cross-check of REFERENCE.<name>_IDS against <name>.REF_ID
289 */
290 static
CrossCheckRefAlignTblInt(const ctx_t * ctx,const VTable * ref_tbl,const VTable * align_tbl,const char * align_name)291 void CrossCheckRefAlignTblInt ( const ctx_t *ctx,
292 const VTable *ref_tbl, const VTable *align_tbl, const char *align_name )
293 {
294 FUNC_ENTRY ( ctx );
295
296 rc_t rc;
297 const VCursor *ref_curs;
298
299 rc = VTableCreateCursorRead ( ref_tbl, & ref_curs );
300 if ( rc != 0 )
301 INTERNAL_ERROR ( rc, "VTableCreateCursorRead - failed to open cursor on REFERENCE table" );
302 else
303 {
304 const VCursor *align_curs;
305 rc = VTableCreateCursorRead ( align_tbl, & align_curs );
306 if ( rc != 0 )
307 INTERNAL_ERROR ( rc, "VTableCreateCursorRead - failed to open cursor on %s table", align_name );
308 else
309 {
310 rc = VCursorLinkedCursorSet(align_curs,"REFERENCE",ref_curs);
311 if ( rc != 0 )
312 INTERNAL_ERROR ( rc, "VCursorLinkedCursorSet - failed to link cursor on REFERENCE table" );
313 else
314 {
315 CrossCheckRefAlignCurs ( ctx, ref_curs, align_curs, align_name );
316 }
317
318 VCursorRelease ( align_curs );
319 }
320
321 VCursorRelease ( ref_curs );
322 }
323 }
324
325 #if USE_BGTHREAD
326
327 typedef struct CrossCheckRefAlignTblData CrossCheckRefAlignTblData;
328 struct CrossCheckRefAlignTblData
329 {
330 Caps caps;
331 const VTable *ref_tbl, *align_tbl;
332 char align_name [ 1 ];
333 };
334
335 static
CrossCheckRefAlignTblRun(const KThread * self,void * data)336 rc_t CC CrossCheckRefAlignTblRun ( const KThread *self, void *data )
337 {
338 CrossCheckRefAlignTblData *pb = data;
339
340 DECLARE_CTX_INFO ();
341 ctx_t thread_ctx = { & pb -> caps, NULL, & ctx_info };
342 const ctx_t *ctx = & thread_ctx;
343
344 STATUS ( 2, "running consistency-check on background thread 0x%p", self );
345
346 CrossCheckRefAlignTblInt ( ctx, pb -> ref_tbl, pb -> align_tbl, pb -> align_name );
347
348 STATUS ( 2, "finished consistency-check on background thread 0x%p: %s",
349 self, ctx -> rc ? "failure" : "success ");
350
351 VTableRelease ( pb -> align_tbl );
352 VTableRelease ( pb -> ref_tbl );
353 CapsWhack ( & pb -> caps, ctx );
354 free ( pb );
355
356 return ctx -> rc;
357 }
358
359 #endif
360
/*--------------------------------------------------------------------------
 * CrossCheckRefAlignTbl
 *  checks REFERENCE.<name>_IDS for properly sorted form
 *  runs a cross-check of REFERENCE.<name>_IDS against <name>.REF_ID
 *
 *  "ref_tbl"    - the REFERENCE table
 *  "align_tbl"  - the alignment table
 *  "align_name" - alignment table name, used for column naming and messages
 *  "pt"         - [ OUT ] must not be NULL. always set to NULL first.
 *
 *  when USE_BGTHREAD is set, the check is started on a background thread:
 *  a parameter block holding its own caps, an added reference to each
 *  table and a copy of the name is handed to CrossCheckRefAlignTblRun,
 *  and on success "*pt" receives the new thread. ownership of the block
 *  then belongs to the thread. if any step fails, including creation of
 *  the thread, the error is reported through the context, everything
 *  acquired so far is given back, and "*pt" stays NULL.
 *
 *  when USE_BGTHREAD is not set, the check runs on the calling thread and
 *  "*pt" stays NULL.
 */
void CrossCheckRefAlignTbl ( const ctx_t *ctx,
    const VTable *ref_tbl, const VTable *align_tbl, const char *align_name,
    KThread ** pt )
{
    FUNC_ENTRY ( ctx );

#if USE_BGTHREAD
    size_t name_len;
    CrossCheckRefAlignTblData *pb;
#endif

    STATUS ( 2, "consistency-check on join indices between REFERENCE and %s tables", align_name );

    assert ( pt );
    * pt = NULL;

#if USE_BGTHREAD
    /* the struct already provides one byte of name storage, which
       covers the terminating NUL; only the characters are added */
    name_len = strlen ( align_name );
    pb = malloc ( sizeof * pb + name_len );
    if ( pb == NULL ) {
        rc_t rc = RC ( rcExe, rcMemory, rcAllocating, rcMemory, rcExhausted );
        INTERNAL_ERROR ( rc, "failed to allocate background thread data for %s consistency-check", align_name );
    }
    else {
        TRY ( CapsInit ( & pb -> caps, ctx ) )
        {
            rc_t rc = VTableAddRef ( pb -> ref_tbl = ref_tbl );
            if ( rc != 0 )
                ERROR ( rc, "VTableAddRef failed on REFERENCE table" );
            else
            {
                rc = VTableAddRef ( pb -> align_tbl = align_tbl );
                if ( rc != 0 )
                    ERROR ( rc, "VTableAddRef failed on %s table", align_name );
                else
                {
                    KThread *t;

                    /* copy the name including its terminating NUL */
                    memcpy ( pb -> align_name, align_name, name_len + 1 );

                    rc = KThreadMake ( & t, CrossCheckRefAlignTblRun, pb );
                    if ( rc == 0 )
                    {
                        /* the thread owns "pb" from here on */
                        * pt = t;
                        return;
                    }

                    /* without this the check would be skipped silently:
                       the caller would see neither a thread nor an error */
                    ERROR ( rc, "KThreadMake - failed to create background thread for %s consistency-check", align_name );

                    VTableRelease ( pb -> align_tbl );
                }

                VTableRelease ( pb -> ref_tbl );
            }

            CapsWhack ( & pb -> caps, ctx );
        }

        free ( pb );
    }
#else
    CrossCheckRefAlignTblInt ( ctx, ref_tbl, align_tbl, align_name );
#endif
}
423