1 /*===========================================================================
2  *
3  *                            PUBLIC DOMAIN NOTICE
4  *               National Center for Biotechnology Information
5  *
6  *  This software/database is a "United States Government Work" under the
7  *  terms of the United States Copyright Act.  It was written as part of
8  *  the author's official duties as a United States Government employee and
9  *  thus cannot be copyrighted.  This software/database is freely available
10  *  to the public for use. The National Library of Medicine and the U.S.
11  *  Government have not placed any restriction on its use or reproduction.
12  *
13  *  Although all reasonable efforts have been taken to ensure the accuracy
14  *  and reliability of the software and data, the NLM and the U.S.
15  *  Government do not and cannot warrant the performance or results that
16  *  may be obtained by using this software or data. The NLM and the U.S.
17  *  Government disclaim all warranties, express or implied, including
18  *  warranties of performance, merchantability or fitness for any particular
19  *  purpose.
20  *
21  *  Please cite the author in any work or product based on this material.
22  *
23  * ===========================================================================
24  *
25  */
26 
27 #include "caps.h"
28 #include "ctx.h"
29 #include "mem.h"
30 #include "except.h"
31 #include "status.h"
32 
33 #include <kapp/main.h>
34 #include <vdb/table.h>
35 #include <vdb/cursor.h>
36 #include <vdb/vdb-priv.h>
37 #include <kproc/thread.h>
38 #include <klib/rc.h>
39 
40 #include <string.h>
41 
42 #if ! _DEBUGGING
43 #define USE_BGTHREAD 1
44 #endif
45 
46 FILE_ENTRY ( xcheck-ref-align );
47 
48 
49 /*--------------------------------------------------------------------------
50  * TestReferenceCell
51  *  properly sorted tables will allow both columns to be walked.
52  *  each row of the REFERENCE table column MUST contain zero or more
53  *  sequential integers, and the first integer MUST continue the sequence
54  *  established by previous rows. therefore, the row will be fully specified
55  *  by a start and stop id pair.
56  */
57 static
TestReferenceCell(const ctx_t * ctx,const VCursor * ref_curs,uint32_t align_ids_idx,const char * align_name,int64_t ref_row_id,int64_t excl_ref_last_idx)58 int64_t TestReferenceCell ( const ctx_t *ctx,
59     const VCursor *ref_curs, uint32_t align_ids_idx,
60     const char *align_name, int64_t ref_row_id, int64_t excl_ref_last_idx )
61 {
62     FUNC_ENTRY ( ctx );
63 
64     const int64_t *cell;
65     uint32_t elem_bits, boff, row_len;
66 
67     rc_t rc = VCursorCellDataDirect ( ref_curs, ref_row_id, align_ids_idx,
68         & elem_bits, ( const void** ) & cell, & boff, & row_len );
69     if ( rc != 0 )
70         ERROR ( rc, "VCursorCellDataDirect - failed to read row %ld from REFERENCE cursor", ref_row_id );
71     else if ( elem_bits != sizeof * cell * 8 )
72     {
73         rc = RC ( rcExe, rcIndex, rcValidating, rcSize, rcIncorrect );
74         ERROR ( rc, "VCursorCellDataDirect - elem_bits of %u reading row %ld from REFERENCE cursor", elem_bits, ref_row_id );
75     }
76     else if ( boff != 0 )
77     {
78         rc = RC ( rcExe, rcIndex, rcValidating, rcOffset, rcIncorrect );
79         ERROR ( rc, "VCursorCellDataDirect - bit offset of %u reading row %ld from REFERENCE cursor", boff, ref_row_id );
80     }
81     else
82     {
83         uint32_t i;
84         for ( i = 0; i < row_len; ++ i )
85         {
86             if ( cell [ i ] != excl_ref_last_idx + i )
87             {
88                 rc = RC ( rcExe, rcIndex, rcValidating, rcId, rcIncorrect );
89                 ERROR ( rc, "REFERENCE.%s_IDS.%ld: expected id %ld but found %ld",
90                         align_name, ref_row_id, excl_ref_last_idx + i, cell [ i ] );
91                 break;
92             }
93         }
94 
95         excl_ref_last_idx += row_len;
96     }
97 
98     return excl_ref_last_idx;
99 }
100 
101 static
TestAlignCell(const ctx_t * ctx,const VCursor * align_curs,uint32_t ref_id_idx,const char * align_name,int64_t align_row_id,int64_t ref_row_id)102 void TestAlignCell ( const ctx_t *ctx,
103     const VCursor *align_curs, uint32_t ref_id_idx,
104     const char *align_name, int64_t align_row_id, int64_t ref_row_id )
105 {
106     FUNC_ENTRY ( ctx );
107 
108     const int64_t *cell;
109     uint32_t elem_bits, boff, row_len;
110 
111 
112     rc_t rc = VCursorCellDataDirect ( align_curs, align_row_id, ref_id_idx,
113         & elem_bits, ( const void** ) & cell, & boff, & row_len );
114     if ( rc != 0 )
115         ERROR ( rc, "VCursorCellDataDirect - failed to read row %ld from %s cursor", align_row_id, align_name );
116     else if ( elem_bits != sizeof * cell * 8 )
117     {
118         rc = RC ( rcExe, rcIndex, rcValidating, rcSize, rcIncorrect );
119         ERROR ( rc, "VCursorCellDataDirect - elem_bits of %u reading row %ld from %s cursor", elem_bits, align_row_id, align_name );
120     }
121     else if ( boff != 0 )
122     {
123         rc = RC ( rcExe, rcIndex, rcValidating, rcOffset, rcIncorrect );
124         ERROR ( rc, "VCursorCellDataDirect - bit offset of %u reading row %ld from %s cursor", boff, align_row_id, align_name );
125     }
126     else if ( row_len != 1 )
127     {
128         rc = RC ( rcExe, rcIndex, rcValidating, rcRange, rcIncorrect );
129         ERROR ( rc, "VCursorCellDataDirect - row_len of %u reading row %ld from %s cursor", row_len, align_row_id, align_name );
130     }
131     else if ( * cell != ref_row_id )
132     {
133         rc = RC ( rcExe, rcIndex, rcValidating, rcId, rcIncorrect );
134         ERROR ( rc, "%s.REF_ID.%ld: expected id %ld but found %ld",
135                 align_name, align_row_id, ref_row_id, cell [ 0 ] );
136     }
137 }
138 
139 /*--------------------------------------------------------------------------
140  * CrossCheckRefAlignCols
141  *  performs the cross-check
142  */
143 static
CrossCheckRefAlignCols(const ctx_t * ctx,const VCursor * ref_curs,uint32_t align_ids_idx,const VCursor * align_curs,uint32_t ref_id_idx,const char * align_name)144 void CrossCheckRefAlignCols ( const ctx_t *ctx,
145     const VCursor *ref_curs, uint32_t align_ids_idx,
146     const VCursor *align_curs, uint32_t ref_id_idx, const char *align_name )
147 {
148     FUNC_ENTRY ( ctx );
149 
150     int64_t ref_row_id, excl_ref_last_id;
151     rc_t rc = VCursorIdRange ( ref_curs, 0, & ref_row_id, ( uint64_t* ) & excl_ref_last_id );
152     if ( rc != 0 )
153         INTERNAL_ERROR ( rc, "VCursorIdRange - failed to establish row range on REFERENCE cursor" );
154     else
155     {
156         int64_t align_row_id, excl_align_last_id;
157 
158         excl_ref_last_id += ref_row_id;
159 
160         rc = VCursorIdRange ( align_curs, 0, & align_row_id, ( uint64_t* ) & excl_align_last_id );
161         if ( rc != 0 )
162             INTERNAL_ERROR ( rc, "VCursorIdRange - failed to establish row range on %s cursor", align_name );
163         else
164         {
165             int64_t excl_last_align_idx;
166 
167             excl_align_last_id += align_row_id;
168 
169             for ( excl_last_align_idx = 1; ref_row_id < excl_ref_last_id; ++ ref_row_id )
170             {
171                 int64_t first_align_idx = excl_last_align_idx;
172 
173                 /* rule for bailing out */
174                 rc = Quitting ();
175                 if ( rc != 0 || FAILED () )
176                     break;
177 
178                 /* the REFERENCE id cell should be filled purely with sequential ids */
179                 TRY ( excl_last_align_idx = TestReferenceCell ( ctx, ref_curs, align_ids_idx,
180                           align_name, ref_row_id, excl_last_align_idx ) )
181                 {
182                     /* the ids must be within the range of the alignment table */
183                     if ( excl_last_align_idx > excl_align_last_id )
184                     {
185                         rc = RC ( rcExe, rcIndex, rcValidating, rcId, rcExcessive );
186                         ERROR ( rc, "REFERENCE.%s_IDS.%ld: references non-existant rows ( %ld .. %ld : max %ld )",
187                                 align_name, ref_row_id, first_align_idx, excl_last_align_idx, excl_align_last_id );
188                         break;
189                     }
190 
191                     /* this is more of a permanent assert */
192                     if ( first_align_idx != align_row_id )
193                     {
194                         rc = RC ( rcExe, rcIndex, rcValidating, rcId, rcIncorrect );
195                         ERROR ( rc, "REFERENCE.%s_IDS.%ld: expected id %ld but found %ld",
196                                 align_name, ref_row_id, first_align_idx, align_row_id );
197                         break;
198                     }
199 
200                     /* each of the rows in alignment table must point back
201                        to the same row in the REFERENCE table */
202                     for ( ; align_row_id < excl_last_align_idx; ++ align_row_id )
203                     {
204                         ON_FAIL ( TestAlignCell ( ctx, align_curs, ref_id_idx,
205                                       align_name, align_row_id, ref_row_id ) )
206                             break;
207                     }
208                 }
209             }
210 
211             /* at this point, we must have seen every record */
212             if ( ! FAILED () )
213             {
214                 if ( ref_row_id != excl_ref_last_id )
215                 {
216                     rc = RC ( rcExe, rcIndex, rcValidating, rcRange, rcIncomplete );
217                     ERROR ( rc, "REFERENCE.%s_IDS: scan stopped on row %ld of %ld",
218                             align_name, ref_row_id, excl_ref_last_id );
219                 }
220                 if ( align_row_id != excl_align_last_id )
221                 {
222                     rc = RC ( rcExe, rcIndex, rcValidating, rcRange, rcIncomplete );
223                     ERROR ( rc, "%s.REF_ID: scan stopped on row %ld of %ld",
224                             align_name, align_row_id, excl_align_last_id );
225                 }
226             }
227         }
228     }
229 }
230 
231 
232 
233 /*--------------------------------------------------------------------------
234  * CrossCheckRefAlignCurs
235  *  adds columns and opens cursors
236  */
237 static
CrossCheckRefAlignCurs(const ctx_t * ctx,const VCursor * ref_curs,const VCursor * align_curs,const char * align_name)238 void CrossCheckRefAlignCurs ( const ctx_t *ctx,
239     const VCursor *ref_curs, const VCursor *align_curs, const char *align_name )
240 {
241     FUNC_ENTRY ( ctx );
242 
243     uint32_t align_ids_idx;
244     rc_t rc = VCursorAddColumn ( ref_curs, & align_ids_idx, "%s_IDS", align_name );
245     if ( rc != 0 )
246         INTERNAL_ERROR ( rc, "VCursorAddColumn - failed to add column '%s_IDS' to REFERENCE cursor", align_name );
247     else
248     {
249         uint32_t ref_id_idx;
250         rc = VCursorAddColumn ( align_curs, & ref_id_idx, "REF_ID" );
251         if ( rc != 0 )
252             INTERNAL_ERROR ( rc, "VCursorAddColumn - failed to add column 'REF_ID' to %s cursor", align_name );
253         else
254         {
255             rc = VCursorOpen ( ref_curs );
256             if ( rc != 0 )
257                 INTERNAL_ERROR ( rc, "VCursorOpen - failed to open cursor on REFERENCE table" );
258             else
259             {
260                 rc = VCursorOpen ( align_curs );
261                 if ( rc != 0 )
262                     INTERNAL_ERROR ( rc, "VCursorOpen - failed to open cursor on %s table", align_name );
263                 else
264                 {
265                     rc = VCursorSetRowId ( ref_curs, 1 );
266                     if ( rc != 0 )
267                         INTERNAL_ERROR ( rc, "VCursorSetRowId - failed to set row-id on REFERENCE cursor" );
268                     else
269                     {
270                         rc = VCursorSetRowId ( align_curs, 1 );
271                         if ( rc != 0 )
272                             INTERNAL_ERROR ( rc, "VCursorSetRowId - failed to set row-id on %s cursor", align_name );
273                         else
274                         {
275                             CrossCheckRefAlignCols ( ctx, ref_curs, align_ids_idx, align_curs, ref_id_idx, align_name );
276                         }
277                     }
278                 }
279             }
280         }
281     }
282 }
283 
284 
285 /*--------------------------------------------------------------------------
286  * CrossCheckRefAlignTbl
287  *  checks REFERENCE.<name>_IDS for properly sorted form
288  *  runs a cross-check of REFERENCE.<name>_IDS against <name>.REF_ID
289  */
290 static
CrossCheckRefAlignTblInt(const ctx_t * ctx,const VTable * ref_tbl,const VTable * align_tbl,const char * align_name)291 void CrossCheckRefAlignTblInt ( const ctx_t *ctx,
292     const VTable *ref_tbl, const VTable *align_tbl, const char *align_name )
293 {
294     FUNC_ENTRY ( ctx );
295 
296     rc_t rc;
297     const VCursor *ref_curs;
298 
299     rc = VTableCreateCursorRead ( ref_tbl, & ref_curs );
300     if ( rc != 0 )
301         INTERNAL_ERROR ( rc, "VTableCreateCursorRead - failed to open cursor on REFERENCE table" );
302     else
303     {
304         const VCursor *align_curs;
305         rc = VTableCreateCursorRead ( align_tbl, & align_curs );
306         if ( rc != 0 )
307             INTERNAL_ERROR ( rc, "VTableCreateCursorRead - failed to open cursor on %s table", align_name );
308         else
309         {
310             rc = VCursorLinkedCursorSet(align_curs,"REFERENCE",ref_curs);
311             if ( rc != 0 )
312                 INTERNAL_ERROR ( rc, "VCursorLinkedCursorSet - failed to link cursor on REFERENCE table" );
313             else
314             {
315                 CrossCheckRefAlignCurs ( ctx, ref_curs, align_curs, align_name );
316             }
317 
318             VCursorRelease ( align_curs );
319         }
320 
321         VCursorRelease ( ref_curs );
322     }
323 }
324 
325 #if USE_BGTHREAD
326 
327 typedef struct CrossCheckRefAlignTblData CrossCheckRefAlignTblData;
328 struct CrossCheckRefAlignTblData
329 {
330     Caps caps;
331     const VTable *ref_tbl, *align_tbl;
332     char align_name [ 1 ];
333 };
334 
335 static
CrossCheckRefAlignTblRun(const KThread * self,void * data)336 rc_t CC CrossCheckRefAlignTblRun ( const KThread *self, void *data )
337 {
338     CrossCheckRefAlignTblData *pb = data;
339 
340     DECLARE_CTX_INFO ();
341     ctx_t thread_ctx = { & pb -> caps, NULL, & ctx_info };
342     const ctx_t *ctx = & thread_ctx;
343 
344     STATUS ( 2, "running consistency-check on background thread 0x%p", self );
345 
346     CrossCheckRefAlignTblInt ( ctx, pb -> ref_tbl, pb -> align_tbl, pb -> align_name );
347 
348     STATUS ( 2, "finished consistency-check on background thread 0x%p: %s",
349              self, ctx -> rc ? "failure" : "success ");
350 
351     VTableRelease ( pb -> align_tbl );
352     VTableRelease ( pb -> ref_tbl );
353     CapsWhack ( & pb -> caps, ctx );
354     free ( pb );
355 
356     return ctx -> rc;
357 }
358 
359 #endif
360 
/*--------------------------------------------------------------------------
 * CrossCheckRefAlignTbl
 *  checks REFERENCE.<name>_IDS for properly sorted form and
 *  cross-checks REFERENCE.<name>_IDS against <name>.REF_ID.
 *
 *  when USE_BGTHREAD is enabled the check is run on a background thread:
 *  a parameter block holding its own capabilities, a new reference to each
 *  table and a copy of "align_name" is handed to the thread, which owns and
 *  releases all of it. otherwise the check runs before this function returns.
 *
 *  "ref_tbl" [ IN ] - the REFERENCE table
 *  "align_tbl" [ IN ] - the alignment table
 *  "align_name" [ IN ] - NUL-terminated alignment table name
 *  "pt" [ OUT ] - receives the background thread when one was started,
 *  NULL otherwise. must not be NULL.
 *
 *  any failure to set up or start the background thread is reported on
 *  "ctx" and everything acquired up to that point is released.
 */
void CrossCheckRefAlignTbl ( const ctx_t *ctx,
    const VTable *ref_tbl, const VTable *align_tbl, const char *align_name,
    KThread ** pt )
{
    FUNC_ENTRY ( ctx );

#if USE_BGTHREAD
    size_t name_len;
    CrossCheckRefAlignTblData *pb;
#endif

    STATUS ( 2, "consistency-check on join indices between REFERENCE and %s tables", align_name );

    assert ( pt );
    * pt = NULL;

#if USE_BGTHREAD
    /* the struct already declares one byte of align_name, which covers the NUL */
    name_len = strlen ( align_name );
    pb = malloc ( sizeof * pb + name_len );
    if ( pb == NULL ) {
        rc_t rc = RC ( rcExe, rcMemory, rcAllocating, rcMemory, rcExhausted );
        INTERNAL_ERROR ( rc, "malloc - failed to allocate parameter block for %s consistency-check", align_name );
    }
    else {
        TRY ( CapsInit ( & pb -> caps, ctx ) )
        {
            /* the thread outlives this call, so it needs its own table references */
            rc_t rc = VTableAddRef ( pb -> ref_tbl = ref_tbl );
            if ( rc != 0 )
                ERROR ( rc, "VTableAddRef failed on REFERENCE table" );
            else
            {
                rc = VTableAddRef ( pb -> align_tbl = align_tbl );
                if ( rc != 0 )
                    ERROR ( rc, "VTableAddRef failed on %s table", align_name );
                else
                {
                    KThread *t;

                    /* copy the name including its terminator */
                    memcpy ( pb -> align_name, align_name, name_len + 1 );

                    rc = KThreadMake ( & t, CrossCheckRefAlignTblRun, pb );
                    if ( rc == 0 )
                    {
                        /* ownership of pb has passed to the thread */
                        * pt = t;
                        return;
                    }

                    /* without this report the caller would see neither a thread
                       nor an error, and the check would be skipped silently */
                    ERROR ( rc, "KThreadMake - failed to create background thread for %s consistency-check", align_name );

                    VTableRelease ( pb -> align_tbl );
                }

                VTableRelease ( pb -> ref_tbl );
            }

            CapsWhack ( & pb -> caps, ctx );
        }

        free ( pb );
    }
#else
    CrossCheckRefAlignTblInt ( ctx, ref_tbl, align_tbl, align_name );
#endif
}
423