1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 #include "col_by_col.h"
27 
28 #include <klib/log.h>
29 #include <klib/out.h>
30 #include <klib/num-gen.h>
31 #include <vdb/cursor.h>
32 #include <klib/progressbar.h>
33 
34 #include "coldefs.h"
35 #include "cmn.h"
36 
37 #include <sysalloc.h>
38 #include <stdlib.h>
39 #include <string.h>
40 
41 rc_t Quitting( void );  /* because we cannot include <kapp/main.h> where it is defined! */
42 
cbc_diff_column_iter(const col_pair * pair,const VCursor * cur_1,const VCursor * cur_2,const struct diff_ctx * dctx,const struct num_gen_iter * iter,unsigned long int * diffs)43 static rc_t cbc_diff_column_iter( const col_pair * pair, const VCursor * cur_1, const VCursor * cur_2,
44                                   const struct diff_ctx * dctx, const struct num_gen_iter * iter,
45                                   unsigned long int * diffs )
46 {
47     rc_t rc = 0;
48     struct progressbar * progress = NULL;
49     int64_t row_id;
50     uint64_t rows_checked = 0;
51     uint64_t rows_different = 0;
52 
53     if ( dctx -> show_progress )
54         make_progressbar( &progress, 2 );
55 
56     while ( ( rc == 0 ) && ( num_gen_iterator_next( iter, &row_id, &rc ) ) && ( *diffs < dctx -> max_err ) )
57     {
58         if ( rc == 0 ) rc = Quitting();    /* to be able to cancel the loop by signal */
59         if ( rc == 0 )
60         {
61             bool col_equal = true;
62 
63             if ( pair != NULL )
64                 rc = cmn_diff_column( pair, cur_1, cur_2, row_id,  &col_equal );
65 
66             if ( !col_equal )
67             {
68                 if ( rc == 0 )	rc = KOutMsg( "\n" );
69                 rows_different++;
70                 ( *diffs )++;
71             }
72             rows_checked++;
73 
74             if ( progress != NULL )
75             {
76                 uint32_t progress_value;
77                 if ( num_gen_iterator_percent( iter, 2, &progress_value ) == 0 )
78                     update_progressbar( progress, progress_value );
79             }
80 
81         } /* if (!Quitting) */
82     } /* while ( num_gen_iterator_next() ) */
83 
84     if ( rc == 0 )
85         rc = KOutMsg( "\n%,lu rows checked, %,lu rows differ\n", rows_checked, rows_different );
86 
87     if ( progress != NULL ) destroy_progressbar( progress );
88 
89 	return rc;
90 }
91 
cbc_diff_column(col_pair * pair,const VCursor * cur_1,const VCursor * cur_2,const struct diff_ctx * dctx,unsigned long int * diffs)92 static rc_t cbc_diff_column( col_pair * pair, const VCursor * cur_1, const VCursor * cur_2,
93                              const struct diff_ctx * dctx, unsigned long int *diffs )
94 {
95     rc_t rc = VCursorAddColumn( cur_1, &( pair -> idx[ 0 ] ), "%s", pair -> name );
96     if ( rc != 0 )
97     {
98         LOGERR ( klogInt, rc, "VCursorAddColumn( acc #1 ) failed" );
99     }
100     else
101     {
102         rc = VCursorAddColumn( cur_2, &( pair -> idx[ 1 ] ), "%s", pair -> name );
103         if ( rc != 0 )
104         {
105             LOGERR ( klogInt, rc, "VCursorAddColumn( acc #1 ) failed" );
106         }
107         else
108         {
109             rc = VCursorOpen( cur_1 );
110             if ( rc != 0 )
111             {
112                 LOGERR ( klogInt, rc, "VCursorOpen( acc #1 ) failed" );
113             }
114             else
115             {
116                 rc = VCursorOpen( cur_2 );
117                 if ( rc != 0 )
118                 {
119                     LOGERR ( klogInt, rc, "VCursorOpen( acc #2 ) failed" );
120                 }
121                 else
122                 {
123                     struct num_gen * rows_to_diff = NULL;
124                     rc = cmn_make_num_gen( cur_1, cur_2, pair->idx[0], pair->idx[1], dctx -> rows, &rows_to_diff );
125                     if ( rc == 0 && rows_to_diff != NULL )
126                     {
127                         const struct num_gen_iter * iter = NULL;
128                         rc = num_gen_iterator_make( rows_to_diff, &iter );
129                         if ( rc != 0 )
130                         {
131                             LOGERR ( klogInt, rc, "num_gen_iterator_make() failed" );
132                         }
133                         else if ( iter != NULL )
134                         {
135                             /* *************************************************************** */
136                             rc = cbc_diff_column_iter( pair, cur_1, cur_2, dctx, iter, diffs );
137                             /* *************************************************************** */
138                             num_gen_iterator_destroy( iter );
139                         }
140                         num_gen_destroy( rows_to_diff );
141                     }
142                 }
143             }
144         }
145     }
146     return rc;
147 }
148 
cbc_diff_columns(const col_defs * defs,const VTable * tab_1,const VTable * tab_2,const struct diff_ctx * dctx,const char * tablename,unsigned long int * diffs)149 rc_t cbc_diff_columns( const col_defs * defs, const VTable * tab_1, const VTable * tab_2,
150                        const struct diff_ctx * dctx, const char * tablename, unsigned long int *diffs )
151 {
152     rc_t rc = 0;
153     uint32_t i;
154     uint32_t count = VectorLength( &( defs -> cols ) );
155     for ( i = 0; i < count && rc == 0 && ( *diffs < dctx -> max_err ); ++i )
156     {
157         col_pair * pair = VectorGet( &( defs -> cols ), i );
158         if ( pair != NULL )
159         {
160             rc = KOutMsg( "comparing column '%s.%s'\n", tablename, pair -> name );
161             if ( rc == 0 )
162             {
163                 const VCursor * cur_1;
164                 rc = VTableCreateCursorRead( tab_1, &cur_1 );
165                 if ( rc != 0 )
166                 {
167                     LOGERR ( klogInt, rc, "VTableCreateCursorRead( acc #1 ) failed" );
168                 }
169                 else
170                 {
171                     const VCursor * cur_2;
172                     rc = VTableCreateCursorRead( tab_2, &cur_2 );
173                     if ( rc != 0 )
174                     {
175                         LOGERR ( klogInt, rc, "VTableCreateCursorRead( acc #2 ) failed" );
176                     }
177                     else
178                     {
179                         /* *************************************************************** */
180                         rc = cbc_diff_column( pair, cur_1, cur_2, dctx, diffs );
181                         /* *************************************************************** */
182                         VCursorRelease( cur_2 );
183                     }
184                     VCursorRelease( cur_1 );
185                 }
186             }
187         }
188     }
189     return rc;
190 }
191