1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 #include "pl-tools.h"
28 #include <klib/printf.h>
29 #include <sysalloc.h>
30 #include <stdlib.h>
31 #include <stdio.h>
32 #include <string.h>
33
34 #include <kdb/database.h>
35 #include <vdb/database.h>
36 #include <vdb/vdb-priv.h>
37
/* Puts a loader-context into its start state:
   no xml-logger / progressbar attached, every flag switched off
   and both totals set to zero. */
void lctx_init( ld_context * lctx )
{
    /* handles, attached later by whoever needs them */
    lctx->xml_progress = NULL;
    lctx->xml_logger = NULL;

    /* totals start at zero */
    lctx->total_seq_spots = 0;
    lctx->total_seq_bases = 0;

    /* all switches are off by default */
    lctx->check_src_obj = false;
    lctx->cache_content = false;
    lctx->total_printed = false;
    lctx->with_progress = false;
}
49
50
/* Releases the xml-logger and the progressbar held by the context ( if any )
   and clears both handles afterwards. */
void lctx_free( ld_context * lctx )
{
    if ( lctx->xml_logger != NULL )
        XMLLogger_Release( lctx->xml_logger );
    lctx->xml_logger = NULL;

    if ( lctx->xml_progress != NULL )
        KLoadProgressbar_Release( lctx->xml_progress, false );
    lctx->xml_progress = NULL;
}
64
65
check_src_objects(const KDirectory * hdf5_dir,const char ** groups,const char ** tables,bool show_not_found)66 rc_t check_src_objects( const KDirectory *hdf5_dir,
67 const char ** groups,
68 const char **tables,
69 bool show_not_found )
70 {
71 rc_t rc = 0;
72 uint16_t idx = 0;
73 uint32_t pt;
74
75 if ( groups != NULL )
76 {
77 while ( groups[ idx ] != NULL && rc == 0 )
78 {
79 pt = KDirectoryPathType ( hdf5_dir, "%s", groups[idx] );
80 if ( pt != kptDir )
81 {
82 rc = RC( rcExe, rcNoTarg, rcAllocating, rcParam, rcInvalid );
83 if ( show_not_found )
84 PLOGERR( klogErr, ( klogErr, rc, "hdf5-group '$(grp)' not found",
85 "grp=%s", groups[ idx ] ) );
86 else
87 PLOGERR( klogWarn, ( klogWarn, rc, "hdf5-group '$(grp)' not found",
88 "grp=%s", groups[ idx ] ) );
89 }
90 else
91 idx++;
92 }
93 }
94
95 idx = 0;
96 if ( tables != NULL && rc == 0 )
97 {
98 while ( tables[ idx ] != NULL && rc == 0 )
99 {
100 pt = KDirectoryPathType ( hdf5_dir, "%s", tables[idx] );
101 if ( pt != kptDataset )
102 {
103 rc = RC( rcExe, rcNoTarg, rcAllocating, rcParam, rcInvalid );
104 if ( show_not_found )
105 PLOGERR( klogErr, ( klogErr, rc, "hdf5-table '$(tbl)' not found",
106 "tbl=%s", tables[ idx ] ) );
107 else
108 PLOGERR( klogWarn, ( klogWarn, rc, "hdf5-table '$(tbl)' not found",
109 "tbl=%s", tables[ idx ] ) );
110 }
111 else
112 idx++;
113 }
114 }
115
116 return rc;
117 }
118
119
/* Resets an af_data-struct: all pointers are cleared and the stored
   return-code is set to -1. */
void init_array_file( af_data * af )
{
    af->rc = -1;

    /* nothing opened, nothing allocated yet */
    af->content = NULL;
    af->extents = NULL;
    af->af = NULL;
    af->f = NULL;
}
128
129
/* Releases everything an af_data-struct owns: the array-file, the file,
   the extents-array and the cached content. Every member is set to NULL
   afterwards, so the function can be called repeatedly. */
void free_array_file( af_data * af )
{
    /* the array-file is released before the file it was made from */
    if ( af->af != NULL )
        KArrayFileRelease( af->af );
    af->af = NULL;

    if ( af->f != NULL )
        KFileRelease( af->f );
    af->f = NULL;

    /* free() accepts NULL, no guard needed for the plain buffers */
    free( af->extents );
    af->extents = NULL;

    free( af->content );
    af->content = NULL;
}
153
154
read_cache_content(af_data * af)155 static rc_t read_cache_content( af_data * af )
156 {
157 rc_t rc = 0;
158 uint64_t filesize = ( af->element_bits >> 3 ) * ( af->extents[ 0 ] );
159 if ( af->dimensionality == 2 )
160 filesize *= af->extents[ 1 ];
161 af->content = malloc( filesize );
162 if ( af->content == NULL )
163 rc = RC ( rcApp, rcArgv, rcAccessing, rcMemory, rcExhausted );
164 else
165 {
166 uint64_t pos2[ 2 ];
167 uint64_t read2[ 2 ];
168 uint64_t count2[ 2 ];
169 rc_t rc;
170
171 pos2[ 0 ] = 0;
172 pos2[ 1 ] = 0;
173 count2[ 0 ] = af->extents[ 0 ];
174 if ( af->dimensionality == 2 )
175 count2[ 1 ] = af->extents[ 1 ];
176 else
177 count2[ 1 ] = 0;
178
179 rc = KArrayFileRead ( af->af, af->dimensionality, pos2,
180 af->content, count2, read2 );
181 if ( rc != 0 )
182 LOGERR( klogErr, rc, "error reading arrayfile-data int cache" );
183 }
184 return rc;
185 }
186
187
open_array_file(const KDirectory * dir,const char * name,af_data * af,const uint64_t expected_element_bits,const uint64_t expected_cols,bool disp_wrong_bitsize,bool cache_content,bool supress_err_msg)188 rc_t open_array_file( const KDirectory *dir,
189 const char *name,
190 af_data * af,
191 const uint64_t expected_element_bits,
192 const uint64_t expected_cols,
193 bool disp_wrong_bitsize,
194 bool cache_content,
195 bool supress_err_msg )
196 {
197 rc_t rc;
198
199 init_array_file( af );
200 /* open the requested "File" (actually a hdf5-table) as KFile
201 the works because the given KDirectory is a HDF5-Directory */
202 rc = KDirectoryOpenFileRead ( dir, &af->f, "%s", name );
203 if ( rc != 0 )
204 {
205 if ( !supress_err_msg )
206 {
207 PLOGERR( klogErr, ( klogErr, rc, "cannot open hdf5-dataset '$(name)'",
208 "name=%s", name ) );
209 }
210 return rc;
211 }
212 /* cast the KFile into a KArrayFile */
213 rc = MakeHDF5ArrayFile ( af->f, &af->af );
214 if ( rc != 0 )
215 {
216 PLOGERR( klogErr, ( klogErr, rc, "cannot open hdf5-arrayfile '$(name)'",
217 "name=%s", name ) );
218 free_array_file( af );
219 return rc;
220 }
221 /* detect the dimensionality of the array-file */
222 rc = KArrayFileDimensionality ( af->af, &af->dimensionality );
223 if ( rc != 0 )
224 {
225 PLOGERR( klogErr, ( klogErr, rc, "cannot retrieve dimensionality on '$(name)'",
226 "name=%s", name ) );
227 free_array_file( af );
228 return rc;
229 }
230 /* make a array to hold the extent in every dimension */
231 af->extents = malloc( af->dimensionality * ( sizeof ( uint64_t ) ) );
232 if ( af->extents == NULL )
233 {
234 rc = RC ( rcApp, rcArgv, rcAccessing, rcMemory, rcExhausted );
235 PLOGERR( klogErr, ( klogErr, rc, "cannot allocate enough memory for extents of '$(name)'",
236 "name=%s", name ) );
237 free_array_file( af );
238 return rc;
239 }
240 /* read the actuall extents into the created array */
241 rc = KArrayFileDimExtents ( af->af, af->dimensionality, af->extents );
242 if ( rc != 0 )
243 {
244 PLOGERR( klogErr, ( klogErr, rc, "cannot retrieve extents of '$(name)'",
245 "name=%s", name ) );
246 free_array_file( af );
247 return rc;
248 }
249 /* request the size of the element in bits */
250 rc = KArrayFileElementSize ( af->af, &af->element_bits );
251 if ( rc != 0 )
252 {
253 PLOGERR( klogErr, ( klogErr, rc, "cannot retrieve element-size of '$(name)'",
254 "name=%s", name ) );
255 free_array_file( af );
256 return rc;
257 }
258 /* compare the discovered bit-size with the expected one */
259 if ( af->element_bits != expected_element_bits )
260 {
261 rc = RC ( rcExe, rcNoTarg, rcLoading, rcData, rcInconsistent );
262
263 /* display the wrong bitsize only if wanted
264 ( this function can be called to probe the bitsize:
265 in this case the wrong one should not be shown as an error )*/
266 if ( disp_wrong_bitsize )
267 PLOGERR( klogErr, ( klogErr, rc, "unexpected element-bits of $(bsize) in '$(name)'",
268 "bsize=%lu,name=%s", af->element_bits, name ) );
269
270 free_array_file( af );
271 return rc;
272 }
273
274 /* not generic, we handle only dimensionality of 1 and 2 */
275 if ( expected_cols == 1 )
276 {
277 /* the dimensionality has to be 1 in this case */
278 if ( af->dimensionality != 1 )
279 {
280 rc = RC ( rcExe, rcNoTarg, rcLoading, rcData, rcInconsistent );
281 PLOGERR( klogErr, ( klogErr, rc, "unexpected dimensionality of $(dim) in '$(name)'",
282 "dim=%lu,name=%s", af->dimensionality, name ) );
283 free_array_file( af );
284 return rc;
285 }
286 }
287 else
288 {
289 /* the dimensionality has to be 2 in this case */
290 if ( af->dimensionality != 2 )
291 {
292 rc = RC ( rcExe, rcNoTarg, rcLoading, rcData, rcInconsistent );
293 PLOGERR( klogErr, ( klogErr, rc, "unexpected dimensionality of $(dim) in '$(name)'",
294 "dim=%lu,name=%s", af->dimensionality, name ) );
295 free_array_file( af );
296 return rc;
297 }
298 else
299 {
300 if ( af->extents[ 1 ] != expected_cols )
301 {
302 rc = RC ( rcExe, rcNoTarg, rcLoading, rcData, rcInconsistent );
303 PLOGERR( klogErr, ( klogErr, rc, "unexpected extent[1] of $(ext) in '$(name)'",
304 "ext=%lu,name=%s", af->extents[ 1 ], name ) );
305 free_array_file( af );
306 return rc;
307 }
308 }
309 }
310 if ( rc == 0 && cache_content )
311 {
312 rc = read_cache_content( af );
313 }
314 return rc;
315 }
316
317
318 /* assembles the 'absolute' path to the requested array-file before opening it */
open_element(const KDirectory * hdf5_dir,af_data * element,const char * path,const char * name,const uint64_t expected_element_bits,const uint64_t expected_cols,bool disp_wrong_bitsize,bool cache_content,bool supress_err_msg)319 rc_t open_element( const KDirectory *hdf5_dir,
320 af_data *element,
321 const char * path,
322 const char * name,
323 const uint64_t expected_element_bits,
324 const uint64_t expected_cols,
325 bool disp_wrong_bitsize,
326 bool cache_content,
327 bool supress_err_msg )
328 {
329 char src_path[ 64 ];
330 size_t num_writ;
331
332 element->rc = string_printf ( src_path, sizeof src_path, &num_writ, "%s/%s", path, name );
333 if ( element->rc != 0 )
334 LOGERR( klogErr, element->rc, "cannot assemble hdf5-element-name" );
335 else
336 element->rc = open_array_file( hdf5_dir, src_path, element,
337 expected_element_bits, expected_cols,
338 disp_wrong_bitsize,
339 cache_content,
340 supress_err_msg );
341 return element->rc;
342 }
343
344
345 /* we are reading data from an array-file,
346 the underlying array-file knows the size of an element */
array_file_read_dim1(af_data * af,const uint64_t pos,void * dst,const uint64_t count,uint64_t * n_read)347 rc_t array_file_read_dim1( af_data * af, const uint64_t pos,
348 void *dst, const uint64_t count,
349 uint64_t *n_read )
350 {
351 rc_t rc = 0;
352 if ( af->content == NULL )
353 rc = KArrayFileRead ( af->af, 1, &pos, dst, &count, n_read );
354 else
355 {
356 if ( ( pos + count ) > af->extents[ 0 ] )
357 rc = RC ( rcExe, rcNoTarg, rcLoading, rcData, rcInconsistent );
358 else
359 {
360 uint64_t buf_idx = ( af->element_bits >> 3 ) * pos;
361 size_t num = ( af->element_bits >> 3 ) * count;
362 char * src = af->content;
363 src+=buf_idx;
364 memmove( dst, src, num );
365 *n_read = count;
366 }
367 }
368 if ( rc != 0 )
369 LOGERR( klogErr, rc, "error reading arrayfile-data (1 dim)" );
370 return rc;
371 }
372
373
374 /* we are reading values in 2 dimensions from the array-file */
array_file_read_dim2(af_data * af,const uint64_t pos,void * dst,const uint64_t count,const uint64_t ext2,uint64_t * n_read)375 rc_t array_file_read_dim2( af_data * af, const uint64_t pos,
376 void *dst, const uint64_t count,
377 const uint64_t ext2, uint64_t *n_read )
378 {
379 rc_t rc = 0;
380 if ( af->content == NULL )
381 {
382 uint64_t pos2[ 2 ];
383 uint64_t read2[ 2 ];
384 uint64_t count2[ 2 ];
385 rc_t rc;
386
387 pos2[ 0 ] = pos;
388 pos2[ 1 ] = 0;
389 count2[ 0 ] = count;
390 count2[ 1 ] = ext2;
391 rc = KArrayFileRead ( af->af, 2, pos2, dst, count2, read2 );
392 if ( rc != 0 )
393 LOGERR( klogErr, rc, "error reading arrayfile-data (2 dim)" );
394 *n_read = read2[ 0 ];
395 }
396 else
397 {
398 if ( ( pos + count ) > af->extents[ 0 ] )
399 rc = RC ( rcExe, rcNoTarg, rcLoading, rcData, rcInconsistent );
400 else
401 {
402 uint64_t buf_idx = ( af->element_bits >> 3 ) * pos * af->extents[ 1 ];
403 size_t num = ( af->element_bits >> 3 ) * count * af->extents[ 1 ];
404 char * src = af->content;
405 src+=buf_idx;
406 memmove( dst, src, num );
407 *n_read = count * af->extents[ 1 ];
408 }
409 }
410 return rc;
411 }
412
413
add_columns(VCursor * cursor,uint32_t count,int32_t exclude_this,uint32_t * idx_vector,const char ** names)414 rc_t add_columns( VCursor * cursor, uint32_t count, int32_t exclude_this,
415 uint32_t * idx_vector, const char ** names )
416 {
417 rc_t rc = 0;
418 uint32_t i;
419 for ( i = 0; i < count && rc == 0; ++i )
420 {
421 if ( i != exclude_this )
422 {
423 rc = VCursorAddColumn( cursor, &(idx_vector[i]), "%s", names[i] );
424 if ( rc != 0 )
425 PLOGERR( klogErr, ( klogErr, rc, "cannot add column '$(name)' to vdb-cursor",
426 "name=%s", names[i] ) );
427 }
428 }
429 return rc;
430 }
431
/* Checks that the first extent of 'tab' is equal to 'expected'.

   Returns false ( and logs a warning ) if the table has no extents,
   returns false ( and logs an error ) if the first extent differs,
   returns true otherwise. 'name' is only used for the log-messages. */
bool check_table_count( af_data *tab, const char * name,
                        const uint64_t expected )
{
    if ( tab->extents == NULL )
    {
        PLOGMSG( klogWarn, ( klogWarn, "Table ... '$(name)' not found", "name=%s", name ) );
        return false;
    }

    if ( tab->extents[ 0 ] != expected )
    {
        rc_t rc = RC( rcExe, rcNoTarg, rcAllocating, rcParam, rcInvalid );
        PLOGERR( klogErr, ( klogErr, rc, "'$(name)'.count != expected",
                            "name=%s", name ) );
        return false;
    }

    return true;
}
452
453
transfer_bits(VCursor * cursor,const uint32_t col_idx,af_data * src,char * buffer,const uint64_t offset,const uint64_t count,const uint32_t n_bits,const char * explanation)454 rc_t transfer_bits( VCursor *cursor, const uint32_t col_idx,
455 af_data *src, char * buffer, const uint64_t offset, const uint64_t count,
456 const uint32_t n_bits, const char * explanation )
457 {
458 uint64_t n_read;
459 rc_t rc = array_file_read_dim1( src, offset, buffer, count, &n_read );
460 if ( rc == 0 )
461 {
462 if ( count != n_read )
463 {
464 rc = RC( rcExe, rcNoTarg, rcAllocating, rcParam, rcInvalid );
465 PLOGERR( klogErr, ( klogErr, rc, "cannot read enought data from hdf5-table for '$(name)'",
466 "name=%s", explanation ) );
467 }
468 if ( rc == 0 )
469 {
470 rc = VCursorWrite( cursor, col_idx, n_bits, buffer, 0, count );
471 if ( rc != 0 )
472 PLOGERR( klogErr, ( klogErr, rc, "cannot write data to vdb for '$(name)'",
473 "name=%s", explanation ) );
474 }
475 }
476 return rc;
477 }
478
479
vdb_write_value(VCursor * cursor,const uint32_t col_idx,void * src,const uint32_t n_bits,const uint32_t n_elem,const char * explanation)480 rc_t vdb_write_value( VCursor *cursor, const uint32_t col_idx,
481 void * src, const uint32_t n_bits,
482 const uint32_t n_elem, const char *explanation )
483 {
484 rc_t rc = VCursorWrite( cursor, col_idx, n_bits, src, 0, n_elem );
485 if ( rc != 0 )
486 PLOGERR( klogErr, ( klogErr, rc, "cannot write data to vdb for '$(name)'",
487 "name=%s", explanation ) );
488 return rc;
489 }
490
491
vdb_write_uint32(VCursor * cursor,const uint32_t col_idx,uint32_t value,const char * explanation)492 rc_t vdb_write_uint32( VCursor *cursor, const uint32_t col_idx,
493 uint32_t value, const char *explanation )
494 {
495 return vdb_write_value( cursor, col_idx, &value, 32, 1, explanation );
496 }
497
498
vdb_write_uint16(VCursor * cursor,const uint32_t col_idx,uint16_t value,const char * explanation)499 rc_t vdb_write_uint16( VCursor *cursor, const uint32_t col_idx,
500 uint16_t value, const char *explanation )
501 {
502 return vdb_write_value( cursor, col_idx, &value, 16, 1, explanation );
503 }
504
505
vdb_write_uint8(VCursor * cursor,const uint32_t col_idx,uint8_t value,const char * explanation)506 rc_t vdb_write_uint8( VCursor *cursor, const uint32_t col_idx,
507 uint8_t value, const char *explanation )
508 {
509 return vdb_write_value( cursor, col_idx, &value, 8, 1, explanation );
510 }
511
512
vdb_write_float32(VCursor * cursor,const uint32_t col_idx,float value,const char * explanation)513 rc_t vdb_write_float32( VCursor *cursor, const uint32_t col_idx,
514 float value, const char *explanation )
515 {
516 return vdb_write_value( cursor, col_idx, &value, 32, 1, explanation );
517 }
518
519
prepare_table(VDatabase * database,VCursor ** cursor,const char * template_name,const char * table_name)520 rc_t prepare_table( VDatabase * database, VCursor ** cursor,
521 const char * template_name,
522 const char * table_name )
523 {
524 VTable * table;
525 rc_t rc = VDatabaseCreateTable( database, &table, template_name,
526 kcmInit | kcmMD5 | kcmParents, "%s", table_name );
527 if ( rc != 0 )
528 {
529 PLOGERR( klogErr, ( klogErr, rc, "cannot create vdb-table '$(name)'",
530 "name=%s", table_name ) );
531 }
532 else
533 {
534 rc = VTableCreateCursorWrite( table, cursor, kcmInsert );
535 if ( rc != 0 )
536 {
537 PLOGERR( klogErr, ( klogErr, rc, "cannot create vdb-cursor for '$(name)'",
538 "name=%s", table_name ) );
539 }
540 VTableRelease( table );
541 }
542 return rc;
543 }
544
545
load_table(VDatabase * database,KDirectory * hdf5_src,ld_context * lctx,const char * template_name,const char * table_name,loader_func func)546 rc_t load_table( VDatabase * database, KDirectory * hdf5_src, ld_context *lctx,
547 const char * template_name, const char * table_name, loader_func func )
548 {
549 VTable * table;
550 rc_t rc = VDatabaseCreateTable( database, &table, template_name,
551 kcmInit | kcmMD5 | kcmParents, "%s", table_name );
552 if ( rc != 0 )
553 PLOGERR( klogErr, ( klogErr, rc, "cannot create vdb-table '$(name)'",
554 "name=%s", table_name ) );
555 else
556 {
557 VCursor * cursor;
558 rc = VTableCreateCursorWrite( table, &cursor, kcmInsert );
559 if ( rc != 0 )
560 PLOGERR( klogErr, ( klogErr, rc, "cannot create vdb-cursor for '$(name)'",
561 "name=%s", table_name ) );
562 else
563 {
564 VTableRelease( table );
565 rc = func( lctx, hdf5_src, cursor, table_name ); /* the callback does the job! */
566 VCursorRelease( cursor );
567 }
568 }
569 return rc;
570 }
571
572
progress_chunk(const KLoadProgressbar ** xml_progress,const uint64_t chunk)573 rc_t progress_chunk( const KLoadProgressbar ** xml_progress, const uint64_t chunk )
574 {
575 rc_t rc;
576 /* release the old progressbar... */
577 if ( *xml_progress != NULL )
578 {
579 KLoadProgressbar_Release( *xml_progress, false );
580 *xml_progress = NULL;
581 }
582 rc = KLoadProgressbar_Make( xml_progress, 0 );
583 if ( rc == 0 )
584 rc = KLoadProgressbar_Append( *xml_progress, chunk );
585 else
586 LOGERR( klogErr, rc, "cannot make KLoadProgressbar" );
587
588 return rc;
589 }
590
591
progress_step(const KLoadProgressbar * xml_progress)592 rc_t progress_step( const KLoadProgressbar * xml_progress )
593 {
594 if ( xml_progress != NULL )
595 return KLoadProgressbar_Process( xml_progress, 1, false );
596 else
597 return 0;
598 }
599
600
print_log_info(const char * info)601 void print_log_info( const char * info )
602 {
603 KLogLevel tmp_lvl = KLogLevelGet();
604 KLogLevelSet( klogInfo );
605 LOGMSG( klogInfo, info );
606 KLogLevelSet( tmp_lvl );
607 }
608
609
/* was once intended to make the SEQ-table an alias of the CONSENSUS-table;
   that step was removed, but the decision could be reinstated,
   which is why the function to do so is still here */
pacbio_make_alias(VDatabase * vdb_db,const char * existing_obj,const char * alias_to_create)613 rc_t pacbio_make_alias( VDatabase * vdb_db,
614 const char *existing_obj, const char *alias_to_create )
615 {
616 KDatabase *kdb;
617 rc_t rc = VDatabaseOpenKDatabaseUpdate ( vdb_db, & kdb );
618 if ( rc == 0 )
619 {
620 rc = KDatabaseAliasTable ( kdb, existing_obj, alias_to_create );
621 KDatabaseRelease ( kdb );
622 }
623 return rc;
624 }
625