1 /*=========================================================================== 2 * 3 * PUBLIC DOMAIN NOTICE 4 * National Center for Biotechnology Information 5 * 6 * This software/database is a "United States Government Work" under the 7 * terms of the United States Copyright Act. It was written as part of 8 * the author's official duties as a United States Government employee and 9 * thus cannot be copyrighted. This software/database is freely available 10 * to the public for use. The National Library of Medicine and the U.S. 11 * Government have not placed any restriction on its use or reproduction. 12 * 13 * Although all reasonable efforts have been taken to ensure the accuracy 14 * and reliability of the software and data, the NLM and the U.S. 15 * Government do not and cannot warrant the performance or results that 16 * may be obtained by using this software or data. The NLM and the U.S. 17 * Government disclaim all warranties, express or implied, including 18 * warranties of performance, merchantability or fitness for any particular 19 * purpose. 20 * 21 * Please cite the author in any work or product based on this material. 22 * 23 * =========================================================================== 24 * 25 */ 26 27 #ifndef _h_pl_tools_ 28 #define _h_pl_tools_ 29 30 #ifdef __cplusplus 31 extern "C" { 32 #endif 33 34 #include <klib/out.h> 35 #include <klib/rc.h> 36 #include <klib/text.h> 37 #include <klib/log.h> 38 #include <vdb/manager.h> 39 #include <vdb/schema.h> 40 #include <vdb/database.h> 41 #include <vdb/table.h> 42 #include <vdb/cursor.h> 43 #include <kfs/file.h> 44 #include <kfs/arrayfile.h> 45 #include <hdf5/kdf5.h> 46 #include <kapp/log-xml.h> 47 #include <kapp/progressbar.h> 48 49 /* for zmw */ 50 #define HOLE_NUMBER_BITSIZE 32 51 #define HOLE_NUMBER_COLS 1 52 53 #define HOLE_STATUS_BITSIZE 8 54 #define HOLE_STATUS_COLS 1 55 56 #define HOLE_XY_BITSIZE 16 57 #define HOLE_XY_COLS 2 58 59 #define NUMEVENT_BITSIZE 32 60 #define NUMEVENT_COLS 1 61 62 #define NUMPASSES_BITSIZE 32 63 #define NUMPASSES_COLS 1 64 65 /* for BaseCalls_cmn */ 66 #define BASECALL_BITSIZE 8 67 #define BASECALL_COLS 1 68 69 #define QUALITY_VALUE_BITSIZE 8 70 #define QUALITY_VALUE_COLS 1 71 72 #define DELETION_QV_BITSIZE 8 73 #define DELETION_QV_COLS 1 74 75 #define DELETION_TAG_BITSIZE 8 76 #define DELETION_TAG_COLS 1 77 78 #define INSERTION_QV_BITSIZE 8 79 #define INSERTION_QV_COLS 1 80 81 #define SUBSTITUTION_QV_BITZISE 8 82 #define SUBSTITUTION_QV_COLS 1 83 84 #define SUBSTITUTION_TAG_BITSIZE 8 85 #define SUBSTITUTION_TAG_COLS 1 86 87 /* for regions */ 88 #define REGIONS_BITSIZE 32 89 #define REGIONS_COLS 5 90 91 /* for sequence */ 92 #define PRE_BASE_FRAMES_BITSIZE 16 93 #define PRE_BASE_FRAMES_COLS 1 94 95 #define PULSE_INDEX_BITSIZE_16 16 96 #define PULSE_INDEX_BITSIZE_32 32 97 #define PULSE_INDEX_COLS 1 98 99 #define WIDTH_IN_FRAMES_BITSIZE 16 100 #define WIDTH_IN_FRAMES_COLS 1 101 102 /* for metrics */ 103 #define BASE_FRACTION_BITSIZE 32 104 #define BASE_FRACTION_COLS 4 105 106 #define BASE_IPD_BITSIZE 32 107 #define BASE_IPD_COLS 1 108 109 #define BASE_RATE_BITSIZE 32 110 #define BASE_RATE_COLS 1 111 112 #define BASE_WIDTH_BITSIZE 32 113 #define BASE_WIDTH_COLS 1 114 115 #define CM_BAS_QV_BITSIZE 32 116 #define CM_BAS_QV_COLS 4 117 118 #define CM_DEL_QV_BITSIZE 32 119 #define CM_DEL_QV_COLS 4 120 121 #define CM_INS_QV_BITSIZE 32 122 #define CM_INS_QV_COLS 4 123 124 #define CM_SUB_QV_BITSIZE 32 125 #define CM_SUB_QV_COLS 4 126 127 #define LOCAL_BASE_RATE_BITSIZE 32 128 #define LOCAL_BASE_RATE_COLS 1 129 130 #define DARK_BASE_RATE_BITSIZE 32 131 #define DARK_BASE_RATE_COLS 1 132 133 #define HQ_REGION_START_TIME_BITSIZE 32 134 #define HQ_REGION_START_TIME_COLS 1 135 136 #define HQ_REGION_END_TIME_BITSIZE 32 137 #define HQ_REGION_END_TIME_COLS 1 138 139 #define HQ_REGION_SNR_BITSIZE 32 140 #define HQ_REGION_SNR_COLS 4 141 142 #define PRODUCTIVITY_BITSIZE 8 143 #define PRODUCTIVITY_COLS 1 144 145 #define READ_SCORE_BITSIZE 32 146 #define READ_SCORE_COLS 1 147 148 #define RM_BAS_QV_BITSIZE 32 149 #define RM_BAS_QV_COLS 1 150 151 #define RM_DEL_QV_BITSIZE 32 152 #define RM_DEL_QV_COLS 1 153 154 #define RM_INS_QV_BITSIZE 32 155 #define RM_INS_QV_COLS 1 156 157 #define RM_SUB_QV_BITSIZE 32 158 #define RM_SUB_QV_COLS 1 159 160 /* for passes */ 161 #define ADAPTER_HIT_AFTER_BITSIZE 8 162 #define ADAPTER_HIT_AFTER_COLS 1 163 164 #define ADAPTER_HIT_BEFORE_BITSIZE 8 165 #define ADAPTER_HIT_BEFORE_COLS 1 166 167 #define PASS_DIRECTION_BITSIZE 8 168 #define PASS_DIRECTION_COLS 1 169 170 #define PASS_NUM_BASES_BITSIZE 32 171 #define PASS_NUM_BASES_COLS 1 172 173 #define PASS_START_BASE_BITSIZE 32 174 #define PASS_START_BASE_COLS 1 175 176 typedef struct ld_context 177 { 178 const XMLLogger* xml_logger; 179 const KLoadProgressbar *xml_progress; 180 const char *dst_path; 181 uint64_t total_seq_bases; 182 uint64_t total_seq_spots; 183 bool with_progress; 184 bool total_printed; 185 bool cache_content; 186 bool check_src_obj; 187 } ld_context; 188 189 190 void lctx_init( ld_context * lctx ); 191 void lctx_free( ld_context * lctx ); 192 193 194 rc_t check_src_objects( const KDirectory *hdf5_dir, 195 const char ** groups, 196 const char **tables, 197 bool show_not_found ); 198 199 typedef struct af_data 200 { 201 struct KFile const *f; /* the fake "file" from a HDF5-dir */ 202 struct KArrayFile *af; /* the arrayfile made from f */ 203 rc_t rc; 204 uint8_t dimensionality; /* how many dimensions the HDF5-dataset has */ 205 uint64_t * extents; /* the extension in every dimension */ 206 uint64_t element_bits; /* how big in bits is the element */ 207 void * content; /* read the whole thing into memory */ 208 } af_data; 209 210 211 void init_array_file( af_data * af ); 212 void free_array_file( af_data * af ); 213 214 rc_t open_array_file( const KDirectory *dir, 215 const char *name, 216 af_data * af, 217 const uint64_t expected_element_bits, 218 const uint64_t expected_cols, 219 bool disp_wrong_bitsize, 220 bool cache_content, 221 bool supress_err_msg ); 222 223 rc_t open_element( const KDirectory *hdf5_dir, 224 af_data *element, 225 const char * path, 226 const char * name, 227 const uint64_t expected_element_bits, 228 const uint64_t expected_cols, 229 bool disp_wrong_bitsize, 230 bool cache_content, 231 bool supress_err_msg ); 232 233 rc_t array_file_read_dim1( af_data * af, const uint64_t pos, 234 void *dst, const uint64_t count, 235 uint64_t *n_read ); 236 237 rc_t array_file_read_dim2( af_data * af, const uint64_t pos, 238 void *dst, const uint64_t count, 239 const uint64_t ext2, uint64_t *n_read ); 240 241 rc_t add_columns( VCursor * cursor, uint32_t count, int32_t exclude_this, 242 uint32_t * idx_vector, const char ** names ); 243 244 bool check_table_count( af_data *tab, const char * name, 245 const uint64_t expected ); 246 247 rc_t transfer_bits( VCursor *cursor, const uint32_t col_idx, 248 af_data *src, char * buffer, const uint64_t offset, const uint64_t count, 249 const uint32_t n_bits, const char * explanation ); 250 251 rc_t vdb_write_value( VCursor *cursor, const uint32_t col_idx, 252 void * src, const uint32_t n_bits, 253 const uint32_t n_elem, const char *explanation ); 254 255 rc_t vdb_write_uint32( VCursor *cursor, const uint32_t col_idx, 256 uint32_t value, const char *explanation ); 257 258 rc_t vdb_write_uint16( VCursor *cursor, const uint32_t col_idx, 259 uint16_t value, const char *explanation ); 260 261 rc_t vdb_write_uint8( VCursor *cursor, const uint32_t col_idx, 262 uint8_t value, const char *explanation ); 263 264 rc_t vdb_write_float32( VCursor *cursor, const uint32_t col_idx, 265 float value, const char *explanation ); 266 267 typedef rc_t (*loader_func)( ld_context *lctx, 268 KDirectory * hdf5_src, VCursor * cursor, 269 const char * table_name ); 270 271 rc_t prepare_table( VDatabase * database, VCursor ** cursor, 272 const char * template_name, 273 const char * table_name ); 274 275 rc_t load_table( VDatabase * database, KDirectory * hdf5_src, ld_context *lctx, 276 const char * template_name, const char * table_name, 277 loader_func func ); 278 279 rc_t progress_chunk( const KLoadProgressbar ** xml_progress, const uint64_t chunk ); 280 rc_t progress_step( const KLoadProgressbar * xml_progress ); 281 282 void print_log_info( const char * info ); 283 284 rc_t pacbio_make_alias( VDatabase * vdb_db, 285 const char *existing_obj, const char *alias_to_create ); 286 287 #ifdef __cplusplus 288 } 289 #endif 290 291 #endif 292