1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 
27 #ifndef _h_pl_tools_
28 #define _h_pl_tools_
29 
30 #ifdef __cplusplus
31 extern "C" {
32 #endif
33 
34 #include <klib/out.h>
35 #include <klib/rc.h>
36 #include <klib/text.h>
37 #include <klib/log.h>
38 #include <vdb/manager.h>
39 #include <vdb/schema.h>
40 #include <vdb/database.h>
41 #include <vdb/table.h>
42 #include <vdb/cursor.h>
43 #include <kfs/file.h>
44 #include <kfs/arrayfile.h>
45 #include <hdf5/kdf5.h>
46 #include <kapp/log-xml.h>
47 #include <kapp/progressbar.h>
48 
/*
 * zmw datasets: element width in bits (*_BITSIZE) and number of columns
 * (*_COLS) for each dataset.
 * NOTE(review): presumably used as the expected_element_bits / expected_cols
 * arguments of open_array_file() / open_element() - confirm against callers.
 */
#define HOLE_NUMBER_BITSIZE     32
#define HOLE_NUMBER_COLS        1

#define HOLE_STATUS_BITSIZE     8
#define HOLE_STATUS_COLS        1

#define HOLE_XY_BITSIZE         16
#define HOLE_XY_COLS            2

#define NUMEVENT_BITSIZE        32
#define NUMEVENT_COLS           1

#define NUMPASSES_BITSIZE       32
#define NUMPASSES_COLS          1
64 
/*
 * BaseCalls_cmn datasets: element width in bits (*_BITSIZE) and number of
 * columns (*_COLS) for each dataset.
 */
#define BASECALL_BITSIZE 8
#define BASECALL_COLS 1

#define QUALITY_VALUE_BITSIZE 8
#define QUALITY_VALUE_COLS 1

#define DELETION_QV_BITSIZE 8
#define DELETION_QV_COLS 1

#define DELETION_TAG_BITSIZE 8
#define DELETION_TAG_COLS 1

#define INSERTION_QV_BITSIZE 8
#define INSERTION_QV_COLS 1

#define SUBSTITUTION_QV_BITSIZE 8
/* Historic misspelling ("BITZISE"); kept as an alias so that code written
   against the old name keeps compiling. New code should use
   SUBSTITUTION_QV_BITSIZE, which matches every other *_BITSIZE macro. */
#define SUBSTITUTION_QV_BITZISE SUBSTITUTION_QV_BITSIZE
#define SUBSTITUTION_QV_COLS 1

#define SUBSTITUTION_TAG_BITSIZE 8
#define SUBSTITUTION_TAG_COLS 1
86 
/* regions dataset: 32-bit elements, 5 columns */
#define REGIONS_BITSIZE         32
#define REGIONS_COLS            5
90 
/*
 * sequence datasets: element width in bits and number of columns.
 * PULSE_INDEX has two width constants (16 and 32 bit).
 * NOTE(review): presumably because input files store it in either width -
 * confirm against the loader code.
 */
#define PRE_BASE_FRAMES_BITSIZE     16
#define PRE_BASE_FRAMES_COLS        1

#define PULSE_INDEX_BITSIZE_16      16
#define PULSE_INDEX_BITSIZE_32      32
#define PULSE_INDEX_COLS            1

#define WIDTH_IN_FRAMES_BITSIZE     16
#define WIDTH_IN_FRAMES_COLS        1
101 
/*
 * metrics datasets: element width in bits (*_BITSIZE) and number of
 * columns (*_COLS) for each dataset.
 */
#define BASE_FRACTION_BITSIZE           32
#define BASE_FRACTION_COLS              4

#define BASE_IPD_BITSIZE                32
#define BASE_IPD_COLS                   1

#define BASE_RATE_BITSIZE               32
#define BASE_RATE_COLS                  1

#define BASE_WIDTH_BITSIZE              32
#define BASE_WIDTH_COLS                 1

#define CM_BAS_QV_BITSIZE               32
#define CM_BAS_QV_COLS                  4

#define CM_DEL_QV_BITSIZE               32
#define CM_DEL_QV_COLS                  4

#define CM_INS_QV_BITSIZE               32
#define CM_INS_QV_COLS                  4

#define CM_SUB_QV_BITSIZE               32
#define CM_SUB_QV_COLS                  4

#define LOCAL_BASE_RATE_BITSIZE         32
#define LOCAL_BASE_RATE_COLS            1

#define DARK_BASE_RATE_BITSIZE          32
#define DARK_BASE_RATE_COLS             1

#define HQ_REGION_START_TIME_BITSIZE    32
#define HQ_REGION_START_TIME_COLS       1

#define HQ_REGION_END_TIME_BITSIZE      32
#define HQ_REGION_END_TIME_COLS         1

#define HQ_REGION_SNR_BITSIZE           32
#define HQ_REGION_SNR_COLS              4

#define PRODUCTIVITY_BITSIZE            8
#define PRODUCTIVITY_COLS               1

#define READ_SCORE_BITSIZE              32
#define READ_SCORE_COLS                 1

#define RM_BAS_QV_BITSIZE               32
#define RM_BAS_QV_COLS                  1

#define RM_DEL_QV_BITSIZE               32
#define RM_DEL_QV_COLS                  1

#define RM_INS_QV_BITSIZE               32
#define RM_INS_QV_COLS                  1

#define RM_SUB_QV_BITSIZE               32
#define RM_SUB_QV_COLS                  1
159 
/*
 * passes datasets: element width in bits (*_BITSIZE) and number of
 * columns (*_COLS) for each dataset.
 */
#define ADAPTER_HIT_AFTER_BITSIZE       8
#define ADAPTER_HIT_AFTER_COLS          1

#define ADAPTER_HIT_BEFORE_BITSIZE      8
#define ADAPTER_HIT_BEFORE_COLS         1

#define PASS_DIRECTION_BITSIZE          8
#define PASS_DIRECTION_COLS             1

#define PASS_NUM_BASES_BITSIZE          32
#define PASS_NUM_BASES_COLS             1

#define PASS_START_BASE_BITSIZE         32
#define PASS_START_BASE_COLS            1
175 
176 typedef struct ld_context
177 {
178     const XMLLogger* xml_logger;
179     const KLoadProgressbar *xml_progress;
180     const char *dst_path;
181     uint64_t total_seq_bases;
182     uint64_t total_seq_spots;
183     bool with_progress;
184     bool total_printed;
185     bool cache_content;
186     bool check_src_obj;
187 } ld_context;
188 
189 
190 void lctx_init( ld_context * lctx );
191 void lctx_free( ld_context * lctx );
192 
193 
194 rc_t check_src_objects( const KDirectory *hdf5_dir,
195                         const char ** groups,
196                         const char **tables,
197                         bool show_not_found );
198 
199 typedef struct af_data
200 {
201     struct KFile const *f;      /* the fake "file" from a HDF5-dir */
202     struct KArrayFile *af;      /* the arrayfile made from f */
203     rc_t rc;
204     uint8_t dimensionality;     /* how many dimensions the HDF5-dataset has */
205     uint64_t * extents;         /* the extension in every dimension */
206     uint64_t element_bits;      /* how big in bits is the element */
207     void * content;             /* read the whole thing into memory */
208 } af_data;
209 
210 
211 void init_array_file( af_data * af );
212 void free_array_file( af_data * af );
213 
214 rc_t open_array_file( const KDirectory *dir,
215                       const char *name,
216                       af_data * af,
217                       const uint64_t expected_element_bits,
218                       const uint64_t expected_cols,
219                       bool disp_wrong_bitsize,
220                       bool cache_content,
221                       bool supress_err_msg );
222 
223 rc_t open_element( const KDirectory *hdf5_dir,
224                    af_data *element,
225                    const char * path,
226                    const char * name,
227                    const uint64_t expected_element_bits,
228                    const uint64_t expected_cols,
229                    bool disp_wrong_bitsize,
230                    bool cache_content,
231                    bool supress_err_msg );
232 
233 rc_t array_file_read_dim1( af_data * af, const uint64_t pos,
234                            void *dst, const uint64_t count,
235                            uint64_t *n_read );
236 
237 rc_t array_file_read_dim2( af_data * af, const uint64_t pos,
238                            void *dst, const uint64_t count,
239                            const uint64_t ext2, uint64_t *n_read );
240 
241 rc_t add_columns( VCursor * cursor, uint32_t count, int32_t exclude_this,
242                   uint32_t * idx_vector, const char ** names );
243 
244 bool check_table_count( af_data *tab, const char * name,
245                         const uint64_t expected );
246 
247 rc_t transfer_bits( VCursor *cursor, const uint32_t col_idx,
248     af_data *src, char * buffer, const uint64_t offset, const uint64_t count,
249     const uint32_t n_bits, const char * explanation );
250 
251 rc_t vdb_write_value( VCursor *cursor, const uint32_t col_idx,
252                       void * src, const uint32_t n_bits,
253                       const uint32_t n_elem, const char *explanation );
254 
255 rc_t vdb_write_uint32( VCursor *cursor, const uint32_t col_idx,
256                        uint32_t value, const char *explanation );
257 
258 rc_t vdb_write_uint16( VCursor *cursor, const uint32_t col_idx,
259                        uint16_t value, const char *explanation );
260 
261 rc_t vdb_write_uint8( VCursor *cursor, const uint32_t col_idx,
262                       uint8_t value, const char *explanation );
263 
264 rc_t vdb_write_float32( VCursor *cursor, const uint32_t col_idx,
265                         float value, const char *explanation );
266 
267 typedef rc_t (*loader_func)( ld_context *lctx,
268                              KDirectory * hdf5_src, VCursor * cursor,
269                              const char * table_name );
270 
271 rc_t prepare_table( VDatabase * database, VCursor ** cursor,
272                     const char * template_name,
273                     const char * table_name );
274 
275 rc_t load_table( VDatabase * database, KDirectory * hdf5_src, ld_context *lctx,
276                  const char * template_name, const char * table_name,
277                  loader_func func );
278 
279 rc_t progress_chunk( const KLoadProgressbar ** xml_progress, const uint64_t chunk );
280 rc_t progress_step( const KLoadProgressbar * xml_progress );
281 
/*
 * Emit an informational message.
 * NOTE(review): the output channel is defined in the implementation.
 */
void print_log_info( const char *info );
283 
284 rc_t pacbio_make_alias( VDatabase * vdb_db,
285                         const char *existing_obj, const char *alias_to_create );
286 
287 #ifdef __cplusplus
288 }
289 #endif
290 
291 #endif
292