1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 #include <svdb/extern.h>
28 #include <svdb/svdb.h>
29
30 #include <klib/rc.h>
31 #include <klib/log.h>
32 #include <klib/out.h>
33 #include <klib/debug.h>
34 #include <klib/text.h>
35 #include <klib/vector.h>
36 #include <klib/pack.h>
37 #include <klib/printf.h>
38
39 #include <kfs/directory.h>
40
41 #include <kdb/meta.h>
42 #include <kdb/namelist.h>
43
44 #include <vdb/manager.h>
45 #include <vdb/schema.h>
46 #include <vdb/database.h>
47 #include <vdb/table.h>
48 #include <vdb/cursor.h>
49
50 #include <sra/sraschema.h>
51 #include <sra/sradb.h>
52 #include <sra/pacbio.h>
53
54 #include <vfs/manager.h>
55 #include <vfs/resolver.h>
56 #include <vfs/path.h>
57
58 #include <sysalloc.h>
59
60 #include <stdlib.h>
61 #include <stdio.h>
62 #include <ctype.h>
63 #include <os-native.h>
64 #include <string.h>
65 #include <bitstr.h>
66
/* values passed as the third argument of VectorInit() for the table-,
   column-, meta-child- and type-vectors created in this file */
#define NTABS 8
#define NCOLUMNS 8
#define NMETACHILDS 4
#define NTYPES 8

/* default separators, duplicated into every new column by svdb_make_col() */
#define DEF_ELEM_SEPARATOR ","
#define DEF_DIM_SEPARATOR "[]"

/* all-bits-set sentinels; NOTE(review): presumably mark "no such column/row",
   they are not referenced in the part of the file reviewed here - confirm */
#define INVALID_COL 0xFFFFFFFF
#define INVALID_ROW 0xFFFFFFFFFFFFFFFF


/* if true, log_and_err() also reports the error via LOGERR() */
bool print_err = false;

/* text of the most recent error/status; written by svdb_set_last_err()
   (and directly by svdb_open_path()), handed out by svdb_last_err() */
char last_err[ 1024 ];
79
/* forward decl's: the structures below reference each other
   ( type -> column -> table -> connection ), so the pointer-typedefs
   have to exist before the structures are defined */
struct svdb_conn;
typedef struct svdb_conn* p_svdb_conn;

struct svdb_tab;
typedef struct svdb_tab* p_svdb_tab;

struct svdb_col;
typedef struct svdb_col* p_svdb_col;

struct svdb_type;
typedef struct svdb_type* p_svdb_type;

struct svdb_meta;
typedef struct svdb_meta* p_svdb_meta;


/* translates a numeric id into a constant text
   ( implemented by the sra_trans_...() functions below ) */
typedef const char * (*value_trans_func_t)( const uint32_t id );

/* translates a binary record into a text; the implementations in this file
   ( sra_read_desc(), sra_spot_desc() ) return a calloc()'ed string */
typedef char * (*dim_trans_func_t)( const uint8_t *src );
99
100
/* state for printing into a caller-supplied buffer;
   set up by svdb_init_print_ctx() */
typedef struct print_ctx
{
    char * buf;     /* destination buffer, as passed to svdb_init_print_ctx() */
    int buf_len;    /* length of buf, as passed to svdb_init_print_ctx() */
    int needed;     /* reset to 0 on init; NOTE(review): presumably the number of
                       bytes the full output would need - users not visible here */
    int printed;    /* reset to 0 on init; NOTE(review): presumably the number of
                       bytes written so far - users not visible here */
} print_ctx;
typedef print_ctx* p_print_ctx;
109
110
/* describes a piece of cell-data to be printed.
   NOTE(review): this structure is not used in the part of the file reviewed
   here; the field comments are assumptions derived from the names only. */
typedef struct data_src
{
    uint32_t elem_bits;     /* presumably: size of one element in bits */
    uint32_t boff;          /* presumably: bit-offset into base */
    uint32_t row_len;       /* presumably: number of elements in the row */
    uint32_t elem_idx;      /* presumably: index of the current element */
    const void * base;      /* presumably: start of the cell-data */
} data_src;
typedef data_src* p_data_src;
120
121
/* one datatype a column can be read as; created by svdb_make_type(),
   destroyed by svdb_free_type() */
typedef struct svdb_type
{
    const char * name;      /* heap-copy of the type-name, freed in svdb_free_type() */
    VTypedecl typedecl;     /* filled by VSchemaResolveTypedecl() from name */
    VTypedesc typedesc;     /* filled by VSchemaDescribeTypedecl() from typedecl */
    p_svdb_col col;         /* the column this type belongs to ( not owned ) */
} svdb_type;
129
130
/* one column of a table; created by svdb_make_col(),
   destroyed by svdb_free_col() */
typedef struct svdb_col
{
    const char * name;              /* heap-copy of the column-name */
    const char * cast;              /* freed in svdb_free_col(); set elsewhere */
    const char * elem_separator;    /* heap-copy, initially DEF_ELEM_SEPARATOR */
    const char * dim_separator;     /* heap-copy, initially DEF_DIM_SEPARATOR */
    const char * cur_typedecl;      /* freed in svdb_free_col(); set elsewhere */
    p_svdb_tab tab;                 /* the table this column belongs to ( not owned ) */
    uint32_t cur_idx;               /* NOTE(review): presumably the column-index
                                       inside the cursor - not set in the visible code */
    int enabled;                    /* zero after svdb_make_col(); set elsewhere */
    int open;                       /* zero after svdb_make_col(); set elsewhere */
    int visible;                    /* set to 1 by svdb_make_col() */
    Vector types;                   /* owned p_svdb_type entries, see svdb_discover_types() */
    uint32_t dflt_type_idx;         /* default-index reported by VTableColumnDatatypes() */
    VTypedecl cursor_typedecl;      /* not set in the visible code */
    VTypedesc cursor_typedesc;      /* not set in the visible code */
    value_trans_func_t value_trans; /* optional id->text translation, NULL after creation */
    dim_trans_func_t dim_trans;     /* optional record->text translation, NULL after creation */
} svdb_col;
150
151
/* in-memory copy of one metadata-node; created by svdb_init_meta(),
   destroyed ( including all children ) by svdb_whack_meta() */
typedef struct svdb_meta
{
    Vector childs;          /* owned p_svdb_meta entries, one per child-node */
    int id;                 /* taken from the per-table counter svdb_tab.meta_id */
    const char * name;      /* heap-copy of the node-name */
    const void * value;     /* malloc()'ed copy of the node-value, NULL if empty/unreadable */
    int value_len;          /* number of bytes in value */
} svdb_meta;
160
161
/* one opened table; created by svdb_make_tab(),
   destroyed by svdb_free_table() */
typedef struct svdb_tab
{
    const VTable * tab;         /* released in svdb_free_table() */
    const VCursor * cursor;     /* released in svdb_free_table() if not NULL */
    const char * name;          /* heap-copy of the table-name */
    p_svdb_meta meta;           /* root of the metadata-tree, see svdb_discover_meta() */
    uint64_t range;             /* NOTE(review): presumably the number of rows -
                                   not set in the visible code */
    int64_t first;              /* NOTE(review): presumably the first row-id -
                                   not set in the visible code */
    int meta_id;                /* counter handing out svdb_meta.id values */
    p_svdb_conn conn;           /* the connection this table belongs to ( not owned ) */
    Vector all_columns;         /* owns the p_svdb_col entries ( whacked with svdb_free_col ) */
    /* the vectors below are whacked without a destructor; svdb_discover_columns()
       puts the same pointers as in all_columns into visible_columns */
    Vector static_columns;
    Vector non_static_columns;
    Vector visible_columns;
    Vector non_visible_columns;
} svdb_tab;
178
179
/* the connection-object returned by svdb_open_path()
   and destroyed by svdb_close() */
typedef struct svdb_conn
{
    char * path;                /* heap-copy of the path given to svdb_open_path() */
    KDirectory * dir;           /* native directory, from KDirectoryNativeDir() */
    const VDBManager * mgr;     /* from VDBManagerMakeRead() */
    VSchema * schema;           /* from VDBManagerMakeSRASchema() */
    const VDatabase * db;       /* only set if the path could be opened as database */
    int is_db;                  /* 1 if the path was opened as database, 0 otherwise */
    Vector tables;              /* owned p_svdb_tab entries */
} svdb_conn;
190
191
192 /* special translations of SRA-types into clear text */
193 const char SRA_PB_HS_0[] = { "SRA_PACBIO_HOLE_SEQUENCING" };
194 const char SRA_PB_HS_1[] = { "SRA_PACBIO_HOLE_ANTIHOLE" };
195 const char SRA_PB_HS_2[] = { "SRA_PACBIO_HOLE_FIDUCIAL" };
196 const char SRA_PB_HS_3[] = { "SRA_PACBIO_HOLE_SUSPECT" };
197 const char SRA_PB_HS_4[] = { "SRA_PACBIO_HOLE_ANTIMIRROR" };
198 const char SRA_PB_HS_5[] = { "SRA_PACBIO_HOLE_FDZMW" };
199 const char SRA_PB_HS_6[] = { "SRA_PACBIO_HOLE_FBZMW" };
200 const char SRA_PB_HS_7[] = { "SRA_PACBIO_HOLE_ANTIBEAMLET" };
201 const char SRA_PB_HS_8[] = { "SRA_PACBIO_HOLE_OUTSIDEFOV" };
202 const char SRA_PB_HS_9[] = { "unknown hole-status" };
203
sra_trans_hole_status(const uint32_t id)204 static const char *sra_trans_hole_status( const uint32_t id )
205 {
206 switch( id )
207 {
208 case SRA_PACBIO_HOLE_SEQUENCING : return( SRA_PB_HS_0 ); break;
209 case SRA_PACBIO_HOLE_ANTIHOLE : return( SRA_PB_HS_1 ); break;
210 case SRA_PACBIO_HOLE_FIDUCIAL : return( SRA_PB_HS_2 ); break;
211 case SRA_PACBIO_HOLE_SUSPECT : return( SRA_PB_HS_3 ); break;
212 case SRA_PACBIO_HOLE_ANTIMIRROR : return( SRA_PB_HS_4 ); break;
213 case SRA_PACBIO_HOLE_FDZMW : return( SRA_PB_HS_5 ); break;
214 case SRA_PACBIO_HOLE_FBZMW : return( SRA_PB_HS_6 ); break;
215 case SRA_PACBIO_HOLE_ANTIBEAMLET : return( SRA_PB_HS_7 ); break;
216 case SRA_PACBIO_HOLE_OUTSIDEFOV : return( SRA_PB_HS_8 ); break;
217 }
218 return( SRA_PB_HS_9 );
219 }
220
const char SRA_PF_0[] = { "SRA_PLATFORM_UNDEFINED" };
const char SRA_PF_1[] = { "SRA_PLATFORM_454" };
const char SRA_PF_2[] = { "SRA_PLATFORM_ILLUMINA" };
const char SRA_PF_3[] = { "SRA_PLATFORM_ABSOLID" };
const char SRA_PF_4[] = { "SRA_PLATFORM_COMPLETE_GENOMICS" };
const char SRA_PF_5[] = { "SRA_PLATFORM_HELICOS" };
const char SRA_PF_6[] = { "SRA_PLATFORM_PACBIO_SMRT" };
const char SRA_PF_7[] = { "SRA_PLATFORM_ION_TORRENT" };
const char SRA_PF_8[] = { "SRA_PLATFORM_CAPILLARY" };
const char SRA_PF_9[] = { "SRA_PLATFORM_OXFORD_NANOPORE" };
const char SRA_PF_N[] = { "unknown platform" };

/* translates a platform-id ( 0..9 ) into its symbolic name;
   every other id yields "unknown platform" */
static const char *sra_trans_platform( const uint32_t id )
{
    /* the position in this table is the platform-id */
    static const char * const platform_names[] =
    {
        SRA_PF_0, SRA_PF_1, SRA_PF_2, SRA_PF_3, SRA_PF_4,
        SRA_PF_5, SRA_PF_6, SRA_PF_7, SRA_PF_8, SRA_PF_9
    };
    const uint32_t known = ( uint32_t )( sizeof platform_names / sizeof platform_names[ 0 ] );

    if ( id < known )
    {
        return platform_names[ id ];
    }
    return SRA_PF_N;
}
250
251
const char SRA_RT_0[] = { "SRA_READ_TYPE_TECHNICAL" };
const char SRA_RT_1[] = { "SRA_READ_TYPE_BIOLOGICAL" };
const char SRA_RT_2[] = { "SRA_READ_TYPE_TECHNICAL|SRA_READ_TYPE_FORWARD" };
const char SRA_RT_3[] = { "SRA_READ_TYPE_BIOLOGICAL|SRA_READ_TYPE_FORWARD" };
const char SRA_RT_4[] = { "SRA_READ_TYPE_TECHNICAL|SRA_READ_TYPE_REVERSE" };
const char SRA_RT_5[] = { "SRA_READ_TYPE_BIOLOGICAL|SRA_READ_TYPE_REVERSE" };
const char SRA_RT_6[] = { "unknown read-type" };

/* translates a read-type value ( 0..5 ) into its symbolic name;
   every other value yields "unknown read-type" */
static const char *sra_trans_read_type( const uint32_t id )
{
    /* the position in this table is the read-type value */
    static const char * const read_type_names[] =
    {
        SRA_RT_0, SRA_RT_1, SRA_RT_2, SRA_RT_3, SRA_RT_4, SRA_RT_5
    };
    const uint32_t known = ( uint32_t )( sizeof read_type_names / sizeof read_type_names[ 0 ] );

    if ( id < known )
    {
        return read_type_names[ id ];
    }
    return SRA_RT_6;
}
273
274
const char SRA_FT_0[] = { "SRA_READ_FILTER_PASS" };
const char SRA_FT_1[] = { "SRA_READ_FILTER_REJECT" };
const char SRA_FT_2[] = { "SRA_READ_FILTER_CRITERIA" };
const char SRA_FT_3[] = { "SRA_READ_FILTER_REDACTED" };
const char SRA_FT_4[] = { "unknown read-filter" };

/* translates a read-filter value ( 0..3 ) into its symbolic name;
   every other value yields "unknown read-filter" */
static const char *sra_trans_read_filter( const uint32_t id )
{
    /* the position in this table is the read-filter value */
    static const char * const read_filter_names[] =
    {
        SRA_FT_0, SRA_FT_1, SRA_FT_2, SRA_FT_3
    };
    const uint32_t known = ( uint32_t )( sizeof read_filter_names / sizeof read_filter_names[ 0 ] );

    if ( id < known )
    {
        return read_filter_names[ id ];
    }
    return SRA_FT_4;
}
292
293
/* hardcoded values taken from asm-trace/interface/sra/sradb.h

   schema type-names; sra_get_value_trans_func() compares the type of a
   column against these names to pick the matching id->text translation */
#define SRA_KEY_PLATFORM_ID "INSDC:SRA:platform_id"
#define SRA_KEY_XREAD_TYPE "INSDC:SRA:xread_type"
#define SRA_KEY_READ_TYPE "INSDC:SRA:read_type"
#define SRA_KEY_READ_FILTER "INSDC:SRA:read_filter"
#define SRA_PACBIO_HOLE_STATUS "PacBio:hole:status"
300
/* checks a typedecl against a type given by name:
   the name is resolved in my_schema, the answer is what VTypedeclToTypedecl()
   reports for ( typedecl -> resolved type ).
   returns false if the name cannot be resolved */
static bool vdcd_type_cmp( const VSchema *my_schema, VTypedecl * typedecl, const char * to_check )
{
    VTypedecl resolved;

    if ( VSchemaResolveTypedecl ( my_schema, &resolved, "%s", to_check ) != 0 )
    {
        return false;
    }
    return VTypedeclToTypedecl ( typedecl, my_schema, &resolved, NULL, NULL );
}
311
sra_get_value_trans_func(const VSchema * my_schema,VTypedecl * typedecl)312 static value_trans_func_t sra_get_value_trans_func( const VSchema *my_schema, VTypedecl * typedecl )
313 {
314 value_trans_func_t res = NULL;
315
316 if ( my_schema == NULL ) return res;
317 if ( typedecl == NULL ) return res;
318
319 if ( vdcd_type_cmp( my_schema, typedecl, SRA_KEY_PLATFORM_ID ) )
320 {
321 res = sra_trans_platform;
322 }
323 else if ( vdcd_type_cmp( my_schema, typedecl, SRA_KEY_XREAD_TYPE ) )
324 {
325 res = sra_trans_read_type;
326 }
327 else if ( vdcd_type_cmp( my_schema, typedecl, SRA_KEY_READ_TYPE ) )
328 {
329 res = sra_trans_read_type;
330 }
331 else if ( vdcd_type_cmp( my_schema, typedecl, SRA_KEY_READ_FILTER ) )
332 {
333 res = sra_trans_read_filter;
334 }
335 else if ( vdcd_type_cmp( my_schema, typedecl, SRA_PACBIO_HOLE_STATUS ) )
336 {
337 res = sra_trans_hole_status;
338 }
339 return res;
340 }
341
342
343 /* implementation of the dimension-translation-functions */
sra_read_desc(const uint8_t * src)344 static char * sra_read_desc( const uint8_t * src )
345 {
346 char *res = calloc( 1, 120 );
347 SRAReadDesc desc;
348 memmove( &desc, src, sizeof( desc ) );
349 string_printf ( res, 119, NULL,
350 "seg.start=%u, seg.len=%u, type=%u, cs_key=%u, label=%s",
351 desc.seg.start, desc.seg.len, desc.type,
352 desc.cs_key, desc.label );
353 return res;
354 }
355
356
sra_spot_desc(const uint8_t * src)357 static char * sra_spot_desc( const uint8_t *src )
358 {
359 char *res = calloc( 1, 120 );
360 SRASpotDesc desc;
361 memmove( &desc, src, sizeof( desc ) );
362 string_printf ( res, 119, NULL,
363 "spot_len=%u, fixed_len=%u, signal_len=%u, clip_qual_right=%u, num_reads=%u",
364 desc.spot_len, desc.fixed_len, desc.signal_len,
365 desc.clip_qual_right, desc.num_reads );
366 return res;
367 }
368
/* hardcoded values taken from asm-trace/interface/sra/sradb.h

   schema type-names; sra_get_dim_trans_func() compares the type of a
   column against these names to pick the matching record->text translation */
#define SRA_KEY_READ_DESC "NCBI:SRA:ReadDesc"
#define SRA_KEY_SPOT_DESC "NCBI:SRA:SpotDesc"
372
sra_get_dim_trans_func(const VSchema * my_schema,VTypedecl * typedecl)373 static dim_trans_func_t sra_get_dim_trans_func( const VSchema *my_schema, VTypedecl * typedecl )
374 {
375 dim_trans_func_t res = NULL;
376
377 if ( my_schema == NULL ) return res;
378 if ( typedecl == NULL ) return res;
379
380 if ( vdcd_type_cmp( my_schema, typedecl, SRA_KEY_READ_DESC ) )
381 {
382 res = sra_read_desc;
383 }
384 else if ( vdcd_type_cmp( my_schema, typedecl, SRA_KEY_SPOT_DESC ) )
385 {
386 res = sra_spot_desc;
387 }
388 return res;
389 }
390
391
/* prepares a print-context for a fresh print-operation:
   remembers the destination buffer and clears both counters */
static void svdb_init_print_ctx( p_print_ctx ctx, char * buf, int buf_len )
{
    /* nothing needed, nothing printed yet */
    ctx->printed = 0;
    ctx->needed = 0;

    /* where to print into */
    ctx->buf_len = buf_len;
    ctx->buf = buf;
}
399
400
401 KLIB_EXTERN int CC string_cmp ( const char *a, size_t asize,
402 const char *b, size_t bsize, uint32_t max_chars );
403
svdb_strcmp(const char * a,const char * b)404 static int svdb_strcmp( const char *a, const char *b )
405 {
406 size_t sa = string_size ( a );
407 size_t sb = string_size ( b );
408 uint32_t max_chars = ( sa > sb ) ? (uint32_t)sa : (uint32_t)sb;
409 return string_cmp ( a, sa, b, sb, max_chars );
410 }
411
412
svdb_set_last_err(const char * s)413 static void svdb_set_last_err( const char * s )
414 {
415 string_copy_measure ( last_err, sizeof last_err, s );
416 }
417
418
/* NOTE(review): neither of these two is referenced in the part of the file
   reviewed here ( svdb_accession_2_path() returns a string_dup()'ed string
   and does not use the buffer ) - confirm they are still needed */
const char ACC_NOT_FOUND[] = { "accession not found" };
char accession_2_path_buffer[ 1024 ];
421
422
/* records s as the last error; if the global print_err flag is set
   the error is reported via LOGERR() first */
static void log_and_err( rc_t rc, const char * s )
{
    if ( print_err )
    {
        LOGERR( klogInt, rc, s );
    }
    svdb_set_last_err( s );
}
428
429
svdb_accession_2_path(const char * accession)430 MOD_EXPORT const char * CC svdb_accession_2_path( const char * accession )
431 {
432 const char * res = NULL;
433 if ( accession != NULL && accession[0] != 0 )
434 {
435 if ( strchr ( accession, '/' ) == NULL )
436 {
437 VFSManager * vfs_mgr;
438 rc_t rc = VFSManagerMake ( &vfs_mgr );
439 if ( rc != 0 )
440 log_and_err( rc, "VFSManagerMake() failed in svdb_accession_2_path()" );
441 else
442 {
443 VResolver * resolver;
444
445 rc = VFSManagerGetResolver ( vfs_mgr, &resolver );
446 if ( rc != 0 )
447 log_and_err( rc, "VFSManagerGetResolver() failed in svdb_accession_2_path()" );
448 else
449 {
450 VPath * vpath;
451 rc = VFSManagerMakeSysPath ( vfs_mgr, &vpath, accession );
452 if ( rc != 0 )
453 log_and_err( rc, "VFSManagerMakeSysPath() failed in svdb_accession_2_path()" );
454 else
455 {
456 const VPath * local;
457 rc = VResolverQuery ( resolver, 0, vpath, &local, NULL, NULL );
458 if ( rc != 0 )
459 log_and_err( rc, "VResolverQuery() failed in svdb_accession_2_path()" );
460 else
461 {
462 const String * str;
463 rc = VPathMakeString ( local, &str );
464 if ( rc != 0 )
465 log_and_err( rc, "VPathMakeString() failed in svdb_accession_2_path()" );
466 else
467 {
468 res = string_dup ( str->addr, str->size );
469 StringWhack ( str );
470 }
471
472 VPathRelease ( local );
473 }
474 VPathRelease ( vpath );
475 }
476 VResolverRelease ( resolver );
477 }
478 VFSManagerRelease ( vfs_mgr );
479 }
480
481 }
482 }
483 return res;
484 }
485
486
svdb_make_type(p_svdb_col col,const char * name)487 static p_svdb_type svdb_make_type( p_svdb_col col, const char * name )
488 {
489 rc_t rc;
490 p_svdb_type res = calloc( 1, sizeof( svdb_type ) );
491 if ( res == NULL )
492 {
493 rc = RC( rcApp, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
494 LOGERR( klogInt, rc, "calloc() failed in svdb_make_type()" );
495 }
496 else
497 {
498 res->col = col;
499 res->name = string_dup_measure ( name, NULL );
500 rc = VSchemaResolveTypedecl ( col->tab->conn->schema, &res->typedecl, "%s", name );
501 if ( rc != 0 )
502 {
503 LOGERR( klogInt, rc, "VSchemaResolveTypedecl() failed in svdb_make_type()" );
504 }
505 else
506 {
507 rc = VSchemaDescribeTypedecl ( col->tab->conn->schema, &res->typedesc, &res->typedecl );
508 if ( rc != 0 )
509 {
510 LOGERR( klogInt, rc, "VSchemaDescribeTypedecl() failed in svdb_make_type()" );
511 }
512 }
513 }
514 return res;
515 }
516
517
svdb_free_type(void * item,void * data)518 static void CC svdb_free_type( void * item, void * data )
519 {
520 p_svdb_type t = (p_svdb_type)item;
521 if ( t != NULL )
522 {
523 if ( t->name != NULL )
524 {
525 free( ( void * )t->name );
526 }
527 free( item );
528 }
529 }
530
531
svdb_discover_types(p_svdb_col col)532 static rc_t svdb_discover_types( p_svdb_col col )
533 {
534 KNamelist *type_names;
535 uint32_t dflt_idx;
536 rc_t rc = VTableColumnDatatypes ( col->tab->tab, col->name, &dflt_idx, &type_names );
537 if ( rc == 0 )
538 {
539 uint32_t n;
540 col->dflt_type_idx = dflt_idx;
541 rc = KNamelistCount( type_names, &n );
542 if ( rc == 0 )
543 {
544 uint32_t i;
545 for ( i = 0; i < n && rc == 0; ++i )
546 {
547 const char *type_name;
548 rc = KNamelistGet( type_names, i, &type_name );
549 if ( rc == 0 )
550 {
551 p_svdb_type type = svdb_make_type( col, type_name );
552 if ( type == NULL )
553 {
554 rc = RC( rcApp, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
555 }
556 else
557 {
558 rc = VectorAppend ( &col->types, NULL, type );
559 if ( rc != 0 )
560 {
561 LOGERR( klogInt, rc, "VectorAppend() failed in svdb_discover_types()" );
562 svdb_free_type( type, NULL );
563 }
564 }
565 }
566 }
567 }
568 KNamelistRelease( type_names );
569 }
570 return rc;
571 }
572
573
svdb_make_col(p_svdb_tab tab,const char * name)574 static p_svdb_col svdb_make_col( p_svdb_tab tab, const char * name )
575 {
576 rc_t rc;
577 p_svdb_col res = calloc( 1, sizeof( svdb_col ) );
578 if ( res == NULL )
579 {
580 rc = RC( rcApp, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
581 LOGERR( klogInt, rc, "calloc() failed in svdb_make_col()" );
582 }
583 else
584 {
585
586 VectorInit ( &res->types, 0, NTYPES );
587 res->tab = tab;
588 res->name = string_dup_measure ( name, NULL );
589 res->elem_separator = string_dup_measure ( DEF_ELEM_SEPARATOR, NULL );
590 res->dim_separator = string_dup_measure ( DEF_DIM_SEPARATOR, NULL );
591 res->visible = 1;
592 rc = svdb_discover_types( res );
593 if ( rc != 0 )
594 {
595 VectorWhack ( &res->types, svdb_free_type, NULL );
596 free( ( void* )res->name );
597 res = NULL;
598 }
599 }
600 return res;
601 }
602
603
/* releases a heap-string that is held via a const-pointer;
   a NULL-pointer is ignored */
static void free_if_not_null( const char * s )
{
    if ( s == NULL )
    {
        return;
    }
    free( ( void * ) s );
}
611
612
svdb_free_col(void * item,void * data)613 static void CC svdb_free_col( void * item, void * data )
614 {
615 p_svdb_col c = (p_svdb_col)item;
616 if ( c != NULL )
617 {
618 VectorWhack ( &c->types, svdb_free_type, NULL );
619 free_if_not_null( c->elem_separator );
620 free_if_not_null( c->dim_separator );
621 free_if_not_null( c->cur_typedecl );
622 free_if_not_null( c->cast );
623 free_if_not_null( c->name );
624 free( item );
625 }
626 }
627
628
svdb_discover_columns(p_svdb_tab table)629 static rc_t svdb_discover_columns( p_svdb_tab table )
630 {
631 KNamelist *col_names;
632 rc_t rc = VTableListReadableColumns ( table->tab, &col_names );
633 if ( rc == 0 )
634 {
635 uint32_t n;
636 rc = KNamelistCount( col_names, &n );
637 if ( rc == 0 )
638 {
639 uint32_t i;
640 for ( i = 0; i < n && rc == 0; ++i )
641 {
642 const char *col_name;
643 rc = KNamelistGet( col_names, i, &col_name );
644 if ( rc == 0 )
645 {
646 p_svdb_col col = svdb_make_col( table, col_name );
647 if ( col == NULL )
648 {
649 rc = RC( rcApp, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
650 }
651 else
652 {
653 rc = VectorAppend ( &table->all_columns, NULL, col );
654 if ( rc != 0 )
655 {
656 LOGERR( klogInt, rc, "VectorAppend() failed in svdb_discover_columns()" );
657 svdb_free_col( col, NULL );
658 }
659 else
660 {
661 VectorAppend ( &table->visible_columns, NULL, col );
662 }
663 }
664 }
665 }
666 }
667 KNamelistRelease( col_names );
668 }
669 return rc;
670 }
671
672
/* destroys a metadata-node together with all its descendants,
   its name and its value; a NULL-pointer is ignored */
static void svdb_whack_meta( p_svdb_meta meta )
{
    int idx, child_count;

    if ( meta == NULL )
    {
        return;
    }

    child_count = VectorLength( &meta->childs );
    for ( idx = 0; idx < child_count; ++idx )
    {
        /* recursion: a NULL-child is handled by the check above */
        svdb_whack_meta( VectorGet ( &meta->childs, idx ) );
    }
    /* the children are gone already, only the vector itself is left */
    VectorWhack( &meta->childs, NULL, NULL );

    free_if_not_null( meta->name );
    free_if_not_null( meta->value );
    free( meta );
}
692
693
/* copies the value of a metadata-node into m->value / m->value_len.
   a first read into a small buffer is only made to learn the total size
   ( bytes read + bytes remaining ), then the whole value is read into a
   buffer of exactly that size.
   m->value stays NULL if a read fails, the value is empty or
   the allocation fails */
static void svdb_read_meta_value( p_svdb_meta m, const KMDataNode *node )
{
    char probe[ 8 ];
    size_t num_read, remaining, total;
    void * data;

    if ( KMDataNodeRead ( node, 0, probe, sizeof probe, &num_read, &remaining ) != 0 )
    {
        return;
    }

    total = num_read + remaining;
    if ( total == 0 )
    {
        return;
    }

    data = malloc( total );
    m->value = data;
    if ( data == NULL )
    {
        return;
    }

    if ( KMDataNodeRead ( node, 0, data, total, &num_read, &remaining ) == 0 )
    {
        m->value_len = (uint32_t)total;
    }
    else
    {
        free( data );
        m->value = NULL;
    }
}
724
725
svdb_init_meta(p_svdb_tab tab,const KMDataNode * node,const char * name)726 static p_svdb_meta svdb_init_meta( p_svdb_tab tab, const KMDataNode *node, const char * name )
727 {
728 p_svdb_meta res = calloc( 1, sizeof( svdb_meta ) );
729 if ( res != NULL )
730 {
731 VectorInit ( &res->childs, 0, NMETACHILDS );
732 res->id = ( tab->meta_id )++;
733 res->name = string_dup_measure ( name, NULL );
734 svdb_read_meta_value( res, node );
735
736 if ( node != NULL )
737 {
738 KNamelist *names;
739 rc_t rc = KMDataNodeListChild ( node, &names );
740 if ( rc == 0 )
741 {
742 uint32_t i, count;
743 rc = KNamelistCount ( names, & count );
744 for ( i = 0; rc == 0 && i < count; ++ i )
745 {
746 const char *node_path;
747 rc = KNamelistGet ( names, i, & node_path );
748 if ( rc == 0 )
749 {
750 const KMDataNode *child_node;
751 rc = KMDataNodeOpenNodeRead ( node, &child_node, "%s", node_path );
752 if ( rc == 0 )
753 {
754 p_svdb_meta child = svdb_init_meta( tab, child_node, node_path );
755 if ( child != NULL )
756 {
757 rc = VectorAppend ( &res->childs, NULL, child );
758 if ( rc != 0 )
759 {
760 svdb_whack_meta( child );
761 }
762 }
763 KMDataNodeRelease ( child_node );
764 }
765 }
766 }
767 KNamelistRelease( names );
768 }
769 }
770 }
771 return res;
772 }
773
774
/* depth-first search for the node with the given id, starting at meta
   ( meta must not be NULL ).
   returns the node, or NULL if neither meta nor one of its descendants
   carries that id */
static p_svdb_meta svdb_find_meta( p_svdb_meta meta, const int id )
{
    int idx, child_count;

    if ( meta->id == id )
    {
        return meta;
    }

    child_count = VectorLength( &meta->childs );
    for ( idx = 0; idx < child_count; ++idx )
    {
        p_svdb_meta child = VectorGet ( &meta->childs, idx );
        if ( child != NULL )
        {
            p_svdb_meta found = svdb_find_meta( child, id ); /* recursion */
            if ( found != NULL )
            {
                return found;
            }
        }
    }
    return NULL;
}
796
797
svdb_discover_meta(p_svdb_tab tab)798 static rc_t svdb_discover_meta( p_svdb_tab tab )
799 {
800 const KMetadata *src_meta;
801 rc_t rc = VTableOpenMetadataRead ( tab->tab, &src_meta );
802 if ( rc == 0 )
803 {
804 const KMDataNode *root;
805 rc = KMetadataOpenNodeRead ( src_meta, &root, NULL );
806 if ( rc == 0 )
807 {
808 tab->meta = svdb_init_meta( tab, root, "/" );
809 KMDataNodeRelease ( root );
810 }
811 KMetadataRelease ( src_meta );
812 }
813 return rc;
814 }
815
816
svdb_make_tab(p_svdb_conn pself,const VTable * tab,const char * name)817 static p_svdb_tab svdb_make_tab( p_svdb_conn pself, const VTable * tab,
818 const char * name )
819 {
820 p_svdb_tab res = calloc( 1, sizeof( svdb_tab ) );
821 if ( res == NULL )
822 {
823 rc_t rc = RC( rcApp, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
824 LOGERR( klogInt, rc, "calloc() failed in svdb_make_tab()" );
825 }
826 else
827 {
828 rc_t rc;
829
830 res->conn = pself;
831 res->tab = tab;
832 VectorInit ( &res->all_columns, 0, NCOLUMNS );
833 VectorInit ( &res->static_columns, 0, NCOLUMNS );
834 VectorInit ( &res->non_static_columns, 0, NCOLUMNS );
835 VectorInit ( &res->visible_columns, 0, NCOLUMNS );
836 VectorInit ( &res->non_visible_columns, 0, NCOLUMNS );
837
838 rc = svdb_discover_columns( res );
839 if ( rc == 0 )
840 {
841 rc = svdb_discover_meta( res );
842 if ( rc == 0 )
843 {
844 res->name = string_dup_measure ( name, NULL );
845 }
846 }
847
848 if ( rc != 0 )
849 {
850 VectorWhack ( &res->static_columns, NULL, NULL );
851 VectorWhack ( &res->non_static_columns, NULL, NULL );
852 VectorWhack ( &res->visible_columns, NULL, NULL );
853 VectorWhack ( &res->non_visible_columns, NULL, NULL );
854 VectorWhack ( &res->all_columns, svdb_free_col, NULL );
855 svdb_whack_meta( res->meta );
856 free( res );
857 res = NULL;
858 }
859 }
860 return res;
861 }
862
863
svdb_free_table(void * item,void * data)864 static void CC svdb_free_table( void * item, void * data )
865 {
866 p_svdb_tab t = (p_svdb_tab)item;
867 if ( t != NULL )
868 {
869 rc_t rc;
870 if ( t->cursor != NULL )
871 {
872 rc = VCursorRelease ( t->cursor );
873 if ( rc != 0 )
874 {
875 LOGERR( klogInt, rc, "VCursorRelease() failed in svdb_close_table()" );
876 }
877 }
878 if ( t->tab != NULL )
879 {
880 rc = VTableRelease( t->tab );
881 if ( rc != 0 )
882 {
883 LOGERR( klogInt, rc, "VTableRelease() failed in svdb_close_table()" );
884 }
885 }
886 VectorWhack ( &t->static_columns, NULL, NULL );
887 VectorWhack ( &t->non_static_columns, NULL, NULL );
888 VectorWhack ( &t->visible_columns, NULL, NULL );
889 VectorWhack ( &t->non_visible_columns, NULL, NULL );
890 VectorWhack ( &t->all_columns, svdb_free_col, NULL );
891 svdb_whack_meta( t->meta );
892
893 free_if_not_null( t->name );
894 free( item );
895 }
896 }
897
898
/* removes all entries from the vector without destroying the objects
   they point to.
   the entries are removed from the end ( nothing has to be moved inside
   the vector ), the index is computed relative to the start-index of the
   vector, and the loop is left if a removal fails - so it can never spin
   forever on a vector that refuses the removal */
static void svdb_clear_vector( Vector * v )
{
    while ( VectorLength( v ) > 0 )
    {
        void * removed;
        uint32_t last = VectorStart( v ) + VectorLength( v ) - 1;
        if ( VectorRemove ( v, last, &removed ) != 0 )
        {
            break;
        }
    }
}
907
908
svdb_get_column_vector(p_svdb_tab table,const int selection)909 static Vector * svdb_get_column_vector( p_svdb_tab table, const int selection )
910 {
911 Vector * res = &table->all_columns;
912 switch( selection )
913 {
914 case STATIC_COLUMNS : res = &table->static_columns; break;
915 case NON_STATIC_COLUMNS : res = &table->non_static_columns; break;
916 case VISIBLE_COLUMNS : res = &table->visible_columns; break;
917 case NON_VISIBLE_COLUMNS : res = &table->non_visible_columns; break;
918 }
919 return res;
920 }
921
922
svdb_close(void * self)923 MOD_EXPORT void CC svdb_close( void * self )
924 {
925 p_svdb_conn pself = (p_svdb_conn)self;
926 if ( pself != NULL )
927 {
928 rc_t rc;
929 VectorWhack ( &pself->tables, svdb_free_table, NULL );
930 rc = VDatabaseRelease( pself->db );
931 if ( rc != 0 )
932 {
933 LOGERR( klogInt, rc, "VDatabaseRelease() failed in svdb_close()" );
934 }
935 rc = VSchemaRelease( pself->schema );
936 if ( rc != 0 )
937 {
938 LOGERR( klogInt, rc, "VSchemaRelease() failed in svdb_close()" );
939 }
940 rc = VDBManagerRelease( pself->mgr );
941 if ( rc != 0 )
942 {
943 LOGERR( klogInt, rc, "VDBManagerRelease() failed in svdb_close()" );
944 }
945 rc = KDirectoryRelease( pself->dir );
946 if ( rc != 0 )
947 {
948 LOGERR( klogInt, rc, "KDirectoryRelease() failed in svdb_close()" );
949 }
950 free_if_not_null( pself->path );
951 free( self );
952 }
953 }
954
955
svdb_append_tab(p_svdb_conn pself,const VTable * tab,const char * name)956 static rc_t svdb_append_tab( p_svdb_conn pself, const VTable * tab,
957 const char * name )
958 {
959 rc_t rc = 0;
960 p_svdb_tab t = svdb_make_tab( pself, tab, name );
961 if ( t == NULL )
962 {
963 rc = VTableRelease( tab );
964 if ( rc != 0 )
965 {
966 LOGERR( klogInt, rc, "VTableRelease() failed in svdb_open()" );
967 }
968 rc = RC( rcApp, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
969 }
970 else
971 {
972 rc = VectorAppend ( &pself->tables, NULL, t );
973 if ( rc != 0 )
974 {
975 LOGERR( klogInt, rc, "VectorAppend() failed in svdb_open()" );
976 svdb_free_table( t, NULL );
977 }
978 }
979 return rc;
980 }
981
982
/* returns the part of path that follows the last '/'; if there is no '/'
   the part that follows the last '\'; if there is neither, path itself.
   the result points into path, nothing is allocated.
   returns NULL if path is NULL or empty */
static const char * svdb_filename( const char * path )
{
    const char * separator;

    if ( path == NULL || path[ 0 ] == 0 )
    {
        return NULL;
    }

    separator = strrchr( path, '/' );
    if ( separator == NULL )
    {
        /* backslashes are only looked at if there is no slash at all */
        separator = strrchr( path, '\\' );
    }
    return ( separator != NULL ) ? separator + 1 : path;
}
1004
1005
svdb_open_subtables(p_svdb_conn pself)1006 static rc_t svdb_open_subtables( p_svdb_conn pself )
1007 {
1008 KNamelist *tbl_names;
1009 rc_t rc = VDatabaseListTbl( pself->db, &tbl_names );
1010 if ( rc == 0 )
1011 {
1012 uint32_t n;
1013 rc = KNamelistCount( tbl_names, &n );
1014 if ( rc == 0 )
1015 {
1016 uint32_t i;
1017 for ( i = 0; i < n && rc == 0; ++i )
1018 {
1019 const char *tbl_name;
1020 rc = KNamelistGet( tbl_names, i, &tbl_name );
1021 if ( rc == 0 )
1022 {
1023 const VTable * tab;
1024 rc = VDatabaseOpenTableRead( pself->db, &tab, "%s", tbl_name );
1025 if ( rc == 0 )
1026 {
1027 rc = svdb_append_tab( pself, tab, tbl_name );
1028 }
1029 }
1030 }
1031 }
1032 KNamelistRelease( tbl_names );
1033 }
1034 return rc;
1035 }
1036
1037
svdb_open_path(const char * path)1038 MOD_EXPORT void * CC svdb_open_path( const char * path )
1039 {
1040 p_svdb_conn pself = NULL;
1041 if ( path != NULL && path[0] != 0 )
1042 {
1043 rc_t rc;
1044 pself = calloc( 1, sizeof( svdb_conn ) );
1045 if ( pself == NULL )
1046 {
1047 rc = RC( rcApp, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
1048 LOGERR( klogInt, rc, "calloc() failed in svdb_open()" );
1049 svdb_set_last_err( "calloc() failed in svdb_open()" );
1050 }
1051 else
1052 {
1053 VectorInit ( &pself->tables, 0, NTABS );
1054 rc = KDirectoryNativeDir( &pself->dir );
1055 if ( rc != 0 )
1056 {
1057 LOGERR( klogInt, rc, "KDirectoryNativeDir() failed in svdb_open()" );
1058 svdb_set_last_err( "KDirectoryNativeDir() failed in svdb_open()" );
1059 }
1060 else
1061 {
1062 rc = VDBManagerMakeRead ( &pself->mgr, pself->dir );
1063 if ( rc != 0 )
1064 {
1065 LOGERR( klogInt, rc, "VDBManagerMakeRead() failed in svdb_open()" );
1066 svdb_set_last_err( "VDBManagerMakeRead() failed in svdb_open()" );
1067 }
1068 }
1069 if ( rc == 0 )
1070 {
1071 rc = VDBManagerMakeSRASchema( pself->mgr, &pself->schema );
1072 if ( rc != 0 )
1073 {
1074 LOGERR( klogInt, rc, "VDBManagerMakeSRASchema() failed in svdb_open()" );
1075 svdb_set_last_err( "VDBManagerMakeSRASchema() failed in svdb_open()" );
1076 }
1077 else
1078 {
1079 /* it is OK if these fail, we have no valid path then... */
1080 rc = VDBManagerOpenDBRead( pself->mgr, &pself->db, pself->schema, "%s", path );
1081 if ( rc == 0 )
1082 {
1083 pself->is_db = 1;
1084 rc = svdb_open_subtables( pself );
1085 }
1086 else
1087 {
1088 const VTable * tab;
1089 rc = VDBManagerOpenTableRead( pself->mgr, &tab, pself->schema, "%s", path );
1090 if ( rc == 0 )
1091 {
1092 rc = svdb_append_tab( pself, tab, svdb_filename( path ) );
1093 if ( rc != 0 )
1094 {
1095 svdb_set_last_err( "svdb_append_tab() failed in svdb_open()" );
1096 }
1097 }
1098 else
1099 {
1100 string_printf ( last_err, sizeof last_err, NULL,
1101 "VDBManagerOpenTableRead('%s')->'%R' failed",
1102 path, rc );
1103 /* svdb_set_last_err( "VDBManagerOpenTableRead() failed in svdb_open()" ); */
1104 }
1105 }
1106 }
1107 }
1108
1109 if ( rc == 0 )
1110 {
1111 pself->path = string_dup_measure ( path, NULL );
1112 svdb_set_last_err( "OK" );
1113 }
1114 else
1115 {
1116 svdb_close( pself );
1117 pself = NULL;
1118 }
1119 }
1120 }
1121 else
1122 {
1123 svdb_set_last_err( "path empty in svdb_open()" );
1124 }
1125 return pself;
1126 }
1127
1128
svdb_last_err(void)1129 MOD_EXPORT const char * CC svdb_last_err( void )
1130 {
1131 return last_err;
1132 }
1133
1134
svdb_is_db(void * self)1135 MOD_EXPORT int CC svdb_is_db( void * self )
1136 {
1137 int res = 0;
1138 if ( self != NULL )
1139 {
1140 p_svdb_conn pself = self;
1141 res = pself->is_db;
1142 }
1143 return res;
1144 }
1145
1146
svdb_count_tabs(void * self)1147 MOD_EXPORT int CC svdb_count_tabs( void * self )
1148 {
1149 int res = 0;
1150 if ( self != NULL )
1151 {
1152 p_svdb_conn pself = self;
1153 res = VectorLength( &pself->tables );
1154 }
1155 return res;
1156 }
1157
1158
svdb_get_tab(void * self,const int tab_id)1159 static p_svdb_tab svdb_get_tab( void * self, const int tab_id )
1160 {
1161 p_svdb_tab res = NULL;
1162 if ( self != NULL )
1163 {
1164 p_svdb_conn pself = self;
1165 res = VectorGet ( &pself->tables, tab_id );
1166 }
1167 return res;
1168 }
1169
1170
svdb_tab_meta_root(void * self,const int tab_id)1171 MOD_EXPORT int CC svdb_tab_meta_root( void * self, const int tab_id )
1172 {
1173 int res = -1;
1174 p_svdb_tab t = svdb_get_tab( self, tab_id );
1175 if ( t != NULL )
1176 {
1177 if ( t->meta != NULL )
1178 {
1179 res = t->meta->id;
1180 }
1181 }
1182 return res;
1183 }
1184
1185
svdb_get_meta_node(void * self,const int tab_id,const int meta_id)1186 static p_svdb_meta svdb_get_meta_node( void * self, const int tab_id, const int meta_id )
1187 {
1188 p_svdb_meta m = NULL;
1189 p_svdb_tab t = svdb_get_tab( self, tab_id );
1190 if ( t != NULL )
1191 {
1192 if ( t->meta != NULL )
1193 {
1194 m = svdb_find_meta( t->meta, meta_id );
1195 }
1196 }
1197 return m;
1198 }
1199
1200
/* Checks whether a byte buffer consists of printable characters only.

   ptr ... start of the buffer ( may be NULL )
   len ... number of bytes to inspect

   returns 1 if every one of the len bytes satisfies isprint(),
           0 otherwise ( also for a NULL buffer or len <= 0 )

   The bytes are classified as unsigned char: handing a negative plain
   char to isprint() is undefined behavior. */
static int svdb_check_printable( const void * ptr, const int len )
{
    const unsigned char * bytes = ptr;
    int i;

    if ( bytes == NULL || len <= 0 )
        return 0;

    for ( i = 0; i < len; ++i )
    {
        /* the first non-printable byte decides, no need to count them */
        if ( !isprint ( bytes[ i ] ) )
            return 0;
    }
    return 1;
}
1220
1221
svdb_tab_meta_value_printable(void * self,const int tab_id,const int meta_id)1222 MOD_EXPORT int CC svdb_tab_meta_value_printable( void * self, const int tab_id, const int meta_id )
1223 {
1224 int res = -1;
1225 p_svdb_meta m = svdb_get_meta_node( self, tab_id, meta_id );
1226 if ( m != NULL )
1227 {
1228 res = svdb_check_printable( m->value, m->value_len );
1229 }
1230 return res;
1231
1232 }
1233
1234
svdb_tab_meta_value_len(void * self,const int tab_id,const int meta_id)1235 MOD_EXPORT int CC svdb_tab_meta_value_len( void * self, const int tab_id, const int meta_id )
1236 {
1237 int res = -1;
1238 p_svdb_meta m = svdb_get_meta_node( self, tab_id, meta_id );
1239 if ( m != NULL && m->value != NULL && m->value_len )
1240 {
1241 res = m->value_len;
1242 }
1243 return res;
1244 }
1245
1246
svdb_tab_meta_value_ptr(void * self,const int tab_id,const int meta_id)1247 MOD_EXPORT const char * CC svdb_tab_meta_value_ptr( void * self, const int tab_id,
1248 const int meta_id )
1249 {
1250 const char * res = NULL;
1251 p_svdb_meta m = svdb_get_meta_node( self, tab_id, meta_id );
1252 if ( m != NULL && m->value != NULL && m->value_len )
1253 {
1254 res = ( char * )m->value;
1255 }
1256 return res;
1257 }
1258
1259
svdb_print_hex(char * dst,size_t dstlen,char * src,size_t srclen,size_t trim,size_t * num_writ)1260 static rc_t svdb_print_hex( char * dst, size_t dstlen, char * src, size_t srclen,
1261 size_t trim, size_t * num_writ )
1262 {
1263 rc_t rc = 0;
1264 bool periods = true;
1265 size_t writ, i, n = ( dstlen / 3 );
1266
1267 *num_writ = 0;
1268 if ( n >= srclen )
1269 {
1270 n = srclen;
1271 periods = false;
1272 }
1273 if ( periods )
1274 n--;
1275 for ( i = 0; i < n && rc == 0; ++i )
1276 {
1277 unsigned char x = src[i];
1278 if ( i > 0 )
1279 rc = string_printf ( dst, dstlen, &writ, "-%02X", x );
1280 else
1281 rc = string_printf ( dst, dstlen, &writ, "%02X", x );
1282 if ( rc == 0 )
1283 {
1284 dst += writ;
1285 *num_writ += writ;
1286 dstlen -= writ;
1287 }
1288 }
1289 if ( rc == 0 && periods )
1290 {
1291 rc = string_printf ( dst, dstlen, &writ, "..." );
1292 *num_writ += writ;
1293 }
1294 return rc;
1295 }
1296
1297
svdb_tab_meta_value(void * self,const int tab_id,const int meta_id,char * buf,int buflen,int trim)1298 MOD_EXPORT int CC svdb_tab_meta_value( void * self, const int tab_id, const int meta_id,
1299 char * buf, int buflen, int trim )
1300 {
1301 int res = -1;
1302 p_svdb_meta m = svdb_get_meta_node( self, tab_id, meta_id );
1303 if ( m != NULL && m->value != NULL && m->value_len )
1304 {
1305 rc_t rc;
1306 size_t num_writ;
1307 if ( svdb_check_printable( m->value, m->value_len ) )
1308 {
1309 if ( trim == 0 || trim >= m->value_len )
1310 rc = string_printf ( buf, buflen, &num_writ, "%.*s", m->value_len, m->value );
1311 else
1312 rc = string_printf ( buf, buflen, &num_writ, "%.*s...", trim, m->value );
1313 }
1314 else
1315 {
1316 rc = svdb_print_hex( buf, buflen, (char *)m->value, m->value_len, trim, &num_writ );
1317 }
1318
1319 if ( rc == 0 )
1320 res = ( int )num_writ;
1321 }
1322 return res;
1323 }
1324
1325
svdb_tab_meta_name(void * self,const int tab_id,const int meta_id,char * buf,int buflen)1326 MOD_EXPORT int CC svdb_tab_meta_name( void * self, const int tab_id, const int meta_id,
1327 char * buf, int buflen )
1328 {
1329 int res = -1;
1330 p_svdb_meta m = svdb_get_meta_node( self, tab_id, meta_id );
1331 if ( m != NULL )
1332 {
1333 size_t num_writ;
1334 rc_t rc = string_printf ( buf, buflen, &num_writ, "%s", m->name );
1335 if ( rc == 0 )
1336 {
1337 res = ( int )num_writ;
1338 }
1339 }
1340 return res;
1341 }
1342
1343
svdb_tab_meta_child_count(void * self,const int tab_id,const int meta_id)1344 MOD_EXPORT int CC svdb_tab_meta_child_count( void * self, const int tab_id,
1345 const int meta_id )
1346 {
1347 int res = -1;
1348 p_svdb_meta m = svdb_get_meta_node( self, tab_id, meta_id );
1349 if ( m != NULL )
1350 {
1351 res = VectorLength( &m->childs );
1352 }
1353 return res;
1354 }
1355
1356
svdb_tab_meta_child_id(void * self,const int tab_id,const int meta_id,const int child_idx)1357 MOD_EXPORT int CC svdb_tab_meta_child_id( void * self, const int tab_id,
1358 const int meta_id, const int child_idx )
1359 {
1360 int res = -1;
1361 p_svdb_meta m = svdb_get_meta_node( self, tab_id, meta_id );
1362 if ( m != NULL )
1363 {
1364 p_svdb_meta c = VectorGet( &m->childs, child_idx );
1365 if ( c != NULL )
1366 {
1367 res = c->id;
1368 }
1369 }
1370 return res;
1371 }
1372
1373
svdb_tab_idx(void * self,const char * name)1374 MOD_EXPORT int CC svdb_tab_idx( void * self, const char * name )
1375 {
1376 int res = -1;
1377 if ( self != NULL )
1378 {
1379 p_svdb_conn pself = self;
1380 int i, n = VectorLength( &pself->tables );
1381 for ( i = 0; i < n && res < 0; ++i )
1382 {
1383 p_svdb_tab tab = VectorGet ( &pself->tables, i );
1384 if ( tab != NULL )
1385 {
1386 if ( svdb_strcmp( tab->name, name ) == 0 )
1387 {
1388 res = i;
1389 }
1390 }
1391 }
1392 }
1393 return res;
1394 }
1395
1396
svdb_get_col(void * self,const int tab_id,const int selection,const int col_id)1397 static p_svdb_col svdb_get_col( void * self, const int tab_id,
1398 const int selection, const int col_id )
1399 {
1400 p_svdb_col res = NULL;
1401 p_svdb_tab tab = svdb_get_tab( self, tab_id );
1402 if ( tab != NULL )
1403 {
1404 Vector * v = svdb_get_column_vector( tab, selection );
1405 res = VectorGet ( v, col_id );
1406 }
1407 return res;
1408 }
1409
1410
svdb_set_column_visibility(void * self,const int tab_id,const int selection,const int col_id,const int visible)1411 MOD_EXPORT int CC svdb_set_column_visibility( void * self, const int tab_id,
1412 const int selection, const int col_id, const int visible )
1413 {
1414 int res = 0;
1415 p_svdb_col col = svdb_get_col ( self, tab_id, selection, col_id );
1416 if ( col != NULL )
1417 {
1418 int vis = visible;
1419 if ( vis != 0 )
1420 {
1421 vis = 1;
1422 }
1423 if ( col->visible != vis )
1424 {
1425 int i, n;
1426 p_svdb_tab tab = svdb_get_tab( self, tab_id );
1427 col->visible = vis;
1428 /* clear the visible and invisible vector */
1429 VectorWhack ( &tab->visible_columns, NULL, NULL );
1430 VectorWhack ( &tab->non_visible_columns, NULL, NULL );
1431
1432 VectorInit ( &tab->visible_columns, 0, NCOLUMNS );
1433 VectorInit ( &tab->non_visible_columns, 0, NCOLUMNS );
1434
1435 /* loop through all columns an redistibute them new
1436 ( to preserve the original order ) */
1437 n = VectorLength( &tab->all_columns );
1438 for ( i = 0; i < n; ++i )
1439 {
1440 col = VectorGet ( &tab->all_columns, i );
1441 if ( col != NULL )
1442 {
1443 if ( col->visible != 0 )
1444 {
1445 VectorAppend( &tab->visible_columns, NULL, col );
1446 }
1447 else
1448 {
1449 VectorAppend( &tab->non_visible_columns, NULL, col );
1450 }
1451 }
1452 }
1453 res = 1;
1454 }
1455 }
1456 return res;
1457 }
1458
1459
svdb_col_idx(void * self,const int tab_id,const int selection,const char * name)1460 MOD_EXPORT int CC svdb_col_idx( void * self, const int tab_id,
1461 const int selection, const char * name )
1462 {
1463 int res = -1;
1464 p_svdb_tab tab = svdb_get_tab( self, tab_id );
1465 if ( tab != NULL )
1466 {
1467 Vector * v = svdb_get_column_vector( tab, selection );
1468 int i, n = VectorLength( v );
1469 for ( i = 0; i < n && res < 0; ++i )
1470 {
1471 p_svdb_col col = VectorGet ( v, i );
1472 if ( col != NULL )
1473 {
1474 if ( svdb_strcmp( col->name, name ) == 0 )
1475 {
1476 res = i;
1477 }
1478 }
1479 }
1480 }
1481 return res;
1482 }
1483
1484
svdb_tabname(void * self,const int tab_id)1485 MOD_EXPORT const char * CC svdb_tabname( void * self, const int tab_id )
1486 {
1487 const char * res = NULL;
1488 p_svdb_tab tab = svdb_get_tab( self, tab_id );
1489 if ( tab != NULL )
1490 {
1491 res = tab->name;
1492 }
1493 return res;
1494 }
1495
1496
svdb_count_cols(void * self,const int tab_id,const int selection)1497 MOD_EXPORT int CC svdb_count_cols( void * self, const int tab_id, const int selection )
1498 {
1499 int res = 0;
1500 p_svdb_tab tab = svdb_get_tab( self, tab_id );
1501 if ( tab != NULL )
1502 {
1503 Vector * v = svdb_get_column_vector( tab, selection );
1504 res = VectorLength( v );
1505 }
1506 return res;
1507 }
1508
1509
svdb_colname(void * self,const int tab_id,const int selection,const int col_id)1510 MOD_EXPORT const char * CC svdb_colname( void * self, const int tab_id,
1511 const int selection, const int col_id )
1512 {
1513 const char * res = NULL;
1514 p_svdb_col col = svdb_get_col( self, tab_id, selection, col_id );
1515 if ( col != NULL )
1516 {
1517 res = col->name;
1518 }
1519 return res;
1520 }
1521
1522
svdb_dflt_type_idx(void * self,const int tab_id,const int selection,const int col_id)1523 MOD_EXPORT int CC svdb_dflt_type_idx( void * self, const int tab_id,
1524 const int selection, const int col_id )
1525 {
1526 int res = -1;
1527 p_svdb_col col = svdb_get_col( self, tab_id, selection, col_id );
1528 if ( col != NULL )
1529 {
1530 res = col->dflt_type_idx;
1531 }
1532 return res;
1533 }
1534
1535
svdb_count_types(void * self,const int tab_id,const int selection,const int col_id)1536 MOD_EXPORT int CC svdb_count_types( void * self, const int tab_id,
1537 const int selection, const int col_id )
1538 {
1539 int res = 0;
1540 p_svdb_col col = svdb_get_col( self, tab_id, selection, col_id );
1541 if ( col != NULL )
1542 {
1543 res = VectorLength( &col->types );
1544 }
1545 return res;
1546 }
1547
1548
svdb_get_type(void * self,const int tab_id,const int selection,const int col_id,const int type_id)1549 static p_svdb_type svdb_get_type( void * self, const int tab_id,
1550 const int selection, const int col_id, const int type_id )
1551 {
1552 p_svdb_type res = NULL;
1553 p_svdb_col col = svdb_get_col( self, tab_id, selection, col_id );
1554 if ( col != NULL )
1555 {
1556 res = VectorGet ( &col->types, type_id );
1557 }
1558 return res;
1559 }
1560
1561
svdb_typename(void * self,const int tab_id,const int selection,const int col_id,const int type_id)1562 MOD_EXPORT const char * CC svdb_typename( void * self, const int tab_id,
1563 const int selection, const int col_id, const int type_id )
1564 {
1565 const char * res = NULL;
1566 p_svdb_type type = svdb_get_type( self, tab_id, selection, col_id, type_id );
1567 if ( type != NULL )
1568 {
1569 res = type->name;
1570 }
1571 return res;
1572 }
1573
1574
svdb_type_idx(void * self,const int tab_id,const int selection,const int col_id,const char * name)1575 MOD_EXPORT int CC svdb_type_idx( void * self, const int tab_id,
1576 const int selection, const int col_id, const char * name )
1577 {
1578 int res = -1;
1579 p_svdb_col col = svdb_get_col( self, tab_id, selection, col_id );
1580 if ( col != NULL )
1581 {
1582 int i, n = VectorLength( &col->types );
1583 for ( i = 0; i < n && res < 0; ++i )
1584 {
1585 p_svdb_type type = VectorGet ( &col->types, i );
1586 if ( type != NULL )
1587 {
1588 if ( svdb_strcmp( type->name, name ) == 0 )
1589 {
1590 res = i;
1591 }
1592 }
1593 }
1594 }
1595 return res;
1596 }
1597
1598
svdb_typedomain(void * self,const int tab_id,const int selection,const int col_id,const int type_id)1599 MOD_EXPORT int CC svdb_typedomain( void * self, const int tab_id,
1600 const int selection, const int col_id, const int type_id )
1601 {
1602 int res = -1;
1603 p_svdb_type type = svdb_get_type( self, tab_id, selection, col_id, type_id );
1604 if ( type != NULL )
1605 {
1606 res = type->typedesc.domain;
1607 }
1608 return res;
1609 }
1610
1611
svdb_typebits(void * self,const int tab_id,const int selection,const int col_id,const int type_id)1612 MOD_EXPORT int CC svdb_typebits( void * self, const int tab_id,
1613 const int selection, const int col_id, const int type_id )
1614 {
1615 int res = -1;
1616 p_svdb_type type = svdb_get_type( self, tab_id, selection, col_id, type_id );
1617 if ( type != NULL )
1618 {
1619 res = type->typedesc.intrinsic_bits;
1620 }
1621 return res;
1622 }
1623
1624
svdb_typedim(void * self,const int tab_id,const int selection,const int col_id,const int type_id)1625 MOD_EXPORT int CC svdb_typedim( void * self, const int tab_id,
1626 const int selection, const int col_id, const int type_id )
1627 {
1628 int res = -1;
1629 p_svdb_type type = svdb_get_type( self, tab_id, selection, col_id, type_id );
1630 if ( type != NULL )
1631 {
1632 res = type->typedesc.intrinsic_dim;
1633 }
1634 return res;
1635 }
1636
1637
/* Replaces the string *dst points to: the old string is released
   first, then *dst receives a duplicate of s
   ( made with string_dup_measure() ). */
static void svdb_replace_string( const char **dst, const char * s )
{
    const char * fresh;

    free_if_not_null( *dst );
    fresh = string_dup_measure ( s, NULL );
    *dst = fresh;
}
1643
1644
/* Sets the element separator string for columns of one table.
   col_id >= 0 ... only the column at that position of the selected
                   column vector is changed
   col_id <  0 ... every column of the selected vector is changed */
static void svdb_set_tab_elem_separator( p_svdb_tab tab,
        const int selection, const int col_id, const char * s )
{
    Vector * columns = svdb_get_column_vector( tab, selection );

    if ( col_id < 0 )
    {
        int pos, count = VectorLength( columns );
        for ( pos = 0; pos < count; ++pos )
        {
            p_svdb_col each = VectorGet ( columns, pos );
            if ( each != NULL )
                svdb_replace_string( &each->elem_separator, s );
        }
    }
    else
    {
        p_svdb_col single = VectorGet ( columns, col_id );
        if ( single != NULL )
            svdb_replace_string( &single->elem_separator, s );
    }
}
1670
1671
svdb_set_elem_separator(void * self,const int tab_id,const int selection,const int col_id,const char * s)1672 MOD_EXPORT void CC svdb_set_elem_separator( void * self,
1673 const int tab_id, const int selection, const int col_id,
1674 const char * s )
1675 {
1676 if ( self != NULL && s != NULL && s[0] != 0 )
1677 {
1678 p_svdb_conn pself = self;
1679 if ( tab_id >= 0 )
1680 {
1681 p_svdb_tab tab = VectorGet ( &pself->tables, tab_id );
1682 if ( tab != NULL )
1683 {
1684 svdb_set_tab_elem_separator( tab, selection, col_id, s );
1685 }
1686 }
1687 else
1688 {
1689 int i, n = VectorLength( &pself->tables );
1690 for ( i = 0; i < n; ++ i )
1691 {
1692 p_svdb_tab tab = VectorGet ( &pself->tables, i );
1693 if ( tab != NULL )
1694 {
1695 svdb_set_tab_elem_separator( tab, selection, col_id, s );
1696 }
1697 }
1698 }
1699 }
1700 }
1701
1702
/* Sets the dimension separator string for columns of one table.
   col_id >= 0 ... only the column at that position of the selected
                   column vector is changed
   col_id <  0 ... every column of the selected vector is changed */
static void svdb_set_tab_dim_separator( p_svdb_tab tab,
        const int selection, const int col_id, const char * s )
{
    Vector * columns = svdb_get_column_vector( tab, selection );

    if ( col_id < 0 )
    {
        int pos, count = VectorLength( columns );
        for ( pos = 0; pos < count; ++pos )
        {
            p_svdb_col each = VectorGet ( columns, pos );
            if ( each != NULL )
                svdb_replace_string( &each->dim_separator, s );
        }
    }
    else
    {
        p_svdb_col single = VectorGet ( columns, col_id );
        if ( single != NULL )
            svdb_replace_string( &single->dim_separator, s );
    }
}
1728
1729
svdb_set_dim_separator(void * self,const int tab_id,const int selection,const int col_id,const char * s)1730 MOD_EXPORT void CC svdb_set_dim_separator( void * self,
1731 const int tab_id, const int selection, const int col_id,
1732 const char * s )
1733 {
1734 if ( self != NULL && s != NULL && s[0] != 0 )
1735 {
1736 p_svdb_conn pself = self;
1737 if ( tab_id >= 0 )
1738 {
1739 p_svdb_tab tab = VectorGet ( &pself->tables, tab_id );
1740 if ( tab != NULL )
1741 {
1742 svdb_set_tab_dim_separator( tab, selection, col_id, s );
1743 }
1744 }
1745 else
1746 {
1747 int i, n = VectorLength( &pself->tables );
1748 for ( i = 0; i < n; ++ i )
1749 {
1750 p_svdb_tab tab = VectorGet ( &pself->tables, i );
1751 if ( tab != NULL )
1752 {
1753 svdb_set_tab_dim_separator( tab, selection, col_id, s );
1754 }
1755 }
1756 }
1757 }
1758 }
1759
1760
svdb_is_enabled(void * self,const int tab_id,const int selection,const int col_id)1761 MOD_EXPORT int CC svdb_is_enabled( void * self, const int tab_id,
1762 const int selection, const int col_id )
1763 {
1764 int res = 0;
1765 p_svdb_col col = svdb_get_col( self, tab_id, selection, col_id );
1766 if ( col != NULL )
1767 {
1768 res = col->enabled;
1769 }
1770 return res;
1771 }
1772
1773
/* Resets the usage state of every column in column vector 0 of the
   table: the enabled-flag is set to the given value, the open-flag is
   cleared and a stored cast is released. */
static void svdb_set_columns_usage( p_svdb_tab tab, int enabled )
{
    Vector * columns = svdb_get_column_vector( tab, 0 );
    uint32_t count = VectorLength( columns );
    uint32_t pos;

    for ( pos = 0; pos < count; ++pos )
    {
        p_svdb_col col = VectorGet ( columns, pos );
        if ( col == NULL )
            continue;

        col->enabled = enabled;
        col->open = 0;
        free_if_not_null( col->cast );
        col->cast = NULL;
    }
}
1790
1791
svdb_find_col(p_svdb_tab tab,const char * name)1792 static p_svdb_col svdb_find_col( p_svdb_tab tab, const char *name )
1793 {
1794 p_svdb_col res = NULL;
1795 Vector * v = svdb_get_column_vector( tab, 0 );
1796 uint32_t i, n = VectorLength( v );
1797 for ( i = 0; i < n && res == NULL; ++i )
1798 {
1799 p_svdb_col col = VectorGet ( v, i );
1800 if ( col != NULL )
1801 {
1802 if ( svdb_strcmp( col->name, name ) == 0 )
1803 {
1804 res = col;
1805 }
1806 }
1807 }
1808 return res;
1809 }
1810
1811
/* Reduces a string, in place, to its first space-delimited word:
   leading spaces are dropped and the string ends at the first space
   that follows the word ( "  abc  " -> "abc", "ab cd" -> "ab" ).

   An empty string or a string made of spaces only becomes the empty
   string; a NULL pointer is ignored. ( Shifting the buffer left for
   as long as it starts with a terminator would never come to an end
   for such inputs. ) */
static void svdb_trim( char * s )
{
    char * rd = s;
    char * wr = s;

    if ( s == NULL )
        return;

    /* skip the leading spaces */
    while ( *rd == ' ' )
        ++rd;

    /* move the word to the front of the buffer */
    while ( *rd != 0 && *rd != ' ' )
        *wr++ = *rd++;

    *wr = 0;
}
1827
/* Enables one column described by a section of the defline.

   defline ................. the complete defline
   start_cast, end_cast .... positions of '(' and ')' around the cast;
                             a cast is only extracted if
                             end_cast - start_cast > 1
   start_name, end_name .... the name occupies [ start_name, end_name )

   Cast and name are copied out of the defline and passed through
   svdb_trim(). If a column with that name is found via svdb_find_col(),
   it is enabled and its stored cast is replaced: by a copy of the new
   cast, or by NULL if this entry has none. Resetting to NULL matters,
   otherwise the column would keep a pointer to the cast just released. */
static void svdb_enable_col( p_svdb_tab tab, const char *defline,
        int start_cast, int end_cast, int start_name, int end_name )
{
    char * cast = NULL;
    char * name = NULL;
    int l = ( end_cast - start_cast );

    if ( l > 1 )
    {
        /* l - 1 characters between the parentheses, plus terminator */
        cast = calloc( 1, l );
        if ( cast != NULL )
        {
            memmove( cast, &defline[ start_cast + 1 ], l - 1 );
            svdb_trim( cast );
        }
    }

    l = ( end_name - start_name );
    if ( l > 0 )
    {
        name = calloc( 1, l + 1 );
        if ( name != NULL )
        {
            memmove( name, &defline[ start_name ], l );
            svdb_trim( name );
        }
    }

    if ( name != NULL )
    {
        p_svdb_col col = svdb_find_col( tab, name );
        if ( col != NULL )
        {
            col->enabled = true;
            free_if_not_null( col->cast );
            /* never leave the released pointer behind */
            col->cast = NULL;
            if ( cast != NULL )
            {
                col->cast = string_dup_measure ( cast, NULL );
            }
        }
    }

    free_if_not_null( cast );
    free_if_not_null( name );
}
1872
1873
/* Applies a column definition line to a table.

   An empty or missing defline enables all columns, without casts.
   Otherwise all columns are disabled first and only the ones listed
   in the defline are enabled again. The expected form is
       "(CAST1)NAME1,(CAST2)NAME2,NAME3"
   '(' and ')' mark an optional cast, ',' ends an entry; the text after
   the last ',' is handled as the final entry. */
static void svdb_defline( p_svdb_tab tab, const char *defline )
{
    int cast_open = -1;
    int cast_close = -1;
    int name_begin = 0;
    int pos, len;

    if ( defline == NULL || defline[ 0 ] == 0 )
    {
        svdb_set_columns_usage( tab, 1 );
        return;
    }

    len = string_measure ( defline, NULL );

    /* start from a clean state: nothing enabled, no casts */
    svdb_set_columns_usage( tab, 0 );

    for ( pos = 0; pos < len; ++pos )
    {
        const char c = defline[ pos ];
        if ( c == '(' )
        {
            cast_open = pos;
        }
        else if ( c == ')' )
        {
            cast_close = pos;
            name_begin = pos + 1;
        }
        else if ( c == ',' )
        {
            svdb_enable_col( tab, defline, cast_open, cast_close, name_begin, pos );
            cast_open = cast_close = -1;
            name_begin = pos + 1;
        }
    }

    /* the entry behind the last separator */
    svdb_enable_col( tab, defline, cast_open, cast_close, name_begin, len );
}
1906
1907
/* Adds every enabled column of column vector 0 to the table's cursor,
   as "(cast)name" if the column has a cast, as "name" otherwise.
   A column that was added gets its open-flag set. The walk stops at
   the first column that cannot be added ( the failure is logged ).

   returns the number of columns added. */
static int svdb_add_to_cursor( p_svdb_tab tab )
{
    int added = 0;
    rc_t rc = 0;
    Vector * columns = svdb_get_column_vector( tab, 0 );
    uint32_t count = VectorLength( columns );
    uint32_t pos;

    for ( pos = 0; pos < count && rc == 0; ++pos )
    {
        p_svdb_col col = VectorGet ( columns, pos );
        if ( col == NULL || !col->enabled )
            continue;

        if ( col->cast == NULL )
        {
            rc = VCursorAddColumn ( tab->cursor, &col->cur_idx, "%s", col->name );
        }
        else
        {
            rc = VCursorAddColumn ( tab->cursor, &col->cur_idx, "(%s)%s", col->cast, col->name );
        }

        if ( rc == 0 )
        {
            col->open = 1;
            ++added;
        }
        else
        {
            LOGERR( klogInt, rc, "VCursorAddColumn() failed in svdb_add_to_cursor()" );
        }
    }
    return added;
}
1940
1941
/* Queries the cursor for the datatype of every open column of column
   vector 0 and stores, per column:
     - cursor_typedecl / cursor_typedesc as reported by VCursorDatatype()
     - cur_typedecl ... textual form of the typedecl ( heap copy )
     - value_trans / dim_trans ... lookup results of
       sra_get_value_trans_func() / sra_get_dim_trans_func()

   A previously stored cur_typedecl text is always released; if the
   type cannot be determined the field stays NULL, so that no released
   pointer is left behind in the column. */
static void svdb_read_cursor_data_types( p_svdb_tab tab )
{
    Vector * v = svdb_get_column_vector( tab, 0 );
    uint32_t i, n = VectorLength( v );
    for ( i = 0; i < n; ++i )
    {
        p_svdb_col col = VectorGet ( v, i );
        if ( col != NULL && col->open )
        {
            rc_t rc = VCursorDatatype ( tab->cursor, col->cur_idx,
                                        &col->cursor_typedecl, &col->cursor_typedesc );
            free_if_not_null( col->cur_typedecl );
            /* the old text is gone, do not keep pointing at it */
            col->cur_typedecl = NULL;
            if ( rc == 0 )
            {
                char buf[ 64 ];
                rc = VTypedeclToText( &col->cursor_typedecl, tab->conn->schema,
                                      buf, sizeof( buf ) );
                if ( rc == 0 )
                {
                    col->cur_typedecl = string_dup_measure ( buf, NULL );
                    col->value_trans = sra_get_value_trans_func( tab->conn->schema, &col->cursor_typedecl );
                    col->dim_trans = sra_get_dim_trans_func( tab->conn->schema, &col->cursor_typedecl );
                }
            }
        }
    }
}
1969
1970
/* Sorts the open columns of the ALL_COLUMNS vector into the table's
   static_columns ( VCursorIdRange() reports a range of 0 ) and
   non_static_columns ( any other range ) vectors.

   tab->first / tab->range are reset to 0 and then taken from the first
   non-static column encountered. Columns for which VCursorIdRange()
   fails are put into neither vector. */
static void svdb_discover_static_columns( p_svdb_tab tab )
{
    Vector * columns = svdb_get_column_vector( tab, ALL_COLUMNS );
    uint32_t count = VectorLength( columns );
    uint32_t pos, appended_at;
    int64_t first;
    uint64_t range;

    tab->first = 0;
    tab->range = 0;

    for ( pos = 0; pos < count; ++pos )
    {
        p_svdb_col col = VectorGet ( columns, pos );
        if ( col == NULL || !col->open )
            continue;

        if ( VCursorIdRange ( tab->cursor, col->cur_idx, &first, &range ) != 0 )
            continue;

        if ( range != 0 )
        {
            VectorAppend ( &tab->non_static_columns, &appended_at, col );
            if ( tab->range == 0 )
            {
                /* the first non-static column defines the row range of the table */
                tab->first = first;
                tab->range = range;
            }
        }
        else
        {
            VectorAppend ( &tab->static_columns, &appended_at, col );
        }
    }
}
2005
2006
svdb_open_table(void * self,const int tab_id,const char * defline)2007 MOD_EXPORT int CC svdb_open_table( void * self, const int tab_id,
2008 const char *defline )
2009 {
2010 int res = -1;
2011 p_svdb_tab tab = svdb_get_tab( self, tab_id );
2012 if ( tab != NULL )
2013 {
2014 rc_t rc;
2015 svdb_clear_vector( &tab->static_columns );
2016 svdb_clear_vector( &tab->non_static_columns );
2017 svdb_defline( tab, defline );
2018 if ( tab->cursor != NULL )
2019 {
2020 rc = VCursorRelease( tab->cursor );
2021 if ( rc != 0 )
2022 {
2023 LOGERR( klogInt, rc, "VCursorRelease() failed in svdb_open_table()" );
2024 }
2025 }
2026 rc = VTableCreateCursorRead ( tab->tab, &tab->cursor );
2027 if ( rc != 0 )
2028 {
2029 LOGERR( klogInt, rc, "VTableCreateCursorRead() failed in svdb_open_table()" );
2030 }
2031 else
2032 {
2033 res = svdb_add_to_cursor( tab );
2034 if ( res > 0 )
2035 {
2036 rc = VCursorOpen ( tab->cursor );
2037 if ( rc == 0 )
2038 {
2039 svdb_read_cursor_data_types( tab );
2040 svdb_discover_static_columns( tab );
2041 }
2042 else
2043 {
2044 LOGERR( klogInt, rc, "VCursorOpen() failed in svdb_open_table()" );
2045 res = -1;
2046 }
2047 }
2048 }
2049 }
2050 return res;
2051 }
2052
2053
svdb_max_colname_length(void * self,const int tab_id,const int selection)2054 MOD_EXPORT int CC svdb_max_colname_length( void * self,
2055 const int tab_id, const int selection )
2056 {
2057 int res = 0;
2058 p_svdb_tab tab = svdb_get_tab( self, tab_id );
2059 if ( tab != NULL )
2060 {
2061 Vector * v = svdb_get_column_vector( tab, selection );
2062 uint32_t i, n = VectorLength( v );
2063 for ( i = 0; i < n; ++i )
2064 {
2065 p_svdb_col col = VectorGet ( v, i );
2066 if ( col != NULL && col->enabled )
2067 {
2068 int l = string_measure ( col->name, NULL );
2069 if ( l > res )
2070 {
2071 res = l;
2072 }
2073 }
2074 }
2075 }
2076 return res;
2077 }
2078
2079
svdb_row_range(void * self,const int tab_id)2080 MOD_EXPORT unsigned long long int CC svdb_row_range( void * self,
2081 const int tab_id )
2082 {
2083 unsigned long long int res = 0;
2084 p_svdb_tab tab = svdb_get_tab( self, tab_id );
2085 if ( tab != NULL )
2086 {
2087 res = tab->range;
2088 }
2089 return res;
2090 }
2091
2092
svdb_first_row(void * self,const int tab_id)2093 MOD_EXPORT signed long long int CC svdb_first_row( void * self,
2094 const int tab_id )
2095 {
2096 signed long long int res = 0;
2097 p_svdb_tab tab = svdb_get_tab( self, tab_id );
2098 if ( tab != NULL )
2099 {
2100 res = tab->first;
2101 }
2102 return res;
2103 }
2104
/* Appends one character to the print context.
   The character is only stored while printed is below buf_len;
   needed is incremented in any case, so it keeps counting characters
   that did not fit. */
static void svdb_print_char( p_print_ctx dst, const char c )
{
    if ( dst->printed < dst->buf_len )
    {
        dst->buf[ dst->printed ] = c;
        dst->printed += 1;
    }
    dst->needed += 1;
}
2113
2114
/* Appends a zero-terminated string to the print context, character by
   character via svdb_print_char(). A NULL string is ignored.

   The loop runs up to the terminator. It must not test for "> 0":
   where plain char is signed, every byte >= 0x80 is negative and
   would cut the string short. */
static void svdb_print_str( p_print_ctx dst, const char * s )
{
    if ( s == NULL )
        return;

    while ( *s != 0 )
    {
        svdb_print_char( dst, *(s++) );
    }
}
2122
2123
/* split a bit offset into the index of the byte it falls into
   and the position of the bit inside of that byte */
#define BYTE_OFFSET(VALUE) ( (VALUE) >> 3 )
#define BIT_OFFSET(VALUE) ( (VALUE) & 0x7 )

/* number of bytes needed to hold 0 .. 64 bits, indexed by the bit count */
uint8_t BitLength2Bytes[65] =
{
    /*  0       */ 0,
    /*  1 ..  8 */ 1, 1, 1, 1, 1, 1, 1, 1,
    /*  9 .. 16 */ 2, 2, 2, 2, 2, 2, 2, 2,
    /* 17 .. 24 */ 3, 3, 3, 3, 3, 3, 3, 3,
    /* 25 .. 32 */ 4, 4, 4, 4, 4, 4, 4, 4,
    /* 33 .. 40 */ 5, 5, 5, 5, 5, 5, 5, 5,
    /* 41 .. 48 */ 6, 6, 6, 6, 6, 6, 6, 6,
    /* 49 .. 56 */ 7, 7, 7, 7, 7, 7, 7, 7,
    /* 57 .. 64 */ 8, 8, 8, 8, 8, 8, 8, 8
};

/*************************************************************************************
    n_bits   [IN] ... number of bits

    returns the number of bytes that have to be copied to contain the given
    number of bits; every bit count above 64 is answered with 8
*************************************************************************************/
static uint16_t bitlength_2_bytes( const size_t n_bits )
{
    return ( n_bits <= 64 ) ? BitLength2Bytes[ n_bits ] : 8;
}
2152
/* masks with the lowest 0 .. 32 bits set, indexed by the bit count */
uint64_t BitLength2Mask[33] =
{
    0x00,
    0x1,        0x3,        0x7,        0xF,            /*  1 ..  4 */
    0x1F,       0x3F,       0x7F,       0xFF,           /*  5 ..  8 */
    0x1FF,      0x3FF,      0x7FF,      0xFFF,          /*  9 .. 12 */
    0x1FFF,     0x3FFF,     0x7FFF,     0xFFFF,         /* 13 .. 16 */
    0x1FFFF,    0x3FFFF,    0x7FFFF,    0xFFFFF,        /* 17 .. 20 */
    0x1FFFFF,   0x3FFFFF,   0x7FFFFF,   0xFFFFFF,       /* 21 .. 24 */
    0x1FFFFFF,  0x3FFFFFF,  0x7FFFFFF,  0xFFFFFFF,      /* 25 .. 28 */
    0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF      /* 29 .. 32 */
};

/*************************************************************************************
    n_bits   [IN] ... number of bits

    returns a bitmask with exactly the lowest n_bits bits set; every bit
    count above 64 is answered with all 64 bits set
*************************************************************************************/
static uint64_t bitlength_2_mask( const size_t n_bits )
{
    uint64_t upper;

    if ( n_bits <= 32 )
        return BitLength2Mask[ n_bits ];

    /* more than 32 bits: look up the upper half, the lower half is all ones */
    if ( n_bits <= 64 )
        upper = BitLength2Mask[ n_bits - 32 ];
    else
        upper = 0xFFFFFFFF;

    return ( upper << 32 ) | 0xFFFFFFFF;
}
2187
2188
/* Copies one element out of the data source into dst.
   The element starts at bit offset boff + elem_bits * elem_idx relative
   to src->base. If that offset falls on a byte boundary the bytes are
   copied with memmove(), otherwise the bits are copied with bitcpy(). */
static void move_to_value( void * dst, p_data_src src )
{
    int ofs = ( src->boff + ( src->elem_bits * src->elem_idx ) );
    int bit_part = BIT_OFFSET( ofs );
    char * from = ( char* )src->base + BYTE_OFFSET( ofs );

    if ( bit_part != 0 )
    {
        bitcpy ( dst, 0, from, bit_part, src->elem_bits );
    }
    else
    {
        memmove( dst, from, bitlength_2_bytes( src->elem_bits ) );
    }
}
2202
2203
/* Reads one element of the data source as an unsigned 64-bit value.
   The element is fetched with move_to_value(); if its bit width is not
   a multiple of 8 it is additionally run through Unpack(). Finally the
   result is masked down to elem_bits bits. */
static uint64_t move_to_uint64( p_data_src src )
{
    uint64_t raw = 0;

    move_to_value( &raw, src );
    if ( ( src->elem_bits & 7 ) != 0 )
    {
        size_t unpacked = 0;
        Unpack( src->elem_bits, sizeof( raw ), &raw, 0, src->elem_bits,
                NULL, &raw, sizeof( raw ), &unpacked );
    }
    return raw & bitlength_2_mask( src->elem_bits );
}
2217
2218
/* Prints a boolean element: '0' if the element value is zero,
   '1' for any other value. */
static void print_boolean_element( p_svdb_col col, p_print_ctx dst, p_data_src src )
{
    const uint64_t value = move_to_uint64( src );
    svdb_print_char( dst, ( value == 0 ) ? '0' : '1' );
}
2227
2228
/* Prints an unsigned integer element.

   Every element but the first one is preceded by the column's element
   separator. If the column has a value translation function, the text
   it returns for the value ( cut to 32 bit ) is printed, nothing if it
   returns NULL. Otherwise the number itself is printed.

   The value is a 64-bit quantity, so it is formatted with the 'l' size
   modifier - as the 64-bit case of print_int_element() does - instead
   of a plain "%u" that does not match the argument. */
static void print_uint_element( p_svdb_col col, p_print_ctx dst, p_data_src src )
{
    uint64_t value = move_to_uint64( src );
    if ( src->elem_idx > 0 )
    {
        svdb_print_str( dst, col->elem_separator );
    }
    if ( col->value_trans != NULL )
    {
        const char * s = col->value_trans( (uint32_t)value );
        if ( s != NULL )
        {
            svdb_print_str( dst, s );
        }
    }
    else
    {
        char buf[ 32 ];
        size_t num_writ;
        if ( string_printf ( buf, sizeof( buf ), &num_writ, "%lu", value ) == 0 )
        {
            svdb_print_str( dst, buf );
        }
    }
}
2255
2256
/* Prints a signed integer element.

   Every element but the first one is preceded by the column's element
   separator. If the column has a value translation function, the text
   it returns for the value ( cut to 32 bit ) is printed, nothing if it
   returns NULL. Otherwise the number is printed according to its width:
   8, 16, 32 and 64 bit elements as signed decimal, any other width
   as "?". */
static void print_int_element( p_svdb_col col, p_print_ctx dst, p_data_src src )
{
    int64_t value = (int64_t)move_to_uint64( src );

    if ( src->elem_idx > 0 )
    {
        svdb_print_str( dst, col->elem_separator );
    }

    if ( col->value_trans == NULL )
    {
        rc_t rc;
        char buf[ 32 ];
        size_t num_writ;

        /* narrowing to the element width restores the sign of the value */
        switch ( src->elem_bits )
        {
            case 8 :
                rc = string_printf ( buf, sizeof( buf ), &num_writ, "%d", ( int8_t )value );
                break;
            case 16 :
                rc = string_printf ( buf, sizeof( buf ), &num_writ, "%d", ( int16_t )value );
                break;
            case 32 :
                rc = string_printf ( buf, sizeof( buf ), &num_writ, "%d", ( int32_t )value );
                break;
            case 64 :
                rc = string_printf ( buf, sizeof( buf ), &num_writ, "%ld", value );
                break;
            default :
                rc = string_printf ( buf, sizeof( buf ), &num_writ, "?" );
                break;
        }

        if ( rc == 0 )
        {
            svdb_print_str( dst, buf );
        }
    }
    else
    {
        const char * translated = col->value_trans( (uint32_t)value );
        if ( translated != NULL )
        {
            svdb_print_str( dst, translated );
        }
    }
}
2308
2309
2310 #define MAX_CHARS_FOR_DOUBLE 26
2311 #define BITSIZE_OF_FLOAT ( sizeof(float) * 8 )
2312 #define BITSIZE_OF_DOUBLE ( sizeof(double) * 8 )
print_float_element(p_svdb_col col,p_print_ctx dst,p_data_src src)2313 static void print_float_element( p_svdb_col col, p_print_ctx dst, p_data_src src )
2314 {
2315 rc_t rc;
2316 char buf[ 32 ];
2317 size_t num_writ;
2318 if ( src->elem_bits == BITSIZE_OF_FLOAT )
2319 {
2320 float value;
2321 move_to_value( &value, src );
2322 rc = string_printf ( buf, sizeof( buf ), &num_writ, "%e", value );
2323 }
2324 else if ( src->elem_bits == BITSIZE_OF_DOUBLE )
2325 {
2326 double value;
2327 move_to_value( &value, src );
2328 rc = string_printf ( buf, sizeof( buf ), &num_writ, "%e", value );
2329 }
2330 else
2331 {
2332 rc = string_printf ( buf, sizeof( buf ), &num_writ, "unknown float-type" );
2333 }
2334 if ( rc == 0 )
2335 {
2336 if ( src->elem_idx > 0 )
2337 {
2338 svdb_print_str( dst, col->elem_separator );
2339 }
2340 svdb_print_str( dst, buf );
2341 }
2342 }
2343
2344
/* prints the single character found at element position src->elem_idx
   - the position is derived from src->boff plus elem_bits * elem_idx,
     converted by BYTE_OFFSET
   - col is not used; no separator is written between characters */
static void print_ascii_element( p_svdb_col col, p_print_ctx dst, p_data_src src )
{
    int bit_ofs = ( src->elem_bits * src->elem_idx );
    const char * bytes = (const char *)src->base;

    svdb_print_char( dst, bytes[ BYTE_OFFSET( src->boff + bit_ofs ) ] );
}
2351
2352
/* unicode elements get no special treatment:
   they are printed exactly like ascii elements */
static void print_unicode_element( p_svdb_col col, p_print_ctx dst, p_data_src src )
{
    print_ascii_element( col, dst, src );
}
2357
2358
/* prints the element src->elem_idx with the printer that matches
   the domain of the column's cursor type;
   elements of any other domain are not printed at all */
static void svdb_print_domain( p_svdb_col col, p_print_ctx dst, p_data_src src )
{
    switch ( col->cursor_typedesc.domain )
    {
        case vtdBool :
            print_boolean_element( col, dst, src );
            break;

        case vtdUint :
            print_uint_element( col, dst, src );
            break;

        case vtdInt :
            print_int_element( col, dst, src );
            break;

        case vtdFloat :
            print_float_element( col, dst, src );
            break;

        case vtdAscii :
            print_ascii_element( col, dst, src );
            break;

        case vtdUnicode :
            print_unicode_element( col, dst, src );
            break;

        default :
            /* unknown domain: nothing to print */
            break;
    }
}
2371
2372
/* prints all elements of one cell into dst
   - intrinsic dimension 1: every element is printed via svdb_print_domain()
   - otherwise every element is a group of 'dim' sub-elements:
       the group is enclosed in col->dim_separator[ 0 ] and
       col->dim_separator[ 1 ] ( [ 0 ] is reused if [ 1 ] is 0 ),
       its content comes from col->dim_trans if that callback is set,
       else the sub-elements are printed one by one
   src->elem_idx is used as the loop counter and is left at src->row_len */
static void svdb_print_elem( p_svdb_col col, p_print_ctx dst, p_data_src src )
{
    const uint32_t dim = col->cursor_typedesc.intrinsic_dim;
    data_src part;

    if ( dim == 1 )
    {
        src->elem_idx = 0;
        while ( src->elem_idx < src->row_len )
        {
            svdb_print_domain( col, dst, src );
            ++( src->elem_idx );
        }
        return;
    }

    /* 'part' walks over the groups, one group is src->elem_bits wide */
    part.base = src->base;
    part.boff = src->boff;
    part.elem_bits = ( src->elem_bits / dim );

    src->elem_idx = 0;
    while ( src->elem_idx < src->row_len )
    {
        char closing;

        svdb_print_char( dst, col->dim_separator[ 0 ] );

        if ( col->dim_trans == NULL )
        {
            part.elem_idx = 0;
            while ( part.elem_idx < dim )
            {
                svdb_print_domain( col, dst, &part );
                ++part.elem_idx;
            }
        }
        else
        {
            const uint8_t * group = (const uint8_t *)part.base + BYTE_OFFSET( part.boff );
            char * txt = col->dim_trans( group );
            if ( txt != NULL )
            {
                svdb_print_str( dst, txt );
            }
        }

        closing = col->dim_separator[ 1 ];
        if ( closing == 0 )
        {
            closing = col->dim_separator[ 0 ];
        }
        svdb_print_char( dst, closing );

        part.boff += src->elem_bits;
        ++( src->elem_idx );
    }
}
2424
2425
svdb_element_count(void * self,const int tab_id,const int selection,const int col_id,const unsigned long long int row)2426 MOD_EXPORT int CC svdb_element_count( void * self,
2427 const int tab_id, const int selection, const int col_id,
2428 const unsigned long long int row )
2429 {
2430 int res = 0;
2431 p_svdb_col col = svdb_get_col( self, tab_id, selection, col_id );
2432 if ( col != NULL && row < col->tab->range && col->open )
2433 {
2434 data_src data;
2435 int64_t row_id = col->tab->first + row;
2436 rc_t rc = VCursorCellDataDirect ( col->tab->cursor, row_id, col->cur_idx,
2437 &data.elem_bits, &data.base, &data.boff, &data.row_len );
2438 if ( rc != 0 )
2439 {
2440 LOGERR( klogInt, rc, "VCursorCellDataDirect() failed in element_count()" );
2441 }
2442 else
2443 {
2444 if ( data.row_len > 0 )
2445 {
2446 res = data.row_len;
2447 }
2448 }
2449 }
2450 return res;
2451 }
2452
2453
svdb_element(void * self,char * buf,int buflen,const int tab_id,const int selection,const int col_id,const int elem_id,const unsigned long long int row)2454 MOD_EXPORT int CC svdb_element( void * self, char * buf, int buflen,
2455 const int tab_id, const int selection, const int col_id, const int elem_id,
2456 const unsigned long long int row )
2457 {
2458 int res = 0;
2459 p_svdb_col col;
2460 buf[ 0 ] = 0;
2461 col = svdb_get_col( self, tab_id, selection, col_id );
2462 if ( col != NULL && row < col->tab->range && col->open )
2463 {
2464 rc_t rc;
2465 data_src data;
2466 print_ctx dst;
2467 int64_t row_id = col->tab->first + row;
2468
2469 svdb_init_print_ctx( &dst, buf, buflen - 1 );
2470 rc = VCursorCellDataDirect ( col->tab->cursor, row_id, col->cur_idx,
2471 &data.elem_bits, &data.base, &data.boff, &data.row_len );
2472 if ( rc != 0 )
2473 {
2474 LOGERR( klogInt, rc, "VCursorCellDataDirect() failed in svdb_element()" );
2475 }
2476 else
2477 {
2478 /*
2479 if ( data.row_len > 0 && elem_id < data.row_len )
2480 {
2481
2482 }
2483 */
2484 }
2485 }
2486 return res;
2487 }
2488
2489
svdb_cell(void * self,char * buf,int buflen,const int tab_id,const int selection,const int col_id,const unsigned long long int row)2490 MOD_EXPORT int CC svdb_cell( void * self, char * buf, int buflen,
2491 const int tab_id, const int selection, const int col_id,
2492 const unsigned long long int row )
2493 {
2494 int res = 0;
2495 p_svdb_col col;
2496 buf[ 0 ] = 0;
2497 col = svdb_get_col( self, tab_id, selection, col_id );
2498 if ( col != NULL && row < col->tab->range && col->open )
2499 {
2500 data_src data;
2501 int64_t row_id = col->tab->first + row;
2502 rc_t rc = VCursorCellDataDirect ( col->tab->cursor, row_id, col->cur_idx,
2503 &data.elem_bits, &data.base, &data.boff, &data.row_len );
2504 if ( rc != 0 )
2505 {
2506 LOGERR( klogInt, rc, "VCursorCellDataDirect() failed in svdb_cell()" );
2507 }
2508 else
2509 {
2510 if ( data.row_len > 0 )
2511 {
2512 print_ctx dst;
2513
2514 svdb_init_print_ctx( &dst, buf, buflen - 1 );
2515 svdb_print_elem( col, &dst, &data ); /* <--- */
2516 dst.buf[ dst.printed ] = 0;
2517 res = dst.needed;
2518 }
2519 }
2520 }
2521 return res;
2522 }
2523
2524
/* dry-run: prints the cell into a tiny scratch buffer just to learn
   how much space the print context reports as needed;
   returns that amount plus one */
static int svdb_find_bufsize( p_svdb_col col, data_src *data )
{
    print_ctx probe;
    char scratch[ 8 ];

    svdb_init_print_ctx( &probe, scratch, sizeof scratch );
    svdb_print_elem( col, &probe, data );
    return 1 + probe.needed;
}
2534
2535
svdb_find_fwd(void * self,const int tab_id,const int selection,const int col_id,const unsigned long long int row,const int chunksize,const char * pattern)2536 MOD_EXPORT unsigned long long int CC svdb_find_fwd( void * self, const int tab_id,
2537 const int selection, const int col_id, const unsigned long long int row,
2538 const int chunksize, const char * pattern )
2539 {
2540 unsigned long long int res = INVALID_ROW;
2541 p_svdb_col col = svdb_get_col( self, tab_id, selection, col_id );
2542 if ( col != NULL && row < col->tab->range && col->open && pattern != NULL )
2543 {
2544 int64_t search_row = row;
2545 rc_t rc = 0;
2546 int buf_size = 0;
2547 int chunk = 0;
2548 char * buf = NULL;
2549
2550 while( ( rc == 0 ) &&
2551 ( res == INVALID_ROW ) &&
2552 ( (uint64_t)search_row < col->tab->range ) &&
2553 ( chunk < chunksize ) )
2554 {
2555 data_src data;
2556 int64_t row_id = col->tab->first + search_row;
2557 rc = VCursorCellDataDirect ( col->tab->cursor, row_id, col->cur_idx,
2558 &data.elem_bits, &data.base, &data.boff, &data.row_len );
2559 if ( rc == 0 )
2560 {
2561 int this_size = svdb_find_bufsize( col, &data );
2562 ++chunk;
2563 if ( this_size > buf_size )
2564 {
2565 buf_size = this_size;
2566 if ( buf != NULL )
2567 {
2568 char * newbuf = realloc( buf, buf_size );
2569 if ( newbuf != NULL )
2570 buf = newbuf;
2571 else
2572 {
2573 free( buf );
2574 buf = NULL;
2575 }
2576 }
2577 else
2578 buf = malloc( buf_size );
2579 }
2580 if ( buf != NULL )
2581 {
2582 print_ctx dst;
2583 svdb_init_print_ctx( &dst, buf, buf_size - 1 );
2584 svdb_print_elem( col, &dst, &data ); /* <--- */
2585 dst.buf[ dst.printed ] = 0;
2586 if ( strstr ( buf, pattern ) != NULL )
2587 {
2588 res = search_row;
2589 }
2590 }
2591 search_row++;
2592 }
2593 }
2594 if ( ( chunk == chunksize )&&( res == INVALID_ROW ) )
2595 {
2596 res = 0xFFFFFFFFFFFFFFFE;
2597 }
2598 if ( buf != NULL )
2599 free( buf );
2600 }
2601 return res;
2602 }
2603
2604
svdb_find_bwd(void * self,const int tab_id,const int selection,const int col_id,const unsigned long long int row,const int chunksize,const char * pattern)2605 MOD_EXPORT unsigned long long int CC svdb_find_bwd( void * self, const int tab_id,
2606 const int selection, const int col_id, const unsigned long long int row,
2607 const int chunksize, const char * pattern )
2608 {
2609 unsigned long long int res = INVALID_ROW;
2610 p_svdb_col col;
2611 col = svdb_get_col( self, tab_id, selection, col_id );
2612 if ( col != NULL && row < col->tab->range && col->open && pattern != NULL )
2613 {
2614 int64_t search_row = row;
2615 rc_t rc = 0;
2616 int buf_size = 0;
2617 int chunk = 0;
2618 char * buf = NULL;
2619
2620 while( ( rc == 0 ) && ( res == INVALID_ROW ) &&
2621 ( search_row >= 0 ) && ( chunk < chunksize ) )
2622 {
2623 data_src data;
2624 int64_t row_id = col->tab->first + search_row;
2625 rc = VCursorCellDataDirect ( col->tab->cursor, row_id, col->cur_idx,
2626 &data.elem_bits, &data.base, &data.boff, &data.row_len );
2627 if ( rc == 0 )
2628 {
2629 int this_size = svdb_find_bufsize( col, &data );
2630 ++chunk;
2631 if ( this_size > buf_size )
2632 {
2633 buf_size = this_size;
2634 if ( buf != NULL )
2635 {
2636 char * newbuf = realloc( buf, buf_size );
2637 if ( newbuf != NULL )
2638 buf = newbuf;
2639 else
2640 {
2641 free( buf );
2642 buf = NULL;
2643 }
2644 }
2645 else
2646 buf = malloc( buf_size );
2647 }
2648 if ( buf != NULL )
2649 {
2650 print_ctx dst;
2651 svdb_init_print_ctx( &dst, buf, buf_size - 1 );
2652 svdb_print_elem( col, &dst, &data ); /* <--- */
2653 dst.buf[ dst.printed ] = 0;
2654 if ( strstr ( buf, pattern ) != NULL )
2655 {
2656 res = search_row;
2657 }
2658 }
2659 search_row--;
2660 }
2661 }
2662 if ( ( chunk == chunksize )&&( res == INVALID_ROW ) )
2663 {
2664 res = 0xFFFFFFFFFFFFFFFE;
2665 }
2666 if ( buf != NULL )
2667 free( buf );
2668 }
2669 return res;
2670 }
2671
2672
2673 /* **************************************************************************************************************** */
2674
/* state of one fastq-object, created by svdb_open_fastq() and
   destroyed by svdb_close_fastq(); handed to the caller as void * */
struct svdb_fastq;
typedef struct svdb_fastq* p_svdb_fastq;

typedef struct svdb_fastq
{
    char * path;                /* copy of the path given to svdb_open_fastq() */
    KDirectory * dir;           /* native directory */
    const VDBManager * mgr;     /* read-manager made on top of dir */
    VSchema * schema;           /* SRA schema made by the manager */
    const VDatabase * db;       /* database at path, only set if path opens as a database */
    const VTable * sequence;    /* "SEQUENCE" table of db, or the table found at path */
    const VCursor * cursor;     /* read-cursor on the sequence table */
    int is_db;                  /* 1 if path could be opened as a database */

    /* cursor column indexes, INVALID_COL if the table does not offer the column */
    uint32_t read_idx;          /* READ */
    uint32_t qual_idx;          /* QUALITY */
    uint32_t start_idx;         /* READ_START */
    uint32_t len_idx;           /* READ_LEN */
    uint32_t name_idx;          /* NAME */
    uint32_t rd_type_idx;       /* READ_TYPE */
    uint32_t rd_filter_idx;     /* READ_FILTER */

} svdb_fastq;
2698
2699
svdb_close_fastq(void * self)2700 MOD_EXPORT void CC svdb_close_fastq( void * self )
2701 {
2702 p_svdb_fastq pself = ( p_svdb_fastq )self;
2703 if ( pself != NULL )
2704 {
2705 rc_t rc = VCursorRelease( pself->cursor );
2706 if ( rc != 0 )
2707 {
2708 LOGERR( klogInt, rc, "VCursorRelease() failed in svdb_close_fastq()" );
2709 }
2710 rc = VTableRelease( pself->sequence );
2711 if ( rc != 0 )
2712 {
2713 LOGERR( klogInt, rc, "VTableRelease() failed in svdb_close_fastq()" );
2714 }
2715 rc = VDatabaseRelease( pself->db );
2716 if ( rc != 0 )
2717 {
2718 LOGERR( klogInt, rc, "VDatabaseRelease() failed in svdb_close_fastq()" );
2719 }
2720 rc = VSchemaRelease( pself->schema );
2721 if ( rc != 0 )
2722 {
2723 LOGERR( klogInt, rc, "VSchemaRelease() failed in svdb_close_fastq()" );
2724 }
2725 rc = VDBManagerRelease( pself->mgr );
2726 if ( rc != 0 )
2727 {
2728 LOGERR( klogInt, rc, "VDBManagerRelease() failed in svdb_close_fastq()" );
2729 }
2730 rc = KDirectoryRelease( pself->dir );
2731 if ( rc != 0 )
2732 {
2733 LOGERR( klogInt, rc, "KDirectoryRelease() failed in svdb_close_fastq()" );
2734 }
2735 free_if_not_null( pself->path );
2736 free( self );
2737 }
2738 }
2739
2740
/* returns true if the namelist has an entry for which svdb_strcmp()
   against to_find yields 0;
   returns false if there is no such entry or the list cannot be read */
static bool svdb_KNamelist_contains( const KNamelist * nl, const char * to_find )
{
    uint32_t count;
    uint32_t idx;

    if ( KNamelistCount( nl, &count ) != 0 )
    {
        return false;
    }

    for ( idx = 0; idx < count; ++idx )
    {
        const char * entry;

        /* a failing lookup ends the scan */
        if ( KNamelistGet ( nl, idx, &entry ) != 0 )
        {
            return false;
        }
        if ( entry != NULL && svdb_strcmp( entry, to_find ) == 0 )
        {
            return true;
        }
    }
    return false;
}
2759
2760
svdb_discover_fastq_columns(p_svdb_fastq pself)2761 static rc_t svdb_discover_fastq_columns( p_svdb_fastq pself )
2762 {
2763 KNamelist *col_names;
2764 rc_t rc = VTableListReadableColumns ( pself->sequence, &col_names );
2765 if ( rc == 0 )
2766 {
2767 pself->read_idx = INVALID_COL;
2768 pself->qual_idx = INVALID_COL;
2769 pself->start_idx = INVALID_COL;
2770 pself->len_idx = INVALID_COL;
2771 pself->name_idx = INVALID_COL;
2772 pself->rd_type_idx = INVALID_COL;
2773 pself->rd_filter_idx= INVALID_COL;
2774
2775 if ( svdb_KNamelist_contains( col_names, "READ" ) )
2776 {
2777 rc = VCursorAddColumn ( pself->cursor, &pself->read_idx, "(INSDC:dna:text)READ" );
2778 if ( rc != 0 )
2779 log_and_err( rc, "VCursorAddColumn( READ ) failed" );
2780 }
2781
2782 if ( rc == 0 && svdb_KNamelist_contains( col_names, "QUALITY" ) )
2783 {
2784 rc = VCursorAddColumn ( pself->cursor, &pself->qual_idx, "(INSDC:quality:text:phred_33)QUALITY" );
2785 if ( rc != 0 )
2786 log_and_err( rc, "VCursorAddColumn( QUALITY ) failed" );
2787 }
2788
2789 if ( rc == 0 && svdb_KNamelist_contains( col_names, "READ_START" ) )
2790 {
2791 rc = VCursorAddColumn ( pself->cursor, &pself->start_idx, "(INSDC:coord:zero)READ_START" );
2792 if ( rc != 0 )
2793 log_and_err( rc, "VCursorAddColumn( READ_START ) failed" );
2794 }
2795
2796 if ( rc == 0 && svdb_KNamelist_contains( col_names, "READ_LEN" ) )
2797 {
2798 rc = VCursorAddColumn ( pself->cursor, &pself->len_idx, "(INSDC:coord:len)READ_LEN" );
2799 if ( rc != 0 )
2800 log_and_err( rc, "VCursorAddColumn( READ_LEN ) failed" );
2801 }
2802
2803 if ( rc == 0 && svdb_KNamelist_contains( col_names, "NAME" ) )
2804 {
2805 rc = VCursorAddColumn ( pself->cursor, &pself->name_idx, "(ascii)NAME" );
2806 if ( rc != 0 )
2807 log_and_err( rc, "VCursorAddColumn( NAME ) failed" );
2808 }
2809
2810 if ( rc == 0 && svdb_KNamelist_contains( col_names, "READ_TYPE" ) )
2811 {
2812 rc = VCursorAddColumn ( pself->cursor, &pself->rd_type_idx, "(INSDC:SRA:xread_type)READ_TYPE" );
2813 if ( rc != 0 )
2814 log_and_err( rc, "VCursorAddColumn( READ_TYPE ) failed" );
2815 }
2816
2817 if ( rc == 0 && svdb_KNamelist_contains( col_names, "READ_FILTER" ) )
2818 {
2819 rc = VCursorAddColumn ( pself->cursor, &pself->rd_filter_idx, "(INSDC:SRA:read_filter)READ_FILTER" );
2820 if ( rc != 0 )
2821 log_and_err( rc, "VCursorAddColumn( READ_FILTER ) failed" );
2822 }
2823
2824 KNamelistRelease( col_names );
2825 }
2826 return rc;
2827 }
2828
2829
svdb_open_fastq(const char * path)2830 MOD_EXPORT void * CC svdb_open_fastq( const char * path )
2831 {
2832 p_svdb_fastq pself = NULL;
2833 if ( path != NULL && path[0] != 0 )
2834 {
2835 rc_t rc;
2836 pself = calloc( 1, sizeof( svdb_fastq ) );
2837 if ( pself == NULL )
2838 {
2839 rc = RC( rcApp, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
2840 log_and_err( rc, "calloc() failed in svdb_open_fastq()" );
2841 }
2842 else
2843 {
2844 rc = KDirectoryNativeDir( &pself->dir );
2845 if ( rc != 0 )
2846 log_and_err( rc, "KDirectoryNativeDir() failed in svdb_open_fastq()" );
2847
2848 if ( rc == 0 )
2849 {
2850 rc = VDBManagerMakeRead ( &pself->mgr, pself->dir );
2851 if ( rc != 0 )
2852 log_and_err( rc, "VDBManagerMakeRead() failed in svdb_open_fastq()" );
2853 }
2854
2855 if ( rc == 0 )
2856 {
2857 rc = VDBManagerMakeSRASchema( pself->mgr, &pself->schema );
2858 if ( rc != 0 )
2859 log_and_err( rc, "VDBManagerMakeSRASchema() failed in svdb_open_fastq()" );
2860 }
2861
2862 if ( rc == 0 )
2863 {
2864 rc = VDBManagerOpenDBRead( pself->mgr, &pself->db, pself->schema, "%s", path );
2865 if ( rc == 0 )
2866 {
2867 pself->is_db = 1;
2868 rc = VDatabaseOpenTableRead( pself->db, &pself->sequence, "SEQUENCE" );
2869 if ( rc != 0 )
2870 {
2871 string_printf ( last_err, sizeof last_err, NULL,
2872 "VDatabaseOpenTableRead('%s')->'%R' failed in svdb_open_fastq()",
2873 path, rc );
2874 svdb_set_last_err( "VDatabaseOpenTableRead() failed in svdb_open_fastq()" );
2875
2876 }
2877 }
2878 else
2879 {
2880 rc = VDBManagerOpenTableRead( pself->mgr, &pself->sequence, pself->schema, "%s", path );
2881 if ( rc != 0 )
2882 {
2883 string_printf ( last_err, sizeof last_err, NULL,
2884 "VDBManagerOpenTableRead('%s')->'%R' failed in svdb_open_fastq()",
2885 path, rc );
2886 svdb_set_last_err( "VDBManagerOpenTableRead() failed in svdb_open_fastq()" );
2887 }
2888 }
2889 }
2890
2891 if ( rc == 0 )
2892 {
2893 rc = VTableCreateCursorRead ( pself->sequence, &pself->cursor );
2894 if ( rc != 0 )
2895 log_and_err( rc, "VTableCreateCursorRead() failed in svdb_open_fastq()" );
2896 }
2897
2898 if ( rc == 0 )
2899 rc = svdb_discover_fastq_columns( pself );
2900
2901 if ( rc == 0 )
2902 {
2903 rc = VCursorOpen( pself->cursor );
2904 if ( rc != 0 )
2905 log_and_err( rc, "VCursorOpen() failed in svdb_open_fastq()" );
2906 }
2907
2908 if ( rc == 0 )
2909 {
2910 pself->path = string_dup_measure ( path, NULL );
2911 svdb_set_last_err( "OK" );
2912 }
2913 else
2914 {
2915 svdb_close_fastq( pself );
2916 pself = NULL;
2917 }
2918 }
2919 }
2920 else
2921 {
2922 svdb_set_last_err( "path empty in svdb_open_fastq()" );
2923 }
2924 return pself;
2925
2926 }
2927
2928
2929 /*
2930 returns what the fastq-obj can produce
2931 0 ... nothing, handle invalid
2932 1 ... only READ ( that means fasta )
2933 2 ... READ and QUALITY ( but not spot splitting )
2934 3 ... READ, QUALITY and READ_START/READ_LEN ( splitted spots )
2935 */
svdb_fastq_scope(void * self)2936 MOD_EXPORT int CC svdb_fastq_scope( void * self )
2937 {
2938 int res = 0;
2939 if ( self != NULL )
2940 {
2941 p_svdb_fastq pself = self;
2942 if ( pself->read_idx != INVALID_COL )
2943 {
2944 if ( pself->qual_idx != INVALID_COL )
2945 {
2946 if ( pself->start_idx != INVALID_COL &&
2947 pself->len_idx != INVALID_COL )
2948 res = 3;
2949 else
2950 res = 2;
2951 }
2952 else res = 1;
2953 }
2954 }
2955 return res;
2956 }
2957
2958
/* builds the fastq name line for tables without a NAME column:
       "<path>.<row> length=<len>"         if seq is 0 or less
       "<path>.<row> length=<len>/<seq>"   if seq is greater than 0
   <len> is the element count of the READ cell of that row.
   returns the number of characters written, 0 if the cell cannot be read
   or the text does not fit
   NOTE(review): for seq > 0 the length is the one of the whole READ cell,
   while svdb_fastq_with_name_col() prints READ_LEN[ seq - 1 ] - confirm
   whether that difference is intended */
static int svdb_fastq_without_name_col( p_svdb_fastq pself, char * buf, int buflen,
    int seq, const unsigned long long int row )
{
    uint32_t elem_bits, boff, read_cell_len;
    const char * read_cell = NULL;
    size_t written;
    rc_t rc;

    rc = VCursorCellDataDirect( pself->cursor, row, pself->read_idx, &elem_bits,
                                (const void**)&read_cell, &boff, &read_cell_len );
    if ( rc != 0 )
    {
        return 0;
    }

    if ( seq > 0 )
    {
        rc = string_printf ( buf, buflen, &written, "%s.%li length=%u/%u",
                             pself->path, row, read_cell_len, seq );
    }
    else
    {
        rc = string_printf ( buf, buflen, &written, "%s.%li length=%u",
                             pself->path, row, read_cell_len );
    }

    return ( rc == 0 ) ? (int)written : 0;
}
2983
2984
/* builds the fastq name line for tables that have a NAME column:
       "<path>.<row> <name> length=<len>"         if seq is 0 or less,
                                                  <len> = element count of READ
       "<path>.<row> <name> length=<len>/<seq>"   if seq is greater than 0,
                                                  <len> = READ_LEN[ seq - 1 ]
   returns the number of characters written;
   returns 0 if a cell cannot be read, the text does not fit, or seq is
   larger than the number of entries in READ_LEN ( the latter used to read
   beyond the end of the READ_LEN cell ) */
static int svdb_fastq_with_name_col( p_svdb_fastq pself, char * buf, int buflen,
    int seq, const unsigned long long int row )
{
    int res = 0;
    uint32_t elem_bits, boff, name_len;
    const char * name = NULL;

    rc_t rc = VCursorCellDataDirect( pself->cursor, row, pself->name_idx, &elem_bits,
                                     (const void**)&name, &boff, &name_len );
    if ( rc == 0 )
    {
        size_t num_writ = 0;
        bool printed = false;

        if ( seq > 0 )
        {
            const uint32_t * read_len = NULL;
            uint32_t read_count = 0;
            rc = VCursorCellDataDirect( pself->cursor, row, pself->len_idx, &elem_bits,
                                        (const void**)&read_len, &boff, &read_count );
            /* same guard as in svdb_fastq_data_ptr(): seq is 1-based and
               must address an existing entry of READ_LEN */
            if ( rc == 0 && read_len != NULL && (uint32_t)seq <= read_count )
            {
                rc = string_printf ( buf, buflen, &num_writ, "%s.%li %.*s length=%u/%u",
                                     pself->path, row, name_len, name, read_len[ seq - 1 ], seq );
                printed = ( rc == 0 );
            }
        }
        else
        {
            const char * data = NULL;
            uint32_t data_len;
            rc = VCursorCellDataDirect( pself->cursor, row, pself->read_idx, &elem_bits,
                                        (const void**)&data, &boff, &data_len );
            if ( rc == 0 )
            {
                rc = string_printf ( buf, buflen, &num_writ, "%s.%li %.*s length=%u",
                                     pself->path, row, name_len, name, data_len );
                printed = ( rc == 0 );
            }
        }

        if ( printed )
        {
            res = (int)num_writ;
        }
    }
    return res;
}
3021
3022
svdb_fastq_name(void * self,char * buf,int buflen,int seq,const unsigned long long int row)3023 MOD_EXPORT int CC svdb_fastq_name( void * self, char * buf, int buflen,
3024 int seq, const unsigned long long int row )
3025 {
3026 int res = 0;
3027 if ( self != NULL )
3028 {
3029 p_svdb_fastq pself = self;
3030 if ( pself->name_idx != INVALID_COL )
3031 res = svdb_fastq_with_name_col( pself, buf, buflen, seq, row );
3032 else
3033 res = svdb_fastq_without_name_col( pself, buf, buflen, seq, row );
3034 }
3035 return res;
3036 }
3037
3038
/* locates the data of column src_idx for the given row
   - seq is 0 or less: *buf points to the whole cell,
     the element count of the cell is returned
   - seq is greater than 0: *buf points to the part of the cell that starts
     at READ_START[ seq - 1 ], READ_LEN[ seq - 1 ] is returned
   returns 0 and leaves *buf untouched if a cell cannot be read or seq is
   larger than the number of entries in READ_START or READ_LEN */
static int svdb_fastq_data_ptr( p_svdb_fastq pself, const char ** buf, uint32_t src_idx,
    int seq, const unsigned long long int row )
{
    uint32_t elem_bits, boff, cell_len;
    uint32_t n_starts, n_lens;
    const char * cell = NULL;
    uint32_t * starts = NULL;
    uint32_t * lens = NULL;
    rc_t rc;

    rc = VCursorCellDataDirect( pself->cursor, row, src_idx, &elem_bits,
                                (const void**)&cell, &boff, &cell_len );
    if ( rc != 0 )
    {
        return 0;
    }

    if ( seq <= 0 )
    {
        /* no splitting requested */
        *buf = cell;
        return cell_len;
    }

    rc = VCursorCellDataDirect( pself->cursor, row, pself->start_idx, &elem_bits,
                                (const void**)&starts, &boff, &n_starts );
    if ( rc == 0 )
    {
        rc = VCursorCellDataDirect( pself->cursor, row, pself->len_idx, &elem_bits,
                                    (const void**)&lens, &boff, &n_lens );
    }

    if ( rc != 0 || seq > (int)n_starts || seq > (int)n_lens )
    {
        return 0;
    }

    *buf = &cell[ starts[ seq - 1 ] ];
    return lens[ seq - 1 ];
}
3074
3075
/* copies the data of column src_idx ( whole cell or read 'seq' ) into buf
   as text;
   returns the number of characters written; if the text could not be
   written the length of the data is returned instead; 0 or less means
   there is no data */
static int svdb_fastq_data( p_svdb_fastq pself, char * buf, int buflen, uint32_t src_idx,
    int seq, const unsigned long long int row )
{
    size_t written;
    const char * text = NULL;
    int len = svdb_fastq_data_ptr( pself, &text, src_idx, seq, row );

    if ( len <= 0 || text == NULL )
    {
        return len;
    }

    if ( string_printf ( buf, buflen, &written, "%.*s", len, text ) == 0 )
    {
        len = (int)written;
    }
    return len;
}
3089
3090
svdb_fastq_readcount(void * self,const unsigned long long int row)3091 MOD_EXPORT int CC svdb_fastq_readcount( void * self, const unsigned long long int row )
3092 {
3093 int res = 0;
3094 if ( self != NULL )
3095 {
3096 uint32_t elem_bits, boff, data_len;
3097 const char * data = NULL;
3098 p_svdb_fastq pself = self;
3099 rc_t rc = VCursorCellDataDirect( pself->cursor, row, pself->start_idx, &elem_bits,
3100 (const void**)&data, &boff, &data_len );
3101 if ( rc == 0 )
3102 res = data_len;
3103 }
3104 return res;
3105 }
3106
3107
svdb_fastq_sequence(void * self,char * buf,int buflen,int seq,const unsigned long long int row)3108 MOD_EXPORT int CC svdb_fastq_sequence( void * self, char * buf, int buflen,
3109 int seq, const unsigned long long int row )
3110 {
3111 int res = 0;
3112 if ( self != NULL )
3113 {
3114 p_svdb_fastq pself = self;
3115 res = svdb_fastq_data( pself, buf, buflen, pself->read_idx, seq, row );
3116 }
3117 return res;
3118 }
3119
3120
svdb_fastq_quality(void * self,char * buf,int buflen,int seq,const unsigned long long int row)3121 MOD_EXPORT int CC svdb_fastq_quality( void * self, char * buf, int buflen,
3122 int seq, const unsigned long long int row )
3123 {
3124 int res = 0;
3125 if ( self != NULL )
3126 {
3127 p_svdb_fastq pself = self;
3128 res = svdb_fastq_data( pself, buf, buflen, pself->qual_idx, seq, row );
3129 }
3130 return res;
3131 }
3132
3133
svdb_fastq_rd_type_available(void * self)3134 MOD_EXPORT int CC svdb_fastq_rd_type_available( void * self )
3135 {
3136 int res = 0;
3137 if ( self != NULL )
3138 {
3139 p_svdb_fastq pself = self;
3140 if ( pself->rd_type_idx != INVALID_COL ) res = 1;
3141 }
3142 return res;
3143 }
3144
3145
svdb_fastq_rd_filter_available(void * self)3146 MOD_EXPORT int CC svdb_fastq_rd_filter_available( void * self )
3147 {
3148 int res = 0;
3149 if ( self != NULL )
3150 {
3151 p_svdb_fastq pself = self;
3152 if ( pself->rd_filter_idx != INVALID_COL ) res = 1;
3153 }
3154 return res;
3155 }
3156
3157
svdb_fastq_read_type_is_bio(void * self,int seq,const unsigned long long int row)3158 MOD_EXPORT int CC svdb_fastq_read_type_is_bio( void * self, int seq, const unsigned long long int row )
3159 {
3160 int res = 0;
3161 if ( self != NULL && seq > 0 )
3162 {
3163 p_svdb_fastq pself = self;
3164 if ( pself->rd_type_idx != INVALID_COL )
3165 {
3166 uint32_t elem_bits, boff, data_len;
3167 const uint8_t * data = NULL;
3168 rc_t rc = VCursorCellDataDirect( pself->cursor, row, pself->rd_type_idx, &elem_bits,
3169 (const void**)&data, &boff, &data_len );
3170 if ( rc == 0 && seq <= (int)data_len )
3171 {
3172 if ( data[ seq - 1 ] & 0x01 ) res = 1;
3173 }
3174 }
3175 }
3176 return res;
3177 }
3178
3179
svdb_fastq_read_filter_is_pass(void * self,int seq,const unsigned long long int row)3180 MOD_EXPORT int CC svdb_fastq_read_filter_is_pass( void * self, int seq, const unsigned long long int row )
3181 {
3182 int res = 0;
3183 if ( self != NULL && seq > 0 )
3184 {
3185 p_svdb_fastq pself = self;
3186 if ( pself->rd_filter_idx != INVALID_COL )
3187 {
3188 uint32_t elem_bits, boff, data_len;
3189 const uint8_t * data = NULL;
3190 rc_t rc = VCursorCellDataDirect( pself->cursor, row, pself->rd_filter_idx, &elem_bits,
3191 (const void**)&data, &boff, &data_len );
3192 if ( rc == 0 && seq <= (int)data_len )
3193 {
3194 if ( data[ seq - 1 ] == 0 ) res = 1;
3195 }
3196 }
3197 }
3198 return res;
3199 }
3200
3201
svdb_fastq_row_count(void * self)3202 MOD_EXPORT unsigned long long int CC svdb_fastq_row_count( void * self )
3203 {
3204 unsigned long long int res = 0;
3205 if ( self != NULL )
3206 {
3207 int64_t first;
3208 uint64_t range;
3209 p_svdb_fastq pself = self;
3210 rc_t rc = VCursorIdRange ( pself->cursor, pself->read_idx, &first, &range );
3211 if ( rc == 0 )
3212 res = range;
3213 }
3214 return res;
3215 }
3216