1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27 #include <klib/rc.h>
28
29 #include <kfs/directory.h>
30
31 #include <vdb/manager.h>
32 #include <vdb/table.h>
33 #include <vdb/cursor.h>
34
35 #include <os-native.h>
36 #include <sysalloc.h>
37
38 #include <string.h>
39
40 #include "refbases.h"
41
read_uint32(const VCursor * cur,uint32_t col_idx)42 static uint32_t read_uint32( const VCursor * cur, uint32_t col_idx )
43 {
44 uint32_t elem_bits, boff, row_len;
45 const uint32_t * value;
46 rc_t rc = VCursorCellDataDirect ( cur, 1, col_idx, &elem_bits, (const void**)&value, &boff, &row_len );
47 if ( rc == 0 )
48 return *value;
49 return 0;
50 }
51
/* read_buffer
 *  copy part of one cell into "buffer"
 *
 *  cur     [ IN ]  - opened cursor that contains the column
 *  buffer  [ OUT ] - destination, must have room for "buflen" bytes
 *  row_id  [ IN ]  - row to read
 *  offset  [ IN ]  - index of the first element of the cell to copy
 *  buflen  [ IN ]  - upper limit for the number of bytes copied
 *  col_idx [ IN ]  - column index as returned by VCursorAddColumn
 *
 *  returns the number of bytes copied; 0 if the cell could not be read
 *  or has no elements beyond "offset"
 */
static uint32_t read_buffer( const VCursor * cur, char * buffer, int64_t row_id,
                             uint32_t offset, size_t buflen, uint32_t col_idx )
{
    uint32_t elem_bits, boff, row_len;
    uint32_t n_copy;
    const char * cell;

    if ( VCursorCellDataDirect ( cur, row_id, col_idx, &elem_bits,
                                 (const void**)&cell, &boff, &row_len ) != 0 )
        return 0;

    /* nothing stored at or behind the requested offset */
    if ( row_len <= offset )
        return 0;

    /* copy whatever is left in the cell, clipped to the space available */
    n_copy = row_len - offset;
    if ( n_copy > buflen )
        n_copy = buflen;
    memmove ( buffer, cell + offset, n_copy );
    return n_copy;
}
66
67
/* read_bases
 *  collect up to "ref_len" bytes of the column "col_idx" into "buffer",
 *  starting at a 1-based position and continuing across consecutive rows
 *
 *  cur             [ IN ]  - opened cursor that contains the column
 *  buffer          [ OUT ] - destination, must have room for "ref_len" bytes
 *  ref_pos_1_based [ IN ]  - 1-based start position; 0 is rejected
 *  ref_len         [ IN ]  - number of bytes wanted
 *  col_idx         [ IN ]  - column index as returned by VCursorAddColumn
 *  max_seq_len     [ IN ]  - row stride used to map a position to a row
 *                            and an offset within it; 0 is rejected
 *
 *  returns the number of bytes actually written, which is less than
 *  "ref_len" if a row could not be read or yielded no data
 */
static uint32_t read_bases( const VCursor * cur, char * buffer, uint32_t ref_pos_1_based,
                            uint32_t ref_len, uint32_t col_idx, uint32_t max_seq_len )
{
    uint32_t res = 0, n_read = 1;
    uint32_t pos_0_based, offset;
    int64_t row_id;
    size_t buflen = ref_len;

    /* position 0 would wrap around in "pos - 1", a stride of 0 would
       divide by zero */
    if ( ref_pos_1_based == 0 || max_seq_len == 0 )
        return 0;

    pos_0_based = ref_pos_1_based - 1;
    /* rows are numbered from 1; 64 bit matches the row-id parameter of
       read_buffer and keeps the increment below from wrapping */
    row_id = ( int64_t )( pos_0_based / max_seq_len ) + 1;
    offset = pos_0_based % max_seq_len;

    while ( res < ref_len && n_read > 0 )
    {
        n_read = read_buffer( cur, &buffer[ res ], row_id++, offset, buflen, col_idx );
        res += n_read;
        buflen -= n_read;
        /* only the first row is entered in the middle */
        offset = 0;
    }
    return res;
}
84
85
read_refbases(const char * refname,uint32_t ref_pos_1_based,uint32_t ref_len,uint32_t * bases_in_ref)86 char * read_refbases( const char * refname, uint32_t ref_pos_1_based, uint32_t ref_len, uint32_t * bases_in_ref )
87 {
88 char * res = NULL;
89 KDirectory * dir;
90 rc_t rc = KDirectoryNativeDir( &dir );
91 if ( rc == 0 )
92 {
93 const VDBManager * mgr;
94 rc = VDBManagerMakeRead ( &mgr, dir );
95 if ( rc == 0 )
96 {
97 const VTable * tab;
98 rc = VDBManagerOpenTableRead( mgr, &tab, NULL, "%s", refname );
99 if ( rc == 0 )
100 {
101 const VCursor * cur;
102 rc = VTableCreateCursorRead( tab, &cur );
103 if ( rc == 0 )
104 {
105 uint32_t base_count_idx, read_idx, max_seq_len_idx;
106 rc = VCursorAddColumn( cur, &base_count_idx, "BASE_COUNT" );
107 if ( rc == 0 )
108 {
109 rc = VCursorAddColumn( cur, &read_idx, "READ" );
110 if ( rc == 0 )
111 {
112 rc = VCursorAddColumn( cur, &max_seq_len_idx, "MAX_SEQ_LEN" );
113 if ( rc == 0 )
114 {
115 rc = VCursorOpen ( cur );
116 if ( rc == 0 )
117 {
118 uint32_t base_count = read_uint32( cur, base_count_idx );
119 if ( bases_in_ref != NULL )
120 *bases_in_ref = base_count;
121 uint32_t max_seq_len = read_uint32( cur, max_seq_len_idx );
122 if ( base_count > ( ref_pos_1_based + ref_len ) && max_seq_len > 0 )
123 {
124 res = malloc( ref_len + 1 );
125 if ( res != NULL )
126 {
127 uint32_t n_read = read_bases( cur, res, ref_pos_1_based,
128 ref_len, read_idx, max_seq_len );
129 res[ n_read ] = 0;
130 }
131 }
132 }
133 }
134 }
135 }
136 VCursorRelease( cur );
137 }
138 VTableRelease( tab );
139 }
140 VDBManagerRelease( mgr );
141 }
142 KDirectoryRelease( dir );
143 }
144 return res;
145 }
146
147
ref_len(const char * refname)148 uint32_t ref_len( const char * refname )
149 {
150 uint32_t res = 0;
151 KDirectory * dir;
152 rc_t rc = KDirectoryNativeDir( &dir );
153 if ( rc == 0 )
154 {
155 const VDBManager * mgr;
156 rc = VDBManagerMakeRead ( &mgr, dir );
157 if ( rc == 0 )
158 {
159 const VTable * tab;
160 rc = VDBManagerOpenTableRead( mgr, &tab, NULL, "%s", refname );
161 if ( rc == 0 )
162 {
163 const VCursor * cur;
164 rc = VTableCreateCursorRead( tab, &cur );
165 if ( rc == 0 )
166 {
167 uint32_t base_count_idx;
168 rc = VCursorAddColumn( cur, &base_count_idx, "BASE_COUNT" );
169 if ( rc == 0 )
170 {
171 rc = VCursorOpen ( cur );
172 if ( rc == 0 )
173 res = read_uint32( cur, base_count_idx );
174 }
175 VCursorRelease( cur );
176 }
177 VTableRelease( tab );
178 }
179 VDBManagerRelease( mgr );
180 }
181 KDirectoryRelease( dir );
182 }
183 return res;
184 }
185