1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 
27 #include <klib/rc.h>
28 
29 #include <kfs/directory.h>
30 
31 #include <vdb/manager.h>
32 #include <vdb/table.h>
33 #include <vdb/cursor.h>
34 
35 #include <os-native.h>
36 #include <sysalloc.h>
37 
38 #include <string.h>
39 
40 #include "refbases.h"
41 
read_uint32(const VCursor * cur,uint32_t col_idx)42 static uint32_t read_uint32( const VCursor * cur, uint32_t col_idx )
43 {
44     uint32_t elem_bits, boff, row_len;
45     const uint32_t * value;
46     rc_t rc = VCursorCellDataDirect ( cur, 1, col_idx, &elem_bits, (const void**)&value, &boff, &row_len );
47     if ( rc == 0 )
48         return *value;
49     return 0;
50 }
51 
read_buffer(const VCursor * cur,char * buffer,int64_t row_id,uint32_t offset,size_t buflen,uint32_t col_idx)52 static uint32_t read_buffer( const VCursor * cur, char * buffer, int64_t row_id,
53                              uint32_t offset, size_t buflen, uint32_t col_idx )
54 {
55     uint32_t elem_bits, boff, row_len, res = 0;
56     const char * value;
57     rc_t rc = VCursorCellDataDirect ( cur, row_id, col_idx, &elem_bits, (const void**)&value, &boff, &row_len );
58     if ( rc == 0 && row_len > offset )
59     {
60         res = ( row_len - offset );
61         if ( res > buflen ) res = buflen;
62         memmove ( buffer, &value[ offset ], res );
63     }
64     return res;
65 }
66 
67 
/* read_bases
 *  Collects up to "ref_len" bytes of column "col_idx" into "buffer",
 *  starting at the 1-based position "ref_pos_1_based". The position is
 *  translated into a row and an offset within that row by dividing by
 *  "max_seq_len"; following rows are then read from their beginning
 *  until enough bytes were collected or a read yields nothing.
 *
 *  cur             : cursor to read from
 *  buffer          : destination, must have room for "ref_len" bytes
 *  ref_pos_1_based : 1-based start position, 0 is rejected
 *  ref_len         : number of bytes requested
 *  col_idx         : column index to read
 *  max_seq_len     : divisor used to map positions to rows, 0 is rejected
 *
 *  Returns the number of bytes written to "buffer", which may be less
 *  than "ref_len". Returns 0 for an invalid position or max_seq_len.
 */
static uint32_t read_bases( const VCursor * cur, char * buffer, uint32_t ref_pos_1_based,
                            uint32_t ref_len, uint32_t col_idx, uint32_t max_seq_len )
{
    uint32_t res = 0, n_read = 1;
    uint32_t zero_based, offset;
    int64_t row_id;
    size_t buflen = ref_len;

    /* position 0 would underflow below, max_seq_len 0 would divide by zero */
    if ( ref_pos_1_based == 0 || max_seq_len == 0 )
        return 0;

    zero_based = ref_pos_1_based - 1;
    /* 64-bit row id: matches read_buffer() and cannot wrap when incremented */
    row_id = ( int64_t )( zero_based / max_seq_len ) + 1;
    offset = zero_based % max_seq_len;

    while ( res < ref_len && n_read > 0 )
    {
        n_read = read_buffer( cur, &buffer[ res ], row_id++, offset, buflen, col_idx );
        res += n_read;
        buflen -= n_read;
        /* only the first row is entered in the middle */
        offset = 0;
    }
    return res;
}
84 
85 
read_refbases(const char * refname,uint32_t ref_pos_1_based,uint32_t ref_len,uint32_t * bases_in_ref)86 char * read_refbases( const char * refname, uint32_t ref_pos_1_based, uint32_t ref_len, uint32_t * bases_in_ref )
87 {
88     char * res = NULL;
89     KDirectory * dir;
90     rc_t rc = KDirectoryNativeDir( &dir );
91     if ( rc == 0 )
92     {
93         const VDBManager * mgr;
94         rc = VDBManagerMakeRead ( &mgr, dir );
95         if ( rc == 0 )
96         {
97             const VTable * tab;
98             rc = VDBManagerOpenTableRead( mgr, &tab, NULL, "%s", refname );
99             if ( rc == 0 )
100             {
101                 const VCursor * cur;
102                 rc = VTableCreateCursorRead( tab, &cur );
103                 if ( rc == 0 )
104                 {
105                     uint32_t base_count_idx, read_idx, max_seq_len_idx;
106                     rc = VCursorAddColumn( cur, &base_count_idx, "BASE_COUNT" );
107                     if ( rc == 0 )
108                     {
109                         rc = VCursorAddColumn( cur, &read_idx, "READ" );
110                         if ( rc == 0 )
111                         {
112                             rc = VCursorAddColumn( cur, &max_seq_len_idx, "MAX_SEQ_LEN" );
113                             if ( rc == 0 )
114                             {
115                                 rc = VCursorOpen ( cur );
116                                 if ( rc == 0 )
117                                 {
118                                     uint32_t base_count = read_uint32( cur, base_count_idx );
119                                     if ( bases_in_ref != NULL )
120                                         *bases_in_ref = base_count;
121                                     uint32_t max_seq_len = read_uint32( cur, max_seq_len_idx );
122                                     if ( base_count > ( ref_pos_1_based + ref_len ) && max_seq_len > 0 )
123                                     {
124                                         res = malloc( ref_len + 1 );
125                                         if ( res != NULL )
126                                         {
127                                             uint32_t n_read = read_bases( cur, res, ref_pos_1_based,
128                                                                           ref_len, read_idx, max_seq_len );
129                                             res[ n_read ] = 0;
130                                         }
131                                     }
132                                 }
133                             }
134                         }
135                     }
136                     VCursorRelease( cur );
137                 }
138                 VTableRelease( tab );
139             }
140             VDBManagerRelease( mgr );
141         }
142         KDirectoryRelease( dir );
143     }
144     return res;
145 }
146 
147 
ref_len(const char * refname)148 uint32_t ref_len( const char * refname )
149 {
150     uint32_t res = 0;
151     KDirectory * dir;
152     rc_t rc = KDirectoryNativeDir( &dir );
153     if ( rc == 0 )
154     {
155         const VDBManager * mgr;
156         rc = VDBManagerMakeRead ( &mgr, dir );
157         if ( rc == 0 )
158         {
159             const VTable * tab;
160             rc = VDBManagerOpenTableRead( mgr, &tab, NULL, "%s", refname );
161             if ( rc == 0 )
162             {
163                 const VCursor * cur;
164                 rc = VTableCreateCursorRead( tab, &cur );
165                 if ( rc == 0 )
166                 {
167                     uint32_t base_count_idx;
168                     rc = VCursorAddColumn( cur, &base_count_idx, "BASE_COUNT" );
169                     if ( rc == 0 )
170                     {
171                         rc = VCursorOpen ( cur );
172                         if ( rc == 0 )
173                             res = read_uint32( cur, base_count_idx );
174                     }
175                     VCursorRelease( cur );
176                 }
177                 VTableRelease( tab );
178             }
179             VDBManagerRelease( mgr );
180         }
181         KDirectoryRelease( dir );
182     }
183     return res;
184 }
185