1 /*===========================================================================
2  *
3  *                            PUBLIC DOMAIN NOTICE
4  *               National Center for Biotechnology Information
5  *
6  *  This software/database is a "United States Government Work" under the
7  *  terms of the United States Copyright Act.  It was written as part of
8  *  the author's official duties as a United States Government employee and
9  *  thus cannot be copyrighted.  This software/database is freely available
10  *  to the public for use. The National Library of Medicine and the U.S.
11  *  Government have not placed any restriction on its use or reproduction.
12  *
13  *  Although all reasonable efforts have been taken to ensure the accuracy
14  *  and reliability of the software and data, the NLM and the U.S.
15  *  Government do not and cannot warrant the performance or results that
16  *  may be obtained by using this software or data. The NLM and the U.S.
17  *  Government disclaim all warranties, express or implied, including
18  *  warranties of performance, merchantability or fitness for any particular
19  *  purpose.
20  *
21  *  Please cite the author in any work or product based on this material.
22  *
23  * ===========================================================================
24  *
25  */
26 #include <vdb/extern.h>
27 #include <vdb/xform.h>
28 #include <vdb/database.h>
29 #include <vdb/table.h>
30 #include <vdb/cursor.h>
31 #include <vdb/vdb-priv.h>
32 
33 #include <klib/defs.h>
34 #include <klib/rc.h>
35 #include <klib/debug.h>
36 #include <kdb/meta.h>
37 #include <klib/data-buffer.h>
38 #include <insdc/insdc.h>
39 #include <align/refseq-mgr.h>
40 #include <bitstr.h>
41 #include <sysalloc.h>
42 #include <stdint.h>
43 #include <stdlib.h>
44 #include <assert.h>
45 #include <string.h>
46 #include <stdio.h>
47 #include <insdc/sra.h>
48 
/* SUB_DEBUG
 *  Debug-trace helper for this file.
 *  Without _DEBUGGING it expands to nothing, so its argument is never
 *  evaluated; with _DEBUGGING it hands "msg" to DBGMSG under the
 *  DBG_SRA module / DBG_SRA_SUB flag.
 *  "msg" is passed through as a single macro argument, so callers wrap
 *  the format string and its arguments in an extra pair of parentheses.
 */
#ifndef _DEBUGGING
#define SUB_DEBUG(msg)
#else
#define SUB_DEBUG(msg) DBGMSG(DBG_SRA,DBG_FLAG(DBG_SRA_SUB),msg)
#endif
54 
55 typedef struct RestoreRead RestoreRead;
56 struct RestoreRead
57 {
58     const VCursor *curs;
59     uint32_t col_idx;
60     uint32_t read_len_idx;
61     uint32_t read_start_idx;
62 };
63 
64 static
RestoreReadWhack(void * obj)65 void CC RestoreReadWhack ( void *obj )
66 {
67     RestoreRead * self = obj;
68     if ( self != NULL )
69     {
70         VCursorRelease ( self -> curs );
71         free ( self );
72     }
73 }
74 
75 static
RestoreReadMake(RestoreRead ** objp,const VXfactInfo * info,const VFactoryParams * cp,const VCursor * native_curs)76 rc_t RestoreReadMake ( RestoreRead **objp, const VXfactInfo *info, const VFactoryParams *cp,
77                        const VCursor *native_curs )
78 {
79     rc_t rc;
80     RestoreRead * obj;
81 
82     SUB_DEBUG( ( "SUB.Make in 'project_read_from_sequence.c'\n" ) );
83 
84     /* create the object */
85     obj = malloc ( sizeof ( * obj ) );
86     if ( obj == NULL )
87     {
88         rc = RC ( rcXF, rcFunction, rcConstructing, rcMemory, rcExhausted );
89     }
90     else
91     {
92         rc = VCursorLinkedCursorGet( native_curs, "SEQUENCE", &obj->curs );
93         if ( rc != 0 )
94         {
95             const VDatabase * db;
96             const VTable * tbl;
97             uint64_t cache_size = 32*1024*1024;
98             uint64_t native_cursor_cache_size = VCursorGetCacheCapacity(native_curs);
99 
100             /* get at the parent database */
101             rc = VTableOpenParentRead ( info -> tbl, & db );
102             if ( rc != 0 )
103                 return rc;
104 
105             /* open the table */
106             rc = VDatabaseOpenTableRead ( db, &tbl, "SEQUENCE" );
107             VDatabaseRelease ( db );
108             if ( rc != 0 )
109                 return rc;
110 
111             if(native_cursor_cache_size/4 > cache_size){
112                 /* share cursor size with native cursor **/
113                 cache_size = native_cursor_cache_size/4;
114                 native_cursor_cache_size -= cache_size;
115                 VCursorSetCacheCapacity((VCursor*)native_curs,native_cursor_cache_size);
116             }
117             /* create a cursor */
118             rc = VTableCreateCachedCursorRead( tbl, &obj->curs, cache_size );
119             VTableRelease( tbl );
120             if ( rc != 0 )
121                 return rc;
122 
123             rc = VCursorPermitPostOpenAdd( obj->curs );
124             if ( rc != 0 )
125                 return rc;
126             rc = VCursorOpen( obj->curs );
127             if ( rc != 0 )
128                 return rc;
129             rc = VCursorLinkedCursorSet( native_curs, "SEQUENCE", obj->curs );
130             if ( rc != 0 )
131                 return rc;
132         }
133 
134         if ( rc == 0 )
135         {
136             /* add columns to cursor */
137             assert ( cp -> argc == 1 );
138             rc = VCursorAddColumn ( obj -> curs, & obj -> col_idx, "%.*s",
139                                     cp -> argv [ 0 ] . count, cp -> argv [ 0 ] . data . ascii );
140             if ( rc == 0 || GetRCState( rc ) == rcExists )
141                 rc = VCursorAddColumn ( obj -> curs, & obj -> read_len_idx, "(INSDC:coord:len)READ_LEN" );
142 
143             if ( rc == 0 || GetRCState( rc ) == rcExists )
144                 rc = VCursorAddColumn ( obj -> curs, & obj -> read_start_idx, "(INSDC:coord:zero)READ_START" );
145 
146             if ( rc == 0  || GetRCState( rc ) == rcExists)
147             {
148                 VTypedesc src;
149                 rc = VCursorDatatype ( obj -> curs, obj -> col_idx, NULL, & src );
150                 if ( rc == 0 )
151                 {
152                     /* selected column should have same characteristics */
153                     if ( src . domain != info -> fdesc . desc . domain                 ||
154                          src . intrinsic_bits != info -> fdesc . desc . intrinsic_bits ||
155                          src . intrinsic_dim != info -> fdesc . desc. intrinsic_dim )
156                     {
157                         rc = RC ( rcXF, rcFunction, rcConstructing, rcType, rcInconsistent );
158                     }
159                     else if ( ( src . intrinsic_bits & 7 ) != 0 )
160                     {
161                         rc = RC ( rcXF, rcFunction, rcConstructing, rcType, rcUnsupported );
162                     }
163                     else
164                     {
165                         * objp = obj;
166                         return 0;
167                     }
168                 }
169             }
170         }
171         free ( obj );
172     }
173     return rc;
174 }
175 
176 
177 static
project_from_sequence_impl(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])178 rc_t CC project_from_sequence_impl ( void *data, const VXformInfo *info,
179     int64_t row_id, VRowResult *rslt, uint32_t argc, const VRowData argv [] )
180 {
181     RestoreRead *self =  data;
182 
183     rc_t rc;
184     INSDC_coord_zero read_id;
185     const int64_t *spot_id = argv [ 0 ] . u . data . base;
186     const INSDC_coord_one *read_id_in = argv [ 1 ] . u . data . base;
187     const INSDC_coord_one *read_start;
188     const INSDC_coord_len *read_len;
189     const void *src;
190     uint32_t src_sz;
191     uint32_t src_bits;
192     uint32_t nreads;
193     uint32_t nreads_2;
194 
195     spot_id += argv [ 0 ] . u . data . first_elem;
196     read_id_in += argv [ 1 ] . u . data . first_elem;
197 
198     assert( argv[ 0 ].u.data.elem_bits == 64 );
199     assert( argv[ 0 ].u.data.elem_count == 1 );
200 
201     assert( argv[ 1 ].u.data.elem_bits == 32 );
202     assert( argv[ 1 ].u.data.elem_count == 1 );
203 
204     if ( spot_id[ 0 ] == 0 ) /*** valid case , the projection should be empty ***/
205     {
206         rslt->elem_count = 0;
207         return 0;
208     }
209     assert( read_id_in[ 0 ] > 0 );
210     read_id = read_id_in[ 0 ] - 1; /** make zero - based **/
211 
212     SUB_DEBUG( ( "SUB.Rd in 'project_read_from_sequence.c' at #%lu\n", spot_id[ 0 ] ) );
213 
214     rc = VCursorCellDataDirect( self->curs, spot_id[ 0 ], self->read_len_idx,
215                                 NULL, ( void const ** )&read_len, NULL, &nreads );
216     if ( rc != 0 )
217         return rc;
218 
219     rc = VCursorCellDataDirect( self->curs, spot_id[ 0 ], self->read_start_idx,
220                                 NULL, ( void const ** )&read_start, NULL, &nreads_2 );
221     if ( rc != 0 )
222         return rc;
223 
224     if ( nreads != nreads_2 || read_id >= (INSDC_coord_zero)nreads )
225     {
226         return RC( rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
227     }
228 
229     rc = VCursorCellDataDirect( self->curs, spot_id[ 0 ], self->col_idx,
230                                 &src_bits, &src, NULL, &src_sz );
231     if ( rc != 0 )
232         return rc;
233 
234     if ( src_sz == nreads )
235     {
236         rslt->elem_count = 1;
237         rslt->data->elem_bits = src_bits;
238         rc = KDataBufferResize( rslt->data, 1 );
239         if ( rc == 0 )
240         {
241             memmove( rslt->data->base,
242                     &( ( char const * )src )[ ( read_id * src_bits ) >> 3 ],
243                     src_bits >> 3 );
244         }
245     }
246     else if ( src_sz == read_start[ nreads - 1 ] + read_len[ nreads - 1 ] )
247     {
248         /* like READ or QUALITY */
249         rslt->elem_count = read_len[ read_id ];
250         rslt->data->elem_bits = src_bits;
251         rc = KDataBufferResize( rslt->data, rslt->elem_count );
252         if ( rc == 0 )
253         {
254             memmove( rslt->data->base,
255                     &( ( char const * )src )[ ( read_start[ read_id ] * src_bits ) >> 3 ],
256                     ( size_t )( ( src_bits * rslt->elem_count ) >> 3 ) );
257         }
258     }
259     else
260     {
261         /* don't know how to break up the read or should use simple_sub_select */
262         return RC( rcXF, rcFunction, rcExecuting, rcConstraint, rcViolated );
263     }
264     return 0;
265 }
266 
267 /*
268  * function
269  * INSDC:4na:bin NCBI:align:project_from_sequence #1 < ascii col > ( I64 seq_spot_id, INSDC:coord:one seq_read_id )
270  *     = ALIGN:project_from_sequence;
271  */
272 VTRANSFACT_IMPL ( ALIGN_project_from_sequence, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
273     VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
274 {
275     RestoreRead *fself;
276     rc_t rc = RestoreReadMake ( & fself, info, cp, (const VCursor*)info->parms  );
277     if ( rc == 0 )
278     {
279         rslt->self = fself;
280         rslt->u.ndf = project_from_sequence_impl;
281         rslt->variant = vftRow;
282         rslt -> whack = RestoreReadWhack;
283     }
284     return rc;
285 }
286