1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 #include <vdb/extern.h>
27
28 #include <sra/sradb.h>
29 #include <vdb/xform.h>
30 #include <vdb/table.h>
31 #include <klib/data-buffer.h>
32 #include <klib/text.h>
33 #include <klib/rc.h>
34 #include <sysalloc.h>
35
36 #include <insdc/sra.h>
37
38 #include <stdlib.h>
39 #include <stdio.h>
40 #include <string.h>
41 #include <ctype.h>
42 #include <assert.h>
43
44
45 /* "properly aligned" fragments according to the aligner
46 may mean minimally that the fragments are on the same
47 chromosome. */
48 #define PROPER_ALIGNED_MEANS_SAME_CHROMOSOME 1
49
50
51 static
get_sam_flags_impl(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t nreads,uint32_t argc,const VRowData argv[])52 rc_t CC get_sam_flags_impl(void *data, const VXformInfo *info,
53 int64_t row_id, VRowResult *rslt,
54 uint32_t nreads,
55 uint32_t argc, const VRowData argv [] )
56 {
57 rc_t rc;
58 int32_t *dst;
59 const INSDC_coord_one *rid = argv[ 1 ].u.data.base; /* SEQ_READ_ID */
60 const int32_t *tlen = argv[ 2 ].u.data.base; /* TEMPLATE_LEN */
61 const bool *ro1 = argv[ 3 ].u.data.base; /* REF_ORIENTATION */
62 const bool *ro2 = argv[ 4 ].u.data.base; /* MATE_REF_ORIENTATION */
63 const bool *sec = argv[ 5 ].u.data.base;
64 const bool mate_present = ( argv[ 4 ].u.data.elem_count > 0 );
65 const SRAReadFilter *flt = argc > 6 ? argv[ 6 ].u.data.base : NULL;
66
67 assert( argv[ 1 ].u.data.elem_count == 1 );
68 assert( argv[ 2 ].u.data.elem_count == 1 );
69 assert( argv[ 3 ].u.data.elem_count == 1 );
70 assert( argv[ 5 ].u.data.elem_count == 1 );
71
72 rc = KDataBufferResize( rslt->data, 1 );
73 if( rc != 0 )
74 return rc;
75 rslt->elem_count=1;
76 dst = rslt->data->base;
77 dst[ 0 ] = 0;
78 if( nreads == 0 )
79 return 0;
80
81 rid += argv[ 1 ].u.data.first_elem;
82 tlen += argv[ 2 ].u.data.first_elem;
83 ro1 += argv[ 3 ].u.data.first_elem;
84 ro2 += argv[ 4 ].u.data.first_elem;
85 sec += argv[ 5 ].u.data.first_elem;
86 if ( flt != NULL )
87 flt += argv[ 6 ].u.data.first_elem;
88
89 /***************** SAM FLAGS************
90 Bit Description
91 0x001 template having multiple fragments in sequencing
92 0x002 each fragment properly aligned according to the aligner
93 0x004 fragment unmapped
94 0x008 next fragment in the template unmapped
95 0x010 SEQ being reverse complemented
96 0x020 SEQ of the next fragment in the template being reversed
97 0x040 the first fragment in the template
98 0x080 the last fragment in the template
99 0x100 secondary alignment
100 0x200 not passing quality controls
101 0x400 PCR or optical duplicate
102 ****************************/
103 if ( ro1[ 0 ] )
104 dst[ 0 ] |= 0x10;
105
106 if ( sec[ 0 ] )
107 dst[ 0 ] |= 0x100;
108
109 if ( nreads > 1 )
110 {
111 if ( rid[ 0 ] == 1 )
112 dst[ 0 ] |= 0x40;
113
114 if ( rid[ 0 ] == nreads )
115 dst[ 0 ] |= 0x80;
116
117 dst[ 0 ] |= 0x1;
118
119 if( mate_present )
120 {
121 #if PROPER_ALIGNED_MEANS_SAME_CHROMOSOME
122 if ( tlen[ 0 ] != 0 )
123 #endif
124 dst[ 0 ] |= 0x2;
125 if ( ro2 [ 0 ] )
126 dst[ 0 ] |= 0x20;
127 }
128 else
129 {
130 dst[ 0 ] |= 0x8;
131 }
132 }
133
134 if ( flt != NULL )
135 {
136 if ( flt[ 0 ] == SRA_READ_FILTER_REJECT )
137 {
138 dst[ 0 ] |= 0x200;
139 }
140 else if ( flt[ 0 ] == SRA_READ_FILTER_CRITERIA )
141 {
142 dst[ 0 ] |= 0x400;
143 }
144 }
145 return rc;
146 }
147
148
149 static
get_sam_flags_impl_v1(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])150 rc_t CC get_sam_flags_impl_v1( void *data, const VXformInfo * info, int64_t row_id,
151 VRowResult * rslt, uint32_t argc, const VRowData argv [] )
152 {
153 uint32_t nreads = 0;
154 INSDC_coord_len const *read_len = argv[ 0 ].u.data.base;
155 unsigned i;
156
157 assert( argv[ 0 ].u.data.elem_bits == sizeof( read_len[ 0 ] ) * 8 );
158 read_len += argv[ 0 ].u.data.first_elem;
159
160 #if 1
161 for ( i = 0, nreads = 0; i != argv[ 0 ].u.data.elem_count; ++i )
162 {
163 if ( read_len[ i ] != 0 )
164 ++nreads;
165 }
166 #else
167 nreads = argv[ 0 ].u.data.elem_count;
168 #endif
169
170 return get_sam_flags_impl( data, info, row_id, rslt, nreads, argc, argv );
171 }
172
173
174 static
get_sam_flags_impl_v2(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])175 rc_t CC get_sam_flags_impl_v2( void * data,const VXformInfo * info, int64_t row_id,
176 VRowResult * rslt, uint32_t argc, const VRowData argv [] )
177 {
178 uint32_t nreads = 1;
179 int64_t const *mate_id = argv[ 0 ].u.data.base;
180 unsigned i;
181
182 assert( argv[ 0 ].u.data.elem_bits == sizeof( mate_id[ 0 ] ) * 8 );
183 mate_id += argv[ 0 ].u.data.first_elem;
184
185 for ( i = 0; i != argv[ 0 ].u.data.elem_count; ++i )
186 {
187 if ( mate_id[ i ] != 0 )
188 ++nreads;
189 }
190
191 return get_sam_flags_impl( data, info, row_id, rslt, nreads, argc, argv );
192 }
193
194
195 /*
196 * extern function U32 NCBI:align:get_sam_flags #1 (INSDC:coord:len read_len,
197 * INSDC:coord:one read_id,
198 * I32 template_len,
199 * bool strand,
200 * bool mate_strand,
201 * bool is_secondary);
202 *
203 */
204 VTRANSFACT_IMPL ( NCBI_align_get_sam_flags, 1, 0, 0 ) ( const void *self, const VXfactInfo *info, VFuncDesc *rslt,
205 const VFactoryParams *cp, const VFunctionParams *dp )
206 {
207 rslt->variant = vftRow;
208 rslt->u.rf = get_sam_flags_impl_v1;
209 return 0;
210 }
211
212
213 /*
214 * extern function U32 NCBI:align:get_sam_flags #2 (I64 mate_id,
215 * INSDC:coord:one read_id,
216 * I32 template_len,
217 * bool strand,
218 * bool mate_strand,
219 * bool is_secondary);
220 *
221 */
222 VTRANSFACT_IMPL ( NCBI_align_get_sam_flags_2, 2, 0, 0 ) ( const void *self, const VXfactInfo *info, VFuncDesc *rslt,
223 const VFactoryParams *cp, const VFunctionParams *dp )
224 {
225 rslt->variant = vftRow;
226 rslt->u.rf = get_sam_flags_impl_v2;
227 return 0;
228 }
229