1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 #include <vdb/extern.h>
27 
28 #include <sra/sradb.h>
29 #include <vdb/xform.h>
30 #include <vdb/table.h>
31 #include <klib/data-buffer.h>
32 #include <klib/text.h>
33 #include <klib/rc.h>
34 #include <sysalloc.h>
35 
36 #include <insdc/sra.h>
37 
38 #include <stdlib.h>
39 #include <stdio.h>
40 #include <string.h>
41 #include <ctype.h>
42 #include <assert.h>
43 
44 
45 /* "properly aligned" fragments according to the aligner
46    may mean minimally that the fragments are on the same
47    chromosome. */
48 #define PROPER_ALIGNED_MEANS_SAME_CHROMOSOME 1
49 
50 
51 static
get_sam_flags_impl(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t nreads,uint32_t argc,const VRowData argv[])52 rc_t CC get_sam_flags_impl(void *data, const VXformInfo *info,
53                            int64_t row_id, VRowResult *rslt,
54                            uint32_t nreads,
55                            uint32_t argc, const VRowData argv [] )
56 {
57     rc_t 	rc;
58     int32_t	*dst;
59     const INSDC_coord_one  *rid = argv[ 1 ].u.data.base; /* SEQ_READ_ID */
60     const int32_t         *tlen = argv[ 2 ].u.data.base; /* TEMPLATE_LEN */
61     const bool	           *ro1 = argv[ 3 ].u.data.base; /* REF_ORIENTATION */
62     const bool             *ro2 = argv[ 4 ].u.data.base; /* MATE_REF_ORIENTATION */
63     const bool             *sec = argv[ 5 ].u.data.base;
64     const bool             mate_present = ( argv[ 4 ].u.data.elem_count > 0 );
65     const SRAReadFilter    *flt = argc > 6 ? argv[ 6 ].u.data.base : NULL;
66 
67     assert( argv[ 1 ].u.data.elem_count == 1 );
68     assert( argv[ 2 ].u.data.elem_count == 1 );
69     assert( argv[ 3 ].u.data.elem_count == 1 );
70     assert( argv[ 5 ].u.data.elem_count == 1 );
71 
72     rc = KDataBufferResize( rslt->data, 1 );
73     if( rc != 0 )
74         return rc;
75     rslt->elem_count=1;
76     dst = rslt->data->base;
77     dst[ 0 ] = 0;
78     if( nreads == 0 )
79         return 0;
80 
81     rid  += argv[ 1 ].u.data.first_elem;
82     tlen += argv[ 2 ].u.data.first_elem;
83     ro1  += argv[ 3 ].u.data.first_elem;
84     ro2  += argv[ 4 ].u.data.first_elem;
85     sec  += argv[ 5 ].u.data.first_elem;
86     if ( flt != NULL )
87         flt += argv[ 6 ].u.data.first_elem;
88 
89     /***************** SAM FLAGS************
90       Bit  Description
91      0x001 template having multiple fragments in sequencing
92      0x002 each fragment properly aligned according to the aligner
93      0x004 fragment unmapped
94      0x008 next fragment in the template unmapped
95      0x010 SEQ being reverse complemented
96      0x020 SEQ of the next fragment in the template being reversed
97      0x040 the first fragment in the template
98      0x080 the last fragment in the template
99      0x100 secondary alignment
100      0x200 not passing quality controls
101      0x400 PCR or optical duplicate
102     ****************************/
103     if ( ro1[ 0 ] )
104         dst[ 0 ] |= 0x10;
105 
106     if ( sec[ 0 ] )
107         dst[ 0 ] |= 0x100;
108 
109     if ( nreads > 1 )
110     {
111         if ( rid[ 0 ] == 1 )
112             dst[ 0 ] |= 0x40;
113 
114         if ( rid[ 0 ] == nreads )
115             dst[ 0 ] |= 0x80;
116 
117         dst[ 0 ] |= 0x1;
118 
119         if( mate_present )
120         {
121 #if PROPER_ALIGNED_MEANS_SAME_CHROMOSOME
122            if ( tlen[ 0 ] != 0 )
123 #endif
124                 dst[ 0 ] |= 0x2;
125            if ( ro2 [ 0 ] )
126                 dst[ 0 ] |= 0x20;
127         }
128         else
129         {
130             dst[ 0 ] |= 0x8;
131         }
132     }
133 
134     if ( flt != NULL )
135     {
136         if ( flt[ 0 ] == SRA_READ_FILTER_REJECT )
137         {
138             dst[ 0 ] |= 0x200;
139         }
140         else if ( flt[ 0 ] == SRA_READ_FILTER_CRITERIA )
141         {
142             dst[ 0 ] |= 0x400;
143         }
144     }
145     return rc;
146 }
147 
148 
149 static
get_sam_flags_impl_v1(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])150 rc_t CC get_sam_flags_impl_v1(  void  *data, const VXformInfo * info, int64_t row_id,
151                                 VRowResult * rslt, uint32_t argc, const VRowData argv [] )
152 {
153 	uint32_t nreads = 0;
154     INSDC_coord_len const *read_len = argv[ 0 ].u.data.base;
155     unsigned i;
156 
157     assert( argv[ 0 ].u.data.elem_bits == sizeof( read_len[ 0 ] ) * 8 );
158     read_len += argv[ 0 ].u.data.first_elem;
159 
160 #if 1
161     for ( i = 0, nreads = 0; i != argv[ 0 ].u.data.elem_count; ++i )
162     {
163         if ( read_len[ i ] != 0 )
164             ++nreads;
165     }
166 #else
167     nreads = argv[ 0 ].u.data.elem_count;
168 #endif
169 
170     return get_sam_flags_impl( data, info, row_id, rslt, nreads, argc, argv );
171 }
172 
173 
174 static
get_sam_flags_impl_v2(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])175 rc_t CC get_sam_flags_impl_v2( void * data,const VXformInfo * info, int64_t row_id,
176                                VRowResult * rslt, uint32_t argc, const VRowData argv [] )
177 {
178 	uint32_t nreads = 1;
179     int64_t const *mate_id = argv[ 0 ].u.data.base;
180     unsigned i;
181 
182     assert( argv[ 0 ].u.data.elem_bits == sizeof( mate_id[ 0 ] ) * 8 );
183     mate_id += argv[ 0 ].u.data.first_elem;
184 
185     for ( i = 0; i != argv[ 0 ].u.data.elem_count; ++i )
186     {
187         if ( mate_id[ i ] != 0 )
188             ++nreads;
189     }
190 
191     return get_sam_flags_impl( data, info, row_id, rslt, nreads, argc, argv );
192 }
193 
194 
195 /*
196  * extern function U32 NCBI:align:get_sam_flags #1 (INSDC:coord:len read_len,
197  *                                                  INSDC:coord:one read_id,
198  *                                                  I32 template_len,
199  *                                                  bool strand,
200  *                                                  bool mate_strand,
201  *                                                  bool is_secondary);
202  *
203  */
204 VTRANSFACT_IMPL ( NCBI_align_get_sam_flags, 1, 0, 0 ) ( const void *self, const VXfactInfo *info, VFuncDesc *rslt,
205                                                  const VFactoryParams *cp, const VFunctionParams *dp )
206 {
207     rslt->variant = vftRow;
208     rslt->u.rf = get_sam_flags_impl_v1;
209     return 0;
210 }
211 
212 
213 /*
214  * extern function U32 NCBI:align:get_sam_flags #2 (I64 mate_id,
215  *                                                  INSDC:coord:one read_id,
216  *                                                  I32 template_len,
217  *                                                  bool strand,
218  *                                                  bool mate_strand,
219  *                                                  bool is_secondary);
220  *
221  */
222 VTRANSFACT_IMPL ( NCBI_align_get_sam_flags_2, 2, 0, 0 ) ( const void *self, const VXfactInfo *info, VFuncDesc *rslt,
223                                                        const VFactoryParams *cp, const VFunctionParams *dp )
224 {
225     rslt->variant = vftRow;
226     rslt->u.rf = get_sam_flags_impl_v2;
227     return 0;
228 }
229