1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 #include <vdb/extern.h>
27
28 #include <klib/rc.h>
29 #include <insdc/insdc.h>
30 #include <sra/sradb.h>
31 #include <klib/data-buffer.h>
32 #include <sysalloc.h>
33 #include <assert.h>
34
35 #ifndef UNIT_TEST_FUNCTION
36 #include <vdb/xform.h>
37 #include <vdb/schema.h>
38 #endif
39
40 /*
41 This is a schema-function to synthesize quality values.
42 Its input are the read-len and the spot-filter column.
43 It sums up the values in read-len to determin the length of the produced column
44 It reads the spot-filter values and puts 'good' or 'bad' values into the output
45 The literals for 'good' and 'bad' are not hardcoded but passed into the function
46 from the schema.
47 */
48
49 typedef struct syn_qual_params
50 {
51 INSDC_quality_phred good;
52 INSDC_quality_phred bad;
53 } syn_qual_params;
54
sum_read_len(size_t const count,INSDC_coord_len const * const lengths)55 static INSDC_coord_len sum_read_len(size_t const count, INSDC_coord_len const *const lengths)
56 {
57 INSDC_coord_len result = 0;
58 size_t i;
59
60 for (i = 0; i < count; ++i) {
61 result += lengths[i];
62 }
63 return result;
64 }
65
is_good(size_t const count,INSDC_SRA_spot_filter const * const filters)66 static bool is_good(size_t const count, INSDC_SRA_spot_filter const *const filters)
67 {
68 return (count == 0 || filters[0] == SRA_SPOT_FILTER_PASS) ? true : false;
69 }
70
syn_quality_impl(syn_qual_params const * const params,size_t numreads,INSDC_coord_len const * const lengths,size_t numfilts,INSDC_SRA_spot_filter const * const filters,KDataBuffer * rslt)71 static rc_t syn_quality_impl(syn_qual_params const *const params,
72 size_t numreads, INSDC_coord_len const *const lengths,
73 size_t numfilts, INSDC_SRA_spot_filter const *const filters,
74 KDataBuffer *rslt)
75 {
76 rc_t rc = 0;
77 INSDC_coord_len const total_read_len = sum_read_len(numreads, lengths);
78 INSDC_quality_phred const q = is_good(numfilts, filters) ? params->good : params->bad;
79
80 rslt->elem_bits = 8;
81 rc = KDataBufferResize(rslt, total_read_len);
82 if ( rc == 0 && total_read_len > 0 )
83 memset(rslt->base, q, total_read_len);
84 return rc;
85 }
86
gen_syn_quality(syn_qual_params const * const params,uint8_t * const dst,size_t const total_length,size_t const numreads,INSDC_coord_zero const * const start,INSDC_coord_len const * const length,INSDC_SRA_xread_type const * const type,INSDC_SRA_read_filter const * const filter)87 static void gen_syn_quality( syn_qual_params const *const params
88 , uint8_t *const dst
89 , size_t const total_length
90 , size_t const numreads
91 , INSDC_coord_zero const *const start
92 , INSDC_coord_len const *const length
93 , INSDC_SRA_xread_type const *const type
94 , INSDC_SRA_read_filter const *const filter
95 )
96 {
97 unsigned i;
98
99 memset(dst, params->good, total_length);
100 for (i = 0; i < numreads; ++i) {
101 if ((type[i] & SRA_READ_TYPE_BIOLOGICAL) != SRA_READ_TYPE_BIOLOGICAL)
102 continue;
103 if (filter[i] == SRA_READ_FILTER_PASS)
104 continue;
105 assert(start[i] + length[i] <= total_length);
106 memset(dst + start[i], params->bad, length[i]);
107 }
108 }
109
syn_quality_read_impl(syn_qual_params const * const params,size_t const numreads,INSDC_coord_zero const * const start,INSDC_coord_len const * const length,INSDC_SRA_xread_type const * const type,INSDC_SRA_read_filter const * const filter,KDataBuffer * rslt)110 static rc_t syn_quality_read_impl( syn_qual_params const *const params
111 , size_t const numreads
112 , INSDC_coord_zero const *const start
113 , INSDC_coord_len const *const length
114 , INSDC_SRA_xread_type const *const type
115 , INSDC_SRA_read_filter const *const filter
116 , KDataBuffer *rslt)
117 {
118 rc_t rc = 0;
119 INSDC_coord_len const total_read_len = sum_read_len(numreads, length);
120
121 rslt->elem_bits = 8;
122 rc = KDataBufferResize(rslt, total_read_len);
123 if ( rc == 0 && total_read_len > 0 )
124 gen_syn_quality(params, (uint8_t *)rslt->base, total_read_len, numreads, start, length, type, filter);
125 return rc;
126 }
127
128 #ifndef UNIT_TEST_FUNCTION
129
130 #define SAFE_BASE(ELEM, DTYPE) ((ELEM < argc && sizeof(DTYPE) * 8 == (size_t)argv[ELEM].u.data.elem_bits) ? (((DTYPE const *)argv[ELEM].u.data.base) + argv[ELEM].u.data.first_elem) : ((DTYPE const *)NULL))
131 #define BIND_COLUMN(ELEM, DTYPE, POINTER) DTYPE const *const POINTER = SAFE_BASE(ELEM, DTYPE)
132 #define SAFE_COUNT(ELEM) (ELEM < argc ? argv[ELEM].u.data.elem_count : 0)
133
syn_quality_spot_drvr(void * self,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])134 static rc_t CC syn_quality_spot_drvr ( void * self,
135 const VXformInfo * info,
136 int64_t row_id,
137 VRowResult * rslt,
138 uint32_t argc,
139 const VRowData argv [] )
140 {
141 enum {
142 COL_READ_LEN,
143 COL_SPOT_FILTER,
144 };
145 rc_t rc;
146 assert(argc == 2);
147 rc = syn_quality_impl
148 (self,
149 SAFE_COUNT(COL_READ_LEN), SAFE_BASE(COL_READ_LEN, INSDC_coord_len),
150 SAFE_COUNT(COL_SPOT_FILTER), SAFE_BASE(COL_SPOT_FILTER, INSDC_SRA_spot_filter),
151 rslt->data);
152 rslt->elem_count = rslt->data->elem_count;
153 return rc;
154 }
155
syn_quality_read_drvr(void * self,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])156 static rc_t CC syn_quality_read_drvr ( void * self,
157 const VXformInfo * info,
158 int64_t row_id,
159 VRowResult * rslt,
160 uint32_t argc,
161 const VRowData argv [] )
162 {
163 enum {
164 COL_START,
165 COL_LEN,
166 COL_TYPE,
167 COL_FILTER,
168 };
169 rc_t rc;
170 assert(argc == 4);
171 rc = syn_quality_read_impl(self
172 , SAFE_COUNT(COL_START)
173 , SAFE_BASE(COL_START, INSDC_coord_zero)
174 , SAFE_BASE(COL_LEN, INSDC_coord_len)
175 , SAFE_BASE(COL_TYPE, INSDC_SRA_xread_type)
176 , SAFE_BASE(COL_FILTER, INSDC_SRA_read_filter)
177 , rslt->data
178 );
179 rslt->elem_count = rslt->data->elem_count;
180 return rc;
181 }
182
make_params(syn_qual_params * const params,VFactoryParams const * const fp)183 static void make_params(syn_qual_params *const params, VFactoryParams const *const fp)
184 {
185 params->good = 30;
186 params->bad = 3;
187 if (fp->argc > 0) {
188 assert(fp->argv[0].desc.domain == vtdUint && fp->argv[0].count == 1);
189 params->good = fp->argv[0].data.u8[0];
190
191 if (fp->argc > 1) {
192 assert(fp->argv[1].desc.domain == vtdUint && fp->argv[1].count == 1);
193 params->bad = fp->argv[1].data.u8[0];
194 }
195 }
196 }
197
198 enum syn_quality_flavor {
199 sqf_spot,
200 sqf_read,
201 };
202
NCBI_SRA_syn_quality_factory(VFuncDesc * rslt,const VFactoryParams * cp,const VFunctionParams * dp,enum syn_quality_flavor flavor)203 static rc_t NCBI_SRA_syn_quality_factory(
204 VFuncDesc * rslt,
205 const VFactoryParams * cp,
206 const VFunctionParams * dp,
207 enum syn_quality_flavor flavor )
208 {
209 /* expecting 2 or 4 data arguments and 0, 1, or 2 factory arguments */
210 assert((dp->argc == 2 || dp->argc == 4) && 0 <= cp->argc && cp->argc <= 2);
211
212 rslt->whack = free;
213 rslt->u.rf = (flavor == sqf_read) ? syn_quality_read_drvr : syn_quality_spot_drvr;
214 rslt->variant = vftRow;
215 rslt->self = malloc(sizeof(syn_qual_params));
216 if (rslt->self) {
217 make_params(rslt->self, cp);
218 return 0;
219 }
220 return RC(rcXF, rcFunction, rcConstructing, rcMemory, rcExhausted);
221 }
222
223 /*
224 * function INSDC:quality:phred NCBI:syn_quality #1
225 * < * INSDC:quality:phred good_quality, INSDC:quality:phred bad_quality >
226 * ( INSDC:coord:len read_len, INSDC:SRA:spot_filter spot_filter );
227 */
228 VTRANSFACT_IMPL ( NCBI_SRA_syn_quality, 1, 0, 0 ) ( const void * Self,
229 const VXfactInfo * info,
230 VFuncDesc * rslt,
231 const VFactoryParams * cp,
232 const VFunctionParams * dp )
233 {
234 return NCBI_SRA_syn_quality_factory(rslt, cp, dp, sqf_spot);
235 }
236
237 /*
238 * function INSDC:quality:phred NCBI:SRA:syn_quality #2
239 * < INSDC:quality:phred good_quality, INSDC:quality:phred bad_quality >
240 * ( INSDC:coord:zero read_start, INSDC:coord:len read_len,
241 * INSDC:SRA:xread_type read_type, INSDC:SRA:read_filter read_filter )
242 * = NCBI:SRA:syn_quality_read;
243 */
244 VTRANSFACT_IMPL ( NCBI_SRA_syn_quality_read, 1, 0, 0 ) ( const void * Self,
245 const VXfactInfo * info,
246 VFuncDesc * rslt,
247 const VFactoryParams * cp,
248 const VFunctionParams * dp )
249 {
250 return NCBI_SRA_syn_quality_factory(rslt, cp, dp, sqf_read);
251 }
252
253
254 #else /* ifndef UNIT_TEST_FUNCTION */
255
256 #define ASSERT(X) do { if (!(X)) return -1; } while(0)
257
UnitTest_0Read_NoFilter(void)258 static int UnitTest_0Read_NoFilter(void)
259 {
260 syn_qual_params p; p.good = 30; p.bad = 3;
261 KDataBuffer buffer; memset(&buffer, 0, sizeof(buffer));
262 INSDC_coord_len length[] = { 20, 40 };
263 INSDC_SRA_spot_filter filter[] = { SRA_SPOT_FILTER_PASS, SRA_SPOT_FILTER_REJECT };
264 {
265 rc_t rc = syn_quality_impl(&p, 0, NULL, 0, NULL, &buffer);
266 ASSERT(rc == 0);
267 ASSERT(buffer.elem_bits == 8);
268 ASSERT(buffer.elem_count == 0);
269 (void)(length[0]);
270 (void)(filter[0]);
271 }
272 KDataBufferWhack(&buffer);
273 return 0;
274 }
275
UnitTest_1Read_NoFilter(void)276 static int UnitTest_1Read_NoFilter(void)
277 {
278 syn_qual_params p; p.good = 30; p.bad = 3;
279 KDataBuffer buffer; memset(&buffer, 0, sizeof(buffer));
280 INSDC_coord_len length[] = { 20, 40 };
281 INSDC_SRA_spot_filter filter[] = { SRA_SPOT_FILTER_PASS, SRA_SPOT_FILTER_REJECT };
282 {
283 unsigned i;
284 rc_t rc = syn_quality_impl(&p, 1, length, 0, NULL, &buffer);
285 ASSERT(rc == 0);
286 ASSERT(buffer.elem_bits == 8);
287 ASSERT(buffer.elem_count == length[0]);
288 for (i = 0; i < buffer.elem_count; ++i) {
289 INSDC_quality_phred const qv = ((INSDC_quality_phred *)buffer.base)[i];
290 ASSERT(qv == p.good);
291 }
292 (void)(length[0]);
293 (void)(filter[0]);
294 }
295 KDataBufferWhack(&buffer);
296 return 0;
297 }
298
UnitTest_2Read_NoFilter(void)299 static int UnitTest_2Read_NoFilter(void)
300 {
301 syn_qual_params p; p.good = 30; p.bad = 3;
302 KDataBuffer buffer; memset(&buffer, 0, sizeof(buffer));
303 INSDC_coord_len length[] = { 20, 40 };
304 INSDC_SRA_spot_filter filter[] = { SRA_SPOT_FILTER_PASS, SRA_SPOT_FILTER_REJECT };
305 {
306 unsigned i;
307 rc_t rc = syn_quality_impl(&p, 2, length, 0, NULL, &buffer);
308 ASSERT(rc == 0);
309 ASSERT(buffer.elem_bits == 8);
310 ASSERT(buffer.elem_count == length[0] + length[1]);
311 for (i = 0; i < buffer.elem_count; ++i) {
312 INSDC_quality_phred const qv = ((INSDC_quality_phred *)buffer.base)[i];
313 ASSERT(qv == p.good);
314 }
315 (void)(length[0]);
316 (void)(filter[0]);
317 }
318 KDataBufferWhack(&buffer);
319 return 0;
320 }
321
UnitTest_1Read_Pass(void)322 static int UnitTest_1Read_Pass(void)
323 {
324 syn_qual_params p; p.good = 30; p.bad = 3;
325 KDataBuffer buffer; memset(&buffer, 0, sizeof(buffer));
326 INSDC_coord_len length[] = { 20, 40 };
327 INSDC_SRA_spot_filter filter[] = { SRA_SPOT_FILTER_PASS, SRA_SPOT_FILTER_REJECT };
328 {
329 unsigned i;
330 rc_t rc = syn_quality_impl(&p, 1, length, 1, filter, &buffer);
331 ASSERT(rc == 0);
332 ASSERT(buffer.elem_bits == 8);
333 ASSERT(buffer.elem_count == length[0]);
334 for (i = 0; i < buffer.elem_count; ++i) {
335 INSDC_quality_phred const qv = ((INSDC_quality_phred *)buffer.base)[i];
336 ASSERT(qv == p.good);
337 }
338 (void)(length[0]);
339 (void)(filter[0]);
340 }
341 KDataBufferWhack(&buffer);
342 return 0;
343 }
344
UnitTest_1Read_Fail(void)345 static int UnitTest_1Read_Fail(void)
346 {
347 syn_qual_params p; p.good = 30; p.bad = 3;
348 KDataBuffer buffer; memset(&buffer, 0, sizeof(buffer));
349 INSDC_coord_len length[] = { 20, 40 };
350 INSDC_SRA_spot_filter filter[] = { SRA_SPOT_FILTER_PASS, SRA_SPOT_FILTER_REJECT };
351 {
352 unsigned i;
353 rc_t rc = syn_quality_impl(&p, 1, length, 1, filter + 1, &buffer);
354 ASSERT(rc == 0);
355 ASSERT(buffer.elem_bits == 8);
356 ASSERT(buffer.elem_count == length[0]);
357 for (i = 0; i < buffer.elem_count; ++i) {
358 INSDC_quality_phred const qv = ((INSDC_quality_phred *)buffer.base)[i];
359 ASSERT(qv == p.bad);
360 }
361 (void)(length[0]);
362 (void)(filter[0]);
363 }
364 KDataBufferWhack(&buffer);
365 return 0;
366 }
367
UnitTest_2Read_Pass(void)368 static int UnitTest_2Read_Pass(void)
369 {
370 syn_qual_params p; p.good = 30; p.bad = 3;
371 KDataBuffer buffer; memset(&buffer, 0, sizeof(buffer));
372 INSDC_coord_len length[] = { 20, 40 };
373 INSDC_SRA_spot_filter filter[] = { SRA_SPOT_FILTER_PASS, SRA_SPOT_FILTER_REJECT };
374 {
375 unsigned i;
376 rc_t rc = syn_quality_impl(&p, 2, length, 1, filter, &buffer);
377 ASSERT(rc == 0);
378 ASSERT(buffer.elem_bits == 8);
379 ASSERT(buffer.elem_count == length[0] + length[1]);
380 for (i = 0; i < buffer.elem_count; ++i) {
381 INSDC_quality_phred const qv = ((INSDC_quality_phred *)buffer.base)[i];
382 ASSERT(qv == p.good);
383 }
384 (void)(length[0]);
385 (void)(filter[0]);
386 }
387 KDataBufferWhack(&buffer);
388 return 0;
389 }
390
UnitTest_2Read_Fail(void)391 static int UnitTest_2Read_Fail(void)
392 {
393 syn_qual_params p; p.good = 30; p.bad = 3;
394 KDataBuffer buffer; memset(&buffer, 0, sizeof(buffer));
395 INSDC_coord_len length[] = { 20, 40 };
396 INSDC_SRA_spot_filter filter[] = { SRA_SPOT_FILTER_PASS, SRA_SPOT_FILTER_REJECT };
397 {
398 unsigned i;
399 rc_t rc = syn_quality_impl(&p, 2, length, 1, filter + 1, &buffer);
400 ASSERT(rc == 0);
401 ASSERT(buffer.elem_bits == 8);
402 ASSERT(buffer.elem_count == length[0] + length[1]);
403 for (i = 0; i < buffer.elem_count; ++i) {
404 INSDC_quality_phred const qv = ((INSDC_quality_phred *)buffer.base)[i];
405 ASSERT(qv == p.bad);
406 }
407 (void)(length[0]);
408 (void)(filter[0]);
409 }
410 KDataBufferWhack(&buffer);
411 return 0;
412 }
413 #endif
414