1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 #include <vdb/extern.h>
27
28 #include <klib/defs.h>
29 #include <klib/rc.h>
30 #include <vdb/table.h>
31 #include <vdb/xform.h>
32 #include <vdb/schema.h>
33 #include <kdb/meta.h>
34 #include <klib/data-buffer.h>
35 #include <bitstr.h>
36 #include <sysalloc.h>
37 #include <klib/printf.h>
38
39 #include <klib/out.h>
40
41 #include <align/align.h>
42
43 #include <stdint.h>
44 #include <stdlib.h>
45 #include <assert.h>
46 #include <string.h>
47 #include <stdio.h>
48 #include <ctype.h>
49 #include <os-native.h>
50
51 #include <insdc/insdc.h>
52
53 #define ARG_BASE(TYPE, N) (((TYPE const *)argv[(N)].u.data.base) + argv[(N)].u.data.first_elem)
54 #define ARG_ALIAS(TYPE, NAME, N) TYPE const *const NAME = ARG_BASE(TYPE, N)
55 #define ARG_ALIAS_COND(TYPE, NAME, N, ALT) TYPE const *const NAME = ((argc > (N)) ? ARG_BASE(TYPE, N) : ALT)
56
57 typedef struct {
58 int version;
59 } self_t;
60
61 static
op2b(KDataBuffer * dst,unsigned const offset,unsigned * const count,int const opcode,unsigned oplen)62 rc_t op2b( KDataBuffer *dst, unsigned const offset, unsigned *const count, int const opcode, unsigned oplen )
63 {
64 unsigned digits = 1;
65 unsigned scale = 10;
66
67 if ( oplen == 0 )
68 {
69 *count = 0;
70 return 0;
71 }
72
73 while ( scale < oplen )
74 {
75 scale *= 10;
76 ++digits;
77 }
78
79 if ( scale == oplen ) /* oplen is whole power of 10 */
80 ++digits;
81
82 *count = digits + 1;
83
84 if ( dst != NULL )
85 {
86 unsigned const need = offset + digits + 1;
87
88 if ( need > dst->elem_count )
89 {
90 rc_t rc = KDataBufferResize( dst, need );
91 if ( rc != 0 ) return rc;
92 }
93
94 {
95 char *const base = &( ( char * )dst->base )[ offset ];
96
97 base[ digits ] = opcode;
98 do
99 {
100 unsigned const digit = oplen % 10;
101
102 oplen /= 10;
103 base[ --digits ] = digit + '0';
104 } while ( digits );
105 }
106 }
107 return 0;
108 }
109
110
111 static
cigar_string(KDataBuffer * dst,size_t boff,INSDC_coord_len * const bsize,bool const full,bool const has_mismatch[],bool const has_ref_offset[],INSDC_coord_zero const read_start,INSDC_coord_zero const read_end,int32_t const ref_offset[],unsigned const ro_len,unsigned * ro_offset)112 rc_t cigar_string(KDataBuffer *dst, size_t boff,
113 INSDC_coord_len *const bsize, bool const full,
114 bool const has_mismatch[], bool const has_ref_offset[],
115 INSDC_coord_zero const read_start, INSDC_coord_zero const read_end,
116 int32_t const ref_offset[], unsigned const ro_len, unsigned *ro_offset)
117 {
118 size_t bsz = 0;
119 unsigned nwrit;
120 uint32_t i, m, mm;
121 rc_t rc;
122 unsigned cur_off = ro_offset ? *ro_offset : 0;
123
124 #define BUF_WRITE(OP, LEN) if ((rc = op2b(dst, (unsigned const)(bsz + boff), &nwrit, (int const)(OP), (unsigned)(LEN))) != 0) return rc; bsz += nwrit;
125
126 #define MACRO_FLUSH_MATCH { BUF_WRITE('=', m); m = 0; }
127 #define MACRO_FLUSH_MISMATCH { BUF_WRITE(i == read_end ? 'S' : 'X', mm); mm = 0; }
128
129 #define MACRO_FLUSH_BOTH \
130 if(m+mm > 0) { \
131 if(i==read_end && has_ref_offset[i]) { \
132 BUF_WRITE('M', m); BUF_WRITE('S', mm); \
133 } else { \
134 BUF_WRITE('M', m + mm) \
135 } \
136 m=mm=0; \
137 }
138
139 #define MACRO_FLUSH \
140 if(full){ \
141 MACRO_FLUSH_MATCH; \
142 MACRO_FLUSH_MISMATCH; \
143 } else { \
144 MACRO_FLUSH_BOTH; \
145 }
146
147 for( i = read_start, bsz = m = mm = 0; i < (uint32_t)read_end; i++ )
148 {
149 if( has_ref_offset[ i ] ) /*** No offset in the reference **/
150 {
151 int32_t offset;
152
153 if ( cur_off >= ro_len ) /*** bad data ***/
154 return RC( rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
155
156 offset = ref_offset[ cur_off++ ];
157
158 if( offset > 0 ) /*** insert in the reference, delete in sequence ***/
159 {
160 if ( i == 0 ) /**** deletes in the beginning are disallowed, REF_START should have been moved and delete converted to insert **/
161 return RC( rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
162 MACRO_FLUSH;
163 BUF_WRITE( 'D', offset );
164 }
165 else if ( offset < 0 ) /**** delete from the reference ***/
166 {
167 offset = -offset;
168 if ( i + offset > (uint32_t)read_end )
169 return RC( rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
170 if ( i > 0 ) /** normally indels are before the current base ***/
171 {
172 MACRO_FLUSH;
173 BUF_WRITE( 'I', offset );
174 }
175 else
176 { /*** this is a soft clip at the beginning ***/
177 BUF_WRITE( 'S', offset );
178 }
179 i += offset;
180 }
181 else
182 { /*** Not possible ??? ***/
183 return RC( rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
184 }
185 }
186
187 if ( i < (uint32_t)read_end )
188 {
189 if ( has_mismatch[ i ] )
190 {
191 if ( full )
192 {
193 MACRO_FLUSH_MATCH;
194 }
195 mm++;
196 }
197 else
198 {
199 if ( full )
200 {
201 MACRO_FLUSH_MISMATCH;
202 }
203 else
204 {
205 m += mm;
206 mm = 0;
207 }
208 m++;
209 }
210 }
211 }
212 MACRO_FLUSH;
213 *bsize = (INSDC_coord_len)bsz;
214 if ( ro_offset )
215 *ro_offset = cur_off;
216 return 0;
217
218 #undef BUF_WRITE
219 #undef MACRO_FLUSH_MATCH
220 #undef MACRO_FLUSH_MISMATCH
221 #undef MACRO_FLUSH_BOTH
222 #undef MACRO_FLUSH
223 }
224
225 typedef struct {
226 int opcode_M, opcode_X, opcode_S;
227 } cigar_opcode_options_t;
228
229 static
cigar_string_2_0(KDataBuffer * dst,size_t boff,INSDC_coord_len * const bsize,bool const has_mismatch[],bool const has_ref_offset[],INSDC_coord_zero const read_start,INSDC_coord_zero const read_end,int32_t const ref_offset[],unsigned const ro_len,unsigned ro_offset[],unsigned const reflen,cigar_opcode_options_t const * ops)230 rc_t cigar_string_2_0(KDataBuffer *dst,
231 size_t boff,
232 INSDC_coord_len *const bsize,
233 bool const has_mismatch[],
234 bool const has_ref_offset[],
235 INSDC_coord_zero const read_start,
236 INSDC_coord_zero const read_end,
237 int32_t const ref_offset[],
238 unsigned const ro_len,
239 unsigned ro_offset[],
240 unsigned const reflen,
241 cigar_opcode_options_t const *ops)
242 {
243 int ri;
244 unsigned si;
245 unsigned di;
246 rc_t rc;
247 unsigned nwrit;
248 unsigned cur_off = ro_offset ? *ro_offset : 0;
249 unsigned op_len;
250 int opcode;
251
252 #define BUF_WRITE(OP, LEN) { if ((rc = op2b(dst, (unsigned const)(di + boff), &nwrit, (int const)(OP), (unsigned)(LEN))) != 0) return rc; di += nwrit; }
253 assert ( read_start >= 0 );
254 si = read_start;
255 if ( /* !use_S && */ read_start == read_end && reflen > 0 ) /** full delete as a last ploidy ends up written nowhere **/
256 {
257 di=0;
258 opcode = 'D';
259 op_len = reflen;
260 }
261 else for ( op_len = di = 0, opcode = ri = 0; si < ( unsigned )read_end && ri <= ( int )reflen; )
262 {
263 if ( has_ref_offset[ si ] )
264 {
265 int offs;
266
267 if ( op_len > 0 )
268 {
269 BUF_WRITE( opcode, op_len );
270 op_len = 0;
271 }
272 if ( cur_off >= ro_len ) /*** bad data ***/
273 return RC( rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
274
275 offs = ref_offset[ cur_off++ ];
276 if ( offs < 0 )
277 {
278 unsigned j;
279 for ( j = 1; j < (unsigned)-offs && ( si + j ) < ( unsigned )read_end; )
280 {
281 if ( has_ref_offset[ si + j ] ) /*** structured insert **/
282 {
283 BUF_WRITE( si ? 'I' : ops->opcode_S, j );
284 offs += j;
285 si += j;
286 j = 1;
287 }
288 else
289 {
290 j++;
291 }
292 }
293 if ( offs < 0 )
294 {
295 BUF_WRITE( si ? 'I' : ops->opcode_S, -offs );
296 si -= offs;
297 }
298 continue;
299 }
300 else if ( offs > 0 )
301 {
302 BUF_WRITE( 'D', offs );
303 ri += offs;
304 }
305 else
306 return RC( rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
307 }
308
309 if ( ri < ( int )reflen )
310 {
311 int const op_nxt = ( has_mismatch[ si ] ? ops->opcode_X : ops->opcode_M );
312
313 if ( op_len == 0 || opcode == op_nxt )
314 ++op_len;
315 else
316 {
317 BUF_WRITE( opcode, op_len );
318 op_len = 1;
319 }
320 opcode = op_nxt;
321 }
322 else
323 break;
324 ++si;
325 ++ri;
326 }
327
328 BUF_WRITE( opcode, op_len );
329 if ( si < (unsigned)read_end )
330 {
331 if ( cur_off + 1 < ro_len )
332 {
333 assert( read_end + ref_offset[ cur_off ] == si );
334 cur_off++;
335 BUF_WRITE( 'I', read_end - si );
336 }
337 else
338 {
339 BUF_WRITE( ops->opcode_S, read_end - si );
340 }
341 }
342 *bsize = di;
343 if ( ro_offset != NULL )
344 *ro_offset = cur_off;
345
346 return 0;
347 #undef BUF_WRITE
348 }
349
350 static
cigar_string_2(KDataBuffer * dst,size_t boff,INSDC_coord_len * bsize,const int version,bool const has_mismatch[],bool const has_ref_offset[],INSDC_coord_zero const read_start,INSDC_coord_zero const read_end,int32_t const ref_offset[],unsigned const ro_len,unsigned * ro_offset,unsigned const reflen,bool use_S)351 rc_t cigar_string_2(KDataBuffer *dst, size_t boff,
352 INSDC_coord_len *bsize, const int version,
353 bool const has_mismatch[], bool const has_ref_offset[],
354 INSDC_coord_zero const read_start, INSDC_coord_zero const read_end,
355 int32_t const ref_offset[], unsigned const ro_len, unsigned * ro_offset,
356 unsigned const reflen, bool use_S)
357 {
358 cigar_opcode_options_t const ops = {
359 version == 1 ? '=' : 'M',
360 version == 1 ? 'X' : 'M',
361 use_S ? 'S' : 'I'
362 };
363 return cigar_string_2_0(dst, boff, bsize, has_mismatch, has_ref_offset, read_start, read_end, ref_offset, ro_len, ro_offset, reflen, &ops);
364 }
365
366 static
cigar_string_2_1(KDataBuffer * dst,size_t boff,INSDC_coord_len * bsize,const int version,bool const has_mismatch[],bool const has_ref_offset[],INSDC_coord_zero const read_start,INSDC_coord_zero const read_end,int32_t const ref_offset[],unsigned const ro_len,unsigned ro_offset[],uint8_t const ref_offset_type[],unsigned const reflen)367 rc_t cigar_string_2_1(KDataBuffer *dst, size_t boff,
368 INSDC_coord_len *bsize, const int version,
369 bool const has_mismatch[], bool const has_ref_offset[],
370 INSDC_coord_zero const read_start, INSDC_coord_zero const read_end,
371 int32_t const ref_offset[], unsigned const ro_len, unsigned ro_offset[],
372 uint8_t const ref_offset_type[],
373 unsigned const reflen)
374 {
375 int ri;
376 unsigned si = read_start;
377 unsigned di;
378 rc_t rc;
379 unsigned nwrit;
380 unsigned cur_off = ro_offset ? *ro_offset : 0;
381 unsigned op_len;
382 int opcode;
383 int const opM = (version & 1) ? '=' : 'M';
384 int const opX = (version & 1) ? 'X' : 'M';
385
386 #define BUF_WRITE(OP, LEN) { if ((rc = op2b(dst, (unsigned const)(di + boff), &nwrit, (int const)(OP), (unsigned)(LEN))) != 0) return rc; di += nwrit; }
387 if (read_start == read_end && reflen > 0) {
388 /** full delete as a last ploidy ends up written nowhere **/
389 di = 0;
390 opcode = 'D';
391 op_len = reflen;
392 }
393 else for (op_len = di = 0, opcode = ri = 0; si < (unsigned)read_end && ri <= (int)reflen; ) {
394 if (has_ref_offset[si]) {
395 int offs;
396 int type;
397
398 if (op_len > 0) {
399 BUF_WRITE(opcode, op_len);
400 op_len = 0;
401 }
402 if (cur_off >= ro_len) /*** bad data ***/
403 return RC(rcXF, rcFunction, rcExecuting, rcData, rcInvalid);
404
405 type = ref_offset_type[cur_off];
406 offs = ref_offset[cur_off];
407 ++cur_off;
408
409 if (offs < 0) {
410 unsigned const ins = -offs;
411 if( (version & 1) /*CIGAR_LONG*/ && type == 5 /** complete genomics **/ && ri >= ins /** safety **/){
412 BUF_WRITE('B', ins);
413 ri -= ins;
414 } else {
415 int const opc = (type == 1) ? 'S' : 'I';
416 BUF_WRITE(opc, ins);
417 si += ins;
418 continue;
419 }
420 } else if (offs > 0) {
421 int const opc = type == 0 ? 'D' : 'N';
422
423 BUF_WRITE(opc, offs);
424 ri += offs;
425 }
426 }
427 if (ri < (int)reflen) {
428 int const op_nxt = (has_mismatch[si] ? opX : opM);
429
430 if (op_len == 0 || opcode == op_nxt)
431 ++op_len;
432 else {
433 BUF_WRITE(opcode, op_len);
434 op_len = 1;
435 }
436 opcode = op_nxt;
437 }
438 else
439 break;
440 ++si;
441 ++ri;
442 }
443
444 BUF_WRITE(opcode, op_len);
445 *bsize = di;
446 if (ro_offset != NULL)
447 *ro_offset = cur_off;
448
449 return 0;
450 #undef BUF_WRITE
451 }
452
right_soft_clip(unsigned seq_len,unsigned ref_len,unsigned noffsets,int32_t const ref_offset[])453 static INSDC_coord_len right_soft_clip(unsigned seq_len, unsigned ref_len,
454 unsigned noffsets,
455 int32_t const ref_offset[])
456 {
457 INSDC_coord_len a = ref_len;
458 unsigned i;
459
460 for ( i = 0; i < noffsets; ++i )
461 a -= ref_offset[ i ];
462 return a < seq_len ? seq_len - a : 0;
463 }
464
465 static
cigar_impl(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])466 rc_t CC cigar_impl ( void *data, const VXformInfo *info, int64_t row_id,
467 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
468 {
469 self_t const *self = data;
470 unsigned const rdln = ( unsigned const )argv[ 0 ].u.data.elem_count;
471 unsigned const ro_len = ( unsigned const )argv[ 2 ].u.data.elem_count;
472 bool const *has_mismatch = argv[ 0 ].u.data.base;
473 bool const *has_ref_offset = argv[ 1 ].u.data.base;
474 int32_t const *ref_offset = argv[ 2 ].u.data.base;
475 rc_t rc;
476
477 assert( argv[ 0 ].u.data.elem_bits == 8 );
478 assert( argv[ 1 ].u.data.elem_bits == 8 );
479 assert( argv[ 2 ].u.data.elem_bits == 32 );
480
481 assert( rdln == argv[ 1 ].u.data.elem_count );
482
483 has_mismatch += argv[ 0 ].u.data.first_elem;
484 has_ref_offset += argv[ 1 ].u.data.first_elem;
485 ref_offset += argv[ 2 ].u.data.first_elem;
486
487 rslt->data->elem_bits = 8;
488 if ( argc == 3 )
489 {
490 INSDC_coord_len count;
491
492 rc = cigar_string( rslt->data, 0, &count, self->version & 0x1,
493 has_mismatch, has_ref_offset,
494 0, rdln, ref_offset, ro_len, NULL);
495 rslt->elem_count = count;
496 }
497 else
498 {
499 int32_t const *const rfln = argv[ 3 ].u.data.base;
500 INSDC_coord_len count;
501
502 rc = cigar_string_2( rslt->data, 0, &count, self->version & 0x1,
503 has_mismatch, has_ref_offset,
504 0, rdln, ref_offset, ro_len, NULL,
505 rfln[ argv[ 3 ].u.data.first_elem ], true );
506 rslt->elem_count = count;
507 }
508 return rc;
509 }
510
511 static
cigar_impl_2(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])512 rc_t CC cigar_impl_2 ( void *data, const VXformInfo *info, int64_t row_id,
513 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
514 {
515 self_t const *self = data;
516 bool const *has_mismatch = argv[0].u.data.base;
517 bool const *has_ref_offset = argv[1].u.data.base;
518 int32_t const *ref_offset = argv[2].u.data.base;
519 INSDC_coord_len const *read_len = argv[3].u.data.base;
520 uint8_t const *const ref_offset_type = argc >= 6 ? ((uint8_t const *)argv[5].u.data.base) + argv[5].u.data.first_elem : NULL;
521 uint32_t const nreads = (uint32_t const)argv[ 3 ].u.data.elem_count;
522 uint32_t const ro_len = (uint32_t const)argv[ 2 ].u.data.elem_count;
523 uint32_t n;
524 uint32_t ro_offset = 0;
525 rc_t rc = 0;
526 INSDC_coord_zero start;
527 INSDC_coord_len *cigar_len = NULL;
528 KDataBuffer *buf = ( self->version & 0x04 ) ? NULL : rslt->data;
529
530 assert( argv[ 0 ].u.data.elem_bits == 8 );
531 assert( argv[ 1 ].u.data.elem_bits == 8 );
532 assert( argv[ 2 ].u.data.elem_bits == 32 );
533 assert( argv[ 3 ].u.data.elem_bits == 32 );
534
535 has_mismatch += argv[ 0 ].u.data.first_elem;
536 has_ref_offset += argv[ 1 ].u.data.first_elem;
537 ref_offset += argv[ 2 ].u.data.first_elem;
538 read_len += argv[ 3 ].u.data.first_elem;
539
540 if (self->version & 0x4) {
541 rslt->data->elem_bits = sizeof(cigar_len[0]) * 8;
542 rslt->elem_count = nreads;
543
544 rc = KDataBufferResize(rslt->data, rslt->elem_count);
545 if (rc != 0)
546 return rc;
547
548 cigar_len = rslt->data->base;
549 if (argv[0].u.data.elem_count == 0 || argv[1].u.data.elem_count == 0) {
550 memset(cigar_len, 0, sizeof(cigar_len[0]) * nreads);
551 return 0;
552 }
553 }
554 else {
555 rslt->data->elem_bits = 8;
556 rslt->elem_count = 0;
557 }
558
559 for (n = 0, start = 0, ro_offset = 0; n < nreads; start += read_len[n], ++n) {
560 INSDC_coord_len cnt;
561 INSDC_coord_len *const count = (self->version & 0x04) ? cigar_len + n : &cnt;
562
563 if (argc == 4) {
564 rc = cigar_string(buf, (size_t)rslt->elem_count, count, self->version & 0x1,
565 has_mismatch, has_ref_offset,
566 start, start + read_len[n],
567 ref_offset, ro_len, &ro_offset);
568 }
569 else if (argc == 5) {
570 int32_t const *const reflen = argv[4].u.data.base;
571
572 rc = cigar_string_2(buf, (size_t)rslt->elem_count, count, self->version & 0x1,
573 has_mismatch, has_ref_offset,
574 start, start + read_len[n],
575 ref_offset, ro_len, &ro_offset,
576 reflen[argv[4].u.data.first_elem], nreads == 1 ? true : false);
577 }
578 else { /* should be new function */
579 int32_t const *const reflen = argv[4].u.data.base;
580
581 rc = cigar_string_2_1(buf, (size_t)rslt->elem_count, count, self->version & 0x1,
582 has_mismatch, has_ref_offset,
583 start, start + read_len[n],
584 ref_offset, ro_len, &ro_offset,
585 ref_offset_type,
586 reflen[argv[4].u.data.first_elem]);
587 }
588 if (rc != 0)
589 return rc;
590 if ((self->version & 0x04) == 0)
591 rslt->elem_count += cnt;
592 }
593 return 0;
594 }
595
596 static
self_whack(void * ptr)597 void CC self_whack( void *ptr )
598 {
599 free( ptr );
600 }
601
602
603 /*
604 * function
605 * ascii ALIGN:cigar #1 ( bool has_mismatch, bool has_ref_offset, I32 ref_offset);
606 */
607 VTRANSFACT_IMPL ( ALIGN_cigar, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
608 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
609 {
610 self_t self;
611
612 self.version =cp -> argv [ 0 ] . data . u8 [ 0 ];
613 switch(self.version){
614 case 0:
615 case 1:
616 break;
617 default:
618 return RC ( rcXF, rcFunction, rcConstructing, rcParam, rcIncorrect );
619 }
620 rslt->u.rf = cigar_impl;
621 rslt->variant = vftRow;
622 rslt -> self = malloc ( sizeof self );
623 memmove(rslt -> self,&self,sizeof(self));
624 rslt -> whack = self_whack;
625
626 return 0;
627 }
628
629 VTRANSFACT_IMPL ( ALIGN_cigar_2, 2, 0, 0 ) ( const void *Self, const VXfactInfo *info,
630 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
631 {
632 self_t self;
633 VTypedesc const return_type = info->fdesc.desc;
634 int const version = cp->argv[0].data.u8[0];
635
636 self.version = version;
637 switch (version) {
638 case 0:
639 case 1:
640 break;
641 default:
642 return RC( rcXF, rcFunction, rcConstructing, rcParam, rcIncorrect );
643 }
644
645 if (return_type.domain == vtdAscii && return_type.intrinsic_bits == 8)
646 {
647 self.version |= 0x2;
648 }
649 else if (return_type.domain == vtdUint && return_type.intrinsic_bits == sizeof(INSDC_coord_len) * 8)
650 {
651 self.version |= 0x4;
652 }
653 else
654 {
655 return RC( rcXF, rcFunction, rcConstructing, rcParam, rcIncorrect );
656 }
657
658 rslt->u.rf = cigar_impl_2;
659 rslt->variant = vftRow;
660 rslt->self = malloc(sizeof(self));
661 if (rslt->self == NULL)
662 return RC( rcXF, rcFunction, rcConstructing, rcMemory, rcExhausted );
663
664 memmove(rslt->self, &self, sizeof(self));
665 rslt->whack = self_whack;
666 return 0;
667 }
668
669 static
edit_distance_impl(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])670 rc_t CC edit_distance_impl ( void *data, const VXformInfo *info, int64_t row_id,
671 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
672 {
673 rc_t rc;
674 uint32_t i, roi, mrun;
675 uint32_t len = ( uint32_t ) argv[ 0 ].u.data.elem_count;
676 uint32_t *dst;
677
678 uint8_t const *has_mismatch = argv [ 0 ] . u . data . base;
679 uint8_t const *has_ref_offset = argv [ 1 ] . u . data . base;
680 int32_t const *ref_offset = argv [ 2 ] . u . data . base;
681
682 assert( argv[ 0 ].u.data.elem_bits == 8 );
683 assert( argv[ 1 ].u.data.elem_bits == 8 );
684 assert( argv[ 2 ].u.data.elem_bits == 32 );
685
686 assert( len == argv[ 1 ].u.data.elem_count );
687
688 has_mismatch += argv [ 0 ] . u . data . first_elem;
689 has_ref_offset += argv [ 1 ] . u . data . first_elem;
690 ref_offset += argv [ 2 ] . u . data . first_elem;
691
692 /* resize output row for the total number of reads */
693 rslt->data->elem_bits = rslt->elem_bits;
694 rc = KDataBufferResize( rslt -> data, 1 );
695 if ( rc != 0 )
696 return rc;
697
698 rslt -> elem_count = 1;
699 dst = rslt -> data -> base;
700 dst[ 0 ] = 0;
701 if( len == 0 )
702 return 0; /** nothing to do **/
703
704 if ( has_ref_offset[ 0 ] ) /** skip mismatches from the beginning == soft clip ***/
705 {
706 if ( ref_offset[ 0 ] > 0 ) /**** deletes in the beginning are disallowed, REF_START should have been moved and delete converted to insert **/
707 return RC( rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
708
709 i = -( ref_offset [ 0 ] );
710 roi = 1;
711 mrun = 0;
712 }
713 else
714 {
715 i = roi = 0;
716 }
717
718 for ( mrun = 0; i < len; i++ )
719 {
720 if ( has_mismatch[ i ] )
721 {
722 mrun++;
723 }
724 else /*** intentionally skipping last run of mismatches **/
725 {
726 dst[ 0 ] += mrun;
727 mrun = 0;
728 }
729 }
730 return 0;
731 }
732
733 /*
734 * function
735 * ascii NCBI:align:edit_distance #1 ( bool has_mismatch, bool has_ref_offset, I32 ref_offset);
736 */
737 VTRANSFACT_IMPL ( NCBI_align_edit_distance, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
738 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
739 {
740 rslt->u.rf = edit_distance_impl;
741 rslt->variant = vftRow;
742 return 0;
743 }
744
745
746 /*
747 * edit distance = sum of lengths of inserts
748 * + sum of lengths of deletes
749 * + number of mismatches
750 * excluding soft clips
751 */
752
edit_distance(bool const has_ref_offset[],bool const has_mismatch[],unsigned const readlen,unsigned const reflen,int32_t const ref_offset[],unsigned const offsets)753 unsigned edit_distance(bool const has_ref_offset[],
754 bool const has_mismatch[],
755 unsigned const readlen,
756 unsigned const reflen,
757 int32_t const ref_offset[],
758 unsigned const offsets)
759 {
760 if ( readlen == 0 )
761 {
762 /* full delete */
763 return reflen;
764 }
765 else
766 {
767 INSDC_coord_len const rsc = right_soft_clip( readlen, reflen, offsets, ref_offset );
768 unsigned indels = 0;
769 unsigned misses = 0;
770 unsigned i = 0;
771 unsigned j = 0;
772
773 if ( has_ref_offset[ 0 ] && ref_offset[ j ] < 0 )
774 j = i = 1;
775
776 /* sum of insert lengths + sum of delete lengths excluding soft clips */
777 for ( ; i < readlen - rsc; ++i )
778 {
779 if ( has_ref_offset[ i ] )
780 {
781 int const offset = ref_offset[ j++ ];
782
783 if ( offset < 0 )
784 indels += -offset;
785 else
786 indels += offset;
787 }
788 }
789
790 /* sum of mismatches not in inserts or soft clips */
791 for ( j = i = 0; i < readlen - rsc; )
792 {
793 if ( has_ref_offset[ i ] )
794 {
795 int offset = ref_offset[ j++ ];
796
797 if ( offset < 0 )
798 {
799 i += -offset;
800 continue;
801 }
802 }
803 misses += has_mismatch[ i ] ? 1 : 0;
804 ++i;
805 }
806 return indels + misses;
807 }
808 }
809
810
811 static
edit_distance_2_impl(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])812 rc_t CC edit_distance_2_impl ( void *data, const VXformInfo *info, int64_t row_id,
813 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
814 {
815 rc_t rc;
816 unsigned const nreads = argc > 4 ? ( unsigned const )argv[ 4 ].u.data.elem_count : 1;
817 unsigned const len = ( unsigned const )argv[ 0 ].u.data.elem_count;
818 unsigned const noffsets = ( unsigned const )argv[ 2 ].u.data.elem_count;
819 INSDC_coord_len const dummy_rl = len;
820
821 ARG_ALIAS ( bool , has_mismatch , 0 );
822 ARG_ALIAS ( bool , has_ref_offset, 1 );
823 ARG_ALIAS ( int32_t , ref_offset , 2 );
824 ARG_ALIAS ( INSDC_coord_len, ref_len , 3 );
825 ARG_ALIAS_COND( INSDC_coord_len, readlen , 4, &dummy_rl );
826
827 assert( argv[ 0 ].u.data.elem_bits == sizeof( has_mismatch [ 0 ] ) * 8 );
828 assert( argv[ 1 ].u.data.elem_bits == sizeof( has_ref_offset[ 0 ] ) * 8 );
829 assert( argv[ 2 ].u.data.elem_bits == sizeof( ref_offset [ 0 ] ) * 8 );
830 assert( argv[ 3 ].u.data.elem_bits == sizeof( ref_len [ 0 ] ) * 8 );
831
832 rslt->data->elem_bits = rslt->elem_bits;
833 if ( len == 0 )
834 {
835 return KDataBufferResize( rslt->data, rslt->elem_count = 0 );
836 }
837
838 assert( len == argv[ 1 ].u.data.elem_count );
839
840 rslt->elem_count = nreads;
841 rc = KDataBufferResize( rslt->data, nreads );
842 if ( rc == 0 )
843 {
844 unsigned i;
845 unsigned start = 0;
846 unsigned offset = 0;
847 uint32_t *const dst = rslt->data->base;
848
849 for ( i = 0; i < nreads; ++i )
850 {
851 unsigned const rlen = readlen[ i ];
852 unsigned j;
853 unsigned offsets = 0;
854
855 for ( j = 0; j < rlen; ++j )
856 {
857 if ( has_ref_offset[ start + j ] )
858 ++offsets;
859 }
860
861 if ( offsets + offset > noffsets )
862 return RC( rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
863
864 dst[ i ] = edit_distance( has_ref_offset + start,
865 has_mismatch + start,
866 rlen,
867 ref_len[ 0 ],
868 ref_offset + offset,
869 offsets );
870 start += rlen;
871 offset += offsets;
872 }
873 }
874 return rc;
875 }
876
877
878 /*
879 * function
880 * U32 NCBI:align:edit_distance #2 ( bool has_mismatch, bool has_ref_offset,
881 * I32 ref_offset, INSDC:coord:len ref_len );
882 */
883 VTRANSFACT_IMPL ( NCBI_align_edit_distance_2, 2, 0, 0 ) ( const void *Self, const VXfactInfo *info,
884 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
885 {
886 rslt->u.rf = edit_distance_2_impl;
887 rslt->variant = vftRow;
888 return 0;
889 }
890
891
892 /*
893 * function
894 * U32 NCBI:align:edit_distance #3 ( bool has_mismatch, bool has_ref_offset,
895 * I32 ref_offset, NCBI:align:ro_type ref_offset_type, INSDC:coord:len read_len );
896 */
897 static
edit_distance_3_impl(void * const data,VXformInfo const info[],int64_t const row_id,VRowResult rslt[],uint32_t const argc,VRowData const argv[])898 rc_t CC edit_distance_3_impl(void *const data, VXformInfo const info[],
899 int64_t const row_id,
900 VRowResult rslt[],
901 uint32_t const argc,
902 VRowData const argv[/* argc */])
903 {
904 rc_t rc;
905 unsigned const nreads = (unsigned const)argv[4].u.data.elem_count;
906 ARG_ALIAS( bool, has_mismatch , 0);
907 ARG_ALIAS( bool, has_ref_offset , 1);
908 ARG_ALIAS( int32_t, ref_offset , 2);
909 ARG_ALIAS(NCBI_align_ro_type, ref_offset_type, 3);
910 ARG_ALIAS( INSDC_coord_len, read_len , 4);
911
912 rslt->data->elem_bits = rslt->elem_bits;
913 rslt->elem_count = nreads;
914 rc = KDataBufferResize(rslt->data, nreads);
915 if (rc == 0) {
916 uint32_t *const result = rslt->data->base;
917 unsigned cur = 0;
918 unsigned cur_ro = 0;
919 unsigned n;
920
921 for (n = 0; n < nreads; ++n) {
922 unsigned const len = read_len[n];
923 unsigned j;
924 unsigned miss = 0;
925 unsigned indel = 0;
926 unsigned cur_ro2 = cur_ro;
927
928 for (j = 0; j < len; ) {
929 if (has_ref_offset[cur] != 0) {
930 int const offset = ref_offset[cur_ro];
931
932 ++cur_ro;
933 if (offset < 0) {
934 unsigned const dist = -offset;
935
936 cur += dist;
937 j += dist;
938 continue;
939 }
940 }
941 if (has_mismatch[cur])
942 ++miss;
943 ++cur;
944 ++j;
945 }
946 while (cur_ro2 < cur_ro) {
947 int const type = ref_offset_type[cur_ro2];
948
949 if (type == 0) {
950 int const offset = ref_offset[cur_ro2];
951
952 if (offset < 0)
953 indel += -offset;
954 else
955 indel += +offset;
956 }
957 ++cur_ro2;
958 }
959 result[n] = miss + indel;
960 }
961 }
962 return rc;
963 }
964
965 VTRANSFACT_IMPL ( NCBI_align_edit_distance_3, 3, 0, 0 ) ( const void *Self, const VXfactInfo *info,
966 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
967 {
968 rslt->u.rf = edit_distance_3_impl;
969 rslt->variant = vftRow;
970 return 0;
971 }
972
973
974 /*
975 * function bool ALIGN:generate_has_mismatch #1 (INSDC:4na:bin reference,
976 * INSDC:4na:bin subject, bool has_ref_offset, I32 ref_offset);
977 */
978 static
generate_has_mismatch_impl(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])979 rc_t CC generate_has_mismatch_impl ( void *data, const VXformInfo *info, int64_t row_id,
980 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
981 {
982 rc_t rc;
983 int32_t si, ri, roi;
984 uint32_t ref_len = ( uint32_t )argv[ 0 ].u.data.elem_count;
985 uint32_t sbj_len = ( uint32_t )argv[ 1 ].u.data.elem_count;
986 uint32_t hro_len = ( uint32_t )argv[ 2 ].u.data.elem_count;
987 uint32_t ro_len = ( uint32_t )argv[ 3 ].u.data.elem_count;
988 const uint8_t *ref = argv [ 0 ] . u . data . base;
989 const uint8_t *sbj = argv [ 1 ] . u . data . base;
990 const uint8_t *has_ref_offset = argv [ 2 ] . u . data . base;
991 const int32_t *ref_offset = argv [ 3 ] . u . data . base;
992
993 uint8_t * dst;
994 uint32_t len = 0;
995
996 rslt -> data -> elem_bits = 8;
997 if ( sbj_len == 0 )
998 {
999 rc = KDataBufferResize ( rslt -> data, 0 );
1000 if ( rc != 0 )
1001 return rc;
1002 rslt -> elem_count = 0;
1003 return 0;
1004 }
1005 assert( sbj_len == hro_len );
1006 len = sbj_len;
1007
1008 ref += argv [ 0 ] . u . data . first_elem;
1009 sbj += argv [ 1 ] . u . data . first_elem;
1010 has_ref_offset += argv [ 2 ] . u . data . first_elem;
1011 ref_offset += argv [ 3 ] . u . data . first_elem;
1012
1013 /* resize output row for the total number of reads */
1014 rslt -> data -> elem_bits = 8;
1015 rc = KDataBufferResize ( rslt -> data, len );
1016 if ( rc != 0 )
1017 return rc;
1018 rslt -> elem_count = len;
1019 dst = rslt -> data->base;
1020 for ( si = ri = roi = 0; si < ( int32_t )len; si++, ri++ )
1021 {
1022 if ( has_ref_offset[ si ] != 0 ) /*** need to offset the reference ***/
1023 {
1024 if ( roi >= ( int32_t )ro_len )
1025 {
1026 return RC( rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
1027 }
1028 ri += ref_offset[ roi++ ];
1029 }
1030
1031 if ( ri >= 0 && ri < ( int32_t )ref_len && sbj[ si ] == ref[ ri ] )
1032 dst[ si ]=0;
1033 else
1034 dst[ si ]=1;
1035 }
1036 return 0;
1037 }
1038
1039
1040 VTRANSFACT_IMPL ( ALIGN_generate_has_mismatch, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
1041 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
1042 {
1043 rslt->u.rf = generate_has_mismatch_impl;
1044 rslt->variant = vftRow;
1045 rslt -> self = NULL;
1046 rslt -> whack = NULL;
1047 return 0;
1048 }
1049
1050 /*
1051 * function bool ALIGN:generate_mismatch #1 (INSDC:4na:bin reference,INSDC:4na:bin subject, bool has_ref_offset, I32 ref_offset);
1052 */
1053 static
generate_mismatch_impl(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])1054 rc_t CC generate_mismatch_impl ( void *data, const VXformInfo *info, int64_t row_id,
1055 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
1056 {
1057 rc_t rc;
1058 int32_t si, ri, roi;
1059 uint32_t ref_len = ( uint32_t )argv[ 0 ].u.data.elem_count;
1060 uint32_t sbj_len = ( uint32_t )argv[ 1 ].u.data.elem_count;
1061 uint32_t hro_len = ( uint32_t )argv[ 2 ].u.data.elem_count;
1062 uint32_t ro_len = ( uint32_t )argv[ 3 ].u.data.elem_count;
1063 const uint8_t *ref = argv [ 0 ] . u . data . base;
1064 const uint8_t *sbj = argv [ 1 ] . u . data . base;
1065 const uint8_t *has_ref_offset = argv [ 2 ] . u . data . base;
1066 const int32_t *ref_offset = argv [ 3 ] . u . data . base;
1067 uint8_t buf[ 5 * 1024 ];
1068 uint32_t len;
1069
1070 rslt -> data -> elem_bits = 8;
1071 if ( sbj_len == 0 )
1072 {
1073 return KDataBufferResize( rslt->data, rslt->elem_count = 0 );
1074 }
1075 assert( sbj_len == hro_len );
1076
1077 ref += argv [ 0 ] . u . data . first_elem;
1078 sbj += argv [ 1 ] . u . data . first_elem;
1079 has_ref_offset += argv [ 2 ] . u . data . first_elem;
1080 ref_offset += argv [ 3 ] . u . data . first_elem;
1081
1082 for ( si = ri = roi = 0, len = 0; si < ( int32_t )sbj_len; si++, ri++ )
1083 {
1084 if ( has_ref_offset[ si ] != 0 )/*** need to offset the reference ***/
1085 {
1086 if ( roi >= ( int32_t )ro_len )
1087 {
1088 return RC( rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
1089 }
1090 ri += ref_offset[ roi++ ];
1091 }
1092
1093 if ( ri >=0 && ri < ( int32_t )ref_len && sbj[ si ] == ref[ ri ] )
1094 {
1095 /*noop*/
1096 }
1097 else
1098 {
1099 if ( len > sizeof( buf ) )
1100 return RC( rcXF, rcFunction, rcExecuting, rcBuffer, rcInsufficient );
1101
1102 buf[ len++ ] = sbj[ si ];
1103 }
1104 }
1105
1106 /* resize output row for the total number of reads */
1107 rc = KDataBufferResize ( rslt -> data, len );
1108 if ( rc != 0 )
1109 return rc;
1110 rslt -> elem_count = len;
1111 memmove( rslt -> data->base, buf, len );
1112 return 0;
1113 }
1114
1115
1116 VTRANSFACT_IMPL ( ALIGN_generate_mismatch, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
1117 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
1118 {
1119 rslt->u.rf = generate_mismatch_impl;
1120 rslt->variant = vftRow;
1121 rslt -> self = NULL;
1122 rslt -> whack = NULL;
1123 return 0;
1124 }
1125
1126
1127
1128 /*
1129 * function INSDC:quality:phred NCBI:align:generate_mismatch_qual #1 (INSDC:quality:phred qual,bool has_mismatch)
1130 */
1131 static
generate_mismatch_qual_impl(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])1132 rc_t CC generate_mismatch_qual_impl ( void *data, const VXformInfo *info, int64_t row_id,
1133 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
1134 {
1135 rc_t rc;
1136 const uint8_t *q = argv[ 0 ].u.data.base;
1137 const uint8_t *h_mm = argv[ 1 ].u.data.base;
1138 uint8_t buf[ 5 * 1024 ];
1139 uint32_t mm_cnt, i;
1140
1141 q += argv[ 0 ].u.data.first_elem;
1142 h_mm += argv[ 1 ].u.data.first_elem;
1143
1144 for ( mm_cnt = 0, i = 0; i < argv[ 0 ].u.data.elem_count; i++ )
1145 {
1146 if( h_mm[ i ] )
1147 {
1148 buf[ mm_cnt++ ] = q[ i ];
1149 }
1150 }
1151
1152 /* resize output row for the total number of reads */
1153 rslt -> data -> elem_bits = 8;
1154 rc = KDataBufferResize ( rslt -> data, mm_cnt );
1155 if ( rc != 0 )
1156 return rc;
1157 rslt -> elem_count = mm_cnt;
1158 if ( mm_cnt > 0 )
1159 memmove( rslt -> data->base, buf, mm_cnt );
1160
1161 return 0;
1162 }
1163
1164
1165 VTRANSFACT_IMPL ( ALIGN_generate_mismatch_qual, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
1166 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
1167 {
1168 rslt->u.rf = generate_mismatch_qual_impl;
1169 rslt->variant = vftRow;
1170 rslt -> self = NULL;
1171 rslt -> whack = NULL;
1172 return 0;
1173 }
1174
1175
1176
1177 /*
1178 * function ascii NCBI:align:get_mismatch_read #1
1179 * ( bool has_mismatch, INSDC:dna:text mismatch )
1180 */
1181 static
get_mismatch_read_impl(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])1182 rc_t CC get_mismatch_read_impl ( void *data, const VXformInfo *info, int64_t row_id,
1183 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
1184 {
1185 rc_t rc;
1186 char *result;
1187 unsigned const readlen = ( unsigned const )argv[ 0 ].u.data.elem_count;
1188
1189 rslt->data->elem_bits = sizeof( result[ 0 ] ) * 8;
1190 rslt->elem_count = readlen;
1191 rc = KDataBufferResize( rslt->data, rslt->elem_count );
1192 if ( rc == 0 )
1193 {
1194 unsigned i;
1195 unsigned j;
1196 bool const *has_mismatch = argv[ 0 ].u.data.base;
1197 char const *mismatch = argv[ 1 ].u.data.base;
1198
1199 assert( argv[ 0 ].u.data.elem_bits == sizeof( has_mismatch[ 0 ] ) * 8 );
1200 has_mismatch += argv[ 0 ].u.data.first_elem;
1201
1202 assert( argv[ 1 ].u.data.elem_bits == sizeof( mismatch[ 0 ] ) * 8 );
1203 mismatch += argv[ 1 ].u.data.first_elem;
1204
1205 result = rslt->data->base;
1206 for ( i = j = 0; i != readlen; ++i )
1207 {
1208 result[ i ] = has_mismatch[ i ] ? mismatch[ j++ ] : '=';
1209 }
1210 }
1211
1212 return rc;
1213 }
1214
1215
1216 VTRANSFACT_IMPL ( NCBI_align_get_mismatch_read, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
1217 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
1218 {
1219 rslt->u.rf = get_mismatch_read_impl;
1220 rslt->variant = vftRow;
1221 rslt -> self = NULL;
1222 rslt -> whack = NULL;
1223 return 0;
1224 }
1225
1226
1227
1228 /*
1229 * function INSDC:coord:len NCBI:align:get_left_soft_clip #1 (
1230 * bool has_ref_offset, INSDC:coord:zero ref_offset )
1231 */
1232 static
left_soft_clip_impl(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])1233 rc_t CC left_soft_clip_impl ( void *data, const VXformInfo *info, int64_t row_id,
1234 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
1235 {
1236 rc_t rc;
1237 INSDC_coord_len result;
1238 unsigned const n_offsets = ( unsigned const )argv[ 1 ].u.data.elem_count;
1239
1240 result = 0;
1241
1242 if ( n_offsets > 0 )
1243 {
1244 bool const *has_ref_offset = argv[ 0 ].u.data.base;
1245 int32_t const *ref_offset = argv[ 1 ].u.data.base;
1246
1247 assert( argv[ 0 ].u.data.elem_bits == sizeof( has_ref_offset[ 0 ] ) * 8 );
1248 assert( argv[ 1 ].u.data.elem_bits == sizeof( ref_offset[ 0 ] ) * 8 );
1249
1250 has_ref_offset += argv[ 0 ].u.data.first_elem;
1251 ref_offset += argv[ 1 ].u.data.first_elem;
1252
1253 if ( has_ref_offset[ 0 ] && ref_offset[ 0 ] < 0 )
1254 {
1255 result = -ref_offset[ 0 ];
1256 }
1257 }
1258
1259 rslt->data->elem_bits = sizeof( result ) * 8;
1260 rslt->elem_count = 1;
1261 rc = KDataBufferResize( rslt->data, 1 );
1262 if ( rc == 0 )
1263 memmove( rslt->data->base, &result, sizeof( result ) );
1264
1265 return rc;
1266 }
1267
1268
1269 static
left_soft_clip_impl_2(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])1270 rc_t CC left_soft_clip_impl_2 ( void *data, const VXformInfo *info, int64_t row_id,
1271 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
1272 {
1273 rc_t rc;
1274 INSDC_coord_len* result;
1275 const bool* has_ref_offset = argv[ 0 ].u.data.base;
1276 const int32_t* ref_offset = argv[ 1 ].u.data.base;
1277 const uint64_t n_offsets = argv[ 1 ].u.data.elem_count;
1278 const INSDC_coord_len* read_len = argv[ 2 ].u.data.base;
1279 const uint32_t nreads = ( const uint32_t )argv[ 2 ].u.data.elem_count;
1280 uint32_t n, roi;
1281 INSDC_coord_zero start;
1282
1283 assert( argv[ 0 ].u.data.elem_bits == sizeof( has_ref_offset[ 0 ] ) * 8 );
1284 assert( argv[ 1 ].u.data.elem_bits == sizeof( ref_offset[ 0 ] ) * 8 );
1285 assert( argv[ 2 ].u.data.elem_bits == sizeof( read_len[ 0 ] ) * 8 );
1286
1287 has_ref_offset += argv[ 0 ].u.data.first_elem;
1288 ref_offset += argv[ 1 ].u.data.first_elem;
1289 read_len += argv[ 2 ].u.data.first_elem;
1290
1291 rslt->data->elem_bits = sizeof( *result ) * 8;
1292 rslt->elem_count = nreads;
1293 rc = KDataBufferResize( rslt->data, rslt->elem_count );
1294 result = rslt->data->base;
1295
1296 for ( n = roi = start = 0; rc == 0 && n < nreads; start += read_len[ n++ ] )
1297 {
1298 if ( has_ref_offset[ start ] && ref_offset[ roi ] < 0 )
1299 {
1300 if ( roi >= n_offsets )
1301 {
1302 rc = RC( rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
1303 break;
1304 }
1305 result[ n ] = -ref_offset[ roi ];
1306 }
1307 else
1308 {
1309 result[ n ] = 0;
1310 }
1311
1312 if ( n < nreads - 1 && roi < n_offsets )
1313 {
1314 /* scroll through has_ref_offset and consume ref_offsets for this read
1315 only if there is next read or more offsets */
1316 uint32_t k;
1317 for ( k = start; k < start + read_len[ n ]; k++ )
1318 {
1319 if ( has_ref_offset[ k ] )
1320 {
1321 if ( roi >= n_offsets )
1322 {
1323 rc = RC( rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
1324 break;
1325 }
1326 roi++;
1327 }
1328 }
1329 }
1330 }
1331
1332 return rc;
1333 }
1334
1335
1336 VTRANSFACT_IMPL ( NCBI_align_get_left_soft_clip, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
1337 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
1338 {
1339 rslt->u.rf = left_soft_clip_impl;
1340 rslt->variant = vftRow;
1341 rslt -> self = NULL;
1342 rslt -> whack = NULL;
1343 return 0;
1344 }
1345
1346
1347 VTRANSFACT_IMPL ( NCBI_align_get_left_soft_clip_2, 2, 0, 0 ) ( const void *Self, const VXfactInfo *info,
1348 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
1349 {
1350 rslt->u.rf = left_soft_clip_impl_2;
1351 rslt->variant = vftRow;
1352 rslt -> self = NULL;
1353 rslt -> whack = NULL;
1354 return 0;
1355 }
1356
1357
1358 /*
1359 * function INSDC:coord:len NCBI:align:get_right_soft_clip #1 (
1360 * bool has_mismatch, INSDC:coord:len left_clip )
1361 */
1362 static
right_soft_clip_impl(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])1363 rc_t CC right_soft_clip_impl ( void *data, const VXformInfo *info, int64_t row_id,
1364 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
1365 {
1366 rc_t rc;
1367 INSDC_coord_len result = 0;
1368 INSDC_coord_len left = 0;
1369 uint32_t right = ( uint32_t )argv[ 0 ].u.data.elem_count;
1370 bool const *has_mismatch = argv[ 0 ].u.data.base;
1371 bool const *has_ref_offset = NULL;
1372 int32_t last_ref_offset = 0;
1373
1374 assert( argv[ 0 ].u.data.elem_bits == sizeof( has_mismatch[ 0 ] ) * 8 );
1375 has_mismatch += argv[ 0 ].u.data.first_elem;
1376
1377 if ( argc > 2 )
1378 {
1379 has_ref_offset = argv[ 2 ].u.data.base;
1380 has_ref_offset += argv[ 2 ].u.data.first_elem;
1381 if ( argc > 3 )
1382 {
1383 const int32_t *ro = argv[ 3 ].u.data.base;
1384 ro += argv[ 3 ].u.data.first_elem;
1385 if ( argv[ 3 ].u.data.elem_count > 0 )
1386 {
1387 last_ref_offset = ro[ argv[ 3 ].u.data.elem_count - 1 ];
1388 }
1389 }
1390 }
1391
1392 assert( argv[ 1 ].u.data.elem_bits == sizeof( left ) * 8 );
1393 left = ( ( INSDC_coord_len const * )argv[ 1 ].u.data.base )[ argv[ 1 ].u.data.first_elem ];
1394
1395 while ( right != left && has_mismatch[ right - 1 ] &&
1396 ( has_ref_offset == NULL || has_ref_offset[ right - 1 ] == 0 ) )
1397 {
1398 ++result;
1399 --right;
1400 }
1401
1402 while ( right > 0 && last_ref_offset < 0 && has_ref_offset[ right - 1 ] == 0 ) /*** some mismatches from left needs to be recovered to cover for inserts **/
1403 {
1404 last_ref_offset++;
1405 right--;
1406 }
1407
1408 if ( last_ref_offset < -1 )
1409 {
1410 last_ref_offset ++;
1411 if ( result < (INSDC_coord_len)-last_ref_offset )
1412 result=0;
1413 else
1414 result += last_ref_offset;
1415 }
1416 else if ( last_ref_offset > 0 )
1417 {
1418 result += last_ref_offset;
1419 }
1420
1421 rslt->data->elem_bits = sizeof( result ) * 8;
1422 rslt->elem_count = 1;
1423 rc = KDataBufferResize( rslt->data, 1 );
1424 if ( rc == 0 )
1425 memmove( rslt->data->base, &result, sizeof( result ) );
1426
1427 return rc;
1428 }
1429
1430
1431 VTRANSFACT_IMPL ( NCBI_align_get_right_soft_clip, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
1432 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
1433 {
1434 rslt->u.rf = right_soft_clip_impl;
1435 rslt->variant = vftRow;
1436 rslt -> self = NULL;
1437 rslt -> whack = NULL;
1438 return 0;
1439 }
1440
1441
1442 VTRANSFACT_IMPL ( NCBI_align_get_right_soft_clip_2, 2, 0, 0 ) ( const void *Self, const VXfactInfo *info,
1443 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
1444 {
1445 rslt->u.rf = right_soft_clip_impl;
1446 rslt->variant = vftRow;
1447 rslt -> self = NULL;
1448 rslt -> whack = NULL;
1449 return 0;
1450 }
1451
1452
1453 /*
1454 * function INSDC:coord:len NCBI:align:get_right_soft_clip #3
1455 * ( bool has_ref_offset, I32 ref_offset, INSDC:coord:len ref_len )
1456 */
1457 static
right_soft_clip_3_impl(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])1458 rc_t CC right_soft_clip_3_impl ( void *data, const VXformInfo *info, int64_t row_id,
1459 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
1460 {
1461 rc_t rc;
1462 INSDC_coord_len const ref_len = ( ( INSDC_coord_len const * )argv[ 2 ].u.data.base )[ argv[ 2 ].u.data.first_elem ];
1463 unsigned const seq_len = ( unsigned const )argv[ 0 ].u.data.elem_count;
1464 int32_t const *ref_offset = &( ( int32_t const * )argv[ 1 ].u.data.base )[ argv[ 1 ].u.data.first_elem ];
1465 unsigned const n = ( unsigned const )argv[ 1 ].u.data.elem_count;
1466
1467 assert( argv[ 1 ].u.data.elem_bits == sizeof( ref_offset[ 0 ] ) * 8 );
1468 assert( argv[ 2 ].u.data.elem_bits == sizeof( ref_len ) * 8 );
1469
1470 rslt->data->elem_bits = rslt->elem_bits;
1471 rslt->elem_count = 1;
1472 rc = KDataBufferResize( rslt->data, 1 );
1473 if ( rc == 0 )
1474 {
1475 ( ( INSDC_coord_len * )rslt->data->base )[ 0 ] = right_soft_clip( seq_len, ref_len, n, ref_offset );
1476 }
1477
1478 return rc;
1479 }
1480
1481
1482 VTRANSFACT_IMPL ( NCBI_align_get_right_soft_clip_3, 3, 0, 0 )
1483 ( const void *Self, const VXfactInfo *info, VFuncDesc *rslt,
1484 const VFactoryParams *cp, const VFunctionParams *dp )
1485 {
1486 rslt->u.rf = right_soft_clip_3_impl;
1487 rslt->variant = vftRow;
1488 rslt -> self = NULL;
1489 rslt -> whack = NULL;
1490 return 0;
1491 }
1492
1493
1494 static
right_soft_clip_4_impl(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])1495 rc_t CC right_soft_clip_4_impl ( void *data, const VXformInfo *info, int64_t row_id,
1496 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
1497 {
1498 rc_t rc;
1499 INSDC_coord_len* result;
1500 const bool* has_ref_offset = argv[ 0 ].u.data.base;
1501 const int32_t* ref_offset = argv[ 1 ].u.data.base;
1502 #if 0
1503 const uint64_t n_offsets = argv[ 1 ].u.data.elem_count;
1504 #endif
1505 const INSDC_coord_len* read_len = argv[ 2 ].u.data.base;
1506 const uint32_t nreads = ( const uint32_t )argv[ 2 ].u.data.elem_count;
1507 const INSDC_coord_len* ref_len = argv[ 3 ].u.data.base;
1508 uint32_t n, roi;
1509 INSDC_coord_zero start;
1510
1511 assert( argv[ 0 ].u.data.elem_bits == sizeof( has_ref_offset[ 0 ] ) * 8 );
1512 assert( argv[ 1 ].u.data.elem_bits == sizeof( ref_offset[ 0 ] ) * 8 );
1513 assert( argv[ 2 ].u.data.elem_bits == sizeof( read_len[ 0 ] ) * 8 );
1514 assert( argv[ 3 ].u.data.elem_bits == sizeof( ref_len[ 0 ] ) * 8 );
1515 assert( argv[ 3 ].u.data.elem_count == 1);
1516
1517 has_ref_offset += argv[ 0 ].u.data.first_elem;
1518 ref_offset += argv[ 1 ].u.data.first_elem;
1519 read_len += argv[ 2 ].u.data.first_elem;
1520 ref_len += argv[ 3 ].u.data.first_elem;
1521
1522 rslt->data->elem_bits = sizeof( *result ) * 8;
1523 rslt->elem_count = nreads;
1524 rc = KDataBufferResize( rslt->data, rslt->elem_count );
1525 result = rslt->data->base;
1526
1527 for( n = roi = start = 0; rc == 0 && n < nreads; ++n )
1528 {
1529 INSDC_coord_len const rlen = read_len[ n ];
1530 unsigned k;
1531 unsigned offset_count;
1532
1533 for ( k = offset_count = 0; k < rlen; ++k )
1534 {
1535 if ( has_ref_offset[ k + start ] )
1536 ++offset_count;
1537 }
1538
1539 result[ n ] = right_soft_clip( rlen, ref_len[ 0 ], offset_count, ref_offset );
1540 ref_offset += offset_count;
1541 start += rlen;
1542 #if 0
1543 result[ n ] = read_len[ n ] - ref_len[ 0 ];
1544 for ( k = start; k < start + read_len[ n ]; k++ )
1545 {
1546 if ( has_ref_offset[ k ] )
1547 {
1548 if ( roi >= n_offsets )
1549 {
1550 rc = RC( rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
1551 break;
1552 }
1553 result[ n ] += ref_offset[ roi++ ];
1554 }
1555 }
1556 #endif
1557 }
1558
1559 return rc;
1560 }
1561
1562 VTRANSFACT_IMPL ( NCBI_align_get_right_soft_clip_4, 4, 0, 0 )
1563 ( const void *Self, const VXfactInfo *info, VFuncDesc *rslt,
1564 const VFactoryParams *cp, const VFunctionParams *dp )
1565 {
1566 rslt->u.rf = right_soft_clip_4_impl;
1567 rslt->variant = vftRow;
1568 rslt -> self = NULL;
1569 rslt -> whack = NULL;
1570 return 0;
1571 }
1572
1573 /* extern function INSDC:coord:len NCBI:align:get_right_soft_clip #5
1574 * ( bool has_ref_offset, I32 ref_offset, NCBI:align:ro_type ref_offset_type,
1575 * INSDC:coord:len read_len )
1576 */
1577 static
right_soft_clip_5_impl(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])1578 rc_t CC right_soft_clip_5_impl ( void *data, const VXformInfo *info, int64_t row_id,
1579 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
1580 {
1581 rc_t rc;
1582 unsigned const nreads = argv[3].u.data.elem_count;
1583 ARG_ALIAS( bool, has_ref_offset, 0);
1584 ARG_ALIAS( int32_t, ref_offset, 1);
1585 ARG_ALIAS(NCBI_align_ro_type, ref_offset_type, 2);
1586 ARG_ALIAS( INSDC_coord_len, read_len, 3);
1587
1588 rslt->data->elem_bits = sizeof(INSDC_coord_len) * 8;
1589 rslt->elem_count = nreads;
1590 rc = KDataBufferResize(rslt->data, nreads);
1591 if (rc == 0) {
1592 INSDC_coord_len *const result = rslt->data->base;
1593 unsigned cur = 0;
1594 unsigned cur_ro = 0;
1595 unsigned n;
1596
1597 for (n = 0; n < nreads; ++n) {
1598 unsigned const len = read_len[n];
1599 unsigned last = 0;
1600 unsigned prev = 0;
1601 unsigned j;
1602
1603 for (j = 0; j < len; ++j, ++cur) {
1604 if (has_ref_offset[cur] != 0) {
1605 int const offset = ref_offset[cur_ro];
1606 int const type = ref_offset_type[cur_ro];
1607
1608 ++cur_ro;
1609 if (j > 0 && offset < 0 && type == 1) {
1610 prev = last;
1611 last = -offset;
1612 }
1613 }
1614 }
1615 result[n] = prev == 0 ? last : prev;
1616 }
1617 }
1618
1619 return rc;
1620 }
1621
1622 VTRANSFACT_IMPL(NCBI_align_get_right_soft_clip_5, 5, 0, 0 )
1623 (const void *Self, const VXfactInfo *info, VFuncDesc *rslt,
1624 const VFactoryParams *cp, const VFunctionParams *dp )
1625 {
1626 rslt->u.rf = right_soft_clip_5_impl;
1627 rslt->variant = vftRow;
1628 rslt -> self = NULL;
1629 rslt -> whack = NULL;
1630 return 0;
1631 }
1632
1633 /*
1634 * function ascii NCBI:align:get_clipped_cigar #1 ( ascii cigar )
1635 */
1636 static
clipped_cigar_impl(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])1637 rc_t CC clipped_cigar_impl ( void *data, const VXformInfo *info, int64_t row_id,
1638 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
1639 {
1640 rc_t rc;
1641 char const *cigar = argv[ 0 ].u.data.base;
1642 unsigned const ciglen = ( unsigned const )argv[ 0 ].u.data.elem_count;
1643 int n;
1644 unsigned start = 0;
1645 unsigned end = ciglen;
1646
1647 assert( argv[ 0 ].u.data.elem_bits == sizeof( cigar[ 0 ] ) * 8 );
1648 cigar += argv[ 0 ].u.data.first_elem;
1649
1650 for ( n = 0; n != ciglen; ++n )
1651 {
1652 if ( !isdigit( cigar[ n ] ) )
1653 break;
1654 }
1655
1656 if ( cigar[ n ] == 'S' )
1657 start = n + 1;
1658
1659 if ( cigar[ end - 1 ] == 'S' )
1660 {
1661 --end;
1662 while ( end > start && isdigit( cigar[ end - 1 ] ) )
1663 --end;
1664 }
1665
1666 rslt->data->elem_bits = sizeof( cigar[ 0 ] ) * 8;
1667 rslt->elem_count = ( end > start ) ? end - start : 0;
1668 rc = KDataBufferResize( rslt->data, rslt->elem_count );
1669 if ( rc == 0 && rslt->elem_count > 0 )
1670 memmove( rslt->data->base, &cigar[ start ], ( size_t )rslt->elem_count );
1671
1672 return rc;
1673 }
1674
1675
remove_left_soft_clip(int const length,char const cigar[])1676 static int remove_left_soft_clip(int const length, char const cigar[ /* length */ ])
1677 {
1678 int i;
1679 for (i = 0; i < length; ++i) {
1680 int const ch = cigar[i];
1681 if (ch < '0' || ch > '9')
1682 break;
1683 }
1684 if (i < length && cigar[i] == 'S')
1685 return i + 1;
1686 else
1687 return 0;
1688 }
1689
remove_right_soft_clip(int const length,char const cigar[])1690 static int remove_right_soft_clip(int const length, char const cigar[ /* length */ ])
1691 {
1692 if (length > 0 && cigar[length - 1] == 'S') {
1693 int i = length - 1;
1694 while (i > 0) {
1695 int const ch = cigar[i - 1];
1696 if (ch < '0' || ch > '9')
1697 break;
1698 --i;
1699 }
1700 return i;
1701 }
1702 return length;
1703 }
1704
1705 static
clipped_cigar_impl_v2(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])1706 rc_t CC clipped_cigar_impl_v2 ( void *data, const VXformInfo *info, int64_t row_id,
1707 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
1708 {
1709 VRowData const *const argCigar = &argv[0];
1710 VRowData const *const argCigLen = &argv[1];
1711 char const* cigar = argCigar->u.data.base;
1712 INSDC_coord_len const *cigLen = argCigLen->u.data.base;
1713 int const N = (int)argCigLen->u.data.elem_count;
1714 rc_t rc = 0;
1715
1716 assert(argCigar->u.data.elem_bits == sizeof(cigar[0]) * 8);
1717 assert(argCigLen->u.data.elem_bits == sizeof(cigLen[0]) * 8);
1718
1719 if (argCigar->u.data.elem_count == 0) {
1720 rslt->elem_count = 0;
1721 return KDataBufferResize(rslt->data, 0);
1722 }
1723
1724 cigar += argCigar->u.data.first_elem;
1725 cigLen += argCigLen->u.data.first_elem;
1726
1727 if (data != NULL)
1728 { /* outputting the lengths */
1729 rslt->data->elem_bits = sizeof(cigLen[0]) * 8;
1730 rslt->elem_count = N;
1731 rc = KDataBufferResize(rslt->data, rslt->elem_count);
1732 if (rc == 0) {
1733 INSDC_coord_len *const out = rslt->data->base;
1734 int i;
1735
1736 for (i = 0; i < N; ++i) {
1737 int const len = cigLen[i];
1738 int const rlsc = remove_left_soft_clip(len, cigar);
1739 int const rrsc = remove_right_soft_clip(len, cigar);
1740
1741 out[i] = (INSDC_coord_len)(rrsc > rlsc ? (rrsc - rlsc) : 0);
1742 cigar += len;
1743 }
1744 }
1745 }
1746 else
1747 { /* outputting the strings */
1748 int i;
1749
1750 rslt->data->elem_bits = sizeof(cigar[0]) * 8;
1751 rslt->elem_count = 0;
1752
1753 for (i = 0; i < N; ++i) {
1754 int const len = cigLen[i];
1755 int const rlsc = remove_left_soft_clip(len, cigar);
1756 int const rrsc = remove_right_soft_clip(len, cigar);
1757 int const newLen = rrsc > rlsc ? (rrsc - rlsc) : 0;
1758 char *out;
1759
1760 rc = KDataBufferResize(rslt->data, rslt->elem_count + newLen);
1761 if (rc) return rc;
1762 out = rslt->data->base;
1763 memmove(&out[rslt->elem_count], &cigar[rlsc], (size_t)newLen);
1764 rslt->elem_count += newLen;
1765 cigar += len;
1766 }
1767 }
1768 return rc;
1769 }
1770
1771
1772 VTRANSFACT_IMPL ( NCBI_align_get_clipped_cigar, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
1773 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
1774 {
1775 rslt->u.rf = clipped_cigar_impl;
1776 rslt->variant = vftRow;
1777 rslt -> self = NULL;
1778 rslt -> whack = NULL;
1779 return 0;
1780 }
1781
1782
1783 VTRANSFACT_IMPL ( NCBI_align_get_clipped_cigar_2, 2, 0, 0 ) ( const void *Self, const VXfactInfo *info,
1784 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
1785 {
1786 const VTypedesc* tp = &info->fdesc.desc;
1787
1788 if ( tp->domain == vtdAscii && tp->intrinsic_bits == 8 )
1789 {
1790 rslt->self = NULL;
1791 }
1792 else if ( tp->domain == vtdUint && tp->intrinsic_bits == sizeof( INSDC_coord_len ) * 8 )
1793 {
1794 rslt->self = rslt; /* something different from NULL :) */
1795 }
1796 else
1797 {
1798 return RC( rcXF, rcFunction, rcConstructing, rcParam, rcIncorrect );
1799 }
1800 rslt->u.rf = clipped_cigar_impl_v2;
1801 rslt->variant = vftRow;
1802 rslt->whack = NULL;
1803 return 0;
1804 }
1805
1806
1807
1808 /*
1809 * function I32 NCBI:align:get_clipped_ref_offset #1 (
1810 * bool has_ref_offset, I32 ref_offset )
1811 */
1812 static
clipped_ref_offset_impl(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])1813 rc_t CC clipped_ref_offset_impl ( void *data, const VXformInfo *info, int64_t row_id,
1814 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
1815 {
1816 rc_t rc;
1817 unsigned start = 0;
1818 unsigned const n_offsets = ( unsigned const )argv[ 1 ].u.data.elem_count;
1819 int32_t const *ref_offset = argv[ 1 ].u.data.base;
1820
1821 assert( argv[ 1 ].u.data.elem_bits == sizeof( ref_offset[ 0 ] ) * 8 );
1822 ref_offset += argv[ 1 ].u.data.first_elem;
1823
1824 if ( n_offsets > 0 )
1825 {
1826 bool const *has_ref_offset = argv[ 0 ].u.data.base;
1827
1828 assert( argv[ 0 ].u.data.elem_bits == sizeof( has_ref_offset[ 0 ] ) * 8 );
1829 has_ref_offset += argv[ 0 ].u.data.first_elem;
1830
1831 if ( has_ref_offset[ 0 ] && ref_offset[ 0 ] < 0 )
1832 {
1833 start = 1;
1834 }
1835 }
1836
1837 rslt->data->elem_bits = sizeof( ref_offset[ 0 ] ) * 8;
1838 rslt->elem_count = n_offsets - start;
1839 rc = KDataBufferResize( rslt->data, rslt->elem_count );
1840 if ( rc == 0 )
1841 memmove( rslt->data->base,
1842 &ref_offset[ start ],
1843 ( size_t )( sizeof( ref_offset[ 0 ] ) * rslt->elem_count ) );
1844
1845 return rc;
1846 }
1847
1848
1849 VTRANSFACT_IMPL ( NCBI_align_get_clipped_ref_offset, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
1850 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
1851 {
1852 rslt->u.rf = clipped_ref_offset_impl;
1853 rslt->variant = vftRow;
1854 rslt -> self = NULL;
1855 rslt -> whack = NULL;
1856 return 0;
1857 }
1858
1859
1860 /*
1861 * function INSDC:coord:len NCBI:align:get_ref_len #1 (
1862 * bool has_ref_offset, I32 ref_offset, INSDC:coord:len right_clip )
1863 */
1864 static
get_ref_len_impl(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])1865 rc_t CC get_ref_len_impl ( void *data, const VXformInfo *info, int64_t row_id,
1866 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
1867 {
1868 rc_t rc;
1869 INSDC_coord_len result,right=0;
1870
1871 unsigned const read_len = ( unsigned const )argv[ 0 ].u.data.elem_count;
1872 unsigned const n_offsets = ( unsigned const )argv[ 1 ].u.data.elem_count;
1873 bool const *hro = argv[ 0 ].u.data.base;
1874 int32_t const *ref_offset = argv[ 1 ].u.data.base;
1875
1876 assert( argv[ 1 ].u.data.elem_bits == sizeof( ref_offset[ 0 ] ) * 8 );
1877
1878 hro += argv[ 0 ].u.data.first_elem;
1879 ref_offset += argv[ 1 ].u.data.first_elem;
1880
1881 if ( argc > 2 )
1882 {
1883 right = ( ( INSDC_coord_len* )argv[ 2 ].u.data.base )[ argv[ 2 ].u.data.first_elem ];
1884
1885 assert( argv[ 2 ].u.data.elem_bits == sizeof( right ) * 8 );
1886 assert( read_len >= right );
1887 }
1888
1889 if ( n_offsets == 0 )
1890 {
1891 if ( read_len < right )
1892 return RC( rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
1893 result = read_len - right;
1894 }
1895 else
1896 {
1897 int32_t ires, rov;
1898 unsigned i;
1899
1900 if ( argc > 2 ) /*** clipping ***/
1901 {
1902 for ( i = 0, ires = read_len - right; i < n_offsets; i++ )
1903 {
1904 memmove( &rov, ref_offset + i, sizeof rov );
1905 ires += rov;
1906 }
1907 }
1908 else
1909 {
1910 int32_t sum_pos, sum_neg;
1911 for ( i = 0, sum_pos = sum_neg = 0; i < n_offsets; i++ )
1912 {
1913 memmove( &rov, ref_offset + i, sizeof rov );
1914 if ( rov > 0 )
1915 sum_pos += rov;
1916 else
1917 sum_neg += rov;
1918 }
1919
1920 if ( sum_pos + sum_neg >= 0 ) /** all offsets may not over-shorten needed reference ***/
1921 {
1922 ires = read_len + sum_pos + sum_neg;
1923 }
1924 else /** inefficient case - exact reach into the reference is needed **/
1925 {
1926 unsigned j;
1927 for ( i = 0, j = 0, sum_pos = 0, ires = 0; j < read_len; j++ )
1928 {
1929 if ( hro[ i ] )
1930 {
1931 if ( i >= n_offsets )
1932 return RC( rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
1933 memmove( &rov, ref_offset + i, sizeof rov );
1934 ires += rov;
1935 i++;
1936 }
1937 sum_pos ++;
1938 if ( sum_pos > ires )
1939 ires=sum_pos;
1940 }
1941 }
1942 }
1943
1944 if ( ires < 0 )
1945 return RC( rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
1946 result = ires;
1947 }
1948
1949 rslt->data->elem_bits = sizeof( result ) * 8;
1950 rslt->elem_count = 1;
1951 rc = KDataBufferResize( rslt->data, rslt->elem_count );
1952 if ( rc == 0 )
1953 memmove( rslt->data->base, &result, sizeof( result ) );
1954
1955 return rc;
1956 }
1957
1958
1959 VTRANSFACT_IMPL ( NCBI_align_get_ref_len, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
1960 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
1961 {
1962 rslt->u.rf = get_ref_len_impl;
1963 rslt->variant = vftRow;
1964 rslt -> self = NULL;
1965 rslt -> whack = NULL;
1966 return 0;
1967 }
1968
1969
1970 /*
1971 * function INSDC:coord:len NCBI:align:get_ref_len #2 (
1972 * bool has_ref_offset,
1973 * I32 ref_offset)
1974 */
1975 static
get_ref_len_2_impl(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])1976 rc_t CC get_ref_len_2_impl ( void *data, const VXformInfo *info, int64_t row_id,
1977 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
1978 {
1979 rc_t rc;
1980 ARG_ALIAS(int32_t, refOffset, 1);
1981 unsigned const n = (unsigned)argv[1].u.data.elem_count;
1982 int result = (unsigned)argv[0].u.data.elem_count;
1983 unsigned i;
1984
1985 for (i = 0; i < n; ++i)
1986 result += (int)refOffset[i];
1987
1988 rslt->data->elem_bits = sizeof(INSDC_coord_len) * 8;
1989 rc = KDataBufferResize(rslt->data, rslt->elem_count = 1);
1990 if (rc == 0)
1991 ((INSDC_coord_len *)rslt->data->base)[0] = result;
1992
1993 return rc;
1994 }
1995
1996
1997 VTRANSFACT_IMPL(NCBI_align_get_ref_len_2, 2, 0, 0)(const void *Self, const VXfactInfo *info,
1998 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp)
1999 {
2000 rslt->u.rf = get_ref_len_2_impl;
2001 rslt->variant = vftRow;
2002 rslt -> self = NULL;
2003 rslt -> whack = NULL;
2004 return 0;
2005 }
2006
2007
2008 /*
2009 * function < type T > T NCBI:align:clip #1 ( T value,
2010 * INSDC:coord:len left, INSDC:coord:len right )
2011 */
2012 static
clip_impl(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])2013 rc_t CC clip_impl ( void *data, const VXformInfo *info, int64_t row_id,
2014 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
2015 {
2016 rc_t rc;
2017 INSDC_coord_len left = ( ( INSDC_coord_len const * )argv[ 1 ].u.data.base )[ argv[ 1 ].u.data.first_elem ];
2018 INSDC_coord_len right = ( ( INSDC_coord_len const * )argv[ 2 ].u.data.base )[ argv[ 2 ].u.data.first_elem ];
2019
2020 rslt->data->elem_bits = argv[ 0 ].u.data.elem_bits;
2021 if ( argv[ 0 ].u.data.elem_count >= left + right )
2022 {
2023 rslt->elem_count = argv[ 0 ].u.data.elem_count - left - right;
2024 }
2025 else
2026 {
2027 rslt->elem_count = 0;
2028 }
2029
2030 rc = KDataBufferResize( rslt->data, rslt->elem_count );
2031 if ( rc == 0 )
2032 {
2033 if ( ( rslt->data->elem_bits & 7 ) == 0 )
2034 {
2035 memmove( rslt->data->base,
2036 &( ( char const * )argv[ 0 ].u.data.base )[ ( ( left + argv[ 0 ].u.data.first_elem ) * argv[ 0 ].u.data.elem_bits ) >> 3 ],
2037 ( size_t )( ( rslt->elem_count * rslt->data->elem_bits ) >> 3 ) );
2038 }
2039 else
2040 {
2041 bitcpy( rslt->data->base,
2042 0,
2043 argv[ 0 ].u.data.base,
2044 ( left + argv[ 0 ].u.data.first_elem ) * argv[ 0 ].u.data.elem_bits,
2045 rslt->elem_count * rslt->data->elem_bits );
2046 }
2047 }
2048 return rc;
2049 }
2050
2051
2052 VTRANSFACT_IMPL ( NCBI_align_clip, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
2053 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
2054 {
2055 rslt->u.rf = clip_impl;
2056 rslt->variant = vftRow;
2057 rslt -> self = NULL;
2058 rslt -> whack = NULL;
2059 return 0;
2060 }
2061
2062
2063 static
clip_impl_2(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])2064 rc_t CC clip_impl_2 ( void *data, const VXformInfo *info, int64_t row_id,
2065 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
2066 {
2067 rc_t rc;
2068 const INSDC_coord_len* read_len = argv[ 1 ].u.data.base;
2069 const uint32_t nreads = ( const uint32_t )argv[ 1 ].u.data.elem_count;
2070 const INSDC_coord_len* left = argv[ 2 ].u.data.base;
2071 const INSDC_coord_len* right = argv[ 3 ].u.data.base;
2072 uint32_t n;
2073 INSDC_coord_zero start;
2074
2075 assert( argv[ 1 ].u.data.elem_count == argv[ 2 ].u.data.elem_count );
2076 assert( argv[ 1 ].u.data.elem_count == argv[ 3 ].u.data.elem_count );
2077 assert( argv[ 1 ].u.data.elem_bits == sizeof( read_len[ 0 ] ) * 8 );
2078 assert( argv[ 2 ].u.data.elem_bits == sizeof( left[ 0 ] ) * 8 );
2079 assert( argv[ 2 ].u.data.elem_bits == sizeof( right[ 0 ] ) * 8 );
2080
2081 read_len += argv[ 1 ].u.data.first_elem;
2082 left += argv[ 2 ].u.data.first_elem;
2083 right += argv[ 3 ].u.data.first_elem;
2084
2085 rslt->data->elem_bits = argv[ 0 ].u.data.elem_bits;
2086 rslt->elem_count = 0;
2087 rc = KDataBufferResize( rslt->data, rslt->elem_count );
2088 if ( rc != 0 )
2089 return rc;
2090 if ( argv[ 0 ].u.data.elem_count == 0 )
2091 return 0;
2092
2093 for ( n = start = 0; rc == 0 && n < nreads; start += read_len[ n++ ] )
2094 {
2095 uint64_t x = left[n] + right[n];
2096
2097 if ( argv[ 0 ].u.data.elem_count < start ||
2098 argv[ 0 ].u.data.elem_count < start + read_len[ n ] )
2099 {
2100 rc = RC( rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
2101 }
2102 /*
2103 assert(argv[0].u.data.elem_count >= start);
2104 assert(argv[0].u.data.elem_count >= start + read_len[n]);
2105 */
2106 else if ( read_len[ n ] > x )
2107 {
2108 x = read_len[ n ] - x;
2109 if ( x > 0 )
2110 {
2111 rc = KDataBufferResize( rslt->data, rslt->elem_count + x );
2112 if ( rc == 0 )
2113 {
2114 bitcpy( rslt->data->base,
2115 rslt->elem_count * argv[ 0 ].u.data.elem_bits,
2116 argv[ 0 ].u.data.base,
2117 ( argv[ 0 ].u.data.first_elem + start + left[ n ] ) * argv[ 0 ].u.data.elem_bits,
2118 x * rslt->data->elem_bits );
2119 rslt->elem_count += x;
2120 }
2121 }
2122 }
2123 }
2124
2125 return rc;
2126 }
2127
2128
2129 VTRANSFACT_IMPL ( NCBI_align_clip_2, 2, 0, 0 ) ( const void *Self, const VXfactInfo *info,
2130 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
2131 {
2132 rslt->u.rf = clip_impl_2;
2133 rslt->variant = vftRow;
2134 rslt -> self = NULL;
2135 rslt -> whack = NULL;
2136 return 0;
2137 }
2138
2139
2140 /*
2141 * function bool NCBI:align:get_ref_mismatch #1
2142 * ( bool has_mismatch, bool has_ref_offset, I32 ref_offset,
2143 * INSDC:coord:len ref_len )
2144 */
2145 static
get_ref_mismatch_impl(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])2146 rc_t CC get_ref_mismatch_impl ( void *data, const VXformInfo *info, int64_t row_id,
2147 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
2148 {
2149 rc_t rc;
2150 bool const *has_mismatch = argv [ 0 ] . u . data . base;
2151 bool const *has_ref_offset = argv [ 1 ] . u . data . base;
2152 int32_t const *ref_offset = argv [ 2 ] . u . data . base;
2153 INSDC_coord_len const *ref_len = argv [ 3 ] . u . data . base;
2154
2155 assert( argv[ 0 ].u.data.elem_bits == sizeof( has_mismatch[ 0 ] ) * 8 );
2156 assert( argv[ 1 ].u.data.elem_bits == sizeof( has_ref_offset[ 0 ] ) * 8 );
2157 assert( argv[ 2 ].u.data.elem_bits == sizeof( ref_offset[ 0 ] ) * 8 );
2158 assert( argv[ 3 ].u.data.elem_bits == sizeof( ref_len[ 0 ] ) * 8 );
2159
2160 has_mismatch += argv[ 0 ].u.data.first_elem;
2161 has_ref_offset += argv[ 1 ].u.data.first_elem;
2162 ref_offset += argv[ 2 ].u.data.first_elem;
2163
2164 rslt->data->elem_bits = sizeof( bool ) * 8;
2165 rslt->elem_count = ref_len[ argv[ 3 ].u.data.first_elem ];
2166 rc = KDataBufferResize( rslt->data, rslt->elem_count );
2167 if ( rc == 0 )
2168 {
2169 bool *result = ( bool* )rslt->data->base;
2170 unsigned j;
2171 unsigned ri;
2172 unsigned si;
2173
2174 memset( result, 0, ( size_t )( sizeof( result[ 0 ] ) * rslt->elem_count ) );
2175 for ( j = ri = si = 0; si < argv[ 0 ].u.data.elem_count; )
2176 {
2177 if ( has_ref_offset[ si ] )
2178 {
2179 int offset = ref_offset[ j++ ];
2180
2181 if ( offset > 0 )
2182 ri += offset;
2183 else
2184 {
2185 si -= offset;
2186 continue;
2187 }
2188 }
2189 if ( ri >= rslt->elem_count )
2190 break;
2191 if ( has_mismatch[ si ] )
2192 {
2193 result[ ri ] = 1;
2194 }
2195 ++si;
2196 ++ri;
2197 }
2198 }
2199
2200 return rc;
2201 }
2202
2203
2204 VTRANSFACT_IMPL ( NCBI_align_get_ref_mismatch, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
2205 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
2206 {
2207 rslt->u.rf = get_ref_mismatch_impl;
2208 rslt->variant = vftRow;
2209 rslt -> self = NULL;
2210 rslt -> whack = NULL;
2211 return 0;
2212 }
2213
2214
2215
2216 /*
2217 * function bool NCBI:align:get_ref_insert #1
2218 * ( bool has_mismatch, bool has_ref_offset, I32 ref_offset,
2219 * INSDC:coord:len ref_len )
2220 */
2221 static
get_ref_insert_impl(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])2222 rc_t CC get_ref_insert_impl ( void *data, const VXformInfo *info, int64_t row_id,
2223 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
2224 {
2225 rc_t rc;
2226 bool const *has_mismatch = argv [ 0 ] . u . data . base;
2227 bool const *has_ref_offset = argv [ 1 ] . u . data . base;
2228 int32_t const *ref_offset = argv [ 2 ] . u . data . base;
2229 INSDC_coord_len const *ref_len = argv [ 3 ] . u . data . base;
2230
2231 assert( argv[ 0 ].u.data.elem_bits == sizeof( has_mismatch[ 0 ] ) * 8 );
2232 assert( argv[ 1 ].u.data.elem_bits == sizeof( has_ref_offset[ 0 ] ) * 8 );
2233 assert( argv[ 2 ].u.data.elem_bits == sizeof( ref_offset[ 0 ] ) * 8 );
2234 assert( argv[ 3 ].u.data.elem_bits == sizeof( ref_len[ 0 ] ) * 8 );
2235
2236 has_mismatch += argv[ 0 ].u.data.first_elem;
2237 has_ref_offset += argv[ 1 ].u.data.first_elem;
2238 ref_offset += argv[ 2 ].u.data.first_elem;
2239
2240 rslt->data->elem_bits = sizeof( bool ) * 8;
2241 rslt->elem_count = ref_len[ argv[ 3 ].u.data.first_elem ];
2242 rc = KDataBufferResize( rslt->data, rslt->elem_count );
2243 if (rc == 0 && rslt->elem_count > 0) {
2244 bool *result = ( bool* )rslt->data->base;
2245 unsigned j;
2246 unsigned ri;
2247 unsigned si;
2248
2249 memset( result, 0, ( size_t )( sizeof( result[ 0 ] ) * rslt->elem_count ) );
2250 for ( j = ri = si = 0; si < argv[ 0 ].u.data.elem_count; )
2251 {
2252 if ( has_ref_offset[ si ] )
2253 {
2254 int offset = ref_offset[ j++ ];
2255
2256 if ( offset > 0 )
2257 {
2258 ri += offset;
2259 }
2260 else
2261 {
2262 if ( si > 0 )
2263 {
2264 if ( ri >= 1 )
2265 result[ ri - 1 ] = 1;
2266 result[ ri ] = 1;
2267 }
2268 si -= offset;
2269 continue;
2270 }
2271 }
2272 ++si;
2273 ++ri;
2274 }
2275 }
2276 return rc;
2277 }
2278
2279
2280 VTRANSFACT_IMPL ( NCBI_align_get_ref_insert, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
2281 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
2282 {
2283 rslt->u.rf = get_ref_insert_impl;
2284 rslt->variant = vftRow;
2285 rslt -> self = NULL;
2286 rslt -> whack = NULL;
2287 return 0;
2288 }
2289
2290
2291
2292 /*
2293 * function bool NCBI:align:get_ref_delete #1
2294 * ( bool has_mismatch, bool has_ref_offset, I32 ref_offset,
2295 * INSDC:coord:len ref_len )
2296 */
2297 static
get_ref_delete_impl(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])2298 rc_t CC get_ref_delete_impl ( void *data, const VXformInfo *info, int64_t row_id,
2299 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
2300 {
2301 rc_t rc;
2302 bool const *has_mismatch = argv [ 0 ] . u . data . base;
2303 bool const *has_ref_offset = argv [ 1 ] . u . data . base;
2304 int32_t const *ref_offset = argv [ 2 ] . u . data . base;
2305 INSDC_coord_len const *ref_len = argv [ 3 ] . u . data . base;
2306
2307 assert( argv[ 0 ].u.data.elem_bits == sizeof( has_mismatch[ 0 ] ) * 8 );
2308 assert( argv[ 1 ].u.data.elem_bits == sizeof( has_ref_offset[ 0 ] ) * 8 );
2309 assert( argv[ 2 ].u.data.elem_bits == sizeof( ref_offset[ 0 ] ) * 8 );
2310 assert( argv[ 3 ].u.data.elem_bits == sizeof( ref_len[ 0 ] ) * 8 );
2311
2312 has_mismatch += argv[ 0 ].u.data.first_elem;
2313 has_ref_offset += argv[ 1 ].u.data.first_elem;
2314 ref_offset += argv[ 2 ].u.data.first_elem;
2315
2316 rslt->data->elem_bits = sizeof( bool ) * 8;
2317 rslt->elem_count = ref_len[ argv[ 3 ].u.data.first_elem ];
2318 rc = KDataBufferResize( rslt->data, rslt->elem_count );
2319 if ( rc == 0 )
2320 {
2321 bool *result = ( bool* )rslt->data->base;
2322 unsigned j;
2323 unsigned ri;
2324 unsigned si;
2325
2326 memset( result, 0, ( size_t )( sizeof( result[ 0 ] ) * rslt->elem_count ) );
2327 for ( j = ri = si = 0; si < argv[ 0 ].u.data.elem_count; )
2328 {
2329 if ( has_ref_offset[ si ] )
2330 {
2331 int offset = ref_offset[ j++ ];
2332
2333 if ( offset > 0 )
2334 {
2335 memset( &result[ ri ], 1, offset );
2336 ri += offset;
2337 }
2338 else
2339 {
2340 si -= offset;
2341 continue;
2342 }
2343 }
2344 ++si;
2345 ++ri;
2346 }
2347 }
2348
2349 return rc;
2350 }
2351
2352
2353 VTRANSFACT_IMPL ( NCBI_align_get_ref_delete, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
2354 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
2355 {
2356 rslt->u.rf = get_ref_delete_impl;
2357 rslt->variant = vftRow;
2358 rslt -> self = NULL;
2359 rslt -> whack = NULL;
2360 return 0;
2361 }
2362
2363
2364
2365 #define USE_BIGGER_PRESERVE_BORDER 1
2366 /*
2367 * function bool NCBI:align:get_preserve_qual #1
2368 * ( bool has_mismatch, bool has_ref_offset, I32 ref_offset,
2369 * INSDC:coord:len ref_len )
2370 */
2371 static
get_ref_preserve_qual_impl(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])2372 rc_t CC get_ref_preserve_qual_impl ( void *data, const VXformInfo *info, int64_t row_id,
2373 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
2374 {
2375 rc_t rc;
2376 bool const *has_mismatch = argv [ 0 ] . u . data . base;
2377 bool const *has_ref_offset = argv [ 1 ] . u . data . base;
2378 int32_t const *ref_offset = argv [ 2 ] . u . data . base;
2379 INSDC_coord_len const *ref_len = argv [ 3 ] . u . data . base;
2380
2381 assert( argv[ 0 ].u.data.elem_bits == sizeof( has_mismatch[ 0 ] ) * 8 );
2382 assert( argv[ 1 ].u.data.elem_bits == sizeof( has_ref_offset[ 0 ] ) * 8 );
2383 assert( argv[ 2 ].u.data.elem_bits == sizeof( ref_offset[ 0 ] ) * 8 );
2384 assert( argv[ 3 ].u.data.elem_bits == sizeof( ref_len[ 0 ] ) * 8 );
2385
2386 has_mismatch += argv[ 0 ].u.data.first_elem;
2387 has_ref_offset += argv[ 1 ].u.data.first_elem;
2388 ref_offset += argv[ 2 ].u.data.first_elem;
2389
2390 rslt->data->elem_bits = sizeof( bool ) * 8;
2391 rslt->elem_count = ref_len[ argv[ 3 ].u.data.first_elem ];
2392 rc = KDataBufferResize( rslt->data, rslt->elem_count );
2393 if ( rc == 0 )
2394 {
2395 bool *result = ( bool* )rslt->data->base;
2396 unsigned j;
2397 unsigned ri;
2398 unsigned si;
2399
2400 memset( result, 0, ( size_t )( sizeof( result[ 0 ] ) * rslt->elem_count ) );
2401 for ( j = ri = si = 0; si < argv[ 0 ].u.data.elem_count; )
2402 {
2403 if ( has_ref_offset[ si ] )
2404 {
2405 int offset = ref_offset[ j++ ];
2406
2407 if ( offset > 0 )
2408 {
2409 /* Preserve the qualities for deleted bases + plus the border */
2410 #if USE_BIGGER_PRESERVE_BORDER
2411 if ( ri >= 2 ) result[ ri - 2 ] = 1;
2412 #endif
2413 if ( ri >= 1 ) result[ ri - 1 ] = 1;
2414 memset( &result[ ri ], 1, offset );
2415 ri += offset;
2416 result[ ri ] = 1;
2417 #if USE_BIGGER_PRESERVE_BORDER
2418 if ( ri + 1 < rslt->elem_count ) result[ ri + 1 ] = 1;
2419 #endif
2420 }
2421 else
2422 {
2423 if ( si > 0 )
2424 {
2425 /* Preserve the qualites for the bases bordering the insert */
2426 #if USE_BIGGER_PRESERVE_BORDER
2427 if ( ri >= 2 ) result[ ri - 2 ] = 1;
2428 #endif
2429 if ( ri >= 1 ) result[ ri - 1 ] = 1;
2430 result[ ri ] = 1;
2431 #if USE_BIGGER_PRESERVE_BORDER
2432 if ( ri + 1 < rslt->elem_count ) result[ ri + 1 ] = 1;
2433 #endif
2434 }
2435 si -= offset;
2436 continue;
2437 }
2438 }
2439 if ( ri >= rslt->elem_count ) break;
2440 if ( has_mismatch[ si ] )
2441 {
2442 #if USE_BIGGER_PRESERVE_BORDER
2443 if ( ri >= 1 ) result[ ri - 1 ] = 1;
2444 #endif
2445 result[ ri ] = 1;
2446 #if USE_BIGGER_PRESERVE_BORDER
2447 if ( ri + 1 < rslt->elem_count ) result[ ri + 1 ] = 1;
2448 #endif
2449 }
2450 ++si;
2451 ++ri;
2452 }
2453 }
2454 return rc;
2455 }
2456
2457
2458 VTRANSFACT_IMPL ( NCBI_align_get_ref_preserve_qual, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
2459 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
2460 {
2461 rslt->u.rf = get_ref_preserve_qual_impl;
2462 rslt->variant = vftRow;
2463 rslt -> self = NULL;
2464 rslt -> whack = NULL;
2465 return 0;
2466 }
2467
2468
2469
2470 /*
2471 * function bool NCBI:align:get_seq_preserve_qual #1
2472 * ( bool ref_preserve_qual, bool has_ref_offset, I32 ref_offset );
2473 */
2474 static
get_seq_preserve_qual_impl(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])2475 rc_t CC get_seq_preserve_qual_impl ( void *data, const VXformInfo *info, int64_t row_id,
2476 VRowResult *rslt, uint32_t argc, const VRowData argv [] )
2477 {
2478 rc_t rc;
2479 bool const *ref_pq = argv [ 0 ] . u . data . base;
2480 bool const *has_ref_offset = argv [ 1 ] . u . data . base;
2481 int32_t const *ref_offset = argv [ 2 ] . u . data . base;
2482
2483 assert( argv[ 0 ].u.data.elem_bits == sizeof( ref_pq[ 0 ] ) * 8 );
2484 assert( argv[ 1 ].u.data.elem_bits == sizeof( has_ref_offset[ 0 ] ) * 8 );
2485 assert( argv[ 2 ].u.data.elem_bits == sizeof( ref_offset[ 0 ] ) * 8 );
2486
2487 ref_pq += argv[ 0 ].u.data.first_elem;
2488 has_ref_offset += argv[ 1 ].u.data.first_elem;
2489 ref_offset += argv[ 2 ].u.data.first_elem;
2490
2491 rslt->data->elem_bits = sizeof( bool ) * 8;
2492 rslt->elem_count = argv[ 1 ].u.data.elem_count;
2493 rc = KDataBufferResize( rslt->data, rslt->elem_count );
2494 if ( rc == 0 )
2495 {
2496 bool *result = ( bool* )rslt->data->base;
2497 unsigned j;
2498 unsigned ri;
2499 unsigned si;
2500
2501 memset( result, 1, ( size_t )( sizeof( result[ 0 ] ) * rslt->elem_count ) );
2502 for ( j = ri = si = 0; si < argv[ 1 ].u.data.elem_count; )
2503 {
2504 if ( has_ref_offset[ si ] )
2505 {
2506 int offset = ref_offset[ j++ ];
2507
2508 if ( offset > 0 )
2509 {
2510 ri += offset;
2511 }
2512 else
2513 {
2514 si -= offset;
2515 continue;
2516 }
2517 }
2518 if ( ri >= argv[ 0 ].u.data.elem_count ) break;
2519 result[ si ] = ref_pq[ ri ];
2520 ++si;
2521 ++ri;
2522 }
2523 }
2524
2525 return rc;
2526 }
2527
2528 VTRANSFACT_IMPL ( NCBI_align_get_seq_preserve_qual, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
2529 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
2530 {
2531 rslt->u.rf = get_seq_preserve_qual_impl;
2532 rslt->variant = vftRow;
2533 rslt -> self = NULL;
2534 rslt -> whack = NULL;
2535 return 0;
2536 }
2537
2538
2539
2540 /*
2541 * function ascii NCBI:align:rna_orientation #1 ( NCBI:align:ro_type *ref_offset_type );
2542 */
2543 static
get_rna_orientation(void * data,const VXformInfo * info,int64_t row_id,VRowResult * rslt,uint32_t argc,const VRowData argv[])2544 rc_t CC get_rna_orientation(void *data, const VXformInfo *info, int64_t row_id,
2545 VRowResult *rslt, uint32_t argc, const VRowData argv [])
2546 {
2547 rc_t rc;
2548 uint8_t const *const offset_type = argc == 1 ? ((uint8_t const *)argv[0].u.data.base) + argv[0].u.data.first_elem : NULL;
2549 unsigned const count = argc == 1 ? (unsigned)argv[0].u.data.elem_count : 0;
2550
2551 assert(argv[ 0 ].u.data.elem_bits == sizeof(offset_type[0]) * 8);
2552
2553 rslt->data->elem_bits = sizeof(char) * 8;
2554 rslt->elem_count = 1;
2555 rc = KDataBufferResize(rslt->data, 1);
2556 if ( rc == 0 )
2557 {
2558 unsigned p_count = 0;
2559 unsigned m_count = 0;
2560 unsigned i;
2561 char *orient = rslt->data->base;
2562
2563 for (i = 0; i < count; ++i) {
2564 if (offset_type[i] == NCBI_align_ro_intron_plus)
2565 ++p_count;
2566 else if (offset_type[i] == NCBI_align_ro_intron_minus)
2567 ++m_count;
2568 }
2569 if (p_count > 0 && m_count == 0)
2570 *orient = '+';
2571 else if (m_count > 0 && p_count == 0)
2572 *orient = '-';
2573 else
2574 rslt->elem_count = 0;
2575 }
2576
2577 return rc;
2578 }
2579
2580 VTRANSFACT_IMPL ( NCBI_align_rna_orientation, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
2581 VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
2582 {
2583 rslt->u.rf = get_rna_orientation;
2584 rslt->variant = vftRow;
2585 rslt -> self = NULL;
2586 rslt -> whack = NULL;
2587 return 0;
2588 }
2589