1 /*===========================================================================
2 *
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26
27
28 #include "debug.h" /* DEBUG_MSG */
29 #include "defs.h" /* cg_eRightHalfDnbMap */
30 #include "formats.h" /* get_cg_reads_ngaps */
31 #include "writer-algn.h" /* CGWriterAlgn */
32
33 #include <align/align.h> /* NCBI_align_ro_complete_genomics */
34 #include <align/dna-reverse-cmpl.h> /* DNAReverseCompliment */
35
36 #include <klib/log.h> /* PLOGERR */
37 #include <klib/rc.h>
38 #include <klib/sort.h> /* ksort */
39
40 #include <sysalloc.h>
41
42 #include <assert.h>
43 #include <math.h> /* pow */
44 #include <string.h> /* strcmp */
45
46
47 typedef struct CGWriterAlgn_match_struct {
48 /* filled out by ReferenceMgr_Compress */
49 INSDC_coord_zero read_start;
50 INSDC_coord_len read_len;
51 bool has_ref_offset[CG_READS15_SPOT_LEN];
52 int32_t ref_offset[CG_READS15_SPOT_LEN];
53 uint8_t ref_offset_type[CG_READS15_SPOT_LEN];
54 bool has_mismatch[CG_READS15_SPOT_LEN];
55 char mismatch[CG_READS15_SPOT_LEN];
56 int64_t ref_id;
57 INSDC_coord_zero ref_start;
58 /* fill oud here */
59 int64_t seq_spot_id;
60 INSDC_coord_one seq_read_id;
61 bool ref_orientation;
62 uint32_t mapq;
63 /* used only only in secondary */
64 bool mate_ref_orientation;
65 int64_t mate_ref_id;
66 INSDC_coord_zero mate_ref_pos;
67 INSDC_coord_zero template_len;
68 INSDC_coord_len ref_len;
69 } CGWriterAlgn_match;
70
71 struct CGWriterAlgn {
72 const ReferenceMgr* rmgr;
73 const TableWriterAlgn* primary;
74 const TableWriterAlgn* secondary;
75 TableWriterAlgnData algn[CG_MAPPINGS_MAX];
76 CGWriterAlgn_match match[CG_MAPPINGS_MAX];
77 TMappingsData data;
78 uint32_t min_mapq;
79 bool single_mate;
80 uint64_t forced_pairs_cnt;
81 uint64_t dropped_mates_cnt;
82 };
83
84
85 static uint32_t global_cluster_size;
86
CGWriterAlgn_Make(const CGWriterAlgn ** cself,TMappingsData ** data,VDatabase * db,const ReferenceMgr * rmgr,uint32_t min_mapq,bool single_mate,uint32_t cluster_size)87 rc_t CGWriterAlgn_Make(const CGWriterAlgn** cself, TMappingsData** data, VDatabase* db, const ReferenceMgr* rmgr,
88 uint32_t min_mapq, bool single_mate, uint32_t cluster_size)
89 {
90 rc_t rc = 0;
91 CGWriterAlgn* self;
92
93 if( cself == NULL || db == NULL ) {
94 return RC(rcExe, rcFormatter, rcConstructing, rcParam, rcNull);
95 }
96 self = calloc(1, sizeof(*self));
97 if( self == NULL ) {
98 rc = RC(rcExe, rcFormatter, rcConstructing, rcMemory, rcExhausted);
99 } else {
100 if( (rc = TableWriterAlgn_Make(&self->primary, db,
101 ewalgn_tabletype_PrimaryAlignment, ewalgn_co_SEQ_SPOT_ID | ewalgn_co_unsorted)) != 0 ){
102 LOGERR(klogErr, rc, "primary alignment table");
103 } else if( (rc = TableWriterAlgn_Make(&self->secondary, db,
104 ewalgn_tabletype_SecondaryAlignment, ewalgn_co_SEQ_SPOT_ID | ewalgn_co_unsorted)) != 0 ) {
105 LOGERR(klogErr, rc, "secondary alignment table");
106 } else {
107 int i;
108 /* interconnect buffers */
109 for(i = 0; i < CG_MAPPINGS_MAX; i++) {
110 self->algn[i].seq_spot_id.buffer = &self->match[i].seq_spot_id;
111 self->algn[i].seq_spot_id.elements = 1;
112
113 self->algn[i].seq_read_id.buffer = &self->match[i].seq_read_id;
114 self->algn[i].seq_read_id.elements = 1;
115
116 self->algn[i].read_start.buffer = &self->match[i].read_start;
117
118 self->algn[i].read_len.buffer = &self->match[i].read_len;
119
120 self->algn[i].has_ref_offset.buffer = self->match[i].has_ref_offset;
121
122 self->algn[i].ref_offset.buffer = self->match[i].ref_offset;
123
124 self->algn[i].ref_offset_type.buffer = self->match[i].ref_offset_type;
125
126 self->algn[i].ref_id.buffer = &self->match[i].ref_id;
127
128 self->algn[i].ref_start.buffer = &self->match[i].ref_start;
129
130 self->algn[i].has_mismatch.buffer = self->match[i].has_mismatch;
131
132 self->algn[i].mismatch.buffer = self->match[i].mismatch;
133
134 self->algn[i].ref_orientation.buffer = &self->match[i].ref_orientation;
135 self->algn[i].ref_orientation.elements = 1;
136
137 self->algn[i].mapq.buffer = &self->match[i].mapq;
138 self->algn[i].mapq.elements = 1;
139
140 self->algn[i].mate_ref_orientation.buffer = &self->match[i].mate_ref_orientation;
141 self->algn[i].mate_ref_orientation.elements = 1;
142
143 self->algn[i].mate_ref_id.buffer = &self->match[i].mate_ref_id;
144 self->algn[i].mate_ref_id.elements = 1;
145
146 self->algn[i].mate_ref_pos.buffer = &self->match[i].mate_ref_pos;
147 self->algn[i].mate_ref_pos.elements = 1;
148
149 self->algn[i].template_len.buffer = &self->match[i].template_len;
150 self->algn[i].template_len.elements = 1;
151 }
152 self->rmgr = rmgr;
153 self->min_mapq = min_mapq;
154 self->single_mate = single_mate;
155 global_cluster_size = cluster_size;
156 *data = &self->data;
157 }
158 }
159 if( rc == 0 ) {
160 *cself = self;
161 } else {
162 CGWriterAlgn_Whack(self, false, NULL, NULL);
163 }
164 return rc;
165 }
166
CGWriterAlgn_Whack(const CGWriterAlgn * cself,bool commit,uint64_t * rows_1st,uint64_t * rows_2nd)167 rc_t CGWriterAlgn_Whack(const CGWriterAlgn* cself, bool commit, uint64_t* rows_1st, uint64_t* rows_2nd)
168 {
169 rc_t rc = 0;
170 if( cself != NULL ) {
171 CGWriterAlgn* self = (CGWriterAlgn*)cself;
172 rc_t rc1 = TableWriterAlgn_Whack(cself->primary, commit, rows_1st);
173 rc_t rc2 = TableWriterAlgn_Whack(cself->secondary, commit, rows_2nd);
174 if( self->forced_pairs_cnt > 0 ) {
175 PLOGMSG(klogInfo, (klogInfo, "$(forced_pairs_cnt) forced pairs to PRIMARY", "forced_pairs_cnt=%lu", self->forced_pairs_cnt));
176 }
177 if( self->dropped_mates_cnt > 0 ) {
178 PLOGMSG(klogInfo, (klogInfo, "$(dropped_mates_cnt) dropped duplicate mates in SECONDARY", "dropped_mates_cnt=%lu", self->dropped_mates_cnt));
179 }
180 rc = rc1 ? rc1 : rc2;
181 free(self);
182 }
183 return rc;
184 }
185
186 static
CGWriterAlgn_Save(CGWriterAlgn * const self,TReadsData * const rd,TableWriterAlgn const * const writer,uint32_t const mate,int64_t * const rowid)187 rc_t CGWriterAlgn_Save(CGWriterAlgn *const self,
188 TReadsData *const rd,
189 TableWriterAlgn const *const writer,
190 uint32_t const mate,
191 int64_t *const rowid)
192 {
193 rc_t rc = 0;
194 TMappingsData_map *const map = &self->data.map[mate];
195
196 if( !map->saved ) {
197 CGWriterAlgn_match *const match = &self->match[mate];
198 TableWriterAlgnData *const algn = &self->algn[mate];
199
200 uint32_t g = 0;
201
202 uint32_t* cigar = NULL;
203 uint32_t left_cigar15 []={ 5 << 4, 0, 10 << 4, 0, 10 << 4, 0,10 << 4 };
204 uint32_t right_cigar15[]={ 10 << 4, 0, 10 << 4, 0, 10 << 4, 0, 5 << 4 };
205 uint32_t left_cigar25 []={ 10 << 4, 0, 10 << 4, 0, 10 << 4, 0, 0 };
206 uint32_t right_cigar25[]={ 10 << 4, 0, 10 << 4, 0, 10 << 4, 0, 0 };
207 uint32_t *left_cigar = NULL;
208 uint32_t *right_cigar = NULL;
209 uint32_t cg_reads_ngaps = 0;
210
211 const char *read = NULL;
212 uint32_t read_len = 0;
213
214 assert(rd);
215
216 cg_reads_ngaps = get_cg_reads_ngaps(rd->reads_format);
217
218 read_len = rd->seq.spot_len / 2;
219 if (cg_reads_ngaps == 3) {
220 left_cigar = left_cigar15;
221 right_cigar = right_cigar15;
222 }
223 else if (cg_reads_ngaps == 2) {
224 left_cigar = left_cigar25;
225 right_cigar = right_cigar25;
226 }
227 else {
228 assert(0);
229 }
230
231 if (match->seq_read_id == 2) {
232 read = &((const char*)(rd->seq.sequence.buffer))[read_len];
233 cigar = right_cigar;
234 g = read_len;
235 }
236 else {
237 read = rd->seq.sequence.buffer;
238 cigar = left_cigar;
239 g = 0;
240 }
241 if (match->ref_orientation) {
242 if( rd->reverse[g] == '\0' ) {
243 if( (rc = DNAReverseCompliment(read, &rd->reverse[g], read_len)) != 0) {
244 return rc;
245 }
246 DEBUG_MSG(10, ("'%.*s' -> cg_eRevDnbStrand: '%.*s'\n", read_len, read, read_len, &rd->reverse[g]));
247 }
248 read = &rd->reverse[g];
249 cigar = (cigar == left_cigar) ? right_cigar : left_cigar;
250 }
251 for(g = 0; g < cg_reads_ngaps; g++) {
252 if( map->gap[g] > 0 ) {
253 cigar[g * 2 + 1] = (map->gap[g] << 4) | 3; /* 'xN' */
254 } else if( map->gap[g] < 0 ) {
255 cigar[g * 2 + 1] = (-map->gap[g] << 4) | 9; /* 'xB' */
256 } else {
257 cigar[g * 2 + 1] = 0; /* '0M' */
258 }
259 }
260 algn->ploidy = 0;
261 if( (rc = ReferenceMgr_Compress(self->rmgr, ewrefmgr_cmp_Binary,
262 map->chr, map->offset, read, read_len, cigar, 7, 0, NULL, 0, 0, NULL, 0, NCBI_align_ro_complete_genomics, algn)) != 0 ) {
263 PLOGERR(klogErr, (klogErr, rc, "compression failed $(id) $(o)",
264 PLOG_2(PLOG_S(id),PLOG_I32(o)), map->chr, map->offset));
265 }
266 else {
267 #if 1
268 /* this is to try represent these alignments as unmated to match cgatools
269 * axf uses the row length of MATE_REF_ORIENTATION as the indicator of
270 * mate presence
271 */
272 unsigned const save = algn->mate_ref_orientation.elements;
273
274 if (map->mate == mate)
275 algn->mate_ref_orientation.elements = 0;
276
277 rc = TableWriterAlgn_Write(writer, algn, rowid);
278
279 if (map->mate == mate)
280 algn->mate_ref_orientation.elements = save;
281 #else
282 rc = TableWriterAlgn_Write(writer, algn, rowid);
283 #endif
284 map->saved = true;
285 }
286 }
287
288 return rc;
289 }
290
291 #if 1
292 static
JointQ(double const Q1,double const Q2)293 double JointQ(double const Q1, double const Q2)
294 {
295 double const P1 = 1.0 - pow(10.0, Q1/-10.0); /* prob that 1 is not incorrect */
296 double const P2 = 1.0 - pow(10.0, Q2/-10.0); /* prob that 2 is not incorrect */
297 double const Pj = 1.0 - P1*P2; /* prob that 1+2 is incorrect */
298 double const Q = -10.0 * log10(Pj);
299
300 return Q;
301 }
302
303 static
FindBestPair(TMappingsData * const data)304 unsigned FindBestPair(TMappingsData *const data)
305 {
306 unsigned const N = data->map_qty;
307 unsigned i;
308 double maxq;
309 unsigned best = N;
310
311 /* pick best of the reciprocal pairs */
312 for (i = 0, maxq = -1.0; i != N; ++i) {
313 unsigned const mate = data->map[i].mate;
314 bool const is_left = (data->map[i].flags & cg_eRightHalfDnbMap) == 0;
315
316 if (mate < N && mate != i && data->map[mate].mate == i && is_left) {
317 double const Q1 = (int)data->map[i].weight - 33;
318 double const Q2 = (int)data->map[mate].weight - 33;
319 double const q = JointQ(Q1, Q2);
320
321 assert(Q1 >= 0);
322 assert(Q2 >= 0);
323 assert(q >= 0);
324 if (maxq < q) {
325 maxq = q;
326 best = i;
327 }
328 }
329 }
330 if (best < N)
331 return best;
332
333 /* no reciprocal pairs, pick best of any pair */
334 for (i = 0, maxq = 0.0; i != N; ++i) {
335 unsigned const mate = data->map[i].mate;
336 if (mate < N && mate != i) {
337 double const Q1 = (int)data->map[i].weight - 33;
338 double const Q2 = (int)data->map[mate].weight - 33;
339 double const q = JointQ(Q1, Q2);
340
341 assert(Q1 >= 0);
342 assert(Q2 >= 0);
343 assert(q >= 0);
344 if (maxq < q) {
345 maxq = q;
346 best = i;
347 }
348 }
349 }
350 if (best == N) {
351 /* no pair with a joint Q > 0; pick best mapping */
352 for (i = 0, maxq = 0.0; i != N; ++i) {
353 unsigned const mate = data->map[i].mate;
354 if (mate < N && mate != i) {
355 double const q = (int)data->map[i].weight - 33;
356
357 if (maxq < q) {
358 maxq = q;
359 best = i;
360 }
361 }
362 }
363 if (best == N) {
364 /* no mapping with Q > 0; pick first */
365 for (i = 0, maxq = 0.0; i != N; ++i) {
366 unsigned const mate = data->map[i].mate;
367 if (mate < N && mate != i) {
368 best = i;
369 break;
370 }
371 }
372 if (best == N) {
373 /* give up */
374 return N;
375 }
376 }
377 }
378 {
379 /* make the pair reciprocal */
380 unsigned const mate = data->map[best].mate;
381
382 if (mate < N) {
383 data->map[mate].mate = best;
384 return (data->map[best].flags & cg_eRightHalfDnbMap) ? mate : best;
385 }
386 return N;
387 }
388 }
389
390 static
FindBestLeft(TMappingsData * const data)391 unsigned FindBestLeft(TMappingsData *const data)
392 {
393 unsigned const N = data->map_qty;
394 unsigned i;
395 unsigned best;
396 int maxq;
397
398 for (best = N, maxq = -1, i = 0; i != N; ++i) {
399 int const q = (int)data->map[i].weight;
400
401 if ((data->map[i].flags & cg_eRightHalfDnbMap) == 0 && maxq < q) {
402 maxq = q;
403 best = i;
404 }
405 }
406 return best;
407 }
408
409 static
FindBestRight(TMappingsData * const data)410 unsigned FindBestRight(TMappingsData *const data)
411 {
412 unsigned const N = data->map_qty;
413 unsigned i;
414 unsigned best;
415 int maxq;
416
417 for (best = N, maxq = -1, i = 0; i != N; ++i) {
418 int const q = (int)data->map[i].weight;
419
420 if ((data->map[i].flags & cg_eRightHalfDnbMap) != 0 && maxq < q) {
421 maxq = q;
422 best = i;
423 }
424 }
425 return best;
426 }
427
428 static
check_in_cluster(TMappingsData_map const * const a,TMappingsData_map const * const b)429 bool check_in_cluster(TMappingsData_map const *const a, TMappingsData_map const *const b)
430 {
431 if ( (a->flags & cg_eRightHalfDnbMap) == (b->flags & cg_eRightHalfDnbMap)
432 && (strcmp(a->chr, b->chr) == 0)
433 && abs((int)a->offset - (int)b->offset) <= global_cluster_size)
434 {
435 return true;
436 }
437 return false;
438 }
439
440 static
clustering_sort_cb(void const * const A,void const * const B,void * const ctx)441 int64_t clustering_sort_cb(void const *const A, void const *const B, void *const ctx)
442 {
443 TMappingsData const *const data = (TMappingsData const *)ctx;
444 unsigned const ia = *(unsigned const *)A;
445 unsigned const ib = *(unsigned const *)B;
446 TMappingsData_map const *const a = &data->map[ia];
447 TMappingsData_map const *const b = &data->map[ib];
448 int64_t res;
449 unsigned j = 0;
450
451 res = (int64_t)(a->flags & cg_eRightHalfDnbMap) - (int64_t)(b->flags & cg_eRightHalfDnbMap); /**** separate by DNP side ***/
452 if (res) return res;
453
454 res = strcmp(a->chr, b->chr); /* same chromosome ? **/
455 if (res) return res;
456
457 res = (a->offset - b->offset) / (global_cluster_size + 1); /*** is it within the range ***/
458 if (res) return res;
459
460 /**cluster is defined here; now pick the winner **/
461 res = (int64_t)a->saved - (int64_t)b->saved; /*** if already saved **/
462 if (res) return -res;
463
464 res = (int64_t)a->weight - (int64_t)b->weight; /*** has higher score **/
465 if (res) return -res;
466
467 res = 0;
468 assert(data->cg_reads_ngaps);
469 for (j = 0; j != data->cg_reads_ngaps; ++j) {
470 res += (int64_t)(a->gap[j]) - (int64_t)(b->gap[j]);
471 } /** has lower projection on the reference **/
472
473 return res;
474 }
475
476 static
cluster_mates(TMappingsData * const data)477 void cluster_mates(TMappingsData *const data)
478 {
479 unsigned index[CG_MAPPINGS_MAX];
480 unsigned i;
481 unsigned j;
482
483 for (i = 0; i != data->map_qty; ++i)
484 index[i] = i;
485
486 ksort(index, data->map_qty, sizeof(index[0]), clustering_sort_cb, data);
487 for (i = 0, j = 1; j != data->map_qty; ++j) {
488 unsigned const ii = index[i];
489 unsigned const ij = index[j];
490 TMappingsData_map *const a = &data->map[ij];
491 TMappingsData_map const *const b = &data->map[ii];
492
493 if (check_in_cluster(a, b)) {
494 unsigned const a_mate = a->mate;
495 unsigned const b_mate = b->mate;
496
497 if ( a_mate == ij /** remove singletons **/
498 || a_mate == b_mate) /** or cluster originator has the same mate **/
499 {
500 a->saved = true;
501 DEBUG_MSG(10, ("mapping %u was dropped as a part of cluster at mapping %u\n", ij, ii));
502 }
503 }
504 else
505 i = j;
506 }
507 }
508
509 static
template_length(unsigned const self_left,unsigned const mate_left,unsigned const self_len,unsigned const mate_len,unsigned const read_id)510 INSDC_coord_zero template_length(unsigned const self_left,
511 unsigned const mate_left,
512 unsigned const self_len,
513 unsigned const mate_len,
514 unsigned const read_id)
515 {
516 /* adapted from libs/axf/template_len.c */
517 unsigned const self_right = self_left + self_len;
518 unsigned const mate_right = mate_left + mate_len;
519 unsigned const leftmost = (self_left < mate_left ) ? self_left : mate_left;
520 unsigned const rightmost = (self_right > mate_right) ? self_right : mate_right;
521 unsigned const tlen = rightmost - leftmost;
522
523 /* The standard says, "The leftmost segment has a plus sign and the rightmost has a minus sign." */
524 if ( (self_left <= mate_left && self_right >= mate_right) /* mate fully contained within self or */
525 || (mate_left <= self_left && mate_right >= self_right)) /* self fully contained within mate; */
526 {
527 if (self_left < mate_left || (read_id == 1 && self_left == mate_left))
528 return (INSDC_coord_zero)tlen;
529 else
530 return -(INSDC_coord_zero)tlen;
531 }
532 else if ( (self_right == mate_right && mate_left == leftmost) /* both are rightmost, but mate is leftmost */
533 || self_right == rightmost)
534 {
535 return -(INSDC_coord_zero)tlen;
536 }
537 else
538 return (INSDC_coord_zero)tlen;
539 }
540
541 static
CGWriterAlgn_Write_int(CGWriterAlgn * const self,TReadsData * const read)542 rc_t CGWriterAlgn_Write_int(CGWriterAlgn *const self, TReadsData *const read)
543 {
544 TMappingsData *const data = &self->data;
545 unsigned const N = data->map_qty;
546 rc_t rc = 0;
547
548 if (N != 0) {
549 unsigned left_prime = N;
550 unsigned right_prime = N;
551 unsigned i;
552 unsigned countLeft = 0;
553 unsigned countRight = 0;
554
555 assert(read);
556
557 data->cg_reads_ngaps = get_cg_reads_ngaps(read->reads_format);
558 assert(data->cg_reads_ngaps);
559
560 for (i = 0; i != N; ++i) {
561 char const *const refname = data->map[i].chr;
562 unsigned j;
563 INSDC_coord_len reflen = read->seq.spot_len / 2;
564 ReferenceSeq const *rseq;
565 bool shouldUnmap = false;
566 bool wasRenamed = false;
567
568 memset(&self->match[i], 0, sizeof(self->match[i]));
569
570 rc = ReferenceMgr_GetSeq(self->rmgr, &rseq, refname, &shouldUnmap, true, &wasRenamed);
571 if (rc) {
572 (void)PLOGERR(klogErr, (klogErr, rc, "Failed accessing Reference '$(ref)'", "ref=%s", refname));
573 break;
574 }
575 assert(shouldUnmap == false);
576 rc = ReferenceSeq_Get1stRow(rseq, &self->match[i].ref_id); /* if the above worked, this is infallible */
577 assert(rc == 0);
578 ReferenceSeq_Release(rseq);
579
580 for (j = 0; j != data->cg_reads_ngaps; ++j) {
581 reflen += data->map[i].gap[j];
582 }
583
584 self->match[i].seq_spot_id = read->rowid;
585 self->match[i].mapq = data->map[i].weight - 33;
586 self->match[i].ref_orientation = (data->map[i].flags & cg_eRevDnbStrand) ? true : false;
587 self->match[i].ref_len = reflen;
588
589 if (data->map[i].flags & cg_eRightHalfDnbMap) {
590 self->match[i].seq_read_id = 2;
591 ++countRight;
592 }
593 else {
594 self->match[i].seq_read_id = 1;
595 ++countLeft;
596 }
597 }
598
599 if (countLeft > 0 && countRight > 0) {
600 left_prime = FindBestPair(data);
601 if (left_prime < N) {
602 right_prime = data->map[left_prime].mate;
603 }
604 else { /* force the pairing */
605 left_prime = FindBestLeft(data);
606 right_prime = FindBestRight(data);
607 data->map[left_prime].mate = right_prime;
608 data->map[right_prime].mate = left_prime;
609 }
610 for (i = 0; i != N; ++i) {
611 unsigned const mate = data->map[i].mate;
612
613 if (mate < N && mate != i) {
614 INSDC_coord_zero const tlen = (self->match[i].ref_id == self->match[mate].ref_id)
615 ? template_length(data->map[i].offset,
616 data->map[mate].offset,
617 self->match[i].ref_len,
618 self->match[mate].ref_len,
619 self->match[i].seq_read_id)
620 : 0;
621
622 self->match[i].mate_ref_id = self->match[mate].ref_id;
623 self->match[i].mate_ref_orientation = self->match[mate].ref_orientation;
624 self->match[i].mate_ref_pos = data->map[mate].offset;
625 self->match[i].template_len = tlen;
626 }
627 }
628 }
629 else if (countLeft > 0) {
630 left_prime = FindBestLeft(data);
631 }
632 else {
633 assert(countRight > 0);
634 right_prime = FindBestRight(data);
635 }
636
637 read->align_count[0] = countLeft < 254 ? countLeft : 255;
638 read->align_count[1] = countRight < 254 ? countRight : 255;
639 if (rc == 0 && left_prime < N) {
640 rc = CGWriterAlgn_Save(self, read, self->primary, left_prime,
641 &read->prim_algn_id[0]);
642 read->prim_is_reverse[0] = self->match[left_prime].ref_orientation;
643 data->map[left_prime].saved = 1;
644 }
645 if (rc == 0 && right_prime < N) {
646 rc = CGWriterAlgn_Save(self, read, self->primary, right_prime,
647 &read->prim_algn_id[1]);
648 read->prim_is_reverse[1] = self->match[right_prime].ref_orientation;
649 data->map[right_prime].saved = 1;
650 }
651 if (global_cluster_size > 0 && data->map_qty > 1 + (left_prime < N ? 1 : 0) + (right_prime < N ? 1 : 0)) {
652 cluster_mates(data);
653 }
654 for (i = 0; i != N && rc == 0; ++i) {
655 if (data->map[i].saved || self->match[i].mapq < self->min_mapq)
656 continue;
657 rc = CGWriterAlgn_Save(self, read, self->secondary, i, NULL);
658 }
659 }
660 return rc;
661 }
662
CGWriterAlgn_Write(const CGWriterAlgn * cself,TReadsData * read)663 rc_t CGWriterAlgn_Write(const CGWriterAlgn* cself, TReadsData* read)
664 {
665 assert(cself != NULL);
666 assert(read != NULL);
667 assert(read->seq.sequence.buffer != NULL
668 && read->seq.sequence.elements == read->seq.spot_len
669 && (read->seq.sequence.elements == CG_READS15_SPOT_LEN ||
670 read->seq.sequence.elements == CG_READS25_SPOT_LEN));
671
672 memset(read->prim_algn_id, 0, sizeof(read->prim_algn_id));
673 memset(read->align_count, 0, sizeof(read->align_count));
674 memset(read->prim_is_reverse, 0, sizeof(read->prim_is_reverse));
675
676 return CGWriterAlgn_Write_int((CGWriterAlgn *)cself, read);
677 }
678
679 #else
680
CGWriterAlgn_Write(const CGWriterAlgn * cself,TReadsData * read)681 rc_t CGWriterAlgn_Write(const CGWriterAlgn* cself, TReadsData* read)
682 {
683 if( cself->data.map_qty != 0 ) {
684 /* primary is-found indicator: weights are ASCII-33 so they can't be 0 if found */
685 uint8_t left_weight = 0, right_weight = 0, pair_weight = 0;
686 uint32_t i, left_prim = 0, right_prim = 0, paired = 0;
687
688 CGWriterAlgn* self = (CGWriterAlgn*)cself;
689 TMappingsData* data = &self->data;
690
691 /* find best left, right and pair */
692 for(i = 0; i < data->map_qty; i++) {
693 int k = (data->map[i].flags & cg_eRightHalfDnbMap) ? 1 : 0;
694 if( read->align_count[k] < 254 ) {
695 read->align_count[k]++;
696 }
697 if( k == 0 ) {
698 if( left_weight < data->map[i].weight ) {
699 left_prim = i;
700 left_weight = data->map[i].weight;
701 }
702 } else {
703 if( right_weight < data->map[i].weight ) {
704 right_prim = i;
705 right_weight = data->map[i].weight;
706 }
707 }
708 if( i != data->map[i].mate && pair_weight < data->map[i].weight ) {
709 if( data->map[i].mate < data->map_qty ) {
710 /* note pair's left mate id */
711 paired = k == 0 ? i : data->map[i].mate;
712 pair_weight = data->map[i].weight;
713 } else {
714 /* fail safe in case mate id is out of map boundaries */
715 data->map[i].mate = i;
716 }
717 }
718 }
719 /* choose primary pair */
720 if( left_weight > right_weight && data->map[left_prim].mate != left_prim ) {
721 /* left is better and has a mate -> choose left pair */
722 right_prim = data->map[left_prim].mate;
723 right_weight = data->map[right_prim].weight;
724 } else if( right_weight > left_weight && data->map[right_prim].mate != right_prim ) {
725 /* right is better and has a mate -> choose right pair */
726 left_prim = data->map[right_prim].mate;
727 left_weight = data->map[left_prim].weight;
728 } else if( pair_weight > 0 ) {
729 /* use paired as primary */
730 left_prim = paired;
731 left_weight = data->map[left_prim].weight;
732 right_prim = data->map[left_prim].mate;
733 right_weight = data->map[right_prim].weight;
734 } else if( left_weight > 0 && right_weight > 0 ) {
735 /* force best left and right to be mates */
736 data->map[left_prim].mate = right_prim;
737 data->map[right_prim].mate = left_prim;
738 self->forced_pairs_cnt++;
739 DEBUG_MSG(10, ("forced pair: %u %u\n", left_prim, right_prim));
740 }
741 #if _DEBUGGING
742 DEBUG_MSG(10, ("alignment_count [%hu,%hu]", read->align_count[0], read->align_count[1]));
743 DEBUG_MSG(10, (" left primary: "));
744 if( left_weight > 0 ) {
745 DEBUG_MSG(10, ("weight %hu [%c], id %u", left_weight, left_weight, left_prim));
746 } else {
747 DEBUG_MSG(10, ("none"));
748 }
749 DEBUG_MSG(10, ("; right primary: "));
750 if( right_weight > 0 ) {
751 DEBUG_MSG(10, ("weight %hu [%c], id %u", right_weight, right_weight, right_prim));
752 } else {
753 DEBUG_MSG(10, ("none"));
754 }
755 DEBUG_MSG(10, ("\n"));
756 #endif
757 assert((left_weight == 0 && read->align_count[0] == 0) || (left_weight > 0 && read->align_count[0] > 0));
758 assert((right_weight == 0 && read->align_count[1] == 0) || (right_weight > 0 && read->align_count[1] > 0));
759
760 /* write left primary */
761 if( rc == 0 && left_weight > 0 ) {
762 rc = CGWriterAlgn_Save(self, read, self->primary, left_prim, &read->prim_algn_id[0]);
763 read->prim_is_reverse[0] = cself->match[left_prim].ref_orientation;
764 }
765 /* write right primary */
766 if( rc == 0 && right_weight > 0 ) {
767 rc = CGWriterAlgn_Save(self, read, self->primary, right_prim, &read->prim_algn_id[1]);
768 read->prim_is_reverse[1] = cself->match[right_prim].ref_orientation;
769 }
770 DEBUG_MSG(10, ("prim_algn_rowid [%li,%li], ", read->prim_algn_id[0], read->prim_algn_id[1]));
771 DEBUG_MSG(10, ("prim_is_reverse [%hu,%hu]\n", read->prim_is_reverse[0], read->prim_is_reverse[1]));
772 if( rc == 0 ) {
773 /* others go to secondary */
774 int64_t row;
775
776 rc = TableWriterAlgn_GetNextRowId(cself->secondary, &row);
777 if( global_cluster_size > 0 && data->map_qty > 1) {
778 cluster_mates(data);
779 }
780 if( cself->single_mate ) {
781 /* we need to re-mate in case original mate's weight is lower */
782 for(i = 0; rc == 0 && i < data->map_qty; i++ ) {
783 if( !data->map[i].saved && data->map[i].weight >= cself->min_mapq ) {
784 uint32_t mate = data->map[i].mate;
785 if( mate != i && data->map[mate].mate != i ) {
786 self->dropped_mates_cnt++;
787 if( data->map[data->map[mate].mate].weight < data->map[i].weight ) {
788 /* do not save my mate's mate */
789 DEBUG_MSG(10, ("mate %u dropped as pair of %u\n", data->map[mate].mate, mate));
790 data->map[data->map[mate].mate].saved = true;
791 /* repoint mate to me */
792 data->map[mate].mate = i;
793 } else {
794 /* do not save me */
795 DEBUG_MSG(10, ("mate %u dropped as pair of %u\n", i, mate));
796 data->map[i].saved = true;
797 }
798 }
799 }
800 }
801 }
802 for(i = 0; rc == 0 && i < data->map_qty; i++ ) {
803 if( !data->map[i].saved && data->map[i].weight >= cself->min_mapq ) {
804 uint32_t mate = data->map[i].mate;
805 /* no mate or mate is under-weigth */
806 if( mate == i || data->map[mate].weight < cself->min_mapq ||
807 /* or mate was saved in primary */
808 (left_weight > 0 && mate == left_prim) || (right_weight > 0 && mate == right_prim) ) {
809 self->match[i].mate_align_id = 0;
810 rc = CGWriterAlgn_Save(self, read, self->secondary, i, NULL);
811 row++;
812 } else {
813 self->match[mate].mate_align_id = row++;
814 self->match[i].mate_align_id = row++;
815 if( (rc = CGWriterAlgn_Save(self, read, self->secondary, i, NULL)) == 0 ) {
816 rc = CGWriterAlgn_Save(self, read, self->secondary, mate, NULL);
817 }
818 }
819 }
820 }
821 }
822 }
823 return rc;
824 }
825 #endif
826