1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 
27 
28 #include "debug.h" /* DEBUG_MSG */
29 #include "defs.h" /* cg_eRightHalfDnbMap */
30 #include "formats.h" /* get_cg_reads_ngaps */
31 #include "writer-algn.h" /* CGWriterAlgn */
32 
33 #include <align/align.h> /* NCBI_align_ro_complete_genomics */
34 #include <align/dna-reverse-cmpl.h> /* DNAReverseCompliment */
35 
36 #include <klib/log.h> /* PLOGERR */
37 #include <klib/rc.h>
38 #include <klib/sort.h> /* ksort */
39 
40 #include <sysalloc.h>
41 
42 #include <assert.h>
43 #include <math.h> /* pow */
44 #include <string.h> /* strcmp */
45 
46 
47 typedef struct CGWriterAlgn_match_struct {
48     /* filled out by ReferenceMgr_Compress */
49     INSDC_coord_zero read_start;
50     INSDC_coord_len read_len;
51     bool has_ref_offset[CG_READS15_SPOT_LEN];
52     int32_t ref_offset[CG_READS15_SPOT_LEN];
53     uint8_t ref_offset_type[CG_READS15_SPOT_LEN];
54     bool has_mismatch[CG_READS15_SPOT_LEN];
55     char mismatch[CG_READS15_SPOT_LEN];
56     int64_t ref_id;
57     INSDC_coord_zero ref_start;
58     /* fill oud here */
59     int64_t seq_spot_id;
60     INSDC_coord_one seq_read_id;
61     bool ref_orientation;
62     uint32_t mapq;
63     /* used only only in secondary */
64     bool mate_ref_orientation;
65     int64_t mate_ref_id;
66     INSDC_coord_zero mate_ref_pos;
67     INSDC_coord_zero template_len;
68     INSDC_coord_len ref_len;
69 } CGWriterAlgn_match;
70 
71 struct CGWriterAlgn {
72     const ReferenceMgr* rmgr;
73     const TableWriterAlgn* primary;
74     const TableWriterAlgn* secondary;
75     TableWriterAlgnData algn[CG_MAPPINGS_MAX];
76     CGWriterAlgn_match match[CG_MAPPINGS_MAX];
77     TMappingsData data;
78     uint32_t min_mapq;
79     bool single_mate;
80     uint64_t forced_pairs_cnt;
81     uint64_t dropped_mates_cnt;
82 };
83 
84 
85 static uint32_t global_cluster_size;
86 
CGWriterAlgn_Make(const CGWriterAlgn ** cself,TMappingsData ** data,VDatabase * db,const ReferenceMgr * rmgr,uint32_t min_mapq,bool single_mate,uint32_t cluster_size)87 rc_t CGWriterAlgn_Make(const CGWriterAlgn** cself, TMappingsData** data, VDatabase* db, const ReferenceMgr* rmgr,
88                        uint32_t min_mapq, bool single_mate, uint32_t cluster_size)
89 {
90     rc_t rc = 0;
91     CGWriterAlgn* self;
92 
93     if( cself == NULL || db == NULL ) {
94         return RC(rcExe, rcFormatter, rcConstructing, rcParam, rcNull);
95     }
96     self = calloc(1, sizeof(*self));
97     if( self == NULL ) {
98         rc = RC(rcExe, rcFormatter, rcConstructing, rcMemory, rcExhausted);
99     } else {
100         if( (rc = TableWriterAlgn_Make(&self->primary, db,
101                             ewalgn_tabletype_PrimaryAlignment, ewalgn_co_SEQ_SPOT_ID | ewalgn_co_unsorted)) != 0 ){
102             LOGERR(klogErr, rc, "primary alignment table");
103         } else if( (rc = TableWriterAlgn_Make(&self->secondary, db,
104                             ewalgn_tabletype_SecondaryAlignment, ewalgn_co_SEQ_SPOT_ID | ewalgn_co_unsorted)) != 0 ) {
105             LOGERR(klogErr, rc, "secondary alignment table");
106         } else {
107             int i;
108             /* interconnect buffers */
109             for(i = 0; i < CG_MAPPINGS_MAX; i++) {
110                 self->algn[i].seq_spot_id.buffer = &self->match[i].seq_spot_id;
111                 self->algn[i].seq_spot_id.elements = 1;
112 
113                 self->algn[i].seq_read_id.buffer = &self->match[i].seq_read_id;
114                 self->algn[i].seq_read_id.elements = 1;
115 
116                 self->algn[i].read_start.buffer = &self->match[i].read_start;
117 
118                 self->algn[i].read_len.buffer = &self->match[i].read_len;
119 
120                 self->algn[i].has_ref_offset.buffer = self->match[i].has_ref_offset;
121 
122                 self->algn[i].ref_offset.buffer = self->match[i].ref_offset;
123 
124                 self->algn[i].ref_offset_type.buffer = self->match[i].ref_offset_type;
125 
126                 self->algn[i].ref_id.buffer = &self->match[i].ref_id;
127 
128                 self->algn[i].ref_start.buffer = &self->match[i].ref_start;
129 
130                 self->algn[i].has_mismatch.buffer = self->match[i].has_mismatch;
131 
132                 self->algn[i].mismatch.buffer = self->match[i].mismatch;
133 
134                 self->algn[i].ref_orientation.buffer = &self->match[i].ref_orientation;
135                 self->algn[i].ref_orientation.elements = 1;
136 
137                 self->algn[i].mapq.buffer = &self->match[i].mapq;
138                 self->algn[i].mapq.elements = 1;
139 
140                 self->algn[i].mate_ref_orientation.buffer = &self->match[i].mate_ref_orientation;
141                 self->algn[i].mate_ref_orientation.elements = 1;
142 
143                 self->algn[i].mate_ref_id.buffer = &self->match[i].mate_ref_id;
144                 self->algn[i].mate_ref_id.elements = 1;
145 
146                 self->algn[i].mate_ref_pos.buffer = &self->match[i].mate_ref_pos;
147                 self->algn[i].mate_ref_pos.elements = 1;
148 
149                 self->algn[i].template_len.buffer = &self->match[i].template_len;
150                 self->algn[i].template_len.elements = 1;
151             }
152             self->rmgr = rmgr;
153             self->min_mapq = min_mapq;
154             self->single_mate = single_mate;
155             global_cluster_size = cluster_size;
156             *data = &self->data;
157         }
158     }
159     if( rc == 0 ) {
160         *cself = self;
161     } else {
162         CGWriterAlgn_Whack(self, false, NULL, NULL);
163     }
164     return rc;
165 }
166 
CGWriterAlgn_Whack(const CGWriterAlgn * cself,bool commit,uint64_t * rows_1st,uint64_t * rows_2nd)167 rc_t CGWriterAlgn_Whack(const CGWriterAlgn* cself, bool commit, uint64_t* rows_1st, uint64_t* rows_2nd)
168 {
169     rc_t rc = 0;
170     if( cself != NULL ) {
171         CGWriterAlgn* self = (CGWriterAlgn*)cself;
172         rc_t rc1 = TableWriterAlgn_Whack(cself->primary, commit, rows_1st);
173         rc_t rc2 = TableWriterAlgn_Whack(cself->secondary, commit, rows_2nd);
174         if( self->forced_pairs_cnt > 0 ) {
175             PLOGMSG(klogInfo, (klogInfo, "$(forced_pairs_cnt) forced pairs to PRIMARY", "forced_pairs_cnt=%lu", self->forced_pairs_cnt));
176         }
177         if( self->dropped_mates_cnt > 0 ) {
178             PLOGMSG(klogInfo, (klogInfo, "$(dropped_mates_cnt) dropped duplicate mates in SECONDARY", "dropped_mates_cnt=%lu", self->dropped_mates_cnt));
179         }
180         rc = rc1 ? rc1 : rc2;
181         free(self);
182     }
183     return rc;
184 }
185 
186 static
CGWriterAlgn_Save(CGWriterAlgn * const self,TReadsData * const rd,TableWriterAlgn const * const writer,uint32_t const mate,int64_t * const rowid)187 rc_t CGWriterAlgn_Save(CGWriterAlgn *const self,
188                        TReadsData *const rd,
189                        TableWriterAlgn const *const writer,
190                        uint32_t const mate,
191                        int64_t *const rowid)
192 {
193     rc_t rc = 0;
194     TMappingsData_map *const map = &self->data.map[mate];
195 
196     if( !map->saved ) {
197         CGWriterAlgn_match *const match = &self->match[mate];
198         TableWriterAlgnData *const algn = &self->algn[mate];
199 
200         uint32_t g = 0;
201 
202         uint32_t* cigar = NULL;
203         uint32_t left_cigar15 []={  5 << 4, 0, 10 << 4, 0, 10 << 4, 0,10 << 4 };
204         uint32_t right_cigar15[]={ 10 << 4, 0, 10 << 4, 0, 10 << 4, 0, 5 << 4 };
205         uint32_t left_cigar25 []={ 10 << 4, 0, 10 << 4, 0, 10 << 4, 0, 0 };
206         uint32_t right_cigar25[]={ 10 << 4, 0, 10 << 4, 0, 10 << 4, 0, 0 };
207         uint32_t *left_cigar  = NULL;
208         uint32_t *right_cigar = NULL;
209         uint32_t cg_reads_ngaps = 0;
210 
211         const char *read = NULL;
212         uint32_t read_len = 0;
213 
214         assert(rd);
215 
216         cg_reads_ngaps = get_cg_reads_ngaps(rd->reads_format);
217 
218         read_len = rd->seq.spot_len / 2;
219         if (cg_reads_ngaps == 3) {
220             left_cigar  = left_cigar15;
221             right_cigar = right_cigar15;
222         }
223         else if (cg_reads_ngaps == 2) {
224             left_cigar  = left_cigar25;
225             right_cigar = right_cigar25;
226         }
227         else {
228             assert(0);
229         }
230 
231         if (match->seq_read_id == 2) {
232             read = &((const char*)(rd->seq.sequence.buffer))[read_len];
233             cigar = right_cigar;
234             g = read_len;
235         }
236         else {
237             read = rd->seq.sequence.buffer;
238             cigar = left_cigar;
239             g = 0;
240         }
241         if (match->ref_orientation) {
242             if( rd->reverse[g] == '\0' ) {
243                 if( (rc = DNAReverseCompliment(read, &rd->reverse[g], read_len)) != 0) {
244                     return rc;
245                 }
246                 DEBUG_MSG(10, ("'%.*s' -> cg_eRevDnbStrand: '%.*s'\n", read_len, read, read_len, &rd->reverse[g]));
247             }
248             read = &rd->reverse[g];
249             cigar = (cigar == left_cigar) ? right_cigar : left_cigar;
250         }
251         for(g = 0; g < cg_reads_ngaps; g++) {
252             if( map->gap[g] > 0 ) {
253                 cigar[g * 2 + 1] = (map->gap[g] << 4) | 3; /* 'xN' */
254             } else if( map->gap[g] < 0 ) {
255                 cigar[g * 2 + 1] = (-map->gap[g] << 4) | 9; /* 'xB' */
256             } else {
257                 cigar[g * 2 + 1] = 0; /* '0M' */
258             }
259         }
260         algn->ploidy = 0;
261         if( (rc = ReferenceMgr_Compress(self->rmgr, ewrefmgr_cmp_Binary,
262                     map->chr, map->offset, read, read_len, cigar, 7, 0, NULL, 0, 0, NULL, 0, NCBI_align_ro_complete_genomics, algn)) != 0 ) {
263             PLOGERR(klogErr, (klogErr, rc, "compression failed $(id) $(o)",
264                     PLOG_2(PLOG_S(id),PLOG_I32(o)), map->chr, map->offset));
265         }
266         else {
267 #if 1
268             /* this is to try represent these alignments as unmated to match cgatools
269              * axf uses the row length of MATE_REF_ORIENTATION as the indicator of
270              * mate presence
271              */
272             unsigned const save = algn->mate_ref_orientation.elements;
273 
274             if (map->mate == mate)
275                 algn->mate_ref_orientation.elements = 0;
276 
277             rc = TableWriterAlgn_Write(writer, algn, rowid);
278 
279             if (map->mate == mate)
280                 algn->mate_ref_orientation.elements = save;
281 #else
282             rc = TableWriterAlgn_Write(writer, algn, rowid);
283 #endif
284             map->saved = true;
285         }
286     }
287 
288     return rc;
289 }
290 
291 #if 1
292 static
JointQ(double const Q1,double const Q2)293 double JointQ(double const Q1, double const Q2)
294 {
295     double const P1 =   1.0 - pow(10.0, Q1/-10.0);  /* prob that  1  is not incorrect */
296     double const P2 =   1.0 - pow(10.0, Q2/-10.0);  /* prob that  2  is not incorrect */
297     double const Pj =   1.0 - P1*P2;                /* prob that 1+2   is   incorrect */
298     double const Q  = -10.0 * log10(Pj);
299 
300     return Q;
301 }
302 
303 static
FindBestPair(TMappingsData * const data)304 unsigned FindBestPair(TMappingsData *const data)
305 {
306     unsigned const N = data->map_qty;
307     unsigned i;
308     double maxq;
309     unsigned best = N;
310 
311     /* pick best of the reciprocal pairs */
312     for (i = 0, maxq = -1.0; i != N; ++i) {
313         unsigned const mate = data->map[i].mate;
314         bool const is_left = (data->map[i].flags & cg_eRightHalfDnbMap) == 0;
315 
316         if (mate < N && mate != i && data->map[mate].mate == i && is_left) {
317             double const Q1 = (int)data->map[i].weight - 33;
318             double const Q2 = (int)data->map[mate].weight - 33;
319             double const q = JointQ(Q1, Q2);
320 
321             assert(Q1 >= 0);
322             assert(Q2 >= 0);
323             assert(q >= 0);
324             if (maxq < q) {
325                 maxq = q;
326                 best = i;
327             }
328         }
329     }
330     if (best < N)
331         return best;
332 
333     /* no reciprocal pairs, pick best of any pair */
334     for (i = 0, maxq = 0.0; i != N; ++i) {
335         unsigned const mate = data->map[i].mate;
336         if (mate < N && mate != i) {
337             double const Q1 = (int)data->map[i].weight - 33;
338             double const Q2 = (int)data->map[mate].weight - 33;
339             double const q = JointQ(Q1, Q2);
340 
341             assert(Q1 >= 0);
342             assert(Q2 >= 0);
343             assert(q >= 0);
344             if (maxq < q) {
345                 maxq = q;
346                 best = i;
347             }
348         }
349     }
350     if (best == N) {
351         /* no pair with a joint Q > 0; pick best mapping */
352         for (i = 0, maxq = 0.0; i != N; ++i) {
353             unsigned const mate = data->map[i].mate;
354             if (mate < N && mate != i) {
355                 double const q = (int)data->map[i].weight - 33;
356 
357                 if (maxq < q) {
358                     maxq = q;
359                     best = i;
360                 }
361             }
362         }
363         if (best == N) {
364             /* no mapping with Q > 0; pick first */
365             for (i = 0, maxq = 0.0; i != N; ++i) {
366                 unsigned const mate = data->map[i].mate;
367                 if (mate < N && mate != i) {
368                     best = i;
369                     break;
370                 }
371             }
372             if (best == N) {
373                 /* give up */
374                 return N;
375             }
376         }
377     }
378     {
379         /* make the pair reciprocal */
380         unsigned const mate = data->map[best].mate;
381 
382         if (mate < N) {
383             data->map[mate].mate = best;
384             return (data->map[best].flags & cg_eRightHalfDnbMap) ? mate : best;
385         }
386         return N;
387     }
388 }
389 
390 static
FindBestLeft(TMappingsData * const data)391 unsigned FindBestLeft(TMappingsData *const data)
392 {
393     unsigned const N = data->map_qty;
394     unsigned i;
395     unsigned best;
396     int maxq;
397 
398     for (best = N, maxq = -1, i = 0; i != N; ++i) {
399         int const q = (int)data->map[i].weight;
400 
401         if ((data->map[i].flags & cg_eRightHalfDnbMap) == 0 && maxq < q) {
402             maxq = q;
403             best = i;
404         }
405     }
406     return best;
407 }
408 
409 static
FindBestRight(TMappingsData * const data)410 unsigned FindBestRight(TMappingsData *const data)
411 {
412     unsigned const N = data->map_qty;
413     unsigned i;
414     unsigned best;
415     int maxq;
416 
417     for (best = N, maxq = -1, i = 0; i != N; ++i) {
418         int const q = (int)data->map[i].weight;
419 
420         if ((data->map[i].flags & cg_eRightHalfDnbMap) != 0 && maxq < q) {
421             maxq = q;
422             best = i;
423         }
424     }
425     return best;
426 }
427 
428 static
check_in_cluster(TMappingsData_map const * const a,TMappingsData_map const * const b)429 bool check_in_cluster(TMappingsData_map const *const a, TMappingsData_map const *const b)
430 {
431 	if (   (a->flags & cg_eRightHalfDnbMap) == (b->flags & cg_eRightHalfDnbMap)
432         && (strcmp(a->chr, b->chr) == 0)
433         && abs((int)a->offset - (int)b->offset) <= global_cluster_size)
434     {
435         return true;
436 	}
437 	return false;
438 }
439 
440 static
clustering_sort_cb(void const * const A,void const * const B,void * const ctx)441 int64_t clustering_sort_cb(void const *const A, void const *const B, void *const ctx)
442 {
443     TMappingsData const *const data = (TMappingsData const *)ctx;
444     unsigned const ia = *(unsigned const *)A;
445     unsigned const ib = *(unsigned const *)B;
446     TMappingsData_map const *const a = &data->map[ia];
447     TMappingsData_map const *const b = &data->map[ib];
448     int64_t res;
449     unsigned j = 0;
450 
451 	res = (int64_t)(a->flags & cg_eRightHalfDnbMap) - (int64_t)(b->flags & cg_eRightHalfDnbMap); /**** separate by DNP side ***/
452 	if (res) return res;
453 
454     res = strcmp(a->chr, b->chr); /* same chromosome ? **/
455 	if (res) return res;
456 
457 	res = (a->offset - b->offset) / (global_cluster_size + 1); /***  is it within the range ***/
458 	if (res) return res;
459 
460 	/**cluster is defined here; now pick the winner **/
461 	res = (int64_t)a->saved - (int64_t)b->saved; /*** if already saved **/
462 	if (res) return -res;
463 
464 	res = (int64_t)a->weight - (int64_t)b->weight; /*** has  higher score **/
465 	if (res) return -res;
466 
467     res = 0;
468     assert(data->cg_reads_ngaps);
469     for (j = 0; j != data->cg_reads_ngaps; ++j) {
470         res += (int64_t)(a->gap[j]) - (int64_t)(b->gap[j]);
471     } /** has lower projection on the reference **/
472 
473     return res;
474 }
475 
476 static
cluster_mates(TMappingsData * const data)477 void cluster_mates(TMappingsData *const data)
478 {
479     unsigned index[CG_MAPPINGS_MAX];
480     unsigned i;
481     unsigned j;
482 
483     for (i = 0; i != data->map_qty; ++i)
484         index[i] = i;
485 
486     ksort(index, data->map_qty, sizeof(index[0]), clustering_sort_cb, data);
487     for (i = 0, j = 1; j != data->map_qty; ++j) {
488         unsigned const ii = index[i];
489         unsigned const ij = index[j];
490         TMappingsData_map *const a = &data->map[ij];
491         TMappingsData_map const *const b = &data->map[ii];
492 
493         if (check_in_cluster(a, b)) {
494             unsigned const a_mate = a->mate;
495             unsigned const b_mate = b->mate;
496 
497             if (   a_mate == ij /** remove singletons **/
498                 || a_mate == b_mate) /** or cluster originator has the same mate **/
499             {
500                 a->saved = true;
501                 DEBUG_MSG(10, ("mapping %u was dropped as a part of cluster at mapping %u\n", ij, ii));
502             }
503         }
504         else
505             i = j;
506     }
507 }
508 
509 static
template_length(unsigned const self_left,unsigned const mate_left,unsigned const self_len,unsigned const mate_len,unsigned const read_id)510 INSDC_coord_zero template_length(unsigned const self_left,
511                                  unsigned const mate_left,
512                                  unsigned const self_len,
513                                  unsigned const mate_len,
514                                  unsigned const read_id)
515 {
516     /* adapted from libs/axf/template_len.c */
517     unsigned const self_right = self_left + self_len;
518     unsigned const mate_right = mate_left + mate_len;
519     unsigned const  leftmost  = (self_left  < mate_left ) ? self_left  : mate_left;
520     unsigned const rightmost  = (self_right > mate_right) ? self_right : mate_right;
521     unsigned const tlen = rightmost - leftmost;
522 
523     /* The standard says, "The leftmost segment has a plus sign and the rightmost has a minus sign." */
524     if (   (self_left <= mate_left && self_right >= mate_right)     /* mate fully contained within self or */
525         || (mate_left <= self_left && mate_right >= self_right))    /* self fully contained within mate; */
526     {
527         if (self_left < mate_left || (read_id == 1 && self_left == mate_left))
528             return (INSDC_coord_zero)tlen;
529         else
530             return -(INSDC_coord_zero)tlen;
531     }
532     else if (   (self_right == mate_right && mate_left == leftmost) /* both are rightmost, but mate is leftmost */
533              ||  self_right == rightmost)
534     {
535         return -(INSDC_coord_zero)tlen;
536     }
537     else
538         return (INSDC_coord_zero)tlen;
539 }
540 
541 static
CGWriterAlgn_Write_int(CGWriterAlgn * const self,TReadsData * const read)542 rc_t CGWriterAlgn_Write_int(CGWriterAlgn *const self, TReadsData *const read)
543 {
544     TMappingsData *const data = &self->data;
545     unsigned const N = data->map_qty;
546     rc_t rc = 0;
547 
548     if (N != 0) {
549         unsigned left_prime = N;
550         unsigned right_prime = N;
551         unsigned i;
552         unsigned countLeft  = 0;
553         unsigned countRight = 0;
554 
555         assert(read);
556 
557         data->cg_reads_ngaps = get_cg_reads_ngaps(read->reads_format);
558         assert(data->cg_reads_ngaps);
559 
560         for (i = 0; i != N; ++i) {
561             char const *const refname = data->map[i].chr;
562             unsigned j;
563             INSDC_coord_len reflen = read->seq.spot_len / 2;
564             ReferenceSeq const *rseq;
565             bool shouldUnmap = false;
566             bool wasRenamed = false;
567 
568             memset(&self->match[i], 0, sizeof(self->match[i]));
569 
570             rc = ReferenceMgr_GetSeq(self->rmgr, &rseq, refname, &shouldUnmap, true, &wasRenamed);
571             if (rc) {
572                 (void)PLOGERR(klogErr, (klogErr, rc, "Failed accessing Reference '$(ref)'", "ref=%s", refname));
573                 break;
574             }
575             assert(shouldUnmap == false);
576             rc = ReferenceSeq_Get1stRow(rseq, &self->match[i].ref_id); /* if the above worked, this is infallible */
577             assert(rc == 0);
578             ReferenceSeq_Release(rseq);
579 
580             for (j = 0; j != data->cg_reads_ngaps; ++j) {
581                 reflen += data->map[i].gap[j];
582             }
583 
584             self->match[i].seq_spot_id = read->rowid;
585             self->match[i].mapq = data->map[i].weight - 33;
586             self->match[i].ref_orientation = (data->map[i].flags & cg_eRevDnbStrand) ? true : false;
587             self->match[i].ref_len = reflen;
588 
589             if (data->map[i].flags & cg_eRightHalfDnbMap) {
590                 self->match[i].seq_read_id = 2;
591                 ++countRight;
592             }
593             else {
594                 self->match[i].seq_read_id = 1;
595                 ++countLeft;
596             }
597         }
598 
599         if (countLeft > 0 && countRight > 0) {
600             left_prime = FindBestPair(data);
601             if (left_prime < N) {
602                 right_prime = data->map[left_prime].mate;
603             }
604             else { /* force the pairing */
605                 left_prime = FindBestLeft(data);
606                 right_prime = FindBestRight(data);
607                 data->map[left_prime].mate = right_prime;
608                 data->map[right_prime].mate = left_prime;
609             }
610             for (i = 0; i != N; ++i) {
611                 unsigned const mate = data->map[i].mate;
612 
613                 if (mate < N && mate != i) {
614                     INSDC_coord_zero const tlen = (self->match[i].ref_id == self->match[mate].ref_id)
615                                                 ? template_length(data->map[i].offset,
616                                                                   data->map[mate].offset,
617                                                                   self->match[i].ref_len,
618                                                                   self->match[mate].ref_len,
619                                                                   self->match[i].seq_read_id)
620                                                 : 0;
621 
622                     self->match[i].mate_ref_id = self->match[mate].ref_id;
623                     self->match[i].mate_ref_orientation = self->match[mate].ref_orientation;
624                     self->match[i].mate_ref_pos = data->map[mate].offset;
625                     self->match[i].template_len = tlen;
626                 }
627             }
628         }
629         else if (countLeft > 0) {
630             left_prime = FindBestLeft(data);
631         }
632         else {
633             assert(countRight > 0);
634             right_prime = FindBestRight(data);
635         }
636 
637         read->align_count[0] = countLeft  < 254 ? countLeft  : 255;
638         read->align_count[1] = countRight < 254 ? countRight : 255;
639         if (rc == 0 && left_prime < N) {
640             rc = CGWriterAlgn_Save(self, read, self->primary, left_prime,
641                                    &read->prim_algn_id[0]);
642             read->prim_is_reverse[0] = self->match[left_prime].ref_orientation;
643             data->map[left_prime].saved = 1;
644         }
645         if (rc == 0 && right_prime < N) {
646             rc = CGWriterAlgn_Save(self, read, self->primary, right_prime,
647                                    &read->prim_algn_id[1]);
648             read->prim_is_reverse[1] = self->match[right_prime].ref_orientation;
649             data->map[right_prime].saved = 1;
650         }
651         if (global_cluster_size > 0 && data->map_qty > 1 + (left_prime < N ? 1 : 0) + (right_prime < N ? 1 : 0)) {
652             cluster_mates(data);
653         }
654         for (i = 0; i != N && rc == 0; ++i) {
655             if (data->map[i].saved || self->match[i].mapq < self->min_mapq)
656                 continue;
657             rc = CGWriterAlgn_Save(self, read, self->secondary, i, NULL);
658         }
659     }
660     return rc;
661 }
662 
CGWriterAlgn_Write(const CGWriterAlgn * cself,TReadsData * read)663 rc_t CGWriterAlgn_Write(const CGWriterAlgn* cself, TReadsData* read)
664 {
665     assert(cself != NULL);
666     assert(read != NULL);
667     assert(read->seq.sequence.buffer != NULL
668         && read->seq.sequence.elements == read->seq.spot_len
669         && (read->seq.sequence.elements == CG_READS15_SPOT_LEN ||
670             read->seq.sequence.elements == CG_READS25_SPOT_LEN));
671 
672     memset(read->prim_algn_id, 0, sizeof(read->prim_algn_id));
673     memset(read->align_count, 0, sizeof(read->align_count));
674     memset(read->prim_is_reverse, 0, sizeof(read->prim_is_reverse));
675 
676     return CGWriterAlgn_Write_int((CGWriterAlgn *)cself, read);
677 }
678 
679 #else
680 
CGWriterAlgn_Write(const CGWriterAlgn * cself,TReadsData * read)681 rc_t CGWriterAlgn_Write(const CGWriterAlgn* cself, TReadsData* read)
682 {
683     if( cself->data.map_qty != 0 ) {
684         /* primary is-found indicator: weights are ASCII-33 so they can't be 0 if found */
685         uint8_t left_weight = 0, right_weight = 0, pair_weight = 0;
686         uint32_t i, left_prim = 0, right_prim = 0, paired = 0;
687 
688         CGWriterAlgn* self = (CGWriterAlgn*)cself;
689         TMappingsData* data = &self->data;
690 
691         /* find best left, right and pair */
692         for(i = 0; i < data->map_qty; i++) {
693             int k = (data->map[i].flags & cg_eRightHalfDnbMap) ? 1 : 0;
694             if( read->align_count[k] < 254 ) {
695                 read->align_count[k]++;
696             }
697             if( k == 0 ) {
698                 if( left_weight < data->map[i].weight ) {
699                     left_prim = i;
700                     left_weight = data->map[i].weight;
701                 }
702             } else {
703                 if( right_weight < data->map[i].weight ) {
704                     right_prim = i;
705                     right_weight = data->map[i].weight;
706                 }
707             }
708             if( i != data->map[i].mate && pair_weight < data->map[i].weight ) {
709                 if( data->map[i].mate < data->map_qty ) {
710                     /* note pair's left mate id */
711                     paired = k == 0 ? i : data->map[i].mate;
712                     pair_weight = data->map[i].weight;
713                 } else {
714                     /* fail safe in case mate id is out of map boundaries */
715                     data->map[i].mate = i;
716                 }
717             }
718         }
719         /* choose primary pair */
720         if( left_weight > right_weight && data->map[left_prim].mate != left_prim ) {
721             /* left is better and has a mate -> choose left pair */
722             right_prim = data->map[left_prim].mate;
723             right_weight = data->map[right_prim].weight;
724         } else if( right_weight > left_weight && data->map[right_prim].mate != right_prim ) {
725             /* right is better and has a mate -> choose right pair */
726             left_prim = data->map[right_prim].mate;
727             left_weight = data->map[left_prim].weight;
728         } else if( pair_weight > 0 ) {
729             /* use paired as primary */
730             left_prim = paired;
731             left_weight = data->map[left_prim].weight;
732             right_prim = data->map[left_prim].mate;
733             right_weight = data->map[right_prim].weight;
734         } else if( left_weight > 0 && right_weight > 0 ) {
735             /* force best left and right to be mates */
736             data->map[left_prim].mate = right_prim;
737             data->map[right_prim].mate = left_prim;
738             self->forced_pairs_cnt++;
739             DEBUG_MSG(10, ("forced pair: %u %u\n", left_prim, right_prim));
740         }
741 #if _DEBUGGING
742         DEBUG_MSG(10, ("alignment_count [%hu,%hu]", read->align_count[0], read->align_count[1]));
743         DEBUG_MSG(10, (" left primary: "));
744         if( left_weight > 0 ) {
745             DEBUG_MSG(10, ("weight %hu [%c], id %u", left_weight, left_weight, left_prim));
746         } else {
747             DEBUG_MSG(10, ("none"));
748         }
749         DEBUG_MSG(10, ("; right primary: "));
750         if( right_weight > 0 ) {
751             DEBUG_MSG(10, ("weight %hu [%c], id %u", right_weight, right_weight, right_prim));
752         } else {
753             DEBUG_MSG(10, ("none"));
754         }
755         DEBUG_MSG(10, ("\n"));
756 #endif
757         assert((left_weight == 0 && read->align_count[0] == 0) || (left_weight > 0 && read->align_count[0] > 0));
758         assert((right_weight == 0 && read->align_count[1] == 0) || (right_weight > 0 && read->align_count[1] > 0));
759 
760         /* write left primary */
761         if( rc == 0 && left_weight > 0 ) {
762             rc = CGWriterAlgn_Save(self, read, self->primary, left_prim, &read->prim_algn_id[0]);
763             read->prim_is_reverse[0] = cself->match[left_prim].ref_orientation;
764         }
765         /* write right primary */
766         if( rc == 0 && right_weight > 0 ) {
767             rc = CGWriterAlgn_Save(self, read, self->primary, right_prim, &read->prim_algn_id[1]);
768             read->prim_is_reverse[1] = cself->match[right_prim].ref_orientation;
769         }
770         DEBUG_MSG(10, ("prim_algn_rowid [%li,%li], ", read->prim_algn_id[0], read->prim_algn_id[1]));
771         DEBUG_MSG(10, ("prim_is_reverse [%hu,%hu]\n", read->prim_is_reverse[0], read->prim_is_reverse[1]));
772         if( rc == 0 ) {
773             /* others go to secondary */
774             int64_t row;
775 
776             rc = TableWriterAlgn_GetNextRowId(cself->secondary, &row);
777             if( global_cluster_size > 0  && data->map_qty > 1) {
778                 cluster_mates(data);
779             }
780             if( cself->single_mate ) {
781                 /* we need to re-mate in case original mate's weight is lower */
782                 for(i = 0; rc == 0 && i < data->map_qty; i++ ) {
783                     if( !data->map[i].saved && data->map[i].weight >= cself->min_mapq ) {
784                         uint32_t mate = data->map[i].mate;
785                         if( mate != i && data->map[mate].mate != i ) {
786                             self->dropped_mates_cnt++;
787                             if( data->map[data->map[mate].mate].weight < data->map[i].weight ) {
788                                 /* do not save my mate's mate */
789                                 DEBUG_MSG(10, ("mate %u dropped as pair of %u\n", data->map[mate].mate, mate));
790                                 data->map[data->map[mate].mate].saved = true;
791                                 /* repoint mate to me */
792                                 data->map[mate].mate = i;
793                             } else {
794                                 /* do not save me */
795                                 DEBUG_MSG(10, ("mate %u dropped as pair of %u\n", i, mate));
796                                 data->map[i].saved = true;
797                             }
798                         }
799                     }
800                 }
801             }
802             for(i = 0; rc == 0 && i < data->map_qty; i++ ) {
803                 if( !data->map[i].saved && data->map[i].weight >= cself->min_mapq ) {
804                     uint32_t mate = data->map[i].mate;
805                     /* no mate or mate is under-weigth */
806                     if( mate == i || data->map[mate].weight < cself->min_mapq ||
807                        /* or mate was saved in primary */
808                        (left_weight > 0 && mate == left_prim) || (right_weight > 0 && mate == right_prim) ) {
809                         self->match[i].mate_align_id = 0;
810                         rc = CGWriterAlgn_Save(self, read, self->secondary, i, NULL);
811                         row++;
812                     } else {
813                         self->match[mate].mate_align_id = row++;
814                         self->match[i].mate_align_id = row++;
815                         if( (rc = CGWriterAlgn_Save(self, read, self->secondary, i, NULL)) == 0 ) {
816                             rc = CGWriterAlgn_Save(self, read, self->secondary, mate, NULL);
817                         }
818                     }
819                 }
820             }
821         }
822     }
823     return rc;
824 }
825 #endif
826