1
2 /******************************************************************************
3 *
4 * This file is part of canu, a software program that assembles whole-genome
5 * sequencing reads into contigs.
6 *
7 * This software is based on:
8 * 'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
9 * the 'kmer package' r1994 (http://kmer.sourceforge.net)
10 *
11 * Except as indicated otherwise, this is a 'United States Government Work',
12 * and is released in the public domain.
13 *
14 * File 'README.licenses' in the root directory of this distribution
15 * contains full conditions and disclaimers.
16 */
17
18 #include "adjustOverlaps.H"
19
20 // Adjust the overlap for any trimming done already. This works by computing the fraction of the
21 // overlap trimmed for each read and each end, picking the largest fraction for each end, and
22 // applying that fraction to the other read.
23 //
24 // It expects only flipped overlaps. The output coordinates are for a REVERSE COMPLEMENTED
25 // b read. if you care which end is the actual 5' or 3' end, look at flipped().
26
27 bool
adjustFlipped(clearRangeFile * iniClr,ovOverlap * ovl,uint32 & aovlbgn,uint32 & aovlend,uint32 & bovlbgn,uint32 & bovlend,uint32 & aclrbgn,uint32 & aclrend,uint32 & bclrbgn,uint32 & bclrend,sqStore * seq)28 adjustFlipped(clearRangeFile *iniClr,
29 ovOverlap *ovl,
30 uint32 &aovlbgn, uint32 &aovlend, uint32 &bovlbgn, uint32 &bovlend,
31 uint32 &aclrbgn, uint32 &aclrend, uint32 &bclrbgn, uint32 &bclrend,
32 sqStore *seq) {
33
34 assert(ovl->flipped() == true);
35
36 uint32 bLen = seq->sqStore_getReadLength(ovl->b_iid);
37
38 aovlbgn = ovl->a_bgn();
39 bovlbgn = bLen - ovl->b_bgn(); // bgn(), because this is the higher coord
40 aovlend = ovl->a_end();
41 bovlend = bLen - ovl->b_end();
42
43 aclrbgn = iniClr->bgn(ovl->a_iid);
44 bclrbgn = bLen - iniClr->end(ovl->b_iid); // end(), because this is the higher coord
45 aclrend = iniClr->end(ovl->a_iid);
46 bclrend = bLen - iniClr->bgn(ovl->b_iid);
47
48 assert(aovlbgn < aovlend);
49 assert(bovlbgn < bovlend);
50
51 if ((aclrend <= aovlbgn) || (aovlend <= aclrbgn) ||
52 (bclrend <= bovlbgn) || (bovlend <= bclrbgn)) {
53 // Overlap doesn't intersect clear range, fail.
54 #if 0
55 fprintf(stderr, "Discard FLIP overlap from %u,%u-%u,%u based on clear ranges %u,%u and %u,%u\n",
56 aovlbgn, aovlend,
57 bovlbgn, bovlend,
58 aclrbgn, aclrend,
59 bclrbgn, bclrend);
60 #endif
61 return(false);
62 }
63
64
65 uint32 alen = aovlend - aovlbgn;
66 uint32 blen = bovlend - bovlbgn;
67
68 double afracbgn = (double)((aclrbgn < aovlbgn) ? (0) : (aclrbgn - aovlbgn)) / alen;
69 double bfracbgn = (double)((bclrbgn < bovlbgn) ? (0) : (bclrbgn - bovlbgn)) / blen;
70 double afracend = (double)((aclrend > aovlend) ? (0) : (aovlend - aclrend)) / alen;
71 double bfracend = (double)((bclrend > bovlend) ? (0) : (bovlend - bclrend)) / blen;
72
73 //fprintf(stderr, "frac a %.20f %.20f b %.20f %.20f\n", afracbgn, afracend, bfracbgn, bfracend);
74 //fprintf(stderr, "frac a %.20f %.20f b %.20f %.20f\n", afracbgn * alen, afracend * alen, bfracbgn * blen, bfracend * blen);
75
76 double maxbgn = std::max(afracbgn, bfracbgn);
77 double maxend = std::max(afracend, bfracend);
78
79 //fprintf(stderr, "frac a %.20f %.20f b %.20f %.20f\n", maxbgn * alen, maxend * alen, maxbgn * blen, maxend * blen);
80
81 assert(maxbgn < 1.0);
82 assert(maxend < 1.0);
83
84 uint32 aadjbgn = (uint32)round(maxbgn * alen);
85 uint32 badjbgn = (uint32)round(maxbgn * blen);
86 uint32 aadjend = (uint32)round(maxend * alen);
87 uint32 badjend = (uint32)round(maxend * blen);
88
89 //fprintf(stderr, "frac a %u %u b %u %u alen %u blen %u\n", aadjbgn, aadjend, badjbgn, badjend, alen, blen);
90
91 #if 0
92 fprintf(stderr, "Adjusted FLIP overlap from %u,%u-%u,%u (adjust %u,%u,%u,%u) to %u,%u-%u,%u based on clear ranges %u,%u and %u,%u maxbgn=%f maxend=%f\n",
93 aovlbgn, aovlend,
94 bovlbgn, bovlend,
95 aadjbgn, aadjend, badjbgn, badjend,
96 aovlbgn + aadjbgn, aovlend - aadjend,
97 bovlbgn + badjbgn, bovlend - badjend,
98 aclrbgn, aclrend,
99 bclrbgn, bclrend,
100 maxbgn,
101 maxend);
102 #endif
103
104 aovlbgn += aadjbgn;
105 bovlbgn += badjbgn;
106 aovlend -= aadjend;
107 bovlend -= badjend;
108
109 assert(aclrbgn <= aovlbgn);
110 assert(bclrbgn <= bovlbgn);
111 assert(aovlend <= aclrend);
112 assert(bovlend <= bclrend);
113
114 return(true);
115 }
116