1
2 /******************************************************************************
3 *
4 * This file is part of canu, a software program that assembles whole-genome
5 * sequencing reads into contigs.
6 *
7 * This software is based on:
8 * 'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
9 * the 'kmer package' r1994 (http://kmer.sourceforge.net)
10 *
11 * Except as indicated otherwise, this is a 'United States Government Work',
12 * and is released in the public domain.
13 *
14 * File 'README.licenses' in the root directory of this distribution
15 * contains full conditions and disclaimers.
16 */
17
18 #include "adjustOverlaps.H"
19
20 // Adjust the overlap for any trimming done already. This works by computing the fraction of the
21 // overlap trimmed for each read and each end, picking the largest fraction for each end, and
22 // applying that fraction to the other read.
23 //
24 // It expects only normal overlaps.
25
26 bool
adjustNormal(clearRangeFile * iniClr,ovOverlap * ovl,uint32 & aovlbgn,uint32 & aovlend,uint32 & bovlbgn,uint32 & bovlend,uint32 & aclrbgn,uint32 & aclrend,uint32 & bclrbgn,uint32 & bclrend)27 adjustNormal(clearRangeFile *iniClr,
28 ovOverlap *ovl,
29 uint32 &aovlbgn, uint32 &aovlend, uint32 &bovlbgn, uint32 &bovlend,
30 uint32 &aclrbgn, uint32 &aclrend, uint32 &bclrbgn, uint32 &bclrend) {
31
32 assert(ovl->flipped() == false);
33
34 aovlbgn = ovl->a_bgn();
35 bovlbgn = ovl->b_bgn();
36 aovlend = ovl->a_end();
37 bovlend = ovl->b_end();
38
39 aclrbgn = iniClr->bgn(ovl->a_iid);
40 bclrbgn = iniClr->bgn(ovl->b_iid);
41 aclrend = iniClr->end(ovl->a_iid);
42 bclrend = iniClr->end(ovl->b_iid);
43
44 assert(aovlbgn < aovlend);
45 assert(bovlbgn < bovlend);
46
47 if ((aclrend <= aovlbgn) || (aovlend <= aclrbgn) ||
48 (bclrend <= bovlbgn) || (bovlend <= bclrbgn)) {
49 // Overlap doesn't intersect clear range, fail.
50 #if 0
51 fprintf(stderr, "Discard NORM overlap from %u,%u-%u,%u based on clear ranges %u,%u and %u,%u\n",
52 aovlbgn, aovlend,
53 bovlbgn, bovlend,
54 aclrbgn, aclrend,
55 bclrbgn, bclrend);
56 #endif
57 return(false);
58 }
59
60
61 uint32 alen = aovlend - aovlbgn;
62 uint32 blen = bovlend - bovlbgn;
63
64 double afracbgn = (double)((aclrbgn < aovlbgn) ? (0) : (aclrbgn - aovlbgn)) / alen;
65 double bfracbgn = (double)((bclrbgn < bovlbgn) ? (0) : (bclrbgn - bovlbgn)) / blen;
66 double afracend = (double)((aclrend > aovlend) ? (0) : (aovlend - aclrend)) / alen;
67 double bfracend = (double)((bclrend > bovlend) ? (0) : (bovlend - bclrend)) / blen;
68
69 //fprintf(stderr, "frac a %.20f %.20f b %.20f %.20f\n", afracbgn, afracend, bfracbgn, bfracend);
70 //fprintf(stderr, "frac a %.20f %.20f b %.20f %.20f\n", afracbgn * alen, afracend * alen, bfracbgn * blen, bfracend * blen);
71
72 double maxbgn = std::max(afracbgn, bfracbgn);
73 double maxend = std::max(afracend, bfracend);
74
75 //fprintf(stderr, "frac a %.20f %.20f b %.20f %.20f\n", maxbgn * alen, maxend * alen, maxbgn * blen, maxend * blen);
76
77 assert(maxbgn < 1.0);
78 assert(maxend < 1.0);
79
80 uint32 aadjbgn = (uint32)round(maxbgn * alen);
81 uint32 badjbgn = (uint32)round(maxbgn * blen);
82 uint32 aadjend = (uint32)round(maxend * alen);
83 uint32 badjend = (uint32)round(maxend * blen);
84
85 //fprintf(stderr, "frac a %u %u b %u %u alen %u blen %u\n", aadjbgn, aadjend, badjbgn, badjend, alen, blen);
86
87 #if 0
88 fprintf(stderr, "Adjusted NORM overlap from %u,%u-%u,%u (adjust %u,%u,%u,%u) to %u,%u-%u,%u based on clear ranges %u,%u and %u,%u maxbgn=%f maxend=%f\n",
89 aovlbgn, aovlend,
90 bovlbgn, bovlend,
91 aadjbgn, aadjend, badjbgn, badjend,
92 aovlbgn + aadjbgn, aovlend - aadjend,
93 bovlbgn + badjbgn, bovlend - badjend,
94 aclrbgn, aclrend,
95 bclrbgn, bclrend,
96 maxbgn,
97 maxend);
98 #endif
99
100 aovlbgn += aadjbgn;
101 bovlbgn += badjbgn;
102 aovlend -= aadjend;
103 bovlend -= badjend;
104
105 assert(aclrbgn <= aovlbgn);
106 assert(bclrbgn <= bovlbgn);
107 assert(aovlend <= aclrend);
108 assert(bovlend <= bclrend);
109
110 return(true);
111 }
112