1 
2 /******************************************************************************
3  *
4  *  This file is part of canu, a software program that assembles whole-genome
5  *  sequencing reads into contigs.
6  *
7  *  This software is based on:
8  *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
9  *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
10  *
11  *  Except as indicated otherwise, this is a 'United States Government Work',
12  *  and is released in the public domain.
13  *
14  *  File 'README.licenses' in the root directory of this distribution
15  *  contains full conditions and disclaimers.
16  */
17 
18 #include "adjustOverlaps.H"
19 
20 //  Adjust the overlap for any trimming done already.  This works by computing the fraction of the
21 //  overlap trimmed for each read and each end, picking the largest fraction for each end, and
22 //  applying that fraction to the other read.
23 //
24 //  It expects only normal overlaps.
25 
26 bool
adjustNormal(clearRangeFile * iniClr,ovOverlap * ovl,uint32 & aovlbgn,uint32 & aovlend,uint32 & bovlbgn,uint32 & bovlend,uint32 & aclrbgn,uint32 & aclrend,uint32 & bclrbgn,uint32 & bclrend)27 adjustNormal(clearRangeFile  *iniClr,
28              ovOverlap       *ovl,
29              uint32 &aovlbgn,  uint32 &aovlend,  uint32 &bovlbgn,  uint32 &bovlend,
30              uint32 &aclrbgn,  uint32 &aclrend,  uint32 &bclrbgn,  uint32 &bclrend) {
31 
32   assert(ovl->flipped() == false);
33 
34   aovlbgn = ovl->a_bgn();
35   bovlbgn = ovl->b_bgn();
36   aovlend = ovl->a_end();
37   bovlend = ovl->b_end();
38 
39   aclrbgn = iniClr->bgn(ovl->a_iid);
40   bclrbgn = iniClr->bgn(ovl->b_iid);
41   aclrend = iniClr->end(ovl->a_iid);
42   bclrend = iniClr->end(ovl->b_iid);
43 
44   assert(aovlbgn < aovlend);
45   assert(bovlbgn < bovlend);
46 
47   if ((aclrend <= aovlbgn) || (aovlend <= aclrbgn) ||
48       (bclrend <= bovlbgn) || (bovlend <= bclrbgn)) {
49     //  Overlap doesn't intersect clear range, fail.
50 #if 0
51     fprintf(stderr, "Discard  NORM overlap from %u,%u-%u,%u based on clear ranges %u,%u and %u,%u\n",
52             aovlbgn, aovlend,
53             bovlbgn, bovlend,
54             aclrbgn, aclrend,
55             bclrbgn, bclrend);
56 #endif
57     return(false);
58   }
59 
60 
61   uint32  alen = aovlend - aovlbgn;
62   uint32  blen = bovlend - bovlbgn;
63 
64   double  afracbgn = (double)((aclrbgn < aovlbgn) ? (0) : (aclrbgn - aovlbgn)) / alen;
65   double  bfracbgn = (double)((bclrbgn < bovlbgn) ? (0) : (bclrbgn - bovlbgn)) / blen;
66   double  afracend = (double)((aclrend > aovlend) ? (0) : (aovlend - aclrend)) / alen;
67   double  bfracend = (double)((bclrend > bovlend) ? (0) : (bovlend - bclrend)) / blen;
68 
69   //fprintf(stderr, "frac a %.20f %.20f b %.20f %.20f\n", afracbgn, afracend, bfracbgn, bfracend);
70   //fprintf(stderr, "frac a %.20f %.20f b %.20f %.20f\n", afracbgn * alen, afracend * alen, bfracbgn * blen, bfracend * blen);
71 
72   double  maxbgn = std::max(afracbgn, bfracbgn);
73   double  maxend = std::max(afracend, bfracend);
74 
75   //fprintf(stderr, "frac a %.20f %.20f b %.20f %.20f\n", maxbgn * alen, maxend * alen, maxbgn * blen, maxend * blen);
76 
77   assert(maxbgn < 1.0);
78   assert(maxend < 1.0);
79 
80   uint32  aadjbgn = (uint32)round(maxbgn * alen);
81   uint32  badjbgn = (uint32)round(maxbgn * blen);
82   uint32  aadjend = (uint32)round(maxend * alen);
83   uint32  badjend = (uint32)round(maxend * blen);
84 
85   //fprintf(stderr, "frac a %u %u b %u %u alen %u blen %u\n", aadjbgn, aadjend, badjbgn, badjend, alen, blen);
86 
87 #if 0
88   fprintf(stderr, "Adjusted NORM overlap from %u,%u-%u,%u (adjust %u,%u,%u,%u) to %u,%u-%u,%u  based on clear ranges %u,%u and %u,%u  maxbgn=%f maxend=%f\n",
89           aovlbgn, aovlend,
90           bovlbgn, bovlend,
91           aadjbgn, aadjend, badjbgn, badjend,
92           aovlbgn + aadjbgn, aovlend - aadjend,
93           bovlbgn + badjbgn, bovlend - badjend,
94           aclrbgn, aclrend,
95           bclrbgn, bclrend,
96           maxbgn,
97           maxend);
98 #endif
99 
100   aovlbgn += aadjbgn;
101   bovlbgn += badjbgn;
102   aovlend -= aadjend;
103   bovlend -= badjend;
104 
105   assert(aclrbgn <= aovlbgn);
106   assert(bclrbgn <= bovlbgn);
107   assert(aovlend <= aclrend);
108   assert(bovlend <= bclrend);
109 
110   return(true);
111 }
112