1 
2 /******************************************************************************
3  *
4  *  This file is part of canu, a software program that assembles whole-genome
5  *  sequencing reads into contigs.
6  *
7  *  This software is based on:
8  *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
9  *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
10  *
11  *  Except as indicated otherwise, this is a 'United States Government Work',
12  *  and is released in the public domain.
13  *
14  *  File 'README.licenses' in the root directory of this distribution
15  *  contains full conditions and disclaimers.
16  */
17 
18 #include "adjustOverlaps.H"
19 
20 //  Adjust the overlap for any trimming done already.  This works by computing the fraction of the
21 //  overlap trimmed for each read and each end, picking the largest fraction for each end, and
22 //  applying that fraction to the other read.
23 //
24 //  It expects only flipped overlaps.  The output coordinates are for a REVERSE COMPLEMENTED
25 //  b read.  if you care which end is the actual 5' or 3' end, look at flipped().
26 
27 bool
adjustFlipped(clearRangeFile * iniClr,ovOverlap * ovl,uint32 & aovlbgn,uint32 & aovlend,uint32 & bovlbgn,uint32 & bovlend,uint32 & aclrbgn,uint32 & aclrend,uint32 & bclrbgn,uint32 & bclrend,sqStore * seq)28 adjustFlipped(clearRangeFile  *iniClr,
29               ovOverlap       *ovl,
30               uint32 &aovlbgn,  uint32 &aovlend,  uint32 &bovlbgn,  uint32 &bovlend,
31               uint32 &aclrbgn,  uint32 &aclrend,  uint32 &bclrbgn,  uint32 &bclrend,
32               sqStore         *seq) {
33 
34   assert(ovl->flipped() == true);
35 
36   uint32  bLen = seq->sqStore_getReadLength(ovl->b_iid);
37 
38   aovlbgn =        ovl->a_bgn();
39   bovlbgn = bLen - ovl->b_bgn();  //  bgn(), because this is the higher coord
40   aovlend =        ovl->a_end();
41   bovlend = bLen - ovl->b_end();
42 
43   aclrbgn =        iniClr->bgn(ovl->a_iid);
44   bclrbgn = bLen - iniClr->end(ovl->b_iid);  //  end(), because this is the higher coord
45   aclrend =        iniClr->end(ovl->a_iid);
46   bclrend = bLen - iniClr->bgn(ovl->b_iid);
47 
48   assert(aovlbgn < aovlend);
49   assert(bovlbgn < bovlend);
50 
51   if ((aclrend <= aovlbgn) || (aovlend <= aclrbgn) ||
52       (bclrend <= bovlbgn) || (bovlend <= bclrbgn)) {
53     //  Overlap doesn't intersect clear range, fail.
54 #if 0
55     fprintf(stderr, "Discard  FLIP overlap from %u,%u-%u,%u based on clear ranges %u,%u and %u,%u\n",
56             aovlbgn, aovlend,
57             bovlbgn, bovlend,
58             aclrbgn, aclrend,
59             bclrbgn, bclrend);
60 #endif
61     return(false);
62   }
63 
64 
65   uint32  alen = aovlend - aovlbgn;
66   uint32  blen = bovlend - bovlbgn;
67 
68   double  afracbgn = (double)((aclrbgn < aovlbgn) ? (0) : (aclrbgn - aovlbgn)) / alen;
69   double  bfracbgn = (double)((bclrbgn < bovlbgn) ? (0) : (bclrbgn - bovlbgn)) / blen;
70   double  afracend = (double)((aclrend > aovlend) ? (0) : (aovlend - aclrend)) / alen;
71   double  bfracend = (double)((bclrend > bovlend) ? (0) : (bovlend - bclrend)) / blen;
72 
73   //fprintf(stderr, "frac a %.20f %.20f b %.20f %.20f\n", afracbgn, afracend, bfracbgn, bfracend);
74   //fprintf(stderr, "frac a %.20f %.20f b %.20f %.20f\n", afracbgn * alen, afracend * alen, bfracbgn * blen, bfracend * blen);
75 
76   double  maxbgn = std::max(afracbgn, bfracbgn);
77   double  maxend = std::max(afracend, bfracend);
78 
79   //fprintf(stderr, "frac a %.20f %.20f b %.20f %.20f\n", maxbgn * alen, maxend * alen, maxbgn * blen, maxend * blen);
80 
81   assert(maxbgn < 1.0);
82   assert(maxend < 1.0);
83 
84   uint32  aadjbgn = (uint32)round(maxbgn * alen);
85   uint32  badjbgn = (uint32)round(maxbgn * blen);
86   uint32  aadjend = (uint32)round(maxend * alen);
87   uint32  badjend = (uint32)round(maxend * blen);
88 
89   //fprintf(stderr, "frac a %u %u b %u %u alen %u blen %u\n", aadjbgn, aadjend, badjbgn, badjend, alen, blen);
90 
91 #if 0
92   fprintf(stderr, "Adjusted FLIP overlap from %u,%u-%u,%u (adjust %u,%u,%u,%u) to %u,%u-%u,%u  based on clear ranges %u,%u and %u,%u  maxbgn=%f maxend=%f\n",
93           aovlbgn, aovlend,
94           bovlbgn, bovlend,
95           aadjbgn, aadjend, badjbgn, badjend,
96           aovlbgn + aadjbgn, aovlend - aadjend,
97           bovlbgn + badjbgn, bovlend - badjend,
98           aclrbgn, aclrend,
99           bclrbgn, bclrend,
100           maxbgn,
101           maxend);
102 #endif
103 
104   aovlbgn += aadjbgn;
105   bovlbgn += badjbgn;
106   aovlend -= aadjend;
107   bovlend -= badjend;
108 
109   assert(aclrbgn <= aovlbgn);
110   assert(bclrbgn <= bovlbgn);
111   assert(aovlend <= aclrend);
112   assert(bovlend <= bclrend);
113 
114   return(true);
115 }
116