1 
2 /******************************************************************************
3  *
4  *  This file is part of canu, a software program that assembles whole-genome
5  *  sequencing reads into contigs.
6  *
7  *  This software is based on:
8  *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
9  *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
10  *
11  *  Except as indicated otherwise, this is a 'United States Government Work',
12  *  and is released in the public domain.
13  *
14  *  File 'README.licenses' in the root directory of this distribution
15  *  contains full conditions and disclaimers.
16  */
17 
18 #include "ovStore.H"
19 #include "sqStore.H"
20 
21 
22 sqStore *ovOverlap::g = NULL;
23 
24 //  Even though the b_end_hi | b_end_lo is uint64 in the struct, the result
25 //  of combining them doesn't appear to be 64-bit.  The cast is necessary.
26 
27 char *
toString(char * str,ovOverlapDisplayType type,bool newLine)28 ovOverlap::toString(char                  *str,
29                     ovOverlapDisplayType   type,
30                     bool                   newLine) {
31 
32   switch (type) {
33     case ovOverlapAsHangs:
34       sprintf(str, "%10" F_U32P " %10" F_U32P "  %c  %6" F_S32P " %6" F_U32P " %6" F_S32P "  %7.6f%s%s",
35               a_iid, b_iid,
36               flipped() ? 'I' : 'N',
37               a_hang(), span(), b_hang(),
38               erate(),
39               (overlapIsDovetail()) ? "" : "  PARTIAL",
40               (newLine) ? "\n" : "");
41       break;
42 
43     case ovOverlapAsCoords:
44       sprintf(str, "%10" F_U32P " %10" F_U32P "  %c  %6" F_U32P "  %6" F_U32P " %6" F_U32P "  %6" F_U32P " %6" F_U32P "  %7.6f%s",
45               a_iid, b_iid,
46               flipped() ? 'I' : 'N',
47               span(),
48               a_bgn(), a_end(),
49               b_bgn(), b_end(),
50               erate(),
51               (newLine) ? "\n" : "");
52       break;
53 
54     case ovOverlapAsUnaligned:
55       sprintf(str, "%10" F_U32P " %10" F_U32P "  %c  %6" F_U32P "  %6" F_OVP " %6" F_OVP "  %6" F_OVP " %6" F_OVP "  %7.6f %s %s %s%s",
56               a_iid, b_iid,
57               flipped() ? 'I' : 'N',
58               span(),
59               dat.ovl.ahg5, dat.ovl.ahg3,
60               dat.ovl.bhg5, dat.ovl.bhg3,
61               erate(),
62               dat.ovl.forOBT ? "OBT" : "   ",
63               dat.ovl.forDUP ? "DUP" : "   ",
64               dat.ovl.forUTG ? "UTG" : "   ",
65               (newLine) ? "\n" : "");
66       break;
67 
68     case ovOverlapAsPaf:
69       // miniasm/map expects entries to be separated by tabs
70       // no padding spaces on names we don't confuse read identifiers
71       sprintf(str, "%" F_U32P "\t%6" F_U32P "\t%6" F_U32P "\t%6" F_U32P "\t%c\t%" F_U32P "\t%6" F_U32P "\t%6" F_U32P "\t%6" F_U32P "\t%6" F_U32P "\t%6" F_U32P "\t%6" F_U32P " \tdv:f:%6.6f%s",
72               a_iid,
73               g->sqStore_getReadLength(a_iid), a_bgn(), a_end(),
74               flipped() ? '-' : '+',
75               b_iid,
76               g->sqStore_getReadLength(b_iid), flipped() ? b_end() : b_bgn(), flipped() ? b_bgn() : b_end(),
77               (uint32)floor(span() == 0 ? ((1-erate()) * (a_end()-a_bgn())) : (1-erate()) * span()),
78               span() == 0 ? a_end() - a_bgn() : span(),
79               255, erate(),
80               (newLine) ? "\n" : "");
81       break;
82   }
83 
84   return(str);
85 }
86 
87 
88 
89 bool
fromString(splitToWords & W,ovOverlapDisplayType type)90 ovOverlap::fromString(splitToWords          &W,
91                       ovOverlapDisplayType   type) {
92 
93 
94   switch (type) {
95     case ovOverlapAsHangs:
96       a_iid = W.touint32(0);
97       b_iid = W.touint32(1);
98 
99       flipped(W[2][0] == 'I');
100 
101       a_hang(W.touint32(3));
102       span(W.touint32(4));
103       b_hang(W.touint32(5));
104 
105       erate(W.todouble(6));
106 
107       break;
108 
109     case ovOverlapAsCoords:
110       a_iid = W.touint32(0);
111       b_iid = W.touint32(1);
112 
113       flipped(W[2][0] == 'I');
114 
115       span(W.touint32(3));
116 
117       {
118         uint32  alen = g->sqStore_getReadLength(a_iid);
119         uint32  blen = g->sqStore_getReadLength(b_iid);
120 
121         uint32  abgn = W.touint32(4);
122         uint32  aend = W.touint32(5);
123 
124         uint32  bbgn = W.touint32(6);
125         uint32  bend = W.touint32(7);
126 
127         dat.ovl.ahg5 = abgn;
128         dat.ovl.ahg3 = alen - aend;
129 
130         dat.ovl.bhg5 = (dat.ovl.flipped) ? blen - bbgn :        bbgn;
131         dat.ovl.bhg3 = (dat.ovl.flipped) ?        bend : blen - bend;
132       }
133 
134       erate(W.todouble(8));
135       break;
136 
137     case ovOverlapAsUnaligned:
138       a_iid = W.touint32(0);
139       b_iid = W.touint32(1);
140 
141       flipped(W[2][0] == 'I');
142 
143       dat.ovl.span = W.touint32(3);
144 
145       dat.ovl.ahg5 = W.touint32(4);
146       dat.ovl.ahg3 = W.touint32(5);
147 
148       dat.ovl.bhg5 = W.touint32(6);
149       dat.ovl.bhg3 = W.touint32(7);
150 
151       erate(W.todouble(8));
152 
153       dat.ovl.forUTG = false;
154       dat.ovl.forOBT = false;
155       dat.ovl.forDUP = false;
156 
157       for (uint32 i = 9; i < W.numWords(); i++) {
158         dat.ovl.forUTG |= ((W[i][0] == 'U') && (W[i][1] == 'T') && (W[i][2] == 'G'));  //  Fails if W[i] == "U".
159         dat.ovl.forOBT |= ((W[i][0] == 'O') && (W[i][1] == 'B') && (W[i][2] == 'T'));
160         dat.ovl.forDUP |= ((W[i][0] == 'D') && (W[i][1] == 'U') && (W[i][2] == 'P'));
161       }
162       break;
163 
164     case ovOverlapAsPaf:
165       a_iid = W.touint32(0);
166       b_iid = W.touint32(5);
167 
168       flipped(W[4][0] == '-');
169 
170       dat.ovl.span = W.touint32(3)-W.toint32(2);
171 
172       uint32  alen = W.toint32(1);
173       uint32  blen = W.toint32(6);
174 
175       uint32  abgn = W.touint32(2);
176       uint32  aend = W.touint32(3);
177 
178       // paf looks like our coord format but the start/end aren't decreasing like ours so flip them then we can use the same math below
179       uint32  bbgn = (dat.ovl.flipped) ? W.touint32(8) : W.touint32(7);
180       uint32  bend = (dat.ovl.flipped) ? W.touint32(7) : W.touint32(8);
181 
182       dat.ovl.ahg5 = abgn;
183       dat.ovl.ahg3 = alen - aend;
184 
185       dat.ovl.bhg5 = (dat.ovl.flipped) ? blen-bbgn : bbgn;
186       dat.ovl.bhg3 = (dat.ovl.flipped) ?      bend : blen - bend;
187 
188       for (int i = 0; i < W.numWords(); i++) {
189          if (W[i][0] == 'd' && W[i][1] == 'v' && W[i][3] == 'f') {
190             erate(strtodouble(W[i]+5));
191             break;
192           }
193       }
194 
195       dat.ovl.forUTG = true;
196       dat.ovl.forOBT = true;
197       dat.ovl.forDUP = true;
198       break;
199   }
200 
201 #warning NEEDS TO RETURN FALSE IF FAILED TO DECODE OVERLAP STRING
202 
203   return(true);
204 }
205 
206 
207 
208 void
swapIDs(ovOverlap const & orig)209 ovOverlap::swapIDs(ovOverlap const &orig) {
210 
211   a_iid = orig.b_iid;
212   b_iid = orig.a_iid;
213 
214   //  Copy the overlap as is, then fix it for the ID swap.
215 
216   for (uint32 ii=0; ii<ovOverlapNWORDS; ii++)
217     dat.dat[ii] = orig.dat.dat[ii];
218 
219   //  Swap the A and B hangs.  If the overlap is flipped, we also need to reverse 5' and 3' hangs to
220   //  make the now-A read forward oriented.
221 
222   if (orig.dat.ovl.flipped == false) {
223     dat.ovl.ahg5 = orig.dat.ovl.bhg5;
224     dat.ovl.ahg3 = orig.dat.ovl.bhg3;
225     dat.ovl.bhg5 = orig.dat.ovl.ahg5;
226     dat.ovl.bhg3 = orig.dat.ovl.ahg3;
227   } else {
228     dat.ovl.ahg5 = orig.dat.ovl.bhg3;
229     dat.ovl.ahg3 = orig.dat.ovl.bhg5;
230     dat.ovl.bhg5 = orig.dat.ovl.ahg3;
231     dat.ovl.bhg3 = orig.dat.ovl.ahg5;
232   }
233 
234   //  Whatever alignment orientation was in the original, it is opposite now.
235 
236 #ifndef DO_NOT_STORE_ALIGN_PTR
237   dat.ovl.alignSwapped = ! orig.dat.ovl.alignSwapped;
238 #endif
239 }
240