1
2 /******************************************************************************
3 *
4 * This file is part of canu, a software program that assembles whole-genome
5 * sequencing reads into contigs.
6 *
7 * This software is based on:
8 * 'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
9 * the 'kmer package' r1994 (http://kmer.sourceforge.net)
10 *
11 * Except as indicated otherwise, this is a 'United States Government Work',
12 * and is released in the public domain.
13 *
14 * File 'README.licenses' in the root directory of this distribution
15 * contains full conditions and disclaimers.
16 */
17
18 #include "ovStore.H"
19 #include "sqStore.H"
20
21
22 sqStore *ovOverlap::g = NULL;
23
24 // Even though the b_end_hi | b_end_lo is uint64 in the struct, the result
25 // of combining them doesn't appear to be 64-bit. The cast is necessary.
26
27 char *
toString(char * str,ovOverlapDisplayType type,bool newLine)28 ovOverlap::toString(char *str,
29 ovOverlapDisplayType type,
30 bool newLine) {
31
32 switch (type) {
33 case ovOverlapAsHangs:
34 sprintf(str, "%10" F_U32P " %10" F_U32P " %c %6" F_S32P " %6" F_U32P " %6" F_S32P " %7.6f%s%s",
35 a_iid, b_iid,
36 flipped() ? 'I' : 'N',
37 a_hang(), span(), b_hang(),
38 erate(),
39 (overlapIsDovetail()) ? "" : " PARTIAL",
40 (newLine) ? "\n" : "");
41 break;
42
43 case ovOverlapAsCoords:
44 sprintf(str, "%10" F_U32P " %10" F_U32P " %c %6" F_U32P " %6" F_U32P " %6" F_U32P " %6" F_U32P " %6" F_U32P " %7.6f%s",
45 a_iid, b_iid,
46 flipped() ? 'I' : 'N',
47 span(),
48 a_bgn(), a_end(),
49 b_bgn(), b_end(),
50 erate(),
51 (newLine) ? "\n" : "");
52 break;
53
54 case ovOverlapAsUnaligned:
55 sprintf(str, "%10" F_U32P " %10" F_U32P " %c %6" F_U32P " %6" F_OVP " %6" F_OVP " %6" F_OVP " %6" F_OVP " %7.6f %s %s %s%s",
56 a_iid, b_iid,
57 flipped() ? 'I' : 'N',
58 span(),
59 dat.ovl.ahg5, dat.ovl.ahg3,
60 dat.ovl.bhg5, dat.ovl.bhg3,
61 erate(),
62 dat.ovl.forOBT ? "OBT" : " ",
63 dat.ovl.forDUP ? "DUP" : " ",
64 dat.ovl.forUTG ? "UTG" : " ",
65 (newLine) ? "\n" : "");
66 break;
67
68 case ovOverlapAsPaf:
69 // miniasm/map expects entries to be separated by tabs
70 // no padding spaces on names we don't confuse read identifiers
71 sprintf(str, "%" F_U32P "\t%6" F_U32P "\t%6" F_U32P "\t%6" F_U32P "\t%c\t%" F_U32P "\t%6" F_U32P "\t%6" F_U32P "\t%6" F_U32P "\t%6" F_U32P "\t%6" F_U32P "\t%6" F_U32P " \tdv:f:%6.6f%s",
72 a_iid,
73 g->sqStore_getReadLength(a_iid), a_bgn(), a_end(),
74 flipped() ? '-' : '+',
75 b_iid,
76 g->sqStore_getReadLength(b_iid), flipped() ? b_end() : b_bgn(), flipped() ? b_bgn() : b_end(),
77 (uint32)floor(span() == 0 ? ((1-erate()) * (a_end()-a_bgn())) : (1-erate()) * span()),
78 span() == 0 ? a_end() - a_bgn() : span(),
79 255, erate(),
80 (newLine) ? "\n" : "");
81 break;
82 }
83
84 return(str);
85 }
86
87
88
89 bool
fromString(splitToWords & W,ovOverlapDisplayType type)90 ovOverlap::fromString(splitToWords &W,
91 ovOverlapDisplayType type) {
92
93
94 switch (type) {
95 case ovOverlapAsHangs:
96 a_iid = W.touint32(0);
97 b_iid = W.touint32(1);
98
99 flipped(W[2][0] == 'I');
100
101 a_hang(W.touint32(3));
102 span(W.touint32(4));
103 b_hang(W.touint32(5));
104
105 erate(W.todouble(6));
106
107 break;
108
109 case ovOverlapAsCoords:
110 a_iid = W.touint32(0);
111 b_iid = W.touint32(1);
112
113 flipped(W[2][0] == 'I');
114
115 span(W.touint32(3));
116
117 {
118 uint32 alen = g->sqStore_getReadLength(a_iid);
119 uint32 blen = g->sqStore_getReadLength(b_iid);
120
121 uint32 abgn = W.touint32(4);
122 uint32 aend = W.touint32(5);
123
124 uint32 bbgn = W.touint32(6);
125 uint32 bend = W.touint32(7);
126
127 dat.ovl.ahg5 = abgn;
128 dat.ovl.ahg3 = alen - aend;
129
130 dat.ovl.bhg5 = (dat.ovl.flipped) ? blen - bbgn : bbgn;
131 dat.ovl.bhg3 = (dat.ovl.flipped) ? bend : blen - bend;
132 }
133
134 erate(W.todouble(8));
135 break;
136
137 case ovOverlapAsUnaligned:
138 a_iid = W.touint32(0);
139 b_iid = W.touint32(1);
140
141 flipped(W[2][0] == 'I');
142
143 dat.ovl.span = W.touint32(3);
144
145 dat.ovl.ahg5 = W.touint32(4);
146 dat.ovl.ahg3 = W.touint32(5);
147
148 dat.ovl.bhg5 = W.touint32(6);
149 dat.ovl.bhg3 = W.touint32(7);
150
151 erate(W.todouble(8));
152
153 dat.ovl.forUTG = false;
154 dat.ovl.forOBT = false;
155 dat.ovl.forDUP = false;
156
157 for (uint32 i = 9; i < W.numWords(); i++) {
158 dat.ovl.forUTG |= ((W[i][0] == 'U') && (W[i][1] == 'T') && (W[i][2] == 'G')); // Fails if W[i] == "U".
159 dat.ovl.forOBT |= ((W[i][0] == 'O') && (W[i][1] == 'B') && (W[i][2] == 'T'));
160 dat.ovl.forDUP |= ((W[i][0] == 'D') && (W[i][1] == 'U') && (W[i][2] == 'P'));
161 }
162 break;
163
164 case ovOverlapAsPaf:
165 a_iid = W.touint32(0);
166 b_iid = W.touint32(5);
167
168 flipped(W[4][0] == '-');
169
170 dat.ovl.span = W.touint32(3)-W.toint32(2);
171
172 uint32 alen = W.toint32(1);
173 uint32 blen = W.toint32(6);
174
175 uint32 abgn = W.touint32(2);
176 uint32 aend = W.touint32(3);
177
178 // paf looks like our coord format but the start/end aren't decreasing like ours so flip them then we can use the same math below
179 uint32 bbgn = (dat.ovl.flipped) ? W.touint32(8) : W.touint32(7);
180 uint32 bend = (dat.ovl.flipped) ? W.touint32(7) : W.touint32(8);
181
182 dat.ovl.ahg5 = abgn;
183 dat.ovl.ahg3 = alen - aend;
184
185 dat.ovl.bhg5 = (dat.ovl.flipped) ? blen-bbgn : bbgn;
186 dat.ovl.bhg3 = (dat.ovl.flipped) ? bend : blen - bend;
187
188 for (int i = 0; i < W.numWords(); i++) {
189 if (W[i][0] == 'd' && W[i][1] == 'v' && W[i][3] == 'f') {
190 erate(strtodouble(W[i]+5));
191 break;
192 }
193 }
194
195 dat.ovl.forUTG = true;
196 dat.ovl.forOBT = true;
197 dat.ovl.forDUP = true;
198 break;
199 }
200
201 #warning NEEDS TO RETURN FALSE IF FAILED TO DECODE OVERLAP STRING
202
203 return(true);
204 }
205
206
207
208 void
swapIDs(ovOverlap const & orig)209 ovOverlap::swapIDs(ovOverlap const &orig) {
210
211 a_iid = orig.b_iid;
212 b_iid = orig.a_iid;
213
214 // Copy the overlap as is, then fix it for the ID swap.
215
216 for (uint32 ii=0; ii<ovOverlapNWORDS; ii++)
217 dat.dat[ii] = orig.dat.dat[ii];
218
219 // Swap the A and B hangs. If the overlap is flipped, we also need to reverse 5' and 3' hangs to
220 // make the now-A read forward oriented.
221
222 if (orig.dat.ovl.flipped == false) {
223 dat.ovl.ahg5 = orig.dat.ovl.bhg5;
224 dat.ovl.ahg3 = orig.dat.ovl.bhg3;
225 dat.ovl.bhg5 = orig.dat.ovl.ahg5;
226 dat.ovl.bhg3 = orig.dat.ovl.ahg3;
227 } else {
228 dat.ovl.ahg5 = orig.dat.ovl.bhg3;
229 dat.ovl.ahg3 = orig.dat.ovl.bhg5;
230 dat.ovl.bhg5 = orig.dat.ovl.ahg3;
231 dat.ovl.bhg3 = orig.dat.ovl.ahg5;
232 }
233
234 // Whatever alignment orientation was in the original, it is opposite now.
235
236 #ifndef DO_NOT_STORE_ALIGN_PTR
237 dat.ovl.alignSwapped = ! orig.dat.ovl.alignSwapped;
238 #endif
239 }
240