1 /*
2 * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
3 *
4 * This file is part of Bowtie 2.
5 *
6 * Bowtie 2 is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * Bowtie 2 is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 /*
21 * pe.h
22 *
23 * A class encapsulating a paired-end policy and routines for
24 * identifying intervals according to the policy. For instance,
25 * contains a routine that, given a policy and details about a match
26 * for one mate, returns details about where to search for the other
27 * mate.
28 */
29
30 #ifndef PE_H_
31 #define PE_H_
32
33 #include <iostream>
34 #include <stdint.h>
35
36 // In description below "To the left" = "Upstream of w/r/t the Watson strand"
37
38 // The 4 possible policies describing how mates 1 and 2 should be
39 // oriented with respect to the reference genome and each other
40 enum {
41 // (fw) Both mates from Watson with 1 to the left, or
42 // (rc) Both mates from Crick with 2 to the left
43 PE_POLICY_FF = 1,
44
45 // (fw) Both mates from Crick with 1 to the left, or
46 // (rc) Both mates from Watson with 2 to the left
47 PE_POLICY_RR,
48
49 // (fw) Mate 1 from Watson and mate 2 from Crick with 1 to the left, or
50 // (rc) Mate 2 from Watson and mate 1 from Crick with 2 to the left
51 PE_POLICY_FR,
52
53 // (fw) Mate 1 from Crick and mate 2 from Watson with 1 to the left, or
54 // (rc) Mate 2 from Crick and mate 1 from Watson with 2 to the left
55 PE_POLICY_RF
56 };
57
58 // Various distinct ways that the mates might align with respect to
59 // each other in a concordant alignment. We distinguish between them
60 // because in some cases a user may want to consider some of these
61 // categories to be discordant, even if the alignment otherwise
62 // conforms to the paired-end policy.
63
64 enum {
65 // Describes a paired-end alignment where the mates
66 // straightforwardly conform to the paired-end policy without any
67 // overlap between the mates
68 PE_ALS_NORMAL = 1,
69
70 // Describes a paired-end alignment where the mate overlap, but
71 // neither contains the other and they do not dovetail, but the
72 // alignment conforms to the paired-end policy
73 PE_ALS_OVERLAP,
74
75 // Describes a paired-end alignment where the mates conform to the
76 // paired-end policy, but one mate strictly contains the other but
77 // they don't dovetail. We distinguish this from a "normal"
78 // concordant alignment because some users may wish to categorize
79 // such an alignment as discordant.
80 PE_ALS_CONTAIN,
81
82 // Describes a paired-end alignment where the mates conform to the
83 // paired-end policy, but mates "fall off" each other. E.g. if the
84 // policy is FR and any of these happen:
85 // 1: >>>>> >>>>>
86 // 2: <<<<<< <<<<<<
87 // And the overall extent is consistent with the minimum fragment
88 // length, this is a dovetail alignment. We distinguish this from
89 // a "normal" concordant alignment because some users may wish to
90 // categorize such an alignment as discordant.
91 PE_ALS_DOVETAIL,
92
93 // The mates are clearly discordant, owing to their orientations
94 // and/or implied fragment length
95 PE_ALS_DISCORD
96 };
97
98 /**
99 * Return true iff the orientations and relative positions of mates 1
100 * and 2 are compatible with the given PE_POLICY.
101 */
pePolicyCompat(int policy,bool oneLeft,bool oneWat,bool twoWat)102 static inline bool pePolicyCompat(
103 int policy, // PE_POLICY
104 bool oneLeft, // true iff mate 1 is to the left of mate 2
105 bool oneWat, // true iff mate 1 aligned to Watson strand
106 bool twoWat) // true iff mate 2 aligned to Watson strand
107 {
108 switch(policy) {
109 case PE_POLICY_FF:
110 return oneWat == twoWat && oneWat == oneLeft;
111 case PE_POLICY_RR:
112 return oneWat == twoWat && oneWat != oneLeft;
113 case PE_POLICY_FR:
114 return oneWat != twoWat && oneWat == oneLeft;
115 case PE_POLICY_RF:
116 return oneWat != twoWat && oneWat != oneLeft;
117 default: {
118 std::cerr << "Bad PE_POLICY: " << policy << std::endl;
119 throw 1;
120 }
121 }
122 throw 1;
123 }
124
125 /**
126 * Given that the given mate aligns in the given orientation, return
127 * true iff the other mate must appear "to the right" of the given mate
128 * in order for the alignment to be concordant.
129 */
pePolicyMateDir(int policy,bool is1,bool fw,bool & left,bool & mfw)130 static inline void pePolicyMateDir(
131 int policy,// in: PE_POLICY
132 bool is1, // in: true iff mate 1 is the one that already aligned
133 bool fw, // in: true iff already-aligned mate aligned to Watson
134 bool& left, // out: set =true iff other mate must be to the left
135 bool& mfw) // out: set =true iff other mate must align to watson
136 {
137 switch(policy) {
138 case PE_POLICY_FF: {
139 left = (is1 != fw);
140 mfw = fw;
141 break;
142 }
143 case PE_POLICY_RR: {
144 left = (is1 == fw);
145 mfw = fw;
146 break;
147 }
148 case PE_POLICY_FR: {
149 left = !fw;
150 mfw = !fw;
151 break;
152 }
153 case PE_POLICY_RF: {
154 left = fw;
155 mfw = !fw;
156 break;
157 }
158 default: {
159 std::cerr << "Error: No such PE_POLICY: " << policy << std::endl;
160 throw 1;
161 }
162 }
163 return;
164 }
165
166 /**
167 * Encapsulates paired-end alignment parameters.
168 */
169 class PairedEndPolicy {
170
171 public:
172
PairedEndPolicy()173 PairedEndPolicy() { reset(); }
174
PairedEndPolicy(int pol,size_t maxfrag,size_t minfrag,bool local,bool flippingOk,bool dovetailOk,bool containOk,bool olapOk,bool expandToFit)175 PairedEndPolicy(
176 int pol,
177 size_t maxfrag,
178 size_t minfrag,
179 bool local,
180 bool flippingOk,
181 bool dovetailOk,
182 bool containOk,
183 bool olapOk,
184 bool expandToFit)
185 {
186 init(
187 pol,
188 maxfrag,
189 minfrag,
190 local,
191 flippingOk,
192 dovetailOk,
193 containOk,
194 olapOk,
195 expandToFit);
196 }
197
198 /**
199 * Initialize with nonsense values.
200 */
reset()201 void reset() {
202 init(-1, 0xffffffff, 0xffffffff, false, false, false, false, false, false);
203 }
204
205 /**
206 * Initialize given policy, maximum & minimum fragment lengths.
207 */
init(int pol,size_t maxfrag,size_t minfrag,bool local,bool flippingOk,bool dovetailOk,bool containOk,bool olapOk,bool expandToFit)208 void init(
209 int pol,
210 size_t maxfrag,
211 size_t minfrag,
212 bool local,
213 bool flippingOk,
214 bool dovetailOk,
215 bool containOk,
216 bool olapOk,
217 bool expandToFit)
218 {
219 pol_ = pol;
220 maxfrag_ = maxfrag;
221 minfrag_ = minfrag;
222 local_ = local;
223 flippingOk_ = flippingOk;
224 dovetailOk_ = dovetailOk;
225 containOk_ = containOk;
226 olapOk_ = olapOk;
227 expandToFit_ = expandToFit;
228 }
229
230 /**
231 * Given details about how one mate aligns, and some details about the
232 * reference sequence it aligned to, calculate a window and orientation s.t.
233 * a paired-end alignment is concordant iff the opposite mate aligns in the
234 * calculated window with the calculated orientation. The calculaton does not
235 * consider gaps. The dynamic programming framer will take gaps into account.
236 *
237 * Returns false if no concordant alignments are possible, true otherwise.
238 */
239 bool otherMate(
240 bool is1, // true -> mate 1 aligned and we're looking
241 // for 2, false -> vice versa
242 bool fw, // orientation of aligned mate
243 int64_t off, // offset into the reference sequence
244 int64_t maxalcols, // max # columns spanned by alignment
245 size_t reflen, // length of reference sequence aligned to
246 size_t len1, // length of mate 1
247 size_t len2, // length of mate 2
248 bool& oleft, // out: true iff opp mate must be to right of anchor
249 int64_t& oll, // out: leftmost Watson off for LHS of opp alignment
250 int64_t& olr, // out: rightmost Watson off for LHS of opp alignment
251 int64_t& orl, // out: leftmost Watson off for RHS of opp alignment
252 int64_t& orr, // out: rightmost Watson off for RHS of opp alignment
253 bool& ofw) // out: true iff opp mate must be on Watson strand
254 const;
255
256 /**
257 * Return a PE_TYPE flag indicating, given a PE_POLICY and coordinates
258 * for a paired-end alignment, qwhat type of alignment it is, i.e.,
259 * whether it's:
260 *
261 * 1. Straightforwardly concordant
262 * 2. Mates dovetail (one extends beyond the end of the other)
263 * 3. One mate contains the other but they don't dovetail
264 * 4. One mate overlaps the other but neither contains the other and
265 * they don't dovetail
266 * 5. Discordant
267 */
268 int peClassifyPair(
269 int64_t off1, // offset of mate 1
270 size_t len1, // length of mate 1
271 bool fw1, // whether mate 1 aligned to Watson
272 int64_t off2, // offset of mate 2
273 size_t len2, // length of mate 2
274 bool fw2) // whether mate 2 aligned to Watson
275 const;
276
policy()277 int policy() const { return pol_; }
maxFragLen()278 size_t maxFragLen() const { return maxfrag_; }
minFragLen()279 size_t minFragLen() const { return minfrag_; }
280
281 protected:
282
283 // Use local alignment to search for the opposite mate, rather than
284 // a type of alignment that requires the read to align end-to-end
285 bool local_;
286
287 // Policy governing how mates should be oriented with respect to
288 // each other and the reference genome
289 int pol_;
290
291 // true iff settings are such that mates that violate the expected relative
292 // orientation but are still consistent with maximum fragment length are OK
293 bool flippingOk_;
294
295 // true iff settings are such that dovetailed mates should be
296 // considered concordant.
297 bool dovetailOk_;
298
299 // true iff paired-end alignments where one mate's alignment is
300 // strictly contained within the other's should be considered
301 // concordant
302 bool containOk_;
303
304 // true iff paired-end alignments where one mate's alignment
305 // overlaps the other's should be considered concordant
306 bool olapOk_;
307
308 // What to do when a mate length is > maxfrag_? If expandToFit_ is
309 // true, we temporarily increase maxfrag_ to equal the mate length.
310 // Otherwise we say that any paired-end alignment involving the
311 // long mate is discordant.
312 bool expandToFit_;
313
314 // Maximum fragment size to consider
315 size_t maxfrag_;
316
317 // Minimum fragment size to consider
318 size_t minfrag_;
319 };
320
321 #endif /*ndef PE_H_*/
322