1 /*
2  * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
3  *
4  * This file is part of Bowtie 2.
5  *
6  * Bowtie 2 is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * Bowtie 2 is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 /*
21  *  pe.h
22  *
23  * A class encapsulating a paired-end policy and routines for
24  * identifying intervals according to the policy.  For instance,
25  * contains a routine that, given a policy and details about a match
26  * for one mate, returns details about where to search for the other
27  * mate.
28  */
29 
30 #ifndef PE_H_
31 #define PE_H_
32 
33 #include <iostream>
34 #include <stdint.h>
35 
36 // In description below "To the left" = "Upstream of w/r/t the Watson strand"
37 
38 // The 4 possible policies describing how mates 1 and 2 should be
39 // oriented with respect to the reference genome and each other
40 enum {
41 	// (fw) Both mates from Watson with 1 to the left, or
42 	// (rc) Both mates from Crick with 2 to the left
43 	PE_POLICY_FF = 1,
44 
45 	// (fw) Both mates from Crick with 1 to the left, or
46 	// (rc) Both mates from Watson with 2 to the left
47 	PE_POLICY_RR,
48 
49 	// (fw) Mate 1 from Watson and mate 2 from Crick with 1 to the left, or
50 	// (rc) Mate 2 from Watson and mate 1 from Crick with 2 to the left
51 	PE_POLICY_FR,
52 
53 	// (fw) Mate 1 from Crick and mate 2 from Watson with 1 to the left, or
54 	// (rc) Mate 2 from Crick and mate 1 from Watson with 2 to the left
55 	PE_POLICY_RF
56 };
57 
58 // Various distinct ways that the mates might align with respect to
59 // each other in a concordant alignment.  We distinguish between them
60 // because in some cases a user may want to consider some of these
61 // categories to be discordant, even if the alignment otherwise
62 // conforms to the paired-end policy.
63 
64 enum {
65 	// Describes a paired-end alignment where the mates
66 	// straightforwardly conform to the paired-end policy without any
67 	// overlap between the mates
68 	PE_ALS_NORMAL = 1,
69 
70 	// Describes a paired-end alignment where the mate overlap, but
71 	// neither contains the other and they do not dovetail, but the
72 	// alignment conforms to the paired-end policy
73 	PE_ALS_OVERLAP,
74 
75 	// Describes a paired-end alignment where the mates conform to the
76 	// paired-end policy, but one mate strictly contains the other but
77 	// they don't dovetail.  We distinguish this from a "normal"
78 	// concordant alignment because some users may wish to categorize
79 	// such an alignment as discordant.
80 	PE_ALS_CONTAIN,
81 
82 	// Describes a paired-end alignment where the mates conform to the
83 	// paired-end policy, but mates "fall off" each other.  E.g. if the
84 	// policy is FR and any of these happen:
85 	// 1:     >>>>>   >>>>>
86 	// 2:  <<<<<<    <<<<<<
87 	// And the overall extent is consistent with the minimum fragment
88 	// length, this is a dovetail alignment.  We distinguish this from
89 	// a "normal" concordant alignment because some users may wish to
90 	// categorize such an alignment as discordant.
91 	PE_ALS_DOVETAIL,
92 
93 	// The mates are clearly discordant, owing to their orientations
94 	// and/or implied fragment length
95 	PE_ALS_DISCORD
96 };
97 
98 /**
99  * Return true iff the orientations and relative positions of mates 1
100  * and 2 are compatible with the given PE_POLICY.
101  */
pePolicyCompat(int policy,bool oneLeft,bool oneWat,bool twoWat)102 static inline bool pePolicyCompat(
103 	int policy,   // PE_POLICY
104 	bool oneLeft, // true iff mate 1 is to the left of mate 2
105 	bool oneWat,  // true iff mate 1 aligned to Watson strand
106 	bool twoWat)  // true iff mate 2 aligned to Watson strand
107 {
108 	switch(policy) {
109 		case PE_POLICY_FF:
110 			return oneWat == twoWat && oneWat == oneLeft;
111 		case PE_POLICY_RR:
112 			return oneWat == twoWat && oneWat != oneLeft;
113 		case PE_POLICY_FR:
114 			return oneWat != twoWat && oneWat == oneLeft;
115 		case PE_POLICY_RF:
116 			return oneWat != twoWat && oneWat != oneLeft;
117 		default: {
118 			std::cerr << "Bad PE_POLICY: " << policy << std::endl;
119 			throw 1;
120 		}
121 	}
122 	throw 1;
123 }
124 
125 /**
126  * Given that the given mate aligns in the given orientation, return
127  * true iff the other mate must appear "to the right" of the given mate
128  * in order for the alignment to be concordant.
129  */
pePolicyMateDir(int policy,bool is1,bool fw,bool & left,bool & mfw)130 static inline void pePolicyMateDir(
131 	int   policy,// in: PE_POLICY
132 	bool  is1,   // in: true iff mate 1 is the one that already aligned
133 	bool  fw,    // in: true iff already-aligned mate aligned to Watson
134 	bool& left,  // out: set =true iff other mate must be to the left
135 	bool& mfw)   // out: set =true iff other mate must align to watson
136 {
137 	switch(policy) {
138 		case PE_POLICY_FF: {
139 			left = (is1 != fw);
140 			mfw = fw;
141 			break;
142 		}
143 		case PE_POLICY_RR: {
144 			left = (is1 == fw);
145 			mfw = fw;
146 			break;
147 		}
148 		case PE_POLICY_FR: {
149 			left = !fw;
150 			mfw = !fw;
151 			break;
152 		}
153 		case PE_POLICY_RF: {
154 			left = fw;
155 			mfw = !fw;
156 			break;
157 		}
158 		default: {
159 			std::cerr << "Error: No such PE_POLICY: " << policy << std::endl;
160 			throw 1;
161 		}
162 	}
163 	return;
164 }
165 
166 /**
167  * Encapsulates paired-end alignment parameters.
168  */
169 class PairedEndPolicy {
170 
171 public:
172 
PairedEndPolicy()173 	PairedEndPolicy() { reset(); }
174 
PairedEndPolicy(int pol,size_t maxfrag,size_t minfrag,bool local,bool flippingOk,bool dovetailOk,bool containOk,bool olapOk,bool expandToFit)175 	PairedEndPolicy(
176 		int pol,
177 		size_t maxfrag,
178 		size_t minfrag,
179 		bool local,
180 		bool flippingOk,
181 		bool dovetailOk,
182 		bool containOk,
183 		bool olapOk,
184 		bool expandToFit)
185 	{
186 		init(
187 			pol,
188 			maxfrag,
189 			minfrag,
190 			local,
191 			flippingOk,
192 			dovetailOk,
193 			containOk,
194 			olapOk,
195 			expandToFit);
196 	}
197 
198 	/**
199 	 * Initialize with nonsense values.
200 	 */
reset()201 	void reset() {
202 		init(-1, 0xffffffff, 0xffffffff, false, false, false, false, false, false);
203 	}
204 
205 	/**
206 	 * Initialize given policy, maximum & minimum fragment lengths.
207 	 */
init(int pol,size_t maxfrag,size_t minfrag,bool local,bool flippingOk,bool dovetailOk,bool containOk,bool olapOk,bool expandToFit)208 	void init(
209 		int pol,
210 		size_t maxfrag,
211 		size_t minfrag,
212 		bool local,
213 		bool flippingOk,
214 		bool dovetailOk,
215 		bool containOk,
216 		bool olapOk,
217 		bool expandToFit)
218 	{
219 		pol_         = pol;
220 		maxfrag_     = maxfrag;
221 		minfrag_     = minfrag;
222 		local_       = local;
223 		flippingOk_  = flippingOk;
224 		dovetailOk_  = dovetailOk;
225 		containOk_   = containOk;
226 		olapOk_      = olapOk;
227 		expandToFit_ = expandToFit;
228 	}
229 
230 /**
231  * Given details about how one mate aligns, and some details about the
232  * reference sequence it aligned to, calculate a window and orientation s.t.
233  * a paired-end alignment is concordant iff the opposite mate aligns in the
234  * calculated window with the calculated orientation.  The calculaton does not
235  * consider gaps.  The dynamic programming framer will take gaps into account.
236  *
237  * Returns false if no concordant alignments are possible, true otherwise.
238  */
239 bool otherMate(
240 	bool     is1,       // true -> mate 1 aligned and we're looking
241 						// for 2, false -> vice versa
242 	bool     fw,        // orientation of aligned mate
243 	int64_t  off,       // offset into the reference sequence
244 	int64_t  maxalcols, // max # columns spanned by alignment
245 	size_t   reflen,    // length of reference sequence aligned to
246 	size_t   len1,      // length of mate 1
247 	size_t   len2,      // length of mate 2
248 	bool&    oleft,     // out: true iff opp mate must be to right of anchor
249 	int64_t& oll,       // out: leftmost Watson off for LHS of opp alignment
250 	int64_t& olr,       // out: rightmost Watson off for LHS of opp alignment
251 	int64_t& orl,       // out: leftmost Watson off for RHS of opp alignment
252 	int64_t& orr,       // out: rightmost Watson off for RHS of opp alignment
253 	bool&    ofw)       // out: true iff opp mate must be on Watson strand
254 	const;
255 
256 	/**
257 	 * Return a PE_TYPE flag indicating, given a PE_POLICY and coordinates
258 	 * for a paired-end alignment,	qwhat type of alignment it is, i.e.,
259 	 * whether it's:
260 	 *
261 	 * 1. Straightforwardly concordant
262 	 * 2. Mates dovetail (one extends beyond the end of the other)
263 	 * 3. One mate contains the other but they don't dovetail
264 	 * 4. One mate overlaps the other but neither contains the other and
265 	 *    they don't dovetail
266 	 * 5. Discordant
267 	 */
268 	int peClassifyPair(
269 		int64_t  off1,   // offset of mate 1
270 		size_t   len1,   // length of mate 1
271 		bool     fw1,    // whether mate 1 aligned to Watson
272 		int64_t  off2,   // offset of mate 2
273 		size_t   len2,   // length of mate 2
274 		bool     fw2)    // whether mate 2 aligned to Watson
275 		const;
276 
policy()277 	int    policy()     const { return pol_;     }
maxFragLen()278 	size_t maxFragLen() const { return maxfrag_; }
minFragLen()279 	size_t minFragLen() const { return minfrag_; }
280 
281 protected:
282 
283 	// Use local alignment to search for the opposite mate, rather than
284 	// a type of alignment that requires the read to align end-to-end
285 	bool local_;
286 
287 	// Policy governing how mates should be oriented with respect to
288 	// each other and the reference genome
289 	int pol_;
290 
291 	// true iff settings are such that mates that violate the expected relative
292 	// orientation but are still consistent with maximum fragment length are OK
293 	bool flippingOk_;
294 
295 	// true iff settings are such that dovetailed mates should be
296 	// considered concordant.
297 	bool dovetailOk_;
298 
299 	// true iff paired-end alignments where one mate's alignment is
300 	// strictly contained within the other's should be considered
301 	// concordant
302 	bool containOk_;
303 
304 	// true iff paired-end alignments where one mate's alignment
305 	// overlaps the other's should be considered concordant
306 	bool olapOk_;
307 
308 	// What to do when a mate length is > maxfrag_?  If expandToFit_ is
309 	// true, we temporarily increase maxfrag_ to equal the mate length.
310 	// Otherwise we say that any paired-end alignment involving the
311 	// long mate is discordant.
312 	bool expandToFit_;
313 
314 	// Maximum fragment size to consider
315 	size_t maxfrag_;
316 
317 	// Minimum fragment size to consider
318 	size_t minfrag_;
319 };
320 
321 #endif /*ndef PE_H_*/
322