1 #include "Alignment.h"
2 #include "Estimate.h"
3 #include "Histogram.h"
4 #include "IOUtil.h"
5 #include "MemoryUtil.h"
6 #include "SAM.h"
7 #include "StringUtil.h"
8 #include "Uncompress.h"
9 #include "UnorderedMap.h"
10 #include <algorithm>
11 #include <climits>
12 #include <cmath>
13 #include <cstdlib>
14 #include <cstring>
15 #include <fstream>
16 #include <functional>
17 #include <getopt.h>
18 #include <iomanip>
19 #include <iostream>
20 #include <iterator>
21 #include <sstream>
22 #include <string>
23 #include <vector>
24 
25 using namespace std;
26 
27 #define PROGRAM "ParseAligns"
28 
29 static const char VERSION_MESSAGE[] =
30     PROGRAM " (" PACKAGE_NAME ") " VERSION "\n"
31             "Written by Jared Simpson and Shaun Jackman.\n"
32             "\n"
33             "Copyright 2014 Canada's Michael Smith Genome Sciences Centre\n";
34 
35 static const char USAGE_MESSAGE[] =
36     "Usage: " PROGRAM " -k<kmer> [OPTION]... [FILE]...\n"
37     "Write pairs that map to the same contig to the file SAME.\n"
38     "Write pairs that map to different contigs to standard output.\n"
39     "Alignments may be read from FILE(s) or standard input.\n"
40     "\n"
41     " Options:\n"
42     "\n"
43     "  -l, --min-align=N     minimum alignment length\n"
44     "  -d, --dist=DISTANCE   write distance estimates to this file\n"
45     "  -f, --frag=SAME       write fragment sizes to this file\n"
46     "  -h, --hist=FILE       write the fragment size histogram to FILE\n"
47     "      --sam             alignments are in SAM format\n"
48     "      --kaligner        alignments are in KAligner format\n"
49     "  -c, --cover=COVERAGE  coverage cut-off for distance estimates\n"
50     "  -v, --verbose         display verbose output\n"
51     "      --help            display this help and exit\n"
52     "      --version         output version information and exit\n"
53     "\n"
54     "Report bugs to <" PACKAGE_BUGREPORT ">.\n";
55 
56 namespace opt {
57 unsigned k; // used by DistanceEst
58 static unsigned c;
59 static int verbose;
60 static string distPath;
61 static string fragPath;
62 static string histPath;
63 
64 /** Input alignment format. */
65 static int inputFormat;
66 enum
67 {
68 	KALIGNER,
69 	SAM
70 };
71 
72 /** Output format */
73 int format = ADJ; // used by Estimate
74 }
75 
76 static const char shortopts[] = "d:l:f:h:c:v";
77 
78 enum
79 {
80 	OPT_HELP = 1,
81 	OPT_VERSION
82 };
83 
84 static const struct option longopts[] = {
85 	{ "dist", required_argument, NULL, 'd' },
86 	{ "min-align", required_argument, NULL, 'l' },
87 	{ "frag", required_argument, NULL, 'f' },
88 	{ "hist", required_argument, NULL, 'h' },
89 	{ "kaligner", no_argument, &opt::inputFormat, opt::KALIGNER },
90 	{ "sam", no_argument, &opt::inputFormat, opt::SAM },
91 	{ "cover", required_argument, NULL, 'c' },
92 	{ "verbose", no_argument, NULL, 'v' },
93 	{ "help", no_argument, NULL, OPT_HELP },
94 	{ "version", no_argument, NULL, OPT_VERSION },
95 	{ NULL, 0, NULL, 0 }
96 };
97 
98 static struct
99 {
100 	size_t alignments;
101 	size_t bothUnaligned;
102 	size_t oneUnaligned;
103 	size_t numDifferent;
104 	size_t numFF;
105 	size_t numMulti;
106 	size_t numSplit;
107 } stats;
108 
109 static ofstream fragFile;
110 static Histogram histogram;
111 
112 typedef vector<Alignment> AlignmentVector;
113 
114 /** A map of read IDs to alignments. */
115 typedef unordered_map<string, AlignmentVector> ReadAlignMap;
116 
117 /** A map of contig IDs to distance estimates. */
118 typedef unordered_map<string, EstimateRecord> EstimateMap;
119 static EstimateMap estMap;
120 
121 static bool
122 checkUniqueAlignments(const AlignmentVector& alignVec);
123 static string
124 makePairID(string id);
125 
126 /**
127  * Return the size of the fragment demarcated by the specified
128  * alignments.
129  */
130 static int
fragmentSize(const Alignment & a0,const Alignment & a1)131 fragmentSize(const Alignment& a0, const Alignment& a1)
132 {
133 	assert(a0.contig == a1.contig);
134 	assert(a0.isRC != a1.isRC);
135 	const Alignment& f = a0.isRC ? a1 : a0;
136 	const Alignment& r = a0.isRC ? a0 : a1;
137 	return r - f;
138 }
139 
140 typedef pair<ContigNode, DistanceEst> Estimate;
141 typedef vector<Estimate> Estimates;
142 
143 static void
addEstimate(EstimateMap & map,const Alignment & a,Estimate & est,bool reverse)144 addEstimate(EstimateMap& map, const Alignment& a, Estimate& est, bool reverse)
145 {
146 	// count up the number of estimates that agree
147 	bool placed = false;
148 	bool a_isRC = a.isRC != reverse;
149 	EstimateMap::iterator estimatesIt = map.find(a.contig);
150 	if (estimatesIt != map.end()) {
151 		Estimates& estimates = estimatesIt->second.estimates[a_isRC];
152 		for (Estimates::iterator estIt = estimates.begin(); estIt != estimates.end(); ++estIt) {
153 			if (estIt->first.id() == est.first.id()) {
154 				estIt->second.numPairs++;
155 				estIt->second.distance += est.second.distance;
156 				placed = true;
157 				break;
158 			}
159 		}
160 	}
161 	if (!placed)
162 		map[a.contig].estimates[a_isRC].push_back(est);
163 }
164 
165 static void
doReadIntegrity(const ReadAlignMap::value_type & a)166 doReadIntegrity(const ReadAlignMap::value_type& a)
167 {
168 	AlignmentVector::const_iterator refAlignIter = a.second.begin();
169 	unsigned firstStart, lastEnd, largestSize;
170 	Alignment first, last, largest;
171 
172 	firstStart = refAlignIter->read_start_pos;
173 	lastEnd = firstStart + refAlignIter->align_length;
174 	largestSize = refAlignIter->align_length;
175 	first = last = largest = *refAlignIter;
176 	++refAlignIter;
177 
178 	// for each alignment in the vector a.second
179 	for (; refAlignIter != a.second.end(); ++refAlignIter) {
180 		if ((unsigned)refAlignIter->read_start_pos < firstStart) {
181 			firstStart = refAlignIter->read_start_pos;
182 			first = *refAlignIter;
183 		}
184 		if ((unsigned)(refAlignIter->read_start_pos + refAlignIter->align_length) > lastEnd) {
185 			lastEnd = refAlignIter->read_start_pos + refAlignIter->align_length;
186 			last = *refAlignIter;
187 		}
188 		if ((unsigned)refAlignIter->align_length > largestSize) {
189 			largestSize = refAlignIter->align_length;
190 			largest = *refAlignIter;
191 		}
192 	}
193 
194 	if (largest.contig != last.contig) {
195 		Estimate est;
196 		unsigned largest_end = largest.read_start_pos + largest.align_length - opt::k;
197 		int distance = last.read_start_pos - largest_end;
198 		est.first = find_vertex(last.contig, largest.isRC != last.isRC, g_contigNames);
199 		est.second.distance = distance - opt::k;
200 		est.second.numPairs = 1;
201 		est.second.stdDev = 0;
202 		addEstimate(estMap, largest, est, false);
203 	}
204 
205 	if (largest.contig != first.contig && largest.contig != last.contig) {
206 		Estimate est;
207 		unsigned first_end = first.read_start_pos + first.align_length - opt::k;
208 		int distance = last.read_start_pos - first_end;
209 		est.first = find_vertex(last.contig, first.isRC != last.isRC, g_contigNames);
210 		est.second.distance = distance - opt::k;
211 		est.second.numPairs = 1;
212 		est.second.stdDev = 0;
213 		addEstimate(estMap, first, est, false);
214 	}
215 
216 	if (largest.contig != first.contig) {
217 		largest.flipQuery();
218 		first.flipQuery();
219 		Estimate est;
220 		unsigned largest_end = largest.read_start_pos + largest.align_length - opt::k;
221 		int distance = first.read_start_pos - largest_end;
222 		est.first = find_vertex(first.contig, largest.isRC != first.isRC, g_contigNames);
223 		est.second.distance = distance - opt::k;
224 		est.second.numPairs = 1;
225 		est.second.stdDev = 0;
226 		addEstimate(estMap, largest, est, false);
227 	}
228 
229 #if 0
230 	//for each alignment in the vector a.second
231 	for (AlignmentVector::const_iterator refAlignIter = a.second.begin();
232 			refAlignIter != a.second.end(); ++refAlignIter) {
233 		//for each alignment after the current one
234 		for (AlignmentVector::const_iterator alignIter = a.second.begin();
235 				alignIter != a.second.end(); ++alignIter) {
236 			//make sure both alignments aren't for the same contig
237 			if (alignIter->contig != refAlignIter->contig) {
238 				Estimate est;
239 				//Make sure the distance is read as 0 if the two contigs are
240 				//directly adjacent to each other. A -ve number suggests an
241 				//overlap.
242 				assert(refAlignIter->read_start_pos != alignIter->read_start_pos);
243 				const Alignment& a = refAlignIter->read_start_pos < alignIter->read_start_pos ? *refAlignIter : *alignIter;
244 				const Alignment& b = refAlignIter->read_start_pos > alignIter->read_start_pos ? *refAlignIter : *alignIter;
245 				unsigned a_end = a.read_start_pos + a.align_length - opt::k;
246 				int distance = b.read_start_pos - a_end;
247 				est.nID = ContigID(b.contig);
248 				est.distance = distance - opt::k;
249 				est.numPairs = 1;
250 				est.stdDev = 0;
251 				//weird file format...
252 				est.isRC = a.isRC != b.isRC;
253 
254 				addEstimate(estMap, a, est, false);
255 			}
256 		}
257 	}
258 #endif
259 }
260 
261 static void
generateDistFile()262 generateDistFile()
263 {
264 	ofstream distFile(opt::distPath.c_str());
265 	assert(distFile.is_open());
266 	for (EstimateMap::iterator mapIt = estMap.begin(); mapIt != estMap.end(); ++mapIt) {
267 		// Skip empty iterators
268 		assert(!mapIt->second.estimates[0].empty() || !mapIt->second.estimates[1].empty());
269 		distFile << mapIt->first;
270 		for (int refIsRC = 0; refIsRC <= 1; refIsRC++) {
271 			if (refIsRC)
272 				distFile << " ;";
273 
274 			for (Estimates::iterator vecIt = mapIt->second.estimates[refIsRC].begin();
275 			     vecIt != mapIt->second.estimates[refIsRC].end();
276 			     ++vecIt) {
277 				vecIt->second.distance =
278 				    (int)round((double)vecIt->second.distance / (double)vecIt->second.numPairs);
279 				if (vecIt->second.numPairs >= opt::c && vecIt->second.numPairs != 0
280 				    /*&& vecIt->distance > 1 - opt::k*/)
281 					distFile << ' ' << get(g_contigNames, vecIt->first) << ',' << vecIt->second;
282 			}
283 		}
284 		distFile << '\n';
285 		assert_good(distFile, opt::distPath);
286 	}
287 	distFile.close();
288 }
289 
290 static bool
291 isSingleEnd(const string& id);
292 static bool
293 needsFlipping(const string& id);
294 
295 /**
296  * Return an alignment flipped as necessary to produce an alignment
297  * pair whose expected orientation is forward-reverse.  If the
298  * expected orientation is forward-forward, then reverse the first
299  * alignment, so that the alignment is forward-reverse, which is
300  * required by DistanceEst.
301  */
302 static const Alignment
flipAlignment(const Alignment & a,const string & id)303 flipAlignment(const Alignment& a, const string& id)
304 {
305 	return needsFlipping(id) ? a.flipQuery() : a;
306 }
307 
308 static void
handleAlignmentPair(const ReadAlignMap::value_type & curr,const ReadAlignMap::value_type & pair)309 handleAlignmentPair(const ReadAlignMap::value_type& curr, const ReadAlignMap::value_type& pair)
310 {
311 	const string& currID = curr.first;
312 	const string& pairID = pair.first;
313 
314 	// Both reads must align to a unique location.
315 	// The reads are allowed to span more than one contig, but
316 	// at least one of the two reads must span no more than
317 	// two contigs.
318 	const unsigned MAX_SPAN = 2;
319 	if (curr.second.empty() && pair.second.empty()) {
320 		stats.bothUnaligned++;
321 	} else if (curr.second.empty() || pair.second.empty()) {
322 		stats.oneUnaligned++;
323 	} else if (!checkUniqueAlignments(curr.second) || !checkUniqueAlignments(pair.second)) {
324 		stats.numMulti++;
325 	} else if (curr.second.size() > MAX_SPAN && pair.second.size() > MAX_SPAN) {
326 		stats.numSplit++;
327 	} else {
328 		// Iterate over the vectors, outputting the aligments
329 		bool counted = false;
330 		for (AlignmentVector::const_iterator refAlignIter = curr.second.begin();
331 		     refAlignIter != curr.second.end();
332 		     ++refAlignIter) {
333 			for (AlignmentVector::const_iterator pairAlignIter = pair.second.begin();
334 			     pairAlignIter != pair.second.end();
335 			     ++pairAlignIter) {
336 				const Alignment& a0 = flipAlignment(*refAlignIter, currID);
337 				const Alignment& a1 = flipAlignment(*pairAlignIter, pairID);
338 
339 				bool sameTarget = a0.contig == a1.contig;
340 				if (sameTarget && curr.second.size() == 1 && pair.second.size() == 1) {
341 					// Same target and the only alignment.
342 					if (a0.isRC != a1.isRC) {
343 						// Correctly oriented. Add this alignment to
344 						// the distribution of fragment sizes.
345 						int size = fragmentSize(a0, a1);
346 						histogram.insert(size);
347 						if (!opt::fragPath.empty()) {
348 							fragFile << size << '\n';
349 							assert(fragFile.good());
350 						}
351 					} else
352 						stats.numFF++;
353 					counted = true;
354 				}
355 
356 				bool outputSameTarget = opt::fragPath.empty() && opt::histPath.empty();
357 				if (!sameTarget || outputSameTarget) {
358 					cout << SAMRecord(a0, a1) << '\n' << SAMRecord(a1, a0) << '\n';
359 					assert(cout.good());
360 				}
361 			}
362 		}
363 		if (!counted)
364 			stats.numDifferent++;
365 	}
366 }
367 
368 static void
printProgress(const ReadAlignMap & map)369 printProgress(const ReadAlignMap& map)
370 {
371 	if (opt::verbose == 0)
372 		return;
373 
374 	static size_t prevBuckets;
375 	if (prevBuckets == 0)
376 		prevBuckets = map.bucket_count();
377 
378 	size_t buckets = map.bucket_count();
379 	if (stats.alignments % 1000000 == 0 || buckets != prevBuckets) {
380 		prevBuckets = buckets;
381 		size_t size = map.size();
382 		cerr << "Read " << stats.alignments
383 		     << " alignments. "
384 		        "Hash load: "
385 		     << size << " / " << buckets << " = " << (float)size / buckets << " using "
386 		     << toSI(getMemoryUsage()) << "B." << endl;
387 	}
388 }
389 
390 static void
handleAlignment(const ReadAlignMap::value_type & alignments,ReadAlignMap & out)391 handleAlignment(const ReadAlignMap::value_type& alignments, ReadAlignMap& out)
392 {
393 	if (!isSingleEnd(alignments.first)) {
394 		string pairID = makePairID(alignments.first);
395 		ReadAlignMap::iterator pairIter = out.find(pairID);
396 		if (pairIter != out.end()) {
397 			handleAlignmentPair(*pairIter, alignments);
398 			out.erase(pairIter);
399 		} else if (!out.insert(alignments).second) {
400 			cerr << "error: duplicate read ID `" << alignments.first << "'\n";
401 			exit(EXIT_FAILURE);
402 		}
403 	}
404 
405 	if (!opt::distPath.empty() && alignments.second.size() >= 2)
406 		doReadIntegrity(alignments);
407 
408 	stats.alignments++;
409 	printProgress(out);
410 }
411 
412 static void
readAlignment(const string & line,ReadAlignMap & out)413 readAlignment(const string& line, ReadAlignMap& out)
414 {
415 	istringstream s(line);
416 	pair<string, AlignmentVector> v;
417 	switch (opt::inputFormat) {
418 	case opt::SAM: {
419 		SAMRecord sam;
420 		s >> sam;
421 		assert(s);
422 		v.first = sam.qname;
423 		if (sam.isRead1())
424 			v.first += "/1";
425 		else if (sam.isRead2())
426 			v.first += "/2";
427 		if (!sam.isUnmapped())
428 			v.second.push_back(sam);
429 		break;
430 	}
431 	case opt::KALIGNER: {
432 		s >> v.first;
433 		assert(s);
434 		v.second.reserve(count(line.begin(), line.end(), '\t'));
435 		v.second.assign(istream_iterator<Alignment>(s), istream_iterator<Alignment>());
436 		assert(s.eof());
437 		break;
438 	}
439 	}
440 	handleAlignment(v, out);
441 }
442 
443 static void
readAlignments(istream & in,ReadAlignMap * pout)444 readAlignments(istream& in, ReadAlignMap* pout)
445 {
446 	for (string line; getline(in, line);)
447 		if (line.empty() || line[0] == '@')
448 			cout << line << '\n';
449 		else
450 			readAlignment(line, *pout);
451 	assert(in.eof());
452 }
453 
454 static void
readAlignmentsFile(string path,ReadAlignMap * pout)455 readAlignmentsFile(string path, ReadAlignMap* pout)
456 {
457 	if (opt::verbose > 0)
458 		cerr << "Reading `" << path << "'..." << endl;
459 	ifstream fin(path.c_str());
460 	assert_good(fin, path);
461 	readAlignments(fin, pout);
462 	fin.close();
463 }
464 
465 /** Return the specified number formatted as a percent. */
466 static string
percent(size_t x,size_t n)467 percent(size_t x, size_t n)
468 {
469 	ostringstream ss;
470 	ss << setw((int)log10(n) + 1) << x;
471 	if (x > 0)
472 		ss << "  " << setprecision(3) << (float)100 * x / n << '%';
473 	return ss.str();
474 }
475 
476 int
main(int argc,char * const * argv)477 main(int argc, char* const* argv)
478 {
479 	bool die = false;
480 	for (int c; (c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1;) {
481 		istringstream arg(optarg != NULL ? optarg : "");
482 		switch (c) {
483 		case '?':
484 			die = true;
485 			break;
486 		case 'l':
487 			arg >> opt::k;
488 			break;
489 		case 'c':
490 			arg >> opt::c;
491 			break;
492 		case 'd':
493 			arg >> opt::distPath;
494 			break;
495 		case 'f':
496 			arg >> opt::fragPath;
497 			break;
498 		case 'h':
499 			arg >> opt::histPath;
500 			break;
501 		case 'v':
502 			opt::verbose++;
503 			break;
504 		case OPT_HELP:
505 			cout << USAGE_MESSAGE;
506 			exit(EXIT_SUCCESS);
507 		case OPT_VERSION:
508 			cout << VERSION_MESSAGE;
509 			exit(EXIT_SUCCESS);
510 		}
511 		if (optarg != NULL && !arg.eof()) {
512 			cerr << PROGRAM ": invalid option: `-" << (char)c << optarg << "'\n";
513 			exit(EXIT_FAILURE);
514 		}
515 	}
516 
517 	if (opt::k <= 0 && opt::inputFormat == opt::KALIGNER) {
518 		cerr << PROGRAM ": "
519 		     << "missing -k,--kmer option\n";
520 		die = true;
521 	}
522 
523 	if (die) {
524 		cerr << "Try `" << PROGRAM << " --help' for more information.\n";
525 		exit(EXIT_FAILURE);
526 	}
527 
528 	if (!opt::fragPath.empty()) {
529 		fragFile.open(opt::fragPath.c_str());
530 		assert(fragFile.is_open());
531 	}
532 
533 	ReadAlignMap alignTable(1);
534 	if (optind < argc) {
535 		for_each(argv + optind, argv + argc, [&alignTable](const std::string& s) {
536 			readAlignmentsFile(s, &alignTable);
537 		});
538 	} else {
539 		if (opt::verbose > 0)
540 			cerr << "Reading from standard input..." << endl;
541 		readAlignments(cin, &alignTable);
542 	}
543 	if (opt::verbose > 0)
544 		cerr << "Read " << stats.alignments << " alignments" << endl;
545 
546 	unsigned numRF = histogram.count(INT_MIN, 0);
547 	unsigned numFR = histogram.count(1, INT_MAX);
548 	size_t sum = alignTable.size() + stats.bothUnaligned + stats.oneUnaligned + numFR + numRF +
549 	             stats.numFF + stats.numDifferent + stats.numMulti + stats.numSplit;
550 	cerr << "Mateless   " << percent(alignTable.size(), sum)
551 	     << "\n"
552 	        "Unaligned  "
553 	     << percent(stats.bothUnaligned, sum)
554 	     << "\n"
555 	        "Singleton  "
556 	     << percent(stats.oneUnaligned, sum)
557 	     << "\n"
558 	        "FR         "
559 	     << percent(numFR, sum)
560 	     << "\n"
561 	        "RF         "
562 	     << percent(numRF, sum)
563 	     << "\n"
564 	        "FF         "
565 	     << percent(stats.numFF, sum)
566 	     << "\n"
567 	        "Different  "
568 	     << percent(stats.numDifferent, sum)
569 	     << "\n"
570 	        "Multimap   "
571 	     << percent(stats.numMulti, sum)
572 	     << "\n"
573 	        "Split      "
574 	     << percent(stats.numSplit, sum)
575 	     << "\n"
576 	        "Total      "
577 	     << sum << endl;
578 
579 	if (!opt::distPath.empty())
580 		generateDistFile();
581 
582 	if (!opt::fragPath.empty())
583 		fragFile.close();
584 
585 	if (!opt::histPath.empty()) {
586 		ofstream histFile(opt::histPath.c_str());
587 		assert(histFile.is_open());
588 		histFile << histogram;
589 		assert(histFile.good());
590 		histFile.close();
591 	}
592 
593 	if (numFR < numRF)
594 		histogram = histogram.negate();
595 	histogram.eraseNegative();
596 	histogram.removeNoise();
597 	histogram.removeOutliers();
598 	Histogram h = histogram.trimFraction(0.0001);
599 	if (opt::verbose > 0)
600 		cerr << "Stats mean: " << setprecision(4) << h.mean()
601 		     << " "
602 		        "median: "
603 		     << setprecision(4) << h.median()
604 		     << " "
605 		        "sd: "
606 		     << setprecision(4) << h.sd()
607 		     << " "
608 		        "n: "
609 		     << h.size()
610 		     << " "
611 		        "min: "
612 		     << h.minimum() << " max: " << h.maximum() << '\n'
613 		     << h.barplot() << endl;
614 
615 	if (stats.numFF > numFR && stats.numFF > numRF) {
616 		cerr << "error: The mate pairs of this library are oriented "
617 		        "forward-forward (FF), which is not supported by ABySS."
618 		     << endl;
619 		exit(EXIT_FAILURE);
620 	}
621 
622 	return 0;
623 }
624 
625 /** Return whether any k-mer in the query is aligned more than once.
626  */
627 static bool
checkUniqueAlignments(const AlignmentVector & alignVec)628 checkUniqueAlignments(const AlignmentVector& alignVec)
629 {
630 	assert(!alignVec.empty());
631 	if (alignVec.size() == 1)
632 		return true;
633 
634 	unsigned nKmer = alignVec.front().read_length - opt::k + 1;
635 	vector<unsigned> coverage(nKmer);
636 
637 	for (AlignmentVector::const_iterator iter = alignVec.begin(); iter != alignVec.end(); ++iter) {
638 		assert((unsigned)iter->align_length >= opt::k);
639 		unsigned end = iter->read_start_pos + iter->align_length - opt::k + 1;
640 		assert(end <= nKmer);
641 		for (unsigned i = iter->read_start_pos; i < end; i++)
642 			coverage[i]++;
643 	}
644 
645 	for (unsigned i = 0; i < nKmer; ++i)
646 		if (coverage[i] > 1)
647 			return false;
648 	return true;
649 }
650 
651 static bool
replaceSuffix(string & s,const string & suffix0,const string & suffix1)652 replaceSuffix(string& s, const string& suffix0, const string& suffix1)
653 {
654 	if (endsWith(s, suffix0)) {
655 		s.replace(s.length() - suffix0.length(), string::npos, suffix1);
656 		return true;
657 	} else if (endsWith(s, suffix1)) {
658 		s.replace(s.length() - suffix1.length(), string::npos, suffix0);
659 		return true;
660 	} else
661 		return false;
662 }
663 
664 /** Return true if the specified read ID is of a single-end read. */
665 static bool
isSingleEnd(const string & id)666 isSingleEnd(const string& id)
667 {
668 	unsigned l = id.length();
669 	return endsWith(id, ".fn") || (l > 6 && id.substr(l - 6, 5) == ".part");
670 }
671 
672 /** Return the mate ID of the specified read ID. */
673 static string
makePairID(string id)674 makePairID(string id)
675 {
676 	if (equal(id.begin(), id.begin() + 3, "SRR"))
677 		return id;
678 
679 	assert(!id.empty());
680 	char& c = id[id.length() - 1];
681 	switch (c) {
682 	case '1':
683 		c = '2';
684 		return id;
685 	case '2':
686 		c = '1';
687 		return id;
688 	case 'A':
689 		c = 'B';
690 		return id;
691 	case 'B':
692 		c = 'A';
693 		return id;
694 	case 'F':
695 		c = 'R';
696 		return id;
697 	case 'R':
698 		c = 'F';
699 		return id;
700 	case 'f':
701 		c = 'r';
702 		return id;
703 	case 'r':
704 		c = 'f';
705 		return id;
706 	}
707 
708 	if (replaceSuffix(id, "forward", "reverse") || replaceSuffix(id, "F3", "R3"))
709 		return id;
710 
711 	cerr << "error: read ID `" << id
712 	     << "' must end in one of\n"
713 	        "\t1 and 2 or A and B or F and R or"
714 	        " F3 and R3 or forward and reverse\n";
715 	exit(EXIT_FAILURE);
716 }
717 
718 static bool
needsFlipping(const string & id)719 needsFlipping(const string& id)
720 {
721 	return endsWith(id, "F3");
722 }
723