1 /**
2  * Identify and pop simple bubbles.
3  * Written by Shaun Jackman <sjackman@bcgsc.ca>.
4  */
5 
6 #include "Graph/PopBubbles.h"
7 #include "Common/Options.h"
8 #include "ConstString.h"
9 #include "ContigPath.h"
10 #include "ContigProperties.h"
11 #include "FastaReader.h"
12 #include "Graph/ContigGraph.h"
13 #include "Graph/ContigGraphAlgorithms.h"
14 #include "Graph/DepthFirstSearch.h"
15 #include "Graph/DirectedGraph.h"
16 #include "Graph/GraphIO.h"
17 #include "Graph/GraphUtil.h"
18 #include "IOUtil.h"
19 #include "Sequence.h"
20 #include "Uncompress.h"
21 #include "alignGlobal.h"
22 #include "config.h"
23 #include <algorithm>
24 #include <boost/lambda/bind.hpp>
25 #include <boost/lambda/lambda.hpp>
26 #include <climits> // for UINT_MAX
27 #include <fstream>
28 #include <functional>
29 #include <getopt.h>
30 #include <iostream>
31 #include <iterator>
32 #include <map>
33 #include <set>
34 #include <sstream>
35 #include <string>
36 #include <utility>
37 #include <vector>
38 #if _OPENMP
39 #include <omp.h>
40 #endif
41 
42 using namespace std;
43 using namespace boost::lambda;
44 using boost::tie;
45 
46 #define PROGRAM "PopBubbles"
47 
48 static const char VERSION_MESSAGE[] =
49     PROGRAM " (" PACKAGE_NAME ") " VERSION "\n"
50             "Written by Shaun Jackman.\n"
51             "\n"
52             "Copyright 2014 Canada's Michael Smith Genome Sciences Centre\n";
53 
54 static const char USAGE_MESSAGE[] =
55     "Usage: " PROGRAM " -k<kmer> [OPTION]... FASTA ADJ\n"
56     "Identify and pop simple bubbles.\n"
57     "\n"
58     " Arguments:\n"
59     "\n"
60     "  FASTA  contigs in FASTA format\n"
61     "  ADJ    contig adjacency graph\n"
62     "\n"
63     " Options:\n"
64     "\n"
65     "  -k, --kmer=N          k-mer size\n"
66     "  -a, --branches=N      maximum number of branches, default: 2\n"
67     "  -b, --bubble-length=N pop bubbles shorter than N bp\n"
68     "                        default is 10000\n"
69     "  -p, --identity=REAL   minimum identity, default: 0.9\n"
70     "  -c, --coverage=REAL   remove contigs with mean k-mer coverage\n"
71     "                        less than this threshold [0]\n"
72     "      --scaffold        scaffold over bubbles that have\n"
73     "                        insufficient identity\n"
74     "      --no-scaffold     disable scaffolding [default]\n"
75     "      --SS              expect contigs to be oriented correctly\n"
76     "      --no-SS           no assumption about contig orientation [default]\n"
77     "  -g, --graph=FILE      write the contig adjacency graph to FILE\n"
78     "      --adj             output the graph in ADJ format [default]\n"
79     "      --asqg            output the graph in ASQG format\n"
80     "      --dot             output the graph in GraphViz format\n"
81     "      --gfa             output the graph in GFA1 format\n"
82     "      --gfa1            output the graph in GFA1 format\n"
83     "      --gfa2            output the graph in GFA2 format\n"
84     "      --gv              output the graph in GraphViz format\n"
85     "      --sam             output the graph in SAM format\n"
86     "      --bubble-graph    output a graph of the bubbles\n"
87     "  -j, --threads=N       use N parallel threads [1]\n"
88     "  -v, --verbose         display verbose output\n"
89     "      --help            display this help and exit\n"
90     "      --version         output version information and exit\n"
91     "\n"
92     "Report bugs to <" PACKAGE_BUGREPORT ">.\n";
93 
94 namespace opt {
95 unsigned k; // used by ContigProperties
96 
97 /** Maximum number of branches. */
98 static unsigned maxBranches = 2;
99 
100 /** Pop bubbles shorter than this threshold. */
101 static unsigned maxLength = 10000;
102 
103 /** Minimum identity. */
104 static float identity = 0.9;
105 
106 /** Minimum mean k-mer coverage. */
107 static float minCoverage;
108 
109 /** Scaffold over bubbles that have insufficient identity. */
110 static int scaffold;
111 
112 /** Write the contig adjacency graph to this file. */
113 static string graphPath;
114 
115 /** Output a graph of the bubbles. */
116 static int bubbleGraph;
117 
118 int format; // used by ContigProperties
119 
120 /** Run a strand-specific RNA-Seq assembly. */
121 static int ss;
122 
123 /** Number of threads. */
124 static int threads = 1;
125 }
126 
127 static const char shortopts[] = "a:b:c:g:j:k:p:v";
128 
129 enum
130 {
131 	OPT_HELP = 1,
132 	OPT_VERSION
133 };
134 
135 static const struct option longopts[] = { { "branches", required_argument, NULL, 'a' },
136 	                                      { "bubble-length", required_argument, NULL, 'b' },
137 	                                      { "coverage", required_argument, NULL, 'c' },
138 	                                      {
139 	                                          "bubble-graph",
140 	                                          no_argument,
141 	                                          &opt::bubbleGraph,
142 	                                          1,
143 	                                      },
144 	                                      { "graph", required_argument, NULL, 'g' },
145 	                                      { "adj", no_argument, &opt::format, ADJ },
146 	                                      { "asqg", no_argument, &opt::format, ASQG },
147 	                                      { "dot", no_argument, &opt::format, DOT },
148 	                                      { "gfa", no_argument, &opt::format, GFA1 },
149 	                                      { "gfa1", no_argument, &opt::format, GFA1 },
150 	                                      { "gfa2", no_argument, &opt::format, GFA2 },
151 	                                      { "gv", no_argument, &opt::format, DOT },
152 	                                      { "sam", no_argument, &opt::format, SAM },
153 	                                      { "kmer", required_argument, NULL, 'k' },
154 	                                      { "identity", required_argument, NULL, 'p' },
155 	                                      { "scaffold", no_argument, &opt::scaffold, 1 },
156 	                                      { "no-scaffold", no_argument, &opt::scaffold, 0 },
157 	                                      { "SS", no_argument, &opt::ss, 1 },
158 	                                      { "no-SS", no_argument, &opt::ss, 0 },
159 	                                      { "threads", required_argument, NULL, 'j' },
160 	                                      { "verbose", no_argument, NULL, 'v' },
161 	                                      { "help", no_argument, NULL, OPT_HELP },
162 	                                      { "version", no_argument, NULL, OPT_VERSION },
163 	                                      { NULL, 0, NULL, 0 } };
164 
165 /** Popped branches. */
166 static vector<ContigID> g_popped;
167 
168 /** Contig adjacency graph. */
169 typedef ContigGraph<DirectedGraph<ContigProperties, Distance>> Graph;
170 typedef Graph::vertex_descriptor vertex_descriptor;
171 typedef Graph::adjacency_iterator adjacency_iterator;
172 
173 /** Return the distance from vertex u to v. */
174 static int
getDistance(const Graph & g,vertex_descriptor u,vertex_descriptor v)175 getDistance(const Graph& g, vertex_descriptor u, vertex_descriptor v)
176 {
177 	typedef graph_traits<Graph>::edge_descriptor edge_descriptor;
178 	pair<edge_descriptor, bool> e = edge(u, v, g);
179 	assert(e.second);
180 	return g[e.first].distance;
181 }
182 
183 struct CompareCoverage
184 {
185 	const Graph& g;
CompareCoverageCompareCoverage186 	CompareCoverage(const Graph& g)
187 	  : g(g)
188 	{}
operator ()CompareCoverage189 	bool operator()(vertex_descriptor u, vertex_descriptor v)
190 	{
191 		return g[u].coverage > g[v].coverage;
192 	}
193 };
194 
195 /** Pop the bubble between vertices v and tail. */
196 static void
popBubble(Graph & g,vertex_descriptor v,vertex_descriptor tail)197 popBubble(Graph& g, vertex_descriptor v, vertex_descriptor tail)
198 {
199 	unsigned nbranches = g.out_degree(v);
200 	assert(nbranches > 1);
201 	assert(nbranches == g.in_degree(tail));
202 	vector<vertex_descriptor> sorted(nbranches);
203 	pair<adjacency_iterator, adjacency_iterator> adj = g.adjacent_vertices(v);
204 	copy(adj.first, adj.second, sorted.begin());
205 	sort(sorted.begin(), sorted.end(), CompareCoverage(g));
206 	if (opt::bubbleGraph)
207 #pragma omp critical(cout)
208 	{
209 		cout << '"' << get(vertex_name, g, v) << "\" -> {";
210 		for (vector<vertex_descriptor>::const_iterator it = sorted.begin(); it != sorted.end();
211 		     ++it)
212 			cout << " \"" << get(vertex_name, g, *it) << '"';
213 		cout << " } -> \"" << get(vertex_name, g, tail) << "\"\n";
214 	}
215 #pragma omp critical(g_popped)
216 	transform(sorted.begin() + 1, sorted.end(), back_inserter(g_popped), [](const ContigNode& c) {
217 		return c.contigIndex();
218 	});
219 }
220 
221 static struct
222 {
223 	unsigned bubbles;
224 	unsigned popped;
225 	unsigned scaffold;
226 	unsigned notSimple;
227 	unsigned tooLong;
228 	unsigned tooMany;
229 	unsigned dissimilar;
230 } g_count;
231 
232 /** Contig sequences. */
233 typedef vector<const_string> Contigs;
234 static Contigs g_contigs;
235 
236 /** Return the sequence of vertex u. */
237 static string
getSequence(const Graph * g,vertex_descriptor u)238 getSequence(const Graph* g, vertex_descriptor u)
239 {
240 	size_t i = get(vertex_contig_index, *g, u);
241 	assert(i < g_contigs.size());
242 	string seq(g_contigs[i]);
243 	return get(vertex_sense, *g, u) ? reverseComplement(seq) : seq;
244 }
245 
246 /** Return the length of vertex v. */
247 static unsigned
getLength(const Graph * g,vertex_descriptor v)248 getLength(const Graph* g, vertex_descriptor v)
249 {
250 	return (*g)[v].length;
251 }
252 
253 /** Align the sequences of [first,last).
254  * @param t the vertex to the left of the bubble
255  * @param v the vertex to the right of the bubble
256  * @return the identity of the global alignment
257  */
258 template<typename It>
259 static float
getAlignmentIdentity(const Graph & g,vertex_descriptor t,vertex_descriptor v,It first,It last)260 getAlignmentIdentity(const Graph& g, vertex_descriptor t, vertex_descriptor v, It first, It last)
261 {
262 	unsigned nbranches = distance(first, last);
263 	vector<int> inDists(nbranches);
264 	transform(
265 	    first, last, inDists.begin(), boost::lambda::bind(getDistance, boost::cref(g), t, _1));
266 	vector<int> outDists(nbranches);
267 	transform(
268 	    first, last, outDists.begin(), boost::lambda::bind(getDistance, boost::cref(g), _1, v));
269 	vector<int> insertLens(nbranches);
270 	transform(
271 	    first,
272 	    last,
273 	    insertLens.begin(),
274 	    boost::lambda::bind(getDistance, boost::cref(g), t, _1) +
275 	        boost::lambda::bind(getLength, &g, _1) +
276 	        boost::lambda::bind(getDistance, boost::cref(g), _1, v));
277 
278 	int max_in_overlap = -(*min_element(inDists.begin(), inDists.end()));
279 	assert(max_in_overlap >= 0);
280 	int max_out_overlap = -(*min_element(outDists.begin(), outDists.end()));
281 	assert(max_out_overlap >= 0);
282 	int min_insert_len = *min_element(insertLens.begin(), insertLens.end());
283 	int max_insert_len = *max_element(insertLens.begin(), insertLens.end());
284 
285 	float max_identity = (float)(min_insert_len + max_in_overlap + max_out_overlap) /
286 	                     (max_insert_len + max_in_overlap + max_out_overlap);
287 	if (min_insert_len <= 0 || max_identity < opt::identity)
288 		return max_identity;
289 
290 	vector<string> seqs(nbranches);
291 	transform(first, last, seqs.begin(), boost::lambda::bind(getSequence, &g, _1));
292 	for (unsigned i = 0; i < seqs.size(); i++) {
293 		// Remove the overlapping sequence.
294 		int n = seqs[i].size();
295 		int l = -inDists[i], r = -outDists[i];
296 		assert(n > l + r);
297 		seqs[i] = seqs[i].substr(l, n - l - r);
298 	}
299 
300 	unsigned matches, consensusSize;
301 	tie(matches, consensusSize) = align(seqs);
302 	return (float)(matches + max_in_overlap + max_out_overlap) /
303 	       (consensusSize + max_in_overlap + max_out_overlap);
304 }
305 
306 /** Pop the specified bubble if it is a simple bubble.
307  * @return whether the bubble is popped
308  */
309 static bool
popSimpleBubble(Graph * pg,vertex_descriptor v)310 popSimpleBubble(Graph* pg, vertex_descriptor v)
311 {
312 	Graph& g = *pg;
313 	unsigned nbranches = g.out_degree(v);
314 	assert(nbranches >= 2);
315 	vertex_descriptor v1 = *g.adjacent_vertices(v).first;
316 	if (g.out_degree(v1) != 1) {
317 #pragma omp atomic
318 		g_count.notSimple++;
319 		return false;
320 	}
321 	vertex_descriptor tail = *g.adjacent_vertices(v1).first;
322 	if (v == get(vertex_complement, g, tail) // Palindrome
323 	    || g.in_degree(tail) != nbranches) {
324 #pragma omp atomic
325 		g_count.notSimple++;
326 		return false;
327 	}
328 
329 	// Check that every branch is simple and ends at the same node.
330 	pair<adjacency_iterator, adjacency_iterator> adj = g.adjacent_vertices(v);
331 	for (adjacency_iterator it = adj.first; it != adj.second; ++it) {
332 		if (g.out_degree(*it) != 1 || g.in_degree(*it) != 1) {
333 #pragma omp atomic
334 			g_count.notSimple++;
335 			return false;
336 		}
337 		if (*g.adjacent_vertices(*it).first != tail) {
338 			// The branches do not merge back to the same node.
339 #pragma omp atomic
340 			g_count.notSimple++;
341 			return false;
342 		}
343 	}
344 
345 	if (opt::verbose > 2)
346 #pragma omp critical(cerr)
347 	{
348 		cerr << "\n* " << get(vertex_name, g, v) << " ->";
349 		for (adjacency_iterator it = adj.first; it != adj.second; ++it)
350 			cerr << ' ' << get(vertex_name, g, *it);
351 		cerr << " -> " << get(vertex_name, g, tail) << '\n';
352 	}
353 
354 	if (nbranches > opt::maxBranches) {
355 		// Too many branches.
356 #pragma omp atomic
357 		g_count.tooMany++;
358 		if (opt::verbose > 1)
359 #pragma omp critical(cerr)
360 			cerr << nbranches << " paths (too many)\n";
361 		return false;
362 	}
363 
364 	vector<unsigned> lengths(nbranches);
365 	transform(adj.first, adj.second, lengths.begin(), [&g](const ContigNode& c) {
366 		return getLength(&g, c);
367 	});
368 	unsigned minLength = *min_element(lengths.begin(), lengths.end());
369 	unsigned maxLength = *max_element(lengths.begin(), lengths.end());
370 	if (maxLength >= opt::maxLength) {
371 		// This branch is too long.
372 #pragma omp atomic
373 		g_count.tooLong++;
374 		if (opt::verbose > 1)
375 #pragma omp critical(cerr)
376 			cerr << minLength << '\t' << maxLength << "\t0\t(too long)\n";
377 		return false;
378 	}
379 
380 	float identity =
381 	    opt::identity == 0 ? 0 : getAlignmentIdentity(g, v, tail, adj.first, adj.second);
382 	bool dissimilar = identity < opt::identity;
383 	if (opt::verbose > 1)
384 #pragma omp critical(cerr)
385 		cerr << minLength << '\t' << maxLength << '\t' << identity
386 		     << (dissimilar ? "\t(dissimilar)" : "") << '\n';
387 	if (dissimilar) {
388 		// Insufficient identity.
389 #pragma omp atomic
390 		g_count.dissimilar++;
391 		return false;
392 	}
393 
394 #pragma omp atomic
395 	g_count.popped++;
396 	popBubble(g, v, tail);
397 	return true;
398 }
399 
400 /** Add distances to a path. */
401 static ContigPath
addDistance(const Graph & g,const ContigPath & path)402 addDistance(const Graph& g, const ContigPath& path)
403 {
404 	ContigPath out;
405 	out.reserve(path.size());
406 	ContigNode u = path.front();
407 	out.push_back(u);
408 	for (ContigPath::const_iterator it = path.begin() + 1; it != path.end(); ++it) {
409 		ContigNode v = *it;
410 		int distance = getDistance(g, u, v);
411 		if (distance >= 0) {
412 			int numN = distance + opt::k - 1; // by convention
413 			assert(numN >= 0);
414 			numN = max(numN, 1);
415 			out.push_back(ContigNode(numN, 'N'));
416 		}
417 		out.push_back(v);
418 		u = v;
419 	}
420 	return out;
421 }
422 
423 /** Return the length of the longest path through the bubble. */
424 static int
longestPath(const Graph & g,const Bubble & topo)425 longestPath(const Graph& g, const Bubble& topo)
426 {
427 	typedef graph_traits<Graph>::edge_descriptor E;
428 	typedef graph_traits<Graph>::out_edge_iterator Eit;
429 	typedef graph_traits<Graph>::vertex_descriptor V;
430 
431 	EdgeWeightMap<Graph> weight(g);
432 	map<ContigNode, int> distance;
433 	distance[topo.front()] = 0;
434 	for (Bubble::const_iterator it = topo.begin(); it != topo.end(); ++it) {
435 		V u = *it;
436 		Eit eit, elast;
437 		for (tie(eit, elast) = out_edges(u, g); eit != elast; ++eit) {
438 			E e = *eit;
439 			V v = target(e, g);
440 			distance[v] = max(distance[v], distance[u] + weight[e]);
441 		}
442 	}
443 	V v = topo.back();
444 	return distance[v] - g[v].length;
445 }
446 
447 /** Scaffold over the bubble between vertices u and w.
448  * Add an edge (u,w) with the distance property set to the length of
449  * the largest branch of the bubble.
450  */
451 static void
scaffoldBubble(Graph & g,const Bubble & bubble)452 scaffoldBubble(Graph& g, const Bubble& bubble)
453 {
454 	typedef graph_traits<Graph>::vertex_descriptor V;
455 	assert(opt::scaffold);
456 	assert(bubble.size() > 2);
457 
458 	V u = bubble.front(), w = bubble.back();
459 	if (edge(u, w, g).second) {
460 		// Already scaffolded.
461 		return;
462 	}
463 	assert(isBubble(g, bubble.begin(), bubble.end()));
464 
465 	assert(bubble.size() > 2);
466 	size_t n = bubble.size() - 2;
467 	g_popped.reserve(g_popped.size() + n);
468 	for (Bubble::const_iterator it = bubble.begin() + 1; it != bubble.end() - 1; ++it)
469 		g_popped.push_back(it->contigIndex());
470 
471 	add_edge(u, w, max(longestPath(g, bubble), 1), g);
472 }
473 
474 /** Pop the specified bubble if it is simple, otherwise scaffold. */
475 static void
popOrScaffoldBubble(Graph & g,const Bubble & bubble)476 popOrScaffoldBubble(Graph& g, const Bubble& bubble)
477 {
478 #pragma omp atomic
479 	g_count.bubbles++;
480 	if (!popSimpleBubble(&g, bubble.front()) && opt::scaffold) {
481 #pragma omp atomic
482 		g_count.scaffold++;
483 		scaffoldBubble(g, bubble);
484 	}
485 }
486 
487 /** Return the length of the specified vertex in k-mer. */
488 static unsigned
getKmerLength(const ContigProperties & vp)489 getKmerLength(const ContigProperties& vp)
490 {
491 	assert(vp.length >= opt::k);
492 	return vp.length - opt::k + 1;
493 }
494 
495 /** Return the mean k-mer coverage of the specified vertex. */
496 static float
getMeanCoverage(const ContigProperties & vp)497 getMeanCoverage(const ContigProperties& vp)
498 {
499 	return (float)vp.coverage / getKmerLength(vp);
500 }
501 
502 /** Remove contigs with insufficient coverage. */
503 static void
filterGraph(Graph & g)504 filterGraph(Graph& g)
505 {
506 	typedef graph_traits<Graph> GTraits;
507 	typedef GTraits::vertex_descriptor V;
508 	typedef GTraits::vertex_iterator Vit;
509 
510 	unsigned removedContigs = 0, removedKmer = 0;
511 	std::pair<Vit, Vit> urange = vertices(g);
512 	for (Vit uit = urange.first; uit != urange.second; ++uit) {
513 		V u = *uit;
514 		if (get(vertex_removed, g, u))
515 			continue;
516 		const ContigProperties& vp = g[u];
517 		if (getMeanCoverage(vp) < opt::minCoverage) {
518 			removedContigs++;
519 			removedKmer += getKmerLength(vp);
520 			clear_vertex(u, g);
521 			remove_vertex(u, g);
522 			g_popped.push_back(get(vertex_contig_index, g, u));
523 		}
524 	}
525 	if (opt::verbose > 0) {
526 		cerr << "Removed " << removedKmer << " k-mer in " << removedContigs
527 		     << " contigs with mean k-mer coverage "
528 		        "less than "
529 		     << opt::minCoverage << ".\n";
530 		printGraphStats(cerr, g);
531 	}
532 }
533 
534 /** Remove the specified contig from the adjacency graph. */
535 static void
removeContig(Graph * g,ContigID id)536 removeContig(Graph* g, ContigID id)
537 {
538 	ContigNode v(id, false);
539 	g->clear_vertex(v);
540 	g->remove_vertex(v);
541 }
542 
543 int
main(int argc,char ** argv)544 main(int argc, char** argv)
545 {
546 	string commandLine;
547 	{
548 		ostringstream ss;
549 		char** last = argv + argc - 1;
550 		copy(argv, last, ostream_iterator<const char*>(ss, " "));
551 		ss << *last;
552 		commandLine = ss.str();
553 	}
554 
555 	bool die = false;
556 	for (int c; (c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1;) {
557 		istringstream arg(optarg != NULL ? optarg : "");
558 		switch (c) {
559 		case '?':
560 			die = true;
561 			break;
562 		case 'a':
563 			arg >> opt::maxBranches;
564 			break;
565 		case 'b':
566 			arg >> opt::maxLength;
567 			break;
568 		case 'c':
569 			arg >> opt::minCoverage;
570 			break;
571 		case 'g':
572 			arg >> opt::graphPath;
573 			break;
574 		case 'j':
575 			arg >> opt::threads;
576 			break;
577 		case 'k':
578 			arg >> opt::k;
579 			break;
580 		case 'p':
581 			arg >> opt::identity;
582 			break;
583 		case 'v':
584 			opt::verbose++;
585 			break;
586 		case OPT_HELP:
587 			cout << USAGE_MESSAGE;
588 			exit(EXIT_SUCCESS);
589 		case OPT_VERSION:
590 			cout << VERSION_MESSAGE;
591 			exit(EXIT_SUCCESS);
592 		}
593 		if (optarg != NULL && !arg.eof()) {
594 			cerr << PROGRAM ": invalid option: `-" << (char)c << optarg << "'\n";
595 			exit(EXIT_FAILURE);
596 		}
597 	}
598 
599 	if (opt::k <= 0) {
600 		cerr << PROGRAM ": "
601 		     << "missing -k,--kmer option\n";
602 		die = true;
603 	}
604 
605 	if (argc - optind < 2) {
606 		cerr << PROGRAM ": missing arguments\n";
607 		die = true;
608 	}
609 
610 	if (argc - optind > 2) {
611 		cerr << PROGRAM ": too many arguments\n";
612 		die = true;
613 	}
614 
615 	if (die) {
616 		cerr << "Try `" << PROGRAM << " --help' for more information.\n";
617 		exit(EXIT_FAILURE);
618 	}
619 
620 	const char* contigsPath(argv[optind++]);
621 	string adjPath(argv[optind++]);
622 
623 	// Read the contig adjacency graph.
624 	if (opt::verbose > 0)
625 		cerr << "Reading `" << adjPath << "'...\n";
626 	ifstream fin(adjPath.c_str());
627 	assert_good(fin, adjPath);
628 	Graph g;
629 	fin >> g;
630 	assert(fin.eof());
631 	g_contigNames.lock();
632 	if (opt::verbose > 0)
633 		printGraphStats(cerr, g);
634 
635 	// Read the contigs.
636 	Contigs& contigs = g_contigs;
637 	if (opt::identity > 0) {
638 		if (opt::verbose > 0)
639 			cerr << "Reading `" << contigsPath << "'...\n";
640 		FastaReader in(contigsPath, FastaReader::NO_FOLD_CASE);
641 		for (FastaRecord rec; in >> rec;) {
642 			if (g_contigNames.count(rec.id) == 0)
643 				continue;
644 			assert(contigs.size() == get(g_contigNames, rec.id));
645 			contigs.push_back(rec.seq);
646 		}
647 		assert(in.eof());
648 		assert(!contigs.empty());
649 		opt::colourSpace = isdigit(contigs.front()[0]);
650 	}
651 
652 	// Remove contigs with insufficient coverage.
653 	if (opt::minCoverage > 0)
654 		filterGraph(g);
655 
656 	if (opt::bubbleGraph)
657 		cout << "digraph bubbles {\n";
658 
659 	Bubbles bubbles = discoverBubbles(g);
660 	for (Bubbles::const_iterator it = bubbles.begin(); it != bubbles.end(); ++it)
661 		popOrScaffoldBubble(g, *it);
662 
663 	// Each bubble should be identified twice. Remove the duplicate.
664 	sort(g_popped.begin(), g_popped.end());
665 	g_popped.erase(unique(g_popped.begin(), g_popped.end()), g_popped.end());
666 
667 	if (opt::bubbleGraph) {
668 		cout << "}\n";
669 	} else {
670 		for (vector<ContigID>::const_iterator it = g_popped.begin(); it != g_popped.end(); ++it)
671 			cout << get(g_contigNames, *it) << '\n';
672 	}
673 
674 	if (opt::verbose > 0)
675 		cerr << "Bubbles: " << (g_count.bubbles + 1) / 2 << " Popped: " << (g_count.popped + 1) / 2
676 		     << " Scaffolds: " << (g_count.scaffold + 1) / 2
677 		     << " Complex: " << (g_count.notSimple + 1) / 2
678 		     << " Too long: " << (g_count.tooLong + 1) / 2
679 		     << " Too many: " << (g_count.tooMany + 1) / 2
680 		     << " Dissimilar: " << (g_count.dissimilar + 1) / 2 << '\n';
681 
682 	if (!opt::graphPath.empty()) {
683 		// Remove the popped contigs from the adjacency graph.
684 		for_each(g_popped.begin(), g_popped.end(), [&g](const ContigID& c) {
685 			return removeContig(&g, c);
686 		});
687 
688 		// Assemble unambiguous paths.
689 		g_contigNames.unlock();
690 		typedef vector<ContigPath> ContigPaths;
691 		ContigPaths paths;
692 		size_t numContigs = num_vertices(g) / 2;
693 		if (opt::scaffold) {
694 			Graph gorig = g;
695 			if (opt::ss)
696 				assemble_stranded(g, back_inserter(paths));
697 			else
698 				assemble(g, back_inserter(paths));
699 			for (ContigPaths::const_iterator it = paths.begin(); it != paths.end(); ++it) {
700 				ContigNode u(numContigs + it - paths.begin(), false);
701 				string name = createContigName();
702 				put(vertex_name, g, u, name);
703 				cout << name << '\t' << addDistance(gorig, *it) << '\n';
704 			}
705 		} else {
706 			if (opt::ss)
707 				assemble_stranded(g, back_inserter(paths));
708 			else
709 				assemble(g, back_inserter(paths));
710 			for (ContigPaths::const_iterator it = paths.begin(); it != paths.end(); ++it) {
711 				ContigNode u(numContigs + it - paths.begin(), false);
712 				string name = createContigName();
713 				put(vertex_name, g, u, name);
714 				cout << name << '\t' << *it << '\n';
715 			}
716 		}
717 		g_contigNames.lock();
718 		paths.clear();
719 
720 		// Output the updated adjacency graph.
721 		ofstream fout(opt::graphPath.c_str());
722 		assert_good(fout, opt::graphPath);
723 		write_graph(fout, g, PROGRAM, commandLine);
724 		assert_good(fout, opt::graphPath);
725 		if (opt::verbose > 0)
726 			printGraphStats(cerr, g);
727 	}
728 
729 	return 0;
730 }
731