1 /**
2  * Remove short contigs that do not contribute any relevant
3  * information to the assembly.
4  * Written by Tony Raymond <traymond@bcgsc.ca>
5  */
6 
7 #include "Common/Options.h"
8 #include "ContigID.h"
9 #include "ContigPath.h"
10 #include "ContigProperties.h"
11 #include "FastaReader.h"
12 #include "Graph/ContigGraph.h"
13 #include "Graph/ContigGraphAlgorithms.h"
14 #include "Graph/DirectedGraph.h"
15 #include "Graph/GraphIO.h"
16 #include "Graph/GraphUtil.h"
17 #include "IOUtil.h"
18 #include "Uncompress.h"
19 #include <algorithm>
20 #include <boost/lambda/bind.hpp>
21 #include <boost/lambda/lambda.hpp>
22 #include <fstream>
23 #include <functional>
24 #include <getopt.h>
25 #include <iostream>
26 #include <iterator>
27 #include <sstream>
28 #include <string>
29 #include <utility>
30 #include <vector>
31 
32 using namespace std;
33 using namespace rel_ops;
34 using namespace boost::lambda;
35 using boost::tie;
36 
37 #define PROGRAM "abyss-filtergraph"
38 
39 static const char VERSION_MESSAGE[] =
40     PROGRAM " (" PACKAGE_NAME ") " VERSION "\n"
41             "Written by Tony Raymond.\n"
42             "\n"
43             "Copyright 2014 Canada's Michael Smith Genome Sciences Centre\n";
44 
45 static const char USAGE_MESSAGE[] =
46     "Usage: " PROGRAM " -k<kmer> [OPTION]... ADJ [FASTA]\n"
47     "Remove short contigs that do not contribute any relevant\n"
48     "information to the assembly.\n"
49     "\n"
50     " Arguments:\n"
51     "\n"
52     "  ADJ    contig adjacency graph\n"
53     "  FASTA  contigs to check consistency of ADJ edges\n"
54     "\n"
55     " Options:\n"
56     "\n"
57     "  -k, --kmer=N            k-mer size\n"
58     "      --SS                expect contigs to be oriented correctly\n"
59     "      --no-SS             no assumption about contig orientation\n"
60     "  -T, --island=N          remove islands shorter than N [0]\n"
61     "  -t, --tip=N             remove tips shorter than N [0]\n"
62     "  -l, --length=N          remove contigs shorter than N [0]\n"
63     "  -L, --max-length=N      remove contigs longer than N [0]\n"
64     "  -c, --coverage=FLOAT    remove contigs with mean k-mer coverage less than FLOAT [0]\n"
65     "  -C, --max-coverage=FLOAT remove contigs with mean k-mer coverage at least FLOAT [0]\n"
66     "      --shim              remove filler contigs that only contribute\n"
67     "                          to adjacency [default]\n"
68     "      --no-shim           disable filler contigs removal\n"
69     "      --shim-max-degree=N only remove shims where the smaller of \n"
70     "                          in/out degree is smaller than N [1]\n"
71     "  -m, --min-overlap=N     require a minimum overlap of N bases [10]\n"
72     "      --assemble          assemble unambiguous paths\n"
73     "      --no-assemble       disable assembling of paths [default]\n"
74     "  -g, --graph=FILE        write the contig adjacency graph to FILE\n"
75     "  -i, --ignore=FILE       ignore contigs seen in FILE\n"
76     "  -r, --remove=FILE       remove contigs seen in FILE\n"
77     "      --adj               output the graph in ADJ format [default]\n"
78     "      --asqg              output the graph in ASQG format\n"
79     "      --dot               output the graph in GraphViz format\n"
80     "      --gfa               output the graph in GFA1 format\n"
81     "      --gfa1              output the graph in GFA1 format\n"
82     "      --gfa2              output the graph in GFA2 format\n"
83     "      --gv                output the graph in GraphViz format\n"
84     "      --sam               output the graph in SAM format\n"
85     "  -v, --verbose           display verbose output\n"
86     "      --help              display this help and exit\n"
87     "      --version           output version information and exit\n"
88     "\n"
89     "Report bugs to <" PACKAGE_BUGREPORT ">.\n";
90 
91 namespace opt {
92 unsigned k; // used by ContigProperties
93 
94 /** Run a strand-specific RNA-Seq assembly. */
95 static int ss;
96 
97 /** Remove island contigs less than this length. */
98 static unsigned minIslandLen = 0;
99 
100 /** Remove tips less than this length. */
101 static unsigned minTipLen = 0;
102 
103 /** Remove all contigs less than this length. */
104 static unsigned minLen = 0;
105 
106 /** Remove all contigs more than this length. */
107 static unsigned maxLen = 0;
108 
109 /** Remove contigs with mean k-mer coverage less than this threshold. */
110 static float minCoverage = 0;
111 
112 /** Remove contigs with mean k-mer coverage at least this threshold. */
113 static float maxCoverage = 0;
114 
115 /** Remove short contigs that don't contribute any sequence. */
116 static int shim = 1;
117 
118 /** Only remove shims where the smaller of in/out degree is small
119  * enough. */
120 static unsigned shimMaxDegree = 1;
121 
122 /** Assemble unambiguous paths. */
123 static int assemble = 0;
124 
125 /** Write the contig adjacency graph to this file. */
126 static string graphPath;
127 
128 /** Contigs to ignore. */
129 static string ignorePath;
130 
131 /** Contigs to remove. */
132 static string removePath;
133 
134 /** The minimum overlap allowed between two contigs. */
135 static int minOverlap = 10;
136 
137 /** Output graph format. */
138 int format = ADJ; // used by ContigProperties
139 }
140 
141 static const char shortopts[] = "c:C:g:i:r:k:l:L:m:t:T:v";
142 
143 enum
144 {
145 	OPT_HELP = 1,
146 	OPT_VERSION,
147 	OPT_SHIM_MAX_DEG
148 };
149 
150 static const struct option longopts[] = {
151 	{ "adj", no_argument, &opt::format, ADJ },
152 	{ "asqg", no_argument, &opt::format, ASQG },
153 	{ "dot", no_argument, &opt::format, DOT },
154 	{ "gfa", no_argument, &opt::format, GFA1 },
155 	{ "gfa1", no_argument, &opt::format, GFA1 },
156 	{ "gfa2", no_argument, &opt::format, GFA2 },
157 	{ "gv", no_argument, &opt::format, DOT },
158 	{ "sam", no_argument, &opt::format, SAM },
159 	{ "graph", required_argument, NULL, 'g' },
160 	{ "ignore", required_argument, NULL, 'i' },
161 	{ "remove", required_argument, NULL, 'r' },
162 	{ "SS", no_argument, &opt::ss, 1 },
163 	{ "no-SS", no_argument, &opt::ss, 0 },
164 	{ "kmer", required_argument, NULL, 'k' },
165 	{ "island", required_argument, NULL, 'T' },
166 	{ "tip", required_argument, NULL, 't' },
167 	{ "length", required_argument, NULL, 'l' },
168 	{ "max-length", required_argument, NULL, 'L' },
169 	{ "coverage", required_argument, NULL, 'c' },
170 	{ "max-coverage", required_argument, NULL, 'C' },
171 	{ "shim", no_argument, &opt::shim, 1 },
172 	{ "no-shim", no_argument, &opt::shim, 0 },
173 	{ "shim-max-degree", required_argument, NULL, OPT_SHIM_MAX_DEG },
174 	{ "assemble", no_argument, &opt::assemble, 1 },
175 	{ "no-assemble", no_argument, &opt::assemble, 0 },
176 	{ "min-overlap", required_argument, NULL, 'm' },
177 	{ "verbose", no_argument, NULL, 'v' },
178 	{ "help", no_argument, NULL, OPT_HELP },
179 	{ "version", no_argument, NULL, OPT_VERSION },
180 	{ NULL, 0, NULL, 0 }
181 };
182 
183 static vector<ContigID> g_removed;
184 
185 /** Contig adjacency graph. */
186 typedef ContigGraph<DirectedGraph<ContigProperties, Distance>> Graph;
187 typedef Graph::vertex_descriptor vertex_descriptor;
188 typedef Graph::edge_descriptor edge_descriptor;
189 
190 /** Data for verbose output. */
191 static struct
192 {
193 	unsigned removed;
194 	unsigned tails;
195 	unsigned too_long;
196 	unsigned too_complex;
197 	unsigned self_adj;
198 	unsigned checked;
199 	unsigned parallel_edge;
200 } g_count;
201 
202 /** Returns if the contig can be removed from the graph. */
203 static bool
removable(const Graph * pg,vertex_descriptor v)204 removable(const Graph* pg, vertex_descriptor v)
205 {
206 	typedef graph_traits<Graph> GTraits;
207 	typedef GTraits::out_edge_iterator OEit;
208 	typedef GTraits::in_edge_iterator IEit;
209 	typedef GTraits::vertex_descriptor V;
210 
211 	const Graph& g = *pg;
212 
213 	// Check if previously removed
214 	if (get(vertex_removed, g, v)) {
215 		g_count.removed++;
216 		return false;
217 	}
218 
219 	unsigned min_degree = min(out_degree(v, g), in_degree(v, g));
220 
221 	// Check for tails
222 	if (min_degree == 0) {
223 		g_count.tails++;
224 		return false;
225 	}
226 
227 	// Check that the result will be less complex that the original
228 	if (min_degree > opt::shimMaxDegree) {
229 		g_count.too_complex++;
230 		return false;
231 	}
232 
233 	// Check if self adjacent
234 	OEit oei0, oei1;
235 	tie(oei0, oei1) = out_edges(v, g);
236 	for (OEit vw = oei0; vw != oei1; ++vw) {
237 		V w = target(*vw, g);
238 		V vc = get(vertex_complement, g, v);
239 		if (v == w || vc == w) {
240 			g_count.self_adj++;
241 			return false;
242 		}
243 	}
244 
245 	// Check that removing the contig will result in adjacent contigs
246 	// overlapping by at least opt::minOverlap.
247 	IEit iei0, iei1;
248 	tie(iei0, iei1) = in_edges(v, g);
249 	IEit maxuv = iei0;
250 	for (IEit uv = iei0; uv != iei1; ++uv)
251 		if (g[*maxuv].distance < g[*uv].distance)
252 			maxuv = uv;
253 	OEit maxvw = oei0;
254 	for (OEit vw = oei0; vw != oei1; ++vw)
255 		if (g[*maxvw].distance < g[*vw].distance)
256 			maxvw = vw;
257 
258 	if (g[*maxuv].distance + (int)g[v].length + g[*maxvw].distance > -opt::minOverlap) {
259 		g_count.too_long++;
260 		return false;
261 	}
262 	return true;
263 }
264 
265 /** Data to store information of an edge. */
266 struct EdgeInfo
267 {
268 	vertex_descriptor u;
269 	vertex_descriptor w;
270 	edge_bundle_type<Graph>::type ep;
271 
EdgeInfoEdgeInfo272 	EdgeInfo(vertex_descriptor u, vertex_descriptor w, int ep)
273 	  : u(u)
274 	  , w(w)
275 	  , ep(ep)
276 	{}
EdgeInfoEdgeInfo277 	EdgeInfo()
278 	  : u()
279 	  , w()
280 	  , ep()
281 	{}
282 };
283 
284 /** Returns a list of edges that may be added when the vertex v is
285  * removed. */
286 static bool
findNewEdges(const Graph & g,vertex_descriptor v,vector<EdgeInfo> & eds,vector<bool> & markedContigs)287 findNewEdges(
288     const Graph& g,
289     vertex_descriptor v,
290     vector<EdgeInfo>& eds,
291     vector<bool>& markedContigs)
292 {
293 	typedef graph_traits<Graph> GTraits;
294 	typedef GTraits::vertex_descriptor V;
295 	typedef GTraits::out_edge_iterator OEit;
296 	typedef GTraits::in_edge_iterator IEit;
297 
298 	IEit iei0, iei1;
299 	tie(iei0, iei1) = in_edges(v, g);
300 	OEit oei0, oei1;
301 	tie(oei0, oei1) = out_edges(v, g);
302 
303 	vector<V> marked;
304 
305 	// if not marked and longest link LE contig length.
306 	// for every edge from u->v and v->w we must add an edge u->w
307 	for (IEit uv = iei0; uv != iei1; ++uv) {
308 		for (OEit vw = oei0; vw != oei1; ++vw) {
309 			int x = g[*uv].distance + (int)g[v].length + g[*vw].distance;
310 			assert(x <= 0);
311 			EdgeInfo ed(source(*uv, g), target(*vw, g), x);
312 			eds.push_back(ed);
313 			if (out_degree(v, g) > 1)
314 				marked.push_back(ed.u);
315 			if (in_degree(v, g) > 1)
316 				marked.push_back(ed.w);
317 		}
318 	}
319 	for (vector<V>::const_iterator it = marked.begin(); it != marked.end(); it++)
320 		markedContigs[get(vertex_index, g, *it)] = true;
321 	return true;
322 }
323 
324 /** Adds all edges described in the vector eds. */
325 static void
addNewEdges(Graph & g,const vector<EdgeInfo> & eds)326 addNewEdges(Graph& g, const vector<EdgeInfo>& eds)
327 {
328 	for (vector<EdgeInfo>::const_iterator edsit = eds.begin(); edsit != eds.end(); ++edsit) {
329 		// Don't add parallel edges! This can happen when removing a palindrome.
330 		if (edge(edsit->u, edsit->w, g).second) {
331 			g_count.parallel_edge++;
332 			continue;
333 		}
334 		assert(edsit->ep.distance <= -opt::minOverlap);
335 		add_edge(edsit->u, edsit->w, edsit->ep, g);
336 	}
337 }
338 
339 static void
removeContig(vertex_descriptor v,Graph & g)340 removeContig(vertex_descriptor v, Graph& g)
341 {
342 	clear_vertex(v, g);
343 	remove_vertex(v, g);
344 	g_removed.push_back(get(vertex_contig_index, g, v));
345 	g_count.removed++;
346 }
347 
348 /** Remove the specified contig from the adjacency graph. */
349 static void
removeContigs(Graph & g,vector<vertex_descriptor> & sc)350 removeContigs(Graph& g, vector<vertex_descriptor>& sc)
351 {
352 	typedef graph_traits<Graph> GTraits;
353 	typedef GTraits::vertex_descriptor V;
354 
355 	vector<vertex_descriptor> out;
356 	out.reserve(sc.size());
357 
358 	vector<bool> markedContigs(g.num_vertices());
359 	for (vector<vertex_descriptor>::iterator it = sc.begin(); it != sc.end(); ++it) {
360 		V v = *it;
361 		if (opt::verbose > 0 && ++g_count.checked % 10000000 == 0)
362 			cerr << "Removed " << g_count.removed << "/" << g_count.checked
363 			     << " vertices that have been checked.\n";
364 
365 		if (markedContigs[get(vertex_index, g, v)]) {
366 			out.push_back(v);
367 			continue;
368 		}
369 
370 		if (!removable(&g, v))
371 			continue;
372 
373 		vector<EdgeInfo> eds;
374 		if (findNewEdges(g, v, eds, markedContigs))
375 			addNewEdges(g, eds);
376 		else
377 			continue;
378 
379 		removeContig(v, g);
380 	}
381 	sc.swap(out);
382 }
383 
384 /** Return the value of the bit at the specified index. */
385 struct Marked : unary_function<vertex_descriptor, bool>
386 {
387 	typedef vector<bool> Data;
MarkedMarked388 	Marked(const Graph& g, const Data& data)
389 	  : m_g(g)
390 	  , m_data(data)
391 	{}
operator ()Marked392 	bool operator()(vertex_descriptor u) const { return m_data[get(vertex_contig_index, m_g, u)]; }
393 
394   private:
395 	const Graph& m_g;
396 	const Data& m_data;
397 };
398 
399 /** Finds all potentially removable contigs in the graph. */
400 static void
findShortContigs(const Graph & g,const vector<bool> & seen,vector<vertex_descriptor> & sc)401 findShortContigs(const Graph& g, const vector<bool>& seen, vector<vertex_descriptor>& sc)
402 {
403 	typedef graph_traits<Graph> GTraits;
404 	typedef GTraits::vertex_iterator Vit;
405 	Vit first, second;
406 	tie(first, second) = vertices(g);
407 	::copy_if(
408 	    first,
409 	    second,
410 	    back_inserter(sc),
411 	    !boost::lambda::bind(Marked(g, seen), _1) && boost::lambda::bind(removable, &g, _1));
412 }
413 
414 /** Functor used for sorting contigs based on degree, then size,
415  * and then ID. */
416 struct sortContigs
417 {
418 	const Graph& g;
419 
sortContigssortContigs420 	sortContigs(const Graph& g)
421 	  : g(g)
422 	{}
423 
424 	template<typename V>
operator ()sortContigs425 	bool operator()(V a, V b)
426 	{
427 		const ContigProperties& ap = g[a];
428 		const ContigProperties& bp = g[b];
429 
430 		unsigned dega = out_degree(a, g) * in_degree(a, g);
431 		unsigned degb = out_degree(b, g) * in_degree(b, g);
432 
433 		return dega != degb ? dega < degb : ap.length != bp.length ? ap.length < bp.length : a < b;
434 	}
435 };
436 
437 struct ShorterThanX : unary_function<vertex_descriptor, bool>
438 {
439 	const Graph& g;
440 	const vector<bool>& seen;
441 	size_t x;
442 
ShorterThanXShorterThanX443 	ShorterThanX(const Graph& g, const vector<bool>& seen, size_t x)
444 	  : g(g)
445 	  , seen(seen)
446 	  , x(x)
447 	{}
448 
operator ()ShorterThanX449 	bool operator()(vertex_descriptor y) const
450 	{
451 		return g[y].length < x && !get(vertex_removed, g, y) &&
452 		       !seen[get(vertex_contig_index, g, y)];
453 	}
454 };
455 
456 struct LongerThanX : unary_function<vertex_descriptor, bool>
457 {
458 	const Graph& g;
459 	const vector<bool>& seen;
460 	size_t x;
461 
LongerThanXLongerThanX462 	LongerThanX(const Graph& g, const vector<bool>& seen, size_t x)
463 	  : g(g)
464 	  , seen(seen)
465 	  , x(x)
466 	{}
467 
operator ()LongerThanX468 	bool operator()(vertex_descriptor y) const
469 	{
470 		return g[y].length > x && !get(vertex_removed, g, y) &&
471 		       !seen[get(vertex_contig_index, g, y)];
472 	}
473 };
474 
475 struct CoverageLessThan : unary_function<vertex_descriptor, bool>
476 {
477 	const Graph& g;
478 	const vector<bool>& seen;
479 	float minCov;
480 
CoverageLessThanCoverageLessThan481 	CoverageLessThan(const Graph& g, const vector<bool>& seen, float minCov)
482 	  : g(g)
483 	  , seen(seen)
484 	  , minCov(minCov)
485 	{}
486 
operator ()CoverageLessThan487 	bool operator()(vertex_descriptor u) const
488 	{
489 		assert(opt::k > 0);
490 		float meanCoverage = (float)g[u].coverage / (g[u].length - opt::k + 1);
491 		return meanCoverage < minCov && !get(vertex_removed, g, u) &&
492 		       !seen[get(vertex_contig_index, g, u)];
493 	}
494 };
495 
496 static void
removeShims(Graph & g,const vector<bool> & seen)497 removeShims(Graph& g, const vector<bool>& seen)
498 {
499 	if (opt::verbose > 0)
500 		cerr << "Removing shim contigs from the graph...\n";
501 	vector<vertex_descriptor> shortContigs;
502 	findShortContigs(g, seen, shortContigs);
503 	for (unsigned i = 0; !shortContigs.empty(); ++i) {
504 		if (opt::verbose > 0)
505 			cerr << "Pass " << i + 1 << ": Checking " << shortContigs.size() << " contigs.\n";
506 		sort(shortContigs.begin(), shortContigs.end(), sortContigs(g));
507 		removeContigs(g, shortContigs);
508 	}
509 	if (opt::verbose > 0) {
510 		cerr << "Shim removal stats:\n";
511 		cerr << "Removed: " << g_count.removed / 2 << " Too Complex: " << g_count.too_complex / 2
512 		     << " Tails: " << g_count.tails / 2 << " Too Long: " << g_count.too_long / 2
513 		     << " Self Adjacent: " << g_count.self_adj / 2
514 		     << " Parallel Edges: " << g_count.parallel_edge / 2 << '\n';
515 	}
516 }
517 
518 template<typename pred>
519 static void
removeContigs_if(Graph & g,pred p)520 removeContigs_if(Graph& g, pred p)
521 {
522 	typedef graph_traits<Graph> GTraits;
523 	typedef GTraits::vertex_iterator Vit;
524 	typedef GTraits::vertex_descriptor V;
525 	Vit first, second;
526 	tie(first, second) = vertices(g);
527 	vector<V> sc;
528 	::copy_if(first, second, back_inserter(sc), p);
529 	remove_vertex_if(g, sc.begin(), sc.end(), True<V>());
530 	transform(sc.begin(), sc.end(), back_inserter(g_removed), [](const ContigNode& c) {
531 		return c.contigIndex();
532 	});
533 	if (opt::verbose > 0)
534 		cerr << "Removed " << sc.size() / 2 << " contigs.\n";
535 }
536 
537 /** Contig sequences. */
538 typedef vector<const_string> Contigs;
539 static Contigs g_contigs;
540 
541 /** Return the sequence of vertex u. */
542 static string
getSequence(const Graph & g,vertex_descriptor u)543 getSequence(const Graph& g, vertex_descriptor u)
544 {
545 	size_t i = get(vertex_contig_index, g, u);
546 	assert(i < g_contigs.size());
547 	string seq(g_contigs[i]);
548 	return get(vertex_sense, g, u) ? reverseComplement(seq) : seq;
549 }
550 
551 /** Return whether the specified edge is inconsistent. */
552 struct is_edge_inconsistent : unary_function<edge_descriptor, bool>
553 {
554 	const Graph& g;
555 
is_edge_inconsistentis_edge_inconsistent556 	is_edge_inconsistent(const Graph& g)
557 	  : g(g)
558 	{}
559 
operator ()is_edge_inconsistent560 	bool operator()(edge_descriptor e) const
561 	{
562 		vertex_descriptor u = source(e, g);
563 		vertex_descriptor v = target(e, g);
564 
565 		int overlap = g[e].distance;
566 		assert(overlap < 0);
567 
568 		string su = getSequence(g, u);
569 		string sv = getSequence(g, v);
570 		const unsigned u_start = su.length() + overlap;
571 
572 		for (unsigned i = 0; i < (unsigned)-overlap; i++)
573 			if (!(ambiguityToBitmask(su[u_start + i]) & ambiguityToBitmask(sv[i])))
574 				return true;
575 		return false;
576 	}
577 };
578 
579 template<typename It>
580 static void
remove_edge(Graph & g,It first,It last)581 remove_edge(Graph& g, It first, It last)
582 {
583 	for (; first != last; first++)
584 		remove_edge(*first, g);
585 }
586 
587 template<typename pred>
588 static void
removeEdges_if(Graph & g,pred p)589 removeEdges_if(Graph& g, pred p)
590 {
591 	typedef graph_traits<Graph> GTraits;
592 	typedef GTraits::edge_iterator Eit;
593 	typedef GTraits::edge_descriptor E;
594 	Eit first, second;
595 	tie(first, second) = edges(g);
596 	vector<E> sc;
597 	::copy_if(first, second, back_inserter(sc), p);
598 	remove_edge(g, sc.begin(), sc.end());
599 	if (opt::verbose > 0) {
600 		cerr << "Edge removal stats:\n";
601 		cerr << "Removed: " << sc.size() << '\n';
602 	}
603 }
604 
605 int
main(int argc,char ** argv)606 main(int argc, char** argv)
607 {
608 	string commandLine;
609 	{
610 		ostringstream ss;
611 		char** last = argv + argc - 1;
612 		copy(argv, last, ostream_iterator<const char*>(ss, " "));
613 		ss << *last;
614 		commandLine = ss.str();
615 	}
616 
617 	bool die = false;
618 	for (int c; (c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1;) {
619 		istringstream arg(optarg != NULL ? optarg : "");
620 		switch (c) {
621 		case '?':
622 			die = true;
623 			break;
624 		case 'c':
625 			arg >> opt::minCoverage;
626 			break;
627 		case 'C':
628 			arg >> opt::maxCoverage;
629 			break;
630 		case 'l':
631 			arg >> opt::minLen;
632 			break;
633 		case 'L':
634 			arg >> opt::maxLen;
635 			break;
636 		case 'm':
637 			arg >> opt::minOverlap;
638 			break;
639 		case 'g':
640 			arg >> opt::graphPath;
641 			break;
642 		case 'i':
643 			arg >> opt::ignorePath;
644 			break;
645 		case 'r':
646 			arg >> opt::removePath;
647 			break;
648 		case 'k':
649 			arg >> opt::k;
650 			break;
651 		case 'T':
652 			arg >> opt::minIslandLen;
653 			break;
654 		case 't':
655 			arg >> opt::minTipLen;
656 			break;
657 		case 'v':
658 			opt::verbose++;
659 			break;
660 		case OPT_SHIM_MAX_DEG:
661 			arg >> opt::shimMaxDegree;
662 			break;
663 		case OPT_HELP:
664 			cout << USAGE_MESSAGE;
665 			exit(EXIT_SUCCESS);
666 		case OPT_VERSION:
667 			cout << VERSION_MESSAGE;
668 			exit(EXIT_SUCCESS);
669 		}
670 		if (optarg != NULL && !arg.eof()) {
671 			cerr << PROGRAM ": invalid option: `-" << (char)c << optarg << "'\n";
672 			exit(EXIT_FAILURE);
673 		}
674 	}
675 
676 	if (opt::minOverlap < 0) {
677 		cerr << PROGRAM ": "
678 		     << "--min-overlap must be a positive integer.\n";
679 		die = true;
680 	}
681 
682 	if (opt::k <= 0) {
683 		cerr << PROGRAM ": "
684 		     << "missing -k,--kmer option\n";
685 		die = true;
686 	}
687 
688 	if (argc - optind < 1) {
689 		cerr << PROGRAM ": missing arguments\n";
690 		die = true;
691 	}
692 
693 	if (argc - optind > 2) {
694 		cerr << PROGRAM ": too many arguments\n";
695 		die = true;
696 	}
697 
698 	if (die) {
699 		cerr << "Try `" << PROGRAM << " --help' for more information.\n";
700 		exit(EXIT_FAILURE);
701 	}
702 
703 	Graph g;
704 	// Read the contig adjacency graph.
705 	{
706 		string adjPath(argv[optind++]);
707 		if (opt::verbose > 0)
708 			cerr << "Loading graph from file: " << adjPath << '\n';
709 		ifstream fin(adjPath.c_str());
710 		assert_good(fin, adjPath);
711 		fin >> g;
712 		assert(fin.eof());
713 	}
714 
715 	// Read the set of contigs to ignore.
716 	vector<bool> seen(num_vertices(g) / 2);
717 	if (!opt::ignorePath.empty()) {
718 		ifstream in(opt::ignorePath.c_str());
719 		assert_good(in, opt::ignorePath);
720 		markSeenInPath(in, seen);
721 	}
722 
723 	if (opt::verbose > 0) {
724 		cerr << "Graph stats before:\n";
725 		printGraphStats(cerr, g);
726 	}
727 
728 	// Remove list of contigs
729 	if (!opt::removePath.empty()) {
730 		ifstream in(opt::removePath.c_str());
731 		assert(in.good());
732 		string s;
733 		size_t b = g_removed.size();
734 		while (in >> s) {
735 			size_t i = get(g_contigNames, s);
736 			removeContig(ContigNode(i, 0), g);
737 		}
738 		assert(in.eof());
739 		if (opt::verbose)
740 			cerr << "Removed " << g_removed.size() - b << " contigs.\n";
741 	}
742 
743 	// Remove shims.
744 	if (opt::shim)
745 		removeShims(g, seen);
746 
747 	// Remove islands.
748 	if (opt::minIslandLen > 0) {
749 		size_t s = g_removed.size();
750 		removeIslands_if(g, back_inserter(g_removed), ShorterThanX(g, seen, opt::minIslandLen));
751 		if (opt::verbose)
752 			cerr << "Removed " << g_removed.size() - s << " islands.\n";
753 	}
754 
755 	// Remove tips.
756 	if (opt::minTipLen > 0) {
757 		size_t s, prev;
758 		s = g_removed.size();
759 		do {
760 			prev = g_removed.size();
761 			pruneTips_if(g, back_inserter(g_removed), ShorterThanX(g, seen, opt::minTipLen));
762 		} while (prev < g_removed.size());
763 		if (opt::verbose)
764 			cerr << "Removed " << g_removed.size() - s << " tips.\n";
765 	}
766 
767 	// Remove short contigs.
768 	if (opt::minLen > 0)
769 		removeContigs_if(g, ShorterThanX(g, seen, opt::minLen));
770 
771 	// Remove long contigs.
772 	if (opt::maxLen > 0)
773 		removeContigs_if(g, LongerThanX(g, seen, opt::maxLen));
774 
775 	// Remove contigs with low mean k-mer coverage.
776 	if (opt::minCoverage > 0)
777 		removeContigs_if(g, CoverageLessThan(g, seen, opt::minCoverage));
778 
779 	// Remove contigs with high mean k-mer coverage.
780 	if (opt::maxCoverage > 0)
781 		removeContigs_if(g, std::not1(CoverageLessThan(g, seen, opt::maxCoverage)));
782 
783 	// Remove inconsistent edges of spaceseeds
784 	if (argc - optind == 1) {
785 		const char* contigsPath(argv[optind++]);
786 		Contigs& contigs = g_contigs;
787 		if (opt::verbose > 0)
788 			cerr << "Reading `" << contigsPath << "'...\n";
789 		FastaReader in(contigsPath, FastaReader::NO_FOLD_CASE);
790 		for (FastaRecord rec; in >> rec;) {
791 			if (g_contigNames.count(rec.id) == 0)
792 				continue;
793 			assert(contigs.size() == get(g_contigNames, rec.id));
794 			contigs.push_back(rec.seq);
795 		}
796 		assert(in.eof());
797 
798 		removeEdges_if(g, is_edge_inconsistent(g));
799 	}
800 
801 	if (opt::verbose > 0) {
802 		cerr << "Graph stats after:\n";
803 		printGraphStats(cerr, g);
804 	}
805 
806 	// Output the updated adjacency graph.
807 	if (!opt::graphPath.empty()) {
808 		ofstream fout(opt::graphPath.c_str());
809 		assert_good(fout, opt::graphPath);
810 		write_graph(fout, g, PROGRAM, commandLine);
811 		assert_good(fout, opt::graphPath);
812 	}
813 
814 	// Assemble unambiguous paths. These need to be assembled by
815 	// MergeContigs before being processed by other applications.
816 	if (opt::assemble) {
817 		size_t numContigs = num_vertices(g) / 2;
818 		typedef vector<ContigPath> ContigPaths;
819 		ContigPaths paths;
820 		if (opt::ss)
821 			assemble_stranded(g, back_inserter(paths));
822 		else
823 			assemble(g, back_inserter(paths));
824 		g_contigNames.unlock();
825 		for (ContigPaths::const_iterator it = paths.begin(); it != paths.end(); ++it) {
826 			ContigNode u(numContigs + it - paths.begin(), false);
827 			string name = createContigName();
828 			put(vertex_name, g, u, name);
829 			cout << name << '\t' << *it << '\n';
830 		}
831 		g_contigNames.lock();
832 	}
833 
834 	return 0;
835 }
836