1 #include "config.h"
2 #include "Common/ContigPath.h"
3 #include "Common/IOUtil.h"
4 #include "Graph/ContigGraph.h"
5 #include "Graph/ContigGraphAlgorithms.h"
6 #include "Graph/DirectedGraph.h"
7 #include "Graph/GraphIO.h"
8 #include "Graph/GraphUtil.h"
9 #include "Uncompress.h"
10 #include <boost/graph/graph_traits.hpp>
11 #include <boost/lambda/bind.hpp>
12 #include <boost/lambda/lambda.hpp>
13 #include <boost/ref.hpp>
14 #include <algorithm>
15 #include <cassert>
16 #include <getopt.h>
17 #include <iostream>
18 #include <iterator>
19 #include <sstream>
20 #include <string>
21 #include <vector>
22 #include <utility>
23 
24 using namespace std;
25 using namespace boost::lambda;
26 using boost::tie;
27 
28 #define PROGRAM "abyss-junction"
29 
30 static const char VERSION_MESSAGE[] =
31 PROGRAM " (" PACKAGE_NAME ") " VERSION "\n"
32 "Written by Shaun Jackman.\n"
33 "\n"
34 "Copyright 2014 Canada's Michael Smith Genome Sciences Centre\n";
35 
36 static const char USAGE_MESSAGE[] =
37 "Usage: " PROGRAM " [OPTION]... OVERLAP [SCAFFOLD]...\n"
38 "Extend junction contigs that are supported by the scaffold graph.\n"
39 "\n"
40 " Arguments:\n"
41 "\n"
42 "  OVERLAP   the overlap graph\n"
43 "  SCAFFOLD  a scaffold graph\n"
44 "\n"
45 " Options:\n"
46 "\n"
47 "  -i, --ignore=FILE     ignore junctions seen in FILE\n"
48 "  -v, --verbose         display verbose output\n"
49 "      --help            display this help and exit\n"
50 "      --version         output version information and exit\n"
51 "\n"
52 "Report bugs to <" PACKAGE_BUGREPORT ">.\n";
53 
54 namespace opt {
55 	unsigned k; // used by DotIO
56 
57 	/** Do not check for evidence in the scaffold graph. */
58 	bool noScaffoldGraph;
59 
60 	/** Junction contigs to ignore. */
61 	string ignorePath;
62 
63 	/** Verbose output. */
64 	int verbose; // used by PopBubbles
65 
66  	/** Output format */
67  	int format = DOT; // used by DistanceEst
68 }
69 
70 static const char shortopts[] = "i:v";
71 
72 enum { OPT_HELP = 1, OPT_VERSION, };
73 
74 static const struct option longopts[] = {
75 	{ "ignore",      no_argument,       NULL, 'i' },
76 	{ "verbose",     no_argument,       NULL, 'v' },
77 	{ "help",        no_argument,       NULL, OPT_HELP },
78 	{ "version",     no_argument,       NULL, OPT_VERSION },
79 	{ NULL, 0, NULL, 0 }
80 };
81 
82 /** Counts. */
83 static struct {
84 	unsigned junctions;
85 	unsigned supported;
86 } g_count;
87 
88 /** An overlap graph. */
89 typedef DirectedGraph<NoProperty, NoProperty> DG;
90 typedef ContigGraph<DG> Graph;
91 typedef Graph OverlapGraph;
92 
93 /** A scaffold graph. */
94 typedef Graph ScaffoldGraph;
95 
96 /** Extend junction contigs. */
extendJunction(const OverlapGraph & overlapG,const ScaffoldGraph & scaffoldG,graph_traits<OverlapGraph>::vertex_descriptor v)97 void extendJunction(
98 		const OverlapGraph& overlapG,
99 		const ScaffoldGraph& scaffoldG,
100 		graph_traits<OverlapGraph>::vertex_descriptor v)
101 {
102 	if (get(vertex_sense, overlapG, v)
103 			|| in_degree(v, overlapG) != 1
104 			|| out_degree(v, overlapG) != 1)
105 		return;
106 	typedef graph_traits<OverlapGraph>::vertex_descriptor V;
107 	V u = source(*in_edges(v, overlapG).first, overlapG);
108 	V w = *adjacent_vertices(v, overlapG).first;
109 	if (opt::noScaffoldGraph
110 			|| edge(u, w, scaffoldG).second) {
111 		// This junction contig is supported by the scaffold graph.
112 		ContigPath path;
113 		path.reserve(3);
114 		extend(overlapG, get(vertex_complement, overlapG, v),
115 				back_inserter(path));
116 		reverseComplement(path.begin(), path.end());
117 		path.push_back(v);
118 		extend(overlapG, v, back_inserter(path));
119 		assert(path.size() >= 3);
120 		cout << createContigName() << '\t' << path << '\n';
121 		g_count.supported++;
122 	}
123 	g_count.junctions++;
124 }
125 
126 /** Allow parallel edges. */
127 struct AllowParallelEdges {
128 	template <typename EP>
operator ()AllowParallelEdges129 	EP operator()(const EP& a, const EP&) const
130 	{
131 		return a;
132 	}
133 };
134 
135 /** Read a graph from the specified file. */
readGraph(const string & path,Graph & g)136 static void readGraph(const string& path, Graph& g)
137 {
138 	if (opt::verbose > 0)
139 		cerr << "Reading `" << path << "'...\n";
140 	ifstream fin(path.c_str());
141 	istream& in = path == "-" ? cin : fin;
142 	assert_good(in, path);
143 	read_graph(in, g, AllowParallelEdges());
144 	assert(in.eof());
145 	if (opt::verbose > 1)
146 		printGraphStats(cerr, g);
147 	g_contigNames.lock();
148 }
149 
main(int argc,char ** argv)150 int main(int argc, char** argv)
151 {
152 	bool die = false;
153 	for (int c; (c = getopt_long(argc, argv,
154 					shortopts, longopts, NULL)) != -1;) {
155 		istringstream arg(optarg != NULL ? optarg : "");
156 		switch (c) {
157 			case '?': die = true; break;
158 			case 'i': arg >> opt::ignorePath; break;
159 			case 'v': opt::verbose++; break;
160 			case OPT_HELP:
161 				cout << USAGE_MESSAGE;
162 				exit(EXIT_SUCCESS);
163 			case OPT_VERSION:
164 				cout << VERSION_MESSAGE;
165 				exit(EXIT_SUCCESS);
166 		}
167 		if (optarg != NULL && !arg.eof()) {
168 			cerr << PROGRAM ": invalid option: `-"
169 				<< (char)c << optarg << "'\n";
170 			exit(EXIT_FAILURE);
171 		}
172 	}
173 
174 	if (argc - optind < 1) {
175 		cerr << PROGRAM ": missing arguments\n";
176 		die = true;
177 	}
178 
179 	if (die) {
180 		cerr << "Try `" << PROGRAM
181 			<< " --help' for more information.\n";
182 		exit(EXIT_FAILURE);
183 	}
184 
185 	OverlapGraph overlapG;
186 	readGraph(argv[optind++], overlapG);
187 
188 	ScaffoldGraph scaffoldG(overlapG.num_vertices() / 2);
189 	if (optind < argc) {
190 		for_each(argv + optind, argv + argc,
191 				boost::lambda::bind(readGraph, _1, boost::ref(scaffoldG)));
192 		// Add any missing complementary edges.
193 		size_t numAdded = addComplementaryEdges(scaffoldG);
194 		if (opt::verbose > 0)
195 			cerr << "Added " << numAdded << " complementary edges.\n";
196 		if (opt::verbose > 1)
197 			printGraphStats(cerr, scaffoldG);
198 	} else
199 		opt::noScaffoldGraph = true;
200 
201 	// Read the set of contigs to ignore.
202 	vector<bool> seen(num_vertices(overlapG) / 2);
203 	if (!opt::ignorePath.empty()) {
204 		ifstream in(opt::ignorePath.c_str());
205 		assert_good(in, opt::ignorePath);
206 		markSeenInPath(in, seen);
207 	}
208 
209 	// Extend the junction contigs.
210 	graph_traits<OverlapGraph>::vertex_iterator uit, ulast;
211 	for (tie(uit, ulast) = vertices(overlapG); uit != ulast; ++uit)
212 		if (!seen[get(vertex_contig_index, overlapG, *uit)])
213 			extendJunction(overlapG, scaffoldG, *uit);
214 
215 	assert_good(cout, "stdout");
216 
217 	if (opt::verbose > 0) {
218 		cerr << "Junctions: " << g_count.junctions << '\n';
219 		if (!opt::noScaffoldGraph)
220 			cerr << "Supported: " << g_count.supported << '\n';
221 	}
222 
223 	return 0;
224 }
225