1 #include "config.h"
2 #include "Common/ContigPath.h"
3 #include "Common/IOUtil.h"
4 #include "Graph/ContigGraph.h"
5 #include "Graph/ContigGraphAlgorithms.h"
6 #include "Graph/DirectedGraph.h"
7 #include "Graph/GraphIO.h"
8 #include "Graph/GraphUtil.h"
9 #include "Uncompress.h"
10 #include <boost/graph/graph_traits.hpp>
11 #include <boost/lambda/bind.hpp>
12 #include <boost/lambda/lambda.hpp>
13 #include <boost/ref.hpp>
14 #include <algorithm>
15 #include <cassert>
16 #include <getopt.h>
17 #include <iostream>
18 #include <iterator>
19 #include <sstream>
20 #include <string>
21 #include <vector>
22 #include <utility>
23
24 using namespace std;
25 using namespace boost::lambda;
26 using boost::tie;
27
28 #define PROGRAM "abyss-junction"
29
30 static const char VERSION_MESSAGE[] =
31 PROGRAM " (" PACKAGE_NAME ") " VERSION "\n"
32 "Written by Shaun Jackman.\n"
33 "\n"
34 "Copyright 2014 Canada's Michael Smith Genome Sciences Centre\n";
35
36 static const char USAGE_MESSAGE[] =
37 "Usage: " PROGRAM " [OPTION]... OVERLAP [SCAFFOLD]...\n"
38 "Extend junction contigs that are supported by the scaffold graph.\n"
39 "\n"
40 " Arguments:\n"
41 "\n"
42 " OVERLAP the overlap graph\n"
43 " SCAFFOLD a scaffold graph\n"
44 "\n"
45 " Options:\n"
46 "\n"
47 " -i, --ignore=FILE ignore junctions seen in FILE\n"
48 " -v, --verbose display verbose output\n"
49 " --help display this help and exit\n"
50 " --version output version information and exit\n"
51 "\n"
52 "Report bugs to <" PACKAGE_BUGREPORT ">.\n";
53
54 namespace opt {
55 unsigned k; // used by DotIO
56
57 /** Do not check for evidence in the scaffold graph. */
58 bool noScaffoldGraph;
59
60 /** Junction contigs to ignore. */
61 string ignorePath;
62
63 /** Verbose output. */
64 int verbose; // used by PopBubbles
65
66 /** Output format */
67 int format = DOT; // used by DistanceEst
68 }
69
70 static const char shortopts[] = "i:v";
71
72 enum { OPT_HELP = 1, OPT_VERSION, };
73
74 static const struct option longopts[] = {
75 { "ignore", no_argument, NULL, 'i' },
76 { "verbose", no_argument, NULL, 'v' },
77 { "help", no_argument, NULL, OPT_HELP },
78 { "version", no_argument, NULL, OPT_VERSION },
79 { NULL, 0, NULL, 0 }
80 };
81
82 /** Counts. */
83 static struct {
84 unsigned junctions;
85 unsigned supported;
86 } g_count;
87
88 /** An overlap graph. */
89 typedef DirectedGraph<NoProperty, NoProperty> DG;
90 typedef ContigGraph<DG> Graph;
91 typedef Graph OverlapGraph;
92
93 /** A scaffold graph. */
94 typedef Graph ScaffoldGraph;
95
96 /** Extend junction contigs. */
extendJunction(const OverlapGraph & overlapG,const ScaffoldGraph & scaffoldG,graph_traits<OverlapGraph>::vertex_descriptor v)97 void extendJunction(
98 const OverlapGraph& overlapG,
99 const ScaffoldGraph& scaffoldG,
100 graph_traits<OverlapGraph>::vertex_descriptor v)
101 {
102 if (get(vertex_sense, overlapG, v)
103 || in_degree(v, overlapG) != 1
104 || out_degree(v, overlapG) != 1)
105 return;
106 typedef graph_traits<OverlapGraph>::vertex_descriptor V;
107 V u = source(*in_edges(v, overlapG).first, overlapG);
108 V w = *adjacent_vertices(v, overlapG).first;
109 if (opt::noScaffoldGraph
110 || edge(u, w, scaffoldG).second) {
111 // This junction contig is supported by the scaffold graph.
112 ContigPath path;
113 path.reserve(3);
114 extend(overlapG, get(vertex_complement, overlapG, v),
115 back_inserter(path));
116 reverseComplement(path.begin(), path.end());
117 path.push_back(v);
118 extend(overlapG, v, back_inserter(path));
119 assert(path.size() >= 3);
120 cout << createContigName() << '\t' << path << '\n';
121 g_count.supported++;
122 }
123 g_count.junctions++;
124 }
125
126 /** Allow parallel edges. */
127 struct AllowParallelEdges {
128 template <typename EP>
operator ()AllowParallelEdges129 EP operator()(const EP& a, const EP&) const
130 {
131 return a;
132 }
133 };
134
135 /** Read a graph from the specified file. */
readGraph(const string & path,Graph & g)136 static void readGraph(const string& path, Graph& g)
137 {
138 if (opt::verbose > 0)
139 cerr << "Reading `" << path << "'...\n";
140 ifstream fin(path.c_str());
141 istream& in = path == "-" ? cin : fin;
142 assert_good(in, path);
143 read_graph(in, g, AllowParallelEdges());
144 assert(in.eof());
145 if (opt::verbose > 1)
146 printGraphStats(cerr, g);
147 g_contigNames.lock();
148 }
149
main(int argc,char ** argv)150 int main(int argc, char** argv)
151 {
152 bool die = false;
153 for (int c; (c = getopt_long(argc, argv,
154 shortopts, longopts, NULL)) != -1;) {
155 istringstream arg(optarg != NULL ? optarg : "");
156 switch (c) {
157 case '?': die = true; break;
158 case 'i': arg >> opt::ignorePath; break;
159 case 'v': opt::verbose++; break;
160 case OPT_HELP:
161 cout << USAGE_MESSAGE;
162 exit(EXIT_SUCCESS);
163 case OPT_VERSION:
164 cout << VERSION_MESSAGE;
165 exit(EXIT_SUCCESS);
166 }
167 if (optarg != NULL && !arg.eof()) {
168 cerr << PROGRAM ": invalid option: `-"
169 << (char)c << optarg << "'\n";
170 exit(EXIT_FAILURE);
171 }
172 }
173
174 if (argc - optind < 1) {
175 cerr << PROGRAM ": missing arguments\n";
176 die = true;
177 }
178
179 if (die) {
180 cerr << "Try `" << PROGRAM
181 << " --help' for more information.\n";
182 exit(EXIT_FAILURE);
183 }
184
185 OverlapGraph overlapG;
186 readGraph(argv[optind++], overlapG);
187
188 ScaffoldGraph scaffoldG(overlapG.num_vertices() / 2);
189 if (optind < argc) {
190 for_each(argv + optind, argv + argc,
191 boost::lambda::bind(readGraph, _1, boost::ref(scaffoldG)));
192 // Add any missing complementary edges.
193 size_t numAdded = addComplementaryEdges(scaffoldG);
194 if (opt::verbose > 0)
195 cerr << "Added " << numAdded << " complementary edges.\n";
196 if (opt::verbose > 1)
197 printGraphStats(cerr, scaffoldG);
198 } else
199 opt::noScaffoldGraph = true;
200
201 // Read the set of contigs to ignore.
202 vector<bool> seen(num_vertices(overlapG) / 2);
203 if (!opt::ignorePath.empty()) {
204 ifstream in(opt::ignorePath.c_str());
205 assert_good(in, opt::ignorePath);
206 markSeenInPath(in, seen);
207 }
208
209 // Extend the junction contigs.
210 graph_traits<OverlapGraph>::vertex_iterator uit, ulast;
211 for (tie(uit, ulast) = vertices(overlapG); uit != ulast; ++uit)
212 if (!seen[get(vertex_contig_index, overlapG, *uit)])
213 extendJunction(overlapG, scaffoldG, *uit);
214
215 assert_good(cout, "stdout");
216
217 if (opt::verbose > 0) {
218 cerr << "Junctions: " << g_count.junctions << '\n';
219 if (!opt::noScaffoldGraph)
220 cerr << "Supported: " << g_count.supported << '\n';
221 }
222
223 return 0;
224 }
225