1 /**
2 * Identify and pop simple bubbles.
3 * Written by Shaun Jackman <sjackman@bcgsc.ca>.
4 */
5
6 #include "Graph/PopBubbles.h"
7 #include "Common/Options.h"
8 #include "ConstString.h"
9 #include "ContigPath.h"
10 #include "ContigProperties.h"
11 #include "FastaReader.h"
12 #include "Graph/ContigGraph.h"
13 #include "Graph/ContigGraphAlgorithms.h"
14 #include "Graph/DepthFirstSearch.h"
15 #include "Graph/DirectedGraph.h"
16 #include "Graph/GraphIO.h"
17 #include "Graph/GraphUtil.h"
18 #include "IOUtil.h"
19 #include "Sequence.h"
20 #include "Uncompress.h"
21 #include "alignGlobal.h"
22 #include "config.h"
23 #include <algorithm>
24 #include <boost/lambda/bind.hpp>
25 #include <boost/lambda/lambda.hpp>
26 #include <climits> // for UINT_MAX
27 #include <fstream>
28 #include <functional>
29 #include <getopt.h>
30 #include <iostream>
31 #include <iterator>
32 #include <map>
33 #include <set>
34 #include <sstream>
35 #include <string>
36 #include <utility>
37 #include <vector>
38 #if _OPENMP
39 #include <omp.h>
40 #endif
41
42 using namespace std;
43 using namespace boost::lambda;
44 using boost::tie;
45
46 #define PROGRAM "PopBubbles"
47
48 static const char VERSION_MESSAGE[] =
49 PROGRAM " (" PACKAGE_NAME ") " VERSION "\n"
50 "Written by Shaun Jackman.\n"
51 "\n"
52 "Copyright 2014 Canada's Michael Smith Genome Sciences Centre\n";
53
54 static const char USAGE_MESSAGE[] =
55 "Usage: " PROGRAM " -k<kmer> [OPTION]... FASTA ADJ\n"
56 "Identify and pop simple bubbles.\n"
57 "\n"
58 " Arguments:\n"
59 "\n"
60 " FASTA contigs in FASTA format\n"
61 " ADJ contig adjacency graph\n"
62 "\n"
63 " Options:\n"
64 "\n"
65 " -k, --kmer=N k-mer size\n"
66 " -a, --branches=N maximum number of branches, default: 2\n"
67 " -b, --bubble-length=N pop bubbles shorter than N bp\n"
68 " default is 10000\n"
69 " -p, --identity=REAL minimum identity, default: 0.9\n"
70 " -c, --coverage=REAL remove contigs with mean k-mer coverage\n"
71 " less than this threshold [0]\n"
72 " --scaffold scaffold over bubbles that have\n"
73 " insufficient identity\n"
74 " --no-scaffold disable scaffolding [default]\n"
75 " --SS expect contigs to be oriented correctly\n"
76 " --no-SS no assumption about contig orientation [default]\n"
77 " -g, --graph=FILE write the contig adjacency graph to FILE\n"
78 " --adj output the graph in ADJ format [default]\n"
79 " --asqg output the graph in ASQG format\n"
80 " --dot output the graph in GraphViz format\n"
81 " --gfa output the graph in GFA1 format\n"
82 " --gfa1 output the graph in GFA1 format\n"
83 " --gfa2 output the graph in GFA2 format\n"
84 " --gv output the graph in GraphViz format\n"
85 " --sam output the graph in SAM format\n"
86 " --bubble-graph output a graph of the bubbles\n"
87 " -j, --threads=N use N parallel threads [1]\n"
88 " -v, --verbose display verbose output\n"
89 " --help display this help and exit\n"
90 " --version output version information and exit\n"
91 "\n"
92 "Report bugs to <" PACKAGE_BUGREPORT ">.\n";
93
94 namespace opt {
95 unsigned k; // used by ContigProperties
96
97 /** Maximum number of branches. */
98 static unsigned maxBranches = 2;
99
100 /** Pop bubbles shorter than this threshold. */
101 static unsigned maxLength = 10000;
102
103 /** Minimum identity. */
104 static float identity = 0.9;
105
106 /** Minimum mean k-mer coverage. */
107 static float minCoverage;
108
109 /** Scaffold over bubbles that have insufficient identity. */
110 static int scaffold;
111
112 /** Write the contig adjacency graph to this file. */
113 static string graphPath;
114
115 /** Output a graph of the bubbles. */
116 static int bubbleGraph;
117
118 int format; // used by ContigProperties
119
120 /** Run a strand-specific RNA-Seq assembly. */
121 static int ss;
122
123 /** Number of threads. */
124 static int threads = 1;
125 }
126
127 static const char shortopts[] = "a:b:c:g:j:k:p:v";
128
129 enum
130 {
131 OPT_HELP = 1,
132 OPT_VERSION
133 };
134
135 static const struct option longopts[] = { { "branches", required_argument, NULL, 'a' },
136 { "bubble-length", required_argument, NULL, 'b' },
137 { "coverage", required_argument, NULL, 'c' },
138 {
139 "bubble-graph",
140 no_argument,
141 &opt::bubbleGraph,
142 1,
143 },
144 { "graph", required_argument, NULL, 'g' },
145 { "adj", no_argument, &opt::format, ADJ },
146 { "asqg", no_argument, &opt::format, ASQG },
147 { "dot", no_argument, &opt::format, DOT },
148 { "gfa", no_argument, &opt::format, GFA1 },
149 { "gfa1", no_argument, &opt::format, GFA1 },
150 { "gfa2", no_argument, &opt::format, GFA2 },
151 { "gv", no_argument, &opt::format, DOT },
152 { "sam", no_argument, &opt::format, SAM },
153 { "kmer", required_argument, NULL, 'k' },
154 { "identity", required_argument, NULL, 'p' },
155 { "scaffold", no_argument, &opt::scaffold, 1 },
156 { "no-scaffold", no_argument, &opt::scaffold, 0 },
157 { "SS", no_argument, &opt::ss, 1 },
158 { "no-SS", no_argument, &opt::ss, 0 },
159 { "threads", required_argument, NULL, 'j' },
160 { "verbose", no_argument, NULL, 'v' },
161 { "help", no_argument, NULL, OPT_HELP },
162 { "version", no_argument, NULL, OPT_VERSION },
163 { NULL, 0, NULL, 0 } };
164
165 /** Popped branches. */
166 static vector<ContigID> g_popped;
167
168 /** Contig adjacency graph. */
169 typedef ContigGraph<DirectedGraph<ContigProperties, Distance>> Graph;
170 typedef Graph::vertex_descriptor vertex_descriptor;
171 typedef Graph::adjacency_iterator adjacency_iterator;
172
173 /** Return the distance from vertex u to v. */
174 static int
getDistance(const Graph & g,vertex_descriptor u,vertex_descriptor v)175 getDistance(const Graph& g, vertex_descriptor u, vertex_descriptor v)
176 {
177 typedef graph_traits<Graph>::edge_descriptor edge_descriptor;
178 pair<edge_descriptor, bool> e = edge(u, v, g);
179 assert(e.second);
180 return g[e.first].distance;
181 }
182
183 struct CompareCoverage
184 {
185 const Graph& g;
CompareCoverageCompareCoverage186 CompareCoverage(const Graph& g)
187 : g(g)
188 {}
operator ()CompareCoverage189 bool operator()(vertex_descriptor u, vertex_descriptor v)
190 {
191 return g[u].coverage > g[v].coverage;
192 }
193 };
194
195 /** Pop the bubble between vertices v and tail. */
196 static void
popBubble(Graph & g,vertex_descriptor v,vertex_descriptor tail)197 popBubble(Graph& g, vertex_descriptor v, vertex_descriptor tail)
198 {
199 unsigned nbranches = g.out_degree(v);
200 assert(nbranches > 1);
201 assert(nbranches == g.in_degree(tail));
202 vector<vertex_descriptor> sorted(nbranches);
203 pair<adjacency_iterator, adjacency_iterator> adj = g.adjacent_vertices(v);
204 copy(adj.first, adj.second, sorted.begin());
205 sort(sorted.begin(), sorted.end(), CompareCoverage(g));
206 if (opt::bubbleGraph)
207 #pragma omp critical(cout)
208 {
209 cout << '"' << get(vertex_name, g, v) << "\" -> {";
210 for (vector<vertex_descriptor>::const_iterator it = sorted.begin(); it != sorted.end();
211 ++it)
212 cout << " \"" << get(vertex_name, g, *it) << '"';
213 cout << " } -> \"" << get(vertex_name, g, tail) << "\"\n";
214 }
215 #pragma omp critical(g_popped)
216 transform(sorted.begin() + 1, sorted.end(), back_inserter(g_popped), [](const ContigNode& c) {
217 return c.contigIndex();
218 });
219 }
220
221 static struct
222 {
223 unsigned bubbles;
224 unsigned popped;
225 unsigned scaffold;
226 unsigned notSimple;
227 unsigned tooLong;
228 unsigned tooMany;
229 unsigned dissimilar;
230 } g_count;
231
232 /** Contig sequences. */
233 typedef vector<const_string> Contigs;
234 static Contigs g_contigs;
235
236 /** Return the sequence of vertex u. */
237 static string
getSequence(const Graph * g,vertex_descriptor u)238 getSequence(const Graph* g, vertex_descriptor u)
239 {
240 size_t i = get(vertex_contig_index, *g, u);
241 assert(i < g_contigs.size());
242 string seq(g_contigs[i]);
243 return get(vertex_sense, *g, u) ? reverseComplement(seq) : seq;
244 }
245
246 /** Return the length of vertex v. */
247 static unsigned
getLength(const Graph * g,vertex_descriptor v)248 getLength(const Graph* g, vertex_descriptor v)
249 {
250 return (*g)[v].length;
251 }
252
253 /** Align the sequences of [first,last).
254 * @param t the vertex to the left of the bubble
255 * @param v the vertex to the right of the bubble
256 * @return the identity of the global alignment
257 */
258 template<typename It>
259 static float
getAlignmentIdentity(const Graph & g,vertex_descriptor t,vertex_descriptor v,It first,It last)260 getAlignmentIdentity(const Graph& g, vertex_descriptor t, vertex_descriptor v, It first, It last)
261 {
262 unsigned nbranches = distance(first, last);
263 vector<int> inDists(nbranches);
264 transform(
265 first, last, inDists.begin(), boost::lambda::bind(getDistance, boost::cref(g), t, _1));
266 vector<int> outDists(nbranches);
267 transform(
268 first, last, outDists.begin(), boost::lambda::bind(getDistance, boost::cref(g), _1, v));
269 vector<int> insertLens(nbranches);
270 transform(
271 first,
272 last,
273 insertLens.begin(),
274 boost::lambda::bind(getDistance, boost::cref(g), t, _1) +
275 boost::lambda::bind(getLength, &g, _1) +
276 boost::lambda::bind(getDistance, boost::cref(g), _1, v));
277
278 int max_in_overlap = -(*min_element(inDists.begin(), inDists.end()));
279 assert(max_in_overlap >= 0);
280 int max_out_overlap = -(*min_element(outDists.begin(), outDists.end()));
281 assert(max_out_overlap >= 0);
282 int min_insert_len = *min_element(insertLens.begin(), insertLens.end());
283 int max_insert_len = *max_element(insertLens.begin(), insertLens.end());
284
285 float max_identity = (float)(min_insert_len + max_in_overlap + max_out_overlap) /
286 (max_insert_len + max_in_overlap + max_out_overlap);
287 if (min_insert_len <= 0 || max_identity < opt::identity)
288 return max_identity;
289
290 vector<string> seqs(nbranches);
291 transform(first, last, seqs.begin(), boost::lambda::bind(getSequence, &g, _1));
292 for (unsigned i = 0; i < seqs.size(); i++) {
293 // Remove the overlapping sequence.
294 int n = seqs[i].size();
295 int l = -inDists[i], r = -outDists[i];
296 assert(n > l + r);
297 seqs[i] = seqs[i].substr(l, n - l - r);
298 }
299
300 unsigned matches, consensusSize;
301 tie(matches, consensusSize) = align(seqs);
302 return (float)(matches + max_in_overlap + max_out_overlap) /
303 (consensusSize + max_in_overlap + max_out_overlap);
304 }
305
306 /** Pop the specified bubble if it is a simple bubble.
307 * @return whether the bubble is popped
308 */
309 static bool
popSimpleBubble(Graph * pg,vertex_descriptor v)310 popSimpleBubble(Graph* pg, vertex_descriptor v)
311 {
312 Graph& g = *pg;
313 unsigned nbranches = g.out_degree(v);
314 assert(nbranches >= 2);
315 vertex_descriptor v1 = *g.adjacent_vertices(v).first;
316 if (g.out_degree(v1) != 1) {
317 #pragma omp atomic
318 g_count.notSimple++;
319 return false;
320 }
321 vertex_descriptor tail = *g.adjacent_vertices(v1).first;
322 if (v == get(vertex_complement, g, tail) // Palindrome
323 || g.in_degree(tail) != nbranches) {
324 #pragma omp atomic
325 g_count.notSimple++;
326 return false;
327 }
328
329 // Check that every branch is simple and ends at the same node.
330 pair<adjacency_iterator, adjacency_iterator> adj = g.adjacent_vertices(v);
331 for (adjacency_iterator it = adj.first; it != adj.second; ++it) {
332 if (g.out_degree(*it) != 1 || g.in_degree(*it) != 1) {
333 #pragma omp atomic
334 g_count.notSimple++;
335 return false;
336 }
337 if (*g.adjacent_vertices(*it).first != tail) {
338 // The branches do not merge back to the same node.
339 #pragma omp atomic
340 g_count.notSimple++;
341 return false;
342 }
343 }
344
345 if (opt::verbose > 2)
346 #pragma omp critical(cerr)
347 {
348 cerr << "\n* " << get(vertex_name, g, v) << " ->";
349 for (adjacency_iterator it = adj.first; it != adj.second; ++it)
350 cerr << ' ' << get(vertex_name, g, *it);
351 cerr << " -> " << get(vertex_name, g, tail) << '\n';
352 }
353
354 if (nbranches > opt::maxBranches) {
355 // Too many branches.
356 #pragma omp atomic
357 g_count.tooMany++;
358 if (opt::verbose > 1)
359 #pragma omp critical(cerr)
360 cerr << nbranches << " paths (too many)\n";
361 return false;
362 }
363
364 vector<unsigned> lengths(nbranches);
365 transform(adj.first, adj.second, lengths.begin(), [&g](const ContigNode& c) {
366 return getLength(&g, c);
367 });
368 unsigned minLength = *min_element(lengths.begin(), lengths.end());
369 unsigned maxLength = *max_element(lengths.begin(), lengths.end());
370 if (maxLength >= opt::maxLength) {
371 // This branch is too long.
372 #pragma omp atomic
373 g_count.tooLong++;
374 if (opt::verbose > 1)
375 #pragma omp critical(cerr)
376 cerr << minLength << '\t' << maxLength << "\t0\t(too long)\n";
377 return false;
378 }
379
380 float identity =
381 opt::identity == 0 ? 0 : getAlignmentIdentity(g, v, tail, adj.first, adj.second);
382 bool dissimilar = identity < opt::identity;
383 if (opt::verbose > 1)
384 #pragma omp critical(cerr)
385 cerr << minLength << '\t' << maxLength << '\t' << identity
386 << (dissimilar ? "\t(dissimilar)" : "") << '\n';
387 if (dissimilar) {
388 // Insufficient identity.
389 #pragma omp atomic
390 g_count.dissimilar++;
391 return false;
392 }
393
394 #pragma omp atomic
395 g_count.popped++;
396 popBubble(g, v, tail);
397 return true;
398 }
399
400 /** Add distances to a path. */
401 static ContigPath
addDistance(const Graph & g,const ContigPath & path)402 addDistance(const Graph& g, const ContigPath& path)
403 {
404 ContigPath out;
405 out.reserve(path.size());
406 ContigNode u = path.front();
407 out.push_back(u);
408 for (ContigPath::const_iterator it = path.begin() + 1; it != path.end(); ++it) {
409 ContigNode v = *it;
410 int distance = getDistance(g, u, v);
411 if (distance >= 0) {
412 int numN = distance + opt::k - 1; // by convention
413 assert(numN >= 0);
414 numN = max(numN, 1);
415 out.push_back(ContigNode(numN, 'N'));
416 }
417 out.push_back(v);
418 u = v;
419 }
420 return out;
421 }
422
423 /** Return the length of the longest path through the bubble. */
424 static int
longestPath(const Graph & g,const Bubble & topo)425 longestPath(const Graph& g, const Bubble& topo)
426 {
427 typedef graph_traits<Graph>::edge_descriptor E;
428 typedef graph_traits<Graph>::out_edge_iterator Eit;
429 typedef graph_traits<Graph>::vertex_descriptor V;
430
431 EdgeWeightMap<Graph> weight(g);
432 map<ContigNode, int> distance;
433 distance[topo.front()] = 0;
434 for (Bubble::const_iterator it = topo.begin(); it != topo.end(); ++it) {
435 V u = *it;
436 Eit eit, elast;
437 for (tie(eit, elast) = out_edges(u, g); eit != elast; ++eit) {
438 E e = *eit;
439 V v = target(e, g);
440 distance[v] = max(distance[v], distance[u] + weight[e]);
441 }
442 }
443 V v = topo.back();
444 return distance[v] - g[v].length;
445 }
446
447 /** Scaffold over the bubble between vertices u and w.
448 * Add an edge (u,w) with the distance property set to the length of
449 * the largest branch of the bubble.
450 */
451 static void
scaffoldBubble(Graph & g,const Bubble & bubble)452 scaffoldBubble(Graph& g, const Bubble& bubble)
453 {
454 typedef graph_traits<Graph>::vertex_descriptor V;
455 assert(opt::scaffold);
456 assert(bubble.size() > 2);
457
458 V u = bubble.front(), w = bubble.back();
459 if (edge(u, w, g).second) {
460 // Already scaffolded.
461 return;
462 }
463 assert(isBubble(g, bubble.begin(), bubble.end()));
464
465 assert(bubble.size() > 2);
466 size_t n = bubble.size() - 2;
467 g_popped.reserve(g_popped.size() + n);
468 for (Bubble::const_iterator it = bubble.begin() + 1; it != bubble.end() - 1; ++it)
469 g_popped.push_back(it->contigIndex());
470
471 add_edge(u, w, max(longestPath(g, bubble), 1), g);
472 }
473
474 /** Pop the specified bubble if it is simple, otherwise scaffold. */
475 static void
popOrScaffoldBubble(Graph & g,const Bubble & bubble)476 popOrScaffoldBubble(Graph& g, const Bubble& bubble)
477 {
478 #pragma omp atomic
479 g_count.bubbles++;
480 if (!popSimpleBubble(&g, bubble.front()) && opt::scaffold) {
481 #pragma omp atomic
482 g_count.scaffold++;
483 scaffoldBubble(g, bubble);
484 }
485 }
486
487 /** Return the length of the specified vertex in k-mer. */
488 static unsigned
getKmerLength(const ContigProperties & vp)489 getKmerLength(const ContigProperties& vp)
490 {
491 assert(vp.length >= opt::k);
492 return vp.length - opt::k + 1;
493 }
494
495 /** Return the mean k-mer coverage of the specified vertex. */
496 static float
getMeanCoverage(const ContigProperties & vp)497 getMeanCoverage(const ContigProperties& vp)
498 {
499 return (float)vp.coverage / getKmerLength(vp);
500 }
501
502 /** Remove contigs with insufficient coverage. */
503 static void
filterGraph(Graph & g)504 filterGraph(Graph& g)
505 {
506 typedef graph_traits<Graph> GTraits;
507 typedef GTraits::vertex_descriptor V;
508 typedef GTraits::vertex_iterator Vit;
509
510 unsigned removedContigs = 0, removedKmer = 0;
511 std::pair<Vit, Vit> urange = vertices(g);
512 for (Vit uit = urange.first; uit != urange.second; ++uit) {
513 V u = *uit;
514 if (get(vertex_removed, g, u))
515 continue;
516 const ContigProperties& vp = g[u];
517 if (getMeanCoverage(vp) < opt::minCoverage) {
518 removedContigs++;
519 removedKmer += getKmerLength(vp);
520 clear_vertex(u, g);
521 remove_vertex(u, g);
522 g_popped.push_back(get(vertex_contig_index, g, u));
523 }
524 }
525 if (opt::verbose > 0) {
526 cerr << "Removed " << removedKmer << " k-mer in " << removedContigs
527 << " contigs with mean k-mer coverage "
528 "less than "
529 << opt::minCoverage << ".\n";
530 printGraphStats(cerr, g);
531 }
532 }
533
534 /** Remove the specified contig from the adjacency graph. */
535 static void
removeContig(Graph * g,ContigID id)536 removeContig(Graph* g, ContigID id)
537 {
538 ContigNode v(id, false);
539 g->clear_vertex(v);
540 g->remove_vertex(v);
541 }
542
543 int
main(int argc,char ** argv)544 main(int argc, char** argv)
545 {
546 string commandLine;
547 {
548 ostringstream ss;
549 char** last = argv + argc - 1;
550 copy(argv, last, ostream_iterator<const char*>(ss, " "));
551 ss << *last;
552 commandLine = ss.str();
553 }
554
555 bool die = false;
556 for (int c; (c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1;) {
557 istringstream arg(optarg != NULL ? optarg : "");
558 switch (c) {
559 case '?':
560 die = true;
561 break;
562 case 'a':
563 arg >> opt::maxBranches;
564 break;
565 case 'b':
566 arg >> opt::maxLength;
567 break;
568 case 'c':
569 arg >> opt::minCoverage;
570 break;
571 case 'g':
572 arg >> opt::graphPath;
573 break;
574 case 'j':
575 arg >> opt::threads;
576 break;
577 case 'k':
578 arg >> opt::k;
579 break;
580 case 'p':
581 arg >> opt::identity;
582 break;
583 case 'v':
584 opt::verbose++;
585 break;
586 case OPT_HELP:
587 cout << USAGE_MESSAGE;
588 exit(EXIT_SUCCESS);
589 case OPT_VERSION:
590 cout << VERSION_MESSAGE;
591 exit(EXIT_SUCCESS);
592 }
593 if (optarg != NULL && !arg.eof()) {
594 cerr << PROGRAM ": invalid option: `-" << (char)c << optarg << "'\n";
595 exit(EXIT_FAILURE);
596 }
597 }
598
599 if (opt::k <= 0) {
600 cerr << PROGRAM ": "
601 << "missing -k,--kmer option\n";
602 die = true;
603 }
604
605 if (argc - optind < 2) {
606 cerr << PROGRAM ": missing arguments\n";
607 die = true;
608 }
609
610 if (argc - optind > 2) {
611 cerr << PROGRAM ": too many arguments\n";
612 die = true;
613 }
614
615 if (die) {
616 cerr << "Try `" << PROGRAM << " --help' for more information.\n";
617 exit(EXIT_FAILURE);
618 }
619
620 const char* contigsPath(argv[optind++]);
621 string adjPath(argv[optind++]);
622
623 // Read the contig adjacency graph.
624 if (opt::verbose > 0)
625 cerr << "Reading `" << adjPath << "'...\n";
626 ifstream fin(adjPath.c_str());
627 assert_good(fin, adjPath);
628 Graph g;
629 fin >> g;
630 assert(fin.eof());
631 g_contigNames.lock();
632 if (opt::verbose > 0)
633 printGraphStats(cerr, g);
634
635 // Read the contigs.
636 Contigs& contigs = g_contigs;
637 if (opt::identity > 0) {
638 if (opt::verbose > 0)
639 cerr << "Reading `" << contigsPath << "'...\n";
640 FastaReader in(contigsPath, FastaReader::NO_FOLD_CASE);
641 for (FastaRecord rec; in >> rec;) {
642 if (g_contigNames.count(rec.id) == 0)
643 continue;
644 assert(contigs.size() == get(g_contigNames, rec.id));
645 contigs.push_back(rec.seq);
646 }
647 assert(in.eof());
648 assert(!contigs.empty());
649 opt::colourSpace = isdigit(contigs.front()[0]);
650 }
651
652 // Remove contigs with insufficient coverage.
653 if (opt::minCoverage > 0)
654 filterGraph(g);
655
656 if (opt::bubbleGraph)
657 cout << "digraph bubbles {\n";
658
659 Bubbles bubbles = discoverBubbles(g);
660 for (Bubbles::const_iterator it = bubbles.begin(); it != bubbles.end(); ++it)
661 popOrScaffoldBubble(g, *it);
662
663 // Each bubble should be identified twice. Remove the duplicate.
664 sort(g_popped.begin(), g_popped.end());
665 g_popped.erase(unique(g_popped.begin(), g_popped.end()), g_popped.end());
666
667 if (opt::bubbleGraph) {
668 cout << "}\n";
669 } else {
670 for (vector<ContigID>::const_iterator it = g_popped.begin(); it != g_popped.end(); ++it)
671 cout << get(g_contigNames, *it) << '\n';
672 }
673
674 if (opt::verbose > 0)
675 cerr << "Bubbles: " << (g_count.bubbles + 1) / 2 << " Popped: " << (g_count.popped + 1) / 2
676 << " Scaffolds: " << (g_count.scaffold + 1) / 2
677 << " Complex: " << (g_count.notSimple + 1) / 2
678 << " Too long: " << (g_count.tooLong + 1) / 2
679 << " Too many: " << (g_count.tooMany + 1) / 2
680 << " Dissimilar: " << (g_count.dissimilar + 1) / 2 << '\n';
681
682 if (!opt::graphPath.empty()) {
683 // Remove the popped contigs from the adjacency graph.
684 for_each(g_popped.begin(), g_popped.end(), [&g](const ContigID& c) {
685 return removeContig(&g, c);
686 });
687
688 // Assemble unambiguous paths.
689 g_contigNames.unlock();
690 typedef vector<ContigPath> ContigPaths;
691 ContigPaths paths;
692 size_t numContigs = num_vertices(g) / 2;
693 if (opt::scaffold) {
694 Graph gorig = g;
695 if (opt::ss)
696 assemble_stranded(g, back_inserter(paths));
697 else
698 assemble(g, back_inserter(paths));
699 for (ContigPaths::const_iterator it = paths.begin(); it != paths.end(); ++it) {
700 ContigNode u(numContigs + it - paths.begin(), false);
701 string name = createContigName();
702 put(vertex_name, g, u, name);
703 cout << name << '\t' << addDistance(gorig, *it) << '\n';
704 }
705 } else {
706 if (opt::ss)
707 assemble_stranded(g, back_inserter(paths));
708 else
709 assemble(g, back_inserter(paths));
710 for (ContigPaths::const_iterator it = paths.begin(); it != paths.end(); ++it) {
711 ContigNode u(numContigs + it - paths.begin(), false);
712 string name = createContigName();
713 put(vertex_name, g, u, name);
714 cout << name << '\t' << *it << '\n';
715 }
716 }
717 g_contigNames.lock();
718 paths.clear();
719
720 // Output the updated adjacency graph.
721 ofstream fout(opt::graphPath.c_str());
722 assert_good(fout, opt::graphPath);
723 write_graph(fout, g, PROGRAM, commandLine);
724 assert_good(fout, opt::graphPath);
725 if (opt::verbose > 0)
726 printGraphStats(cerr, g);
727 }
728
729 return 0;
730 }
731