1 #ifndef ASSEMBLY_DOTWRITER_H
2 #define ASSEMBLY_DOTWRITER_H 1
3
4 /** Written by Shaun Jackman <sjackman@bcgsc.ca>. */
5
6 #include "Common/UnorderedMap.h"
7 #include "Graph/ContigGraphAlgorithms.h"
8 #include <cassert>
9 #include <iostream>
10 #include <sstream>
11
12 class DotWriter
13 {
14 private:
15 typedef SequenceCollectionHash Graph;
16 typedef graph_traits<Graph>::vertex_descriptor V;
17 typedef graph_traits<Graph>::vertex_iterator Vit;
18 typedef graph_traits<Graph>::adjacency_iterator Ait;
19 typedef std::string VertexName;
20
DotWriter()21 DotWriter() : m_id(0) { }
22
23 /** Complement the specified name. */
24 static std::string
complementName(std::string s)25 complementName(std::string s)
26 {
27 char c = *s.rbegin();
28 assert(c == '+' || c == '-');
29 *s.rbegin() = c == '+' ? '-' : '+';
30 return s;
31 }
32
33 /** Return the name of the specified vertex. */
34 const VertexName&
getName(const V & u)35 getName(const V& u) const
36 {
37 Names::const_iterator it = m_names.find(u);
38 if (it == m_names.end())
39 std::cerr << "error: cannot find vertex " << u << '\n';
40 assert(it != m_names.end());
41 return it->second;
42 }
43
44 /** Set the name of the specified vertex. */
setName(const V & u,const VertexName & uname)45 void setName(const V& u, const VertexName& uname)
46 {
47 std::pair<Names::const_iterator, bool> inserted
48 = m_names.insert(Names::value_type(u, uname));
49 if (!inserted.second)
50 std::cerr << "error: duplicate vertex " << u << '\n';
51 assert(inserted.second);
52 (void)inserted;
53 }
54
55 /** Return whether this vertex is contiguous and not palindromic. */
contiguousOut(const Graph & g,const V & u)56 bool contiguousOut(const Graph& g, const V& u)
57 {
58 return contiguous_out(g, u)
59 && !u.isPalindrome()
60 && !u.isPalindrome(SENSE)
61 && !(*adjacent_vertices(u, g).first).isPalindrome();
62 }
63
64 /** Return the in-adjacent vertex. */
adjacentVertexIn(const V & u,const Graph & g)65 V adjacentVertexIn(const V& u, const Graph& g)
66 {
67 return source(*in_edges(u, g).first, g);
68 }
69
70 /** Return whether this vertex is contiguous and not palindromic. */
contiguousIn(const Graph & g,const V & u)71 bool contiguousIn(const Graph& g, const V& u)
72 {
73 return contiguous_in(g, u)
74 && !u.isPalindrome()
75 && !u.isPalindrome(ANTISENSE)
76 && !adjacentVertexIn(u, g).isPalindrome();
77 }
78
79 /** Write out the specified contig. */
writeContig(std::ostream & out,const Graph & g,const V & u)80 void writeContig(std::ostream& out, const Graph& g, const V& u)
81 {
82 unsigned n = 0;
83 unsigned c = 0;
84 V v;
85 for (v = u; contiguousOut(g, v); v = *adjacent_vertices(v, g).first) {
86 ++n;
87 c += get(vertex_coverage, g, v);
88 }
89 ++n;
90 c += get(vertex_coverage, g, v);
91
92 // Output the canonical orientation of the contig.
93 V vrc = get(vertex_complement, g, v);
94 if (vrc < u)
95 return;
96
97 std::ostringstream ss;
98 ss << m_id << '+';
99 VertexName uname = ss.str();
100 VertexName vname(uname);
101 *vname.rbegin() = '-';
102 ++m_id;
103
104 // Reorient the contig to agree with assembleContig.
105 bool rc;
106 Graph::const_iterator it = g.find(u, rc);
107 assert(it != g.end());
108 (void)it;
109 if (rc)
110 std::swap(uname, vname);
111
112 setName(u, uname);
113 if (u == vrc) {
114 // Palindrome
115 assert(n == 1);
116 } else
117 setName(vrc, vname);
118
119 unsigned l = n + V::length() - 1;
120 out << '"' << uname << "\" [l=" << l << " C=" << c << "]\n"
121 "\"" << vname << "\" [l=" << l << " C=" << c << "]\n";
122 }
123
124 /** Write out the contigs that split at the specified sequence. */
125 void
writeEdges(std::ostream & out,const Graph & g,const V & u,const VertexName & uname)126 writeEdges(std::ostream& out, const Graph& g,
127 const V& u, const VertexName& uname) const
128 {
129 if (out_degree(u, g) == 0)
130 return;
131 out << '"' << uname << "\" -> {";
132 std::pair<Ait, Ait> adj = adjacent_vertices(u, g);
133 for (Ait vit = adj.first; vit != adj.second; ++vit) {
134 V v = *vit;
135 const VertexName& vname = getName(v);
136 out << " \"" << vname << '"';
137 if (v.isPalindrome())
138 out << " \"" << complementName(vname) << '"';
139 }
140 out << " }\n";
141 }
142
143 /** Output the edges of the specified vertex. */
144 void
writeEdges(std::ostream & out,const Graph & g,const V & u)145 writeEdges(std::ostream& out, const Graph& g, const V& u) const
146 {
147 std::string uname = complementName(getName(get(vertex_complement, g, u)));
148 writeEdges(out, g, u, uname);
149 if (u.isPalindrome()) {
150 uname = complementName(uname);
151 writeEdges(out, g, u, uname);
152 }
153 }
154
155 /** Write out a dot graph for the specified collection. */
writeGraph(std::ostream & out,const Graph & g)156 void writeGraph(std::ostream& out, const Graph& g)
157 {
158 out << "digraph g {\n"
159 "graph [k=" << V::length() << "]\n"
160 "edge [d=" << -int(V::length() - 1) << "]\n";
161 std::pair<Vit, Vit> uits = vertices(g);
162
163 // Output the vertices.
164 for (Vit uit = uits.first; uit != uits.second; ++uit) {
165 V u = *uit;
166 if (get(vertex_removed, g, u))
167 continue;
168 if (!contiguousIn(g, u))
169 writeContig(out, g, u);
170 // Skip the second occurence of the palindrome.
171 if (u.isPalindrome()) {
172 assert(uit != uits.second);
173 ++uit;
174 }
175 }
176
177 // Output the edges.
178 for (Vit uit = uits.first; uit != uits.second; ++uit) {
179 V u = *uit;
180 if (get(vertex_removed, g, u))
181 continue;
182 if (!contiguousOut(g, u))
183 writeEdges(out, g, u);
184 // Skip the second occurence of the palindrome.
185 if (u.isPalindrome()) {
186 assert(uit != uits.second);
187 ++uit;
188 }
189 }
190
191 out << "}" << std::endl;
192 }
193
194 public:
195
196 /** Write out a dot graph for the specified collection. */
197 static
write(std::ostream & out,const Graph & g)198 void write(std::ostream& out, const Graph& g)
199 {
200 DotWriter dotWriter;
201 dotWriter.writeGraph(out, g);
202 }
203
204 private:
205 typedef unordered_map<V, VertexName> Names;
206
207 /** A map of terminal k-mers to contig names. */
208 Names m_names;
209
210 /** The current contig name. */
211 unsigned m_id;
212 };
213
214 #endif
215