1 #ifndef ASSEMBLY_DOTWRITER_H
2 #define ASSEMBLY_DOTWRITER_H 1
3 
4 /** Written by Shaun Jackman <sjackman@bcgsc.ca>. */
5 
6 #include "Common/UnorderedMap.h"
7 #include "Graph/ContigGraphAlgorithms.h"
8 #include <cassert>
9 #include <iostream>
10 #include <sstream>
11 
12 class DotWriter
13 {
14 private:
15 	typedef SequenceCollectionHash Graph;
16 	typedef graph_traits<Graph>::vertex_descriptor V;
17 	typedef graph_traits<Graph>::vertex_iterator Vit;
18 	typedef graph_traits<Graph>::adjacency_iterator Ait;
19 	typedef std::string VertexName;
20 
DotWriter()21 	DotWriter() : m_id(0) { }
22 
23 /** Complement the specified name. */
24 static std::string
complementName(std::string s)25 complementName(std::string s)
26 {
27 	char c = *s.rbegin();
28 	assert(c == '+' || c == '-');
29 	*s.rbegin() = c == '+' ? '-' : '+';
30 	return s;
31 }
32 
33 /** Return the name of the specified vertex. */
34 const VertexName&
getName(const V & u)35 getName(const V& u) const
36 {
37 	Names::const_iterator it = m_names.find(u);
38 	if (it == m_names.end())
39 		std::cerr << "error: cannot find vertex " << u << '\n';
40 	assert(it != m_names.end());
41 	return it->second;
42 }
43 
44 /** Set the name of the specified vertex. */
setName(const V & u,const VertexName & uname)45 void setName(const V& u, const VertexName& uname)
46 {
47 	std::pair<Names::const_iterator, bool> inserted
48 		= m_names.insert(Names::value_type(u, uname));
49 	if (!inserted.second)
50 		std::cerr << "error: duplicate vertex " << u << '\n';
51 	assert(inserted.second);
52 	(void)inserted;
53 }
54 
55 /** Return whether this vertex is contiguous and not palindromic. */
contiguousOut(const Graph & g,const V & u)56 bool contiguousOut(const Graph& g, const V& u)
57 {
58 	return contiguous_out(g, u)
59 		&& !u.isPalindrome()
60 		&& !u.isPalindrome(SENSE)
61 		&& !(*adjacent_vertices(u, g).first).isPalindrome();
62 }
63 
64 /** Return the in-adjacent vertex. */
adjacentVertexIn(const V & u,const Graph & g)65 V adjacentVertexIn(const V& u, const Graph& g)
66 {
67 	return source(*in_edges(u, g).first, g);
68 }
69 
70 /** Return whether this vertex is contiguous and not palindromic. */
contiguousIn(const Graph & g,const V & u)71 bool contiguousIn(const Graph& g, const V& u)
72 {
73 	return contiguous_in(g, u)
74 		&& !u.isPalindrome()
75 		&& !u.isPalindrome(ANTISENSE)
76 		&& !adjacentVertexIn(u, g).isPalindrome();
77 }
78 
79 /** Write out the specified contig. */
writeContig(std::ostream & out,const Graph & g,const V & u)80 void writeContig(std::ostream& out, const Graph& g, const V& u)
81 {
82 	unsigned n = 0;
83 	unsigned c = 0;
84 	V v;
85 	for (v = u; contiguousOut(g, v); v = *adjacent_vertices(v, g).first) {
86 		++n;
87 		c += get(vertex_coverage, g, v);
88 	}
89 	++n;
90 	c += get(vertex_coverage, g, v);
91 
92 	// Output the canonical orientation of the contig.
93 	V vrc = get(vertex_complement, g, v);
94 	if (vrc < u)
95 		return;
96 
97 	std::ostringstream ss;
98 	ss << m_id << '+';
99 	VertexName uname = ss.str();
100 	VertexName vname(uname);
101 	*vname.rbegin() = '-';
102 	++m_id;
103 
104 	// Reorient the contig to agree with assembleContig.
105 	bool rc;
106 	Graph::const_iterator it = g.find(u, rc);
107 	assert(it != g.end());
108 	(void)it;
109 	if (rc)
110 		std::swap(uname, vname);
111 
112 	setName(u, uname);
113 	if (u == vrc) {
114 		// Palindrome
115 		assert(n == 1);
116 	} else
117 		setName(vrc, vname);
118 
119 	unsigned l = n + V::length() - 1;
120 	out << '"' << uname << "\" [l=" << l << " C=" << c << "]\n"
121 		"\"" << vname << "\" [l=" << l << " C=" << c << "]\n";
122 }
123 
124 /** Write out the contigs that split at the specified sequence. */
125 void
writeEdges(std::ostream & out,const Graph & g,const V & u,const VertexName & uname)126 writeEdges(std::ostream& out, const Graph& g,
127 		const V& u, const VertexName& uname) const
128 {
129 	if (out_degree(u, g) == 0)
130 		return;
131 	out << '"' << uname << "\" -> {";
132 	std::pair<Ait, Ait> adj = adjacent_vertices(u, g);
133 	for (Ait vit = adj.first; vit != adj.second; ++vit) {
134 		V v = *vit;
135 		const VertexName& vname = getName(v);
136 		out << " \"" << vname << '"';
137 		if (v.isPalindrome())
138 			out << " \"" << complementName(vname) << '"';
139 	}
140 	out << " }\n";
141 }
142 
143 /** Output the edges of the specified vertex. */
144 void
writeEdges(std::ostream & out,const Graph & g,const V & u)145 writeEdges(std::ostream& out, const Graph& g, const V& u) const
146 {
147 	std::string uname = complementName(getName(get(vertex_complement, g, u)));
148 	writeEdges(out, g, u, uname);
149 	if (u.isPalindrome()) {
150 		uname = complementName(uname);
151 		writeEdges(out, g, u, uname);
152 	}
153 }
154 
155 /** Write out a dot graph for the specified collection. */
writeGraph(std::ostream & out,const Graph & g)156 void writeGraph(std::ostream& out, const Graph& g)
157 {
158 	out << "digraph g {\n"
159 		"graph [k=" << V::length() << "]\n"
160 		"edge [d=" << -int(V::length() - 1) << "]\n";
161 	std::pair<Vit, Vit> uits = vertices(g);
162 
163 	// Output the vertices.
164 	for (Vit uit = uits.first; uit != uits.second; ++uit) {
165 		V u = *uit;
166 		if (get(vertex_removed, g, u))
167 			continue;
168 		if (!contiguousIn(g, u))
169 			writeContig(out, g, u);
170 		// Skip the second occurence of the palindrome.
171 		if (u.isPalindrome()) {
172 			assert(uit != uits.second);
173 			++uit;
174 		}
175 	}
176 
177 	// Output the edges.
178 	for (Vit uit = uits.first; uit != uits.second; ++uit) {
179 		V u = *uit;
180 		if (get(vertex_removed, g, u))
181 			continue;
182 		if (!contiguousOut(g, u))
183 			writeEdges(out, g, u);
184 		// Skip the second occurence of the palindrome.
185 		if (u.isPalindrome()) {
186 			assert(uit != uits.second);
187 			++uit;
188 		}
189 	}
190 
191 	out << "}" << std::endl;
192 }
193 
194 public:
195 
196 /** Write out a dot graph for the specified collection. */
197 static
write(std::ostream & out,const Graph & g)198 void write(std::ostream& out, const Graph& g)
199 {
200 	DotWriter dotWriter;
201 	dotWriter.writeGraph(out, g);
202 }
203 
204 private:
205 	typedef unordered_map<V, VertexName> Names;
206 
207 	/** A map of terminal k-mers to contig names. */
208 	Names m_names;
209 
210 	/** The current contig name. */
211 	unsigned m_id;
212 };
213 
214 #endif
215