1 /**
2  * de Bruijn Graph data structure using a Bloom filter
3  * Copyright 2013 Shaun Jackman
4  */
5 
6 #ifndef DBGBLOOM_H
7 #define DBGBLOOM_H 1
8 
9 #include "Assembly/SeqExt.h" // for NUM_BASES
10 #include "Common/IOUtil.h"
11 #include "Common/Kmer.h"
12 #include "Common/Uncompress.h"
13 #include "DataLayer/FastaReader.h"
14 #include "Graph/Properties.h"
15 
16 #include <algorithm>
17 #include <cassert>
18 #include <cstdlib> // for abort
19 #include <fstream>
20 #include <string>
21 
22 using boost::graph_traits;
23 
24 template <typename BF>
25 class DBGBloom: public BF {
26   public:
27 	/** The bundled vertex properties. */
28 	typedef no_property vertex_bundled;
29 	typedef no_property vertex_property_type;
30 
31 	/** The bundled edge properties. */
32 	typedef no_property edge_bundled;
33 	typedef no_property edge_property_type;
34 
35 	/** The bloom filter */
36 	const BF& m_bloom;
37 
DBGBloom(const BF & bloom)38 	DBGBloom(const BF& bloom) : m_bloom(bloom), m_depthThresh(0) { }
39 
DBGBloom(const BF & bloom,unsigned depthThresh)40 	DBGBloom(const BF& bloom, unsigned depthThresh) :
41 		m_bloom(bloom), m_depthThresh(depthThresh)  { }
42 
43 	const unsigned m_depthThresh;
44 
45   private:
46 	/** Copy constructor. */
47 	DBGBloom(const DBGBloom<BF>&);
48 
49 }; // class DBGBloom
50 
51 /** PropertyGraph vertex_index */
52 template <typename Graph>
53 struct DBGBloomIndexMap
54 	: boost::put_get_helper<size_t, Graph >
55 {
56 	typedef Kmer key_type;
57 	typedef size_t value_type;
58 	typedef value_type reference;
59 	typedef boost::readable_property_map_tag category;
60 
61 	const Graph& m_g;
62 
DBGBloomIndexMapDBGBloomIndexMap63 	DBGBloomIndexMap(const Graph& g) : m_g(g) { }
64 
65 	reference operator[](const key_type& u) const
66 	{
67 		return m_g.m_bloom.hash(u) % m_g.m_bloom.size();
68 	}
69 };
70 
71 // Graph
72 
73 namespace boost {
74 
75 /** Graph traits */
76 template <typename BF>
77 struct graph_traits< DBGBloom<BF> > {
78 	// Graph
79 	typedef Kmer vertex_descriptor;
80 	typedef boost::directed_tag directed_category;
81 	struct traversal_category
82 		: boost::adjacency_graph_tag,
83 		boost::bidirectional_graph_tag,
84 		boost::vertex_list_graph_tag
85 		{ };
86 	typedef boost::disallow_parallel_edge_tag edge_parallel_category;
87 
88 	static vertex_descriptor null_vertex() { return Kmer(); }
89 
90 	// IncidenceGraph
91 	typedef std::pair<vertex_descriptor, vertex_descriptor>
92 		edge_descriptor;
93 	typedef unsigned degree_size_type;
94 
95 	// VertexListGraph
96 	typedef size_t vertices_size_type;
97 	typedef void vertex_iterator;
98 
99 	// EdgeListGraph
100 	typedef size_t edges_size_type;
101 	typedef void edge_iterator;
102 
103 // AdjacencyGraph
104 
105 /** Iterate through the adjacent vertices of a vertex. */
106 struct adjacency_iterator
107 	: public std::iterator<std::input_iterator_tag, vertex_descriptor>
108 {
109 	/** Skip to the next edge that is present. */
110 	void next()
111 	{
112 		for (; m_i < NUM_BASES; ++m_i) {
113 			m_v.setLastBase(SENSE, m_i);
114 			if (vertex_exists(m_v, m_g))
115 				break;
116 		}
117 	}
118 
119   public:
120 	adjacency_iterator(const DBGBloom<BF>& g) : m_g(g), m_i(NUM_BASES) { }
121 
122 	adjacency_iterator(const DBGBloom<BF>& g, vertex_descriptor u)
123 		: m_g(g), m_v(u), m_i(0)
124 	{
125 		m_v.shift(SENSE);
126 		next();
127 	}
128 
129 	const vertex_descriptor& operator*() const
130 	{
131 		assert(m_i < NUM_BASES);
132 		return m_v;
133 	}
134 
135 	bool operator==(const adjacency_iterator& it) const
136 	{
137 		return m_i == it.m_i;
138 	}
139 
140 	bool operator!=(const adjacency_iterator& it) const
141 	{
142 		return !(*this == it);
143 	}
144 
145 	adjacency_iterator& operator++()
146 	{
147 		assert(m_i < NUM_BASES);
148 		++m_i;
149 		next();
150 		return *this;
151 	}
152 
153   private:
154 	const DBGBloom<BF>& m_g;
155 	vertex_descriptor m_v;
156 	short unsigned m_i;
157 }; // adjacency_iterator
158 
159 /** IncidenceGraph */
160 struct out_edge_iterator
161 	: public std::iterator<std::input_iterator_tag, edge_descriptor>
162 {
163 	/** Skip to the next edge that is present. */
164 	void next()
165 	{
166 		for (; m_i < NUM_BASES; ++m_i) {
167 			m_v.setLastBase(SENSE, m_i);
168 			if (vertex_exists(m_v, *m_g))
169 				break;
170 		}
171 	}
172 
173   public:
174 	out_edge_iterator() { }
175 
176 	out_edge_iterator(const DBGBloom<BF>& g) : m_g(&g), m_i(NUM_BASES) { }
177 
178 	out_edge_iterator(const DBGBloom<BF>& g, vertex_descriptor u)
179 		: m_g(&g), m_u(u), m_v(u), m_i(0)
180 	{
181 		m_v.shift(SENSE);
182 		next();
183 	}
184 
185 	edge_descriptor operator*() const
186 	{
187 		assert(m_i < NUM_BASES);
188 		return edge_descriptor(m_u, m_v);
189 	}
190 
191 	bool operator==(const out_edge_iterator& it) const
192 	{
193 		return m_i == it.m_i;
194 	}
195 
196 	bool operator!=(const out_edge_iterator& it) const
197 	{
198 		return !(*this == it);
199 	}
200 
201 	out_edge_iterator& operator++()
202 	{
203 		assert(m_i < NUM_BASES);
204 		++m_i;
205 		next();
206 		return *this;
207 	}
208 
209 	out_edge_iterator operator++(int)
210 	{
211 		out_edge_iterator it = *this;
212 		++*this;
213 		return it;
214 	}
215 
216   private:
217 	const DBGBloom<BF>* m_g;
218 	vertex_descriptor m_u;
219 	vertex_descriptor m_v;
220 	unsigned m_i;
221 }; // out_edge_iterator
222 
223 /** BidirectionalGraph */
224 struct in_edge_iterator
225 	: public std::iterator<std::input_iterator_tag, edge_descriptor>
226 {
227 	/** Skip to the next edge that is present. */
228 	void next()
229 	{
230 		for (; m_i < NUM_BASES; ++m_i) {
231 			m_v.setLastBase(ANTISENSE, m_i);
232 			if (vertex_exists(m_v, *m_g))
233 				break;
234 		}
235 	}
236 
237   public:
238 	in_edge_iterator() { }
239 
240 	in_edge_iterator(const DBGBloom<BF>& g) : m_g(&g), m_i(NUM_BASES) { }
241 
242 	in_edge_iterator(const DBGBloom<BF>& g, vertex_descriptor u)
243 		: m_g(&g), m_u(u), m_v(u), m_i(0)
244 	{
245 		m_v.shift(ANTISENSE);
246 		next();
247 	}
248 
249 	edge_descriptor operator*() const
250 	{
251 		assert(m_i < NUM_BASES);
252 		return edge_descriptor(m_v, m_u);
253 	}
254 
255 	bool operator==(const in_edge_iterator& it) const
256 	{
257 		return m_i == it.m_i;
258 	}
259 
260 	bool operator!=(const in_edge_iterator& it) const
261 	{
262 		return !(*this == it);
263 	}
264 
265 	in_edge_iterator& operator++()
266 	{
267 		assert(m_i < NUM_BASES);
268 		++m_i;
269 		next();
270 		return *this;
271 	}
272 
273 	in_edge_iterator operator++(int)
274 	{
275 		in_edge_iterator it = *this;
276 		++*this;
277 		return it;
278 	}
279 
280   private:
281 	const DBGBloom<BF>* m_g;
282 	vertex_descriptor m_u;
283 	vertex_descriptor m_v;
284 	unsigned m_i;
285 }; // in_edge_iterator
286 
287 }; // graph_traits<DBGBloom>
288 
289 } // namespace boost
290 
291 // Subgraph
292 
293 /** Return whether this vertex exists in the subgraph. */
294 template <typename Graph>
295 static inline bool
296 vertex_exists(typename graph_traits<Graph>::vertex_descriptor u, const Graph& g)
297 {
298 	return g.m_bloom[u] > g.m_depthThresh;
299 }
300 
301 template <typename Graph>
302 static inline
303 std::pair<typename graph_traits<Graph>::adjacency_iterator,
304 		typename graph_traits<Graph>::adjacency_iterator>
305 adjacent_vertices(
306 		typename graph_traits<Graph>::vertex_descriptor u, const Graph& g)
307 {
308 	typedef typename graph_traits<Graph>::adjacency_iterator adjacency_iterator;
309 	return std::make_pair(adjacency_iterator(g, u), adjacency_iterator(g));
310 }
311 
312 // IncidenceGraph
313 template <typename Graph>
314 static inline
315 typename graph_traits<Graph>::degree_size_type
316 out_degree(
317 		typename graph_traits<Graph>::vertex_descriptor u,
318 		const Graph& g)
319 {
320 	typedef typename graph_traits<Graph>::adjacency_iterator Ait;
321 	std::pair<Ait, Ait> adj = adjacent_vertices(u, g);
322 	return std::distance(adj.first, adj.second);
323 }
324 
325 template <typename Graph>
326 static inline typename
327 std::pair<typename graph_traits<Graph>::out_edge_iterator,
328 	typename graph_traits<Graph>::out_edge_iterator>
329 out_edges(
330 		typename graph_traits<Graph>::vertex_descriptor u,
331 		const Graph& g)
332 {
333 	typedef typename graph_traits<Graph>::out_edge_iterator Oit;
334 	return std::make_pair(Oit(g, u), Oit(g));
335 }
336 
337 // BidirectionalGraph
338 template <typename Graph>
339 static inline
340 typename graph_traits<Graph>::degree_size_type
341 in_degree(typename graph_traits<Graph>::vertex_descriptor u,
342 		const Graph& g)
343 {
344 	return out_degree(reverseComplement(u), g);
345 }
346 
347 template <typename Graph>
348 static inline
349 typename graph_traits<Graph>::degree_size_type
350 degree(typename graph_traits<Graph>::vertex_descriptor u,
351 	const Graph& g)
352 {
353 	return in_degree(u, g) + out_degree(u, g);
354 }
355 
356 template <typename Graph>
357 static inline
358 std::pair<typename graph_traits<Graph>::in_edge_iterator,
359 	typename graph_traits<Graph>::in_edge_iterator>
360 in_edges(
361 		typename graph_traits<Graph>::vertex_descriptor u,
362 		const Graph& g)
363 {
364 	typedef typename graph_traits<Graph>::in_edge_iterator Iit;
365 	return std::make_pair(Iit(g, u), Iit(g));
366 }
367 
368 // VertexListGraph
369 template <typename Graph>
370 static inline
371 typename graph_traits<Graph>::vertices_size_type
372 num_vertices(const Graph& g)
373 {
374 	return g.m_bloom.popcount();
375 }
376 
377 // PropertyGraph vertex_index
378 template<typename Graph>
379 static inline
380 DBGBloomIndexMap<Graph>
381 get(vertex_index_t, const Graph& g)
382 {
383 	return DBGBloomIndexMap<Graph>(g);
384 }
385 
386 template <typename Graph>
387 static inline
388 typename graph_traits<Graph>::vertices_size_type
389 get(vertex_index_t tag, const Graph& g,
390 		typename graph_traits<Graph>::vertex_descriptor u)
391 {
392 	return get(get(tag, g), u);
393 }
394 
395 // PropertyGraph
396 
397 /** Return the reverse complement of the specified k-mer. */
398 template <typename Graph>
399 static inline
400 typename graph_traits<Graph>::vertex_descriptor
401 get(vertex_complement_t, const Graph&,
402 		typename graph_traits<Graph>::vertex_descriptor u)
403 {
404 	return reverseComplement(u);
405 }
406 
407 /** Return the name of the specified vertex. */
408 template <typename Graph>
409 static inline
410 Kmer get(vertex_name_t, const Graph&,
411 		typename graph_traits<Graph>::vertex_descriptor u)
412 {
413 	return u;
414 }
415 
416 template <typename Graph>
417 static inline
418 bool
419 get(vertex_removed_t, const Graph&,
420 		typename graph_traits<Graph>::vertex_descriptor)
421 {
422 	return false;
423 }
424 
425 template <typename Graph>
426 static inline
427 no_property
428 get(vertex_bundle_t, const Graph&,
429 		typename graph_traits<Graph>::edge_descriptor)
430 {
431 	return no_property();
432 }
433 
434 template <typename Graph>
435 static inline
436 no_property
437 get(edge_bundle_t, const Graph&,
438 		typename graph_traits<Graph>::edge_descriptor)
439 {
440 	return no_property();
441 }
442 
443 #endif
444