1 /** 2 * de Bruijn Graph data structure using a Bloom filter 3 * Copyright 2013 Shaun Jackman 4 */ 5 6 #ifndef DBGBLOOM_H 7 #define DBGBLOOM_H 1 8 9 #include "Assembly/SeqExt.h" // for NUM_BASES 10 #include "Common/IOUtil.h" 11 #include "Common/Kmer.h" 12 #include "Common/Uncompress.h" 13 #include "DataLayer/FastaReader.h" 14 #include "Graph/Properties.h" 15 16 #include <algorithm> 17 #include <cassert> 18 #include <cstdlib> // for abort 19 #include <fstream> 20 #include <string> 21 22 using boost::graph_traits; 23 24 template <typename BF> 25 class DBGBloom: public BF { 26 public: 27 /** The bundled vertex properties. */ 28 typedef no_property vertex_bundled; 29 typedef no_property vertex_property_type; 30 31 /** The bundled edge properties. */ 32 typedef no_property edge_bundled; 33 typedef no_property edge_property_type; 34 35 /** The bloom filter */ 36 const BF& m_bloom; 37 DBGBloom(const BF & bloom)38 DBGBloom(const BF& bloom) : m_bloom(bloom), m_depthThresh(0) { } 39 DBGBloom(const BF & bloom,unsigned depthThresh)40 DBGBloom(const BF& bloom, unsigned depthThresh) : 41 m_bloom(bloom), m_depthThresh(depthThresh) { } 42 43 const unsigned m_depthThresh; 44 45 private: 46 /** Copy constructor. */ 47 DBGBloom(const DBGBloom<BF>&); 48 49 }; // class DBGBloom 50 51 /** PropertyGraph vertex_index */ 52 template <typename Graph> 53 struct DBGBloomIndexMap 54 : boost::put_get_helper<size_t, Graph > 55 { 56 typedef Kmer key_type; 57 typedef size_t value_type; 58 typedef value_type reference; 59 typedef boost::readable_property_map_tag category; 60 61 const Graph& m_g; 62 DBGBloomIndexMapDBGBloomIndexMap63 DBGBloomIndexMap(const Graph& g) : m_g(g) { } 64 65 reference operator[](const key_type& u) const 66 { 67 return m_g.m_bloom.hash(u) % m_g.m_bloom.size(); 68 } 69 }; 70 71 // Graph 72 73 namespace boost { 74 75 /** Graph traits */ 76 template <typename BF> 77 struct graph_traits< DBGBloom<BF> > { 78 // Graph 79 typedef Kmer vertex_descriptor; 80 typedef boost::directed_tag directed_category; 81 struct traversal_category 82 : boost::adjacency_graph_tag, 83 boost::bidirectional_graph_tag, 84 boost::vertex_list_graph_tag 85 { }; 86 typedef boost::disallow_parallel_edge_tag edge_parallel_category; 87 88 static vertex_descriptor null_vertex() { return Kmer(); } 89 90 // IncidenceGraph 91 typedef std::pair<vertex_descriptor, vertex_descriptor> 92 edge_descriptor; 93 typedef unsigned degree_size_type; 94 95 // VertexListGraph 96 typedef size_t vertices_size_type; 97 typedef void vertex_iterator; 98 99 // EdgeListGraph 100 typedef size_t edges_size_type; 101 typedef void edge_iterator; 102 103 // AdjacencyGraph 104 105 /** Iterate through the adjacent vertices of a vertex. */ 106 struct adjacency_iterator 107 : public std::iterator<std::input_iterator_tag, vertex_descriptor> 108 { 109 /** Skip to the next edge that is present. */ 110 void next() 111 { 112 for (; m_i < NUM_BASES; ++m_i) { 113 m_v.setLastBase(SENSE, m_i); 114 if (vertex_exists(m_v, m_g)) 115 break; 116 } 117 } 118 119 public: 120 adjacency_iterator(const DBGBloom<BF>& g) : m_g(g), m_i(NUM_BASES) { } 121 122 adjacency_iterator(const DBGBloom<BF>& g, vertex_descriptor u) 123 : m_g(g), m_v(u), m_i(0) 124 { 125 m_v.shift(SENSE); 126 next(); 127 } 128 129 const vertex_descriptor& operator*() const 130 { 131 assert(m_i < NUM_BASES); 132 return m_v; 133 } 134 135 bool operator==(const adjacency_iterator& it) const 136 { 137 return m_i == it.m_i; 138 } 139 140 bool operator!=(const adjacency_iterator& it) const 141 { 142 return !(*this == it); 143 } 144 145 adjacency_iterator& operator++() 146 { 147 assert(m_i < NUM_BASES); 148 ++m_i; 149 next(); 150 return *this; 151 } 152 153 private: 154 const DBGBloom<BF>& m_g; 155 vertex_descriptor m_v; 156 short unsigned m_i; 157 }; // adjacency_iterator 158 159 /** IncidenceGraph */ 160 struct out_edge_iterator 161 : public std::iterator<std::input_iterator_tag, edge_descriptor> 162 { 163 /** Skip to the next edge that is present. */ 164 void next() 165 { 166 for (; m_i < NUM_BASES; ++m_i) { 167 m_v.setLastBase(SENSE, m_i); 168 if (vertex_exists(m_v, *m_g)) 169 break; 170 } 171 } 172 173 public: 174 out_edge_iterator() { } 175 176 out_edge_iterator(const DBGBloom<BF>& g) : m_g(&g), m_i(NUM_BASES) { } 177 178 out_edge_iterator(const DBGBloom<BF>& g, vertex_descriptor u) 179 : m_g(&g), m_u(u), m_v(u), m_i(0) 180 { 181 m_v.shift(SENSE); 182 next(); 183 } 184 185 edge_descriptor operator*() const 186 { 187 assert(m_i < NUM_BASES); 188 return edge_descriptor(m_u, m_v); 189 } 190 191 bool operator==(const out_edge_iterator& it) const 192 { 193 return m_i == it.m_i; 194 } 195 196 bool operator!=(const out_edge_iterator& it) const 197 { 198 return !(*this == it); 199 } 200 201 out_edge_iterator& operator++() 202 { 203 assert(m_i < NUM_BASES); 204 ++m_i; 205 next(); 206 return *this; 207 } 208 209 out_edge_iterator operator++(int) 210 { 211 out_edge_iterator it = *this; 212 ++*this; 213 return it; 214 } 215 216 private: 217 const DBGBloom<BF>* m_g; 218 vertex_descriptor m_u; 219 vertex_descriptor m_v; 220 unsigned m_i; 221 }; // out_edge_iterator 222 223 /** BidirectionalGraph */ 224 struct in_edge_iterator 225 : public std::iterator<std::input_iterator_tag, edge_descriptor> 226 { 227 /** Skip to the next edge that is present. */ 228 void next() 229 { 230 for (; m_i < NUM_BASES; ++m_i) { 231 m_v.setLastBase(ANTISENSE, m_i); 232 if (vertex_exists(m_v, *m_g)) 233 break; 234 } 235 } 236 237 public: 238 in_edge_iterator() { } 239 240 in_edge_iterator(const DBGBloom<BF>& g) : m_g(&g), m_i(NUM_BASES) { } 241 242 in_edge_iterator(const DBGBloom<BF>& g, vertex_descriptor u) 243 : m_g(&g), m_u(u), m_v(u), m_i(0) 244 { 245 m_v.shift(ANTISENSE); 246 next(); 247 } 248 249 edge_descriptor operator*() const 250 { 251 assert(m_i < NUM_BASES); 252 return edge_descriptor(m_v, m_u); 253 } 254 255 bool operator==(const in_edge_iterator& it) const 256 { 257 return m_i == it.m_i; 258 } 259 260 bool operator!=(const in_edge_iterator& it) const 261 { 262 return !(*this == it); 263 } 264 265 in_edge_iterator& operator++() 266 { 267 assert(m_i < NUM_BASES); 268 ++m_i; 269 next(); 270 return *this; 271 } 272 273 in_edge_iterator operator++(int) 274 { 275 in_edge_iterator it = *this; 276 ++*this; 277 return it; 278 } 279 280 private: 281 const DBGBloom<BF>* m_g; 282 vertex_descriptor m_u; 283 vertex_descriptor m_v; 284 unsigned m_i; 285 }; // in_edge_iterator 286 287 }; // graph_traits<DBGBloom> 288 289 } // namespace boost 290 291 // Subgraph 292 293 /** Return whether this vertex exists in the subgraph. */ 294 template <typename Graph> 295 static inline bool 296 vertex_exists(typename graph_traits<Graph>::vertex_descriptor u, const Graph& g) 297 { 298 return g.m_bloom[u] > g.m_depthThresh; 299 } 300 301 template <typename Graph> 302 static inline 303 std::pair<typename graph_traits<Graph>::adjacency_iterator, 304 typename graph_traits<Graph>::adjacency_iterator> 305 adjacent_vertices( 306 typename graph_traits<Graph>::vertex_descriptor u, const Graph& g) 307 { 308 typedef typename graph_traits<Graph>::adjacency_iterator adjacency_iterator; 309 return std::make_pair(adjacency_iterator(g, u), adjacency_iterator(g)); 310 } 311 312 // IncidenceGraph 313 template <typename Graph> 314 static inline 315 typename graph_traits<Graph>::degree_size_type 316 out_degree( 317 typename graph_traits<Graph>::vertex_descriptor u, 318 const Graph& g) 319 { 320 typedef typename graph_traits<Graph>::adjacency_iterator Ait; 321 std::pair<Ait, Ait> adj = adjacent_vertices(u, g); 322 return std::distance(adj.first, adj.second); 323 } 324 325 template <typename Graph> 326 static inline typename 327 std::pair<typename graph_traits<Graph>::out_edge_iterator, 328 typename graph_traits<Graph>::out_edge_iterator> 329 out_edges( 330 typename graph_traits<Graph>::vertex_descriptor u, 331 const Graph& g) 332 { 333 typedef typename graph_traits<Graph>::out_edge_iterator Oit; 334 return std::make_pair(Oit(g, u), Oit(g)); 335 } 336 337 // BidirectionalGraph 338 template <typename Graph> 339 static inline 340 typename graph_traits<Graph>::degree_size_type 341 in_degree(typename graph_traits<Graph>::vertex_descriptor u, 342 const Graph& g) 343 { 344 return out_degree(reverseComplement(u), g); 345 } 346 347 template <typename Graph> 348 static inline 349 typename graph_traits<Graph>::degree_size_type 350 degree(typename graph_traits<Graph>::vertex_descriptor u, 351 const Graph& g) 352 { 353 return in_degree(u, g) + out_degree(u, g); 354 } 355 356 template <typename Graph> 357 static inline 358 std::pair<typename graph_traits<Graph>::in_edge_iterator, 359 typename graph_traits<Graph>::in_edge_iterator> 360 in_edges( 361 typename graph_traits<Graph>::vertex_descriptor u, 362 const Graph& g) 363 { 364 typedef typename graph_traits<Graph>::in_edge_iterator Iit; 365 return std::make_pair(Iit(g, u), Iit(g)); 366 } 367 368 // VertexListGraph 369 template <typename Graph> 370 static inline 371 typename graph_traits<Graph>::vertices_size_type 372 num_vertices(const Graph& g) 373 { 374 return g.m_bloom.popcount(); 375 } 376 377 // PropertyGraph vertex_index 378 template<typename Graph> 379 static inline 380 DBGBloomIndexMap<Graph> 381 get(vertex_index_t, const Graph& g) 382 { 383 return DBGBloomIndexMap<Graph>(g); 384 } 385 386 template <typename Graph> 387 static inline 388 typename graph_traits<Graph>::vertices_size_type 389 get(vertex_index_t tag, const Graph& g, 390 typename graph_traits<Graph>::vertex_descriptor u) 391 { 392 return get(get(tag, g), u); 393 } 394 395 // PropertyGraph 396 397 /** Return the reverse complement of the specified k-mer. */ 398 template <typename Graph> 399 static inline 400 typename graph_traits<Graph>::vertex_descriptor 401 get(vertex_complement_t, const Graph&, 402 typename graph_traits<Graph>::vertex_descriptor u) 403 { 404 return reverseComplement(u); 405 } 406 407 /** Return the name of the specified vertex. */ 408 template <typename Graph> 409 static inline 410 Kmer get(vertex_name_t, const Graph&, 411 typename graph_traits<Graph>::vertex_descriptor u) 412 { 413 return u; 414 } 415 416 template <typename Graph> 417 static inline 418 bool 419 get(vertex_removed_t, const Graph&, 420 typename graph_traits<Graph>::vertex_descriptor) 421 { 422 return false; 423 } 424 425 template <typename Graph> 426 static inline 427 no_property 428 get(vertex_bundle_t, const Graph&, 429 typename graph_traits<Graph>::edge_descriptor) 430 { 431 return no_property(); 432 } 433 434 template <typename Graph> 435 static inline 436 no_property 437 get(edge_bundle_t, const Graph&, 438 typename graph_traits<Graph>::edge_descriptor) 439 { 440 return no_property(); 441 } 442 443 #endif 444