1 
2 #ifndef _EXTENDPATH_H_
3 #define _EXTENDPATH_H_
4 
5 #include "Graph/Path.h"
6 #include "Common/UnorderedSet.h"
7 #include "Common/UnorderedMap.h"
8 #include "Common/Hash.h"
9 #include <boost/graph/graph_traits.hpp>
10 #include <boost/graph/graph_concepts.hpp>
11 #include <cassert>
12 #include <cstdio>
13 #include <iostream>
14 #include <algorithm>
15 
16 /**
17  * Parameters for path extension.
18  */
19 struct ExtendPathParams
20 {
21 	/* ignore branches shorter than or equal to this length */
22 	unsigned trimLen;
23 	/* longest branch of Bloom filter false positives */
24 	unsigned fpTrim;
25 	/* maximum length after extension */
26 	unsigned maxLen;
27 	/*
28 	 * if true, multiple incoming branches > trimLen
29 	 * will cause a path extension to halt
30 	 */
31 	bool lookBehind;
32 	/*
33 	 * If false, ignore incoming branches for the starting vertex.
34 	 * This is useful when when we are intentionally starting our
35 	 * path extension from a branching point.
36 	 */
37     bool lookBehindStartVertex;
38 
39 	/* constructor */
ExtendPathParamsExtendPathParams40 	ExtendPathParams() : trimLen(0), fpTrim(0), maxLen(NO_LIMIT), lookBehind(true),
41 		lookBehindStartVertex(true) {}
42 };
43 
44 /**
45  * The result of attempting to extend a path.
46  */
47 enum PathExtensionResultCode {
48 	/** stopped path extension at a vertex with multiple incoming branches */
49 	ER_AMBI_IN,
50 	/** stopped path extension at a vertex with multiple outgoing branches */
51     ER_AMBI_OUT,
52 	/** stopped path extension at a vertex with no outgoing branches */
53 	ER_DEAD_END,
54 	/** stopped path extension after completing a cycle */
55 	ER_CYCLE,
56 	/** stopped path extension at caller-specified length limit */
57 	ER_LENGTH_LIMIT,
58 };
59 
60 /**
61  * Translate path extension result code to a string.
62  */
pathExtensionResultStr(PathExtensionResultCode result)63 static inline const char* pathExtensionResultStr(PathExtensionResultCode result)
64 {
65 	switch(result) {
66 	case ER_AMBI_IN:
67 		return "AMBI_IN";
68 	case ER_AMBI_OUT:
69 		return "AMBI_OUT";
70 	case ER_DEAD_END:
71 		return "DEAD_END";
72 	case ER_CYCLE:
73 		return "CYCLE";
74 	case ER_LENGTH_LIMIT:
75 		return "LENGTH_LIMIT";
76 	default:
77 		assert(false);
78 	}
79 }
80 
81 /** length of path extension (in vertices) and reason for stopping */
82 typedef std::pair<unsigned, PathExtensionResultCode> PathExtensionResult;
83 
84 /**
85  * Return true if there is a path of at least depthLimit vertices
86  * that extends from given vertex u, otherwise return false.
87  * Implemented using a bounded depth first search.
88  *
89  * @param start starting vertex for traversal
90  * @param dir direction for traversal (FORWARD or REVERSE)
91  * @param depth depth of current vertex u
92  * @param depthLimit maximum depth to probe
93  * @param g graph to use for traversal
94  * @param visited vertices that have already been visited by the DFS
95  * @return true if at least one path with length >= len
96  * extends from v in direction dir, false otherwise
97  */
98 template <class Graph>
lookAhead(const typename boost::graph_traits<Graph>::vertex_descriptor & u,Direction dir,unsigned depth,unsigned depthLimit,unordered_set<typename boost::graph_traits<Graph>::vertex_descriptor,hash<typename boost::graph_traits<Graph>::vertex_descriptor>> & visited,const Graph & g)99 static inline bool lookAhead(
100 	const typename boost::graph_traits<Graph>::vertex_descriptor& u,
101 	Direction dir, unsigned depth, unsigned depthLimit,
102 	unordered_set< typename boost::graph_traits<Graph>::vertex_descriptor,
103 	hash<typename boost::graph_traits<Graph>::vertex_descriptor> >& visited, const Graph& g)
104 {
105     typedef typename boost::graph_traits<Graph>::vertex_descriptor V;
106     typedef typename boost::graph_traits<Graph>::out_edge_iterator OutEdgeIter;
107     typedef typename boost::graph_traits<Graph>::in_edge_iterator InEdgeIter;
108 
109 	OutEdgeIter oei, oei_end;
110 	InEdgeIter iei, iei_end;
111 
112 	visited.insert(u);
113 	if (depth >= depthLimit)
114 		return true;
115 
116 	if (dir == FORWARD) {
117 		for (boost::tie(oei, oei_end) = out_edges(u, g);
118 			oei != oei_end; ++oei) {
119 			const V& v = target(*oei, g);
120 			if (visited.find(v) == visited.end()) {
121 				if(lookAhead(v, dir, depth+1, depthLimit, visited, g))
122 					return true;
123 			}
124 		}
125 	} else {
126 		assert(dir == REVERSE);
127 		for (boost::tie(iei, iei_end) = in_edges(u, g);
128 			 iei != iei_end; ++iei) {
129 			const V& v = source(*iei, g);
130 			if (visited.find(v) == visited.end()) {
131 				if(lookAhead(v, dir, depth+1, depthLimit, visited, g))
132 					return true;
133 			}
134 		}
135 	}
136 
137 	return false;
138 }
139 
140 /**
141  * Return true if there is a path of at least 'depth' vertices
142  * that extends from given vertex v, otherwise return false.
143  * Implemented using a bounded depth first search.
144  *
145  * @param start starting vertex for traversal
146  * @param dir direction for traversal (FORWARD or REVERSE)
147  * @param depth length of path to test for
148  * @param g graph to use for traversal
149  * @return true if at least one path with length >= len
150  * extends from v in direction dir, false otherwise
151  */
152 template <class Graph>
lookAhead(const typename boost::graph_traits<Graph>::vertex_descriptor & start,Direction dir,unsigned depth,const Graph & g)153 static inline bool lookAhead(
154 	const typename boost::graph_traits<Graph>::vertex_descriptor& start,
155 	Direction dir, unsigned depth, const Graph& g)
156 {
157 	typedef typename boost::graph_traits<Graph>::vertex_descriptor V;
158 	unordered_set< V, hash<V> > visited;
159 	return lookAhead(start, dir, 0, depth, visited, g);
160 }
161 
162 /**
163  * Return true if the given edge represents the beginning of a "true branch".
164  *
165  * A path is a true branch if it has length >= `trim` or terminates in a
166  * branching node, where a branching node is (recursively) defined to be
167  * a node with either >= 2 incoming true branches or >= 2 outgoing true branches.
168  *
169  * This method is similar to `lookAhead`, but it additionally changes traversal
170  * direction when a dead-end is encountered.
171  */
172 template <class Graph>
trueBranch(const typename boost::graph_traits<Graph>::edge_descriptor & e,unsigned depth,Direction dir,const Graph & g,unsigned trim,unsigned fpTrim,unordered_set<typename boost::graph_traits<Graph>::vertex_descriptor> & visited)173 static inline bool trueBranch(
174 	const typename boost::graph_traits<Graph>::edge_descriptor& e,
175 	unsigned depth, Direction dir, const Graph& g, unsigned trim,
176 	unsigned fpTrim, unordered_set<typename boost::graph_traits<Graph>::vertex_descriptor>& visited)
177 {
178 	typedef typename boost::graph_traits<Graph>::vertex_descriptor V;
179 
180 	typename boost::graph_traits<Graph>::out_edge_iterator oei, oei_end;
181 	typename boost::graph_traits<Graph>::in_edge_iterator iei, iei_end;
182 
183 	const V& u = (dir == FORWARD) ? source(e, g) : target(e, g);
184 	const V& v = (dir == FORWARD) ? target(e, g) : source(e, g);
185 
186 	/* branches with bubbles/cycles are considered true branches */
187 	if (visited.find(v) != visited.end())
188 		return true;
189 
190 	if (depth >= trim)
191 		return true;
192 
193 	visited.insert(v);
194 
195 	if (dir == FORWARD) {
196 		for (boost::tie(oei, oei_end) = out_edges(v, g);
197 			oei != oei_end; ++oei) {
198 			if (trueBranch(*oei, depth+1, FORWARD, g, trim, fpTrim, visited))
199 				return true;
200 		}
201 		/*
202 		 * Note: The test for depth/lookAhead >= fpTrim before changing
203 		 * traversal direction is needed to deal with an X-shaped
204 		 * graph pattern that is frequently created by Bloom false positives.
205 		 * See the test for `trueBranch` in `ExtendPathTest.h` for an example.
206 		 */
207 		if (depth >= fpTrim || lookAhead(v, FORWARD, fpTrim, g)) {
208 			for (boost::tie(iei, iei_end) = in_edges(v, g);
209 				iei != iei_end; ++iei) {
210 				if (source(*iei, g) == u)
211 					continue;
212 				if (trueBranch(*iei, 0, REVERSE, g, trim, fpTrim, visited))
213 					return true;
214 			}
215 		}
216 	} else {
217 		assert(dir == REVERSE);
218 		for (boost::tie(iei, iei_end) = in_edges(v, g);
219 			iei != iei_end; ++iei) {
220 			if (trueBranch(*iei, depth+1, REVERSE, g, trim, fpTrim, visited))
221 				return true;
222 		}
223 		/*
224 		 * Note: The test for depth/lookAhead >= fpTrim before changing
225 		 * traversal direction is needed to deal with an X-shaped
226 		 * graph pattern that is frequently created by Bloom false positives.
227 		 * See the test for `trueBranch` in `ExtendPathTest.h` for an example.
228 		 */
229 		if (depth >= fpTrim || lookAhead(v, REVERSE, fpTrim, g)) {
230 			for (boost::tie(oei, oei_end) = out_edges(v, g);
231 				 oei != oei_end; ++oei) {
232 				if (target(*oei, g) == u)
233 					continue;
234 				if (trueBranch(*oei, 0, FORWARD, g, trim, fpTrim, visited))
235 					return true;
236 			}
237 		}
238 	}
239 
240 	visited.erase(v);
241 
242 	return false;
243 }
244 
245 /**
246  * Return true if the given edge represents the start of a "true branch".
247  * Roughly speaking, a path is a true branch if it has length >= trim
248  * or terminates in a branching node, where a branching node is (recursively)
249  * defined to be a node with either >= 2 incoming true branches or >= outgoing
250  * true branches.
251  */
252 template <class Graph>
trueBranch(const typename boost::graph_traits<Graph>::edge_descriptor & e,Direction dir,const Graph & g,unsigned trim,unsigned fpTrim)253 static inline bool trueBranch(
254 	const typename boost::graph_traits<Graph>::edge_descriptor& e,
255 	Direction dir, const Graph& g, unsigned trim, unsigned fpTrim)
256 {
257 	typedef typename boost::graph_traits<Graph>::vertex_descriptor V;
258 	unordered_set<V> visited;
259 	return trueBranch(e, 0, dir, g, trim, fpTrim, visited);
260 }
261 
262 /**
263  * Return neighbour vertices that begin branches that are longer than trimLen.
264  *
265  * @param u root vertex
266  * @param dir direction for neighbours (FORWARD or REVERSE)
267  * @param g graph
268  * @param trimLen ignore all branches less than or equal to this length
269  * @return std::vector of neighbour vertices that start branches that are
270  * greater than trimLen vertices in length
271  */
272 template <class BidirectionalGraph>
273 static inline std::vector<typename boost::graph_traits<BidirectionalGraph>::vertex_descriptor>
trueBranches(const typename boost::graph_traits<BidirectionalGraph>::vertex_descriptor & u,Direction dir,const BidirectionalGraph & g,unsigned trim,unsigned fpTrim)274 trueBranches(const typename boost::graph_traits<BidirectionalGraph>::vertex_descriptor& u,
275 	Direction dir, const BidirectionalGraph& g, unsigned trim, unsigned fpTrim)
276 {
277 	typedef BidirectionalGraph G;
278 	typedef boost::graph_traits<G> graph_traits;
279 	typedef typename graph_traits::vertex_descriptor V;
280 
281 	typename graph_traits::out_edge_iterator oei, oei_end;
282 	typename graph_traits::in_edge_iterator iei, iei_end;
283 
284 	std::vector<V> branchRoots;
285 
286 	if (dir == FORWARD) {
287 		for (boost::tie(oei, oei_end) = out_edges(u, g);
288 			oei != oei_end; ++oei) {
289 			const V& v = target(*oei, g);
290 			if (trueBranch(*oei, dir, g, trim, fpTrim))
291 				branchRoots.push_back(v);
292 		}
293 	} else {
294 		assert(dir == REVERSE);
295 		for (boost::tie(iei, iei_end) = in_edges(u, g);
296 			iei != iei_end; ++iei) {
297 			const V& v = source(*iei, g);
298 			if (trueBranch(*iei, dir, g, trim, fpTrim)) {
299 				branchRoots.push_back(v);
300 			}
301 		}
302 	}
303 
304 	return branchRoots;
305 }
306 
307 /**
308  * Return the unique predecessor/successor of a given vertex. In
309  * cases where a predecessor/successor does not exist (i.e. a dead end)
310  * or is not unique (i.e. a branching point), return an result code indicating
311  * why a unique successor could not be returned.
312  */
313 template <class Graph>
314 static inline std::pair<typename boost::graph_traits<Graph>::vertex_descriptor,
315 	PathExtensionResultCode>
successor(const typename boost::graph_traits<Graph>::vertex_descriptor & u,Direction dir,const Graph & g,unsigned trim,unsigned fpTrim)316 successor(const typename boost::graph_traits<Graph>::vertex_descriptor& u,
317 	Direction dir, const Graph& g, unsigned trim, unsigned fpTrim)
318 {
319 	typedef typename boost::graph_traits<Graph>::vertex_descriptor V;
320 	typedef typename boost::graph_traits<Graph>::in_edge_iterator InEdgeIt;
321 	typedef typename boost::graph_traits<Graph>::out_edge_iterator OutEdgeIt;
322 
323 	InEdgeIt iei, iei_end;
324     OutEdgeIt oei, oei_end;
325 
326 	/* assign u to suppress uninitialized warning */
327 	V v = u;
328 	for (unsigned i = 0; true; i = (i == 0) ? 1 : std::min(trim, 2*i))
329 	{
330 		unsigned trueBranches = 0;
331 
332 		if (dir == FORWARD) {
333 			for (boost::tie(oei, oei_end) = out_edges(u, g); oei != oei_end; ++oei) {
334 				if (trueBranch(*oei, FORWARD, g, i, fpTrim)) {
335 					v = target(*oei, g);
336 					++trueBranches;
337 					if (trueBranches >= 2)
338 						break;
339 				}
340 			}
341 		} else {
342 			assert(dir == REVERSE);
343 			for (boost::tie(iei, iei_end) = in_edges(u, g); iei != iei_end; ++iei) {
344 				if (trueBranch(*iei, REVERSE, g, i, fpTrim)) {
345 					v = source(*iei, g);
346 					++trueBranches;
347 					if (trueBranches >= 2)
348 						break;
349 				}
350 			}
351 		}
352 
353 		if (trueBranches == 0)
354 			return std::make_pair(v, ER_DEAD_END);
355 		else if (trueBranches == 1)
356 			return std::make_pair(v, ER_LENGTH_LIMIT);
357 		else if (i == trim)
358 			return std::make_pair(v, ER_AMBI_OUT);
359 	}
360 
361 }
362 
363 /**
364  * Return true if the given vertex has more than one possible
365  * predecessor/successor in the graph.
366  */
367 template <class Graph>
368 static inline bool
ambiguous(const typename boost::graph_traits<Graph>::vertex_descriptor & u,Direction dir,const Graph & g,unsigned trim,unsigned fpTrim)369 ambiguous(const typename boost::graph_traits<Graph>::vertex_descriptor& u,
370 	Direction dir, const Graph& g, unsigned trim, unsigned fpTrim)
371 {
372 	return successor(u, dir, g, trim, fpTrim).second == ER_AMBI_OUT;
373 }
374 
375 /**
376  * Return true if the given vertex has more than one possible
377  * predecessor/successor in the graph.
378  *
379  * @param expected always include this vertex in the set of possible
380  * predecssors/successors, even if it is not a true branch.
381  */
382 template <class Graph>
383 static inline bool
ambiguous(const typename boost::graph_traits<Graph>::vertex_descriptor & u,const typename boost::graph_traits<Graph>::vertex_descriptor & expected,Direction dir,const Graph & g,unsigned trim,unsigned fpTrim)384 ambiguous(const typename boost::graph_traits<Graph>::vertex_descriptor& u,
385 	const typename boost::graph_traits<Graph>::vertex_descriptor& expected,
386 	Direction dir, const Graph& g, unsigned trim, unsigned fpTrim)
387 {
388 	typedef typename boost::graph_traits<Graph>::vertex_descriptor V;
389 
390 	V v;
391 	PathExtensionResultCode result;
392 
393 	boost::tie(v, result) = successor(u, dir, g, trim, fpTrim);
394 
395 	return result == ER_AMBI_OUT || (result == ER_LENGTH_LIMIT && v != expected);
396 }
397 
398 /**
399  * Extend path by a single vertex, if there is a unique predecessor/successor
400  * in the direction of extension.
401  */
402 template <class Graph>
403 static inline PathExtensionResultCode
extendPathBySingleVertex(Path<typename boost::graph_traits<Graph>::vertex_descriptor> & path,Direction dir,const Graph & g,unsigned trim,unsigned fpTrim,bool lookBehind)404 extendPathBySingleVertex(
405 	Path<typename boost::graph_traits<Graph>::vertex_descriptor>& path,
406 	Direction dir, const Graph& g, unsigned trim, unsigned fpTrim,
407 	bool lookBehind)
408 {
409 	assert(!path.empty());
410 
411 	typedef typename boost::graph_traits<Graph>::vertex_descriptor V;
412 
413 	V t, v;
414 	PathExtensionResultCode result;
415 
416 	const V& head = (dir == FORWARD) ? path.back() : path.front();
417 
418 	if (lookBehind) {
419 
420 		Direction otherDir = (dir == FORWARD) ? REVERSE : FORWARD;
421 		boost::tie(t, result) = successor(head, otherDir, g, trim, fpTrim);
422 
423 		if (result == ER_AMBI_OUT)
424 			return ER_AMBI_IN;
425 
426 		/*
427 		 * Tricky: If our path was seeded on a tip, we want to stop the
428 		 * extension when we reconnect to the graph. We can detect that
429 		 * we are on tip if we reach a branching point where the predecessor
430 		 * vertex in the path does not match the expected predecessor `t`.
431 		 */
432 		if (path.size() > 1) {
433 			if (result == ER_DEAD_END) {
434 				/* no predecessors or all predecessors were tips */
435 				return ER_AMBI_IN;
436 			} else {
437 				/* check if we are on a tip */
438 				assert(result == ER_LENGTH_LIMIT);
439 				const V& prev = (dir == FORWARD) ?
440 					*(path.rbegin() + 1) : *(path.begin() + 1);
441 				if (prev != t)
442 					return ER_AMBI_IN;
443 			}
444 		}
445 
446 	}
447 
448 	boost::tie(v, result) = successor(head, dir, g, trim, fpTrim);
449 	if (result != ER_LENGTH_LIMIT)
450 		return result;
451 
452 	if (dir == FORWARD)
453 		path.push_back(v);
454 	else
455 		path.push_front(v);
456 
457 	return ER_LENGTH_LIMIT;
458 }
459 
460 /**
461  * Return the depth of the graph from the given source vertex,
462  * i.e. the distance of the furthest node.  The depth is measured
463  * by means of an exhaustive breadth first search.
464  *
465  * @param root starting vertex for traversal
466  * @param dir direction for traversal (FORWARD or REVERSE)
467  * @param g graph to use for traversal
468  * @return the distance of the furthest vertex from root
469  */
470 template <typename Graph>
depth(typename boost::graph_traits<Graph>::vertex_descriptor root,Direction dir,const Graph & g)471 static inline size_t depth(
472 	typename boost::graph_traits<Graph>::vertex_descriptor root,
473 	Direction dir, const Graph& g)
474 {
475     typedef typename boost::graph_traits<Graph>::vertex_descriptor V;
476     typedef typename boost::graph_traits<Graph>::out_edge_iterator OutEdgeIter;
477     typedef typename boost::graph_traits<Graph>::in_edge_iterator InEdgeIter;
478 
479 	OutEdgeIter oei, oei_end;
480 	InEdgeIter iei, iei_end;
481 
482 	unordered_set<V, hash<V> > visited;
483 	typedef unordered_map<V, size_t> DepthMap;
484 	DepthMap depthMap;
485 	std::deque<V> q;
486 
487 	q.push_back(root);
488 
489 	visited.insert(root);
490 	std::pair<typename DepthMap::iterator, bool> inserted =
491 		depthMap.insert(std::make_pair(root, 0));
492 	assert(inserted.second);
493 
494 	size_t maxDepth = 0;
495 	while (!q.empty()) {
496 		V& u = q.front();
497 		visited.insert(u);
498 		typename DepthMap::const_iterator it = depthMap.find(u);
499 		assert(it != depthMap.end());
500 		size_t depth = it->second;
501 		if (depth > maxDepth)
502 			maxDepth = depth;
503 		if (dir == FORWARD) {
504 			for (boost::tie(oei, oei_end) = out_edges(u, g);
505 				oei != oei_end; ++oei) {
506 				V v = target(*oei, g);
507 				if (visited.find(v) == visited.end()) {
508 					visited.insert(v);
509 					std::pair<typename DepthMap::iterator, bool> inserted =
510 						depthMap.insert(std::make_pair(v, depth+1));
511 					assert(inserted.second);
512 					q.push_back(v);
513 				}
514 			}
515 		} else {
516 			assert(dir == REVERSE);
517 			for (boost::tie(iei, iei_end) = in_edges(u, g);
518 				iei != iei_end; ++iei) {
519 				V v = source(*iei, g);
520 				if (visited.find(v) == visited.end()) {
521 					visited.insert(v);
522 					std::pair<typename DepthMap::iterator, bool> inserted =
523 						depthMap.insert(std::make_pair(v, depth+1));
524 					assert(inserted.second);
525 					q.push_back(v);
526 				}
527 			}
528 		}
529 		q.pop_front();
530 	}
531 
532 	return maxDepth;
533 }
534 
535 /**
536  * Return the neighbor vertex corresponding to the longest branch.  If there
537  * are no neighbour vertices, an assertion will be thrown. If there
538  * is a tie between branch lengths, the "winning" branch is chosen arbitrarily.
539  *
540  * @param u root vertex
541  * @param dir direction of branches to consider (FORWARD or REVERSE)
542  * @param g the graph
543  * @return the vertex at the head of the longest branch
544  */
545 template <typename Graph>
546 inline static
547 std::pair<typename boost::graph_traits<Graph>::vertex_descriptor, bool>
longestBranch(const typename boost::graph_traits<Graph>::vertex_descriptor & u,Direction dir,const Graph & g)548 longestBranch(const typename boost::graph_traits<Graph>::vertex_descriptor& u,
549 	Direction dir, const Graph& g)
550 {
551 	typedef typename boost::graph_traits<Graph>::vertex_descriptor V;
552     typedef typename boost::graph_traits<Graph>::out_edge_iterator OutEdgeIter;
553     typedef typename boost::graph_traits<Graph>::in_edge_iterator InEdgeIter;
554 
555 	OutEdgeIter oei, oei_end;
556 	InEdgeIter iei, iei_end;
557 	size_t maxDepth = 0;
558 	unsigned degree = 0;
559 	bool tie = false;
560 	/* note: had to initialize to prevent compiler warnings */
561 	V longestBranch = u;
562 	if (dir == FORWARD) {
563 		for (boost::tie(oei, oei_end) = out_edges(u, g);
564 			 oei != oei_end; ++oei) {
565 			degree++;
566 			const V& v = target(*oei, g);
567 			size_t d = depth(v, dir, g) + 1;
568 			if (d > maxDepth) {
569 				maxDepth = d;
570 				longestBranch = v;
571 				tie = false;
572 			} else if (d == maxDepth && v < longestBranch) {
573 				/*
574 				 * make an arbitrary choice among branches
575 				 * of equal length using the vertex comparison
576 				 * operator (operator<).
577 				 */
578 				longestBranch = v;
579 				tie = true;
580 			}
581 		}
582 	} else {
583 		assert(dir == REVERSE);
584 		for (boost::tie(iei, iei_end) = in_edges(u, g);
585 			 iei != iei_end; ++iei) {
586 			degree++;
587 			const V& v = source(*iei, g);
588 			size_t d = depth(v, dir, g) + 1;
589 			if (d > maxDepth) {
590 				maxDepth = d;
591 				longestBranch = v;
592 				tie = false;
593 			} else if (d == maxDepth && v < longestBranch) {
594 				/*
595 				 * make an arbitrary choice among branches
596 				 * of equal length using the vertex comparison
597 				 * operator (operator<).
598 				 */
599 				longestBranch = v;
600 				tie = true;
601 			}
602 		}
603 	}
604 	assert(degree > 0);
605 	return std::make_pair(longestBranch, tie);
606 }
607 
608 /**
609  * Extend a path up to the next branching point in the graph.
610  *
611  * @param path path to extend (modified by this function)
612  * @param dir direction to extend path (FORWARD or REVERSE)
613  * @param g graph in which to perform the extension
614  * @param visited set of previously visited vertices (used
615  * to detect cycles in the de Bruijn graph)
616  * @param params parameters controlling extension (e.g. trimLen)
617  * @return PathExtensionResult: NO_EXTENSION, HIT_BRANCHING_POINT,
618  * or EXTENDED.
619  */
620 template <class BidirectionalGraph>
extendPath(Path<typename boost::graph_traits<BidirectionalGraph>::vertex_descriptor> & path,Direction dir,const BidirectionalGraph & g,unordered_set<typename boost::graph_traits<BidirectionalGraph>::vertex_descriptor> & visited,const ExtendPathParams & params)621 static inline PathExtensionResult extendPath(
622 	Path<typename boost::graph_traits<BidirectionalGraph>::vertex_descriptor>& path,
623 	Direction dir, const BidirectionalGraph& g,
624 	unordered_set<typename boost::graph_traits<BidirectionalGraph>::vertex_descriptor>& visited,
625 	const ExtendPathParams& params)
626 {
627 	typedef BidirectionalGraph G;
628 	typedef boost::graph_traits<G> graph_traits;
629 	typedef typename graph_traits::vertex_descriptor V;
630 	typename graph_traits::out_edge_iterator oei, oei_end;
631 	typename graph_traits::in_edge_iterator iei, iei_end;
632 
633 	assert(path.size() > 0);
634 	size_t origPathLen = path.size();
635 	PathExtensionResultCode result = ER_DEAD_END;
636 	bool lookBehind = params.lookBehindStartVertex;
637 
638 	assert(!path.empty());
639 	while (path.size() < params.maxLen)
640 	{
641 		result = extendPathBySingleVertex(path, dir, g,
642 			params.trimLen, params.fpTrim, lookBehind);
643 
644 		if (result != ER_LENGTH_LIMIT)
645 			break;
646 
647 		const V& head = (dir == FORWARD) ? path.back() : path.front();
648 		bool inserted;
649 		boost::tie(boost::tuples::ignore, inserted) = visited.insert(head);
650 		if (!inserted) {
651 			result = ER_CYCLE;
652 			if (dir == FORWARD)
653 				path.pop_back();
654 			else
655 				path.pop_front();
656 			break;
657 		}
658 
659 		/* override `lookBehindStartVertex` after first extension */
660 		lookBehind = params.lookBehind;
661 	}
662 
663 	if (params.maxLen != NO_LIMIT && path.size() == params.maxLen)
664 		result = ER_LENGTH_LIMIT;
665 
666 	assert(path.size() >= origPathLen);
667 	unsigned extension = path.size() - origPathLen;
668 
669 	/*
670 	 * Sanity check: If no length limit was imposed, we must have stopped
671 	 * the extension for some other reason (e.g. dead end)
672 	 */
673 	assert(params.maxLen != NO_LIMIT || result != ER_LENGTH_LIMIT);
674 
675 	return std::make_pair(extension, result);
676 }
677 
678 /**
679  * Extend a path up to the next branching point in the graph.
680  *
681  * @param path path to extend (modified by this function)
682  * @param dir direction to extend path (FORWARD or REVERSE)
683  * @param g graph in which to perform the extension
684  * @param params parameters controlling extension (e.g. trimLen)
685  * @return PathExtensionResult: NO_EXTENSION, HIT_BRANCHING_POINT,
686  * or EXTENDED.
687  */
688 template <class BidirectionalGraph>
extendPath(Path<typename boost::graph_traits<BidirectionalGraph>::vertex_descriptor> & path,Direction dir,const BidirectionalGraph & g,const ExtendPathParams & params)689 PathExtensionResult extendPath(
690 	Path<typename boost::graph_traits<BidirectionalGraph>::vertex_descriptor>& path,
691 	Direction dir, const BidirectionalGraph& g, const ExtendPathParams& params)
692 {
693 	typedef typename boost::graph_traits<BidirectionalGraph>::vertex_descriptor V;
694 
695 	/* track visited nodes to avoid infinite traversal of cycles */
696 	unordered_set<V> visited;
697 	visited.insert(path.begin(), path.end());
698 
699 	return extendPath(path, dir, g, visited, params);
700 }
701 
702 /**
703  * Extend a path up to the next branching point in the graph.
704  *
705  * @param path path to extend (modified by this function)
706  * @param dir direction to extend path (FORWARD or REVERSE)
707  * @param g graph in which to perform the extension
708  * @return PathExtensionResult: NO_EXTENSION, HIT_BRANCHING_POINT,
709  * or EXTENDED.
710  */
711 template <class BidirectionalGraph>
extendPath(Path<typename boost::graph_traits<BidirectionalGraph>::vertex_descriptor> & path,Direction dir,const BidirectionalGraph & g)712 PathExtensionResult extendPath(
713 	Path<typename boost::graph_traits<BidirectionalGraph>::vertex_descriptor>& path,
714 	Direction dir, const BidirectionalGraph& g)
715 {
716 	/* default extension params */
717 	ExtendPathParams params;
718 	return extendPath(path, dir, g, params);
719 }
720 
721 #endif
722