1 #ifndef BRANCHGROUP_H
2 #define BRANCHGROUP_H 1
3 
4 #include "Common/Algorithms.h"
5 #include "Common/Exception.h"
6 #include <algorithm> // for swap
7 #include <map>
8 #include <utility>
9 
10 enum BranchGroupStatus
11 {
12 	BGS_ACTIVE,
13 	BGS_NOEXT,
14 	BGS_JOINED,
15 	BGS_TOOLONG,
16 	BGS_TOOMANYBRANCHES
17 };
18 
19 /** A container of BranchRecord. */
20 class BranchGroup
21 {
22   public:
23 	typedef std::vector<BranchRecord> BranchGroupData;
24 	typedef BranchGroupData::iterator iterator;
25 	typedef BranchGroupData::const_iterator const_iterator;
26 
BranchGroup()27 	BranchGroup()
28 	  : m_dir(SENSE)
29 	  , m_maxNumBranches(0)
30 	  , m_noExt(false)
31 	  , m_status(BGS_ACTIVE)
32 	{}
33 
BranchGroup(extDirection dir,size_t maxNumBranches,const BranchRecord::V & origin)34 	BranchGroup(extDirection dir, size_t maxNumBranches, const BranchRecord::V& origin)
35 	  : m_dir(dir)
36 	  , m_origin(origin)
37 	  , m_maxNumBranches(maxNumBranches)
38 	  , m_noExt(false)
39 	  , m_status(BGS_ACTIVE)
40 	{
41 		m_branches.reserve(m_maxNumBranches);
42 	}
43 
BranchGroup(extDirection dir,size_t maxNumBranches,const BranchRecord::V & origin,const BranchRecord & branch)44 	BranchGroup(
45 	    extDirection dir,
46 	    size_t maxNumBranches,
47 	    const BranchRecord::V& origin,
48 	    const BranchRecord& branch)
49 	  : m_dir(dir)
50 	  , m_origin(origin)
51 	  , m_maxNumBranches(maxNumBranches)
52 	  , m_noExt(false)
53 	  , m_status(BGS_ACTIVE)
54 	{
55 		m_branches.reserve(m_maxNumBranches);
56 		m_branches.push_back(branch);
57 	}
58 
BranchGroup(const BranchGroup & o)59 	BranchGroup(const BranchGroup& o)
60 	  : m_branches(o.m_branches)
61 	  , m_dir(o.m_dir)
62 	  , m_origin(o.m_origin)
63 	  , m_maxNumBranches(o.m_maxNumBranches)
64 	  , m_noExt(o.m_noExt)
65 	  , m_status(o.m_status)
66 	{
67 		m_branches.reserve(m_maxNumBranches);
68 	}
69 
70 	/** Add a branch to this group. */
addBranch(const BranchRecord & branch)71 	BranchRecord& addBranch(const BranchRecord& branch)
72 	{
73 		assert(m_branches.size() < m_maxNumBranches);
74 		m_branches.push_back(branch);
75 		return m_branches.back();
76 	}
77 
78 	/** Add a branch to this group and extend the new branch with
79 	 * the given k-mer. */
addBranch(const BranchRecord & branch,const BranchRecord::V & kmer)80 	void addBranch(const BranchRecord& branch, const BranchRecord::V& kmer)
81 	{
82 		if (m_branches.size() < m_maxNumBranches)
83 			addBranch(branch).push_back(std::make_pair(kmer, BranchRecord::VP()));
84 		else
85 			m_status = BGS_TOOMANYBRANCHES;
86 	}
87 
88 	/** Return the specified branch. */
89 	BranchRecord& operator[](unsigned id) { return m_branches[id]; }
90 
91 	/** Return the number of branches in this group. */
size()92 	size_t size() const { return m_branches.size(); }
93 
94 	/** Return whether a branch contains the specified k-mer at
95 	 * the index i. */
exists(unsigned i,const BranchRecord::V & kmer)96 	bool exists(unsigned i, const BranchRecord::V& kmer) const
97 	{
98 		for (BranchGroupData::const_iterator it = m_branches.begin(); it != m_branches.end(); ++it)
99 			if (it->exists(i, kmer))
100 				return true;
101 		return false;
102 	}
103 
104 	// return the current status of the branch
getStatus()105 	BranchGroupStatus getStatus() const { return m_status; }
106 
107 	// set the no extension flag
setNoExtension()108 	void setNoExtension() { m_noExt = true; }
109 
110 	// is the no extension flag set?
isNoExt()111 	bool isNoExt() const { return m_noExt; }
112 
113 	// return the direction of growth
getDirection()114 	extDirection getDirection() const { return m_dir; }
115 
begin()116 	iterator begin() { return m_branches.begin(); }
end()117 	iterator end() { return m_branches.end(); }
begin()118 	const_iterator begin() const { return m_branches.begin(); }
end()119 	const_iterator end() const { return m_branches.end(); }
120 
121 	// Check the stop conditions for the bubble growth
updateStatus(unsigned maxLength)122 	BranchGroupStatus updateStatus(unsigned maxLength)
123 	{
124 		assert(m_branches.size() <= m_maxNumBranches);
125 
126 		if (m_status != BGS_ACTIVE)
127 			return m_status;
128 
129 		// Check if the no extension flag is set
130 		if (m_noExt) {
131 			m_status = BGS_NOEXT;
132 			return m_status;
133 		}
134 
135 		// Check if any branches are too long or any sequence has a loop
136 		for (BranchGroupData::const_iterator iter = m_branches.begin(); iter != m_branches.end();
137 		     ++iter) {
138 			if (iter->isTooLong(maxLength)) {
139 				m_status = BGS_TOOLONG;
140 				return m_status;
141 			}
142 		}
143 
144 		BranchGroupData::const_iterator it = m_branches.begin();
145 		const BranchRecord::V& lastSeq = it->back().first;
146 		while (++it != m_branches.end())
147 			if (it->back().first != lastSeq)
148 				return m_status = BGS_ACTIVE;
149 
150 		// All the branches of the bubble have joined.
151 		// Remove the last base, which is identical for every branch.
152 		std::for_each(
153 		    m_branches.begin(), m_branches.end(), [](BranchRecord& b) { return b.pop_back(); });
154 
155 		// Sort the branches by coverage.
156 		std::function<int(const BranchRecord&)> lambda = [](const BranchRecord& b) {
157 			return b.calculateBranchMultiplicity();
158 		};
159 		sort_by_transform(m_branches.begin(), m_branches.end(), lambda);
160 		reverse(m_branches.begin(), m_branches.end());
161 
162 		return m_status = BGS_JOINED;
163 	}
164 
165 	/** Return whether any branches of this group are active. */
isActive()166 	bool isActive() const
167 	{
168 		for (BranchGroupData::const_iterator it = m_branches.begin(); it != m_branches.end(); ++it)
169 			if (it->isActive())
170 				return true;
171 		return false;
172 	}
173 
174 	/** Return whether this branch is extendable. */
isExtendable()175 	bool isExtendable()
176 	{
177 		if (m_noExt)
178 			return false;
179 
180 		// A group is extendable when all the branches are the same
181 		// length. All the branches are lockstepped for growth.
182 		BranchGroupData::iterator it = m_branches.begin();
183 		unsigned length = it++->size();
184 		for (; it != m_branches.end(); ++it)
185 			if (it->size() != length)
186 				return false;
187 		return true;
188 	}
189 
190 	/** Return whether this branch is ambiguous at its origin. Also
191 	 * returns false if the origin of the branch has since been deleted.
192 	 */
isAmbiguous(const SequenceCollectionHash & g)193 	bool isAmbiguous(const SequenceCollectionHash& g) const
194 	{
195 		// Get fresh data from the collection to check that this bubble
196 		// does in fact still exist.
197 		const BranchRecord::VP& data = g.getSeqAndData(m_origin).second;
198 		return data.deleted() ? false : data.isAmbiguous(m_dir);
199 	}
200 
201   private:
202 	BranchGroup& operator=(const BranchGroup& o);
203 
204 	BranchGroupData m_branches;
205 	extDirection m_dir;
206 	BranchRecord::V m_origin;
207 	size_t m_maxNumBranches;
208 	bool m_noExt;
209 	BranchGroupStatus m_status;
210 };
211 
212 #endif
213