1 #ifndef BRANCHGROUP_H 2 #define BRANCHGROUP_H 1 3 4 #include "Common/Algorithms.h" 5 #include "Common/Exception.h" 6 #include <algorithm> // for swap 7 #include <map> 8 #include <utility> 9 10 enum BranchGroupStatus 11 { 12 BGS_ACTIVE, 13 BGS_NOEXT, 14 BGS_JOINED, 15 BGS_TOOLONG, 16 BGS_TOOMANYBRANCHES 17 }; 18 19 /** A container of BranchRecord. */ 20 class BranchGroup 21 { 22 public: 23 typedef std::vector<BranchRecord> BranchGroupData; 24 typedef BranchGroupData::iterator iterator; 25 typedef BranchGroupData::const_iterator const_iterator; 26 BranchGroup()27 BranchGroup() 28 : m_dir(SENSE) 29 , m_maxNumBranches(0) 30 , m_noExt(false) 31 , m_status(BGS_ACTIVE) 32 {} 33 BranchGroup(extDirection dir,size_t maxNumBranches,const BranchRecord::V & origin)34 BranchGroup(extDirection dir, size_t maxNumBranches, const BranchRecord::V& origin) 35 : m_dir(dir) 36 , m_origin(origin) 37 , m_maxNumBranches(maxNumBranches) 38 , m_noExt(false) 39 , m_status(BGS_ACTIVE) 40 { 41 m_branches.reserve(m_maxNumBranches); 42 } 43 BranchGroup(extDirection dir,size_t maxNumBranches,const BranchRecord::V & origin,const BranchRecord & branch)44 BranchGroup( 45 extDirection dir, 46 size_t maxNumBranches, 47 const BranchRecord::V& origin, 48 const BranchRecord& branch) 49 : m_dir(dir) 50 , m_origin(origin) 51 , m_maxNumBranches(maxNumBranches) 52 , m_noExt(false) 53 , m_status(BGS_ACTIVE) 54 { 55 m_branches.reserve(m_maxNumBranches); 56 m_branches.push_back(branch); 57 } 58 BranchGroup(const BranchGroup & o)59 BranchGroup(const BranchGroup& o) 60 : m_branches(o.m_branches) 61 , m_dir(o.m_dir) 62 , m_origin(o.m_origin) 63 , m_maxNumBranches(o.m_maxNumBranches) 64 , m_noExt(o.m_noExt) 65 , m_status(o.m_status) 66 { 67 m_branches.reserve(m_maxNumBranches); 68 } 69 70 /** Add a branch to this group. */ addBranch(const BranchRecord & branch)71 BranchRecord& addBranch(const BranchRecord& branch) 72 { 73 assert(m_branches.size() < m_maxNumBranches); 74 m_branches.push_back(branch); 75 return m_branches.back(); 76 } 77 78 /** Add a branch to this group and extend the new branch with 79 * the given k-mer. */ addBranch(const BranchRecord & branch,const BranchRecord::V & kmer)80 void addBranch(const BranchRecord& branch, const BranchRecord::V& kmer) 81 { 82 if (m_branches.size() < m_maxNumBranches) 83 addBranch(branch).push_back(std::make_pair(kmer, BranchRecord::VP())); 84 else 85 m_status = BGS_TOOMANYBRANCHES; 86 } 87 88 /** Return the specified branch. */ 89 BranchRecord& operator[](unsigned id) { return m_branches[id]; } 90 91 /** Return the number of branches in this group. */ size()92 size_t size() const { return m_branches.size(); } 93 94 /** Return whether a branch contains the specified k-mer at 95 * the index i. */ exists(unsigned i,const BranchRecord::V & kmer)96 bool exists(unsigned i, const BranchRecord::V& kmer) const 97 { 98 for (BranchGroupData::const_iterator it = m_branches.begin(); it != m_branches.end(); ++it) 99 if (it->exists(i, kmer)) 100 return true; 101 return false; 102 } 103 104 // return the current status of the branch getStatus()105 BranchGroupStatus getStatus() const { return m_status; } 106 107 // set the no extension flag setNoExtension()108 void setNoExtension() { m_noExt = true; } 109 110 // is the no extension flag set? isNoExt()111 bool isNoExt() const { return m_noExt; } 112 113 // return the direction of growth getDirection()114 extDirection getDirection() const { return m_dir; } 115 begin()116 iterator begin() { return m_branches.begin(); } end()117 iterator end() { return m_branches.end(); } begin()118 const_iterator begin() const { return m_branches.begin(); } end()119 const_iterator end() const { return m_branches.end(); } 120 121 // Check the stop conditions for the bubble growth updateStatus(unsigned maxLength)122 BranchGroupStatus updateStatus(unsigned maxLength) 123 { 124 assert(m_branches.size() <= m_maxNumBranches); 125 126 if (m_status != BGS_ACTIVE) 127 return m_status; 128 129 // Check if the no extension flag is set 130 if (m_noExt) { 131 m_status = BGS_NOEXT; 132 return m_status; 133 } 134 135 // Check if any branches are too long or any sequence has a loop 136 for (BranchGroupData::const_iterator iter = m_branches.begin(); iter != m_branches.end(); 137 ++iter) { 138 if (iter->isTooLong(maxLength)) { 139 m_status = BGS_TOOLONG; 140 return m_status; 141 } 142 } 143 144 BranchGroupData::const_iterator it = m_branches.begin(); 145 const BranchRecord::V& lastSeq = it->back().first; 146 while (++it != m_branches.end()) 147 if (it->back().first != lastSeq) 148 return m_status = BGS_ACTIVE; 149 150 // All the branches of the bubble have joined. 151 // Remove the last base, which is identical for every branch. 152 std::for_each( 153 m_branches.begin(), m_branches.end(), [](BranchRecord& b) { return b.pop_back(); }); 154 155 // Sort the branches by coverage. 156 std::function<int(const BranchRecord&)> lambda = [](const BranchRecord& b) { 157 return b.calculateBranchMultiplicity(); 158 }; 159 sort_by_transform(m_branches.begin(), m_branches.end(), lambda); 160 reverse(m_branches.begin(), m_branches.end()); 161 162 return m_status = BGS_JOINED; 163 } 164 165 /** Return whether any branches of this group are active. */ isActive()166 bool isActive() const 167 { 168 for (BranchGroupData::const_iterator it = m_branches.begin(); it != m_branches.end(); ++it) 169 if (it->isActive()) 170 return true; 171 return false; 172 } 173 174 /** Return whether this branch is extendable. */ isExtendable()175 bool isExtendable() 176 { 177 if (m_noExt) 178 return false; 179 180 // A group is extendable when all the branches are the same 181 // length. All the branches are lockstepped for growth. 182 BranchGroupData::iterator it = m_branches.begin(); 183 unsigned length = it++->size(); 184 for (; it != m_branches.end(); ++it) 185 if (it->size() != length) 186 return false; 187 return true; 188 } 189 190 /** Return whether this branch is ambiguous at its origin. Also 191 * returns false if the origin of the branch has since been deleted. 192 */ isAmbiguous(const SequenceCollectionHash & g)193 bool isAmbiguous(const SequenceCollectionHash& g) const 194 { 195 // Get fresh data from the collection to check that this bubble 196 // does in fact still exist. 197 const BranchRecord::VP& data = g.getSeqAndData(m_origin).second; 198 return data.deleted() ? false : data.isAmbiguous(m_dir); 199 } 200 201 private: 202 BranchGroup& operator=(const BranchGroup& o); 203 204 BranchGroupData m_branches; 205 extDirection m_dir; 206 BranchRecord::V m_origin; 207 size_t m_maxNumBranches; 208 bool m_noExt; 209 BranchGroupStatus m_status; 210 }; 211 212 #endif 213