1 #ifndef SEQEXT_H
2 #define SEQEXT_H 1
3
4 #include "Common/Options.h" // for opt::colourSpace
5 #include "Common/Sequence.h" // for codeToBase
6 #include <cassert>
7 #include <ostream>
8 #include <stdint.h>
9
10 static const unsigned NUM_BASES = 4;
11
12 /** Return the complement of the specified base.
13 * The reverse of a single base is a no-op.
14 * If the assembly is in colour space, this is a no-op.
15 */
reverseComplement(uint8_t base)16 static inline uint8_t reverseComplement(uint8_t base)
17 {
18 return opt::colourSpace ? base : ~base & 0x3;
19 }
20
21 /** The adjacent vertices of a Kmer. */
22 class SeqExt
23 {
24 public:
25 typedef uint8_t Symbol;
26
27 /** The number of symbols. */
28 static const unsigned NUM = NUM_BASES;
29
SeqExt()30 SeqExt() : m_record(0) { };
SeqExt(uint8_t base)31 explicit SeqExt(uint8_t base) : m_record(1<<base) { };
32
33 /** Return a SeqExt with the specified bits set. */
mask(uint8_t bits)34 static SeqExt mask(uint8_t bits)
35 {
36 SeqExt ext;
37 ext.m_record = bits;
38 return ext;
39 }
40
41 /** Return the out degree. */
outDegree()42 unsigned outDegree()
43 {
44 assert(m_record < 16);
45 static const uint8_t popcount[16] = {
46 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4
47 };
48 return popcount[m_record];
49 }
50
51 /** Set the specified adjacency. */
setBase(uint8_t base)52 void setBase(uint8_t base)
53 {
54 m_record |= 1 << base;
55 }
56
57 /** Clear the specified adjacency. */
clearBase(uint8_t base)58 void clearBase(uint8_t base)
59 {
60 m_record &= ~(1 << base);
61 }
62
63 /** Remove the specified edges. */
clear(SeqExt ext)64 void clear(SeqExt ext)
65 {
66 m_record &= ~ext.m_record;
67 }
68
69 /** Return wheter the specified base is adjacent. */
checkBase(uint8_t base)70 bool checkBase(uint8_t base) const
71 {
72 return m_record & (1 << base);
73 }
74
75 /** Clear all adjacency. */
clear()76 void clear()
77 {
78 m_record = 0;
79 }
80
81 /** Return whether this kmer has any adjacent kmer. */
hasExtension()82 bool hasExtension() const
83 {
84 return m_record > 0;
85 }
86
87 /** Return whether this kmer has more than one adjacent kmer.
88 */
isAmbiguous()89 bool isAmbiguous() const
90 {
91 bool powerOfTwo = (m_record & (m_record - 1)) > 0;
92 return m_record > 0 && powerOfTwo;
93 }
94
95 /** Return the complementary adjacency.
96 * If the assembly is in colour space, this is a no-op.
97 */
complement()98 SeqExt complement() const
99 {
100 static const uint8_t complements[16] = {
101 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
102 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
103 };
104 assert(m_record < 1 << NUM);
105 return opt::colourSpace ? *this : mask(complements[m_record]);
106 }
107
108 friend std::ostream& operator <<(std::ostream& out,
109 const SeqExt& o)
110 {
111 assert(o.m_record < 1 << NUM);
112 for (unsigned i = 0; i << NUM; ++i)
113 if (o.checkBase(i))
114 out << codeToBase(i);
115 return out;
116 }
117
118 private:
119 uint8_t m_record;
120 };
121
122 #endif
123