1 #ifndef PAIREDDBG_DINUC_H 2 #define PAIREDDBG_DINUC_H 1 3 4 #include "Common/BitUtil.h" 5 #include <cassert> 6 #include <stdint.h> 7 8 /** A pair of nucleotides. */ 9 class Dinuc 10 { 11 public: 12 /** The number of symbols. */ 13 static const unsigned NUM = 16; 14 15 /** A nucleotide. A bit vector of two bits. */ 16 typedef uint8_t Nuc; 17 18 /** A dinucleotide. A bit vector of four bits. */ 19 typedef uint8_t Bits; 20 21 /** Default constructor. */ Dinuc()22 Dinuc() { } 23 24 /** Construct a Dinuc from two nucleotides. */ Dinuc(Nuc a,Nuc b)25 Dinuc(Nuc a, Nuc b) : m_data(a | (b << 2)) { } 26 27 /** Construct a Dinuc from an integer. */ Dinuc(Bits x)28 explicit Dinuc(Bits x) : m_data(x) { } 29 30 /** Cast to an integer. */ toInt()31 Bits toInt() const { return m_data; } 32 33 /** Return the first nucleotide. */ a()34 Nuc a() const { return m_data & 0x3; } 35 36 /** Return the first nucleotide. */ b()37 Nuc b() const { return (m_data >> 2) & 0x3; } 38 39 /** Compare two dinucleotides. */ 40 bool operator<(const Dinuc& x) const 41 { 42 return m_data < x.m_data; 43 } 44 45 /** Complement a single base. */ complementNuc(Nuc x)46 static Nuc complementNuc(Nuc x) { return 3 - x; } 47 48 /** Return the reverse complement of this dinucleotide. */ reverseComplement()49 Dinuc reverseComplement() const 50 { 51 return Dinuc(complementNuc(b()), complementNuc(a())); 52 } 53 54 /** Increment this dinucleotide. */ 55 Dinuc& operator++() 56 { 57 ++m_data; 58 return *this; 59 } 60 61 /** Return the first dinucleotide. */ begin()62 static Dinuc begin() { return Dinuc(0); } 63 64 /** Return the last dinucleotide. */ end()65 static Dinuc end() { return Dinuc(NUM); } 66 67 private: 68 /** Two nucleotides packed into a single scalar. */ 69 Bits m_data; 70 }; 71 72 /** Return the reverse complement of this dinucleotide. */ reverseComplement(const Dinuc & x)73static inline Dinuc reverseComplement(const Dinuc& x) 74 { 75 return x.reverseComplement(); 76 } 77 78 /** A set of dinucleotides. */ 79 class DinucSet 80 { 81 public: 82 typedef Dinuc Symbol; 83 84 /** The number of symbols. */ 85 static const unsigned NUM = Dinuc::NUM; 86 87 /** A bit vector. */ 88 typedef uint16_t Bits; 89 90 /** Default constructor. */ DinucSet()91 DinucSet() : m_data(0) { } 92 93 /** Construct a set containing a single element. */ DinucSet(const Dinuc & x)94 DinucSet(const Dinuc& x) : m_data(1 << x.toInt()) { } 95 96 /** Return a set with the specified bits set. */ mask(Bits x)97static DinucSet mask(Bits x) 98 { 99 DinucSet s; 100 s.m_data = x; 101 return s; 102 } 103 104 /** Return whether the specified element is present in this set. */ checkBase(const Dinuc & x)105bool checkBase(const Dinuc& x) const 106 { 107 return m_data & (1 << x.toInt()); 108 } 109 110 /** Return the number of elements in this set. */ outDegree()111unsigned outDegree() const 112 { 113 return popcount(m_data); 114 } 115 116 /** Return whether this set is non-empty. */ hasExtension()117bool hasExtension() const 118 { 119 return m_data != 0; 120 } 121 122 /** Return whether this set has two or more elements. */ isAmbiguous()123bool isAmbiguous() const 124 { 125 return outDegree() > 1; 126 } 127 128 /** Add the specified element to this set. */ setBase(const Dinuc & x)129void setBase(const Dinuc& x) 130 { 131 m_data |= 1 << x.toInt(); 132 } 133 134 /** Remove all elements from this set. */ clear()135void clear() 136 { 137 m_data = 0; 138 } 139 140 /** Remove the specified elements from this set. */ clear(const DinucSet & x)141void clear(const DinucSet& x) 142 { 143 m_data &= ~x.m_data; 144 } 145 146 /** Return the complementary nucleotides of this set. */ complement()147DinucSet complement() const 148 { 149 DinucSet x; 150 for (Dinuc i = Dinuc::begin(); i < Dinuc::end(); ++i) { 151 if (checkBase(i)) 152 x.setBase(i.reverseComplement()); 153 } 154 return x; 155 } 156 157 bool operator==(const DinucSet& x) const 158 { 159 return m_data == x.m_data; 160 } 161 162 private: 163 /** A bit vector representing a set. */ 164 Bits m_data; 165 }; 166 167 #endif 168