1 #ifndef PAIREDDBG_DINUC_H
2 #define PAIREDDBG_DINUC_H 1
3 
4 #include "Common/BitUtil.h"
5 #include <cassert>
6 #include <stdint.h>
7 
/**
 * A pair of nucleotides packed into a single small integer.
 * The first nucleotide occupies bits 0-1 and the second nucleotide
 * occupies bits 2-3.
 */
class Dinuc
{
public:
	/** The number of symbols. */
	static const unsigned NUM = 16;

	/** A nucleotide. A bit vector of two bits. */
	typedef uint8_t Nuc;

	/** A dinucleotide. A bit vector of four bits. */
	typedef uint8_t Bits;

	/**
	 * Default constructor.
	 * Initializes to the first dinucleotide (all bits zero) so that
	 * a default-constructed object never holds an indeterminate
	 * value.
	 */
	Dinuc() : m_data(0) { }

	/**
	 * Construct a Dinuc from two nucleotides.
	 * @param a the first nucleotide, stored in bits 0-1
	 * @param b the second nucleotide, stored in bits 2-3
	 * The arguments are not masked; each is expected to fit in two
	 * bits.
	 */
	Dinuc(Nuc a, Nuc b) : m_data(a | (b << 2)) { }

	/**
	 * Construct a Dinuc from its packed integer representation.
	 * The value is stored as is, without any range check.
	 */
	explicit Dinuc(Bits x) : m_data(x) { }

	/** Return the packed integer representation. */
	Bits toInt() const { return m_data; }

	/** Return the first nucleotide (bits 0-1). */
	Nuc a() const { return m_data & 0x3; }

	/** Return the second nucleotide (bits 2-3). */
	Nuc b() const { return (m_data >> 2) & 0x3; }

	/** Order two dinucleotides by their packed integer value. */
	bool operator<(const Dinuc& x) const
	{
		return m_data < x.m_data;
	}

	/** Complement a single base, computed as 3 - x. */
	static Nuc complementNuc(Nuc x) { return 3 - x; }

	/**
	 * Return the reverse complement of this dinucleotide: the two
	 * nucleotides are swapped and each one is complemented.
	 */
	Dinuc reverseComplement() const
	{
		return Dinuc(complementNuc(b()), complementNuc(a()));
	}

	/**
	 * Advance to the next dinucleotide in packed integer order.
	 * No bounds check is performed.
	 */
	Dinuc& operator++()
	{
		++m_data;
		return *this;
	}

	/** Return the first dinucleotide, whose packed value is zero. */
	static Dinuc begin() { return Dinuc(0); }

	/**
	 * Return the one-past-the-last sentinel, whose packed value is
	 * NUM. It is not itself a valid dinucleotide and serves as the
	 * exclusive upper bound when iterating from begin().
	 */
	static Dinuc end() { return Dinuc(NUM); }

private:
	/** Two nucleotides packed into a single scalar. */
	Bits m_data;
};
71 
72 /** Return the reverse complement of this dinucleotide. */
reverseComplement(const Dinuc & x)73 static inline Dinuc reverseComplement(const Dinuc& x)
74 {
75 	return x.reverseComplement();
76 }
77 
78 /** A set of dinucleotides. */
79 class DinucSet
80 {
81 public:
82 	typedef Dinuc Symbol;
83 
84 	/** The number of symbols. */
85 	static const unsigned NUM = Dinuc::NUM;
86 
87 	/** A bit vector. */
88 	typedef uint16_t Bits;
89 
90 	/** Default constructor. */
DinucSet()91 	DinucSet() : m_data(0) { }
92 
93 	/** Construct a set containing a single element. */
DinucSet(const Dinuc & x)94 	DinucSet(const Dinuc& x) : m_data(1 << x.toInt()) { }
95 
96 /** Return a set with the specified bits set. */
mask(Bits x)97 static DinucSet mask(Bits x)
98 {
99 	DinucSet s;
100 	s.m_data = x;
101 	return s;
102 }
103 
104 /** Return whether the specified element is present in this set. */
checkBase(const Dinuc & x)105 bool checkBase(const Dinuc& x) const
106 {
107 	return m_data & (1 << x.toInt());
108 }
109 
110 /** Return the number of elements in this set. */
outDegree()111 unsigned outDegree() const
112 {
113 	return popcount(m_data);
114 }
115 
116 /** Return whether this set is non-empty. */
hasExtension()117 bool hasExtension() const
118 {
119 	return m_data != 0;
120 }
121 
122 /** Return whether this set has two or more elements. */
isAmbiguous()123 bool isAmbiguous() const
124 {
125 	return outDegree() > 1;
126 }
127 
128 /** Add the specified element to this set. */
setBase(const Dinuc & x)129 void setBase(const Dinuc& x)
130 {
131 	m_data |= 1 << x.toInt();
132 }
133 
134 /** Remove all elements from this set. */
clear()135 void clear()
136 {
137 	m_data = 0;
138 }
139 
140 /** Remove the specified elements from this set. */
clear(const DinucSet & x)141 void clear(const DinucSet& x)
142 {
143 	m_data &= ~x.m_data;
144 }
145 
146 /** Return the complementary nucleotides of this set. */
complement()147 DinucSet complement() const
148 {
149 	DinucSet x;
150 	for (Dinuc i = Dinuc::begin(); i < Dinuc::end(); ++i) {
151 		if (checkBase(i))
152 			x.setBase(i.reverseComplement());
153 	}
154 	return x;
155 }
156 
157 bool operator==(const DinucSet& x) const
158 {
159 	return m_data == x.m_data;
160 }
161 
162 private:
163 	/** A bit vector representing a set. */
164 	Bits m_data;
165 };
166 
167 #endif
168