1 /*
2  *
 * KmerBloomFilter.hpp
4  *
5  *  Created on: Aug 10, 2012
6  *      Author: cjustin
7  */
8 
9 #ifndef KMERBLOOMFILTER_H_
10 #define KMERBLOOMFILTER_H_
11 #include "BloomFilter.hpp"
12 #include "vendor/nthash.hpp"
13 
14 using namespace std;
15 
16 class KmerBloomFilter : public BloomFilter
17 {
18   public:
19 	/*
20 	 * Default constructor.
21 	 */
KmerBloomFilter()22 	KmerBloomFilter()
23 	  : BloomFilter()
24 	{}
25 
26 	/* De novo filter constructor.
27 	 *
28 	 * preconditions:
29 	 * filterSize must be a multiple of 64
30 	 *
31 	 * kmerSize refers to the number of bases the kmer has
32 	 */
KmerBloomFilter(size_t filterSize,unsigned hashNum,unsigned kmerSize)33 	KmerBloomFilter(size_t filterSize, unsigned hashNum, unsigned kmerSize)
34 	  : BloomFilter(filterSize, hashNum, kmerSize)
35 	{}
36 
KmerBloomFilter(const string & filterFilePath)37 	KmerBloomFilter(const string& filterFilePath)
38 	  : BloomFilter(filterFilePath)
39 	{}
40 
41 	using BloomFilter::contains;
42 	using BloomFilter::insert;
43 
44 	/*
45 	 * Single pass filtering, computes hash values on the fly
46 	 */
contains(const char * kmer) const47 	bool contains(const char* kmer) const
48 	{
49 		uint64_t hVal = NTC64(kmer, m_kmerSize);
50 		size_t normalizedValue = hVal % m_size;
51 		unsigned char bit = bitMask[normalizedValue % bitsPerChar];
52 		if ((m_filter[normalizedValue / bitsPerChar] & bit) == 0)
53 			return false;
54 		for (unsigned i = 1; i < m_hashNum; i++) {
55 			normalizedValue = NTE64(hVal, m_kmerSize, i) % m_size;
56 			unsigned char bit = bitMask[normalizedValue % bitsPerChar];
57 			if ((m_filter[normalizedValue / bitsPerChar] & bit) == 0)
58 				return false;
59 		}
60 		return true;
61 	}
62 
insert(const char * kmer)63 	void insert(const char* kmer)
64 	{
65 		uint64_t hVal = NTC64(kmer, m_kmerSize);
66 		size_t normalizedValue = hVal % m_size;
67 		__sync_fetch_and_or(
68 		    &m_filter[normalizedValue / bitsPerChar], bitMask[normalizedValue % bitsPerChar]);
69 		for (unsigned i = 1; i < m_hashNum; i++) {
70 			size_t normalizedValue = NTE64(hVal, m_kmerSize, i) % m_size;
71 			__sync_fetch_and_or(
72 			    &m_filter[normalizedValue / bitsPerChar], bitMask[normalizedValue % bitsPerChar]);
73 		}
74 	}
75 };
76 
77 #endif /* KMERBLOOMFILTER_H_ */
78