1 /* 2 * 3 * BloomFilter.hpp 4 * 5 * Created on: Aug 10, 2012 6 * Author: cjustin 7 */ 8 9 #ifndef KMERBLOOMFILTER_H_ 10 #define KMERBLOOMFILTER_H_ 11 #include "BloomFilter.hpp" 12 #include "vendor/nthash.hpp" 13 14 using namespace std; 15 16 class KmerBloomFilter : public BloomFilter 17 { 18 public: 19 /* 20 * Default constructor. 21 */ KmerBloomFilter()22 KmerBloomFilter() 23 : BloomFilter() 24 {} 25 26 /* De novo filter constructor. 27 * 28 * preconditions: 29 * filterSize must be a multiple of 64 30 * 31 * kmerSize refers to the number of bases the kmer has 32 */ KmerBloomFilter(size_t filterSize,unsigned hashNum,unsigned kmerSize)33 KmerBloomFilter(size_t filterSize, unsigned hashNum, unsigned kmerSize) 34 : BloomFilter(filterSize, hashNum, kmerSize) 35 {} 36 KmerBloomFilter(const string & filterFilePath)37 KmerBloomFilter(const string& filterFilePath) 38 : BloomFilter(filterFilePath) 39 {} 40 41 using BloomFilter::contains; 42 using BloomFilter::insert; 43 44 /* 45 * Single pass filtering, computes hash values on the fly 46 */ contains(const char * kmer) const47 bool contains(const char* kmer) const 48 { 49 uint64_t hVal = NTC64(kmer, m_kmerSize); 50 size_t normalizedValue = hVal % m_size; 51 unsigned char bit = bitMask[normalizedValue % bitsPerChar]; 52 if ((m_filter[normalizedValue / bitsPerChar] & bit) == 0) 53 return false; 54 for (unsigned i = 1; i < m_hashNum; i++) { 55 normalizedValue = NTE64(hVal, m_kmerSize, i) % m_size; 56 unsigned char bit = bitMask[normalizedValue % bitsPerChar]; 57 if ((m_filter[normalizedValue / bitsPerChar] & bit) == 0) 58 return false; 59 } 60 return true; 61 } 62 insert(const char * kmer)63 void insert(const char* kmer) 64 { 65 uint64_t hVal = NTC64(kmer, m_kmerSize); 66 size_t normalizedValue = hVal % m_size; 67 __sync_fetch_and_or( 68 &m_filter[normalizedValue / bitsPerChar], bitMask[normalizedValue % bitsPerChar]); 69 for (unsigned i = 1; i < m_hashNum; i++) { 70 size_t normalizedValue = NTE64(hVal, m_kmerSize, i) % m_size; 71 __sync_fetch_and_or( 72 &m_filter[normalizedValue / bitsPerChar], bitMask[normalizedValue % bitsPerChar]); 73 } 74 } 75 }; 76 77 #endif /* KMERBLOOMFILTER_H_ */ 78