/* * * CBFilter.hpp * Author: Hamid Mohamadi * Genome Sciences Centre, * British Columbia Cancer Agency */ #ifndef CBFILTER_H_ #define CBFILTER_H_ #include #include #include #include #include #include #include #include #include #include #include #include "nthash.hpp" using namespace std; class CBFilter { public: CBFilter(size_t filterSize, unsigned hashNum, unsigned kmerSize, unsigned repCap): m_size(filterSize), m_hashNum(hashNum), m_kmerSize(kmerSize), m_reCap(repCap) { m_filter = new uint32_t [m_size](); } bool insert_and_test(const uint64_t hVal) { bool greaterFlag = true; const size_t bucket_idx = hVal % m_size; uint32_t bucket = m_filter[bucket_idx]; const int idx1=hVal&0x3, idx2=hVal>>2&0x3; /*cerr << bucket << endl; cerr << m_filter[bucket_idx] << endl; cerr << idx1 << endl; cerr << idx2 << endl;*/ uint8_t c1c = bucket>>8*idx1; uint8_t c2c = bucket>>8*idx2; if (c1c < m_reCap || c2c < m_reCap) { greaterFlag = false; if (c1c == c2c){ ++c1c; ++ c2c; m_filter[bucket_idx] |= ((((uint32_t)c1c)<<8*idx1) | (((uint32_t)c2c)<<8*idx2)); } else { if (c1c < c2c) { ++c1c; m_filter[bucket_idx] |= (((uint32_t)c1c)<<8*idx1); } else { ++c2c; m_filter[bucket_idx] |= (((uint32_t)c2c)<<8*idx2); } } } //cerr << m_filter[bucket_idx] << endl; //exit(0); /*unsigned minCount = 256; bool greaterFlag = true; for (unsigned i = 0; i < m_hashNum; i++) { size_t hLoc = hVal[i] % m_size; if(m_filter[hLoc] < m_reCap) { if(m_filter[hLoc] < minCount) minCount = m_filter[hLoc]; greaterFlag = false; } } if(!greaterFlag) { for (unsigned i = 0; i < m_hashNum; i++) { size_t hLoc = hVal[i] % m_size; if(m_filter[hLoc] == minCount) { //#pragma omp atomic ++m_filter[hLoc]; } } }*/ return greaterFlag; } ~CBFilter() { delete[] m_filter; } private: CBFilter(const CBFilter& that); //to prevent copy construction uint32_t *m_filter; size_t m_size; unsigned m_hashNum; unsigned m_kmerSize; unsigned m_reCap; }; #endif /* CBFILTER_H_ */