1 /* SuperRead pipeline
2  * Copyright (C) 2012  Genome group at University of Maryland.
3  *
4  * This program is free software: you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License as
6  * published by the Free Software Foundation, either version 3 of the
7  * License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 
19 #include <stdlib.h>
20 #include <algorithm>
21 #include <utility>
22 #include <vector>
23 #include <string>
24 #include <gtest/gtest.h>
25 #include <charb.hpp>
26 #include <src/bloom_counter2.hpp>
27 
28 typedef std::pair<std::string,unsigned int> elt;
29 struct parameter {
30   size_t nb_strings;
31   size_t nb_inserts;
32   double error_rate;
33 };
34 class BloomCounter : public ::testing::TestWithParam<parameter> { };
35 
TEST_P(BloomCounter,FalsePositive)36 TEST_P(BloomCounter, FalsePositive) {
37   const size_t nb_inserts = GetParam().nb_inserts;
38   const double error_rate = GetParam().error_rate;
39   const size_t nb_strings = GetParam().nb_strings;
40   static const size_t str_len    = 100;
41   std::vector<elt>    counts;
42   charb               str(str_len);
43   str[str_len] = '\0';
44 
45   for(size_t i = 0; i < nb_strings; ++i) {
46     for(size_t j = 0; j < str_len; ++j)
47       str[j] = 'A' + (random() % 26);
48     counts.push_back(std::make_pair((char*)str, (unsigned int)0));
49   }
50 
51   bloom_counter2<const char *> bc1(error_rate, nb_inserts);
52   bloom_counter2<const char *> bc2(error_rate, nb_inserts);
53   bloom_counter2<const char *> bc3(error_rate, nb_inserts);
54 
55   size_t nb_errors   = 0;
56   size_t collisions2 = 0;
57   size_t collisions3 = 0;
58 
59   for(size_t i = 0; i < nb_inserts; ++i) {
60     std::vector<elt>::reference ref = counts[random() % nb_strings];
61     ++ref.second;
62     nb_errors += bc1.insert(ref.first.c_str()) >= ref.second;
63     bc2[ref.first.c_str()]++;
64     ++bc3[ref.first.c_str()];
65   }
66 
67   // Check known strings
68   for(size_t i = 0; i < nb_strings; ++i) {
69     std::vector<elt>::reference ref = counts[i];
70     unsigned int expected = std::min(ref.second, (unsigned int)2);
71     unsigned int actual = bc1.check(ref.first.c_str());
72     EXPECT_LE(expected, actual);
73     if(expected != actual)
74       ++nb_errors;
75     if(expected != *bc2[ref.first.c_str()])
76       ++collisions2;
77     if(expected != *bc3[ref.first.c_str()])
78       ++collisions3;
79   }
80   EXPECT_GT(error_rate * nb_strings, nb_errors);
81   EXPECT_GT(error_rate * nb_strings, collisions2);
82   EXPECT_GT(error_rate * nb_strings, collisions3);
83 
84   nb_errors = collisions2 = collisions3 = 0;
85   // Check unknown strings
86   for(size_t i = 0; i < nb_inserts; ++i) {
87     for(size_t j = 0; j < str_len; ++j)
88       str[j] = '0' + (random() % 10);
89     if(bc1.check(str) > 0)
90       ++nb_errors;
91     if(bc2[str] > 0)
92       ++collisions2;
93     if(bc3[str] > 0)
94       ++collisions3;
95   }
96   EXPECT_GT(2 * error_rate * nb_inserts, nb_errors);
97   EXPECT_GT(2 * error_rate * nb_inserts, collisions2);
98   EXPECT_GT(2 * error_rate * nb_inserts, collisions3);
99 }
100 
101 INSTANTIATE_TEST_CASE_P(BloomCounterTest, BloomCounter,
102                         ::testing::Values(parameter({    1024,    2048, 0.01 }),
103                                           //                                          parameter({ 1000000, 3000000, 0.01 })));
104                                           parameter({   10000,   30000, 0.01 })));
105