1 /* SuperRead pipeline
2 * Copyright (C) 2012 Genome group at University of Maryland.
3 *
4 * This program is free software: you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation, either version 3 of the
7 * License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18
19 #include <stdlib.h>
20 #include <algorithm>
21 #include <utility>
22 #include <vector>
23 #include <string>
24 #include <gtest/gtest.h>
25 #include <charb.hpp>
26 #include <src/bloom_counter2.hpp>
27
28 typedef std::pair<std::string,unsigned int> elt;
29 struct parameter {
30 size_t nb_strings;
31 size_t nb_inserts;
32 double error_rate;
33 };
34 class BloomCounter : public ::testing::TestWithParam<parameter> { };
35
TEST_P(BloomCounter,FalsePositive)36 TEST_P(BloomCounter, FalsePositive) {
37 const size_t nb_inserts = GetParam().nb_inserts;
38 const double error_rate = GetParam().error_rate;
39 const size_t nb_strings = GetParam().nb_strings;
40 static const size_t str_len = 100;
41 std::vector<elt> counts;
42 charb str(str_len);
43 str[str_len] = '\0';
44
45 for(size_t i = 0; i < nb_strings; ++i) {
46 for(size_t j = 0; j < str_len; ++j)
47 str[j] = 'A' + (random() % 26);
48 counts.push_back(std::make_pair((char*)str, (unsigned int)0));
49 }
50
51 bloom_counter2<const char *> bc1(error_rate, nb_inserts);
52 bloom_counter2<const char *> bc2(error_rate, nb_inserts);
53 bloom_counter2<const char *> bc3(error_rate, nb_inserts);
54
55 size_t nb_errors = 0;
56 size_t collisions2 = 0;
57 size_t collisions3 = 0;
58
59 for(size_t i = 0; i < nb_inserts; ++i) {
60 std::vector<elt>::reference ref = counts[random() % nb_strings];
61 ++ref.second;
62 nb_errors += bc1.insert(ref.first.c_str()) >= ref.second;
63 bc2[ref.first.c_str()]++;
64 ++bc3[ref.first.c_str()];
65 }
66
67 // Check known strings
68 for(size_t i = 0; i < nb_strings; ++i) {
69 std::vector<elt>::reference ref = counts[i];
70 unsigned int expected = std::min(ref.second, (unsigned int)2);
71 unsigned int actual = bc1.check(ref.first.c_str());
72 EXPECT_LE(expected, actual);
73 if(expected != actual)
74 ++nb_errors;
75 if(expected != *bc2[ref.first.c_str()])
76 ++collisions2;
77 if(expected != *bc3[ref.first.c_str()])
78 ++collisions3;
79 }
80 EXPECT_GT(error_rate * nb_strings, nb_errors);
81 EXPECT_GT(error_rate * nb_strings, collisions2);
82 EXPECT_GT(error_rate * nb_strings, collisions3);
83
84 nb_errors = collisions2 = collisions3 = 0;
85 // Check unknown strings
86 for(size_t i = 0; i < nb_inserts; ++i) {
87 for(size_t j = 0; j < str_len; ++j)
88 str[j] = '0' + (random() % 10);
89 if(bc1.check(str) > 0)
90 ++nb_errors;
91 if(bc2[str] > 0)
92 ++collisions2;
93 if(bc3[str] > 0)
94 ++collisions3;
95 }
96 EXPECT_GT(2 * error_rate * nb_inserts, nb_errors);
97 EXPECT_GT(2 * error_rate * nb_inserts, collisions2);
98 EXPECT_GT(2 * error_rate * nb_inserts, collisions3);
99 }
100
101 INSTANTIATE_TEST_CASE_P(BloomCounterTest, BloomCounter,
102 ::testing::Values(parameter({ 1024, 2048, 0.01 }),
103 // parameter({ 1000000, 3000000, 0.01 })));
104 parameter({ 10000, 30000, 0.01 })));
105