1#include <config.h> 2 3#include <sys/types.h> 4#include <sys/stat.h> 5 6#include <errno.h> 7#include <stdio.h> 8#include <stdlib.h> 9#include <string.h> 10#include <unistd.h> 11 12#include "cunit/cyrunit.h" 13#include "lib/util.h" 14#include "lib/strhash.h" 15 16extern int verbose; 17 18static const char *repeat(int c, unsigned n) 19{ 20 static char buf[1024]; 21 char *p = buf; 22 23 /* always leave room for a \0 */ 24 if (n >= sizeof(buf)) 25 n = sizeof(buf) - 1; 26 27 memset(buf, 0, sizeof(buf)); 28 while (n--) { 29 *p++ = c; 30 } 31 32 return buf; 33} 34 35static void test_repeated(void) 36{ 37 /* repeated chars on the end should not obliterate earlier input */ 38 unsigned suffix_lengths[] = { 15, 31, 63, 127, 255, 511, 1023 }; 39 unsigned i; 40 41 for (i = 0; i < sizeof(suffix_lengths) / sizeof(suffix_lengths[0]); i++) { 42 char *cat = strconcat("cat", repeat('a', suffix_lengths[i]), NULL); 43 char *dog = strconcat("dog", repeat('a', suffix_lengths[i]), NULL); 44 char *mouse = strconcat("mouse", repeat('a', suffix_lengths[i]), NULL); 45 46 unsigned xcat = strhash(cat); 47 unsigned xdog = strhash(dog); 48 unsigned xmouse = strhash(mouse); 49 50 CU_ASSERT_NOT_EQUAL(xcat, xdog); 51 CU_ASSERT_NOT_EQUAL(xdog, xmouse); 52 CU_ASSERT_NOT_EQUAL(xmouse, xcat); 53 54 free(cat); 55 free(dog); 56 free(mouse); 57 } 58} 59 60static void test_seeded(void) 61{ 62 const char *const words[] = { "lorem", "ipsum", "dolor", "sit", "amet" }; 63 const size_t n_words = sizeof(words) / sizeof(words[0]); 64 unsigned hashes[n_words]; 65 unsigned i, j; 66 67 memset(hashes, 0, sizeof(hashes)); 68 69 /* with no seed, same input should produce same hash */ 70 for (i = 0; i < n_words; i++) { 71 unsigned h1 = strhash(words[i]); 72 unsigned h2 = strhash(words[i]); 73 CU_ASSERT_EQUAL(h1, h2); 74 } 75 76 /* with explicit zero seed, same input should produce same hash */ 77 for (i = 0; i < n_words; i++) { 78 unsigned h1 = strhash(words[i]); 79 unsigned h2 = strhash_seeded(0, words[i]); 80 unsigned h3 = strhash_seeded(0, words[i]); 81 CU_ASSERT_EQUAL(h1, h2); 82 CU_ASSERT_EQUAL(h2, h3); 83 CU_ASSERT_EQUAL(h3, h1); 84 } 85 86 /* with some seed, same input should produce same hash */ 87 for (j = 0; j < 5; j++) { 88 uint32_t seed; 89 do { 90 seed = rand(); 91 } while (seed == 0); 92 93 for (i = 0; i < n_words; i++) { 94 unsigned h1 = strhash_seeded(seed, words[i]); 95 unsigned h2 = strhash_seeded(seed, words[i]); 96 CU_ASSERT_EQUAL(h1, h2); 97 } 98 } 99 100 /* with different seed, same input should produce different hash */ 101 for (i = 0; i < n_words; i++) { 102 uint32_t seed1, seed2; 103 do { 104 seed1 = rand(); 105 seed2 = rand(); 106 } while (seed1 == 0 || seed2 == 0 || seed1 == seed2); 107 108 unsigned h1 = strhash_seeded(seed1, words[i]); 109 unsigned h2 = strhash_seeded(seed2, words[i]); 110 111 CU_ASSERT_NOT_EQUAL(h1, h2); 112 } 113} 114 115/* We can't define-out an entire test function when a feature is missing 116 * (in this case getline), because it confuses cunit.pl. So instead we 117 * make sure it will at least compile, but then return early without doing 118 * anything if the feature we wanted was missing. 119 */ 120#ifndef HAVE_GETLINE 121#define getline(a,b,c) (((void)(b)), -1) 122#endif 123 124#define NBUCKETS (0x10000) 125static void test_quality(void) 126{ 127 const char *wordsfile = "/usr/share/dict/words"; 128 unsigned buckets[NBUCKETS] = {0}; 129 FILE *stream; 130 char *line = NULL; 131 size_t len = 0; 132 ssize_t nread; 133 unsigned i; 134 unsigned inputs = 0; 135 unsigned contains_none = 0; 136 unsigned contains_one = 0; 137 unsigned contains_many = 0; 138 unsigned contains_many_sum = 0; 139 unsigned highest_count = 0; 140 unsigned highest_count_freq = 0; 141 unsigned max_acceptable_count; 142 double load; 143 144#ifndef HAVE_GETLINE 145 /* can't do anything anyway */ 146 return; 147#endif 148 149 stream = fopen(wordsfile, "r"); 150 if (!stream) { 151 if (verbose) 152 fprintf(stderr, "%s: %s (skipping) ", wordsfile, strerror(errno)); 153 return; 154 } 155 156 while ((nread = getline(&line, &len, stream)) != -1) { 157 /* chomp */ 158 if (line[nread - 1] == '\n') 159 line[nread - 1] = '\0'; 160 161 unsigned hash = strhash_seeded_djb2(0, line) % NBUCKETS; 162 163 buckets[hash]++; 164 inputs++; 165 } 166 free(line); 167 168 /* arbitrary declaration of quality: no buckets should have more 169 * than ten times the expected load 170 */ 171 load = inputs * 1.0 / NBUCKETS; 172 max_acceptable_count = load * 10; 173 174 unsigned bucket_counts[max_acceptable_count + 2]; 175 memset(bucket_counts, 0, sizeof(bucket_counts)); 176 177 for (i = 0; i < NBUCKETS; i++) { 178 switch (buckets[i]) { 179 case 0: 180 contains_none++; 181 break; 182 case 1: 183 contains_one++; 184 break; 185 default: 186 contains_many++; 187 contains_many_sum += buckets[i]; 188 break; 189 } 190 191 if (buckets[i] > max_acceptable_count) { 192 bucket_counts[max_acceptable_count+1]++; 193 } 194 else { 195 bucket_counts[buckets[i]]++; 196 } 197 198 if (buckets[i] > highest_count) { 199 highest_count = buckets[i]; 200 highest_count_freq = 1; 201 } 202 else if (buckets[i] == highest_count) { 203 highest_count_freq++; 204 } 205 } 206 207 if (verbose) { 208 putc('\n', stderr); 209 fprintf(stderr, "buckets: %u inputs: %u load: %g\n", 210 NBUCKETS, inputs, load); 211 fprintf(stderr, "empty: %u unique: %u busy: %u\n", 212 contains_none, contains_one, contains_many); 213 fprintf(stderr, "avg count in busy buckets: %g\n", 214 contains_many_sum * 1.0 / contains_many); 215 fprintf(stderr, "busiest %u buckets contain %u each\n", 216 highest_count_freq, highest_count); 217 fprintf(stderr, "max acceptable count: %u\n", max_acceptable_count); 218 fprintf(stderr, "\nbucket count histogram:\ncount frequency\n"); 219 for (i = 0; i <= max_acceptable_count; i++) { 220 fprintf(stderr, "%4u %u\n", i, bucket_counts[i]); 221 } 222 fprintf(stderr, "%4u+ %u\n", max_acceptable_count + 1, 223 bucket_counts[max_acceptable_count + 1]); 224 } 225 226 CU_ASSERT_EQUAL(bucket_counts[max_acceptable_count + 1], 0); 227} 228#undef NBUCKETS 229 230/* vim: set ft=c: */ 231