1 #include <sys/types.h>  /* for 'open' */
2 #include <sys/stat.h>   /* for 'open' */
3 #include <fcntl.h>      /* for 'open' */
4 #include <stdlib.h>     /* for 'malloc' */
5 #include <stdio.h>      /* for 'printf' */
6 #include <unistd.h>     /* for 'read' */
7 #include <errno.h>      /* for 'sterror' */
8 #include <sys/time.h>   /* for 'gettimeofday' */
9 #include "uthash.h"
10 
11 #undef uthash_noexpand_fyi
12 #define uthash_noexpand_fyi(t) die()
13 #define UNALIGNED_KEYS 0
14 
die()15 static void die()
16 {
17     fprintf(stderr,"expansion inhibited\n");
18     exit(-1);
19 }
20 
21 /* Windows doesn't have gettimeofday. While Cygwin and some
22  * versions of MinGW supply one, it is very coarse. This substitute
23  * gives much more accurate elapsed times under Windows. */
24 #if (( defined __CYGWIN__ ) || ( defined __MINGW32__ ))
25 #include <windows.h>
win_gettimeofday(struct timeval * p,void * tz)26 static void win_gettimeofday(struct timeval* p, void* tz /* IGNORED */)
27 {
28     LARGE_INTEGER q;
29     static long long freq;
30     static long long cyg_timer;
31     QueryPerformanceFrequency(&q);
32     freq = q.QuadPart;
33     QueryPerformanceCounter(&q);
34     cyg_timer = q.QuadPart;
35     p->tv_sec = (long)(cyg_timer / freq);
36     p->tv_usec = (long)(((cyg_timer % freq) * 1000000) / freq);
37 }
38 #define gettimeofday win_gettimeofday
39 #define MODE (O_RDONLY|O_BINARY)
40 #else
41 #define MODE (O_RDONLY)
42 #endif
43 
44 #ifndef timersub
45 #define timersub(a, b, result)                                                \
46   do {                                                                        \
47     (result)->tv_sec = (a)->tv_sec - (b)->tv_sec;                             \
48     (result)->tv_usec = (a)->tv_usec - (b)->tv_usec;                          \
49     if ((result)->tv_usec < 0) {                                              \
50       --(result)->tv_sec;                                                     \
51       (result)->tv_usec += 1000000;                                           \
52     }                                                                         \
53   } while (0)
54 #endif
55 
56 typedef struct stat_key {
57     char *key;
58     unsigned len;
59     UT_hash_handle hh, hh2;
60 } stat_key;
61 
62 #define CHAIN_0  0
63 #define CHAIN_5  1
64 #define CHAIN_10  2
65 #define CHAIN_20  3
66 #define CHAIN_100 4
67 #define CHAIN_MAX 5
hash_chain_len_histogram(const UT_hash_table * tbl)68 static void hash_chain_len_histogram(const UT_hash_table *tbl)
69 {
70     unsigned i, bkt_hist[CHAIN_MAX+1];
71     double pct = 100.0/(double)tbl->num_buckets;
72     memset(bkt_hist,0,sizeof(bkt_hist));
73     for(i=0; i < tbl->num_buckets; i++) {
74         unsigned count = tbl->buckets[i].count;
75         if (count == 0U) {
76             bkt_hist[CHAIN_0]++;
77         } else if (count < 5U) {
78             bkt_hist[CHAIN_5]++;
79         } else if (count < 10U) {
80             bkt_hist[CHAIN_10]++;
81         } else if (count < 20U) {
82             bkt_hist[CHAIN_20]++;
83         } else if (count < 100U) {
84             bkt_hist[CHAIN_100]++;
85         } else {
86             bkt_hist[CHAIN_MAX]++;
87         }
88     }
89     fprintf(stderr, "Buckets with     0 items: %.1f%%\n", (double)bkt_hist[CHAIN_0 ]*pct);
90     fprintf(stderr, "Buckets with <   5 items: %.1f%%\n", (double)bkt_hist[CHAIN_5 ]*pct);
91     fprintf(stderr, "Buckets with <  10 items: %.1f%%\n", (double)bkt_hist[CHAIN_10]*pct);
92     fprintf(stderr, "Buckets with <  20 items: %.1f%%\n", (double)bkt_hist[CHAIN_20]*pct);
93     fprintf(stderr, "Buckets with < 100 items: %.1f%%\n", (double)bkt_hist[CHAIN_100]*pct);
94     fprintf(stderr, "Buckets with > 100 items: %.1f%%\n", (double)bkt_hist[CHAIN_MAX]*pct);
95 }
96 
main(int argc,char * argv[])97 int main(int argc, char *argv[])
98 {
99     int dups=0, rc, fd, done=0, err=0, want, i, padding=0, v=1, percent=100;
100     unsigned keylen, max_keylen=0, verbose=0;
101     const char *filename = "/dev/stdin";
102     char *dst;
103     stat_key *keyt, *keytmp, *keys=NULL, *keys2=NULL;
104     struct timeval start_tm, end_tm, elapsed_tm, elapsed_tm2, elapsed_tm3;
105 
106     if ((argc >= 3) && (strcmp(argv[1],"-p") == 0)) {
107         percent = atoi(argv[2]);
108         v = 3;
109     }
110     if ((v < argc) && (strcmp(argv[v],"-v") == 0)) {
111         verbose=1;
112         v++;
113     }
114     if (v < argc) {
115         filename=argv[v];
116     }
117     fd=open(filename,MODE);
118 
119     if ( fd == -1 ) {
120         fprintf(stderr,"open failed %s: %s\n", filename, strerror(errno));
121         return -1;
122     }
123 
124     for(i=0; done==0; i++) {
125 
126         want = sizeof(int);
127         dst = (char*)&keylen;
128 readmore1:
129         rc = read(fd,dst,want);
130         if (rc != want) {
131             if (rc == 0) {
132                 done=1;
133             } else if (rc == -1) {
134                 fprintf(stderr,"read failed: %s\n", strerror(errno));
135                 err=1;
136             } else if (rc > 0) {
137                 want -= rc;
138                 dst += rc;
139                 goto readmore1;
140             }
141         }
142 
143         if (done || err) {
144             break;
145         }
146         if (keylen > max_keylen) {
147             max_keylen=keylen;
148         }
149 
150         keyt = (stat_key*)malloc(sizeof(stat_key));
151         if (keyt == NULL) {
152             fprintf(stderr,"out of memory\n");
153             exit(-1);
154         }
155 
156         /* read key */
157 #ifdef UNALIGNED_KEYS
158         padding = i%8;
159 #endif
160         keyt->key = (char*)malloc(padding+keylen);
161         if (keyt->key == NULL) {
162             fprintf(stderr,"out of memory\n");
163             exit(-1);
164         }
165         keyt->key += padding; /* forcibly alter the alignment of key */
166         keyt->len = keylen;
167 
168         want = keylen;
169         dst = keyt->key;
170 readmore2:
171         rc = read(fd,dst,want);
172         if (rc != want) {
173             if (rc == -1) {
174                 fprintf(stderr,"read failed: %s\n", strerror(errno));
175                 err=1;
176             } else if (rc == 0) {
177                 fprintf(stderr,"incomplete file\n");
178                 err=1;
179             } else if (rc >= 0) {
180                 want -= rc;
181                 dst += rc;
182                 goto readmore2;
183             }
184         }
185         if (err != 0) {
186             break;
187         }
188         /* if percent was set to something less than 100%, skip some keys*/
189         if (((rand()*1.0) / RAND_MAX) > ((percent*1.0)/100)) {
190             free(keyt->key-padding);
191             free(keyt);
192             continue;
193         }
194 
195         /* eliminate dups */
196         HASH_FIND(hh,keys,keyt->key,keylen,keytmp);
197         if (keytmp != NULL) {
198             dups++;
199             free(keyt->key - padding);
200             free(keyt);
201         } else {
202             HASH_ADD_KEYPTR(hh,keys,keyt->key,keylen,keyt);
203         }
204     }
205 
206     if (verbose != 0) {
207         unsigned key_count = HASH_COUNT(keys);
208         fprintf(stderr,"max key length: %u\n", max_keylen);
209         fprintf(stderr,"number unique keys: %u\n", key_count);
210         fprintf(stderr,"keystats memory: %u\n",
211                 (unsigned)((sizeof(stat_key)+max_keylen)*key_count));
212         hash_chain_len_histogram(keys->hh.tbl);
213     }
214 
215     /* add all keys to a new hash, so we can measure add time w/o malloc */
216     gettimeofday(&start_tm,NULL);
217     for(keyt = keys; keyt != NULL; keyt=(stat_key*)keyt->hh.next) {
218         HASH_ADD_KEYPTR(hh2,keys2,keyt->key,keyt->len,keyt);
219     }
220     gettimeofday(&end_tm,NULL);
221     timersub(&end_tm, &start_tm, &elapsed_tm);
222 
223     /* now look up all keys in the new hash, again measuring elapsed time */
224     gettimeofday(&start_tm,NULL);
225     for(keyt = keys; keyt != NULL; keyt=(stat_key*)keyt->hh.next) {
226         HASH_FIND(hh2,keys2,keyt->key,keyt->len,keytmp);
227         if (keytmp == NULL) {
228             fprintf(stderr,"internal error, key not found\n");
229         }
230     }
231     gettimeofday(&end_tm,NULL);
232     timersub(&end_tm, &start_tm, &elapsed_tm2);
233 
234     /* now delete all items in the new hash, measuring elapsed time */
235     gettimeofday(&start_tm,NULL);
236     while (keys2 != NULL) {
237         keytmp = keys2;
238         HASH_DELETE(hh2,keys2,keytmp);
239     }
240     gettimeofday(&end_tm,NULL);
241     timersub(&end_tm, &start_tm, &elapsed_tm3);
242 
243     if (err == 0) {
244         printf("%.3f,%u,%u,%d,%s,%ld,%ld,%ld\n",
245                1-(1.0*keys->hh.tbl->nonideal_items/keys->hh.tbl->num_items),
246                keys->hh.tbl->num_items,
247                keys->hh.tbl->num_buckets,
248                dups,
249                (keys->hh.tbl->noexpand != 0U) ? "nx" : "ok",
250                (elapsed_tm.tv_sec * 1000000) + elapsed_tm.tv_usec,
251                (elapsed_tm2.tv_sec * 1000000) + elapsed_tm2.tv_usec,
252                (elapsed_tm3.tv_sec * 1000000) + elapsed_tm3.tv_usec );
253     }
254     return 0;
255 }
256