1 /*
2 * copyright 2010-2012 Edscott Wilson Garcia (GPL-license)
3 *
4 *
5 * Tests on a 4 GB ram box.
6 * Step 1: Create a database with over 4M records. Total key size > 4GB. Table size > 40GB
7 * Check code, can DBH handle this?
8 *
9 *
10 * This is very simple example program to test 64 bit
11 * functions of the Disk Based Hash (DBH) and
12 * verify correct handling of dbh files greater than
13 * 2 Gb in size (up to 256^8/2).
14
15 * A dbh file is created from a specified filesystem.
16 * Paths are indexed with g_string hash key
17 * Hash key collisions are noted in dbh file COLLISIONS
18 * path => Hash key<->path associations are noted in dbh file INDEX
19 * Hash key => file are noted in dbh file TABLE
20 *
21 * usage: ./filesystem path option
22 * Option can be:
23 * "index" (create INDEX, COLLISIONS and TABLE dbh files)
24 * "dump" (do a foreach on all records and print summary)
25 * "regen" (recreate TABLE dbh file with optimized physical structure)
26 * "compare" (compare each file in TABLE with actual file on disk)
27 * "parallel"
28 * "thread"
29 * "fulltest" (all of the above)
30
31 */
32 #include "config.h"
33 #define _GNU_SOURCE /* See feature_test_macros(7) */
34 #include <features.h>
35 #include <pthread.h>
36
37 #ifdef HAVE_LSTAT
38 # define LSTAT lstat
39 #else
40 # define LSTAT stat
41 #endif
42 #include <string.h>
43 #include <stdlib.h>
44 #include <stdio.h>
45 #include <dbh.h>
46 #include <dirent.h>
47 #include <sys/types.h>
48 #include <inttypes.h>
49
50 #ifdef HAVE_GDBM_H
51 # include <gdbm.h>
52 #endif
53
54 #ifdef HAVE_SYS_WAIT_H
55 # include <sys/wait.h>
56 #endif
57
58 #ifdef HAVE_SYS_RESOURCE_H
59 #include <sys/time.h>
60 #include <sys/resource.h>
61 #endif
62
63 #include <sys/stat.h>
64 #include <fcntl.h>
65 #include <unistd.h>
66 #include <errno.h>
67 #ifdef HAVE_WINDOWS_H
68 #include <windows.h>
69 #endif
70 #ifndef O_BINARY
71 #define O_BINARY 0x0
72 #endif
73
74
75 #include <glib.h>
76
77 #define SKIP_DIR "/home"
78 #define DIRECTORY "/home/edscott/testfiles/"
79 const gchar *directory=DIRECTORY;
80
81 #define RANDOM_LIST DIRECTORY"randomlist.txt"
82
83 #define COLLISIONS DIRECTORY"performance.collisions.dbh"
84 #define TABLE DIRECTORY"performance.table.dbh"
85
86 #define QINDEX DIRECTORY"performance.qindex.dbh"
87 #define QTABLE DIRECTORY"performance.qtable.dbh"
88
89 #define GCOLLISIONS DIRECTORY"performance.gcollisions.dbf"
90 #define GTABLE DIRECTORY"performance.gtable.dbf"
91
92 #define REBUILT DIRECTORY"performance.index.rebuilt.dbh"
93 #define TEST_INDEX DIRECTORY"parperformance.index.dbh"
94
95 #define HELP \
96 " Options:\n"\
97 " create: Create table files of items within the specified \"path\"\n"\
98 " random: Create a random list for tests\n"\
99 " regen: Regenerate the DBH table (sweep/fanout)\n"\
100 " test: Random r/w tests\n"\
101 " *To test a DBH table larger than 4 GB, choose a \"path\" with more than 4GB.\n"\
102 " Do not alter any item within \"path\" during the test or error will occur."
103
104
/* Bookkeeping shared between the create_*_table() drivers and the
 * recursive filesystem walkers in this file. */
typedef struct dump_t{
    int original_count;      // walker return value, stored by the create_*_table() drivers
    long long original_sum;  // copy of `sum` taken once the walk has finished
    long long sum;           // running total of st_size for every file stored
    int which;               // only read by the disabled dump() code ("Sweep" vs "Fanout")
    int count;               // only touched by the disabled operate()/dump() code
    gint natural;            // next sequence number passed to dbh_genkey() in qread_filesystem()
}dump_t;
113
114
115 static
get_hash_key(unsigned char bucket,const char * pre_key)116 gchar *get_hash_key(unsigned char bucket, const char *pre_key){
117 GString *gs = g_string_new(pre_key);
118 gchar *key;
119 key=g_strdup_printf("%c%10u", bucket, g_string_hash(gs));
120 g_string_free(gs, TRUE);
121 return key;
122 }
123
/*
 * Report on stderr why path/file is being skipped (silent when reason is
 * NULL). Always returns NULL so callers can write `return skip_msg(...)`.
 */
static gchar *
skip_msg(const gchar *path, const gchar *file, const gchar *reason)
{
    if (reason != NULL) {
        fprintf(stderr, "skipping \"%s/%s\" (%s)\n", path, file, reason);
    }
    return NULL;
}
128
/*
 * Print one tab separated line of resource usage statistics on stdout.
 *
 * count:    value printed in the first column; a count of 0 prints the
 *           column header line instead of a data line.
 * walltime: value printed in the second column.
 *
 * The remaining columns come from getrusage(RUSAGE_SELF). Does nothing
 * when HAVE_SYS_RESOURCE_H is not defined.
 */
void out(long count, long walltime){
#ifdef HAVE_SYS_RESOURCE_H
    struct rusage ru;

    if (getrusage(RUSAGE_SELF, &ru) != 0) {
        fprintf(stderr, "rusage(): %s\n", strerror(errno));
        return;
    }
    if (count == 0) {
        fputs("# count walltime usertime systime resident shared data stack page_r page_f swaps block_i block_o\n", stdout);
        return;
    }

    /* Same order as the header line above. */
    const long column[] = {
        count,
        walltime,
        (long)ru.ru_utime.tv_sec,
        (long)ru.ru_stime.tv_sec,
        (long)ru.ru_maxrss,
        (long)ru.ru_ixrss,
        ru.ru_idrss,
        ru.ru_isrss,
        ru.ru_minflt,
        ru.ru_majflt,
        ru.ru_nswap,
        ru.ru_inblock,
        ru.ru_oublock,
    };
    const size_t columns = sizeof column / sizeof column[0];

    for (size_t i = 0; i < columns; i++) {
        if (i > 0) fputc('\t', stdout);
        fprintf(stdout, "%ld", column[i]);
    }
    fputc('\n', stdout);
    fflush(stdout);
#endif
}
160 static gchar *
get_fullpath(const gchar * path,struct dirent * d,struct stat * st)161 get_fullpath(const gchar *path, struct dirent *d, struct stat *st){
162 if(strcmp(d->d_name, ".")==0) return NULL; //skip_msg(path, d->d_name, NULL);
163 if(strcmp(d->d_name, "..")==0) return NULL; //skip_msg(path, d->d_name, NULL);
164 if(strstr(d->d_name, ".dbh")) return NULL; //skip_msg(path, d->d_name, NULL);
165 if(strstr(d->d_name, ".dbf")) return NULL; //skip_msg(path, d->d_name, NULL);
166 gchar *fullpath=g_build_filename(path,d->d_name,NULL);
167 if (LSTAT(fullpath,st)<0 ){
168 g_free(fullpath);
169 return skip_msg(path, d->d_name, "cannot stat");
170 }
171 if (st->st_size == 0) {
172 g_free(fullpath);
173 return skip_msg(path, d->d_name, NULL);
174 return skip_msg(path, d->d_name, "st->st_size == 0");
175 }
176 // Let's put a 0.2 MB limit for recordsize in the test.
177 if (st->st_size > 200000LL) {
178 g_free(fullpath);
179
180 return skip_msg(path, d->d_name, NULL);
181 // return skip_msg(path, d->d_name, "file is too big (this is arbitrary)");
182 }
183 if (!S_ISREG(st->st_mode) && !S_ISDIR(st->st_mode)) {
184 g_free(fullpath);
185 return NULL; //skip_msg(path, d->d_name, "non regular file");
186 }
187 return fullpath;
188 }
189 //////////////////////////////////////////////////////////////////////
190
191
192 static int
check_filesystem(const char * path,size_t limit,size_t * records_p,size_t * size_p,size_t * key_storage_p)193 check_filesystem(const char *path, size_t limit, size_t *records_p, size_t *size_p, size_t *key_storage_p)
194 {
195 DIR *directory;
196 struct dirent *d;
197
198 errno=0;
199 directory = opendir(path);
200 if(!directory) {
201 fprintf(stderr,"Cannot open %s (%s)\n" ,path, strerror(errno));
202 return -1;
203 }
204 #define _BSD_SOURCE 1
205
206 while((d = readdir(directory)) != NULL)
207 {
208
209 gboolean is_dir=FALSE;
210 struct stat st;
211
212 gchar *fullpath=get_fullpath(path, d, &st);
213 if (!fullpath) continue;
214
215 if (S_ISDIR(st.st_mode)) is_dir=TRUE;
216
217 if (is_dir) {
218 if (strcmp(fullpath, SKIP_DIR)){
219 int retval=0;
220 retval = check_filesystem(fullpath, limit, records_p, size_p, key_storage_p);
221 if (retval < 0) return -1;
222 }
223 } else if (st.st_size <= limit){
224 (*records_p)++;
225 (*size_p) += st.st_size;
226 (*key_storage_p) += (strlen(fullpath));
227 }
228
229
230 g_free(fullpath);
231 }
232 closedir(directory);
233 return (1);
234 }
235
236 //////////////////////////////////////////////////////////////////////
237
238 static int
qread_filesystem(DBHashTable * dbh,DBHashTable * index,const char * path,dump_t * dump_p)239 qread_filesystem(DBHashTable *dbh, DBHashTable *index, const char *path, dump_t *dump_p)
240 {
241 DIR *directory;
242 int count = 0;
243 struct dirent *d;
244
245
246 directory = opendir(path);
247 if(!directory) {
248 fprintf(stderr,"Cannot open %s\n" ,path);
249 return -1;
250 }
251
252 // We add 1 to q_number to save the null terminating char.
253 unsigned char q_number[DBH_KEYLENGTH(dbh)+1];
254 unsigned char q_key[DBH_KEYLENGTH(index)+1];
255 //fprintf(stderr, "keylength=%d\n", DBH_KEYLENGTH(index));
256
257 #define _BSD_SOURCE 1
258 while((d = readdir(directory)) != NULL)
259 {
260 // allocate fullpath...
261 struct stat st;
262
263 gchar *fullpath=get_fullpath(path, d, &st);
264 if (!fullpath) continue;
265
266 if (S_ISDIR(st.st_mode)) {
267 // skip value
268 if (strcmp(fullpath, SKIP_DIR)){
269 int retval;
270 retval = qread_filesystem(dbh, index, fullpath, dump_p);
271 if (retval > 0) count += retval;
272 }
273 } else { // not a dir.
274 // This is useful if our data size grows over 1024 B:
275 if (DBH_MAXIMUM_RECORD_SIZE(dbh) < st.st_size) {
276 dbh_set_size(dbh,st.st_size);
277 fprintf(stderr, "dbh_set_size set to %lld (%s)\n",(long long)st.st_size, fullpath);
278 }
279 // This is binary mode in unix.
280 int fd=open(fullpath,O_RDONLY|O_BINARY);
281 if (fd < 0) {
282 fprintf(stderr, "cannot open %s for read\n",fullpath);
283 g_free(fullpath); continue;
284 }
285 // This works instead of dbh_set_data():
286 // read data directly into DBH_DATA(dbh)
287 dbh_set_recordsize(dbh,st.st_size);
288 if (read(fd,DBH_DATA(dbh),st.st_size) < 0){
289 fprintf(stderr, "problem reading %lld bytes from %s\n",
290 (long long)st.st_size,fullpath);
291 close(fd);
292 g_free(fullpath); continue;
293 }
294 close(fd);
295
296 // index...
297 // set the index key
298 memset(q_key, 0, DBH_KEYLENGTH(index)+1);
299 strncpy((gchar *)q_key, fullpath, DBH_KEYLENGTH(index));
300 dbh_set_key(index, q_key);
301 // set the index data
302 memset(q_number, 0, DBH_KEYLENGTH(dbh)+1);
303 dbh_genkey(q_number, DBH_KEYLENGTH(dbh), dump_p->natural++);
304 gint recordsize = strlen((gchar *)q_number)+1;
305 //fprintf(stderr,"Recordsize: %d\n", recordsize);
306 dbh_set_recordsize(index, recordsize);
307 dbh_set_data(index, q_number, recordsize);
308 // update the index
309 //fprintf(stderr, "key: %s, data %s\n", (gchar *)DBH_KEY(index), (gchar *)DBH_DATA(index));
310 dbh_update(index);
311 //fprintf(stderr, "index q_number:\"%s\" data:\"%s\"\n", q_number, (char *)DBH_DATA(index));
312
313 // table...
314 // use the q number as the access key
315 dbh_set_key(dbh, q_number);
316 dbh_update(dbh);
317 dump_p->sum += st.st_size;
318 count++;
319 //fprintf(stdout,"%s: adding %ld bytes of data to table\n",fullpath, st.st_size);
320 //
321 }
322
323 g_free(fullpath);
324 }
325 closedir(directory);
326 // printf ("%s -> %d files\n",path,count);
327 return (count);
328 }
329
330 static int
read_filesystem(DBHashTable * dbh,DBHashTable * collisions,const char * path,dump_t * dump_p)331 read_filesystem(DBHashTable *dbh, DBHashTable *collisions, const char *path, dump_t *dump_p)
332 {
333 DIR *directory;
334 int count = 0;
335 struct dirent *d;
336
337 directory = opendir(path);
338 if(!directory) {
339 fprintf(stderr,"Cannot open %s\n" ,path);
340 return -1;
341 }
342 #define _BSD_SOURCE 1
343 while((d = readdir(directory)) != NULL)
344 {
345
346 gboolean is_dir=FALSE;
347 unsigned char bucket='A';
348 struct stat st;
349
350 gchar *fullpath=get_fullpath(path, d, &st);
351 if (!fullpath) continue;
352
353
354 // while hash key is already used, keep moving to next bucket.
355 gchar *key=NULL;
356 while (1) {
357 key=get_hash_key(bucket,fullpath);
358 dbh_set_key (dbh,(unsigned char *)key);
359 if (!dbh_load(dbh)) break;
360 fprintf(stderr, "HASH colision: %s -> %s\n", key,fullpath);
361 bucket++;
362 g_free(key);
363 }
364 g_free(key);
365
366
367
368
369 // If we are beyond first bucket, a collision has occured.
370 if (bucket > 'A'){
371 char collision_key[255];
372 memset(collision_key,0,255);
373 strncpy(collision_key,fullpath, 254);
374
375 dbh_set_key (collisions,(unsigned char *)collision_key);
376
377 dbh_set_size(collisions,DBH_KEYLENGTH(dbh));
378 dbh_set_data(collisions,(void *)DBH_KEY(dbh),DBH_KEYLENGTH(dbh));
379 dbh_update(collisions);
380 }
381
382
383
384 if (S_ISDIR(st.st_mode)) is_dir=TRUE;
385
386 if (!is_dir) {
387 // This is useful if our data size grows over 1024 B:
388 if (DBH_MAXIMUM_RECORD_SIZE(dbh) < st.st_size) {
389 dbh_set_size(dbh,st.st_size);
390 fprintf(stderr, "dbh_set_size set to %lld\n",(long long)st.st_size);
391 }
392 int fd=open(fullpath,O_RDONLY);
393 if (fd < 0) {
394 fprintf(stderr, "cannot open %s for read\n",fullpath);
395 g_free(fullpath); continue;
396 }
397 // This works instead of dbh_set_data():
398 if (read(fd,DBH_DATA(dbh),st.st_size) < 0){
399 fprintf(stderr, "problem reading %lld bytes from %s\n",
400 (long long)st.st_size,fullpath);
401 close(fd);
402 g_free(fullpath); continue;
403 }
404 close(fd);
405 dbh_set_recordsize(dbh,st.st_size);
406 dbh_update(dbh);
407 dump_p->sum += st.st_size;
408 count++;
409 //fprintf(stderr,"%s\n",fullpath);
410 //
411 }
412 if (is_dir) {
413 if (strcmp(fullpath, SKIP_DIR)){
414 int retval;
415 retval = read_filesystem(dbh,collisions, fullpath, dump_p);
416 if (retval > 0) count += retval;
417 }
418 }
419
420 g_free(fullpath);
421 }
422 closedir(directory);
423 // printf ("%s -> %d files\n",path,count);
424 return (count);
425 }
426
427
// Read buffer used by gread_filesystem() and its current capacity in bytes.
// Keep these variables global so that recursion won't gobble up all memory:
// every recursion level reuses (and, when needed, enlarges) the same buffer.
size_t data_size = 1024;
void *data_ptr = NULL;
431
432 static int
gread_filesystem(GDBM_FILE dbf,GDBM_FILE collisions,const char * path,dump_t * dump_p)433 gread_filesystem(GDBM_FILE dbf, GDBM_FILE collisions, const char *path, dump_t *dump_p)
434 {
435 DIR *directory;
436 int count = 0;
437 struct dirent *d;
438
439 directory = opendir(path);
440 if(!directory) {
441 fprintf(stderr,"Cannot open %s\n" ,path);
442 return -1;
443 }
444 if (!data_ptr) data_ptr = (void *)malloc(data_size);
445 if (!data_ptr) g_error("should not happen, malloc 1024\n");
446
447 gchar *key_s=NULL;
448 #define _BSD_SOURCE 1
449 while((d = readdir(directory)) != NULL)
450 {
451
452 gboolean is_dir=FALSE;
453 unsigned char bucket='A';
454 // allocate fullpath...
455 struct stat st;
456
457 gchar *fullpath=get_fullpath(path, d, &st);
458 if (!fullpath) continue;
459
460
461 // while hash key is already used, keep moving to next bucket.
462 datum key;
463 datum content;
464
465 g_free(key_s);
466 while (1) {
467 key_s=get_hash_key(bucket,fullpath);
468 // Set key
469 key.dptr = key_s;
470 key.dsize = 11;
471 // Does the key collide?
472 if (!gdbm_exists(dbf, key)) break;
473 fprintf(stderr, "HASH colision: %s -> %s\n", key_s, fullpath);
474 bucket++;
475 g_free(key_s);
476 }
477
478 // up above and on eoc: g_free(key_s);
479
480
481
482
483 // If we are beyond first bucket, a collision has occured.
484 if (bucket > 'A'){
485 char collision_key[255];
486 memset(collision_key,0,255);
487 strncpy(collision_key,fullpath, 254);
488
489 // set the collision key:
490 key.dptr = collision_key;
491 key.dsize = 254; // XXX we could probably do better here, using variable key size...
492
493 // set the data:
494 content.dptr = key_s;
495 content.dsize = 11;
496
497 // Update the record
498 gdbm_store(collisions, key, content, GDBM_INSERT);
499
500 }
501
502
503
504 if (S_ISDIR(st.st_mode)) is_dir=TRUE;
505
506 if (!is_dir) {
507
508 // This is useful if our data size grows over 1024 B:
509 if (data_size < st.st_size) {
510 g_free(data_ptr);
511 data_size = st.st_size;
512 data_ptr = (void *)malloc(data_size);
513 if (!data_ptr){
514 fprintf(stderr, "Cannot allocate %lld bytes for gdbm data_ptr. Terminating now...\n", (long long)data_size);
515 exit(1);
516 }
517 fprintf(stderr, "gdbm size set to %lld\n",(long long)st.st_size);
518 }
519 int fd=open(fullpath,O_RDONLY);
520 if (fd < 0) {
521 fprintf(stderr, "cannot open %s for read\n",fullpath);
522 g_free(fullpath); continue;
523 }
524 // This works instead of dbh_set_data():
525 if (read(fd, data_ptr, st.st_size) < 0){
526 fprintf(stderr, "problem reading %lld bytes from %s\n",
527 (long long)st.st_size,fullpath);
528 close(fd);
529 g_free(fullpath); continue;
530 }
531 close(fd);
532
533 // Set key
534 key.dptr = key_s;
535 key.dsize = 11;
536
537 // Set data
538 content.dptr = data_ptr;
539 content.dsize = st.st_size;
540
541 // update data
542 gdbm_store (dbf, key, content, GDBM_INSERT);
543
544 dump_p->sum += st.st_size;
545 count++;
546 //fprintf(stderr,"%s\n",fullpath);
547 //
548 }
549 if (is_dir) {
550 if (strcmp(fullpath, SKIP_DIR)){
551 int retval;
552 retval = gread_filesystem(dbf,collisions, fullpath, dump_p);
553 if (retval > 0) count += retval;
554 }
555 }
556
557 g_free(fullpath);
558 }
559 closedir(directory);
560 // free final leftover
561 g_free(key_s);
562
563 // printf ("%s -> %d files\n",path,count);
564 return (count);
565 }
566
567 #if 0
568
569 static void operate (DBHashTable *dbh){
570 dump_t *dump_p = dbh->user_data;
571 dump_p->count++;
572 //sum += strlen((char *)DBH_DATA(dbh));
573 dump_p->sum += DBH_RECORD_SIZE(dbh);
574 }
575
576 static DBHashTable *dbh_key;
577 static void compare (DBHashTable *dbh){
578 dbh_set_key (dbh_key,(unsigned char *)DBH_KEY(dbh));
579 dbh_load(dbh_key);
580 char *path=DBH_DATA(dbh_key);
581 int fd=open(path,O_RDONLY);
582 if (fd < 0) {
583 printf("cannot open %s for read\n",path);
584 return;
585 }
586 // This works instead of dbh_set_data():
587 struct stat st;
588 LSTAT(path,&st);
589 void *p=malloc(st.st_size);
590 if (p == NULL) {
591 fprintf(stderr, "malloc: %s\n", strerror(errno));
592 exit(1);
593 }
594 if (read(fd,p,st.st_size) < 0){
595 printf("problem reading %lld bytes from %s\n",
596 (long long)st.st_size,path);
597 close(fd);
598 free(p);
599 return;
600 }
601 close(fd);
602 if (memcmp(p,DBH_DATA(dbh),st.st_size) != 0) {
603 printf("%s does not compare!\n",path);
604 } else {
605 static int count=0;
606 if (count++ % 1000 == 0) {
607 printf ("."); fflush(stdout);
608 }
609 }
610 free(p);
611 }
612 #endif
613
614 #if 0
615 static int
616 dump(dump_t *dump_p) {
617 //char **argv, int which, int original_count, long long original_sum){
618 dump_p->count=0; dump_p->sum=0;
619 const char *text;
620 if (dump_p->which) text = "Sweep"; else text = "Fanout";
621 fprintf(stdout,"%s is now being performed by pid %d\n", text, getpid());
622 // PARALLEL SAFE need not be specified on READ_ONLY
623 DBHashTable *dbh=dbh_new(TABLE, NULL, DBH_READ_ONLY);
624 dbh->user_data = dump_p;
625 if (dump_p->which) dbh_foreach_sweep (dbh,operate);
626 else dbh_foreach_fanout (dbh,operate);
627 dbh_close(dbh);
628 /*
629 if (strcmp(dump_p->argv[1],"fulltest")==0) {
630 if (dump_p->sum != dump_p->original_sum){
631 //g_warning("Original sum does not match %s sum (%I64d != %I64d)\nTest FAILED.\n",
632 g_warning("Original sum does not match %s sum (%lld != %lld)\nTest FAILED.\n",
633 text, dump_p->original_sum, dump_p->sum);
634 exit(1);
635 }
636 if (dump_p->count != dump_p->original_count){
637 g_warning("Original count does not match %s count (%d != %d)\nTest FAILED.\n",
638 text, dump_p->original_count, dump_p->count);
639 exit(1);
640 }
641 }*/
642 fprintf(stdout,
643 " Sweep data:\n"\
644 " Items in the DBH table (filesystem count) = %d\n"\
645 " Sum of data items size saved in DBH table = %lld\n",
646 dump_p->count, dump_p->sum);
647 /*
648 if (strcmp(dump_p->argv[2],"fulltest")==0) {
649 fprintf(stderr, "Test %s PASSED\n", text);
650 }*/
651 return 1;
652 }
653 #endif
654
655
656 #if 0
657 static void
658 check_files(void){
659 if (!g_file_test(TABLE, G_FILE_TEST_EXISTS)){
660 g_warning("Index file %s has not yet been created\n",
661 TABLE);
662 exit(1);
663 }
664 if (!g_file_test(COLLISIONS, G_FILE_TEST_EXISTS)){
665 g_warning("DBH table %s has not yet been created\n",
666 COLLISIONS);
667 exit(1);
668 }
669 }
670 #endif
671
/*
 * Copy the current record (key and data) of dbh_thread into the table
 * found in dbh_thread->user_data and write it there. The destination is
 * locked with dbh_mutex_lock() for the duration of the copy.
 */
void rebuild (DBHashTable *dbh_thread){
    DBHashTable *target = dbh_thread->user_data;

    dbh_mutex_lock(target);

    /* Mirror key, record size and data, then commit the record. */
    dbh_set_key(target, DBH_KEY(dbh_thread));
    dbh_set_recordsize(target, DBH_RECORD_SIZE(dbh_thread));
    dbh_set_data(target, DBH_DATA(dbh_thread), DBH_RECORD_SIZE(dbh_thread));
    dbh_update(target);

    dbh_mutex_unlock(target);
}
687
// State filled in by get_random_list().
long long checksum=0;            // running sum of the per-call counter in get_random_list()
GSList *random_list = NULL;      // copies of the visited record keys, in shuffled order
GSList *random_numlist = NULL;   // visit ordinals (1, 2, ...) in the same order as random_list
GSList *random_qlist = NULL;     // NOTE(review): not referenced in the visible part of the file
GSList *random_glist = NULL;     // NOTE(review): not referenced in the visible part of the file
gint random_count=0;             // number of records visited so far
/*
 * Record callback: adds a copy of the current key of dbh to random_list
 * and its visit ordinal to random_numlist. A coin toss decides whether
 * both go to the front or to the back of their lists, which shuffles the
 * order. Also advances the global checksum.
 *
 * data is unused.
 */
void get_random_list(DBHashTable *dbh, void *data){
    static unsigned int step = 7;

    /* Seed the generator while the list is still empty. */
    if (!random_list) {
        srand48(time(NULL));
    }

    /* The first draw is thrown away; only the second one is used. */
    (void)drand48();
    const double toss = drand48();

    gchar *key_copy = g_strdup((gchar *)DBH_KEY(dbh));
    void *ordinal = GINT_TO_POINTER(++random_count);

    if (toss > 0.5) {
        random_list = g_slist_prepend(random_list, key_copy);
        random_numlist = g_slist_prepend(random_numlist, ordinal);
    } else {
        random_list = g_slist_append(random_list, key_copy);
        random_numlist = g_slist_append(random_numlist, ordinal);
    }

    checksum += step;
    step++;
}
720 #if 0
721 void dump_qindex(DBHashTable *xdbh){
722
723 fprintf(stderr, "DUMP key: %s, data: %s\n", (gchar *)DBH_KEY(xdbh), (gchar *)DBH_DATA(xdbh));
724 return;
725 }
726 #endif
727
728
/*
 * Scan the directory named by argv[2] with check_filesystem() and print
 * on stderr how many files up to 2000000 bytes it holds, their total
 * size, and the key storage a variable length key would need compared
 * with a fixed 256 byte key.
 *
 * Returns 1 after printing the summary, or -1 (nothing scanned) when
 * argv[2] is not an existing directory.
 */
static int score(char **argv){
    const char *root = argv[2];
    struct stat st;
    if (stat(root, &st) < 0 || !S_ISDIR(st.st_mode)){
        // Name the argument that was actually tested.
        fprintf(stderr, "%s is not a directory.\n", root);
        return -1;
    }
    size_t records=0, size=0, limit=2000000, key_storage=0;
    fprintf(stderr, "Checking %s for table creation with files <= %ld Mbytes...\n", root, (long)(limit/1000000));
    check_filesystem(root, limit, &records, &size, &key_storage);
    fprintf(stderr, "%s has %ld records for a total of %ld Mb\n", root, (long)records, (long)(size/1000000));
    fprintf(stderr, "full variable key storage=%ld KB versus fixed size = %ld KB \n",(long)key_storage/1000, (long)(256*records/1000));
    return (1);
}
741
742 static int
create_gdbm_table(const gchar * path)743 create_gdbm_table(const gchar *path){
744 time_t gdbm_creation_time;
745 time_t start = time(NULL);
746
747 dump_t dump_v;
748 memset(&dump_v, 0, sizeof(dump_t));
749
750 fprintf(stderr, "/////////////////// GDBM g_hash key table generation /////////////////\n");
751 fprintf(stderr,"Creating index now, process 0x%x recursively reading %s\n", getpid(), path);
752 // This table is the bucket index file. The index file is also
753 // the data table.
754 //
755 GDBM_FILE dbf = gdbm_open(GTABLE, 0, GDBM_NEWDB, 0770, NULL);
756 // This table handles collisions. If a path is indexed here (first 254
757 // bytes of the string), then the data element is the actual hash table
758 // key. This avoids a collision with a path that has already been indexed.
759 GDBM_FILE collisions = gdbm_open(GCOLLISIONS, 0, GDBM_NEWDB, 0770, NULL);
760 // Read the filesystem data into the DBH table.
761 dump_v.sum = 0;
762 dump_v.original_count=gread_filesystem(dbf, collisions, path, &dump_v);
763 dump_v.original_sum = dump_v.sum;
764 gdbm_close(dbf);
765 gdbm_close(collisions);
766 gdbm_creation_time = time(NULL) - start;
767 fprintf(stderr,
768 " Index created:\n"\
769 " Items in the GDBM table (filesystem count) = %d\n"\
770 " Sum of data items size saved in GDBM table = %lld\ntime = %lld s.",
771 dump_v.original_count, dump_v.original_sum,
772 (long long) gdbm_creation_time);
773
774 fprintf(stderr, "gdbm creation time = %lld s.\n", (long long)gdbm_creation_time);
775 return (1);
776
777 }
778
779 // This DBH uses more than one bucket in order to handle hashtable
780 // key collisions.
781 static int
create_dbh_table(gchar * path)782 create_dbh_table(gchar *path){
783
784 dump_t dump_v;
785 memset(&dump_v, 0, sizeof(dump_t));
786 time_t dbh_creation_time;
787 time_t start = time(NULL);
788 fprintf(stderr, "/////////////////// DBH g_hash key table generation /////////////////\n");
789 fprintf(stderr,"Creating index now, process 0x%x recursively reading %s\n", getpid(), path);
790 // This table is the bucket index file. The index file is also
791 // the data table.
792 //
793 // Our key length here is one extra byte to handle bucket id for collisions.
794 unsigned char key_length = 11;
795 DBHashTable *dbh = dbh_new(TABLE, &key_length, DBH_CREATE);
796 // This table handles collisions. If a path is indexed here (first 254
797 // bytes of the string), then the data element is the actual hash table
798 // key. This avoids a collision with a path that has already been indexed.
799 key_length = 254;
800 DBHashTable *collisions = dbh_new(COLLISIONS, &key_length, DBH_CREATE);
801 // Read the filesystem data into the DBH table.
802 dump_v.sum = 0;
803 dump_v.original_count=read_filesystem(dbh, collisions, path, &dump_v);
804 dump_v.original_sum = dump_v.sum;
805 dbh_close(dbh);
806 dbh_close(collisions);
807 fprintf(stderr,
808 " Index created:\n"\
809 " Items in the DBH table (filesystem count) = %d\n"\
810 " Sum of data items size saved in DBH table = %lld\n",
811 dump_v.original_count, dump_v.original_sum);
812 dbh_creation_time = time(NULL) - start;
813 fprintf(stderr, "dbh creation time = %lld s.\n", (long long)dbh_creation_time);
814 return 1;
815
816 }
817
818
819 // quantified key index.
820 static int
create_qdbh_table(gchar * path)821 create_qdbh_table(gchar *path) {
822 dump_t dump_v;
823 memset(&dump_v, 0, sizeof(dump_t));
824 time_t q_creation_time;
825 time_t start = time(NULL);
826 fprintf(stderr, "/////////////////// DBH quantified key table generation ///////////////////////\n");
827 fprintf(stderr,"Creating qindex now, process 0x%x recursively reading %s\n", getpid(), path);
828 // This table is the bucket index file. The index file is also
829 // the data table.
830 unsigned char key_length = 10;
831 DBHashTable *dbh = dbh_new(QTABLE, &key_length, DBH_CREATE);
832 key_length = 254;
833 DBHashTable *index = dbh_new(QINDEX, &key_length, DBH_CREATE);
834 // Read the filesystem data into the DBH table.
835 dump_v.natural = 1;
836 dump_v.sum = 0;
837 dump_v.original_count=qread_filesystem(dbh,index, path,&dump_v);
838 dump_v.original_sum = dump_v.sum;
839 dbh_close(dbh);
840 dbh_close(index);
841 fprintf(stderr,
842 " Q Index created: (%lld records, %lld data, %lld erased, %lld format)\n"\
843 " Q Table created: (%lld records, %lld data, %lld erased, %lld format)\n"\
844 " Items in the DBH table (filesystem count) = %d\n"\
845 " Sum of data items size saved in DBH table = %lld\n",
846 DBH_RECORDS(index), DBH_DATA_SPACE(index), DBH_ERASED_SPACE(index), DBH_FORMAT_SPACE(index),
847 DBH_RECORDS(dbh), DBH_DATA_SPACE(dbh), DBH_ERASED_SPACE(dbh), DBH_FORMAT_SPACE(dbh),
848 dump_v.original_count, dump_v.original_sum);
849 q_creation_time = time(NULL) - start;
850 //index=dbh_new(QINDEX,&key_length, DBH_READ_ONLY);
851 //dbh_foreach_sweep(index, dump_qindex);
852 //dbh_close(index);
853 fprintf(stderr, "qdbh creation time = %lld s.\n", (long long)q_creation_time);
854 return 1;
855 }
856
857 static int
mkdir_output(void)858 mkdir_output(void){
859 if (g_mkdir_with_parents(DIRECTORY, 0770) < 0){
860 if (!g_file_test(DIRECTORY, G_FILE_TEST_IS_DIR)){
861 g_warning("mkdir(%s): %s\n", DIRECTORY, strerror(errno));
862 exit(1);
863 }
864 }
865 if (!g_file_test(DIRECTORY, G_FILE_TEST_IS_DIR)){
866 g_warning("Failed test: g_file_test(%s, G_FILE_TEST_IS_DIR)\n",
867 DIRECTORY );
868 exit(1);
869 }
870 return 1;
871 }
872
load_grow_dbh(DBHashTable * in_table,const gchar * key)873 static gint load_grow_dbh(DBHashTable *in_table, const gchar *key){
874 DBHashTable *table;
875 if (in_table)table = in_table;
876 else table = dbh_new("grow.dbh", NULL, DBH_READ_ONLY);
877
878 dbh_set_key(table, (unsigned char *)key);
879 int retval = 0;
880 if (!dbh_load(table)) {
881 fprintf(stderr, "cannot load table key \"%s\"\n", key);
882 retval=-1;
883 }
884
885 if (in_table==NULL) dbh_close(table);
886 return retval;
887 }
load_grow_dbf(GDBM_FILE in_gtable,const gchar * inkey)888 static gint load_grow_dbf(GDBM_FILE in_gtable, const gchar *inkey){
889 gint retval = 0;
890 GDBM_FILE gtable;
891 if (in_gtable) gtable = in_gtable;
892 else gtable = gdbm_open("grow.dbf", 0, GDBM_READER, 0770, NULL);
893
894 datum key;
895
896
897
898 int ik = atoi(inkey);
899 key.dptr = (void *)&ik;
900 key.dsize = sizeof(int);
901
902 datum record = gdbm_fetch(gtable, key);
903 if (!record.dptr) {
904 fprintf(stderr, "cannot load gdbm table key \"%s\"\n", inkey);
905 retval=-1;
906 }
907 g_free(record.dptr);
908 if (in_gtable==NULL) gdbm_close(gtable);
909 return retval;
910 }
911
load_dbh_item(const gchar * item)912 static gint load_dbh_item(const gchar *item){
913 gint retval = 0;
914 gchar collision_key[256];
915 DBHashTable *collisions = dbh_new(COLLISIONS, NULL, DBH_READ_ONLY);
916 DBHashTable *table = dbh_new(TABLE, NULL, DBH_READ_ONLY);
917
918 memset(collision_key, 0, DBH_KEYLENGTH(collisions)+1);
919 memcpy(collision_key, item, strlen(item));
920 dbh_set_key(collisions, (unsigned char *)collision_key);
921 if (!dbh_load(collisions)){
922 // get regular hash key in 'A' bucket.
923 gchar *k=get_hash_key('A', item);
924 dbh_set_key(table, (unsigned char *) k);
925 g_free(k);
926 } else {
927 dbh_set_key(table, DBH_DATA(collisions));
928 }
929 if (!dbh_load(table)) {
930 fprintf(stderr, "cannot load table key \"%s\"\n", item);
931 retval=-1;
932 }
933
934 dbh_close(table);
935 dbh_close(collisions);
936 return retval;
937 }
938
load_qdbh_item(const gchar * item)939 static gint load_qdbh_item(const gchar *item){
940 gint retval = 0;
941 DBHashTable *table = dbh_new(QTABLE, NULL, DBH_READ_ONLY);
942 DBHashTable *index = dbh_new(QINDEX, NULL, DBH_READ_ONLY);
943 //fprintf(stderr, "keylength=%d/%d\n", DBH_KEYLENGTH(index),key_length);
944
945 gchar index_key[DBH_KEYLENGTH(index)];
946
947 memset(index_key, 0, DBH_KEYLENGTH(index));
948 memcpy(index_key, item, strlen(item));
949 dbh_set_key(index, (unsigned char *)index_key);
950 if (!dbh_load(index)){
951 fprintf(stderr, "Cannot load index key %s\n", item);
952 retval=-1;
953 } else {
954 dbh_set_key(table, DBH_DATA(index));
955 if (!dbh_load(table)) fprintf(stderr, "cannot load qtable key \"%s\"\n", item);
956 }
957
958 dbh_close(index);
959 dbh_close(table);
960 return retval;
961 }
962
load_gdbm_item(const gchar * item)963 static gint load_gdbm_item(const gchar *item){
964 gint retval = 0;
965 GDBM_FILE gcollisions = gdbm_open(GCOLLISIONS, 0, GDBM_READER, 0770, NULL);
966 GDBM_FILE gtable = gdbm_open(GTABLE, 0, GDBM_READER, 0770, NULL);
967
968 gchar collision_key[256];
969 gint c_key_length=11;
970 memset(collision_key, 0, c_key_length+1);
971 memcpy(collision_key, item, strlen(item));
972 datum gkey;
973
974 gkey.dptr = collision_key;
975 gkey.dsize = c_key_length;
976
977 datum record = gdbm_fetch(gcollisions, gkey);
978 if (record.dptr == NULL){
979 // get regular hash key in 'A' bucket.
980 gchar *k=get_hash_key('A', item);
981 gkey.dptr = g_strdup(k);
982 gkey.dsize = 11;
983 g_free(k);
984 } else {
985 gkey.dptr = record.dptr;
986 gkey.dsize = 11;
987 }
988 record = gdbm_fetch(gtable, gkey);
989 if (!record.dptr) {
990 fprintf(stderr, "cannot load gdbm table key \"%s\"\n", item);
991 retval=-1;
992 }
993 g_free(record.dptr);
994 g_free(gkey.dptr);
995 gdbm_close(gcollisions);
996 gdbm_close(gtable);
997 return retval;
998 }
999
#if 0
/*
 * Disabled single-threaded variant of the "grow" test: a dbh_foreach()
 * callback that sets a random amount of junk data on a second table and
 * keeps running totals. Kept for reference only; never compiled.
 */
typedef struct grow_t {
    DBHashTable *dbh;   /* table that receives the junk data */
    long records;       /* number of callback invocations so far */
    long size;          /* accumulated data size */
    time_t start;       /* reference time for the progress message */
} grow_t;

static void grow_f(DBHashTable *dbh, void *data){
    grow_t *state = data;
    char junk[256];
    //const gchar *path = (gchar *)DBH_KEY(dbh);

    /* random data length in the range 0..255 */
    double fraction = 1.0 * rand() /RAND_MAX;
    int length = fraction * 255;

    dbh_set_data(state->dbh, junk, length);
    state->size += length;
    state->records++;
    // dbh_genkey (DBH_KEY(grow_p->dbh), DBH_KEYLENGTH(grow_p->dbh), grow_p->records);

    // dbh_update(grow_p->dbh);
    if (state->records % 1000 != 0) return;
    fprintf(stderr, "%ld records done, size=%ld (%ld s)\n",
            state->records, state->size, (long)(time(NULL) - state->start));
}
#endif
1026
1027
1028
/* Sizing constants for the threaded "grow" benchmark. */
#define KEY_LEN 10                  /* key length handed to dbh_new()/dbh_genkey() */
#define V_PER_THREAD 10000          /* records generated per thread batch */
#define MAX_KEYS (25*1000*1000)     /* parenthesized so it is safe inside any expression */
#define MAX_THREADS 5               /* number of concurrent generator threads */
#define JUNK_LENGTH 64              /* size of the junk data buffer */
1034
1035
1036 typedef struct r2_t {
1037 time_t start;
1038 gint index;
1039 gint process;
1040 unsigned char key[V_PER_THREAD][KEY_LEN+1];
1041 int size[V_PER_THREAD];
1042 }r2_t;
1043
w_grow_f(void * data)1044 void *w_grow_f(void *data){
1045 r2_t *r2_p = data;
1046 // fprintf(stderr, "process=%d index=%d\n", r2_p->process, r2_p->index);
1047 //return data;
1048 gint i;
1049 unsigned char key[KEY_LEN+1];
1050 memset(key,0,KEY_LEN+1);
1051 for (i=0; i < V_PER_THREAD; i++){
1052 dbh_genkey(key, KEY_LEN, i + r2_p->index);
1053 //strcpy(r2_p->key+i,key);
1054 memcpy(r2_p->key+i,key, KEY_LEN+1);
1055 double r = 1.0 * rand() /RAND_MAX;
1056 r2_p->size[i] = r * JUNK_LENGTH;
1057 if (!r2_p->size[i]) r2_p->size[i]=5;
1058
1059 }
1060
1061 int quantum=0;
1062 unsigned char *p = key;
1063 for (;p && *p; p++) quantum += (*p-'0');
1064 i--;
1065 fprintf(stderr, "[%d] %d:%.2lfM: %s [%s](%ld s.)\n", r2_p->process, quantum, 1.0*(i+r2_p->index)/1000000.01,
1066 key, (char *)(r2_p->key+(V_PER_THREAD-1)),
1067 (long)(time(NULL) - r2_p->start));
1068
1069 return data;
1070 }
1071
grow_write_f(void * process_data,void * data)1072 void grow_write_f(void *process_data, void *data){
1073 void **arg = data;
1074 char junk[JUNK_LENGTH];
1075 r2_t *r2_p = process_data;
1076 DBHashTable *dbh = arg[0];
1077 GDBM_FILE dbf = arg[1];
1078 int k;
1079 for (k=0; k<V_PER_THREAD; k++){
1080 dbh_set_key(dbh, (unsigned char *)(r2_p->key+k));
1081 dbh_set_data(dbh, junk, r2_p->size[k]);
1082 dbh_update(dbh);
1083 datum key;
1084 datum content;
1085 int ik = k+r2_p->index;
1086 key.dptr = (void *)&ik;
1087 key.dsize = sizeof(int);
1088 content.dptr = junk;
1089 content.dsize = r2_p->size[k];
1090 gdbm_store(dbf, key, content, GDBM_INSERT);
1091
1092 }
1093 g_free(process_data);
1094 }
1095
grow(void)1096 static void grow(void){
1097 gint max_threads = MAX_THREADS;
1098 gint current_threads = 0;
1099 pthread_t thread_id[max_threads];
1100 r2_t r2_v[max_threads];
1101
1102 gint index = 0;
1103 time_t start = time(NULL);
1104
1105 for (; current_threads < max_threads; current_threads++){
1106 if (!index) index = 1;
1107 else index += V_PER_THREAD;
1108 r2_v[current_threads].process = current_threads+1;
1109 r2_v[current_threads].index = index;
1110 r2_v[current_threads].start = start;
1111
1112 pthread_create(thread_id+current_threads, NULL,
1113 w_grow_f, (void *)(r2_v+current_threads));
1114 }
1115
1116 unsigned char keylength = KEY_LEN;
1117 DBHashTable *dbh =
1118 dbh_new("/home/edscott/testfiles/grow.dbh", &keylength, DBH_CREATE);
1119 GDBM_FILE dbf = gdbm_open("/home/edscott/testfiles/grow.dbf", 0, GDBM_NEWDB|GDBM_SYNC, 0770, NULL);
1120 void *arg[]={dbh, dbf};
1121 gint i=0;
1122 GThreadPool *writepool = g_thread_pool_new(grow_write_f, arg, 1, TRUE, NULL);
1123 while (1){
1124 void *return_data;
1125 if (thread_id[i] && pthread_tryjoin_np(thread_id[i], &return_data)==0){
1126 fprintf(stderr, "joined thread [%d], now writing\n", i);
1127 // process data, threadpool
1128 r2_t *thread_r2_p = malloc(sizeof(r2_t));
1129 if (!thread_r2_p) g_error("Cannot malloc thread_r2_p: %s\n", strerror(errno));
1130 memcpy(thread_r2_p, return_data, sizeof(r2_t));
1131
1132 g_thread_pool_push (writepool, thread_r2_p, NULL);
1133 r2_t *r2_p = return_data;
1134 // start new thread
1135 if (index < MAX_KEYS){
1136 index += V_PER_THREAD;
1137 r2_p->index = index;
1138 pthread_create(thread_id+i, NULL, w_grow_f, (void *)(r2_p));
1139 } else {
1140 fprintf(stderr, "Not starting new thread, limit reached: %d\n", index);
1141 thread_id[i] = 0;
1142 if (--current_threads == 0) {
1143 fprintf(stderr, "All threads are done.\n");
1144 break;
1145 }
1146 }
1147 }
1148 if (++i >= max_threads){
1149 sleep(1);
1150 i=0;
1151 }
1152 }
1153 g_thread_pool_free(writepool, FALSE, TRUE);
1154 fprintf(stderr, "Threadpool write is complete %ld s. for %d records\n",
1155 (long)(time(NULL)-start), index);
1156 fprintf(stderr, "Threadpool write is complete.\n");
1157 dbh_close(dbh);
1158 gdbm_close(dbf);
1159 }
1160
1161
1162
1163 #if 0
1164 grow_t grow_v;
1165 memset(&grow_v, 0, sizeof(grow_t));
1166 grow_v.start=time(NULL);
1167 // Go though q-index
1168 unsigned char keylength;
1169 DBHashTable *index = dbh_new(QINDEX, &keylength, 0);
1170 // unsigned char keylength = DBH_KEYLENGTH(index);
1171 grow_v.dbh = dbh_new("/home/edscott/testfiles/grow.dbh", &keylength, DBH_CREATE);
1172
1173 fprintf(stderr, "growing file\n");
1174 dbh_foreach(index, grow_f, &grow_v);
1175 fprintf(stderr, "grow done: %ld seconds.\n",(long)(time(NULL) - grow_v.start));
1176 dbh_close(index);
1177 dbh_close(grow_v.dbh);
1178 #endif
1179 /*
1180 static void grow_test(){
1181 time_t dbh_time=0;
1182 time_t dbf_time=0;
1183 DBHashTable *dbh =
1184 dbh_new("/home/edscott/testfiles/grow.dbh", &keylength, DBH_READONLY);
1185 GDBM_FILE dbf =
1186 gdbm_open("/home/edscott/testfiles/grow.dbf", 0, 0, 0770, NULL);
1187 double r = (1.0)*rand()/RAND_MAX;
1188 r *= MAX_KEYS;
1189 int item = r;
1190 unsigned char key[KEY_LEN+1];
1191 memset(key,0,KEY_LEN+1);
1192 time_t start=time(NULL);
1193 dbh_genkey(key, KEY_LEN, item);
1194 dbh_set_key(dbh, (unsigned char *)(r2_p->key+k));
1195 dbh_load
1196 dbh_time += (time(NULL) - start);
1197
1198 }
1199 */
1200
1201
main(int argc,char ** argv)1202 int main(int argc, char **argv){
1203
1204 if (argc < 2) {
1205 fprintf(stderr,"insufficient arguments (%d < 2), usage: %s option [path] (option)\n%s\n",
1206 argc, argv[0], HELP);
1207 exit(1);
1208 }
1209
1210 if (argc == 3 && strcmp(argv[1], "create")==0){
1211 if (argc < 3) {
1212 fprintf(stderr, "option %s requires a path\n", argv[1]);
1213 exit (1);
1214 }
1215 mkdir_output();
1216 // First, take a look at what we got to build a test datafile
1217 score(argv);
1218 // Create gdbm table
1219 create_gdbm_table(argv[2]);
1220 // Create DBH table
1221 create_dbh_table(argv[2]);
1222 // Create double DBH table
1223 create_qdbh_table(argv[2]);
1224 exit (0);
1225
1226 }
1227
1228
1229
1230 if (strcmp(argv[1],"regen")==0 ) {
1231 DBHashTable *dbh = dbh_new(QTABLE, NULL, 0);
1232 DBHashTable *index = dbh_new(QINDEX, NULL, 0);
1233 fprintf(stderr, "regen QINDEX\n");
1234 dbh_regen_sweep(&index);
1235 fprintf(stderr, "regen QTABLE\n");
1236 dbh_regen_sweep(&dbh);
1237 dbh_close(dbh);
1238 dbh_close(index);
1239
1240 DBHashTable *table = dbh_new(TABLE, NULL, 0);
1241 DBHashTable *collisions = dbh_new(COLLISIONS, NULL, 0);
1242 fprintf(stderr, "regen COLLISIONS\n");
1243 dbh_regen_sweep(&collisions);
1244 fprintf(stderr, "regen TABLE\n");
1245 dbh_regen_sweep(&table);
1246 dbh_close(table);
1247 dbh_close(collisions);
1248 exit(0);
1249 }
1250
1251 GSList *list = NULL;
1252 if (strcmp(argv[1],"random")==0 ) {
1253 fprintf(stderr, "preparing random list...\n");
1254 // prepare a random access list with 25% of data records.
1255 do {
1256 unsigned char key_length;
1257 random_count = 0;
1258 DBHashTable *random_src = dbh_new(QINDEX, &key_length, DBH_READ_ONLY);
1259 if (!random_src) {
1260 fprintf(stderr, "cannot create random list until tables are generated\n");
1261 fprintf(stderr, "%s found -> %d\n", QINDEX, g_file_test(QINDEX, G_FILE_TEST_EXISTS));
1262 exit(1);
1263 }
1264
1265 dbh_foreach(random_src, get_random_list, NULL);
1266 fprintf(stderr, "random list has %d/%d items (randomness=%lld)\n",
1267 g_slist_length(random_list), (int)DBH_RECORDS(random_src)/3, checksum);
1268 if (g_slist_length(random_list) < (int)DBH_RECORDS(random_src)/3){
1269 // free list data...
1270 GSList *tlist = random_list;
1271 for (;tlist && tlist->data; tlist = tlist->next) g_free(tlist->data);
1272 g_slist_free(random_list);
1273 g_slist_free(random_numlist);
1274 random_list=NULL;
1275 random_numlist=NULL;
1276 }
1277 dbh_close(random_src);
1278 checksum = 0;
1279 } while (random_list == NULL);
1280 fprintf(stderr,"Random list is ready. Now writing out...\n");
1281 FILE *outlist = fopen(RANDOM_LIST,"w");
1282 if (!outlist){
1283 fprintf(stderr, "cannot open %s for write\n", RANDOM_LIST);
1284 exit(1);
1285 }
1286 for (list=random_list; list && list->data; list = list->next){
1287 fprintf(outlist, "%s\n", (gchar *)list->data);
1288 }
1289 fclose(outlist);
1290 fprintf(stderr,"Random list done.\n");
1291 exit(0);
1292 }
1293
1294
1295 if (strstr(argv[1],"grow")) {
1296 if (strstr(argv[1],"growlist")){
1297 unsigned char key[KEY_LEN+1];
1298 memset(key,0,KEY_LEN+1);
1299 int i;
1300 time_t start = time(NULL);
1301 double r;
1302 for (i=0; i<MAX_KEYS/2; i++){
1303 retry:
1304 r = (1.0)*rand()/RAND_MAX;
1305 r *= MAX_KEYS;
1306 int item = r;
1307 if (!item) goto retry;
1308 unsigned char key[KEY_LEN+1];
1309 memset(key,0,KEY_LEN+1);
1310
1311 dbh_genkey(key, KEY_LEN, item);
1312 fprintf(stdout, "%d:%s\n", item, key);
1313 if (i%100 == 0) {
1314 double t = (double)(time(NULL) - start)/60.0;
1315 fprintf(stderr, "records: %d in %lf minutes\n", i, t);
1316 }
1317 }
1318
1319 }
1320 if (strstr(argv[1],"growtest")) {
1321 fprintf(stderr, "doing growtest\n");
1322 if (!g_file_test("growlist.txt", G_FILE_TEST_EXISTS)){
1323 fprintf(stderr, "%s does not exist. Run random option first\n", "growlist.txt");
1324 exit(1);
1325 } else {
1326 FILE *inlist = fopen("growlist.txt","r");
1327 if (!inlist){
1328 fprintf(stderr, "cannot open %s for read\n", "growlist.txt");
1329 exit(1);
1330 }
1331 gchar buffer[300];
1332 fprintf(stderr, "Reading random list file...\n");
1333 gint count=1;
1334 while (fgets(buffer, 300, inlist) && !feof(inlist)){
1335 if (strchr(buffer, '\n')) *strchr(buffer, '\n')=0;
1336 random_list = g_slist_prepend(random_list, g_strdup(buffer));
1337 count++;
1338 //if (count > 50)break;
1339 //if (count %10000 == 0) fprintf(stderr, "read %d records...\n", count);
1340
1341 }
1342 fclose(inlist);
1343 fprintf(stderr, "Finished reading random list file (%d records).\n", count);
1344 }
1345 GSList *list = random_list;
1346 time_t start=time(NULL);
1347 // test dbh
1348 int count;
1349 DBHashTable *g_dbh=NULL;
1350 if (strstr(argv[1],"growtest2")) {
1351 g_dbh = dbh_new("grow.dbh", NULL, DBH_READ_ONLY);
1352 }
1353 for (list = random_list,count=1; list && list->data; list=list->next, count++){
1354 gchar *key = strchr((gchar *)(list->data),':');
1355 if (!key) {
1356 g_warning("key is null for %s\n",(gchar *)(list->data));
1357 continue;
1358 }
1359 key++;
1360 //fprintf(stderr,"key=%s data=%s\n", key, (gchar *)(list->data));
1361 load_grow_dbh(g_dbh, key);
1362 if (count % 10000 == 0){
1363 fprintf(stdout, "%d %ld\n", count, (long)(time(NULL)-start));
1364 }
1365 if (count % 1000 == 0){
1366 fprintf(stderr, "dbh: %d records in %ld seconds\n", count, (long)(time(NULL)-start));
1367 }
1368 }
1369 if (g_dbh) dbh_close(g_dbh);
1370 start=time(NULL);
1371
1372 GDBM_FILE g_dbf=NULL;
1373 if (strstr(argv[1],"growtest2")) {
1374 g_dbf = gdbm_open("grow.dbf", 0, GDBM_READER, 0770, NULL);
1375 }
1376 for (list = random_list,count=1; list && list->data; list=list->next, count++){
1377 gchar *key = (gchar *)(list->data);
1378 *strchr(key,':') = 0;
1379 //fprintf(stderr,"key=%s data=%s\n", key, (gchar *)(list->data));
1380 load_grow_dbf(g_dbf,key);
1381 if (count % 10000 == 0){
1382 fprintf(stdout, "%d %ld\n", count, (long)(time(NULL)-start));
1383 }
1384 if (count % 1000 == 0){
1385 fprintf(stderr, "dbf: %d records in %ld seconds\n", count, (long)(time(NULL)-start));
1386 }
1387 }
1388 if (g_dbf) gdbm_close(g_dbf);
1389
1390 }
1391
1392
1393
1394 //else grow();
1395 exit(0);
1396 }
1397
1398
1399 if (!strstr(argv[1],"test")) {
1400 fprintf(stderr, "unknown option: %s\n", argv[1]);
1401 exit(1);
1402 }
1403
1404
1405 {
1406
1407
1408 if (!g_file_test(RANDOM_LIST, G_FILE_TEST_EXISTS)){
1409 fprintf(stderr, "%s does not exist. Run random option first\n", RANDOM_LIST);
1410 exit(1);
1411 } else {
1412 FILE *inlist = fopen(RANDOM_LIST,"r");
1413 if (!inlist){
1414 fprintf(stderr, "cannot open %s for read\n", RANDOM_LIST);
1415 exit(1);
1416 }
1417 gchar buffer[300];
1418 fprintf(stderr, "Reading random list file...\n");
1419 gint count=1;
1420 while (fgets(buffer, 300, inlist) && !feof(inlist)){
1421 if (strchr(buffer, '\n')) *strchr(buffer, '\n')=0;
1422 random_list = g_slist_prepend(random_list, g_strdup(buffer));
1423 count++;
1424 //if (count %10000 == 0) fprintf(stderr, "read %d records...\n", count);
1425
1426 }
1427 fclose(inlist);
1428 fprintf(stderr, "Finished reading random list file (%d records).\n", count);
1429 }
1430 }
1431
1432
1433 dump_t dump_v;
1434 memset(&dump_v, 0, sizeof(dump_t));
1435
1436 time_t start;
1437
1438
1439 fprintf(stderr, "Starting test...\n");
1440 //gchar *tests[]={"test-g","test-d","test-q",NULL};
1441 gchar **p;
1442 gchar **q;
1443
1444 gchar *strings[256];
1445 memset(strings, 0, 256*sizeof(gchar **));
1446
1447 srand((int)time(NULL));
1448
1449 //for (p=tests; p && *p; p++)
1450 p=argv+1;
1451 fprintf(stdout, "# GNUplot output for %s: %s\n",
1452 *p,
1453 strstr(*p,"test-g")?"GDBM":
1454 strstr(*p,"test-q")?"QDBH":
1455 strstr(*p,"test-d")?"DBH":"wtf");
1456 {
1457 fprintf(stderr, "Testing %s...\n", *p);
1458 out(0, 0);
1459 start=time(NULL);
1460 long t_items=0;
1461 gint i;
1462 gint item = RAND_MAX;
1463 long items;
1464 long top_items = 100000;
1465 //long top_items = 100;
1466 gint k;
1467
1468 for (items=10, k=0; items <= top_items; items *= 10, k++) {
1469 if (!strings[k]) strings[k] = g_strdup_printf("%ld", items);
1470 for(i=0; i< items; i++){
1471 while (item > g_slist_length(random_list) || !item) {
1472 double r = 1.0 * rand() / RAND_MAX * g_slist_length(random_list);
1473 item=r;
1474 }
1475 if (strstr(*p,"test-g"))
1476 load_gdbm_item((gchar *) ((g_slist_nth(random_list, item))->data));
1477 if (strstr(*p,"test-q"))
1478 load_qdbh_item((gchar *) ((g_slist_nth(random_list, item))->data));
1479 else
1480 load_dbh_item((gchar *) ((g_slist_nth(random_list, item))->data));
1481
1482 item=0;
1483
1484
1485 }
1486 fprintf(stderr, "%s table loaded %ld items in %ld seconds\n",
1487 strstr(*p,"test-g")?"GDBM":
1488 strstr(*p,"test-q")?"QDBH":
1489 strstr(*p,"test-d")?"DBH":"wtf",
1490 items, (long)(time(NULL)-start));
1491 t_items += items;
1492 out(t_items, (long)(time(NULL)-start));
1493
1494 gchar *g = g_strdup_printf("%s\t%ld", strings[k],(long)(time(NULL)-start));
1495 g_free(strings[k]);
1496 strings[k] = g;
1497
1498 start=time(NULL);
1499
1500 }
1501 }
1502 //fprintf(stdout, "# GNUplot output: column content:\n# records gdbm(s) dbh(s) qdbh(s) \n");
1503 for (q=strings; q && *q; q++){
1504 // fprintf(stdout, "%s\n", *q);fflush(stdout);
1505 }
1506
1507
1508
1509
1510 exit(0);
1511
1512 for (list = random_list; list && list->data; list = list->next) g_free(list->data);
1513 g_slist_free(random_list);
1514
1515 exit(0);
1516 }
1517
1518
1519
1520 ////////////////////////////////////
1521
1522 #if 0
1523 // Full or specific test follows.
1524 check_files();
1525 // Dump test
1526 if (strcmp(argv[2],"dump")==0 || strcmp(argv[2],"fulltest")==0) {
1527 // Find out how many items and total size of data records
1528 // a sweep/fanout of DBH table will find
1529 int i; for(i=1; i>=0; i--) {dump_v.which = i; dump(&dump_v);}
1530 }
1531
1532 // Regen tests
1533 if (strcmp(argv[2],"regen")==0 || strcmp(argv[2],"fulltest")==0) {
1534 fprintf(stderr, "/////////////////// Serial tests //////////////////////////\n");
1535 fprintf(stdout,"Performing regen_sweep now...\n");
1536 DBHashTable *dbh;
1537 dbh=dbh_new(TABLE, NULL, 0);
1538 dbh_regen_sweep(&dbh);
1539 dbh_close(dbh);
1540 // Find out how many items and total size of data records
1541 // a sweep of DBH table will find
1542 dump_v.which = 1;
1543 dump(&dump_v);
1544 fprintf(stdout,"Performing regen_fanout now...\n");
1545 dbh=dbh_new(TABLE, NULL, 0);
1546 dbh_regen_fanout(&dbh);
1547 dbh_close(dbh);
1548 // Find out how many items and total size of data records
1549 // a sweep of DBH table will find
1550 dump_v.which = 0;
1551 dump(&dump_v);
1552 }
1553 #endif
1554
1555 // DBH test with q number keys //////////////////////////////////
1556 #if 0
1557 // this will mislead following test...
1558 start=time(NULL);
1559 DBHashTable *table = dbh_new(QTABLE, NULL, DBH_READ_ONLY);
1560 GSList *list = random_numlist;
1561 unsigned char k[DBH_KEYLENGTH(table)+1];
1562 memset(k, 0, DBH_KEYLENGTH(table)+1);
1563 for (;list && list->data; list = list->next){
1564 dbh_genkey(k, DBH_KEYLENGTH(table), GPOINTER_TO_INT(list->data));
1565 dbh_set_key(table, k);
1566 if (!dbh_load(table)) fprintf(stderr, "q-numload cannot load item %s (%d)\n", k, GPOINTER_TO_INT(list->data));
1567 else loaded++;
1568 }
1569 dbh_close(table);
1570
1571 fprintf(stderr,"q-numload loaded %d items, random access time = %ld s.\n", loaded, (long)(time(NULL) - start));
1572
1573 #endif
1574