1 /***************************************************************************
2 * tools/benchmarks/berkeley_db_benchmark.cpp
3 *
4 * Part of the STXXL. See http://stxxl.sourceforge.net
5 *
6 * Copyright (C) 2006 Roman Dementiev <dementiev@ira.uka.de>
7 * Copyright (C) 2009, 2010 Andreas Beckmann <beckmann@cs.uni-frankfurt.de>
8 *
9 * Distributed under the Boost Software License, Version 1.0.
10 * (See accompanying file LICENSE_1_0.txt or copy at
11 * http://www.boost.org/LICENSE_1_0.txt)
12 **************************************************************************/
13
14 //! \example containers/berkeley_db_benchmark.cpp
15 //! This is a benchmark mentioned in the paper
16 //! R. Dementiev, L. Kettner, P. Sanders "STXXL: standard template library for XXL data sets"
17 //! Software: Practice and Experience
18 //! Volume 38, Issue 6, Pages 589-637, May 2008
19 //! DOI: 10.1002/spe.844
20
21 #include <stxxl/vector>
22 #include <stxxl/map>
23 #include <stxxl/timer>
24 #include <stxxl/stream>
25
26 ///// BDB header ////////////
27 #include <db_cxx.h>
28
29 ///////// TPIE headers //////////
30 #include "app_config.h"
31 #include <portability.h>
32 #include <ami_btree.h>
33 ////////////////////////////
34
35 #define BDB_BULK_SCAN
36
37 #define KEY_SIZE 8
38 #define DATA_SIZE 32
39
40 #define NODE_BLOCK_SIZE (32 * 1024)
41 #define LEAF_BLOCK_SIZE (32 * 1024)
42
43 #define LEAF_BLOCK_SIZE (32 * 1024)
44
45 #define TOTAL_CACHE_SIZE (750 * 1024 * 1024)
46 //#define TOTAL_CACHE_SIZE (150 * 1024 * 1024)
47
48 //#define NODE_CACHE_SIZE (1 * (TOTAL_CACHE_SIZE / 40))
49 //#define LEAF_CACHE_SIZE (39 * (TOTAL_CACHE_SIZE / 40))
50
51 #define NODE_CACHE_SIZE (64 * 1024 * 1024)
52 #define LEAF_CACHE_SIZE (TOTAL_CACHE_SIZE - NODE_CACHE_SIZE)
53
54 #define SORTER_MEM (TOTAL_CACHE_SIZE - 1024 * 1024 * 2 * 4)
55
56 #define SCAN_LIMIT(x) (x)
57
58 //#define BDB_FILE "/data3/bdb_file"
59 #define BDB_FILE "/var/tmp/bdb_file"
60
61 // BDB settings
62 u_int32_t pagesize = LEAF_BLOCK_SIZE;
63 u_int bulkbufsize = 4 * 1024 * 1024;
64 u_int logbufsize = 8 * 1024 * 1024;
65 u_int cachesize = (TOTAL_CACHE_SIZE < 500 * 1024 * 1024) ? (4 * (TOTAL_CACHE_SIZE / 5)) : (TOTAL_CACHE_SIZE - 100 * 1024 * 1024);
66 u_int datasize = DATA_SIZE;
67 u_int keysize = KEY_SIZE;
68 u_int numitems = 0;
69
70 const char* letters = "abcdefghijklmnopqrstuvwxuz";
71
72 struct my_key
73 {
74 char keybuf[KEY_SIZE];
75 };
76
operator <<(std::ostream & o,const my_key & obj)77 std::ostream& operator << (std::ostream& o, const my_key& obj)
78 {
79 for (int i = 0; i < KEY_SIZE; ++i)
80 o << obj.keybuf[i];
81
82 return o;
83 }
84
operator ==(const my_key & a,const my_key & b)85 bool operator == (const my_key& a, const my_key& b)
86 {
87 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) == 0;
88 }
89
operator !=(const my_key & a,const my_key & b)90 bool operator != (const my_key& a, const my_key& b)
91 {
92 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) != 0;
93 }
94
operator <(const my_key & a,const my_key & b)95 bool operator < (const my_key& a, const my_key& b)
96 {
97 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) < 0;
98 }
99
operator >(const my_key & a,const my_key & b)100 bool operator > (const my_key& a, const my_key& b)
101 {
102 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) > 0;
103 }
104
operator <=(const my_key & a,const my_key & b)105 bool operator <= (const my_key& a, const my_key& b)
106 {
107 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) <= 0;
108 }
109
operator >=(const my_key & a,const my_key & b)110 bool operator >= (const my_key& a, const my_key& b)
111 {
112 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) >= 0;
113 }
114
115 struct my_data
116 {
117 char databuf[DATA_SIZE];
118 };
119
operator <<(std::ostream & o,const my_data & obj)120 std::ostream& operator << (std::ostream& o, const my_data& obj)
121 {
122 o << "DATA(size=" << sizeof(obj) << ") ";
123
124 return o;
125 }
126
127 my_key min_key, max_key;
128
129 struct comp_type : std::binary_function<my_key, my_key, bool>
130 {
operator ()comp_type131 bool operator () (const my_key& a, const my_key& b) const
132 {
133 return strncmp(a.keybuf, b.keybuf, KEY_SIZE) < 0;
134 }
max_valuecomp_type135 static my_key max_value()
136 {
137 return max_key;
138 }
min_valuecomp_type139 static my_key min_value()
140 {
141 return min_key;
142 }
143 };
144
145 /// TPIE declarations
146 // Key type.
147 typedef my_key bkey_t;
148
149 // Element type for the btree.
150 struct el_t {
151 bkey_t key_;
152 my_data data_;
el_tel_t153 el_t(bkey_t k) : key_(k) { }
el_tel_t154 el_t() { }
155 };
156 struct key_from_el {
operator ()key_from_el157 bkey_t operator () (const el_t& v) const
158 {
159 return v.key_;
160 }
161 };
162
163 // Temporary distinction btw UN*X and WIN, since there are some
164 // problems with the MMAP collection implementation.
165 #ifdef _WIN32
166 typedef AMI_btree<bkey_t, el_t, less<bkey_t>, key_from_el, BTE_COLLECTION_UFS> u_btree_t;
167 #else
168 typedef AMI_btree<bkey_t, el_t, less<bkey_t>, key_from_el> u_btree_t;
169 #endif
170
init()171 void init()
172 {
173 memset(max_key.keybuf, std::numeric_limits<unsigned char>::max(), KEY_SIZE);
174 memset(min_key.keybuf, std::numeric_limits<unsigned char>::min(), KEY_SIZE);
175 }
176
177 typedef stxxl::map<my_key, my_data, comp_type, NODE_BLOCK_SIZE, LEAF_BLOCK_SIZE> map_type;
178
179 #define REAL_NODE_BLOCK_SIZE map_type::node_block_type::raw_size
180 #define REAL_LEAF_BLOCK_SIZE map_type::leaf_block_type::raw_size
181 #define REAL_NODE_MELEMENTS map_type::node_block_type::size
182 #define REAL_LEAF_MELEMENTS map_type::leaf_block_type::size
183
184 typedef stxxl::VECTOR_GENERATOR<std::pair<my_key, my_data>, 1, 1>::result vector_type;
185 //typedef stxxl::vector<std::pair<my_key,my_data>,1,stxxl::lru_pager<1>,512*1024> vector_type;
186
187 //#define KEYPOS (i % KEY_SIZE)
188 //#define VALUE (myrand() % 26)
189
190 #if 0
191 unsigned ran32State = 0xdeadbeef;
192 inline unsigned myrand()
193 {
194 return (ran32State = 1664525 * ran32State + 1013904223);
195 }
196 inline void rand_key(stxxl::int64 pos, my_key& Key)
197 {
198 for (int i = 0; i < KEY_SIZE; ++i)
199 Key.keybuf[i] = letters[myrand() % 26];
200 }
201 #else // a longer pseudo random sequence
202 long long unsigned ran32State = 0xdeadbeef;
myrand()203 inline long long unsigned myrand()
204 {
205 return (ran32State = (ran32State * 0x5DEECE66DULL + 0xBULL) & 0xFFFFFFFFFFFFULL);
206 }
rand_key(stxxl::int64,my_key & Key)207 inline void rand_key(stxxl::int64 /*pos*/, my_key& Key)
208 {
209 long long unsigned r = myrand();
210 for (int i = 0; i < KEY_SIZE; ++i)
211 {
212 Key.keybuf[i] = letters[r % 26];
213 r >>= 5;
214 }
215 }
216 #endif
217
run_bdb_btree(stxxl::int64 ops)218 void run_bdb_btree(stxxl::int64 ops)
219 {
220 const char* filename = BDB_FILE;
221
222 my_key key1_storage;
223 my_data data1_storage;
224
225 unlink(filename);
226
227 memset(key1_storage.keybuf, 'a', KEY_SIZE);
228 memset(data1_storage.databuf, 'b', DATA_SIZE);
229
230 Db db(NULL, 0); // Instantiate the Db object
231
232 try {
233 db.set_errfile(stderr);
234 db.set_pagesize(pagesize);
235 db.set_cachesize(0, cachesize, 1);
236
237 // Open the database
238 db.open(NULL, // Transaction pointer
239 filename, // Database file name
240 NULL, // Optional logical database name
241 DB_BTREE, // Database access method
242 DB_CREATE, // Open flags
243 0); // File mode (using defaults)
244
245 // here we start with the tests
246 Dbt key1(key1_storage.keybuf, KEY_SIZE);
247 Dbt data1(data1_storage.databuf, DATA_SIZE);
248
249 stxxl::timer Timer;
250 stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
251 stxxl::int64 i;
252 //comp_type cmp_;
253
254 ran32State = 0xdeadbeef;
255
256 DB_BTREE_STAT* dbstat;
257
258 db.stat(NULL, &dbstat, 0);
259 STXXL_MSG("Records in map: " << dbstat->bt_ndata);
260
261 db.get_env()->memp_stat(NULL, NULL, DB_STAT_CLEAR);
262
263 Timer.start();
264
265 for (i = 0; i < n_inserts; ++i)
266 {
267 //key1_storage.keybuf[KEYPOS] = letters[VALUE];
268 rand_key(i, key1_storage);
269 db.put(NULL, &key1, &data1, DB_NOOVERWRITE);
270 }
271
272 Timer.stop();
273 db.stat(NULL, &dbstat, 0);
274 STXXL_MSG("Records in map: " << dbstat->bt_ndata);
275 STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
276 " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
277
278 db.get_env()->memp_stat_print(DB_STAT_CLEAR);
279
280 /////////////////////////////////////////
281 Timer.reset();
282 Timer.start();
283
284 Dbc* cursorp;
285 db.cursor(NULL, &cursorp, 0);
286
287 for (i = 0; i < n_locates; ++i)
288 {
289 //key1_storage.keybuf[KEYPOS] = letters[VALUE];
290 rand_key(i, key1_storage);
291 Dbt keyx(key1_storage.keybuf, KEY_SIZE);
292 Dbt datax(data1_storage.databuf, DATA_SIZE);
293
294 cursorp->get(&keyx, &datax, DB_SET_RANGE);
295 }
296
297 Timer.stop();
298 STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
299 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
300
301 db.get_env()->memp_stat_print(DB_STAT_CLEAR);
302
303 ////////////////////////////////////
304 Timer.reset();
305
306 Timer.start();
307
308 stxxl::int64 n_scanned = 0;
309
310 for (i = 0; i < n_range_queries; ++i)
311 {
312 //key1_storage.keybuf[KEYPOS] = letters[VALUE];
313 rand_key(i, key1_storage);
314 my_key last_key = key1_storage;
315 //key1_storage.keybuf[KEYPOS] = letters[VALUE];
316 rand_key(i, key1_storage);
317 if (last_key < key1_storage)
318 std::swap(last_key, key1_storage);
319
320 Dbt keyx(key1_storage.keybuf, KEY_SIZE);
321 Dbt datax(data1_storage.databuf, DATA_SIZE);
322
323 if (cursorp->get(&keyx, &datax, DB_SET_RANGE) == DB_NOTFOUND)
324 continue;
325
326 while (*((my_key*)keyx.get_data()) <= last_key)
327 {
328 ++n_scanned;
329 if (cursorp->get(&keyx, &datax, DB_NEXT) == DB_NOTFOUND)
330 break;
331 }
332
333 if (n_scanned >= 10 * n_range_queries)
334 {
335 ++i;
336 break;
337 }
338 }
339
340 n_range_queries = i;
341
342 Timer.stop();
343 if (cursorp != NULL)
344 cursorp->close();
345
346 STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
347 " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
348 " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
349
350 db.get_env()->memp_stat_print(DB_STAT_CLEAR);
351
352 //////////////////////////////////////
353
354 ran32State = 0xdeadbeef;
355 memset(key1_storage.keybuf, 'a', KEY_SIZE);
356
357 Timer.reset();
358 Timer.start();
359
360 for (i = 0; i < n_deletes; ++i)
361 {
362 //key1_storage.keybuf[KEYPOS] = letters[VALUE];
363 rand_key(i, key1_storage);
364 Dbt keyx(key1_storage.keybuf, KEY_SIZE);
365 db.del(NULL, &keyx, 0);
366 }
367
368 Timer.stop();
369 db.stat(NULL, &dbstat, 0);
370 STXXL_MSG("Records in map: " << dbstat->bt_ndata);
371 STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
372 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
373
374 db.get_env()->memp_stat_print(DB_STAT_CLEAR);
375
376 db.close(0);
377 }
378 catch (DbException& e)
379 {
380 STXXL_ERRMSG("DbException happened");
381 }
382 catch (std::exception& e)
383 {
384 STXXL_ERRMSG("std::exception happened");
385 }
386
387 unlink(filename);
388 }
389
run_stxxl_map(stxxl::int64 ops)390 void run_stxxl_map(stxxl::int64 ops)
391 {
392 map_type Map(NODE_CACHE_SIZE, LEAF_CACHE_SIZE);
393 Map.enable_prefetching();
394 stxxl::stats* Stats = stxxl::stats::get_instance();
395
396 std::pair<my_key, my_data> element;
397
398 memset(element.first.keybuf, 'a', KEY_SIZE);
399 memset(element.second.databuf, 'b', DATA_SIZE);
400
401 stxxl::timer Timer;
402 stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
403 stxxl::int64 i;
404 //comp_type cmp_;
405
406 ran32State = 0xdeadbeef;
407
408 //stxxl::random_number32 myrand;
409
410 STXXL_MSG("Records in map: " << Map.size());
411
412 stxxl::stats_data stats_begin(*Stats);
413 Timer.start();
414
415 for (i = 0; i < n_inserts; ++i)
416 {
417 //element.first.keybuf[KEYPOS] = letters[VALUE];
418 rand_key(i, element.first);
419 Map.insert(element);
420 }
421
422 Timer.stop();
423
424 STXXL_MSG("Records in map: " << Map.size());
425 STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
426 " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
427
428 std::cout << (stxxl::stats_data(*Stats) - stats_begin);
429
430 ////////////////////////////////////////////////
431
432 const map_type& CMap(Map); // const map reference
433
434 stats_begin = stxxl::stats_data(*Stats);
435 Timer.reset();
436 Timer.start();
437
438 for (i = 0; i < n_locates; ++i)
439 {
440 //element.first.keybuf[KEYPOS] = letters[VALUE];
441 rand_key(i, element.first);
442 CMap.lower_bound(element.first);
443 }
444
445 Timer.stop();
446 STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
447 " seconds : " << (double(n_locates) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
448
449 std::cout << (stxxl::stats_data(*Stats) - stats_begin);
450
451 ////////////////////////////////////
452
453 stats_begin = stxxl::stats_data(*Stats);
454 Timer.reset();
455 Timer.start();
456
457 stxxl::int64 n_scanned = 0; //, skipped_qieries = 0;
458
459 for (i = 0; i < n_range_queries; ++i)
460 {
461 //element.first.keybuf[KEYPOS] = letters[VALUE];
462 rand_key(i, element.first);
463 my_key begin_key = element.first;
464 map_type::const_iterator begin = CMap.lower_bound(element.first);
465 //element.first.keybuf[KEYPOS] = letters[VALUE];
466 rand_key(i, element.first);
467 map_type::const_iterator beyond = CMap.lower_bound(element.first);
468 if (element.first < begin_key)
469 std::swap(begin, beyond);
470
471 while (begin != beyond)
472 {
473 my_data tmp = begin->second;
474 stxxl::STXXL_UNUSED(tmp);
475 ++n_scanned;
476 ++begin;
477 }
478 if (n_scanned >= 10 * n_range_queries)
479 {
480 ++i;
481 break;
482 }
483 }
484
485 n_range_queries = i;
486
487 Timer.stop();
488 STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
489 " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
490 " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
491
492 std::cout << (stxxl::stats_data(*Stats) - stats_begin);
493
494 //////////////////////////////////////
495 ran32State = 0xdeadbeef;
496 memset(element.first.keybuf, 'a', KEY_SIZE);
497 memset(element.second.databuf, 'b', DATA_SIZE);
498
499 stats_begin = stxxl::stats_data(*Stats);
500 Timer.reset();
501 Timer.start();
502
503 for (i = 0; i < n_deletes; ++i)
504 {
505 //element.first.keybuf[KEYPOS] = letters[VALUE];
506 rand_key(i, element.first);
507 Map.erase(element.first);
508 }
509
510 Timer.stop();
511 STXXL_MSG("Records in map: " << Map.size());
512 STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
513 " seconds : " << (double(n_deletes) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
514
515 std::cout << (stxxl::stats_data(*Stats) - stats_begin);
516 }
517
518 class rand_key_gen
519 {
520 stxxl::int64 counter;
521 my_key& current;
522 stxxl::random_number32 myrand;
523 rand_key_gen();
524
525 public:
526 typedef my_key value_type;
527
rand_key_gen(stxxl::int64 el,my_key & cur)528 rand_key_gen(stxxl::int64 el, my_key& cur)
529 : counter(el), current(cur)
530 {
531 //const stxxl::int64 & i = counter;
532 //current.keybuf[KEYPOS] = letters[VALUE];
533 rand_key(counter, current);
534 }
operator *()535 const my_key& operator * () { return current; }
operator ->()536 const my_key* operator -> () { return ¤t; }
537
operator ++()538 rand_key_gen& operator ++ ()
539 {
540 --counter;
541 //const stxxl::int64 & i = counter;
542 //current.keybuf[KEYPOS] = letters[VALUE];
543 rand_key(counter, current);
544 return *this;
545 }
empty() const546 bool empty() const { return counter == 0; }
547 };
548
549 template <class InputType>
550 class key2pair
551 {
552 InputType& in;
553 std::pair<my_key, my_data> current;
554 key2pair();
555
556 public:
557 typedef std::pair<my_key, my_data> value_type;
558
key2pair(InputType & in_)559 key2pair(InputType& in_) : in(in_)
560 {
561 if (!in.empty())
562 current.first = *in;
563 }
operator *()564 const value_type& operator * () { return current; }
operator ->()565 const value_type* operator -> () { return ¤t; }
566
operator ++()567 key2pair& operator ++ ()
568 {
569 ++in;
570 if (!empty())
571 current.first = *in;
572
573 return *this;
574 }
empty() const575 bool empty() const { return in.empty(); }
576 };
577
run_stxxl_map_big(stxxl::int64 n,unsigned ops)578 void run_stxxl_map_big(stxxl::int64 n, unsigned ops)
579 {
580 stxxl::stats* Stats = stxxl::stats::get_instance();
581
582 std::pair<my_key, my_data> element;
583
584 memset(element.first.keybuf, 'a', KEY_SIZE);
585 memset(element.second.databuf, 'b', DATA_SIZE);
586
587 stxxl::timer Timer;
588 stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
589 stxxl::int64 i;
590 //comp_type cmp_;
591
592 ran32State = 0xdeadbeef;
593
594 //stxxl::random_number32 myrand;
595
596 Timer.start();
597
598 vector_type SortedSeq(n);
599 const vector_type& CSortedSeq(SortedSeq);
600 {
601 rand_key_gen Gen(n, element.first);
602 typedef stxxl::stream::sort<rand_key_gen, comp_type> sorter_type;
603 sorter_type Sorter(Gen, comp_type(), SORTER_MEM);
604 typedef key2pair<sorter_type> key2pair_type;
605 key2pair_type Key2Pair(Sorter);
606 stxxl::stream::materialize(Key2Pair, SortedSeq.begin());
607 }
608
609 Timer.stop();
610
611 STXXL_MSG("Finished sorting input. Elapsed time: " <<
612 (Timer.mseconds() / 1000.) << " seconds.");
613
614 stxxl::stats_data stats_begin(*Stats);
615 Timer.reset();
616 Timer.start();
617
618 // bulk construction
619 map_type Map(CSortedSeq.begin(),
620 CSortedSeq.end(),
621 NODE_CACHE_SIZE, LEAF_CACHE_SIZE, true);
622
623 Timer.stop();
624
625 STXXL_MSG("Records in map: " << Map.size());
626 STXXL_MSG("Construction elapsed time: " << (Timer.mseconds() / 1000.) <<
627 " seconds : " << (double(n) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
628
629 Map.print_statistics(cout);
630 Map.reset_statistics();
631 std::cout << (stxxl::stats_data(*Stats) - stats_begin);
632
633 ////////////////////////////////////////
634
635 Map.disable_prefetching();
636
637 stats_begin = stxxl::stats_data(*Stats);
638 Timer.reset();
639 Timer.start();
640
641 for (i = 0; i < n_inserts; ++i)
642 {
643 //element.first.keybuf[KEYPOS] = letters[VALUE];
644 rand_key(i, element.first);
645 Map.insert(element);
646 }
647
648 Timer.stop();
649
650 STXXL_MSG("Records in map: " << Map.size());
651 STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
652 " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
653
654 Map.print_statistics(cout);
655 Map.reset_statistics();
656 std::cout << (stxxl::stats_data(*Stats) - stats_begin);
657
658 ////////////////////////////////////
659
660 const map_type& CMap(Map); // const map reference
661
662 Timer.reset();
663 Timer.start();
664
665 for (i = 0; i < n_locates; ++i)
666 {
667 //element.first.keybuf[KEYPOS] = letters[VALUE];
668 rand_key(i, element.first);
669 CMap.lower_bound(element.first);
670 }
671
672 Timer.stop();
673 STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
674 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
675
676 Map.print_statistics(cout);
677 Map.reset_statistics();
678 std::cout << (stxxl::stats_data(*Stats) - stats_begin);
679
680 ////////////////////////////////////
681
682 Map.enable_prefetching();
683
684 stats_begin = stxxl::stats_data(*Stats);
685 Timer.reset();
686 Timer.start();
687
688 stxxl::int64 n_scanned = 0; //, skipped_qieries = 0;
689
690 for (i = 0; i < n_range_queries; ++i)
691 {
692 //element.first.keybuf[KEYPOS] = letters[VALUE];
693 rand_key(i, element.first);
694 my_key begin_key = element.first;
695 map_type::const_iterator begin = CMap.lower_bound(element.first);
696 //element.first.keybuf[KEYPOS] = letters[VALUE];
697 rand_key(i, element.first);
698 map_type::const_iterator beyond = CMap.lower_bound(element.first);
699 if (element.first < begin_key)
700 std::swap(begin, beyond);
701
702 /*
703 STXXL_MSG("Looking "<<element.first<<" scanned: "<<n_scanned);
704
705 if(beyond==CMap.end())
706 {
707 STXXL_MSG("Upper bound "<<"END");
708 }
709 else
710 {
711 STXXL_MSG("Upper bound "<<((element.first>begin_key)?element.first:begin_key));
712 }*/
713
714 while (begin != beyond)
715 {
716 ++n_scanned;
717 ++begin;
718 }
719
720 if (n_scanned >= SCAN_LIMIT(n))
721 {
722 ++i;
723 break;
724 }
725 }
726
727 n_range_queries = i;
728
729 Timer.stop();
730 STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
731 " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
732 " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
733
734 Map.print_statistics(cout);
735 Map.reset_statistics();
736 std::cout << (stxxl::stats_data(*Stats) - stats_begin);
737
738 //////////////////////////////////////
739
740 ran32State = 0xdeadbeef;
741 memset(element.first.keybuf, 'a', KEY_SIZE);
742 memset(element.second.databuf, 'b', DATA_SIZE);
743
744 Map.disable_prefetching();
745
746 stats_begin = stxxl::stats_data(*Stats);
747 Timer.reset();
748 Timer.start();
749
750 for (i = n_deletes; i > 0; --i)
751 {
752 //element.first.keybuf[KEYPOS] = letters[VALUE];
753 rand_key(i, element.first);
754 Map.erase(element.first);
755 }
756
757 Timer.stop();
758 STXXL_MSG("Records in map: " << Map.size());
759 STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
760 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
761
762 Map.print_statistics(cout);
763 Map.reset_statistics();
764 std::cout << (stxxl::stats_data(*Stats) - stats_begin);
765 }
766
767 /////////////////////////////////////////////////////////////////////////
768
769 typedef AMI_STREAM<el_t> stream_t;
770
771 char dummy;
772
773 class MyFilter
774 {
775 public:
operator ()(const el_t & v) const776 bool operator () (const el_t& v) const
777 {
778 dummy += v.key_.keybuf[0]; // touch element
779 return true;
780 }
781 };
782
run_tpie_btree_big(stxxl::int64 n,unsigned ops)783 void run_tpie_btree_big(stxxl::int64 n, unsigned ops)
784 {
785 el_t element;
786
787 memset(element.key_.keybuf, 'a', KEY_SIZE);
788 memset(element.data_.databuf, 'b', DATA_SIZE);
789
790 // Log debugging info from the application, but not from the library.
791 tpie_log_init(TPIE_LOG_APP_DEBUG);
792 MM_manager.set_memory_limit(TOTAL_CACHE_SIZE);
793 MM_manager.enforce_memory_limit();
794
795 stream_t* is = new stream_t;
796
797 stxxl::timer Timer;
798 stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
799 stxxl::int64 i;
800 //comp_type cmp_;
801
802 ran32State = 0xdeadbeef;
803
804 //stxxl::random_number32 myrand;
805
806 Timer.start();
807
808 {
809 rand_key_gen Gen(n, element.key_);
810 typedef stxxl::stream::sort<rand_key_gen, comp_type> sorter_type;
811 sorter_type Sorter(Gen, comp_type(), SORTER_MEM);
812 //typedef key2pair<sorter_type> key2pair_type;
813 //key2pair_type Key2Pair(Sorter);
814 while (!Sorter.empty())
815 {
816 is->write_item(*Sorter);
817 ++Sorter;
818 }
819 }
820
821 Timer.stop();
822
823 STXXL_MSG("Finished sorting input. Elapsed time: " <<
824 (Timer.mseconds() / 1000.) << " seconds.");
825
826 Timer.reset();
827 Timer.start();
828
829 // bulk construction
830 u_btree_t* u_btree;
831 AMI_btree_params params;
832 params.node_block_factor = NODE_BLOCK_SIZE / 4096;
833 params.leaf_block_factor = LEAF_BLOCK_SIZE / 4096;
834 params.leaf_cache_size = LEAF_CACHE_SIZE / LEAF_BLOCK_SIZE;
835 params.node_cache_size = NODE_CACHE_SIZE / NODE_BLOCK_SIZE;
836
837 u_btree = new u_btree_t(params);
838
839 using std::cout;
840 using std::cerr;
841
842 if (!u_btree->is_valid()) {
843 cerr << "Error initializing btree. Aborting.\n";
844 delete u_btree;
845 exit(1);
846 }
847
848 if (u_btree->load_sorted(is, 1.0, 1.0) != AMI_ERROR_NO_ERROR)
849 cerr << "Error during bulk loading.\n";
850
851 Timer.stop();
852
853 STXXL_MSG("Records in map: " << u_btree->size());
854 STXXL_MSG("Construction elapsed time: " << (Timer.mseconds() / 1000.) <<
855 " seconds : " << (double(n) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
856
857 ////////////////////////////////////////
858 Timer.reset();
859
860 Timer.start();
861
862 for (i = 0; i < n_inserts; ++i)
863 {
864 //element.first.keybuf[KEYPOS] = letters[VALUE];
865 rand_key(i, element.key_);
866 u_btree->insert(element);
867 }
868
869 Timer.stop();
870
871 STXXL_MSG("Records in map: " << u_btree->size());
872 STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
873 " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
874
875 ////////////////////////////////////////////////
876 Timer.reset();
877
878 Timer.start();
879
880 el_t result;
881 for (i = 0; i < n_locates; ++i)
882 {
883 //element.first.keybuf[KEYPOS] = letters[VALUE];
884 rand_key(i, element.key_);
885 u_btree->succ(element.key_, result);
886 }
887
888 Timer.stop();
889 STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
890 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
891
892 ////////////////////////////////////
893 Timer.reset();
894
895 Timer.start();
896
897 stxxl::int64 n_scanned = 0; //, skipped_qieries = 0;
898 MyFilter filter;
899
900 for (i = 0; i < n_range_queries; ++i)
901 {
902 rand_key(i, element.key_);
903 my_key begin_key = element.key_;
904 rand_key(i, element.key_);
905 if (element.key_ < begin_key)
906 n_scanned += u_btree->range_query(element.key_, begin_key, NULL, filter);
907
908 else
909 n_scanned += u_btree->range_query(begin_key, element.key_, NULL, filter);
910
911 if (n_scanned >= SCAN_LIMIT(n))
912 {
913 ++i;
914 break;
915 }
916 }
917
918 n_range_queries = i;
919
920 Timer.stop();
921 STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
922 " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
923 " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
924
925 //////////////////////////////////////
926 ran32State = 0xdeadbeef;
927 memset(element.key_.keybuf, 'a', KEY_SIZE);
928 memset(element.data_.databuf, 'b', DATA_SIZE);
929
930 Timer.reset();
931
932 Timer.start();
933
934 for (i = n_deletes; i > 0; --i)
935 {
936 //element.first.keybuf[KEYPOS] = letters[VALUE];
937 rand_key(i, element.key_);
938 u_btree->erase(element.key_);
939 }
940
941 Timer.stop();
942 STXXL_MSG("Records in map: " << u_btree->size());
943 STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
944 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
945 delete u_btree;
946 delete is;
947 }
948
run_bdb_btree_big(stxxl::int64 n,unsigned ops)949 void run_bdb_btree_big(stxxl::int64 n, unsigned ops)
950 {
951 const char* filename = BDB_FILE;
952
953 my_key key1_storage;
954 my_data data1_storage;
955
956 #ifdef BDB_BULK_SCAN
957 int* bulk_buffer = new int[logbufsize / sizeof(int)];
958 #endif
959
960 unlink(filename);
961
962 memset(key1_storage.keybuf, 'a', KEY_SIZE);
963 memset(data1_storage.databuf, 'b', DATA_SIZE);
964
965 Db db(NULL, 0); // Instantiate the Db object
966
967 try {
968 // here we start with the tests
969 Dbt key1(key1_storage.keybuf, KEY_SIZE);
970 Dbt data1(data1_storage.databuf, DATA_SIZE);
971
972 stxxl::timer Timer;
973 stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
974 stxxl::int64 i;
975 //comp_type cmp_;
976
977 ran32State = 0xdeadbeef;
978
979 Timer.start();
980
981 vector_type SortedSeq(n);
982 //const vector_type & CSortedSeq(SortedSeq);
983 {
984 rand_key_gen Gen(n, key1_storage);
985 typedef stxxl::stream::sort<rand_key_gen, comp_type> sorter_type;
986 sorter_type Sorter(Gen, comp_type(), SORTER_MEM);
987 typedef key2pair<sorter_type> key2pair_type;
988 key2pair_type Key2Pair(Sorter);
989 stxxl::stream::materialize(Key2Pair, SortedSeq.begin());
990 }
991
992 Timer.stop();
993
994 STXXL_MSG("Finished sorting input. Elapsed time: " << (Timer.mseconds() / 1000.) << " seconds.");
995
996 db.set_errfile(stderr);
997 db.set_pagesize(pagesize);
998 db.set_cachesize(0, cachesize, 10);
999
1000 STXXL_MSG("BDB cache size set.");
1001
1002 // Open the database
1003 db.open(NULL, // Transaction pointer
1004 filename, // Database file name
1005 NULL, // Optional logical database name
1006 DB_BTREE, // Database access method
1007 DB_CREATE, // Open flags
1008 0); // File mode (using defaults)
1009
1010 db.get_env()->memp_stat(NULL, NULL, DB_STAT_CLEAR);
1011
1012 Timer.reset();
1013 Timer.start();
1014
1015 // DBD does not have bulk construction
1016 // however inserting in sorted order might help
1017 // to improve performance
1018 vector_type::const_iterator cit = SortedSeq.begin();
1019 for (i = 0; i < n; ++i, ++cit)
1020 {
1021 key1_storage = cit->first;
1022 db.put(NULL, &key1, &data1, DB_NOOVERWRITE);
1023 }
1024
1025 Timer.stop();
1026
1027 DB_BTREE_STAT* dbstat;
1028 db.stat(NULL, &dbstat, 0);
1029 STXXL_MSG("Records in map: " << dbstat->bt_ndata);
1030 STXXL_MSG("Construction elapsed time: " << (Timer.mseconds() / 1000.) <<
1031 " seconds : " << (double(n) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
1032
1033 db.stat_print(0);
1034 db.get_env()->memp_stat_print(DB_STAT_CLEAR);
1035 ////////////////////////////////////////
1036
1037 Timer.reset();
1038 Timer.start();
1039
1040 for (i = 0; i < n_inserts; ++i)
1041 {
1042 //key1_storage.keybuf[KEYPOS] = letters[VALUE];
1043 rand_key(i, key1_storage);
1044 db.put(NULL, &key1, &data1, DB_NOOVERWRITE);
1045 }
1046
1047 Timer.stop();
1048 db.stat(NULL, &dbstat, 0);
1049 STXXL_MSG("Records in map: " << dbstat->bt_ndata);
1050 STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
1051 " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
1052
1053 db.stat_print(0);
1054 db.get_env()->memp_stat_print(DB_STAT_CLEAR);
1055
1056 /////////////////////////////////////////
1057 Timer.reset();
1058 Timer.start();
1059
1060 Dbc* cursorp;
1061 db.cursor(NULL, &cursorp, 0);
1062
1063 for (i = 0; i < n_locates; ++i)
1064 {
1065 //key1_storage.keybuf[KEYPOS] = letters[VALUE];
1066 rand_key(i, key1_storage);
1067 Dbt keyx(key1_storage.keybuf, KEY_SIZE);
1068 Dbt datax(data1_storage.databuf, DATA_SIZE);
1069
1070 cursorp->get(&keyx, &datax, DB_SET_RANGE);
1071 }
1072
1073 Timer.stop();
1074 STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
1075 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
1076
1077 db.stat_print(0);
1078 db.get_env()->memp_stat_print(DB_STAT_CLEAR);
1079
1080 ////////////////////////////////////
1081 Timer.reset();
1082
1083 Timer.start();
1084
1085 stxxl::int64 n_scanned = 0;
1086
1087 for (i = 0; i < n_range_queries; ++i)
1088 {
1089 //key1_storage.keybuf[KEYPOS] = letters[VALUE];
1090 rand_key(i, key1_storage);
1091 my_key last_key = key1_storage;
1092 //key1_storage.keybuf[KEYPOS] = letters[VALUE];
1093 rand_key(i, key1_storage);
1094 if (last_key < key1_storage)
1095 std::swap(last_key, key1_storage);
1096
1097 //STXXL_MSG("Looking "<<key1_storage<<" scanned: "<<n_scanned);
1098 //STXXL_MSG("Upper bound "<<last_key);
1099
1100 Dbt keyx(key1_storage.keybuf, KEY_SIZE);
1101 #ifdef BDB_BULK_SCAN
1102 Dbt datax(bulk_buffer, DATA_SIZE);
1103 datax.set_ulen(logbufsize);
1104 datax.set_flags(DB_DBT_USERMEM);
1105 #else
1106 Dbt datax(data1_storage.databuf, DATA_SIZE);
1107 #endif
1108
1109 #ifdef BDB_BULK_SCAN
1110 if (cursorp->get(&keyx, &datax, DB_SET_RANGE | DB_MULTIPLE_KEY) == DB_NOTFOUND)
1111 continue;
1112
1113 do
1114 {
1115 DbMultipleKeyDataIterator BulkIterator(datax);
1116 Dbt key1, data1;
1117 while (BulkIterator.next(key1, data1) &&
1118 *((my_key*)key1.get_data()) <= last_key)
1119 {
1120 ++n_scanned;
1121 //STXXL_MSG("Result "<<*((my_key *)key1.get_data()));
1122 }
1123 if (cursorp->get(&keyx, &datax, DB_NEXT | DB_MULTIPLE_KEY) == DB_NOTFOUND)
1124 break;
1125
1126 if (*((my_key*)keyx.get_data()) > last_key)
1127 {
1128 break;
1129 }
1130 } while (1);
1131
1132 #else
1133 if (cursorp->get(&keyx, &datax, DB_SET_RANGE) == DB_NOTFOUND)
1134 continue;
1135
1136 while (*((my_key*)keyx.get_data()) <= last_key)
1137 {
1138 ++n_scanned;
1139 if (cursorp->get(&keyx, &datax, DB_NEXT) == DB_NOTFOUND)
1140 break;
1141 }
1142 #endif
1143
1144 if (n_scanned >= SCAN_LIMIT(n))
1145 {
1146 ++i;
1147 break;
1148 }
1149 }
1150
1151 n_range_queries = i;
1152
1153 Timer.stop();
1154 if (cursorp != NULL)
1155 cursorp->close();
1156
1157 STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
1158 " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
1159 " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
1160
1161 db.stat_print(0);
1162 db.get_env()->memp_stat_print(DB_STAT_CLEAR);
1163
1164 //////////////////////////////////////
1165
1166 ran32State = 0xdeadbeef;
1167 memset(key1_storage.keybuf, 'a', KEY_SIZE);
1168
1169 Timer.reset();
1170 Timer.start();
1171
1172 for (i = 0; i < n_deletes; ++i)
1173 {
1174 //key1_storage.keybuf[KEYPOS] = letters[VALUE];
1175 rand_key(i, key1_storage);
1176 Dbt keyx(key1_storage.keybuf, KEY_SIZE);
1177 db.del(NULL, &keyx, 0);
1178 }
1179
1180 Timer.stop();
1181 db.stat(NULL, &dbstat, 0);
1182 STXXL_MSG("Records in map: " << dbstat->bt_ndata);
1183 STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
1184 " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
1185
1186 db.stat_print(0);
1187 db.get_env()->memp_stat_print(DB_STAT_CLEAR);
1188
1189 db.close(0);
1190 }
1191 catch (DbException& e)
1192 {
1193 STXXL_ERRMSG("DbException happened");
1194 }
1195 catch (std::exception& e)
1196 {
1197 STXXL_ERRMSG("std::exception happened");
1198 }
1199
1200 unlink(filename);
1201
1202 #ifdef BDB_BULK_SCAN
1203 delete[] bulk_buffer;
1204 #endif
1205 }
1206
main(int argc,char * argv[])1207 int main(int argc, char* argv[])
1208 {
1209 STXXL_MSG("stxxl::map Real Node block size: " << REAL_NODE_BLOCK_SIZE << " bytes");
1210 STXXL_MSG("stxxl::map Real Leaf block size: " << REAL_LEAF_BLOCK_SIZE << " bytes");
1211 STXXL_MSG("stxxl::map Node max elements : " << REAL_NODE_MELEMENTS);
1212 STXXL_MSG("stxxl::map Leaf max elements : " << REAL_LEAF_MELEMENTS);
1213 #if STXXL_DIRECT_IO_OFF
1214 STXXL_MSG("STXXL_DIRECT_IO_OFF is defined");
1215 #else
1216 STXXL_MSG("STXXL_DIRECT_IO_OFF is NOT defined");
1217 #endif
1218
1219 if (argc < 3)
1220 {
1221 STXXL_MSG("Usage: " << argv[0] << " version #ops");
1222 STXXL_MSG("\t version = 1: test stxxl map");
1223 STXXL_MSG("\t version = 2: test Berkeley DB btree");
1224 STXXL_MSG("\t version = 3: big test stxxl map");
1225 STXXL_MSG("\t version = 4: big test Berkeley DB btree");
1226 STXXL_MSG("\t version = 5: big test TPIE btree");
1227 return -1;
1228 }
1229
1230 init();
1231
1232 int version = atoi(argv[1]);
1233 stxxl::uint64 ops = stxxl::atouint64(argv[2]);
1234
1235 STXXL_MSG("Running version : " << version);
1236 STXXL_MSG("Operations to perform: " << ops);
1237 STXXL_MSG("Btree cache size : " << TOTAL_CACHE_SIZE << " bytes");
1238 STXXL_MSG("Leaf block size : " << LEAF_BLOCK_SIZE << " bytes");
1239
1240 switch (version)
1241 {
1242 case 1:
1243 run_stxxl_map(ops);
1244 break;
1245 case 2:
1246 run_bdb_btree(ops);
1247 break;
1248 case 3:
1249 run_stxxl_map_big(ops, 100000);
1250 break;
1251 case 4:
1252 run_bdb_btree_big(ops, 100000);
1253 break;
1254 case 5:
1255 run_tpie_btree_big(ops, 100000);
1256 break;
1257 default:
1258 STXXL_MSG("Unsupported version " << version);
1259 }
1260 }
1261