1 /***************************************************************************
2  *  tools/benchmarks/berkeley_db_benchmark.cpp
3  *
4  *  Part of the STXXL. See http://stxxl.sourceforge.net
5  *
6  *  Copyright (C) 2006 Roman Dementiev <dementiev@ira.uka.de>
7  *  Copyright (C) 2009, 2010 Andreas Beckmann <beckmann@cs.uni-frankfurt.de>
8  *
9  *  Distributed under the Boost Software License, Version 1.0.
10  *  (See accompanying file LICENSE_1_0.txt or copy at
11  *  http://www.boost.org/LICENSE_1_0.txt)
12  **************************************************************************/
13 
14 //! \example containers/berkeley_db_benchmark.cpp
15 //! This is a benchmark mentioned in the paper
16 //! R. Dementiev, L. Kettner, P. Sanders "STXXL: standard template library for XXL data sets"
17 //! Software: Practice and Experience
18 //! Volume 38, Issue 6, Pages 589-637, May 2008
19 //! DOI: 10.1002/spe.844
20 
21 #include <stxxl/vector>
22 #include <stxxl/map>
23 #include <stxxl/timer>
24 #include <stxxl/stream>
25 
26 ///// BDB header ////////////
27 #include <db_cxx.h>
28 
29 ///////// TPIE headers //////////
30 #include "app_config.h"
31 #include <portability.h>
32 #include <ami_btree.h>
33 ////////////////////////////
34 
35 #define BDB_BULK_SCAN
36 
37 #define KEY_SIZE                8
38 #define DATA_SIZE               32
39 
40 #define NODE_BLOCK_SIZE         (32 * 1024)
41 #define LEAF_BLOCK_SIZE         (32 * 1024)
42 
43 #define LEAF_BLOCK_SIZE         (32 * 1024)
44 
45 #define TOTAL_CACHE_SIZE        (750 * 1024 * 1024)
46 //#define TOTAL_CACHE_SIZE      (150 * 1024 * 1024)
47 
48 //#define NODE_CACHE_SIZE       (1 * (TOTAL_CACHE_SIZE / 40))
49 //#define LEAF_CACHE_SIZE       (39 * (TOTAL_CACHE_SIZE / 40))
50 
51 #define NODE_CACHE_SIZE         (64 * 1024 * 1024)
52 #define LEAF_CACHE_SIZE         (TOTAL_CACHE_SIZE - NODE_CACHE_SIZE)
53 
54 #define SORTER_MEM              (TOTAL_CACHE_SIZE - 1024 * 1024 * 2 * 4)
55 
56 #define SCAN_LIMIT(x)   (x)
57 
58 //#define BDB_FILE "/data3/bdb_file"
59 #define BDB_FILE "/var/tmp/bdb_file"
60 
61 // BDB settings
62 u_int32_t pagesize = LEAF_BLOCK_SIZE;
63 u_int bulkbufsize = 4 * 1024 * 1024;
64 u_int logbufsize = 8 * 1024 * 1024;
65 u_int cachesize = (TOTAL_CACHE_SIZE < 500 * 1024 * 1024) ? (4 * (TOTAL_CACHE_SIZE / 5)) : (TOTAL_CACHE_SIZE - 100 * 1024 * 1024);
66 u_int datasize = DATA_SIZE;
67 u_int keysize = KEY_SIZE;
68 u_int numitems = 0;
69 
70 const char* letters = "abcdefghijklmnopqrstuvwxuz";
71 
72 struct my_key
73 {
74     char keybuf[KEY_SIZE];
75 };
76 
operator <<(std::ostream & o,const my_key & obj)77 std::ostream& operator << (std::ostream& o, const my_key& obj)
78 {
79     for (int i = 0; i < KEY_SIZE; ++i)
80         o << obj.keybuf[i];
81 
82     return o;
83 }
84 
operator ==(const my_key & a,const my_key & b)85 bool operator == (const my_key& a, const my_key& b)
86 {
87     return strncmp(a.keybuf, b.keybuf, KEY_SIZE) == 0;
88 }
89 
operator !=(const my_key & a,const my_key & b)90 bool operator != (const my_key& a, const my_key& b)
91 {
92     return strncmp(a.keybuf, b.keybuf, KEY_SIZE) != 0;
93 }
94 
operator <(const my_key & a,const my_key & b)95 bool operator < (const my_key& a, const my_key& b)
96 {
97     return strncmp(a.keybuf, b.keybuf, KEY_SIZE) < 0;
98 }
99 
operator >(const my_key & a,const my_key & b)100 bool operator > (const my_key& a, const my_key& b)
101 {
102     return strncmp(a.keybuf, b.keybuf, KEY_SIZE) > 0;
103 }
104 
operator <=(const my_key & a,const my_key & b)105 bool operator <= (const my_key& a, const my_key& b)
106 {
107     return strncmp(a.keybuf, b.keybuf, KEY_SIZE) <= 0;
108 }
109 
operator >=(const my_key & a,const my_key & b)110 bool operator >= (const my_key& a, const my_key& b)
111 {
112     return strncmp(a.keybuf, b.keybuf, KEY_SIZE) >= 0;
113 }
114 
115 struct my_data
116 {
117     char databuf[DATA_SIZE];
118 };
119 
operator <<(std::ostream & o,const my_data & obj)120 std::ostream& operator << (std::ostream& o, const my_data& obj)
121 {
122     o << "DATA(size=" << sizeof(obj) << ") ";
123 
124     return o;
125 }
126 
127 my_key min_key, max_key;
128 
129 struct comp_type : std::binary_function<my_key, my_key, bool>
130 {
operator ()comp_type131     bool operator () (const my_key& a, const my_key& b) const
132     {
133         return strncmp(a.keybuf, b.keybuf, KEY_SIZE) < 0;
134     }
max_valuecomp_type135     static my_key max_value()
136     {
137         return max_key;
138     }
min_valuecomp_type139     static my_key min_value()
140     {
141         return min_key;
142     }
143 };
144 
145 /// TPIE  declarations
146 // Key type.
147 typedef my_key bkey_t;
148 
149 // Element type for the btree.
150 struct el_t {
151     bkey_t key_;
152     my_data data_;
el_tel_t153     el_t(bkey_t k) : key_(k) { }
el_tel_t154     el_t() { }
155 };
156 struct key_from_el {
operator ()key_from_el157     bkey_t operator () (const el_t& v) const
158     {
159         return v.key_;
160     }
161 };
162 
163 // Temporary distinction btw UN*X and WIN, since there are some
164 // problems with the MMAP collection implementation.
165 #ifdef _WIN32
166 typedef AMI_btree<bkey_t, el_t, less<bkey_t>, key_from_el, BTE_COLLECTION_UFS> u_btree_t;
167 #else
168 typedef AMI_btree<bkey_t, el_t, less<bkey_t>, key_from_el> u_btree_t;
169 #endif
170 
init()171 void init()
172 {
173     memset(max_key.keybuf, std::numeric_limits<unsigned char>::max(), KEY_SIZE);
174     memset(min_key.keybuf, std::numeric_limits<unsigned char>::min(), KEY_SIZE);
175 }
176 
177 typedef stxxl::map<my_key, my_data, comp_type, NODE_BLOCK_SIZE, LEAF_BLOCK_SIZE> map_type;
178 
179 #define REAL_NODE_BLOCK_SIZE map_type::node_block_type::raw_size
180 #define REAL_LEAF_BLOCK_SIZE map_type::leaf_block_type::raw_size
181 #define REAL_NODE_MELEMENTS map_type::node_block_type::size
182 #define REAL_LEAF_MELEMENTS map_type::leaf_block_type::size
183 
184 typedef stxxl::VECTOR_GENERATOR<std::pair<my_key, my_data>, 1, 1>::result vector_type;
185 //typedef stxxl::vector<std::pair<my_key,my_data>,1,stxxl::lru_pager<1>,512*1024>  vector_type;
186 
187 //#define KEYPOS        (i % KEY_SIZE)
188 //#define VALUE         (myrand() % 26)
189 
190 #if 0
191 unsigned ran32State = 0xdeadbeef;
192 inline unsigned myrand()
193 {
194     return (ran32State = 1664525 * ran32State + 1013904223);
195 }
196 inline void rand_key(stxxl::int64 pos, my_key& Key)
197 {
198     for (int i = 0; i < KEY_SIZE; ++i)
199         Key.keybuf[i] = letters[myrand() % 26];
200 }
201 #else // a longer pseudo random sequence
202 long long unsigned ran32State = 0xdeadbeef;
myrand()203 inline long long unsigned myrand()
204 {
205     return (ran32State = (ran32State * 0x5DEECE66DULL + 0xBULL) & 0xFFFFFFFFFFFFULL);
206 }
rand_key(stxxl::int64,my_key & Key)207 inline void rand_key(stxxl::int64 /*pos*/, my_key& Key)
208 {
209     long long unsigned r = myrand();
210     for (int i = 0; i < KEY_SIZE; ++i)
211     {
212         Key.keybuf[i] = letters[r % 26];
213         r >>= 5;
214     }
215 }
216 #endif
217 
run_bdb_btree(stxxl::int64 ops)218 void run_bdb_btree(stxxl::int64 ops)
219 {
220     const char* filename = BDB_FILE;
221 
222     my_key key1_storage;
223     my_data data1_storage;
224 
225     unlink(filename);
226 
227     memset(key1_storage.keybuf, 'a', KEY_SIZE);
228     memset(data1_storage.databuf, 'b', DATA_SIZE);
229 
230     Db db(NULL, 0);             // Instantiate the Db object
231 
232     try {
233         db.set_errfile(stderr);
234         db.set_pagesize(pagesize);
235         db.set_cachesize(0, cachesize, 1);
236 
237         // Open the database
238         db.open(NULL,           // Transaction pointer
239                 filename,       // Database file name
240                 NULL,           // Optional logical database name
241                 DB_BTREE,       // Database access method
242                 DB_CREATE,      // Open flags
243                 0);             // File mode (using defaults)
244 
245         // here we start with the tests
246         Dbt key1(key1_storage.keybuf, KEY_SIZE);
247         Dbt data1(data1_storage.databuf, DATA_SIZE);
248 
249         stxxl::timer Timer;
250         stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
251         stxxl::int64 i;
252         //comp_type cmp_;
253 
254         ran32State = 0xdeadbeef;
255 
256         DB_BTREE_STAT* dbstat;
257 
258         db.stat(NULL, &dbstat, 0);
259         STXXL_MSG("Records in map: " << dbstat->bt_ndata);
260 
261         db.get_env()->memp_stat(NULL, NULL, DB_STAT_CLEAR);
262 
263         Timer.start();
264 
265         for (i = 0; i < n_inserts; ++i)
266         {
267             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
268             rand_key(i, key1_storage);
269             db.put(NULL, &key1, &data1, DB_NOOVERWRITE);
270         }
271 
272         Timer.stop();
273         db.stat(NULL, &dbstat, 0);
274         STXXL_MSG("Records in map: " << dbstat->bt_ndata);
275         STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
276                   " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
277 
278         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
279 
280         /////////////////////////////////////////
281         Timer.reset();
282         Timer.start();
283 
284         Dbc* cursorp;
285         db.cursor(NULL, &cursorp, 0);
286 
287         for (i = 0; i < n_locates; ++i)
288         {
289             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
290             rand_key(i, key1_storage);
291             Dbt keyx(key1_storage.keybuf, KEY_SIZE);
292             Dbt datax(data1_storage.databuf, DATA_SIZE);
293 
294             cursorp->get(&keyx, &datax, DB_SET_RANGE);
295         }
296 
297         Timer.stop();
298         STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
299                   " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
300 
301         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
302 
303         ////////////////////////////////////
304         Timer.reset();
305 
306         Timer.start();
307 
308         stxxl::int64 n_scanned = 0;
309 
310         for (i = 0; i < n_range_queries; ++i)
311         {
312             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
313             rand_key(i, key1_storage);
314             my_key last_key = key1_storage;
315             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
316             rand_key(i, key1_storage);
317             if (last_key < key1_storage)
318                 std::swap(last_key, key1_storage);
319 
320             Dbt keyx(key1_storage.keybuf, KEY_SIZE);
321             Dbt datax(data1_storage.databuf, DATA_SIZE);
322 
323             if (cursorp->get(&keyx, &datax, DB_SET_RANGE) == DB_NOTFOUND)
324                 continue;
325 
326             while (*((my_key*)keyx.get_data()) <= last_key)
327             {
328                 ++n_scanned;
329                 if (cursorp->get(&keyx, &datax, DB_NEXT) == DB_NOTFOUND)
330                     break;
331             }
332 
333             if (n_scanned >= 10 * n_range_queries)
334             {
335                 ++i;
336                 break;
337             }
338         }
339 
340         n_range_queries = i;
341 
342         Timer.stop();
343         if (cursorp != NULL)
344             cursorp->close();
345 
346         STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
347                   " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
348                   " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
349 
350         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
351 
352         //////////////////////////////////////
353 
354         ran32State = 0xdeadbeef;
355         memset(key1_storage.keybuf, 'a', KEY_SIZE);
356 
357         Timer.reset();
358         Timer.start();
359 
360         for (i = 0; i < n_deletes; ++i)
361         {
362             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
363             rand_key(i, key1_storage);
364             Dbt keyx(key1_storage.keybuf, KEY_SIZE);
365             db.del(NULL, &keyx, 0);
366         }
367 
368         Timer.stop();
369         db.stat(NULL, &dbstat, 0);
370         STXXL_MSG("Records in map: " << dbstat->bt_ndata);
371         STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
372                   " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
373 
374         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
375 
376         db.close(0);
377     }
378     catch (DbException& e)
379     {
380         STXXL_ERRMSG("DbException happened");
381     }
382     catch (std::exception& e)
383     {
384         STXXL_ERRMSG("std::exception happened");
385     }
386 
387     unlink(filename);
388 }
389 
run_stxxl_map(stxxl::int64 ops)390 void run_stxxl_map(stxxl::int64 ops)
391 {
392     map_type Map(NODE_CACHE_SIZE, LEAF_CACHE_SIZE);
393     Map.enable_prefetching();
394     stxxl::stats* Stats = stxxl::stats::get_instance();
395 
396     std::pair<my_key, my_data> element;
397 
398     memset(element.first.keybuf, 'a', KEY_SIZE);
399     memset(element.second.databuf, 'b', DATA_SIZE);
400 
401     stxxl::timer Timer;
402     stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
403     stxxl::int64 i;
404     //comp_type cmp_;
405 
406     ran32State = 0xdeadbeef;
407 
408     //stxxl::random_number32 myrand;
409 
410     STXXL_MSG("Records in map: " << Map.size());
411 
412     stxxl::stats_data stats_begin(*Stats);
413     Timer.start();
414 
415     for (i = 0; i < n_inserts; ++i)
416     {
417         //element.first.keybuf[KEYPOS] = letters[VALUE];
418         rand_key(i, element.first);
419         Map.insert(element);
420     }
421 
422     Timer.stop();
423 
424     STXXL_MSG("Records in map: " << Map.size());
425     STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
426               " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
427 
428     std::cout << (stxxl::stats_data(*Stats) - stats_begin);
429 
430     ////////////////////////////////////////////////
431 
432     const map_type& CMap(Map);      // const map reference
433 
434     stats_begin = stxxl::stats_data(*Stats);
435     Timer.reset();
436     Timer.start();
437 
438     for (i = 0; i < n_locates; ++i)
439     {
440         //element.first.keybuf[KEYPOS] = letters[VALUE];
441         rand_key(i, element.first);
442         CMap.lower_bound(element.first);
443     }
444 
445     Timer.stop();
446     STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
447               " seconds : " << (double(n_locates) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
448 
449     std::cout << (stxxl::stats_data(*Stats) - stats_begin);
450 
451     ////////////////////////////////////
452 
453     stats_begin = stxxl::stats_data(*Stats);
454     Timer.reset();
455     Timer.start();
456 
457     stxxl::int64 n_scanned = 0; //, skipped_qieries = 0;
458 
459     for (i = 0; i < n_range_queries; ++i)
460     {
461         //element.first.keybuf[KEYPOS] = letters[VALUE];
462         rand_key(i, element.first);
463         my_key begin_key = element.first;
464         map_type::const_iterator begin = CMap.lower_bound(element.first);
465         //element.first.keybuf[KEYPOS] = letters[VALUE];
466         rand_key(i, element.first);
467         map_type::const_iterator beyond = CMap.lower_bound(element.first);
468         if (element.first < begin_key)
469             std::swap(begin, beyond);
470 
471         while (begin != beyond)
472         {
473             my_data tmp = begin->second;
474             stxxl::STXXL_UNUSED(tmp);
475             ++n_scanned;
476             ++begin;
477         }
478         if (n_scanned >= 10 * n_range_queries)
479         {
480             ++i;
481             break;
482         }
483     }
484 
485     n_range_queries = i;
486 
487     Timer.stop();
488     STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
489               " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
490               " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
491 
492     std::cout << (stxxl::stats_data(*Stats) - stats_begin);
493 
494     //////////////////////////////////////
495     ran32State = 0xdeadbeef;
496     memset(element.first.keybuf, 'a', KEY_SIZE);
497     memset(element.second.databuf, 'b', DATA_SIZE);
498 
499     stats_begin = stxxl::stats_data(*Stats);
500     Timer.reset();
501     Timer.start();
502 
503     for (i = 0; i < n_deletes; ++i)
504     {
505         //element.first.keybuf[KEYPOS] = letters[VALUE];
506         rand_key(i, element.first);
507         Map.erase(element.first);
508     }
509 
510     Timer.stop();
511     STXXL_MSG("Records in map: " << Map.size());
512     STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
513               " seconds : " << (double(n_deletes) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
514 
515     std::cout << (stxxl::stats_data(*Stats) - stats_begin);
516 }
517 
518 class rand_key_gen
519 {
520     stxxl::int64 counter;
521     my_key& current;
522     stxxl::random_number32 myrand;
523     rand_key_gen();
524 
525 public:
526     typedef my_key value_type;
527 
rand_key_gen(stxxl::int64 el,my_key & cur)528     rand_key_gen(stxxl::int64 el, my_key& cur)
529         : counter(el), current(cur)
530     {
531         //const stxxl::int64  & i = counter;
532         //current.keybuf[KEYPOS] = letters[VALUE];
533         rand_key(counter, current);
534     }
operator *()535     const my_key& operator * () { return current; }
operator ->()536     const my_key* operator -> () { return &current; }
537 
operator ++()538     rand_key_gen& operator ++ ()
539     {
540         --counter;
541         //const stxxl::int64  & i = counter;
542         //current.keybuf[KEYPOS] = letters[VALUE];
543         rand_key(counter, current);
544         return *this;
545     }
empty() const546     bool empty() const { return counter == 0; }
547 };
548 
549 template <class InputType>
550 class key2pair
551 {
552     InputType& in;
553     std::pair<my_key, my_data> current;
554     key2pair();
555 
556 public:
557     typedef std::pair<my_key, my_data> value_type;
558 
key2pair(InputType & in_)559     key2pair(InputType& in_) : in(in_)
560     {
561         if (!in.empty())
562             current.first = *in;
563     }
operator *()564     const value_type& operator * () { return current; }
operator ->()565     const value_type* operator -> () { return &current; }
566 
operator ++()567     key2pair& operator ++ ()
568     {
569         ++in;
570         if (!empty())
571             current.first = *in;
572 
573         return *this;
574     }
empty() const575     bool empty() const { return in.empty(); }
576 };
577 
run_stxxl_map_big(stxxl::int64 n,unsigned ops)578 void run_stxxl_map_big(stxxl::int64 n, unsigned ops)
579 {
580     stxxl::stats* Stats = stxxl::stats::get_instance();
581 
582     std::pair<my_key, my_data> element;
583 
584     memset(element.first.keybuf, 'a', KEY_SIZE);
585     memset(element.second.databuf, 'b', DATA_SIZE);
586 
587     stxxl::timer Timer;
588     stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
589     stxxl::int64 i;
590     //comp_type cmp_;
591 
592     ran32State = 0xdeadbeef;
593 
594     //stxxl::random_number32 myrand;
595 
596     Timer.start();
597 
598     vector_type SortedSeq(n);
599     const vector_type& CSortedSeq(SortedSeq);
600     {
601         rand_key_gen Gen(n, element.first);
602         typedef stxxl::stream::sort<rand_key_gen, comp_type> sorter_type;
603         sorter_type Sorter(Gen, comp_type(), SORTER_MEM);
604         typedef key2pair<sorter_type> key2pair_type;
605         key2pair_type Key2Pair(Sorter);
606         stxxl::stream::materialize(Key2Pair, SortedSeq.begin());
607     }
608 
609     Timer.stop();
610 
611     STXXL_MSG("Finished sorting input. Elapsed time: " <<
612               (Timer.mseconds() / 1000.) << " seconds.");
613 
614     stxxl::stats_data stats_begin(*Stats);
615     Timer.reset();
616     Timer.start();
617 
618     // bulk construction
619     map_type Map(CSortedSeq.begin(),
620                  CSortedSeq.end(),
621                  NODE_CACHE_SIZE, LEAF_CACHE_SIZE, true);
622 
623     Timer.stop();
624 
625     STXXL_MSG("Records in map: " << Map.size());
626     STXXL_MSG("Construction elapsed time: " << (Timer.mseconds() / 1000.) <<
627               " seconds : " << (double(n) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
628 
629     Map.print_statistics(cout);
630     Map.reset_statistics();
631     std::cout << (stxxl::stats_data(*Stats) - stats_begin);
632 
633     ////////////////////////////////////////
634 
635     Map.disable_prefetching();
636 
637     stats_begin = stxxl::stats_data(*Stats);
638     Timer.reset();
639     Timer.start();
640 
641     for (i = 0; i < n_inserts; ++i)
642     {
643         //element.first.keybuf[KEYPOS] = letters[VALUE];
644         rand_key(i, element.first);
645         Map.insert(element);
646     }
647 
648     Timer.stop();
649 
650     STXXL_MSG("Records in map: " << Map.size());
651     STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
652               " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
653 
654     Map.print_statistics(cout);
655     Map.reset_statistics();
656     std::cout << (stxxl::stats_data(*Stats) - stats_begin);
657 
658     ////////////////////////////////////
659 
660     const map_type& CMap(Map);      // const map reference
661 
662     Timer.reset();
663     Timer.start();
664 
665     for (i = 0; i < n_locates; ++i)
666     {
667         //element.first.keybuf[KEYPOS] = letters[VALUE];
668         rand_key(i, element.first);
669         CMap.lower_bound(element.first);
670     }
671 
672     Timer.stop();
673     STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
674               " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
675 
676     Map.print_statistics(cout);
677     Map.reset_statistics();
678     std::cout << (stxxl::stats_data(*Stats) - stats_begin);
679 
680     ////////////////////////////////////
681 
682     Map.enable_prefetching();
683 
684     stats_begin = stxxl::stats_data(*Stats);
685     Timer.reset();
686     Timer.start();
687 
688     stxxl::int64 n_scanned = 0; //, skipped_qieries = 0;
689 
690     for (i = 0; i < n_range_queries; ++i)
691     {
692         //element.first.keybuf[KEYPOS] = letters[VALUE];
693         rand_key(i, element.first);
694         my_key begin_key = element.first;
695         map_type::const_iterator begin = CMap.lower_bound(element.first);
696         //element.first.keybuf[KEYPOS] = letters[VALUE];
697         rand_key(i, element.first);
698         map_type::const_iterator beyond = CMap.lower_bound(element.first);
699         if (element.first < begin_key)
700             std::swap(begin, beyond);
701 
702         /*
703            STXXL_MSG("Looking     "<<element.first<<" scanned: "<<n_scanned);
704 
705            if(beyond==CMap.end())
706            {
707                 STXXL_MSG("Upper bound "<<"END");
708            }
709            else
710            {
711                 STXXL_MSG("Upper bound "<<((element.first>begin_key)?element.first:begin_key));
712            }*/
713 
714         while (begin != beyond)
715         {
716             ++n_scanned;
717             ++begin;
718         }
719 
720         if (n_scanned >= SCAN_LIMIT(n))
721         {
722             ++i;
723             break;
724         }
725     }
726 
727     n_range_queries = i;
728 
729     Timer.stop();
730     STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
731               " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
732               " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
733 
734     Map.print_statistics(cout);
735     Map.reset_statistics();
736     std::cout << (stxxl::stats_data(*Stats) - stats_begin);
737 
738     //////////////////////////////////////
739 
740     ran32State = 0xdeadbeef;
741     memset(element.first.keybuf, 'a', KEY_SIZE);
742     memset(element.second.databuf, 'b', DATA_SIZE);
743 
744     Map.disable_prefetching();
745 
746     stats_begin = stxxl::stats_data(*Stats);
747     Timer.reset();
748     Timer.start();
749 
750     for (i = n_deletes; i > 0; --i)
751     {
752         //element.first.keybuf[KEYPOS] = letters[VALUE];
753         rand_key(i, element.first);
754         Map.erase(element.first);
755     }
756 
757     Timer.stop();
758     STXXL_MSG("Records in map: " << Map.size());
759     STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
760               " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
761 
762     Map.print_statistics(cout);
763     Map.reset_statistics();
764     std::cout << (stxxl::stats_data(*Stats) - stats_begin);
765 }
766 
767 /////////////////////////////////////////////////////////////////////////
768 
769 typedef AMI_STREAM<el_t> stream_t;
770 
771 char dummy;
772 
773 class MyFilter
774 {
775 public:
operator ()(const el_t & v) const776     bool operator () (const el_t& v) const
777     {
778         dummy += v.key_.keybuf[0];         // touch element
779         return true;
780     }
781 };
782 
run_tpie_btree_big(stxxl::int64 n,unsigned ops)783 void run_tpie_btree_big(stxxl::int64 n, unsigned ops)
784 {
785     el_t element;
786 
787     memset(element.key_.keybuf, 'a', KEY_SIZE);
788     memset(element.data_.databuf, 'b', DATA_SIZE);
789 
790     // Log debugging info from the application, but not from the library.
791     tpie_log_init(TPIE_LOG_APP_DEBUG);
792     MM_manager.set_memory_limit(TOTAL_CACHE_SIZE);
793     MM_manager.enforce_memory_limit();
794 
795     stream_t* is = new stream_t;
796 
797     stxxl::timer Timer;
798     stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
799     stxxl::int64 i;
800     //comp_type cmp_;
801 
802     ran32State = 0xdeadbeef;
803 
804     //stxxl::random_number32 myrand;
805 
806     Timer.start();
807 
808     {
809         rand_key_gen Gen(n, element.key_);
810         typedef stxxl::stream::sort<rand_key_gen, comp_type> sorter_type;
811         sorter_type Sorter(Gen, comp_type(), SORTER_MEM);
812         //typedef key2pair<sorter_type> key2pair_type;
813         //key2pair_type Key2Pair(Sorter);
814         while (!Sorter.empty())
815         {
816             is->write_item(*Sorter);
817             ++Sorter;
818         }
819     }
820 
821     Timer.stop();
822 
823     STXXL_MSG("Finished sorting input. Elapsed time: " <<
824               (Timer.mseconds() / 1000.) << " seconds.");
825 
826     Timer.reset();
827     Timer.start();
828 
829     // bulk construction
830     u_btree_t* u_btree;
831     AMI_btree_params params;
832     params.node_block_factor = NODE_BLOCK_SIZE / 4096;
833     params.leaf_block_factor = LEAF_BLOCK_SIZE / 4096;
834     params.leaf_cache_size = LEAF_CACHE_SIZE / LEAF_BLOCK_SIZE;
835     params.node_cache_size = NODE_CACHE_SIZE / NODE_BLOCK_SIZE;
836 
837     u_btree = new u_btree_t(params);
838 
839     using std::cout;
840     using std::cerr;
841 
842     if (!u_btree->is_valid()) {
843         cerr << "Error initializing btree. Aborting.\n";
844         delete u_btree;
845         exit(1);
846     }
847 
848     if (u_btree->load_sorted(is, 1.0, 1.0) != AMI_ERROR_NO_ERROR)
849         cerr << "Error during bulk loading.\n";
850 
851     Timer.stop();
852 
853     STXXL_MSG("Records in map: " << u_btree->size());
854     STXXL_MSG("Construction elapsed time: " << (Timer.mseconds() / 1000.) <<
855               " seconds : " << (double(n) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
856 
857     ////////////////////////////////////////
858     Timer.reset();
859 
860     Timer.start();
861 
862     for (i = 0; i < n_inserts; ++i)
863     {
864         //element.first.keybuf[KEYPOS] = letters[VALUE];
865         rand_key(i, element.key_);
866         u_btree->insert(element);
867     }
868 
869     Timer.stop();
870 
871     STXXL_MSG("Records in map: " << u_btree->size());
872     STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
873               " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
874 
875     ////////////////////////////////////////////////
876     Timer.reset();
877 
878     Timer.start();
879 
880     el_t result;
881     for (i = 0; i < n_locates; ++i)
882     {
883         //element.first.keybuf[KEYPOS] = letters[VALUE];
884         rand_key(i, element.key_);
885         u_btree->succ(element.key_, result);
886     }
887 
888     Timer.stop();
889     STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
890               " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
891 
892     ////////////////////////////////////
893     Timer.reset();
894 
895     Timer.start();
896 
897     stxxl::int64 n_scanned = 0; //, skipped_qieries = 0;
898     MyFilter filter;
899 
900     for (i = 0; i < n_range_queries; ++i)
901     {
902         rand_key(i, element.key_);
903         my_key begin_key = element.key_;
904         rand_key(i, element.key_);
905         if (element.key_ < begin_key)
906             n_scanned += u_btree->range_query(element.key_, begin_key, NULL, filter);
907 
908         else
909             n_scanned += u_btree->range_query(begin_key, element.key_, NULL, filter);
910 
911         if (n_scanned >= SCAN_LIMIT(n))
912         {
913             ++i;
914             break;
915         }
916     }
917 
918     n_range_queries = i;
919 
920     Timer.stop();
921     STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
922               " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
923               " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
924 
925     //////////////////////////////////////
926     ran32State = 0xdeadbeef;
927     memset(element.key_.keybuf, 'a', KEY_SIZE);
928     memset(element.data_.databuf, 'b', DATA_SIZE);
929 
930     Timer.reset();
931 
932     Timer.start();
933 
934     for (i = n_deletes; i > 0; --i)
935     {
936         //element.first.keybuf[KEYPOS] = letters[VALUE];
937         rand_key(i, element.key_);
938         u_btree->erase(element.key_);
939     }
940 
941     Timer.stop();
942     STXXL_MSG("Records in map: " << u_btree->size());
943     STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
944               " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
945     delete u_btree;
946     delete is;
947 }
948 
run_bdb_btree_big(stxxl::int64 n,unsigned ops)949 void run_bdb_btree_big(stxxl::int64 n, unsigned ops)
950 {
951     const char* filename = BDB_FILE;
952 
953     my_key key1_storage;
954     my_data data1_storage;
955 
956 #ifdef BDB_BULK_SCAN
957     int* bulk_buffer = new int[logbufsize / sizeof(int)];
958 #endif
959 
960     unlink(filename);
961 
962     memset(key1_storage.keybuf, 'a', KEY_SIZE);
963     memset(data1_storage.databuf, 'b', DATA_SIZE);
964 
965     Db db(NULL, 0);                   // Instantiate the Db object
966 
967     try {
968         // here we start with the tests
969         Dbt key1(key1_storage.keybuf, KEY_SIZE);
970         Dbt data1(data1_storage.databuf, DATA_SIZE);
971 
972         stxxl::timer Timer;
973         stxxl::int64 n_inserts = ops, n_locates = ops, n_range_queries = ops, n_deletes = ops;
974         stxxl::int64 i;
975         //comp_type cmp_;
976 
977         ran32State = 0xdeadbeef;
978 
979         Timer.start();
980 
981         vector_type SortedSeq(n);
982         //const vector_type & CSortedSeq(SortedSeq);
983         {
984             rand_key_gen Gen(n, key1_storage);
985             typedef stxxl::stream::sort<rand_key_gen, comp_type> sorter_type;
986             sorter_type Sorter(Gen, comp_type(), SORTER_MEM);
987             typedef key2pair<sorter_type> key2pair_type;
988             key2pair_type Key2Pair(Sorter);
989             stxxl::stream::materialize(Key2Pair, SortedSeq.begin());
990         }
991 
992         Timer.stop();
993 
994         STXXL_MSG("Finished sorting input. Elapsed time: " << (Timer.mseconds() / 1000.) << " seconds.");
995 
996         db.set_errfile(stderr);
997         db.set_pagesize(pagesize);
998         db.set_cachesize(0, cachesize, 10);
999 
1000         STXXL_MSG("BDB cache size set.");
1001 
1002         // Open the database
1003         db.open(NULL,           // Transaction pointer
1004                 filename,       // Database file name
1005                 NULL,           // Optional logical database name
1006                 DB_BTREE,       // Database access method
1007                 DB_CREATE,      // Open flags
1008                 0);             // File mode (using defaults)
1009 
1010         db.get_env()->memp_stat(NULL, NULL, DB_STAT_CLEAR);
1011 
1012         Timer.reset();
1013         Timer.start();
1014 
1015         // DBD does not have bulk construction
1016         // however inserting in sorted order might help
1017         // to improve performance
1018         vector_type::const_iterator cit = SortedSeq.begin();
1019         for (i = 0; i < n; ++i, ++cit)
1020         {
1021             key1_storage = cit->first;
1022             db.put(NULL, &key1, &data1, DB_NOOVERWRITE);
1023         }
1024 
1025         Timer.stop();
1026 
1027         DB_BTREE_STAT* dbstat;
1028         db.stat(NULL, &dbstat, 0);
1029         STXXL_MSG("Records in map: " << dbstat->bt_ndata);
1030         STXXL_MSG("Construction elapsed time: " << (Timer.mseconds() / 1000.) <<
1031                   " seconds : " << (double(n) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
1032 
1033         db.stat_print(0);
1034         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
1035         ////////////////////////////////////////
1036 
1037         Timer.reset();
1038         Timer.start();
1039 
1040         for (i = 0; i < n_inserts; ++i)
1041         {
1042             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
1043             rand_key(i, key1_storage);
1044             db.put(NULL, &key1, &data1, DB_NOOVERWRITE);
1045         }
1046 
1047         Timer.stop();
1048         db.stat(NULL, &dbstat, 0);
1049         STXXL_MSG("Records in map: " << dbstat->bt_ndata);
1050         STXXL_MSG("Insertions elapsed time: " << (Timer.mseconds() / 1000.) <<
1051                   " seconds : " << (double(n_inserts) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
1052 
1053         db.stat_print(0);
1054         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
1055 
1056         /////////////////////////////////////////
1057         Timer.reset();
1058         Timer.start();
1059 
1060         Dbc* cursorp;
1061         db.cursor(NULL, &cursorp, 0);
1062 
1063         for (i = 0; i < n_locates; ++i)
1064         {
1065             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
1066             rand_key(i, key1_storage);
1067             Dbt keyx(key1_storage.keybuf, KEY_SIZE);
1068             Dbt datax(data1_storage.databuf, DATA_SIZE);
1069 
1070             cursorp->get(&keyx, &datax, DB_SET_RANGE);
1071         }
1072 
1073         Timer.stop();
1074         STXXL_MSG("Locates elapsed time: " << (Timer.mseconds() / 1000.) <<
1075                   " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
1076 
1077         db.stat_print(0);
1078         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
1079 
1080         ////////////////////////////////////
1081         Timer.reset();
1082 
1083         Timer.start();
1084 
1085         stxxl::int64 n_scanned = 0;
1086 
1087         for (i = 0; i < n_range_queries; ++i)
1088         {
1089             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
1090             rand_key(i, key1_storage);
1091             my_key last_key = key1_storage;
1092             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
1093             rand_key(i, key1_storage);
1094             if (last_key < key1_storage)
1095                 std::swap(last_key, key1_storage);
1096 
1097             //STXXL_MSG("Looking     "<<key1_storage<<" scanned: "<<n_scanned);
1098             //STXXL_MSG("Upper bound "<<last_key);
1099 
1100             Dbt keyx(key1_storage.keybuf, KEY_SIZE);
1101 #ifdef BDB_BULK_SCAN
1102             Dbt datax(bulk_buffer, DATA_SIZE);
1103             datax.set_ulen(logbufsize);
1104             datax.set_flags(DB_DBT_USERMEM);
1105 #else
1106             Dbt datax(data1_storage.databuf, DATA_SIZE);
1107 #endif
1108 
1109 #ifdef BDB_BULK_SCAN
1110             if (cursorp->get(&keyx, &datax, DB_SET_RANGE | DB_MULTIPLE_KEY) == DB_NOTFOUND)
1111                 continue;
1112 
1113             do
1114             {
1115                 DbMultipleKeyDataIterator BulkIterator(datax);
1116                 Dbt key1, data1;
1117                 while (BulkIterator.next(key1, data1) &&
1118                        *((my_key*)key1.get_data()) <= last_key)
1119                 {
1120                     ++n_scanned;
1121                     //STXXL_MSG("Result      "<<*((my_key *)key1.get_data()));
1122                 }
1123                 if (cursorp->get(&keyx, &datax, DB_NEXT | DB_MULTIPLE_KEY) == DB_NOTFOUND)
1124                     break;
1125 
1126                 if (*((my_key*)keyx.get_data()) > last_key)
1127                 {
1128                     break;
1129                 }
1130             } while (1);
1131 
1132 #else
1133             if (cursorp->get(&keyx, &datax, DB_SET_RANGE) == DB_NOTFOUND)
1134                 continue;
1135 
1136             while (*((my_key*)keyx.get_data()) <= last_key)
1137             {
1138                 ++n_scanned;
1139                 if (cursorp->get(&keyx, &datax, DB_NEXT) == DB_NOTFOUND)
1140                     break;
1141             }
1142 #endif
1143 
1144             if (n_scanned >= SCAN_LIMIT(n))
1145             {
1146                 ++i;
1147                 break;
1148             }
1149         }
1150 
1151         n_range_queries = i;
1152 
1153         Timer.stop();
1154         if (cursorp != NULL)
1155             cursorp->close();
1156 
1157         STXXL_MSG("Range query elapsed time: " << (Timer.mseconds() / 1000.) <<
1158                   " seconds : " << (double(n_scanned) / (Timer.mseconds() / 1000.)) <<
1159                   " key/data pairs per sec, #queries " << n_range_queries << " #scanned elements: " << n_scanned);
1160 
1161         db.stat_print(0);
1162         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
1163 
1164         //////////////////////////////////////
1165 
1166         ran32State = 0xdeadbeef;
1167         memset(key1_storage.keybuf, 'a', KEY_SIZE);
1168 
1169         Timer.reset();
1170         Timer.start();
1171 
1172         for (i = 0; i < n_deletes; ++i)
1173         {
1174             //key1_storage.keybuf[KEYPOS] = letters[VALUE];
1175             rand_key(i, key1_storage);
1176             Dbt keyx(key1_storage.keybuf, KEY_SIZE);
1177             db.del(NULL, &keyx, 0);
1178         }
1179 
1180         Timer.stop();
1181         db.stat(NULL, &dbstat, 0);
1182         STXXL_MSG("Records in map: " << dbstat->bt_ndata);
1183         STXXL_MSG("Erase elapsed time: " << (Timer.mseconds() / 1000.) <<
1184                   " seconds : " << (double(ops) / (Timer.mseconds() / 1000.)) << " key/data pairs per sec");
1185 
1186         db.stat_print(0);
1187         db.get_env()->memp_stat_print(DB_STAT_CLEAR);
1188 
1189         db.close(0);
1190     }
1191     catch (DbException& e)
1192     {
1193         STXXL_ERRMSG("DbException happened");
1194     }
1195     catch (std::exception& e)
1196     {
1197         STXXL_ERRMSG("std::exception happened");
1198     }
1199 
1200     unlink(filename);
1201 
1202 #ifdef BDB_BULK_SCAN
1203     delete[]  bulk_buffer;
1204 #endif
1205 }
1206 
main(int argc,char * argv[])1207 int main(int argc, char* argv[])
1208 {
1209     STXXL_MSG("stxxl::map Real Node block size: " << REAL_NODE_BLOCK_SIZE << " bytes");
1210     STXXL_MSG("stxxl::map Real Leaf block size: " << REAL_LEAF_BLOCK_SIZE << " bytes");
1211     STXXL_MSG("stxxl::map Node max elements   : " << REAL_NODE_MELEMENTS);
1212     STXXL_MSG("stxxl::map Leaf max elements   : " << REAL_LEAF_MELEMENTS);
1213 #if STXXL_DIRECT_IO_OFF
1214     STXXL_MSG("STXXL_DIRECT_IO_OFF is defined");
1215 #else
1216     STXXL_MSG("STXXL_DIRECT_IO_OFF is NOT defined");
1217 #endif
1218 
1219     if (argc < 3)
1220     {
1221         STXXL_MSG("Usage: " << argv[0] << " version #ops");
1222         STXXL_MSG("\t version = 1: test stxxl map");
1223         STXXL_MSG("\t version = 2: test Berkeley DB btree");
1224         STXXL_MSG("\t version = 3: big test stxxl map");
1225         STXXL_MSG("\t version = 4: big test Berkeley DB btree");
1226         STXXL_MSG("\t version = 5: big test TPIE btree");
1227         return -1;
1228     }
1229 
1230     init();
1231 
1232     int version = atoi(argv[1]);
1233     stxxl::uint64 ops = stxxl::atouint64(argv[2]);
1234 
1235     STXXL_MSG("Running version      : " << version);
1236     STXXL_MSG("Operations to perform: " << ops);
1237     STXXL_MSG("Btree cache size     : " << TOTAL_CACHE_SIZE << " bytes");
1238     STXXL_MSG("Leaf block size      : " << LEAF_BLOCK_SIZE << " bytes");
1239 
1240     switch (version)
1241     {
1242     case 1:
1243         run_stxxl_map(ops);
1244         break;
1245     case 2:
1246         run_bdb_btree(ops);
1247         break;
1248     case 3:
1249         run_stxxl_map_big(ops, 100000);
1250         break;
1251     case 4:
1252         run_bdb_btree_big(ops, 100000);
1253         break;
1254     case 5:
1255         run_tpie_btree_big(ops, 100000);
1256         break;
1257     default:
1258         STXXL_MSG("Unsupported version " << version);
1259     }
1260 }
1261