1 // Copyright David Abrahams, Matthias Troyer, Michael Gauckler 2 // 2005. Distributed under the Boost Software License, Version 3 // 1.0. (See accompanying file LICENSE_1_0.txt or copy at 4 // http://www.boost.org/LICENSE_1_0.txt) 5 #if !defined(BOOST_SPIRIT_TEST_BENCHMARK_HPP) 6 #define BOOST_SPIRIT_TEST_BENCHMARK_HPP 7 8 #ifdef _MSC_VER 9 // inline aggressively 10 # pragma inline_recursion(on) // turn on inline recursion 11 # pragma inline_depth(255) // max inline depth 12 # define _SECURE_SCL 0 13 #endif 14 15 #include "high_resolution_timer.hpp" 16 #include <iostream> 17 #include <cstring> 18 #include <boost/preprocessor/seq/for_each.hpp> 19 #include <boost/preprocessor/stringize.hpp> 20 21 namespace test 22 { 23 // This value is required to ensure that a smart compiler's dead 24 // code elimination doesn't optimize away anything we're testing. 25 // We'll use it to compute the return code of the executable to make 26 // sure it's needed. 27 int live_code; 28 29 // Call objects of the given Accumulator type repeatedly 30 template <class Accumulator> hammer(long const repeats)31 void hammer(long const repeats) 32 { 33 // Strategy: because the sum in an accumulator after each call 34 // depends on the previous value of the sum, the CPU's pipeline 35 // might be stalled while waiting for the previous addition to 36 // complete. Therefore, we allocate an array of accumulators, 37 // and update them in sequence, so that there's no dependency 38 // between adjacent addition operations. 39 // 40 // Additionally, if there were only one accumulator, the 41 // compiler or CPU might decide to update the value in a 42 // register rather that writing it back to memory. we want each 43 // operation to at least update the L1 cache. *** Note: This 44 // concern is specific to the particular application at which 45 // we're targeting the test. *** 46 47 // This has to be at least as large as the number of 48 // simultaneous accumulations that can be executing in the 49 // compiler pipeline. A safe number here is larger than the 50 // machine's maximum pipeline depth. If you want to test the L2 51 // or L3 cache, or main memory, you can increase the size of 52 // this array. 1024 is an upper limit on the pipeline depth of 53 // current vector machines. 54 55 const std::size_t number_of_accumulators = 1024; 56 live_code = 0; // reset to zero 57 58 Accumulator a[number_of_accumulators]; 59 60 for (long iteration = 0; iteration < repeats; ++iteration) 61 { 62 for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap) 63 { 64 ap->benchmark(); 65 } 66 } 67 68 // Accumulate all the partial sums to avoid dead code 69 // elimination. 70 for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap) 71 { 72 live_code += ap->val; 73 } 74 } 75 76 // Measure the time required to hammer accumulators of the given type 77 template <class Accumulator> measure(long const repeats)78 double measure(long const repeats) 79 { 80 // Hammer accumulators a couple of times to ensure the 81 // instruction cache is full of our test code, and that we don't 82 // measure the cost of a page fault for accessing the data page 83 // containing the memory where the accumulators will be 84 // allocated 85 hammer<Accumulator>(repeats); 86 hammer<Accumulator>(repeats); 87 88 // Now start a timer 89 util::high_resolution_timer time; 90 hammer<Accumulator>(repeats); // This time, we'll measure 91 return time.elapsed(); // return the elapsed time 92 } 93 94 template <class Accumulator> report(char const * name,long const repeats)95 void report(char const* name, long const repeats) 96 { 97 std::cout.precision(10); 98 std::cout << name << ": "; 99 for (int i = 0; i < (20-int(strlen(name))); ++i) 100 std::cout << ' '; 101 std::cout << std::fixed << test::measure<Accumulator>(repeats) << " [s] "; 102 Accumulator acc; 103 acc.benchmark(); 104 std::cout << std::hex << "{checksum: " << acc.val << "}"; 105 std::cout << std::flush << std::endl; 106 } 107 108 struct base 109 { basetest::base110 base() : val(0) {} 111 int val; // This is needed to avoid dead-code elimination 112 }; 113 114 #define BOOST_SPIRIT_TEST_HAMMER(r, data, elem) \ 115 test::hammer<elem>(repeats); 116 /***/ 117 118 #define BOOST_SPIRIT_TEST_MEASURE(r, data, elem) \ 119 test::report<elem>(BOOST_PP_STRINGIZE(elem), repeats); \ 120 /***/ 121 122 #define BOOST_SPIRIT_TEST_BENCHMARK(max_repeats, FSeq) \ 123 long repeats = 100; \ 124 double measured = 0; \ 125 while (measured < 2.0 && repeats <= max_repeats) \ 126 { \ 127 repeats *= 10; \ 128 util::high_resolution_timer time; \ 129 BOOST_PP_SEQ_FOR_EACH(BOOST_SPIRIT_TEST_HAMMER, _, FSeq) \ 130 measured = time.elapsed(); \ 131 } \ 132 BOOST_PP_SEQ_FOR_EACH(BOOST_SPIRIT_TEST_MEASURE, _, FSeq) \ 133 /***/ 134 } 135 136 #endif 137