1 // Copyright David Abrahams, Matthias Troyer, Michael Gauckler
2 // 2005. Distributed under the Boost Software License, Version
3 // 1.0. (See accompanying file LICENSE_1_0.txt or copy at
4 // http://www.boost.org/LICENSE_1_0.txt)
5 #if !defined(BOOST_SPIRIT_TEST_BENCHMARK_HPP)
6 #define BOOST_SPIRIT_TEST_BENCHMARK_HPP
7 
8 #ifdef _MSC_VER
9 // inline aggressively
10 # pragma inline_recursion(on) // turn on inline recursion
11 # pragma inline_depth(255)    // max inline depth
12 # define _SECURE_SCL 0
13 #endif
14 
15 #include "high_resolution_timer.hpp"
16 #include <iostream>
17 #include <cstring>
18 #include <boost/preprocessor/seq/for_each.hpp>
19 #include <boost/preprocessor/stringize.hpp>
20 
21 namespace test
22 {
23     // This value is required to ensure that a smart compiler's dead
24     // code elimination doesn't optimize away anything we're testing.
25     // We'll use it to compute the return code of the executable to make
26     // sure it's needed.
27     int live_code;
28 
29     // Call objects of the given Accumulator type repeatedly
30     template <class Accumulator>
hammer(long const repeats)31     void hammer(long const repeats)
32     {
33         // Strategy: because the sum in an accumulator after each call
34         // depends on the previous value of the sum, the CPU's pipeline
35         // might be stalled while waiting for the previous addition to
36         // complete.  Therefore, we allocate an array of accumulators,
37         // and update them in sequence, so that there's no dependency
38         // between adjacent addition operations.
39         //
40         // Additionally, if there were only one accumulator, the
41         // compiler or CPU might decide to update the value in a
42         // register rather that writing it back to memory.  we want each
43         // operation to at least update the L1 cache.  *** Note: This
44         // concern is specific to the particular application at which
45         // we're targeting the test. ***
46 
47         // This has to be at least as large as the number of
48         // simultaneous accumulations that can be executing in the
49         // compiler pipeline.  A safe number here is larger than the
50         // machine's maximum pipeline depth. If you want to test the L2
51         // or L3 cache, or main memory, you can increase the size of
52         // this array.  1024 is an upper limit on the pipeline depth of
53         // current vector machines.
54 
55         const std::size_t number_of_accumulators = 1024;
56         live_code = 0; // reset to zero
57 
58         Accumulator a[number_of_accumulators];
59 
60         for (long iteration = 0; iteration < repeats; ++iteration)
61         {
62             for (Accumulator* ap = a;  ap < a + number_of_accumulators; ++ap)
63             {
64                 ap->benchmark();
65             }
66         }
67 
68         // Accumulate all the partial sums to avoid dead code
69         // elimination.
70         for (Accumulator* ap = a; ap < a + number_of_accumulators; ++ap)
71         {
72             live_code += ap->val;
73         }
74     }
75 
76     // Measure the time required to hammer accumulators of the given type
77     template <class Accumulator>
measure(long const repeats)78     double measure(long const repeats)
79     {
80         // Hammer accumulators a couple of times to ensure the
81         // instruction cache is full of our test code, and that we don't
82         // measure the cost of a page fault for accessing the data page
83         // containing the memory where the accumulators will be
84         // allocated
85         hammer<Accumulator>(repeats);
86         hammer<Accumulator>(repeats);
87 
88         // Now start a timer
89         util::high_resolution_timer time;
90         hammer<Accumulator>(repeats);   // This time, we'll measure
91         return time.elapsed();          // return the elapsed time
92     }
93 
94     template <class Accumulator>
report(char const * name,long const repeats)95     void report(char const* name, long const repeats)
96     {
97         std::cout.precision(10);
98         std::cout << name << ": ";
99         for (int i = 0; i < (20-int(strlen(name))); ++i)
100             std::cout << ' ';
101         std::cout << std::fixed << test::measure<Accumulator>(repeats) << " [s] ";
102         Accumulator acc;
103         acc.benchmark();
104         std::cout << std::hex << "{checksum: " << acc.val << "}";
105         std::cout << std::flush << std::endl;
106     }
107 
108     struct base
109     {
basetest::base110         base() : val(0) {}
111         int val;    // This is needed to avoid dead-code elimination
112     };
113 
114 #define BOOST_SPIRIT_TEST_HAMMER(r, data, elem)                     \
115     test::hammer<elem>(repeats);
116     /***/
117 
118 #define BOOST_SPIRIT_TEST_MEASURE(r, data, elem)                    \
119     test::report<elem>(BOOST_PP_STRINGIZE(elem), repeats);          \
120     /***/
121 
122 #define BOOST_SPIRIT_TEST_BENCHMARK(max_repeats, FSeq)              \
123     long repeats = 100;                                             \
124     double measured = 0;                                            \
125     while (measured < 2.0 && repeats <= max_repeats)                \
126     {                                                               \
127         repeats *= 10;                                              \
128         util::high_resolution_timer time;                           \
129         BOOST_PP_SEQ_FOR_EACH(BOOST_SPIRIT_TEST_HAMMER, _, FSeq)    \
130         measured = time.elapsed();                                  \
131     }                                                               \
132     BOOST_PP_SEQ_FOR_EACH(BOOST_SPIRIT_TEST_MEASURE, _, FSeq)       \
133     /***/
134 }
135 
136 #endif
137