1 // Copyright 2008-present Contributors to the OpenImageIO project.
2 // SPDX-License-Identifier: BSD-3-Clause
3 // https://github.com/OpenImageIO/oiio/blob/master/LICENSE.md
4 
5 #include <cstdio>
6 #include <cstdlib>
7 #include <numeric>
8 
9 #include <OpenImageIO/benchmark.h>
10 #include <OpenImageIO/thread.h>
11 
12 
13 OIIO_NAMESPACE_BEGIN
14 
15 namespace pvt {
16 
17 void OIIO_API
18 #if __has_attribute(__optnone__)
19     __attribute__((__optnone__))
20 #endif
use_char_ptr(char const volatile *)21     use_char_ptr(char const volatile*)
22 {
23 }
24 
25 }  // namespace pvt
26 
27 
28 // Implementation of clobber_ptr is trivial, but the code in other modules
29 // doesn't know that.
30 void OIIO_API
31 #if __has_attribute(__optnone__)
32     __attribute__((__optnone__))
33 #endif
clobber(void *)34     clobber(void*)
35 {
36 }
37 
38 
39 
40 double
iteration_overhead()41 Benchmarker::iteration_overhead()
42 {
43     static bool initialized = false;
44     static double overhead  = 0.0;
45     if (!initialized) {
46         auto trivial                     = []() {};
47         const size_t trials              = 10;
48         const size_t overhead_iterations = 10000000;
49         std::vector<double> times(trials);
50         for (auto& t : times)
51             t = do_trial(overhead_iterations, trivial);
52         compute_stats(times, overhead_iterations);
53         overhead    = median();
54         initialized = true;
55         // std::cout << "iteration overhead is " << overhead << "\n";
56     }
57     return overhead;
58 }
59 
60 
61 
62 void
compute_stats(std::vector<double> & times,size_t iterations)63 Benchmarker::compute_stats(std::vector<double>& times, size_t iterations)
64 {
65     size_t trials = times.size();
66     OIIO_ASSERT(trials >= 1);
67 #if 0
68     // Debugging: print all the trial times
69     for (auto v : times)
70         std::cout << v/iterations*1e6 << ' ';
71     std::cout << "\n";
72 #endif
73 
74     // Sort so that we can exclude outliers
75     std::sort(times.begin(), times.end());
76 
77     size_t first = 0, last = trials;
78     if (size_t(2 * exclude_outliers() + 3) <= trials) {
79         first += exclude_outliers();
80         last -= exclude_outliers();
81     }
82     size_t nt = last - first;
83     if (nt == 1) {
84         m_avg    = times[first];
85         m_stddev = 0;
86         m_range  = 0;
87     } else {
88         m_avg = std::accumulate(times.begin() + first, times.begin() + last,
89                                 0.0)
90                 / nt;
91         double sum2 = std::accumulate(times.begin() + first,
92                                       times.begin() + last, 0.0,
93                                       [&](double a, double b) {
94                                           return a + (b - m_avg) * (b - m_avg);
95                                       });
96         m_stddev    = sqrt(sum2 / (nt - 1));
97         m_range     = times[last - 1] - times[first];
98     }
99 
100     if (m_trials & 1)  // odd
101         m_median = times[m_trials / 2];
102     else
103         m_median = 0.5 * (times[m_trials / 2] + times[m_trials / 2 + 1]);
104 
105     m_avg /= iterations;
106     m_stddev /= iterations;
107     m_range /= iterations;
108     m_median /= iterations;
109 }
110 
111 
112 
113 OIIO_API
114 std::ostream&
operator <<(std::ostream & out,const Benchmarker & bench)115 operator<<(std::ostream& out, const Benchmarker& bench)
116 {
117     // Get local copies of relevant statistics
118     double avg    = bench.avg();
119     double stddev = bench.stddev();
120     double range  = bench.range();
121 
122     // Figure out appropriate scale
123     static const char* unitnames[] = { "ns", "ns", "us", "ms", "s" };
124     static double unitscales[]     = { 1e9, 1e9, 1e6, 1e3, 1 };
125     int unit                       = int(bench.units());
126     if (unit == int(Benchmarker::Unit::autounit)) {
127         while (unit < int(Benchmarker::Unit::s)
128                && bench.avg() * unitscales[unit] > 10000.0)
129             ++unit;
130     }
131     const char* unitname = unitnames[unit];
132     double scale         = unitscales[unit];
133     char rateunit        = 'M';
134     double ratescale     = 1.0e6;
135     if (bench.avg() >= 1.0e-6) {
136         rateunit  = 'k';
137         ratescale = 1.0e3;
138     }
139 
140     avg *= scale;
141     stddev *= scale;
142     range *= scale;
143 
144     if (bench.indent())
145         out << std::string(bench.indent(), ' ');
146     if (unit == int(Benchmarker::Unit::s))
147         out << Strutil::sprintf("%-16s: %s", bench.m_name,
148                                 Strutil::timeintervalformat(avg, 2));
149     else
150         out << Strutil::sprintf("%-16s: %6.1f %s (+/-%4.1f%s), ", bench.name(),
151                                 avg, unitname, stddev, unitname);
152     if (bench.avg() < 0.25e-9) {
153         // Less than 1/4 ns iteration time is probably an error
154         out << "unreliable";
155         return out;
156     }
157     if (bench.work() == 1)
158         out << Strutil::sprintf("%6.1f %c/s", (1.0f / ratescale) / bench.avg(),
159                                 rateunit);
160     else
161         out << Strutil::sprintf("%6.1f %cvals/s, %.1f %ccalls/s",
162                                 (bench.work() / ratescale) / bench.avg(),
163                                 rateunit, (1.0f / ratescale) / bench.avg(),
164                                 rateunit);
165     if (bench.verbose() >= 2)
166         out << Strutil::sprintf(" (%dx%d, rng=%.1f%%, med=%.1f)",
167                                 bench.trials(), bench.iterations(), unitname,
168                                 (range / avg) * 100.0, bench.median() * scale);
169 #if 0
170     if (range > avg/10.0) {
171         for (auto v : bench.m_times)
172             std::cout << v*scale/bench.iterations() << ' ';
173         std::cout << "\n";
174     }
175 #endif
176     return out;
177 }
178 
179 
180 
181 OIIO_API std::vector<double>
timed_thread_wedge(function_view<void (int)> task,function_view<void ()> pretask,function_view<void ()> posttask,std::ostream * out,int maxthreads,int total_iterations,int ntrials,cspan<int> threadcounts)182 timed_thread_wedge(function_view<void(int)> task, function_view<void()> pretask,
183                    function_view<void()> posttask, std::ostream* out,
184                    int maxthreads, int total_iterations, int ntrials,
185                    cspan<int> threadcounts)
186 {
187     std::vector<double> times(threadcounts.size(), 0.0f);
188     if (out)
189         (*out)
190             << "threads    time   speedup  efficient  its/thread   range (best of "
191             << ntrials << ")\n";
192     for (size_t i = 0; i < (size_t)threadcounts.size(); ++i) {
193         int nthreads = threadcounts[i];
194         if (nthreads > maxthreads)
195             continue;
196         int iters = total_iterations / nthreads;
197         double range;
198         times[i] = time_trial(
199             [&]() {
200                 pretask();
201                 thread_group threads;
202                 for (int t = 0; t < nthreads; ++t)
203                     threads.create_thread(task, iters);
204                 threads.join_all();
205                 posttask();
206             },
207             ntrials, &range);
208         if (out) {
209             double one_thread_time = times[0] * threadcounts[0];
210             double ideal           = one_thread_time / nthreads;
211             double speedup         = one_thread_time / times[i];
212             double efficiency      = 100.0 * ideal / times[i];
213             Strutil::fprintf(*out,
214                              "%4d   %8.1f   %6.2fx    %6.2f%% %10d %8.2f\n",
215                              nthreads, times[i], speedup, efficiency, iters,
216                              range);
217         }
218     }
219     return times;
220 }
221 
222 
223 
224 OIIO_API void
timed_thread_wedge(function_view<void (int)> task,int maxthreads,int total_iterations,int ntrials,cspan<int> threadcounts)225 timed_thread_wedge(function_view<void(int)> task, int maxthreads,
226                    int total_iterations, int ntrials, cspan<int> threadcounts)
227 {
228     timed_thread_wedge(
229         task, []() {}, []() {}, &std::cout, maxthreads, total_iterations,
230         ntrials, threadcounts);
231 }
232 
233 OIIO_NAMESPACE_END
234