1 /*
2     Copyright (c) 2005-2020 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 #ifndef __tbb_perf_h__
18 #define __tbb_perf_h__
19 
// TBB_PERF_TYPEINFO may be predefined as 0 to avoid the dependency on
// <typeinfo> / typeid; it is enabled unless the includer says otherwise.
#if !defined(TBB_PERF_TYPEINFO)
    #define TBB_PERF_TYPEINFO 1
#endif

// __TBB_PERF_TEST_CLASS_NAME(T) yields the string passed as the test class
// name on registration: the implementation-defined typeid name of T when
// type info is enabled, NULL otherwise.
#if !TBB_PERF_TYPEINFO
    #define __TBB_PERF_TEST_CLASS_NAME(T) NULL
#else /* TBB_PERF_TYPEINFO */
    #include <typeinfo>
    #define __TBB_PERF_TEST_CLASS_NAME(T) typeid(T).name()
#endif /* TBB_PERF_TYPEINFO */
30 
31 
#include "tbb/tick_count.h"

#if _WIN32||_WIN64
    #include <intrin.h> // __rdtsc
#endif
33 
34 // TODO: Fix build scripts to provide more reliable build phase identification means
// __TBB_PERF_API decorates every symbol shared between the benchmarking
// framework library and its clients. It can be predefined by the includer.
// With MSVC-compatible compilers it selects dllexport while the framework DLL
// itself is being built (_USRDLL set) and dllimport otherwise; with any other
// compiler it expands to nothing.
#ifndef __TBB_PERF_API
    #if _MSC_VER
        #if _USRDLL
            #define __TBB_PERF_API __declspec(dllexport)
        #else /* !_USRDLL */
            #define __TBB_PERF_API __declspec(dllimport)
        #endif /* !_USRDLL */
    #else /* !_MSC_VER */
        #define __TBB_PERF_API
    #endif /* !_MSC_VER */
#endif /* !__TBB_PERF_API */
50 
51 #if _WIN32||_WIN64
52 
53 namespace Perf {
54     typedef unsigned __int64 tick_t;
55     #if defined(_M_X64)
rdtsc()56         inline tick_t rdtsc () { return __rdtsc(); }
57     #elif _M_IX86
rdtsc()58         inline tick_t rdtsc () { __asm { rdtsc } }
59     #else
60         #error Unsupported ISA
61     #endif
62 } // namespace Perf
63 
64 #elif __linux__ || __APPLE__
65 
66 #include <stdint.h>
67 
namespace Perf {
    //! Type holding a raw processor time stamp counter value
    typedef uint64_t tick_t;

    #if __x86_64__ || __i386__ || __i386
        //! Reads the processor time stamp counter
        /** The RDTSC instruction leaves the low 32 bits of the counter in EAX
            and the high 32 bits in EDX; both halves are merged into a single
            64-bit value. **/
        inline tick_t rdtsc () {
            uint32_t eaxPart, edxPart;
            __asm__ __volatile__ ( "rdtsc" : "=a" (eaxPart), "=d" (edxPart) );
            // Widen before shifting so the high half is not lost
            const tick_t upperHalf = static_cast<tick_t>( edxPart ) << 32;
            return upperHalf | static_cast<tick_t>( eaxPart );
        }
    #else
        #error Unsupported ISA
    #endif
} // namespace Perf
80 
81 #else
82     #error Unsupported OS
83 #endif /* OS */
84 
85 __TBB_PERF_API extern int NumThreads,
86                           MaxConcurrency,
87                           NumCpus;
88 
89 // Functions and global variables provided by the benchmarking framework
90 namespace Perf {
91 
//! Type used by the framework to represent durations
typedef double duration_t;

//! Upper limit on the length of a workload name (see SetWorkloadName())
static const int MaxWorkloadNameLen = 64;

// Sentinel values for histogram destination names. They are never
// dereferenced; only their addresses are compared. The pointers themselves are
// const so that no translation unit can accidentally reassign its copy of a
// sentinel.

//! Histogram destination meaning "no histogram"
/** Used as the default histogram argument of SessionSettings. **/
static const char* const NoHistogram = reinterpret_cast<const char*>(-1);

//! Histogram destination returned by the default Test::HistogramName()
static const char* const DefaultHistogram = reinterpret_cast<const char*>(-2);
98 
99 __TBB_PERF_API void AnchorFunc ( void* );
100 __TBB_PERF_API void AnchorFunc2 ( void*, void*  );
101 
102 //! Helper that can be used in the preprocess handler to clean caches
103 /** Cleaning caches is necessary to obtain reproducible results when a test
104     accesses significant ranges of memory. **/
105 __TBB_PERF_API void WipeCaches ();
106 
107 //! Specifies the name to be used to designate the current workload in output
108 /** Should be used from Test::SetWorkload(). If necessary workload name will be
109     truncated to MaxWorkloadNameLen characters. **/
110 __TBB_PERF_API void SetWorkloadName( const char* format, ... );
111 
112 class __TBB_PERF_API Test {
113 public:
~Test()114     virtual ~Test () {}
115 
116     //! Struct used by tests running in multiple masters mode
117     struct ThreadInfo {
118         //! Zero based thread ID
119         int     tid;
120         //! Pointer to test specific data
121         /** If used by the test, should be initialized by OnStartLocal(), and
122             finalized by OnFinishLocal(). **/
123         void*   data;
124     };
125 
126     ////////////////////////////////////////////////////////////////////////////////
127     // Mandatory methods
128 
129     //! Returns the number of workloads supported
130     virtual int NumWorkloads () = 0;
131 
132     //! Set workload info for the subsequent calls to Run() and RunSerial()
133     /** This method can use global helper function Perf::SetWorkloadName() in order
134         to specify the name of the current workload, which will be used in output
135         to designate the workload. If SetWorkloadName is not called, workloadIndex
136         will be used for this purpose.
137 
138         When testing task scheduler, make sure that this method does not trigger
139         its automatic initialization. **/
140     virtual void SetWorkload ( int workloadIndex ) = 0;
141 
142     //! Test implementation
143     /** Called by the timing framework several times in a loop to achieve approx.
144         RunDuration time, and this loop is timed NumRuns times to collect statistics.
145         Argument ti specifies information about the master thread calling this method. **/
146     virtual void Run ( ThreadInfo& ti ) = 0;
147 
148     ////////////////////////////////////////////////////////////////////////////////
149     // Optional methods
150 
151     //! Returns short title string to be used in the regular output to identify the test
152     /** Should uniquely identify the test among other ones in the given benchmark suite.
153         If not implemented, the test implementation class' RTTI name is used. **/
Name()154     virtual const char* Name () { return NULL; };
155 
156     //! Returns minimal number of master threads
157     /** Used for task scheduler tests only (when UseTbbScheduler option is specified
158         in session settings). **/
MinNumMasters()159     virtual int MinNumMasters () { return 1; }
160 
161     //! Returns maximal number of master threads
162     /** Used for task scheduler tests only (when UseTbbScheduler option is specified
163         in session settings). **/
MaxNumMasters()164     virtual int MaxNumMasters () { return 1; }
165 
166     //! Executes serial workload equivalent to the one processed by Run()
167     /** Called by the timing framework several times in a loop to collect statistics. **/
168     virtual void RunSerial ( ThreadInfo& ti );
169 
170     //! Invoked before each call to Run()
171     /** Can be used to preinitialize data necessary for the test, clean up
172         caches (see Perf::WipeCaches), etc.
173         In multiple masters mode this method is called on each thread. **/
174     virtual void OnStart ( ThreadInfo& ti );
175 
176     //! Invoked after each call to Run()
177     /** Can be used to free resources allocated by OnStart().
178         Note that this method must work correctly independently of whether Run(),
179         RunSerial() or nothing is called between OnStart() and OnFinish().
180         In multiple masters mode this method is called on each thread. **/
181     virtual void OnFinish ( ThreadInfo& ti );
182 
183     //! Functionality, the cost of which has to be factored out from timing results
184     /** Applies to both parallel and serial versions. **/
185     virtual void Baseline ( ThreadInfo& );
186 
187     //! Returns description string to be used in the benchmark info/summary output
Description()188     virtual const char* Description () { return NULL; }
189 
190     //! Specifies if the histogram of individual run times in a series
191     /** If the method is not overridden, histogramName argument of TestMain is used. **/
HistogramName()192     virtual const char* HistogramName () { return DefaultHistogram; }
193 }; // class Test
194 
195 namespace internal {
196     __TBB_PERF_API void RegisterTest ( Test*, const char* testClassName, bool takeOwnership );
197 }
198 
199 template<class T>
RegisterTest()200 void RegisterTest() { internal::RegisterTest( new T, __TBB_PERF_TEST_CLASS_NAME(T), true ); }
201 
202 template<class T>
RegisterTest(T & t)203 void RegisterTest( T& t ) { internal::RegisterTest( &t, __TBB_PERF_TEST_CLASS_NAME(T), false ); }
204 
//! Bit flags that can be combined in SessionSettings::my_opts
enum SessionOptions {
    //! Use Test::RunSerial if present
    // NOTE(review): judging by the names, the comment above may actually
    // describe UseSerialBaseline - confirm against the implementation.
    UseBaseline             = 1 << 0,
    UseSerialBaseline       = 1 << 1,
    //! Both baseline flags at once
    UseBaselines            = UseBaseline | UseSerialBaseline,
    UseTaskScheduler        = 1 << 4,
    UseAffinityModes        = 1 << 5,
    UseSmallestWorkloadOnly = 1 << 6
};
214 
215 struct SessionSettings {
216     //! A combination of SessionOptions flags
217     uintptr_t my_opts;
218 
219     //! Name of a file to store performance results
220     /** These results are duplicates of what is printed on the console. **/
221     const char* my_resFile;
222 
223     //! Output destination for the histogram of individual run times in a series
224     /** If it is a string, the histogram is stored in a file with such name.
225         If it is NULL, the histogram is printed on the console. By default histograms
226         are suppressed.
227 
228         The histogram is formatted as two column table:
229         "time bucket start" "number of tests in this bucket"
230 
231         When this setting enables histogram generation, an individual test
232         can override it by implementing HistogramName method. **/
233     const char* my_histogramName;
234 
235     SessionSettings ( uintptr_t opts = 0, const char* resFile = NULL, const char* histogram = NoHistogram )
my_optsSessionSettings236         : my_opts(opts)
237         , my_resFile(resFile)
238         , my_histogramName(histogram)
239     {}
240 }; // struct SessionSettings
241 
242 //! Benchmarking session entry point
243 /** Executes all the individual tests registered previously by means of
244     RegisterTest<MycrotestImpl> **/
245 __TBB_PERF_API int TestMain( int argc, char* argv[],
246                              const SessionSettings* defaultSettings = NULL );
247 
248 
249 } // namespace Perf
250 
251 #endif /* __tbb_perf_h__ */
252 
253 
254