1 /*
2  * Copyright 2011-2018 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef __UTIL_PROFILING_H__
18 #define __UTIL_PROFILING_H__
19 
20 #include <atomic>
21 
22 #include "util/util_map.h"
23 #include "util/util_thread.h"
24 #include "util/util_vector.h"
25 
26 CCL_NAMESPACE_BEGIN
27 
/* Execution states a worker thread can report to the profiler (stored in
 * ProfilingState::event). The profiler samples these periodically to estimate
 * where time is spent. Only append inside the existing groups or before
 * PROFILING_NUM_EVENTS; the enumerator values index counter arrays. */
enum ProfilingEvent : uint32_t {
  PROFILING_UNKNOWN,
  /* Stages of integrating a single path sample. */
  PROFILING_RAY_SETUP,
  PROFILING_PATH_INTEGRATE,
  PROFILING_SCENE_INTERSECT,
  PROFILING_INDIRECT_EMISSION,
  PROFILING_VOLUME,
  PROFILING_SHADER_SETUP,
  PROFILING_SHADER_EVAL,
  PROFILING_SHADER_APPLY,
  PROFILING_AO,
  PROFILING_SUBSURFACE,
  PROFILING_CONNECT_LIGHT,
  PROFILING_SURFACE_BOUNCE,
  PROFILING_WRITE_RESULT,

  /* Ray/scene intersection query variants. */
  PROFILING_INTERSECT,
  PROFILING_INTERSECT_LOCAL,
  PROFILING_INTERSECT_SHADOW_ALL,
  PROFILING_INTERSECT_VOLUME,
  PROFILING_INTERSECT_VOLUME_ALL,

  /* Closure evaluation and sampling. */
  PROFILING_CLOSURE_EVAL,
  PROFILING_CLOSURE_SAMPLE,
  PROFILING_CLOSURE_VOLUME_EVAL,
  PROFILING_CLOSURE_VOLUME_SAMPLE,

  /* Denoising passes. */
  PROFILING_DENOISING,
  PROFILING_DENOISING_CONSTRUCT_TRANSFORM,
  PROFILING_DENOISING_RECONSTRUCT,
  PROFILING_DENOISING_DIVIDE_SHADOW,
  PROFILING_DENOISING_NON_LOCAL_MEANS,
  PROFILING_DENOISING_COMBINE_HALVES,
  PROFILING_DENOISING_GET_FEATURE,
  PROFILING_DENOISING_DETECT_OUTLIERS,

  /* Count sentinel — must stay last. */
  PROFILING_NUM_EVENTS,
};
66 
/* Contains the current execution state of a worker thread.
 * These values are constantly updated by the worker.
 * Periodically the profiler thread will wake up, read them
 * and update its internal counters based on it.
 *
 * Atomics aren't needed here since we're only doing direct
 * writes and reads to (4-byte-aligned) uint32_t, which is
 * guaranteed to be atomic on x86 since the 486.
 * Memory ordering is not guaranteed but does not matter.
 *
 * And even on other architectures, the extremely rare corner
 * case of reading an intermediate state could at worst result
 * in a single incorrect sample. */
struct ProfilingState {
  /* ProfilingEvent currently being executed by the worker. */
  volatile uint32_t event = PROFILING_UNKNOWN;
  /* Shader ID currently being evaluated, -1 for none. */
  volatile int32_t shader = -1;
  /* Object ID currently being processed, -1 for none. */
  volatile int32_t object = -1;
  /* Whether the profiler is currently collecting from this state;
   * when false, the hit counters below are not written. */
  volatile bool active = false;

  /* Per-shader/per-object hit counts, incremented by the worker while
   * active (see ProfilingHelper::set_shader/set_object). Sized to the
   * scene's shader/object counts — NOTE(review): presumably by
   * Profiler::reset/add_state; confirm in the implementation file. */
  vector<uint64_t> shader_hits;
  vector<uint64_t> object_hits;
};
89 
/* Aggregates profiling data from all registered worker threads.
 * A background thread (see run()) periodically samples each registered
 * ProfilingState and accumulates per-event, per-shader and per-object
 * counters that can be queried after rendering. */
class Profiler {
 public:
  Profiler();
  ~Profiler();

  /* Clear all counters and size the per-shader/per-object arrays. */
  void reset(int num_shaders, int num_objects);

  /* Start and stop the background sampling thread. */
  void start();
  void stop();

  /* Register/unregister a worker thread's state for sampling. */
  void add_state(ProfilingState *state);
  void remove_state(ProfilingState *state);

  /* Number of samples recorded for the given event. */
  uint64_t get_event(ProfilingEvent event);
  /* Fetch accumulated samples and hit counts for a shader/object ID.
   * NOTE(review): the bool return presumably reports whether any data
   * exists for the ID — confirm in the implementation file. */
  bool get_shader(int shader, uint64_t &samples, uint64_t &hits);
  bool get_object(int object, uint64_t &samples, uint64_t &hits);

 protected:
  /* Main loop of the sampling thread. */
  void run();

  /* Tracks how often the worker was in each ProfilingEvent while sampling,
   * so multiplying the values by the sample frequency (currently 1ms)
   * gives the approximate time spent in each state. */
  vector<uint64_t> event_samples;
  vector<uint64_t> shader_samples;
  vector<uint64_t> object_samples;

  /* Tracks the total amounts every object/shader was hit.
   * Used to evaluate relative cost, written by the render thread.
   * Indexed by the shader and object IDs that the kernel also uses
   * to index __object_flag and __shaders. */
  vector<uint64_t> shader_hits;
  vector<uint64_t> object_hits;

  /* Flag telling the sampling thread to exit its loop. */
  volatile bool do_stop_worker;
  thread *worker;

  /* NOTE(review): presumably guards `states` against concurrent
   * add/remove while run() iterates — confirm in implementation. */
  thread_mutex mutex;
  vector<ProfilingState *> states;
};
130 
131 class ProfilingHelper {
132  public:
ProfilingHelper(ProfilingState * state,ProfilingEvent event)133   ProfilingHelper(ProfilingState *state, ProfilingEvent event) : state(state)
134   {
135     previous_event = state->event;
136     state->event = event;
137   }
138 
set_event(ProfilingEvent event)139   inline void set_event(ProfilingEvent event)
140   {
141     state->event = event;
142   }
143 
set_shader(int shader)144   inline void set_shader(int shader)
145   {
146     state->shader = shader;
147     if (state->active) {
148       assert(shader < state->shader_hits.size());
149       state->shader_hits[shader]++;
150     }
151   }
152 
set_object(int object)153   inline void set_object(int object)
154   {
155     state->object = object;
156     if (state->active) {
157       assert(object < state->object_hits.size());
158       state->object_hits[object]++;
159     }
160   }
161 
~ProfilingHelper()162   ~ProfilingHelper()
163   {
164     state->event = previous_event;
165   }
166 
167  private:
168   ProfilingState *state;
169   uint32_t previous_event;
170 };
171 
172 CCL_NAMESPACE_END
173 
174 #endif /* __UTIL_PROFILING_H__ */
175