1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "piglit-util-gl.h"
25
26 /**
27 * @file time-elapsed.c
28 *
29 * Test TIME_ELAPSED and TIMESTAMP queries.
30 */
31
32 PIGLIT_GL_TEST_CONFIG_BEGIN
33
34 config.supports_gl_compat_version = 10;
35 config.window_visual = PIGLIT_GL_VISUAL_DOUBLE | PIGLIT_GL_VISUAL_RGBA;
36
37 PIGLIT_GL_TEST_CONFIG_END
38
/**
 * Pass-through vertex shader: forwards the incoming vertex position
 * unchanged.
 *
 * Declared const because it points at a string literal, which must
 * never be written through.
 */
static const char *vs_text =
	"#version 110\n"
	"void main()\n"
	"{\n"
	" gl_Position = gl_Vertex;\n"
	"}\n";
45
/**
 * Time-wasting fragment shader. This fragment shader computes:
 *
 * x = (sum(i=0..(iters-1): 2*i) % iters) / iters
 *
 * The sum equals iters * (iters - 1), so the value always works out
 * to 0.0, but it's highly unlikely that an optimizer will figure this
 * out. Hence we can use this shader to waste an arbitrary amount of
 * time (by suitable choice of the value of the "iters" uniform).
 *
 * The modulo is applied incrementally (subtracting iters whenever the
 * running sum reaches it) rather than with a % operator.
 *
 * The shader outputs a color of (x, 1.0, 0.0, 0.0).
 *
 * Declared const because it points at a string literal, which must
 * never be written through.
 */
static const char *fs_text =
	"#version 110\n"
	"uniform int iters;\n"
	"void main()\n"
	"{\n"
	" int cumulative_sum = 0;\n"
	" for (int i = 0; i < iters; ++i) {\n"
	" cumulative_sum += i;\n"
	" if (cumulative_sum >= iters)\n"
	" cumulative_sum -= iters;\n"
	" cumulative_sum += i;\n"
	" if (cumulative_sum >= iters)\n"
	" cumulative_sum -= iters;\n"
	" }\n"
	" float x = float(cumulative_sum) / float(iters);\n"
	" gl_FragColor = vec4(x, 1.0, 0.0, 0.0);\n"
	"}\n";
75
/**
 * Which timer-query mechanism this run exercises.
 *
 * Starts out as TIME_ELAPSED; piglit_init() switches it to TIMESTAMP
 * when the test is invoked with the single argument "timestamp".
 */
enum {
	TIME_ELAPSED = 0,
	TIMESTAMP = 1
} test = TIME_ELAPSED;
80
81 GLuint prog;
82 GLint iters_loc;
83
84 static float
draw(GLuint * q,int iters)85 draw(GLuint *q, int iters)
86 {
87 int64_t start_time, end_time;
88
89 glUseProgram(prog);
90 glUniform1i(iters_loc, iters);
91
92 start_time = piglit_time_get_nano();
93
94 if (test == TIMESTAMP) {
95 glQueryCounter(q[0], GL_TIMESTAMP);
96 } else {
97 glBeginQuery(GL_TIME_ELAPSED, q[0]);
98 }
99 piglit_draw_rect(-1, -1, 2, 2);
100 if (test == TIMESTAMP) {
101 glQueryCounter(q[1], GL_TIMESTAMP);
102 } else {
103 glEndQuery(GL_TIME_ELAPSED);
104 }
105
106 /* This glFinish() is important, since this is used in a
107 * timing loop.
108 */
109 glFinish();
110
111 end_time = piglit_time_get_nano();
112
113 return (end_time - start_time)/ 1000.0 / 1000.0 / 1000.0;
114 }
115
116 static float
get_gpu_time(GLuint * q)117 get_gpu_time(GLuint *q)
118 {
119 GLint64EXT elapsed;
120
121 if (test == TIMESTAMP) {
122 GLint64 start, end;
123 glGetQueryObjecti64vEXT(q[0], GL_QUERY_RESULT, &start);
124 glGetQueryObjecti64vEXT(q[1], GL_QUERY_RESULT, &end);
125 elapsed = end - start;
126 } else {
127 glGetQueryObjecti64vEXT(q[0], GL_QUERY_RESULT, &elapsed);
128 }
129
130 return elapsed / 1000.0 / 1000.0 / 1000.0;
131 }
132
133 enum piglit_result
piglit_display(void)134 piglit_display(void)
135 {
136 bool pass = true;
137 float green[4] = {0.0, 1.0, 0.0, 0.0};
138 GLuint q[2];
139 int iters;
140 #define NUM_RESULTS 5
141 float cpu_time[NUM_RESULTS];
142 float gpu_time[NUM_RESULTS];
143 float delta[NUM_RESULTS];
144 float cpu_time_mean;
145 float delta_mean, delta_stddev;
146 float cpu_overhead;
147 float t, t_cutoff;
148 int i;
149
150 glColor4f(0.0, 1.0, 0.0, 0.0);
151 glGenQueries(2, q);
152
153 /* Prime the drawing pipe before we start measuring time,
154 * since the first draw call is likely to be slower than all
155 * others.
156 */
157 draw(q, 1);
158
159 /* Figure out some baseline difference between GPU time
160 * elapsed and CPU time elapsed for a single draw call (CPU
161 * overhead of timer query and glFinish()).
162 *
163 * Note that this doesn't take into account any extra CPU time
164 * elapsed from start to finish if multiple batchbuffers are
165 * accumulated by the driver in getting to our 1/10th of a
166 * second elapsed time goal, and some other client sneaks
167 * rendering in in between those batches.
168 *
169 * Part of the rendering size being relatively large is to
170 * hopefully avoid that, though it might be better to have
171 * some time-consuming shader with a single draw call instead.
172 */
173 cpu_overhead = 0;
174 for (i = 0; i < NUM_RESULTS; i++) {
175 cpu_time[i] = draw(q, 1);
176 gpu_time[i] = get_gpu_time(q);
177
178 cpu_overhead += cpu_time[i] - gpu_time[i];
179 }
180 cpu_overhead /= NUM_RESULTS;
181
182 /* Find a number of draw calls that takes about 1/10th of a
183 * second.
184 */
185 retry:
186 for (iters = 1; ; iters *= 2) {
187 if (draw(q, iters) > 0.1)
188 break;
189 if (iters * 2 <= iters) {
190 printf("Couldn't find appropriate number of iterations\n");
191 piglit_report_result(PIGLIT_FAIL);
192 }
193 }
194
195 /* Now, do several runs like this so we can determine if the
196 * timer matches up with wall time.
197 */
198 for (i = 0; i < NUM_RESULTS; i++) {
199 cpu_time[i] = draw(q, iters);
200 gpu_time[i] = get_gpu_time(q);
201 }
202
203 cpu_time_mean = 0;
204 delta_mean = 0;
205 for (i = 0; i < NUM_RESULTS; i++) {
206 delta[i] = cpu_time[i] - cpu_overhead - gpu_time[i];
207 cpu_time_mean += cpu_time[i];
208 delta_mean += delta[i];
209 }
210 cpu_time_mean /= NUM_RESULTS;
211 delta_mean /= NUM_RESULTS;
212
213 /* There's some risk of our "get to 0.1 seconds" loop deciding
214 * that a small number of iters was sufficient if we got
215 * scheduled out for a while. Re-run if so.
216 *
217 * We wouldn't have that problem if we could rely on the GPU
218 * time elapsed query, but that's the thing we're testing.
219 */
220 if (cpu_time_mean < 0.05)
221 goto retry;
222
223 /* Calculate stddevs. */
224 delta_stddev = 0;
225 for (i = 0; i < NUM_RESULTS; i++) {
226 float d = delta[i] - delta_mean;
227 delta_stddev += d * d / (NUM_RESULTS - 1);
228 }
229 delta_stddev = sqrt(delta_stddev);
230
231 /* Dependent t-test for paired samples.
232 *
233 * This is a good test, because we expect the two times (cpu
234 * and gpu) of the samples to be correlated, and we expect the
235 * stddev to match (since time it should arise from system
236 * variables like scheduling of other tasks and state of the
237 * caches). Unless maybe the variance of cpu time is greater
238 * than gpu time, because we may see scheduling accounted for
239 * in our CPU (wall) time, while scheduling other tasks
240 * doesn't end up counted toward our GPU time.
241 */
242 t = delta_mean / (delta_stddev / sqrt(NUM_RESULTS));
243
244 /* Integral of Student's t distribution for 4 degrees of
245 * freedom (NUM_RESULTS = 5), two-tailed (we care about
246 * difference above or below 0, not just one direction), at
247 * p = .05.
248 */
249 t_cutoff = 2.776;
250
251 /* Now test that our sampled distribution (rate of clock
252 * advance between CPU and GPU) was within expectations for a
253 * delta of 0. I actually want to be testing the likelihood
254 * that the real difference is enough that we actually care.
255 * I didn't find an easy way to account for that after a bunch
256 * of wikipedia browsing, so I'll punt on proper analysis for
257 * now and just check that the sampled delta isn't too small
258 * to care about.
259 */
260 if (t > t_cutoff && fabs(delta_mean) > .05 * cpu_time_mean) {
261 fprintf(stderr, "GPU time didn't match CPU time\n");
262 printf("Estimated CPU overhead: %f\n", cpu_overhead);
263 printf("Difference: %f secs (+/- %f secs)\n",
264 delta_mean, delta_stddev);
265 printf("t = %f\n", t);
266
267 printf("%20s %20s %20s\n",
268 "gpu_time", "cpu_time", "delta");
269 for (i = 0; i < NUM_RESULTS; i++) {
270 printf("%20f %20f %20f\n",
271 gpu_time[i], cpu_time[i], delta[i]);
272 }
273
274 pass = false;
275 }
276
277 pass = piglit_probe_rect_rgba(0, 0, piglit_width, piglit_height,
278 green) && pass;
279
280 piglit_present_results();
281
282 glDeleteQueries(2, q);
283
284 return pass ? PIGLIT_PASS : PIGLIT_FAIL;
285 }
286
287 void
piglit_init(int argc,char ** argv)288 piglit_init(int argc, char **argv)
289 {
290 piglit_require_gl_version(20);
291
292 prog = piglit_build_simple_program(vs_text, fs_text);
293 iters_loc = glGetUniformLocation(prog, "iters");
294
295 piglit_require_extension("GL_EXT_timer_query");
296
297 if (argc == 2 && strcmp(argv[1], "timestamp") == 0) {
298 piglit_require_extension("GL_ARB_timer_query");
299 test = TIMESTAMP;
300 }
301 }
302