/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23 
#include "piglit-util-gl.h"

#include <limits.h>
25 
/**
 * @file time-elapsed.c
 *
 * Test that GL_TIME_ELAPSED and GL_TIMESTAMP timer query results agree
 * with CPU wall-clock time for the same rendering.
 */
31 
32 PIGLIT_GL_TEST_CONFIG_BEGIN
33 
34 	config.supports_gl_compat_version = 10;
35 	config.window_visual = PIGLIT_GL_VISUAL_DOUBLE | PIGLIT_GL_VISUAL_RGBA;
36 
37 PIGLIT_GL_TEST_CONFIG_END
38 
/** Pass-through vertex shader: gl_Vertex is used unchanged as the position. */
static const char *vs_text =
	"#version 110\n"
	"void main()\n"
	"{\n"
	"  gl_Position = gl_Vertex;\n"
	"}\n";
45 
/**
 * Time-wasting fragment shader.  This fragment shader computes:
 *
 *   x = (sum(i=0..(iters-1): 2*i) % iters) / iters
 *
 * The sum equals iters * (iters - 1), so x always works out to 0.0, but
 * it is highly unlikely that an optimizer will figure this out.  Hence
 * we can use this shader to waste an arbitrary amount of time (by
 * suitable choice of the value of iters).
 *
 * The modulo is applied incrementally (one conditional subtraction after
 * each addition of i) rather than once at the end.
 *
 * The shader outputs a color of (x, 1.0, 0.0, 0.0), i.e. the green that
 * piglit_display() probes for.
 */
static const char *fs_text =
	"#version 110\n"
	"uniform int iters;\n"
	"void main()\n"
	"{\n"
	"  int cumulative_sum = 0;\n"
	"  for (int i = 0; i < iters; ++i) {\n"
	"    cumulative_sum += i;\n"
	"    if (cumulative_sum >= iters)\n"
	"      cumulative_sum -= iters;\n"
	"    cumulative_sum += i;\n"
	"    if (cumulative_sum >= iters)\n"
	"      cumulative_sum -= iters;\n"
	"  }\n"
	"  float x = float(cumulative_sum) / float(iters);\n"
	"  gl_FragColor = vec4(x, 1.0, 0.0, 0.0);\n"
	"}\n";
75 
/**
 * Which kind of timer query is exercised.
 *
 * TIME_ELAPSED (the default) brackets the draw with a single
 * GL_TIME_ELAPSED query; TIMESTAMP records a GL_TIMESTAMP before and
 * after the draw and uses their difference.  piglit_init() switches to
 * TIMESTAMP when the test is run with the "timestamp" argument.
 */
static enum {
	TIME_ELAPSED,
	TIMESTAMP
} test = TIME_ELAPSED;
80 
81 GLuint prog;
82 GLint iters_loc;
83 
84 static float
draw(GLuint * q,int iters)85 draw(GLuint *q, int iters)
86 {
87 	int64_t start_time, end_time;
88 
89 	glUseProgram(prog);
90 	glUniform1i(iters_loc, iters);
91 
92 	start_time = piglit_time_get_nano();
93 
94 	if (test == TIMESTAMP) {
95 		glQueryCounter(q[0], GL_TIMESTAMP);
96 	} else {
97 		glBeginQuery(GL_TIME_ELAPSED, q[0]);
98 	}
99 	piglit_draw_rect(-1, -1, 2, 2);
100 	if (test == TIMESTAMP) {
101 		glQueryCounter(q[1], GL_TIMESTAMP);
102 	} else {
103 		glEndQuery(GL_TIME_ELAPSED);
104 	}
105 
106 	/* This glFinish() is important, since this is used in a
107 	 * timing loop.
108 	 */
109 	glFinish();
110 
111 	end_time = piglit_time_get_nano();
112 
113 	return (end_time - start_time)/ 1000.0 / 1000.0 / 1000.0;
114 }
115 
116 static float
get_gpu_time(GLuint * q)117 get_gpu_time(GLuint *q)
118 {
119 	GLint64EXT elapsed;
120 
121 	if (test == TIMESTAMP) {
122 		GLint64 start, end;
123 		glGetQueryObjecti64vEXT(q[0], GL_QUERY_RESULT, &start);
124 		glGetQueryObjecti64vEXT(q[1], GL_QUERY_RESULT, &end);
125 		elapsed = end - start;
126 	} else {
127 		glGetQueryObjecti64vEXT(q[0], GL_QUERY_RESULT, &elapsed);
128 	}
129 
130 	return elapsed / 1000.0 / 1000.0 / 1000.0;
131 }
132 
133 enum piglit_result
piglit_display(void)134 piglit_display(void)
135 {
136 	bool pass = true;
137 	float green[4] = {0.0, 1.0, 0.0, 0.0};
138 	GLuint q[2];
139 	int iters;
140 #define	NUM_RESULTS 5
141 	float cpu_time[NUM_RESULTS];
142 	float gpu_time[NUM_RESULTS];
143 	float delta[NUM_RESULTS];
144 	float cpu_time_mean;
145 	float delta_mean, delta_stddev;
146 	float cpu_overhead;
147 	float t, t_cutoff;
148 	int i;
149 
150 	glColor4f(0.0, 1.0, 0.0, 0.0);
151 	glGenQueries(2, q);
152 
153 	/* Prime the drawing pipe before we start measuring time,
154 	 * since the first draw call is likely to be slower than all
155 	 * others.
156 	 */
157 	draw(q, 1);
158 
159 	/* Figure out some baseline difference between GPU time
160 	 * elapsed and CPU time elapsed for a single draw call (CPU
161 	 * overhead of timer query and glFinish()).
162 	 *
163 	 * Note that this doesn't take into account any extra CPU time
164 	 * elapsed from start to finish if multiple batchbuffers are
165 	 * accumulated by the driver in getting to our 1/10th of a
166 	 * second elapsed time goal, and some other client sneaks
167 	 * rendering in in between those batches.
168 	 *
169 	 * Part of the rendering size being relatively large is to
170 	 * hopefully avoid that, though it might be better to have
171 	 * some time-consuming shader with a single draw call instead.
172 	 */
173 	cpu_overhead = 0;
174 	for (i = 0; i < NUM_RESULTS; i++) {
175 		cpu_time[i] = draw(q, 1);
176 		gpu_time[i] = get_gpu_time(q);
177 
178 		cpu_overhead += cpu_time[i] - gpu_time[i];
179 	}
180 	cpu_overhead /= NUM_RESULTS;
181 
182 	/* Find a number of draw calls that takes about 1/10th of a
183 	 * second.
184 	 */
185 retry:
186 	for (iters = 1; ; iters *= 2) {
187 		if (draw(q, iters) > 0.1)
188 			break;
189 		if (iters * 2 <= iters) {
190 			printf("Couldn't find appropriate number of iterations\n");
191 			piglit_report_result(PIGLIT_FAIL);
192 		}
193 	}
194 
195 	/* Now, do several runs like this so we can determine if the
196 	 * timer matches up with wall time.
197 	 */
198 	for (i = 0; i < NUM_RESULTS; i++) {
199 		cpu_time[i] = draw(q, iters);
200 		gpu_time[i] = get_gpu_time(q);
201 	}
202 
203 	cpu_time_mean = 0;
204 	delta_mean = 0;
205 	for (i = 0; i < NUM_RESULTS; i++) {
206 		delta[i] = cpu_time[i] - cpu_overhead - gpu_time[i];
207 		cpu_time_mean += cpu_time[i];
208 		delta_mean += delta[i];
209 	}
210 	cpu_time_mean /= NUM_RESULTS;
211 	delta_mean /= NUM_RESULTS;
212 
213 	/* There's some risk of our "get to 0.1 seconds" loop deciding
214 	 * that a small number of iters was sufficient if we got
215 	 * scheduled out for a while.  Re-run if so.
216 	 *
217 	 * We wouldn't have that problem if we could rely on the GPU
218 	 * time elapsed query, but that's the thing we're testing.
219 	 */
220 	if (cpu_time_mean < 0.05)
221 		goto retry;
222 
223 	/* Calculate stddevs. */
224 	delta_stddev = 0;
225 	for (i = 0; i < NUM_RESULTS; i++) {
226 		float d = delta[i] - delta_mean;
227 		delta_stddev += d * d / (NUM_RESULTS - 1);
228 	}
229 	delta_stddev = sqrt(delta_stddev);
230 
231 	/* Dependent t-test for paired samples.
232 	 *
233 	 * This is a good test, because we expect the two times (cpu
234 	 * and gpu) of the samples to be correlated, and we expect the
235 	 * stddev to match (since time it should arise from system
236 	 * variables like scheduling of other tasks and state of the
237 	 * caches).  Unless maybe the variance of cpu time is greater
238 	 * than gpu time, because we may see scheduling accounted for
239 	 * in our CPU (wall) time, while scheduling other tasks
240 	 * doesn't end up counted toward our GPU time.
241 	 */
242 	t = delta_mean / (delta_stddev / sqrt(NUM_RESULTS));
243 
244 	/* Integral of Student's t distribution for 4 degrees of
245 	 * freedom (NUM_RESULTS = 5), two-tailed (we care about
246 	 * difference above or below 0, not just one direction), at
247 	 * p = .05.
248 	 */
249 	t_cutoff = 2.776;
250 
251 	/* Now test that our sampled distribution (rate of clock
252 	 * advance between CPU and GPU) was within expectations for a
253 	 * delta of 0.  I actually want to be testing the likelihood
254 	 * that the real difference is enough that we actually care.
255 	 * I didn't find an easy way to account for that after a bunch
256 	 * of wikipedia browsing, so I'll punt on proper analysis for
257 	 * now and just check that the sampled delta isn't too small
258 	 * to care about.
259 	 */
260 	if (t > t_cutoff && fabs(delta_mean) > .05 * cpu_time_mean) {
261 		fprintf(stderr, "GPU time didn't match CPU time\n");
262 		printf("Estimated CPU overhead: %f\n", cpu_overhead);
263 		printf("Difference: %f secs (+/- %f secs)\n",
264 		       delta_mean, delta_stddev);
265 		printf("t = %f\n", t);
266 
267 		printf("%20s %20s %20s\n",
268 		       "gpu_time", "cpu_time", "delta");
269 		for (i = 0; i < NUM_RESULTS; i++) {
270 			printf("%20f %20f %20f\n",
271 			       gpu_time[i], cpu_time[i], delta[i]);
272 		}
273 
274 		pass = false;
275 	}
276 
277 	pass = piglit_probe_rect_rgba(0, 0, piglit_width, piglit_height,
278 				      green) && pass;
279 
280 	piglit_present_results();
281 
282 	glDeleteQueries(2, q);
283 
284 	return pass ? PIGLIT_PASS : PIGLIT_FAIL;
285 }
286 
287 void
piglit_init(int argc,char ** argv)288 piglit_init(int argc, char **argv)
289 {
290 	piglit_require_gl_version(20);
291 
292 	prog = piglit_build_simple_program(vs_text, fs_text);
293 	iters_loc = glGetUniformLocation(prog, "iters");
294 
295 	piglit_require_extension("GL_EXT_timer_query");
296 
297 	if (argc == 2 && strcmp(argv[1], "timestamp") == 0) {
298 		piglit_require_extension("GL_ARB_timer_query");
299 		test = TIMESTAMP;
300 	}
301 }
302