1 /* Copyright (c) 2017-2018 Hans-Kristian Arntzen
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining
4  * a copy of this software and associated documentation files (the
5  * "Software"), to deal in the Software without restriction, including
6  * without limitation the rights to use, copy, modify, merge, publish,
7  * distribute, sublicense, and/or sell copies of the Software, and to
8  * permit persons to whom the Software is furnished to do so, subject to
9  * the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be
12  * included in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "wsi_timing.hpp"
24 #include "wsi.hpp"
25 #include <string.h>
26 #include <algorithm>
27 #include <cmath>
28 
29 #ifndef _WIN32
30 #include <time.h>
31 #endif
32 
33 namespace Vulkan
34 {
init(WSIPlatform * platform,VkDevice device,VkSwapchainKHR swapchain,const WSITimingOptions & options)35 void WSITiming::init(WSIPlatform *platform, VkDevice device, VkSwapchainKHR swapchain, const WSITimingOptions &options)
36 {
37 	this->platform = platform;
38 	this->device = device;
39 	this->swapchain = swapchain;
40 	this->options = options;
41 
42 	serial = {};
43 	pacing = {};
44 	last_frame = {};
45 	feedback = {};
46 	smoothing = {};
47 	feedback.timing_buffer.resize(64);
48 }
49 
set_debug_enable(bool enable)50 void WSITiming::set_debug_enable(bool enable)
51 {
52 	options.debug = enable;
53 }
54 
set_latency_limiter(LatencyLimiter limiter)55 void WSITiming::set_latency_limiter(LatencyLimiter limiter)
56 {
57 	options.latency_limiter = limiter;
58 }
59 
get_options() const60 const WSITimingOptions &WSITiming::get_options() const
61 {
62 	return options;
63 }
64 
set_swap_interval(unsigned interval)65 void WSITiming::set_swap_interval(unsigned interval)
66 {
67 	if (interval == options.swap_interval || interval == 0)
68 		return;
69 
70 	// First, extrapolate to our current serial so we can make a more correct target time using the new swap interval.
71 	uint64_t target = compute_target_present_time_for_serial(serial.serial);
72 	if (target)
73 	{
74 		pacing.base_serial = serial.serial;
75 		pacing.base_present = target;
76 	}
77 
78 	options.swap_interval = interval;
79 }
80 
update_refresh_interval()81 void WSITiming::update_refresh_interval()
82 {
83 	VkRefreshCycleDurationGOOGLE refresh;
84 	if (vkGetRefreshCycleDurationGOOGLE(device, swapchain, &refresh) == VK_SUCCESS)
85 	{
86 		if (!feedback.refresh_interval || options.debug)
87 			LOGI("Observed refresh rate: %.6f Hz.\n", 1e9 / refresh.refreshDuration);
88 		feedback.refresh_interval = refresh.refreshDuration;
89 	}
90 	else
91 		LOGE("Failed to get refresh cycle duration.\n");
92 }
93 
find_latest_timestamp(uint32_t start_serial)94 WSITiming::Timing *WSITiming::find_latest_timestamp(uint32_t start_serial)
95 {
96 	for (uint32_t i = 1; i < NUM_TIMINGS - 1; i++)
97 	{
98 		uint32_t past_serial = start_serial - i;
99 		auto &past = feedback.past_timings[past_serial & NUM_TIMING_MASK];
100 		if (past.wall_serial == past_serial && past.timing.actualPresentTime != 0)
101 			return &past;
102 	}
103 
104 	return nullptr;
105 }
106 
update_past_presentation_timing()107 void WSITiming::update_past_presentation_timing()
108 {
109 	// Update past presentation timings.
110 	uint32_t presentation_count;
111 	if (vkGetPastPresentationTimingGOOGLE(device, swapchain, &presentation_count, nullptr) != VK_SUCCESS)
112 		return;
113 
114 	if (presentation_count)
115 	{
116 		if (presentation_count > feedback.timing_buffer.size())
117 			feedback.timing_buffer.resize(presentation_count);
118 		auto res = vkGetPastPresentationTimingGOOGLE(device, swapchain, &presentation_count, feedback.timing_buffer.data());
119 
120 		// I have a feeling this is racy in nature and we might have received another presentation timing in-between
121 		// querying count and getting actual data, so accept INCOMPLETE here.
122 		if (res == VK_SUCCESS || res == VK_INCOMPLETE)
123 		{
124 			for (uint32_t i = 0; i < presentation_count; i++)
125 			{
126 				auto &t = feedback.past_timings[feedback.timing_buffer[i].presentID & NUM_TIMING_MASK];
127 				if (t.wall_serial == feedback.timing_buffer[i].presentID)
128 				{
129 					t.timing = feedback.timing_buffer[i];
130 
131 					uint64_t gpu_done_time = (t.timing.earliestPresentTime - t.timing.presentMargin);
132 					t.slack = int64_t(t.timing.actualPresentTime - gpu_done_time);
133 					t.pipeline_latency = int64_t(gpu_done_time - t.wall_frame_begin);
134 
135 					// Expected result unless proven otherwise.
136 					t.result = TimingResult::Expected;
137 
138 					// Feed the heuristics on when to drop frame rate or promote it.
139 					if ((feedback.refresh_interval != 0) &&
140 					    (t.timing.earliestPresentTime < t.timing.actualPresentTime) &&
141 					    (t.timing.presentMargin > feedback.refresh_interval / 4))
142 					{
143 						// We could have presented earlier, and we had decent GPU margin to do so.
144 						// Deal with frame dropping later.
145 						t.result = TimingResult::VeryEarly;
146 						if (options.debug)
147 							LOGI("Frame completed very early, but was held back by swap interval!\n");
148 					}
149 				}
150 
151 				update_frame_pacing(t.wall_serial, t.timing.actualPresentTime, false);
152 			}
153 		}
154 	}
155 
156 	auto *timing = find_latest_timestamp(serial.serial);
157 	if (timing && timing->timing.actualPresentTime >= timing->wall_frame_begin)
158 	{
159 		auto total_latency = timing->timing.actualPresentTime - timing->wall_frame_begin;
160 		feedback.latency = 0.99 * feedback.latency + 0.01e-9 * total_latency;
161 
162 		if (options.debug)
163 		{
164 			LOGI("Have presentation timing for %u frames in the past.\n",
165 			     serial.serial - timing->timing.presentID);
166 		}
167 
168 		if (int64_t(timing->timing.presentMargin) < 0)
169 			LOGE("Present margin is negative (%lld) ... ?!\n", static_cast<long long>(timing->timing.presentMargin));
170 
171 		if (timing->timing.earliestPresentTime > timing->timing.actualPresentTime)
172 			LOGE("Earliest present time is > actual present time ... Bug?\n");
173 
174 		if (timing->timing.actualPresentTime < timing->timing.desiredPresentTime)
175 		{
176 			LOGE("Image was presented before desired present time, bug? (actual: %llu, desired: %llu)\n",
177 			     static_cast<unsigned long long>(timing->timing.actualPresentTime),
178 			     static_cast<unsigned long long>(timing->timing.desiredPresentTime));
179 		}
180 		else if (feedback.refresh_interval != 0 && timing->timing.desiredPresentTime != 0)
181 		{
182 			uint64_t delta = timing->timing.actualPresentTime - timing->timing.desiredPresentTime;
183 			if (delta >= feedback.refresh_interval)
184 			{
185 				LOGE("*** Image was presented %u frames late "
186 				     "(target: %.3f ms, rounded target: %.3f ms, actual: %.3f ms) compared to desired target. "
187 				     "This normally happens in startup phase, but otherwise it's either a real hitch or app bug. ***\n",
188 				     unsigned(delta / feedback.refresh_interval),
189 				     timing->wall_frame_target * 1e-6, timing->timing.desiredPresentTime * 1e-6,
190 				     timing->timing.actualPresentTime * 1e-6);
191 			}
192 		}
193 
194 		// How much can we squeeze latency?
195 		if (options.debug)
196 			LOGI("Total latency: %.3f ms, slack time: %.3f\n", total_latency * 1e-6, timing->slack * 1e-6);
197 
198 		if (last_frame.serial && timing->wall_serial != last_frame.serial)
199 		{
200 			double frame_time_ns = double(timing->timing.actualPresentTime - last_frame.present_time) /
201 			                       double(timing->wall_serial - last_frame.serial);
202 
203 			// We only detect a hitch if we have the same swap interval target,
204 			// otherwise it might as well just be a transient state thing.
205 			if ((timing->swap_interval_target == options.swap_interval) &&
206 			    (feedback.refresh_interval != 0) &&
207 			    (frame_time_ns > 1.1 * options.swap_interval * feedback.refresh_interval))
208 			{
209 				LOGE("*** HITCH DETECTED ***\n");
210 				timing->result = TimingResult::TooLate;
211 
212 				if (platform)
213 				{
214 					unsigned frame_delta = unsigned(round(frame_time_ns / (options.swap_interval *
215 					                                                       feedback.refresh_interval)));
216 					VK_ASSERT(frame_delta);
217 					unsigned dropped_frames = frame_delta - 1;
218 					platform->event_display_timing_stutter(serial.serial, timing->wall_serial, dropped_frames);
219 				}
220 			}
221 
222 			if (options.debug)
223 			{
224 				LOGI("Frame time ID #%u: %.3f ms\n",
225 				     timing->wall_serial,
226 				     1e-6 * frame_time_ns);
227 			}
228 		}
229 
230 		last_frame.serial = timing->wall_serial;
231 		last_frame.present_time = timing->timing.actualPresentTime;
232 	}
233 }
234 
wait_until(int64_t nsecs)235 void WSITiming::wait_until(int64_t nsecs)
236 {
237 #ifndef _WIN32
238 	timespec ts;
239 	ts.tv_sec = nsecs / 1000000000;
240 	ts.tv_nsec = nsecs % 1000000000;
241 	clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &ts, nullptr);
242 #else
243 	(void)nsecs;
244 #endif
245 }
246 
get_wall_time()247 uint64_t WSITiming::get_wall_time()
248 {
249 #ifndef _WIN32
250 	// GOOGLE_display_timing on Linux and Android use CLOCK_MONOTONIC explicitly.
251 	timespec ts;
252 	clock_gettime(CLOCK_MONOTONIC, &ts);
253 	return ts.tv_sec * 1000000000ull + ts.tv_nsec;
254 #else
255 	return 0;
256 #endif
257 }
258 
update_frame_pacing(uint32_t serial,uint64_t present_time,bool wall_time)259 void WSITiming::update_frame_pacing(uint32_t serial, uint64_t present_time, bool wall_time)
260 {
261 	if (wall_time && !pacing.have_real_estimate)
262 	{
263 		// We don't have a refresh interval yet, just update the estimate from CPU.
264 		pacing.base_serial = serial;
265 		pacing.base_present = present_time;
266 		pacing.have_estimate = true;
267 		return;
268 	}
269 	else if (!wall_time && !pacing.have_real_estimate)
270 	{
271 		pacing.base_serial = serial;
272 		pacing.base_present = present_time;
273 		pacing.have_real_estimate = true;
274 		return;
275 	}
276 	else if (wall_time)
277 	{
278 		// We already have a correct estimate, don't update.
279 		return;
280 	}
281 
282 	if (!feedback.refresh_interval)
283 	{
284 		// If we don't have a refresh interval yet, we cannot estimate anything.
285 		// What we can do instead is just to blindly use the latest observed timestamp as our guiding hand.
286 		if (present_time > pacing.base_present)
287 		{
288 			pacing.base_serial = serial;
289 			pacing.base_present = present_time;
290 		}
291 	}
292 	else
293 	{
294 		int32_t frame_dist = int32_t(serial - pacing.base_serial);
295 
296 		// Don't update with the past.
297 		if (frame_dist <= 0)
298 			return;
299 
300 		// Extrapolate timing from current.
301 		uint64_t extrapolated_present_time =
302 				pacing.base_present + feedback.refresh_interval * options.swap_interval * (serial - pacing.base_serial);
303 		int64_t error = std::abs(int64_t(extrapolated_present_time - present_time));
304 
305 		// If the delta is close enough (expected frame pace),
306 		// update the base ID, so we can make more accurate future estimates.
307 		// This is relevant if we want to dynamically change swap interval.
308 		// If present time is significantly larger than extrapolated time,
309 		// we can assume we had a dropped frame, so we also need to update our base estimate.
310 		if ((present_time > extrapolated_present_time) || (error < int64_t(feedback.refresh_interval / 2)))
311 		{
312 			// We must have dropped frames, or similar.
313 			// Update our base estimate.
314 			pacing.base_serial = serial;
315 			pacing.base_present = present_time;
316 			if (options.debug)
317 			{
318 				LOGI("Updating frame pacing base to serial: %u (delta: %.3f ms)\n", pacing.base_serial,
319 				     1e-6 * int64_t(present_time - extrapolated_present_time));
320 			}
321 		}
322 	}
323 }
324 
update_frame_time_smoothing(double & frame_time,double & elapsed_time)325 void WSITiming::update_frame_time_smoothing(double &frame_time, double &elapsed_time)
326 {
327 	double target_frame_time = frame_time;
328 	if (feedback.refresh_interval)
329 		target_frame_time = double(options.swap_interval * feedback.refresh_interval) * 1e-9;
330 
331 	double actual_elapsed = elapsed_time - smoothing.offset;
332 	smoothing.elapsed += target_frame_time;
333 
334 	double delta = actual_elapsed - smoothing.elapsed;
335 	if (delta > std::fabs(target_frame_time * 4.0))
336 	{
337 		// We're way off, something must have happened, reset the smoothing.
338 		// Don't jump skip the frame time, other than keeping the frame_time as-is.
339 		// We might have had a natural pause, and it doesn't make sense to report absurd frame times.
340 		// Apps needing to sync to wall time over time could use elapsed_time as a guiding hand.
341 		if (options.debug)
342 			LOGI("Detected discontinuity in smoothing algorithm!\n");
343 		smoothing.offset = elapsed_time;
344 		smoothing.elapsed = 0.0;
345 		return;
346 	}
347 
348 	double jitter_offset = 0.0;
349 
350 	// Accept up to 0.5% jitter to catch up or slow down smoothly to our target elapsed time.
351 	if (delta > 0.1 * target_frame_time)
352 		jitter_offset = 0.005 * target_frame_time;
353 	else if (delta < -0.1 * target_frame_time)
354 		jitter_offset = -0.005 * target_frame_time;
355 
356 	target_frame_time += jitter_offset;
357 	smoothing.elapsed += jitter_offset;
358 
359 	elapsed_time = smoothing.elapsed + smoothing.offset;
360 	frame_time = target_frame_time;
361 }
362 
get_refresh_interval() const363 uint64_t WSITiming::get_refresh_interval() const
364 {
365 	return feedback.refresh_interval;
366 }
367 
get_current_latency() const368 double WSITiming::get_current_latency() const
369 {
370 	return feedback.latency;
371 }
372 
limit_latency(Timing & new_timing)373 void WSITiming::limit_latency(Timing &new_timing)
374 {
375 	if (options.latency_limiter != LatencyLimiter::None)
376 	{
377 		// Try to squeeze timings by sleeping, quite shaky, but very fun :)
378 		if (feedback.refresh_interval)
379 		{
380 			int64_t target = int64_t(compute_target_present_time_for_serial(serial.serial));
381 
382 			if (options.latency_limiter == LatencyLimiter::AdaptiveLowLatency)
383 			{
384 				int64_t latency = 0;
385 				if (get_conservative_latency(latency))
386 				{
387 					// Keep quarter frame as buffer in case this frame is heavier than normal.
388 					latency += feedback.refresh_interval >> 2;
389 					wait_until(target - latency);
390 
391 					uint64_t old_time = new_timing.wall_frame_begin;
392 					new_timing.wall_frame_begin = get_wall_time();
393 					if (options.debug)
394 					{
395 						LOGI("Slept for %.3f ms for latency tuning.\n",
396 						     1e-6 * (new_timing.wall_frame_begin - old_time));
397 					}
398 				}
399 			}
400 			else if (options.latency_limiter == LatencyLimiter::IdealPipeline)
401 			{
402 				// In the ideal pipeline we have one frame for CPU to work,
403 				// then one frame for GPU to work in parallel, so we should strive for a little under 2 frames of latency here.
404 				// The assumption is that we can kick some work to GPU at least mid-way through our frame,
405 				// which will become our slack factor.
406 				int64_t latency = feedback.refresh_interval * 2;
407 				wait_until(target - latency);
408 
409 				uint64_t old_time = new_timing.wall_frame_begin;
410 				new_timing.wall_frame_begin = get_wall_time();
411 				if (options.debug)
412 				{
413 					LOGI("Slept for %.3f ms for latency tuning.\n",
414 					     1e-6 * (new_timing.wall_frame_begin - old_time));
415 				}
416 			}
417 		}
418 	}
419 }
420 
begin_frame(double & frame_time,double & elapsed_time)421 void WSITiming::begin_frame(double &frame_time, double &elapsed_time)
422 {
423 	promote_or_demote_frame_rate();
424 
425 	// Update initial frame elapsed estimate,
426 	// from here, we'll try to lock the frame time to refresh_rate +/- epsilon.
427 	if (serial.serial == 0)
428 	{
429 		smoothing.offset = elapsed_time;
430 		smoothing.elapsed = 0.0;
431 	}
432 	serial.serial++;
433 
434 	if (options.debug)
435 		LOGI("Starting WSITiming frame serial: %u\n", serial.serial);
436 
437 	// On X11, this is found over time by observation, so we need to adapt it.
438 	// Only after we have observed the refresh cycle duration, we can start syncing against it.
439 	if ((serial.serial & 7) == 0)
440 		update_refresh_interval();
441 
442 	auto &new_timing = feedback.past_timings[serial.serial & NUM_TIMING_MASK];
443 	new_timing.wall_serial = serial.serial;
444 	new_timing.wall_frame_begin = get_wall_time();
445 	new_timing.swap_interval_target = options.swap_interval;
446 	new_timing.result = TimingResult::Unknown;
447 	new_timing.timing = {};
448 
449 	update_past_presentation_timing();
450 	// Absolute minimum case, just get some initial data before we have some real estimates.
451 	update_frame_pacing(serial.serial, new_timing.wall_frame_begin, true);
452 	update_frame_time_smoothing(frame_time, elapsed_time);
453 	limit_latency(new_timing);
454 
455 	new_timing.wall_frame_target = compute_target_present_time_for_serial(serial.serial);
456 }
457 
get_conservative_latency(int64_t & latency) const458 bool WSITiming::get_conservative_latency(int64_t &latency) const
459 {
460 	latency = 0;
461 	unsigned valid_latencies = 0;
462 	for (auto &timing : feedback.past_timings)
463 	{
464 		if (timing.timing.actualPresentTime >= timing.wall_frame_begin)
465 		{
466 			latency = std::max(latency, timing.pipeline_latency);
467 			valid_latencies++;
468 		}
469 	}
470 
471 	return valid_latencies > (NUM_TIMINGS / 2);
472 }
473 
compute_target_present_time_for_serial(uint32_t serial)474 uint64_t WSITiming::compute_target_present_time_for_serial(uint32_t serial)
475 {
476 	if (!pacing.have_estimate)
477 		return 0;
478 
479 	uint64_t frame_delta = serial - pacing.base_serial;
480 	frame_delta *= options.swap_interval;
481 	uint64_t target = pacing.base_present + feedback.refresh_interval * frame_delta;
482 	return target;
483 }
484 
promote_or_demote_frame_rate()485 void WSITiming::promote_or_demote_frame_rate()
486 {
487 	if (!options.adaptive_swap_interval)
488 		return;
489 
490 	if (feedback.refresh_interval == 0)
491 		return;
492 
493 	// Analyze if we should do something with frame rate.
494 	// The heuristic is something like:
495 	// - If we observe at least 3 hitches the last window of timing events, demote frame rate.
496 	// - If we observe consistent earliestPresent < actualPresent and presentMargin is at least a quarter frame,
497 	//   promote frame rate.
498 
499 	// We can only make an analysis if all timings come from same swap interval.
500 	// This also limits how often we can automatically change frame rate.
501 
502 	unsigned frame_drops = 0;
503 	unsigned early_frames = 0;
504 	unsigned total = 0;
505 	for (auto &timing : feedback.past_timings)
506 	{
507 		if (timing.result == TimingResult::Unknown)
508 			continue;
509 		if (options.swap_interval != timing.swap_interval_target)
510 			return;
511 
512 		total++;
513 		if (timing.result == TimingResult::VeryEarly)
514 			early_frames++;
515 		else if (timing.result == TimingResult::TooLate)
516 			frame_drops++;
517 	}
518 
519 	// Don't have enough data.
520 	if (total < NUM_TIMINGS / 2)
521 		return;
522 
523 	if (early_frames == total && options.swap_interval > 1)
524 	{
525 		// We can go down in swap interval, great!
526 		set_swap_interval(options.swap_interval - 1);
527 		LOGI("Adjusted swap interval down to %u!\n", options.swap_interval);
528 	}
529 	else if (frame_drops >= 3)
530 	{
531 		if (options.swap_interval < 8) // swap interval of 8, lol
532 		{
533 			set_swap_interval(options.swap_interval + 1);
534 			LOGI("Too much hitching detected, increasing swap interval to %u!\n", options.swap_interval);
535 		}
536 		else
537 			LOGI("Still detecting hitching, but reached swap interval limit.\n");
538 	}
539 }
540 
fill_present_info_timing(VkPresentTimeGOOGLE & time)541 bool WSITiming::fill_present_info_timing(VkPresentTimeGOOGLE &time)
542 {
543 	time.presentID = serial.serial;
544 
545 	time.desiredPresentTime = compute_target_present_time_for_serial(serial.serial);
546 
547 	// Want to set the desired target close enough,
548 	// but not exactly at estimated target, since we have a rounding error cliff.
549 	// Set the target a quarter frame away from real target.
550 	if (time.desiredPresentTime != 0 && feedback.refresh_interval != 0)
551 		time.desiredPresentTime -= feedback.refresh_interval >> 4;
552 
553 	return true;
554 }
555 
556 }
557