1 /* Copyright (c) 2017-2018 Hans-Kristian Arntzen
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining
4 * a copy of this software and associated documentation files (the
5 * "Software"), to deal in the Software without restriction, including
6 * without limitation the rights to use, copy, modify, merge, publish,
7 * distribute, sublicense, and/or sell copies of the Software, and to
8 * permit persons to whom the Software is furnished to do so, subject to
9 * the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be
12 * included in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "wsi_timing.hpp"
24 #include "wsi.hpp"
25 #include <string.h>
26 #include <algorithm>
27 #include <cmath>
28
29 #ifndef _WIN32
30 #include <time.h>
31 #endif
32
33 namespace Vulkan
34 {
init(WSIPlatform * platform,VkDevice device,VkSwapchainKHR swapchain,const WSITimingOptions & options)35 void WSITiming::init(WSIPlatform *platform, VkDevice device, VkSwapchainKHR swapchain, const WSITimingOptions &options)
36 {
37 this->platform = platform;
38 this->device = device;
39 this->swapchain = swapchain;
40 this->options = options;
41
42 serial = {};
43 pacing = {};
44 last_frame = {};
45 feedback = {};
46 smoothing = {};
47 feedback.timing_buffer.resize(64);
48 }
49
set_debug_enable(bool enable)50 void WSITiming::set_debug_enable(bool enable)
51 {
52 options.debug = enable;
53 }
54
set_latency_limiter(LatencyLimiter limiter)55 void WSITiming::set_latency_limiter(LatencyLimiter limiter)
56 {
57 options.latency_limiter = limiter;
58 }
59
get_options() const60 const WSITimingOptions &WSITiming::get_options() const
61 {
62 return options;
63 }
64
set_swap_interval(unsigned interval)65 void WSITiming::set_swap_interval(unsigned interval)
66 {
67 if (interval == options.swap_interval || interval == 0)
68 return;
69
70 // First, extrapolate to our current serial so we can make a more correct target time using the new swap interval.
71 uint64_t target = compute_target_present_time_for_serial(serial.serial);
72 if (target)
73 {
74 pacing.base_serial = serial.serial;
75 pacing.base_present = target;
76 }
77
78 options.swap_interval = interval;
79 }
80
update_refresh_interval()81 void WSITiming::update_refresh_interval()
82 {
83 VkRefreshCycleDurationGOOGLE refresh;
84 if (vkGetRefreshCycleDurationGOOGLE(device, swapchain, &refresh) == VK_SUCCESS)
85 {
86 if (!feedback.refresh_interval || options.debug)
87 LOGI("Observed refresh rate: %.6f Hz.\n", 1e9 / refresh.refreshDuration);
88 feedback.refresh_interval = refresh.refreshDuration;
89 }
90 else
91 LOGE("Failed to get refresh cycle duration.\n");
92 }
93
find_latest_timestamp(uint32_t start_serial)94 WSITiming::Timing *WSITiming::find_latest_timestamp(uint32_t start_serial)
95 {
96 for (uint32_t i = 1; i < NUM_TIMINGS - 1; i++)
97 {
98 uint32_t past_serial = start_serial - i;
99 auto &past = feedback.past_timings[past_serial & NUM_TIMING_MASK];
100 if (past.wall_serial == past_serial && past.timing.actualPresentTime != 0)
101 return &past;
102 }
103
104 return nullptr;
105 }
106
update_past_presentation_timing()107 void WSITiming::update_past_presentation_timing()
108 {
109 // Update past presentation timings.
110 uint32_t presentation_count;
111 if (vkGetPastPresentationTimingGOOGLE(device, swapchain, &presentation_count, nullptr) != VK_SUCCESS)
112 return;
113
114 if (presentation_count)
115 {
116 if (presentation_count > feedback.timing_buffer.size())
117 feedback.timing_buffer.resize(presentation_count);
118 auto res = vkGetPastPresentationTimingGOOGLE(device, swapchain, &presentation_count, feedback.timing_buffer.data());
119
120 // I have a feeling this is racy in nature and we might have received another presentation timing in-between
121 // querying count and getting actual data, so accept INCOMPLETE here.
122 if (res == VK_SUCCESS || res == VK_INCOMPLETE)
123 {
124 for (uint32_t i = 0; i < presentation_count; i++)
125 {
126 auto &t = feedback.past_timings[feedback.timing_buffer[i].presentID & NUM_TIMING_MASK];
127 if (t.wall_serial == feedback.timing_buffer[i].presentID)
128 {
129 t.timing = feedback.timing_buffer[i];
130
131 uint64_t gpu_done_time = (t.timing.earliestPresentTime - t.timing.presentMargin);
132 t.slack = int64_t(t.timing.actualPresentTime - gpu_done_time);
133 t.pipeline_latency = int64_t(gpu_done_time - t.wall_frame_begin);
134
135 // Expected result unless proven otherwise.
136 t.result = TimingResult::Expected;
137
138 // Feed the heuristics on when to drop frame rate or promote it.
139 if ((feedback.refresh_interval != 0) &&
140 (t.timing.earliestPresentTime < t.timing.actualPresentTime) &&
141 (t.timing.presentMargin > feedback.refresh_interval / 4))
142 {
143 // We could have presented earlier, and we had decent GPU margin to do so.
144 // Deal with frame dropping later.
145 t.result = TimingResult::VeryEarly;
146 if (options.debug)
147 LOGI("Frame completed very early, but was held back by swap interval!\n");
148 }
149 }
150
151 update_frame_pacing(t.wall_serial, t.timing.actualPresentTime, false);
152 }
153 }
154 }
155
156 auto *timing = find_latest_timestamp(serial.serial);
157 if (timing && timing->timing.actualPresentTime >= timing->wall_frame_begin)
158 {
159 auto total_latency = timing->timing.actualPresentTime - timing->wall_frame_begin;
160 feedback.latency = 0.99 * feedback.latency + 0.01e-9 * total_latency;
161
162 if (options.debug)
163 {
164 LOGI("Have presentation timing for %u frames in the past.\n",
165 serial.serial - timing->timing.presentID);
166 }
167
168 if (int64_t(timing->timing.presentMargin) < 0)
169 LOGE("Present margin is negative (%lld) ... ?!\n", static_cast<long long>(timing->timing.presentMargin));
170
171 if (timing->timing.earliestPresentTime > timing->timing.actualPresentTime)
172 LOGE("Earliest present time is > actual present time ... Bug?\n");
173
174 if (timing->timing.actualPresentTime < timing->timing.desiredPresentTime)
175 {
176 LOGE("Image was presented before desired present time, bug? (actual: %llu, desired: %llu)\n",
177 static_cast<unsigned long long>(timing->timing.actualPresentTime),
178 static_cast<unsigned long long>(timing->timing.desiredPresentTime));
179 }
180 else if (feedback.refresh_interval != 0 && timing->timing.desiredPresentTime != 0)
181 {
182 uint64_t delta = timing->timing.actualPresentTime - timing->timing.desiredPresentTime;
183 if (delta >= feedback.refresh_interval)
184 {
185 LOGE("*** Image was presented %u frames late "
186 "(target: %.3f ms, rounded target: %.3f ms, actual: %.3f ms) compared to desired target. "
187 "This normally happens in startup phase, but otherwise it's either a real hitch or app bug. ***\n",
188 unsigned(delta / feedback.refresh_interval),
189 timing->wall_frame_target * 1e-6, timing->timing.desiredPresentTime * 1e-6,
190 timing->timing.actualPresentTime * 1e-6);
191 }
192 }
193
194 // How much can we squeeze latency?
195 if (options.debug)
196 LOGI("Total latency: %.3f ms, slack time: %.3f\n", total_latency * 1e-6, timing->slack * 1e-6);
197
198 if (last_frame.serial && timing->wall_serial != last_frame.serial)
199 {
200 double frame_time_ns = double(timing->timing.actualPresentTime - last_frame.present_time) /
201 double(timing->wall_serial - last_frame.serial);
202
203 // We only detect a hitch if we have the same swap interval target,
204 // otherwise it might as well just be a transient state thing.
205 if ((timing->swap_interval_target == options.swap_interval) &&
206 (feedback.refresh_interval != 0) &&
207 (frame_time_ns > 1.1 * options.swap_interval * feedback.refresh_interval))
208 {
209 LOGE("*** HITCH DETECTED ***\n");
210 timing->result = TimingResult::TooLate;
211
212 if (platform)
213 {
214 unsigned frame_delta = unsigned(round(frame_time_ns / (options.swap_interval *
215 feedback.refresh_interval)));
216 VK_ASSERT(frame_delta);
217 unsigned dropped_frames = frame_delta - 1;
218 platform->event_display_timing_stutter(serial.serial, timing->wall_serial, dropped_frames);
219 }
220 }
221
222 if (options.debug)
223 {
224 LOGI("Frame time ID #%u: %.3f ms\n",
225 timing->wall_serial,
226 1e-6 * frame_time_ns);
227 }
228 }
229
230 last_frame.serial = timing->wall_serial;
231 last_frame.present_time = timing->timing.actualPresentTime;
232 }
233 }
234
wait_until(int64_t nsecs)235 void WSITiming::wait_until(int64_t nsecs)
236 {
237 #ifndef _WIN32
238 timespec ts;
239 ts.tv_sec = nsecs / 1000000000;
240 ts.tv_nsec = nsecs % 1000000000;
241 clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &ts, nullptr);
242 #else
243 (void)nsecs;
244 #endif
245 }
246
get_wall_time()247 uint64_t WSITiming::get_wall_time()
248 {
249 #ifndef _WIN32
250 // GOOGLE_display_timing on Linux and Android use CLOCK_MONOTONIC explicitly.
251 timespec ts;
252 clock_gettime(CLOCK_MONOTONIC, &ts);
253 return ts.tv_sec * 1000000000ull + ts.tv_nsec;
254 #else
255 return 0;
256 #endif
257 }
258
update_frame_pacing(uint32_t serial,uint64_t present_time,bool wall_time)259 void WSITiming::update_frame_pacing(uint32_t serial, uint64_t present_time, bool wall_time)
260 {
261 if (wall_time && !pacing.have_real_estimate)
262 {
263 // We don't have a refresh interval yet, just update the estimate from CPU.
264 pacing.base_serial = serial;
265 pacing.base_present = present_time;
266 pacing.have_estimate = true;
267 return;
268 }
269 else if (!wall_time && !pacing.have_real_estimate)
270 {
271 pacing.base_serial = serial;
272 pacing.base_present = present_time;
273 pacing.have_real_estimate = true;
274 return;
275 }
276 else if (wall_time)
277 {
278 // We already have a correct estimate, don't update.
279 return;
280 }
281
282 if (!feedback.refresh_interval)
283 {
284 // If we don't have a refresh interval yet, we cannot estimate anything.
285 // What we can do instead is just to blindly use the latest observed timestamp as our guiding hand.
286 if (present_time > pacing.base_present)
287 {
288 pacing.base_serial = serial;
289 pacing.base_present = present_time;
290 }
291 }
292 else
293 {
294 int32_t frame_dist = int32_t(serial - pacing.base_serial);
295
296 // Don't update with the past.
297 if (frame_dist <= 0)
298 return;
299
300 // Extrapolate timing from current.
301 uint64_t extrapolated_present_time =
302 pacing.base_present + feedback.refresh_interval * options.swap_interval * (serial - pacing.base_serial);
303 int64_t error = std::abs(int64_t(extrapolated_present_time - present_time));
304
305 // If the delta is close enough (expected frame pace),
306 // update the base ID, so we can make more accurate future estimates.
307 // This is relevant if we want to dynamically change swap interval.
308 // If present time is significantly larger than extrapolated time,
309 // we can assume we had a dropped frame, so we also need to update our base estimate.
310 if ((present_time > extrapolated_present_time) || (error < int64_t(feedback.refresh_interval / 2)))
311 {
312 // We must have dropped frames, or similar.
313 // Update our base estimate.
314 pacing.base_serial = serial;
315 pacing.base_present = present_time;
316 if (options.debug)
317 {
318 LOGI("Updating frame pacing base to serial: %u (delta: %.3f ms)\n", pacing.base_serial,
319 1e-6 * int64_t(present_time - extrapolated_present_time));
320 }
321 }
322 }
323 }
324
update_frame_time_smoothing(double & frame_time,double & elapsed_time)325 void WSITiming::update_frame_time_smoothing(double &frame_time, double &elapsed_time)
326 {
327 double target_frame_time = frame_time;
328 if (feedback.refresh_interval)
329 target_frame_time = double(options.swap_interval * feedback.refresh_interval) * 1e-9;
330
331 double actual_elapsed = elapsed_time - smoothing.offset;
332 smoothing.elapsed += target_frame_time;
333
334 double delta = actual_elapsed - smoothing.elapsed;
335 if (delta > std::fabs(target_frame_time * 4.0))
336 {
337 // We're way off, something must have happened, reset the smoothing.
338 // Don't jump skip the frame time, other than keeping the frame_time as-is.
339 // We might have had a natural pause, and it doesn't make sense to report absurd frame times.
340 // Apps needing to sync to wall time over time could use elapsed_time as a guiding hand.
341 if (options.debug)
342 LOGI("Detected discontinuity in smoothing algorithm!\n");
343 smoothing.offset = elapsed_time;
344 smoothing.elapsed = 0.0;
345 return;
346 }
347
348 double jitter_offset = 0.0;
349
350 // Accept up to 0.5% jitter to catch up or slow down smoothly to our target elapsed time.
351 if (delta > 0.1 * target_frame_time)
352 jitter_offset = 0.005 * target_frame_time;
353 else if (delta < -0.1 * target_frame_time)
354 jitter_offset = -0.005 * target_frame_time;
355
356 target_frame_time += jitter_offset;
357 smoothing.elapsed += jitter_offset;
358
359 elapsed_time = smoothing.elapsed + smoothing.offset;
360 frame_time = target_frame_time;
361 }
362
get_refresh_interval() const363 uint64_t WSITiming::get_refresh_interval() const
364 {
365 return feedback.refresh_interval;
366 }
367
get_current_latency() const368 double WSITiming::get_current_latency() const
369 {
370 return feedback.latency;
371 }
372
limit_latency(Timing & new_timing)373 void WSITiming::limit_latency(Timing &new_timing)
374 {
375 if (options.latency_limiter != LatencyLimiter::None)
376 {
377 // Try to squeeze timings by sleeping, quite shaky, but very fun :)
378 if (feedback.refresh_interval)
379 {
380 int64_t target = int64_t(compute_target_present_time_for_serial(serial.serial));
381
382 if (options.latency_limiter == LatencyLimiter::AdaptiveLowLatency)
383 {
384 int64_t latency = 0;
385 if (get_conservative_latency(latency))
386 {
387 // Keep quarter frame as buffer in case this frame is heavier than normal.
388 latency += feedback.refresh_interval >> 2;
389 wait_until(target - latency);
390
391 uint64_t old_time = new_timing.wall_frame_begin;
392 new_timing.wall_frame_begin = get_wall_time();
393 if (options.debug)
394 {
395 LOGI("Slept for %.3f ms for latency tuning.\n",
396 1e-6 * (new_timing.wall_frame_begin - old_time));
397 }
398 }
399 }
400 else if (options.latency_limiter == LatencyLimiter::IdealPipeline)
401 {
402 // In the ideal pipeline we have one frame for CPU to work,
403 // then one frame for GPU to work in parallel, so we should strive for a little under 2 frames of latency here.
404 // The assumption is that we can kick some work to GPU at least mid-way through our frame,
405 // which will become our slack factor.
406 int64_t latency = feedback.refresh_interval * 2;
407 wait_until(target - latency);
408
409 uint64_t old_time = new_timing.wall_frame_begin;
410 new_timing.wall_frame_begin = get_wall_time();
411 if (options.debug)
412 {
413 LOGI("Slept for %.3f ms for latency tuning.\n",
414 1e-6 * (new_timing.wall_frame_begin - old_time));
415 }
416 }
417 }
418 }
419 }
420
begin_frame(double & frame_time,double & elapsed_time)421 void WSITiming::begin_frame(double &frame_time, double &elapsed_time)
422 {
423 promote_or_demote_frame_rate();
424
425 // Update initial frame elapsed estimate,
426 // from here, we'll try to lock the frame time to refresh_rate +/- epsilon.
427 if (serial.serial == 0)
428 {
429 smoothing.offset = elapsed_time;
430 smoothing.elapsed = 0.0;
431 }
432 serial.serial++;
433
434 if (options.debug)
435 LOGI("Starting WSITiming frame serial: %u\n", serial.serial);
436
437 // On X11, this is found over time by observation, so we need to adapt it.
438 // Only after we have observed the refresh cycle duration, we can start syncing against it.
439 if ((serial.serial & 7) == 0)
440 update_refresh_interval();
441
442 auto &new_timing = feedback.past_timings[serial.serial & NUM_TIMING_MASK];
443 new_timing.wall_serial = serial.serial;
444 new_timing.wall_frame_begin = get_wall_time();
445 new_timing.swap_interval_target = options.swap_interval;
446 new_timing.result = TimingResult::Unknown;
447 new_timing.timing = {};
448
449 update_past_presentation_timing();
450 // Absolute minimum case, just get some initial data before we have some real estimates.
451 update_frame_pacing(serial.serial, new_timing.wall_frame_begin, true);
452 update_frame_time_smoothing(frame_time, elapsed_time);
453 limit_latency(new_timing);
454
455 new_timing.wall_frame_target = compute_target_present_time_for_serial(serial.serial);
456 }
457
get_conservative_latency(int64_t & latency) const458 bool WSITiming::get_conservative_latency(int64_t &latency) const
459 {
460 latency = 0;
461 unsigned valid_latencies = 0;
462 for (auto &timing : feedback.past_timings)
463 {
464 if (timing.timing.actualPresentTime >= timing.wall_frame_begin)
465 {
466 latency = std::max(latency, timing.pipeline_latency);
467 valid_latencies++;
468 }
469 }
470
471 return valid_latencies > (NUM_TIMINGS / 2);
472 }
473
compute_target_present_time_for_serial(uint32_t serial)474 uint64_t WSITiming::compute_target_present_time_for_serial(uint32_t serial)
475 {
476 if (!pacing.have_estimate)
477 return 0;
478
479 uint64_t frame_delta = serial - pacing.base_serial;
480 frame_delta *= options.swap_interval;
481 uint64_t target = pacing.base_present + feedback.refresh_interval * frame_delta;
482 return target;
483 }
484
promote_or_demote_frame_rate()485 void WSITiming::promote_or_demote_frame_rate()
486 {
487 if (!options.adaptive_swap_interval)
488 return;
489
490 if (feedback.refresh_interval == 0)
491 return;
492
493 // Analyze if we should do something with frame rate.
494 // The heuristic is something like:
495 // - If we observe at least 3 hitches the last window of timing events, demote frame rate.
496 // - If we observe consistent earliestPresent < actualPresent and presentMargin is at least a quarter frame,
497 // promote frame rate.
498
499 // We can only make an analysis if all timings come from same swap interval.
500 // This also limits how often we can automatically change frame rate.
501
502 unsigned frame_drops = 0;
503 unsigned early_frames = 0;
504 unsigned total = 0;
505 for (auto &timing : feedback.past_timings)
506 {
507 if (timing.result == TimingResult::Unknown)
508 continue;
509 if (options.swap_interval != timing.swap_interval_target)
510 return;
511
512 total++;
513 if (timing.result == TimingResult::VeryEarly)
514 early_frames++;
515 else if (timing.result == TimingResult::TooLate)
516 frame_drops++;
517 }
518
519 // Don't have enough data.
520 if (total < NUM_TIMINGS / 2)
521 return;
522
523 if (early_frames == total && options.swap_interval > 1)
524 {
525 // We can go down in swap interval, great!
526 set_swap_interval(options.swap_interval - 1);
527 LOGI("Adjusted swap interval down to %u!\n", options.swap_interval);
528 }
529 else if (frame_drops >= 3)
530 {
531 if (options.swap_interval < 8) // swap interval of 8, lol
532 {
533 set_swap_interval(options.swap_interval + 1);
534 LOGI("Too much hitching detected, increasing swap interval to %u!\n", options.swap_interval);
535 }
536 else
537 LOGI("Still detecting hitching, but reached swap interval limit.\n");
538 }
539 }
540
fill_present_info_timing(VkPresentTimeGOOGLE & time)541 bool WSITiming::fill_present_info_timing(VkPresentTimeGOOGLE &time)
542 {
543 time.presentID = serial.serial;
544
545 time.desiredPresentTime = compute_target_present_time_for_serial(serial.serial);
546
547 // Want to set the desired target close enough,
548 // but not exactly at estimated target, since we have a rounding error cliff.
549 // Set the target a quarter frame away from real target.
550 if (time.desiredPresentTime != 0 && feedback.refresh_interval != 0)
551 time.desiredPresentTime -= feedback.refresh_interval >> 4;
552
553 return true;
554 }
555
556 }
557