1 /*
2 * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: MIT
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /*
25 * DIFR stands for Display Idle Frame Refresh which is a low-power feature
26 * for display that allows scanning out frames from L2 cache. The actual GPU
27 * memory can be gated off while the display outputs are served off the
28 * cache.
29 *
30 * DIFR is defined in three operational layers 1, 2, and 3 and operates in
31 * terms of entering and exiting these layers in order.
32 *
 * Layer 1 has to deem it possible to enter DIFR before layers 2 and 3
 * will even start considering. Any layer seeing conditions that prevent entering DIFR
35 * mode can abort the attempt to enter. But, finally, if all layers agree
36 * the hardware will switch to low-power mode, turn off GPU memory, and
37 * start serving pixels off the cache.
38 *
39 * Managing some high-level state to help the hardware transition from one
40 * layer to another is implemented in NVKMS and RM. Simplified, NVKMS
41 * handles assistance for layer 1 and RM for layer 2.
42 *
43 * Much of the layer 1 or NVKMS DIFR specific code is collected into this
44 * file, centered around an object called NVDIFRStateEvo.
45 *
46 * The role of NVKMS is to listen for DIFR prefetch events (which originate
47 * from h/w and get dispatched by RM), prefetch framebuffer pixels into L2
48 * cache, and report back to h/w (via RM). NVKMS will also disable DIFR each
49 * time there's an explicitly known display update (such as a flip) and
50 * re-enable it once enough idle time has passed.
51 *
52 * The rest of NVKMS will call entrypoints in this file to inform the DIFR
53 * implementation here about changes in relevant state.
54 *
55 * For each DevEvo object nvkms-evo.c will call
56 * nvDIFRAllocate()/nvDIFRFree() here to also create a corresponding DIFR
57 * state object. The DIFR state will contain everything needed to implement
58 * prefetching such as channel and copy engine allocation.
59 *
60 * If DIFR state was successfully allocated, nvkms-rm.c will create an event
61 * listener for DIFR prefetch events which will call back to
62 * nvDIFRPrefetchSurfaces() here in order to do prefetching. This means
63 * going through each active head and issuing a special CE copy, for all
64 * layers of the surface, to populate the L2 cache with framebuffer pixel
65 * data.
66 *
67 * After all prefetches are complete, RM needs to know about the completion
68 * status. This is implemented in nvDIFRSendPrefetchResponse(), again called
69 * by nvkms-rm.c.
70 *
71 * NVKMS must also temporarily disable DIFR in hardware if it knows about
72 * upcoming updates to the framebuffer and then re-enable DIFR when the
73 * screen becomes idle again. For this, nvFlipEvoOneHead() will call us back
74 * via nvDIFRNotifyFlip() when a new flip is happening. We will call RM to
75 * disable DIFR, then set up a timer into the future and when it triggers we
76 * will re-enable DIFR again. But if nvFlipEvoOneHead() notifies us about
77 * another upcoming frame, we'll just replace the old timer with a new one.
78 * Thus, the timer will eventually wake us after notifications of new frames
79 * cease to come in.
80 *
81 * The DIFR hardware will automatically detect activity in graphics/copy
82 * engines and will not try to enter the low-power mode if there is any. So
83 * this is something NVKMS doesn't have to worry about.
84 *
85 * Userspace can also flag surfaces as non-cacheable which makes us abort
86 * any prefetches if those surfaces are currently displayed on any active
87 * heads. For now, CPU mapped surfaces are flagged as such because neither
88 * NVKMS nor the hardware can observe CPU writes into a surface.
89 */
90
91
92
93 #include "nvkms-difr.h"
94 #include "nvkms-push.h"
95 #include "nvkms-rm.h"
96 #include "nvkms-rmapi.h"
97 #include "nvkms-utils.h"
98 #include "nvkms-evo.h"
99
100 #include "nvidia-push-init.h"
101 #include "nvidia-push-methods.h"
102 #include "nvidia-push-types.h"
103 #include "nvidia-push-types.h"
104 #include "nvidia-push-utils.h"
105
106 #include <class/cl2080.h>
107 #include <class/cla06f.h>
108 #include <class/cla06fsubch.h>
109 #include <class/cla0b5.h>
110 #include <class/clb0b5sw.h>
111 #include <class/clc7b5.h>
112 #include <ctrl/ctrl2080/ctrl2080ce.h>
113 #include <ctrl/ctrl2080/ctrl2080lpwr.h>
114
115 #define PREFETCH_DONE_VALUE 0x00000fed
116
117 /* How long to wait after last flip until re-enabling DIFR. */
118 #define DIFR_IDLE_WAIT_PERIOD_US 500000
119
120 /* How long to wait for prefetch dma completion. */
121 #define DIFR_PREFETCH_WAIT_PERIOD_US 10000 /* 10ms */
122
123 /*
124 * DIFR runtime state
125 */
126 typedef struct _NVDIFRStateEvoRec {
127 NVDevEvoPtr pDevEvo;
128 NvU32 copyEngineType;
129
130 /*
131 * This is kept in sync with whether DIFR is explicitly disabled in
132 * hardware.
133 */
134 NvBool hwDisabled;
135 NvU64 lastFlipTime;
136 nvkms_timer_handle_t *idleTimer;
137
138 /* Pushbuffer for DIFR prefetches. */
139 NvPushChannelRec prefetchPushChannel;
140 NvU32 pushChannelHandlePool[NV_PUSH_CHANNEL_HANDLE_POOL_NUM];
141
142 /* Copy engine instance for DIFR prefetches. */
143 NvU32 prefetchEngine;
144
145 /* For tracking which surfaces have been prefetched already. */
146 NvU32 prefetchPass;
147 } NVDIFRStateEvoRec;
148
149 /*
150 * Prefetch parameters for DMA copy.
151 */
152 typedef struct {
153 NvUPtr surfGpuAddress;
154 size_t surfSizeBytes;
155 enum NvKmsSurfaceMemoryFormat surfFormat;
156 NvU32 surfPitchBytes;
157 } NVDIFRPrefetchParams;
158
159 static NvBool AllocDIFRPushChannel(NVDIFRStateEvoPtr pDifr);
160 static void FreeDIFRPushChannel(NVDIFRStateEvoPtr pDifr);
161 static NvBool AllocDIFRCopyEngine(NVDIFRStateEvoPtr pDifr);
162 static void FreeDIFRCopyEngine(NVDIFRStateEvoPtr pDifr);
163
164 static NvU32 PrefetchSingleSurface(NVDIFRStateEvoPtr pDifr,
165 NVDIFRPrefetchParams *pParams,
166 size_t *remainingCache);
167 static NvBool PrefetchHelperSurfaceEvo(NVDIFRStateEvoPtr pDifr,
168 size_t *cacheRemaining,
169 NVSurfaceEvoPtr pSurfaceEvo,
170 NvU32 *status);
171 static NvBool PrefetchHelperLutSurface(NVDIFRStateEvoPtr pDifr,
172 size_t *cacheRemaining,
173 NVLutSurfaceEvoPtr pLutSurface,
174 NvU32 *status);
175
176 static NvBool SetDisabledState(NVDIFRStateEvoPtr pDifr,
177 NvBool shouldDisable);
178 static NvBool IsCECompatibleWithDIFR(NVDevEvoPtr pDevEvo,
179 NvU32 instance);
180 static void EnsureIdleTimer(NVDIFRStateEvoPtr pDifr);
181 static void IdleTimerProc(void *dataPtr, NvU32 dataU32);
182
183 /*
184 * Public entry points.
185 */
186
nvDIFRAllocate(NVDevEvoPtr pDevEvo)187 NVDIFRStateEvoPtr nvDIFRAllocate(NVDevEvoPtr pDevEvo)
188 {
189 NV2080_CTRL_CMD_LPWR_DIFR_CTRL_PARAMS params = { 0 };
190 NVDIFRStateEvoPtr pDifr;
191 NvU32 ret;
192
193 /* DIFR not supported/implemented on RM SLI */
194 if (pDevEvo->numSubDevices > 1) {
195 return NULL;
196 }
197
198 params.ctrlParamVal = NV2080_CTRL_LPWR_DIFR_CTRL_SUPPORT_STATUS;
199 ret = nvRmApiControl(nvEvoGlobal.clientHandle,
200 pDevEvo->pSubDevices[0]->handle,
201 NV2080_CTRL_CMD_LPWR_DIFR_CTRL,
202 ¶ms,
203 sizeof(params));
204
205 if (ret != NV_OK) {
206 nvEvoLogDev(pDevEvo,
207 EVO_LOG_WARN,
208 "unable to query whether display caching is supported");
209 return NULL;
210 }
211
212 if (params.ctrlParamVal != NV2080_CTRL_LPWR_DIFR_SUPPORTED) {
213 return NULL;
214 }
215
216 pDifr = nvCalloc(sizeof(*pDifr), 1);
217 if (!pDifr) {
218 return NULL;
219 }
220
221 pDifr->pDevEvo = pDevEvo;
222
223 if (!AllocDIFRPushChannel(pDifr) ||
224 !AllocDIFRCopyEngine(pDifr)) {
225 nvDIFRFree(pDifr);
226
227 return NULL;
228 }
229
230 return pDifr;
231 }
232
nvDIFRFree(NVDIFRStateEvoPtr pDifr)233 void nvDIFRFree(NVDIFRStateEvoPtr pDifr)
234 {
235 nvAssert(pDifr);
236
237 /* Cancel pending idle timer. */
238 nvkms_free_timer(pDifr->idleTimer);
239
240 /* Leave DIFR enabled (default state). */
241 SetDisabledState(pDifr, FALSE);
242
243 /* Free resources. */
244 FreeDIFRCopyEngine(pDifr);
245 FreeDIFRPushChannel(pDifr);
246
247 nvFree(pDifr);
248 }
249
250 /*
251 * Notify of a new or upcoming flip. This will disable DIFR for a brief
252 * period in anticipation of further flips.
253 */
nvDIFRNotifyFlip(NVDIFRStateEvoPtr pDifr)254 void nvDIFRNotifyFlip(NVDIFRStateEvoPtr pDifr)
255 {
256 pDifr->lastFlipTime = nvkms_get_usec();
257
258 /* A flip is coming: signal RM to disable DIFR if we haven't already. */
259 if (SetDisabledState(pDifr, TRUE)) {
260 /* Check back after a while and re-enable if idle again. */
261 EnsureIdleTimer(pDifr);
262 }
263 }
264
nvDIFRPrefetchSurfaces(NVDIFRStateEvoPtr pDifr,size_t l2CacheSize)265 NvU32 nvDIFRPrefetchSurfaces(NVDIFRStateEvoPtr pDifr, size_t l2CacheSize)
266 {
267 NVDevEvoPtr pDevEvo = pDifr->pDevEvo;
268 NVEvoSubDevPtr pSubDev;
269 NVEvoSubDevHeadStatePtr pHeadState;
270 size_t cacheRemaining = l2CacheSize;
271 NvU32 layer;
272 NvU32 head;
273 NvU32 apiHead;
274 NvU32 eye;
275 NvU32 i;
276 NvU32 status;
277
278 /*
279 * If the console is active then the scanout surfaces will get updated by
280 * the OS console driver without any knowledge of NVKMS, DIFR should not be
281 * enabled in that case.
282 */
283 if (nvEvoIsConsoleActive(pDevEvo)) {
284 /*
285 * NV2080_CTRL_LPWR_DIFR_PREFETCH_FAIL_INSUFFICIENT_L2_SIZE: Despite
286 * what the name suggests this will actually tell RM (and further PMU)
287 * to disable DIFR until the next modeset.
288 */
289 return NV2080_CTRL_LPWR_DIFR_PREFETCH_FAIL_INSUFFICIENT_L2_SIZE;
290 }
291
292 /*
293 * If DIFR is disabled it's because we know we were or will be flipping.
294 */
295 if (pDifr->hwDisabled) {
296 return NV2080_CTRL_LPWR_DIFR_PREFETCH_FAIL_OS_FLIPS_ENABLED;
297 }
298
299 status = NV2080_CTRL_LPWR_DIFR_PREFETCH_SUCCESS;
300
301 pSubDev = &pDevEvo->gpus[0];
302
303 /* Get new prefetch pass counter for this iteration. */
304 pDifr->prefetchPass++;
305
306 /*
307 * Start by prefetching the cursor surface and image surfaces from
308 * present layers.
309 */
310 for (head = 0; head < pDevEvo->numHeads; head++) {
311 pHeadState = &pSubDev->headState[head];
312
313 if (!PrefetchHelperSurfaceEvo(pDifr,
314 &cacheRemaining,
315 pHeadState->cursor.pSurfaceEvo,
316 &status)) {
317 goto out;
318 }
319
320 for (layer = 0; layer <= pDevEvo->head[head].numLayers; layer++) {
321 for (eye = 0; eye < NVKMS_MAX_EYES; eye++) {
322
323 if (!PrefetchHelperSurfaceEvo(pDifr,
324 &cacheRemaining,
325 pHeadState->layer[layer].pSurfaceEvo[eye],
326 &status)) {
327 goto out;
328 }
329 }
330
331 /*
332 * Prefetch per-layer LUTs, if any, but skip null LUTs and
333 * duplicates already prefetched.
334 */
335 if (!PrefetchHelperLutSurface(pDifr,
336 &cacheRemaining,
337 pHeadState->layer[layer].inputLut.pLutSurfaceEvo,
338 &status)) {
339 goto out;
340 }
341
342 if (!PrefetchHelperLutSurface(pDifr,
343 &cacheRemaining,
344 pHeadState->layer[layer].tmoLut.pLutSurfaceEvo,
345 &status)) {
346 goto out;
347 }
348 }
349 }
350
351 /*
352 * Finally prefetch the known main LUTs.
353 */
354 if (!PrefetchHelperLutSurface(pDifr,
355 &cacheRemaining,
356 pDevEvo->lut.defaultLut,
357 &status)) {
358 goto out;
359 }
360
361 for (apiHead = 0; apiHead < pDevEvo->numApiHeads; apiHead++) {
362 for (i = 0; i < ARRAY_LEN(pDevEvo->lut.apiHead[apiHead].LUT); i++) {
363 if (!PrefetchHelperLutSurface(pDifr,
364 &cacheRemaining,
365 pDevEvo->lut.apiHead[apiHead].LUT[i],
366 &status)) {
367 goto out;
368 }
369 }
370 }
371
372 out:
373 return status;
374 }
375
nvDIFRSendPrefetchResponse(NVDIFRStateEvoPtr pDifr,NvU32 responseStatus)376 NvBool nvDIFRSendPrefetchResponse(NVDIFRStateEvoPtr pDifr, NvU32 responseStatus)
377 {
378 NVDevEvoPtr pDevEvo = pDifr->pDevEvo;
379 NV2080_CTRL_CMD_LPWR_DIFR_PREFETCH_RESPONSE_PARAMS params = { 0 };
380
381 params.responseVal = responseStatus;
382
383 return (nvRmApiControl(nvEvoGlobal.clientHandle,
384 pDevEvo->pSubDevices[0]->handle,
385 NV2080_CTRL_CMD_LPWR_DIFR_PREFETCH_RESPONSE,
386 ¶ms,
387 sizeof(params))
388 == NV_OK);
389 }
390
391 /*
392 * Local helper functions.
393 */
AllocDIFRPushChannel(NVDIFRStateEvoPtr pDifr)394 static NvBool AllocDIFRPushChannel(NVDIFRStateEvoPtr pDifr)
395 {
396 NVDevEvoPtr pDevEvo = pDifr->pDevEvo;
397 NvPushAllocChannelParams params = { 0 };
398 NvU32 i;
399
400 pDifr->copyEngineType = NV2080_ENGINE_TYPE_NULL;
401
402 for (i = 0; i < NV2080_ENGINE_TYPE_COPY_SIZE; i++) {
403 if (IsCECompatibleWithDIFR(pDevEvo, i)) {
404 pDifr->copyEngineType = NV2080_ENGINE_TYPE_COPY(i);
405 break;
406 }
407 }
408
409 if (pDifr->copyEngineType == NV2080_ENGINE_TYPE_NULL) {
410 return FALSE;
411 }
412
413 params.engineType = pDifr->copyEngineType;
414 params.pDevice = &pDifr->pDevEvo->nvPush.device;
415 params.difrPrefetch = TRUE;
416 params.logNvDiss = FALSE;
417 params.noTimeout = FALSE;
418 params.ignoreChannelErrors = FALSE;
419 params.numNotifiers = 1;
420 params.pushBufferSizeInBytes = 1024;
421
422 ct_assert(sizeof(params.handlePool) == sizeof(pDifr->pushChannelHandlePool));
423
424 for (i = 0; i < ARRAY_LEN(pDifr->pushChannelHandlePool); i++) {
425 pDifr->pushChannelHandlePool[i] =
426 nvGenerateUnixRmHandle(&pDevEvo->handleAllocator);
427
428 params.handlePool[i] = pDifr->pushChannelHandlePool[i];
429 }
430
431 if (!nvPushAllocChannel(¶ms, &pDifr->prefetchPushChannel)) {
432 return FALSE;
433 }
434
435 return TRUE;
436 }
437
FreeDIFRPushChannel(NVDIFRStateEvoPtr pDifr)438 static void FreeDIFRPushChannel(NVDIFRStateEvoPtr pDifr)
439 {
440 NVDevEvoPtr pDevEvo = pDifr->pDevEvo;
441 NvU32 i;
442
443 nvPushFreeChannel(&pDifr->prefetchPushChannel);
444
445 for (i = 0; i < ARRAY_LEN(pDifr->pushChannelHandlePool); i++) {
446 nvFreeUnixRmHandle(&pDevEvo->handleAllocator,
447 pDifr->pushChannelHandlePool[i]);
448 pDifr->pushChannelHandlePool[i] = 0;
449 }
450 }
451
AllocDIFRCopyEngine(NVDIFRStateEvoPtr pDifr)452 static NvBool AllocDIFRCopyEngine(NVDIFRStateEvoPtr pDifr)
453 {
454 NVB0B5_ALLOCATION_PARAMETERS allocParams = { 0 };
455 NVDevEvoPtr pDevEvo = pDifr->pDevEvo;
456 NvU32 ret;
457
458 /*
459 * We will only be called if NV2080_CTRL_CMD_LPWR_DIFR_CTRL says DIFR is
460 * supported in which case we assume the chip supports this CE class.
461 */
462 nvAssert(nvRmEvoClassListCheck(pDevEvo, AMPERE_DMA_COPY_B));
463
464 pDifr->prefetchEngine = nvGenerateUnixRmHandle(&pDevEvo->handleAllocator);
465 if (pDifr->prefetchEngine == 0) {
466 return NV_FALSE;
467 }
468
469 allocParams.version = NVB0B5_ALLOCATION_PARAMETERS_VERSION_1;
470 allocParams.engineType = pDifr->copyEngineType;
471
472 ret = nvRmApiAlloc(nvEvoGlobal.clientHandle,
473 pDifr->prefetchPushChannel.channelHandle[0],
474 pDifr->prefetchEngine,
475 AMPERE_DMA_COPY_B,
476 &allocParams);
477 if (ret != NVOS_STATUS_SUCCESS) {
478 return NV_FALSE;
479 }
480
481 return NV_TRUE;
482 }
483
FreeDIFRCopyEngine(NVDIFRStateEvoPtr pDifr)484 static void FreeDIFRCopyEngine(NVDIFRStateEvoPtr pDifr)
485 {
486 if (pDifr->prefetchEngine != 0) {
487 nvRmApiFree(nvEvoGlobal.clientHandle,
488 pDifr->pDevEvo->pSubDevices[0]->handle,
489 pDifr->prefetchEngine);
490 }
491
492 nvFreeUnixRmHandle(&pDifr->pDevEvo->handleAllocator,
493 pDifr->prefetchEngine);
494 }
495
PrefetchSingleSurface(NVDIFRStateEvoPtr pDifr,NVDIFRPrefetchParams * pParams,size_t * cacheRemaining)496 static NvU32 PrefetchSingleSurface(NVDIFRStateEvoPtr pDifr,
497 NVDIFRPrefetchParams *pParams,
498 size_t *cacheRemaining)
499 {
500 NvPushChannelPtr p = &pDifr->prefetchPushChannel;
501 NvU64 semaphoreGPUAddress = nvPushGetNotifierGpuAddress(p, 0, 0);
502 NvGpuSemaphore *semaphore = (NvGpuSemaphore *)
503 nvPushGetNotifierCpuAddress(p, 0, 0);
504 const NvKmsSurfaceMemoryFormatInfo *finfo =
505 nvKmsGetSurfaceMemoryFormatInfo(pParams->surfFormat);
506 NvU32 componentSizes;
507 NvU32 line_length_in;
508 NvU32 line_count;
509 NvU64 starttime;
510 NvU64 endtime;
511
512 /*
513 * Tell SET_REMAP_COMPONENTS the byte-size of a pixel in terms of color
514 * component size and count. It doesn't matter which actual combinations we
515 * choose as long as size*count will be equal to bytesPerPixel. This is
516 * because we won't be doing any actual remapping per se: we will just
517 * effectively tell the prefetch operation to fetch the correct amount of
518 * bytes for each pixel.
519 */
520 switch (finfo->rgb.bytesPerPixel) {
521 #define COMPONENTS(size, num) \
522 (DRF_DEF(A0B5, _SET_REMAP_COMPONENTS, _COMPONENT_SIZE, size) | \
523 DRF_DEF(A0B5, _SET_REMAP_COMPONENTS, _NUM_SRC_COMPONENTS, num) | \
524 DRF_DEF(A0B5, _SET_REMAP_COMPONENTS, _NUM_DST_COMPONENTS, num))
525
526 case 1: componentSizes = COMPONENTS(_ONE, _ONE); break;
527 case 2: componentSizes = COMPONENTS(_ONE, _TWO); break;
528 case 3: componentSizes = COMPONENTS(_ONE, _THREE); break;
529 case 4: componentSizes = COMPONENTS(_ONE, _FOUR); break;
530 case 6: componentSizes = COMPONENTS(_TWO, _THREE); break;
531 case 8: componentSizes = COMPONENTS(_TWO, _FOUR); break;
532 case 12: componentSizes = COMPONENTS(_FOUR, _THREE); break;
533 case 16: componentSizes = COMPONENTS(_FOUR, _FOUR); break;
534 default: componentSizes = 0; break;
535 #undef COMPONENTS
536 }
537
538 /*
539 * TODO: For now, we don't prefetch multiplane surfaces. In order to do so
540 * we'd need to loop over all valid planes of the pSurfaceEvo and issue a
541 * prefetch for each plane.
542 */
543 if (finfo->numPlanes > 1) {
544 /*
545 * Regardless of its wording, this is the proper failure code to send
546 * upstream. This lets the RM disable DIFR until the next modeset.
547 */
548 return NV2080_CTRL_LPWR_DIFR_PREFETCH_FAIL_INSUFFICIENT_L2_SIZE;
549 }
550
551 /*
552 * Compute some dimensional values to obtain correct blob size for
553 * prefetching. Use the given pitch and calculate the number of lines
554 * needed to cover the whole memory region.
555 */
556 nvAssert(pParams->surfPitchBytes % finfo->rgb.bytesPerPixel == 0);
557 line_length_in = pParams->surfPitchBytes / finfo->rgb.bytesPerPixel;
558
559 nvAssert(pParams->surfSizeBytes % pParams->surfPitchBytes == 0);
560 line_count = pParams->surfSizeBytes / pParams->surfPitchBytes;
561
562 /*
563 * Greedy strategy: assume all surfaces will fit in the supplied L2 size but
564 * the first one that doesn't will cause the prefetch request to fail. If we
565 * run out of cache then DIFR will disable itself until the next modeset.
566 */
567 if (*cacheRemaining < pParams->surfSizeBytes) {
568 return NV2080_CTRL_LPWR_DIFR_PREFETCH_FAIL_INSUFFICIENT_L2_SIZE;
569 }
570
571 *cacheRemaining -= pParams->surfSizeBytes;
572
573 /*
574 * Push buffer DMA copy and semaphore programming.
575 */
576 nvPushSetObject(p, NVA06F_SUBCHANNEL_COPY_ENGINE, &pDifr->prefetchEngine);
577 nvPushMethod(p, NVA06F_SUBCHANNEL_COPY_ENGINE,
578 NVA0B5_SET_REMAP_COMPONENTS, 1);
579 nvPushSetMethodData(p,
580 componentSizes |
581 DRF_DEF(A0B5, _SET_REMAP_COMPONENTS, _DST_X, _CONST_A) |
582 DRF_DEF(A0B5, _SET_REMAP_COMPONENTS, _DST_Y, _CONST_A) |
583 DRF_DEF(A0B5, _SET_REMAP_COMPONENTS, _DST_Z, _CONST_A) |
584 DRF_DEF(A0B5, _SET_REMAP_COMPONENTS, _DST_W, _CONST_A));
585 nvPushImmedVal(p, NVA06F_SUBCHANNEL_COPY_ENGINE,
586 NVA0B5_SET_REMAP_CONST_A, 0);
587 nvPushMethod(p, NVA06F_SUBCHANNEL_COPY_ENGINE, NVA0B5_OFFSET_IN_UPPER, 2);
588 nvPushSetMethodDataU64(p, pParams->surfGpuAddress);
589 nvPushMethod(p, NVA06F_SUBCHANNEL_COPY_ENGINE, NVA0B5_OFFSET_OUT_UPPER, 2);
590 nvPushSetMethodDataU64(p, pParams->surfGpuAddress);
591
592 /*
593 * We don't expect phenomally large pitches but the .mfs for DMA copy
594 * defines PitchIn/PitchOut to be of signed 32-bit type for all
595 * architectures so assert that the value will be what h/w understands.
596 */
597 nvAssert(pParams->surfPitchBytes <= NV_S32_MAX);
598
599 nvPushMethod(p, NVA06F_SUBCHANNEL_COPY_ENGINE, NVA0B5_PITCH_IN, 1);
600 nvPushSetMethodData(p, pParams->surfPitchBytes);
601 nvPushMethod(p, NVA06F_SUBCHANNEL_COPY_ENGINE, NVA0B5_PITCH_OUT, 1);
602 nvPushSetMethodData(p, pParams->surfPitchBytes);
603
604 nvPushMethod(p, NVA06F_SUBCHANNEL_COPY_ENGINE, NVA0B5_LINE_LENGTH_IN, 1);
605 nvPushSetMethodData(p, line_length_in);
606 nvPushMethod(p, NVA06F_SUBCHANNEL_COPY_ENGINE, NVA0B5_LINE_COUNT, 1);
607 nvPushSetMethodData(p, line_count);
608 nvAssert(pParams->surfPitchBytes * line_count == pParams->surfSizeBytes);
609
610 nvPushMethod(p, NVA06F_SUBCHANNEL_COPY_ENGINE, NVA0B5_LAUNCH_DMA, 1);
611 nvPushSetMethodData
612 (p,
613 DRF_DEF(A0B5, _LAUNCH_DMA, _DATA_TRANSFER_TYPE, _PIPELINED) |
614 DRF_DEF(A0B5, _LAUNCH_DMA, _FLUSH_ENABLE, _TRUE) |
615 DRF_DEF(A0B5, _LAUNCH_DMA, _SEMAPHORE_TYPE, _NONE) |
616 DRF_DEF(A0B5, _LAUNCH_DMA, _INTERRUPT_TYPE, _NONE) |
617 DRF_DEF(A0B5, _LAUNCH_DMA, _REMAP_ENABLE, _TRUE) |
618 DRF_DEF(A0B5, _LAUNCH_DMA, _SRC_MEMORY_LAYOUT, _PITCH) |
619 DRF_DEF(A0B5, _LAUNCH_DMA, _DST_MEMORY_LAYOUT, _PITCH) |
620 DRF_DEF(A0B5, _LAUNCH_DMA, _MULTI_LINE_ENABLE, _TRUE) |
621 DRF_DEF(A0B5, _LAUNCH_DMA, _SRC_TYPE, _VIRTUAL) |
622 DRF_DEF(A0B5, _LAUNCH_DMA, _DST_TYPE, _VIRTUAL));
623
624 /*
625 * Reset semaphore value. A memory barrier will be issued by nvidia-push so
626 * we don't need one here.
627 */
628 semaphore->data[0] = 0;
629
630 /* Program a semaphore release after prefetch DMA copy. */
631 nvPushMethod(p, 0, NVA06F_SEMAPHOREA, 4);
632 nvPushSetMethodDataU64(p, semaphoreGPUAddress);
633 nvPushSetMethodData(p, PREFETCH_DONE_VALUE);
634 nvPushSetMethodData(p,
635 DRF_DEF(A06F, _SEMAPHORED, _OPERATION, _RELEASE) |
636 DRF_DEF(A06F, _SEMAPHORED, _RELEASE_WFI, _EN) |
637 DRF_DEF(A06F, _SEMAPHORED, _RELEASE_SIZE, _4BYTE));
638 nvPushKickoff(p);
639
640 /*
641 * Errors and prefetch faults are handled as follows. If prefetch
642 * succeeds the semaphore release will trigger and we will exit upon
643 * seeing PREFETCH_DONE_VALUE in the memory location. Upon failure we
644 * will end up timing out, signal RM of the CE fault and DIFR will
645 * remain disabled until next driver load.
646 *
647 * Currently the total launch-to-end effective (with scheduling)
648 * prefetch rate on silicon seems to be around 15k pixels per
649 * microsecond, empirically. Thus, the time will range from a couple of
650 * hundred microseconds for a very small panel to slightly less than 2
651 * milliseconds for a single 4k display. We'll wait for 100us at a time
652 * and expect a realistic completion within few milliseconds at most.
653 */
654 starttime = nvkms_get_usec();
655 do {
656 endtime = nvkms_get_usec();
657
658 if (semaphore->data[0] == PREFETCH_DONE_VALUE) {
659 return NV2080_CTRL_LPWR_DIFR_PREFETCH_SUCCESS;
660 }
661
662 nvkms_usleep(100);
663 } while (endtime - starttime < DIFR_PREFETCH_WAIT_PERIOD_US); /* 10ms */
664
665 return NV2080_CTRL_LPWR_DIFR_PREFETCH_FAIL_CE_HW_ERROR;
666 }
667
PrefetchHelperSurfaceEvo(NVDIFRStateEvoPtr pDifr,size_t * cacheRemaining,NVSurfaceEvoPtr pSurfaceEvo,NvU32 * status)668 static NvBool PrefetchHelperSurfaceEvo(NVDIFRStateEvoPtr pDifr,
669 size_t *cacheRemaining,
670 NVSurfaceEvoPtr pSurfaceEvo,
671 NvU32 *status)
672 {
673 NVDIFRPrefetchParams params;
674
675 nvAssert(*status == NV2080_CTRL_LPWR_DIFR_PREFETCH_SUCCESS);
676
677 if (!pSurfaceEvo) {
678 return TRUE;
679 }
680
681 if (pSurfaceEvo->noDisplayCaching) {
682 *status = NV2080_CTRL_LPWR_DIFR_PREFETCH_FAIL_OS_FLIPS_ENABLED;
683 return FALSE;
684 }
685
686 /*
687 * If we see the same SurfaceEvo twice (UBB, multi-head X screens, etc)
688 * we only ever want to prefetch it once within a single
689 * nvDIFRPrefetchSurfaces() call.
690 */
691 if (pSurfaceEvo->difrLastPrefetchPass == pDifr->prefetchPass) {
692 return TRUE;
693 }
694
695 /*
696 * Update pass counter even if we fail later: we want to try each
697 * surface only once.
698 */
699 pSurfaceEvo->difrLastPrefetchPass = pDifr->prefetchPass;
700
701 /* Collect copy parameters and do the prefetch. */
702 params.surfGpuAddress = pSurfaceEvo->gpuAddress;
703 params.surfSizeBytes = pSurfaceEvo->planes[0].rmObjectSizeInBytes;
704 params.surfPitchBytes = pSurfaceEvo->planes[0].pitch;
705 params.surfFormat = pSurfaceEvo->format;
706
707 if (pSurfaceEvo->layout == NvKmsSurfaceMemoryLayoutBlockLinear) {
708 params.surfPitchBytes *= NVKMS_BLOCK_LINEAR_GOB_WIDTH;
709 }
710
711 *status = PrefetchSingleSurface(pDifr, ¶ms, cacheRemaining);
712
713 return *status == NV2080_CTRL_LPWR_DIFR_PREFETCH_SUCCESS;
714 }
715
PrefetchHelperLutSurface(NVDIFRStateEvoPtr pDifr,size_t * cacheRemaining,NVLutSurfaceEvoPtr pLutSurface,NvU32 * status)716 static NvBool PrefetchHelperLutSurface(NVDIFRStateEvoPtr pDifr,
717 size_t *cacheRemaining,
718 NVLutSurfaceEvoPtr pLutSurface,
719 NvU32 *status)
720 {
721 NVDIFRPrefetchParams params;
722
723 nvAssert(*status == NV2080_CTRL_LPWR_DIFR_PREFETCH_SUCCESS);
724
725 if (!pLutSurface) {
726 return TRUE;
727 }
728
729 /*
730 * LUTs are often shared so we only want to prefetch (or consider) each
731 * LUT at most once during the prefetch process.
732 */
733 if (pLutSurface->difrLastPrefetchPass == pDifr->prefetchPass) {
734 return TRUE;
735 }
736
737 pLutSurface->difrLastPrefetchPass = pDifr->prefetchPass;
738
739 /* Collect copy parameters and do the prefetch. */
740 params.surfGpuAddress = (NvUPtr)pLutSurface->gpuAddress;
741 params.surfSizeBytes = pLutSurface->size;
742 params.surfPitchBytes = pLutSurface->size;
743 params.surfFormat = NvKmsSurfaceMemoryFormatI8;
744
745 *status = PrefetchSingleSurface(pDifr, ¶ms, cacheRemaining);
746
747 return *status == NV2080_CTRL_LPWR_DIFR_PREFETCH_SUCCESS;
748 }
749
750 /*
751 * Set DIFR disabled state in H/W. Return true if state was changed and it
752 * was successfully signalled downstream.
753 */
SetDisabledState(NVDIFRStateEvoPtr pDifr,NvBool shouldDisable)754 static NvBool SetDisabledState(NVDIFRStateEvoPtr pDifr,
755 NvBool shouldDisable)
756 {
757 NVDevEvoPtr pDevEvo = pDifr->pDevEvo;
758 NV2080_CTRL_CMD_LPWR_DIFR_CTRL_PARAMS params = { 0 };
759 NvU32 ret;
760
761 if (shouldDisable == pDifr->hwDisabled) {
762 return TRUE;
763 }
764
765 params.ctrlParamVal = shouldDisable
766 ? NV2080_CTRL_LPWR_DIFR_CTRL_DISABLE
767 : NV2080_CTRL_LPWR_DIFR_CTRL_ENABLE;
768
769 ret = nvRmApiControl(nvEvoGlobal.clientHandle,
770 pDevEvo->pSubDevices[0]->handle,
771 NV2080_CTRL_CMD_LPWR_DIFR_CTRL,
772 ¶ms,
773 sizeof(params));
774
775 if (ret != NV_OK) {
776 return FALSE;
777 }
778
779 pDifr->hwDisabled = shouldDisable;
780
781 return TRUE;
782 }
783
IsCECompatibleWithDIFR(NVDevEvoPtr pDevEvo,NvU32 instance)784 static NvBool IsCECompatibleWithDIFR(NVDevEvoPtr pDevEvo, NvU32 instance)
785 {
786 NV2080_CTRL_CE_GET_CAPS_V2_PARAMS params;
787 NvU32 ret;
788
789 nvkms_memset(¶ms, 0, sizeof(params));
790 params.ceEngineType = NV2080_ENGINE_TYPE_COPY(instance);
791
792 ret = nvRmApiControl(nvEvoGlobal.clientHandle,
793 pDevEvo->pSubDevices[0]->handle,
794 NV2080_CTRL_CMD_CE_GET_CAPS_V2,
795 ¶ms,
796 sizeof(params));
797
798 if (ret != NVOS_STATUS_SUCCESS) {
799 return FALSE;
800 }
801
802 ct_assert(sizeof(params.capsTbl) == NV2080_CTRL_CE_CAPS_TBL_SIZE);
803
804 /* Current criteria: DIFR prefetches can't use graphics CEs. */
805 if (NV2080_CTRL_CE_GET_CAP(params.capsTbl, NV2080_CTRL_CE_CAPS_CE_GRCE)) {
806 return FALSE;
807 }
808
809 return TRUE;
810 }
811
812 /*
813 * Make sure we have a pending idle timer to check back on idleness.
814 */
EnsureIdleTimer(NVDIFRStateEvoPtr pDifr)815 static void EnsureIdleTimer(NVDIFRStateEvoPtr pDifr)
816 {
817 if (!pDifr->idleTimer) {
818 /* Wait 100x longer in emulation. */
819 NvU64 idlePeriod =
820 DIFR_IDLE_WAIT_PERIOD_US *
821 (nvIsEmulationEvo(pDifr->pDevEvo) ? 100 : 1);
822
823 pDifr->idleTimer =
824 nvkms_alloc_timer(IdleTimerProc, pDifr, 0, idlePeriod);
825 }
826 }
827
828 /*
829 * An idle timer should always remain pending after a flip until further
830 * flips cease and DIFR can be re-enabled.
831 *
832 * Currently we'll try to re-enable DIFR after a constant period of idleness
833 * since the last flip but this could resonate badly with a client that's
834 * rendering at the same pace.
835 *
836 * To avoid churn we could track the time DIFR actually did remain enabled.
837 * If the enabled-period is relatively short against the disabled-period, we
838 * should bump the timeout to re-enable so that we won't be retrying all the
839 * time. Conversely, we should reset the bumped timeout after we actually
840 * managed to sleep long enough with DIFR enabled.
841 *
842 * Note: There's the question of whether we should apply slight hysteresis
843 * within NVKMS regarding enabling/disabling DIFR. The hardware itself does
844 * some churn-limiting and practical observations show that it seems to work
845 * sufficiently and I've not observed rapid, repeating prefetch requests.
846 * Keeping this note here in case this matter needs to be revisited later.
847 */
IdleTimerProc(void * dataPtr,NvU32 dataU32)848 static void IdleTimerProc(void *dataPtr, NvU32 dataU32)
849 {
850 NVDIFRStateEvoPtr pDifr = (NVDIFRStateEvoPtr)dataPtr;
851 NvU64 now = nvkms_get_usec();
852 NvU64 idlePeriod =
853 DIFR_IDLE_WAIT_PERIOD_US *
854 (nvIsEmulationEvo(pDifr->pDevEvo) ? 100 : 1);
855
856 /* First free the timer that triggered us. */
857 nvkms_free_timer(pDifr->idleTimer);
858 pDifr->idleTimer = NULL;
859
860 if (now - pDifr->lastFlipTime >= idlePeriod) {
861 /*
862 * Enough time has passed with no new flips, enable DIFR if the console
863 * is not active. If the console is active then the scanout surfaces
864 * will get updated by the OS console driver without any knowledge of
865 * NVKMS, DIFR can not be enabled in that case; the idle timer will get
866 * scheduled by nvDIFRNotifyFlip() on next modeset/flip, till then DIFR
867 * will remain disabled.
868 */
869 if (!nvEvoIsConsoleActive(pDifr->pDevEvo)) {
870 SetDisabledState(pDifr, FALSE);
871 }
872 } else {
873 /* New flips have happened since the original, reset idle timer. */
874 EnsureIdleTimer(pDifr);
875 }
876 }
877