/*
 * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/*
 * DIFR stands for Display Idle Frame Refresh, a low-power display feature
 * that allows scanning out frames from the GPU's L2 cache. The actual GPU
 * memory can be gated off while the display outputs are served off the
 * cache.
 *
 * DIFR is defined in three operational layers 1, 2, and 3 and operates in
 * terms of entering and exiting these layers in order.
 *
 * Layer 1 must first deem it possible to enter DIFR before layers 2 and 3
 * will start considering. Any layer that sees conditions preventing DIFR
 * can abort the attempt to enter. But, finally, if all layers agree, the
 * hardware will switch to low-power mode, turn off GPU memory, and start
 * serving pixels off the cache.
 *
 * Managing some high-level state to help the hardware transition from one
 * layer to another is implemented in NVKMS and RM. Simplified, NVKMS
 * handles assistance for layer 1 and RM for layer 2.
 *
 * Much of the layer 1 (NVKMS) DIFR-specific code is collected into this
 * file, centered around an object called NVDIFRStateEvo.
 *
 * The role of NVKMS is to listen for DIFR prefetch events (which originate
 * from h/w and get dispatched by RM), prefetch framebuffer pixels into the
 * L2 cache, and report back to h/w (via RM). NVKMS will also disable DIFR
 * each time there's an explicitly known display update (such as a flip) and
 * re-enable it once enough idle time has passed.
 *
 * The rest of NVKMS calls entrypoints in this file to inform the DIFR
 * implementation here about changes in relevant state.
 *
 * For each DevEvo object, nvkms-evo.c will call nvDIFRAllocate()/
 * nvDIFRFree() here to create and destroy a corresponding DIFR state
 * object. The DIFR state contains everything needed to implement
 * prefetching, such as channel and copy engine allocation.
 *
 * If DIFR state was successfully allocated, nvkms-rm.c will create an event
 * listener for DIFR prefetch events which will call back to
 * nvDIFRPrefetchSurfaces() here in order to do prefetching. This means
 * going through each active head and issuing a special CE copy, for all
 * layers of the surface, to populate the L2 cache with framebuffer pixel
 * data.
 *
 * After all prefetches are complete, RM needs to know about the completion
 * status. This is implemented in nvDIFRSendPrefetchResponse(), again called
 * by nvkms-rm.c.
 *
 * NVKMS must also temporarily disable DIFR in hardware if it knows about
 * upcoming updates to the framebuffer and then re-enable DIFR once the
 * screen becomes idle again. For this, nvFlipEvoOneHead() will call us back
 * via nvDIFRNotifyFlip() when a new flip is happening. We will call RM to
 * disable DIFR, then arm a timer and re-enable DIFR when it triggers. If
 * nvFlipEvoOneHead() notifies us about another upcoming frame before then,
 * we simply replace the old timer with a new one. Thus, the timer will
 * eventually wake us once notifications of new frames cease to come in.
 *
 * The DIFR hardware will automatically detect activity in graphics/copy
 * engines and will not try to enter the low-power mode while there is any,
 * so this is something NVKMS doesn't have to worry about.
 *
 * Userspace can also flag surfaces as non-cacheable, which makes us abort
 * any prefetch if such a surface is currently displayed on any active
 * head. For now, CPU-mapped surfaces are flagged as such because neither
 * NVKMS nor the hardware can observe CPU writes into a surface.
 */
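
/*
 * Roughly, the expected call flow looks like the sketch below. This is an
 * illustrative summary of the entrypoints described above, not a literal
 * code path; the actual dispatch lives in nvkms-evo.c and nvkms-rm.c.
 *
 *   pDifr = nvDIFRAllocate(pDevEvo);              // device initialization
 *
 *   // On a DIFR prefetch event, dispatched by RM with the L2 cache size:
 *   status = nvDIFRPrefetchSurfaces(pDifr, l2CacheSize);
 *   nvDIFRSendPrefetchResponse(pDifr, status);
 *
 *   nvDIFRNotifyFlip(pDifr);                      // on each flip
 *
 *   nvDIFRFree(pDifr);                            // device teardown
 */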

#include "nvkms-difr.h"
#include "nvkms-push.h"
#include "nvkms-rm.h"
#include "nvkms-rmapi.h"
#include "nvkms-utils.h"
#include "nvkms-evo.h"

#include "nvidia-push-init.h"
#include "nvidia-push-methods.h"
#include "nvidia-push-types.h"
#include "nvidia-push-utils.h"

#include <class/cl2080.h>
#include <class/cla06f.h>
#include <class/cla06fsubch.h>
#include <class/cla0b5.h>
#include <class/clb0b5sw.h>
#include <class/clc7b5.h>
#include <ctrl/ctrl2080/ctrl2080ce.h>
#include <ctrl/ctrl2080/ctrl2080lpwr.h>

#define PREFETCH_DONE_VALUE             0x00000fed

/* How long to wait after last flip until re-enabling DIFR. */
#define DIFR_IDLE_WAIT_PERIOD_US        500000

/* How long to wait for prefetch dma completion. */
#define DIFR_PREFETCH_WAIT_PERIOD_US    10000 /* 10ms */

/*
 * DIFR runtime state
 */
typedef struct _NVDIFRStateEvoRec {
    NVDevEvoPtr pDevEvo;
    NvU32 copyEngineType;

    /*
     * This is kept in sync with whether DIFR is explicitly disabled in
     * hardware.
     */
    NvBool hwDisabled;
    NvU64 lastFlipTime;
    nvkms_timer_handle_t *idleTimer;

    /* Pushbuffer for DIFR prefetches. */
    NvPushChannelRec prefetchPushChannel;
    NvU32 pushChannelHandlePool[NV_PUSH_CHANNEL_HANDLE_POOL_NUM];

    /* Copy engine instance for DIFR prefetches. */
    NvU32 prefetchEngine;

    /* For tracking which surfaces have been prefetched already. */
    NvU32 prefetchPass;
} NVDIFRStateEvoRec;

/*
 * Prefetch parameters for DMA copy.
 */
typedef struct {
    NvUPtr surfGpuAddress;
    size_t surfSizeBytes;
    enum NvKmsSurfaceMemoryFormat surfFormat;
    NvU32 surfPitchBytes;
} NVDIFRPrefetchParams;

static NvBool AllocDIFRPushChannel(NVDIFRStateEvoPtr pDifr);
static void FreeDIFRPushChannel(NVDIFRStateEvoPtr pDifr);
static NvBool AllocDIFRCopyEngine(NVDIFRStateEvoPtr pDifr);
static void FreeDIFRCopyEngine(NVDIFRStateEvoPtr pDifr);

static NvU32 PrefetchSingleSurface(NVDIFRStateEvoPtr pDifr,
                                   NVDIFRPrefetchParams *pParams,
                                   size_t *cacheRemaining);
static NvBool PrefetchHelperSurfaceEvo(NVDIFRStateEvoPtr pDifr,
                                       size_t *cacheRemaining,
                                       NVSurfaceEvoPtr pSurfaceEvo,
                                       NvU32 *status);
static NvBool PrefetchHelperLutSurface(NVDIFRStateEvoPtr pDifr,
                                       size_t *cacheRemaining,
                                       NVLutSurfaceEvoPtr pLutSurface,
                                       NvU32 *status);

static NvBool SetDisabledState(NVDIFRStateEvoPtr pDifr,
                               NvBool shouldDisable);
static NvBool IsCECompatibleWithDIFR(NVDevEvoPtr pDevEvo,
                                     NvU32 instance);
static void EnsureIdleTimer(NVDIFRStateEvoPtr pDifr);
static void IdleTimerProc(void *dataPtr, NvU32 dataU32);

/*
 * Public entry points.
 */

NVDIFRStateEvoPtr nvDIFRAllocate(NVDevEvoPtr pDevEvo)
{
    NV2080_CTRL_CMD_LPWR_DIFR_CTRL_PARAMS params = { 0 };
    NVDIFRStateEvoPtr pDifr;
    NvU32 ret;

    /* DIFR not supported/implemented on RM SLI */
    if (pDevEvo->numSubDevices > 1) {
        return NULL;
    }

    params.ctrlParamVal = NV2080_CTRL_LPWR_DIFR_CTRL_SUPPORT_STATUS;
    ret = nvRmApiControl(nvEvoGlobal.clientHandle,
                         pDevEvo->pSubDevices[0]->handle,
                         NV2080_CTRL_CMD_LPWR_DIFR_CTRL,
                         &params,
                         sizeof(params));

    if (ret != NV_OK) {
        nvEvoLogDev(pDevEvo,
                    EVO_LOG_WARN,
                    "unable to query whether display caching is supported");
        return NULL;
    }

    if (params.ctrlParamVal != NV2080_CTRL_LPWR_DIFR_SUPPORTED) {
        return NULL;
    }

    pDifr = nvCalloc(1, sizeof(*pDifr));
    if (!pDifr) {
        return NULL;
    }

    pDifr->pDevEvo = pDevEvo;

    if (!AllocDIFRPushChannel(pDifr) ||
        !AllocDIFRCopyEngine(pDifr)) {
        nvDIFRFree(pDifr);

        return NULL;
    }

    return pDifr;
}

void nvDIFRFree(NVDIFRStateEvoPtr pDifr)
{
    nvAssert(pDifr);

    /* Cancel pending idle timer. */
    nvkms_free_timer(pDifr->idleTimer);

    /* Leave DIFR enabled (default state). */
    SetDisabledState(pDifr, FALSE);

    /* Free resources. */
    FreeDIFRCopyEngine(pDifr);
    FreeDIFRPushChannel(pDifr);

    nvFree(pDifr);
}

/*
 * Notify of a new or upcoming flip. This will disable DIFR for a brief
 * period in anticipation of further flips.
 */
void nvDIFRNotifyFlip(NVDIFRStateEvoPtr pDifr)
{
    pDifr->lastFlipTime = nvkms_get_usec();

    /* A flip is coming: signal RM to disable DIFR if we haven't already. */
    if (SetDisabledState(pDifr, TRUE)) {
        /* Check back after a while and re-enable if idle again. */
        EnsureIdleTimer(pDifr);
    }
}

NvU32 nvDIFRPrefetchSurfaces(NVDIFRStateEvoPtr pDifr, size_t l2CacheSize)
{
    NVDevEvoPtr pDevEvo = pDifr->pDevEvo;
    NVEvoSubDevPtr pSubDev;
    NVEvoSubDevHeadStatePtr pHeadState;
    size_t cacheRemaining = l2CacheSize;
    NvU32 layer;
    NvU32 head;
    NvU32 apiHead;
    NvU32 eye;
    NvU32 i;
    NvU32 status;

    /*
     * If the console is active then the scanout surfaces will get updated
     * by the OS console driver without any knowledge of NVKMS; DIFR should
     * not be enabled in that case.
     */
    if (nvEvoIsConsoleActive(pDevEvo)) {
        /*
         * NV2080_CTRL_LPWR_DIFR_PREFETCH_FAIL_INSUFFICIENT_L2_SIZE: Despite
         * what the name suggests, this will actually tell RM (and further
         * PMU) to disable DIFR until the next modeset.
         */
        return NV2080_CTRL_LPWR_DIFR_PREFETCH_FAIL_INSUFFICIENT_L2_SIZE;
    }

    /*
     * If DIFR is disabled it's because we know we were or will be flipping.
     */
    if (pDifr->hwDisabled) {
        return NV2080_CTRL_LPWR_DIFR_PREFETCH_FAIL_OS_FLIPS_ENABLED;
    }

    status = NV2080_CTRL_LPWR_DIFR_PREFETCH_SUCCESS;

    pSubDev = &pDevEvo->gpus[0];

    /* Get a new prefetch pass counter for this iteration. */
    pDifr->prefetchPass++;

    /*
     * Start by prefetching the cursor surface and the image surfaces of
     * all present layers.
     */
    for (head = 0; head < pDevEvo->numHeads; head++) {
        pHeadState = &pSubDev->headState[head];

        if (!PrefetchHelperSurfaceEvo(pDifr,
                                      &cacheRemaining,
                                      pHeadState->cursor.pSurfaceEvo,
                                      &status)) {
            goto out;
        }

        for (layer = 0; layer < pDevEvo->head[head].numLayers; layer++) {
            for (eye = 0; eye < NVKMS_MAX_EYES; eye++) {

                if (!PrefetchHelperSurfaceEvo(pDifr,
                                              &cacheRemaining,
                                              pHeadState->layer[layer].pSurfaceEvo[eye],
                                              &status)) {
                    goto out;
                }
            }

            /*
             * Prefetch per-layer LUTs, if any, but skip null LUTs and
             * duplicates already prefetched.
             */
            if (!PrefetchHelperLutSurface(pDifr,
                                          &cacheRemaining,
                                          pHeadState->layer[layer].inputLut.pLutSurfaceEvo,
                                          &status)) {
                goto out;
            }

            if (!PrefetchHelperLutSurface(pDifr,
                                          &cacheRemaining,
                                          pHeadState->layer[layer].tmoLut.pLutSurfaceEvo,
                                          &status)) {
                goto out;
            }
        }
    }

    /*
     * Finally, prefetch the known main LUTs.
     */
    if (!PrefetchHelperLutSurface(pDifr,
                                  &cacheRemaining,
                                  pDevEvo->lut.defaultLut,
                                  &status)) {
        goto out;
    }

    for (apiHead = 0; apiHead < pDevEvo->numApiHeads; apiHead++) {
        for (i = 0; i < ARRAY_LEN(pDevEvo->lut.apiHead[apiHead].LUT); i++) {
            if (!PrefetchHelperLutSurface(pDifr,
                                          &cacheRemaining,
                                          pDevEvo->lut.apiHead[apiHead].LUT[i],
                                          &status)) {
                goto out;
            }
        }
    }

out:
    return status;
}

NvBool nvDIFRSendPrefetchResponse(NVDIFRStateEvoPtr pDifr, NvU32 responseStatus)
{
    NVDevEvoPtr pDevEvo = pDifr->pDevEvo;
    NV2080_CTRL_CMD_LPWR_DIFR_PREFETCH_RESPONSE_PARAMS params = { 0 };

    params.responseVal = responseStatus;

    return (nvRmApiControl(nvEvoGlobal.clientHandle,
                           pDevEvo->pSubDevices[0]->handle,
                           NV2080_CTRL_CMD_LPWR_DIFR_PREFETCH_RESPONSE,
                           &params,
                           sizeof(params))
            == NV_OK);
}

/*
 * Local helper functions.
 */
static NvBool AllocDIFRPushChannel(NVDIFRStateEvoPtr pDifr)
{
    NVDevEvoPtr pDevEvo = pDifr->pDevEvo;
    NvPushAllocChannelParams params = { 0 };
    NvU32 i;

    pDifr->copyEngineType = NV2080_ENGINE_TYPE_NULL;

    for (i = 0; i < NV2080_ENGINE_TYPE_COPY_SIZE; i++) {
        if (IsCECompatibleWithDIFR(pDevEvo, i)) {
            pDifr->copyEngineType = NV2080_ENGINE_TYPE_COPY(i);
            break;
        }
    }

    if (pDifr->copyEngineType == NV2080_ENGINE_TYPE_NULL) {
        return FALSE;
    }

    params.engineType = pDifr->copyEngineType;
    params.pDevice = &pDifr->pDevEvo->nvPush.device;
    params.difrPrefetch = TRUE;
    params.logNvDiss = FALSE;
    params.noTimeout = FALSE;
    params.ignoreChannelErrors = FALSE;
    params.numNotifiers = 1;
    params.pushBufferSizeInBytes = 1024;

    ct_assert(sizeof(params.handlePool) == sizeof(pDifr->pushChannelHandlePool));

    for (i = 0; i < ARRAY_LEN(pDifr->pushChannelHandlePool); i++) {
        pDifr->pushChannelHandlePool[i] =
            nvGenerateUnixRmHandle(&pDevEvo->handleAllocator);

        params.handlePool[i] = pDifr->pushChannelHandlePool[i];
    }

    if (!nvPushAllocChannel(&params, &pDifr->prefetchPushChannel)) {
        return FALSE;
    }

    return TRUE;
}

static void FreeDIFRPushChannel(NVDIFRStateEvoPtr pDifr)
{
    NVDevEvoPtr pDevEvo = pDifr->pDevEvo;
    NvU32 i;

    nvPushFreeChannel(&pDifr->prefetchPushChannel);

    for (i = 0; i < ARRAY_LEN(pDifr->pushChannelHandlePool); i++) {
        nvFreeUnixRmHandle(&pDevEvo->handleAllocator,
                           pDifr->pushChannelHandlePool[i]);
        pDifr->pushChannelHandlePool[i] = 0;
    }
}

static NvBool AllocDIFRCopyEngine(NVDIFRStateEvoPtr pDifr)
{
    NVB0B5_ALLOCATION_PARAMETERS allocParams = { 0 };
    NVDevEvoPtr pDevEvo = pDifr->pDevEvo;
    NvU32 ret;

    /*
     * We will only be called if NV2080_CTRL_CMD_LPWR_DIFR_CTRL says DIFR is
     * supported in which case we assume the chip supports this CE class.
     */
    nvAssert(nvRmEvoClassListCheck(pDevEvo, AMPERE_DMA_COPY_B));

    pDifr->prefetchEngine = nvGenerateUnixRmHandle(&pDevEvo->handleAllocator);
    if (pDifr->prefetchEngine == 0) {
        return NV_FALSE;
    }

    allocParams.version = NVB0B5_ALLOCATION_PARAMETERS_VERSION_1;
    allocParams.engineType = pDifr->copyEngineType;

    ret = nvRmApiAlloc(nvEvoGlobal.clientHandle,
                       pDifr->prefetchPushChannel.channelHandle[0],
                       pDifr->prefetchEngine,
                       AMPERE_DMA_COPY_B,
                       &allocParams);
    if (ret != NVOS_STATUS_SUCCESS) {
        return NV_FALSE;
    }

    return NV_TRUE;
}

static void FreeDIFRCopyEngine(NVDIFRStateEvoPtr pDifr)
{
    if (pDifr->prefetchEngine != 0) {
        nvRmApiFree(nvEvoGlobal.clientHandle,
                    pDifr->pDevEvo->pSubDevices[0]->handle,
                    pDifr->prefetchEngine);
    }

    nvFreeUnixRmHandle(&pDifr->pDevEvo->handleAllocator,
                       pDifr->prefetchEngine);
}

static NvU32 PrefetchSingleSurface(NVDIFRStateEvoPtr pDifr,
                                   NVDIFRPrefetchParams *pParams,
                                   size_t *cacheRemaining)
{
    NvPushChannelPtr p = &pDifr->prefetchPushChannel;
    NvU64 semaphoreGPUAddress = nvPushGetNotifierGpuAddress(p, 0, 0);
    NvGpuSemaphore *semaphore = (NvGpuSemaphore *)
        nvPushGetNotifierCpuAddress(p, 0, 0);
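    /*
     * Note: the push channel's notifier memory doubles as the completion
     * semaphore here. The CE releases PREFETCH_DONE_VALUE to its GPU
     * address and the wait loop at the end of this function polls it via
     * the CPU mapping.
     */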
    const NvKmsSurfaceMemoryFormatInfo *finfo =
        nvKmsGetSurfaceMemoryFormatInfo(pParams->surfFormat);
    NvU32 componentSizes;
    NvU32 line_length_in;
    NvU32 line_count;
    NvU64 starttime;
    NvU64 endtime;

    /*
     * Tell SET_REMAP_COMPONENTS the byte size of a pixel in terms of color
     * component size and count. It doesn't matter which actual combination
     * we choose as long as size * count equals bytesPerPixel. This is
     * because we won't be doing any actual remapping per se: we will just
     * effectively tell the prefetch operation to fetch the correct amount
     * of bytes for each pixel.
     */
    switch (finfo->rgb.bytesPerPixel) {
#define COMPONENTS(size, num)                                           \
    (DRF_DEF(A0B5, _SET_REMAP_COMPONENTS, _COMPONENT_SIZE, size) |      \
     DRF_DEF(A0B5, _SET_REMAP_COMPONENTS, _NUM_SRC_COMPONENTS, num) |   \
     DRF_DEF(A0B5, _SET_REMAP_COMPONENTS, _NUM_DST_COMPONENTS, num))

    case 1: componentSizes = COMPONENTS(_ONE, _ONE); break;
    case 2: componentSizes = COMPONENTS(_ONE, _TWO); break;
    case 3: componentSizes = COMPONENTS(_ONE, _THREE); break;
    case 4: componentSizes = COMPONENTS(_ONE, _FOUR); break;
    case 6: componentSizes = COMPONENTS(_TWO, _THREE); break;
    case 8: componentSizes = COMPONENTS(_TWO, _FOUR); break;
    case 12: componentSizes = COMPONENTS(_FOUR, _THREE); break;
    case 16: componentSizes = COMPONENTS(_FOUR, _FOUR); break;
    default: componentSizes = 0; break;
#undef COMPONENTS
    }
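
    /*
     * For example, a packed 32-bit format such as A8R8G8B8 has
     * bytesPerPixel == 4 and maps to COMPONENTS(_ONE, _FOUR) above:
     * four one-byte components, i.e. 1 * 4 == 4 bytes per pixel.
     */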

    /*
     * TODO: For now, we don't prefetch multiplane surfaces. In order to do
     * so we'd need to loop over all valid planes of the pSurfaceEvo and
     * issue a prefetch for each plane.
     */
    if (finfo->numPlanes > 1) {
        /*
         * Regardless of its wording, this is the proper failure code to
         * send upstream. This lets the RM disable DIFR until the next
         * modeset.
         */
        return NV2080_CTRL_LPWR_DIFR_PREFETCH_FAIL_INSUFFICIENT_L2_SIZE;
    }

    /*
     * Compute some dimensional values to obtain the correct blob size for
     * prefetching. Use the given pitch and calculate the number of lines
     * needed to cover the whole memory region.
     */
    nvAssert(pParams->surfPitchBytes % finfo->rgb.bytesPerPixel == 0);
    line_length_in = pParams->surfPitchBytes / finfo->rgb.bytesPerPixel;

    nvAssert(pParams->surfSizeBytes % pParams->surfPitchBytes == 0);
    line_count = pParams->surfSizeBytes / pParams->surfPitchBytes;
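
    /*
     * As a hypothetical example: a 1920x1080 A8R8G8B8 surface with a pitch
     * of 7680 bytes and a size of 8294400 bytes yields line_length_in =
     * 7680 / 4 = 1920 pixels and line_count = 8294400 / 7680 = 1080 lines.
     */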

    /*
     * Greedy strategy: assume all surfaces will fit in the supplied L2
     * size but the first one that doesn't will cause the prefetch request
     * to fail. If we run out of cache then DIFR will disable itself until
     * the next modeset.
     */
    if (*cacheRemaining < pParams->surfSizeBytes) {
        return NV2080_CTRL_LPWR_DIFR_PREFETCH_FAIL_INSUFFICIENT_L2_SIZE;
    }

    *cacheRemaining -= pParams->surfSizeBytes;

    /*
     * Push buffer DMA copy and semaphore programming.
     */
    nvPushSetObject(p, NVA06F_SUBCHANNEL_COPY_ENGINE, &pDifr->prefetchEngine);
    nvPushMethod(p, NVA06F_SUBCHANNEL_COPY_ENGINE,
                 NVA0B5_SET_REMAP_COMPONENTS, 1);
    nvPushSetMethodData(p,
                        componentSizes |
                        DRF_DEF(A0B5, _SET_REMAP_COMPONENTS, _DST_X, _CONST_A) |
                        DRF_DEF(A0B5, _SET_REMAP_COMPONENTS, _DST_Y, _CONST_A) |
                        DRF_DEF(A0B5, _SET_REMAP_COMPONENTS, _DST_Z, _CONST_A) |
                        DRF_DEF(A0B5, _SET_REMAP_COMPONENTS, _DST_W, _CONST_A));
    nvPushImmedVal(p, NVA06F_SUBCHANNEL_COPY_ENGINE,
                   NVA0B5_SET_REMAP_CONST_A, 0);
    nvPushMethod(p, NVA06F_SUBCHANNEL_COPY_ENGINE, NVA0B5_OFFSET_IN_UPPER, 2);
    nvPushSetMethodDataU64(p, pParams->surfGpuAddress);
    nvPushMethod(p, NVA06F_SUBCHANNEL_COPY_ENGINE, NVA0B5_OFFSET_OUT_UPPER, 2);
    nvPushSetMethodDataU64(p, pParams->surfGpuAddress);
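
    /*
     * Note that OFFSET_IN and OFFSET_OUT are the same address: the surface
     * is copied onto itself, and the point of the copy is purely to run
     * the framebuffer data through the L2 cache so it stays resident there.
     */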

    /*
     * We don't expect phenomenally large pitches but the .mfs for DMA copy
     * defines PitchIn/PitchOut to be of signed 32-bit type for all
     * architectures so assert that the value will be what h/w understands.
     */
    nvAssert(pParams->surfPitchBytes <= NV_S32_MAX);

    nvPushMethod(p, NVA06F_SUBCHANNEL_COPY_ENGINE, NVA0B5_PITCH_IN, 1);
    nvPushSetMethodData(p, pParams->surfPitchBytes);
    nvPushMethod(p, NVA06F_SUBCHANNEL_COPY_ENGINE, NVA0B5_PITCH_OUT, 1);
    nvPushSetMethodData(p, pParams->surfPitchBytes);

    nvPushMethod(p, NVA06F_SUBCHANNEL_COPY_ENGINE, NVA0B5_LINE_LENGTH_IN, 1);
    nvPushSetMethodData(p, line_length_in);
    nvPushMethod(p, NVA06F_SUBCHANNEL_COPY_ENGINE, NVA0B5_LINE_COUNT, 1);
    nvPushSetMethodData(p, line_count);
    nvAssert(pParams->surfPitchBytes * line_count == pParams->surfSizeBytes);

    nvPushMethod(p, NVA06F_SUBCHANNEL_COPY_ENGINE, NVA0B5_LAUNCH_DMA, 1);
    nvPushSetMethodData
        (p,
         DRF_DEF(A0B5, _LAUNCH_DMA, _DATA_TRANSFER_TYPE, _PIPELINED) |
         DRF_DEF(A0B5, _LAUNCH_DMA, _FLUSH_ENABLE,       _TRUE)      |
         DRF_DEF(A0B5, _LAUNCH_DMA, _SEMAPHORE_TYPE,     _NONE)      |
         DRF_DEF(A0B5, _LAUNCH_DMA, _INTERRUPT_TYPE,     _NONE)      |
         DRF_DEF(A0B5, _LAUNCH_DMA, _REMAP_ENABLE,       _TRUE)      |
         DRF_DEF(A0B5, _LAUNCH_DMA, _SRC_MEMORY_LAYOUT,  _PITCH)     |
         DRF_DEF(A0B5, _LAUNCH_DMA, _DST_MEMORY_LAYOUT,  _PITCH)     |
         DRF_DEF(A0B5, _LAUNCH_DMA, _MULTI_LINE_ENABLE,  _TRUE)      |
         DRF_DEF(A0B5, _LAUNCH_DMA, _SRC_TYPE,           _VIRTUAL)   |
         DRF_DEF(A0B5, _LAUNCH_DMA, _DST_TYPE,           _VIRTUAL));

    /*
     * Reset the semaphore value. A memory barrier will be issued by
     * nvidia-push so we don't need one here.
     */
    semaphore->data[0] = 0;

    /* Program a semaphore release after the prefetch DMA copy. */
    nvPushMethod(p, 0, NVA06F_SEMAPHOREA, 4);
    nvPushSetMethodDataU64(p, semaphoreGPUAddress);
    nvPushSetMethodData(p, PREFETCH_DONE_VALUE);
    nvPushSetMethodData(p,
                        DRF_DEF(A06F, _SEMAPHORED, _OPERATION, _RELEASE) |
                        DRF_DEF(A06F, _SEMAPHORED, _RELEASE_WFI, _EN) |
                        DRF_DEF(A06F, _SEMAPHORED, _RELEASE_SIZE, _4BYTE));
    nvPushKickoff(p);

    /*
     * Errors and prefetch faults are handled as follows. If the prefetch
     * succeeds, the semaphore release will trigger and we will exit upon
     * seeing PREFETCH_DONE_VALUE in the memory location. Upon failure we
     * will end up timing out, signal RM of the CE fault, and DIFR will
     * remain disabled until the next driver load.
     *
     * Currently the total launch-to-end effective (with scheduling)
     * prefetch rate on silicon seems to be around 15k pixels per
     * microsecond, empirically. Thus, the time will range from a couple of
     * hundred microseconds for a very small panel to slightly less than 2
     * milliseconds for a single 4k display. We'll wait for 100us at a time
     * and expect a realistic completion within a few milliseconds at most.
     */
    starttime = nvkms_get_usec();
    do {
        endtime = nvkms_get_usec();

        if (semaphore->data[0] == PREFETCH_DONE_VALUE) {
            return NV2080_CTRL_LPWR_DIFR_PREFETCH_SUCCESS;
        }

        nvkms_usleep(100);
    } while (endtime - starttime < DIFR_PREFETCH_WAIT_PERIOD_US); /* 10ms */

    return NV2080_CTRL_LPWR_DIFR_PREFETCH_FAIL_CE_HW_ERROR;
}

static NvBool PrefetchHelperSurfaceEvo(NVDIFRStateEvoPtr pDifr,
                                       size_t *cacheRemaining,
                                       NVSurfaceEvoPtr pSurfaceEvo,
                                       NvU32 *status)
{
    NVDIFRPrefetchParams params;

    nvAssert(*status == NV2080_CTRL_LPWR_DIFR_PREFETCH_SUCCESS);

    if (!pSurfaceEvo) {
        return TRUE;
    }

    if (pSurfaceEvo->noDisplayCaching) {
        *status = NV2080_CTRL_LPWR_DIFR_PREFETCH_FAIL_OS_FLIPS_ENABLED;
        return FALSE;
    }

    /*
     * If we see the same SurfaceEvo twice (UBB, multi-head X screens, etc.)
     * we only ever want to prefetch it once within a single
     * nvDIFRPrefetchSurfaces() call.
     */
    if (pSurfaceEvo->difrLastPrefetchPass == pDifr->prefetchPass) {
        return TRUE;
    }

    /*
     * Update the pass counter even if we fail later: we want to try each
     * surface only once.
     */
    pSurfaceEvo->difrLastPrefetchPass = pDifr->prefetchPass;

    /* Collect copy parameters and do the prefetch. */
    params.surfGpuAddress = pSurfaceEvo->gpuAddress;
    params.surfSizeBytes = pSurfaceEvo->planes[0].rmObjectSizeInBytes;
    params.surfPitchBytes = pSurfaceEvo->planes[0].pitch;
    params.surfFormat = pSurfaceEvo->format;

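    /*
     * For blocklinear surfaces the stored pitch is presumably given in
     * units of NVKMS_BLOCK_LINEAR_GOB_WIDTH, so scale it into bytes for
     * the pitch-style copy set up above.
     */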
    if (pSurfaceEvo->layout == NvKmsSurfaceMemoryLayoutBlockLinear) {
        params.surfPitchBytes *= NVKMS_BLOCK_LINEAR_GOB_WIDTH;
    }

    *status = PrefetchSingleSurface(pDifr, &params, cacheRemaining);

    return *status == NV2080_CTRL_LPWR_DIFR_PREFETCH_SUCCESS;
}

static NvBool PrefetchHelperLutSurface(NVDIFRStateEvoPtr pDifr,
                                       size_t *cacheRemaining,
                                       NVLutSurfaceEvoPtr pLutSurface,
                                       NvU32 *status)
{
    NVDIFRPrefetchParams params;

    nvAssert(*status == NV2080_CTRL_LPWR_DIFR_PREFETCH_SUCCESS);

    if (!pLutSurface) {
        return TRUE;
    }

    /*
     * LUTs are often shared so we only want to prefetch (or consider) each
     * LUT at most once during the prefetch process.
     */
    if (pLutSurface->difrLastPrefetchPass == pDifr->prefetchPass) {
        return TRUE;
    }

    pLutSurface->difrLastPrefetchPass = pDifr->prefetchPass;

    /* Collect copy parameters and do the prefetch. */
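    /*
     * The LUT is described as a single line of I8 pixels: with the pitch
     * set equal to the total size, PrefetchSingleSurface() computes
     * line_count = 1 and line_length_in = size.
     */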
    params.surfGpuAddress = (NvUPtr)pLutSurface->gpuAddress;
    params.surfSizeBytes = pLutSurface->size;
    params.surfPitchBytes = pLutSurface->size;
    params.surfFormat = NvKmsSurfaceMemoryFormatI8;

    *status = PrefetchSingleSurface(pDifr, &params, cacheRemaining);

    return *status == NV2080_CTRL_LPWR_DIFR_PREFETCH_SUCCESS;
}

/*
 * Set the DIFR disabled state in h/w. Return TRUE if the requested state is
 * in effect: either it already was, or the change was successfully
 * signalled downstream.
 */
static NvBool SetDisabledState(NVDIFRStateEvoPtr pDifr,
                               NvBool shouldDisable)
{
    NVDevEvoPtr pDevEvo = pDifr->pDevEvo;
    NV2080_CTRL_CMD_LPWR_DIFR_CTRL_PARAMS params = { 0 };
    NvU32 ret;

    if (shouldDisable == pDifr->hwDisabled) {
        return TRUE;
    }

    params.ctrlParamVal = shouldDisable
        ? NV2080_CTRL_LPWR_DIFR_CTRL_DISABLE
        : NV2080_CTRL_LPWR_DIFR_CTRL_ENABLE;

    ret = nvRmApiControl(nvEvoGlobal.clientHandle,
                         pDevEvo->pSubDevices[0]->handle,
                         NV2080_CTRL_CMD_LPWR_DIFR_CTRL,
                         &params,
                         sizeof(params));

    if (ret != NV_OK) {
        return FALSE;
    }

    pDifr->hwDisabled = shouldDisable;

    return TRUE;
}

static NvBool IsCECompatibleWithDIFR(NVDevEvoPtr pDevEvo, NvU32 instance)
{
    NV2080_CTRL_CE_GET_CAPS_V2_PARAMS params;
    NvU32 ret;

    nvkms_memset(&params, 0, sizeof(params));
    params.ceEngineType = NV2080_ENGINE_TYPE_COPY(instance);

    ret = nvRmApiControl(nvEvoGlobal.clientHandle,
                         pDevEvo->pSubDevices[0]->handle,
                         NV2080_CTRL_CMD_CE_GET_CAPS_V2,
                         &params,
                         sizeof(params));

    if (ret != NVOS_STATUS_SUCCESS) {
        return FALSE;
    }

    ct_assert(sizeof(params.capsTbl) == NV2080_CTRL_CE_CAPS_TBL_SIZE);

    /* Current criteria: DIFR prefetches can't use graphics CEs. */
    if (NV2080_CTRL_CE_GET_CAP(params.capsTbl, NV2080_CTRL_CE_CAPS_CE_GRCE)) {
        return FALSE;
    }

    return TRUE;
}

/*
 * Make sure we have a pending idle timer to check back on idleness.
 */
static void EnsureIdleTimer(NVDIFRStateEvoPtr pDifr)
{
    if (!pDifr->idleTimer) {
        /* Wait 100x longer in emulation. */
        NvU64 idlePeriod =
            DIFR_IDLE_WAIT_PERIOD_US *
            (nvIsEmulationEvo(pDifr->pDevEvo) ? 100 : 1);

        pDifr->idleTimer =
            nvkms_alloc_timer(IdleTimerProc, pDifr, 0, idlePeriod);
    }
}

/*
 * An idle timer should always remain pending after a flip until further
 * flips cease and DIFR can be re-enabled.
 *
 * Currently we try to re-enable DIFR after a constant period of idleness
 * since the last flip, but this could resonate badly with a client that's
 * rendering at the same pace.
 *
 * To avoid churn we could track the time DIFR actually did remain enabled.
 * If the enabled period is relatively short against the disabled period,
 * we should bump the timeout to re-enable so that we won't be retrying all
 * the time. Conversely, we should reset the bumped timeout after we
 * actually managed to sleep long enough with DIFR enabled.
 *
 * Note: There's the question of whether we should apply slight hysteresis
 * within NVKMS regarding enabling/disabling DIFR. The hardware itself does
 * some churn-limiting and practical observations show that it seems to
 * work sufficiently; I've not observed rapid, repeating prefetch requests.
 * Keeping this note here in case this matter needs to be revisited later.
 */
static void IdleTimerProc(void *dataPtr, NvU32 dataU32)
{
    NVDIFRStateEvoPtr pDifr = (NVDIFRStateEvoPtr)dataPtr;
    NvU64 now = nvkms_get_usec();
    NvU64 idlePeriod =
        DIFR_IDLE_WAIT_PERIOD_US *
        (nvIsEmulationEvo(pDifr->pDevEvo) ? 100 : 1);

    /* First free the timer that triggered us. */
    nvkms_free_timer(pDifr->idleTimer);
    pDifr->idleTimer = NULL;

    if (now - pDifr->lastFlipTime >= idlePeriod) {
        /*
         * Enough time has passed with no new flips; enable DIFR if the
         * console is not active. If the console is active then the scanout
         * surfaces will get updated by the OS console driver without any
         * knowledge of NVKMS, so DIFR cannot be enabled in that case. The
         * idle timer will get scheduled by nvDIFRNotifyFlip() on the next
         * modeset/flip; until then DIFR will remain disabled.
         */
        if (!nvEvoIsConsoleActive(pDifr->pDevEvo)) {
            SetDisabledState(pDifr, FALSE);
        }
    } else {
        /* New flips have happened since the original; reset the idle timer. */
        EnsureIdleTimer(pDifr);
    }
}