1 /* 2 * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 * SPDX-License-Identifier: MIT 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 /* 25 * DIFR stands for Display Idle Frame Refresh which is a low-power feature 26 * for display that allows scanning out frames from L2 cache. The actual GPU 27 * memory can be gated off while the display outputs are served off the 28 * cache. 29 * 30 * DIFR is defined in three operational layers 1, 2, and 3 and operates in 31 * terms of entering and exiting these layers in order. 32 * 33 * Layer 1 has to deem it's possible to enter DIFR until layer 2 and 3 can 34 * start considering. Any layer seeing conditions that prevent entering DIFR 35 * mode can abort the attempt to enter. But, finally, if all layers agree 36 * the hardware will switch to low-power mode, turn off GPU memory, and 37 * start serving pixels off the cache. 
38 * 39 * Managing some high-level state to help the hardware transition from one 40 * layer to another is implemented in NVKMS and RM. Simplified, NVKMS 41 * handles assistance for layer 1 and RM for layer 2. 42 * 43 * Much of the layer 1 or NVKMS DIFR specific code is collected into this 44 * file, centered around an object called NVDIFRStateEvo. 45 * 46 * The role of NVKMS is to listen for DIFR prefetch events (which originate 47 * from h/w and get dispatched by RM), prefetch framebuffer pixels into L2 48 * cache, and report back to h/w (via RM). NVKMS will also disable DIFR each 49 * time there's an explicitly known display update (such as a flip) and 50 * re-enable it once enough idle time has passed. 51 * 52 * The rest of NVKMS will call entrypoints in this file to inform the DIFR 53 * implementation here about changes in relevant state. 54 * 55 * For each DevEvo object nvkms-evo.c will call 56 * nvDIFRAllocate()/nvDIFRFree() here to also create a corresponding DIFR 57 * state object. The DIFR state will contain everything needed to implement 58 * prefetching such as channel and copy engine allocation. 59 * 60 * If DIFR state was successfully allocated, nvkms-rm.c will create an event 61 * listener for DIFR prefetch events which will call back to 62 * nvDIFRPrefetchSurfaces() here in order to do prefetching. This means 63 * going through each active head and issuing a special CE copy, for all 64 * layers of the surface, to populate the L2 cache with framebuffer pixel 65 * data. 66 * 67 * After all prefetches are complete, RM needs to know about the completion 68 * status. This is implemented in nvDIFRSendPrefetchResponse(), again called 69 * by nvkms-rm.c. 70 * 71 * NVKMS must also temporarily disable DIFR in hardware if it knows about 72 * upcoming updates to the framebuffer and then re-enable DIFR when the 73 * screen becomes idle again. For this, nvFlipEvoOneHead() will call us back 74 * via nvDIFRNotifyFlip() when a new flip is happening. 
We will call RM to 75 * disable DIFR, then set up a timer into the future and when it triggers we 76 * will re-enable DIFR again. But if nvFlipEvoOneHead() notifies us about 77 * another upcoming frame, we'll just replace the old timer with a new one. 78 * Thus, the timer will eventually wake us after notifications of new frames 79 * cease to come in. 80 * 81 * The DIFR hardware will automatically detect activity in graphics/copy 82 * engines and will not try to enter the low-power mode if there is any. So 83 * this is something NVKMS doesn't have to worry about. 84 * 85 * Userspace can also flag surfaces as non-cacheable which makes us abort 86 * any prefetches if those surfaces are currently displayed on any active 87 * heads. For now, CPU mapped surfaces are flagged as such because neither 88 * NVKMS nor the hardware can observe CPU writes into a surface. 89 */ 90 91 92 93 #include "nvkms-difr.h" 94 #include "nvkms-push.h" 95 #include "nvkms-rm.h" 96 #include "nvkms-rmapi.h" 97 #include "nvkms-utils.h" 98 99 #include "nvidia-push-init.h" 100 #include "nvidia-push-methods.h" 101 #include "nvidia-push-types.h" 102 #include "nvidia-push-types.h" 103 #include "nvidia-push-utils.h" 104 105 #include <class/cl2080.h> 106 #include <class/cla06f.h> 107 #include <class/cla06fsubch.h> 108 #include <class/cla0b5.h> 109 #include <class/clb0b5sw.h> 110 #include <class/clc7b5.h> 111 #include <ctrl/ctrl2080/ctrl2080ce.h> 112 #include <ctrl/ctrl2080/ctrl2080lpwr.h> 113 114 #define PREFETCH_DONE_VALUE 0x00000fed 115 116 /* How long to wait after last flip until re-enabling DIFR. */ 117 #define DIFR_IDLE_WAIT_PERIOD_US 500000 118 119 /* How long to wait for prefetch dma completion. */ 120 #define DIFR_PREFETCH_WAIT_PERIOD_US 10000 /* 10ms */ 121 122 /* 123 * DIFR runtime state 124 */ 125 typedef struct _NVDIFRStateEvoRec { 126 NVDevEvoPtr pDevEvo; 127 NvU32 copyEngineType; 128 129 /* 130 * This is kept in sync with whether DIFR is explicitly disabled in 131 * hardware. 
132 */ 133 NvBool hwDisabled; 134 NvU64 lastFlipTime; 135 nvkms_timer_handle_t *idleTimer; 136 137 /* Pushbuffer for DIFR prefetches. */ 138 NvPushChannelRec prefetchPushChannel; 139 NvU32 pushChannelHandlePool[NV_PUSH_CHANNEL_HANDLE_POOL_NUM]; 140 141 /* Copy engine instance for DIFR prefetches. */ 142 NvU32 prefetchEngine; 143 144 /* For tracking which surfaces have been prefetched already. */ 145 NvU32 prefetchPass; 146 } NVDIFRStateEvoRec; 147 148 /* 149 * Prefetch parameters for DMA copy. 150 */ 151 typedef struct { 152 NvUPtr surfGpuAddress; 153 size_t surfSizeBytes; 154 enum NvKmsSurfaceMemoryFormat surfFormat; 155 NvU32 surfPitchBytes; 156 } NVDIFRPrefetchParams; 157 158 static NvBool AllocDIFRPushChannel(NVDIFRStateEvoPtr pDifr); 159 static void FreeDIFRPushChannel(NVDIFRStateEvoPtr pDifr); 160 static NvBool AllocDIFRCopyEngine(NVDIFRStateEvoPtr pDifr); 161 static void FreeDIFRCopyEngine(NVDIFRStateEvoPtr pDifr); 162 163 static NvU32 PrefetchSingleSurface(NVDIFRStateEvoPtr pDifr, 164 NVDIFRPrefetchParams *pParams, 165 size_t *remainingCache); 166 static NvBool PrefetchHelperSurfaceEvo(NVDIFRStateEvoPtr pDifr, 167 size_t *cacheRemaining, 168 NVSurfaceEvoPtr pSurfaceEvo, 169 NvU32 *status); 170 static NvBool PrefetchHelperLutSurface(NVDIFRStateEvoPtr pDifr, 171 size_t *cacheRemaining, 172 NVLutSurfaceEvoPtr pLutSurface, 173 NvU32 *status); 174 175 static NvBool SetDisabledState(NVDIFRStateEvoPtr pDifr, 176 NvBool shouldDisable); 177 static NvBool IsCECompatibleWithDIFR(NVDevEvoPtr pDevEvo, 178 NvU32 instance); 179 static void EnsureIdleTimer(NVDIFRStateEvoPtr pDifr); 180 static void IdleTimerProc(void *dataPtr, NvU32 dataU32); 181 182 /* 183 * Public entry points. 
184 */ 185 186 NVDIFRStateEvoPtr nvDIFRAllocate(NVDevEvoPtr pDevEvo) 187 { 188 NV2080_CTRL_CMD_LPWR_DIFR_CTRL_PARAMS params = { 0 }; 189 NVDIFRStateEvoPtr pDifr; 190 NvU32 ret; 191 192 /* DIFR not supported/implemented on RM SLI */ 193 if (pDevEvo->numSubDevices > 1) { 194 return NULL; 195 } 196 197 params.ctrlParamVal = NV2080_CTRL_LPWR_DIFR_CTRL_SUPPORT_STATUS; 198 ret = nvRmApiControl(nvEvoGlobal.clientHandle, 199 pDevEvo->pSubDevices[0]->handle, 200 NV2080_CTRL_CMD_LPWR_DIFR_CTRL, 201 ¶ms, 202 sizeof(params)); 203 204 if (ret != NV_OK) { 205 nvEvoLogDev(pDevEvo, 206 EVO_LOG_WARN, 207 "unable to query whether display caching is supported"); 208 return NULL; 209 } 210 211 if (params.ctrlParamVal != NV2080_CTRL_LPWR_DIFR_SUPPORTED) { 212 return NULL; 213 } 214 215 pDifr = nvCalloc(sizeof(*pDifr), 1); 216 if (!pDifr) { 217 return NULL; 218 } 219 220 pDifr->pDevEvo = pDevEvo; 221 222 if (!AllocDIFRPushChannel(pDifr) || 223 !AllocDIFRCopyEngine(pDifr)) { 224 nvDIFRFree(pDifr); 225 226 return NULL; 227 } 228 229 return pDifr; 230 } 231 232 void nvDIFRFree(NVDIFRStateEvoPtr pDifr) 233 { 234 nvAssert(pDifr); 235 236 /* Cancel pending idle timer. */ 237 nvkms_free_timer(pDifr->idleTimer); 238 239 /* Leave DIFR enabled (default state). */ 240 SetDisabledState(pDifr, FALSE); 241 242 /* Free resources. */ 243 FreeDIFRCopyEngine(pDifr); 244 FreeDIFRPushChannel(pDifr); 245 246 nvFree(pDifr); 247 } 248 249 /* 250 * Notify of a new or upcoming flip. This will disable DIFR for a brief 251 * period in anticipation of further flips. 252 */ 253 void nvDIFRNotifyFlip(NVDIFRStateEvoPtr pDifr) 254 { 255 pDifr->lastFlipTime = nvkms_get_usec(); 256 257 /* A flip is coming: signal RM to disable DIFR if we haven't already. */ 258 if (SetDisabledState(pDifr, TRUE)) { 259 /* Check back after a while and re-enable if idle again. 
*/ 260 EnsureIdleTimer(pDifr); 261 } 262 } 263 264 NvU32 nvDIFRPrefetchSurfaces(NVDIFRStateEvoPtr pDifr, size_t l2CacheSize) 265 { 266 NVDevEvoPtr pDevEvo = pDifr->pDevEvo; 267 NVEvoSubDevPtr pSubDev; 268 NVEvoSubDevHeadStatePtr pHeadState; 269 size_t cacheRemaining = l2CacheSize; 270 NvU32 layer; 271 NvU32 head; 272 NvU32 apiHead; 273 NvU32 eye; 274 NvU32 i; 275 NvU32 status; 276 277 /* 278 * If DIFR is disabled it's because we know we were or will be flipping, or 279 * if console is active then the scanout surfaces will get updated by the 280 * OS console driver without any knowledge of NVKMS. 281 */ 282 if (pDifr->hwDisabled || nvEvoIsConsoleActive(pDevEvo)) { 283 return NV2080_CTRL_LPWR_DIFR_PREFETCH_FAIL_OS_FLIPS_ENABLED; 284 } 285 286 status = NV2080_CTRL_LPWR_DIFR_PREFETCH_SUCCESS; 287 288 pSubDev = &pDevEvo->gpus[0]; 289 290 /* Get new prefetch pass counter for this iteration. */ 291 pDifr->prefetchPass++; 292 293 /* 294 * Start by prefetching the cursor surface and image surfaces from 295 * present layers. 296 */ 297 for (head = 0; head < pDevEvo->numHeads; head++) { 298 pHeadState = &pSubDev->headState[head]; 299 300 if (!PrefetchHelperSurfaceEvo(pDifr, 301 &cacheRemaining, 302 pHeadState->cursor.pSurfaceEvo, 303 &status)) { 304 goto out; 305 } 306 307 for (layer = 0; layer <= pDevEvo->head[head].numLayers; layer++) { 308 for (eye = 0; eye < NVKMS_MAX_EYES; eye++) { 309 310 if (!PrefetchHelperSurfaceEvo(pDifr, 311 &cacheRemaining, 312 pHeadState->layer[layer].pSurfaceEvo[eye], 313 &status)) { 314 goto out; 315 } 316 } 317 318 /* 319 * Prefetch per-layer LUTs, if any, but skip null LUTs and 320 * duplicates already prefetched. 
321 */ 322 if (!PrefetchHelperLutSurface(pDifr, 323 &cacheRemaining, 324 pHeadState->layer[layer].inputLut.pLutSurfaceEvo, 325 &status)) { 326 goto out; 327 } 328 329 if (!PrefetchHelperLutSurface(pDifr, 330 &cacheRemaining, 331 pHeadState->layer[layer].tmoLut.pLutSurfaceEvo, 332 &status)) { 333 goto out; 334 } 335 } 336 } 337 338 /* 339 * Finally prefetch the known main LUTs. 340 */ 341 if (!PrefetchHelperLutSurface(pDifr, 342 &cacheRemaining, 343 pDevEvo->lut.defaultLut, 344 &status)) { 345 goto out; 346 } 347 348 for (apiHead = 0; apiHead < pDevEvo->numApiHeads; apiHead++) { 349 for (i = 0; i < ARRAY_LEN(pDevEvo->lut.apiHead[apiHead].LUT); i++) { 350 if (!PrefetchHelperLutSurface(pDifr, 351 &cacheRemaining, 352 pDevEvo->lut.apiHead[apiHead].LUT[i], 353 &status)) { 354 goto out; 355 } 356 } 357 } 358 359 out: 360 return status; 361 } 362 363 NvBool nvDIFRSendPrefetchResponse(NVDIFRStateEvoPtr pDifr, NvU32 responseStatus) 364 { 365 NVDevEvoPtr pDevEvo = pDifr->pDevEvo; 366 NV2080_CTRL_CMD_LPWR_DIFR_PREFETCH_RESPONSE_PARAMS params = { 0 }; 367 368 params.responseVal = responseStatus; 369 370 return (nvRmApiControl(nvEvoGlobal.clientHandle, 371 pDevEvo->pSubDevices[0]->handle, 372 NV2080_CTRL_CMD_LPWR_DIFR_PREFETCH_RESPONSE, 373 ¶ms, 374 sizeof(params)) 375 == NV_OK); 376 } 377 378 /* 379 * Local helper functions. 
 */

/*!
 * Pick a DIFR-compatible copy engine and allocate a push channel on it.
 *
 * Scans the CE instances, selects the first one that is not a graphics CE
 * (see IsCECompatibleWithDIFR()), and allocates a small push channel with
 * one notifier for issuing prefetch copies.
 *
 * \return  TRUE on success; FALSE if no compatible CE exists or channel
 *          allocation fails.
 */
static NvBool AllocDIFRPushChannel(NVDIFRStateEvoPtr pDifr)
{
    NVDevEvoPtr pDevEvo = pDifr->pDevEvo;
    NvPushAllocChannelParams params = { 0 };
    NvU32 i;

    pDifr->copyEngineType = NV2080_ENGINE_TYPE_NULL;

    /* Select the first copy engine instance usable for DIFR prefetches. */
    for (i = 0; i < NV2080_ENGINE_TYPE_COPY_SIZE; i++) {
        if (IsCECompatibleWithDIFR(pDevEvo, i)) {
            pDifr->copyEngineType = NV2080_ENGINE_TYPE_COPY(i);
            break;
        }
    }

    if (pDifr->copyEngineType == NV2080_ENGINE_TYPE_NULL) {
        return FALSE;
    }

    params.engineType = pDifr->copyEngineType;
    params.pDevice = &pDifr->pDevEvo->nvPush.device;
    params.difrPrefetch = TRUE;
    params.logNvDiss = FALSE;
    params.noTimeout = FALSE;
    params.ignoreChannelErrors = FALSE;
    params.numNotifiers = 1;
    params.pushBufferSizeInBytes = 1024;

    ct_assert(sizeof(params.handlePool) == sizeof(pDifr->pushChannelHandlePool));

    /* Pre-generate RM handles for the channel allocation. */
    for (i = 0; i < ARRAY_LEN(pDifr->pushChannelHandlePool); i++) {
        pDifr->pushChannelHandlePool[i] =
            nvGenerateUnixRmHandle(&pDevEvo->handleAllocator);

        params.handlePool[i] = pDifr->pushChannelHandlePool[i];
    }

    if (!nvPushAllocChannel(&params, &pDifr->prefetchPushChannel)) {
        return FALSE;
    }

    return TRUE;
}

/*!
 * Free the prefetch push channel and return its RM handles to the pool.
 */
static void FreeDIFRPushChannel(NVDIFRStateEvoPtr pDifr)
{
    NVDevEvoPtr pDevEvo = pDifr->pDevEvo;
    NvU32 i;

    nvPushFreeChannel(&pDifr->prefetchPushChannel);

    for (i = 0; i < ARRAY_LEN(pDifr->pushChannelHandlePool); i++) {
        nvFreeUnixRmHandle(&pDevEvo->handleAllocator,
                           pDifr->pushChannelHandlePool[i]);
        pDifr->pushChannelHandlePool[i] = 0;
    }
}

/*!
 * Allocate the AMPERE_DMA_COPY_B copy-engine object on the prefetch
 * channel chosen by AllocDIFRPushChannel().
 *
 * \return  NV_TRUE on success, NV_FALSE on handle or RM allocation failure.
 */
static NvBool AllocDIFRCopyEngine(NVDIFRStateEvoPtr pDifr)
{
    NVB0B5_ALLOCATION_PARAMETERS allocParams = { 0 };
    NVDevEvoPtr pDevEvo = pDifr->pDevEvo;
    NvU32 ret;

    /*
     * We will only be called if NV2080_CTRL_CMD_LPWR_DIFR_CTRL says DIFR is
     * supported in which case we assume the chip supports this CE class.
     */
    nvAssert(nvRmEvoClassListCheck(pDevEvo, AMPERE_DMA_COPY_B));

    pDifr->prefetchEngine = nvGenerateUnixRmHandle(&pDevEvo->handleAllocator);
    if (pDifr->prefetchEngine == 0) {
        return NV_FALSE;
    }

    allocParams.version = NVB0B5_ALLOCATION_PARAMETERS_VERSION_1;
    allocParams.engineType = pDifr->copyEngineType;

    ret = nvRmApiAlloc(nvEvoGlobal.clientHandle,
                       pDifr->prefetchPushChannel.channelHandle[0],
                       pDifr->prefetchEngine,
                       AMPERE_DMA_COPY_B,
                       &allocParams);
    if (ret != NVOS_STATUS_SUCCESS) {
        return NV_FALSE;
    }

    return NV_TRUE;
}

/*!
 * Free the copy-engine object (if allocated) and its RM handle.
 */
static void FreeDIFRCopyEngine(NVDIFRStateEvoPtr pDifr)
{
    if (pDifr->prefetchEngine != 0) {
        nvRmApiFree(nvEvoGlobal.clientHandle,
                    pDifr->pDevEvo->pSubDevices[0]->handle,
                    pDifr->prefetchEngine);
    }

    /* The handle itself is returned to the pool unconditionally. */
    nvFreeUnixRmHandle(&pDifr->pDevEvo->handleAllocator,
                       pDifr->prefetchEngine);
}

/*!
 * Issue a CE copy of a single surface onto itself to pull its contents
 * into the L2 cache, then wait for completion via a semaphore release.
 *
 * \param  pDifr           DIFR state for the device.
 * \param  pParams         GPU address, size, format, and pitch of the surface.
 * \param  cacheRemaining  In/out: remaining L2 budget in bytes; decremented
 *                         by the surface size on success.
 *
 * \return  NV2080_CTRL_LPWR_DIFR_PREFETCH_SUCCESS, or a FAIL_* code if the
 *          surface doesn't fit, is multi-plane, or the copy times out.
 */
static NvU32 PrefetchSingleSurface(NVDIFRStateEvoPtr pDifr,
                                   NVDIFRPrefetchParams *pParams,
                                   size_t *cacheRemaining)
{
    NvPushChannelPtr p = &pDifr->prefetchPushChannel;
    NvU64 semaphoreGPUAddress = nvPushGetNotifierGpuAddress(p, 0, 0);
    NvGpuSemaphore *semaphore = (NvGpuSemaphore *)
        nvPushGetNotifierCpuAddress(p, 0, 0);
    const NvKmsSurfaceMemoryFormatInfo *finfo =
        nvKmsGetSurfaceMemoryFormatInfo(pParams->surfFormat);
    NvU32 componentSizes;
    NvU32 line_length_in;
    NvU32 line_count;
    NvU64 starttime;
    NvU64 endtime;

    /*
     * Tell SET_REMAP_COMPONENTS the byte-size of a pixel in terms of color
     * component size and count. It doesn't matter which actual combinations we
     * choose as long as size*count will be equal to bytesPerPixel. This is
     * because we won't be doing any actual remapping per se: we will just
     * effectively tell the prefetch operation to fetch the correct amount of
     * bytes for each pixel.
     */
    switch (finfo->rgb.bytesPerPixel) {
#define COMPONENTS(size, num)                                   \
        (DRF_DEF(A0B5, _SET_REMAP_COMPONENTS, _COMPONENT_SIZE, size) |  \
         DRF_DEF(A0B5, _SET_REMAP_COMPONENTS, _NUM_SRC_COMPONENTS, num) | \
         DRF_DEF(A0B5, _SET_REMAP_COMPONENTS, _NUM_DST_COMPONENTS, num))

    case 1: componentSizes = COMPONENTS(_ONE, _ONE); break;
    case 2: componentSizes = COMPONENTS(_ONE, _TWO); break;
    case 3: componentSizes = COMPONENTS(_ONE, _THREE); break;
    case 4: componentSizes = COMPONENTS(_ONE, _FOUR); break;
    case 6: componentSizes = COMPONENTS(_TWO, _THREE); break;
    case 8: componentSizes = COMPONENTS(_TWO, _FOUR); break;
    case 12: componentSizes = COMPONENTS(_FOUR, _THREE); break;
    case 16: componentSizes = COMPONENTS(_FOUR, _FOUR); break;
    default: componentSizes = 0; break;
#undef COMPONENTS
    }

    /*
     * TODO: For now, we don't prefetch multiplane surfaces. In order to do so
     * we'd need to loop over all valid planes of the pSurfaceEvo and issue a
     * prefetch for each plane.
     */
    if (finfo->numPlanes > 1) {
        /*
         * Regardless of its wording, this is the proper failure code to send
         * upstream. This lets the RM disable DIFR until the next modeset.
         */
        return NV2080_CTRL_LPWR_DIFR_PREFETCH_FAIL_INSUFFICIENT_L2_SIZE;
    }

    /*
     * Compute some dimensional values to obtain correct blob size for
     * prefetching. Use the given pitch and calculate the number of lines
     * needed to cover the whole memory region.
     */
    nvAssert(pParams->surfPitchBytes % finfo->rgb.bytesPerPixel == 0);
    line_length_in = pParams->surfPitchBytes / finfo->rgb.bytesPerPixel;

    nvAssert(pParams->surfSizeBytes % pParams->surfPitchBytes == 0);
    line_count = pParams->surfSizeBytes / pParams->surfPitchBytes;

    /*
     * Greedy strategy: assume all surfaces will fit in the supplied L2 size but
     * the first one that doesn't will cause the prefetch request to fail. If we
     * run out of cache then DIFR will disable itself until the next modeset.
     */
    if (*cacheRemaining < pParams->surfSizeBytes) {
        return NV2080_CTRL_LPWR_DIFR_PREFETCH_FAIL_INSUFFICIENT_L2_SIZE;
    }

    *cacheRemaining -= pParams->surfSizeBytes;

    /*
     * Push buffer DMA copy and semaphore programming.
     */
    nvPushSetObject(p, NVA06F_SUBCHANNEL_COPY_ENGINE, &pDifr->prefetchEngine);
    nvPushMethod(p, NVA06F_SUBCHANNEL_COPY_ENGINE,
                 NVA0B5_SET_REMAP_COMPONENTS, 1);
    nvPushSetMethodData(p,
                        componentSizes |
                        DRF_DEF(A0B5, _SET_REMAP_COMPONENTS, _DST_X, _CONST_A) |
                        DRF_DEF(A0B5, _SET_REMAP_COMPONENTS, _DST_Y, _CONST_A) |
                        DRF_DEF(A0B5, _SET_REMAP_COMPONENTS, _DST_Z, _CONST_A) |
                        DRF_DEF(A0B5, _SET_REMAP_COMPONENTS, _DST_W, _CONST_A));
    nvPushImmedVal(p, NVA06F_SUBCHANNEL_COPY_ENGINE,
                   NVA0B5_SET_REMAP_CONST_A, 0);
    /* Source and destination are the same address: a copy onto itself. */
    nvPushMethod(p, NVA06F_SUBCHANNEL_COPY_ENGINE, NVA0B5_OFFSET_IN_UPPER, 2);
    nvPushSetMethodDataU64(p, pParams->surfGpuAddress);
    nvPushMethod(p, NVA06F_SUBCHANNEL_COPY_ENGINE, NVA0B5_OFFSET_OUT_UPPER, 2);
    nvPushSetMethodDataU64(p, pParams->surfGpuAddress);

    /*
     * We don't expect phenomenally large pitches but the .mfs for DMA copy
     * defines PitchIn/PitchOut to be of signed 32-bit type for all
     * architectures so assert that the value will be what h/w understands.
     */
    nvAssert(pParams->surfPitchBytes <= NV_S32_MAX);

    nvPushMethod(p, NVA06F_SUBCHANNEL_COPY_ENGINE, NVA0B5_PITCH_IN, 1);
    nvPushSetMethodData(p, pParams->surfPitchBytes);
    nvPushMethod(p, NVA06F_SUBCHANNEL_COPY_ENGINE, NVA0B5_PITCH_OUT, 1);
    nvPushSetMethodData(p, pParams->surfPitchBytes);

    nvPushMethod(p, NVA06F_SUBCHANNEL_COPY_ENGINE, NVA0B5_LINE_LENGTH_IN, 1);
    nvPushSetMethodData(p, line_length_in);
    nvPushMethod(p, NVA06F_SUBCHANNEL_COPY_ENGINE, NVA0B5_LINE_COUNT, 1);
    nvPushSetMethodData(p, line_count);
    nvAssert(pParams->surfPitchBytes * line_count == pParams->surfSizeBytes);

    nvPushMethod(p, NVA06F_SUBCHANNEL_COPY_ENGINE, NVA0B5_LAUNCH_DMA, 1);
    nvPushSetMethodData
        (p,
         DRF_DEF(A0B5, _LAUNCH_DMA, _DATA_TRANSFER_TYPE, _PIPELINED) |
         DRF_DEF(A0B5, _LAUNCH_DMA, _FLUSH_ENABLE, _TRUE) |
         DRF_DEF(A0B5, _LAUNCH_DMA, _SEMAPHORE_TYPE, _NONE) |
         DRF_DEF(A0B5, _LAUNCH_DMA, _INTERRUPT_TYPE, _NONE) |
         DRF_DEF(A0B5, _LAUNCH_DMA, _REMAP_ENABLE, _TRUE) |
         DRF_DEF(A0B5, _LAUNCH_DMA, _SRC_MEMORY_LAYOUT, _PITCH) |
         DRF_DEF(A0B5, _LAUNCH_DMA, _DST_MEMORY_LAYOUT, _PITCH) |
         DRF_DEF(A0B5, _LAUNCH_DMA, _MULTI_LINE_ENABLE, _TRUE) |
         DRF_DEF(A0B5, _LAUNCH_DMA, _SRC_TYPE, _VIRTUAL) |
         DRF_DEF(A0B5, _LAUNCH_DMA, _DST_TYPE, _VIRTUAL));

    /*
     * Reset semaphore value. A memory barrier will be issued by nvidia-push so
     * we don't need one here.
     */
    semaphore->data[0] = 0;

    /* Program a semaphore release after prefetch DMA copy. */
    nvPushMethod(p, 0, NVA06F_SEMAPHOREA, 4);
    nvPushSetMethodDataU64(p, semaphoreGPUAddress);
    nvPushSetMethodData(p, PREFETCH_DONE_VALUE);
    nvPushSetMethodData(p,
                        DRF_DEF(A06F, _SEMAPHORED, _OPERATION, _RELEASE) |
                        DRF_DEF(A06F, _SEMAPHORED, _RELEASE_WFI, _EN) |
                        DRF_DEF(A06F, _SEMAPHORED, _RELEASE_SIZE, _4BYTE));
    nvPushKickoff(p);

    /*
     * Errors and prefetch faults are handled as follows. If prefetch
     * succeeds the semaphore release will trigger and we will exit upon
     * seeing PREFETCH_DONE_VALUE in the memory location. Upon failure we
     * will end up timing out, signal RM of the CE fault and DIFR will
     * remain disabled until next driver load.
     *
     * Currently the total launch-to-end effective (with scheduling)
     * prefetch rate on silicon seems to be around 15k pixels per
     * microsecond, empirically. Thus, the time will range from a couple of
     * hundred microseconds for a very small panel to slightly less than 2
     * milliseconds for a single 4k display. We'll wait for 100us at a time
     * and expect a realistic completion within few milliseconds at most.
     */
    starttime = nvkms_get_usec();
    do {
        endtime = nvkms_get_usec();

        if (semaphore->data[0] == PREFETCH_DONE_VALUE) {
            return NV2080_CTRL_LPWR_DIFR_PREFETCH_SUCCESS;
        }

        nvkms_usleep(100);
    } while (endtime - starttime < DIFR_PREFETCH_WAIT_PERIOD_US); /* 10ms */

    return NV2080_CTRL_LPWR_DIFR_PREFETCH_FAIL_CE_HW_ERROR;
}

/*!
 * Prefetch one scanout surface, deduplicating within the current pass.
 *
 * \param  pDifr           DIFR state for the device.
 * \param  cacheRemaining  In/out remaining L2 budget in bytes.
 * \param  pSurfaceEvo     Surface to prefetch; NULL is silently skipped.
 * \param  status          In/out prefetch status; updated on failure.
 *
 * \return  TRUE to continue prefetching further surfaces, FALSE to abort
 *          (in which case *status holds the failure code).
 */
static NvBool PrefetchHelperSurfaceEvo(NVDIFRStateEvoPtr pDifr,
                                       size_t *cacheRemaining,
                                       NVSurfaceEvoPtr pSurfaceEvo,
                                       NvU32 *status)
{
    NVDIFRPrefetchParams params;

    nvAssert(*status == NV2080_CTRL_LPWR_DIFR_PREFETCH_SUCCESS);

    if (!pSurfaceEvo) {
        return TRUE;
    }

    /*
     * CPU-mapped surfaces can be written without NVKMS or h/w noticing, so
     * abort prefetching entirely when one is displayed.
     */
    if (pSurfaceEvo->noDisplayCaching) {
        *status = NV2080_CTRL_LPWR_DIFR_PREFETCH_FAIL_OS_FLIPS_ENABLED;
        return FALSE;
    }

    /*
     * If we see the same SurfaceEvo twice (UBB, multi-head X screens, etc)
     * we only ever want to prefetch it once within a single
     * nvDIFRPrefetchSurfaces() call.
     */
    if (pSurfaceEvo->difrLastPrefetchPass == pDifr->prefetchPass) {
        return TRUE;
    }

    /*
     * Update pass counter even if we fail later: we want to try each
     * surface only once.
     */
    pSurfaceEvo->difrLastPrefetchPass = pDifr->prefetchPass;

    /* Collect copy parameters and do the prefetch. */
    params.surfGpuAddress = pSurfaceEvo->gpuAddress;
    params.surfSizeBytes = pSurfaceEvo->planes[0].rmObjectSizeInBytes;
    params.surfPitchBytes = pSurfaceEvo->planes[0].pitch;
    params.surfFormat = pSurfaceEvo->format;

    /* For blocklinear surfaces the stored pitch is in GOBs, not bytes. */
    if (pSurfaceEvo->layout == NvKmsSurfaceMemoryLayoutBlockLinear) {
        params.surfPitchBytes *= NVKMS_BLOCK_LINEAR_GOB_WIDTH;
    }

    *status = PrefetchSingleSurface(pDifr, &params, cacheRemaining);

    return *status == NV2080_CTRL_LPWR_DIFR_PREFETCH_SUCCESS;
}

/*!
 * Prefetch one LUT surface, deduplicating within the current pass.
 *
 * Same contract as PrefetchHelperSurfaceEvo(); LUTs are treated as I8
 * single-line blobs (pitch == size).
 */
static NvBool PrefetchHelperLutSurface(NVDIFRStateEvoPtr pDifr,
                                       size_t *cacheRemaining,
                                       NVLutSurfaceEvoPtr pLutSurface,
                                       NvU32 *status)
{
    NVDIFRPrefetchParams params;

    nvAssert(*status == NV2080_CTRL_LPWR_DIFR_PREFETCH_SUCCESS);

    if (!pLutSurface) {
        return TRUE;
    }

    /*
     * LUTs are often shared so we only want to prefetch (or consider) each
     * LUT at most once during the prefetch process.
     */
    if (pLutSurface->difrLastPrefetchPass == pDifr->prefetchPass) {
        return TRUE;
    }

    pLutSurface->difrLastPrefetchPass = pDifr->prefetchPass;

    /* Collect copy parameters and do the prefetch. */
    params.surfGpuAddress = (NvUPtr)pLutSurface->gpuAddress;
    params.surfSizeBytes = pLutSurface->size;
    params.surfPitchBytes = pLutSurface->size;
    params.surfFormat = NvKmsSurfaceMemoryFormatI8;

    *status = PrefetchSingleSurface(pDifr, &params, cacheRemaining);

    return *status == NV2080_CTRL_LPWR_DIFR_PREFETCH_SUCCESS;
}

/*
 * Set DIFR disabled state in H/W. Return true if state was changed and it
 * was successfully signalled downstream.
740 */ 741 static NvBool SetDisabledState(NVDIFRStateEvoPtr pDifr, 742 NvBool shouldDisable) 743 { 744 NVDevEvoPtr pDevEvo = pDifr->pDevEvo; 745 NV2080_CTRL_CMD_LPWR_DIFR_CTRL_PARAMS params = { 0 }; 746 NvU32 ret; 747 748 if (shouldDisable == pDifr->hwDisabled) { 749 return TRUE; 750 } 751 752 params.ctrlParamVal = shouldDisable 753 ? NV2080_CTRL_LPWR_DIFR_CTRL_DISABLE 754 : NV2080_CTRL_LPWR_DIFR_CTRL_ENABLE; 755 756 ret = nvRmApiControl(nvEvoGlobal.clientHandle, 757 pDevEvo->pSubDevices[0]->handle, 758 NV2080_CTRL_CMD_LPWR_DIFR_CTRL, 759 ¶ms, 760 sizeof(params)); 761 762 if (ret != NV_OK) { 763 return FALSE; 764 } 765 766 pDifr->hwDisabled = shouldDisable; 767 768 return TRUE; 769 } 770 771 static NvBool IsCECompatibleWithDIFR(NVDevEvoPtr pDevEvo, NvU32 instance) 772 { 773 NV2080_CTRL_CE_GET_CAPS_V2_PARAMS params; 774 NvU32 ret; 775 776 nvkms_memset(¶ms, 0, sizeof(params)); 777 params.ceEngineType = NV2080_ENGINE_TYPE_COPY(instance); 778 779 ret = nvRmApiControl(nvEvoGlobal.clientHandle, 780 pDevEvo->pSubDevices[0]->handle, 781 NV2080_CTRL_CMD_CE_GET_CAPS_V2, 782 ¶ms, 783 sizeof(params)); 784 785 if (ret != NVOS_STATUS_SUCCESS) { 786 return FALSE; 787 } 788 789 ct_assert(sizeof(params.capsTbl) == NV2080_CTRL_CE_CAPS_TBL_SIZE); 790 791 /* Current criteria: DIFR prefetches can't use graphics CEs. */ 792 if (NV2080_CTRL_CE_GET_CAP(params.capsTbl, NV2080_CTRL_CE_CAPS_CE_GRCE)) { 793 return FALSE; 794 } 795 796 return TRUE; 797 } 798 799 /* 800 * Make sure we have a pending idle timer to check back on idleness. 801 */ 802 static void EnsureIdleTimer(NVDIFRStateEvoPtr pDifr) 803 { 804 if (!pDifr->idleTimer) { 805 /* Wait 100x longer in emulation. */ 806 NvU64 idlePeriod = 807 DIFR_IDLE_WAIT_PERIOD_US * 808 (nvIsEmulationEvo(pDifr->pDevEvo) ? 
100 : 1); 809 810 pDifr->idleTimer = 811 nvkms_alloc_timer(IdleTimerProc, pDifr, 0, idlePeriod); 812 } 813 } 814 815 /* 816 * An idle timer should always remain pending after a flip until further 817 * flips cease and DIFR can be re-enabled. 818 * 819 * Currently we'll try to re-enable DIFR after a constant period of idleness 820 * since the last flip but this could resonate badly with a client that's 821 * rendering at the same pace. 822 * 823 * To avoid churn we could track the time DIFR actually did remain enabled. 824 * If the enabled-period is relatively short against the disabled-period, we 825 * should bump the timeout to re-enable so that we won't be retrying all the 826 * time. Conversely, we should reset the bumped timeout after we actually 827 * managed to sleep long enough with DIFR enabled. 828 * 829 * Note: There's the question of whether we should apply slight hysteresis 830 * within NVKMS regarding enabling/disabling DIFR. The hardware itself does 831 * some churn-limiting and practical observations show that it seems to work 832 * sufficiently and I've not observed rapid, repeating prefetch requests. 833 * Keeping this note here in case this matter needs to be revisited later. 834 */ 835 static void IdleTimerProc(void *dataPtr, NvU32 dataU32) 836 { 837 NVDIFRStateEvoPtr pDifr = (NVDIFRStateEvoPtr)dataPtr; 838 NvU64 now = nvkms_get_usec(); 839 NvU64 idlePeriod = 840 DIFR_IDLE_WAIT_PERIOD_US * 841 (nvIsEmulationEvo(pDifr->pDevEvo) ? 100 : 1); 842 843 /* First free the timer that triggered us. */ 844 nvkms_free_timer(pDifr->idleTimer); 845 pDifr->idleTimer = NULL; 846 847 if (now - pDifr->lastFlipTime >= idlePeriod) { 848 /* Enough time has passed with no new flips, enable DIFR. */ 849 SetDisabledState(pDifr, FALSE); 850 } else { 851 /* New flips have happened since the original, reset idle timer. */ 852 EnsureIdleTimer(pDifr); 853 } 854 } 855