1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include <rmconfig.h>
25 
26 /*
27  * NOTE:
28  * The driver disables dynamic power management by effectively setting
29  * nv_dynamic_power_t::mode to NV_DYNAMIC_PM_NEVER at two places:
30  * 1) During module probe, when the _PRx methods are not supported
31  * by the SBIOS or when the kernel does not support runtime PM.
32  * 2) During RM Init, when the GPU is not Turing+ or when the system is not
33  * a laptop or when the VBIOS does not support RTD3/GC6.
34  *
35  * Thus, after RMInit, if the nv_dynamic_power_t::mode is still
36  * NV_DYNAMIC_PM_FINE, then we know for sure that the fine grained power
37  * control is active and running. In this case, the runtime D3 status is
38  * shown as "Enabled (fine-grained)".
39  *
40  * If the nv_dynamic_power_t::mode is NV_DYNAMIC_PM_NEVER, we have to
41  * distinguish between four cases:
42  * 1) The driver disabled dynamic power management due to lack of kernel/SBIOS
43  * support.
44  * 2) The driver disabled dynamic power management because the GPU does not
45  * support it or the system is not a laptop.
46  * 3) The user has explicitly disabled the feature.
 * 4) The feature is disabled by default for the current configuration.
48  *
49  * In order to differentiate between these four cases,
50  * we use the value of regkey NVreg_DynamicPowerManagement. If this value is
51  * set to 0, then we know that the user has explicitly disabled the
52  * feature. In this case we show the runtime D3 status as "Disabled". This
53  * handles case (3) above.
54  *
55  * For case (4), we look at the combination of nv_dynamic_power_t::mode and
56  * NVreg_DynamicPowerManagement value. If mode is never and regkey value is
57  * default, then status is shown as "Disabled by default".
58  *
59  * For remaining cases, (case (1) and (2)) we show status as "Not Supported".
60  *
61  * When the regkey is set to 0 and the driver disables the feature due to lack
62  * of some support, then precedence will be given to regkey value. Thus, when
63  * both "Not supported" and "Disabled" are possible values, precedence will be
64  * given to "Disabled" over "Not Supported".
65  *
66  * For coarse-grain power control, we show runtime D3 status as
67  * "Enabled (coarse-grained)".
68  *
69  * The runtime D3 status is shown as "?" when RM has not been initialized.
70  * This is because we do not have full knowledge regarding driver triggered
71  * disablement until RM is initialized.
72  */
73 
74 #include <nvlog_inc.h>
75 #include <nv.h>
76 #include <nv-priv.h>
77 #include <nv-reg.h>
78 #include <nv_ref.h>
79 
80 #include <osapi.h>
81 
82 #include <gpu/mem_mgr/mem_mgr.h>
83 #include <gpu/kern_gpu_power.h>
84 #include <core/locks.h>
85 #include "kernel/gpu/intr/intr.h"
86 
87 #include <gpu/mem_sys/kern_mem_sys.h>
88 #include <gpu/subdevice/subdevice.h>
89 #include <ctrl/ctrl2080/ctrl2080unix.h>
90 #include <objtmr.h>
91 
//
// Schedule a timer-based callback to check for complete GPU idleness.
// Windows uses idle times ranging from 70 msec to 10 sec; the present
// duration was chosen with that range in mind. It is neither very
// aggressive nor very slow, which keeps thrashing low.
//
98 #define GC6_PRECONDITION_CHECK_TIME    ((NvU64)5 * 1000 * 1000 * 1000)
99 
100 //
101 // Timeout needed for back to back GC6 cycles.
102 // Timeout is kept same as the timeout selected for GC6 precondition check.
103 // There are cases where GPU is in GC6 and then kernel wakes GPU out of GC6
104 // as part of say accessing pci tree through lspci and then again ask driver
105 // to put GPU in GC6 state after access to device info is done.
106 // Below are the reasons for having timeout limit for back to back GC6 cycle.
107 // 1> Less chip life cycle.
108 // 2> P-state is not reached to GC6 supported P-state, in some cycles.
109 // P-state case returns error to kernel, resulting in corrupted sysfs entry
110 // and then kernel never calls driver to put device in low power state.
111 //
112 #define GC6_CYCLE_IDLE_HOLDOFF_CHECK_TIME    GC6_PRECONDITION_CHECK_TIME
113 
114 //
115 // Once GPU is found to be idle, driver will schedule another callback of
116 // smaller duration. Driver needs to be sure that methods that are present
117 // in host pipeline are flushed to the respective engines and engines become
118 // idle upon consumption.
119 //
120 #define GC6_BAR1_BLOCKER_CHECK_AND_METHOD_FLUSH_TIME (200 * 1000 * 1000)
121 
122 //
123 // Cap Maximum FB allocation size for GCOFF. If regkey value is greater
124 // than this value then it will be capped to this value.
125 //
126 #define GCOFF_DYNAMIC_PM_MAX_FB_SIZE_MB      1024
127 
128 static void RmScheduleCallbackForIdlePreConditions(OBJGPU *);
129 static void RmScheduleCallbackForIdlePreConditionsUnderGpuLock(OBJGPU *);
130 static void RmScheduleCallbackToIndicateIdle(OBJGPU *);
131 static NvBool RmCheckForGcxSupportOnCurrentState(OBJGPU *);
132 static void RmScheduleCallbackToRemoveIdleHoldoff(OBJGPU *);
133 static void RmQueueIdleSustainedWorkitem(OBJGPU *);
134 
135 /*!
136  * @brief Wrapper that checks lock order for the dynamic power mutex.  Locking
137  * order dictates that the GPU lock must not be taken before taking the dynamic
138  * power mutex.
139  *
140  * The GPU lock must not be held when calling this function.
141  *
142  * @param[in]   nvp     nv_priv_t pointer.
143  */
144 static void acquireDynamicPowerMutex(nv_priv_t *nvp)
145 {
146     /*
147      * Note that this may be called before nvp->pGpu has been initialized, for
148      * example at the very beginning of nv_start_device.
149      */
150     NV_ASSERT_CHECKED(!nvp->pGpu || !rmDeviceGpuLockIsOwner(nvp->pGpu->gpuInstance));
151 
152     portSyncMutexAcquire(nvp->dynamic_power.mutex);
153 }
154 
155 /*!
156  * @brief Wrapper to release the mutex, just for consistency with
157  * acquireDynamicPowerMutex() above.
158  *
159  * @param[in]   nvp     nv_priv_t pointer.
160  */
161 static void releaseDynamicPowerMutex(nv_priv_t *nvp)
162 {
163     portSyncMutexRelease(nvp->dynamic_power.mutex);
164 }
165 
166 /*!
167  * @brief: Helper function to get a string for the given state.
168  *
169  * @param[in]   state   Dynamic power state.
170  *
171  * @return      String for the given state.
172  */
173 const char *nv_dynamic_power_state_string(
174     nv_dynamic_power_state_t state
175 )
176 {
177     switch (state)
178     {
179 #define DPCASE(_case) \
180     case NV_DYNAMIC_POWER_STATE_ ## _case: return # _case;
181 
182     DPCASE(IN_USE);
183     DPCASE(IDLE_INSTANT);
184     DPCASE(IDLE_SUSTAINED);
185     DPCASE(IDLE_INDICATED);
186     DPCASE(UNKNOWN);
187 #undef DPCASE
188     }
189     NV_ASSERT(0);
190     return "UNEXPECTED";
191 }
192 
193 /*!
194  * @brief: Helper function to transition the GPU represented by the 'nv'
195  * pointer from the power state 'old_state' to the power state 'new_state'.
196  *
197  * An atomic compare-and-swap is used; the function returns true if the
198  * operation was successful.  If the function returns false, the state is
199  * unchanged.
200  *
201  * @param[in]   nv          nv_state_t pointer.
202  * @param[in]   old_state   Previous (current) state.
203  * @param[in]   new_state   New (requested) state.
204  *
205  * @return      TRUE if the operation was successful; FALSE otherwise.
206  */
207 NvBool nv_dynamic_power_state_transition(
208     nv_state_t *nv,
209     nv_dynamic_power_state_t old_state,
210     nv_dynamic_power_state_t new_state
211 )
212 {
213     nv_priv_t *nvp = NV_GET_NV_PRIV(nv);
214 
215     NV_ASSERT(old_state != new_state);
216 
217     ct_assert(sizeof(nv_dynamic_power_state_t) == sizeof(NvS32));
218     NvBool ret = portAtomicCompareAndSwapS32((NvS32*)&nvp->dynamic_power.state,
219                                              new_state, old_state);
220 
221     if (ret)
222     {
223         NV_PRINTF(LEVEL_INFO, "%s: state transition %s -> %s\n",
224                   __FUNCTION__,
225                   nv_dynamic_power_state_string(old_state),
226                   nv_dynamic_power_state_string(new_state));
227     }
228     else
229     {
230         NV_PRINTF(LEVEL_INFO, "%s: FAILED state transition %s -> %s\n",
231                   __FUNCTION__,
232                   nv_dynamic_power_state_string(old_state),
233                   nv_dynamic_power_state_string(new_state));
234     }
235 
236     return ret;
237 
238 }
239 
240 /*!
241  * @brief: Check if the GPU hardware appears to be idle.
242  *
243  * Assumes the GPU lock is held.
244  *
245  * @param[in]   pGpu    OBJGPU pointer.
246  *
247  * @return      TRUE if the GPU appears to be currently idle; FALSE otherwise.
248  */
249 static NvBool RmCanEnterGcxUnderGpuLock(
250     OBJGPU *pGpu
251 )
252 {
253     NV_ASSERT(rmDeviceGpuLockIsOwner(pGpu->gpuInstance));
254 
255     /*
256      * If GPU does not support GC6 and the actual FB utilization is higher than the threshold,
257      * then the GPU can neither enter GC6 nor GCOFF. So, return from here.
258      */
259     if (!pGpu->getProperty(pGpu, PDB_PROP_GPU_RTD3_GC6_SUPPORTED))
260     {
261         NvU64          usedFbSize     = 0;
262         nv_state_t    *nv             = NV_GET_NV_STATE(pGpu);
263         nv_priv_t     *nvp            = NV_GET_NV_PRIV(nv);
264         MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
265 
266         if (!((memmgrGetUsedRamSize(pGpu, pMemoryManager, &usedFbSize) == NV_OK) &&
267             (usedFbSize <= nvp->dynamic_power.gcoff_max_fb_size)))
268         {
269             return NV_FALSE;
270         }
271     }
272 
273     // Check the instantaneous engine level idleness.
274     return RmCheckForGcxSupportOnCurrentState(pGpu);
275 }
276 
277 /*!
278  * @brief: Check the feasibility of GPU engaging in a GCx cycle.
279  *
280  * Takes the GPU lock.
281  *
282  * @param[in]   pGpu    OBJGPU pointer.
283  *
284  * @return      TRUE if the GPU appears to be currently idle; FALSE otherwise.
285  */
286 static NvBool RmCanEnterGcx(
287     OBJGPU *pGpu
288 )
289 {
290     NvBool      result = NV_FALSE;
291     NV_STATUS   status;
292     GPU_MASK    gpuMask;
293 
294     // LOCK: acquire per device lock
295     status = rmGpuGroupLockAcquire(pGpu->gpuInstance, GPU_LOCK_GRP_SUBDEVICE,
296                                    GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_DYN_POWER,
297                                    &gpuMask);
298     if (status == NV_OK)
299     {
300         result = RmCanEnterGcxUnderGpuLock(pGpu);
301         // UNLOCK: release per device lock
302         rmGpuGroupLockRelease(gpuMask, GPUS_LOCK_FLAGS_NONE);
303     }
304 
305     return result;
306 }
307 
308 /*!
309  * @brief: Force the GPU to be "not idle", if it had previously been indicated
310  * idle.  This is triggered by GPU usage (i.e., writing through a revoked
311  * mapping), not tracked through the refcount maintained through
312  * os_{ref,unref}_dynamic_power.  So, if we'd previously indicated the GPU was
313  * idle, we transition to the IDLE_INSTANT state.
314  *
315  * @param[in]   gpuInstance     GPU instance ID.
316  * @param[in]   pArgs           Unused callback closure.
317  */
318 static void RmForceGpuNotIdle(
319     NvU32 gpuInstance,
320     void *pArgs
321 )
322 {
323     OBJGPU *pGpu = gpumgrGetGpu(gpuInstance);
324     nv_state_t *nv = NV_GET_NV_STATE(pGpu);
325     nv_priv_t *nvp = NV_GET_NV_PRIV(nv);
326     nv_dynamic_power_state_t old_state;
327     NvBool ret;
328 
329     acquireDynamicPowerMutex(nvp);
330 
331     old_state = nvp->dynamic_power.state;
332 
333     switch (old_state)
334     {
335     case NV_DYNAMIC_POWER_STATE_IDLE_INDICATED:
336         nv_indicate_not_idle(nv);
337         NV_ASSERT(nvp->dynamic_power.deferred_idle_enabled);
338         RmScheduleCallbackForIdlePreConditions(pGpu);
339         /* fallthrough */
340     case NV_DYNAMIC_POWER_STATE_IDLE_SUSTAINED:
341         ret = nv_dynamic_power_state_transition(nv, old_state, NV_DYNAMIC_POWER_STATE_IDLE_INSTANT);
342         NV_ASSERT(ret);
343         break;
344     case NV_DYNAMIC_POWER_STATE_IDLE_INSTANT:
345     case NV_DYNAMIC_POWER_STATE_IN_USE:
346         break;
347     case NV_DYNAMIC_POWER_STATE_UNKNOWN:
348         NV_ASSERT(0);
349         break;
350     }
351 
352     /*
353      * Now that the GPU is guaranteed to not be powered off, we can resume
354      * servicing mmap requests as usual.
355      */
356     nv_acquire_mmap_lock(nv);
357     nv_set_safe_to_mmap_locked(nv, NV_TRUE);
358     nv_release_mmap_lock(nv);
359 
360     releaseDynamicPowerMutex(nvp);
361 }
362 
363 /*!
364  * @brief: Work item to actually indicate the GPU is idle.  This rechecks the
365  * preconditions one last time and verifies nothing has attempted to write
366  * through any mappings which were revoked.
367  *
368  * @param[in]   gpuInstance     GPU instance ID.
369  * @param[in]   pArgs           Unused callback closure.
370  */
371 static void RmIndicateIdle(
372     NvU32 gpuInstance,
373     void *pArgs
374 )
375 {
376     OBJGPU *pGpu = gpumgrGetGpu(gpuInstance);
377     nv_state_t *nv = NV_GET_NV_STATE(pGpu);
378     nv_priv_t *nvp = NV_GET_NV_PRIV(nv);
379 
380     acquireDynamicPowerMutex(nvp);
381 
382     nv_acquire_mmap_lock(nv);
383 
384     if (nv_get_all_mappings_revoked_locked(nv) &&
385         nvp->dynamic_power.state == NV_DYNAMIC_POWER_STATE_IDLE_SUSTAINED &&
386         RmCanEnterGcx(pGpu))
387     {
388         nv_set_safe_to_mmap_locked(nv, NV_FALSE);
389         nv_dynamic_power_state_transition(nv, NV_DYNAMIC_POWER_STATE_IDLE_SUSTAINED,
390                                               NV_DYNAMIC_POWER_STATE_IDLE_INDICATED);
391         nv_indicate_idle(nv);
392     }
393     else
394     {
395         RmScheduleCallbackForIdlePreConditions(pGpu);
396     }
397 
398     nv_release_mmap_lock(nv);
399 
400     releaseDynamicPowerMutex(nvp);
401 }
402 
403 /*!
404  * @brief: Helper function to trigger the GPU to be forced indicated
405  * "non-idle", which powers it on if it had been previously powered off, and
406  * allows mmap processing to continue.
407  *
408  * Triggered when a revoked mapping is accessed.
409  *
410  * @param[in]   sp  nvidia_stack_t pointer.
411  * @param[in]   nv  nv_state_t pointer.
412  */
413 NV_STATUS NV_API_CALL rm_schedule_gpu_wakeup(
414     nvidia_stack_t *sp,
415     nv_state_t *nv
416 )
417 {
418     void *fp;
419     NV_STATUS ret;
420     OBJGPU *pGpu = NV_GET_NV_PRIV_PGPU(nv);
421 
422     NV_ENTER_RM_RUNTIME(sp, fp);
423 
424     ret = osQueueWorkItem(pGpu, RmForceGpuNotIdle, NULL);
425 
426     NV_EXIT_RM_RUNTIME(sp, fp);
427 
428     return ret;
429 }
430 
431 /*!
432  * @brief Function to increment/decrement refcount for GPU driving console.
433  *
434  * @params[in]  nv         nv_state_t pointer.
435  * @params[in]  bIncrease  If true, increase refcount else decrease.
436  */
437 void
438 RmUpdateGc6ConsoleRefCount
439 (
440     nv_state_t *nv,
441     NvBool      bIncrease
442 )
443 {
444     nv_priv_t *nvp = NV_GET_NV_PRIV(nv);
445 
446     if (nv->console_device == NV_FALSE)
447     {
448         return;
449     }
450 
451     acquireDynamicPowerMutex(nvp);
452 
453     if (bIncrease)
454     {
455         /*
456          * Here refcount value should atleast be 2 due to below:
457          * 1> Refcount increased by NV2080_CTRL_CMD_OS_UNIX_GC6_BLOCKER_REFCNT.
458          * 2> Refcount increased by the Rm entry points for current thread.
459          */
460         if (nvp->dynamic_power.refcount <= 1)
461         {
462             NV_PRINTF(LEVEL_INFO,
463                   "%s: Unexpected dynamic power refcount value\n", __FUNCTION__);
464         }
465         else
466         {
467             nvp->dynamic_power.refcount++;
468         }
469     }
470     else
471     {
472         /*
473          * Here refcount value should atleast be 3 due to below:
474          * 1> Refcount set by RM at the probe time for console device.
475          * 2> Refcount increased by NV2080_CTRL_CMD_OS_UNIX_GC6_BLOCKER_REFCNT.
476          * 3> Refcount increased by the Rm entry points for current thread.
477          */
478         if (nvp->dynamic_power.refcount <= 2)
479         {
480             NV_PRINTF(LEVEL_INFO,
481                   "%s: Unexpected dynamic power refcount value\n", __FUNCTION__);
482         }
483         else
484         {
485             nvp->dynamic_power.refcount--;
486         }
487     }
488 
489     releaseDynamicPowerMutex(nvp);
490 }
491 
492 /*!
493  * @brief Implements the NV2080_CTRL_CMD_OS_UNIX_GC6_BLOCKER_REFCNT RmControl
494  * request.
495  *
496  * @param[in]     pSubdevice
497  * @param[in,out] pParams
498  *
499  * @return
500  *      NV_OK                       Success
501  *      NV_ERR_INVALID_ARGUMENT     Invalid pParams->action
502  */
503 NV_STATUS
504 subdeviceCtrlCmdOsUnixGc6BlockerRefCnt_IMPL
505 (
506     Subdevice *pSubdevice,
507     NV2080_CTRL_OS_UNIX_GC6_BLOCKER_REFCNT_PARAMS *pParams
508 )
509 {
510     OBJGPU *pGpu = GPU_RES_GET_GPU(pSubdevice);
511 
512     /* TODO: keep track of inc/dec on a per-client basis, clean up after
513      * clients which are unbalanced. */
514     if (pParams->action == NV2080_CTRL_OS_UNIX_GC6_BLOCKER_REFCNT_INC)
515     {
516         return osRefGpuAccessNeeded(pGpu->pOsGpuInfo);
517     }
518     else if (pParams->action == NV2080_CTRL_OS_UNIX_GC6_BLOCKER_REFCNT_DEC)
519     {
520         osUnrefGpuAccessNeeded(pGpu->pOsGpuInfo);
521     }
522     else
523     {
524         return NV_ERR_INVALID_ARGUMENT;
525     }
526 
527     return NV_OK;
528 }
529 
530 /*!
531  * @brief Implements the NV2080_CTRL_CMD_OS_UNIX_AUDIO_DYNAMIC_POWER RmControl
532  *        request. It will perform dynamic power management for GPU
 *        HDA controller.
534  *
535  * @param[in]     pSubdevice
536  * @param[in,out] pParams
537  *
538  * @return
539  *      NV_OK   Success
540  */
541 NV_STATUS
542 subdeviceCtrlCmdOsUnixAudioDynamicPower_IMPL
543 (
544     Subdevice *pSubdevice,
545     NV2080_CTRL_OS_UNIX_AUDIO_DYNAMIC_POWER_PARAMS *pParams
546 )
547 {
548     OBJGPU *pGpu = GPU_RES_GET_GPU(pSubdevice);
549 
550     /*
551      * Ideally, we need to honor pParams->bEnter. If it is true, then send the
552      * request for suspending the HDA controller and if is false, then send the
553      * request for waking up the HDA controller. Currently, for unix based OS,
554      * we don't have any proper way to do this and it is implemented with
555      * workaround. This workaround will wake up the HDA controller for few
556      * seconds and then bring it back to suspended state. We are using this
557      * workaround for both the cases (bEnter is true or false). It will help
558      * in keeping HDA controller active for the duration, when GPU is
559      * processing HDA controller commands (like writing ELD data).
560      */
561     nv_audio_dynamic_power(NV_GET_NV_STATE(pGpu));
562 
563     return NV_OK;
564 }
565 
566 /*!
567  * @brief: Function to indicate if Video Memory is powered off or not by
568  * checking if GPU is in GCOFF state.
569  *
570  * @param[in]   sp     nvidia_stack_t pointer.
571  * @param[in]   pNv    nv_state_t pointer.
572  *
573  * @return      String indicating Video Memory power status.
574  */
575 
576 const char* NV_API_CALL rm_get_vidmem_power_status(
577     nvidia_stack_t *sp,
578     nv_state_t     *pNv
579 )
580 {
581     THREAD_STATE_NODE threadState;
582     void              *fp;
583     GPU_MASK          gpuMask;
584     const char        *pVidmemPowerStatus = "?";
585 
586     NV_ENTER_RM_RUNTIME(sp,fp);
587     threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
588 
589     // LOCK: acquire API lock
590     if ((rmapiLockAcquire(API_LOCK_FLAGS_NONE, RM_LOCK_MODULES_DYN_POWER)) == NV_OK)
591     {
592         OBJGPU *pGpu = NV_GET_NV_PRIV_PGPU(pNv);
593 
594         // LOCK: acquire per device lock
595         if ((pGpu != NULL) &&
596            ((rmGpuGroupLockAcquire(pGpu->gpuInstance, GPU_LOCK_GRP_SUBDEVICE,
597                                    GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_DYN_POWER,
598                                    &gpuMask)) == NV_OK))
599         {
600             if (pGpu->getProperty(pGpu, PDB_PROP_GPU_GCOFF_STATE_ENTERED))
601             {
602                 pVidmemPowerStatus = "Off";
603             }
604             else
605             {
606                 pVidmemPowerStatus = "Active";
607             }
608 
609             // UNLOCK: release per device lock
610             rmGpuGroupLockRelease(gpuMask, GPUS_LOCK_FLAGS_NONE);
611         }
612 
613         // UNLOCK: release API lock
614         rmapiLockRelease();
615     }
616 
617     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
618     NV_EXIT_RM_RUNTIME(sp,fp);
619 
620     return pVidmemPowerStatus;
621 }
622 
623 /*!
624  * @brief: Function to indicate if GC6/GC-OFF is supported
625  * or not on the SKU.
626  *
627  * @param[in]   sp             nvidia_stack_t pointer.
628  * @param[in]   pNv            nv_state_t pointer.
629  * @param[in]   bGcxTypeGc6    If true, returns string indicating GC6 support
630  *                             otherwise returns GC-OFF support.
631  *
632  * @return      String indicating GC6/GC-OFF support status.
633  */
634 const char* NV_API_CALL rm_get_gpu_gcx_support(
635     nvidia_stack_t *sp,
636     nv_state_t     *pNv,
637     NvBool         bGcxTypeGC6
638 )
639 {
640     THREAD_STATE_NODE threadState;
641     void              *fp;
642     GPU_MASK          gpuMask;
643     const char        *pSupported = "?";
644 
645     NV_ENTER_RM_RUNTIME(sp,fp);
646     threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
647 
648     // LOCK: acquire API lock
649     if ((rmapiLockAcquire(API_LOCK_FLAGS_NONE, RM_LOCK_MODULES_DYN_POWER)) == NV_OK)
650     {
651         OBJGPU *pGpu = NV_GET_NV_PRIV_PGPU(pNv);
652 
653         // LOCK: acquire per device lock
654         if ((pGpu != NULL) &&
655             ((rmGpuGroupLockAcquire(pGpu->gpuInstance, GPU_LOCK_GRP_SUBDEVICE,
656                                     GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_DYN_POWER,
657                                     &gpuMask)) == NV_OK))
658         {
659             pSupported = "Not Supported";
660 
661             if (!pGpu->getProperty(pGpu, PDB_PROP_GPU_UNIX_DYNAMIC_POWER_SUPPORTED))
662             {
663                 goto done;
664             }
665 
666             if (bGcxTypeGC6)
667             {
668                 if (pGpu->getProperty(pGpu, PDB_PROP_GPU_RTD3_GC6_SUPPORTED))
669                 {
670                     pSupported = "Supported";
671                 }
672             }
673             else
674             {
675                 if ((pGpu->getProperty(pGpu, PDB_PROP_GPU_RTD3_GCOFF_SUPPORTED)) ||
676                     (pGpu->getProperty(pGpu, PDB_PROP_GPU_LEGACY_GCOFF_SUPPORTED)))
677                 {
678                     pSupported = "Supported";
679                 }
680             }
681 
682 done:
683             // UNLOCK: release per device lock
684             rmGpuGroupLockRelease(gpuMask, GPUS_LOCK_FLAGS_NONE);
685         }
686 
687         //UNLOCK: release API lock
688         rmapiLockRelease();
689     }
690 
691     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
692     NV_EXIT_RM_RUNTIME(sp,fp);
693 
694     return pSupported;
695 }
696 
697 /*!
698  * @brief Function to increment/decrement global Gcoff disallow refcount.
699  *
700  * @params[in]  pOsGpuInfo        OS specific GPU information pointer.
701  * @params[in]  bIsGcoffDisallow  If true, increase refcount else decrease.
702  */
703 void osClientGcoffDisallowRefcount(
704     OS_GPU_INFO *pOsGpuInfo,
705     NvBool       bIsGcoffDisallow
706 )
707 {
708     nv_priv_t    *nvp = NV_GET_NV_PRIV(pOsGpuInfo);
709 
710     if (nvp->dynamic_power.mode == NV_DYNAMIC_PM_NEVER)
711     {
712         return;
713     }
714 
715     acquireDynamicPowerMutex(nvp);
716 
717     if (bIsGcoffDisallow)
718     {
719         nvp->dynamic_power.clients_gcoff_disallow_refcount++;
720     }
721     else
722     {
723         nvp->dynamic_power.clients_gcoff_disallow_refcount--;
724     }
725 
726     releaseDynamicPowerMutex(nvp);
727 }
728 
729 /*!
730  * @brief Implements the NV2080_CTRL_CMD_OS_UNIX_ALLOW_DISALLOW_GCOFF RmControl
731  * request.
732  *
733  * @param[in]     pSubdevice
734  * @param[in,out] pParams
735  *
736  * @return
737  *      NV_OK                           Success
738  *      NV_ERR_INVALID_ARGUMENT         Invalid pParams->action
739  *      NV_ERR_INVALID_OBJECT_HANDLE    Invalid Object handle
740  *      NV_ERR_OBJECT_NOT_FOUND         Object not found
741  */
742 NV_STATUS
743 subdeviceCtrlCmdOsUnixAllowDisallowGcoff_IMPL
744 (
745     Subdevice *pSubdevice,
746     NV2080_CTRL_OS_UNIX_ALLOW_DISALLOW_GCOFF_PARAMS *pParams
747 )
748 {
749     NV_STATUS status;
750     OBJGPU   *pGpu = GPU_RES_GET_GPU(pSubdevice);
751 
752     if (pParams->action == NV2080_CTRL_OS_UNIX_ALLOW_DISALLOW_GCOFF_ALLOW)
753     {
754         if (pSubdevice->bGcoffDisallowed)
755         {
756             pSubdevice->bGcoffDisallowed = NV_FALSE;
757             osClientGcoffDisallowRefcount(pGpu->pOsGpuInfo, NV_FALSE);
758             status = NV_OK;
759        }
760        else
761        {
762             NV_ASSERT(0);
763             status = NV_ERR_INVALID_ARGUMENT;
764        }
765     }
766     else if (pParams->action == NV2080_CTRL_OS_UNIX_ALLOW_DISALLOW_GCOFF_DISALLOW)
767     {
768         if (!pSubdevice->bGcoffDisallowed)
769         {
770             pSubdevice->bGcoffDisallowed = NV_TRUE;
771             osClientGcoffDisallowRefcount(pGpu->pOsGpuInfo, NV_TRUE);
772             status = NV_OK;
773         }
774         else
775         {
776             NV_ASSERT(0);
777             status = NV_ERR_INVALID_ARGUMENT;
778         }
779     }
780     else
781     {
782         status = NV_ERR_INVALID_ARGUMENT;
783     }
784 
785     return status;
786 }
787 
788 /*!
789  * @brief Read and Parse the Dynamic Power Management regkey.
790  * This function modifies the default value of regkey to either enable
791  * or disable RTD3 as per System form factor and GPU architecture.
792  *
793  * @param[in]    pNv             nv_state_t pointer.
794  * @param[out]   pRegkeyValue    Original value of regkey.
795  * @param[out]   pOption         Modified value of regkey.
796  *
797  * @return       NV_STATUS code.
798  */
799 
800 #define NV_PMC_BOOT_42_CHIP_ID_GA102                     0x00000172
801 
802 static NV_STATUS
803 rmReadAndParseDynamicPowerRegkey
804 (
805     nv_state_t *pNv,
806     NvU32      *pRegkeyValue,
807     NvU32      *pOption
808 )
809 {
810     NV_STATUS  status;
811     NvU32      chipId;
812     nv_priv_t *pNvp = NV_GET_NV_PRIV(pNv);
813 
814     status = osReadRegistryDword(NULL, NV_REG_DYNAMIC_POWER_MANAGEMENT, pRegkeyValue);
815 
816     if (status != NV_OK)
817         return status;
818 
819     // If User has set some value, honor that value
820     if (*pRegkeyValue != NV_REG_DYNAMIC_POWER_MANAGEMENT_DEFAULT)
821     {
822         *pOption = *pRegkeyValue;
823         return NV_OK;
824     }
825 
826     chipId = DRF_VAL(_PMC, _BOOT_42, _CHIP_ID, pNvp->pmc_boot_42);
827 
828     // From GA102+, we enable RTD3 only if system is found to be Notebook
829     if ((chipId >= NV_PMC_BOOT_42_CHIP_ID_GA102) &&
830         (rm_is_system_notebook()))
831     {
832         *pOption = NV_REG_DYNAMIC_POWER_MANAGEMENT_FINE;
833         return NV_OK;
834     }
835 
836     *pOption = NV_REG_DYNAMIC_POWER_MANAGEMENT_NEVER;
837     return NV_OK;
838 }
839 #undef NV_PMC_BOOT_42_CHIP_ID_GA102
840 
841 /*!
842  * @brief Initialize state related to dynamic power management.
843  * Called once per GPU during driver initialization.
844  *
845  * @param[in]   sp  nvidia_stack_t pointer.
846  * @param[in]   nv  nv_state_t pointer.
847  */
void NV_API_CALL rm_init_dynamic_power_management(
    nvidia_stack_t *sp,
    nv_state_t *nv,
    NvBool bPr3AcpiMethodPresent
)
{
    nv_priv_t *nvp = NV_GET_NV_PRIV(nv);
    NvU32 option   = NV_REG_DYNAMIC_POWER_MANAGEMENT_NEVER;
    NvU32 gcOffMaxFbSizeMb;
    void *fp;
    NvBool bUefiConsole;
    NvU32 status;        // holds NV_STATUS codes from the regkey read
    NvU32 regkeyValue;

    NV_ENTER_RM_RUNTIME(sp,fp);

    // Start from a clean slate; all dynamic power state is re-derived below.
    portMemSet(&nvp->dynamic_power, 0, sizeof(nvp->dynamic_power));

    /*
     * Program an impossible value so that we show correct status
     * during procfs read of runtime D3 status.
     */
    nvp->dynamic_power.dynamic_power_regkey = 0xFFFFFFFFu;

    /*
     * Dynamic power management will be disabled in either of
     * following 3 cases:
     * 1. The driver does not support dynamic PM for this
     *    distro/kernel version.
     * 2. _PR3 ACPI method is not supported by upstream port of GPU.
     * 3. This feature is not enabled by regkey.
     * Also see NOTE
     */
    status = rmReadAndParseDynamicPowerRegkey(nv, &regkeyValue, &option);

    // Record the raw regkey value (used later to distinguish "Disabled"
    // from "Disabled by default" in the runtime D3 status reporting).
    if (status == NV_OK)
    {
        nvp->dynamic_power.dynamic_power_regkey = regkeyValue;
    }

    if (!nv_dynamic_power_available(nv) || !bPr3AcpiMethodPresent ||
        (status != NV_OK))
    {
        NV_PRINTF(LEVEL_NOTICE,
                  "%s: Disabling dynamic power management either due to lack"
                  " of system support or due to error (%d) in reading regkey."
                  "\n", __FUNCTION__, status);
        option = NV_REG_DYNAMIC_POWER_MANAGEMENT_NEVER;
    }

    /*
     * Read the OS registry for Maximum FB size during GCOFF based dynamic PM.
     * This will be capped to GCOFF_DYNAMIC_PM_MAX_FB_SIZE_MB.
     */
    if (osReadRegistryDword(NULL,
                            NV_REG_DYNAMIC_POWER_MANAGEMENT_VIDEO_MEMORY_THRESHOLD,
                            &gcOffMaxFbSizeMb) == NV_OK)
    {
        if (gcOffMaxFbSizeMb > GCOFF_DYNAMIC_PM_MAX_FB_SIZE_MB)
            gcOffMaxFbSizeMb = GCOFF_DYNAMIC_PM_MAX_FB_SIZE_MB;
    }
    else
    {
        gcOffMaxFbSizeMb = GCOFF_DYNAMIC_PM_MAX_FB_SIZE_MB;
    }

    // Store the threshold in bytes; compared against used FB size in
    // RmCanEnterGcxUnderGpuLock().
    nvp->dynamic_power.gcoff_max_fb_size =
            (NvU64)gcOffMaxFbSizeMb * 1024 * 1024;

    // Without the mutex no dynamic PM bookkeeping is possible, so force
    // the mode to NEVER and bail out.
    nvp->dynamic_power.mutex = portSyncMutexCreate(portMemAllocatorGetGlobalNonPaged());
    if (nvp->dynamic_power.mutex == NULL)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "%s: failed to create power mutex\n", __FUNCTION__);
        nvp->dynamic_power.mode = NV_DYNAMIC_PM_NEVER;
        goto done;
    }

    // Translate the (possibly overridden) regkey option into the
    // driver-internal dynamic PM mode.
    switch (option)
    {
    case NV_REG_DYNAMIC_POWER_MANAGEMENT_FINE:
        nvp->dynamic_power.mode = NV_DYNAMIC_PM_FINE;
        break;
    case NV_REG_DYNAMIC_POWER_MANAGEMENT_COARSE:
        nvp->dynamic_power.mode = NV_DYNAMIC_PM_COARSE;
        break;
    default:
        nv_printf(LEVEL_ERROR,
                  "NVRM: Unknown DynamicPowerManagement value '%u' specified; "
                  "disabling dynamic power management.\n", option);
        // fallthrough
    case NV_REG_DYNAMIC_POWER_MANAGEMENT_NEVER:
        nvp->dynamic_power.mode = NV_DYNAMIC_PM_NEVER;
        break;
    }

    // Enable RTD3 infrastructure from OS side.
    if ((nvp->dynamic_power.mode == NV_DYNAMIC_PM_FINE) &&
        (nvp->dynamic_power.dynamic_power_regkey == NV_REG_DYNAMIC_POWER_MANAGEMENT_DEFAULT))
    {
        nv_allow_runtime_suspend(nv);
    }

    // Legacy case: check if device is primary and driven by VBIOS or fb driver.
    nv->primary_vga = NV_FALSE;

    //
    // Below function always return NV_OK and depends upon kernel flags
    // IORESOURCE_ROM_SHADOW & PCI_ROM_RESOURCE for Primary VGA detection.
    //
    nv_set_primary_vga_status(nv);

    // UEFI case: where console is driven by GOP driver.
    bUefiConsole = rm_get_uefi_console_status(nv);

    nv->console_device = bUefiConsole || nv->primary_vga;

    //
    // Initialize refcount as 1 and state as IN_USE.
    // rm_enable_dynamic_power_management() will decrease the refcount
    // and change state to IDLE_INDICATED, if all prerequisites are met.
    //
    nvp->dynamic_power.refcount = 1;
    nv_dynamic_power_state_transition(nv, NV_DYNAMIC_POWER_STATE_UNKNOWN,
                                      NV_DYNAMIC_POWER_STATE_IN_USE);
done:
    NV_EXIT_RM_RUNTIME(sp,fp);
}
976 
977 /*!
978  * @brief Clean up state related to dynamic power management.
979  * Called during driver removal.
980  *
981  * @param[in]   sp  nvidia_stack_t pointer.
982  * @param[in]   nv  nv_state_t pointer.
983  */
984 void NV_API_CALL rm_cleanup_dynamic_power_management(
985     nvidia_stack_t *sp,
986     nv_state_t *nv
987 )
988 {
989     nv_priv_t *nvp = NV_GET_NV_PRIV(nv);
990     void *fp;
991     NvBool ret;
992 
993     NV_ENTER_RM_RUNTIME(sp,fp);
994 
995     // Disable RTD3 infrastructure from OS side.
996     if ((nvp->dynamic_power.mode == NV_DYNAMIC_PM_FINE) &&
997         (nvp->dynamic_power.dynamic_power_regkey == NV_REG_DYNAMIC_POWER_MANAGEMENT_DEFAULT))
998     {
999         nv_disallow_runtime_suspend(nv);
1000     }
1001 
1002     nv_dynamic_power_state_t old_state = nvp->dynamic_power.state;
1003 
1004     switch (old_state)
1005     {
1006     case NV_DYNAMIC_POWER_STATE_IDLE_INDICATED:
1007         nv_indicate_not_idle(nv);
1008         /* fallthrough */
1009     case NV_DYNAMIC_POWER_STATE_IDLE_SUSTAINED:
1010     case NV_DYNAMIC_POWER_STATE_IDLE_INSTANT:
1011         ret = nv_dynamic_power_state_transition(nv, old_state,
1012                                                 NV_DYNAMIC_POWER_STATE_IN_USE);
1013         /*
1014          * Nothing else should be running asynchronous to teardown that could
1015          * change the state so this should always succeed.
1016          */
1017         NV_ASSERT(ret);
1018         break;
1019     case NV_DYNAMIC_POWER_STATE_IN_USE:
1020         break;
1021     case NV_DYNAMIC_POWER_STATE_UNKNOWN:
1022         NV_ASSERT(0);
1023         break;
1024     }
1025 
1026     if (nvp->dynamic_power.mutex)
1027     {
1028         portSyncMutexDestroy(nvp->dynamic_power.mutex);
1029         nvp->dynamic_power.mutex = NULL;
1030     }
1031 
1032     NV_EXIT_RM_RUNTIME(sp,fp);
1033 }
1034 
1035 /*!
1036  * @brief Initialize the dynamic power management refcount and enable GPUOFF,
1037  * if all prerequisites are met.
1038  * Called once per GPU during driver initialization.
1039  *
1040  * @param[in]   sp  nvidia_stack_t pointer.
1041  * @param[in]   nv  nv_state_t pointer.
1042  */
1043 void NV_API_CALL rm_enable_dynamic_power_management(
1044     nvidia_stack_t *sp,
1045     nv_state_t     *nv
1046 )
1047 {
1048     nv_priv_t *nvp = NV_GET_NV_PRIV(nv);
1049     void *fp;
1050 
1051     NV_ENTER_RM_RUNTIME(sp,fp);
1052 
1053     /*
1054      * If dynamic power management is enabled and device is not driving console,
1055      * then dynamic power management can be engaged (since we're not
1056      * using the GPU yet, so it's okay to power it off).
1057      */
1058     if ((nvp->dynamic_power.mode != NV_DYNAMIC_PM_NEVER) && !nv->console_device)
1059     {
1060         os_unref_dynamic_power(nv, NV_DYNAMIC_PM_COARSE);
1061     }
1062 
1063     NV_EXIT_RM_RUNTIME(sp,fp);
1064 }
1065 
1066 /*
1067  * @brief Increment the dynamic power refcount to prevent the GPU from being
1068  * powered off until a corresponding os_unref_dynamic_power() call is made.
1069  * These two calls must always be balanced in pairs.
1070  *
1071  * This call may block until the GPU is powered on.
1072  *
1073  * The caller must not be holding the GPU lock, but may be holding the API
1074  * lock.
1075  *
1076  * @param[in]   sp      nvidia_stack_t pointer.
1077  * @param[in]   mode    Dynamic power mode this refcount bump is used for.
1078  */
1079 NV_STATUS
1080 os_ref_dynamic_power(
1081     nv_state_t *nv,
1082     nv_dynamic_power_mode_t mode
1083 )
1084 {
1085     nv_priv_t *nvp = NV_GET_NV_PRIV(nv);
1086     NV_STATUS status = NV_OK;
1087     NvS32 ref;
1088 
1089     if (nvp == NULL)
1090     {
1091         NV_ASSERT(nv->removed);
1092         return NV_OK;
1093     }
1094 
1095     if (mode > nvp->dynamic_power.mode)
1096     {
1097         return NV_OK;
1098     }
1099 
1100     acquireDynamicPowerMutex(nvp);
1101 
1102     ref = nvp->dynamic_power.refcount++;
1103 
1104     NV_ASSERT(ref >= 0);
1105 
1106     if (ref > 0)
1107     {
1108         NV_ASSERT(nvp->dynamic_power.state == NV_DYNAMIC_POWER_STATE_IN_USE);
1109     }
1110     else
1111     {
1112         /*
1113          * Just transitioned from 0 -> 1.
1114          * The current dynamic power state could be any of:
1115          *
1116          * - IDLE_INDICATED: call nv_indicate_not_idle() to ensure
1117          *   the GPU is at full power and ready to be used.
1118          *
1119          * - IDLE_INSTANT/IDLE_SUSTAINED: transition to IN_USE.
1120          *   Note that in these states, callbacks may be operating
1121          *   asynchronously, so care has to be taken to retry the write if it
1122          *   fails.
1123          */
1124 
1125         nv_dynamic_power_state_t old_state = nvp->dynamic_power.state;
1126         NvBool ret;
1127 
1128         switch (old_state)
1129         {
1130         default:
1131         case NV_DYNAMIC_POWER_STATE_IN_USE:
1132             NV_PRINTF(LEVEL_ERROR, "NVRM: %s: Unexpected dynamic power state 0x%x\n",
1133                       __FUNCTION__, old_state);
1134             /* fallthrough */
1135         case NV_DYNAMIC_POWER_STATE_IDLE_INDICATED:
1136             status = nv_indicate_not_idle(nv);
1137             if (status != NV_OK)
1138             {
1139                 nvp->dynamic_power.refcount--;
1140                 break;
1141             }
1142             if (nvp->dynamic_power.deferred_idle_enabled)
1143             {
1144                 RmScheduleCallbackForIdlePreConditions(NV_GET_NV_PRIV_PGPU(nv));
1145             }
1146             /* fallthrough */
1147         case NV_DYNAMIC_POWER_STATE_IDLE_SUSTAINED:
1148         case NV_DYNAMIC_POWER_STATE_IDLE_INSTANT:
1149             do {
1150                 ret = nv_dynamic_power_state_transition(nv, old_state, NV_DYNAMIC_POWER_STATE_IN_USE);
1151                 if (!ret)
1152                 {
1153                     old_state = nvp->dynamic_power.state;
1154                     NV_ASSERT(old_state == NV_DYNAMIC_POWER_STATE_IDLE_INSTANT ||
1155                               old_state == NV_DYNAMIC_POWER_STATE_IDLE_SUSTAINED);
1156                 }
1157             } while (!ret);
1158             break;
1159         }
1160     }
1161 
1162     releaseDynamicPowerMutex(nvp);
1163 
1164     return status;
1165 }
1166 
1167 /*!
1168  * @brief Wrapper around os_ref_dynamic_power() suitable for use from the
1169  * per-OS layers.
1170  *
1171  * @param[in]   sp      nvidia_stack_t pointer.
1172  * @param[in]   nv      nv_state_t pointer.
1173  * @param[in]   mode    Dynamic power mode this refcount bump is used for.
1174  */
1175 NV_STATUS NV_API_CALL rm_ref_dynamic_power(
1176     nvidia_stack_t *sp,
1177     nv_state_t *nv,
1178     nv_dynamic_power_mode_t mode
1179 )
1180 {
1181     NV_STATUS status;
1182     void *fp;
1183 
1184     NV_ENTER_RM_RUNTIME(sp,fp);
1185 
1186     status = os_ref_dynamic_power(nv, mode);
1187 
1188     NV_EXIT_RM_RUNTIME(sp,fp);
1189 
1190     return status;
1191 }
1192 
1193 /*!
1194  * @brief Decrement the dynamic power refcount to release an earlier
1195  * requirement that the GPU be powered on made by calling
1196  * os_ref_dynamic_power().
1197  * These two calls must always be balanced in pairs.
1198  *
1199  * This call may block until the GPU is powered on.
1200  *
1201  * The caller must not be holding the GPU lock, but may be holding the API
1202  * lock.
1203  *
1204  * @param[in]   sp      nvidia_stack_t pointer.
1205  * @param[in]   mode    Dynamic power mode this refcount bump is used for.
1206  */
1207 void
1208 os_unref_dynamic_power(
1209     nv_state_t *nv,
1210     nv_dynamic_power_mode_t mode
1211 )
1212 {
1213     nv_priv_t *nvp = NV_GET_NV_PRIV(nv);
1214     NvS32 ref;
1215 
1216     if (nvp == NULL)
1217     {
1218         NV_ASSERT(nv->removed);
1219         return;
1220     }
1221 
1222     if (mode > nvp->dynamic_power.mode)
1223     {
1224         return;
1225     }
1226 
1227     acquireDynamicPowerMutex(nvp);
1228 
1229     ref = --nvp->dynamic_power.refcount;
1230 
1231     NV_ASSERT(ref >= 0);
1232 
1233     if (ref == 0) {
1234         NvBool ret;
1235 
1236         /*
1237          * Just transitioned from 1 -> 0.  We should have been in the IN_USE
1238          * state previously; now we transition to an idle state: "instant" idle
1239          * if deferred idle is enabled; otherwise indicate idle immediately.
1240          *
1241          * Nothing should be asynchronously changing the state from IN_USE,
1242          * so the state transitions are not expected to fail.
1243          */
1244 
1245         NV_ASSERT(nvp->dynamic_power.state == NV_DYNAMIC_POWER_STATE_IN_USE);
1246 
1247         if (nvp->dynamic_power.deferred_idle_enabled)
1248         {
1249             ret = nv_dynamic_power_state_transition(nv, NV_DYNAMIC_POWER_STATE_IN_USE,
1250                                                         NV_DYNAMIC_POWER_STATE_IDLE_INSTANT);
1251             NV_ASSERT(ret);
1252         }
1253         else
1254         {
1255             nv_indicate_idle(nv);
1256             ret = nv_dynamic_power_state_transition(nv, NV_DYNAMIC_POWER_STATE_IN_USE,
1257                                                         NV_DYNAMIC_POWER_STATE_IDLE_INDICATED);
1258             NV_ASSERT(ret);
1259         }
1260     }
1261 
1262     releaseDynamicPowerMutex(nvp);
1263 }
1264 
1265 /*!
1266  * @brief Wrapper around os_unref_dynamic_power() suitable for use from the
1267  * per-OS layers.
1268  *
1269  * @param[in]   sp      nvidia_stack_t pointer.
1270  * @param[in]   nv      nv_state_t pointer.
1271  * @param[in]   mode    Dynamic power mode this refcount bump is used for.
1272  */
1273 void NV_API_CALL rm_unref_dynamic_power(
1274     nvidia_stack_t *sp,
1275     nv_state_t *nv,
1276     nv_dynamic_power_mode_t mode
1277 )
1278 {
1279     void *fp;
1280 
1281     NV_ENTER_RM_RUNTIME(sp,fp);
1282 
1283     os_unref_dynamic_power(nv, mode);
1284 
1285     NV_EXIT_RM_RUNTIME(sp,fp);
1286 }
1287 
1288 /*!
1289  * @brief Wrapper around os_ref_dynamic_power() suitable for use from core RM.
1290  *
1291  * @params[in]  pOsGpuInfo        OS specific GPU information pointer
1292  */
1293 NV_STATUS osRefGpuAccessNeeded(
1294     OS_GPU_INFO *pOsGpuInfo
1295 )
1296 {
1297     return os_ref_dynamic_power(pOsGpuInfo, NV_DYNAMIC_PM_FINE);
1298 }
1299 
1300 /*!
1301  * @brief Wrapper around os_unref_dynamic_power() suitable for use from core RM.
1302  *
1303  * @params[in]  pOsGpuInfo        OS specific GPU information pointer
1304  */
1305 void osUnrefGpuAccessNeeded(
1306     OS_GPU_INFO *pOsGpuInfo
1307 )
1308 {
1309     os_unref_dynamic_power(pOsGpuInfo, NV_DYNAMIC_PM_FINE);
1310 }
1311 
1312 /*!
1313  * @brief Check if GCx is supported on current pstate and if engines are idle.
1314  *
1315  * @param[in]   pGpu    OBJGPU pointer.
1316  */
1317 static NvBool RmCheckForGcxSupportOnCurrentState(
1318     OBJGPU *pGpu
1319 )
1320 {
1321     NV_STATUS   status = NV_OK;
1322     nv_state_t *nv     = NV_GET_NV_STATE(pGpu);
1323     RM_API     *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
1324     NV2080_CTRL_INTERNAL_GCX_ENTRY_PREREQUISITE_PARAMS entryPrerequisiteParams;
1325 
1326     portMemSet(&entryPrerequisiteParams, 0, sizeof(entryPrerequisiteParams));
1327 
1328     status = pRmApi->Control(pRmApi,
1329                              nv->rmapi.hClient,
1330                              nv->rmapi.hSubDevice,
1331                              NV2080_CTRL_CMD_INTERNAL_GCX_ENTRY_PREREQUISITE,
1332                              (void*)&entryPrerequisiteParams,
1333                              sizeof(entryPrerequisiteParams));
1334 
1335     if (status != NV_OK)
1336     {
1337         NV_PRINTF(LEVEL_ERROR, "NVRM, Failed to get GCx pre-requisite, status=0x%x\n",
1338                   status);
1339         return NV_FALSE;
1340     }
1341 
1342     return entryPrerequisiteParams.bIsGC6Satisfied || entryPrerequisiteParams.bIsGCOFFSatisfied;
1343 }
1344 
1345 /*!
1346  * @brief: Work item to actually remove the idle holdoff which was applied
1347  * on GC6 exit.
1348  * This workitem is scheduled after the timeout limit for idle holdoff is
1349  * finished.
1350  *
1351  * Queue with lock flags:
1352  *     OS_QUEUE_WORKITEM_FLAGS_LOCK_GPU_GROUP_SUBDEVICE_RW
1353  *
1354  * @param[in]   gpuInstance     GPU instance ID.
1355  * @param[in]   pArgs           Unused callback closure.
1356  */
1357 static void RmRemoveIdleHoldoff(
1358    NvU32 gpuInstance,
1359     void *pArgs
1360 )
1361 {
1362     OBJGPU     *pGpu = gpumgrGetGpu(gpuInstance);
1363     nv_state_t *nv  = NV_GET_NV_STATE(pGpu);
1364     nv_priv_t  *nvp = NV_GET_NV_PRIV(nv);
1365 
1366     if (nvp->dynamic_power.b_idle_holdoff == NV_TRUE)
1367     {
1368         if ((RmCheckForGcxSupportOnCurrentState(pGpu) == NV_TRUE) ||
1369             (nvp->dynamic_power.idle_precondition_check_callback_scheduled))
1370         {
1371             nv_indicate_idle(nv);
1372             nvp->dynamic_power.b_idle_holdoff = NV_FALSE;
1373         }
1374         else
1375         {
1376             RmScheduleCallbackToRemoveIdleHoldoff(pGpu);
1377         }
1378     }
1379 }
1380 
1381 /*!
1382  * @brief Timer callback to schedule a work item to remove idle hold off
1383  * that was applied after GC6 exit.
1384  *
1385  * @param[in]   pCallbackData   OBJGPU pointer (with void pointer type).
1386  */
1387 static void timerCallbackToRemoveIdleHoldoff(
1388     void *pCallbackData
1389 )
1390 {
1391     OBJGPU *pGpu   = reinterpretCast(pCallbackData, OBJGPU *);
1392 
1393     osQueueWorkItemWithFlags(pGpu,
1394                              RmRemoveIdleHoldoff,
1395                              NULL,
1396                              OS_QUEUE_WORKITEM_FLAGS_LOCK_GPU_GROUP_SUBDEVICE_RW);
1397 }
1398 
1399 /*!
1400  * @brief Timer callback to check if all idle conditions remain met and
1401  * if so schedule a work item to indicate the GPU is idle.
1402  *
1403  * @param[in]   pCallbackData   OBJGPU pointer (with void pointer type).
1404  */
1405 static void timerCallbackToIndicateIdle(
1406     void *pCallbackData
1407 )
1408 {
1409     OBJGPU     *pGpu    = reinterpretCast(pCallbackData, OBJGPU *);
1410     nv_state_t *nv      = NV_GET_NV_STATE(pGpu);
1411     nv_priv_t  *nvp     = NV_GET_NV_PRIV(nv);
1412 
1413     nv_acquire_mmap_lock(nv);
1414 
1415     if (nv_get_all_mappings_revoked_locked(nv) &&
1416         nvp->dynamic_power.state == NV_DYNAMIC_POWER_STATE_IDLE_SUSTAINED &&
1417         RmCanEnterGcxUnderGpuLock(pGpu))
1418     {
1419         osQueueWorkItem(pGpu, RmIndicateIdle, NULL);
1420     }
1421     else
1422     {
1423         if (nvp->dynamic_power.state == NV_DYNAMIC_POWER_STATE_IDLE_SUSTAINED)
1424         {
1425             /*
1426              * Something used the GPU between when we last sampled and now.
1427              * Go back to the "instant" state to start waiting from scratch.
1428              */
1429             nv_dynamic_power_state_transition(nv, NV_DYNAMIC_POWER_STATE_IDLE_SUSTAINED,
1430                                                   NV_DYNAMIC_POWER_STATE_IDLE_INSTANT);
1431         }
1432 
1433         RmScheduleCallbackForIdlePreConditionsUnderGpuLock(pGpu);
1434     }
1435 
1436     nv_release_mmap_lock(nv);
1437 }
1438 
1439 /*!
1440  * @brief Timer callback to periodically check if all idle preconditions are
1441  * met.  If so, the dynamic power state is advanced from IDLE_INSTANT to
1442  * IDLE_SUSTAINED, or (if already in the IDLE_SUSTAINED state) mappings are
1443  * revoked and timerCallbackToIndicateIdle() is scheduled.
1444  *
1445  * @param[in]   pCallbackData   OBJGPU pointer (with void pointer type).
1446  */
1447 static void timerCallbackForIdlePreConditions(
1448     void *pCallbackData
1449 )
1450 {
1451     OBJGPU     *pGpu    = reinterpretCast(pCallbackData, OBJGPU *);
1452     nv_state_t *nv      = NV_GET_NV_STATE(pGpu);
1453     nv_priv_t  *nvp     = NV_GET_NV_PRIV(nv);
1454 
1455     nvp->dynamic_power.idle_precondition_check_callback_scheduled = NV_FALSE;
1456 
1457     /*
1458      * Note: It is not safe to take the dynamic power mutex here.
1459      * Lock ordering dictates that the dynamic power mutex must be taken before
1460      * the GPU lock, and this function is called with the GPU lock held.
1461      *
1462      * Therefore, this function is careful about *only* triggering specific
1463      * transitions:
1464      * NV_DYNAMIC_POWER_STATE_IDLE_INSTANT -> NV_DYNAMIC_POWER_STATE_IDLE_SUSTAINED
1465      * NV_DYNAMIC_POWER_STATE_IDLE_SUSTAINED -> NV_DYNAMIC_POWER_STATE_IDLE_INSTANT
1466      *
1467      * Other codepaths are resilient to these state transitions occurring
1468      * asynchronously.  The state transition itself is done with an atomic
1469      * compare and swap, so there should be no danger of inadvertently
1470      * triggering a transition from a different state due to a
1471      * read-modify-write or "time of check to time of use" (TOCTTOU) race
1472      * condition.
1473      */
1474 
1475     if (nvp->dynamic_power.state != NV_DYNAMIC_POWER_STATE_IN_USE)
1476     {
1477         if (RmCanEnterGcxUnderGpuLock(pGpu))
1478         {
1479             switch (nvp->dynamic_power.state)
1480             {
1481             case NV_DYNAMIC_POWER_STATE_UNKNOWN:
1482                 NV_ASSERT(0);
1483                 /* fallthrough */
1484             case NV_DYNAMIC_POWER_STATE_IDLE_INDICATED:
1485                 NV_PRINTF(LEVEL_ERROR, "NVRM: %s: unexpected dynamic power state 0x%x\n",
1486                           __FUNCTION__, nvp->dynamic_power.state);
1487                 /* fallthrough */
1488             case NV_DYNAMIC_POWER_STATE_IN_USE:
1489                 break;
1490             case NV_DYNAMIC_POWER_STATE_IDLE_INSTANT:
1491                 nv_dynamic_power_state_transition(nv, NV_DYNAMIC_POWER_STATE_IDLE_INSTANT,
1492                                                       NV_DYNAMIC_POWER_STATE_IDLE_SUSTAINED);
1493                 break;
1494             case NV_DYNAMIC_POWER_STATE_IDLE_SUSTAINED:
1495                 RmQueueIdleSustainedWorkitem(pGpu);
1496                 return;
1497             }
1498 
1499         }
1500         else
1501         {
1502             if (nvp->dynamic_power.state == NV_DYNAMIC_POWER_STATE_IDLE_SUSTAINED)
1503             {
1504                 /*
1505                  * Something used the GPU between when we last sampled and now.
1506                  * Go back to the "instant" state to start waiting from scratch.
1507                  */
1508                 nv_dynamic_power_state_transition(nv, NV_DYNAMIC_POWER_STATE_IDLE_SUSTAINED,
1509                                                       NV_DYNAMIC_POWER_STATE_IDLE_INSTANT);
1510             }
1511         }
1512     }
1513     RmScheduleCallbackForIdlePreConditionsUnderGpuLock(pGpu);
1514 }
1515 
1516 /*!
1517  * @brief Cancel any in-flight callback to remove idle holdoff.
1518  *
1519  * @param[in]   pGpu    OBJGPU pointer.
1520  */
1521 static void RmCancelCallbackToRemoveIdleHoldoff(
1522     OBJGPU *pGpu
1523 )
1524 {
1525     nv_state_t *nv  = NV_GET_NV_STATE(pGpu);
1526     nv_priv_t  *nvp = NV_GET_NV_PRIV(nv);
1527     TMR_EVENT_GENERAL_PARAMS cancelParams;
1528 
1529     if (NvP64_VALUE(nvp->dynamic_power.remove_idle_holdoff) != NULL)
1530     {
1531         portMemSet(&cancelParams, 0, sizeof(cancelParams));
1532 
1533         cancelParams.pEvent = nvp->dynamic_power.remove_idle_holdoff;
1534 
1535         tmrCtrlCmdEventCancel(pGpu, &cancelParams);
1536     }
1537 }
1538 
1539 /*!
1540  * @brief Cancel any in-flight timer callbacks.
1541  *
1542  * @param[in]   pGpu    OBJGPU pointer.
1543  */
1544 static void RmCancelDynamicPowerCallbacks(
1545     OBJGPU *pGpu
1546 )
1547 {
1548     nv_state_t *nv  = NV_GET_NV_STATE(pGpu);
1549     nv_priv_t  *nvp = NV_GET_NV_PRIV(nv);
1550     TMR_EVENT_GENERAL_PARAMS cancelParams;
1551 
1552     if (NvP64_VALUE(nvp->dynamic_power.idle_precondition_check_event) != NULL)
1553     {
1554         // Cancel precondition check callback.
1555         portMemSet(&cancelParams, 0, sizeof(cancelParams));
1556 
1557         cancelParams.pEvent = nvp->dynamic_power.idle_precondition_check_event;
1558 
1559         tmrCtrlCmdEventCancel(pGpu, &cancelParams);
1560     }
1561 
1562     if (NvP64_VALUE(nvp->dynamic_power.indicate_idle_event) != NULL)
1563     {
1564         // Cancel callback to indicate idle
1565         portMemSet(&cancelParams, 0, sizeof(cancelParams));
1566 
1567         cancelParams.pEvent = nvp->dynamic_power.indicate_idle_event;
1568 
1569         tmrCtrlCmdEventCancel(pGpu, &cancelParams);
1570     }
1571 
1572     RmCancelCallbackToRemoveIdleHoldoff(pGpu);
1573 
1574     nvp->dynamic_power.idle_precondition_check_callback_scheduled = NV_FALSE;
1575 }
1576 
1577 /*!
1578  * @brief Free timer events allocated by CreateDynamicPowerCallbacks()
1579  *
1580  * @param[in]   pGpu    OBJGPU pointer.
1581  */
1582 static void RmDestroyDynamicPowerCallbacks(
1583     OBJGPU *pGpu
1584 )
1585 {
1586     nv_state_t *nv  = NV_GET_NV_STATE(pGpu);
1587     nv_priv_t  *nvp = NV_GET_NV_PRIV(nv);
1588     TMR_EVENT_GENERAL_PARAMS destroyParams;
1589 
1590     if (NvP64_VALUE(nvp->dynamic_power.idle_precondition_check_event) != NULL)
1591     {
1592         // Destroy precondition check callback.
1593         portMemSet(&destroyParams, 0, sizeof(destroyParams));
1594 
1595         destroyParams.pEvent = nvp->dynamic_power.idle_precondition_check_event;
1596 
1597         tmrCtrlCmdEventDestroy(pGpu, &destroyParams);
1598         nvp->dynamic_power.idle_precondition_check_event = NvP64_NULL;
1599     }
1600 
1601     if (NvP64_VALUE(nvp->dynamic_power.indicate_idle_event) != NULL)
1602     {
1603         // Destroy callback to indicate idle
1604         portMemSet(&destroyParams, 0, sizeof(destroyParams));
1605 
1606         destroyParams.pEvent = nvp->dynamic_power.indicate_idle_event;
1607 
1608         tmrCtrlCmdEventDestroy(pGpu, &destroyParams);
1609         nvp->dynamic_power.indicate_idle_event = NvP64_NULL;
1610     }
1611 
1612     if (NvP64_VALUE(nvp->dynamic_power.remove_idle_holdoff) != NULL)
1613     {
1614         // Destroy callback to decrease kernel refcount
1615         portMemSet(&destroyParams, 0, sizeof(destroyParams));
1616 
1617         destroyParams.pEvent = nvp->dynamic_power.remove_idle_holdoff;
1618 
1619         tmrCtrlCmdEventDestroy(pGpu, &destroyParams);
1620         nvp->dynamic_power.remove_idle_holdoff = NvP64_NULL;
1621     }
1622 
1623     nvp->dynamic_power.deferred_idle_enabled = NV_FALSE;
1624 }
1625 
1626 /*
1627  * @brief Adds a GPU to OBJOS::dynamicPowerSupportGpuMask
1628  *
1629  * @param[in]                  instance
1630  */
1631 void osAddGpuDynPwrSupported
1632 (
1633     NvU32 instance
1634 )
1635 {
1636     OBJSYS *pSys = SYS_GET_INSTANCE();
1637     OBJOS *pOS = SYS_GET_OS(pSys);
1638 
1639     pOS->dynamicPowerSupportGpuMask |= (1 << instance);
1640 }
1641 
1642 /*
1643  * @brief Removes a GPU from OBJOS::dynamicPowerSupportGpuMask
1644  *
1645  * @param[in]                  instance
1646  */
1647 void osRemoveGpuDynPwrSupported
1648 (
1649     NvU32 instance
1650 )
1651 {
1652     OBJSYS *pSys = SYS_GET_INSTANCE();
1653     OBJOS *pOS = SYS_GET_OS(pSys);
1654 
1655     pOS->dynamicPowerSupportGpuMask &= ~(1 << instance);
1656 }
1657 
1658 /*
1659  * @brief queries  OBJOS::dynamicPowerSupportGpuMask
1660  *
1661  * @param[in]                  void
1662  */
1663 NvU32 osGetDynamicPowerSupportMask
1664 (
1665     void
1666 )
1667 {
1668     OBJSYS *pSys = SYS_GET_INSTANCE();
1669     OBJOS *pOS = SYS_GET_OS(pSys);
1670 
1671     return  pOS->dynamicPowerSupportGpuMask;
1672 }
1673 
1674 /*!
1675  * @brief Cancel any outstanding callbacks, and free RM resources allocated by
1676  * RmInitDeferredDynamicPowerManagement().
1677  *
1678  * @param[in]   nv      nv_state_t pointer.
1679  */
1680 void RmDestroyDeferredDynamicPowerManagement(
1681     nv_state_t *nv
1682 )
1683 {
1684     nv_priv_t *nvp = NV_GET_NV_PRIV(nv);
1685     OBJGPU    *pGpu = NV_GET_NV_PRIV_PGPU(nv);
1686 
1687     //
1688     // Re-instate the dynamic power mode to fine if it was
1689     // disabled due to lack of platform support
1690     //
1691     if (nvp->dynamic_power.b_fine_not_supported == NV_TRUE)
1692     {
1693         nvp->dynamic_power.mode = NV_DYNAMIC_PM_FINE;
1694         nvp->dynamic_power.b_fine_not_supported = NV_FALSE;
1695     }
1696 
1697     //
1698     // Before cancelling and destroying the callbacks, make sure to
1699     // remove the idle holfoff that was applied during gc6 exit.
1700     //
1701     if (nvp->dynamic_power.b_idle_holdoff == NV_TRUE)
1702     {
1703         nv_indicate_idle(nv);
1704         nvp->dynamic_power.b_idle_holdoff = NV_FALSE;
1705     }
1706 
1707     RmCancelDynamicPowerCallbacks(pGpu);
1708     RmDestroyDynamicPowerCallbacks(pGpu);
1709     osRemoveGpuDynPwrSupported(gpuGetInstance(pGpu));
1710 }
1711 
1712 /*!
1713  * @brief Allocate timer events to call the functions:
1714  * timerCallbackForIdlePreConditions()
1715  * timerCallbackToIndicateIdle()
1716  *
1717  * The callbacks must be scheduled separately.
1718  *
1719  * @param[in]   pGpu    OBJGPU pointer.
1720  */
1721 static NV_STATUS CreateDynamicPowerCallbacks(
1722     OBJGPU *pGpu
1723 )
1724 {
1725     nv_state_t *nv  = NV_GET_NV_STATE(pGpu);
1726     nv_priv_t  *nvp = NV_GET_NV_PRIV(nv);
1727     TMR_EVENT_SET_PARAMS createEventParams;
1728     NV_STATUS status;
1729 
1730     // Creating PreCondition check callback.
1731     portMemSet(&createEventParams, 0, sizeof(createEventParams));
1732 
1733     createEventParams.ppEvent = &(nvp->dynamic_power.idle_precondition_check_event);
1734     createEventParams.pTimeProc = NV_PTR_TO_NvP64(timerCallbackForIdlePreConditions);
1735     createEventParams.pCallbackData = NV_PTR_TO_NvP64(pGpu);
1736     createEventParams.flags = TMR_FLAG_USE_OS_TIMER;
1737 
1738     status = tmrCtrlCmdEventCreate(pGpu, &createEventParams);
1739 
1740     if (status != NV_OK)
1741     {
1742         NV_PRINTF(LEVEL_ERROR,
1743                   "NVRM: Error creating dynamic power precondition check callback\n");
1744         nvp->dynamic_power.idle_precondition_check_event = NvP64_NULL;
1745         nvp->dynamic_power.indicate_idle_event = NvP64_NULL;
1746         nvp->dynamic_power.remove_idle_holdoff = NvP64_NULL;
1747         return status;
1748     }
1749 
1750     // Create callback to indicate idle..
1751     portMemSet(&createEventParams, 0, sizeof(createEventParams));
1752 
1753     createEventParams.ppEvent = &(nvp->dynamic_power.indicate_idle_event);
1754     createEventParams.pTimeProc = NV_PTR_TO_NvP64(timerCallbackToIndicateIdle);
1755     createEventParams.pCallbackData = NV_PTR_TO_NvP64(pGpu);
1756     createEventParams.flags = TMR_FLAG_USE_OS_TIMER;
1757 
1758     status = tmrCtrlCmdEventCreate(pGpu, &createEventParams);
1759 
1760     if (status != NV_OK)
1761     {
1762         NV_PRINTF(LEVEL_ERROR,
1763                   "NVRM: Error creating callback to indicate GPU idle\n");
1764         nvp->dynamic_power.idle_precondition_check_event = NvP64_NULL;
1765         nvp->dynamic_power.indicate_idle_event = NvP64_NULL;
1766         nvp->dynamic_power.remove_idle_holdoff = NvP64_NULL;
1767         return status;
1768     }
1769 
1770     // Create callback to remove idle holdoff
1771     portMemSet(&createEventParams, 0, sizeof(createEventParams));
1772 
1773     createEventParams.ppEvent = &(nvp->dynamic_power.remove_idle_holdoff);
1774     createEventParams.pTimeProc = NV_PTR_TO_NvP64(timerCallbackToRemoveIdleHoldoff);
1775     createEventParams.pCallbackData = NV_PTR_TO_NvP64(pGpu);
1776     createEventParams.flags = TMR_FLAG_USE_OS_TIMER;
1777 
1778     status = tmrCtrlCmdEventCreate(pGpu, &createEventParams);
1779 
1780     if (status != NV_OK)
1781     {
1782         NV_PRINTF(LEVEL_ERROR,
1783                   "NVRM: Error creating callback to decrease kernel refcount\n");
1784         nvp->dynamic_power.idle_precondition_check_event = NvP64_NULL;
1785         nvp->dynamic_power.indicate_idle_event = NvP64_NULL;
1786         nvp->dynamic_power.remove_idle_holdoff = NvP64_NULL;
1787         return status;
1788     }
1789 
1790     return NV_OK;
1791 }
1792 
1793 /*!
1794  * @brief Schedule the timerCallbackForIdlePreConditions() function to be called.
1795  *
1796  * If it is already scheduled, the function is a noop.
1797  *
1798  * The GPU lock must be held when calling this function.
1799  *
1800  * @param[in]   pGpu    OBJGPU pointer.
1801  */
1802 static void RmScheduleCallbackForIdlePreConditionsUnderGpuLock(
1803     OBJGPU *pGpu
1804 )
1805 {
1806     nv_state_t *nv = NV_GET_NV_STATE(pGpu);
1807     nv_priv_t  *nvp = NV_GET_NV_PRIV(nv);
1808     TMR_EVENT_SCHEDULE_PARAMS scheduleEventParams;
1809     NV_STATUS status;
1810 
1811     NV_ASSERT(rmDeviceGpuLockIsOwner(pGpu->gpuInstance));
1812 
1813     if (nvp->dynamic_power.idle_precondition_check_callback_scheduled)
1814     {
1815         return;
1816     }
1817 
1818     if (NvP64_VALUE(nvp->dynamic_power.idle_precondition_check_event) != NULL)
1819     {
1820         portMemSet(&scheduleEventParams, 0, sizeof(scheduleEventParams));
1821 
1822         scheduleEventParams.pEvent = nvp->dynamic_power.idle_precondition_check_event;
1823         scheduleEventParams.timeNs = GC6_PRECONDITION_CHECK_TIME;
1824         scheduleEventParams.bUseTimeAbs = NV_FALSE;
1825 
1826         status = tmrCtrlCmdEventSchedule(pGpu, &scheduleEventParams);
1827 
1828         if (status == NV_OK)
1829         {
1830             nvp->dynamic_power.idle_precondition_check_callback_scheduled = NV_TRUE;
1831         }
1832         else
1833         {
1834             NV_PRINTF(LEVEL_ERROR, "NVRM: Error scheduling precondition callback\n");
1835         }
1836     }
1837 }
1838 
1839 /*!
1840  * @brief Schedule the timerCallbackForIdlePreConditions() function to be called.
1841  *
1842  * If it is already scheduled, the function is a noop.
1843  *
1844  * This function takes the GPU lock.
1845  *
1846  * @param[in]   pGpu    OBJGPU pointer.
1847  */
1848 static void RmScheduleCallbackForIdlePreConditions(
1849     OBJGPU *pGpu
1850 )
1851 {
1852     NV_STATUS   status;
1853     GPU_MASK    gpuMask;
1854 
1855     // LOCK: acquire per device lock
1856     status = rmGpuGroupLockAcquire(pGpu->gpuInstance, GPU_LOCK_GRP_SUBDEVICE,
1857                                    GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_DYN_POWER,
1858                                    &gpuMask);
1859     if (status == NV_OK)
1860     {
1861         RmScheduleCallbackForIdlePreConditionsUnderGpuLock(pGpu);
1862         // UNLOCK: release per device lock
1863         rmGpuGroupLockRelease(gpuMask, GPUS_LOCK_FLAGS_NONE);
1864     }
1865 }
1866 
1867 /*!
1868  * @brief Schedule the timerCallbackToIndicateIdle() function to be called.
1869  *
1870  * The GPU lock must be held when calling this function.
1871  *
1872  * @param[in]   pGpu    OBJGPU pointer.
1873  */
1874 static void RmScheduleCallbackToIndicateIdle(
1875     OBJGPU *pGpu
1876 )
1877 {
1878     nv_state_t *nv  = NV_GET_NV_STATE(pGpu);
1879     nv_priv_t  *nvp = NV_GET_NV_PRIV(nv);
1880     TMR_EVENT_SCHEDULE_PARAMS scheduleEventParams;
1881     NV_STATUS status;
1882 
1883     NV_ASSERT(rmDeviceGpuLockIsOwner(pGpu->gpuInstance));
1884 
1885     if (NvP64_VALUE(nvp->dynamic_power.indicate_idle_event) != NULL)
1886     {
1887         portMemSet(&scheduleEventParams, 0, sizeof(scheduleEventParams));
1888 
1889         scheduleEventParams.pEvent = nvp->dynamic_power.indicate_idle_event;
1890         scheduleEventParams.timeNs = GC6_BAR1_BLOCKER_CHECK_AND_METHOD_FLUSH_TIME;
1891         scheduleEventParams.bUseTimeAbs = NV_FALSE;
1892 
1893         status = tmrCtrlCmdEventSchedule(pGpu, &scheduleEventParams);
1894 
1895         if (status != NV_OK)
1896             NV_PRINTF(LEVEL_ERROR, "NVRM: Error scheduling indicate idle callback\n");
1897     }
1898 }
1899 
1900 /*!
1901  * @brief Schedule the timerCallbackToRemoveIdleHoldoff() function.
1902  *
1903  * The GPU lock must be held when calling this function.
1904  *
1905  * @param[in]   pGpu    OBJGPU pointer.
1906  */
1907 static void RmScheduleCallbackToRemoveIdleHoldoff(
1908     OBJGPU *pGpu
1909 )
1910 {
1911     nv_state_t *nv = NV_GET_NV_STATE(pGpu);
1912     nv_priv_t *nvp = NV_GET_NV_PRIV(nv);
1913     TMR_EVENT_SCHEDULE_PARAMS scheduleEventParams;
1914     NV_STATUS status;
1915 
1916     NV_ASSERT(rmDeviceGpuLockIsOwner(pGpu->gpuInstance));
1917 
1918     if (NvP64_VALUE(nvp->dynamic_power.remove_idle_holdoff) != NULL)
1919     {
1920         portMemSet(&scheduleEventParams, 0, sizeof(scheduleEventParams));
1921 
1922         scheduleEventParams.pEvent = nvp->dynamic_power.remove_idle_holdoff;
1923         scheduleEventParams.timeNs = GC6_CYCLE_IDLE_HOLDOFF_CHECK_TIME;
1924         scheduleEventParams.bUseTimeAbs = NV_FALSE;
1925 
1926         status = tmrCtrlCmdEventSchedule(pGpu, &scheduleEventParams);
1927 
1928         if (status != NV_OK)
1929         {
1930             NV_PRINTF(LEVEL_ERROR,
1931                       "NVRM: Error scheduling kernel refcount decrement callback\n");
1932         }
1933         else
1934         {
1935             nvp->dynamic_power.b_idle_holdoff = NV_TRUE;
1936         }
1937     }
1938 }
1939 
1940 /*!
1941  * @brief Check if the system supports RTD3-GC6.
1942  *
1943  * @param[in]   pGpu    OBJGPU pointer.
1944  */
1945 static NvBool RmCheckRtd3GcxSupport(
1946     nv_state_t *pNv
1947 )
1948 {
1949     nv_priv_t *nvp = NV_GET_NV_PRIV(pNv);
1950     OBJGPU    *pGpu = NV_GET_NV_PRIV_PGPU(pNv);
1951     RM_API    *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
1952     NV_STATUS  status;
1953     NV0080_CTRL_GPU_GET_VIRTUALIZATION_MODE_PARAMS virtModeParams = { 0 };
1954     NvBool     bGC6Support   = NV_FALSE;
1955     NvBool     bGCOFFSupport = NV_FALSE;
1956 
1957     if (!pGpu->getProperty(pGpu, PDB_PROP_GPU_UNIX_DYNAMIC_POWER_SUPPORTED))
1958     {
1959         NV_PRINTF(LEVEL_NOTICE, "NVRM: RTD3/GC6 is not supported for this arch\n");
1960         return NV_FALSE;
1961     }
1962 
1963     if (nvp->b_mobile_config_enabled)
1964     {
1965         bGC6Support = pGpu->getProperty(pGpu, PDB_PROP_GPU_RTD3_GC6_SUPPORTED);
1966         bGCOFFSupport = bGC6Support;
1967     }
1968     else
1969     {
1970         bGC6Support = pGpu->getProperty(pGpu, PDB_PROP_GPU_RTD3_GC6_SUPPORTED);
1971         bGCOFFSupport = pGpu->getProperty(pGpu, PDB_PROP_GPU_RTD3_GCOFF_SUPPORTED);
1972     }
1973 
1974     if (!bGC6Support && !bGCOFFSupport)
1975     {
1976         NV_PRINTF(LEVEL_NOTICE,
1977                   "NVRM: Disabling RTD3. [GC6 support=%d GCOFF support=%d]\n",
1978                   bGC6Support, bGCOFFSupport);
1979         return NV_FALSE;
1980     }
1981 
1982     status = pRmApi->Control(pRmApi, pNv->rmapi.hClient, pNv->rmapi.hDevice,
1983                              NV0080_CTRL_CMD_GPU_GET_VIRTUALIZATION_MODE,
1984                              &virtModeParams, sizeof(virtModeParams));
1985 
1986     if (status != NV_OK)
1987     {
1988         NV_PRINTF(LEVEL_ERROR,
1989                   "NVRM: Failed to get Virtualization mode, status=0x%x\n",
1990                   status);
1991         return NV_FALSE;
1992     }
1993 
1994     if ((virtModeParams.virtualizationMode != NV0080_CTRL_GPU_VIRTUALIZATION_MODE_NONE) &&
1995         (virtModeParams.virtualizationMode != NV0080_CTRL_GPU_VIRTUALIZATION_MODE_NMOS))
1996     {
1997         NV_PRINTF(LEVEL_NOTICE, "NVRM: RTD3/GC6 is not supported on VM\n");
1998         return NV_FALSE;
1999     }
2000 
2001     return NV_TRUE;
2002 }
2003 
2004 /*!
2005  * @brief Allocate resources needed to track deferred power management, and
2006  * schedule the initial periodic callbacks.
2007  *
2008  * @param[in]   nv      nv_state_t pointer.
2009  */
2010 void RmInitDeferredDynamicPowerManagement(
2011     nv_state_t *nv
2012 )
2013 {
2014     NV_STATUS  status;
2015     nv_priv_t *nvp  = NV_GET_NV_PRIV(nv);
2016 
2017     // LOCK: acquire GPUs lock
2018     if ((status = rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_DYN_POWER)) == NV_OK)
2019     {
2020         if (nvp->dynamic_power.mode == NV_DYNAMIC_PM_FINE)
2021         {
2022             OBJGPU *pGpu = NV_GET_NV_PRIV_PGPU(nv);
2023 
2024             if (!RmCheckRtd3GcxSupport(nv))
2025             {
2026                  nvp->dynamic_power.mode = NV_DYNAMIC_PM_NEVER;
2027                  nvp->dynamic_power.b_fine_not_supported = NV_TRUE;
2028                  NV_PRINTF(LEVEL_NOTICE, "NVRM: RTD3/GC6 is not supported\n");
2029                  goto unlock;
2030             }
2031             osAddGpuDynPwrSupported(gpuGetInstance(pGpu));
2032             nvp->dynamic_power.b_fine_not_supported = NV_FALSE;
2033             status = CreateDynamicPowerCallbacks(pGpu);
2034 
2035             if (status == NV_OK)
2036             {
2037                 RmScheduleCallbackForIdlePreConditionsUnderGpuLock(pGpu);
2038 
2039                 nvp->dynamic_power.deferred_idle_enabled = NV_TRUE;
2040                 // RM's default is GCOFF allow
2041                 nvp->dynamic_power.clients_gcoff_disallow_refcount = 0;
2042             }
2043         }
2044 unlock:
2045         // UNLOCK: release GPUs lock
2046         rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL);
2047     }
2048 
2049     if (status != NV_OK)
2050        NV_PRINTF(LEVEL_ERROR, "NVRM: Failed to register for dynamic power callbacks\n");
2051 }
2052 
2053 /*!
2054  * @brief Check if GCOFF state can be used for Power Management.
2055  *
2056  * @param[in]   pGpu            OBJGPU pointer.
2057  * @param[in]   usedFbSize      Used FB Allocation size.
2058  * @param[in]   bIsDynamicPM    If true, check for dynamic PM; otherwise, check
2059  *                              for system PM (suspend/resume).
2060  *
2061  * returns NV_TRUE if GCOFF state can be used, otherwise NV_FALSE.
2062  *
2063  */
2064 static NvBool RmCheckForGcOffPM(
2065     OBJGPU *pGpu,
2066     NvU64   usedFbSize,
2067     NvBool  bIsDynamicPM
2068 )
2069 {
2070     nv_state_t *nv  = NV_GET_NV_STATE(pGpu);
2071     nv_priv_t  *nvp = NV_GET_NV_PRIV(nv);
2072     NvU64       gcoff_max_fb_size;
2073 
2074     if (bIsDynamicPM)
2075     {
2076         /*
2077          * GCOFF won't be enaged for Dynamic PM if X server does not allow.
2078          * The dynamic_power.clients_gcoff_disallow_refcount is being accessed
2079          * without acquiring dynamic power mutex since this value can be
2080          * modified only when GPU is in active state. The os_ref_dynamic_power()
2081          * function will be called to hold a reference to the GPU whenever
2082          * RM is processing osClientGcoffDisallowRefcount() calls. This either
2083          * prevents the GPU from going into a low power state or will wait for
2084          * it to be resumed before proceeding.
2085          */
2086         if (nvp->dynamic_power.clients_gcoff_disallow_refcount != 0)
2087             return NV_FALSE;
2088 
2089         gcoff_max_fb_size = nvp->dynamic_power.gcoff_max_fb_size;
2090     }
2091     else
2092     {
2093         gcoff_max_fb_size = nvp->s0ix_gcoff_max_fb_size;
2094     }
2095 
2096     /*
2097      * GCOFF will be engaged whenever the following necessary preconditions
2098      * are met:
2099      *
2100      * 1. The GCOFF has not been disabled with regkey by setting it to zero.
2101      * 2. Used FB allocation size are within limits.
2102      */
2103     return (gcoff_max_fb_size > 0) &&
2104            (usedFbSize <= gcoff_max_fb_size);
2105 }
2106 
2107 //
2108 // Function to update fixed fbsr modes to support multiple vairants such as
2109 // GCOFF and cuda S3/resume.
2110 //
2111 static void
2112 RmUpdateFixedFbsrModes(OBJGPU *pGpu)
2113 {
2114     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
2115     nv_state_t    *nv             = NV_GET_NV_STATE(pGpu);
2116 
2117     if (pGpu->getProperty(pGpu, PDB_PROP_GPU_GCOFF_STATE_ENTERING))
2118     {
2119         pMemoryManager->fixedFbsrModesMask = NVBIT(FBSR_TYPE_DMA);
2120     }
2121     else if (nv->preserve_vidmem_allocations)
2122     {
2123         pMemoryManager->fixedFbsrModesMask = NVBIT(FBSR_TYPE_FILE);
2124     }
2125 }
2126 
/*!
 * @brief Perform the requested system power management action
 * (hibernate/standby entry, or resume) on the given GPU.
 *
 * Saves and restores interrupt-enable state and the broadcast-enabled
 * status around each transition; the order of these operations is
 * significant and must not be changed.
 *
 * @param[in]   pGpu        OBJGPU pointer (may be NULL; see comment below).
 * @param[in]   pmAction    Requested NV_PM_ACTION_* transition.
 *
 * @return      NV_OK on success (or when pGpu is NULL), error status otherwise.
 */
static NV_STATUS
RmPowerManagementInternal(
    OBJGPU *pGpu,
    nv_pm_action_t pmAction
)
{
    // default to NV_OK. there may cases where resman is loaded, but
    // no devices are allocated (we're still at the console). in these
    // cases, it's fine to let the system do whatever it wants.
    NV_STATUS rmStatus = NV_OK;

    if (pGpu)
    {
        nv_state_t *nv = NV_GET_NV_STATE(pGpu);
        nv_priv_t  *nvp = NV_GET_NV_PRIV(nv);
        NvBool bcState = gpumgrGetBcEnabledStatus(pGpu);
        Intr *pIntr = GPU_GET_INTR(pGpu);

        MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
        if ((pmAction == NV_PM_ACTION_HIBERNATE) || (pmAction == NV_PM_ACTION_STANDBY))
        {
            //
            // pFb object store the FBSR mode through which FB state unload has happened,
            // so os layer doesn't need to set FBSR mode on resume.
            //
            RmUpdateFixedFbsrModes(pGpu);
        }

        switch (pmAction)
        {
            case NV_PM_ACTION_HIBERNATE:
                // Remember the pre-suspend interrupt state so resume can
                // restore it, then quiesce interrupts before state unload.
                nvp->pm_state.InHibernate = NV_TRUE;
                nvp->pm_state.IntrEn = intrGetIntrEn(pIntr);
                intrSetIntrEn(pIntr, INTERRUPT_TYPE_DISABLED);
                gpumgrSetBcEnabledStatus(pGpu, NV_FALSE);

                rmStatus = gpuEnterHibernate(pGpu);
                gpumgrSetBcEnabledStatus(pGpu, bcState);

                break;

            case NV_PM_ACTION_STANDBY:
                nvp->pm_state.InHibernate = NV_FALSE;
                nvp->pm_state.IntrEn = intrGetIntrEn(pIntr);
                intrSetIntrEn(pIntr, INTERRUPT_TYPE_DISABLED);
                gpumgrSetBcEnabledStatus(pGpu, NV_FALSE);

                rmStatus = gpuEnterStandby(pGpu);
                gpumgrSetBcEnabledStatus(pGpu, bcState);

                break;

            case NV_PM_ACTION_RESUME:
                gpumgrSetBcEnabledStatus(pGpu, NV_FALSE);

                // InHibernate was recorded on the matching entry path above.
                if (nvp->pm_state.InHibernate)
                {
                    gpuResumeFromHibernate(pGpu);
                }
                else
                {
                    gpuResumeFromStandby(pGpu);
                }
                intrSetIntrEn(pIntr, nvp->pm_state.IntrEn);
                gpumgrSetBcEnabledStatus(pGpu, bcState);
                NvU32 ac_plugged = 0;

                if (IsMobile(pGpu))
                {
                    if (nv_acpi_get_powersource(&ac_plugged) == NV_OK)
                    {
                        //
                        // As we have already acquired API and GPU lock here, we are
                        // directly calling RmPowerSourceChangeEvent.
                        //
                        RmPowerSourceChangeEvent(nv, !ac_plugged);
                    }
                    RmRequestDNotifierState(nv);
                }
                break;

            default:
                rmStatus = NV_ERR_INVALID_ARGUMENT;
                break;
        }
        // Clear any fixed FBSR mode pinned by RmUpdateFixedFbsrModes() above,
        // regardless of which action ran or whether it succeeded.
        pMemoryManager->fixedFbsrModesMask = 0;
    }

    return rmStatus;
}
2217 
2218 static NV_STATUS
2219 RmPowerManagement(
2220     OBJGPU *pGpu,
2221     nv_pm_action_t pmAction
2222 )
2223 {
2224     NV_STATUS   rmStatus;
2225 
2226     rmStatus = RmPowerManagementInternal(pGpu, pmAction);
2227 
2228     return rmStatus;
2229 }
2230 
2231 /*!
2232  * @brief Call core RM to perform the GCx (GC6/GCOF) entry sequence (before
2233  * powering off the GPU) or exit sequence (after powering the GPU back on).
2234  *
2235  * The GPU lock should be held when calling this function.
2236  *
2237  * @param[in]   pGpu            OBJGPU pointer.
2238  * @param[in]   bEnter          If true, perform entry sequence; exit sequence
2239  *                              otherwise.
2240  * @param[in]   bIsDynamicPM    If true, it is for dynamic PM; otherwise, it is
2241  *                              for system PM (suspend/resume).
2242  *
2243  * @return      NV_STATUS code indicating if the operation was successful.
2244  */
2245 NV_STATUS RmGcxPowerManagement(
2246     OBJGPU *pGpu,
2247     NvBool  bEnter,
2248     NvBool  bIsDynamicPM,
2249     NvBool *bTryAgain
2250 )
2251 {
2252     KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
2253     nv_state_t         *nv                  = NV_GET_NV_STATE(pGpu);
2254     NV_STATUS           status              = NV_OK;
2255 
2256     if (pGpu->acpiMethodData.jtMethodData.bSBIOSCaps &&
2257         !pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_MOBILE))
2258     {
2259         //
2260         // AML override is expected only on NB platforms to support GCOFF-1.0(
2261         // which is legacy GCOFF) and GC6-3.0
2262         // GC6 3.0 is possible without AML override but due to changes required
2263         // for GCOFF-1.0 in SBIOS & HW, AML override is needed for GC6-3.0 also.
2264         //
2265         NV_PRINTF(LEVEL_INFO,"NVRM: AML overrides present in Desktop");
2266     }
2267 
2268     nv->d0_state_in_suspend = NV_FALSE;
2269 
2270     if (bEnter)
2271     {
2272         MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
2273         NvU64          usedFbSize = 0;
2274         NvBool         bCanUseGc6 = NV_FALSE;
2275         NV_STATUS      fbsrStatus = NV_ERR_GENERIC;
2276 
2277         //
2278         // If the GPU supports GC6, then check if GC6 can be used for
2279         // the current power management request.
2280         // 1. For dynamic PM, GC6 can be used if it is supported by the GPU.
2281         // 2. For system PM with s2idle, GC6 can be used if it is
2282         //    supported by the GPU.
2283         //
2284         if (pGpu->getProperty(pGpu, PDB_PROP_GPU_RTD3_GC6_SUPPORTED))
2285         {
2286             bCanUseGc6 = bIsDynamicPM ? NV_TRUE : nv_s2idle_pm_configured();
2287         }
2288 
2289         //
2290         // If GC6 cannot be used, then no need to compare the used FB size with
2291         // threshold value and select GCOFF irrespective of FB size.
2292         //
2293         if ((memmgrGetUsedRamSize(pGpu, pMemoryManager, &usedFbSize) == NV_OK) &&
2294             (!bCanUseGc6 || RmCheckForGcOffPM(pGpu, usedFbSize, bIsDynamicPM)) &&
2295             ((fbsrStatus = fbsrReserveSysMemoryForPowerMgmt(pGpu, pMemoryManager->pFbsr[FBSR_TYPE_DMA],
2296                                              usedFbSize)) == NV_OK))
2297         {
2298             pGpu->setProperty(pGpu, PDB_PROP_GPU_GCOFF_STATE_ENTERING, NV_TRUE);
2299 
2300             //
2301             // Set 'bPreserveComptagBackingStoreOnSuspendDef' so that comptag
2302             // related handling can be done during state unload/load.
2303             //
2304             pKernelMemorySystem->bPreserveComptagBackingStoreOnSuspend = NV_TRUE;
2305             status = RmPowerManagement(pGpu, NV_PM_ACTION_STANDBY);
2306             pGpu->setProperty(pGpu, PDB_PROP_GPU_GCOFF_STATE_ENTERING, NV_FALSE);
2307 
2308             if (status == NV_OK)
2309             {
2310                 pGpu->setProperty(pGpu, PDB_PROP_GPU_GCOFF_STATE_ENTERED, NV_TRUE);
2311             }
2312             else
2313             {
2314                 /*
2315                  * Normally the NV_PM_ACTION_RESUME resume should free the
2316                  * the reserved memory while doing FB state load. But if the
2317                  * failure has happened at very early stage in FB state unload
2318                  * while doing NV_PM_ACTION_STANDBY, then reserved system memory
2319                  * might not be freed.
2320                  */
2321                 fbsrFreeReservedSysMemoryForPowerMgmt(pMemoryManager->pFbsr[FBSR_TYPE_DMA]);
2322                 pKernelMemorySystem->bPreserveComptagBackingStoreOnSuspend = NV_FALSE;
2323             }
2324         }
2325         else if (bCanUseGc6)
2326         {
2327             NV2080_CTRL_GC6_ENTRY_PARAMS entryParams;
2328             portMemSet(&entryParams, 0, sizeof(entryParams));
2329 
2330             entryParams.flavorId = NV2080_CTRL_GC6_FLAVOR_ID_MSHYBRID;
2331             entryParams.stepMask = NVBIT(NV2080_CTRL_GC6_STEP_ID_GPU_OFF);
2332             entryParams.params.bIsRTD3Transition = NV_TRUE;
2333 
2334             //
2335             // Currently if the GPU is not in the Lowest P-State, then the
2336             // GC6 entry will fail in its internal sanity check. The system
2337             // suspend can be triggered in any P-State, so sanity check
2338             // for the lowest P-State needs to be skipped. In this case, during
2339             // Perf state unload time, the forceful P-State switch will
2340             // happen which will bring the GPU into lowest P-State.
2341             //
2342             entryParams.params.bSkipPstateSanity = !bIsDynamicPM;
2343 
2344             status = gpuGc6Entry(pGpu, &entryParams);
2345 
2346             //
2347             // The GC6 state requires SW involvement before each transition
2348             // from D0 to D3cold. In case of system suspend with pass-through
2349             // mode, the VM suspend should happen before host suspend. During
2350             // VM suspend, the NVIDIA driver running in the VM side will be
2351             // involved in D3cold transition, but in the host side, it will
2352             // go through D3cold->D0->D3cold transition and will make the
2353             // second D3cold transition without NVIDIA driver. To handle this
2354             // use-case, during VM suspend, use the D0 state and the host linux
2355             // kernel will put the device into D3cold state.
2356             //
2357             if ((status == NV_OK) && ((nv->flags & NV_FLAG_PASSTHRU) != 0))
2358             {
2359                 nv->d0_state_in_suspend = NV_TRUE;
2360             }
2361         }
2362         //
2363         // The else condition below will hit in the following cases:
2364         // Case 1. During system suspend transition: For systems that support s2idle but are configured
2365         // for deep sleep, this "else" condition will be hit when the system memory
2366         // is not sufficient. In this case, we should unset bTryAgain to abort the current suspend entry.
2367         // Case 2. During runtime suspend transition: For systems that do not support GC6 but support
2368         // GCOFF, this "else" condition will be hit when system memory is not sufficent, In this case, we
2369         // should set bTryagain so that the kernel can reschedule the callback later.
2370         //
2371         else
2372         {
2373             if (bIsDynamicPM)
2374             {
2375                 if (fbsrStatus == NV_ERR_NO_MEMORY)
2376                 {
2377                     *bTryAgain = NV_TRUE;
2378                 }
2379                 status = fbsrStatus;
2380             }
2381             else
2382             {
2383                 status = NV_ERR_NOT_SUPPORTED;
2384                 NV_PRINTF(LEVEL_ERROR,
2385                           "NVRM: %s: System suspend failed with current system suspend configuration. "
2386                           "Please change the system suspend configuration to s2idle in /sys/power/mem_sleep.\n",
2387                           __FUNCTION__);
2388             }
2389         }
2390     }
2391     else
2392     {
2393         if (pGpu->getProperty(pGpu, PDB_PROP_GPU_GCOFF_STATE_ENTERED))
2394         {
2395             status = RmPowerManagement(pGpu, NV_PM_ACTION_RESUME);
2396             pGpu->setProperty(pGpu, PDB_PROP_GPU_GCOFF_STATE_ENTERED, NV_FALSE);
2397             pKernelMemorySystem->bPreserveComptagBackingStoreOnSuspend = NV_FALSE;
2398         }
2399         else
2400         {
2401             NV2080_CTRL_GC6_EXIT_PARAMS exitParams;
2402             portMemSet(&exitParams, 0, sizeof(exitParams));
2403 
2404             exitParams.flavorId = NV2080_CTRL_GC6_FLAVOR_ID_MSHYBRID;
2405             exitParams.params.bIsRTD3Transition = NV_TRUE;
2406 
2407             status = gpuGc6Exit(pGpu, &exitParams);
2408         }
2409     }
2410 
2411     return status;
2412 }
2413 
/*!
 * @brief OS-facing entry point for system power management transitions
 * (standby/hibernate entry and resume).
 *
 * Sets up the RM runtime, flushes the device work queue before and after the
 * transition, and takes the API lock, a dynamic-power reference, and the GPUs
 * lock (in that order) around the actual PM action.
 *
 * @param[in]   sp          nvidia_stack_t pointer.
 * @param[in]   pNv         nv_state_t pointer.
 * @param[in]   pmAction    Requested NV_PM_ACTION_* transition.
 *
 * @return      NV_STATUS code indicating if the operation was successful.
 */
NV_STATUS NV_API_CALL rm_power_management(
    nvidia_stack_t *sp,
    nv_state_t *pNv,
    nv_pm_action_t pmAction
)
{
    THREAD_STATE_NODE threadState;
    NV_STATUS rmStatus = NV_OK;
    void *fp;
    // bTryAgain is required by RmGcxPowerManagement()'s interface but its
    // value is not consumed on this (system PM) path.
    NvBool bTryAgain = NV_FALSE;

    NV_ENTER_RM_RUNTIME(sp,fp);
    threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);

    NV_ASSERT_OK(os_flush_work_queue(pNv->queue));

    // LOCK: acquire API lock
    if ((rmStatus = rmapiLockAcquire(API_LOCK_FLAGS_NONE, RM_LOCK_MODULES_DYN_POWER)) == NV_OK)
    {
        OBJGPU *pGpu = NV_GET_NV_PRIV_PGPU(pNv);

        // A NULL pGpu means RM was never initialized for this device;
        // nothing to do and rmStatus stays NV_OK.
        if (pGpu != NULL)
        {
            if ((rmStatus = os_ref_dynamic_power(pNv, NV_DYNAMIC_PM_FINE)) == NV_OK)
            {
                // LOCK: acquire GPUs lock
                if ((rmStatus = rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_DYN_POWER)) == NV_OK)
                {
                    nv_priv_t *nvp = NV_GET_NV_PRIV(pNv);

                    //
                    // Before going to S3 or S4, remove idle holdoff which was
                    // applied during gc6 exit.
                    //
                    if ((pmAction != NV_PM_ACTION_RESUME) &&
                        (nvp->dynamic_power.b_idle_holdoff == NV_TRUE))
                    {
                        nv_indicate_idle(pNv);
                        RmCancelCallbackToRemoveIdleHoldoff(pGpu);
                        nvp->dynamic_power.b_idle_holdoff = NV_FALSE;
                    }

                    //
                    // Use GCx (GCOFF/GC6) power management if S0ix-based PM is
                    // enabled and the request is for system suspend/resume.
                    // Otherwise, use the existing mechanism.
                    //
                    if (nvp->s0ix_pm_enabled &&
                        (pmAction == NV_PM_ACTION_STANDBY ||
                        (pmAction == NV_PM_ACTION_RESUME &&
                         !nvp->pm_state.InHibernate)))
                    {
                        rmStatus = RmGcxPowerManagement(pGpu,
                                        pmAction == NV_PM_ACTION_STANDBY,
                                        NV_FALSE, &bTryAgain);

                    }
                    else
                    {
                        rmStatus = RmPowerManagement(pGpu, pmAction);
                    }

                    // UNLOCK: release GPUs lock
                    rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL);
                }
                os_unref_dynamic_power(pNv, NV_DYNAMIC_PM_FINE);
            }
        }
        // UNLOCK: release API lock
        rmapiLockRelease();
    }

    NV_ASSERT_OK(os_flush_work_queue(pNv->queue));

    threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
    NV_EXIT_RM_RUNTIME(sp,fp);

    return rmStatus;
}
2493 
2494 /*!
2495  * @brief Wrapper around RmGcxPowerManagement() and removes the Idle holdoff
2496  * during exit sequence.
2497  *
2498  * The GPU lock should be held when calling this function.
2499  *
2500  * @param[in]   pGpu    OBJGPU pointer.
2501  * @param[in]   bEnter  If true, perform entry sequence; exit sequence
2502  *                      otherwise.
2503  *
2504  * @return      NV_STATUS code indicating if the operation was successful.
2505  */
2506 static NV_STATUS RmTransitionDynamicPower(
2507     OBJGPU *pGpu,
2508     NvBool  bEnter,
2509     NvBool *bTryAgain
2510 )
2511 {
2512     nv_state_t *nv   = NV_GET_NV_STATE(pGpu);
2513     NV_STATUS   status;
2514 
2515     status = RmGcxPowerManagement(pGpu, bEnter, NV_TRUE, bTryAgain);
2516 
2517     if (!bEnter && status == NV_OK)
2518     {
2519         nv_idle_holdoff(nv);
2520         RmScheduleCallbackToRemoveIdleHoldoff(pGpu);
2521     }
2522 
2523     return status;
2524 }
2525 
2526 /*!
2527  * @brief Wrapper around RmTransitionDynamicPower() which sets up the RM
2528  * runtime (thread state and altstack), and takes the GPU lock for the duration
2529  * of the operation.
2530  *
2531  * @param[in]   sp      nvidia_stack_t pointer.
2532  * @param[in]   nv      nv_state_t pointer.
2533  * @param[in]   bEnter  If true, perform entry sequence; exit sequence
2534  *                      otherwise.
2535  *
2536  * @return      NV_STATUS code indicating if the operation was successful.
2537  */
2538 NV_STATUS NV_API_CALL rm_transition_dynamic_power(
2539     nvidia_stack_t *sp,
2540     nv_state_t     *nv,
2541     NvBool          bEnter,
2542     NvBool         *bTryAgain
2543 )
2544 {
2545     OBJGPU             *pGpu = NV_GET_NV_PRIV_PGPU(nv);
2546     NV_STATUS           status = NV_OK;
2547     THREAD_STATE_NODE   threadState;
2548     void               *fp;
2549 
2550     NV_ENTER_RM_RUNTIME(sp,fp);
2551 
2552     threadStateInit(&threadState, THREAD_STATE_FLAGS_NONE);
2553 
2554     // LOCK: acquire GPUs lock
2555     status = rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_DYN_POWER);
2556     if (status == NV_OK)
2557     {
2558         status = RmTransitionDynamicPower(pGpu, bEnter, bTryAgain);
2559 
2560         // UNLOCK: release GPUs lock
2561         rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL);
2562     }
2563 
2564     threadStateFree(&threadState, THREAD_STATE_FLAGS_NONE);
2565 
2566     NV_EXIT_RM_RUNTIME(sp,fp);
2567 
2568     return status;
2569 }
2570 
2571 /*!
2572  * @brief: Notify client about hot-plugged/unplugged displays.
2573  * Called after GPU exits GC6 because of display hot-plug/unplug.
2574  *
2575  * @param[in]   nv    nv_state_t pointer.
2576  */
2577 static void RmNotifyClientAboutHotplug(
2578     nv_state_t *nv
2579 )
2580 {
2581     NV_STATUS status;
2582     GPU_MASK gpuMask;
2583     OBJGPU *pGpu = NV_GET_NV_PRIV_PGPU(nv);
2584 
2585     Nv2080HotplugNotification hotplugNotificationParams;
2586 
2587     portMemSet(&hotplugNotificationParams, 0, sizeof(hotplugNotificationParams));
2588 
2589     // LOCK: acquire per device lock
2590     status = rmGpuGroupLockAcquire(pGpu->gpuInstance, GPU_LOCK_GRP_SUBDEVICE,
2591                                    GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_HOTPLUG,
2592                                    &gpuMask);
2593     if (status == NV_OK)
2594     {
2595         gpuNotifySubDeviceEvent(pGpu, NV2080_NOTIFIERS_HOTPLUG,
2596                                 &hotplugNotificationParams,
2597                                 sizeof(hotplugNotificationParams), 0, 0);
2598         // UNLOCK: release per device lock
2599         rmGpuGroupLockRelease(gpuMask, GPUS_LOCK_FLAGS_NONE);
2600     }
2601 }
2602 
2603 /*!
2604  * @brief If GPU is in GC6, exit GC6 state and
2605  * notify client about display hot-plug/unplug.
2606  *
2607  * @param[in]   sp    nvidia_stack_t pointer.
2608  * @param[in]   nv    nv_state_t pointer.
2609  */
2610 void RmHandleDisplayChange(
2611     nvidia_stack_t *sp,
2612     nv_state_t *nv
2613 )
2614 {
2615     OBJGPU *pGpu = NV_GET_NV_PRIV_PGPU(nv);
2616 
2617     if (pGpu &&
2618         (IS_GPU_GC6_STATE_ENTERED(pGpu) ||
2619          pGpu->getProperty(pGpu, PDB_PROP_GPU_GCOFF_STATE_ENTERED)))
2620     {
2621         if (rm_ref_dynamic_power(sp, nv, NV_DYNAMIC_PM_FINE) == NV_OK)
2622         {
2623             RmNotifyClientAboutHotplug(nv);
2624             rm_unref_dynamic_power(sp, nv, NV_DYNAMIC_PM_FINE);
2625         }
2626         else
2627             NV_PRINTF(LEVEL_ERROR,
2628                       "NVRM: %s: Failed to increment dynamic power refcount\n",
2629                       __FUNCTION__);
2630     }
2631 }
2632 
2633 /*!
2634  * @brief: Function to query Dynamic Power Management
2635  *
2636  * @param[in]   sp     nvidia_stack_t pointer.
2637  * @param[in]   pNv    nv_state_t pointer.
2638  *
2639  * @return      String indicating Dynamic Power Management status.
2640  */
2641 const char* NV_API_CALL rm_get_dynamic_power_management_status(
2642     nvidia_stack_t *sp,
2643     nv_state_t     *pNv
2644 )
2645 {
2646     void              *fp;
2647     nv_priv_t         *nvp = NV_GET_NV_PRIV(pNv);
2648     const char        *returnString = "?";
2649 
2650     NV_ENTER_RM_RUNTIME(sp,fp);
2651 
2652     // LOCK: acquire API lock
2653     if ((rmapiLockAcquire(API_LOCK_FLAGS_NONE, RM_LOCK_MODULES_DYN_POWER)) == NV_OK)
2654     {
2655         OBJGPU *pGpu = NV_GET_NV_PRIV_PGPU(pNv);
2656 
2657         // Check if RM is inited
2658         if (pGpu != NULL)
2659         {
2660            /*
2661             * If the platform/driver does not support Dynamic Power Management,
2662             * we set mode as NV_DYNAMIC_PM_NEVER. Hence, after RmInit if the
2663             * mode is still NV_DYNAMIC_PM_FINE, we are sure that it is
2664             * supported and enabled. Also see NOTE.
2665             */
2666             if (nvp->dynamic_power.mode == NV_DYNAMIC_PM_FINE)
2667             {
2668                 returnString = "Enabled (fine-grained)";
2669             }
2670             else if (nvp->dynamic_power.mode == NV_DYNAMIC_PM_COARSE)
2671             {
2672                 returnString = "Enabled (coarse-grained)";
2673             }
2674             else if (nvp->dynamic_power.mode == NV_DYNAMIC_PM_NEVER)
2675             {
2676 
2677                 if (nvp->dynamic_power.dynamic_power_regkey ==
2678                     NV_REG_DYNAMIC_POWER_MANAGEMENT_NEVER)
2679                     returnString = "Disabled";
2680                 else if (nvp->dynamic_power.dynamic_power_regkey ==
2681                          NV_REG_DYNAMIC_POWER_MANAGEMENT_DEFAULT)
2682                     returnString = "Disabled by default";
2683                 else
2684                     returnString = "Not supported";
2685             }
2686         }
2687         //UNLOCK: release API lock
2688         rmapiLockRelease();
2689     }
2690 
2691     NV_EXIT_RM_RUNTIME(sp,fp);
2692 
2693     return returnString;
2694 }
2695 
2696 static void RmHandleIdleSustained(
2697     NvU32 gpuInstance,
2698     void *pArgs
2699 )
2700 {
2701     OBJGPU     *pGpu = gpumgrGetGpu(gpuInstance);
2702     nv_state_t *nv   = NV_GET_NV_STATE(pGpu);
2703     nv_priv_t  *nvp  = NV_GET_NV_PRIV(nv);
2704 
2705     nv_revoke_gpu_mappings(NV_GET_NV_STATE(pGpu));
2706     RmScheduleCallbackToIndicateIdle(pGpu);
2707     nvp->dynamic_power.b_idle_sustained_workitem_queued = NV_FALSE;
2708 }
2709 
2710 /*
2711  * Queue a workitem for revoking GPU mappings, and scheduling a callback to indicate idle.
2712  */
2713 static void RmQueueIdleSustainedWorkitem(
2714     OBJGPU *pGpu
2715 )
2716 {
2717     nv_state_t *nv   = NV_GET_NV_STATE(pGpu);
2718     nv_priv_t  *nvp  = NV_GET_NV_PRIV(nv);
2719     NV_STATUS status = NV_OK;
2720 
2721     if (!nvp->dynamic_power.b_idle_sustained_workitem_queued)
2722     {
2723         status = osQueueWorkItemWithFlags(pGpu,
2724                                           RmHandleIdleSustained,
2725                                           NULL,
2726                                           OS_QUEUE_WORKITEM_FLAGS_LOCK_GPU_GROUP_SUBDEVICE_RW);
2727         if (status != NV_OK)
2728         {
2729             NV_PRINTF(LEVEL_WARNING,
2730                       "NVRM: %s: Failed to queue RmHandleIdleSustained() workitem.\n",
2731                       __FUNCTION__);
2732             RmScheduleCallbackForIdlePreConditionsUnderGpuLock(pGpu);
2733             return;
2734         }
2735         nvp->dynamic_power.b_idle_sustained_workitem_queued = NV_TRUE;
2736     }
2737 }
2738 
2739 /*
2740  * Allocate resources needed for S0ix-based system power management.
2741  */
2742 void
2743 RmInitS0ixPowerManagement(
2744     nv_state_t *nv
2745 )
2746 {
2747     nv_priv_t  *nvp = NV_GET_NV_PRIV(nv);
2748     NvU32       data;
2749     NvBool      bRtd3Gc6Support = NV_FALSE;
2750 
2751     // S0ix-based S2Idle, on desktops, is not supported yet. Return early for desktop SKUs
2752     if (!nvp->b_mobile_config_enabled)
2753     {
2754         return;
2755     }
2756 
2757     // LOCK: acquire GPUs lock
2758     if (rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_INIT) == NV_OK)
2759     {
2760         bRtd3Gc6Support = RmCheckRtd3GcxSupport(nv);
2761 
2762         // UNLOCK: release GPUs lock
2763         rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL);
2764     }
2765 
2766     /*
2767      * The GPU S0ix-based system power management will be enabled
2768      * only if all the following necessary requirements are met:
2769      *
2770      * 1. The GPU has RTD3 support.
2771      * 2. The platform has support for s0ix.
2772      * 3. Feature regkey EnableS0ixPowerManagement is enabled.
2773      */
2774     if (bRtd3Gc6Support &&
2775         nv_platform_supports_s0ix() &&
2776         (osReadRegistryDword(NULL, NV_REG_ENABLE_S0IX_POWER_MANAGEMENT,
2777                              &data) == NV_OK) && (data == 1))
2778     {
2779         nvp->s0ix_pm_enabled = NV_TRUE;
2780 
2781         /*
2782          * Read the OS registry for Maximum FB size for S0ix-based PM
2783          * which will be expressed in Megabytes (1048576 bytes) and
2784          * convert it into bytes.
2785          */
2786         if ((osReadRegistryDword(NULL,
2787                                  NV_REG_S0IX_POWER_MANAGEMENT_VIDEO_MEMORY_THRESHOLD,
2788                                  &data) == NV_OK))
2789         {
2790             nvp->s0ix_gcoff_max_fb_size = (NvU64)data * 1024 * 1024;
2791         }
2792     }
2793 }
2794