1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "kernel/core/core.h"
25 #include "kernel/core/locks.h"
26 #include "gpu/subdevice/subdevice.h"
27 #include "kernel/gpu/mem_mgr/heap.h"
28 #include "kernel/gpu/mem_mgr/mem_mgr.h"
29 #include "kernel/gpu/mig_mgr/kernel_mig_manager.h"
30 #include "kernel/gpu/rc/kernel_rc.h"
31 #include "kernel/gpu/bif/kernel_bif.h"
32 #include "kernel/os/os.h"
33 
34 #include "class/cl0000.h" // NV01_NULL_OBJECT
35 #include "class/cl0002.h" // NV01_CONTEXT_DMA
36 #include "class/cl003e.h" // NV01_MEMORY_SYSTEM
37 #include "class/cl0040.h" // NV01_MEMORY_LOCAL_USER
38 #include "class/cl0070.h" // NV01_MEMORY_VIRTUAL
39 #include "class/cl0080.h" // NV01_DEVICE_0
40 #include "class/cl2080.h" // NV20_SUBDEVICE_0
41 #include "class/cl902d.h" // FERMI_TWOD_A
42 #include "class/cl906f.h" // GF100_CHANNEL_GPFIFO
43 #include "class/cla06f.h" // KEPLER_CHANNEL_GPFIFO_A
44 #include "class/cla06fsubch.h"
45 #include "class/cla16f.h" // KEPLER_CHANNEL_GPFIFO_B
46 #include "class/clb06f.h" // MAXWELL_CHANNEL_GPFIFO_A
47 #include "class/clc06f.h" // PASCAL_CHANNEL_GPFIFO_A
48 #include "class/clc36f.h" // VOLTA_CHANNEL_GPFIFO_A
49 #include "class/clc46f.h" // TURING_CHANNEL_GPFIFO_A
50 #include "class/clc56f.h" // AMPERE_CHANNEL_GPFIFO_A
51 #include "class/clc86f.h" // HOPPER_CHANNEL_GPFIFO_A
52 
53 #include "deprecated/rmapi_deprecated.h"
54 #include "nvRmReg.h"
55 
56 
57 //
58 // Watchdog object ids
59 //
60 #define WATCHDOG_PUSHBUFFER_CHANNEL_ID 0x31415900
61 #define WATCHDOG_NOTIFIER_DMA_ID       (WATCHDOG_PUSHBUFFER_CHANNEL_ID + 2)
62 #define WATCHDOG_DEVICE_ID             (WATCHDOG_PUSHBUFFER_CHANNEL_ID + 3)
63 #define WATCHDOG_SUB_DEVICE_0_ID       (WATCHDOG_PUSHBUFFER_CHANNEL_ID + 4)
64 #define WATCHDOG_GROBJ_ID              (WATCHDOG_SUB_DEVICE_0_ID + NV_MAX_SUBDEVICES)
65 #define WATCHDOG_ERROR_DMA_ID          (WATCHDOG_GROBJ_ID + 1)
66 #define WATCHDOG_MEM_ID                (WATCHDOG_GROBJ_ID + 2)
67 #define WATCHDOG_VIRTUAL_CTX_ID        (WATCHDOG_GROBJ_ID + 3)
68 #define WATCHDOG_USERD_PHYS_MEM_ID     (WATCHDOG_GROBJ_ID + 4)
69 
70 // Push buffer size in dwords
71 #define WATCHDOG_PUSHBUF_SIZE 128
72 
73 // Default watchdog pushbuffer size (if no PERF engine)
74 #define WATCHDOG_PB_SIZE_DEFAULT 0xC000
75 
76 #define WATCHDOG_PUSHBUFFERS        2
77 #define WATCHDOG_GPFIFO_ENTRIES     4
78 #define WATCHDOG_GRAPHICS_NOTIFIERS 3
79 
80 #define GPFIFO_ALIGN   NV906F_GP_ENTRY__SIZE
81 #define NOTIFIER_ALIGN 16
82 
83 #define WATCHDOG_GPFIFO_OFFSET(pbBytes)                        \
84     ((((pbBytes)*WATCHDOG_PUSHBUFFERS) + (GPFIFO_ALIGN - 1)) & \
85      ~(GPFIFO_ALIGN - 1))
86 
87 #define WATCHDOG_BEGINNING_NOTIFIER_OFFSET(pbBytes)         \
88     (((WATCHDOG_GPFIFO_OFFSET(pbBytes) +                    \
89        (WATCHDOG_GPFIFO_ENTRIES * NV906F_GP_ENTRY__SIZE)) + \
90       (NOTIFIER_ALIGN - 1)) &                               \
91      ~(NOTIFIER_ALIGN - 1))
92 
93 #define WATCHDOG_ERROR_NOTIFIER_OFFSET(pbBytes) \
94     (WATCHDOG_BEGINNING_NOTIFIER_OFFSET(pbBytes))
95 
96 #define WATCHDOG_NOTIFIER_OFFSET(pbBytes, gpuIndex, notifier)                \
97     (WATCHDOG_BEGINNING_NOTIFIER_OFFSET(pbBytes) +                           \
98      (sizeof(NvNotification) * NV_CHANNELGPFIFO_NOTIFICATION_TYPE__SIZE_1) + \
99      ((gpuIndex) * sizeof(NvNotification) * WATCHDOG_GRAPHICS_NOTIFIERS) +   \
100      (sizeof(NvNotification) * (notifier)))
101 
102 #define WATCHDOG_WORK_SUBMIT_TOKEN_OFFSET(pbBytes)          \
103     ((WATCHDOG_BEGINNING_NOTIFIER_OFFSET(pbBytes)) +        \
104      NV_CHANNELGPFIFO_NOTIFICATION_TYPE_WORK_SUBMIT_TOKEN * \
105          sizeof(NvNotification))
106 
107 #define WATCHDOG_PUSHBUFFER_OFFSET(pbBytes, pbnum) ((pbBytes) * (pbnum))
108 
109 #define SUBDEVICE_MASK_ALL DRF_MASK(NV906F_DMA_SET_SUBDEVICE_MASK_VALUE)
110 
111 
/*!
 * Apply one client's watchdog enable/disable/release request and, when the
 * aggregate refcounts cross a 0 <-> 1 transition, actually flip the watchdog
 * state via krcWatchdogEnable()/krcWatchdogDisable().
 *
 * @param[in] pKernelRc   KernelRc instance whose watchdog state is managed
 * @param[in] pSubdevice  Subdevice recording this client's request state
 * @param[in] operation   Requested operation (enable / disable / soft disable /
 *                        release-all / client destruction)
 *
 * @return NV_OK on success,
 *         NV_ERR_INVALID_ARGUMENT for an unrecognized operation,
 *         NV_ERR_STATE_IN_USE when a conflicting request from another client
 *         prevents this one from being honored.
 */
NV_STATUS
krcWatchdogChangeState_IMPL
(
    KernelRc  *pKernelRc,
    Subdevice *pSubdevice,
    RC_CHANGE_WATCHDOG_STATE_OPERATION_TYPE operation
)
{
    //
    // Provide automatic management of RC watchdog enabling and disabling.
    // Provide for cooperation between RM clients, and allow for independent
    // behavior or multiple client and multiple GPUs.
    //
    // RM clients can use the NV2080_CTRL_CMD_RC_ENABLE_WATCHDOG and related API
    // calls to request enabling or disabling of the RM watchdog, per GPU.
    // Whether or not the watchdog is actually enabled or disabled, however,
    // depends upon whether or not other, conflicting requests are already in
    // force.
    //
    // Some background as to how this is normally used:
    //
    // -- Normally, some clients (such as X) wants the watchdog running.
    // -- Normally, CUDA wants the watchdog disabled.
    // -- When the RM initializes, it sets the watchdog to disabled.
    // -- X will normally tell the RM, for each GPU that it manages, to enable
    //    the watchdog.
    // -- Each CUDA client normally will tell the RM, for each GPU that it
    //    manages, to disable the watchdog.
    // -- X will have options that provide for either *not* enabling the
    //    watchdog, or at least, not blocking another client from disabling the
    //    watchdog.
    // -- Likewise, CUDA will have an option that provides for either enabling
    //    the watchdog, or at least, not blocking another client from enabling
    //    the watchdog.
    //
    // The watchdog is not allowed to transition directly between ENABLED and
    // DISABLED states. It must go through a "don't care" state, in between:
    //
    // ENABLED <--> DON'T-CARE <--> DISABLED
    //
    // Each of the three states may be reached with an associated RM API call:
    //
    // NV2080_CTRL_CMD_RC_ENABLE_WATCHDOG:             ENABLED state
    // NV2080_CTRL_CMD_RC_DISABLE_WATCHDOG:            DISABLED state
    // NV2080_CTRL_CMD_RC_RELEASE_WATCHDOG_REQUESTS:   DON'T-CARE state
    //
    // In addition, RM client destruction leads directly to the DON'T-CARE
    // state. This allows good behavior and cooperation between possibly
    // conflicting RM clients.
    //
    // Basic operation:
    //
    // ENABLE requests: Increment enableRequestsRefCount, disallow disable
    // operations from any client, but *allow* additional enable operations
    // from any client.
    //
    // DISABLE requests: Increment disableRequestsRefCount, disallow enable
    // operations from any client, but *allow* additional disable operations
    // from any client.
    //
    // CLIENT DESTRUCTION requests: Decrement the enableRequestsRefCount if the
    // client had an existing ENABLE request when it was destroyed. Reduce the
    // disableRequestsRefCount if the client had an existing DISABLE request
    // when it was destroyed.
    //
    // RELEASE requests: Possibly reduce the refCount, just as if the client had
    // been destroyed. This is convenient for clients such as MODS, that tend
    // to make multiple calls to enable and disable the watchdog, within the
    // lifetime of a single RM client.
    //
    //
    NvBool bCurrentEnableRequest      = NV_FALSE;
    NvBool bCurrentDisableRequest     = NV_FALSE;
    NvBool bCurrentSoftDisableRequest = NV_FALSE;
    // Snapshot the refcounts and this client's prior request state before any
    // adjustment, so 0 -> 1 transitions can be detected in Step 4 below.
    NvS32  prevEnableRefCount      = pKernelRc->watchdogPersistent.enableRequestsRefCount;
    NvS32  prevDisableRefCount     = pKernelRc->watchdogPersistent.disableRequestsRefCount;
    NvS32  prevSoftDisableRefCount = pKernelRc->watchdogPersistent.softDisableRequestsRefCount;
    NvBool bPrevEnableRequest      = pSubdevice->bRcWatchdogEnableRequested;
    NvBool bPrevDisableRequest     = pSubdevice->bRcWatchdogDisableRequested;
    NvBool bPrevSoftDisableRequest = pSubdevice->bRcWatchdogSoftDisableRequested;
    OBJGPU *pGpu = ENG_GET_GPU(pKernelRc);
    const char *opstring;

    // Translate the operation into this client's new request state (at most
    // one of the three request flags may be set).
    switch (operation)
    {
        case RMAPI_ENABLE_REQUEST:
            bCurrentEnableRequest      = NV_TRUE;
            bCurrentDisableRequest     = NV_FALSE;
            bCurrentSoftDisableRequest = NV_FALSE;
            opstring = "enable watchdog";
            break;

        case RMAPI_SOFT_DISABLE_REQUEST:
            bCurrentEnableRequest      = NV_FALSE;
            bCurrentDisableRequest     = NV_FALSE;
            bCurrentSoftDisableRequest = NV_TRUE;
            opstring = "soft disable watchdog";
            break;

        case RMAPI_DISABLE_REQUEST:
            bCurrentEnableRequest      = NV_FALSE;
            bCurrentDisableRequest     = NV_TRUE;
            bCurrentSoftDisableRequest = NV_FALSE;
            opstring = "disable watchdog";
            break;

        case RMAPI_RELEASE_ALL_REQUESTS:
            bCurrentEnableRequest      = NV_FALSE;
            bCurrentDisableRequest     = NV_FALSE;
            bCurrentSoftDisableRequest = NV_FALSE;
            opstring = "release all requests";
            break;

        case RM_CLIENT_DESTRUCTION:
            bCurrentEnableRequest      = NV_FALSE;
            bCurrentDisableRequest     = NV_FALSE;
            bCurrentSoftDisableRequest = NV_FALSE;
            opstring = "destroy RM client";
            break;

        default:
            NV_ASSERT(0);
            return NV_ERR_INVALID_ARGUMENT;
            break;
    }
    // -Wunused-but-set-variable nonsense if NV_PRINTF is compiled out
    (void)opstring;


    //
    // Step 1: check for conflicting requests, and bail out without changing
    // client state or watchdog state, if there are any such conflicts. We don't
    // consider the soft disable requests for conflicts, since they won't be
    // applied anyway, but we do still want them to be counted for when the
    // conflicting request is released - we'll fall back to the soft-disabled
    // state then.
    //
    if ((pKernelRc->watchdogPersistent.disableRequestsRefCount != 0 &&
         bCurrentEnableRequest) ||
        (pKernelRc->watchdogPersistent.enableRequestsRefCount != 0 &&
         bCurrentDisableRequest))
    {
        NV_PRINTF(LEVEL_ERROR,
            "Cannot %s on GPU 0x%x, due to another client's request\n"
            "(Enable requests: %d, Disable requests: %d)\n",
            opstring,
            pGpu->gpuId,
            pKernelRc->watchdogPersistent.enableRequestsRefCount,
            pKernelRc->watchdogPersistent.disableRequestsRefCount);

        return NV_ERR_STATE_IN_USE;
    }

    NV_PRINTF(LEVEL_INFO,
        "(before) op: %s, GPU 0x%x, enableRefCt: %d, disableRefCt: %d, softDisableRefCt: %d, WDflags: 0x%x\n",
        opstring,
        pGpu->gpuId,
        pKernelRc->watchdogPersistent.enableRequestsRefCount,
        pKernelRc->watchdogPersistent.disableRequestsRefCount,
        pKernelRc->watchdogPersistent.softDisableRequestsRefCount,
        pKernelRc->watchdog.flags);

    // Step 2: if client state has changed, adjust the per-GPU/RC refcount:
    if (!bPrevEnableRequest && bCurrentEnableRequest)
    {
        ++pKernelRc->watchdogPersistent.enableRequestsRefCount;
    }
    else if (bPrevEnableRequest && !bCurrentEnableRequest)
    {
        --pKernelRc->watchdogPersistent.enableRequestsRefCount;
    }

    if (!bPrevDisableRequest && bCurrentDisableRequest)
    {
        ++pKernelRc->watchdogPersistent.disableRequestsRefCount;
    }
    else if (bPrevDisableRequest && !bCurrentDisableRequest)
    {
        --pKernelRc->watchdogPersistent.disableRequestsRefCount;
    }

    if (!bPrevSoftDisableRequest && bCurrentSoftDisableRequest)
    {
        ++pKernelRc->watchdogPersistent.softDisableRequestsRefCount;
    }
    else if (bPrevSoftDisableRequest && !bCurrentSoftDisableRequest)
    {
        --pKernelRc->watchdogPersistent.softDisableRequestsRefCount;
    }

    // Step 3: record client state:
    pSubdevice->bRcWatchdogEnableRequested      = bCurrentEnableRequest;
    pSubdevice->bRcWatchdogDisableRequested     = bCurrentDisableRequest;
    pSubdevice->bRcWatchdogSoftDisableRequested = bCurrentSoftDisableRequest;

    //
    // Step 4: if per-GPU/RC refcount has changed from 0 to 1, then change the
    // watchdog state:
    //
    if (pKernelRc->watchdogPersistent.enableRequestsRefCount == 1 &&
        prevEnableRefCount == 0 &&
        pKernelRc->watchdogPersistent.disableRequestsRefCount == 0)
    {
        // Enable the watchdog:
        krcWatchdogEnable(pKernelRc, NV_FALSE /* bOverRide */);
    }
    else if (pKernelRc->watchdogPersistent.disableRequestsRefCount == 1 &&
             prevDisableRefCount == 0 &&
             pKernelRc->watchdogPersistent.enableRequestsRefCount == 0)
    {
        // Disable the watchdog:
        krcWatchdogDisable(pKernelRc);
    }
    else if ((pKernelRc->watchdogPersistent.enableRequestsRefCount == 0) &&
             (pKernelRc->watchdogPersistent.disableRequestsRefCount == 0) &&
             ((prevEnableRefCount > 0) || (prevSoftDisableRefCount == 0)) &&
             (pKernelRc->watchdogPersistent.softDisableRequestsRefCount > 0))
    {
        //
        // Go back to disabled if all of the below are true:
        //  (1) there are no outstanding enable or disable requests,
        //  (2) the change is the release of the last enable request OR
        //      there were previously no soft disable requests
        //  (3) there are now one or more outstanding soft disable requests
        //      (including the one currently being refcounted).
        //
        krcWatchdogDisable(pKernelRc);
    }

    NV_PRINTF(LEVEL_INFO,
        "(after) op: %s, GPU 0x%x, enableRefCt: %d, disableRefCt: %d, softDisableRefCt: %d, WDflags: 0x%x\n",
        opstring,
        pGpu->gpuId,
        pKernelRc->watchdogPersistent.enableRequestsRefCount,
        pKernelRc->watchdogPersistent.disableRequestsRefCount,
        pKernelRc->watchdogPersistent.softDisableRequestsRefCount,
        pKernelRc->watchdog.flags);

    return NV_OK;
}
352 
353 
354 void
355 krcWatchdogDisable_IMPL
356 (
357     KernelRc *pKernelRc
358 )
359 {
360     pKernelRc->watchdog.flags |= WATCHDOG_FLAGS_DISABLED;
361 }
362 
363 
364 void
365 krcWatchdogEnable_IMPL
366 (
367     KernelRc *pKernelRc,
368     NvBool    bOverRide
369 )
370 {
371     //
372     // Make sure no operations are pending from before
373     // if bOverRide is NV_TRUE then we are enabling from a modeswitch
374     //
375     if (bOverRide)
376         pKernelRc->watchdog.deviceResetRd = pKernelRc->watchdog.deviceResetWr;
377 
378     pKernelRc->watchdog.flags &= ~WATCHDOG_FLAGS_DISABLED;
379 }
380 
381 
382 NV_STATUS
383 krcWatchdogShutdown_IMPL
384 (
385     OBJGPU   *pGpu,
386     KernelRc *pKernelRc
387 )
388 {
389     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
390 
391     if (!(pKernelRc->watchdog.flags & WATCHDOG_FLAGS_INITIALIZED))
392         return NV_OK;
393 
394     krcWatchdogDisable(pKernelRc);
395     osRemove1SecondRepeatingCallback(pGpu,
396                                      krcWatchdogTimerProc,
397                                      NULL /* pData */);
398 
399     // This should free the client and all associated resources
400     pRmApi->Free(pRmApi,
401                  pKernelRc->watchdog.hClient,
402                  pKernelRc->watchdog.hClient);
403 
404     //
405     // Make sure to clear any old watchdog data this also clears
406     // WATCHDOG_FLAGS_INITIALIZED
407     //
408     portMemSet(&pKernelRc->watchdog, 0, sizeof pKernelRc->watchdog);
409     portMemSet(&pKernelRc->watchdogChannelInfo, 0,
410                sizeof pKernelRc->watchdogChannelInfo);
411 
412     return NV_OK;
413 }
414 
415 
416 void krcWatchdogGetReservationCounts_IMPL
417 (
418     KernelRc *pKernelRc,
419     NvS32    *pEnable,
420     NvS32    *pDisable,
421     NvS32    *pSoftDisable
422 )
423 {
424     if (pEnable != NULL)
425         *pEnable = pKernelRc->watchdogPersistent.enableRequestsRefCount;
426 
427     if (pDisable != NULL)
428         *pDisable = pKernelRc->watchdogPersistent.disableRequestsRefCount;
429 
430     if (pSoftDisable != NULL)
431         *pSoftDisable = pKernelRc->watchdogPersistent .softDisableRequestsRefCount;
432 }
433 
434 
435 NV_STATUS
436 krcWatchdogInit_IMPL
437 (
438     OBJGPU   *pGpu,
439     KernelRc *pKernelRc
440 )
441 {
442     NvHandle        hClient;
443     NvU32           subDeviceInstance;
444     NvU32           grObj;
445     NvU32           gpfifoObj;
446     NvU32           pushBufBytes;
447     NvU32           allocationSize;
448     NvU32           ctrlSize;
449     NV_STATUS       status;
450     RsClient       *pClient;
451     KernelChannel  *pKernelChannel;
452     NvBool          bCacheSnoop;
453     RM_API         *pRmApi = rmGpuLockIsOwner() ?
454                              rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL) :
455                              rmapiGetInterface(RMAPI_API_LOCK_INTERNAL);
456     NvBool          bClientUserd = IsVOLTAorBetter(pGpu);
457     NvBool          bAcquireLock = NV_FALSE;
458 
459     union
460     {
461         NV0080_ALLOC_PARAMETERS                nv0080;
462         NV2080_ALLOC_PARAMETERS                nv2080;
463         NV_CHANNEL_ALLOC_PARAMS channelGPFifo;
464         NV_CONTEXT_DMA_ALLOCATION_PARAMS       ctxDma;
465         NV_MEMORY_VIRTUAL_ALLOCATION_PARAMS    virtual;
466         NV_MEMORY_ALLOCATION_PARAMS            mem;
467     } *pParams = NULL;
468 
469     // If booting in SMC mode, skip watchdog init since TWOD is not supported
470     NV_CHECK_OR_RETURN(LEVEL_SILENT,
471                        !IS_MIG_ENABLED(pGpu) &&
472                            gpuIsClassSupported(pGpu, FERMI_TWOD_A),
473                        NV_OK);
474 
475     if (pKernelRc->watchdog.flags &
476         (WATCHDOG_FLAGS_DISABLED | WATCHDOG_FLAGS_INITIALIZED))
477     {
478         return NV_OK;
479     }
480 
481     if (bClientUserd)
482     {
483         Heap *pHeap = GPU_GET_HEAP(pGpu);
484         if (pHeap->pmaObject.bNuma)
485         {
486             // PMA can't be used until it's onlined
487             bClientUserd = NV_FALSE;
488         }
489     }
490 
491     portMemSet(&pKernelRc->watchdogChannelInfo, 0,
492                sizeof pKernelRc->watchdogChannelInfo);
493 
494     // Bug 4088184 WAR: release GPU lock before allocating NV01_ROOT
495     if (rmGpuLockIsOwner())
496     {
497         bAcquireLock = NV_TRUE;
498         rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL);
499         pRmApi = rmapiGetInterface(RMAPI_API_LOCK_INTERNAL);
500     }
501 
502     // Allocate a root.
503     {
504         hClient = NV01_NULL_OBJECT;
505         if (pRmApi->AllocWithHandle(pRmApi,
506                                     NV01_NULL_OBJECT /* hClient */,
507                                     NV01_NULL_OBJECT /* hParent */,
508                                     NV01_NULL_OBJECT /* hObject */,
509                                     NV01_ROOT,
510                                     &hClient,
511                                     sizeof(hClient)) != NV_OK)
512         {
513             NV_PRINTF(LEVEL_WARNING, "Unable to allocate a watchdog client\n");
514             return NV_ERR_GENERIC;
515         }
516 
517         pParams = portMemAllocNonPaged(sizeof *pParams);
518         if (pParams == NULL)
519         {
520             status = NV_ERR_NO_MEMORY;
521             goto error;
522         }
523     }
524 
525     if (bAcquireLock)
526     {
527         status = rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_RC);
528         if (status != NV_OK)
529         {
530             NV_PRINTF(LEVEL_ERROR, "failed to grab RM-Lock\n");
531             DBG_BREAKPOINT();
532             goto error;
533         }
534         pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
535         bAcquireLock = NV_FALSE;
536     }
537 
538     // Alloc device
539     {
540         NV0080_ALLOC_PARAMETERS *pNv0080 = &pParams->nv0080;
541 
542         portMemSet(pNv0080, 0, sizeof *pNv0080);
543         pNv0080->deviceId = gpuGetDeviceInstance(pGpu);
544         pNv0080->hClientShare = hClient;
545 
546         status = pRmApi->AllocWithHandle(pRmApi,
547                                          hClient            /* hClient */,
548                                          hClient            /* hParent */,
549                                          WATCHDOG_DEVICE_ID /* hObject */,
550                                          NV01_DEVICE_0,
551                                          pNv0080,
552                                          sizeof(*pNv0080));
553         if (status != NV_OK)
554         {
555             NV_PRINTF(LEVEL_WARNING, "Unable to allocate a watchdog device\n");
556             goto error;
557         }
558     }
559 
560     // Alloc subdevices
561     SLI_LOOP_START(SLI_LOOP_FLAGS_NONE)
562     {
563         NV2080_ALLOC_PARAMETERS *pNv2080 = &pParams->nv2080;
564 
565         subDeviceInstance = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
566 
567         portMemSet(pNv2080, 0, sizeof *pNv2080);
568         pNv2080->subDeviceId = subDeviceInstance;
569 
570         status = pRmApi->AllocWithHandle(pRmApi,
571             hClient                                        /* hClient */,
572             WATCHDOG_DEVICE_ID                             /* hParent */,
573             (WATCHDOG_SUB_DEVICE_0_ID + subDeviceInstance) /* hObject */,
574             NV20_SUBDEVICE_0,
575             pNv2080,
576             sizeof(*pNv2080));
577         if (status != NV_OK)
578         {
579             NV_PRINTF(LEVEL_WARNING,
580                       "Unable to allocate a watchdog subdevice\n");
581             SLI_LOOP_GOTO(error);
582         }
583     }
584     SLI_LOOP_END
585 
586     //
587     // Determine what class to allocate so we will know whether to use
588     // context DMAs.  Context DMAs are not allowed on any gpu after Fermi
589     //
590     if (gpuIsClassSupported(pGpu, FERMI_TWOD_A))
591     {
592         grObj = FERMI_TWOD_A;
593     }
594     else
595     {
596         grObj = NV01_NULL_OBJECT; // Null object will kill RmAllocObject
597     }
598 
599     {
600         const struct
601         {
602             NvU32    gpfifoObject;
603             NvLength ctrlSize;
604         } gpfifoMapping[] = {
605               {KEPLER_CHANNEL_GPFIFO_B,  sizeof(NvA16FControl)}
606             , {KEPLER_CHANNEL_GPFIFO_A,  sizeof(NvA06FControl)}
607             , {MAXWELL_CHANNEL_GPFIFO_A, sizeof(Nvb06FControl)}
608             , {PASCAL_CHANNEL_GPFIFO_A,  sizeof(Nvc06fControl)}
609             , {VOLTA_CHANNEL_GPFIFO_A,   sizeof(Nvc36fControl)}
610             , {TURING_CHANNEL_GPFIFO_A,  sizeof(Nvc46fControl)}
611             , {AMPERE_CHANNEL_GPFIFO_A,  sizeof(Nvc56fControl)}
612             , {HOPPER_CHANNEL_GPFIFO_A,  sizeof(Nvc86fControl)}
613         };
614 
615         NvU32 i;
616 
617         // Defaults if none match
618         gpfifoObj = GF100_CHANNEL_GPFIFO;
619         ctrlSize  = sizeof(Nv906fControl);
620         pKernelRc->watchdogChannelInfo.class2dSubch = 0;
621 
622         for (i = 0; i < NV_ARRAY_ELEMENTS(gpfifoMapping); ++i)
623         {
624             if (gpuIsClassSupported(pGpu, gpfifoMapping[i].gpfifoObject))
625             {
626                 gpfifoObj = gpfifoMapping[i].gpfifoObject;
627                 ctrlSize  = gpfifoMapping[i].ctrlSize;
628                 pKernelRc->watchdogChannelInfo
629                     .class2dSubch = NVA06F_SUBCHANNEL_2D;
630                 break;
631             }
632         }
633     }
634 
635     // RMCONFIG: only if PERF engine is enabled
636     if (RMCFG_MODULE_KERNEL_PERF)
637     {
638         pushBufBytes = WATCHDOG_PUSHBUF_SIZE * 4;
639     }
640     else
641     {
642         pushBufBytes = WATCHDOG_PB_SIZE_DEFAULT;
643     }
644 
645     // Allocate a virtual context handle
646     {
647         NV_MEMORY_VIRTUAL_ALLOCATION_PARAMS *pVirtual = &pParams->virtual;
648 
649         portMemSet(pVirtual, 0, sizeof *pVirtual);
650         status = pRmApi->AllocWithHandle(pRmApi,
651                                          hClient                 /* hClient */,
652                                          WATCHDOG_DEVICE_ID      /* hParent */,
653                                          WATCHDOG_VIRTUAL_CTX_ID /* hObject */,
654                                          NV01_MEMORY_VIRTUAL,
655                                          pVirtual,
656                                          sizeof(*pVirtual));
657         if (status != NV_OK)
658         {
659             NV_PRINTF(LEVEL_WARNING,
660                       "Unable to allocate unified heap for watchdog\n");
661             goto error;
662         }
663     }
664 
665     //
666     // Calculate the system memory allocation size based on size of push
667     // buffers, notifers, GPFIFOs, etc., taking alignment requirements into
668     // consideration.
669     //
670     pKernelRc->watchdogChannelInfo.pbBytes = pushBufBytes;
671     pushBufBytes *= WATCHDOG_PUSHBUFFERS;
672     allocationSize = (NvU32)(
673         pushBufBytes +
674         ((WATCHDOG_GPFIFO_ENTRIES * NV906F_GP_ENTRY__SIZE) + GPFIFO_ALIGN) +
675         ((sizeof(NvNotification) + NOTIFIER_ALIGN) *
676          NV_CHANNELGPFIFO_NOTIFICATION_TYPE__SIZE_1) +
677         (sizeof(NvNotification) * WATCHDOG_GRAPHICS_NOTIFIERS *
678          NV_MAX_SUBDEVICES));
679 
680     KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu);
681     bCacheSnoop = FLD_TEST_REF(BIF_DMA_CAPS_SNOOP, _CTXDMA,
682                                kbifGetDmaCaps(pGpu, pKernelBif));
683 
684     {
685         NV_MEMORY_ALLOCATION_PARAMS *pMem = &pParams->mem;
686         NvU32 hClass = NV01_MEMORY_SYSTEM;
687 
688         portMemSet(pMem, 0, sizeof *pMem);
689         pMem->owner = HEAP_OWNER_RM_CLIENT_GENERIC;
690         pMem->size  = allocationSize;
691         pMem->type  = NVOS32_TYPE_IMAGE;
692 
693         pMem->attr2 = DRF_DEF(OS32, _ATTR2, _GPU_CACHEABLE, _NO);
694 
695         // Apply registry overrides to channel pushbuffer.
696         switch (DRF_VAL(_REG_STR_RM, _INST_LOC_4, _CHANNEL_PUSHBUFFER, pGpu->instLocOverrides4))
697         {
698             case NV_REG_STR_RM_INST_LOC_4_CHANNEL_PUSHBUFFER_VID:
699                 hClass = NV01_MEMORY_LOCAL_USER;
700                 pMem->attr |= DRF_DEF(OS32, _ATTR, _LOCATION,  _VIDMEM) |
701                               DRF_DEF(OS32, _ATTR, _COHERENCY, _UNCACHED);
702                 break;
703 
704             case NV_REG_STR_RM_INST_LOC_4_CHANNEL_PUSHBUFFER_COH:
705                 hClass = NV01_MEMORY_SYSTEM;
706                 pMem->attr |= DRF_DEF(OS32, _ATTR, _LOCATION,  _PCI)    |
707                               DRF_DEF(OS32, _ATTR, _COHERENCY, _CACHED) |
708                               DRF_DEF(OS32, _ATTR, _PHYSICALITY, _NONCONTIGUOUS);
709                 break;
710 
711             case NV_REG_STR_RM_INST_LOC_4_CHANNEL_PUSHBUFFER_NCOH:
712                 hClass = NV01_MEMORY_SYSTEM;
713                 pMem->attr |= DRF_DEF(OS32, _ATTR, _LOCATION,  _PCI)      |
714                               DRF_DEF(OS32, _ATTR, _COHERENCY, _UNCACHED) |
715                               DRF_DEF(OS32, _ATTR, _PHYSICALITY, _NONCONTIGUOUS);
716                 break;
717 
718             case NV_REG_STR_RM_INST_LOC_4_CHANNEL_PUSHBUFFER_DEFAULT:
719             default:
720                 hClass = NV01_MEMORY_SYSTEM;
721                 pMem->attr |= DRF_DEF(OS32, _ATTR, _LOCATION,  _PCI)      |
722                               DRF_DEF(OS32, _ATTR, _COHERENCY, _UNCACHED) |
723                               DRF_DEF(OS32, _ATTR, _PHYSICALITY, _NONCONTIGUOUS);
724         }
725 
726         if (bCacheSnoop && (hClass == NV01_MEMORY_SYSTEM))
727         {
728             pMem->attr = FLD_SET_DRF(OS32, _ATTR, _COHERENCY, _CACHED,
729                                      pMem->attr);
730         }
731 
732         if (((pKernelRc->watchdog.flags & WATCHDOG_FLAGS_ALLOC_UNCACHED_PCI) != 0) &&
733             (hClass == NV01_MEMORY_SYSTEM))
734         {
735             pMem->attr = FLD_SET_DRF(OS32, _ATTR, _COHERENCY, _UNCACHED,
736                                      pMem->attr);
737         }
738 
739         //
740         // When Hopper CC is enabled all RM internal sysmem allocations that are
741         // required to be accessed from GPU should be in unprotected memory
742         // All video allocations must be in CPR
743         //
744 
745         //
746         // Allocate memory using vidHeapControl
747         //
748         // vidHeapControl calls should happen outside GPU locks. This is a PMA
749         // requirement as memory allocation calls may invoke eviction which UVM
750         // could get stuck behind GPU lock
751         //
752         if (hClass == NV01_MEMORY_LOCAL_USER && rmGpuLockIsOwner())
753         {
754             bAcquireLock = NV_TRUE;
755             rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL);
756             pRmApi = rmapiGetInterface(RMAPI_API_LOCK_INTERNAL);
757         }
758 
759         // Allocate memory for the notifiers and pushbuffer
760         status = pRmApi->AllocWithHandle(pRmApi,
761                                          hClient            /* hClient */,
762                                          WATCHDOG_DEVICE_ID /* hParent */,
763                                          WATCHDOG_MEM_ID    /* hObject */,
764                                          hClass,
765                                          pMem,
766                                          sizeof(*pMem));
767 
768         if (bAcquireLock)
769         {
770             // Re-acquire the GPU locks
771             if (rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_RC) != NV_OK)
772             {
773                 NV_PRINTF(LEVEL_ERROR, "failed to grab RM-Lock\n");
774                 DBG_BREAKPOINT();
775                 goto error;
776             }
777             pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
778             bAcquireLock = NV_FALSE;
779         }
780 
781         if (status != NV_OK)
782         {
783             NV_PRINTF(LEVEL_WARNING,
784                       "Unable to allocate %s memory for watchdog\n",
785                       (hClass == NV01_MEMORY_LOCAL_USER) ? "video" : "system");
786             goto error;
787         }
788 
789         status = pRmApi->MapToCpu( pRmApi,
790             hClient            /* hClient */,
791             WATCHDOG_DEVICE_ID /* hDevice */,
792             WATCHDOG_MEM_ID    /* hMemory */,
793             0                  /* offset */,
794             pMem->size         /* length */,
795             (void **)&pKernelRc->watchdogChannelInfo.pCpuAddr,
796             0                  /* flags */);
797         if (status != NV_OK)
798         {
799             NV_PRINTF(LEVEL_WARNING,
800                       "Unable to map memory for watchdog\n");
801             goto error;
802         }
803 
804         portMemSet(pKernelRc->watchdogChannelInfo.pCpuAddr, 0, pMem->size);
805 
806         // Map the allocation into the unified heap.
807         status = pRmApi->Map(pRmApi,
808             hClient                 /* hClient */,
809             WATCHDOG_DEVICE_ID      /* hDevice */,
810             WATCHDOG_VIRTUAL_CTX_ID /* hMemctx */,
811             WATCHDOG_MEM_ID         /* hMemory */,
812             0                       /* offset */,
813             allocationSize          /* length */,
814             (bCacheSnoop ? DRF_DEF(OS46, _FLAGS, _CACHE_SNOOP, _ENABLE) :
815                            DRF_DEF(OS46, _FLAGS, _CACHE_SNOOP, _DISABLE)) |
816                 DRF_DEF(OS46, _FLAGS, _ACCESS, _READ_WRITE),
817             &pKernelRc->watchdogChannelInfo.pGpuAddr);
818         if (status != NV_OK)
819         {
820             NV_PRINTF(LEVEL_ERROR,
821                       "Unable to map memory into watchdog's heap\n");
822             goto error;
823         }
824     }
825 
826     // Allocate the error notifier context DMA.
827     {
828         NV_CONTEXT_DMA_ALLOCATION_PARAMS *pCtxDma = &pParams->ctxDma;
829 
830         portMemSet(pCtxDma, 0, sizeof *pCtxDma);
831         pCtxDma->hSubDevice = 0;
832         pCtxDma->flags = (bCacheSnoop ?
833                               DRF_DEF(OS03, _FLAGS, _CACHE_SNOOP, _ENABLE) :
834                               DRF_DEF(OS03, _FLAGS, _CACHE_SNOOP, _DISABLE)) |
835                          DRF_DEF(OS03, _FLAGS, _ACCESS, _READ_WRITE) |
836                          DRF_DEF(OS03, _FLAGS, _MAPPING, _KERNEL) |
837                          DRF_DEF(OS03, _FLAGS, _HASH_TABLE, _DISABLE);
838         pCtxDma->hMemory = WATCHDOG_MEM_ID;
839         pCtxDma->offset  = WATCHDOG_ERROR_NOTIFIER_OFFSET(
840             pKernelRc->watchdogChannelInfo.pbBytes);
841         pCtxDma->limit = ((NV_CHANNELGPFIFO_NOTIFICATION_TYPE__SIZE_1 *
842                            sizeof(NvNotification)) -
843                           1);
844 
845         status = pRmApi->AllocWithHandle(pRmApi,
846                                          hClient               /* hClient */ ,
847                                          WATCHDOG_DEVICE_ID    /* hParent */ ,
848                                          WATCHDOG_ERROR_DMA_ID /* hObject */,
849                                          NV01_CONTEXT_DMA,
850                                          pCtxDma,
851                                          sizeof(*pCtxDma));
852         if (status != NV_OK)
853         {
854             NV_PRINTF(LEVEL_WARNING,
855                       "Unable to set up watchdog's error context\n");
856             goto error;
857         }
858     }
859 
860     // Allocate the graphics notifier context DMA.
861     {
862         NV_CONTEXT_DMA_ALLOCATION_PARAMS *pCtxDma = &pParams->ctxDma;
863 
864         portMemSet(pCtxDma, 0, sizeof *pCtxDma);
865         pCtxDma->hSubDevice = 0;
866         pCtxDma->flags = (bCacheSnoop ?
867                               DRF_DEF(OS03, _FLAGS, _CACHE_SNOOP, _ENABLE) :
868                               DRF_DEF(OS03, _FLAGS, _CACHE_SNOOP, _DISABLE)) |
869                          DRF_DEF(OS03, _FLAGS, _ACCESS, _READ_WRITE) |
870                          DRF_DEF(OS03, _FLAGS, _HASH_TABLE, _DISABLE);
871         pCtxDma->hMemory = WATCHDOG_MEM_ID;
872         pCtxDma->offset  = WATCHDOG_NOTIFIER_OFFSET(
873             pKernelRc->watchdogChannelInfo.pbBytes,
874             0 /* gpuIndex */,
875             0 /* notifier */);
876         pCtxDma->limit = ((sizeof(NvNotification) *
877                            WATCHDOG_GRAPHICS_NOTIFIERS * NV_MAX_SUBDEVICES) -
878                           1);
879 
880         status = pRmApi->AllocWithHandle(pRmApi,
881                                          hClient                  /* hClient */,
882                                          WATCHDOG_DEVICE_ID       /* hParent */,
883                                          WATCHDOG_NOTIFIER_DMA_ID /* hObject */,
884                                          NV01_CONTEXT_DMA,
885                                          pCtxDma,
886                                          sizeof(*pCtxDma));
887         if (status != NV_OK)
888         {
889             NV_PRINTF(LEVEL_WARNING, "Unable to set up watchdog's notifier\n");
890             goto error;
891         }
892     }
893 
894     if (bClientUserd)
895     {
896         NV_MEMORY_ALLOCATION_PARAMS *pMem          = &pParams->mem;
897         NvU32                        userdMemClass = NV01_MEMORY_LOCAL_USER;
898 
899         portMemSet(pMem, 0, sizeof *pMem);
900         pMem->owner = HEAP_OWNER_RM_CLIENT_GENERIC;
901         pMem->size  = ctrlSize;
902         pMem->type  = NVOS32_TYPE_IMAGE;
903 
904         // Apply registry overrides to USERD.
905         switch (DRF_VAL(_REG_STR_RM, _INST_LOC, _USERD, pGpu->instLocOverrides))
906         {
907             case NV_REG_STR_RM_INST_LOC_USERD_COH:
908             case NV_REG_STR_RM_INST_LOC_USERD_NCOH:
909                 userdMemClass = NV01_MEMORY_SYSTEM;
910                 pMem->attr    = DRF_DEF(OS32, _ATTR, _LOCATION, _PCI);
911                 break;
912 
913             case NV_REG_STR_RM_INST_LOC_USERD_VID:
914             case NV_REG_STR_RM_INST_LOC_USERD_DEFAULT:
915                 pMem->attr = DRF_DEF(OS32, _ATTR, _LOCATION, _VIDMEM);
916                 break;
917         }
918 
919         //
920         // When APM is enabled all RM internal allocations must to go to
921         // unprotected memory irrespective of vidmem or sysmem
922         // When Hopper CC is enabled all RM internal sysmem allocations that
923         // are required to be accessed from GPU should be in unprotected memory
924         // and all vidmem allocations must go to protected memory
925         //
926 
927         //
928         // Allocate memory using vidHeapControl
929         //
930         // vidHeapControl calls should happen outside GPU locks. This is a PMA
931         // requirement as memory allocation calls may invoke eviction which UVM
932         // could get stuck behind GPU lock
933         //
934         if (userdMemClass == NV01_MEMORY_LOCAL_USER && rmGpuLockIsOwner())
935         {
936             bAcquireLock = NV_TRUE;
937             rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL);
938             pRmApi = rmapiGetInterface(RMAPI_API_LOCK_INTERNAL);
939         }
940 
941         //
942         // Using device handle since VGPU doesnt support subdevice memory
943         // allocations
944         //
945         status = pRmApi->AllocWithHandle(pRmApi,
946             hClient                    /* hClient */,
947             WATCHDOG_DEVICE_ID         /* hParent */,
948             WATCHDOG_USERD_PHYS_MEM_ID /* hObject */,
949             userdMemClass,
950             pMem,
951             sizeof(*pMem));
952 
953         if (status != NV_OK)
954         {
955             NV_PRINTF(LEVEL_WARNING,
956                       "Unable to allocate video memory for USERD\n");
957             goto error;
958         }
959     }
960 
961     {
962         NV_CHANNEL_ALLOC_PARAMS *pChannelGPFifo =
963             &pParams->channelGPFifo;
964 
965         //
966         // RmAllocChannel recognizes our handle and attempts to give us
967         // channel 30. This is not guaranteed; we could theoretically get any
968         // channel.
969         //
970         portMemSet(pChannelGPFifo, 0, sizeof *pChannelGPFifo);
971         pChannelGPFifo->hObjectError  = WATCHDOG_ERROR_DMA_ID;
972         pChannelGPFifo->hObjectBuffer = WATCHDOG_VIRTUAL_CTX_ID;
973         pChannelGPFifo->gpFifoOffset  = (
974             pKernelRc->watchdogChannelInfo.pGpuAddr +
975             WATCHDOG_GPFIFO_OFFSET(pKernelRc->watchdogChannelInfo.pbBytes));
976         pChannelGPFifo->gpFifoEntries = WATCHDOG_GPFIFO_ENTRIES;
977 
978         // 2d object is only suppported on GR0
979         pChannelGPFifo->engineType = RM_ENGINE_TYPE_GR0;
980 
981         if (bClientUserd)
982             pChannelGPFifo->hUserdMemory[0] = WATCHDOG_USERD_PHYS_MEM_ID;
983 
984         // channel alloc API needs to be called without GPU lock
985         if (!bAcquireLock && rmGpuLockIsOwner())
986         {
987             bAcquireLock = NV_TRUE;
988             rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL);
989             pRmApi = rmapiGetInterface(RMAPI_API_LOCK_INTERNAL);
990         }
991 
992         status = pRmApi->AllocWithHandle(pRmApi,
993             hClient                        /* hClient */,
994             WATCHDOG_DEVICE_ID             /* hParent */,
995             WATCHDOG_PUSHBUFFER_CHANNEL_ID /* hObject */,
996             gpfifoObj,
997             pChannelGPFifo,
998             sizeof(*pChannelGPFifo));
999 
1000         if (bAcquireLock)
1001         {
1002             // Reaquire the GPU locks
1003             if (rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_RC) !=
1004                 NV_OK)
1005             {
1006                 NV_PRINTF(LEVEL_ERROR, "failed to grab RM-Lock\n");
1007                 DBG_BREAKPOINT();
1008                 status = NV_ERR_GENERIC;
1009                 goto error;
1010             }
1011             pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
1012         }
1013 
1014         if (status != NV_OK)
1015         {
1016             NV_PRINTF(LEVEL_WARNING, "Unable to alloc watchdog channel\n");
1017 
1018             if (status == NV_ERR_INVALID_CLASS)
1019             {
1020                 status = NV_ERR_NOT_SUPPORTED;
1021             }
1022             goto error;
1023         }
1024     }
1025 
1026     SLI_LOOP_START(SLI_LOOP_FLAGS_NONE)
1027     {
1028         Nv906fControl *pControlGPFifo = NULL;
1029         subDeviceInstance = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
1030 
1031         // USERD isn't mapped for us on Fermi by RmAllocChannel.
1032         status = pRmApi->MapToCpu(pRmApi,
1033             hClient                                        /* hClient */,
1034             (WATCHDOG_SUB_DEVICE_0_ID + subDeviceInstance) /* hDevice */,
1035             bClientUserd ? WATCHDOG_USERD_PHYS_MEM_ID :
1036                            WATCHDOG_PUSHBUFFER_CHANNEL_ID  /* hMemory */,
1037             0                                              /* offset */,
1038             ctrlSize                                       /* length */,
1039             (void **)&pControlGPFifo,
1040             0                                              /* flags */);
1041         if (status != NV_OK)
1042         {
1043             NV_PRINTF(LEVEL_WARNING,
1044                       "Unable to create a watchdog GPFIFO mapping\n");
1045             SLI_LOOP_GOTO(error);
1046         }
1047 
1048         pKernelRc->watchdogChannelInfo.pControlGPFifo[subDeviceInstance] =
1049             pControlGPFifo;
1050 
1051         pKernelRc->watchdog.notifiers[subDeviceInstance] =(NvNotification *)(
1052             pKernelRc->watchdogChannelInfo.pCpuAddr +
1053             WATCHDOG_NOTIFIER_OFFSET(pKernelRc->watchdogChannelInfo.pbBytes,
1054                                      subDeviceInstance /* gpuIndex */,
1055                                      0                 /* notifier */));
1056     }
1057     SLI_LOOP_END
1058 
1059     pKernelRc->watchdog.errorContext = (NvNotification *)(
1060         pKernelRc->watchdogChannelInfo.pCpuAddr +
1061         WATCHDOG_ERROR_NOTIFIER_OFFSET(pKernelRc->watchdogChannelInfo.pbBytes));
1062 
1063     pKernelRc->watchdog.notifierToken = (NvNotification *)(
1064         pKernelRc->watchdogChannelInfo.pCpuAddr +
1065         WATCHDOG_WORK_SUBMIT_TOKEN_OFFSET(
1066             pKernelRc->watchdogChannelInfo.pbBytes));
1067 
1068     // Create an object that will require a trip through the graphics engine
1069     status = pRmApi->AllocWithHandle(pRmApi,
1070         hClient                        /* hClient */,
1071         WATCHDOG_PUSHBUFFER_CHANNEL_ID /* hParent */,
1072         WATCHDOG_GROBJ_ID              /* hObject */,
1073         grObj,
1074         NULL,
1075         0);
1076     if (status != NV_OK)
1077     {
1078         NV_PRINTF(LEVEL_WARNING, "Unable to allocate class %x\n", grObj);
1079         goto error;
1080     }
1081 
1082     // Fetch the client object
1083     status = serverGetClientUnderLock(&g_resServ, hClient, &pClient);
1084     if (status != NV_OK)
1085     {
1086         NV_PRINTF(LEVEL_ERROR, "Unable to obtain client object\n");
1087         goto error;
1088     }
1089 
1090     //
1091     // Determine the (class + engine) handle the hardware will understand, if
1092     // necessary
1093     //
1094     if (CliGetKernelChannelWithDevice(pClient,
1095                                       WATCHDOG_DEVICE_ID,
1096                                       WATCHDOG_PUSHBUFFER_CHANNEL_ID,
1097                                       &pKernelChannel) != NV_OK)
1098     {
1099         NV_PRINTF(LEVEL_ERROR, "CliGetKernelChannelWithDevice failed\n");
1100         status = NV_ERR_INVALID_CHANNEL;
1101         goto error;
1102     }
1103 
1104     NV_ASSERT_OR_ELSE(pKernelChannel != NULL, status = NV_ERR_INVALID_CHANNEL;
1105                       goto error);
1106 
1107     {
1108         NvU32 classID;
1109         RM_ENGINE_TYPE engineID;
1110 
1111         status = kchannelGetClassEngineID_HAL(pGpu, pKernelChannel,
1112             WATCHDOG_GROBJ_ID,
1113             &pKernelRc->watchdogChannelInfo.classEngineID,
1114             &classID,
1115             &engineID);
1116         if (status != NV_OK)
1117         {
1118             NV_PRINTF(LEVEL_WARNING,
1119                       "Unable to get class engine ID %x\n",
1120                       grObj);
1121             goto error;
1122         }
1123     }
1124 
1125     pKernelRc->watchdog.hClient   = hClient;
1126     pKernelRc->watchdog.runlistId = kchannelGetRunlistId(pKernelChannel);
1127 
1128     // Schedule the watchdog channel for execution.
1129     {
1130         NVA06F_CTRL_GPFIFO_SCHEDULE_PARAMS nvA06fScheduleParams;
1131 
1132         portMemSet(&nvA06fScheduleParams, 0, sizeof nvA06fScheduleParams);
1133         nvA06fScheduleParams.bEnable = NV_TRUE;
1134 
1135         status = pRmApi->Control(pRmApi,
1136                                  pKernelRc->watchdog.hClient,
1137                                  WATCHDOG_PUSHBUFFER_CHANNEL_ID,
1138                                  NVA06F_CTRL_CMD_GPFIFO_SCHEDULE,
1139                                  &nvA06fScheduleParams,
1140                                  sizeof nvA06fScheduleParams);
1141         if (status != NV_OK)
1142         {
1143             NV_PRINTF(LEVEL_ERROR, "Unable to schedule watchdog channel\n");
1144             goto error;
1145         }
1146     }
1147 
1148     // Get the work submit token that watchdog can use while submitting work
1149     {
1150         NvU32 workSubmitToken;
1151         status = kfifoRmctrlGetWorkSubmitToken_HAL(GPU_GET_KERNEL_FIFO(pGpu),
1152             hClient,
1153             WATCHDOG_PUSHBUFFER_CHANNEL_ID,
1154             &workSubmitToken);
1155         if (status != NV_OK)
1156         {
1157             NV_PRINTF(LEVEL_WARNING,
1158                       "Unable to get work submit token for watchdog\n");
1159             goto error;
1160         }
1161     }
1162 
1163     krcWatchdogInitPushbuffer_HAL(pGpu, pKernelRc);
1164 
1165     pKernelRc->watchdog.flags |= WATCHDOG_FLAGS_INITIALIZED;
1166 
1167     // Hook into the 1 Hz OS timer
1168     osSchedule1SecondCallback(pGpu,
1169                               krcWatchdogTimerProc,
1170                               NULL /* pData */,
1171                               NV_OS_1HZ_REPEAT);
1172 
1173     // Schedule next interval to run immediately
1174     pKernelRc->watchdogPersistent.nextRunTime = 0;
1175 
1176 error:
1177     NV_ASSERT(status == NV_OK);
1178 
1179     if (status != NV_OK)
1180     {
1181         pRmApi->Free(pRmApi, hClient, hClient);
1182     }
1183 
1184     portMemFree(pParams);
1185     return status;
1186 }
1187 
1188 
/*!
 * @brief Construct the watchdog channel's pushbuffer segments and GPFIFO
 *        entries, then submit the first (SET_OBJECT) segment.
 *
 * Two pushbuffer segments are written into the channel's CPU mapping:
 *   - Segment 0 binds the 2D class to its subchannel (NV902D_SET_OBJECT);
 *     it is described by gpEntry0 and submitted once at the end of this
 *     function (GPPut = 1).
 *   - Segment 1 programs per-subdevice notifier addresses and issues a
 *     write-only NOTIFY plus SET_REFERENCE; it is described by gpEntry1
 *     and re-submitted by krcWatchdogWriteNotifierToGpfifo() every time a
 *     fresh notifier write is needed.
 *
 * @param[in] pGpu       OBJGPU pointer
 * @param[in] pKernelRc  KernelRc pointer
 */
void
krcWatchdogInitPushbuffer_IMPL
(
    OBJGPU   *pGpu,
    KernelRc *pKernelRc
)
{
    // NOTE: PUSH_DATA/PUSH_PAIR append methods through the local 'ptr'
    // cursor; the segment-length computations below depend on 'ptr'
    // advancing past each pushed dword.
    NvU32 *ptr, *ptrbase, *ptrbase1;
    NvU32  pbOffset;

    //
    // Set up the pushbuffer.
    // Create two separate pushbuffer segments:
    // First  - Set object on graphics class
    // Second - Notifier, setref
    // Create GPFIFO
    // Point to setobject pushbuffer, gp_put++
    // Then keep on pointing gp_entry to the second pushbuffer segment every
    // time we need a notifier
    //
    pbOffset = WATCHDOG_PUSHBUFFER_OFFSET(
        pKernelRc->watchdogChannelInfo.pbBytes,
        0);
    ptrbase = ptr = (NvU32 *)(pKernelRc->watchdogChannelInfo.pCpuAddr +
                              pbOffset);

    // In SLI, broadcast the following methods to all subdevices.
    if (IsSLIEnabled(pGpu))
    {
        PUSH_DATA(
            DRF_DEF(906F, _DMA, _SEC_OP, _GRP0_USE_TERT) |
            DRF_DEF(906F, _DMA, _TERT_OP, _GRP0_SET_SUB_DEV_MASK) |
            DRF_NUM(906F, _DMA, _SET_SUBDEVICE_MASK_VALUE, SUBDEVICE_MASK_ALL));
    }

    // Set up object in first pushbuffer: bind the 2D class+engine handle
    // obtained at channel-init time to the watchdog's 2D subchannel.
    PUSH_PAIR(pKernelRc->watchdogChannelInfo.class2dSubch,
              NV902D_SET_OBJECT,
              pKernelRc->watchdogChannelInfo.classEngineID);

    //
    // Construct GPFIFO entries
    // Pushbuffer 0
    //
    {
        NvU64 get    = pKernelRc->watchdogChannelInfo.pGpuAddr + pbOffset;
        NvU32 length = (NvU8 *)ptr - (NvU8 *)ptrbase;

        // GET and LENGTH fields are expressed in 4-byte units (hence >> 2).
        pKernelRc->watchdogChannelInfo.gpEntry0[0] =
            DRF_DEF(906F, _GP_ENTRY0, _NO_CONTEXT_SWITCH, _FALSE) |
            DRF_NUM(906F, _GP_ENTRY0, _GET,               NvU64_LO32(get) >> 2);

        pKernelRc->watchdogChannelInfo.gpEntry0[1] =
            DRF_NUM(906F, _GP_ENTRY1, _GET_HI, NvU64_HI32(get)) |
            DRF_NUM(906F, _GP_ENTRY1, _LENGTH, length >> 2)     |
            DRF_DEF(906F, _GP_ENTRY1, _PRIV,   _USER)           |
            DRF_DEF(906F, _GP_ENTRY1, _LEVEL,  _MAIN);
    }

    // Set up notifiers in second pushbuffer
    pbOffset = WATCHDOG_PUSHBUFFER_OFFSET(
        pKernelRc->watchdogChannelInfo.pbBytes,
        1);
    ptrbase1 = ptr = (NvU32 *)(pKernelRc->watchdogChannelInfo.pCpuAddr +
                               pbOffset);

    //
    // Each subdevice has its own notifier slot in the watchdog allocation,
    // so in SLI the SET_NOTIFY_A/B methods are masked to one subdevice at a
    // time.
    //
    SLI_LOOP_START(SLI_LOOP_FLAGS_NONE);
    {
        NvU64  offset;
        if (IsSLIEnabled(pGpu))
        {
            PUSH_DATA(DRF_DEF(906F, _DMA, _SEC_OP, _GRP0_USE_TERT) |
                      DRF_DEF(906F, _DMA, _TERT_OP, _GRP0_SET_SUB_DEV_MASK) |
                      DRF_NUM(906F, _DMA, _SET_SUBDEVICE_MASK_VALUE,
                              NVBIT(gpumgrGetSubDeviceInstanceFromGpu(pGpu))));
        }

        // GPU VA of this subdevice's notifier within the watchdog buffer.
        offset = (pKernelRc->watchdogChannelInfo.pGpuAddr +
                  WATCHDOG_NOTIFIER_OFFSET(
                      pKernelRc->watchdogChannelInfo.pbBytes,
                      gpumgrGetSubDeviceInstanceFromGpu(pGpu),
                      0));

        PUSH_PAIR(pKernelRc->watchdogChannelInfo.class2dSubch,
            NV902D_SET_NOTIFY_A,
            DRF_NUM(902D, _SET_NOTIFY_A, _ADDRESS_UPPER, NvU64_HI32(offset)));
        PUSH_PAIR(pKernelRc->watchdogChannelInfo.class2dSubch,
            NV902D_SET_NOTIFY_B,
            DRF_NUM(902D, _SET_NOTIFY_B, _ADDRESS_LOWER, NvU64_LO32(offset)));
    }
    SLI_LOOP_END;

    // Restore broadcast to all subdevices for the remaining methods.
    if (IsSLIEnabled(pGpu))
    {
        PUSH_DATA(
            DRF_DEF(906F, _DMA, _SEC_OP, _GRP0_USE_TERT) |
            DRF_DEF(906F, _DMA, _TERT_OP, _GRP0_SET_SUB_DEV_MASK) |
            DRF_NUM(906F, _DMA, _SET_SUBDEVICE_MASK_VALUE, SUBDEVICE_MASK_ALL));
    }

    // Notifiers: write-only notify, then SET_REFERENCE to close the segment.
    PUSH_PAIR(pKernelRc->watchdogChannelInfo.class2dSubch,
              NV902D_NOTIFY, NV902D_NOTIFY_TYPE_WRITE_ONLY);
    PUSH_PAIR(pKernelRc->watchdogChannelInfo.class2dSubch,
              NV902D_NO_OPERATION, 0x0);
    PUSH_PAIR(pKernelRc->watchdogChannelInfo.class2dSubch,
              NV906F_SET_REFERENCE, 0x0);

    // Pushbuffer 1
    {
        NvU64 get    = pKernelRc->watchdogChannelInfo.pGpuAddr + pbOffset;
        NvU32 length = (NvU8 *)ptr - (NvU8 *)ptrbase1;

        pKernelRc->watchdogChannelInfo.gpEntry1[0] =
            DRF_DEF(906F, _GP_ENTRY0, _NO_CONTEXT_SWITCH, _FALSE) |
            DRF_NUM(906F, _GP_ENTRY0, _GET,               NvU64_LO32(get) >> 2);

        pKernelRc->watchdogChannelInfo.gpEntry1[1] =
            DRF_NUM(906F, _GP_ENTRY1, _GET_HI, NvU64_HI32(get)) |
            DRF_NUM(906F, _GP_ENTRY1, _LENGTH, length >> 2)     |
            DRF_DEF(906F, _GP_ENTRY1, _PRIV,   _USER)           |
            DRF_DEF(906F, _GP_ENTRY1, _LEVEL,  _MAIN)           |
            DRF_DEF(906F, _GP_ENTRY1, _SYNC,   _WAIT);
    }

    // Write a new entry to the GPFIFO (pushbuffer 0, ring slot 0)
    {
        NvU32 *pGpEntry = (NvU32 *)(
            pKernelRc->watchdogChannelInfo.pCpuAddr +
            WATCHDOG_GPFIFO_OFFSET(pKernelRc->watchdogChannelInfo.pbBytes));
        MEM_WR32(&pGpEntry[0], pKernelRc->watchdogChannelInfo.gpEntry0[0]);
        MEM_WR32(&pGpEntry[1], pKernelRc->watchdogChannelInfo.gpEntry0[1]);
    }

    //
    // Flush the write-combine buffer using a fence operation before updating
    // gp_put, so the GPFIFO entry above is visible to the GPU first.
    //
    osFlushCpuWriteCombineBuffer();

    // Publish GPPut = 1 (one entry submitted) and clear each subdevice's
    // notifier status.
    SLI_LOOP_START(SLI_LOOP_FLAGS_NONE);
    {
        NvU32 subdeviceId = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
        MEM_WR32(
            &pKernelRc->watchdogChannelInfo.pControlGPFifo[subdeviceId]->GPPut,
            1);
        pKernelRc->watchdog.notifiers[subdeviceId]->status = 0;
    }
    SLI_LOOP_END;

    //
    // Flush the write-combine buffer using a fence operation before writing
    // the usermode doorbell register, so the GPPut update lands first.
    //
    osFlushCpuWriteCombineBuffer();

    // Ring the usermode doorbell on each subdevice to start processing.
    SLI_LOOP_START(SLI_LOOP_FLAGS_NONE);
    {
        kfifoUpdateUsermodeDoorbell_HAL(pGpu, GPU_GET_KERNEL_FIFO(pGpu),
            pKernelRc->watchdog.notifierToken->info32,
            pKernelRc->watchdog.runlistId);
    }
    SLI_LOOP_END;

    // Submit the notifier segment (pushbuffer 1) for the first time.
    krcWatchdogWriteNotifierToGpfifo(pGpu, pKernelRc);
}
1351 
1352 
1353 void
1354 krcWatchdogWriteNotifierToGpfifo_IMPL
1355 (
1356     OBJGPU   *pGpu,
1357     KernelRc *pKernelRc
1358 )
1359 {
1360     NvU32  GPPut;
1361 
1362     // Write a second entry to the GPFIFO  (notifier)
1363     {
1364         NvU32 subdeviceId = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
1365         NvU32 *pGpEntry;
1366 
1367         GPPut = MEM_RD32(
1368             &pKernelRc->watchdogChannelInfo.pControlGPFifo[subdeviceId]->GPPut);
1369 
1370         if (GPPut >= WATCHDOG_GPFIFO_ENTRIES)
1371         {
1372             NV_ASSERT(GPPut < WATCHDOG_GPFIFO_ENTRIES);
1373             return;
1374         }
1375 
1376         pGpEntry = (NvU32 *)(
1377             pKernelRc->watchdogChannelInfo.pCpuAddr +
1378             WATCHDOG_GPFIFO_OFFSET(pKernelRc->watchdogChannelInfo.pbBytes) +
1379             (GPPut * NV906F_GP_ENTRY__SIZE));
1380         MEM_WR32(&pGpEntry[0], pKernelRc->watchdogChannelInfo.gpEntry1[0]);
1381         MEM_WR32(&pGpEntry[1], pKernelRc->watchdogChannelInfo.gpEntry1[1]);
1382     }
1383 
1384     //
1385     // Flush the WRC buffer using fence operation before updating the usermode
1386     // channel ID register
1387     //
1388     osFlushCpuWriteCombineBuffer();
1389 
1390     //
1391     // Write out incremented GPPut (we need > 2 GP fifo entries as one entry
1392     // must always be empty, as there is no extra state bit to distinguish
1393     // between a full GPFIFO buffer and an empty GPFIFO buffer).
1394     //
1395     GPPut = (GPPut + 1) % WATCHDOG_GPFIFO_ENTRIES;
1396 
1397     SLI_LOOP_START(SLI_LOOP_FLAGS_NONE);
1398     {
1399         NvU32 subdeviceId = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
1400         MEM_WR32(
1401             &pKernelRc->watchdogChannelInfo.pControlGPFifo[subdeviceId]->GPPut,
1402             GPPut);
1403     }
1404     SLI_LOOP_END;
1405 
1406     //
1407     // Flush the WRC buffer using fence operation before updating the usermode
1408     // channel ID register
1409     //
1410     osFlushCpuWriteCombineBuffer();
1411 
1412     SLI_LOOP_START(SLI_LOOP_FLAGS_NONE);
1413     {
1414         kfifoUpdateUsermodeDoorbell_HAL(pGpu, GPU_GET_KERNEL_FIFO(pGpu),
1415             pKernelRc->watchdog.notifierToken->info32,
1416             pKernelRc->watchdog.runlistId);
1417     }
1418     SLI_LOOP_END;
1419 }
1420 
1421 
1422