1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "gpu/mem_mgr/sem_surf.h"
25 #include "os/os.h" // NV_MEMORY_NONCONTIGUOUS, osEventNotification
26 #include "gpu/device/device.h"
27 #include "gpu/mem_mgr/mem_mgr.h"
28 #include "gpu/mem_mgr/mem_desc.h"
29 #include "gpu/gpu.h"
30 #include "rmapi/client.h"
31 #include "rmapi/rs_utils.h"
32 
33 #include "class/cl0080.h"
34 #include "class/cl2080.h"
35 #include "class/cl0005.h"
36 #include "class/cl003e.h" // NV01_MEMORY_SYSTEM
37 #include "class/cl0040.h" // NV01_MEMORY_LOCAL_USER
38 
39 static void
40 _semsurfFreeRmClient
41 (
42     SEM_SHARED_DATA *pShared
43 )
44 {
45     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
46 
47     if (pShared->hClient != NV01_NULL_OBJECT)
48     {
49         pRmApi->Free(pRmApi,
50                      pShared->hClient,
51                      pShared->hClient);
52     }
53 
54     pShared->hClient    = NV01_NULL_OBJECT;
55     pShared->hDevice    = NV01_NULL_OBJECT;
56     pShared->hSubDevice = NV01_NULL_OBJECT;
57 }
58 
59 static void
60 _semsurfUnregisterCallback
61 (
62     SEM_SHARED_DATA *pShared
63 )
64 {
65     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
66 
67     if ((pShared->hClient != NV01_NULL_OBJECT) &&
68         (pShared->hEvent != NV01_NULL_OBJECT))
69     {
70         pRmApi->Free(pRmApi,
71                      pShared->hClient,
72                      pShared->hEvent);
73     }
74 
75     pShared->hEvent     = NV01_NULL_OBJECT;
76 }
77 
78 static NvU64
79 _semsurfGetValue
80 (
81     SEM_SHARED_DATA *pShared,
82     NvU64 index
83 )
84 {
85     volatile NvU8 *pSem          = pShared->pSem;
86     volatile NvU8 *pMaxSubmitted = pShared->pMaxSubmitted;
87     volatile NvU8 *pSemBase      = pSem + index * pShared->layout.size;
88 
89     portAtomicMemoryFenceFull();
90 
91     if (pShared->bIs64Bit)
92     {
93         volatile NvU64 *pSemVal = (volatile NvU64 *)pSemBase;
94 
95         NV_PRINTF(LEVEL_INFO, " Read semaphore surface value as 64-bit native\n");
96 
97         return *pSemVal;
98     }
99     else
100     {
101         const volatile NvU32 *pSemVal           = (volatile NvU32 *)pSemBase;
102         volatile NvU8        *pMaxSubmittedBase = pMaxSubmitted + index * pShared->layout.size;
103         volatile NvU64       *pMaxSubmitted     = (volatile NvU64 *)(pMaxSubmittedBase + pShared->layout.maxSubmittedSemaphoreValueOffset);
104 
105         // The ordering below is critical. See NvTimeSemFermiGetPayload() for full comment.
106         // TODO Share this code?
107         NvU64 semVal = *pSemVal;
108 
109         portAtomicMemoryFenceLoad();
110 
111         NvU64 maxSubmitted = portAtomicExAddU64(pMaxSubmitted, 0);
112 
113         NV_PRINTF(LEVEL_INFO, " Read maxSubmitted %" NvU64_fmtu " and 32-bit semVal %"
114                   NvU64_fmtu " from semaphore index %" NvU64_fmtu "\n",
115                   maxSubmitted, semVal, index);
116 
117         // The value is monotonically increasing, and the max outstanding
118         // wait and the value can differ by no more than 2^31-1. Hence...
119         if ((maxSubmitted & 0xFFFFFFFFull) < semVal)
120             maxSubmitted -= 0x100000000ull;
121 
122         return semVal | (maxSubmitted & 0xffffffff00000000ull);
123     }
124 }
125 
126 static void
127 _semsurfSetValue
128 (
129     SEM_SHARED_DATA *pShared,
130     NvU64 index,
131     NvU64 newValue
132 )
133 {
134     volatile NvU8 *pSem          = pShared->pSem;
135     volatile NvU8 *pMaxSubmitted = pShared->pMaxSubmitted;
136     volatile NvU8 *pSemBase      = pSem + index * pShared->layout.size;
137 
138     if (pShared->bIs64Bit)
139     {
140         volatile NvU64 *pSemVal = (volatile NvU64 *)pSemBase;
141 
142         portAtomicMemoryFenceFull();
143 
144         *pSemVal = newValue;
145 
146         NV_PRINTF(LEVEL_INFO, " Updated semaphore surface value as 64-bit "
147                   "native to %" NvU64_fmtu "\n", newValue);
148     }
149     else
150     {
151         volatile NvU32 *pSemVal           = (volatile NvU32 *)pSemBase;
152         volatile NvU8  *pMaxSubmittedBase = pMaxSubmitted + index * pShared->layout.size;
153         volatile NvU64 *pMaxSubmitted     = (volatile NvU64 *)(pMaxSubmittedBase + pShared->layout.maxSubmittedSemaphoreValueOffset);
154         NvU64 oldMax, origMax;
155 
156         portAtomicMemoryFenceFull();
157         origMax = oldMax = portAtomicExAddU64(pMaxSubmitted, 0);
158 
159         // First save the actual value to the max submitted slot using
160         // an atomic max operation
161         while (oldMax < newValue)
162         {
163             NvBool exchanged;
164 
165             // XXX Does CompareAndSwap imply any barriers? The comments in
166             // nvport/atomic.h imply it does, but that this shouldn't be relied
167             // upon, so include another barrier here.
168             portAtomicMemoryFenceFull();
169             exchanged = portAtomicExCompareAndSwapU64(pMaxSubmitted,
170                                                       newValue,
171                                                       oldMax);
172 
173             if (exchanged)
174                 break; // The value was set. Exit.
175 
176             // The "current" value changed. Update it.
177             oldMax = portAtomicExAddU64(pMaxSubmitted, 0);
178         }
179 
180         portAtomicMemoryFenceStore();
181 
182         // Now write the low bits to the GPU-accessible semaphore value.
183         *pSemVal = NvU64_LO32(newValue);
184 
185         NV_PRINTF(LEVEL_INFO, " Updated maxSubmitted from %" NvU64_fmtu " to %"
186                   NvU64_fmtu " and 32-bit semVal %u"
187                   " at semaphore index %" NvU64_fmtu "\n",
188                   origMax, newValue, NvU64_LO32(newValue), index);
189         (void) origMax;
190     }
191 }
192 
193 static NvBool
194 _semsurfNotifyCompleted
195 (
196     SEM_SHARED_DATA *pShared,
197     SEM_PENDING_NOTIFICATIONS *notifications
198 )
199 {
200     OBJGPU *pGpu = pShared->pSemaphoreMem->pGpu;
201     SEM_PENDING_NOTIFICATIONSIter pendIter;
202     SEM_VALUE_LISTENERS_NODE *pVNode;
203     NV_STATUS rmStatus;
204     NvBool valuesChanged = NV_FALSE;
205     EVENTNOTIFICATION **ppListeners;
206 
207     for (pendIter = listIterAll(notifications);
208          listIterNext(&pendIter);
209          pendIter = listIterAll(notifications))
210     {
211         pVNode = pendIter.pValue;
212 
213         rmStatus = osEventNotification(pGpu,
214                                        pVNode->pListeners,
215                                        NV_SEMAPHORE_SURFACE_WAIT_VALUE,
216                                        NULL,
217                                        0);
218 
219         NV_PRINTF(LEVEL_INFO,
220                   "SemMem(0x%08x, 0x%08x): Delivered OS events for value %"
221                   NvU64_fmtu " at idx %" NvU64_fmtu ". Status: %s (0x%08x)\n",
222                   pShared->hClient, pShared->hSemaphoreMem,
223                   pVNode->value, pVNode->index,
224                   nvstatusToString(rmStatus),
225                   rmStatus);
226         (void) rmStatus;
227 
228         /*
229          * Auto-remove the event listeners for this value now that they
230          * have been notified.  It makes little sense to keep notifying
231          * them after this.
232          */
233         ppListeners = &pendIter.pValue->pListeners;
234         while (*ppListeners)
235         {
236             unregisterEventNotificationWithData(ppListeners,
237                                                 (*ppListeners)->hEventClient,
238                                                 NV01_NULL_OBJECT, /* hNotifier/subdevice */
239                                                 (*ppListeners)->hEvent,
240                                                 NV_TRUE, /* match data/notificationHandle */
241                                                 (*ppListeners)->Data);
242         }
243 
244         if (pVNode->newValue != 0) {
245             /*
246              * It is safe to do this operation outside of the spinlock because
247              * it is the RM client's responsibility to ensure the semaphore
248              * surface value is monotonically incrementing by explicitly
249              * ordering all writes to it, including these. This does imply the
250              * need to manually fence memory operations touching the semaphore
251              * value though.
252              */
253             _semsurfSetValue(pShared, pVNode->index, pVNode->newValue);
254 
255             NV_PRINTF(LEVEL_INFO,
256                       "SemMem(0x%08x, 0x%08x): Value updated by waiter "
257                       " to %" NvU64_fmtu " at idx %" NvU64_fmtu "\n",
258                       pShared->hClient, pShared->hSemaphoreMem, pVNode->newValue, pVNode->index);
259 
260             valuesChanged = NV_TRUE;
261         }
262 
263         listRemove(notifications, pendIter.pValue);
264         portMemFree(pendIter.pValue);
265     }
266 
267     return valuesChanged;
268 }
269 
270 static void
271 _semsurfSetMonitoredValue
272 (
273     SEM_SHARED_DATA *pShared,
274     NvU64 index,
275     NvU64 value
276 )
277 {
278     if (!pShared->bHasMonitoredFence)
279         return;
280 
281     /*
282      * This function must be called with the spinlock held to avoid a race
283      * condition where two threads disagree on the current minimum wait value
284      * for a given slot.
285      */
286     volatile NvU64 *pMonitoredFence =
287         (volatile NvU64 *)(pShared->pSem +
288                            pShared->layout.size * index +
289                            pShared->layout.monitoredFenceThresholdOffset);
290 
291     NV_PRINTF(LEVEL_NOTICE,
292               "SemMem(0x%08x, 0x%08x): "
293               "Setting monitored fence value at index %" NvU64_fmtu
294               " to %" NvU64_fmtu "\n",
295               pShared->hClient, pShared->hSemaphoreMem,
296               index, value);
297 
298     // Don't care if this races with loads, but ensure it happens in order
299     // with prior stores.
300     portAtomicMemoryFenceStore();
301     *pMonitoredFence = value;
302 }
303 
304 static void
305 _semsurfEventCallback
306 (
307     void        *pArg,
308     void        *pData,
309     NvHandle     hEvent,
310     NvU32        data,
311     NvU32        status
312 )
313 {
314     SEM_SHARED_DATA *pShared = pArg;
315     SEM_INDEX_LISTENERSIter ilIter;
316     SEM_VALUE_LISTENERSIter vlIter;
317     SEM_PENDING_NOTIFICATIONS notifications;
318     NvU64 index;
319     NvU64 semValue;
320     NvU64 minWaitValue;
321     NvBool removedIndex = NV_FALSE;
322     NvBool valuesChanged = NV_TRUE;
323 
324     NV_PRINTF(LEVEL_INFO, "SemMem(0x%08x, 0x%08x): Got a callback\n", pShared->hClient, pShared->hSemaphoreMem);
325     NV_PRINTF(LEVEL_INFO, "  hEvent: 0x%08x surf event: 0x%08x, data 0x%08x, status 0x%08x\n",
326               hEvent, pShared->hEvent, data, status);
327 
328     while (valuesChanged)
329     {
330         listInitIntrusive(&notifications);
331 
332         NV_PRINTF(LEVEL_INFO, "SemMem(0x%08x, 0x%08x): Entering spinlock\n",
333                   pShared->hClient,
334                   pShared->hSemaphoreMem);
335         portSyncSpinlockAcquire(pShared->pSpinlock);
336 
337         for (ilIter = mapIterAll(&pShared->listenerMap);
338              mapIterNext(&ilIter);
339              removedIndex ? ilIter = mapIterAll(&pShared->listenerMap) :
340              ilIter)
341         {
342             removedIndex = NV_FALSE;
343             minWaitValue = NV_U64_MAX;
344 
345             /*
346              * TODO Only notify if vlIter.pValue->value <= the semaphore current
347              * value at mapKey(&pShared->listenerMap, ilIter->pValue)
348              */
349             index = mapKey(&pShared->listenerMap, ilIter.pValue);
350             semValue = _semsurfGetValue(pShared, index);
351 
352             for (vlIter = listIterAll(&ilIter.pValue->listeners);
353                  listIterNext(&vlIter);
354                  vlIter = listIterAll(&ilIter.pValue->listeners))
355             {
356                 NV_PRINTF(LEVEL_SILENT,
357                           "  Checking index %" NvU64_fmtu " value waiter %"
358                           NvU64_fmtu " against semaphore value %" NvU64_fmtu "\n",
359                           index, vlIter.pValue->value, semValue);
360 
361                 if (semValue >= vlIter.pValue->value)
362                 {
363                     listInsertExisting(&notifications, NULL, vlIter.pValue);
364                     listRemove(&ilIter.pValue->listeners, vlIter.pValue);
365                 }
366                 else
367                 {
368                     /* No other values at this index should be signaled yet. */
369                     minWaitValue = vlIter.pValue->value;
370                     break;
371                 }
372             }
373 
374             if (listCount(&ilIter.pValue->listeners) == 0)
375             {
376                 NV_ASSERT(minWaitValue == NV_U64_MAX);
377                 mapRemove(&pShared->listenerMap, ilIter.pValue);
378                 portMemFree(ilIter.pValue);
379                 removedIndex = NV_TRUE;
380             }
381             else
382             {
383                 removedIndex = NV_FALSE;
384             }
385 
386             _semsurfSetMonitoredValue(pShared, index, minWaitValue);
387         }
388 
389         portSyncSpinlockRelease(pShared->pSpinlock);
390         NV_PRINTF(LEVEL_INFO, "SemMem(0x%08x, 0x%08x): Exited spinlock\n",
391                   pShared->hClient,
392                   pShared->hSemaphoreMem);
393 
394         // Send notifications outside of spinlock. They have already been removed
395         // from the object-wide lists, so their existance is private to this
396         // instance of this function now. Hence, no locking is required for this
397         // step.
398         valuesChanged = _semsurfNotifyCompleted(pShared, &notifications);
399     }
400 }
401 
402 static NV_STATUS
403 _semsurfAllocRmClient
404 (
405     SemaphoreSurface *pSemSurf
406 )
407 {
408     SEM_SHARED_DATA *pShared = pSemSurf->pShared;
409     NV0080_ALLOC_PARAMETERS nv0080AllocParams;
410     NV2080_ALLOC_PARAMETERS nv2080AllocParams;
411     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
412 
413     // Allocate an internal client, device, and subDevice for the semaphore
414     // surface. These will be used to allocate the internally-managed memory
415     // object wrapped by the semaphore surface, and to to register callbacks
416     // on the GPU for semaphore awaken/notification interrupts.
417     NV_ASSERT_OK_OR_RETURN(
418         pRmApi->AllocWithHandle(pRmApi,
419                                 NV01_NULL_OBJECT,
420                                 NV01_NULL_OBJECT,
421                                 NV01_NULL_OBJECT,
422                                 NV01_ROOT,
423                                 &pShared->hClient,
424                                 sizeof(pShared->hClient)));
425 
426     portMemSet(&nv0080AllocParams, 0, sizeof(nv0080AllocParams));
427     nv0080AllocParams.deviceId =
428         gpuGetDeviceInstance(GPU_RES_GET_GPU(pSemSurf));
429 
430     NV_ASSERT_OK_OR_RETURN(
431         pRmApi->Alloc(pRmApi,
432                       pShared->hClient,
433                       pShared->hClient,
434                       &pShared->hDevice,
435                       NV01_DEVICE_0,
436                       &nv0080AllocParams,
437                       sizeof(nv0080AllocParams)));
438 
439     // Allocate a subDevice
440     portMemSet(&nv2080AllocParams, 0, sizeof(nv2080AllocParams));
441     nv2080AllocParams.subDeviceId =
442         gpumgrGetSubDeviceInstanceFromGpu(GPU_RES_GET_GPU(pSemSurf));
443 
444     NV_ASSERT_OK_OR_RETURN(
445         pRmApi->Alloc(pRmApi,
446                       pShared->hClient,
447                       pShared->hDevice,
448                       &pShared->hSubDevice,
449                       NV20_SUBDEVICE_0,
450                       &nv2080AllocParams,
451                       sizeof(nv2080AllocParams)));
452 
453     return NV_OK;
454 }
455 
456 static void
457 _semsurfFreeMemory
458 (
459     SEM_SHARED_DATA *pShared
460 )
461 {
462     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
463 
464     if (pShared->hClient != NV01_NULL_OBJECT)
465     {
466         if ((pShared->hMaxSubmittedMem != NV01_NULL_OBJECT) &&
467             (pShared->hMaxSubmittedMem != pShared->hSemaphoreMem))
468             pRmApi->Free(pRmApi,
469                          pShared->hClient,
470                          pShared->hMaxSubmittedMem);
471 
472         if (pShared->hSemaphoreMem != NV01_NULL_OBJECT)
473             pRmApi->Free(pRmApi,
474                          pShared->hClient,
475                          pShared->hSemaphoreMem);
476     }
477 
478     pShared->hSemaphoreMem    = NV01_NULL_OBJECT;
479     pShared->hMaxSubmittedMem = NV01_NULL_OBJECT;
480 }
481 
482 static NV_STATUS
483 _semsurfDupMemory
484 (
485     SemaphoreSurface *pSemSurf,
486     NV_SEMAPHORE_SURFACE_ALLOC_PARAMETERS  *pAllocParams
487 )
488 {
489     SEM_SHARED_DATA *pShared = pSemSurf->pShared;
490     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
491 
492     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
493         pRmApi->DupObject(pRmApi,
494                           pShared->hClient,
495                           pShared->hDevice,
496                           &pShared->hSemaphoreMem,
497                           RES_GET_CLIENT_HANDLE(pSemSurf),
498                           pAllocParams->hSemaphoreMem,
499                           NV04_DUP_HANDLE_FLAGS_NONE));
500 
501     if (pAllocParams->hMaxSubmittedMem != NV01_NULL_OBJECT)
502     {
503         NV_CHECK_OR_RETURN(LEVEL_ERROR,
504                            !pSemSurf->pShared->bIs64Bit,
505                            NV_ERR_INVALID_PARAMETER);
506 
507         if (pAllocParams->hMaxSubmittedMem != pAllocParams->hSemaphoreMem)
508         {
509             NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
510                 pRmApi->DupObject(pRmApi,
511                                   pShared->hClient,
512                                   pShared->hDevice,
513                                   &pShared->hMaxSubmittedMem,
514                                   RES_GET_CLIENT_HANDLE(pSemSurf),
515                                   pAllocParams->hMaxSubmittedMem,
516                                   NV04_DUP_HANDLE_FLAGS_NONE));
517         }
518         else
519         {
520             pSemSurf->pShared->hMaxSubmittedMem =
521                 pSemSurf->pShared->hSemaphoreMem;
522         }
523     }
524     else
525     {
526         pSemSurf->pShared->hMaxSubmittedMem = NV01_NULL_OBJECT;
527     }
528 
529     return NV_OK;
530 }
531 
532 static NV_STATUS
533 _semsurfRegisterCallback
534 (
535     SemaphoreSurface *pSemSurf
536 )
537 {
538     SEM_SHARED_DATA *pShared = pSemSurf->pShared;
539     NV0005_ALLOC_PARAMETERS nv0005AllocParams;
540     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
541 
542     pShared->callback.func = _semsurfEventCallback;
543     pShared->callback.arg = pShared;
544 
545     portMemSet(&nv0005AllocParams, 0, sizeof(nv0005AllocParams));
546     nv0005AllocParams.hParentClient = pShared->hClient;
547     nv0005AllocParams.hClass        = NV01_EVENT_KERNEL_CALLBACK_EX;
548     nv0005AllocParams.notifyIndex   = NV2080_NOTIFIERS_FIFO_EVENT_MTHD |
549         NV01_EVENT_NONSTALL_INTR |
550         NV01_EVENT_WITHOUT_EVENT_DATA |
551         NV01_EVENT_SUBDEVICE_SPECIFIC |
552         DRF_NUM(0005, _NOTIFY_INDEX, _SUBDEVICE,
553                 gpumgrGetSubDeviceInstanceFromGpu(GPU_RES_GET_GPU(pSemSurf)));
554     nv0005AllocParams.data          = NV_PTR_TO_NvP64(&pShared->callback);
555 
556     return pRmApi->Alloc(pRmApi,
557                          pShared->hClient,
558                          pShared->hSubDevice,
559                          &pShared->hEvent,
560                          NV01_EVENT_KERNEL_CALLBACK_EX,
561                          &nv0005AllocParams,
562                          sizeof(nv0005AllocParams));
563 }
564 
565 static NV_STATUS
566 semsurfCopyConstruct
567 (
568     SemaphoreSurface *pSemSurf,
569     CALL_CONTEXT *pCallContext,
570     RS_RES_ALLOC_PARAMS_INTERNAL *pParams
571 )
572 {
573     RsResourceRef *pSrcRef = pParams->pSrcRef;
574     SemaphoreSurface *pSrcSemSurf = dynamicCast(pSrcRef->pResource,
575                                                 SemaphoreSurface);
576 
577     pSemSurf->pShared = pSrcSemSurf->pShared;
578     NV_ASSERT(pSemSurf->pShared->refCount > 0);
579     pSemSurf->pShared->refCount++;
580 
581     NV_PRINTF(LEVEL_NOTICE,
582               "SemSurf(0x%08x, 0x%08x): Copied with SemMem(0x%08x, 0x%08x)\n",
583               RES_GET_CLIENT_HANDLE(pSemSurf), RES_GET_HANDLE(pSemSurf),
584               pSemSurf->pShared->hClient, pSemSurf->pShared->hSemaphoreMem);
585 
586     return NV_OK;
587 }
588 
589 static void
590 _semsurfDestroyShared
591 (
592     SEM_SHARED_DATA *pShared
593 )
594 {
595     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
596 
597     if (pShared == NULL)
598         return;
599 
600     _semsurfUnregisterCallback(pShared);
601 
602     if (pShared->pMaxSubmittedMem)
603     {
604         if (pShared->maxSubmittedKernAddr)
605         {
606             pRmApi->UnmapFromCpu(pRmApi,
607                                  pShared->hClient,
608                                  pShared->hDevice,
609                                  pShared->hMaxSubmittedMem,
610                                  pShared->maxSubmittedKernAddr,
611                                  0, 0);
612 
613             pShared->maxSubmittedKernAddr = 0;
614             pShared->pMaxSubmitted = NULL;
615         }
616     }
617 
618     if (pShared->pSemaphoreMem)
619     {
620         if (pShared->semKernAddr)
621         {
622             pRmApi->UnmapFromCpu(pRmApi,
623                                  pShared->hClient,
624                                  pShared->hDevice,
625                                  pShared->hSemaphoreMem,
626                                  pShared->semKernAddr,
627                                  0, 0);
628         }
629 
630         pShared->semKernAddr = 0;
631         pShared->pSem = NULL;
632     }
633 
634     pShared->pMaxSubmittedMem = NULL;
635     pShared->pSemaphoreMem = NULL;
636 
637     _semsurfFreeMemory(pShared);
638     _semsurfFreeRmClient(pShared);
639 
640     if (pShared->pSpinlock)
641         portSyncSpinlockDestroy(pShared->pSpinlock);
642 
643     portMemFree(pShared);
644 }
645 
646 static NvBool
647 _semsurfValidateIndex
648 (
649     SEM_SHARED_DATA *pShared,
650     NvU64 index
651 )
652 {
653     const NvU64 slotSize = pShared->layout.size;
654 
655     if (((index * slotSize) + slotSize) <= pShared->pSemaphoreMem->pMemDesc->Size)
656         return NV_TRUE;
657     else
658         return NV_FALSE;
659 }
660 
661 NV_STATUS
662 semsurfConstruct_IMPL
663 (
664     SemaphoreSurface *pSemSurf,
665     CALL_CONTEXT *pCallContext,
666     RS_RES_ALLOC_PARAMS_INTERNAL *pParams
667 )
668 {
669     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
670     SEM_SHARED_DATA                        *pShared;
671     RmClient                               *pClient;
672     RsClient                               *pRsClient;
673     NV_SEMAPHORE_SURFACE_ALLOC_PARAMETERS  *pAllocParams = pParams->pAllocParams;
674     NvU32                                   maxSubmittedCoherency;
675     NV_STATUS                               status = NV_OK;
676     NvU64                                   i;
677 
678     if (RS_IS_COPY_CTOR(pParams))
679     {
680         //
681         // Copy constructor (NvRmDupObject)
682         //
683         return semsurfCopyConstruct(pSemSurf, pCallContext, pParams);
684     }
685 
686     NV_CHECK_OR_RETURN(LEVEL_ERROR,
687                        pAllocParams->flags == 0ULL,
688                        NV_ERR_INVALID_ARGUMENT);
689 
690     pShared = pSemSurf->pShared = portMemAllocNonPaged(sizeof(*pSemSurf->pShared));
691     NV_ASSERT_TRUE_OR_GOTO(status, pShared != NULL, NV_ERR_NO_MEMORY, ctorFailed);
692 
693     portMemSet(pShared, 0, sizeof(*pShared));
694     pShared->refCount = 1;
695     mapInitIntrusive(&pShared->listenerMap);
696 
697     pShared->pSpinlock = portSyncSpinlockCreate(portMemAllocatorGetGlobalNonPaged());
698     NV_ASSERT_TRUE_OR_GOTO(status, pShared->pSpinlock != NULL, NV_ERR_NO_MEMORY, ctorFailed);
699 
700     NV_ASSERT_OK_OR_GOTO(status, _semsurfAllocRmClient(pSemSurf), ctorFailed);
701 
702     NV_ASSERT_OK_OR_GOTO(status,
703         pRmApi->Control(pRmApi,
704                         pShared->hClient,
705                         pShared->hSubDevice,
706                         NV2080_CTRL_CMD_FB_GET_SEMAPHORE_SURFACE_LAYOUT,
707                        &pShared->layout,
708                         sizeof pShared->layout),
709     ctorFailed);
710 
711     pShared->bIs64Bit           = !!(pShared->layout.caps & NV2080_CTRL_FB_GET_SEMAPHORE_SURFACE_LAYOUT_CAPS_64BIT_SEMAPHORES_SUPPORTED);
712     pShared->bHasMonitoredFence = !!(pShared->layout.caps & NV2080_CTRL_FB_GET_SEMAPHORE_SURFACE_LAYOUT_CAPS_MONITORED_FENCE_SUPPORTED);
713 
714     pClient = serverutilGetClientUnderLock(pShared->hClient);
715     NV_ASSERT_TRUE_OR_GOTO(status, pClient != NULL, NV_ERR_INVALID_STATE, ctorFailed);
716 
717     pRsClient = staticCast(pClient, RsClient);
718 
719     NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
720         _semsurfDupMemory(pSemSurf, pAllocParams),
721         ctorFailed);
722 
723     NV_ASSERT_OK_OR_GOTO(status, memGetByHandle(pRsClient, pShared->hSemaphoreMem, &pShared->pSemaphoreMem), ctorFailed);
724 
725     /*
726      * XXX Limit semaphore surfaces to sysmem for the time being. Vidmem
727      * access latency may be too high to allow handling semaphore surfaces
728      * directly in the lockless top-half ISR.
729      */
730     NV_CHECK_OR_ELSE(LEVEL_ERROR,
731                      DRF_VAL(OS32, _ATTR, _LOCATION,
732                              pShared->pSemaphoreMem->Attr) ==
733                      NVOS32_ATTR_LOCATION_PCI,
734                      status = NV_ERR_INVALID_PARAMETER;
735                      goto ctorFailed);
736 
737     NV_ASSERT_OK_OR_GOTO(status,
738         pRmApi->MapToCpu(pRmApi,
739                          pShared->hClient,
740                          pShared->hDevice,
741                          pShared->hSemaphoreMem,
742                          0,
743                          pShared->pSemaphoreMem->pMemDesc->Size,
744                          &pShared->semKernAddr,
745                          0),
746         ctorFailed);
747 
748     pShared->pSem = KERNEL_POINTER_FROM_NvP64(NvU8 *, pShared->semKernAddr);
749 
750     if (!pShared->bIs64Bit)
751     {
752         if (pShared->hSemaphoreMem != pShared->hMaxSubmittedMem)
753         {
754 
755             NV_ASSERT_OK_OR_GOTO(status,
756                 memGetByHandle(pRsClient, pShared->hMaxSubmittedMem, &pShared->pMaxSubmittedMem),
757                 ctorFailed);
758 
759             NV_CHECK_OR_ELSE(LEVEL_ERROR,
760                              pShared->pMaxSubmittedMem->pMemDesc->Size >=
761                              pShared->pSemaphoreMem->pMemDesc->Size,
762                              status = NV_ERR_BUFFER_TOO_SMALL;
763                              goto ctorFailed);
764 
765             NV_ASSERT_OK_OR_GOTO(status,
766                 pRmApi->MapToCpu(pRmApi,
767                                  pShared->hClient,
768                                  pShared->hDevice,
769                                  pShared->hMaxSubmittedMem,
770                                  0,
771                                  pShared->pMaxSubmittedMem->pMemDesc->Size,
772                                  &pShared->maxSubmittedKernAddr,
773                                  0),
774                 ctorFailed);
775 
776             pShared->pMaxSubmitted =
777                 KERNEL_POINTER_FROM_NvP64(NvU8 *, pShared->maxSubmittedKernAddr);
778         }
779         else
780         {
781             pShared->pMaxSubmitted = pShared->pSem;
782             pShared->pMaxSubmittedMem = pShared->pSemaphoreMem;
783         }
784 
785         /*
786          * XXX Limit semaphore surfaces to sysmem for the time being. Vidmem
787          * access latency may be too high to allow handling semaphore surfaces
788          * directly in the lockless top-half ISR.
789          */
790         NV_CHECK_OR_ELSE(LEVEL_ERROR,
791                          DRF_VAL(OS32, _ATTR, _LOCATION,
792                                  pShared->pMaxSubmittedMem->Attr) ==
793                          NVOS32_ATTR_LOCATION_PCI,
794                          status = NV_ERR_INVALID_PARAMETER;
795                          goto ctorFailed);
796 
797         /*
798          * The max submitted value memory must be usable with CPU atomics,
799          * which at least on ARM architectures requires cached mappings.
800          */
801         maxSubmittedCoherency = DRF_VAL(OS32, _ATTR, _COHERENCY, pShared->pMaxSubmittedMem->Attr);
802         NV_CHECK_OR_ELSE(LEVEL_ERROR,
803                          (maxSubmittedCoherency != NVOS32_ATTR_COHERENCY_UNCACHED) &&
804                          (maxSubmittedCoherency != NVOS32_ATTR_COHERENCY_WRITE_COMBINE),
805                          status = NV_ERR_INVALID_PARAMETER;
806                          goto ctorFailed);
807     }
808 
809 
810     NV_ASSERT_OK_OR_GOTO(status, _semsurfRegisterCallback(pSemSurf), ctorFailed);
811 
812     for (i = 0; _semsurfValidateIndex(pShared, i); i++)
813     {
814         _semsurfSetMonitoredValue(pShared, i, NV_U64_MAX);
815     }
816 
817     /* Any failures should have already taken the ctorFailed path */
818     NV_ASSERT_OR_GOTO(status == NV_OK, ctorFailed);
819 
820     NV_PRINTF(LEVEL_NOTICE,
821               "SemSurf(0x%08x, 0x%08x): Constructed with SemMem(0x%08x, 0x%08x)\n",
822               RES_GET_CLIENT_HANDLE(pSemSurf), RES_GET_HANDLE(pSemSurf),
823               pShared->hClient, pShared->hSemaphoreMem);
824 
825     return NV_OK;
826 
827 ctorFailed:
828     _semsurfDestroyShared(pShared);
829 
830     return status;
831 }
832 
833 void
834 semsurfDestruct_IMPL
835 (
836     SemaphoreSurface *pSemSurf
837 )
838 {
839     SEM_SHARED_DATA *pShared = pSemSurf->pShared;
840     SEM_INDEX_LISTENERS_NODE *pIndexListeners;
841     SEM_VALUE_LISTENERS_NODE *pValueListeners;
842     SEM_VALUE_LISTENERS_NODE *pNextValueListeners;
843     EVENTNOTIFICATION *pListener;
844     EVENTNOTIFICATION *pNextListener;
845     NvU64 minWaitValue;
846     NvU64 curIdx;
847     NvHandle hSemClient = RES_GET_CLIENT_HANDLE(pSemSurf);
848     NvHandle hSemSurf = RES_GET_HANDLE(pSemSurf);
849     NvHandle hSharedClient = pShared->hClient;
850     NvHandle hSharedMem = pShared->hSemaphoreMem;
851 
852     NV_ASSERT_OR_RETURN_VOID(pShared);
853     NV_ASSERT_OR_GOTO(pShared->pSpinlock, skipRemoveListeners);
854 
855     NV_PRINTF(LEVEL_NOTICE,
856               "SemSurf(0x%08x, 0x%08x): Destructor with SemMem(0x%08x, 0x%08x)\n",
857               hSemClient, hSemSurf, hSharedClient, hSharedMem);
858 
859     /* Remove any pending waiters instantiated via this sibling */
860     NV_PRINTF(LEVEL_INFO, "SemMem(0x%08x, 0x%08x): Entering spinlock\n",
861               hSharedClient, hSharedMem);
862 
863     portSyncSpinlockAcquire(pShared->pSpinlock);
864 
865     for (pIndexListeners = mapFindGEQ(&pShared->listenerMap, 0);
866          pIndexListeners;
867          pIndexListeners = mapFindGEQ(&pShared->listenerMap, curIdx + 1))
868     {
869         minWaitValue = NV_U64_MAX;
870         curIdx = mapKey(&pShared->listenerMap, pIndexListeners);
871 
872         for (pValueListeners = listHead(&pIndexListeners->listeners);
873              pValueListeners;
874              pValueListeners = pNextValueListeners)
875         {
876             for (pListener = pValueListeners->pListeners;
877                  pListener;
878                  pListener = pNextListener)
879             {
880                 pNextListener = pListener->Next;
881 
882                 if ((pListener->hEventClient == hSemClient) &&
883                     (pListener->hEvent == hSemSurf))
884                 {
885                     NV_PRINTF(LEVEL_WARNING,
886                               "SemSurf(0x%08x, 0x%08x): "
887                               "Deleting active waiter at index %" NvU64_fmtu
888                               " value %" NvU64_fmtu "\n",
889                               hSemClient,
890                               hSemSurf,
891                               curIdx,
892                               pValueListeners->value);
893 
894                     unregisterEventNotificationWithData(&pValueListeners->pListeners,
895                                                         hSemClient,
896                                                         NV01_NULL_OBJECT, /* hNotifier/subdevice */
897                                                         hSemSurf,
898                                                         NV_TRUE, /* match data/notificationHandle */
899                                                         pListener->Data);
900                 }
901             }
902 
903             pNextValueListeners = listNext(&pIndexListeners->listeners,
904                                            pValueListeners);
905 
906             if (!pValueListeners->pListeners)
907             {
908                 listRemove(&pIndexListeners->listeners, pValueListeners);
909                 portMemFree(pValueListeners);
910             }
911             else if (pValueListeners->value < minWaitValue)
912             {
913                 minWaitValue = pValueListeners->value;
914             }
915         }
916 
917         _semsurfSetMonitoredValue(pShared,
918                                   curIdx,
919                                   minWaitValue);
920 
921         if (listCount(&pIndexListeners->listeners) == 0)
922         {
923             NV_ASSERT(minWaitValue == NV_U64_MAX);
924             mapRemove(&pShared->listenerMap, pIndexListeners);
925             portMemFree(pIndexListeners);
926         }
927     }
928 
929     portSyncSpinlockRelease(pShared->pSpinlock);
930     NV_PRINTF(LEVEL_INFO, "SemMem(0x%08x, 0x%08x): Exited spinlock\n",
931               hSharedClient, hSharedMem);
932 
933 skipRemoveListeners:
934     NV_ASSERT(pShared->refCount > 0);
935     --pShared->refCount;
936     if (pShared->refCount <= 0)
937         _semsurfDestroyShared(pShared);
938 }
939 
940 NV_STATUS
941 semsurfCtrlCmdRefMemory_IMPL
942 (
943     SemaphoreSurface *pSemSurf,
944     NV_SEMAPHORE_SURFACE_CTRL_REF_MEMORY_PARAMS *pParams
945 )
946 {
947     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
948     SEM_SHARED_DATA *pShared = pSemSurf->pShared;
949     NV_STATUS status = NV_OK;
950     NvHandle hSemMemOut = pParams->hSemaphoreMem;
951     NvHandle hMaxMemOut = pParams->hMaxSubmittedMem;
952     NvHandle hDeviceDst = RES_GET_HANDLE(GPU_RES_GET_DEVICE(pSemSurf));
953     NvBool bSemMemDuped = NV_FALSE;
954     NvBool bMaxMemDuped = NV_FALSE;
955 
956     NV_CHECK_OK_OR_GOTO(status,
957                         LEVEL_ERROR,
958                         pRmApi->DupObject(pRmApi,
959                                           RES_GET_CLIENT_HANDLE(pSemSurf),
960                                           hDeviceDst,
961                                           &hSemMemOut,
962                                           pShared->hClient,
963                                           pShared->hSemaphoreMem,
964                                           0),
965                         error);
966 
967     bSemMemDuped = NV_TRUE;
968 
969     if (pShared->pMaxSubmittedMem)
970     {
971         if (pShared->pMaxSubmittedMem != pShared->pSemaphoreMem)
972         {
973             NV_CHECK_OK_OR_GOTO(status,
974                                 LEVEL_ERROR,
975                                 pRmApi->DupObject(pRmApi,
976                                                   RES_GET_CLIENT_HANDLE(pSemSurf),
977                                                   hDeviceDst,
978                                                   &hMaxMemOut,
979                                                   pShared->hClient,
980                                                   pShared->hMaxSubmittedMem,
981                                                   0),
982                                 error);
983 
984             bMaxMemDuped = NV_TRUE;
985         }
986         else
987         {
988             if (pParams->hMaxSubmittedMem != pParams->hSemaphoreMem)
989             {
990                 status = NV_ERR_INVALID_PARAMETER;
991                 goto error;
992             }
993 
994             hMaxMemOut = hSemMemOut;
995         }
996     }
997     else
998     {
999         if (pParams->hMaxSubmittedMem != NV01_NULL_OBJECT)
1000         {
1001             return NV_ERR_INVALID_PARAMETER;
1002         }
1003     }
1004 
1005     pParams->hSemaphoreMem = hSemMemOut;
1006     pParams->hMaxSubmittedMem = hMaxMemOut;
1007 
1008     return NV_OK;
1009 
1010 error:
1011     if (bMaxMemDuped)
1012     {
1013         pRmApi->Free(pRmApi,
1014                      RES_GET_CLIENT_HANDLE(pSemSurf),
1015                      hMaxMemOut);
1016     }
1017 
1018     if (bSemMemDuped)
1019     {
1020         pRmApi->Free(pRmApi,
1021                      RES_GET_CLIENT_HANDLE(pSemSurf),
1022                      hSemMemOut);
1023     }
1024 
1025     return status;
1026 }
1027 
1028 NV_STATUS
1029 semsurfCtrlCmdBindChannel_IMPL
1030 (
1031     SemaphoreSurface *pSemSurf,
1032     NV_SEMAPHORE_SURFACE_CTRL_BIND_CHANNEL_PARAMS *pParams
1033 )
1034 {
1035     return NV_ERR_NOT_SUPPORTED;
1036 }
1037 
1038 NV_STATUS
1039 _semsurfSetValueAndNotify
1040 (
1041     SemaphoreSurface *pSemSurf,
1042     NvU64 index,
1043     NvU64 newValue
1044 )
1045 {
1046     SEM_INDEX_LISTENERS_NODE *valueNode;
1047     SEM_VALUE_LISTENERSIter vlIter;
1048     SEM_PENDING_NOTIFICATIONS notifications;
1049     NvU64 curValue;
1050     NvU64 minWaitValue;
1051     NvBool valueChanged = NV_TRUE;
1052 
1053     _semsurfSetValue(pSemSurf->pShared, index, newValue);
1054 
1055     while (valueChanged)
1056     {
1057         curValue = newValue;
1058         listInitIntrusive(&notifications);
1059 
1060         NV_PRINTF(LEVEL_INFO, "SemMem(0x%08x, 0x%08x): Entering spinlock\n",
1061                   pSemSurf->pShared->hClient, pSemSurf->pShared->hSemaphoreMem);
1062         portSyncSpinlockAcquire(pSemSurf->pShared->pSpinlock);
1063 
1064         valueNode = mapFind(&pSemSurf->pShared->listenerMap, index);
1065 
1066         if (valueNode)
1067         {
1068             minWaitValue = NV_U64_MAX;
1069 
1070             for (vlIter = listIterAll(&valueNode->listeners);
1071                  listIterNext(&vlIter);
1072                  vlIter = listIterAll(&valueNode->listeners))
1073             {
1074                 NV_PRINTF(LEVEL_SILENT,
1075                           "  Checking index %" NvU64_fmtu " value waiter %"
1076                           NvU64_fmtu " against semaphore value %" NvU64_fmtu
1077                           " from CPU write\n",
1078                           index, vlIter.pValue->value, curValue);
1079 
1080                 if (curValue >= vlIter.pValue->value)
1081                 {
1082                     listInsertExisting(&notifications, NULL, vlIter.pValue);
1083                     listRemove(&valueNode->listeners, vlIter.pValue);
1084                     if (vlIter.pValue->newValue != 0)
1085                     {
1086                         NV_ASSERT(vlIter.pValue->newValue >= newValue);
1087                         newValue = vlIter.pValue->newValue;
1088                     }
1089                 }
1090                 else
1091                 {
1092                     /* No other values at this index should be signaled yet. */
1093                     minWaitValue = vlIter.pValue->value;
1094                     break;
1095                 }
1096             }
1097 
1098             if (listCount(&valueNode->listeners) == 0)
1099             {
1100                 NV_ASSERT(minWaitValue == NV_U64_MAX);
1101                 mapRemove(&pSemSurf->pShared->listenerMap, valueNode);
1102                 portMemFree(valueNode);
1103             }
1104 
1105             _semsurfSetMonitoredValue(pSemSurf->pShared, index, minWaitValue);
1106         }
1107 
1108         portSyncSpinlockRelease(pSemSurf->pShared->pSpinlock);
1109         NV_PRINTF(LEVEL_INFO, "SemMem(0x%08x, 0x%08x): Exited spinlock\n",
1110                   pSemSurf->pShared->hClient, pSemSurf->pShared->hSemaphoreMem);
1111 
1112         // Send notifications outside of spinlock. They have already been removed
1113         // from the object-wide lists, so their existance is private to this
1114         // instance of this function now. Hence, no locking is required for this
1115         // step.
1116         valueChanged = _semsurfNotifyCompleted(pSemSurf->pShared,
1117                                                &notifications);
1118 
1119         NV_ASSERT(!valueChanged || (newValue > curValue));
1120     }
1121 
1122     return NV_OK;
1123 }
1124 
1125 static NV_STATUS
1126 _semsurfAddWaiter
1127 (
1128     SemaphoreSurface *pSemSurf,
1129     RsClient *pRsClient,
1130     NvU32 hSemaphoreSurf,
1131     NvU64 index,
1132     NvU64 waitValue,
1133     NvU64 newValue,
1134     NvP64 notificationHandle,
1135     NvBool bKernel
1136 )
1137 {
1138     SEM_INDEX_LISTENERS_NODE *pIndexListeners;
1139     SEM_VALUE_LISTENERSIter vlIter;
1140     SEM_VALUE_LISTENERS_NODE *pValueListeners;
1141     EVENTNOTIFICATION *pListener;
1142     NvHandle hClient = pRsClient->hClient;
1143     NvBool valid;
1144     NV_STATUS rmStatus = NV_OK;
1145     NvU64 semValue;
1146     NvU64 prevMinWaitValue = NV_U64_MAX;
1147 
1148     // The new value must be greater than the wait value to guarantee
1149     // the monotonically incrementing behavior required of semaphore surface
1150     // values.
1151     if ((newValue != 0) && (newValue <= waitValue))
1152     {
1153         NV_PRINTF(LEVEL_ERROR,
1154                   "SemSurf(0x%08x, 0x%08x): "
1155                   "Requested backwards update from %" NvU64_fmtu "->%"
1156                   NvU64_fmtu " at idx %" NvU64_fmtu "\n",
1157                   hClient, hSemaphoreSurf, waitValue, newValue, index);
1158         return NV_ERR_INVALID_STATE;
1159     }
1160 
1161     portSyncSpinlockAcquire(pSemSurf->pShared->pSpinlock);
1162 
1163     pIndexListeners = mapFind(&pSemSurf->pShared->listenerMap, index);
1164 
1165     if (pIndexListeners && listCount(&pIndexListeners->listeners) > 0)
1166     {
1167         pValueListeners = listHead(&pIndexListeners->listeners);
1168         prevMinWaitValue = pValueListeners->value;
1169     }
1170 
1171     /* Check if semaphore value has already been reached. This must be done
1172      * inside the spinlock to prevent the following race from dropping
1173      * notifications:
1174      *
1175      * -Enter RM control registering waiter for value 1
1176      * -RM control read current semaphore value 0
1177      * -GPU semaphore write land semaphore value 1
1178      * -GPU conditional TRAP non-stall interrupt.
1179      * -RM interrupt handler walks handlers, finds no registered waiters
1180      * -RM control proceeds, adds waiter for value 1.
1181      * -No further interrupts are generated.
1182      * -FAILURE - client's wait stalls indefinitely.
1183      *
1184      * Placing the value read inside the spinlock forces this deterministic
1185      * ordering of the above events:
1186      *
1187      * -Enter RM control registering waiter for value 1
1188      *  *lock*
1189      * -RM control read current semaphore value 0
1190      * -GPU semaphore write land semaphore value 1
1191      * -GPU conditional TRAP non-stall interrupt.
1192      * -RM interrupt handler walks handlers
1193      *  *lock* -- Blocks, defers rest of handler
1194      * -RM control proceeds, adds waiter for value 1.
1195      *  *unlock
1196      * -RM interrupt handler walks handlers
1197      *  *lock* -- Unblocks, defers rest of handler
1198      * --finds the registered waiter, signals it
1199      *  *unlock*
1200      * -SUCCESS - client's wait was signaled.
1201      *
1202      * Additionally, note there is a race involving checking the semaphore
1203      * value and updating the monitored fence/conditional trap value here. In
1204      * order for the semaphore surface event handler to have a chance to run
1205      * and queue up a waiter list walk, the monitored fence value must be
1206      * updated before the conditional trap methods execute. These execute
1207      * after the methods to update the semaphore value, but the following
1208      * race is possible if the monitored fence value is updated after the
1209      * check for already-signalled semaphores:
1210      *
1211      * -RM control reads current semaphore value 0
1212      *  *Semaphore not yet signaled. Proceed with registering a waiter*
1213      * -GPU semaphore write land semaphore value 1
1214      * -GPU conditional TRAP executes: Monitored fence value indicates no waiter
1215      *  *Interrupt is not generated*
1216      * -RM control updates monitored fence value to 1
1217      * -RM control finishes registering waiter on value 1
1218      * -FAILURE - conditional trap method has already executed,
1219      *  so waiter won't signal until something else generate san interrupt!
1220      *
1221      * Hence, enough work must be done to update the monitored fence value
1222      * before checking for an already signalled semaphore, and then the
1223      * monitored fence update, if any, must be undone if an already-signalled
1224      * semaphore is indeed found. If the updated monitored fence value has
1225      * already caused an interrupt to be generated, that's OK, the handler will
1226      * just be a no-op.
1227      */
1228     if (waitValue < prevMinWaitValue)
1229     {
1230         _semsurfSetMonitoredValue(pSemSurf->pShared, index, waitValue);
1231     }
1232 
1233     semValue = _semsurfGetValue(pSemSurf->pShared, index);
1234 
1235     if (semValue >= waitValue)
1236     {
1237         NV_PRINTF(LEVEL_NOTICE,
1238                   "SemSurf(0x%08x, 0x%08x): "
1239                   "Detected already signalled wait for %" NvU64_fmtu
1240                   " at idx %" NvU64_fmtu " current val %" NvU64_fmtu "\n",
1241                   hClient, hSemaphoreSurf, waitValue, index, semValue);
1242         rmStatus = NV_ERR_ALREADY_SIGNALLED;
1243         goto failureUnlock;
1244     }
1245 
1246     if (!pIndexListeners)
1247     {
1248         pIndexListeners = portMemAllocNonPaged(sizeof(*pIndexListeners));
1249         if (!pIndexListeners)
1250         {
1251             NV_PRINTF(LEVEL_ERROR,
1252                       "SemSurf(0x%08x, 0x%08x): "
1253                       "Failed to allocate a semaphore index listeners node\n",
1254                       hClient, hSemaphoreSurf);
1255             rmStatus = NV_ERR_NO_MEMORY;
1256             goto failureUnlock;
1257         }
1258 
1259         listInitIntrusive(&pIndexListeners->listeners);
1260 
1261         if (!mapInsertExisting(&pSemSurf->pShared->listenerMap,
1262                                index,
1263                                pIndexListeners))
1264         {
1265             NV_PRINTF(LEVEL_ERROR,
1266                       "SemSurf(0x%08x, 0x%08x): "
1267                       "Duplicate entry found for new index listener list\n",
1268                       hClient, hSemaphoreSurf);
1269             portMemFree(pIndexListeners);
1270             rmStatus = NV_ERR_INVALID_STATE;
1271             goto failureUnlock;
1272         }
1273     }
1274 
1275     /* XXX Would be easier/faster if the value listener list was a priority queue */
1276     vlIter = listIterAll(&pIndexListeners->listeners);
1277     while ((valid = listIterNext(&vlIter)) && vlIter.pValue->value < waitValue);
1278 
1279     if (valid && vlIter.pValue->value == waitValue)
1280     {
1281         pValueListeners = vlIter.pValue;
1282     }
1283     else
1284     {
1285         pValueListeners = portMemAllocNonPaged(sizeof(*pValueListeners));
1286 
1287         if (!pValueListeners)
1288         {
1289             NV_PRINTF(LEVEL_ERROR,
1290                       "SemSurf(0x%08x, 0x%08x): "
1291                       "Failed to allocate a semaphore value listener node\n",
1292                       hClient, hSemaphoreSurf);
1293             rmStatus = NV_ERR_NO_MEMORY;
1294             goto cleanupIndexListener;
1295         }
1296         portMemSet(pValueListeners, 0, sizeof(*pValueListeners));
1297 
1298         pValueListeners->value = waitValue;
1299         pValueListeners->index = index;
1300 
1301         listInsertExisting(&pIndexListeners->listeners,
1302                            valid ? vlIter.pValue : NULL, pValueListeners);
1303     }
1304 
1305     if (newValue)
1306     {
1307         // It is a client error if two waiters request to auto-update the value
1308         // of a semaphore after it reaches the same prerequisite value, as the
1309         // order of those signal two operations is indeterminate. This could be
1310         // handled by taking the max here without violating any forward progress
1311         // rules, but it is better to return an error given the likelyhood there
1312         // is an error in the client's logic.
1313         if ((pValueListeners->newValue != 0))
1314         {
1315             NV_PRINTF(LEVEL_ERROR,
1316                       "SemSurf(0x%08x, 0x%08x): "
1317                       "Existing value-updating waiter at index %" NvU64_fmtu
1318                       " for wait value %" NvU64_fmtu ":\n Existing update "
1319                       "value: %" NvU64_fmtu "\n Requested update value: %"
1320                       NvU64_fmtu "\n",
1321                       hClient, hSemaphoreSurf, index, waitValue,
1322                       pValueListeners->newValue, newValue);
1323             rmStatus = NV_ERR_STATE_IN_USE;
1324             goto cleanupValueListener;
1325 
1326         }
1327         pValueListeners->newValue = newValue;
1328     }
1329 
1330     if (notificationHandle)
1331     {
1332         for (pListener = pValueListeners->pListeners;
1333              pListener;
1334              pListener = pListener->Next)
1335         {
1336             if (pListener->Data == notificationHandle)
1337             {
1338                 NV_PRINTF(LEVEL_ERROR,
1339                           "SemSurf(0x%08x, 0x%08x): "
1340                           "Notification handle already registered at index %"
1341                           NvU64_fmtu " for wait value %" NvU64_fmtu ".\n",
1342                           hClient, hSemaphoreSurf, index, waitValue);
1343                 /* Back out the auto-update value applied above, if any */
1344                 pValueListeners->newValue = 0;
1345                 rmStatus = NV_ERR_STATE_IN_USE;
1346                 goto cleanupValueListener;
1347             }
1348         }
1349 
1350         rmStatus = registerEventNotification(&pValueListeners->pListeners,
1351                                              pRsClient,
1352                                              NV01_NULL_OBJECT, /* hNotifier/subdevice */
1353                                              hSemaphoreSurf,
1354                                              NV_SEMAPHORE_SURFACE_WAIT_VALUE |
1355                                              NV01_EVENT_WITHOUT_EVENT_DATA,
1356                                              /*
1357                                               * Allow user to pass this in
1358                                               * explicitly?  Doesn't seem to provide
1359                                               * any added value value and adds more
1360                                               * parameter validation work
1361                                               */
1362                                              bKernel ?
1363                                              NV01_EVENT_KERNEL_CALLBACK_EX :
1364                                              NV01_EVENT_WIN32_EVENT,
1365                                              notificationHandle,
1366                                              !bKernel);
1367 
1368         if (rmStatus != NV_OK)
1369         {
1370             NV_PRINTF(LEVEL_ERROR,
1371                       "SemSurf(0x%08x, 0x%08x): "
1372                       "Failed to register event notification for semaphore surface "
1373                       "listener at index %" NvU64_fmtu ", value %" NvU64_fmtu
1374                       ".  Status: 0x%08x\n",
1375                       hClient, hSemaphoreSurf, index, waitValue, rmStatus);
1376             goto cleanupValueListener;
1377         }
1378     }
1379 
1380     portSyncSpinlockRelease(pSemSurf->pShared->pSpinlock);
1381 
1382     NV_PRINTF(LEVEL_INFO,
1383               "SemSurf(0x%08x, 0x%08x): "
1384               "Registered semaphore surface value listener at index %"
1385               NvU64_fmtu ", value %" NvU64_fmtu " current value %" NvU64_fmtu
1386               " post-wait value %" NvU64_fmtu " notification: %" NvU64_fmtx "\n",
1387               hClient, hSemaphoreSurf, index, waitValue, semValue, newValue,
1388               (NvU64)notificationHandle);
1389 
1390     return rmStatus;
1391 
1392 cleanupValueListener:
1393     if (!pValueListeners->pListeners)
1394     {
1395         listRemove(&pIndexListeners->listeners, pValueListeners);
1396         portMemFree(pValueListeners);
1397     }
1398 
1399 cleanupIndexListener:
1400     if (listCount(&pIndexListeners->listeners) == 0)
1401     {
1402        mapRemove(&pSemSurf->pShared->listenerMap, pIndexListeners);
1403        portMemFree(pIndexListeners);
1404     }
1405 
1406 failureUnlock:
1407     // Must be done with the lock.
1408     if (waitValue < prevMinWaitValue)
1409     {
1410         _semsurfSetMonitoredValue(pSemSurf->pShared, index, prevMinWaitValue);
1411     }
1412 
1413     portSyncSpinlockRelease(pSemSurf->pShared->pSpinlock);
1414 
1415     // There's no point of going through the trouble of notifying the waiter in
1416     // this case, but it is worth immediately running the auto-update code here
1417     // to avoid the need for the caller to call back in through the whole RMAPI
1418     // framework with the set value control to emulate an auto-update itself.
1419     // Just take care of it here.
1420     if ((rmStatus == NV_ERR_ALREADY_SIGNALLED) && (newValue != 0))
1421     {
1422         // On success, the ALREADY_SIGNALLED value must be preserved if the
1423         // client also asked to register an OS event/callback. If no event/
1424         // callback was requested, return success, as there is all requested
1425         // operations have been carried out.
1426         //
1427         // On error, the client must always be notified this call did nothing.
1428         NV_STATUS setValStatus = _semsurfSetValueAndNotify(pSemSurf,
1429                                                            index,
1430                                                            newValue);
1431 
1432         if ((setValStatus != NV_OK) || !notificationHandle)
1433             rmStatus = setValStatus;
1434     }
1435 
1436     return rmStatus;
1437 }
1438 
1439 NV_STATUS
1440 semsurfCtrlCmdRegisterWaiter_IMPL
1441 (
1442     SemaphoreSurface *pSemSurf,
1443     NV_SEMAPHORE_SURFACE_CTRL_REGISTER_WAITER_PARAMS *pParams
1444 )
1445 {
1446     CALL_CONTEXT       *pCallContext = resservGetTlsCallContext();
1447     NvP64               notificationHandle = 0;
1448     const NvBool        bKernel = (pCallContext->secInfo.paramLocation ==
1449                                    PARAM_LOCATION_KERNEL);
1450     NV_STATUS           rmStatus = NV_OK;
1451 
1452     NV_CHECK_OR_RETURN(LEVEL_ERROR,
1453                        _semsurfValidateIndex(pSemSurf->pShared, pParams->index),
1454                        NV_ERR_INVALID_INDEX);
1455 
1456     if (pParams->notificationHandle)
1457     {
1458         if (bKernel)
1459         {
1460             notificationHandle = (NvP64)pParams->notificationHandle;
1461         }
1462         else
1463         {
1464             rmStatus = osUserHandleToKernelPtr(pCallContext->pClient->hClient,
1465                                                (NvP64)pParams->notificationHandle,
1466                                                &notificationHandle);
1467         }
1468     }
1469 
1470     if (rmStatus != NV_OK)
1471     {
1472         NV_PRINTF(LEVEL_ERROR,
1473                   "Invalid semaphore surface notification handle: 0x%016"
1474                   NvU64_fmtx ", status: %s (0x%08x)\n",
1475                   pParams->notificationHandle, nvstatusToString(rmStatus),
1476                   rmStatus);
1477         return rmStatus;
1478     }
1479 
1480     rmStatus = _semsurfAddWaiter(pSemSurf,
1481                                  staticCast(RES_GET_CLIENT(pSemSurf), RsClient),
1482                                  RES_GET_HANDLE(pSemSurf),
1483                                  pParams->index,
1484                                  pParams->waitValue,
1485                                  pParams->newValue,
1486                                  notificationHandle,
1487                                  bKernel);
1488 
1489     return rmStatus;
1490 }
1491 
1492 NV_STATUS
1493 semsurfCtrlCmdSetValue_IMPL
1494 (
1495     SemaphoreSurface *pSemSurf,
1496     NV_SEMAPHORE_SURFACE_CTRL_SET_VALUE_PARAMS *pParams
1497 )
1498 {
1499     NV_CHECK_OR_RETURN(LEVEL_ERROR,
1500                        _semsurfValidateIndex(pSemSurf->pShared, pParams->index),
1501                        NV_ERR_INVALID_INDEX);
1502 
1503     return _semsurfSetValueAndNotify(pSemSurf,
1504                                      pParams->index,
1505                                      pParams->newValue);
1506 }
1507 
1508 static NV_STATUS
1509 _semsurfDelWaiter
1510 (
1511     SemaphoreSurface *pSemSurf,
1512     NvU64 index,
1513     NvU64 waitValue,
1514     NvP64 notificationHandle,
1515     NvBool bKernel
1516 )
1517 {
1518     SEM_INDEX_LISTENERS_NODE *pIndexListeners;
1519     SEM_VALUE_LISTENERSIter vlIter;
1520     SEM_VALUE_LISTENERS_NODE *pValueListeners;
1521     NvHandle hClient = RES_GET_CLIENT_HANDLE(pSemSurf);
1522     NvHandle hSemaphoreSurf = RES_GET_HANDLE(pSemSurf);
1523     NvBool valid;
1524     NV_STATUS rmStatus = NV_ERR_GENERIC;
1525 
1526     NV_PRINTF(LEVEL_INFO, "SemMem(0x%08x, 0x%08x): Entering spinlock\n",
1527               pSemSurf->pShared->hClient,
1528               pSemSurf->pShared->hSemaphoreMem);
1529     portSyncSpinlockAcquire(pSemSurf->pShared->pSpinlock);
1530 
1531     pIndexListeners = mapFind(&pSemSurf->pShared->listenerMap, index);
1532 
1533     if (!pIndexListeners)
1534         goto unlockReturn;
1535 
1536     vlIter = listIterAll(&pIndexListeners->listeners);
1537     while ((valid = listIterNext(&vlIter)) && vlIter.pValue->value < waitValue);
1538 
1539     if (!valid || (vlIter.pValue->value != waitValue))
1540         goto unlockReturn;
1541 
1542     pValueListeners = vlIter.pValue;
1543 
1544     rmStatus = unregisterEventNotificationWithData(&pValueListeners->pListeners,
1545                                                    hClient,
1546                                                    /* hNotifier/subdevice */
1547                                                    NV01_NULL_OBJECT,
1548                                                    hSemaphoreSurf,
1549                                                    /* match notificationHandle */
1550                                                    NV_TRUE,
1551                                                    notificationHandle);
1552 
1553     if (rmStatus != NVOS_STATUS_SUCCESS)
1554         goto unlockReturn;
1555 
1556     NV_PRINTF(LEVEL_INFO, "SemSurf(0x%08x, 0x%08x): "
1557               "Unregistered event notification " NvP64_fmt
1558               " from semaphore surface listener at index %" NvU64_fmtu
1559               ", value %" NvU64_fmtu ".\n",
1560               hClient, hSemaphoreSurf, notificationHandle, index, waitValue);
1561 
1562     if (!pValueListeners->pListeners)
1563     {
1564         listRemove(&pIndexListeners->listeners, pValueListeners);
1565         portMemFree(pValueListeners);
1566 
1567         if (listCount(&pIndexListeners->listeners) == 0)
1568         {
1569             mapRemove(&pSemSurf->pShared->listenerMap, pIndexListeners);
1570             portMemFree(pIndexListeners);
1571             _semsurfSetMonitoredValue(pSemSurf->pShared, index, NV_U64_MAX);
1572         }
1573         else
1574         {
1575             pValueListeners = listHead(&pIndexListeners->listeners);
1576             _semsurfSetMonitoredValue(pSemSurf->pShared, index,
1577                                       pValueListeners->value);
1578         }
1579     }
1580 
1581     rmStatus = NV_OK;
1582 
1583 unlockReturn:
1584     portSyncSpinlockRelease(pSemSurf->pShared->pSpinlock);
1585     NV_PRINTF(LEVEL_INFO, "SemMem(0x%08x, 0x%08x): Exited spinlock\n",
1586               pSemSurf->pShared->hClient, pSemSurf->pShared->hSemaphoreMem);
1587 
1588     return rmStatus;
1589 }
1590 
1591 NV_STATUS
1592 semsurfCtrlCmdUnregisterWaiter_IMPL
1593 (
1594     SemaphoreSurface *pSemSurf,
1595     NV_SEMAPHORE_SURFACE_CTRL_UNREGISTER_WAITER_PARAMS *pParams
1596 )
1597 {
1598     CALL_CONTEXT       *pCallContext = resservGetTlsCallContext();
1599     NvP64               notificationHandle = 0;
1600     const NvBool        bKernel = (pCallContext->secInfo.paramLocation ==
1601                                    PARAM_LOCATION_KERNEL);
1602     NV_STATUS           rmStatus = NV_OK;
1603 
1604     NV_CHECK_OR_RETURN(LEVEL_ERROR,
1605                        _semsurfValidateIndex(pSemSurf->pShared, pParams->index),
1606                        NV_ERR_INVALID_INDEX);
1607 
1608     if (pParams->notificationHandle)
1609     {
1610         if (bKernel)
1611         {
1612             notificationHandle = (NvP64)pParams->notificationHandle;
1613         }
1614         else
1615         {
1616             rmStatus = osUserHandleToKernelPtr(pCallContext->pClient->hClient,
1617                                                (NvP64)pParams->notificationHandle,
1618                                                &notificationHandle);
1619         }
1620     }
1621 
1622     if (rmStatus != NV_OK)
1623     {
1624         NV_PRINTF(LEVEL_ERROR,
1625                   "Invalid semaphore surface notification handle: 0x%016"
1626                   NvU64_fmtx ", status: %s (0x%08x)\n",
1627                   pParams->notificationHandle, nvstatusToString(rmStatus),
1628                   rmStatus);
1629         return rmStatus;
1630     }
1631 
1632     rmStatus = _semsurfDelWaiter(pSemSurf,
1633                                  pParams->index,
1634                                  pParams->waitValue,
1635                                  notificationHandle,
1636                                  bKernel);
1637 
1638     return rmStatus;
1639 }
1640