1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2019 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #ifndef __NV_SEMAPHORE_H__
25 #define __NV_SEMAPHORE_H__
26 
27 #include "nvtypes.h"
28 #include "nvCpuIntrinsics.h"
29 
30 #ifdef __cplusplus
31 extern "C" {
32 #endif
33 
/*
 * Report semaphore layout used with pre-Volta GPUs, which can only
 * read/write a 32-bit semaphore payload.  The CPU repurposes the 'timer'
 * field to stash the full 64-bit "max submitted" value (see the
 * NvTimeSemFermi* helpers below).
 */
typedef volatile struct {
    NvU32 payload;     // 32-bit GPU-accessible semaphore value
    NvU32 reportValue;
    NvU64 timer;       // repurposed by the CPU: last full 64-bit value submitted
} NvReportSemaphore32;
39 
/*
 * Report semaphore layout used with Volta and newer GPUs, which support a
 * native 64-bit semaphore value (see the NvTimeSemVolta* helpers below).
 */
typedef volatile struct {
    NvU64 reportValue; // full 64-bit GPU-accessible semaphore value
    NvU64 timer;
} NvReportSemaphore64;
44 
/*
 * Generic report semaphore: the same 16 bytes, interpreted according to the
 * GPU generation in use.
 */
typedef volatile union {
    NvReportSemaphore32 sema32; // pre-Volta interpretation
    NvReportSemaphore64 sema64; // Volta+ interpretation
} NvReportSemaphore;
49 
50 /*
51  * These structures can't change size.  They map to the GPU and other driver
52  * components expect the same size.
53  */
54 ct_assert(sizeof(NvReportSemaphore32) == 16);
55 ct_assert(sizeof(NvReportSemaphore64) == 16);
56 ct_assert(sizeof(NvReportSemaphore)   == 16);
57 
58 /*
59  * Pre-Volta GPUs can only read/write a 32-bit semaphore.  Rather than try to
60  * use multiple semaphore writes to emulate a full 64-bit write, which is prone
61  * to race conditions when the value wraps, derive the full 64-bit value by
 * comparing the current GPU-accessible value with the last value written by
63  * the CPU or submitted to be written by the GPU, which is stashed in the
64  * timestamp field of the semaphore by the CPU in both these cases.
65  */
NvTimeSemFermiSetMaxSubmittedVal(volatile NvU64 * maxSubmittedPtr,const NvU64 value)66 static inline void NvTimeSemFermiSetMaxSubmittedVal(
67     volatile NvU64 *maxSubmittedPtr,
68     const NvU64 value)
69 {
70     NvU64 oldValue =
71             (NvU64)__NVatomicCompareExchange64((volatile NvS64 *)maxSubmittedPtr,
72                                                0, 0);
73 
74     // Atomically set report->timer to max(value, report->time).
75     while (oldValue < value) {
76         const NvU64 prevValue =
77             (NvU64)__NVatomicCompareExchange64((volatile NvS64 *)maxSubmittedPtr,
78                                                (NvS64)value,
79                                                (NvS64)oldValue);
80         if (prevValue == oldValue) {
81             // The specified value was set.  Done.
82             nvAssert(*maxSubmittedPtr >= value);
83             break;
84         }
85 
86         oldValue = prevValue;
87     }
88 }
89 
static inline void NvTimeSemFermiSetMaxSubmitted(
    NvReportSemaphore32 *report,
    const NvU64 value)
{
    // On pre-Volta semaphores the timer field doubles as the slot tracking
    // the maximum 64-bit value submitted so far.
    NvTimeSemFermiSetMaxSubmittedVal(&report->timer, value);
}
96 
NvTimeSemFermiGetPayloadVal(volatile void * payloadPtr,volatile void * maxSubmittedPtr)97 static inline NvU64 NvTimeSemFermiGetPayloadVal(
98     volatile void *payloadPtr,
99     volatile void *maxSubmittedPtr)
100 {
101     // The ordering of the two operations below is critical.  Other threads
102     // may be submitting GPU work that modifies the semaphore value, or
103     // modifying it from the CPU themselves.  Both of those operations first
104     // set the 64-bit max submitted/timer value, then modify or submit work
105     // to modify the 32-bit payload value.  Consider this hypothetical timeline
106     // if the order of operations below is reversed:
107     //
108     //   thread1:
109     //   -SetMaxSubmitted(0x1);
110     //   -report->payload = 0x1;
111     //
112     //   thread2:
113     //   -Reads 0x1 from report->timer
114     //
115     //   thread1:
116     //   -SetMaxSubmitted(0x7fffffff);
117     //   -report->payload = 0x7fffffff;
118     //   -SetMaxSubmitted(0x100000000);
119     //   -report->payload = 0x00000000;
120     //
121     //   thread2:
122     //   -Reads 0x0 from report->payload
123     //
124     // The logic below would see 0 (payload) is less than 1 (max submitted) and
125     // determine a wrap is outstanding, subtract one from the high 32-bits of
126     // the max submitted value (0x00000000 - 0x1), overflow, and return the
127     // current 64-bit value as 0xffffffff00000000 when the correct value is
128     // 0x100000000.  To avoid this, we must read the payload prior to reading
129     // the max submitted value from the timer field.  The logic can correctly
130     // adjust the max submitted value back down if a wrap occurs between these
131     // two operations, but has no way to bump the max submitted value up if a
132     // wrap occurs with the opposite ordering.
133     NvU64 current = *(volatile NvU32*)payloadPtr;
134     // Use an atomic exchange to ensure the 64-bit read is atomic even on 32-bit
135     // CPUs.
136     NvU64 submitted = (NvU64)
137         __NVatomicCompareExchange64((volatile NvS64 *)maxSubmittedPtr, 0ll, 0ll);
138 
139     nvAssert(!(current & 0xFFFFFFFF00000000ull));
140 
141     // The value is monotonically increasing, and differ by no more than
142     // 2^31 - 1.  Hence, if the low word of the submitted value is less
143     // than the low word of the current value, exactly one 32-bit wrap
144     // occurred between the current value and the most recently
145     // submitted value.  Walk back the high word to match the value
146     // associated with the current GPU-visible value.
147     if ((submitted & 0xFFFFFFFFull) < current) {
148         submitted -= 0x100000000ull;
149     }
150 
151     current |= (submitted & 0xFFFFFFFF00000000ull);
152 
153     return current;
154 }
155 
NvTimeSemFermiGetPayload(NvReportSemaphore32 * report)156 static inline NvU64 NvTimeSemFermiGetPayload(
157     NvReportSemaphore32 *report)
158 {
159     return NvTimeSemFermiGetPayloadVal(&report->payload, &report->timer);
160 }
161 
static inline void NvTimeSemFermiSetPayload(
    NvReportSemaphore32 *report,
    const NvU64 payload)
{
    // Protocol order matters: stash the full 64-bit value in the timer field
    // first, so concurrent readers never see a payload newer than the
    // max-submitted value...
    NvTimeSemFermiSetMaxSubmittedVal(&report->timer, payload);

    // ...then publish the low 32 bits where the GPU can see them.
    report->payload = (NvU32)(payload & 0xFFFFFFFFULL);
}
172 
173 /*
174  * Volta and up.
175  */
176 
NvTimeSemVoltaGetPayloadVal(volatile void * payloadPtr)177 static inline NvU64 NvTimeSemVoltaGetPayloadVal(
178     volatile void *payloadPtr)
179 {
180     nvAssert(payloadPtr);
181     return (NvU64)
182         __NVatomicCompareExchange64((volatile NvS64 *)payloadPtr,
183                                     0, 0);
184 }
185 
NvTimeSemVoltaGetPayload(NvReportSemaphore64 * report)186 static inline NvU64 NvTimeSemVoltaGetPayload(
187     NvReportSemaphore64 *report)
188 {
189     return NvTimeSemVoltaGetPayloadVal(&report->reportValue);
190 }
191 
static inline void NvTimeSemVoltaSetPayload(
    NvReportSemaphore64 *report,
    const NvU64 payload)
{
    // Install 'payload' with a CAS loop so the 64-bit store is atomic even
    // on 32-bit CPUs.
    NvU64 expected = 0;

    for (;;) {
        const NvU64 found = (NvU64)
            __NVatomicCompareExchange64((volatile NvS64 *)&report->reportValue,
                                        (NvS64)payload, (NvS64)expected);

        if (found == expected) {
            // The store took effect.  Done.
            break;
        }

        // Lost a race with another writer.  Payloads only move forward, so
        // whatever got stored must still be below the value being written.
        nvAssert(found < payload);

        expected = found;
    }
}
212 
213 #ifdef __cplusplus
214 };
215 #endif
216 
217 #endif /* __NV_SEMAPHORE_H__ */
218