1 /*******************************************************************************
2     Copyright (c) 2016-2021 NVIDIA Corporation
3 
4     Permission is hereby granted, free of charge, to any person obtaining a copy
5     of this software and associated documentation files (the "Software"), to
6     deal in the Software without restriction, including without limitation the
7     rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8     sell copies of the Software, and to permit persons to whom the Software is
9     furnished to do so, subject to the following conditions:
10 
11         The above copyright notice and this permission notice shall be
12         included in all copies or substantial portions of the Software.
13 
14     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17     THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20     DEALINGS IN THE SOFTWARE.
21 
22 *******************************************************************************/
23 
24 #include "uvm_linux.h"
25 #include "uvm_global.h"
26 #include "uvm_gpu.h"
27 #include "uvm_hal.h"
28 #include "clc365.h"
29 #include "uvm_volta_fault_buffer.h"
30 
31 typedef struct {
32     NvU8 bufferEntry[NVC365_NOTIFY_BUF_SIZE];
33 } access_counter_buffer_entry_c365_t;
34 
// Enable access counter notifications on the given parent GPU.
//
// Writes rm_info.accessCounterMask to the register that
// rm_info.pHubIntrEnSet points to, using a single once-only GPU write.
void uvm_hal_volta_enable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu)
{
    volatile NvU32 *intr_en_set = parent_gpu->access_counter_buffer_info.rm_info.pHubIntrEnSet;
    NvU32 access_counter_mask = parent_gpu->access_counter_buffer_info.rm_info.accessCounterMask;

    UVM_GPU_WRITE_ONCE(*intr_en_set, access_counter_mask);
}
45 
// Disable access counter notifications on the given parent GPU.
//
// Writes rm_info.accessCounterMask to the register that
// rm_info.pHubIntrEnClear points to, using a single once-only GPU write.
void uvm_hal_volta_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu)
{
    volatile NvU32 *intr_en_clear = parent_gpu->access_counter_buffer_info.rm_info.pHubIntrEnClear;
    NvU32 access_counter_mask = parent_gpu->access_counter_buffer_info.rm_info.accessCounterMask;

    UVM_GPU_WRITE_ONCE(*intr_en_clear, access_counter_mask);
}
56 
// Intentionally empty on Volta: clearing access counter notifications is only
// needed by GPUs that use pulse-based interrupts. Both parameters are unused.
void uvm_hal_volta_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get)
{
}
61 
uvm_hal_volta_access_counter_buffer_entry_size(uvm_parent_gpu_t * parent_gpu)62 NvU32 uvm_hal_volta_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu)
63 {
64     return NVC365_NOTIFY_BUF_SIZE;
65 }
66 
get_access_counter_inst_aperture(NvU32 * access_counter_entry)67 static uvm_aperture_t get_access_counter_inst_aperture(NvU32 *access_counter_entry)
68 {
69     NvU32 hw_aperture_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_APERTURE);
70 
71     switch (hw_aperture_value) {
72         case NVC365_NOTIFY_BUF_ENTRY_APERTURE_VID_MEM:
73             return UVM_APERTURE_VID;
74         case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_COHERENT:
75         case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_NONCOHERENT:
76              return UVM_APERTURE_SYS;
77     }
78 
79     UVM_ASSERT_MSG(false, "Invalid inst aperture value: %d\n", hw_aperture_value);
80     return UVM_APERTURE_MAX;
81 }
82 
get_access_counter_aperture(NvU32 * access_counter_entry)83 static uvm_aperture_t get_access_counter_aperture(NvU32 *access_counter_entry)
84 {
85     NvU32 hw_aperture_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, APERTURE);
86     NvU32 peer_id = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, PEER_ID);
87 
88     switch (hw_aperture_value) {
89         case NVC365_NOTIFY_BUF_ENTRY_APERTURE_VID_MEM:
90             return UVM_APERTURE_VID;
91         case NVC365_NOTIFY_BUF_ENTRY_APERTURE_PEER_MEM:
92             return UVM_APERTURE_PEER(peer_id);
93         case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_COHERENT:
94         case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_NONCOHERENT:
95              return UVM_APERTURE_SYS;
96     }
97 
98     UVM_ASSERT_MSG(false, "Invalid aperture value: %d\n", hw_aperture_value);
99     return UVM_APERTURE_MAX;
100 }
101 
get_address(uvm_parent_gpu_t * parent_gpu,NvU32 * access_counter_entry)102 static uvm_gpu_address_t get_address(uvm_parent_gpu_t *parent_gpu, NvU32 *access_counter_entry)
103 {
104     NvU64 address;
105     bool is_virtual;
106     NvU64 addr_hi = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_HI);
107     NvU64 addr_lo = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_LO);
108     NvU32 addr_type_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_TYPE);
109 
110     address = addr_lo + (addr_hi << HWSIZE_MW(C365, NOTIFY_BUF_ENTRY, ADDR_LO));
111     is_virtual = (addr_type_value == NVC365_NOTIFY_BUF_ENTRY_ADDR_TYPE_GVA);
112 
113     if (is_virtual) {
114         address = uvm_parent_gpu_canonical_address(parent_gpu, address);
115         return uvm_gpu_address_virtual(address);
116     }
117     else {
118         uvm_aperture_t aperture = get_access_counter_aperture(access_counter_entry);
119 
120         UVM_ASSERT(parent_gpu->access_counters_can_use_physical_addresses);
121         UVM_ASSERT_MSG(addr_type_value == NVC365_NOTIFY_BUF_ENTRY_ADDR_TYPE_GPA,
122                        "Invalid address type%u\n", addr_type_value);
123 
124         return uvm_gpu_address_physical(aperture, address);
125     }
126 }
127 
get_access_counter_type(NvU32 * access_counter_entry)128 static uvm_access_counter_type_t get_access_counter_type(NvU32 *access_counter_entry)
129 {
130     NvU32 type_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, TYPE);
131     if (type_value == NVC365_NOTIFY_BUF_ENTRY_TYPE_CPU)
132         return UVM_ACCESS_COUNTER_TYPE_MOMC;
133     else
134         return UVM_ACCESS_COUNTER_TYPE_MIMC;
135 }
136 
// Return a pointer to the index-th entry of the access counter notification
// buffer, viewed as an array of NvU32 words.
//
// The buffer base comes from rm_info.bufferAddress and is indexed in units of
// access_counter_buffer_entry_c365_t. index must be below max_notifications
// (asserted).
static NvU32 *get_access_counter_buffer_entry(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
    access_counter_buffer_entry_c365_t *entries;

    UVM_ASSERT(index < parent_gpu->access_counter_buffer_info.max_notifications);

    entries = (access_counter_buffer_entry_c365_t *)parent_gpu->access_counter_buffer_info.rm_info.bufferAddress;

    return (NvU32 *)(entries + index);
}
149 
uvm_hal_volta_access_counter_buffer_entry_is_valid(uvm_parent_gpu_t * parent_gpu,NvU32 index)150 bool uvm_hal_volta_access_counter_buffer_entry_is_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index)
151 {
152     NvU32 *access_counter_entry;
153     bool is_valid;
154 
155     access_counter_entry = get_access_counter_buffer_entry(parent_gpu, index);
156 
157     is_valid = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, VALID);
158 
159     return is_valid;
160 }
161 
// Set the VALID field of the index-th entry of the access counter
// notification buffer to FALSE.
void uvm_hal_volta_access_counter_buffer_entry_clear_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
    // Keep the entry pointer in a local so the macro argument below has no
    // side effects.
    NvU32 *entry = get_access_counter_buffer_entry(parent_gpu, index);

    WRITE_HWCONST_MW(entry, C365, NOTIFY_BUF_ENTRY, VALID, FALSE);
}
170 
// Decode the index-th entry of the access counter notification buffer into
// buffer_entry, then clear the entry's valid bit.
//
// The entry's valid bit must already be set when this function is called
// (asserted). The counter type and address are always decoded. For virtual
// addresses, the instance pointer (address and aperture), MMU engine id, MMU
// engine type and VE ID are also filled in. For physical MIMC notifications,
// address bits 47 and above are discarded. Counter value, sub-granularity,
// bank and tag are always decoded.
void uvm_hal_volta_access_counter_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
                                                     NvU32 index,
                                                     uvm_access_counter_buffer_entry_t *buffer_entry)
{
    NvU32 *access_counter_entry;

    // Valid bit must be set before this function is called
    UVM_ASSERT(uvm_hal_volta_access_counter_buffer_entry_is_valid(parent_gpu, index));

    access_counter_entry = get_access_counter_buffer_entry(parent_gpu, index);

    buffer_entry->counter_type = get_access_counter_type(access_counter_entry);

    buffer_entry->address = get_address(parent_gpu, access_counter_entry);

    if (buffer_entry->address.is_virtual) {
        NvU64 inst_hi, inst_lo;

        inst_hi = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_HI);
        inst_lo = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_LO);
        buffer_entry->virtual_info.instance_ptr.address =
            inst_lo + (inst_hi << HWSIZE_MW(C365, NOTIFY_BUF_ENTRY, INST_LO));

        // HW value contains the 4K page number. Shift to build the full address
        buffer_entry->virtual_info.instance_ptr.address <<= 12;

        buffer_entry->virtual_info.instance_ptr.aperture = get_access_counter_inst_aperture(access_counter_entry);

        buffer_entry->virtual_info.mmu_engine_id =
            READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, MMU_ENGINE_ID);

        // MMU engine id aligns with the fault buffer packets. Therefore, we
        // reuse the helpers to compute the MMU engine type and the VE ID from
        // the fault buffer class
        buffer_entry->virtual_info.mmu_engine_type =
            parent_gpu->arch_hal->mmu_engine_id_to_type(buffer_entry->virtual_info.mmu_engine_id);

        buffer_entry->virtual_info.ve_id =
            parent_gpu->fault_buffer_hal->get_ve_id(buffer_entry->virtual_info.mmu_engine_id,
                                                    buffer_entry->virtual_info.mmu_engine_type);
    }
    else if (buffer_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC) {
        // Keep only bits 46:0 of the physical address; bits 47 and above are
        // ignored. See the definition of uvm_gpu_t::dma_addressable_start for
        // why higher bits might be set.
        //
        // The shifted operand is NvU64 so the shift by 47 is well defined
        // regardless of the width of unsigned long.
        const NvU64 mask_46_0 = ((NvU64)1 << 47) - 1;
        buffer_entry->address.address &= mask_46_0;
    }

    buffer_entry->counter_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, COUNTER_VAL);

    buffer_entry->sub_granularity = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, SUB_GRANULARITY);

    buffer_entry->bank = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, BANK);

    buffer_entry->tag = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, NOTIFY_TAG);

    // Automatically clear valid bit for the entry in the access counter buffer
    uvm_hal_volta_access_counter_buffer_entry_clear_valid(parent_gpu, index);
}
231