1 /*******************************************************************************
2     Copyright (c) 2017-2024 NVIDIA Corporation
3 
4     Permission is hereby granted, free of charge, to any person obtaining a copy
5     of this software and associated documentation files (the "Software"), to
6     deal in the Software without restriction, including without limitation the
7     rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8     sell copies of the Software, and to permit persons to whom the Software is
9     furnished to do so, subject to the following conditions:
10 
11         The above copyright notice and this permission notice shall be
12         included in all copies or substantial portions of the Software.
13 
14     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17     THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20     DEALINGS IN THE SOFTWARE.
21 *******************************************************************************/
22 
23 #include "linux/sort.h"
24 #include "nv_uvm_interface.h"
25 #include "uvm_gpu_access_counters.h"
26 #include "uvm_global.h"
27 #include "uvm_gpu.h"
28 #include "uvm_hal.h"
29 #include "uvm_kvmalloc.h"
30 #include "uvm_tools.h"
31 #include "uvm_va_block.h"
32 #include "uvm_va_range.h"
33 #include "uvm_va_space_mm.h"
34 #include "uvm_pmm_sysmem.h"
35 #include "uvm_perf_module.h"
36 #include "uvm_ats.h"
37 #include "uvm_ats_faults.h"
38 
39 #define UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN     1
40 #define UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_DEFAULT 256
41 #define UVM_PERF_ACCESS_COUNTER_GRANULARITY         UVM_ACCESS_COUNTER_GRANULARITY_2M
42 #define UVM_PERF_ACCESS_COUNTER_THRESHOLD_MIN       1
43 #define UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX       ((1 << 16) - 1)
44 #define UVM_PERF_ACCESS_COUNTER_THRESHOLD_DEFAULT   256
45 
46 #define UVM_ACCESS_COUNTER_ACTION_CLEAR     0x1
47 #define UVM_ACCESS_COUNTER_PHYS_ON_MANAGED  0x2
48 
49 // Each page in a tracked physical range may belong to a different VA Block. We
50 // preallocate an array of reverse map translations. However, access counter
51 // granularity can be set to up to 16G, which would require an array too large
52 // to hold all possible translations. Thus, we set an upper bound for reverse
53 // map translations, and we perform as many translation requests as needed to
54 // cover the whole tracked range.
55 #define UVM_MAX_TRANSLATION_SIZE (2 * 1024 * 1024ULL)
56 #define UVM_SUB_GRANULARITY_REGIONS 32
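
// Illustrative sketch (guarded out of the build): the number of reverse map
// translation requests needed to cover one tracked range follows directly from
// the cap above. The helper name is hypothetical and only spells out the
// arithmetic described in the preceding comment.
#if 0
static NvU64 example_translations_needed(NvU64 granularity_bytes)
{
    // e.g. a 16G tracking granularity needs 16G / 2M = 8192 translation
    // requests, while any granularity <= 2M needs a single request.
    if (granularity_bytes <= UVM_MAX_TRANSLATION_SIZE)
        return 1;

    return granularity_bytes / UVM_MAX_TRANSLATION_SIZE;
}
#endif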
57 
58 static unsigned g_uvm_access_counter_threshold;
59 
60 // Per-VA space access counters information
61 typedef struct
62 {
63     // VA space-specific configuration settings. These override the global
64     // settings
65     struct
66     {
67         atomic_t enable_mimc_migrations;
68 
69         atomic_t enable_momc_migrations;
70     } params;
71 
72     uvm_va_space_t *va_space;
73 } va_space_access_counters_info_t;
74 
75 // Enable/disable access-counter-guided migrations
76 //
77 static int uvm_perf_access_counter_mimc_migration_enable = -1;
78 static int uvm_perf_access_counter_momc_migration_enable = -1;
79 
80 // Number of entries that are fetched from the GPU access counter notification
81 // buffer and serviced in batch
82 static unsigned uvm_perf_access_counter_batch_count = UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_DEFAULT;
83 
84 // See module param documentation below
85 static unsigned uvm_perf_access_counter_threshold = UVM_PERF_ACCESS_COUNTER_THRESHOLD_DEFAULT;
86 
87 // Module parameters for the tunables
88 module_param(uvm_perf_access_counter_mimc_migration_enable, int, S_IRUGO);
89 MODULE_PARM_DESC(uvm_perf_access_counter_mimc_migration_enable,
                 "Whether MIMC access counters will trigger migrations. "
                 "Valid values: <= -1 (default policy), 0 (off), >= 1 (on)");
92 module_param(uvm_perf_access_counter_momc_migration_enable, int, S_IRUGO);
93 MODULE_PARM_DESC(uvm_perf_access_counter_momc_migration_enable,
                 "Whether MOMC access counters will trigger migrations. "
                 "Valid values: <= -1 (default policy), 0 (off), >= 1 (on)");
96 module_param(uvm_perf_access_counter_batch_count, uint, S_IRUGO);
97 module_param(uvm_perf_access_counter_threshold, uint, S_IRUGO);
98 MODULE_PARM_DESC(uvm_perf_access_counter_threshold,
                 "Number of remote accesses on a region required to trigger a notification. "
                 "Valid values: [1, 65535]");
101 
102 static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
103                                                uvm_gpu_buffer_flush_mode_t flush_mode);
104 
105 static uvm_perf_module_event_callback_desc_t g_callbacks_access_counters[] = {};
106 
107 // Performance heuristics module for access_counters
108 static uvm_perf_module_t g_module_access_counters;
109 
110 // Get the access counters tracking struct for the given VA space if it exists.
111 // This information is allocated at VA space creation and freed during VA space
112 // destruction.
static va_space_access_counters_info_t *va_space_access_counters_info_get_or_null(uvm_va_space_t *va_space)
114 {
115     return uvm_perf_module_type_data(va_space->perf_modules_data, UVM_PERF_MODULE_TYPE_ACCESS_COUNTERS);
116 }
117 
118 // Get the access counters tracking struct for the given VA space. It asserts
119 // that the information has been previously created.
static va_space_access_counters_info_t *va_space_access_counters_info_get(uvm_va_space_t *va_space)
121 {
122     va_space_access_counters_info_t *va_space_access_counters = va_space_access_counters_info_get_or_null(va_space);
123     UVM_ASSERT(va_space_access_counters);
124 
125     return va_space_access_counters;
126 }
127 
128 // Whether access counter migrations are enabled or not. The policy is as
129 // follows:
130 // - MIMC migrations are disabled by default on all non-ATS systems.
131 // - MOMC migrations are disabled by default on all systems
132 // - Users can override this policy by specifying on/off
static bool is_migration_enabled(uvm_access_counter_type_t type)
134 {
135     int val;
136     if (type == UVM_ACCESS_COUNTER_TYPE_MIMC) {
137         val = uvm_perf_access_counter_mimc_migration_enable;
138     }
139     else {
140         val = uvm_perf_access_counter_momc_migration_enable;
141 
142         UVM_ASSERT(type == UVM_ACCESS_COUNTER_TYPE_MOMC);
143     }
144 
145     if (val == 0)
146         return false;
147     else if (val > 0)
148         return true;
149 
150     if (type == UVM_ACCESS_COUNTER_TYPE_MOMC)
151         return false;
152 
153     if (UVM_ATS_SUPPORTED())
154         return g_uvm_global.ats.supported;
155 
156     return false;
157 }
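
// Illustrative sketch (guarded out of the build): how the tri-state module
// parameters resolve when left at their default value of -1. The helper name
// is hypothetical; it only restates the policy implemented above.
#if 0
static bool example_default_migration_policy(uvm_access_counter_type_t type, bool ats_supported)
{
    // MOMC migrations default to off on all systems; MIMC migrations default
    // to on only when the system supports ATS.
    if (type == UVM_ACCESS_COUNTER_TYPE_MOMC)
        return false;

    return ats_supported;
}
#endif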
158 
159 // Create the access counters tracking struct for the given VA space
160 //
161 // VA space lock needs to be held in write mode
static va_space_access_counters_info_t *va_space_access_counters_info_create(uvm_va_space_t *va_space)
163 {
164     va_space_access_counters_info_t *va_space_access_counters;
165     uvm_assert_rwsem_locked_write(&va_space->lock);
166 
167     UVM_ASSERT(va_space_access_counters_info_get_or_null(va_space) == NULL);
168 
169     va_space_access_counters = uvm_kvmalloc_zero(sizeof(*va_space_access_counters));
170     if (va_space_access_counters) {
171         uvm_perf_module_type_set_data(va_space->perf_modules_data,
172                                       va_space_access_counters,
173                                       UVM_PERF_MODULE_TYPE_ACCESS_COUNTERS);
174 
175         // Snap the access_counters parameters so that they can be tuned per VA space
176         atomic_set(&va_space_access_counters->params.enable_mimc_migrations,
177                    is_migration_enabled(UVM_ACCESS_COUNTER_TYPE_MIMC));
178         atomic_set(&va_space_access_counters->params.enable_momc_migrations,
179                    is_migration_enabled(UVM_ACCESS_COUNTER_TYPE_MOMC));
180         va_space_access_counters->va_space = va_space;
181     }
182 
183     return va_space_access_counters;
184 }
185 
186 // Destroy the access counters tracking struct for the given VA space
187 //
188 // VA space lock needs to be in write mode
static void va_space_access_counters_info_destroy(uvm_va_space_t *va_space)
190 {
191     va_space_access_counters_info_t *va_space_access_counters = va_space_access_counters_info_get_or_null(va_space);
192     uvm_assert_rwsem_locked_write(&va_space->lock);
193 
194     if (va_space_access_counters) {
195         uvm_perf_module_type_unset_data(va_space->perf_modules_data, UVM_PERF_MODULE_TYPE_ACCESS_COUNTERS);
196         uvm_kvfree(va_space_access_counters);
197     }
198 }
199 
static NV_STATUS config_granularity_to_bytes(UVM_ACCESS_COUNTER_GRANULARITY granularity, NvU64 *bytes)
201 {
202     switch (granularity) {
203         case UVM_ACCESS_COUNTER_GRANULARITY_64K:
204             *bytes = 64 * 1024ULL;
205             break;
206         case UVM_ACCESS_COUNTER_GRANULARITY_2M:
207             *bytes = 2 * UVM_SIZE_1MB;
208             break;
209         case UVM_ACCESS_COUNTER_GRANULARITY_16M:
210             *bytes = 16 * UVM_SIZE_1MB;
211             break;
212         case UVM_ACCESS_COUNTER_GRANULARITY_16G:
213             *bytes = 16 * UVM_SIZE_1GB;
214             break;
215         default:
216             return NV_ERR_INVALID_ARGUMENT;
217     }
218 
219     return NV_OK;
220 }
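
// Example usage sketch (guarded out of the build): callers check the returned
// status because an out-of-range granularity is rejected with an error rather
// than asserted inside the helper. The function name below is hypothetical.
#if 0
static void example_granularity_to_bytes_usage(void)
{
    NvU64 bytes = 0;
    NV_STATUS status = config_granularity_to_bytes(UVM_ACCESS_COUNTER_GRANULARITY_16M, &bytes);

    // On success, bytes is 16 * UVM_SIZE_1MB. An invalid enum value returns
    // NV_ERR_INVALID_ARGUMENT and leaves bytes untouched.
    UVM_ASSERT(status == NV_OK);
    UVM_ASSERT(bytes == 16 * UVM_SIZE_1MB);
}
#endif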
221 
// Clear the given access counter notifications and add the clear operation to
// the per-GPU clear tracker.
static NV_STATUS access_counter_clear_notifications(uvm_gpu_t *gpu,
                                                    uvm_access_counter_buffer_entry_t **notification_start,
                                                    NvU32 num_notifications)
227 {
228     NvU32 i;
229     NV_STATUS status;
230     uvm_push_t push;
231     uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
232 
233     status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_MEMOPS, &push, "Clear access counter batch");
234     if (status != NV_OK) {
235         UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s\n",
236                       nvstatusToString(status),
237                       uvm_gpu_name(gpu));
238         return status;
239     }
240 
241     for (i = 0; i < num_notifications; i++)
242         gpu->parent->host_hal->access_counter_clear_targeted(&push, notification_start[i]);
243 
244     uvm_push_end(&push);
245 
246     uvm_tracker_remove_completed(&access_counters->clear_tracker);
247 
248     return uvm_tracker_add_push_safe(&access_counters->clear_tracker, &push);
249 }
250 
251 // Clear all access counters and add the operation to the per-GPU clear tracker
static NV_STATUS access_counter_clear_all(uvm_gpu_t *gpu)
253 {
254     NV_STATUS status;
255     uvm_push_t push;
256     uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
257 
258     status = uvm_push_begin(gpu->channel_manager,
259                             UVM_CHANNEL_TYPE_MEMOPS,
260                             &push,
261                             "Clear access counter: all");
262     if (status != NV_OK) {
263         UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s\n",
264                       nvstatusToString(status),
265                       uvm_gpu_name(gpu));
266         return status;
267     }
268 
269     gpu->parent->host_hal->access_counter_clear_all(&push);
270 
271     uvm_push_end(&push);
272 
273     uvm_tracker_remove_completed(&access_counters->clear_tracker);
274 
275     return uvm_tracker_add_push_safe(&access_counters->clear_tracker, &push);
276 }
277 
static const uvm_gpu_access_counter_type_config_t *
get_config_for_type(const uvm_access_counter_buffer_info_t *access_counters, uvm_access_counter_type_t counter_type)
280 {
281     return counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC? &(access_counters)->current_config.mimc :
282                                                          &(access_counters)->current_config.momc;
283 }
284 
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu)
286 {
287     UVM_ASSERT(parent_gpu->access_counters_supported);
288 
289     // Fast path 1: we left some notifications unserviced in the buffer in the last pass
290     if (parent_gpu->access_counter_buffer_info.cached_get != parent_gpu->access_counter_buffer_info.cached_put)
291         return true;
292 
    // Fast path 2: read the valid bit of the notification buffer entry pointed
    // to by the cached get pointer
294     if (!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu,
295                                                                parent_gpu->access_counter_buffer_info.cached_get)) {
296         // Slow path: read the put pointer from the GPU register via BAR0 over PCIe
297         parent_gpu->access_counter_buffer_info.cached_put =
298             UVM_GPU_READ_ONCE(*parent_gpu->access_counter_buffer_info.rm_info.pAccessCntrBufferPut);
299 
300         // No interrupt pending
301         if (parent_gpu->access_counter_buffer_info.cached_get == parent_gpu->access_counter_buffer_info.cached_put)
302             return false;
303     }
304 
305     return true;
306 }
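
// Illustrative sketch (guarded out of the build): the same GET/PUT/valid-bit
// scheme expressed over a generic ring, to make the two fast paths and the
// single slow path explicit. The type and all names are hypothetical.
#if 0
typedef struct
{
    NvU32 cached_get;
    NvU32 cached_put;
    volatile NvU32 *hw_put;                // BAR0-mapped PUT register
    bool (*entry_is_valid)(NvU32 index);   // valid bit of the entry at index
} example_ring_t;

static bool example_ring_has_pending(example_ring_t *ring)
{
    // Fast path 1: entries left unserviced by the previous pass
    if (ring->cached_get != ring->cached_put)
        return true;

    // Fast path 2: the entry at GET already has its valid bit set
    if (ring->entry_is_valid(ring->cached_get))
        return true;

    // Slow path: refresh the cached PUT from the hardware register
    ring->cached_put = *ring->hw_put;

    return ring->cached_get != ring->cached_put;
}
#endif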
307 
308 // Initialize the configuration and pre-compute some required values for the
309 // given access counter type
static void init_access_counter_types_config(const UvmGpuAccessCntrConfig *config,
                                             uvm_access_counter_type_t counter_type,
                                             uvm_gpu_access_counter_type_config_t *counter_type_config)
313 {
314     NV_STATUS status;
315     NvU64 tracking_size = 0;
316     UVM_ACCESS_COUNTER_GRANULARITY granularity = counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC? config->mimcGranularity:
317                                                                                                config->momcGranularity;
318     UVM_ACCESS_COUNTER_USE_LIMIT use_limit = counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC? config->mimcUseLimit:
319                                                                                            config->momcUseLimit;
320 
321     counter_type_config->rm.granularity = granularity;
322     counter_type_config->rm.use_limit = use_limit;
323 
324     // Precompute the maximum size to use in reverse map translations and the
325     // number of translations that are required per access counter notification.
326     status = config_granularity_to_bytes(granularity, &tracking_size);
327     UVM_ASSERT(status == NV_OK);
328 
329     // sub_granularity field is only filled for tracking granularities larger
330     // than 64K
331     if (granularity == UVM_ACCESS_COUNTER_GRANULARITY_64K)
332         counter_type_config->sub_granularity_region_size = tracking_size;
333     else
334         counter_type_config->sub_granularity_region_size = tracking_size / UVM_SUB_GRANULARITY_REGIONS;
335 
336     counter_type_config->translation_size = min(UVM_MAX_TRANSLATION_SIZE, tracking_size);
337     counter_type_config->translations_per_counter =
338         max(counter_type_config->translation_size / UVM_MAX_TRANSLATION_SIZE, 1ULL);
339     counter_type_config->sub_granularity_regions_per_translation =
340         max(counter_type_config->translation_size / counter_type_config->sub_granularity_region_size, 1ULL);
341     UVM_ASSERT(counter_type_config->sub_granularity_regions_per_translation <= UVM_SUB_GRANULARITY_REGIONS);
342 }
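
// Worked example (guarded out of the build) of the values precomputed above,
// assuming a 16M tracking granularity and the 2M translation cap. The helper
// name is hypothetical.
#if 0
static void example_16m_granularity_values(void)
{
    const NvU64 tracking_size = 16 * UVM_SIZE_1MB;
    const NvU64 region_size = tracking_size / UVM_SUB_GRANULARITY_REGIONS;        // 512K
    const NvU64 translation_size = min(UVM_MAX_TRANSLATION_SIZE, tracking_size);  // 2M

    // Each 2M translation covers 4 of the 32 sub-granularity regions.
    UVM_ASSERT(translation_size / region_size == 4);
}
#endif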
343 
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
345 {
346     NV_STATUS status = NV_OK;
347     uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
348     uvm_access_counter_service_batch_context_t *batch_context = &access_counters->batch_service_context;
349     NvU64 granularity_bytes = 0;
350 
351     if (uvm_perf_access_counter_threshold < UVM_PERF_ACCESS_COUNTER_THRESHOLD_MIN) {
352         g_uvm_access_counter_threshold = UVM_PERF_ACCESS_COUNTER_THRESHOLD_MIN;
353         pr_info("Value %u too small for uvm_perf_access_counter_threshold, using %u instead\n",
354                 uvm_perf_access_counter_threshold,
355                 g_uvm_access_counter_threshold);
356     }
357     else if (uvm_perf_access_counter_threshold > UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX) {
358         g_uvm_access_counter_threshold = UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX;
359         pr_info("Value %u too large for uvm_perf_access_counter_threshold, using %u instead\n",
360                 uvm_perf_access_counter_threshold,
361                 g_uvm_access_counter_threshold);
362     }
363     else {
364         g_uvm_access_counter_threshold = uvm_perf_access_counter_threshold;
365     }
366 
367     uvm_assert_mutex_locked(&g_uvm_global.global_lock);
368     UVM_ASSERT(parent_gpu->access_counter_buffer_hal != NULL);
369 
370     status = uvm_rm_locked_call(nvUvmInterfaceInitAccessCntrInfo(parent_gpu->rm_device,
371                                                                  &access_counters->rm_info,
372                                                                  0));
373     if (status != NV_OK) {
374         UVM_ERR_PRINT("Failed to init notify buffer info from RM: %s, GPU %s\n",
375                       nvstatusToString(status),
376                       uvm_parent_gpu_name(parent_gpu));
377 
378         // nvUvmInterfaceInitAccessCntrInfo may leave fields in rm_info
379         // populated when it returns an error. Set the buffer handle to zero as
380         // it is used by the deinitialization logic to determine if it was
381         // correctly initialized.
382         access_counters->rm_info.accessCntrBufferHandle = 0;
383         goto fail;
384     }
385 
386     UVM_ASSERT(access_counters->rm_info.bufferSize %
387                parent_gpu->access_counter_buffer_hal->entry_size(parent_gpu) == 0);
388 
389     status = config_granularity_to_bytes(UVM_PERF_ACCESS_COUNTER_GRANULARITY, &granularity_bytes);
390     UVM_ASSERT(status == NV_OK);
391     if (granularity_bytes > UVM_MAX_TRANSLATION_SIZE)
392         UVM_ASSERT(granularity_bytes % UVM_MAX_TRANSLATION_SIZE == 0);
393 
394     parent_gpu->access_counter_buffer_info.notifications_ignored_count = 0;
395     parent_gpu->access_counter_buffer_info.reconfiguration_owner = NULL;
396 
397     uvm_tracker_init(&access_counters->clear_tracker);
398 
399     access_counters->max_notifications = parent_gpu->access_counter_buffer_info.rm_info.bufferSize /
400                                          parent_gpu->access_counter_buffer_hal->entry_size(parent_gpu);
401 
402     // Check provided module parameter value
403     access_counters->max_batch_size = max(uvm_perf_access_counter_batch_count,
404                                           (NvU32)UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN);
405     access_counters->max_batch_size = min(access_counters->max_batch_size,
406                                           access_counters->max_notifications);
407 
408     if (access_counters->max_batch_size != uvm_perf_access_counter_batch_count) {
409         pr_info("Invalid uvm_perf_access_counter_batch_count value on GPU %s: %u. Valid range [%u:%u] Using %u instead\n",
410                 uvm_parent_gpu_name(parent_gpu),
411                 uvm_perf_access_counter_batch_count,
412                 UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN,
413                 access_counters->max_notifications,
414                 access_counters->max_batch_size);
415     }
416 
417     batch_context->notification_cache = uvm_kvmalloc_zero(access_counters->max_notifications *
418                                                           sizeof(*batch_context->notification_cache));
419     if (!batch_context->notification_cache) {
420         status = NV_ERR_NO_MEMORY;
421         goto fail;
422     }
423 
424     batch_context->virt.notifications = uvm_kvmalloc_zero(access_counters->max_notifications *
425                                                           sizeof(*batch_context->virt.notifications));
426     if (!batch_context->virt.notifications) {
427         status = NV_ERR_NO_MEMORY;
428         goto fail;
429     }
430 
431     batch_context->phys.notifications = uvm_kvmalloc_zero(access_counters->max_notifications *
432                                                           sizeof(*batch_context->phys.notifications));
433     if (!batch_context->phys.notifications) {
434         status = NV_ERR_NO_MEMORY;
435         goto fail;
436     }
437 
438     batch_context->phys.translations = uvm_kvmalloc_zero((UVM_MAX_TRANSLATION_SIZE / PAGE_SIZE) *
439                                                          sizeof(*batch_context->phys.translations));
440     if (!batch_context->phys.translations) {
441         status = NV_ERR_NO_MEMORY;
442         goto fail;
443     }
444 
445     return NV_OK;
446 
447 fail:
448     uvm_parent_gpu_deinit_access_counters(parent_gpu);
449 
450     return status;
451 }
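
// Illustrative sketch (guarded out of the build) of the parameter-sanitizing
// pattern used above: clamp a module parameter to a valid range and log when
// the supplied value had to be adjusted. The helper name is hypothetical.
#if 0
static unsigned example_clamp_module_param(unsigned value, unsigned min_value, unsigned max_value, const char *name)
{
    unsigned clamped = value;

    if (clamped < min_value)
        clamped = min_value;
    else if (clamped > max_value)
        clamped = max_value;

    if (clamped != value)
        pr_info("Value %u out of range for %s, using %u instead\n", value, name, clamped);

    return clamped;
}
#endif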
452 
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu)
454 {
455     uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
456     uvm_access_counter_service_batch_context_t *batch_context = &access_counters->batch_service_context;
457 
458     UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count == 0);
459 
460     if (access_counters->rm_info.accessCntrBufferHandle) {
461         NV_STATUS status = uvm_rm_locked_call(nvUvmInterfaceDestroyAccessCntrInfo(parent_gpu->rm_device,
462                                                                                   &access_counters->rm_info));
463         UVM_ASSERT(status == NV_OK);
464 
465         access_counters->rm_info.accessCntrBufferHandle = 0;
466         uvm_tracker_deinit(&access_counters->clear_tracker);
467     }
468 
469     uvm_kvfree(batch_context->notification_cache);
470     uvm_kvfree(batch_context->virt.notifications);
471     uvm_kvfree(batch_context->phys.notifications);
472     uvm_kvfree(batch_context->phys.translations);
473     batch_context->notification_cache = NULL;
474     batch_context->virt.notifications = NULL;
475     batch_context->phys.notifications = NULL;
476     batch_context->phys.translations = NULL;
477 }
478 
bool uvm_parent_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu)
480 {
481     if (!parent_gpu->access_counters_supported)
482         return false;
483 
484     if (parent_gpu->rm_info.isSimulated)
485         return true;
486 
487     return is_migration_enabled(UVM_ACCESS_COUNTER_TYPE_MIMC) || is_migration_enabled(UVM_ACCESS_COUNTER_TYPE_MOMC);
488 }
489 
490 // This function enables access counters with the given configuration and takes
491 // ownership from RM. The function also stores the new configuration within the
492 // uvm_gpu_t struct.
static NV_STATUS access_counters_take_ownership(uvm_gpu_t *gpu, UvmGpuAccessCntrConfig *config)
494 {
495     NV_STATUS status, disable_status;
496     uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
497 
498     UVM_ASSERT(gpu->parent->access_counters_supported);
499     UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));
500 
501     status = uvm_rm_locked_call(nvUvmInterfaceEnableAccessCntr(gpu->parent->rm_device,
502                                                                &access_counters->rm_info,
503                                                                config));
504     if (status != NV_OK) {
505         UVM_ERR_PRINT("Failed to enable access counter notification from RM: %s, GPU %s\n",
506                       nvstatusToString(status), uvm_gpu_name(gpu));
507         return status;
508     }
509 
510     status = access_counter_clear_all(gpu);
511     if (status != NV_OK)
512         goto error;
513 
514     status = uvm_tracker_wait(&access_counters->clear_tracker);
515     if (status != NV_OK)
516         goto error;
517 
518     // Read current get pointer as this might not be the first time we have
519     // taken control of the notify buffer since the GPU was initialized. Then
520     // flush old notifications. This will update the cached_put pointer.
521     access_counters->cached_get = UVM_GPU_READ_ONCE(*access_counters->rm_info.pAccessCntrBufferGet);
522     access_counter_buffer_flush_locked(gpu->parent, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
523 
524     access_counters->current_config.threshold = config->threshold;
525 
526     init_access_counter_types_config(config, UVM_ACCESS_COUNTER_TYPE_MIMC, &access_counters->current_config.mimc);
527     init_access_counter_types_config(config, UVM_ACCESS_COUNTER_TYPE_MOMC, &access_counters->current_config.momc);
528 
529     return NV_OK;
530 
531 error:
532     disable_status = uvm_rm_locked_call(nvUvmInterfaceDisableAccessCntr(gpu->parent->rm_device,
533                                                                         &access_counters->rm_info));
534     UVM_ASSERT(disable_status == NV_OK);
535 
536     return status;
537 }
538 
539 // If ownership is yielded as part of reconfiguration, the access counters
540 // handling refcount may not be 0
static void access_counters_yield_ownership(uvm_parent_gpu_t *parent_gpu)
542 {
543     NV_STATUS status;
544     uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
545 
546     UVM_ASSERT(parent_gpu->access_counters_supported);
547     UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
548 
    // Wait for any pending clear operation before releasing ownership
550     status = uvm_tracker_wait(&access_counters->clear_tracker);
551     if (status != NV_OK)
552         UVM_ASSERT(status == uvm_global_get_status());
553 
554     status = uvm_rm_locked_call(nvUvmInterfaceDisableAccessCntr(parent_gpu->rm_device,
555                                                                 &access_counters->rm_info));
556     UVM_ASSERT(status == NV_OK);
557 }
558 
559 // Increment the refcount of access counter enablement. If this is the first
560 // reference, enable the HW feature.
static NV_STATUS gpu_access_counters_enable(uvm_gpu_t *gpu, UvmGpuAccessCntrConfig *config)
562 {
563     UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));
564     UVM_ASSERT(gpu->parent->access_counters_supported);
565     UVM_ASSERT(gpu->parent->access_counter_buffer_info.rm_info.accessCntrBufferHandle);
566 
567     // There cannot be a concurrent modification of the handling count, since
568     // the only two writes of that field happen in the enable/disable functions
569     // and those are protected by the access counters ISR lock.
570     if (gpu->parent->isr.access_counters.handling_ref_count == 0) {
571         NV_STATUS status = access_counters_take_ownership(gpu, config);
572 
573         if (status != NV_OK)
574             return status;
575     }
576 
577     ++gpu->parent->isr.access_counters.handling_ref_count;
578     return NV_OK;
579 }
580 
581 // Decrement the refcount of access counter enablement. If this is the last
582 // reference, disable the HW feature.
static void parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu)
584 {
585     UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
586     UVM_ASSERT(parent_gpu->access_counters_supported);
587     UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count > 0);
588 
589     if (--parent_gpu->isr.access_counters.handling_ref_count == 0)
590         access_counters_yield_ownership(parent_gpu);
591 }
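
// Illustrative sketch (guarded out of the build): the enable/disable pair above
// is a plain refcount serialized by the access counters ISR lock; ownership is
// taken from RM on the 0->1 transition and yielded on the 1->0 transition.
// All names are hypothetical.
#if 0
static NV_STATUS example_refcounted_enable(unsigned *ref_count, NV_STATUS (*take_ownership)(void))
{
    if (*ref_count == 0) {
        NV_STATUS status = take_ownership();

        if (status != NV_OK)
            return status;
    }

    ++(*ref_count);

    return NV_OK;
}

static void example_refcounted_disable(unsigned *ref_count, void (*yield_ownership)(void))
{
    UVM_ASSERT(*ref_count > 0);

    if (--(*ref_count) == 0)
        yield_ownership();
}
#endif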
592 
593 // Invoked during registration of the GPU in the VA space
NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_space)
595 {
596     NV_STATUS status;
597 
598     UVM_ASSERT(gpu->parent->access_counters_supported);
599 
600     uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
601 
602     if (uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
603         status = NV_ERR_INVALID_DEVICE;
604     }
605     else {
606         UvmGpuAccessCntrConfig default_config =
607         {
608             .mimcGranularity = UVM_PERF_ACCESS_COUNTER_GRANULARITY,
609             .momcGranularity = UVM_PERF_ACCESS_COUNTER_GRANULARITY,
610             .mimcUseLimit = UVM_ACCESS_COUNTER_USE_LIMIT_FULL,
611             .momcUseLimit = UVM_ACCESS_COUNTER_USE_LIMIT_FULL,
612             .threshold = g_uvm_access_counter_threshold,
613         };
614         status = gpu_access_counters_enable(gpu, &default_config);
615 
        // No VA space lock is currently held, so the mask is modified
        // atomically to protect against concurrent enablement of access
        // counters on another GPU
619         if (status == NV_OK)
620             uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
621     }
622 
623     // If this is the first reference taken on access counters, dropping the
624     // ISR lock will enable interrupts.
625     uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
626 
627     return status;
628 }
629 
void uvm_parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu,
                                            uvm_va_space_t *va_space)
632 {
633     UVM_ASSERT(parent_gpu->access_counters_supported);
634 
635     uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
636 
637     if (uvm_parent_processor_mask_test_and_clear_atomic(&va_space->access_counters_enabled_processors,
638                                                         parent_gpu->id)) {
639         parent_gpu_access_counters_disable(parent_gpu);
640 
        // If this VA space reconfigured access counters, clear the ownership
        // so that other processes can invoke the reconfiguration
643         if (parent_gpu->access_counter_buffer_info.reconfiguration_owner == va_space)
644             parent_gpu->access_counter_buffer_info.reconfiguration_owner = NULL;
645     }
646 
647     uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
648 }
649 
static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
651 {
652     uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
653 
654     UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
655 
656     // Write get on the GPU only if it's changed.
657     if (access_counters->cached_get == get)
658         return;
659 
660     access_counters->cached_get = get;
661 
662     // Update get pointer on the GPU
663     UVM_GPU_WRITE_ONCE(*access_counters->rm_info.pAccessCntrBufferGet, get);
664 }
665 
static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
                                               uvm_gpu_buffer_flush_mode_t flush_mode)
668 {
669     NvU32 get;
670     NvU32 put;
671     uvm_spin_loop_t spin;
672     uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
673 
674     UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
675     UVM_ASSERT(parent_gpu->access_counters_supported);
676 
677     // Read PUT pointer from the GPU if requested
678     UVM_ASSERT(flush_mode != UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT);
679     if (flush_mode == UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT)
680         access_counters->cached_put = UVM_GPU_READ_ONCE(*access_counters->rm_info.pAccessCntrBufferPut);
681 
682     get = access_counters->cached_get;
683     put = access_counters->cached_put;
684 
685     while (get != put) {
686         // Wait until valid bit is set
687         UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu, get), &spin);
688 
689         parent_gpu->access_counter_buffer_hal->entry_clear_valid(parent_gpu, get);
690         ++get;
691         if (get == access_counters->max_notifications)
692             get = 0;
693     }
694 
695     write_get(parent_gpu, get);
696 }
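
// Illustrative sketch (guarded out of the build): the flush loop above is a
// consume-and-wrap walk from GET to PUT; only the index arithmetic is shown
// here. The helper name is hypothetical.
#if 0
static NvU32 example_advance_get(NvU32 get, NvU32 max_notifications)
{
    // Entries are consumed one at a time and the index wraps to 0 after the
    // last slot of the notification buffer.
    if (++get == max_notifications)
        get = 0;

    return get;
}
#endif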
697 
void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu)
699 {
700     UVM_ASSERT(parent_gpu->access_counters_supported);
701 
702     // Disables access counter interrupts and notification servicing
703     uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
704 
705     if (parent_gpu->isr.access_counters.handling_ref_count > 0)
706         access_counter_buffer_flush_locked(parent_gpu, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
707 
708     uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
709 }
710 
static inline int cmp_access_counter_instance_ptr(const uvm_access_counter_buffer_entry_t *a,
                                                  const uvm_access_counter_buffer_entry_t *b)
713 {
714     int result;
715 
716     result = uvm_gpu_phys_addr_cmp(a->virtual_info.instance_ptr, b->virtual_info.instance_ptr);
717     // On Volta+ we need to sort by {instance_ptr + subctx_id} pair since it can
718     // map to a different VA space
719     if (result != 0)
720         return result;
721     return UVM_CMP_DEFAULT(a->virtual_info.ve_id, b->virtual_info.ve_id);
722 }
723 
724 // Sort comparator for pointers to GVA access counter notification buffer
725 // entries that sorts by instance pointer
static int cmp_sort_virt_notifications_by_instance_ptr(const void *_a, const void *_b)
727 {
728     const uvm_access_counter_buffer_entry_t *a = *(const uvm_access_counter_buffer_entry_t **)_a;
729     const uvm_access_counter_buffer_entry_t *b = *(const uvm_access_counter_buffer_entry_t **)_b;
730 
731     UVM_ASSERT(a->address.is_virtual);
732     UVM_ASSERT(b->address.is_virtual);
733 
734     return cmp_access_counter_instance_ptr(a, b);
735 }
736 
// Sort comparator for pointers to GVA access counter notification buffer
// entries that sorts by va_space and then by notification address
static int cmp_sort_virt_notifications_by_va_space_address(const void *_a, const void *_b)
740 {
741     const uvm_access_counter_buffer_entry_t **a = (const uvm_access_counter_buffer_entry_t **)_a;
742     const uvm_access_counter_buffer_entry_t **b = (const uvm_access_counter_buffer_entry_t **)_b;
743 
744     int result;
745 
746     result = UVM_CMP_DEFAULT((*a)->virtual_info.va_space, (*b)->virtual_info.va_space);
747     if (result != 0)
748         return result;
749 
750     return UVM_CMP_DEFAULT((*a)->address.address, (*b)->address.address);
751 }
752 
// Sort comparator for pointers to GPA access counter notification buffer
// entries that sorts by the resident processor of the physical address
static int cmp_sort_phys_notifications_by_processor_id(const void *_a, const void *_b)
756 {
757     const uvm_access_counter_buffer_entry_t *a = *(const uvm_access_counter_buffer_entry_t **)_a;
758     const uvm_access_counter_buffer_entry_t *b = *(const uvm_access_counter_buffer_entry_t **)_b;
759 
760     UVM_ASSERT(!a->address.is_virtual);
761     UVM_ASSERT(!b->address.is_virtual);
762 
763     return uvm_id_cmp(a->physical_info.resident_id, b->physical_info.resident_id);
764 }
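
// Illustrative sketch (guarded out of the build) of how the comparators above
// plug into the kernel's sort(): the batch context stores pointers to cached
// entries, so each comparator receives pointers to those pointers and must
// dereference twice. The wrapper name is hypothetical.
#if 0
static void example_sort_phys_notifications(uvm_access_counter_buffer_entry_t **notifications, size_t count)
{
    sort(notifications,
         count,
         sizeof(*notifications),
         cmp_sort_phys_notifications_by_processor_id,
         NULL);
}
#endif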
765 
766 typedef enum
767 {
768     // Fetch a batch of notifications from the buffer. Stop at the first entry
769     // that is not ready yet
770     NOTIFICATION_FETCH_MODE_BATCH_READY,
771 
772     // Fetch all notifications in the buffer before PUT. Wait for all
773     // notifications to become ready
774     NOTIFICATION_FETCH_MODE_ALL,
775 } notification_fetch_mode_t;
776 
static NvU32 fetch_access_counter_buffer_entries(uvm_gpu_t *gpu,
                                                 uvm_access_counter_service_batch_context_t *batch_context,
                                                 notification_fetch_mode_t fetch_mode)
780 {
781     NvU32 get;
782     NvU32 put;
783     NvU32 notification_index;
784     uvm_access_counter_buffer_entry_t *notification_cache;
785     uvm_spin_loop_t spin;
786     uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
787     NvU32 last_instance_ptr_idx = 0;
788     uvm_aperture_t last_aperture = UVM_APERTURE_PEER_MAX;
789 
790     UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));
791     UVM_ASSERT(gpu->parent->access_counters_supported);
792 
793     notification_cache = batch_context->notification_cache;
794 
795     get = access_counters->cached_get;
796 
797     // Read put pointer from GPU and cache it
798     if (get == access_counters->cached_put) {
799         access_counters->cached_put = UVM_GPU_READ_ONCE(*access_counters->rm_info.pAccessCntrBufferPut);
800     }
801 
802     put = access_counters->cached_put;
803 
804     if (get == put)
805         return 0;
806 
807     batch_context->phys.num_notifications = 0;
808     batch_context->virt.num_notifications = 0;
809 
810     batch_context->virt.is_single_instance_ptr = true;
811     batch_context->phys.is_single_aperture = true;
812 
813     notification_index = 0;
814 
    // Parse entries while get != put and there is room left to cache them
816     while ((get != put) &&
817            (fetch_mode == NOTIFICATION_FETCH_MODE_ALL || notification_index < access_counters->max_batch_size)) {
818         uvm_access_counter_buffer_entry_t *current_entry = &notification_cache[notification_index];
819 
        // We cannot just wait for the last entry (the one pointed to by put) to
        // become valid; we have to check each entry individually, since entries
        // can be written out of order
822         UVM_SPIN_WHILE(!gpu->parent->access_counter_buffer_hal->entry_is_valid(gpu->parent, get), &spin) {
823             // We have some entry to work on. Let's do the rest later.
824             if (fetch_mode != NOTIFICATION_FETCH_MODE_ALL && notification_index > 0)
825                 goto done;
826         }
827 
        // Prevent later accesses from being reordered above the read of the valid bit
829         smp_mb__after_atomic();
830 
831         // Got valid bit set. Let's cache.
832         gpu->parent->access_counter_buffer_hal->parse_entry(gpu->parent, get, current_entry);
833 
834         if (current_entry->address.is_virtual) {
835             batch_context->virt.notifications[batch_context->virt.num_notifications++] = current_entry;
836 
837             if (batch_context->virt.is_single_instance_ptr) {
838                 if (batch_context->virt.num_notifications == 1) {
839                     last_instance_ptr_idx = notification_index;
840                 }
841                 else if (cmp_access_counter_instance_ptr(&notification_cache[last_instance_ptr_idx],
842                                                          current_entry) != 0) {
843                     batch_context->virt.is_single_instance_ptr = false;
844                 }
845             }
846         }
847         else {
            const NvU64 translation_size =
                get_config_for_type(access_counters, current_entry->counter_type)->translation_size;

            current_entry->address.address = UVM_ALIGN_DOWN(current_entry->address.address, translation_size);
850 
851             batch_context->phys.notifications[batch_context->phys.num_notifications++] = current_entry;
852 
853             current_entry->physical_info.resident_id =
854                 uvm_gpu_get_processor_id_by_address(gpu, uvm_gpu_phys_address(current_entry->address.aperture,
855                                                                               current_entry->address.address));
856 
857             if (batch_context->phys.is_single_aperture) {
858                 if (batch_context->phys.num_notifications == 1)
859                     last_aperture = current_entry->address.aperture;
860                 else if (current_entry->address.aperture != last_aperture)
861                     batch_context->phys.is_single_aperture = false;
862             }
863 
864             if (current_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC)
865                 UVM_ASSERT(uvm_id_equal(current_entry->physical_info.resident_id, gpu->id));
866             else
867                 UVM_ASSERT(!uvm_id_equal(current_entry->physical_info.resident_id, gpu->id));
868         }
869 
870         ++notification_index;
871         ++get;
872         if (get == access_counters->max_notifications)
873             get = 0;
874     }
875 
876 done:
877     write_get(gpu->parent, get);
878 
879     return notification_index;
880 }
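
// Illustrative sketch (guarded out of the build): the fetch loop above pairs a
// valid-bit poll with smp_mb__after_atomic() so that the entry payload is not
// read before the valid bit is observed set; the GPU writes the payload first
// and the valid bit last. All names are hypothetical.
#if 0
static bool example_try_consume_entry(NvU32 index,
                                      bool (*entry_is_valid)(NvU32 index),
                                      void (*parse_entry)(NvU32 index))
{
    if (!entry_is_valid(index))
        return false;

    // Order the payload reads in parse_entry() after the valid-bit read
    smp_mb__after_atomic();

    parse_entry(index);

    return true;
}
#endif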
881 
static void translate_virt_notifications_instance_ptrs(uvm_parent_gpu_t *parent_gpu,
                                                       uvm_access_counter_service_batch_context_t *batch_context)
884 {
885     NvU32 i;
886     NV_STATUS status;
887 
888     for (i = 0; i < batch_context->virt.num_notifications; ++i) {
889         uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[i];
890 
891         if (i == 0 ||
892             cmp_access_counter_instance_ptr(current_entry, batch_context->virt.notifications[i - 1]) != 0) {
893             // If instance_ptr is different, make a new translation. If the
894             // translation fails then va_space will be NULL and the entry will
895             // simply be ignored in subsequent processing.
896             status = uvm_parent_gpu_access_counter_entry_to_va_space(parent_gpu,
897                                                                      current_entry,
898                                                                      &current_entry->virtual_info.va_space);
899             if (status != NV_OK)
900                 UVM_ASSERT(current_entry->virtual_info.va_space == NULL);
901         }
902         else {
903             current_entry->virtual_info.va_space = batch_context->virt.notifications[i - 1]->virtual_info.va_space;
904         }
905     }
906 }
907 
908 // GVA notifications provide an instance_ptr and ve_id that can be directly
909 // translated to a VA space. In order to minimize translations, we sort the
910 // entries by instance_ptr, va_space and notification address in that order.
static void preprocess_virt_notifications(uvm_parent_gpu_t *parent_gpu,
                                          uvm_access_counter_service_batch_context_t *batch_context)
913 {
914     if (!batch_context->virt.is_single_instance_ptr) {
915         sort(batch_context->virt.notifications,
916              batch_context->virt.num_notifications,
917              sizeof(*batch_context->virt.notifications),
918              cmp_sort_virt_notifications_by_instance_ptr,
919              NULL);
920     }
921 
922     translate_virt_notifications_instance_ptrs(parent_gpu, batch_context);
923 
924     sort(batch_context->virt.notifications,
925          batch_context->virt.num_notifications,
926          sizeof(*batch_context->virt.notifications),
927          cmp_sort_virt_notifications_by_va_space_address,
928          NULL);
929 }
930 
// GPA notifications provide a physical address and an aperture. Sort accesses
// by the processor backing each physical address to try to coalesce operations
// on the same target processor.
static void preprocess_phys_notifications(uvm_access_counter_service_batch_context_t *batch_context)
935 {
936     if (!batch_context->phys.is_single_aperture) {
937         sort(batch_context->phys.notifications,
938              batch_context->phys.num_notifications,
939              sizeof(*batch_context->phys.notifications),
940              cmp_sort_phys_notifications_by_processor_id,
941              NULL);
942     }
943 }
944 
static NV_STATUS notify_tools_and_process_flags(uvm_gpu_t *gpu,
                                                uvm_access_counter_buffer_entry_t **notification_start,
                                                NvU32 num_entries,
                                                NvU32 flags)
949 {
950     NV_STATUS status = NV_OK;
951 
952     if (uvm_enable_builtin_tests) {
953         // TODO: Bug 4310744: [UVM][TOOLS] Attribute access counter tools events
954         //                    to va_space instead of broadcasting.
955         NvU32 i;
956 
957         for (i = 0; i < num_entries; i++)
958             uvm_tools_broadcast_access_counter(gpu, notification_start[i], flags & UVM_ACCESS_COUNTER_PHYS_ON_MANAGED);
959     }
960 
961     if (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR)
962         status = access_counter_clear_notifications(gpu, notification_start, num_entries);
963 
964     return status;
965 }
966 
static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
                                         uvm_va_block_t *va_block,
                                         uvm_va_block_retry_t *va_block_retry,
                                         uvm_service_block_context_t *service_context,
                                         uvm_page_mask_t *accessed_pages)
972 {
973     NV_STATUS status = NV_OK;
974     uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
975     uvm_range_group_range_iter_t iter;
976     uvm_page_index_t page_index;
977     uvm_page_index_t first_page_index;
978     uvm_page_index_t last_page_index;
979     NvU32 page_count = 0;
980     const uvm_page_mask_t *residency_mask;
981     const bool hmm_migratable = true;
982 
983     uvm_assert_mutex_locked(&va_block->lock);
984 
985     // GPU VA space could be gone since we received the notification. We handle
986     // this case by skipping service if processor is not in the mapped mask.
987     // Using this approach we also filter out notifications for pages that
988     // moved since they were reported by the GPU. This is fine because:
989     // - If the GPU is still accessing them, it should have faulted
990     // - If the GPU gets remote mappings in the future, we will get new
991     //   notifications and we will act accordingly
992     // - If the GPU does not access the pages again, we do not want to migrate
993     //   them
994     if (!uvm_processor_mask_test(&va_block->mapped, processor))
995         return NV_OK;
996 
997     if (uvm_processor_mask_test(&va_block->resident, processor))
998         residency_mask = uvm_va_block_resident_mask_get(va_block, processor, NUMA_NO_NODE);
999     else
1000         residency_mask = NULL;
1001 
1002     first_page_index = PAGES_PER_UVM_VA_BLOCK;
1003     last_page_index = 0;
1004 
1005     // Initialize fault service block context
1006     uvm_processor_mask_zero(&service_context->resident_processors);
1007     service_context->read_duplicate_count = 0;
1008     service_context->thrashing_pin_count = 0;
1009 
1010     // If the page is already resident on the accessing processor, the
1011     // notification for this page is stale. Skip it.
1012     if (residency_mask)
1013         uvm_page_mask_andnot(accessed_pages, accessed_pages, residency_mask);
1014 
1015     uvm_range_group_range_migratability_iter_first(va_space, va_block->start, va_block->end, &iter);
1016 
1017     for_each_va_block_page_in_mask(page_index, accessed_pages, va_block) {
1018         uvm_perf_thrashing_hint_t thrashing_hint;
1019         NvU64 address = uvm_va_block_cpu_page_address(va_block, page_index);
1020         bool read_duplicate = false;
1021         uvm_processor_id_t new_residency;
1022         const uvm_va_policy_t *policy;
1023 
1024         // Ensure that the migratability iterator covers the current address
1025         while (iter.end < address)
1026             uvm_range_group_range_migratability_iter_next(va_space, &iter, va_block->end);
1027 
1028         UVM_ASSERT(iter.start <= address && iter.end >= address);
1029 
1030         // If the range is not migratable, skip the page
1031         if (!iter.migratable)
1032             continue;
1033 
1034         thrashing_hint = uvm_perf_thrashing_get_hint(va_block, service_context->block_context, address, processor);
1035         if (thrashing_hint.type == UVM_PERF_THRASHING_HINT_TYPE_THROTTLE) {
1036             // If the page is throttling, ignore the access counter
1037             // notification
1038             continue;
1039         }
1040         else if (thrashing_hint.type == UVM_PERF_THRASHING_HINT_TYPE_PIN) {
1041             if (service_context->thrashing_pin_count++ == 0)
1042                 uvm_page_mask_zero(&service_context->thrashing_pin_mask);
1043 
1044             uvm_page_mask_set(&service_context->thrashing_pin_mask, page_index);
1045         }
1046 
1047         // If the underlying VMA is gone, skip HMM migrations.
1048         if (uvm_va_block_is_hmm(va_block)) {
1049             status = uvm_hmm_find_vma(service_context->block_context->mm,
1050                                       &service_context->block_context->hmm.vma,
1051                                       address);
1052             if (status == NV_ERR_INVALID_ADDRESS)
1053                 continue;
1054 
1055             UVM_ASSERT(status == NV_OK);
1056         }
1057 
1058         policy = uvm_va_policy_get(va_block, address);
1059 
1060         new_residency = uvm_va_block_select_residency(va_block,
1061                                                       service_context->block_context,
1062                                                       page_index,
1063                                                       processor,
1064                                                       uvm_fault_access_type_mask_bit(UVM_FAULT_ACCESS_TYPE_PREFETCH),
1065                                                       policy,
1066                                                       &thrashing_hint,
1067                                                       UVM_SERVICE_OPERATION_ACCESS_COUNTERS,
1068                                                       hmm_migratable,
1069                                                       &read_duplicate);
1070 
1071         if (!uvm_processor_mask_test_and_set(&service_context->resident_processors, new_residency))
1072             uvm_page_mask_zero(&service_context->per_processor_masks[uvm_id_value(new_residency)].new_residency);
1073 
1074         uvm_page_mask_set(&service_context->per_processor_masks[uvm_id_value(new_residency)].new_residency, page_index);
1075 
1076         if (page_index < first_page_index)
1077             first_page_index = page_index;
1078         if (page_index > last_page_index)
1079             last_page_index = page_index;
1080 
1081         ++page_count;
1082 
1083         service_context->access_type[page_index] = UVM_FAULT_ACCESS_TYPE_PREFETCH;
1084     }
1085 
1086     // Apply the changes computed in the service block context, if there are
1087     // pages to be serviced
1088     if (page_count > 0) {
1089         uvm_processor_id_t id;
1090         uvm_processor_mask_t *update_processors = &service_context->update_processors;
1091 
1092         uvm_processor_mask_and(update_processors, &va_block->resident, &service_context->resident_processors);
1093 
1094         // Remove pages that are already resident in the destination processors
1095         for_each_id_in_mask(id, update_processors) {
1096             bool migrate_pages;
1097             uvm_page_mask_t *residency_mask = uvm_va_block_resident_mask_get(va_block, id, NUMA_NO_NODE);
1098             UVM_ASSERT(residency_mask);
1099 
1100             migrate_pages = uvm_page_mask_andnot(&service_context->per_processor_masks[uvm_id_value(id)].new_residency,
1101                                                  &service_context->per_processor_masks[uvm_id_value(id)].new_residency,
1102                                                  residency_mask);
1103 
1104             if (!migrate_pages)
1105                 uvm_processor_mask_clear(&service_context->resident_processors, id);
1106         }
1107 
1108         if (!uvm_processor_mask_empty(&service_context->resident_processors)) {
1109             while (first_page_index <= last_page_index) {
1110                 uvm_page_index_t outer = last_page_index + 1;
1111                 const uvm_va_policy_t *policy;
1112 
1113                 if (uvm_va_block_is_hmm(va_block)) {
1114                     status = NV_ERR_INVALID_ADDRESS;
1115                     if (service_context->block_context->mm) {
1116                         status = uvm_hmm_find_policy_vma_and_outer(va_block,
1117                                                                    &service_context->block_context->hmm.vma,
1118                                                                    first_page_index,
1119                                                                    &policy,
1120                                                                    &outer);
1121                     }
1122                     if (status != NV_OK)
1123                         break;
1124                 }
1125 
1126                 service_context->region = uvm_va_block_region(first_page_index, outer);
1127                 first_page_index = outer;
1128 
1129                 status = uvm_va_block_service_locked(processor, va_block, va_block_retry, service_context);
1130                 if (status != NV_OK)
1131                     break;
1132             }
1133         }
1134     }
1135 
1136     ++service_context->num_retries;
1137 
1138     return status;
1139 }
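
// Illustrative sketch (guarded out of the build): stale notifications are
// filtered with mask arithmetic rather than per-page branches; pages already
// resident on the accessing processor are removed from the accessed mask
// before the per-page loop runs. The helper name is hypothetical.
#if 0
static bool example_filter_already_resident(uvm_page_mask_t *accessed_pages, const uvm_page_mask_t *residency_mask)
{
    // Returns true if any pages remain to be serviced after the filter
    return uvm_page_mask_andnot(accessed_pages, accessed_pages, residency_mask);
}
#endif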
1140 
static void reverse_mappings_to_va_block_page_mask(uvm_va_block_t *va_block,
                                                   const uvm_reverse_map_t *reverse_mappings,
                                                   size_t num_reverse_mappings,
                                                   uvm_page_mask_t *page_mask)
1145 {
1146     NvU32 index;
1147 
1148     UVM_ASSERT(page_mask);
1149 
1150     if (num_reverse_mappings > 0)
1151         UVM_ASSERT(reverse_mappings);
1152 
1153     uvm_page_mask_zero(page_mask);
1154 
1155     // Populate the mask of accessed pages within the VA Block
1156     for (index = 0; index < num_reverse_mappings; ++index) {
1157         const uvm_reverse_map_t *reverse_map = &reverse_mappings[index];
1158         uvm_va_block_region_t region = reverse_map->region;
1159 
1160         UVM_ASSERT(reverse_map->va_block == va_block);
1161 
1162         // The VA Block could have been split since we obtained the reverse
1163         // mappings. Clamp the region to the current VA block size to handle
1164         // that case.
1165         region.outer = min(region.outer, (uvm_page_index_t)uvm_va_block_num_cpu_pages(va_block));
1166         region.first = min(region.first, region.outer);
1167 
1168         uvm_page_mask_region_fill(page_mask, region);
1169     }
1170 }
1171 
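// Service a group of reverse map translations that all belong to the same VA
// block. The accessed page mask is built from the translations and the block
// is serviced under its lock. One VA block reference is dropped per
// translation, matching the references taken by the reverse map lookups.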
1172 static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
1173                                               uvm_access_counter_service_batch_context_t *batch_context,
1174                                               const uvm_access_counter_buffer_entry_t *current_entry,
1175                                               const uvm_reverse_map_t *reverse_mappings,
1176                                               size_t num_reverse_mappings,
1177                                               NvU32 *out_flags)
1178 {
1179     size_t index;
1180     uvm_va_block_t *va_block = reverse_mappings[0].va_block;
1181     uvm_va_space_t *va_space = NULL;
1182     struct mm_struct *mm = NULL;
1183     NV_STATUS status = NV_OK;
1184     const uvm_processor_id_t processor = current_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC?
1185                                              gpu->id: UVM_ID_CPU;
1186 
1187     *out_flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
1188 
1189     UVM_ASSERT(num_reverse_mappings > 0);
1190 
1191     uvm_mutex_lock(&va_block->lock);
1192     va_space = uvm_va_block_get_va_space_maybe_dead(va_block);
1193     uvm_mutex_unlock(&va_block->lock);
1194 
1195     if (va_space) {
1196         uvm_va_block_retry_t va_block_retry;
1197         va_space_access_counters_info_t *va_space_access_counters;
1198         uvm_service_block_context_t *service_context = &batch_context->block_service_context;
1199         uvm_page_mask_t *accessed_pages = &batch_context->accessed_pages;
1200 
1201         // If an mm is registered with the VA space, we have to retain it
1202         // in order to lock it before locking the VA space.
1203         mm = uvm_va_space_mm_retain_lock(va_space);
1204         uvm_va_space_down_read(va_space);
1205 
1206         // Re-check that the VA block is valid after taking the VA space lock.
1207         if (uvm_va_block_is_dead(va_block))
1208             goto done;
1209 
1210         va_space_access_counters = va_space_access_counters_info_get(va_space);
1211         if (UVM_ID_IS_CPU(processor) && !atomic_read(&va_space_access_counters->params.enable_momc_migrations))
1212             goto done;
1213 
1214         if (!UVM_ID_IS_CPU(processor) && !atomic_read(&va_space_access_counters->params.enable_mimc_migrations))
1215             goto done;
1216 
1217         service_context->operation = UVM_SERVICE_OPERATION_ACCESS_COUNTERS;
1218         service_context->num_retries = 0;
1219 
1220         uvm_va_block_context_init(service_context->block_context, mm);
1221 
1222         if (uvm_va_block_is_hmm(va_block))
1223             uvm_hmm_migrate_begin_wait(va_block);
1224 
1225         uvm_mutex_lock(&va_block->lock);
1226 
1227         reverse_mappings_to_va_block_page_mask(va_block, reverse_mappings, num_reverse_mappings, accessed_pages);
1228 
1229         status = UVM_VA_BLOCK_RETRY_LOCKED(va_block,
1230                                            &va_block_retry,
1231                                            service_va_block_locked(processor,
1232                                                                    va_block,
1233                                                                    &va_block_retry,
1234                                                                    service_context,
1235                                                                    accessed_pages));
1236 
1237         uvm_mutex_unlock(&va_block->lock);
1238 
1239         if (uvm_va_block_is_hmm(va_block)) {
1240             uvm_hmm_migrate_finish(va_block);
1241 
1242             // If the pages could not be migrated, no need to try again,
1243             // this is best effort only.
1244             if (status == NV_WARN_MORE_PROCESSING_REQUIRED || status == NV_WARN_MISMATCHED_TARGET)
1245                 status = NV_OK;
1246         }
1247 
1248         if (status == NV_OK)
1249             *out_flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
1250     }
1251 
1252 done:
1253     if (va_space) {
1254         uvm_va_space_up_read(va_space);
1255         uvm_va_space_mm_release_unlock(va_space, mm);
1256     }
1257 
1258     // Drop the refcounts taken by the reverse map translation routines
1259     for (index = 0; index < num_reverse_mappings; ++index)
1260         uvm_va_block_release(va_block);
1261 
1262     return status;
1263 }
1264 
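// Service reverse map translations that span multiple VA blocks by servicing
// each translation as its own single-block group. On error, the VA block
// references of the remaining, unserviced translations are still dropped.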
1265 static NV_STATUS service_phys_va_blocks(uvm_gpu_t *gpu,
1266                                         uvm_access_counter_service_batch_context_t *batch_context,
1267                                         const uvm_access_counter_buffer_entry_t *current_entry,
1268                                         const uvm_reverse_map_t *reverse_mappings,
1269                                         size_t num_reverse_mappings,
1270                                         NvU32 *out_flags)
1271 {
1272     NV_STATUS status = NV_OK;
1273     size_t index;
1274 
1275     *out_flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
1276 
1277     for (index = 0; index < num_reverse_mappings; ++index) {
1278         NvU32 out_flags_local = 0;
1279         status = service_phys_single_va_block(gpu,
1280                                               batch_context,
1281                                               current_entry,
1282                                               reverse_mappings + index,
1283                                               1,
1284                                               &out_flags_local);
1285         if (status != NV_OK)
1286             break;
1287 
1288         UVM_ASSERT((out_flags_local & ~UVM_ACCESS_COUNTER_ACTION_CLEAR) == 0);
1289         *out_flags |= out_flags_local;
1290     }
1291 
1292     // In the case of failure, drop the refcounts for the remaining reverse mappings
1293     while (++index < num_reverse_mappings)
1294         uvm_va_block_release(reverse_mappings[index].va_block);
1295 
1296     return status;
1297 }
1298 
1299 // Iterate over all regions set in the given sub_granularity mask
1300 #define for_each_sub_granularity_region(region_start, region_end, sub_granularity, num_regions)      \
1301     for ((region_start) = find_first_bit(&(sub_granularity), (num_regions)),                         \
1302          (region_end) = find_next_zero_bit(&(sub_granularity), (num_regions), (region_start) + 1);   \
1303          (region_start) < (num_regions);                                                             \
1304          (region_start) = find_next_bit(&(sub_granularity), (num_regions), (region_end) + 1),        \
1305          (region_end) = find_next_zero_bit(&(sub_granularity), (num_regions), (region_start) + 1))
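// For example (illustrative values): with sub_granularity == 0b00111100 and
// num_regions == 8, the loop visits each maximal run of consecutive set bits
// as a [region_start, region_end) pair, so the body runs exactly once with
// region_start == 2 and region_end == 6.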
1306 
1307 
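// Return true if all the given reverse map translations refer to the same VA
// block.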
1308 static bool are_reverse_mappings_on_single_block(const uvm_reverse_map_t *reverse_mappings, size_t num_reverse_mappings)
1309 {
1310     size_t index;
1311     uvm_va_block_t *prev_va_block = NULL;
1312 
1313     for (index = 0; index < num_reverse_mappings; ++index) {
1314         uvm_va_block_t *va_block = reverse_mappings[index].va_block;
1315         UVM_ASSERT(va_block);
1316 
1317         if (prev_va_block && prev_va_block != va_block)
1318             return false;
1319 
1320         prev_va_block = va_block;
1321     }
1322 
1323     return true;
1324 }
1325 
1326 // Service the given translation range. The number of reverse mappings found
1327 // during servicing is returned in *num_reverse_mappings, even if the function
1328 // does not return NV_OK.
1329 static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
1330                                                        uvm_gpu_t *resident_gpu,
1331                                                        uvm_access_counter_service_batch_context_t *batch_context,
1332                                                        const uvm_gpu_access_counter_type_config_t *config,
1333                                                        const uvm_access_counter_buffer_entry_t *current_entry,
1334                                                        NvU64 address,
1335                                                        unsigned long sub_granularity,
1336                                                        size_t *num_reverse_mappings,
1337                                                        NvU32 *out_flags)
1338 {
1339     NV_STATUS status;
1340     NvU32 region_start, region_end;
1341 
1342     *num_reverse_mappings = 0;
1343 
1344     // Get the reverse_map translations for all the regions set in the
1345     // sub_granularity field of the counter.
1346     for_each_sub_granularity_region(region_start,
1347                                     region_end,
1348                                     sub_granularity,
1349                                     config->sub_granularity_regions_per_translation) {
1350         NvU64 local_address = address + region_start * config->sub_granularity_region_size;
1351         NvU32 local_translation_size = (region_end - region_start) * config->sub_granularity_region_size;
1352         uvm_reverse_map_t *local_reverse_mappings = batch_context->phys.translations + *num_reverse_mappings;
1353 
1354         // Obtain the virtual addresses of the pages within the reported
1355         // DMA range
1356         if (resident_gpu) {
1357             *num_reverse_mappings += uvm_pmm_gpu_phys_to_virt(&resident_gpu->pmm,
1358                                                               local_address,
1359                                                               local_translation_size,
1360                                                               local_reverse_mappings);
1361         }
1362         else {
1363             *num_reverse_mappings += uvm_pmm_sysmem_mappings_dma_to_virt(&gpu->pmm_reverse_sysmem_mappings,
1364                                                                          local_address,
1365                                                                          local_translation_size,
1366                                                                          local_reverse_mappings,
1367                                                                          local_translation_size / PAGE_SIZE);
1368         }
1369     }
1370 
1371     if (*num_reverse_mappings == 0)
1372         return NV_OK;
1373 
1374     // Service all the translations
1375     if (are_reverse_mappings_on_single_block(batch_context->phys.translations, *num_reverse_mappings)) {
1376         status = service_phys_single_va_block(gpu,
1377                                               batch_context,
1378                                               current_entry,
1379                                               batch_context->phys.translations,
1380                                               *num_reverse_mappings,
1381                                               out_flags);
1382     }
1383     else {
1384         status = service_phys_va_blocks(gpu,
1385                                         batch_context,
1386                                         current_entry,
1387                                         batch_context->phys.translations,
1388                                         *num_reverse_mappings,
1389                                         out_flags);
1390     }
1391 
1392     return status;
1393 }
1394 
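// Service a single physical notification. Each translation range tracked by
// the notification is walked in turn: the accessed sub-regions are translated
// back to virtual addresses through the reverse map and the resulting VA
// blocks are serviced. The clear flag is reported to the caller if any
// serviced translation requested it and no error occurred.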
1395 static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
1396                                            uvm_access_counter_service_batch_context_t *batch_context,
1397                                            const uvm_access_counter_buffer_entry_t *current_entry,
1398                                            NvU32 *out_flags)
1399 {
1400     NvU64 address;
1401     NvU64 translation_index;
1402     uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
1403     uvm_access_counter_type_t counter_type = current_entry->counter_type;
1404     const uvm_gpu_access_counter_type_config_t *config = get_config_for_type(access_counters, counter_type);
1405     unsigned long sub_granularity;
1406     size_t total_reverse_mappings = 0;
1407     uvm_gpu_t *resident_gpu = NULL;
1408     NV_STATUS status = NV_OK;
1409     NvU32 flags = 0;
1410 
1411     address = current_entry->address.address;
1412     UVM_ASSERT(address % config->translation_size == 0);
1413     sub_granularity = current_entry->sub_granularity;
1414 
1415     if (config->rm.granularity == UVM_ACCESS_COUNTER_GRANULARITY_64K)
1416         sub_granularity = 1;
1417 
1418     if (UVM_ID_IS_GPU(current_entry->physical_info.resident_id)) {
1419         resident_gpu = uvm_gpu_get(current_entry->physical_info.resident_id);
1420         UVM_ASSERT(resident_gpu != NULL);
1421 
1422         if (gpu != resident_gpu && uvm_gpus_are_nvswitch_connected(gpu, resident_gpu)) {
1423             UVM_ASSERT(address >= resident_gpu->parent->nvswitch_info.fabric_memory_window_start);
1424             address -= resident_gpu->parent->nvswitch_info.fabric_memory_window_start;
1425         }
1426 
1427         // On P9 systems, the CPU accesses the reserved heap on vidmem via
1428         // coherent NVLINK mappings. This can trigger notifications that
1429         // fall outside of the allocatable address range. We just drop
1430         // them.
1431         if (address >= resident_gpu->mem_info.max_allocatable_address)
1432             return NV_OK;
1433     }
1434 
1435     for (translation_index = 0; translation_index < config->translations_per_counter; ++translation_index) {
1436         size_t num_reverse_mappings;
1437         NvU32 out_flags_local = 0;
1438         status = service_phys_notification_translation(gpu,
1439                                                        resident_gpu,
1440                                                        batch_context,
1441                                                        config,
1442                                                        current_entry,
1443                                                        address,
1444                                                        sub_granularity,
1445                                                        &num_reverse_mappings,
1446                                                        &out_flags_local);
1447         total_reverse_mappings += num_reverse_mappings;
1448 
1449         UVM_ASSERT((out_flags_local & ~UVM_ACCESS_COUNTER_ACTION_CLEAR) == 0);
1450         flags |= out_flags_local;
1451 
1452         if (status != NV_OK)
1453             break;
1454 
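        // Advance to the next translation's physical range and to its slice of
        // the sub_granularity bitmap.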
1455         address += config->translation_size;
1456         sub_granularity = sub_granularity >> config->sub_granularity_regions_per_translation;
1457     }
1458 
1459     if (uvm_enable_builtin_tests)
1460         *out_flags |= ((total_reverse_mappings != 0) ? UVM_ACCESS_COUNTER_PHYS_ON_MANAGED : 0);
1461 
1462     if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
1463         *out_flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
1464 
1465     return status;
1466 }
1467 
1468 // TODO: Bug 2018899: Add statistics for dropped access counter notifications
1469 static NV_STATUS service_phys_notifications(uvm_gpu_t *gpu,
1470                                             uvm_access_counter_service_batch_context_t *batch_context)
1471 {
1472     NvU32 i;
1473     uvm_access_counter_buffer_entry_t **notifications = batch_context->phys.notifications;
1474 
1475     UVM_ASSERT(gpu->parent->access_counters_can_use_physical_addresses);
1476 
1477     preprocess_phys_notifications(batch_context);
1478 
1479     for (i = 0; i < batch_context->phys.num_notifications; ++i) {
1480         NV_STATUS status;
1481         uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
1482         NvU32 flags = 0;
1483 
1484         if (!UVM_ID_IS_VALID(current_entry->physical_info.resident_id))
1485             continue;
1486 
1487         status = service_phys_notification(gpu, batch_context, current_entry, &flags);
1488 
1489         notify_tools_and_process_flags(gpu, &notifications[i], 1, flags);
1490 
1491         if (status != NV_OK)
1492             return status;
1493     }
1494 
1495     return NV_OK;
1496 }
1497 
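// Service the pages accumulated in batch_context->accessed_pages on the given
// va_block. The caller must hold the VA block lock.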
1498 static NV_STATUS service_notification_va_block_helper(struct mm_struct *mm,
1499                                                       uvm_va_block_t *va_block,
1500                                                       uvm_processor_id_t processor,
1501                                                       uvm_access_counter_service_batch_context_t *batch_context)
1502 {
1503     uvm_va_block_retry_t va_block_retry;
1504     uvm_page_mask_t *accessed_pages = &batch_context->accessed_pages;
1505     uvm_service_block_context_t *service_context = &batch_context->block_service_context;
1506 
1507     if (uvm_page_mask_empty(accessed_pages))
1508         return NV_OK;
1509 
1510     uvm_assert_mutex_locked(&va_block->lock);
1511 
1512     service_context->operation = UVM_SERVICE_OPERATION_ACCESS_COUNTERS;
1513     service_context->num_retries = 0;
1514 
1515     return UVM_VA_BLOCK_RETRY_LOCKED(va_block,
1516                                      &va_block_retry,
1517                                      service_va_block_locked(processor,
1518                                                              va_block,
1519                                                              &va_block_retry,
1520                                                              service_context,
1521                                                              accessed_pages));
1522 }
1523 
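// Expand a single virtual notification into the accessed_pages mask. If the
// accessed page is not backed at the full tracking granularity, only the
// notification's page is marked. Otherwise the sub_granularity bitmap is
// expanded into page regions, keeping only the pages already resident on
// resident_id.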
1524 static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,
1525                                       uvm_va_block_t *va_block,
1526                                       uvm_va_block_context_t *va_block_context,
1527                                       uvm_page_mask_t *accessed_pages,
1528                                       const uvm_access_counter_buffer_entry_t *current_entry)
1529 {
1530     NvU64 addr;
1531     NvU64 granularity = 0;
1532     uvm_gpu_t *resident_gpu = NULL;
1533     uvm_processor_id_t resident_id;
1534     uvm_page_index_t page_index;
1535     uvm_gpu_t *gpu = gpu_va_space->gpu;
1536     const uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
1537     const uvm_gpu_access_counter_type_config_t *config = get_config_for_type(access_counters,
1538                                                                              UVM_ACCESS_COUNTER_TYPE_MIMC);
1539 
1540     config_granularity_to_bytes(config->rm.granularity, &granularity);
1541 
1542     // Granularities other than 2MB can only be enabled by UVM tests. Do nothing
1543     // in that case.
1544     if (granularity != UVM_PAGE_SIZE_2M)
1545         return;
1546 
1547     addr = current_entry->address.address;
1548 
1549     uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);
1550     uvm_assert_mutex_locked(&va_block->lock);
1551 
1552     page_index = uvm_va_block_cpu_page_index(va_block, addr);
1553 
1554     resident_id = uvm_va_block_page_get_closest_resident(va_block, va_block_context, page_index, gpu->id);
1555 
1556     // resident_id might be invalid, or it might already be the GPU that
1557     // received the notification, if the memory was migrated before the locks
1558     // were acquired (while servicing previous notifications or faults, or due
1559     // to explicit migrations), or if the VA range was freed after the
1560     // notification was received. Return early in such cases.
1561     if (!UVM_ID_IS_VALID(resident_id) || uvm_id_equal(resident_id, gpu->id))
1562         return;
1563 
1564     if (UVM_ID_IS_GPU(resident_id))
1565         resident_gpu = uvm_va_space_get_gpu(gpu_va_space->va_space, resident_id);
1566 
1567     if (uvm_va_block_get_physical_size(va_block, resident_id, page_index) != granularity) {
1568         uvm_page_mask_set(accessed_pages, page_index);
1569     }
1570     else {
1571         NvU32 region_start;
1572         NvU32 region_end;
1573         unsigned long sub_granularity = current_entry->sub_granularity;
1574         NvU32 num_regions = config->sub_granularity_regions_per_translation;
1575         NvU32 num_sub_pages = config->sub_granularity_region_size / PAGE_SIZE;
1576         uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, resident_id, NUMA_NO_NODE);
1577 
1578         UVM_ASSERT(num_sub_pages >= 1);
1579 
1580         // region_start and region_end refer to sub_granularity indices, not
1581         // page_indices.
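        // For example, assuming 4KB system pages and the default 2MB tracking
        // granularity split into 32 sub-regions of 64KB, num_sub_pages is 16
        // and the sub-region range [2, 6) expands to pages [32, 96) of the
        // block.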
1582         for_each_sub_granularity_region(region_start, region_end, sub_granularity, num_regions) {
1583             uvm_page_mask_region_fill(accessed_pages,
1584                                       uvm_va_block_region(region_start * num_sub_pages,
1585                                                           region_end * num_sub_pages));
1586         }
1587 
1588         // Remove pages in the va_block which are not resident on resident_id.
1589         // If the GPU is heavily accessing those pages, future access counter
1590         // migrations will migrate them to the GPU.
1591         uvm_page_mask_and(accessed_pages, accessed_pages, resident_mask);
1592     }
1593 }
1594 
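// Service the consecutive run of notifications starting at index that target
// va_space and fall within va_block. Each matching notification is expanded
// into accessed_pages and the block is then serviced once. *out_index is set
// to the first notification that was not consumed.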
1595 static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_space,
1596                                                      struct mm_struct *mm,
1597                                                      uvm_va_block_t *va_block,
1598                                                      uvm_access_counter_service_batch_context_t *batch_context,
1599                                                      NvU32 index,
1600                                                      NvU32 *out_index)
1601 {
1602     NvU32 i;
1603     NvU32 flags = 0;
1604     NV_STATUS status = NV_OK;
1605     NV_STATUS flags_status;
1606     uvm_gpu_t *gpu = gpu_va_space->gpu;
1607     uvm_va_space_t *va_space = gpu_va_space->va_space;
1608     uvm_page_mask_t *accessed_pages = &batch_context->accessed_pages;
1609     uvm_access_counter_buffer_entry_t **notifications = batch_context->virt.notifications;
1610     uvm_service_block_context_t *service_context = &batch_context->block_service_context;
1611 
1612     UVM_ASSERT(va_block);
1613     UVM_ASSERT(index < batch_context->virt.num_notifications);
1614 
1615     uvm_assert_rwsem_locked(&va_space->lock);
1616 
1617     uvm_page_mask_zero(accessed_pages);
1618 
1619     uvm_va_block_context_init(service_context->block_context, mm);
1620 
1621     uvm_mutex_lock(&va_block->lock);
1622 
1623     for (i = index; i < batch_context->virt.num_notifications; i++) {
1624         uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
1625         NvU64 address = current_entry->address.address;
1626 
1627         if ((current_entry->virtual_info.va_space == va_space) && (address <= va_block->end)) {
1628             expand_notification_block(gpu_va_space,
1629                                       va_block,
1630                                       batch_context->block_service_context.block_context,
1631                                       accessed_pages,
1632                                       current_entry);
1633         }
1634         else {
1635             break;
1636         }
1637     }
1638 
1639     *out_index = i;
1640 
1641     // At least one notification should have been processed.
1642     UVM_ASSERT(index < *out_index);
1643 
1644     status = service_notification_va_block_helper(mm, va_block, gpu->id, batch_context);
1645 
1646     uvm_mutex_unlock(&va_block->lock);
1647 
1648     if (status == NV_OK)
1649         flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
1650 
1651     flags_status = notify_tools_and_process_flags(gpu, &notifications[index], *out_index - index, flags);
1652 
1653     if ((status == NV_OK) && (flags_status != NV_OK))
1654         status = flags_status;
1655 
1656     return status;
1657 }
1658 
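// ATS servicing path for notifications not backed by a managed VA range.
// Consecutive notifications that target va_space and fall within the same
// VA-block-sized region (clamped to the VMA) are accumulated into
// ats_context->accessed_mask and serviced together.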
1659 static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
1660                                                struct mm_struct *mm,
1661                                                uvm_access_counter_service_batch_context_t *batch_context,
1662                                                NvU32 index,
1663                                                NvU32 *out_index)
1664 {
1665 
1666     NvU32 i;
1667     NvU64 base;
1668     NvU64 end;
1669     NvU64 address;
1670     NvU32 flags = UVM_ACCESS_COUNTER_ACTION_CLEAR;
1671     NV_STATUS status = NV_OK;
1672     NV_STATUS flags_status;
1673     struct vm_area_struct *vma = NULL;
1674     uvm_gpu_t *gpu = gpu_va_space->gpu;
1675     uvm_va_space_t *va_space = gpu_va_space->va_space;
1676     uvm_ats_fault_context_t *ats_context = &batch_context->ats_context;
1677     uvm_access_counter_buffer_entry_t **notifications = batch_context->virt.notifications;
1678 
1679     UVM_ASSERT(index < batch_context->virt.num_notifications);
1680 
1681     uvm_assert_mmap_lock_locked(mm);
1682     uvm_assert_rwsem_locked(&va_space->lock);
1683 
1684     address = notifications[index]->address.address;
1685 
1686     vma = find_vma_intersection(mm, address, address + 1);
1687     if (!vma) {
1688         // Clear the notification entry to continue receiving access counter
1689         // notifications when a new VMA is allocated in this range.
1690         status = notify_tools_and_process_flags(gpu, &notifications[index], 1, flags);
1691         *out_index = index + 1;
1692         return status;
1693     }
1694 
1695     base = UVM_VA_BLOCK_ALIGN_DOWN(address);
1696     end = min(base + UVM_VA_BLOCK_SIZE, (NvU64)vma->vm_end);
1697 
1698     uvm_page_mask_zero(&ats_context->accessed_mask);
1699 
1700     for (i = index; i < batch_context->virt.num_notifications; i++) {
1701         uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
1702         address = current_entry->address.address;
1703 
1704         if ((current_entry->virtual_info.va_space == va_space) && (address < end))
1705             uvm_page_mask_set(&ats_context->accessed_mask, (address - base) / PAGE_SIZE);
1706         else
1707             break;
1708     }
1709 
1710     *out_index = i;
1711 
1712     // At least one notification should have been processed.
1713     UVM_ASSERT(index < *out_index);
1714 
1715     // TODO: Bug 2113632: [UVM] Don't clear access counters when the preferred
1716     //                    location is set
1717     // If no pages were actually migrated, don't clear the access counters.
1718     status = uvm_ats_service_access_counters(gpu_va_space, vma, base, ats_context);
1719     if (status != NV_OK)
1720         flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
1721 
1722     flags_status = notify_tools_and_process_flags(gpu, &notifications[index], *out_index - index, flags);
1723     if ((status == NV_OK) && (flags_status != NV_OK))
1724         status = flags_status;
1725 
1726     return status;
1727 }
1728 
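// Dispatch a run of notifications starting at index: notifications on managed
// VA ranges are serviced through their VA block (or just cleared if the block
// was freed), notifications with no VA range go through the ATS path when
// available, and anything else only determines whether the entry should be
// cleared.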
1729 static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_space,
1730                                                   struct mm_struct *mm,
1731                                                   uvm_access_counter_service_batch_context_t *batch_context,
1732                                                   NvU32 index,
1733                                                   NvU32 *out_index)
1734 {
1735     NV_STATUS status;
1736     uvm_va_range_t *va_range;
1737     uvm_va_space_t *va_space = gpu_va_space->va_space;
1738     uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[index];
1739     NvU64 address = current_entry->address.address;
1740 
1741     UVM_ASSERT(va_space);
1742 
1743     if (mm)
1744         uvm_assert_mmap_lock_locked(mm);
1745 
1746     uvm_assert_rwsem_locked(&va_space->lock);
1747 
1748     // Virtual address notifications are always 64K aligned
1749     UVM_ASSERT(IS_ALIGNED(address, UVM_PAGE_SIZE_64K));
1750 
1751     va_range = uvm_va_range_find(va_space, address);
1752     if (va_range) {
1753         // Avoid clearing the entry by default.
1754         NvU32 flags = 0;
1755         uvm_va_block_t *va_block = NULL;
1756 
1757         if (va_range->type == UVM_VA_RANGE_TYPE_MANAGED) {
1758             size_t index = uvm_va_range_block_index(va_range, address);
1759 
1760             va_block = uvm_va_range_block(va_range, index);
1761 
1762             // For a managed va_range, a NULL va_block means the notification
1763             // belongs to a recently freed va_range; clear the notification
1764             // entry so notifications keep arriving once a new va_range is
1765             // allocated in that region. If va_block is not NULL,
1766             // service_virt_notifications_in_block will process the flags.
1767             flags = UVM_ACCESS_COUNTER_ACTION_CLEAR;
1768         }
1769 
1770         if (va_block) {
1771             status = service_virt_notifications_in_block(gpu_va_space, mm, va_block, batch_context, index, out_index);
1772         }
1773         else {
1774             status = notify_tools_and_process_flags(gpu_va_space->gpu, batch_context->virt.notifications, 1, flags);
1775             *out_index = index + 1;
1776         }
1777     }
1778     else if (uvm_ats_can_service_faults(gpu_va_space, mm)) {
1779         status = service_virt_notification_ats(gpu_va_space, mm, batch_context, index, out_index);
1780     }
1781     else {
1782         NvU32 flags;
1783         uvm_va_block_t *va_block = NULL;
1784 
1785         status = uvm_hmm_va_block_find(va_space, address, &va_block);
1786 
1787         // TODO: Bug 4309292: [UVM][HMM] Re-enable access counter HMM block
1788         //                    migrations for virtual notifications
1789         //
1790         // - If the va_block is HMM, don't clear the notification since HMM
1791         // migrations are currently disabled.
1792         //
1793         // - If the va_block isn't HMM, the notification belongs to a recently
1794         // freed va_range. Clear the notification entry to continue receiving
1795         // notifications when a new va_range is allocated in this region.
1796         flags = va_block ? 0 : UVM_ACCESS_COUNTER_ACTION_CLEAR;
1797 
1798         UVM_ASSERT((status == NV_ERR_OBJECT_NOT_FOUND) ||
1799                    (status == NV_ERR_INVALID_ADDRESS)  ||
1800                    uvm_va_block_is_hmm(va_block));
1801 
1802         // Clobber status to continue processing the rest of the notifications
1803         // in the batch.
1804         status = notify_tools_and_process_flags(gpu_va_space->gpu, batch_context->virt.notifications, 1, flags);
1805 
1806         *out_index = index + 1;
1807     }
1808 
1809     return status;
1810 }
1811 
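// Service all virtual notifications in the batch. Locks are taken once per
// consecutive run of notifications targeting the same VA space; notifications
// without a registered GPU VA space or with migrations disabled are only
// reported to tools.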
1812 static NV_STATUS service_virt_notifications(uvm_gpu_t *gpu,
1813                                             uvm_access_counter_service_batch_context_t *batch_context)
1814 {
1815     NvU32 i = 0;
1816     NV_STATUS status = NV_OK;
1817     struct mm_struct *mm = NULL;
1818     uvm_va_space_t *va_space = NULL;
1819     uvm_va_space_t *prev_va_space = NULL;
1820     uvm_gpu_va_space_t *gpu_va_space = NULL;
1821 
1822     // TODO: Bug 4299018 : Add support for virtual access counter migrations on
1823     //                     4K page sizes.
1824     if (PAGE_SIZE == UVM_PAGE_SIZE_4K) {
1825         return notify_tools_and_process_flags(gpu,
1826                                               batch_context->virt.notifications,
1827                                               batch_context->virt.num_notifications,
1828                                               0);
1829     }
1830 
1831     preprocess_virt_notifications(gpu->parent, batch_context);
1832 
1833     while (i < batch_context->virt.num_notifications) {
1834         uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[i];
1835         va_space = current_entry->virtual_info.va_space;
1836 
1837         if (va_space != prev_va_space) {
1838 
1839             // New va_space detected; drop the locks of the old va_space.
1840             if (prev_va_space) {
1841                 uvm_va_space_up_read(prev_va_space);
1842                 uvm_va_space_mm_release_unlock(prev_va_space, mm);
1843 
1844                 mm = NULL;
1845                 gpu_va_space = NULL;
1846             }
1847 
1848             // Acquire locks for the new va_space.
1849             if (va_space) {
1850                 mm = uvm_va_space_mm_retain_lock(va_space);
1851                 uvm_va_space_down_read(va_space);
1852 
1853                 gpu_va_space = uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
1854             }
1855 
1856             prev_va_space = va_space;
1857         }
1858 
1859         if (va_space && gpu_va_space && uvm_va_space_has_access_counter_migrations(va_space)) {
1860             status = service_virt_notifications_batch(gpu_va_space, mm, batch_context, i, &i);
1861         }
1862         else {
1863             status = notify_tools_and_process_flags(gpu, &batch_context->virt.notifications[i], 1, 0);
1864             i++;
1865         }
1866 
1867         if (status != NV_OK)
1868             break;
1869     }
1870 
1871     if (va_space) {
1872         uvm_va_space_up_read(va_space);
1873         uvm_va_space_mm_release_unlock(va_space, mm);
1874     }
1875 
1876     return status;
1877 }
1878 
1879 
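// Fetch and service batches of access counter notifications until the
// notification buffer is drained or an error is hit. Servicing is skipped
// entirely while notifications are being ignored.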
1880 void uvm_gpu_service_access_counters(uvm_gpu_t *gpu)
1881 {
1882     NV_STATUS status = NV_OK;
1883     uvm_access_counter_service_batch_context_t *batch_context = &gpu->parent->access_counter_buffer_info.batch_service_context;
1884 
1885     UVM_ASSERT(gpu->parent->access_counters_supported);
1886 
1887     if (gpu->parent->access_counter_buffer_info.notifications_ignored_count > 0)
1888         return;
1889 
1890     while (1) {
1891         batch_context->num_cached_notifications = fetch_access_counter_buffer_entries(gpu,
1892                                                                                       batch_context,
1893                                                                                       NOTIFICATION_FETCH_MODE_BATCH_READY);
1894         if (batch_context->num_cached_notifications == 0)
1895             break;
1896 
1897         ++batch_context->batch_id;
1898 
1899         if (batch_context->virt.num_notifications) {
1900             status = service_virt_notifications(gpu, batch_context);
1901             if (status != NV_OK)
1902                 break;
1903         }
1904 
1905         if (batch_context->phys.num_notifications) {
1906             status = service_phys_notifications(gpu, batch_context);
1907             if (status != NV_OK)
1908                 break;
1909         }
1910     }
1911 
1912     if (status != NV_OK) {
1913         UVM_DBG_PRINT("Error %s servicing access counter notifications on GPU: %s\n",
1914                       nvstatusToString(status),
1915                       uvm_gpu_name(gpu));
1916     }
1917 }
1918 
1919 static const NvU32 g_uvm_access_counters_threshold_max = (1 << 15) - 1;
1920 
1921 static NV_STATUS access_counters_config_from_test_params(const UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS *params,
1922                                                          UvmGpuAccessCntrConfig *config)
1923 {
1924     NvU64 tracking_size;
1925     memset(config, 0, sizeof(*config));
1926 
1927     if (params->threshold == 0 || params->threshold > g_uvm_access_counters_threshold_max)
1928         return NV_ERR_INVALID_ARGUMENT;
1929 
1930     if (config_granularity_to_bytes(params->mimc_granularity, &tracking_size) != NV_OK)
1931         return NV_ERR_INVALID_ARGUMENT;
1932 
1933     if (config_granularity_to_bytes(params->momc_granularity, &tracking_size) != NV_OK)
1934         return NV_ERR_INVALID_ARGUMENT;
1935 
1936     // Since values for granularity/use limit are shared between tests and
1937     // nv_uvm_types.h, the value will be checked in the call to
1938     // nvUvmInterfaceEnableAccessCntr
1939     config->mimcGranularity = params->mimc_granularity;
1940     config->momcGranularity = params->momc_granularity;
1941 
1942     config->mimcUseLimit = params->mimc_use_limit;
1943     config->momcUseLimit = params->momc_use_limit;
1944 
1945     config->threshold = params->threshold;
1946 
1947     return NV_OK;
1948 }
1949 
1950 bool uvm_va_space_has_access_counter_migrations(uvm_va_space_t *va_space)
1951 {
1952     va_space_access_counters_info_t *va_space_access_counters = va_space_access_counters_info_get(va_space);
1953 
1954     return atomic_read(&va_space_access_counters->params.enable_mimc_migrations);
1955 }
1956 
1957 NV_STATUS uvm_perf_access_counters_init(void)
1958 {
1959     uvm_perf_module_init("perf_access_counters",
1960                          UVM_PERF_MODULE_TYPE_ACCESS_COUNTERS,
1961                          g_callbacks_access_counters,
1962                          ARRAY_SIZE(g_callbacks_access_counters),
1963                          &g_module_access_counters);
1964 
1965     return NV_OK;
1966 }
1967 
1968 void uvm_perf_access_counters_exit(void)
1969 {
1970 }
1971 
1972 NV_STATUS uvm_perf_access_counters_load(uvm_va_space_t *va_space)
1973 {
1974     va_space_access_counters_info_t *va_space_access_counters;
1975     NV_STATUS status;
1976 
1977     status = uvm_perf_module_load(&g_module_access_counters, va_space);
1978     if (status != NV_OK)
1979         return status;
1980 
1981     va_space_access_counters = va_space_access_counters_info_create(va_space);
1982     if (!va_space_access_counters)
1983         return NV_ERR_NO_MEMORY;
1984 
1985     return NV_OK;
1986 }
1987 
1988 void uvm_perf_access_counters_unload(uvm_va_space_t *va_space)
1989 {
1990     uvm_perf_module_unload(&g_module_access_counters, va_space);
1991 
1992     va_space_access_counters_info_destroy(va_space);
1993 }
1994 
1995 NV_STATUS uvm_test_access_counters_enabled_by_default(UVM_TEST_ACCESS_COUNTERS_ENABLED_BY_DEFAULT_PARAMS *params,
1996                                                       struct file *filp)
1997 {
1998     uvm_va_space_t *va_space = uvm_va_space_get(filp);
1999     uvm_gpu_t *gpu = NULL;
2000 
2001     gpu = uvm_va_space_retain_gpu_by_uuid(va_space, &params->gpu_uuid);
2002     if (!gpu)
2003         return NV_ERR_INVALID_DEVICE;
2004 
2005     params->enabled = uvm_parent_gpu_access_counters_required(gpu->parent);
2006 
2007     uvm_gpu_release(gpu);
2008 
2009     return NV_OK;
2010 }
2011 
2012 NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS *params, struct file *filp)
2013 {
2014     NV_STATUS status = NV_OK;
2015     uvm_gpu_t *gpu = NULL;
2016     UvmGpuAccessCntrConfig config = {0};
2017     va_space_access_counters_info_t *va_space_access_counters;
2018     uvm_va_space_t *va_space_reconfiguration_owner;
2019     uvm_va_space_t *va_space = uvm_va_space_get(filp);
2020 
2021     status = access_counters_config_from_test_params(params, &config);
2022     if (status != NV_OK)
2023         return status;
2024 
2025     gpu = uvm_va_space_retain_gpu_by_uuid(va_space, &params->gpu_uuid);
2026     if (!gpu)
2027         return NV_ERR_INVALID_DEVICE;
2028 
2029     if (!gpu->parent->access_counters_supported) {
2030         status = NV_ERR_NOT_SUPPORTED;
2031         goto exit_release_gpu;
2032     }
2033 
2034     // ISR lock ensures that we own GET/PUT registers. It disables interrupts
2035     // and ensures that no other thread (nor the top half) will be able to
2036     // re-enable interrupts during reconfiguration.
2037     uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
2038 
2039     uvm_va_space_down_read_rm(va_space);
2040 
2041     if (!uvm_processor_mask_test(&va_space->registered_gpus, gpu->id)) {
2042         status = NV_ERR_INVALID_STATE;
2043         goto exit_isr_unlock;
2044     }
2045 
2046     // Unregistration already started. Fail to avoid an interleaving in which
2047     // access counters end up being enabled on an unregistered GPU:
2048     // (thread 0) uvm_va_space_unregister_gpu disables access counters
2049     // (thread 1) assuming no VA space lock is held yet by the unregistration,
2050     //            this function enables access counters and runs to completion,
2051     //            returning NV_OK
2052     // (thread 0) uvm_va_space_unregister_gpu takes the VA space lock and
2053     //            completes the unregistration
2054     if (uvm_processor_mask_test(&va_space->gpu_unregister_in_progress, gpu->id)) {
2055         status = NV_ERR_INVALID_STATE;
2056         goto exit_isr_unlock;
2057     }
2058 
2059     va_space_access_counters = va_space_access_counters_info_get(va_space);
2060 
2061     va_space_reconfiguration_owner = gpu->parent->access_counter_buffer_info.reconfiguration_owner;
2062 
2063     // If any other VA space has reconfigured access counters on this GPU,
2064     // return an error to avoid overwriting its configuration.
2065     if (va_space_reconfiguration_owner && (va_space_reconfiguration_owner != va_space)) {
2066         status = NV_ERR_INVALID_STATE;
2067         goto exit_isr_unlock;
2068     }
2069 
2070     if (!uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
2071         status = gpu_access_counters_enable(gpu, &config);
2072 
2073         if (status == NV_OK)
2074             uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
2075         else
2076             goto exit_isr_unlock;
2077     }
2078 
2079     UVM_ASSERT(gpu->parent->isr.access_counters.handling_ref_count > 0);
2080 
2081     // Disable counters, then re-enable them with the new configuration.
2082     // Note that we are yielding ownership even when the access counters are
2083     // enabled on at least this GPU. This inconsistent state is not visible to other
2084     // threads or VA spaces because of the ISR lock, and it is immediately
2085     // rectified by retaking ownership.
2086     access_counters_yield_ownership(gpu->parent);
2087     status = access_counters_take_ownership(gpu, &config);
2088 
2089     // Retaking ownership failed, so RM owns the interrupt.
2090     if (status != NV_OK) {
2091         // The state of any other VA space with access counters enabled is
2092         // corrupt
2093         // TODO: Bug 2419290: Fail reconfiguration if access
2094         // counters are enabled on a different VA space.
2095         if (gpu->parent->isr.access_counters.handling_ref_count > 1) {
2096             UVM_ASSERT_MSG(status == NV_OK,
2097                            "Access counters interrupt still owned by RM, other VA spaces may experience failures");
2098         }
2099 
2100         uvm_parent_processor_mask_clear_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
2101         parent_gpu_access_counters_disable(gpu->parent);
2102         goto exit_isr_unlock;
2103     }
2104 
2105     gpu->parent->access_counter_buffer_info.reconfiguration_owner = va_space;
2106 
2107     uvm_va_space_up_read_rm(va_space);
2108     uvm_va_space_down_write(va_space);
2109     atomic_set(&va_space_access_counters->params.enable_mimc_migrations, !!params->enable_mimc_migrations);
2110     atomic_set(&va_space_access_counters->params.enable_momc_migrations, !!params->enable_momc_migrations);
2111     uvm_va_space_up_write(va_space);
2112 
2113 exit_isr_unlock:
2114     if (status != NV_OK)
2115         uvm_va_space_up_read_rm(va_space);
2116 
2117     uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
2118 
2119 exit_release_gpu:
2120     uvm_gpu_release(gpu);
2121 
2122     return status;
2123 }
2124 
2125 NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *params, struct file *filp)
2126 {
2127     NV_STATUS status = NV_OK;
2128     uvm_gpu_t *gpu = NULL;
2129     uvm_access_counter_buffer_info_t *access_counters;
2130     uvm_va_space_t *va_space = uvm_va_space_get(filp);
2131 
2132     if (params->mode >= UVM_TEST_ACCESS_COUNTER_RESET_MODE_MAX)
2133         return NV_ERR_INVALID_ARGUMENT;
2134 
2135     if (params->mode == UVM_TEST_ACCESS_COUNTER_RESET_MODE_TARGETED &&
2136         params->counter_type >= UVM_TEST_ACCESS_COUNTER_TYPE_MAX) {
2137         return NV_ERR_INVALID_ARGUMENT;
2138     }
2139 
2140     gpu = uvm_va_space_retain_gpu_by_uuid(va_space, &params->gpu_uuid);
2141     if (!gpu)
2142         return NV_ERR_INVALID_DEVICE;
2143 
2144     if (!gpu->parent->access_counters_supported) {
2145         status = NV_ERR_NOT_SUPPORTED;
2146         goto exit_release_gpu;
2147     }
2148 
2149     uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
2150 
2151     // Access counters not enabled. Nothing to reset
2152     if (gpu->parent->isr.access_counters.handling_ref_count == 0)
2153         goto exit_isr_unlock;
2154 
2155     access_counters = &gpu->parent->access_counter_buffer_info;
2156 
2157     if (params->mode == UVM_TEST_ACCESS_COUNTER_RESET_MODE_ALL) {
2158         status = access_counter_clear_all(gpu);
2159     }
2160     else {
2161         uvm_access_counter_buffer_entry_t entry = { 0 };
2162         uvm_access_counter_buffer_entry_t *notification = &entry;
2163 
2164         if (params->counter_type == UVM_TEST_ACCESS_COUNTER_TYPE_MIMC)
2165             entry.counter_type = UVM_ACCESS_COUNTER_TYPE_MIMC;
2166         else
2167             entry.counter_type = UVM_ACCESS_COUNTER_TYPE_MOMC;
2168 
2169         entry.bank = params->bank;
2170         entry.tag = params->tag;
2171 
2172         status = access_counter_clear_notifications(gpu, &notification, 1);
2173     }
2174 
2175     if (status == NV_OK)
2176         status = uvm_tracker_wait(&access_counters->clear_tracker);
2177 
2178 exit_isr_unlock:
2179     uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
2180 
2181 exit_release_gpu:
2182     uvm_gpu_release(gpu);
2183 
2184     return status;
2185 }
2186 
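// Toggle whether access counter notifications are ignored. The first ignore
// request disables the access counters interrupt to avoid an interrupt storm;
// the last un-ignore re-enables it and flushes the notification buffer.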
2187 void uvm_parent_gpu_access_counters_set_ignore(uvm_parent_gpu_t *parent_gpu, bool do_ignore)
2188 {
2189     bool change_intr_state = false;
2190 
2191     if (!parent_gpu->access_counters_supported)
2192         return;
2193 
2194     uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
2195 
2196     if (do_ignore) {
2197         if (parent_gpu->access_counter_buffer_info.notifications_ignored_count++ == 0)
2198             change_intr_state = true;
2199     }
2200     else {
2201         UVM_ASSERT(parent_gpu->access_counter_buffer_info.notifications_ignored_count >= 1);
2202         if (--parent_gpu->access_counter_buffer_info.notifications_ignored_count == 0)
2203             change_intr_state = true;
2204     }
2205 
2206     if (change_intr_state) {
2207         // We need to avoid an interrupt storm while ignoring notifications. We
2208         // just disable the interrupt.
2209         uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
2210 
2211         if (do_ignore)
2212             uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
2213         else
2214             uvm_parent_gpu_access_counters_intr_enable(parent_gpu);
2215 
2216         uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
2217 
2218         if (!do_ignore)
2219             access_counter_buffer_flush_locked(parent_gpu, UVM_GPU_BUFFER_FLUSH_MODE_CACHED_PUT);
2220     }
2221 
2222     uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
2223 }
2224 
2225 NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTERS_PARAMS *params, struct file *filp)
2226 {
2227     uvm_va_space_t *va_space = uvm_va_space_get(filp);
2228     NV_STATUS status = NV_OK;
2229     uvm_gpu_t *gpu = NULL;
2230 
2231     gpu = uvm_va_space_retain_gpu_by_uuid(va_space, &params->gpu_uuid);
2232     if (!gpu)
2233         return NV_ERR_INVALID_DEVICE;
2234 
2235     if (gpu->parent->access_counters_supported)
2236         uvm_parent_gpu_access_counters_set_ignore(gpu->parent, params->ignore);
2237     else
2238         status = NV_ERR_NOT_SUPPORTED;
2239 
2240     uvm_gpu_release(gpu);
2241     return status;
2242 }
2243