/*******************************************************************************
    Copyright (c) 2017-2024 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.
*******************************************************************************/

#include "linux/sort.h"
#include "nv_uvm_interface.h"
#include "uvm_gpu_access_counters.h"
#include "uvm_global.h"
#include "uvm_gpu.h"
#include "uvm_hal.h"
#include "uvm_kvmalloc.h"
#include "uvm_tools.h"
#include "uvm_va_block.h"
#include "uvm_va_range.h"
#include "uvm_va_space_mm.h"
#include "uvm_pmm_sysmem.h"
#include "uvm_perf_module.h"
#include "uvm_ats.h"
#include "uvm_ats_faults.h"

#define UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN     1
#define UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_DEFAULT 256
#define UVM_PERF_ACCESS_COUNTER_GRANULARITY         UVM_ACCESS_COUNTER_GRANULARITY_2M
#define UVM_PERF_ACCESS_COUNTER_THRESHOLD_MIN       1
#define UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX       ((1 << 16) - 1)
#define UVM_PERF_ACCESS_COUNTER_THRESHOLD_DEFAULT   256

#define UVM_ACCESS_COUNTER_ACTION_CLEAR     0x1
#define UVM_ACCESS_COUNTER_PHYS_ON_MANAGED  0x2
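
// How these flags are consumed (see notify_tools_and_process_flags):
// UVM_ACCESS_COUNTER_ACTION_CLEAR requests a targeted HW clear of the
// serviced notifications, while UVM_ACCESS_COUNTER_PHYS_ON_MANAGED tags
// physical notifications that were resolved to UVM-managed memory when they
// are reported to tools.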

// Each page in a tracked physical range may belong to a different VA Block. We
// preallocate an array of reverse map translations. However, access counter
// granularity can be set to up to 16G, which would require an array too large
// to hold all possible translations. Thus, we set an upper bound for reverse
// map translations, and we perform as many translation requests as needed to
// cover the whole tracked range.
#define UVM_MAX_TRANSLATION_SIZE (2 * 1024 * 1024ULL)
#define UVM_SUB_GRANULARITY_REGIONS 32
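
// Illustrative numbers implied by the constants above: at the default 2M
// tracking granularity, each notification covers 32 sub-granularity regions
// of 2M / 32 = 64K each and fits in a single translation, while a 16G
// granularity requires 16G / 2M = 8192 translation requests per notification.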

static unsigned g_uvm_access_counter_threshold;

// Per-VA space access counters information
typedef struct
{
    // VA space-specific configuration settings. These override the global
    // settings
    struct
    {
        atomic_t enable_mimc_migrations;

        atomic_t enable_momc_migrations;
    } params;

    uvm_va_space_t *va_space;
} va_space_access_counters_info_t;

// Enable/disable access-counter-guided migrations
static int uvm_perf_access_counter_mimc_migration_enable = -1;
static int uvm_perf_access_counter_momc_migration_enable = -1;

// Number of entries that are fetched from the GPU access counter notification
// buffer and serviced in batch
static unsigned uvm_perf_access_counter_batch_count = UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_DEFAULT;

// See module param documentation below
static unsigned uvm_perf_access_counter_threshold = UVM_PERF_ACCESS_COUNTER_THRESHOLD_DEFAULT;

// Module parameters for the tunables
module_param(uvm_perf_access_counter_mimc_migration_enable, int, S_IRUGO);
MODULE_PARM_DESC(uvm_perf_access_counter_mimc_migration_enable,
                 "Whether MIMC access counters will trigger migrations. "
                 "Valid values: <= -1 (default policy), 0 (off), >= 1 (on)");
module_param(uvm_perf_access_counter_momc_migration_enable, int, S_IRUGO);
MODULE_PARM_DESC(uvm_perf_access_counter_momc_migration_enable,
                 "Whether MOMC access counters will trigger migrations. "
                 "Valid values: <= -1 (default policy), 0 (off), >= 1 (on)");
module_param(uvm_perf_access_counter_batch_count, uint, S_IRUGO);
module_param(uvm_perf_access_counter_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(uvm_perf_access_counter_threshold,
                 "Number of remote accesses on a region required to trigger a notification. "
                 "Valid values: [1, 65535]");
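
// Illustrative usage (hypothetical values): the tunables above are set at
// module load time, e.g.
//   modprobe nvidia-uvm uvm_perf_access_counter_threshold=512
// and, being S_IRUGO, can be read back under
// /sys/module/nvidia_uvm/parameters/ but not changed at runtime.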

static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
                                               uvm_gpu_buffer_flush_mode_t flush_mode);

static uvm_perf_module_event_callback_desc_t g_callbacks_access_counters[] = {};

// Performance heuristics module for access_counters
static uvm_perf_module_t g_module_access_counters;

// Get the access counters tracking struct for the given VA space if it exists.
// This information is allocated at VA space creation and freed during VA space
// destruction.
static va_space_access_counters_info_t *va_space_access_counters_info_get_or_null(uvm_va_space_t *va_space)
{
    return uvm_perf_module_type_data(va_space->perf_modules_data, UVM_PERF_MODULE_TYPE_ACCESS_COUNTERS);
}

// Get the access counters tracking struct for the given VA space. It asserts
// that the information has been previously created.
static va_space_access_counters_info_t *va_space_access_counters_info_get(uvm_va_space_t *va_space)
{
    va_space_access_counters_info_t *va_space_access_counters = va_space_access_counters_info_get_or_null(va_space);
    UVM_ASSERT(va_space_access_counters);

    return va_space_access_counters;
}

// Whether access counter migrations are enabled or not. The policy is as
// follows:
// - MIMC migrations are disabled by default on all non-ATS systems.
// - MOMC migrations are disabled by default on all systems.
// - Users can override this policy by specifying on/off.
static bool is_migration_enabled(uvm_access_counter_type_t type)
{
    int val;
    if (type == UVM_ACCESS_COUNTER_TYPE_MIMC) {
        val = uvm_perf_access_counter_mimc_migration_enable;
    }
    else {
        val = uvm_perf_access_counter_momc_migration_enable;

        UVM_ASSERT(type == UVM_ACCESS_COUNTER_TYPE_MOMC);
    }

    if (val == 0)
        return false;
    else if (val > 0)
        return true;

    if (type == UVM_ACCESS_COUNTER_TYPE_MOMC)
        return false;

    if (UVM_ATS_SUPPORTED())
        return g_uvm_global.ats.supported;

    return false;
}
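
// Net effect of the policy above (illustrative):
//   value <= -1: default policy. MIMC is enabled only when ATS is supported
//                and enabled (g_uvm_global.ats.supported); MOMC is disabled.
//   value ==  0: migrations for that counter type are disabled.
//   value >=  1: migrations for that counter type are enabled.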

// Create the access counters tracking struct for the given VA space
//
// VA space lock needs to be held in write mode
static va_space_access_counters_info_t *va_space_access_counters_info_create(uvm_va_space_t *va_space)
{
    va_space_access_counters_info_t *va_space_access_counters;
    uvm_assert_rwsem_locked_write(&va_space->lock);

    UVM_ASSERT(va_space_access_counters_info_get_or_null(va_space) == NULL);

    va_space_access_counters = uvm_kvmalloc_zero(sizeof(*va_space_access_counters));
    if (va_space_access_counters) {
        uvm_perf_module_type_set_data(va_space->perf_modules_data,
                                      va_space_access_counters,
                                      UVM_PERF_MODULE_TYPE_ACCESS_COUNTERS);

        // Snap the access_counters parameters so that they can be tuned per VA space
        atomic_set(&va_space_access_counters->params.enable_mimc_migrations,
                   is_migration_enabled(UVM_ACCESS_COUNTER_TYPE_MIMC));
        atomic_set(&va_space_access_counters->params.enable_momc_migrations,
                   is_migration_enabled(UVM_ACCESS_COUNTER_TYPE_MOMC));
        va_space_access_counters->va_space = va_space;
    }

    return va_space_access_counters;
}

// Destroy the access counters tracking struct for the given VA space
//
// VA space lock needs to be held in write mode
static void va_space_access_counters_info_destroy(uvm_va_space_t *va_space)
{
    va_space_access_counters_info_t *va_space_access_counters = va_space_access_counters_info_get_or_null(va_space);
    uvm_assert_rwsem_locked_write(&va_space->lock);

    if (va_space_access_counters) {
        uvm_perf_module_type_unset_data(va_space->perf_modules_data, UVM_PERF_MODULE_TYPE_ACCESS_COUNTERS);
        uvm_kvfree(va_space_access_counters);
    }
}

static NV_STATUS config_granularity_to_bytes(UVM_ACCESS_COUNTER_GRANULARITY granularity, NvU64 *bytes)
{
    switch (granularity) {
        case UVM_ACCESS_COUNTER_GRANULARITY_64K:
            *bytes = 64 * 1024ULL;
            break;
        case UVM_ACCESS_COUNTER_GRANULARITY_2M:
            *bytes = 2 * UVM_SIZE_1MB;
            break;
        case UVM_ACCESS_COUNTER_GRANULARITY_16M:
            *bytes = 16 * UVM_SIZE_1MB;
            break;
        case UVM_ACCESS_COUNTER_GRANULARITY_16G:
            *bytes = 16 * UVM_SIZE_1GB;
            break;
        default:
            return NV_ERR_INVALID_ARGUMENT;
    }

    return NV_OK;
}

// Clear the given access counter notifications and add the clear operation to
// the per-GPU clear tracker.
static NV_STATUS access_counter_clear_notifications(uvm_gpu_t *gpu,
                                                    uvm_access_counter_buffer_entry_t **notification_start,
                                                    NvU32 num_notifications)
{
    NvU32 i;
    NV_STATUS status;
    uvm_push_t push;
    uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;

    status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_MEMOPS, &push, "Clear access counter batch");
    if (status != NV_OK) {
        UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s\n",
                      nvstatusToString(status),
                      uvm_gpu_name(gpu));
        return status;
    }

    for (i = 0; i < num_notifications; i++)
        gpu->parent->host_hal->access_counter_clear_targeted(&push, notification_start[i]);

    uvm_push_end(&push);

    uvm_tracker_remove_completed(&access_counters->clear_tracker);

    return uvm_tracker_add_push_safe(&access_counters->clear_tracker, &push);
}

// Clear all access counters and add the operation to the per-GPU clear tracker
static NV_STATUS access_counter_clear_all(uvm_gpu_t *gpu)
{
    NV_STATUS status;
    uvm_push_t push;
    uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;

    status = uvm_push_begin(gpu->channel_manager,
                            UVM_CHANNEL_TYPE_MEMOPS,
                            &push,
                            "Clear access counter: all");
    if (status != NV_OK) {
        UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s\n",
                      nvstatusToString(status),
                      uvm_gpu_name(gpu));
        return status;
    }

    gpu->parent->host_hal->access_counter_clear_all(&push);

    uvm_push_end(&push);

    uvm_tracker_remove_completed(&access_counters->clear_tracker);

    return uvm_tracker_add_push_safe(&access_counters->clear_tracker, &push);
}

static const uvm_gpu_access_counter_type_config_t *
get_config_for_type(const uvm_access_counter_buffer_info_t *access_counters, uvm_access_counter_type_t counter_type)
{
    return counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC? &access_counters->current_config.mimc:
                                                         &access_counters->current_config.momc;
}

bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu)
{
    UVM_ASSERT(parent_gpu->access_counters_supported);

    // Fast path 1: we left some notifications unserviced in the buffer in the
    // last pass
    if (parent_gpu->access_counter_buffer_info.cached_get != parent_gpu->access_counter_buffer_info.cached_put)
        return true;

    // Fast path 2: read the valid bit of the notification buffer entry pointed
    // to by the cached get pointer
    if (!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu,
                                                               parent_gpu->access_counter_buffer_info.cached_get)) {
        // Slow path: read the put pointer from the GPU register via BAR0 over PCIe
        parent_gpu->access_counter_buffer_info.cached_put =
            UVM_GPU_READ_ONCE(*parent_gpu->access_counter_buffer_info.rm_info.pAccessCntrBufferPut);

        // No interrupt pending
        if (parent_gpu->access_counter_buffer_info.cached_get == parent_gpu->access_counter_buffer_info.cached_put)
            return false;
    }

    return true;
}
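
// Sketch of the notification ring protocol assumed above: GET is advanced by
// the driver (see write_get), PUT by the GPU, and both wrap at
// max_notifications. PUT lives in a GPU register read over BAR0, which is why
// the cached copies are consulted first:
//
//   cached_get != cached_put  -> entries were left unserviced in the last pass
//   entry[cached_get] valid   -> at least one new entry is pending
//   otherwise                 -> re-read PUT and compare against GET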

// Initialize the configuration and pre-compute some required values for the
// given access counter type
static void init_access_counter_types_config(const UvmGpuAccessCntrConfig *config,
                                             uvm_access_counter_type_t counter_type,
                                             uvm_gpu_access_counter_type_config_t *counter_type_config)
{
    NV_STATUS status;
    NvU64 tracking_size = 0;
    UVM_ACCESS_COUNTER_GRANULARITY granularity = counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC? config->mimcGranularity:
                                                                                               config->momcGranularity;
    UVM_ACCESS_COUNTER_USE_LIMIT use_limit = counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC? config->mimcUseLimit:
                                                                                           config->momcUseLimit;

    counter_type_config->rm.granularity = granularity;
    counter_type_config->rm.use_limit = use_limit;

    // Precompute the maximum size to use in reverse map translations and the
    // number of translations that are required per access counter notification.
    status = config_granularity_to_bytes(granularity, &tracking_size);
    UVM_ASSERT(status == NV_OK);

    // The sub_granularity field is only filled for tracking granularities
    // larger than 64K
    if (granularity == UVM_ACCESS_COUNTER_GRANULARITY_64K)
        counter_type_config->sub_granularity_region_size = tracking_size;
    else
        counter_type_config->sub_granularity_region_size = tracking_size / UVM_SUB_GRANULARITY_REGIONS;

    counter_type_config->translation_size = min(UVM_MAX_TRANSLATION_SIZE, tracking_size);
    counter_type_config->translations_per_counter =
        max(tracking_size / counter_type_config->translation_size, 1ULL);
    counter_type_config->sub_granularity_regions_per_translation =
        max(counter_type_config->translation_size / counter_type_config->sub_granularity_region_size, 1ULL);
    UVM_ASSERT(counter_type_config->sub_granularity_regions_per_translation <= UVM_SUB_GRANULARITY_REGIONS);
}
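
// Worked example (illustrative) for a 16M tracking granularity:
//   tracking_size                           = 16M
//   sub_granularity_region_size             = 16M / 32    = 512K
//   translation_size                        = min(2M, 16M) = 2M
//   translations_per_counter                = 16M / 2M    = 8
//   sub_granularity_regions_per_translation = 2M / 512K   = 4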

NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
{
    NV_STATUS status = NV_OK;
    uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
    uvm_access_counter_service_batch_context_t *batch_context = &access_counters->batch_service_context;
    NvU64 granularity_bytes = 0;

    if (uvm_perf_access_counter_threshold < UVM_PERF_ACCESS_COUNTER_THRESHOLD_MIN) {
        g_uvm_access_counter_threshold = UVM_PERF_ACCESS_COUNTER_THRESHOLD_MIN;
        pr_info("Value %u too small for uvm_perf_access_counter_threshold, using %u instead\n",
                uvm_perf_access_counter_threshold,
                g_uvm_access_counter_threshold);
    }
    else if (uvm_perf_access_counter_threshold > UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX) {
        g_uvm_access_counter_threshold = UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX;
        pr_info("Value %u too large for uvm_perf_access_counter_threshold, using %u instead\n",
                uvm_perf_access_counter_threshold,
                g_uvm_access_counter_threshold);
    }
    else {
        g_uvm_access_counter_threshold = uvm_perf_access_counter_threshold;
    }

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);
    UVM_ASSERT(parent_gpu->access_counter_buffer_hal != NULL);

    status = uvm_rm_locked_call(nvUvmInterfaceInitAccessCntrInfo(parent_gpu->rm_device,
                                                                 &access_counters->rm_info,
                                                                 0));
    if (status != NV_OK) {
        UVM_ERR_PRINT("Failed to init notify buffer info from RM: %s, GPU %s\n",
                      nvstatusToString(status),
                      uvm_parent_gpu_name(parent_gpu));

        // nvUvmInterfaceInitAccessCntrInfo may leave fields in rm_info
        // populated when it returns an error. Set the buffer handle to zero as
        // it is used by the deinitialization logic to determine if it was
        // correctly initialized.
        access_counters->rm_info.accessCntrBufferHandle = 0;
        goto fail;
    }

    UVM_ASSERT(access_counters->rm_info.bufferSize %
               parent_gpu->access_counter_buffer_hal->entry_size(parent_gpu) == 0);

    status = config_granularity_to_bytes(UVM_PERF_ACCESS_COUNTER_GRANULARITY, &granularity_bytes);
    UVM_ASSERT(status == NV_OK);
    if (granularity_bytes > UVM_MAX_TRANSLATION_SIZE)
        UVM_ASSERT(granularity_bytes % UVM_MAX_TRANSLATION_SIZE == 0);

    parent_gpu->access_counter_buffer_info.notifications_ignored_count = 0;
    parent_gpu->access_counter_buffer_info.reconfiguration_owner = NULL;

    uvm_tracker_init(&access_counters->clear_tracker);

    access_counters->max_notifications = parent_gpu->access_counter_buffer_info.rm_info.bufferSize /
                                         parent_gpu->access_counter_buffer_hal->entry_size(parent_gpu);

    // Clamp the provided module parameter value to the valid range
    access_counters->max_batch_size = max(uvm_perf_access_counter_batch_count,
                                          (NvU32)UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN);
    access_counters->max_batch_size = min(access_counters->max_batch_size,
                                          access_counters->max_notifications);

    if (access_counters->max_batch_size != uvm_perf_access_counter_batch_count) {
        pr_info("Invalid uvm_perf_access_counter_batch_count value on GPU %s: %u. Valid range [%u:%u]. Using %u instead\n",
                uvm_parent_gpu_name(parent_gpu),
                uvm_perf_access_counter_batch_count,
                UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN,
                access_counters->max_notifications,
                access_counters->max_batch_size);
    }

    batch_context->notification_cache = uvm_kvmalloc_zero(access_counters->max_notifications *
                                                          sizeof(*batch_context->notification_cache));
    if (!batch_context->notification_cache) {
        status = NV_ERR_NO_MEMORY;
        goto fail;
    }

    batch_context->virt.notifications = uvm_kvmalloc_zero(access_counters->max_notifications *
                                                          sizeof(*batch_context->virt.notifications));
    if (!batch_context->virt.notifications) {
        status = NV_ERR_NO_MEMORY;
        goto fail;
    }

    batch_context->phys.notifications = uvm_kvmalloc_zero(access_counters->max_notifications *
                                                          sizeof(*batch_context->phys.notifications));
    if (!batch_context->phys.notifications) {
        status = NV_ERR_NO_MEMORY;
        goto fail;
    }

    batch_context->phys.translations = uvm_kvmalloc_zero((UVM_MAX_TRANSLATION_SIZE / PAGE_SIZE) *
                                                         sizeof(*batch_context->phys.translations));
    if (!batch_context->phys.translations) {
        status = NV_ERR_NO_MEMORY;
        goto fail;
    }

    return NV_OK;

fail:
    uvm_parent_gpu_deinit_access_counters(parent_gpu);

    return status;
}

void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu)
{
    uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
    uvm_access_counter_service_batch_context_t *batch_context = &access_counters->batch_service_context;

    UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count == 0);

    if (access_counters->rm_info.accessCntrBufferHandle) {
        NV_STATUS status = uvm_rm_locked_call(nvUvmInterfaceDestroyAccessCntrInfo(parent_gpu->rm_device,
                                                                                  &access_counters->rm_info));
        UVM_ASSERT(status == NV_OK);

        access_counters->rm_info.accessCntrBufferHandle = 0;
        uvm_tracker_deinit(&access_counters->clear_tracker);
    }

    uvm_kvfree(batch_context->notification_cache);
    uvm_kvfree(batch_context->virt.notifications);
    uvm_kvfree(batch_context->phys.notifications);
    uvm_kvfree(batch_context->phys.translations);
    batch_context->notification_cache = NULL;
    batch_context->virt.notifications = NULL;
    batch_context->phys.notifications = NULL;
    batch_context->phys.translations = NULL;
}

bool uvm_parent_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu)
{
    if (!parent_gpu->access_counters_supported)
        return false;

    if (parent_gpu->rm_info.isSimulated)
        return true;

    return is_migration_enabled(UVM_ACCESS_COUNTER_TYPE_MIMC) || is_migration_enabled(UVM_ACCESS_COUNTER_TYPE_MOMC);
}

// This function enables access counters with the given configuration and takes
// ownership from RM. The function also stores the new configuration within the
// uvm_gpu_t struct.
static NV_STATUS access_counters_take_ownership(uvm_gpu_t *gpu, UvmGpuAccessCntrConfig *config)
{
    NV_STATUS status, disable_status;
    uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;

    UVM_ASSERT(gpu->parent->access_counters_supported);
    UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));

    status = uvm_rm_locked_call(nvUvmInterfaceEnableAccessCntr(gpu->parent->rm_device,
                                                               &access_counters->rm_info,
                                                               config));
    if (status != NV_OK) {
        UVM_ERR_PRINT("Failed to enable access counter notification from RM: %s, GPU %s\n",
                      nvstatusToString(status),
                      uvm_gpu_name(gpu));
        return status;
    }

    status = access_counter_clear_all(gpu);
    if (status != NV_OK)
        goto error;

    status = uvm_tracker_wait(&access_counters->clear_tracker);
    if (status != NV_OK)
        goto error;

    // Read the current get pointer as this might not be the first time we have
    // taken control of the notify buffer since the GPU was initialized. Then
    // flush old notifications. This will update the cached_put pointer.
    access_counters->cached_get = UVM_GPU_READ_ONCE(*access_counters->rm_info.pAccessCntrBufferGet);
    access_counter_buffer_flush_locked(gpu->parent, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);

    access_counters->current_config.threshold = config->threshold;

    init_access_counter_types_config(config, UVM_ACCESS_COUNTER_TYPE_MIMC, &access_counters->current_config.mimc);
    init_access_counter_types_config(config, UVM_ACCESS_COUNTER_TYPE_MOMC, &access_counters->current_config.momc);

    return NV_OK;

error:
    disable_status = uvm_rm_locked_call(nvUvmInterfaceDisableAccessCntr(gpu->parent->rm_device,
                                                                        &access_counters->rm_info));
    UVM_ASSERT(disable_status == NV_OK);

    return status;
}

// If ownership is yielded as part of reconfiguration, the access counters
// handling refcount may not be 0
static void access_counters_yield_ownership(uvm_parent_gpu_t *parent_gpu)
{
    NV_STATUS status;
    uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;

    UVM_ASSERT(parent_gpu->access_counters_supported);
    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));

    // Wait for any pending clear operation before releasing ownership
    status = uvm_tracker_wait(&access_counters->clear_tracker);
    if (status != NV_OK)
        UVM_ASSERT(status == uvm_global_get_status());

    status = uvm_rm_locked_call(nvUvmInterfaceDisableAccessCntr(parent_gpu->rm_device,
                                                                &access_counters->rm_info));
    UVM_ASSERT(status == NV_OK);
}

// Increment the refcount of access counter enablement. If this is the first
// reference, enable the HW feature.
static NV_STATUS gpu_access_counters_enable(uvm_gpu_t *gpu, UvmGpuAccessCntrConfig *config)
{
    UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));
    UVM_ASSERT(gpu->parent->access_counters_supported);
    UVM_ASSERT(gpu->parent->access_counter_buffer_info.rm_info.accessCntrBufferHandle);

    // There cannot be a concurrent modification of the handling count, since
    // the only two writes of that field happen in the enable/disable functions
    // and those are protected by the access counters ISR lock.
    if (gpu->parent->isr.access_counters.handling_ref_count == 0) {
        NV_STATUS status = access_counters_take_ownership(gpu, config);

        if (status != NV_OK)
            return status;
    }

    ++gpu->parent->isr.access_counters.handling_ref_count;
    return NV_OK;
}

// Decrement the refcount of access counter enablement. If this is the last
// reference, disable the HW feature.
static void parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu)
{
    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
    UVM_ASSERT(parent_gpu->access_counters_supported);
    UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count > 0);

    if (--parent_gpu->isr.access_counters.handling_ref_count == 0)
        access_counters_yield_ownership(parent_gpu);
}

// Invoked during registration of the GPU in the VA space
NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_space)
{
    NV_STATUS status;

    UVM_ASSERT(gpu->parent->access_counters_supported);

    uvm_parent_gpu_access_counters_isr_lock(gpu->parent);

    if (uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
        status = NV_ERR_INVALID_DEVICE;
    }
    else {
        UvmGpuAccessCntrConfig default_config =
        {
            .mimcGranularity = UVM_PERF_ACCESS_COUNTER_GRANULARITY,
            .momcGranularity = UVM_PERF_ACCESS_COUNTER_GRANULARITY,
            .mimcUseLimit = UVM_ACCESS_COUNTER_USE_LIMIT_FULL,
            .momcUseLimit = UVM_ACCESS_COUNTER_USE_LIMIT_FULL,
            .threshold = g_uvm_access_counter_threshold,
        };
        status = gpu_access_counters_enable(gpu, &default_config);

        // No VA space lock is currently held, so the mask is atomically
        // modified to protect from concurrent enablement of access counters in
        // another GPU
        if (status == NV_OK)
            uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
    }

    // If this is the first reference taken on access counters, dropping the
    // ISR lock will enable interrupts.
    uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);

    return status;
}

void uvm_parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu,
                                            uvm_va_space_t *va_space)
{
    UVM_ASSERT(parent_gpu->access_counters_supported);

    uvm_parent_gpu_access_counters_isr_lock(parent_gpu);

    if (uvm_parent_processor_mask_test_and_clear_atomic(&va_space->access_counters_enabled_processors,
                                                        parent_gpu->id)) {
        parent_gpu_access_counters_disable(parent_gpu);

        // If this VA space reconfigured access counters, clear the ownership
        // to allow other processes to invoke the reconfiguration
        if (parent_gpu->access_counter_buffer_info.reconfiguration_owner == va_space)
            parent_gpu->access_counter_buffer_info.reconfiguration_owner = NULL;
    }

    uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
}

static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
{
    uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;

    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));

    // Write get on the GPU only if it has changed
    if (access_counters->cached_get == get)
        return;

    access_counters->cached_get = get;

    // Update the get pointer on the GPU
    UVM_GPU_WRITE_ONCE(*access_counters->rm_info.pAccessCntrBufferGet, get);
}

static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
                                               uvm_gpu_buffer_flush_mode_t flush_mode)
{
    NvU32 get;
    NvU32 put;
    uvm_spin_loop_t spin;
    uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;

    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
    UVM_ASSERT(parent_gpu->access_counters_supported);

    // Read the PUT pointer from the GPU if requested
    UVM_ASSERT(flush_mode != UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT);
    if (flush_mode == UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT)
        access_counters->cached_put = UVM_GPU_READ_ONCE(*access_counters->rm_info.pAccessCntrBufferPut);

    get = access_counters->cached_get;
    put = access_counters->cached_put;

    while (get != put) {
        // Wait until the valid bit is set
        UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu, get), &spin);

        parent_gpu->access_counter_buffer_hal->entry_clear_valid(parent_gpu, get);
        ++get;
        if (get == access_counters->max_notifications)
            get = 0;
    }

    write_get(parent_gpu, get);
}

void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu)
{
    UVM_ASSERT(parent_gpu->access_counters_supported);

    // Disables access counter interrupts and notification servicing
    uvm_parent_gpu_access_counters_isr_lock(parent_gpu);

    if (parent_gpu->isr.access_counters.handling_ref_count > 0)
        access_counter_buffer_flush_locked(parent_gpu, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);

    uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
}

static inline int cmp_access_counter_instance_ptr(const uvm_access_counter_buffer_entry_t *a,
                                                  const uvm_access_counter_buffer_entry_t *b)
{
    int result;

    result = uvm_gpu_phys_addr_cmp(a->virtual_info.instance_ptr, b->virtual_info.instance_ptr);

    // On Volta+ we need to sort by the {instance_ptr, ve_id} pair since the
    // same instance_ptr can map to a different VA space per subcontext
    if (result != 0)
        return result;

    return UVM_CMP_DEFAULT(a->virtual_info.ve_id, b->virtual_info.ve_id);
}

// Sort comparator for pointers to GVA access counter notification buffer
// entries that sorts by instance pointer
static int cmp_sort_virt_notifications_by_instance_ptr(const void *_a, const void *_b)
{
    const uvm_access_counter_buffer_entry_t *a = *(const uvm_access_counter_buffer_entry_t **)_a;
    const uvm_access_counter_buffer_entry_t *b = *(const uvm_access_counter_buffer_entry_t **)_b;

    UVM_ASSERT(a->address.is_virtual);
    UVM_ASSERT(b->address.is_virtual);

    return cmp_access_counter_instance_ptr(a, b);
}

// Sort comparator for pointers to GVA access counter notification buffer
// entries that sorts by va_space and fault address
static int cmp_sort_virt_notifications_by_va_space_address(const void *_a, const void *_b)
{
    const uvm_access_counter_buffer_entry_t **a = (const uvm_access_counter_buffer_entry_t **)_a;
    const uvm_access_counter_buffer_entry_t **b = (const uvm_access_counter_buffer_entry_t **)_b;

    int result;

    result = UVM_CMP_DEFAULT((*a)->virtual_info.va_space, (*b)->virtual_info.va_space);
    if (result != 0)
        return result;

    return UVM_CMP_DEFAULT((*a)->address.address, (*b)->address.address);
}

// Sort comparator for pointers to GPA access counter notification buffer
// entries that sorts by the resident processor of the physical address
static int cmp_sort_phys_notifications_by_processor_id(const void *_a, const void *_b)
{
    const uvm_access_counter_buffer_entry_t *a = *(const uvm_access_counter_buffer_entry_t **)_a;
    const uvm_access_counter_buffer_entry_t *b = *(const uvm_access_counter_buffer_entry_t **)_b;

    UVM_ASSERT(!a->address.is_virtual);
    UVM_ASSERT(!b->address.is_virtual);

    return uvm_id_cmp(a->physical_info.resident_id, b->physical_info.resident_id);
}

typedef enum
{
    // Fetch a batch of notifications from the buffer. Stop at the first entry
    // that is not ready yet
    NOTIFICATION_FETCH_MODE_BATCH_READY,

    // Fetch all notifications in the buffer before PUT. Wait for all
    // notifications to become ready
    NOTIFICATION_FETCH_MODE_ALL,
} notification_fetch_mode_t;

static NvU32 fetch_access_counter_buffer_entries(uvm_gpu_t *gpu,
                                                 uvm_access_counter_service_batch_context_t *batch_context,
                                                 notification_fetch_mode_t fetch_mode)
{
    NvU32 get;
    NvU32 put;
    NvU32 notification_index;
    uvm_access_counter_buffer_entry_t *notification_cache;
    uvm_spin_loop_t spin;
    uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
    NvU32 last_instance_ptr_idx = 0;
    uvm_aperture_t last_aperture = UVM_APERTURE_PEER_MAX;

    UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));
    UVM_ASSERT(gpu->parent->access_counters_supported);

    notification_cache = batch_context->notification_cache;

    get = access_counters->cached_get;

    // Read the put pointer from the GPU and cache it
    if (get == access_counters->cached_put) {
        access_counters->cached_put = UVM_GPU_READ_ONCE(*access_counters->rm_info.pAccessCntrBufferPut);
    }

    put = access_counters->cached_put;

    if (get == put)
        return 0;

    batch_context->phys.num_notifications = 0;
    batch_context->virt.num_notifications = 0;

    batch_context->virt.is_single_instance_ptr = true;
    batch_context->phys.is_single_aperture = true;

    notification_index = 0;

    // Parse until get == put or we run out of space to cache the entries
    while ((get != put) &&
           (fetch_mode == NOTIFICATION_FETCH_MODE_ALL || notification_index < access_counters->max_batch_size)) {
        uvm_access_counter_buffer_entry_t *current_entry = &notification_cache[notification_index];

        // We cannot just wait for the last entry (the one pointed to by put)
        // to become valid, we have to do it individually since entries can be
        // written out of order
        UVM_SPIN_WHILE(!gpu->parent->access_counter_buffer_hal->entry_is_valid(gpu->parent, get), &spin) {
            // We have some entries to work on. Let's do the rest later.
            if (fetch_mode != NOTIFICATION_FETCH_MODE_ALL && notification_index > 0)
                goto done;
        }

        // Prevent later accesses from being moved above the read of the valid bit
        smp_mb__after_atomic();

        // Got the valid bit set. Let's cache the entry.
        gpu->parent->access_counter_buffer_hal->parse_entry(gpu->parent, get, current_entry);

        if (current_entry->address.is_virtual) {
            batch_context->virt.notifications[batch_context->virt.num_notifications++] = current_entry;

            if (batch_context->virt.is_single_instance_ptr) {
                if (batch_context->virt.num_notifications == 1) {
                    last_instance_ptr_idx = notification_index;
                }
                else if (cmp_access_counter_instance_ptr(&notification_cache[last_instance_ptr_idx],
                                                         current_entry) != 0) {
                    batch_context->virt.is_single_instance_ptr = false;
                }
            }
        }
        else {
            const NvU64 translation_size = get_config_for_type(access_counters,
                                                               current_entry->counter_type)->translation_size;
            current_entry->address.address = UVM_ALIGN_DOWN(current_entry->address.address, translation_size);

            batch_context->phys.notifications[batch_context->phys.num_notifications++] = current_entry;

            current_entry->physical_info.resident_id =
                uvm_gpu_get_processor_id_by_address(gpu, uvm_gpu_phys_address(current_entry->address.aperture,
                                                                              current_entry->address.address));

            if (batch_context->phys.is_single_aperture) {
                if (batch_context->phys.num_notifications == 1)
                    last_aperture = current_entry->address.aperture;
                else if (current_entry->address.aperture != last_aperture)
                    batch_context->phys.is_single_aperture = false;
            }

            if (current_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC)
                UVM_ASSERT(uvm_id_equal(current_entry->physical_info.resident_id, gpu->id));
            else
                UVM_ASSERT(!uvm_id_equal(current_entry->physical_info.resident_id, gpu->id));
        }

        ++notification_index;
        ++get;
        if (get == access_counters->max_notifications)
            get = 0;
    }

done:
    write_get(gpu->parent, get);

    return notification_index;
}

static void translate_virt_notifications_instance_ptrs(uvm_parent_gpu_t *parent_gpu,
                                                       uvm_access_counter_service_batch_context_t *batch_context)
{
    NvU32 i;
    NV_STATUS status;

    for (i = 0; i < batch_context->virt.num_notifications; ++i) {
        uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[i];

        if (i == 0 ||
            cmp_access_counter_instance_ptr(current_entry, batch_context->virt.notifications[i - 1]) != 0) {
            // If the instance_ptr is different, make a new translation. If the
            // translation fails then va_space will be NULL and the entry will
            // simply be ignored in subsequent processing.
            status = uvm_parent_gpu_access_counter_entry_to_va_space(parent_gpu,
                                                                     current_entry,
                                                                     &current_entry->virtual_info.va_space);
            if (status != NV_OK)
                UVM_ASSERT(current_entry->virtual_info.va_space == NULL);
        }
        else {
            current_entry->virtual_info.va_space = batch_context->virt.notifications[i - 1]->virtual_info.va_space;
        }
    }
}

// GVA notifications provide an instance_ptr and ve_id that can be directly
// translated to a VA space. In order to minimize translations, we first sort
// the entries by instance_ptr, translate them, and then sort by va_space and
// notification address.
static void preprocess_virt_notifications(uvm_parent_gpu_t *parent_gpu,
                                          uvm_access_counter_service_batch_context_t *batch_context)
{
    if (!batch_context->virt.is_single_instance_ptr) {
        sort(batch_context->virt.notifications,
             batch_context->virt.num_notifications,
             sizeof(*batch_context->virt.notifications),
             cmp_sort_virt_notifications_by_instance_ptr,
             NULL);
    }

    translate_virt_notifications_instance_ptrs(parent_gpu, batch_context);

    sort(batch_context->virt.notifications,
         batch_context->virt.num_notifications,
         sizeof(*batch_context->virt.notifications),
         cmp_sort_virt_notifications_by_va_space_address,
         NULL);
}

// GPA notifications provide a physical address and an aperture. Sort accesses
// by their resident processor to try to coalesce operations on the same
// target processor.
static void preprocess_phys_notifications(uvm_access_counter_service_batch_context_t *batch_context)
{
    if (!batch_context->phys.is_single_aperture) {
        sort(batch_context->phys.notifications,
             batch_context->phys.num_notifications,
             sizeof(*batch_context->phys.notifications),
             cmp_sort_phys_notifications_by_processor_id,
             NULL);
    }
}

static NV_STATUS notify_tools_and_process_flags(uvm_gpu_t *gpu,
                                                uvm_access_counter_buffer_entry_t **notification_start,
                                                NvU32 num_entries,
                                                NvU32 flags)
{
    NV_STATUS status = NV_OK;

    if (uvm_enable_builtin_tests) {
        // TODO: Bug 4310744: [UVM][TOOLS] Attribute access counter tools events
        // to va_space instead of broadcasting.
        NvU32 i;

        for (i = 0; i < num_entries; i++)
            uvm_tools_broadcast_access_counter(gpu, notification_start[i], flags & UVM_ACCESS_COUNTER_PHYS_ON_MANAGED);
    }

    if (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR)
        status = access_counter_clear_notifications(gpu, notification_start, num_entries);

    return status;
}

static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
                                         uvm_va_block_t *va_block,
                                         uvm_va_block_retry_t *va_block_retry,
                                         uvm_service_block_context_t *service_context,
                                         uvm_page_mask_t *accessed_pages)
{
    NV_STATUS status = NV_OK;
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    uvm_range_group_range_iter_t iter;
    uvm_page_index_t page_index;
    uvm_page_index_t first_page_index;
    uvm_page_index_t last_page_index;
    NvU32 page_count = 0;
    const uvm_page_mask_t *residency_mask;
    const bool hmm_migratable = true;

    uvm_assert_mutex_locked(&va_block->lock);

    // The GPU VA space could be gone since we received the notification. We
    // handle this case by skipping servicing if the processor is not in the
    // mapped mask. Using this approach we also filter out notifications for
    // pages that moved since they were reported by the GPU. This is fine
    // because:
    // - If the GPU is still accessing them, it should have faulted
    // - If the GPU gets remote mappings in the future, we will get new
    //   notifications and we will act accordingly
    // - If the GPU does not access the pages again, we do not want to migrate
    //   them
    if (!uvm_processor_mask_test(&va_block->mapped, processor))
        return NV_OK;

    if (uvm_processor_mask_test(&va_block->resident, processor))
        residency_mask = uvm_va_block_resident_mask_get(va_block, processor, NUMA_NO_NODE);
    else
        residency_mask = NULL;

    first_page_index = PAGES_PER_UVM_VA_BLOCK;
    last_page_index = 0;

    // Initialize the fault service block context
    uvm_processor_mask_zero(&service_context->resident_processors);
    service_context->read_duplicate_count = 0;
    service_context->thrashing_pin_count = 0;

    // If a page is already resident on the accessing processor, the
    // notification for that page is stale. Skip it.
    if (residency_mask)
        uvm_page_mask_andnot(accessed_pages, accessed_pages, residency_mask);

    uvm_range_group_range_migratability_iter_first(va_space, va_block->start, va_block->end, &iter);

    for_each_va_block_page_in_mask(page_index, accessed_pages, va_block) {
        uvm_perf_thrashing_hint_t thrashing_hint;
        NvU64 address = uvm_va_block_cpu_page_address(va_block, page_index);
        bool read_duplicate = false;
        uvm_processor_id_t new_residency;
        const uvm_va_policy_t *policy;

        // Ensure that the migratability iterator covers the current address
        while (iter.end < address)
            uvm_range_group_range_migratability_iter_next(va_space, &iter, va_block->end);

        UVM_ASSERT(iter.start <= address && iter.end >= address);

        // If the range is not migratable, skip the page
        if (!iter.migratable)
            continue;

        thrashing_hint = uvm_perf_thrashing_get_hint(va_block, service_context->block_context, address, processor);
        if (thrashing_hint.type == UVM_PERF_THRASHING_HINT_TYPE_THROTTLE) {
            // If the page is being throttled, ignore the access counter
            // notification
            continue;
        }
        else if (thrashing_hint.type == UVM_PERF_THRASHING_HINT_TYPE_PIN) {
            if (service_context->thrashing_pin_count++ == 0)
                uvm_page_mask_zero(&service_context->thrashing_pin_mask);

            uvm_page_mask_set(&service_context->thrashing_pin_mask, page_index);
        }

        // If the underlying VMA is gone, skip HMM migrations.
        if (uvm_va_block_is_hmm(va_block)) {
            status = uvm_hmm_find_vma(service_context->block_context->mm,
                                      &service_context->block_context->hmm.vma,
                                      address);
            if (status == NV_ERR_INVALID_ADDRESS)
                continue;

            UVM_ASSERT(status == NV_OK);
        }

        policy = uvm_va_policy_get(va_block, address);

        new_residency = uvm_va_block_select_residency(va_block,
                                                      service_context->block_context,
                                                      page_index,
                                                      processor,
                                                      uvm_fault_access_type_mask_bit(UVM_FAULT_ACCESS_TYPE_PREFETCH),
                                                      policy,
                                                      &thrashing_hint,
                                                      UVM_SERVICE_OPERATION_ACCESS_COUNTERS,
                                                      hmm_migratable,
                                                      &read_duplicate);

        if (!uvm_processor_mask_test_and_set(&service_context->resident_processors, new_residency))
            uvm_page_mask_zero(&service_context->per_processor_masks[uvm_id_value(new_residency)].new_residency);

        uvm_page_mask_set(&service_context->per_processor_masks[uvm_id_value(new_residency)].new_residency, page_index);

        if (page_index < first_page_index)
            first_page_index = page_index;
        if (page_index > last_page_index)
            last_page_index = page_index;

        ++page_count;

        service_context->access_type[page_index] = UVM_FAULT_ACCESS_TYPE_PREFETCH;
    }

    // Apply the changes computed in the service block context, if there are
    // pages to be serviced
    if (page_count > 0) {
        uvm_processor_id_t id;
        uvm_processor_mask_t *update_processors = &service_context->update_processors;

        uvm_processor_mask_and(update_processors, &va_block->resident, &service_context->resident_processors);

        // Remove pages that are already resident in the destination processors
        for_each_id_in_mask(id, update_processors) {
            bool migrate_pages;
            uvm_page_mask_t *residency_mask = uvm_va_block_resident_mask_get(va_block, id, NUMA_NO_NODE);
            UVM_ASSERT(residency_mask);

            migrate_pages = uvm_page_mask_andnot(&service_context->per_processor_masks[uvm_id_value(id)].new_residency,
                                                 &service_context->per_processor_masks[uvm_id_value(id)].new_residency,
                                                 residency_mask);

            if (!migrate_pages)
                uvm_processor_mask_clear(&service_context->resident_processors, id);
        }

        if (!uvm_processor_mask_empty(&service_context->resident_processors)) {
            while (first_page_index <= last_page_index) {
                uvm_page_index_t outer = last_page_index + 1;
                const uvm_va_policy_t *policy;

                if (uvm_va_block_is_hmm(va_block)) {
                    status = NV_ERR_INVALID_ADDRESS;
                    if (service_context->block_context->mm) {
                        status = uvm_hmm_find_policy_vma_and_outer(va_block,
                                                                   &service_context->block_context->hmm.vma,
                                                                   first_page_index,
                                                                   &policy,
                                                                   &outer);
                    }
                    if (status != NV_OK)
                        break;
                }

                service_context->region = uvm_va_block_region(first_page_index, outer);
                first_page_index = outer;

                status = uvm_va_block_service_locked(processor, va_block, va_block_retry, service_context);
                if (status != NV_OK)
                    break;
            }
        }
    }

    ++service_context->num_retries;

    return status;
}

static void reverse_mappings_to_va_block_page_mask(uvm_va_block_t *va_block,
                                                   const uvm_reverse_map_t *reverse_mappings,
                                                   size_t num_reverse_mappings,
                                                   uvm_page_mask_t *page_mask)
{
    NvU32 index;

    UVM_ASSERT(page_mask);

    if (num_reverse_mappings > 0)
        UVM_ASSERT(reverse_mappings);

    uvm_page_mask_zero(page_mask);

    // Populate the mask of accessed pages within the VA Block
    for (index = 0; index < num_reverse_mappings; ++index) {
        const uvm_reverse_map_t *reverse_map = &reverse_mappings[index];
        uvm_va_block_region_t region = reverse_map->region;

        UVM_ASSERT(reverse_map->va_block == va_block);

        // The VA Block could have been split since we obtained the reverse
        // mappings. Clamp the region to the current VA block size, to handle
        // the case in which it was split.
        region.outer = min(region.outer, (uvm_page_index_t)uvm_va_block_num_cpu_pages(va_block));
        region.first = min(region.first, region.outer);

        uvm_page_mask_region_fill(page_mask, region);
    }
}

static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
                                              uvm_access_counter_service_batch_context_t *batch_context,
                                              const uvm_access_counter_buffer_entry_t *current_entry,
                                              const uvm_reverse_map_t *reverse_mappings,
                                              size_t num_reverse_mappings,
                                              NvU32 *out_flags)
{
    size_t index;
    uvm_va_block_t *va_block = reverse_mappings[0].va_block;
    uvm_va_space_t *va_space = NULL;
    struct mm_struct *mm = NULL;
    NV_STATUS status = NV_OK;
    const uvm_processor_id_t processor = current_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC? gpu->id:
                                                                                                      UVM_ID_CPU;

    *out_flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;

    UVM_ASSERT(num_reverse_mappings > 0);

    uvm_mutex_lock(&va_block->lock);
    va_space = uvm_va_block_get_va_space_maybe_dead(va_block);
    uvm_mutex_unlock(&va_block->lock);

    if (va_space) {
        uvm_va_block_retry_t va_block_retry;
        va_space_access_counters_info_t *va_space_access_counters;
        uvm_service_block_context_t *service_context = &batch_context->block_service_context;
        uvm_page_mask_t *accessed_pages = &batch_context->accessed_pages;

        // If an mm is registered with the VA space, we have to retain it
        // in order to lock it before locking the VA space.
        mm = uvm_va_space_mm_retain_lock(va_space);
        uvm_va_space_down_read(va_space);

        // Re-check that the VA block is still valid after taking the VA space
        // lock.
        if (uvm_va_block_is_dead(va_block))
            goto done;

        va_space_access_counters = va_space_access_counters_info_get(va_space);
        if (UVM_ID_IS_CPU(processor) && !atomic_read(&va_space_access_counters->params.enable_momc_migrations))
            goto done;

        if (!UVM_ID_IS_CPU(processor) && !atomic_read(&va_space_access_counters->params.enable_mimc_migrations))
            goto done;

        service_context->operation = UVM_SERVICE_OPERATION_ACCESS_COUNTERS;
        service_context->num_retries = 0;

        uvm_va_block_context_init(service_context->block_context, mm);

        if (uvm_va_block_is_hmm(va_block))
            uvm_hmm_migrate_begin_wait(va_block);

        uvm_mutex_lock(&va_block->lock);

        reverse_mappings_to_va_block_page_mask(va_block, reverse_mappings, num_reverse_mappings, accessed_pages);

        status = UVM_VA_BLOCK_RETRY_LOCKED(va_block,
                                           &va_block_retry,
                                           service_va_block_locked(processor,
                                                                   va_block,
                                                                   &va_block_retry,
                                                                   service_context,
                                                                   accessed_pages));

        uvm_mutex_unlock(&va_block->lock);

        if (uvm_va_block_is_hmm(va_block)) {
            uvm_hmm_migrate_finish(va_block);

            // If the pages could not be migrated, there is no need to try
            // again: this is best effort only.
            if (status == NV_WARN_MORE_PROCESSING_REQUIRED || status == NV_WARN_MISMATCHED_TARGET)
                status = NV_OK;
        }

        if (status == NV_OK)
            *out_flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
    }

done:
    if (va_space) {
        uvm_va_space_up_read(va_space);
        uvm_va_space_mm_release_unlock(va_space, mm);
    }

    // Drop the refcounts taken by the reverse map translation routines
    for (index = 0; index < num_reverse_mappings; ++index)
        uvm_va_block_release(va_block);

    return status;
}

static NV_STATUS service_phys_va_blocks(uvm_gpu_t *gpu,
                                        uvm_access_counter_service_batch_context_t *batch_context,
                                        const uvm_access_counter_buffer_entry_t *current_entry,
                                        const uvm_reverse_map_t *reverse_mappings,
                                        size_t num_reverse_mappings,
                                        NvU32 *out_flags)
{
    NV_STATUS status = NV_OK;
    size_t index;

    *out_flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;

    for (index = 0; index < num_reverse_mappings; ++index) {
        NvU32 out_flags_local = 0;
        status = service_phys_single_va_block(gpu,
                                              batch_context,
                                              current_entry,
                                              reverse_mappings + index,
                                              1,
                                              &out_flags_local);
        if (status != NV_OK)
            break;

        UVM_ASSERT((out_flags_local & ~UVM_ACCESS_COUNTER_ACTION_CLEAR) == 0);
        *out_flags |= out_flags_local;
    }

    // In the case of failure, drop the refcounts for the remaining reverse mappings
    while (++index < num_reverse_mappings)
        uvm_va_block_release(reverse_mappings[index].va_block);

    return status;
}

// Iterate over all regions set in the given sub_granularity mask
#define for_each_sub_granularity_region(region_start, region_end, sub_granularity, num_regions)      \
    for ((region_start) = find_first_bit(&(sub_granularity), (num_regions)),                          \
         (region_end) = find_next_zero_bit(&(sub_granularity), (num_regions), (region_start) + 1);    \
         (region_start) < (num_regions);                                                              \
         (region_start) = find_next_bit(&(sub_granularity), (num_regions), (region_end) + 1),         \
         (region_end) = find_next_zero_bit(&(sub_granularity), (num_regions), (region_start) + 1))

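
// For example (illustrative): with sub_granularity = 0x3c (bits 2-5 set) and
// num_regions = 8, the macro above yields a single iteration with
// region_start = 2 and region_end = 6, covering the half-open region [2, 6).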
1307
are_reverse_mappings_on_single_block(const uvm_reverse_map_t * reverse_mappings,size_t num_reverse_mappings)1308 static bool are_reverse_mappings_on_single_block(const uvm_reverse_map_t *reverse_mappings, size_t num_reverse_mappings)
1309 {
1310 size_t index;
1311 uvm_va_block_t *prev_va_block = NULL;
1312
1313 for (index = 0; index < num_reverse_mappings; ++index) {
1314 uvm_va_block_t *va_block = reverse_mappings[index].va_block;
1315 UVM_ASSERT(va_block);
1316
1317 if (prev_va_block && prev_va_block != va_block)
1318 return false;
1319
1320 prev_va_block = va_block;
1321 }
1322
1323 return true;
1324 }
1325
// Service the given translation range. It returns the count of the reverse
// mappings found during servicing in num_reverse_mappings, even if the
// function doesn't return NV_OK.
static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
                                                       uvm_gpu_t *resident_gpu,
                                                       uvm_access_counter_service_batch_context_t *batch_context,
                                                       const uvm_gpu_access_counter_type_config_t *config,
                                                       const uvm_access_counter_buffer_entry_t *current_entry,
                                                       NvU64 address,
                                                       unsigned long sub_granularity,
                                                       size_t *num_reverse_mappings,
                                                       NvU32 *out_flags)
{
    NV_STATUS status;
    NvU32 region_start, region_end;

    *num_reverse_mappings = 0;

    // Get the reverse_map translations for all the regions set in the
    // sub_granularity field of the counter.
    for_each_sub_granularity_region(region_start,
                                    region_end,
                                    sub_granularity,
                                    config->sub_granularity_regions_per_translation) {
        NvU64 local_address = address + region_start * config->sub_granularity_region_size;
        NvU32 local_translation_size = (region_end - region_start) * config->sub_granularity_region_size;
        uvm_reverse_map_t *local_reverse_mappings = batch_context->phys.translations + *num_reverse_mappings;

        // Obtain the virtual addresses of the pages within the reported
        // DMA range
        if (resident_gpu) {
            *num_reverse_mappings += uvm_pmm_gpu_phys_to_virt(&resident_gpu->pmm,
                                                              local_address,
                                                              local_translation_size,
                                                              local_reverse_mappings);
        }
        else {
            *num_reverse_mappings += uvm_pmm_sysmem_mappings_dma_to_virt(&gpu->pmm_reverse_sysmem_mappings,
                                                                         local_address,
                                                                         local_translation_size,
                                                                         local_reverse_mappings,
                                                                         local_translation_size / PAGE_SIZE);
        }
    }

    if (*num_reverse_mappings == 0)
        return NV_OK;

    // Service all the translations
    if (are_reverse_mappings_on_single_block(batch_context->phys.translations, *num_reverse_mappings)) {
        status = service_phys_single_va_block(gpu,
                                              batch_context,
                                              current_entry,
                                              batch_context->phys.translations,
                                              *num_reverse_mappings,
                                              out_flags);
    }
    else {
        status = service_phys_va_blocks(gpu,
                                        batch_context,
                                        current_entry,
                                        batch_context->phys.translations,
                                        *num_reverse_mappings,
                                        out_flags);
    }

    return status;
}

static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
                                           uvm_access_counter_service_batch_context_t *batch_context,
                                           const uvm_access_counter_buffer_entry_t *current_entry,
                                           NvU32 *out_flags)
{
    NvU64 address;
    NvU64 translation_index;
    uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
    uvm_access_counter_type_t counter_type = current_entry->counter_type;
    const uvm_gpu_access_counter_type_config_t *config = get_config_for_type(access_counters, counter_type);
    unsigned long sub_granularity;
    size_t total_reverse_mappings = 0;
    uvm_gpu_t *resident_gpu = NULL;
    NV_STATUS status = NV_OK;
    NvU32 flags = 0;

    address = current_entry->address.address;
    UVM_ASSERT(address % config->translation_size == 0);
    sub_granularity = current_entry->sub_granularity;

    if (config->rm.granularity == UVM_ACCESS_COUNTER_GRANULARITY_64K)
        sub_granularity = 1;

    if (UVM_ID_IS_GPU(current_entry->physical_info.resident_id)) {
        resident_gpu = uvm_gpu_get(current_entry->physical_info.resident_id);
        UVM_ASSERT(resident_gpu != NULL);

        if (gpu != resident_gpu && uvm_gpus_are_nvswitch_connected(gpu, resident_gpu)) {
            UVM_ASSERT(address >= resident_gpu->parent->nvswitch_info.fabric_memory_window_start);
            address -= resident_gpu->parent->nvswitch_info.fabric_memory_window_start;
        }

        // On P9 systems, the CPU accesses the reserved heap on vidmem via
        // coherent NVLINK mappings. This can trigger notifications that
        // fall outside of the allocatable address range. We just drop
        // them.
        if (address >= resident_gpu->mem_info.max_allocatable_address)
            return NV_OK;
    }

    for (translation_index = 0; translation_index < config->translations_per_counter; ++translation_index) {
        size_t num_reverse_mappings;
        NvU32 out_flags_local = 0;
        status = service_phys_notification_translation(gpu,
                                                       resident_gpu,
                                                       batch_context,
                                                       config,
                                                       current_entry,
                                                       address,
                                                       sub_granularity,
                                                       &num_reverse_mappings,
                                                       &out_flags_local);
        total_reverse_mappings += num_reverse_mappings;

        UVM_ASSERT((out_flags_local & ~UVM_ACCESS_COUNTER_ACTION_CLEAR) == 0);
        flags |= out_flags_local;

        if (status != NV_OK)
            break;

        address += config->translation_size;
        sub_granularity = sub_granularity >> config->sub_granularity_regions_per_translation;
    }

    if (uvm_enable_builtin_tests)
        *out_flags |= ((total_reverse_mappings != 0) ? UVM_ACCESS_COUNTER_PHYS_ON_MANAGED : 0);

    if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
        *out_flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;

    return status;
}
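
// Editor's note, a sketch under stated assumptions rather than documented
// behavior: with the default 2MB tracking granularity there is a single
// translation per counter, so the loop above runs once. For a larger,
// test-only granularity the config fields are assumed to relate as follows:
// with a hypothetical 16MB tracked range, translations_per_counter would be
// 16MB / 2MB == 8 and sub_granularity_regions_per_translation would be
// 32 / 8 == 4, so each iteration advances address by one 2MB translation and
// shifts sub_granularity right by the 4 region bits it just consumed.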

// TODO: Bug 2018899: Add statistics for dropped access counter notifications
static NV_STATUS service_phys_notifications(uvm_gpu_t *gpu,
                                            uvm_access_counter_service_batch_context_t *batch_context)
{
    NvU32 i;
    uvm_access_counter_buffer_entry_t **notifications = batch_context->phys.notifications;

    UVM_ASSERT(gpu->parent->access_counters_can_use_physical_addresses);

    preprocess_phys_notifications(batch_context);

    for (i = 0; i < batch_context->phys.num_notifications; ++i) {
        NV_STATUS status;
        uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
        NvU32 flags = 0;

        if (!UVM_ID_IS_VALID(current_entry->physical_info.resident_id))
            continue;

        status = service_phys_notification(gpu, batch_context, current_entry, &flags);

        notify_tools_and_process_flags(gpu, &notifications[i], 1, flags);

        if (status != NV_OK)
            return status;
    }

    return NV_OK;
}

static NV_STATUS service_notification_va_block_helper(struct mm_struct *mm,
                                                      uvm_va_block_t *va_block,
                                                      uvm_processor_id_t processor,
                                                      uvm_access_counter_service_batch_context_t *batch_context)
{
    uvm_va_block_retry_t va_block_retry;
    uvm_page_mask_t *accessed_pages = &batch_context->accessed_pages;
    uvm_service_block_context_t *service_context = &batch_context->block_service_context;

    if (uvm_page_mask_empty(accessed_pages))
        return NV_OK;

    uvm_assert_mutex_locked(&va_block->lock);

    service_context->operation = UVM_SERVICE_OPERATION_ACCESS_COUNTERS;
    service_context->num_retries = 0;

    return UVM_VA_BLOCK_RETRY_LOCKED(va_block,
                                     &va_block_retry,
                                     service_va_block_locked(processor,
                                                             va_block,
                                                             &va_block_retry,
                                                             service_context,
                                                             accessed_pages));
}

static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,
                                      uvm_va_block_t *va_block,
                                      uvm_va_block_context_t *va_block_context,
                                      uvm_page_mask_t *accessed_pages,
                                      const uvm_access_counter_buffer_entry_t *current_entry)
{
    NvU64 addr;
    NvU64 granularity = 0;
    uvm_gpu_t *resident_gpu = NULL;
    uvm_processor_id_t resident_id;
    uvm_page_index_t page_index;
    uvm_gpu_t *gpu = gpu_va_space->gpu;
    const uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
    const uvm_gpu_access_counter_type_config_t *config = get_config_for_type(access_counters,
                                                                             UVM_ACCESS_COUNTER_TYPE_MIMC);

    config_granularity_to_bytes(config->rm.granularity, &granularity);

    // Granularities other than 2MB can only be enabled by UVM tests. Do
    // nothing in that case.
    if (granularity != UVM_PAGE_SIZE_2M)
        return;

    addr = current_entry->address.address;

    uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);
    uvm_assert_mutex_locked(&va_block->lock);

    page_index = uvm_va_block_cpu_page_index(va_block, addr);

    resident_id = uvm_va_block_page_get_closest_resident(va_block, va_block_context, page_index, gpu->id);

    // resident_id might be invalid, or it might already be the GPU that
    // received the notification, if the memory was migrated before the locks
    // were acquired: while servicing previous notifications or faults, or
    // because of explicit migrations. The same applies if the VA range was
    // freed after the notification was received. Do nothing in those cases.
    if (!UVM_ID_IS_VALID(resident_id) || uvm_id_equal(resident_id, gpu->id))
        return;

    if (UVM_ID_IS_GPU(resident_id))
        resident_gpu = uvm_va_space_get_gpu(gpu_va_space->va_space, resident_id);

    if (uvm_va_block_get_physical_size(va_block, resident_id, page_index) != granularity) {
        uvm_page_mask_set(accessed_pages, page_index);
    }
    else {
        NvU32 region_start;
        NvU32 region_end;
        unsigned long sub_granularity = current_entry->sub_granularity;
        NvU32 num_regions = config->sub_granularity_regions_per_translation;
        NvU32 num_sub_pages = config->sub_granularity_region_size / PAGE_SIZE;
        uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, resident_id, NUMA_NO_NODE);

        UVM_ASSERT(num_sub_pages >= 1);

        // region_start and region_end refer to sub_granularity indices, not
        // page indices.
        for_each_sub_granularity_region(region_start, region_end, sub_granularity, num_regions) {
            uvm_page_mask_region_fill(accessed_pages,
                                      uvm_va_block_region(region_start * num_sub_pages,
                                                          region_end * num_sub_pages));
        }

        // Remove pages in the va_block which are not resident on resident_id.
        // If the GPU keeps accessing those pages, future access counter
        // notifications will migrate them to the GPU.
        uvm_page_mask_and(accessed_pages, accessed_pages, resident_mask);
    }
}
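
// Worked example (editor's illustration, assuming the default 2MB granularity
// and 4KB base pages): sub_granularity_region_size is 2MB / 32 == 64KB, so
// num_sub_pages is 64KB / 4KB == 16. A sub_granularity run covering regions
// [2, 5) then fills page indices [32, 80) of accessed_pages, and the final
// mask intersection drops any of those pages not resident on resident_id.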

static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_space,
                                                     struct mm_struct *mm,
                                                     uvm_va_block_t *va_block,
                                                     uvm_access_counter_service_batch_context_t *batch_context,
                                                     NvU32 index,
                                                     NvU32 *out_index)
{
    NvU32 i;
    NvU32 flags = 0;
    NV_STATUS status = NV_OK;
    NV_STATUS flags_status;
    uvm_gpu_t *gpu = gpu_va_space->gpu;
    uvm_va_space_t *va_space = gpu_va_space->va_space;
    uvm_page_mask_t *accessed_pages = &batch_context->accessed_pages;
    uvm_access_counter_buffer_entry_t **notifications = batch_context->virt.notifications;
    uvm_service_block_context_t *service_context = &batch_context->block_service_context;

    UVM_ASSERT(va_block);
    UVM_ASSERT(index < batch_context->virt.num_notifications);

    uvm_assert_rwsem_locked(&va_space->lock);

    uvm_page_mask_zero(accessed_pages);

    uvm_va_block_context_init(service_context->block_context, mm);

    uvm_mutex_lock(&va_block->lock);

    for (i = index; i < batch_context->virt.num_notifications; i++) {
        uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
        NvU64 address = current_entry->address.address;

        if ((current_entry->virtual_info.va_space == va_space) && (address <= va_block->end)) {
            expand_notification_block(gpu_va_space,
                                      va_block,
                                      batch_context->block_service_context.block_context,
                                      accessed_pages,
                                      current_entry);
        }
        else {
            break;
        }
    }

    *out_index = i;

    // At least one notification should have been processed.
    UVM_ASSERT(index < *out_index);

    status = service_notification_va_block_helper(mm, va_block, gpu->id, batch_context);

    uvm_mutex_unlock(&va_block->lock);

    if (status == NV_OK)
        flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;

    flags_status = notify_tools_and_process_flags(gpu, &notifications[index], *out_index - index, flags);

    if ((status == NV_OK) && (flags_status != NV_OK))
        status = flags_status;

    return status;
}

static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
                                               struct mm_struct *mm,
                                               uvm_access_counter_service_batch_context_t *batch_context,
                                               NvU32 index,
                                               NvU32 *out_index)
{
    NvU32 i;
    NvU64 base;
    NvU64 end;
    NvU64 address;
    NvU32 flags = UVM_ACCESS_COUNTER_ACTION_CLEAR;
    NV_STATUS status = NV_OK;
    NV_STATUS flags_status;
    struct vm_area_struct *vma = NULL;
    uvm_gpu_t *gpu = gpu_va_space->gpu;
    uvm_va_space_t *va_space = gpu_va_space->va_space;
    uvm_ats_fault_context_t *ats_context = &batch_context->ats_context;
    uvm_access_counter_buffer_entry_t **notifications = batch_context->virt.notifications;

    UVM_ASSERT(index < batch_context->virt.num_notifications);

    uvm_assert_mmap_lock_locked(mm);
    uvm_assert_rwsem_locked(&va_space->lock);

    address = notifications[index]->address.address;

    vma = find_vma_intersection(mm, address, address + 1);
    if (!vma) {
        // Clear the notification entry to continue receiving access counter
        // notifications when a new VMA is allocated in this range.
        status = notify_tools_and_process_flags(gpu, &notifications[index], 1, flags);
        *out_index = index + 1;
        return status;
    }

    base = UVM_VA_BLOCK_ALIGN_DOWN(address);
    end = min(base + UVM_VA_BLOCK_SIZE, (NvU64)vma->vm_end);

    uvm_page_mask_zero(&ats_context->accessed_mask);

    for (i = index; i < batch_context->virt.num_notifications; i++) {
        uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
        address = current_entry->address.address;

        if ((current_entry->virtual_info.va_space == va_space) && (address < end))
            uvm_page_mask_set(&ats_context->accessed_mask, (address - base) / PAGE_SIZE);
        else
            break;
    }

    *out_index = i;

    // At least one notification should have been processed.
    UVM_ASSERT(index < *out_index);

    // TODO: Bug 2113632: [UVM] Don't clear access counters when the preferred
    // location is set.
    // If no pages were actually migrated, don't clear the access counters.
    status = uvm_ats_service_access_counters(gpu_va_space, vma, base, ats_context);
    if (status != NV_OK)
        flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;

    flags_status = notify_tools_and_process_flags(gpu, &notifications[index], *out_index - index, flags);
    if ((status == NV_OK) && (flags_status != NV_OK))
        status = flags_status;

    return status;
}
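
// Editor's note (illustrative numbers, not from the source): for a
// notification at base + 128KB with 4KB pages, the loop above sets bit
// 128KB / 4KB == 32 in ats_context->accessed_mask. All notifications that
// fall within [base, end) for the same VA space are coalesced into a single
// uvm_ats_service_access_counters() call.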

static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_space,
                                                  struct mm_struct *mm,
                                                  uvm_access_counter_service_batch_context_t *batch_context,
                                                  NvU32 index,
                                                  NvU32 *out_index)
{
    NV_STATUS status;
    uvm_va_range_t *va_range;
    uvm_va_space_t *va_space = gpu_va_space->va_space;
    uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[index];
    NvU64 address = current_entry->address.address;

    UVM_ASSERT(va_space);

    if (mm)
        uvm_assert_mmap_lock_locked(mm);

    uvm_assert_rwsem_locked(&va_space->lock);

    // Virtual address notifications are always 64K aligned
    UVM_ASSERT(IS_ALIGNED(address, UVM_PAGE_SIZE_64K));

    va_range = uvm_va_range_find(va_space, address);
    if (va_range) {
        // Avoid clearing the entry by default.
        NvU32 flags = 0;
        uvm_va_block_t *va_block = NULL;

        if (va_range->type == UVM_VA_RANGE_TYPE_MANAGED) {
            size_t index = uvm_va_range_block_index(va_range, address);

            va_block = uvm_va_range_block(va_range, index);

            // If the va_range is a managed range and va_block is NULL, the
            // notification belongs to a recently freed va_range. If va_block
            // is not NULL, service_virt_notifications_in_block will process
            // the flags. Clear the notification entry to continue receiving
            // notifications when a new va_range is allocated in that region.
            flags = UVM_ACCESS_COUNTER_ACTION_CLEAR;
        }

        if (va_block) {
            status = service_virt_notifications_in_block(gpu_va_space, mm, va_block, batch_context, index, out_index);
        }
        else {
            status = notify_tools_and_process_flags(gpu_va_space->gpu, batch_context->virt.notifications, 1, flags);
            *out_index = index + 1;
        }
    }
    else if (uvm_ats_can_service_faults(gpu_va_space, mm)) {
        status = service_virt_notification_ats(gpu_va_space, mm, batch_context, index, out_index);
    }
    else {
        NvU32 flags;
        uvm_va_block_t *va_block = NULL;

        status = uvm_hmm_va_block_find(va_space, address, &va_block);

        // TODO: Bug 4309292: [UVM][HMM] Re-enable access counter HMM block
        // migrations for virtual notifications
        //
        // - If the va_block is HMM, don't clear the notification since HMM
        //   migrations are currently disabled.
        //
        // - If the va_block isn't HMM, the notification belongs to a recently
        //   freed va_range. Clear the notification entry to continue receiving
        //   notifications when a new va_range is allocated in this region.
        flags = va_block ? 0 : UVM_ACCESS_COUNTER_ACTION_CLEAR;

        UVM_ASSERT((status == NV_ERR_OBJECT_NOT_FOUND) ||
                   (status == NV_ERR_INVALID_ADDRESS) ||
                   uvm_va_block_is_hmm(va_block));

        // Clobber status to continue processing the rest of the notifications
        // in the batch.
        status = notify_tools_and_process_flags(gpu_va_space->gpu, batch_context->virt.notifications, 1, flags);

        *out_index = index + 1;
    }

    return status;
}

static NV_STATUS service_virt_notifications(uvm_gpu_t *gpu,
                                            uvm_access_counter_service_batch_context_t *batch_context)
{
    NvU32 i = 0;
    NV_STATUS status = NV_OK;
    struct mm_struct *mm = NULL;
    uvm_va_space_t *va_space = NULL;
    uvm_va_space_t *prev_va_space = NULL;
    uvm_gpu_va_space_t *gpu_va_space = NULL;

    // TODO: Bug 4299018: Add support for virtual access counter migrations on
    // 4K page sizes.
    if (PAGE_SIZE == UVM_PAGE_SIZE_4K) {
        return notify_tools_and_process_flags(gpu,
                                              batch_context->virt.notifications,
                                              batch_context->virt.num_notifications,
                                              0);
    }

    preprocess_virt_notifications(gpu->parent, batch_context);

    while (i < batch_context->virt.num_notifications) {
        uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[i];
        va_space = current_entry->virtual_info.va_space;

        if (va_space != prev_va_space) {

            // New va_space detected, drop the locks of the old va_space.
            if (prev_va_space) {
                uvm_va_space_up_read(prev_va_space);
                uvm_va_space_mm_release_unlock(prev_va_space, mm);

                mm = NULL;
                gpu_va_space = NULL;
            }

            // Acquire the locks for the new va_space.
            if (va_space) {
                mm = uvm_va_space_mm_retain_lock(va_space);
                uvm_va_space_down_read(va_space);

                gpu_va_space = uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
            }

            prev_va_space = va_space;
        }

        if (va_space && gpu_va_space && uvm_va_space_has_access_counter_migrations(va_space)) {
            status = service_virt_notifications_batch(gpu_va_space, mm, batch_context, i, &i);
        }
        else {
            status = notify_tools_and_process_flags(gpu, &batch_context->virt.notifications[i], 1, 0);
            i++;
        }

        if (status != NV_OK)
            break;
    }

    if (va_space) {
        uvm_va_space_up_read(va_space);
        uvm_va_space_mm_release_unlock(va_space, mm);
    }

    return status;
}

void uvm_gpu_service_access_counters(uvm_gpu_t *gpu)
{
    NV_STATUS status = NV_OK;
    uvm_access_counter_service_batch_context_t *batch_context = &gpu->parent->access_counter_buffer_info.batch_service_context;

    UVM_ASSERT(gpu->parent->access_counters_supported);

    if (gpu->parent->access_counter_buffer_info.notifications_ignored_count > 0)
        return;

    while (1) {
        batch_context->num_cached_notifications = fetch_access_counter_buffer_entries(gpu,
                                                                                      batch_context,
                                                                                      NOTIFICATION_FETCH_MODE_BATCH_READY);
        if (batch_context->num_cached_notifications == 0)
            break;

        ++batch_context->batch_id;

        if (batch_context->virt.num_notifications) {
            status = service_virt_notifications(gpu, batch_context);
            if (status != NV_OK)
                break;
        }

        if (batch_context->phys.num_notifications) {
            status = service_phys_notifications(gpu, batch_context);
            if (status != NV_OK)
                break;
        }
    }

    if (status != NV_OK) {
        UVM_DBG_PRINT("Error %s servicing access counter notifications on GPU: %s\n",
                      nvstatusToString(status),
                      uvm_gpu_name(gpu));
    }
}

static const NvU32 g_uvm_access_counters_threshold_max = (1 << 15) - 1;

static NV_STATUS access_counters_config_from_test_params(const UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS *params,
                                                         UvmGpuAccessCntrConfig *config)
{
    NvU64 tracking_size;

    memset(config, 0, sizeof(*config));

    if (params->threshold == 0 || params->threshold > g_uvm_access_counters_threshold_max)
        return NV_ERR_INVALID_ARGUMENT;

    if (config_granularity_to_bytes(params->mimc_granularity, &tracking_size) != NV_OK)
        return NV_ERR_INVALID_ARGUMENT;

    if (config_granularity_to_bytes(params->momc_granularity, &tracking_size) != NV_OK)
        return NV_ERR_INVALID_ARGUMENT;

    // Since the values for granularity/use limit are shared between tests and
    // nv_uvm_types.h, the value will be checked in the call to
    // nvUvmInterfaceEnableAccessCntr
    config->mimcGranularity = params->mimc_granularity;
    config->momcGranularity = params->momc_granularity;

    config->mimcUseLimit = params->mimc_use_limit;
    config->momcUseLimit = params->momc_use_limit;

    config->threshold = params->threshold;

    return NV_OK;
}
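
// Editor's illustration with hypothetical values: a params block with
// threshold == 256 and valid mimc/momc granularities passes the checks above,
// while threshold == 0 or threshold > (1 << 15) - 1 fails early with
// NV_ERR_INVALID_ARGUMENT, before nvUvmInterfaceEnableAccessCntr is called.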

bool uvm_va_space_has_access_counter_migrations(uvm_va_space_t *va_space)
{
    va_space_access_counters_info_t *va_space_access_counters = va_space_access_counters_info_get(va_space);

    return atomic_read(&va_space_access_counters->params.enable_mimc_migrations);
}

NV_STATUS uvm_perf_access_counters_init(void)
{
    uvm_perf_module_init("perf_access_counters",
                         UVM_PERF_MODULE_TYPE_ACCESS_COUNTERS,
                         g_callbacks_access_counters,
                         ARRAY_SIZE(g_callbacks_access_counters),
                         &g_module_access_counters);

    return NV_OK;
}

void uvm_perf_access_counters_exit(void)
{
}

NV_STATUS uvm_perf_access_counters_load(uvm_va_space_t *va_space)
{
    va_space_access_counters_info_t *va_space_access_counters;
    NV_STATUS status;

    status = uvm_perf_module_load(&g_module_access_counters, va_space);
    if (status != NV_OK)
        return status;

    va_space_access_counters = va_space_access_counters_info_create(va_space);
    if (!va_space_access_counters)
        return NV_ERR_NO_MEMORY;

    return NV_OK;
}

void uvm_perf_access_counters_unload(uvm_va_space_t *va_space)
{
    uvm_perf_module_unload(&g_module_access_counters, va_space);

    va_space_access_counters_info_destroy(va_space);
}

NV_STATUS uvm_test_access_counters_enabled_by_default(UVM_TEST_ACCESS_COUNTERS_ENABLED_BY_DEFAULT_PARAMS *params,
                                                      struct file *filp)
{
    uvm_va_space_t *va_space = uvm_va_space_get(filp);
    uvm_gpu_t *gpu = NULL;

    gpu = uvm_va_space_retain_gpu_by_uuid(va_space, &params->gpu_uuid);
    if (!gpu)
        return NV_ERR_INVALID_DEVICE;

    params->enabled = uvm_parent_gpu_access_counters_required(gpu->parent);

    uvm_gpu_release(gpu);

    return NV_OK;
}

NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS *params, struct file *filp)
{
    NV_STATUS status = NV_OK;
    uvm_gpu_t *gpu = NULL;
    UvmGpuAccessCntrConfig config = {0};
    va_space_access_counters_info_t *va_space_access_counters;
    uvm_va_space_t *va_space_reconfiguration_owner;
    uvm_va_space_t *va_space = uvm_va_space_get(filp);

    status = access_counters_config_from_test_params(params, &config);
    if (status != NV_OK)
        return status;

    gpu = uvm_va_space_retain_gpu_by_uuid(va_space, &params->gpu_uuid);
    if (!gpu)
        return NV_ERR_INVALID_DEVICE;

    if (!gpu->parent->access_counters_supported) {
        status = NV_ERR_NOT_SUPPORTED;
        goto exit_release_gpu;
    }

    // The ISR lock ensures that we own the GET/PUT registers. It disables
    // interrupts and ensures that no other thread (nor the top half) will be
    // able to re-enable interrupts during reconfiguration.
    uvm_parent_gpu_access_counters_isr_lock(gpu->parent);

    uvm_va_space_down_read_rm(va_space);

    if (!uvm_processor_mask_test(&va_space->registered_gpus, gpu->id)) {
        status = NV_ERR_INVALID_STATE;
        goto exit_isr_unlock;
    }

    // Unregistration has already started. Fail to avoid an interleaving in
    // which access counters end up being enabled on an unregistered GPU:
    // (thread 0) uvm_va_space_unregister_gpu disables access counters
    // (thread 1) assuming no VA space lock is held yet by the unregistration,
    //            this function enables access counters and runs to completion,
    //            returning NV_OK
    // (thread 0) uvm_va_space_unregister_gpu takes the VA space lock and
    //            completes the unregistration
    if (uvm_processor_mask_test(&va_space->gpu_unregister_in_progress, gpu->id)) {
        status = NV_ERR_INVALID_STATE;
        goto exit_isr_unlock;
    }

    va_space_access_counters = va_space_access_counters_info_get(va_space);

    va_space_reconfiguration_owner = gpu->parent->access_counter_buffer_info.reconfiguration_owner;

    // If any other VA space has reconfigured access counters on this GPU,
    // return an error to avoid overwriting its configuration.
    if (va_space_reconfiguration_owner && (va_space_reconfiguration_owner != va_space)) {
        status = NV_ERR_INVALID_STATE;
        goto exit_isr_unlock;
    }

    if (!uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
        status = gpu_access_counters_enable(gpu, &config);

        if (status == NV_OK)
            uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
        else
            goto exit_isr_unlock;
    }

    UVM_ASSERT(gpu->parent->isr.access_counters.handling_ref_count > 0);

    // Disable the counters, and re-enable them with the new configuration.
    // Note that we yield ownership even though access counters are enabled on
    // this GPU. This inconsistent state is not visible to other threads or VA
    // spaces because of the ISR lock, and it is immediately rectified by
    // retaking ownership.
    access_counters_yield_ownership(gpu->parent);
    status = access_counters_take_ownership(gpu, &config);

    // Retaking ownership failed, so RM owns the interrupt.
    if (status != NV_OK) {
        // The state of any other VA space with access counters enabled is
        // corrupt.
        // TODO: Bug 2419290: Fail reconfiguration if access counters are
        // enabled on a different VA space.
        if (gpu->parent->isr.access_counters.handling_ref_count > 1) {
            UVM_ASSERT_MSG(status == NV_OK,
                           "Access counters interrupt still owned by RM, other VA spaces may experience failures");
        }

        uvm_parent_processor_mask_clear_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
        parent_gpu_access_counters_disable(gpu->parent);
        goto exit_isr_unlock;
    }

    gpu->parent->access_counter_buffer_info.reconfiguration_owner = va_space;

    uvm_va_space_up_read_rm(va_space);
    uvm_va_space_down_write(va_space);
    atomic_set(&va_space_access_counters->params.enable_mimc_migrations, !!params->enable_mimc_migrations);
    atomic_set(&va_space_access_counters->params.enable_momc_migrations, !!params->enable_momc_migrations);
    uvm_va_space_up_write(va_space);

exit_isr_unlock:
    if (status != NV_OK)
        uvm_va_space_up_read_rm(va_space);

    uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);

exit_release_gpu:
    uvm_gpu_release(gpu);

    return status;
}

NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *params, struct file *filp)
{
    NV_STATUS status = NV_OK;
    uvm_gpu_t *gpu = NULL;
    uvm_access_counter_buffer_info_t *access_counters;
    uvm_va_space_t *va_space = uvm_va_space_get(filp);

    if (params->mode >= UVM_TEST_ACCESS_COUNTER_RESET_MODE_MAX)
        return NV_ERR_INVALID_ARGUMENT;

    if (params->mode == UVM_TEST_ACCESS_COUNTER_RESET_MODE_TARGETED &&
        params->counter_type >= UVM_TEST_ACCESS_COUNTER_TYPE_MAX) {
        return NV_ERR_INVALID_ARGUMENT;
    }

    gpu = uvm_va_space_retain_gpu_by_uuid(va_space, &params->gpu_uuid);
    if (!gpu)
        return NV_ERR_INVALID_DEVICE;

    if (!gpu->parent->access_counters_supported) {
        status = NV_ERR_NOT_SUPPORTED;
        goto exit_release_gpu;
    }

    uvm_parent_gpu_access_counters_isr_lock(gpu->parent);

    // Access counters not enabled. Nothing to reset.
    if (gpu->parent->isr.access_counters.handling_ref_count == 0)
        goto exit_isr_unlock;

    access_counters = &gpu->parent->access_counter_buffer_info;

    if (params->mode == UVM_TEST_ACCESS_COUNTER_RESET_MODE_ALL) {
        status = access_counter_clear_all(gpu);
    }
    else {
        uvm_access_counter_buffer_entry_t entry = { 0 };
        uvm_access_counter_buffer_entry_t *notification = &entry;

        if (params->counter_type == UVM_TEST_ACCESS_COUNTER_TYPE_MIMC)
            entry.counter_type = UVM_ACCESS_COUNTER_TYPE_MIMC;
        else
            entry.counter_type = UVM_ACCESS_COUNTER_TYPE_MOMC;

        entry.bank = params->bank;
        entry.tag = params->tag;

        status = access_counter_clear_notifications(gpu, &notification, 1);
    }

    if (status == NV_OK)
        status = uvm_tracker_wait(&access_counters->clear_tracker);

exit_isr_unlock:
    uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);

exit_release_gpu:
    uvm_gpu_release(gpu);

    return status;
}

void uvm_parent_gpu_access_counters_set_ignore(uvm_parent_gpu_t *parent_gpu, bool do_ignore)
{
    bool change_intr_state = false;

    if (!parent_gpu->access_counters_supported)
        return;

    uvm_parent_gpu_access_counters_isr_lock(parent_gpu);

    if (do_ignore) {
        if (parent_gpu->access_counter_buffer_info.notifications_ignored_count++ == 0)
            change_intr_state = true;
    }
    else {
        UVM_ASSERT(parent_gpu->access_counter_buffer_info.notifications_ignored_count >= 1);
        if (--parent_gpu->access_counter_buffer_info.notifications_ignored_count == 0)
            change_intr_state = true;
    }

    if (change_intr_state) {
        // We need to avoid an interrupt storm while ignoring notifications. We
        // just disable the interrupt.
        uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);

        if (do_ignore)
            uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
        else
            uvm_parent_gpu_access_counters_intr_enable(parent_gpu);

        uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);

        if (!do_ignore)
            access_counter_buffer_flush_locked(parent_gpu, UVM_GPU_BUFFER_FLUSH_MODE_CACHED_PUT);
    }

    uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
}
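
// Editor's note (behavior inferred from the refcount above): ignore requests
// nest. Two set_ignore(true) calls must be balanced by two set_ignore(false)
// calls before the interrupt is re-enabled, at which point the buffer is
// flushed up to the cached PUT pointer to discard notifications that arrived
// while they were being ignored.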

NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTERS_PARAMS *params, struct file *filp)
{
    uvm_va_space_t *va_space = uvm_va_space_get(filp);
    NV_STATUS status = NV_OK;
    uvm_gpu_t *gpu = NULL;

    gpu = uvm_va_space_retain_gpu_by_uuid(va_space, &params->gpu_uuid);
    if (!gpu)
        return NV_ERR_INVALID_DEVICE;

    if (gpu->parent->access_counters_supported)
        uvm_parent_gpu_access_counters_set_ignore(gpu->parent, params->ignore);
    else
        status = NV_ERR_NOT_SUPPORTED;

    uvm_gpu_release(gpu);
    return status;
}
