/*******************************************************************************
    Copyright (c) 2016-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_api.h"
#include "uvm_global.h"
#include "uvm_perf_events.h"
#include "uvm_perf_module.h"
#include "uvm_perf_thrashing.h"
#include "uvm_perf_utils.h"
#include "uvm_va_block.h"
#include "uvm_va_range.h"
#include "uvm_kvmalloc.h"
#include "uvm_tools.h"
#include "uvm_procfs.h"
#include "uvm_test.h"

// Number of bits for page-granularity time stamps. Currently we ignore the
// lowest 6 bits of the timestamp (i.e. we have 64ns resolution, which is
// good enough).
#define PAGE_THRASHING_LAST_TIME_STAMP_BITS 58
#define PAGE_THRASHING_NUM_EVENTS_BITS      3

#define PAGE_THRASHING_THROTTLING_END_TIME_STAMP_BITS 58
#define PAGE_THRASHING_THROTTLING_COUNT_BITS          8

// Per-page thrashing detection structure.
typedef struct
{
    struct
    {
        // Last time stamp when a thrashing-related event was recorded
        NvU64 last_time_stamp : PAGE_THRASHING_LAST_TIME_STAMP_BITS;

        bool has_migration_events : 1;

        bool has_revocation_events : 1;

        // Number of consecutive "thrashing" events (within the configured
        // thrashing lapse)
        NvU8 num_thrashing_events : PAGE_THRASHING_NUM_EVENTS_BITS;

        bool pinned : 1;
    };

    struct
    {
        // Deadline for throttled processors to wake up
        NvU64 throttling_end_time_stamp : PAGE_THRASHING_THROTTLING_END_TIME_STAMP_BITS;

        // Number of times a processor has been throttled. This is used to
        // determine when the page needs to get pinned. After getting pinned
        // this field is always 0.
        NvU8 throttling_count : PAGE_THRASHING_THROTTLING_COUNT_BITS;
    };

    // Processors accessing this page
    uvm_processor_mask_t processors;

    // Processors that have been throttled. This must be a subset of processors
    uvm_processor_mask_t throttled_processors;

    // Memory residency for the page when in pinning phase
    uvm_processor_id_t pinned_residency_id;

    // Processor not to be throttled in the current throttling period
    uvm_processor_id_t do_not_throttle_processor_id;
} page_thrashing_info_t;
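
// Note: with the bitfield layout used by GCC, the first anonymous struct
// above packs into a single 64-bit word (58 + 1 + 1 + 3 + 1 = 64 bits).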

// Per-VA block thrashing detection structure. This state is protected by the
// VA block lock.
typedef struct
{
    page_thrashing_info_t *pages;

    NvU16 num_thrashing_pages;

    NvU8 thrashing_reset_count;

    uvm_processor_id_t last_processor;

    NvU64 last_time_stamp;

    NvU64 last_thrashing_time_stamp;

    // Stats
    NvU32 throttling_count;

    uvm_page_mask_t thrashing_pages;

    struct
    {
        NvU32 count;

        uvm_page_mask_t mask;

        // List of pinned pages. This list is only used if the pinning timeout
        // is not 0.
        struct list_head list;
    } pinned_pages;
} block_thrashing_info_t;

// Descriptor for a page that has been pinned due to thrashing. This structure
// is only used if the pinning timeout is not 0.
typedef struct
{
    uvm_va_block_t *va_block;

    // Page index within va_block
    uvm_page_index_t page_index;

    // Absolute timestamp after which the page will be unpinned
    NvU64 deadline;

    // Entry in the per-VA Space list of pinned pages. See
    // va_space_thrashing_info_t::pinned_pages::list.
    struct list_head va_space_list_entry;

    // Entry in the per-VA Block list of pinned pages. See
    // block_thrashing_info_t::pinned_pages::list.
    struct list_head va_block_list_entry;
} pinned_page_t;

// Per-VA space data structures and policy configuration
typedef struct
{
    // Per-VA space accounting of pinned pages that is used to speculatively
    // unpin pages after the configured timeout. This struct is only used if
    // the pinning timeout is not 0.
    struct
    {
        // Work descriptor that is executed asynchronously by a helper thread
        struct delayed_work dwork;

        // List of pinned pages. They are (mostly) ordered by unpin deadline.
        // New entries are inserted blindly at the tail since the expectation
        // is that they will have the largest deadline value. However, given
        // the drift between when multiple threads query their timestamps and
        // add those pages to the list under the lock, it might not be
        // strictly ordered. But this is OK since the difference will be very
        // small and they will be eventually removed from the list.
        //
        // Entries are removed when they reach the deadline by the function
        // configured in dwork. This list is protected by lock.
        struct list_head list;

        uvm_spinlock_t lock;

        uvm_va_block_context_t *va_block_context;

        // Flag used to avoid scheduling delayed unpinning operations after
        // uvm_perf_thrashing_stop has been called.
        bool in_va_space_teardown;
    } pinned_pages;

    struct
    {
        // Whether thrashing mitigation is enabled on this VA space
        bool enable;

        // true if the thrashing mitigation parameters have been modified using
        // test ioctls
        bool test_overrides;

        //
        // Fields below are the thrashing mitigation parameters on the VA space
        //
        unsigned threshold;

        unsigned pin_threshold;

        NvU64 lapse_ns;

        NvU64 nap_ns;

        NvU64 epoch_ns;

        unsigned max_resets;

        NvU64 pin_ns;
    } params;

    uvm_va_space_t *va_space;
} va_space_thrashing_info_t;

typedef struct
{
    // Entry for the per-processor thrashing_stats file in procfs
    struct proc_dir_entry *procfs_file;

    // Number of times thrashing is detected
    atomic64_t num_thrashing;

    // Number of times the processor was throttled while thrashing
    atomic64_t num_throttle;

    // Number of times a page was pinned on this processor while thrashing
    atomic64_t num_pin_local;

    // Number of times a page was pinned on a different processor while thrashing
    atomic64_t num_pin_remote;
} processor_thrashing_stats_t;

// Pre-allocated thrashing stats structure for the CPU. This is only valid if
// uvm_procfs_is_debug_enabled() returns true.
static processor_thrashing_stats_t g_cpu_thrashing_stats;

#define PROCESSOR_THRASHING_STATS_INC(va_space, proc, field)                                          \
    do {                                                                                              \
        processor_thrashing_stats_t *_processor_stats = thrashing_stats_get_or_null(va_space, proc); \
        if (_processor_stats)                                                                         \
            atomic64_inc(&_processor_stats->field);                                                  \
    } while (0)
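
// Usage sketch: this is how the macro is invoked later in this file, e.g.
// when thrashing is first detected on a page:
//
//     PROCESSOR_THRASHING_STATS_INC(va_space, processor_id, num_thrashing);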

// Global caches for the per-VA block thrashing detection structures
static struct kmem_cache *g_va_block_thrashing_info_cache __read_mostly;
static struct kmem_cache *g_pinned_page_cache __read_mostly;

//
// Tunables for thrashing detection/prevention (configurable via module parameters)
//

#define UVM_PERF_THRASHING_ENABLE_DEFAULT 1

// Enable/disable thrashing performance heuristics
static unsigned uvm_perf_thrashing_enable = UVM_PERF_THRASHING_ENABLE_DEFAULT;

#define UVM_PERF_THRASHING_THRESHOLD_DEFAULT 3
#define UVM_PERF_THRASHING_THRESHOLD_MAX     ((1 << PAGE_THRASHING_NUM_EVENTS_BITS) - 1)

// Number of consecutive thrashing events to initiate thrashing prevention
//
// Maximum value is UVM_PERF_THRASHING_THRESHOLD_MAX
static unsigned uvm_perf_thrashing_threshold = UVM_PERF_THRASHING_THRESHOLD_DEFAULT;

#define UVM_PERF_THRASHING_PIN_THRESHOLD_DEFAULT 10
#define UVM_PERF_THRASHING_PIN_THRESHOLD_MAX     ((1 << PAGE_THRASHING_THROTTLING_COUNT_BITS) - 1)

// Number of consecutive throttling operations before trying to map remotely
//
// Maximum value is UVM_PERF_THRASHING_PIN_THRESHOLD_MAX
static unsigned uvm_perf_thrashing_pin_threshold = UVM_PERF_THRASHING_PIN_THRESHOLD_DEFAULT;

// TODO: Bug 1768615: [uvm] Automatically tune default values for thrashing
// detection/prevention parameters
#define UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT           500
#define UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT_EMULATION (UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT * 800)

// Lapse of time in microseconds that determines if two consecutive events on
// the same page can be considered thrashing
static unsigned uvm_perf_thrashing_lapse_usec = UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT;

#define UVM_PERF_THRASHING_NAP_DEFAULT 1
#define UVM_PERF_THRASHING_NAP_MAX     100

// Time during which a throttled processor is forbidden from working on the
// thrashing page. This value is a multiplier of uvm_perf_thrashing_lapse_usec.
static unsigned uvm_perf_thrashing_nap = UVM_PERF_THRASHING_NAP_DEFAULT;

#define UVM_PERF_THRASHING_EPOCH_DEFAULT 2000

// Time lapse after which we consider thrashing is no longer happening. This
// value is a multiplier of uvm_perf_thrashing_lapse_usec.
static unsigned uvm_perf_thrashing_epoch = UVM_PERF_THRASHING_EPOCH_DEFAULT;

// When pages are pinned and the rest of the thrashing processors are mapped
// remotely, we lose track of who is accessing the page for the rest of
// program execution. This can lead to tremendous performance loss if the page
// stops thrashing but keeps being accessed remotely. In order to avoid that
// scenario, we use a timer that unpins memory after some time: a per-VA space
// list of pinned pages, sorted by the deadline at which each page will be
// unpinned and unmapped from remote processors. The next remote access will
// then trigger a fault that migrates the page.
#define UVM_PERF_THRASHING_PIN_DEFAULT           300
#define UVM_PERF_THRASHING_PIN_DEFAULT_EMULATION 10

// Time for which a page remains pinned. This value is a multiplier of
// uvm_perf_thrashing_lapse_usec. 0 means that it is pinned forever.
static unsigned uvm_perf_thrashing_pin = UVM_PERF_THRASHING_PIN_DEFAULT;

// Number of times a VA block can be reset back to non-thrashing. This
// mechanism tries to avoid performing optimizations on a block that
// periodically causes thrashing.
#define UVM_PERF_THRASHING_MAX_RESETS_DEFAULT 4

static unsigned uvm_perf_thrashing_max_resets = UVM_PERF_THRASHING_MAX_RESETS_DEFAULT;

// Module parameters for the tunables
module_param(uvm_perf_thrashing_enable, uint, S_IRUGO);
module_param(uvm_perf_thrashing_threshold, uint, S_IRUGO);
module_param(uvm_perf_thrashing_pin_threshold, uint, S_IRUGO);
module_param(uvm_perf_thrashing_lapse_usec, uint, S_IRUGO);
module_param(uvm_perf_thrashing_nap, uint, S_IRUGO);
module_param(uvm_perf_thrashing_epoch, uint, S_IRUGO);
module_param(uvm_perf_thrashing_pin, uint, S_IRUGO);
module_param(uvm_perf_thrashing_max_resets, uint, S_IRUGO);

// See map_remote_on_atomic_fault in uvm_va_block.c
unsigned uvm_perf_map_remote_on_native_atomics_fault = 0;
module_param(uvm_perf_map_remote_on_native_atomics_fault, uint, S_IRUGO);
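
// Usage sketch (hypothetical values): these tunables can only be set at
// module load time, e.g.
//
//     modprobe nvidia-uvm uvm_perf_thrashing_threshold=5 uvm_perf_thrashing_pin=600
//
// and, being registered with S_IRUGO, they are read-only afterwards under
// /sys/module/nvidia_uvm/parameters/.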

// Global post-processed values of the module parameters. They can be
// overridden per VA space.
static bool     g_uvm_perf_thrashing_enable;
static unsigned g_uvm_perf_thrashing_threshold;
static unsigned g_uvm_perf_thrashing_pin_threshold;
static NvU64    g_uvm_perf_thrashing_lapse_usec;
static NvU64    g_uvm_perf_thrashing_nap;
static NvU64    g_uvm_perf_thrashing_epoch;
static NvU64    g_uvm_perf_thrashing_pin;
static unsigned g_uvm_perf_thrashing_max_resets;

// Helper macros to initialize thrashing parameters from module parameters
//
// This helper returns whether the type for the parameter is signed
#define THRASHING_PARAMETER_IS_SIGNED(v) (((typeof(v)) -1) < 0)

// Macro that initializes the given thrashing parameter from the module
// parameter _v and checks that the user value is valid (within [_mi:_ma]).
// If it is not, the parameter is initialized with the given default value _d
// instead. The final value is stored in a variable named g_##_v, which must
// be declared, too. Only unsigned parameters are supported.
#define INIT_THRASHING_PARAMETER_MIN_MAX(_v, _d, _mi, _ma)                     \
    do {                                                                       \
        unsigned v = (_v);                                                     \
        unsigned d = (_d);                                                     \
        unsigned mi = (_mi);                                                   \
        unsigned ma = (_ma);                                                   \
                                                                               \
        BUILD_BUG_ON(sizeof(_v) > sizeof(unsigned));                           \
        BUILD_BUG_ON(THRASHING_PARAMETER_IS_SIGNED(_v));                       \
                                                                               \
        UVM_ASSERT(mi <= ma);                                                  \
        UVM_ASSERT(d >= mi);                                                   \
        UVM_ASSERT(d <= ma);                                                   \
                                                                               \
        if (v >= mi && v <= ma) {                                              \
            g_##_v = v;                                                        \
        }                                                                      \
        else {                                                                 \
            pr_info("Invalid value %u for " #_v ". Using %u instead\n", v, d); \
                                                                               \
            g_##_v = d;                                                        \
        }                                                                      \
    } while (0)

#define INIT_THRASHING_PARAMETER(v, d)                 INIT_THRASHING_PARAMETER_MIN_MAX(v, d, 0u, UINT_MAX)

#define INIT_THRASHING_PARAMETER_MIN(v, d, mi)         INIT_THRASHING_PARAMETER_MIN_MAX(v, d, mi, UINT_MAX)
#define INIT_THRASHING_PARAMETER_MAX(v, d, ma)         INIT_THRASHING_PARAMETER_MIN_MAX(v, d, 0u, ma)

#define INIT_THRASHING_PARAMETER_NONZERO(v, d)         INIT_THRASHING_PARAMETER_MIN_MAX(v, d, 1u, UINT_MAX)
#define INIT_THRASHING_PARAMETER_NONZERO_MAX(v, d, ma) INIT_THRASHING_PARAMETER_MIN_MAX(v, d, 1u, ma)

#define INIT_THRASHING_PARAMETER_TOGGLE(v, d)          INIT_THRASHING_PARAMETER_MIN_MAX(v, d, 0u, 1u)
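
// Usage sketch (a minimal example; the module initialization code, later in
// this file, is expected to invoke these along these lines):
//
//     INIT_THRASHING_PARAMETER_TOGGLE(uvm_perf_thrashing_enable, UVM_PERF_THRASHING_ENABLE_DEFAULT);
//     INIT_THRASHING_PARAMETER_NONZERO_MAX(uvm_perf_thrashing_threshold,
//                                          UVM_PERF_THRASHING_THRESHOLD_DEFAULT,
//                                          UVM_PERF_THRASHING_THRESHOLD_MAX);
//
// After these calls, g_uvm_perf_thrashing_enable and
// g_uvm_perf_thrashing_threshold hold the validated values.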

// Helpers to get/set the time stamp
static NvU64 page_thrashing_get_time_stamp(page_thrashing_info_t *entry)
{
    return entry->last_time_stamp << (64 - PAGE_THRASHING_LAST_TIME_STAMP_BITS);
}

static void page_thrashing_set_time_stamp(page_thrashing_info_t *entry, NvU64 time_stamp)
{
    entry->last_time_stamp = time_stamp >> (64 - PAGE_THRASHING_LAST_TIME_STAMP_BITS);
}

static NvU64 page_thrashing_get_throttling_end_time_stamp(page_thrashing_info_t *entry)
{
    return entry->throttling_end_time_stamp << (64 - PAGE_THRASHING_THROTTLING_END_TIME_STAMP_BITS);
}

static void page_thrashing_set_throttling_end_time_stamp(page_thrashing_info_t *entry, NvU64 time_stamp)
{
    entry->throttling_end_time_stamp = time_stamp >> (64 - PAGE_THRASHING_THROTTLING_END_TIME_STAMP_BITS);
}
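
// Round-trip note: the setters drop the lowest 6 bits of the timestamp and
// the getters restore the magnitude, so get(set(t)) == (t & ~0x3fULL). For
// example, a timestamp of 1000ns is stored and read back as 960ns.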

// Performance heuristics module for thrashing
static uvm_perf_module_t g_module_thrashing;

// Callback declaration for the performance heuristics events
static void thrashing_event_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data);
static void thrashing_block_destroy_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data);
static void thrashing_block_munmap_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data);

static uvm_perf_module_event_callback_desc_t g_callbacks_thrashing[] = {
    { UVM_PERF_EVENT_BLOCK_DESTROY, thrashing_block_destroy_cb },
    { UVM_PERF_EVENT_MODULE_UNLOAD, thrashing_block_destroy_cb },
    { UVM_PERF_EVENT_BLOCK_SHRINK , thrashing_block_destroy_cb },
    { UVM_PERF_EVENT_BLOCK_MUNMAP , thrashing_block_munmap_cb },
    { UVM_PERF_EVENT_MIGRATION, thrashing_event_cb },
    { UVM_PERF_EVENT_REVOCATION, thrashing_event_cb }
};

static int nv_procfs_read_thrashing_stats(struct seq_file *s, void *v)
{
    processor_thrashing_stats_t *processor_stats = (processor_thrashing_stats_t *)s->private;

    UVM_ASSERT(processor_stats);

    if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
        return -EAGAIN;

    UVM_SEQ_OR_DBG_PRINT(s, "thrashing %llu\n", (NvU64)atomic64_read(&processor_stats->num_thrashing));
    UVM_SEQ_OR_DBG_PRINT(s, "throttle %llu\n", (NvU64)atomic64_read(&processor_stats->num_throttle));
    UVM_SEQ_OR_DBG_PRINT(s, "pin_local %llu\n", (NvU64)atomic64_read(&processor_stats->num_pin_local));
    UVM_SEQ_OR_DBG_PRINT(s, "pin_remote %llu\n", (NvU64)atomic64_read(&processor_stats->num_pin_remote));

    uvm_up_read(&g_uvm_global.pm.lock);

    return 0;
}
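
// Sample contents of the resulting procfs file (illustrative numbers only):
//
//     thrashing 1024
//     throttle 128
//     pin_local 16
//     pin_remote 4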

static int nv_procfs_read_thrashing_stats_entry(struct seq_file *s, void *v)
{
    UVM_ENTRY_RET(nv_procfs_read_thrashing_stats(s, v));
}

UVM_DEFINE_SINGLE_PROCFS_FILE(thrashing_stats_entry);

#define THRASHING_STATS_FILE_NAME "thrashing_stats"

// Initialization/deinitialization of CPU thrashing stats
//
static NV_STATUS cpu_thrashing_stats_init(void)
{
    struct proc_dir_entry *cpu_base_dir_entry = uvm_procfs_get_cpu_base_dir();

    if (uvm_procfs_is_debug_enabled()) {
        UVM_ASSERT(!g_cpu_thrashing_stats.procfs_file);
        g_cpu_thrashing_stats.procfs_file = NV_CREATE_PROC_FILE(THRASHING_STATS_FILE_NAME,
                                                                cpu_base_dir_entry,
                                                                thrashing_stats_entry,
                                                                &g_cpu_thrashing_stats);
        if (!g_cpu_thrashing_stats.procfs_file)
            return NV_ERR_OPERATING_SYSTEM;
    }

    return NV_OK;
}

static void cpu_thrashing_stats_exit(void)
{
    if (g_cpu_thrashing_stats.procfs_file) {
        UVM_ASSERT(uvm_procfs_is_debug_enabled());
        proc_remove(g_cpu_thrashing_stats.procfs_file);
        g_cpu_thrashing_stats.procfs_file = NULL;
    }
}

// Get the thrashing stats struct for the given GPU if it exists
//
// No lock may be held. Therefore, the stats must be updated using atomics
static processor_thrashing_stats_t *gpu_thrashing_stats_get_or_null(uvm_gpu_t *gpu)
{
    return uvm_perf_module_type_data(gpu->perf_modules_data, UVM_PERF_MODULE_TYPE_THRASHING);
}

static processor_thrashing_stats_t *thrashing_stats_get_or_null(uvm_va_space_t *va_space, uvm_processor_id_t id)
{
    if (UVM_ID_IS_CPU(id)) {
        if (g_cpu_thrashing_stats.procfs_file)
            return &g_cpu_thrashing_stats;

        return NULL;
    }

    return gpu_thrashing_stats_get_or_null(uvm_va_space_get_gpu(va_space, id));
}

// Create the thrashing stats struct for the given GPU
//
// Global lock needs to be held
static NV_STATUS gpu_thrashing_stats_create(uvm_gpu_t *gpu)
{
    processor_thrashing_stats_t *gpu_thrashing;

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);
    UVM_ASSERT(gpu_thrashing_stats_get_or_null(gpu) == NULL);
    UVM_ASSERT(uvm_procfs_is_debug_enabled());

    gpu_thrashing = uvm_kvmalloc_zero(sizeof(*gpu_thrashing));
    if (!gpu_thrashing)
        return NV_ERR_NO_MEMORY;

    gpu_thrashing->procfs_file = NV_CREATE_PROC_FILE(THRASHING_STATS_FILE_NAME,
                                                     gpu->procfs.dir,
                                                     thrashing_stats_entry,
                                                     gpu_thrashing);
    if (!gpu_thrashing->procfs_file) {
        uvm_kvfree(gpu_thrashing);
        return NV_ERR_OPERATING_SYSTEM;
    }

    uvm_perf_module_type_set_data(gpu->perf_modules_data, gpu_thrashing, UVM_PERF_MODULE_TYPE_THRASHING);

    return NV_OK;
}

static void gpu_thrashing_stats_destroy(uvm_gpu_t *gpu)
{
    processor_thrashing_stats_t *gpu_thrashing = gpu_thrashing_stats_get_or_null(gpu);

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);

    if (gpu_thrashing) {
        uvm_perf_module_type_unset_data(gpu->perf_modules_data, UVM_PERF_MODULE_TYPE_THRASHING);

        if (gpu_thrashing->procfs_file)
            proc_remove(gpu_thrashing->procfs_file);

        uvm_kvfree(gpu_thrashing);
    }
}

// Get the thrashing detection struct for the given VA space if it exists
//
// The caller must ensure that the va_space cannot be deleted, for the
// duration of this call. Holding either the va_block or va_space lock will do
// that.
static va_space_thrashing_info_t *va_space_thrashing_info_get_or_null(uvm_va_space_t *va_space)
{
    return uvm_perf_module_type_data(va_space->perf_modules_data, UVM_PERF_MODULE_TYPE_THRASHING);
}

// Get the thrashing detection struct for the given VA space. It asserts that
// the information has been previously created.
//
// The caller must ensure that the va_space cannot be deleted, for the
// duration of this call. Holding either the va_block or va_space lock will do
// that.
static va_space_thrashing_info_t *va_space_thrashing_info_get(uvm_va_space_t *va_space)
{
    va_space_thrashing_info_t *va_space_thrashing = va_space_thrashing_info_get_or_null(va_space);
    UVM_ASSERT(va_space_thrashing);

    return va_space_thrashing;
}

static void va_space_thrashing_info_init_params(va_space_thrashing_info_t *va_space_thrashing)
{
    UVM_ASSERT(!va_space_thrashing->params.test_overrides);

    va_space_thrashing->params.enable = g_uvm_perf_thrashing_enable;

    // Snap the thrashing parameters so that they can be tuned per VA space
    va_space_thrashing->params.threshold = g_uvm_perf_thrashing_threshold;
    va_space_thrashing->params.pin_threshold = g_uvm_perf_thrashing_pin_threshold;

    // Default thrashing parameters are overridden for simulated/emulated GPUs
    if (g_uvm_global.num_simulated_devices > 0 &&
        (g_uvm_perf_thrashing_lapse_usec == UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT)) {
        va_space_thrashing->params.lapse_ns = UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT_EMULATION * 1000;
    }
    else {
        va_space_thrashing->params.lapse_ns = g_uvm_perf_thrashing_lapse_usec * 1000;
    }

    va_space_thrashing->params.nap_ns = va_space_thrashing->params.lapse_ns * g_uvm_perf_thrashing_nap;
    va_space_thrashing->params.epoch_ns = va_space_thrashing->params.lapse_ns * g_uvm_perf_thrashing_epoch;

    if (g_uvm_global.num_simulated_devices > 0 && (g_uvm_perf_thrashing_pin == UVM_PERF_THRASHING_PIN_DEFAULT)) {
        va_space_thrashing->params.pin_ns = va_space_thrashing->params.lapse_ns
                                            * UVM_PERF_THRASHING_PIN_DEFAULT_EMULATION;
    }
    else {
        va_space_thrashing->params.pin_ns = va_space_thrashing->params.lapse_ns * g_uvm_perf_thrashing_pin;
    }

    va_space_thrashing->params.max_resets = g_uvm_perf_thrashing_max_resets;
}
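
// Worked example with the default module parameters on non-emulated systems:
// lapse_ns = 500us, nap_ns = 500us * 1 = 500us, epoch_ns = 500us * 2000 = 1s,
// and pin_ns = 500us * 300 = 150ms.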

// Create the thrashing detection struct for the given VA space
//
// VA space lock needs to be held in write mode
static va_space_thrashing_info_t *va_space_thrashing_info_create(uvm_va_space_t *va_space)
{
    va_space_thrashing_info_t *va_space_thrashing;
    uvm_assert_rwsem_locked_write(&va_space->lock);

    UVM_ASSERT(va_space_thrashing_info_get_or_null(va_space) == NULL);

    va_space_thrashing = uvm_kvmalloc_zero(sizeof(*va_space_thrashing));
    if (va_space_thrashing) {
        uvm_va_block_context_t *block_context = uvm_va_block_context_alloc(NULL);

        if (!block_context) {
            uvm_kvfree(va_space_thrashing);
            return NULL;
        }

        va_space_thrashing->pinned_pages.va_block_context = block_context;
        va_space_thrashing->va_space = va_space;

        va_space_thrashing_info_init_params(va_space_thrashing);

        uvm_perf_module_type_set_data(va_space->perf_modules_data, va_space_thrashing, UVM_PERF_MODULE_TYPE_THRASHING);
    }

    return va_space_thrashing;
}

// Destroy the thrashing detection struct for the given VA space
//
// VA space lock needs to be in write mode
static void va_space_thrashing_info_destroy(uvm_va_space_t *va_space)
{
    va_space_thrashing_info_t *va_space_thrashing = va_space_thrashing_info_get_or_null(va_space);
    uvm_assert_rwsem_locked_write(&va_space->lock);

    if (va_space_thrashing) {
        uvm_perf_module_type_unset_data(va_space->perf_modules_data, UVM_PERF_MODULE_TYPE_THRASHING);
        uvm_va_block_context_free(va_space_thrashing->pinned_pages.va_block_context);
        uvm_kvfree(va_space_thrashing);
    }
}

// Get the thrashing detection struct for the given block
static block_thrashing_info_t *thrashing_info_get(uvm_va_block_t *va_block)
{
    uvm_assert_mutex_locked(&va_block->lock);
    return uvm_perf_module_type_data(va_block->perf_modules_data, UVM_PERF_MODULE_TYPE_THRASHING);
}

// Get the thrashing detection struct for the given block or create it if it
// does not exist
static block_thrashing_info_t *thrashing_info_get_create(uvm_va_block_t *va_block)
{
    block_thrashing_info_t *block_thrashing = thrashing_info_get(va_block);

    BUILD_BUG_ON((1 << 8 * sizeof(block_thrashing->num_thrashing_pages)) < PAGES_PER_UVM_VA_BLOCK);
    BUILD_BUG_ON((1 << 16) < UVM_ID_MAX_PROCESSORS);

    if (!block_thrashing) {
        block_thrashing = nv_kmem_cache_zalloc(g_va_block_thrashing_info_cache, NV_UVM_GFP_FLAGS);
        if (!block_thrashing)
            goto done;

        block_thrashing->last_processor = UVM_ID_INVALID;
        INIT_LIST_HEAD(&block_thrashing->pinned_pages.list);

        uvm_perf_module_type_set_data(va_block->perf_modules_data, block_thrashing, UVM_PERF_MODULE_TYPE_THRASHING);
    }

done:
    return block_thrashing;
}

static void thrashing_reset_pages_in_region(uvm_va_block_t *va_block, NvU64 address, NvU64 bytes);

void uvm_perf_thrashing_info_destroy(uvm_va_block_t *va_block)
{
    block_thrashing_info_t *block_thrashing = thrashing_info_get(va_block);

    if (block_thrashing) {
        thrashing_reset_pages_in_region(va_block, va_block->start, uvm_va_block_size(va_block));

        uvm_perf_module_type_unset_data(va_block->perf_modules_data, UVM_PERF_MODULE_TYPE_THRASHING);

        uvm_kvfree(block_thrashing->pages);
        kmem_cache_free(g_va_block_thrashing_info_cache, block_thrashing);
    }
}

void thrashing_block_destroy_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
{
    uvm_va_block_t *va_block;

    UVM_ASSERT(g_uvm_perf_thrashing_enable);

    UVM_ASSERT(event_id == UVM_PERF_EVENT_BLOCK_DESTROY ||
               event_id == UVM_PERF_EVENT_BLOCK_SHRINK ||
               event_id == UVM_PERF_EVENT_MODULE_UNLOAD);

    if (event_id == UVM_PERF_EVENT_BLOCK_DESTROY)
        va_block = event_data->block_destroy.block;
    else if (event_id == UVM_PERF_EVENT_BLOCK_SHRINK)
        va_block = event_data->block_shrink.block;
    else
        va_block = event_data->module_unload.block;

    if (!va_block)
        return;

    uvm_perf_thrashing_info_destroy(va_block);
}

void thrashing_block_munmap_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
{
    uvm_va_block_t *va_block = event_data->block_munmap.block;
    uvm_va_block_region_t region = event_data->block_munmap.region;

    UVM_ASSERT(g_uvm_perf_thrashing_enable);
    UVM_ASSERT(event_id == UVM_PERF_EVENT_BLOCK_MUNMAP);
    UVM_ASSERT(va_block);

    thrashing_reset_pages_in_region(va_block,
                                    uvm_va_block_region_start(va_block, region),
                                    uvm_va_block_region_size(region));
}

// Sanity checks of the thrashing tracking state
static bool thrashing_state_checks(uvm_va_block_t *va_block,
                                   block_thrashing_info_t *block_thrashing,
                                   page_thrashing_info_t *page_thrashing,
                                   uvm_page_index_t page_index)
{
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    va_space_thrashing_info_t *va_space_thrashing = va_space_thrashing_info_get(va_space);

    if (!block_thrashing) {
        UVM_ASSERT(!page_thrashing);
        return true;
    }

    UVM_ASSERT(uvm_page_mask_subset(&block_thrashing->pinned_pages.mask, &block_thrashing->thrashing_pages));

    if (page_thrashing) {
        UVM_ASSERT(block_thrashing->pages);
        UVM_ASSERT(page_thrashing == &block_thrashing->pages[page_index]);
    }
    else {
        UVM_ASSERT(!uvm_page_mask_test(&block_thrashing->thrashing_pages, page_index));
        return true;
    }

    UVM_ASSERT(uvm_processor_mask_subset(&page_thrashing->throttled_processors,
                                         &page_thrashing->processors));

    if (uvm_page_mask_test(&block_thrashing->thrashing_pages, page_index))
        UVM_ASSERT(page_thrashing->num_thrashing_events >= va_space_thrashing->params.threshold);

    if (page_thrashing->pinned) {
        UVM_ASSERT(uvm_page_mask_test(&block_thrashing->pinned_pages.mask, page_index));
        UVM_ASSERT(UVM_ID_IS_VALID(page_thrashing->pinned_residency_id));
        UVM_ASSERT(page_thrashing->throttling_count == 0);
    }
    else {
        UVM_ASSERT(!uvm_page_mask_test(&block_thrashing->pinned_pages.mask, page_index));
        UVM_ASSERT(UVM_ID_IS_INVALID(page_thrashing->pinned_residency_id));

        if (!uvm_processor_mask_empty(&page_thrashing->throttled_processors)) {
            UVM_ASSERT(page_thrashing->throttling_count > 0);
            UVM_ASSERT(uvm_page_mask_test(&block_thrashing->thrashing_pages, page_index));
        }
    }

    return true;
}

// Update throttling heuristics. Mainly check if a new throttling period has
// started and choose the next processor not to be throttled. This function
// is executed before the thrashing mitigation logic kicks in.
static void thrashing_throttle_update(va_space_thrashing_info_t *va_space_thrashing,
                                      uvm_va_block_t *va_block,
                                      page_thrashing_info_t *page_thrashing,
                                      uvm_processor_id_t processor,
                                      NvU64 time_stamp)
{
    NvU64 current_end_time_stamp = page_thrashing_get_throttling_end_time_stamp(page_thrashing);

    uvm_assert_mutex_locked(&va_block->lock);

    if (time_stamp > current_end_time_stamp) {
        NvU64 throttling_end_time_stamp = time_stamp + va_space_thrashing->params.nap_ns;
        page_thrashing_set_throttling_end_time_stamp(page_thrashing, throttling_end_time_stamp);

        // Avoid choosing the same processor in consecutive thrashing periods
        if (uvm_id_equal(page_thrashing->do_not_throttle_processor_id, processor))
            page_thrashing->do_not_throttle_processor_id = UVM_ID_INVALID;
        else
            page_thrashing->do_not_throttle_processor_id = processor;
    }
    else if (UVM_ID_IS_INVALID(page_thrashing->do_not_throttle_processor_id)) {
        page_thrashing->do_not_throttle_processor_id = processor;
    }
}
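
// Example of the logic above: if GPU0's fault starts a new throttling period,
// GPU0 becomes the do-not-throttle processor for that period and every other
// processor accessing the page gets throttled. If GPU0 also starts the next
// period, the exemption is cleared, so the first processor to fault
// afterwards takes it and a different processor can make forward progress.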

// Throttle the execution of a processor. If this is the first processor being
// throttled for a throttling period, compute the time stamp until which the
// rest of the processors will be throttled on fault.
//
// - Page may be pinned (possible in thrashing due to revocation, such as
//   in system-wide atomics)
// - Requesting processor must not be throttled at this point.
//
static void thrashing_throttle_processor(uvm_va_block_t *va_block,
                                         block_thrashing_info_t *block_thrashing,
                                         page_thrashing_info_t *page_thrashing,
                                         uvm_page_index_t page_index,
                                         uvm_processor_id_t processor)
{
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    NvU64 address = uvm_va_block_cpu_page_address(va_block, page_index);

    uvm_assert_mutex_locked(&va_block->lock);

    UVM_ASSERT(!uvm_id_equal(processor, page_thrashing->do_not_throttle_processor_id));

    if (!uvm_processor_mask_test_and_set(&page_thrashing->throttled_processors, processor)) {
        // CPU is throttled by sleeping. This is done in uvm_vm_fault so it
        // drops the VA block and VA space locks. Throttling start/end events
        // are recorded around the sleep calls.
        if (UVM_ID_IS_GPU(processor))
            uvm_tools_record_throttling_start(va_space, address, processor);

        if (!page_thrashing->pinned)
            UVM_PERF_SATURATING_INC(page_thrashing->throttling_count);

        UVM_PERF_SATURATING_INC(block_thrashing->throttling_count);
    }

    UVM_ASSERT(thrashing_state_checks(va_block, block_thrashing, page_thrashing, page_index));
}

// Stop throttling on the given processor. If this is the last processor being
// throttled for a throttling period, it will clear the throttling period.
//
// - Page may be pinned (possible in thrashing due to revocation, such as
//   in system-wide atomics)
// - Requesting processor must be throttled at this point.
//
static void thrashing_throttle_end_processor(uvm_va_block_t *va_block,
                                             block_thrashing_info_t *block_thrashing,
                                             page_thrashing_info_t *page_thrashing,
                                             uvm_page_index_t page_index,
                                             uvm_processor_id_t processor)
{
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    NvU64 address = uvm_va_block_cpu_page_address(va_block, page_index);

    UVM_ASSERT(uvm_processor_mask_test(&page_thrashing->throttled_processors, processor));
    uvm_processor_mask_clear(&page_thrashing->throttled_processors, processor);
    if (uvm_processor_mask_empty(&page_thrashing->throttled_processors))
        page_thrashing_set_throttling_end_time_stamp(page_thrashing, 0);

    // See comment regarding throttling start/end events for CPU in
    // thrashing_throttle_processor
    if (UVM_ID_IS_GPU(processor))
        uvm_tools_record_throttling_end(va_space, address, processor);

    UVM_ASSERT(thrashing_state_checks(va_block, block_thrashing, page_thrashing, page_index));
}

// Clear the throttling state for all processors. This is used while
// transitioning to pinned state and during thrashing information reset.
static void thrashing_throttling_reset_page(uvm_va_block_t *va_block,
                                            block_thrashing_info_t *block_thrashing,
                                            page_thrashing_info_t *page_thrashing,
                                            uvm_page_index_t page_index)
{
    uvm_processor_id_t processor_id;

    for_each_id_in_mask(processor_id, &page_thrashing->throttled_processors) {
        thrashing_throttle_end_processor(va_block,
                                         block_thrashing,
                                         page_thrashing,
                                         page_index,
                                         processor_id);
    }

    UVM_ASSERT(uvm_processor_mask_empty(&page_thrashing->throttled_processors));
}

// Find the pinned page descriptor for the given page index. Return NULL if the
// page is not pinned.
static pinned_page_t *find_pinned_page(block_thrashing_info_t *block_thrashing, uvm_page_index_t page_index)
{
    pinned_page_t *pinned_page;

    list_for_each_entry(pinned_page, &block_thrashing->pinned_pages.list, va_block_list_entry) {
        if (pinned_page->page_index == page_index)
            return pinned_page;
    }

    return NULL;
}

// Pin a page on the specified processor. All thrashing processors will be
// mapped remotely on this location, when possible
//
// - Requesting processor cannot be throttled
//
static NV_STATUS thrashing_pin_page(va_space_thrashing_info_t *va_space_thrashing,
                                    uvm_va_block_t *va_block,
                                    uvm_va_block_context_t *va_block_context,
                                    block_thrashing_info_t *block_thrashing,
                                    page_thrashing_info_t *page_thrashing,
                                    uvm_page_index_t page_index,
                                    NvU64 time_stamp,
                                    uvm_processor_id_t residency,
                                    uvm_processor_id_t requester)
{
    uvm_processor_mask_t *current_residency = &va_block_context->scratch_processor_mask;

    uvm_assert_mutex_locked(&va_block->lock);
    UVM_ASSERT(!uvm_processor_mask_test(&page_thrashing->throttled_processors, requester));

    uvm_va_block_page_resident_processors(va_block, page_index, current_residency);

    // If we are pinning the page for the first time, or we are pinning it on
    // a different location than the current one, reset the throttling state
    // to make sure that we flush any pending ThrottlingEnd events.
    if (!page_thrashing->pinned || !uvm_processor_mask_test(current_residency, residency))
        thrashing_throttling_reset_page(va_block, block_thrashing, page_thrashing, page_index);

    if (!page_thrashing->pinned) {
        if (va_space_thrashing->params.pin_ns > 0) {
            pinned_page_t *pinned_page = nv_kmem_cache_zalloc(g_pinned_page_cache, NV_UVM_GFP_FLAGS);
            if (!pinned_page)
                return NV_ERR_NO_MEMORY;

            pinned_page->va_block = va_block;
            pinned_page->page_index = page_index;
            pinned_page->deadline = time_stamp + va_space_thrashing->params.pin_ns;

            uvm_spin_lock(&va_space_thrashing->pinned_pages.lock);

            list_add_tail(&pinned_page->va_space_list_entry, &va_space_thrashing->pinned_pages.list);
            list_add_tail(&pinned_page->va_block_list_entry, &block_thrashing->pinned_pages.list);

            // We only schedule the delayed work if the list was empty before
            // adding this page. Otherwise, we just add it to the list. The
            // unpinning helper will remove from the list those pages with
            // deadline prior to its wakeup timestamp and will reschedule
            // itself if there are remaining pages in the list.
            if (list_is_singular(&va_space_thrashing->pinned_pages.list) &&
                !va_space_thrashing->pinned_pages.in_va_space_teardown) {
                int scheduled;
                scheduled = schedule_delayed_work(&va_space_thrashing->pinned_pages.dwork,
                                                  usecs_to_jiffies(va_space_thrashing->params.pin_ns / 1000));
                UVM_ASSERT(scheduled != 0);
            }

            uvm_spin_unlock(&va_space_thrashing->pinned_pages.lock);
        }

        page_thrashing->throttling_count = 0;
        page_thrashing->pinned = true;
        UVM_PERF_SATURATING_INC(block_thrashing->pinned_pages.count);
        uvm_page_mask_set(&block_thrashing->pinned_pages.mask, page_index);
    }

    page_thrashing->pinned_residency_id = residency;

    UVM_ASSERT(thrashing_state_checks(va_block, block_thrashing, page_thrashing, page_index));

    return NV_OK;
}

// Unpin a page. This function just clears the pinning tracking state, and does
// not remove remote mappings on the page. Callers will need to do it manually
// BEFORE calling this function, if so desired.
// - Page must be pinned
//
static void thrashing_unpin_page(va_space_thrashing_info_t *va_space_thrashing,
                                 uvm_va_block_t *va_block,
                                 block_thrashing_info_t *block_thrashing,
                                 page_thrashing_info_t *page_thrashing,
                                 uvm_page_index_t page_index)
{
    uvm_assert_mutex_locked(&va_block->lock);
    UVM_ASSERT(page_thrashing->pinned);

    if (va_space_thrashing->params.pin_ns > 0) {
        bool do_free = false;
        pinned_page_t *pinned_page = find_pinned_page(block_thrashing, page_index);

        UVM_ASSERT(pinned_page);
        UVM_ASSERT(pinned_page->page_index == page_index);
        UVM_ASSERT(pinned_page->va_block == va_block);

        // The va_space_list_entry and va_block_list_entry have special
        // meanings here:
        // - va_space_list_entry: when the delayed unpin worker removes the
        //   pinned_page from this list, it takes the ownership of the page and
        //   is in charge of freeing it.
        // - va_block_list_entry: by removing the page from this list,
        //   thrashing_unpin_page tells the unpin delayed worker to skip
        //   unpinning that page.
        uvm_spin_lock(&va_space_thrashing->pinned_pages.lock);
        list_del_init(&pinned_page->va_block_list_entry);

        if (!list_empty(&pinned_page->va_space_list_entry)) {
            do_free = true;
            list_del_init(&pinned_page->va_space_list_entry);

            if (list_empty(&va_space_thrashing->pinned_pages.list))
                cancel_delayed_work(&va_space_thrashing->pinned_pages.dwork);
        }

        uvm_spin_unlock(&va_space_thrashing->pinned_pages.lock);

        if (do_free)
            kmem_cache_free(g_pinned_page_cache, pinned_page);
    }

    page_thrashing->pinned_residency_id = UVM_ID_INVALID;
    page_thrashing->pinned = false;
    uvm_page_mask_clear(&block_thrashing->pinned_pages.mask, page_index);

    UVM_ASSERT(thrashing_state_checks(va_block, block_thrashing, page_thrashing, page_index));
}

static void thrashing_detected(uvm_va_block_t *va_block,
                               block_thrashing_info_t *block_thrashing,
                               page_thrashing_info_t *page_thrashing,
                               uvm_page_index_t page_index,
                               uvm_processor_id_t processor_id)
{
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    NvU64 address = uvm_va_block_cpu_page_address(va_block, page_index);

    // Thrashing detected, record the event
    uvm_tools_record_thrashing(va_space, address, PAGE_SIZE, &page_thrashing->processors);
    if (!uvm_page_mask_test_and_set(&block_thrashing->thrashing_pages, page_index))
        ++block_thrashing->num_thrashing_pages;

    PROCESSOR_THRASHING_STATS_INC(va_space, processor_id, num_thrashing);

    UVM_ASSERT(thrashing_state_checks(va_block, block_thrashing, page_thrashing, page_index));
}

// Clear the thrashing information for the given page. This function does not
// unmap remote mappings on the page. Callers will need to do it BEFORE calling
// this function, if so desired
static void thrashing_reset_page(va_space_thrashing_info_t *va_space_thrashing,
                                 uvm_va_block_t *va_block,
                                 block_thrashing_info_t *block_thrashing,
                                 uvm_page_index_t page_index)
{
    page_thrashing_info_t *page_thrashing = &block_thrashing->pages[page_index];
    uvm_assert_mutex_locked(&va_block->lock);

    UVM_ASSERT(block_thrashing->num_thrashing_pages > 0);
    UVM_ASSERT(uvm_page_mask_test(&block_thrashing->thrashing_pages, page_index));
    UVM_ASSERT(page_thrashing->num_thrashing_events > 0);

    thrashing_throttling_reset_page(va_block, block_thrashing, page_thrashing, page_index);
    UVM_ASSERT(uvm_processor_mask_empty(&page_thrashing->throttled_processors));

    if (page_thrashing->pinned)
        thrashing_unpin_page(va_space_thrashing, va_block, block_thrashing, page_thrashing, page_index);

    page_thrashing->last_time_stamp = 0;
    page_thrashing->has_migration_events = 0;
    page_thrashing->has_revocation_events = 0;
    page_thrashing->num_thrashing_events = 0;
    uvm_processor_mask_zero(&page_thrashing->processors);

    if (uvm_page_mask_test_and_clear(&block_thrashing->thrashing_pages, page_index))
        --block_thrashing->num_thrashing_pages;

    UVM_ASSERT(thrashing_state_checks(va_block, block_thrashing, page_thrashing, page_index));
}

// Call thrashing_reset_page for all the thrashing pages in the region
// described by address and bytes
static void thrashing_reset_pages_in_region(uvm_va_block_t *va_block, NvU64 address, NvU64 bytes)
{
    uvm_page_index_t page_index;
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    va_space_thrashing_info_t *va_space_thrashing = va_space_thrashing_info_get(va_space);
    block_thrashing_info_t *block_thrashing = NULL;
    uvm_va_block_region_t region = uvm_va_block_region_from_start_size(va_block, address, bytes);

    block_thrashing = thrashing_info_get(va_block);
    if (!block_thrashing || !block_thrashing->pages)
        return;

    // Update all pages in the region
    for_each_va_block_page_in_region_mask(page_index, &block_thrashing->thrashing_pages, region)
        thrashing_reset_page(va_space_thrashing, va_block, block_thrashing, page_index);
}


// Unmap remote mappings from the given processors on the pinned pages
// described by the region and block_thrashing->pinned_pages.mask.
static NV_STATUS unmap_remote_pinned_pages(uvm_va_block_t *va_block,
                                           uvm_va_block_context_t *va_block_context,
                                           block_thrashing_info_t *block_thrashing,
                                           uvm_va_block_region_t region,
                                           const uvm_processor_mask_t *unmap_processors)
{
    NV_STATUS status = NV_OK;
    NV_STATUS tracker_status;
    uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
    uvm_processor_id_t processor_id;
    const uvm_va_policy_t *policy = uvm_va_policy_get(va_block, uvm_va_block_region_start(va_block, region));

    uvm_assert_mutex_locked(&va_block->lock);

    for_each_id_in_mask(processor_id, unmap_processors) {
        UVM_ASSERT(uvm_id_equal(processor_id, policy->preferred_location) ||
                   !uvm_processor_mask_test(&policy->accessed_by, processor_id));

        if (uvm_processor_mask_test(&va_block->resident, processor_id)) {
            const uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, processor_id, NUMA_NO_NODE);

            if (!uvm_page_mask_andnot(&va_block_context->caller_page_mask,
                                      &block_thrashing->pinned_pages.mask,
                                      resident_mask))
                continue;
        }
        else {
            uvm_page_mask_copy(&va_block_context->caller_page_mask, &block_thrashing->pinned_pages.mask);
        }

        status = uvm_va_block_unmap(va_block,
                                    va_block_context,
                                    processor_id,
                                    region,
                                    &va_block_context->caller_page_mask,
                                    &local_tracker);
        if (status != NV_OK)
            break;
    }

    tracker_status = uvm_tracker_add_tracker_safe(&va_block->tracker, &local_tracker);
    if (status == NV_OK)
        status = tracker_status;

    uvm_tracker_deinit(&local_tracker);

    return status;
}

NV_STATUS uvm_perf_thrashing_unmap_remote_pinned_pages_all(uvm_va_block_t *va_block,
                                                           uvm_va_block_context_t *va_block_context,
                                                           uvm_va_block_region_t region)
{
    block_thrashing_info_t *block_thrashing;
    uvm_processor_mask_t *unmap_processors = &va_block_context->unmap_processors_mask;
    const uvm_va_policy_t *policy = uvm_va_policy_get_region(va_block, region);

    uvm_assert_mutex_locked(&va_block->lock);

    block_thrashing = thrashing_info_get(va_block);
    if (!block_thrashing || !block_thrashing->pages)
        return NV_OK;

    if (uvm_page_mask_empty(&block_thrashing->pinned_pages.mask))
        return NV_OK;

    // Unmap all mapped processors (that are not SetAccessedBy) with
    // no copy of the page
    uvm_processor_mask_andnot(unmap_processors, &va_block->mapped, &policy->accessed_by);

    return unmap_remote_pinned_pages(va_block, va_block_context, block_thrashing, region, unmap_processors);
}

// Check that we are not migrating pages away from their pinned location, and
// that we are not prefetching thrashing pages. The checks themselves are
// assertions: the function always returns false.
static bool migrating_wrong_pages(uvm_va_block_t *va_block,
                                  NvU64 address,
                                  NvU64 bytes,
                                  uvm_processor_id_t proc_id,
                                  uvm_make_resident_cause_t cause)
{
    uvm_page_index_t page_index;
    block_thrashing_info_t *block_thrashing = NULL;
    uvm_va_block_region_t region = uvm_va_block_region_from_start_size(va_block, address, bytes);

    block_thrashing = thrashing_info_get(va_block);
    if (!block_thrashing || !block_thrashing->pages)
        return false;

    for_each_va_block_page_in_region(page_index, region) {
        page_thrashing_info_t *page_thrashing = &block_thrashing->pages[page_index];
        UVM_ASSERT_MSG(!page_thrashing->pinned || uvm_id_equal(proc_id, page_thrashing->pinned_residency_id),
                       "Migrating to %u instead of %u\n",
                       uvm_id_value(proc_id), uvm_id_value(page_thrashing->pinned_residency_id));
        if (cause == UVM_MAKE_RESIDENT_CAUSE_PREFETCH)
            UVM_ASSERT(!uvm_page_mask_test(&block_thrashing->thrashing_pages, page_index));
    }

    return false;
}

static bool is_migration_pinned_pages_update(uvm_va_block_t *va_block,
                                             const uvm_perf_event_data_t *event_data,
                                             NvU64 address,
                                             NvU64 bytes)
{
    const block_thrashing_info_t *block_thrashing = NULL;
    uvm_va_block_region_t region = uvm_va_block_region_from_start_size(va_block, address, bytes);
    bool ret;

    if (event_data->migration.cause != UVM_MAKE_RESIDENT_CAUSE_REPLAYABLE_FAULT &&
        event_data->migration.cause != UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER) {
        return false;
    }

    block_thrashing = thrashing_info_get(va_block);
    if (!block_thrashing || !block_thrashing->pages)
        return false;

    ret = uvm_page_mask_region_full(&block_thrashing->pinned_pages.mask, region);
    if (ret) {
        uvm_page_index_t page_index;
        for_each_va_block_page_in_region(page_index, region) {
            page_thrashing_info_t *page_thrashing = &block_thrashing->pages[page_index];
            UVM_ASSERT(uvm_id_equal(page_thrashing->pinned_residency_id, event_data->migration.dst));
        }
    }

    return ret;
}

// This function processes migration/revocation events and determines if the
// affected pages are thrashing or not.
void thrashing_event_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
{
    va_space_thrashing_info_t *va_space_thrashing;
    block_thrashing_info_t *block_thrashing = NULL;
    uvm_va_block_t *va_block;
    uvm_va_space_t *va_space;
    NvU64 address;
    NvU64 bytes;
    uvm_processor_id_t processor_id;
    uvm_page_index_t page_index;
    NvU64 time_stamp;
    uvm_va_block_region_t region;
    uvm_read_duplication_policy_t read_duplication;

    UVM_ASSERT(g_uvm_perf_thrashing_enable);

    UVM_ASSERT(event_id == UVM_PERF_EVENT_MIGRATION || event_id == UVM_PERF_EVENT_REVOCATION);

    if (event_id == UVM_PERF_EVENT_MIGRATION) {
        va_block = event_data->migration.block;
        address = event_data->migration.address;
        bytes = event_data->migration.bytes;
        processor_id = event_data->migration.dst;

        // Skip the thrashing detection logic on eviction as we cannot take
        // the VA space lock
        if (event_data->migration.cause == UVM_MAKE_RESIDENT_CAUSE_EVICTION)
            return;

        // Do not perform checks during the first part of staging copies
        if (!uvm_id_equal(event_data->migration.dst, event_data->migration.make_resident_context->dest_id))
            return;

        va_space = uvm_va_block_get_va_space(va_block);
        va_space_thrashing = va_space_thrashing_info_get(va_space);
        if (!va_space_thrashing->params.enable)
            return;

        // TODO: Bug 3660922: HMM will need to look up the policy when
        // read duplication is supported.
        read_duplication = uvm_va_block_is_hmm(va_block) ?
                           UVM_READ_DUPLICATION_UNSET :
                           uvm_va_range_get_policy(va_block->va_range)->read_duplication;

        // We only care about migrations due to replayable faults, access
        // counters and page prefetching. For non-replayable faults, UVM will
        // try not to migrate memory since CE is transferring data anyway.
        // However, we can still see migration events due to initial
        // population. The rest of migrations are triggered due to user
        // commands or advice (such as read duplication) which takes precedence
        // over our heuristics. Therefore, we clear our internal tracking
        // state.
        if ((event_data->migration.cause != UVM_MAKE_RESIDENT_CAUSE_REPLAYABLE_FAULT &&
             event_data->migration.cause != UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER &&
             event_data->migration.cause != UVM_MAKE_RESIDENT_CAUSE_PREFETCH) ||
            (event_data->migration.transfer_mode != UVM_VA_BLOCK_TRANSFER_MODE_MOVE) ||
            (read_duplication == UVM_READ_DUPLICATION_ENABLED)) {
            thrashing_reset_pages_in_region(va_block, address, bytes);
            return;
        }

        // Assert that we are not migrating pages that are pinned away from
        // their pinning residency, or prefetching pages that are thrashing
        UVM_ASSERT(!migrating_wrong_pages(va_block, address, bytes, processor_id, event_data->migration.cause));

        // If we are being migrated due to pinning just return
        if (is_migration_pinned_pages_update(va_block, event_data, address, bytes))
            return;
    }
1299 else {
1300 va_block = event_data->revocation.block;
1301 address = event_data->revocation.address;
1302 bytes = event_data->revocation.bytes;
1303 processor_id = event_data->revocation.proc_id;
1304
1305 va_space = uvm_va_block_get_va_space(va_block);
1306 va_space_thrashing = va_space_thrashing_info_get(va_space);
1307 if (!va_space_thrashing->params.enable)
1308 return;
1309 }
1310
    block_thrashing = thrashing_info_get_create(va_block);
    if (!block_thrashing)
        return;

    time_stamp = NV_GETTIME();

    if (!block_thrashing->pages) {
        // Don't create the per-page tracking structure unless there is some
        // potential thrashing within the block
        NvU16 num_block_pages;

        if (block_thrashing->last_time_stamp == 0 ||
            uvm_id_equal(block_thrashing->last_processor, processor_id) ||
            time_stamp - block_thrashing->last_time_stamp > va_space_thrashing->params.lapse_ns)
            goto done;

        num_block_pages = uvm_va_block_size(va_block) / PAGE_SIZE;

        block_thrashing->pages = uvm_kvmalloc_zero(sizeof(*block_thrashing->pages) * num_block_pages);
        if (!block_thrashing->pages)
            goto done;

        for (page_index = 0; page_index < num_block_pages; ++page_index) {
            block_thrashing->pages[page_index].pinned_residency_id = UVM_ID_INVALID;
            block_thrashing->pages[page_index].do_not_throttle_processor_id = UVM_ID_INVALID;
        }
    }

    region = uvm_va_block_region_from_start_size(va_block, address, bytes);

    // Update all pages in the region
    for_each_va_block_page_in_region(page_index, region) {
        page_thrashing_info_t *page_thrashing = &block_thrashing->pages[page_index];
        NvU64 last_time_stamp = page_thrashing_get_time_stamp(page_thrashing);

        // It is not possible for a pinned page to be migrated here, since the
        // fault that triggered the migration should have unpinned it in its
        // call to uvm_perf_thrashing_get_hint. Moreover, page prefetching
        // never includes pages that are thrashing (pinned or not).
        if (event_id == UVM_PERF_EVENT_MIGRATION)
            UVM_ASSERT(page_thrashing->pinned == 0);

        uvm_processor_mask_set(&page_thrashing->processors, processor_id);
        page_thrashing_set_time_stamp(page_thrashing, time_stamp);

        if (last_time_stamp == 0)
            continue;

        if (time_stamp - last_time_stamp <= va_space_thrashing->params.lapse_ns) {
            UVM_PERF_SATURATING_INC(page_thrashing->num_thrashing_events);
            if (page_thrashing->num_thrashing_events == va_space_thrashing->params.threshold)
                thrashing_detected(va_block, block_thrashing, page_thrashing, page_index, processor_id);

            if (page_thrashing->num_thrashing_events >= va_space_thrashing->params.threshold)
                block_thrashing->last_thrashing_time_stamp = time_stamp;

            if (event_id == UVM_PERF_EVENT_MIGRATION)
                page_thrashing->has_migration_events = true;
            else
                page_thrashing->has_revocation_events = true;
        }
        else if (page_thrashing->num_thrashing_events >= va_space_thrashing->params.threshold &&
                 !page_thrashing->pinned) {
            thrashing_reset_page(va_space_thrashing, va_block, block_thrashing, page_index);
        }
    }

done:
    block_thrashing->last_time_stamp = time_stamp;
    block_thrashing->last_processor = processor_id;
}

static bool thrashing_processors_can_access(uvm_va_space_t *va_space,
                                            page_thrashing_info_t *page_thrashing,
                                            uvm_processor_id_t to)
{
    if (UVM_ID_IS_INVALID(to))
        return false;

    return uvm_processor_mask_subset(&page_thrashing->processors,
                                     &va_space->accessible_from[uvm_id_value(to)]);
}

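// Returns true if all processors recorded as thrashing on the page have fast
// access (NVLINK or native atomics) to the memory of the given processor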
static bool thrashing_processors_have_fast_access_to(uvm_va_space_t *va_space,
                                                     uvm_va_block_context_t *va_block_context,
                                                     page_thrashing_info_t *page_thrashing,
                                                     uvm_processor_id_t to)
{
    uvm_processor_mask_t *fast_to = &va_block_context->fast_access_mask;

    if (UVM_ID_IS_INVALID(to))
        return false;

    // Combine the NVLINK and native atomics masks, since we could have PCIe
    // atomics in the future
    uvm_processor_mask_and(fast_to,
                           &va_space->has_nvlink[uvm_id_value(to)],
                           &va_space->has_native_atomics[uvm_id_value(to)]);
    uvm_processor_mask_set(fast_to, to);

    return uvm_processor_mask_subset(&page_thrashing->processors, fast_to);
}

static void thrashing_processors_common_locations(uvm_va_space_t *va_space,
                                                  page_thrashing_info_t *page_thrashing,
                                                  uvm_processor_mask_t *common_locations)
{
    bool is_first = true;
    uvm_processor_id_t id;

    // Find the processors that can be accessed from all thrashing
    // processors. For example: if A, B and C are thrashing, and both A and C
    // can access B, then B is a common location.
    uvm_processor_mask_zero(common_locations);

    for_each_id_in_mask(id, &page_thrashing->processors) {
        if (is_first)
            uvm_processor_mask_copy(common_locations, &va_space->can_access[uvm_id_value(id)]);
        else
            uvm_processor_mask_and(common_locations, common_locations, &va_space->can_access[uvm_id_value(id)]);

        is_first = false;
    }
}

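// Returns true if the preferred location set by policy is itself one of the
// processors currently thrashing on the page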
static bool preferred_location_is_thrashing(uvm_processor_id_t preferred_location,
                                            page_thrashing_info_t *page_thrashing)
{
    if (UVM_ID_IS_INVALID(preferred_location))
        return false;

    return uvm_processor_mask_test(&page_thrashing->processors, preferred_location);
}

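// Given a page that is thrashing due to migrations, decide whether to pin it
// (and on which residency) or to throttle the requester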
static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thrashing_info_t *va_space_thrashing,
                                                                  uvm_va_block_t *va_block,
                                                                  uvm_va_block_context_t *va_block_context,
                                                                  uvm_page_index_t page_index,
                                                                  page_thrashing_info_t *page_thrashing,
                                                                  uvm_processor_id_t requester)
{
    uvm_perf_thrashing_hint_t hint;
    uvm_processor_id_t closest_resident_id;
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    uvm_processor_id_t do_not_throttle_processor = page_thrashing->do_not_throttle_processor_id;
    uvm_processor_id_t pinned_residency = page_thrashing->pinned_residency_id;
    const uvm_va_policy_t *policy;
    uvm_processor_id_t preferred_location;

    policy = uvm_va_policy_get(va_block, uvm_va_block_cpu_page_address(va_block, page_index));

    preferred_location = policy->preferred_location;

    hint.type = UVM_PERF_THRASHING_HINT_TYPE_NONE;

    closest_resident_id = uvm_va_block_page_get_closest_resident(va_block, va_block_context, page_index, requester);
    if (uvm_va_block_is_hmm(va_block)) {
        // HMM pages always start out resident on the CPU but may not be
        // recorded in the va_block state because hmm_range_fault() or
        // similar functions haven't been called to get an accurate snapshot
        // of the Linux state. We can assume pages are CPU resident for the
        // purpose of deciding where to migrate to reduce thrashing.
        if (UVM_ID_IS_INVALID(closest_resident_id))
            closest_resident_id = UVM_ID_CPU;
    }
    else {
        UVM_ASSERT(UVM_ID_IS_VALID(closest_resident_id));
    }

    if (thrashing_processors_can_access(va_space, page_thrashing, preferred_location)) {
        // The logic in uvm_va_block_select_residency chooses the preferred
        // location if the requester can access it, so all processors should
        // naturally get mapped to the preferred location without thrashing.
        // However, we can get here if the preferred location was set after
        // the processors started thrashing.
        //
        // TODO: Bug 2527408. Reset thrashing history when a user policy
        //       changes in a VA block.
        hint.type = UVM_PERF_THRASHING_HINT_TYPE_PIN;
        hint.pin.residency = preferred_location;
    }
    else if (!preferred_location_is_thrashing(preferred_location, page_thrashing) &&
             thrashing_processors_have_fast_access_to(va_space, va_block_context, page_thrashing, closest_resident_id)) {
        // This is a fast path for those scenarios in which all thrashing
        // processors have fast (NVLINK + native atomics) access to the
        // current residency. This is skipped if the preferred location is
        // thrashing and not accessible by the rest of the thrashing
        // processors. Otherwise, we would be in the condition above.
        if (UVM_ID_IS_CPU(closest_resident_id)) {
            // On P9 systems, we prefer the CPU to map vidmem (since it can
            // cache it), so don't map the GPU to sysmem.
            if (UVM_ID_IS_GPU(requester)) {
                hint.type = UVM_PERF_THRASHING_HINT_TYPE_PIN;
                hint.pin.residency = requester;
            }
        }
        else {
            hint.type = UVM_PERF_THRASHING_HINT_TYPE_PIN;
            hint.pin.residency = closest_resident_id;
        }
    }
    else if (uvm_id_equal(requester, preferred_location)) {
        if (page_thrashing->pinned) {
            // If the faulting processor is the preferred location, we can
            // only:
            // 1) Pin to the preferred location
            // 2) Throttle if the page is pinned elsewhere and we are not the
            //    do_not_throttle_processor
            if (uvm_id_equal(preferred_location, pinned_residency) ||
                uvm_id_equal(preferred_location, do_not_throttle_processor)) {
                hint.type = UVM_PERF_THRASHING_HINT_TYPE_PIN;
                hint.pin.residency = preferred_location;
            }
            else {
                hint.type = UVM_PERF_THRASHING_HINT_TYPE_THROTTLE;
            }
        }
        else if (!uvm_id_equal(preferred_location, do_not_throttle_processor)) {
            hint.type = UVM_PERF_THRASHING_HINT_TYPE_THROTTLE;
        }
        else if (page_thrashing->throttling_count >= va_space_thrashing->params.pin_threshold) {
            hint.type = UVM_PERF_THRASHING_HINT_TYPE_PIN;
            hint.pin.residency = preferred_location;
        }
    }
    else if (page_thrashing->pinned) {
        // 1) If the requester is the do_not_throttle_processor, pin the page
        //    to the requester if all thrashing processors can access it,
        //    otherwise to a common location, or to the requester anyway if
        //    no common location is found.
        // 2) Otherwise, try to map the current pinned residency.
        // 3) Failing that, throttle.
        if (uvm_id_equal(requester, do_not_throttle_processor)) {
            hint.type = UVM_PERF_THRASHING_HINT_TYPE_PIN;

            if (thrashing_processors_can_access(va_space, page_thrashing, requester)) {
                hint.pin.residency = requester;
            }
            else {
                uvm_processor_mask_t *common_locations = &va_block_context->scratch_processor_mask;

                thrashing_processors_common_locations(va_space, page_thrashing, common_locations);
                if (uvm_processor_mask_empty(common_locations)) {
                    hint.pin.residency = requester;
                }
                else {
                    // Find the common location that is closest to the requester
                    hint.pin.residency = uvm_processor_mask_find_closest_id(va_space, common_locations, requester);
                }
            }
        }
        else if (uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(pinned_residency)], requester)) {
            if (!uvm_va_block_is_hmm(va_block))
                UVM_ASSERT(uvm_id_equal(closest_resident_id, pinned_residency));

            hint.type = UVM_PERF_THRASHING_HINT_TYPE_PIN;
            hint.pin.residency = pinned_residency;
        }
        else {
            hint.type = UVM_PERF_THRASHING_HINT_TYPE_THROTTLE;
        }
    }
    else if (!uvm_id_equal(requester, do_not_throttle_processor)) {
        hint.type = UVM_PERF_THRASHING_HINT_TYPE_THROTTLE;
    }
    else if (page_thrashing->throttling_count >= va_space_thrashing->params.pin_threshold) {
        hint.type = UVM_PERF_THRASHING_HINT_TYPE_PIN;
        hint.pin.residency = requester;
    }

    if (hint.type == UVM_PERF_THRASHING_HINT_TYPE_PIN &&
        !uvm_va_space_processor_has_memory(va_space, hint.pin.residency))
        hint.pin.residency = UVM_ID_CPU;

    return hint;
}

// Function called on fault that tells the fault handler if any operation
// should be performed to minimize thrashing. The logic is as follows:
//
// - Phase0: Block thrashing. If a number of consecutive thrashing events have
//   been detected on the VA block, per-page thrashing tracking information is
//   created.
// - Phase1: Throttling. When several processors fight over a page, we start a
//   "throttling period". During that period, only one processor will be able
//   to service faults on the page, and the rest will be throttled. All CPU
//   faults are considered to belong to the same device, even if they come
//   from different CPU threads.
// - Phase2: Pinning. After a number of consecutive throttling periods, the
//   page is pinned on a specific processor which all of the thrashing
//   processors can access.
// - Phase3: Revocation throttling. Even if the page is pinned, it can still
//   be thrashing due to revocation events (mainly due to system-wide
//   atomics). In that case we keep the page pinned while applying the same
//   algorithm as in Phase1.
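//
// For illustration only, a caller is expected to consume the returned hint
// roughly as follows (hypothetical sketch, not the actual fault handler;
// variable names are illustrative):
//
//     uvm_perf_thrashing_hint_t hint;
//
//     hint = uvm_perf_thrashing_get_hint(va_block, va_block_context, addr, requester);
//     if (hint.type == UVM_PERF_THRASHING_HINT_TYPE_THROTTLE) {
//         // Defer servicing for the requester until
//         // hint.throttle.end_time_stamp
//     }
//     else if (hint.type == UVM_PERF_THRASHING_HINT_TYPE_PIN) {
//         // Migrate the page to hint.pin.residency and map it on the
//         // processors in hint.pin.processors that can access it
//     }
//     // UVM_PERF_THRASHING_HINT_TYPE_NONE requires no special handling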
uvm_perf_thrashing_hint_t uvm_perf_thrashing_get_hint(uvm_va_block_t *va_block,
                                                      uvm_va_block_context_t *va_block_context,
                                                      NvU64 address,
                                                      uvm_processor_id_t requester)
{
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    va_space_thrashing_info_t *va_space_thrashing = va_space_thrashing_info_get(va_space);
    block_thrashing_info_t *block_thrashing = NULL;
    page_thrashing_info_t *page_thrashing = NULL;
    uvm_perf_thrashing_hint_t hint;
    uvm_page_index_t page_index = uvm_va_block_cpu_page_index(va_block, address);
    NvU64 time_stamp;
    NvU64 last_time_stamp;

    hint.type = UVM_PERF_THRASHING_HINT_TYPE_NONE;

    if (!va_space_thrashing->params.enable)
        return hint;

    // If we don't have enough memory to store thrashing information, we
    // assume no thrashing
    block_thrashing = thrashing_info_get(va_block);
    if (!block_thrashing)
        return hint;

    // If the per-page tracking structure has not been created yet, we assume
    // no thrashing
    if (!block_thrashing->pages)
        return hint;

    time_stamp = NV_GETTIME();

    if (block_thrashing->last_thrashing_time_stamp != 0 &&
        (time_stamp - block_thrashing->last_thrashing_time_stamp > va_space_thrashing->params.epoch_ns) &&
        block_thrashing->pinned_pages.count == 0 &&
        block_thrashing->thrashing_reset_count < va_space_thrashing->params.max_resets) {
        uvm_page_index_t reset_page_index;

        ++block_thrashing->thrashing_reset_count;

        // Clear the state of throttled processors to make sure that we flush
        // any pending ThrottlingEnd events
        for_each_va_block_page_in_mask(reset_page_index, &block_thrashing->thrashing_pages, va_block) {
            thrashing_throttling_reset_page(va_block,
                                            block_thrashing,
                                            &block_thrashing->pages[reset_page_index],
                                            reset_page_index);
        }

        // Reset per-page tracking structure
        // TODO: Bug 1769904 [uvm] Speculatively unpin pages that were pinned
        //       on a specific memory due to thrashing
        UVM_ASSERT(uvm_page_mask_empty(&block_thrashing->pinned_pages.mask));
        uvm_kvfree(block_thrashing->pages);
        block_thrashing->pages = NULL;
        block_thrashing->num_thrashing_pages = 0;
        block_thrashing->last_processor = UVM_ID_INVALID;
        block_thrashing->last_time_stamp = 0;
        block_thrashing->last_thrashing_time_stamp = 0;
        uvm_page_mask_zero(&block_thrashing->thrashing_pages);
        goto done;
    }

    page_thrashing = &block_thrashing->pages[page_index];

    // Not enough thrashing events yet
    if (page_thrashing->num_thrashing_events < va_space_thrashing->params.threshold)
        goto done;

    // If the requesting processor is throttled, check the throttling end
    // time stamp
    if (uvm_processor_mask_test(&page_thrashing->throttled_processors, requester)) {
        NvU64 throttling_end_time_stamp = page_thrashing_get_throttling_end_time_stamp(page_thrashing);
        if (time_stamp < throttling_end_time_stamp &&
            !uvm_id_equal(requester, page_thrashing->do_not_throttle_processor_id)) {
            hint.type = UVM_PERF_THRASHING_HINT_TYPE_THROTTLE;
            goto done;
        }

        thrashing_throttle_end_processor(va_block, block_thrashing, page_thrashing, page_index, requester);
    }

    UVM_ASSERT(!uvm_processor_mask_test(&page_thrashing->throttled_processors, requester));

    last_time_stamp = page_thrashing_get_time_stamp(page_thrashing);

    // If the lapse since the last thrashing event is longer than a thrashing
    // lapse, we are no longer thrashing
    if (time_stamp - last_time_stamp > va_space_thrashing->params.lapse_ns &&
        !page_thrashing->pinned) {
        goto done;
    }

    // Set the requesting processor in the thrashing processors mask
    uvm_processor_mask_set(&page_thrashing->processors, requester);

    UVM_ASSERT(page_thrashing->has_migration_events || page_thrashing->has_revocation_events);

    // Update throttling heuristics
    thrashing_throttle_update(va_space_thrashing, va_block, page_thrashing, requester, time_stamp);

    if (page_thrashing->pinned &&
        page_thrashing->has_revocation_events &&
        !uvm_id_equal(requester, page_thrashing->do_not_throttle_processor_id)) {

        // When we get revocation thrashing, this is due to system-wide
        // atomics downgrading the permissions of other processors.
        // Revocations only happen when several processors are mapping the
        // same page and there are no migrations. In this case, the only
        // thing we can do is throttle the execution of the processors.
        hint.type = UVM_PERF_THRASHING_HINT_TYPE_THROTTLE;
    }
    else {
        hint = get_hint_for_migration_thrashing(va_space_thrashing,
                                                va_block,
                                                va_block_context,
                                                page_index,
                                                page_thrashing,
                                                requester);
    }

done:
    if (hint.type == UVM_PERF_THRASHING_HINT_TYPE_PIN) {
        NV_STATUS status = thrashing_pin_page(va_space_thrashing,
                                              va_block,
                                              va_block_context,
                                              block_thrashing,
                                              page_thrashing,
                                              page_index,
                                              time_stamp,
                                              hint.pin.residency,
                                              requester);

        // If there was some problem pinning the page (e.g. OOM), demote to
        // throttling
        if (status != NV_OK) {
            hint.type = UVM_PERF_THRASHING_HINT_TYPE_THROTTLE;
        }
        else {
            if (uvm_id_equal(hint.pin.residency, requester))
                PROCESSOR_THRASHING_STATS_INC(va_space, requester, num_pin_local);
            else
                PROCESSOR_THRASHING_STATS_INC(va_space, requester, num_pin_remote);

            uvm_processor_mask_copy(&hint.pin.processors, &page_thrashing->processors);
        }
    }

    if (hint.type == UVM_PERF_THRASHING_HINT_TYPE_THROTTLE) {
        thrashing_throttle_processor(va_block,
                                     block_thrashing,
                                     page_thrashing,
                                     page_index,
                                     requester);

        PROCESSOR_THRASHING_STATS_INC(va_space, requester, num_throttle);

        hint.throttle.end_time_stamp = page_thrashing_get_throttling_end_time_stamp(page_thrashing);
    }
    else if (hint.type == UVM_PERF_THRASHING_HINT_TYPE_NONE && page_thrashing) {
        UVM_ASSERT(!uvm_processor_mask_test(&page_thrashing->throttled_processors, requester));
        UVM_ASSERT(!page_thrashing->pinned);
        UVM_ASSERT(UVM_ID_IS_INVALID(page_thrashing->pinned_residency_id));
    }

    return hint;
}

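// Return the mask of processors currently recorded as thrashing on the page
// containing the given address. Callers must ensure that thrashing detection
// is enabled and that per-page tracking state exists (both are asserted
// below).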
uvm_processor_mask_t *uvm_perf_thrashing_get_thrashing_processors(uvm_va_block_t *va_block, NvU64 address)
{
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    va_space_thrashing_info_t *va_space_thrashing = va_space_thrashing_info_get(va_space);
    block_thrashing_info_t *block_thrashing = NULL;
    page_thrashing_info_t *page_thrashing = NULL;
    uvm_page_index_t page_index = uvm_va_block_cpu_page_index(va_block, address);

    UVM_ASSERT(g_uvm_perf_thrashing_enable);
    UVM_ASSERT(va_space_thrashing->params.enable);

    block_thrashing = thrashing_info_get(va_block);
    UVM_ASSERT(block_thrashing);

    UVM_ASSERT(block_thrashing->pages);

    page_thrashing = &block_thrashing->pages[page_index];

    return &page_thrashing->processors;
}

const uvm_page_mask_t *uvm_perf_thrashing_get_thrashing_pages(uvm_va_block_t *va_block)
{
    uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
    va_space_thrashing_info_t *va_space_thrashing = va_space_thrashing_info_get(va_space);
    block_thrashing_info_t *block_thrashing = NULL;

    if (!va_space_thrashing->params.enable)
        return NULL;

    block_thrashing = thrashing_info_get(va_block);
    if (!block_thrashing)
        return NULL;

    if (block_thrashing->num_thrashing_pages == 0)
        return NULL;

    return &block_thrashing->thrashing_pages;
}

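// Deadlines within TIMER_GRANULARITY_NS of the current time are treated as
// already expired by the delayed worker below, so the work item is not
// rescheduled for a negligible interval.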
#define TIMER_GRANULARITY_NS 20000ULL
static void thrashing_unpin_pages(struct work_struct *work)
{
    struct delayed_work *dwork = to_delayed_work(work);
    va_space_thrashing_info_t *va_space_thrashing = container_of(dwork, va_space_thrashing_info_t, pinned_pages.dwork);
    uvm_va_space_t *va_space = va_space_thrashing->va_space;
    uvm_va_block_context_t *va_block_context = va_space_thrashing->pinned_pages.va_block_context;

    // Take the VA space lock so that VA blocks don't go away during this
    // operation.
    uvm_va_space_down_read(va_space);

    if (va_space_thrashing->pinned_pages.in_va_space_teardown)
        goto exit_no_list_lock;

    while (1) {
        pinned_page_t *pinned_page;
        uvm_va_block_t *va_block;

        uvm_spin_lock(&va_space_thrashing->pinned_pages.lock);
        pinned_page = list_first_entry_or_null(&va_space_thrashing->pinned_pages.list,
                                               pinned_page_t,
                                               va_space_list_entry);

        if (pinned_page) {
            NvU64 now = NV_GETTIME();

            if (pinned_page->deadline <= (now + TIMER_GRANULARITY_NS)) {
                list_del_init(&pinned_page->va_space_list_entry);

                // Work cancellation is left to thrashing_unpin_page() as
                // this would only catch the following pattern:
                // - Worker thread A is in thrashing_unpin_pages but hasn't
                //   looked at the list yet
                // - Thread B then removes the last entry
                // - Thread C then adds a new entry and re-schedules work
                // - Worker thread A removes the entry added by C because the
                //   deadline has passed (unlikely), then cancels the work
                //   scheduled by C.
            }
            else {
                NvU64 remaining_us = (pinned_page->deadline - now) / 1000;

                schedule_delayed_work(&va_space_thrashing->pinned_pages.dwork, usecs_to_jiffies(remaining_us));
                uvm_spin_unlock(&va_space_thrashing->pinned_pages.lock);
                break;
            }
        }

        uvm_spin_unlock(&va_space_thrashing->pinned_pages.lock);

        if (!pinned_page)
            break;

        va_block = pinned_page->va_block;
        if (uvm_va_block_is_hmm(va_block))
            uvm_hmm_migrate_begin_wait(va_block);
        uvm_mutex_lock(&va_block->lock);

        // Only operate if the pinned page's tracking state hasn't already
        // been cleared by thrashing_unpin_page()
        if (!list_empty(&pinned_page->va_block_list_entry)) {
            uvm_page_index_t page_index = pinned_page->page_index;
            block_thrashing_info_t *block_thrashing = thrashing_info_get(va_block);

            UVM_ASSERT(block_thrashing);
            UVM_ASSERT(uvm_page_mask_test(&block_thrashing->pinned_pages.mask, page_index));

            uvm_va_block_context_init(va_block_context, NULL);

            uvm_perf_thrashing_unmap_remote_pinned_pages_all(va_block,
                                                             va_block_context,
                                                             uvm_va_block_region_for_page(page_index));
            thrashing_reset_page(va_space_thrashing, va_block, block_thrashing, page_index);
        }

        uvm_mutex_unlock(&va_block->lock);
        if (uvm_va_block_is_hmm(va_block))
            uvm_hmm_migrate_finish(va_block);
        kmem_cache_free(g_pinned_page_cache, pinned_page);
    }

exit_no_list_lock:
    uvm_va_space_up_read(va_space);
}

static void thrashing_unpin_pages_entry(struct work_struct *work)
{
    UVM_ENTRY_VOID(thrashing_unpin_pages(work));
}

NV_STATUS uvm_perf_thrashing_load(uvm_va_space_t *va_space)
{
    va_space_thrashing_info_t *va_space_thrashing;
    NV_STATUS status;

    status = uvm_perf_module_load(&g_module_thrashing, va_space);
    if (status != NV_OK)
        return status;

    va_space_thrashing = va_space_thrashing_info_create(va_space);
    if (!va_space_thrashing)
        return NV_ERR_NO_MEMORY;

    uvm_spin_lock_init(&va_space_thrashing->pinned_pages.lock, UVM_LOCK_ORDER_LEAF);
    INIT_LIST_HEAD(&va_space_thrashing->pinned_pages.list);
    INIT_DELAYED_WORK(&va_space_thrashing->pinned_pages.dwork, thrashing_unpin_pages_entry);

    return NV_OK;
}

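// Stop thrashing mitigation for the VA space ahead of teardown: prevent new
// unpinning work from being scheduled, then cancel any work already queued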
void uvm_perf_thrashing_stop(uvm_va_space_t *va_space)
{
    va_space_thrashing_info_t *va_space_thrashing;

    uvm_va_space_down_write(va_space);
    va_space_thrashing = va_space_thrashing_info_get_or_null(va_space);

    // Prevent further unpinning operations from being scheduled
    if (va_space_thrashing)
        va_space_thrashing->pinned_pages.in_va_space_teardown = true;

    uvm_va_space_up_write(va_space);

    // Cancel any pending work. We can safely access va_space_thrashing
    // because this function is called once from the VA space teardown path,
    // and the only function that frees it is uvm_perf_thrashing_unload,
    // which is called later in the teardown path.
    if (va_space_thrashing)
        (void)cancel_delayed_work_sync(&va_space_thrashing->pinned_pages.dwork);
}

void uvm_perf_thrashing_unload(uvm_va_space_t *va_space)
{
    va_space_thrashing_info_t *va_space_thrashing = va_space_thrashing_info_get_or_null(va_space);

    uvm_perf_module_unload(&g_module_thrashing, va_space);

    // Make sure that there are no pending work items
    if (va_space_thrashing) {
        UVM_ASSERT(list_empty(&va_space_thrashing->pinned_pages.list));

        va_space_thrashing_info_destroy(va_space);
    }
}

void uvm_perf_thrashing_register_gpu(uvm_va_space_t *va_space, uvm_gpu_t *gpu)
{
    va_space_thrashing_info_t *va_space_thrashing = va_space_thrashing_info_get(va_space);

    // If a simulated GPU is registered, re-initialize thrashing parameters in
    // case they need to be adjusted.
    if ((g_uvm_global.num_simulated_devices > 0) && !va_space_thrashing->params.test_overrides)
        va_space_thrashing_info_init_params(va_space_thrashing);
}

NV_STATUS uvm_perf_thrashing_init(void)
{
    NV_STATUS status;

    INIT_THRASHING_PARAMETER_TOGGLE(uvm_perf_thrashing_enable, UVM_PERF_THRASHING_ENABLE_DEFAULT);
    if (!g_uvm_perf_thrashing_enable)
        return NV_OK;

    uvm_perf_module_init("perf_thrashing",
                         UVM_PERF_MODULE_TYPE_THRASHING,
                         g_callbacks_thrashing,
                         ARRAY_SIZE(g_callbacks_thrashing),
                         &g_module_thrashing);

    INIT_THRASHING_PARAMETER_NONZERO_MAX(uvm_perf_thrashing_threshold,
                                         UVM_PERF_THRASHING_THRESHOLD_DEFAULT,
                                         UVM_PERF_THRASHING_THRESHOLD_MAX);

    INIT_THRASHING_PARAMETER_NONZERO_MAX(uvm_perf_thrashing_pin_threshold,
                                         UVM_PERF_THRASHING_PIN_THRESHOLD_DEFAULT,
                                         UVM_PERF_THRASHING_PIN_THRESHOLD_MAX);

    // In Confidential Computing, the DMA path is slower due to cryptographic
    // operations and other associated overhead. Enforce a larger window to
    // allow the thrashing mitigation mechanisms to work properly.
    if (g_uvm_global.conf_computing_enabled)
        INIT_THRASHING_PARAMETER_NONZERO(uvm_perf_thrashing_lapse_usec, UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT * 10);
    else
        INIT_THRASHING_PARAMETER_NONZERO(uvm_perf_thrashing_lapse_usec, UVM_PERF_THRASHING_LAPSE_USEC_DEFAULT);

    INIT_THRASHING_PARAMETER_NONZERO_MAX(uvm_perf_thrashing_nap,
                                         UVM_PERF_THRASHING_NAP_DEFAULT,
                                         UVM_PERF_THRASHING_NAP_MAX);

    INIT_THRASHING_PARAMETER_NONZERO(uvm_perf_thrashing_epoch, UVM_PERF_THRASHING_EPOCH_DEFAULT);

    INIT_THRASHING_PARAMETER(uvm_perf_thrashing_pin, UVM_PERF_THRASHING_PIN_DEFAULT);

    INIT_THRASHING_PARAMETER(uvm_perf_thrashing_max_resets, UVM_PERF_THRASHING_MAX_RESETS_DEFAULT);

    g_va_block_thrashing_info_cache = NV_KMEM_CACHE_CREATE("uvm_block_thrashing_info_t", block_thrashing_info_t);
    if (!g_va_block_thrashing_info_cache) {
        status = NV_ERR_NO_MEMORY;
        goto error;
    }

    g_pinned_page_cache = NV_KMEM_CACHE_CREATE("uvm_pinned_page_t", pinned_page_t);
    if (!g_pinned_page_cache) {
        status = NV_ERR_NO_MEMORY;
        goto error;
    }

    status = cpu_thrashing_stats_init();
    if (status != NV_OK)
        goto error;

    return NV_OK;

error:
    uvm_perf_thrashing_exit();

    return status;
}

void uvm_perf_thrashing_exit(void)
{
    cpu_thrashing_stats_exit();

    kmem_cache_destroy_safe(&g_va_block_thrashing_info_cache);
    kmem_cache_destroy_safe(&g_pinned_page_cache);
}

NV_STATUS uvm_perf_thrashing_add_gpu(uvm_gpu_t *gpu)
{
    if (!uvm_procfs_is_debug_enabled())
        return NV_OK;

    return gpu_thrashing_stats_create(gpu);
}

void uvm_perf_thrashing_remove_gpu(uvm_gpu_t *gpu)
{
    gpu_thrashing_stats_destroy(gpu);
}

NV_STATUS uvm_test_get_page_thrashing_policy(UVM_TEST_GET_PAGE_THRASHING_POLICY_PARAMS *params, struct file *filp)
{
    uvm_va_space_t *va_space = uvm_va_space_get(filp);
    va_space_thrashing_info_t *va_space_thrashing;

    uvm_va_space_down_read(va_space);

    va_space_thrashing = va_space_thrashing_info_get(va_space);

    if (va_space_thrashing->params.enable) {
        params->policy = UVM_TEST_PAGE_THRASHING_POLICY_ENABLE;
        params->nap_ns = va_space_thrashing->params.nap_ns;
        params->pin_ns = va_space_thrashing->params.pin_ns;
        params->map_remote_on_native_atomics_fault = uvm_perf_map_remote_on_native_atomics_fault != 0;
    }
    else {
        params->policy = UVM_TEST_PAGE_THRASHING_POLICY_DISABLE;
    }

    uvm_va_space_up_read(va_space);

    return NV_OK;
}

NV_STATUS uvm_test_set_page_thrashing_policy(UVM_TEST_SET_PAGE_THRASHING_POLICY_PARAMS *params, struct file *filp)
{
    NV_STATUS status = NV_OK;
    uvm_va_space_t *va_space = uvm_va_space_get(filp);
    va_space_thrashing_info_t *va_space_thrashing;

    if (params->policy >= UVM_TEST_PAGE_THRASHING_POLICY_MAX)
        return NV_ERR_INVALID_ARGUMENT;

    if (!g_uvm_perf_thrashing_enable)
        return NV_ERR_INVALID_STATE;

    uvm_va_space_down_write(va_space);

    va_space_thrashing = va_space_thrashing_info_get(va_space);
    va_space_thrashing->params.test_overrides = true;

    if (params->policy == UVM_TEST_PAGE_THRASHING_POLICY_ENABLE) {
        if (va_space_thrashing->params.enable)
            goto done_unlock_va_space;

        va_space_thrashing->params.pin_ns = params->pin_ns;
        va_space_thrashing->params.enable = true;
    }
    else {
        if (!va_space_thrashing->params.enable)
            goto done_unlock_va_space;

        va_space_thrashing->params.enable = false;
    }

    // When disabling thrashing detection, destroy the thrashing tracking
    // information for all VA blocks and unpin pages
    if (!va_space_thrashing->params.enable) {
        uvm_va_range_t *va_range;

        uvm_for_each_va_range(va_range, va_space) {
            uvm_va_block_t *va_block;

            if (va_range->type != UVM_VA_RANGE_TYPE_MANAGED)
                continue;

            for_each_va_block_in_va_range(va_range, va_block) {
                uvm_va_block_region_t va_block_region = uvm_va_block_region_from_block(va_block);
                uvm_va_block_context_t *block_context = uvm_va_space_block_context(va_space, NULL);

                uvm_mutex_lock(&va_block->lock);

                // Unmap may split PTEs and require a retry. Needs to be
                // called before the pinned pages information is destroyed.
                status = UVM_VA_BLOCK_RETRY_LOCKED(va_block, NULL,
                             uvm_perf_thrashing_unmap_remote_pinned_pages_all(va_block,
                                                                              block_context,
                                                                              va_block_region));

                uvm_perf_thrashing_info_destroy(va_block);

                uvm_mutex_unlock(&va_block->lock);

                // Re-enable thrashing on failure to avoid getting asserts
                // about having state while thrashing is disabled
                if (status != NV_OK) {
                    va_space_thrashing->params.enable = true;
                    goto done_unlock_va_space;
                }
            }
        }

        status = uvm_hmm_clear_thrashing_policy(va_space);

        // Re-enable thrashing on failure to avoid getting asserts
        // about having state while thrashing is disabled
        if (status != NV_OK) {
            va_space_thrashing->params.enable = true;
            goto done_unlock_va_space;
        }
    }

done_unlock_va_space:
    uvm_va_space_up_write(va_space);

    return status;
}
