1 /*******************************************************************************
2     Copyright (c) 2016-2023 NVIDIA Corporation
3 
4     Permission is hereby granted, free of charge, to any person obtaining a copy
5     of this software and associated documentation files (the "Software"), to
6     deal in the Software without restriction, including without limitation the
7     rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8     sell copies of the Software, and to permit persons to whom the Software is
9     furnished to do so, subject to the following conditions:
10 
11         The above copyright notice and this permission notice shall be
12         included in all copies or substantial portions of the Software.
13 
14     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17     THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20     DEALINGS IN THE SOFTWARE.
21 
22 *******************************************************************************/
23 
24 #ifndef __UVM_PROCESSORS_H__
25 #define __UVM_PROCESSORS_H__
26 
27 #include "uvm_linux.h"
28 #include "uvm_common.h"
29 #include <linux/numa.h>
30 
31 // Processor identifiers
32 // =====================
33 //
34 // UVM uses its own identifiers to refer to the processors in the system. For
35 // simplicity (and performance), integers are used. However, in order to
36 // provide type safety, they are wrapped within the uvm_parent_processor_id_t
37 // struct.
// The range of valid identifiers needs to cover the maximum number of
// supported GPUs on a system plus the CPU. The CPU is assigned value 0, and
// GPUs are assigned values in the range [1, UVM_PARENT_ID_MAX_GPUS].
41 //
42 // There are some functions that only expect GPU identifiers and, in order to
43 // make it clearer, the uvm_parent_gpu_id_t alias type is provided. However, as
44 // this type is just a typedef of uvm_parent_processor_id_t, there is no type
45 // checking performed by the compiler.
46 //
47 // Identifier value vs index
48 // -------------------------
49 //
50 // Although we can rely on helpers for most of the operations related to
51 // processor ids, there are some scenarios in which we need to obtain their
52 // numerical value. Notably:
53 // - Logging
54 // - Array indexing
55 //
56 // Therefore, a helper is provided to obtain this value. However, there is a
57 // special case for array indexing, as there are some arrays that only contain
58 // entries for GPUs. In that case, the array cannot be directly indexed with
59 // the identifier's value. Instead, we use a helper that provides the index of
60 // the GPU within the GPU id space (basically id - 1).
61 //
62 // In the diagram below, MAX_SUB is used to abbreviate
63 // UVM_PARENT_ID_MAX_SUB_PROCESSORS.
64 //
65 //            |-------------------------- uvm_parent_processor_id_t ---------------------------------------------|
66 //            |                                                                                                  |
67 //            |     |----------------------- uvm_parent_gpu_id_t -----------------------------------------------||
68 //            |     |                                                                                           ||
69 // Proc type  | CPU | GPU |        ...        | GPU | ... GPU                                                   ||
70 //            |     |     |                   |     |                                                           ||
71 // ID values  |  0  |  1  |        ...        | i+1 | ... UVM_PARENT_ID_MAX_PROCESSORS-1                        ||
72 //            |     |     |                   |     |                                                           ||
73 // GPU index  |     |  0  |        ...        |  i  | ... UVM_PARENT_ID_MAX_GPUS-1                              ||
74 //            |     |     |                   |     |                                                           ||
75 //            +     +     +                   +     +                                                           ++
76 //            |     |     |-------------|     |     |-----------------------------|                             ||
77 //            |     |                   |     |                                   |                             ||
78 // GPU index  |     |  0  ... MAX_SUB-1 | ... |  i*MAX_SUB    ... (i+1)*MAX_SUB-1 | ... UVM_ID_MAX_GPUS-1       ||
79 //            |     |                   |     |                                   |                             ||
80 // ID values  |  0  |  1  ... MAX_SUB   | ... | (i*MAX_SUB)+1 ... (i+1)*MAX_SUB   | ... UVM_ID_MAX_PROCESSORS-1 ||
81 //            |     |                   |     |                                   |                             ||
82 // Proc type  | CPU | GPU ... GPU       | ... | GPU           ... GPU             | ... GPU                     ||
83 //            |     |                                                                                           ||
84 //            |     |-------------------------------------- uvm_gpu_id_t ---------------------------------------||
85 //            |                                                                                                  |
86 //            |----------------------------------------- uvm_processor_id_t -------------------------------------|
87 //
88 // When SMC is enabled, each GPU partition gets its own uvm_gpu_t object.
89 
// UVM_PROCESSOR_MASK() generates a bitmap-backed processor mask type together
// with the static helper functions that operate on it.
//
// Arguments:
//   mask_t          Name of the generated mask type: a struct wrapping a
//                   bitmap of maxval bits.
//   prefix_fn_mask  Prefix pasted onto the name of every generated helper
//                   (<prefix>_test(), <prefix>_set(), <prefix>_zero(), ...).
//   maxval          Number of bits in the bitmap. Ids handed to the
//                   single-bit helpers are asserted to be below this value.
//   proc_id_t       Processor id type. Its val member is used as bit index.
//   proc_id_ctor    Function or macro building a proc_id_t from a bit index.
//
// Helpers with the _atomic suffix call the plain kernel bit operations
// (set_bit, clear_bit, test_and_set_bit, test_and_clear_bit); their
// counterparts without the suffix call the double-underscore variants.
//
// The find_* helpers hand the raw result of the bitmap search to
// proc_id_ctor. Per the Linux bitmap API that result is maxval when no
// matching bit exists.
//
// Only block comments may be used inside the macro body, since a line comment
// would swallow the line continuations.
#define UVM_PROCESSOR_MASK(mask_t, prefix_fn_mask, maxval, proc_id_t, proc_id_ctor) \
\
typedef struct \
{ \
    DECLARE_BITMAP(bitmap, maxval); \
} mask_t; \
\
/* ---- Single-bit query and update ---- */ \
\
static bool prefix_fn_mask##_test(const mask_t *mask, proc_id_t id) \
{ \
    UVM_ASSERT_MSG(id.val < (maxval), "id %u\n", id.val); \
\
    return test_bit(id.val, mask->bitmap); \
} \
\
static void prefix_fn_mask##_set_atomic(mask_t *mask, proc_id_t id) \
{ \
    UVM_ASSERT_MSG(id.val < (maxval), "id %u\n", id.val); \
\
    set_bit(id.val, mask->bitmap); \
} \
\
static void prefix_fn_mask##_set(mask_t *mask, proc_id_t id) \
{ \
    UVM_ASSERT_MSG(id.val < (maxval), "id %u\n", id.val); \
\
    __set_bit(id.val, mask->bitmap); \
} \
\
static void prefix_fn_mask##_clear_atomic(mask_t *mask, proc_id_t id) \
{ \
    UVM_ASSERT_MSG(id.val < (maxval), "id %u\n", id.val); \
\
    clear_bit(id.val, mask->bitmap); \
} \
\
static void prefix_fn_mask##_clear(mask_t *mask, proc_id_t id) \
{ \
    UVM_ASSERT_MSG(id.val < (maxval), "id %u\n", id.val); \
\
    __clear_bit(id.val, mask->bitmap); \
} \
\
/* ---- Test-and-modify: each returns the previous state of the bit ---- */ \
\
static bool prefix_fn_mask##_test_and_set_atomic(mask_t *mask, proc_id_t id) \
{ \
    UVM_ASSERT_MSG(id.val < (maxval), "id %u\n", id.val); \
\
    return test_and_set_bit(id.val, mask->bitmap); \
} \
\
static bool prefix_fn_mask##_test_and_set(mask_t *mask, proc_id_t id) \
{ \
    UVM_ASSERT_MSG(id.val < (maxval), "id %u\n", id.val); \
\
    return __test_and_set_bit(id.val, mask->bitmap); \
} \
\
static bool prefix_fn_mask##_test_and_clear_atomic(mask_t *mask, proc_id_t id) \
{ \
    UVM_ASSERT_MSG(id.val < (maxval), "id %u\n", id.val); \
\
    return test_and_clear_bit(id.val, mask->bitmap); \
} \
\
static bool prefix_fn_mask##_test_and_clear(mask_t *mask, proc_id_t id) \
{ \
    UVM_ASSERT_MSG(id.val < (maxval), "id %u\n", id.val); \
\
    return __test_and_clear_bit(id.val, mask->bitmap); \
} \
\
/* ---- Whole-mask operations ---- */ \
\
static void prefix_fn_mask##_zero(mask_t *mask) \
{ \
    bitmap_zero(mask->bitmap, (maxval)); \
} \
\
static bool prefix_fn_mask##_empty(const mask_t *mask) \
{ \
    return bitmap_empty(mask->bitmap, (maxval)); \
} \
\
static void prefix_fn_mask##_copy(mask_t *dst, const mask_t *src) \
{ \
    bitmap_copy(dst->bitmap, src->bitmap, (maxval)); \
} \
\
static bool prefix_fn_mask##_and(mask_t *dst, const mask_t *src1, const mask_t *src2) \
{ \
    return bitmap_and(dst->bitmap, src1->bitmap, src2->bitmap, (maxval)) != 0; \
} \
\
static void prefix_fn_mask##_or(mask_t *dst, const mask_t *src1, const mask_t *src2) \
{ \
    bitmap_or(dst->bitmap, src1->bitmap, src2->bitmap, (maxval)); \
} \
\
static bool prefix_fn_mask##_andnot(mask_t *dst, const mask_t *src1, const mask_t *src2) \
{ \
    return bitmap_andnot(dst->bitmap, src1->bitmap, src2->bitmap, (maxval)); \
} \
\
static void prefix_fn_mask##_xor(mask_t *dst, const mask_t *src1, const mask_t *src2) \
{ \
    bitmap_xor(dst->bitmap, src1->bitmap, src2->bitmap, (maxval)); \
} \
\
/* ---- Searches for set bits ---- */ \
\
static proc_id_t prefix_fn_mask##_find_first_id(const mask_t *mask) \
{ \
    return proc_id_ctor(find_first_bit(mask->bitmap, (maxval))); \
} \
\
/* Starts at the first GPU value, so the CPU bit is never reported */ \
static proc_id_t prefix_fn_mask##_find_first_gpu_id(const mask_t *mask) \
{ \
    return proc_id_ctor(find_next_bit(mask->bitmap, (maxval), UVM_PARENT_ID_GPU0_VALUE)); \
} \
\
static proc_id_t prefix_fn_mask##_find_next_id(const mask_t *mask, proc_id_t min_id) \
{ \
    return proc_id_ctor(find_next_bit(mask->bitmap, (maxval), min_id.val)); \
} \
\
static proc_id_t prefix_fn_mask##_find_next_gpu_id(const mask_t *mask, proc_id_t min_gpu_id) \
{ \
    return proc_id_ctor(find_next_bit(mask->bitmap, (maxval), min_gpu_id.val)); \
} \
\
/* ---- Searches for clear bits ---- */ \
\
static proc_id_t prefix_fn_mask##_find_first_unset_id(const mask_t *mask) \
{ \
    return proc_id_ctor(find_first_zero_bit(mask->bitmap, (maxval))); \
} \
\
static proc_id_t prefix_fn_mask##_find_next_unset_id(const mask_t *mask, proc_id_t min_id) \
{ \
    return proc_id_ctor(find_next_zero_bit(mask->bitmap, (maxval), min_id.val)); \
} \
\
/* ---- Comparison and counting ---- */ \
\
static bool prefix_fn_mask##_equal(const mask_t *mask_in1, const mask_t *mask_in2) \
{ \
    return bitmap_equal(mask_in1->bitmap, mask_in2->bitmap, (maxval)) != 0; \
} \
\
static bool prefix_fn_mask##_subset(const mask_t *subset, const mask_t *mask) \
{ \
    return bitmap_subset(subset->bitmap, mask->bitmap, (maxval)) != 0; \
} \
\
static NvU32 prefix_fn_mask##_get_count(const mask_t *mask) \
{ \
    return bitmap_weight(mask->bitmap, (maxval)); \
} \
\
/* Number of set bits, not counting the CPU bit */ \
static NvU32 prefix_fn_mask##_get_gpu_count(const mask_t *mask) \
{ \
    const NvU32 total = prefix_fn_mask##_get_count(mask); \
    const bool has_cpu = prefix_fn_mask##_test(mask, proc_id_ctor(UVM_PARENT_ID_CPU_VALUE)); \
\
    return has_cpu ? total - 1 : total; \
}
253 
254 typedef struct
255 {
256     NvU32 val;
257 } uvm_parent_processor_id_t;
258 
259 typedef struct
260 {
261     NvU32 val;
262 } uvm_processor_id_t;
263 
264 typedef uvm_parent_processor_id_t uvm_parent_gpu_id_t;
265 typedef uvm_processor_id_t uvm_gpu_id_t;
266 
// Raw id values. The CPU always gets value 0 and the first GPU the value right
// after it. Both id types use the same values, which is required to share
// UVM_PROCESSOR_MASK() between them.
#define UVM_PARENT_ID_CPU_VALUE  0
#define UVM_PARENT_ID_GPU0_VALUE (UVM_PARENT_ID_CPU_VALUE + 1)
#define UVM_ID_CPU_VALUE         UVM_PARENT_ID_CPU_VALUE
#define UVM_ID_GPU0_VALUE        UVM_PARENT_ID_GPU0_VALUE

// Capacity of the parent id space: the GPUs plus one entry for the CPU
#define UVM_PARENT_ID_MAX_GPUS       NV_MAX_DEVICES
#define UVM_PARENT_ID_MAX_PROCESSORS (UVM_PARENT_ID_MAX_GPUS + 1)

// Number of uvm_gpu_id_t values reserved under each parent GPU
#define UVM_PARENT_ID_MAX_SUB_PROCESSORS 8

// Capacity of the id space: every sub-processor of every parent GPU, plus one
// entry for the CPU
#define UVM_ID_MAX_GPUS          (UVM_PARENT_ID_MAX_GPUS * UVM_PARENT_ID_MAX_SUB_PROCESSORS)
#define UVM_ID_MAX_PROCESSORS    (UVM_ID_MAX_GPUS + 1)
#define UVM_MAX_UNIQUE_GPU_PAIRS SUM_FROM_0_TO_N(UVM_ID_MAX_GPUS - 1)

// Well-known ids. The invalid ids use the first value past the valid range of
// their id space.
#define UVM_PARENT_ID_CPU     ((uvm_parent_processor_id_t) { .val = UVM_PARENT_ID_CPU_VALUE })
#define UVM_PARENT_ID_INVALID ((uvm_parent_processor_id_t) { .val = UVM_PARENT_ID_MAX_PROCESSORS })
#define UVM_ID_CPU            ((uvm_processor_id_t) { .val = UVM_ID_CPU_VALUE })
#define UVM_ID_INVALID        ((uvm_processor_id_t) { .val = UVM_ID_MAX_PROCESSORS })
290 
// Assert that the raw value of a parent processor id does not exceed
// UVM_PARENT_ID_MAX_PROCESSORS. The comparison is inclusive, so the value used
// by UVM_PARENT_ID_INVALID passes the check.
//
// The argument is parenthesized so that expressions such as *id_ptr expand
// correctly. It is evaluated twice, so it must not have side effects.
#define UVM_PARENT_ID_CHECK_BOUNDS(id) \
    UVM_ASSERT_MSG((id).val <= UVM_PARENT_ID_MAX_PROCESSORS, "id %u\n", (id).val)

// Same as UVM_PARENT_ID_CHECK_BOUNDS, but for processor ids and
// UVM_ID_MAX_PROCESSORS.
#define UVM_ID_CHECK_BOUNDS(id) \
    UVM_ASSERT_MSG((id).val <= UVM_ID_MAX_PROCESSORS, "id %u\n", (id).val)
294 
// Order two parent processor ids by their raw value. Both ids are
// bounds-checked first. Returns the result of UVM_CMP_DEFAULT on the two
// values.
static int uvm_parent_id_cmp(uvm_parent_processor_id_t id1, uvm_parent_processor_id_t id2)
{
    int order;

    UVM_PARENT_ID_CHECK_BOUNDS(id1);
    UVM_PARENT_ID_CHECK_BOUNDS(id2);

    order = UVM_CMP_DEFAULT(id1.val, id2.val);

    return order;
}
302 
// Returns true when both parent processor ids carry the same raw value. Both
// ids are bounds-checked first.
static bool uvm_parent_id_equal(uvm_parent_processor_id_t id1, uvm_parent_processor_id_t id2)
{
    bool same;

    UVM_PARENT_ID_CHECK_BOUNDS(id1);
    UVM_PARENT_ID_CHECK_BOUNDS(id2);

    same = (id1.val == id2.val);

    return same;
}
310 
// Order two processor ids by their raw value. Both ids are bounds-checked
// first. Returns the result of UVM_CMP_DEFAULT on the two values.
static int uvm_id_cmp(uvm_processor_id_t id1, uvm_processor_id_t id2)
{
    int order;

    UVM_ID_CHECK_BOUNDS(id1);
    UVM_ID_CHECK_BOUNDS(id2);

    order = UVM_CMP_DEFAULT(id1.val, id2.val);

    return order;
}
318 
// Returns true when both processor ids carry the same raw value. Both ids are
// bounds-checked first.
static bool uvm_id_equal(uvm_processor_id_t id1, uvm_processor_id_t id2)
{
    bool same;

    UVM_ID_CHECK_BOUNDS(id1);
    UVM_ID_CHECK_BOUNDS(id2);

    same = (id1.val == id2.val);

    return same;
}
326 
// Classification predicates for parent processor ids. They expand to calls to
// uvm_parent_id_equal(), and IS_GPU may evaluate its argument twice, so the
// argument must not have side effects. An id is a GPU when it is neither the
// CPU id nor the invalid id.
#define UVM_PARENT_ID_IS_CPU(id)     uvm_parent_id_equal(id, UVM_PARENT_ID_CPU)
#define UVM_PARENT_ID_IS_INVALID(id) uvm_parent_id_equal(id, UVM_PARENT_ID_INVALID)
#define UVM_PARENT_ID_IS_VALID(id)   (!UVM_PARENT_ID_IS_INVALID(id))
#define UVM_PARENT_ID_IS_GPU(id)     (!(UVM_PARENT_ID_IS_CPU(id) || UVM_PARENT_ID_IS_INVALID(id)))

// Same predicates for processor ids, built on uvm_id_equal()
#define UVM_ID_IS_CPU(id)     uvm_id_equal(id, UVM_ID_CPU)
#define UVM_ID_IS_INVALID(id) uvm_id_equal(id, UVM_ID_INVALID)
#define UVM_ID_IS_VALID(id)   (!UVM_ID_IS_INVALID(id))
#define UVM_ID_IS_GPU(id)     (!(UVM_ID_IS_CPU(id) || UVM_ID_IS_INVALID(id)))
336 
uvm_parent_id_from_value(NvU32 val)337 static uvm_parent_processor_id_t uvm_parent_id_from_value(NvU32 val)
338 {
339     uvm_parent_processor_id_t ret = { .val = val };
340 
341     UVM_PARENT_ID_CHECK_BOUNDS(ret);
342 
343     return ret;
344 }
345 
uvm_parent_gpu_id_from_value(NvU32 val)346 static uvm_parent_gpu_id_t uvm_parent_gpu_id_from_value(NvU32 val)
347 {
348     uvm_parent_gpu_id_t ret = uvm_parent_id_from_value(val);
349 
350     UVM_ASSERT(!UVM_PARENT_ID_IS_CPU(ret));
351 
352     return ret;
353 }
354 
uvm_id_from_value(NvU32 val)355 static uvm_processor_id_t uvm_id_from_value(NvU32 val)
356 {
357     uvm_processor_id_t ret = { .val = val };
358 
359     UVM_ID_CHECK_BOUNDS(ret);
360 
361     return ret;
362 }
363 
uvm_gpu_id_from_value(NvU32 val)364 static uvm_gpu_id_t uvm_gpu_id_from_value(NvU32 val)
365 {
366     uvm_gpu_id_t ret = uvm_id_from_value(val);
367 
368     UVM_ASSERT(!UVM_ID_IS_CPU(ret));
369 
370     return ret;
371 }
372 
373 // Create a parent GPU id from the given parent GPU id index (previously
374 // obtained via uvm_parent_id_gpu_index)
uvm_parent_gpu_id_from_index(NvU32 index)375 static uvm_parent_gpu_id_t uvm_parent_gpu_id_from_index(NvU32 index)
376 {
377     return uvm_parent_gpu_id_from_value(index + UVM_PARENT_ID_GPU0_VALUE);
378 }
379 
// Returns the parent processor id whose value immediately follows the given
// one. The incremented id is bounds-checked; the input id is not checked.
static uvm_parent_processor_id_t uvm_parent_id_next(uvm_parent_processor_id_t id)
{
    id.val += 1;

    UVM_PARENT_ID_CHECK_BOUNDS(id);

    return id;
}
388 
// Returns the parent GPU id whose value immediately follows the given one.
// The input must be a GPU id (asserted), and the incremented id is
// bounds-checked.
static uvm_parent_gpu_id_t uvm_parent_gpu_id_next(uvm_parent_gpu_id_t id)
{
    UVM_ASSERT(UVM_PARENT_ID_IS_GPU(id));

    id.val += 1;

    UVM_PARENT_ID_CHECK_BOUNDS(id);

    return id;
}
399 
400 // Same as uvm_parent_gpu_id_from_index but for uvm_processor_id_t
uvm_gpu_id_from_index(NvU32 index)401 static uvm_gpu_id_t uvm_gpu_id_from_index(NvU32 index)
402 {
403     return uvm_gpu_id_from_value(index + UVM_ID_GPU0_VALUE);
404 }
405 
// Returns the processor id whose value immediately follows the given one. The
// incremented id is bounds-checked; the input id is not checked.
static uvm_processor_id_t uvm_id_next(uvm_processor_id_t id)
{
    id.val += 1;

    UVM_ID_CHECK_BOUNDS(id);

    return id;
}
414 
// Returns the GPU id whose value immediately follows the given one. The input
// must be a GPU id (asserted), and the incremented id is bounds-checked.
static uvm_gpu_id_t uvm_gpu_id_next(uvm_gpu_id_t id)
{
    UVM_ASSERT(UVM_ID_IS_GPU(id));

    id.val += 1;

    UVM_ID_CHECK_BOUNDS(id);

    return id;
}
425 
426 // This function returns the numerical value within
427 // [0, UVM_PARENT_ID_MAX_PROCESSORS) of the given parent processor id.
uvm_parent_id_value(uvm_parent_processor_id_t id)428 static NvU32 uvm_parent_id_value(uvm_parent_processor_id_t id)
429 {
430     UVM_ASSERT(UVM_PARENT_ID_IS_VALID(id));
431 
432     return id.val;
433 }
434 
435 // This function returns the numerical value within
436 // [0, UVM_ID_MAX_PROCESSORS) of the given processor id
uvm_id_value(uvm_processor_id_t id)437 static NvU32 uvm_id_value(uvm_processor_id_t id)
438 {
439     UVM_ASSERT(UVM_ID_IS_VALID(id));
440 
441     return id.val;
442 }
443 
444 // This function returns the index of the given GPU id within the GPU id space
445 // [0, UVM_PARENT_ID_MAX_GPUS)
uvm_parent_id_gpu_index(uvm_parent_gpu_id_t id)446 static NvU32 uvm_parent_id_gpu_index(uvm_parent_gpu_id_t id)
447 {
448     UVM_ASSERT(UVM_PARENT_ID_IS_GPU(id));
449 
450     return id.val - UVM_PARENT_ID_GPU0_VALUE;
451 }
452 
453 // This function returns the index of the given GPU id within the GPU id space
454 // [0, UVM_ID_MAX_GPUS)
uvm_id_gpu_index(const uvm_gpu_id_t id)455 static NvU32 uvm_id_gpu_index(const uvm_gpu_id_t id)
456 {
457     UVM_ASSERT(UVM_ID_IS_GPU(id));
458 
459     return id.val - UVM_ID_GPU0_VALUE;
460 }
461 
uvm_id_gpu_index_from_parent_gpu_id(const uvm_parent_gpu_id_t id)462 static NvU32 uvm_id_gpu_index_from_parent_gpu_id(const uvm_parent_gpu_id_t id)
463 {
464     UVM_ASSERT(UVM_PARENT_ID_IS_GPU(id));
465 
466     return uvm_parent_id_gpu_index(id) * UVM_PARENT_ID_MAX_SUB_PROCESSORS;
467 }
468 
469 // This function returns the numerical value of the parent processor ID from the
470 // given processor id.
uvm_parent_id_value_from_processor_id(const uvm_processor_id_t id)471 static NvU32 uvm_parent_id_value_from_processor_id(const uvm_processor_id_t id)
472 {
473     if (UVM_ID_IS_CPU(id))
474         return UVM_PARENT_ID_CPU_VALUE;
475 
476     return (uvm_id_gpu_index(id) / UVM_PARENT_ID_MAX_SUB_PROCESSORS) + UVM_PARENT_ID_GPU0_VALUE;
477 }
478 
uvm_parent_id_gpu_index_from_gpu_id(const uvm_gpu_id_t id)479 static NvU32 uvm_parent_id_gpu_index_from_gpu_id(const uvm_gpu_id_t id)
480 {
481     UVM_ASSERT(UVM_ID_IS_GPU(id));
482 
483     return uvm_id_gpu_index(id) / UVM_PARENT_ID_MAX_SUB_PROCESSORS;
484 }
485 
uvm_gpu_id_from_parent_gpu_id(const uvm_parent_gpu_id_t id)486 static uvm_gpu_id_t uvm_gpu_id_from_parent_gpu_id(const uvm_parent_gpu_id_t id)
487 {
488     UVM_ASSERT(UVM_PARENT_ID_IS_GPU(id));
489 
490     return uvm_gpu_id_from_index(uvm_id_gpu_index_from_parent_gpu_id(id));
491 }
492 
uvm_gpu_id_from_sub_processor_index(NvU32 index,NvU32 sub_index)493 static uvm_gpu_id_t uvm_gpu_id_from_sub_processor_index(NvU32 index, NvU32 sub_index)
494 {
495     UVM_ASSERT(index < UVM_PARENT_ID_MAX_GPUS);
496     UVM_ASSERT(sub_index < UVM_PARENT_ID_MAX_SUB_PROCESSORS);
497 
498     return uvm_gpu_id_from_index(index * UVM_PARENT_ID_MAX_SUB_PROCESSORS + sub_index);
499 }
500 
uvm_parent_gpu_id_from_gpu_id(const uvm_gpu_id_t id)501 static uvm_parent_gpu_id_t uvm_parent_gpu_id_from_gpu_id(const uvm_gpu_id_t id)
502 {
503     UVM_ASSERT(UVM_ID_IS_GPU(id));
504 
505     return uvm_parent_gpu_id_from_index(uvm_parent_id_gpu_index_from_gpu_id(id));
506 }
507 
uvm_id_sub_processor_index(const uvm_gpu_id_t id)508 static NvU32 uvm_id_sub_processor_index(const uvm_gpu_id_t id)
509 {
510     return uvm_id_gpu_index(id) % UVM_PARENT_ID_MAX_SUB_PROCESSORS;
511 }
512 
513 UVM_PROCESSOR_MASK(uvm_parent_processor_mask_t,       \
514                    uvm_parent_processor_mask,         \
515                    UVM_PARENT_ID_MAX_PROCESSORS,      \
516                    uvm_parent_processor_id_t,         \
517                    uvm_parent_id_from_value)
518 
519 UVM_PROCESSOR_MASK(uvm_processor_mask_t,              \
520                    uvm_processor_mask,                \
521                    UVM_ID_MAX_PROCESSORS,             \
522                    uvm_processor_id_t,                \
523                    uvm_id_from_value)
524 
// Shared read-only processor masks; only declared here, the definitions live
// outside this header.
// NOTE(review): judging by the names, g_uvm_processor_mask_cpu presumably has
// only the CPU bit set and g_uvm_processor_mask_empty has no bits set. Confirm
// against the definitions.
extern const uvm_processor_mask_t g_uvm_processor_mask_cpu;
extern const uvm_processor_mask_t g_uvm_processor_mask_empty;
527 
// Like uvm_processor_mask_subset() but ignores the CPU in the subset mask.
// Returns whether the GPUs in subset are a subset of the GPUs in mask.
//
// subset: mask whose GPU bits are tested for containment.
// mask:   mask that the GPU bits of subset are checked against.
// Both masks are taken as pointers to const, so neither is modified.
bool uvm_processor_mask_gpu_subset(const uvm_processor_mask_t *subset,
                                   const uvm_processor_mask_t *mask);
532 
// Compress a uvm_processor_mask_t down to a uvm_parent_processor_mask_t
// by only copying the first subprocessor bit and ignoring the CPU bit.
//
// parent_mask: output mask that receives the compressed result.
// mask:        input mask; taken as a pointer to const, so it is not modified.
void uvm_parent_gpus_from_processor_mask(uvm_parent_processor_mask_t *parent_mask,
                                         const uvm_processor_mask_t *mask);
537 
// Iterate over the parent processor ids found in mask.
// Starts at the id returned by uvm_parent_processor_mask_find_first_id() and
// advances with uvm_parent_processor_mask_find_next_id() from the id following
// the current one, until an invalid id is returned.
// id must be an assignable uvm_parent_processor_id_t; both id and mask are
// evaluated several times.
#define for_each_parent_id_in_mask(id, mask)                                    \
    for ((id) = uvm_parent_processor_mask_find_first_id(mask);                  \
         UVM_PARENT_ID_IS_VALID(id);                                            \
         (id) = uvm_parent_processor_mask_find_next_id((mask),                  \
                                                       uvm_parent_id_next(id)))
542 
// Iterate over the parent GPU ids found in mask.
// Starts at the id returned by uvm_parent_processor_mask_find_first_gpu_id()
// and advances with uvm_parent_processor_mask_find_next_id() from the id
// following the current one, until an invalid id is returned.
// gpu_id must be an assignable parent GPU id; both gpu_id and mask are
// evaluated several times.
#define for_each_parent_gpu_id_in_mask(gpu_id, mask)                                    \
    for ((gpu_id) = uvm_parent_processor_mask_find_first_gpu_id((mask));                \
         UVM_PARENT_ID_IS_VALID(gpu_id);                                                \
         (gpu_id) = uvm_parent_processor_mask_find_next_id((mask),                      \
                                                           uvm_parent_gpu_id_next(gpu_id)))
547 
// Iterate over the processor ids found in mask.
// Starts at the id returned by uvm_processor_mask_find_first_id() and advances
// with uvm_processor_mask_find_next_id() from the id following the current
// one, until an invalid id is returned.
// id must be an assignable uvm_processor_id_t; both id and mask are evaluated
// several times.
#define for_each_id_in_mask(id, mask)                                    \
    for ((id) = uvm_processor_mask_find_first_id(mask);                  \
         UVM_ID_IS_VALID(id);                                            \
         (id) = uvm_processor_mask_find_next_id((mask), uvm_id_next(id)))
552 
// Iterate over the GPU ids found in mask.
// Starts at the id returned by uvm_processor_mask_find_first_gpu_id() and
// advances with uvm_processor_mask_find_next_id() from the id following the
// current one, until an invalid id is returned.
// gpu_id must be an assignable GPU id; both gpu_id and mask are evaluated
// several times.
#define for_each_gpu_id_in_mask(gpu_id, mask)                                    \
    for ((gpu_id) = uvm_processor_mask_find_first_gpu_id((mask));                \
         UVM_ID_IS_VALID(gpu_id);                                                \
         (gpu_id) = uvm_processor_mask_find_next_id((mask),                      \
                                                    uvm_gpu_id_next(gpu_id)))
557 
// Helper to iterate over all valid parent gpu ids.
// Starts at the id built from UVM_PARENT_ID_GPU0_VALUE and steps with
// uvm_parent_gpu_id_next() until UVM_PARENT_ID_IS_VALID() fails.
// i must be an assignable parent GPU id. It is parenthesized wherever it is
// assigned so that any lvalue expression can be passed safely; note that it is
// evaluated several times.
#define for_each_parent_gpu_id(i)                                          \
    for ((i) = uvm_parent_gpu_id_from_value(UVM_PARENT_ID_GPU0_VALUE);     \
         UVM_PARENT_ID_IS_VALID(i);                                        \
         (i) = uvm_parent_gpu_id_next(i))
561 
// Helper to iterate over all valid gpu ids.
// Starts at the id built from UVM_ID_GPU0_VALUE and steps with
// uvm_gpu_id_next() until UVM_ID_IS_VALID() fails.
// i must be an assignable GPU id. It is parenthesized wherever it is assigned
// so that any lvalue expression can be passed safely; note that it is
// evaluated several times.
#define for_each_gpu_id(i)                                     \
    for ((i) = uvm_gpu_id_from_value(UVM_ID_GPU0_VALUE);       \
         UVM_ID_IS_VALID(i);                                   \
         (i) = uvm_gpu_id_next(i))
565 
// Helper to iterate over all gpu ids in a given parent id.
// Starts at uvm_gpu_id_from_parent_gpu_id(id) and steps with uvm_gpu_id_next()
// while the id is valid and its value stays below the starting value plus
// UVM_PARENT_ID_MAX_SUB_PROCESSORS.
// i must be an assignable GPU id. It is parenthesized wherever it is assigned
// so that any lvalue expression can be passed safely. Both i and id are
// evaluated several times (id is re-evaluated on every loop condition check),
// so avoid arguments with side effects.
#define for_each_sub_processor_id_in_parent_gpu(i, id)                               \
    for ((i) = uvm_gpu_id_from_parent_gpu_id(id);                                    \
         UVM_ID_IS_VALID(i) &&                                                       \
         (uvm_id_value(i) <                                                          \
          uvm_id_value(uvm_gpu_id_from_parent_gpu_id(id)) + UVM_PARENT_ID_MAX_SUB_PROCESSORS); \
         (i) = uvm_gpu_id_next(i))
572 
// Helper to iterate over all sub processor indexes, from 0 up to (but not
// including) UVM_PARENT_ID_MAX_SUB_PROCESSORS.
// i must be an assignable integer expression. It is parenthesized at every use
// so that arguments such as *ptr expand to (*ptr)++ rather than *ptr++.
#define for_each_sub_processor_index(i) \
    for ((i) = 0; (i) < UVM_PARENT_ID_MAX_SUB_PROCESSORS; (i)++)
576 
// Helper to iterate over all valid processor ids.
// Starts at UVM_ID_CPU and steps with uvm_id_next() until UVM_ID_IS_VALID()
// fails.
// i must be an assignable uvm_processor_id_t. It is parenthesized wherever it
// is assigned so that any lvalue expression can be passed safely; note that it
// is evaluated several times.
#define for_each_id(i)                  \
    for ((i) = UVM_ID_CPU;              \
         UVM_ID_IS_VALID(i);            \
         (i) = uvm_id_next(i))
579 
// Find the node in mask with the shortest distance (as returned by
// node_distance) from src.
// Note that the search is inclusive of src.
// If mask has no bits set, NUMA_NO_NODE is returned.
//
// src:  node id the distances are measured from.
// mask: candidate nodes; taken as a pointer to const, so it is not modified.
int uvm_find_closest_node_mask(int src, const nodemask_t *mask);
585 
// Visit the nodes in mask in order of increasing distance from src.
//
// Each step asks uvm_find_closest_node_mask() for the closest remaining node
// and stops when it returns NUMA_NO_NODE. The node just visited is removed
// from mask with node_clear() when advancing, so the iteration consumes the
// mask: pass a scratch copy if the original contents are still needed.
// mask must be a nodemask_t lvalue (its address is taken), not a pointer.
#define for_each_closest_uvm_node(nid, src, mask)                         \
    for ((nid) = uvm_find_closest_node_mask((src), &(mask));              \
         (nid) != NUMA_NO_NODE;                                           \
         node_clear((nid), (mask)),                                       \
             (nid) = uvm_find_closest_node_mask((src), &(mask)))
592 
// Iterate nid over every node set in node_possible_map, using the kernel's
// for_each_node_mask() iterator.
#define for_each_possible_uvm_node(nid) \
    for_each_node_mask((nid), node_possible_map)
594 
// Compare two NUMA node IDs for equality.
// The main purpose of this helper is to correctly compare
// in situations when the system has only a single NUMA node
// (which is also the case when NUMA support is disabled).
//
// NOTE(review): the implementation is not in this header; how IDs such as
// NUMA_NO_NODE are treated on single-node systems should be confirmed there.
bool uvm_numa_id_eq(int nid0, int nid1);
600 
uvm_uuid_eq(const NvProcessorUuid * uuid0,const NvProcessorUuid * uuid1)601 static bool uvm_uuid_eq(const NvProcessorUuid *uuid0, const NvProcessorUuid *uuid1)
602 {
603     return memcmp(uuid0, uuid1, sizeof(*uuid0)) == 0;
604 }
605 
606 // Copies a UUID from source (src) to destination (dst).
uvm_uuid_copy(NvProcessorUuid * dst,const NvProcessorUuid * src)607 static void uvm_uuid_copy(NvProcessorUuid *dst, const NvProcessorUuid *src)
608 {
609     memcpy(dst, src, sizeof(*dst));
610 }
611 
uvm_uuid_is_cpu(const NvProcessorUuid * uuid)612 static inline NvBool uvm_uuid_is_cpu(const NvProcessorUuid *uuid)
613 {
614     return memcmp(uuid, &NV_PROCESSOR_UUID_CPU_DEFAULT, sizeof(*uuid)) == 0;
615 }
616 
// Dynamic uvm_processor_mask_t object allocation/maintenance.
//
// NOTE(review): the implementations are not in this header. From the
// signatures, init reports its outcome through NV_STATUS, alloc returns a
// pointer to a uvm_processor_mask_t, and free takes such a pointer back.
// Whether alloc can return NULL, and whether a freshly allocated mask is
// zeroed, should be confirmed against the implementation.
NV_STATUS uvm_processor_mask_cache_init(void);
void uvm_processor_mask_cache_exit(void);
uvm_processor_mask_t *uvm_processor_mask_cache_alloc(void);
void uvm_processor_mask_cache_free(uvm_processor_mask_t *mask);
622 
623 #endif
624