/*******************************************************************************
    Copyright (c) 2015-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef __UVM_CHANNEL_H__
#define __UVM_CHANNEL_H__

#include "nv_uvm_types.h"
#include "uvm_forward_decl.h"
#include "uvm_gpu_semaphore.h"
#include "uvm_pushbuffer.h"
#include "uvm_tracker.h"

//
// UVM channels
//
// A channel manager is created as part of adding a GPU. This involves
// creating channels for each of the supported types (uvm_channel_type_t) in
// separate channel pools, possibly using different CE instances in the HW.
// Each channel has a uvm_gpu_tracking_semaphore_t and a set of
// uvm_gpfifo_entry_t (one per HW GPFIFO entry) that allow tracking completion
// of pushes on the channel.
//
// Beginning a push on a channel implies reserving a GPFIFO entry in that
// channel, and hence there can only be as many ongoing pushes per channel as
// there are free GPFIFO entries. This ensures that ending a push never has to
// wait for a GPFIFO entry to free up.
//
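// Illustrative sketch (not part of the original header): the channel-level
// lifecycle of a push, using only functions declared below. In practice the
// uvm_push_*() wrappers drive these calls; error handling is omitted.
//
//     uvm_channel_t *channel;
//     uvm_push_t push;
//
//     // Reserving a channel of the wanted type also reserves a GPFIFO
//     // entry, so ending the push later cannot block waiting for one.
//     status = uvm_channel_reserve_type(manager,
//                                       UVM_CHANNEL_TYPE_GPU_INTERNAL,
//                                       &channel);
//
//     // Begin the push, write methods into the pushbuffer, then end it,
//     // which submits the GPFIFO entry and increments the channel's
//     // tracking semaphore for completion tracking.
//     status = uvm_channel_begin_push(channel, &push);
//     ...
//     uvm_channel_end_push(&push);
//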

#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_DEFAULT 1024
#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MIN 32
#define UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MAX (1024 * 1024)

// Maximum number of channels per pool.
#define UVM_CHANNEL_MAX_NUM_CHANNELS_PER_POOL 8

// Semaphore payloads cannot advance too much between calls to
// uvm_gpu_tracking_semaphore_update_completed_value(). In practice the jumps
// are bounded by GPFIFO sizing, as we have to update the completed value to
// reclaim GPFIFO entries. Set a limit based on the max GPFIFO entries we
// could ever see.
//
// Logically this define belongs in uvm_gpu_semaphore.h, but it depends on the
// channel GPFIFO sizing defined here, so it's easiest to keep it here given
// that uvm_channel.h includes uvm_gpu_semaphore.h.
#define UVM_GPU_SEMAPHORE_MAX_JUMP (2 * UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MAX)
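// With the current sizing this evaluates to 2 * (1024 * 1024) = 2097152,
// i.e., the completed value is allowed to jump ahead by at most ~2M between
// consecutive updates.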

#define uvm_channel_pool_assert_locked(pool) (          \
{                                                       \
    if (uvm_channel_pool_is_proxy(pool))                \
        uvm_assert_mutex_locked(&(pool)->mutex);        \
    else                                                \
        uvm_assert_spinlock_locked(&(pool)->spinlock);  \
})

// Channel types
typedef enum
{
    // CPU to GPU copies
    UVM_CHANNEL_TYPE_CPU_TO_GPU,

    // GPU to CPU copies
    UVM_CHANNEL_TYPE_GPU_TO_CPU,

    // Memsets and copies within the GPU
    UVM_CHANNEL_TYPE_GPU_INTERNAL,

    // Memops and small memsets/copies for writing PTEs
    UVM_CHANNEL_TYPE_MEMOPS,

    // GPU to GPU peer copies
    UVM_CHANNEL_TYPE_GPU_TO_GPU,

    UVM_CHANNEL_TYPE_CE_COUNT,

    // ^^^^^^
    // Channel types backed by a CE.
    UVM_CHANNEL_TYPE_COUNT = UVM_CHANNEL_TYPE_CE_COUNT,
} uvm_channel_type_t;

typedef enum
{
    // A pool that contains CE channels owned by UVM.
    UVM_CHANNEL_POOL_TYPE_CE = (1 << 0),

    // A proxy pool contains only proxy channels, so it only exists in SR-IOV
    // heavy. The pool is only used for UVM_CHANNEL_TYPE_MEMOPS pushes.
    //
    // A proxy channel is a privileged CE channel owned by the vGPU plugin. A
    // proxy channel cannot be manipulated directly by the UVM driver, which
    // instead can only submit work to it by invoking an RM API.
    //
    // There is a single proxy pool and channel per GPU.
    UVM_CHANNEL_POOL_TYPE_CE_PROXY = (1 << 1),

    UVM_CHANNEL_POOL_TYPE_COUNT = 2,

    // A mask used to select pools of any type.
    UVM_CHANNEL_POOL_TYPE_MASK  = ((1U << UVM_CHANNEL_POOL_TYPE_COUNT) - 1)
} uvm_channel_pool_type_t;

typedef enum
{
    // Push-based GPFIFO entry
    UVM_GPFIFO_ENTRY_TYPE_NORMAL,

    // Control GPFIFO entry, i.e., an entry whose LENGTH field is zero. It is
    // not associated with a push.
    UVM_GPFIFO_ENTRY_TYPE_CONTROL
} uvm_gpfifo_entry_type_t;

struct uvm_gpfifo_entry_struct
{
    uvm_gpfifo_entry_type_t type;

    // Channel tracking semaphore value that indicates completion of
    // this entry.
    NvU64 tracking_semaphore_value;

    // The following fields are only valid when type is
    // UVM_GPFIFO_ENTRY_TYPE_NORMAL.

    // Offset of the pushbuffer in the pushbuffer allocation used by
    // this entry.
    NvU32 pushbuffer_offset;

    // Size of the pushbuffer used for this entry.
    NvU32 pushbuffer_size;

    // List node used by the pushbuffer tracking
    struct list_head pending_list_node;

    // Push info for the pending push that used this GPFIFO entry
    uvm_push_info_t *push_info;
};

// A channel pool is a set of channels that use the same engine. For example,
// all channels in a CE pool share the same (logical) Copy Engine.
typedef struct
{
    // Owning channel manager
    uvm_channel_manager_t *manager;

    // Channels in this pool
    uvm_channel_t *channels;

    // Number of elements in the channel array
    NvU32 num_channels;

    // Index of the engine associated with the pool (the index is an offset
    // from the first engine of the same engine type).
    unsigned engine_index;

    // Pool type: refer to the uvm_channel_pool_type_t enum.
    uvm_channel_pool_type_t pool_type;

    // Lock protecting the state of channels in the pool.
    //
    // There are two pool lock types available: spinlock and mutex. The mutex
    // variant is required when the thread holding the pool lock must
    // sleep (ex: acquire another mutex) deeper in the call stack, either in
    // UVM or RM. For example, work submission to proxy channels in SR-IOV
    // heavy entails calling an RM API that acquires a mutex, so the proxy
    // channel pool must use the mutex variant.
    //
    // Unless the mutex is required, the spinlock is preferred. This is
    // because, other than for proxy channels, work submission takes little
    // time and does not involve any RM calls, so UVM can avoid any invocation
    // that may result in a sleep. All non-proxy channel pools use the
    // spinlock variant, even in SR-IOV heavy.
    union {
        uvm_spinlock_t spinlock;
        uvm_mutex_t mutex;
    };

} uvm_channel_pool_t;
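
// Illustrative sketch (hypothetical helper, not part of this header): how a
// caller would be expected to take the pool lock given the union above,
// mirroring the branch in uvm_channel_pool_assert_locked(). uvm_mutex_lock()
// and uvm_spin_lock() are assumed to be the UVM locking primitives from
// uvm_lock.h.
//
//     static void example_channel_pool_lock(uvm_channel_pool_t *pool)
//     {
//         if (uvm_channel_pool_is_proxy(pool))
//             uvm_mutex_lock(&pool->mutex);
//         else
//             uvm_spin_lock(&pool->spinlock);
//     }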

struct uvm_channel_struct
{
    // Owning pool
    uvm_channel_pool_t *pool;

    // The channel name contains the CE index, and (for UVM internal channels)
    // the HW runlist and channel IDs.
    char name[64];

    // Array of GPFIFO entries, one per HW GPFIFO entry
    uvm_gpfifo_entry_t *gpfifo_entries;

    // Number of GPFIFO entries in gpfifo_entries
    NvU32 num_gpfifo_entries;

    // Latest GPFIFO entry submitted to the GPU.
    // Updated when new pushes are submitted to the GPU in
    // uvm_channel_end_push().
    NvU32 cpu_put;

    // Latest GPFIFO entry completed by the GPU.
    // Updated by uvm_channel_update_progress() after checking pending GPFIFOs
    // for completion.
    NvU32 gpu_get;

    // Number of currently ongoing GPFIFO entries on this channel.
    // A new push or control GPFIFO entry is only allowed to begin on the
    // channel if there is a free GPFIFO entry for it.
    NvU32 current_gpfifo_count;

    // Array of uvm_push_info_t for all pending pushes on the channel
    uvm_push_info_t *push_infos;

    // Array of uvm_push_acquire_info_t for all pending pushes on the channel.
    // Each entry corresponds to the push_infos entry with the same index.
    uvm_push_acquire_info_t *push_acquire_infos;

    // List of push info entries (uvm_push_info_t) that are currently
    // available. A push info entry is not available if it has been assigned
    // to a push (uvm_push_begin), and the GPFIFO entry associated with the
    // push has not been marked as completed.
    struct list_head available_push_infos;

    // GPU tracking semaphore tracking the work in the channel.
    // Each push on the channel increments the semaphore, see
    // uvm_channel_end_push().
    uvm_gpu_tracking_semaphore_t tracking_sem;

    // RM channel information
    union
    {
        // UVM internal channels
        struct
        {
            // UVM-RM interface handle
            uvmGpuChannelHandle handle;

            // Channel state populated by RM. Includes the GPFIFO, error
            // notifier, work submission information, etc.
            UvmGpuChannelInfo channel_info;
        };

        // Proxy channels (SR-IOV heavy only)
        struct
        {
            // UVM-RM interface handle
            UvmGpuPagingChannelHandle handle;

            // Channel state populated by RM. Includes the error notifier.
            UvmGpuPagingChannelInfo channel_info;
        } proxy;
    };

    struct
    {
        struct proc_dir_entry *dir;
        struct proc_dir_entry *info;
        struct proc_dir_entry *pushes;
    } procfs;

    // Information managed by the tools event notification mechanism. Mainly
    // used to keep a list of channels with pending events, which is needed
    // to collect the timestamps of asynchronous operations.
    struct
    {
        struct list_head channel_list_node;
        NvU32 pending_event_count;
    } tools;
};
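
// Illustrative note (an assumption inferred from the fields above, not stated
// in the original header): cpu_put and gpu_get index a ring of
// num_gpfifo_entries entries, so the count of entries currently owned by the
// GPU can be derived with modular arithmetic:
//
//     NvU32 pending = (channel->cpu_put + channel->num_gpfifo_entries -
//                      channel->gpu_get) % channel->num_gpfifo_entries;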

struct uvm_channel_manager_struct
{
    // The owning GPU
    uvm_gpu_t *gpu;

    // The pushbuffer used for all pushes done with this channel manager
    uvm_pushbuffer_t *pushbuffer;

    // Array of channel pools.
    uvm_channel_pool_t *channel_pools;

    // Number of elements in the pool array
    unsigned num_channel_pools;

    // Mask containing the indexes of the usable Copy Engines. Each usable CE
    // has at least one pool associated with it.
    DECLARE_BITMAP(ce_mask, UVM_COPY_ENGINE_COUNT_MAX);

    struct
    {
        // Pools to be used by each channel type by default.
        //
        // Transfers of a given type may use a pool different from that in
        // default_for_type[type]. For example, transfers to NvLink GPU
        // peers may instead use the more optimal pool stored in the
        // gpu_to_gpu array.
        uvm_channel_pool_t *default_for_type[UVM_CHANNEL_TYPE_COUNT];

        // Optimal pools to use when writing from the owning GPU to its NvLink
        // peers.
        // If there is no optimal pool (the entry is NULL), use the default
        // pool default_for_type[UVM_CHANNEL_TYPE_GPU_TO_GPU] instead.
        uvm_channel_pool_t *gpu_to_gpu[UVM_ID_MAX_GPUS];
    } pool_to_use;

    struct
    {
        struct proc_dir_entry *channels_dir;
        struct proc_dir_entry *pending_pushes;
    } procfs;

    struct
    {
        NvU32 num_gpfifo_entries;
        UVM_BUFFER_LOCATION gpfifo_loc;
        UVM_BUFFER_LOCATION gpput_loc;
        UVM_BUFFER_LOCATION pushbuffer_loc;
    } conf;
};

// Create a channel manager for the GPU
NV_STATUS uvm_channel_manager_create(uvm_gpu_t *gpu, uvm_channel_manager_t **manager_out);

static bool uvm_channel_pool_is_proxy(uvm_channel_pool_t *pool)
{
    UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);

    return pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE_PROXY;
}

static bool uvm_channel_is_proxy(uvm_channel_t *channel)
{
    return uvm_channel_pool_is_proxy(channel->pool);
}

static bool uvm_channel_pool_is_ce(uvm_channel_pool_t *pool)
{
    UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);

    return (pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE) || uvm_channel_pool_is_proxy(pool);
}

static bool uvm_channel_is_ce(uvm_channel_t *channel)
{
    return uvm_channel_pool_is_ce(channel->pool);
}

// Proxy channels are used to push page tree related methods, so their channel
// type is UVM_CHANNEL_TYPE_MEMOPS.
static uvm_channel_type_t uvm_channel_proxy_channel_type(void)
{
    return UVM_CHANNEL_TYPE_MEMOPS;
}

// Privileged channels support all the Host and engine methods, while
// non-privileged channels don't support privileged methods.
//
// A major limitation of non-privileged CE channels is their lack of physical
// addressing support.
bool uvm_channel_is_privileged(uvm_channel_t *channel);

// Destroy the channel manager
void uvm_channel_manager_destroy(uvm_channel_manager_t *channel_manager);

// Get the current status of the channel
// Returns NV_OK if the channel is in a good state and NV_ERR_RC_ERROR
// otherwise. Notably this never sets the global fatal error.
NV_STATUS uvm_channel_get_status(uvm_channel_t *channel);

// Check for channel errors
// Checks for channel errors by calling uvm_channel_get_status(). If an error
// occurred, sets the global fatal error and prints errors.
NV_STATUS uvm_channel_check_errors(uvm_channel_t *channel);

// Check errors on all channels in the channel manager
// Also includes uvm_global_get_status
NV_STATUS uvm_channel_manager_check_errors(uvm_channel_manager_t *channel_manager);

// Retrieve the GPFIFO entry that caused a channel error
// The channel has to be in an error state prior to calling this function.
uvm_gpfifo_entry_t *uvm_channel_get_fatal_entry(uvm_channel_t *channel);

// Update progress of a specific channel
// Returns the number of still-pending GPFIFO entries for that channel.
// Notably, some of the pending GPFIFO entries might already be completed, but
// the update early-outs after completing a fixed number of them to spread the
// cost of the updates across calls.
NvU32 uvm_channel_update_progress(uvm_channel_t *channel);

// Update progress of all channels
// Returns the number of still-pending GPFIFO entries for all channels.
// Notably, some of the pending GPFIFO entries might already be completed, but
// the update early-outs after completing a fixed number of them to spread the
// cost of the updates across calls.
NvU32 uvm_channel_manager_update_progress(uvm_channel_manager_t *channel_manager);

// Wait for all channels to idle
// It waits for anything that is running, but doesn't prevent new work from
// beginning.
NV_STATUS uvm_channel_manager_wait(uvm_channel_manager_t *manager);

// Get the GPU VA of semaphore_channel's tracking semaphore within the VA space
// associated with access_channel.
//
// The channels can belong to different GPUs, the same GPU, or even be
// identical, in which case uvm_channel_tracking_semaphore_get_gpu_va can be
// used instead.
NvU64 uvm_channel_tracking_semaphore_get_gpu_va_in_channel(uvm_channel_t *semaphore_channel,
                                                           uvm_channel_t *access_channel);

// See above.
static NvU64 uvm_channel_tracking_semaphore_get_gpu_va(uvm_channel_t *channel)
{
    return uvm_channel_tracking_semaphore_get_gpu_va_in_channel(channel, channel);
}

// Check whether the channel has completed a value
bool uvm_channel_is_value_completed(uvm_channel_t *channel, NvU64 value);

// Update and get the latest value completed by the channel
NvU64 uvm_channel_update_completed_value(uvm_channel_t *channel);
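
// Illustrative sketch (not part of the original header): busy-waiting until
// the channel completes a given tracking semaphore value, using only the
// function above. Real callers would typically go through a tracker or
// uvm_channel_manager_wait() instead of spinning.
//
//     while (uvm_channel_update_completed_value(channel) < value)
//         ;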

// Select and reserve a channel with the specified type for a push
NV_STATUS uvm_channel_reserve_type(uvm_channel_manager_t *manager,
                                   uvm_channel_type_t type,
                                   uvm_channel_t **channel_out);

// Select and reserve a channel for a transfer from channel_manager->gpu to
// dst_gpu.
NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *channel_manager,
                                         uvm_gpu_t *dst_gpu,
                                         uvm_channel_t **channel_out);

// Reserve a specific channel for a push or for a control GPFIFO entry.
NV_STATUS uvm_channel_reserve(uvm_channel_t *channel, NvU32 num_gpfifo_entries);

// Set the optimal CE for P2P transfers between manager->gpu and peer
void uvm_channel_manager_set_p2p_ce(uvm_channel_manager_t *manager, uvm_gpu_t *peer, NvU32 optimal_ce);

// Begin a push on a previously reserved channel
// Should be used by uvm_push_*() only.
NV_STATUS uvm_channel_begin_push(uvm_channel_t *channel, uvm_push_t *push);

// End a push
// Should be used by uvm_push_end() only.
void uvm_channel_end_push(uvm_push_t *push);

// Write/send a control GPFIFO entry to a channel. This is not supported by
// proxy channels.
//
// Ordering guarantees:
// Input: control GPFIFO entries are guaranteed to be processed by ESCHED
// after all prior GPFIFO entries and pushbuffers have been fetched, but not
// necessarily completed.
// Output: a caller can wait for this control entry to complete with
// uvm_channel_manager_wait(), or by waiting for any later push in the same
// channel to complete.
NV_STATUS uvm_channel_write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_value);
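
// Illustrative sketch (not part of the original header): submitting a control
// GPFIFO entry on a non-proxy channel. The encoding of ctrl_value is
// HW-specific and not covered here. Reserving the entry explicitly via
// uvm_channel_reserve() is an assumption based on its comment above; the
// implementation may instead reserve internally.
//
//     status = uvm_channel_reserve(channel, 1);
//     if (status == NV_OK)
//         status = uvm_channel_write_ctrl_gpfifo(channel, ctrl_value);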

const char *uvm_channel_type_to_string(uvm_channel_type_t channel_type);
const char *uvm_channel_pool_type_to_string(uvm_channel_pool_type_t channel_pool_type);

// Returns the number of available GPFIFO entries. The function internally
// acquires the channel pool lock.
NvU32 uvm_channel_get_available_gpfifo_entries(uvm_channel_t *channel);

void uvm_channel_print_pending_pushes(uvm_channel_t *channel);

static uvm_gpu_t *uvm_channel_get_gpu(uvm_channel_t *channel)
{
    return channel->pool->manager->gpu;
}

// Index of a channel within the owning pool
static unsigned uvm_channel_index_in_pool(const uvm_channel_t *channel)
{
    return channel - channel->pool->channels;
}

NvU32 uvm_channel_update_progress_all(uvm_channel_t *channel);

// Return an arbitrary channel of the given type(s)
uvm_channel_t *uvm_channel_any_of_type(uvm_channel_manager_t *manager, NvU32 pool_type_mask);

// Return an arbitrary channel of any type
static uvm_channel_t *uvm_channel_any(uvm_channel_manager_t *manager)
{
    return uvm_channel_any_of_type(manager, UVM_CHANNEL_POOL_TYPE_MASK);
}

// Helper to iterate over all the channels in a pool.
#define uvm_for_each_channel_in_pool(channel, pool)                            \
    for (({UVM_ASSERT(pool->channels);                                         \
         channel = pool->channels;});                                          \
         channel != pool->channels + pool->num_channels;                       \
         channel++)

uvm_channel_pool_t *uvm_channel_pool_first(uvm_channel_manager_t *manager, NvU32 pool_type_mask);
uvm_channel_pool_t *uvm_channel_pool_next(uvm_channel_manager_t *manager,
                                          uvm_channel_pool_t *curr_pool,
                                          NvU32 pool_type_mask);

// Helper to iterate over all the channel pools of the given type(s) in a GPU.
// The pool mask must not be zero.
#define uvm_for_each_pool_of_type(pool, manager, pool_type_mask)               \
    for (pool = uvm_channel_pool_first(manager, pool_type_mask);               \
         pool != NULL;                                                         \
         pool = uvm_channel_pool_next(manager, pool, pool_type_mask))

#define uvm_for_each_pool(pool, manager) uvm_for_each_pool_of_type(pool, manager, UVM_CHANNEL_POOL_TYPE_MASK)
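
// Illustrative sketch (not part of the original header): counting all CE
// channels on a GPU by combining the two iterators above.
//
//     uvm_channel_pool_t *pool;
//     uvm_channel_t *channel;
//     NvU32 count = 0;
//
//     uvm_for_each_pool_of_type(pool, manager, UVM_CHANNEL_POOL_TYPE_CE) {
//         uvm_for_each_channel_in_pool(channel, pool)
//             ++count;
//     }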

#endif // __UVM_CHANNEL_H__