1 /******************************************************************************* 2 Copyright (c) 2015-2023 NVIDIA Corporation 3 4 Permission is hereby granted, free of charge, to any person obtaining a copy 5 of this software and associated documentation files (the "Software"), to 6 deal in the Software without restriction, including without limitation the 7 rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 sell copies of the Software, and to permit persons to whom the Software is 9 furnished to do so, subject to the following conditions: 10 11 The above copyright notice and this permission notice shall be 12 included in all copies or substantial portions of the Software. 13 14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 DEALINGS IN THE SOFTWARE. 21 22 *******************************************************************************/ 23 24 #include "uvm_extern_decl.h" 25 #include "uvm_forward_decl.h" 26 #include "uvm_push.h" 27 #include "uvm_channel.h" 28 #include "uvm_hal.h" 29 #include "uvm_kvmalloc.h" 30 #include "uvm_linux.h" 31 #include "nv_stdarg.h" 32 33 // This parameter enables push description tracking in push info. It's enabled 34 // by default for debug and develop builds and disabled for release builds. 
static unsigned uvm_debug_enable_push_desc = UVM_IS_DEBUG() || UVM_IS_DEVELOP();
module_param(uvm_debug_enable_push_desc, uint, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(uvm_debug_enable_push_desc, "Enable push description tracking");

// This parameter enables tracking of the tracker entries acquired by each
// push. Disabled by default on all build types.
static unsigned uvm_debug_enable_push_acquire_info = 0;
module_param(uvm_debug_enable_push_acquire_info, uint, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(uvm_debug_enable_push_acquire_info, "Enable push acquire information tracking");

// Return the acquire-info slot associated with the push, or NULL when acquire
// info tracking is disabled.
//
// The push must have a channel assigned, i.e. it cannot be a fake push.
static uvm_push_acquire_info_t *push_acquire_info_from_push(uvm_push_t *push)
{
    uvm_channel_t *channel = push->channel;

    UVM_ASSERT(channel != NULL);

    // NOTE(review): a zero channel_tracking_value appears to mean the push is
    // still on-going (not yet ended) — confirm against uvm_push.h.
    UVM_ASSERT(push->channel_tracking_value == 0);

    // Acquire-info slots are per GPFIFO entry, in parallel with the channel's
    // push info entries.
    UVM_ASSERT_MSG(push->push_info_index < channel->num_gpfifo_entries, "index %u\n", push->push_info_index);

    if (!uvm_debug_enable_push_acquire_info)
        return NULL;

    return &channel->push_acquire_infos[push->push_info_index];
}

// Acquire a single tracker entry. Subsequently pushed GPU work will not start
// before the work tracked by tracker entry is complete.
60 static void push_acquire_tracker_entry(uvm_push_t *push, 61 uvm_tracker_entry_t *tracker_entry, 62 uvm_push_acquire_info_t *push_acquire_info) 63 { 64 uvm_channel_t *entry_channel; 65 uvm_channel_t *channel; 66 NvU64 semaphore_va; 67 uvm_gpu_t *gpu; 68 69 UVM_ASSERT(push != NULL); 70 UVM_ASSERT(tracker_entry != NULL); 71 72 entry_channel = tracker_entry->channel; 73 if (entry_channel == NULL) 74 return; 75 76 channel = push->channel; 77 if (channel == entry_channel) 78 return; 79 80 semaphore_va = uvm_channel_tracking_semaphore_get_gpu_va_in_channel(entry_channel, channel); 81 gpu = uvm_channel_get_gpu(channel); 82 83 gpu->parent->host_hal->semaphore_acquire(push, semaphore_va, (NvU32)tracker_entry->value); 84 85 if (push_acquire_info) { 86 const NvU32 num_values = push_acquire_info->num_values; 87 88 UVM_ASSERT(uvm_debug_enable_push_acquire_info); 89 90 if (num_values < UVM_PUSH_ACQUIRE_INFO_MAX_ENTRIES) { 91 push_acquire_info->values[num_values].value = tracker_entry->value; 92 push_acquire_info->values[num_values].gpu_id = uvm_channel_get_gpu(entry_channel)->id; 93 push_acquire_info->values[num_values].is_proxy = uvm_channel_is_proxy(channel); 94 95 if (uvm_channel_is_proxy(channel)) { 96 push_acquire_info->values[num_values].proxy.pool_index = uvm_channel_index_in_pool(channel); 97 } 98 else { 99 push_acquire_info->values[num_values].runlist_id = entry_channel->channel_info.hwRunlistId; 100 push_acquire_info->values[num_values].channel_id = entry_channel->channel_info.hwChannelId; 101 } 102 } 103 ++push_acquire_info->num_values; 104 } 105 } 106 107 void uvm_push_acquire_tracker(uvm_push_t *push, uvm_tracker_t *tracker) 108 { 109 uvm_tracker_entry_t *entry; 110 uvm_push_acquire_info_t *push_acquire_info; 111 112 UVM_ASSERT(push != NULL); 113 114 if (tracker == NULL) 115 return; 116 117 uvm_tracker_remove_completed(tracker); 118 119 push_acquire_info = push_acquire_info_from_push(push); 120 121 for_each_tracker_entry(entry, tracker) 122 
push_acquire_tracker_entry(push, entry, push_acquire_info); 123 } 124 125 static NV_STATUS push_reserve_channel(uvm_channel_manager_t *manager, 126 uvm_channel_type_t channel_type, 127 uvm_gpu_t *dst_gpu, 128 uvm_channel_t **channel) 129 { 130 NV_STATUS status; 131 132 // Pick a channel and reserve a GPFIFO entry 133 // TODO: Bug 1764953: use the dependencies in the tracker to pick a channel 134 // in a smarter way. 135 if (dst_gpu == NULL) 136 status = uvm_channel_reserve_type(manager, channel_type, channel); 137 else 138 status = uvm_channel_reserve_gpu_to_gpu(manager, dst_gpu, channel); 139 140 if (status == NV_OK) 141 UVM_ASSERT(*channel); 142 143 return status; 144 } 145 146 static void push_set_description(uvm_push_t *push, const char *format, va_list args) 147 { 148 uvm_push_info_t *push_info; 149 150 UVM_ASSERT(uvm_push_info_is_tracking_descriptions()); 151 152 push_info = uvm_push_info_from_push(push); 153 vsnprintf(push_info->description, sizeof(push_info->description), format, args); 154 } 155 156 void uvm_push_set_description(uvm_push_t *push, const char *format, ...) 157 { 158 va_list va; 159 160 if (!uvm_push_info_is_tracking_descriptions()) 161 return; 162 163 va_start(va, format); 164 push_set_description(push, format, va); 165 va_end(va); 166 } 167 168 // Internal helper to fill info push info as part of beginning a push. 
static void push_fill_info(uvm_push_t *push,
                           const char *filename,
                           const char *function,
                           int line,
                           const char *format,
                           va_list args)
{
    uvm_push_acquire_info_t *push_acquire_info;
    uvm_push_info_t *push_info = uvm_push_info_from_push(push);

    // Record the origin of the push; only the basename of the path is kept.
    push_info->filename = kbasename(filename);
    push_info->function = function;
    push_info->line = line;

    // Reset the acquire-info count (when tracking is enabled) so the acquires
    // issued by this push are recorded from index 0.
    push_acquire_info = push_acquire_info_from_push(push);
    if (push_acquire_info)
        push_acquire_info->num_values = 0;

    if (uvm_push_info_is_tracking_descriptions())
        push_set_description(push, format, args);
}

// Begin a push on a channel that has already been reserved, fill in its debug
// info, and acquire all entries of the (optional) tracker.
static NV_STATUS push_begin_acquire_with_info(uvm_channel_t *channel,
                                              uvm_tracker_t *tracker,
                                              uvm_push_t *push,
                                              const char *filename,
                                              const char *function,
                                              int line,
                                              const char *format,
                                              va_list args)
{
    NV_STATUS status;

    memset(push, 0, sizeof(*push));

    push->gpu = uvm_channel_get_gpu(channel);

    status = uvm_channel_begin_push(channel, push);
    if (status != NV_OK)
        return status;

    push_fill_info(push, filename, function, line, format, args);

    uvm_push_acquire_tracker(push, tracker);

    return NV_OK;
}

// Begin a push of the given channel type, picking and reserving a suitable
// channel from the manager. A non-NULL dst_gpu selects a GPU-to-GPU channel
// towards that GPU.
__attribute__ ((format(printf, 9, 10)))
NV_STATUS __uvm_push_begin_acquire_with_info(uvm_channel_manager_t *manager,
                                             uvm_channel_type_t type,
                                             uvm_gpu_t *dst_gpu,
                                             uvm_tracker_t *tracker,
                                             uvm_push_t *push,
                                             const char *filename,
                                             const char *function,
                                             int line,
                                             const char *format, ...)
{
    va_list args;
    NV_STATUS status;
    uvm_channel_t *channel;

    if (dst_gpu != NULL) {
        // dst_gpu is only meaningful for GPU-to-GPU pushes, and must differ
        // from the GPU owning the channel manager.
        UVM_ASSERT(type == UVM_CHANNEL_TYPE_GPU_TO_GPU);
        UVM_ASSERT(dst_gpu != manager->gpu);
    }

    status = push_reserve_channel(manager, type, dst_gpu, &channel);
    if (status != NV_OK)
        return status;

    UVM_ASSERT(channel);

    va_start(args, format);
    status = push_begin_acquire_with_info(channel, tracker, push, filename, function, line, format, args);
    va_end(args);

    return status;
}

// Begin a push on the given channel, first reserving a single GPFIFO entry
// on it.
__attribute__ ((format(printf, 7, 8)))
NV_STATUS __uvm_push_begin_acquire_on_channel_with_info(uvm_channel_t *channel,
                                                        uvm_tracker_t *tracker,
                                                        uvm_push_t *push,
                                                        const char *filename,
                                                        const char *function,
                                                        int line,
                                                        const char *format, ...)
{
    va_list args;
    NV_STATUS status;

    status = uvm_channel_reserve(channel, 1);
    if (status != NV_OK)
        return status;

    va_start(args, format);
    status = push_begin_acquire_with_info(channel, tracker, push, filename, function, line, format, args);
    va_end(args);

    return status;
}

// Begin a push on a channel the caller has already reserved: no reservation
// is performed here.
__attribute__ ((format(printf, 7, 8)))
NV_STATUS __uvm_push_begin_acquire_on_reserved_channel_with_info(uvm_channel_t *channel,
                                                                 uvm_tracker_t *tracker,
                                                                 uvm_push_t *push,
                                                                 const char *filename,
                                                                 const char *function,
                                                                 int line,
                                                                 const char *format, ...)
{
    va_list args;
    NV_STATUS status;

    va_start(args, format);
    status = push_begin_acquire_with_info(channel, tracker, push, filename, function, line, format, args);
    va_end(args);

    return status;
}

// Description tracking is controlled by the uvm_debug_enable_push_desc module
// parameter (enabled by default on debug/develop builds).
bool uvm_push_info_is_tracking_descriptions(void)
{
    return uvm_debug_enable_push_desc != 0;
}

// Acquire-info tracking is controlled by the uvm_debug_enable_push_acquire_info
// module parameter (disabled by default).
bool uvm_push_info_is_tracking_acquires(void)
{
    return uvm_debug_enable_push_acquire_info != 0;
}

// End the push, submitting it to its channel.
void uvm_push_end(uvm_push_t *push)
{
    uvm_push_flag_t flag;
    uvm_channel_end_push(push);

    flag = find_first_bit(push->flags, UVM_PUSH_FLAG_COUNT);

    // All flags should be reset by the end of the push
    UVM_ASSERT_MSG(flag == UVM_PUSH_FLAG_COUNT, "first flag set %d\n", flag);
}

// Synchronously wait for the push's tracker entry to complete.
NV_STATUS uvm_push_wait(uvm_push_t *push)
{
    uvm_tracker_entry_t entry;
    uvm_push_get_tracker_entry(push, &entry);

    return uvm_tracker_wait_for_entry(&entry);
}

// Convenience wrapper: end the push and synchronously wait for it.
NV_STATUS uvm_push_end_and_wait(uvm_push_t *push)
{
    uvm_push_end(push);

    return uvm_push_wait(push);
}

// Begin a "fake" push that has no channel: methods are written to a
// kvmalloc'ed CPU buffer of UVM_MAX_PUSH_SIZE bytes instead of a pushbuffer.
NV_STATUS uvm_push_begin_fake(uvm_gpu_t *gpu, uvm_push_t *push)
{
    memset(push, 0, sizeof(*push));
    push->begin = (NvU32 *)uvm_kvmalloc(UVM_MAX_PUSH_SIZE);
    if (!push->begin)
        return NV_ERR_NO_MEMORY;

    push->next = push->begin;
    push->gpu = gpu;

    return NV_OK;
}

// Free the CPU buffer backing a push started with uvm_push_begin_fake().
void uvm_push_end_fake(uvm_push_t *push)
{
    uvm_kvfree(push->begin);
    push->begin = NULL;
}

// Reserve size bytes of inline data space and return a CPU pointer to it for
// the caller to write through. The returned pointer is only advanced here;
// the data is terminated by uvm_push_inline_data_end().
void *uvm_push_inline_data_get(uvm_push_inline_data_t *data, size_t size)
{
    void *buffer = data->next_data;

    UVM_ASSERT(!uvm_global_is_suspended());

    // The data, plus one extra method slot for the noop emitted by
    // uvm_push_inline_data_end(), has to fit in the remaining push space.
    UVM_ASSERT_MSG(uvm_push_get_size(data->push) + uvm_push_inline_data_size(data) + UVM_METHOD_SIZE + size <= UVM_MAX_PUSH_SIZE,
                   "push size %u inline data size %zu new data size %zu max push %u\n",
                   uvm_push_get_size(data->push), uvm_push_inline_data_size(data), size, UVM_MAX_PUSH_SIZE);
    UVM_ASSERT_MSG(uvm_push_inline_data_size(data) + size <= UVM_PUSH_INLINE_DATA_MAX_SIZE,
                   "inline data size %zu new data size %zu max %u\n",
                   uvm_push_inline_data_size(data), size, UVM_PUSH_INLINE_DATA_MAX_SIZE);

    data->next_data += size;

    return buffer;
}

// Like uvm_push_inline_data_get(), but the returned pointer is aligned to
// alignment bytes by consuming extra padding space if needed.
void *uvm_push_inline_data_get_aligned(uvm_push_inline_data_t *data, size_t size, size_t alignment)
{
    NvU64 next_ptr = (NvU64)(uintptr_t)data->next_data;
    size_t offset = 0;
    char *buffer;

    UVM_ASSERT(alignment <= UVM_PAGE_SIZE_4K);
    UVM_ASSERT_MSG(IS_ALIGNED(alignment, UVM_METHOD_SIZE), "alignment %zu\n", alignment);

    // Padding bytes needed to bring the current pointer up to the requested
    // alignment.
    offset = UVM_ALIGN_UP(next_ptr, alignment) - next_ptr;

    buffer = (char *)uvm_push_inline_data_get(data, size + offset);
    return buffer + offset;
}

// Finish an inline data sequence: emit a noop method sized to make the GPU
// skip over the inlined data, and return the data's address (GPU VA for a
// real push, CPU address for a fake one).
uvm_gpu_address_t uvm_push_inline_data_end(uvm_push_inline_data_t *data)
{
    NvU64 inline_data_address;
    uvm_push_t *push = data->push;
    uvm_channel_t *channel = push->channel;

    // Round up the inline data size to the method size
    size_t noop_size = roundup(uvm_push_inline_data_size(data), UVM_METHOD_SIZE);

    if (channel == NULL) {
        // Fake push, just return the CPU address.
        inline_data_address = (NvU64) (uintptr_t)(push->next + 1);
    }
    else {
        // Offset of the inlined data within the push.
        inline_data_address = (push->next - push->begin + 1) * UVM_METHOD_SIZE;

        // Add GPU VA of the push begin
        inline_data_address += uvm_pushbuffer_get_gpu_va_for_push(channel->pool->manager->pushbuffer, push);
    }

    // This will place a noop right before the inline data that was written.
    // Plus UVM_METHOD_SIZE for the noop method itself.
    uvm_push_get_gpu(push)->parent->host_hal->noop(push, noop_size + UVM_METHOD_SIZE);

    return uvm_gpu_address_virtual(inline_data_address);
}

// One-shot helper: begin inline data, reserve a single aligned buffer, and
// end the sequence. Returns a CPU pointer to the buffer and sets *gpu_address
// to its aligned GPU address.
void *uvm_push_get_single_inline_buffer(uvm_push_t *push,
                                        size_t size,
                                        size_t alignment,
                                        uvm_gpu_address_t *gpu_address)
{
    uvm_push_inline_data_t data;
    void *buffer;

    UVM_ASSERT(IS_ALIGNED(alignment, UVM_METHOD_SIZE));

    uvm_push_inline_data_begin(push, &data);
    buffer = uvm_push_inline_data_get_aligned(&data, size, alignment);
    *gpu_address = uvm_push_inline_data_end(&data);

    // Apply to the GPU address the same alignment padding that
    // uvm_push_inline_data_get_aligned() applied to the CPU pointer.
    gpu_address->address = UVM_ALIGN_UP(gpu_address->address, alignment);

    return buffer;
}

// Push a semaphore timestamp release and return a CPU pointer to where the
// GPU will write the timestamp. Only valid on CE and SEC2 channels.
NvU64 *uvm_push_timestamp(uvm_push_t *push)
{
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);
    const size_t timestamp_size = 16;
    NvU64 *timestamp;
    uvm_gpu_address_t address;

    timestamp = (NvU64 *)uvm_push_get_single_inline_buffer(push, timestamp_size, timestamp_size, &address);

    // Timestamp is in the second half of the 16 byte semaphore release
    timestamp += 1;

    if (uvm_channel_is_ce(push->channel))
        gpu->parent->ce_hal->semaphore_timestamp(push, address.address);
    else if (uvm_channel_is_sec2(push->channel))
        gpu->parent->sec2_hal->semaphore_timestamp(push, address.address);
    else
        UVM_ASSERT_MSG(0, "Semaphore release timestamp on an unsupported channel.\n");

    return timestamp;
}

// Validate a (subchannel, method address, method data) tuple against the
// per-subchannel HAL validators. SEC2 methods are not validated and are
// always accepted; unknown subchannels are rejected with an error print.
bool uvm_push_method_is_valid(uvm_push_t *push, NvU8 subch, NvU32 method_address, NvU32 method_data)
{
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);

    if (subch == UVM_SUBCHANNEL_CE)
        return gpu->parent->ce_hal->method_is_valid(push, method_address, method_data);
    else if (subch == UVM_SUBCHANNEL_HOST)
        return gpu->parent->host_hal->method_is_valid(push, method_address, method_data);
    else if (subch == UVM_SW_OBJ_SUBCHANNEL)
        return gpu->parent->host_hal->sw_method_is_valid(push, method_address, method_data);
    else if (subch == UVM_SUBCHANNEL_SEC2)
        return true;

    UVM_ERR_PRINT("Unsupported subchannel 0x%x\n", subch);
    return false;
}