1 //////////////////////////////////////////////////////////////////////////////// 2 // 3 // The University of Illinois/NCSA 4 // Open Source License (NCSA) 5 // 6 // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. 7 // 8 // Developed by: 9 // 10 // AMD Research and AMD HSA Software Development 11 // 12 // Advanced Micro Devices, Inc. 13 // 14 // www.amd.com 15 // 16 // Permission is hereby granted, free of charge, to any person obtaining a copy 17 // of this software and associated documentation files (the "Software"), to 18 // deal with the Software without restriction, including without limitation 19 // the rights to use, copy, modify, merge, publish, distribute, sublicense, 20 // and/or sell copies of the Software, and to permit persons to whom the 21 // Software is furnished to do so, subject to the following conditions: 22 // 23 // - Redistributions of source code must retain the above copyright notice, 24 // this list of conditions and the following disclaimers. 25 // - Redistributions in binary form must reproduce the above copyright 26 // notice, this list of conditions and the following disclaimers in 27 // the documentation and/or other materials provided with the distribution. 28 // - Neither the names of Advanced Micro Devices, Inc, 29 // nor the names of its contributors may be used to endorse or promote 30 // products derived from this Software without specific prior written 31 // permission. 32 // 33 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 36 // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 37 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 38 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 39 // DEALINGS WITH THE SOFTWARE. 40 // 41 //////////////////////////////////////////////////////////////////////////////// 42 43 // HSA runtime C++ interface file. 44 45 #ifndef HSA_RUNTME_CORE_INC_COMMAND_QUEUE_H_ 46 #define HSA_RUNTME_CORE_INC_COMMAND_QUEUE_H_ 47 #include <sstream> 48 49 #include "core/common/shared.h" 50 51 #include "core/inc/checked.h" 52 53 #include "core/util/utils.h" 54 55 #include "inc/amd_hsa_queue.h" 56 57 #include "hsakmt.h" 58 59 namespace core { 60 struct AqlPacket { 61 62 union { 63 hsa_kernel_dispatch_packet_t dispatch; 64 hsa_barrier_and_packet_t barrier_and; 65 hsa_barrier_or_packet_t barrier_or; 66 hsa_agent_dispatch_packet_t agent; 67 }; 68 typeAqlPacket69 uint8_t type() const { 70 return ((dispatch.header >> HSA_PACKET_HEADER_TYPE) & 71 ((1 << HSA_PACKET_HEADER_WIDTH_TYPE) - 1)); 72 } 73 IsValidAqlPacket74 bool IsValid() const { 75 return (type() <= HSA_PACKET_TYPE_BARRIER_OR) & (type() != HSA_PACKET_TYPE_INVALID); 76 } 77 stringAqlPacket78 std::string string() const { 79 std::stringstream string; 80 uint8_t type = this->type(); 81 82 const char* type_names[] = { 83 "HSA_PACKET_TYPE_VENDOR_SPECIFIC", "HSA_PACKET_TYPE_INVALID", 84 "HSA_PACKET_TYPE_KERNEL_DISPATCH", "HSA_PACKET_TYPE_BARRIER_AND", 85 "HSA_PACKET_TYPE_AGENT_DISPATCH", "HSA_PACKET_TYPE_BARRIER_OR"}; 86 87 string << "type: " << type_names[type] 88 << "\nbarrier: " << ((dispatch.header >> HSA_PACKET_HEADER_BARRIER) & 89 ((1 << HSA_PACKET_HEADER_WIDTH_BARRIER) - 1)) 90 << "\nacquire: " << ((dispatch.header >> HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE) & 91 ((1 << HSA_PACKET_HEADER_WIDTH_SCACQUIRE_FENCE_SCOPE) - 1)) 92 << "\nrelease: " << ((dispatch.header >> HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE) & 93 ((1 << HSA_PACKET_HEADER_WIDTH_SCRELEASE_FENCE_SCOPE) - 1)); 94 95 if (type == HSA_PACKET_TYPE_KERNEL_DISPATCH) { 96 string << "\nDim: " << dispatch.setup 97 << "\nworkgroup_size: " << dispatch.workgroup_size_x << ", " 98 << dispatch.workgroup_size_y << ", " << dispatch.workgroup_size_z 99 << "\ngrid_size: " << dispatch.grid_size_x << ", " 100 << dispatch.grid_size_y << ", " << dispatch.grid_size_z 101 << "\nprivate_size: " << dispatch.private_segment_size 102 << "\ngroup_size: " << dispatch.group_segment_size 103 << "\nkernel_object: " << dispatch.kernel_object 104 << "\nkern_arg: " << dispatch.kernarg_address 105 << "\nsignal: " << dispatch.completion_signal.handle; 106 } 107 108 if ((type == HSA_PACKET_TYPE_BARRIER_AND) || 109 (type == HSA_PACKET_TYPE_BARRIER_OR)) { 110 for (int i = 0; i < 5; i++) 111 string << "\ndep[" << i << "]: " << barrier_and.dep_signal[i].handle; 112 string << "\nsignal: " << barrier_and.completion_signal.handle; 113 } 114 115 return string.str(); 116 } 117 }; 118 119 class Queue; 120 121 /// @brief Helper structure to simplify conversion of amd_queue_t and 122 /// core::Queue object. 123 struct SharedQueue { 124 amd_queue_t amd_queue; 125 Queue* core_queue; 126 }; 127 128 class LocalQueue { 129 public: queue()130 SharedQueue* queue() const { return local_queue_.shared_object(); } 131 132 private: 133 Shared<SharedQueue, AMD_QUEUE_ALIGN_BYTES> local_queue_; 134 }; 135 136 /// @brief Class Queue which encapsulate user mode queues and 137 /// provides Api to access its Read, Write indices using Acquire, 138 /// Release and Relaxed semantics. 139 /* 140 Queue is intended to be an pure interface class and may be wrapped or replaced 141 by tools. 142 All funtions other than Convert and public_handle must be virtual. 143 */ 144 class Queue : public Checked<0xFA3906A679F9DB49>, private LocalQueue { 145 public: Queue()146 Queue() : LocalQueue(), amd_queue_(queue()->amd_queue) { 147 queue()->core_queue = this; 148 public_handle_ = Convert(this); 149 } 150 ~Queue()151 virtual ~Queue() {} 152 153 /// @brief Returns the handle of Queue's public data type 154 /// 155 /// @param queue Pointer to an instance of Queue implementation object 156 /// 157 /// @return hsa_queue_t * Pointer to the public data type of a queue Convert(Queue * queue)158 static __forceinline hsa_queue_t* Convert(Queue* queue) { 159 return (queue != nullptr) ? &queue->amd_queue_.hsa_queue : nullptr; 160 } 161 162 /// @brief Transform the public data type of a Queue's data type into an 163 // instance of it Queue class object 164 /// 165 /// @param queue Handle of public data type of a queue 166 /// 167 /// @return Queue * Pointer to the Queue's implementation object Convert(const hsa_queue_t * queue)168 static __forceinline Queue* Convert(const hsa_queue_t* queue) { 169 return (queue != nullptr) 170 ? reinterpret_cast<SharedQueue*>(reinterpret_cast<uintptr_t>(queue) - 171 offsetof(SharedQueue, amd_queue.hsa_queue))->core_queue 172 : nullptr; 173 } 174 175 /// @brief Inactivate the queue object. Once inactivate a 176 /// queue cannot be used anymore and must be destroyed 177 /// 178 /// @return hsa_status_t Status of request 179 virtual hsa_status_t Inactivate() = 0; 180 181 /// @brief Change the scheduling priority of the queue 182 virtual hsa_status_t SetPriority(HSA_QUEUE_PRIORITY priority) = 0; 183 184 /// @brief Reads the Read Index of Queue using Acquire semantics 185 /// 186 /// @return uint64_t Value of Read index 187 virtual uint64_t LoadReadIndexAcquire() = 0; 188 189 /// @brief Reads the Read Index of Queue using Relaxed semantics 190 /// 191 /// @return uint64_t Value of Read index 192 virtual uint64_t LoadReadIndexRelaxed() = 0; 193 194 /// @brief Reads the Write Index of Queue using Acquire semantics 195 /// 196 /// @return uint64_t Value of Write index 197 virtual uint64_t LoadWriteIndexAcquire() = 0; 198 199 /// Reads the Write Index of Queue using Relaxed semantics 200 /// 201 /// @return uint64_t Value of Write index 202 virtual uint64_t LoadWriteIndexRelaxed() = 0; 203 204 /// @brief Updates the Read Index of Queue using Relaxed semantics 205 /// 206 /// @param value New value of Read index to update 207 virtual void StoreReadIndexRelaxed(uint64_t value) = 0; 208 209 /// @brief Updates the Read Index of Queue using Release semantics 210 /// 211 /// @param value New value of Read index to update 212 virtual void StoreReadIndexRelease(uint64_t value) = 0; 213 214 /// @brief Updates the Write Index of Queue using Relaxed semantics 215 /// 216 /// @param value New value of Write index to update 217 virtual void StoreWriteIndexRelaxed(uint64_t value) = 0; 218 219 /// @brief Updates the Write Index of Queue using Release semantics 220 /// 221 /// @param value New value of Write index to update 222 virtual void StoreWriteIndexRelease(uint64_t value) = 0; 223 224 /// @brief Compares and swaps Write index using Acquire and Release semantics 225 /// 226 /// @param expected Current value of write index 227 /// 228 /// @param value Value of new write index 229 /// 230 /// @return uint64_t Value of write index before the update 231 virtual uint64_t CasWriteIndexAcqRel(uint64_t expected, uint64_t value) = 0; 232 233 /// @brief Compares and swaps Write index using Acquire semantics 234 /// 235 /// @param expected Current value of write index 236 /// 237 /// @param value Value of new write index 238 /// 239 /// @return uint64_t Value of write index before the update 240 virtual uint64_t CasWriteIndexAcquire(uint64_t expected, uint64_t value) = 0; 241 242 /// @brief Compares and swaps Write index using Relaxed semantics 243 /// 244 /// @param expected Current value of write index 245 /// 246 /// @param value Value of new write index 247 /// 248 /// @return uint64_t Value of write index before the update 249 virtual uint64_t CasWriteIndexRelaxed(uint64_t expected, uint64_t value) = 0; 250 251 /// @brief Compares and swaps Write index using Release semantics 252 /// 253 /// @param expected Current value of write index 254 /// 255 /// @param value Value of new write index 256 /// 257 /// @return uint64_t Value of write index before the update 258 virtual uint64_t CasWriteIndexRelease(uint64_t expected, uint64_t value) = 0; 259 260 /// @brief Updates the Write index using Acquire and Release semantics 261 /// 262 /// @param value Value of new write index 263 /// 264 /// @return uint64_t Value of write index before the update 265 virtual uint64_t AddWriteIndexAcqRel(uint64_t value) = 0; 266 267 /// @brief Updates the Write index using Acquire semantics 268 /// 269 /// @param value Value of new write index 270 /// 271 /// @return uint64_t Value of write index before the update 272 virtual uint64_t AddWriteIndexAcquire(uint64_t value) = 0; 273 274 /// @brief Updates the Write index using Relaxed semantics 275 /// 276 /// @param value Value of new write index 277 /// 278 /// @return uint64_t Value of write index before the update 279 virtual uint64_t AddWriteIndexRelaxed(uint64_t value) = 0; 280 281 /// @brief Updates the Write index using Release semantics 282 /// 283 /// @param value Value of new write index 284 /// 285 /// @return uint64_t Value of write index before the update 286 virtual uint64_t AddWriteIndexRelease(uint64_t value) = 0; 287 288 /// @brief Set CU Masking 289 /// 290 /// @param num_cu_mask_count size of mask bit array 291 /// 292 /// @param cu_mask pointer to cu mask 293 /// 294 /// @return hsa_status_t 295 virtual hsa_status_t SetCUMasking(const uint32_t num_cu_mask_count, 296 const uint32_t* cu_mask) = 0; 297 298 // @brief Submits a block of PM4 and waits until it has been executed. 299 virtual void ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b) = 0; 300 SetProfiling(bool enabled)301 virtual void SetProfiling(bool enabled) { 302 AMD_HSA_BITS_SET(amd_queue_.queue_properties, AMD_QUEUE_PROPERTIES_ENABLE_PROFILING, 303 (enabled != 0)); 304 } 305 306 /// @ brief Reports async queue errors to stderr if no other error handler was registered. 307 static void DefaultErrorHandler(hsa_status_t status, hsa_queue_t* source, void* data); 308 309 // Handle of AMD Queue struct 310 amd_queue_t& amd_queue_; 311 public_handle()312 hsa_queue_t* public_handle() const { return public_handle_; } 313 314 typedef void* rtti_t; 315 IsType(rtti_t id)316 bool IsType(rtti_t id) { return _IsA(id); } 317 318 protected: set_public_handle(Queue * ptr,hsa_queue_t * handle)319 static void set_public_handle(Queue* ptr, hsa_queue_t* handle) { 320 ptr->do_set_public_handle(handle); 321 } do_set_public_handle(hsa_queue_t * handle)322 virtual void do_set_public_handle(hsa_queue_t* handle) { 323 public_handle_ = handle; 324 } 325 326 virtual bool _IsA(rtti_t id) const = 0; 327 328 hsa_queue_t* public_handle_; 329 330 private: 331 DISALLOW_COPY_AND_ASSIGN(Queue); 332 }; 333 } 334 335 #endif // header guard 336