////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//                 AMD Research and AMD HSA Software Development
//
//                 Advanced Micro Devices, Inc.
//
//                 www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//  - Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimers.
//  - Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimers in
//    the documentation and/or other materials provided with the distribution.
//  - Neither the names of Advanced Micro Devices, Inc,
//    nor the names of its contributors may be used to endorse or promote
//    products derived from this Software without specific prior written
//    permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////

// HSA runtime C++ interface file.

#ifndef HSA_RUNTIME_CORE_INC_COMMAND_QUEUE_H_
#define HSA_RUNTIME_CORE_INC_COMMAND_QUEUE_H_
#include <sstream>

#include "core/common/shared.h"

#include "core/inc/checked.h"

#include "core/util/utils.h"

#include "inc/amd_hsa_queue.h"

#include "hsakmt.h"

namespace core {
struct AqlPacket {
  union {
    hsa_kernel_dispatch_packet_t dispatch;
    hsa_barrier_and_packet_t barrier_and;
    hsa_barrier_or_packet_t barrier_or;
    hsa_agent_dispatch_packet_t agent;
  };

  uint8_t type() const {
    return ((dispatch.header >> HSA_PACKET_HEADER_TYPE) &
            ((1 << HSA_PACKET_HEADER_WIDTH_TYPE) - 1));
  }

  bool IsValid() const {
    return (type() <= HSA_PACKET_TYPE_BARRIER_OR) & (type() != HSA_PACKET_TYPE_INVALID);
  }

  std::string string() const {
    std::stringstream string;
    uint8_t type = this->type();

    const char* type_names[] = {
        "HSA_PACKET_TYPE_VENDOR_SPECIFIC", "HSA_PACKET_TYPE_INVALID",
        "HSA_PACKET_TYPE_KERNEL_DISPATCH", "HSA_PACKET_TYPE_BARRIER_AND",
        "HSA_PACKET_TYPE_AGENT_DISPATCH",  "HSA_PACKET_TYPE_BARRIER_OR"};

    string << "type: " << type_names[type]
           << "\nbarrier: " << ((dispatch.header >> HSA_PACKET_HEADER_BARRIER) &
                                ((1 << HSA_PACKET_HEADER_WIDTH_BARRIER) - 1))
           << "\nacquire: " << ((dispatch.header >> HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE) &
                                ((1 << HSA_PACKET_HEADER_WIDTH_SCACQUIRE_FENCE_SCOPE) - 1))
           << "\nrelease: " << ((dispatch.header >> HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE) &
                                ((1 << HSA_PACKET_HEADER_WIDTH_SCRELEASE_FENCE_SCOPE) - 1));

    if (type == HSA_PACKET_TYPE_KERNEL_DISPATCH) {
      string << "\nDim: " << dispatch.setup
             << "\nworkgroup_size: " << dispatch.workgroup_size_x << ", "
             << dispatch.workgroup_size_y << ", " << dispatch.workgroup_size_z
             << "\ngrid_size: " << dispatch.grid_size_x << ", "
             << dispatch.grid_size_y << ", " << dispatch.grid_size_z
             << "\nprivate_size: " << dispatch.private_segment_size
             << "\ngroup_size: " << dispatch.group_segment_size
             << "\nkernel_object: " << dispatch.kernel_object
             << "\nkern_arg: " << dispatch.kernarg_address
             << "\nsignal: " << dispatch.completion_signal.handle;
    }

    if ((type == HSA_PACKET_TYPE_BARRIER_AND) ||
        (type == HSA_PACKET_TYPE_BARRIER_OR)) {
      for (int i = 0; i < 5; i++)
        string << "\ndep[" << i << "]: " << barrier_and.dep_signal[i].handle;
      string << "\nsignal: " << barrier_and.completion_signal.handle;
    }

    return string.str();
  }
};
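
// Usage sketch (illustrative only, not part of the runtime API): composing a
// packet header from the hsa.h field offsets decoded above, then inspecting
// it through AqlPacket.  Assumes the standard HSA_FENCE_SCOPE_SYSTEM value.
//
//   core::AqlPacket pkt = {};
//   pkt.dispatch.header =
//       (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
//       (1 << HSA_PACKET_HEADER_BARRIER) |
//       (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE) |
//       (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE);
//   assert(pkt.IsValid());              // type() decodes to KERNEL_DISPATCH
//   std::string dump = pkt.string();    // human readable field dump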

class Queue;

/// @brief Helper structure to simplify conversion between amd_queue_t and the
/// owning core::Queue object.
struct SharedQueue {
  amd_queue_t amd_queue;
  Queue* core_queue;
};

class LocalQueue {
 public:
  SharedQueue* queue() const { return local_queue_.shared_object(); }

 private:
  Shared<SharedQueue, AMD_QUEUE_ALIGN_BYTES> local_queue_;
};

/// @brief Class Queue encapsulates a user mode queue and provides an API to
/// access its Read and Write indices using Acquire, Release and Relaxed
/// semantics.
/*
Queue is intended to be a pure interface class and may be wrapped or replaced
by tools.
All functions other than Convert and public_handle must be virtual.
*/
class Queue : public Checked<0xFA3906A679F9DB49>, private LocalQueue {
 public:
  Queue() : LocalQueue(), amd_queue_(queue()->amd_queue) {
    queue()->core_queue = this;
    public_handle_ = Convert(this);
  }

  virtual ~Queue() {}

  /// @brief Returns the handle of Queue's public data type
  ///
  /// @param queue Pointer to an instance of Queue implementation object
  ///
  /// @return hsa_queue_t * Pointer to the public data type of a queue
  static __forceinline hsa_queue_t* Convert(Queue* queue) {
    return (queue != nullptr) ? &queue->amd_queue_.hsa_queue : nullptr;
  }

  /// @brief Transform the public data type of a queue into a pointer to its
  /// Queue implementation object
  ///
  /// @param queue Handle of the public data type of a queue
  ///
  /// @return Queue * Pointer to the Queue's implementation object
  static __forceinline Queue* Convert(const hsa_queue_t* queue) {
    return (queue != nullptr)
        ? reinterpret_cast<SharedQueue*>(reinterpret_cast<uintptr_t>(queue) -
                                         offsetof(SharedQueue, amd_queue.hsa_queue))->core_queue
        : nullptr;
  }
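
  // Illustrative only: the two Convert overloads form a round trip between the
  // public hsa_queue_t handle and the implementation object, relying on the
  // SharedQueue layout above.  'impl' stands in for any concrete Queue
  // subclass, none of which are defined in this header.
  //
  //   Queue* impl = /* some concrete queue */;
  //   hsa_queue_t* handle = Queue::Convert(impl);   // handed out to the application
  //   assert(Queue::Convert(handle) == impl);       // recovers the implementation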

  /// @brief Inactivate the queue object. Once inactivated, a queue cannot be
  /// used anymore and must be destroyed
  ///
  /// @return hsa_status_t Status of request
  virtual hsa_status_t Inactivate() = 0;

  /// @brief Change the scheduling priority of the queue
  virtual hsa_status_t SetPriority(HSA_QUEUE_PRIORITY priority) = 0;

  /// @brief Reads the Read Index of Queue using Acquire semantics
  ///
  /// @return uint64_t Value of Read index
  virtual uint64_t LoadReadIndexAcquire() = 0;

  /// @brief Reads the Read Index of Queue using Relaxed semantics
  ///
  /// @return uint64_t Value of Read index
  virtual uint64_t LoadReadIndexRelaxed() = 0;

  /// @brief Reads the Write Index of Queue using Acquire semantics
  ///
  /// @return uint64_t Value of Write index
  virtual uint64_t LoadWriteIndexAcquire() = 0;

  /// @brief Reads the Write Index of Queue using Relaxed semantics
  ///
  /// @return uint64_t Value of Write index
  virtual uint64_t LoadWriteIndexRelaxed() = 0;

  /// @brief Updates the Read Index of Queue using Relaxed semantics
  ///
  /// @param value New value of Read index to update
  virtual void StoreReadIndexRelaxed(uint64_t value) = 0;

  /// @brief Updates the Read Index of Queue using Release semantics
  ///
  /// @param value New value of Read index to update
  virtual void StoreReadIndexRelease(uint64_t value) = 0;

  /// @brief Updates the Write Index of Queue using Relaxed semantics
  ///
  /// @param value New value of Write index to update
  virtual void StoreWriteIndexRelaxed(uint64_t value) = 0;

  /// @brief Updates the Write Index of Queue using Release semantics
  ///
  /// @param value New value of Write index to update
  virtual void StoreWriteIndexRelease(uint64_t value) = 0;

  /// @brief Compares and swaps Write index using Acquire and Release semantics
  ///
  /// @param expected Current value of write index
  ///
  /// @param value Value of new write index
  ///
  /// @return uint64_t Value of write index before the update
  virtual uint64_t CasWriteIndexAcqRel(uint64_t expected, uint64_t value) = 0;

  /// @brief Compares and swaps Write index using Acquire semantics
  ///
  /// @param expected Current value of write index
  ///
  /// @param value Value of new write index
  ///
  /// @return uint64_t Value of write index before the update
  virtual uint64_t CasWriteIndexAcquire(uint64_t expected, uint64_t value) = 0;

  /// @brief Compares and swaps Write index using Relaxed semantics
  ///
  /// @param expected Current value of write index
  ///
  /// @param value Value of new write index
  ///
  /// @return uint64_t Value of write index before the update
  virtual uint64_t CasWriteIndexRelaxed(uint64_t expected, uint64_t value) = 0;

  /// @brief Compares and swaps Write index using Release semantics
  ///
  /// @param expected Current value of write index
  ///
  /// @param value Value of new write index
  ///
  /// @return uint64_t Value of write index before the update
  virtual uint64_t CasWriteIndexRelease(uint64_t expected, uint64_t value) = 0;

  /// @brief Adds a value to the Write index using Acquire and Release semantics
  ///
  /// @param value Value to add to the write index
  ///
  /// @return uint64_t Value of write index before the update
  virtual uint64_t AddWriteIndexAcqRel(uint64_t value) = 0;

  /// @brief Adds a value to the Write index using Acquire semantics
  ///
  /// @param value Value to add to the write index
  ///
  /// @return uint64_t Value of write index before the update
  virtual uint64_t AddWriteIndexAcquire(uint64_t value) = 0;

  /// @brief Adds a value to the Write index using Relaxed semantics
  ///
  /// @param value Value to add to the write index
  ///
  /// @return uint64_t Value of write index before the update
  virtual uint64_t AddWriteIndexRelaxed(uint64_t value) = 0;

  /// @brief Adds a value to the Write index using Release semantics
  ///
  /// @param value Value to add to the write index
  ///
  /// @return uint64_t Value of write index before the update
  virtual uint64_t AddWriteIndexRelease(uint64_t value) = 0;
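
  // Illustrative only: the typical packet submission sequence built on these
  // index operations.  'q' is any concrete Queue implementation and 'ring' /
  // 'doorbell' stand for the base address and doorbell signal published in
  // the public hsa_queue_t; none of these names are defined in this header.
  //
  //   uint64_t slot = q->AddWriteIndexAcqRel(1);      // reserve one packet slot
  //   // copy the AQL packet into ring[slot % size], writing the 16-bit header
  //   // last so the packet processor never observes a partially written packet
  //   hsa_signal_store_screlease(doorbell, slot);     // notify the packet processor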

  /// @brief Set CU Masking
  ///
  /// @param num_cu_mask_count size of mask bit array
  ///
  /// @param cu_mask pointer to cu mask
  ///
  /// @return hsa_status_t
  virtual hsa_status_t SetCUMasking(const uint32_t num_cu_mask_count,
                                    const uint32_t* cu_mask) = 0;
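
  // Illustrative only: enabling the first 32 compute units of a queue, under
  // the assumption (per the comment above) that num_cu_mask_count is the size
  // of the mask in bits and each set bit enables the correspondingly numbered
  // CU.  'q' is a hypothetical concrete Queue.
  //
  //   uint32_t cu_mask[1] = {0xFFFFFFFFu};
  //   hsa_status_t err = q->SetCUMasking(32, cu_mask);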

  /// @brief Submits a block of PM4 and waits until it has been executed.
  virtual void ExecutePM4(uint32_t* cmd_data, size_t cmd_size_b) = 0;

  virtual void SetProfiling(bool enabled) {
    AMD_HSA_BITS_SET(amd_queue_.queue_properties, AMD_QUEUE_PROPERTIES_ENABLE_PROFILING,
                     (enabled != 0));
  }

  /// @brief Reports async queue errors to stderr if no other error handler was registered.
  static void DefaultErrorHandler(hsa_status_t status, hsa_queue_t* source, void* data);

  // Reference to the AMD queue struct backing this queue
  amd_queue_t& amd_queue_;

  hsa_queue_t* public_handle() const { return public_handle_; }

  typedef void* rtti_t;

  bool IsType(rtti_t id) { return _IsA(id); }
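
  // Illustrative only: how a derived queue class might participate in this
  // lightweight RTTI scheme.  'HwQueue' and its static 'rtti_id()' are
  // hypothetical; concrete implementations live outside this header.
  //
  //   class HwQueue : public Queue {
  //    public:
  //     static rtti_t rtti_id() { static int tag; return &tag; }
  //    protected:
  //     bool _IsA(rtti_t id) const override { return id == rtti_id(); }
  //   };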

 protected:
  static void set_public_handle(Queue* ptr, hsa_queue_t* handle) {
    ptr->do_set_public_handle(handle);
  }
  virtual void do_set_public_handle(hsa_queue_t* handle) {
    public_handle_ = handle;
  }

  virtual bool _IsA(rtti_t id) const = 0;

  hsa_queue_t* public_handle_;

 private:
  DISALLOW_COPY_AND_ASSIGN(Queue);
};
}  // namespace core

#endif  // header guard