1 ////////////////////////////////////////////////////////////////////////////////
2 //
3 // The University of Illinois/NCSA
4 // Open Source License (NCSA)
5 //
6 // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
7 //
8 // Developed by:
9 //
10 //                 AMD Research and AMD HSA Software Development
11 //
12 //                 Advanced Micro Devices, Inc.
13 //
14 //                 www.amd.com
15 //
16 // Permission is hereby granted, free of charge, to any person obtaining a copy
17 // of this software and associated documentation files (the "Software"), to
18 // deal with the Software without restriction, including without limitation
19 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
20 // and/or sell copies of the Software, and to permit persons to whom the
21 // Software is furnished to do so, subject to the following conditions:
22 //
23 //  - Redistributions of source code must retain the above copyright notice,
24 //    this list of conditions and the following disclaimers.
25 //  - Redistributions in binary form must reproduce the above copyright
26 //    notice, this list of conditions and the following disclaimers in
27 //    the documentation and/or other materials provided with the distribution.
28 //  - Neither the names of Advanced Micro Devices, Inc,
29 //    nor the names of its contributors may be used to endorse or promote
30 //    products derived from this Software without specific prior written
31 //    permission.
32 //
33 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
36 // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
37 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
38 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
39 // DEALINGS WITH THE SOFTWARE.
40 //
41 ////////////////////////////////////////////////////////////////////////////////
42 
43 #ifndef HSA_RUNTME_CORE_SIGNAL_CPP_
44 #define HSA_RUNTME_CORE_SIGNAL_CPP_
45 
46 #include "core/inc/signal.h"
47 
48 #include <algorithm>
49 #include "core/util/timer.h"
50 
51 namespace core {
52 
// Lock taken by registerIpc, deregisterIpc, lookupIpc and duplicateIpc before
// they touch ipcMap_.
53 KernelMutex Signal::ipcLock_;
// Table of IPC-registered signals: maps an hsa_signal_t::handle value to the
// Signal object that registered under it.
54 std::map<decltype(hsa_signal_t::handle), Signal*> Signal::ipcMap_;
55 
registerIpc()56 void Signal::registerIpc() {
57   ScopedAcquire<KernelMutex> lock(&ipcLock_);
58   auto handle = Convert(this);
59   assert(ipcMap_.find(handle.handle) == ipcMap_.end() &&
60          "Can't register the same IPC signal twice.");
61   ipcMap_[handle.handle] = this;
62 }
63 
deregisterIpc()64 bool Signal::deregisterIpc() {
65   ScopedAcquire<KernelMutex> lock(&ipcLock_);
66   if (refcount_ != 0) return false;
67   auto handle = Convert(this);
68   const auto& it = ipcMap_.find(handle.handle);
69   assert(it != ipcMap_.end() && "Deregister on non-IPC signal.");
70   ipcMap_.erase(it);
71   return true;
72 }
73 
lookupIpc(hsa_signal_t signal)74 Signal* Signal::lookupIpc(hsa_signal_t signal) {
75   ScopedAcquire<KernelMutex> lock(&ipcLock_);
76   const auto& it = ipcMap_.find(signal.handle);
77   if (it == ipcMap_.end()) return nullptr;
78   return it->second;
79 }
80 
duplicateIpc(hsa_signal_t signal)81 Signal* Signal::duplicateIpc(hsa_signal_t signal) {
82   ScopedAcquire<KernelMutex> lock(&ipcLock_);
83   const auto& it = ipcMap_.find(signal.handle);
84   if (it == ipcMap_.end()) return nullptr;
85   it->second->refcount_++;
86   it->second->Retain();
87   return it->second;
88 }
89 
Release()90 void Signal::Release() {
91   if (--retained_ != 0) return;
92   if (!isIPC())
93     doDestroySignal();
94   else if (deregisterIpc())
95     doDestroySignal();
96 }
97 
~Signal()98 Signal::~Signal() {
99   signal_.kind = AMD_SIGNAL_KIND_INVALID;
100   if (refcount_ == 1 && isIPC()) {
101     refcount_ = 0;
102     deregisterIpc();
103   }
104 }
105 
WaitAny(uint32_t signal_count,const hsa_signal_t * hsa_signals,const hsa_signal_condition_t * conds,const hsa_signal_value_t * values,uint64_t timeout,hsa_wait_state_t wait_hint,hsa_signal_value_t * satisfying_value)106 uint32_t Signal::WaitAny(uint32_t signal_count, const hsa_signal_t* hsa_signals,
107                          const hsa_signal_condition_t* conds, const hsa_signal_value_t* values,
108                          uint64_t timeout, hsa_wait_state_t wait_hint,
109                          hsa_signal_value_t* satisfying_value) {
110   hsa_signal_handle* signals =
111       reinterpret_cast<hsa_signal_handle*>(const_cast<hsa_signal_t*>(hsa_signals));
112 
113   for (uint32_t i = 0; i < signal_count; i++) signals[i]->Retain();
114 
115   MAKE_SCOPE_GUARD([&]() {
116     for (uint32_t i = 0; i < signal_count; i++) signals[i]->Release();
117   });
118 
119   uint32_t prior = 0;
120   for (uint32_t i = 0; i < signal_count; i++) prior = Max(prior, signals[i]->waiting_++);
121 
122   MAKE_SCOPE_GUARD([&]() {
123     for (uint32_t i = 0; i < signal_count; i++) signals[i]->waiting_--;
124   });
125 
126   // Allow only the first waiter to sleep (temporary, known to be bad).
127   if (prior != 0) wait_hint = HSA_WAIT_STATE_ACTIVE;
128 
129   // Ensure that all signals in the list can be slept on.
130   if (wait_hint != HSA_WAIT_STATE_ACTIVE) {
131     for (uint32_t i = 0; i < signal_count; i++) {
132       if (signals[i]->EopEvent() == NULL) {
133         wait_hint = HSA_WAIT_STATE_ACTIVE;
134         break;
135       }
136     }
137   }
138 
139   const uint32_t small_size = 10;
140   HsaEvent* short_evts[small_size];
141   HsaEvent** evts = NULL;
142   uint32_t unique_evts = 0;
143   if (wait_hint != HSA_WAIT_STATE_ACTIVE) {
144     if (signal_count > small_size)
145       evts = new HsaEvent* [signal_count];
146     else
147       evts = short_evts;
148     for (uint32_t i = 0; i < signal_count; i++)
149       evts[i] = signals[i]->EopEvent();
150     std::sort(evts, evts + signal_count);
151     HsaEvent** end = std::unique(evts, evts + signal_count);
152     unique_evts = uint32_t(end - evts);
153   }
154   MAKE_SCOPE_GUARD([&]() {
155     if (signal_count > small_size) delete[] evts;
156   });
157 
158   int64_t value;
159 
160   timer::fast_clock::time_point start_time = timer::fast_clock::now();
161 
162   // Set a polling timeout value
163   const timer::fast_clock::duration kMaxElapsed = std::chrono::microseconds(200);
164 
165   // Convert timeout value into the fast_clock domain
166   uint64_t hsa_freq;
167   HSA::hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &hsa_freq);
168   const timer::fast_clock::duration fast_timeout =
169       timer::duration_from_seconds<timer::fast_clock::duration>(
170           double(timeout) / double(hsa_freq));
171 
172   bool condition_met = false;
173   while (true) {
174     for (uint32_t i = 0; i < signal_count; i++) {
175       if (!signals[i]->IsValid()) return uint32_t(-1);
176 
177       // Handling special event.
178       if (signals[i]->EopEvent() != NULL) {
179         const HSA_EVENTTYPE event_type =
180             signals[i]->EopEvent()->EventData.EventType;
181         if (event_type == HSA_EVENTTYPE_MEMORY) {
182           const HsaMemoryAccessFault& fault =
183               signals[i]->EopEvent()->EventData.EventData.MemoryAccessFault;
184           const uint32_t* failure =
185               reinterpret_cast<const uint32_t*>(&fault.Failure);
186           if (*failure != 0) {
187             return i;
188           }
189         }
190       }
191 
192       value =
193           atomic::Load(&signals[i]->signal_.value, std::memory_order_relaxed);
194 
195       switch (conds[i]) {
196         case HSA_SIGNAL_CONDITION_EQ: {
197           condition_met = (value == values[i]);
198           break;
199         }
200         case HSA_SIGNAL_CONDITION_NE: {
201           condition_met = (value != values[i]);
202           break;
203         }
204         case HSA_SIGNAL_CONDITION_GTE: {
205           condition_met = (value >= values[i]);
206           break;
207         }
208         case HSA_SIGNAL_CONDITION_LT: {
209           condition_met = (value < values[i]);
210           break;
211         }
212         default:
213           return uint32_t(-1);
214       }
215       if (condition_met) {
216         if (satisfying_value != NULL) *satisfying_value = value;
217         return i;
218       }
219     }
220 
221     timer::fast_clock::time_point time = timer::fast_clock::now();
222     if (time - start_time > fast_timeout) {
223       return uint32_t(-1);
224     }
225 
226     if (wait_hint == HSA_WAIT_STATE_ACTIVE) {
227       continue;
228     }
229 
230     if (time - start_time < kMaxElapsed) {
231     //  os::uSleep(20);
232       continue;
233     }
234 
235     uint32_t wait_ms;
236     auto time_remaining = fast_timeout - (time - start_time);
237     uint64_t ct=timer::duration_cast<std::chrono::milliseconds>(
238       time_remaining).count();
239     wait_ms = (ct>0xFFFFFFFEu) ? 0xFFFFFFFEu : ct;
240     hsaKmtWaitOnMultipleEvents(evts, unique_evts, false, wait_ms);
241   }
242 }
243 
SignalGroup(uint32_t num_signals,const hsa_signal_t * hsa_signals)244 SignalGroup::SignalGroup(uint32_t num_signals, const hsa_signal_t* hsa_signals)
245     : count(num_signals) {
246   if (count != 0) {
247     signals = new hsa_signal_t[count];
248   } else {
249     signals = NULL;
250   }
251   if (signals == NULL) return;
252   for (uint32_t i = 0; i < count; i++) signals[i] = hsa_signals[i];
253 }
254 
255 }  // namespace core
256 
257 #endif  // header guard
258