1 ////////////////////////////////////////////////////////////////////////////////
2 //
3 // The University of Illinois/NCSA
4 // Open Source License (NCSA)
5 //
6 // Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
7 //
8 // Developed by:
9 //
10 // AMD Research and AMD HSA Software Development
11 //
12 // Advanced Micro Devices, Inc.
13 //
14 // www.amd.com
15 //
16 // Permission is hereby granted, free of charge, to any person obtaining a copy
17 // of this software and associated documentation files (the "Software"), to
18 // deal with the Software without restriction, including without limitation
19 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
20 // and/or sell copies of the Software, and to permit persons to whom the
21 // Software is furnished to do so, subject to the following conditions:
22 //
23 // - Redistributions of source code must retain the above copyright notice,
24 // this list of conditions and the following disclaimers.
25 // - Redistributions in binary form must reproduce the above copyright
26 // notice, this list of conditions and the following disclaimers in
27 // the documentation and/or other materials provided with the distribution.
28 // - Neither the names of Advanced Micro Devices, Inc,
29 // nor the names of its contributors may be used to endorse or promote
30 // products derived from this Software without specific prior written
31 // permission.
32 //
33 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
36 // THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
37 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
38 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
39 // DEALINGS WITH THE SOFTWARE.
40 //
41 ////////////////////////////////////////////////////////////////////////////////
42
43 #ifndef HSA_RUNTME_CORE_SIGNAL_CPP_
44 #define HSA_RUNTME_CORE_SIGNAL_CPP_
45
46 #include "core/inc/signal.h"
47
48 #include <algorithm>
49 #include "core/util/timer.h"
50
51 namespace core {
52
// Lock taken (via ScopedAcquire) by registerIpc(), deregisterIpc(),
// lookupIpc() and duplicateIpc() before they read or modify ipcMap_.
53 KernelMutex Signal::ipcLock_;
// Table of IPC signals: maps an hsa_signal_t handle value to the Signal that
// registerIpc() stored for it.
54 std::map<decltype(hsa_signal_t::handle), Signal*> Signal::ipcMap_;
55
registerIpc()56 void Signal::registerIpc() {
57 ScopedAcquire<KernelMutex> lock(&ipcLock_);
58 auto handle = Convert(this);
59 assert(ipcMap_.find(handle.handle) == ipcMap_.end() &&
60 "Can't register the same IPC signal twice.");
61 ipcMap_[handle.handle] = this;
62 }
63
deregisterIpc()64 bool Signal::deregisterIpc() {
65 ScopedAcquire<KernelMutex> lock(&ipcLock_);
66 if (refcount_ != 0) return false;
67 auto handle = Convert(this);
68 const auto& it = ipcMap_.find(handle.handle);
69 assert(it != ipcMap_.end() && "Deregister on non-IPC signal.");
70 ipcMap_.erase(it);
71 return true;
72 }
73
lookupIpc(hsa_signal_t signal)74 Signal* Signal::lookupIpc(hsa_signal_t signal) {
75 ScopedAcquire<KernelMutex> lock(&ipcLock_);
76 const auto& it = ipcMap_.find(signal.handle);
77 if (it == ipcMap_.end()) return nullptr;
78 return it->second;
79 }
80
duplicateIpc(hsa_signal_t signal)81 Signal* Signal::duplicateIpc(hsa_signal_t signal) {
82 ScopedAcquire<KernelMutex> lock(&ipcLock_);
83 const auto& it = ipcMap_.find(signal.handle);
84 if (it == ipcMap_.end()) return nullptr;
85 it->second->refcount_++;
86 it->second->Retain();
87 return it->second;
88 }
89
Release()90 void Signal::Release() {
91 if (--retained_ != 0) return;
92 if (!isIPC())
93 doDestroySignal();
94 else if (deregisterIpc())
95 doDestroySignal();
96 }
97
~Signal()98 Signal::~Signal() {
99 signal_.kind = AMD_SIGNAL_KIND_INVALID;
100 if (refcount_ == 1 && isIPC()) {
101 refcount_ = 0;
102 deregisterIpc();
103 }
104 }
105
// Waits until any one of a set of signals satisfies its condition, or until
// the timeout elapses.
//
// Parameters:
//   signal_count     - number of entries in hsa_signals, conds and values.
//   hsa_signals      - the signals to watch.
//   conds / values   - per-signal comparison: signal i is satisfied when its
//                      current value compares to values[i] as conds[i]
//                      (EQ, NE, GTE or LT) requires.
//   timeout          - maximum wait; converted to seconds by dividing by the
//                      HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY value.
//   wait_hint        - HSA_WAIT_STATE_ACTIVE forces pure polling; any other
//                      value allows blocking on the signals' events, unless
//                      the checks below downgrade it to active.
//   satisfying_value - if non-NULL, receives the signal value that met the
//                      condition.
//
// Returns the index of the first signal found satisfied, or the index of a
// signal whose event is of type HSA_EVENTTYPE_MEMORY with a non-zero Failure
// field (satisfying_value is not written in that case). Returns uint32_t(-1)
// on timeout, if a signal is not valid, or if a condition code is unknown.
WaitAny(uint32_t signal_count,const hsa_signal_t * hsa_signals,const hsa_signal_condition_t * conds,const hsa_signal_value_t * values,uint64_t timeout,hsa_wait_state_t wait_hint,hsa_signal_value_t * satisfying_value)106 uint32_t Signal::WaitAny(uint32_t signal_count, const hsa_signal_t* hsa_signals,
107 const hsa_signal_condition_t* conds, const hsa_signal_value_t* values,
108 uint64_t timeout, hsa_wait_state_t wait_hint,
109 hsa_signal_value_t* satisfying_value) {
// View the caller's array as hsa_signal_handle elements so each entry can be
// used with -> below (constness is cast away).
110 hsa_signal_handle* signals =
111 reinterpret_cast<hsa_signal_handle*>(const_cast<hsa_signal_t*>(hsa_signals));
112
// Retain every signal up front; the guard that follows issues the matching
// Release() calls (presumably when the enclosing scope exits).
113 for (uint32_t i = 0; i < signal_count; i++) signals[i]->Retain();
114
115 MAKE_SCOPE_GUARD([&]() {
116 for (uint32_t i = 0; i < signal_count; i++) signals[i]->Release();
117 });
118
// Increment each signal's waiting_ counter. `prior` accumulates, through
// Max, the pre-increment counter values; a second guard undoes the
// increments.
119 uint32_t prior = 0;
120 for (uint32_t i = 0; i < signal_count; i++) prior = Max(prior, signals[i]->waiting_++);
121
122 MAKE_SCOPE_GUARD([&]() {
123 for (uint32_t i = 0; i < signal_count; i++) signals[i]->waiting_--;
124 });
125
126 // Allow only the first waiter to sleep (temporary, known to be bad).
127 if (prior != 0) wait_hint = HSA_WAIT_STATE_ACTIVE;
128
129 // Ensure that all signals in the list can be slept on.
130 if (wait_hint != HSA_WAIT_STATE_ACTIVE) {
131 for (uint32_t i = 0; i < signal_count; i++) {
132 if (signals[i]->EopEvent() == NULL) {
133 wait_hint = HSA_WAIT_STATE_ACTIVE;
134 break;
135 }
136 }
137 }
138
// Collect the event pointers needed for a blocking wait. Up to small_size
// signals use the stack array; larger counts use a heap array. The list is
// sorted and de-duplicated so unique_evts counts distinct events. On the
// active path evts stays NULL and unique_evts stays 0.
139 const uint32_t small_size = 10;
140 HsaEvent* short_evts[small_size];
141 HsaEvent** evts = NULL;
142 uint32_t unique_evts = 0;
143 if (wait_hint != HSA_WAIT_STATE_ACTIVE) {
144 if (signal_count > small_size)
145 evts = new HsaEvent* [signal_count];
146 else
147 evts = short_evts;
148 for (uint32_t i = 0; i < signal_count; i++)
149 evts[i] = signals[i]->EopEvent();
150 std::sort(evts, evts + signal_count);
151 HsaEvent** end = std::unique(evts, evts + signal_count);
152 unique_evts = uint32_t(end - evts);
153 }
// Frees the heap array when one may have been allocated; evts is NULL here if
// the active path was taken, and delete[] on NULL does nothing.
154 MAKE_SCOPE_GUARD([&]() {
155 if (signal_count > small_size) delete[] evts;
156 });
157
158 int64_t value;
159
160 timer::fast_clock::time_point start_time = timer::fast_clock::now();
161
162 // Set a polling timeout value
// Until this much time has elapsed the loop below keeps polling rather than
// blocking on events.
163 const timer::fast_clock::duration kMaxElapsed = std::chrono::microseconds(200);
164
165 // Convert timeout value into the fast_clock domain
166 uint64_t hsa_freq;
167 HSA::hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &hsa_freq);
168 const timer::fast_clock::duration fast_timeout =
169 timer::duration_from_seconds<timer::fast_clock::duration>(
170 double(timeout) / double(hsa_freq));
171
// Main loop: scan all signals once per iteration, then check the timeout and
// either poll again or block on the events.
172 bool condition_met = false;
173 while (true) {
174 for (uint32_t i = 0; i < signal_count; i++) {
// A signal that is no longer valid aborts the whole wait.
175 if (!signals[i]->IsValid()) return uint32_t(-1);
176
177 // Handling special event.
// A memory event with a non-zero Failure field ends the wait with this
// signal's index; satisfying_value is left untouched.
178 if (signals[i]->EopEvent() != NULL) {
179 const HSA_EVENTTYPE event_type =
180 signals[i]->EopEvent()->EventData.EventType;
181 if (event_type == HSA_EVENTTYPE_MEMORY) {
182 const HsaMemoryAccessFault& fault =
183 signals[i]->EopEvent()->EventData.EventData.MemoryAccessFault;
// Failure is read as a raw 32-bit word to test whether any of it is set.
184 const uint32_t* failure =
185 reinterpret_cast<const uint32_t*>(&fault.Failure);
186 if (*failure != 0) {
187 return i;
188 }
189 }
190 }
191
// Read the current signal value with relaxed ordering.
192 value =
193 atomic::Load(&signals[i]->signal_.value, std::memory_order_relaxed);
194
// Evaluate this signal's condition against its comparison value.
195 switch (conds[i]) {
196 case HSA_SIGNAL_CONDITION_EQ: {
197 condition_met = (value == values[i]);
198 break;
199 }
200 case HSA_SIGNAL_CONDITION_NE: {
201 condition_met = (value != values[i]);
202 break;
203 }
204 case HSA_SIGNAL_CONDITION_GTE: {
205 condition_met = (value >= values[i]);
206 break;
207 }
208 case HSA_SIGNAL_CONDITION_LT: {
209 condition_met = (value < values[i]);
210 break;
211 }
// Unknown condition code: give up.
212 default:
213 return uint32_t(-1);
214 }
215 if (condition_met) {
216 if (satisfying_value != NULL) *satisfying_value = value;
217 return i;
218 }
219 }
220
// No signal was satisfied on this pass; fail once the elapsed time exceeds
// the timeout.
221 timer::fast_clock::time_point time = timer::fast_clock::now();
222 if (time - start_time > fast_timeout) {
223 return uint32_t(-1);
224 }
225
// Active waiters never block; they rescan immediately.
226 if (wait_hint == HSA_WAIT_STATE_ACTIVE) {
227 continue;
228 }
229
// Keep polling during the initial kMaxElapsed window.
230 if (time - start_time < kMaxElapsed) {
231 // os::uSleep(20);
232 continue;
233 }
234
// Block on the collected events for the remaining time, expressed in
// milliseconds and clamped to 0xFFFFFFFE, then rescan. The `false` argument
// presumably selects wait-for-any rather than wait-for-all - confirm against
// the thunk API.
235 uint32_t wait_ms;
236 auto time_remaining = fast_timeout - (time - start_time);
237 uint64_t ct=timer::duration_cast<std::chrono::milliseconds>(
238 time_remaining).count();
239 wait_ms = (ct>0xFFFFFFFEu) ? 0xFFFFFFFEu : ct;
240 hsaKmtWaitOnMultipleEvents(evts, unique_evts, false, wait_ms);
241 }
242 }
243
SignalGroup(uint32_t num_signals,const hsa_signal_t * hsa_signals)244 SignalGroup::SignalGroup(uint32_t num_signals, const hsa_signal_t* hsa_signals)
245 : count(num_signals) {
246 if (count != 0) {
247 signals = new hsa_signal_t[count];
248 } else {
249 signals = NULL;
250 }
251 if (signals == NULL) return;
252 for (uint32_t i = 0; i < count; i++) signals[i] = hsa_signals[i];
253 }
254
255 } // namespace core
256
257 #endif // header guard
258