1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 //                        Kokkos v. 3.0
6 //       Copyright (2020) National Technology & Engineering
7 //               Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44 
45 #ifndef KOKKOS_THREADS_HPP
46 #define KOKKOS_THREADS_HPP
47 
48 #include <Kokkos_Macros.hpp>
49 #if defined(KOKKOS_ENABLE_THREADS)
50 
51 #include <Kokkos_Core_fwd.hpp>
52 
#include <cstddef>
#include <cstdint>
#include <iosfwd>
55 #include <Kokkos_HostSpace.hpp>
56 #include <Kokkos_ScratchSpace.hpp>
57 #include <Kokkos_Layout.hpp>
58 #include <Kokkos_MemoryTraits.hpp>
59 #include <impl/Kokkos_Profiling_Interface.hpp>
60 #include <impl/Kokkos_Tags.hpp>
61 #include <impl/Kokkos_ExecSpaceInitializer.hpp>
62 
63 /*--------------------------------------------------------------------------*/
64 
namespace Kokkos {
namespace Impl {
// Forward declaration only; the full definition is pulled in later by
// <Threads/Kokkos_ThreadsExec.hpp>, included near the end of this header.
class ThreadsExec;
}  // namespace Impl
}  // namespace Kokkos
70 
71 /*--------------------------------------------------------------------------*/
72 
73 namespace Kokkos {
74 
75 /** \brief  Execution space for a pool of Pthreads or C11 threads on a CPU. */
76 class Threads {
77  public:
78   //! \name Type declarations that all Kokkos devices must provide.
79   //@{
80   //! Tag this class as a kokkos execution space
81   using execution_space = Threads;
82   using memory_space    = Kokkos::HostSpace;
83 
84   //! This execution space preferred device_type
85   using device_type = Kokkos::Device<execution_space, memory_space>;
86 
87   using array_layout = Kokkos::LayoutRight;
88   using size_type    = memory_space::size_type;
89 
90   using scratch_memory_space = ScratchMemorySpace<Threads>;
91 
92   //@}
93   /*------------------------------------------------------------------------*/
94   //! \name Static functions that all Kokkos devices must implement.
95   //@{
96 
97   /// \brief True if and only if this method is being called in a
98   ///   thread-parallel function.
99   static int in_parallel();
100 
101   /// \brief Print configuration information to the given output stream.
102   static void print_configuration(std::ostream&, const bool detail = false);
103 
104   /// \brief Wait until all dispatched functors complete.
105   ///
106   /// The parallel_for or parallel_reduce dispatch of a functor may
107   /// return asynchronously, before the functor completes.  This
108   /// method does not return until all dispatched functors on this
109   /// device have completed.
110   static void impl_static_fence();
111 
112   void fence() const;
113 
114   /** \brief  Return the maximum amount of concurrency.  */
115   static int concurrency();
116 
117   /// \brief Free any resources being consumed by the device.
118   ///
119   /// For the Threads device, this terminates spawned worker threads.
120   static void impl_finalize();
121 
122   //@}
123   /*------------------------------------------------------------------------*/
124   /*------------------------------------------------------------------------*/
125   //! \name Space-specific functions
126   //@{
127 
128   /** \brief Initialize the device in the "ready to work" state.
129    *
130    *  The device is initialized in a "ready to work" or "awake" state.
131    *  This state reduces latency and thus improves performance when
132    *  dispatching work.  However, the "awake" state consumes resources
133    *  even when no work is being done.  You may call sleep() to put
134    *  the device in a "sleeping" state that does not consume as many
135    *  resources, but it will take time (latency) to awaken the device
136    *  again (via the wake()) method so that it is ready for work.
137    *
138    *  Teams of threads are distributed as evenly as possible across
139    *  the requested number of numa regions and cores per numa region.
140    *  A team will not be split across a numa region.
141    *
142    *  If the 'use_' arguments are not supplied the hwloc is queried
143    *  to use all available cores.
144    */
145   static void impl_initialize(unsigned threads_count             = 0,
146                               unsigned use_numa_count            = 0,
147                               unsigned use_cores_per_numa        = 0,
148                               bool allow_asynchronous_threadpool = false);
149 
150   static int impl_is_initialized();
151 
152   static Threads& impl_instance(int = 0);
153 
154   //----------------------------------------
155 
156   static int impl_thread_pool_size(int depth = 0);
157 #if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
158   static int impl_thread_pool_rank();
159 #else
impl_thread_pool_rank()160   KOKKOS_INLINE_FUNCTION static int impl_thread_pool_rank() { return 0; }
161 #endif
162 
impl_max_hardware_threads()163   inline static unsigned impl_max_hardware_threads() {
164     return impl_thread_pool_size(0);
165   }
impl_hardware_thread_id()166   KOKKOS_INLINE_FUNCTION static unsigned impl_hardware_thread_id() {
167     return impl_thread_pool_rank();
168   }
169 
impl_instance_id() const170   uint32_t impl_instance_id() const noexcept { return 0; }
171 
172   static const char* name();
173   //@}
174   //----------------------------------------
175 };
176 
177 namespace Tools {
178 namespace Experimental {
179 template <>
180 struct DeviceTypeTraits<Threads> {
181   static constexpr DeviceType id = DeviceType::Threads;
182 };
183 }  // namespace Experimental
184 }  // namespace Tools
185 
186 namespace Impl {
187 
188 class ThreadsSpaceInitializer : public ExecSpaceInitializerBase {
189  public:
190   ThreadsSpaceInitializer()  = default;
191   ~ThreadsSpaceInitializer() = default;
192   void initialize(const InitArguments& args) final;
193   void finalize(const bool) final;
194   void fence() final;
195   void print_configuration(std::ostream& msg, const bool detail) final;
196 };
197 
198 }  // namespace Impl
199 }  // namespace Kokkos
200 
201 /*--------------------------------------------------------------------------*/
202 
203 namespace Kokkos {
204 namespace Impl {
205 
206 template <>
207 struct MemorySpaceAccess<Kokkos::Threads::memory_space,
208                          Kokkos::Threads::scratch_memory_space> {
209   enum : bool { assignable = false };
210   enum : bool { accessible = true };
211   enum : bool { deepcopy = false };
212 };
213 
214 }  // namespace Impl
215 }  // namespace Kokkos
216 
217 /*--------------------------------------------------------------------------*/
218 
219 #include <Kokkos_ExecPolicy.hpp>
220 #include <Kokkos_Parallel.hpp>
221 #include <Threads/Kokkos_ThreadsExec.hpp>
222 #include <Threads/Kokkos_ThreadsTeam.hpp>
223 #include <Threads/Kokkos_Threads_Parallel.hpp>
224 
225 #include <KokkosExp_MDRangePolicy.hpp>
226 
227 //----------------------------------------------------------------------------
228 //----------------------------------------------------------------------------
229 
230 #endif /* #if defined( KOKKOS_ENABLE_THREADS ) */
231 #endif /* #define KOKKOS_THREADS_HPP */
232