1 /* global_state.h                  -*-C++-*-
2  *
3  *************************************************************************
4  *
5  *  @copyright
6  *  Copyright (C) 2009-2013, Intel Corporation
7  *  All rights reserved.
8  *
9  *  @copyright
10  *  Redistribution and use in source and binary forms, with or without
11  *  modification, are permitted provided that the following conditions
12  *  are met:
13  *
14  *    * Redistributions of source code must retain the above copyright
15  *      notice, this list of conditions and the following disclaimer.
16  *    * Redistributions in binary form must reproduce the above copyright
17  *      notice, this list of conditions and the following disclaimer in
18  *      the documentation and/or other materials provided with the
19  *      distribution.
20  *    * Neither the name of Intel Corporation nor the names of its
21  *      contributors may be used to endorse or promote products derived
22  *      from this software without specific prior written permission.
23  *
24  *  @copyright
25  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26  *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27  *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28  *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29  *  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
30  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
31  *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
32  *  OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
33  *  AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
35  *  WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  *  POSSIBILITY OF SUCH DAMAGE.
37  **************************************************************************/
38 
39 /**
40  * @file global_state.h
41  *
42  * @brief The global_state_t structure contains most of the global context
43  * maintained by the Intel Cilk runtime.
44  */
45 
46 #ifndef INCLUDED_GLOBAL_STATE_DOT_H
47 #define INCLUDED_GLOBAL_STATE_DOT_H
48 
49 #include <cilk/common.h>
50 
51 #include "frame_malloc.h"
52 #include "stats.h"
53 #include "bug.h"
54 #include "cilk_fiber.h"
55 
56 __CILKRTS_BEGIN_EXTERN_C
57 
58 /**
59  * Non-null place-holder for a stack handle that has no meaningful value.
60  */
61 #define PLACEHOLDER_FIBER  ((cilk_fiber *) -2)
62 
/**
 * States for record_or_replay (see global_state_t::record_or_replay).
 */
enum record_replay_t {
    RECORD_REPLAY_NONE, ///< Not recording or replaying a log
    RECORD_LOG,         ///< Recording a log for replay later
    REPLAY_LOG          ///< Replaying a log recorded earlier
};
71 
72 /**
73  * @brief The global state is a structure that is shared by all workers in
74  * Cilk.
75  *
76  * Make the structure ready for use by calling
77  * cilkg_init_global_state() and then cilkg_publish_global_state().
78  *
79  * The same global lock should be held while both of these methods are
80  * called.  These methods are split because it is useful to execute
81  * other runtime initialization code in between.
82  *
83  * After cilkg_publish_global_state() has completed, Cilk runtime
84  * methods may call cilkg_get_global_state() to look at the published
85  * value without holding the global lock.
86  *
87  * Finally, clean up the global state by calling
88  * cilkg_deinit_global_state().  This method should be called only
89  * after all calls to cilkg_get_global_state() have completed, and
90  * while holding the global lock.
91  *
92  * Before initialization and after deinitialization, the fields in the
93  * global state have unspecified values, except for a few special
94  * fields labeled "USER SETTING", which can be read and written before
95  * initialization and after deinitialization.
96  */
97 
struct global_state_t { /* COMMON_PORTABLE */

    /* Fields described as "(fixed)" should not be changed after
     * initialization.
     */

    /*************************************************************************
     * Note that debugger integration must reach into the
     * global state!  The debugger integration depends on the
     * offsets of the addr_size, system_workers, total_workers,
     * stealing_disabled, sysdep, and workers fields.  If these offsets
     * change, the debugger integration library will need to be changed
     * to match!!!
     *************************************************************************/

    int addr_size; ///< Number of bytes for an address, used by debugger (fixed)

    int system_workers; ///< Number of system workers (fixed)

    /**
     * @brief USER SETTING: Maximum number of user workers that can be
     * bound to cilk workers.
     *
     * 0 unless set by user.  Call cilkg_calc_max_user_workers to get
     * the value.
     */
    int max_user_workers;

    int total_workers;  ///< Total number of worker threads allocated (fixed)

    int workers_running; ///< True when system workers have been started

    /// Set by debugger to disable stealing (fixed)
    int stealing_disabled;

    /// System-dependent part of the global state
    struct global_sysdep_state *sysdep;

    /// Array of worker structures.
    __cilkrts_worker **workers;

    /******* END OF DEBUGGER-INTEGRATION FIELDS ***************/

    /// Number of frames in each worker's lazy task queue
    __STDNS size_t ltqsize;

    /**
     * @brief USER SETTING: Force all possible reductions.
     *
     * TRUE if running a p-tool that requires reducers to call the reduce()
     * method even if no actual stealing occurs.
     *
     * When set to TRUE, runtime will simulate steals, forcing calls to
     * the reduce() methods of reducers.
     *
     */
    int force_reduce;

    /// USER SETTING: Per-worker fiber pool size
    int fiber_pool_size;

    /// USER SETTING: Global fiber pool size
    int global_fiber_pool_size;

    /**
     * @brief TRUE when workers should exit scheduling loop so we can
     * shut down the runtime and free the global state.
     *
     * @note @c work_done will be checked *FREQUENTLY* in the scheduling loop
     * by idle workers.  We need to ensure that it's not in a cache line which
     * may be invalidated by other cores.  The surrounding fields are either
     * constant after initialization or not used until shutdown (stats) so we
     * should be OK.
     */
    volatile int work_done;

    int under_ptool;     ///< True when running under a serial PIN tool

    statistics stats;    ///< Statistics on use of runtime

    /**
     * @brief USER SETTING: Maximum number of stacks the runtime will
     * allocate (apart from those created by the OS when worker
     * threads are created).
     *
     * If max_stacks == 0, there is no pre-defined maximum.
     */
    unsigned max_stacks;

    /// Size of each stack
    size_t stack_size;

    /// Global cache for per-worker memory
    struct __cilkrts_frame_cache frame_malloc;

    /// Global fiber pool
    cilk_fiber_pool fiber_pool;


    /**
     * @brief Track whether the runtime has failed to allocate a
     * stack.
     *
     * Setting this flag prevents multiple warnings from being
     * issued.
     */
    int failure_to_allocate_stack;

    /**
     * @brief USER SETTING: indicate record or replay log.
     * Set to NULL if not used in this run.
     */
    char *record_replay_file_name;

    /**
     * @brief Record/replay state.
     * Valid states are:
     *   RECORD_REPLAY_NONE - Not recording or replaying a log
     *   RECORD_LOG - Recording a log for replay later
     *   REPLAY_LOG - Replay a log recorded earlier
     */
    enum record_replay_t record_or_replay;

    /**
     * @brief Buffer to force max_steal_failures to appear on a
     * different cache line from the previous member variables.
     *
     * This padding is needed because max_steal_failures is read
     * constantly and other modified values in the global state will
     * cause thrashing.
     */
    char cache_buf[64];

    /**
     * @brief Maximum number of times a thread should fail to steal
     * before checking if Cilk is shutting down.
     */
    unsigned int max_steal_failures;

    /// Pointer to scheduler entry point
    void (*scheduler)(__cilkrts_worker *w);

    /**
     * @brief Buffer to force P and Q to appear on a different cache
     * line from the previous member variables.
     */
    char cache_buf_2[64];

    int P;         ///< USER SETTING: number of system workers + 1 (fixed)
    int Q;         ///< Number of user threads currently bound to workers
};
248 
/**
 * @brief Initialize the global state object.  This method must
 * complete before any fields in the global state are referenced,
 * except those specified as "user-settable values".
 */
254 global_state_t* cilkg_init_global_state();
255 
256 /**
257  * @brief Publish the global state object, so that
258  * cilkg_is_published can return true.
259  *
260  * @param g - the global state created by cilkg_init_global_state() to
261  * publish.
262  *
263  * After the global state object has been published, a thread should
264  * not modify this state unless it has exclusive access (i.e., holds
265  * the global lock).
266  */
267 void cilkg_publish_global_state(global_state_t* g);
268 
269 /**
270  * @brief Return true if the global state has been fully initialized
271  * and published, and has not been deinitialized.
272  */
273 int cilkg_is_published(void);
274 
275 /**
276  * @brief De-initializes the global state object.  Must be called to free
277  * resources when the global state is no longer needed.
278  */
279 void cilkg_deinit_global_state(void);
280 
281 /**
282  * @brief Returns the global state object.  Result is valid only if the
283  * global state has been published (see cilkg_publish_global_state()).
284  */
285 static inline
cilkg_get_global_state(void)286 global_state_t* cilkg_get_global_state(void)
287 {
288     // "private" extern declaration:
289     extern global_state_t *cilkg_singleton_ptr;
290 
291     __CILKRTS_ASSERT(cilkg_singleton_ptr); // Debug only
292     return cilkg_singleton_ptr;
293 }
294 
295 
296 /**
297  * @brief Implementation of __cilkrts_set_params.
298  *
299  * Set user controllable parameters
300  * @param param - string specifying parameter to be set
301  * @param value - string specifying new value
302  * @returns One of: CILKG_SET_PARAM_SUCCESS ( = 0),
303  *    CILKG_SET_PARAM_UNIMP, CILKG_SET_PARAM_XRANGE,
304  *    CILKG_SET_PARAM_INVALID, or CILKG_SET_PARAM_LATE.
305  *
306  * @attention The wide character version __cilkrts_set_param_w() is available
307  * only on Windows.
308  *
309  * Allowable parameter names:
310  *
311  * - "nworkers" - number of processors that should run Cilk code.
312  *   The value is a string of digits to be parsed by strtol.
313  *
314  * - "force reduce" - test reducer callbacks by allocating new views
315  *   for every spawn within which a reducer is accessed.  This can
316  *   significantly reduce performance.  The value is "1" or "true"
317  *   to enable, "0" or "false" to disable.
318  *   @warning Enabling "force reduce" when running with more than a single
319  *   worker is currently broken.
320  *
321  * - "max user workers" - (Not publicly documented) Sets the number of slots
322  *   allocated for user worker threads
323  *
324  * - "local stacks" - (Not publicly documented) Number of stacks we'll hold in
325  *   the per-worker stack cache.  Range 1 .. 42.  See
326  *   cilkg_init_global_state for details.
327  *
328  * - "shared stacks" - (Not publicly documented) Maximum number of stacks
329  *   we'll hold in the global stack cache. Maximum value is 42.  See
330  *   __cilkrts_make_global_state for details
331  *
332  * - "nstacks" - (Not publicly documented at this time, though it may be
333  *   exposed in the future) Sets the maximum number of stacks permitted at one
334  *   time.  If the runtime reaches this maximum, it will cease to allocate
335  *   stacks and the app will lose parallelism.  0 means unlimited.  Default is
336  *   unlimited.  Minimum is twice the number of worker threads, though that
337  *   cannot be tested at this time.
338  */
339 int cilkg_set_param(const char* param, const char* value);
340 #ifdef _WIN32
341 /**
342  * @brief Implementation of __cilkrts_set_params for Unicode characters on
343  * Windows.  See the documentation on @ref cilkg_set_param for more details.
344  *
345  * Set user controllable parameters
346  * @param param - string specifying parameter to be set
347  * @param value - string specifying new value
348  * @returns One of: CILKG_SET_PARAM_SUCCESS ( = 0),
349  *    CILKG_SET_PARAM_UNIMP, CILKG_SET_PARAM_XRANGE,
350  *    CILKG_SET_PARAM_INVALID, or CILKG_SET_PARAM_LATE.
351  */
352 int cilkg_set_param_w(const wchar_t* param, const wchar_t* value);
353 #endif
354 
355 /**
356  * @brief implementation of __cilkrts_get_nworkers()
357  */
358 static inline
cilkg_get_nworkers(void)359 int cilkg_get_nworkers(void)
360 {
361     // "private" extern declaration
362     extern global_state_t* cilkg_get_user_settable_values(void);
363     return cilkg_get_user_settable_values()->P;
364 }
365 
/**
 * @brief implementation of __cilkrts_get_total_workers()
 */
static inline
int cilkg_get_total_workers(void)
{
    // "private" extern declaration
    extern int cilkg_calc_total_workers(void);

    // This number can fluctuate until initialization completes, so it
    // is recomputed from scratch on every call.
    return cilkg_calc_total_workers();
}
379 
380 /**
381  * @brief implementation of __cilkrts_get_force_reduce()
382  */
383 static inline
cilkg_get_force_reduce(void)384 int cilkg_get_force_reduce(void)
385 {
386     // "private" extern declaration
387     extern global_state_t* cilkg_get_user_settable_values(void);
388     return cilkg_get_user_settable_values()->force_reduce;
389 }
390 
391 /**
392  * @brief implementation of __cilkrts_get_stack_size()
393  */
394 static inline
cilkg_get_stack_size(void)395 size_t cilkg_get_stack_size(void)
396 {
397     // "private" extern declaration
398     extern global_state_t* cilkg_get_user_settable_values(void);
399     return cilkg_get_user_settable_values()->stack_size;
400 }
401 
402 /**
403  * @brief Run the scheduler function stored in the global_state
404  *
405  * Look up the scheduler function in global_state and run it.  Report a fatal
406  * error if an exception escapes the scheduler function.
407  *
408  * @param w - Worker structure to associate with the current thread.
409  *
410  * @attention The scheduler field of the global state must be set before this
411  * function is called.
412  */
413 void __cilkrts_run_scheduler_with_exceptions(__cilkrts_worker *w);
414 
415 __CILKRTS_END_EXTERN_C
416 
417 #endif // ! defined(INCLUDED_GLOBAL_STATE_DOT_H)
418