/* global_state.h                  -*-C++-*-
 *
 *************************************************************************
 *
 *  @copyright
 *  Copyright (C) 2009-2013, Intel Corporation
 *  All rights reserved.
 *
 *  @copyright
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *
 *    * Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    * Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in
 *      the documentation and/or other materials provided with the
 *      distribution.
 *    * Neither the name of Intel Corporation nor the names of its
 *      contributors may be used to endorse or promote products derived
 *      from this software without specific prior written permission.
 *
 *  @copyright
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 *  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 *  OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 *  AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 *  WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *  POSSIBILITY OF SUCH DAMAGE.
 **************************************************************************/
38
/**
 * @file global_state.h
 *
 * @brief The global_state_t structure contains most of the global context
 * maintained by the Intel Cilk runtime.
 */
45
46 #ifndef INCLUDED_GLOBAL_STATE_DOT_H
47 #define INCLUDED_GLOBAL_STATE_DOT_H
48
49 #include <cilk/common.h>
50
51 #include "frame_malloc.h"
52 #include "stats.h"
53 #include "bug.h"
54 #include "cilk_fiber.h"
55
56 __CILKRTS_BEGIN_EXTERN_C
57
/**
 * Non-null placeholder for a stack (fiber) handle that has no meaningful
 * value.
 *
 * The value is the integer -2 converted to a cilk_fiber pointer.  It is a
 * sentinel only and is not meant to be dereferenced.
 */
#define PLACEHOLDER_FIBER ((cilk_fiber *) -2)
62
/**
 * States for the record_or_replay field of the global state.
 */
enum record_replay_t {
    RECORD_REPLAY_NONE, ///< Not recording or replaying a log
    RECORD_LOG,         ///< Recording a log for replay later
    REPLAY_LOG          ///< Replaying a log recorded earlier
};
71
72 /**
73 * @brief The global state is a structure that is shared by all workers in
74 * Cilk.
75 *
76 * Make the structure ready for use by calling
77 * cilkg_init_global_state() and then cilkg_publish_global_state().
78 *
79 * The same global lock should be held while both of these methods are
80 * called. These methods are split because it is useful to execute
81 * other runtime initialization code in between.
82 *
83 * After cilkg_publish_global_state() has completed, Cilk runtime
84 * methods may call cilkg_get_global_state() to look at the published
85 * value without holding the global lock.
86 *
87 * Finally, clean up the global state by calling
88 * cilkg_deinit_global_state(). This method should be called only
89 * after all calls to cilkg_get_global_state() have completed, and
90 * while holding the global lock.
91 *
92 * Before initialization and after deinitialization, the fields in the
93 * global state have unspecified values, except for a few special
94 * fields labeled "USER SETTING", which can be read and written before
95 * initialization and after deinitialization.
96 */
97
98 struct global_state_t { /* COMMON_PORTABLE */
99
100 /* Fields described as "(fixed)" should not be changed after
101 * initialization.
102 */
103
104 /*************************************************************************
105 * Note that debugger integration must reach into the
106 * global state! The debugger integration is depending on the
107 * offsets of the addr_size, system_workers, total_workers,
108 * stealing_disabled, sysdep, and workers. If these offsets change, the
109 * debugger integration library will need to be changed to match!!!
110 *************************************************************************/
111
112 int addr_size; ///< Number of bytes for an address, used by debugger (fixed)
113
114 int system_workers; ///< Number of system workers (fixed)
115
116 /**
117 * @brief USER SETTING: Maximum number of user workers that can be
118 * bound to cilk workers.
119 *
120 * 0 unless set by user. Call cilkg_calc_max_user_workers to get
121 * the value.
122 */
123 int max_user_workers;
124
125 int total_workers; ///< Total number of worker threads allocated (fixed)
126
127 int workers_running; ///< True when system workers have beens started */
128
129 /// Set by debugger to disable stealing (fixed)
130 int stealing_disabled;
131
132 /// System-dependent part of the global state
133 struct global_sysdep_state *sysdep;
134
135 /// Array of worker structures.
136 __cilkrts_worker **workers;
137
138 /******* END OF DEBUGGER-INTEGRATION FIELDS ***************/
139
140 /// Number of frames in each worker's lazy task queue
141 __STDNS size_t ltqsize;
142
143 /**
144 * @brief USER SETTING: Force all possible reductions.
145 *
146 * TRUE if running a p-tool that requires reducers to call the reduce()
147 * method even if no actual stealing occurs.
148 *
149 * When set to TRUE, runtime will simulate steals, forcing calls to the
150 * the reduce() methods of reducers.
151 *
152 */
153 int force_reduce;
154
155 /// USER SETTING: Per-worker fiber pool size
156 int fiber_pool_size;
157
158 /// USER SETTING: Global fiber pool size
159 int global_fiber_pool_size;
160
161 /**
162 * @brief TRUE when workers should exit scheduling loop so we can
163 * shut down the runtime and free the global state.
164 *
165 * @note @c work_done will be checked *FREQUENTLY* in the scheduling loop
166 * by idle workers. We need to ensure that it's not in a cache line which
167 * may be invalidated by other cores. The surrounding fields are either
168 * constant after initialization or not used until shutdown (stats) so we
169 * should be OK.
170 */
171 volatile int work_done;
172
173 int under_ptool; ///< True when running under a serial PIN tool
174
175 statistics stats; ///< Statistics on use of runtime
176
177 /**
178 * @brief USER SETTING: Maximum number of stacks the runtime will
179 * allocate (apart from those created by the OS when worker
180 * threads are created).
181 *
182 * If max_stacks == 0,there is no pre-defined maximum.
183 */
184 unsigned max_stacks;
185
186 /// Size of each stack
187 size_t stack_size;
188
189 /// Global cache for per-worker memory
190 struct __cilkrts_frame_cache frame_malloc;
191
192 /// Global fiber pool
193 cilk_fiber_pool fiber_pool;
194
195
196 /**
197 * @brief Track whether the runtime has failed to allocate a
198 * stack.
199 *
200 * Setting this flag prevents multiple warnings from being
201 * issued.
202 */
203 int failure_to_allocate_stack;
204
205 /**
206 * @brief USER SETTING: indicate record or replay log.
207 * Set to NULL if not used in this run.
208 */
209 char *record_replay_file_name;
210
211 /**
212 * @brief Record/replay state.
213 * Valid states are:
214 * RECORD_REPLAY_NONE - Not recording or replaying a log
215 * RECORD_LOG - Recording a log for replay later
216 * REPLAY_LOG - Replay a log recorded earlier
217 */
218 enum record_replay_t record_or_replay;
219
220 /**
221 * @brief Buffer to force max_steal_failures to appear on a
222 * different cache line from the previous member variables.
223 *
224 * This padding is needed because max_steal_failures is read
225 * constantly and other modified values in the global state will
226 * cause thrashing.
227 */
228 char cache_buf[64];
229
230 /**
231 * @brief Maximum number of times a thread should fail to steal
232 * before checking if Cilk is shutting down.
233 */
234 unsigned int max_steal_failures;
235
236 /// Pointer to scheduler entry point
237 void (*scheduler)(__cilkrts_worker *w);
238
239 /**
240 * @brief Buffer to force P and Q to appear on a different cache
241 * line from the previous member variables.
242 */
243 char cache_buf_2[64];
244
245 int P; ///< USER SETTING: number of system workers + 1 (fixed)
246 int Q; ///< Number of user threads currently bound to workers
247 };
248
249 /**
250 * @brief Initialize the global state object. This method must both
251 * complete before referencing any fields in the global state, except
252 * those specified as "user-settable values".
253 */
254 global_state_t* cilkg_init_global_state();
255
256 /**
257 * @brief Publish the global state object, so that
258 * cilkg_is_published can return true.
259 *
260 * @param g - the global state created by cilkg_init_global_state() to
261 * publish.
262 *
263 * After the global state object has been published, a thread should
264 * not modify this state unless it has exclusive access (i.e., holds
265 * the global lock).
266 */
267 void cilkg_publish_global_state(global_state_t* g);
268
/**
 * @brief Report whether the global state is currently published.
 *
 * @return True (non-zero) if the global state has been fully initialized
 * and published, and has not been deinitialized.
 */
int cilkg_is_published(void);
274
/**
 * @brief De-initializes the global state object.  Must be called to free
 * resources when the global state is no longer needed.
 *
 * Call this only after all calls to cilkg_get_global_state() have
 * completed, and while holding the global lock.
 */
void cilkg_deinit_global_state(void);
280
281 /**
282 * @brief Returns the global state object. Result is valid only if the
283 * global state has been published (see cilkg_publish_global_state()).
284 */
285 static inline
cilkg_get_global_state(void)286 global_state_t* cilkg_get_global_state(void)
287 {
288 // "private" extern declaration:
289 extern global_state_t *cilkg_singleton_ptr;
290
291 __CILKRTS_ASSERT(cilkg_singleton_ptr); // Debug only
292 return cilkg_singleton_ptr;
293 }
294
295
/**
 * @brief Implementation of __cilkrts_set_param().
 *
 * Set user controllable parameters
 * @param param - string specifying parameter to be set
 * @param value - string specifying new value
 * @returns One of: CILKG_SET_PARAM_SUCCESS ( = 0),
 *    CILKG_SET_PARAM_UNIMP, CILKG_SET_PARAM_XRANGE,
 *    CILKG_SET_PARAM_INVALID, or CILKG_SET_PARAM_LATE.
 *
 * @attention The wide character version __cilkrts_set_param_w() is available
 * only on Windows.
 *
 * Allowable parameter names:
 *
 * - "nworkers" - number of processors that should run Cilk code.
 *   The value is a string of digits to be parsed by strtol.
 *
 * - "force reduce" - test reducer callbacks by allocating new views
 *   for every spawn within which a reducer is accessed.  This can
 *   significantly reduce performance.  The value is "1" or "true"
 *   to enable, "0" or "false" to disable.
 *   @warning Enabling "force reduce" when running with more than a single
 *   worker is currently broken.
 *
 * - "max user workers" - (Not publicly documented) Sets the number of slots
 *   allocated for user worker threads
 *
 * - "local stacks" - (Not publicly documented) Number of stacks we'll hold in
 *   the per-worker stack cache.  Range 1 .. 42.  See
 *   cilkg_init_global_state for details.
 *
 * - "shared stacks" - (Not publicly documented) Maximum number of stacks
 *   we'll hold in the global stack cache.  Maximum value is 42.  See
 *   __cilkrts_make_global_state for details
 *
 * - "nstacks" - (Not publicly documented at this time, though it may be
 *   exposed in the future) Sets the maximum number of stacks permitted at one
 *   time.  If the runtime reaches this maximum, it will cease to allocate
 *   stacks and the app will lose parallelism.  0 means unlimited.  Default is
 *   unlimited.  Minimum is twice the number of worker threads, though that
 *   cannot be tested at this time.
 */
int cilkg_set_param(const char* param, const char* value);
#ifdef _WIN32
/**
 * @brief Implementation of __cilkrts_set_param_w(), the wide-character
 * (Unicode) variant available only on Windows.  See the documentation on
 * @ref cilkg_set_param for more details.
 *
 * Set user controllable parameters
 * @param param - string specifying parameter to be set
 * @param value - string specifying new value
 * @returns One of: CILKG_SET_PARAM_SUCCESS ( = 0),
 *    CILKG_SET_PARAM_UNIMP, CILKG_SET_PARAM_XRANGE,
 *    CILKG_SET_PARAM_INVALID, or CILKG_SET_PARAM_LATE.
 */
int cilkg_set_param_w(const wchar_t* param, const wchar_t* value);
#endif
354
355 /**
356 * @brief implementation of __cilkrts_get_nworkers()
357 */
358 static inline
cilkg_get_nworkers(void)359 int cilkg_get_nworkers(void)
360 {
361 // "private" extern declaration
362 extern global_state_t* cilkg_get_user_settable_values(void);
363 return cilkg_get_user_settable_values()->P;
364 }
365
/**
 * @brief implementation of __cilkrts_get_total_workers()
 *
 * @return The result of cilkg_calc_total_workers(); the total_workers
 * field of the global state is deliberately not read here.
 */
static inline
int cilkg_get_total_workers(void)
{
    // "private" extern declaration
    extern int cilkg_calc_total_workers(void);

    // This number can fluctuate until initialization so we
    // compute it from scratch
    return cilkg_calc_total_workers();
}
379
380 /**
381 * @brief implementation of __cilkrts_get_force_reduce()
382 */
383 static inline
cilkg_get_force_reduce(void)384 int cilkg_get_force_reduce(void)
385 {
386 // "private" extern declaration
387 extern global_state_t* cilkg_get_user_settable_values(void);
388 return cilkg_get_user_settable_values()->force_reduce;
389 }
390
391 /**
392 * @brief implementation of __cilkrts_get_stack_size()
393 */
394 static inline
cilkg_get_stack_size(void)395 size_t cilkg_get_stack_size(void)
396 {
397 // "private" extern declaration
398 extern global_state_t* cilkg_get_user_settable_values(void);
399 return cilkg_get_user_settable_values()->stack_size;
400 }
401
402 /**
403 * @brief Run the scheduler function stored in the global_state
404 *
405 * Look up the scheduler function in global_state and run it. Report a fatal
406 * error if an exception escapes the scheduler function.
407 *
408 * @param w - Worker structure to associate with the current thread.
409 *
410 * @attention The scheduler field of the global state must be set before this
411 * function is called.
412 */
413 void __cilkrts_run_scheduler_with_exceptions(__cilkrts_worker *w);
414
415 __CILKRTS_END_EXTERN_C
416
417 #endif // ! defined(INCLUDED_GLOBAL_STATE_DOT_H)
418