1 /* record_replay.h                  -*-C++-*-
2  *
3  *************************************************************************
4  *
5  *  @copyright
6  *  Copyright (C) 2012-2013, Intel Corporation
7  *  All rights reserved.
8  *
9  *  @copyright
10  *  Redistribution and use in source and binary forms, with or without
11  *  modification, are permitted provided that the following conditions
12  *  are met:
13  *
14  *    * Redistributions of source code must retain the above copyright
15  *      notice, this list of conditions and the following disclaimer.
16  *    * Redistributions in binary form must reproduce the above copyright
17  *      notice, this list of conditions and the following disclaimer in
18  *      the documentation and/or other materials provided with the
19  *      distribution.
20  *    * Neither the name of Intel Corporation nor the names of its
21  *      contributors may be used to endorse or promote products derived
22  *      from this software without specific prior written permission.
23  *
24  *  @copyright
25  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26  *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27  *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28  *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29  *  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
30  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
31  *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
32  *  OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
33  *  AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
35  *  WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  *  POSSIBILITY OF SUCH DAMAGE.
37  *
38  **************************************************************************/
39 
40 /**
41  * @file record-replay.h
42  *
43  * @brief record-replay.h and .cpp encapsulate most of the functionality to
44  * record and play back a Cilk Plus application.
45  *
46  * Recording is directed by the setting of the CILK_RECORD_LOG environment
47  * variable.  If it's defined, the value specifies the root we'll use to
48  * generate files for each worker using the following format string:
49  * "%s%d.cilklog", where the integer is the value of w->self.
50  *
51  * Replay is directed by the setting of the CILK_REPLAY_LOG environment
52  * variable, interpreted the same way as CILK_RECORD_LOG.  If both
53  * CILK_RECORD_LOG and CILK_REPLAY_LOG are defined, a warning will be given
54  * and the attempt to record a log will be ignored.
55  *
56  * Recording is relatively straightforward.  We write all information about a
57  * worker to a per-worker file.
58  *
59  * Each pedigree record consists of the following fields.  All fields must be
60  * present in every record to make parsing easy.
61  *    - Type - A string identifying the pedigree record.  See the PED_TYPE_STR_
62  *      macros for the currently defined values.
63  *    - Pedigree - A string of pedigree values, with underscores between
64  *      adjacent values.
65  *    - i1 - Record type-specific value.  -1 if not used.
66  *    - i2 - Record type-specific value.  -1 if not used.
67  *
68  * WORKERS record - only written to the file for worker 0.  Note that this is
69  * the first worker in the workers array. Worker 0 is the first system worker,
70  * *NOT* a user worker.
71  *  - Type: "Workers"
72  *  - Pedigree: Always "0" - ignored
73  *  - i1: Number of workers (g->P) when we recorded the log.  A mismatch when
74  *        we attempt to replay the log will result in aborting the execution.
75  *  - i2: Log version number - Specified by PED_VERSION in record-replay.cpp
76  *
77  * STEAL record - written after a successful steal.
78  *  - Type: "Steal"
79  *  - Pedigree: Pedigree of stolen frame
80  *  - i1: Worker the frame was stolen from
81  *  - i2: -1
82  *
83  * SYNC record - written after a worker continues from a sync.
84  *  - Type: "Sync"
85  *  - Pedigree: Pedigree of sync.  Note that this is the pedigree *before*
86  *        the pedigree in incremented in setup_for_execution_pedigree().
87  *  - i1: -1
88  *  - i2: -1
89  *
90  * ORPHANED record - saved on a return to a stolen parent.
91  *  - Type: "Orphaned"
92  *  - Pedigree: Pedigree of the parent frame *before* the pedigree is
93  *        incremented by the return
94  *  - i1: -1
95  *  - i2: -1
96  *
97  * On replay, the data is loaded into a per-worker array, and the data is
98  * consumed in order as needed.
99  */
100 
101 #ifndef INCLUDED_RECORD_REPLAY_DOT_H
102 #define INCLUDED_RECORD_REPLAY_DOT_H
103 
104 #include "cilk/common.h"
105 #include "global_state.h"
106 
107 /**
108  * Define CILK_RECORD_REPLAY to enable record/replay functionality.  If
109  * CILK_RECORD_REPLAY is not defined, all of the record/replay functions in
110  * record-replay.h will be stubbed out.  Since they're declared as inline,
111  * functions, the resulting build should have no performance impact due to
112  * the implementation or record/replay.
113  */
114  #define CILK_RECORD_REPLAY 1
115 
116 /**
117  * Define RECORD_ON_REPLAY=1 to write logs when we're replaying a log.  This
118  * should only be needed when debugging the replay functionality.  This should
119  * always be defined as 0 when record-replay.h is checked in.
120  */
121 #define RECORD_ON_REPLAY 0
122 
123 __CILKRTS_BEGIN_EXTERN_C
124 
125 #ifdef CILK_RECORD_REPLAY
126 // Declarations of internal record/replay functions.  The inlined versions
127 // further down do some preliminary testing (like if we're not recording or
128 // replaying) and will stub out the functionality if we've compiled out the
129 // record/replay feature
130 int replay_match_sync_pedigree_internal(__cilkrts_worker *w);
131 void replay_wait_for_steal_if_parent_was_stolen_internal(__cilkrts_worker *w);
132 void replay_record_steal_internal(__cilkrts_worker *w, int32_t victim_id);
133 void replay_record_sync_internal(__cilkrts_worker *w);
134 void replay_record_orphaned_internal(__cilkrts_worker *w);
135 int replay_match_victim_pedigree_internal(__cilkrts_worker *w, __cilkrts_worker *victim);
136 void replay_advance_from_sync_internal (__cilkrts_worker *w);
137 int replay_get_next_recorded_victim_internal(__cilkrts_worker *w);
138 #endif //  CILK_RECORD_REPLAY
139 
140 // Publically defined record/replay API
141 
142 /**
143  * If we're replaying a log, wait for our parent to be stolen if it was when
144  * the log was recorded.  If record/replay is compiled out, this is a noop.
145  *
146  * @param w The __cilkrts_worker we're executing on.  The worker's replay
147  * list will be checked for a ORPHANED record with a matching pedigree.  If
148  * there is a match, the ORPHANED record will be consumed.
149  */
150 #ifdef CILK_RECORD_REPLAY
151 __CILKRTS_INLINE
replay_wait_for_steal_if_parent_was_stolen(__cilkrts_worker * w)152 void replay_wait_for_steal_if_parent_was_stolen(__cilkrts_worker *w)
153 {
154     // Only check if we're replaying a log
155     if (REPLAY_LOG == w->g->record_or_replay)
156         replay_wait_for_steal_if_parent_was_stolen_internal(w);
157 }
158 #else
159 __CILKRTS_INLINE
replay_wait_for_steal_if_parent_was_stolen(__cilkrts_worker * w)160 void replay_wait_for_steal_if_parent_was_stolen(__cilkrts_worker *w)
161 {
162     // If record/replay is disabled, we never wait
163 }
164 #endif //  CILK_RECORD_REPLAY
165 
166 /**
167  * Called from random_steal() to override the ID of the randomly chosen victim
168  * worker which this worker will attempt to steal from. Returns the worker id
169  * of the next victim this worker was recorded stealing from, or -1 if the
170  * next record in the log is not a STEAL.
171  *
172  * @note This call does NOT attempt to match the pedigree.  That will be done
173  * by replay_match_victim_pedigree() after random_steal() has locked the victim
174  * worker.
175  *
176  * @param w The __cilkrts_worker we're executing on.  The worker's replay log
177  * is checked for a STEAL record.  If we've got one, the stolen worker ID is
178  * returned.
179  * @param id The randomly chosen victim worker ID.  If we're not replaying a
180  * log, or if record/replay has been compiled out, this is the value that
181  * will be returned.
182  *
183  * @return id if we're not replaying a log
184  * @return -1 if the next record is not a STEAL
185  * @return recorded stolen worker ID if we've got a matching STEAL record
186  */
187 #ifdef CILK_RECORD_REPLAY
188 __CILKRTS_INLINE
replay_get_next_recorded_victim(__cilkrts_worker * w,int id)189 int replay_get_next_recorded_victim(__cilkrts_worker *w, int id)
190 {
191     // Only check if we're replaying a log
192     if (REPLAY_LOG == w->g->record_or_replay)
193         return replay_get_next_recorded_victim_internal(w);
194     else
195         return id;
196 }
197 #else
198 __CILKRTS_INLINE
replay_get_next_recorded_victim(__cilkrts_worker * w,int id)199 int replay_get_next_recorded_victim(__cilkrts_worker *w, int id)
200 {
201     // Record/replay is disabled.  Always return the original worker id
202     return id;
203 }
204 #endif //  CILK_RECORD_REPLAY
205 
206 /**
207  * Initialize per-worker data for record/replay.  A noop if record/replay
208  * is disabled, or if we're not recording or replaying anything.
209  *
210  * If we're recording a log, this will ready us to create the per-worker
211  * logs.
212  *
213  * If we're replaying a log, this will read the logs into the per-worker
214  * structures.
215  *
216  * @param g Cilk runtime global state
217  */
218 void replay_init_workers(global_state_t *g);
219 
220 /**
221  * Record a record on a successful steal.  A noop if record/replay is
222  * diabled, or if we're not recording anything
223  *
224  * @param w The __cilkrts_worker we're executing on.  The pedigree of
225  * the stolen frame will be walked to generate the STEAL record.
226  *
227  * @param victim_id The worker ID of the worker w stole from.
228  */
229 #ifdef CILK_RECORD_REPLAY
230 __CILKRTS_INLINE
replay_record_steal(__cilkrts_worker * w,int32_t victim_id)231 void replay_record_steal(__cilkrts_worker *w, int32_t victim_id)
232 {
233 #if RECORD_ON_REPLAY
234     // If we're recording on replay, write the record if we're recording or
235     // replaying
236     if (RECORD_REPLAY_NONE == w->g->record_or_replay)
237         return;
238 #else
239     // Only write the record if we're recording
240     if (RECORD_LOG != w->g->record_or_replay)
241         return;
242 #endif
243 
244     replay_record_steal_internal(w, victim_id);
245 }
246 #else
247 __CILKRTS_INLINE
replay_record_steal(__cilkrts_worker * w,int32_t victim_id)248 void replay_record_steal(__cilkrts_worker *w, int32_t victim_id)
249 {
250 }
251 #endif //  CILK_RECORD_REPLAY
252 
253 /**
254  * Record a record when continuing after a sync.  A noop if record/replay is
255  * diabled, or if we're not recording anything, or if the sync was abandoned,
256  * meaning this isn't the worker that continues from the sync.
257  *
258  * @param w The __cilkrts_worker for we're executing on.  The pedigree of
259  * the sync-ing frame will be walked to generate the SYNC record.
260  *
261  * @param continuing True if this worker will be continuing from the
262  * cilk_sync.  A SYNC record will only be generated if continuing is true.
263  */
264 #ifdef CILK_RECORD_REPLAY
265 __CILKRTS_INLINE
replay_record_sync(__cilkrts_worker * w,int continuing)266 void replay_record_sync(__cilkrts_worker *w, int continuing)
267 {
268     // If this was not the last worker to the syn, return
269     if (! continuing)
270         return;
271 
272 #if RECORD_ON_REPLAY
273     // If we're recording on replay, write the record if we're recording or
274     // replaying
275     if (RECORD_REPLAY_NONE == w->g->record_or_replay)
276         return;
277 #else
278     // Only write the record if we're recording
279     if (RECORD_LOG != w->g->record_or_replay)
280         return;
281 #endif
282 
283     replay_record_sync_internal(w);
284 }
285 #else
286 __CILKRTS_INLINE
replay_record_sync(__cilkrts_worker * w,int abandoned)287 void replay_record_sync(__cilkrts_worker *w, int abandoned)
288 {
289 }
290 #endif //  CILK_RECORD_REPLAY
291 
292 /**
293  * Record a record on a return to a stolen parent.  A noop if record/replay is
294  * diabled, or if we're not recording anything.
295  *
296  * @param w The __cilkrts_worker for we're executing on.  The pedigree of the
297  * frame that has discovered that its parent has been stolken will be walked
298  * to generate the ORPHANED record.
299  */
300 #ifdef CILK_RECORD_REPLAY
301 __CILKRTS_INLINE
replay_record_orphaned(__cilkrts_worker * w)302 void replay_record_orphaned(__cilkrts_worker *w)
303 {
304 #if RECORD_ON_REPLAY
305     // If we're recording on replay, write the record if we're recording or
306     // replaying
307     if (RECORD_REPLAY_NONE == w->g->record_or_replay)
308         return;
309 #else
310     // Only write the record if we're recording
311     if (RECORD_LOG != w->g->record_or_replay)
312         return;
313 #endif
314 
315     replay_record_orphaned_internal(w);
316 }
317 #else
318 __CILKRTS_INLINE
replay_record_orphaned(__cilkrts_worker * w)319 void replay_record_orphaned(__cilkrts_worker *w)
320 {
321 }
322 #endif //  CILK_RECORD_REPLAY
323 
324 /**
325  * Test whether the frame at the head of the victim matches the pedigree of
326  * the frame that was recorded being stolen.  Called in random steal to verify
327  * that we're about to steal the correct frame.
328  *
329  * @param w The __cilkrts_worker for we're executing on.  The current worker
330  * is needed to find the replay entry to be checked.
331  *
332  * @param victim The __cilkrts_worker for we're proposing to steal a frame
333  * from.  The victim's head entry is
334  * is needed to find the replay entry to be checked.
335  *
336  * @return 0 if we're replaying a log and the victim's pedigree does NOT match
337  * the next frame the worker is expected to steal.
338  *
339  * @return 1 in all other cases to indicate that the steal attempt should
340  * continue
341  */
342 #ifdef CILK_RECORD_REPLAY
343 __CILKRTS_INLINE
replay_match_victim_pedigree(__cilkrts_worker * w,__cilkrts_worker * victim)344 int replay_match_victim_pedigree(__cilkrts_worker *w, __cilkrts_worker *victim)
345 {
346     // We're not replaying a log. The victim is always acceptable
347     if (REPLAY_LOG != w->g->record_or_replay)
348         return 1;
349 
350     // Return 1 if the victim's pedigree matches the frame the worker stole
351     // when we recorded the log
352     return replay_match_victim_pedigree_internal(w, victim);
353 }
354 #else
355 __CILKRTS_INLINE
replay_match_victim_pedigree(__cilkrts_worker * w,__cilkrts_worker * victim)356 int replay_match_victim_pedigree(__cilkrts_worker *w, __cilkrts_worker *victim)
357 {
358     // Record/replay is disabled.  The victim is always acceptable
359     return 1;
360 }
361 #endif //  CILK_RECORD_REPLAY
362 
363 /**
364  * Test whether the current replay entry is a sync record matching the
365  * worker's pedigree.
366  *
367  * @param w The __cilkrts_worker for we're executing on.
368  *
369  * @return 1 if the current replay entry matches the current pedigree.
370  * @return 0 if there's no match, or if we're not replaying a log.
371  */
372 #ifdef CILK_RECORD_REPLAY
373 __CILKRTS_INLINE
replay_match_sync_pedigree(__cilkrts_worker * w)374 int replay_match_sync_pedigree(__cilkrts_worker *w)
375 {
376     // If we're not replaying, assume no match
377     if (REPLAY_LOG != w->g->record_or_replay)
378         return 0;
379 
380     return replay_match_sync_pedigree_internal(w);
381 }
382 #else
383 __CILKRTS_INLINE
replay_match_sync_pedigree(__cilkrts_worker * w)384 int replay_match_sync_pedigree(__cilkrts_worker *w)
385 {
386     // Record/replay is disabled.  Assume no match
387     return 0;
388 }
389 #endif
390 
391 /**
392  * Marks a sync record seen, advancing to the next record in the replay list.
393  *
394  * This function will only advance to the next record if:
395  *   - Record/replay hasn't been compiled out AND
396  *   - We're replaying a log AND
397  *   - A match was found AND
398  *   - The sync is not being abandoned
399  *
400  * @param w The __cilkrts_worker for we're executing on.
401  * @param match_found The value returned by replay_match_sync_pedigree().  If
402  * match_found is false, nothing is done.
403  * @param continuing  Flag indicating whether this worker will continue from
404  * the sync (it's the last worker to the sync) or if it will abandon the work
405  * and go to the scheduling loop to look for more work it can steal.
406  */
407 #ifdef CILK_RECORD_REPLAY
408 __CILKRTS_INLINE
replay_advance_from_sync(__cilkrts_worker * w,int match_found,int continuing)409 void replay_advance_from_sync(__cilkrts_worker *w, int match_found, int continuing)
410 {
411     // If we're replaying a log, and the current sync wasn't abandoned, and we
412     // found a match in the log, mark the sync record seen.
413     if ((REPLAY_LOG == w->g->record_or_replay) && match_found && continuing)
414         replay_advance_from_sync_internal(w);
415 }
416 #else
417 __CILKRTS_INLINE
replay_advance_from_sync(__cilkrts_worker * w,int match_found,int continuing)418 void replay_advance_from_sync(__cilkrts_worker *w, int match_found, int continuing)
419 {
420 }
421 #endif
422 
423 /**
424  * Release any resources used to read or write a replay log.
425  *
426  * @param g Cilk runtime global state
427  */
428 void replay_term(global_state_t *g);
429 
430 __CILKRTS_END_EXTERN_C
431 
432 #endif // ! defined(INCLUDED_RECORD_REPLAY_DOT_H)
433