1 /* record-replay.cpp                  -*-C++-*-
2  *
3  *************************************************************************
4  *
5  *  @copyright
6  *  Copyright (C) 2012-2013, Intel Corporation
7  *  All rights reserved.
8  *
9  *  @copyright
10  *  Redistribution and use in source and binary forms, with or without
11  *  modification, are permitted provided that the following conditions
12  *  are met:
13  *
14  *    * Redistributions of source code must retain the above copyright
15  *      notice, this list of conditions and the following disclaimer.
16  *    * Redistributions in binary form must reproduce the above copyright
17  *      notice, this list of conditions and the following disclaimer in
18  *      the documentation and/or other materials provided with the
19  *      distribution.
20  *    * Neither the name of Intel Corporation nor the names of its
21  *      contributors may be used to endorse or promote products derived
22  *      from this software without specific prior written permission.
23  *
24  *  @copyright
25  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26  *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27  *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28  *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29  *  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
30  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
31  *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
32  *  OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
33  *  AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
35  *  WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  *  POSSIBILITY OF SUCH DAMAGE.
37  *
38  **************************************************************************/
39 
40 /*
41  * Implementation of the record/replay functionality for Cilk Plus
42  */
43 
44 #include <cstring>
45 #include <vector>
46 #include <stdlib.h>
47 
48 // clang is really strict about printf formats, so use the annoying integer
49 // printf macros.  Unfortunately they're not avaiable on Windows
50 #ifdef _WIN32
51 #define PRIu64 "llu"
52 #else
53 #define __STDC_FORMAT_MACROS 1
54 #include <inttypes.h>
55 #endif
56 
57 #include "record-replay.h"
58 #include "bug.h"
59 #include "internal/abi.h"
60 #include "local_state.h"
61 #include "full_frame.h"
62 #include "global_state.h"
63 #include "cilk_malloc.h"
64 #include "os.h"  // for cilkos_error()
65 
66 #if RECORD_ON_REPLAY
67 #pragma message ("*** Record on Replay is enabled!")
68 #endif
69 
70 // Defined to write sequence number to the logs.  Note that you cannot
71 // diff logs with sequence numbers because the numbers may increment in
72 // different orders.
73 //#define INCLUDE_SEQUENCE_NUMBER 1
74 
75 const int PED_VERSION = 1;      // Log recording version
76 
77 // Log types
78 enum ped_type_t
79 {
80     ped_type_unknown,
81     ped_type_steal,
82     ped_type_sync,
83     ped_type_orphaned,
84     ped_type_last               // Flags end of the list
85 };
86 
87 // Log type strings
88 #define PED_TYPE_STR_STEAL "Steal"
89 #define PED_TYPE_STR_SYNC "Sync"
90 #define PED_TYPE_STR_WORKERS "Workers"
91 #define PED_TYPE_STR_ORPHANED "Orphaned"
92 
93 #define PED_TYPE_SIZE 16        // Buffer size for the type of pedigree.  Must
94                                 // hold largest pedigree record type string.
95 #define PEDIGREE_BUFF_SIZE 512  // Buffer size for the string representation
96                                 // of a pedigree.
97 
98 /**
99  * Data we store for a replay log entry
100  */
101 typedef struct replay_entry_t
102 {
103     uint64_t *m_reverse_pedigree;   /**< Reverse pedigree for replay log entry */
104     ped_type_t m_type;              /**< Type of replay log entry */
105     int16_t m_pedigree_len;         /**< Number of terms in reverse pedigree */
106     int16_t m_value;                /**< Victim for STEALs, 0 if matching steal found for ORPHANs */
107 
108    /**
109     * Load data read from the log into the entry
110     */
loadreplay_entry_t111     bool load(const char *type, const char *pedigee_str, int32_t value1, int32_t value2)
112     {
113         // Convert the type into an enum
114         if (0 == strcmp(type, PED_TYPE_STR_STEAL))
115         {
116             m_type = ped_type_steal;
117             m_value = (int16_t)value1;   // Victim
118         }
119         else
120         {
121             m_value = -1;      // Victim not valid
122             if (0 == strcmp(type, PED_TYPE_STR_SYNC))
123                 m_type = ped_type_sync;
124             else if (0 == strcmp(type, PED_TYPE_STR_ORPHANED))
125                 m_type = ped_type_orphaned;
126             else
127             {
128                 m_type = ped_type_unknown;
129                 return false;
130             }
131         }
132 
133         // Parse the pedigree
134         m_pedigree_len = 0;
135 
136         const char *p = pedigee_str;
137         char *end;
138 
139         uint64_t temp_pedigree[PEDIGREE_BUFF_SIZE/2];
140 
141         while(1)
142         {
143             temp_pedigree[m_pedigree_len++] = (uint64_t)strtol(p, &end, 10);
144             if ('\0' == *end)
145                 break;
146             p = end + 1;
147         }
148 
149         // Allocate memory to hold the pedigree.
150         // Copy the pedigree in reverse order since that's the order we'll
151         // traverse it
152         m_reverse_pedigree =
153             (uint64_t *)__cilkrts_malloc(sizeof(int64_t) * m_pedigree_len);
154         for (int n = 0; n < m_pedigree_len; n++)
155             m_reverse_pedigree[n] = temp_pedigree[(m_pedigree_len - 1) - n];
156 
157         return true;
158     }
159 
160    /**
161     * Match this entry against the data supplied.  This includes walking the
162     * pedigree from the specified node.
163     */
matchreplay_entry_t164     bool match (ped_type_t type, const __cilkrts_pedigree *node, int victim = -1)
165     {
166         int i = 0;
167 
168         // If the type isn't what they're seeking, we don't have a match
169         if (type != m_type)
170             return false;
171 
172         // If we're looking for a STEAL, then the victim must match
173         if ((type == ped_type_steal) && (victim != m_value))
174             return false;
175 
176         // Compare the current pedigree against what was recorded
177         while ((NULL != node) && (i < m_pedigree_len))
178         {
179             // If we've got a pedigree rank difference, then we don't have
180             // a match
181             if (node->rank != m_reverse_pedigree[i])
182                 return false;
183             node = node->parent;
184             i++;
185         }
186 
187         // Make sure we exhausted both the pedigree chain and the recorded
188         // pedigree
189         return ((NULL == node) && (i == m_pedigree_len));
190     }
191 
192    /**
193     * Advance to the next entry, skipping any ORPHANED records we didn't see
194     * a matching STEAL for
195     */
next_entryreplay_entry_t196     replay_entry_t *next_entry()
197     {
198         replay_entry_t *entry = this;
199 
200         // You can't go beyond the end
201         if (ped_type_last == entry->m_type)
202             return entry;
203 
204         // Advance to the next entry
205         entry++;
206 
207         // Skip any ORPHANED records that don't have a matching steal. We
208         // initialized the value field to -1 for ORPHANED.  After loading all
209         // the log data, we iterated through all the STEAL records setting the
210         // matching ORPHANED record's value field to 0. So if an ORPHANED
211         // record's value field is still -1, it doesn't have a matching STEAL
212         // record, and I don't know why we chose not to return from the
213         // spawned function.
214         while ((ped_type_orphaned == entry->m_type) && (-1 == entry->m_value))
215         {
216             entry++;
217         }
218 
219         return entry;
220     }
221 
222    /**
223     * Release any allocated resources
224     */
unloadreplay_entry_t225     void unload()
226     {
227         __cilkrts_free(m_reverse_pedigree);
228         m_reverse_pedigree = NULL;
229     }
230 
231 } replay_entry_t;
232 
233 __CILKRTS_BEGIN_EXTERN_C
234 
235 /**
236  * Walk the pedigree and generate a string representation with underscores
237  * between terms.  Currently does a recursive walk to generate a forward
238  * pedigree.
239  *
240  * @param p The buffer that is to be filled.  Assumed to be PEDIGREE_BUFF_SIZE
241  * characters long
242  * @param pnode The initial pedigree term to be written.
243  *
244  * @return A pointer into the pedigree string buffer after a term has been
245  * written.
246  */
247 static
walk_pedigree_nodes(char * p,const __cilkrts_pedigree * pnode)248 char * walk_pedigree_nodes(char *p, const __cilkrts_pedigree *pnode)
249 {
250     CILK_ASSERT(pnode);
251     if (pnode->parent)
252     {
253         p = walk_pedigree_nodes(p, pnode->parent);
254         p += sprintf(p, "_");
255     }
256 
257     return p + sprintf(p, "%" PRIu64, pnode->rank);
258 }
259 
260 /**
261  * Write a record to a replay log file.
262  *
263  * @param w The worker we're writing the pedigree for.
264  * @param type The type of the pedigree record, as a string
265  * @param initial_node The initial pedigree node to be written, or NULL if
266  * there is no pedigree for this record type.
267  * @param i1 First integer value to be written to the record.
268  * @param i2 Second integer value to be written to the record. Only applies
269  * to STEAL records. Defaults to -1 (unused).  The second value is always
270  * written to make parsing easier.
271  */
272 static
write_to_replay_log(__cilkrts_worker * w,const char * type,const __cilkrts_pedigree * initial_node,int i1=-1,int i2=-1)273 void write_to_replay_log (__cilkrts_worker *w, const char *type,
274                           const __cilkrts_pedigree *initial_node,
275                           int i1 = -1, int i2 = -1)
276 {
277     char pedigree[PEDIGREE_BUFF_SIZE];
278 
279     // If we don't have an initial pedigree node, just use "0" to fill the slot
280     if (NULL == initial_node)
281         strcpy(pedigree, "0");
282     else
283         walk_pedigree_nodes(pedigree, initial_node);
284 
285 #ifndef INCLUDE_SEQUENCE_NUMBER
286     // Simply write the record
287     fprintf(w->l->record_replay_fptr, "%s %s %d %d\n",
288             type, pedigree, i1, i2);
289 #else
290     // Write the record with a sequence number.  The sequence number should
291     // always be the last term, and ignored on read
292 
293     static long volatile seq_num = 0;
294     long write_num;
295 
296     // Atomic increment functions are compiler/OS-specific
297 #ifdef _WIN32
298     write_num = _InterlockedIncrement(&seq_num);
299 #else /* GCC */
300     write_num = __sync_add_and_fetch(&seq_num, 1);
301 #endif // _WIN32
302 
303     fprintf(w->l->record_replay_fptr, "%s %s %d %d %ld\n",
304             type, pedigree, i1, i2, write_num);
305 #endif // INCLUDE_SEQUENCE_NUMBER
306 
307     fflush(w->l->record_replay_fptr);
308 }
309 
310 /**
311  * Record data for a successful steal.
312  *
313  * The pedigree for a STEAL record is the pedigree of the stolen frame.
314  *
315  * @note It's assumed that replay_record_steal() has already checked that we're
316  * recording a log and that the record/replay functionality has not been
317  * compiled out.
318  *
319  * @param w The worker stealing a frame.
320  * @param victim_id The ID of the worker which had it's frame stolen.
321  */
replay_record_steal_internal(__cilkrts_worker * w,int32_t victim_id)322 void replay_record_steal_internal(__cilkrts_worker *w, int32_t victim_id)
323 {
324     // Follow the pedigree chain using worker's stack frame
325     CILK_ASSERT(w->l->next_frame_ff);
326     CILK_ASSERT(w->l->next_frame_ff->call_stack);
327 
328     // Record steal: STEAL pedigree victim_id thief_id
329     write_to_replay_log (w, PED_TYPE_STR_STEAL,
330                          &(w->l->next_frame_ff->call_stack->parent_pedigree),
331                          victim_id);
332 }
333 
334 /**
335  * Record data for the worker that continues from a sync
336  *
337  * The pedigree for a SYNC record is the pedigree at the sync.
338  *
339  * @note It's assumed that replay_record_sync() has already checked that we're
340  * recording a log and that the record/replay functionality has not been
341  * compiled out.
342  *
343  * @param w The worker continuing from a sync.
344  */
replay_record_sync_internal(__cilkrts_worker * w)345 void replay_record_sync_internal(__cilkrts_worker *w)
346 {
347     // Record sync: SYNC pedigree last_worker_id
348     write_to_replay_log (w, PED_TYPE_STR_SYNC, &w->pedigree);
349 }
350 
351 /**
352  * Record the pedigree of an attempt to return to a stolen parent
353  *
354  * The pedigree for an ORPHANED record is the pedigree of our parent
355  *
356  * @note It's assumed that replay_record_orphaned() has already checked that
357  * we're recording a log and that the record/replay functionality has not
358  * been compiled out.
359  *
360  * @param w The worker continuing noting that it has been orphaned.
361  */
replay_record_orphaned_internal(__cilkrts_worker * w)362 void replay_record_orphaned_internal(__cilkrts_worker *w)
363 {
364     // Record steal: ORPHANED pedigree self
365     write_to_replay_log (w, PED_TYPE_STR_ORPHANED, w->pedigree.parent);
366 }
367 
368 /**
369  * Attempt to match a SYNC record.  We have a match when this worker was
370  * recorded returning from the current call to __cilkrts_sync() with the
371  * same pedigree and this was the worker that continued from the sync, since
372  * it was the last to sync.
373  *
374  * If we find a match, the caller is expected to stall it is the last worker
375  * to reach a sync so it will be the worker to continue from the sync.
376  *
377  * @note It's assumed that replay_match_sync_pedigree() has already returned
378  * if we're not replaying a log, or if record/replay functionality has
379  * been compiled out.
380  *
381  * @param w The worker we're checking to see if we've got a match
382  */
replay_match_sync_pedigree_internal(__cilkrts_worker * w)383 int replay_match_sync_pedigree_internal(__cilkrts_worker *w)
384 {
385     // Return true if we have a match
386     if (w->l->replay_list_entry->match(ped_type_sync, &w->pedigree))
387         return 1;
388     else
389         return 0;
390 }
391 
392 /**
393  * Advance to the next log entry from a SYNC record.  Consume the current
394  * SYNC record on this worker and advance to the next one.
395  *
396  * @note It's assumed that replay_advance_from_sync() has already returned if
397  * we're not replaying a log, or if record/replay functionality has been
398  * compiled out.
399  *
400  * @param w The worker whose replay log we're advancing.
401  */
replay_advance_from_sync_internal(__cilkrts_worker * w)402 void replay_advance_from_sync_internal (__cilkrts_worker *w)
403 {
404     // The current replay entry must be a SYNC
405     CILK_ASSERT(ped_type_sync == w->l->replay_list_entry->m_type);
406 
407     // Advance to the next entry
408     w->l->replay_list_entry = w->l->replay_list_entry->next_entry();
409 }
410 
411 /**
412  * Called from random_steal() to override the ID of the randomly chosen victim
413  * worker which this worker will attempt to steal from. Returns the worker id
414  * of the next victim this worker was recorded stealing from, or -1 if the
415  * next record in the log is not a STEAL.
416  *
417  * @note This call does NOT attempt to match the pedigree.  That will be done
418  * by replay_match_victim_pedigree() after random_steal() has locked the victim
419  * worker.
420  *
421  * @param w The __cilkrts_worker we're executing on.  The worker's replay log
422  * is checked for a STEAL record.  If we've got one, the stolen worker ID is
423  * returned.
424  *
425  * @return -1 if the next record is not a STEAL
426  * @return recorded stolen worker ID if we've got a matching STEAL record
427  */
replay_get_next_recorded_victim_internal(__cilkrts_worker * w)428 int replay_get_next_recorded_victim_internal(__cilkrts_worker *w)
429 {
430     // If the next record isn't a STEAL, abort the attempt to steal work
431     if (ped_type_steal != w->l->replay_list_entry->m_type)
432         return -1;
433 
434     // Return the victim's worker ID from the STEAL record.  We'll check
435     // the pedigree after random_steal has locked the victim worker.
436     return w->l->replay_list_entry->m_value;
437 }
438 
439 /**
440  * Called from random_steal() to determine if we have a STEAL record that
441  * matches the pedigree at the head of the victim worker.  If we do have a
442  * match, the STEAL record is consumed.
443  *
444  * @note It's assumed that replay_match_victim_pedigree() has already returned if
445  * we're not replaying a log, or if record/replay functionality has been
446  * compiled out.
447  *
448  * @return 1 if we have a match
449  * @return 0 if the current replay record isn't a STEAL record, or the victim
450  * isn't correct, or the pedigree doesn't match.
451  */
replay_match_victim_pedigree_internal(__cilkrts_worker * w,__cilkrts_worker * victim)452 int replay_match_victim_pedigree_internal(__cilkrts_worker *w, __cilkrts_worker *victim)
453 {
454     // If we don't have a match, return 0
455     if (! w->l->replay_list_entry->match(ped_type_steal,
456                                              &((*victim->head)->parent_pedigree),
457                                              victim->self))
458         return 0;
459 
460     // Consume this entry
461     w->l->replay_list_entry = w->l->replay_list_entry->next_entry();
462 
463     // Return success
464     return 1;
465 }
466 
467 /**
468  * If the frame we're about to return to was recorded as being stolen,
469  * stall until it is.
470  *
471  * @note It's assumed that replay_wait_for_steal_if_parent_was_stolen() has
472  * already returned if we're not replaying a log, or if record/replay
473  * functionality has been compiled out.
474  *
475  * @param w The worker we're executing on.
476  */
replay_wait_for_steal_if_parent_was_stolen_internal(__cilkrts_worker * w)477 void replay_wait_for_steal_if_parent_was_stolen_internal(__cilkrts_worker *w)
478 {
479     // If our parent wasn't recorded orphanen, return now
480     if (! w->l->replay_list_entry->match (ped_type_orphaned,
481                                               w->pedigree.parent))
482         return;
483 
484     // Stall until our parent is stolen.  Note that we're comparing head
485     // and tail, not head and exc.  The steal is not completed until tail
486     // is modified.
487     while (!((w->tail - 1) < w->head))
488         __cilkrts_sleep();
489 
490     // Consume the entry
491     w->l->replay_list_entry = w->l->replay_list_entry->next_entry();
492 }
493 
494 /**
495  * Allocate memory for the list of logged events.
496  *
497  * This function will read through the file and count the number of records
498  * so it can estimate how big a buffer to allocate for the array or replay
499  * entries.  It will then rewind the file to the beginning so it can be
500  * loaded into memory.
501  *
502  * @param w The worker we're loading the file for.
503  * @param f The file of replay data we're scanning.
504  */
505 static
allocate_replay_list(__cilkrts_worker * w,FILE * f)506 void allocate_replay_list(__cilkrts_worker *w, FILE *f)
507 {
508     // Count the number of entries - yeah, it's a hack, but it lets me
509     // allocate the space all at once instead of in chunks
510     char buf[1024];
511     int entries = 1;    // Include "LAST" node
512 
513     while (! feof(f))
514     {
515         if (fgets(buf, 1024, f))
516         {
517             // Skip the Workers record - should only be in file for Worker 0
518             if (0 != strncmp(PED_TYPE_STR_WORKERS, buf, sizeof(PED_TYPE_STR_WORKERS)-1))
519                 entries++;
520         }
521     }
522 
523     w->l->replay_list_root =
524         (replay_entry_t *)__cilkrts_malloc(entries * sizeof(replay_entry_t));
525     w->l->replay_list_root[entries - 1].m_type = ped_type_last;
526 
527     // Reset the file to the beginning
528     rewind(f);
529 }
530 
531 /**
532  * Load the replay log for a worker into memory.
533  *
534  * @param w The worker we're loading the replay for.
535  */
536 static
load_recorded_log(__cilkrts_worker * w)537 void load_recorded_log(__cilkrts_worker *w)
538 {
539     char ped_type[PED_TYPE_SIZE];
540     char ped_str[PEDIGREE_BUFF_SIZE];
541     int32_t i1 = -1, i2 = -1;
542     int fret;
543     char local_replay_file_name[512];
544     FILE *f;
545 
546     // Open the log for reading
547     sprintf(local_replay_file_name, "%s%d.cilklog", w->g->record_replay_file_name,  w->self);
548     f = fopen(local_replay_file_name, "r");
549 
550     // Make sure we found a log!
551     CILK_ASSERT (NULL != f);
552 
553     // Initialize the replay_list
554     allocate_replay_list(w, f);
555     replay_entry_t *entry = w->l->replay_list_root;
556 
557     // Read the data out and add it to our tables
558     while (! feof(f))
559     {
560 #ifndef INCLUDE_SEQUENCE_NUMBER
561         fret = fscanf(f, "%s %s %d %d\n", ped_type, ped_str, &i1, &i2);
562         if(EOF == fret)
563             break;
564 
565         // We must have read 4 fields
566         CILK_ASSERT(4 == fret);
567 #else
568         int32_t write_num;
569         fret = fscanf(f, "%s %s %d %d %d\n", ped_type, ped_str,
570                       &i1, &i2, &write_num);
571         if(EOF == fret)
572             break;
573 
574         // We must have read 5 fields
575         CILK_ASSERT(5 == fret);
576 #endif // INCLUDE_SEQUENCE_NUMBER
577 
578         // Load the data into the entry
579         if (0 == strcmp(ped_type, PED_TYPE_STR_WORKERS))
580         {
581             // Verify we're replaying with the same number of workers we recorded with
582             if (i1 != w->g->P)
583             {
584                 // Fatal error - does not return
585                 cilkos_error("Cannot continue replay: number of workers(%d) doesn't match "
586                              "that from the recording(%d).\n", w->g->P, i1);
587             }
588 
589             // Verify that we understand this version of the pedigree file
590             if (PED_VERSION != i2)
591             {
592                 // Fatal error - does not return
593                 cilkos_error("Pedigree file version %d doesn't match current "
594                              "version %d - cannot continue.\n",
595                              i2, PED_VERSION);
596             }
597         }
598         else
599         {
600             entry->load(ped_type, ped_str, i1, i2);
601             entry++;
602         }
603     }
604 
605     // Make sure we've filled the allocated memory.  We initialized the last
606     // entry in
607     CILK_ASSERT(ped_type_last == entry->m_type);
608     w->l->replay_list_entry = w->l->replay_list_root;
609 
610     // Close the log and return
611     fclose(f);
612 }
613 
614 /**
615  * Scan a recorded log to match STEALs againsted ORPHANED records.
616  *
617  * @param g Cilk Runtime global state.  Passed to access the worker array so
618  * we can scan a worker's ORPHANED entries for one that matches a STEAL entry.
619  * @param entry The root of a replay_list for a worker.
620  */
621 static
scan_for_matching_steals(global_state_t * g,replay_entry_t * entry)622 void scan_for_matching_steals(global_state_t *g, replay_entry_t *entry)
623 {
624     // Iterate over all of the entries
625     while (ped_type_last != entry->m_type)
626     {
627         // Look for STEALs.  That will tell us which worker the frame was
628         // stolen from
629         if (ped_type_steal == entry->m_type)
630         {
631             bool found = false;
632 
633             // Validate the worker ID and make sure we've got a list
634             CILK_ASSERT((entry->m_value >= 0) && (entry->m_value < g->total_workers));
635             replay_entry_t *victim_entry = g->workers[entry->m_value]->l->replay_list_root;
636             CILK_ASSERT(NULL != victim_entry);
637 
638             // Scan the victim's list for the matching ORPHANED record
639             while ((ped_type_last != victim_entry->m_type) && ! found)
640             {
641                 if (ped_type_orphaned == victim_entry->m_type)
642                 {
643                     if (entry->m_pedigree_len == victim_entry->m_pedigree_len)
644                     {
645                         if (0 == memcmp(entry->m_reverse_pedigree,
646                                         victim_entry->m_reverse_pedigree,
647                                         entry->m_pedigree_len * sizeof(int64_t)))
648                         {
649                             // Note that this ORPHANED record has a matching steal
650                             victim_entry->m_value = 0;
651                             found = true;
652                         }
653                     }
654                 }
655                 victim_entry++;
656             }
657         }
658         entry++;
659     }
660 }
661 
662 
663 /*
664  * Initialize per-worker data for record or replay - See record-replay.h
665  * for full routine header.
666  */
replay_init_workers(global_state_t * g)667 void replay_init_workers(global_state_t *g)
668 {
669     int i;
670     char worker_file_name[512];
671 
672     // If we're not recording or replaying a log, we're done.  All of the
673     // fields in the global_state_t or local_state_t are already initialized
674     // to default values.
675     if (RECORD_REPLAY_NONE == g->record_or_replay)
676         return;
677 
678     // If we're replaying a log, read each worker's log and construct the
679     // in-memory log
680     if (REPLAY_LOG == g->record_or_replay)
681     {
682         // Read all of the data
683         for (i = 0; i < g->total_workers; ++i)
684         {
685             // This function will also initialize and fill the worker's
686             // replay list
687             load_recorded_log(g->workers[i]);
688         }
689 
690         // Scan for orphans with no matching steal.  Mark them so they'll be
691         // skipped as we advance through the log.
692         for (i = 0; i < g->total_workers; ++i)
693         {
694             scan_for_matching_steals(g, g->workers[i]->l->replay_list_root);
695         }
696 
697         // If we're recording the logs while replaying, create the log files.
698         // This will only be used for debugging.  Create the logs in the
699         // current directory.  It should be as good a place as any...
700 #if RECORD_ON_REPLAY
701         for(i = 0; i < g->total_workers; ++i)
702         {
703             __cilkrts_worker *w = g->workers[i];
704             sprintf(worker_file_name, "replay_log_%d.cilklog",  w->self);
705             w->l->record_replay_fptr = fopen(worker_file_name, "w+");
706             CILK_ASSERT(NULL != w->l->record_replay_fptr);
707         }
708 
709         // Record the number of workers, file version in Worker 0's file
710         write_to_replay_log (g->workers[0], PED_TYPE_STR_WORKERS, NULL, g->P, PED_VERSION);
711 #endif // RECORD_ON_REPLAY
712     }
713 
714     // If we're recording, create the log files
715     if (RECORD_LOG == g->record_or_replay)
716     {
717         for(i = 0; i < g->total_workers; ++i)
718         {
719             __cilkrts_worker *w = g->workers[i];
720             sprintf(worker_file_name, "%s%d.cilklog",
721                     g->record_replay_file_name,
722                     w->self);
723             w->l->record_replay_fptr = fopen(worker_file_name, "w+");
724             CILK_ASSERT(NULL != w->l->record_replay_fptr);
725         }
726 
727         // Record the number of workers, file version in Worker 0's file
728         write_to_replay_log (g->workers[0], PED_TYPE_STR_WORKERS, NULL, g->P, PED_VERSION);
729     }
730 }
731 
732 /*
733  * Do any necessary cleanup for the logs - See record-replay.h for full
734  * routine header.
735  */
replay_term(global_state_t * g)736 void replay_term(global_state_t *g)
737 {
738     // Free memory for the record/replay log file name, if we've got one
739     if (g->record_replay_file_name)
740         __cilkrts_free(g->record_replay_file_name);
741 
742     // Per-worker cleanup
743     for(int i = 0; i < g->total_workers; ++i)
744     {
745         __cilkrts_worker *w = g->workers[i];
746 
747         // Close the log files, if we've opened them
748         if(w->l->record_replay_fptr)
749             fclose(w->l->record_replay_fptr);
750 
751         if (w->l->replay_list_root)
752         {
753             // We should have consumed the entire list
754             CILK_ASSERT(ped_type_last == w->l->replay_list_entry->m_type);
755 
756             replay_entry_t *entry = w->l->replay_list_root;
757             while (ped_type_last != entry->m_type)
758             {
759                 // Free the pedigree memory for each entry
760                 entry->unload();
761                 entry++;
762             }
763             __cilkrts_free(w->l->replay_list_root);
764             w->l->replay_list_root = NULL;
765             w->l->replay_list_entry = NULL;
766         }
767     }
768 }
769 
770 __CILKRTS_END_EXTERN_C
771