1 /*
2 * ompt-tsan.cpp -- Archer runtime library, TSan annotations for Archer
3 */
4
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for details.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12
13 #ifndef __STDC_FORMAT_MACROS
14 #define __STDC_FORMAT_MACROS
15 #endif
16
17 #include <algorithm>
18 #include <atomic>
19 #include <cassert>
20 #include <cstdlib>
21 #include <cstring>
22 #include <inttypes.h>
23 #include <iostream>
24 #include <list>
25 #include <mutex>
26 #include <sstream>
27 #include <string>
28 #include <sys/resource.h>
29 #include <unistd.h>
30 #include <unordered_map>
31 #include <vector>
32
33 #if (defined __APPLE__ && defined __MACH__)
34 #include <dlfcn.h>
35 #endif
36
37 #include "omp-tools.h"
38
39 // Define attribute that indicates that the fall through from the previous
40 // case label is intentional and should not be diagnosed by a compiler
41 // Code from libcxx/include/__config
42 // Use a function like macro to imply that it must be followed by a semicolon
43 #if __cplusplus > 201402L && __has_cpp_attribute(fallthrough)
44 #define KMP_FALLTHROUGH() [[fallthrough]]
45 #elif __has_cpp_attribute(clang::fallthrough)
46 #define KMP_FALLTHROUGH() [[clang::fallthrough]]
47 #elif __has_attribute(fallthrough) || __GNUC__ >= 7
48 #define KMP_FALLTHROUGH() __attribute__((__fallthrough__))
49 #else
50 #define KMP_FALLTHROUGH() ((void)0)
51 #endif
52
// Nonzero when we believe the TSan annotation entry points are live in this
// process; cleared by RunningOnValgrind() below when TSan is not detected.
// (Where it is set to 1 is not visible in this chunk.)
static int runOnTsan;
// Result of registering the OMPT reduction callback (set during tool init,
// not visible here). Compared against ompt_set_always: when the runtime
// cannot guarantee reduction events, the legacy ignore-writes workaround in
// the barrier handling is used instead.
static int hasReductionCallback;
55
/// Runtime options for Archer, parsed from the ARCHER_OPTIONS environment
/// variable — a space-separated list of key=value pairs. Unknown tokens are
/// reported to stderr and otherwise ignored.
class ArcherFlags {
public:
#if (LLVM_VERSION) >= 40
  int flush_shadow{0};
#endif
  int print_max_rss{0};
  int verbose{0};
  int enabled{1};
  int report_data_leak{0};
  int ignore_serial{0};

  /// Parse \p env (may be null, meaning all defaults).
  ArcherFlags(const char *env) {
    if (!env)
      return;
    std::istringstream Input{std::string(env)};
    std::string Tok;
    // Scan each space-separated token against the known options; the first
    // matching pattern wins.
    while (std::getline(Input, Tok, ' ')) {
      const char *Str = Tok.c_str();
#if (LLVM_VERSION) >= 40
      if (sscanf(Str, "flush_shadow=%d", &flush_shadow))
        continue;
#endif
      if (sscanf(Str, "print_max_rss=%d", &print_max_rss))
        continue;
      if (sscanf(Str, "verbose=%d", &verbose))
        continue;
      if (sscanf(Str, "report_data_leak=%d", &report_data_leak))
        continue;
      if (sscanf(Str, "enable=%d", &enabled))
        continue;
      if (sscanf(Str, "ignore_serial=%d", &ignore_serial))
        continue;
      std::cerr << "Illegal values for ARCHER_OPTIONS variable: " << Tok
                << std::endl;
    }
  }
};
98
/// Mirror of the TSAN_OPTIONS settings Archer inspects.
class TsanFlags {
public:
  int ignore_noninstrumented_modules;

  /// Parse \p env (TSAN_OPTIONS); flags are separated by space, comma,
  /// colon, or any whitespace character.
  TsanFlags(const char *env) : ignore_noninstrumented_modules(0) {
    if (!env)
      return;
    const std::string Str(env);
    std::string::size_type Pos = 0;
    while (Pos <= Str.size()) {
      auto End = Str.find_first_of(" ,:\n\t\r", Pos);
      if (End == std::string::npos)
        End = Str.size();
      const std::string Token = Str.substr(Pos, End - Pos);
      // We are only interested in ignore_noninstrumented_modules, to print
      // a warning elsewhere when it is not set as expected.
      sscanf(Token.c_str(), "ignore_noninstrumented_modules=%d",
             &ignore_noninstrumented_modules);
      Pos = End + 1;
    }
  }
};
132
#if (LLVM_VERSION) >= 40
extern "C" {
// Weak: expected to be resolved elsewhere; callers check its address before
// calling (see ompt_tsan_parallel_end).
int __attribute__((weak)) __archer_get_omp_status();
// TSan interface entry point; this empty weak fallback is used when TSan is
// not linked into the process.
void __attribute__((weak)) __tsan_flush_memory() {}
}
#endif
// Options parsed from ARCHER_OPTIONS; presumably allocated during tool
// initialization (not visible in this chunk) and read throughout the file.
ArcherFlags *archer_flags;
140
#ifndef TsanHappensBefore
// Thread Sanitizer is a tool that finds races in code.
// See http://code.google.com/p/data-race-test/wiki/DynamicAnnotations .
// tsan detects these exact functions by name.
extern "C" {
#if (defined __APPLE__ && defined __MACH__)
// On macOS the entry points are kept as function pointers; presumably they
// are resolved dynamically during tool initialization (not visible in this
// chunk) rather than via weak-symbol interposition.
static void (*AnnotateHappensAfter)(const char *, int, const volatile void *);
static void (*AnnotateHappensBefore)(const char *, int, const volatile void *);
static void (*AnnotateIgnoreWritesBegin)(const char *, int);
static void (*AnnotateIgnoreWritesEnd)(const char *, int);
static void (*AnnotateNewMemory)(const char *, int, const volatile void *,
                                 size_t);
static void (*__tsan_func_entry)(const void *);
static void (*__tsan_func_exit)(void);

// TSan ships its own RunningOnValgrind; probe for it via dlsym to detect
// whether the annotation functions are present in this execution.
static int RunningOnValgrind() {
  int (*fptr)();

  fptr = (int (*)())dlsym(RTLD_DEFAULT, "RunningOnValgrind");
  // If we found RunningOnValgrind other than this function, we assume
  // Annotation functions present in this execution and leave runOnTsan=1
  // otherwise we change to runOnTsan=0
  if (!fptr || fptr == RunningOnValgrind)
    runOnTsan = 0;
  return 0;
}
#else
// Weak no-op fallbacks: TSan's strong definitions interpose these by name
// when the application is built with -fsanitize=thread.
void __attribute__((weak))
AnnotateHappensAfter(const char *file, int line, const volatile void *cv) {}
void __attribute__((weak))
AnnotateHappensBefore(const char *file, int line, const volatile void *cv) {}
void __attribute__((weak))
AnnotateIgnoreWritesBegin(const char *file, int line) {}
void __attribute__((weak)) AnnotateIgnoreWritesEnd(const char *file, int line) {
}
void __attribute__((weak))
AnnotateNewMemory(const char *file, int line, const volatile void *cv,
                  size_t size) {}
// If only this weak definition is found, TSan is not active in this process.
int __attribute__((weak)) RunningOnValgrind() {
  runOnTsan = 0;
  return 0;
}
void __attribute__((weak)) __tsan_func_entry(const void *call_pc) {}
void __attribute__((weak)) __tsan_func_exit(void) {}
#endif
}

// This marker is used to define a happens-before arc. The race detector will
// infer an arc from the begin to the end when they share the same pointer
// argument.
#define TsanHappensBefore(cv) AnnotateHappensBefore(__FILE__, __LINE__, cv)

// This marker defines the destination of a happens-before arc.
#define TsanHappensAfter(cv) AnnotateHappensAfter(__FILE__, __LINE__, cv)

// Ignore any races on writes between here and the next TsanIgnoreWritesEnd.
#define TsanIgnoreWritesBegin() AnnotateIgnoreWritesBegin(__FILE__, __LINE__)

// Resume checking for racy writes.
#define TsanIgnoreWritesEnd() AnnotateIgnoreWritesEnd(__FILE__, __LINE__)

// We don't really delete the clock for now
#define TsanDeleteClock(cv)

// newMemory
#define TsanNewMemory(addr, size)                                              \
  AnnotateNewMemory(__FILE__, __LINE__, addr, size)
// NOTE(review): "free" is annotated as fresh memory too — there is no
// distinct free annotation in this interface.
#define TsanFreeMemory(addr, size)                                             \
  AnnotateNewMemory(__FILE__, __LINE__, addr, size)
#endif

// Function entry/exit
#define TsanFuncEntry(pc) __tsan_func_entry(pc)
#define TsanFuncExit() __tsan_func_exit()
215
/// Required OMPT inquiry functions; bound during tool initialization (not
/// visible in this chunk).
static ompt_get_parallel_info_t ompt_get_parallel_info;
static ompt_get_thread_data_t ompt_get_thread_data;

// One byte per synchronization clock. Only the *address* is meaningful — it
// is handed to the Tsan* annotation macros; the stored value is never read.
typedef char ompt_tsan_clockid;
221
/// Return a process-wide unique, monotonically increasing id (used as the
/// OMPT thread id). Thread-safe: a single atomic fetch-and-add.
/// Modernized from the GCC-specific __sync_fetch_and_add builtin to
/// std::atomic (the file already includes <atomic>); semantics unchanged,
/// ids still start at 0.
static uint64_t my_next_id() {
  static std::atomic<uint64_t> ID{0};
  return ID++;
}
227
// System page size; DataPool allocates one page of objects at a time.
// Expected to be initialized during tool startup (not visible in this chunk).
static int pagesize{0};
229
// Data structure to provide a threadsafe pool of reusable objects.
// DataPool<Type of objects>
// Objects are allocated page-wise and padded to the cache-line size.
// The owning thread takes/returns objects lock-free through DataPointer;
// other threads return objects through RemoteDataPointer under DPMutex.
template <typename T> struct DataPool final {
  // Pool of the current thread; created in ompt_tsan_thread_begin.
  static __thread DataPool<T> *ThreadDataPool;
  // Guards RemoteDataPointer for returns from foreign threads.
  std::mutex DPMutex{};

  // store unused objects
  std::vector<T *> DataPointer{};
  std::vector<T *> RemoteDataPointer{};

  // store all allocated memory to finally release
  std::list<void *> memory;

  // count remotely returned data (RemoteDataPointer.size())
  std::atomic<int> remote{0};

  // totally allocated data objects in pool
  int total{0};
#ifdef DEBUG_DATA
  int remoteReturn{0};
  int localReturn{0};

  int getRemote() { return remoteReturn + remote; }
  int getLocal() { return localReturn; }
#endif
  int getTotal() { return total; }
  // Number of objects currently handed out (in neither free list).
  int getMissing() {
    return total - DataPointer.size() - RemoteDataPointer.size();
  }

  // fill the pool by allocating a page of memory
  void newDatas() {
    // Prefer reclaiming objects other threads returned over allocating.
    if (remote > 0) {
      const std::lock_guard<std::mutex> lock(DPMutex);
      // DataPointer is empty, so just swap the vectors
      DataPointer.swap(RemoteDataPointer);
      remote = 0;
      return;
    }
    // calculate size of an object including padding to cacheline size
    size_t elemSize = sizeof(T);
    size_t paddedSize = (((elemSize - 1) / 64) + 1) * 64;
    // number of padded elements to allocate
    // NOTE(review): if pagesize < paddedSize, ndatas is 0 and the pool stays
    // empty (getData would then pop from an empty vector); malloc is also
    // unchecked — confirm pagesize is always initialized before first use.
    int ndatas = pagesize / paddedSize;
    char *datas = (char *)malloc(ndatas * paddedSize);
    memory.push_back(datas);
    for (int i = 0; i < ndatas; i++) {
      DataPointer.push_back(new (datas + i * paddedSize) T(this));
    }
    total += ndatas;
  }

  // get data from the pool
  T *getData() {
    T *ret;
    if (DataPointer.empty())
      newDatas();
    ret = DataPointer.back();
    DataPointer.pop_back();
    return ret;
  }

  // accesses to the thread-local datapool don't need locks
  void returnOwnData(T *data) {
    DataPointer.emplace_back(data);
#ifdef DEBUG_DATA
    localReturn++;
#endif
  }

  // returning to a remote datapool using lock
  void returnData(T *data) {
    const std::lock_guard<std::mutex> lock(DPMutex);
    RemoteDataPointer.emplace_back(data);
    remote++;
#ifdef DEBUG_DATA
    remoteReturn++;
#endif
  }

  ~DataPool() {
    // we assume all memory is returned when the thread finished / destructor is
    // called
    if (archer_flags->report_data_leak && getMissing() != 0) {
      printf("ERROR: While freeing DataPool (%s) we are missing %i data "
             "objects.\n",
             __PRETTY_FUNCTION__, getMissing());
      exit(-3);
    }
    // Objects were placement-new'ed into malloc'ed pages: destroy them
    // explicitly, then free the backing pages.
    for (auto i : DataPointer)
      if (i)
        i->~T();
    for (auto i : RemoteDataPointer)
      if (i)
        i->~T();
    for (auto i : memory)
      if (i)
        free(i);
  }
};
330
331 template <typename T> struct DataPoolEntry {
332 DataPool<T> *owner;
333
NewDataPoolEntry334 static T *New() { return DataPool<T>::ThreadDataPool->getData(); }
335
DeleteDataPoolEntry336 void Delete() {
337 static_cast<T *>(this)->Reset();
338 if (owner == DataPool<T>::ThreadDataPool)
339 owner->returnOwnData(static_cast<T *>(this));
340 else
341 owner->returnData(static_cast<T *>(this));
342 }
343
DataPoolEntryDataPoolEntry344 DataPoolEntry(DataPool<T> *dp) : owner(dp) {}
345 };
346
struct DependencyData;
typedef DataPool<DependencyData> DependencyDataPool;
// Per-thread pool instance; allocated in ompt_tsan_thread_begin.
template <>
__thread DependencyDataPool *DependencyDataPool::ThreadDataPool = nullptr;
351
/// Data structure to store additional information for task dependency.
/// Holds one clock byte per dependence kind (in / out / inoutset); only the
/// addresses are used, as TSan synchronization points (see TaskDependency).
struct DependencyData final : DataPoolEntry<DependencyData> {
  ompt_tsan_clockid in;
  ompt_tsan_clockid out;
  ompt_tsan_clockid inoutset;
  void *GetInPtr() { return &in; }
  void *GetOutPtr() { return &out; }
  void *GetInoutsetPtr() { return &inoutset; }

  // Stateless apart from the addresses; nothing to reset before reuse.
  void Reset() {}

  static DependencyData *New() { return DataPoolEntry<DependencyData>::New(); }

  DependencyData(DataPool<DependencyData> *dp)
      : DataPoolEntry<DependencyData>(dp) {}
};
368
369 struct TaskDependency {
370 void *inPtr;
371 void *outPtr;
372 void *inoutsetPtr;
373 ompt_dependence_type_t type;
TaskDependencyTaskDependency374 TaskDependency(DependencyData *depData, ompt_dependence_type_t type)
375 : inPtr(depData->GetInPtr()), outPtr(depData->GetOutPtr()),
376 inoutsetPtr(depData->GetInoutsetPtr()), type(type) {}
AnnotateBeginTaskDependency377 void AnnotateBegin() {
378 if (type == ompt_dependence_type_out ||
379 type == ompt_dependence_type_inout ||
380 type == ompt_dependence_type_mutexinoutset) {
381 TsanHappensAfter(inPtr);
382 TsanHappensAfter(outPtr);
383 TsanHappensAfter(inoutsetPtr);
384 } else if (type == ompt_dependence_type_in) {
385 TsanHappensAfter(outPtr);
386 TsanHappensAfter(inoutsetPtr);
387 } else if (type == ompt_dependence_type_inoutset) {
388 TsanHappensAfter(inPtr);
389 TsanHappensAfter(outPtr);
390 }
391 }
AnnotateEndTaskDependency392 void AnnotateEnd() {
393 if (type == ompt_dependence_type_out ||
394 type == ompt_dependence_type_inout ||
395 type == ompt_dependence_type_mutexinoutset) {
396 TsanHappensBefore(outPtr);
397 } else if (type == ompt_dependence_type_in) {
398 TsanHappensBefore(inPtr);
399 } else if (type == ompt_dependence_type_inoutset) {
400 TsanHappensBefore(inoutsetPtr);
401 }
402 }
403 };
404
struct ParallelData;
typedef DataPool<ParallelData> ParallelDataPool;
// Per-thread pool instance; allocated in ompt_tsan_thread_begin.
template <>
__thread ParallelDataPool *ParallelDataPool::ThreadDataPool = nullptr;
409
/// Data structure to store additional information for parallel regions.
struct ParallelData final : DataPoolEntry<ParallelData> {

  // Parallel fork is just another barrier, use Barrier[1]

  /// Two addresses for relationships with barriers.
  /// Two clocks alternate (see BarrierIndex in TaskData) because threads may
  /// still be inside barrier N while others already entered barrier N+1.
  ompt_tsan_clockid Barrier[2];

  /// Return address of the parallel construct (may be null for the implicit
  /// initial region); passed to TsanFuncEntry in implicit_task begin.
  const void *codePtr;

  // The fork reuses Barrier[1] as its synchronization clock.
  void *GetParallelPtr() { return &(Barrier[1]); }

  void *GetBarrierPtr(unsigned Index) { return &(Barrier[Index]); }

  ParallelData *Init(const void *codeptr) {
    codePtr = codeptr;
    return this;
  }

  // Only the addresses matter; nothing to reset before pool reuse.
  void Reset() {}

  static ParallelData *New(const void *codeptr) {
    return DataPoolEntry<ParallelData>::New()->Init(codeptr);
  }

  ParallelData(DataPool<ParallelData> *dp) : DataPoolEntry<ParallelData>(dp) {}
};
437
ToParallelData(ompt_data_t * parallel_data)438 static inline ParallelData *ToParallelData(ompt_data_t *parallel_data) {
439 return reinterpret_cast<ParallelData *>(parallel_data->ptr);
440 }
441
struct Taskgroup;
typedef DataPool<Taskgroup> TaskgroupPool;
// Per-thread pool instance; allocated in ompt_tsan_thread_begin.
template <> __thread TaskgroupPool *TaskgroupPool::ThreadDataPool = nullptr;
445
/// Data structure to support stacking of taskgroups and allow synchronization.
/// Taskgroups nest, so each node keeps a link to the enclosing one; see
/// ompt_tsan_sync_region for push (begin) and pop (end).
struct Taskgroup final : DataPoolEntry<Taskgroup> {
  /// Its address is used for relationships of the taskgroup's task set.
  ompt_tsan_clockid Ptr;

  /// Reference to the parent taskgroup.
  Taskgroup *Parent;

  void *GetPtr() { return &Ptr; }

  Taskgroup *Init(Taskgroup *parent) {
    Parent = parent;
    return this;
  }

  // Only the address matters; nothing to reset before pool reuse.
  void Reset() {}

  static Taskgroup *New(Taskgroup *Parent) {
    return DataPoolEntry<Taskgroup>::New()->Init(Parent);
  }

  Taskgroup(DataPool<Taskgroup> *dp) : DataPoolEntry<Taskgroup>(dp) {}
};
469
struct TaskData;
typedef DataPool<TaskData> TaskDataPool;
// Per-thread pool instance; allocated in ompt_tsan_thread_begin.
template <> __thread TaskDataPool *TaskDataPool::ThreadDataPool = nullptr;
473
474 /// Data structure to store additional information for tasks.
475 struct TaskData final : DataPoolEntry<TaskData> {
476 /// Its address is used for relationships of this task.
477 ompt_tsan_clockid Task{0};
478
479 /// Child tasks use its address to declare a relationship to a taskwait in
480 /// this task.
481 ompt_tsan_clockid Taskwait{0};
482
483 /// Whether this task is currently executing a barrier.
484 bool InBarrier{false};
485
486 /// Whether this task is an included task.
487 int TaskType{0};
488
489 /// count execution phase
490 int execution{0};
491
492 /// Index of which barrier to use next.
493 char BarrierIndex{0};
494
495 /// Count how often this structure has been put into child tasks + 1.
496 std::atomic_int RefCount{1};
497
498 /// Reference to the parent that created this task.
499 TaskData *Parent{nullptr};
500
501 /// Reference to the implicit task in the stack above this task.
502 TaskData *ImplicitTask{nullptr};
503
504 /// Reference to the team of this task.
505 ParallelData *Team{nullptr};
506
507 /// Reference to the current taskgroup that this task either belongs to or
508 /// that it just created.
509 Taskgroup *TaskGroup{nullptr};
510
511 /// Dependency information for this task.
512 TaskDependency *Dependencies{nullptr};
513
514 /// Number of dependency entries.
515 unsigned DependencyCount{0};
516
517 // The dependency-map stores DependencyData objects representing
518 // the dependency variables used on the sibling tasks created from
519 // this task
520 // We expect a rare need for the dependency-map, so alloc on demand
521 std::unordered_map<void *, DependencyData *> *DependencyMap{nullptr};
522
523 #ifdef DEBUG
524 int freed{0};
525 #endif
526
isIncludedTaskData527 bool isIncluded() { return TaskType & ompt_task_undeferred; }
isUntiedTaskData528 bool isUntied() { return TaskType & ompt_task_untied; }
isFinalTaskData529 bool isFinal() { return TaskType & ompt_task_final; }
isMergableTaskData530 bool isMergable() { return TaskType & ompt_task_mergeable; }
isMergedTaskData531 bool isMerged() { return TaskType & ompt_task_merged; }
532
isExplicitTaskData533 bool isExplicit() { return TaskType & ompt_task_explicit; }
isImplicitTaskData534 bool isImplicit() { return TaskType & ompt_task_implicit; }
isInitialTaskData535 bool isInitial() { return TaskType & ompt_task_initial; }
isTargetTaskData536 bool isTarget() { return TaskType & ompt_task_target; }
537
GetTaskPtrTaskData538 void *GetTaskPtr() { return &Task; }
539
GetTaskwaitPtrTaskData540 void *GetTaskwaitPtr() { return &Taskwait; }
541
InitTaskData542 TaskData *Init(TaskData *parent, int taskType) {
543 TaskType = taskType;
544 Parent = parent;
545 Team = Parent->Team;
546 if (Parent != nullptr) {
547 Parent->RefCount++;
548 // Copy over pointer to taskgroup. This task may set up its own stack
549 // but for now belongs to its parent's taskgroup.
550 TaskGroup = Parent->TaskGroup;
551 }
552 return this;
553 }
554
InitTaskData555 TaskData *Init(ParallelData *team, int taskType) {
556 TaskType = taskType;
557 execution = 1;
558 ImplicitTask = this;
559 Team = team;
560 return this;
561 }
562
ResetTaskData563 void Reset() {
564 InBarrier = false;
565 TaskType = 0;
566 execution = 0;
567 BarrierIndex = 0;
568 RefCount = 1;
569 Parent = nullptr;
570 ImplicitTask = nullptr;
571 Team = nullptr;
572 TaskGroup = nullptr;
573 if (DependencyMap) {
574 for (auto i : *DependencyMap)
575 i.second->Delete();
576 delete DependencyMap;
577 }
578 DependencyMap = nullptr;
579 if (Dependencies)
580 free(Dependencies);
581 Dependencies = nullptr;
582 DependencyCount = 0;
583 #ifdef DEBUG
584 freed = 0;
585 #endif
586 }
587
NewTaskData588 static TaskData *New(TaskData *parent, int taskType) {
589 return DataPoolEntry<TaskData>::New()->Init(parent, taskType);
590 }
591
NewTaskData592 static TaskData *New(ParallelData *team, int taskType) {
593 return DataPoolEntry<TaskData>::New()->Init(team, taskType);
594 }
595
TaskDataTaskData596 TaskData(DataPool<TaskData> *dp) : DataPoolEntry<TaskData>(dp) {}
597 };
598
ToTaskData(ompt_data_t * task_data)599 static inline TaskData *ToTaskData(ompt_data_t *task_data) {
600 return reinterpret_cast<TaskData *>(task_data->ptr);
601 }
602
/// Store a mutex for each wait_id to resolve race condition with callbacks.
std::unordered_map<ompt_wait_id_t, std::mutex> Locks;
// Presumably guards insertions/lookups in Locks — usage is not visible in
// this chunk; confirm against the mutex-acquire callbacks.
std::mutex LocksMutex;
606
ompt_tsan_thread_begin(ompt_thread_t thread_type,ompt_data_t * thread_data)607 static void ompt_tsan_thread_begin(ompt_thread_t thread_type,
608 ompt_data_t *thread_data) {
609 ParallelDataPool::ThreadDataPool = new ParallelDataPool;
610 TsanNewMemory(ParallelDataPool::ThreadDataPool,
611 sizeof(ParallelDataPool::ThreadDataPool));
612 TaskgroupPool::ThreadDataPool = new TaskgroupPool;
613 TsanNewMemory(TaskgroupPool::ThreadDataPool,
614 sizeof(TaskgroupPool::ThreadDataPool));
615 TaskDataPool::ThreadDataPool = new TaskDataPool;
616 TsanNewMemory(TaskDataPool::ThreadDataPool,
617 sizeof(TaskDataPool::ThreadDataPool));
618 DependencyDataPool::ThreadDataPool = new DependencyDataPool;
619 TsanNewMemory(DependencyDataPool::ThreadDataPool,
620 sizeof(DependencyDataPool::ThreadDataPool));
621 thread_data->value = my_next_id();
622 }
623
/// Thread end: tear down the thread-local pools. The pool destructors'
/// bookkeeping writes are runtime-internal, so TSan write tracking is
/// suppressed around them.
static void ompt_tsan_thread_end(ompt_data_t *thread_data) {
  TsanIgnoreWritesBegin();
  delete ParallelDataPool::ThreadDataPool;
  delete TaskgroupPool::ThreadDataPool;
  delete TaskDataPool::ThreadDataPool;
  delete DependencyDataPool::ThreadDataPool;
  TsanIgnoreWritesEnd();
}
632
633 /// OMPT event callbacks for handling parallel regions.
634
ompt_tsan_parallel_begin(ompt_data_t * parent_task_data,const ompt_frame_t * parent_task_frame,ompt_data_t * parallel_data,uint32_t requested_team_size,int flag,const void * codeptr_ra)635 static void ompt_tsan_parallel_begin(ompt_data_t *parent_task_data,
636 const ompt_frame_t *parent_task_frame,
637 ompt_data_t *parallel_data,
638 uint32_t requested_team_size, int flag,
639 const void *codeptr_ra) {
640 ParallelData *Data = ParallelData::New(codeptr_ra);
641 parallel_data->ptr = Data;
642
643 TsanHappensBefore(Data->GetParallelPtr());
644 if (archer_flags->ignore_serial && ToTaskData(parent_task_data)->isInitial())
645 TsanIgnoreWritesEnd();
646 }
647
ompt_tsan_parallel_end(ompt_data_t * parallel_data,ompt_data_t * task_data,int flag,const void * codeptr_ra)648 static void ompt_tsan_parallel_end(ompt_data_t *parallel_data,
649 ompt_data_t *task_data, int flag,
650 const void *codeptr_ra) {
651 if (archer_flags->ignore_serial && ToTaskData(task_data)->isInitial())
652 TsanIgnoreWritesBegin();
653 ParallelData *Data = ToParallelData(parallel_data);
654 TsanHappensAfter(Data->GetBarrierPtr(0));
655 TsanHappensAfter(Data->GetBarrierPtr(1));
656
657 Data->Delete();
658
659 #if (LLVM_VERSION >= 40)
660 if (&__archer_get_omp_status) {
661 if (__archer_get_omp_status() == 0 && archer_flags->flush_shadow)
662 __tsan_flush_memory();
663 }
664 #endif
665 }
666
/// Begin/end of an implicit (or initial) task: create/destroy its TaskData
/// and order it after the fork of its parallel region.
static void ompt_tsan_implicit_task(ompt_scope_endpoint_t endpoint,
                                    ompt_data_t *parallel_data,
                                    ompt_data_t *task_data,
                                    unsigned int team_size,
                                    unsigned int thread_num, int type) {
  switch (endpoint) {
  case ompt_scope_begin:
    // The initial task sees no parallel_begin callback, so its ParallelData
    // is created here on demand.
    if (type & ompt_task_initial) {
      parallel_data->ptr = ParallelData::New(nullptr);
    }
    task_data->ptr = TaskData::New(ToParallelData(parallel_data), type);
    // The implicit task starts after the region's fork.
    TsanHappensAfter(ToParallelData(parallel_data)->GetParallelPtr());
    TsanFuncEntry(ToParallelData(parallel_data)->codePtr);
    break;
  case ompt_scope_end: {
    TaskData *Data = ToTaskData(task_data);
#ifdef DEBUG
    assert(Data->freed == 0 && "Implicit task end should only be called once!");
    Data->freed = 1;
#endif
    assert(Data->RefCount == 1 &&
           "All tasks should have finished at the implicit barrier!");
    Data->Delete();
    // The initial task also owns the ParallelData created in the begin case.
    if (type & ompt_task_initial) {
      ToParallelData(parallel_data)->Delete();
    }
    TsanFuncExit();
    break;
  }
  case ompt_scope_beginend:
    // Should not occur according to OpenMP 5.1
    // Tested in OMPT tests
    break;
  }
}
702
/// Handle barrier / taskwait / taskgroup scopes. Note the control flow: for
/// ompt_scope_beginend the begin handling runs first and then deliberately
/// falls through (KMP_FALLTHROUGH) into the end handling.
static void ompt_tsan_sync_region(ompt_sync_region_t kind,
                                  ompt_scope_endpoint_t endpoint,
                                  ompt_data_t *parallel_data,
                                  ompt_data_t *task_data,
                                  const void *codeptr_ra) {
  TaskData *Data = ToTaskData(task_data);
  switch (endpoint) {
  case ompt_scope_begin:
  case ompt_scope_beginend:
    TsanFuncEntry(codeptr_ra);
    switch (kind) {
    case ompt_sync_region_barrier_implementation:
    case ompt_sync_region_barrier_implicit:
    case ompt_sync_region_barrier_explicit:
    case ompt_sync_region_barrier_implicit_parallel:
    case ompt_sync_region_barrier_implicit_workshare:
    case ompt_sync_region_barrier_teams:
    case ompt_sync_region_barrier: {
      // Entering the barrier happens before the barrier's release clock.
      char BarrierIndex = Data->BarrierIndex;
      TsanHappensBefore(Data->Team->GetBarrierPtr(BarrierIndex));

      if (hasReductionCallback < ompt_set_always) {
        // We ignore writes inside the barrier. These would either occur during
        // 1. reductions performed by the runtime which are guaranteed to be
        // race-free.
        // 2. execution of another task.
        // For the latter case we will re-enable tracking in task_switch.
        Data->InBarrier = true;
        TsanIgnoreWritesBegin();
      }

      break;
    }

    case ompt_sync_region_taskwait:
      break;

    case ompt_sync_region_taskgroup:
      // Push a new taskgroup onto this task's taskgroup stack.
      Data->TaskGroup = Taskgroup::New(Data->TaskGroup);
      break;

    case ompt_sync_region_reduction:
      // should never be reached
      break;
    }
    // A plain begin event stops here; only beginend continues into the end
    // handling below.
    if (endpoint == ompt_scope_begin)
      break;
    KMP_FALLTHROUGH();
  case ompt_scope_end:
    TsanFuncExit();
    switch (kind) {
    case ompt_sync_region_barrier_implementation:
    case ompt_sync_region_barrier_implicit:
    case ompt_sync_region_barrier_explicit:
    case ompt_sync_region_barrier_implicit_parallel:
    case ompt_sync_region_barrier_implicit_workshare:
    case ompt_sync_region_barrier_teams:
    case ompt_sync_region_barrier: {
      if (hasReductionCallback < ompt_set_always) {
        // We want to track writes after the barrier again.
        Data->InBarrier = false;
        TsanIgnoreWritesEnd();
      }

      char BarrierIndex = Data->BarrierIndex;
      // Barrier will end after it has been entered by all threads.
      // NOTE(review): parallel_data is apparently null in some end events —
      // the guard skips the annotation then; confirm which runtime paths
      // deliver a null handle here.
      if (parallel_data)
        TsanHappensAfter(Data->Team->GetBarrierPtr(BarrierIndex));

      // It is not guaranteed that all threads have exited this barrier before
      // we enter the next one. So we will use a different address.
      // We are however guaranteed that this current barrier is finished
      // by the time we exit the next one. So we can then reuse the first
      // address.
      Data->BarrierIndex = (BarrierIndex + 1) % 2;
      break;
    }

    case ompt_sync_region_taskwait: {
      // Only synchronize if child tasks may have run (execution phase > 1).
      if (Data->execution > 1)
        TsanHappensAfter(Data->GetTaskwaitPtr());
      break;
    }

    case ompt_sync_region_taskgroup: {
      assert(Data->TaskGroup != nullptr &&
             "Should have at least one taskgroup!");

      TsanHappensAfter(Data->TaskGroup->GetPtr());

      // Delete this allocated taskgroup, all descendent task are finished by
      // now.
      Taskgroup *Parent = Data->TaskGroup->Parent;
      Data->TaskGroup->Delete();
      Data->TaskGroup = Parent;
      break;
    }

    case ompt_sync_region_reduction:
      // Should not occur according to OpenMP 5.1
      // Tested in OMPT tests
      break;
    }
    break;
  }
}
809
ompt_tsan_reduction(ompt_sync_region_t kind,ompt_scope_endpoint_t endpoint,ompt_data_t * parallel_data,ompt_data_t * task_data,const void * codeptr_ra)810 static void ompt_tsan_reduction(ompt_sync_region_t kind,
811 ompt_scope_endpoint_t endpoint,
812 ompt_data_t *parallel_data,
813 ompt_data_t *task_data,
814 const void *codeptr_ra) {
815 switch (endpoint) {
816 case ompt_scope_begin:
817 switch (kind) {
818 case ompt_sync_region_reduction:
819 TsanIgnoreWritesBegin();
820 break;
821 default:
822 break;
823 }
824 break;
825 case ompt_scope_end:
826 switch (kind) {
827 case ompt_sync_region_reduction:
828 TsanIgnoreWritesEnd();
829 break;
830 default:
831 break;
832 }
833 break;
834 case ompt_scope_beginend:
835 // Should not occur according to OpenMP 5.1
836 // Tested in OMPT tests
837 // Would have no implications for DR detection
838 break;
839 }
840 }
841
842 /// OMPT event callbacks for handling tasks.
843
ompt_tsan_task_create(ompt_data_t * parent_task_data,const ompt_frame_t * parent_frame,ompt_data_t * new_task_data,int type,int has_dependences,const void * codeptr_ra)844 static void ompt_tsan_task_create(
845 ompt_data_t *parent_task_data, /* id of parent task */
846 const ompt_frame_t *parent_frame, /* frame data for parent task */
847 ompt_data_t *new_task_data, /* id of created task */
848 int type, int has_dependences,
849 const void *codeptr_ra) /* pointer to outlined function */
850 {
851 TaskData *Data;
852 assert(new_task_data->ptr == NULL &&
853 "Task data should be initialized to NULL");
854 if (type & ompt_task_initial) {
855 ompt_data_t *parallel_data;
856 int team_size = 1;
857 ompt_get_parallel_info(0, ¶llel_data, &team_size);
858 ParallelData *PData = ParallelData::New(nullptr);
859 parallel_data->ptr = PData;
860
861 Data = TaskData::New(PData, type);
862 new_task_data->ptr = Data;
863 } else if (type & ompt_task_undeferred) {
864 Data = TaskData::New(ToTaskData(parent_task_data), type);
865 new_task_data->ptr = Data;
866 } else if (type & ompt_task_explicit || type & ompt_task_target) {
867 Data = TaskData::New(ToTaskData(parent_task_data), type);
868 new_task_data->ptr = Data;
869
870 // Use the newly created address. We cannot use a single address from the
871 // parent because that would declare wrong relationships with other
872 // sibling tasks that may be created before this task is started!
873 TsanHappensBefore(Data->GetTaskPtr());
874 ToTaskData(parent_task_data)->execution++;
875 }
876 }
877
freeTask(TaskData * task)878 static void freeTask(TaskData *task) {
879 while (task != nullptr && --task->RefCount == 0) {
880 TaskData *Parent = task->Parent;
881 task->Delete();
882 task = Parent;
883 }
884 }
885
releaseDependencies(TaskData * task)886 static void releaseDependencies(TaskData *task) {
887 for (unsigned i = 0; i < task->DependencyCount; i++) {
888 task->Dependencies[i].AnnotateEnd();
889 }
890 }
891
acquireDependencies(TaskData * task)892 static void acquireDependencies(TaskData *task) {
893 for (unsigned i = 0; i < task->DependencyCount; i++) {
894 task->Dependencies[i].AnnotateBegin();
895 }
896 }
897
/// Task switch: handle completion/suspension of the previous task and the
/// (re)start of the next one, per prior_task_status.
static void ompt_tsan_task_schedule(ompt_data_t *first_task_data,
                                    ompt_task_status_t prior_task_status,
                                    ompt_data_t *second_task_data) {

  //
  // The necessary action depends on prior_task_status:
  //
  // ompt_task_early_fulfill = 5,
  // -> ignored
  //
  // ompt_task_late_fulfill = 6,
  // -> first completed, first freed, second ignored
  //
  // ompt_task_complete = 1,
  // ompt_task_cancel = 3,
  // -> first completed, first freed, second starts
  //
  // ompt_task_detach = 4,
  // ompt_task_yield = 2,
  // ompt_task_switch = 7
  // -> first suspended, second starts
  //

  if (prior_task_status == ompt_task_early_fulfill)
    return;

  TaskData *FromTask = ToTaskData(first_task_data);

  // Legacy handling for missing reduction callback
  if (hasReductionCallback < ompt_set_always && FromTask->InBarrier) {
    // We want to ignore writes in the runtime code during barriers,
    // but not when executing tasks with user code!
    TsanIgnoreWritesEnd();
  }

  // The late fulfill happens after the detached task finished execution
  if (prior_task_status == ompt_task_late_fulfill)
    TsanHappensAfter(FromTask->GetTaskPtr());

  // task completed execution
  if (prior_task_status == ompt_task_complete ||
      prior_task_status == ompt_task_cancel ||
      prior_task_status == ompt_task_late_fulfill) {
    // Included tasks are executed sequentially, no need to track
    // synchronization
    if (!FromTask->isIncluded()) {
      // Task will finish before a barrier in the surrounding parallel region
      // ...
      ParallelData *PData = FromTask->Team;
      TsanHappensBefore(
          PData->GetBarrierPtr(FromTask->ImplicitTask->BarrierIndex));

      // ... and before an eventual taskwait by the parent thread.
      TsanHappensBefore(FromTask->Parent->GetTaskwaitPtr());

      if (FromTask->TaskGroup != nullptr) {
        // This task is part of a taskgroup, so it will finish before the
        // corresponding taskgroup_end.
        TsanHappensBefore(FromTask->TaskGroup->GetPtr());
      }
    }

    // release dependencies
    releaseDependencies(FromTask);
    // free the previously running task
    freeTask(FromTask);
  }

  // For late fulfill of detached task, there is no task to schedule to
  if (prior_task_status == ompt_task_late_fulfill) {
    return;
  }

  TaskData *ToTask = ToTaskData(second_task_data);
  // Legacy handling for missing reduction callback
  if (hasReductionCallback < ompt_set_always && ToTask->InBarrier) {
    // We re-enter runtime code which currently performs a barrier.
    TsanIgnoreWritesBegin();
  }

  // task suspended
  if (prior_task_status == ompt_task_switch ||
      prior_task_status == ompt_task_yield ||
      prior_task_status == ompt_task_detach) {
    // Task may be resumed at a later point in time.
    TsanHappensBefore(FromTask->GetTaskPtr());
    // The resuming task runs on this thread's implicit-task stack.
    ToTask->ImplicitTask = FromTask->ImplicitTask;
    assert(ToTask->ImplicitTask != NULL &&
           "A task belongs to a team and has an implicit task on the stack");
  }

  // Handle dependencies on first execution of the task
  if (ToTask->execution == 0) {
    ToTask->execution++;
    acquireDependencies(ToTask);
  }
  // 1. Task will begin execution after it has been created.
  // 2. Task will resume after it has been switched away.
  TsanHappensAfter(ToTask->GetTaskPtr());
}
998
ompt_tsan_dependences(ompt_data_t * task_data,const ompt_dependence_t * deps,int ndeps)999 static void ompt_tsan_dependences(ompt_data_t *task_data,
1000 const ompt_dependence_t *deps, int ndeps) {
1001 if (ndeps > 0) {
1002 // Copy the data to use it in task_switch and task_end.
1003 TaskData *Data = ToTaskData(task_data);
1004 if (!Data->Parent->DependencyMap)
1005 Data->Parent->DependencyMap =
1006 new std::unordered_map<void *, DependencyData *>();
1007 Data->Dependencies =
1008 (TaskDependency *)malloc(sizeof(TaskDependency) * ndeps);
1009 Data->DependencyCount = ndeps;
1010 for (int i = 0; i < ndeps; i++) {
1011 auto ret = Data->Parent->DependencyMap->insert(
1012 std::make_pair(deps[i].variable.ptr, nullptr));
1013 if (ret.second) {
1014 ret.first->second = DependencyData::New();
1015 }
1016 new ((void *)(Data->Dependencies + i))
1017 TaskDependency(ret.first->second, deps[i].dependence_type);
1018 }
1019
1020 // This callback is executed before this task is first started.
1021 TsanHappensBefore(Data->GetTaskPtr());
1022 }
1023 }
1024
1025 /// OMPT event callbacks for handling locking.
ompt_tsan_mutex_acquired(ompt_mutex_t kind,ompt_wait_id_t wait_id,const void * codeptr_ra)1026 static void ompt_tsan_mutex_acquired(ompt_mutex_t kind, ompt_wait_id_t wait_id,
1027 const void *codeptr_ra) {
1028
1029 // Acquire our own lock to make sure that
1030 // 1. the previous release has finished.
1031 // 2. the next acquire doesn't start before we have finished our release.
1032 LocksMutex.lock();
1033 std::mutex &Lock = Locks[wait_id];
1034 LocksMutex.unlock();
1035
1036 Lock.lock();
1037 TsanHappensAfter(&Lock);
1038 }
1039
ompt_tsan_mutex_released(ompt_mutex_t kind,ompt_wait_id_t wait_id,const void * codeptr_ra)1040 static void ompt_tsan_mutex_released(ompt_mutex_t kind, ompt_wait_id_t wait_id,
1041 const void *codeptr_ra) {
1042 LocksMutex.lock();
1043 std::mutex &Lock = Locks[wait_id];
1044 LocksMutex.unlock();
1045 TsanHappensBefore(&Lock);
1046
1047 Lock.unlock();
1048 }
1049
// callback , signature , variable to store result , required support level
// Registers ompt_tsan_<event> as the handler for ompt_callback_<event> via
// ompt_set_callback. The runtime's returned support level is stored in
// `result`; a warning is printed if it is below the requested `level`.
#define SET_OPTIONAL_CALLBACK_T(event, type, result, level)                    \
  do {                                                                         \
    ompt_callback_##type##_t tsan_##event = &ompt_tsan_##event;                \
    result = ompt_set_callback(ompt_callback_##event,                          \
                               (ompt_callback_t)tsan_##event);                 \
    if (result < level)                                                        \
      printf("Registered callback '" #event "' is not supported at " #level   \
             " (%i)\n",                                                        \
             result);                                                          \
  } while (0)

// Registers a callback whose handler type (`type`) differs from the event
// name, requiring ompt_set_always support; the result is discarded.
#define SET_CALLBACK_T(event, type)                                            \
  do {                                                                         \
    int res;                                                                   \
    SET_OPTIONAL_CALLBACK_T(event, type, res, ompt_set_always);                \
  } while (0)

// Common case: the handler type name matches the event name.
#define SET_CALLBACK(event) SET_CALLBACK_T(event, event)
1069
/// OMPT tool initializer: looks up the runtime entry points, (on macOS)
/// resolves the TSan annotation functions via dlsym, and registers all
/// event callbacks. Returns 1 on success; exits the process if mandatory
/// lookup functions are unavailable.
static int ompt_tsan_initialize(ompt_function_lookup_t lookup, int device_num,
                                ompt_data_t *tool_data) {
  const char *options = getenv("TSAN_OPTIONS");
  TsanFlags tsan_flags(options);

  ompt_set_callback_t ompt_set_callback =
      (ompt_set_callback_t)lookup("ompt_set_callback");
  if (ompt_set_callback == NULL) {
    std::cerr << "Could not set callback, exiting..." << std::endl;
    std::exit(1);
  }
  ompt_get_parallel_info =
      (ompt_get_parallel_info_t)lookup("ompt_get_parallel_info");
  ompt_get_thread_data = (ompt_get_thread_data_t)lookup("ompt_get_thread_data");

  if (ompt_get_parallel_info == NULL) {
    fprintf(stderr, "Could not get inquiry function 'ompt_get_parallel_info', "
                    "exiting...\n");
    exit(1);
  }

#if (defined __APPLE__ && defined __MACH__)
// On macOS the TSan annotation entry points are not linked directly;
// resolve each one from the already-loaded runtime with dlsym, warning
// (but not failing) if a symbol is missing.
#define findTsanFunction(f, fSig)                                              \
  do {                                                                         \
    if (NULL == (f = fSig dlsym(RTLD_DEFAULT, #f)))                            \
      printf("Unable to find TSan function " #f ".\n");                        \
  } while (0)

  findTsanFunction(AnnotateHappensAfter,
                   (void (*)(const char *, int, const volatile void *)));
  findTsanFunction(AnnotateHappensBefore,
                   (void (*)(const char *, int, const volatile void *)));
  findTsanFunction(AnnotateIgnoreWritesBegin, (void (*)(const char *, int)));
  findTsanFunction(AnnotateIgnoreWritesEnd, (void (*)(const char *, int)));
  findTsanFunction(
      AnnotateNewMemory,
      (void (*)(const char *, int, const volatile void *, size_t)));
  findTsanFunction(__tsan_func_entry, (void (*)(const void *)));
  findTsanFunction(__tsan_func_exit, (void (*)(void)));
#endif

  // Mandatory callbacks: registration failure is reported by the macro.
  SET_CALLBACK(thread_begin);
  SET_CALLBACK(thread_end);
  SET_CALLBACK(parallel_begin);
  SET_CALLBACK(implicit_task);
  SET_CALLBACK(sync_region);
  SET_CALLBACK(parallel_end);

  SET_CALLBACK(task_create);
  SET_CALLBACK(task_schedule);
  SET_CALLBACK(dependences);

  SET_CALLBACK_T(mutex_acquired, mutex);
  SET_CALLBACK_T(mutex_released, mutex);
  // The reduction callback is optional; its support level is recorded in
  // hasReductionCallback to enable legacy barrier handling when absent.
  SET_OPTIONAL_CALLBACK_T(reduction, sync_region, hasReductionCallback,
                          ompt_set_never);

  if (!tsan_flags.ignore_noninstrumented_modules)
    fprintf(stderr,
            "Warning: please export "
            "TSAN_OPTIONS='ignore_noninstrumented_modules=1' "
            "to avoid false positive reports from the OpenMP runtime!\n");
  // Optionally suppress reports from serial (non-parallel) execution.
  if (archer_flags->ignore_serial)
    TsanIgnoreWritesBegin();

  return 1; // success
}
1137
ompt_tsan_finalize(ompt_data_t * tool_data)1138 static void ompt_tsan_finalize(ompt_data_t *tool_data) {
1139 if (archer_flags->ignore_serial)
1140 TsanIgnoreWritesEnd();
1141 if (archer_flags->print_max_rss) {
1142 struct rusage end;
1143 getrusage(RUSAGE_SELF, &end);
1144 printf("MAX RSS[KBytes] during execution: %ld\n", end.ru_maxrss);
1145 }
1146
1147 if (archer_flags)
1148 delete archer_flags;
1149 }
1150
1151 extern "C" ompt_start_tool_result_t *
ompt_start_tool(unsigned int omp_version,const char * runtime_version)1152 ompt_start_tool(unsigned int omp_version, const char *runtime_version) {
1153 const char *options = getenv("ARCHER_OPTIONS");
1154 archer_flags = new ArcherFlags(options);
1155 if (!archer_flags->enabled) {
1156 if (archer_flags->verbose)
1157 std::cout << "Archer disabled, stopping operation" << std::endl;
1158 delete archer_flags;
1159 return NULL;
1160 }
1161
1162 pagesize = getpagesize();
1163
1164 static ompt_start_tool_result_t ompt_start_tool_result = {
1165 &ompt_tsan_initialize, &ompt_tsan_finalize, {0}};
1166
1167 // The OMPT start-up code uses dlopen with RTLD_LAZY. Therefore, we cannot
1168 // rely on dlopen to fail if TSan is missing, but would get a runtime error
1169 // for the first TSan call. We use RunningOnValgrind to detect whether
1170 // an implementation of the Annotation interface is available in the
1171 // execution or disable the tool (by returning NULL).
1172
1173 runOnTsan = 1;
1174 RunningOnValgrind();
1175 if (!runOnTsan) // if we are not running on TSAN, give a different tool the
1176 // chance to be loaded
1177 {
1178 if (archer_flags->verbose)
1179 std::cout << "Archer detected OpenMP application without TSan "
1180 "stopping operation"
1181 << std::endl;
1182 delete archer_flags;
1183 return NULL;
1184 }
1185
1186 if (archer_flags->verbose)
1187 std::cout << "Archer detected OpenMP application with TSan, supplying "
1188 "OpenMP synchronization semantics"
1189 << std::endl;
1190 return &ompt_start_tool_result;
1191 }
1192