1 /***************************************************************************
2 * include/stxxl/bits/parallel.h
3 *
4 * Part of the STXXL. See http://stxxl.sourceforge.net
5 *
6 * Copyright (C) 2008-2010 Andreas Beckmann <beckmann@cs.uni-frankfurt.de>
7 * Copyright (C) 2011 Johannes Singler <singler@kit.edu>
8 *
9 * Distributed under the Boost Software License, Version 1.0.
10 * (See accompanying file LICENSE_1_0.txt or copy at
11 * http://www.boost.org/LICENSE_1_0.txt)
12 **************************************************************************/
13
14 #ifndef STXXL_PARALLEL_HEADER
15 #define STXXL_PARALLEL_HEADER
16
17 #include <stxxl/bits/config.h>
18
19 #undef STXXL_PARALLEL
20 #undef STXXL_PARALLEL_MODE
21
22 #if defined(_GLIBCXX_PARALLEL) || STXXL_PARALLEL_MODE_EXPLICIT
23 #define STXXL_PARALLEL_MODE
24 #endif
25
26 #if defined(STXXL_PARALLEL_MODE)
27 #define STXXL_PARALLEL 1
28 #else
29 #define STXXL_PARALLEL 0
30 #endif
31
32 #include <cassert>
33
34 #ifdef STXXL_PARALLEL_MODE
35 #include <omp.h>
36 #endif
37
38 #if STXXL_PARALLEL
39 #include <algorithm>
40 #endif
41
42 #include <stxxl/bits/namespace.h>
43 #include <stxxl/bits/common/settings.h>
44 #include <stxxl/bits/verbose.h>
45
46 #if defined(_GLIBCXX_PARALLEL)
47 //use _STXXL_FORCE_SEQUENTIAL to tag calls which are not worthwhile parallelizing
48 #define _STXXL_FORCE_SEQUENTIAL , __gnu_parallel::sequential_tag()
49 #else
50 #define _STXXL_FORCE_SEQUENTIAL
51 #endif
52
53 #if 0
54 // sorting triggers is done sequentially
55 #define _STXXL_SORT_TRIGGER_FORCE_SEQUENTIAL _STXXL_FORCE_SEQUENTIAL
56 #else
57 // sorting triggers may be parallelized
58 #define _STXXL_SORT_TRIGGER_FORCE_SEQUENTIAL
59 #endif
60
61 #if !STXXL_PARALLEL
62 #undef STXXL_PARALLEL_MULTIWAY_MERGE
63 #define STXXL_PARALLEL_MULTIWAY_MERGE 0
64 #endif
65
66 #if defined(STXXL_PARALLEL_MODE) && ((__GNUC__ * 10000 + __GNUC_MINOR__ * 100) < 40400)
67 #undef STXXL_PARALLEL_MULTIWAY_MERGE
68 #define STXXL_PARALLEL_MULTIWAY_MERGE 0
69 #endif
70
71 #if !defined(STXXL_PARALLEL_MULTIWAY_MERGE)
72 #define STXXL_PARALLEL_MULTIWAY_MERGE 1
73 #endif
74
75 #if !defined(STXXL_NOT_CONSIDER_SORT_MEMORY_OVERHEAD)
76 #define STXXL_NOT_CONSIDER_SORT_MEMORY_OVERHEAD 0
77 #endif
78
79 #if STXXL_PARALLEL_MODE_EXPLICIT
80 #include <parallel/algorithm>
81 #else
82 #include <algorithm>
83 #endif
84
85 STXXL_BEGIN_NAMESPACE
86
sort_memory_usage_factor()87 inline unsigned sort_memory_usage_factor()
88 {
89 #if STXXL_PARALLEL && !STXXL_NOT_CONSIDER_SORT_MEMORY_OVERHEAD && defined(STXXL_PARALLEL_MODE)
90 return (__gnu_parallel::_Settings::get().sort_algorithm == __gnu_parallel::MWMS && omp_get_max_threads() > 1) ? 2 : 1; //memory overhead for multiway mergesort
91 #else
92 return 1; //no overhead
93 #endif
94 }
95
check_sort_settings()96 inline void check_sort_settings()
97 {
98 #if STXXL_PARALLEL && defined(STXXL_PARALLEL_MODE) && !defined(STXXL_NO_WARN_OMP_NESTED)
99 static bool did_warn = false;
100 if (!did_warn) {
101 if (__gnu_parallel::_Settings::get().sort_algorithm != __gnu_parallel::MWMS) {
102 if (omp_get_max_threads() <= 2) {
103 did_warn = true; // no problem with at most 2 threads, no need to check again
104 } else if (!omp_get_nested()) {
105 STXXL_ERRMSG("Inefficient settings detected. To get full potential from your CPU it is recommended to set OMP_NESTED=TRUE in the environment.");
106 did_warn = true;
107 }
108 }
109 }
110 #else
111 // nothing to check
112 #endif
113 }
114
do_parallel_merge()115 inline bool do_parallel_merge()
116 {
117 #if STXXL_PARALLEL_MULTIWAY_MERGE && defined(STXXL_PARALLEL_MODE)
118 return !stxxl::SETTINGS::native_merge && omp_get_max_threads() >= 1;
119 #else
120 return false;
121 #endif
122 }
123
124 namespace potentially_parallel {
125
126 #if STXXL_PARALLEL_MODE_EXPLICIT
127 using __gnu_parallel::sort;
128 using __gnu_parallel::random_shuffle;
129 #else
130 using std::sort;
131 using std::random_shuffle;
132 #endif
133
134 } // namespace potentially_parallel
135
136 namespace parallel {
137
138 #if STXXL_PARALLEL
139
140 /*! Multi-way merging dispatcher.
141 * @param seqs_begin Begin iterator of iterator pair input sequence.
142 * @param seqs_end End iterator of iterator pair input sequence.
143 * @param target Begin iterator out output sequence.
144 * @param comp Comparator.
145 * @param length Maximum length to merge.
146 * @return End iterator of output sequence.
147 */
148 template <typename RandomAccessIteratorPairIterator,
149 typename RandomAccessIterator3, typename DiffType, typename Comparator>
150 RandomAccessIterator3
multiway_merge(RandomAccessIteratorPairIterator seqs_begin,RandomAccessIteratorPairIterator seqs_end,RandomAccessIterator3 target,Comparator comp,DiffType length)151 multiway_merge(RandomAccessIteratorPairIterator seqs_begin,
152 RandomAccessIteratorPairIterator seqs_end,
153 RandomAccessIterator3 target,
154 Comparator comp,
155 DiffType length)
156 {
157 #if defined(STXXL_PARALLEL_MODE) && ((__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= 40400)
158 return __gnu_parallel::multiway_merge(seqs_begin, seqs_end, target, length, comp);
159 #elif defined(STXXL_PARALLEL_MODE)
160 return __gnu_parallel::multiway_merge(seqs_begin, seqs_end, target, comp, length);
161 #else
162 #error "no implementation found for multiway_merge()"
163 #endif
164 }
165
166 /*! Multi-way merging front-end.
167 * @param seqs_begin Begin iterator of iterator pair input sequence.
168 * @param seqs_end End iterator of iterator pair input sequence.
169 * @param target Begin iterator out output sequence.
170 * @param comp Comparator.
171 * @param length Maximum length to merge.
172 * @return End iterator of output sequence.
173 * @pre For each @c i, @c seqs_begin[i].second must be the end marker of the sequence, but also reference the one more sentinel element.
174 */
175 template <typename RandomAccessIteratorPairIterator,
176 typename RandomAccessIterator3, typename DiffType, typename Comparator>
177 RandomAccessIterator3
multiway_merge_sentinel(RandomAccessIteratorPairIterator seqs_begin,RandomAccessIteratorPairIterator seqs_end,RandomAccessIterator3 target,Comparator comp,DiffType length)178 multiway_merge_sentinel(RandomAccessIteratorPairIterator seqs_begin,
179 RandomAccessIteratorPairIterator seqs_end,
180 RandomAccessIterator3 target,
181 Comparator comp,
182 DiffType length)
183 {
184 #if defined(STXXL_PARALLEL_MODE) && ((__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= 40400)
185 return __gnu_parallel::multiway_merge_sentinels(seqs_begin, seqs_end, target, length, comp);
186 #elif defined(STXXL_PARALLEL_MODE)
187 return __gnu_parallel::multiway_merge_sentinels(seqs_begin, seqs_end, target, comp, length);
188 #else
189 #error "no implementation found for multiway_merge_sentinel()"
190 #endif
191 }
192
193 #endif
194
195 } // namespace parallel
196
197 STXXL_END_NAMESPACE
198
199 #endif // !STXXL_PARALLEL_HEADER
200 // vim: et:ts=4:sw=4
201