1 /*
2     Copyright 2005-2013 Intel Corporation.  All Rights Reserved.
3 
4     This file is part of Threading Building Blocks.
5 
6     Threading Building Blocks is free software; you can redistribute it
7     and/or modify it under the terms of the GNU General Public License
8     version 2 as published by the Free Software Foundation.
9 
10     Threading Building Blocks is distributed in the hope that it will be
11     useful, but WITHOUT ANY WARRANTY; without even the implied warranty
12     of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with Threading Building Blocks; if not, write to the Free Software
17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18 
19     As a special exception, you may use this file as part of a free software
20     library without restriction.  Specifically, if other files instantiate
21     templates or use macros or inline functions from this file, or you compile
22     this file and link it with other files to produce an executable, this
23     file does not by itself cause the resulting executable to be covered by
24     the GNU General Public License.  This exception does not however
25     invalidate any other reasons why the executable file might be covered by
26     the GNU General Public License.
27 */
28 
29 #ifndef __TBB_parallel_sort_H
30 #define __TBB_parallel_sort_H
31 
32 #include <tbb/blocked_range.h>
33 #include <tbb/parallel_for.h>
34 
35 #include <algorithm>
36 #include <functional>
37 #include <iterator>
38 
39 namespace tbb
40 {
41 
42 //! @cond INTERNAL
43 namespace internal
44 {
45 
46 //! Range used in quicksort to split elements into subranges based on a value.
47 /** The split operation selects a splitter and places all elements less than or equal
48     to the value in the first range and the remaining elements in the second range.
49     @ingroup algorithms */
50 template <typename RandomAccessIterator, typename Compare>
51 class quick_sort_range : private no_assign
52 {
53 
median_of_three(const RandomAccessIterator & array,size_t l,size_t m,size_t r)54   inline size_t median_of_three(const RandomAccessIterator& array,
55                                 size_t l,
56                                 size_t m,
57                                 size_t r) const
58   {
59     return comp(array[l], array[m])
60       ? (comp(array[m], array[r]) ? m : (comp(array[l], array[r]) ? r : l))
61       : (comp(array[r], array[m]) ? m : (comp(array[r], array[l]) ? r : l));
62   }
63 
pseudo_median_of_nine(const RandomAccessIterator & array,const quick_sort_range & range)64   inline size_t pseudo_median_of_nine(const RandomAccessIterator& array,
65                                       const quick_sort_range& range) const
66   {
67     size_t offset = range.size / 8u;
68     return median_of_three(array,
69                            median_of_three(array, 0, offset, offset * 2),
70                            median_of_three(array, offset * 3, offset * 4, offset * 5),
71                            median_of_three(array, offset * 6, offset * 7, range.size - 1));
72   }
73 
74 public:
75   static const size_t grainsize = 500;
76   const Compare& comp;
77   RandomAccessIterator begin;
78   size_t size;
79 
quick_sort_range(RandomAccessIterator begin_,size_t size_,const Compare & comp_)80   quick_sort_range(RandomAccessIterator begin_, size_t size_, const Compare& comp_)
81     : comp(comp_)
82     , begin(begin_)
83     , size(size_)
84   {
85   }
86 
empty()87   bool empty() const { return size == 0; }
is_divisible()88   bool is_divisible() const { return size >= grainsize; }
89 
quick_sort_range(quick_sort_range & range,split)90   quick_sort_range(quick_sort_range& range, split)
91     : comp(range.comp)
92   {
93     using std::swap;
94     RandomAccessIterator array = range.begin;
95     RandomAccessIterator key0 = range.begin;
96     size_t m = pseudo_median_of_nine(array, range);
97     if (m)
98       swap(array[0], array[m]);
99 
100     size_t i = 0;
101     size_t j = range.size;
102     // Partition interval [i+1,j-1] with key *key0.
103     for (;;)
104     {
105       __TBB_ASSERT(i < j, nullptr);
106       // Loop must terminate since array[l]==*key0.
107       do
108       {
109         --j;
110         __TBB_ASSERT(i <= j, "bad ordering relation?");
111       } while (comp(*key0, array[j]));
112       do
113       {
114         __TBB_ASSERT(i <= j, nullptr);
115         if (i == j)
116           goto partition;
117         ++i;
118       } while (comp(array[i], *key0));
119       if (i == j)
120         goto partition;
121       swap(array[i], array[j]);
122     }
123   partition:
124     // Put the partition key were it belongs
125     swap(array[j], *key0);
126     // array[l..j) is less or equal to key.
127     // array(j..r) is greater or equal to key.
128     // array[j] is equal to key
129     i = j + 1;
130     begin = array + i;
131     size = range.size - i;
132     range.size = j;
133   }
134 };
135 
136 #if __TBB_TASK_GROUP_CONTEXT
137 //! Body class used to test if elements in a range are presorted
138 /** @ingroup algorithms */
139 template <typename RandomAccessIterator, typename Compare>
140 class quick_sort_pretest_body : internal::no_assign
141 {
142   const Compare& comp;
143 
144 public:
quick_sort_pretest_body(const Compare & _comp)145   quick_sort_pretest_body(const Compare& _comp)
146     : comp(_comp)
147   {
148   }
149 
operator()150   void operator()(const blocked_range<RandomAccessIterator>& range) const
151   {
152     task& my_task = task::self();
153     RandomAccessIterator my_end = range.end();
154 
155     int i = 0;
156     for (RandomAccessIterator k = range.begin(); k != my_end; ++k, ++i)
157     {
158       if (i % 64 == 0 && my_task.is_cancelled())
159         break;
160 
161       // The k-1 is never out-of-range because the first chunk starts at begin+serial_cutoff+1
162       if (comp(*(k), *(k - 1)))
163       {
164         my_task.cancel_group_execution();
165         break;
166       }
167     }
168   }
169 };
170 #endif /* __TBB_TASK_GROUP_CONTEXT */
171 
172 //! Body class used to sort elements in a range that is smaller than the grainsize.
173 /** @ingroup algorithms */
174 template <typename RandomAccessIterator, typename Compare>
175 struct quick_sort_body
176 {
operatorquick_sort_body177   void operator()(const quick_sort_range<RandomAccessIterator, Compare>& range) const
178   {
179     //SerialQuickSort( range.begin, range.size, range.comp );
180     std::sort(range.begin, range.begin + range.size, range.comp);
181   }
182 };
183 
184 //! Wrapper method to initiate the sort by calling parallel_for.
185 /** @ingroup algorithms */
186 template <typename RandomAccessIterator, typename Compare>
parallel_quick_sort(RandomAccessIterator begin,RandomAccessIterator end,const Compare & comp)187 void parallel_quick_sort(RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp)
188 {
189 #if __TBB_TASK_GROUP_CONTEXT
190   task_group_context my_context;
191   const int serial_cutoff = 9;
192 
193   __TBB_ASSERT(begin + serial_cutoff < end, "min_parallel_size is smaller than serial cutoff?");
194   RandomAccessIterator k;
195   for (k = begin; k != begin + serial_cutoff; ++k)
196   {
197     if (comp(*(k + 1), *k))
198     {
199       goto do_parallel_quick_sort;
200     }
201   }
202 
203   parallel_for(blocked_range<RandomAccessIterator>(k + 1, end),
204                quick_sort_pretest_body<RandomAccessIterator, Compare>(comp),
205                auto_partitioner(),
206                my_context);
207 
208   if (my_context.is_group_execution_cancelled())
209   do_parallel_quick_sort:
210 #endif /* __TBB_TASK_GROUP_CONTEXT */
211   parallel_for(quick_sort_range<RandomAccessIterator, Compare>(begin, end - begin, comp),
212                quick_sort_body<RandomAccessIterator, Compare>(),
213                auto_partitioner());
214 }
215 
216 } // namespace internal
217 //! @endcond
218 
219 //! @cond INTERNAL
220 /** \page parallel_sort_iter_req Requirements on iterators for parallel_sort
221     Requirements on value type \c T of \c RandomAccessIterator for \c parallel_sort:
222     - \code void swap( T& x, T& y ) \endcode        Swaps \c x and \c y
223     - \code bool Compare::operator()( const T& x, const T& y ) \endcode
224                                                     True if x comes before y;
225 **/
226 
227 /** \name parallel_sort
228     See also requirements on \ref parallel_sort_iter_req "iterators for parallel_sort". **/
229 //@{
230 
231 //! Sorts the data in [begin,end) using the given comparator
232 /** The compare function object is used for all comparisons between elements during sorting.
233     The compare object must define a bool operator() function.
234     @ingroup algorithms **/
235 //! @endcond
236 template <typename RandomAccessIterator, typename Compare>
parallel_sort(RandomAccessIterator begin,RandomAccessIterator end,const Compare & comp)237 void parallel_sort(RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp)
238 {
239   const int min_parallel_size = 500;
240   if (end > begin)
241   {
242     if (end - begin < min_parallel_size)
243     {
244       std::sort(begin, end, comp);
245     }
246     else
247     {
248       internal::parallel_quick_sort(begin, end, comp);
249     }
250   }
251 }
252 
253 //! Sorts the data in [begin,end) with a default comparator \c std::less<RandomAccessIterator>
254 /** @ingroup algorithms **/
255 template <typename RandomAccessIterator>
parallel_sort(RandomAccessIterator begin,RandomAccessIterator end)256 inline void parallel_sort(RandomAccessIterator begin, RandomAccessIterator end)
257 {
258   parallel_sort(
259     begin, end, std::less<typename std::iterator_traits<RandomAccessIterator>::value_type>());
260 }
261 
262 //! Sorts the data in the range \c [begin,end) with a default comparator \c std::less<T>
263 /** @ingroup algorithms **/
264 template <typename T>
parallel_sort(T * begin,T * end)265 inline void parallel_sort(T* begin, T* end)
266 {
267   parallel_sort(begin, end, std::less<T>());
268 }
269 //@}
270 
271 } // namespace tbb
272 
273 #endif
274