1 // -*- C++ -*-
2 
3 // Copyright (C) 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 3, or (at your option) any later
9 // version.
10 
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 // General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /** @file parallel/multiway_mergesort.h
26  *  @brief Parallel multiway merge sort.
27  *  This file is a GNU parallel extension to the Standard C++ Library.
28  */
29 
30 // Written by Johannes Singler.
31 
32 #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H
33 #define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1
34 
35 #include <vector>
36 
37 #include <parallel/basic_iterator.h>
38 #include <bits/stl_algo.h>
39 #include <parallel/parallel.h>
40 #include <parallel/multiway_merge.h>
41 
42 namespace __gnu_parallel
43 {
44   /** @brief Subsequence description. */
45   template<typename _DifferenceTp>
46     struct _Piece
47     {
48       typedef _DifferenceTp _DifferenceType;
49 
50       /** @brief Begin of subsequence. */
51       _DifferenceType _M_begin;
52 
53       /** @brief End of subsequence. */
54       _DifferenceType _M_end;
55     };
56 
57   /** @brief Data accessed by all threads.
58    *
59    *  PMWMS = parallel multiway mergesort */
60   template<typename _RAIter>
61     struct _PMWMSSortingData
62     {
63       typedef std::iterator_traits<_RAIter> _TraitsType;
64       typedef typename _TraitsType::value_type _ValueType;
65       typedef typename _TraitsType::difference_type _DifferenceType;
66 
67       /** @brief Number of threads involved. */
68       _ThreadIndex _M_num_threads;
69 
70       /** @brief Input __begin. */
71       _RAIter _M_source;
72 
73       /** @brief Start indices, per thread. */
74       _DifferenceType* _M_starts;
75 
76       /** @brief Storage in which to sort. */
77       _ValueType** _M_temporary;
78 
79       /** @brief Samples. */
80       _ValueType* _M_samples;
81 
82       /** @brief Offsets to add to the found positions. */
83       _DifferenceType* _M_offsets;
84 
85       /** @brief Pieces of data to merge @c [thread][__sequence] */
86       std::vector<_Piece<_DifferenceType> >* _M_pieces;
87   };
88 
89   /**
90    *  @brief Select _M_samples from a sequence.
91    *  @param __sd Pointer to algorithm data. _Result will be placed in
92    *  @c __sd->_M_samples.
93    *  @param __num_samples Number of _M_samples to select.
94    */
95   template<typename _RAIter, typename _DifferenceTp>
96     void
97     __determine_samples(_PMWMSSortingData<_RAIter>* __sd,
98 			_DifferenceTp __num_samples)
99     {
100       typedef std::iterator_traits<_RAIter> _TraitsType;
101       typedef typename _TraitsType::value_type _ValueType;
102       typedef _DifferenceTp _DifferenceType;
103 
104       _ThreadIndex __iam = omp_get_thread_num();
105 
106       _DifferenceType* __es = new _DifferenceType[__num_samples + 2];
107 
108       __equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam],
109 		      __num_samples + 1, __es);
110 
111       for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
112 	::new(&(__sd->_M_samples[__iam * __num_samples + __i]))
113 	    _ValueType(__sd->_M_source[__sd->_M_starts[__iam]
114 				       + __es[__i + 1]]);
115 
116       delete[] __es;
117     }
118 
119   /** @brief Split consistently. */
120   template<bool __exact, typename _RAIter,
121 	   typename _Compare, typename _SortingPlacesIterator>
122     struct _SplitConsistently
123     { };
124 
125   /** @brief Split by exact splitting. */
126   template<typename _RAIter, typename _Compare,
127 	   typename _SortingPlacesIterator>
128     struct _SplitConsistently<true, _RAIter, _Compare, _SortingPlacesIterator>
129     {
130       void
131       operator()(const _ThreadIndex __iam,
132 		 _PMWMSSortingData<_RAIter>* __sd,
133 		 _Compare& __comp,
134 		 const typename
135 		 std::iterator_traits<_RAIter>::difference_type
136 		 __num_samples) const
137       {
138 #       pragma omp barrier
139 
140 	std::vector<std::pair<_SortingPlacesIterator,
141 	                      _SortingPlacesIterator> >
142 	  __seqs(__sd->_M_num_threads);
143 	for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
144 	  __seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
145 				       __sd->_M_temporary[__s]
146 				       + (__sd->_M_starts[__s + 1]
147 					  - __sd->_M_starts[__s]));
148 
149 	std::vector<_SortingPlacesIterator> __offsets(__sd->_M_num_threads);
150 
151 	// if not last thread
152 	if (__iam < __sd->_M_num_threads - 1)
153 	  multiseq_partition(__seqs.begin(), __seqs.end(),
154 			     __sd->_M_starts[__iam + 1], __offsets.begin(),
155 			     __comp);
156 
157 	for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
158 	  {
159 	    // for each sequence
160 	    if (__iam < (__sd->_M_num_threads - 1))
161 	      __sd->_M_pieces[__iam][__seq]._M_end
162 		= __offsets[__seq] - __seqs[__seq].first;
163 	    else
164 	      // very end of this sequence
165 	      __sd->_M_pieces[__iam][__seq]._M_end =
166 		__sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq];
167 	  }
168 
169 #       pragma omp barrier
170 
171 	for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
172 	  {
173 	    // For each sequence.
174 	    if (__iam > 0)
175 	      __sd->_M_pieces[__iam][__seq]._M_begin =
176 		__sd->_M_pieces[__iam - 1][__seq]._M_end;
177 	    else
178 	      // Absolute beginning.
179 	      __sd->_M_pieces[__iam][__seq]._M_begin = 0;
180 	  }
181       }
182   };
183 
184   /** @brief Split by sampling. */
185   template<typename _RAIter, typename _Compare,
186 	   typename _SortingPlacesIterator>
187     struct _SplitConsistently<false, _RAIter, _Compare, _SortingPlacesIterator>
188     {
189       void
190       operator()(const _ThreadIndex __iam,
191 		 _PMWMSSortingData<_RAIter>* __sd,
192 		 _Compare& __comp,
193 		 const typename
194 		 std::iterator_traits<_RAIter>::difference_type
195 		 __num_samples) const
196       {
197 	typedef std::iterator_traits<_RAIter> _TraitsType;
198 	typedef typename _TraitsType::value_type _ValueType;
199 	typedef typename _TraitsType::difference_type _DifferenceType;
200 
201 	__determine_samples(__sd, __num_samples);
202 
203 #       pragma omp barrier
204 
205 #       pragma omp single
206 	__gnu_sequential::sort(__sd->_M_samples,
207 			       __sd->_M_samples
208 			       + (__num_samples * __sd->_M_num_threads),
209 			       __comp);
210 
211 #       pragma omp barrier
212 
213 	for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
214 	  {
215 	    // For each sequence.
216 	    if (__num_samples * __iam > 0)
217 	      __sd->_M_pieces[__iam][__s]._M_begin =
218                 std::lower_bound(__sd->_M_temporary[__s],
219 				 __sd->_M_temporary[__s]
220 				 + (__sd->_M_starts[__s + 1]
221 				    - __sd->_M_starts[__s]),
222 				 __sd->_M_samples[__num_samples * __iam],
223 				 __comp)
224                 - __sd->_M_temporary[__s];
225 	    else
226 	      // Absolute beginning.
227 	      __sd->_M_pieces[__iam][__s]._M_begin = 0;
228 
229 	    if ((__num_samples * (__iam + 1)) <
230 		(__num_samples * __sd->_M_num_threads))
231 	      __sd->_M_pieces[__iam][__s]._M_end =
232                 std::lower_bound(__sd->_M_temporary[__s],
233 				 __sd->_M_temporary[__s]
234 				 + (__sd->_M_starts[__s + 1]
235 				    - __sd->_M_starts[__s]),
236 				 __sd->_M_samples[__num_samples * (__iam + 1)],
237 				 __comp)
238                 - __sd->_M_temporary[__s];
239 	    else
240 	      // Absolute end.
241 	      __sd->_M_pieces[__iam][__s]._M_end = (__sd->_M_starts[__s + 1]
242 						    - __sd->_M_starts[__s]);
243 	  }
244       }
245   };
246 
247   template<bool __stable, typename _RAIter, typename _Compare>
248     struct __possibly_stable_sort
249     { };
250 
251   template<typename _RAIter, typename _Compare>
252     struct __possibly_stable_sort<true, _RAIter, _Compare>
253     {
254       void operator()(const _RAIter& __begin,
255 		      const _RAIter& __end, _Compare& __comp) const
256       { __gnu_sequential::stable_sort(__begin, __end, __comp); }
257     };
258 
259   template<typename _RAIter, typename _Compare>
260     struct __possibly_stable_sort<false, _RAIter, _Compare>
261     {
262       void operator()(const _RAIter __begin,
263 		      const _RAIter __end, _Compare& __comp) const
264       { __gnu_sequential::sort(__begin, __end, __comp); }
265     };
266 
267   template<bool __stable, typename Seq_RAIter,
268 	   typename _RAIter, typename _Compare,
269 	   typename DiffType>
270     struct __possibly_stable_multiway_merge
271     { };
272 
273   template<typename Seq_RAIter, typename _RAIter,
274 	   typename _Compare, typename _DiffType>
275     struct __possibly_stable_multiway_merge<true, Seq_RAIter,
276 					    _RAIter, _Compare, _DiffType>
277     {
278       void operator()(const Seq_RAIter& __seqs_begin,
279 		      const Seq_RAIter& __seqs_end,
280 		      const _RAIter& __target,
281 		      _Compare& __comp,
282 		      _DiffType __length_am) const
283       { stable_multiway_merge(__seqs_begin, __seqs_end, __target,
284 			      __length_am, __comp, sequential_tag()); }
285     };
286 
287   template<typename Seq_RAIter, typename _RAIter,
288 	   typename _Compare, typename _DiffType>
289     struct __possibly_stable_multiway_merge<false, Seq_RAIter,
290 					    _RAIter, _Compare, _DiffType>
291     {
292       void operator()(const Seq_RAIter& __seqs_begin,
293                       const Seq_RAIter& __seqs_end,
294                       const _RAIter& __target,
295                       _Compare& __comp,
296                       _DiffType __length_am) const
297       { multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
298 		       __comp, sequential_tag()); }
299     };
300 
301   /** @brief PMWMS code executed by each thread.
302    *  @param __sd Pointer to algorithm data.
303    *  @param __comp Comparator.
304    */
305   template<bool __stable, bool __exact, typename _RAIter,
306 	   typename _Compare>
307     void
308     parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd,
309 			  _Compare& __comp)
310     {
311       typedef std::iterator_traits<_RAIter> _TraitsType;
312       typedef typename _TraitsType::value_type _ValueType;
313       typedef typename _TraitsType::difference_type _DifferenceType;
314 
315       _ThreadIndex __iam = omp_get_thread_num();
316 
317       // Length of this thread's chunk, before merging.
318       _DifferenceType __length_local =
319 	__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
320 
321       // Sort in temporary storage, leave space for sentinel.
322 
323       typedef _ValueType* _SortingPlacesIterator;
324 
325       __sd->_M_temporary[__iam] =
326         static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
327 						* (__length_local + 1)));
328 
329       // Copy there.
330       std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam],
331 			      __sd->_M_source + __sd->_M_starts[__iam]
332 			      + __length_local,
333 			      __sd->_M_temporary[__iam]);
334 
335       __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>()
336         (__sd->_M_temporary[__iam],
337 	 __sd->_M_temporary[__iam] + __length_local,
338          __comp);
339 
340       // Invariant: locally sorted subsequence in sd->_M_temporary[__iam],
341       // __sd->_M_temporary[__iam] + __length_local.
342 
343       // No barrier here: Synchronization is done by the splitting routine.
344 
345       _DifferenceType __num_samples =
346         _Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1;
347       _SplitConsistently<__exact, _RAIter, _Compare, _SortingPlacesIterator>()
348         (__iam, __sd, __comp, __num_samples);
349 
350       // Offset from __target __begin, __length after merging.
351       _DifferenceType __offset = 0, __length_am = 0;
352       for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
353 	{
354 	  __length_am += (__sd->_M_pieces[__iam][__s]._M_end
355 			  - __sd->_M_pieces[__iam][__s]._M_begin);
356 	  __offset += __sd->_M_pieces[__iam][__s]._M_begin;
357 	}
358 
359       typedef std::vector<
360         std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
361         _SeqVector;
362       _SeqVector __seqs(__sd->_M_num_threads);
363 
364       for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
365 	{
366 	  __seqs[__s] =
367 	    std::make_pair(__sd->_M_temporary[__s]
368 			   + __sd->_M_pieces[__iam][__s]._M_begin,
369 			   __sd->_M_temporary[__s]
370 			   + __sd->_M_pieces[__iam][__s]._M_end);
371 	}
372 
373       __possibly_stable_multiway_merge<
374         __stable, typename _SeqVector::iterator,
375 	_RAIter, _Compare, _DifferenceType>()(__seqs.begin(), __seqs.end(),
376 				     __sd->_M_source + __offset, __comp,
377 				     __length_am);
378 
379 #     pragma omp barrier
380 
381       for (_DifferenceType __i = 0; __i < __length_local; ++__i)
382 	__sd->_M_temporary[__iam][__i].~_ValueType();
383       ::operator delete(__sd->_M_temporary[__iam]);
384     }
385 
386   /** @brief PMWMS main call.
387    *  @param __begin Begin iterator of sequence.
388    *  @param __end End iterator of sequence.
389    *  @param __comp Comparator.
390    *  @param __num_threads Number of threads to use.
391    */
392   template<bool __stable, bool __exact, typename _RAIter,
393            typename _Compare>
394     void
395     parallel_sort_mwms(_RAIter __begin, _RAIter __end,
396 		       _Compare __comp,
397 		       _ThreadIndex __num_threads)
398     {
399       _GLIBCXX_CALL(__end - __begin)
400 
401       typedef std::iterator_traits<_RAIter> _TraitsType;
402       typedef typename _TraitsType::value_type _ValueType;
403       typedef typename _TraitsType::difference_type _DifferenceType;
404 
405       _DifferenceType __n = __end - __begin;
406 
407       if (__n <= 1)
408 	return;
409 
410       // at least one element per thread
411       if (__num_threads > __n)
412 	__num_threads = static_cast<_ThreadIndex>(__n);
413 
414       // shared variables
415       _PMWMSSortingData<_RAIter> __sd;
416       _DifferenceType* __starts;
417       _DifferenceType __size;
418 
419 #     pragma omp parallel num_threads(__num_threads)
420       {
421         __num_threads = omp_get_num_threads(); //no more threads than requested
422 
423 #       pragma omp single
424 	{
425 	  __sd._M_num_threads = __num_threads;
426 	  __sd._M_source = __begin;
427 
428 	  __sd._M_temporary = new _ValueType*[__num_threads];
429 
430 	  if (!__exact)
431 	    {
432 	      __size =
433 		(_Settings::get().sort_mwms_oversampling * __num_threads - 1)
434 		* __num_threads;
435 	      __sd._M_samples = static_cast<_ValueType*>
436 		(::operator new(__size * sizeof(_ValueType)));
437 	    }
438 	  else
439 	    __sd._M_samples = 0;
440 
441 	  __sd._M_offsets = new _DifferenceType[__num_threads - 1];
442 	  __sd._M_pieces
443 	    = new std::vector<_Piece<_DifferenceType> >[__num_threads];
444 	  for (_ThreadIndex __s = 0; __s < __num_threads; ++__s)
445 	    __sd._M_pieces[__s].resize(__num_threads);
446 	  __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];
447 
448 	  _DifferenceType __chunk_length = __n / __num_threads;
449 	  _DifferenceType __split = __n % __num_threads;
450 	  _DifferenceType __pos = 0;
451 	  for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
452 	    {
453 	      __starts[__i] = __pos;
454 	      __pos += ((__i < __split)
455 			? (__chunk_length + 1) : __chunk_length);
456 	    }
457 	  __starts[__num_threads] = __pos;
458 	} //single
459 
460         // Now sort in parallel.
461         parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp);
462       } //parallel
463 
464       delete[] __starts;
465       delete[] __sd._M_temporary;
466 
467       if (!__exact)
468 	{
469 	  for (_DifferenceType __i = 0; __i < __size; ++__i)
470 	    __sd._M_samples[__i].~_ValueType();
471 	  ::operator delete(__sd._M_samples);
472 	}
473 
474       delete[] __sd._M_offsets;
475       delete[] __sd._M_pieces;
476     }
477 
478 } //namespace __gnu_parallel
479 
480 #endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */
481