1 /*
2 Copyright (C) 2014 Intel Corporation
3 All rights reserved.
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions
7 are met:
8
9 * Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer.
11 * Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in
13 the documentation and/or other materials provided with the
14 distribution.
15 * Neither the name of Intel Corporation nor the names of its
16 contributors may be used to endorse or promote products derived
17 from this software without specific prior written permission.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
26 OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
29 WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31 */
32 #include <iterator>
33 #include <algorithm>
34 #include <tbb/task.h>
35
36 #include "pss_common.h"
37
38 namespace pss {
39
40 namespace internal {
41
42 template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename RandomAccessIterator3, typename Compare>
43 class merge_task: public tbb::task {
44 tbb::task* execute();
45 RandomAccessIterator1 xs, xe;
46 RandomAccessIterator2 ys, ye;
47 RandomAccessIterator3 zs;
48 Compare comp;
49 bool destroy;
50 public:
merge_task(RandomAccessIterator1 xs_,RandomAccessIterator1 xe_,RandomAccessIterator2 ys_,RandomAccessIterator2 ye_,RandomAccessIterator3 zs_,bool destroy_,Compare comp_)51 merge_task( RandomAccessIterator1 xs_, RandomAccessIterator1 xe_, RandomAccessIterator2 ys_, RandomAccessIterator2 ye_, RandomAccessIterator3 zs_, bool destroy_, Compare comp_ ) :
52 xs(xs_), xe(xe_), ys(ys_), ye(ye_), zs(zs_), comp(comp_), destroy(destroy_)
53 {}
54 };
55
56 template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename RandomAccessIterator3, typename Compare>
execute()57 tbb::task* merge_task<RandomAccessIterator1,RandomAccessIterator2,RandomAccessIterator3,Compare>::execute() {
58 const size_t MERGE_CUT_OFF = 2000;
59 auto n = (xe-xs) + (ye-ys);
60 if( (size_t) n <= MERGE_CUT_OFF ) {
61 serial_move_merge( xs, xe, ys, ye, zs, comp );
62 if( destroy ) {
63 serial_destroy(xs,xe);
64 serial_destroy(ys,ye);
65 }
66 return NULL;
67 } else {
68 RandomAccessIterator1 xm;
69 RandomAccessIterator2 ym;
70 if( xe-xs < ye-ys ) {
71 ym = ys+(ye-ys)/2;
72 xm = std::upper_bound(xs,xe,*ym,comp);
73 } else {
74 xm = xs+(xe-xs)/2;
75 ym = std::lower_bound(ys,ye,*xm,comp);
76 }
77 RandomAccessIterator3 zm = zs + ((xm-xs) + (ym-ys));
78 tbb::task* right = new( allocate_additional_child_of(*parent()) ) merge_task( xm, xe, ym, ye, zm, destroy, comp );
79 spawn(*right);
80 recycle_as_continuation();
81 xe = xm;
82 ye = ym;
83 return this;
84 }
85 }
86
87 template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename Compare>
88 class stable_sort_task: public tbb::task {
89 tbb::task* execute();
90 RandomAccessIterator1 xs, xe;
91 RandomAccessIterator2 zs;
92 Compare comp;
93 signed char inplace;
94 public:
stable_sort_task(RandomAccessIterator1 xs_,RandomAccessIterator1 xe_,RandomAccessIterator2 zs_,int inplace_,Compare comp_)95 stable_sort_task(RandomAccessIterator1 xs_, RandomAccessIterator1 xe_, RandomAccessIterator2 zs_, int inplace_, Compare comp_ ) :
96 xs(xs_), xe(xe_), zs(zs_), comp(comp_), inplace(inplace_)
97 {}
98 };
99
100 template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename Compare>
execute()101 tbb::task* stable_sort_task<RandomAccessIterator1, RandomAccessIterator2, Compare>::execute() {
102 const size_t SORT_CUT_OFF = 500;
103 if ((size_t) (xe - xs) <= SORT_CUT_OFF) {
104 stable_sort_base_case(xs, xe, zs, inplace, comp);
105 return NULL;
106 } else {
107 RandomAccessIterator1 xm = xs + (xe - xs) / 2;
108 RandomAccessIterator2 zm = zs + (xm - xs);
109 RandomAccessIterator2 ze = zs + (xe - xs);
110 task* m;
111 if (inplace)
112 m = new (allocate_continuation()) merge_task<RandomAccessIterator2,RandomAccessIterator2,RandomAccessIterator1,Compare>(zs, zm, zm, ze, xs, inplace==2, comp);
113 else
114 m = new (allocate_continuation()) merge_task<RandomAccessIterator1,RandomAccessIterator1,RandomAccessIterator2,Compare>(xs, xm, xm, xe, zs, false, comp);
115 m->set_ref_count(2);
116 task* right = new(m->allocate_child()) stable_sort_task(xm,xe,zm,!inplace, comp);
117 spawn(*right);
118 recycle_as_child_of(*m);
119 xe=xm;
120 inplace=!inplace;
121 return this;
122 }
123 }
124
125 } // namespace internal
126
127 template<typename RandomAccessIterator, typename Compare>
parallel_stable_sort(RandomAccessIterator xs,RandomAccessIterator xe,Compare comp)128 void parallel_stable_sort( RandomAccessIterator xs, RandomAccessIterator xe, Compare comp ) {
129 typedef typename std::iterator_traits<RandomAccessIterator>::value_type T;
130 if( internal::raw_buffer z = internal::raw_buffer( sizeof(T)*(xe-xs) ) ) {
131 using tbb::task;
132 typedef typename std::iterator_traits<RandomAccessIterator>::value_type T;
133 internal::raw_buffer buf( sizeof(T)*(xe-xs) );
134 task::spawn_root_and_wait(*new( task::allocate_root() ) internal::stable_sort_task<RandomAccessIterator,T*,Compare>( xs, xe, (T*)buf.get(), 2, comp ));
135 } else
136 // Not enough memory available - fall back on serial sort
137 std::stable_sort( xs, xe, comp );
138 }
139
140 } // namespace pss
141