1 /*
2   Copyright (C) 2014 Intel Corporation
3   All rights reserved.
4 
5   Redistribution and use in source and binary forms, with or without
6   modification, are permitted provided that the following conditions
7   are met:
8 
9   * Redistributions of source code must retain the above copyright
10     notice, this list of conditions and the following disclaimer.
11   * Redistributions in binary form must reproduce the above copyright
12     notice, this list of conditions and the following disclaimer in
13     the documentation and/or other materials provided with the
14     distribution.
15   * Neither the name of Intel Corporation nor the names of its
16     contributors may be used to endorse or promote products derived
17     from this software without specific prior written permission.
18 
19   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24   INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25   BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
26   OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27   AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28   LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
29   WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 */
32 #include <iterator>
33 #include <algorithm>
34 #include <tbb/task.h>
35 
36 #include "pss_common.h"
37 
38 namespace pss {
39 
40 namespace internal {
41 
42 template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename RandomAccessIterator3, typename Compare>
43 class merge_task: public tbb::task {
44     tbb::task* execute();
45     RandomAccessIterator1 xs, xe;
46     RandomAccessIterator2 ys, ye;
47     RandomAccessIterator3 zs;
48     Compare comp;
49     bool destroy;
50 public:
merge_task(RandomAccessIterator1 xs_,RandomAccessIterator1 xe_,RandomAccessIterator2 ys_,RandomAccessIterator2 ye_,RandomAccessIterator3 zs_,bool destroy_,Compare comp_)51     merge_task( RandomAccessIterator1 xs_, RandomAccessIterator1 xe_, RandomAccessIterator2 ys_, RandomAccessIterator2 ye_, RandomAccessIterator3 zs_, bool destroy_, Compare comp_ ) :
52         xs(xs_), xe(xe_), ys(ys_), ye(ye_), zs(zs_), comp(comp_), destroy(destroy_)
53     {}
54 };
55 
56 template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename RandomAccessIterator3, typename Compare>
execute()57 tbb::task* merge_task<RandomAccessIterator1,RandomAccessIterator2,RandomAccessIterator3,Compare>::execute() {
58     const size_t MERGE_CUT_OFF = 2000;
59     auto n = (xe-xs) + (ye-ys);
60     if( (size_t) n <= MERGE_CUT_OFF ) {
61         serial_move_merge( xs, xe, ys, ye, zs, comp );
62         if( destroy ) {
63             serial_destroy(xs,xe);
64             serial_destroy(ys,ye);
65         }
66         return NULL;
67     } else {
68         RandomAccessIterator1 xm;
69         RandomAccessIterator2 ym;
70         if( xe-xs < ye-ys  ) {
71             ym = ys+(ye-ys)/2;
72             xm = std::upper_bound(xs,xe,*ym,comp);
73         } else {
74             xm = xs+(xe-xs)/2;
75             ym = std::lower_bound(ys,ye,*xm,comp);
76         }
77         RandomAccessIterator3 zm = zs + ((xm-xs) + (ym-ys));
78         tbb::task* right = new( allocate_additional_child_of(*parent()) ) merge_task( xm, xe, ym, ye, zm, destroy, comp );
79         spawn(*right);
80         recycle_as_continuation();
81         xe = xm;
82         ye = ym;
83         return this;
84     }
85 }
86 
87 template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename Compare>
88 class stable_sort_task: public tbb::task {
89     tbb::task* execute();
90     RandomAccessIterator1 xs, xe;
91     RandomAccessIterator2 zs;
92     Compare comp;
93     signed char inplace;
94 public:
stable_sort_task(RandomAccessIterator1 xs_,RandomAccessIterator1 xe_,RandomAccessIterator2 zs_,int inplace_,Compare comp_)95     stable_sort_task(RandomAccessIterator1 xs_, RandomAccessIterator1 xe_, RandomAccessIterator2 zs_, int inplace_, Compare comp_ ) :
96         xs(xs_), xe(xe_), zs(zs_), comp(comp_), inplace(inplace_)
97     {}
98 };
99 
100 template<typename RandomAccessIterator1, typename RandomAccessIterator2, typename Compare>
execute()101 tbb::task* stable_sort_task<RandomAccessIterator1, RandomAccessIterator2, Compare>::execute() {
102     const size_t SORT_CUT_OFF = 500;
103     if ((size_t) (xe - xs) <= SORT_CUT_OFF) {
104         stable_sort_base_case(xs, xe, zs, inplace, comp);
105         return NULL;
106     } else {
107         RandomAccessIterator1 xm = xs + (xe - xs) / 2;
108         RandomAccessIterator2 zm = zs + (xm - xs);
109         RandomAccessIterator2 ze = zs + (xe - xs);
110         task* m;
111         if (inplace)
112             m = new (allocate_continuation()) merge_task<RandomAccessIterator2,RandomAccessIterator2,RandomAccessIterator1,Compare>(zs, zm, zm, ze, xs, inplace==2, comp);
113         else
114             m = new (allocate_continuation()) merge_task<RandomAccessIterator1,RandomAccessIterator1,RandomAccessIterator2,Compare>(xs, xm, xm, xe, zs, false, comp);
115         m->set_ref_count(2);
116         task* right = new(m->allocate_child()) stable_sort_task(xm,xe,zm,!inplace, comp);
117         spawn(*right);
118         recycle_as_child_of(*m);
119         xe=xm;
120         inplace=!inplace;
121         return this;
122     }
123 }
124 
125 } // namespace internal
126 
127 template<typename RandomAccessIterator, typename Compare>
parallel_stable_sort(RandomAccessIterator xs,RandomAccessIterator xe,Compare comp)128 void parallel_stable_sort( RandomAccessIterator xs, RandomAccessIterator xe, Compare comp ) {
129     typedef typename std::iterator_traits<RandomAccessIterator>::value_type T;
130     if( internal::raw_buffer z = internal::raw_buffer( sizeof(T)*(xe-xs) ) ) {
131         using tbb::task;
132         typedef typename std::iterator_traits<RandomAccessIterator>::value_type T;
133         internal::raw_buffer buf( sizeof(T)*(xe-xs) );
134         task::spawn_root_and_wait(*new( task::allocate_root() ) internal::stable_sort_task<RandomAccessIterator,T*,Compare>( xs, xe, (T*)buf.get(), 2, comp ));
135     } else
136         // Not enough memory available - fall back on serial sort
137         std::stable_sort( xs, xe, comp );
138 }
139 
140 } // namespace pss
141