1 /******************************************************************************
2 * ips4o/cleanup_margins.hpp
3 *
4 * In-place Parallel Super Scalar Samplesort (IPS⁴o)
5 *
6 ******************************************************************************
7 * BSD 2-Clause License
8 *
9 * Copyright © 2017, Michael Axtmann <michael.axtmann@kit.edu>
10 * Copyright © 2017, Daniel Ferizovic <daniel.ferizovic@student.kit.edu>
11 * Copyright © 2017, Sascha Witt <sascha.witt@kit.edu>
12 * All rights reserved.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are met:
16 *
17 * * Redistributions of source code must retain the above copyright notice, this
18 * list of conditions and the following disclaimer.
19 *
20 * * Redistributions in binary form must reproduce the above copyright notice,
21 * this list of conditions and the following disclaimer in the documentation
22 * and/or other materials provided with the distribution.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
27 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 *****************************************************************************/
35
36 #pragma once
37
38 #include <limits>
39 #include <utility>
40
41 #include "ips4o_fwd.hpp"
42 #include "base_case.hpp"
43 #include "memory.hpp"
44 #include "utils.hpp"
45
46 namespace ips4o {
47 namespace detail {
48
49 /**
50 * Saves margins at thread boundaries.
51 */
52 template <class Cfg>
saveMargins(int last_bucket)53 std::pair<int, typename Cfg::difference_type> Sorter<Cfg>::saveMargins(int last_bucket) {
54 // Find last bucket boundary in this thread's area
55 diff_t tail = bucket_start_[last_bucket];
56 const diff_t end = Cfg::alignToNextBlock(tail);
57
58 // Don't need to do anything if there is no overlap, or we are in the overflow case
59 if (tail == end || end > (end_ - begin_))
60 return {-1, 0};
61
62 // Find bucket this last block belongs to
63 {
64 const auto start_of_last_block = end - Cfg::kBlockSize;
65 diff_t last_start;
66 do {
67 --last_bucket;
68 last_start = bucket_start_[last_bucket];
69 } while (last_start > start_of_last_block);
70 }
71
72 // Check if the last block has been written
73 const auto write = shared_->bucket_pointers[last_bucket].getWrite();
74 if (write < end)
75 return {-1, 0};
76
77 // Read excess elements, if necessary
78 tail = bucket_start_[last_bucket + 1];
79 local_.swap[0].readFrom(begin_ + tail, end - tail);
80
81 return {last_bucket, end - tail};
82 }
83
84 /**
85 * Fills margins from buffers.
86 */
87 template <class Cfg>
writeMargins(const int first_bucket,const int last_bucket,const int overflow_bucket,const int swap_bucket,const diff_t in_swap_buffer)88 void Sorter<Cfg>::writeMargins(const int first_bucket, const int last_bucket,
89 const int overflow_bucket, const int swap_bucket,
90 const diff_t in_swap_buffer) {
91 const bool is_last_level = end_ - begin_ <= Cfg::kSingleLevelThreshold;
92 const auto comp = classifier_->getComparator();
93
94 for (int i = first_bucket; i < last_bucket; ++i) {
95 // Get bucket information
96 const auto bstart = bucket_start_[i];
97 const auto bend = bucket_start_[i + 1];
98 const auto bwrite = bucket_pointers_[i].getWrite();
99 // Destination where elements can be written
100 auto dst = begin_ + bstart;
101 auto remaining = Cfg::alignToNextBlock(bstart) - bstart;
102
103 if (i == overflow_bucket && overflow_) {
104 // Is there overflow?
105
106 // Overflow buffer has been written => write pointer must be at end of bucket
107 IPS4O_ASSUME_NOT(Cfg::alignToNextBlock(bend) != bwrite);
108
109 auto src = overflow_->data();
110 // There must be space for at least BlockSize elements
111 IPS4O_ASSUME_NOT((bend - (bwrite - Cfg::kBlockSize)) + remaining < Cfg::kBlockSize);
112 auto tail_size = Cfg::kBlockSize - remaining;
113
114 // Fill head
115 std::move(src, src + remaining, dst);
116 src += remaining;
117 remaining = std::numeric_limits<diff_t>::max();
118
119 // Write remaining elements into tail
120 dst = begin_ + (bwrite - Cfg::kBlockSize);
121 dst = std::move(src, src + tail_size, dst);
122
123 overflow_->reset(Cfg::kBlockSize);
124 } else if (i == swap_bucket && in_swap_buffer) {
125 // Did we save this in saveMargins?
126
127 // Bucket of last block in this thread's area => write swap buffer
128 auto src = local_.swap[0].data();
129 // All elements from the buffer must fit
130 IPS4O_ASSUME_NOT(in_swap_buffer > remaining);
131
132 // Write to head
133 dst = std::move(src, src + in_swap_buffer, dst);
134 remaining -= in_swap_buffer;
135
136 local_.swap[0].reset(in_swap_buffer);
137 } else if (bwrite > bend && bend - bstart > Cfg::kBlockSize) {
138 // Final block has been written => move excess elements to head
139 IPS4O_ASSUME_NOT(Cfg::alignToNextBlock(bend) != bwrite);
140
141 auto src = begin_ + bend;
142 auto head_size = bwrite - bend;
143 // Must fit, no other empty space left
144 IPS4O_ASSUME_NOT(head_size > remaining);
145
146 // Write to head
147 dst = std::move(src, src + head_size, dst);
148 remaining -= head_size;
149 }
150
151 // Write elements from buffers
152 for (int t = 0; t < num_threads_; ++t) {
153 auto& buffers = shared_ ? shared_->local[t]->buffers : local_.buffers;
154 auto src = buffers.data(i);
155 auto count = buffers.size(i);
156
157 if (count <= remaining) {
158 dst = std::move(src, src + count, dst);
159 remaining -= count;
160 } else {
161 std::move(src, src + remaining, dst);
162 src += remaining;
163 count -= remaining;
164 remaining = std::numeric_limits<diff_t>::max();
165
166 dst = begin_ + bwrite;
167 dst = std::move(src, src + count, dst);
168 }
169
170 buffers.reset(i);
171 }
172
173 // Perform final base case sort here, while the data is still cached
174 if (is_last_level || (bend - bstart) <= 2 * Cfg::kBaseCaseSize)
175 detail::baseCaseSort(begin_ + bstart, begin_ + bend, comp);
176 }
177 }
178
179 } // namespace detail
180 } // namespace ips4o
181