1 /******************************************************************************
2  * ips4o/cleanup_margins.hpp
3  *
4  * In-place Parallel Super Scalar Samplesort (IPS⁴o)
5  *
6  ******************************************************************************
7  * BSD 2-Clause License
8  *
9  * Copyright © 2017, Michael Axtmann <michael.axtmann@kit.edu>
10  * Copyright © 2017, Daniel Ferizovic <daniel.ferizovic@student.kit.edu>
11  * Copyright © 2017, Sascha Witt <sascha.witt@kit.edu>
12  * All rights reserved.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions are met:
16  *
17  * * Redistributions of source code must retain the above copyright notice, this
18  *   list of conditions and the following disclaimer.
19  *
20  * * Redistributions in binary form must reproduce the above copyright notice,
21  *   this list of conditions and the following disclaimer in the documentation
22  *   and/or other materials provided with the distribution.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
27  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  *****************************************************************************/
35 
36 #pragma once
37 
38 #include <limits>
39 #include <utility>
40 
41 #include "ips4o_fwd.hpp"
42 #include "base_case.hpp"
43 #include "memory.hpp"
44 #include "utils.hpp"
45 
46 namespace ips4o {
47 namespace detail {
48 
49 /**
50  * Saves margins at thread boundaries.
51  */
52 template <class Cfg>
saveMargins(int last_bucket)53 std::pair<int, typename Cfg::difference_type> Sorter<Cfg>::saveMargins(int last_bucket) {
54     // Find last bucket boundary in this thread's area
55     diff_t tail = bucket_start_[last_bucket];
56     const diff_t end = Cfg::alignToNextBlock(tail);
57 
58     // Don't need to do anything if there is no overlap, or we are in the overflow case
59     if (tail == end || end > (end_ - begin_))
60         return {-1, 0};
61 
62     // Find bucket this last block belongs to
63     {
64         const auto start_of_last_block = end - Cfg::kBlockSize;
65         diff_t last_start;
66         do {
67             --last_bucket;
68             last_start = bucket_start_[last_bucket];
69         } while (last_start > start_of_last_block);
70     }
71 
72     // Check if the last block has been written
73     const auto write = shared_->bucket_pointers[last_bucket].getWrite();
74     if (write < end)
75         return {-1, 0};
76 
77     // Read excess elements, if necessary
78     tail = bucket_start_[last_bucket + 1];
79     local_.swap[0].readFrom(begin_ + tail, end - tail);
80 
81     return {last_bucket, end - tail};
82 }
83 
84 /**
85  * Fills margins from buffers.
86  */
87 template <class Cfg>
writeMargins(const int first_bucket,const int last_bucket,const int overflow_bucket,const int swap_bucket,const diff_t in_swap_buffer)88 void Sorter<Cfg>::writeMargins(const int first_bucket, const int last_bucket,
89                                const int overflow_bucket, const int swap_bucket,
90                                const diff_t in_swap_buffer) {
91     const bool is_last_level = end_ - begin_ <= Cfg::kSingleLevelThreshold;
92     const auto comp = classifier_->getComparator();
93 
94     for (int i = first_bucket; i < last_bucket; ++i) {
95         // Get bucket information
96         const auto bstart = bucket_start_[i];
97         const auto bend = bucket_start_[i + 1];
98         const auto bwrite = bucket_pointers_[i].getWrite();
99         // Destination where elements can be written
100         auto dst = begin_ + bstart;
101         auto remaining = Cfg::alignToNextBlock(bstart) - bstart;
102 
103         if (i == overflow_bucket && overflow_) {
104             // Is there overflow?
105 
106             // Overflow buffer has been written => write pointer must be at end of bucket
107             IPS4O_ASSUME_NOT(Cfg::alignToNextBlock(bend) != bwrite);
108 
109             auto src = overflow_->data();
110             // There must be space for at least BlockSize elements
111             IPS4O_ASSUME_NOT((bend - (bwrite - Cfg::kBlockSize)) + remaining < Cfg::kBlockSize);
112             auto tail_size = Cfg::kBlockSize - remaining;
113 
114             // Fill head
115             std::move(src, src + remaining, dst);
116             src += remaining;
117             remaining = std::numeric_limits<diff_t>::max();
118 
119             // Write remaining elements into tail
120             dst = begin_ + (bwrite - Cfg::kBlockSize);
121             dst = std::move(src, src + tail_size, dst);
122 
123             overflow_->reset(Cfg::kBlockSize);
124         } else if (i == swap_bucket && in_swap_buffer) {
125             // Did we save this in saveMargins?
126 
127             // Bucket of last block in this thread's area => write swap buffer
128             auto src = local_.swap[0].data();
129             // All elements from the buffer must fit
130             IPS4O_ASSUME_NOT(in_swap_buffer > remaining);
131 
132             // Write to head
133             dst = std::move(src, src + in_swap_buffer, dst);
134             remaining -= in_swap_buffer;
135 
136             local_.swap[0].reset(in_swap_buffer);
137         } else if (bwrite > bend && bend - bstart > Cfg::kBlockSize) {
138             // Final block has been written => move excess elements to head
139             IPS4O_ASSUME_NOT(Cfg::alignToNextBlock(bend) != bwrite);
140 
141             auto src = begin_ + bend;
142             auto head_size = bwrite - bend;
143             // Must fit, no other empty space left
144             IPS4O_ASSUME_NOT(head_size > remaining);
145 
146             // Write to head
147             dst = std::move(src, src + head_size, dst);
148             remaining -= head_size;
149         }
150 
151         // Write elements from buffers
152         for (int t = 0; t < num_threads_; ++t) {
153             auto& buffers = shared_ ? shared_->local[t]->buffers : local_.buffers;
154             auto src = buffers.data(i);
155             auto count = buffers.size(i);
156 
157             if (count <= remaining) {
158                 dst = std::move(src, src + count, dst);
159                 remaining -= count;
160             } else {
161                 std::move(src, src + remaining, dst);
162                 src += remaining;
163                 count -= remaining;
164                 remaining = std::numeric_limits<diff_t>::max();
165 
166                 dst = begin_ + bwrite;
167                 dst = std::move(src, src + count, dst);
168             }
169 
170             buffers.reset(i);
171         }
172 
173         // Perform final base case sort here, while the data is still cached
174         if (is_last_level || (bend - bstart) <= 2 * Cfg::kBaseCaseSize)
175             detail::baseCaseSort(begin_ + bstart, begin_ + bend, comp);
176     }
177 }
178 
179 }  // namespace detail
180 }  // namespace ips4o
181