1 
2 /**
3  *    Copyright (C) 2018-present MongoDB, Inc.
4  *
5  *    This program is free software: you can redistribute it and/or modify
6  *    it under the terms of the Server Side Public License, version 1,
7  *    as published by MongoDB, Inc.
8  *
9  *    This program is distributed in the hope that it will be useful,
10  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *    Server Side Public License for more details.
13  *
14  *    You should have received a copy of the Server Side Public License
15  *    along with this program. If not, see
16  *    <http://www.mongodb.com/licensing/server-side-public-license>.
17  *
18  *    As a special exception, the copyright holders give permission to link the
19  *    code of portions of this program with the OpenSSL library under certain
20  *    conditions as described in each individual source file and distribute
21  *    linked combinations including the program with the OpenSSL library. You
22  *    must comply with the Server Side Public License in all respects for
23  *    all of the code used other than as permitted herein. If you modify file(s)
24  *    with this exception, you may extend this exception to your version of the
25  *    file(s), but you are not obligated to do so. If you do not wish to do so,
26  *    delete this exception statement from your version. If you delete this
27  *    exception statement from all source files in the program, then also delete
28  *    it in the license file.
29  */
30 
31 #pragma once
32 
33 #include <deque>
34 #include <fstream>
35 #include <memory>
36 #include <string>
37 #include <utility>
38 #include <vector>
39 
40 #include "mongo/base/disallow_copying.h"
41 #include "mongo/bson/util/builder.h"
42 
43 /**
44  * This is the public API for the Sorter (both in-memory and external)
45  *
46  * Many of the classes in this file are templated on Key and Value types which
47  * require the following public members:
48  *
49  * // A type carrying extra information used by the deserializer. Contents are
50  * // up to you, but  it should be cheap to copy. Use an empty struct if your
51  * // deserializer doesn't need extra data.
52  * struct SorterDeserializeSettings {};
53  *
54  * // Serialize this object to the BufBuilder
55  * void serializeForSorter(BufBuilder& buf) const;
56  *
57  * // Deserialize and return an object from the BufReader
58  * static Type deserializeForSorter(BufReader& buf, const Type::SorterDeserializeSettings&);
59  *
60  * // How much memory is used by your type? Include sizeof(*this) and any memory you reference.
61  * int memUsageForSorter() const;
62  *
63  * // For types with owned and unowned states, such as BSON, return an owned version.
64  * // Return *this if your type doesn't have an unowned state
65  * Type getOwned() const;
66  *
67  * Comparators are functors that that compare std::pair<Key, Value> and return an
68  * int less than, equal to, or greater than 0 depending on how the two pairs
69  * compare with the same semantics as memcmp.
70  * Example for Key=BSONObj, Value=int:
71  *
72  * class MyComparator {
73  * public:
74  *     int operator()(const std::pair<BSONObj, int>& lhs,
75  *                    const std::pair<BSONObj, int>& rhs) {
76  *         int ret = lhs.first.woCompare(rhs.first, _ord);
77  *         if (ret)
78  *             return ret;
79  *
80  *        if (lhs.second >  rhs.second) return 1;
81  *        if (lhs.second == rhs.second) return 0;
82  *        return -1;
83  *     }
84  *     Ordering _ord;
85  * };
86  */
87 
88 namespace mongo {
89 
90 /**
91  * Runtime options that control the Sorter's behavior
92  */
93 struct SortOptions {
94     // The number of KV pairs to be returned. 0 indicates no limit.
95     unsigned long long limit;
96 
97     // When in-memory memory usage exceeds this value, we try to spill to disk. This is approximate.
98     size_t maxMemoryUsageBytes;
99 
100     // Whether we are allowed to spill to disk. If this is false and in-memory exceeds
101     // maxMemoryUsageBytes, we will uassert.
102     bool extSortAllowed;
103 
104     // Directory into which we place a file when spilling to disk. Must be explicitly set if
105     // extSortAllowed is true.
106     std::string tempDir;
107 
SortOptionsSortOptions108     SortOptions() : limit(0), maxMemoryUsageBytes(64 * 1024 * 1024), extSortAllowed(false) {}
109 
110     // Fluent API to support expressions like SortOptions().Limit(1000).ExtSortAllowed(true)
111 
LimitSortOptions112     SortOptions& Limit(unsigned long long newLimit) {
113         limit = newLimit;
114         return *this;
115     }
116 
MaxMemoryUsageBytesSortOptions117     SortOptions& MaxMemoryUsageBytes(size_t newMaxMemoryUsageBytes) {
118         maxMemoryUsageBytes = newMaxMemoryUsageBytes;
119         return *this;
120     }
121 
122     SortOptions& ExtSortAllowed(bool newExtSortAllowed = true) {
123         extSortAllowed = newExtSortAllowed;
124         return *this;
125     }
126 
TempDirSortOptions127     SortOptions& TempDir(const std::string& newTempDir) {
128         tempDir = newTempDir;
129         return *this;
130     }
131 };
132 
133 /**
134  * This is the sorted output iterator from the sorting framework.
135  */
136 template <typename Key, typename Value>
137 class SortIteratorInterface {
138     MONGO_DISALLOW_COPYING(SortIteratorInterface);
139 
140 public:
141     typedef std::pair<Key, Value> Data;
142 
143     // Unowned objects are only valid until next call to any method
144 
145     virtual bool more() = 0;
146     virtual std::pair<Key, Value> next() = 0;
147 
~SortIteratorInterface()148     virtual ~SortIteratorInterface() {}
149 
150     // Returns an iterator that merges the passed in iterators
151     template <typename Comparator>
152     static SortIteratorInterface* merge(
153         const std::vector<std::shared_ptr<SortIteratorInterface>>& iters,
154         const std::string& fileName,
155         const SortOptions& opts,
156         const Comparator& comp);
157 
158     // Opens and closes the source of data over which this class iterates, if applicable.
159     virtual void openSource() = 0;
160     virtual void closeSource() = 0;
161 
162 protected:
SortIteratorInterface()163     SortIteratorInterface() {}  // can only be constructed as a base
164 };
165 
166 /**
167  * This is the way to input data to the sorting framework.
168  *
169  * Each instance of this class will generate a file name and spill sorted data ranges to that file
170  * if allowed in its given Settings. If the instance destructs before done() is called, it will
171  * handle deleting the data file used for spills. Otherwise, if done() is called, responsibility for
172  * file deletion moves to the returned Iterator object, which must then delete the file upon its own
173  * destruction.
174  *
175  * All users of Sorter implementations must define their own nextFileName() function to generate
176  * unique file names for spills to disk. This is necessary because the sorter.cpp file is separately
177  * directly included in multiple places, rather than compiled in one place and linked, and so cannot
178  * itself provide a globally unique ID for file names. See existing function implementations of
179  * nextFileName() for example.
180  */
181 template <typename Key, typename Value>
182 class Sorter {
183     MONGO_DISALLOW_COPYING(Sorter);
184 
185 public:
186     typedef std::pair<Key, Value> Data;
187     typedef SortIteratorInterface<Key, Value> Iterator;
188     typedef std::pair<typename Key::SorterDeserializeSettings,
189                       typename Value::SorterDeserializeSettings>
190         Settings;
191 
192     template <typename Comparator>
193     static Sorter* make(const SortOptions& opts,
194                         const Comparator& comp,
195                         const Settings& settings = Settings());
196 
197     virtual void add(const Key&, const Value&) = 0;
198 
199     /**
200      * Cannot add more data after calling done().
201      *
202      * The returned Iterator must not outlive the Sorter instance, which manages file clean up.
203      */
204     virtual Iterator* done() = 0;
205 
~Sorter()206     virtual ~Sorter() {}
207 
208 protected:
Sorter()209     Sorter() {}  // can only be constructed as a base
210 };
211 
212 /**
213  * Appends a pre-sorted range of data to a given file and hands back an Iterator over that file
214  * range.
215  */
216 template <typename Key, typename Value>
217 class SortedFileWriter {
218     MONGO_DISALLOW_COPYING(SortedFileWriter);
219 
220 public:
221     typedef SortIteratorInterface<Key, Value> Iterator;
222     typedef std::pair<typename Key::SorterDeserializeSettings,
223                       typename Value::SorterDeserializeSettings>
224         Settings;
225 
226     explicit SortedFileWriter(const SortOptions& opts,
227                               const std::string& fileName,
228                               const std::streampos fileStartOffset,
229                               const Settings& settings = Settings());
230 
231     void addAlreadySorted(const Key&, const Value&);
232 
233     /**
234      * Spills any data remaining in the buffer to disk and then closes the file to which data was
235      * written.
236      *
237      * No more data can be added via addAlreadySorted() after calling done().
238      */
239     Iterator* done();
240 
241     /**
242      * Only call this after done() has been called to set the end offset.
243      */
getFileEndOffset()244     std::streampos getFileEndOffset() {
245         invariant(!_file.is_open());
246         return _fileEndOffset;
247     }
248 
249 private:
250     void spill();
251 
252     const Settings _settings;
253     std::string _fileName;
254     std::ofstream _file;
255     BufBuilder _buffer;
256 
257     // Tracks where in the file we started and finished writing the sorted data range so that the
258     // information can be given to the Iterator in done(), and to the user via getFileEndOffset()
259     // for the next SortedFileWriter instance using the same file.
260     std::streampos _fileStartOffset;
261     std::streampos _fileEndOffset;
262 };
263 }
264 
265 /**
266  * #include "mongo/db/sorter/sorter.cpp" and call this in a single translation
267  * unit once for each unique set of template parameters.
268  */
269 #define MONGO_CREATE_SORTER(Key, Value, Comparator)                                      \
270     /* public classes */                                                                 \
271     template class ::mongo::Sorter<Key, Value>;                                          \
272     template class ::mongo::SortIteratorInterface<Key, Value>;                           \
273     template class ::mongo::SortedFileWriter<Key, Value>;                                \
274     /* internal classes */                                                               \
275     template class ::mongo::sorter::NoLimitSorter<Key, Value, Comparator>;               \
276     template class ::mongo::sorter::LimitOneSorter<Key, Value, Comparator>;              \
277     template class ::mongo::sorter::TopKSorter<Key, Value, Comparator>;                  \
278     template class ::mongo::sorter::MergeIterator<Key, Value, Comparator>;               \
279     template class ::mongo::sorter::InMemIterator<Key, Value>;                           \
280     template class ::mongo::sorter::FileIterator<Key, Value>;                            \
281     /* factory functions */                                                              \
282     template ::mongo::SortIteratorInterface<Key, Value>* ::mongo::                       \
283         SortIteratorInterface<Key, Value>::merge<Comparator>(                            \
284             const std::vector<std::shared_ptr<SortIteratorInterface>>& iters,            \
285             const std::string& fileName,                                                 \
286             const SortOptions& opts,                                                     \
287             const Comparator& comp);                                                     \
288     template ::mongo::Sorter<Key, Value>* ::mongo::Sorter<Key, Value>::make<Comparator>( \
289         const SortOptions& opts, const Comparator& comp, const Settings& settings);
290