1 2 /** 3 * Copyright (C) 2018-present MongoDB, Inc. 4 * 5 * This program is free software: you can redistribute it and/or modify 6 * it under the terms of the Server Side Public License, version 1, 7 * as published by MongoDB, Inc. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * Server Side Public License for more details. 13 * 14 * You should have received a copy of the Server Side Public License 15 * along with this program. If not, see 16 * <http://www.mongodb.com/licensing/server-side-public-license>. 17 * 18 * As a special exception, the copyright holders give permission to link the 19 * code of portions of this program with the OpenSSL library under certain 20 * conditions as described in each individual source file and distribute 21 * linked combinations including the program with the OpenSSL library. You 22 * must comply with the Server Side Public License in all respects for 23 * all of the code used other than as permitted herein. If you modify file(s) 24 * with this exception, you may extend this exception to your version of the 25 * file(s), but you are not obligated to do so. If you do not wish to do so, 26 * delete this exception statement from your version. If you delete this 27 * exception statement from all source files in the program, then also delete 28 * it in the license file. 29 */ 30 31 #pragma once 32 33 #include <deque> 34 #include <fstream> 35 #include <memory> 36 #include <string> 37 #include <utility> 38 #include <vector> 39 40 #include "mongo/base/disallow_copying.h" 41 #include "mongo/bson/util/builder.h" 42 43 /** 44 * This is the public API for the Sorter (both in-memory and external) 45 * 46 * Many of the classes in this file are templated on Key and Value types which 47 * require the following public members: 48 * 49 * // A type carrying extra information used by the deserializer. Contents are 50 * // up to you, but it should be cheap to copy. Use an empty struct if your 51 * // deserializer doesn't need extra data. 52 * struct SorterDeserializeSettings {}; 53 * 54 * // Serialize this object to the BufBuilder 55 * void serializeForSorter(BufBuilder& buf) const; 56 * 57 * // Deserialize and return an object from the BufReader 58 * static Type deserializeForSorter(BufReader& buf, const Type::SorterDeserializeSettings&); 59 * 60 * // How much memory is used by your type? Include sizeof(*this) and any memory you reference. 61 * int memUsageForSorter() const; 62 * 63 * // For types with owned and unowned states, such as BSON, return an owned version. 64 * // Return *this if your type doesn't have an unowned state 65 * Type getOwned() const; 66 * 67 * Comparators are functors that that compare std::pair<Key, Value> and return an 68 * int less than, equal to, or greater than 0 depending on how the two pairs 69 * compare with the same semantics as memcmp. 70 * Example for Key=BSONObj, Value=int: 71 * 72 * class MyComparator { 73 * public: 74 * int operator()(const std::pair<BSONObj, int>& lhs, 75 * const std::pair<BSONObj, int>& rhs) { 76 * int ret = lhs.first.woCompare(rhs.first, _ord); 77 * if (ret) 78 * return ret; 79 * 80 * if (lhs.second > rhs.second) return 1; 81 * if (lhs.second == rhs.second) return 0; 82 * return -1; 83 * } 84 * Ordering _ord; 85 * }; 86 */ 87 88 namespace mongo { 89 90 /** 91 * Runtime options that control the Sorter's behavior 92 */ 93 struct SortOptions { 94 // The number of KV pairs to be returned. 0 indicates no limit. 95 unsigned long long limit; 96 97 // When in-memory memory usage exceeds this value, we try to spill to disk. This is approximate. 98 size_t maxMemoryUsageBytes; 99 100 // Whether we are allowed to spill to disk. If this is false and in-memory exceeds 101 // maxMemoryUsageBytes, we will uassert. 102 bool extSortAllowed; 103 104 // Directory into which we place a file when spilling to disk. Must be explicitly set if 105 // extSortAllowed is true. 106 std::string tempDir; 107 SortOptionsSortOptions108 SortOptions() : limit(0), maxMemoryUsageBytes(64 * 1024 * 1024), extSortAllowed(false) {} 109 110 // Fluent API to support expressions like SortOptions().Limit(1000).ExtSortAllowed(true) 111 LimitSortOptions112 SortOptions& Limit(unsigned long long newLimit) { 113 limit = newLimit; 114 return *this; 115 } 116 MaxMemoryUsageBytesSortOptions117 SortOptions& MaxMemoryUsageBytes(size_t newMaxMemoryUsageBytes) { 118 maxMemoryUsageBytes = newMaxMemoryUsageBytes; 119 return *this; 120 } 121 122 SortOptions& ExtSortAllowed(bool newExtSortAllowed = true) { 123 extSortAllowed = newExtSortAllowed; 124 return *this; 125 } 126 TempDirSortOptions127 SortOptions& TempDir(const std::string& newTempDir) { 128 tempDir = newTempDir; 129 return *this; 130 } 131 }; 132 133 /** 134 * This is the sorted output iterator from the sorting framework. 135 */ 136 template <typename Key, typename Value> 137 class SortIteratorInterface { 138 MONGO_DISALLOW_COPYING(SortIteratorInterface); 139 140 public: 141 typedef std::pair<Key, Value> Data; 142 143 // Unowned objects are only valid until next call to any method 144 145 virtual bool more() = 0; 146 virtual std::pair<Key, Value> next() = 0; 147 ~SortIteratorInterface()148 virtual ~SortIteratorInterface() {} 149 150 // Returns an iterator that merges the passed in iterators 151 template <typename Comparator> 152 static SortIteratorInterface* merge( 153 const std::vector<std::shared_ptr<SortIteratorInterface>>& iters, 154 const std::string& fileName, 155 const SortOptions& opts, 156 const Comparator& comp); 157 158 // Opens and closes the source of data over which this class iterates, if applicable. 159 virtual void openSource() = 0; 160 virtual void closeSource() = 0; 161 162 protected: SortIteratorInterface()163 SortIteratorInterface() {} // can only be constructed as a base 164 }; 165 166 /** 167 * This is the way to input data to the sorting framework. 168 * 169 * Each instance of this class will generate a file name and spill sorted data ranges to that file 170 * if allowed in its given Settings. If the instance destructs before done() is called, it will 171 * handle deleting the data file used for spills. Otherwise, if done() is called, responsibility for 172 * file deletion moves to the returned Iterator object, which must then delete the file upon its own 173 * destruction. 174 * 175 * All users of Sorter implementations must define their own nextFileName() function to generate 176 * unique file names for spills to disk. This is necessary because the sorter.cpp file is separately 177 * directly included in multiple places, rather than compiled in one place and linked, and so cannot 178 * itself provide a globally unique ID for file names. See existing function implementations of 179 * nextFileName() for example. 180 */ 181 template <typename Key, typename Value> 182 class Sorter { 183 MONGO_DISALLOW_COPYING(Sorter); 184 185 public: 186 typedef std::pair<Key, Value> Data; 187 typedef SortIteratorInterface<Key, Value> Iterator; 188 typedef std::pair<typename Key::SorterDeserializeSettings, 189 typename Value::SorterDeserializeSettings> 190 Settings; 191 192 template <typename Comparator> 193 static Sorter* make(const SortOptions& opts, 194 const Comparator& comp, 195 const Settings& settings = Settings()); 196 197 virtual void add(const Key&, const Value&) = 0; 198 199 /** 200 * Cannot add more data after calling done(). 201 * 202 * The returned Iterator must not outlive the Sorter instance, which manages file clean up. 203 */ 204 virtual Iterator* done() = 0; 205 ~Sorter()206 virtual ~Sorter() {} 207 208 protected: Sorter()209 Sorter() {} // can only be constructed as a base 210 }; 211 212 /** 213 * Appends a pre-sorted range of data to a given file and hands back an Iterator over that file 214 * range. 215 */ 216 template <typename Key, typename Value> 217 class SortedFileWriter { 218 MONGO_DISALLOW_COPYING(SortedFileWriter); 219 220 public: 221 typedef SortIteratorInterface<Key, Value> Iterator; 222 typedef std::pair<typename Key::SorterDeserializeSettings, 223 typename Value::SorterDeserializeSettings> 224 Settings; 225 226 explicit SortedFileWriter(const SortOptions& opts, 227 const std::string& fileName, 228 const std::streampos fileStartOffset, 229 const Settings& settings = Settings()); 230 231 void addAlreadySorted(const Key&, const Value&); 232 233 /** 234 * Spills any data remaining in the buffer to disk and then closes the file to which data was 235 * written. 236 * 237 * No more data can be added via addAlreadySorted() after calling done(). 238 */ 239 Iterator* done(); 240 241 /** 242 * Only call this after done() has been called to set the end offset. 243 */ getFileEndOffset()244 std::streampos getFileEndOffset() { 245 invariant(!_file.is_open()); 246 return _fileEndOffset; 247 } 248 249 private: 250 void spill(); 251 252 const Settings _settings; 253 std::string _fileName; 254 std::ofstream _file; 255 BufBuilder _buffer; 256 257 // Tracks where in the file we started and finished writing the sorted data range so that the 258 // information can be given to the Iterator in done(), and to the user via getFileEndOffset() 259 // for the next SortedFileWriter instance using the same file. 260 std::streampos _fileStartOffset; 261 std::streampos _fileEndOffset; 262 }; 263 } 264 265 /** 266 * #include "mongo/db/sorter/sorter.cpp" and call this in a single translation 267 * unit once for each unique set of template parameters. 268 */ 269 #define MONGO_CREATE_SORTER(Key, Value, Comparator) \ 270 /* public classes */ \ 271 template class ::mongo::Sorter<Key, Value>; \ 272 template class ::mongo::SortIteratorInterface<Key, Value>; \ 273 template class ::mongo::SortedFileWriter<Key, Value>; \ 274 /* internal classes */ \ 275 template class ::mongo::sorter::NoLimitSorter<Key, Value, Comparator>; \ 276 template class ::mongo::sorter::LimitOneSorter<Key, Value, Comparator>; \ 277 template class ::mongo::sorter::TopKSorter<Key, Value, Comparator>; \ 278 template class ::mongo::sorter::MergeIterator<Key, Value, Comparator>; \ 279 template class ::mongo::sorter::InMemIterator<Key, Value>; \ 280 template class ::mongo::sorter::FileIterator<Key, Value>; \ 281 /* factory functions */ \ 282 template ::mongo::SortIteratorInterface<Key, Value>* ::mongo:: \ 283 SortIteratorInterface<Key, Value>::merge<Comparator>( \ 284 const std::vector<std::shared_ptr<SortIteratorInterface>>& iters, \ 285 const std::string& fileName, \ 286 const SortOptions& opts, \ 287 const Comparator& comp); \ 288 template ::mongo::Sorter<Key, Value>* ::mongo::Sorter<Key, Value>::make<Comparator>( \ 289 const SortOptions& opts, const Comparator& comp, const Settings& settings); 290