1 // string_data.h
2
3
4 /**
5 * Copyright (C) 2018-present MongoDB, Inc.
6 *
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the Server Side Public License, version 1,
9 * as published by MongoDB, Inc.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * Server Side Public License for more details.
15 *
16 * You should have received a copy of the Server Side Public License
17 * along with this program. If not, see
18 * <http://www.mongodb.com/licensing/server-side-public-license>.
19 *
20 * As a special exception, the copyright holders give permission to link the
21 * code of portions of this program with the OpenSSL library under certain
22 * conditions as described in each individual source file and distribute
23 * linked combinations including the program with the OpenSSL library. You
24 * must comply with the Server Side Public License in all respects for
25 * all of the code used other than as permitted herein. If you modify file(s)
26 * with this exception, you may extend this exception to your version of the
27 * file(s), but you are not obligated to do so. If you do not wish to do so,
28 * delete this exception statement from your version. If you delete this
29 * exception statement from all source files in the program, then also delete
30 * it in the license file.
31 */
32
33 #pragma once
34
#include <algorithm>  // for min
#include <cctype>
#include <cstring>
#include <iosfwd>
#include <limits>
#include <stdexcept>
#include <string>
41
42 #include "mongo/stdx/type_traits.h"
43 #define MONGO_INCLUDE_INVARIANT_H_WHITELISTED
44 #include "mongo/util/invariant.h"
45 #undef MONGO_INCLUDE_INVARIANT_H_WHITELISTED
46
47 namespace mongo {
48
49 /**
50 * A StringData object wraps a 'const std::string&' or a 'const char*' without copying its
51 * contents. The most common usage is as a function argument that takes any of the two
 * forms of strings above. Fundamentally, this class tries to work around the fact that string
53 * literals in C++ are char[N]'s.
54 *
55 * Notes:
56 *
57 * + The object StringData wraps around must be alive while the StringData is.
58 *
 *  + Because a StringData can refer to a substring of a larger string, one should never assume
 *    that rawData() is terminated by a null.
61 */
62 class StringData {
63 struct TrustedInitTag {};
StringData(const char * c,size_t len,TrustedInitTag)64 constexpr StringData(const char* c, size_t len, TrustedInitTag) : _data(c), _size(len) {}
65
66 public:
67 // Declared in string_data_comparator_interface.h.
68 class ComparatorInterface;
69
70 // Iterator type
71 using const_iterator = const char*;
72
73 /** Constructs an empty StringData. */
74 constexpr StringData() = default;
75
76 /**
77 * Constructs a StringData, for the case where the length of the
78 * string is not known. 'c' must either be NULL, or a pointer to a
79 * null-terminated string.
80 */
StringData(const char * str)81 StringData(const char* str) : StringData(str, str ? std::strlen(str) : 0) {}
82
83 /**
84 * Constructs a StringData, for the case of a std::string. We can
85 * use the trusted init path with no follow on checks because
86 * string::data is assured to never return nullptr.
87 */
StringData(const std::string & s)88 StringData(const std::string& s) : StringData(s.data(), s.length(), TrustedInitTag()) {}
89
90 /**
91 * Constructs a StringData with an explicit length. 'c' must
92 * either be NULL (in which case len must be zero), or be a
93 * pointer into a character array. The StringData will refer to
94 * the first 'len' characters starting at 'c'. The range of
95 * characters c to c+len must be valid.
96 */
StringData(const char * c,size_t len)97 StringData(const char* c, size_t len) : StringData(c, len, TrustedInitTag()) {
98 invariant(_data || (_size == 0));
99 }
100
101 /**
102 * Constructs a StringData from a user defined literal. This allows
103 * for constexpr creation of StringData's that are known at compile time.
104 */
105 constexpr friend StringData operator"" _sd(const char* c, std::size_t len);
106
107 /**
108 * Constructs a StringData with begin and end iterators. begin points to the beginning of the
109 * string. end points to the position past the end of the string. In a null-terminated string,
110 * end points to the null-terminator.
111 *
112 * We template the second parameter to ensure if StringData is called with 0 in the second
113 * parameter, the (ptr,len) constructor is chosen instead.
114 */
115 template <
116 typename InputIt,
117 typename = stdx::enable_if_t<std::is_same<StringData::const_iterator, InputIt>::value>>
StringData(InputIt begin,InputIt end)118 StringData(InputIt begin, InputIt end) {
119 invariant(begin && end);
120 _data = begin;
121 _size = std::distance(begin, end);
122 }
123
124 /**
125 * Returns -1, 0, or 1 if 'this' is less, equal, or greater than 'other' in
126 * lexicographical order.
127 */
128 int compare(StringData other) const;
129
130 /**
131 * note: this uses tolower, and therefore does not handle
132 * come languages correctly.
133 * should be use sparingly
134 */
135 bool equalCaseInsensitive(StringData other) const;
136
137 void copyTo(char* dest, bool includeEndingNull) const;
138
139 StringData substr(size_t pos, size_t n = std::numeric_limits<size_t>::max()) const;
140
141 //
142 // finders
143 //
144
145 size_t find(char c, size_t fromPos = 0) const;
146 size_t find(StringData needle) const;
147 size_t rfind(char c, size_t fromPos = std::string::npos) const;
148
149 /**
150 * Returns true if 'prefix' is a substring of this instance, anchored at position 0.
151 */
152 bool startsWith(StringData prefix) const;
153
154 /**
155 * Returns true if 'suffix' is a substring of this instance, anchored at the end.
156 */
157 bool endsWith(StringData suffix) const;
158
159 //
160 // accessors
161 //
162
163 /**
164 * Get the pointer to the first byte of StringData. This is not guaranteed to be
165 * null-terminated, so if using this without checking size(), you are likely doing
166 * something wrong.
167 */
rawData()168 constexpr const char* rawData() const {
169 return _data;
170 }
171
size()172 constexpr size_t size() const {
173 return _size;
174 }
empty()175 constexpr bool empty() const {
176 return size() == 0;
177 }
toString()178 std::string toString() const {
179 return std::string(_data, size());
180 }
181 constexpr char operator[](unsigned pos) const {
182 return _data[pos];
183 }
184
185 //
186 // iterators
187 //
begin()188 constexpr const_iterator begin() const {
189 return rawData();
190 }
end()191 constexpr const_iterator end() const {
192 return rawData() + size();
193 }
194
195 private:
196 const char* _data = nullptr; // is not guaranted to be null terminated (see "notes" above)
197 size_t _size = 0; // 'size' does not include the null terminator
198 };
199
200 inline bool operator==(StringData lhs, StringData rhs) {
201 return (lhs.size() == rhs.size()) && (lhs.compare(rhs) == 0);
202 }
203
204 inline bool operator!=(StringData lhs, StringData rhs) {
205 return !(lhs == rhs);
206 }
207
208 inline bool operator<(StringData lhs, StringData rhs) {
209 return lhs.compare(rhs) < 0;
210 }
211
212 inline bool operator<=(StringData lhs, StringData rhs) {
213 return lhs.compare(rhs) <= 0;
214 }
215
216 inline bool operator>(StringData lhs, StringData rhs) {
217 return lhs.compare(rhs) > 0;
218 }
219
220 inline bool operator>=(StringData lhs, StringData rhs) {
221 return lhs.compare(rhs) >= 0;
222 }
223
/**
 * Stream insertion for StringData. Only the declaration lives in this header; the
 * definition is provided elsewhere.
 */
std::ostream& operator<<(std::ostream& stream, StringData value);
225
226 constexpr StringData operator"" _sd(const char* c, std::size_t len) {
227 return StringData(c, len, StringData::TrustedInitTag{});
228 }
229
compare(StringData other)230 inline int StringData::compare(StringData other) const {
231 // It is illegal to pass nullptr to memcmp. It is an invariant of
232 // StringData that if _data is nullptr, _size is zero. If asked to
233 // compare zero bytes, memcmp returns zero (how could they
234 // differ?). So, if either StringData object has a nullptr _data
235 // object, then memcmp would return zero. Achieve this by assuming
236 // zero, and only calling memcmp if both pointers are valid.
237 int res = 0;
238 if (_data && other._data)
239 res = memcmp(_data, other._data, std::min(_size, other._size));
240
241 if (res != 0)
242 return res > 0 ? 1 : -1;
243
244 if (_size == other._size)
245 return 0;
246
247 return _size > other._size ? 1 : -1;
248 }
249
equalCaseInsensitive(StringData other)250 inline bool StringData::equalCaseInsensitive(StringData other) const {
251 if (other.size() != size())
252 return false;
253
254 for (size_t x = 0; x < size(); x++) {
255 char a = _data[x];
256 char b = other._data[x];
257 if (a == b)
258 continue;
259 if (tolower(a) == tolower(b))
260 continue;
261 return false;
262 }
263
264 return true;
265 }
266
copyTo(char * dest,bool includeEndingNull)267 inline void StringData::copyTo(char* dest, bool includeEndingNull) const {
268 if (_data)
269 memcpy(dest, _data, size());
270 if (includeEndingNull)
271 dest[size()] = 0;
272 }
273
find(char c,size_t fromPos)274 inline size_t StringData::find(char c, size_t fromPos) const {
275 if (fromPos >= size())
276 return std::string::npos;
277
278 const void* x = memchr(_data + fromPos, c, _size - fromPos);
279 if (x == 0)
280 return std::string::npos;
281 return static_cast<size_t>(static_cast<const char*>(x) - _data);
282 }
283
find(StringData needle)284 inline size_t StringData::find(StringData needle) const {
285 size_t mx = size();
286 size_t needleSize = needle.size();
287
288 if (needleSize == 0)
289 return 0;
290 else if (needleSize > mx)
291 return std::string::npos;
292
293 mx -= needleSize;
294
295 for (size_t i = 0; i <= mx; i++) {
296 if (memcmp(_data + i, needle._data, needleSize) == 0)
297 return i;
298 }
299 return std::string::npos;
300 }
301
rfind(char c,size_t fromPos)302 inline size_t StringData::rfind(char c, size_t fromPos) const {
303 const size_t sz = size();
304 if (fromPos > sz)
305 fromPos = sz;
306
307 for (const char* cur = _data + fromPos; cur > _data; --cur) {
308 if (*(cur - 1) == c)
309 return (cur - _data) - 1;
310 }
311 return std::string::npos;
312 }
313
substr(size_t pos,size_t n)314 inline StringData StringData::substr(size_t pos, size_t n) const {
315 if (pos > size())
316 throw std::out_of_range("out of range");
317
318 // truncate to end of string
319 if (n > size() - pos)
320 n = size() - pos;
321
322 return StringData(_data + pos, n);
323 }
324
startsWith(StringData prefix)325 inline bool StringData::startsWith(StringData prefix) const {
326 // TODO: Investigate an optimized implementation.
327 return substr(0, prefix.size()) == prefix;
328 }
329
endsWith(StringData suffix)330 inline bool StringData::endsWith(StringData suffix) const {
331 // TODO: Investigate an optimized implementation.
332 const size_t thisSize = size();
333 const size_t suffixSize = suffix.size();
334 if (suffixSize > thisSize)
335 return false;
336 return substr(thisSize - suffixSize) == suffix;
337 }
338
339 inline std::string operator+(std::string lhs, StringData rhs) {
340 if (!rhs.empty())
341 lhs.append(rhs.rawData(), rhs.size());
342 return lhs;
343 }
344
345 inline std::string operator+(StringData lhs, std::string rhs) {
346 if (!lhs.empty())
347 rhs.insert(0, lhs.rawData(), lhs.size());
348 return rhs;
349 }
350
351 } // namespace mongo
352