1 // string_data.h
2 
3 
4 /**
5  *    Copyright (C) 2018-present MongoDB, Inc.
6  *
7  *    This program is free software: you can redistribute it and/or modify
8  *    it under the terms of the Server Side Public License, version 1,
9  *    as published by MongoDB, Inc.
10  *
11  *    This program is distributed in the hope that it will be useful,
12  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *    Server Side Public License for more details.
15  *
16  *    You should have received a copy of the Server Side Public License
17  *    along with this program. If not, see
18  *    <http://www.mongodb.com/licensing/server-side-public-license>.
19  *
20  *    As a special exception, the copyright holders give permission to link the
21  *    code of portions of this program with the OpenSSL library under certain
22  *    conditions as described in each individual source file and distribute
23  *    linked combinations including the program with the OpenSSL library. You
24  *    must comply with the Server Side Public License in all respects for
25  *    all of the code used other than as permitted herein. If you modify file(s)
26  *    with this exception, you may extend this exception to your version of the
27  *    file(s), but you are not obligated to do so. If you do not wish to do so,
28  *    delete this exception statement from your version. If you delete this
29  *    exception statement from all source files in the program, then also delete
30  *    it in the license file.
31  */
32 
33 #pragma once
34 
#include <algorithm>  // for min
#include <cctype>
#include <cstring>
#include <iosfwd>
#include <limits>
#include <stdexcept>
#include <string>
41 
42 #include "mongo/stdx/type_traits.h"
43 #define MONGO_INCLUDE_INVARIANT_H_WHITELISTED
44 #include "mongo/util/invariant.h"
45 #undef MONGO_INCLUDE_INVARIANT_H_WHITELISTED
46 
47 namespace mongo {
48 
49 /**
 * A StringData object wraps a 'const std::string&' or a 'const char*' without copying its
 * contents. The most common usage is as a function argument that takes either of the two
 * forms of strings above. Fundamentally, this class tries to work around the fact that string
 * literals in C++ are char[N]'s.
54  *
55  * Notes:
56  *
57  *  + The object StringData wraps around must be alive while the StringData is.
58  *
 *  + Because a StringData can be used to pass a substring around, one should never assume that
 *    rawData() terminates with a null.
61  */
62 class StringData {
63     struct TrustedInitTag {};
StringData(const char * c,size_t len,TrustedInitTag)64     constexpr StringData(const char* c, size_t len, TrustedInitTag) : _data(c), _size(len) {}
65 
66 public:
67     // Declared in string_data_comparator_interface.h.
68     class ComparatorInterface;
69 
70     // Iterator type
71     using const_iterator = const char*;
72 
73     /** Constructs an empty StringData. */
74     constexpr StringData() = default;
75 
76     /**
77      * Constructs a StringData, for the case where the length of the
78      * string is not known. 'c' must either be NULL, or a pointer to a
79      * null-terminated string.
80      */
StringData(const char * str)81     StringData(const char* str) : StringData(str, str ? std::strlen(str) : 0) {}
82 
83     /**
84      * Constructs a StringData, for the case of a std::string. We can
85      * use the trusted init path with no follow on checks because
86      * string::data is assured to never return nullptr.
87      */
StringData(const std::string & s)88     StringData(const std::string& s) : StringData(s.data(), s.length(), TrustedInitTag()) {}
89 
90     /**
91      * Constructs a StringData with an explicit length. 'c' must
92      * either be NULL (in which case len must be zero), or be a
93      * pointer into a character array. The StringData will refer to
94      * the first 'len' characters starting at 'c'. The range of
95      * characters c to c+len must be valid.
96      */
StringData(const char * c,size_t len)97     StringData(const char* c, size_t len) : StringData(c, len, TrustedInitTag()) {
98         invariant(_data || (_size == 0));
99     }
100 
101     /**
102      * Constructs a StringData from a user defined literal.  This allows
103      * for constexpr creation of StringData's that are known at compile time.
104      */
105     constexpr friend StringData operator"" _sd(const char* c, std::size_t len);
106 
107     /**
108      * Constructs a StringData with begin and end iterators. begin points to the beginning of the
109      * string. end points to the position past the end of the string. In a null-terminated string,
110      * end points to the null-terminator.
111      *
112      * We template the second parameter to ensure if StringData is called with 0 in the second
113      * parameter, the (ptr,len) constructor is chosen instead.
114      */
115     template <
116         typename InputIt,
117         typename = stdx::enable_if_t<std::is_same<StringData::const_iterator, InputIt>::value>>
StringData(InputIt begin,InputIt end)118     StringData(InputIt begin, InputIt end) {
119         invariant(begin && end);
120         _data = begin;
121         _size = std::distance(begin, end);
122     }
123 
124     /**
125      * Returns -1, 0, or 1 if 'this' is less, equal, or greater than 'other' in
126      * lexicographical order.
127      */
128     int compare(StringData other) const;
129 
130     /**
131      * note: this uses tolower, and therefore does not handle
132      *       come languages correctly.
133      *       should be use sparingly
134      */
135     bool equalCaseInsensitive(StringData other) const;
136 
137     void copyTo(char* dest, bool includeEndingNull) const;
138 
139     StringData substr(size_t pos, size_t n = std::numeric_limits<size_t>::max()) const;
140 
141     //
142     // finders
143     //
144 
145     size_t find(char c, size_t fromPos = 0) const;
146     size_t find(StringData needle) const;
147     size_t rfind(char c, size_t fromPos = std::string::npos) const;
148 
149     /**
150      * Returns true if 'prefix' is a substring of this instance, anchored at position 0.
151      */
152     bool startsWith(StringData prefix) const;
153 
154     /**
155      * Returns true if 'suffix' is a substring of this instance, anchored at the end.
156      */
157     bool endsWith(StringData suffix) const;
158 
159     //
160     // accessors
161     //
162 
163     /**
164      * Get the pointer to the first byte of StringData.  This is not guaranteed to be
165      * null-terminated, so if using this without checking size(), you are likely doing
166      * something wrong.
167      */
rawData()168     constexpr const char* rawData() const {
169         return _data;
170     }
171 
size()172     constexpr size_t size() const {
173         return _size;
174     }
empty()175     constexpr bool empty() const {
176         return size() == 0;
177     }
toString()178     std::string toString() const {
179         return std::string(_data, size());
180     }
181     constexpr char operator[](unsigned pos) const {
182         return _data[pos];
183     }
184 
185     //
186     // iterators
187     //
begin()188     constexpr const_iterator begin() const {
189         return rawData();
190     }
end()191     constexpr const_iterator end() const {
192         return rawData() + size();
193     }
194 
195 private:
196     const char* _data = nullptr;  // is not guaranted to be null terminated (see "notes" above)
197     size_t _size = 0;             // 'size' does not include the null terminator
198 };
199 
200 inline bool operator==(StringData lhs, StringData rhs) {
201     return (lhs.size() == rhs.size()) && (lhs.compare(rhs) == 0);
202 }
203 
204 inline bool operator!=(StringData lhs, StringData rhs) {
205     return !(lhs == rhs);
206 }
207 
208 inline bool operator<(StringData lhs, StringData rhs) {
209     return lhs.compare(rhs) < 0;
210 }
211 
212 inline bool operator<=(StringData lhs, StringData rhs) {
213     return lhs.compare(rhs) <= 0;
214 }
215 
216 inline bool operator>(StringData lhs, StringData rhs) {
217     return lhs.compare(rhs) > 0;
218 }
219 
220 inline bool operator>=(StringData lhs, StringData rhs) {
221     return lhs.compare(rhs) >= 0;
222 }
223 
// Stream-insertion operator for StringData. It is only declared in this header; the definition
// is not part of this file.
std::ostream& operator<<(std::ostream& stream, StringData value);
225 
226 constexpr StringData operator"" _sd(const char* c, std::size_t len) {
227     return StringData(c, len, StringData::TrustedInitTag{});
228 }
229 
compare(StringData other)230 inline int StringData::compare(StringData other) const {
231     // It is illegal to pass nullptr to memcmp. It is an invariant of
232     // StringData that if _data is nullptr, _size is zero. If asked to
233     // compare zero bytes, memcmp returns zero (how could they
234     // differ?). So, if either StringData object has a nullptr _data
235     // object, then memcmp would return zero. Achieve this by assuming
236     // zero, and only calling memcmp if both pointers are valid.
237     int res = 0;
238     if (_data && other._data)
239         res = memcmp(_data, other._data, std::min(_size, other._size));
240 
241     if (res != 0)
242         return res > 0 ? 1 : -1;
243 
244     if (_size == other._size)
245         return 0;
246 
247     return _size > other._size ? 1 : -1;
248 }
249 
equalCaseInsensitive(StringData other)250 inline bool StringData::equalCaseInsensitive(StringData other) const {
251     if (other.size() != size())
252         return false;
253 
254     for (size_t x = 0; x < size(); x++) {
255         char a = _data[x];
256         char b = other._data[x];
257         if (a == b)
258             continue;
259         if (tolower(a) == tolower(b))
260             continue;
261         return false;
262     }
263 
264     return true;
265 }
266 
copyTo(char * dest,bool includeEndingNull)267 inline void StringData::copyTo(char* dest, bool includeEndingNull) const {
268     if (_data)
269         memcpy(dest, _data, size());
270     if (includeEndingNull)
271         dest[size()] = 0;
272 }
273 
find(char c,size_t fromPos)274 inline size_t StringData::find(char c, size_t fromPos) const {
275     if (fromPos >= size())
276         return std::string::npos;
277 
278     const void* x = memchr(_data + fromPos, c, _size - fromPos);
279     if (x == 0)
280         return std::string::npos;
281     return static_cast<size_t>(static_cast<const char*>(x) - _data);
282 }
283 
find(StringData needle)284 inline size_t StringData::find(StringData needle) const {
285     size_t mx = size();
286     size_t needleSize = needle.size();
287 
288     if (needleSize == 0)
289         return 0;
290     else if (needleSize > mx)
291         return std::string::npos;
292 
293     mx -= needleSize;
294 
295     for (size_t i = 0; i <= mx; i++) {
296         if (memcmp(_data + i, needle._data, needleSize) == 0)
297             return i;
298     }
299     return std::string::npos;
300 }
301 
rfind(char c,size_t fromPos)302 inline size_t StringData::rfind(char c, size_t fromPos) const {
303     const size_t sz = size();
304     if (fromPos > sz)
305         fromPos = sz;
306 
307     for (const char* cur = _data + fromPos; cur > _data; --cur) {
308         if (*(cur - 1) == c)
309             return (cur - _data) - 1;
310     }
311     return std::string::npos;
312 }
313 
substr(size_t pos,size_t n)314 inline StringData StringData::substr(size_t pos, size_t n) const {
315     if (pos > size())
316         throw std::out_of_range("out of range");
317 
318     // truncate to end of string
319     if (n > size() - pos)
320         n = size() - pos;
321 
322     return StringData(_data + pos, n);
323 }
324 
startsWith(StringData prefix)325 inline bool StringData::startsWith(StringData prefix) const {
326     // TODO: Investigate an optimized implementation.
327     return substr(0, prefix.size()) == prefix;
328 }
329 
endsWith(StringData suffix)330 inline bool StringData::endsWith(StringData suffix) const {
331     // TODO: Investigate an optimized implementation.
332     const size_t thisSize = size();
333     const size_t suffixSize = suffix.size();
334     if (suffixSize > thisSize)
335         return false;
336     return substr(thisSize - suffixSize) == suffix;
337 }
338 
339 inline std::string operator+(std::string lhs, StringData rhs) {
340     if (!rhs.empty())
341         lhs.append(rhs.rawData(), rhs.size());
342     return lhs;
343 }
344 
345 inline std::string operator+(StringData lhs, std::string rhs) {
346     if (!lhs.empty())
347         rhs.insert(0, lhs.rawData(), lhs.size());
348     return rhs;
349 }
350 
351 }  // namespace mongo
352