1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 // Copyright (C) 2009-2013, International Business Machines 4 // Corporation and others. All Rights Reserved. 5 // 6 // Copyright 2001 and onwards Google Inc. 7 // Author: Sanjay Ghemawat 8 9 // This code is a contribution of Google code, and the style used here is 10 // a compromise between the original Google code and the ICU coding guidelines. 11 // For example, data types are ICU-ified (size_t,int->int32_t), 12 // and API comments doxygen-ified, but function names and behavior are 13 // as in the original, if possible. 14 // Assertion-style error handling, not available in ICU, was changed to 15 // parameter "pinning" similar to UnicodeString. 16 // 17 // In addition, this is only a partial port of the original Google code, 18 // limited to what was needed so far. The (nearly) complete original code 19 // is in the ICU svn repository at icuhtml/trunk/design/strings/contrib 20 // (see ICU ticket 6765, r25517). 21 22 #ifndef __STRINGPIECE_H__ 23 #define __STRINGPIECE_H__ 24 25 /** 26 * \file 27 * \brief C++ API: StringPiece: Read-only byte string wrapper class. 28 */ 29 30 #include "unicode/utypes.h" 31 32 #if U_SHOW_CPLUSPLUS_API 33 34 #include <cstddef> 35 #include <type_traits> 36 37 #include "unicode/uobject.h" 38 #include "unicode/std_string.h" 39 40 // Arghh! I wish C++ literals were "string". 41 42 U_NAMESPACE_BEGIN 43 44 /** 45 * A string-like object that points to a sized piece of memory. 46 * 47 * We provide non-explicit singleton constructors so users can pass 48 * in a "const char*" or a "string" wherever a "StringPiece" is 49 * expected. 50 * 51 * Functions or methods may use StringPiece parameters to accept either a 52 * "const char*" or a "string" value that will be implicitly converted to a 53 * StringPiece. 54 * 55 * Systematic usage of StringPiece is encouraged as it will reduce unnecessary 56 * conversions from "const char*" to "string" and back again. 57 * 58 * @stable ICU 4.2 59 */ 60 class U_COMMON_API StringPiece : public UMemory { 61 private: 62 const char* ptr_; 63 int32_t length_; 64 65 public: 66 /** 67 * Default constructor, creates an empty StringPiece. 68 * @stable ICU 4.2 69 */ StringPiece()70 StringPiece() : ptr_(nullptr), length_(0) { } 71 72 /** 73 * Constructs from a NUL-terminated const char * pointer. 74 * @param str a NUL-terminated const char * pointer 75 * @stable ICU 4.2 76 */ 77 StringPiece(const char* str); 78 #ifndef U_HIDE_DRAFT_API 79 #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN) 80 /** 81 * Constructs from a NUL-terminated const char8_t * pointer. 82 * @param str a NUL-terminated const char8_t * pointer 83 * @draft ICU 67 84 */ StringPiece(const char8_t * str)85 StringPiece(const char8_t* str) : StringPiece(reinterpret_cast<const char*>(str)) {} 86 #endif 87 /** 88 * Constructs an empty StringPiece. 89 * Needed for type disambiguation from multiple other overloads. 90 * @param p nullptr 91 * @draft ICU 67 92 */ StringPiece(std::nullptr_t p)93 StringPiece(std::nullptr_t p) : ptr_(p), length_(0) {} 94 #endif // U_HIDE_DRAFT_API 95 96 /** 97 * Constructs from a std::string. 98 * @stable ICU 4.2 99 */ StringPiece(const std::string & str)100 StringPiece(const std::string& str) 101 : ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { } 102 #ifndef U_HIDE_DRAFT_API 103 #if defined(__cpp_lib_char8_t) || defined(U_IN_DOXYGEN) 104 /** 105 * Constructs from a std::u8string. 106 * @draft ICU 67 107 */ StringPiece(const std::u8string & str)108 StringPiece(const std::u8string& str) 109 : ptr_(reinterpret_cast<const char*>(str.data())), 110 length_(static_cast<int32_t>(str.size())) { } 111 #endif 112 #endif // U_HIDE_DRAFT_API 113 114 #ifndef U_HIDE_DRAFT_API 115 /** 116 * Constructs from some other implementation of a string piece class, from any 117 * C++ record type that has these two methods: 118 * 119 * \code{.cpp} 120 * 121 * struct OtherStringPieceClass { 122 * const char* data(); // or const char8_t* 123 * size_t size(); 124 * }; 125 * 126 * \endcode 127 * 128 * The other string piece class will typically be std::string_view from C++17 129 * or absl::string_view from Abseil. 130 * 131 * Starting with C++20, data() may also return a const char8_t* pointer, 132 * as from std::u8string_view. 133 * 134 * @param str the other string piece 135 * @draft ICU 65 136 */ 137 template <typename T, 138 typename = typename std::enable_if< 139 (std::is_same<decltype(T().data()), const char*>::value 140 #if defined(__cpp_char8_t) 141 || std::is_same<decltype(T().data()), const char8_t*>::value 142 #endif 143 ) && 144 std::is_same<decltype(T().size()), size_t>::value>::type> StringPiece(T str)145 StringPiece(T str) 146 : ptr_(reinterpret_cast<const char*>(str.data())), 147 length_(static_cast<int32_t>(str.size())) {} 148 #endif // U_HIDE_DRAFT_API 149 150 /** 151 * Constructs from a const char * pointer and a specified length. 152 * @param offset a const char * pointer (need not be terminated) 153 * @param len the length of the string; must be non-negative 154 * @stable ICU 4.2 155 */ StringPiece(const char * offset,int32_t len)156 StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { } 157 #ifndef U_HIDE_DRAFT_API 158 #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN) 159 /** 160 * Constructs from a const char8_t * pointer and a specified length. 161 * @param str a const char8_t * pointer (need not be terminated) 162 * @param len the length of the string; must be non-negative 163 * @draft ICU 67 164 */ StringPiece(const char8_t * str,int32_t len)165 StringPiece(const char8_t* str, int32_t len) : 166 StringPiece(reinterpret_cast<const char*>(str), len) {} 167 #endif 168 #endif // U_HIDE_DRAFT_API 169 170 /** 171 * Substring of another StringPiece. 172 * @param x the other StringPiece 173 * @param pos start position in x; must be non-negative and <= x.length(). 174 * @stable ICU 4.2 175 */ 176 StringPiece(const StringPiece& x, int32_t pos); 177 /** 178 * Substring of another StringPiece. 179 * @param x the other StringPiece 180 * @param pos start position in x; must be non-negative and <= x.length(). 181 * @param len length of the substring; 182 * must be non-negative and will be pinned to at most x.length() - pos. 183 * @stable ICU 4.2 184 */ 185 StringPiece(const StringPiece& x, int32_t pos, int32_t len); 186 187 /** 188 * Returns the string pointer. May be nullptr if it is empty. 189 * 190 * data() may return a pointer to a buffer with embedded NULs, and the 191 * returned buffer may or may not be null terminated. Therefore it is 192 * typically a mistake to pass data() to a routine that expects a NUL 193 * terminated string. 194 * @return the string pointer 195 * @stable ICU 4.2 196 */ data()197 const char* data() const { return ptr_; } 198 /** 199 * Returns the string length. Same as length(). 200 * @return the string length 201 * @stable ICU 4.2 202 */ size()203 int32_t size() const { return length_; } 204 /** 205 * Returns the string length. Same as size(). 206 * @return the string length 207 * @stable ICU 4.2 208 */ length()209 int32_t length() const { return length_; } 210 /** 211 * Returns whether the string is empty. 212 * @return TRUE if the string is empty 213 * @stable ICU 4.2 214 */ empty()215 UBool empty() const { return length_ == 0; } 216 217 /** 218 * Sets to an empty string. 219 * @stable ICU 4.2 220 */ clear()221 void clear() { ptr_ = nullptr; length_ = 0; } 222 223 /** 224 * Reset the stringpiece to refer to new data. 225 * @param xdata pointer the new string data. Need not be nul terminated. 226 * @param len the length of the new data 227 * @stable ICU 4.8 228 */ set(const char * xdata,int32_t len)229 void set(const char* xdata, int32_t len) { ptr_ = xdata; length_ = len; } 230 231 /** 232 * Reset the stringpiece to refer to new data. 233 * @param str a pointer to a NUL-terminated string. 234 * @stable ICU 4.8 235 */ 236 void set(const char* str); 237 238 #ifndef U_HIDE_DRAFT_API 239 #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN) 240 /** 241 * Resets the stringpiece to refer to new data. 242 * @param xdata pointer the new string data. Need not be NUL-terminated. 243 * @param len the length of the new data 244 * @draft ICU 67 245 */ set(const char8_t * xdata,int32_t len)246 inline void set(const char8_t* xdata, int32_t len) { 247 set(reinterpret_cast<const char*>(xdata), len); 248 } 249 250 /** 251 * Resets the stringpiece to refer to new data. 252 * @param str a pointer to a NUL-terminated string. 253 * @draft ICU 67 254 */ set(const char8_t * str)255 inline void set(const char8_t* str) { 256 set(reinterpret_cast<const char*>(str)); 257 } 258 #endif 259 #endif // U_HIDE_DRAFT_API 260 261 /** 262 * Removes the first n string units. 263 * @param n prefix length, must be non-negative and <=length() 264 * @stable ICU 4.2 265 */ remove_prefix(int32_t n)266 void remove_prefix(int32_t n) { 267 if (n >= 0) { 268 if (n > length_) { 269 n = length_; 270 } 271 ptr_ += n; 272 length_ -= n; 273 } 274 } 275 276 /** 277 * Removes the last n string units. 278 * @param n suffix length, must be non-negative and <=length() 279 * @stable ICU 4.2 280 */ remove_suffix(int32_t n)281 void remove_suffix(int32_t n) { 282 if (n >= 0) { 283 if (n <= length_) { 284 length_ -= n; 285 } else { 286 length_ = 0; 287 } 288 } 289 } 290 291 #ifndef U_HIDE_DRAFT_API 292 /** 293 * Searches the StringPiece for the given search string (needle); 294 * @param needle The string for which to search. 295 * @param offset Where to start searching within this string (haystack). 296 * @return The offset of needle in haystack, or -1 if not found. 297 * @draft ICU 67 298 */ 299 int32_t find(StringPiece needle, int32_t offset); 300 301 /** 302 * Compares this StringPiece with the other StringPiece, with semantics 303 * similar to std::string::compare(). 304 * @param other The string to compare to. 305 * @return below zero if this < other; above zero if this > other; 0 if this == other. 306 * @draft ICU 67 307 */ 308 int32_t compare(StringPiece other); 309 #endif // U_HIDE_DRAFT_API 310 311 /** 312 * Maximum integer, used as a default value for substring methods. 313 * @stable ICU 4.2 314 */ 315 static const int32_t npos; // = 0x7fffffff; 316 317 /** 318 * Returns a substring of this StringPiece. 319 * @param pos start position; must be non-negative and <= length(). 320 * @param len length of the substring; 321 * must be non-negative and will be pinned to at most length() - pos. 322 * @return the substring StringPiece 323 * @stable ICU 4.2 324 */ 325 StringPiece substr(int32_t pos, int32_t len = npos) const { 326 return StringPiece(*this, pos, len); 327 } 328 }; 329 330 /** 331 * Global operator == for StringPiece 332 * @param x The first StringPiece to compare. 333 * @param y The second StringPiece to compare. 334 * @return TRUE if the string data is equal 335 * @stable ICU 4.8 336 */ 337 U_EXPORT UBool U_EXPORT2 338 operator==(const StringPiece& x, const StringPiece& y); 339 340 /** 341 * Global operator != for StringPiece 342 * @param x The first StringPiece to compare. 343 * @param y The second StringPiece to compare. 344 * @return TRUE if the string data is not equal 345 * @stable ICU 4.8 346 */ 347 inline UBool operator!=(const StringPiece& x, const StringPiece& y) { 348 return !(x == y); 349 } 350 351 U_NAMESPACE_END 352 353 #endif /* U_SHOW_CPLUSPLUS_API */ 354 355 #endif // __STRINGPIECE_H__ 356