1 //===- Twine.h - Fast Temporary String Concatenation ------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_ADT_TWINE_H 10 #define LLVM_ADT_TWINE_H 11 12 #include "llvm/ADT/SmallVector.h" 13 #include "llvm/ADT/StringRef.h" 14 #include "llvm/Support/ErrorHandling.h" 15 #include <cassert> 16 #include <cstdint> 17 #include <string> 18 19 namespace llvm { 20 21 class formatv_object_base; 22 class raw_ostream; 23 24 /// Twine - A lightweight data structure for efficiently representing the 25 /// concatenation of temporary values as strings. 26 /// 27 /// A Twine is a kind of rope, it represents a concatenated string using a 28 /// binary-tree, where the string is the preorder of the nodes. Since the 29 /// Twine can be efficiently rendered into a buffer when its result is used, 30 /// it avoids the cost of generating temporary values for intermediate string 31 /// results -- particularly in cases when the Twine result is never 32 /// required. By explicitly tracking the type of leaf nodes, we can also avoid 33 /// the creation of temporary strings for conversions operations (such as 34 /// appending an integer to a string). 35 /// 36 /// A Twine is not intended for use directly and should not be stored, its 37 /// implementation relies on the ability to store pointers to temporary stack 38 /// objects which may be deallocated at the end of a statement. Twines should 39 /// only be used accepted as const references in arguments, when an API wishes 40 /// to accept possibly-concatenated strings. 41 /// 42 /// Twines support a special 'null' value, which always concatenates to form 43 /// itself, and renders as an empty string. This can be returned from APIs to 44 /// effectively nullify any concatenations performed on the result. 45 /// 46 /// \b Implementation 47 /// 48 /// Given the nature of a Twine, it is not possible for the Twine's 49 /// concatenation method to construct interior nodes; the result must be 50 /// represented inside the returned value. For this reason a Twine object 51 /// actually holds two values, the left- and right-hand sides of a 52 /// concatenation. We also have nullary Twine objects, which are effectively 53 /// sentinel values that represent empty strings. 54 /// 55 /// Thus, a Twine can effectively have zero, one, or two children. The \see 56 /// isNullary(), \see isUnary(), and \see isBinary() predicates exist for 57 /// testing the number of children. 58 /// 59 /// We maintain a number of invariants on Twine objects (FIXME: Why): 60 /// - Nullary twines are always represented with their Kind on the left-hand 61 /// side, and the Empty kind on the right-hand side. 62 /// - Unary twines are always represented with the value on the left-hand 63 /// side, and the Empty kind on the right-hand side. 64 /// - If a Twine has another Twine as a child, that child should always be 65 /// binary (otherwise it could have been folded into the parent). 66 /// 67 /// These invariants are check by \see isValid(). 68 /// 69 /// \b Efficiency Considerations 70 /// 71 /// The Twine is designed to yield efficient and small code for common 72 /// situations. For this reason, the concat() method is inlined so that 73 /// concatenations of leaf nodes can be optimized into stores directly into a 74 /// single stack allocated object. 75 /// 76 /// In practice, not all compilers can be trusted to optimize concat() fully, 77 /// so we provide two additional methods (and accompanying operator+ 78 /// overloads) to guarantee that particularly important cases (cstring plus 79 /// StringRef) codegen as desired. 80 class Twine { 81 /// NodeKind - Represent the type of an argument. 82 enum NodeKind : unsigned char { 83 /// An empty string; the result of concatenating anything with it is also 84 /// empty. 85 NullKind, 86 87 /// The empty string. 88 EmptyKind, 89 90 /// A pointer to a Twine instance. 91 TwineKind, 92 93 /// A pointer to a C string instance. 94 CStringKind, 95 96 /// A pointer to an std::string instance. 97 StdStringKind, 98 99 /// A pointer to a StringRef instance. 100 StringRefKind, 101 102 /// A pointer to a SmallString instance. 103 SmallStringKind, 104 105 /// A pointer to a formatv_object_base instance. 106 FormatvObjectKind, 107 108 /// A char value, to render as a character. 109 CharKind, 110 111 /// An unsigned int value, to render as an unsigned decimal integer. 112 DecUIKind, 113 114 /// An int value, to render as a signed decimal integer. 115 DecIKind, 116 117 /// A pointer to an unsigned long value, to render as an unsigned decimal 118 /// integer. 119 DecULKind, 120 121 /// A pointer to a long value, to render as a signed decimal integer. 122 DecLKind, 123 124 /// A pointer to an unsigned long long value, to render as an unsigned 125 /// decimal integer. 126 DecULLKind, 127 128 /// A pointer to a long long value, to render as a signed decimal integer. 129 DecLLKind, 130 131 /// A pointer to a uint64_t value, to render as an unsigned hexadecimal 132 /// integer. 133 UHexKind 134 }; 135 136 union Child 137 { 138 const Twine *twine; 139 const char *cString; 140 const std::string *stdString; 141 const StringRef *stringRef; 142 const SmallVectorImpl<char> *smallString; 143 const formatv_object_base *formatvObject; 144 char character; 145 unsigned int decUI; 146 int decI; 147 const unsigned long *decUL; 148 const long *decL; 149 const unsigned long long *decULL; 150 const long long *decLL; 151 const uint64_t *uHex; 152 }; 153 154 /// LHS - The prefix in the concatenation, which may be uninitialized for 155 /// Null or Empty kinds. 156 Child LHS; 157 158 /// RHS - The suffix in the concatenation, which may be uninitialized for 159 /// Null or Empty kinds. 160 Child RHS; 161 162 /// LHSKind - The NodeKind of the left hand side, \see getLHSKind(). 163 NodeKind LHSKind = EmptyKind; 164 165 /// RHSKind - The NodeKind of the right hand side, \see getRHSKind(). 166 NodeKind RHSKind = EmptyKind; 167 168 /// Construct a nullary twine; the kind must be NullKind or EmptyKind. Twine(NodeKind Kind)169 explicit Twine(NodeKind Kind) : LHSKind(Kind) { 170 assert(isNullary() && "Invalid kind!"); 171 } 172 173 /// Construct a binary twine. Twine(const Twine & LHS,const Twine & RHS)174 explicit Twine(const Twine &LHS, const Twine &RHS) 175 : LHSKind(TwineKind), RHSKind(TwineKind) { 176 this->LHS.twine = &LHS; 177 this->RHS.twine = &RHS; 178 assert(isValid() && "Invalid twine!"); 179 } 180 181 /// Construct a twine from explicit values. Twine(Child LHS,NodeKind LHSKind,Child RHS,NodeKind RHSKind)182 explicit Twine(Child LHS, NodeKind LHSKind, Child RHS, NodeKind RHSKind) 183 : LHS(LHS), RHS(RHS), LHSKind(LHSKind), RHSKind(RHSKind) { 184 assert(isValid() && "Invalid twine!"); 185 } 186 187 /// Check for the null twine. isNull()188 bool isNull() const { 189 return getLHSKind() == NullKind; 190 } 191 192 /// Check for the empty twine. isEmpty()193 bool isEmpty() const { 194 return getLHSKind() == EmptyKind; 195 } 196 197 /// Check if this is a nullary twine (null or empty). isNullary()198 bool isNullary() const { 199 return isNull() || isEmpty(); 200 } 201 202 /// Check if this is a unary twine. isUnary()203 bool isUnary() const { 204 return getRHSKind() == EmptyKind && !isNullary(); 205 } 206 207 /// Check if this is a binary twine. isBinary()208 bool isBinary() const { 209 return getLHSKind() != NullKind && getRHSKind() != EmptyKind; 210 } 211 212 /// Check if this is a valid twine (satisfying the invariants on 213 /// order and number of arguments). isValid()214 bool isValid() const { 215 // Nullary twines always have Empty on the RHS. 216 if (isNullary() && getRHSKind() != EmptyKind) 217 return false; 218 219 // Null should never appear on the RHS. 220 if (getRHSKind() == NullKind) 221 return false; 222 223 // The RHS cannot be non-empty if the LHS is empty. 224 if (getRHSKind() != EmptyKind && getLHSKind() == EmptyKind) 225 return false; 226 227 // A twine child should always be binary. 228 if (getLHSKind() == TwineKind && 229 !LHS.twine->isBinary()) 230 return false; 231 if (getRHSKind() == TwineKind && 232 !RHS.twine->isBinary()) 233 return false; 234 235 return true; 236 } 237 238 /// Get the NodeKind of the left-hand side. getLHSKind()239 NodeKind getLHSKind() const { return LHSKind; } 240 241 /// Get the NodeKind of the right-hand side. getRHSKind()242 NodeKind getRHSKind() const { return RHSKind; } 243 244 /// Print one child from a twine. 245 void printOneChild(raw_ostream &OS, Child Ptr, NodeKind Kind) const; 246 247 /// Print the representation of one child from a twine. 248 void printOneChildRepr(raw_ostream &OS, Child Ptr, 249 NodeKind Kind) const; 250 251 public: 252 /// @name Constructors 253 /// @{ 254 255 /// Construct from an empty string. Twine()256 /*implicit*/ Twine() { 257 assert(isValid() && "Invalid twine!"); 258 } 259 260 Twine(const Twine &) = default; 261 262 /// Construct from a C string. 263 /// 264 /// We take care here to optimize "" into the empty twine -- this will be 265 /// optimized out for string constants. This allows Twine arguments have 266 /// default "" values, without introducing unnecessary string constants. Twine(const char * Str)267 /*implicit*/ Twine(const char *Str) { 268 if (Str[0] != '\0') { 269 LHS.cString = Str; 270 LHSKind = CStringKind; 271 } else 272 LHSKind = EmptyKind; 273 274 assert(isValid() && "Invalid twine!"); 275 } 276 /// Delete the implicit conversion from nullptr as Twine(const char *) 277 /// cannot take nullptr. 278 /*implicit*/ Twine(std::nullptr_t) = delete; 279 280 /// Construct from an std::string. Twine(const std::string & Str)281 /*implicit*/ Twine(const std::string &Str) : LHSKind(StdStringKind) { 282 LHS.stdString = &Str; 283 assert(isValid() && "Invalid twine!"); 284 } 285 286 /// Construct from a StringRef. Twine(const StringRef & Str)287 /*implicit*/ Twine(const StringRef &Str) : LHSKind(StringRefKind) { 288 LHS.stringRef = &Str; 289 assert(isValid() && "Invalid twine!"); 290 } 291 292 /// Construct from a SmallString. Twine(const SmallVectorImpl<char> & Str)293 /*implicit*/ Twine(const SmallVectorImpl<char> &Str) 294 : LHSKind(SmallStringKind) { 295 LHS.smallString = &Str; 296 assert(isValid() && "Invalid twine!"); 297 } 298 299 /// Construct from a formatv_object_base. Twine(const formatv_object_base & Fmt)300 /*implicit*/ Twine(const formatv_object_base &Fmt) 301 : LHSKind(FormatvObjectKind) { 302 LHS.formatvObject = &Fmt; 303 assert(isValid() && "Invalid twine!"); 304 } 305 306 /// Construct from a char. Twine(char Val)307 explicit Twine(char Val) : LHSKind(CharKind) { 308 LHS.character = Val; 309 } 310 311 /// Construct from a signed char. Twine(signed char Val)312 explicit Twine(signed char Val) : LHSKind(CharKind) { 313 LHS.character = static_cast<char>(Val); 314 } 315 316 /// Construct from an unsigned char. Twine(unsigned char Val)317 explicit Twine(unsigned char Val) : LHSKind(CharKind) { 318 LHS.character = static_cast<char>(Val); 319 } 320 321 /// Construct a twine to print \p Val as an unsigned decimal integer. Twine(unsigned Val)322 explicit Twine(unsigned Val) : LHSKind(DecUIKind) { 323 LHS.decUI = Val; 324 } 325 326 /// Construct a twine to print \p Val as a signed decimal integer. Twine(int Val)327 explicit Twine(int Val) : LHSKind(DecIKind) { 328 LHS.decI = Val; 329 } 330 331 /// Construct a twine to print \p Val as an unsigned decimal integer. Twine(const unsigned long & Val)332 explicit Twine(const unsigned long &Val) : LHSKind(DecULKind) { 333 LHS.decUL = &Val; 334 } 335 336 /// Construct a twine to print \p Val as a signed decimal integer. Twine(const long & Val)337 explicit Twine(const long &Val) : LHSKind(DecLKind) { 338 LHS.decL = &Val; 339 } 340 341 /// Construct a twine to print \p Val as an unsigned decimal integer. Twine(const unsigned long long & Val)342 explicit Twine(const unsigned long long &Val) : LHSKind(DecULLKind) { 343 LHS.decULL = &Val; 344 } 345 346 /// Construct a twine to print \p Val as a signed decimal integer. Twine(const long long & Val)347 explicit Twine(const long long &Val) : LHSKind(DecLLKind) { 348 LHS.decLL = &Val; 349 } 350 351 // FIXME: Unfortunately, to make sure this is as efficient as possible we 352 // need extra binary constructors from particular types. We can't rely on 353 // the compiler to be smart enough to fold operator+()/concat() down to the 354 // right thing. Yet. 355 356 /// Construct as the concatenation of a C string and a StringRef. Twine(const char * LHS,const StringRef & RHS)357 /*implicit*/ Twine(const char *LHS, const StringRef &RHS) 358 : LHSKind(CStringKind), RHSKind(StringRefKind) { 359 this->LHS.cString = LHS; 360 this->RHS.stringRef = &RHS; 361 assert(isValid() && "Invalid twine!"); 362 } 363 364 /// Construct as the concatenation of a StringRef and a C string. Twine(const StringRef & LHS,const char * RHS)365 /*implicit*/ Twine(const StringRef &LHS, const char *RHS) 366 : LHSKind(StringRefKind), RHSKind(CStringKind) { 367 this->LHS.stringRef = &LHS; 368 this->RHS.cString = RHS; 369 assert(isValid() && "Invalid twine!"); 370 } 371 372 /// Since the intended use of twines is as temporary objects, assignments 373 /// when concatenating might cause undefined behavior or stack corruptions 374 Twine &operator=(const Twine &) = delete; 375 376 /// Create a 'null' string, which is an empty string that always 377 /// concatenates to form another empty string. createNull()378 static Twine createNull() { 379 return Twine(NullKind); 380 } 381 382 /// @} 383 /// @name Numeric Conversions 384 /// @{ 385 386 // Construct a twine to print \p Val as an unsigned hexadecimal integer. utohexstr(const uint64_t & Val)387 static Twine utohexstr(const uint64_t &Val) { 388 Child LHS, RHS; 389 LHS.uHex = &Val; 390 RHS.twine = nullptr; 391 return Twine(LHS, UHexKind, RHS, EmptyKind); 392 } 393 394 /// @} 395 /// @name Predicate Operations 396 /// @{ 397 398 /// Check if this twine is trivially empty; a false return value does not 399 /// necessarily mean the twine is empty. isTriviallyEmpty()400 bool isTriviallyEmpty() const { 401 return isNullary(); 402 } 403 404 /// Return true if this twine can be dynamically accessed as a single 405 /// StringRef value with getSingleStringRef(). isSingleStringRef()406 bool isSingleStringRef() const { 407 if (getRHSKind() != EmptyKind) return false; 408 409 switch (getLHSKind()) { 410 case EmptyKind: 411 case CStringKind: 412 case StdStringKind: 413 case StringRefKind: 414 case SmallStringKind: 415 return true; 416 default: 417 return false; 418 } 419 } 420 421 /// @} 422 /// @name String Operations 423 /// @{ 424 425 Twine concat(const Twine &Suffix) const; 426 427 /// @} 428 /// @name Output & Conversion. 429 /// @{ 430 431 /// Return the twine contents as a std::string. 432 std::string str() const; 433 434 /// Append the concatenated string into the given SmallString or SmallVector. 435 void toVector(SmallVectorImpl<char> &Out) const; 436 437 /// This returns the twine as a single StringRef. This method is only valid 438 /// if isSingleStringRef() is true. getSingleStringRef()439 StringRef getSingleStringRef() const { 440 assert(isSingleStringRef() &&"This cannot be had as a single stringref!"); 441 switch (getLHSKind()) { 442 default: llvm_unreachable("Out of sync with isSingleStringRef"); 443 case EmptyKind: return StringRef(); 444 case CStringKind: return StringRef(LHS.cString); 445 case StdStringKind: return StringRef(*LHS.stdString); 446 case StringRefKind: return *LHS.stringRef; 447 case SmallStringKind: 448 return StringRef(LHS.smallString->data(), LHS.smallString->size()); 449 } 450 } 451 452 /// This returns the twine as a single StringRef if it can be 453 /// represented as such. Otherwise the twine is written into the given 454 /// SmallVector and a StringRef to the SmallVector's data is returned. toStringRef(SmallVectorImpl<char> & Out)455 StringRef toStringRef(SmallVectorImpl<char> &Out) const { 456 if (isSingleStringRef()) 457 return getSingleStringRef(); 458 toVector(Out); 459 return StringRef(Out.data(), Out.size()); 460 } 461 462 /// This returns the twine as a single null terminated StringRef if it 463 /// can be represented as such. Otherwise the twine is written into the 464 /// given SmallVector and a StringRef to the SmallVector's data is returned. 465 /// 466 /// The returned StringRef's size does not include the null terminator. 467 StringRef toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const; 468 469 /// Write the concatenated string represented by this twine to the 470 /// stream \p OS. 471 void print(raw_ostream &OS) const; 472 473 /// Dump the concatenated string represented by this twine to stderr. 474 void dump() const; 475 476 /// Write the representation of this twine to the stream \p OS. 477 void printRepr(raw_ostream &OS) const; 478 479 /// Dump the representation of this twine to stderr. 480 void dumpRepr() const; 481 482 /// @} 483 }; 484 485 /// @name Twine Inline Implementations 486 /// @{ 487 concat(const Twine & Suffix)488 inline Twine Twine::concat(const Twine &Suffix) const { 489 // Concatenation with null is null. 490 if (isNull() || Suffix.isNull()) 491 return Twine(NullKind); 492 493 // Concatenation with empty yields the other side. 494 if (isEmpty()) 495 return Suffix; 496 if (Suffix.isEmpty()) 497 return *this; 498 499 // Otherwise we need to create a new node, taking care to fold in unary 500 // twines. 501 Child NewLHS, NewRHS; 502 NewLHS.twine = this; 503 NewRHS.twine = &Suffix; 504 NodeKind NewLHSKind = TwineKind, NewRHSKind = TwineKind; 505 if (isUnary()) { 506 NewLHS = LHS; 507 NewLHSKind = getLHSKind(); 508 } 509 if (Suffix.isUnary()) { 510 NewRHS = Suffix.LHS; 511 NewRHSKind = Suffix.getLHSKind(); 512 } 513 514 return Twine(NewLHS, NewLHSKind, NewRHS, NewRHSKind); 515 } 516 517 inline Twine operator+(const Twine &LHS, const Twine &RHS) { 518 return LHS.concat(RHS); 519 } 520 521 /// Additional overload to guarantee simplified codegen; this is equivalent to 522 /// concat(). 523 524 inline Twine operator+(const char *LHS, const StringRef &RHS) { 525 return Twine(LHS, RHS); 526 } 527 528 /// Additional overload to guarantee simplified codegen; this is equivalent to 529 /// concat(). 530 531 inline Twine operator+(const StringRef &LHS, const char *RHS) { 532 return Twine(LHS, RHS); 533 } 534 535 inline raw_ostream &operator<<(raw_ostream &OS, const Twine &RHS) { 536 RHS.print(OS); 537 return OS; 538 } 539 540 /// @} 541 542 } // end namespace llvm 543 544 #endif // LLVM_ADT_TWINE_H 545