1 //===- Twine.h - Fast Temporary String Concatenation ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_ADT_TWINE_H
10 #define LLVM_ADT_TWINE_H
11 
12 #include "llvm/ADT/SmallVector.h"
13 #include "llvm/ADT/StringRef.h"
14 #include "llvm/Support/ErrorHandling.h"
15 #include <cassert>
16 #include <cstdint>
17 #include <string>
18 #include <string_view>
19 
20 namespace llvm {
21 
22   class formatv_object_base;
23   class raw_ostream;
24 
25   /// Twine - A lightweight data structure for efficiently representing the
26   /// concatenation of temporary values as strings.
27   ///
28   /// A Twine is a kind of rope, it represents a concatenated string using a
29   /// binary-tree, where the string is the preorder of the nodes. Since the
30   /// Twine can be efficiently rendered into a buffer when its result is used,
31   /// it avoids the cost of generating temporary values for intermediate string
32   /// results -- particularly in cases when the Twine result is never
33   /// required. By explicitly tracking the type of leaf nodes, we can also avoid
34   /// the creation of temporary strings for conversions operations (such as
35   /// appending an integer to a string).
36   ///
37   /// A Twine is not intended for use directly and should not be stored, its
38   /// implementation relies on the ability to store pointers to temporary stack
39   /// objects which may be deallocated at the end of a statement. Twines should
40   /// only be used as const references in arguments, when an API wishes
41   /// to accept possibly-concatenated strings.
42   ///
43   /// Twines support a special 'null' value, which always concatenates to form
44   /// itself, and renders as an empty string. This can be returned from APIs to
45   /// effectively nullify any concatenations performed on the result.
46   ///
47   /// \b Implementation
48   ///
49   /// Given the nature of a Twine, it is not possible for the Twine's
50   /// concatenation method to construct interior nodes; the result must be
51   /// represented inside the returned value. For this reason a Twine object
52   /// actually holds two values, the left- and right-hand sides of a
53   /// concatenation. We also have nullary Twine objects, which are effectively
54   /// sentinel values that represent empty strings.
55   ///
56   /// Thus, a Twine can effectively have zero, one, or two children. The \see
57   /// isNullary(), \see isUnary(), and \see isBinary() predicates exist for
58   /// testing the number of children.
59   ///
60   /// We maintain a number of invariants on Twine objects (FIXME: Why):
61   ///  - Nullary twines are always represented with their Kind on the left-hand
62   ///    side, and the Empty kind on the right-hand side.
63   ///  - Unary twines are always represented with the value on the left-hand
64   ///    side, and the Empty kind on the right-hand side.
65   ///  - If a Twine has another Twine as a child, that child should always be
66   ///    binary (otherwise it could have been folded into the parent).
67   ///
68   /// These invariants are check by \see isValid().
69   ///
70   /// \b Efficiency Considerations
71   ///
72   /// The Twine is designed to yield efficient and small code for common
73   /// situations. For this reason, the concat() method is inlined so that
74   /// concatenations of leaf nodes can be optimized into stores directly into a
75   /// single stack allocated object.
76   ///
77   /// In practice, not all compilers can be trusted to optimize concat() fully,
78   /// so we provide two additional methods (and accompanying operator+
79   /// overloads) to guarantee that particularly important cases (cstring plus
80   /// StringRef) codegen as desired.
81   class Twine {
82     /// NodeKind - Represent the type of an argument.
83     enum NodeKind : unsigned char {
84       /// An empty string; the result of concatenating anything with it is also
85       /// empty.
86       NullKind,
87 
88       /// The empty string.
89       EmptyKind,
90 
91       /// A pointer to a Twine instance.
92       TwineKind,
93 
94       /// A pointer to a C string instance.
95       CStringKind,
96 
97       /// A pointer to an std::string instance.
98       StdStringKind,
99 
100       /// A Pointer and Length representation. Used for std::string_view,
101       /// StringRef, and SmallString.  Can't use a StringRef here
102       /// because they are not trivally constructible.
103       PtrAndLengthKind,
104 
105       /// A pointer and length representation that's also null-terminated.
106       /// Guaranteed to be constructed from a compile-time string literal.
107       StringLiteralKind,
108 
109       /// A pointer to a formatv_object_base instance.
110       FormatvObjectKind,
111 
112       /// A char value, to render as a character.
113       CharKind,
114 
115       /// An unsigned int value, to render as an unsigned decimal integer.
116       DecUIKind,
117 
118       /// An int value, to render as a signed decimal integer.
119       DecIKind,
120 
121       /// A pointer to an unsigned long value, to render as an unsigned decimal
122       /// integer.
123       DecULKind,
124 
125       /// A pointer to a long value, to render as a signed decimal integer.
126       DecLKind,
127 
128       /// A pointer to an unsigned long long value, to render as an unsigned
129       /// decimal integer.
130       DecULLKind,
131 
132       /// A pointer to a long long value, to render as a signed decimal integer.
133       DecLLKind,
134 
135       /// A pointer to a uint64_t value, to render as an unsigned hexadecimal
136       /// integer.
137       UHexKind
138     };
139 
140     union Child
141     {
142       const Twine *twine;
143       const char *cString;
144       const std::string *stdString;
145       struct {
146         const char *ptr;
147         size_t length;
148       } ptrAndLength;
149       const formatv_object_base *formatvObject;
150       char character;
151       unsigned int decUI;
152       int decI;
153       const unsigned long *decUL;
154       const long *decL;
155       const unsigned long long *decULL;
156       const long long *decLL;
157       const uint64_t *uHex;
158     };
159 
160     /// LHS - The prefix in the concatenation, which may be uninitialized for
161     /// Null or Empty kinds.
162     Child LHS;
163 
164     /// RHS - The suffix in the concatenation, which may be uninitialized for
165     /// Null or Empty kinds.
166     Child RHS;
167 
168     /// LHSKind - The NodeKind of the left hand side, \see getLHSKind().
169     NodeKind LHSKind = EmptyKind;
170 
171     /// RHSKind - The NodeKind of the right hand side, \see getRHSKind().
172     NodeKind RHSKind = EmptyKind;
173 
174     /// Construct a nullary twine; the kind must be NullKind or EmptyKind.
Twine(NodeKind Kind)175     explicit Twine(NodeKind Kind) : LHSKind(Kind) {
176       assert(isNullary() && "Invalid kind!");
177     }
178 
179     /// Construct a binary twine.
Twine(const Twine & LHS,const Twine & RHS)180     explicit Twine(const Twine &LHS, const Twine &RHS)
181         : LHSKind(TwineKind), RHSKind(TwineKind) {
182       this->LHS.twine = &LHS;
183       this->RHS.twine = &RHS;
184       assert(isValid() && "Invalid twine!");
185     }
186 
187     /// Construct a twine from explicit values.
Twine(Child LHS,NodeKind LHSKind,Child RHS,NodeKind RHSKind)188     explicit Twine(Child LHS, NodeKind LHSKind, Child RHS, NodeKind RHSKind)
189         : LHS(LHS), RHS(RHS), LHSKind(LHSKind), RHSKind(RHSKind) {
190       assert(isValid() && "Invalid twine!");
191     }
192 
193     /// Check for the null twine.
isNull()194     bool isNull() const {
195       return getLHSKind() == NullKind;
196     }
197 
198     /// Check for the empty twine.
isEmpty()199     bool isEmpty() const {
200       return getLHSKind() == EmptyKind;
201     }
202 
203     /// Check if this is a nullary twine (null or empty).
isNullary()204     bool isNullary() const {
205       return isNull() || isEmpty();
206     }
207 
208     /// Check if this is a unary twine.
isUnary()209     bool isUnary() const {
210       return getRHSKind() == EmptyKind && !isNullary();
211     }
212 
213     /// Check if this is a binary twine.
isBinary()214     bool isBinary() const {
215       return getLHSKind() != NullKind && getRHSKind() != EmptyKind;
216     }
217 
218     /// Check if this is a valid twine (satisfying the invariants on
219     /// order and number of arguments).
isValid()220     bool isValid() const {
221       // Nullary twines always have Empty on the RHS.
222       if (isNullary() && getRHSKind() != EmptyKind)
223         return false;
224 
225       // Null should never appear on the RHS.
226       if (getRHSKind() == NullKind)
227         return false;
228 
229       // The RHS cannot be non-empty if the LHS is empty.
230       if (getRHSKind() != EmptyKind && getLHSKind() == EmptyKind)
231         return false;
232 
233       // A twine child should always be binary.
234       if (getLHSKind() == TwineKind &&
235           !LHS.twine->isBinary())
236         return false;
237       if (getRHSKind() == TwineKind &&
238           !RHS.twine->isBinary())
239         return false;
240 
241       return true;
242     }
243 
244     /// Get the NodeKind of the left-hand side.
getLHSKind()245     NodeKind getLHSKind() const { return LHSKind; }
246 
247     /// Get the NodeKind of the right-hand side.
getRHSKind()248     NodeKind getRHSKind() const { return RHSKind; }
249 
250     /// Print one child from a twine.
251     void printOneChild(raw_ostream &OS, Child Ptr, NodeKind Kind) const;
252 
253     /// Print the representation of one child from a twine.
254     void printOneChildRepr(raw_ostream &OS, Child Ptr,
255                            NodeKind Kind) const;
256 
257   public:
258     /// @name Constructors
259     /// @{
260 
261     /// Construct from an empty string.
Twine()262     /*implicit*/ Twine() {
263       assert(isValid() && "Invalid twine!");
264     }
265 
266     Twine(const Twine &) = default;
267 
268     /// Construct from a C string.
269     ///
270     /// We take care here to optimize "" into the empty twine -- this will be
271     /// optimized out for string constants. This allows Twine arguments have
272     /// default "" values, without introducing unnecessary string constants.
Twine(const char * Str)273     /*implicit*/ Twine(const char *Str) {
274       if (Str[0] != '\0') {
275         LHS.cString = Str;
276         LHSKind = CStringKind;
277       } else
278         LHSKind = EmptyKind;
279 
280       assert(isValid() && "Invalid twine!");
281     }
282     /// Delete the implicit conversion from nullptr as Twine(const char *)
283     /// cannot take nullptr.
284     /*implicit*/ Twine(std::nullptr_t) = delete;
285 
286     /// Construct from an std::string.
Twine(const std::string & Str)287     /*implicit*/ Twine(const std::string &Str) : LHSKind(StdStringKind) {
288       LHS.stdString = &Str;
289       assert(isValid() && "Invalid twine!");
290     }
291 
292     /// Construct from an std::string_view by converting it to a pointer and
293     /// length.  This handles string_views on a pure API basis, and avoids
294     /// storing one (or a pointer to one) inside a Twine, which avoids problems
295     /// when mixing code compiled under various C++ standards.
Twine(const std::string_view & Str)296     /*implicit*/ Twine(const std::string_view &Str)
297         : LHSKind(PtrAndLengthKind) {
298       LHS.ptrAndLength.ptr = Str.data();
299       LHS.ptrAndLength.length = Str.length();
300       assert(isValid() && "Invalid twine!");
301     }
302 
303     /// Construct from a StringRef.
Twine(const StringRef & Str)304     /*implicit*/ Twine(const StringRef &Str) : LHSKind(PtrAndLengthKind) {
305       LHS.ptrAndLength.ptr = Str.data();
306       LHS.ptrAndLength.length = Str.size();
307       assert(isValid() && "Invalid twine!");
308     }
309 
310     /// Construct from a StringLiteral.
Twine(const StringLiteral & Str)311     /*implicit*/ Twine(const StringLiteral &Str)
312         : LHSKind(StringLiteralKind) {
313       LHS.ptrAndLength.ptr = Str.data();
314       LHS.ptrAndLength.length = Str.size();
315       assert(isValid() && "Invalid twine!");
316     }
317 
318     /// Construct from a SmallString.
Twine(const SmallVectorImpl<char> & Str)319     /*implicit*/ Twine(const SmallVectorImpl<char> &Str)
320         : LHSKind(PtrAndLengthKind) {
321       LHS.ptrAndLength.ptr = Str.data();
322       LHS.ptrAndLength.length = Str.size();
323       assert(isValid() && "Invalid twine!");
324     }
325 
326     /// Construct from a formatv_object_base.
Twine(const formatv_object_base & Fmt)327     /*implicit*/ Twine(const formatv_object_base &Fmt)
328         : LHSKind(FormatvObjectKind) {
329       LHS.formatvObject = &Fmt;
330       assert(isValid() && "Invalid twine!");
331     }
332 
333     /// Construct from a char.
Twine(char Val)334     explicit Twine(char Val) : LHSKind(CharKind) {
335       LHS.character = Val;
336     }
337 
338     /// Construct from a signed char.
Twine(signed char Val)339     explicit Twine(signed char Val) : LHSKind(CharKind) {
340       LHS.character = static_cast<char>(Val);
341     }
342 
343     /// Construct from an unsigned char.
Twine(unsigned char Val)344     explicit Twine(unsigned char Val) : LHSKind(CharKind) {
345       LHS.character = static_cast<char>(Val);
346     }
347 
348     /// Construct a twine to print \p Val as an unsigned decimal integer.
Twine(unsigned Val)349     explicit Twine(unsigned Val) : LHSKind(DecUIKind) {
350       LHS.decUI = Val;
351     }
352 
353     /// Construct a twine to print \p Val as a signed decimal integer.
Twine(int Val)354     explicit Twine(int Val) : LHSKind(DecIKind) {
355       LHS.decI = Val;
356     }
357 
358     /// Construct a twine to print \p Val as an unsigned decimal integer.
Twine(const unsigned long & Val)359     explicit Twine(const unsigned long &Val) : LHSKind(DecULKind) {
360       LHS.decUL = &Val;
361     }
362 
363     /// Construct a twine to print \p Val as a signed decimal integer.
Twine(const long & Val)364     explicit Twine(const long &Val) : LHSKind(DecLKind) {
365       LHS.decL = &Val;
366     }
367 
368     /// Construct a twine to print \p Val as an unsigned decimal integer.
Twine(const unsigned long long & Val)369     explicit Twine(const unsigned long long &Val) : LHSKind(DecULLKind) {
370       LHS.decULL = &Val;
371     }
372 
373     /// Construct a twine to print \p Val as a signed decimal integer.
Twine(const long long & Val)374     explicit Twine(const long long &Val) : LHSKind(DecLLKind) {
375       LHS.decLL = &Val;
376     }
377 
378     // FIXME: Unfortunately, to make sure this is as efficient as possible we
379     // need extra binary constructors from particular types. We can't rely on
380     // the compiler to be smart enough to fold operator+()/concat() down to the
381     // right thing. Yet.
382 
383     /// Construct as the concatenation of a C string and a StringRef.
Twine(const char * LHS,const StringRef & RHS)384     /*implicit*/ Twine(const char *LHS, const StringRef &RHS)
385         : LHSKind(CStringKind), RHSKind(PtrAndLengthKind) {
386       this->LHS.cString = LHS;
387       this->RHS.ptrAndLength.ptr = RHS.data();
388       this->RHS.ptrAndLength.length = RHS.size();
389       assert(isValid() && "Invalid twine!");
390     }
391 
392     /// Construct as the concatenation of a StringRef and a C string.
Twine(const StringRef & LHS,const char * RHS)393     /*implicit*/ Twine(const StringRef &LHS, const char *RHS)
394         : LHSKind(PtrAndLengthKind), RHSKind(CStringKind) {
395       this->LHS.ptrAndLength.ptr = LHS.data();
396       this->LHS.ptrAndLength.length = LHS.size();
397       this->RHS.cString = RHS;
398       assert(isValid() && "Invalid twine!");
399     }
400 
401     /// Since the intended use of twines is as temporary objects, assignments
402     /// when concatenating might cause undefined behavior or stack corruptions
403     Twine &operator=(const Twine &) = delete;
404 
405     /// Create a 'null' string, which is an empty string that always
406     /// concatenates to form another empty string.
createNull()407     static Twine createNull() {
408       return Twine(NullKind);
409     }
410 
411     /// @}
412     /// @name Numeric Conversions
413     /// @{
414 
415     // Construct a twine to print \p Val as an unsigned hexadecimal integer.
utohexstr(const uint64_t & Val)416     static Twine utohexstr(const uint64_t &Val) {
417       Child LHS, RHS;
418       LHS.uHex = &Val;
419       RHS.twine = nullptr;
420       return Twine(LHS, UHexKind, RHS, EmptyKind);
421     }
422 
423     /// @}
424     /// @name Predicate Operations
425     /// @{
426 
427     /// Check if this twine is trivially empty; a false return value does not
428     /// necessarily mean the twine is empty.
isTriviallyEmpty()429     bool isTriviallyEmpty() const {
430       return isNullary();
431     }
432 
433     /// Check if this twine is guaranteed to refer to single string literal.
isSingleStringLiteral()434     bool isSingleStringLiteral() const {
435       return isUnary() && getLHSKind() == StringLiteralKind;
436     }
437 
438     /// Return true if this twine can be dynamically accessed as a single
439     /// StringRef value with getSingleStringRef().
isSingleStringRef()440     bool isSingleStringRef() const {
441       if (getRHSKind() != EmptyKind) return false;
442 
443       switch (getLHSKind()) {
444       case EmptyKind:
445       case CStringKind:
446       case StdStringKind:
447       case PtrAndLengthKind:
448       case StringLiteralKind:
449         return true;
450       default:
451         return false;
452       }
453     }
454 
455     /// @}
456     /// @name String Operations
457     /// @{
458 
459     Twine concat(const Twine &Suffix) const;
460 
461     /// @}
462     /// @name Output & Conversion.
463     /// @{
464 
465     /// Return the twine contents as a std::string.
466     std::string str() const;
467 
468     /// Append the concatenated string into the given SmallString or SmallVector.
469     void toVector(SmallVectorImpl<char> &Out) const;
470 
471     /// This returns the twine as a single StringRef.  This method is only valid
472     /// if isSingleStringRef() is true.
getSingleStringRef()473     StringRef getSingleStringRef() const {
474       assert(isSingleStringRef() &&"This cannot be had as a single stringref!");
475       switch (getLHSKind()) {
476       default: llvm_unreachable("Out of sync with isSingleStringRef");
477       case EmptyKind:
478         return StringRef();
479       case CStringKind:
480         return StringRef(LHS.cString);
481       case StdStringKind:
482         return StringRef(*LHS.stdString);
483       case PtrAndLengthKind:
484       case StringLiteralKind:
485         return StringRef(LHS.ptrAndLength.ptr, LHS.ptrAndLength.length);
486       }
487     }
488 
489     /// This returns the twine as a single StringRef if it can be
490     /// represented as such. Otherwise the twine is written into the given
491     /// SmallVector and a StringRef to the SmallVector's data is returned.
toStringRef(SmallVectorImpl<char> & Out)492     StringRef toStringRef(SmallVectorImpl<char> &Out) const {
493       if (isSingleStringRef())
494         return getSingleStringRef();
495       toVector(Out);
496       return StringRef(Out.data(), Out.size());
497     }
498 
499     /// This returns the twine as a single null terminated StringRef if it
500     /// can be represented as such. Otherwise the twine is written into the
501     /// given SmallVector and a StringRef to the SmallVector's data is returned.
502     ///
503     /// The returned StringRef's size does not include the null terminator.
504     StringRef toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const;
505 
506     /// Write the concatenated string represented by this twine to the
507     /// stream \p OS.
508     void print(raw_ostream &OS) const;
509 
510     /// Dump the concatenated string represented by this twine to stderr.
511     void dump() const;
512 
513     /// Write the representation of this twine to the stream \p OS.
514     void printRepr(raw_ostream &OS) const;
515 
516     /// Dump the representation of this twine to stderr.
517     void dumpRepr() const;
518 
519     /// @}
520   };
521 
522   /// @name Twine Inline Implementations
523   /// @{
524 
concat(const Twine & Suffix)525   inline Twine Twine::concat(const Twine &Suffix) const {
526     // Concatenation with null is null.
527     if (isNull() || Suffix.isNull())
528       return Twine(NullKind);
529 
530     // Concatenation with empty yields the other side.
531     if (isEmpty())
532       return Suffix;
533     if (Suffix.isEmpty())
534       return *this;
535 
536     // Otherwise we need to create a new node, taking care to fold in unary
537     // twines.
538     Child NewLHS, NewRHS;
539     NewLHS.twine = this;
540     NewRHS.twine = &Suffix;
541     NodeKind NewLHSKind = TwineKind, NewRHSKind = TwineKind;
542     if (isUnary()) {
543       NewLHS = LHS;
544       NewLHSKind = getLHSKind();
545     }
546     if (Suffix.isUnary()) {
547       NewRHS = Suffix.LHS;
548       NewRHSKind = Suffix.getLHSKind();
549     }
550 
551     return Twine(NewLHS, NewLHSKind, NewRHS, NewRHSKind);
552   }
553 
554   inline Twine operator+(const Twine &LHS, const Twine &RHS) {
555     return LHS.concat(RHS);
556   }
557 
558   /// Additional overload to guarantee simplified codegen; this is equivalent to
559   /// concat().
560 
561   inline Twine operator+(const char *LHS, const StringRef &RHS) {
562     return Twine(LHS, RHS);
563   }
564 
565   /// Additional overload to guarantee simplified codegen; this is equivalent to
566   /// concat().
567 
568   inline Twine operator+(const StringRef &LHS, const char *RHS) {
569     return Twine(LHS, RHS);
570   }
571 
572   inline raw_ostream &operator<<(raw_ostream &OS, const Twine &RHS) {
573     RHS.print(OS);
574     return OS;
575   }
576 
577   /// @}
578 
579 } // end namespace llvm
580 
581 #endif // LLVM_ADT_TWINE_H
582