//===-- StringRef.cpp - Lightweight String References ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9f4a2713aSLionel Sambuc 
10f4a2713aSLionel Sambuc #include "llvm/ADT/StringRef.h"
11f4a2713aSLionel Sambuc #include "llvm/ADT/APInt.h"
12f4a2713aSLionel Sambuc #include "llvm/ADT/Hashing.h"
13f4a2713aSLionel Sambuc #include "llvm/ADT/edit_distance.h"
14f4a2713aSLionel Sambuc #include <bitset>
15f4a2713aSLionel Sambuc 
16f4a2713aSLionel Sambuc using namespace llvm;
17f4a2713aSLionel Sambuc 
18f4a2713aSLionel Sambuc // MSVC emits references to this into the translation units which reference it.
19f4a2713aSLionel Sambuc #ifndef _MSC_VER
20f4a2713aSLionel Sambuc const size_t StringRef::npos;
21f4a2713aSLionel Sambuc #endif
22f4a2713aSLionel Sambuc 
/// Map an ASCII upper-case letter ('A'..'Z') to its lower-case counterpart.
/// Any other character is returned unchanged.
static char ascii_tolower(char x) {
  const bool IsUpper = x >= 'A' && x <= 'Z';
  return IsUpper ? x - 'A' + 'a' : x;
}
28f4a2713aSLionel Sambuc 
/// Map an ASCII lower-case letter ('a'..'z') to its upper-case counterpart.
/// Any other character is returned unchanged.
static char ascii_toupper(char x) {
  // Not a lower-case letter: nothing to convert.
  if (x < 'a' || x > 'z')
    return x;
  return x - 'a' + 'A';
}
34f4a2713aSLionel Sambuc 
/// Return true iff \p x is one of the ASCII decimal digits '0'..'9'.
static bool ascii_isdigit(char x) {
  if (x < '0')
    return false;
  return x <= '9';
}
38f4a2713aSLionel Sambuc 
39f4a2713aSLionel Sambuc // strncasecmp() is not available on non-POSIX systems, so define an
40f4a2713aSLionel Sambuc // alternative function here.
ascii_strncasecmp(const char * LHS,const char * RHS,size_t Length)41f4a2713aSLionel Sambuc static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) {
42f4a2713aSLionel Sambuc   for (size_t I = 0; I < Length; ++I) {
43f4a2713aSLionel Sambuc     unsigned char LHC = ascii_tolower(LHS[I]);
44f4a2713aSLionel Sambuc     unsigned char RHC = ascii_tolower(RHS[I]);
45f4a2713aSLionel Sambuc     if (LHC != RHC)
46f4a2713aSLionel Sambuc       return LHC < RHC ? -1 : 1;
47f4a2713aSLionel Sambuc   }
48f4a2713aSLionel Sambuc   return 0;
49f4a2713aSLionel Sambuc }
50f4a2713aSLionel Sambuc 
51f4a2713aSLionel Sambuc /// compare_lower - Compare strings, ignoring case.
compare_lower(StringRef RHS) const52f4a2713aSLionel Sambuc int StringRef::compare_lower(StringRef RHS) const {
53*0a6a1f1dSLionel Sambuc   if (int Res = ascii_strncasecmp(Data, RHS.Data, std::min(Length, RHS.Length)))
54f4a2713aSLionel Sambuc     return Res;
55f4a2713aSLionel Sambuc   if (Length == RHS.Length)
56f4a2713aSLionel Sambuc     return 0;
57f4a2713aSLionel Sambuc   return Length < RHS.Length ? -1 : 1;
58f4a2713aSLionel Sambuc }
59f4a2713aSLionel Sambuc 
60f4a2713aSLionel Sambuc /// Check if this string starts with the given \p Prefix, ignoring case.
startswith_lower(StringRef Prefix) const61f4a2713aSLionel Sambuc bool StringRef::startswith_lower(StringRef Prefix) const {
62f4a2713aSLionel Sambuc   return Length >= Prefix.Length &&
63f4a2713aSLionel Sambuc       ascii_strncasecmp(Data, Prefix.Data, Prefix.Length) == 0;
64f4a2713aSLionel Sambuc }
65f4a2713aSLionel Sambuc 
66f4a2713aSLionel Sambuc /// Check if this string ends with the given \p Suffix, ignoring case.
endswith_lower(StringRef Suffix) const67f4a2713aSLionel Sambuc bool StringRef::endswith_lower(StringRef Suffix) const {
68f4a2713aSLionel Sambuc   return Length >= Suffix.Length &&
69f4a2713aSLionel Sambuc       ascii_strncasecmp(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
70f4a2713aSLionel Sambuc }
71f4a2713aSLionel Sambuc 
72f4a2713aSLionel Sambuc /// compare_numeric - Compare strings, handle embedded numbers.
compare_numeric(StringRef RHS) const73f4a2713aSLionel Sambuc int StringRef::compare_numeric(StringRef RHS) const {
74*0a6a1f1dSLionel Sambuc   for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) {
75f4a2713aSLionel Sambuc     // Check for sequences of digits.
76f4a2713aSLionel Sambuc     if (ascii_isdigit(Data[I]) && ascii_isdigit(RHS.Data[I])) {
77f4a2713aSLionel Sambuc       // The longer sequence of numbers is considered larger.
78f4a2713aSLionel Sambuc       // This doesn't really handle prefixed zeros well.
79f4a2713aSLionel Sambuc       size_t J;
80f4a2713aSLionel Sambuc       for (J = I + 1; J != E + 1; ++J) {
81f4a2713aSLionel Sambuc         bool ld = J < Length && ascii_isdigit(Data[J]);
82f4a2713aSLionel Sambuc         bool rd = J < RHS.Length && ascii_isdigit(RHS.Data[J]);
83f4a2713aSLionel Sambuc         if (ld != rd)
84f4a2713aSLionel Sambuc           return rd ? -1 : 1;
85f4a2713aSLionel Sambuc         if (!rd)
86f4a2713aSLionel Sambuc           break;
87f4a2713aSLionel Sambuc       }
88f4a2713aSLionel Sambuc       // The two number sequences have the same length (J-I), just memcmp them.
89f4a2713aSLionel Sambuc       if (int Res = compareMemory(Data + I, RHS.Data + I, J - I))
90f4a2713aSLionel Sambuc         return Res < 0 ? -1 : 1;
91f4a2713aSLionel Sambuc       // Identical number sequences, continue search after the numbers.
92f4a2713aSLionel Sambuc       I = J - 1;
93f4a2713aSLionel Sambuc       continue;
94f4a2713aSLionel Sambuc     }
95f4a2713aSLionel Sambuc     if (Data[I] != RHS.Data[I])
96f4a2713aSLionel Sambuc       return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
97f4a2713aSLionel Sambuc   }
98f4a2713aSLionel Sambuc   if (Length == RHS.Length)
99f4a2713aSLionel Sambuc     return 0;
100f4a2713aSLionel Sambuc   return Length < RHS.Length ? -1 : 1;
101f4a2713aSLionel Sambuc }
102f4a2713aSLionel Sambuc 
103f4a2713aSLionel Sambuc // Compute the edit distance between the two given strings.
edit_distance(llvm::StringRef Other,bool AllowReplacements,unsigned MaxEditDistance) const104f4a2713aSLionel Sambuc unsigned StringRef::edit_distance(llvm::StringRef Other,
105f4a2713aSLionel Sambuc                                   bool AllowReplacements,
106f4a2713aSLionel Sambuc                                   unsigned MaxEditDistance) const {
107f4a2713aSLionel Sambuc   return llvm::ComputeEditDistance(
108*0a6a1f1dSLionel Sambuc       makeArrayRef(data(), size()),
109*0a6a1f1dSLionel Sambuc       makeArrayRef(Other.data(), Other.size()),
110f4a2713aSLionel Sambuc       AllowReplacements, MaxEditDistance);
111f4a2713aSLionel Sambuc }
112f4a2713aSLionel Sambuc 
//===----------------------------------------------------------------------===//
// String Operations
//===----------------------------------------------------------------------===//
116f4a2713aSLionel Sambuc 
lower() const117f4a2713aSLionel Sambuc std::string StringRef::lower() const {
118f4a2713aSLionel Sambuc   std::string Result(size(), char());
119f4a2713aSLionel Sambuc   for (size_type i = 0, e = size(); i != e; ++i) {
120f4a2713aSLionel Sambuc     Result[i] = ascii_tolower(Data[i]);
121f4a2713aSLionel Sambuc   }
122f4a2713aSLionel Sambuc   return Result;
123f4a2713aSLionel Sambuc }
124f4a2713aSLionel Sambuc 
upper() const125f4a2713aSLionel Sambuc std::string StringRef::upper() const {
126f4a2713aSLionel Sambuc   std::string Result(size(), char());
127f4a2713aSLionel Sambuc   for (size_type i = 0, e = size(); i != e; ++i) {
128f4a2713aSLionel Sambuc     Result[i] = ascii_toupper(Data[i]);
129f4a2713aSLionel Sambuc   }
130f4a2713aSLionel Sambuc   return Result;
131f4a2713aSLionel Sambuc }
132f4a2713aSLionel Sambuc 
//===----------------------------------------------------------------------===//
// String Searching
//===----------------------------------------------------------------------===//
136f4a2713aSLionel Sambuc 
137f4a2713aSLionel Sambuc 
138f4a2713aSLionel Sambuc /// find - Search for the first string \arg Str in the string.
139f4a2713aSLionel Sambuc ///
140f4a2713aSLionel Sambuc /// \return - The index of the first occurrence of \arg Str, or npos if not
141f4a2713aSLionel Sambuc /// found.
find(StringRef Str,size_t From) const142f4a2713aSLionel Sambuc size_t StringRef::find(StringRef Str, size_t From) const {
143f4a2713aSLionel Sambuc   size_t N = Str.size();
144f4a2713aSLionel Sambuc   if (N > Length)
145f4a2713aSLionel Sambuc     return npos;
146f4a2713aSLionel Sambuc 
147f4a2713aSLionel Sambuc   // For short haystacks or unsupported needles fall back to the naive algorithm
148f4a2713aSLionel Sambuc   if (Length < 16 || N > 255 || N == 0) {
149*0a6a1f1dSLionel Sambuc     for (size_t e = Length - N + 1, i = std::min(From, e); i != e; ++i)
150f4a2713aSLionel Sambuc       if (substr(i, N).equals(Str))
151f4a2713aSLionel Sambuc         return i;
152f4a2713aSLionel Sambuc     return npos;
153f4a2713aSLionel Sambuc   }
154f4a2713aSLionel Sambuc 
155f4a2713aSLionel Sambuc   if (From >= Length)
156f4a2713aSLionel Sambuc     return npos;
157f4a2713aSLionel Sambuc 
158f4a2713aSLionel Sambuc   // Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
159f4a2713aSLionel Sambuc   uint8_t BadCharSkip[256];
160f4a2713aSLionel Sambuc   std::memset(BadCharSkip, N, 256);
161f4a2713aSLionel Sambuc   for (unsigned i = 0; i != N-1; ++i)
162f4a2713aSLionel Sambuc     BadCharSkip[(uint8_t)Str[i]] = N-1-i;
163f4a2713aSLionel Sambuc 
164f4a2713aSLionel Sambuc   unsigned Len = Length-From, Pos = From;
165f4a2713aSLionel Sambuc   while (Len >= N) {
166f4a2713aSLionel Sambuc     if (substr(Pos, N).equals(Str)) // See if this is the correct substring.
167f4a2713aSLionel Sambuc       return Pos;
168f4a2713aSLionel Sambuc 
169f4a2713aSLionel Sambuc     // Otherwise skip the appropriate number of bytes.
170f4a2713aSLionel Sambuc     uint8_t Skip = BadCharSkip[(uint8_t)(*this)[Pos+N-1]];
171f4a2713aSLionel Sambuc     Len -= Skip;
172f4a2713aSLionel Sambuc     Pos += Skip;
173f4a2713aSLionel Sambuc   }
174f4a2713aSLionel Sambuc 
175f4a2713aSLionel Sambuc   return npos;
176f4a2713aSLionel Sambuc }
177f4a2713aSLionel Sambuc 
178f4a2713aSLionel Sambuc /// rfind - Search for the last string \arg Str in the string.
179f4a2713aSLionel Sambuc ///
180f4a2713aSLionel Sambuc /// \return - The index of the last occurrence of \arg Str, or npos if not
181f4a2713aSLionel Sambuc /// found.
rfind(StringRef Str) const182f4a2713aSLionel Sambuc size_t StringRef::rfind(StringRef Str) const {
183f4a2713aSLionel Sambuc   size_t N = Str.size();
184f4a2713aSLionel Sambuc   if (N > Length)
185f4a2713aSLionel Sambuc     return npos;
186f4a2713aSLionel Sambuc   for (size_t i = Length - N + 1, e = 0; i != e;) {
187f4a2713aSLionel Sambuc     --i;
188f4a2713aSLionel Sambuc     if (substr(i, N).equals(Str))
189f4a2713aSLionel Sambuc       return i;
190f4a2713aSLionel Sambuc   }
191f4a2713aSLionel Sambuc   return npos;
192f4a2713aSLionel Sambuc }
193f4a2713aSLionel Sambuc 
194f4a2713aSLionel Sambuc /// find_first_of - Find the first character in the string that is in \arg
195f4a2713aSLionel Sambuc /// Chars, or npos if not found.
196f4a2713aSLionel Sambuc ///
197f4a2713aSLionel Sambuc /// Note: O(size() + Chars.size())
find_first_of(StringRef Chars,size_t From) const198f4a2713aSLionel Sambuc StringRef::size_type StringRef::find_first_of(StringRef Chars,
199f4a2713aSLionel Sambuc                                               size_t From) const {
200f4a2713aSLionel Sambuc   std::bitset<1 << CHAR_BIT> CharBits;
201f4a2713aSLionel Sambuc   for (size_type i = 0; i != Chars.size(); ++i)
202f4a2713aSLionel Sambuc     CharBits.set((unsigned char)Chars[i]);
203f4a2713aSLionel Sambuc 
204*0a6a1f1dSLionel Sambuc   for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
205f4a2713aSLionel Sambuc     if (CharBits.test((unsigned char)Data[i]))
206f4a2713aSLionel Sambuc       return i;
207f4a2713aSLionel Sambuc   return npos;
208f4a2713aSLionel Sambuc }
209f4a2713aSLionel Sambuc 
210f4a2713aSLionel Sambuc /// find_first_not_of - Find the first character in the string that is not
211f4a2713aSLionel Sambuc /// \arg C or npos if not found.
find_first_not_of(char C,size_t From) const212f4a2713aSLionel Sambuc StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
213*0a6a1f1dSLionel Sambuc   for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
214f4a2713aSLionel Sambuc     if (Data[i] != C)
215f4a2713aSLionel Sambuc       return i;
216f4a2713aSLionel Sambuc   return npos;
217f4a2713aSLionel Sambuc }
218f4a2713aSLionel Sambuc 
219f4a2713aSLionel Sambuc /// find_first_not_of - Find the first character in the string that is not
220f4a2713aSLionel Sambuc /// in the string \arg Chars, or npos if not found.
221f4a2713aSLionel Sambuc ///
222f4a2713aSLionel Sambuc /// Note: O(size() + Chars.size())
find_first_not_of(StringRef Chars,size_t From) const223f4a2713aSLionel Sambuc StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
224f4a2713aSLionel Sambuc                                                   size_t From) const {
225f4a2713aSLionel Sambuc   std::bitset<1 << CHAR_BIT> CharBits;
226f4a2713aSLionel Sambuc   for (size_type i = 0; i != Chars.size(); ++i)
227f4a2713aSLionel Sambuc     CharBits.set((unsigned char)Chars[i]);
228f4a2713aSLionel Sambuc 
229*0a6a1f1dSLionel Sambuc   for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
230f4a2713aSLionel Sambuc     if (!CharBits.test((unsigned char)Data[i]))
231f4a2713aSLionel Sambuc       return i;
232f4a2713aSLionel Sambuc   return npos;
233f4a2713aSLionel Sambuc }
234f4a2713aSLionel Sambuc 
235f4a2713aSLionel Sambuc /// find_last_of - Find the last character in the string that is in \arg C,
236f4a2713aSLionel Sambuc /// or npos if not found.
237f4a2713aSLionel Sambuc ///
238f4a2713aSLionel Sambuc /// Note: O(size() + Chars.size())
find_last_of(StringRef Chars,size_t From) const239f4a2713aSLionel Sambuc StringRef::size_type StringRef::find_last_of(StringRef Chars,
240f4a2713aSLionel Sambuc                                              size_t From) const {
241f4a2713aSLionel Sambuc   std::bitset<1 << CHAR_BIT> CharBits;
242f4a2713aSLionel Sambuc   for (size_type i = 0; i != Chars.size(); ++i)
243f4a2713aSLionel Sambuc     CharBits.set((unsigned char)Chars[i]);
244f4a2713aSLionel Sambuc 
245*0a6a1f1dSLionel Sambuc   for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
246f4a2713aSLionel Sambuc     if (CharBits.test((unsigned char)Data[i]))
247f4a2713aSLionel Sambuc       return i;
248f4a2713aSLionel Sambuc   return npos;
249f4a2713aSLionel Sambuc }
250f4a2713aSLionel Sambuc 
251f4a2713aSLionel Sambuc /// find_last_not_of - Find the last character in the string that is not
252f4a2713aSLionel Sambuc /// \arg C, or npos if not found.
find_last_not_of(char C,size_t From) const253f4a2713aSLionel Sambuc StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const {
254*0a6a1f1dSLionel Sambuc   for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
255f4a2713aSLionel Sambuc     if (Data[i] != C)
256f4a2713aSLionel Sambuc       return i;
257f4a2713aSLionel Sambuc   return npos;
258f4a2713aSLionel Sambuc }
259f4a2713aSLionel Sambuc 
260f4a2713aSLionel Sambuc /// find_last_not_of - Find the last character in the string that is not in
261f4a2713aSLionel Sambuc /// \arg Chars, or npos if not found.
262f4a2713aSLionel Sambuc ///
263f4a2713aSLionel Sambuc /// Note: O(size() + Chars.size())
find_last_not_of(StringRef Chars,size_t From) const264f4a2713aSLionel Sambuc StringRef::size_type StringRef::find_last_not_of(StringRef Chars,
265f4a2713aSLionel Sambuc                                                  size_t From) const {
266f4a2713aSLionel Sambuc   std::bitset<1 << CHAR_BIT> CharBits;
267f4a2713aSLionel Sambuc   for (size_type i = 0, e = Chars.size(); i != e; ++i)
268f4a2713aSLionel Sambuc     CharBits.set((unsigned char)Chars[i]);
269f4a2713aSLionel Sambuc 
270*0a6a1f1dSLionel Sambuc   for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
271f4a2713aSLionel Sambuc     if (!CharBits.test((unsigned char)Data[i]))
272f4a2713aSLionel Sambuc       return i;
273f4a2713aSLionel Sambuc   return npos;
274f4a2713aSLionel Sambuc }
275f4a2713aSLionel Sambuc 
split(SmallVectorImpl<StringRef> & A,StringRef Separators,int MaxSplit,bool KeepEmpty) const276f4a2713aSLionel Sambuc void StringRef::split(SmallVectorImpl<StringRef> &A,
277f4a2713aSLionel Sambuc                       StringRef Separators, int MaxSplit,
278f4a2713aSLionel Sambuc                       bool KeepEmpty) const {
279f4a2713aSLionel Sambuc   StringRef rest = *this;
280f4a2713aSLionel Sambuc 
281f4a2713aSLionel Sambuc   // rest.data() is used to distinguish cases like "a," that splits into
282f4a2713aSLionel Sambuc   // "a" + "" and "a" that splits into "a" + 0.
283f4a2713aSLionel Sambuc   for (int splits = 0;
284*0a6a1f1dSLionel Sambuc        rest.data() != nullptr && (MaxSplit < 0 || splits < MaxSplit);
285f4a2713aSLionel Sambuc        ++splits) {
286f4a2713aSLionel Sambuc     std::pair<StringRef, StringRef> p = rest.split(Separators);
287f4a2713aSLionel Sambuc 
288f4a2713aSLionel Sambuc     if (KeepEmpty || p.first.size() != 0)
289f4a2713aSLionel Sambuc       A.push_back(p.first);
290f4a2713aSLionel Sambuc     rest = p.second;
291f4a2713aSLionel Sambuc   }
292f4a2713aSLionel Sambuc   // If we have a tail left, add it.
293*0a6a1f1dSLionel Sambuc   if (rest.data() != nullptr && (rest.size() != 0 || KeepEmpty))
294f4a2713aSLionel Sambuc     A.push_back(rest);
295f4a2713aSLionel Sambuc }
296f4a2713aSLionel Sambuc 
//===----------------------------------------------------------------------===//
// Helpful Algorithms
//===----------------------------------------------------------------------===//
300f4a2713aSLionel Sambuc 
301f4a2713aSLionel Sambuc /// count - Return the number of non-overlapped occurrences of \arg Str in
302f4a2713aSLionel Sambuc /// the string.
count(StringRef Str) const303f4a2713aSLionel Sambuc size_t StringRef::count(StringRef Str) const {
304f4a2713aSLionel Sambuc   size_t Count = 0;
305f4a2713aSLionel Sambuc   size_t N = Str.size();
306f4a2713aSLionel Sambuc   if (N > Length)
307f4a2713aSLionel Sambuc     return 0;
308f4a2713aSLionel Sambuc   for (size_t i = 0, e = Length - N + 1; i != e; ++i)
309f4a2713aSLionel Sambuc     if (substr(i, N).equals(Str))
310f4a2713aSLionel Sambuc       ++Count;
311f4a2713aSLionel Sambuc   return Count;
312f4a2713aSLionel Sambuc }
313f4a2713aSLionel Sambuc 
GetAutoSenseRadix(StringRef & Str)314f4a2713aSLionel Sambuc static unsigned GetAutoSenseRadix(StringRef &Str) {
315f4a2713aSLionel Sambuc   if (Str.startswith("0x")) {
316f4a2713aSLionel Sambuc     Str = Str.substr(2);
317f4a2713aSLionel Sambuc     return 16;
318f4a2713aSLionel Sambuc   }
319f4a2713aSLionel Sambuc 
320f4a2713aSLionel Sambuc   if (Str.startswith("0b")) {
321f4a2713aSLionel Sambuc     Str = Str.substr(2);
322f4a2713aSLionel Sambuc     return 2;
323f4a2713aSLionel Sambuc   }
324f4a2713aSLionel Sambuc 
325f4a2713aSLionel Sambuc   if (Str.startswith("0o")) {
326f4a2713aSLionel Sambuc     Str = Str.substr(2);
327f4a2713aSLionel Sambuc     return 8;
328f4a2713aSLionel Sambuc   }
329f4a2713aSLionel Sambuc 
330f4a2713aSLionel Sambuc   if (Str.startswith("0"))
331f4a2713aSLionel Sambuc     return 8;
332f4a2713aSLionel Sambuc 
333f4a2713aSLionel Sambuc   return 10;
334f4a2713aSLionel Sambuc }
335f4a2713aSLionel Sambuc 
336f4a2713aSLionel Sambuc 
337f4a2713aSLionel Sambuc /// GetAsUnsignedInteger - Workhorse method that converts a integer character
338f4a2713aSLionel Sambuc /// sequence of radix up to 36 to an unsigned long long value.
getAsUnsignedInteger(StringRef Str,unsigned Radix,unsigned long long & Result)339f4a2713aSLionel Sambuc bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
340f4a2713aSLionel Sambuc                                 unsigned long long &Result) {
341f4a2713aSLionel Sambuc   // Autosense radix if not specified.
342f4a2713aSLionel Sambuc   if (Radix == 0)
343f4a2713aSLionel Sambuc     Radix = GetAutoSenseRadix(Str);
344f4a2713aSLionel Sambuc 
345f4a2713aSLionel Sambuc   // Empty strings (after the radix autosense) are invalid.
346f4a2713aSLionel Sambuc   if (Str.empty()) return true;
347f4a2713aSLionel Sambuc 
348f4a2713aSLionel Sambuc   // Parse all the bytes of the string given this radix.  Watch for overflow.
349f4a2713aSLionel Sambuc   Result = 0;
350f4a2713aSLionel Sambuc   while (!Str.empty()) {
351f4a2713aSLionel Sambuc     unsigned CharVal;
352f4a2713aSLionel Sambuc     if (Str[0] >= '0' && Str[0] <= '9')
353f4a2713aSLionel Sambuc       CharVal = Str[0]-'0';
354f4a2713aSLionel Sambuc     else if (Str[0] >= 'a' && Str[0] <= 'z')
355f4a2713aSLionel Sambuc       CharVal = Str[0]-'a'+10;
356f4a2713aSLionel Sambuc     else if (Str[0] >= 'A' && Str[0] <= 'Z')
357f4a2713aSLionel Sambuc       CharVal = Str[0]-'A'+10;
358f4a2713aSLionel Sambuc     else
359f4a2713aSLionel Sambuc       return true;
360f4a2713aSLionel Sambuc 
361f4a2713aSLionel Sambuc     // If the parsed value is larger than the integer radix, the string is
362f4a2713aSLionel Sambuc     // invalid.
363f4a2713aSLionel Sambuc     if (CharVal >= Radix)
364f4a2713aSLionel Sambuc       return true;
365f4a2713aSLionel Sambuc 
366f4a2713aSLionel Sambuc     // Add in this character.
367f4a2713aSLionel Sambuc     unsigned long long PrevResult = Result;
368f4a2713aSLionel Sambuc     Result = Result*Radix+CharVal;
369f4a2713aSLionel Sambuc 
370f4a2713aSLionel Sambuc     // Check for overflow by shifting back and seeing if bits were lost.
371f4a2713aSLionel Sambuc     if (Result/Radix < PrevResult)
372f4a2713aSLionel Sambuc       return true;
373f4a2713aSLionel Sambuc 
374f4a2713aSLionel Sambuc     Str = Str.substr(1);
375f4a2713aSLionel Sambuc   }
376f4a2713aSLionel Sambuc 
377f4a2713aSLionel Sambuc   return false;
378f4a2713aSLionel Sambuc }
379f4a2713aSLionel Sambuc 
getAsSignedInteger(StringRef Str,unsigned Radix,long long & Result)380f4a2713aSLionel Sambuc bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
381f4a2713aSLionel Sambuc                               long long &Result) {
382f4a2713aSLionel Sambuc   unsigned long long ULLVal;
383f4a2713aSLionel Sambuc 
384f4a2713aSLionel Sambuc   // Handle positive strings first.
385f4a2713aSLionel Sambuc   if (Str.empty() || Str.front() != '-') {
386f4a2713aSLionel Sambuc     if (getAsUnsignedInteger(Str, Radix, ULLVal) ||
387f4a2713aSLionel Sambuc         // Check for value so large it overflows a signed value.
388f4a2713aSLionel Sambuc         (long long)ULLVal < 0)
389f4a2713aSLionel Sambuc       return true;
390f4a2713aSLionel Sambuc     Result = ULLVal;
391f4a2713aSLionel Sambuc     return false;
392f4a2713aSLionel Sambuc   }
393f4a2713aSLionel Sambuc 
394f4a2713aSLionel Sambuc   // Get the positive part of the value.
395f4a2713aSLionel Sambuc   if (getAsUnsignedInteger(Str.substr(1), Radix, ULLVal) ||
396f4a2713aSLionel Sambuc       // Reject values so large they'd overflow as negative signed, but allow
397f4a2713aSLionel Sambuc       // "-0".  This negates the unsigned so that the negative isn't undefined
398f4a2713aSLionel Sambuc       // on signed overflow.
399f4a2713aSLionel Sambuc       (long long)-ULLVal > 0)
400f4a2713aSLionel Sambuc     return true;
401f4a2713aSLionel Sambuc 
402f4a2713aSLionel Sambuc   Result = -ULLVal;
403f4a2713aSLionel Sambuc   return false;
404f4a2713aSLionel Sambuc }
405f4a2713aSLionel Sambuc 
getAsInteger(unsigned Radix,APInt & Result) const406f4a2713aSLionel Sambuc bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
407f4a2713aSLionel Sambuc   StringRef Str = *this;
408f4a2713aSLionel Sambuc 
409f4a2713aSLionel Sambuc   // Autosense radix if not specified.
410f4a2713aSLionel Sambuc   if (Radix == 0)
411f4a2713aSLionel Sambuc     Radix = GetAutoSenseRadix(Str);
412f4a2713aSLionel Sambuc 
413f4a2713aSLionel Sambuc   assert(Radix > 1 && Radix <= 36);
414f4a2713aSLionel Sambuc 
415f4a2713aSLionel Sambuc   // Empty strings (after the radix autosense) are invalid.
416f4a2713aSLionel Sambuc   if (Str.empty()) return true;
417f4a2713aSLionel Sambuc 
418f4a2713aSLionel Sambuc   // Skip leading zeroes.  This can be a significant improvement if
419f4a2713aSLionel Sambuc   // it means we don't need > 64 bits.
420f4a2713aSLionel Sambuc   while (!Str.empty() && Str.front() == '0')
421f4a2713aSLionel Sambuc     Str = Str.substr(1);
422f4a2713aSLionel Sambuc 
423f4a2713aSLionel Sambuc   // If it was nothing but zeroes....
424f4a2713aSLionel Sambuc   if (Str.empty()) {
425f4a2713aSLionel Sambuc     Result = APInt(64, 0);
426f4a2713aSLionel Sambuc     return false;
427f4a2713aSLionel Sambuc   }
428f4a2713aSLionel Sambuc 
429f4a2713aSLionel Sambuc   // (Over-)estimate the required number of bits.
430f4a2713aSLionel Sambuc   unsigned Log2Radix = 0;
431f4a2713aSLionel Sambuc   while ((1U << Log2Radix) < Radix) Log2Radix++;
432f4a2713aSLionel Sambuc   bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix);
433f4a2713aSLionel Sambuc 
434f4a2713aSLionel Sambuc   unsigned BitWidth = Log2Radix * Str.size();
435f4a2713aSLionel Sambuc   if (BitWidth < Result.getBitWidth())
436f4a2713aSLionel Sambuc     BitWidth = Result.getBitWidth(); // don't shrink the result
437f4a2713aSLionel Sambuc   else if (BitWidth > Result.getBitWidth())
438f4a2713aSLionel Sambuc     Result = Result.zext(BitWidth);
439f4a2713aSLionel Sambuc 
440f4a2713aSLionel Sambuc   APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix
441f4a2713aSLionel Sambuc   if (!IsPowerOf2Radix) {
442f4a2713aSLionel Sambuc     // These must have the same bit-width as Result.
443f4a2713aSLionel Sambuc     RadixAP = APInt(BitWidth, Radix);
444f4a2713aSLionel Sambuc     CharAP = APInt(BitWidth, 0);
445f4a2713aSLionel Sambuc   }
446f4a2713aSLionel Sambuc 
447f4a2713aSLionel Sambuc   // Parse all the bytes of the string given this radix.
448f4a2713aSLionel Sambuc   Result = 0;
449f4a2713aSLionel Sambuc   while (!Str.empty()) {
450f4a2713aSLionel Sambuc     unsigned CharVal;
451f4a2713aSLionel Sambuc     if (Str[0] >= '0' && Str[0] <= '9')
452f4a2713aSLionel Sambuc       CharVal = Str[0]-'0';
453f4a2713aSLionel Sambuc     else if (Str[0] >= 'a' && Str[0] <= 'z')
454f4a2713aSLionel Sambuc       CharVal = Str[0]-'a'+10;
455f4a2713aSLionel Sambuc     else if (Str[0] >= 'A' && Str[0] <= 'Z')
456f4a2713aSLionel Sambuc       CharVal = Str[0]-'A'+10;
457f4a2713aSLionel Sambuc     else
458f4a2713aSLionel Sambuc       return true;
459f4a2713aSLionel Sambuc 
460f4a2713aSLionel Sambuc     // If the parsed value is larger than the integer radix, the string is
461f4a2713aSLionel Sambuc     // invalid.
462f4a2713aSLionel Sambuc     if (CharVal >= Radix)
463f4a2713aSLionel Sambuc       return true;
464f4a2713aSLionel Sambuc 
465f4a2713aSLionel Sambuc     // Add in this character.
466f4a2713aSLionel Sambuc     if (IsPowerOf2Radix) {
467f4a2713aSLionel Sambuc       Result <<= Log2Radix;
468f4a2713aSLionel Sambuc       Result |= CharVal;
469f4a2713aSLionel Sambuc     } else {
470f4a2713aSLionel Sambuc       Result *= RadixAP;
471f4a2713aSLionel Sambuc       CharAP = CharVal;
472f4a2713aSLionel Sambuc       Result += CharAP;
473f4a2713aSLionel Sambuc     }
474f4a2713aSLionel Sambuc 
475f4a2713aSLionel Sambuc     Str = Str.substr(1);
476f4a2713aSLionel Sambuc   }
477f4a2713aSLionel Sambuc 
478f4a2713aSLionel Sambuc   return false;
479f4a2713aSLionel Sambuc }
480f4a2713aSLionel Sambuc 
481f4a2713aSLionel Sambuc 
482f4a2713aSLionel Sambuc // Implementation of StringRef hashing.
hash_value(StringRef S)483f4a2713aSLionel Sambuc hash_code llvm::hash_value(StringRef S) {
484f4a2713aSLionel Sambuc   return hash_combine_range(S.begin(), S.end());
485f4a2713aSLionel Sambuc }
486