1 //
2 // Copyright 2016 Pixar
3 //
4 // Licensed under the Apache License, Version 2.0 (the "Apache License")
5 // with the following modification; you may not use this file except in
6 // compliance with the Apache License and the following modification to it:
7 // Section 6. Trademarks. is deleted and replaced with:
8 //
9 // 6. Trademarks. This License does not grant permission to use the trade
10 //    names, trademarks, service marks, or product names of the Licensor
11 //    and its affiliates, except as required to comply with Section 4(c) of
12 //    the License and to reproduce the content of the NOTICE file.
13 //
14 // You may obtain a copy of the Apache License at
15 //
16 //     http://www.apache.org/licenses/LICENSE-2.0
17 //
18 // Unless required by applicable law or agreed to in writing, software
19 // distributed under the Apache License with the above modification is
20 // distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21 // KIND, either express or implied. See the Apache License for the specific
22 // language governing permissions and limitations under the Apache License.
23 //
24 
25 #include "pxr/pxr.h"
26 
27 #include "pxr/base/tf/stringUtils.h"
28 #include "pxr/base/tf/diagnostic.h"
29 #include "pxr/base/tf/pathUtils.h"
30 #include "pxr/base/tf/tf.h"
31 #include "pxr/base/tf/token.h"
32 #include "pxr/base/tf/staticData.h"
33 #include "pxr/base/arch/inttypes.h"
34 #include "pxr/base/arch/vsnprintf.h"
35 
36 #include <boost/type_traits/is_signed.hpp>
37 #include <boost/utility/enable_if.hpp>
38 
39 #include <climits>
40 #include <cstdarg>
41 #include <ctype.h>
42 #include <limits>
43 #include <utility>
44 #include <vector>
45 #include <memory>
46 
47 #include "pxrDoubleConversion/double-conversion.h"
48 #include "pxrDoubleConversion/utils.h"
49 
50 #if defined(ARCH_OS_WINDOWS)
51 #include <Shlwapi.h>
52 #endif
53 
54 using std::list;
55 using std::make_pair;
56 using std::pair;
57 using std::set;
58 using std::string;
59 using std::vector;
60 
61 PXR_NAMESPACE_OPEN_SCOPE
62 
63 string
TfVStringPrintf(const std::string & fmt,va_list ap)64 TfVStringPrintf(const std::string& fmt, va_list ap)
65 {
66     return ArchVStringPrintf(fmt.c_str(), ap);
67 }
68 
69 string
TfVStringPrintf(const char * fmt,va_list ap)70 TfVStringPrintf(const char *fmt, va_list ap)
71 {
72     return ArchVStringPrintf(fmt, ap);
73 }
74 
75 string
TfStringPrintf(const char * fmt,...)76 TfStringPrintf(const char *fmt, ...)
77 {
78     va_list ap;
79     va_start(ap, fmt);
80     string s = ArchVStringPrintf(fmt, ap);
81     va_end(ap);
82     return s;
83 }
84 
85 double
TfStringToDouble(const char * ptr)86 TfStringToDouble(const char *ptr)
87 {
88     pxr_double_conversion::StringToDoubleConverter
89         strToDouble(pxr_double_conversion::DoubleToStringConverter::NO_FLAGS,
90                     /* empty_string_value */ 0,
91                     /* junk_string_value */ 0,
92                     /* infinity symbol */ "inf",
93                     /* nan symbol */ "nan");
94     int numDigits_unused;
95     return strToDouble.StringToDouble(ptr, static_cast<int>(strlen(ptr)), &numDigits_unused);
96 }
97 
// std::string convenience overload: parses s.c_str() with the const char*
// overload of TfStringToDouble.
double
TfStringToDouble(const std::string& s)
{
    const char* const text = s.c_str();
    return TfStringToDouble(text);
}
103 
104 // Convert a sequence of digits in a string to a negative integral value of
105 // signed integral type Int.  Caller is responsible for ensuring that p points
106 // to a valid sequence of digits.  The minus sign '-' may not appear.
107 //
108 // If the resulting value would be less than the minimum representable value,
109 // return that minimum representable value and set *outOfRange to true (if
110 // outOfRange is not NULL).
111 template <class Int>
112 static typename boost::enable_if<boost::is_signed<Int>, Int>::type
_StringToNegative(const char * p,bool * outOfRange)113 _StringToNegative(const char *p, bool *outOfRange)
114 {
115     const Int M = std::numeric_limits<Int>::min();
116     Int result = 0;
117     while (*p >= '0' && *p <= '9') {
118         Int digit = (*p++ - '0');
119         // If the new digit would exceed the range, bail.  The expression below
120         // is equivalent to 'result < (M + digit) / 10', but it avoids division.
121         if (ARCH_UNLIKELY(result < ((M / 10) + (-digit < (M % 10))))) {
122             if (outOfRange)
123                 *outOfRange = true;
124             return M;
125         }
126         result = result * 10 - digit;
127     }
128     return result;
129 }
130 
// Reads the run of decimal digits starting at \p p and returns its value as
// the integral type Int.  The caller must make sure p points at digits.
//
// Parsing stops at the first non-digit character.  If appending a digit
// would take the value above the largest value Int can hold, that largest
// value is returned and, when \p outOfRange is non-NULL, *outOfRange is set
// to true.  *outOfRange is left untouched otherwise.
template <class Int>
static Int
_StringToPositive(const char *p, bool *outOfRange)
{
    const Int radix = 10;
    const Int highest = std::numeric_limits<Int>::max();
    const Int highestQuotient = highest / radix;
    const Int highestRemainder = highest % radix;

    Int accum = 0;
    for (; *p >= '0' && *p <= '9'; ++p) {
        const Int digit = (*p - '0');
        // Division-free form of 'accum > (highest - digit) / 10': the
        // threshold moves down by one when this digit exceeds the last digit
        // of the maximum.
        if (ARCH_UNLIKELY(
                accum > (highestQuotient - (digit > highestRemainder)))) {
            if (outOfRange) {
                *outOfRange = true;
            }
            return highest;
        }
        accum = accum * 10 + digit;
    }
    return accum;
}
158 
159 long
TfStringToLong(const char * p,bool * outOfRange)160 TfStringToLong(const char *p, bool *outOfRange)
161 {
162     if (*p == '-') {
163         ++p;
164         return _StringToNegative<long>(p, outOfRange);
165     }
166     return _StringToPositive<long>(p, outOfRange);
167 }
168 
// std::string overload: parses txt.c_str() with the const char* overload.
long
TfStringToLong(const std::string &txt, bool *outOfRange)
{
    const char* const digits = txt.c_str();
    return TfStringToLong(digits, outOfRange);
}
174 
175 unsigned long
TfStringToULong(const char * p,bool * outOfRange)176 TfStringToULong(const char *p, bool *outOfRange)
177 {
178     return _StringToPositive<unsigned long>(p, outOfRange);
179 }
180 
// std::string overload: parses txt.c_str() with the const char* overload.
unsigned long
TfStringToULong(const std::string &txt, bool *outOfRange)
{
    const char* const digits = txt.c_str();
    return TfStringToULong(digits, outOfRange);
}
186 
187 int64_t
TfStringToInt64(const char * p,bool * outOfRange)188 TfStringToInt64(const char *p, bool *outOfRange)
189 {
190     if (*p == '-') {
191         ++p;
192         return _StringToNegative<int64_t>(p, outOfRange);
193     }
194     return _StringToPositive<int64_t>(p, outOfRange);
195 }
196 
// std::string overload: parses txt.c_str() with the const char* overload.
int64_t
TfStringToInt64(const std::string &txt, bool *outOfRange)
{
    const char* const digits = txt.c_str();
    return TfStringToInt64(digits, outOfRange);
}
202 
203 uint64_t
TfStringToUInt64(const char * p,bool * outOfRange)204 TfStringToUInt64(const char *p, bool *outOfRange)
205 {
206     return _StringToPositive<uint64_t>(p, outOfRange);
207 }
208 
// std::string overload: parses txt.c_str() with the const char* overload.
uint64_t
TfStringToUInt64(const std::string &txt, bool *outOfRange)
{
    const char* const digits = txt.c_str();
    return TfStringToUInt64(digits, outOfRange);
}
214 
215 bool
TfStringStartsWith(const std::string & s,const TfToken & prefix)216 TfStringStartsWith(const std::string &s, const TfToken& prefix)
217 {
218     return TfStringStartsWith(s, prefix.GetString());
219 }
220 
221 bool
TfStringEndsWith(const std::string & s,const TfToken & suffix)222 TfStringEndsWith(const std::string &s, const TfToken& suffix)
223 {
224     return TfStringEndsWith(s, suffix.GetString());
225 }
226 
// Returns true if the NUL-terminated text \p substring occurs anywhere in
// \p s.
bool
TfStringContains(const std::string &s, const char *substring)
{
    const std::string::size_type where = s.find(substring);
    return !(where == std::string::npos);
}
232 bool
TfStringContains(const string & s,const TfToken & substring)233 TfStringContains(const string &s, const TfToken &substring)
234 {
235     return TfStringContains(s, substring.GetText());
236 }
237 
// Returns a copy of \p source in which every character has been passed
// through tolower().  The result always has the same length as the input.
std::string
TfStringToLower(const std::string &source)
{
    std::string lower;
    lower.reserve(source.length());

    for (const char c : source) {
        // tolower() is only defined for EOF and values representable as
        // unsigned char; a plain char may be negative, so convert first.
        lower.push_back(
            static_cast<char>(tolower(static_cast<unsigned char>(c))));
    }

    return lower;
}
251 
// Returns a copy of \p source in which every character has been passed
// through toupper().  The result always has the same length as the input.
std::string
TfStringToUpper(const std::string &source)
{
    std::string upper;
    upper.reserve(source.length());

    for (const char c : source) {
        // toupper() is only defined for EOF and values representable as
        // unsigned char; a plain char may be negative, so convert first.
        upper.push_back(
            static_cast<char>(toupper(static_cast<unsigned char>(c))));
    }

    return upper;
}
265 
// Returns a copy of \p source whose first character has been passed through
// toupper(); all other characters are unchanged.  An empty input is returned
// as is.
std::string
TfStringCapitalize(const std::string& source)
{
    if (source.empty()) {
        return source;
    }

    std::string result(source);
    // toupper() is only defined for EOF and values representable as unsigned
    // char; a plain char may be negative, so convert first.
    result[0] = static_cast<char>(
        toupper(static_cast<unsigned char>(result[0])));

    return result;
}
278 
// Returns the longest string that is a prefix of both \p a and \p b.  The
// result is empty when the first characters differ or either input is empty.
std::string
TfStringGetCommonPrefix(std::string a, std::string b)
{
    // Walk the shorter string so the index is always valid in both.
    const bool bIsShorter = b.length() < a.length();
    const std::string &shorter = bIsShorter ? b : a;
    const std::string &longer  = bIsShorter ? a : b;

    std::string::size_type matched = 0;
    while (matched < shorter.size() &&
           shorter[matched] == longer[matched]) {
        ++matched;
    }

    return shorter.substr(0, matched);
}
290 
// Returns the part of \p name that follows the last occurrence of
// \p delimiter, or an empty string when the delimiter does not occur.
std::string
TfStringGetSuffix(const std::string& name, char delimiter)
{
    const std::string::size_type at = name.rfind(delimiter);
    if (at != std::string::npos) {
        return name.substr(at + 1);
    }
    return std::string("");
}
300 
// Returns the part of \p name that precedes the last occurrence of
// \p delimiter, or all of \p name when the delimiter does not occur.
std::string
TfStringGetBeforeSuffix(const std::string& name, char delimiter)
{
    const std::string::size_type at = name.rfind(delimiter);
    if (at != std::string::npos) {
        return name.substr(0, at);
    }
    return name;
}
310 
// Returns the last component of the path \p fileName.  An empty input is
// returned unchanged.  If the path ends in a directory separator, the
// separator is dropped and the base name of what remains is returned.
// On Windows both '\' and '/' are separators and the final component is
// located with PathFindFileNameW; elsewhere only '/' is a separator.
std::string
TfGetBaseName(const std::string& fileName)
{
    if (fileName.empty()) {
        return fileName;
    }

#if defined(ARCH_OS_WINDOWS)
    const std::string::size_type lastSep = fileName.find_last_of("\\/");
#else
    const std::string::size_type lastSep = fileName.rfind("/");
#endif

    // Trailing separator: strip it and start over on the shorter path.
    if (lastSep == fileName.size() - 1) {
        return TfGetBaseName(fileName.substr(0, lastSep));
    }

#if defined(ARCH_OS_WINDOWS)
    const std::wstring wfileName{ ArchWindowsUtf8ToUtf16(fileName) };
    LPWSTR result = PathFindFileNameW(wfileName.c_str());

    // When PathFindFileNameW hands the same pointer back it did not strip
    // anything.  Either the path has no base name (it contains a drive
    // letter or a separator), in which case the answer is the empty string,
    // or fileName already was a bare base name and is returned as is.
    if (result == wfileName.c_str()) {
        const bool hasDriveLetter = fileName.find(":") != std::string::npos;
        const bool hasPathSeparator = lastSep != std::string::npos;
        if (hasDriveLetter || hasPathSeparator) {
            return std::string();
        }
    }
    return ArchWindowsUtf16ToUtf8(result);

#else
    // No separator at all: the whole name is the base name.
    return (lastSep == std::string::npos) ? fileName
                                          : fileName.substr(lastSep + 1);
#endif
}
348 
// Returns the leading portion of \p fileName up to and including its last
// separator, or an empty string when it contains no separator.  On Windows
// the separators are '\', '/' and ':'; elsewhere only '/'.
std::string
TfGetPathName(const std::string& fileName)
{
#if defined(ARCH_OS_WINDOWS)
    const size_t lastSep = fileName.find_last_of("\\/:");
#else
    const size_t lastSep = fileName.rfind("/");
#endif
    if (lastSep != std::string::npos) {
        return fileName.substr(0, lastSep + 1);
    }
    return std::string("");
}
362 
// Returns \p s with every trailing character that appears in \p trimChars
// removed.  The result is empty if s consists only of such characters.
std::string
TfStringTrimRight(const std::string& s, const char* trimChars)
{
    const std::string::size_type lastKept = s.find_last_not_of(trimChars);
    if (lastKept == std::string::npos) {
        return std::string();
    }
    return s.substr(0, lastKept + 1);
}
368 
// Returns \p s with every leading character that appears in \p trimChars
// removed.  The result is empty if s consists only of such characters.
std::string
TfStringTrimLeft(const std::string &s, const char* trimChars)
{
    const std::string::size_type firstKept = s.find_first_not_of(trimChars);
    if (firstKept != std::string::npos) {
        return s.substr(firstKept);
    }
    return std::string();
}
375 
// Returns \p s with every leading and trailing character that appears in
// \p trimChars removed.  The result is empty if s consists only of such
// characters.
std::string
TfStringTrim(const std::string &s, const char* trimChars)
{
    const std::string::size_type firstKept = s.find_first_not_of(trimChars);
    if (firstKept == std::string::npos) {
        return std::string();
    }

    // At least one character survives, so a last kept character exists and
    // is not before the first one.
    const std::string::size_type lastKept = s.find_last_not_of(trimChars);
    return s.substr(firstKept, lastKept - firstKept + 1);
}
383 
// Returns a copy of \p source in which every non-overlapping occurrence of
// \p from, found scanning left to right, has been replaced by \p to.  Text
// inserted by a replacement is never rescanned.  If \p from is empty or equal
// to \p to, \p source is returned unchanged.
std::string
TfStringReplace(const std::string& source, const std::string& from,
                const std::string& to)
{
    if (from.empty() || from == to) {
        return source;
    }

    // Build the result in a single pass by appending the untouched stretches
    // of source and the replacement text.  This avoids shifting the tail of
    // the string on every hit, as an in-place replace does.
    std::string result;
    result.reserve(source.size());

    std::string::size_type cursor = 0;
    for (std::string::size_type hit = source.find(from, cursor);
         hit != std::string::npos;
         hit = source.find(from, cursor)) {
        result.append(source, cursor, hit - cursor);
        result.append(to);
        cursor = hit + from.size();
    }

    // Whatever follows the last match is copied verbatim.
    result.append(source, cursor, std::string::npos);
    return result;
}
400 
401 string
TfStringJoin(const vector<string> & strings,const char * separator)402 TfStringJoin(const vector<string>& strings, const char* separator)
403 {
404     return TfStringJoin(strings.begin(), strings.end(), separator);
405 }
406 
407 string
TfStringJoin(const set<string> & strings,const char * separator)408 TfStringJoin(const set<string>& strings, const char* separator)
409 {
410     return TfStringJoin(strings.begin(), strings.end(), separator);
411 }
412 
// Locates the tokens of \p src, i.e. the maximal runs of characters that are
// not in \p delimiters, and appends one (begin, end) pointer pair per token
// to \p segments.  Both pointers refer into src's own storage; 'end' points
// one past the token's last character, like an iterator pair.  Runs of
// delimiters never produce empty tokens.
static inline
void _TokenizeToSegments(std::string const &src, char const *delimiters,
                         std::vector<std::pair<char const *, char const *> > &segments)
{
    // Lookup table indexed by character value: non-zero for delimiters.
    // NOTE: memset is kept on purpose; an earlier measurement (10/07) found
    // it faster than aggregate initialization with gcc.
    char delimTable[256];
    memset(delimTable, 0, sizeof(delimTable));
    for (char const *d = delimiters; *d; ++d) {
        delimTable[static_cast<unsigned char>(*d)] = 1;
    }

    auto isDelimiter = [&delimTable](char ch) {
        return delimTable[static_cast<unsigned char>(ch)] != 0;
    };

    // A small amount of reservation seems to help.
    segments.reserve(8);

    char const *cur = src.data();
    char const *const end = cur + src.size();
    while (cur < end) {
        // Step over delimiters between tokens.
        if (isDelimiter(*cur)) {
            ++cur;
            continue;
        }

        // A token starts here and runs to the next delimiter or to the end.
        char const *const tokenBegin = cur;
        for (++cur; cur != end; ++cur) {
            if (isDelimiter(*cur)) {
                break;
            }
        }
        segments.push_back(std::make_pair(tokenBegin, cur));
    }
}
451 
// Splits \p src at every occurrence of the string \p separator and returns
// the pieces in order.  Adjacent separators, and a separator at either end
// of src, produce empty pieces.  An empty \p src or an empty \p separator
// yields an empty vector.
std::vector<std::string>
TfStringSplit(std::string const &src, std::string const &separator)
{
    std::vector<std::string> pieces;

    // XXX python throws a ValueError for an empty separator; we return
    // silently with no pieces instead.
    if (src.empty() || separator.empty()) {
        return pieces;
    }

    std::string::size_type start = 0;
    for (std::string::size_type hit = src.find(separator, start);
         hit != std::string::npos;
         hit = src.find(separator, start)) {
        pieces.push_back(src.substr(start, hit - start));
        start = hit + separator.size();
    }

    // Whatever follows the final separator is the last piece.
    pieces.push_back(src.substr(start));

    return pieces;
}
480 
481 vector<string>
TfStringTokenize(string const & src,const char * delimiters)482 TfStringTokenize(string const &src, const char* delimiters)
483 {
484     vector<pair<char const *, char const *> > segments;
485     _TokenizeToSegments(src, delimiters, segments);
486 
487     // Construct strings into the result vector from the segments of src.
488     vector<string> ret(segments.size());
489     for (size_t i = 0; i != segments.size(); ++i)
490         ret[i].append(segments[i].first, segments[i].second);
491     return ret;
492 }
493 
494 set<string>
TfStringTokenizeToSet(string const & src,const char * delimiters)495 TfStringTokenizeToSet(string const &src, const char* delimiters)
496 {
497     vector<pair<char const *, char const *> > segments;
498     _TokenizeToSegments(src, delimiters, segments);
499 
500     // Construct strings from the segments and insert them into the result.
501     set<string> ret;
502     for (size_t i = 0; i != segments.size(); ++i)
503         ret.insert(string(segments[i].first, segments[i].second));
504 
505     return ret;
506 }
507 
// Returns the index of the first character of \p source, at or after
// \p offset, that appears in \p toFind and is not immediately preceded by a
// backslash.  A match at index 0 is always accepted.  Returns string::npos
// if there is no such character.
static size_t
_FindFirstOfNotEscaped(const std::string &source, const char *toFind,
                       size_t offset)
{
    size_t hit = source.find_first_of(toFind, offset);

    for (;;) {
        const bool accept = (hit == std::string::npos) ||
                            (hit == 0) ||
                            (source[hit - 1] != '\\');
        if (accept) {
            return hit;
        }
        // Escaped by a preceding backslash: keep looking after it.
        hit = source.find_first_of(toFind, hit + 1);
    }
}
519 
// Splits \p source into tokens separated by any character in \p delimiters,
// while treating text enclosed in a pair of matching quote characters
// (double quote, single quote or backquote) as part of the current token
// even when it contains delimiters.  The enclosing quotes themselves are not
// copied into the token.  A quote character preceded by a backslash does not
// open or close a quoted section; the backslash is stripped from the token
// before it is stored.
//
// On failure an empty vector is returned and, if \p errors is non-NULL,
// *errors receives a description.  The failures are: \p delimiters contains
// one of the quote characters, or a quoted section is never closed.
// *errors is not written on success.
vector<string>
TfQuotedStringTokenize(const string &source, const char *delimiters,
                       string *errors)
{
    vector<string> resultVec;
    size_t j, quoteIndex, delimIndex;
    const char *quotes = "\"\'`";
    string token;

    // A delimiter set that overlaps the quote set would be ambiguous.
    if (strpbrk(delimiters, quotes) != NULL) {
        if (errors != NULL)
            *errors = "Cannot use quotes as delimiters.";

        return resultVec;
    }

    string quote;
    // 'i' always marks the start of the not-yet-consumed part of source.
    for (size_t i = 0; i < source.length();) {
        // Eat leading delimiters.
        i = source.find_first_not_of(delimiters, i);

        if (i == string::npos) {
            // Nothing left but delimiters.
            break;
        }

        quote.erase();
        token.erase();

        // As long as an unescaped quote comes before the next delimiter, the
        // current token contains a quoted section; consume it.  Both indices
        // are recomputed from the current 'i' on every pass.
        while ((quoteIndex = _FindFirstOfNotEscaped(source, quotes, i)) <
               (delimIndex = source.find_first_of(delimiters, i))) {

            // Push the token from 'i' until the first quote.
            if (i < quoteIndex)
                token += source.substr(i, quoteIndex - i);

            // Find matching quote. Again, we skip quotes that have been
            // escaped with a preceding backslash.
            j = quoteIndex;
            quote = source[j];
            j = _FindFirstOfNotEscaped(source, quote.c_str(), j + 1);

            // If we've reached the end of the string, then we are
            // missing an end-quote.
            if (j == string::npos) {
                if (errors != NULL) {
                    *errors = TfStringPrintf(
                        "String is missing an end-quote (\'%s\'): %s",
                        quote.c_str(), source.c_str());
                }
                resultVec.clear();
                return resultVec;
            }

            // Push the token between the quotes.
            if (quoteIndex + 1 < j)
                token += source.substr(quoteIndex + 1, j - (quoteIndex + 1));

            // Advance past the quote.
            i = j + 1;
        }

        // Push token.  The unquoted remainder runs up to the next delimiter,
        // or to the end of source if there is none.
        if (delimIndex == string::npos)
            token += source.substr(i);
        else
            token += source.substr(i, delimIndex - i);

        // If there are quote characters in 'token', we strip away any
        // preceding backslash before adding it to our results.
        for(size_t q = 0; q < strlen(quotes); ++ q)
            token = TfStringReplace(token, string("\\") + quotes[q],
                                    string() + quotes[q]);

        resultVec.push_back(token);

        if (delimIndex == string::npos)
            break;
        else {
            // Set up for next loop.
            i = delimIndex + 1;
        }
    }
    return resultVec;
}
605 
// Extracts the substrings of \p source that are enclosed between
// \p openDelimiter and \p closeDelimiter and returns them in order of
// appearance; text outside the delimiters is not returned.  When the two
// delimiters differ, nested pairs are counted, so a token ends only at the
// close delimiter that balances the open delimiter that started it.  An
// occurrence of \p escapeCharacter inside a token is removed and the
// character after it is copied into the token literally ('\0' disables
// escaping, since it is then not searched for).
//
// On failure an empty vector is returned and, if \p errors is non-NULL,
// *errors receives a description.  The failures are: the escape character
// equals one of the delimiters, an unescaped close delimiter precedes the
// first open delimiter, an open delimiter is never balanced, or an unescaped
// close delimiter follows the last token.  *errors is not written on
// success.
vector<string>
TfMatchedStringTokenize(const string& source,
                        char openDelimiter,
                        char closeDelimiter,
                        char escapeCharacter,
                        string *errors)
{
    vector<string> resultVec;

    if ((escapeCharacter == openDelimiter) ||
        (escapeCharacter == closeDelimiter)) {
        if (errors != NULL)
            *errors = "Escape character cannot be a delimiter.";
        return resultVec;
    }

    // If a close delimiter appears before an open delimiter, and it's not
    // preceded by the escape character, we have mismatched delimiters.
    size_t closeIndex = source.find(closeDelimiter);
    if ((closeIndex != string::npos) &&
        ((closeIndex == 0) || (source[closeIndex - 1] != escapeCharacter)) &&
        (closeIndex < source.find(openDelimiter))) {
        if (errors != NULL) {
            *errors = TfStringPrintf(
                    "String has unmatched close delimiter ('%c', '%c'): %s",
                    openDelimiter, closeDelimiter, source.c_str());
        }
        return resultVec;
    }

    bool sameDelimiters = (openDelimiter == closeDelimiter);

    // The set of characters the inner scan stops at: the escape character
    // (if any) and the delimiter(s).
    string specialChars;
    if (escapeCharacter != '\0')
        specialChars += escapeCharacter;

    specialChars += openDelimiter;
    if (!sameDelimiters)
        specialChars += closeDelimiter;

    size_t openIndex = 0, nextIndex = 0;
    size_t openCount, closeCount;
    size_t sourceSize = source.size();

    // Each pass of this loop extracts one top-level token.
    while((openIndex = source.find(openDelimiter, openIndex)) != string::npos) {
        openCount = 1;
        closeCount = 0;
        nextIndex = openIndex;

        string token;
        // Scan forward until opens and closes balance.
        while(closeCount != openCount) {
            nextIndex = source.find_first_of(specialChars, nextIndex + 1);
            if(nextIndex == string::npos) {
                if (errors != NULL) {
                    *errors = TfStringPrintf(
                      "String has unmatched open delimiter ('%c', '%c'): %s",
                      openDelimiter, closeDelimiter, source.c_str());
                }
                resultVec.clear();
                return resultVec;
            }

            if (source[nextIndex] == escapeCharacter) {
                // Get character immediately after the escape character.
                size_t index = nextIndex + 1;
                // NOTE(review): this only handles the escape when the escaped
                // character is not the last character of source; otherwise
                // the escape character is left in place and scanning simply
                // continues -- confirm this is intended.
                if (index < sourceSize - 1) {
                    // Add the substring to 'token'. We remove the escape
                    // character but add the character immediately after it.
                    token += source.substr(openIndex + 1,
                                           nextIndex - openIndex - 1) +
                        source[index];

                    // Reset indices for the next iteration.  From here on
                    // openIndex marks the last character already copied into
                    // 'token', not the position of the open delimiter.
                    openIndex = index;
                    nextIndex = index;
                }
            }
            else if (!sameDelimiters && (source[nextIndex] == openDelimiter))
                openCount ++;
            else
                closeCount ++;
        }

        // Append whatever lies between the last copied position and the
        // balancing close delimiter.
        if (nextIndex > openIndex + 1)
            token += source.substr(openIndex + 1, nextIndex - openIndex - 1);

        resultVec.push_back(token);
        openIndex = nextIndex + 1;
    }

    // If a close delimiter appears after our last token, we have mismatched
    // delimiters.
    // NOTE(review): only the first close delimiter after the last token is
    // examined; if it is escaped, later ones are not checked -- confirm.
    closeIndex = source.find(closeDelimiter, nextIndex + 1);
    if ((closeIndex != string::npos) &&
        (source[closeIndex - 1] != escapeCharacter)) {
        if (errors != NULL) {
            *errors = TfStringPrintf(
                    "String has unmatched close delimiter ('%c', '%c'): %s",
                    openDelimiter, closeDelimiter, source.c_str());
        }
        resultVec.clear();
        return resultVec;
    }

    return resultVec;
}
712 
namespace { // helpers for DictionaryLess

// True if \p ch is one of the characters '0' through '9'.
inline bool IsDigit(char ch)
{
    return ch >= '0' && ch <= '9';
}

// Maps 'A'..'Z' to 'a'..'z' by setting bit 5; every other character is
// returned unchanged.
inline char Lower(char ch)
{
    if (ch >= 'A' && ch <= 'Z') {
        return static_cast<char>(ch | 32);
    }
    return ch;
}

// Reads the decimal number starting at \p s and advances \p s past it.  The
// first character is consumed unconditionally, so the caller must make sure
// it is a digit; further characters are consumed while they are digits.
inline long
AtoL(char const * &s)
{
    long value = (*s++ - '0');
    while (IsDigit(*s)) {
        value = value * 10 + (*s++ - '0');
    }
    return value;
}

} // anon
729 
730 static bool
DictionaryLess(char const * l,char const * r)731 DictionaryLess(char const *l, char const *r)
732 {
733     int caseCmp = 0;
734     int leadingZerosCmp = 0;
735 
736     while (*l && *r) {
737         if (ARCH_UNLIKELY(IsDigit(*l) && IsDigit(*r))) {
738             char const *oldL = l, *oldR = r;
739             long lval = AtoL(l), rval = AtoL(r);
740             if (lval != rval)
741                 return lval < rval;
742             // Leading zeros difference only, record for later use.
743             if (!leadingZerosCmp)
744                 leadingZerosCmp = static_cast<int>((l-oldL) - (r-oldR));
745             continue;
746         }
747 
748         if (*l != *r) {
749             int lowL = Lower(*l), lowR = Lower(*r);
750             if (lowL != lowR)
751                 return lowL < lowR;
752 
753             // Case difference only, record that for later use.
754             if (!caseCmp)
755                 caseCmp = (lowL != *l) ? -1 : 1;
756         }
757 
758         ++l, ++r;
759     }
760 
761     // We are at the end of either one or both strings.  If not both, the
762     // shorter is considered less.
763     if (*l || *r)
764         return !*l;
765 
766     // Otherwise we look to differences in case or leading zeros, preferring
767     // leading zeros.
768     return (leadingZerosCmp < 0) || (caseCmp < 0);
769 }
770 
771 bool
operator ()(const string & lhs,const string & rhs) const772 TfDictionaryLessThan::operator()(const string& lhs, const string& rhs) const
773 {
774     return DictionaryLess(lhs.c_str(), rhs.c_str());
775 }
776 
// Returns "true" when \p v is set and "false" otherwise.
std::string
TfStringify(bool v)
{
    if (v) {
        return std::string("true");
    }
    return std::string("false");
}
782 
// String overload: the string form of a string is the string itself, so a
// copy of \p s is returned.
std::string
TfStringify(std::string const& s)
{
    return std::string(s);
}
788 
789 static
790 const
791 pxr_double_conversion::DoubleToStringConverter&
Tf_GetDoubleToStringConverter()792 Tf_GetDoubleToStringConverter()
793 {
794     static const pxr_double_conversion::DoubleToStringConverter conv(
795         pxr_double_conversion::DoubleToStringConverter::NO_FLAGS,
796         "inf",
797         "nan",
798         'e',
799         /* decimal_in_shortest_low */ -6,
800         /* decimal_in_shortest_high */ 15,
801         /* max_leading_padding_zeroes_in_precision_mode */ 0,
802         /* max_trailing_padding_zeroes_in_precision_mode */ 0);
803 
804     return conv;
805 }
806 
807 void
Tf_ApplyDoubleToStringConverter(float val,char * buffer,int bufferSize)808 Tf_ApplyDoubleToStringConverter(float val, char* buffer, int bufferSize)
809 {
810     const auto& conv = Tf_GetDoubleToStringConverter();
811     pxr_double_conversion::StringBuilder builder(buffer, bufferSize);
812     // This should only fail if we provide an insufficient buffer.
813     TF_VERIFY(conv.ToShortestSingle(val, &builder),
814               "double_conversion failed");
815 }
816 
817 void
Tf_ApplyDoubleToStringConverter(double val,char * buffer,int bufferSize)818 Tf_ApplyDoubleToStringConverter(double val, char* buffer, int bufferSize)
819 {
820     const auto& conv = Tf_GetDoubleToStringConverter();
821     pxr_double_conversion::StringBuilder builder(buffer, bufferSize);
822     // This should only fail if we provide an insufficient buffer.
823     TF_VERIFY(conv.ToShortest(val, &builder),
824               "double_conversion failed");
825 }
826 
827 std::string
TfStringify(float val)828 TfStringify(float val)
829 {
830     constexpr int bufferSize = 128;
831     char buffer[bufferSize];
832     Tf_ApplyDoubleToStringConverter(val, buffer, bufferSize);
833     return std::string(buffer);
834 }
835 
836 bool
TfDoubleToString(double val,char * buffer,int bufferSize,bool emitTrailingZero)837 TfDoubleToString(
838     double val, char* buffer, int bufferSize, bool emitTrailingZero)
839 {
840     if (bufferSize < 25) {
841         return false;
842     }
843     using DSC = pxr_double_conversion::DoubleToStringConverter;
844     int flags = DSC::NO_FLAGS;
845     if (emitTrailingZero) {
846         flags = DSC::EMIT_TRAILING_DECIMAL_POINT
847             | DSC::EMIT_TRAILING_ZERO_AFTER_POINT;
848     }
849     const DSC conv(
850         flags,
851         "inf",
852         "nan",
853         'e',
854         /* decimal_in_shortest_low */ -6,
855         /* decimal_in_shortest_high */ 15,
856         /* max_leading_padding_zeroes_in_precision_mode */ 0,
857         /* max_trailing_padding_zeroes_in_precision_mode */ 0);
858     pxr_double_conversion::StringBuilder builder(buffer, bufferSize);
859     // This should only fail if we provide an insufficient buffer.
860     return conv.ToShortest(val, &builder);
861 }
862 
863 std::string
TfStringify(double val)864 TfStringify(double val)
865 {
866     constexpr int bufferSize = 128;
867     char buffer[bufferSize];
868     Tf_ApplyDoubleToStringConverter(val, buffer, bufferSize);
869     return std::string(buffer);
870 }
871 
872 std::ostream&
operator <<(std::ostream & o,TfStreamFloat t)873 operator<<(std::ostream& o, TfStreamFloat t)
874 {
875     constexpr int bufferSize = 128;
876     char buffer[bufferSize];
877     Tf_ApplyDoubleToStringConverter(t.value, buffer, bufferSize);
878     return o << buffer;
879 }
880 
881 std::ostream&
operator <<(std::ostream & o,TfStreamDouble t)882 operator<<(std::ostream& o, TfStreamDouble t)
883 {
884     constexpr int bufferSize = 128;
885     char buffer[bufferSize];
886     Tf_ApplyDoubleToStringConverter(t.value, buffer, bufferSize);
887     return o << buffer;
888 }
889 
890 template <>
891 bool
TfUnstringify(const std::string & instring,bool *)892 TfUnstringify(const std::string &instring, bool*)
893 {
894     return (strcmp(instring.c_str(), "true") == 0) ||
895            (strcmp(instring.c_str(), "1") == 0) ||
896            (strcmp(instring.c_str(), "yes") == 0) ||
897            (strcmp(instring.c_str(), "on") == 0);
898 }
899 
900 template <>
901 std::string
TfUnstringify(std::string const & s,bool *)902 TfUnstringify(std::string const& s, bool*)
903 {
904     return s;
905 }
906 
// Converts the glob pattern \p s into a regular expression:
//   '.' becomes '\.', '*' becomes '.*' and '?' becomes '.'.
// Every other character is copied through unchanged.
//
// The translation is done in a single pass; a '.' produced for '*' or '?'
// is never re-escaped because each input character is visited once.
//
// TODO: could handle {,,} and do (||), although these are not part of the
// glob syntax.
std::string
TfStringGlobToRegex(const std::string& s)
{
    std::string ret;
    // No character expands to more than two, so this never reallocates.
    ret.reserve(s.size() * 2);

    for (const char ch : s) {
        switch (ch) {
            case '.': ret += "\\."; break;
            case '*': ret += ".*";  break;
            case '?': ret += '.';   break;
            default:  ret += ch;    break;
        }
    }
    return ret;
}
919 
/*
** Process escape sequences in ANSI C string constants.  An unrecognized
** escape sequence loses its backslash and keeps the character after it.
** Adapted from Duff code.
*/

// Returns true if c is one of the octal digits '0' through '7'.
static bool
_IsOctalDigit(const char c)
{
    return (('0' <= c) && (c <= '7'));
}

// Returns the numeric value of the octal digit c.  No validation is done
// here; the caller checks the character first.
static unsigned char
_OctalToDecimal(const char c)
{
    return (c - '0');
}

// Returns the numeric value of the hexadecimal digit c, accepting either
// letter case.  No validation is done here; the caller checks the
// character first.
static unsigned char
_HexToDecimal(const char c)
{
    if (('a' <= c) && (c <= 'f')) return ((c - 'a') + 10);
    if (('A' <= c) && (c <= 'F')) return ((c - 'A') + 10);

    return (c - '0');
}

// Decodes one escape sequence.
//
// On entry *c points at the backslash that introduces the sequence and
// *out points at the next free output character.  Exactly one character is
// written to *out, which is then advanced.  On return *c points at the
// last input character that was consumed, so a caller that increments *c
// afterwards lands on the first character following the sequence.
//
// Recognized sequences are \\ \a \b \f \n \r \t \v, \x followed by up to
// two hex digits, and up to three octal digits.  For any other character
// the backslash is dropped and the character is emitted as is.  A backslash
// that is the last character before the terminating NUL is emitted
// unchanged.
void
TfEscapeStringReplaceChar(const char** c, char** out)
{
    // A backslash directly in front of the terminator has nothing to
    // escape.  Emit it and leave *c where it is: stepping onto the NUL here
    // would make the caller's subsequent increment walk past the end of
    // the input.
    if ((*c)[1] == '\0') {
        *(*out)++ = **c;
        return;
    }

    switch (*++(*c))
    {
        default:  *(*out)++ = **c; break;
        case '\\': *(*out)++ = '\\'; break; // backslash
        case 'a': *(*out)++ = '\a'; break; // bel
        case 'b': *(*out)++ = '\b'; break; // bs
        case 'f': *(*out)++ = '\f'; break; // np
        case 'n': *(*out)++ = '\n'; break; // nl
        case 'r': *(*out)++ = '\r'; break; // cr
        case 't': *(*out)++ = '\t'; break; // ht
        case 'v': *(*out)++ = '\v'; break; // vt
        case 'x':
        {
            // Allow only up to 2 hex digits.  The cast keeps bytes >= 0x80
            // from reaching isxdigit() as negative values, which is
            // undefined behavior.
            unsigned char n = 0;
            for (int nd = 0;
                 isxdigit(static_cast<unsigned char>(*++(*c))) && nd != 2;
                 ++nd) {
                n = (n * 16) + _HexToDecimal(**c);
            }
            // The loop stops one character too far; step back onto the
            // last character that belongs to the sequence.
            --(*c);
            *(*out)++ = n;
            break;
        }
        case '0': case '1': case '2': case '3':
        case '4': case '5': case '6': case '7':
        {
            // Allow only up to 3 octal digits.  Step back first so that the
            // digit the switch already consumed is seen by the loop.
            --(*c);
            unsigned char n = 0;
            for (int nd = 0; _IsOctalDigit(*++(*c)) && nd != 3; ++nd) {
                n = (n * 8) + _OctalToDecimal(**c);
            }
            --(*c);
            *(*out)++ = n;
            break;
        }
    }
}
985 
986 std::string
TfEscapeString(const std::string & in)987 TfEscapeString(const std::string &in)
988 {
989     // We use type char and a deleter for char[] instead of just using
990     // type char[] due to a (now fixed) bug in libc++ in LLVM.  See
991     // https://llvm.org/bugs/show_bug.cgi?id=18350.
992     std::unique_ptr<char,
993                     std::default_delete<char[]>> out(new char[in.size()+1]);
994     char *outp = out.get();
995 
996     for (const char *c = in.c_str(); *c; ++c)
997     {
998         if (*c != '\\') {
999             *outp++ = *c;
1000             continue;
1001         }
1002         TfEscapeStringReplaceChar(&c,&outp);
1003 
1004     }
1005     *outp++ = '\0';
1006     return std::string(out.get(), outp - out.get() - 1);
1007 }
1008 
1009 string
TfStringCatPaths(const string & prefix,const string & suffix)1010 TfStringCatPaths( const string &prefix, const string &suffix )
1011 {
1012     return TfNormPath(prefix + "/" + suffix);
1013 }
1014 
// Returns an identifier derived from \p in: every character that is not an
// ASCII letter, digit or underscore is replaced by '_', and so is a digit
// in the first position.  An empty input yields "_".
//
// The whole string is examined, including any embedded NUL characters,
// which are replaced like every other invalid character; the result always
// has the same length as a non-empty input.
std::string
TfMakeValidIdentifier(const std::string &in)
{
    if (in.empty()) {
        return std::string(1, '_');
    }

    const auto isLetterOrUnderscore = [](const char ch) {
        return ('a' <= ch && ch <= 'z') ||
               ('A' <= ch && ch <= 'Z') ||
               ch == '_';
    };
    const auto isDigit = [](const char ch) {
        return '0' <= ch && ch <= '9';
    };

    std::string result(in);
    for (std::string::size_type i = 0; i != result.size(); ++i) {
        const char ch = result[i];
        // Digits are fine anywhere except in the leading position.
        const bool valid =
            isLetterOrUnderscore(ch) || (i != 0 && isDigit(ch));
        if (!valid) {
            result[i] = '_';
        }
    }
    return result;
}
1047 
// Returns a copy of \p in in which the five characters that are special in
// XML are replaced by their entities:
//   &  ->  &amp;     <  ->  &lt;     >  ->  &gt;
//   "  ->  &quot;    '  ->  &apos;
// A string that contains none of them is returned unchanged.
//
// The input is rewritten in one pass, so the '&' that starts an emitted
// entity is never escaped a second time.
std::string
TfGetXmlEscapedString(const std::string &in)
{
    const std::string::size_type first = in.find_first_of("&<>\"'");
    if (first == std::string::npos)
        return in;

    std::string result;
    // Lower bound only; the string grows as entities are appended.
    result.reserve(in.size() + 8);

    // Everything in front of the first special character is plain text.
    result.append(in, 0, first);

    for (std::string::size_type i = first; i != in.size(); ++i) {
        switch (in[i]) {
            case '&':  result += "&amp;";  break;
            case '<':  result += "&lt;";   break;
            case '>':  result += "&gt;";   break;
            case '"':  result += "&quot;"; break;
            case '\'': result += "&apos;"; break;
            default:   result += in[i];    break;
        }
    }
    return result;
}
1064 
1065 PXR_NAMESPACE_CLOSE_SCOPE
1066