1 // Copyright 2008-present Contributors to the OpenImageIO project.
2 // SPDX-License-Identifier: BSD-3-Clause
3 // https://github.com/OpenImageIO/oiio/blob/master/LICENSE.md
4 
5 // clang-format off
6 
7 /////////////////////////////////////////////////////////////////////////
8 /// @file  strutil.h
9 ///
10 /// @brief String-related utilities, all in namespace Strutil.
11 /////////////////////////////////////////////////////////////////////////
12 
13 
14 
15 #pragma once
16 
17 #include <cstdio>
18 #include <map>
19 #include <sstream>
20 #include <string>
21 #include <vector>
22 
23 #include <OpenImageIO/export.h>
24 #include <OpenImageIO/hash.h>
25 #include <OpenImageIO/oiioversion.h>
26 #include <OpenImageIO/platform.h>
27 #include <OpenImageIO/string_view.h>
28 
29 #include <OpenImageIO/detail/farmhash.h>
30 
31 #if OIIO_GNUC_VERSION >= 70000
32 #    pragma GCC diagnostic push
33 #    pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
34 #endif
35 #ifndef FMT_HEADER_ONLY
36 #    define FMT_HEADER_ONLY
37 #endif
38 #ifndef FMT_EXCEPTIONS
39 #    define FMT_EXCEPTIONS 0
40 #endif
41 #ifndef FMT_USE_GRISU
42 #    define FMT_USE_GRISU 1
43 #endif
44 #include "detail/fmt/ostream.h"
45 #include "detail/fmt/format.h"
46 #include "detail/fmt/printf.h"
47 #if OIIO_GNUC_VERSION >= 70000
48 #    pragma GCC diagnostic pop
49 #endif
50 
// Allow client software to know if this version of OIIO has Strutil::sprintf
52 #define OIIO_HAS_SPRINTF 1
53 
54 // Allow client software to know if this version of OIIO has Strutil::format
55 // behave like sprintf (OIIO_FORMAT_IS_FMT==0) or like python / {fmt} /
56 // C++20ish std::format (OIIO_FORMAT_IS_FMT==1).
57 #define OIIO_FORMAT_IS_FMT 0
58 
59 // Allow client software to know that at this moment, the fmt-based string
60 // formatting is locale-independent. This was 0 in older versions when fmt
61 // was locale dependent.
62 #define OIIO_FMT_LOCALE_INDEPENDENT 1
63 
64 
65 
66 OIIO_NAMESPACE_BEGIN
67 /// @namespace Strutil
68 ///
69 /// @brief     String-related utilities.
70 namespace Strutil {
71 
72 /// Output the string to the file/stream in a synchronized fashion, so that
73 /// buffers are flushed and internal mutex is used to prevent threads from
74 /// clobbering each other -- output strings coming from concurrent threads
75 /// may be interleaved, but each string is "atomic" and will never splice
76 /// each other character-by-character.
77 void OIIO_API sync_output (FILE *file, string_view str);
78 void OIIO_API sync_output (std::ostream &file, string_view str);
79 
80 
81 /// Construct a std::string in a printf-like fashion.  For example:
82 ///
83 ///    std::string s = Strutil::sprintf ("blah %d %g", (int)foo, (float)bar);
84 ///
85 /// Uses the fmt library underneath, so it's fully type-safe, and
86 /// works with any types that understand stream output via '<<'.
87 /// The formatting of the string will always use the classic "C" locale
88 /// conventions (in particular, '.' as decimal separator for float values).
89 template<typename... Args>
sprintf(const char * fmt,const Args &...args)90 inline std::string sprintf (const char* fmt, const Args&... args)
91 {
92     return ::fmt::sprintf (fmt, args...);
93 }
94 
95 
96 
97 /// format() constructs formatted strings. Note that this is in transition!
98 ///
99 /// Strutil::old::format() uses printf conventions and matches format() used
100 /// in OIIO 1.x. It is equivalent to Strutil::sprintf().
101 ///
///    std::string s = Strutil::old::format ("blah %d %g", (int)foo, (float)bar);
103 ///
104 /// Strutil::fmt::format() uses "Python" conventions, in the style of string
105 /// formatting used by C++20 std::format and implemented today in the {fmt}
106 /// package (https://github.com/fmtlib/fmt). For example:
107 ///
///    std::string s = Strutil::fmt::format ("blah {}  {}", (int)foo, (float)bar);
109 ///
110 /// Straight-up Strutil::format is today aliased to old::format for the sake
111 /// of back-compatibility, but will someday be switched to fmt::format.
112 ///
113 /// Recommended strategy for users:
114 /// * If you want printf conventions, switch to Strutil::sprintf().
115 /// * If you want to use the python conventions prior to the big switch,
116 ///   use Strutil::fmt::format() explicitly (but see the caveat below).
/// * Use of unspecified Strutil::format() is, for back compatibility,
///   currently equivalent to sprintf, but beware that at some point it will
///   switch to the future-standard formatting rules.
120 ///
121 
namespace fmt {
/// Build a std::string from the format string `fmt` and `args` by
/// forwarding them to the global {fmt} library's format().
template<typename... Args>
inline std::string format (const char* fmt, const Args&... args)
{
    std::string formatted = ::fmt::format (fmt, args...);
    return formatted;
}
} // namespace fmt
129 
130 namespace old {
131 template<typename... Args>
format(const char * fmt,const Args &...args)132 inline std::string format (const char* fmt, const Args&... args)
133 {
134     return Strutil::sprintf (fmt, args...);
135 }
136 
137 // DEPRECATED(2.0) string_view version. Phasing this out because
138 // std::string_view won't have a c_str() method.
139 template<typename... Args>
format(string_view fmt,const Args &...args)140 inline std::string format (string_view fmt, const Args&... args)
141 {
142     return format (fmt.c_str(), args...);
143 }
144 } // namespace old
145 
146 
147 
148 using old::format;
149 
150 
151 
/// Strutil::printf (fmt, ...)
/// Strutil::fprintf (FILE*, fmt, ...)
/// Strutil::fprintf (ostream&, fmt, ...)
155 ///
156 /// Output formatted strings to stdout, a FILE*, or a stream, respectively.
157 /// All use printf-like formatting rules, are type-safe, are thread-safe
158 /// (the outputs are "atomic", at least versus other calls to
159 /// Strutil::*printf), and automatically flush their outputs. They are all
160 /// locale-independent (forcing classic "C" locale).
161 
162 template<typename... Args>
printf(const char * fmt,const Args &...args)163 inline void printf (const char* fmt, const Args&... args)
164 {
165     sync_output (stdout, Strutil::sprintf(fmt, args...));
166 }
167 
168 template<typename... Args>
fprintf(FILE * file,const char * fmt,const Args &...args)169 inline void fprintf (FILE *file, const char* fmt, const Args&... args)
170 {
171     sync_output (file, Strutil::sprintf(fmt, args...));
172 }
173 
174 template<typename... Args>
fprintf(std::ostream & file,const char * fmt,const Args &...args)175 inline void fprintf (std::ostream &file, const char* fmt, const Args&... args)
176 {
177     sync_output (file, Strutil::sprintf(fmt, args...));
178 }
179 
180 
181 
182 /// Strutil::print (fmt, ...)
183 /// Strutil::print (FILE*, fmt, ...)
/// Strutil::print (ostream&, fmt, ...)
185 ///
186 /// Output formatted strings to stdout, a FILE*, or a stream, respectively.
187 /// All use "Python-like" formatting description (as {fmt} does, and some
188 /// day, std::format), are type-safe, are thread-safe (the outputs are
189 /// "atomic", at least versus other calls to Strutil::*printf), and
190 /// automatically flush their outputs. They are all locale-independent by
191 /// default (use {:n} for locale-aware formatting).
192 
193 template<typename... Args>
print(const char * fmt,const Args &...args)194 inline void print (const char* fmt, const Args&... args)
195 {
196     sync_output (stdout, Strutil::fmt::format(fmt, args...));
197 }
198 
199 template<typename... Args>
print(FILE * file,const char * fmt,const Args &...args)200 inline void print (FILE *file, const char* fmt, const Args&... args)
201 {
202     sync_output (file, Strutil::fmt::format(fmt, args...));
203 }
204 
205 template<typename... Args>
print(std::ostream & file,const char * fmt,const Args &...args)206 inline void print (std::ostream &file, const char* fmt, const Args&... args)
207 {
208     sync_output (file, Strutil::fmt::format(fmt, args...));
209 }
210 
211 
212 
213 
214 /// Return a std::string formatted from printf-like arguments -- passed
215 /// already as a va_list.  This is not guaranteed type-safe and is not
216 /// extensible like format(). Use with caution!
217 std::string OIIO_API vsprintf (const char *fmt, va_list ap)
218 #if defined(__GNUC__) && !defined(__CUDACC__)
219     __attribute__ ((format (printf, 1, 0) ))
220 #endif
221     ;
222 
223 /// Return a std::string formatted like Strutil::format, but passed
224 /// already as a va_list.  This is not guaranteed type-safe and is not
225 /// extensible like format(). Use with caution!
226 OIIO_DEPRECATED("use `vsprintf` instead")
227 std::string OIIO_API vformat (const char *fmt, va_list ap)
228 #if defined(__GNUC__) && !defined(__CUDACC__)
229     __attribute__ ((format (printf, 1, 0) ))
230 #endif
231     ;
232 
233 /// Return a string expressing a number of bytes, in human readable form.
234 ///  - memformat(153)           -> "153 B"
235 ///  - memformat(15300)         -> "14.9 KB"
236 ///  - memformat(15300000)      -> "14.6 MB"
237 ///  - memformat(15300000000LL) -> "14.2 GB"
238 std::string OIIO_API memformat (long long bytes, int digits=1);
239 
240 /// Return a string expressing an elapsed time, in human readable form.
241 /// e.g. "0:35.2"
242 std::string OIIO_API timeintervalformat (double secs, int digits=1);
243 
244 
245 /// Get a map with RESTful arguments extracted from the given string 'str'.
246 /// Add it into the 'result' argument (Warning: the 'result' argument may
247 /// be changed even if 'get_rest_arguments ()' return an error!).
248 /// Return true on success, false on error.
249 /// Acceptable forms:
250 ///  - text?arg1=val1&arg2=val2...
251 ///  - ?arg1=val1&arg2=val2...
252 /// Everything before question mark will be saved into the 'base' argument.
253 bool OIIO_API get_rest_arguments (const std::string &str, std::string &base,
254                                    std::map<std::string, std::string> &result);
255 
256 /// Take a string that may have embedded newlines, tabs, etc., and turn
257 /// those characters into escape sequences like `\n`, `\t`, `\v`, `\b`,
258 /// `\r`, `\f`, `\a`, `\\`, `\"`.
259 std::string OIIO_API escape_chars (string_view unescaped);
260 
261 /// Take a string that has embedded escape sequences (`\\`, `\"`, `\n`,
262 /// etc.) and collapse them into the 'real' characters.
263 std::string OIIO_API unescape_chars (string_view escaped);
264 
/// Word-wrap string `src` to no more than `columns` width, starting with an
/// assumed position of `prefix` on the first line and indenting by `prefix`
/// blanks before all lines other than the first.
268 ///
269 /// Words may be split AT any characters in `sep` or immediately AFTER any
270 /// characters in `presep`. After the break, any extra `sep` characters will
271 /// be deleted.
272 ///
273 /// By illustration,
274 ///     wordwrap("0 1 2 3 4 5 6 7 8", 10, 4)
275 /// should return:
276 ///     "0 1 2\n    3 4 5\n    6 7 8"
277 std::string OIIO_API wordwrap (string_view src, int columns = 80,
278                                int prefix = 0, string_view sep = " ",
279                                string_view presep = "");
280 
281 
282 /// Our favorite "string" hash of a length of bytes. Currently, it is just
283 /// a wrapper for an inlined, constexpr (if C++ >= 14), Cuda-safe farmhash.
284 inline OIIO_CONSTEXPR14 size_t
strhash(size_t len,const char * s)285 strhash (size_t len, const char *s)
286 {
287     return OIIO::farmhash::inlined::Hash(s, len);
288 }
289 
290 
291 /// Hash a string_view. This is OIIO's default favorite string hasher.
292 /// Currently, it uses farmhash, is constexpr (for C++14), and works in
293 /// Cuda. This is rigged, though, so that empty strings hash always hash to
294 /// 0 (that isn't would a raw farmhash would give you, but it's a useful
295 /// property, especially for trivial initialization).
296 inline OIIO_CONSTEXPR14 size_t
strhash(string_view s)297 strhash (string_view s)
298 {
299     return s.length() ? strhash(s.length(), s.data()) : 0;
300 }
301 
302 
303 
304 /// Case-insensitive comparison of strings.  For speed, this always uses
305 /// a static locale that doesn't require a mutex.
306 bool OIIO_API iequals (string_view a, string_view b);
307 
308 /// Case-insensitive ordered comparison of strings.  For speed, this always
309 /// uses a static locale that doesn't require a mutex.
310 bool OIIO_API iless (string_view a, string_view b);
311 
312 /// Does 'a' start with the string 'b', with a case-sensitive comparison?
313 bool OIIO_API starts_with (string_view a, string_view b);
314 
315 /// Does 'a' start with the string 'b', with a case-insensitive comparison?
316 /// For speed, this always uses a static locale that doesn't require a mutex.
317 bool OIIO_API istarts_with (string_view a, string_view b);
318 
319 /// Does 'a' end with the string 'b', with a case-sensitive comparison?
320 bool OIIO_API ends_with (string_view a, string_view b);
321 
322 /// Does 'a' end with the string 'b', with a case-insensitive comparison?
323 /// For speed, this always uses a static locale that doesn't require a mutex.
324 bool OIIO_API iends_with (string_view a, string_view b);
325 
326 /// Does 'a' contain the string 'b' within it?
327 bool OIIO_API contains (string_view a, string_view b);
328 
/// Return the position of the first occurrence of `b` within `a`, or
/// npos if not found.
331 size_t OIIO_API find(string_view a, string_view b);
332 
/// Return the position of the first occurrence of `b` within `a`, with a
/// case-insensitive comparison, or npos if not found.
335 size_t OIIO_API ifind(string_view a, string_view b);
336 
337 /// Return the position of the last occurrence of `b` within `a`, or npos if
338 /// not found.
339 size_t OIIO_API rfind(string_view a, string_view b);
340 
341 /// Return the position of the last occurrence of `b` within `a`, with a
342 /// case-insensitive comparison, or npos if not found.
343 size_t OIIO_API irfind(string_view a, string_view b);
344 
345 /// Does 'a' contain the string 'b' within it, using a case-insensitive
346 /// comparison?
347 bool OIIO_API icontains (string_view a, string_view b);
348 
/// Convert to lower case in place, faster than std::tolower because we use
/// a static locale that doesn't require a mutex lock.
351 void OIIO_API to_lower (std::string &a);
352 
353 /// Convert to upper case in place, faster than std::toupper because we use
354 /// a static locale that doesn't require a mutex lock.
355 void OIIO_API to_upper (std::string &a);
356 
357 /// Return an all-upper case version of `a` (locale-independent).
lower(string_view a)358 inline std::string lower (string_view a) {
359     std::string result(a);
360     to_lower(result);
361     return result;
362 }
363 
364 /// Return an all-upper case version of `a` (locale-independent).
upper(string_view a)365 inline std::string upper (string_view a) {
366     std::string result(a);
367     to_upper(result);
368     return result;
369 }
370 
371 
372 
373 /// Return a reference to the section of str that has all consecutive
374 /// characters in chars removed from the beginning and ending.  If chars is
375 /// empty, it will be interpreted as " \t\n\r\f\v" (whitespace).
376 string_view OIIO_API strip (string_view str, string_view chars=string_view());
377 
378 /// Return a reference to the section of str that has all consecutive
379 /// characters in chars removed from the beginning (left side).  If chars is
380 /// empty, it will be interpreted as " \t\n\r\f\v" (whitespace).
381 string_view OIIO_API lstrip (string_view str, string_view chars=string_view());
382 
383 /// Return a reference to the section of str that has all consecutive
384 /// characters in chars removed from the ending (right side).  If chars is
385 /// empty, it will be interpreted as " \t\n\r\f\v" (whitespace).
386 string_view OIIO_API rstrip (string_view str, string_view chars=string_view());
387 
388 
389 /// Fills the "result" list with the words in the string, using sep as
390 /// the delimiter string.  If maxsplit is > -1, at most maxsplit splits
391 /// are done. If sep is "", any whitespace string is a separator.  If the
392 /// source `str` is empty, there will be zero pieces.
393 void OIIO_API split (string_view str, std::vector<string_view> &result,
394                      string_view sep = string_view(), int maxsplit = -1);
395 void OIIO_API split (string_view str, std::vector<std::string> &result,
396                      string_view sep = string_view(), int maxsplit = -1);
397 
398 /// Split the contents of `str` using `sep` as the delimiter string. If
399 /// `sep` is "", any whitespace string is a separator. If `maxsplit > -1`,
400 /// at most `maxsplit` split fragments will be produced (for example,
401 /// maxsplit=2 will split at only the first separator, yielding at most two
402 /// fragments). The result is returned as a vector of std::string (for
403 /// `splits()`) or a vector of string_view (for `splitsv()`). If the source
404 /// `str` is empty, there will be zero pieces.
405 OIIO_API std::vector<std::string>
406 splits (string_view str, string_view sep = "", int maxsplit = -1);
407 OIIO_API std::vector<string_view>
408 splitsv (string_view str, string_view sep = "", int maxsplit = -1);
409 
410 /// Join all the strings in 'seq' into one big string, separated by the
411 /// 'sep' string. The Sequence can be any iterable collection of items that
412 /// are able to convert to string via stream output. Examples include:
413 /// std::vector<string_view>, std::vector<std::string>, std::set<ustring>,
414 /// std::vector<int>, etc.
415 template<class Sequence>
416 std::string join (const Sequence& seq, string_view sep="")
417 {
418     std::ostringstream out;
419     out.imbue(std::locale::classic());  // Force "C" locale
420     bool first = true;
421     for (auto&& s : seq) {
422         if (! first && sep.size())
423             out << sep;
424         out << s;
425         first = false;
426     }
427     return out.str();
428 }
429 
430 /// Join all the strings in 'seq' into one big string, separated by the
431 /// 'sep' string. The Sequence can be any iterable collection of items that
432 /// are able to convert to string via stream output. Examples include:
433 /// std::vector<string_view>, std::vector<std::string>, std::set<ustring>,
434 /// std::vector<int>, etc. Values will be rendered into the string in a
435 /// locale-independent manner (i.e., '.' for decimal in floats). If the
436 /// optional `len` is nonzero, exactly that number of elements will be
437 /// output (truncating or default-value-padding the sequence).
438 template<class Sequence>
join(const Sequence & seq,string_view sep,size_t len)439 std::string join (const Sequence& seq, string_view sep /*= ""*/, size_t len)
440 {
441     using E = typename std::remove_reference<decltype(*std::begin(seq))>::type;
442     std::ostringstream out;
443     out.imbue(std::locale::classic());  // Force "C" locale
444     bool first = true;
445     for (auto&& s : seq) {
446         if (! first)
447             out << sep;
448         out << s;
449         first = false;
450         if (len && (--len == 0))
451             break;
452     }
453     while (len--) {
454         if (! first)
455             out << sep;
456         out << E();
457         first = false;
458     }
459     return out.str();
460 }
461 
462 /// Concatenate two strings, returning a std::string, implemented carefully
463 /// to not perform any redundant copies or allocations. This is
464 /// semantically equivalent to `Strutil::sprintf("%s%s", s, t)`, but is
465 /// more efficient.
466 std::string OIIO_API concat(string_view s, string_view t);
467 
468 /// Repeat a string formed by concatenating str n times.
469 std::string OIIO_API repeat (string_view str, int n);
470 
471 /// Replace a pattern inside a string and return the result. If global is
472 /// true, replace all instances of the pattern, otherwise just the first.
473 std::string OIIO_API replace (string_view str, string_view pattern,
474                               string_view replacement, bool global=false);
475 
476 
477 /// strtod/strtof equivalents that are "locale-independent", always using
478 /// '.' as the decimal separator. This should be preferred for I/O and other
479 /// situations where you want the same standard formatting regardless of
480 /// locale.
481 float OIIO_API strtof (const char *nptr, char **endptr = nullptr) noexcept;
482 double OIIO_API strtod (const char *nptr, char **endptr = nullptr) noexcept;
483 
484 
485 // stoi() returns the int conversion of text from a string.
486 // No exceptions or errors -- parsing errors just return 0, over/underflow
487 // gets clamped to int range. No locale consideration.
488 OIIO_API int stoi (string_view s, size_t* pos=0, int base=10);
489 
490 // stoui() returns the unsigned int conversion of text from a string.
491 // No exceptions or errors -- parsing errors just return 0. Negative
492 // values are cast, overflow is clamped. No locale considerations.
493 inline unsigned int stoui (string_view s, size_t* pos=0, int base=10) {
494     return static_cast<unsigned int>(stoi (s, pos, base));
495 }
496 
497 /// stof() returns the float conversion of text from several string types.
498 /// No exceptions or errors -- parsing errors just return 0.0. These always
499 /// use '.' for the decimal mark (versus atof and std::strtof, which are
500 /// locale-dependent).
501 OIIO_API float stof (string_view s, size_t* pos=0);
502 #define OIIO_STRUTIL_HAS_STOF 1  /* be able to test this */
503 
504 // Temporary fix: allow separate std::string and char* versions, to avoid
505 // string_view allocation on some platforms. This will be deprecated once
506 // we can count on all supported compilers using short string optimization.
507 OIIO_API float stof (const std::string& s, size_t* pos=0);
508 OIIO_API float stof (const char* s, size_t* pos=0);
509 // N.B. For users of ustring, there's a stof(ustring) defined in ustring.h.
510 
511 OIIO_API double stod (string_view s, size_t* pos=0);
512 OIIO_API double stod (const std::string& s, size_t* pos=0);
513 OIIO_API double stod (const char* s, size_t* pos=0);
514 
515 
516 
517 /// Return true if the string is exactly (other than leading and trailing
518 /// whitespace) a valid int.
519 OIIO_API bool string_is_int (string_view s);
520 
/// Return true if the string is exactly (other than leading or trailing
/// whitespace) a valid float. This operates in a locale-independent
/// manner, i.e., it assumes '.' as the decimal mark.
524 OIIO_API bool string_is_float (string_view s);
525 
526 
527 
528 // Helper template to convert from generic type to string. Used when you
529 // want stoX but you're in a template. Rigged to use "C" locale.
530 template<typename T>
from_string(string_view s)531 inline T from_string (string_view s) {
532     return T(s); // Generic: assume there is an explicit converter
533 }
534 // Special case for int
535 template<> inline int from_string<int> (string_view s) {
536     return Strutil::stoi(s);
537 }
538 // Special case for uint
539 template<> inline unsigned int from_string<unsigned int> (string_view s) {
540     return Strutil::stoui(s);
541 }
542 // Special case for float -- note that by using Strutil::strtof, this
543 // always treats '.' as the decimal mark.
544 template<> inline float from_string<float> (string_view s) {
545     return Strutil::stof(s);
546 }
547 
548 
549 
550 /// Template function to convert any type to a string. The default
551 /// implementation is just to use sprintf or fmt::to_string. The template
552 /// can be overloaded if there is a better method for particular types.
553 template<typename T>
to_string(const T & value)554 inline std::string to_string (const T& value) {
555     return ::fmt::to_string(value);
556 }
557 
558 // Some special pass-through cases
to_string(const std::string & value)559 inline std::string to_string (const std::string& value) { return value; }
to_string(string_view value)560 inline std::string to_string (string_view value) { return value; }
// C string pass-through. Constructing a std::string from a null pointer is
// undefined behavior, so a null `value` is mapped to the empty string.
inline std::string to_string (const char* value) {
    return value ? std::string(value) : std::string();
}
562 
563 
564 
565 // Helper template to test if a string is a generic type. Used instead of
566 // string_is_X, but when you're inside templated code.
567 template<typename T>
string_is(string_view)568 inline bool string_is (string_view /*s*/) {
569     return false; // Generic: assume there is an explicit specialization
570 }
571 // Special case for int
572 template <> inline bool string_is<int> (string_view s) {
573     return string_is_int (s);
574 }
575 // Special case for float. Note that by using Strutil::stof, this always
576 // treats '.' as the decimal character.
577 template <> inline bool string_is<float> (string_view s) {
578     return string_is_float (s);
579 }
580 
581 
582 
583 
584 /// Given a string containing values separated by a comma (or optionally
585 /// another separator), extract the individual values, placing them into
586 /// vals[] which is presumed to already contain defaults.  If only a single
587 /// value was in the list, replace all elements of vals[] with the value.
588 /// Otherwise, replace them in the same order.  A missing value will simply
589 /// not be replaced. Return the number of values found in the list
590 /// (including blank or malformed ones). If the vals vector was empty
591 /// initially, grow it as necessary.
592 ///
593 /// For example, if T=float, suppose initially, vals[] = {0, 1, 2}, then
594 ///   "3.14"       results in vals[] = {3.14, 3.14, 3.14}
595 ///   "3.14,,-2.0" results in vals[] = {3.14, 1, -2.0}
596 ///
597 /// This can work for type T = int, float, or any type for that has
598 /// an explicit constructor from a std::string.
599 template<class T, class Allocator>
600 int extract_from_list_string (std::vector<T, Allocator> &vals,
601                               string_view list,
602                               string_view sep = ",")
603 {
604     size_t nvals = vals.size();
605     std::vector<string_view> valuestrings;
606     Strutil::split (list, valuestrings, sep);
607     for (size_t i = 0, e = valuestrings.size(); i < e; ++i) {
608         T v = from_string<T> (valuestrings[i]);
609         if (nvals == 0)
610             vals.push_back (v);
611         else if (valuestrings[i].size()) {
612             if (vals.size() > i)  // don't replace non-existant entries
613                 vals[i] = from_string<T> (valuestrings[i]);
614         }
615         /* Otherwise, empty space between commas, so leave default alone */
616     }
617     if (valuestrings.size() == 1 && nvals > 0) {
618         vals.resize (1);
619         vals.resize (nvals, vals[0]);
620     }
621     return list.size() ? (int) valuestrings.size() : 0;
622 }
623 
624 
625 /// Given a string containing values separated by a comma (or optionally
626 /// another separator), extract the individual values, returning them as a
627 /// std::vector<T>. The vector will be initialized with `nvals` elements
628 /// with default value `val`. If only a single value was in the list,
629 /// replace all elements of vals[] with the value. Otherwise, replace them
630 /// in the same order.  A missing value will simply not be replaced and
631 /// will retain the initialized default value. If the string contains more
632 /// then `nvals` values, they will append to grow the vector.
633 ///
634 /// For example, if T=float,
635 ///   extract_from_list_string ("", 3, 42.0f)
636 ///       --> {42.0, 42.0, 42.0}
637 ///   extract_from_list_string ("3.14", 3, 42.0f)
638 ///       --> {3.14, 3.14, 3.14}
639 ///   extract_from_list_string ("3.14,,-2.0", 3, 42.0f)
640 ///       --> {3.14, 42.0, -2.0}
641 ///   extract_from_list_string ("1,2,3,4", 3, 42.0f)
642 ///       --> {1.0, 2.0, 3.0, 4.0}
643 ///
644 /// This can work for type T = int, float, or any type for that has
645 /// an explicit constructor from a std::string.
646 template<class T>
647 std::vector<T>
648 extract_from_list_string (string_view list, size_t nvals=0, T val=T(),
649                           string_view sep = ",")
650 {
651     std::vector<T> vals (nvals, val);
652     extract_from_list_string (vals, list, sep);
653     return vals;
654 }
655 
656 
657 
658 
659 /// C++ functor wrapper class for using strhash for unordered_map or
660 /// unordered_set.  The way this is used, in conjunction with
661 /// StringEqual, to build an efficient hash map for char*'s or
662 /// std::string's is as follows:
663 /// \code
664 ///    unordered_map <const char *, Key, Strutil::StringHash, Strutil::StringEqual>
665 /// \endcode
666 class StringHash {
667 public:
operator()668     size_t operator() (string_view s) const {
669         return (size_t)Strutil::strhash(s);
670     }
671 };
672 
673 
674 
675 /// C++ functor for comparing two strings for equality of their characters.
676 struct OIIO_API StringEqual {
operatorStringEqual677     bool operator() (const char *a, const char *b) const noexcept { return strcmp (a, b) == 0; }
operatorStringEqual678     bool operator() (string_view a, string_view b) const noexcept { return a == b; }
679 };
680 
681 
682 /// C++ functor for comparing two strings for equality of their characters
683 /// in a case-insensitive and locale-insensitive way.
684 struct OIIO_API StringIEqual {
685     bool operator() (const char *a, const char *b) const noexcept;
operatorStringIEqual686     bool operator() (string_view a, string_view b) const noexcept { return iequals (a, b); }
687 };
688 
689 
690 /// C++ functor for comparing the ordering of two strings.
691 struct OIIO_API StringLess {
operatorStringLess692     bool operator() (const char *a, const char *b) const noexcept { return strcmp (a, b) < 0; }
operatorStringLess693     bool operator() (string_view a, string_view b) const noexcept { return a < b; }
694 };
695 
696 
697 /// C++ functor for comparing the ordering of two strings in a
698 /// case-insensitive and locale-insensitive way.
699 struct OIIO_API StringILess {
700     bool operator() (const char *a, const char *b) const noexcept;
operatorStringILess701     bool operator() (string_view a, string_view b) const noexcept { return a < b; }
702 };
703 
704 
705 
706 #ifdef _WIN32
707 /// Conversion functions between UTF-8 and UTF-16 for windows.
708 ///
709 /// For historical reasons, the standard encoding for strings on windows is
710 /// UTF-16, whereas the unix world seems to have settled on UTF-8.  These two
711 /// encodings can be stored in std::string and std::wstring respectively, with
712 /// the caveat that they're both variable-width encodings, so not all the
713 /// standard string methods will make sense (for example std::string::size()
714 /// won't return the number of glyphs in a UTF-8 string, unless it happens to
715 /// be made up of only the 7-bit ASCII subset).
716 ///
717 /// The standard windows API functions usually have two versions, a UTF-16
718 /// version with a 'W' suffix (using wchar_t* strings), and an ANSI version
719 /// with a 'A' suffix (using char* strings) which uses the current windows
720 /// code page to define the encoding.  (To make matters more confusing there is
721 /// also a further "TCHAR" version which is #defined to the UTF-16 or ANSI
722 /// version, depending on whether UNICODE is defined during compilation.
723 /// This is meant to make it possible to support compiling libraries in
724 /// either unicode or ansi mode from the same codebase.)
725 ///
/// Using std::string as the string container (as in OIIO) implies that we
/// can't use UTF-16.  It also means we need a variable-width encoding to
/// represent characters in non-Latin alphabets in an unambiguous way; the
/// obvious candidate is UTF-8.  File paths in OIIO are considered to be
/// represented in UTF-8, and must be converted to UTF-16 before passing to
/// Windows API file opening functions.
///
/// On the other hand, the encoding used for the ANSI versions of the Windows
/// API is the current Windows code page.  This is more compatible with the
/// default setup of the standard Windows command prompt, and may be more
/// appropriate for error messages.
737 
738 // Conversion to wide char
739 //
740 std::wstring OIIO_API utf8_to_utf16 (string_view utf8str) noexcept;
741 
742 // Conversion from wide char
743 //
744 std::string OIIO_API utf16_to_utf8(const std::wstring& utf16str) noexcept;
745 #endif
746 
747 
748 /// Copy at most size characters (including terminating 0 character) from
749 /// src into dst[], filling any remaining characters with 0 values. Returns
750 /// dst. Note that this behavior is identical to strncpy, except that it
751 /// guarantees that there will be a termining 0 character.
752 OIIO_API char * safe_strcpy (char *dst, string_view src, size_t size) noexcept;
753 
754 
755 /// Modify str to trim any leading whitespace (space, tab, linefeed, cr)
756 /// from the front.
757 void OIIO_API skip_whitespace (string_view &str) noexcept;
758 
759 /// Modify str to trim any trailing whitespace (space, tab, linefeed, cr)
760 /// from the back.
761 void OIIO_API remove_trailing_whitespace (string_view &str) noexcept;
762 
763 /// Modify str to trim any whitespace (space, tab, linefeed, cr) from both
764 /// the front and back.
trim_whitespace(string_view & str)765 inline void trim_whitespace (string_view &str) noexcept {
766     skip_whitespace(str);
767     remove_trailing_whitespace(str);
768 }
769 
770 /// If str's first character is c (or first non-whitespace char is c, if
771 /// skip_whitespace is true), return true and additionally modify str to
772 /// skip over that first character if eat is also true. Otherwise, if str
773 /// does not begin with character c, return false and don't modify str.
774 bool OIIO_API parse_char (string_view &str, char c,
775                           bool skip_whitespace = true, bool eat=true) noexcept;
776 
777 /// Modify str to trim all characters up to (but not including) the first
778 /// occurrence of c, and return true if c was found or false if the whole
779 /// string was trimmed without ever finding c. But if eat is false, then
780 /// don't modify str, just return true if any c is found, false if no c
781 /// is found.
782 bool OIIO_API parse_until_char (string_view &str, char c, bool eat=true) noexcept;
783 
784 /// If str's first non-whitespace characters are the prefix, return true and
785 /// additionally modify str to skip over that prefix if eat is also true.
786 /// Otherwise, if str doesn't start with optional whitespace and the prefix,
787 /// return false and don't modify str.
788 bool OIIO_API parse_prefix (string_view &str, string_view prefix, bool eat=true) noexcept;
789 
790 /// If str's first non-whitespace characters form a valid integer, return
791 /// true, place the integer's value in val, and additionally modify str to
792 /// skip over the parsed integer if eat is also true. Otherwise, if no
793 /// integer is found at the beginning of str, return false and don't modify
794 /// val or str.
795 bool OIIO_API parse_int (string_view &str, int &val, bool eat=true) noexcept;
796 
797 /// If str's first non-whitespace characters form a valid float, return
798 /// true, place the float's value in val, and additionally modify str to
799 /// skip over the parsed float if eat is also true. Otherwise, if no float
800 /// is found at the beginning of str, return false and don't modify val or
801 /// str.
802 bool OIIO_API parse_float (string_view &str, float &val, bool eat=true) noexcept;
803 
// Type of the keep_quotes parameter of parse_string.
enum QuoteBehavior {
    DeleteQuotes,  // the default used by parse_string
    KeepQuotes
};
805 /// If str's first non-whitespace characters form a valid string (either a
806 /// single word separated by whitespace or anything inside a double-quoted
807 /// ("") or single-quoted ('') string, return true, place the string's value
808 /// (not including surrounding double quotes) in val, and additionally
809 /// modify str to skip over the parsed string if eat is also true.
810 /// Otherwise, if no string is found at the beginning of str, return false
811 /// and don't modify val or str. If keep_quotes is true, the surrounding
812 /// double quotes (if present) will be kept in val.
813 bool OIIO_API parse_string (string_view &str, string_view &val, bool eat=true,
814                             QuoteBehavior keep_quotes=DeleteQuotes) noexcept;
815 
816 /// Return the first "word" (set of contiguous alphabetical characters) in
817 /// str, and additionally modify str to skip over the parsed word if eat is
818 /// also true. Otherwise, if no word is found at the beginning of str,
819 /// return an empty string_view and don't modify str.
820 string_view OIIO_API parse_word (string_view &str, bool eat=true) noexcept;
821 
822 /// If str's first non-whitespace characters form a valid C-like identifier,
823 /// return the identifier, and additionally modify str to skip over the
824 /// parsed identifier if eat is also true. Otherwise, if no identifier is
825 /// found at the beginning of str, return an empty string_view and don't
826 /// modify str.
827 string_view OIIO_API parse_identifier (string_view &str, bool eat=true) noexcept;
828 
829 /// If str's first non-whitespace characters form a valid C-like identifier,
830 /// return the identifier, and additionally modify str to skip over the
831 /// parsed identifier if eat is also true. Otherwise, if no identifier is
832 /// found at the beginning of str, return an empty string_view and don't
833 /// modify str. The 'allowed' parameter may specify a additional characters
834 /// accepted that would not ordinarily be allowed in C identifiers, for
835 /// example, parse_identifier (blah, "$:") would allow "identifiers"
836 /// containing dollar signs and colons as well as the usual alphanumeric and
837 /// underscore characters.
838 string_view OIIO_API parse_identifier (string_view &str,
839                                        string_view allowed, bool eat = true) noexcept;
840 
841 /// If the C-like identifier at the head of str exactly matches id,
842 /// return true, and also advance str if eat is true. If it is not a match
843 /// for id, return false and do not alter str.
844 bool OIIO_API parse_identifier_if (string_view &str, string_view id,
845                                    bool eat=true) noexcept;
846 
847 /// Return the characters until any character in sep is found, storing it in
848 /// str, and additionally modify str to skip over the parsed section if eat
849 /// is also true. Otherwise, if no word is found at the beginning of str,
850 /// return an empty string_view and don't modify str.
851 string_view OIIO_API parse_until (string_view &str,
852                                   string_view sep=" \t\r\n", bool eat=true) noexcept;
853 
854 /// Return the characters at the head of the string that match any in set,
855 /// and additionally modify str to skip over the parsed section if eat is
856 /// also true. Otherwise, if no `set` characters are found at the beginning
857 /// of str, return an empty string_view and don't modify str.
858 string_view OIIO_API parse_while (string_view &str,
859                                   string_view set, bool eat=true) noexcept;
860 
861 /// Assuming the string str starts with either '(', '[', or '{', return the
862 /// head, up to and including the corresponding closing character (')', ']',
863 /// or '}', respectively), recognizing nesting structures. For example,
864 /// parse_nested("(a(b)c)d") should return "(a(b)c)", NOT "(a(b)". Return an
865 /// empty string if str doesn't start with one of those characters, or
866 /// doesn't contain a correctly matching nested pair. If eat==true, str will
867 /// be modified to trim off the part of the string that is returned as the
868 /// match.
869 string_view OIIO_API parse_nested (string_view &str, bool eat=true) noexcept;
870 
871 
872 /// Look within `str` for the pattern:
873 ///     head nonwhitespace_chars whitespace
874 /// Remove that full pattern from `str` and return the nonwhitespace
875 /// part that followed the head (or return the empty string and leave `str`
876 /// unmodified, if the head was never found).
877 OIIO_API std::string
878 excise_string_after_head (std::string& str, string_view head);
879 
880 
881 /// Converts utf-8 string to vector of unicode codepoints. This function
882 /// will not stop on invalid sequences. It will let through some invalid
883 /// utf-8 sequences like: 0xfdd0-0xfdef, 0x??fffe/0x??ffff. It does not
884 /// support 5-6 bytes long utf-8 sequences. Will skip trailing character if
885 /// there are not enough bytes for decoding a codepoint.
886 ///
887 /// N.B. Following should probably return u32string instead of taking
888 /// vector, but C++11 support is not yet stabilized across compilers.
889 /// We will eventually add that and deprecate this one, after everybody
890 /// is caught up to C++11.
891 void OIIO_API utf8_to_unicode (string_view str, std::vector<uint32_t> &uvec);
892 
893 /// Encode the string in base64.
894 /// https://en.wikipedia.org/wiki/Base64
895 std::string OIIO_API base64_encode (string_view str);
896 
897 }  // namespace Strutil
898 
899 OIIO_NAMESPACE_END
900