1 #pragma once
2 #ifndef CATA_SRC_STRING_FORMATTER_H
3 #define CATA_SRC_STRING_FORMATTER_H
4 
5 #include <cstddef>
6 #include <iosfwd>
7 #include <new>
8 #include <string>
9 #include <type_traits>
10 #include <typeinfo>
11 
12 // TODO: replace with std::optional
13 #include "optional.h"
14 
15 class translation;
16 
17 namespace cata
18 {
19 
20 class string_formatter;
21 
22 // wrapper to allow calling string_formatter::throw_error before the definition of string_formatter
23 [[noreturn]]
24 void throw_error( const string_formatter &, const std::string & );
25 // wrapper to access string_formatter::temp_buffer before the definition of string_formatter
26 const char *string_formatter_set_temp_buffer( const string_formatter &, const std::string & );
27 // Handle currently active exception from string_formatter and return it as string
28 std::string handle_string_format_error();
29 
30 /**
31  * @defgroup string_formatter_convert Convert functions for @ref string_formatter
32  *
33  * The `convert` functions here are used to convert the input value of
34  * @ref string_formatter::parse into the requested type, as defined by the format specifiers.
35  *
36  * @tparam T the input type, as given by the call to `string_format`.
37  * @tparam RT the requested type. The `convert` functions return such a value or they throw
38  * an exception via @ref throw_error.
39  *
40  * Each function has the same parameters:
 * The first parameter defines the requested type. The value of the pointer is ignored, callers
 * should use a (properly cast) `nullptr`. It is required to "simulate" overloading on the
 * return value. E.g. `long convert(long*, int)` and `short convert(short*, int)` both convert
 * an input value of type `int`, but the first converts to `long` and the second converts to
 * `short`. Without the first parameter their signatures would be identical.
46  * The second parameter is used to call @ref throw_error / @ref string_formatter_set_temp_buffer.
47  * The third parameter is the input value that is to be converted.
48  * The fourth parameter is a dummy value, it is always ignored, callers should use `0` here.
49  * It is used so the fallback with the variadic arguments is *only* chosen when no other
50  * overload matches.
51  */
52 /**@{*/
// Test for arithmetic type, *excluding* bool. printf can not handle bool, so neither can we.
// Cv-qualifiers and references on T are ignored (the type is decayed first).
// The alias names exactly `std::true_type` or `std::false_type`.
template<typename T>
using is_numeric = std::integral_constant < bool,
      std::is_arithmetic<typename std::decay<T>::type>::value &&
      !std::is_same<typename std::decay<T>::type, bool>::value >;
// Test for integer type (not floating point, not bool).
// Cv-qualifiers and references on T are ignored (the type is decayed first).
// Character types count as integers here, enumerations do not.
template<typename T>
using is_integer = std::integral_constant < bool,
      std::is_integral<typename std::decay<T>::type>::value &&
      !std::is_same<typename std::decay<T>::type, bool>::value >;
// Test for plain `char` (after decaying T). `signed char`, `unsigned char` and the wide
// character types do *not* match.
template<typename T>
using is_char = std::integral_constant < bool,
      std::is_same<typename std::decay<T>::type, char>::value >;
// Test for std::string type (after decaying T, so references and cv-qualifiers are ignored).
template<typename T>
using is_string = std::integral_constant < bool,
      std::is_same<typename std::decay<T>::type, std::string>::value >;
// Test for c-string type: `char *` or `const char *` after decaying T. Because of the decay,
// arrays of (const) char (e.g. string literals) match as well.
template<typename T>
using is_cstring = std::integral_constant < bool,
      std::is_same<typename std::decay<T>::type, const char *>::value ||
      std::is_same<typename std::decay<T>::type, char *>::value >;
75 // Test for class translation
76 template<typename T>
77 using is_translation = typename std::conditional <
78                        std::is_same<typename std::decay<T>::type, translation>::value, std::true_type,
79                        std::false_type >::type;
80 
81 template<typename RT, typename T>
82 inline typename std::enable_if < is_integer<RT>::value &&is_integer<T>::value,
convert(RT *,const string_formatter &,T && value,int)83        RT >::type convert( RT *, const string_formatter &, T &&value, int )
84 {
85     return value;
86 }
87 template<typename RT, typename T>
88 inline typename std::enable_if < is_integer<RT>::value
89 &&std::is_enum<typename std::decay<T>::type>::value,
convert(RT *,const string_formatter &,T && value,int)90 RT >::type convert( RT *, const string_formatter &, T &&value, int )
91 {
92     return static_cast<RT>( value );
93 }
94 template<typename RT, typename T>
95 inline typename std::enable_if < std::is_floating_point<RT>::value &&is_numeric<T>::value
convert(RT *,const string_formatter &,T && value,int)96 &&!is_integer<T>::value, RT >::type convert( RT *, const string_formatter &, T &&value, int )
97 {
98     return value;
99 }
100 template<typename RT, typename T>
101 inline typename std::enable_if < std::is_same<RT, void *>::value
convert(RT *,const string_formatter &,T && value,int)102 &&std::is_pointer<typename std::decay<T>::type>::value, void * >::type convert( RT *,
103         const string_formatter &, T &&value, int )
104 {
105     return const_cast<typename std::remove_const<typename std::remove_pointer<typename std::decay<T>::type>::type>::type *>
106            ( value );
107 }
108 template<typename RT, typename T>
109 inline typename std::enable_if < std::is_same<RT, const char *>::value &&is_string<T>::value,
convert(RT *,const string_formatter &,T && value,int)110        const char * >::type convert( RT *, const string_formatter &, T &&value, int )
111 {
112     return value.c_str();
113 }
114 template<typename RT, typename T>
115 inline typename std::enable_if < std::is_same<RT, const char *>::value &&is_cstring<T>::value,
convert(RT *,const string_formatter &,T && value,int)116        const char * >::type convert( RT *, const string_formatter &, T &&value, int )
117 {
118     return value;
119 }
120 template<typename RT, typename T>
121 inline typename std::enable_if < std::is_same<RT, const char *>::value &&is_translation<T>::value,
convert(RT *,const string_formatter & sf,T && value,int)122        const char * >::type convert( RT *, const string_formatter &sf, T &&value, int )
123 {
124     return string_formatter_set_temp_buffer( sf, value.translated() );
125 }
126 template<typename RT, typename T>
127 inline typename std::enable_if < std::is_same<RT, const char *>::value &&is_numeric<T>::value
convert(RT *,const string_formatter & sf,T && value,int)128 &&!is_char<T>::value, const char * >::type convert( RT *, const string_formatter &sf, T &&value,
129         int )
130 {
131     return string_formatter_set_temp_buffer( sf, std::to_string( value ) );
132 }
133 template<typename RT, typename T>
134 inline typename std::enable_if < std::is_same<RT, const char *>::value &&is_numeric<T>::value
convert(RT *,const string_formatter & sf,T && value,int)135 &&is_char<T>::value, const char * >::type convert( RT *, const string_formatter &sf, T &&value,
136         int )
137 {
138     return string_formatter_set_temp_buffer( sf, std::string( 1, value ) );
139 }
140 // Catch all remaining conversions (the '...' makes this the lowest overload priority).
141 // The static_assert is used to restrict the input type to those that can actually be printed,
142 // calling `string_format` with an unknown type will trigger a compile error because no other
143 // `convert` function will match, while this one will give a static_assert error.
144 template<typename RT, typename T>
145 // NOLINTNEXTLINE(cert-dcl50-cpp)
convert(RT *,const string_formatter & sf,T &&,...)146 inline RT convert( RT *, const string_formatter &sf, T &&, ... )
147 {
148     static_assert( std::is_pointer<typename std::decay<T>::type>::value ||
149                    is_numeric<T>::value || is_string<T>::value || is_char<T>::value ||
150                    std::is_enum<typename std::decay<T>::type>::value ||
151                    is_cstring<T>::value || is_translation<T>::value, "Unsupported argument type" );
152     throw_error( sf, "Tried to convert argument of type " +
153                  std::string( typeid( T ).name() ) + " to " +
154                  std::string( typeid( RT ).name() ) + ", which is not possible" );
155 }
156 /**@}*/
157 
158 /**
159  * Type-safe and undefined-behavior free wrapper over `sprintf`.
160  * See @ref string_format for usage.
161  * Basically it extracts the format specifiers and calls `sprintf` for each one separately
162  * and with proper conversion of the input type.
163  * For example `printf("%f", 7)` would yield undefined behavior as "%f" requires a `double`
164  * as argument. This class detects the format specifier and converts the input to `double`
165  * before calling `sprintf`. Similar for `printf("%d", "foo")` (yields UB again), but this
166  * class will just throw an exception.
167  */
168 // Note: argument index is always 0-based *in this code*, but `printf` has 1-based arguments.
169 class string_formatter
170 {
171     private:
172         /// Complete format string, including all format specifiers (the string passed
173         /// to @ref printf).
174         const std::string format;
175         /// Used during parsing to denote the *next* character in @ref format to be
176         /// parsed.
177         size_t current_index_in_format = 0;
178         /// The formatted output string, filled during parsing of @ref format,
179         /// so it's only valid after the parsing has completed.
180         std::string output;
181         /// The *currently parsed format specifiers. This is extracted from @ref format
182         /// during parsing and given to @ref sprintf (along with the actual argument).
183         /// It is filled and reset during parsing for each format specifier in @ref format.
184         std::string current_format;
185         /// The *index* (not number) of the next argument to be formatted via @ref current_format.
186         int current_argument_index = 0;
187         /// Return the next character from @ref format and increment @ref current_index_in_format.
188         /// Returns a null-character when the end of the @ref format has been reached (and does not
189         /// change @ref current_index_in_format).
190         char consume_next_input();
191         /// Returns (like @ref consume_next_input) the next character from @ref format, but
192         /// does *not* change @ref current_index_in_format.
193         char get_current_input() const;
194         /// If the next character to read from @ref format is the given character, consume it
195         /// (like @ref consume_next_input) and return `true`. Otherwise don't do anything at all
196         /// and return `false`.
197         bool consume_next_input_if( char c );
198         /// Return whether @ref get_current_input has a decimal digit ('0'...'9').
199         bool has_digit() const;
200         /// Consume decimal digits, interpret them as integer and return it.
201         /// A starting '0' is allowed. Leaves @ref format at the first non-digit
202         /// character (or the end). Returns 0 if the first character is not a digit.
203         int parse_integer();
204         /// Read and consume format flag characters and append them to @ref current_format.
205         /// Leaves @ref format at the first character that is not a flag (or the end).
206         void read_flags();
207         /// Read and forward to @ref current_format any width specifier from @ref format.
208         /// Returns nothing if the width is not specified or if it is specified as fixed number,
209         /// otherwise returns the index of the printf-argument to be used for the width.
210         cata::optional<int> read_width();
211         /// See @ref read_width. This does the same, but for the precision specifier.
212         cata::optional<int> read_precision();
213         /// Read and return the index of the printf-argument that is to be formatted. Returns
214         /// nothing if @ref format does not refer to a specific index (caller should use
215         /// @ref current_argument_index).
216         cata::optional<int> read_argument_index();
217         // Helper for common logic in @ref read_width and @ref read_precision.
218         cata::optional<int> read_number_or_argument_index();
219         /// Throws an exception containing the given message and the @ref format.
220         [[noreturn]]
221         void throw_error( const std::string &msg ) const;
throw_error(const string_formatter & sf,const std::string & msg)222         friend void throw_error( const string_formatter &sf, const std::string &msg ) {
223             sf.throw_error( msg );
224         }
225         mutable std::string temp_buffer;
226         /// Stores the given text in @ref temp_buffer and returns `c_str()` of it. This is used
227         /// for printing non-strings through "%s". It *only* works because this prints each format
228         /// specifier separately, so the content of @ref temp_buffer is only used once.
string_formatter_set_temp_buffer(const string_formatter & sf,const std::string & text)229         friend const char *string_formatter_set_temp_buffer( const string_formatter &sf,
230                 const std::string &text ) {
231             sf.temp_buffer = text;
232             return sf.temp_buffer.c_str();
233         }
234         /**
235          * Extracts a printf argument from the argument list and converts it to the requested type.
236          * @tparam RT The type that the argument should be converted to.
237          * @tparam current_index The index of the first of the supplied arguments.
238          * @throws If there is no argument with the given index, or if the argument can not be
239          * converted to the requested type (via @ref convert).
240          */
241         /**@{*/
242         template<typename RT, unsigned int current_index>
get_nth_arg_as(const unsigned int requested)243         RT get_nth_arg_as( const unsigned int requested ) const {
244             throw_error( "Requested argument " + std::to_string( requested ) + " but input has only " +
245                          std::to_string( current_index ) );
246         }
247         template<typename RT, unsigned int current_index, typename T, typename ...Args>
get_nth_arg_as(const unsigned int requested,T && head,Args &&...args)248         RT get_nth_arg_as( const unsigned int requested, T &&head, Args &&... args ) const {
249             if( requested > current_index ) {
250                 return get_nth_arg_as < RT, current_index + 1 > ( requested, std::forward<Args>( args )... );
251             } else {
252                 return convert( static_cast<RT *>( nullptr ), *this, std::forward<T>( head ), 0 );
253             }
254         }
255         /**@}*/
256 
257         void add_long_long_length_modifier();
258 
259         template<typename ...Args>
read_conversion(const int format_arg_index,Args &&...args)260         void read_conversion( const int format_arg_index, Args &&... args ) {
261             // Removes the prefix "ll", "l", "h" and "hh", "z", and "t".
262             // We later add "ll" again and that
263             // would interfere with the existing prefix. We convert *all* input to (un)signed
264             // long long int and use the "ll" modifier all the time. This will print the
265             // expected value all the time, even when the original modifier did not match.
266             if( consume_next_input_if( 'l' ) ) {
267                 consume_next_input_if( 'l' );
268             } else if( consume_next_input_if( 'h' ) ) {
269                 consume_next_input_if( 'h' );
270             } else if( consume_next_input_if( 'z' ) ) {
271                 // done with it
272             } else if( consume_next_input_if( 't' ) ) {
273                 // done with it
274             }
275             const char c = consume_next_input();
276             current_format.push_back( c );
277             switch( c ) {
278                 case 'c':
279                     return do_formatting( get_nth_arg_as<int, 0>( format_arg_index, std::forward<Args>( args )... ) );
280                 case 'd':
281                 case 'i':
282                     add_long_long_length_modifier();
283                     return do_formatting( get_nth_arg_as<signed long long int, 0>( format_arg_index,
284                                           std::forward<Args>( args )... ) );
285                 case 'o':
286                 case 'u':
287                 case 'x':
288                 case 'X':
289                     add_long_long_length_modifier();
290                     return do_formatting( get_nth_arg_as<unsigned long long int, 0>( format_arg_index,
291                                           std::forward<Args>( args )... ) );
292                 case 'a':
293                 case 'A':
294                 case 'g':
295                 case 'G':
296                 case 'f':
297                 case 'F':
298                 case 'e':
299                 case 'E':
300                     return do_formatting( get_nth_arg_as<double, 0>( format_arg_index,
301                                           std::forward<Args>( args )... ) );
302                 case 'p':
303                     return do_formatting( get_nth_arg_as<void *, 0>( format_arg_index,
304                                           std::forward<Args>( args )... ) );
305                 case 's':
306                     return do_formatting( get_nth_arg_as<const char *, 0>( format_arg_index,
307                                           std::forward<Args>( args )... ) );
308                 default:
309                     throw_error( "Unsupported format conversion: " + std::string( 1, c ) );
310             }
311         }
312 
313         template<typename T>
do_formatting(T && value)314         void do_formatting( T &&value ) {
315             output.append( raw_string_format( current_format.c_str(), value ) );
316         }
317 
318     public:
319         /// @param format The format string as required by `sprintf`.
string_formatter(std::string format)320         explicit string_formatter( std::string format ) : format( std::move( format ) ) { }
321         /// Does the actual `sprintf`. It uses @ref format and puts the formatted
322         /// string into @ref output.
323         /// Note: use @ref get_output to get the formatted string after a successful
324         /// call to this function.
325         /// @throws Exceptions when the arguments do not match the format specifiers,
326         /// see @ref get_nth_arg_as, or when the format is invalid for whatever reason.
327         /// Note: @ref string_format is a wrapper that handles those exceptions.
328         template<typename ...Args>
parse(Args &&...args)329         void parse( Args &&... args ) {
330             output.reserve( format.size() );
331             output.resize( 0 );
332             current_index_in_format = 0;
333             current_argument_index = 0;
334             while( const char c = consume_next_input() ) {
335                 if( c != '%' ) {
336                     output.push_back( c );
337                     continue;
338                 }
339                 if( consume_next_input_if( '%' ) ) {
340                     output.push_back( '%' );
341                     continue;
342                 }
343                 current_format = "%";
344                 const cata::optional<int> format_arg_index = read_argument_index();
345                 read_flags();
346                 if( const cata::optional<int> width_argument_index = read_width() ) {
347                     const int w = get_nth_arg_as<int, 0>( *width_argument_index, std::forward<Args>( args )... );
348                     current_format += std::to_string( w );
349                 }
350                 if( const cata::optional<int> precision_argument_index = read_precision() ) {
351                     const int p = get_nth_arg_as<int, 0>( *precision_argument_index, std::forward<Args>( args )... );
352                     current_format += std::to_string( p );
353                 }
354                 const int arg = format_arg_index ? *format_arg_index : current_argument_index++;
355                 read_conversion( arg, std::forward<Args>( args )... );
356             }
357         }
get_output()358         std::string get_output() const {
359             return output;
360         }
361 #if defined(__clang__)
362 #define PRINTF_LIKE(a,b) __attribute__((format(printf,a,b)))
363 #elif defined(__GNUC__)
364 #define PRINTF_LIKE(a,b) __attribute__((format(gnu_printf,a,b)))
365 #else
366 #define PRINTF_LIKE(a,b)
367 #endif
368         /**
369          * Wrapper for calling @ref vsprintf - see there for documentation. Try to avoid it as it's
370          * not type safe and may easily lead to undefined behavior - use @ref string_format instead.
371          * @throws std::exception if the format is invalid / does not match the arguments, but that's
372          * not guaranteed - technically it's undefined behavior.
373          */
374         // Implemented in output.cpp
375         static std::string raw_string_format( const char *format, ... ) PRINTF_LIKE( 1, 2 );
376 #undef PRINTF_LIKE
377 };
378 
379 } // namespace cata
380 
381 /**
382  * Simple wrapper over @ref string_formatter::parse. It catches any exceptions and returns
383  * some error string. Otherwise it just returns the formatted string.
384  *
385  * These functions perform string formatting according to the rules of the `printf` function,
386  * see `man 3 printf` or any other documentation.
387  *
388  * In short: the \p format parameter is a string with optional placeholders, which will be
389  * replaced with formatted data from the further arguments. The further arguments must have
390  * a type that matches the type expected by the placeholder.
391  * The placeholders look like this:
 * - `%s` expects an argument of type `const char*`, `std::string` or `translation` (inserted
 *   as is / in its translated form), or a numeric argument (converted to a string via
 *   `to_string`; a plain `char` is inserted as a single character).
 * - `%d` expects an argument of an integer or enumeration type (int, short, ...), which is
 *   formatted as decimal number.
 * - `%f` expects a floating point argument (float, double, ...), which is formatted as
 *   decimal number. Integer arguments are *not* converted; they yield the error string.
398  *
399  * There are more placeholders and options to them (see documentation of `printf`).
400  * Note that this wrapper (via @ref string_formatter) automatically converts the arguments
401  * to match the given format specifier (if possible) - see @ref string_formatter_convert.
402  */
403 /**@{*/
404 template<typename ...Args>
string_format(std::string format,Args &&...args)405 inline std::string string_format( std::string format, Args &&...args )
406 {
407     try {
408         cata::string_formatter formatter( std::move( format ) );
409         formatter.parse( std::forward<Args>( args )... );
410         return formatter.get_output();
411     } catch( ... ) {
412         return cata::handle_string_format_error();
413     }
414 }
// Overload for c-string formats (e.g. string literals): copies the format into a
// std::string and defers to the std::string overload.
template<typename ...Args>
inline std::string string_format( const char *const format, Args &&...args )
{
    std::string owned_format( format );
    return string_format( std::move( owned_format ), std::forward<Args>( args )... );
}
420 template<typename T, typename ...Args>
421 inline typename std::enable_if<cata::is_translation<T>::value, std::string>::type
string_format(T && format,Args &&...args)422 string_format( T &&format, Args &&...args )
423 {
424     return string_format( format.translated(), std::forward<Args>( args )... );
425 }
426 /**@}*/
427 
428 #endif // CATA_SRC_STRING_FORMATTER_H
429