1 // Copyright 2020 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_
16 #define ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_
17 
18 #include "absl/base/attributes.h"
19 #include "absl/strings/internal/str_format/arg.h"
20 #include "absl/strings/internal/str_format/extension.h"
21 
22 // Compile time check support for entry points.
23 
24 #ifndef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
25 #if ABSL_HAVE_ATTRIBUTE(enable_if) && !defined(__native_client__)
26 #define ABSL_INTERNAL_ENABLE_FORMAT_CHECKER 1
27 #endif  // ABSL_HAVE_ATTRIBUTE(enable_if) && !defined(__native_client__)
28 #endif  // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
29 
30 namespace absl {
31 ABSL_NAMESPACE_BEGIN
32 namespace str_format_internal {
33 
// Base case of the recursion: an empty list of conditions holds trivially.
constexpr bool AllOf() { return true; }

// Returns true only when `b` and every value in `t...` are true.
// Written as a single-expression recursion so that it remains usable as a
// C++11 constexpr function.
template <typename... T>
constexpr bool AllOf(bool b, T... t) {
  return b ? AllOf(t...) : false;
}
40 
41 #ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
42 
// Returns true if `c` is one of the characters of the NUL-terminated string
// `chars`.
//
// The terminating NUL is never treated as a member of the set. This matters
// because an out-of-range read of the format is reported as '\0': if the
// terminator matched, scanning an exhausted format for flags or length
// modifiers would always "find" one more character and never stop, and an
// embedded NUL in a format would be silently accepted as a flag.
constexpr bool ContainsChar(const char* chars, char c) {
  return *chars != '\0' && (*chars == c || ContainsChar(chars + 1, c));
}
46 
47 // A constexpr compatible list of Convs.
48 struct ConvList {
49   const FormatConversionCharSet* array;
50   int count;
51 
52   // We do the bound check here to avoid having to do it on the callers.
53   // Returning an empty FormatConversionCharSet has the same effect as
54   // short circuiting because it will never match any conversion.
55   constexpr FormatConversionCharSet operator[](int i) const {
56     return i < count ? array[i] : FormatConversionCharSet{};
57   }
58 
without_frontConvList59   constexpr ConvList without_front() const {
60     return count != 0 ? ConvList{array + 1, count - 1} : *this;
61   }
62 };
63 
64 template <size_t count>
65 struct ConvListT {
66   // Make sure the array has size > 0.
67   FormatConversionCharSet list[count ? count : 1];
68 };
69 
// Returns the character at `index`, or '\0' when `index` is past the end of
// `str`. This lets callers peek ahead without checking the length first.
constexpr char GetChar(string_view str, size_t index) {
  return index >= str.size() ? char{} : str[index];
}
73 
// Drops the first `len` characters of `str`.
// Asking for more characters than `str` holds yields a default-constructed
// (empty) string_view.
constexpr string_view ConsumeFront(string_view str, size_t len = 1) {
  return len > str.size() ? string_view()
                          : string_view(str.data() + len, str.size() - len);
}
78 
ConsumeAnyOf(string_view format,const char * chars)79 constexpr string_view ConsumeAnyOf(string_view format, const char* chars) {
80   return ContainsChar(chars, GetChar(format, 0))
81              ? ConsumeAnyOf(ConsumeFront(format), chars)
82              : format;
83 }
84 
// Returns true for the ASCII decimal digits '0' through '9'.
constexpr bool IsDigit(char c) { return '0' <= c && c <= '9'; }
86 
87 // Helper class for the ParseDigits function.
88 // It encapsulates the two return values we need there.
89 struct Integer {
90   string_view format;
91   int value;
92 
93   // If the next character is a '$', consume it.
94   // Otherwise, make `this` an invalid positional argument.
ConsumePositionalDollarInteger95   constexpr Integer ConsumePositionalDollar() const {
96     return GetChar(format, 0) == '$' ? Integer{ConsumeFront(format), value}
97                                      : Integer{format, 0};
98   }
99 };
100 
101 constexpr Integer ParseDigits(string_view format, int value = 0) {
102   return IsDigit(GetChar(format, 0))
103              ? ParseDigits(ConsumeFront(format),
104                            10 * value + GetChar(format, 0) - '0')
105              : Integer{format, value};
106 }
107 
108 // Parse digits for a positional argument.
109 // The parsing also consumes the '$'.
ParsePositional(string_view format)110 constexpr Integer ParsePositional(string_view format) {
111   return ParseDigits(format).ConsumePositionalDollar();
112 }
113 
114 // Parses a single conversion specifier.
115 // See ConvParser::Run() for post conditions.
116 class ConvParser {
SetFormat(string_view format)117   constexpr ConvParser SetFormat(string_view format) const {
118     return ConvParser(format, args_, error_, arg_position_, is_positional_);
119   }
120 
SetArgs(ConvList args)121   constexpr ConvParser SetArgs(ConvList args) const {
122     return ConvParser(format_, args, error_, arg_position_, is_positional_);
123   }
124 
SetError(bool error)125   constexpr ConvParser SetError(bool error) const {
126     return ConvParser(format_, args_, error_ || error, arg_position_,
127                       is_positional_);
128   }
129 
SetArgPosition(int arg_position)130   constexpr ConvParser SetArgPosition(int arg_position) const {
131     return ConvParser(format_, args_, error_, arg_position, is_positional_);
132   }
133 
134   // Consumes the next arg and verifies that it matches `conv`.
135   // `error_` is set if there is no next arg or if it doesn't match `conv`.
ConsumeNextArg(char conv)136   constexpr ConvParser ConsumeNextArg(char conv) const {
137     return SetArgs(args_.without_front()).SetError(!Contains(args_[0], conv));
138   }
139 
140   // Verify that positional argument `i.value` matches `conv`.
141   // `error_` is set if `i.value` is not a valid argument or if it doesn't
142   // match.
VerifyPositional(Integer i,char conv)143   constexpr ConvParser VerifyPositional(Integer i, char conv) const {
144     return SetFormat(i.format).SetError(!Contains(args_[i.value - 1], conv));
145   }
146 
147   // Parse the position of the arg and store it in `arg_position_`.
ParseArgPosition(Integer arg)148   constexpr ConvParser ParseArgPosition(Integer arg) const {
149     return SetFormat(arg.format).SetArgPosition(arg.value);
150   }
151 
152   // Consume the flags.
ParseFlags()153   constexpr ConvParser ParseFlags() const {
154     return SetFormat(ConsumeAnyOf(format_, "-+ #0"));
155   }
156 
157   // Consume the width.
158   // If it is '*', we verify that it matches `args_`. `error_` is set if it
159   // doesn't match.
ParseWidth()160   constexpr ConvParser ParseWidth() const {
161     return IsDigit(GetChar(format_, 0))
162                ? SetFormat(ParseDigits(format_).format)
163                : GetChar(format_, 0) == '*'
164                      ? is_positional_
165                            ? VerifyPositional(
166                                  ParsePositional(ConsumeFront(format_)), '*')
167                            : SetFormat(ConsumeFront(format_))
168                                  .ConsumeNextArg('*')
169                      : *this;
170   }
171 
172   // Consume the precision.
173   // If it is '*', we verify that it matches `args_`. `error_` is set if it
174   // doesn't match.
ParsePrecision()175   constexpr ConvParser ParsePrecision() const {
176     return GetChar(format_, 0) != '.'
177                ? *this
178                : GetChar(format_, 1) == '*'
179                      ? is_positional_
180                            ? VerifyPositional(
181                                  ParsePositional(ConsumeFront(format_, 2)), '*')
182                            : SetFormat(ConsumeFront(format_, 2))
183                                  .ConsumeNextArg('*')
184                      : SetFormat(ParseDigits(ConsumeFront(format_)).format);
185   }
186 
187   // Consume the length characters.
ParseLength()188   constexpr ConvParser ParseLength() const {
189     return SetFormat(ConsumeAnyOf(format_, "lLhjztq"));
190   }
191 
192   // Consume the conversion character and verify that it matches `args_`.
193   // `error_` is set if it doesn't match.
ParseConversion()194   constexpr ConvParser ParseConversion() const {
195     return is_positional_
196                ? VerifyPositional({ConsumeFront(format_), arg_position_},
197                                   GetChar(format_, 0))
198                : ConsumeNextArg(GetChar(format_, 0))
199                      .SetFormat(ConsumeFront(format_));
200   }
201 
ConvParser(string_view format,ConvList args,bool error,int arg_position,bool is_positional)202   constexpr ConvParser(string_view format, ConvList args, bool error,
203                        int arg_position, bool is_positional)
204       : format_(format),
205         args_(args),
206         error_(error),
207         arg_position_(arg_position),
208         is_positional_(is_positional) {}
209 
210  public:
ConvParser(string_view format,ConvList args,bool is_positional)211   constexpr ConvParser(string_view format, ConvList args, bool is_positional)
212       : format_(format),
213         args_(args),
214         error_(false),
215         arg_position_(0),
216         is_positional_(is_positional) {}
217 
218   // Consume the whole conversion specifier.
219   // `format()` will be set to the character after the conversion character.
220   // `error()` will be set if any of the arguments do not match.
Run()221   constexpr ConvParser Run() const {
222     return (is_positional_ ? ParseArgPosition(ParsePositional(format_)) : *this)
223         .ParseFlags()
224         .ParseWidth()
225         .ParsePrecision()
226         .ParseLength()
227         .ParseConversion();
228   }
229 
format()230   constexpr string_view format() const { return format_; }
args()231   constexpr ConvList args() const { return args_; }
error()232   constexpr bool error() const { return error_; }
is_positional()233   constexpr bool is_positional() const { return is_positional_; }
234 
235  private:
236   string_view format_;
237   // Current list of arguments. If we are not in positional mode we will consume
238   // from the front.
239   ConvList args_;
240   bool error_;
241   // Holds the argument position of the conversion character, if we are in
242   // positional mode. Otherwise, it is unspecified.
243   int arg_position_;
244   // Whether we are in positional mode.
245   // It changes the behavior of '*' and where to find the converted argument.
246   bool is_positional_;
247 };
248 
249 // Parses a whole format expression.
250 // See FormatParser::Run().
251 class FormatParser {
FoundPercent(string_view format)252   static constexpr bool FoundPercent(string_view format) {
253     return format.empty() ||
254            (GetChar(format, 0) == '%' && GetChar(format, 1) != '%');
255   }
256 
257   // We use an inner function to increase the recursion limit.
258   // The inner function consumes up to `limit` characters on every run.
259   // This increases the limit from 512 to ~512*limit.
260   static constexpr string_view ConsumeNonPercentInner(string_view format,
261                                                       int limit = 20) {
262     return FoundPercent(format) || !limit
263                ? format
264                : ConsumeNonPercentInner(
265                      ConsumeFront(format, GetChar(format, 0) == '%' &&
266                                                   GetChar(format, 1) == '%'
267                                               ? 2
268                                               : 1),
269                      limit - 1);
270   }
271 
272   // Consume characters until the next conversion spec %.
273   // It skips %%.
ConsumeNonPercent(string_view format)274   static constexpr string_view ConsumeNonPercent(string_view format) {
275     return FoundPercent(format)
276                ? format
277                : ConsumeNonPercent(ConsumeNonPercentInner(format));
278   }
279 
IsPositional(string_view format)280   static constexpr bool IsPositional(string_view format) {
281     return IsDigit(GetChar(format, 0)) ? IsPositional(ConsumeFront(format))
282                                        : GetChar(format, 0) == '$';
283   }
284 
RunImpl(bool is_positional)285   constexpr bool RunImpl(bool is_positional) const {
286     // In non-positional mode we require all arguments to be consumed.
287     // In positional mode just reaching the end of the format without errors is
288     // enough.
289     return (format_.empty() && (is_positional || args_.count == 0)) ||
290            (!format_.empty() &&
291             ValidateArg(
292                 ConvParser(ConsumeFront(format_), args_, is_positional).Run()));
293   }
294 
ValidateArg(ConvParser conv)295   constexpr bool ValidateArg(ConvParser conv) const {
296     return !conv.error() && FormatParser(conv.format(), conv.args())
297                                 .RunImpl(conv.is_positional());
298   }
299 
300  public:
FormatParser(string_view format,ConvList args)301   constexpr FormatParser(string_view format, ConvList args)
302       : format_(ConsumeNonPercent(format)), args_(args) {}
303 
304   // Runs the parser for `format` and `args`.
305   // It verifies that the format is valid and that all conversion specifiers
306   // match the arguments passed.
307   // In non-positional mode it also verfies that all arguments are consumed.
Run()308   constexpr bool Run() const {
309     return RunImpl(!format_.empty() && IsPositional(ConsumeFront(format_)));
310   }
311 
312  private:
313   string_view format_;
314   // Current list of arguments.
315   // If we are not in positional mode we will consume from the front and will
316   // have to be empty in the end.
317   ConvList args_;
318 };
319 
320 template <FormatConversionCharSet... C>
ValidFormatImpl(string_view format)321 constexpr bool ValidFormatImpl(string_view format) {
322   return FormatParser(format,
323                       {ConvListT<sizeof...(C)>{{C...}}.list, sizeof...(C)})
324       .Run();
325 }
326 
327 #endif  // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
328 
329 }  // namespace str_format_internal
330 ABSL_NAMESPACE_END
331 }  // namespace absl
332 
333 #endif  // ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_
334