1 #include "absl/strings/internal/str_format/parser.h"
2
3 #include <assert.h>
4 #include <string.h>
5 #include <wchar.h>
6 #include <cctype>
7 #include <cstdint>
8
9 #include <algorithm>
10 #include <initializer_list>
11 #include <limits>
12 #include <ostream>
13 #include <string>
14 #include <unordered_set>
15
16 namespace absl {
17 namespace str_format_internal {
18 namespace {
19
CheckFastPathSetting(const UnboundConversion & conv)20 bool CheckFastPathSetting(const UnboundConversion& conv) {
21 bool should_be_basic = !conv.flags.left && //
22 !conv.flags.show_pos && //
23 !conv.flags.sign_col && //
24 !conv.flags.alt && //
25 !conv.flags.zero && //
26 (conv.width.value() == -1) &&
27 (conv.precision.value() == -1);
28 if (should_be_basic != conv.flags.basic) {
29 fprintf(stderr,
30 "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d "
31 "width=%d precision=%d\n",
32 conv.flags.basic, conv.flags.left, conv.flags.show_pos,
33 conv.flags.sign_col, conv.flags.alt, conv.flags.zero,
34 conv.width.value(), conv.precision.value());
35 }
36 return should_be_basic == conv.flags.basic;
37 }
38
39 // Keep a single table for all the conversion chars and length modifiers.
40 // We invert the length modifiers to make them negative so that we can easily
41 // test for them.
42 // Everything else is `none`, which is a negative constant.
43 using CC = ConversionChar::Id;
44 using LM = LengthMod::Id;
45 static constexpr std::int8_t none = -128;
46 static constexpr std::int8_t kIds[] = {
47 none, none, none, none, none, none, none, none, // 00-07
48 none, none, none, none, none, none, none, none, // 08-0f
49 none, none, none, none, none, none, none, none, // 10-17
50 none, none, none, none, none, none, none, none, // 18-1f
51 none, none, none, none, none, none, none, none, // 20-27
52 none, none, none, none, none, none, none, none, // 28-2f
53 none, none, none, none, none, none, none, none, // 30-37
54 none, none, none, none, none, none, none, none, // 38-3f
55 none, CC::A, none, CC::C, none, CC::E, CC::F, CC::G, // @ABCDEFG
56 none, none, none, none, ~LM::L, none, none, none, // HIJKLMNO
57 none, none, none, CC::S, none, none, none, none, // PQRSTUVW
58 CC::X, none, none, none, none, none, none, none, // XYZ[\]^_
59 none, CC::a, none, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg
60 ~LM::h, CC::i, ~LM::j, none, ~LM::l, none, CC::n, CC::o, // hijklmno
61 CC::p, ~LM::q, none, CC::s, ~LM::t, CC::u, none, none, // pqrstuvw
62 CC::x, none, ~LM::z, none, none, none, none, none, // xyz{|}~!
63 none, none, none, none, none, none, none, none, // 80-87
64 none, none, none, none, none, none, none, none, // 88-8f
65 none, none, none, none, none, none, none, none, // 90-97
66 none, none, none, none, none, none, none, none, // 98-9f
67 none, none, none, none, none, none, none, none, // a0-a7
68 none, none, none, none, none, none, none, none, // a8-af
69 none, none, none, none, none, none, none, none, // b0-b7
70 none, none, none, none, none, none, none, none, // b8-bf
71 none, none, none, none, none, none, none, none, // c0-c7
72 none, none, none, none, none, none, none, none, // c8-cf
73 none, none, none, none, none, none, none, none, // d0-d7
74 none, none, none, none, none, none, none, none, // d8-df
75 none, none, none, none, none, none, none, none, // e0-e7
76 none, none, none, none, none, none, none, none, // e8-ef
77 none, none, none, none, none, none, none, none, // f0-f7
78 none, none, none, none, none, none, none, none, // f8-ff
79 };
80
81 template <bool is_positional>
ConsumeConversion(string_view * src,UnboundConversion * conv,int * next_arg)82 bool ConsumeConversion(string_view *src, UnboundConversion *conv,
83 int *next_arg) {
84 const char *pos = src->begin();
85 const char *const end = src->end();
86 char c;
87 // Read the next char into `c` and update `pos`. Reads '\0' if at end.
88 const auto get_char = [&] { c = pos == end ? '\0' : *pos++; };
89
90 const auto parse_digits = [&] {
91 int digits = c - '0';
92 // We do not want to overflow `digits` so we consume at most digits10-1
93 // digits. If there are more digits the parsing will fail later on when the
94 // digit doesn't match the expected characters.
95 int num_digits = std::numeric_limits<int>::digits10 - 2;
96 for (get_char(); num_digits && std::isdigit(c); get_char()) {
97 --num_digits;
98 digits = 10 * digits + c - '0';
99 }
100 return digits;
101 };
102
103 if (is_positional) {
104 get_char();
105 if (c < '1' || c > '9') return false;
106 conv->arg_position = parse_digits();
107 assert(conv->arg_position > 0);
108 if (c != '$') return false;
109 }
110
111 get_char();
112
113 // We should start with the basic flag on.
114 assert(conv->flags.basic);
115
116 // Any non alpha character makes this conversion not basic.
117 // This includes flags (-+ #0), width (1-9, *) or precision (.).
118 // All conversion characters and length modifiers are alpha characters.
119 if (c < 'A') {
120 conv->flags.basic = false;
121
122 for (; c <= '0'; get_char()) {
123 switch (c) {
124 case '-':
125 conv->flags.left = true;
126 continue;
127 case '+':
128 conv->flags.show_pos = true;
129 continue;
130 case ' ':
131 conv->flags.sign_col = true;
132 continue;
133 case '#':
134 conv->flags.alt = true;
135 continue;
136 case '0':
137 conv->flags.zero = true;
138 continue;
139 }
140 break;
141 }
142
143 if (c <= '9') {
144 if (c >= '0') {
145 int maybe_width = parse_digits();
146 if (!is_positional && c == '$') {
147 if (*next_arg != 0) return false;
148 // Positional conversion.
149 *next_arg = -1;
150 conv->flags = Flags();
151 conv->flags.basic = true;
152 return ConsumeConversion<true>(src, conv, next_arg);
153 }
154 conv->width.set_value(maybe_width);
155 } else if (c == '*') {
156 get_char();
157 if (is_positional) {
158 if (c < '1' || c > '9') return false;
159 conv->width.set_from_arg(parse_digits());
160 if (c != '$') return false;
161 get_char();
162 } else {
163 conv->width.set_from_arg(++*next_arg);
164 }
165 }
166 }
167
168 if (c == '.') {
169 get_char();
170 if (std::isdigit(c)) {
171 conv->precision.set_value(parse_digits());
172 } else if (c == '*') {
173 get_char();
174 if (is_positional) {
175 if (c < '1' || c > '9') return false;
176 conv->precision.set_from_arg(parse_digits());
177 if (c != '$') return false;
178 get_char();
179 } else {
180 conv->precision.set_from_arg(++*next_arg);
181 }
182 } else {
183 conv->precision.set_value(0);
184 }
185 }
186 }
187
188 std::int8_t id = kIds[static_cast<unsigned char>(c)];
189
190 if (id < 0) {
191 if (id == none) return false;
192
193 // It is a length modifier.
194 using str_format_internal::LengthMod;
195 LengthMod length_mod = LengthMod::FromId(static_cast<LM>(~id));
196 get_char();
197 if (c == 'h' && length_mod.id() == LengthMod::h) {
198 conv->length_mod = LengthMod::FromId(LengthMod::hh);
199 get_char();
200 } else if (c == 'l' && length_mod.id() == LengthMod::l) {
201 conv->length_mod = LengthMod::FromId(LengthMod::ll);
202 get_char();
203 } else {
204 conv->length_mod = length_mod;
205 }
206 id = kIds[static_cast<unsigned char>(c)];
207 if (id < 0) return false;
208 }
209
210 assert(CheckFastPathSetting(*conv));
211 (void)(&CheckFastPathSetting);
212
213 conv->conv = ConversionChar::FromId(static_cast<CC>(id));
214 if (!is_positional) conv->arg_position = ++*next_arg;
215 *src = string_view(pos, end - pos);
216 return true;
217 }
218
219 } // namespace
220
ConsumeUnboundConversion(string_view * src,UnboundConversion * conv,int * next_arg)221 bool ConsumeUnboundConversion(string_view *src, UnboundConversion *conv,
222 int *next_arg) {
223 if (*next_arg < 0) return ConsumeConversion<true>(src, conv, next_arg);
224 return ConsumeConversion<false>(src, conv, next_arg);
225 }
226
227 struct ParsedFormatBase::ParsedFormatConsumer {
ParsedFormatConsumerabsl::str_format_internal::ParsedFormatBase::ParsedFormatConsumer228 explicit ParsedFormatConsumer(ParsedFormatBase *parsedformat)
229 : parsed(parsedformat), data_pos(parsedformat->data_.get()) {}
230
Appendabsl::str_format_internal::ParsedFormatBase::ParsedFormatConsumer231 bool Append(string_view s) {
232 if (s.empty()) return true;
233
234 size_t text_end = AppendText(s);
235
236 if (!parsed->items_.empty() && !parsed->items_.back().is_conversion) {
237 // Let's extend the existing text run.
238 parsed->items_.back().text_end = text_end;
239 } else {
240 // Let's make a new text run.
241 parsed->items_.push_back({false, text_end, {}});
242 }
243 return true;
244 }
245
ConvertOneabsl::str_format_internal::ParsedFormatBase::ParsedFormatConsumer246 bool ConvertOne(const UnboundConversion &conv, string_view s) {
247 size_t text_end = AppendText(s);
248 parsed->items_.push_back({true, text_end, conv});
249 return true;
250 }
251
AppendTextabsl::str_format_internal::ParsedFormatBase::ParsedFormatConsumer252 size_t AppendText(string_view s) {
253 memcpy(data_pos, s.data(), s.size());
254 data_pos += s.size();
255 return static_cast<size_t>(data_pos - parsed->data_.get());
256 }
257
258 ParsedFormatBase *parsed;
259 char* data_pos;
260 };
261
ParsedFormatBase(string_view format,bool allow_ignored,std::initializer_list<Conv> convs)262 ParsedFormatBase::ParsedFormatBase(string_view format, bool allow_ignored,
263 std::initializer_list<Conv> convs)
264 : data_(format.empty() ? nullptr : new char[format.size()]) {
265 has_error_ = !ParseFormatString(format, ParsedFormatConsumer(this)) ||
266 !MatchesConversions(allow_ignored, convs);
267 }
268
MatchesConversions(bool allow_ignored,std::initializer_list<Conv> convs) const269 bool ParsedFormatBase::MatchesConversions(
270 bool allow_ignored, std::initializer_list<Conv> convs) const {
271 std::unordered_set<int> used;
272 auto add_if_valid_conv = [&](int pos, char c) {
273 if (static_cast<size_t>(pos) > convs.size() ||
274 !Contains(convs.begin()[pos - 1], c))
275 return false;
276 used.insert(pos);
277 return true;
278 };
279 for (const ConversionItem &item : items_) {
280 if (!item.is_conversion) continue;
281 auto &conv = item.conv;
282 if (conv.precision.is_from_arg() &&
283 !add_if_valid_conv(conv.precision.get_from_arg(), '*'))
284 return false;
285 if (conv.width.is_from_arg() &&
286 !add_if_valid_conv(conv.width.get_from_arg(), '*'))
287 return false;
288 if (!add_if_valid_conv(conv.arg_position, conv.conv.Char())) return false;
289 }
290 return used.size() == convs.size() || allow_ignored;
291 }
292
293 } // namespace str_format_internal
294 } // namespace absl
295