1 // ----------------------------------------------------------------------------
2 // parsing.hpp :  implementation of the parsing member functions
3 //                      ( parse, parse_printf_directive)
4 // ----------------------------------------------------------------------------
5 
6 //  Copyright Samuel Krempp 2003. Use, modification, and distribution are
7 //  subject to the Boost Software License, Version 1.0. (See accompanying
8 //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9 
10 // see http://www.boost.org/libs/format for library home page
11 
12 // ----------------------------------------------------------------------------
13 
14 #ifndef BOOST_FORMAT_PARSING_HPP
15 #define BOOST_FORMAT_PARSING_HPP
16 
17 
#include <limits>

#include <boost/format/format_class.hpp>
#include <boost/format/exceptions.hpp>
#include <boost/throw_exception.hpp>
#include <boost/assert.hpp>
#include <boost/config.hpp>
#include <boost/core/ignore_unused.hpp>
24 
25 namespace boost {
26 namespace io {
27 namespace detail {
28 
29 #if defined(BOOST_NO_STD_LOCALE)
30     // streams will be used for narrow / widen. but these methods are not const
31     template<class T>
const_or_not(const T & x)32     T& const_or_not(const T& x) {
33         return const_cast<T&> (x);
34     }
35 #else
36     template<class T>
37     const T& const_or_not(const T& x) {
38         return x;
39     }
40 #endif
41 
42     template<class Ch, class Facet> inline
wrap_narrow(const Facet & fac,Ch c,char deflt)43     char wrap_narrow(const Facet& fac, Ch c, char deflt) {
44         return const_or_not(fac).narrow(c, deflt);
45     }
46 
47     template<class Ch, class Facet> inline
wrap_isdigit(const Facet & fac,Ch c)48     bool wrap_isdigit(const Facet& fac, Ch c) {
49 #if ! defined( BOOST_NO_LOCALE_ISDIGIT )
50         return fac.is(std::ctype<Ch>::digit, c);
51 # else
52         ignore_unused(fac);
53         using namespace std;
54         return isdigit(c) != 0;
55 #endif
56     }
57 
58     template<class Iter, class Facet>
59     Iter wrap_scan_notdigit(const Facet & fac, Iter beg, Iter end) {
60         using namespace std;
61         for( ; beg!=end && wrap_isdigit(fac, *beg); ++beg) ;
62         return beg;
63     }
64 
65 
66     // Input : [start, last) iterators range and a
67     //          a Facet to use its widen/narrow member function
68     // Effects : read sequence and convert digits into integral n, of type Res
69     // Returns : n
70     template<class Res, class Iter, class Facet>
71     Iter str2int (const Iter & start, const Iter & last, Res & res,
72                  const Facet& fac)
73     {
74         using namespace std;
75         Iter it;
76         res=0;
77         for(it=start; it != last && wrap_isdigit(fac, *it); ++it ) {
78             char cur_ch = wrap_narrow(fac, *it, 0); // cant fail.
79             res *= 10;
80             res += cur_ch - '0'; // 22.2.1.1.2.13 of the C++ standard
81         }
82         return it;
83     }
84 
85     // auxiliary func called by parse_printf_directive
86     // for centralising error handling
87     // it either throws if user sets the corresponding flag, or does nothing.
maybe_throw_exception(unsigned char exceptions,std::size_t pos,std::size_t size)88     inline void maybe_throw_exception(unsigned char exceptions,
89                                       std::size_t pos, std::size_t size)
90     {
91         if(exceptions & io::bad_format_string_bit)
92             boost::throw_exception(io::bad_format_string(pos, size) );
93     }
94 
95 
96     // Parses one printf-like directive; start points just after its leading mark.
97     // Input: [start, last) : the characters of the directive and what follows it,
98     //    fac : a Facet merely to use its widen/narrow member functions,
99     //    offset : added to (start - directive begin) in reported error positions,
100     //    exceptions : a bitmask telling whether to throw exceptions on errors.
101     // Returns:
102     //  true if parse succeeded (ignore some errors if exceptions disabled)
103     //  false if it failed so bad that the directive should be printed verbatim
104     // Effects: start is incremented so that *start is the first char after
105     //     this directive, and *fpar is set with the parameters read in it
106     template<class Ch, class Tr, class Alloc, class Iter, class Facet>
parse_printf_directive(Iter & start,const Iter & last,detail::format_item<Ch,Tr,Alloc> * fpar,const Facet & fac,std::size_t offset,unsigned char exceptions)107     bool parse_printf_directive(Iter & start, const Iter& last,
108                                 detail::format_item<Ch, Tr, Alloc> * fpar,
109                                 const Facet& fac,
110                                 std::size_t offset, unsigned char exceptions)
111     {
112         typedef typename basic_format<Ch, Tr, Alloc>::format_item_t format_item_t;
113 
114         fpar->argN_ = format_item_t::argN_no_posit;  // if no positional-directive
115         bool precision_set = false; // becomes true once a numeric precision has been read
116         bool in_brackets=false; // becomes true for the %|...| form
117         Iter start0 = start; // beginning of this directive, used to compute error positions
118         std::size_t fstring_size = last-start0+offset; // size passed along with error positions
119         char mssiz = 0; // progress through a Microsoft I32 / I64 size prefix, 0 when none is pending
120 
121         if(start>= last) { // empty directive : this is a trailing %
122                 maybe_throw_exception(exceptions, start-start0 + offset, fstring_size);
123                 return false;
124         }
125 
126         if(*start== const_or_not(fac).widen( '|')) {
127             in_brackets=true;
128             if( ++start >= last ) {
129                 maybe_throw_exception(exceptions, start-start0 + offset, fstring_size);
130                 return false;
131             }
132         }
133 
134         // the flag '0' would be picked as a digit for argument order, but here it's a flag :
135         if(*start== const_or_not(fac).widen( '0'))
136             goto parse_flags;
137 
138         // handle argument order (%2$d)  or possibly width specification: %2d
139         if(wrap_isdigit(fac, *start)) {
140             int n;
141             start = str2int(start, last, n, fac);
142             if( start >= last ) {
143                 maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
144                 return false;
145             }
146 
147             // %N% case : this is already the end of the directive
148             if( *start ==  const_or_not(fac).widen( '%') ) {
149                 fpar->argN_ = n-1; // argN_ stores N-1
150                 ++start;
151                 if( in_brackets) // '%|N%' : reported as an error when exceptions are enabled
152                     maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
153                 return true;
154             }
155 
156             if ( *start== const_or_not(fac).widen( '$') ) {
157                 fpar->argN_ = n-1; // argN_ stores N-1
158                 ++start;
159             }
160             else {
161                 // non-positional directive
162                 fpar->fmtstate_.width_ = n;
163                 fpar->argN_  = format_item_t::argN_no_posit;
164                 goto parse_precision;
165             }
166         }
167 
168       parse_flags:
169         // handle flags
170         while (start != last) { // as long as char is one of + - = _ # 0 or ' '
171             switch ( wrap_narrow(fac, *start, 0)) {
172                 case '\'':
173                     break; // no effect yet. (painful to implement)
174                 case '-':
175                     fpar->fmtstate_.flags_ |= std::ios_base::left;
176                     break;
177                 case '=':
178                     fpar->pad_scheme_ |= format_item_t::centered;
179                     break;
180                 case '_':
181                     fpar->fmtstate_.flags_ |= std::ios_base::internal;
182                     break;
183                 case ' ':
184                     fpar->pad_scheme_ |= format_item_t::spacepad;
185                     break;
186                 case '+':
187                     fpar->fmtstate_.flags_ |= std::ios_base::showpos;
188                     break;
189                 case '0':
190                     fpar->pad_scheme_ |= format_item_t::zeropad;
191                     // need to know alignment before really setting flags,
192                     // so just add 'zeropad' flag for now, it will be processed later.
193                     break;
194                 case '#':
195                     fpar->fmtstate_.flags_ |= std::ios_base::showpoint | std::ios_base::showbase;
196                     break;
197                 default:
198                     goto parse_width;
199             }
200             ++start;
201         } // loop on flag.
202         // reaching this point means the flag loop ran up to 'last'
203         if( start>=last) {
204             maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
205             return true;
206         }
207 
208       // first skip 'asterisk fields' : * or num (length)
209       parse_width:
210         if(*start == const_or_not(fac).widen( '*') )
211             ++start;
212         else if(start!=last && wrap_isdigit(fac, *start))
213             start = str2int(start, last, fpar->fmtstate_.width_, fac);
214 
215       parse_precision:
216         if( start>= last) {
217             maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
218             return true;
219         }
220         // handle precision spec
221         if (*start== const_or_not(fac).widen( '.')) {
222             ++start;
223             if(start != last && *start == const_or_not(fac).widen( '*') )
224                 ++start;
225             else if(start != last && wrap_isdigit(fac, *start)) {
226                 start = str2int(start, last, fpar->fmtstate_.precision_, fac);
227                 precision_set = true;
228             }
229             else // a '.' followed by neither '*' nor a digit
230                 fpar->fmtstate_.precision_ =0;
231         }
232 
233       // argument type modifiers
234         while (start != last) {
235             switch (wrap_narrow(fac, *start, 0)) {
236                 case 'h':
237                 case 'l':
238                 case 'j':
239                 case 'z':
240                 case 'L':
241                     // boost::format ignores argument type modifiers as it relies on
242                     // the type of the argument fed into it by operator %
243                     break;
244 
245                 // Note that the ptrdiff_t argument type 't' from C++11 is not honored
246                 // because it was already in use as the tabulation specifier in boost::format
247                 // case 't':
248 
249                 // Microsoft extensions:
250                 // https://msdn.microsoft.com/en-us/library/tcxf1dw6.aspx
251                 // mssiz goes 'I' -> '3' -> 0 for "I32" and 'I' -> '6' -> 0 for "I64"
252                 case 'w':
253                     break;
254                 case 'I':
255                     mssiz = 'I';
256                     break;
257                 case '3':
258                     if (mssiz != 'I') {
259                         maybe_throw_exception(exceptions, start - start0 + offset, fstring_size);
260                         return true;
261                     }
262                     mssiz = '3';
263                     break;
264                 case '2':
265                     if (mssiz != '3') {
266                         maybe_throw_exception(exceptions, start - start0 + offset, fstring_size);
267                         return true;
268                     }
269                     mssiz = 0x00;
270                     break;
271                 case '6':
272                     if (mssiz != 'I') {
273                         maybe_throw_exception(exceptions, start - start0 + offset, fstring_size);
274                         return true;
275                     }
276                     mssiz = '6';
277                     break;
278                 case '4':
279                     if (mssiz != '6') {
280                         maybe_throw_exception(exceptions, start - start0 + offset, fstring_size);
281                         return true;
282                     }
283                     mssiz = 0x00;
284                     break;
285                 default:
286                     if (mssiz && mssiz == 'I') { // a lone 'I' is accepted; a pending '3' or '6' stays set and is rejected below
287                         mssiz = 0;
288                     }
289                     goto parse_conversion_specification;
290             }
291             ++start;
292         } // loop on argument type modifiers to pick up 'hh', 'll', and the more complex microsoft ones
293 
294       parse_conversion_specification:
295         if (start >= last || mssiz) { // end of string, or unfinished I32 / I64 prefix
296             maybe_throw_exception(exceptions, start - start0 + offset, fstring_size);
297             return true;
298         }
299         // '%|...|' closed right here, without any conversion character
300         if( in_brackets && *start== const_or_not(fac).widen( '|') ) {
301             ++start;
302             return true;
303         }
304 
305         // The default flags are "dec" and "skipws"
306         // so if changing the base, need to unset basefield first
307 
308         switch (wrap_narrow(fac, *start, 0))
309         {
310             // Boolean
311             case 'b':
312                 fpar->fmtstate_.flags_ |= std::ios_base::boolalpha;
313                 break;
314 
315             // Decimal
316             case 'u':
317             case 'd':
318             case 'i':
319                 // Defaults are sufficient
320                 break;
321 
322             // Hex
323             case 'X':
324                 fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
325                 BOOST_FALLTHROUGH;
326             case 'x':
327             case 'p': // pointer => set hex.
328                 fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
329                 fpar->fmtstate_.flags_ |= std::ios_base::hex;
330                 break;
331 
332             // Octal
333             case 'o':
334                 fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
335                 fpar->fmtstate_.flags_ |= std::ios_base::oct;
336                 break;
337 
338             // Floating
339             case 'A':
340                 fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
341                 BOOST_FALLTHROUGH;
342             case 'a':
343                 fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
344                 fpar->fmtstate_.flags_ |= std::ios_base::fixed;
345                 fpar->fmtstate_.flags_ |= std::ios_base::scientific;
346                 break;
347             case 'E':
348                 fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
349                 BOOST_FALLTHROUGH;
350             case 'e':
351                 fpar->fmtstate_.flags_ |= std::ios_base::scientific;
352                 break;
353             case 'F':
354                 fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
355                 BOOST_FALLTHROUGH;
356             case 'f':
357                 fpar->fmtstate_.flags_ |= std::ios_base::fixed;
358                 break;
359             case 'G':
360                 fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
361                 BOOST_FALLTHROUGH;
362             case 'g':
363                 // default flags are correct here
364                 break;
365 
366             // Tabulation (a boost::format extension)
367             case 'T': // the character following 'T' is taken as the fill character
368                 ++start;
369                 if( start >= last) {
370                     maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
371                     return false;
372                 } else {
373                     fpar->fmtstate_.fill_ = *start;
374                 }
375                 fpar->pad_scheme_ |= format_item_t::tabulation;
376                 fpar->argN_ = format_item_t::argN_tabulation;
377                 break;
378             case 't': // same as 'T', with a space as the fill character
379                 fpar->fmtstate_.fill_ = const_or_not(fac).widen( ' ');
380                 fpar->pad_scheme_ |= format_item_t::tabulation;
381                 fpar->argN_ = format_item_t::argN_tabulation;
382                 break;
383 
384             // Character
385             case 'C':
386             case 'c':
387                 fpar->truncate_ = 1;
388                 break;
389 
390             // String
391             case 'S':
392             case 's':
393                 if(precision_set) // handle truncation manually, with own parameter.
394                     fpar->truncate_ = fpar->fmtstate_.precision_;
395                 fpar->fmtstate_.precision_ = 6; // default stream precision.
396                 break;
397 
398             // %n is insecure and ignored by boost::format
399             case 'n' :
400                 fpar->argN_ = format_item_t::argN_ignored;
401                 break;
402 
403             default: // unrecognised conversion character : reported if enabled, then stepped over like the others
404                 maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
405         }
406         ++start; // step over the conversion character (or over the fill character of %T)
407 
408         if( in_brackets ) {
409             if( start != last && *start== const_or_not(fac).widen( '|') ) {
410                 ++start;
411                 return true;
412             }
413             else  maybe_throw_exception(exceptions, start-start0+offset, fstring_size); // closing '|' is missing
414         }
415         return true;
416     }
417     // -end parse_printf_directive()
418 
419     template<class String, class Facet>
upper_bound_from_fstring(const String & buf,const typename String::value_type arg_mark,const Facet & fac,unsigned char exceptions)420     int upper_bound_from_fstring(const String& buf,
421                                  const typename String::value_type arg_mark,
422                                  const Facet& fac,
423                                  unsigned char exceptions)
424     {
425         // quick-parsing of the format-string to count arguments mark (arg_mark, '%')
426         // returns : upper bound on the number of format items in the format strings
427         using namespace boost::io;
428         typename String::size_type i1=0;
429         int num_items=0;
430         while( (i1=buf.find(arg_mark,i1)) != String::npos ) {
431             if( i1+1 >= buf.size() ) {
432                 if(exceptions & bad_format_string_bit)
433                     boost::throw_exception(bad_format_string(i1, buf.size() )); // must not end in ".. %"
434                 else {
435                   ++num_items;
436                   break;
437                 }
438             }
439             if(buf[i1+1] == buf[i1] ) {// escaped "%%"
440                 i1+=2; continue;
441             }
442 
443             ++i1;
444             // in case of %N% directives, dont count it double (wastes allocations..) :
445             i1 = detail::wrap_scan_notdigit(fac, buf.begin()+i1, buf.end()) - buf.begin();
446             if( i1 < buf.size() && buf[i1] == arg_mark )
447                 ++i1;
448             ++num_items;
449         }
450         return num_items;
451     }
452     template<class String> inline
append_string(String & dst,const String & src,const typename String::size_type beg,const typename String::size_type end)453     void append_string(String& dst, const String& src,
454                        const typename String::size_type beg,
455                        const typename String::size_type end) {
456         dst.append(src.begin()+beg, src.begin()+end);
457     }
458 
459 } // detail namespace
460 } // io namespace
461 
462 
463 
464 // -----------------------------------------------
465 //  format :: parse(..)
466 
467     template<class Ch, class Tr, class Alloc>
468     basic_format<Ch, Tr, Alloc>& basic_format<Ch, Tr, Alloc>::
parse(const string_type & buf)469     parse (const string_type& buf) {
470         // parse the format-string
471         using namespace std;
472 #if !defined(BOOST_NO_STD_LOCALE)
473         const std::ctype<Ch> & fac = BOOST_USE_FACET( std::ctype<Ch>, getloc());
474 #else
475         io::basic_oaltstringstream<Ch, Tr, Alloc> fac;
476         //has widen and narrow even on compilers without locale
477 #endif
478 
479         const Ch arg_mark = io::detail::const_or_not(fac).widen( '%');
480         bool ordered_args=true;
481         int max_argN=-1;
482 
483         // A: find upper_bound on num_items and allocates arrays
484         int num_items = io::detail::upper_bound_from_fstring(buf, arg_mark, fac, exceptions());
485         make_or_reuse_data(num_items);
486 
487         // B: Now the real parsing of the format string :
488         num_items=0;
489         typename string_type::size_type i0=0, i1=0;
490         typename string_type::const_iterator it;
491         bool special_things=false;
492         int cur_item=0;
493         while( (i1=buf.find(arg_mark,i1)) != string_type::npos ) {
494             string_type & piece = (cur_item==0) ? prefix_ : items_[cur_item-1].appendix_;
495             if( buf[i1+1] == buf[i1] ) { // escaped mark, '%%'
496                 io::detail::append_string(piece, buf, i0, i1+1);
497                 i1+=2; i0=i1;
498                 continue;
499             }
500             BOOST_ASSERT(  static_cast<unsigned int>(cur_item) < items_.size() || cur_item==0);
501 
502             if(i1!=i0) {
503                 io::detail::append_string(piece, buf, i0, i1);
504                 i0=i1;
505             }
506             ++i1;
507             it = buf.begin()+i1;
508             bool parse_ok = io::detail::parse_printf_directive(
509                 it, buf.end(), &items_[cur_item], fac, i1, exceptions());
510             i1 = it - buf.begin();
511             if( ! parse_ok ) // the directive will be printed verbatim
512                 continue;
513             i0=i1;
514             items_[cur_item].compute_states(); // process complex options, like zeropad, into params
515 
516             int argN=items_[cur_item].argN_;
517             if(argN == format_item_t::argN_ignored)
518                 continue;
519             if(argN ==format_item_t::argN_no_posit)
520                 ordered_args=false;
521             else if(argN == format_item_t::argN_tabulation) special_things=true;
522             else if(argN > max_argN) max_argN = argN;
523             ++num_items;
524             ++cur_item;
525         } // loop on %'s
526         BOOST_ASSERT(cur_item == num_items);
527 
528         // store the final piece of string
529         {
530             string_type & piece = (cur_item==0) ? prefix_ : items_[cur_item-1].appendix_;
531             io::detail::append_string(piece, buf, i0, buf.size());
532         }
533 
534         if( !ordered_args) {
535             if(max_argN >= 0 ) {  // dont mix positional with non-positionnal directives
536                 if(exceptions() & io::bad_format_string_bit)
537                     boost::throw_exception(
538                         io::bad_format_string(static_cast<std::size_t>(max_argN), 0));
539                 // else do nothing. => positionnal arguments are processed as non-positionnal
540             }
541             // set things like it would have been with positional directives :
542             int non_ordered_items = 0;
543             for(int i=0; i< num_items; ++i)
544                 if(items_[i].argN_ == format_item_t::argN_no_posit) {
545                     items_[i].argN_ = non_ordered_items;
546                     ++non_ordered_items;
547                 }
548             max_argN = non_ordered_items-1;
549         }
550 
551         // C: set some member data :
552         items_.resize(num_items, format_item_t(io::detail::const_or_not(fac).widen( ' ')) );
553 
554         if(special_things) style_ |= special_needs;
555         num_args_ = max_argN + 1;
556         if(ordered_args) style_ |=  ordered;
557         else style_ &= ~ordered;
558         return *this;
559     }
560 
561 } // namespace boost
562 
563 
564 #endif //  BOOST_FORMAT_PARSING_HPP
565