1 // ----------------------------------------------------------------------------
2 // parsing.hpp :  implementation of the parsing member functions
3 //                      ( parse, parse_printf_directive)
4 // ----------------------------------------------------------------------------
5 
6 //  Copyright Samuel Krempp 2003. Use, modification, and distribution are
7 //  subject to the Boost Software License, Version 1.0. (See accompanying
8 //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9 
10 // see http://www.boost.org/libs/format for library home page
11 
12 // ----------------------------------------------------------------------------
13 
14 #ifndef BOOST_FORMAT_PARSING_HPP
15 #define BOOST_FORMAT_PARSING_HPP
16 
17 
#include <limits>

#include <boost/format/format_class.hpp>
#include <boost/format/exceptions.hpp>
#include <boost/throw_exception.hpp>
#include <boost/assert.hpp>
22 
23 
24 namespace boost {
25 namespace io {
26 namespace detail {
27 
// const_or_not(x) : hand x back in the form needed to call widen / narrow on it.
#if !defined(BOOST_NO_STD_LOCALE)
// the const reference is passed through untouched.
template<class T>
const T& const_or_not(const T& x)
{
    return x;
}
#else
// streams are used for narrow / widen here, and those methods are not const,
// so constness is cast away.
template<class T>
T& const_or_not(const T& x)
{
    T& mutable_x = const_cast<T&>(x);
    return mutable_x;
}
#endif
40 
// Narrow character c to a plain char through fac's narrow() member;
// deflt is forwarded as narrow()'s second argument.
template<class Ch, class Facet> inline
char wrap_narrow(const Facet& fac, Ch c, char deflt)
{
    const char narrowed = const_or_not(fac).narrow(c, deflt);
    return narrowed;
}
45 
// Tell whether c is a digit.
// By default fac.is() is queried with the ctype 'digit' mask; when
// BOOST_NO_LOCALE_ISDIGIT is defined, isdigit() is used and fac is ignored.
template<class Ch, class Facet> inline
bool wrap_isdigit(const Facet& fac, Ch c)
{
#if defined( BOOST_NO_LOCALE_ISDIGIT )
    (void) fac;     // silences the "unused parameter" warning
    using namespace std;
    return isdigit(c) != 0;
#else
    return fac.is(std::ctype<Ch>::digit, c);
#endif
}
56 
// Step over the leading run of digits in [beg, end).
// Returns : the position of the first character that is not a digit,
//           or end when every character is a digit.
template<class Iter, class Facet>
Iter wrap_scan_notdigit(const Facet & fac, Iter beg, Iter end)
{
    while(beg != end) {
        if( ! wrap_isdigit(fac, *beg))
            break;
        ++beg;
    }
    return beg;
}
63 
64 
// Read the run of decimal digits at the front of [start, last) into res.
// Input   : [start, last) iterators range, and
//           a Facet used for digit tests and for its narrow member function
// Effects : res is set to the value of the leading digit sequence
//           (0 when the range does not begin with a digit).  A value too
//           large for Res is clamped to the largest value of Res instead of
//           overflowing; the remaining digits are still consumed.
// Returns : iterator to the first character that is not part of the number
template<class Res, class Iter, class Facet>
Iter str2int (const Iter & start, const Iter & last, Res & res,
             const Facet& fac)
{
    // parenthesised to stay safe if 'max' is defined as a macro
    const Res res_max   = (std::numeric_limits<Res>::max)();
    const Res max_div10 = res_max / 10;
    const Res max_mod10 = res_max % 10;

    Iter it;
    res=0;
    for(it=start; it != last && wrap_isdigit(fac, *it); ++it ) {
        char cur_ch = wrap_narrow(fac, *it, 0); // cant fail.
        // digit characters are contiguous : 22.2.1.1.2.13 of the C++ standard
        const Res digit = static_cast<Res>(cur_ch - '0');
        if(res > max_div10 || (res == max_div10 && digit > max_mod10))
            res = res_max;   // res*10 + digit would overflow : saturate
        else
            res = res * 10 + digit;
    }
    return it;
}
83 
// skip one of printf's "asterisk-fields" ( *  or  *N$ ) in a format-string
// Input   : start, taken to designate the asterisk itself; last, the end of
//           the format-string; a Facet used for digit tests and to widen '$'
// Returns : iterator just past the asterisk, past the digits that follow it,
//           and past one '$' if that is the next character
template<class Iter, class Facet>
Iter skip_asterisk(Iter start, Iter last, const Facet& fac)
{
    Iter cur = start;
    ++cur;                                      // the asterisk
    cur = wrap_scan_notdigit(fac, cur, last);   // the optional digits
    if(cur == last)
        return cur;
    if(*cur == const_or_not(fac).widen( '$'))
        ++cur;
    return cur;
}
99 
100 
101     // auxiliary func called by parse_printf_directive
102     // for centralising error handling
103     // it either throws if user sets the corresponding flag, or does nothing.
maybe_throw_exception(unsigned char exceptions,std::size_t pos,std::size_t size)104     inline void maybe_throw_exception(unsigned char exceptions,
105                                       std::size_t pos, std::size_t size)
106     {
107         if(exceptions & io::bad_format_string_bit)
108             boost::throw_exception(io::bad_format_string(pos, size) );
109     }
110 
111 
112     // Input: the position of a printf-directive in the format-string
113     //    a basic_ios& merely to use its widen/narrow member function
114     //    a bitset'exceptions' telling whether to throw exceptions on errors.
115     // Returns:
116     //  true if parse succeeded (ignore some errors if exceptions disabled)
117     //  false if it failed so bad that the directive should be printed verbatim
118     // Effects:
119     //  start is incremented so that *start is the first char after
120     //     this directive
121     //  *fpar is set with the parameters read in the directive
122     template<class Ch, class Tr, class Alloc, class Iter, class Facet>
parse_printf_directive(Iter & start,const Iter & last,detail::format_item<Ch,Tr,Alloc> * fpar,const Facet & fac,std::size_t offset,unsigned char exceptions)123     bool parse_printf_directive(Iter & start, const Iter& last,
124                                 detail::format_item<Ch, Tr, Alloc> * fpar,
125                                 const Facet& fac,
126                                 std::size_t offset, unsigned char exceptions)
127     {
128         typedef typename basic_format<Ch, Tr, Alloc>::format_item_t format_item_t;
129 
130         fpar->argN_ = format_item_t::argN_no_posit;  // if no positional-directive
131         bool precision_set = false;
132         bool in_brackets=false;
133         Iter start0 = start;
134         std::size_t fstring_size = last-start0+offset;
135 
136         if(start>= last) { // empty directive : this is a trailing %
137                 maybe_throw_exception(exceptions, start-start0 + offset, fstring_size);
138                 return false;
139         }
140 
141         if(*start== const_or_not(fac).widen( '|')) {
142             in_brackets=true;
143             if( ++start >= last ) {
144                 maybe_throw_exception(exceptions, start-start0 + offset, fstring_size);
145                 return false;
146             }
147         }
148 
149         // the flag '0' would be picked as a digit for argument order, but here it's a flag :
150         if(*start== const_or_not(fac).widen( '0'))
151             goto parse_flags;
152 
153         // handle argument order (%2$d)  or possibly width specification: %2d
154         if(wrap_isdigit(fac, *start)) {
155             int n;
156             start = str2int(start, last, n, fac);
157             if( start >= last ) {
158                 maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
159                 return false;
160             }
161 
162             // %N% case : this is already the end of the directive
163             if( *start ==  const_or_not(fac).widen( '%') ) {
164                 fpar->argN_ = n-1;
165                 ++start;
166                 if( in_brackets)
167                     maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
168                 // but don't return.  maybe "%" was used in lieu of '$', so we go on.
169                 else
170                     return true;
171             }
172 
173             if ( *start== const_or_not(fac).widen( '$') ) {
174                 fpar->argN_ = n-1;
175                 ++start;
176             }
177             else {
178                 // non-positionnal directive
179                 fpar->fmtstate_.width_ = n;
180                 fpar->argN_  = format_item_t::argN_no_posit;
181                 goto parse_precision;
182             }
183         }
184 
185       parse_flags:
186         // handle flags
187         while ( start != last) { // as long as char is one of + - = _ # 0 l h   or ' '
188             // misc switches
189             switch ( wrap_narrow(fac, *start, 0)) {
190             case '\'' : break; // no effect yet. (painful to implement)
191             case 'l':
192             case 'h':  // short/long modifier : for printf-comaptibility (no action needed)
193                 break;
194             case '-':
195                 fpar->fmtstate_.flags_ |= std::ios_base::left;
196                 break;
197             case '=':
198                 fpar->pad_scheme_ |= format_item_t::centered;
199                 break;
200             case '_':
201                 fpar->fmtstate_.flags_ |= std::ios_base::internal;
202                 break;
203             case ' ':
204                 fpar->pad_scheme_ |= format_item_t::spacepad;
205                 break;
206             case '+':
207                 fpar->fmtstate_.flags_ |= std::ios_base::showpos;
208                 break;
209             case '0':
210                 fpar->pad_scheme_ |= format_item_t::zeropad;
211                 // need to know alignment before really setting flags,
212                 // so just add 'zeropad' flag for now, it will be processed later.
213                 break;
214             case '#':
215                 fpar->fmtstate_.flags_ |= std::ios_base::showpoint | std::ios_base::showbase;
216                 break;
217             default:
218                 goto parse_width;
219             }
220             ++start;
221         } // loop on flag.
222 
223         if( start>=last) {
224             maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
225             return true;
226         }
227       parse_width:
228         // handle width spec
229         // first skip 'asterisk fields' :  *, or *N$
230         if(*start == const_or_not(fac).widen( '*') )
231             start = skip_asterisk(start, last, fac);
232         if(start!=last && wrap_isdigit(fac, *start))
233             start = str2int(start, last, fpar->fmtstate_.width_, fac);
234 
235       parse_precision:
236         if( start>= last) {
237             maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
238             return true;
239         }
240         // handle precision spec
241         if (*start== const_or_not(fac).widen( '.')) {
242             ++start;
243             if(start != last && *start == const_or_not(fac).widen( '*') )
244                 start = skip_asterisk(start, last, fac);
245             if(start != last && wrap_isdigit(fac, *start)) {
246                 start = str2int(start, last, fpar->fmtstate_.precision_, fac);
247                 precision_set = true;
248             }
249             else
250                 fpar->fmtstate_.precision_ =0;
251         }
252 
253         // handle  formatting-type flags :
254         while( start != last && ( *start== const_or_not(fac).widen( 'l')
255                                   || *start== const_or_not(fac).widen( 'L')
256                                   || *start== const_or_not(fac).widen( 'h')) )
257             ++start;
258         if( start>=last) {
259             maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
260             return true;
261         }
262 
263         if( in_brackets && *start== const_or_not(fac).widen( '|') ) {
264             ++start;
265             return true;
266         }
267         switch ( wrap_narrow(fac, *start, 0) ) {
268         case 'X':
269             fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
270         case 'p': // pointer => set hex.
271         case 'x':
272             fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
273             fpar->fmtstate_.flags_ |= std::ios_base::hex;
274             break;
275 
276         case 'o':
277             fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
278             fpar->fmtstate_.flags_ |=  std::ios_base::oct;
279             break;
280 
281         case 'E':
282             fpar->fmtstate_.flags_ |=  std::ios_base::uppercase;
283         case 'e':
284             fpar->fmtstate_.flags_ &= ~std::ios_base::floatfield;
285             fpar->fmtstate_.flags_ |=  std::ios_base::scientific;
286 
287             fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
288             fpar->fmtstate_.flags_ |=  std::ios_base::dec;
289             break;
290 
291         case 'f':
292             fpar->fmtstate_.flags_ &= ~std::ios_base::floatfield;
293             fpar->fmtstate_.flags_ |=  std::ios_base::fixed;
294         case 'u':
295         case 'd':
296         case 'i':
297             fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
298             fpar->fmtstate_.flags_ |=  std::ios_base::dec;
299             break;
300 
301         case 'T':
302             ++start;
303             if( start >= last)
304                 maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
305             else
306                 fpar->fmtstate_.fill_ = *start;
307             fpar->pad_scheme_ |= format_item_t::tabulation;
308             fpar->argN_ = format_item_t::argN_tabulation;
309             break;
310         case 't':
311             fpar->fmtstate_.fill_ = const_or_not(fac).widen( ' ');
312             fpar->pad_scheme_ |= format_item_t::tabulation;
313             fpar->argN_ = format_item_t::argN_tabulation;
314             break;
315 
316         case 'G':
317             fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
318             break;
319         case 'g': // 'g' conversion is default for floats.
320             fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
321             fpar->fmtstate_.flags_ |=  std::ios_base::dec;
322 
323             // CLEAR all floatield flags, so stream will CHOOSE
324             fpar->fmtstate_.flags_ &= ~std::ios_base::floatfield;
325             break;
326 
327         case 'C':
328         case 'c':
329             fpar->truncate_ = 1;
330             break;
331         case 'S':
332         case 's':
333             if(precision_set) // handle truncation manually, with own parameter.
334                 fpar->truncate_ = fpar->fmtstate_.precision_;
335             fpar->fmtstate_.precision_ = 6; // default stream precision.
336             break;
337         case 'n' :
338             fpar->argN_ = format_item_t::argN_ignored;
339             break;
340         default:
341             maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
342         }
343         ++start;
344 
345         if( in_brackets ) {
346             if( start != last && *start== const_or_not(fac).widen( '|') ) {
347                 ++start;
348                 return true;
349             }
350             else  maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
351         }
352         return true;
353     }
354     // -end parse_printf_directive()
355 
356     template<class String, class Facet>
upper_bound_from_fstring(const String & buf,const typename String::value_type arg_mark,const Facet & fac,unsigned char exceptions)357     int upper_bound_from_fstring(const String& buf,
358                                  const typename String::value_type arg_mark,
359                                  const Facet& fac,
360                                  unsigned char exceptions)
361     {
362         // quick-parsing of the format-string to count arguments mark (arg_mark, '%')
363         // returns : upper bound on the number of format items in the format strings
364         using namespace boost::io;
365         typename String::size_type i1=0;
366         int num_items=0;
367         while( (i1=buf.find(arg_mark,i1)) != String::npos ) {
368             if( i1+1 >= buf.size() ) {
369                 if(exceptions & bad_format_string_bit)
370                     boost::throw_exception(bad_format_string(i1, buf.size() )); // must not end in ".. %"
371                 else {
372                   ++num_items;
373                   break;
374                 }
375             }
376             if(buf[i1+1] == buf[i1] ) {// escaped "%%"
377                 i1+=2; continue;
378             }
379 
380             ++i1;
381             // in case of %N% directives, dont count it double (wastes allocations..) :
382             i1 = detail::wrap_scan_notdigit(fac, buf.begin()+i1, buf.end()) - buf.begin();
383             if( i1 < buf.size() && buf[i1] == arg_mark )
384                 ++i1;
385             ++num_items;
386         }
387         return num_items;
388     }
// Append the characters of src at positions [beg, end) to dst.
template<class String> inline
void append_string(String& dst, const String& src,
                   const typename String::size_type beg,
                   const typename String::size_type end)
{
    typedef typename String::const_iterator const_iter;
    const const_iter first = src.begin() + beg;
    const const_iter past_last = src.begin() + end;
    dst.append(first, past_last);
}
395 
396 } // detail namespace
397 } // io namespace
398 
399 
400 
401 // -----------------------------------------------
402 //  format :: parse(..)
403 
404     template<class Ch, class Tr, class Alloc>
405     basic_format<Ch, Tr, Alloc>& basic_format<Ch, Tr, Alloc>::
parse(const string_type & buf)406     parse (const string_type& buf) {
407         // parse the format-string
408         using namespace std;
409 #if !defined(BOOST_NO_STD_LOCALE)
410         const std::ctype<Ch> & fac = BOOST_USE_FACET( std::ctype<Ch>, getloc());
411 #else
412         io::basic_oaltstringstream<Ch, Tr, Alloc> fac;
413         //has widen and narrow even on compilers without locale
414 #endif
415 
416         const Ch arg_mark = io::detail::const_or_not(fac).widen( '%');
417         bool ordered_args=true;
418         int max_argN=-1;
419 
420         // A: find upper_bound on num_items and allocates arrays
421         int num_items = io::detail::upper_bound_from_fstring(buf, arg_mark, fac, exceptions());
422         make_or_reuse_data(num_items);
423 
424         // B: Now the real parsing of the format string :
425         num_items=0;
426         typename string_type::size_type i0=0, i1=0;
427         typename string_type::const_iterator it;
428         bool special_things=false;
429         int cur_item=0;
430         while( (i1=buf.find(arg_mark,i1)) != string_type::npos ) {
431             string_type & piece = (cur_item==0) ? prefix_ : items_[cur_item-1].appendix_;
432             if( buf[i1+1] == buf[i1] ) { // escaped mark, '%%'
433                 io::detail::append_string(piece, buf, i0, i1+1);
434                 i1+=2; i0=i1;
435                 continue;
436             }
437             BOOST_ASSERT(  static_cast<unsigned int>(cur_item) < items_.size() || cur_item==0);
438 
439             if(i1!=i0) {
440                 io::detail::append_string(piece, buf, i0, i1);
441                 i0=i1;
442             }
443             ++i1;
444             it = buf.begin()+i1;
445             bool parse_ok = io::detail::parse_printf_directive(
446                 it, buf.end(), &items_[cur_item], fac, i1, exceptions());
447             i1 = it - buf.begin();
448             if( ! parse_ok ) // the directive will be printed verbatim
449                 continue;
450             i0=i1;
451             items_[cur_item].compute_states(); // process complex options, like zeropad, into params
452 
453             int argN=items_[cur_item].argN_;
454             if(argN == format_item_t::argN_ignored)
455                 continue;
456             if(argN ==format_item_t::argN_no_posit)
457                 ordered_args=false;
458             else if(argN == format_item_t::argN_tabulation) special_things=true;
459             else if(argN > max_argN) max_argN = argN;
460             ++num_items;
461             ++cur_item;
462         } // loop on %'s
463         BOOST_ASSERT(cur_item == num_items);
464 
465         // store the final piece of string
466         {
467             string_type & piece = (cur_item==0) ? prefix_ : items_[cur_item-1].appendix_;
468             io::detail::append_string(piece, buf, i0, buf.size());
469         }
470 
471         if( !ordered_args) {
472             if(max_argN >= 0 ) {  // dont mix positional with non-positionnal directives
473                 if(exceptions() & io::bad_format_string_bit)
474                     boost::throw_exception(
475                         io::bad_format_string(static_cast<std::size_t>(max_argN), 0));
476                 // else do nothing. => positionnal arguments are processed as non-positionnal
477             }
478             // set things like it would have been with positional directives :
479             int non_ordered_items = 0;
480             for(int i=0; i< num_items; ++i)
481                 if(items_[i].argN_ == format_item_t::argN_no_posit) {
482                     items_[i].argN_ = non_ordered_items;
483                     ++non_ordered_items;
484                 }
485             max_argN = non_ordered_items-1;
486         }
487 
488         // C: set some member data :
489         items_.resize(num_items, format_item_t(io::detail::const_or_not(fac).widen( ' ')) );
490 
491         if(special_things) style_ |= special_needs;
492         num_args_ = max_argN + 1;
493         if(ordered_args) style_ |=  ordered;
494         else style_ &= ~ordered;
495         return *this;
496     }
497 
498 } // namespace boost
499 
500 
501 #endif //  BOOST_FORMAT_PARSING_HPP
502