// ----------------------------------------------------------------------------
// parsing.hpp :  implementation of the parsing member functions
//                      ( parse, parse_printf_directive)
// ----------------------------------------------------------------------------

//  Copyright Samuel Krempp 2003. Use, modification, and distribution are
//  subject to the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// see http://www.boost.org/libs/format for library home page

// ----------------------------------------------------------------------------
13 
14 #ifndef BOOST_FORMAT_PARSING_HPP
15 #define BOOST_FORMAT_PARSING_HPP
16 
17 
#include <limits>

#include <boost/format/format_class.hpp>
#include <boost/throw_exception.hpp>
#include <boost/assert.hpp>
21 
22 
23 namespace boost {
24 namespace io {
25 namespace detail {
26 
// const_or_not(x) : yields the reference through which narrow() / widen()
// are called on the character-conversion object x.
#if defined(BOOST_NO_STD_LOCALE)
// streams will be used for narrow / widen, but these methods are not const,
// so the constness of x is cast away.
template<class T>
T& const_or_not(const T& x) {
    return const_cast<T&>(x);
}
#else
// x is handed back unchanged, still const.
template<class T>
const T& const_or_not(const T& x) {
    return x;
}
#endif
39 
// Converts character c to a plain char by calling fac.narrow(c, deflt);
// deflt is forwarded untouched as narrow()'s default character.
// The call goes through const_or_not() so that it also compiles when fac's
// narrow() is a non-const member.
template<class Ch, class Facet> inline
char wrap_narrow(const Facet& fac, Ch c, char deflt) {
    return const_or_not(fac).narrow(c, deflt);
}
44 
// Tells whether character c is a decimal digit.
// By default the classification is delegated to fac.is(); when
// BOOST_NO_LOCALE_ISDIGIT is defined, isdigit() is used instead and fac is
// not consulted.
template<class Ch, class Facet> inline
bool wrap_isdigit(const Facet& fac, Ch c) {
#if ! defined( BOOST_NO_LOCALE_ISDIGIT )
    return fac.is(std::ctype<Ch>::digit, c);
#else
    (void) fac;     // not needed in this configuration
    using namespace std;
    return isdigit(c) != 0;     // isdigit() yields an int, make the bool explicit
#endif
}
54 
// Scans [beg, end) and returns the position of the first character that is
// not a digit according to wrap_isdigit(), or end if all of them are digits.
template<class Iter, class Facet>
Iter wrap_scan_notdigit(const Facet & fac, Iter beg, Iter end) {
    while(beg != end && wrap_isdigit(fac, *beg))
        ++beg;
    return beg;
}
61 
62 
// Input   : [start, last) iterators range and
//           a Facet used to classify and narrow the characters
// Effects : reads the leading run of decimal digits and stores its value
//           in res (res is 0 when the range does not begin with a digit).
//           Res is expected to be an integer type; a digit run too large
//           for Res saturates at the maximum of Res instead of overflowing.
// Returns : iterator to the first character that was not consumed
template<class Res, class Iter, class Facet>
Iter str2int (const Iter & start, const Iter & last, Res & res,
              const Facet& fac)
{
    // parenthesised to stay clear of any max() macro
    const Res max_res = (std::numeric_limits<Res>::max)();
    Iter it;
    res = 0;
    for(it = start; it != last && wrap_isdigit(fac, *it); ++it ) {
        char cur_ch = wrap_narrow(fac, *it, 0); // cant fail.
        // digits are contiguous : 22.2.1.1.2.13 of the C++ standard
        const Res digit = static_cast<Res>(cur_ch - '0');
        if(res > (max_res - digit) / 10)
            res = max_res;          // res*10 + digit would not fit in Res
        else
            res = res * 10 + digit;
    }
    return it;
}
81 
// skip printf's "asterisk-fields" directives in the format-string
// Input   : [start, last) iterators range, start designating the '*'
//           a Facet to use its widen member function and classify digits
// Effects : steps over the '*', the digits that follow it (if any), and
//           one '$' when it comes right after
// Returns : iterator to the first character after the skipped field
template<class Iter, class Facet>
Iter skip_asterisk(Iter start, Iter last, const Facet& fac)
{
    ++start;    // the '*' itself
    start = wrap_scan_notdigit(fac, start, last);
    if(start != last && *start == const_or_not(fac).widen( '$') )
        ++start;
    return start;
}
97 
98 
99     // auxiliary func called by parse_printf_directive
100     // for centralising error handling
101     // it either throws if user sets the corresponding flag, or does nothing.
maybe_throw_exception(unsigned char exceptions,std::size_t pos,std::size_t size)102     inline void maybe_throw_exception(unsigned char exceptions,
103                                       std::size_t pos, std::size_t size)
104     {
105         if(exceptions & io::bad_format_string_bit)
106             boost::throw_exception(io::bad_format_string(pos, size) );
107     }
108 
109 
110     // Input: the position of a printf-directive in the format-string
111     //    a basic_ios& merely to use its widen/narrow member function
112     //    a bitset'exceptions' telling whether to throw exceptions on errors.
113     // Returns:
114     //  true if parse succeeded (ignore some errors if exceptions disabled)
115     //  false if it failed so bad that the directive should be printed verbatim
116     // Effects:
117     //  start is incremented so that *start is the first char after
118     //     this directive
119     //  *fpar is set with the parameters read in the directive
120     template<class Ch, class Tr, class Alloc, class Iter, class Facet>
parse_printf_directive(Iter & start,const Iter & last,detail::format_item<Ch,Tr,Alloc> * fpar,const Facet & fac,std::size_t offset,unsigned char exceptions)121     bool parse_printf_directive(Iter & start, const Iter& last,
122                                 detail::format_item<Ch, Tr, Alloc> * fpar,
123                                 const Facet& fac,
124                                 std::size_t offset, unsigned char exceptions)
125     {
126         typedef typename basic_format<Ch, Tr, Alloc>::format_item_t format_item_t;
127 
128         fpar->argN_ = format_item_t::argN_no_posit;  // if no positional-directive
129         bool precision_set = false;
130         bool in_brackets=false;
131         Iter start0 = start;
132         std::size_t fstring_size = last-start0+offset;
133         if(*start== const_or_not(fac).widen( '|')) {
134             in_brackets=true;
135             if( ++start >= last ) {
136                 maybe_throw_exception(exceptions, start-start0 + offset, fstring_size);
137                 return false;
138             }
139         }
140 
141         // the flag '0' would be picked as a digit for argument order, but here it's a flag :
142         if(*start== const_or_not(fac).widen( '0'))
143             goto parse_flags;
144 
145         // handle argument order (%2$d)  or possibly width specification: %2d
146         if(wrap_isdigit(fac, *start)) {
147             int n;
148             start = str2int(start, last, n, fac);
149             if( start >= last ) {
150                 maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
151                 return false;
152             }
153 
154             // %N% case : this is already the end of the directive
155             if( *start ==  const_or_not(fac).widen( '%') ) {
156                 fpar->argN_ = n-1;
157                 ++start;
158                 if( in_brackets)
159                     maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
160                 // but don't return.  maybe "%" was used in lieu of '$', so we go on.
161                 else
162                     return true;
163             }
164 
165             if ( *start== const_or_not(fac).widen( '$') ) {
166                 fpar->argN_ = n-1;
167                 ++start;
168             }
169             else {
170                 // non-positionnal directive
171                 fpar->fmtstate_.width_ = n;
172                 fpar->argN_  = format_item_t::argN_no_posit;
173                 goto parse_precision;
174             }
175         }
176 
177       parse_flags:
178         // handle flags
179         while ( start != last) { // as long as char is one of + - = _ # 0 l h   or ' '
180             // misc switches
181             switch ( wrap_narrow(fac, *start, 0)) {
182             case '\'' : break; // no effect yet. (painful to implement)
183             case 'l':
184             case 'h':  // short/long modifier : for printf-comaptibility (no action needed)
185                 break;
186             case '-':
187                 fpar->fmtstate_.flags_ |= std::ios_base::left;
188                 break;
189             case '=':
190                 fpar->pad_scheme_ |= format_item_t::centered;
191                 break;
192             case '_':
193                 fpar->fmtstate_.flags_ |= std::ios_base::internal;
194                 break;
195             case ' ':
196                 fpar->pad_scheme_ |= format_item_t::spacepad;
197                 break;
198             case '+':
199                 fpar->fmtstate_.flags_ |= std::ios_base::showpos;
200                 break;
201             case '0':
202                 fpar->pad_scheme_ |= format_item_t::zeropad;
203                 // need to know alignment before really setting flags,
204                 // so just add 'zeropad' flag for now, it will be processed later.
205                 break;
206             case '#':
207                 fpar->fmtstate_.flags_ |= std::ios_base::showpoint | std::ios_base::showbase;
208                 break;
209             default:
210                 goto parse_width;
211             }
212             ++start;
213         } // loop on flag.
214 
215         if( start>=last) {
216             maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
217             return true;
218         }
219       parse_width:
220         // handle width spec
221         // first skip 'asterisk fields' :  *, or *N$
222         if(*start == const_or_not(fac).widen( '*') )
223             start = skip_asterisk(start, last, fac);
224         if(start!=last && wrap_isdigit(fac, *start))
225             start = str2int(start, last, fpar->fmtstate_.width_, fac);
226 
227       parse_precision:
228         if( start>= last) {
229             maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
230             return true;
231         }
232         // handle precision spec
233         if (*start== const_or_not(fac).widen( '.')) {
234             ++start;
235             if(start != last && *start == const_or_not(fac).widen( '*') )
236                 start = skip_asterisk(start, last, fac);
237             if(start != last && wrap_isdigit(fac, *start)) {
238                 start = str2int(start, last, fpar->fmtstate_.precision_, fac);
239                 precision_set = true;
240             }
241             else
242                 fpar->fmtstate_.precision_ =0;
243         }
244 
245         // handle  formatting-type flags :
246         while( start != last && ( *start== const_or_not(fac).widen( 'l')
247                                   || *start== const_or_not(fac).widen( 'L')
248                                   || *start== const_or_not(fac).widen( 'h')) )
249             ++start;
250         if( start>=last) {
251             maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
252             return true;
253         }
254 
255         if( in_brackets && *start== const_or_not(fac).widen( '|') ) {
256             ++start;
257             return true;
258         }
259         switch ( wrap_narrow(fac, *start, 0) ) {
260         case 'X':
261             fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
262         case 'p': // pointer => set hex.
263         case 'x':
264             fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
265             fpar->fmtstate_.flags_ |= std::ios_base::hex;
266             break;
267 
268         case 'o':
269             fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
270             fpar->fmtstate_.flags_ |=  std::ios_base::oct;
271             break;
272 
273         case 'E':
274             fpar->fmtstate_.flags_ |=  std::ios_base::uppercase;
275         case 'e':
276             fpar->fmtstate_.flags_ &= ~std::ios_base::floatfield;
277             fpar->fmtstate_.flags_ |=  std::ios_base::scientific;
278 
279             fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
280             fpar->fmtstate_.flags_ |=  std::ios_base::dec;
281             break;
282 
283         case 'f':
284             fpar->fmtstate_.flags_ &= ~std::ios_base::floatfield;
285             fpar->fmtstate_.flags_ |=  std::ios_base::fixed;
286         case 'u':
287         case 'd':
288         case 'i':
289             fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
290             fpar->fmtstate_.flags_ |=  std::ios_base::dec;
291             break;
292 
293         case 'T':
294             ++start;
295             if( start >= last)
296                 maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
297             else
298                 fpar->fmtstate_.fill_ = *start;
299             fpar->pad_scheme_ |= format_item_t::tabulation;
300             fpar->argN_ = format_item_t::argN_tabulation;
301             break;
302         case 't':
303             fpar->fmtstate_.fill_ = const_or_not(fac).widen( ' ');
304             fpar->pad_scheme_ |= format_item_t::tabulation;
305             fpar->argN_ = format_item_t::argN_tabulation;
306             break;
307 
308         case 'G':
309             fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
310             break;
311         case 'g': // 'g' conversion is default for floats.
312             fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
313             fpar->fmtstate_.flags_ |=  std::ios_base::dec;
314 
315             // CLEAR all floatield flags, so stream will CHOOSE
316             fpar->fmtstate_.flags_ &= ~std::ios_base::floatfield;
317             break;
318 
319         case 'C':
320         case 'c':
321             fpar->truncate_ = 1;
322             break;
323         case 'S':
324         case 's':
325             if(precision_set) // handle truncation manually, with own parameter.
326                 fpar->truncate_ = fpar->fmtstate_.precision_;
327             fpar->fmtstate_.precision_ = 6; // default stream precision.
328             break;
329         case 'n' :
330             fpar->argN_ = format_item_t::argN_ignored;
331             break;
332         default:
333             maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
334         }
335         ++start;
336 
337         if( in_brackets ) {
338             if( start != last && *start== const_or_not(fac).widen( '|') ) {
339                 ++start;
340                 return true;
341             }
342             else  maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
343         }
344         return true;
345     }
346     // -end parse_printf_directive()
347 
348     template<class String, class Facet>
upper_bound_from_fstring(const String & buf,const typename String::value_type arg_mark,const Facet & fac,unsigned char exceptions)349     int upper_bound_from_fstring(const String& buf,
350                                  const typename String::value_type arg_mark,
351                                  const Facet& fac,
352                                  unsigned char exceptions)
353     {
354         // quick-parsing of the format-string to count arguments mark (arg_mark, '%')
355         // returns : upper bound on the number of format items in the format strings
356         using namespace boost::io;
357         typename String::size_type i1=0;
358         int num_items=0;
359         while( (i1=buf.find(arg_mark,i1)) != String::npos ) {
360             if( i1+1 >= buf.size() ) {
361                 if(exceptions & bad_format_string_bit)
362                     boost::throw_exception(bad_format_string(i1, buf.size() )); // must not end in ".. %"
363                 else break; // stop there, ignore last '%'
364             }
365             if(buf[i1+1] == buf[i1] ) {// escaped "%%"
366                 i1+=2; continue;
367             }
368 
369             ++i1;
370             // in case of %N% directives, dont count it double (wastes allocations..) :
371             i1 = detail::wrap_scan_notdigit(fac, buf.begin()+i1, buf.end()) - buf.begin();
372             if( i1 < buf.size() && buf[i1] == arg_mark )
373                 ++i1;
374             ++num_items;
375         }
376         return num_items;
377     }
// Appends the characters of src at indexes [beg, end) to dst.
// When BOOST_NO_STRING_APPEND is defined, a temporary substring and
// operator+= are used instead of the iterator-range append().
template<class String> inline
void append_string(String& dst, const String& src,
                   const typename String::size_type beg,
                   const typename String::size_type end) {
#if !defined(BOOST_NO_STRING_APPEND)
    dst.append(src.begin()+beg, src.begin()+end);
#else
    dst += src.substr(beg, end-beg);
#endif
}
388 
389 } // detail namespace
390 } // io namespace
391 
392 
393 
394 // -----------------------------------------------
395 //  format :: parse(..)
396 
397     template<class Ch, class Tr, class Alloc>
398     basic_format<Ch, Tr, Alloc>& basic_format<Ch, Tr, Alloc>::
parse(const string_type & buf)399     parse (const string_type& buf) {
400         // parse the format-string
401         using namespace std;
402 #if !defined(BOOST_NO_STD_LOCALE)
403         const std::ctype<Ch> & fac = BOOST_USE_FACET( std::ctype<Ch>, getloc());
404 #else
405         io::basic_oaltstringstream<Ch, Tr, Alloc> fac;
406         //has widen and narrow even on compilers without locale
407 #endif
408 
409         const Ch arg_mark = io::detail::const_or_not(fac).widen( '%');
410         bool ordered_args=true;
411         int max_argN=-1;
412 
413         // A: find upper_bound on num_items and allocates arrays
414         int num_items = io::detail::upper_bound_from_fstring(buf, arg_mark, fac, exceptions());
415         make_or_reuse_data(num_items);
416 
417         // B: Now the real parsing of the format string :
418         num_items=0;
419         typename string_type::size_type i0=0, i1=0;
420         typename string_type::const_iterator it;
421         bool special_things=false;
422         int cur_item=0;
423         while( (i1=buf.find(arg_mark,i1)) != string_type::npos ) {
424             string_type & piece = (cur_item==0) ? prefix_ : items_[cur_item-1].appendix_;
425             if( buf[i1+1] == buf[i1] ) { // escaped mark, '%%'
426                 io::detail::append_string(piece, buf, i0, i1+1);
427                 i1+=2; i0=i1;
428                 continue;
429             }
430             BOOST_ASSERT(  static_cast<unsigned int>(cur_item) < items_.size() || cur_item==0);
431 
432             if(i1!=i0)
433                 io::detail::append_string(piece, buf, i0, i1);
434             ++i1;
435             it = buf.begin()+i1;
436             bool parse_ok = io::detail::parse_printf_directive(
437                 it, buf.end(), &items_[cur_item], fac, i1, exceptions());
438             i1 = it - buf.begin();
439             if( ! parse_ok ) // the directive will be printed verbatim
440                 continue;
441             i0=i1;
442             items_[cur_item].compute_states(); // process complex options, like zeropad, into params
443 
444             int argN=items_[cur_item].argN_;
445             if(argN == format_item_t::argN_ignored)
446                 continue;
447             if(argN ==format_item_t::argN_no_posit)
448                 ordered_args=false;
449             else if(argN == format_item_t::argN_tabulation) special_things=true;
450             else if(argN > max_argN) max_argN = argN;
451             ++num_items;
452             ++cur_item;
453         } // loop on %'s
454         BOOST_ASSERT(cur_item == num_items);
455 
456         // store the final piece of string
457         {
458             string_type & piece = (cur_item==0) ? prefix_ : items_[cur_item-1].appendix_;
459             io::detail::append_string(piece, buf, i0, buf.size());
460         }
461 
462         if( !ordered_args) {
463             if(max_argN >= 0 ) {  // dont mix positional with non-positionnal directives
464                 if(exceptions() & io::bad_format_string_bit)
465                     boost::throw_exception(io::bad_format_string(max_argN, 0));
466                 // else do nothing. => positionnal arguments are processed as non-positionnal
467             }
468             // set things like it would have been with positional directives :
469             int non_ordered_items = 0;
470             for(int i=0; i< num_items; ++i)
471                 if(items_[i].argN_ == format_item_t::argN_no_posit) {
472                     items_[i].argN_ = non_ordered_items;
473                     ++non_ordered_items;
474                 }
475             max_argN = non_ordered_items-1;
476         }
477 
478         // C: set some member data :
479         items_.resize(num_items, format_item_t(io::detail::const_or_not(fac).widen( ' ')) );
480 
481         if(special_things) style_ |= special_needs;
482         num_args_ = max_argN + 1;
483         if(ordered_args) style_ |=  ordered;
484         else style_ &= ~ordered;
485         return *this;
486     }
487 
488 } // namespace boost
489 
490 
491 #endif //  BOOST_FORMAT_PARSING_HPP
492