1 /*
2  *
3  * Copyright (c) 1998-2002
4  * John Maddock
5  *
6  * Use, modification and distribution are subject to the
7  * Boost Software License, Version 1.0. (See accompanying file
8  * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9  *
10  */
11 
12  /*
13   *   LOCATION:    see http://www.boost.org for most recent version.
14   *   FILE         regex_format.hpp
15   *   VERSION      see <boost/version.hpp>
16   *   DESCRIPTION: Provides formatting output routines for search and replace
17   *                operations.  Note this is an internal header file included
18   *                by regex.hpp, do not include on its own.
19   */
20 
21 #ifndef BOOST_REGEX_FORMAT_HPP
22 #define BOOST_REGEX_FORMAT_HPP
23 
24 
25 namespace boost{
26 
27 #ifdef BOOST_HAS_ABI_HEADERS
28 #  include BOOST_ABI_PREFIX
29 #endif
30 
31 //
32 // Forward declaration:
33 //
34    template <class BidiIterator, class Allocator = BOOST_DEDUCED_TYPENAME std::vector<sub_match<BidiIterator> >::allocator_type >
35 class match_results;
36 
37 namespace re_detail{
38 
39 //
40 // struct trivial_format_traits:
41 // defines minimum localisation support for formatting
42 // in the case that the actual regex traits is unavailable.
43 //
44 template <class charT>
45 struct trivial_format_traits
46 {
47    typedef charT char_type;
48 
lengthboost::re_detail::trivial_format_traits49    static std::ptrdiff_t length(const charT* p)
50    {
51       return global_length(p);
52    }
tolowerboost::re_detail::trivial_format_traits53    static charT tolower(charT c)
54    {
55       return ::boost::re_detail::global_lower(c);
56    }
toupperboost::re_detail::trivial_format_traits57    static charT toupper(charT c)
58    {
59       return ::boost::re_detail::global_upper(c);
60    }
valueboost::re_detail::trivial_format_traits61    static int value(const charT c, int radix)
62    {
63       int result = global_value(c);
64       return result >= radix ? -1 : result;
65    }
toiboost::re_detail::trivial_format_traits66    int toi(const charT*& p1, const charT* p2, int radix)const
67    {
68       return global_toi(p1, p2, radix, *this);
69    }
70 };
71 
72 template <class OutputIterator, class Results, class traits>
73 class basic_regex_formatter
74 {
75 public:
76    typedef typename traits::char_type char_type;
basic_regex_formatter(OutputIterator o,const Results & r,const traits & t)77    basic_regex_formatter(OutputIterator o, const Results& r, const traits& t)
78       : m_traits(t), m_results(r), m_out(o), m_state(output_copy), m_have_conditional(false) {}
79    OutputIterator format(const char_type* p1, const char_type* p2, match_flag_type f);
format(const char_type * p1,match_flag_type f)80    OutputIterator format(const char_type* p1, match_flag_type f)
81    {
82       return format(p1, p1 + m_traits.length(p1), f);
83    }
84 private:
85    typedef typename Results::value_type sub_match_type;
86    enum output_state
87    {
88       output_copy,
89       output_next_lower,
90       output_next_upper,
91       output_lower,
92       output_upper,
93       output_none
94    };
95 
96    void put(char_type c);
97    void put(const sub_match_type& sub);
98    void format_all();
99    void format_perl();
100    void format_escape();
101    void format_conditional();
102    void format_until_scope_end();
103 
104    const traits& m_traits;       // the traits class for localised formatting operations
105    const Results& m_results;     // the match_results being used.
106    OutputIterator m_out;         // where to send output.
107    const char_type* m_position;  // format string, current position
108    const char_type* m_end;       // format string end
109    match_flag_type m_flags;      // format flags to use
110    output_state    m_state;      // what to do with the next character
111    bool            m_have_conditional; // we are parsing a conditional
112 private:
113    basic_regex_formatter(const basic_regex_formatter&);
114    basic_regex_formatter& operator=(const basic_regex_formatter&);
115 };
116 
117 template <class OutputIterator, class Results, class traits>
118 OutputIterator basic_regex_formatter<OutputIterator, Results, traits>::format(const char_type* p1, const char_type* p2, match_flag_type f)
119 {
120    m_position = p1;
121    m_end = p2;
122    m_flags = f;
123    format_all();
124    return m_out;
125 }
126 
127 template <class OutputIterator, class Results, class traits>
format_all()128 void basic_regex_formatter<OutputIterator, Results, traits>::format_all()
129 {
130    // over and over:
131    while(m_position != m_end)
132    {
133       switch(*m_position)
134       {
135       case '&':
136          if(m_flags & ::boost::regex_constants::format_sed)
137          {
138             ++m_position;
139             put(m_results[0]);
140             break;
141          }
142          put(*m_position++);
143          break;
144       case '\\':
145          format_escape();
146          break;
147       case '(':
148          if(m_flags & boost::regex_constants::format_all)
149          {
150             ++m_position;
151             bool have_conditional = m_have_conditional;
152             m_have_conditional = false;
153             format_until_scope_end();
154             m_have_conditional = have_conditional;
155             if(m_position == m_end)
156                return;
157             BOOST_ASSERT(*m_position == static_cast<char_type>(')'));
158             ++m_position;  // skip the closing ')'
159             break;
160          }
161          put(*m_position);
162          ++m_position;
163          break;
164       case ')':
165          if(m_flags & boost::regex_constants::format_all)
166          {
167             return;
168          }
169          put(*m_position);
170          ++m_position;
171          break;
172       case ':':
173          if((m_flags & boost::regex_constants::format_all) && m_have_conditional)
174          {
175             return;
176          }
177          put(*m_position);
178          ++m_position;
179          break;
180       case '?':
181          if(m_flags & boost::regex_constants::format_all)
182          {
183             ++m_position;
184             format_conditional();
185             break;
186          }
187          put(*m_position);
188          ++m_position;
189          break;
190       case '$':
191          if((m_flags & format_sed) == 0)
192          {
193             format_perl();
194             break;
195          }
196          // fall through, not a special character:
197       default:
198          put(*m_position);
199          ++m_position;
200          break;
201       }
202    }
203 }
204 
205 template <class OutputIterator, class Results, class traits>
format_perl()206 void basic_regex_formatter<OutputIterator, Results, traits>::format_perl()
207 {
208    //
209    // On entry *m_position points to a '$' character
210    // output the information that goes with it:
211    //
212    BOOST_ASSERT(*m_position == '$');
213    //
214    // see if this is a trailing '$':
215    //
216    if(++m_position == m_end)
217    {
218       --m_position;
219       put(*m_position);
220       ++m_position;
221       return;
222    }
223    //
224    // OK find out what kind it is:
225    //
226    switch(*m_position)
227    {
228    case '&':
229       ++m_position;
230       put(this->m_results[0]);
231       break;
232    case '`':
233       ++m_position;
234       put(this->m_results.prefix());
235       break;
236    case '\'':
237       ++m_position;
238       put(this->m_results.suffix());
239       break;
240    case '$':
241       put(*m_position++);
242       break;
243    default:
244       // see if we have a number:
245       {
246          std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), ::boost::re_detail::distance(m_position, m_end));
247          int v = m_traits.toi(m_position, m_position + len, 10);
248          if(v < 0)
249          {
250             // leave the $ as is, and carry on:
251             --m_position;
252             put(*m_position);
253             ++m_position;
254             break;
255          }
256          // otherwise output sub v:
257          put(this->m_results[v]);
258       }
259    }
260 }
261 
262 template <class OutputIterator, class Results, class traits>
format_escape()263 void basic_regex_formatter<OutputIterator, Results, traits>::format_escape()
264 {
265    // skip the escape and check for trailing escape:
266    if(++m_position == m_end)
267    {
268       put(static_cast<char_type>('\\'));
269       return;
270    }
271    // now switch on the escape type:
272    switch(*m_position)
273    {
274    case 'a':
275       put(static_cast<char_type>('\a'));
276       ++m_position;
277       break;
278    case 'f':
279       put(static_cast<char_type>('\f'));
280       ++m_position;
281       break;
282    case 'n':
283       put(static_cast<char_type>('\n'));
284       ++m_position;
285       break;
286    case 'r':
287       put(static_cast<char_type>('\r'));
288       ++m_position;
289       break;
290    case 't':
291       put(static_cast<char_type>('\t'));
292       ++m_position;
293       break;
294    case 'v':
295       put(static_cast<char_type>('\v'));
296       ++m_position;
297       break;
298    case 'x':
299       if(++m_position == m_end)
300       {
301          put(static_cast<char_type>('x'));
302          return;
303       }
304       // maybe have \x{ddd}
305       if(*m_position == static_cast<char_type>('{'))
306       {
307          ++m_position;
308          int val = m_traits.toi(m_position, m_end, 16);
309          if(val < 0)
310          {
311             // invalid value treat everything as literals:
312             put(static_cast<char_type>('x'));
313             put(static_cast<char_type>('{'));
314             return;
315          }
316          if(*m_position != static_cast<char_type>('}'))
317          {
318             while(*m_position != static_cast<char_type>('\\'))
319                --m_position;
320             ++m_position;
321             put(*m_position++);
322             return;
323          }
324          ++m_position;
325          put(static_cast<char_type>(val));
326          return;
327       }
328       else
329       {
330          std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), ::boost::re_detail::distance(m_position, m_end));
331          int val = m_traits.toi(m_position, m_position + len, 16);
332          if(val < 0)
333          {
334             --m_position;
335             put(*m_position++);
336             return;
337          }
338          put(static_cast<char_type>(val));
339       }
340       break;
341    case 'c':
342       if(++m_position == m_end)
343       {
344          --m_position;
345          put(*m_position++);
346          return;
347       }
348       put(static_cast<char_type>(*m_position++ % 32));
349       break;
350    case 'e':
351       put(static_cast<char_type>(27));
352       ++m_position;
353       break;
354    default:
355       // see if we have a perl specific escape:
356       if((m_flags & boost::regex_constants::format_sed) == 0)
357       {
358          bool breakout = false;
359          switch(*m_position)
360          {
361          case 'l':
362             ++m_position;
363             m_state = output_next_lower;
364             breakout = true;
365             break;
366          case 'L':
367             ++m_position;
368             m_state = output_lower;
369             breakout = true;
370             break;
371          case 'u':
372             ++m_position;
373             m_state = output_next_upper;
374             breakout = true;
375             break;
376          case 'U':
377             ++m_position;
378             m_state = output_upper;
379             breakout = true;
380             break;
381          case 'E':
382             ++m_position;
383             m_state = output_copy;
384             breakout = true;
385             break;
386          }
387          if(breakout)
388             break;
389       }
390       // see if we have a \n sed style backreference:
391       int v = m_traits.toi(m_position, m_position+1, 10);
392       if((v > 0) || ((v == 0) && (m_flags & ::boost::regex_constants::format_sed)))
393       {
394          put(m_results[v]);
395          break;
396       }
397       else if(v == 0)
398       {
399          // octal ecape sequence:
400          --m_position;
401          std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(4), ::boost::re_detail::distance(m_position, m_end));
402          v = m_traits.toi(m_position, m_position + len, 8);
403          BOOST_ASSERT(v >= 0);
404          put(static_cast<char_type>(v));
405          break;
406       }
407       // Otherwise output the character "as is":
408       put(*m_position++);
409       break;
410    }
411 }
412 
413 template <class OutputIterator, class Results, class traits>
format_conditional()414 void basic_regex_formatter<OutputIterator, Results, traits>::format_conditional()
415 {
416    if(m_position == m_end)
417    {
418       // oops trailing '?':
419       put(static_cast<char_type>('?'));
420       return;
421    }
422    std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), ::boost::re_detail::distance(m_position, m_end));
423    int v = m_traits.toi(m_position, m_position + len, 10);
424    if(v < 0)
425    {
426       // oops not a number:
427       put(static_cast<char_type>('?'));
428       return;
429    }
430 
431    // output varies depending upon whether sub-expression v matched or not:
432    if(m_results[v].matched)
433    {
434       m_have_conditional = true;
435       format_all();
436       m_have_conditional = false;
437       if((m_position != m_end) && (*m_position == static_cast<char_type>(':')))
438       {
439          // skip the ':':
440          ++m_position;
441          // save output state, then turn it off:
442          output_state saved_state = m_state;
443          m_state = output_none;
444          // format the rest of this scope:
445          format_until_scope_end();
446          // restore output state:
447          m_state = saved_state;
448       }
449    }
450    else
451    {
452       // save output state, then turn it off:
453       output_state saved_state = m_state;
454       m_state = output_none;
455       // format until ':' or ')':
456       m_have_conditional = true;
457       format_all();
458       m_have_conditional = false;
459       // restore state:
460       m_state = saved_state;
461       if((m_position != m_end) && (*m_position == static_cast<char_type>(':')))
462       {
463          // skip the ':':
464          ++m_position;
465          // format the rest of this scope:
466          format_until_scope_end();
467       }
468    }
469 }
470 
471 template <class OutputIterator, class Results, class traits>
format_until_scope_end()472 void basic_regex_formatter<OutputIterator, Results, traits>::format_until_scope_end()
473 {
474    do
475    {
476       format_all();
477       if((m_position == m_end) || (*m_position == static_cast<char_type>(')')))
478          return;
479       put(*m_position++);
480    }while(m_position != m_end);
481 }
482 
483 template <class OutputIterator, class Results, class traits>
put(char_type c)484 void basic_regex_formatter<OutputIterator, Results, traits>::put(char_type c)
485 {
486    // write a single character to output
487    // according to which case translation mode we are in:
488    switch(this->m_state)
489    {
490    case output_none:
491       return;
492    case output_next_lower:
493       c = m_traits.tolower(c);
494       this->m_state = output_copy;
495       break;
496    case output_next_upper:
497       c = m_traits.toupper(c);
498       this->m_state = output_copy;
499       break;
500    case output_lower:
501       c = m_traits.tolower(c);
502       break;
503    case output_upper:
504       c = m_traits.toupper(c);
505       break;
506    default:
507       break;
508    }
509    *m_out = c;
510    ++m_out;
511 }
512 
513 template <class OutputIterator, class Results, class traits>
put(const sub_match_type & sub)514 void basic_regex_formatter<OutputIterator, Results, traits>::put(const sub_match_type& sub)
515 {
516    typedef typename sub_match_type::iterator iterator_type;
517    iterator_type i = sub.first;
518    while(i != sub.second)
519    {
520       put(*i);
521       ++i;
522    }
523 }
524 
//
// class string_out_iterator:
// an output iterator that appends every value assigned through it to
// the string it was constructed from.  Only a pointer to the string is
// held, so the string must outlive the iterator.
//
// Increment and dereference do nothing and return the iterator itself;
// the append happens in operator=.
//
// The iterator typedefs are declared directly in the class rather than
// being inherited from std::iterator, which is deprecated as of C++17.
// They are the same types the base class used to supply.
//
template <class S>
class string_out_iterator
{
   S* out;
public:
   typedef std::ptrdiff_t difference_type;
   typedef typename S::value_type value_type;
   typedef value_type* pointer;
   typedef value_type& reference;
   typedef std::output_iterator_tag iterator_category;

   string_out_iterator(S& s) : out(&s) {}
   string_out_iterator& operator++() { return *this; }
   string_out_iterator& operator++(int) { return *this; }
   string_out_iterator& operator*() { return *this; }
   // append one character to the target string
   string_out_iterator& operator=(typename S::value_type v)
   {
      out->append(1, v);
      return *this;
   }
};
551 
552 template <class OutputIterator, class Iterator, class Alloc, class charT, class traits>
553 OutputIterator regex_format_imp(OutputIterator out,
554                           const match_results<Iterator, Alloc>& m,
555                           const charT* p1, const charT* p2,
556                           match_flag_type flags,
557                           const traits& t
558                          )
559 {
560    if(flags & regex_constants::format_literal)
561    {
562       return re_detail::copy(p1, p2, out);
563    }
564 
565    re_detail::basic_regex_formatter<
566       OutputIterator,
567       match_results<Iterator, Alloc>,
568       traits > f(out, m, t);
569    return f.format(p1, p2, flags);
570 }
571 
572 
573 } // namespace re_detail
574 
575 template <class OutputIterator, class Iterator, class charT>
576 OutputIterator regex_format(OutputIterator out,
577                           const match_results<Iterator>& m,
578                           const charT* fmt,
579                           match_flag_type flags = format_all
580                          )
581 {
582    re_detail::trivial_format_traits<charT> traits;
583    return re_detail::regex_format_imp(out, m, fmt, fmt + traits.length(fmt), flags, traits);
584 }
585 
586 template <class OutputIterator, class Iterator, class charT>
587 OutputIterator regex_format(OutputIterator out,
588                           const match_results<Iterator>& m,
589                           const std::basic_string<charT>& fmt,
590                           match_flag_type flags = format_all
591                          )
592 {
593    re_detail::trivial_format_traits<charT> traits;
594    return re_detail::regex_format_imp(out, m, fmt.data(), fmt.data() + fmt.size(), flags, traits);
595 }
596 
597 template <class Iterator, class charT>
regex_format(const match_results<Iterator> & m,const charT * fmt,match_flag_type flags=format_all)598 std::basic_string<charT> regex_format(const match_results<Iterator>& m,
599                                       const charT* fmt,
600                                       match_flag_type flags = format_all)
601 {
602    std::basic_string<charT> result;
603    re_detail::string_out_iterator<std::basic_string<charT> > i(result);
604    re_detail::trivial_format_traits<charT> traits;
605    re_detail::regex_format_imp(i, m, fmt, fmt + traits.length(fmt), flags, traits);
606    return result;
607 }
608 
609 template <class Iterator, class charT>
regex_format(const match_results<Iterator> & m,const std::basic_string<charT> & fmt,match_flag_type flags=format_all)610 std::basic_string<charT> regex_format(const match_results<Iterator>& m,
611                                       const std::basic_string<charT>& fmt,
612                                       match_flag_type flags = format_all)
613 {
614    std::basic_string<charT> result;
615    re_detail::string_out_iterator<std::basic_string<charT> > i(result);
616    re_detail::trivial_format_traits<charT> traits;
617    re_detail::regex_format_imp(i, m, fmt.data(), fmt.data() + fmt.size(), flags, traits);
618    return result;
619 }
620 
621 #ifdef BOOST_HAS_ABI_HEADERS
622 #  include BOOST_ABI_SUFFIX
623 #endif
624 
625 } // namespace boost
626 
627 #endif  // BOOST_REGEX_FORMAT_HPP
628 
629 
630 
631 
632 
633 
634