1 /*
2 *
3 * Copyright (c) 1998-2002
4 * John Maddock
5 *
6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9 *
10 */
11
12 /*
13 * LOCATION: see http://www.boost.org for most recent version.
14 * FILE regex_format.hpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: Provides formatting output routines for search and replace
17 * operations. Note this is an internal header file included
18 * by regex.hpp, do not include on its own.
19 */
20
21 #ifndef BOOST_REGEX_FORMAT_HPP
22 #define BOOST_REGEX_FORMAT_HPP
23
24
25 namespace boost{
26
27 #ifdef BOOST_HAS_ABI_HEADERS
28 # include BOOST_ABI_PREFIX
29 #endif
30
31 //
32 // Forward declaration:
33 //
34 template <class BidiIterator, class Allocator = BOOST_DEDUCED_TYPENAME std::vector<sub_match<BidiIterator> >::allocator_type >
35 class match_results;
36
37 namespace re_detail{
38
39 //
40 // struct trivial_format_traits:
41 // defines minimum localisation support for formatting
42 // in the case that the actual regex traits is unavailable.
43 //
44 template <class charT>
45 struct trivial_format_traits
46 {
47 typedef charT char_type;
48
lengthboost::re_detail::trivial_format_traits49 static std::ptrdiff_t length(const charT* p)
50 {
51 return global_length(p);
52 }
tolowerboost::re_detail::trivial_format_traits53 static charT tolower(charT c)
54 {
55 return ::boost::re_detail::global_lower(c);
56 }
toupperboost::re_detail::trivial_format_traits57 static charT toupper(charT c)
58 {
59 return ::boost::re_detail::global_upper(c);
60 }
valueboost::re_detail::trivial_format_traits61 static int value(const charT c, int radix)
62 {
63 int result = global_value(c);
64 return result >= radix ? -1 : result;
65 }
toiboost::re_detail::trivial_format_traits66 int toi(const charT*& p1, const charT* p2, int radix)const
67 {
68 return global_toi(p1, p2, radix, *this);
69 }
70 };
71
72 template <class OutputIterator, class Results, class traits>
73 class basic_regex_formatter
74 {
75 public:
76 typedef typename traits::char_type char_type;
basic_regex_formatter(OutputIterator o,const Results & r,const traits & t)77 basic_regex_formatter(OutputIterator o, const Results& r, const traits& t)
78 : m_traits(t), m_results(r), m_out(o), m_state(output_copy), m_have_conditional(false) {}
79 OutputIterator format(const char_type* p1, const char_type* p2, match_flag_type f);
format(const char_type * p1,match_flag_type f)80 OutputIterator format(const char_type* p1, match_flag_type f)
81 {
82 return format(p1, p1 + m_traits.length(p1), f);
83 }
84 private:
85 typedef typename Results::value_type sub_match_type;
86 enum output_state
87 {
88 output_copy,
89 output_next_lower,
90 output_next_upper,
91 output_lower,
92 output_upper,
93 output_none
94 };
95
96 void put(char_type c);
97 void put(const sub_match_type& sub);
98 void format_all();
99 void format_perl();
100 void format_escape();
101 void format_conditional();
102 void format_until_scope_end();
103
104 const traits& m_traits; // the traits class for localised formatting operations
105 const Results& m_results; // the match_results being used.
106 OutputIterator m_out; // where to send output.
107 const char_type* m_position; // format string, current position
108 const char_type* m_end; // format string end
109 match_flag_type m_flags; // format flags to use
110 output_state m_state; // what to do with the next character
111 bool m_have_conditional; // we are parsing a conditional
112 private:
113 basic_regex_formatter(const basic_regex_formatter&);
114 basic_regex_formatter& operator=(const basic_regex_formatter&);
115 };
116
117 template <class OutputIterator, class Results, class traits>
118 OutputIterator basic_regex_formatter<OutputIterator, Results, traits>::format(const char_type* p1, const char_type* p2, match_flag_type f)
119 {
120 m_position = p1;
121 m_end = p2;
122 m_flags = f;
123 format_all();
124 return m_out;
125 }
126
127 template <class OutputIterator, class Results, class traits>
format_all()128 void basic_regex_formatter<OutputIterator, Results, traits>::format_all()
129 {
130 // over and over:
131 while(m_position != m_end)
132 {
133 switch(*m_position)
134 {
135 case '&':
136 if(m_flags & ::boost::regex_constants::format_sed)
137 {
138 ++m_position;
139 put(m_results[0]);
140 break;
141 }
142 put(*m_position++);
143 break;
144 case '\\':
145 format_escape();
146 break;
147 case '(':
148 if(m_flags & boost::regex_constants::format_all)
149 {
150 ++m_position;
151 bool have_conditional = m_have_conditional;
152 m_have_conditional = false;
153 format_until_scope_end();
154 m_have_conditional = have_conditional;
155 if(m_position == m_end)
156 return;
157 BOOST_ASSERT(*m_position == static_cast<char_type>(')'));
158 ++m_position; // skip the closing ')'
159 break;
160 }
161 put(*m_position);
162 ++m_position;
163 break;
164 case ')':
165 if(m_flags & boost::regex_constants::format_all)
166 {
167 return;
168 }
169 put(*m_position);
170 ++m_position;
171 break;
172 case ':':
173 if((m_flags & boost::regex_constants::format_all) && m_have_conditional)
174 {
175 return;
176 }
177 put(*m_position);
178 ++m_position;
179 break;
180 case '?':
181 if(m_flags & boost::regex_constants::format_all)
182 {
183 ++m_position;
184 format_conditional();
185 break;
186 }
187 put(*m_position);
188 ++m_position;
189 break;
190 case '$':
191 if((m_flags & format_sed) == 0)
192 {
193 format_perl();
194 break;
195 }
196 // fall through, not a special character:
197 default:
198 put(*m_position);
199 ++m_position;
200 break;
201 }
202 }
203 }
204
205 template <class OutputIterator, class Results, class traits>
format_perl()206 void basic_regex_formatter<OutputIterator, Results, traits>::format_perl()
207 {
208 //
209 // On entry *m_position points to a '$' character
210 // output the information that goes with it:
211 //
212 BOOST_ASSERT(*m_position == '$');
213 //
214 // see if this is a trailing '$':
215 //
216 if(++m_position == m_end)
217 {
218 --m_position;
219 put(*m_position);
220 ++m_position;
221 return;
222 }
223 //
224 // OK find out what kind it is:
225 //
226 switch(*m_position)
227 {
228 case '&':
229 ++m_position;
230 put(this->m_results[0]);
231 break;
232 case '`':
233 ++m_position;
234 put(this->m_results.prefix());
235 break;
236 case '\'':
237 ++m_position;
238 put(this->m_results.suffix());
239 break;
240 case '$':
241 put(*m_position++);
242 break;
243 default:
244 // see if we have a number:
245 {
246 std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), ::boost::re_detail::distance(m_position, m_end));
247 int v = m_traits.toi(m_position, m_position + len, 10);
248 if(v < 0)
249 {
250 // leave the $ as is, and carry on:
251 --m_position;
252 put(*m_position);
253 ++m_position;
254 break;
255 }
256 // otherwise output sub v:
257 put(this->m_results[v]);
258 }
259 }
260 }
261
262 template <class OutputIterator, class Results, class traits>
format_escape()263 void basic_regex_formatter<OutputIterator, Results, traits>::format_escape()
264 {
265 // skip the escape and check for trailing escape:
266 if(++m_position == m_end)
267 {
268 put(static_cast<char_type>('\\'));
269 return;
270 }
271 // now switch on the escape type:
272 switch(*m_position)
273 {
274 case 'a':
275 put(static_cast<char_type>('\a'));
276 ++m_position;
277 break;
278 case 'f':
279 put(static_cast<char_type>('\f'));
280 ++m_position;
281 break;
282 case 'n':
283 put(static_cast<char_type>('\n'));
284 ++m_position;
285 break;
286 case 'r':
287 put(static_cast<char_type>('\r'));
288 ++m_position;
289 break;
290 case 't':
291 put(static_cast<char_type>('\t'));
292 ++m_position;
293 break;
294 case 'v':
295 put(static_cast<char_type>('\v'));
296 ++m_position;
297 break;
298 case 'x':
299 if(++m_position == m_end)
300 {
301 put(static_cast<char_type>('x'));
302 return;
303 }
304 // maybe have \x{ddd}
305 if(*m_position == static_cast<char_type>('{'))
306 {
307 ++m_position;
308 int val = m_traits.toi(m_position, m_end, 16);
309 if(val < 0)
310 {
311 // invalid value treat everything as literals:
312 put(static_cast<char_type>('x'));
313 put(static_cast<char_type>('{'));
314 return;
315 }
316 if(*m_position != static_cast<char_type>('}'))
317 {
318 while(*m_position != static_cast<char_type>('\\'))
319 --m_position;
320 ++m_position;
321 put(*m_position++);
322 return;
323 }
324 ++m_position;
325 put(static_cast<char_type>(val));
326 return;
327 }
328 else
329 {
330 std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), ::boost::re_detail::distance(m_position, m_end));
331 int val = m_traits.toi(m_position, m_position + len, 16);
332 if(val < 0)
333 {
334 --m_position;
335 put(*m_position++);
336 return;
337 }
338 put(static_cast<char_type>(val));
339 }
340 break;
341 case 'c':
342 if(++m_position == m_end)
343 {
344 --m_position;
345 put(*m_position++);
346 return;
347 }
348 put(static_cast<char_type>(*m_position++ % 32));
349 break;
350 case 'e':
351 put(static_cast<char_type>(27));
352 ++m_position;
353 break;
354 default:
355 // see if we have a perl specific escape:
356 if((m_flags & boost::regex_constants::format_sed) == 0)
357 {
358 bool breakout = false;
359 switch(*m_position)
360 {
361 case 'l':
362 ++m_position;
363 m_state = output_next_lower;
364 breakout = true;
365 break;
366 case 'L':
367 ++m_position;
368 m_state = output_lower;
369 breakout = true;
370 break;
371 case 'u':
372 ++m_position;
373 m_state = output_next_upper;
374 breakout = true;
375 break;
376 case 'U':
377 ++m_position;
378 m_state = output_upper;
379 breakout = true;
380 break;
381 case 'E':
382 ++m_position;
383 m_state = output_copy;
384 breakout = true;
385 break;
386 }
387 if(breakout)
388 break;
389 }
390 // see if we have a \n sed style backreference:
391 int v = m_traits.toi(m_position, m_position+1, 10);
392 if((v > 0) || ((v == 0) && (m_flags & ::boost::regex_constants::format_sed)))
393 {
394 put(m_results[v]);
395 break;
396 }
397 else if(v == 0)
398 {
399 // octal ecape sequence:
400 --m_position;
401 std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(4), ::boost::re_detail::distance(m_position, m_end));
402 v = m_traits.toi(m_position, m_position + len, 8);
403 BOOST_ASSERT(v >= 0);
404 put(static_cast<char_type>(v));
405 break;
406 }
407 // Otherwise output the character "as is":
408 put(*m_position++);
409 break;
410 }
411 }
412
413 template <class OutputIterator, class Results, class traits>
format_conditional()414 void basic_regex_formatter<OutputIterator, Results, traits>::format_conditional()
415 {
416 if(m_position == m_end)
417 {
418 // oops trailing '?':
419 put(static_cast<char_type>('?'));
420 return;
421 }
422 std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), ::boost::re_detail::distance(m_position, m_end));
423 int v = m_traits.toi(m_position, m_position + len, 10);
424 if(v < 0)
425 {
426 // oops not a number:
427 put(static_cast<char_type>('?'));
428 return;
429 }
430
431 // output varies depending upon whether sub-expression v matched or not:
432 if(m_results[v].matched)
433 {
434 m_have_conditional = true;
435 format_all();
436 m_have_conditional = false;
437 if((m_position != m_end) && (*m_position == static_cast<char_type>(':')))
438 {
439 // skip the ':':
440 ++m_position;
441 // save output state, then turn it off:
442 output_state saved_state = m_state;
443 m_state = output_none;
444 // format the rest of this scope:
445 format_until_scope_end();
446 // restore output state:
447 m_state = saved_state;
448 }
449 }
450 else
451 {
452 // save output state, then turn it off:
453 output_state saved_state = m_state;
454 m_state = output_none;
455 // format until ':' or ')':
456 m_have_conditional = true;
457 format_all();
458 m_have_conditional = false;
459 // restore state:
460 m_state = saved_state;
461 if((m_position != m_end) && (*m_position == static_cast<char_type>(':')))
462 {
463 // skip the ':':
464 ++m_position;
465 // format the rest of this scope:
466 format_until_scope_end();
467 }
468 }
469 }
470
471 template <class OutputIterator, class Results, class traits>
format_until_scope_end()472 void basic_regex_formatter<OutputIterator, Results, traits>::format_until_scope_end()
473 {
474 do
475 {
476 format_all();
477 if((m_position == m_end) || (*m_position == static_cast<char_type>(')')))
478 return;
479 put(*m_position++);
480 }while(m_position != m_end);
481 }
482
483 template <class OutputIterator, class Results, class traits>
put(char_type c)484 void basic_regex_formatter<OutputIterator, Results, traits>::put(char_type c)
485 {
486 // write a single character to output
487 // according to which case translation mode we are in:
488 switch(this->m_state)
489 {
490 case output_none:
491 return;
492 case output_next_lower:
493 c = m_traits.tolower(c);
494 this->m_state = output_copy;
495 break;
496 case output_next_upper:
497 c = m_traits.toupper(c);
498 this->m_state = output_copy;
499 break;
500 case output_lower:
501 c = m_traits.tolower(c);
502 break;
503 case output_upper:
504 c = m_traits.toupper(c);
505 break;
506 default:
507 break;
508 }
509 *m_out = c;
510 ++m_out;
511 }
512
513 template <class OutputIterator, class Results, class traits>
put(const sub_match_type & sub)514 void basic_regex_formatter<OutputIterator, Results, traits>::put(const sub_match_type& sub)
515 {
516 typedef typename sub_match_type::iterator iterator_type;
517 iterator_type i = sub.first;
518 while(i != sub.second)
519 {
520 put(*i);
521 ++i;
522 }
523 }
524
//
// class string_out_iterator:
// a minimal output iterator that appends each value assigned through
// it to a string-like object S.  Increment and dereference do nothing
// except return the iterator itself; all the work is in operator=.
// The target is held by pointer, so it must outlive the iterator.
//
template <class S>
class string_out_iterator
#ifndef BOOST_NO_STD_ITERATOR
   : public std::iterator<std::output_iterator_tag, typename S::value_type>
#endif
{
   S* out;   // the string being appended to
public:
#ifdef BOOST_NO_STD_ITERATOR
   // iterator typedefs, for when std::iterator can't supply them:
   typedef std::ptrdiff_t difference_type;
   typedef typename S::value_type value_type;
   typedef value_type* pointer;
   typedef value_type& reference;
   typedef std::output_iterator_tag iterator_category;
#endif

   string_out_iterator(S& s)
      : out(&s)
   {
   }

   // no-ops, present only to satisfy the iterator interface:
   string_out_iterator& operator++()
   {
      return *this;
   }
   string_out_iterator& operator++(int)
   {
      return *this;
   }
   string_out_iterator& operator*()
   {
      return *this;
   }

   // append a single value to the target string:
   string_out_iterator& operator=(typename S::value_type v)
   {
      out->append(1, v);
      return *this;
   }
};
551
552 template <class OutputIterator, class Iterator, class Alloc, class charT, class traits>
553 OutputIterator regex_format_imp(OutputIterator out,
554 const match_results<Iterator, Alloc>& m,
555 const charT* p1, const charT* p2,
556 match_flag_type flags,
557 const traits& t
558 )
559 {
560 if(flags & regex_constants::format_literal)
561 {
562 return re_detail::copy(p1, p2, out);
563 }
564
565 re_detail::basic_regex_formatter<
566 OutputIterator,
567 match_results<Iterator, Alloc>,
568 traits > f(out, m, t);
569 return f.format(p1, p2, flags);
570 }
571
572
573 } // namespace re_detail
574
575 template <class OutputIterator, class Iterator, class charT>
576 OutputIterator regex_format(OutputIterator out,
577 const match_results<Iterator>& m,
578 const charT* fmt,
579 match_flag_type flags = format_all
580 )
581 {
582 re_detail::trivial_format_traits<charT> traits;
583 return re_detail::regex_format_imp(out, m, fmt, fmt + traits.length(fmt), flags, traits);
584 }
585
586 template <class OutputIterator, class Iterator, class charT>
587 OutputIterator regex_format(OutputIterator out,
588 const match_results<Iterator>& m,
589 const std::basic_string<charT>& fmt,
590 match_flag_type flags = format_all
591 )
592 {
593 re_detail::trivial_format_traits<charT> traits;
594 return re_detail::regex_format_imp(out, m, fmt.data(), fmt.data() + fmt.size(), flags, traits);
595 }
596
597 template <class Iterator, class charT>
regex_format(const match_results<Iterator> & m,const charT * fmt,match_flag_type flags=format_all)598 std::basic_string<charT> regex_format(const match_results<Iterator>& m,
599 const charT* fmt,
600 match_flag_type flags = format_all)
601 {
602 std::basic_string<charT> result;
603 re_detail::string_out_iterator<std::basic_string<charT> > i(result);
604 re_detail::trivial_format_traits<charT> traits;
605 re_detail::regex_format_imp(i, m, fmt, fmt + traits.length(fmt), flags, traits);
606 return result;
607 }
608
609 template <class Iterator, class charT>
regex_format(const match_results<Iterator> & m,const std::basic_string<charT> & fmt,match_flag_type flags=format_all)610 std::basic_string<charT> regex_format(const match_results<Iterator>& m,
611 const std::basic_string<charT>& fmt,
612 match_flag_type flags = format_all)
613 {
614 std::basic_string<charT> result;
615 re_detail::string_out_iterator<std::basic_string<charT> > i(result);
616 re_detail::trivial_format_traits<charT> traits;
617 re_detail::regex_format_imp(i, m, fmt.data(), fmt.data() + fmt.size(), flags, traits);
618 return result;
619 }
620
621 #ifdef BOOST_HAS_ABI_HEADERS
622 # include BOOST_ABI_SUFFIX
623 #endif
624
625 } // namespace boost
626
627 #endif // BOOST_REGEX_FORMAT_HPP
628
629
630
631
632
633
634