1 /*
2  * Copyright 2006-2008 The FLWOR Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 #ifndef ZORBA_STRING_UTIL_ASCII_UTIL_H
18 #define ZORBA_STRING_UTIL_ASCII_UTIL_H
19 
20 #include <algorithm>
21 #include <cctype>
22 #include <cstddef>
23 #include <cstring>
24 
25 #include "stl_util.h"
26 
27 namespace zorba {
28 namespace ascii {
29 
30 ////////// types //////////////////////////////////////////////////////////////
31 
32 /**
33  * The size type.
34  */
35 typedef std::size_t size_type;
36 
37 ////////// constants //////////////////////////////////////////////////////////
38 
/**
 * The characters treated as whitespace by the functions in this header:
 * space, form-feed, newline, carriage-return, horizontal tab and vertical tab.
 */
const char whitespace[] = " \f\n\r\t\v";
40 
41 ////////// Non-ASCII character stripping //////////////////////////////////////
42 
43 /**
44  * A %back_ascii_insert_iterator can be used to append characters to a string
45  * ensuring that non-ASCII characters are excluded.
46  *
47  * @tparam StringType The string type.
48  */
49 template<class StringType>
50 class back_ascii_insert_iterator :
51   public
52     ztd::back_insert_iterator_base<
53       StringType, back_ascii_insert_iterator<StringType>
54     >
55 {
56   typedef ztd::back_insert_iterator_base<
57     StringType, back_ascii_insert_iterator<StringType>
58   > base_type;
59 public:
60   typedef typename base_type::container_type container_type;
61   typedef typename StringType::value_type value_type;
62 
63   /**
64    * Constructs a %back_ascii_insert_iterator.
65    *
66    * @param s The string to append to.
67    */
back_ascii_insert_iterator(StringType & s)68   explicit back_ascii_insert_iterator( StringType &s ) : base_type( s ) {
69   }
70 
71   back_ascii_insert_iterator& operator=( value_type c );
72 };
73 
74 /**
75  * This is a convenience function to create a back_ascii_insert_iterator.
76  *
77  * @tparam StringType The string type.
78  * @param out The output string.
79  */
80 template<class StringType> inline back_ascii_insert_iterator<StringType>
back_ascii_inserter(StringType & out)81 back_ascii_inserter( StringType &out ) {
82   return back_ascii_insert_iterator<StringType>( out );
83 }
84 
85 ////////// Character testing //////////////////////////////////////////////////
86 
/**
 * Tests whether a character is an ASCII character.  This wraps isascii(3)
 * (__isascii() when WIN32 is defined), which may be implemented as a macro,
 * in a proper function.
 *
 * @tparam CharType The character type.
 * @param c The character to check.
 * @return Returns \c true only if \a c is an ASCII character.
 */
template<typename CharType>
inline bool is_ascii( CharType c ) {
#ifndef WIN32
  return isascii( c ) != 0;
#else
  return __isascii( c ) != 0;
#endif
}
104 
/**
 * Checks whether the given character is an ASCII alphabetic character.  This
 * function exists to make a proper function out of the standard isalpha(3)
 * that may be implemented as a macro.
 *
 * @tparam CharType The character type.
 * @param c The character to check.
 * @return Returns \c true only if \a c is ASCII and an alphabetic character.
 */
template<typename CharType> inline
bool is_alpha( CharType c ) {
  // isalpha() is only defined for EOF and values representable as an unsigned
  // char: a negative char or a wide code point is undefined behavior (and
  // crashes on Windows).  Hence non-ASCII values are rejected first.
#ifdef WIN32
  return __isascii( c ) && isalpha( c );
#else
  return isascii( c ) && isalpha( c );
#endif
}
123 
/**
 * Checks whether the given character is an ASCII alpha-numeric character.
 * This function exists to make a proper function out of the standard
 * isalnum(3) that may be implemented as a macro.
 *
 * @tparam CharType The character type.
 * @param c The character to check.
 * @return Returns \c true only if \a c is ASCII and an alpha-numeric
 * character.
 */
template<typename CharType> inline
bool is_alnum( CharType c ) {
  // isalnum() is only defined for EOF and values representable as an unsigned
  // char: a negative char or a wide code point is undefined behavior (and
  // crashes on Windows).  Hence non-ASCII values are rejected first.
#ifdef WIN32
  return __isascii( c ) && isalnum( c );
#else
  return isascii( c ) && isalnum( c );
#endif
}
142 
/**
 * Checks whether the given character is an ASCII control character.  This
 * function exists to make a proper function out of the standard iscntrl(3)
 * that may be implemented as a macro.
 *
 * @tparam CharType The character type.
 * @param c The character to check.
 * @return Returns \c true only if \a c is ASCII and a control character.
 */
template<typename CharType> inline
bool is_cntrl( CharType c ) {
  // iscntrl() is only defined for EOF and values representable as an unsigned
  // char: a negative char or a wide code point is undefined behavior (and
  // crashes on Windows).  Hence non-ASCII values are rejected first.
#ifdef WIN32
  return __isascii( c ) && iscntrl( c );
#else
  return isascii( c ) && iscntrl( c );
#endif
}
161 
/**
 * Checks whether the given character is an ASCII decimal digit.  This function
 * exists to make a proper function out of the standard isdigit(3) that may be
 * implemented as a macro.
 *
 * @tparam CharType The character type.
 * @param c The character to check.
 * @return Returns \c true only if \a c is ASCII and a decimal digit.
 */
template<typename CharType> inline
bool is_digit( CharType c ) {
  // isdigit() is only defined for EOF and values representable as an unsigned
  // char: a negative char or a wide code point is undefined behavior (and
  // crashes on Windows).  Hence non-ASCII values are rejected first.
#ifdef WIN32
  return __isascii( c ) && isdigit( c );
#else
  return isascii( c ) && isdigit( c );
#endif
}
180 
/**
 * Checks whether the given character is an ASCII printing character.  This
 * function exists to make a proper function out of the standard isprint(3)
 * that may be implemented as a macro.
 *
 * @tparam CharType The character type.
 * @param c The character to check.
 * @return Returns \c true only if \a c is ASCII and a printing character.
 */
template<typename CharType> inline
bool is_print( CharType c ) {
  // isprint() is only defined for EOF and values representable as an unsigned
  // char: a negative char or a wide code point is undefined behavior (and
  // crashes on Windows).  Hence non-ASCII values are rejected first.
#ifdef WIN32
  return __isascii( c ) && isprint( c );
#else
  return isascii( c ) && isprint( c );
#endif
}
199 
/**
 * Checks whether the given character is an ASCII punctuation character.  This
 * function exists to make a proper function out of the standard ispunct(3)
 * that may be implemented as a macro.
 *
 * @tparam CharType The character type.
 * @param c The character to check.
 * @return Returns \c true only if \a c is ASCII and a punctuation character.
 */
template<typename CharType> inline
bool is_punct( CharType c ) {
  // ispunct() is only defined for EOF and values representable as an unsigned
  // char: a negative char or a wide code point is undefined behavior (and
  // crashes on Windows).  Hence non-ASCII values are rejected first.
#ifdef WIN32
  return __isascii( c ) && ispunct( c );
#else
  return isascii( c ) && ispunct( c );
#endif
}
218 
/**
 * Checks whether the given character is an ASCII whitespace character.  This
 * function exists to make a proper function out of the standard isspace(3)
 * that may be implemented as a macro.
 *
 * @tparam CharType The character type.
 * @param c The character to check.
 * @return Returns \c true only if \a c is ASCII and a whitespace character.
 */
template<typename CharType> inline
bool is_space( CharType c ) {
  // isspace() is only defined for EOF and values representable as an unsigned
  // char: a negative char or a wide code point is undefined behavior (and
  // crashes on Windows).  Hence non-ASCII values are rejected first.
#ifdef WIN32
  return __isascii( c ) && isspace( c );
#else
  return isascii( c ) && isspace( c );
#endif
}
237 
238 /**
239  * Checks whether an entire string is whitespace.
240  *
241  * @param s The NULL-terminated C string to check.
242  * @return Returns \c true only if the entire string is whitespace.
243  */
244 bool is_whitespace( char const *s );
245 
/**
 * Tests whether an entire string is whitespace.  The work is delegated to the
 * C string overload using the string's \c c_str().
 *
 * @tparam StringType The string type.
 * @param s The string to check.
 * @return Returns \c true only if the entire string is whitespace.
 */
template<class StringType>
inline bool is_whitespace( StringType const &s ) {
  return is_whitespace( s.c_str() );
}
256 
/**
 * Checks whether the given character is an ASCII hexadecimal digit.  This
 * function exists to make a proper function out of the standard isxdigit(3)
 * that may be implemented as a macro.
 *
 * @tparam CharType The character type.
 * @param c The character to check.
 * @return Returns \c true only if \a c is ASCII and a hexadecimal digit.
 */
template<typename CharType> inline
bool is_xdigit( CharType c ) {
  // isxdigit() is only defined for EOF and values representable as an unsigned
  // char: a negative char or a wide code point is undefined behavior (and
  // crashes on Windows).  Hence non-ASCII values are rejected first.
#ifdef WIN32
  return __isascii( c ) && isxdigit( c );
#else
  return isascii( c ) && isxdigit( c );
#endif
}
275 
276 ////////// begins/ends_with ///////////////////////////////////////////////////
277 
/**
 * Tests whether the first character of a C string is a given character.
 *
 * @param s The NULL-terminated C string to check.
 * @param c The prefix character.
 * @return Returns \c true only if the first character of \a s equals \a c.
 */
inline bool begins_with( char const *s, char c ) {
  return *s == c;
}
288 
/**
 * Tests whether the first character of a string is a given character.
 *
 * @tparam StringType The string type.
 * @param s The string to check.
 * @param c The prefix character.
 * @return Returns \c true only if \a s is not empty and its first character
 * equals \a c.
 */
template<class StringType>
inline bool begins_with( StringType const &s, char c ) {
  if ( s.empty() )
    return false;
  return s[0] == c;
}
301 
302 /**
303  * Checks whether a string begins with a given prefix.
304  *
305  * @param s The string to check.
306  * @param ps The prefix string.
307  * @param n The number of bytes to compare.
308  * @return Returns \c true only if \a s begins with \a ps.
309  */
begins_with(char const * s,char const * ps,size_type n)310 inline bool begins_with( char const *s, char const *ps, size_type n ) {
311   return std::strncmp( s, ps, n ) == 0;
312 }
313 
314 /**
315  * Checks whether a string begins with a given prefix.
316  *
317  * @param s The string to check.
318  * @param ps The prefix string.
319  * @return Returns \c true only if \a s begins with \a ps.
320  */
begins_with(char const * s,char const * ps)321 inline bool begins_with( char const *s, char const *ps ) {
322   return begins_with( s, ps, std::strlen( ps ) );
323 }
324 
/**
 * Tests whether a string begins with a given prefix.
 *
 * @tparam StringType The string type.
 * @param s The string to check.
 * @param ps The prefix string.
 * @param n The number of bytes of \a ps to compare.
 * @return Returns \c true only if \a s is at least \a n characters long and
 * its first \a n characters equal those of \a ps.
 */
template<class StringType>
inline bool begins_with( StringType const &s, char const *ps,
                         typename StringType::size_type n ) {
  typedef typename StringType::traits_type traits_type;
  // A prefix longer than the string itself can never match.
  if ( n > s.size() )
    return false;
  return traits_type::compare( s.data(), ps, n ) == 0;
}
340 
/**
 * Tests whether a string begins with a given prefix.  The entire length of
 * the prefix is compared.
 *
 * @tparam StringType The string type.
 * @param s The string to check.
 * @param ps The NULL-terminated prefix string.
 * @return Returns \c true only if \a s begins with \a ps.
 */
template<class StringType>
inline bool begins_with( StringType const &s, char const *ps ) {
  return begins_with( s, ps, std::strlen( ps ) );
}
353 
/**
 * Tests whether a string begins with a given prefix.
 *
 * @tparam StringType The string type.
 * @tparam PrefixStringType The prefix string type.
 * @param s The string to check.
 * @param ps The prefix string.
 * @return Returns \c true only if \a s begins with \a ps.
 */
template<class StringType,class PrefixStringType>
inline bool begins_with( StringType const &s, PrefixStringType const &ps ) {
  return begins_with( s, ps.data(), ps.size() );
}
367 
368 /**
369  * Checks whether a string ends with a given suffix.
370  *
371  * @param s The string to check.
372  * @param c The suffix character.
373  * @return Returns \c true only if \a s ends with \a c.
374  */
ends_with(char const * s,char c)375 inline bool ends_with( char const *s, char c ) {
376   size_type const len = std::strlen( s );
377   return len > 0 && s[ len - 1 ] == c;
378 }
379 
/**
 * Tests whether the last character of a string is a given character.
 *
 * @tparam StringType The string type.
 * @param s The string to check.
 * @param c The suffix character.
 * @return Returns \c true only if \a s is not empty and its last character
 * equals \a c.
 */
template<class StringType>
inline bool ends_with( StringType const &s, char c ) {
  if ( s.empty() )
    return false;
  return s[ s.size() - 1 ] == c;
}
392 
393 /**
394  * Checks whether a string ends with a given suffix.
395  *
396  * @param s The string to check.
397  * @param ss The suffix string.
398  * @param ss_len The length of the suffix string.
399  * @return Returns \c true only if \a s ends with \a ss.
400  */
ends_with(char const * s,char const * ss,size_type ss_len)401 inline bool ends_with( char const *s, char const *ss, size_type ss_len ) {
402   size_type const len = std::strlen( s );
403   return ss_len <= len && std::strncmp( s + len - ss_len, ss, ss_len ) == 0;
404 }
405 
406 /**
407  * Checks whether a string ends with a given suffix.
408  *
409  * @param s The string to check.
410  * @param ss The suffix string.
411  * @return Returns \c true only if \a s ends with \a ss.
412  */
ends_with(char const * s,char const * ss)413 inline bool ends_with( char const *s, char const *ss ) {
414   return ends_with( s, ss, std::strlen( ss ) );
415 }
416 
/**
 * Checks whether a string ends with a given suffix.
 *
 * Only the first \a ss_len characters of \a ss are examined, hence \a ss need
 * not be NULL-terminated.
 *
 * @tparam StringType The string type.
 * @param s The string to check.
 * @param ss The suffix string.
 * @param ss_len The length of the suffix string.
 * @return Returns \c true only if \a s is at least \a ss_len characters long
 * and its last \a ss_len characters equal those of \a ss.
 */
template<class StringType> inline
bool ends_with( StringType const &s, char const *ss,
                typename StringType::size_type ss_len ) {
  typedef typename StringType::traits_type traits_type;
  typename StringType::size_type const s_len = s.size();
  // Compare only the tail of s against exactly ss_len characters of ss (the
  // mirror image of begins_with()) rather than searching all of s for a
  // NULL-terminated ss.
  return ss_len <= s_len &&
         traits_type::compare( s.data() + (s_len - ss_len), ss, ss_len ) == 0;
}
432 
/**
 * Tests whether a string ends with a given suffix.  The entire length of the
 * suffix is compared.
 *
 * @tparam StringType The string type.
 * @param s The string to check.
 * @param ss The NULL-terminated suffix string.
 * @return Returns \c true only if \a s ends with \a ss.
 */
template<class StringType>
inline bool ends_with( StringType const &s, char const *ss ) {
  return ends_with( s, ss, std::strlen( ss ) );
}
445 
/**
 * Tests whether a string ends with a given suffix.
 *
 * @tparam StringType The string type.
 * @tparam SuffixStringType The suffix string type.
 * @param s The string to check.
 * @param ss The suffix string; its \c data() and \c size() are used.
 * @return Returns \c true only if \a s ends with \a ss.
 */
template<class StringType,class SuffixStringType>
inline bool ends_with( StringType const &s, SuffixStringType const &ss ) {
  return ends_with( s, ss.data(), ss.size() );
}
459 
460 ////////// Case conversion ////////////////////////////////////////////////////
461 
/**
 * Converts the given character to lower-case.  This function exists to make a
 * proper function out of the standard tolower(3) that may be implemented as a
 * macro.
 *
 * @param c The character to convert.
 * @return Returns the character converted to lower-case (if it was upper-case)
 * or the original character if either it already is lower-case or if it is not
 * a letter.
 */
inline char to_lower( char c ) {
  // tolower() requires a value representable as an unsigned char (or EOF):
  // passing a negative char directly is undefined behavior.
  return static_cast<char>( tolower( static_cast<unsigned char>( c ) ) );
}
475 
476 /**
477  * Converts a string to lower-case in-place.
478  *
479  * @param begin A pointer to the first character of the string.
480  * @param end A pointer to one past the last character of the string.
481  */
to_lower(char * begin,char * end)482 inline void to_lower( char *begin, char *end ) {
483   std::transform( begin, end, begin, static_cast<char (*)(char)>( to_lower ) );
484 }
485 
486 /**
487  * Converts a string to lower-case in-place.
488  *
489  * @param s The string to convert.
490  */
to_lower(char * s)491 inline void to_lower( char *s ) {
492   to_lower( s, s + std::strlen( s ) );
493 }
494 
/**
 * Converts every character of a string to lower-case in-place.
 *
 * @tparam StringType The string type.
 * @param s The string to convert.
 */
template<class StringType>
inline void to_lower( StringType &s ) {
  // Select the single-character overload explicitly.
  char (*const lower)( char ) = to_lower;
  std::transform( s.begin(), s.end(), s.begin(), lower );
}
507 
/**
 * Converts a string to lower-case, leaving the input unmodified.
 *
 * @tparam InputStringType The input string type.
 * @tparam OutputStringType The output string type.
 * @param in The input string.
 * @param out The output string (which must be different from \a in).  The
 * converted characters are appended to it via std::back_inserter; it is not
 * cleared first.
 */
template<class InputStringType,class OutputStringType>
inline void to_lower( InputStringType const &in, OutputStringType *out ) {
  // Select the single-character overload explicitly.
  char (*const lower)( char ) = to_lower;
  std::transform( in.begin(), in.end(), std::back_inserter( *out ), lower );
}
524 
/**
 * Converts the given character to upper-case.  This function exists to make a
 * proper function out of the standard toupper(3) that may be implemented as a
 * macro.
 *
 * @param c The character to convert.
 * @return Returns the character converted to upper-case (if it was lower-case)
 * or the original character if either it already is upper-case or if it is not
 * a letter.
 */
inline char to_upper( char c ) {
  // toupper() requires a value representable as an unsigned char (or EOF):
  // passing a negative char directly is undefined behavior.
  return static_cast<char>( toupper( static_cast<unsigned char>( c ) ) );
}
538 
539 /**
540  * Converts a string to upper-case in-place.
541  *
542  * @param begin A pointer to the first character of the string.
543  * @param end A pointer to one past the last character of the string.
544  */
to_upper(char * begin,char * end)545 inline void to_upper( char *begin, char *end ) {
546   std::transform( begin, end, begin, static_cast<char (*)(char)>( to_upper ) );
547 }
548 
549 /**
550  * Converts a string to upper-case in-place.
551  *
552  * @param s The string to convert.
553  */
to_upper(char * s)554 inline void to_upper( char *s ) {
555   to_upper( s, s + std::strlen( s ) );
556 }
557 
/**
 * Converts every character of a string to upper-case in-place.
 *
 * @tparam StringType The string type.
 * @param s The string to convert.
 */
template<class StringType>
inline void to_upper( StringType &s ) {
  // Select the single-character overload explicitly.
  char (*const upper)( char ) = to_upper;
  std::transform( s.begin(), s.end(), s.begin(), upper );
}
570 
/**
 * Converts a string to upper-case, leaving the input unmodified.
 *
 * @tparam InputStringType The input string type.
 * @tparam OutputStringType The output string type.
 * @param in The input string.
 * @param out The output string (which must be different from \a in).  The
 * converted characters are appended to it via std::back_inserter; it is not
 * cleared first.
 */
template<class InputStringType,class OutputStringType>
inline void to_upper( InputStringType const &in, OutputStringType *out ) {
  // Select the single-character overload explicitly.
  char (*const upper)( char ) = to_upper;
  std::transform( in.begin(), in.end(), std::back_inserter( *out ), upper );
}
587 
588 ////////// Replacement ////////////////////////////////////////////////////////
589 
590 /**
591  * Replaces all occurrences of one character with another.
592  *
593  * @tparam StringType The string type.
594  * @param s The string to modify.
595  * @param from The character to replace.
596  * @param to The character to replace with.
597  * @return Returns \c true only if at least one replacement is performed.
598  */
599 template<class StringType>
600 bool replace_all( StringType &s, char from, char to );
601 
602 /**
603  * Replaces all occurrences of one substring with another.
604  *
605  * @tparam StringType The string type.
606  * @param s The string to modify.
607  * @param from The substring to replace.
608  * @param from_len The length of \a from.
609  * @param to The substring to replace with.
610  * @param to_len The length of \a to.
611  * @return Returns \c true only if at least one replacement is performed.
612  */
613 template<class StringType>
614 bool replace_all( StringType &s,
615                   char const *from, typename StringType::size_type from_len,
616                   char const *to, typename StringType::size_type to_len );
617 
/**
 * Replaces all occurrences of one substring with another where both
 * substrings are given as NULL-terminated C strings.
 *
 * @tparam StringType The string type.
 * @param s The string to modify.
 * @param from The substring to replace.
 * @param to The substring to replace with.
 * @return Returns \c true only if at least one replacement is performed.
 */
template<class StringType>
inline bool replace_all( StringType &s, char const *from, char const *to ) {
  typename StringType::size_type const from_len = std::strlen( from );
  typename StringType::size_type const to_len = std::strlen( to );
  return replace_all( s, from, from_len, to, to_len );
}
631 
/**
 * Replaces all occurrences of one substring with another where the substring
 * to replace is a NULL-terminated C string and the replacement is a string
 * object.
 *
 * @tparam StringType The string type.
 * @tparam ToStringType The replacement string type.
 * @param s The string to modify.
 * @param from The substring to replace.
 * @param to The substring to replace with.
 * @return Returns \c true only if at least one replacement is performed.
 */
template<class StringType,class ToStringType>
inline bool replace_all( StringType &s, char const *from,
                         ToStringType const &to ) {
  typename StringType::size_type const from_len = std::strlen( from );
  return replace_all( s, from, from_len, to.data(), to.size() );
}
645 
/**
 * Replaces all occurrences of one substring with another where both
 * substrings are string objects.
 *
 * @tparam StringType The string type.
 * @tparam FromStringType The type of the substring to replace.
 * @tparam ToStringType The replacement string type.
 * @param s The string to modify.
 * @param from The substring to replace.
 * @param to The substring to replace with.
 * @return Returns \c true only if at least one replacement is performed.
 */
template<class StringType,class FromStringType,class ToStringType>
inline bool replace_all( StringType &s, FromStringType const &from,
                         ToStringType const &to ) {
  return replace_all( s, from.data(), from.size(), to.data(), to.size() );
}
660 
661 ////////// Whitespace /////////////////////////////////////////////////////////
662 
663 /**
664  * Converts sequences of one or more whitespace characters to a single space.
665  * Additionally, all leading and trailing whitespace is removed.
666  *
667  * @tparam InputStringType The input string type.
668  * @tparam OutputStringType The output string type.
669  * @param in The input string.
670  * @param out The output string (which must be different from \a in).
671  */
672 template<class InputStringType,class OutputStringType>
673 void normalize_whitespace( InputStringType const &in, OutputStringType *out );
674 
/**
 * Converts sequences of one or more whitespace characters to a single space
 * and removes all leading and trailing whitespace, in-place.  The result is
 * built in a temporary string by the two-argument overload and then assigned
 * back to \a s.
 *
 * @tparam StringType The string type.
 * @param s The string to normalize.
 */
template<class StringType>
inline void normalize_whitespace( StringType &s ) {
  StringType normalized;
  normalize_whitespace( s, &normalized );
  s = normalized;
}
688 
689 /**
690  * Removes all specified characters by shifting the contents of the buffer to
691  * the left.
692  *
693  * @param s The string.
694  * @param s_len The length of \a s.
695  * @param chars The characters to remove.
696  * @return Returns the new length of \a s with all \a chars removed.
697  */
698 size_type remove_chars( char *s, size_type s_len, char const *chars );
699 
700 /**
701  * Removes all whitespace characters by shifting the contents of the buffer to
702  * the left.
703  *
704  * @param s The string.
705  * @param s_len The length of \a s.
706  * @return Returns the new length of \a s with all whitespace removed.
707  */
remove_whitespace(char * s,size_type s_len)708 inline size_type remove_whitespace( char *s, size_type s_len ) {
709   return remove_chars( s, s_len, whitespace );
710 }
711 
712 /**
713  * Removes all leading and trailing specified characters.
714  *
715  * @tparam InputStringType The input string type.
716  * @tparam OutputStringType The output string type.
717  * @param in The input string.
718  * @param chars The characters to trim.
719  * @param out The output string (which must be different from \a in).
720  */
721 template<class InputStringType,class OutputStringType>
722 void trim( InputStringType const &in, char const *chars,
723            OutputStringType *out );
724 
/**
 * Removes all leading and trailing specified characters, in-place.  The
 * result is built in a temporary string by the three-argument overload and
 * then assigned back to \a s.
 *
 * @tparam StringType The string type.
 * @param s The string to trim.
 * @param chars The characters to trim.
 */
template<class StringType>
inline void trim( StringType &s, char const *chars ) {
  StringType trimmed;
  trim( s, chars, &trimmed );
  s = trimmed;
}
737 
738 /**
739  * Skips leading specified characters.
740  *
741  * @param s The string to trim.
742  * @param chars The characters to trim.
743  * @return Returns a pointer to the first character in \a s that is not among
744  * the characters in \a chars.
745  */
746 char const* trim_start( char const *s, char const *chars );
747 
748 /**
749  * Skips leading specified characters.
750  *
751  * @param s The string to trim.
752  * @param s_len The length of \a s.
753  * @param chars The characters to trim.
754  * @return Returns a pointer to the first character in \a s that is not among
755  * the characters in \a chars.
756  */
757 char const* trim_start( char const *s, size_type s_len, char const *chars );
758 
759 /**
760  * Removes all leading specified characters.
761  *
762  * @tparam InputStringType The input string type.
763  * @tparam OutputStringType The output string type.
764  * @param in The input string.
765  * @param chars The characters to trim.
766  * @param out The output string (which must be different from \a in).
767  */
768 template<class InputStringType,class OutputStringType>
769 void trim_start( InputStringType const &in, char const *chars,
770                  OutputStringType *out );
771 
/**
 * Removes all leading specified characters, in-place.  The result is built in
 * a temporary string by the three-argument overload and then assigned back to
 * \a s.
 *
 * @tparam StringType The string type.
 * @param s The string to trim.
 * @param chars The characters to trim.
 */
template<class StringType>
inline void trim_start( StringType &s, char const *chars ) {
  StringType trimmed;
  trim_start( s, chars, &trimmed );
  s = trimmed;
}
784 
785 /**
786  * Skips leading whitespace chracters.
787  *
788  * @param s The string to trim.
789  * @return Returns a pointer to the first character in \a s that is not a
790  * whitespace character.
791  */
trim_start_whitespace(char const * s)792 inline char const* trim_start_whitespace( char const *s ) {
793   return trim_start( s, whitespace );
794 }
795 
796 /**
797  * Skips leading whitespace characters.
798  *
799  * @param s The string to trim.
800  * @param s_len The length of \a s.
801  * @return Returns a pointer to the first character in \a s that is not a
802  * whitespace character.
803  */
trim_start_whitespace(char const * s,size_type s_len)804 inline char const* trim_start_whitespace( char const *s, size_type s_len ) {
805   return trim_start( s, s_len, whitespace );
806 }
807 
808 /**
809  * Removes all leading whitespace characters.
810  *
811  * @tparam InputStringType The input string type.
812  * @tparam OutputStringType The output string type.
813  * @param in The input string.
814  * @param out The output string (which must be different from \a in).
815  */
816 template<class InputStringType,class OutputStringType> inline
trim_start_whitespace(InputStringType const & in,OutputStringType * out)817 void trim_start_whitespace( InputStringType const &in, OutputStringType *out ) {
818   trim_start( in, whitespace, out );
819 }
820 
/**
 * Removes all leading whitespace characters, in-place.  The result is built
 * in a temporary string by the two-argument overload and then assigned back
 * to \a s.
 *
 * @tparam StringType The string type.
 * @param s The string to trim.
 */
template<class StringType>
inline void trim_start_whitespace( StringType &s ) {
  StringType trimmed;
  trim_start_whitespace( s, &trimmed );
  s = trimmed;
}
833 
834 /**
835  * Skips trailing specified characters.
836  *
837  * @param s The string to trim.
838  * @param s_len The length of \a s.
839  * @param chars The characters to trim.
840  * @return Returns the new length of \a s.
841  */
842 size_type trim_end( char const *s, size_type s_len, char const *chars );
843 
844 /**
845  * Skips trailing specified characters.
846  *
847  * @param s The string to trim.
848  * @param chars The characters to trim.
849  * @return Returns the new length of \a s.
850  */
trim_end(char const * s,char const * chars)851 inline size_type trim_end( char const *s, char const *chars ) {
852   return trim_end( s, std::strlen( s ), chars );
853 }
854 
855 /**
856  * Removes all trailing specified characters.
857  *
858  * @tparam InputStringType The input string type.
859  * @tparam OutputStringType The output string type.
860  * @param in The input string.
861  * @param chars The characters to trim.
862  * @param out The output string (which must be different from \a in).
863  */
864 template<class InputStringType,class OutputStringType>
865 void trim_end( InputStringType const &in, char const *chars,
866                OutputStringType *out );
867 
/**
 * Removes all trailing specified characters, in-place.  The result is built
 * in a temporary string by the three-argument overload and then assigned back
 * to \a s.
 *
 * @tparam StringType The string type.
 * @param s The string to trim.
 * @param chars The characters to trim.
 */
template<class StringType>
inline void trim_end( StringType &s, char const *chars ) {
  StringType trimmed;
  trim_end( s, chars, &trimmed );
  s = trimmed;
}
880 
881 /**
882  * Skips trailing whitespace characters.
883  *
884  * @param s The string to trim.
885  * @param s_len The length of \a s.
886  * @return Returns the new length of \a s.
887  */
trim_end_whitespace(char const * s,size_type s_len)888 inline size_type trim_end_whitespace( char const *s, size_type s_len ) {
889   return trim_end( s, s_len, whitespace );
890 }
891 
892 /**
893  * Skips trailing whitespace characters.
894  *
895  * @param s The string to trim.
896  * @return Returns the new length of \a s.
897  */
trim_end_whitespace(char const * s)898 inline size_type trim_end_whitespace( char const *s ) {
899   return trim_end( s, whitespace );
900 }
901 
902 /**
903  * Removes all trailing whitespace characters.
904  *
905  * @tparam InputStringType The input string type.
906  * @tparam OutputStringType The output string type.
907  * @param in The input string.
908  * @param out The output string (which must be different from \a in).
909  */
910 template<class InputStringType,class OutputStringType>
trim_end_whitespace(InputStringType const & in,OutputStringType * out)911 void trim_end_whitespace( InputStringType const &in, OutputStringType *out ) {
912   return trim_end( in, whitespace, out );
913 }
914 
915 /**
916  * Removes all trailing whitespace characters.
917  *
918  * @tparam StringType The string type.
919  * @param s The string.
920  */
921 template<class StringType> inline
trim_end_whitespace(StringType & s,char const * chars)922 void trim_end_whitespace( StringType &s, char const *chars ) {
923   trim_end( s, whitespace );
924 }
925 
926 /**
927  * Removes all leading and trailing whitespace.
928  *
929  * @tparam InputStringType The input string type.
930  * @tparam OutputStringType The output string type.
931  * @param in The input string.
932  * @param out The output string (which must be different from \a in).
933  */
934 template<class InputStringType,class OutputStringType> inline
trim_whitespace(InputStringType const & in,OutputStringType * out)935 void trim_whitespace( InputStringType const &in, OutputStringType *out ) {
936   trim( in, whitespace, out );
937 }
938 
/**
 * Removes all leading and trailing whitespace, in-place.  The result is built
 * in a temporary string by the two-argument overload and then assigned back
 * to \a s.
 *
 * @tparam StringType The string type.
 * @param s The string to trim.
 */
template<class StringType>
inline void trim_whitespace( StringType &s ) {
  StringType trimmed;
  trim_whitespace( s, &trimmed );
  s = trimmed;
}
951 
952 /**
953  * Skips any consecutive whitespace chars that are found at a given starting
954  * position within a given C string.
955  *
956  * @param s The input C string.
957  * @param s_len The length of the string.
958  * @param pos The position within \a s where to start looking for whitespace.
959  * On return, \a pos is updated with the position of the 1st non-whitespace
960  * char.
961  * @deprecated Use trim_start_whitespace() instead.
962  */
skip_whitespace(char const * s,size_type s_len,size_type * pos)963 inline void skip_whitespace( char const *s, size_type s_len, size_type *pos ) {
964   *pos = trim_start_whitespace( s + *pos, s_len - *pos ) - s;
965 }
966 
967 ////////// Miscellaneous //////////////////////////////////////////////////////
968 
/**
 * Copies the characters of a string in reverse order.
 *
 * @tparam InputStringType The input string type.
 * @tparam OutputStringType The output string type.
 * @param in The input string.
 * @param out The output string.  The reversed characters are appended to it;
 * it is not cleared first.
 */
template<class InputStringType,class OutputStringType>
inline void reverse( InputStringType const &in, OutputStringType *out ) {
  std::back_insert_iterator<OutputStringType> dest( *out );
  // Walk the input from its end to its beginning.
  typename InputStringType::const_iterator i = in.end();
  while ( i != in.begin() )
    *dest++ = *--i;
}
981 
982 ///////////////////////////////////////////////////////////////////////////////
983 
984 } // namespace ascii
985 } // namespace zorba
986 
987 #include "ascii_util.tcc"
988 
989 #endif /* ZORBA_STRING_UTIL_ASCII_UTIL_H */
990 
991 /*
992  * Local variables:
993  * mode: c++
994  * End:
995  */
996 /* vim:set et sw=2 ts=2: */
997