1 /*
2 * Copyright 2006-2008 The FLWOR Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #pragma once
17 #ifndef ZORBA_STRING_UTIL_ASCII_UTIL_H
18 #define ZORBA_STRING_UTIL_ASCII_UTIL_H
19
20 #include <algorithm>
21 #include <cctype>
22 #include <cstddef>
23 #include <cstring>
24
25 #include "stl_util.h"
26
27 namespace zorba {
28 namespace ascii {
29
30 ////////// types //////////////////////////////////////////////////////////////
31
/**
 * The size type: an alias for \c std::size_t that the C-string overloads in
 * this header use for lengths and positions.
 */
typedef std::size_t size_type;
36
37 ////////// constants //////////////////////////////////////////////////////////
38
/**
 * The set of characters that the whitespace helpers in this header treat as
 * whitespace: space, form-feed, newline, carriage-return, horizontal tab and
 * vertical tab.
 */
char const whitespace[] = " \f\n\r\t\v";
40
41 ////////// Non-ASCII character stripping //////////////////////////////////////
42
/**
 * A %back_ascii_insert_iterator can be used to append characters to a string
 * ensuring that non-ASCII characters are excluded.
 *
 * @tparam StringType The string type.
 */
template<class StringType>
class back_ascii_insert_iterator :
  public
    ztd::back_insert_iterator_base<
      StringType, back_ascii_insert_iterator<StringType>
    >
{
  // Shorthand for the base class, which is parameterized on both the string
  // type and this derived iterator type.
  typedef ztd::back_insert_iterator_base<
    StringType, back_ascii_insert_iterator<StringType>
  > base_type;
public:
  // The container type as published by the base class.
  typedef typename base_type::container_type container_type;
  // The character type of the string being appended to.
  typedef typename StringType::value_type value_type;

  /**
   * Constructs a %back_ascii_insert_iterator.
   *
   * @param s The string to append to.
   */
  explicit back_ascii_insert_iterator( StringType &s ) : base_type( s ) {
  }

  /**
   * Assigns a character through this iterator.  Only the declaration is
   * visible here.
   * NOTE(review): the definition is presumably in ascii_util.tcc (included at
   * the end of this header) and is expected to append \a c only when it is an
   * ASCII character -- confirm against the definition.
   *
   * @param c The character to (possibly) append.
   * @return Returns a reference to a %back_ascii_insert_iterator.
   */
  back_ascii_insert_iterator& operator=( value_type c );
};
73
74 /**
75 * This is a convenience function to create a back_ascii_insert_iterator.
76 *
77 * @tparam StringType The string type.
78 * @param out The output string.
79 */
80 template<class StringType> inline back_ascii_insert_iterator<StringType>
back_ascii_inserter(StringType & out)81 back_ascii_inserter( StringType &out ) {
82 return back_ascii_insert_iterator<StringType>( out );
83 }
84
85 ////////// Character testing //////////////////////////////////////////////////
86
/**
 * Tests whether a character is an ASCII character.  It wraps isascii(3)
 * (spelled \c __isascii on Windows), which may be implemented as a macro, so
 * that a real function is available.
 *
 * @tparam CharType The character type.
 * @param c The character to check.
 * @return Returns \c true only if \a c is an ASCII character.
 */
template<typename CharType> inline
bool is_ascii( CharType c ) {
#ifndef WIN32
  return isascii( c );
#else
  return __isascii( c );
#endif
}
104
/**
 * Checks whether the given character is an ASCII alphabetic character.  This
 * function exists to make a proper function out of the standard isalpha(3)
 * that may be implemented as a macro.
 *
 * @tparam CharType The character type.
 * @param c The character to check.
 * @return Returns \c true only if the character is an ASCII alphabetic
 * character; non-ASCII values always yield \c false.
 */
template<typename CharType> inline
bool is_alpha( CharType c ) {
  // isalpha() is only defined for EOF and values representable as unsigned
  // char: a negative plain char or a wide value is undefined behavior (and
  // crashes on Windows), so non-ASCII input is rejected first everywhere.
#ifdef WIN32
  return __isascii( c ) && isalpha( c );
#else
  return isascii( c ) && isalpha( c );
#endif
}
123
/**
 * Checks whether the given character is an ASCII alpha-numeric character.
 * This function exists to make a proper function out of the standard
 * isalnum(3) that may be implemented as a macro.
 *
 * @tparam CharType The character type.
 * @param c The character to check.
 * @return Returns \c true only if the character is an ASCII alpha-numeric
 * character; non-ASCII values always yield \c false.
 */
template<typename CharType> inline
bool is_alnum( CharType c ) {
  // isalnum() is only defined for EOF and values representable as unsigned
  // char: a negative plain char or a wide value is undefined behavior (and
  // crashes on Windows), so non-ASCII input is rejected first everywhere.
#ifdef WIN32
  return __isascii( c ) && isalnum( c );
#else
  return isascii( c ) && isalnum( c );
#endif
}
142
/**
 * Checks whether the given character is an ASCII control character.  This
 * function exists to make a proper function out of the standard iscntrl(3)
 * that may be implemented as a macro.
 *
 * @tparam CharType The character type.
 * @param c The character to check.
 * @return Returns \c true only if the character is an ASCII control
 * character; non-ASCII values always yield \c false.
 */
template<typename CharType> inline
bool is_cntrl( CharType c ) {
  // iscntrl() is only defined for EOF and values representable as unsigned
  // char: a negative plain char or a wide value is undefined behavior (and
  // crashes on Windows), so non-ASCII input is rejected first everywhere.
#ifdef WIN32
  return __isascii( c ) && iscntrl( c );
#else
  return isascii( c ) && iscntrl( c );
#endif
}
161
/**
 * Checks whether the given character is an ASCII decimal digit.  This function
 * exists to make a proper function out of the standard isdigit(3) that may be
 * implemented as a macro.
 *
 * @tparam CharType The character type.
 * @param c The character to check.
 * @return Returns \c true only if the character is an ASCII decimal digit;
 * non-ASCII values always yield \c false.
 */
template<typename CharType> inline
bool is_digit( CharType c ) {
  // isdigit() is only defined for EOF and values representable as unsigned
  // char: a negative plain char or a wide value is undefined behavior (and
  // crashes on Windows), so non-ASCII input is rejected first everywhere.
#ifdef WIN32
  return __isascii( c ) && isdigit( c );
#else
  return isascii( c ) && isdigit( c );
#endif
}
180
/**
 * Checks whether the given character is an ASCII printing character.  This
 * function exists to make a proper function out of the standard isprint(3)
 * that may be implemented as a macro.
 *
 * @tparam CharType The character type.
 * @param c The character to check.
 * @return Returns \c true only if the character is an ASCII printing
 * character; non-ASCII values always yield \c false.
 */
template<typename CharType> inline
bool is_print( CharType c ) {
  // isprint() is only defined for EOF and values representable as unsigned
  // char: a negative plain char or a wide value is undefined behavior (and
  // crashes on Windows), so non-ASCII input is rejected first everywhere.
#ifdef WIN32
  return __isascii( c ) && isprint( c );
#else
  return isascii( c ) && isprint( c );
#endif
}
199
/**
 * Checks whether the given character is an ASCII punctuation character.  This
 * function exists to make a proper function out of the standard ispunct(3)
 * that may be implemented as a macro.
 *
 * @tparam CharType The character type.
 * @param c The character to check.
 * @return Returns \c true only if the character is an ASCII punctuation
 * character; non-ASCII values always yield \c false.
 */
template<typename CharType> inline
bool is_punct( CharType c ) {
  // ispunct() is only defined for EOF and values representable as unsigned
  // char: a negative plain char or a wide value is undefined behavior (and
  // crashes on Windows), so non-ASCII input is rejected first everywhere.
#ifdef WIN32
  return __isascii( c ) && ispunct( c );
#else
  return isascii( c ) && ispunct( c );
#endif
}
218
/**
 * Checks whether the given character is an ASCII whitespace character.  This
 * function exists to make a proper function out of the standard isspace(3)
 * that may be implemented as a macro.
 *
 * @tparam CharType The character type.
 * @param c The character to check.
 * @return Returns \c true only if the character is an ASCII whitespace
 * character; non-ASCII values always yield \c false.
 */
template<typename CharType> inline
bool is_space( CharType c ) {
  // isspace() is only defined for EOF and values representable as unsigned
  // char: a negative plain char or a wide value is undefined behavior (and
  // crashes on Windows), so non-ASCII input is rejected first everywhere.
#ifdef WIN32
  return __isascii( c ) && isspace( c );
#else
  return isascii( c ) && isspace( c );
#endif
}
237
/**
 * Checks whether an entire string is whitespace.
 *
 * Only the declaration is visible here; the definition lives outside this
 * header section.
 *
 * @param s The NULL-terminated C string to check.
 * @return Returns \c true only if the entire string is whitespace.
 */
bool is_whitespace( char const *s );
245
/**
 * Checks whether an entire string object is whitespace by handing its C-string
 * representation to the overload of is_whitespace() that takes a C string.
 *
 * @tparam StringType The string type; it must provide \c c_str().
 * @param s The string to check.
 * @return Returns \c true only if the entire string is whitespace.
 */
template<class StringType> inline
bool is_whitespace( StringType const &s ) {
  char const *const c_str = s.c_str();
  return is_whitespace( c_str );
}
256
/**
 * Checks whether the given character is an ASCII hexadecimal digit.  This
 * function exists to make a proper function out of the standard isxdigit(3)
 * that may be implemented as a macro.
 *
 * @tparam CharType The character type.
 * @param c The character to check.
 * @return Returns \c true only if the character is an ASCII hexadecimal
 * digit; non-ASCII values always yield \c false.
 */
template<typename CharType> inline
bool is_xdigit( CharType c ) {
  // isxdigit() is only defined for EOF and values representable as unsigned
  // char: a negative plain char or a wide value is undefined behavior (and
  // crashes on Windows), so non-ASCII input is rejected first everywhere.
#ifdef WIN32
  return __isascii( c ) && isxdigit( c );
#else
  return isascii( c ) && isxdigit( c );
#endif
}
275
276 ////////// begins/ends_with ///////////////////////////////////////////////////
277
/**
 * Tests whether the first character of a C string equals a given character.
 * For an empty string, the character compared is the terminating null.
 *
 * @param s The NULL-terminated C string to check.
 * @param c The prefix character.
 * @return Returns \c true only if \a s begins with \a c.
 */
inline bool begins_with( char const *s, char c ) {
  return *s == c;
}
288
/**
 * Tests whether the first character of a string object equals a given
 * character.
 *
 * @tparam StringType The string type.
 * @param s The string to check.
 * @param c The prefix character.
 * @return Returns \c true only if \a s is non-empty and begins with \a c.
 */
template<class StringType> inline
bool begins_with( StringType const &s, char c ) {
  if ( s.empty() )
    return false;                       // an empty string has no first char
  return s[0] == c;
}
301
302 /**
303 * Checks whether a string begins with a given prefix.
304 *
305 * @param s The string to check.
306 * @param ps The prefix string.
307 * @param n The number of bytes to compare.
308 * @return Returns \c true only if \a s begins with \a ps.
309 */
begins_with(char const * s,char const * ps,size_type n)310 inline bool begins_with( char const *s, char const *ps, size_type n ) {
311 return std::strncmp( s, ps, n ) == 0;
312 }
313
314 /**
315 * Checks whether a string begins with a given prefix.
316 *
317 * @param s The string to check.
318 * @param ps The prefix string.
319 * @return Returns \c true only if \a s begins with \a ps.
320 */
begins_with(char const * s,char const * ps)321 inline bool begins_with( char const *s, char const *ps ) {
322 return begins_with( s, ps, std::strlen( ps ) );
323 }
324
/**
 * Tests whether a string object begins with the first \a n characters of a
 * prefix.
 *
 * @tparam StringType The string type.
 * @param s The string to check.
 * @param ps The prefix string.
 * @param n The number of bytes to compare.
 * @return Returns \c true only if \a s is at least \a n characters long and
 * its first \a n characters equal those of \a ps.
 */
template<class StringType> inline
bool begins_with( StringType const &s, char const *ps,
                  typename StringType::size_type n ) {
  typedef typename StringType::traits_type traits_type;
  if ( n > s.size() )
    return false;                       // prefix longer than the string
  return traits_type::compare( s.data(), ps, n ) == 0;
}
340
/**
 * Tests whether a string object begins with a given NULL-terminated prefix.
 * The prefix length is measured with \c strlen and the work is delegated to
 * the length-taking overload.
 *
 * @tparam StringType The string type.
 * @param s The string to check.
 * @param ps The prefix string.
 * @return Returns \c true only if \a s begins with \a ps.
 */
template<class StringType> inline
bool begins_with( StringType const &s, char const *ps ) {
  std::size_t const ps_len = std::strlen( ps );
  return begins_with( s, ps, ps_len );
}
353
/**
 * Tests whether a string object begins with a prefix that is itself a string
 * object.  The prefix's \c data() and \c size() are forwarded to the
 * pointer-and-length overload.
 *
 * @tparam StringType The string type.
 * @tparam PrefixStringType The prefix string type.
 * @param s The string to check.
 * @param ps The prefix string.
 * @return Returns \c true only if \a s begins with \a ps.
 */
template<class StringType,class PrefixStringType> inline
bool begins_with( StringType const &s, PrefixStringType const &ps ) {
  char const *const ps_data = ps.data();
  return begins_with( s, ps_data, ps.size() );
}
367
368 /**
369 * Checks whether a string ends with a given suffix.
370 *
371 * @param s The string to check.
372 * @param c The suffix character.
373 * @return Returns \c true only if \a s ends with \a c.
374 */
ends_with(char const * s,char c)375 inline bool ends_with( char const *s, char c ) {
376 size_type const len = std::strlen( s );
377 return len > 0 && s[ len - 1 ] == c;
378 }
379
/**
 * Tests whether the last character of a string object equals a given
 * character.
 *
 * @tparam StringType The string type.
 * @param s The string to check.
 * @param c The suffix character.
 * @return Returns \c true only if \a s is non-empty and ends with \a c.
 */
template<class StringType> inline
bool ends_with( StringType const &s, char c ) {
  if ( s.empty() )
    return false;                       // an empty string has no last char
  return s[ s.size() - 1 ] == c;
}
392
393 /**
394 * Checks whether a string ends with a given suffix.
395 *
396 * @param s The string to check.
397 * @param ss The suffix string.
398 * @param ss_len The length of the suffix string.
399 * @return Returns \c true only if \a s ends with \a ss.
400 */
ends_with(char const * s,char const * ss,size_type ss_len)401 inline bool ends_with( char const *s, char const *ss, size_type ss_len ) {
402 size_type const len = std::strlen( s );
403 return ss_len <= len && std::strncmp( s + len - ss_len, ss, ss_len ) == 0;
404 }
405
406 /**
407 * Checks whether a string ends with a given suffix.
408 *
409 * @param s The string to check.
410 * @param ss The suffix string.
411 * @return Returns \c true only if \a s ends with \a ss.
412 */
ends_with(char const * s,char const * ss)413 inline bool ends_with( char const *s, char const *ss ) {
414 return ends_with( s, ss, std::strlen( ss ) );
415 }
416
/**
 * Checks whether a string ends with a given suffix.
 *
 * Only the first \a ss_len characters of \a ss are considered, so \a ss need
 * not be NULL-terminated.  The comparison looks only at the last \a ss_len
 * characters of \a s (mirroring begins_with()) rather than searching the
 * whole string.
 *
 * @tparam StringType The string type.
 * @param s The string to check.
 * @param ss The suffix string.
 * @param ss_len The length of the suffix string.
 * @return Returns \c true only if \a s is at least \a ss_len characters long
 * and its last \a ss_len characters equal those of \a ss.
 */
template<class StringType> inline
bool ends_with( StringType const &s, char const *ss,
                typename StringType::size_type ss_len ) {
  typedef typename StringType::traits_type traits_type;
  if ( ss_len > s.size() )
    return false;                       // suffix longer than the string
  return traits_type::compare(
    s.data() + ( s.size() - ss_len ), ss, ss_len
  ) == 0;
}
432
/**
 * Tests whether a string object ends with a given NULL-terminated suffix.
 * The suffix length is measured with \c strlen and the work is delegated to
 * the length-taking overload.
 *
 * @tparam StringType The string type.
 * @param s The string to check.
 * @param ss The suffix string.
 * @return Returns \c true only if \a s ends with \a ss.
 */
template<class StringType> inline
bool ends_with( StringType const &s, char const *ss ) {
  std::size_t const ss_len = std::strlen( ss );
  return ends_with( s, ss, ss_len );
}
445
/**
 * Tests whether a string object ends with a suffix that is itself a string
 * object.  The suffix's \c data() and \c size() are forwarded to the
 * pointer-and-length overload.
 *
 * @tparam StringType The string type.
 * @tparam SuffixStringType The suffix string type.
 * @param s The string to check.
 * @param ss The suffix string.
 * @return Returns \c true only if \a s ends with \a ss.
 */
template<class StringType,class SuffixStringType> inline
bool ends_with( StringType const &s, SuffixStringType const &ss ) {
  char const *const ss_data = ss.data();
  return ends_with( s, ss_data, ss.size() );
}
459
460 ////////// Case conversion ////////////////////////////////////////////////////
461
/**
 * Converts the given character to lower-case.  This function exists to make a
 * proper function out of the standard tolower(3) that may be implemented as a
 * macro.
 *
 * @param c The character to convert.
 * @return Returns the character converted to lower-case (if it was upper-case)
 * or the original character if either it already is lower-case or if it is not
 * a letter.
 */
inline char to_lower( char c ) {
  // tolower() is only defined for EOF and values representable as unsigned
  // char; where plain char is signed, passing a negative value directly is
  // undefined behavior, so go through unsigned char.
  return static_cast<char>( tolower( static_cast<unsigned char>( c ) ) );
}
475
476 /**
477 * Converts a string to lower-case in-place.
478 *
479 * @param begin A pointer to the first character of the string.
480 * @param end A pointer to one past the last character of the string.
481 */
to_lower(char * begin,char * end)482 inline void to_lower( char *begin, char *end ) {
483 std::transform( begin, end, begin, static_cast<char (*)(char)>( to_lower ) );
484 }
485
486 /**
487 * Converts a string to lower-case in-place.
488 *
489 * @param s The string to convert.
490 */
to_lower(char * s)491 inline void to_lower( char *s ) {
492 to_lower( s, s + std::strlen( s ) );
493 }
494
495 /**
496 * Converts a string to lower-case in-place.
497 *
498 * @tparam StringType The string type.
499 * @param s The string to convert.
500 */
501 template<class StringType> inline
to_lower(StringType & s)502 void to_lower( StringType &s ) {
503 std::transform(
504 s.begin(), s.end(), s.begin(), static_cast<char (*)(char)>( to_lower )
505 );
506 }
507
508 /**
509 * Converts a string to lower-case.
510 *
511 * @tparam InputStringType The input string type.
512 * @tparam OutputStringType The output string type.
513 * @param in The input string.
514 * @param out The output string (which must be different from \a in). Its
515 * contents are overwritten.
516 */
517 template<class InputStringType,class OutputStringType> inline
to_lower(InputStringType const & in,OutputStringType * out)518 void to_lower( InputStringType const &in, OutputStringType *out ) {
519 std::transform(
520 in.begin(), in.end(), std::back_inserter( *out ),
521 static_cast<char (*)(char)>( to_lower )
522 );
523 }
524
/**
 * Converts the given character to upper-case.  This function exists to make a
 * proper function out of the standard toupper(3) that may be implemented as a
 * macro.
 *
 * @param c The character to convert.
 * @return Returns the character converted to upper-case (if it was lower-case)
 * or the original character if either it already is upper-case or if it is not
 * a letter.
 */
inline char to_upper( char c ) {
  // toupper() is only defined for EOF and values representable as unsigned
  // char; where plain char is signed, passing a negative value directly is
  // undefined behavior, so go through unsigned char.
  return static_cast<char>( toupper( static_cast<unsigned char>( c ) ) );
}
538
539 /**
540 * Converts a string to upper-case in-place.
541 *
542 * @param begin A pointer to the first character of the string.
543 * @param end A pointer to one past the last character of the string.
544 */
to_upper(char * begin,char * end)545 inline void to_upper( char *begin, char *end ) {
546 std::transform( begin, end, begin, static_cast<char (*)(char)>( to_upper ) );
547 }
548
549 /**
550 * Converts a string to upper-case in-place.
551 *
552 * @param s The string to convert.
553 */
to_upper(char * s)554 inline void to_upper( char *s ) {
555 to_upper( s, s + std::strlen( s ) );
556 }
557
558 /**
559 * Converts a string to upper-case in-place.
560 *
561 * @tparam StringType The string type.
562 * @param s The string to convert.
563 */
564 template<class StringType> inline
to_upper(StringType & s)565 void to_upper( StringType &s ) {
566 std::transform(
567 s.begin(), s.end(), s.begin(), static_cast<char (*)(char)>( to_upper )
568 );
569 }
570
571 /**
572 * Converts a string to upper-case.
573 *
574 * @tparam InputStringType The input string type.
575 * @tparam OutputStringType The output string type.
576 * @param in The input string.
577 * @param out The output string (which must be different from \a in). Its
578 * contents are overwritten.
579 */
580 template<class InputStringType,class OutputStringType> inline
to_upper(InputStringType const & in,OutputStringType * out)581 void to_upper( InputStringType const &in, OutputStringType *out ) {
582 std::transform(
583 in.begin(), in.end(), std::back_inserter( *out ),
584 static_cast<char (*)(char)>( to_upper )
585 );
586 }
587
588 ////////// Replacement ////////////////////////////////////////////////////////
589
/**
 * Replaces all occurrences of one character with another.
 *
 * Only the declaration is visible here.
 * NOTE(review): the definition is presumably in ascii_util.tcc, which is
 * included at the end of this header -- confirm.
 *
 * @tparam StringType The string type.
 * @param s The string to modify.
 * @param from The character to replace.
 * @param to The character to replace with.
 * @return Returns \c true only if at least one replacement is performed.
 */
template<class StringType>
bool replace_all( StringType &s, char from, char to );
601
/**
 * Replaces all occurrences of one substring with another.
 *
 * Only the declaration is visible here; the string-taking overloads in this
 * header all forward to this one.
 * NOTE(review): the definition is presumably in ascii_util.tcc, which is
 * included at the end of this header -- confirm.
 *
 * @tparam StringType The string type.
 * @param s The string to modify.
 * @param from The substring to replace.
 * @param from_len The length of \a from.
 * @param to The substring to replace with.
 * @param to_len The length of \a to.
 * @return Returns \c true only if at least one replacement is performed.
 */
template<class StringType>
bool replace_all( StringType &s,
                  char const *from, typename StringType::size_type from_len,
                  char const *to, typename StringType::size_type to_len );
617
/**
 * Replaces all occurrences of one NULL-terminated substring with another.
 * Both lengths are measured with \c strlen and the work is delegated to the
 * pointer-and-length overload.
 *
 * @tparam StringType The string type.
 * @param s The string to modify.
 * @param from The substring to replace.
 * @param to The substring to replace with.
 * @return Returns \c true only if at least one replacement is performed.
 */
template<class StringType> inline
bool replace_all( StringType &s, char const *from, char const *to ) {
  std::size_t const from_len = std::strlen( from );
  std::size_t const to_len = std::strlen( to );
  return replace_all( s, from, from_len, to, to_len );
}
631
/**
 * Replaces all occurrences of a NULL-terminated substring with the contents
 * of a string object.  The length of \a from is measured with \c strlen; the
 * replacement's \c data() and \c size() are used as-is.
 *
 * @tparam StringType The string type.
 * @tparam ToStringType The type of the replacement string.
 * @param s The string to modify.
 * @param from The substring to replace.
 * @param to The substring to replace with.
 * @return Returns \c true only if at least one replacement is performed.
 */
template<class StringType,class ToStringType> inline
bool replace_all( StringType &s, char const *from, ToStringType const &to ) {
  std::size_t const from_len = std::strlen( from );
  return replace_all( s, from, from_len, to.data(), to.size() );
}
645
/**
 * Replaces all occurrences of one substring with another where both are given
 * as string objects.  Their \c data() and \c size() are forwarded to the
 * pointer-and-length overload.
 *
 * @tparam StringType The string type.
 * @tparam FromStringType The type of the string to search for.
 * @tparam ToStringType The type of the replacement string.
 * @param s The string to modify.
 * @param from The substring to replace.
 * @param to The substring to replace with.
 * @return Returns \c true only if at least one replacement is performed.
 */
template<class StringType,class FromStringType,class ToStringType> inline
bool replace_all( StringType &s, FromStringType const &from,
                  ToStringType const &to ) {
  return replace_all(
    s,
    from.data(), from.size(),
    to.data(), to.size()
  );
}
660
661 ////////// Whitespace /////////////////////////////////////////////////////////
662
/**
 * Converts sequences of one or more whitespace characters to a single space.
 * Additionally, all leading and trailing whitespace is removed.
 *
 * Only the declaration is visible here.
 * NOTE(review): the definition is presumably in ascii_util.tcc, which is
 * included at the end of this header -- confirm.
 *
 * @tparam InputStringType The input string type.
 * @tparam OutputStringType The output string type.
 * @param in The input string.
 * @param out The output string (which must be different from \a in).
 */
template<class InputStringType,class OutputStringType>
void normalize_whitespace( InputStringType const &in, OutputStringType *out );
674
/**
 * Normalizes the whitespace of a string in place: runs of whitespace become a
 * single space and leading/trailing whitespace is removed.  The result is
 * built in a scratch string by the two-argument overload and then assigned
 * back to \a s.
 *
 * @tparam StringType The string type.
 * @param s The string.
 */
template<class StringType> inline
void normalize_whitespace( StringType &s ) {
  StringType normalized;                // scratch: output must differ from input
  normalize_whitespace( s, &normalized );
  s = normalized;
}
688
/**
 * Removes all specified characters by shifting the contents of the buffer to
 * the left.
 *
 * Only the declaration is visible here; the definition lives outside this
 * header section.
 *
 * @param s The string.
 * @param s_len The length of \a s.
 * @param chars The characters to remove.
 * @return Returns the new length of \a s with all \a chars removed.
 */
size_type remove_chars( char *s, size_type s_len, char const *chars );
699
700 /**
701 * Removes all whitespace characters by shifting the contents of the buffer to
702 * the left.
703 *
704 * @param s The string.
705 * @param s_len The length of \a s.
706 * @return Returns the new length of \a s with all whitespace removed.
707 */
remove_whitespace(char * s,size_type s_len)708 inline size_type remove_whitespace( char *s, size_type s_len ) {
709 return remove_chars( s, s_len, whitespace );
710 }
711
/**
 * Removes all leading and trailing specified characters.
 *
 * Only the declaration is visible here.
 * NOTE(review): the definition is presumably in ascii_util.tcc, which is
 * included at the end of this header -- confirm.
 *
 * @tparam InputStringType The input string type.
 * @tparam OutputStringType The output string type.
 * @param in The input string.
 * @param chars The characters to trim.
 * @param out The output string (which must be different from \a in).
 */
template<class InputStringType,class OutputStringType>
void trim( InputStringType const &in, char const *chars,
           OutputStringType *out );
724
/**
 * Removes all leading and trailing specified characters from a string in
 * place.  The result is built in a scratch string by the three-argument
 * overload and then assigned back to \a s.
 *
 * @tparam StringType The string type.
 * @param s The string.
 * @param chars The characters to trim.
 */
template<class StringType> inline
void trim( StringType &s, char const *chars ) {
  StringType trimmed;                   // scratch: output must differ from input
  trim( s, chars, &trimmed );
  s = trimmed;
}
737
/**
 * Skips leading specified characters.
 *
 * Only the declaration is visible here; the definition lives outside this
 * header section.
 * NOTE(review): since no length is passed, \a s is presumably required to be
 * NULL-terminated -- confirm against the definition.
 *
 * @param s The string to trim.
 * @param chars The characters to trim.
 * @return Returns a pointer to the first character in \a s that is not among
 * the characters in \a chars.
 */
char const* trim_start( char const *s, char const *chars );
747
/**
 * Skips leading specified characters.
 *
 * Only the declaration is visible here; the definition lives outside this
 * header section.
 *
 * @param s The string to trim.
 * @param s_len The length of \a s.
 * @param chars The characters to trim.
 * @return Returns a pointer to the first character in \a s that is not among
 * the characters in \a chars.
 */
char const* trim_start( char const *s, size_type s_len, char const *chars );
758
/**
 * Removes all leading specified characters.
 *
 * Only the declaration is visible here.
 * NOTE(review): the definition is presumably in ascii_util.tcc, which is
 * included at the end of this header -- confirm.
 *
 * @tparam InputStringType The input string type.
 * @tparam OutputStringType The output string type.
 * @param in The input string.
 * @param chars The characters to trim.
 * @param out The output string (which must be different from \a in).
 */
template<class InputStringType,class OutputStringType>
void trim_start( InputStringType const &in, char const *chars,
                 OutputStringType *out );
771
/**
 * Removes all leading specified characters from a string in place.  The
 * result is built in a scratch string by the three-argument overload and then
 * assigned back to \a s.
 *
 * @tparam StringType The string type.
 * @param s The string.
 * @param chars The characters to trim.
 */
template<class StringType> inline
void trim_start( StringType &s, char const *chars ) {
  StringType trimmed;                   // scratch: output must differ from input
  trim_start( s, chars, &trimmed );
  s = trimmed;
}
784
785 /**
786 * Skips leading whitespace chracters.
787 *
788 * @param s The string to trim.
789 * @return Returns a pointer to the first character in \a s that is not a
790 * whitespace character.
791 */
trim_start_whitespace(char const * s)792 inline char const* trim_start_whitespace( char const *s ) {
793 return trim_start( s, whitespace );
794 }
795
796 /**
797 * Skips leading whitespace characters.
798 *
799 * @param s The string to trim.
800 * @param s_len The length of \a s.
801 * @return Returns a pointer to the first character in \a s that is not a
802 * whitespace character.
803 */
trim_start_whitespace(char const * s,size_type s_len)804 inline char const* trim_start_whitespace( char const *s, size_type s_len ) {
805 return trim_start( s, s_len, whitespace );
806 }
807
808 /**
809 * Removes all leading whitespace characters.
810 *
811 * @tparam InputStringType The input string type.
812 * @tparam OutputStringType The output string type.
813 * @param in The input string.
814 * @param out The output string (which must be different from \a in).
815 */
816 template<class InputStringType,class OutputStringType> inline
trim_start_whitespace(InputStringType const & in,OutputStringType * out)817 void trim_start_whitespace( InputStringType const &in, OutputStringType *out ) {
818 trim_start( in, whitespace, out );
819 }
820
/**
 * Removes all leading whitespace characters from a string in place.  The
 * result is built in a scratch string by the two-argument overload and then
 * assigned back to \a s.
 *
 * @tparam StringType The string type.
 * @param s The string.
 */
template<class StringType> inline
void trim_start_whitespace( StringType &s ) {
  StringType trimmed;                   // scratch: output must differ from input
  trim_start_whitespace( s, &trimmed );
  s = trimmed;
}
833
/**
 * Skips trailing specified characters.
 *
 * Only the declaration is visible here; the definition lives outside this
 * header section.  \a s is taken as pointer-to-const, so the buffer itself is
 * not modified through this interface; the caller applies the returned length.
 *
 * @param s The string to trim.
 * @param s_len The length of \a s.
 * @param chars The characters to trim.
 * @return Returns the new length of \a s.
 */
size_type trim_end( char const *s, size_type s_len, char const *chars );
843
844 /**
845 * Skips trailing specified characters.
846 *
847 * @param s The string to trim.
848 * @param chars The characters to trim.
849 * @return Returns the new length of \a s.
850 */
trim_end(char const * s,char const * chars)851 inline size_type trim_end( char const *s, char const *chars ) {
852 return trim_end( s, std::strlen( s ), chars );
853 }
854
/**
 * Removes all trailing specified characters.
 *
 * Only the declaration is visible here.
 * NOTE(review): the definition is presumably in ascii_util.tcc, which is
 * included at the end of this header -- confirm.
 *
 * @tparam InputStringType The input string type.
 * @tparam OutputStringType The output string type.
 * @param in The input string.
 * @param chars The characters to trim.
 * @param out The output string (which must be different from \a in).
 */
template<class InputStringType,class OutputStringType>
void trim_end( InputStringType const &in, char const *chars,
               OutputStringType *out );
867
/**
 * Removes all trailing specified characters from a string in place.  The
 * result is built in a scratch string by the three-argument overload and then
 * assigned back to \a s.
 *
 * @tparam StringType The string type.
 * @param s The string.
 * @param chars The characters to trim.
 */
template<class StringType> inline
void trim_end( StringType &s, char const *chars ) {
  StringType trimmed;                   // scratch: output must differ from input
  trim_end( s, chars, &trimmed );
  s = trimmed;
}
880
881 /**
882 * Skips trailing whitespace characters.
883 *
884 * @param s The string to trim.
885 * @param s_len The length of \a s.
886 * @return Returns the new length of \a s.
887 */
trim_end_whitespace(char const * s,size_type s_len)888 inline size_type trim_end_whitespace( char const *s, size_type s_len ) {
889 return trim_end( s, s_len, whitespace );
890 }
891
892 /**
893 * Skips trailing whitespace characters.
894 *
895 * @param s The string to trim.
896 * @return Returns the new length of \a s.
897 */
trim_end_whitespace(char const * s)898 inline size_type trim_end_whitespace( char const *s ) {
899 return trim_end( s, whitespace );
900 }
901
902 /**
903 * Removes all trailing whitespace characters.
904 *
905 * @tparam InputStringType The input string type.
906 * @tparam OutputStringType The output string type.
907 * @param in The input string.
908 * @param out The output string (which must be different from \a in).
909 */
910 template<class InputStringType,class OutputStringType>
trim_end_whitespace(InputStringType const & in,OutputStringType * out)911 void trim_end_whitespace( InputStringType const &in, OutputStringType *out ) {
912 return trim_end( in, whitespace, out );
913 }
914
915 /**
916 * Removes all trailing whitespace characters.
917 *
918 * @tparam StringType The string type.
919 * @param s The string.
920 */
921 template<class StringType> inline
trim_end_whitespace(StringType & s,char const * chars)922 void trim_end_whitespace( StringType &s, char const *chars ) {
923 trim_end( s, whitespace );
924 }
925
926 /**
927 * Removes all leading and trailing whitespace.
928 *
929 * @tparam InputStringType The input string type.
930 * @tparam OutputStringType The output string type.
931 * @param in The input string.
932 * @param out The output string (which must be different from \a in).
933 */
934 template<class InputStringType,class OutputStringType> inline
trim_whitespace(InputStringType const & in,OutputStringType * out)935 void trim_whitespace( InputStringType const &in, OutputStringType *out ) {
936 trim( in, whitespace, out );
937 }
938
/**
 * Removes all leading and trailing whitespace from a string in place.  The
 * result is built in a scratch string by the two-argument overload and then
 * assigned back to \a s.
 *
 * @tparam StringType The string type.
 * @param s The string.
 */
template<class StringType> inline
void trim_whitespace( StringType &s ) {
  StringType trimmed;                   // scratch: output must differ from input
  trim_whitespace( s, &trimmed );
  s = trimmed;
}
951
952 /**
953 * Skips any consecutive whitespace chars that are found at a given starting
954 * position within a given C string.
955 *
956 * @param s The input C string.
957 * @param s_len The length of the string.
958 * @param pos The position within \a s where to start looking for whitespace.
959 * On return, \a pos is updated with the position of the 1st non-whitespace
960 * char.
961 * @deprecated Use trim_start_whitespace() instead.
962 */
skip_whitespace(char const * s,size_type s_len,size_type * pos)963 inline void skip_whitespace( char const *s, size_type s_len, size_type *pos ) {
964 *pos = trim_start_whitespace( s + *pos, s_len - *pos ) - s;
965 }
966
967 ////////// Miscellaneous //////////////////////////////////////////////////////
968
/**
 * Appends the characters of a string, in reverse order, to an output string.
 * Any existing contents of \a out are kept; the reversed characters are added
 * after them.
 *
 * @tparam InputStringType The input string type.
 * @tparam OutputStringType The output string type.
 * @param in The input string.
 * @param out The output string.
 */
template<class InputStringType,class OutputStringType> inline
void reverse( InputStringType const &in, OutputStringType *out ) {
  std::back_insert_iterator<OutputStringType> appender( *out );
  std::reverse_copy( in.begin(), in.end(), appender );
}
981
982 ///////////////////////////////////////////////////////////////////////////////
983
984 } // namespace ascii
985 } // namespace zorba
986
987 #include "ascii_util.tcc"
988
989 #endif /* ZORBA_STRING_UTIL_ASCII_UTIL_H */
990
991 /*
992 * Local variables:
993 * mode: c++
994 * End:
995 */
996 /* vim:set et sw=2 ts=2: */
997