1 // rak - Rakshasa's toolbox
2 // Copyright (C) 2005-2007, Jari Sundell
3 //
4 // This program is free software; you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation; either version 2 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17 //
18 // In addition, as a special exception, the copyright holders give
19 // permission to link the code of portions of this program with the
20 // OpenSSL library under certain conditions as described in each
21 // individual source file, and distribute linked combinations
22 // including the two.
23 //
24 // You must obey the GNU General Public License in all respects for
25 // all of the code used other than OpenSSL.  If you modify file(s)
26 // with this exception, you may extend this exception to your version
27 // of the file(s), but you are not obligated to do so.  If you do not
28 // wish to do so, delete this exception statement from your version.
29 // If you delete this exception statement from all source files in the
30 // program, then also delete it here.
31 //
32 // Contact:  Jari Sundell <jaris@ifi.uio.no>
33 //
34 //           Skomakerveien 33
35 //           3185 Skoppum, NORWAY
36 
37 #ifndef RAK_STRING_MANIP_H
38 #define RAK_STRING_MANIP_H
39 
40 #include <algorithm>
41 #include <cctype>
42 #include <climits>
43 #include <cstdlib>
44 #include <functional>
45 #include <iterator>
46 #include <locale>
47 #include <random>
48 
49 
50 namespace rak {
51 
52 // Use these trim functions until n1872 is widely supported.
53 
// Returns a copy of 'seq' with its leading whitespace removed.
//
// Whitespace is decided by the one-argument std::isspace from <cctype>, so
// the result follows the current C locale. Each element is converted to
// unsigned char before classification: handing the <cctype> functions a
// negative value other than EOF is undefined behavior, and plain 'char' may
// be signed, so bytes >= 0x80 must not be passed through unconverted.
//
// 'Sequence' has to offer empty(), begin(), length(), operator[] and
// substr(pos, count), the way std::string does.
template <typename Sequence>
Sequence trim_begin(const Sequence& seq) {
  // Fast path: nothing to strip, hand back an unmodified copy.
  if (seq.empty() || !std::isspace(static_cast<unsigned char>(*seq.begin())))
    return seq;

  typename Sequence::size_type pos = 0;

  while (pos != seq.length() && std::isspace(static_cast<unsigned char>(seq[pos])))
    ++pos;

  return seq.substr(pos, seq.length() - pos);
}
66 
// Returns a copy of 'seq' with its trailing whitespace removed.
//
// Whitespace is decided by the one-argument std::isspace from <cctype>, so
// the result follows the current C locale. Each element is converted to
// unsigned char before classification: handing the <cctype> functions a
// negative value other than EOF is undefined behavior, and plain 'char' may
// be signed.
//
// 'Sequence' has to offer empty(), size(), operator[] and substr(pos, count),
// the way std::string does.
template <typename Sequence>
Sequence trim_end(const Sequence& seq) {
  // Fast path: nothing to strip, hand back an unmodified copy. The last
  // element is read by index so no temporary iterator has to be decremented.
  if (seq.empty() || !std::isspace(static_cast<unsigned char>(seq[seq.size() - 1])))
    return seq;

  typename Sequence::size_type pos = seq.size();

  while (pos != 0 && std::isspace(static_cast<unsigned char>(seq[pos - 1])))
    --pos;

  return seq.substr(0, pos);
}
79 
// Strips whitespace from both ends of 'seq': the tail is removed first with
// trim_end(), then the head with trim_begin().
template <typename Sequence>
Sequence trim(const Sequence& seq) {
  const Sequence without_tail = trim_end(seq);

  return trim_begin(without_tail);
}
84 
// Returns a copy of 'seq' without its leading whitespace, where whitespace is
// classified with the classic ("C") std::locale rather than the global one.
template <typename Sequence>
Sequence trim_begin_classic(const Sequence& seq) {
  const std::locale& c_locale = std::locale::classic();

  // Count how many elements at the front are whitespace.
  typename Sequence::size_type skip = 0;

  for (; skip != seq.length(); ++skip)
    if (!std::isspace(seq[skip], c_locale))
      break;

  // Empty input or a non-space first element: return the input untouched.
  if (skip == 0)
    return seq;

  return seq.substr(skip, seq.length() - skip);
}
97 
// Returns a copy of 'seq' without its trailing whitespace, where whitespace
// is classified with the classic ("C") std::locale rather than the global one.
template <typename Sequence>
Sequence trim_end_classic(const Sequence& seq) {
  const std::locale& c_locale = std::locale::classic();

  // Shrink the kept prefix while its last element is whitespace.
  typename Sequence::size_type keep = seq.size();

  while (keep != 0 && std::isspace(seq[keep - 1], c_locale))
    --keep;

  // Empty input or a non-space last element: return the input untouched.
  if (keep == seq.size())
    return seq;

  return seq.substr(0, keep);
}
110 
// Strips classic-locale whitespace from both ends of 'seq': the tail goes
// first via trim_end_classic(), then the head via trim_begin_classic().
template <typename Sequence>
Sequence trim_classic(const Sequence& seq) {
  const Sequence without_tail = trim_end_classic(seq);

  return trim_begin_classic(without_tail);
}
115 
// Consider rewriting such that m_seq is replaced by first/last.
//
// Walks the fields of a sequence that are separated by a single delimiter
// value. Dereferencing yields a copy of the current field as a 'Sequence'.
//
// The comparison operators deliberately ignore their argument: they only
// report whether *this* iterator has run off the end of its sequence, which
// is what a loop of the form 'for (; itr != last; ++itr)' needs. As a
// consequence of testing the field start against end(), an empty sequence
// produces no fields and a trailing empty field (input ending in the
// delimiter) is not reported.
//
// The iterator keeps a pointer to the sequence passed to the constructor, so
// that sequence must outlive the iterator.
template <typename Sequence>
class split_iterator_t {
public:
  typedef typename Sequence::const_iterator const_iterator;
  typedef typename Sequence::value_type     value_type;

  // Builds a detached iterator that always compares as "at end". All members
  // are initialized so that comparing it from either side is well defined.
  split_iterator_t() :
    m_seq(nullptr),
    m_delim(),
    m_pos(),
    m_next() {
  }

  // Positions the iterator on the first field of 'seq'.
  split_iterator_t(const Sequence& seq, value_type delim) :
    m_seq(&seq),
    m_delim(delim),
    m_pos(seq.begin()),
    m_next(std::find(seq.begin(), seq.end(), delim)) {
  }

  // Copy of the current field, i.e. the range [m_pos, m_next).
  Sequence operator * () const { return Sequence(m_pos, m_next); }

  // Advances to the field following the next delimiter; stays put once the
  // end of the sequence has been reached.
  split_iterator_t& operator ++ () {
    if (m_seq == nullptr)
      return *this;

    m_pos = m_next;

    if (m_pos == m_seq->end())
      return *this;

    // Step over the delimiter itself before searching for the next one.
    m_pos++;
    m_next = std::find(m_pos, m_seq->end(), m_delim);

    return *this;
  }

  bool operator == (const split_iterator_t&) const { return is_end(); }
  bool operator != (const split_iterator_t&) const { return !is_end(); }

private:
  // A detached iterator has no sequence and therefore counts as finished.
  bool is_end() const { return m_seq == nullptr || m_pos == m_seq->end(); }

  const Sequence* m_seq;
  value_type      m_delim;
  const_iterator  m_pos;
  const_iterator  m_next;
};
155 
156 template <typename Sequence>
157 inline split_iterator_t<Sequence>
split_iterator(const Sequence & seq,typename Sequence::value_type delim)158 split_iterator(const Sequence& seq, typename Sequence::value_type delim) {
159   return split_iterator_t<Sequence>(seq, delim);
160 }
161 
162 template <typename Sequence>
163 inline split_iterator_t<Sequence>
split_iterator(__UNUSED const Sequence & seq)164 split_iterator(__UNUSED const Sequence& seq) {
165   return split_iterator_t<Sequence>();
166 }
167 
// Could optimize this a bit.
//
// Converts one hexadecimal digit character to its numeric value. '0'-'9' map
// to 0-9 and 'A'-'F' map to 10-15; every other character is treated as a
// lowercase digit and mapped to 10 + (c - 'a'). No validation is performed.
inline char
hexchar_to_value(char c) {
  const bool is_decimal = '0' <= c && c <= '9';
  const bool is_upper   = 'A' <= c && c <= 'F';

  const char base   = is_decimal ? '0' : (is_upper ? 'A' : 'a');
  const int  offset = is_decimal ? 0 : 10;

  return offset + c - base;
}
180 
// Returns the uppercase hexadecimal digit for one 4-bit group of 'v'.
// 'pos' selects the group, counted from the least significant end
// (0 = bits 0-3, 1 = bits 4-7, ...).
template <int pos, typename Value>
inline char
value_to_hexchar(Value v) {
  const Value nibble = (v >> (pos * 4)) & 0xf;

  return nibble < 0xA ? '0' + nibble : 'A' + nibble - 0xA;
}
192 
193 template <typename InputIterator, typename OutputIterator>
194 OutputIterator
copy_escape_html(InputIterator first,InputIterator last,OutputIterator dest)195 copy_escape_html(InputIterator first, InputIterator last, OutputIterator dest) {
196   while (first != last) {
197     if (std::isalpha(*first, std::locale::classic()) ||
198         std::isdigit(*first, std::locale::classic()) ||
199         *first == '-') {
200       *(dest++) = *first;
201 
202     } else {
203       *(dest++) = '%';
204       *(dest++) = value_to_hexchar<1>(*first);
205       *(dest++) = value_to_hexchar<0>(*first);
206     }
207 
208     ++first;
209   }
210 
211   return dest;
212 }
213 
214 template <typename InputIterator, typename OutputIterator>
215 OutputIterator
copy_escape_html(InputIterator first1,InputIterator last1,OutputIterator first2,OutputIterator last2)216 copy_escape_html(InputIterator first1, InputIterator last1, OutputIterator first2, OutputIterator last2) {
217   while (first1 != last1) {
218     if (std::isalpha(*first1, std::locale::classic()) ||
219         std::isdigit(*first1, std::locale::classic()) ||
220         *first1 == '-') {
221       if (first2 == last2) break; else *(first2++) = *first1;
222 
223     } else {
224       if (first2 == last2) break; else *(first2++) = '%';
225       if (first2 == last2) break; else *(first2++) = value_to_hexchar<1>(*first1);
226       if (first2 == last2) break; else *(first2++) = value_to_hexchar<0>(*first1);
227     }
228 
229     ++first1;
230   }
231 
232   return first2;
233 }
234 
// Percent-escapes the range [first, last) and returns the result as a new
// std::string.
template <typename Iterator>
inline std::string
copy_escape_html(Iterator first, Iterator last) {
  std::string escaped;
  std::back_insert_iterator<std::string> sink(escaped);

  copy_escape_html(first, last, sink);
  return escaped;
}
243 
// Percent-escapes every element of 'src' and returns the result in a new
// container of the same type.
template <typename Sequence>
inline Sequence
copy_escape_html(const Sequence& src) {
  Sequence escaped;
  std::back_insert_iterator<Sequence> sink(escaped);

  copy_escape_html(src.begin(), src.end(), sink);
  return escaped;
}
252 
// Percent-escapes every element of 'src' and returns the result as a
// std::string, whatever the type of the input container.
template <typename Sequence>
inline std::string
copy_escape_html_str(const Sequence& src) {
  std::string escaped;
  std::back_insert_iterator<std::string> sink(escaped);

  copy_escape_html(src.begin(), src.end(), sink);
  return escaped;
}
261 
262 // Consider support for larger than char type.
263 template <typename InputIterator, typename OutputIterator>
264 OutputIterator
transform_hex(InputIterator first,InputIterator last,OutputIterator dest)265 transform_hex(InputIterator first, InputIterator last, OutputIterator dest) {
266   while (first != last) {
267     *(dest++) = value_to_hexchar<1>(*first);
268     *(dest++) = value_to_hexchar<0>(*first);
269 
270     ++first;
271   }
272 
273   return dest;
274 }
275 
276 template <typename InputIterator, typename OutputIterator>
277 OutputIterator
transform_hex(InputIterator first1,InputIterator last1,OutputIterator first2,OutputIterator last2)278 transform_hex(InputIterator first1, InputIterator last1, OutputIterator first2, OutputIterator last2) {
279   while (first1 != last1) {
280     if (first2 == last2) break; else *(first2++) = value_to_hexchar<1>(*first1);
281     if (first2 == last2) break; else *(first2++) = value_to_hexchar<0>(*first1);
282 
283     ++first1;
284   }
285 
286   return first2;
287 }
288 
// Hex-encodes every element of 'src' and returns the digits in a new
// container of the same type.
template <typename Sequence>
inline Sequence
transform_hex(const Sequence& src) {
  Sequence encoded;
  std::back_insert_iterator<Sequence> sink(encoded);

  transform_hex(src.begin(), src.end(), sink);
  return encoded;
}
297 
// Hex-encodes the range [first, last) and returns the digits as a new
// std::string.
template <typename Iterator>
inline std::string
transform_hex(Iterator first, Iterator last) {
  std::string encoded;
  std::back_insert_iterator<std::string> sink(encoded);

  transform_hex(first, last, sink);
  return encoded;
}
306 
// Hex-encodes every element of 'seq' and returns the digits as a
// std::string, whatever the type of the input container.
template <typename Sequence>
inline std::string
transform_hex_str(const Sequence& seq) {
  std::string encoded;
  std::back_insert_iterator<std::string> sink(encoded);

  transform_hex(seq.begin(), seq.end(), sink);
  return encoded;
}
315 
// Returns a 'Sequence' holding 'length' pseudo-random elements, each built
// from CHAR_BIT random bits.
//
// A std::mt19937 is seeded from std::random_device on every call, so the
// output differs between calls. 'Sequence' has to offer reserve(),
// push_back() and a 'value_type' that the byte values can be converted to.
template <typename Sequence>
Sequence
generate_random(size_t length) {
  std::random_device rd;
  std::mt19937 mt(rd());

  // The result type of a random number engine must be one of unsigned short,
  // unsigned int, unsigned long or unsigned long long; an 8-bit type is not
  // allowed. Produce CHAR_BIT bits in an unsigned int and narrow explicitly.
  using bytes_randomizer = std::independent_bits_engine<std::mt19937, CHAR_BIT, unsigned int>;
  bytes_randomizer bytes(mt);

  Sequence s;
  s.reserve(length);
  std::generate_n(std::back_inserter(s), length, [&bytes]() {
    return static_cast<typename Sequence::value_type>(bytes());
  });
  return s;
}
328 
// True when every element of [first, last) is a letter according to the
// classic ("C") locale. An empty range yields true.
template <typename Iterator>
inline bool
is_all_alpha(Iterator first, Iterator last) {
  const std::locale& c_locale = std::locale::classic();

  for (; first != last; ++first) {
    if (!std::isalpha(*first, c_locale))
      return false;
  }

  return true;
}
338 
// Container convenience overload: checks the whole of 'src' with the
// iterator-pair version of is_all_alpha().
template <typename Sequence>
inline bool
is_all_alpha(const Sequence& src) {
  const bool only_letters = is_all_alpha(src.begin(), src.end());

  return only_letters;
}
344 
// True when every element of [first, last) is a letter or a digit according
// to the classic ("C") locale. An empty range yields true.
template <typename Iterator>
inline bool
is_all_alnum(Iterator first, Iterator last) {
  typedef typename std::iterator_traits<Iterator>::value_type char_type;

  const std::locale& c_locale = std::locale::classic();

  return std::all_of(first, last, [&c_locale](char_type ch) {
    return std::isalnum(ch, c_locale);
  });
}
354 
// Container convenience overload: checks the whole of 'src' with the
// iterator-pair version of is_all_alnum().
template <typename Sequence>
inline bool
is_all_alnum(const Sequence& src) {
  const bool only_alnum = is_all_alnum(src.begin(), src.end());

  return only_alnum;
}
360 
// True when every element of [first, last) is an underscore, or a letter or
// digit according to the classic ("C") locale. An empty range yields true.
template <typename Iterator>
inline bool
is_all_name(Iterator first, Iterator last) {
  const std::locale& c_locale = std::locale::classic();

  for (; first != last; ++first) {
    const bool allowed = *first == '_' || std::isalnum(*first, c_locale);

    if (!allowed)
      return false;
  }

  return true;
}
373 
// Container convenience overload: checks the whole of 'src' with the
// iterator-pair version of is_all_name().
template <typename Sequence>
inline bool
is_all_name(const Sequence& src) {
  const bool only_name_chars = is_all_name(src.begin(), src.end());

  return only_name_chars;
}
379 
// Copies [first, last) into a std::string, replacing every element that is
// not printable (per std::isprint in the current C locale), as well as
// '\r', '\n' and '\t', with a single space.
//
// Elements are converted to unsigned char before being classified: handing
// the <cctype> functions a negative value other than EOF is undefined
// behavior, and plain 'char' may be signed, so bytes >= 0x80 must not be
// passed through unconverted.
template <typename Iterator>
std::string
sanitize(Iterator first, Iterator last) {
  std::string dest;

  for (; first != last; ++first) {
    const unsigned char ch = static_cast<unsigned char>(*first);

    if (std::isprint(ch) && ch != '\r' && ch != '\n' && ch != '\t')
      dest += *first;
    else
      dest += ' ';
  }

  return dest;
}
393 
394 template <typename Sequence>
395 std::string
sanitize(const Sequence & src)396 sanitize(const Sequence& src) {
397     return trim(sanitize(src.begin(), src.end()));
398 }
399 
// Copies [first, last) into a std::string while dropping everything between
// a '<' and the following '>' (both inclusive). A '>' that is not preceded by
// a '<' is dropped as well, and an unterminated '<' swallows the rest of the
// input.
//
// The elements are compared against '<' and '>' directly. No std::isprint
// pre-check is needed, since both characters are printable, and skipping it
// avoids calling a <cctype> function with a negative 'char', which is
// undefined behavior.
template <typename Iterator>
std::string striptags(Iterator first, Iterator last) {
  bool copychar = true;
  std::string dest;

  for (; first != last; ++first) {
    if (*first == '<') {
      // Tag opens: stop copying, starting with the '<' itself.
      copychar = false;
    } else if (*first == '>') {
      // Tag closes: resume copying after the '>'.
      copychar = true;
      continue;
    }

    if (copychar)
      dest += *first;
  }

  return dest;
}
419 
// Container convenience overload: strips tags from all of 'src' with the
// iterator-pair version of striptags().
template <typename Sequence>
std::string striptags(const Sequence& src) {
  const std::string stripped = striptags(src.begin(), src.end());

  return stripped;
}
424 
425 }
426 
427 #endif
428