// LAF Base Library
// Copyright (c) 2001-2017 David Capello
//
// This file is released under the terms of the MIT license.
// Read LICENSE.txt for more information.
6 
7 #ifndef BASE_STRING_H_INCLUDED
8 #define BASE_STRING_H_INCLUDED
9 #pragma once
10 
11 #include <cstdarg>
12 #include <iterator>
13 #include <string>
14 
15 namespace base {
16 
// NOTE(review): only declarations are visible in this header; the notes
// below are inferred from names and signatures and must be confirmed
// against the definitions.
// Presumably printf-style formatting whose result is returned as a std::string.
17   std::string string_printf(const char* format, ...);
// Presumably the std::va_list counterpart of string_printf().
18   std::string string_vprintf(const char* format, std::va_list ap);
19 
// Both take the input by const reference and return a new std::string;
// presumably a lower-/upper-cased copy -- TODO confirm how non-ASCII text is handled.
20   std::string string_to_lower(const std::string& original);
21   std::string string_to_upper(const std::string& original);
22 
// Conversions between std::wstring and std::string; by name the std::string
// side is UTF-8 encoded -- confirm against the definitions.
23   std::string to_utf8(const std::wstring& widestring);
24   std::wstring from_utf8(const std::string& utf8string);
25 
// NOTE(review): by name this looks like a code-point count rather than a byte count -- confirm.
26   int utf8_length(const std::string& utf8string);
// NOTE(review): by name a case-insensitive comparison; the meaning of n
// (defaults to 0) is not visible here -- TODO confirm.
27   int utf8_icmp(const std::string& a, const std::string& b, int n = 0);
28 
// Forward iterator adaptor that steps through UTF-8 encoded text one code
// point at a time. SubIterator is the underlying byte iterator; this file
// instantiates it with std::string::iterator and
// std::string::const_iterator.
//
// The adaptor does not know where the text ends, so it must not be
// dereferenced or incremented at the end position. A multi-byte sequence
// that is cut short by the end of the text makes the decoder read the
// element at the end position.
template<typename SubIterator>
class utf8_iteratorT {
public:
  // Member types formerly inherited from std::iterator<>, which is
  // deprecated since C++17. The types themselves are unchanged.
  typedef std::forward_iterator_tag iterator_category;
  typedef std::string::value_type value_type;
  typedef std::string::difference_type difference_type;
  typedef typename SubIterator::pointer pointer;
  typedef typename SubIterator::reference reference;

  // Creates an iterator whose underlying iterator is value-initialized.
  utf8_iteratorT() : m_internal() {
  }

  // Wraps "it"; decoding starts at the byte "it" points to.
  explicit utf8_iteratorT(const SubIterator& it)
    : m_internal(it) {
  }

  // Pre-increment: skips the code point at the current position. If an
  // expected continuation byte is missing, the iterator stops on the
  // offending byte instead of consuming it.
  utf8_iteratorT& operator++() {
    unsigned int ignored;
    decode(m_internal, ignored);
    return *this;
  }

  // Post-increment: advances and returns a copy of the previous position.
  utf8_iteratorT operator++(int) {
    utf8_iteratorT previous(*this);
    operator++();
    return previous;
  }

  // Advances "i" code points. This is a forward iterator, so a zero or
  // negative count leaves the position unchanged.
  utf8_iteratorT& operator+=(int i) {
    for (; i > 0; --i)
      operator++();
    return *this;
  }

  // Returns a copy advanced by "i" code points (see operator+=).
  utf8_iteratorT operator+(int i) const {
    utf8_iteratorT it(*this);
    it += i;
    return it;
  }

  // Returns the code point at the current position without moving the
  // iterator, or '^' when an expected continuation byte is missing.
  int operator*() const {
    SubIterator it = m_internal;
    unsigned int codePoint;
    if (!decode(it, codePoint))
      return '^';
    return static_cast<int>(codePoint);
  }

  bool operator==(const utf8_iteratorT& it) const {
    return m_internal == it.m_internal;
  }

  bool operator!=(const utf8_iteratorT& it) const {
    return m_internal != it.m_internal;
  }

  // Gives access to the underlying byte at the current position (not to
  // a decoded code point).
  pointer operator->() const {
    return m_internal.operator->();
  }

  // Distance between the underlying iterators, i.e. measured in elements
  // of the wrapped sequence (bytes), not in code points.
  std::string::difference_type operator-(const utf8_iteratorT& it) const {
    return m_internal - it.m_internal;
  }

private:
  // Decodes the code point starting at "it" and leaves "it" just after
  // it. Returns false if a byte that should be a continuation byte
  // (10xxxxxx) is not one; in that case "it" is left on that byte and
  // "codePoint" holds a partial value.
  //
  // Based on Allegro Unicode code (allegro/src/unicode.c)
  static bool decode(SubIterator& it, unsigned int& codePoint) {
    const unsigned int lead = static_cast<unsigned char>(*it);
    ++it;

    codePoint = lead;
    if (!(lead & 0x80))
      return true;              // Single-byte (ASCII) code point

    // Count the leading 1 bits of the lead byte: sequence length.
    int n = 1;
    while (lead & (0x80 >> n))
      ++n;

    // Keep only the payload bits of the lead byte.
    codePoint = lead & ((1u << (8 - n)) - 1u);

    while (--n > 0) {
      const unsigned int t = static_cast<unsigned char>(*it);
      if ((t & 0xC0) != 0x80)
        return false;
      ++it;

      // Unsigned arithmetic: over-long invalid sequences cannot trigger
      // signed overflow.
      codePoint = (codePoint << 6) | (t & 0x3F);
    }
    return true;
  }

  SubIterator m_internal;
};
130 
131   class utf8_iterator : public utf8_iteratorT<std::string::iterator> {
132   public:
utf8_iterator()133     utf8_iterator() { }
utf8_iterator(const utf8_iteratorT<std::string::iterator> & it)134     utf8_iterator(const utf8_iteratorT<std::string::iterator>& it)
135       : utf8_iteratorT<std::string::iterator>(it) {
136     }
utf8_iterator(const std::string::iterator & it)137     explicit utf8_iterator(const std::string::iterator& it)
138       : utf8_iteratorT<std::string::iterator>(it) {
139     }
140   };
141 
142   class utf8_const_iterator : public utf8_iteratorT<std::string::const_iterator> {
143   public:
utf8_const_iterator()144     utf8_const_iterator() { }
utf8_const_iterator(const utf8_iteratorT<std::string::const_iterator> & it)145     utf8_const_iterator(const utf8_iteratorT<std::string::const_iterator>& it)
146       : utf8_iteratorT<std::string::const_iterator>(it) {
147     }
utf8_const_iterator(const std::string::const_iterator & it)148     explicit utf8_const_iterator(const std::string::const_iterator& it)
149       : utf8_iteratorT<std::string::const_iterator>(it) {
150     }
151   };
152 
153   class utf8 {
154   public:
utf8(std::string & s)155     utf8(std::string& s) : m_begin(utf8_iterator(s.begin())),
156                            m_end(utf8_iterator(s.end())) {
157     }
begin()158     const utf8_iterator& begin() const { return m_begin; }
end()159     const utf8_iterator& end() const { return m_end; }
160   private:
161     utf8_iterator m_begin;
162     utf8_iterator m_end;
163   };
164 
165   class utf8_const {
166   public:
utf8_const(const std::string & s)167     utf8_const(const std::string& s) : m_begin(utf8_const_iterator(s.begin())),
168                                        m_end(utf8_const_iterator(s.end())) {
169     }
begin()170     const utf8_const_iterator& begin() const { return m_begin; }
end()171     const utf8_const_iterator& end() const { return m_end; }
172   private:
173     utf8_const_iterator m_begin;
174     utf8_const_iterator m_end;
175   };
176 
177 }
178 
179 #endif
180