// LAF Base Library
// Copyright (c) 2001-2017 David Capello
//
// This file is released under the terms of the MIT license.
// Read LICENSE.txt for more information.

#ifndef BASE_STRING_H_INCLUDED
#define BASE_STRING_H_INCLUDED
#pragma once

#include <cstdarg>
#include <iterator>
#include <string>

namespace base {

  std::string string_printf(const char* format, ...);
  std::string string_vprintf(const char* format, std::va_list ap);

  std::string string_to_lower(const std::string& original);
  std::string string_to_upper(const std::string& original);

  std::string to_utf8(const std::wstring& widestring);
  std::wstring from_utf8(const std::string& utf8string);

  int utf8_length(const std::string& utf8string);
  int utf8_icmp(const std::string& a, const std::string& b, int n = 0);

  // Forward iterator adaptor that walks a sequence of UTF-8 encoded
  // bytes one code point at a time.
  //
  // SubIterator is the iterator over the underlying bytes (e.g.
  // std::string::iterator). Dereferencing the adaptor decodes the code
  // point that starts at the current byte position; incrementing it
  // skips all the bytes of that code point.
  //
  // The adaptor does not know where the underlying sequence ends: both
  // operator++ and operator* read as many bytes as the lead byte
  // announces (stopping early only at a byte that is not a valid
  // continuation byte).
  // NOTE(review): a sequence truncated right at the end of the buffer
  // makes the adaptor read the end position; with std::string that is
  // presumably the terminator, but confirm against the callers.
  template<typename SubIterator>
  class utf8_iteratorT {
  public:
    // Iterator traits, declared as members because inheriting them from
    // std::iterator is deprecated since C++17.
    typedef std::forward_iterator_tag iterator_category;
    typedef std::string::value_type value_type;
    typedef std::string::difference_type difference_type;
    typedef typename SubIterator::pointer pointer;
    typedef typename SubIterator::reference reference;

    utf8_iteratorT() {
    }

    explicit utf8_iteratorT(const SubIterator& it)
      : m_internal(it) {
    }

    // Advances to the first byte of the next code point.
    //
    // If a byte that should be a continuation byte (10xxxxxx) is not
    // one, the iterator stops right at that byte so it is processed as
    // the start of the next character.
    //
    // Based on Allegro Unicode code (allegro/src/unicode.c)
    utf8_iteratorT& operator++() {
      const int lead = static_cast<unsigned char>(*m_internal);
      ++m_internal;

      if (lead & 0x80) {
        int n = sequence_length(lead);

        while (--n > 0) {
          // Peek before advancing: no need to step back on a bad byte,
          // so a forward-only SubIterator is enough.
          const int t = static_cast<unsigned char>(*m_internal);
          if (!is_continuation(t))
            break;

          ++m_internal;
        }
      }

      return *this;
    }

    // Advances "i" code points. Only forward movement is supported: a
    // zero or negative "i" leaves the iterator untouched.
    utf8_iteratorT& operator+=(int i) {
      while (i-- > 0)
        operator++();
      return *this;
    }

    // Returns a copy of this iterator advanced "i" code points.
    utf8_iteratorT operator+(int i) const {
      utf8_iteratorT it(*this);
      it += i;
      return it;
    }

    // Decodes and returns the code point that starts at the current
    // position, without moving the iterator.
    //
    // Returns '^' when one of the expected continuation bytes is
    // missing or malformed. No other validation is done (e.g. overlong
    // encodings are decoded as they come).
    int operator*() const {
      SubIterator it = m_internal;
      int c = static_cast<unsigned char>(*it);
      ++it;

      if (c & 0x80) {
        int n = sequence_length(c);

        // Keep only the payload bits of the lead byte.
        c &= (1 << (8 - n)) - 1;

        while (--n > 0) {
          const int t = static_cast<unsigned char>(*it);
          ++it;

          if (!is_continuation(t))
            return '^';

          // Each continuation byte contributes its 6 low bits.
          c = (c << 6) | (t & 0x3F);
        }
      }

      return c;
    }

    bool operator==(const utf8_iteratorT& it) const {
      return m_internal == it.m_internal;
    }

    bool operator!=(const utf8_iteratorT& it) const {
      return m_internal != it.m_internal;
    }

    // Gives access to the underlying byte at the current position.
    pointer operator->() const {
      return m_internal.operator->();
    }

    // Distance between the underlying iterators, i.e. measured in
    // bytes, not in code points.
    std::string::difference_type operator-(const utf8_iteratorT& it) const {
      return m_internal - it.m_internal;
    }

  private:
    // Number of leading 1 bits of a lead byte that has the high bit
    // set, which is the number of bytes the sequence announces.
    static int sequence_length(int lead) {
      int n = 1;
      while (lead & (0x80 >> n))
        ++n;
      return n;
    }

    // True if the byte has the form 10xxxxxx.
    static bool is_continuation(int byte) {
      return (byte & 0xC0) == 0x80;
    }

    SubIterator m_internal;
  };

  // UTF-8 iterator over a mutable std::string.
  class utf8_iterator : public utf8_iteratorT<std::string::iterator> {
  public:
    utf8_iterator() { }
    // Implicit on purpose: lets the result of operator+ (which returns
    // the base template type) be stored in a utf8_iterator.
    utf8_iterator(const utf8_iteratorT<std::string::iterator>& it)
      : utf8_iteratorT<std::string::iterator>(it) {
    }
    explicit utf8_iterator(const std::string::iterator& it)
      : utf8_iteratorT<std::string::iterator>(it) {
    }
  };

  // UTF-8 iterator over a const std::string.
  class utf8_const_iterator : public utf8_iteratorT<std::string::const_iterator> {
  public:
    utf8_const_iterator() { }
    // Implicit on purpose: lets the result of operator+ (which returns
    // the base template type) be stored in a utf8_const_iterator.
    utf8_const_iterator(const utf8_iteratorT<std::string::const_iterator>& it)
      : utf8_iteratorT<std::string::const_iterator>(it) {
    }
    explicit utf8_const_iterator(const std::string::const_iterator& it)
      : utf8_iteratorT<std::string::const_iterator>(it) {
    }
  };

  // Range of UTF-8 iterators over a mutable std::string.
  //
  // Only the begin/end iterators of "s" are stored (captured at
  // construction time), so "s" must stay alive and unmodified while
  // the range is in use.
  class utf8 {
  public:
    utf8(std::string& s)
      : m_begin(utf8_iterator(s.begin()))
      , m_end(utf8_iterator(s.end())) {
    }
    const utf8_iterator& begin() const { return m_begin; }
    const utf8_iterator& end() const { return m_end; }
  private:
    utf8_iterator m_begin;
    utf8_iterator m_end;
  };

  // Range of UTF-8 iterators over a const std::string.
  //
  // Only the begin/end iterators of "s" are stored (captured at
  // construction time), so "s" must stay alive and unmodified while
  // the range is in use.
  class utf8_const {
  public:
    utf8_const(const std::string& s)
      : m_begin(utf8_const_iterator(s.begin()))
      , m_end(utf8_const_iterator(s.end())) {
    }
    const utf8_const_iterator& begin() const { return m_begin; }
    const utf8_const_iterator& end() const { return m_end; }
  private:
    utf8_const_iterator m_begin;
    utf8_const_iterator m_end;
  };

} // namespace base

#endif