1 // 2 // UTFString.h 3 // 4 // Library: Foundation 5 // Package: Text 6 // Module: UTFString 7 // 8 // Definitions of strings for UTF encodings. 9 // 10 // Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. 11 // and Contributors. 12 // 13 // SPDX-License-Identifier: BSL-1.0 14 // 15 16 17 #ifndef Foundation_UTFString_INCLUDED 18 #define Foundation_UTFString_INCLUDED 19 20 21 #include "Poco/Foundation.h" 22 #include "Poco/Types.h" 23 #include <string> 24 25 26 namespace Poco { 27 28 29 struct UTF16CharTraits 30 { 31 typedef std::fpos<std::mbstate_t> u16streampos; 32 typedef UInt16 char_type; 33 typedef int int_type; 34 typedef std::streamoff off_type; 35 typedef u16streampos pos_type; 36 typedef std::mbstate_t state_type; 37 assignUTF16CharTraits38 static void assign(char_type& c1, const char_type& c2) 39 { 40 c1 = c2; 41 } 42 eqUTF16CharTraits43 static bool eq(char_type c1, char_type c2) 44 { 45 return c1 == c2; 46 } 47 ltUTF16CharTraits48 static bool lt(char_type c1, char_type c2) 49 { 50 return c1 < c2; 51 } 52 compareUTF16CharTraits53 static int compare(const char_type* s1, const char_type* s2, std::size_t n) 54 { 55 for (; n; --n, ++s1, ++s2) 56 { 57 if (lt(*s1, *s2)) 58 return -1; 59 if (lt(*s2, *s1)) 60 return 1; 61 } 62 return 0; 63 } 64 lengthUTF16CharTraits65 static std::size_t length(const char_type* s) 66 { 67 std::size_t len = 0; 68 for (; !eq(*s, char_type(0)); ++s) 69 ++len; 70 return len; 71 } 72 findUTF16CharTraits73 static const char_type* find(const char_type* s, std::size_t n, const char_type& a) 74 { 75 for (; n; --n) 76 { 77 if (eq(*s, a)) 78 return s; 79 ++s; 80 } 81 return 0; 82 } 83 moveUTF16CharTraits84 static char_type* move(char_type* s1, const char_type* s2, std::size_t n) 85 { 86 char_type* r = s1; 87 if (s1 < s2) 88 { 89 for (; n; --n, ++s1, ++s2) 90 assign(*s1, *s2); 91 } 92 else if (s2 < s1) 93 { 94 s1 += n; 95 s2 += n; 96 for (; n; --n) 97 assign(*--s1, *--s2); 98 } 99 return r; 100 } 101 copyUTF16CharTraits102 static char_type* copy(char_type* s1, const char_type* s2, std::size_t n) 103 { 104 poco_assert(s2 < s1 || s2 >= s1 + n); 105 char_type* r = s1; 106 for (; n; --n, ++s1, ++s2) 107 assign(*s1, *s2); 108 return r; 109 } 110 assignUTF16CharTraits111 static char_type* assign(char_type* s, std::size_t n, char_type a) 112 { 113 char_type* r = s; 114 for (; n; --n, ++s) 115 assign(*s, a); 116 return r; 117 } 118 not_eofUTF16CharTraits119 static int_type not_eof(int_type c) 120 { 121 return eq_int_type(c, eof()) ? ~eof() : c; 122 } 123 to_char_typeUTF16CharTraits124 static char_type to_char_type(int_type c) 125 { 126 return char_type(c); 127 } 128 to_int_typeUTF16CharTraits129 static int_type to_int_type(char_type c) 130 { 131 return int_type(c); 132 } 133 eq_int_typeUTF16CharTraits134 static bool eq_int_type(int_type c1, int_type c2) 135 { 136 return c1 == c2; 137 } 138 eofUTF16CharTraits139 static int_type eof() 140 { 141 return int_type(0xDFFF); 142 } 143 }; 144 145 146 struct UTF32CharTraits 147 { 148 typedef std::fpos<std::mbstate_t> u32streampos; 149 typedef UInt32 char_type; 150 typedef int int_type; 151 typedef std::streamoff off_type; 152 typedef u32streampos pos_type; 153 typedef std::mbstate_t state_type; 154 assignUTF32CharTraits155 static void assign(char_type& c1, const char_type& c2) 156 { 157 c1 = c2; 158 } 159 eqUTF32CharTraits160 static bool eq(char_type c1, char_type c2) 161 { 162 return c1 == c2; 163 } 164 ltUTF32CharTraits165 static bool lt(char_type c1, char_type c2) 166 { 167 return c1 < c2; 168 } 169 compareUTF32CharTraits170 static int compare(const char_type* s1, const char_type* s2, std::size_t n) 171 { 172 for (; n; --n, ++s1, ++s2) 173 { 174 if (lt(*s1, *s2)) 175 return -1; 176 if (lt(*s2, *s1)) 177 return 1; 178 } 179 return 0; 180 } 181 lengthUTF32CharTraits182 static std::size_t length(const char_type* s) 183 { 184 std::size_t len = 0; 185 for (; !eq(*s, char_type(0)); ++s) 186 ++len; 187 return len; 188 } 189 findUTF32CharTraits190 static const char_type* find(const char_type* s, std::size_t n, const char_type& a) 191 { 192 for (; n; --n) 193 { 194 if (eq(*s, a)) 195 return s; 196 ++s; 197 } 198 return 0; 199 } 200 moveUTF32CharTraits201 static char_type* move(char_type* s1, const char_type* s2, std::size_t n) 202 { 203 char_type* r = s1; 204 if (s1 < s2) 205 { 206 for (; n; --n, ++s1, ++s2) 207 assign(*s1, *s2); 208 } 209 else if (s2 < s1) 210 { 211 s1 += n; 212 s2 += n; 213 for (; n; --n) 214 assign(*--s1, *--s2); 215 } 216 return r; 217 } 218 copyUTF32CharTraits219 static char_type* copy(char_type* s1, const char_type* s2, std::size_t n) 220 { 221 poco_assert(s2 < s1 || s2 >= s1 + n); 222 char_type* r = s1; 223 for (; n; --n, ++s1, ++s2) 224 assign(*s1, *s2); 225 return r; 226 } 227 assignUTF32CharTraits228 static char_type* assign(char_type* s, std::size_t n, char_type a) 229 { 230 char_type* r = s; 231 for (; n; --n, ++s) 232 assign(*s, a); 233 return r; 234 } 235 not_eofUTF32CharTraits236 static int_type not_eof(int_type c) 237 { 238 return eq_int_type(c, eof()) ? ~eof() : c; 239 } 240 to_char_typeUTF32CharTraits241 static char_type to_char_type(int_type c) 242 { 243 return char_type(c); 244 } 245 to_int_typeUTF32CharTraits246 static int_type to_int_type(char_type c) 247 { 248 return int_type(c); 249 } 250 eq_int_typeUTF32CharTraits251 static bool eq_int_type(int_type c1, int_type c2) 252 { 253 return c1 == c2; 254 } 255 eofUTF32CharTraits256 static int_type eof() 257 { 258 return int_type(0xDFFF); 259 } 260 }; 261 262 263 //#if defined(POCO_ENABLE_CPP11) //TODO 264 // typedef char16_t UTF16Char; 265 // typedef std::u16string UTF16String; 266 // typedef char32_t UTF32Char; 267 // typedef std::u32string UTF32String; 268 //#else 269 #ifdef POCO_NO_WSTRING 270 typedef Poco::UInt16 UTF16Char; 271 typedef std::basic_string<UTF16Char, UTF16CharTraits> UTF16String; 272 typedef UInt32 UTF32Char; 273 typedef std::basic_string<UTF32Char, UTF32CharTraits> UTF32String; 274 #else // POCO_NO_WSTRING 275 #if defined(POCO_OS_FAMILY_WINDOWS) 276 typedef wchar_t UTF16Char; 277 typedef std::wstring UTF16String; 278 typedef UInt32 UTF32Char; 279 typedef std::basic_string<UTF32Char, UTF32CharTraits> UTF32String; 280 #elif defined(__SIZEOF_WCHAR_T__) //gcc 281 #if (__SIZEOF_WCHAR_T__ == 2) 282 typedef wchar_t UTF16Char; 283 typedef std::wstring UTF16String; 284 typedef UInt32 UTF32Char; 285 typedef std::basic_string<UTF32Char, UTF32CharTraits> UTF32String; 286 #elif (__SIZEOF_WCHAR_T__ == 4) 287 typedef Poco::UInt16 UTF16Char; 288 typedef std::basic_string<UTF16Char, UTF16CharTraits> UTF16String; 289 typedef wchar_t UTF32Char; 290 typedef std::wstring UTF32String; 291 #endif 292 #else // default to 32-bit wchar_t 293 typedef Poco::UInt16 UTF16Char; 294 typedef std::basic_string<UTF16Char, UTF16CharTraits> UTF16String; 295 typedef wchar_t UTF32Char; 296 typedef std::wstring UTF32String; 297 #endif //POCO_OS_FAMILY_WINDOWS 298 #endif //POCO_NO_WSTRING 299 //#endif // POCO_ENABLE_CPP11 300 301 302 } // namespace Poco 303 304 305 #endif // Foundation_UTFString_INCLUDED 306