1 /*
2 * Copyright (C) 2005-2018 Team Kodi
3 * This file is part of Kodi - https://kodi.tv
4 *
5 * SPDX-License-Identifier: GPL-2.0-or-later
6 * See LICENSES/README.md for more information.
7 */
8 //-----------------------------------------------------------------------
9 //
10 // File: StringUtils.cpp
11 //
12 // Purpose: ATL split string utility
13 // Author: Paul J. Weiss
14 //
15 // Modified to use J O'Leary's std::string class by kraqh3d
16 //
17 //------------------------------------------------------------------------
18
19 #ifdef HAVE_NEW_CROSSGUID
20 #include <guid.hpp>
21 #else
22 #include <guid.h>
23 #endif
24
25 #if defined(TARGET_ANDROID)
26 #include <androidjni/JNIThreading.h>
27 #endif
28
29 #include "CharsetConverter.h"
30 #include "LangInfo.h"
31 #include "StringUtils.h"
32 #include "Util.h"
33
#include <algorithm>
#include <array>
#include <assert.h>
#include <functional>
#include <inttypes.h>
#include <iomanip>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
45
46 #include <fstrcmp.h>
47 #include <memory.h>
48
49 // don't move or std functions end up in PCRE namespace
50 // clang-format off
51 #include "utils/RegExp.h"
52 // clang-format on
53
54 #define FORMAT_BLOCK_SIZE 512 // # of bytes for initial allocation for printf
55
56 static constexpr const char* ADDON_GUID_RE = "^(\\{){0,1}[0-9a-fA-F]{8}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{12}(\\}){0,1}$";
57
58 /* empty string for use in returns by ref */
59 const std::string StringUtils::Empty = "";
60
61 // Copyright (c) Leigh Brasington 2012. All rights reserved.
62 // This code may be used and reproduced without written permission.
63 // http://www.leighb.com/tounicupper.htm
64 //
65 // The tables were constructed from
66 // http://publib.boulder.ibm.com/infocenter/iseries/v7r1m0/index.jsp?topic=%2Fnls%2Frbagslowtoupmaptable.htm
67
// Lower-case code points that have an upper-case counterpart.
// The values are in strictly ascending order, which is what allows toupperUnicode()
// to binary-search this table. Entry i pairs with unicode_uppers[i].
// Every value is at most 0xFF5A, so the brace initialisation is non-narrowing
// wherever wchar_t can represent unsigned 16-bit values.
static constexpr wchar_t unicode_lowers[] = {
  0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069,
  0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, 0x0070, 0x0071, 0x0072,
  0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007A, 0x00E0,
  0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9,
  0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, 0x00F0, 0x00F1, 0x00F2,
  0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC,
  0x00FD, 0x00FE, 0x00FF, 0x0101, 0x0103, 0x0105, 0x0107, 0x0109, 0x010B,
  0x010D, 0x010F, 0x0111, 0x0113, 0x0115, 0x0117, 0x0119, 0x011B, 0x011D,
  0x011F, 0x0121, 0x0123, 0x0125, 0x0127, 0x0129, 0x012B, 0x012D, 0x012F,
  0x0131, 0x0133, 0x0135, 0x0137, 0x013A, 0x013C, 0x013E, 0x0140, 0x0142,
  0x0144, 0x0146, 0x0148, 0x014B, 0x014D, 0x014F, 0x0151, 0x0153, 0x0155,
  0x0157, 0x0159, 0x015B, 0x015D, 0x015F, 0x0161, 0x0163, 0x0165, 0x0167,
  0x0169, 0x016B, 0x016D, 0x016F, 0x0171, 0x0173, 0x0175, 0x0177, 0x017A,
  0x017C, 0x017E, 0x0183, 0x0185, 0x0188, 0x018C, 0x0192, 0x0199, 0x01A1,
  0x01A3, 0x01A5, 0x01A8, 0x01AD, 0x01B0, 0x01B4, 0x01B6, 0x01B9, 0x01BD,
  0x01C6, 0x01C9, 0x01CC, 0x01CE, 0x01D0, 0x01D2, 0x01D4, 0x01D6, 0x01D8,
  0x01DA, 0x01DC, 0x01DF, 0x01E1, 0x01E3, 0x01E5, 0x01E7, 0x01E9, 0x01EB,
  0x01ED, 0x01EF, 0x01F3, 0x01F5, 0x01FB, 0x01FD, 0x01FF, 0x0201, 0x0203,
  0x0205, 0x0207, 0x0209, 0x020B, 0x020D, 0x020F, 0x0211, 0x0213, 0x0215,
  0x0217, 0x0253, 0x0254, 0x0257, 0x0258, 0x0259, 0x025B, 0x0260, 0x0263,
  0x0268, 0x0269, 0x026F, 0x0272, 0x0275, 0x0283, 0x0288, 0x028A, 0x028B,
  0x0292, 0x03AC, 0x03AD, 0x03AE, 0x03AF, 0x03B1, 0x03B2, 0x03B3, 0x03B4,
  0x03B5, 0x03B6, 0x03B7, 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD,
  0x03BE, 0x03BF, 0x03C0, 0x03C1, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
  0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0x03E3, 0x03E5,
  0x03E7, 0x03E9, 0x03EB, 0x03ED, 0x03EF, 0x0430, 0x0431, 0x0432, 0x0433,
  0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C,
  0x043D, 0x043E, 0x043F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445,
  0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E,
  0x044F, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 0x0458,
  0x0459, 0x045A, 0x045B, 0x045C, 0x045E, 0x045F, 0x0461, 0x0463, 0x0465,
  0x0467, 0x0469, 0x046B, 0x046D, 0x046F, 0x0471, 0x0473, 0x0475, 0x0477,
  0x0479, 0x047B, 0x047D, 0x047F, 0x0481, 0x0491, 0x0493, 0x0495, 0x0497,
  0x0499, 0x049B, 0x049D, 0x049F, 0x04A1, 0x04A3, 0x04A5, 0x04A7, 0x04A9,
  0x04AB, 0x04AD, 0x04AF, 0x04B1, 0x04B3, 0x04B5, 0x04B7, 0x04B9, 0x04BB,
  0x04BD, 0x04BF, 0x04C2, 0x04C4, 0x04C8, 0x04CC, 0x04D1, 0x04D3, 0x04D5,
  0x04D7, 0x04D9, 0x04DB, 0x04DD, 0x04DF, 0x04E1, 0x04E3, 0x04E5, 0x04E7,
  0x04E9, 0x04EB, 0x04EF, 0x04F1, 0x04F3, 0x04F5, 0x04F9, 0x0561, 0x0562,
  0x0563, 0x0564, 0x0565, 0x0566, 0x0567, 0x0568, 0x0569, 0x056A, 0x056B,
  0x056C, 0x056D, 0x056E, 0x056F, 0x0570, 0x0571, 0x0572, 0x0573, 0x0574,
  0x0575, 0x0576, 0x0577, 0x0578, 0x0579, 0x057A, 0x057B, 0x057C, 0x057D,
  0x057E, 0x057F, 0x0580, 0x0581, 0x0582, 0x0583, 0x0584, 0x0585, 0x0586,
  0x10D0, 0x10D1, 0x10D2, 0x10D3, 0x10D4, 0x10D5, 0x10D6, 0x10D7, 0x10D8,
  0x10D9, 0x10DA, 0x10DB, 0x10DC, 0x10DD, 0x10DE, 0x10DF, 0x10E0, 0x10E1,
  0x10E2, 0x10E3, 0x10E4, 0x10E5, 0x10E6, 0x10E7, 0x10E8, 0x10E9, 0x10EA,
  0x10EB, 0x10EC, 0x10ED, 0x10EE, 0x10EF, 0x10F0, 0x10F1, 0x10F2, 0x10F3,
  0x10F4, 0x10F5, 0x1E01, 0x1E03, 0x1E05, 0x1E07, 0x1E09, 0x1E0B, 0x1E0D,
  0x1E0F, 0x1E11, 0x1E13, 0x1E15, 0x1E17, 0x1E19, 0x1E1B, 0x1E1D, 0x1E1F,
  0x1E21, 0x1E23, 0x1E25, 0x1E27, 0x1E29, 0x1E2B, 0x1E2D, 0x1E2F, 0x1E31,
  0x1E33, 0x1E35, 0x1E37, 0x1E39, 0x1E3B, 0x1E3D, 0x1E3F, 0x1E41, 0x1E43,
  0x1E45, 0x1E47, 0x1E49, 0x1E4B, 0x1E4D, 0x1E4F, 0x1E51, 0x1E53, 0x1E55,
  0x1E57, 0x1E59, 0x1E5B, 0x1E5D, 0x1E5F, 0x1E61, 0x1E63, 0x1E65, 0x1E67,
  0x1E69, 0x1E6B, 0x1E6D, 0x1E6F, 0x1E71, 0x1E73, 0x1E75, 0x1E77, 0x1E79,
  0x1E7B, 0x1E7D, 0x1E7F, 0x1E81, 0x1E83, 0x1E85, 0x1E87, 0x1E89, 0x1E8B,
  0x1E8D, 0x1E8F, 0x1E91, 0x1E93, 0x1E95, 0x1EA1, 0x1EA3, 0x1EA5, 0x1EA7,
  0x1EA9, 0x1EAB, 0x1EAD, 0x1EAF, 0x1EB1, 0x1EB3, 0x1EB5, 0x1EB7, 0x1EB9,
  0x1EBB, 0x1EBD, 0x1EBF, 0x1EC1, 0x1EC3, 0x1EC5, 0x1EC7, 0x1EC9, 0x1ECB,
  0x1ECD, 0x1ECF, 0x1ED1, 0x1ED3, 0x1ED5, 0x1ED7, 0x1ED9, 0x1EDB, 0x1EDD,
  0x1EDF, 0x1EE1, 0x1EE3, 0x1EE5, 0x1EE7, 0x1EE9, 0x1EEB, 0x1EED, 0x1EEF,
  0x1EF1, 0x1EF3, 0x1EF5, 0x1EF7, 0x1EF9, 0x1F00, 0x1F01, 0x1F02, 0x1F03,
  0x1F04, 0x1F05, 0x1F06, 0x1F07, 0x1F10, 0x1F11, 0x1F12, 0x1F13, 0x1F14,
  0x1F15, 0x1F20, 0x1F21, 0x1F22, 0x1F23, 0x1F24, 0x1F25, 0x1F26, 0x1F27,
  0x1F30, 0x1F31, 0x1F32, 0x1F33, 0x1F34, 0x1F35, 0x1F36, 0x1F37, 0x1F40,
  0x1F41, 0x1F42, 0x1F43, 0x1F44, 0x1F45, 0x1F51, 0x1F53, 0x1F55, 0x1F57,
  0x1F60, 0x1F61, 0x1F62, 0x1F63, 0x1F64, 0x1F65, 0x1F66, 0x1F67, 0x1F80,
  0x1F81, 0x1F82, 0x1F83, 0x1F84, 0x1F85, 0x1F86, 0x1F87, 0x1F90, 0x1F91,
  0x1F92, 0x1F93, 0x1F94, 0x1F95, 0x1F96, 0x1F97, 0x1FA0, 0x1FA1, 0x1FA2,
  0x1FA3, 0x1FA4, 0x1FA5, 0x1FA6, 0x1FA7, 0x1FB0, 0x1FB1, 0x1FD0, 0x1FD1,
  0x1FE0, 0x1FE1, 0x24D0, 0x24D1, 0x24D2, 0x24D3, 0x24D4, 0x24D5, 0x24D6,
  0x24D7, 0x24D8, 0x24D9, 0x24DA, 0x24DB, 0x24DC, 0x24DD, 0x24DE, 0x24DF,
  0x24E0, 0x24E1, 0x24E2, 0x24E3, 0x24E4, 0x24E5, 0x24E6, 0x24E7, 0x24E8,
  0x24E9, 0xFF41, 0xFF42, 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48,
  0xFF49, 0xFF4A, 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0xFF50, 0xFF51,
  0xFF52, 0xFF53, 0xFF54, 0xFF55, 0xFF56, 0xFF57, 0xFF58, 0xFF59, 0xFF5A
};
144
// Upper-case counterparts of the code points in unicode_lowers: entry i is the
// upper-case form of unicode_lowers[i], so both tables must keep the same length
// and ordering.
// Because the order follows unicode_lowers, this table is NOT sorted by its own
// values (e.g. 0x0178 follows 0x00DE, and 0x0401..0x040F follow 0x042F), and
// 0x0049 appears twice (as the upper case of both 0x0069 and 0x0131). It is
// therefore not a valid input for a binary search.
// Declared constexpr to match unicode_lowers.
static constexpr wchar_t unicode_uppers[] = {
  0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049,
  0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052,
  0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x00C0,
  0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9,
  0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, 0x00D0, 0x00D1, 0x00D2,
  0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC,
  0x00DD, 0x00DE, 0x0178, 0x0100, 0x0102, 0x0104, 0x0106, 0x0108, 0x010A,
  0x010C, 0x010E, 0x0110, 0x0112, 0x0114, 0x0116, 0x0118, 0x011A, 0x011C,
  0x011E, 0x0120, 0x0122, 0x0124, 0x0126, 0x0128, 0x012A, 0x012C, 0x012E,
  0x0049, 0x0132, 0x0134, 0x0136, 0x0139, 0x013B, 0x013D, 0x013F, 0x0141,
  0x0143, 0x0145, 0x0147, 0x014A, 0x014C, 0x014E, 0x0150, 0x0152, 0x0154,
  0x0156, 0x0158, 0x015A, 0x015C, 0x015E, 0x0160, 0x0162, 0x0164, 0x0166,
  0x0168, 0x016A, 0x016C, 0x016E, 0x0170, 0x0172, 0x0174, 0x0176, 0x0179,
  0x017B, 0x017D, 0x0182, 0x0184, 0x0187, 0x018B, 0x0191, 0x0198, 0x01A0,
  0x01A2, 0x01A4, 0x01A7, 0x01AC, 0x01AF, 0x01B3, 0x01B5, 0x01B8, 0x01BC,
  0x01C4, 0x01C7, 0x01CA, 0x01CD, 0x01CF, 0x01D1, 0x01D3, 0x01D5, 0x01D7,
  0x01D9, 0x01DB, 0x01DE, 0x01E0, 0x01E2, 0x01E4, 0x01E6, 0x01E8, 0x01EA,
  0x01EC, 0x01EE, 0x01F1, 0x01F4, 0x01FA, 0x01FC, 0x01FE, 0x0200, 0x0202,
  0x0204, 0x0206, 0x0208, 0x020A, 0x020C, 0x020E, 0x0210, 0x0212, 0x0214,
  0x0216, 0x0181, 0x0186, 0x018A, 0x018E, 0x018F, 0x0190, 0x0193, 0x0194,
  0x0197, 0x0196, 0x019C, 0x019D, 0x019F, 0x01A9, 0x01AE, 0x01B1, 0x01B2,
  0x01B7, 0x0386, 0x0388, 0x0389, 0x038A, 0x0391, 0x0392, 0x0393, 0x0394,
  0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D,
  0x039E, 0x039F, 0x03A0, 0x03A1, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
  0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x038C, 0x038E, 0x038F, 0x03E2, 0x03E4,
  0x03E6, 0x03E8, 0x03EA, 0x03EC, 0x03EE, 0x0410, 0x0411, 0x0412, 0x0413,
  0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C,
  0x041D, 0x041E, 0x041F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425,
  0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E,
  0x042F, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 0x0408,
  0x0409, 0x040A, 0x040B, 0x040C, 0x040E, 0x040F, 0x0460, 0x0462, 0x0464,
  0x0466, 0x0468, 0x046A, 0x046C, 0x046E, 0x0470, 0x0472, 0x0474, 0x0476,
  0x0478, 0x047A, 0x047C, 0x047E, 0x0480, 0x0490, 0x0492, 0x0494, 0x0496,
  0x0498, 0x049A, 0x049C, 0x049E, 0x04A0, 0x04A2, 0x04A4, 0x04A6, 0x04A8,
  0x04AA, 0x04AC, 0x04AE, 0x04B0, 0x04B2, 0x04B4, 0x04B6, 0x04B8, 0x04BA,
  0x04BC, 0x04BE, 0x04C1, 0x04C3, 0x04C7, 0x04CB, 0x04D0, 0x04D2, 0x04D4,
  0x04D6, 0x04D8, 0x04DA, 0x04DC, 0x04DE, 0x04E0, 0x04E2, 0x04E4, 0x04E6,
  0x04E8, 0x04EA, 0x04EE, 0x04F0, 0x04F2, 0x04F4, 0x04F8, 0x0531, 0x0532,
  0x0533, 0x0534, 0x0535, 0x0536, 0x0537, 0x0538, 0x0539, 0x053A, 0x053B,
  0x053C, 0x053D, 0x053E, 0x053F, 0x0540, 0x0541, 0x0542, 0x0543, 0x0544,
  0x0545, 0x0546, 0x0547, 0x0548, 0x0549, 0x054A, 0x054B, 0x054C, 0x054D,
  0x054E, 0x054F, 0x0550, 0x0551, 0x0552, 0x0553, 0x0554, 0x0555, 0x0556,
  0x10A0, 0x10A1, 0x10A2, 0x10A3, 0x10A4, 0x10A5, 0x10A6, 0x10A7, 0x10A8,
  0x10A9, 0x10AA, 0x10AB, 0x10AC, 0x10AD, 0x10AE, 0x10AF, 0x10B0, 0x10B1,
  0x10B2, 0x10B3, 0x10B4, 0x10B5, 0x10B6, 0x10B7, 0x10B8, 0x10B9, 0x10BA,
  0x10BB, 0x10BC, 0x10BD, 0x10BE, 0x10BF, 0x10C0, 0x10C1, 0x10C2, 0x10C3,
  0x10C4, 0x10C5, 0x1E00, 0x1E02, 0x1E04, 0x1E06, 0x1E08, 0x1E0A, 0x1E0C,
  0x1E0E, 0x1E10, 0x1E12, 0x1E14, 0x1E16, 0x1E18, 0x1E1A, 0x1E1C, 0x1E1E,
  0x1E20, 0x1E22, 0x1E24, 0x1E26, 0x1E28, 0x1E2A, 0x1E2C, 0x1E2E, 0x1E30,
  0x1E32, 0x1E34, 0x1E36, 0x1E38, 0x1E3A, 0x1E3C, 0x1E3E, 0x1E40, 0x1E42,
  0x1E44, 0x1E46, 0x1E48, 0x1E4A, 0x1E4C, 0x1E4E, 0x1E50, 0x1E52, 0x1E54,
  0x1E56, 0x1E58, 0x1E5A, 0x1E5C, 0x1E5E, 0x1E60, 0x1E62, 0x1E64, 0x1E66,
  0x1E68, 0x1E6A, 0x1E6C, 0x1E6E, 0x1E70, 0x1E72, 0x1E74, 0x1E76, 0x1E78,
  0x1E7A, 0x1E7C, 0x1E7E, 0x1E80, 0x1E82, 0x1E84, 0x1E86, 0x1E88, 0x1E8A,
  0x1E8C, 0x1E8E, 0x1E90, 0x1E92, 0x1E94, 0x1EA0, 0x1EA2, 0x1EA4, 0x1EA6,
  0x1EA8, 0x1EAA, 0x1EAC, 0x1EAE, 0x1EB0, 0x1EB2, 0x1EB4, 0x1EB6, 0x1EB8,
  0x1EBA, 0x1EBC, 0x1EBE, 0x1EC0, 0x1EC2, 0x1EC4, 0x1EC6, 0x1EC8, 0x1ECA,
  0x1ECC, 0x1ECE, 0x1ED0, 0x1ED2, 0x1ED4, 0x1ED6, 0x1ED8, 0x1EDA, 0x1EDC,
  0x1EDE, 0x1EE0, 0x1EE2, 0x1EE4, 0x1EE6, 0x1EE8, 0x1EEA, 0x1EEC, 0x1EEE,
  0x1EF0, 0x1EF2, 0x1EF4, 0x1EF6, 0x1EF8, 0x1F08, 0x1F09, 0x1F0A, 0x1F0B,
  0x1F0C, 0x1F0D, 0x1F0E, 0x1F0F, 0x1F18, 0x1F19, 0x1F1A, 0x1F1B, 0x1F1C,
  0x1F1D, 0x1F28, 0x1F29, 0x1F2A, 0x1F2B, 0x1F2C, 0x1F2D, 0x1F2E, 0x1F2F,
  0x1F38, 0x1F39, 0x1F3A, 0x1F3B, 0x1F3C, 0x1F3D, 0x1F3E, 0x1F3F, 0x1F48,
  0x1F49, 0x1F4A, 0x1F4B, 0x1F4C, 0x1F4D, 0x1F59, 0x1F5B, 0x1F5D, 0x1F5F,
  0x1F68, 0x1F69, 0x1F6A, 0x1F6B, 0x1F6C, 0x1F6D, 0x1F6E, 0x1F6F, 0x1F88,
  0x1F89, 0x1F8A, 0x1F8B, 0x1F8C, 0x1F8D, 0x1F8E, 0x1F8F, 0x1F98, 0x1F99,
  0x1F9A, 0x1F9B, 0x1F9C, 0x1F9D, 0x1F9E, 0x1F9F, 0x1FA8, 0x1FA9, 0x1FAA,
  0x1FAB, 0x1FAC, 0x1FAD, 0x1FAE, 0x1FAF, 0x1FB8, 0x1FB9, 0x1FD8, 0x1FD9,
  0x1FE8, 0x1FE9, 0x24B6, 0x24B7, 0x24B8, 0x24B9, 0x24BA, 0x24BB, 0x24BC,
  0x24BD, 0x24BE, 0x24BF, 0x24C0, 0x24C1, 0x24C2, 0x24C3, 0x24C4, 0x24C5,
  0x24C6, 0x24C7, 0x24C8, 0x24C9, 0x24CA, 0x24CB, 0x24CC, 0x24CD, 0x24CE,
  0x24CF, 0xFF21, 0xFF22, 0xFF23, 0xFF24, 0xFF25, 0xFF26, 0xFF27, 0xFF28,
  0xFF29, 0xFF2A, 0xFF2B, 0xFF2C, 0xFF2D, 0xFF2E, 0xFF2F, 0xFF30, 0xFF31,
  0xFF32, 0xFF33, 0xFF34, 0xFF35, 0xFF36, 0xFF37, 0xFF38, 0xFF39, 0xFF3A
};
221
222
FormatV(const char * fmt,va_list args)223 std::string StringUtils::FormatV(const char *fmt, va_list args)
224 {
225 if (!fmt || !fmt[0])
226 return "";
227
228 int size = FORMAT_BLOCK_SIZE;
229 va_list argCopy;
230
231 while (true)
232 {
233 char *cstr = reinterpret_cast<char*>(malloc(sizeof(char) * size));
234 if (!cstr)
235 return "";
236
237 va_copy(argCopy, args);
238 int nActual = vsnprintf(cstr, size, fmt, argCopy);
239 va_end(argCopy);
240
241 if (nActual > -1 && nActual < size) // We got a valid result
242 {
243 std::string str(cstr, nActual);
244 free(cstr);
245 return str;
246 }
247 free(cstr);
248 #ifndef TARGET_WINDOWS
249 if (nActual > -1) // Exactly what we will need (glibc 2.1)
250 size = nActual + 1;
251 else // Let's try to double the size (glibc 2.0)
252 size *= 2;
253 #else // TARGET_WINDOWS
254 va_copy(argCopy, args);
255 size = _vscprintf(fmt, argCopy);
256 va_end(argCopy);
257 if (size < 0)
258 return "";
259 else
260 size++; // increment for null-termination
261 #endif // TARGET_WINDOWS
262 }
263
264 return ""; // unreachable
265 }
266
FormatV(const wchar_t * fmt,va_list args)267 std::wstring StringUtils::FormatV(const wchar_t *fmt, va_list args)
268 {
269 if (!fmt || !fmt[0])
270 return L"";
271
272 int size = FORMAT_BLOCK_SIZE;
273 va_list argCopy;
274
275 while (true)
276 {
277 wchar_t *cstr = reinterpret_cast<wchar_t*>(malloc(sizeof(wchar_t) * size));
278 if (!cstr)
279 return L"";
280
281 va_copy(argCopy, args);
282 int nActual = vswprintf(cstr, size, fmt, argCopy);
283 va_end(argCopy);
284
285 if (nActual > -1 && nActual < size) // We got a valid result
286 {
287 std::wstring str(cstr, nActual);
288 free(cstr);
289 return str;
290 }
291 free(cstr);
292
293 #ifndef TARGET_WINDOWS
294 if (nActual > -1) // Exactly what we will need (glibc 2.1)
295 size = nActual + 1;
296 else // Let's try to double the size (glibc 2.0)
297 size *= 2;
298 #else // TARGET_WINDOWS
299 va_copy(argCopy, args);
300 size = _vscwprintf(fmt, argCopy);
301 va_end(argCopy);
302 if (size < 0)
303 return L"";
304 else
305 size++; // increment for null-termination
306 #endif // TARGET_WINDOWS
307 }
308
309 return L"";
310 }
311
// bsearch()/qsort()-style three-way comparison of two wchar_t values.
// `a` and `b` must point to wchar_t objects.
// Returns -1 when *a < *b, 1 when *a > *b and 0 when they are equal.
int compareWchar (const void* a, const void* b)
{
  const wchar_t lhs = *static_cast<const wchar_t*>(a);
  const wchar_t rhs = *static_cast<const wchar_t*>(b);
  // each comparison yields 0 or 1, so the difference is exactly -1, 0 or 1
  return static_cast<int>(lhs > rhs) - static_cast<int>(lhs < rhs);
}
320
tolowerUnicode(const wchar_t & c)321 wchar_t tolowerUnicode(const wchar_t& c)
322 {
323 wchar_t* p = (wchar_t*) bsearch (&c, unicode_uppers, sizeof(unicode_uppers) / sizeof(wchar_t), sizeof(wchar_t), compareWchar);
324 if (p)
325 return *(unicode_lowers + (p - unicode_uppers));
326
327 return c;
328 }
329
toupperUnicode(const wchar_t & c)330 wchar_t toupperUnicode(const wchar_t& c)
331 {
332 wchar_t* p = (wchar_t*) bsearch (&c, unicode_lowers, sizeof(unicode_lowers) / sizeof(wchar_t), sizeof(wchar_t), compareWchar);
333 if (p)
334 return *(unicode_uppers + (p - unicode_lowers));
335
336 return c;
337 }
338
ToUpper(std::string & str)339 void StringUtils::ToUpper(std::string &str)
340 {
341 std::transform(str.begin(), str.end(), str.begin(), ::toupper);
342 }
343
ToUpper(std::wstring & str)344 void StringUtils::ToUpper(std::wstring &str)
345 {
346 transform(str.begin(), str.end(), str.begin(), toupperUnicode);
347 }
348
ToLower(std::string & str)349 void StringUtils::ToLower(std::string &str)
350 {
351 transform(str.begin(), str.end(), str.begin(), ::tolower);
352 }
353
ToLower(std::wstring & str)354 void StringUtils::ToLower(std::wstring &str)
355 {
356 transform(str.begin(), str.end(), str.begin(), tolowerUnicode);
357 }
358
ToCapitalize(std::string & str)359 void StringUtils::ToCapitalize(std::string &str)
360 {
361 std::wstring wstr;
362 g_charsetConverter.utf8ToW(str, wstr);
363 ToCapitalize(wstr);
364 g_charsetConverter.wToUTF8(wstr, str);
365 }
366
ToCapitalize(std::wstring & str)367 void StringUtils::ToCapitalize(std::wstring &str)
368 {
369 const std::locale& loc = g_langInfo.GetSystemLocale();
370 bool isFirstLetter = true;
371 for (std::wstring::iterator it = str.begin(); it < str.end(); ++it)
372 {
373 // capitalize after spaces and punctuation characters (except apostrophes)
374 if (std::isspace(*it, loc) || (std::ispunct(*it, loc) && *it != '\''))
375 isFirstLetter = true;
376 else if (isFirstLetter)
377 {
378 *it = std::toupper(*it, loc);
379 isFirstLetter = false;
380 }
381 }
382 }
383
EqualsNoCase(const std::string & str1,const std::string & str2)384 bool StringUtils::EqualsNoCase(const std::string &str1, const std::string &str2)
385 {
386 // before we do the char-by-char comparison, first compare sizes of both strings.
387 // This led to a 33% improvement in benchmarking on average. (size() just returns a member of std::string)
388 if (str1.size() != str2.size())
389 return false;
390 return EqualsNoCase(str1.c_str(), str2.c_str());
391 }
392
EqualsNoCase(const std::string & str1,const char * s2)393 bool StringUtils::EqualsNoCase(const std::string &str1, const char *s2)
394 {
395 return EqualsNoCase(str1.c_str(), s2);
396 }
397
EqualsNoCase(const char * s1,const char * s2)398 bool StringUtils::EqualsNoCase(const char *s1, const char *s2)
399 {
400 char c2; // we need only one char outside the loop
401 do
402 {
403 const char c1 = *s1++; // const local variable should help compiler to optimize
404 c2 = *s2++;
405 if (c1 != c2 && ::tolower(c1) != ::tolower(c2)) // This includes the possibility that one of the characters is the null-terminator, which implies a string mismatch.
406 return false;
407 } while (c2 != '\0'); // At this point, we know c1 == c2, so there's no need to test them both.
408 return true;
409 }
410
CompareNoCase(const std::string & str1,const std::string & str2,size_t n)411 int StringUtils::CompareNoCase(const std::string& str1, const std::string& str2, size_t n /* = 0 */)
412 {
413 return CompareNoCase(str1.c_str(), str2.c_str(), n);
414 }
415
CompareNoCase(const char * s1,const char * s2,size_t n)416 int StringUtils::CompareNoCase(const char* s1, const char* s2, size_t n /* = 0 */)
417 {
418 char c2; // we need only one char outside the loop
419 size_t index = 0;
420 do
421 {
422 const char c1 = *s1++; // const local variable should help compiler to optimize
423 c2 = *s2++;
424 index++;
425 if (c1 != c2 && ::tolower(c1) != ::tolower(c2)) // This includes the possibility that one of the characters is the null-terminator, which implies a string mismatch.
426 return ::tolower(c1) - ::tolower(c2);
427 } while (c2 != '\0' &&
428 index != n); // At this point, we know c1 == c2, so there's no need to test them both.
429 return 0;
430 }
431
Left(const std::string & str,size_t count)432 std::string StringUtils::Left(const std::string &str, size_t count)
433 {
434 count = std::max((size_t)0, std::min(count, str.size()));
435 return str.substr(0, count);
436 }
437
Mid(const std::string & str,size_t first,size_t count)438 std::string StringUtils::Mid(const std::string &str, size_t first, size_t count /* = string::npos */)
439 {
440 if (first + count > str.size())
441 count = str.size() - first;
442
443 if (first > str.size())
444 return std::string();
445
446 assert(first + count <= str.size());
447
448 return str.substr(first, count);
449 }
450
Right(const std::string & str,size_t count)451 std::string StringUtils::Right(const std::string &str, size_t count)
452 {
453 count = std::max((size_t)0, std::min(count, str.size()));
454 return str.substr(str.size() - count);
455 }
456
Trim(std::string & str)457 std::string& StringUtils::Trim(std::string &str)
458 {
459 TrimLeft(str);
460 return TrimRight(str);
461 }
462
Trim(std::string & str,const char * const chars)463 std::string& StringUtils::Trim(std::string &str, const char* const chars)
464 {
465 TrimLeft(str, chars);
466 return TrimRight(str, chars);
467 }
468
// Whitespace test that is safe to apply to the bytes of a UTF-8 string.
// Any byte with the high bit set is reported as "not a space" without being
// handed to ::isspace(); without this the "TrimX" functions failed on Win32
// and OS X with UTF-8 strings.
// Returns 1 for a whitespace byte, 0 otherwise.
static int isspace_c(char c)
{
  if ((c & 0x80) != 0)
    return 0;

  return ::isspace(c) != 0 ? 1 : 0;
}
475
TrimLeft(std::string & str)476 std::string& StringUtils::TrimLeft(std::string &str)
477 {
478 str.erase(str.begin(),
479 std::find_if(str.begin(), str.end(), [](char s) { return isspace_c(s) == 0; }));
480 return str;
481 }
482
TrimLeft(std::string & str,const char * const chars)483 std::string& StringUtils::TrimLeft(std::string &str, const char* const chars)
484 {
485 size_t nidx = str.find_first_not_of(chars);
486 str.erase(0, nidx);
487 return str;
488 }
489
TrimRight(std::string & str)490 std::string& StringUtils::TrimRight(std::string &str)
491 {
492 str.erase(std::find_if(str.rbegin(), str.rend(), [](char s) { return isspace_c(s) == 0; }).base(),
493 str.end());
494 return str;
495 }
496
TrimRight(std::string & str,const char * const chars)497 std::string& StringUtils::TrimRight(std::string &str, const char* const chars)
498 {
499 size_t nidx = str.find_last_not_of(chars);
500 str.erase(str.npos == nidx ? 0 : ++nidx);
501 return str;
502 }
503
ReturnDigits(const std::string & str)504 int StringUtils::ReturnDigits(const std::string& str)
505 {
506 std::stringstream ss;
507 for (const auto& character : str)
508 {
509 if (isdigit(character))
510 ss << character;
511 }
512 return atoi(ss.str().c_str());
513 }
514
RemoveDuplicatedSpacesAndTabs(std::string & str)515 std::string& StringUtils::RemoveDuplicatedSpacesAndTabs(std::string& str)
516 {
517 std::string::iterator it = str.begin();
518 bool onSpace = false;
519 while(it != str.end())
520 {
521 if (*it == '\t')
522 *it = ' ';
523
524 if (*it == ' ')
525 {
526 if (onSpace)
527 {
528 it = str.erase(it);
529 continue;
530 }
531 else
532 onSpace = true;
533 }
534 else
535 onSpace = false;
536
537 ++it;
538 }
539 return str;
540 }
541
Replace(std::string & str,char oldChar,char newChar)542 int StringUtils::Replace(std::string &str, char oldChar, char newChar)
543 {
544 int replacedChars = 0;
545 for (std::string::iterator it = str.begin(); it != str.end(); ++it)
546 {
547 if (*it == oldChar)
548 {
549 *it = newChar;
550 replacedChars++;
551 }
552 }
553
554 return replacedChars;
555 }
556
Replace(std::string & str,const std::string & oldStr,const std::string & newStr)557 int StringUtils::Replace(std::string &str, const std::string &oldStr, const std::string &newStr)
558 {
559 if (oldStr.empty())
560 return 0;
561
562 int replacedChars = 0;
563 size_t index = 0;
564
565 while (index < str.size() && (index = str.find(oldStr, index)) != std::string::npos)
566 {
567 str.replace(index, oldStr.size(), newStr);
568 index += newStr.size();
569 replacedChars++;
570 }
571
572 return replacedChars;
573 }
574
Replace(std::wstring & str,const std::wstring & oldStr,const std::wstring & newStr)575 int StringUtils::Replace(std::wstring &str, const std::wstring &oldStr, const std::wstring &newStr)
576 {
577 if (oldStr.empty())
578 return 0;
579
580 int replacedChars = 0;
581 size_t index = 0;
582
583 while (index < str.size() && (index = str.find(oldStr, index)) != std::string::npos)
584 {
585 str.replace(index, oldStr.size(), newStr);
586 index += newStr.size();
587 replacedChars++;
588 }
589
590 return replacedChars;
591 }
592
StartsWith(const std::string & str1,const std::string & str2)593 bool StringUtils::StartsWith(const std::string &str1, const std::string &str2)
594 {
595 return str1.compare(0, str2.size(), str2) == 0;
596 }
597
StartsWith(const std::string & str1,const char * s2)598 bool StringUtils::StartsWith(const std::string &str1, const char *s2)
599 {
600 return StartsWith(str1.c_str(), s2);
601 }
602
StartsWith(const char * s1,const char * s2)603 bool StringUtils::StartsWith(const char *s1, const char *s2)
604 {
605 while (*s2 != '\0')
606 {
607 if (*s1 != *s2)
608 return false;
609 s1++;
610 s2++;
611 }
612 return true;
613 }
614
StartsWithNoCase(const std::string & str1,const std::string & str2)615 bool StringUtils::StartsWithNoCase(const std::string &str1, const std::string &str2)
616 {
617 return StartsWithNoCase(str1.c_str(), str2.c_str());
618 }
619
StartsWithNoCase(const std::string & str1,const char * s2)620 bool StringUtils::StartsWithNoCase(const std::string &str1, const char *s2)
621 {
622 return StartsWithNoCase(str1.c_str(), s2);
623 }
624
StartsWithNoCase(const char * s1,const char * s2)625 bool StringUtils::StartsWithNoCase(const char *s1, const char *s2)
626 {
627 while (*s2 != '\0')
628 {
629 if (::tolower(*s1) != ::tolower(*s2))
630 return false;
631 s1++;
632 s2++;
633 }
634 return true;
635 }
636
EndsWith(const std::string & str1,const std::string & str2)637 bool StringUtils::EndsWith(const std::string &str1, const std::string &str2)
638 {
639 if (str1.size() < str2.size())
640 return false;
641 return str1.compare(str1.size() - str2.size(), str2.size(), str2) == 0;
642 }
643
EndsWith(const std::string & str1,const char * s2)644 bool StringUtils::EndsWith(const std::string &str1, const char *s2)
645 {
646 size_t len2 = strlen(s2);
647 if (str1.size() < len2)
648 return false;
649 return str1.compare(str1.size() - len2, len2, s2) == 0;
650 }
651
EndsWithNoCase(const std::string & str1,const std::string & str2)652 bool StringUtils::EndsWithNoCase(const std::string &str1, const std::string &str2)
653 {
654 if (str1.size() < str2.size())
655 return false;
656 const char *s1 = str1.c_str() + str1.size() - str2.size();
657 const char *s2 = str2.c_str();
658 while (*s2 != '\0')
659 {
660 if (::tolower(*s1) != ::tolower(*s2))
661 return false;
662 s1++;
663 s2++;
664 }
665 return true;
666 }
667
EndsWithNoCase(const std::string & str1,const char * s2)668 bool StringUtils::EndsWithNoCase(const std::string &str1, const char *s2)
669 {
670 size_t len2 = strlen(s2);
671 if (str1.size() < len2)
672 return false;
673 const char *s1 = str1.c_str() + str1.size() - len2;
674 while (*s2 != '\0')
675 {
676 if (::tolower(*s1) != ::tolower(*s2))
677 return false;
678 s1++;
679 s2++;
680 }
681 return true;
682 }
683
Split(const std::string & input,const std::string & delimiter,unsigned int iMaxStrings)684 std::vector<std::string> StringUtils::Split(const std::string& input, const std::string& delimiter, unsigned int iMaxStrings)
685 {
686 std::vector<std::string> result;
687 SplitTo(std::back_inserter(result), input, delimiter, iMaxStrings);
688 return result;
689 }
690
Split(const std::string & input,const char delimiter,size_t iMaxStrings)691 std::vector<std::string> StringUtils::Split(const std::string& input, const char delimiter, size_t iMaxStrings)
692 {
693 std::vector<std::string> result;
694 SplitTo(std::back_inserter(result), input, delimiter, iMaxStrings);
695 return result;
696 }
697
Split(const std::string & input,const std::vector<std::string> & delimiters)698 std::vector<std::string> StringUtils::Split(const std::string& input, const std::vector<std::string>& delimiters)
699 {
700 std::vector<std::string> result;
701 SplitTo(std::back_inserter(result), input, delimiters);
702 return result;
703 }
704
SplitMulti(const std::vector<std::string> & input,const std::vector<std::string> & delimiters,size_t iMaxStrings)705 std::vector<std::string> StringUtils::SplitMulti(const std::vector<std::string>& input,
706 const std::vector<std::string>& delimiters,
707 size_t iMaxStrings /* = 0 */)
708 {
709 if (input.empty())
710 return std::vector<std::string>();
711
712 std::vector<std::string> results(input);
713
714 if (delimiters.empty() || (iMaxStrings > 0 && iMaxStrings <= input.size()))
715 return results;
716
717 std::vector<std::string> strings1;
718 if (iMaxStrings == 0)
719 {
720 for (size_t di = 0; di < delimiters.size(); di++)
721 {
722 for (size_t i = 0; i < results.size(); i++)
723 {
724 std::vector<std::string> substrings = StringUtils::Split(results[i], delimiters[di]);
725 for (size_t j = 0; j < substrings.size(); j++)
726 strings1.push_back(substrings[j]);
727 }
728 results = strings1;
729 strings1.clear();
730 }
731 return results;
732 }
733
734 // Control the number of strings input is split into, keeping the original strings.
735 // Note iMaxStrings > input.size()
736 int64_t iNew = iMaxStrings - results.size();
737 for (size_t di = 0; di < delimiters.size(); di++)
738 {
739 for (size_t i = 0; i < results.size(); i++)
740 {
741 if (iNew > 0)
742 {
743 std::vector<std::string> substrings = StringUtils::Split(results[i], delimiters[di], iNew + 1);
744 iNew = iNew - substrings.size() + 1;
745 for (size_t j = 0; j < substrings.size(); j++)
746 strings1.push_back(substrings[j]);
747 }
748 else
749 strings1.push_back(results[i]);
750 }
751 results = strings1;
752 iNew = iMaxStrings - results.size();
753 strings1.clear();
754 if ((iNew <= 0))
755 break; //Stop trying any more delimiters
756 }
757 return results;
758 }
759
760 // returns the number of occurrences of strFind in strInput.
FindNumber(const std::string & strInput,const std::string & strFind)761 int StringUtils::FindNumber(const std::string& strInput, const std::string &strFind)
762 {
763 size_t pos = strInput.find(strFind, 0);
764 int numfound = 0;
765 while (pos != std::string::npos)
766 {
767 numfound++;
768 pos = strInput.find(strFind, pos + 1);
769 }
770 return numfound;
771 }
772
773 // Plane maps for MySQL utf8_general_ci (now known as utf8mb3_general_ci) collation
774 // Derived from https://github.com/MariaDB/server/blob/10.5/strings/ctype-utf8.c
775
776 // clang-format off
// utf8_general_ci plane map with 256 entries whose values stay within or map
// back into U+0000..U+00FF (except 0xB5 -> 0x039C). ASCII lower-case letters
// map to their upper-case form and most accented Latin-1 letters map to their
// unaccented upper-case base letter (e.g. entry 0xE9 is 0x0045); the remaining
// entries map to themselves.
// NOTE(review): presumably indexed by the low byte of a code point in
// U+0000..U+00FF; the lookup code is outside this block - confirm.
static const uint16_t plane00[] = {
  0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
  0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
  0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
  0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
  0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
  0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
  0x0060, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
  0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
  0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
  0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
  0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
  0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x039C, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
  0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00C6, 0x0043, 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049,
  0x00D0, 0x004E, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x00D7, 0x00D8, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00DE, 0x0053,
  0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00C6, 0x0043, 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049,
  0x00D0, 0x004E, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x00F7, 0x00D8, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00DE, 0x0059
};
795
// utf8_general_ci plane map with 256 entries for the U+01xx block. Many
// accented Latin letters map to their unaccented ASCII upper-case base letter
// (e.g. entries 0x00 and 0x01 are 0x0041); lower/upper pairs without an ASCII
// base share one value (e.g. entries 0x10 and 0x11 are 0x0110).
// NOTE(review): presumably indexed by the low byte of a code point in
// U+0100..U+01FF; the lookup code is outside this block - confirm.
static const uint16_t plane01[] = {
  0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0043, 0x0043, 0x0043, 0x0043, 0x0043, 0x0043, 0x0043, 0x0043, 0x0044, 0x0044,
  0x0110, 0x0110, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0047, 0x0047, 0x0047, 0x0047,
  0x0047, 0x0047, 0x0047, 0x0047, 0x0048, 0x0048, 0x0126, 0x0126, 0x0049, 0x0049, 0x0049, 0x0049, 0x0049, 0x0049, 0x0049, 0x0049,
  0x0049, 0x0049, 0x0132, 0x0132, 0x004A, 0x004A, 0x004B, 0x004B, 0x0138, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x013F,
  0x013F, 0x0141, 0x0141, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x0149, 0x014A, 0x014A, 0x004F, 0x004F, 0x004F, 0x004F,
  0x004F, 0x004F, 0x0152, 0x0152, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053,
  0x0053, 0x0053, 0x0054, 0x0054, 0x0054, 0x0054, 0x0166, 0x0166, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055,
  0x0055, 0x0055, 0x0055, 0x0055, 0x0057, 0x0057, 0x0059, 0x0059, 0x0059, 0x005A, 0x005A, 0x005A, 0x005A, 0x005A, 0x005A, 0x0053,
  0x0180, 0x0181, 0x0182, 0x0182, 0x0184, 0x0184, 0x0186, 0x0187, 0x0187, 0x0189, 0x018A, 0x018B, 0x018B, 0x018D, 0x018E, 0x018F,
  0x0190, 0x0191, 0x0191, 0x0193, 0x0194, 0x01F6, 0x0196, 0x0197, 0x0198, 0x0198, 0x019A, 0x019B, 0x019C, 0x019D, 0x019E, 0x019F,
  0x004F, 0x004F, 0x01A2, 0x01A2, 0x01A4, 0x01A4, 0x01A6, 0x01A7, 0x01A7, 0x01A9, 0x01AA, 0x01AB, 0x01AC, 0x01AC, 0x01AE, 0x0055,
  0x0055, 0x01B1, 0x01B2, 0x01B3, 0x01B3, 0x01B5, 0x01B5, 0x01B7, 0x01B8, 0x01B8, 0x01BA, 0x01BB, 0x01BC, 0x01BC, 0x01BE, 0x01F7,
  0x01C0, 0x01C1, 0x01C2, 0x01C3, 0x01C4, 0x01C4, 0x01C4, 0x01C7, 0x01C7, 0x01C7, 0x01CA, 0x01CA, 0x01CA, 0x0041, 0x0041, 0x0049,
  0x0049, 0x004F, 0x004F, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x018E, 0x0041, 0x0041,
  0x0041, 0x0041, 0x00C6, 0x00C6, 0x01E4, 0x01E4, 0x0047, 0x0047, 0x004B, 0x004B, 0x004F, 0x004F, 0x004F, 0x004F, 0x01B7, 0x01B7,
  0x004A, 0x01F1, 0x01F1, 0x01F1, 0x0047, 0x0047, 0x01F6, 0x01F7, 0x004E, 0x004E, 0x0041, 0x0041, 0x00C6, 0x00C6, 0x00D8, 0x00D8
};
814
// utf8_general_ci plane map with 256 entries for the U+02xx block. The first
// rows map accented Latin letters to their ASCII upper-case base letter (e.g.
// entries 0x00..0x03 are 0x0041), some entries from 0x53 onwards map to U+01xx
// values (e.g. entry 0x53 is 0x0181), and the rest map to themselves.
// NOTE(review): presumably indexed by the low byte of a code point in
// U+0200..U+02FF; the lookup code is outside this block - confirm.
static const uint16_t plane02[] = {
  0x0041, 0x0041, 0x0041, 0x0041, 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, 0x004F, 0x004F, 0x004F, 0x004F,
  0x0052, 0x0052, 0x0052, 0x0052, 0x0055, 0x0055, 0x0055, 0x0055, 0x0053, 0x0053, 0x0054, 0x0054, 0x021C, 0x021C, 0x0048, 0x0048,
  0x0220, 0x0221, 0x0222, 0x0222, 0x0224, 0x0224, 0x0041, 0x0041, 0x0045, 0x0045, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F,
  0x004F, 0x004F, 0x0059, 0x0059, 0x0234, 0x0235, 0x0236, 0x0237, 0x0238, 0x0239, 0x023A, 0x023B, 0x023C, 0x023D, 0x023E, 0x023F,
  0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247, 0x0248, 0x0249, 0x024A, 0x024B, 0x024C, 0x024D, 0x024E, 0x024F,
  0x0250, 0x0251, 0x0252, 0x0181, 0x0186, 0x0255, 0x0189, 0x018A, 0x0258, 0x018F, 0x025A, 0x0190, 0x025C, 0x025D, 0x025E, 0x025F,
  0x0193, 0x0261, 0x0262, 0x0194, 0x0264, 0x0265, 0x0266, 0x0267, 0x0197, 0x0196, 0x026A, 0x026B, 0x026C, 0x026D, 0x026E, 0x019C,
  0x0270, 0x0271, 0x019D, 0x0273, 0x0274, 0x019F, 0x0276, 0x0277, 0x0278, 0x0279, 0x027A, 0x027B, 0x027C, 0x027D, 0x027E, 0x027F,
  0x01A6, 0x0281, 0x0282, 0x01A9, 0x0284, 0x0285, 0x0286, 0x0287, 0x01AE, 0x0289, 0x01B1, 0x01B2, 0x028C, 0x028D, 0x028E, 0x028F,
  0x0290, 0x0291, 0x01B7, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297, 0x0298, 0x0299, 0x029A, 0x029B, 0x029C, 0x029D, 0x029E, 0x029F,
  0x02A0, 0x02A1, 0x02A2, 0x02A3, 0x02A4, 0x02A5, 0x02A6, 0x02A7, 0x02A8, 0x02A9, 0x02AA, 0x02AB, 0x02AC, 0x02AD, 0x02AE, 0x02AF,
  0x02B0, 0x02B1, 0x02B2, 0x02B3, 0x02B4, 0x02B5, 0x02B6, 0x02B7, 0x02B8, 0x02B9, 0x02BA, 0x02BB, 0x02BC, 0x02BD, 0x02BE, 0x02BF,
  0x02C0, 0x02C1, 0x02C2, 0x02C3, 0x02C4, 0x02C5, 0x02C6, 0x02C7, 0x02C8, 0x02C9, 0x02CA, 0x02CB, 0x02CC, 0x02CD, 0x02CE, 0x02CF,
  0x02D0, 0x02D1, 0x02D2, 0x02D3, 0x02D4, 0x02D5, 0x02D6, 0x02D7, 0x02D8, 0x02D9, 0x02DA, 0x02DB, 0x02DC, 0x02DD, 0x02DE, 0x02DF,
  0x02E0, 0x02E1, 0x02E2, 0x02E3, 0x02E4, 0x02E5, 0x02E6, 0x02E7, 0x02E8, 0x02E9, 0x02EA, 0x02EB, 0x02EC, 0x02ED, 0x02EE, 0x02EF,
  0x02F0, 0x02F1, 0x02F2, 0x02F3, 0x02F4, 0x02F5, 0x02F6, 0x02F7, 0x02F8, 0x02F9, 0x02FA, 0x02FB, 0x02FC, 0x02FD, 0x02FE, 0x02FF
};
833
// utf8_general_ci plane map with 256 entries for the U+03xx block. Entries
// below 0x86 map to themselves except 0x45 (-> 0x0399); from 0x86 onwards
// Greek lower-case and accented letters map to the unaccented capital letter
// (e.g. entry 0xB1 is 0x0391).
// NOTE(review): presumably indexed by the low byte of a code point in
// U+0300..U+03FF; the lookup code is outside this block - confirm.
static const uint16_t plane03[] = {
  0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307, 0x0308, 0x0309, 0x030A, 0x030B, 0x030C, 0x030D, 0x030E, 0x030F,
  0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317, 0x0318, 0x0319, 0x031A, 0x031B, 0x031C, 0x031D, 0x031E, 0x031F,
  0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327, 0x0328, 0x0329, 0x032A, 0x032B, 0x032C, 0x032D, 0x032E, 0x032F,
  0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337, 0x0338, 0x0339, 0x033A, 0x033B, 0x033C, 0x033D, 0x033E, 0x033F,
  0x0340, 0x0341, 0x0342, 0x0343, 0x0344, 0x0399, 0x0346, 0x0347, 0x0348, 0x0349, 0x034A, 0x034B, 0x034C, 0x034D, 0x034E, 0x034F,
  0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357, 0x0358, 0x0359, 0x035A, 0x035B, 0x035C, 0x035D, 0x035E, 0x035F,
  0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367, 0x0368, 0x0369, 0x036A, 0x036B, 0x036C, 0x036D, 0x036E, 0x036F,
  0x0370, 0x0371, 0x0372, 0x0373, 0x0374, 0x0375, 0x0376, 0x0377, 0x0378, 0x0379, 0x037A, 0x037B, 0x037C, 0x037D, 0x037E, 0x037F,
  0x0380, 0x0381, 0x0382, 0x0383, 0x0384, 0x0385, 0x0391, 0x0387, 0x0395, 0x0397, 0x0399, 0x038B, 0x039F, 0x038D, 0x03A5, 0x03A9,
  0x0399, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
  0x03A0, 0x03A1, 0x03A2, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, 0x03A8, 0x03A9, 0x0399, 0x03A5, 0x0391, 0x0395, 0x0397, 0x0399,
  0x03A5, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
  0x03A0, 0x03A1, 0x03A3, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, 0x03A8, 0x03A9, 0x0399, 0x03A5, 0x039F, 0x03A5, 0x03A9, 0x03CF,
  0x0392, 0x0398, 0x03D2, 0x03D2, 0x03D2, 0x03A6, 0x03A0, 0x03D7, 0x03D8, 0x03D9, 0x03DA, 0x03DA, 0x03DC, 0x03DC, 0x03DE, 0x03DE,
  0x03E0, 0x03E0, 0x03E2, 0x03E2, 0x03E4, 0x03E4, 0x03E6, 0x03E6, 0x03E8, 0x03E8, 0x03EA, 0x03EA, 0x03EC, 0x03EC, 0x03EE, 0x03EE,
  0x039A, 0x03A1, 0x03A3, 0x03F3, 0x03F4, 0x03F5, 0x03F6, 0x03F7, 0x03F8, 0x03F9, 0x03FA, 0x03FB, 0x03FC, 0x03FD, 0x03FE, 0x03FF
};
852
// utf8_general_ci plane map with 256 entries for the U+04xx block. Cyrillic
// lower-case letters map to their capital form (e.g. entry 0x30 is 0x0410) and
// several modified letters map to a base capital letter (e.g. entries 0x00 and
// 0x01 are 0x0415).
// NOTE(review): presumably indexed by the low byte of a code point in
// U+0400..U+04FF; the lookup code is outside this block - confirm.
static const uint16_t plane04[] = {
  0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406, 0x0408, 0x0409, 0x040A, 0x040B, 0x041A, 0x0418, 0x0423, 0x040F,
  0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
  0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
  0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
  0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
  0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406, 0x0408, 0x0409, 0x040A, 0x040B, 0x041A, 0x0418, 0x0423, 0x040F,
  0x0460, 0x0460, 0x0462, 0x0462, 0x0464, 0x0464, 0x0466, 0x0466, 0x0468, 0x0468, 0x046A, 0x046A, 0x046C, 0x046C, 0x046E, 0x046E,
  0x0470, 0x0470, 0x0472, 0x0472, 0x0474, 0x0474, 0x0474, 0x0474, 0x0478, 0x0478, 0x047A, 0x047A, 0x047C, 0x047C, 0x047E, 0x047E,
  0x0480, 0x0480, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487, 0x0488, 0x0489, 0x048A, 0x048B, 0x048C, 0x048C, 0x048E, 0x048E,
  0x0490, 0x0490, 0x0492, 0x0492, 0x0494, 0x0494, 0x0496, 0x0496, 0x0498, 0x0498, 0x049A, 0x049A, 0x049C, 0x049C, 0x049E, 0x049E,
  0x04A0, 0x04A0, 0x04A2, 0x04A2, 0x04A4, 0x04A4, 0x04A6, 0x04A6, 0x04A8, 0x04A8, 0x04AA, 0x04AA, 0x04AC, 0x04AC, 0x04AE, 0x04AE,
  0x04B0, 0x04B0, 0x04B2, 0x04B2, 0x04B4, 0x04B4, 0x04B6, 0x04B6, 0x04B8, 0x04B8, 0x04BA, 0x04BA, 0x04BC, 0x04BC, 0x04BE, 0x04BE,
  0x04C0, 0x0416, 0x0416, 0x04C3, 0x04C3, 0x04C5, 0x04C6, 0x04C7, 0x04C7, 0x04C9, 0x04CA, 0x04CB, 0x04CB, 0x04CD, 0x04CE, 0x04CF,
  0x0410, 0x0410, 0x0410, 0x0410, 0x04D4, 0x04D4, 0x0415, 0x0415, 0x04D8, 0x04D8, 0x04D8, 0x04D8, 0x0416, 0x0416, 0x0417, 0x0417,
  0x04E0, 0x04E0, 0x0418, 0x0418, 0x0418, 0x0418, 0x041E, 0x041E, 0x04E8, 0x04E8, 0x04E8, 0x04E8, 0x042D, 0x042D, 0x0423, 0x0423,
  0x0423, 0x0423, 0x0423, 0x0423, 0x0427, 0x0427, 0x04F6, 0x04F7, 0x042B, 0x042B, 0x04FA, 0x04FB, 0x04FC, 0x04FD, 0x04FE, 0x04FF
};
871
// utf8_general_ci plane map with 256 entries for the U+05xx block. Every entry
// maps to itself except 0x61..0x86, which map to 0x0531..0x0556 (e.g. entry
// 0x61 is 0x0531).
// NOTE(review): presumably indexed by the low byte of a code point in
// U+0500..U+05FF; the lookup code is outside this block - confirm.
static const uint16_t plane05[] = {
  0x0500, 0x0501, 0x0502, 0x0503, 0x0504, 0x0505, 0x0506, 0x0507, 0x0508, 0x0509, 0x050A, 0x050B, 0x050C, 0x050D, 0x050E, 0x050F,
  0x0510, 0x0511, 0x0512, 0x0513, 0x0514, 0x0515, 0x0516, 0x0517, 0x0518, 0x0519, 0x051A, 0x051B, 0x051C, 0x051D, 0x051E, 0x051F,
  0x0520, 0x0521, 0x0522, 0x0523, 0x0524, 0x0525, 0x0526, 0x0527, 0x0528, 0x0529, 0x052A, 0x052B, 0x052C, 0x052D, 0x052E, 0x052F,
  0x0530, 0x0531, 0x0532, 0x0533, 0x0534, 0x0535, 0x0536, 0x0537, 0x0538, 0x0539, 0x053A, 0x053B, 0x053C, 0x053D, 0x053E, 0x053F,
  0x0540, 0x0541, 0x0542, 0x0543, 0x0544, 0x0545, 0x0546, 0x0547, 0x0548, 0x0549, 0x054A, 0x054B, 0x054C, 0x054D, 0x054E, 0x054F,
  0x0550, 0x0551, 0x0552, 0x0553, 0x0554, 0x0555, 0x0556, 0x0557, 0x0558, 0x0559, 0x055A, 0x055B, 0x055C, 0x055D, 0x055E, 0x055F,
  0x0560, 0x0531, 0x0532, 0x0533, 0x0534, 0x0535, 0x0536, 0x0537, 0x0538, 0x0539, 0x053A, 0x053B, 0x053C, 0x053D, 0x053E, 0x053F,
  0x0540, 0x0541, 0x0542, 0x0543, 0x0544, 0x0545, 0x0546, 0x0547, 0x0548, 0x0549, 0x054A, 0x054B, 0x054C, 0x054D, 0x054E, 0x054F,
  0x0550, 0x0551, 0x0552, 0x0553, 0x0554, 0x0555, 0x0556, 0x0587, 0x0588, 0x0589, 0x058A, 0x058B, 0x058C, 0x058D, 0x058E, 0x058F,
  0x0590, 0x0591, 0x0592, 0x0593, 0x0594, 0x0595, 0x0596, 0x0597, 0x0598, 0x0599, 0x059A, 0x059B, 0x059C, 0x059D, 0x059E, 0x059F,
  0x05A0, 0x05A1, 0x05A2, 0x05A3, 0x05A4, 0x05A5, 0x05A6, 0x05A7, 0x05A8, 0x05A9, 0x05AA, 0x05AB, 0x05AC, 0x05AD, 0x05AE, 0x05AF,
  0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, 0x05B8, 0x05B9, 0x05BA, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF,
  0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05C4, 0x05C5, 0x05C6, 0x05C7, 0x05C8, 0x05C9, 0x05CA, 0x05CB, 0x05CC, 0x05CD, 0x05CE, 0x05CF,
  0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
  0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, 0x05E8, 0x05E9, 0x05EA, 0x05EB, 0x05EC, 0x05ED, 0x05EE, 0x05EF,
  0x05F0, 0x05F1, 0x05F2, 0x05F3, 0x05F4, 0x05F5, 0x05F6, 0x05F7, 0x05F8, 0x05F9, 0x05FA, 0x05FB, 0x05FC, 0x05FD, 0x05FE, 0x05FF
};
890
// utf8_general_ci plane map with 256 entries for the U+1Exx block. Almost all
// entries map to an ASCII upper-case base letter (e.g. entries 0x00 and 0x01
// are 0x0041); the few remaining entries (such as 0x9A and 0xFA..0xFF) map to
// themselves.
// NOTE(review): presumably indexed by the low byte of a code point in
// U+1E00..U+1EFF; the lookup code is outside this block - confirm.
static const uint16_t plane1E[] = {
  0x0041, 0x0041, 0x0042, 0x0042, 0x0042, 0x0042, 0x0042, 0x0042, 0x0043, 0x0043, 0x0044, 0x0044, 0x0044, 0x0044, 0x0044, 0x0044,
  0x0044, 0x0044, 0x0044, 0x0044, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0046, 0x0046,
  0x0047, 0x0047, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0048, 0x0049, 0x0049, 0x0049, 0x0049,
  0x004B, 0x004B, 0x004B, 0x004B, 0x004B, 0x004B, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x004C, 0x004D, 0x004D,
  0x004D, 0x004D, 0x004D, 0x004D, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x004E, 0x004F, 0x004F, 0x004F, 0x004F,
  0x004F, 0x004F, 0x004F, 0x004F, 0x0050, 0x0050, 0x0050, 0x0050, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052, 0x0052,
  0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0053, 0x0054, 0x0054, 0x0054, 0x0054, 0x0054, 0x0054,
  0x0054, 0x0054, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0056, 0x0056, 0x0056, 0x0056,
  0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0057, 0x0058, 0x0058, 0x0058, 0x0058, 0x0059, 0x0059,
  0x005A, 0x005A, 0x005A, 0x005A, 0x005A, 0x005A, 0x0048, 0x0054, 0x0057, 0x0059, 0x1E9A, 0x0053, 0x1E9C, 0x1E9D, 0x1E9E, 0x1E9F,
  0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041,
  0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045,
  0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, 0x004F, 0x004F, 0x004F, 0x004F,
  0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F, 0x004F,
  0x004F, 0x004F, 0x004F, 0x004F, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055, 0x0055,
  0x0055, 0x0055, 0x0059, 0x0059, 0x0059, 0x0059, 0x0059, 0x0059, 0x0059, 0x0059, 0x1EFA, 0x1EFB, 0x1EFC, 0x1EFD, 0x1EFE, 0x1EFF
};
909
// utf8_general_ci plane map with 256 entries for the U+1Fxx block. Most
// entries map to an unaccented Greek capital letter in U+03xx (e.g. entries
// 0x00..0x0F are 0x0391); the remaining entries map to a U+1Fxx value.
// NOTE(review): presumably indexed by the low byte of a code point in
// U+1F00..U+1FFF; the lookup code is outside this block - confirm.
static const uint16_t plane1F[] = {
  0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391,
  0x0395, 0x0395, 0x0395, 0x0395, 0x0395, 0x0395, 0x1F16, 0x1F17, 0x0395, 0x0395, 0x0395, 0x0395, 0x0395, 0x0395, 0x1F1E, 0x1F1F,
  0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397,
  0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399,
  0x039F, 0x039F, 0x039F, 0x039F, 0x039F, 0x039F, 0x1F46, 0x1F47, 0x039F, 0x039F, 0x039F, 0x039F, 0x039F, 0x039F, 0x1F4E, 0x1F4F,
  0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x1F58, 0x03A5, 0x1F5A, 0x03A5, 0x1F5C, 0x03A5, 0x1F5E, 0x03A5,
  0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9,
  0x0391, 0x1FBB, 0x0395, 0x1FC9, 0x0397, 0x1FCB, 0x0399, 0x1FDB, 0x039F, 0x1FF9, 0x03A5, 0x1FEB, 0x03A9, 0x1FFB, 0x1F7E, 0x1F7F,
  0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391,
  0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397, 0x0397,
  0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9, 0x03A9,
  0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x1FB5, 0x0391, 0x0391, 0x0391, 0x0391, 0x0391, 0x1FBB, 0x0391, 0x1FBD, 0x0399, 0x1FBF,
  0x1FC0, 0x1FC1, 0x0397, 0x0397, 0x0397, 0x1FC5, 0x0397, 0x0397, 0x0395, 0x1FC9, 0x0397, 0x1FCB, 0x0397, 0x1FCD, 0x1FCE, 0x1FCF,
  0x0399, 0x0399, 0x0399, 0x1FD3, 0x1FD4, 0x1FD5, 0x0399, 0x0399, 0x0399, 0x0399, 0x0399, 0x1FDB, 0x1FDC, 0x1FDD, 0x1FDE, 0x1FDF,
  0x03A5, 0x03A5, 0x03A5, 0x1FE3, 0x03A1, 0x03A1, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x03A5, 0x1FEB, 0x03A1, 0x1FED, 0x1FEE, 0x1FEF,
  0x1FF0, 0x1FF1, 0x03A9, 0x03A9, 0x03A9, 0x1FF5, 0x03A9, 0x03A9, 0x039F, 0x1FF9, 0x03A9, 0x1FFB, 0x03A9, 0x1FFD, 0x1FFE, 0x1FFF
};
928
// utf8_general_ci plane map with 256 entries for the U+21xx block. Every entry
// maps to itself except 0x70..0x7F, which map to 0x2160..0x216F.
// NOTE(review): presumably indexed by the low byte of a code point in
// U+2100..U+21FF; the lookup code is outside this block - confirm.
static const uint16_t plane21[] = {
  0x2100, 0x2101, 0x2102, 0x2103, 0x2104, 0x2105, 0x2106, 0x2107, 0x2108, 0x2109, 0x210A, 0x210B, 0x210C, 0x210D, 0x210E, 0x210F,
  0x2110, 0x2111, 0x2112, 0x2113, 0x2114, 0x2115, 0x2116, 0x2117, 0x2118, 0x2119, 0x211A, 0x211B, 0x211C, 0x211D, 0x211E, 0x211F,
  0x2120, 0x2121, 0x2122, 0x2123, 0x2124, 0x2125, 0x2126, 0x2127, 0x2128, 0x2129, 0x212A, 0x212B, 0x212C, 0x212D, 0x212E, 0x212F,
  0x2130, 0x2131, 0x2132, 0x2133, 0x2134, 0x2135, 0x2136, 0x2137, 0x2138, 0x2139, 0x213A, 0x213B, 0x213C, 0x213D, 0x213E, 0x213F,
  0x2140, 0x2141, 0x2142, 0x2143, 0x2144, 0x2145, 0x2146, 0x2147, 0x2148, 0x2149, 0x214A, 0x214B, 0x214C, 0x214D, 0x214E, 0x214F,
  0x2150, 0x2151, 0x2152, 0x2153, 0x2154, 0x2155, 0x2156, 0x2157, 0x2158, 0x2159, 0x215A, 0x215B, 0x215C, 0x215D, 0x215E, 0x215F,
  0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x2165, 0x2166, 0x2167, 0x2168, 0x2169, 0x216A, 0x216B, 0x216C, 0x216D, 0x216E, 0x216F,
  0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x2165, 0x2166, 0x2167, 0x2168, 0x2169, 0x216A, 0x216B, 0x216C, 0x216D, 0x216E, 0x216F,
  0x2180, 0x2181, 0x2182, 0x2183, 0x2184, 0x2185, 0x2186, 0x2187, 0x2188, 0x2189, 0x218A, 0x218B, 0x218C, 0x218D, 0x218E, 0x218F,
  0x2190, 0x2191, 0x2192, 0x2193, 0x2194, 0x2195, 0x2196, 0x2197, 0x2198, 0x2199, 0x219A, 0x219B, 0x219C, 0x219D, 0x219E, 0x219F,
  0x21A0, 0x21A1, 0x21A2, 0x21A3, 0x21A4, 0x21A5, 0x21A6, 0x21A7, 0x21A8, 0x21A9, 0x21AA, 0x21AB, 0x21AC, 0x21AD, 0x21AE, 0x21AF,
  0x21B0, 0x21B1, 0x21B2, 0x21B3, 0x21B4, 0x21B5, 0x21B6, 0x21B7, 0x21B8, 0x21B9, 0x21BA, 0x21BB, 0x21BC, 0x21BD, 0x21BE, 0x21BF,
  0x21C0, 0x21C1, 0x21C2, 0x21C3, 0x21C4, 0x21C5, 0x21C6, 0x21C7, 0x21C8, 0x21C9, 0x21CA, 0x21CB, 0x21CC, 0x21CD, 0x21CE, 0x21CF,
  0x21D0, 0x21D1, 0x21D2, 0x21D3, 0x21D4, 0x21D5, 0x21D6, 0x21D7, 0x21D8, 0x21D9, 0x21DA, 0x21DB, 0x21DC, 0x21DD, 0x21DE, 0x21DF,
  0x21E0, 0x21E1, 0x21E2, 0x21E3, 0x21E4, 0x21E5, 0x21E6, 0x21E7, 0x21E8, 0x21E9, 0x21EA, 0x21EB, 0x21EC, 0x21ED, 0x21EE, 0x21EF,
  0x21F0, 0x21F1, 0x21F2, 0x21F3, 0x21F4, 0x21F5, 0x21F6, 0x21F7, 0x21F8, 0x21F9, 0x21FA, 0x21FB, 0x21FC, 0x21FD, 0x21FE, 0x21FF
};
947
// utf8_general_ci plane map with 256 entries for the U+24xx block. Every entry
// maps to itself except 0xD0..0xE9, which map to 0x24B6..0x24CF.
// NOTE(review): presumably indexed by the low byte of a code point in
// U+2400..U+24FF; the lookup code is outside this block - confirm.
static const uint16_t plane24[] = {
  0x2400, 0x2401, 0x2402, 0x2403, 0x2404, 0x2405, 0x2406, 0x2407, 0x2408, 0x2409, 0x240A, 0x240B, 0x240C, 0x240D, 0x240E, 0x240F,
  0x2410, 0x2411, 0x2412, 0x2413, 0x2414, 0x2415, 0x2416, 0x2417, 0x2418, 0x2419, 0x241A, 0x241B, 0x241C, 0x241D, 0x241E, 0x241F,
  0x2420, 0x2421, 0x2422, 0x2423, 0x2424, 0x2425, 0x2426, 0x2427, 0x2428, 0x2429, 0x242A, 0x242B, 0x242C, 0x242D, 0x242E, 0x242F,
  0x2430, 0x2431, 0x2432, 0x2433, 0x2434, 0x2435, 0x2436, 0x2437, 0x2438, 0x2439, 0x243A, 0x243B, 0x243C, 0x243D, 0x243E, 0x243F,
  0x2440, 0x2441, 0x2442, 0x2443, 0x2444, 0x2445, 0x2446, 0x2447, 0x2448, 0x2449, 0x244A, 0x244B, 0x244C, 0x244D, 0x244E, 0x244F,
  0x2450, 0x2451, 0x2452, 0x2453, 0x2454, 0x2455, 0x2456, 0x2457, 0x2458, 0x2459, 0x245A, 0x245B, 0x245C, 0x245D, 0x245E, 0x245F,
  0x2460, 0x2461, 0x2462, 0x2463, 0x2464, 0x2465, 0x2466, 0x2467, 0x2468, 0x2469, 0x246A, 0x246B, 0x246C, 0x246D, 0x246E, 0x246F,
  0x2470, 0x2471, 0x2472, 0x2473, 0x2474, 0x2475, 0x2476, 0x2477, 0x2478, 0x2479, 0x247A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F,
  0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486, 0x2487, 0x2488, 0x2489, 0x248A, 0x248B, 0x248C, 0x248D, 0x248E, 0x248F,
  0x2490, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496, 0x2497, 0x2498, 0x2499, 0x249A, 0x249B, 0x249C, 0x249D, 0x249E, 0x249F,
  0x24A0, 0x24A1, 0x24A2, 0x24A3, 0x24A4, 0x24A5, 0x24A6, 0x24A7, 0x24A8, 0x24A9, 0x24AA, 0x24AB, 0x24AC, 0x24AD, 0x24AE, 0x24AF,
  0x24B0, 0x24B1, 0x24B2, 0x24B3, 0x24B4, 0x24B5, 0x24B6, 0x24B7, 0x24B8, 0x24B9, 0x24BA, 0x24BB, 0x24BC, 0x24BD, 0x24BE, 0x24BF,
  0x24C0, 0x24C1, 0x24C2, 0x24C3, 0x24C4, 0x24C5, 0x24C6, 0x24C7, 0x24C8, 0x24C9, 0x24CA, 0x24CB, 0x24CC, 0x24CD, 0x24CE, 0x24CF,
  0x24B6, 0x24B7, 0x24B8, 0x24B9, 0x24BA, 0x24BB, 0x24BC, 0x24BD, 0x24BE, 0x24BF, 0x24C0, 0x24C1, 0x24C2, 0x24C3, 0x24C4, 0x24C5,
  0x24C6, 0x24C7, 0x24C8, 0x24C9, 0x24CA, 0x24CB, 0x24CC, 0x24CD, 0x24CE, 0x24CF, 0x24EA, 0x24EB, 0x24EC, 0x24ED, 0x24EE, 0x24EF,
  0x24F0, 0x24F1, 0x24F2, 0x24F3, 0x24F4, 0x24F5, 0x24F6, 0x24F7, 0x24F8, 0x24F9, 0x24FA, 0x24FB, 0x24FC, 0x24FD, 0x24FE, 0x24FF
};
966
// Collation weights for the code points 0xFF00-0xFFFF, indexed by the low byte of the code point.
// Every entry maps to itself except 0xFF41-0xFF5A, which are given the weights 0xFF21-0xFF3A.
static const uint16_t planeFF[] = {
  0xFF00, 0xFF01, 0xFF02, 0xFF03, 0xFF04, 0xFF05, 0xFF06, 0xFF07, 0xFF08, 0xFF09, 0xFF0A, 0xFF0B, 0xFF0C, 0xFF0D, 0xFF0E, 0xFF0F,
  0xFF10, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17, 0xFF18, 0xFF19, 0xFF1A, 0xFF1B, 0xFF1C, 0xFF1D, 0xFF1E, 0xFF1F,
  0xFF20, 0xFF21, 0xFF22, 0xFF23, 0xFF24, 0xFF25, 0xFF26, 0xFF27, 0xFF28, 0xFF29, 0xFF2A, 0xFF2B, 0xFF2C, 0xFF2D, 0xFF2E, 0xFF2F,
  0xFF30, 0xFF31, 0xFF32, 0xFF33, 0xFF34, 0xFF35, 0xFF36, 0xFF37, 0xFF38, 0xFF39, 0xFF3A, 0xFF3B, 0xFF3C, 0xFF3D, 0xFF3E, 0xFF3F,
  // 0xFF41-0xFF4F share the weights of 0xFF21-0xFF2F
  0xFF40, 0xFF21, 0xFF22, 0xFF23, 0xFF24, 0xFF25, 0xFF26, 0xFF27, 0xFF28, 0xFF29, 0xFF2A, 0xFF2B, 0xFF2C, 0xFF2D, 0xFF2E, 0xFF2F,
  // 0xFF50-0xFF5A share the weights of 0xFF30-0xFF3A, the rest of the row maps to itself
  0xFF30, 0xFF31, 0xFF32, 0xFF33, 0xFF34, 0xFF35, 0xFF36, 0xFF37, 0xFF38, 0xFF39, 0xFF3A, 0xFF5B, 0xFF5C, 0xFF5D, 0xFF5E, 0xFF5F,
  0xFF60, 0xFF61, 0xFF62, 0xFF63, 0xFF64, 0xFF65, 0xFF66, 0xFF67, 0xFF68, 0xFF69, 0xFF6A, 0xFF6B, 0xFF6C, 0xFF6D, 0xFF6E, 0xFF6F,
  0xFF70, 0xFF71, 0xFF72, 0xFF73, 0xFF74, 0xFF75, 0xFF76, 0xFF77, 0xFF78, 0xFF79, 0xFF7A, 0xFF7B, 0xFF7C, 0xFF7D, 0xFF7E, 0xFF7F,
  0xFF80, 0xFF81, 0xFF82, 0xFF83, 0xFF84, 0xFF85, 0xFF86, 0xFF87, 0xFF88, 0xFF89, 0xFF8A, 0xFF8B, 0xFF8C, 0xFF8D, 0xFF8E, 0xFF8F,
  0xFF90, 0xFF91, 0xFF92, 0xFF93, 0xFF94, 0xFF95, 0xFF96, 0xFF97, 0xFF98, 0xFF99, 0xFF9A, 0xFF9B, 0xFF9C, 0xFF9D, 0xFF9E, 0xFF9F,
  0xFFA0, 0xFFA1, 0xFFA2, 0xFFA3, 0xFFA4, 0xFFA5, 0xFFA6, 0xFFA7, 0xFFA8, 0xFFA9, 0xFFAA, 0xFFAB, 0xFFAC, 0xFFAD, 0xFFAE, 0xFFAF,
  0xFFB0, 0xFFB1, 0xFFB2, 0xFFB3, 0xFFB4, 0xFFB5, 0xFFB6, 0xFFB7, 0xFFB8, 0xFFB9, 0xFFBA, 0xFFBB, 0xFFBC, 0xFFBD, 0xFFBE, 0xFFBF,
  0xFFC0, 0xFFC1, 0xFFC2, 0xFFC3, 0xFFC4, 0xFFC5, 0xFFC6, 0xFFC7, 0xFFC8, 0xFFC9, 0xFFCA, 0xFFCB, 0xFFCC, 0xFFCD, 0xFFCE, 0xFFCF,
  0xFFD0, 0xFFD1, 0xFFD2, 0xFFD3, 0xFFD4, 0xFFD5, 0xFFD6, 0xFFD7, 0xFFD8, 0xFFD9, 0xFFDA, 0xFFDB, 0xFFDC, 0xFFDD, 0xFFDE, 0xFFDF,
  0xFFE0, 0xFFE1, 0xFFE2, 0xFFE3, 0xFFE4, 0xFFE5, 0xFFE6, 0xFFE7, 0xFFE8, 0xFFE9, 0xFFEA, 0xFFEB, 0xFFEC, 0xFFED, 0xFFEE, 0xFFEF,
  0xFFF0, 0xFFF1, 0xFFF2, 0xFFF3, 0xFFF4, 0xFFF5, 0xFFF6, 0xFFF7, 0xFFF8, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, 0xFFFF
};
985
986 static const uint16_t* const planemap[256] = {
987 plane00, plane01, plane02, plane03, plane04, plane05, NULL, NULL, NULL, NULL, NULL,
988 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
989 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, plane1E, plane1F, NULL,
990 plane21, NULL, NULL, plane24, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
991 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
992 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
993 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
994 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
995 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
996 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
997 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
998 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
999 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1000 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1001 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1002 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1003 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1004 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1005 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1006 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1007 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1008 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1009 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1010 NULL, NULL, planeFF
1011 };
1012 // clang-format on
1013
GetCollationWeight(const wchar_t & r)1014 static wchar_t GetCollationWeight(const wchar_t& r)
1015 {
1016 // Lookup the "weight" of a UTF8 char, equivalent lowercase ascii letter, in the plane map,
1017 // the character comparison value used by using "accent folding" collation utf8_general_ci
1018 // in MySQL (AKA utf8mb3_general_ci in MariaDB 10)
1019 auto index = r >> 8;
1020 if (index > 255)
1021 return 0xFFFD;
1022 auto plane = planemap[index];
1023 if (plane == nullptr)
1024 return r;
1025 return static_cast<wchar_t>(plane[r & 0xFF]);
1026 }
1027
1028 // Compares separately the numeric and alphabetic parts of a wide string.
1029 // returns negative if left < right, positive if left > right
1030 // and 0 if they are identical.
1031 // See also the equivalent StringUtils::AlphaNumericCollation() for UFT8 data
AlphaNumericCompare(const wchar_t * left,const wchar_t * right)1032 int64_t StringUtils::AlphaNumericCompare(const wchar_t* left, const wchar_t* right)
1033 {
1034 const wchar_t *l = left;
1035 const wchar_t *r = right;
1036 const wchar_t *ld, *rd;
1037 wchar_t lc, rc;
1038 int64_t lnum, rnum;
1039 bool lsym, rsym;
1040 while (*l != 0 && *r != 0)
1041 {
1042 // check if we have a numerical value
1043 if (*l >= L'0' && *l <= L'9' && *r >= L'0' && *r <= L'9')
1044 {
1045 ld = l;
1046 lnum = *ld++ - L'0';
1047 while (*ld >= L'0' && *ld <= L'9' && ld < l + 15)
1048 { // compare only up to 15 digits
1049 lnum *= 10;
1050 lnum += *ld++ - L'0';
1051 }
1052 rd = r;
1053 rnum = *rd++ - L'0';
1054 while (*rd >= L'0' && *rd <= L'9' && rd < r + 15)
1055 { // compare only up to 15 digits
1056 rnum *= 10;
1057 rnum += *rd++ - L'0';
1058 }
1059 // do we have numbers?
1060 if (lnum != rnum)
1061 { // yes - and they're different!
1062 return lnum - rnum;
1063 }
1064 l = ld;
1065 r = rd;
1066 continue;
1067 }
1068
1069 lc = *l;
1070 rc = *r;
1071 // Put ascii punctuation and symbols e.g. !#$&()*+,-./:;<=>?@[\]^_ `{|}~ above the other
1072 // alphanumeric ascii, rather than some being mixed between the numbers and letters, and
1073 // above all other unicode letters, symbols and punctuation.
1074 // (Locale collation of these chars varies across platforms)
1075 lsym = (lc >= 32 && lc < L'0') || (lc > L'9' && lc < L'A') ||
1076 (lc > L'Z' && lc < L'a') || (lc > L'z' && lc < 128);
1077 rsym = (rc >= 32 && rc < L'0') || (rc > L'9' && rc < L'A') ||
1078 (rc > L'Z' && rc < L'a') || (rc > L'z' && rc < 128);
1079 if (lsym && !rsym)
1080 return -1;
1081 if (!lsym && rsym)
1082 return 1;
1083 if (lsym && rsym)
1084 {
1085 if (lc != rc)
1086 return lc - rc;
1087 else
1088 { // Same symbol advance to next wchar
1089 l++;
1090 r++;
1091 continue;
1092 }
1093 }
1094 if (!g_langInfo.UseLocaleCollation())
1095 {
1096 // Apply case sensitive accent folding collation to non-ascii chars.
1097 // This mimics utf8_general_ci collation, and provides simple collation of LATIN-1 chars
1098 // for any platformthat doesn't have a language specific collate facet implemented
1099 if (lc > 128)
1100 lc = GetCollationWeight(lc);
1101 if (rc > 128)
1102 rc = GetCollationWeight(rc);
1103 }
1104 // Do case less comparison, convert ascii upper case to lower case
1105 if (lc >= L'A' && lc <= L'Z')
1106 lc += L'a' - L'A';
1107 if (rc >= L'A' && rc <= L'Z')
1108 rc += L'a' - L'A';
1109
1110 if (lc != rc)
1111 {
1112 if (!g_langInfo.UseLocaleCollation())
1113 {
1114 // Compare unicode (having applied accent folding collation to non-ascii chars).
1115 int i = wcsncmp(&lc, &rc, 1);
1116 return i;
1117 }
1118 else
1119 {
1120 // Fetch collation facet from locale to do comparison of wide char although on some
1121 // platforms this is not langauge specific but just compares unicode
1122 const std::collate<wchar_t>& coll =
1123 std::use_facet<std::collate<wchar_t>>(g_langInfo.GetSystemLocale());
1124 int cmp_res = coll.compare(&lc, &lc + 1, &rc, &rc + 1);
1125 if (cmp_res != 0)
1126 return cmp_res;
1127 }
1128 }
1129 l++; r++;
1130 }
1131 if (*r)
1132 { // r is longer
1133 return -1;
1134 }
1135 else if (*l)
1136 { // l is longer
1137 return 1;
1138 }
1139 return 0; // files are the same
1140 }
1141
/*
  Decode the UTF8 character to which z points into a Unicode code point.

  z       the first byte of the character
  nKey    number of bytes readable at z; z[0] is read unconditionally, so at least 1 is assumed
  bytes   receives the number of continuation bytes (0 to 3) consumed after the first byte
  returns the decoded code point, or 0xFFFD for a multi-byte sequence that decodes to less than
          0x80, to more than 0x10FFFF, to a surrogate (0xD800-0xDFFF) or to 0xFFFE/0xFFFF.
          A first byte below 0xC0 is returned unchanged.

  This only gives meaningful code points if z points to a well-formed UTF8 string.
  Byte-0    Byte-1    Byte-2    Byte-3    Value
  0xxxxxxx                                00000000 00000000 0xxxxxxx
  110yyyyy  10xxxxxx                      00000000 00000yyy yyxxxxxx
  1110zzzz  10yyyyyy  10xxxxxx            00000000 zzzzyyyy yyxxxxxx
  11110uuu  10uuzzzz  10yyyyyy  10xxxxxx  000uuuuu zzzzyyyy yyxxxxxx
*/
static uint32_t UTF8ToUnicode(const unsigned char* z, int nKey, unsigned char& bytes)
{
  // Lookup table used to decode the first byte of a multi-byte UTF8 character
  // clang-format off
  static const unsigned char utf8Trans1[] = {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
  };
  // clang-format on

  // A UTF8 character is at most 4 bytes long. Never consume more than that, otherwise a run of
  // stray continuation bytes in malformed data overflows both the code point and "bytes".
  constexpr int maxSequenceLength = 4;

  bytes = 0;
  uint32_t c = z[0];
  if (c >= 0xc0)
  {
    c = utf8Trans1[c - 0xc0];
    int index = 1;
    while (index < nKey && index < maxSequenceLength && (z[index] & 0xc0) == 0x80)
    {
      c = (c << 6) + (0x3f & z[index]);
      index++;
    }
    if (c < 0x80 || c > 0x10FFFF || (c & 0xFFFFF800) == 0xD800 || (c & 0xFFFFFFFE) == 0xFFFE)
      c = 0xFFFD;
    bytes = static_cast<unsigned char>(index - 1);
  }
  return c;
}
1186
1187 /*
1188 SQLite collating function, see sqlite3_create_collation
1189 The equivalent of AlphaNumericCompare() but for comparing UTF8 encoded data
1190
1191 This only processes enough data to find a difference, and avoids expensive data conversions.
1192 When sorting in memory item data is converted once to wstring in advance prior to sorting, the
1193 SQLite callback function can not do that kind of preparation. Instead, in order to use
1194 AlphaNumericCompare(), it would have to repeatedly convert the full input data to wstring for
1195 every pair comparison made. That approach was found to be 10 times slower than using this
1196 separate routine.
1197 */
AlphaNumericCollation(int nKey1,const void * pKey1,int nKey2,const void * pKey2)1198 int StringUtils::AlphaNumericCollation(int nKey1, const void* pKey1, int nKey2, const void* pKey2)
1199 {
1200 // Get exact matches of shorter text to start of larger test fast
1201 int n = std::min(nKey1, nKey2);
1202 int r = memcmp(pKey1, pKey2, n);
1203 if (r == 0)
1204 return nKey1 - nKey2;
1205
1206 //Not a binary match, so process character at a time
1207 const unsigned char* zA = static_cast<const unsigned char*>(pKey1);
1208 const unsigned char* zB = static_cast<const unsigned char*>(pKey2);
1209 wchar_t lc, rc;
1210 unsigned char bytes;
1211 int64_t lnum, rnum;
1212 bool lsym, rsym;
1213 int ld, rd;
1214 int i = 0;
1215 int j = 0;
1216 // Looping Unicode point at a time through potentially 1 to 4 multi-byte encoded UTF8 data
1217 while (i < nKey1 && j < nKey2)
1218 {
1219 // Check if we have numerical values, compare only up to 15 digits
1220 if (isdigit(zA[i]) && isdigit(zB[j]))
1221 {
1222 lnum = zA[i] - '0';
1223 ld = i + 1;
1224 while (ld < nKey1 && isdigit(zA[ld]) && ld < i + 15)
1225 {
1226 lnum *= 10;
1227 lnum += zA[ld] - '0';
1228 ld++;
1229 }
1230 rnum = zB[j] - '0';
1231 rd = j + 1;
1232 while (rd < nKey2 && isdigit(zB[rd]) && rd < j + 15)
1233 {
1234 rnum *= 10;
1235 rnum += zB[rd] - '0';
1236 rd++;
1237 }
1238 // do we have numbers?
1239 if (lnum != rnum)
1240 { // yes - and they're different!
1241 return static_cast<int>(lnum - rnum);
1242 }
1243 // Advance to after digits
1244 i = ld;
1245 j = rd;
1246 continue;
1247 }
1248 // Put ascii punctuation and symbols e.g. !#$&()*+,-./:;<=>?@[\]^_ `{|}~ before the other
1249 // alphanumeric ascii, rather than some being mixed between the numbers and letters, and
1250 // above all other unicode letters, symbols and punctuation.
1251 // (Locale collation of these chars varies across platforms)
1252 lsym = (zA[i] >= 32 && zA[i] < '0') || (zA[i] > '9' && zA[i] < 'A') ||
1253 (zA[i] > 'Z' && zA[i] < 'a') || (zA[i] > 'z' && zA[i] < 128);
1254 rsym = (zB[j] >= 32 && zB[j] < '0') || (zB[j] > '9' && zB[j] < 'A') ||
1255 (zB[j] > 'Z' && zB[j] < 'a') || (zB[j] > 'z' && zB[j] < 128);
1256 if (lsym && !rsym)
1257 return -1;
1258 if (!lsym && rsym)
1259 return 1;
1260 if (lsym && rsym)
1261 {
1262 if (zA[i] != zB[j])
1263 return zA[i] - zB[j];
1264 else
1265 { // Same symbol advance to next
1266 i++;
1267 j++;
1268 continue;
1269 }
1270 }
1271 //Decode single (1 to 4 bytes) UTF8 character to Unicode
1272 lc = UTF8ToUnicode(&zA[i], nKey1 - i, bytes);
1273 i += bytes;
1274 rc = UTF8ToUnicode(&zB[j], nKey2 - j, bytes);
1275 j += bytes;
1276 if (!g_langInfo.UseLocaleCollation())
1277 {
1278 // Apply case sensitive accent folding collation to non-ascii chars.
1279 // This mimics utf8_general_ci collation, and provides simple collation of LATIN-1 chars
1280 // for any platform that doesn't have a language specific collate facet implemented
1281 if (lc > 128)
1282 lc = GetCollationWeight(lc);
1283 if (rc > 128)
1284 rc = GetCollationWeight(rc);
1285 }
1286 // Caseless comparison so convert ascii upper case to lower case
1287 if (lc >= 'A' && lc <= 'Z')
1288 lc += 'a' - 'A';
1289 if (rc >= 'A' && rc <= 'Z')
1290 rc += 'a' - 'A';
1291
1292 if (lc != rc)
1293 {
1294 if (!g_langInfo.UseLocaleCollation() || (lc <= 128 && rc <= 128))
1295 // Compare unicode (having applied accent folding collation to non-ascii chars).
1296 return lc - rc;
1297 else
1298 {
1299 // Fetch collation facet from locale to do comparison of wide char although on some
1300 // platforms this is not langauge specific but just compares unicode
1301 const std::collate<wchar_t>& coll =
1302 std::use_facet<std::collate<wchar_t>>(g_langInfo.GetSystemLocale());
1303 int cmp_res = coll.compare(&lc, &lc + 1, &rc, &rc + 1);
1304 if (cmp_res != 0)
1305 return cmp_res;
1306 }
1307 }
1308 i++;
1309 j++;
1310 }
1311 // Compared characters of shortest are the same as longest, length determines order
1312 return (nKey1 - nKey2);
1313 }
1314
DateStringToYYYYMMDD(const std::string & dateString)1315 int StringUtils::DateStringToYYYYMMDD(const std::string &dateString)
1316 {
1317 std::vector<std::string> days = StringUtils::Split(dateString, '-');
1318 if (days.size() == 1)
1319 return atoi(days[0].c_str());
1320 else if (days.size() == 2)
1321 return atoi(days[0].c_str())*100+atoi(days[1].c_str());
1322 else if (days.size() == 3)
1323 return atoi(days[0].c_str())*10000+atoi(days[1].c_str())*100+atoi(days[2].c_str());
1324 else
1325 return -1;
1326 }
1327
ISODateToLocalizedDate(const std::string & strIsoDate)1328 std::string StringUtils::ISODateToLocalizedDate(const std::string& strIsoDate)
1329 {
1330 // Convert ISO8601 date strings YYYY, YYYY-MM, or YYYY-MM-DD to (partial) localized date strings
1331 CDateTime date;
1332 std::string formattedDate = strIsoDate;
1333 if (formattedDate.size() == 10)
1334 {
1335 date.SetFromDBDate(strIsoDate);
1336 formattedDate = date.GetAsLocalizedDate();
1337 }
1338 else if (formattedDate.size() == 7)
1339 {
1340 std::string strFormat = date.GetAsLocalizedDate(false);
1341 std::string tempdate;
1342 // find which date separator we are using. Can be -./
1343 size_t pos = strFormat.find_first_of("-./");
1344 if (pos != std::string::npos)
1345 {
1346 bool yearFirst = strFormat.find("1601") == 0; // true if year comes first
1347 std::string sep = strFormat.substr(pos, 1);
1348 if (yearFirst)
1349 { // build formatted date with year first, then separator and month
1350 tempdate = formattedDate.substr(0, 4);
1351 tempdate += sep;
1352 tempdate += formattedDate.substr(5, 2);
1353 }
1354 else
1355 {
1356 tempdate = formattedDate.substr(5, 2);
1357 tempdate += sep;
1358 tempdate += formattedDate.substr(0, 4);
1359 }
1360 formattedDate = tempdate;
1361 }
1362 // return either just the year or the locally formatted version of the ISO date
1363 }
1364 return formattedDate;
1365 }
1366
TimeStringToSeconds(const std::string & timeString)1367 long StringUtils::TimeStringToSeconds(const std::string &timeString)
1368 {
1369 std::string strCopy(timeString);
1370 StringUtils::Trim(strCopy);
1371 if(StringUtils::EndsWithNoCase(strCopy, " min"))
1372 {
1373 // this is imdb format of "XXX min"
1374 return 60 * atoi(strCopy.c_str());
1375 }
1376 else
1377 {
1378 std::vector<std::string> secs = StringUtils::Split(strCopy, ':');
1379 int timeInSecs = 0;
1380 for (unsigned int i = 0; i < 3 && i < secs.size(); i++)
1381 {
1382 timeInSecs *= 60;
1383 timeInSecs += atoi(secs[i].c_str());
1384 }
1385 return timeInSecs;
1386 }
1387 }
1388
SecondsToTimeString(long lSeconds,TIME_FORMAT format)1389 std::string StringUtils::SecondsToTimeString(long lSeconds, TIME_FORMAT format)
1390 {
1391 bool isNegative = lSeconds < 0;
1392 lSeconds = std::abs(lSeconds);
1393
1394 std::string strHMS;
1395 if (format == TIME_FORMAT_SECS)
1396 strHMS = StringUtils::Format("%i", lSeconds);
1397 else if (format == TIME_FORMAT_MINS)
1398 strHMS = StringUtils::Format("%i", lrintf(static_cast<float>(lSeconds) / 60.0f));
1399 else if (format == TIME_FORMAT_HOURS)
1400 strHMS = StringUtils::Format("%i", lrintf(static_cast<float>(lSeconds) / 3600.0f));
1401 else if (format & TIME_FORMAT_M)
1402 strHMS += StringUtils::Format("%i", lSeconds % 3600 / 60);
1403 else
1404 {
1405 int hh = lSeconds / 3600;
1406 lSeconds = lSeconds % 3600;
1407 int mm = lSeconds / 60;
1408 int ss = lSeconds % 60;
1409
1410 if (format == TIME_FORMAT_GUESS)
1411 format = (hh >= 1) ? TIME_FORMAT_HH_MM_SS : TIME_FORMAT_MM_SS;
1412 if (format & TIME_FORMAT_HH)
1413 strHMS += StringUtils::Format("%2.2i", hh);
1414 else if (format & TIME_FORMAT_H)
1415 strHMS += StringUtils::Format("%i", hh);
1416 if (format & TIME_FORMAT_MM)
1417 strHMS += StringUtils::Format(strHMS.empty() ? "%2.2i" : ":%2.2i", mm);
1418 if (format & TIME_FORMAT_SS)
1419 strHMS += StringUtils::Format(strHMS.empty() ? "%2.2i" : ":%2.2i", ss);
1420 }
1421
1422 if (isNegative)
1423 strHMS = "-" + strHMS;
1424
1425 return strHMS;
1426 }
1427
IsNaturalNumber(const std::string & str)1428 bool StringUtils::IsNaturalNumber(const std::string& str)
1429 {
1430 size_t i = 0, n = 0;
1431 // allow whitespace,digits,whitespace
1432 while (i < str.size() && isspace((unsigned char) str[i]))
1433 i++;
1434 while (i < str.size() && isdigit((unsigned char) str[i]))
1435 {
1436 i++; n++;
1437 }
1438 while (i < str.size() && isspace((unsigned char) str[i]))
1439 i++;
1440 return i == str.size() && n > 0;
1441 }
1442
IsInteger(const std::string & str)1443 bool StringUtils::IsInteger(const std::string& str)
1444 {
1445 size_t i = 0, n = 0;
1446 // allow whitespace,-,digits,whitespace
1447 while (i < str.size() && isspace((unsigned char) str[i]))
1448 i++;
1449 if (i < str.size() && str[i] == '-')
1450 i++;
1451 while (i < str.size() && isdigit((unsigned char) str[i]))
1452 {
1453 i++; n++;
1454 }
1455 while (i < str.size() && isspace((unsigned char) str[i]))
1456 i++;
1457 return i == str.size() && n > 0;
1458 }
1459
asciidigitvalue(char chr)1460 int StringUtils::asciidigitvalue(char chr)
1461 {
1462 if (!isasciidigit(chr))
1463 return -1;
1464
1465 return chr - '0';
1466 }
1467
asciixdigitvalue(char chr)1468 int StringUtils::asciixdigitvalue(char chr)
1469 {
1470 int v = asciidigitvalue(chr);
1471 if (v >= 0)
1472 return v;
1473 if (chr >= 'a' && chr <= 'f')
1474 return chr - 'a' + 10;
1475 if (chr >= 'A' && chr <= 'F')
1476 return chr - 'A' + 10;
1477
1478 return -1;
1479 }
1480
1481
RemoveCRLF(std::string & strLine)1482 void StringUtils::RemoveCRLF(std::string& strLine)
1483 {
1484 StringUtils::TrimRight(strLine, "\n\r");
1485 }
1486
SizeToString(int64_t size)1487 std::string StringUtils::SizeToString(int64_t size)
1488 {
1489 std::string strLabel;
1490 const char prefixes[] = {' ', 'k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'};
1491 unsigned int i = 0;
1492 double s = (double)size;
1493 while (i < ARRAY_SIZE(prefixes) && s >= 1000.0)
1494 {
1495 s /= 1024.0;
1496 i++;
1497 }
1498
1499 if (!i)
1500 strLabel = StringUtils::Format("%.lf B", s);
1501 else if (i == ARRAY_SIZE(prefixes))
1502 {
1503 if (s >= 1000.0)
1504 strLabel = StringUtils::Format(">999.99 %cB", prefixes[i - 1]);
1505 else
1506 strLabel = StringUtils::Format("%.2lf %cB", s, prefixes[i - 1]);
1507 }
1508 else if (s >= 100.0)
1509 strLabel = StringUtils::Format("%.1lf %cB", s, prefixes[i]);
1510 else
1511 strLabel = StringUtils::Format("%.2lf %cB", s, prefixes[i]);
1512
1513 return strLabel;
1514 }
1515
BinaryStringToString(const std::string & in)1516 std::string StringUtils::BinaryStringToString(const std::string& in)
1517 {
1518 std::string out;
1519 out.reserve(in.size() / 2);
1520 for (const char *cur = in.c_str(), *end = cur + in.size(); cur != end; ++cur) {
1521 if (*cur == '\\') {
1522 ++cur;
1523 if (cur == end) {
1524 break;
1525 }
1526 if (isdigit(*cur)) {
1527 char* end;
1528 unsigned long num = strtol(cur, &end, 10);
1529 cur = end - 1;
1530 out.push_back(num);
1531 continue;
1532 }
1533 }
1534 out.push_back(*cur);
1535 }
1536 return out;
1537 }
1538
ToHexadecimal(const std::string & in)1539 std::string StringUtils::ToHexadecimal(const std::string& in)
1540 {
1541 std::ostringstream ss;
1542 ss << std::hex;
1543 for (unsigned char ch : in) {
1544 ss << std::setw(2) << std::setfill('0') << static_cast<unsigned long> (ch);
1545 }
1546 return ss.str();
1547 }
1548
// Checks whether str starts with a letter: an ascii letter or a two byte UTF8 encoded latin
// letter. Returns the length in bytes of the letter (1 or 2), or -1 if str does not start
// with such a letter.
int IsUTF8Letter(const unsigned char *str)
{
  // reference:
  // unicode -> utf8 table: http://www.utf8-chartable.de/
  // latin characters in unicode: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode
  const unsigned char lead = str[0];
  if (lead == 0)
    return -1;

  const bool isAsciiLetter = (lead >= 'a' && lead <= 'z') || (lead >= 'A' && lead <= 'Z');
  if (isAsciiLetter)
    return 1;
  if ((lead & 0x80) == 0)
    return -1; // any other ascii char

  const unsigned char trail = str[1];
  // Every accepted two byte letter has its second byte in the range 0x80 to 0xBF
  if (trail < 0x80 || trail > 0xBF)
    return -1;

  switch (lead)
  {
    case 0xC3:
      // latin 1 letter table: http://en.wikipedia.org/wiki/C1_Controls_and_Latin-1_Supplement
      // 0x97 and 0xB7 are the two entries of this table that are not accepted
      return (trail != 0x97 && trail != 0xB7) ? 2 : -1;
    case 0xC4:
    case 0xC5:
    case 0xC6:
    case 0xC7:
      // latin extended A table: http://en.wikipedia.org/wiki/Latin_Extended-A
      return 2;
    case 0xC8:
    case 0xC9:
      // latin extended B table: http://en.wikipedia.org/wiki/Latin_Extended-B
      return 2;
    case 0xCA:
      // International Phonetic Alphabet: http://en.wikipedia.org/wiki/IPA_Extensions_(Unicode_block)
      return trail <= 0xAF ? 2 : -1;
    default:
      return -1;
  }
}
1578
FindWords(const char * str,const char * wordLowerCase)1579 size_t StringUtils::FindWords(const char *str, const char *wordLowerCase)
1580 {
1581 // NOTE: This assumes word is lowercase!
1582 const unsigned char *s = (const unsigned char *)str;
1583 do
1584 {
1585 // start with a compare
1586 const unsigned char *c = s;
1587 const unsigned char *w = (const unsigned char *)wordLowerCase;
1588 bool same = true;
1589 while (same && *c && *w)
1590 {
1591 unsigned char lc = *c++;
1592 if (lc >= 'A' && lc <= 'Z')
1593 lc += 'a'-'A';
1594
1595 if (lc != *w++) // different
1596 same = false;
1597 }
1598 if (same && *w == 0) // only the same if word has been exhausted
1599 return (const char *)s - str;
1600
1601 // otherwise, skip current word (composed by latin letters) or number
1602 int l;
1603 if (*s >= '0' && *s <= '9')
1604 {
1605 ++s;
1606 while (*s >= '0' && *s <= '9') ++s;
1607 }
1608 else if ((l = IsUTF8Letter(s)) > 0)
1609 {
1610 s += l;
1611 while ((l = IsUTF8Letter(s)) > 0) s += l;
1612 }
1613 else
1614 ++s;
1615 while (*s && *s == ' ') s++;
1616
1617 // and repeat until we're done
1618 } while (*s);
1619
1620 return std::string::npos;
1621 }
1622
1623 // assumes it is called from after the first open bracket is found
FindEndBracket(const std::string & str,char opener,char closer,int startPos)1624 int StringUtils::FindEndBracket(const std::string &str, char opener, char closer, int startPos)
1625 {
1626 int blocks = 1;
1627 for (unsigned int i = startPos; i < str.size(); i++)
1628 {
1629 if (str[i] == opener)
1630 blocks++;
1631 else if (str[i] == closer)
1632 {
1633 blocks--;
1634 if (!blocks)
1635 return i;
1636 }
1637 }
1638
1639 return (int)std::string::npos;
1640 }
1641
WordToDigits(std::string & word)1642 void StringUtils::WordToDigits(std::string &word)
1643 {
1644 static const char word_to_letter[] = "22233344455566677778889999";
1645 StringUtils::ToLower(word);
1646 for (unsigned int i = 0; i < word.size(); ++i)
1647 { // NB: This assumes ascii, which probably needs extending at some point.
1648 char letter = word[i];
1649 if ((letter >= 'a' && letter <= 'z')) // assume contiguous letter range
1650 {
1651 word[i] = word_to_letter[letter-'a'];
1652 }
1653 else if (letter < '0' || letter > '9') // We want to keep 0-9!
1654 {
1655 word[i] = ' '; // replace everything else with a space
1656 }
1657 }
1658 }
1659
CreateUUID()1660 std::string StringUtils::CreateUUID()
1661 {
1662 #ifdef HAVE_NEW_CROSSGUID
1663 return xg::newGuid().str();
1664 #else
1665 static GuidGenerator guidGenerator;
1666 auto guid = guidGenerator.newGuid();
1667
1668 std::stringstream strGuid; strGuid << guid;
1669 return strGuid.str();
1670 #endif
1671 }
1672
ValidateUUID(const std::string & uuid)1673 bool StringUtils::ValidateUUID(const std::string &uuid)
1674 {
1675 CRegExp guidRE;
1676 guidRE.RegComp(ADDON_GUID_RE);
1677 return (guidRE.RegFind(uuid.c_str()) == 0);
1678 }
1679
CompareFuzzy(const std::string & left,const std::string & right)1680 double StringUtils::CompareFuzzy(const std::string &left, const std::string &right)
1681 {
1682 return (0.5 + fstrcmp(left.c_str(), right.c_str()) * (left.length() + right.length())) / 2.0;
1683 }
1684
FindBestMatch(const std::string & str,const std::vector<std::string> & strings,double & matchscore)1685 int StringUtils::FindBestMatch(const std::string &str, const std::vector<std::string> &strings, double &matchscore)
1686 {
1687 int best = -1;
1688 matchscore = 0;
1689
1690 int i = 0;
1691 for (std::vector<std::string>::const_iterator it = strings.begin(); it != strings.end(); ++it, i++)
1692 {
1693 int maxlength = std::max(str.length(), it->length());
1694 double score = StringUtils::CompareFuzzy(str, *it) / maxlength;
1695 if (score > matchscore)
1696 {
1697 matchscore = score;
1698 best = i;
1699 }
1700 }
1701 return best;
1702 }
1703
ContainsKeyword(const std::string & str,const std::vector<std::string> & keywords)1704 bool StringUtils::ContainsKeyword(const std::string &str, const std::vector<std::string> &keywords)
1705 {
1706 for (std::vector<std::string>::const_iterator it = keywords.begin(); it != keywords.end(); ++it)
1707 {
1708 if (str.find(*it) != str.npos)
1709 return true;
1710 }
1711 return false;
1712 }
1713
utf8_strlen(const char * s)1714 size_t StringUtils::utf8_strlen(const char *s)
1715 {
1716 size_t length = 0;
1717 while (*s)
1718 {
1719 if ((*s++ & 0xC0) != 0x80)
1720 length++;
1721 }
1722 return length;
1723 }
1724
Paramify(const std::string & param)1725 std::string StringUtils::Paramify(const std::string ¶m)
1726 {
1727 std::string result = param;
1728 // escape backspaces
1729 StringUtils::Replace(result, "\\", "\\\\");
1730 // escape double quotes
1731 StringUtils::Replace(result, "\"", "\\\"");
1732
1733 // add double quotes around the whole string
1734 return "\"" + result + "\"";
1735 }
1736
Tokenize(const std::string & input,const std::string & delimiters)1737 std::vector<std::string> StringUtils::Tokenize(const std::string &input, const std::string &delimiters)
1738 {
1739 std::vector<std::string> tokens;
1740 Tokenize(input, tokens, delimiters);
1741 return tokens;
1742 }
1743
Tokenize(const std::string & input,std::vector<std::string> & tokens,const std::string & delimiters)1744 void StringUtils::Tokenize(const std::string& input, std::vector<std::string>& tokens, const std::string& delimiters)
1745 {
1746 tokens.clear();
1747 // Skip delimiters at beginning.
1748 std::string::size_type dataPos = input.find_first_not_of(delimiters);
1749 while (dataPos != std::string::npos)
1750 {
1751 // Find next delimiter
1752 const std::string::size_type nextDelimPos = input.find_first_of(delimiters, dataPos);
1753 // Found a token, add it to the vector.
1754 tokens.push_back(input.substr(dataPos, nextDelimPos - dataPos));
1755 // Skip delimiters. Note the "not_of"
1756 dataPos = input.find_first_not_of(delimiters, nextDelimPos);
1757 }
1758 }
1759
Tokenize(const std::string & input,const char delimiter)1760 std::vector<std::string> StringUtils::Tokenize(const std::string &input, const char delimiter)
1761 {
1762 std::vector<std::string> tokens;
1763 Tokenize(input, tokens, delimiter);
1764 return tokens;
1765 }
1766
Tokenize(const std::string & input,std::vector<std::string> & tokens,const char delimiter)1767 void StringUtils::Tokenize(const std::string& input, std::vector<std::string>& tokens, const char delimiter)
1768 {
1769 tokens.clear();
1770 // Skip delimiters at beginning.
1771 std::string::size_type dataPos = input.find_first_not_of(delimiter);
1772 while (dataPos != std::string::npos)
1773 {
1774 // Find next delimiter
1775 const std::string::size_type nextDelimPos = input.find(delimiter, dataPos);
1776 // Found a token, add it to the vector.
1777 tokens.push_back(input.substr(dataPos, nextDelimPos - dataPos));
1778 // Skip delimiters. Note the "not_of"
1779 dataPos = input.find_first_not_of(delimiter, nextDelimPos);
1780 }
1781 }
1782
ToUint64(const std::string & str,uint64_t fallback)1783 uint64_t StringUtils::ToUint64(const std::string& str, uint64_t fallback) noexcept
1784 {
1785 std::istringstream iss(str);
1786 uint64_t result(fallback);
1787 iss >> result;
1788 return result;
1789 }
1790
FormatFileSize(uint64_t bytes)1791 std::string StringUtils::FormatFileSize(uint64_t bytes)
1792 {
1793 const std::array<std::string, 6> units{{"B", "kB", "MB", "GB", "TB", "PB"}};
1794 if (bytes < 1000)
1795 return Format("%" PRIu64 "B", bytes);
1796
1797 size_t i = 0;
1798 double value = static_cast<double>(bytes);
1799 while (i + 1 < units.size() && value >= 999.5)
1800 {
1801 ++i;
1802 value /= 1024.0;
1803 }
1804 unsigned int decimals = value < 9.995 ? 2 : (value < 99.95 ? 1 : 0);
1805 auto frmt = "%." + Format("%u", decimals) + "f%s";
1806 return Format(frmt.c_str(), value, units[i].c_str());
1807 }
1808
GetOriginalLocale()1809 const std::locale& StringUtils::GetOriginalLocale() noexcept
1810 {
1811 return g_langInfo.GetOriginalLocale();
1812 }
1813