1 /*
2 * IceWM - C++ wrapper for locale/unicode conversion
3 * Copyright (C) 2001 The Authors of IceWM
4 *
5 * Released under terms of the GNU Library General Public License
6 *
7 * 2001/07/21: Mathias Hasselmann <mathias.hasselmann@gmx.net>
8 * - initial revision
9 */
10
11 #include "config.h"
12 #include "ylocale.h"
13 #include "ascii.h"
14 #include "base.h"
15 #include "intl.h"
16 #include <string.h>
17 #include <stdlib.h>
18 #include <stdio.h>
19 #include <wchar.h>
20
21 #ifdef CONFIG_I18N
22 #include <errno.h>
23 #include <langinfo.h>
24 #include <locale.h>
25 #include <assert.h>
26 #include <X11/Xlib.h>
27 #include <iconv.h>
28
// Sentinel descriptor value: marks a converter that is not open and is
// the value the result of iconv_open() is compared against in this file.
const iconv_t invalid = (iconv_t) -1;
30
/*
 * Holds the locale name, X locale modifiers, codeset name and the two
 * iconv descriptors used to convert between the locale's multibyte
 * encoding and Unicode.
 *
 * The descriptors are closed by the destructor, so the class is made
 * non-copyable: a copy would close the same descriptors a second time.
 */
class YConverter {
public:
    explicit YConverter(const char* localeName);
    ~YConverter();

    YConverter(const YConverter&) = delete;
    YConverter& operator=(const YConverter&) = delete;

    // Descriptor converting locale encoding -> Unicode.
    iconv_t unicode() const { return toUnicode; }
    // Descriptor converting Unicode -> locale encoding.
    iconv_t localer() const { return toLocale; }
    const char* localeName() const { return fLocaleName; }
    const char* codesetName() const { return fCodeset; }
    const char* modifiers() const { return fModifiers; }

private:
    // Opens both descriptors; terminates the program when none can be opened.
    void getConverters();
    // Tries the null-terminated candidate list `to` until iconv_open succeeds.
    iconv_t getConverter(const char* from, const char**& to);
    // Determines the codeset name of the current locale.
    const char* getCodeset();

    iconv_t toUnicode;
    iconv_t toLocale;
    const char* fLocaleName;
    const char* fModifiers;
    const char* fCodeset;
};
53
YConverter(const char * localeName)54 YConverter::YConverter(const char* localeName) :
55 toUnicode(invalid),
56 toLocale(invalid),
57 fLocaleName(setlocale(LC_ALL, localeName))
58 {
59 if ( !fLocaleName || !XSupportsLocale()) {
60 warn(_("Locale not supported by C library or Xlib. "
61 "Falling back to 'C' locale'."));
62 fLocaleName = setlocale(LC_ALL, "C");
63 }
64 fModifiers = XSetLocaleModifiers("");
65 fCodeset = getCodeset();
66
67 MSG(("locale: %s, MB_CUR_MAX: %zd, codeset: %s, endian: %c",
68 fLocaleName, MB_CUR_MAX, fCodeset, little() ? 'l' : 'b'));
69
70 getConverters();
71 }
72
getCodeset()73 const char* YConverter::getCodeset() {
74 const char* codeset = nullptr;
75 int const codesetItems[] = {
76 #ifdef CONFIG_NL_CODESETS
77 CONFIG_NL_CODESETS
78 #else
79 CODESET,
80 #ifdef _NL_CTYPE_CODESET_NAME
81 _NL_CTYPE_CODESET_NAME,
82 #endif
83 0
84 #endif
85 };
86
87 for (int i = 0; i + 1 < int ACOUNT(codesetItems); ++i) {
88 codeset = nl_langinfo(codesetItems[i]);
89 if (nonempty(codeset)) {
90 break;
91 }
92 }
93
94 if (isEmpty(codeset)) {
95 warn(_("Failed to determinate the current locale's codeset. "
96 "Assuming ISO-8859-1.\n"));
97 codeset = "ISO-8859-1";
98 }
99 return codeset;
100 }
101
getConverters()102 void YConverter::getConverters() {
103
104 // #warning "this is getting way too complicated"
105
106 const char* unicodeCharsets[] = {
107 #ifdef CONFIG_UNICODE_SET
108 CONFIG_UNICODE_SET,
109 #endif
110 // "WCHAR_T//TRANSLIT",
111 (little() ? "UCS-4LE//TRANSLIT" : "UCS-4BE//TRANSLIT"),
112 // "WCHAR_T",
113 (little() ? "UCS-4LE" : "UCS-4BE"),
114 "UCS-4//TRANSLIT",
115 "UCS-4",
116 nullptr
117 };
118
119 const char* localeCharsets[] = {
120 cstrJoin(fCodeset, "//TRANSLIT", nullptr),
121 fCodeset,
122 nullptr
123 };
124
125 const char** ucs(unicodeCharsets);
126 toUnicode = getConverter(localeCharsets[1], ucs);
127 if (toUnicode == invalid)
128 die(1, _("iconv doesn't supply (sufficient) "
129 "%s to %s converters."), localeCharsets[1], "Unicode");
130
131 MSG(("toUnicode converts from %s to %s", localeCharsets[1], *ucs));
132
133 const char** lcs(localeCharsets);
134 toLocale = getConverter(*ucs, lcs);
135 if (toLocale == invalid)
136 die(1, _("iconv doesn't supply (sufficient) "
137 "%s to %s converters."), "Unicode", localeCharsets[1]);
138
139 MSG(("toLocale converts from %s to %s", *ucs, *lcs));
140
141 delete[] localeCharsets[0];
142 }
143
~YConverter()144 YConverter::~YConverter() {
145 iconv_close(toUnicode);
146 iconv_close(toLocale);
147 }
148
getConverter(const char * from,const char ** & to)149 iconv_t YConverter::getConverter(const char* from, const char**& to) {
150 iconv_t ic;
151 do {
152 ic = iconv_open(*to, from);
153 } while (ic == invalid && *++to);
154 return ic;
155 }
156
157 #endif
158
159 YLocale* YLocale::instance;
160
YLocale(const char * localeName)161 YLocale::YLocale(const char* localeName)
162 : converter(nullptr)
163 , rightToLeft(false)
164 , codesetUTF8(false)
165 {
166 if (instance == nullptr) {
167 instance = this;
168 #ifdef CONFIG_I18N
169 converter = new YConverter(localeName);
170 codesetUTF8 = (0 == strncmp(converter->codesetName(), "UTF-8", 5));
171 #endif
172 bindtextdomain(PACKAGE, LOCDIR);
173 textdomain(PACKAGE);
174 getDirection();
175 }
176 }
177
~YLocale()178 YLocale::~YLocale() {
179 if (instance == this) {
180 instance = nullptr;
181 #ifdef CONFIG_I18N
182 delete converter;
183 #endif
184 }
185 }
186
187 #ifdef CONFIG_I18N
localeString(const wchar_t * uStr,size_t uLen,size_t & lLen)188 char* YLocale::localeString(const wchar_t* uStr, size_t uLen, size_t &lLen) {
189 PRECONDITION(instance);
190 if (uStr == nullptr)
191 return nullptr;
192
193 iconv(instance->converter->localer(), nullptr, nullptr, nullptr, nullptr);
194
195 size_t lSize = 4 * uLen;
196 char* lStr = new char[lSize + 1];
197 #ifdef __NetBSD__
198 const
199 #endif
200 char* inbuf = (char *) uStr;
201 char* outbuf = lStr;
202 size_t inlen = uLen * sizeof(wchar_t);
203 size_t outlen = lSize;
204
205 errno = 0;
206 size_t count = iconv(instance->converter->localer(),
207 &inbuf, &inlen, &outbuf, &outlen);
208 if (count == size_t(-1)) {
209 static unsigned count, shift;
210 if (++count <= 2 || (count - 2) >= (1U << shift)) {
211 ++shift;
212 warn("Invalid unicode string: %s (%zd/%u)",
213 strerror(errno), ((wchar_t*)inbuf - uStr), *inbuf);
214 }
215 }
216
217 *outbuf = '\0';
218 lLen = outbuf - lStr;
219
220 return lStr;
221 }
222
unicodeString(const char * lStr,size_t const lLen,size_t & uLen)223 wchar_t* YLocale::unicodeString(const char* lStr, size_t const lLen,
224 size_t& uLen)
225 {
226 PRECONDITION(instance);
227 if (lStr == nullptr)
228 return nullptr;
229
230 iconv(instance->converter->unicode(), nullptr, nullptr, nullptr, nullptr);
231
232 wchar_t* uStr(new wchar_t[lLen + 1]);
233 #ifdef __NetBSD__
234 const
235 #endif
236 char* inbuf(const_cast<char *>(lStr));
237 char* outbuf(reinterpret_cast<char *>(uStr));
238 size_t inlen(lLen), outlen(4 * lLen);
239
240 errno = 0;
241 size_t count = iconv(instance->converter->unicode(),
242 &inbuf, &inlen, &outbuf, &outlen);
243 if (count == size_t(-1)) {
244 static unsigned count, shift;
245 if (++count >= (1U << shift)) {
246 ++shift;
247 warn(_("Invalid multibyte string \"%s\": %s"), lStr, strerror(errno));
248 }
249 }
250
251 *(reinterpret_cast<wchar_t *>(outbuf)) = 0;
252 uLen = reinterpret_cast<wchar_t *>(outbuf) - uStr;
253
254 return uStr;
255 }
256 #else
257
wideCharString(const char * str,size_t len,size_t & out)258 wchar_t* YLocale::wideCharString(const char* str, size_t len, size_t& out) {
259 wchar_t* text = new wchar_t[len + 1];
260 size_t count = 0;
261 mbtowc(nullptr, nullptr, size_t(0));
262 for (size_t i = 0; i < len; ++i) {
263 int k = mbtowc(&text[count], str + i, len - i);
264 if (k < 1) {
265 i++;
266 } else {
267 i += k;
268 count++;
269 }
270 }
271 text[count] = 0;
272 out = count;
273 return text;
274 }
275 #endif
276
narrowString(const wchar_t * uStr,size_t uLen,size_t & lLen)277 char* YLocale::narrowString(const wchar_t* uStr, size_t uLen, size_t& lLen) {
278 PRECONDITION(instance);
279 if (uStr == nullptr || uLen == 0) {
280 lLen = 0;
281 return nullptr;
282 }
283
284 size_t size = 4 + 3 * uLen / 2;
285 char* dest = new char[size + 1];
286 size_t done;
287
288 for (;;) {
289 const wchar_t* ptr = uStr;
290 mbstate_t state;
291 memset(&state, 0, sizeof(mbstate_t));
292 done = wcsrtombs(dest, &ptr, size, &state);
293 if (done == size_t(-1)) {
294 done = (ptr > uStr) ? ptr - uStr : 0;
295 if (done + 4 >= size) {
296 delete[] dest;
297 size = 4 + 3 * size / 2;
298 dest = new char[size + 1];
299 } else {
300 break;
301 }
302 } else {
303 break;
304 }
305 }
306
307 if (done == 0) {
308 delete[] dest;
309 dest = nullptr;
310 }
311 else if (2 * done < size && 30 < size) {
312 char* copy = new char[done + 1];
313 memcpy(copy, dest, done);
314 copy[done] = '\0';
315 delete[] dest;
316 dest = copy;
317 } else {
318 dest[done] = '\0';
319 }
320
321 lLen = done;
322 return dest;
323 }
324
getLocaleName()325 const char *YLocale::getLocaleName() {
326 #ifdef CONFIG_I18N
327 return instance->converter->localeName();
328 #else
329 return "C";
330 #endif
331 }
332
getRating(const char * localeStr)333 int YLocale::getRating(const char *localeStr) {
334 const char *s1 = getLocaleName();
335 const char *s2 = localeStr;
336 int i = 0;
337 while (s1[i] && s1[i] == s2[i])
338 i++;
339 if (s1[i]) {
340 while (i && strchr("_@.", s2[i - 1]))
341 i--;
342 }
343 return i;
344 }
345
getDirection()346 void YLocale::getDirection() {
347 #ifdef CONFIG_I18N
348 using namespace ASCII;
349 const char* loc = converter ? converter->localeName() : "C";
350 if (loc && isLower(*loc) && isLower(loc[1]) && !isAlpha(loc[2])) {
351 const char rtls[][4] = {
352 "ar", // arabic
353 "fa", // farsi
354 "he", // hebrew
355 "ps", // pashto
356 "sd", // sindhi
357 "ur", // urdu
358 };
359 for (auto rtl : rtls) {
360 if (rtl[0] == loc[0] && rtl[1] == loc[1]) {
361 rightToLeft = true;
362 break;
363 }
364 }
365 }
366 #endif
367 }
368
369 // vim: set sw=4 ts=4 et:
370