1 /*
2  *  IceWM - C++ wrapper for locale/unicode conversion
3  *  Copyright (C) 2001 The Authors of IceWM
4  *
5  *  Released under terms of the GNU Library General Public License
6  *
7  *  2001/07/21: Mathias Hasselmann <mathias.hasselmann@gmx.net>
8  *      - initial revision
9  */
10 
11 #include "config.h"
12 #include "ylocale.h"
13 #include "ascii.h"
14 #include "base.h"
15 #include "intl.h"
16 #include <string.h>
17 #include <stdlib.h>
18 #include <stdio.h>
19 #include <wchar.h>
20 
21 #ifdef CONFIG_I18N
22 #include <errno.h>
23 #include <langinfo.h>
24 #include <locale.h>
25 #include <assert.h>
26 #include <X11/Xlib.h>
27 #include <iconv.h>
28 
// Sentinel conversion descriptor: the value iconv_open() yields on failure.
// Used in this file to initialise descriptors and to test whether opening
// a converter succeeded.
const iconv_t invalid = iconv_t(-1);
30 
/*
 * Selects the process locale and owns the two iconv conversion descriptors
 * used to translate between the locale's codeset and Unicode.
 *
 * The descriptors are released in the destructor, so instances must not be
 * copied: a copy would close the same descriptors a second time.
 */
class YConverter {
public:
    // Sets the locale to localeName and opens both converters.
    YConverter(const char* localeName);
    // Closes both converters.
    ~YConverter();

    // Non-copyable: the iconv descriptors have a single owner.
    YConverter(const YConverter&) = delete;
    YConverter& operator=(const YConverter&) = delete;

    // Descriptor converting from the locale codeset to Unicode.
    iconv_t unicode() const { return toUnicode; }
    // Descriptor converting from Unicode to the locale codeset.
    iconv_t localer() const { return toLocale; }
    // Locale name as reported by setlocale().
    const char* localeName() const { return fLocaleName; }
    // Name of the locale's character set.
    const char* codesetName() const { return fCodeset; }
    // Result of XSetLocaleModifiers("").
    const char* modifiers() const { return fModifiers; }

private:
    // Opens toUnicode and toLocale; terminates the program on failure.
    void getConverters();
    // Tries each name in the null-terminated list 'to' until iconv_open()
    // succeeds; 'to' is left pointing at the name that was used.
    iconv_t getConverter(const char* from, const char**& to);
    // Determines the codeset name of the current locale.
    const char* getCodeset();

    iconv_t toUnicode;
    iconv_t toLocale;
    const char* fLocaleName;
    const char* fModifiers;
    const char* fCodeset;
};
53 
YConverter(const char * localeName)54 YConverter::YConverter(const char* localeName) :
55     toUnicode(invalid),
56     toLocale(invalid),
57     fLocaleName(setlocale(LC_ALL, localeName))
58 {
59     if ( !fLocaleName || !XSupportsLocale()) {
60         warn(_("Locale not supported by C library or Xlib. "
61                "Falling back to 'C' locale'."));
62         fLocaleName = setlocale(LC_ALL, "C");
63     }
64     fModifiers = XSetLocaleModifiers("");
65     fCodeset = getCodeset();
66 
67     MSG(("locale: %s, MB_CUR_MAX: %zd, codeset: %s, endian: %c",
68          fLocaleName, MB_CUR_MAX, fCodeset, little() ? 'l' : 'b'));
69 
70     getConverters();
71 }
72 
getCodeset()73 const char* YConverter::getCodeset() {
74     const char* codeset = nullptr;
75     int const codesetItems[] = {
76 #ifdef CONFIG_NL_CODESETS
77         CONFIG_NL_CODESETS
78 #else
79         CODESET,
80 #ifdef _NL_CTYPE_CODESET_NAME
81         _NL_CTYPE_CODESET_NAME,
82 #endif
83         0
84 #endif
85     };
86 
87     for (int i = 0; i + 1 < int ACOUNT(codesetItems); ++i) {
88         codeset = nl_langinfo(codesetItems[i]);
89         if (nonempty(codeset)) {
90             break;
91         }
92     }
93 
94     if (isEmpty(codeset)) {
95         warn(_("Failed to determinate the current locale's codeset. "
96                "Assuming ISO-8859-1.\n"));
97         codeset = "ISO-8859-1";
98     }
99     return codeset;
100 }
101 
getConverters()102 void YConverter::getConverters() {
103 
104     // #warning "this is getting way too complicated"
105 
106     const char* unicodeCharsets[] = {
107 #ifdef CONFIG_UNICODE_SET
108         CONFIG_UNICODE_SET,
109 #endif
110 //      "WCHAR_T//TRANSLIT",
111         (little() ? "UCS-4LE//TRANSLIT" : "UCS-4BE//TRANSLIT"),
112 //      "WCHAR_T",
113         (little() ? "UCS-4LE" : "UCS-4BE"),
114         "UCS-4//TRANSLIT",
115         "UCS-4",
116         nullptr
117     };
118 
119     const char* localeCharsets[] = {
120         cstrJoin(fCodeset, "//TRANSLIT", nullptr),
121         fCodeset,
122         nullptr
123     };
124 
125     const char** ucs(unicodeCharsets);
126     toUnicode = getConverter(localeCharsets[1], ucs);
127     if (toUnicode == invalid)
128         die(1, _("iconv doesn't supply (sufficient) "
129                  "%s to %s converters."), localeCharsets[1], "Unicode");
130 
131     MSG(("toUnicode converts from %s to %s", localeCharsets[1], *ucs));
132 
133     const char** lcs(localeCharsets);
134     toLocale = getConverter(*ucs, lcs);
135     if (toLocale == invalid)
136         die(1, _("iconv doesn't supply (sufficient) "
137                  "%s to %s converters."), "Unicode", localeCharsets[1]);
138 
139     MSG(("toLocale converts from %s to %s", *ucs, *lcs));
140 
141     delete[] localeCharsets[0];
142 }
143 
~YConverter()144 YConverter::~YConverter() {
145     iconv_close(toUnicode);
146     iconv_close(toLocale);
147 }
148 
getConverter(const char * from,const char ** & to)149 iconv_t YConverter::getConverter(const char* from, const char**& to) {
150     iconv_t ic;
151     do {
152         ic = iconv_open(*to, from);
153     } while (ic == invalid && *++to);
154     return ic;
155 }
156 
157 #endif
158 
159 YLocale* YLocale::instance;
160 
YLocale(const char * localeName)161 YLocale::YLocale(const char* localeName)
162     : converter(nullptr)
163     , rightToLeft(false)
164     , codesetUTF8(false)
165 {
166     if (instance == nullptr) {
167         instance = this;
168 #ifdef CONFIG_I18N
169         converter = new YConverter(localeName);
170         codesetUTF8 = (0 == strncmp(converter->codesetName(), "UTF-8", 5));
171 #endif
172         bindtextdomain(PACKAGE, LOCDIR);
173         textdomain(PACKAGE);
174         getDirection();
175     }
176 }
177 
~YLocale()178 YLocale::~YLocale() {
179     if (instance == this) {
180         instance = nullptr;
181 #ifdef CONFIG_I18N
182         delete converter;
183 #endif
184     }
185 }
186 
187 #ifdef CONFIG_I18N
localeString(const wchar_t * uStr,size_t uLen,size_t & lLen)188 char* YLocale::localeString(const wchar_t* uStr, size_t uLen, size_t &lLen) {
189     PRECONDITION(instance);
190     if (uStr == nullptr)
191         return nullptr;
192 
193     iconv(instance->converter->localer(), nullptr, nullptr, nullptr, nullptr);
194 
195     size_t lSize = 4 * uLen;
196     char* lStr = new char[lSize + 1];
197 #ifdef __NetBSD__
198     const
199 #endif
200     char* inbuf = (char *) uStr;
201     char* outbuf = lStr;
202     size_t inlen = uLen * sizeof(wchar_t);
203     size_t outlen = lSize;
204 
205     errno = 0;
206     size_t count = iconv(instance->converter->localer(),
207                          &inbuf, &inlen, &outbuf, &outlen);
208     if (count == size_t(-1)) {
209         static unsigned count, shift;
210         if (++count <= 2 || (count - 2) >= (1U << shift)) {
211             ++shift;
212             warn("Invalid unicode string: %s (%zd/%u)",
213                  strerror(errno), ((wchar_t*)inbuf - uStr), *inbuf);
214         }
215     }
216 
217     *outbuf = '\0';
218     lLen = outbuf - lStr;
219 
220     return lStr;
221 }
222 
unicodeString(const char * lStr,size_t const lLen,size_t & uLen)223 wchar_t* YLocale::unicodeString(const char* lStr, size_t const lLen,
224                                size_t& uLen)
225 {
226     PRECONDITION(instance);
227     if (lStr == nullptr)
228         return nullptr;
229 
230     iconv(instance->converter->unicode(), nullptr, nullptr, nullptr, nullptr);
231 
232     wchar_t* uStr(new wchar_t[lLen + 1]);
233 #ifdef __NetBSD__
234     const
235 #endif
236     char* inbuf(const_cast<char *>(lStr));
237     char* outbuf(reinterpret_cast<char *>(uStr));
238     size_t inlen(lLen), outlen(4 * lLen);
239 
240     errno = 0;
241     size_t count = iconv(instance->converter->unicode(),
242                          &inbuf, &inlen, &outbuf, &outlen);
243     if (count == size_t(-1)) {
244         static unsigned count, shift;
245         if (++count >= (1U << shift)) {
246             ++shift;
247             warn(_("Invalid multibyte string \"%s\": %s"), lStr, strerror(errno));
248         }
249     }
250 
251     *(reinterpret_cast<wchar_t *>(outbuf)) = 0;
252     uLen = reinterpret_cast<wchar_t *>(outbuf) - uStr;
253 
254     return uStr;
255 }
256 #else
257 
/*
 * Converts len bytes of multibyte text at str into wide characters using
 * mbtowc() and the current C locale.
 *
 * Returns a new[]-allocated, zero-terminated wide string that the caller
 * must delete[], and stores the number of wide characters in out. Bytes
 * that do not start a valid character, as well as NUL bytes, are skipped
 * one at a time. A null str is treated as empty input.
 */
wchar_t* YLocale::wideCharString(const char* str, size_t len, size_t& out) {
    if (str == nullptr)
        len = 0;

    wchar_t* text = new wchar_t[len + 1];
    size_t count = 0;

    // Start from the initial shift state.
    mbtowc(nullptr, nullptr, size_t(0));

    // The index is advanced only inside the body: exactly by the size of
    // the character just converted, or by one byte when nothing converted.
    for (size_t i = 0; i < len; ) {
        int k = mbtowc(&text[count], str + i, len - i);
        if (k < 1) {
            if (k < 0) {
                // Conversion error: return to the initial shift state.
                mbtowc(nullptr, nullptr, size_t(0));
            }
            ++i;
        } else {
            i += size_t(k);
            ++count;
        }
    }

    text[count] = 0;
    out = count;
    return text;
}
275 #endif
276 
/*
 * Converts at most uLen wide characters starting at uStr into the
 * multibyte encoding of the current C locale.
 *
 * Returns a new[]-allocated, NUL-terminated string that the caller must
 * delete[], and stores its length in bytes in lLen. Conversion stops at
 * the first zero wide character or at the first character that cannot be
 * represented; whatever was converted before that point is returned.
 * Returns nullptr with lLen set to 0 when the input is null or empty, or
 * when nothing could be converted.
 */
char* YLocale::narrowString(const wchar_t* uStr, size_t uLen, size_t& lLen) {
    PRECONDITION(instance);
    lLen = 0;
    if (uStr == nullptr || uLen == 0) {
        return nullptr;
    }

    // wcrtomb() stores at most MB_CUR_MAX bytes per call, so this capacity
    // holds uLen characters plus the final terminator sequence and the
    // result can never be cut short by a full buffer.
    const size_t perChar = MB_CUR_MAX;
    const size_t size = (uLen + 1) * perChar;
    char* dest = new char[size + 1];

    mbstate_t state;
    memset(&state, 0, sizeof(mbstate_t));

    size_t done = 0;        // bytes written so far
    bool complete = true;   // false once an unconvertible character is hit
    for (size_t i = 0; i < uLen && uStr[i] != L'\0'; ++i) {
        const size_t bytes = wcrtomb(dest + done, uStr[i], &state);
        if (bytes == size_t(-1)) {
            complete = false;
            break;
        }
        done += bytes;
    }

    if (complete) {
        // Converting the terminator emits any sequence needed to return to
        // the initial shift state, followed by the NUL byte itself, which
        // is not counted in the length.
        const size_t bytes = wcrtomb(dest + done, L'\0', &state);
        if (bytes != size_t(-1) && bytes > 1) {
            done += bytes - 1;
        }
    }

    if (done == 0) {
        delete[] dest;
        return nullptr;
    }

    if (2 * done < size && 30 < size) {
        // The buffer is much larger than needed: hand out a tight copy.
        char* copy = new char[done + 1];
        memcpy(copy, dest, done);
        copy[done] = '\0';
        delete[] dest;
        dest = copy;
    } else {
        dest[done] = '\0';
    }

    lLen = done;
    return dest;
}
324 
getLocaleName()325 const char *YLocale::getLocaleName() {
326 #ifdef CONFIG_I18N
327     return instance->converter->localeName();
328 #else
329     return "C";
330 #endif
331 }
332 
getRating(const char * localeStr)333 int YLocale::getRating(const char *localeStr) {
334     const char *s1 = getLocaleName();
335     const char *s2 = localeStr;
336     int i = 0;
337     while (s1[i] && s1[i] == s2[i])
338         i++;
339     if (s1[i]) {
340         while (i && strchr("_@.", s2[i - 1]))
341             i--;
342     }
343     return i;
344 }
345 
getDirection()346 void YLocale::getDirection() {
347 #ifdef CONFIG_I18N
348     using namespace ASCII;
349     const char* loc = converter ? converter->localeName() : "C";
350     if (loc && isLower(*loc) && isLower(loc[1]) && !isAlpha(loc[2])) {
351         const char rtls[][4] = {
352             "ar",   // arabic
353             "fa",   // farsi
354             "he",   // hebrew
355             "ps",   // pashto
356             "sd",   // sindhi
357             "ur",   // urdu
358         };
359         for (auto rtl : rtls) {
360             if (rtl[0] == loc[0] && rtl[1] == loc[1]) {
361                 rightToLeft = true;
362                 break;
363             }
364         }
365     }
366 #endif
367 }
368 
369 // vim: set sw=4 ts=4 et:
370