1 /**********************************************************************
2 Freeciv - Copyright (C) 2003-2004 - The Freeciv Project
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; either version 2, or (at your option)
6 any later version.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12 ***********************************************************************/
13
14 #ifdef HAVE_CONFIG_H
15 #include <fc_config.h>
16 #endif
17
18 #include <errno.h>
19 #include <stdarg.h>
20 #include <stdio.h>
21 #include <string.h>
22
23 #ifdef HAVE_ICONV
24 #include <iconv.h>
25 #endif
26
27 #ifdef HAVE_LANGINFO_CODESET
28 #include <langinfo.h>
29 #endif
30
31 #ifdef HAVE_LIBCHARSET
32 #include <libcharset.h>
33 #endif
34
35 /* utility */
36 #include "fciconv.h"
37 #include "fcintl.h"
38 #include "log.h"
39 #include "mem.h"
40 #include "support.h"
41
42 static bool is_init = FALSE;
43 static char convert_buffer[4096];
44 static const char *transliteration_string;
45
46 #ifdef HAVE_ICONV
47 static const char *local_encoding, *data_encoding, *internal_encoding;
48 #else /* HAVE_ICONV */
49 /* Hack to confuse the compiler into working. */
50 # define local_encoding get_local_encoding()
51 # define data_encoding get_local_encoding()
52 # define internal_encoding get_local_encoding()
53 #endif /* HAVE_ICONV */
54
55 /***************************************************************************
56 Must be called during the initialization phase of server and client to
57 initialize the character encodings to be used.
58
59 Pass an internal encoding of NULL to use the local encoding internally.
60 ***************************************************************************/
init_character_encodings(const char * my_internal_encoding,bool my_use_transliteration)61 void init_character_encodings(const char *my_internal_encoding,
62 bool my_use_transliteration)
63 {
64 transliteration_string = "";
65 #ifdef HAVE_ICONV
66 if (my_use_transliteration) {
67 transliteration_string = "//TRANSLIT";
68 }
69
70 /* Set the data encoding - first check $FREECIV_DATA_ENCODING,
71 * then fall back to the default. */
72 data_encoding = getenv("FREECIV_DATA_ENCODING");
73 if (!data_encoding) {
74 data_encoding = FC_DEFAULT_DATA_ENCODING;
75 }
76
77 /* Set the local encoding - first check $FREECIV_LOCAL_ENCODING,
78 * then ask the system. */
79 local_encoding = getenv("FREECIV_LOCAL_ENCODING");
80 if (!local_encoding) {
81 #ifdef HAVE_LIBCHARSET
82 local_encoding = locale_charset();
83 #else /* HAVE_LIBCHARSET */
84 #ifdef HAVE_LANGINFO_CODESET
85 local_encoding = nl_langinfo(CODESET);
86 #else /* HAVE_LANGINFO_CODESET */
87 local_encoding = "";
88 #endif /* HAVE_LANGINFO_CODESET */
89 #endif /* HAVE_LIBCHARSET */
90 if (fc_strcasecmp(local_encoding, "ANSI_X3.4-1968") == 0
91 || fc_strcasecmp(local_encoding, "ASCII") == 0
92 || fc_strcasecmp(local_encoding, "US-ASCII") == 0) {
93 /* HACK: use latin1 instead of ascii in typical cases when the
94 * encoding is unconfigured. */
95 local_encoding = "ISO-8859-1";
96 }
97
98 if (fc_strcasecmp(local_encoding, "646") == 0) {
99 /* HACK: On Solaris the encoding always comes up as "646" (ascii),
100 * which iconv doesn't understand. Work around it by using UTF-8
101 * instead. */
102 local_encoding = "UTF-8";
103 }
104 }
105
106 /* Set the internal encoding - first check $FREECIV_INTERNAL_ENCODING,
107 * then check the passed-in default value, then fall back to the local
108 * encoding. */
109 internal_encoding = getenv("FREECIV_INTERNAL_ENCODING");
110 if (!internal_encoding) {
111 internal_encoding = my_internal_encoding;
112
113 if (!internal_encoding) {
114 internal_encoding = local_encoding;
115 }
116 }
117
118 #ifdef ENABLE_NLS
119 bind_textdomain_codeset(PACKAGE, internal_encoding);
120 #endif
121
122 #ifdef DEBUG
123 fprintf(stderr, "Encodings: Data=%s, Local=%s, Internal=%s\n",
124 data_encoding, local_encoding, internal_encoding);
125 #endif /* DEBUG */
126
127 #else /* HAVE_ICONV */
128 /* log_* may not work at this point. */
129 fprintf(stderr,
130 _("You are running Freeciv without using iconv. Unless\n"
131 "you are using the UTF-8 character set, some characters\n"
132 "may not be displayed properly. You can download iconv\n"
133 "at http://gnu.org/.\n"));
134 #endif /* HAVE_ICONV */
135
136 is_init = TRUE;
137 }
138
139 /***************************************************************************
140 Return the data encoding (usually UTF-8).
141 ***************************************************************************/
get_data_encoding(void)142 const char *get_data_encoding(void)
143 {
144 fc_assert_ret_val(is_init, NULL);
145 return data_encoding;
146 }
147
/***************************************************************************
  Accessor for the name of the local encoding (dependent on the system).

  With iconv this is the value chosen by init_character_encodings(), and
  the call asserts that initialization has happened.  Without iconv the
  system is queried directly on every call: libcharset if available, else
  nl_langinfo(CODESET), else the empty string.
***************************************************************************/
const char *get_local_encoding(void)
{
#if defined(HAVE_ICONV)
  fc_assert_ret_val(is_init, NULL);

  return local_encoding;
#elif defined(HAVE_LIBCHARSET)
  return locale_charset();
#elif defined(HAVE_LANGINFO_CODESET)
  return nl_langinfo(CODESET);
#else
  return "";
#endif
}
168
169 /***************************************************************************
170 Return the internal encoding. This depends on the server or GUI being
171 used.
172 ***************************************************************************/
get_internal_encoding(void)173 const char *get_internal_encoding(void)
174 {
175 fc_assert_ret_val(is_init, NULL);
176 return internal_encoding;
177 }
178
/***************************************************************************
  Convert 'text' from the 'from' charset to the 'to' charset.  Both 'from'
  and 'to' must be 8-bit charsets.

  text  - NUL-terminated input string; must not be NULL.
  from  - name of the source encoding.
  to    - name of the target encoding.
  buf   - output buffer of 'bufsz' bytes, or NULL to have the result
          allocated on demand (the caller then releases it with free()).
  bufsz - size of 'buf' in bytes; ignored when 'buf' is NULL.

  Returns the converted string ('buf' itself when a buffer was supplied).
  NULL is the value handed to the assertion macro when an argument check
  fails.  When the conversion cannot be performed, the unconverted text is
  returned instead (duplicated, or copied into 'buf').  When a supplied
  buffer is too small, the output is truncated and NUL-terminated.

  Don't use this function if you can avoid it.  Use one of the
  xxx_to_yyy_string functions.
***************************************************************************/
char *convert_string(const char *text,
                     const char *from,
                     const char *to,
                     char *buf, size_t bufsz)
{
#ifdef HAVE_ICONV
  iconv_t cd;
  size_t from_len, to_len;
  bool alloc = (buf == NULL);

  /* Check the arguments before using them: strlen(NULL) is undefined, and
   * bailing out after iconv_open() would leak the conversion descriptor. */
  fc_assert_ret_val(is_init && NULL != from && NULL != to, NULL);
  fc_assert_ret_val(NULL != text, NULL);

  from_len = strlen(text) + 1;

  cd = iconv_open(to, from);
  if (cd == (iconv_t) (-1)) {
    /* Do not do potentially recursive call to freeciv logging here,
     * but use fprintf(stderr) */
    /* Use the real OS-provided strerror and errno rather than Freeciv's
     * abstraction, as that wouldn't do the correct thing with third-party
     * iconv on Windows */

    /* TRANS: "Could not convert text from <encoding a> to <encoding b>:"
     *        <externally translated error string>."*/
    fprintf(stderr, _("Could not convert text from %s to %s: %s.\n"),
            from, to, strerror(errno));
    /* The best we can do? */
    if (alloc) {
      return fc_strdup(text);
    } else {
      fc_snprintf(buf, bufsz, "%s", text);
      return buf;
    }
  }

  /* An allocated result starts at the input size and doubles on demand;
   * a caller-supplied buffer is used as is. */
  to_len = alloc ? from_len : bufsz;

  for (;;) {
    size_t flen = from_len, tlen = to_len, res;
    const char *mytext = text;
    char *myresult;

    if (alloc) {
      buf = fc_malloc(to_len);
    }

    myresult = buf;

    /* Since we may do multiple translations, we may need to reset iconv
     * in between. */
    iconv(cd, NULL, NULL, NULL, NULL);

    res = iconv(cd, (ICONV_CONST char **)&mytext, &flen, &myresult, &tlen);
    if (res != (size_t) (-1)) {
      /* Success. */
      iconv_close(cd);

      /* There may be wasted space here, but there's nothing we can do
       * about it. */
      return buf;
    }

    if (errno != E2BIG) {
      /* Invalid input. */
      fprintf(stderr, "Invalid string conversion from %s to %s: %s.\n",
              from, to, strerror(errno));
      iconv_close(cd);
      if (alloc) {
        free(buf);
        return fc_strdup(text); /* The best we can do? */
      } else {
        fc_snprintf(buf, bufsz, "%s", text);
        return buf;
      }
    }

    if (!alloc) {
      /* The caller's buffer is too small.  Keep what fits, but make sure
       * it is terminated and that the descriptor is released. */
      if (bufsz > 0) {
        if (tlen > 0) {
          *myresult = '\0';
        } else {
          buf[bufsz - 1] = '\0';
        }
      }
      iconv_close(cd);
      return buf;
    }

    /* Not enough space; try again with a buffer twice as large. */
    free(buf);
    to_len *= 2;
  }
#else /* HAVE_ICONV */
  fc_assert_ret_val(NULL != text, NULL);

  if (buf) {
    /* Without iconv the text is copied unchanged.  A zero-sized buffer
     * cannot even hold the terminator, so leave it untouched. */
    if (bufsz > 0) {
      strncpy(buf, text, bufsz);
      buf[bufsz - 1] = '\0';
    }
    return buf;
  } else {
    return fc_strdup(text);
  }
#endif /* HAVE_ICONV */
}
286
/* Defines <src>_to_<dst>_string_malloc(text): converts 'text' from the
 * <src> encoding to the <dst> encoding (with transliteration_string
 * appended to the target name) and returns a result allocated by
 * convert_string().
 *
 * The is_init assertion comes first because the encoding names and
 * transliteration_string are only assigned by init_character_encodings();
 * measuring them with strlen() before that would dereference NULL. */
#define CONV_FUNC_MALLOC(src, dst)                                          \
char *src ## _to_ ## dst ## _string_malloc(const char *text)                \
{                                                                           \
  fc_assert_ret_val(is_init, NULL);                                         \
  {                                                                         \
    const char *encoding1 = (dst ## _encoding);                             \
    char encoding[strlen(encoding1) + strlen(transliteration_string) + 1];  \
                                                                            \
    fc_snprintf(encoding, sizeof(encoding),                                 \
                "%s%s", encoding1, transliteration_string);                 \
    return convert_string(text, (src ## _encoding),                         \
                          (encoding), NULL, 0);                             \
  }                                                                         \
}
298
/* Defines <src>_to_<dst>_string_buffer(text, buf, bufsz): converts 'text'
 * from the <src> encoding to the <dst> encoding (with
 * transliteration_string appended to the target name), writing into the
 * caller's 'buf' of 'bufsz' bytes via convert_string().
 *
 * The is_init assertion comes first because the encoding names and
 * transliteration_string are only assigned by init_character_encodings();
 * measuring them with strlen() before that would dereference NULL. */
#define CONV_FUNC_BUFFER(src, dst)                                          \
char *src ## _to_ ## dst ## _string_buffer(const char *text,                \
                                           char *buf, size_t bufsz)         \
{                                                                           \
  fc_assert_ret_val(is_init, NULL);                                         \
  {                                                                         \
    const char *encoding1 = (dst ## _encoding);                             \
    char encoding[strlen(encoding1) + strlen(transliteration_string) + 1];  \
                                                                            \
    fc_snprintf(encoding, sizeof(encoding),                                 \
                "%s%s", encoding1, transliteration_string);                 \
    return convert_string(text, (src ## _encoding),                         \
                          encoding, buf, bufsz);                            \
  }                                                                         \
}
311
/* Defines <src>_to_<dst>_string_static(text): runs the matching
 * <src>_to_<dst>_string_buffer() function on the file-wide convert_buffer
 * and returns convert_buffer.  The buffer is shared, so each call
 * overwrites the result of the previous one. */
#define CONV_FUNC_STATIC(src, dst)                                          \
char *src ## _to_ ## dst ## _string_static(const char *text)                \
{                                                                           \
  (void) (src ## _to_ ## dst ## _string_buffer)(text,                       \
                                                convert_buffer,             \
                                                sizeof(convert_buffer));    \
                                                                            \
  return convert_buffer;                                                    \
}
320
CONV_FUNC_MALLOC(data,internal)321 CONV_FUNC_MALLOC(data, internal)
322 CONV_FUNC_MALLOC(internal, data)
323 CONV_FUNC_MALLOC(internal, local)
324 CONV_FUNC_MALLOC(local, internal)
325
326 CONV_FUNC_BUFFER(local, internal)
327 CONV_FUNC_BUFFER(internal, local)
328
329 static CONV_FUNC_STATIC(internal, local)
330
331 /***************************************************************************
332 Do a fprintf from the internal charset into the local charset.
333 ***************************************************************************/
334 void fc_fprintf(FILE *stream, const char *format, ...)
335 {
336 va_list ap;
337 char string[4096];
338 const char *output;
339 static bool recursion = FALSE;
340
341 /* The recursion variable is used to prevent a recursive loop. If
342 * an iconv conversion fails, then log_* will be called and an
343 * fc_fprintf will be done. But below we do another iconv conversion
344 * on the error messages, which is of course likely to fail also. */
345 if (recursion) {
346 return;
347 }
348
349 va_start(ap, format);
350 fc_vsnprintf(string, sizeof(string), format, ap);
351 va_end(ap);
352
353 recursion = TRUE;
354 if (is_init) {
355 output = internal_to_local_string_static(string);
356 } else {
357 output = string;
358 }
359 recursion = FALSE;
360
361 fputs(output, stream);
362 fflush(stream);
363 }
364
/****************************************************************************
  Return the length, in *characters*, of the string.  This can be used in
  place of strlen in some places because it returns the number of characters
  not the number of bytes (with multi-byte characters in UTF-8, the two
  may not be the same).

  text - NUL-terminated string in the internal encoding; NULL yields 0.

  With iconv the text is converted to UCS-4 and the code units before the
  terminator are counted, skipping byte order marks.  The scan never leaves
  the conversion buffer, so a truncated or failed conversion gives an
  approximate count rather than an out-of-bounds read.  Without iconv no
  conversion is possible and the byte length is returned.

  Use of this function outside of GUI layout code is probably a hack.  For
  instance the demographics code uses it, but this should instead pass the
  data directly to the GUI library for formatting.
****************************************************************************/
size_t get_internal_string_length(const char *text)
{
  if (NULL == text) {
    return 0;
  }

#ifdef HAVE_ICONV
  {
    /* One code unit per input byte plus the terminator is always enough
     * for the characters themselves. */
    size_t capacity = strlen(text) + 1;
    int text2[capacity]; /* UCS-4 text */
    size_t i;
    size_t len = 0;

    /* Pre-zero the buffer so that a terminator exists even when the
     * conversion stops early or writes nothing at all. */
    memset(text2, 0, sizeof(text2));

    convert_string(text, internal_encoding, "UCS-4",
                   (char *)text2, sizeof(text2));

    for (i = 0; i < capacity && text2[i] != 0; i++) {
      /* The BOM is tested in both byte orders because the UCS-4 output is
       * read back as native integers. */
      if (text2[i] != 0x0000FEFF && text2[i] != 0xFFFE0000) {
        /* Not BOM */
        len++;
      }
    }

    return len;
  }
#else  /* HAVE_ICONV */
  /* convert_string() only copies bytes in this configuration, so
   * interpreting its output as UCS-4 would miscount; fall back to the
   * byte length. */
  return strlen(text);
#endif /* HAVE_ICONV */
}
393