1 /**********************************************************************
2  Freeciv - Copyright (C) 2003-2004 - The Freeciv Project
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License as published by
5    the Free Software Foundation; either version 2, or (at your option)
6    any later version.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 ***********************************************************************/
13 
14 #ifdef HAVE_CONFIG_H
15 #include <fc_config.h>
16 #endif
17 
18 #include <errno.h>
19 #include <stdarg.h>
20 #include <stdio.h>
21 #include <string.h>
22 
23 #ifdef HAVE_ICONV
24 #include <iconv.h>
25 #endif
26 
27 #ifdef HAVE_LANGINFO_CODESET
28 #include <langinfo.h>
29 #endif
30 
31 #ifdef HAVE_LIBCHARSET
32 #include <libcharset.h>
33 #endif
34 
35 /* utility */
36 #include "fciconv.h"
37 #include "fcintl.h"
38 #include "log.h"
39 #include "mem.h"
40 #include "support.h"
41 
42 static bool is_init = FALSE;
43 static char convert_buffer[4096];
44 static const char *transliteration_string;
45 
46 #ifdef HAVE_ICONV
47 static const char *local_encoding, *data_encoding, *internal_encoding;
48 #else  /* HAVE_ICONV */
49 /* Hack to confuse the compiler into working. */
50 #  define local_encoding get_local_encoding()
51 #  define data_encoding get_local_encoding()
52 #  define internal_encoding get_local_encoding()
53 #endif /* HAVE_ICONV */
54 
55 /***************************************************************************
56   Must be called during the initialization phase of server and client to
57   initialize the character encodings to be used.
58 
59   Pass an internal encoding of NULL to use the local encoding internally.
60 ***************************************************************************/
init_character_encodings(const char * my_internal_encoding,bool my_use_transliteration)61 void init_character_encodings(const char *my_internal_encoding,
62 			      bool my_use_transliteration)
63 {
64   transliteration_string = "";
65 #ifdef HAVE_ICONV
66   if (my_use_transliteration) {
67     transliteration_string = "//TRANSLIT";
68   }
69 
70   /* Set the data encoding - first check $FREECIV_DATA_ENCODING,
71    * then fall back to the default. */
72   data_encoding = getenv("FREECIV_DATA_ENCODING");
73   if (!data_encoding) {
74     data_encoding = FC_DEFAULT_DATA_ENCODING;
75   }
76 
77   /* Set the local encoding - first check $FREECIV_LOCAL_ENCODING,
78    * then ask the system. */
79   local_encoding = getenv("FREECIV_LOCAL_ENCODING");
80   if (!local_encoding) {
81 #ifdef HAVE_LIBCHARSET
82     local_encoding = locale_charset();
83 #else  /* HAVE_LIBCHARSET */
84 #ifdef HAVE_LANGINFO_CODESET
85     local_encoding = nl_langinfo(CODESET);
86 #else  /* HAVE_LANGINFO_CODESET */
87     local_encoding = "";
88 #endif /* HAVE_LANGINFO_CODESET */
89 #endif /* HAVE_LIBCHARSET */
90     if (fc_strcasecmp(local_encoding, "ANSI_X3.4-1968") == 0
91         || fc_strcasecmp(local_encoding, "ASCII") == 0
92         || fc_strcasecmp(local_encoding, "US-ASCII") == 0) {
93       /* HACK: use latin1 instead of ascii in typical cases when the
94        * encoding is unconfigured. */
95       local_encoding = "ISO-8859-1";
96     }
97 
98     if (fc_strcasecmp(local_encoding, "646") == 0) {
99       /* HACK: On Solaris the encoding always comes up as "646" (ascii),
100        * which iconv doesn't understand.  Work around it by using UTF-8
101        * instead. */
102       local_encoding = "UTF-8";
103     }
104   }
105 
106   /* Set the internal encoding - first check $FREECIV_INTERNAL_ENCODING,
107    * then check the passed-in default value, then fall back to the local
108    * encoding. */
109   internal_encoding = getenv("FREECIV_INTERNAL_ENCODING");
110   if (!internal_encoding) {
111     internal_encoding = my_internal_encoding;
112 
113     if (!internal_encoding) {
114       internal_encoding = local_encoding;
115     }
116   }
117 
118 #ifdef ENABLE_NLS
119   bind_textdomain_codeset(PACKAGE, internal_encoding);
120 #endif
121 
122 #ifdef DEBUG
123   fprintf(stderr, "Encodings: Data=%s, Local=%s, Internal=%s\n",
124           data_encoding, local_encoding, internal_encoding);
125 #endif /* DEBUG */
126 
127 #else  /* HAVE_ICONV */
128    /* log_* may not work at this point. */
129   fprintf(stderr,
130           _("You are running Freeciv without using iconv. Unless\n"
131             "you are using the UTF-8 character set, some characters\n"
132             "may not be displayed properly. You can download iconv\n"
133             "at http://gnu.org/.\n"));
134 #endif /* HAVE_ICONV */
135 
136   is_init = TRUE;
137 }
138 
139 /***************************************************************************
140   Return the data encoding (usually UTF-8).
141 ***************************************************************************/
get_data_encoding(void)142 const char *get_data_encoding(void)
143 {
144   fc_assert_ret_val(is_init, NULL);
145   return data_encoding;
146 }
147 
/***************************************************************************
  Return the name of the local (system dependent) encoding.

  With iconv this is the value stored by init_character_encodings(), and
  the function asserts that initialization has happened.  Without iconv
  the system is queried on every call (libcharset, then nl_langinfo), and
  "" is returned when neither facility is available.
***************************************************************************/
const char *get_local_encoding(void)
{
#if defined(HAVE_ICONV)
  fc_assert_ret_val(is_init, NULL);

  return local_encoding;
#elif defined(HAVE_LIBCHARSET)
  return locale_charset();
#elif defined(HAVE_LANGINFO_CODESET)
  return nl_langinfo(CODESET);
#else
  return "";
#endif
}
168 
169 /***************************************************************************
170   Return the internal encoding.  This depends on the server or GUI being
171   used.
172 ***************************************************************************/
get_internal_encoding(void)173 const char *get_internal_encoding(void)
174 {
175   fc_assert_ret_val(is_init, NULL);
176   return internal_encoding;
177 }
178 
#ifdef HAVE_ICONV
/***************************************************************************
  Fallback for convert_string() when no conversion is possible: hand the
  text back unconverted.  With buf == NULL a fresh copy is returned,
  otherwise the text is copied (truncated to bufsz) into buf.
***************************************************************************/
static char *convert_string_fallback(const char *text,
                                     char *buf, size_t bufsz)
{
  if (NULL == buf) {
    return fc_strdup(text);
  }

  if (bufsz > 0) {
    fc_snprintf(buf, bufsz, "%s", text);
  }
  return buf;
}
#endif /* HAVE_ICONV */

/***************************************************************************
  Convert 'text' from encoding 'from' to encoding 'to'.  Both 'from' and
  'to' must be 8-bit charsets.

  text   - NUL-terminated input string; must not be NULL.
  from   - name of the source encoding; must not be NULL.
  to     - name of the target encoding; must not be NULL.
  buf    - destination buffer, or NULL to have the result allocated on
           demand (the allocation is grown until the text fits).
  bufsz  - size of buf in bytes; ignored when buf is NULL.

  Returns buf, or the newly allocated string when buf is NULL.  If the
  converter cannot be opened or the input is invalid, a message is
  printed to stderr and the unconverted text is returned instead.  If a
  caller-supplied buffer is too small, the part that fitted is returned,
  NUL-terminated.  The argument assertions are given NULL as their
  failure return value.

  Without iconv the text is simply copied.

  Don't use this function if you can avoid it.  Use one of the
  xxx_to_yyy_string functions.
***************************************************************************/
char *convert_string(const char *text,
                     const char *from,
                     const char *to,
                     char *buf, size_t bufsz)
{
#ifdef HAVE_ICONV
  iconv_t cd;
  size_t from_len, to_len;
  bool alloc = (buf == NULL);

  /* Validate before touching the arguments or acquiring the iconv
   * descriptor, so a failed assertion neither dereferences NULL nor
   * leaks the descriptor. */
  fc_assert_ret_val(is_init && NULL != from && NULL != to, NULL);
  fc_assert_ret_val(NULL != text, NULL);

  cd = iconv_open(to, from);
  if (cd == (iconv_t) (-1)) {
    /* Do not do potentially recursive call to freeciv logging here,
     * but use fprintf(stderr) */
    /* Use the real OS-provided strerror and errno rather than Freeciv's
     * abstraction, as that wouldn't do the correct thing with third-party
     * iconv on Windows */

    /* TRANS: "Could not convert text from <encoding a> to <encoding b>:"
     *        <externally translated error string>."*/
    fprintf(stderr, _("Could not convert text from %s to %s: %s.\n"),
            from, to, strerror(errno));
    /* The best we can do? */
    return convert_string_fallback(text, buf, bufsz);
  }

  /* The terminating NUL is converted along with the text. */
  from_len = strlen(text) + 1;
  to_len = alloc ? from_len : bufsz;

  for (;;) {
    size_t flen = from_len, tlen = to_len;
    const char *mytext = text;
    char *myresult;

    if (alloc) {
      buf = fc_malloc(to_len);
    }
    myresult = buf;

    /* Since we may do multiple translations, we may need to reset iconv
     * in between. */
    iconv(cd, NULL, NULL, NULL, NULL);

    if (iconv(cd, (ICONV_CONST char **)&mytext, &flen, &myresult, &tlen)
        != (size_t) (-1)) {
      /* Success.  There may be wasted space in an allocated result, but
       * there's nothing we can do about it. */
      break;
    }

    if (errno != E2BIG) {
      /* Invalid input. */
      fprintf(stderr, "Invalid string conversion from %s to %s: %s.\n",
              from, to, strerror(errno));
      if (alloc) {
        free(buf);
        buf = NULL;
      }
      buf = convert_string_fallback(text, buf, bufsz); /* Best we can do? */
      break;
    }

    if (!alloc) {
      /* The caller's buffer is too small and cannot grow: keep what was
       * converted and make sure it is terminated. */
      if (tlen > 0) {
        *myresult = '\0';
      } else if (bufsz > 0) {
        buf[bufsz - 1] = '\0';
      }
      break;
    }

    /* Not enough space; try again with a bigger allocation. */
    free(buf);
    to_len *= 2;
  }

  iconv_close(cd);
  return buf;
#else /* HAVE_ICONV */
  if (buf) {
    if (bufsz > 0) {
      strncpy(buf, text, bufsz);
      buf[bufsz - 1] = '\0';
    }
    return buf;
  } else {
    return fc_strdup(text);
  }
#endif /* HAVE_ICONV */
}
286 
/* Generates char *<src>_to_<dst>_string_malloc(const char *text).
 * The generated function builds the target encoding name as
 * "<dst encoding><transliteration_string>" and passes it to
 * convert_string() with a NULL buffer, so the result is allocated. */
#define CONV_FUNC_MALLOC(src, dst)                                          \
char *src ## _to_ ## dst ## _string_malloc(const char *text)                \
{                                                                           \
  const char *target = (dst ## _encoding);                                  \
  size_t target_sz = strlen(target) + strlen(transliteration_string) + 1;   \
  char full_target[target_sz];                                              \
                                                                            \
  fc_snprintf(full_target, sizeof(full_target),                             \
              "%s%s", target, transliteration_string);                      \
  return convert_string(text, (src ## _encoding),                           \
                        full_target, NULL, 0);                              \
}
298 
/* Generates char *<src>_to_<dst>_string_buffer(const char *text,
 *                                              char *buf, size_t bufsz).
 * The generated function builds the target encoding name as
 * "<dst encoding><transliteration_string>" and passes it, together with
 * the caller's buffer, to convert_string(). */
#define CONV_FUNC_BUFFER(src, dst)                                          \
char *src ## _to_ ## dst ## _string_buffer(const char *text,                \
                                           char *buf, size_t bufsz)         \
{                                                                           \
  const char *target = (dst ## _encoding);                                  \
  size_t target_sz = strlen(target) + strlen(transliteration_string) + 1;   \
  char full_target[target_sz];                                              \
                                                                            \
  fc_snprintf(full_target, sizeof(full_target),                             \
              "%s%s", target, transliteration_string);                      \
  return convert_string(text, (src ## _encoding),                           \
                        full_target, buf, bufsz);                           \
}
311 
/* Generates char *<src>_to_<dst>_string_static(const char *text).
 * The generated function converts into the file-wide convert_buffer via
 * the matching <src>_to_<dst>_string_buffer() and returns that buffer,
 * so each call overwrites the previous result. */
#define CONV_FUNC_STATIC(src, dst)                                          \
char *src ## _to_ ## dst ## _string_static(const char *text)                \
{                                                                           \
  char *const scratch = convert_buffer;                                     \
                                                                            \
  (src ## _to_ ## dst ## _string_buffer)(text, scratch,                     \
                                         sizeof(convert_buffer));           \
  return scratch;                                                           \
}
320 
CONV_FUNC_MALLOC(data,internal)321 CONV_FUNC_MALLOC(data, internal)
322 CONV_FUNC_MALLOC(internal, data)
323 CONV_FUNC_MALLOC(internal, local)
324 CONV_FUNC_MALLOC(local, internal)
325 
326 CONV_FUNC_BUFFER(local, internal)
327 CONV_FUNC_BUFFER(internal, local)
328 
329 static CONV_FUNC_STATIC(internal, local)
330 
331 /***************************************************************************
332   Do a fprintf from the internal charset into the local charset.
333 ***************************************************************************/
334 void fc_fprintf(FILE *stream, const char *format, ...)
335 {
336   va_list ap;
337   char string[4096];
338   const char *output;
339   static bool recursion = FALSE;
340 
341   /* The recursion variable is used to prevent a recursive loop.  If
342    * an iconv conversion fails, then log_* will be called and an
343    * fc_fprintf will be done.  But below we do another iconv conversion
344    * on the error messages, which is of course likely to fail also. */
345   if (recursion) {
346     return;
347   }
348 
349   va_start(ap, format);
350   fc_vsnprintf(string, sizeof(string), format, ap);
351   va_end(ap);
352 
353   recursion = TRUE;
354   if (is_init) {
355     output = internal_to_local_string_static(string);
356   } else {
357     output = string;
358   }
359   recursion = FALSE;
360 
361   fputs(output, stream);
362   fflush(stream);
363 }
364 
/****************************************************************************
  Return the length, in *characters*, of the string.  This can be used in
  place of strlen in some places because it returns the number of characters
  not the number of bytes (with multi-byte characters in UTF-8, the two
  may not be the same).

  text - NUL-terminated string in the internal encoding; must not be NULL
         (asserted, with 0 as the failure return value).

  With iconv the text is converted to UCS-4 and the 4-byte units before
  the first zero unit are counted, skipping byte order marks.  The scan
  never leaves the conversion buffer, even if the conversion failed or
  was truncated.  Without iconv no conversion is available and the byte
  length is returned as an approximation.

  Use of this function outside of GUI layout code is probably a hack.  For
  instance the demographics code uses it, but this should instead pass the
  data directly to the GUI library for formatting.
****************************************************************************/
size_t get_internal_string_length(const char *text)
{
#ifdef HAVE_ICONV
  size_t units, bytes, i;
  size_t len = 0;
  unsigned int *ucs4; /* UCS-4 text; assumes a 4-byte unsigned int */

  fc_assert_ret_val(NULL != text, 0);

  /* One UCS-4 unit per input byte is an upper bound on the number of
   * characters, plus one unit for the terminator. */
  units = strlen(text) + 1;
  bytes = units * sizeof(*ucs4);

  /* Heap rather than stack: the buffer is four times the input size.
   * Zero it so that whatever convert_string() leaves unwritten reads as
   * a terminator. */
  ucs4 = fc_malloc(bytes);
  memset(ucs4, 0, bytes);

  convert_string(text, internal_encoding, "UCS-4", (char *) ucs4, bytes);

  for (i = 0; i < units && ucs4[i] != 0; i++) {
    if (ucs4[i] != 0x0000FEFF && ucs4[i] != 0xFFFE0000) {
      /* Not BOM (in either byte order) */
      len++;
    }
  }

  free(ucs4);
  return len;
#else  /* HAVE_ICONV */
  fc_assert_ret_val(NULL != text, 0);

  /* convert_string() only copies bytes in this configuration, so there
   * is no UCS-4 text to count. */
  return strlen(text);
#endif /* HAVE_ICONV */
}
393