1 /**********************************************************************
2  Freeciv - Copyright (C) 2003-2004 - The Freeciv Project
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License as published by
5    the Free Software Foundation; either version 2, or (at your option)
6    any later version.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 ***********************************************************************/
13 #ifndef FC__FCICONV_H
14 #define FC__FCICONV_H
15 
16 #ifdef __cplusplus
17 extern "C" {
18 #endif /* __cplusplus */
19 
20 #include <stdio.h>
21 
22 #include "shared.h"
23 
24 /*
25   Technical details:
26 
27   - There are three encodings used by freeciv: the data encoding, the
28     internal encoding, and the local encoding.  Each is a character set
29     (like utf-8 or latin1).  Each string in the code must be in one of these
30     three encodings; to cut down on bugs, always document whenever you have
31     a string in anything other than the internal encoding and never make
32     global variables hold anything other than the internal encoding; the
33     local and data encodings should only be used locally within the code
34     and always documented as such.
35 
36   - The data_encoding is used in all data files and network transactions.
37     This is always UTF-8.
38 
39   - The internal_encoding is used internally within freeciv.  This is always
40     UTF-8 at the server, but can be configured by the GUI client.  The GTK2
41     client uses UTF-8 here but other clients will use whatever their GUI
42     library or platform requires.  Using the GUI encoding internally at
43     the client allows us to pass any strings directly to the GUI without
44     needing to convert them.  The drawback is that we have to convert them
45     into the data encoding before sending them over the network (a likely
46     source of bugs).  Also, gettext is set up to always return strings in
47     the internal encoding.
48 
49   - The local_encoding is the one supported on the command line, which is
50     generally the value listed in the $LANG environment variable.  This is
51     not under freeciv control; all output to the command line must be
52     converted or it will not display correctly.
53 
54   Practical details:
55 
56   - Translation files are not controlled by freeciv iconv.  The .po files
57     can be in any character set, as set at the top of the file.
58 
59   - All translatable texts should be American English ASCII. In the past,
60     gettext documentation has always said to stick to ASCII for the gettext
61     input (pre-translated texts) and rely on translations to supply the
62     needed non-ASCII characters.
63 
64   - All other texts, including rulesets, nations, and code files must be in
65     UTF-8 (ASCII is a subset of UTF-8, and is fine for use here).
66 
67   - The server uses UTF-8 for everything; UTF-8 is the server's "internal
68     encoding".
69 
70   - Everything sent over the network is always in UTF-8.
71 
72   - Everything in the client is converted into the client's "internal
73     encoding" when it is received from the server.  Depending on which
74     GUI is used, this may be just about any character set.  Conversely when
75     sending strings from the client to the server they need to be converted
76     into the data encoding.  This should be done internally within the
77     network code.
78 
79   - Everything printed to the command line must be converted into the
80     "local encoding" which may be anything as defined by the system.  Using
81     fc_fprintf is generally the easiest way to print to the command line
82     in which case all strings passed to it should be in the internal
83     encoding.
84 
85   See PR#40028 in the old RT for additional explanation.
86 */
87 
/* Default character set name for the data encoding.  As the notes above
 * state, the data encoding (data files and network transactions) is
 * always UTF-8. */
88 #define FC_DEFAULT_DATA_ENCODING "UTF-8"
89 
/* Set up the character encodings described in the notes above.
 *
 *   internal_encoding   - name of the character set to use as the internal
 *                         encoding.  NOTE(review): whether NULL selects a
 *                         default is not visible from this header; confirm
 *                         in the implementation.
 *   use_transliteration - NOTE(review): presumably asks the converter to
 *                         approximate characters that the target charset
 *                         cannot represent; confirm in the implementation.
 */
90 void init_character_encodings(const char *internal_encoding,
91 			      bool use_transliteration);
92 
/* Accessors for the three encodings described in the notes above (data,
 * local and internal).  Each returns a read-only string.
 * NOTE(review): presumably the charset name, owned by this module and not
 * to be freed by the caller; confirm in the implementation. */
93 const char *get_data_encoding(void);
94 const char *get_local_encoding(void);
95 const char *get_internal_encoding(void);
96 
/* Convert 'text' between two of the encodings described above; the
 * function name gives the direction as <source>_to_<target>.
 * NOTE(review): the _malloc suffix suggests the result is newly allocated
 * and must be freed by the caller; confirm in the implementation. */
97 char *data_to_internal_string_malloc(const char *text);
98 char *internal_to_data_string_malloc(const char *text);
99 char *internal_to_local_string_malloc(const char *text);
100 char *local_to_internal_string_malloc(const char *text);
101 
/* Variants of the local <-> internal conversions that additionally take a
 * caller-supplied buffer 'buf' and its size 'bufsz'.
 * NOTE(review): presumably the converted text is written into 'buf'
 * (bufsz in bytes) and 'buf' is returned; behaviour on truncation or with
 * a NULL 'buf' is not visible from this header - confirm. */
102 char *local_to_internal_string_buffer(const char *text,
103 				      char *buf, size_t bufsz);
104 char *internal_to_local_string_buffer(const char *text,
105 				      char *buf, size_t bufsz);
106 
/* printf-style output helpers.  fc_printf(...) is shorthand for
 * fc_fprintf(stdout, ...).
 *
 * As described in the notes above, fc_fprintf is the easiest way to print
 * to the command line; all strings passed to it should be in the internal
 * encoding.
 *
 * The attribute marks argument 2 as a printf format string with the
 * variable arguments starting at argument 3, so the compiler can check
 * them.  NOTE(review): fc__attribute is not defined in this header;
 * presumably it comes from shared.h. */
107 #define fc_printf(...) fc_fprintf(stdout, __VA_ARGS__)
108 void fc_fprintf(FILE *stream, const char *format, ...)
109       fc__attribute((__format__ (__printf__, 2, 3)));
110 
/* General conversion entry point taking the source text, two charset
 * arguments ('from' and 'to') and a buffer with its size.
 * NOTE(review): presumably converts 'text' from charset 'from' to charset
 * 'to', writing into 'buf' (at most 'bufsz' bytes); whether a NULL 'buf'
 * requests an allocated result is not visible from this header - confirm
 * in the implementation. */
111 char *convert_string(const char *text,
112 		     const char *from,
113 		     const char *to,
114 		     char *buf, size_t bufsz);
115 
/* Returns a length for 'text'.
 * NOTE(review): the name suggests 'text' is in the internal encoding and
 * that the result counts characters rather than bytes; confirm in the
 * implementation. */
116 size_t get_internal_string_length(const char *text);
117 
118 #ifdef __cplusplus
119 }
120 #endif /* __cplusplus */
121 
122 #endif /* FC__FCICONV_H */
123