1 /*
2 Copyright (c) 2001-2002 Perry Rapp
3 "The MIT license"
4 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
5 The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
7 */
8 /*=============================================================
9 * icvt.c -- Module that handles iconv calls
10 *==============================================================*/
11
12 #include "llstdlib.h" /* includes standard.h, sys_inc.h, llnls.h, config.h */
13 #include <errno.h>
14 #ifdef HAVE_ICONV
15 # ifdef WIN32_ICONV_SHIM
16 # include "mswin/iconvshim.h"
17 # else
18 # include <iconv.h>
19 # endif
20 #endif
21 /* wcslen may be declared in "arch.h" or <wchar.h> */
22 #include "arch.h"
23 #ifdef HAVE_WCHAR_H
24 #include <wchar.h>
25 #endif
26 #include "zstr.h"
27 #include "icvt.h"
28
29
30 /*===================================================
31 * iconv_can_trans -- Can iconv do this translation ?
32 *=================================================*/
33 BOOLEAN
iconv_can_trans(CNSTRING src,CNSTRING dest)34 iconv_can_trans (CNSTRING src, CNSTRING dest)
35 {
36 #ifdef HAVE_ICONV
37 iconv_t ict;
38
39 ict = iconv_open(dest, src);
40 if (ict == (iconv_t)-1)
41 return FALSE;
42 iconv_close(ict);
43 return TRUE;
44 #else
45 src=src; /* unused */
46 dest=dest; /* unused */
47 return FALSE;
48 #endif
49 }
50 /*===================================================
51 * iconv_trans -- Translate string via iconv
52 * src: [IN] source codeset
53 * dest: [IN] string to translate (& delete)
54 * sin: [IN] source string to be converted
55 * zout: [I/O] converted result
56 * illegal: [IN] character to use as placeholder for unconvertible input
57 *=================================================*/
58 BOOLEAN
iconv_trans(CNSTRING src,CNSTRING dest,CNSTRING sin,ZSTR zout,char illegal)59 iconv_trans (CNSTRING src, CNSTRING dest, CNSTRING sin, ZSTR zout, char illegal)
60 {
61 #ifdef HAVE_ICONV
62 iconv_t ict;
63 const char * inptr;
64 char * outptr;
65 size_t inleft;
66 size_t outleft;
67 size_t cvted;
68 #ifdef ICONV_SET_TRANSLITERATE
69 int transliterate=2;
70 #endif
71 double expand=1.3;
72 int chwidth=1;
73 int badchars=0; /* count # illegal placeholders inserted */
74 int inlen = sin ? strlen(sin) : 0;
75
76 ASSERT(src);
77 ASSERT(dest);
78
79 ict = iconv_open(dest, src);
80
81 if (ict == (iconv_t)-1) {
82 return FALSE;
83 }
84 if (!strncmp(src, "UCS-2", strlen("UCS-2"))) {
85 /* assume MS-Windows makenarrow call */
86 inlen = 2 * wcslen((const wchar_t *)sin);
87 }
88 if (!strncmp(src, "UCS-4", strlen("UCS-4"))) {
89 /* assume UNIX makenarrow call */
90 inlen = 4 * wcslen((const wchar_t *)sin);
91 }
92 if (!strncmp(dest, "UCS-2", strlen("UCS-2"))) {
93 chwidth = expand = 2;
94 }
95 if (!strncmp(dest, "UCS-4", strlen("UCS-4"))) {
96 chwidth = expand = 4;
97 }
98 if (eqstr(dest, "wchar_t")) {
99 chwidth = expand = sizeof(wchar_t);
100
101 }
102 /* TODO: What about UTF-16 or UTF-32 ? */
103
104 zs_reserve(zout, (unsigned int)(inlen*expand+6));
105
106 if (!inlen) {
107 outptr = zs_str(zout);
108 goto icvt_terminate_and_exit;
109 }
110
111 /* testing recursive transliteration in my private iconv, Perry, 2002.07.11 */
112 #ifdef ICONV_SET_TRANSLITERATE
113 iconvctl(ict, ICONV_SET_TRANSLITERATE, &transliterate);
114 #endif
115
116
117 inptr = sin;
118 outptr = zs_str(zout);
119 inleft = inlen;
120 /* we are terminating with 4 zero bytes just in case dest is UCS-4 */
121 outleft = zs_allocsize(zout)-zs_len(zout)-4;
122 cvted = 0;
123
124 cvting:
125 /* main convert */
126 cvted = iconv (ict, &inptr, &inleft, &outptr, &outleft);
127
128 /* zero terminate & fix output zstring */
129 /* there may be embedded nulls, if UCS-2/4 is target! */
130 *outptr=0;
131 zs_set_len(zout, outptr-zs_str(zout));
132
133 /* handle error cases */
134 if (cvted == (size_t)-1) {
135 /* errno is not reliable, because on MS-Windows we called
136 iconv in a dll & didn't get errno */
137 if (outleft<3) {
138 /* may be out of space, so grow & retry */
139 zs_reserve(zout, (unsigned int)(inleft * expand + 6 + zs_allocsize(zout)));
140 } else {
141 /* unconvertible input character */
142 /* append placeholder & skip over */
143 size_t wid = 1;
144 if (eqstr(src, "UTF-8")) {
145 wid = utf8len(*inptr);
146 }
147 if (wid > inleft)
148 wid = inleft;
149 inptr += wid;
150 inleft -= wid;
151 /* Following code is only correct for UCS-2LE, UCS-4LE */
152 if (chwidth == 2)
153 {
154 unsigned short * u = (unsigned short *)outptr;
155 *u = illegal;
156 outptr += sizeof(u);
157 }
158 else if (chwidth == 4)
159 {
160 unsigned int * u = (unsigned int *)outptr;
161 *u = illegal;
162 outptr += sizeof(u);
163 }
164 else
165 {
166 *outptr++ = illegal;
167 }
168 ++badchars;
169 zs_set_len(zout, outptr-zs_str(zout));
170 }
171 /* update output variables */
172 /* (may have reallocated, plus need to point to end */
173 outptr = zs_str(zout)+zs_len(zout);
174 outleft = zs_allocsize(zout)-zs_len(zout)-4;
175 if (inleft)
176 goto cvting;
177 }
178
179 icvt_terminate_and_exit:
180 /* zero-terminate with appropriately wide zero */
181 if (chwidth > 1) {
182 *outptr++=0;
183 if (chwidth > 2) {
184 *outptr++=0;
185 *outptr++=0;
186 }
187 }
188 *outptr=0;
189 zs_set_len(zout, outptr-zs_str(zout));
190
191 iconv_close(ict);
192 return TRUE;
193 #else
194 src=src; /* unused */
195 dest=dest; /* unused */
196 sin=sin; /* unused */
197 zout=zout; /* unused */
198 illegal=illegal; /* unused */
199 return FALSE;
200 #endif /* HAVE_ICONV */
201 }
202 /*===================================================
203 * init_win32_iconv_shim -- Helper for loading iconv.dll on win32
204 *=================================================*/
205 void
init_win32_iconv_shim(CNSTRING dllpath)206 init_win32_iconv_shim (CNSTRING dllpath)
207 {
208 #ifdef WIN32_ICONV_SHIM
209 if (dllpath && dllpath[0])
210 iconvshim_set_property("dll_path", dllpath);
211 #else
212 dllpath=dllpath; /* unused */
213 #endif
214 }
215