1 /*
2    Copyright (c) 2001-2002 Perry Rapp
3    "The MIT license"
4    Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
5    The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
7 */
8 /*=============================================================
9  * icvt.c -- Module that handles iconv calls
10  *==============================================================*/
11 
12 #include "llstdlib.h" /* includes standard.h, sys_inc.h, llnls.h, config.h */
13 #include <errno.h>
14 #ifdef HAVE_ICONV
15 # ifdef WIN32_ICONV_SHIM
16 #  include "mswin/iconvshim.h"
17 # else
18 #  include <iconv.h>
19 # endif
20 #endif
21 /* wcslen may be declared in "arch.h" or <wchar.h> */
22 #include "arch.h"
23 #ifdef HAVE_WCHAR_H
24 #include <wchar.h>
25 #endif
26 #include "zstr.h"
27 #include "icvt.h"
28 
29 
30 /*===================================================
31  * iconv_can_trans -- Can iconv do this translation ?
32  *=================================================*/
33 BOOLEAN
iconv_can_trans(CNSTRING src,CNSTRING dest)34 iconv_can_trans (CNSTRING src, CNSTRING dest)
35 {
36 #ifdef HAVE_ICONV
37 	iconv_t ict;
38 
39 	ict = iconv_open(dest, src);
40 	if (ict == (iconv_t)-1)
41 		return FALSE;
42     iconv_close(ict);
43 	return TRUE;
44 #else
45 	src=src; /* unused */
46 	dest=dest; /* unused */
47 	return FALSE;
48 #endif
49 }
50 /*===================================================
51  * iconv_trans -- Translate string via iconv
52  *  src:     [IN]  source codeset
53  *  dest:    [IN]  string to translate (& delete)
54  *  sin:     [IN]  source string to be converted
55  *  zout:    [I/O] converted result
56  *  illegal: [IN]  character to use as placeholder for unconvertible input
57  *=================================================*/
58 BOOLEAN
iconv_trans(CNSTRING src,CNSTRING dest,CNSTRING sin,ZSTR zout,char illegal)59 iconv_trans (CNSTRING src, CNSTRING dest, CNSTRING sin, ZSTR zout, char illegal)
60 {
61 #ifdef HAVE_ICONV
62 	iconv_t ict;
63 	const char * inptr;
64 	char * outptr;
65 	size_t inleft;
66 	size_t outleft;
67 	size_t cvted;
68 #ifdef ICONV_SET_TRANSLITERATE
69 	int transliterate=2;
70 #endif
71 	double expand=1.3;
72 	int chwidth=1;
73 	int badchars=0; /* count # illegal placeholders inserted */
74 	int inlen = sin ? strlen(sin) : 0;
75 
76 	ASSERT(src);
77 	ASSERT(dest);
78 
79 	ict = iconv_open(dest, src);
80 
81 	if (ict == (iconv_t)-1) {
82 		return FALSE;
83 	}
84 	if (!strncmp(src, "UCS-2", strlen("UCS-2"))) {
85 		/* assume MS-Windows makenarrow call */
86 		inlen = 2 * wcslen((const wchar_t *)sin);
87 	}
88 	if (!strncmp(src, "UCS-4", strlen("UCS-4"))) {
89 		/* assume UNIX makenarrow call */
90 		inlen = 4 * wcslen((const wchar_t *)sin);
91 	}
92 	if (!strncmp(dest, "UCS-2", strlen("UCS-2"))) {
93 		chwidth = expand = 2;
94 	}
95 	if (!strncmp(dest, "UCS-4", strlen("UCS-4"))) {
96 		chwidth = expand = 4;
97 	}
98 	if (eqstr(dest, "wchar_t")) {
99 		chwidth = expand = sizeof(wchar_t);
100 
101 	}
102 	/* TODO: What about UTF-16 or UTF-32 ? */
103 
104 	zs_reserve(zout, (unsigned int)(inlen*expand+6));
105 
106 	if (!inlen) {
107 		outptr = zs_str(zout);
108 		goto icvt_terminate_and_exit;
109 	}
110 
111 	/* testing recursive transliteration in my private iconv, Perry, 2002.07.11 */
112 #ifdef ICONV_SET_TRANSLITERATE
113 	iconvctl(ict, ICONV_SET_TRANSLITERATE, &transliterate);
114 #endif
115 
116 
117 	inptr = sin;
118 	outptr = zs_str(zout);
119 	inleft = inlen;
120 	/* we are terminating with 4 zero bytes just in case dest is UCS-4 */
121 	outleft = zs_allocsize(zout)-zs_len(zout)-4;
122 	cvted = 0;
123 
124 cvting:
125 	/* main convert */
126 	cvted = iconv (ict, &inptr, &inleft, &outptr, &outleft);
127 
128 	/* zero terminate & fix output zstring */
129 	/* there may be embedded nulls, if UCS-2/4 is target! */
130 	*outptr=0;
131 	zs_set_len(zout, outptr-zs_str(zout));
132 
133 	/* handle error cases */
134 	if (cvted == (size_t)-1) {
135 		/* errno is not reliable, because on MS-Windows we called
136 		iconv in a dll & didn't get errno */
137 		if (outleft<3) {
138 			/* may be out of space, so grow & retry */
139 			zs_reserve(zout, (unsigned int)(inleft * expand + 6 + zs_allocsize(zout)));
140 		} else {
141 			/* unconvertible input character */
142 			/* append placeholder & skip over */
143 			size_t wid = 1;
144 			if (eqstr(src, "UTF-8")) {
145 				wid = utf8len(*inptr);
146 			}
147 			if (wid > inleft)
148 				wid = inleft;
149 			inptr += wid;
150 			inleft -= wid;
151 			/* Following code is only correct for UCS-2LE, UCS-4LE */
152 			if (chwidth == 2)
153 			{
154 				unsigned short * u = (unsigned short *)outptr;
155 				*u = illegal;
156 				outptr += sizeof(u);
157 			}
158 			else if (chwidth == 4)
159 			{
160 				unsigned int * u = (unsigned int *)outptr;
161 				*u = illegal;
162 				outptr += sizeof(u);
163 			}
164 			else
165 			{
166 				*outptr++ = illegal;
167 			}
168 			++badchars;
169 			zs_set_len(zout, outptr-zs_str(zout));
170 		}
171 		/* update output variables */
172 		/* (may have reallocated, plus need to point to end */
173 		outptr = zs_str(zout)+zs_len(zout);
174 		outleft = zs_allocsize(zout)-zs_len(zout)-4;
175 		if (inleft)
176 			goto cvting;
177 	}
178 
179 icvt_terminate_and_exit:
180 	/* zero-terminate with appropriately wide zero */
181 	if (chwidth > 1) {
182 		*outptr++=0;
183 		if (chwidth > 2) {
184 			*outptr++=0;
185 			*outptr++=0;
186 		}
187 	}
188 	*outptr=0;
189 	zs_set_len(zout, outptr-zs_str(zout));
190 
191 	iconv_close(ict);
192 	return TRUE;
193 #else
194 	src=src; /* unused */
195 	dest=dest; /* unused */
196 	sin=sin; /* unused */
197 	zout=zout; /* unused */
198 	illegal=illegal; /* unused */
199 	return FALSE;
200 #endif /* HAVE_ICONV */
201 }
202 /*===================================================
203  * init_win32_iconv_shim -- Helper for loading iconv.dll on win32
204  *=================================================*/
205 void
init_win32_iconv_shim(CNSTRING dllpath)206 init_win32_iconv_shim (CNSTRING dllpath)
207 {
208 #ifdef WIN32_ICONV_SHIM
209 	if (dllpath && dllpath[0])
210 		iconvshim_set_property("dll_path", dllpath);
211 #else
212 	dllpath=dllpath; /* unused */
213 #endif
214 }
215