1 /* $NetBSD: conv.c,v 1.11 2019/10/24 18:17:14 kamil Exp $ */
2 /*-
3 * Copyright (c) 1993, 1994
4 * The Regents of the University of California. All rights reserved.
5 * Copyright (c) 1993, 1994, 1995, 1996
6 * Keith Bostic. All rights reserved.
7 *
8 * See the LICENSE file for redistribution information.
9 */
10
11 #include "config.h"
12
13 #include <sys/cdefs.h>
14 #if 0
15 #ifndef lint
16 static const char sccsid[] = "Id: conv.c,v 1.27 2001/08/18 21:41:41 skimo Exp (Berkeley) Date: 2001/08/18 21:41:41 ";
17 #endif /* not lint */
18 #else
19 __RCSID("$NetBSD: conv.c,v 1.11 2019/10/24 18:17:14 kamil Exp $");
20 #endif
21
22 #include <sys/types.h>
23 #include <sys/queue.h>
24 #include <sys/time.h>
25
26 #include <bitstring.h>
27 #include <errno.h>
28 #include <limits.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33
34 #include "common.h"
35
36 #if defined(USE_WIDECHAR) && defined(USE_ICONV)
37 #include <langinfo.h>
38 #include <iconv.h>
39
40 #define LANGCODESET nl_langinfo(CODESET)
41 #else
42 #define LANGCODESET ""
43 #endif
44
45 #include <locale.h>
46
47 #ifdef USE_WIDECHAR
48 #ifdef USE_ICONV
49 static int
raw2int(SCR * sp,const char * str,ssize_t len,CONVWIN * cw,size_t * tolen,const CHAR_T ** dst)50 raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
51 const CHAR_T **dst)
52 {
53 int i;
54 CHAR_T **tostr = (CHAR_T **)(void *)&cw->bp1;
55 size_t *blen = &cw->blen1;
56
57 BINC_RETW(NULL, *tostr, *blen, len);
58
59 *tolen = len;
60 for (i = 0; i < len; ++i) {
61 CHAR_T w = (u_char)str[i];
62 memcpy((*tostr) + i, &w, sizeof(**tostr));
63 }
64
65 *dst = cw->bp1;
66
67 return 0;
68 }
69 #endif
70
/*
 * Policy for bytes/characters that cannot be converted.
 *
 * Default (ERROR_ON_CONVERT undefined): be lenient.
 *   HANDLE_ICONV_ERROR(o, i, ol, il) copies one byte from input cursor i
 *	to output cursor o, advances both cursors and decrements both
 *	remaining-length counters ol and il.
 *   HANDLE_MBR_ERROR(n, mbs, d, s) stores s into d, clears the conversion
 *	state mbs and sets n to 1 (one unit consumed).
 *
 * With ERROR_ON_CONVERT defined, both macros jump to the caller's `err'
 * label.  They are function-like in this mode too, so that a call site
 * written as NAME(a, b, c, d) still expands to a valid statement.
 *
 * All arguments are parenthesized so that expressions such as *ptr can be
 * passed as a counter.
 */
#ifndef ERROR_ON_CONVERT
#define HANDLE_ICONV_ERROR(o, i, ol, il) do {				\
		*(o)++ = *(i)++;					\
		(ol)--; (il)--;						\
	} while (/*CONSTCOND*/0)
#define HANDLE_MBR_ERROR(n, mbs, d, s) do {				\
		(d) = (s);						\
		MEMSET(&(mbs), 0, 1);					\
		(n) = 1;						\
	} while (/*CONSTCOND*/0)
#else
#define HANDLE_ICONV_ERROR(o, i, ol, il)	goto err
#define HANDLE_MBR_ERROR(n, mbs, d, s)		goto err
#endif
85
#define CONV_BUFFER_SIZE  512
/*
 * CONVERT(str, left, src, len) --
 *	Run one iconv() pass over the input at str into the enclosing
 *	function's local `buffer' (CONV_BUFFER_SIZE bytes).
 *
 * str/left are the input cursor and the number of input bytes remaining;
 * iconv() advances/decrements them.  On return len holds the number of
 * bytes placed in the buffer and src points at the buffer.
 *
 * The macro also relies on these names from the expansion site: `id'
 * (iconv descriptor), `buffer', `error' and the label `err'.  If nothing
 * at all could be produced, error is set to minus the number of bytes
 * still unconverted and control jumps to err.
 *
 * A failure other than E2BIG is passed to HANDLE_ICONV_ERROR, but only
 * while the buffer still has room: the lenient handler stores one byte at
 * the output cursor, so calling it with a full buffer would write past
 * the end.  In that case the bytes already produced are handed back and
 * the offending input is met again on the next CONVERT with an empty
 * buffer.
 */
#ifdef USE_ICONV
#define CONVERT(str, left, src, len)					\
	do {								\
		size_t outleft;						\
		char *bp = buffer;					\
		outleft = CONV_BUFFER_SIZE;				\
		errno = 0;						\
		if (iconv(id, (char **)(void *)&str, &left, &bp,	\
		    &outleft) == (size_t)-1 && errno != E2BIG &&	\
		    outleft > 0)					\
			HANDLE_ICONV_ERROR(bp, str, outleft, left);	\
		if ((len = CONV_BUFFER_SIZE - outleft) == 0) {		\
			error = -left;					\
			goto err;					\
		}							\
		src = buffer;						\
	} while (0)
#endif
108
109 static int
default_char2int(SCR * sp,const char * str,ssize_t len,CONVWIN * cw,size_t * tolen,const CHAR_T ** dst,const char * enc)110 default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
111 size_t *tolen, const CHAR_T **dst, const char *enc)
112 {
113 int j;
114 size_t i = 0;
115 CHAR_T **tostr = (CHAR_T **)(void *)&cw->bp1;
116 size_t *blen = &cw->blen1;
117 mbstate_t mbs;
118 size_t n;
119 ssize_t nlen = len;
120 const char *src = (const char *)str;
121 int error = 1;
122 #ifdef USE_ICONV
123 iconv_t id = (iconv_t)-1;
124 char buffer[CONV_BUFFER_SIZE];
125 size_t left = len;
126 #endif
127
128 MEMSET(&mbs, 0, 1);
129 BINC_RETW(NULL, *tostr, *blen, nlen);
130
131 #ifdef USE_ICONV
132 if (strcmp(nl_langinfo(CODESET), enc)) {
133 id = iconv_open(nl_langinfo(CODESET), enc);
134 if (id == (iconv_t)-1)
135 goto err;
136 CONVERT(str, left, src, len);
137 }
138 #endif
139
140 for (i = 0, j = 0; j < len; ) {
141 CHAR_T w;
142 n = mbrtowc(&w, src + j, len - j, &mbs);
143 memcpy((*tostr) + i, &w, sizeof(**tostr));
144 /* NULL character converted */
145 if (n == (size_t)-2) error = -(len - j);
146 if (n == (size_t)-1 || n == (size_t)-2) {
147 HANDLE_MBR_ERROR(n, mbs, w, src[j]);
148 memcpy((*tostr) + i, &w, sizeof(**tostr));
149 }
150 if (n == 0) n = 1;
151 j += n;
152 if (++i >= *blen) {
153 nlen += 256;
154 BINC_GOTOW(NULL, *tostr, *blen, nlen);
155 }
156 #ifdef USE_ICONV
157 if (id != (iconv_t)-1 && j == len && left) {
158 CONVERT(str, left, src, len);
159 j = 0;
160 }
161 #endif
162 }
163 *tolen = i;
164
165 #ifdef USE_ICONV
166 if (id != (iconv_t)-1)
167 iconv_close(id);
168 #endif
169
170 *dst = cw->bp1;
171
172 return 0;
173 alloc_err:
174 #ifdef USE_ICONV
175 err:
176 if (id != (iconv_t)-1)
177 iconv_close(id);
178 #endif
179 *tolen = i;
180 *dst = cw->bp1;
181
182 return error;
183 }
184
185 static int
fe_char2int(SCR * sp,const char * str,ssize_t len,CONVWIN * cw,size_t * tolen,const CHAR_T ** dst)186 fe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
187 size_t *tolen, const CHAR_T **dst)
188 {
189 return default_char2int(sp, str, len, cw, tolen, dst, O_STR(sp, O_FILEENCODING));
190 }
191
192 static int
ie_char2int(SCR * sp,const char * str,ssize_t len,CONVWIN * cw,size_t * tolen,const CHAR_T ** dst)193 ie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
194 size_t *tolen, const CHAR_T **dst)
195 {
196 return default_char2int(sp, str, len, cw, tolen, dst, O_STR(sp, O_INPUTENCODING));
197 }
198
199 static int
cs_char2int(SCR * sp,const char * str,ssize_t len,CONVWIN * cw,size_t * tolen,const CHAR_T ** dst)200 cs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
201 size_t *tolen, const CHAR_T **dst)
202 {
203 return default_char2int(sp, str, len, cw, tolen, dst, LANGCODESET);
204 }
205
206 #ifdef USE_ICONV
207 static int
CHAR_T_int2char(SCR * sp,const CHAR_T * str,ssize_t len,CONVWIN * cw,size_t * tolen,const char ** dst)208 CHAR_T_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
209 size_t *tolen, const char **dst)
210 {
211 *tolen = len * sizeof(CHAR_T);
212 *dst = (const char *)(const void *)str;
213
214 return 0;
215 }
216
217 static int
CHAR_T_char2int(SCR * sp,const char * str,ssize_t len,CONVWIN * cw,size_t * tolen,const CHAR_T ** dst)218 CHAR_T_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
219 size_t *tolen, const CHAR_T **dst)
220 {
221 *tolen = len / sizeof(CHAR_T);
222 *dst = (const CHAR_T*) str;
223
224 return 0;
225 }
226
227 static int
int2raw(SCR * sp,const CHAR_T * str,ssize_t len,CONVWIN * cw,size_t * tolen,const char ** dst)228 int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen,
229 const char **dst)
230 {
231 int i;
232 char **tostr = (char **)(void *)&cw->bp1;
233 size_t *blen = &cw->blen1;
234
235 BINC_RETC(NULL, *tostr, *blen, len);
236
237 *tolen = len;
238 for (i = 0; i < len; ++i) {
239 CHAR_T w;
240 memcpy(&w, str + i, sizeof(w));
241 (*tostr)[i] = w;
242 }
243
244 *dst = cw->bp1;
245
246 return 0;
247 }
248 #endif
249
250 static int
default_int2char(SCR * sp,const CHAR_T * str,ssize_t len,CONVWIN * cw,size_t * tolen,const char ** pdst,const char * enc)251 default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
252 size_t *tolen, const char **pdst, const char *enc)
253 {
254 size_t i, j = 0;
255 char **tostr = (char **)(void *)&cw->bp1;
256 size_t *blen = &cw->blen1;
257 mbstate_t mbs;
258 size_t n;
259 ssize_t nlen = len + MB_CUR_MAX;
260 char *dst;
261 size_t buflen;
262 #ifdef USE_ICONV
263 int offset = 0;
264 char buffer[CONV_BUFFER_SIZE];
265 iconv_t id = (iconv_t)-1;
266 #endif
267
268 /* convert first len bytes of buffer and append it to cw->bp
269 * len is adjusted => 0
270 * offset contains the offset in cw->bp and is adjusted
271 * cw->bp is grown as required
272 */
273 #ifdef USE_ICONV
274 #define CONVERT2(_buffer, lenp, cw, offset) \
275 do { \
276 const char *bp = _buffer; \
277 size_t ret; \
278 do { \
279 size_t outleft = cw->blen1 - offset; \
280 char *obp = (char *)cw->bp1 + offset; \
281 if (cw->blen1 < offset + MB_CUR_MAX) { \
282 nlen += 256; \
283 BINC_GOTOC(NULL, cw->bp1, cw->blen1, nlen); \
284 } \
285 errno = 0; \
286 ret = iconv(id, (char **)(void *)&bp, lenp, &obp, &outleft);\
287 if (ret == (size_t)-1 && errno != E2BIG) \
288 HANDLE_ICONV_ERROR(obp, bp, outleft, len); \
289 offset = cw->blen1 - outleft; \
290 } while (ret != 0); \
291 } while (0)
292 #endif
293
294 MEMSET(&mbs, 0, 1);
295 BINC_RETC(NULL, *tostr, *blen, nlen);
296 dst = *tostr; buflen = *blen;
297
298 #ifdef USE_ICONV
299 if (strcmp(nl_langinfo(CODESET), enc)) {
300 id = iconv_open(enc, nl_langinfo(CODESET));
301 if (id == (iconv_t)-1)
302 goto err;
303 dst = buffer; buflen = CONV_BUFFER_SIZE;
304 }
305 #endif
306
307 for (i = 0, j = 0; i < (size_t)len; ++i) {
308 CHAR_T w;
309 memcpy(&w, str + i, sizeof(w));
310 n = wcrtomb(dst + j, w, &mbs);
311 if (n == (size_t)-1)
312 HANDLE_MBR_ERROR(n, mbs, dst[j], w);
313 j += n;
314 if (buflen < j + MB_CUR_MAX) {
315 #ifdef USE_ICONV
316 if (id != (iconv_t)-1) {
317 CONVERT2(buffer, &j, cw, offset);
318 } else
319 #endif
320 {
321 nlen += 256;
322 BINC_RETC(NULL, *tostr, *blen, nlen);
323 dst = *tostr; buflen = *blen;
324 }
325 }
326 }
327
328 n = wcrtomb(dst + j, L'\0', &mbs);
329 j += n - 1; /* don't count NUL at the end */
330 *tolen = j;
331
332 #ifdef USE_ICONV
333 if (id != (iconv_t)-1) {
334 CONVERT2(buffer, &j, cw, offset);
335 CONVERT2(NULL, NULL, cw, offset); /* back to the initial state */
336 *tolen = offset;
337 iconv_close(id);
338 }
339 #endif
340
341 *pdst = cw->bp1;
342
343 return 0;
344 #ifdef USE_ICONV
345 alloc_err:
346 err:
347 if (id != (iconv_t)-1)
348 iconv_close(id);
349 *tolen = j;
350 *pdst = cw->bp1;
351
352 return 1;
353 #endif
354 }
355
356 static int
fe_int2char(SCR * sp,const CHAR_T * str,ssize_t len,CONVWIN * cw,size_t * tolen,const char ** dst)357 fe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
358 size_t *tolen, const char **dst)
359 {
360 return default_int2char(sp, str, len, cw, tolen, dst, O_STR(sp, O_FILEENCODING));
361 }
362
363 static int
cs_int2char(SCR * sp,const CHAR_T * str,ssize_t len,CONVWIN * cw,size_t * tolen,const char ** dst)364 cs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
365 size_t *tolen, const char **dst)
366 {
367 return default_int2char(sp, str, len, cw, tolen, dst, LANGCODESET);
368 }
369
370 #endif
371
372
373 void
conv_init(SCR * orig,SCR * sp)374 conv_init (SCR *orig, SCR *sp)
375 {
376 if (orig != NULL)
377 MEMCPY(&sp->conv, &orig->conv, 1);
378 else {
379 setlocale(LC_ALL, "");
380 #ifdef USE_WIDECHAR
381 sp->conv.sys2int = cs_char2int;
382 sp->conv.int2sys = cs_int2char;
383 sp->conv.file2int = fe_char2int;
384 sp->conv.int2file = fe_int2char;
385 sp->conv.input2int = ie_char2int;
386 #ifdef USE_ICONV
387 o_set(sp, O_FILEENCODING, OS_STRDUP, nl_langinfo(CODESET), 0);
388 o_set(sp, O_INPUTENCODING, OS_STRDUP, nl_langinfo(CODESET), 0);
389 #endif
390 #endif
391 }
392 }
393
394 int
conv_enc(SCR * sp,int option,const char * enc)395 conv_enc (SCR *sp, int option, const char *enc)
396 {
397 #if defined(USE_WIDECHAR) && defined(USE_ICONV)
398 iconv_t id;
399 char2wchar_t *c2w;
400 wchar2char_t *w2c;
401
402 switch (option) {
403 case O_FILEENCODING:
404 c2w = &sp->conv.file2int;
405 w2c = &sp->conv.int2file;
406 break;
407 case O_INPUTENCODING:
408 c2w = &sp->conv.input2int;
409 w2c = NULL;
410 break;
411 default:
412 c2w = NULL;
413 w2c = NULL;
414 break;
415 }
416
417 if (!*enc) {
418 if (c2w) *c2w = raw2int;
419 if (w2c) *w2c = int2raw;
420 return 0;
421 }
422
423 if (!strcmp(enc, "WCHAR_T")) {
424 if (c2w) *c2w = CHAR_T_char2int;
425 if (w2c) *w2c = CHAR_T_int2char;
426 return 0;
427 }
428
429 id = iconv_open(enc, nl_langinfo(CODESET));
430 if (id == (iconv_t)-1)
431 goto err;
432 iconv_close(id);
433 id = iconv_open(nl_langinfo(CODESET), enc);
434 if (id == (iconv_t)-1)
435 goto err;
436 iconv_close(id);
437
438 switch (option) {
439 case O_FILEENCODING:
440 *c2w = fe_char2int;
441 *w2c = fe_int2char;
442 break;
443 case O_INPUTENCODING:
444 *c2w = ie_char2int;
445 break;
446 }
447
448 F_CLR(sp, SC_CONV_ERROR);
449 F_SET(sp, SC_SCR_REFORMAT);
450
451 return 0;
452 err:
453 switch (option) {
454 case O_FILEENCODING:
455 msgq(sp, M_ERR,
456 "321|File encoding conversion not supported");
457 break;
458 case O_INPUTENCODING:
459 msgq(sp, M_ERR,
460 "322|Input encoding conversion not supported");
461 break;
462 }
463 #endif
464 return 1;
465 }
466