1 /* $NetBSD: conv.c,v 1.4 2014/01/26 21:43:45 christos Exp $ */
2 /*-
3 * Copyright (c) 1993, 1994
4 * The Regents of the University of California. All rights reserved.
5 * Copyright (c) 1993, 1994, 1995, 1996
6 * Keith Bostic. All rights reserved.
7 *
8 * See the LICENSE file for redistribution information.
9 */
10
11 #include "config.h"
12
13 #include <sys/cdefs.h>
14 #if 0
15 #ifndef lint
16 static const char sccsid[] = "Id: conv.c,v 1.27 2001/08/18 21:41:41 skimo Exp (Berkeley) Date: 2001/08/18 21:41:41 ";
17 #endif /* not lint */
18 #else
19 __RCSID("$NetBSD: conv.c,v 1.4 2014/01/26 21:43:45 christos Exp $");
20 #endif
21
22 #include <sys/types.h>
23 #include <sys/queue.h>
24 #include <sys/time.h>
25
26 #include <bitstring.h>
27 #include <errno.h>
28 #include <limits.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33
34 #include "common.h"
35
#ifdef USE_ICONV
#include <langinfo.h>
#include <iconv.h>

/* Name of the current locale's codeset, as reported by nl_langinfo(). */
#define LANGCODESET	nl_langinfo(CODESET)
#else
/*
 * No iconv support: provide a stand-in iconv_t so that the descriptor
 * variables declared further down still compile.
 */
typedef int	iconv_t;

/* No codeset lookup in this configuration; an empty name is used. */
#define LANGCODESET	""
#endif
46
47 #include <locale.h>
48
49 #ifdef USE_WIDECHAR
50 static int
raw2int(SCR * sp,const char * str,ssize_t len,CONVWIN * cw,size_t * tolen,const CHAR_T ** dst)51 raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
52 const CHAR_T **dst)
53 {
54 int i;
55 CHAR_T **tostr = (CHAR_T **)(void *)&cw->bp1;
56 size_t *blen = &cw->blen1;
57
58 BINC_RETW(NULL, *tostr, *blen, len);
59
60 *tolen = len;
61 for (i = 0; i < len; ++i) {
62 CHAR_T w = (u_char)str[i];
63 memcpy((*tostr) + i, &w, sizeof(**tostr));
64 }
65
66 *dst = cw->bp1;
67
68 return 0;
69 }
70
/*
 * Recovery hooks invoked where a conversion call reports an error.
 *
 * Default build: the offending unit is passed through unconverted and the
 * caller carries on.
 *	HANDLE_ICONV_ERROR(o, i, ol, il)
 *		copy one unit from input pointer i to output pointer o,
 *		advance both pointers, and decrement the remaining output
 *		(ol) and input (il) counts.
 *	HANDLE_MBR_ERROR(n, mbs, d, s)
 *		store s into d, reset mbs through MEMSET, and set n to 1.
 *
 * ERROR_ON_CONVERT build: both hooks jump to the "err" label of the
 * function they are used in.  They must stay function-like macros so that
 * the argument list written at every call site is consumed; an object-like
 * definition would expand to "goto err(...)", which does not compile.
 */
#ifndef ERROR_ON_CONVERT
#define HANDLE_ICONV_ERROR(o, i, ol, il) do {				\
	*o++ = *i++;							\
	ol--; il--;							\
} while (/*CONSTCOND*/0)
#define HANDLE_MBR_ERROR(n, mbs, d, s) do {				\
	d = s;								\
	MEMSET(&mbs, 0, 1);						\
	n = 1;								\
} while (/*CONSTCOND*/0)
#else
#define HANDLE_ICONV_ERROR(o, i, ol, il)	goto err
#define HANDLE_MBR_ERROR(n, mbs, d, s)		goto err
#endif
85
#define CONV_BUFFER_SIZE 512
/*
 * CONVERT(str, left, src, len) --
 *	Fill the caller's local "buffer" with the next chunk of iconv output
 *	for the string pointed to by str, using the caller's descriptor "id".
 *
 *	str	advanced past the input that was consumed
 *	left	number of input bytes left in str; adjusted
 *	src	set to buffer on success
 *	len	set to the number of bytes put in the buffer
 *
 *	If nothing at all was produced, the caller's "error" is set to -left
 *	and control jumps to the caller's "err" label.
 *
 *	E2BIG only means the chunk is full; the bytes already produced are
 *	valid and the caller asks for the next chunk later, so it is not
 *	routed to the error hook.  For other failures the hook is only run
 *	while there is still room in the buffer: its default form stores one
 *	byte at bp, which would land past the end of buffer when outleft is
 *	0.  In that case the chunk is returned as is and the same failure is
 *	met again, with an empty buffer, on the next invocation.
 */
#ifdef USE_ICONV
#define CONVERT(str, left, src, len)					\
	do {								\
		size_t outleft;						\
		char *bp = buffer;					\
		outleft = CONV_BUFFER_SIZE;				\
		errno = 0;						\
		if (iconv(id, (const char **)&str, &left, &bp, &outleft)	\
		    == (size_t)-1 && errno != E2BIG && outleft != 0)	\
			HANDLE_ICONV_ERROR(bp, str, outleft, left);	\
		if ((len = CONV_BUFFER_SIZE - outleft) == 0) {		\
			error = -left;					\
			goto err;					\
		}							\
		src = buffer;						\
	} while (0)
#else
#define CONVERT(str, left, src, len)
#endif
110
111 static int
default_char2int(SCR * sp,const char * str,ssize_t len,CONVWIN * cw,size_t * tolen,const CHAR_T ** dst,const char * enc)112 default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
113 size_t *tolen, const CHAR_T **dst, const char *enc)
114 {
115 int j;
116 size_t i = 0;
117 CHAR_T **tostr = (CHAR_T **)(void *)&cw->bp1;
118 size_t *blen = &cw->blen1;
119 mbstate_t mbs;
120 size_t n;
121 ssize_t nlen = len;
122 const char *src = (const char *)str;
123 iconv_t id = (iconv_t)-1;
124 char buffer[CONV_BUFFER_SIZE];
125 size_t left = len;
126 int error = 1;
127
128 MEMSET(&mbs, 0, 1);
129 BINC_RETW(NULL, *tostr, *blen, nlen);
130
131 #ifdef USE_ICONV
132 if (strcmp(nl_langinfo(CODESET), enc)) {
133 id = iconv_open(nl_langinfo(CODESET), enc);
134 if (id == (iconv_t)-1)
135 goto err;
136 CONVERT(str, left, src, len);
137 }
138 #endif
139
140 for (i = 0, j = 0; j < len; ) {
141 CHAR_T w;
142 n = mbrtowc(&w, src + j, len - j, &mbs);
143 memcpy((*tostr) + i, &w, sizeof(**tostr));
144 /* NULL character converted */
145 if (n == (size_t)-2) error = -(len - j);
146 if (n == (size_t)-1 || n == (size_t)-2) {
147 HANDLE_MBR_ERROR(n, mbs, w, src[j]);
148 memcpy((*tostr) + i, &w, sizeof(**tostr));
149 }
150 if (n == 0) n = 1;
151 j += n;
152 if (++i >= *blen) {
153 nlen += 256;
154 BINC_RETW(NULL, *tostr, *blen, nlen);
155 }
156 if (id != (iconv_t)-1 && j == len && left) {
157 CONVERT(str, left, src, len);
158 j = 0;
159 }
160 }
161 *tolen = i;
162
163 if (id != (iconv_t)-1)
164 iconv_close(id);
165
166 *dst = cw->bp1;
167
168 return 0;
169 err:
170 *tolen = i;
171 if (id != (iconv_t)-1)
172 iconv_close(id);
173 *dst = cw->bp1;
174
175 return error;
176 }
177
178 static int
fe_char2int(SCR * sp,const char * str,ssize_t len,CONVWIN * cw,size_t * tolen,const CHAR_T ** dst)179 fe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
180 size_t *tolen, const CHAR_T **dst)
181 {
182 return default_char2int(sp, str, len, cw, tolen, dst, O_STR(sp, O_FILEENCODING));
183 }
184
185 static int
ie_char2int(SCR * sp,const char * str,ssize_t len,CONVWIN * cw,size_t * tolen,const CHAR_T ** dst)186 ie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
187 size_t *tolen, const CHAR_T **dst)
188 {
189 return default_char2int(sp, str, len, cw, tolen, dst, O_STR(sp, O_INPUTENCODING));
190 }
191
192 static int
cs_char2int(SCR * sp,const char * str,ssize_t len,CONVWIN * cw,size_t * tolen,const CHAR_T ** dst)193 cs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
194 size_t *tolen, const CHAR_T **dst)
195 {
196 return default_char2int(sp, str, len, cw, tolen, dst, LANGCODESET);
197 }
198
199 static int
CHAR_T_int2char(SCR * sp,const CHAR_T * str,ssize_t len,CONVWIN * cw,size_t * tolen,const char ** dst)200 CHAR_T_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
201 size_t *tolen, const char **dst)
202 {
203 *tolen = len * sizeof(CHAR_T);
204 *dst = (const char *)(const void *)str;
205
206 return 0;
207 }
208
209 static int
CHAR_T_char2int(SCR * sp,const char * str,ssize_t len,CONVWIN * cw,size_t * tolen,const CHAR_T ** dst)210 CHAR_T_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
211 size_t *tolen, const CHAR_T **dst)
212 {
213 *tolen = len / sizeof(CHAR_T);
214 *dst = (const CHAR_T*) str;
215
216 return 0;
217 }
218
219 static int
int2raw(SCR * sp,const CHAR_T * str,ssize_t len,CONVWIN * cw,size_t * tolen,const char ** dst)220 int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen,
221 const char **dst)
222 {
223 int i;
224 char **tostr = (char **)(void *)&cw->bp1;
225 size_t *blen = &cw->blen1;
226
227 BINC_RETC(NULL, *tostr, *blen, len);
228
229 *tolen = len;
230 for (i = 0; i < len; ++i) {
231 CHAR_T w;
232 memcpy(&w, str + i, sizeof(w));
233 (*tostr)[i] = w;
234 }
235
236 *dst = cw->bp1;
237
238 return 0;
239 }
240
241 static int
default_int2char(SCR * sp,const CHAR_T * str,ssize_t len,CONVWIN * cw,size_t * tolen,const char ** pdst,const char * enc)242 default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
243 size_t *tolen, const char **pdst, const char *enc)
244 {
245 size_t i, j;
246 int offset = 0;
247 char **tostr = (char **)(void *)&cw->bp1;
248 size_t *blen = &cw->blen1;
249 mbstate_t mbs;
250 size_t n;
251 ssize_t nlen = len + MB_CUR_MAX;
252 char *dst;
253 size_t buflen;
254 char buffer[CONV_BUFFER_SIZE];
255 iconv_t id = (iconv_t)-1;
256
257 /* convert first len bytes of buffer and append it to cw->bp
258 * len is adjusted => 0
259 * offset contains the offset in cw->bp and is adjusted
260 * cw->bp is grown as required
261 */
262 #ifdef USE_ICONV
263 #define CONVERT2(len, cw, offset) \
264 do { \
265 const char *bp = buffer; \
266 while (len != 0) { \
267 size_t outleft = cw->blen1 - offset; \
268 char *obp = (char *)cw->bp1 + offset; \
269 if (cw->blen1 < offset + MB_CUR_MAX) { \
270 nlen += 256; \
271 BINC_RETC(NULL, cw->bp1, cw->blen1, nlen); \
272 } \
273 errno = 0; \
274 if (iconv(id, &bp, &len, &obp, &outleft) == (size_t)-1 && \
275 errno != E2BIG) \
276 HANDLE_ICONV_ERROR(obp, bp, outleft, len); \
277 offset = cw->blen1 - outleft; \
278 } \
279 } while (0)
280 #else
281 #define CONVERT2(len, cw, offset)
282 #endif
283
284
285 MEMSET(&mbs, 0, 1);
286 BINC_RETC(NULL, *tostr, *blen, nlen);
287 dst = *tostr; buflen = *blen;
288
289 #ifdef USE_ICONV
290 if (strcmp(nl_langinfo(CODESET), enc)) {
291 id = iconv_open(enc, nl_langinfo(CODESET));
292 if (id == (iconv_t)-1)
293 goto err;
294 dst = buffer; buflen = CONV_BUFFER_SIZE;
295 }
296 #endif
297
298 for (i = 0, j = 0; i < (size_t)len; ++i) {
299 CHAR_T w;
300 memcpy(&w, str + i, sizeof(w));
301 n = wcrtomb(dst + j, w, &mbs);
302 if (n == (size_t)-1)
303 HANDLE_MBR_ERROR(n, mbs, dst[j], w);
304 j += n;
305 if (buflen < j + MB_CUR_MAX) {
306 if (id != (iconv_t)-1) {
307 CONVERT2(j, cw, offset);
308 } else {
309 nlen += 256;
310 BINC_RETC(NULL, *tostr, *blen, nlen);
311 dst = *tostr; buflen = *blen;
312 }
313 }
314 }
315
316 n = wcrtomb(dst + j, L'\0', &mbs);
317 j += n - 1; /* don't count NUL at the end */
318 *tolen = j;
319
320 if (id != (iconv_t)-1) {
321 CONVERT2(j, cw, offset);
322 *tolen = offset;
323 }
324
325 *pdst = cw->bp1;
326
327 return 0;
328 err:
329 *tolen = j;
330
331 *pdst = cw->bp1;
332
333 return 1;
334 }
335
336 static int
fe_int2char(SCR * sp,const CHAR_T * str,ssize_t len,CONVWIN * cw,size_t * tolen,const char ** dst)337 fe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
338 size_t *tolen, const char **dst)
339 {
340 return default_int2char(sp, str, len, cw, tolen, dst, O_STR(sp, O_FILEENCODING));
341 }
342
343 static int
cs_int2char(SCR * sp,const CHAR_T * str,ssize_t len,CONVWIN * cw,size_t * tolen,const char ** dst)344 cs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
345 size_t *tolen, const char **dst)
346 {
347 return default_int2char(sp, str, len, cw, tolen, dst, LANGCODESET);
348 }
349
350 #endif
351
352
353 void
conv_init(SCR * orig,SCR * sp)354 conv_init (SCR *orig, SCR *sp)
355 {
356 if (orig != NULL)
357 MEMCPY(&sp->conv, &orig->conv, 1);
358 else {
359 setlocale(LC_ALL, "");
360 #ifdef USE_WIDECHAR
361 sp->conv.sys2int = cs_char2int;
362 sp->conv.int2sys = cs_int2char;
363 sp->conv.file2int = fe_char2int;
364 sp->conv.int2file = fe_int2char;
365 sp->conv.input2int = ie_char2int;
366 #endif
367 #ifdef USE_ICONV
368 o_set(sp, O_FILEENCODING, OS_STRDUP, nl_langinfo(CODESET), 0);
369 o_set(sp, O_INPUTENCODING, OS_STRDUP, nl_langinfo(CODESET), 0);
370 #endif
371 }
372 }
373
374 int
conv_enc(SCR * sp,int option,const char * enc)375 conv_enc (SCR *sp, int option, const char *enc)
376 {
377 #if defined(USE_WIDECHAR) && defined(USE_ICONV)
378 iconv_t id;
379 char2wchar_t *c2w;
380 wchar2char_t *w2c;
381
382 switch (option) {
383 case O_FILEENCODING:
384 c2w = &sp->conv.file2int;
385 w2c = &sp->conv.int2file;
386 break;
387 case O_INPUTENCODING:
388 c2w = &sp->conv.input2int;
389 w2c = NULL;
390 break;
391 default:
392 c2w = NULL;
393 w2c = NULL;
394 break;
395 }
396
397 if (!*enc) {
398 if (c2w) *c2w = raw2int;
399 if (w2c) *w2c = int2raw;
400 return 0;
401 }
402
403 if (!strcmp(enc, "WCHAR_T")) {
404 if (c2w) *c2w = CHAR_T_char2int;
405 if (w2c) *w2c = CHAR_T_int2char;
406 return 0;
407 }
408
409 id = iconv_open(enc, nl_langinfo(CODESET));
410 if (id == (iconv_t)-1)
411 goto err;
412 iconv_close(id);
413 id = iconv_open(nl_langinfo(CODESET), enc);
414 if (id == (iconv_t)-1)
415 goto err;
416 iconv_close(id);
417
418 switch (option) {
419 case O_FILEENCODING:
420 *c2w = fe_char2int;
421 *w2c = fe_int2char;
422 break;
423 case O_INPUTENCODING:
424 *c2w = ie_char2int;
425 break;
426 }
427
428 F_CLR(sp, SC_CONV_ERROR);
429 F_SET(sp, SC_SCR_REFORMAT);
430
431 return 0;
432 err:
433 switch (option) {
434 case O_FILEENCODING:
435 msgq(sp, M_ERR,
436 "321|File encoding conversion not supported");
437 break;
438 case O_INPUTENCODING:
439 msgq(sp, M_ERR,
440 "322|Input encoding conversion not supported");
441 break;
442 }
443 #endif
444 return 1;
445 }
446
447