xref: /minix/external/bsd/nvi/dist/common/conv.c (revision 0a6a1f1d)
1 /*	$NetBSD: conv.c,v 1.4 2014/01/26 21:43:45 christos Exp $ */
2 /*-
3  * Copyright (c) 1993, 1994
4  *	The Regents of the University of California.  All rights reserved.
5  * Copyright (c) 1993, 1994, 1995, 1996
6  *	Keith Bostic.  All rights reserved.
7  *
8  * See the LICENSE file for redistribution information.
9  */
10 
11 #include "config.h"
12 
13 #include <sys/cdefs.h>
14 #if 0
15 #ifndef lint
16 static const char sccsid[] = "Id: conv.c,v 1.27 2001/08/18 21:41:41 skimo Exp  (Berkeley) Date: 2001/08/18 21:41:41 ";
17 #endif /* not lint */
18 #else
19 __RCSID("$NetBSD: conv.c,v 1.4 2014/01/26 21:43:45 christos Exp $");
20 #endif
21 
22 #include <sys/types.h>
23 #include <sys/queue.h>
24 #include <sys/time.h>
25 
26 #include <bitstring.h>
27 #include <errno.h>
28 #include <limits.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33 
34 #include "common.h"
35 
36 #ifdef USE_ICONV
37 #include <langinfo.h>
38 #include <iconv.h>
39 
40 #define LANGCODESET	nl_langinfo(CODESET)
41 #else
42 typedef int	iconv_t;
43 
44 #define LANGCODESET	""
45 #endif
46 
47 #include <locale.h>
48 
49 #ifdef USE_WIDECHAR
50 static int
raw2int(SCR * sp,const char * str,ssize_t len,CONVWIN * cw,size_t * tolen,const CHAR_T ** dst)51 raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen,
52 	const CHAR_T **dst)
53 {
54     int i;
55     CHAR_T **tostr = (CHAR_T **)(void *)&cw->bp1;
56     size_t  *blen = &cw->blen1;
57 
58     BINC_RETW(NULL, *tostr, *blen, len);
59 
60     *tolen = len;
61     for (i = 0; i < len; ++i) {
62 	CHAR_T w = (u_char)str[i];
63 	memcpy((*tostr) + i, &w, sizeof(**tostr));
64     }
65 
66     *dst = cw->bp1;
67 
68     return 0;
69 }
70 
/*
 * Conversion-error policy.
 *
 * By default a byte/character that cannot be converted is passed through
 * unchanged and conversion continues:
 *
 *   HANDLE_ICONV_ERROR(o, i, ol, il)
 *	copies one byte from input cursor i to output cursor o, advances
 *	both cursors and decrements the remaining output (ol) and input
 *	(il) byte counts.  The caller must make sure ol and il are non-zero.
 *
 *   HANDLE_MBR_ERROR(n, mbs, d, s)
 *	stores s into d, resets the conversion state mbs and sets the
 *	"units consumed/produced" count n to 1.
 *
 * When ERROR_ON_CONVERT is defined both macros instead jump to the
 * enclosing function's "err" label.  They are function-like in both
 * configurations so that every call site, which always passes an
 * argument list, expands to a valid statement.
 */
#ifndef ERROR_ON_CONVERT
#define HANDLE_ICONV_ERROR(o, i, ol, il) do {				\
		*(o)++ = *(i)++;					\
		(ol)--; (il)--;						\
	} while (/*CONSTCOND*/0)
#define HANDLE_MBR_ERROR(n, mbs, d, s) do {				\
		(d) = (s);						\
		MEMSET(&(mbs), 0, 1); 					\
		(n) = 1; 						\
	} while (/*CONSTCOND*/0)
#else
#define HANDLE_ICONV_ERROR(o, i, ol, il)	goto err
#define	HANDLE_MBR_ERROR(n, mbs, d, s)		goto err
#endif
85 
#define CONV_BUFFER_SIZE    512
/*
 * CONVERT(str, left, src, len) --
 *	Fill the staging buffer with the next chunk of str recoded by iconv.
 *
 *	str	input cursor; advanced past the bytes consumed
 *	left	number of input bytes remaining; decreased accordingly
 *	src	set to the start of the staging buffer
 *	len	set to the number of bytes placed in the staging buffer
 *
 * The macro also uses, by name, the caller's locals "buffer" (a char array
 * of CONV_BUFFER_SIZE bytes), "id" (the iconv descriptor) and "error", and
 * the caller's "err" label.  If nothing at all could be placed in the
 * buffer, error is set to -left and control jumps to err.
 *
 * E2BIG from iconv() only says the staging buffer is full; the remaining
 * input is picked up by the next CONVERT call, so it is not an error here.
 * HANDLE_ICONV_ERROR stores a byte into the buffer, so it is invoked only
 * while there is room left (outleft != 0); a real conversion error that
 * coincides with a full buffer is met again, with room, on the next call.
 */
#ifdef USE_ICONV
#define CONVERT(str, left, src, len)				    	\
    do {								\
	size_t outleft = CONV_BUFFER_SIZE;				\
	char *bp = buffer;						\
	errno = 0;							\
	if (iconv(id, (const char **)&str, &left, &bp, &outleft) 	\
	    == (size_t)-1 && errno != E2BIG && outleft != 0)		\
		HANDLE_ICONV_ERROR(bp, str, outleft, left);		\
	if ((len = CONV_BUFFER_SIZE - outleft) == 0) {			\
	    error = -left;						\
	    goto err;							\
	}				    				\
	src = buffer;							\
    } while (0)
#else
#define CONVERT(str, left, src, len)
#endif
110 
111 static int
default_char2int(SCR * sp,const char * str,ssize_t len,CONVWIN * cw,size_t * tolen,const CHAR_T ** dst,const char * enc)112 default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
113 		size_t *tolen, const CHAR_T **dst, const char *enc)
114 {
115     int j;
116     size_t i = 0;
117     CHAR_T **tostr = (CHAR_T **)(void *)&cw->bp1;
118     size_t  *blen = &cw->blen1;
119     mbstate_t mbs;
120     size_t   n;
121     ssize_t  nlen = len;
122     const char *src = (const char *)str;
123     iconv_t	id = (iconv_t)-1;
124     char	buffer[CONV_BUFFER_SIZE];
125     size_t	left = len;
126     int		error = 1;
127 
128     MEMSET(&mbs, 0, 1);
129     BINC_RETW(NULL, *tostr, *blen, nlen);
130 
131 #ifdef USE_ICONV
132     if (strcmp(nl_langinfo(CODESET), enc)) {
133 	id = iconv_open(nl_langinfo(CODESET), enc);
134 	if (id == (iconv_t)-1)
135 	    goto err;
136 	CONVERT(str, left, src, len);
137     }
138 #endif
139 
140     for (i = 0, j = 0; j < len; ) {
141 	CHAR_T w;
142 	n = mbrtowc(&w, src + j, len - j, &mbs);
143 	memcpy((*tostr) + i, &w, sizeof(**tostr));
144 	/* NULL character converted */
145 	if (n == (size_t)-2) error = -(len - j);
146 	if (n == (size_t)-1 || n == (size_t)-2) {
147 	    HANDLE_MBR_ERROR(n, mbs, w, src[j]);
148 	    memcpy((*tostr) + i, &w, sizeof(**tostr));
149 	}
150 	if (n == 0) n = 1;
151 	j += n;
152 	if (++i >= *blen) {
153 	    nlen += 256;
154 	    BINC_RETW(NULL, *tostr, *blen, nlen);
155 	}
156 	if (id != (iconv_t)-1 && j == len && left) {
157 	    CONVERT(str, left, src, len);
158 	    j = 0;
159 	}
160     }
161     *tolen = i;
162 
163     if (id != (iconv_t)-1)
164 	iconv_close(id);
165 
166     *dst = cw->bp1;
167 
168     return 0;
169 err:
170     *tolen = i;
171     if (id != (iconv_t)-1)
172 	iconv_close(id);
173     *dst = cw->bp1;
174 
175     return error;
176 }
177 
178 static int
fe_char2int(SCR * sp,const char * str,ssize_t len,CONVWIN * cw,size_t * tolen,const CHAR_T ** dst)179 fe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
180 	    size_t *tolen, const CHAR_T **dst)
181 {
182     return default_char2int(sp, str, len, cw, tolen, dst, O_STR(sp, O_FILEENCODING));
183 }
184 
185 static int
ie_char2int(SCR * sp,const char * str,ssize_t len,CONVWIN * cw,size_t * tolen,const CHAR_T ** dst)186 ie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
187 	    size_t *tolen, const CHAR_T **dst)
188 {
189     return default_char2int(sp, str, len, cw, tolen, dst, O_STR(sp, O_INPUTENCODING));
190 }
191 
192 static int
cs_char2int(SCR * sp,const char * str,ssize_t len,CONVWIN * cw,size_t * tolen,const CHAR_T ** dst)193 cs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
194 	    size_t *tolen, const CHAR_T **dst)
195 {
196     return default_char2int(sp, str, len, cw, tolen, dst, LANGCODESET);
197 }
198 
199 static int
CHAR_T_int2char(SCR * sp,const CHAR_T * str,ssize_t len,CONVWIN * cw,size_t * tolen,const char ** dst)200 CHAR_T_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
201 	size_t *tolen, const char **dst)
202 {
203     *tolen = len * sizeof(CHAR_T);
204     *dst = (const char *)(const void *)str;
205 
206     return 0;
207 }
208 
209 static int
CHAR_T_char2int(SCR * sp,const char * str,ssize_t len,CONVWIN * cw,size_t * tolen,const CHAR_T ** dst)210 CHAR_T_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw,
211 	size_t *tolen, const CHAR_T **dst)
212 {
213     *tolen = len / sizeof(CHAR_T);
214     *dst = (const CHAR_T*) str;
215 
216     return 0;
217 }
218 
219 static int
int2raw(SCR * sp,const CHAR_T * str,ssize_t len,CONVWIN * cw,size_t * tolen,const char ** dst)220 int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen,
221 	const char **dst)
222 {
223     int i;
224     char **tostr = (char **)(void *)&cw->bp1;
225     size_t  *blen = &cw->blen1;
226 
227     BINC_RETC(NULL, *tostr, *blen, len);
228 
229     *tolen = len;
230     for (i = 0; i < len; ++i) {
231 	CHAR_T w;
232 	memcpy(&w, str + i, sizeof(w));
233 	(*tostr)[i] = w;
234     }
235 
236     *dst = cw->bp1;
237 
238     return 0;
239 }
240 
241 static int
default_int2char(SCR * sp,const CHAR_T * str,ssize_t len,CONVWIN * cw,size_t * tolen,const char ** pdst,const char * enc)242 default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
243 		size_t *tolen, const char **pdst, const char *enc)
244 {
245     size_t i, j;
246     int offset = 0;
247     char **tostr = (char **)(void *)&cw->bp1;
248     size_t  *blen = &cw->blen1;
249     mbstate_t mbs;
250     size_t n;
251     ssize_t  nlen = len + MB_CUR_MAX;
252     char *dst;
253     size_t buflen;
254     char	buffer[CONV_BUFFER_SIZE];
255     iconv_t	id = (iconv_t)-1;
256 
257 /* convert first len bytes of buffer and append it to cw->bp
258  * len is adjusted => 0
259  * offset contains the offset in cw->bp and is adjusted
260  * cw->bp is grown as required
261  */
262 #ifdef USE_ICONV
263 #define CONVERT2(len, cw, offset)					\
264     do {								\
265 	const char *bp = buffer;					\
266 	while (len != 0) {						\
267 	    size_t outleft = cw->blen1 - offset;			\
268 	    char *obp = (char *)cw->bp1 + offset;		    	\
269 	    if (cw->blen1 < offset + MB_CUR_MAX) {		    	\
270 		nlen += 256;						\
271 		BINC_RETC(NULL, cw->bp1, cw->blen1, nlen);		\
272 	    }						    		\
273 	    errno = 0;						    	\
274 	    if (iconv(id, &bp, &len, &obp, &outleft) == (size_t)-1 &&	\
275 		errno != E2BIG) 					\
276 		    HANDLE_ICONV_ERROR(obp, bp, outleft, len);		\
277 	    offset = cw->blen1 - outleft;			        \
278 	}							        \
279     } while (0)
280 #else
281 #define CONVERT2(len, cw, offset)
282 #endif
283 
284 
285     MEMSET(&mbs, 0, 1);
286     BINC_RETC(NULL, *tostr, *blen, nlen);
287     dst = *tostr; buflen = *blen;
288 
289 #ifdef USE_ICONV
290     if (strcmp(nl_langinfo(CODESET), enc)) {
291 	id = iconv_open(enc, nl_langinfo(CODESET));
292 	if (id == (iconv_t)-1)
293 	    goto err;
294 	dst = buffer; buflen = CONV_BUFFER_SIZE;
295     }
296 #endif
297 
298     for (i = 0, j = 0; i < (size_t)len; ++i) {
299 	CHAR_T w;
300 	memcpy(&w, str + i, sizeof(w));
301 	n = wcrtomb(dst + j, w, &mbs);
302 	if (n == (size_t)-1)
303 	   HANDLE_MBR_ERROR(n, mbs, dst[j], w);
304 	j += n;
305 	if (buflen < j + MB_CUR_MAX) {
306 	    if (id != (iconv_t)-1) {
307 		CONVERT2(j, cw, offset);
308 	    } else {
309 		nlen += 256;
310 		BINC_RETC(NULL, *tostr, *blen, nlen);
311 		dst = *tostr; buflen = *blen;
312 	    }
313 	}
314     }
315 
316     n = wcrtomb(dst + j, L'\0', &mbs);
317     j += n - 1;				/* don't count NUL at the end */
318     *tolen = j;
319 
320     if (id != (iconv_t)-1) {
321 	CONVERT2(j, cw, offset);
322 	*tolen = offset;
323     }
324 
325     *pdst = cw->bp1;
326 
327     return 0;
328 err:
329     *tolen = j;
330 
331     *pdst = cw->bp1;
332 
333     return 1;
334 }
335 
336 static int
fe_int2char(SCR * sp,const CHAR_T * str,ssize_t len,CONVWIN * cw,size_t * tolen,const char ** dst)337 fe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
338 	    size_t *tolen, const char **dst)
339 {
340     return default_int2char(sp, str, len, cw, tolen, dst, O_STR(sp, O_FILEENCODING));
341 }
342 
343 static int
cs_int2char(SCR * sp,const CHAR_T * str,ssize_t len,CONVWIN * cw,size_t * tolen,const char ** dst)344 cs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw,
345 	    size_t *tolen, const char **dst)
346 {
347     return default_int2char(sp, str, len, cw, tolen, dst, LANGCODESET);
348 }
349 
350 #endif
351 
352 
353 void
conv_init(SCR * orig,SCR * sp)354 conv_init (SCR *orig, SCR *sp)
355 {
356     if (orig != NULL)
357 	MEMCPY(&sp->conv, &orig->conv, 1);
358     else {
359 	setlocale(LC_ALL, "");
360 #ifdef USE_WIDECHAR
361 	sp->conv.sys2int = cs_char2int;
362 	sp->conv.int2sys = cs_int2char;
363 	sp->conv.file2int = fe_char2int;
364 	sp->conv.int2file = fe_int2char;
365 	sp->conv.input2int = ie_char2int;
366 #endif
367 #ifdef USE_ICONV
368 	o_set(sp, O_FILEENCODING, OS_STRDUP, nl_langinfo(CODESET), 0);
369 	o_set(sp, O_INPUTENCODING, OS_STRDUP, nl_langinfo(CODESET), 0);
370 #endif
371     }
372 }
373 
374 int
conv_enc(SCR * sp,int option,const char * enc)375 conv_enc (SCR *sp, int option, const char *enc)
376 {
377 #if defined(USE_WIDECHAR) && defined(USE_ICONV)
378     iconv_t id;
379     char2wchar_t    *c2w;
380     wchar2char_t    *w2c;
381 
382     switch (option) {
383     case O_FILEENCODING:
384 	c2w = &sp->conv.file2int;
385 	w2c = &sp->conv.int2file;
386 	break;
387     case O_INPUTENCODING:
388 	c2w = &sp->conv.input2int;
389 	w2c = NULL;
390 	break;
391     default:
392 	c2w = NULL;
393 	w2c = NULL;
394 	break;
395     }
396 
397     if (!*enc) {
398 	if (c2w) *c2w = raw2int;
399 	if (w2c) *w2c = int2raw;
400 	return 0;
401     }
402 
403     if (!strcmp(enc, "WCHAR_T")) {
404 	if (c2w) *c2w = CHAR_T_char2int;
405 	if (w2c) *w2c = CHAR_T_int2char;
406 	return 0;
407     }
408 
409     id = iconv_open(enc, nl_langinfo(CODESET));
410     if (id == (iconv_t)-1)
411 	goto err;
412     iconv_close(id);
413     id = iconv_open(nl_langinfo(CODESET), enc);
414     if (id == (iconv_t)-1)
415 	goto err;
416     iconv_close(id);
417 
418     switch (option) {
419     case O_FILEENCODING:
420 	*c2w = fe_char2int;
421 	*w2c = fe_int2char;
422 	break;
423     case O_INPUTENCODING:
424 	*c2w = ie_char2int;
425 	break;
426     }
427 
428     F_CLR(sp, SC_CONV_ERROR);
429     F_SET(sp, SC_SCR_REFORMAT);
430 
431     return 0;
432 err:
433     switch (option) {
434     case O_FILEENCODING:
435 	msgq(sp, M_ERR,
436 	    "321|File encoding conversion not supported");
437 	break;
438     case O_INPUTENCODING:
439 	msgq(sp, M_ERR,
440 	    "322|Input encoding conversion not supported");
441 	break;
442     }
443 #endif
444     return 1;
445 }
446 
447