1 /*- 2 * Copyright (c) 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * Copyright (c) 1993, 1994, 1995, 1996 5 * Keith Bostic. All rights reserved. 6 * Copyright (c) 2011, 2012 7 * Zhihao Yuan. All rights reserved. 8 * 9 * See the LICENSE file for redistribution information. 10 */ 11 12 #include "config.h" 13 14 #ifndef lint 15 static const char sccsid[] = "$Id: conv.c,v 2.40 2014/02/27 16:25:29 zy Exp $"; 16 #endif /* not lint */ 17 18 #include <sys/types.h> 19 #include <sys/queue.h> 20 #include <sys/time.h> 21 22 #include <bitstring.h> 23 #include <errno.h> 24 #include <limits.h> 25 #include <langinfo.h> 26 #include <locale.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <strings.h> 31 #include <unistd.h> 32 33 #include "common.h" 34 35 /* 36 * codeset -- 37 * Get the locale encoding. 38 * 39 * PUBLIC: char * codeset(void); 40 */ 41 char * 42 codeset(void) 43 { 44 static char *cs; 45 46 if (cs == NULL) 47 cs = nl_langinfo(CODESET); 48 49 return cs; 50 } 51 52 #ifdef USE_WIDECHAR 53 static int 54 raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen, 55 CHAR_T **dst) 56 { 57 int i; 58 CHAR_T **tostr = &cw->bp1.wc; 59 size_t *blen = &cw->blen1; 60 61 BINC_RETW(NULL, *tostr, *blen, len); 62 63 *tolen = len; 64 for (i = 0; i < len; ++i) 65 (*tostr)[i] = (u_char) str[i]; 66 67 *dst = cw->bp1.wc; 68 69 return 0; 70 } 71 72 #define CONV_BUFFER_SIZE 512 73 /* fill the buffer with codeset encoding of string pointed to by str 74 * left has the number of bytes left in str and is adjusted 75 * len contains the number of bytes put in the buffer 76 */ 77 #ifdef USE_ICONV 78 #define CONVERT(str, left, src, len) \ 79 do { \ 80 size_t outleft; \ 81 char *bp = buffer; \ 82 outleft = CONV_BUFFER_SIZE; \ 83 errno = 0; \ 84 if (iconv(id, (iconv_src_t)&str, &left, &bp, &outleft) \ 85 == -1 && errno != E2BIG) \ 86 goto err; \ 87 if ((len = CONV_BUFFER_SIZE - outleft) == 0) { \ 88 error = -left; \ 89 goto err; \ 90 } \ 91 src = buffer; \ 92 } while (0) 93 94 #define IC_RESET() \ 95 do { \ 96 if (id != (iconv_t)-1) \ 97 iconv(id, NULL, NULL, NULL, NULL); \ 98 } while(0) 99 #else 100 #define CONVERT(str, left, src, len) 101 #define IC_RESET() 102 #endif 103 104 static int 105 default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 106 size_t *tolen, CHAR_T **dst, iconv_t id) 107 { 108 size_t i = 0, j; 109 CHAR_T **tostr = &cw->bp1.wc; 110 size_t *blen = &cw->blen1; 111 mbstate_t mbs; 112 size_t n; 113 ssize_t nlen = len; 114 char *src = (char *)str; 115 #ifdef USE_ICONV 116 char buffer[CONV_BUFFER_SIZE]; 117 #endif 118 size_t left = len; 119 int error = 1; 120 121 BZERO(&mbs, 1); 122 BINC_RETW(NULL, *tostr, *blen, nlen); 123 124 #ifdef USE_ICONV 125 if (id != (iconv_t)-1) 126 CONVERT(str, left, src, len); 127 #endif 128 129 for (i = 0, j = 0; j < len; ) { 130 n = mbrtowc((*tostr)+i, src+j, len-j, &mbs); 131 /* NULL character converted */ 132 if (n == -2) 133 error = -(len-j); 134 if (n == -1 || n == -2) 135 goto err; 136 if (n == 0) 137 n = 1; 138 j += n; 139 if (++i >= *blen) { 140 nlen += 256; 141 BINC_RETW(NULL, *tostr, *blen, nlen); 142 } 143 if (id != (iconv_t)-1 && j == len && left) { 144 CONVERT(str, left, src, len); 145 j = 0; 146 } 147 } 148 149 error = 0; 150 err: 151 *tolen = i; 152 *dst = cw->bp1.wc; 153 IC_RESET(); 154 155 return error; 156 } 157 158 static int 159 fe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen, 160 CHAR_T **dst) 161 { 162 return default_char2int(sp, str, len, cw, tolen, dst, 163 sp->conv.id[IC_FE_CHAR2INT]); 164 } 165 166 static int 167 ie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen, 168 CHAR_T **dst) 169 { 170 return default_char2int(sp, str, len, cw, tolen, dst, 171 sp->conv.id[IC_IE_CHAR2INT]); 172 } 173 174 static int 175 cs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen, 176 CHAR_T **dst) 177 { 178 return default_char2int(sp, str, len, cw, tolen, dst, (iconv_t)-1); 179 } 180 181 static int 182 int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen, 183 char **dst) 184 { 185 int i; 186 char **tostr = &cw->bp1.c; 187 size_t *blen = &cw->blen1; 188 189 BINC_RETC(NULL, *tostr, *blen, len); 190 191 *tolen = len; 192 for (i = 0; i < len; ++i) 193 (*tostr)[i] = str[i]; 194 195 *dst = cw->bp1.c; 196 197 return 0; 198 } 199 200 static int 201 default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 202 size_t *tolen, char **pdst, iconv_t id) 203 { 204 size_t i, j, offset = 0; 205 char **tostr = &cw->bp1.c; 206 size_t *blen = &cw->blen1; 207 mbstate_t mbs; 208 size_t n; 209 ssize_t nlen = len + MB_CUR_MAX; 210 char *dst; 211 size_t buflen; 212 #ifdef USE_ICONV 213 char buffer[CONV_BUFFER_SIZE]; 214 #endif 215 int error = 1; 216 217 /* convert first len bytes of buffer and append it to cw->bp 218 * len is adjusted => 0 219 * offset contains the offset in cw->bp and is adjusted 220 * cw->bp is grown as required 221 */ 222 #ifdef USE_ICONV 223 #define CONVERT2(_buffer, lenp, cw, offset) \ 224 do { \ 225 char *bp = _buffer; \ 226 int ret; \ 227 do { \ 228 size_t outleft = cw->blen1 - offset; \ 229 char *obp = cw->bp1.c + offset; \ 230 if (cw->blen1 < offset + MB_CUR_MAX) { \ 231 nlen += 256; \ 232 BINC_RETC(NULL, cw->bp1.c, cw->blen1, \ 233 nlen); \ 234 } \ 235 errno = 0; \ 236 ret = iconv(id, (iconv_src_t)&bp, lenp, &obp, \ 237 &outleft); \ 238 if (ret == -1 && errno != E2BIG) \ 239 goto err; \ 240 offset = cw->blen1 - outleft; \ 241 } while (ret != 0); \ 242 } while (0) 243 #else 244 #define CONVERT2(_buffer, lenp, cw, offset) 245 #endif 246 247 248 BZERO(&mbs, 1); 249 BINC_RETC(NULL, *tostr, *blen, nlen); 250 dst = *tostr; buflen = *blen; 251 252 #ifdef USE_ICONV 253 if (id != (iconv_t)-1) { 254 dst = buffer; buflen = CONV_BUFFER_SIZE; 255 } 256 #endif 257 258 for (i = 0, j = 0; i < len; ++i) { 259 n = wcrtomb(dst+j, str[i], &mbs); 260 if (n == -1) 261 goto err; 262 j += n; 263 if (buflen < j + MB_CUR_MAX) { 264 if (id != (iconv_t)-1) { 265 CONVERT2(buffer, &j, cw, offset); 266 } else { 267 nlen += 256; 268 BINC_RETC(NULL, *tostr, *blen, nlen); 269 dst = *tostr; buflen = *blen; 270 } 271 } 272 } 273 274 n = wcrtomb(dst+j, L'\0', &mbs); 275 j += n - 1; /* don't count NUL at the end */ 276 *tolen = j; 277 278 if (id != (iconv_t)-1) { 279 CONVERT2(buffer, &j, cw, offset); 280 /* back to the initial state */ 281 CONVERT2(NULL, NULL, cw, offset); 282 *tolen = offset; 283 } 284 285 error = 0; 286 err: 287 if (error) 288 *tolen = j; 289 *pdst = cw->bp1.c; 290 IC_RESET(); 291 292 return error; 293 } 294 295 static int 296 fe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 297 size_t *tolen, char **dst) 298 { 299 return default_int2char(sp, str, len, cw, tolen, dst, 300 sp->conv.id[IC_FE_INT2CHAR]); 301 } 302 303 static int 304 cs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 305 size_t *tolen, char **dst) 306 { 307 return default_int2char(sp, str, len, cw, tolen, dst, (iconv_t)-1); 308 } 309 310 #endif 311 312 /* 313 * conv_init -- 314 * Initialize the iconv environment. 315 * 316 * PUBLIC: void conv_init(SCR *, SCR *); 317 */ 318 void 319 conv_init(SCR *orig, SCR *sp) 320 { 321 int i; 322 323 if (orig == NULL) 324 setlocale(LC_ALL, ""); 325 if (orig != NULL) 326 BCOPY(&orig->conv, &sp->conv, 1); 327 #ifdef USE_WIDECHAR 328 else { 329 char *ctype = setlocale(LC_CTYPE, NULL); 330 331 /* 332 * XXX 333 * This hack fixes the libncursesw issue on FreeBSD. 334 */ 335 if (!strcmp(ctype, "ko_KR.CP949")) 336 setlocale(LC_CTYPE, "ko_KR.eucKR"); 337 else if (!strcmp(ctype, "zh_CN.GB2312")) 338 setlocale(LC_CTYPE, "zh_CN.eucCN"); 339 else if (!strcmp(ctype, "zh_CN.GBK")) 340 setlocale(LC_CTYPE, "zh_CN.GB18030"); 341 else if (!strcmp(ctype, "zh_Hans_CN.GB2312")) 342 setlocale(LC_CTYPE, "zh_Hans_CN.eucCN"); 343 else if (!strcmp(ctype, "zh_Hans_CN.GBK")) 344 setlocale(LC_CTYPE, "zh_Hans_CN.GB18030"); 345 346 /* 347 * Switch to 8bit mode if locale is C; 348 * LC_CTYPE should be reseted to C if unmatched. 349 */ 350 if (!strcmp(ctype, "C") || !strcmp(ctype, "POSIX")) { 351 sp->conv.sys2int = sp->conv.file2int = raw2int; 352 sp->conv.int2sys = sp->conv.int2file = int2raw; 353 sp->conv.input2int = raw2int; 354 } else { 355 sp->conv.sys2int = cs_char2int; 356 sp->conv.int2sys = cs_int2char; 357 sp->conv.file2int = fe_char2int; 358 sp->conv.int2file = fe_int2char; 359 sp->conv.input2int = ie_char2int; 360 } 361 #ifdef USE_ICONV 362 o_set(sp, O_INPUTENCODING, OS_STRDUP, codeset(), 0); 363 #endif 364 } 365 #endif 366 367 /* iconv descriptors must be distinct to screens. */ 368 for (i = 0; i <= IC_IE_TO_UTF16; ++i) 369 sp->conv.id[i] = (iconv_t)-1; 370 #ifdef USE_ICONV 371 conv_enc(sp, O_INPUTENCODING, 0); 372 #endif 373 } 374 375 /* 376 * conv_enc -- 377 * Convert file/input encoding. 378 * 379 * PUBLIC: int conv_enc(SCR *, int, char *); 380 */ 381 int 382 conv_enc(SCR *sp, int option, char *enc) 383 { 384 #if defined(USE_WIDECHAR) && defined(USE_ICONV) 385 iconv_t *c2w, *w2c; 386 iconv_t id_c2w, id_w2c; 387 388 switch (option) { 389 case O_FILEENCODING: 390 c2w = sp->conv.id + IC_FE_CHAR2INT; 391 w2c = sp->conv.id + IC_FE_INT2CHAR; 392 if (!enc) 393 enc = O_STR(sp, O_FILEENCODING); 394 395 if (strcasecmp(codeset(), enc)) { 396 if ((id_c2w = iconv_open(codeset(), enc)) == 397 (iconv_t)-1) 398 goto err; 399 if ((id_w2c = iconv_open(enc, codeset())) == 400 (iconv_t)-1) 401 goto err; 402 } else { 403 id_c2w = (iconv_t)-1; 404 id_w2c = (iconv_t)-1; 405 } 406 407 break; 408 409 case O_INPUTENCODING: 410 c2w = sp->conv.id + IC_IE_CHAR2INT; 411 w2c = sp->conv.id + IC_IE_TO_UTF16; 412 if (!enc) 413 enc = O_STR(sp, O_INPUTENCODING); 414 415 if (strcasecmp(codeset(), enc)) { 416 if ((id_c2w = iconv_open(codeset(), enc)) == 417 (iconv_t)-1) 418 goto err; 419 } else 420 id_c2w = (iconv_t)-1; 421 422 /* UTF-16 can not be locale and can not be inputed. */ 423 if ((id_w2c = iconv_open("utf-16be", enc)) == (iconv_t)-1) 424 goto err; 425 426 break; 427 428 default: 429 abort(); 430 } 431 432 if (*c2w != (iconv_t)-1) 433 iconv_close(*c2w); 434 if (*w2c != (iconv_t)-1) 435 iconv_close(*w2c); 436 437 *c2w = id_c2w; 438 *w2c = id_w2c; 439 440 F_CLR(sp, SC_CONV_ERROR); 441 F_SET(sp, SC_SCR_REFORMAT); 442 443 return 0; 444 err: 445 #endif 446 switch (option) { 447 case O_FILEENCODING: 448 msgq(sp, M_ERR, "321|File encoding conversion not supported"); 449 break; 450 case O_INPUTENCODING: 451 msgq(sp, M_ERR, "322|Input encoding conversion not supported"); 452 break; 453 } 454 return 1; 455 } 456 457 /* 458 * conv_end -- 459 * Close the iconv descriptors, release the buffer. 460 * 461 * PUBLIC: void conv_end(SCR *); 462 */ 463 void 464 conv_end(SCR *sp) 465 { 466 #if defined(USE_WIDECHAR) && defined(USE_ICONV) 467 int i; 468 for (i = 0; i <= IC_IE_TO_UTF16; ++i) 469 if (sp->conv.id[i] != (iconv_t)-1) 470 iconv_close(sp->conv.id[i]); 471 if (sp->cw.bp1.c != NULL) 472 free(sp->cw.bp1.c); 473 #endif 474 } 475