1 /*- 2 * Copyright (c) 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * Copyright (c) 1993, 1994, 1995, 1996 5 * Keith Bostic. All rights reserved. 6 * Copyright (c) 2011, 2012 7 * Zhihao Yuan. All rights reserved. 8 * 9 * See the LICENSE file for redistribution information. 10 */ 11 12 #include "config.h" 13 14 #include <sys/types.h> 15 #include <sys/queue.h> 16 #include <sys/time.h> 17 18 #include <bitstring.h> 19 #include <errno.h> 20 #include <limits.h> 21 #include <langinfo.h> 22 #include <locale.h> 23 #include <stdio.h> 24 #include <stdlib.h> 25 #include <string.h> 26 #include <strings.h> 27 #include <unistd.h> 28 29 #include "common.h" 30 31 /* 32 * codeset -- 33 * Get the locale encoding. 34 * 35 * PUBLIC: char * codeset(void); 36 */ 37 char * 38 codeset(void) 39 { 40 static char *cs; 41 42 if (cs == NULL) 43 cs = nl_langinfo(CODESET); 44 45 return cs; 46 } 47 48 #ifdef USE_WIDECHAR 49 static int 50 raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen, 51 CHAR_T **dst) 52 { 53 int i; 54 CHAR_T **tostr = &cw->bp1.wc; 55 size_t *blen = &cw->blen1; 56 57 BINC_RETW(NULL, *tostr, *blen, len); 58 59 *tolen = len; 60 for (i = 0; i < len; ++i) 61 (*tostr)[i] = (u_char) str[i]; 62 63 *dst = cw->bp1.wc; 64 65 return 0; 66 } 67 68 #define CONV_BUFFER_SIZE 512 69 /* fill the buffer with codeset encoding of string pointed to by str 70 * left has the number of bytes left in str and is adjusted 71 * len contains the number of bytes put in the buffer 72 */ 73 #ifdef USE_ICONV 74 #define CONVERT(str, left, src, len) \ 75 do { \ 76 size_t outleft; \ 77 char *bp = buffer; \ 78 outleft = CONV_BUFFER_SIZE; \ 79 errno = 0; \ 80 if (iconv(id, (iconv_src_t)&str, &left, &bp, &outleft) \ 81 == -1 && errno != E2BIG) \ 82 goto err; \ 83 if ((len = CONV_BUFFER_SIZE - outleft) == 0) { \ 84 error = -left; \ 85 goto err; \ 86 } \ 87 src = buffer; \ 88 } while (0) 89 90 #define IC_RESET() \ 91 do { \ 92 if (id != (iconv_t)-1) \ 93 iconv(id, NULL, NULL, NULL, NULL); \ 94 } while(0) 95 #else 96 #define CONVERT(str, left, src, len) 97 #define IC_RESET() 98 #endif 99 100 static int 101 default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 102 size_t *tolen, CHAR_T **dst, iconv_t id) 103 { 104 size_t i = 0, j; 105 CHAR_T **tostr = &cw->bp1.wc; 106 size_t *blen = &cw->blen1; 107 mbstate_t mbs; 108 size_t n; 109 ssize_t nlen = len; 110 char *src = (char *)str; 111 #ifdef USE_ICONV 112 char buffer[CONV_BUFFER_SIZE]; 113 #endif 114 size_t left = len; 115 int error = 1; 116 117 memset(&mbs, 0, sizeof(mbs)); 118 BINC_RETW(NULL, *tostr, *blen, nlen); 119 120 #ifdef USE_ICONV 121 if (id != (iconv_t)-1) 122 CONVERT(str, left, src, len); 123 #endif 124 125 for (i = 0, j = 0; j < len; ) { 126 n = mbrtowc((*tostr)+i, src+j, len-j, &mbs); 127 /* NULL character converted */ 128 if (n == -2) 129 error = -(len-j); 130 if (n == -1 || n == -2) 131 goto err; 132 if (n == 0) 133 n = 1; 134 j += n; 135 if (++i >= *blen) { 136 nlen += 256; 137 BINC_RETW(NULL, *tostr, *blen, nlen); 138 } 139 if (id != (iconv_t)-1 && j == len && left) { 140 CONVERT(str, left, src, len); 141 j = 0; 142 } 143 } 144 145 error = 0; 146 err: 147 *tolen = i; 148 *dst = cw->bp1.wc; 149 IC_RESET(); 150 151 return error; 152 } 153 154 static int 155 fe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen, 156 CHAR_T **dst) 157 { 158 return default_char2int(sp, str, len, cw, tolen, dst, 159 sp->conv.id[IC_FE_CHAR2INT]); 160 } 161 162 static int 163 ie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen, 164 CHAR_T **dst) 165 { 166 return default_char2int(sp, str, len, cw, tolen, dst, 167 sp->conv.id[IC_IE_CHAR2INT]); 168 } 169 170 static int 171 cs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen, 172 CHAR_T **dst) 173 { 174 return default_char2int(sp, str, len, cw, tolen, dst, (iconv_t)-1); 175 } 176 177 static int 178 int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen, 179 char **dst) 180 { 181 int i; 182 char **tostr = &cw->bp1.c; 183 size_t *blen = &cw->blen1; 184 185 BINC_RETC(NULL, *tostr, *blen, len); 186 187 *tolen = len; 188 for (i = 0; i < len; ++i) 189 (*tostr)[i] = str[i]; 190 191 *dst = cw->bp1.c; 192 193 return 0; 194 } 195 196 static int 197 default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 198 size_t *tolen, char **pdst, iconv_t id) 199 { 200 size_t i, j, offset = 0; 201 char **tostr = &cw->bp1.c; 202 size_t *blen = &cw->blen1; 203 mbstate_t mbs; 204 size_t n; 205 ssize_t nlen = len + MB_CUR_MAX; 206 char *dst; 207 size_t buflen; 208 #ifdef USE_ICONV 209 char buffer[CONV_BUFFER_SIZE]; 210 #endif 211 int error = 1; 212 213 /* convert first len bytes of buffer and append it to cw->bp 214 * len is adjusted => 0 215 * offset contains the offset in cw->bp and is adjusted 216 * cw->bp is grown as required 217 */ 218 #ifdef USE_ICONV 219 #define CONVERT2(_buffer, lenp, cw, offset) \ 220 do { \ 221 char *bp = _buffer; \ 222 int ret; \ 223 do { \ 224 size_t outleft = cw->blen1 - offset; \ 225 char *obp = cw->bp1.c + offset; \ 226 if (cw->blen1 < offset + MB_CUR_MAX) { \ 227 nlen += 256; \ 228 BINC_RETC(NULL, cw->bp1.c, cw->blen1, \ 229 nlen); \ 230 } \ 231 errno = 0; \ 232 ret = iconv(id, (iconv_src_t)&bp, lenp, &obp, \ 233 &outleft); \ 234 if (ret == -1 && errno != E2BIG) \ 235 goto err; \ 236 offset = cw->blen1 - outleft; \ 237 } while (ret != 0); \ 238 } while (0) 239 #else 240 #define CONVERT2(_buffer, lenp, cw, offset) 241 #endif 242 243 244 memset(&mbs, 0, sizeof(mbs)); 245 BINC_RETC(NULL, *tostr, *blen, nlen); 246 dst = *tostr; buflen = *blen; 247 248 #ifdef USE_ICONV 249 if (id != (iconv_t)-1) { 250 dst = buffer; buflen = CONV_BUFFER_SIZE; 251 } 252 #endif 253 254 for (i = 0, j = 0; i < len; ++i) { 255 n = wcrtomb(dst+j, str[i], &mbs); 256 if (n == -1) 257 goto err; 258 j += n; 259 if (buflen < j + MB_CUR_MAX) { 260 if (id != (iconv_t)-1) { 261 CONVERT2(buffer, &j, cw, offset); 262 } else { 263 nlen += 256; 264 BINC_RETC(NULL, *tostr, *blen, nlen); 265 dst = *tostr; buflen = *blen; 266 } 267 } 268 } 269 270 n = wcrtomb(dst+j, L'\0', &mbs); 271 j += n - 1; /* don't count NUL at the end */ 272 *tolen = j; 273 274 if (id != (iconv_t)-1) { 275 CONVERT2(buffer, &j, cw, offset); 276 /* back to the initial state */ 277 CONVERT2(NULL, NULL, cw, offset); 278 *tolen = offset; 279 } 280 281 error = 0; 282 err: 283 if (error) 284 *tolen = j; 285 *pdst = cw->bp1.c; 286 IC_RESET(); 287 288 return error; 289 } 290 291 static int 292 fe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 293 size_t *tolen, char **dst) 294 { 295 return default_int2char(sp, str, len, cw, tolen, dst, 296 sp->conv.id[IC_FE_INT2CHAR]); 297 } 298 299 static int 300 cs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 301 size_t *tolen, char **dst) 302 { 303 return default_int2char(sp, str, len, cw, tolen, dst, (iconv_t)-1); 304 } 305 306 #endif 307 308 /* 309 * conv_init -- 310 * Initialize the iconv environment. 311 * 312 * PUBLIC: void conv_init(SCR *, SCR *); 313 */ 314 void 315 conv_init(SCR *orig, SCR *sp) 316 { 317 int i; 318 319 if (orig == NULL) 320 setlocale(LC_ALL, ""); 321 if (orig != NULL) 322 memmove(&sp->conv, &orig->conv, sizeof(CONV)); 323 #ifdef USE_WIDECHAR 324 else { 325 char *ctype = setlocale(LC_CTYPE, NULL); 326 327 /* 328 * XXX 329 * This hack fixes the libncursesw issue on FreeBSD. 330 */ 331 if (!strcmp(ctype, "ko_KR.CP949")) 332 setlocale(LC_CTYPE, "ko_KR.eucKR"); 333 else if (!strcmp(ctype, "zh_CN.GB2312")) 334 setlocale(LC_CTYPE, "zh_CN.eucCN"); 335 else if (!strcmp(ctype, "zh_CN.GBK")) 336 setlocale(LC_CTYPE, "zh_CN.GB18030"); 337 338 /* 339 * Switch to 8bit mode if locale is C; 340 * LC_CTYPE should be reseted to C if unmatched. 341 */ 342 if (!strcmp(ctype, "C") || !strcmp(ctype, "POSIX")) { 343 sp->conv.sys2int = sp->conv.file2int = raw2int; 344 sp->conv.int2sys = sp->conv.int2file = int2raw; 345 sp->conv.input2int = raw2int; 346 } else { 347 sp->conv.sys2int = cs_char2int; 348 sp->conv.int2sys = cs_int2char; 349 sp->conv.file2int = fe_char2int; 350 sp->conv.int2file = fe_int2char; 351 sp->conv.input2int = ie_char2int; 352 } 353 #ifdef USE_ICONV 354 o_set(sp, O_INPUTENCODING, OS_STRDUP, codeset(), 0); 355 #endif 356 } 357 #endif 358 359 /* iconv descriptors must be distinct to screens. */ 360 for (i = 0; i <= IC_IE_TO_UTF16; ++i) 361 sp->conv.id[i] = (iconv_t)-1; 362 #ifdef USE_ICONV 363 conv_enc(sp, O_INPUTENCODING, 0); 364 #endif 365 } 366 367 /* 368 * conv_enc -- 369 * Convert file/input encoding. 370 * 371 * PUBLIC: int conv_enc(SCR *, int, char *); 372 */ 373 int 374 conv_enc(SCR *sp, int option, char *enc) 375 { 376 #if defined(USE_WIDECHAR) && defined(USE_ICONV) 377 iconv_t *c2w, *w2c; 378 iconv_t id_c2w, id_w2c; 379 380 switch (option) { 381 case O_FILEENCODING: 382 c2w = sp->conv.id + IC_FE_CHAR2INT; 383 w2c = sp->conv.id + IC_FE_INT2CHAR; 384 if (!enc) 385 enc = O_STR(sp, O_FILEENCODING); 386 387 if (strcasecmp(codeset(), enc)) { 388 if ((id_c2w = iconv_open(codeset(), enc)) == 389 (iconv_t)-1) 390 goto err; 391 if ((id_w2c = iconv_open(enc, codeset())) == 392 (iconv_t)-1) 393 goto err; 394 } else { 395 id_c2w = (iconv_t)-1; 396 id_w2c = (iconv_t)-1; 397 } 398 399 break; 400 401 case O_INPUTENCODING: 402 c2w = sp->conv.id + IC_IE_CHAR2INT; 403 w2c = sp->conv.id + IC_IE_TO_UTF16; 404 if (!enc) 405 enc = O_STR(sp, O_INPUTENCODING); 406 407 if (strcasecmp(codeset(), enc)) { 408 if ((id_c2w = iconv_open(codeset(), enc)) == 409 (iconv_t)-1) 410 goto err; 411 } else 412 id_c2w = (iconv_t)-1; 413 414 /* UTF-16 can not be locale and can not be inputed. */ 415 if ((id_w2c = iconv_open("utf-16be", enc)) == (iconv_t)-1) 416 goto err; 417 418 break; 419 420 default: 421 abort(); 422 } 423 424 if (*c2w != (iconv_t)-1) 425 iconv_close(*c2w); 426 if (*w2c != (iconv_t)-1) 427 iconv_close(*w2c); 428 429 *c2w = id_c2w; 430 *w2c = id_w2c; 431 432 F_CLR(sp, SC_CONV_ERROR); 433 F_SET(sp, SC_SCR_REFORMAT); 434 435 return 0; 436 err: 437 #endif 438 switch (option) { 439 case O_FILEENCODING: 440 msgq(sp, M_ERR, "321|File encoding conversion not supported"); 441 break; 442 case O_INPUTENCODING: 443 msgq(sp, M_ERR, "322|Input encoding conversion not supported"); 444 break; 445 } 446 return 1; 447 } 448 449 /* 450 * conv_end -- 451 * Close the iconv descriptors, release the buffer. 452 * 453 * PUBLIC: void conv_end(SCR *); 454 */ 455 void 456 conv_end(SCR *sp) 457 { 458 #if defined(USE_WIDECHAR) && defined(USE_ICONV) 459 int i; 460 for (i = 0; i <= IC_IE_TO_UTF16; ++i) 461 if (sp->conv.id[i] != (iconv_t)-1) 462 iconv_close(sp->conv.id[i]); 463 free(sp->cw.bp1.c); 464 #endif 465 } 466