1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 1991-2003 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <errno.h> 30 #include <euc.h> 31 #include "japanese.h" 32 33 /* 34 * struct _cv_state; to keep status 35 */ 36 struct _icv_state { 37 int _st_cset; 38 int _st_cset_sav; 39 }; 40 41 static unsigned short lookuptbl(unsigned short); 42 43 void * 44 _icv_open() 45 { 46 struct _icv_state *st; 47 48 if ((st = (struct _icv_state *)malloc(sizeof (struct _icv_state))) 49 == NULL) 50 return ((void *)ERR_RETURN); 51 52 st->_st_cset = st->_st_cset_sav = CS_0; 53 54 return (st); 55 } 56 57 void 58 _icv_close(struct _icv_state *st) 59 { 60 free(st); 61 } 62 63 size_t 64 _icv_iconv(struct _icv_state *st, char **inbuf, size_t *inbytesleft, 65 char **outbuf, size_t *outbytesleft) 66 { 67 int cset; 68 int stat = ST_INIT; 69 unsigned char *op, ic; 70 char *ip; 71 size_t ileft, oleft; 72 size_t retval; 73 74 /* 75 * If inbuf and/or *inbuf are NULL, reset conversion descriptor 76 * and put escape sequence if needed. 77 */ 78 if ((inbuf == NULL) || (*inbuf == NULL)) { 79 st->_st_cset_sav = st->_st_cset = CS_0; 80 return ((size_t)0); 81 } 82 83 cset = st->_st_cset; 84 85 ip = *inbuf; 86 op = (unsigned char *)*outbuf; 87 ileft = *inbytesleft; 88 oleft = *outbytesleft; 89 90 /* 91 * Main loop; basically 1 loop per 1 input byte 92 */ 93 94 while ((int)ileft > 0) { 95 GET(ic); 96 if (stat == ST_INIT) { 97 goto text; 98 } 99 if (stat == ST_ESC) { 100 if (ic == MBTOG0_1) { 101 if ((int)ileft > 0) { 102 stat = ST_MBTOG0_1; 103 continue; 104 } else { 105 UNGET(); 106 UNGET(); 107 errno = EINVAL; 108 retval = (size_t)ERR_RETURN; 109 goto ret; 110 } 111 } else if (ic == SBTOG0_1) { 112 if ((int)ileft > 0) { 113 stat = ST_SBTOG0; 114 continue; 115 } else { 116 UNGET(); 117 UNGET(); 118 errno = EINVAL; 119 retval = (size_t)ERR_RETURN; 120 goto ret; 121 } 122 } else if (ic == X208REV_1) { 123 if ((int)ileft > 0) { 124 stat = ST_208REV_1; 125 continue; 126 } else { 127 UNGET(); 128 UNGET(); 129 errno = EINVAL; 130 retval = (size_t)ERR_RETURN; 131 goto ret; 132 } 133 } else { 134 UNGET(); 135 UNGET(); 136 errno = EILSEQ; 137 retval = (size_t)ERR_RETURN; 138 goto ret; 139 } 140 } else if (stat == ST_MBTOG0_1) { 141 if ((ic == F_X0208_83_90) || (ic == F_X0208_78)) { 142 stat = ST_INIT; 143 st->_st_cset_sav = cset = CS_1; 144 continue; 145 } else if (ic == MBTOG0_2) { 146 if ((int)ileft > 0) { 147 stat = ST_MBTOG0_2; 148 continue; 149 } else { 150 UNGET(); 151 UNGET(); 152 UNGET(); 153 errno = EINVAL; 154 retval = (size_t)ERR_RETURN; 155 goto ret; 156 } 157 } else if (ic == F_X0212_90) { 158 stat = ST_INIT; 159 st->_st_cset_sav = cset = CS_3; 160 continue; 161 } else { 162 UNGET(); 163 UNGET(); 164 UNGET(); 165 errno = EILSEQ; 166 retval = (size_t)ERR_RETURN; 167 goto ret; 168 } 169 } else if (stat == ST_MBTOG0_2) { 170 if ((ic == F_X0208_83_90) || (ic == F_X0208_78)) { 171 stat = ST_INIT; 172 st->_st_cset_sav = cset = CS_1; 173 continue; 174 } else if (ic == F_X0212_90) { 175 stat = ST_INIT; 176 st->_st_cset_sav = cset = CS_3; 177 continue; 178 } else { 179 UNGET(); 180 UNGET(); 181 UNGET(); 182 UNGET(); 183 errno = EILSEQ; 184 retval = (size_t)ERR_RETURN; 185 goto ret; 186 } 187 } else if (stat == ST_SBTOG0) { 188 if ((ic == F_ASCII) || 189 (ic == F_X0201_RM) || 190 (ic == F_ISO646)) { 191 stat = ST_INIT; 192 st->_st_cset_sav = cset = CS_0; 193 continue; 194 } else if (ic == F_X0201_KN) { 195 cset = CS_2; 196 stat = ST_INIT; 197 continue; 198 } else { 199 UNGET(); 200 UNGET(); 201 UNGET(); 202 errno = EILSEQ; 203 retval = (size_t)ERR_RETURN; 204 goto ret; 205 } 206 } else if (stat == ST_208REV_1) { 207 if (ic == X208REV_2) { 208 if ((int)ileft > 0) { 209 stat = ST_208REV_2; 210 continue; 211 } else { 212 UNGET(); 213 UNGET(); 214 UNGET(); 215 errno = EINVAL; 216 retval = (size_t)ERR_RETURN; 217 goto ret; 218 } 219 } else { 220 UNGET(); 221 UNGET(); 222 UNGET(); 223 errno = EILSEQ; 224 retval = (size_t)ERR_RETURN; 225 goto ret; 226 } 227 } else if (stat == ST_208REV_2) { 228 if (ic == ESC) { 229 if ((int)ileft > 0) { 230 stat = ST_REV_AFT_ESC; 231 continue; 232 } else { 233 UNGET(); 234 UNGET(); 235 UNGET(); 236 UNGET(); 237 errno = EINVAL; 238 retval = (size_t)ERR_RETURN; 239 goto ret; 240 } 241 } else { 242 UNGET(); 243 UNGET(); 244 UNGET(); 245 UNGET(); 246 errno = EILSEQ; 247 retval = (size_t)ERR_RETURN; 248 goto ret; 249 } 250 } else if (stat == ST_REV_AFT_ESC) { 251 if (ic == MBTOG0_1) { 252 if ((int)ileft > 0) { 253 stat = ST_REV_AFT_MBTOG0_1; 254 continue; 255 } else { 256 UNGET(); 257 UNGET(); 258 UNGET(); 259 UNGET(); 260 UNGET(); 261 errno = EINVAL; 262 retval = (size_t)ERR_RETURN; 263 goto ret; 264 } 265 } else { 266 UNGET(); 267 UNGET(); 268 UNGET(); 269 UNGET(); 270 UNGET(); 271 errno = EILSEQ; 272 retval = (size_t)ERR_RETURN; 273 goto ret; 274 } 275 } else if (stat == ST_REV_AFT_MBTOG0_1) { 276 if (ic == F_X0208_83_90) { 277 stat = ST_INIT; 278 st->_st_cset_sav = cset = CS_1; 279 continue; 280 } else if (ic == MBTOG0_2) { 281 if ((int)ileft > 0) { 282 stat = ST_REV_AFT_MBTOG0_2; 283 continue; 284 } else { 285 UNGET(); 286 UNGET(); 287 UNGET(); 288 UNGET(); 289 UNGET(); 290 UNGET(); 291 errno = EINVAL; 292 retval = (size_t)ERR_RETURN; 293 goto ret; 294 } 295 } else { 296 UNGET(); 297 UNGET(); 298 UNGET(); 299 UNGET(); 300 UNGET(); 301 UNGET(); 302 errno = EILSEQ; 303 retval = (size_t)ERR_RETURN; 304 goto ret; 305 } 306 } else if (stat == ST_REV_AFT_MBTOG0_2) { 307 if (ic == F_X0208_83_90) { 308 stat = ST_INIT; 309 st->_st_cset_sav = cset = CS_1; 310 continue; 311 } else { 312 UNGET(); 313 UNGET(); 314 UNGET(); 315 UNGET(); 316 UNGET(); 317 UNGET(); 318 UNGET(); 319 errno = EILSEQ; 320 retval = (size_t)ERR_RETURN; 321 goto ret; 322 } 323 } 324 text: 325 /* 326 * Break through chars or ESC sequence 327 */ 328 if (ic == ESC) { 329 if ((int)ileft > 0) { 330 stat = ST_ESC; 331 continue; 332 } else { 333 UNGET(); 334 errno = EINVAL; 335 retval = (size_t)ERR_RETURN; 336 goto ret; 337 } 338 } else if (ic == SO) { 339 cset = CS_2; 340 stat = ST_INIT; 341 continue; 342 } else if (ic == SI) { 343 cset = st->_st_cset_sav; 344 stat = ST_INIT; 345 continue; 346 } 347 if (!(ic & CMSB)) { 348 if (cset == CS_0) { 349 /* ASCII or JIS roman : may be 8bit chars */ 350 if (oleft < SJISW0) { 351 UNGET(); 352 errno = E2BIG; 353 retval = (size_t)ERR_RETURN; 354 goto ret; 355 } 356 PUT(ic); 357 continue; 358 } else if (cset == CS_1) { /* CS_1 Kanji starts */ 359 if ((int)ileft > 0) { 360 int even_ku; 361 if (oleft < SJISW1) { 362 UNGET(); 363 errno = E2BIG; 364 retval = (size_t)ERR_RETURN; 365 goto ret; 366 } 367 if ((ic < 0x21) || (ic == 0x7f)) { 368 UNGET(); 369 errno = EILSEQ; 370 retval = (size_t)ERR_RETURN; 371 goto ret; 372 } 373 if ((*ip < 0x21) || (*ip == 0x7f)) { 374 UNGET(); 375 errno = EILSEQ; 376 retval = (size_t)ERR_RETURN; 377 goto ret; 378 } 379 PUT(jis208tosj1[ic]); 380 if ((ic % 2) == 0) 381 even_ku = TRUE; 382 else 383 even_ku = FALSE; 384 GET(ic); 385 if (even_ku) 386 ic += 0x80; 387 PUT(jistosj2[ic]); 388 continue; 389 } else { /* input fragment of Kanji */ 390 UNGET(); 391 errno = EINVAL; 392 retval = (size_t)ERR_RETURN; 393 goto ret; 394 } 395 } else if (cset == CS_2) { /* Hankaku Katakana */ 396 if (oleft < SJISW2) { 397 UNGET(); 398 errno = E2BIG; 399 retval = (size_t)ERR_RETURN; 400 goto ret; 401 } 402 PUT(ic | CMSB); 403 continue; 404 } else if (cset == CS_3) { /* CS_3 Kanji starts */ 405 unsigned short dest; 406 if ((int)ileft > 0) { 407 if (oleft < SJISW1) { 408 UNGET(); 409 errno = E2BIG; 410 retval = (size_t)ERR_RETURN; 411 goto ret; 412 } 413 if ((ic < 0x21) || (ic == 0x7f)) { 414 UNGET(); 415 errno = EILSEQ; 416 retval = (size_t)ERR_RETURN; 417 goto ret; 418 } 419 if ((*ip < 0x21) || (*ip == 0x7f)) { 420 UNGET(); 421 errno = EILSEQ; 422 retval = (size_t)ERR_RETURN; 423 goto ret; 424 } 425 if (ic < 0x75) { /* check IBM area */ 426 dest = (ic << 8); 427 GET(ic); 428 dest += ic; 429 dest = lookuptbl(dest); 430 if (dest == 0xffff) { 431 /* 432 * Illegal code points 433 * in G3 plane. 434 */ 435 UNGET(); 436 UNGET(); 437 errno = EILSEQ; 438 retval = 439 (size_t)ERR_RETURN; 440 goto ret; 441 } else { 442 PUT((dest >> 8) & 443 0xff); 444 PUT(dest & 0xff); 445 } 446 continue; 447 } else { 448 int even_ku; 449 450 if ((ic % 2) == 0) 451 even_ku = TRUE; 452 else 453 even_ku = FALSE; 454 PUT(jis212tosj1[ic]); 455 GET(ic); 456 if (even_ku) 457 ic += 0x80; 458 PUT(jistosj2[ic]); 459 continue; 460 } 461 } else { /* input fragment of Kanji */ 462 UNGET(); 463 errno = EINVAL; 464 retval = (size_t)ERR_RETURN; 465 goto ret; 466 } 467 } 468 } else { 469 if (oleft < UNKNOWNW) { 470 UNGET(); 471 errno = E2BIG; 472 retval = (size_t)ERR_RETURN; 473 goto ret; 474 } 475 PUT(ic); 476 continue; 477 } 478 } 479 retval = ileft; 480 ret: 481 *inbuf = ip; 482 *inbytesleft = ileft; 483 *outbuf = (char *)op; 484 *outbytesleft = oleft; 485 st->_st_cset = cset; 486 487 return (retval); 488 } 489 490 /* 491 * lookuptbl() 492 * Return the index number if its index-ed number 493 * is the same as dest value. 494 */ 495 static unsigned short 496 lookuptbl(unsigned short dest) 497 { 498 unsigned short tmp; 499 int i; 500 int sz = (sizeof (sjtoibmext) / sizeof (sjtoibmext[0])); 501 502 for (i = 0; i < sz; i++) { 503 tmp = (sjtoibmext[i] & 0x7f7f); 504 if (tmp == dest) 505 return ((i + 0xfa40 + ((i / 0xc0) * 0x40))); 506 } 507 return (PGETA); 508 } 509