116d86563SAlexander Pyhalov /*
216d86563SAlexander Pyhalov * CDDL HEADER START
316d86563SAlexander Pyhalov *
416d86563SAlexander Pyhalov * The contents of this file are subject to the terms of the
516d86563SAlexander Pyhalov * Common Development and Distribution License (the "License").
616d86563SAlexander Pyhalov * You may not use this file except in compliance with the License.
716d86563SAlexander Pyhalov *
816d86563SAlexander Pyhalov * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
916d86563SAlexander Pyhalov * or http://www.opensolaris.org/os/licensing.
1016d86563SAlexander Pyhalov * See the License for the specific language governing permissions
1116d86563SAlexander Pyhalov * and limitations under the License.
1216d86563SAlexander Pyhalov *
1316d86563SAlexander Pyhalov * When distributing Covered Code, include this CDDL HEADER in each
1416d86563SAlexander Pyhalov * file and include the License file at src/OPENSOLARIS.LICENSE.
1516d86563SAlexander Pyhalov * If applicable, add the following below this CDDL HEADER, with the
1616d86563SAlexander Pyhalov * fields enclosed by brackets "[]" replaced with your own identifying
1716d86563SAlexander Pyhalov * information: Portions Copyright [yyyy] [name of copyright owner]
1816d86563SAlexander Pyhalov *
1916d86563SAlexander Pyhalov * CDDL HEADER END
2016d86563SAlexander Pyhalov */
2116d86563SAlexander Pyhalov
2216d86563SAlexander Pyhalov /*
2316d86563SAlexander Pyhalov * Copyright (c) 1995, by Sun Microsystems, Inc.
2416d86563SAlexander Pyhalov * All rights reserved.
2516d86563SAlexander Pyhalov */
2616d86563SAlexander Pyhalov
2716d86563SAlexander Pyhalov #include <stdio.h>
2816d86563SAlexander Pyhalov #include <stdlib.h>
2916d86563SAlexander Pyhalov #include <errno.h>
3016d86563SAlexander Pyhalov #include "cns11643_big5.h" /* CNS 11643 to Big-5 mapping table */
3116d86563SAlexander Pyhalov
3216d86563SAlexander Pyhalov #define MSB 0x80 /* most significant bit */
3316d86563SAlexander Pyhalov #define MBYTE 0x8e /* multi-byte (4 byte character) */
3416d86563SAlexander Pyhalov #define PMASK 0xa0 /* plane number mask */
3516d86563SAlexander Pyhalov #define ONEBYTE 0xff /* right most byte */
#define	MSB_OFF		0x7f	/* mask off MSB */
3716d86563SAlexander Pyhalov
3816d86563SAlexander Pyhalov #define NON_ID_CHAR '_' /* non-identified character */
3916d86563SAlexander Pyhalov
/*
 * Per-conversion state.  It is allocated by _icv_open(), updated by
 * _icv_iconv() and released by _icv_close().
 */
struct _icv_state {
	char	keepc[4];	/* bytes of the CNS 11643 character seen so far */
	short	cstate;		/* current state of the parser (enum _CSTATE) */
	int	_errno;		/* error recorded by the current _icv_iconv() call */
};

typedef struct _icv_state _iconv_st;

/*
 * Parser states: C0 expects the 1st byte of a character, C1 the 2nd,
 * C2 the 3rd and C3 the 4th.
 */
enum _CSTATE {
	C0,
	C1,
	C2,
	C3
};
4716d86563SAlexander Pyhalov
4816d86563SAlexander Pyhalov
4916d86563SAlexander Pyhalov static int get_plane_no_by_char(const char);
5016d86563SAlexander Pyhalov static int cns_to_big5(int, char[], char*, size_t);
5116d86563SAlexander Pyhalov static int binsearch(unsigned long, table_t[], int);
5216d86563SAlexander Pyhalov
5316d86563SAlexander Pyhalov
5416d86563SAlexander Pyhalov /*
5516d86563SAlexander Pyhalov * Open; called from iconv_open()
5616d86563SAlexander Pyhalov */
5716d86563SAlexander Pyhalov void *
_icv_open()5816d86563SAlexander Pyhalov _icv_open()
5916d86563SAlexander Pyhalov {
6016d86563SAlexander Pyhalov _iconv_st *st;
6116d86563SAlexander Pyhalov
6216d86563SAlexander Pyhalov if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
6316d86563SAlexander Pyhalov errno = ENOMEM;
6416d86563SAlexander Pyhalov return ((void *) -1);
6516d86563SAlexander Pyhalov }
6616d86563SAlexander Pyhalov
6716d86563SAlexander Pyhalov st->cstate = C0;
6816d86563SAlexander Pyhalov st->_errno = 0;
6916d86563SAlexander Pyhalov
7016d86563SAlexander Pyhalov #ifdef DEBUG
7116d86563SAlexander Pyhalov fprintf(stderr, "========== iconv(): CNS11643 --> Big-5 ==========\n");
7216d86563SAlexander Pyhalov #endif
7316d86563SAlexander Pyhalov
7416d86563SAlexander Pyhalov return ((void *) st);
7516d86563SAlexander Pyhalov }
7616d86563SAlexander Pyhalov
7716d86563SAlexander Pyhalov
7816d86563SAlexander Pyhalov /*
7916d86563SAlexander Pyhalov * Close; called from iconv_close()
8016d86563SAlexander Pyhalov */
8116d86563SAlexander Pyhalov void
_icv_close(_iconv_st * st)8216d86563SAlexander Pyhalov _icv_close(_iconv_st *st)
8316d86563SAlexander Pyhalov {
8416d86563SAlexander Pyhalov if (!st)
8516d86563SAlexander Pyhalov errno = EBADF;
8616d86563SAlexander Pyhalov else
8716d86563SAlexander Pyhalov free(st);
8816d86563SAlexander Pyhalov }
8916d86563SAlexander Pyhalov
9016d86563SAlexander Pyhalov
9116d86563SAlexander Pyhalov /*
9216d86563SAlexander Pyhalov * Actual conversion; called from iconv()
9316d86563SAlexander Pyhalov */
9416d86563SAlexander Pyhalov /*=======================================================
9516d86563SAlexander Pyhalov *
9616d86563SAlexander Pyhalov * State Machine for interpreting CNS 11643 code
9716d86563SAlexander Pyhalov *
9816d86563SAlexander Pyhalov *=======================================================
9916d86563SAlexander Pyhalov *
10016d86563SAlexander Pyhalov * plane 2 - 16
10116d86563SAlexander Pyhalov * 1st C 2nd C 3rd C
10216d86563SAlexander Pyhalov * +------> C0 -----> C1 -----------> C2 -----> C3
10316d86563SAlexander Pyhalov * | ascii | plane 1 | 4th C |
10416d86563SAlexander Pyhalov * ^ v 2nd C v v
10516d86563SAlexander Pyhalov * +----<---+-----<----+-------<---------<-------+
10616d86563SAlexander Pyhalov *
10716d86563SAlexander Pyhalov *=======================================================*/
10816d86563SAlexander Pyhalov size_t
_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)10916d86563SAlexander Pyhalov _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
11016d86563SAlexander Pyhalov char **outbuf, size_t *outbytesleft)
11116d86563SAlexander Pyhalov {
11216d86563SAlexander Pyhalov int plane_no = -1, n;
11316d86563SAlexander Pyhalov
11416d86563SAlexander Pyhalov if (st == NULL) {
11516d86563SAlexander Pyhalov errno = EBADF;
11616d86563SAlexander Pyhalov return ((size_t) -1);
11716d86563SAlexander Pyhalov }
11816d86563SAlexander Pyhalov
11916d86563SAlexander Pyhalov if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
12016d86563SAlexander Pyhalov st->cstate = C0;
12116d86563SAlexander Pyhalov st->_errno = 0;
12216d86563SAlexander Pyhalov return ((size_t) 0);
12316d86563SAlexander Pyhalov }
12416d86563SAlexander Pyhalov
12516d86563SAlexander Pyhalov #ifdef DEBUG
12616d86563SAlexander Pyhalov fprintf(stderr, "=== (Re-entry) iconv(): CNS 11643 --> Big-5 ===\n");
12716d86563SAlexander Pyhalov #endif
12816d86563SAlexander Pyhalov st->_errno = 0; /* reset internal errno */
12916d86563SAlexander Pyhalov errno = 0; /* reset external errno */
13016d86563SAlexander Pyhalov
13116d86563SAlexander Pyhalov /* a state machine for interpreting CNS 11643 code */
13216d86563SAlexander Pyhalov while (*inbytesleft > 0 && *outbytesleft > 0) {
13316d86563SAlexander Pyhalov switch (st->cstate) {
13416d86563SAlexander Pyhalov case C0: /* assuming ASCII in the beginning */
13516d86563SAlexander Pyhalov if (**inbuf & MSB) {
13616d86563SAlexander Pyhalov st->keepc[0] = (**inbuf);
13716d86563SAlexander Pyhalov st->cstate = C1;
13816d86563SAlexander Pyhalov } else { /* real ASCII */
13916d86563SAlexander Pyhalov **outbuf = **inbuf;
14016d86563SAlexander Pyhalov (*outbuf)++;
14116d86563SAlexander Pyhalov (*outbytesleft)--;
14216d86563SAlexander Pyhalov }
14316d86563SAlexander Pyhalov break;
14416d86563SAlexander Pyhalov case C1: /* Chinese characters: 2nd byte */
14516d86563SAlexander Pyhalov if ((st->keepc[0] & ONEBYTE) == MBYTE) { /* 4-byte (0x8e) */
14616d86563SAlexander Pyhalov plane_no = get_plane_no_by_char(**inbuf);
14716d86563SAlexander Pyhalov if (plane_no == -1) { /* illegal plane */
14816d86563SAlexander Pyhalov st->_errno = errno = EILSEQ;
14916d86563SAlexander Pyhalov } else { /* 4-byte Chinese character */
15016d86563SAlexander Pyhalov st->keepc[1] = (**inbuf);
15116d86563SAlexander Pyhalov st->cstate = C2;
15216d86563SAlexander Pyhalov }
15316d86563SAlexander Pyhalov } else { /* 2-byte Chinese character - plane #1 */
15416d86563SAlexander Pyhalov if (**inbuf & MSB) { /* plane #1 */
15516d86563SAlexander Pyhalov st->keepc[1] = (**inbuf);
156*f642269fSToomas Soome st->keepc[2] = st->keepc[3] = '\0';
15716d86563SAlexander Pyhalov n = cns_to_big5(1, st->keepc, *outbuf,
15816d86563SAlexander Pyhalov *outbytesleft);
15916d86563SAlexander Pyhalov if (n > 0) {
16016d86563SAlexander Pyhalov (*outbuf) += n;
16116d86563SAlexander Pyhalov (*outbytesleft) -= n;
16216d86563SAlexander Pyhalov
16316d86563SAlexander Pyhalov st->cstate = C0;
16416d86563SAlexander Pyhalov } else { /* don't reset state */
16516d86563SAlexander Pyhalov st->_errno = errno = E2BIG;
16616d86563SAlexander Pyhalov }
16716d86563SAlexander Pyhalov } else { /* input char doesn't belong
16816d86563SAlexander Pyhalov * to the input code set
16916d86563SAlexander Pyhalov */
17016d86563SAlexander Pyhalov st->_errno = errno = EILSEQ;
17116d86563SAlexander Pyhalov }
17216d86563SAlexander Pyhalov }
17316d86563SAlexander Pyhalov break;
17416d86563SAlexander Pyhalov case C2: /* plane #2 - #16 (4 bytes): get 3nd byte */
17516d86563SAlexander Pyhalov if (**inbuf & MSB) { /* 3rd byte */
17616d86563SAlexander Pyhalov st->keepc[2] = (**inbuf);
17716d86563SAlexander Pyhalov st->cstate = C3;
17816d86563SAlexander Pyhalov } else {
17916d86563SAlexander Pyhalov st->_errno = errno = EILSEQ;
18016d86563SAlexander Pyhalov }
18116d86563SAlexander Pyhalov break;
18216d86563SAlexander Pyhalov case C3: /* plane #2 - #16 (4 bytes): get 4th byte */
18316d86563SAlexander Pyhalov if (**inbuf & MSB) { /* 4th byte */
18416d86563SAlexander Pyhalov st->keepc[3] = (**inbuf);
18516d86563SAlexander Pyhalov n = cns_to_big5(plane_no, st->keepc, *outbuf,
18616d86563SAlexander Pyhalov *outbytesleft );
18716d86563SAlexander Pyhalov if (n > 0) {
18816d86563SAlexander Pyhalov (*outbuf) += n;
18916d86563SAlexander Pyhalov (*outbytesleft) -= n;
19016d86563SAlexander Pyhalov
19116d86563SAlexander Pyhalov st->cstate = C0; /* reset state */
19216d86563SAlexander Pyhalov } else { /* don't reset state */
19316d86563SAlexander Pyhalov st->_errno = errno = E2BIG;
19416d86563SAlexander Pyhalov }
19516d86563SAlexander Pyhalov } else {
19616d86563SAlexander Pyhalov st->_errno = errno = EILSEQ;
19716d86563SAlexander Pyhalov }
19816d86563SAlexander Pyhalov break;
19916d86563SAlexander Pyhalov default: /* should never come here */
20016d86563SAlexander Pyhalov st->_errno = errno = EILSEQ;
20116d86563SAlexander Pyhalov st->cstate = C0; /* reset state */
20216d86563SAlexander Pyhalov break;
20316d86563SAlexander Pyhalov }
20416d86563SAlexander Pyhalov
20516d86563SAlexander Pyhalov if (st->_errno) {
20616d86563SAlexander Pyhalov #ifdef DEBUG
20716d86563SAlexander Pyhalov fprintf(stderr, "!!!!!\tst->_errno = %d\tst->cstate = %d\n",
20816d86563SAlexander Pyhalov st->_errno, st->cstate);
20916d86563SAlexander Pyhalov #endif
21016d86563SAlexander Pyhalov break;
21116d86563SAlexander Pyhalov }
21216d86563SAlexander Pyhalov
21316d86563SAlexander Pyhalov (*inbuf)++;
21416d86563SAlexander Pyhalov (*inbytesleft)--;
21516d86563SAlexander Pyhalov }
21616d86563SAlexander Pyhalov
21716d86563SAlexander Pyhalov if (errno) return ((size_t) -1);
21816d86563SAlexander Pyhalov
21916d86563SAlexander Pyhalov if (*inbytesleft == 0 && st->cstate != C0) {
22016d86563SAlexander Pyhalov errno = EINVAL;
22116d86563SAlexander Pyhalov return ((size_t) -1);
22216d86563SAlexander Pyhalov }
22316d86563SAlexander Pyhalov
22416d86563SAlexander Pyhalov if (*inbytesleft > 0 && *outbytesleft == 0) {
22516d86563SAlexander Pyhalov errno = E2BIG;
22616d86563SAlexander Pyhalov return((size_t)-1);
22716d86563SAlexander Pyhalov }
22816d86563SAlexander Pyhalov return (*inbytesleft);
22916d86563SAlexander Pyhalov }
23016d86563SAlexander Pyhalov
23116d86563SAlexander Pyhalov
23216d86563SAlexander Pyhalov /*
23316d86563SAlexander Pyhalov * Get plane number by char; i.e. 0xa2 returns 2, 0xae returns 14, etc.
23416d86563SAlexander Pyhalov * Returns -1 on error conditions
23516d86563SAlexander Pyhalov */
get_plane_no_by_char(const char inbuf)23616d86563SAlexander Pyhalov static int get_plane_no_by_char(const char inbuf)
23716d86563SAlexander Pyhalov {
23816d86563SAlexander Pyhalov int ret;
23916d86563SAlexander Pyhalov unsigned char uc = (unsigned char) inbuf;
24016d86563SAlexander Pyhalov
24116d86563SAlexander Pyhalov ret = uc - PMASK;
24216d86563SAlexander Pyhalov switch (ret) {
24316d86563SAlexander Pyhalov case 1: /* 0x8EA1 */
24416d86563SAlexander Pyhalov case 2: /* 0x8EA2 */
24516d86563SAlexander Pyhalov case 3: /* 0x8EA3 */
24616d86563SAlexander Pyhalov case 4: /* 0x8EA4 */
24716d86563SAlexander Pyhalov case 5: /* 0x8EA5 */
24816d86563SAlexander Pyhalov case 6: /* 0x8EA6 */
24916d86563SAlexander Pyhalov case 7: /* 0x8EA7 */
25016d86563SAlexander Pyhalov case 12: /* 0x8EAC */
25116d86563SAlexander Pyhalov case 14: /* 0x8EAE */
25216d86563SAlexander Pyhalov case 15: /* 0x8EAF */
25316d86563SAlexander Pyhalov case 16: /* 0x8EB0 */
25416d86563SAlexander Pyhalov return (ret);
25516d86563SAlexander Pyhalov default:
25616d86563SAlexander Pyhalov return (-1);
25716d86563SAlexander Pyhalov }
25816d86563SAlexander Pyhalov }
25916d86563SAlexander Pyhalov
26016d86563SAlexander Pyhalov
26116d86563SAlexander Pyhalov /*
26216d86563SAlexander Pyhalov * CNS 11643 code --> Big-5
26316d86563SAlexander Pyhalov * Return: > 0 - converted with enough space in output buffer
26416d86563SAlexander Pyhalov * = 0 - no space in outbuf
26516d86563SAlexander Pyhalov */
cns_to_big5(int plane_no,char keepc[],char * buf,size_t buflen)26616d86563SAlexander Pyhalov static int cns_to_big5(int plane_no, char keepc[], char *buf, size_t buflen)
26716d86563SAlexander Pyhalov {
26816d86563SAlexander Pyhalov char cns_str[3];
26916d86563SAlexander Pyhalov unsigned long cns_val; /* MSB mask off CNS 11643 value */
27016d86563SAlexander Pyhalov int unidx; /* binary search index */
27116d86563SAlexander Pyhalov unsigned long big5_val, val; /* Big-5 code */
27216d86563SAlexander Pyhalov
27316d86563SAlexander Pyhalov #ifdef DEBUG
27416d86563SAlexander Pyhalov fprintf(stderr, "%s %d ", keepc, plane_no);
27516d86563SAlexander Pyhalov #endif
27616d86563SAlexander Pyhalov if (buflen < 2) {
27716d86563SAlexander Pyhalov errno = E2BIG;
27816d86563SAlexander Pyhalov return(0);
27916d86563SAlexander Pyhalov }
28016d86563SAlexander Pyhalov
28116d86563SAlexander Pyhalov if (plane_no == 1) {
28216d86563SAlexander Pyhalov cns_str[0] = keepc[0] & MSB_OFF;
28316d86563SAlexander Pyhalov cns_str[1] = keepc[1] & MSB_OFF;
28416d86563SAlexander Pyhalov } else {
28516d86563SAlexander Pyhalov cns_str[0] = keepc[2] & MSB_OFF;
28616d86563SAlexander Pyhalov cns_str[1] = keepc[3] & MSB_OFF;
28716d86563SAlexander Pyhalov }
28816d86563SAlexander Pyhalov cns_val = (cns_str[0] << 8) + cns_str[1];
28916d86563SAlexander Pyhalov #ifdef DEBUG
29016d86563SAlexander Pyhalov fprintf(stderr, "%x\t", cns_val);
29116d86563SAlexander Pyhalov #endif
29216d86563SAlexander Pyhalov
29316d86563SAlexander Pyhalov switch (plane_no) {
29416d86563SAlexander Pyhalov case 1:
29516d86563SAlexander Pyhalov unidx = binsearch(cns_val, cns_big5_tab1, MAX_CNS1_NUM);
29616d86563SAlexander Pyhalov if (unidx >= 0)
29716d86563SAlexander Pyhalov big5_val = cns_big5_tab1[unidx].value;
29816d86563SAlexander Pyhalov break;
29916d86563SAlexander Pyhalov case 2:
30016d86563SAlexander Pyhalov unidx = binsearch(cns_val, cns_big5_tab2, MAX_CNS2_NUM);
30116d86563SAlexander Pyhalov if (unidx >= 0)
30216d86563SAlexander Pyhalov big5_val = cns_big5_tab2[unidx].value;
30316d86563SAlexander Pyhalov break;
30416d86563SAlexander Pyhalov case 3:
30516d86563SAlexander Pyhalov unidx = binsearch(cns_val, cns_big5_tab3, MAX_CNS3_NUM);
30616d86563SAlexander Pyhalov if (unidx >= 0)
30716d86563SAlexander Pyhalov big5_val = cns_big5_tab3[unidx].value;
30816d86563SAlexander Pyhalov break;
30916d86563SAlexander Pyhalov default:
31016d86563SAlexander Pyhalov unidx = -1; /* no mapping from CNS to Big-5 */
31116d86563SAlexander Pyhalov break;
31216d86563SAlexander Pyhalov }
31316d86563SAlexander Pyhalov
31416d86563SAlexander Pyhalov #ifdef DEBUG
31516d86563SAlexander Pyhalov fprintf(stderr, "unidx = %d, value = %x\t", unidx, big5_val);
31616d86563SAlexander Pyhalov #endif
31716d86563SAlexander Pyhalov
31816d86563SAlexander Pyhalov if (unidx < 0) { /* no match from CNS to Big-5 */
31916d86563SAlexander Pyhalov *buf = *(buf+1) = NON_ID_CHAR;
32016d86563SAlexander Pyhalov } else {
32116d86563SAlexander Pyhalov val = big5_val & 0xffff;
32216d86563SAlexander Pyhalov *buf = (char) ((val & 0xff00) >> 8);
32316d86563SAlexander Pyhalov *(buf+1) = (char) (val & 0xff);
32416d86563SAlexander Pyhalov }
32516d86563SAlexander Pyhalov
32616d86563SAlexander Pyhalov #ifdef DEBUG
32716d86563SAlexander Pyhalov fprintf(stderr, "\t->%x %x<-\n", *buf, *(buf+1));
32816d86563SAlexander Pyhalov #endif
32916d86563SAlexander Pyhalov
33016d86563SAlexander Pyhalov return(2);
33116d86563SAlexander Pyhalov }
33216d86563SAlexander Pyhalov
33316d86563SAlexander Pyhalov
33416d86563SAlexander Pyhalov /* binsearch: find x in v[0] <= v[1] <= ... <= v[n-1] */
binsearch(unsigned long x,table_t v[],int n)33516d86563SAlexander Pyhalov static int binsearch(unsigned long x, table_t v[], int n)
33616d86563SAlexander Pyhalov {
33716d86563SAlexander Pyhalov int low, high, mid;
33816d86563SAlexander Pyhalov
33916d86563SAlexander Pyhalov low = 0;
34016d86563SAlexander Pyhalov high = n - 1;
34116d86563SAlexander Pyhalov while (low <= high) {
34216d86563SAlexander Pyhalov mid = (low + high) / 2;
34316d86563SAlexander Pyhalov if (x < v[mid].key)
34416d86563SAlexander Pyhalov high = mid - 1;
34516d86563SAlexander Pyhalov else if (x > v[mid].key)
34616d86563SAlexander Pyhalov low = mid + 1;
34716d86563SAlexander Pyhalov else /* found match */
34816d86563SAlexander Pyhalov return mid;
34916d86563SAlexander Pyhalov }
35016d86563SAlexander Pyhalov return (-1); /* no match */
35116d86563SAlexander Pyhalov }
352