1 /* $Header: /home/yav/catty/fkiss/RCS/codeconv.c,v 1.5 2000/08/24 02:10:14 yav Exp $
 * Japanese KANJI code conversion
3  * written by yav <yav@bigfoot.com>
4  *
5  * define CODECONV_STANDALONE_TEST for Self check
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  */
21 
22 char id_codeconv[] = "$Id: codeconv.c,v 1.5 2000/08/24 02:10:14 yav Exp $";
23 
24 #ifdef CODECONV_STANDALONE_TEST
25 
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 
30 /* jiscode.c */
31 extern unsigned short yjis2sjis();
32 
33 /* Japanese text coding type */
34 #define CODING_UNKNOWN	0
35 #define CODING_SJIS	1
36 #define CODING_EUC	2
37 #define CODING_JIS	3
38 #define CODING_MAX	4	/* number of Japanese text coding types */
39 
40 static int coding_type = CODING_UNKNOWN;
41 
42 #else /* CODECONV_STANDALONE_TEST */
43 
44 #include <X11/Xos.h>
45 #include <X11/Xlib.h>
46 #include <stdio.h>
47 
48 #include "config.h"
49 #include "headers.h"
50 #include "fkiss.h"
51 #include "work.h"
52 #define PUBLIC_CODECONV_C
53 #include "extern.h"
54 
55 #endif /* CODECONV_STANDALONE_TEST */
56 
57 /* get Japanese coding systen name */
coding_name(n)58 char *coding_name(n)
59      int n;			/* coding type code (CODING_*) */
60 {
61   int i;
62   static struct {int type; char *name;} nametbl[] = {
63     {CODING_UNKNOWN,	"unknown"},
64     {CODING_SJIS,	"Shift-JIS"},
65     {CODING_EUC,	"EUC"},
66     {CODING_JIS,	"JIS"},
67     {-1,		NULL}
68   };
69 
70   for (i = 0; nametbl[i].name != NULL; i++) {
71     if (nametbl[i].type == n)
72       return nametbl[i].name;
73   }
74   return "Illegal coding!";
75 }
76 
77 /* search string and identify encoding type */
check_coding(str)78 int check_coding(str)
79      char *str;
80 {
81   unsigned char c;
82   unsigned char *p;
83   unsigned char *p0;
84 
85   /* JIS check */
86   for (p0 = (unsigned char *)str; (p = (unsigned char *)index(p0, 033)) != NULL; p0 = p) {
87     switch(*++p) {
88     case '$':			/* Kanji in */
89     case '(':			/* Kanji out */
90       return CODING_JIS;
91     }
92   }
93   /* EUC, Shift-JIS */
94   p = (unsigned char *)str;
95   while ((c = *p++) != '\0') {
96     /* high byte */
97     if (c < 0x81) {
98       continue;
99     } else if (c <= 0x9f) {
100       return CODING_SJIS;	/* 0x81 - 0x9f SJIS 1st byte */
101     } else if (c <= 0xa0) {
102       return CODING_SJIS;	/* 0xa0 JIS X0201 single byte KANA space */
103     } else if (c <= 0xa8) {
104       /* 0xa1 - 0xa8 EUC high or KANA */
105       c = *p++;			/* 2nd byte */
106       /* check EUC low byte */
107       if (c < 0xa1 || c >0xfe)
108 	return CODING_SJIS;	/* not EUC 2nd byte! JIS X0201 single byte KANA */
109       continue;
110     } else if (c < 0xb0) {
111       return CODING_SJIS;	/* single byte KANA */
112     } else if (c <= 0xf3) {
113       /* 0xb0 - 0xf3 EUC or SJIS 1st byte or JIS X0201 single byte KANA */
114       c = *p++;			/* 2nd byte */
115       if (c < 0x40) {
116 	return CODING_SJIS;	/* JIS X0201 single byte KANA */
117       } else if (c < 0xa1) {
118 	return CODING_SJIS;	/* SJIS 2nd byte */
119       } else if (c <= 0xfc) {
120 	continue;		/* EUC or SJIS 2nd byte */
121       } else if (c <= 0xfe) {
122 	return CODING_EUC;	/* EUC 2nd byte */
123       }
124       /* ??? illegal code */
125       continue;
126     } else if (c <= 0xfc) {
127       return CODING_SJIS;	/* 0xf4 - 0xfc SJIS 1st byte */
128     }
129     /* illegal code */
130   }
131   return CODING_UNKNOWN;	/* Cannot to identify, need more sample */
132 }
133 
/*
 * Convert n bytes of JIS text at src into Shift-JIS at dst.
 *
 * Bytes are consumed in pairs: each pair is combined into a 16-bit code
 * (first byte in the high half), passed through yjis2sjis(), and stored
 * high byte first.  If n is odd, the last unpaired byte is copied through
 * unchanged.  Nothing is written when n <= 0.
 *
 * Caution! The destination string is not terminated.
 */
void strnjis2sjis(dst, src, n)
     unsigned char *dst;
     unsigned char *src;
     int n;			/* length in bytes */
{
  unsigned short code;
  int remain;

  for (remain = n; remain >= 2; remain -= 2) {
    code = (unsigned short)((src[0] << 8) + src[1]);
    src += 2;
    code = yjis2sjis(code);
    *dst++ = code >> 8;
    *dst++ = code;
  }
  if (remain == 1)
    *dst = *src;		/* odd trailing byte passes through as is */
}
155 
/*
 * Convert a NUL-terminated JIS string to Shift-JIS.
 *
 * dst: output buffer; receives a NUL-terminated string.  No size is passed,
 *      so the caller must provide at least strlen(str) + 1 bytes.  The
 *      result is never longer than the input: escape sequences are dropped
 *      and every other byte is written one-for-one.
 * str: input string.
 *
 * ESC '$' x (or ESC '$' '(' x) switches to two-byte kanji mode and
 * ESC '(' x switches back to single-byte mode.  Text in kanji mode goes
 * through strnjis2sjis(); other text is copied verbatim.  An ESC that does
 * not start one of these sequences is dropped and the mode is kept.
 */
void convert_from_jis(dst, str)
     char *dst;
     char *str;
{
  char *p;
  char *p0;
  int i;
  int inkanji;

  inkanji = 0;
  for (p0 = str; (p = strchr(p0, 033)) != NULL; p0 = p) {
    /* flush the text between the previous sequence and this ESC */
    i = (int)(p - p0);
    if (inkanji)
      strnjis2sjis((unsigned char *)dst, (unsigned char *)p0, i);
    else
      memcpy(dst, p0, i);
    dst += i;
    p++;			/* step over ESC */
    if (*p == '$' && *(p+1) != '\0') {
      /* ESC '$' ? Kanji in */
      /* Take the 3-byte skip only when the final byte is really present;
       * a string cut off right after "ESC $ (" would otherwise move p past
       * the terminator and the next search would read out of bounds. */
      if (*(p+1) == '(' && *(p+2) != '\0')
	p += 3;			/* ESC $ ( B */
      else
	p += 2;			/* ESC $ B (JIS 1983) or ESC $ @ (JIS 1978) */
      inkanji = 1;
    } else if (*p == '(' && *(p+1) != '\0') {
      /* ESC '(' ? Kanji out */
      p += 2;			/* ESC ( B */
      inkanji = 0;
    }
  }
  /* remaining text after the last escape sequence */
  i = (int)strlen(p0);
  if (inkanji)
    strnjis2sjis((unsigned char *)dst, (unsigned char *)p0, i);
  else
    memcpy(dst, p0, i);
  dst[i] = '\0';
}
195 
/*
 * Convert a NUL-terminated EUC string to Shift-JIS.
 *
 * dst: output buffer; receives a NUL-terminated string.  No size is passed;
 *      the result is never longer than the input, so strlen(src) + 1 bytes
 *      are enough.
 * src: input string.
 *
 * Handling per input byte:
 *   - 0x8e followed by 0xa1..0xdf is EUC half-width katakana (SS2 prefix);
 *     Shift-JIS encodes the same character as that single kana byte, so
 *     the prefix is dropped and the kana byte is emitted alone.
 *   - any other byte with the high bit set, followed by a non-NUL byte, is
 *     taken as a two-byte character: both bytes are masked to 7 bits to
 *     form the JIS code, which yjis2sjis() maps to Shift-JIS.
 *   - everything else (ASCII, or a high byte right before the terminator)
 *     is copied unchanged.
 * Three-byte SS3 (0x8f) sequences get no special treatment.
 */
void convert_from_euc(dst, src)
     unsigned char *dst;
     unsigned char *src;
{
  unsigned short code;

  while ((code = *src++) != '\0') {
    if (code == 0x8e && *src >= 0xa1 && *src <= 0xdf) {
      /* half-width katakana: emit the kana byte only */
      code = *src++;
    } else if ((code & 0x80) && (*src != '\0')) {
      code &= 0x7f;
      code <<= 8;
      code += (*src++) & 0x7f;
      code = yjis2sjis(code);
      *dst++ = code >> 8;	/* Shift-JIS 1st byte */
    }
    *dst++ = code;		/* 2nd byte, kana byte, or plain single byte */
  }
  *dst = '\0';
}
215 
216 /* convert any coding types to Shift-JIS
217  * source coding type indicated coding_type
218  */
convert_coding(dst,src)219 void convert_coding(dst, src)
220      char *dst;
221      char *src;
222 {
223   switch(coding_type) {
224   case CODING_JIS:
225     convert_from_jis(dst, src);
226     break;
227   case CODING_EUC:
228     convert_from_euc(dst, src);
229     break;
230   default:			/* Shift-JIS or Unknown */
231     strcpy(dst, src);
232     break;
233   }
234 }
235 
236 #ifdef CODECONV_STANDALONE_TEST
237 
static int linecnt = 0;		/* number of lines stored in lineptr */
static char **lineptr = NULL;	/* growable array of heap copies of the lines */

/*
 * Append a private heap copy of str to the lineptr array and bump linecnt.
 *
 * The array grows by one slot per call.  If memory runs out, a message is
 * written to stderr and the program exits with status 1, rather than
 * dereferencing a NULL pointer.
 */
static void store_line(char *str)
{
  char **grown;
  char *copy;
  size_t len;

  /* realloc(NULL, n) behaves like malloc(n), so the first call needs no
   * special case.  Keep the old pointer until the call has succeeded. */
  grown = realloc(lineptr, sizeof(*lineptr) * ((size_t)linecnt + 1));
  if (grown == NULL) {
    fprintf(stderr, "codeconv: out of memory\n");
    exit(1);
  }
  lineptr = grown;

  /* copy including the terminator (ISO C replacement for strdup) */
  len = strlen(str) + 1;
  copy = malloc(len);
  if (copy == NULL) {
    fprintf(stderr, "codeconv: out of memory\n");
    exit(1);
  }
  memcpy(copy, str, len);

  lineptr[linecnt] = copy;
  linecnt++;
}
250 
main(int argc,char ** argv)251 void main(int argc, char **argv)
252 {
253   int i;
254   char buf[BUFSIZ];
255 
256   while (fgets(buf, sizeof(buf), stdin) != NULL) {
257     if (coding_type == CODING_UNKNOWN)
258       fprintf(stderr, "<%s", buf);
259     store_line(buf);
260     if (coding_type == CODING_UNKNOWN) {
261       coding_type = check_coding(buf);
262       if (coding_type != CODING_UNKNOWN) {
263 	fprintf(stderr, "*** %s ***\n", coding_name(coding_type));
264       }
265     }
266   }
267   if (coding_type == CODING_UNKNOWN) {
268     fprintf(stderr, "unknown coding default * EUC *\n");
269     coding_type = CODING_EUC;
270   }
271   for (i = 0; i < linecnt; i++) {
272     convert_coding(buf, *(lineptr+i));
273     fputs(buf, stdout);
274   }
275   exit(0);
276 }
277 
278 #endif /* CODECONV_STANDALONE_TEST */
279 
280 /* End of file */
281