1 /* $Header: /home/yav/catty/fkiss/RCS/codeconv.c,v 1.5 2000/08/24 02:10:14 yav Exp $
2 * Japanese KANJI code conversion
3 * written by yav <yav@bigfoot.com>
4 *
5 * define CODECONV_STANDALONE_TEST for Self check
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 */
21
22 char id_codeconv[] = "$Id: codeconv.c,v 1.5 2000/08/24 02:10:14 yav Exp $";
23
24 #ifdef CODECONV_STANDALONE_TEST
25
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29
30 /* jiscode.c */
31 extern unsigned short yjis2sjis();
32
33 /* Japanese text coding type */
34 #define CODING_UNKNOWN 0
35 #define CODING_SJIS 1
36 #define CODING_EUC 2
37 #define CODING_JIS 3
38 #define CODING_MAX 4 /* number of Japanese text coding types */
39
40 static int coding_type = CODING_UNKNOWN;
41
42 #else /* CODECONV_STANDALONE_TEST */
43
44 #include <X11/Xos.h>
45 #include <X11/Xlib.h>
46 #include <stdio.h>
47
48 #include "config.h"
49 #include "headers.h"
50 #include "fkiss.h"
51 #include "work.h"
52 #define PUBLIC_CODECONV_C
53 #include "extern.h"
54
55 #endif /* CODECONV_STANDALONE_TEST */
56
57 /* get Japanese coding systen name */
coding_name(n)58 char *coding_name(n)
59 int n; /* coding type code (CODING_*) */
60 {
61 int i;
62 static struct {int type; char *name;} nametbl[] = {
63 {CODING_UNKNOWN, "unknown"},
64 {CODING_SJIS, "Shift-JIS"},
65 {CODING_EUC, "EUC"},
66 {CODING_JIS, "JIS"},
67 {-1, NULL}
68 };
69
70 for (i = 0; nametbl[i].name != NULL; i++) {
71 if (nametbl[i].type == n)
72 return nametbl[i].name;
73 }
74 return "Illegal coding!";
75 }
76
77 /* search string and identify encoding type */
check_coding(str)78 int check_coding(str)
79 char *str;
80 {
81 unsigned char c;
82 unsigned char *p;
83 unsigned char *p0;
84
85 /* JIS check */
86 for (p0 = (unsigned char *)str; (p = (unsigned char *)index(p0, 033)) != NULL; p0 = p) {
87 switch(*++p) {
88 case '$': /* Kanji in */
89 case '(': /* Kanji out */
90 return CODING_JIS;
91 }
92 }
93 /* EUC, Shift-JIS */
94 p = (unsigned char *)str;
95 while ((c = *p++) != '\0') {
96 /* high byte */
97 if (c < 0x81) {
98 continue;
99 } else if (c <= 0x9f) {
100 return CODING_SJIS; /* 0x81 - 0x9f SJIS 1st byte */
101 } else if (c <= 0xa0) {
102 return CODING_SJIS; /* 0xa0 JIS X0201 single byte KANA space */
103 } else if (c <= 0xa8) {
104 /* 0xa1 - 0xa8 EUC high or KANA */
105 c = *p++; /* 2nd byte */
106 /* check EUC low byte */
107 if (c < 0xa1 || c >0xfe)
108 return CODING_SJIS; /* not EUC 2nd byte! JIS X0201 single byte KANA */
109 continue;
110 } else if (c < 0xb0) {
111 return CODING_SJIS; /* single byte KANA */
112 } else if (c <= 0xf3) {
113 /* 0xb0 - 0xf3 EUC or SJIS 1st byte or JIS X0201 single byte KANA */
114 c = *p++; /* 2nd byte */
115 if (c < 0x40) {
116 return CODING_SJIS; /* JIS X0201 single byte KANA */
117 } else if (c < 0xa1) {
118 return CODING_SJIS; /* SJIS 2nd byte */
119 } else if (c <= 0xfc) {
120 continue; /* EUC or SJIS 2nd byte */
121 } else if (c <= 0xfe) {
122 return CODING_EUC; /* EUC 2nd byte */
123 }
124 /* ??? illegal code */
125 continue;
126 } else if (c <= 0xfc) {
127 return CODING_SJIS; /* 0xf4 - 0xfc SJIS 1st byte */
128 }
129 /* illegal code */
130 }
131 return CODING_UNKNOWN; /* Cannot to identify, need more sample */
132 }
133
/* Convert n bytes of JIS text to Shift-JIS.
 *
 * dst  output buffer, at least n bytes
 * src  input bytes
 * n    number of input bytes
 *
 * Bytes are taken in pairs; each pair is handed to yjis2sjis() as
 * (first << 8) + second and the result is stored high byte first.
 * When n is odd the last byte is copied unchanged.
 * Caution! The destination string is not terminated.
 */
void strnjis2sjis(dst, src, n)
     unsigned char *dst;
     unsigned char *src;
     int n;
{
  unsigned short pair;

  for (; n >= 2; n -= 2) {
    pair = (unsigned short)((src[0] << 8) + src[1]);
    src += 2;
    pair = yjis2sjis(pair);
    *dst++ = pair >> 8;
    *dst++ = pair;
  }
  if (n == 1)
    *dst = *src;                /* unpaired trailing byte */
}
155
/* Convert a NUL-terminated JIS string to Shift-JIS.
 *
 * dst  receives the converted, NUL-terminated text.  It needs room for
 *      strlen(str) + 1 bytes: segments keep their length and escape
 *      sequences are removed, so the output is never longer than the
 *      input.
 * str  source text
 *
 * The text is split at ESC (033) characters.  "ESC $ x" and
 * "ESC $ ( x" switch to kanji mode, "ESC ( x" switches back to
 * single-byte mode, and the sequence itself is not copied.  Kanji
 * segments go through strnjis2sjis(); other segments are copied as is.
 * An ESC that does not start one of these sequences is dropped and
 * the bytes after it are handled in the current mode.
 */
void convert_from_jis(dst, str)
     char *dst;
     char *str;
{
  char *seg;                    /* start of the segment being copied */
  char *esc;                    /* next ESC at or after seg */
  int len;
  int inkanji;

  inkanji = 0;
  seg = str;
  while ((esc = strchr(seg, 033)) != NULL) {
    /* flush everything in front of the ESC */
    len = (int)(esc - seg);
    if (inkanji)
      strnjis2sjis((unsigned char *)dst, (unsigned char *)seg, len);
    else
      memcpy(dst, seg, (size_t)len);
    dst += len;

    seg = esc + 1;              /* the ESC itself is never copied */
    if (seg[0] == '$' && seg[1] != '\0') {
      /* ESC '$' ? Kanji in */
      if (seg[1] == '(' && seg[2] != '\0')
        seg += 3;               /* ESC $ ( B */
      else
        seg += 2;               /* ESC $ B (JIS 1983), ESC $ @ (JIS 1978),
                                 * or a truncated "ESC $ (": stop on the
                                 * terminator instead of stepping over it */
      inkanji = 1;
    } else if (seg[0] == '(' && seg[1] != '\0') {
      /* ESC '(' ? Kanji out */
      seg += 2;                 /* ESC ( B */
      inkanji = 0;
    }
  }

  /* tail after the last escape sequence */
  len = (int)strlen(seg);
  if (inkanji)
    strnjis2sjis((unsigned char *)dst, (unsigned char *)seg, len);
  else
    memcpy(dst, seg, (size_t)len);
  dst[len] = '\0';
}
195
/* Convert a NUL-terminated EUC string to Shift-JIS.
 *
 * dst  receives the converted, NUL-terminated text.  It needs room for
 *      strlen(src) + 1 bytes; the output is never longer than the
 *      input.
 * src  source text
 *
 * Handling per byte:
 *  - a byte without the high bit is copied unchanged;
 *  - SS2 (0x8e) followed by 0xa1 - 0xdf is a half-width katakana:
 *    Shift-JIS stores it as that single byte, so only the second byte
 *    is written;
 *  - any other 8-bit byte is combined with the following byte, both
 *    with the high bit cleared, and the pair is mapped by yjis2sjis();
 *  - an 8-bit byte directly in front of the terminator is copied
 *    unchanged.
 *
 * NOTE(review): three-byte SS3 (0x8f) sequences are not recognised and
 * still go through the two-byte path.
 */
void convert_from_euc(dst, src)
     unsigned char *dst;
     unsigned char *src;
{
  unsigned short code;

  while ((code = *src++) != '\0') {
    if (code == 0x8e && *src >= 0xa1 && *src <= 0xdf) {
      /* code set 2: same byte value in Shift-JIS, drop the SS2 prefix */
      *dst++ = *src++;
      continue;
    }
    if ((code & 0x80) && (*src != '\0')) {
      /* two-byte character: strip the high bits to get the JIS code */
      code = (unsigned short)(((code & 0x7f) << 8) + (*src++ & 0x7f));
      code = yjis2sjis(code);
      *dst++ = (unsigned char)(code >> 8);
    }
    *dst++ = (unsigned char)code;
  }
  *dst = '\0';
}
215
216 /* convert any coding types to Shift-JIS
217 * source coding type indicated coding_type
218 */
convert_coding(dst,src)219 void convert_coding(dst, src)
220 char *dst;
221 char *src;
222 {
223 switch(coding_type) {
224 case CODING_JIS:
225 convert_from_jis(dst, src);
226 break;
227 case CODING_EUC:
228 convert_from_euc(dst, src);
229 break;
230 default: /* Shift-JIS or Unknown */
231 strcpy(dst, src);
232 break;
233 }
234 }
235
236 #ifdef CODECONV_STANDALONE_TEST
237
/* Input lines kept in memory until the coding type is known. */
static int linecnt = 0;         /* number of entries in lineptr */
static char **lineptr = NULL;   /* growable array of heap-allocated copies */

/* Append a private copy of str to the lineptr array.
 *
 * str  NUL-terminated line to remember; the caller's buffer may be
 *      reused afterwards.
 *
 * The array grows by one slot per call.  The self test cannot go on
 * without the stored text, so an allocation failure prints a message
 * on stderr and terminates the program with EXIT_FAILURE.
 */
static void store_line(char *str)
{
  char **grown;
  char *copy;
  size_t size;

  /* realloc(NULL, n) acts like malloc(n), so the first call needs no
   * special case.  The result goes to a temporary so that the old
   * array is not lost when the call fails.
   */
  grown = realloc(lineptr, sizeof(*grown) * ((size_t)linecnt + 1));
  if (grown == NULL) {
    fprintf(stderr, "codeconv: out of memory\n");
    exit(EXIT_FAILURE);
  }
  lineptr = grown;

  /* malloc + memcpy instead of strdup, which is not part of ISO C */
  size = strlen(str) + 1;
  copy = malloc(size);
  if (copy == NULL) {
    fprintf(stderr, "codeconv: out of memory\n");
    exit(EXIT_FAILURE);
  }
  memcpy(copy, str, size);

  lineptr[linecnt] = copy;
  linecnt++;
}
250
main(int argc,char ** argv)251 void main(int argc, char **argv)
252 {
253 int i;
254 char buf[BUFSIZ];
255
256 while (fgets(buf, sizeof(buf), stdin) != NULL) {
257 if (coding_type == CODING_UNKNOWN)
258 fprintf(stderr, "<%s", buf);
259 store_line(buf);
260 if (coding_type == CODING_UNKNOWN) {
261 coding_type = check_coding(buf);
262 if (coding_type != CODING_UNKNOWN) {
263 fprintf(stderr, "*** %s ***\n", coding_name(coding_type));
264 }
265 }
266 }
267 if (coding_type == CODING_UNKNOWN) {
268 fprintf(stderr, "unknown coding default * EUC *\n");
269 coding_type = CODING_EUC;
270 }
271 for (i = 0; i < linecnt; i++) {
272 convert_coding(buf, *(lineptr+i));
273 fputs(buf, stdout);
274 }
275 exit(0);
276 }
277
278 #endif /* CODECONV_STANDALONE_TEST */
279
280 /* End of file */
281