1 /*
2 * $Id: rk_bltinfn.c,v 1.6 2005/04/10 15:26:38 aonoto Exp $
3 */
4
5 /*
6 * FreeWnn is a network-extensible Kana-to-Kanji conversion system.
7 * This file is part of FreeWnn.
8 *
9 * Copyright Kyoto University Research Institute for Mathematical Sciences
10 * 1987, 1988, 1989, 1990, 1991, 1992
11 * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999
12 * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992
13 * Copyright FreeWnn Project 1999, 2000, 2002
14 *
15 * Maintainer: FreeWnn Project <freewnn@tomo.gr.jp>
16 *
17 * This library is free software; you can redistribute it and/or
18 * modify it under the terms of the GNU Lesser General Public
19 * License as published by the Free Software Foundation; either
20 * version 2 of the License, or (at your option) any later version.
21 *
22 * This library is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
25 * Lesser General Public License for more details.
26 *
27 * You should have received a copy of the GNU Lesser General Public
28 * License along with this library; if not, write to the
29 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
30 * Boston, MA 02111-1307, USA.
31 */
32
33 /***********************************************************************
34 rk_bltinfn.c
35 87.12. 6 revised and supplemented
36
37 Defines the more complex of the built-in functions used for conversion,
38 chiefly full-width <-> half-width character conversion.
39 ***********************************************************************/
40 /* Version 3.0 */
41
42 #ifdef HAVE_CONFIG_H
43 # include <config.h>
44 #endif
45
46 #if STDC_HEADERS
47 # include <string.h>
48 #elif HAVE_STRINGS_H
49 # include <strings.h>
50 #endif /* STDC_HEADERS */
51
52 #include "commonhd.h"
53 #include "wnn_config.h"
54 #include "rk_header.h"
55
56 /* Codes of the half-width (hankaku) characters; HNKAK1 supplies the high byte. */
57 #define HKCHOU (HNKAK1 * 0x100 + 0xB0) /* half-width form of CHOUON (prolonged sound mark) */
58 #define HKDKTN (HNKAK1 * 0x100 + 0xDE) /* half-width form of DAKUTN (voiced sound mark) */
59 #define HKHNDK (HNKAK1 * 0x100 + 0xDF) /* half-width form of HNDAKU (semi-voiced sound mark) */
60 #define HKMARU (HNKAK1 * 0x100 + 0xA1) /* half-width form of MNMARU (full stop) */
61 #define HKHRKG (HNKAK1 * 0x100 + 0xA2) /* half-width form of HRKKAG (opening corner bracket) */
62 #define HKTJKG (HNKAK1 * 0x100 + 0xA3) /* half-width form of TJIKAG (closing corner bracket) */
63 #define HKTTEN (HNKAK1 * 0x100 + 0xA4) /* half-width form of TOUTEN (comma) */
64 #define HKNKPT (HNKAK1 * 0x100 + 0xA5) /* half-width form of NKPOTU (middle dot) */
65
66 /* Codes of the full-width (zenkaku) characters; two-byte values, presumably EUC-JP -- confirm against the build's encoding. */
67 #define CHOUON (0xA1BC) /* prolonged sound mark */
68 #define DAKUTN (0xA1AB) /* voiced sound mark (dakuten) */
69 #define HNDAKU (0xA1AC) /* semi-voiced sound mark (handakuten) */
70 #define MNMARU (0xA1A3) /* ideographic full stop */ /* the name abbreviates MaNMARU ("manmaru", a perfect circle) */
71 #define HRKKAG (0xA1D6) /* opening corner bracket */
72 #define TJIKAG (0xA1D7) /* closing corner bracket */
73 #define TOUTEN (0xA1A2) /* ideographic comma */
74 #define NKPOTU (0xA1A6) /* middle dot */
75
/* Language name consulted by to_zenalpha() and to_zenhira() when choosing a
   conversion table.  Holds at most sizeof (_lang) - 1 characters plus the
   terminating NUL. */
static char _lang[6];

/** Record LANG as the current language name.
    Only the first sizeof (_lang) - 1 (i.e. five) characters are kept, so a
    longer name such as "zh_CN.eucCN" is stored as "zh_CN".  A null LANG is
    treated as the empty string, which makes the conversion functions fall
    back to their default tables instead of dereferencing a null pointer. */
void
romkan_set_lang (lang)
     char *lang;
{
  if (lang == (char *) 0)
    lang = "";

  /* strncpy() does not terminate on truncation, so terminate explicitly. */
  strncpy (_lang, lang, sizeof (_lang) - 1);
  _lang[sizeof (_lang) - 1] = '\0';
}
85
86 /** ASCII character -> full-width (zenkaku) character.
    Values of l from ' ' through '~' are looked up in a table of two-byte
    codes; any other value is returned unchanged.  When built with CHINESE
    or KOREAN, an alternative table is chosen by comparing _lang with
    WNN_C_LANG / WNN_T_LANG or WNN_K_LANG; in every other case the default
    table is used. */
87 /* *INDENT-OFF* */
88 letter
to_zenalpha(l)89 to_zenalpha (l)
90 letter l;
91 /* *INDENT-ON* */
92 {
93 letter retval;
94
/* Each table stores two bytes per character, starting with the entry for ' '. */
95 static uns_chr *data = (uns_chr *) "�����ɡ������ǡʡˡ��ܡ��ݡ������������������������������䡩\
96 �����£ãģţƣǣȣɣʣˣ̣ͣΣϣУѣңӣԣգ֣ףأ٣ڡΡ�ϡ�����������\
97 �����������������������������Сáѡ�";
98
99 #ifdef CHINESE
100 static uns_chr *data_cn = (uns_chr *) "���������磥����������������������������������������������������\
101 �����£ãģţƣǣȣɣʣˣ̣ͣΣϣУѣңӣԣգ֣ףأ٣ڣۣ��ݣޣߣ������\
102 ������������������������������������";
103 #endif /* CHINESE */
104 #ifdef KOREAN
105 static uns_chr *data_ko = (uns_chr *) "����������������������������������������������������������������\
106 �����£ãģţƣǣȣɣʣˣ̣ͣΣϣУѣңӣԣգ֣ףأ٣ڣۣܣݣޣߣ������\
107 ������������������������������������";
108 #endif /* KOREAN */
109
/* Only ' ' .. '~' is converted. */
110 if (' ' <= l && l <= '~')
111 {
/* Turn l into the byte offset of its entry (two bytes per character). */
112 l = (l - ' ') << 1;
113 #ifdef CHINESE
114 if (!strcmp (_lang, WNN_C_LANG) || !strcmp (_lang, WNN_T_LANG))
115 {
/* First byte becomes the high byte, second byte the low byte. */
116 retval = data_cn[l] << 8;
117 retval += data_cn[++l];
118 }
119 else
120 #endif
121 #ifdef KOREAN
122 if (!strcmp (_lang, WNN_K_LANG))
123 {
124 retval = data_ko[l] << 8;
125 retval += data_ko[++l];
126 }
127 else
128 #endif /* KOREAN */
129 {
/* Default table. */
130 retval = data[l] << 8;
131 retval += data[++l];
132 }
133 return (retval);
134 }
135 else
136 return (l);
137 }
138
/* Half-width katakana spellings used by to_hankata(), which indexes this
   table with (code - KATBGN) for katakana and (code - HIRBGN) for hiragana
   and packs each pair of bytes into one letter.  An entry may expand to
   more than one character.  hank_setup() may rewrite the first byte of
   each byte pair in place at start-up. */
139 static char *hankdata[] = {
140 "��", "��", "��", "��", "��", "��", "��", "��", "��", "��",
141 "��", "����", "��", "����", "��", "����", "��", "����", "��", "����",
142 "��", "����", "��", "����", "��", "����", "��", "����", "��", "����",
143 "��", "����", "��", "����", "��", "��", "��", "��", "��", "��", "��",
144 "��", "��", "��", "��", "��",
145 "��", "�ʎ�", "�ʎ�", "��", "�ˎ�", "�ˎ�", "��", "�̎�", "�̎�",
146 "��", "�͎�", "�͎�", "��", "�Ύ�", "�Ύ�",
147 "��", "��", "��", "��", "��",
148 "��", "��", "��", "��", "��", "��",
149 "��", "��", "��", "��", "��",
150 "��", "��", "��", "��", "��", "��",
151 "����", "��", "��"
152 }; /* Note: some full-width characters are mixed in. */
153
154 /** ���hankdata�����ºݤ˻Ȥ�Ⱦ�ѥ����ɤ�ɽ���Ƥ��ʤ��Ȥ����ºݤΤ�Τ�
155 �������롣���������˰������Ƥ� */
156 void
hank_setup()157 hank_setup ()
158 {
159 int i;
160 char *s, orig_hnkak1;
161
162 orig_hnkak1 = *hankdata[0];
163 /* *hankdata[] �Ǥ�Ⱦ��ʸ���Σ��Х��ȤᡣȾ��ʸ���Σ��Х��Ȥ�������ۤʤ�
164 �褦��¾����˰ܿ�����Ȥ��ϡ�HNKAK1��define���Ѥ����OK��â��romkan��
165 ���������Ⱦ��ʸ���ʤ��Υե�����ˤΤ�¸�ߡˤ⥳��С��Ȥ��ơ����ε���
166 �˹�碌��ۤ���˾�ޤ����������������ǥ����Ǥ��Υե��������������
167 ������ˡ�Ⱦ��ʸ���ΰ��������ޤ������ʤ���礬����Τǡ��ä�
168 ����С��Ȥʤ��Ȥ�ư���褦�˽��֤Ϥ��Ƥ��롣���줬������
169 hank_setup()�Ǥ��롣hankdata�ϡ����������� hank_setup()�ˤ�ä�
170 �ºݤ�Ⱦ�ѥ����ɤ�ľ����롣 */
171
172 if (orig_hnkak1 == (char) HNKAK1)
173 return;
174 for (i = 0; i < numberof (hankdata); i++)
175 {
176 for (s = hankdata[i]; *s; s += 2)
177 if (*s == orig_hnkak1)
178 *s = HNKAK1;
179 }
180 }
181
182 /** ���ʢ�Ⱦ�ѥ������ʡ���̤���ʸ���ˤʤ뤳�Ȥ⤢�롣*/
183 void
to_hankata(in,outp)184 to_hankata (in, outp)
185 letter in, **outp;
186 {
187 uns_chr *p, c;
188 letter *out;
189
190 out = *outp;
191 switch (in)
192 {
193 case CHOUON:
194 *out++ = HKCHOU;
195 break;
196 case DAKUTN:
197 *out++ = HKDKTN;
198 break;
199 case HNDAKU:
200 *out++ = HKHNDK;
201 break;
202 case MNMARU:
203 *out++ = HKMARU;
204 break;
205 case HRKKAG:
206 *out++ = HKHRKG;
207 break;
208 case TJIKAG:
209 *out++ = HKTJKG;
210 break;
211 case TOUTEN:
212 *out++ = HKTTEN;
213 break;
214 case NKPOTU:
215 *out++ = HKNKPT;
216 break;
217 default:
218 if (is_kata (in))
219 {
220 for (p = (uns_chr *) hankdata[in - KATBGN]; c = *p; p++)
221 *out++ = (c << 8) + *++p;
222 }
223 else if (is_hira (in))
224 {
225 for (p = (uns_chr *) hankdata[in - HIRBGN]; c = *p; p++)
226 *out++ = (c << 8) + *++p;
227 }
228 else
229 {
230 *out++ = in;
231 }
232 }
233 *out = EOLTTR;
234 *outp = out;
235 }
236
237 /** Half-width katakana -> hiragana.  Note that a character followed by a
    voiced or semi-voiced sound mark is NOT merged into a single character.
    Any l for which is_hankata() is false is returned unchanged. */
239 /* *INDENT-OFF* */
240 letter
to_zenhira(l)241 to_zenhira (l)
242 letter l;
243 /* *INDENT-ON* */
244 {
245 letter retval;
246
/* Two bytes per entry; the first entry corresponds to HKKBGN. */
247 static uns_chr *data = (uns_chr *) "���֡ס���������������á���������������������������������\
248 ���ĤƤȤʤˤ̤ͤΤϤҤդؤۤޤߤ�������������";
249
250 #ifdef CHINESE
251 static uns_chr *data_cn = (uns_chr *) "����������������������á������������������Ȥ���������\
252 ���ĤƤȤʤˤ̤͵ĤϤҤդؤۤޤߤ��Ҳ�������������";
253 #endif /* CHINESE */
254
255 if (is_hankata (l))
256 {
/* Turn l into the byte offset of its entry (two bytes per character). */
257 l = (l - HKKBGN) << 1;
258 #ifdef CHINESE
/* The alternative table is used when _lang names WNN_C_LANG or WNN_T_LANG. */
259 if (!strcmp (_lang, WNN_C_LANG) || !strcmp (_lang, WNN_T_LANG))
260 {
261 retval = data_cn[l] << 8;
262 retval += data_cn[++l];
263 }
264 else
265 #endif
266 {
/* First byte becomes the high byte, second byte the low byte. */
267 retval = data[l] << 8;
268 retval += data[++l];
269 }
270 return (retval);
271 }
272 else
273 return (l);
274 }
275
276 /** Ⱦ�ѥ������ʢ����ѡ�â�������������ʸ�����ĤˤޤȤ�Ƥ�
277 ����ʤ��Τ���ա�*/
278 /* *INDENT-OFF* */
279 letter
to_zenkata(l)280 to_zenkata (l)
281 letter l;
282 /* *INDENT-ON* */
283 {
284 return (is_hankata (l) ? (l = to_zenhira (l), to_kata (l)) : l);
285 }
286
/* Build one element of a bit vector: argument bN becomes bit N of the
   result.  Every argument is parenthesized so that an expression such as
   `x | y' is cast and shifted as a whole. */
#define bitvec(b0, b1, b2, b3, b4, b5, b6, b7) ( \
  (char)(b0) | ((char)(b1) << 1) | ((char)(b2) << 2) | ((char)(b3) << 3) | \
  ((char)(b4) << 4) | ((char)(b5) << 5) | ((char)(b6) << 6) | ((char)(b7) << 7) \
)

/** Treat the char array h as a bit vector and test its i-th bit.
    Yields non-zero when the bit is set.  h is parenthesized so that a
    pointer expression such as `p + 1' can be passed. */
#define bitlook(h, i) ((h)[(i) >> 3] & (1 << ((i) & 7)))
295
/* Full-width kana codes used by handakuadd() and dakuadd(); two-byte values, presumably EUC-JP -- confirm against the build's encoding. */
296 #define KATRPT 0xA1B3 /* katakana iteration mark */
297 #define HIRRPT 0xA1B5 /* hiragana iteration mark */
298 #define KATA_U 0xA5A6 /* katakana U */
299 #define KAT_VU 0xA5F4 /* katakana VU */
300 #define HIR_KA 0xA4AB /* hiragana KA */
301 #define HIR_HO 0xA4DB /* hiragana HO */
302 #define KAT_KA 0xA5AB /* katakana KA */
303 #define KAT_HO 0xA5DB /* katakana HO */
304 #define HIR_HA 0xA4CF /* hiragana HA */
305 #define KAT_HA 0xA5CF /* katakana HA */
306
307 /** ����Ⱦ�����äĤ��롣��̤ϰ�������ʸ����*/
308 void
handakuadd(in,outp)309 handakuadd (in, outp)
310 letter in, **outp;
311 {
312 if ((HIR_HA <= in && in <= HIR_HO) ? 0 == (in - HIR_HA) % 3 : (KAT_HA <= in && in <= KAT_HO && 0 == (in - KAT_HA) % 3))
313 {
314 *(*outp)++ = in + 2;
315 }
316 else
317 {
318 *(*outp)++ = in;
319 *(*outp)++ = HNDAKU;
320 }
321 **outp = EOLTTR;
322 }
323
324 /** ���������äĤ��롣��̤ϰ�������ʸ����*/
325 void
dakuadd(in,outp)326 dakuadd (in, outp)
327 letter in, **outp;
328 {
329 static char flgbit[] = {
330 bitvec (1, 0, 1, 0, 1, 0, 1, 0), /* ���������������� */
331 bitvec (1, 0, 1, 0, 1, 0, 1, 0), /* ���������������� */
332 bitvec (1, 0, 1, 0, 1, 0, 1, 0), /* ���������������� */
333 bitvec (0, 1, 0, 1, 0, 1, 0, 0), /* �äĤŤƤǤȤɤ� */
334 bitvec (0, 0, 0, 0, 1, 0, 0, 1), /* �ˤ̤ͤΤϤФѤ� */
335 bitvec (0, 0, 1, 0, 0, 1, 0, 0), /* �ӤԤդ֤פؤ٤� */
336 bitvec (1, 0, 0, 0, 0, 0, 0, 0) /* �� */
337 };
338 letter c;
339
340 if ((HIR_KA <= in && in <= HIR_HO) ? (c = in - HIR_KA, 1) : (KAT_KA <= in && in <= KAT_HO && (c = in - KAT_KA, 1)))
341 {
342 if (bitlook (flgbit, c))
343 {
344 *(*outp)++ = in + 1;
345 }
346 else
347 {
348 *(*outp)++ = in;
349 *(*outp)++ = DAKUTN;
350 }
351 }
352 else
353 switch (in)
354 {
355 case KATRPT:
356 case HIRRPT:
357 *(*outp)++ = in + 1;
358 break;
359 case KATA_U:
360 *(*outp)++ = KAT_VU;
361 break;
362 default:
363 *(*outp)++ = in;
364 *(*outp)++ = DAKUTN;
365 }
366 **outp = EOLTTR;
367 }
368
369 /** in��Ϳ����줿�����ɤ�base�ʤο����ˤ���outp������롣*/
370 void
to_digit(in,base,outp)371 to_digit (in, base, outp)
372 letter in, base, **outp;
373 {
374 letter c, vtol ();
375
376 if (c = in, c /= base)
377 to_digit (c, base, outp);
378 *(*outp)++ = vtol (in % base);
379 **outp = EOLTTR;
380 }
381