1 /*
2  * KAKASI (Kanji Kana Simple inversion program)
3  * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $
4  * Copyright (C) 1992
5  * Hironobu Takahashi (takahasi@tiny.or.jp)
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
10  * any later version.
11  *
12  * This program is distributed in the hope that it will be useful
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with KAKASI, see the file COPYING.  If not, write to the Free
19  * Software Foundation Inc., 59 Temple Place - Suite 330, Boston, MA
20  * 02111-1307, USA.
21  */
22 
23 #ifdef HAVE_CONFIG_H
24 # include <config.h>
25 #endif
26 
27 #include <stdio.h>
28 #ifdef HAVE_STRING_H
29 # include <string.h>
30 #else
31 # include <strings.h>
32 #endif
33 #include "kakasi.h"
34 #include <assert.h>
35 
36 #define J2HBUF 256
37 #define J2READ 10
38 
/*
 * Check whether the letter `l` is listed for the two-byte character
 * (c1, c2).
 *
 * Only characters whose first byte is 0xa4 and whose second byte lies in
 * 0xa0..0xfe are looked up; the second byte selects a string of candidate
 * letters from the table below.
 *
 * Returns 0 when `l` occurs in the selected string, 1 otherwise (including
 * when the character is outside the table's range).
 */
static int
J2_cletter(l, c1, c2)
     unsigned int l;
     unsigned int c1;
     unsigned int c2;
{
    /* Indexed by (c2 - 0xa0); the comments give the c2 range of each row. */
    static char *cl_table[96] = {
	/* 0xa0      */ "",
	/* 0xa1-0xaa */ "aiueow", "aiueow", "aiueow", "aiueow", "aiueow",
			"aiueow", "aiueow", "aiueow", "aiueow", "aiueow",
	/* 0xab-0xb4 */ "k", "g", "k", "g", "k", "g", "k", "g", "k", "g",
	/* 0xb5-0xbe */ "s", "zj", "s", "zj", "s", "zj", "s", "zj", "s", "zj",
	/* 0xbf-0xc2 */ "t", "d", "tc", "d",
	/* 0xc3      */ "aiueokstchgzjfdbpw",
	/* 0xc4-0xc9 */ "t", "d", "t", "d", "t", "d",
	/* 0xca-0xce */ "n", "n", "n", "n", "n",
	/* 0xcf-0xdd */ "h", "b", "p", "h", "b", "p", "hf", "b", "p",
			"h", "b", "p", "h", "b", "p",
	/* 0xde-0xe2 */ "m", "m", "m", "m", "m",
	/* 0xe3-0xe8 */ "y", "y", "y", "y", "y", "y",
	/* 0xe9-0xed */ "rl", "rl", "rl", "rl", "rl",
	/* 0xee-0xf1 */ "wiueo", "wiueo", "wiueo", "wiueo",
	/* 0xf2-0xf6 */ "w", "n", "v", "k", "k",
	/* 0xf7-0xff */ "", "", "", "", "", "", "", "", ""
    };
    char *cand;

    /* Anything outside the covered range never matches. */
    if (c1 != 0xa4)
	return 1;
    if (c2 < 0xa0 || c2 >= 0xff)
	return 1;

    cand = cl_table[c2 - 0xa0];
    while (*cand != '\0') {
	if ((unsigned int)*cand == l)
	    return 0;
	++cand;
    }
    return 1;
}
65 
66 static void
J2append(n,str)67 J2append(n, str)
68      Character *n;
69      unsigned char *str;
70 {
71     int i, j;
72 
73     j = 0;
74     for (i = 0; str[i] != '\0'; ++ i, ++j) {
75 	if (str[i] > 0xa0) {
76 	    n[j].type = JIS83;
77 	    n[j].c1 = str[i];
78 	    n[j].c2 = str[i+1];
79 	    ++ i;
80 	} else {
81 	    n[j].type = ASCII;
82 	    n[j].c1 = str[i];
83 	}
84     }
85     n[j].type = OTHER;
86     n[j].c1 = 0;
87     n[j].c2 = 0;
88 }
89 
90 int
J2H(c,n)91 J2H(c, n)
92      Character *c;
93      Character *n;
94 {
95     int c1, c2;
96     unsigned char Jstr[J2HBUF], Hstr[J2READ][J2HBUF], Dstr[J2HBUF], *h_point;
97     int max_len, length, match_more, n_read, i, clen;
98     struct kanji_yomi *ptr;
99 
100     max_len = 0;
101     match_more = 0;
102     n_read = 0;
103 
104     for (i = 0; i * 2 < J2HBUF - 2 && c[i].c1 != 0; ++ i) { /* FIXME: chop down incoming string (ad-hoc solution)*/
105 	c1 = c[i].c1;
106 	c2 = c[i].c2;
107 	if ((c[i].type == JIS83) || (c[i].type == JIS78)) {
108 	    itaijiknj(&c1, &c2);
109 	    Jstr[i*2]   = c1;
110 	    Jstr[i*2+1] = c2;
111 	} else {
112 	    Jstr[i*2]   = 0;
113 	    Jstr[i*2+1] = 0;
114 	}
115     }
116     assert(i*2 < J2HBUF);
117     Jstr[i*2] = '\0';
118     clen = i*2;
119 
120     add_kanwa((int)Jstr[0], (int)Jstr[1]);
121 
122     for (ptr = jisyo_table[Jstr[0]&0x7f][Jstr[1]&0x7f];
123 	 ptr != NULL;
124 	 ptr = ptr->next) {
125 	length = ptr->length;
126 	if (clen >= length) {
127 	    if (strncmp((char *)Jstr+2, (char *)(ptr->kanji),
128 			(length & 1) ? length-3 : length-2))
129 		continue;
130 	    if (length & 1)
131 		if (J2_cletter(ptr->tail,Jstr[length-1],Jstr[length]))
132 		    continue;
133 	    if (max_len < length) {
134 		if (length & 1) {
135 		    sprintf((char *)Hstr[0], "%s%c%c", ptr->yomi,
136 			    Jstr[length-1],Jstr[length]);
137 		} else {
138 		    strcpy((char *)Hstr[0], (const char *)(ptr->yomi));
139 		}
140 		max_len = length;
141 		n_read = 1;
142 	    } else if (max_len == length) {
143 		if ((heiki_mode) && (n_read < J2READ)) {
144 		    if (length & 1) {
145 			sprintf((char *)Hstr[n_read], "%s%c%c", ptr->yomi,
146 				Jstr[length-1],Jstr[length]);
147 		    } else {
148 			strcpy((char *)Hstr[n_read], (const char *)(ptr->yomi));
149 		    }
150 		    for (i = 0; i < n_read; ++ i) {
151 			if (strcmp((const char *)Hstr[i], (const char *)Hstr[n_read]) == 0) goto next;
152 		    }
153 		    n_read ++;
154 		  next:;
155 		}
156 	    }
157 	} else {
158 	    if (clen == 2)
159 		match_more = 1;
160 	    else if (strncmp((char *)Jstr+2, (char *)(ptr->kanji), clen-2) == 0)
161 		match_more = 1;
162 	}
163     }
164 
165     if (max_len == 0) {
166 	n[0].type = OTHER;
167 	n[0].c1 = 0;
168 	n[0].c2 = 0;
169 	return 1;
170     }
171 
172     h_point = Jstr+((max_len-1) & 0xfffe);
173     if (strncmp((const char *)h_point, "\244\303", 2) == 0) {
174 	if (clen <= max_len+1)
175 	    match_more = 1;
176 	else {
177 	    max_len += 2;
178 	    for (i = 0; i < n_read; ++ i) {
179 		sprintf((char *)Hstr[i], "%s%c%c", Hstr[i], h_point[2], h_point[3]);
180 	    }
181 	}
182     }
183 
184     if (n_read > 1) {
185 	strcpy((char *)Dstr, "{");
186 	for (i = 0; i < n_read; ++ i) {
187 	    strcat((char *)Dstr, (const char *)Hstr[i]);
188 	    if (n_read - i == 1)
189 		strcat((char *)Dstr, "}");
190 	    else
191 		strcat((char *)Dstr, "|");
192 	}
193 	J2append(n, Dstr);
194     } else {
195 	J2append(n, Hstr[0]);
196     }
197     return (match_more == 0) ? (max_len+1)/2 : -(max_len+1)/2;
198 }
199 
200 static void
J2convert(m,n,proc)201 J2convert(m, n, proc)
202      Character *m;
203      Character *n;
204      int (*proc)();
205 {
206     int mp=0, np=0;
207     int ret;
208 
209     while(m[mp].c1 != 0) {
210 	if (m[mp].type != JIS83) {
211 	    n[np].type = m[mp].type;
212 	    n[np].c1 = m[mp].c1;
213 	    n[np].c2 = m[mp].c2;
214 	    ++ np;
215 	    ++ mp;
216 	} else {
217 	    ret = (* proc)(m+mp, n+np);
218 	    if (ret == 0) ret = 1;
219 	    mp += (ret < 0) ? -ret : ret;
220 	    for (; n[np].c1 != 0; ++ np) ;
221 	}
222     }
223     n[np].type = OTHER;
224     n[np].c1 = 0;
225     n[np].c2 = 0;
226 }
227 
228 int
J2a(c,n)229 J2a(c, n)
230      Character *c;
231      Character *n;
232 {
233     Character m[256];
234 
235     int ret;
236     ret = J2H(c, m);
237     J2convert(m, n, H2a);
238     return ret;
239 }
240 
241 int
J2j(c,n)242 J2j(c, n)
243      Character *c;
244      Character *n;
245 {
246     Character m[256];
247 
248     int ret;
249     ret = J2H(c, m);
250     J2convert(m, n, H2j);
251     return ret;
252 }
253 
254 int
J2k(c,n)255 J2k(c, n)
256      Character *c;
257      Character *n;
258 {
259     Character m[256];
260 
261     int ret;
262     ret = J2H(c, m);
263     J2convert(m, n, H2k);
264     return ret;
265 }
266 
267 int
J2K(c,n)268 J2K(c, n)
269      Character *c;
270      Character *n;
271 {
272     Character m[256];
273 
274     int ret;
275     ret = J2H(c, m);
276     J2convert(m, n, H2K);
277     return ret;
278 }
279