1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include "dict.h"
5 #include "kuten.h"
6 
7 /*
8  * Routines for handling dictionaries in the "Languages of the World"
9  * format.  worldnextoff *must* be called with <address of valid entry>+1.
10  */
11 
/*
 * Assemble a 16-bit value from the two bytes at p, most significant
 * byte first.  p is evaluated twice, so it must have no side effects.
 */
#define	GSHORT(p)	(((p)[0]<<8)|(p)[1])

/*
 * From here on every mention of putchar in this file names dictputchar
 * (presumably so the static helper below cannot clash with a library
 * putchar -- confirm against the headers in use).
 */
#define putchar dictputchar

static void	putchar(int, int*);

/* chartab marker: the byte has no direct Rune mapping */
#define	NONE	0xffff
19 
20 /* adapted from jhelling@cs.ruu.nl (Jeroen Hellingman) */
21 
/*
 * Byte-to-Rune translation table, indexed by the input byte in putchar.
 * It holds 256 entries; each row comment gives the index of the first
 * entry of the following pair of rows.  NONE marks bytes that have no
 * direct mapping: putchar either acts on them as control codes, ignores
 * them, or prints them as a \xx escape.  Bytes 0xfe and 0xff are
 * intercepted by putchar before the table is consulted.
 */
static Rune chartab[] = {

/*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
	NONE,	NONE,'\n',	0xe6,	0xf8,	0xe5,	0xe4,	0xf6,
/*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
	NONE,	NONE,	NONE,	0xc6,	0xd8,	0xc5,	0xc4,	0xd6,

/*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	0x26,	'\'',
	0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f,
/*30*/  0x30,	0x31,	0x32,	0x33,	0x34,	0x35,	0x36,	0x37,
	0x38,	0x39,	0x3a,	0x3b,	0x3c,	0x3d,	0x3e,	0x3f,
/*40*/  0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47,
	0x48,	0x49,	0x4a,	0x4b,'L',	0x4d,	0x4e,	0x4f,
/*50*/	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57,
	0x58,	0x59,	0x5a,	0x5b,'\\',	0x5d,	0x5e,	0x5f,
/*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,
	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,
/*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,
	0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE,

/*80*/	0xc7,	0xfc,	0xe9,	0xe2,	0xe4,	0xe0,	0xe5,	0xe7,
	0xea,	0xeb,	0xe8,	0xef,	0xee,	0xec,	0xc4,	0xc5,
/*90*/	0xc9,	0xe6,	0xc6,	0xf4,	0xf6,	0xf2,	0xfb,	0xf9,
	0xff,	0xd6,	0xdc,	0xa2,	0xa3,	0xa5,	0x20a7,	0x283,
/*a0*/	0xe1,	0xed,	0xf3,	0xfa,	0xf1,	0xd1,	0xaa,	0xba,
	0xbf,	0x2310,	0xac,	0xbd,	0xbc,	0xa1,	0xab,	0xbb,

/*b0*/	0x254,	0x259,	0xf0,	0x283,	0x292,	0x14b,	0x251,	0x7a,
	0x26a,	0xf0,	0x292,	0xe3,	0x153,	0x169,	0x28c,	0x265,
/*c0*/	0x280,	0xeb,	0x6c,	0x28c,	0xf5,	0xf1,	0x152,	NONE,
	NONE,	0x53,	0x73,	0x5a,	0x7a,	NONE,	NONE,	NONE,
/*d0*/	0xdf,	NONE,	NONE,	0x101,	0x12b,	0x16b,	0x113,	0x14d,
	NONE,	NONE,	NONE,	0x20,	NONE,	NONE,	NONE,	NONE,

/*e0*/	0x3b1,	0x3b2,	0x3b3,	0x3c0,	0x3a3,	0x3c3,	0xb5,	0x3c4,
	0x3a6,	0x398,	0x3a9,	0x3b4,	0x221e,	0xd8,	0x3b5,	0x2229,
/*f0*/	0x2261,	0xb1,	0x2265,	0x2264,	0x2320,	0x2321,	0xf7,	0x2248,
	0xb0,	0x2219,	0xb7,	NONE,	NONE,	NONE,	NONE,	NONE
};
61 
/*
 * Decoder modes stored in state[0] by putchar.  Each "lo" mode must
 * immediately follow its "hi" mode, because putchar moves between the
 * two with state[0]++ and state[0]--.
 */
enum
{
	Utf,			/* single-byte text translated through chartab */
	Kanahi,			/* expecting first byte of a tabjis208 pair */
	Kanalo	= Kanahi+1,	/* expecting second byte of a tabjis208 pair */
	GBhi,			/* expecting first byte of a tabgb2312 pair */
	GBlo	= GBhi+1	/* expecting second byte of a tabgb2312 pair */
};
63 
64 void
worldprintentry(Entry e,int cmd)65 worldprintentry(Entry e, int cmd)
66 {
67 	int nh, state[3];
68 	uchar *p, *pe;
69 
70 	p = (uchar *)e.start;
71 	pe = (uchar *)e.end;
72 	nh = GSHORT(p);
73 	p += 6;
74 	if(cmd == 'h')
75 		pe = p+nh;
76 	state[0] = Utf;
77 	state[1] = 0;
78 	state[2] = 0;
79 	while(p < pe){
80 		if(cmd == 'r')
81 			outchar(*p++);
82 		else
83 			putchar(*p++, state);
84 	}
85 	outnl(0);
86 }
87 
88 long
worldnextoff(long fromoff)89 worldnextoff(long fromoff)
90 {
91 	int nh, np, nd;
92 	uchar buf[6];
93 
94 	if(Bseek(bdict, fromoff-1, 0) < 0)
95 		return -1;
96 	if(Bread(bdict, buf, 6) != 6)
97 		return -1;
98 	nh = GSHORT(buf);
99 	np = GSHORT(buf+2);
100 	nd = GSHORT(buf+4);
101 	return fromoff-1 + 6 + nh + np + nd;
102 }
103 
104 static void
putchar(int c,int * state)105 putchar(int c, int *state)
106 {
107 	int xflag = 0;
108 	Rune r;
109 	int hi, lo;
110 
111 	switch(state[0]){
112 	case Kanahi:
113 	case GBhi:
114 		if(CANS2JH(c) || c == 0xff){
115 			state[0]++;
116 			state[1] = c;
117 			break;
118 		}
119 		/* fall through */
120 	case Utf:
121 		if(c == 0xfe){
122 			state[0] = Kanahi;
123 			break;
124 		}else if(c == 0xff){
125 			state[0] = GBhi;
126 			break;
127 		}
128 		r = chartab[c];
129 		if(r < 0x80 && state[2] == 0)
130 			outchar(r);
131 		else if(r == NONE){
132 			switch(c){
133 			case 0xfb:
134 				if(!xflag){
135 					state[2] = 1;
136 					break;
137 				}
138 			case 0xfc:
139 				if(!xflag){
140 					state[2] = 0;
141 					break;
142 				}
143 			case 0x10:
144 			case 0xc7: case 0xc8:
145 			case 0xd8: case 0xd9: case 0xda:
146 			case 0xdc: case 0xdd: case 0xde: case 0xdf:
147 			case 0xfd:
148 				if(!xflag)
149 					break;
150 				/* fall through */
151 			default:
152 				outprint("\\%.2ux", c);
153 			}
154 		}else if(state[2] == 0)
155 			outrune(r);
156 		break;
157 	case Kanalo:
158 	case GBlo:
159 		if(state[1] == 0xff && c == 0xff){
160 			state[0] = Utf;
161 			break;
162 		}
163 		state[0]--;
164 		hi = state[1];
165 		lo = c;
166 		S2J(hi, lo);		/* convert to JIS */
167 		r = hi*100 + lo - 3232;	/* convert to jis208 */
168 		if(state[0] == Kanahi && r < JIS208MAX)
169 			r = tabjis208[r];
170 		else if(state[0] == GBhi && r < GB2312MAX)
171 			r = tabgb2312[r];
172 		else
173 			r = NONE;
174 		if(r == NONE)
175 			outprint("\\%.2ux\\%.2ux", state[1], c);
176 		else
177 			outrune(r);
178 		break;
179 	}
180 }
181 
182 void
worldprintkey(void)183 worldprintkey(void)
184 {
185 	Bprint(bout, "No pronunciation key.\n");
186 }
187