1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include "dict.h"
5 
/* Sizing limits for the static parse buffers declared in this file. */
enum {
	Buflen = 1000,	/* bytes in each name/value buffer */
	Maxaux = 5	/* rows in the auxname/auxval arrays */
};
10 
11 /* Possible tags */
/*
 * Identifiers for every tag the printer recognises.  tagtab maps the
 * tag names found in the text onto these values; Ntag, the last
 * enumerator, equals the number of tags.
 */
enum {
	A = 0,		/* quote author (small caps) */
	B,		/* bold text */
	Ba,		/* author within a bib */
	Bch,		/* component of a built-up chemical formula */
	Bib,		/* wraps the word 'in' of a bibliographic reference */
	Bl,		/* bold text */
	Bo,		/* bond drawn over */
	Bu,		/* bond drawn under */
	Cb,		/* ? indented block of stuff */
	Cf,		/* cross reference to a different entry (italics) */
	Chem,		/* chemical formula */
	Co,		/* "over" part (after a sum, integral, etc.) */
	Col,		/* table column (aux may just be r) */
	Cu,		/* "under" part (after a sum, integral, etc.) */
	Dat,		/* a date */
	Db,		/* definition block? indented */
	Dn,		/* fraction denominator */
	E,		/* main entry */
	Ed,		/* editor's comments (printed in [...]) */
	Etym,		/* etymology (printed in [...]) */
	Fq,		/* frequency count (superscript) */
	Form,		/* formula */
	Fr,		/* fraction (holds <nu>, then <dn>) */
	Gk,		/* transliterated greek */
	Gr,		/* grammar? (e.g., around 'pa.' in 'pa. pple.') */
	Hg,		/* group of headwords */
	Hm,		/* homonym number (superscript) */
	Hw,		/* headword (bold) */
	I,		/* italic text */
	Il,		/* italic list? */
	In,		/* inferior, i.e. subscript */
	L,		/* row of a table column */
	La,		/* usage or status label (italic) */
	Lc,		/* chapter/verse style locator for works */
	N,		/* note (smaller type) */
	Nu,		/* fraction numerator */
	Ov,		/* wants an overline */
	P,		/* paragraph (indented) */
	Ph,		/* transliterated pronunciation */
	Pi,		/* pile (fraction without the line) */
	Pqp,		/* sub-block of a quote */
	Pr,		/* pronunciation (printed in (...)) */
	Ps,		/* position, e.g. adv. (italic) */
	Pt,		/* part (inside lc) */
	Q,		/* one quote in a quote block */
	Qd,		/* date of a quote (bold) */
	Qig,		/* quote number (greek) */
	Qla,		/* usage or status label in a quote (italic) */
	Qp,		/* quote block (small type, indented) */
	Qsn,		/* quote number */
	Qt,		/* words of a quote */
	R,		/* roman type */
	Rx,		/* relative cross reference (e.g., next) */
	S,		/* another form? (italic) */
	S0,		/* sense (may wrap several sx's) */
	S1,		/* sense (aux num: indented bold letter) */
	S2,		/* sense (aux num: indented bold capital roman numeral) */
	S3,		/* sense (aux num: indented number of asterisks) */
	S4,		/* sense (aux num: indented bold number) */
	S5,		/* sense (aux num: indented number of asterisks) */
	S6,		/* subsense (aux num: bold letter) */
	S7a,		/* subsense (aux num: letter) */
	S7n,		/* subsense (aux num: roman numeral) */
	Sc,		/* small capitals */
	Sgk,		/* subsense (aux num: transliterated greek) */
	Sn,		/* sense of a subdefinition (aux num: roman letter) */
	Ss,		/* sans serif */
	Ssb,		/* bold sans serif */
	Ssi,		/* italic sans serif */
	Su,		/* superior, i.e. superscript */
	Sub,		/* subdefinition */
	Table,		/* table (aux cols = column count) */
	Tt,		/* title? (italics) */
	Vd,		/* numeric label on a variant form */
	Ve,		/* variant entry */
	Vf,		/* variant form (light bold) */
	Vfl,		/* list of vf's (begins with Also or Forms) */
	W,		/* a work, e.g. Beowulf (italics) */
	X,		/* cross reference to a main word (small caps) */
	Xd,		/* cross reference to a quotation, by date */
	Xi,		/* internal cross reference ? (italic) */
	Xid,		/* cross reference identifier, in quote ? */
	Xs,		/* sense of a cross reference (lower number) */
	Xr,		/* list of x's */
	Ntag		/* number of tags; not itself a tag */
};
99 
100 /* Assoc tables must be sorted on first field */
101 
102 static Assoc tagtab[] = {
103 	{"a",		A},
104 	{"b",		B},
105 	{"ba",		Ba},
106 	{"bch",		Bch},
107 	{"bib",		Bib},
108 	{"bl",		Bl},
109 	{"bo",		Bo},
110 	{"bu",		Bu},
111 	{"cb",		Cb},
112 	{"cf",		Cf},
113 	{"chem",	Chem},
114 	{"co",		Co},
115 	{"col",		Col},
116 	{"cu",		Cu},
117 	{"dat",		Dat},
118 	{"db",		Db},
119 	{"dn",		Dn},
120 	{"e",		E},
121 	{"ed",		Ed},
122 	{"et",		Etym},
123 	{"etym",	Etym},
124 	{"form",	Form},
125 	{"fq",		Fq},
126 	{"fr",		Fr},
127 	{"frac",	Fr},
128 	{"gk",		Gk},
129 	{"gr",		Gr},
130 	{"hg",		Hg},
131 	{"hm",		Hm},
132 	{"hw",		Hw},
133 	{"i",		I},
134 	{"il",		Il},
135 	{"in",		In},
136 	{"l",		L},
137 	{"la",		La},
138 	{"lc",		Lc},
139 	{"n",		N},
140 	{"nu",		Nu},
141 	{"ov",		Ov},
142 	{"p",		P},
143 	{"ph",		Ph},
144 	{"pi",		Pi},
145 	{"pqp",		Pqp},
146 	{"pr",		Pr},
147 	{"ps",		Ps},
148 	{"pt",		Pt},
149 	{"q",		Q},
150 	{"qd",		Qd},
151 	{"qig",		Qig},
152 	{"qla",		Qla},
153 	{"qp",		Qp},
154 	{"qsn",		Qsn},
155 	{"qt",		Qt},
156 	{"r",		R},
157 	{"rx",		Rx},
158 	{"s",		S},
159 	{"s0",		S0},
160 	{"s1",		S1},
161 	{"s2",		S2},
162 	{"s3",		S3},
163 	{"s4",		S4},
164 	{"s5",		S5},
165 	{"s6",		S6},
166 	{"s7a",		S7a},
167 	{"s7n",		S7n},
168 	{"sc",		Sc},
169 	{"sgk",		Sgk},
170 	{"sn",		Sn},
171 	{"ss",		Ss,},
172 	{"ssb",		Ssb},
173 	{"ssi",		Ssi},
174 	{"su",		Su},
175 	{"sub",		Sub},
176 	{"table",	Table},
177 	{"tt",		Tt},
178 	{"vd",		Vd},
179 	{"ve",		Ve},
180 	{"vf",		Vf},
181 	{"vfl",		Vfl},
182 	{"w",		W},
183 	{"x",		X},
184 	{"xd",		Xd},
185 	{"xi",		Xi},
186 	{"xid",		Xid},
187 	{"xr",		Xr},
188 	{"xs",		Xs}
189 };
190 
/* Possible tag auxiliary info */
/* Kinds of attribute a tag may carry; also the index into auxstate[]. */
enum {
	Cols = 0,	/* column count of a table */
	Num,		/* letter or number labelling a sense */
	St,		/* status (e.g., obs) */
	Naux		/* number of attribute kinds */
};
198 
199 static Assoc auxtab[] = {
200 	{"cols",	Cols},
201 	{"num",		Num},
202 	{"st",		St}
203 };
204 
205 static Assoc spectab[] = {
206 	{"3on4",	0xbe},
207 	{"Aacu",	0xc1},
208 	{"Aang",	0xc5},
209 	{"Abarab",	0x100},
210 	{"Acirc",	0xc2},
211 	{"Ae",		0xc6},
212 	{"Agrave",	0xc0},
213 	{"Alpha",	0x391},
214 	{"Amac",	0x100},
215 	{"Asg",		0x1b7},		/* Unicyle. Cf "Sake" */
216 	{"Auml",	0xc4},
217 	{"Beta",	0x392},
218 	{"Cced",	0xc7},
219 	{"Chacek",	0x10c},
220 	{"Chi",		0x3a7},
221 	{"Chirho",	0x2627},		/* Chi Rho U+2627 */
222 	{"Csigma",	0x3da},
223 	{"Delta",	0x394},
224 	{"Eacu",	0xc9},
225 	{"Ecirc",	0xca},
226 	{"Edh",		0xd0},
227 	{"Epsilon",	0x395},
228 	{"Eta",		0x397},
229 	{"Gamma",	0x393},
230 	{"Iacu",	0xcd},
231 	{"Icirc",	0xce},
232 	{"Imac",	0x12a},
233 	{"Integ",	0x222b},
234 	{"Iota",	0x399},
235 	{"Kappa",	0x39a},
236 	{"Koppa",	0x3de},
237 	{"Lambda",	0x39b},
238 	{"Lbar",	0x141},
239 	{"Mu",		0x39c},
240 	{"Naira",	0x4e},		/* should have bar through */
241 	{"Nplus",	0x4e},		/* should have plus above */
242 	{"Ntilde",	0xd1},
243 	{"Nu",		0x39d},
244 	{"Oacu",	0xd3},
245 	{"Obar",	0xd8},
246 	{"Ocirc",	0xd4},
247 	{"Oe",		0x152},
248 	{"Omega",	0x3a9},
249 	{"Omicron",	0x39f},
250 	{"Ouml",	0xd6},
251 	{"Phi",		0x3a6},
252 	{"Pi",		0x3a0},
253 	{"Psi",		0x3a8},
254 	{"Rho",		0x3a1},
255 	{"Sacu",	0x15a},
256 	{"Sigma",	0x3a3},
257 	{"Summ",	0x2211},
258 	{"Tau",		0x3a4},
259 	{"Th",		0xde},
260 	{"Theta",	0x398},
261 	{"Tse",		0x426},
262 	{"Uacu",	0xda},
263 	{"Ucirc",	0xdb},
264 	{"Upsilon",	0x3a5},
265 	{"Uuml",	0xdc},
266 	{"Wyn",		0x1bf},		/* wynn U+01BF */
267 	{"Xi",		0x39e},
268 	{"Ygh",		0x1b7},		/* Yogh	U+01B7 */
269 	{"Zeta",	0x396},
270 	{"Zh",		0x1b7},		/* looks like Yogh. Cf "Sake" */
271 	{"a",		0x61},		/* ante */
272 	{"aacu",	0xe1},
273 	{"aang",	0xe5},
274 	{"aasper",	MAAS},
275 	{"abreve",	0x103},
276 	{"acirc",	0xe2},
277 	{"acu",		LACU},
278 	{"ae",		0xe6},
279 	{"agrave",	0xe0},
280 	{"ahook",	0x105},
281 	{"alenis",	MALN},
282 	{"alpha",	0x3b1},
283 	{"amac",	0x101},
284 	{"amp",		0x26},
285 	{"and",		MAND},
286 	{"ang",		LRNG},
287 	{"angle",	0x2220},
288 	{"ankh",	0x2625},		/* ankh U+2625 */
289 	{"ante",	0x61},		/* before (year) */
290 	{"aonq",	MAOQ},
291 	{"appreq",	0x2243},
292 	{"aquar",	0x2652},
293 	{"arDadfull",	0x636},		/* Dad U+0636 */
294 	{"arHa",	0x62d},		/* haa U+062D */
295 	{"arTa",	0x62a},		/* taa U+062A */
296 	{"arain",	0x639},		/* ain U+0639 */
297 	{"arainfull",	0x639},		/* ain U+0639 */
298 	{"aralif",	0x627},		/* alef U+0627 */
299 	{"arba",	0x628},		/* baa U+0628 */
300 	{"arha",	0x647},		/* ha U+0647 */
301 	{"aries",	0x2648},
302 	{"arnun",	0x646},		/* noon U+0646 */
303 	{"arnunfull",	0x646},		/* noon U+0646 */
304 	{"arpa",	0x647},		/* ha U+0647 */
305 	{"arqoph",	0x642},		/* qaf U+0642 */
306 	{"arshinfull",	0x634},		/* sheen U+0634 */
307 	{"arta",	0x62a},		/* taa U+062A */
308 	{"artafull",	0x62a},		/* taa U+062A */
309 	{"artha",	0x62b},		/* thaa U+062B */
310 	{"arwaw",	0x648},		/* waw U+0648 */
311 	{"arya",	0x64a},		/* ya U+064A */
312 	{"aryafull",	0x64a},		/* ya U+064A */
313 	{"arzero",	0x660},		/* indic zero U+0660 */
314 	{"asg",		0x292},		/* unicycle character. Cf "hallow" */
315 	{"asper",	LASP},
316 	{"assert",	0x22a2},
317 	{"astm",	0x2042},		/* asterism: should be upside down */
318 	{"at",		0x40},
319 	{"atilde",	0xe3},
320 	{"auml",	0xe4},
321 	{"ayin",	0x639},		/* arabic ain U+0639 */
322 	{"b1",		0x2d},		/* single bond */
323 	{"b2",		0x3d},		/* double bond */
324 	{"b3",		0x2261},		/* triple bond */
325 	{"bbar",	0x180},		/* b with bar U+0180 */
326 	{"beta",	0x3b2},
327 	{"bigobl",	0x2f},
328 	{"blC",		0x43},		/* should be black letter */
329 	{"blJ",		0x4a},		/* should be black letter */
330 	{"blU",		0x55},		/* should be black letter */
331 	{"blb",		0x62},		/* should be black letter */
332 	{"blozenge",	0x25ca},		/* U+25CA; should be black */
333 	{"bly",		0x79},		/* should be black letter */
334 	{"bra",		MBRA},
335 	{"brbl",	LBRB},
336 	{"breve",	LBRV},
337 	{"bslash",	'\\'},
338 	{"bsquare",	0x25a0},		/* black square U+25A0 */
339 	{"btril",	0x25c0},		/* U+25C0 */
340 	{"btrir",	0x25b6},		/* U+25B6 */
341 	{"c",		0x63},		/* circa */
342 	{"cab",		0x232a},
343 	{"cacu",	0x107},
344 	{"canc",	0x264b},
345 	{"capr",	0x2651},
346 	{"caret",	0x5e},
347 	{"cb",		0x7d},
348 	{"cbigb",	0x7d},
349 	{"cbigpren",	0x29},
350 	{"cbigsb",	0x5d},
351 	{"cced",	0xe7},
352 	{"cdil",	LCED},
353 	{"cdsb",	0x301b},		/* ]] U+301b */
354 	{"cent",	0xa2},
355 	{"chacek",	0x10d},
356 	{"chi",		0x3c7},
357 	{"circ",	LRNG},
358 	{"circa",	0x63},		/* about (year) */
359 	{"circbl",	0x325},		/* ring below accent U+0325 */
360 	{"circle",	0x25cb},		/* U+25CB */
361 	{"circledot",	0x2299},
362 	{"click",	0x296},
363 	{"club",	0x2663},
364 	{"comtime",	0x43},
365 	{"conj",	0x260c},
366 	{"cprt",	0xa9},
367 	{"cq",		'\''},
368 	{"cqq",		0x201d},
369 	{"cross",	0x2720},		/* maltese cross U+2720 */
370 	{"crotchet",	0x2669},
371 	{"csb",		0x5d},
372 	{"ctilde",	0x63},		/* +tilde */
373 	{"ctlig",	MLCT},
374 	{"cyra",	0x430},
375 	{"cyre",	0x435},
376 	{"cyrhard",	0x44a},
377 	{"cyrjat",	0x463},
378 	{"cyrm",	0x43c},
379 	{"cyrn",	0x43d},
380 	{"cyrr",	0x440},
381 	{"cyrsoft",	0x44c},
382 	{"cyrt",	0x442},
383 	{"cyry",	0x44b},
384 	{"dag",		0x2020},
385 	{"dbar",	0x111},
386 	{"dblar",	0x21cb},
387 	{"dblgt",	0x226b},
388 	{"dbllt",	0x226a},
389 	{"dced",	0x64},		/* +cedilla */
390 	{"dd",		MDD},
391 	{"ddag",	0x2021},
392 	{"ddd",		MDDD},
393 	{"decr",	0x2193},
394 	{"deg",		0xb0},
395 	{"dele",	0x64},		/* should be dele */
396 	{"delta",	0x3b4},
397 	{"descnode",	0x260b},		/* descending node U+260B */
398 	{"diamond",	0x2662},
399 	{"digamma",	0x3dd},
400 	{"div",		0xf7},
401 	{"dlessi",	0x131},
402 	{"dlessj1",	0x6a},		/* should be dotless */
403 	{"dlessj2",	0x6a},		/* should be dotless */
404 	{"dlessj3",	0x6a},		/* should be dotless */
405 	{"dollar",	0x24},
406 	{"dotab",	LDOT},
407 	{"dotbl",	LDTB},
408 	{"drachm",	0x292},
409 	{"dubh",	0x2d},
410 	{"eacu",	0xe9},
411 	{"earth",	0x2641},
412 	{"easper",	MEAS},
413 	{"ebreve",	0x115},
414 	{"ecirc",	0xea},
415 	{"edh",		0xf0},
416 	{"egrave",	0xe8},
417 	{"ehacek",	0x11b},
418 	{"ehook",	0x119},
419 	{"elem",	0x220a},
420 	{"elenis",	MELN},
421 	{"em",		0x2014},
422 	{"emac",	0x113},
423 	{"emem",	MEMM},
424 	{"en",		0x2013},
425 	{"epsilon",	0x3b5},
426 	{"equil",	0x21cb},
427 	{"ergo",	0x2234},
428 	{"es",		MES},
429 	{"eszett",	0xdf},
430 	{"eta",		0x3b7},
431 	{"eth",		0xf0},
432 	{"euml",	0xeb},
433 	{"expon",	0x2191},
434 	{"fact",	0x21},
435 	{"fata",	0x251},
436 	{"fatpara",	0xb6},		/* should have fatter, filled in bowl */
437 	{"female",	0x2640},
438 	{"ffilig",	MLFFI},
439 	{"fflig",	MLFF},
440 	{"ffllig",	MLFFL},
441 	{"filig",	MLFI},
442 	{"flat",	0x266d},
443 	{"fllig",	MLFL},
444 	{"frE",		0x45},		/* should be curly */
445 	{"frL",		'L'},		/* should be curly */
446 	{"frR",		0x52},		/* should be curly */
447 	{"frakB",	0x42},		/* should have fraktur style */
448 	{"frakG",	0x47},
449 	{"frakH",	0x48},
450 	{"frakI",	0x49},
451 	{"frakM",	0x4d},
452 	{"frakU",	0x55},
453 	{"frakX",	0x58},
454 	{"frakY",	0x59},
455 	{"frakh",	0x68},
456 	{"frbl",	LFRB},
457 	{"frown",	LFRN},
458 	{"fs",		0x20},
459 	{"fsigma",	0x3c2},
460 	{"gAacu",	0xc1},		/* should be Α+acute */
461 	{"gaacu",	0x3b1},		/* +acute */
462 	{"gabreve",	0x3b1},		/* +breve */
463 	{"gafrown",	0x3b1},		/* +frown */
464 	{"gagrave",	0x3b1},		/* +grave */
465 	{"gamac",	0x3b1},		/* +macron */
466 	{"gamma",	0x3b3},
467 	{"gauml",	0x3b1},		/* +umlaut */
468 	{"ge",		0x2267},
469 	{"geacu",	0x3b5},		/* +acute */
470 	{"gegrave",	0x3b5},		/* +grave */
471 	{"ghacu",	0x3b7},		/* +acute */
472 	{"ghfrown",	0x3b7},		/* +frown */
473 	{"ghgrave",	0x3b7},		/* +grave */
474 	{"ghmac",	0x3b7},		/* +macron */
475 	{"giacu",	0x3b9},		/* +acute */
476 	{"gibreve",	0x3b9},		/* +breve */
477 	{"gifrown",	0x3b9},		/* +frown */
478 	{"gigrave",	0x3b9},		/* +grave */
479 	{"gimac",	0x3b9},		/* +macron */
480 	{"giuml",	0x3b9},		/* +umlaut */
481 	{"glagjat",	0x467},
482 	{"glots",	0x2c0},
483 	{"goacu",	0x3bf},		/* +acute */
484 	{"gobreve",	0x3bf},		/* +breve */
485 	{"grave",	LGRV},
486 	{"gt",		0x3e},
487 	{"guacu",	0x3c5},		/* +acute */
488 	{"gufrown",	0x3c5},		/* +frown */
489 	{"gugrave",	0x3c5},		/* +grave */
490 	{"gumac",	0x3c5},		/* +macron */
491 	{"guuml",	0x3c5},		/* +umlaut */
492 	{"gwacu",	0x3c9},		/* +acute */
493 	{"gwfrown",	0x3c9},		/* +frown */
494 	{"gwgrave",	0x3c9},		/* +grave */
495 	{"hacek",	LHCK},
496 	{"halft",	0x2308},
497 	{"hash",	0x23},
498 	{"hasper",	MHAS},
499 	{"hatpath",	0x5b2},		/* hataf patah U+05B2 */
500 	{"hatqam",	0x5b3},		/* hataf qamats U+05B3 */
501 	{"hatseg",	0x5b1},		/* hataf segol U+05B1 */
502 	{"hbar",	0x127},
503 	{"heart",	0x2661},
504 	{"hebaleph",	0x5d0},		/* aleph U+05D0 */
505 	{"hebayin",	0x5e2},		/* ayin U+05E2 */
506 	{"hebbet",	0x5d1},		/* bet U+05D1 */
507 	{"hebbeth",	0x5d1},		/* bet U+05D1 */
508 	{"hebcheth",	0x5d7},		/* bet U+05D7 */
509 	{"hebdaleth",	0x5d3},		/* dalet U+05D3 */
510 	{"hebgimel",	0x5d2},		/* gimel U+05D2 */
511 	{"hebhe",	0x5d4},		/* he U+05D4 */
512 	{"hebkaph",	0x5db},		/* kaf U+05DB */
513 	{"heblamed",	0x5dc},		/* lamed U+05DC */
514 	{"hebmem",	0x5de},		/* mem U+05DE */
515 	{"hebnun",	0x5e0},		/* nun U+05E0 */
516 	{"hebnunfin",	0x5df},		/* final nun U+05DF */
517 	{"hebpe",	0x5e4},		/* pe U+05E4 */
518 	{"hebpedag",	0x5e3},		/* final pe? U+05E3 */
519 	{"hebqoph",	0x5e7},		/* qof U+05E7 */
520 	{"hebresh",	0x5e8},		/* resh U+05E8 */
521 	{"hebshin",	0x5e9},		/* shin U+05E9 */
522 	{"hebtav",	0x5ea},		/* tav U+05EA */
523 	{"hebtsade",	0x5e6},		/* tsadi U+05E6 */
524 	{"hebwaw",	0x5d5},		/* vav? U+05D5 */
525 	{"hebyod",	0x5d9},		/* yod U+05D9 */
526 	{"hebzayin",	0x5d6},		/* zayin U+05D6 */
527 	{"hgz",		0x292},		/* ??? Cf "alet" */
528 	{"hireq",	0x5b4},		/* U+05B4 */
529 	{"hlenis",	MHLN},
530 	{"hook",	LOGO},
531 	{"horizE",	0x45},		/* should be on side */
532 	{"horizP",	0x50},		/* should be on side */
533 	{"horizS",	0x223d},
534 	{"horizT",	0x22a3},
535 	{"horizb",	0x7b},		/* should be underbrace */
536 	{"ia",		0x3b1},
537 	{"iacu",	0xed},
538 	{"iasper",	MIAS},
539 	{"ib",		0x3b2},
540 	{"ibar",	0x268},
541 	{"ibreve",	0x12d},
542 	{"icirc",	0xee},
543 	{"id",		0x3b4},
544 	{"ident",	0x2261},
545 	{"ie",		0x3b5},
546 	{"ifilig",	MLFI},
547 	{"ifflig",	MLFF},
548 	{"ig",		0x3b3},
549 	{"igrave",	0xec},
550 	{"ih",		0x3b7},
551 	{"ii",		0x3b9},
552 	{"ik",		0x3ba},
553 	{"ilenis",	MILN},
554 	{"imac",	0x12b},
555 	{"implies",	0x21d2},
556 	{"index",	0x261e},
557 	{"infin",	0x221e},
558 	{"integ",	0x222b},
559 	{"intsec",	0x2229},
560 	{"invpri",	0x2cf},
561 	{"iota",	0x3b9},
562 	{"iq",		0x3c8},
563 	{"istlig",	MLST},
564 	{"isub",	0x3f5},		/* iota below accent */
565 	{"iuml",	0xef},
566 	{"iz",		0x3b6},
567 	{"jup",		0x2643},
568 	{"kappa",	0x3ba},
569 	{"koppa",	0x3df},
570 	{"lambda",	0x3bb},
571 	{"lar",		0x2190},
572 	{"lbar",	0x142},
573 	{"le",		0x2266},
574 	{"lenis",	LLEN},
575 	{"leo",		0x264c},
576 	{"lhalfbr",	0x2308},
577 	{"lhshoe",	0x2283},
578 	{"libra",	0x264e},
579 	{"llswing",	MLLS},
580 	{"lm",		0x2d0},
581 	{"logicand",	0x2227},
582 	{"logicor",	0x2228},
583 	{"longs",	0x283},
584 	{"lrar",	0x2194},
585 	{"lt",		0x3c},
586 	{"ltappr",	0x227e},
587 	{"ltflat",	0x2220},
588 	{"lumlbl",	0x6c},		/* +umlaut below */
589 	{"mac",		LMAC},
590 	{"male",	0x2642},
591 	{"mc",		0x63},		/* should be raised */
592 	{"merc",	0x263f},		/* mercury U+263F */
593 	{"min",		0x2212},
594 	{"moonfq",	0x263d},		/* first quarter moon U+263D */
595 	{"moonlq",	0x263e},		/* last quarter moon U+263E */
596 	{"msylab",	0x6d},		/* +sylab (ˌ) */
597 	{"mu",		0x3bc},
598 	{"nacu",	0x144},
599 	{"natural",	0x266e},
600 	{"neq",		0x2260},
601 	{"nfacu",	0x2032},
602 	{"nfasper",	0x2bd},
603 	{"nfbreve",	0x2d8},
604 	{"nfced",	0xb8},
605 	{"nfcirc",	0x2c6},
606 	{"nffrown",	0x2322},
607 	{"nfgra",	0x2cb},
608 	{"nfhacek",	0x2c7},
609 	{"nfmac",	0xaf},
610 	{"nftilde",	0x2dc},
611 	{"nfuml",	0xa8},
612 	{"ng",		0x14b},
613 	{"not",		0xac},
614 	{"notelem",	0x2209},
615 	{"ntilde",	0xf1},
616 	{"nu",		0x3bd},
617 	{"oab",		0x2329},
618 	{"oacu",	0xf3},
619 	{"oasper",	MOAS},
620 	{"ob",		0x7b},
621 	{"obar",	0xf8},
622 	{"obigb",	0x7b},		/* should be big */
623 	{"obigpren",	0x28},
624 	{"obigsb",	0x5b},		/* should be big */
625 	{"obreve",	0x14f},
626 	{"ocirc",	0xf4},
627 	{"odsb",	0x301a},		/* [[ U+301A */
628 	{"oe",		0x153},
629 	{"oeamp",	0x26},
630 	{"ograve",	0xf2},
631 	{"ohook",	0x6f},		/* +hook */
632 	{"olenis",	MOLN},
633 	{"omac",	0x14d},
634 	{"omega",	0x3c9},
635 	{"omicron",	0x3bf},
636 	{"ope",		0x25b},
637 	{"opp",		0x260d},
638 	{"oq",		0x60},
639 	{"oqq",		0x201c},
640 	{"or",		MOR},
641 	{"osb",		0x5b},
642 	{"otilde",	0xf5},
643 	{"ouml",	0xf6},
644 	{"ounce",	0x2125},		/* ounce U+2125 */
645 	{"ovparen",	0x2322},		/* should be sideways ( */
646 	{"p",		0x2032},
647 	{"pa",		0x2202},
648 	{"page",	0x50},
649 	{"pall",	0x28e},
650 	{"paln",	0x272},
651 	{"par",		PAR},
652 	{"para",	0xb6},
653 	{"pbar",	0x70},		/* +bar */
654 	{"per",		0x2118},		/* per U+2118 */
655 	{"phi",		0x3c6},
656 	{"phi2",	0x3d5},
657 	{"pi",		0x3c0},
658 	{"pisces",	0x2653},
659 	{"planck",	0x127},
660 	{"plantinJ",	0x4a},		/* should be script */
661 	{"pm",		0xb1},
662 	{"pmil",	0x2030},
663 	{"pp",		0x2033},
664 	{"ppp",		0x2034},
665 	{"prop",	0x221d},
666 	{"psi",		0x3c8},
667 	{"pstlg",	0xa3},
668 	{"q",		0x3f},		/* should be raised */
669 	{"qamets",	0x5b3},		/* U+05B3 */
670 	{"quaver",	0x266a},
671 	{"rar",		0x2192},
672 	{"rasper",	MRAS},
673 	{"rdot",	0xb7},
674 	{"recipe",	0x211e},		/* U+211E */
675 	{"reg",		0xae},
676 	{"revC",	0x186},		/* open O U+0186 */
677 	{"reva",	0x252},
678 	{"revc",	0x254},
679 	{"revope",	0x25c},
680 	{"revr",	0x279},
681 	{"revsc",	0x2d2},		/* upside-down semicolon */
682 	{"revv",	0x28c},
683 	{"rfa",		0x6f},		/* +hook (Cf "goal") */
684 	{"rhacek",	0x159},
685 	{"rhalfbr",	0x2309},
686 	{"rho",		0x3c1},
687 	{"rhshoe",	0x2282},
688 	{"rlenis",	MRLN},
689 	{"rsylab",	0x72},		/* +sylab */
690 	{"runash",	0x46},		/* should be runic 'ash' */
691 	{"rvow",	0x2d4},
692 	{"sacu",	0x15b},
693 	{"sagit",	0x2650},
694 	{"sampi",	0x3e1},
695 	{"saturn",	0x2644},
696 	{"sced",	0x15f},
697 	{"schwa",	0x259},
698 	{"scorpio",	0x264f},
699 	{"scrA",	0x41},		/* should be script */
700 	{"scrC",	0x43},
701 	{"scrE",	0x45},
702 	{"scrF",	0x46},
703 	{"scrI",	0x49},
704 	{"scrJ",	0x4a},
705 	{"scrL",	'L'},
706 	{"scrO",	0x4f},
707 	{"scrP",	0x50},
708 	{"scrQ",	0x51},
709 	{"scrS",	0x53},
710 	{"scrT",	0x54},
711 	{"scrb",	0x62},
712 	{"scrd",	0x64},
713 	{"scrh",	0x68},
714 	{"scrl",	0x6c},
715 	{"scruple",	0x2108},		/* U+2108 */
716 	{"sdd",		0x2d0},
717 	{"sect",	0xa7},
718 	{"semE",	0x2203},
719 	{"sh",		0x283},
720 	{"shacek",	0x161},
721 	{"sharp",	0x266f},
722 	{"sheva",	0x5b0},		/* U+05B0 */
723 	{"shti",	0x26a},
724 	{"shtsyll",	0x222a},
725 	{"shtu",	0x28a},
726 	{"sidetri",	0x22b2},
727 	{"sigma",	0x3c3},
728 	{"since",	0x2235},
729 	{"slge",	0x2265},		/* should have slanted line under */
730 	{"slle",	0x2264},		/* should have slanted line under */
731 	{"sm",		0x2c8},
732 	{"smm",		0x2cc},
733 	{"spade",	0x2660},
734 	{"sqrt",	0x221a},
735 	{"square",	0x25a1},		/* U+25A1 */
736 	{"ssChi",	0x3a7},		/* should be sans serif */
737 	{"ssIota",	0x399},
738 	{"ssOmicron",	0x39f},
739 	{"ssPi",	0x3a0},
740 	{"ssRho",	0x3a1},
741 	{"ssSigma",	0x3a3},
742 	{"ssTau",	0x3a4},
743 	{"star",	0x2a},
744 	{"stlig",	MLST},
745 	{"sup2",	0x2072},
746 	{"supgt",	0x2c3},
747 	{"suplt",	0x2c2},
748 	{"sur",		0x2b3},
749 	{"swing",	0x223c},
750 	{"tau",		0x3c4},
751 	{"taur",	0x2649},
752 	{"th",		0xfe},
753 	{"thbar",	0xfe},		/* +bar */
754 	{"theta",	0x3b8},
755 	{"thinqm",	0x3f},		/* should be thinner */
756 	{"tilde",	LTIL},
757 	{"times",	0xd7},
758 	{"tri",		0x2206},
759 	{"trli",	0x2016},
760 	{"ts",		0x2009},
761 	{"uacu",	0xfa},
762 	{"uasper",	MUAS},
763 	{"ubar",	0x75},		/* +bar */
764 	{"ubreve",	0x16d},
765 	{"ucirc",	0xfb},
766 	{"udA",		0x2200},
767 	{"udT",		0x22a5},
768 	{"uda",		0x250},
769 	{"udh",		0x265},
770 	{"udqm",	0xbf},
771 	{"udpsi",	0x22d4},
772 	{"udtr",	0x2207},
773 	{"ugrave",	0xf9},
774 	{"ulenis",	MULN},
775 	{"umac",	0x16b},
776 	{"uml",		LUML},
777 	{"undl",	0x2cd},		/* underline accent */
778 	{"union",	0x222a},
779 	{"upsilon",	0x3c5},
780 	{"uuml",	0xfc},
781 	{"vavpath",	0x5d5},		/* vav U+05D5 (+patah) */
782 	{"vavsheva",	0x5d5},		/* vav U+05D5 (+sheva) */
783 	{"vb",		0x7c},
784 	{"vddd",	0x22ee},
785 	{"versicle2",	0x2123},		/* U+2123 */
786 	{"vinc",	0xaf},
787 	{"virgo",	0x264d},
788 	{"vpal",	0x25f},
789 	{"vvf",		0x263},
790 	{"wasper",	MWAS},
791 	{"wavyeq",	0x2248},
792 	{"wlenis",	MWLN},
793 	{"wyn",		0x1bf},		/* wynn U+01BF */
794 	{"xi",		0x3be},
795 	{"yacu",	0xfd},
796 	{"ycirc",	0x177},
797 	{"ygh",		0x292},
798 	{"ymac",	0x79},		/* +macron */
799 	{"yuml",	0xff},
800 	{"zced",	0x7a},		/* +cedilla */
801 	{"zeta",	0x3b6},
802 	{"zh",		0x292},
803 	{"zhacek",	0x17e}
804 };
805 /*
806    The following special characters don't have close enough
807    equivalents in Unicode, so aren't in the above table.
808 	22n		2^(2^n) Cf Fermat
809 	2on4		2/4
810 	3on8		3/8
811 	Bantuo		Bantu O. Cf Otshi-herero
812 	Car		C with circular arrow on top
813 	albrtime 	cut-time: C with vertical line
814 	ardal		Cf dental
815 	bantuo		Bantu o. Cf Otshi-herero
816 	bbc1		single chem bond below
817 	bbc2		double chem bond below
818 	bbl1		chem bond like /
819 	bbl2		chem bond like //
820 	bbr1		chem bond like \
821 	bbr2		chem bond \\
822 	bcop1		copper symbol. Cf copper
823 	bcop2		copper symbol. Cf copper
824 	benchm		Cf benchmark
825 	btc1		single chem bond above
826 	btc2		double chem bond above
827 	btl1		chem bond like \
828 	btl2		chem bond like \\
829 	btr1		chem bond like /
830 	btr2		chem bond line //
831 	burman		Cf Burman
832 	devph		sanskrit letter. Cf ph
833 	devrfls		sanskrit letter. Cf cerebral
834 	duplong[12]	musical note
835 	egchi		early form of chi
836 	eggamma[12]	early form of gamma
837 	egiota		early form of iota
838 	egkappa		early form of kappa
839 	eglambda	early form of lambda
840 	egmu[12]	early form of mu
841 	egnu[12]	early form of nu
842 	egpi[123]	early form of pi
843 	egrho[12]	early form of rho
844 	egsampi		early form of sampi
845 	egsan		early form of san
846 	egsigma[12]	early form of sigma
847 	egxi[123]	early form of xi
848 	elatS		early form of S
849 	elatc[12]	early form of C
850 	elatg[12]	early form of G
851 	glagjeri	Slavonic Glagolitic jeri
852 	glagjeru	Slavonic Glagolitic jeru
853 	hypolem		hypolemisk (line with underdot)
854 	lhrbr		lower half }
855 	longmord	long mordent
856 	mbwvow		backwards scretched C. Cf retract.
857 	mord		music symbol.  Cf mordent
858 	mostra		Cf direct
859 	ohgcirc		old form of circumflex
860 	oldbeta		old form of β. Cf perturbate
861 	oldsemibr[12]	old forms of semibreve. Cf prolation
862 	ormg		old form of g. Cf G
863 	para[12345]	form of ¶
864 	pauseo		musical pause sign
865 	pauseu		musical pause sign
866 	pharyng		Cf pharyngal
867 	ragr		Black letter ragged r
868 	repetn		musical repeat. Cf retort
869 	segno		musical segno sign
870 	semain[12]	semitic ain
871 	semhe		semitic he
872 	semheth		semitic heth
873 	semkaph		semitic kaph
874 	semlamed[12]	semitic lamed
875 	semmem		semitic mem
876 	semnum		semitic nun
877 	sempe		semitic pe
878 	semqoph[123]	semitic qoph
879 	semresh		semitic resh
880 	semtav[1234]	semitic tav
881 	semyod		semitic yod
882 	semzayin[123]	semitic zayin
883 	shtlong[12]	U with underbar. Cf glyconic
884 	sigmatau	σ,τ combination
885 	squaver		sixteenth note
886 	sqbreve		square musical breve note
887 	swast		swastika
888 	uhrbr		upper half of big }
889 	versicle1		Cf versicle
890  */
891 
892 
893 static Rune normtab[128] = {
894 	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
895 /*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
896 	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
897 /*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
898 	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
899 /*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	SPCS,	'\'',
900 	0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f,
901 /*30*/  0x30,	0x31,	0x32,	0x33,	0x34,	0x35,	0x36,	0x37,
902 	0x38,	0x39,	0x3a,	0x3b,	TAGS,	0x3d,	TAGE,	0x3f,
903 /*40*/  0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47,
904 	0x48,	0x49,	0x4a,	0x4b,	'L',	0x4d,	0x4e,	0x4f,
905 /*50*/	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57,
906 	0x58,	0x59,	0x5a,	0x5b,	'\\',	0x5d,	0x5e,	0x5f,
907 /*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,
908 	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,
909 /*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,
910 	0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE
911 };
912 static Rune phtab[128] = {
913 	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
914 /*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
915 	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
916 /*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
917 	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
918 /*20*/	0x20,	0x21,	0x2c8,	0x23,	0x24,	0x2cc,	0xe6,	'\'',
919 	0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f,
920 /*30*/  0x30,	0x31,	0x32,	0x25c,	0x34,	0x35,	0x36,	0x37,
921 	0x38,	0xf8,	0x2d0,	0x3b,	TAGS,	0x3d,	TAGE,	0x3f,
922 /*40*/  0x259,	0x251,	0x42,	0x43,	0xf0,	0x25b,	0x46,	0x47,
923 	0x48,	0x26a,	0x4a,	0x4b,	'L',	0x4d,	0x14b,	0x254,
924 /*50*/	0x50,	0x252,	0x52,	0x283,	0x3b8,	0x28a,	0x28c,	0x57,
925 	0x58,	0x59,	0x292,	0x5b,	'\\',	0x5d,	0x5e,	0x5f,
926 /*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,
927 	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,
928 /*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,
929 	0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE
930 };
931 static Rune grtab[128] = {
932 	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
933 /*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
934 	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
935 /*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
936 	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
937 /*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	SPCS,	'\'',
938 	0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f,
939 /*30*/  0x30,	0x31,	0x32,	0x33,	0x34,	0x35,	0x36,	0x37,
940 	0x38,	0x39,	0x3a,	0x3b,	TAGS,	0x3d,	TAGE,	0x3f,
941 /*40*/  0x40,	0x391,	0x392,	0x39e,	0x394,	0x395,	0x3a6,	0x393,
942 	0x397,	0x399,	0x3da,	0x39a,	0x39b,	0x39c,	0x39d,	0x39f,
943 /*50*/	0x3a0,	0x398,	0x3a1,	0x3a3,	0x3a4,	0x3a5,	0x56,	0x3a9,
944 	0x3a7,	0x3a8,	0x396,	0x5b,	'\\',	0x5d,	0x5e,	0x5f,
945 /*60*/	0x60,	0x3b1,	0x3b2,	0x3be,	0x3b4,	0x3b5,	0x3c6,	0x3b3,
946 	0x3b7,	0x3b9,	0x3c2,	0x3ba,	0x3bb,	0x3bc,	0x3bd,	0x3bf,
947 /*70*/	0x3c0,	0x3b8,	0x3c1,	0x3c3,	0x3c4,	0x3c5,	0x76,	0x3c9,
948 	0x3c7,	0x3c8,	0x3b6,	0x7b,	0x7c,	0x7d,	0x7e,	NONE
949 };
950 static Rune subtab[128] = {
951 	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
952 /*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
953 	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
954 /*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
955 	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
956 /*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	SPCS,	'\'',
957 	0x208d,	0x208e,	0x2a,	0x208a,	0x2c,	0x208b,	0x2e,	0x2f,
958 /*30*/  0x2080,	0x2081,	0x2082,	0x2083,	0x2084,	0x2085,	0x2086,	0x2087,
959 	0x2088,	0x2089,	0x3a,	0x3b,	TAGS,	0x208c,	TAGE,	0x3f,
960 /*40*/  0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47,
961 	0x48,	0x49,	0x4a,	0x4b,	'L',	0x4d,	0x4e,	0x4f,
962 /*50*/	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57,
963 	0x58,	0x59,	0x5a,	0x5b,	'\\',	0x5d,	0x5e,	0x5f,
964 /*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,
965 	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,
966 /*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,
967 	0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE
968 };
969 static Rune suptab[128] = {
970 	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
971 /*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
972 	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
973 /*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
974 	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
975 /*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	SPCS,	'\'',
976 	0x207d,	0x207e,	0x2a,	0x207a,	0x2c,	0x207b,	0x2e,	0x2f,
977 /*30*/  0x2070,	0x2071,	0x2072,	0x2073,	0x2074,	0x2075,	0x2076,	0x2077,
978 	0x2078,	0x2079,	0x3a,	0x3b,	TAGS,	0x207c,	TAGE,	0x3f,
979 /*40*/  0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47,
980 	0x48,	0x49,	0x4a,	0x4b,	'L',	0x4d,	0x4e,	0x4f,
981 /*50*/	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57,
982 	0x58,	0x59,	0x5a,	0x5b,	'\\',	0x5d,	0x5e,	0x5f,
983 /*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,
984 	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,
985 /*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,
986 	0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE
987 };
988 
989 static int	tagstarts;
990 static char	tag[Buflen];
991 static int	naux;
992 static char	auxname[Maxaux][Buflen];
993 static char	auxval[Maxaux][Buflen];
994 static char	spec[Buflen];
995 static char	*auxstate[Naux];	/* vals for most recent tag */
996 static Entry	curentry;
997 #define cursize (curentry.end-curentry.start)
998 
999 static char	*getspec(char *, char *);
1000 static char	*gettag(char *, char *);
1001 static void	dostatus(void);
1002 
/*
 * cmd is one of:
 *    'p': normal print
 *    'h': just print headwords
 *    'r': print raw (the entry's bytes are output untranslated)
 */
1009 void
oedprintentry(Entry e,int cmd)1010 oedprintentry(Entry e, int cmd)
1011 {
1012 	char *p, *pe;
1013 	int t, a, i;
1014 	long r, rprev, rlig;
1015 	Rune *transtab;
1016 
1017 	p = e.start;
1018 	pe = e.end;
1019 	transtab = normtab;
1020 	rprev = NONE;
1021 	changett(0, 0, 0);
1022 	curentry = e;
1023 	if(cmd == 'h')
1024 		outinhibit = 1;
1025 	while(p < pe) {
1026 		if(cmd == 'r') {
1027 			outchar(*p++);
1028 			continue;
1029 		}
1030 		r = transtab[(*p++)&0x7F];
1031 		if(r < NONE) {
1032 			/* Emit the rune, but buffer in case of ligature */
1033 			if(rprev != NONE)
1034 				outrune(rprev);
1035 			rprev = r;
1036 		} else if(r == SPCS) {
1037 			/* Start of special character name */
1038 			p = getspec(p, pe);
1039 			r = lookassoc(spectab, asize(spectab), spec);
1040 			if(r == -1) {
1041 				if(debug)
1042 					err("spec %ld %d %s",
1043 						e.doff, cursize, spec);
1044 				r = 0xfffd;
1045 			}
1046 			if(r >= LIGS && r < LIGE) {
1047 				/* handle possible ligature */
1048 				rlig = liglookup(r, rprev);
1049 				if(rlig != NONE)
1050 					rprev = rlig;	/* overwrite rprev */
1051 				else {
1052 					/* could print accent, but let's not */
1053 					if(rprev != NONE) outrune(rprev);
1054 					rprev = NONE;
1055 				}
1056 			} else if(r >= MULTI && r < MULTIE) {
1057 				if(rprev != NONE) {
1058 					outrune(rprev);
1059 					rprev = NONE;
1060 				}
1061 				outrunes(multitab[r-MULTI]);
1062 			} else if(r == PAR) {
1063 				if(rprev != NONE) {
1064 					outrune(rprev);
1065 					rprev = NONE;
1066 				}
1067 				outnl(1);
1068 			} else {
1069 				if(rprev != NONE) outrune(rprev);
1070 				rprev = r;
1071 			}
1072 		} else if(r == TAGS) {
1073 			/* Start of tag name */
1074 			if(rprev != NONE) {
1075 				outrune(rprev);
1076 				rprev = NONE;
1077 			}
1078 			p = gettag(p, pe);
1079 			t = lookassoc(tagtab, asize(tagtab), tag);
1080 			if(t == -1) {
1081 				if(debug)
1082 					err("tag %ld %d %s",
1083 						e.doff, cursize, tag);
1084 				continue;
1085 			}
1086 			for(i = 0; i < Naux; i++)
1087 				auxstate[i] = 0;
1088 			for(i = 0; i < naux; i++) {
1089 				a = lookassoc(auxtab, asize(auxtab), auxname[i]);
1090 				if(a == -1) {
1091 					if(debug)
1092 						err("aux %ld %d %s",
1093 							e.doff, cursize, auxname[i]);
1094 				} else
1095 					auxstate[a] = auxval[i];
1096 			}
1097 			switch(t){
1098 			case E:
1099 			case Ve:
1100 				outnl(0);
1101 				if(tagstarts)
1102 					dostatus();
1103 				break;
1104 			case Ed:
1105 			case Etym:
1106 				outchar(tagstarts? '[' : ']');
1107 				break;
1108 			case Pr:
1109 				outchar(tagstarts? '(' : ')');
1110 				break;
1111 			case In:
1112 				transtab = changett(transtab, subtab, tagstarts);
1113 				break;
1114 			case Hm:
1115 			case Su:
1116 			case Fq:
1117 				transtab = changett(transtab, suptab, tagstarts);
1118 				break;
1119 			case Gk:
1120 				transtab = changett(transtab, grtab, tagstarts);
1121 				break;
1122 			case Ph:
1123 				transtab = changett(transtab, phtab, tagstarts);
1124 				break;
1125 			case Hw:
1126 				if(cmd == 'h') {
1127 					if(!tagstarts)
1128 						outchar(' ');
1129 					outinhibit = !tagstarts;
1130 				}
1131 				break;
1132 			case S0:
1133 			case S1:
1134 			case S2:
1135 			case S3:
1136 			case S4:
1137 			case S5:
1138 			case S6:
1139 			case S7a:
1140 			case S7n:
1141 			case Sn:
1142 			case Sgk:
1143 				if(tagstarts) {
1144 					outnl(2);
1145 					dostatus();
1146 					if(auxstate[Num]) {
1147 						if(t == S3 || t == S5) {
1148 							i = atoi(auxstate[Num]);
1149 							while(i--)
1150 								outchar('*');
1151 							outchars("  ");
1152 						} else if(t == S7a || t == S7n || t == Sn) {
1153 							outchar('(');
1154 							outchars(auxstate[Num]);
1155 							outchars(") ");
1156 						} else if(t == Sgk) {
1157 							i = grtab[(uchar)auxstate[Num][0]];
1158 							if(i != NONE)
1159 								outrune(i);
1160 							outchars(".  ");
1161 						} else {
1162 							outchars(auxstate[Num]);
1163 							outchars(".  ");
1164 						}
1165 					}
1166 				}
1167 				break;
1168 			case Cb:
1169 			case Db:
1170 			case Qp:
1171 			case P:
1172 				if(tagstarts)
1173 					outnl(1);
1174 				break;
1175 			case Table:
1176 				/*
1177 				 * Todo: gather columns, justify them, etc.
1178 				 * For now, just let colums come out as rows
1179 				 */
1180 				if(!tagstarts)
1181 					outnl(0);
1182 				break;
1183 			case Col:
1184 				if(tagstarts)
1185 					outnl(0);
1186 				break;
1187 			case Dn:
1188 				if(tagstarts)
1189 					outchar('/');
1190 				break;
1191 			}
1192 		}
1193 	}
1194 	if(cmd == 'h') {
1195 		outinhibit = 0;
1196 		outnl(0);
1197 	}
1198 }
1199 
1200 /*
1201  * Return offset into bdict where next oed entry after fromoff starts.
1202  * Oed entries start with <e>, <ve>, <e st=...>, or <ve st=...>
1203  */
1204 long
oednextoff(long fromoff)1205 oednextoff(long fromoff)
1206 {
1207 	long a, n;
1208 	int c;
1209 
1210 	a = Bseek(bdict, fromoff, 0);
1211 	if(a < 0)
1212 		return -1;
1213 	n = 0;
1214 	for(;;) {
1215 		c = Bgetc(bdict);
1216 		if(c < 0)
1217 			break;
1218 		if(c == '<') {
1219 			c = Bgetc(bdict);
1220 			if(c == 'e') {
1221 				c = Bgetc(bdict);
1222 				if(c == '>' || c == ' ')
1223 					n = 3;
1224 			} else if(c == 'v' && Bgetc(bdict) == 'e') {
1225 				c = Bgetc(bdict);
1226 				if(c == '>' || c == ' ')
1227 					n = 4;
1228 			}
1229 			if(n)
1230 				break;
1231 		}
1232 	}
1233 	return (Boffset(bdict)-n);
1234 }
1235 
/*
 * Text of the pronunciation key printed by oedprintkey().
 * It is kept as six consecutive pieces (prkey1..prkey6) that are
 * printed back to back, presumably to keep each literal short.
 */
static char *prkey1 =
"KEY TO THE PRONUNCIATION\n"
"\n"
"I. CONSONANTS\n"
"b, d, f, k, l, m, n, p, t, v, z: usual English values\n"
"\n"
"g as in go (gəʊ)\n"
"h  ...  ho! (həʊ)\n"
"r  ...  run (rʌn), terrier (ˈtɛriə(r))\n"
"(r)...  her (hɜː(r))\n"
"s  ...  see (siː), success (səkˈsɜs)\n"
"w  ...  wear (wɛə(r))\n"
"hw ...  when (hwɛn)\n"
"j  ...  yes (jɛs)\n"
"θ  ...  thin (θin), bath (bɑːθ)\n"
"ð  ...  then (ðɛn), bathe (beɪð)\n"
"ʃ  ...  shop (ʃɒp), dish (dɪʃ)\n"
"tʃ ...  chop (tʃɒp), ditch (dɪtʃ)\n"
"ʒ  ...  vision (ˈvɪʒən), déjeuner (deʒøne)\n"
;
static char *prkey2 =
"dʒ ...  judge (dʒʌdʒ)\n"
"ŋ  ...  singing (ˈsɪŋɪŋ), think (θiŋk)\n"
"ŋg ...  finger (ˈfiŋgə(r))\n"
"\n"
"Foreign\n"
"ʎ as in It. seraglio (serˈraʎo)\n"
"ɲ  ...  Fr. cognac (kɔɲak)\n"
"x  ...  Ger. ach (ax), Sc. loch (lɒx)\n"
"ç  ...  Ger. ich (ɪç), Sc. nicht (nɪçt)\n"
"ɣ  ...  North Ger. sagen (ˈzaːɣən)\n"
"c  ...  Afrikaans baardmannetjie (ˈbaːrtmanəci)\n"
"ɥ  ...  Fr. cuisine (kɥizin)\n"
"\n"
;
static char *prkey3 =
"II. VOWELS AND DIPHTHONGS\n"
"\n"
"Short\n"
"ɪ as in pit (pɪt), -ness (-nɪs)\n"
"ɛ  ...  pet (pɛt), Fr. sept (sɛt)\n"
"æ  ...  pat (pæt)\n"
"ʌ  ...  putt (pʌt)\n"
"ɒ  ...  pot (pɒt)\n"
"ʊ  ...  put (pʊt)\n"
"ə  ...  another (əˈnʌðə(r))\n"
"(ə)...  beaten (ˈbiːt(ə)n)\n"
"i  ...  Fr. si (si)\n"
"e  ...  Fr. bébé (bebe)\n"
"a  ...  Fr. mari (mari)\n"
"ɑ  ...  Fr. bâtiment (bɑtimã)\n"
"ɔ  ...  Fr. homme (ɔm)\n"
"o  ...  Fr. eau (o)\n"
"ø  ...  Fr. peu (pø)\n"
;
static char *prkey4 =
"œ  ...  Fr. boeuf (bœf), coeur (kœr)\n"
"u  ...  Fr. douce (dus)\n"
"ʏ  ...  Ger. Müller (ˈmʏlər)\n"
"y  ...  Fr. du (dy)\n"
"\n"
"Long\n"
"iː as in bean (biːn)\n"
"ɑː ...  barn (bɑːn)\n"
"ɔː ...  born (bɔːn)\n"
"uː ...  boon (buːn)\n"
"ɜː ...  burn (bɜːn)\n"
"eː ...  Ger. Schnee (ʃneː)\n"
"ɛː ...  Ger. Fähre (ˈfɛːrə)\n"
"aː ...  Ger. Tag (taːk)\n"
"oː ...  Ger. Sohn (zoːn)\n"
"øː ...  Ger. Goethe (gøːtə)\n"
"yː ...  Ger. grün (gryːn)\n"
"\n"
;
static char *prkey5 =
"Nasal\n"
"ɛ˜, æ˜ as in Fr. fin (fɛ˜, fæ˜)\n"
"ã  ...  Fr. franc (frã)\n"
"ɔ˜ ...  Fr. bon (bɔ˜n)\n"
"œ˜ ...  Fr. un (œ˜)\n"
"\n"
"Diphthongs, etc.\n"
"eɪ as in bay (beɪ)\n"
"aɪ ...  buy (baɪ)\n"
"ɔɪ ...  boy (bɔɪ)\n"
"əʊ ...  no (nəʊ)\n"
"aʊ ...  now (naʊ)\n"
"ɪə ...  peer (pɪə(r))\n"
"ɛə ...  pair (pɛə(r))\n"
"ʊə ...  tour (tʊə(r))\n"
"ɔə ...  boar (bɔə(r))\n"
"\n"
;
static char *prkey6 =
"III. STRESS\n"
"\n"
"Main stress: ˈ preceding stressed syllable\n"
"Secondary stress: ˌ preceding stressed syllable\n"
"\n"
"E.g.: pronunciation (prəˌnʌnsɪˈeɪʃ(ə)n)\n";
/* TODO: find transcriptions of foreign consonants, œ, ʏ, nasals */
1338 
1339 void
oedprintkey(void)1340 oedprintkey(void)
1341 {
1342 	Bprint(bout, "%s%s%s%s%s%s",
1343 		prkey1, prkey2, prkey3, prkey4, prkey5, prkey6);
1344 }
1345 
1346 /*
1347  * f points just after a '&', fe points at end of entry.
1348  * Accumulate the special name, starting after the &
1349  * and continuing until the next '.', in spec[].
1350  * Return pointer to char after '.'.
1351  */
1352 static char *
getspec(char * f,char * fe)1353 getspec(char *f, char *fe)
1354 {
1355 	char *t;
1356 	int c, i;
1357 
1358 	t = spec;
1359 	i = sizeof spec;
1360 	while(--i > 0) {
1361 		c = *f++;
1362 		if(c == '.' || f == fe)
1363 			break;
1364 		*t++ = c;
1365 	}
1366 	*t = 0;
1367 	return f;
1368 }
1369 
1370 /*
1371  * f points just after '<'; fe points at end of entry.
1372  * Expect next characters from bin to match:
1373  *  [/][^ >]+( [^>=]+=[^ >]+)*>
1374  *      tag   auxname auxval
1375  * Accumulate the tag and its auxilliary information in
1376  * tag[], auxname[][] and auxval[][].
1377  * Set tagstarts=1 if the tag is 'starting' (has no '/'), else 0.
1378  * Set naux to the number of aux pairs found.
1379  * Return pointer to after final '>'.
1380  */
1381 static char *
gettag(char * f,char * fe)1382 gettag(char *f, char *fe)
1383 {
1384 	char *t;
1385 	int c, i;
1386 
1387 	t = tag;
1388 	c = *f++;
1389 	if(c == '/')
1390 		tagstarts = 0;
1391 	else {
1392 		tagstarts = 1;
1393 		*t++ = c;
1394 	}
1395 	i = Buflen;
1396 	naux = 0;
1397 	while(--i > 0) {
1398 		c = *f++;
1399 		if(c == '>' || f == fe)
1400 			break;
1401 		if(c == ' ') {
1402 			*t = 0;
1403 			t = auxname[naux];
1404 			i = Buflen;
1405 			if(naux < Maxaux-1)
1406 				naux++;
1407 		} else if(naux && c == '=') {
1408 			*t = 0;
1409 			t = auxval[naux-1];
1410 			i = Buflen;
1411 		} else
1412 			*t++ = c;
1413 	}
1414 	*t = 0;
1415 	return f;
1416 }
1417 
1418 static void
dostatus(void)1419 dostatus(void)
1420 {
1421 	char *s;
1422 
1423 	s = auxstate[St];
1424 	if(s) {
1425 		if(strcmp(s, "obs") == 0)
1426 			outrune(0x2020);
1427 		else if(strcmp(s, "ali") == 0)
1428 			outrune(0x2016);
1429 		else if(strcmp(s, "err") == 0 || strcmp(s, "spu") == 0)
1430 			outrune(0xb6);
1431 		else if(strcmp(s, "xref") == 0)
1432 			{/* nothing */}
1433 		else if(debug)
1434 			err("status %ld %d %s", curentry.doff, cursize, s);
1435 	}
1436 }
1437