1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 
31 /*	from OpenSolaris "n8.c	1.8	05/06/08 SMI"	*/
32 
33 /*
34  * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany
35  *
36  * Sccsid @(#)n8.c	1.44 (gritter) 9/26/10
37  */
38 
39 /*
40  * University Copyright- Copyright (c) 1982, 1986, 1988
41  * The Regents of the University of California
42  * All Rights Reserved
43  *
44  * University Acknowledgment- Portions of this document are derived from
45  * software developed by the University of California, Berkeley, and its
46  * contributors.
47  */
48 
49 #include	<stddef.h>
50 #include	<stdio.h>
51 #ifdef	EUC
52 #include	<wctype.h>
53 #endif
54 #include	<ctype.h>
55 #include	<stdlib.h>
56 #include	<string.h>
57 #include	"tdef.h"
58 #include "ext.h"
59 #include "pt.h"
60 #include "libhnj/hyphen.h"
61 #define	HY_BIT	0200	/* generic stuff in here only works for ascii */
62 #define	HY_BIT2	0x80000000
63 
64 /*
65  * troff8.c
66  *
67  * hyphenation
68  */
69 
70 int	*hbuf;
71 int	NHEX;
72 int	*nexth;
73 tchar	*hyend;
74 #define THRESH 160 /*digram goodness threshold*/
75 int	thresh = THRESH;
76 
77 static	void		hyphenhnj(void);
78 
79 static int *
growhbuf(int ** pp)80 growhbuf(int **pp)
81 {
82 	int	*nhbuf;
83 	int	inc = 4;
84 	ptrdiff_t	j;
85 
86 	if ((nhbuf = realloc(hbuf, (NHEX+inc) * sizeof *hbuf)) == NULL)
87 		return NULL;
88 	NHEX += inc;
89 	j = (char *)nhbuf - (char *)hbuf;
90 	nexth = (int *)((char *)nexth + j);
91 	if (pp)
92 		*pp = (int *)((char *)*pp + j);
93 	return hbuf = nhbuf;
94 }
95 
/*
 * Find the hyphenation points of the word starting at wp.
 *
 * Leading punctuation is skipped.  The word proper must start with a
 * letter and may only be followed by punctuation up to the
 * terminating 0 tchar; otherwise nothing is done.  Break positions
 * are stored as pointers in hyptr[], terminated by 0, and are finally
 * sorted into ascending order.
 *
 * If a libhnj dictionary is loaded (dicthnj), the word is cut at '-'
 * and EMDASH; each piece is looked up with exword() and, if not found
 * there, passed to hyphenhnj().  Without a dictionary exword(),
 * suffix() and digram() are tried in that order.
 */
96 void
hyphen(tchar * wp)97 hyphen(tchar *wp)
98 {
99 	register int j;
100 	register tchar *i;
101 	tchar	*_wdstart, *_wdend;
102 
103 	i = wp;
104 	while (punct(*i++))
105 		;
106 	if (!alph(*--i))
107 		return;
108 	wdstart = i++;
	/* with hyext set, everything up to the 0 terminator is the word */
109 	while (hyext ? *i++ : alph(*i++))
110 		;
111 	hyend = wdend = --i - 1;
112 	while (punct(*i++))
113 		;
	/* anything but the terminator after the trailing punctuation: give up */
114 	if (*--i)
115 		return;
	/* fewer than hylen characters, and none of the 060 bits of wdhyf set */
116 	if (!(wdhyf & 060) && (wdend - wdstart - (hylen - 1)) < 0)
117 		return;
118 	hyp = hyptr;
119 	*hyp = 0;
120 	hyoff = 2;
121 	if (dicthnj) {
		/* wdstart/wdend are narrowed to each piece and restored below */
122 		i = _wdstart = wdstart;
123 		_wdend = wdend;
124 		do {
125 			if (cbits(*i) == '-' || cbits(*i) == EMDASH ||
126 					i == _wdend) {
				/* drop leading punctuation and digits of the piece */
127 				while (wdstart <= i && (punct(*wdstart) ||
128 						(cbits(*wdstart) >= '0' &&
129 						 cbits(*wdstart) <= '9')))
130 					wdstart++;
				/* the piece extends over letters that are not digits */
131 				for (wdend = wdstart; wdend <= i; wdend++) {
132 					if (!alph(*wdend) ||
133 							(cbits(*wdend) >= '0' &&
134 							 cbits(*wdend) <= '9'))
135 						break;
136 				}
137 				hyend = --wdend;
138 				if ((wdhyf & 060 || wdstart + 3 <= wdend) &&
139 						!exword())
140 					hyphenhnj();
141 				wdstart = &i[1];
				/* the position after an inner separator is a break point too */
142 				if (i < _wdend) {
143 					*hyp++ = &i[1];
					/* never move past the last slot of hyptr[] */
144 					if (hyp > (hyptr + NHYP - 1))
145 						hyp = hyptr + NHYP - 1;
146 				}
147 			}
148 		} while (i++ <= _wdend);
149 		wdstart = _wdstart;
150 		wdend = _wdend;
151 	} else if (!exword() && !suffix())
152 		digram();
153 	*hyp++ = 0;
	/* bubble sort the recorded positions; j counts the swaps of a pass */
154 	if (*hyptr)
155 		for (j = 1; j; ) {
156 			j = 0;
157 			for (hyp = hyptr + 1; *hyp != 0; hyp++) {
158 				if (*(hyp - 1) > *hyp) {
159 					j++;
160 					i = *hyp;
161 					*hyp = *(hyp - 1);
162 					*(hyp - 1) = i;
163 				}
164 			}
165 		}
166 }
167 
168 
169 int
punct(tchar i)170 punct(tchar i)
171 {
172 	if (!cbits(i) || alph(i))
173 		return(0);
174 	else
175 		return(1);
176 }
177 
178 
/*
 * Return nonzero if j counts as a letter for hyphenation.  With hyext
 * set, digits count as well (iswalnum() instead of iswalpha(), and
 * the explicit '0'..'9' test in the ASCII case).
 *
 * Note that the if/else chain below is spliced together from
 * EUC and non-EUC preprocessor branches.
 */
179 int
alph(tchar j)180 alph(tchar j)
181 {
182 	int i;
183 	int f;
184 	int	h;
185 
	/* follow CHAR xfunc references through charout[] to the real character */
186 	while (isxfunc(j, CHAR))
187 		j = charout[sbits(j)].ch;
188 	i = cbits(j);
189 	f = fbits(j);
	/* a nonzero hcode[] entry is classified in place of the character code */
190 	if (!ismot(j) && i < nhcode && (h = hcode[i]) != 0) {
191 		if (h & ~0177)
192 			h = tr2un(h, f);
193 #ifdef EUC
194 		return hyext ? iswalnum(h) : iswalpha(h);
195 	} else
196 #else	/* !EUC */
197 		i = h;
198 	}
199 #endif	/* !EUC */
200 #ifdef EUC
	/* codes above 0177: classify the value tr2un() returns for them */
201 	if (!ismot(j) && i & ~0177) {
202 		int	u;
203 #ifndef	NROFF
		/* with hyext, a ligature with an lgrevtab entry is a letter */
204 		if (islig(j) && hyext &&
205 				lgrevtab && lgrevtab[f] && lgrevtab[f][i])
206 			return 1;
207 #endif	/* !NROFF */
208 		u = tr2un(i, f);
209 		if (u == 0x017F)	/* longs */
210 			u = 's';
211 		return hyext ? iswalnum(u) : iswalpha(u);
212 	} else
213 #endif	/* EUC */
	/*
	 * NOTE(review): !ismot(j) guards only the lower-case range here;
	 * the upper-case and digit tests also accept motion tchars.
	 * Possibly intended as !ismot(j) && (...) - confirm.
	 */
214 	if ((!ismot(j) && i >= 'a' && i <= 'z') || (i >= 'A' && i <= 'Z') ||
215 			(hyext && i >= '0' && i <= '9'))
216 		return(1);
217 	else
218 		return(0);
219 }
220 
221 
222 void
caseht(void)223 caseht(void)
224 {
225 	thresh = THRESH;
226 	if (skip(0))
227 		return;
228 	noscale++;
229 	thresh = hatoi();
230 	noscale = 0;
231 }
232 
233 
234 void
casehw(void)235 casehw(void)
236 {
237 	register int i, k;
238 	int	*j;
239 	tchar t;
240 	int	cnt = 0;
241 
242 	lgf++;
243 	if (nexth == NULL)
244 		growhbuf(NULL);
245 	k = 0;
246 	while (!skip(!cnt++)) {
247 		if ((j = nexth) >= (hbuf + NHEX - 2) && growhbuf(&j) == NULL)
248 			goto full;
249 		for (; ; ) {
250 			if (ismot(t = getch()))
251 				continue;
252 			i = cbits(t);
253 			if (i == ' ' || i == '\n') {
254 				*j++ = 0;
255 				nexth = j;
256 				*j = 0;
257 				if (i == ' ')
258 					break;
259 				else
260 					return;
261 			}
262 			if (i == '-') {
263 				k = HY_BIT2;
264 				continue;
265 			}
266 			*j++ = maplow(t) | k;
267 			k = 0;
268 			if (j >= (hbuf + NHEX - 2) && growhbuf(&j) == NULL)
269 				goto full;
270 		}
271 	}
272 	return;
273 full:
274 	errprint("exception word list full.");
275 	*nexth = 0;
276 }
277 
278 
/*
 * Look up the current word (wdstart..hyend) in the exception list
 * hbuf.
 *
 * The list is a sequence of entries, each a run of ints terminated by
 * 0; an empty entry (or a NULL hbuf) ends the list.  Characters are
 * compared through maplow() with HY_BIT2 masked off the entry.  An
 * entry matches if it covers the word up to hyend, or if it ends at
 * wdend and the character there maps to 's'.
 *
 * On a match, the word position of every entry character carrying
 * HY_BIT2 is appended to hyp and 1 is returned.  Returns 0 if no
 * entry matches.
 */
279 int
exword(void)280 exword(void)
281 {
282 	register tchar *w;
283 	register int	*e;
284 	int	*save;
285 
286 	e = hbuf;
287 	while (1) {
		/* remember the start of this entry for the marking pass */
288 		save = e;
289 		if (e == NULL || *e == 0)
290 			return(0);
291 		w = wdstart;
292 		while (*e && w <= hyend) {
293 #ifndef NROFF
294 			int	i, m, f;
295 			m = cbits(*w);
296 			f = fbits(*w);
			/*
			 * A ligature is compared against the 0-terminated
			 * sequence lgrevtab[f][m] (presumably the letters it
			 * stands for), consuming one entry cell per element.
			 */
297 			if (islig(*w) && lgrevtab && lgrevtab[f] &&
298 					lgrevtab[f][m]) {
299 				for (i = 0; lgrevtab[f][m][i]; i++) {
300 					if ((*e&~HY_BIT2) ==
301 					  maplow(lgrevtab[f][m][i])) {
302 						e++;
303 					} else
304 						goto end;
305 				}
306 				w++;
307 			} else
308 #endif
309 			{
310 				if ((*e&~HY_BIT2) == maplow(*w)) {
311 					e++;
312 					w++;
313 				} else
314 					goto end;
315 			}
316 		}
	/* *e == 0 here means the whole entry was consumed */
317 	end:	if (!*e) {
318 			if (w-1 == hyend || (w == wdend && maplow(*w) == 's')) {
				/* second pass over the entry: record the marked positions */
319 				w = wdstart;
320 				for (e = save; *e; e++) {
321 #ifndef NROFF
322 					int	i, m, f;
323 					m = cbits(*w);
324 					f = fbits(*w);
325 					if (islig(*w) && lgrevtab &&
326 							lgrevtab[f] &&
327 							lgrevtab[f][m]) {
						/*
						 * A break inside a ligature is stored as
						 * the ligature's address with the element
						 * index or'ed into it.
						 */
328 						for (i = 0; lgrevtab[f][m][i];
329 								i++) {
330 							if (*e++ & HY_BIT2) {
331 								*hyp = (void *)
332 								  ((intptr_t)w |
333 								   i);
334 								hyp++;
335 							}
336 						}
						/* undo one step; the for loop advances e again */
337 						e--;
338 					} else
339 #endif
340 					{
341 						if (*e & HY_BIT2)
342 							*hyp++ = w;
343 					}
344 					w++;
					/* never move past the last slot of hyptr[] */
345 					if (hyp > (hyptr + NHYP - 1))
346 						hyp = hyptr + NHYP - 1;
347 				}
348 				return(1);
349 			} else {
				/* entry is only a prefix of the word: step over its 0 */
350 				e++;
351 				continue;
352 			}
353 		} else
			/* mismatch: skip the rest of this entry and its 0 */
354 			while (*e++)
355 				;
356 	}
357 }
358 
359 
/*
 * Match the end of the word (up to hyend) against the suffix table
 * suftab[] and record the break points the matching entry specifies.
 *
 * suftab[] is indexed by the lower-cased last letter.  Each list
 * consists of entries whose first byte holds the entry length,
 * including that byte, in its low four bits; a zero length ends the
 * list.  The remaining bytes hold the suffix letters in their low
 * seven bits.  HY_BIT in a byte requests a break at the corresponding
 * word position; the 0100 and 040 bits of the first byte act as flags
 * (see below).
 *
 * After an entry has been applied, hyend is moved in front of the
 * last break it requested, exword() is tried on the shortened word
 * and, if that fails, the search starts over.  Returns 1 if exword()
 * succeeded, 0 in all other cases.
 */
360 int
suffix(void)361 suffix(void)
362 {
363 	register tchar *w;
364 	register const char	*s, *s0;
365 	tchar i;
366 	extern const char	*suftab[];
367 
368 again:
369 	i = cbits(*hyend);
370 	if (i >= 128 || !alph(*hyend))
371 		return(0);
	/* fold an upper-case letter to lower case for the table index */
372 	if (i < 'a')
373 		i -= 'A' - 'a';
374 	if ((s0 = suftab[i-'a']) == 0)
375 		return(0);
376 	for (; ; ) {
377 		if ((i = *s0 & 017) == 0)
378 			return(0);
		/* compare backwards, starting with the letter before hyend */
379 		s = s0 + i - 1;
380 		w = hyend - 1;
381 		while (s > s0 && w >= wdstart && (*s & 0177) == maplow(*w)) {
382 			s--;
383 			w--;
384 		}
		/* all letters of the entry matched */
385 		if (s == s0)
386 			break;
387 		s0 += i;
388 	}
389 	s = s0 + i - 1;
390 	w = hyend;
	/* HY_BIT in the first byte: break at hyend itself */
391 	if (*s0 & HY_BIT)
392 		goto mark;
393 	while (s > s0) {
394 		w--;
395 		if (*s-- & HY_BIT) {
396 mark:
397 			hyend = w - 1;
			/* 0100 flag: shorten the word but do not record the break */
398 			if (*s0 & 0100)
399 				continue;
			/* a break needs a vowel somewhere in front of it */
400 			if (!chkvow(w))
401 				return(0);
402 			*hyp++ = w;
403 		}
404 	}
	/* 040 flag: stop here without examining the rest of the word */
405 	if (*s0 & 040)
406 		return(0);
407 	if (exword())
408 		return(1);
409 	goto again;
410 }
411 
412 
413 int
maplow(tchar t)414 maplow(tchar t)
415 {
416 	int	h, i, f;
417 
418 	while (isxfunc(t, CHAR))
419 		t = charout[sbits(t)].ch;
420 	i = cbits(t);
421 	f = fbits(t);
422 	if (!ismot(t) && i < nhcode && (h = hcode[i]) != 0) {
423 		if (h & ~0177)
424 			h = tr2un(h, f);
425 		h = tr2un(h, f);
426 		return(h);
427 	} else
428 #ifdef EUC
429 	if (!ismot(t) && i & ~0177) {
430 		i = tr2un(i, f);
431 		if (i == 0x017F)	/* longs */
432 			i = 's';
433 		if (iswupper(i))
434 			i = towlower(i);
435 	} else
436 #endif	/* EUC */
437 	if (ischar(i) && isupper(i))
438 		i = tolower(i);
439 	return(i);
440 }
441 
442 
443 int
vowel(tchar i)444 vowel(tchar i)
445 {
446 	switch (maplow(i)) {
447 	case 'a':
448 	case 'e':
449 	case 'i':
450 	case 'o':
451 	case 'u':
452 	case 'y':
453 		return(1);
454 	default:
455 		return(0);
456 	}
457 }
458 
459 
460 tchar *
chkvow(tchar * w)461 chkvow(tchar *w)
462 {
463 	while (--w >= wdstart)
464 		if (vowel(*w))
465 			return(w);
466 	return(0);
467 }
468 
469 
/*
 * Digram hyphenation of the current word.
 *
 * Nothing is done if any character of the word has a code above
 * 0177.  Otherwise the word is processed from hyend towards wdstart:
 * the last vowel at or before hyend and the vowel preceding it are
 * located, every position between the two is scored as the product
 * of three dilook() table lookups, and the best position is appended
 * to hyp if its score exceeds thresh.  The search is then repeated
 * from the preceding vowel until chkvow() finds no more vowels.
 */
470 void
digram(void)471 digram(void)
472 {
473 	register tchar *w;
474 	register int val;
475 	tchar * nhyend, *maxw = 0;
476 	int	maxval;
477 	extern const char	bxh[26][13], bxxh[26][13], xxh[26][13], xhx[26][13], hxx[26][13];
478 
479 	for (w = wdstart; w <= wdend; w++)
480 		if (cbits(*w) & ~0177)
481 			return;
482 
483 again:
	/* chkvow() starts one before its argument, so hyend itself is included */
484 	if (!(w = chkvow(hyend + 1)))
485 		return;
486 	hyend = w;
487 	if (!(w = chkvow(hyend)))
488 		return;
489 	nhyend = w;
490 	maxval = 0;
491 	w--;
	/* w runs from the earlier vowel up to the character before hyend */
492 	while ((++w < hyend) && (w < (wdend - 1))) {
493 		val = 1;
		/* the first table depends on how close w is to the word start */
494 		if (w == wdstart)
495 			val *= dilook('a', *w, bxh);
496 		else if (w == wdstart + 1)
497 			val *= dilook(*(w-1), *w, bxxh);
498 		else
499 			val *= dilook(*(w-1), *w, xxh);
500 		val *= dilook(*w, *(w+1), xhx);
501 		val *= dilook(*(w+1), *(w+2), hxx);
		/* the candidate break lies in front of w + 1 */
502 		if (val > maxval) {
503 			maxval = val;
504 			maxw = w + 1;
505 		}
506 	}
507 	hyend = nhyend;
508 	if (maxval > thresh)
509 		*hyp++ = maxw;
510 	goto again;
511 }
512 
513 
514 int
dilook(tchar a,tchar b,const char t[26][13])515 dilook(tchar a, tchar b, const char t[26][13])
516 {
517 	register int i, j;
518 
519 	i = t[maplow(a)-'a'][(j = maplow(b)-'a')/2];
520 	if (!(j & 01))
521 		i >>= 4;
522 	return(i & 017);
523 }
524 
525 void
casehylang(void)526 casehylang(void)
527 {
528 	int	c, i = 0, sz = 0;
529 	char	*path = NULL;
530 	size_t	l;
531 
532 	dicthnj = NULL;
533 	free(hylang);
534 	hylang = NULL;
535 	hyext = 0;
536 	skip(0);
537 	do {
538 		c = getach();
539 		if (i >= sz)
540 			hylang = realloc(hylang, (sz += 8) * sizeof *hylang);
541 		hylang[i++] = c;
542 	} while (c);
543 	if (i == 1) {
544 		free(hylang);
545 		hylang = NULL;
546 		return;
547 	}
548 	if (strchr(hylang, '/') == NULL) {
549 		l = strlen(hylang) + strlen(HYPDIR) + 12;
550 		path = malloc(l);
551 		snprintf(path, l, "%s/hyph_%s.dic", HYPDIR, hylang);
552 	} else {
553 		l = strlen(hylang) + 1;
554 		path = malloc(l);
555 		n_strcpy(path, hylang, l);
556 	}
557 	if ((dicthnj = hnj_hyphen_load(path)) == NULL) {
558 		errprint("Can't load %s", path);
559 		free(hylang);
560 		hylang = NULL;
561 		free(path);
562 		return;
563 	}
564 	free(path);
565 	hyext = 1;
566 }
567 
568 static int
addc(int m,char ** cp,tchar ** wp,int ** wpp,int distance)569 addc(int m, char **cp, tchar **wp, int **wpp, int distance)
570 {
571 	tchar	t;
572 
573 	t = m ? m | sfmask(**wp) : **wp;
574 	m = maplow(t);
575 	if (m > 0 && m <= 0x7f) {
576 		*(*cp)++ = m;
577 		*(*wpp)++ = distance;
578 	} else if (m >= 0x80 && m <= 0x7ff) {
579 		*(*cp)++ = (m >> 6 & 037) | 0300;
580 		*(*wpp)++ = distance;
581 		*(*cp)++ = (m & 077) | 0200;
582 		*(*wpp)++ = -1000;
583 	} else if (m >= 0x800 && m <= 0xffff) {
584 		*(*cp)++ = (m >> 12 & 017) | 0340;
585 		*(*wpp)++ = distance;
586 		*(*cp)++ = (m >> 6 & 077) | 0200;
587 		*(*wpp)++ = -1000;
588 		*(*cp)++ = (m & 077) | 0200;
589 		*(*wpp)++ = -1000;
590 	} else
591 		return 0;
592 	return 1;
593 }
594 
595 static void
hyphenhnj(void)596 hyphenhnj(void)
597 {
598 	tchar	*wp;
599 	char	*cb, *cp, *hb;
600 	int	*wpos, *wpp;
601 	int	i, j, k;
602 
603 	i = 12 * (wdend - wdstart) + 1;
604 	cb = malloc(i * sizeof *cb);
605 	hb = malloc(i * sizeof *hb);
606 	wpos = malloc(i * sizeof *wpos);
607 	cp = cb;
608 	wpp = wpos;
609 	for (wp = wdstart; wp <= wdend; wp++) {
610 #ifndef	NROFF
611 		int m = cbits(*wp);
612 		int f = fbits(*wp);
613 		if (islig(*wp) && lgrevtab && lgrevtab[f] && lgrevtab[f][m]) {
614 			for (i = 0; lgrevtab[f][m][i]; i++) {
615 				if (addc(lgrevtab[f][m][i], &cp, &wp, &wpp,
616 						i ? -i : wp-wdstart) == 0)
617 					goto retn;
618 			}
619 		} else
620 #endif
621 		{
622 			if (addc(0, &cp, &wp, &wpp, wp - wdstart) == 0)
623 				goto retn;
624 		}
625 	}
626 	*cp = '\0';
627 	j = cp - cb;
628 	while (wpp <= &wpos[j])
629 		*wpp++ = -1000;
630 	hnj_hyphen_hyphenate(dicthnj, cb, j, hb);
631 	k = 0;
632 	for (i = 0; i < j; i++) {
633 		if (wpos[i+1] >= 0)
634 			k = wpos[i+1];
635 		if ((hb[i] - '0') & 1 && wpos[i+1] >= -3) {
636 			if (wpos[i+1] >= 0)
637 				*hyp = &wdstart[wpos[i+1]];
638 			else {
639 				*hyp = &wdstart[k];
640 				*hyp = (void *)((intptr_t)*hyp | -wpos[i+1]);
641 			}
642 			if (++hyp > (hyptr + NHYP - 1))
643 				hyp = hyptr + NHYP - 1;
644 		}
645 	}
646 retn:
647 	free(cb);
648 	free(hb);
649 	free(wpos);
650 }
651