1 #ifndef lint
2 static char sccsid[] = "@(#)n8.c	2.1 (CWI) 85/07/18";
3 #endif lint
4 #include	<ctype.h>
5 #include	"tdef.h"
6 #include <sgtty.h>
7 #include "ext.h"
8 #define	HY_BIT	0200	/* stuff in here only works for ascii */
9 
10 /*
11  * troff8.c
12  *
13  * hyphenation
14  */
15 
16 char	hbuf[NHEX];
17 char	*nexth = hbuf;
18 tchar	*hyend;
19 
20 hyphen(wp)
21 	tchar *wp;
22 {
23 	register j;
24 	register tchar *i;
25 
26 	i = wp;
27 	while (punct(cbits(*i++)))
28 		;
29 	if (!alph(cbits(*--i)))
30 		return;
31 	wdstart = i++;
32 	while (alph(cbits(*i++)))
33 		;
34 	hyend = wdend = --i - 1;
35 	while (punct(cbits(*i++)))
36 		;
37 	if (*--i)
38 		return;
39 	if ((wdend - wdstart - 4) < 0)
40 		return;
41 	hyp = hyptr;
42 	*hyp = 0;
43 	hyoff = 2;
44 /*
45 	if (!exword() && !suffix())
46 		digram();
47 */
48 	if (!exword()) {
49 		if (hyalg == ORIGINAL && !suffix())
50 			digram();
51 		if (hyalg == DUTCH)
52 			split(wdstart, wdend);
53 	}
54 	*hyp++ = 0;
55 	if (*hyptr)
56 		for (j = 1; j; ) {
57 			j = 0;
58 			for (hyp = hyptr + 1; *hyp != 0; hyp++) {
59 				if (*(hyp - 1) > *hyp) {
60 					j++;
61 					i = *hyp;
62 					*hyp = *(hyp - 1);
63 					*(hyp - 1) = i;
64 				}
65 			}
66 		}
67 }
68 
69 
/*
 * punct - return 1 if character code i is neither zero nor an
 * (ASCII) letter, 0 otherwise.
 */
int
punct(i)
int	i;
{
	return(i != 0 && !alph(i));
}
77 
78 
/*
 * alph - return 1 if character code i is an ASCII letter
 * ('a'..'z' or 'A'..'Z'), 0 otherwise.
 */
int
alph(i)
int	i;
{
	int	lower, upper;

	lower = ('a' <= i && i <= 'z');
	upper = ('A' <= i && i <= 'Z');
	return(lower || upper);
}
86 
87 /*
88  * set the hyphenation algorithm
89  *
90  * jna
91  */
92 
93 caseha()
94 {	register i;
95 
96 	if ( skip())
97 		i = hyalg1;
98 	else {
99 		noscale++;
100 		noscale = 0;
101 		i = max(atoi(), 0);
102 		if (nonumb)
103 			return;
104 		if (i > MAXDIALECTS) {
105 			errprint("Unknown dialect %d", i);
106 			return;
107 		}
108 	}
109 	hyalg1 = hyalg;
110 	hyalg = i;
111 	if( hyalg == DUTCH)
112 		thresh = DUTCH_THRESH;
113 }
114 
115 caseht()
116 {
117 	switch(hyalg) {
118 		case ORIGINAL:
119 			thresh = THRESH;
120 			break;
121 		case DUTCH:
122 			thresh = DUTCH_THRESH;
123 			break;
124 	}
125 	if (skip())
126 		return;
127 	noscale++;
128 	if (hyalg == DUTCH)
129 		thresh = max(atoi(), 1);
130 	else
131 		thresh = atoi();
132 	noscale = 0;
133 }
134 
135 
136 casehw()
137 {
138 	register i, k;
139 	register char	*j;
140 	tchar t;
141 
142 	k = 0;
143 	while (!skip()) {
144 		if ((j = nexth) >= (hbuf + NHEX - 2))
145 			goto full;
146 		for (; ; ) {
147 			if (ismot(t = getch()))
148 				continue;
149 			i = cbits(t);
150 			if (i == ' ' || i == '\n') {
151 				*j++ = 0;
152 				nexth = j;
153 				*j = 0;
154 				if (i == ' ')
155 					break;
156 				else
157 					return;
158 			}
159 			if (i == '-') {
160 				k = HY_BIT;
161 				continue;
162 			}
163 			*j++ = maplow(i) | k;
164 			k = 0;
165 			if (j >= (hbuf + NHEX - 2))
166 				goto full;
167 		}
168 	}
169 	return;
170 full:
171 	errprint("exception word list full.");
172 	*nexth = 0;
173 }
174 
175 
176 exword()
177 {
178 	register tchar *w;
179 	register char	*e;
180 	char	*save;
181 
182 	e = hbuf;
183 	while (1) {
184 		save = e;
185 		if (*e == 0)
186 			return(0);
187 		w = wdstart;
188 		while (*e && w <= hyend && (*e & 0177) == maplow(cbits(*w))) {
189 			e++;
190 			w++;
191 		};
192 		if (!*e) {
193 			if (w-1 == hyend ||
194 			   (hyalg == ORIGINAL /* s-extension only in original */
195 				&& (w == wdend && maplow(cbits(*w)) == 's'))) {
196 				w = wdstart;
197 				for (e = save; *e; e++) {
198 					if (*e & HY_BIT)
199 						*hyp++ = w;
200 					if (hyp > (hyptr + NHYP - 1))
201 						hyp = hyptr + NHYP - 1;
202 					w++;
203 				}
204 				return(1);
205 			} else {
206 				e++;
207 				continue;
208 			}
209 		} else
210 			while (*e++)
211 				;
212 	}
213 }
214 
215 
216 suffix()
217 {
218 	register tchar *w;
219 	register char	*s, *s0;
220 	tchar i;
221 	extern char	*suftab[];
222 	extern tchar *chkvow();
223 
224 again:
225 	if (!alph(cbits(i = cbits(*hyend))))
226 		return(0);
227 	if (i < 'a')
228 		i -= 'A' - 'a';
229 	if ((s0 = suftab[i-'a']) == 0)
230 		return(0);
231 	for (; ; ) {
232 		if ((i = *s0 & 017) == 0)
233 			return(0);
234 		s = s0 + i - 1;
235 		w = hyend - 1;
236 		while (s > s0 && w >= wdstart && (*s & 0177) == maplow(cbits(*w))) {
237 			s--;
238 			w--;
239 		}
240 		if (s == s0)
241 			break;
242 		s0 += i;
243 	}
244 	s = s0 + i - 1;
245 	w = hyend;
246 	if (*s0 & HY_BIT)
247 		goto mark;
248 	while (s > s0) {
249 		w--;
250 		if (*s-- & HY_BIT) {
251 mark:
252 			hyend = w - 1;
253 			if (*s0 & 0100)
254 				continue;
255 			if (!chkvow(w))
256 				return(0);
257 			*hyp++ = w;
258 		}
259 	}
260 	if (*s0 & 040)
261 		return(0);
262 	if (exword())
263 		return(1);
264 	goto again;
265 }
266 
267 
/*
 * maplow - map an ASCII upper-case letter to lower case; every other
 * value is returned unchanged.
 *
 * The explicit range test (the same one alph() uses) keeps the result
 * independent of the locale and well defined for any int, including
 * values that may not be passed to the <ctype.h> functions.
 */
int
maplow(i)
register int	i;
{
	if (i >= 'A' && i <= 'Z')
		i += 'a' - 'A';
	return(i);
}
275 
276 
/*
 * vowel - return 1 if character code i is one of a, e, i, o, u, y
 * in either case, 0 otherwise.
 */
int
vowel(i)
int	i;
{
	register int c;

	c = maplow(i);
	return(c == 'a' || c == 'e' || c == 'i' ||
	       c == 'o' || c == 'u' || c == 'y');
}
292 
293 
294 tchar *chkvow(w)
295 tchar *w;
296 {
297 	while (--w >= wdstart)
298 		if (vowel(cbits(*w)))
299 			return(w);
300 	return(0);
301 }
302 
303 
304 digram()
305 {
306 	register tchar *w;
307 	register val;
308 	tchar * nhyend, *maxw;
309 	int	maxval;
310 	extern char	bxh[26][13], bxxh[26][13], xxh[26][13], xhx[26][13], hxx[26][13];
311 
312 again:
313 	if (!(w = chkvow(hyend + 1)))
314 		return;
315 	hyend = w;
316 	if (!(w = chkvow(hyend)))
317 		return;
318 	nhyend = w;
319 	maxval = 0;
320 	w--;
321 	while ((++w < hyend) && (w < (wdend - 1))) {
322 		val = 1;
323 		if (w == wdstart)
324 			val *= dilook('a', cbits(*w), bxh);
325 		else if (w == wdstart + 1)
326 			val *= dilook(cbits(*(w-1)), cbits(*w), bxxh);
327 		else
328 			val *= dilook(cbits(*(w-1)), cbits(*w), xxh);
329 		val *= dilook(cbits(*w), cbits(*(w+1)), xhx);
330 		val *= dilook(cbits(*(w+1)), cbits(*(w+2)), hxx);
331 		if (val > maxval) {
332 			maxval = val;
333 			maxw = w + 1;
334 		}
335 	}
336 	hyend = nhyend;
337 	if (maxval > thresh)
338 		*hyp++ = maxw;
339 	goto again;
340 }
341 
342 
/*
 * dilook - fetch the 4-bit table value for the letter pair (a, b).
 *
 * The row is selected by a and the byte within the row by b; each byte
 * packs two values, the one for an even letter index of b in the high
 * nibble and the one for an odd index in the low nibble.  Both a and b
 * must be letters, otherwise the table is indexed out of range.
 */
int
dilook(a, b, t)
int	a, b;
char	t[26][13];
{
	register int row, col, packed;

	row = maplow(a) - 'a';
	col = maplow(b) - 'a';
	packed = t[row][col / 2];
	if (col & 01)
		return(packed & 017);
	return((packed >> 4) & 017);
}
354 
355 
/*
 * All this jazz is here to get the Dutch dialect hyphenated.
 * It first appeared in the Dutch version of troff (nltroff), due to
 * teus hagen.
 * The original program was converted from Algol60 to C by, I think,
 * bert ijsselstein.
 * It's a mess, anyway.
 *
 * Planted in this version of troff by jaap akkerhuis (jna).
 *
 * Note that this is licensed software!
 *
 */
369 
#if !defined(NULL)
#define NULL	0
#endif

/* limits on how much of a word bestsplit1() looks at */
#define MAXLETT	50	/* at most the first MAXLETT characters of a word
			   will be processed */
#define MAXSYLL	20	/* at most the first MAXSYLL syllables of a word
			   will be processed */

/*
 * Letter codes that bestsplit1() tests for explicitly in its third
 * phase (codes as stored in woord[]).
 */
#define LETTEREE	27
#define LETTERJ		41
#define LETTERV		55
#define LETTERX		57
#define LETTERZ		58
383 
384 /*
385  * split(..) needs to be cleaned up, could install hjt's version...
386  */
387 
388 split( aword, anend ) register tchar *aword, *anend;
389 {	register tchar *place;
390 	extern tchar *bestsplit1();
391 
392 	place = bestsplit1( aword, anend );
393 	if( place != (tchar *) NULL )
394 	{	*hyp++ = place;
395 		if( place - aword > thresh && anend - place > thresh )
396 			split( aword, place+1 );
397 		if( anend - place > thresh && place - aword > thresh )
398 			split( place, anend );
399 	}
400 }
401 
402 tchar *
403 bestsplit1( tosplit , aend )
404 tchar *tosplit, *aend;
405 {
406 /* This function determines the "best" place to split into two parts the
407  * Dutch word contained in a string of <size> characters which starts at
408  * the address <tosplit> .
409  * The input characters should be in ASCII code .
410  * The function returns as value the number of characters of the first
411  * of the two parts .
412  * If the returned value exceeds the character count of the line the
413  * user may try to invoke bestsplit1 again but now with <size> equal to
414  * the returned value plus one .
415  * The algorithm is adapted from the Mathematical Centre report NR 28/72,
416  * "BESTESPLITS1, EEN PROCEDURE VOOR HET AUTOMATISCH AFBREKEN VAN NEDER-
417  * LANDSE WOORDEN" , which has been written by J.C. VAN VLIET.
418  */
419 	extern char translate[], comprimation[][14], consonant[][23],
420 		prefix[][3] ;
421 	short woord[ MAXLETT +1], reference[ MAXLETT +1], vowel[ MAXSYLL ],
422             turn[ MAXSYLL ] , letter, nextlett, vowel1, vowel2,
423             l0, l1, l2 ;
424         short numlett, numsyll, turnindex, differ, start1, start2, stop,
425 	    level, bp ;
426 	register int i, j, help ;
427 	short size = aend - tosplit + 1;
428 
429 	/* translate into bestsplit code : */
430 	woord[0] = 0 ;
431 	i = 1 ;
432 	help = -1 ;
433 	while ( (++help < size) && (i <  MAXLETT ) ) {
434 		reference[i] = i;
435 		woord[i++] = translate[maplow(cbits(tosplit[help])) - 'a'] ;
436 	}
437 	/* end of translation : */
438 
439 	numlett = i ;
440 	if ( numlett < 4 ) goto nosplit ;
441 	i = j = 1 ;
442 	help = 0 ;
443 	while ( i < numlett ) {
444 		letter = woord[i] ;
445  		/* comprimation of vowels : */
446  		if ( (25 < letter) && (letter < 41) ) {
447  			nextlett = woord[i+1] ;
448  			if ( (28 < nextlett) && (nextlett < 43) ) {
449  				letter = comprimation[letter-26][nextlett-29] ;
450  				if (letter > 0) {
451  					i++ ;
452  					help++ ;
453  					woord[i] = letter ;
454 					continue ;
455  				}
456  			}
457  		} /* end of comprimation */
458 
459  		woord[j] = woord[i] ;
460  		j++ ;
461  		i++ ;
462  		reference[j] += help ;
463 	}
464 	woord[j] = woord[numlett] ;
465 	numlett = j ;
466 
467 
468 	/* determination of the number of syllables */
469 	j = -1 ;
470 	i = 0 ;
471 	while ( ( ++i <= numlett ) && ( j < MAXSYLL ) ) {
472 		if (woord[i] < 39) {
473 			j++ ;
474 			vowel[j] = i ;
475 		}
476 	}
477 	numsyll = j+1 ;
478 
479 	if ( numsyll < 2 ) goto nosplit ;
480 	turnindex = 0 ;
481 	differ = 1 ;
482 	start1 = 0 ;
483 	start2 = numsyll - 1 ;
484 	stop = start2 ;
485 
486 	while ( turnindex < stop ) {
487 		vowel1 = vowel[stop] ;
488 		for ( i = stop - 1 ; i >= 0 ; i-- ) {
489 			vowel2 = vowel[i] ;
490 			if ( vowel1 - vowel2 == differ) {
491 				turn[turnindex] = i ;
492 				turnindex++ ;
493 			}
494 			vowel1 = vowel2 ;
495 		}
496 		if ( differ == 1 ) start1 = turnindex ;
497 		else if ( differ == 2 ) start2 = turnindex ;
498 		differ++ ;
499 	}
500 
501 	turnindex = start2 - 1 ;
502 	stop = numsyll - 1 ;
503 	level = 1 ;
504 
505 next :
506 	turnindex++ ;
507 	if ( turnindex >= stop ) {
508 		if ( level == 1 ) turnindex = start2 ;
509 		else if ( level == 2 ) {
510 				turnindex = start1 ;
511 				stop = start2 ;
512 			}
513 			else goto nosplit ;
514 		level++ ;
515 		if ( turnindex >= stop ) goto next ;
516 	}
517 	j = turn[turnindex] ;
518 	vowel1 = vowel[j] ;
519 	vowel2 = vowel[j+1] ;
520 
521 	switch ( level ) {
522 	case 1 :
523 		for ( j = vowel2-2 ; j >= vowel1+1 ; j-- ) {
524 			help = consonant[woord[j]-39][woord[j+1]-39] ;
525 			if ( abs(help) == 1 ) goto splitafterj ;
526 			if ( help < 0 ) goto next ;
527 		}
528 		break ;   /* end of first phase */
529 
530 	case 2 :
531 		for ( i = vowel2-2 ; i >= vowel1+1 ; i-- ) {
532 			help = consonant[woord[i]-39][woord[i+1]-39] ;
533 			if ( abs(help) == 2 ) {
534 				j = i ;
535 				goto splitafterj ;
536 			}
537 			if ( abs(help) == 3 ) {
538 				if ( i == vowel1+1 ) {
539 					j = vowel1 ;
540 					goto splitafterj ;
541 				}
542 				help = abs(consonant[woord[i-1]-39][woord[i]-39]) ;
543 				if ( help == 2 ) {
544 					j = i - 1 ;
545 					goto splitafterj ;
546 				}
547 				if ( help == 3 ) {
548 					j = i - 2 ;
549 					goto splitafterj ;
550 				}
551 			}
552 			else if ( ( abs(help) == 4 ) &&
553 						( i == vowel2-2 ) ) {
554 				j = i ;
555 				goto splitafterj ;
556 			}
557 			if ( help < 0 ) goto next ;
558 		}
559 		break ;   /* end of second phase */
560 
561 	case 3 :
562 		j = vowel1 ;
563 		help = woord[j+1] ;
564 		if ( (help == LETTERJ) || (help == LETTERV) ||
565 				  (help == LETTERZ) ) goto splitafterj ;
566 		if ( help == LETTERX ) goto next ;
567 		l1 = woord[j] ;
568 		if ( l1 == LETTEREE ) goto next ;
569 		if ( ( l1 > 24 ) && ( l1 < 29 ) ) {
570 			j++ ;
571 			goto splitafterj ;
572 		}
573 		l0 = woord[j-1] ;
574 		l2 = woord[j+1] ;
575 		for ( i = 0 ; i < 7 ; i++ )
576 			if ( ( l0 == prefix[i][0] ) &&
577 			     ( l1 == prefix[i][1] ) &&
578 			     ( l2 == prefix[i][2] ) ) goto next ;
579 		goto splitafterj ;
580 		break ;   /* end of third phase */
581 
582 	}
583 
584 
585 	goto next ;
586 
587 splitafterj :
588 	bp = reference[j+1] - 1 ;
589 	if((bp < size-1) && (bp > 0))
590 		goto away;
591 	else
592 		goto next;
593 
594 nosplit :
595 	bp = 0 ;
596 	level = 4 ;
597 away :
598 	return(bp == 0? (tchar *) NULL : tosplit+bp) ;
599 }
600