1 #ifndef lint 2 static char sccsid[] = "@(#)n8.c 2.1 (CWI) 85/07/18"; 3 #endif lint 4 #include <ctype.h> 5 #include "tdef.h" 6 #include <sgtty.h> 7 #include "ext.h" 8 #define HY_BIT 0200 /* stuff in here only works for ascii */ 9 10 /* 11 * troff8.c 12 * 13 * hyphenation 14 */ 15 16 char hbuf[NHEX]; 17 char *nexth = hbuf; 18 tchar *hyend; 19 20 hyphen(wp) 21 tchar *wp; 22 { 23 register j; 24 register tchar *i; 25 26 i = wp; 27 while (punct(cbits(*i++))) 28 ; 29 if (!alph(cbits(*--i))) 30 return; 31 wdstart = i++; 32 while (alph(cbits(*i++))) 33 ; 34 hyend = wdend = --i - 1; 35 while (punct(cbits(*i++))) 36 ; 37 if (*--i) 38 return; 39 if ((wdend - wdstart - 4) < 0) 40 return; 41 hyp = hyptr; 42 *hyp = 0; 43 hyoff = 2; 44 /* 45 if (!exword() && !suffix()) 46 digram(); 47 */ 48 if (!exword()) { 49 if (hyalg == ORIGINAL && !suffix()) 50 digram(); 51 if (hyalg == DUTCH) 52 split(wdstart, wdend); 53 } 54 *hyp++ = 0; 55 if (*hyptr) 56 for (j = 1; j; ) { 57 j = 0; 58 for (hyp = hyptr + 1; *hyp != 0; hyp++) { 59 if (*(hyp - 1) > *hyp) { 60 j++; 61 i = *hyp; 62 *hyp = *(hyp - 1); 63 *(hyp - 1) = i; 64 } 65 } 66 } 67 } 68 69 70 punct(i) 71 { 72 if (!i || alph(i)) 73 return(0); 74 else 75 return(1); 76 } 77 78 79 alph(i) 80 { 81 if (i >= 'a' && i <= 'z' || i >= 'A' && i <= 'Z') 82 return(1); 83 else 84 return(0); 85 } 86 87 /* 88 * set the hyphenation algorithm 89 * 90 * jna 91 */ 92 93 caseha() 94 { register i; 95 96 if ( skip()) 97 i = hyalg1; 98 else { 99 noscale++; 100 noscale = 0; 101 i = max(atoi(), 0); 102 if (nonumb) 103 return; 104 if (i > MAXDIALECTS) { 105 errprint("Unknown dialect %d", i); 106 return; 107 } 108 } 109 hyalg1 = hyalg; 110 hyalg = i; 111 if( hyalg == DUTCH) 112 thresh = DUTCH_THRESH; 113 } 114 115 caseht() 116 { 117 switch(hyalg) { 118 case ORIGINAL: 119 thresh = THRESH; 120 break; 121 case DUTCH: 122 thresh = DUTCH_THRESH; 123 break; 124 } 125 if (skip()) 126 return; 127 noscale++; 128 if (hyalg == DUTCH) 129 thresh = max(atoi(), 1); 130 else 131 thresh = atoi(); 132 noscale = 0; 133 } 134 135 136 casehw() 137 { 138 register i, k; 139 register char *j; 140 tchar t; 141 142 k = 0; 143 while (!skip()) { 144 if ((j = nexth) >= (hbuf + NHEX - 2)) 145 goto full; 146 for (; ; ) { 147 if (ismot(t = getch())) 148 continue; 149 i = cbits(t); 150 if (i == ' ' || i == '\n') { 151 *j++ = 0; 152 nexth = j; 153 *j = 0; 154 if (i == ' ') 155 break; 156 else 157 return; 158 } 159 if (i == '-') { 160 k = HY_BIT; 161 continue; 162 } 163 *j++ = maplow(i) | k; 164 k = 0; 165 if (j >= (hbuf + NHEX - 2)) 166 goto full; 167 } 168 } 169 return; 170 full: 171 errprint("exception word list full."); 172 *nexth = 0; 173 } 174 175 176 exword() 177 { 178 register tchar *w; 179 register char *e; 180 char *save; 181 182 e = hbuf; 183 while (1) { 184 save = e; 185 if (*e == 0) 186 return(0); 187 w = wdstart; 188 while (*e && w <= hyend && (*e & 0177) == maplow(cbits(*w))) { 189 e++; 190 w++; 191 }; 192 if (!*e) { 193 if (w-1 == hyend || 194 (hyalg == ORIGINAL /* s-extension only in original */ 195 && (w == wdend && maplow(cbits(*w)) == 's'))) { 196 w = wdstart; 197 for (e = save; *e; e++) { 198 if (*e & HY_BIT) 199 *hyp++ = w; 200 if (hyp > (hyptr + NHYP - 1)) 201 hyp = hyptr + NHYP - 1; 202 w++; 203 } 204 return(1); 205 } else { 206 e++; 207 continue; 208 } 209 } else 210 while (*e++) 211 ; 212 } 213 } 214 215 216 suffix() 217 { 218 register tchar *w; 219 register char *s, *s0; 220 tchar i; 221 extern char *suftab[]; 222 extern tchar *chkvow(); 223 224 again: 225 if (!alph(cbits(i = cbits(*hyend)))) 226 return(0); 227 if (i < 'a') 228 i -= 'A' - 'a'; 229 if ((s0 = suftab[i-'a']) == 0) 230 return(0); 231 for (; ; ) { 232 if ((i = *s0 & 017) == 0) 233 return(0); 234 s = s0 + i - 1; 235 w = hyend - 1; 236 while (s > s0 && w >= wdstart && (*s & 0177) == maplow(cbits(*w))) { 237 s--; 238 w--; 239 } 240 if (s == s0) 241 break; 242 s0 += i; 243 } 244 s = s0 + i - 1; 245 w = hyend; 246 if (*s0 & HY_BIT) 247 goto mark; 248 while (s > s0) { 249 w--; 250 if (*s-- & HY_BIT) { 251 mark: 252 hyend = w - 1; 253 if (*s0 & 0100) 254 continue; 255 if (!chkvow(w)) 256 return(0); 257 *hyp++ = w; 258 } 259 } 260 if (*s0 & 040) 261 return(0); 262 if (exword()) 263 return(1); 264 goto again; 265 } 266 267 268 maplow(i) 269 register int i; 270 { 271 if (isupper(i)) 272 i = tolower(i); 273 return(i); 274 } 275 276 277 vowel(i) 278 int i; 279 { 280 switch (maplow(i)) { 281 case 'a': 282 case 'e': 283 case 'i': 284 case 'o': 285 case 'u': 286 case 'y': 287 return(1); 288 default: 289 return(0); 290 } 291 } 292 293 294 tchar *chkvow(w) 295 tchar *w; 296 { 297 while (--w >= wdstart) 298 if (vowel(cbits(*w))) 299 return(w); 300 return(0); 301 } 302 303 304 digram() 305 { 306 register tchar *w; 307 register val; 308 tchar * nhyend, *maxw; 309 int maxval; 310 extern char bxh[26][13], bxxh[26][13], xxh[26][13], xhx[26][13], hxx[26][13]; 311 312 again: 313 if (!(w = chkvow(hyend + 1))) 314 return; 315 hyend = w; 316 if (!(w = chkvow(hyend))) 317 return; 318 nhyend = w; 319 maxval = 0; 320 w--; 321 while ((++w < hyend) && (w < (wdend - 1))) { 322 val = 1; 323 if (w == wdstart) 324 val *= dilook('a', cbits(*w), bxh); 325 else if (w == wdstart + 1) 326 val *= dilook(cbits(*(w-1)), cbits(*w), bxxh); 327 else 328 val *= dilook(cbits(*(w-1)), cbits(*w), xxh); 329 val *= dilook(cbits(*w), cbits(*(w+1)), xhx); 330 val *= dilook(cbits(*(w+1)), cbits(*(w+2)), hxx); 331 if (val > maxval) { 332 maxval = val; 333 maxw = w + 1; 334 } 335 } 336 hyend = nhyend; 337 if (maxval > thresh) 338 *hyp++ = maxw; 339 goto again; 340 } 341 342 343 dilook(a, b, t) 344 int a, b; 345 char t[26][13]; 346 { 347 register i, j; 348 349 i = t[maplow(a)-'a'][(j = maplow(b)-'a')/2]; 350 if (!(j & 01)) 351 i >>= 4; 352 return(i & 017); 353 } 354 355 356 /* 357 * All these jazz is to have the dialect dutch being hyphenated 358 * It first appeared in the dutch version of troff (nltroff), due to 359 * teus hagen. 360 * The original program has converted from Algol60 to C by, I think 361 * bert ijsselstein. 362 * It's a mess, anyway. 363 * 364 * Planted in this version of troff by jaap akkerhuis (jna). 365 * 366 * Note that this is licensed software! 367 * 368 */ 369 370 #ifndef NULL 371 #define NULL 0 372 #endif 373 #define MAXLETT 50 /* at most the first MAXLETT characters of a word 374 will be processed */ 375 #define MAXSYLL 20 /* at most the first MAXSYLL syllables of a word 376 will be processed */ 377 378 #define LETTEREE 27 379 #define LETTERJ 41 380 #define LETTERV 55 381 #define LETTERX 57 382 #define LETTERZ 58 383 384 /* 385 * split(..) needs to be cleaned up, could install hjt's version... 386 */ 387 388 split( aword, anend ) register tchar *aword, *anend; 389 { register tchar *place; 390 extern tchar *bestsplit1(); 391 392 place = bestsplit1( aword, anend ); 393 if( place != (tchar *) NULL ) 394 { *hyp++ = place; 395 if( place - aword > thresh && anend - place > thresh ) 396 split( aword, place+1 ); 397 if( anend - place > thresh && place - aword > thresh ) 398 split( place, anend ); 399 } 400 } 401 402 tchar * 403 bestsplit1( tosplit , aend ) 404 tchar *tosplit, *aend; 405 { 406 /* This function determines the "best" place to split into two parts the 407 * Dutch word contained in a string of <size> characters which starts at 408 * the address <tosplit> . 409 * The input characters should be in ASCII code . 410 * The function returns as value the number of characters of the first 411 * of the two parts . 412 * If the returned value exceeds the character count of the line the 413 * user may try to invoke bestsplit1 again but now with <size> equal to 414 * the returned value plus one . 415 * The algorithm is adapted from the Mathematical Centre report NR 28/72, 416 * "BESTESPLITS1, EEN PROCEDURE VOOR HET AUTOMATISCH AFBREKEN VAN NEDER- 417 * LANDSE WOORDEN" , which has been written by J.C. VAN VLIET. 418 */ 419 extern char translate[], comprimation[][14], consonant[][23], 420 prefix[][3] ; 421 short woord[ MAXLETT +1], reference[ MAXLETT +1], vowel[ MAXSYLL ], 422 turn[ MAXSYLL ] , letter, nextlett, vowel1, vowel2, 423 l0, l1, l2 ; 424 short numlett, numsyll, turnindex, differ, start1, start2, stop, 425 level, bp ; 426 register int i, j, help ; 427 short size = aend - tosplit + 1; 428 429 /* translate into bestsplit code : */ 430 woord[0] = 0 ; 431 i = 1 ; 432 help = -1 ; 433 while ( (++help < size) && (i < MAXLETT ) ) { 434 reference[i] = i; 435 woord[i++] = translate[maplow(cbits(tosplit[help])) - 'a'] ; 436 } 437 /* end of translation : */ 438 439 numlett = i ; 440 if ( numlett < 4 ) goto nosplit ; 441 i = j = 1 ; 442 help = 0 ; 443 while ( i < numlett ) { 444 letter = woord[i] ; 445 /* comprimation of vowels : */ 446 if ( (25 < letter) && (letter < 41) ) { 447 nextlett = woord[i+1] ; 448 if ( (28 < nextlett) && (nextlett < 43) ) { 449 letter = comprimation[letter-26][nextlett-29] ; 450 if (letter > 0) { 451 i++ ; 452 help++ ; 453 woord[i] = letter ; 454 continue ; 455 } 456 } 457 } /* end of comprimation */ 458 459 woord[j] = woord[i] ; 460 j++ ; 461 i++ ; 462 reference[j] += help ; 463 } 464 woord[j] = woord[numlett] ; 465 numlett = j ; 466 467 468 /* determination of the number of syllables */ 469 j = -1 ; 470 i = 0 ; 471 while ( ( ++i <= numlett ) && ( j < MAXSYLL ) ) { 472 if (woord[i] < 39) { 473 j++ ; 474 vowel[j] = i ; 475 } 476 } 477 numsyll = j+1 ; 478 479 if ( numsyll < 2 ) goto nosplit ; 480 turnindex = 0 ; 481 differ = 1 ; 482 start1 = 0 ; 483 start2 = numsyll - 1 ; 484 stop = start2 ; 485 486 while ( turnindex < stop ) { 487 vowel1 = vowel[stop] ; 488 for ( i = stop - 1 ; i >= 0 ; i-- ) { 489 vowel2 = vowel[i] ; 490 if ( vowel1 - vowel2 == differ) { 491 turn[turnindex] = i ; 492 turnindex++ ; 493 } 494 vowel1 = vowel2 ; 495 } 496 if ( differ == 1 ) start1 = turnindex ; 497 else if ( differ == 2 ) start2 = turnindex ; 498 differ++ ; 499 } 500 501 turnindex = start2 - 1 ; 502 stop = numsyll - 1 ; 503 level = 1 ; 504 505 next : 506 turnindex++ ; 507 if ( turnindex >= stop ) { 508 if ( level == 1 ) turnindex = start2 ; 509 else if ( level == 2 ) { 510 turnindex = start1 ; 511 stop = start2 ; 512 } 513 else goto nosplit ; 514 level++ ; 515 if ( turnindex >= stop ) goto next ; 516 } 517 j = turn[turnindex] ; 518 vowel1 = vowel[j] ; 519 vowel2 = vowel[j+1] ; 520 521 switch ( level ) { 522 case 1 : 523 for ( j = vowel2-2 ; j >= vowel1+1 ; j-- ) { 524 help = consonant[woord[j]-39][woord[j+1]-39] ; 525 if ( abs(help) == 1 ) goto splitafterj ; 526 if ( help < 0 ) goto next ; 527 } 528 break ; /* end of first phase */ 529 530 case 2 : 531 for ( i = vowel2-2 ; i >= vowel1+1 ; i-- ) { 532 help = consonant[woord[i]-39][woord[i+1]-39] ; 533 if ( abs(help) == 2 ) { 534 j = i ; 535 goto splitafterj ; 536 } 537 if ( abs(help) == 3 ) { 538 if ( i == vowel1+1 ) { 539 j = vowel1 ; 540 goto splitafterj ; 541 } 542 help = abs(consonant[woord[i-1]-39][woord[i]-39]) ; 543 if ( help == 2 ) { 544 j = i - 1 ; 545 goto splitafterj ; 546 } 547 if ( help == 3 ) { 548 j = i - 2 ; 549 goto splitafterj ; 550 } 551 } 552 else if ( ( abs(help) == 4 ) && 553 ( i == vowel2-2 ) ) { 554 j = i ; 555 goto splitafterj ; 556 } 557 if ( help < 0 ) goto next ; 558 } 559 break ; /* end of second phase */ 560 561 case 3 : 562 j = vowel1 ; 563 help = woord[j+1] ; 564 if ( (help == LETTERJ) || (help == LETTERV) || 565 (help == LETTERZ) ) goto splitafterj ; 566 if ( help == LETTERX ) goto next ; 567 l1 = woord[j] ; 568 if ( l1 == LETTEREE ) goto next ; 569 if ( ( l1 > 24 ) && ( l1 < 29 ) ) { 570 j++ ; 571 goto splitafterj ; 572 } 573 l0 = woord[j-1] ; 574 l2 = woord[j+1] ; 575 for ( i = 0 ; i < 7 ; i++ ) 576 if ( ( l0 == prefix[i][0] ) && 577 ( l1 == prefix[i][1] ) && 578 ( l2 == prefix[i][2] ) ) goto next ; 579 goto splitafterj ; 580 break ; /* end of third phase */ 581 582 } 583 584 585 goto next ; 586 587 splitafterj : 588 bp = reference[j+1] - 1 ; 589 if((bp < size-1) && (bp > 0)) 590 goto away; 591 else 592 goto next; 593 594 nosplit : 595 bp = 0 ; 596 level = 4 ; 597 away : 598 return(bp == 0? (tchar *) NULL : tosplit+bp) ; 599 } 600