1% hyphen.w 2% 3% Libhnj is dual licensed under LGPL and MPL. Boilerplate for both 4% licenses follows. 5% 6% 7% LibHnj - a library for high quality hyphenation and justification 8% Copyright (C) 1998 Raph Levien, 9% (C) 2001 ALTLinux, Moscow (http://www.alt-linux.org), 10% (C) 2001 Peter Novodvorsky (nidd@@cs.msu.su) 11% 12% This library is free software; you can redistribute it and/or 13% modify it under the terms of the GNU Library General Public 14% License as published by the Free Software Foundation; either 15% version 2 of the License, or (at your option) any later version. 16% 17% This library is distributed in the hope that it will be useful, 18% but WITHOUT ANY WARRANTY; without even the implied warranty of 19% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20% Library General Public License for more details. 21% 22% You should have received a copy of the GNU Library General Public 23% License along with this library; if not, write to the 24% Free Software Foundation, Inc., 59 Temple Place - Suite 330, 25% Boston, MA 02111-1307 USA. 26% 27% 28% 29% The contents of this file are subject to the Mozilla Public License 30% Version 1.0 (the "MPL"); you may not use this file except in 31% compliance with the MPL. You may obtain a copy of the MPL at 32% http://www.mozilla.org/MPL/ 33% 34% Software distributed under the MPL is distributed on an "AS IS" basis, 35% WITHOUT WARRANTY OF ANY KIND, either express or implied. See the MPL 36% for the specific language governing rights and limitations under the 37% MPL. 38 39 40@ @c 41 42 43#include "ptexlib.h" 44 45#include <stdlib.h> /* for NULL, malloc */ 46#include <stdio.h> /* for fprintf */ 47#include <string.h> /* for strdup */ 48#include <stdlib.h> /* for malloc used by substring inclusion */ 49 50#define MAXPATHS 40960 51 52#ifdef UNX 53# include <unistd.h> /* for exit */ 54#endif 55 56#include <kpathsea/c-ctype.h> 57 58#define noVERBOSE 59 60#include "lang/hnjalloc.h" 61 62@ TODO: should be moved to separate library 63 64@c 65static unsigned char *hnj_strdup(const unsigned char *s) 66{ 67 unsigned char *new; 68 size_t l; 69 70 l = strlen((const char *) s); 71 new = hnj_malloc((int) l + 1); 72 memcpy(new, s, l); 73 new[l] = 0; 74 return new; 75} 76 77@* Type definitions. 78 79@ a little bit of a hash table implementation. This simply maps strings 80 to state numbers 81 82@c 83typedef struct _HashTab HashTab; 84typedef struct _HashEntry HashEntry; 85typedef struct _HashIter HashIter; 86typedef union _HashVal HashVal; 87 88/* A cheap, but effective, hack. */ 89#define HASH_SIZE 31627 90 91struct _HashTab { 92 HashEntry *entries[HASH_SIZE]; 93}; 94 95union _HashVal { 96 int state; 97 char *hyppat; 98}; 99 100struct _HashEntry { 101 HashEntry *next; 102 unsigned char *key; 103 HashVal u; 104}; 105 106struct _HashIter { 107 HashEntry **e; 108 HashEntry *cur; 109 int ndx; 110}; 111 112@ State machine 113 114@c 115typedef struct _HyphenState HyphenState; 116typedef struct _HyphenTrans HyphenTrans; 117#define MAX_CHARS 256 118#define MAX_NAME 20 119 120struct _HyphenDict { 121 int num_states; 122 int pat_length; 123 char cset[MAX_NAME]; 124 HyphenState *states; 125 HashTab *patterns; 126 HashTab *merged; 127 HashTab *state_num; 128}; 129 130struct _HyphenState { 131 char *match; 132 /*char *repl; */ 133 /*signed char replindex; */ 134 /*signed char replcut; */ 135 int fallback_state; 136 int num_trans; 137 HyphenTrans *trans; 138}; 139 140struct _HyphenTrans { 141 int uni_ch; 142 int new_state; 143}; 144 145 146@ Combine two right-aligned number patterns, 04000 + 020 becomes 04020 147 148@c 149static char *combine(char *expr, const char *subexpr) 150{ 151 size_t l1 = strlen(expr); 152 size_t l2 = strlen(subexpr); 153 size_t off = l1 - l2; 154 unsigned j; 155 /* this works also for utf8 sequences because the substring is identical 156 to the last substring-length bytes of expr except for the (single byte) 157 hyphenation encoders 158 */ 159 for (j = 0; j < l2; j++) { 160 if (expr[off + j] < subexpr[j]) 161 expr[off + j] = subexpr[j]; 162 } 163 return expr; 164} 165 166 167@ ORIGINAL CODE 168@c 169static HashIter *new_HashIter(HashTab * h) 170{ 171 HashIter *i = hnj_malloc(sizeof(HashIter)); 172 i->e = h->entries; 173 i->cur = NULL; 174 i->ndx = -1; 175 return i; 176} 177 178 179static int nextHashStealPattern(HashIter * i, unsigned char **word, char **pattern) 180{ 181 while (i->cur == NULL) { 182 if (i->ndx >= HASH_SIZE - 1) 183 return 0; 184 i->cur = i->e[++i->ndx]; 185 } 186 *word = i->cur->key; 187 *pattern = i->cur->u.hyppat; 188 i->cur->u.hyppat = NULL; 189 i->cur = i->cur->next; 190 return 1; 191} 192 193 194static int nextHash(HashIter * i, unsigned char **word) 195{ 196 while (i->cur == NULL) { 197 if (i->ndx >= HASH_SIZE - 1) 198 return 0; 199 i->cur = i->e[++i->ndx]; 200 } 201 *word = i->cur->key; 202 i->cur = i->cur->next; 203 return 1; 204} 205 206 207static int eachHash(HashIter * i, unsigned char **word, char **pattern) 208{ 209 while (i->cur == NULL) { 210 if (i->ndx >= HASH_SIZE - 1) 211 return 0; 212 i->cur = i->e[++i->ndx]; 213 } 214 *word = i->cur->key; 215 *pattern = i->cur->u.hyppat; 216 i->cur = i->cur->next; 217 return 1; 218} 219 220 221static void delete_HashIter(HashIter * i) 222{ 223 hnj_free(i); 224} 225 226 227@ a |char*| hash function from ASU - adapted from Gtk+ 228 229@c 230static unsigned int hnj_string_hash(const unsigned char *s) 231{ 232 const unsigned char *p; 233 unsigned int h = 0, g; 234 235 for (p = s; *p != '\0'; p += 1) { 236 h = (h << 4) + *p; 237 if ((g = (h & 0xf0000000))) { 238 h = h ^ (g >> 24); 239 h = h ^ g; 240 } 241 } 242 return h /* \% M */ ; 243} 244 245 246@ assumes that key is not already present! 247 248@c 249static void state_insert(HashTab * hashtab, unsigned char *key, int state) 250{ 251 int i; 252 HashEntry *e; 253 254 i = (int) (hnj_string_hash(key) % HASH_SIZE); 255 e = hnj_malloc(sizeof(HashEntry)); 256 e->next = hashtab->entries[i]; 257 e->key = key; 258 e->u.state = state; 259 hashtab->entries[i] = e; 260} 261 262 263@ assumes that key is not already present! 264 265@c 266static void hyppat_insert(HashTab * hashtab, unsigned char *key, char *hyppat) 267{ 268 int i; 269 HashEntry *e; 270 271 i = (int) (hnj_string_hash(key) % HASH_SIZE); 272 for (e = hashtab->entries[i]; e; e = e->next) { 273 if (strcmp((char *) e->key, (char *) key) == 0) { 274 if (e->u.hyppat) { 275 if (hyppat 276 && strcmp((char *) e->u.hyppat, (char *) hyppat) != 0) { 277 print_err("Conflicting pattern ignored"); 278 error(); 279 } 280 hnj_free(e->u.hyppat); 281 } 282 e->u.hyppat = hyppat; 283 hnj_free(key); 284 return; 285 } 286 } 287 e = hnj_malloc(sizeof(HashEntry)); 288 e->next = hashtab->entries[i]; 289 e->key = key; 290 e->u.hyppat = hyppat; 291 hashtab->entries[i] = e; 292} 293 294 295@ return state if found, otherwise $-1$ 296 297@c 298static int state_lookup(HashTab * hashtab, const unsigned char *key) 299{ 300 int i; 301 HashEntry *e; 302 303 i = (int) (hnj_string_hash(key) % HASH_SIZE); 304 for (e = hashtab->entries[i]; e; e = e->next) { 305 if (!strcmp((const char *) key, (const char *) e->key)) { 306 return e->u.state; 307 } 308 } 309 return -1; 310} 311 312 313@ return state if found, otherwise $-1$ 314 315@c 316static char *hyppat_lookup(HashTab * hashtab, const unsigned char *chars, int l) 317{ 318 int i; 319 HashEntry *e; 320 unsigned char key[256]; /* should be ample */ 321 strncpy((char *) key, (const char *) chars, (size_t) l); 322 key[l] = 0; 323 i = (int) (hnj_string_hash(key) % HASH_SIZE); 324 for (e = hashtab->entries[i]; e; e = e->next) { 325 if (!strcmp((char *) key, (char *) e->key)) { 326 return e->u.hyppat; 327 } 328 } 329 return NULL; 330} 331 332 333@ Get the state number, allocating a new state if necessary. 334 335@c 336static int hnj_get_state(HyphenDict * dict, 337 const unsigned char *str, int *state_num) 338{ 339 *state_num = state_lookup(dict->state_num, str); 340 341 if (*state_num >= 0) 342 return *state_num; 343 344 state_insert(dict->state_num, hnj_strdup(str), dict->num_states); 345 /* predicate is true if |dict->num_states| is a power of two */ 346 if (!(dict->num_states & (dict->num_states - 1))) { 347 dict->states = hnj_realloc(dict->states, 348 (int) ((dict->num_states << 1) * 349 (int) sizeof(HyphenState))); 350 } 351 dict->states[dict->num_states].match = NULL; 352 dict->states[dict->num_states].fallback_state = -1; 353 dict->states[dict->num_states].num_trans = 0; 354 dict->states[dict->num_states].trans = NULL; 355 return dict->num_states++; 356} 357 358 359@ Add a transition from state1 to state2 through ch - assumes that the 360 transition does not already exist 361 362@c 363static void hnj_add_trans(HyphenDict * dict, int state1, int state2, int uni_ch) 364{ 365 int num_trans; 366 /* TH: this test was a bit too strict, it is quite normal for old 367 patterns to have chars in the range 0-31 or 127-159 (inclusive). 368 To ease the transition, let's only disallow NUL for now 369 (this is probably a requirement of the code anyway). 370 */ 371 if (uni_ch == 0) { 372 fprintf(stderr, "Character out of bounds: u%04x \n", uni_ch); 373 exit(1); 374 } 375 num_trans = dict->states[state1].num_trans; 376 if (num_trans == 0) { 377 dict->states[state1].trans = hnj_malloc(sizeof(HyphenTrans)); 378 } else { 379 /* TH: The old version did 380 } else if (!(num_trans & (num_trans - 1))) { 381 ... hnj_realloc(dict->states[state1].trans, 382 (int) ((num_trans << 1) * 383 sizeof(HyphenTrans))); 384 but that is incredibly nasty when adding patters one-at-a-time. 385 Controlled growth would be nicer than the current +1, but if 386 noone complains, this is good enough ;) 387 */ 388 dict->states[state1].trans = hnj_realloc(dict->states[state1].trans, 389 (int) ((num_trans + 1) * 390 sizeof(HyphenTrans))); 391 } 392 dict->states[state1].trans[num_trans].uni_ch = uni_ch; 393 dict->states[state1].trans[num_trans].new_state = state2; 394 dict->states[state1].num_trans++; 395} 396 397 398#ifdef VERBOSE 399 400static unsigned char *get_state_str(int state) 401{ 402 int i; 403 HashEntry *e; 404 405 for (i = 0; i < HASH_SIZE; i++) 406 for (e = global->entries[i]; e; e = e->next) 407 if (e->u.state == state) 408 return e->key; 409 return NULL; 410} 411#endif 412 413 414@ I've changed the semantics a bit here: |hnj_hyphen_load| used to 415 operate on a file, but now the argument is a string buffer. 416 417@c 418static const unsigned char *next_pattern(size_t * length, 419 const unsigned char **buf) 420{ 421 const unsigned char *here, *rover = *buf; 422 while (*rover && isspace(*rover)) 423 rover++; 424 here = rover; 425 while (*rover) { 426 if (isspace(*rover)) { 427 *length = (size_t) (rover - here); 428 *buf = rover; 429 return here; 430 } 431 rover++; 432 } 433 *length = (size_t) (rover - here); 434 *buf = rover; 435 return *length ? here : NULL; /* zero sensed */ 436} 437 438static void init_hash(HashTab ** h) 439{ 440 int i; 441 if (*h) 442 return; 443 *h = hnj_malloc(sizeof(HashTab)); 444 for (i = 0; i < HASH_SIZE; i++) 445 (*h)->entries[i] = NULL; 446} 447 448 449static void clear_state_hash(HashTab ** h) 450{ 451 int i; 452 if (*h == NULL) 453 return; 454 for (i = 0; i < HASH_SIZE; i++) { 455 HashEntry *e, *next; 456 for (e = (*h)->entries[i]; e; e = next) { 457 next = e->next; 458 hnj_free(e->key); 459 hnj_free(e); 460 } 461 } 462 hnj_free(*h); 463 *h = NULL; 464} 465 466 467static void clear_hyppat_hash(HashTab ** h) 468{ 469 int i; 470 if (*h == NULL) 471 return; 472 for (i = 0; i < HASH_SIZE; i++) { 473 HashEntry *e, *next; 474 for (e = (*h)->entries[i]; e; e = next) { 475 next = e->next; 476 hnj_free(e->key); 477 if (e->u.hyppat) 478 hnj_free(e->u.hyppat); 479 hnj_free(e); 480 } 481 } 482 hnj_free(*h); 483 *h = NULL; 484} 485 486 487static void init_dict(HyphenDict * dict) 488{ 489 dict->num_states = 1; 490 dict->pat_length = 0; 491 dict->states = hnj_malloc(sizeof(HyphenState)); 492 dict->states[0].match = NULL; 493 dict->states[0].fallback_state = -1; 494 dict->states[0].num_trans = 0; 495 dict->states[0].trans = NULL; 496 dict->patterns = NULL; 497 dict->merged = NULL; 498 dict->state_num = NULL; 499 init_hash(&dict->patterns); 500} 501 502 503static void clear_dict(HyphenDict * dict) 504{ 505 int state_num; 506 for (state_num = 0; state_num < dict->num_states; state_num++) { 507 HyphenState *hstate = &dict->states[state_num]; 508 if (hstate->match) 509 hnj_free(hstate->match); 510 if (hstate->trans) 511 hnj_free(hstate->trans); 512 } 513 hnj_free(dict->states); 514 clear_hyppat_hash(&dict->patterns); 515 clear_hyppat_hash(&dict->merged); 516 clear_state_hash(&dict->state_num); 517} 518 519 520 521HyphenDict *hnj_hyphen_new(void) 522{ 523 HyphenDict *dict = hnj_malloc(sizeof(HyphenDict)); 524 init_dict(dict); 525 return dict; 526} 527 528 529void hnj_hyphen_clear(HyphenDict * dict) 530{ 531 clear_dict(dict); 532 init_dict(dict); 533} 534 535 536void hnj_hyphen_free(HyphenDict * dict) 537{ 538 clear_dict(dict); 539 hnj_free(dict); 540} 541 542unsigned char *hnj_serialize(HyphenDict * dict) 543{ 544 HashIter *v; 545 unsigned char *word; 546 char *pattern; 547 unsigned char *buf = hnj_malloc(dict->pat_length); 548 unsigned char *cur = buf; 549 v = new_HashIter(dict->patterns); 550 while (eachHash(v, &word, &pattern)) { 551 int i = 0, e = 0; 552 while (word[e + i]) { 553 if (pattern[i] != '0') 554 *cur++ = (unsigned char) pattern[i]; 555 *cur++ = word[e + i++]; 556 while (is_utf8_follow(word[e + i])) 557 *cur++ = word[i + e++]; 558 } 559 if (pattern[i] != '0') 560 *cur++ = (unsigned char) pattern[i]; 561 *cur++ = ' '; 562 } 563 delete_HashIter(v); 564 *cur = 0; 565 return buf; 566} 567 568 569void hnj_free_serialize(unsigned char *c) 570{ 571 hnj_free(c); 572} 573 574 575@ hyphenation pattern: 576 577signed bytes 578 5790 indicates end (actually any negative number) 580 581: prio(1+),startpos,length,len1,[replace],len2,[replace] 582 583most basic example is: 584 585p n 0 0 0 586 587for a hyphenation point between characters 588 589 590@c 591void hnj_hyphen_load(HyphenDict * dict, const unsigned char *f) 592{ 593 int state_num, last_state; 594 int ch; 595 int found; 596 HashEntry *e; 597 HashIter *v; 598 unsigned char *word; 599 char *pattern; 600 size_t l = 0; 601 602 const unsigned char *format; 603 const unsigned char *begin = f; 604 unsigned char *pat; 605 char *org; 606 while ((format = next_pattern(&l, &f)) != NULL) { 607 int i, j, e1; 608 if (l>=255) { 609 help1("Individual patterns should not be longer than 254 bytes total."); 610 print_err("Pattern of enormous length ignored"); 611 error(); 612 continue; 613 } 614#if 0 615 printf("%s\n",format); 616 char* repl = strnchr(format, '/',l); 617 int replindex = 0; 618 int replcut = 0; 619 if (repl) { 620 int clen = l-(repl-format); 621 l = repl-format; 622 char * index = strnchr(repl + 1, ',',clen); 623 if (index) { 624 char * index2 = strnchr(index + 1, ',',clen-(index-repl)); 625 if (index2) { 626 replindex = (signed char) atoi(index + 1) - 1; 627 replcut = (signed char) atoi(index2 + 1); 628 } 629 } else { 630 hnj_strchomp(repl + 1); 631 replindex = 0; 632 replcut = strlen(buf); 633 } 634 repl = hnj_strdup(repl + 1); 635 } 636#endif 637 for (i = 0, j = 0, e1 = 0; (unsigned) i < l; i++) { 638 if (format[i] >= '0' && format[i] <= '9') 639 j++; 640 if (is_utf8_follow(format[i])) 641 e1++; 642 } 643 /* |l-e1| => number of {\it characters} not {\it bytes} */ 644 /* |l-j| => number of pattern bytes */ 645 /* |l-e1-j| => number of pattern characters */ 646 pat = (unsigned char *) malloc((1 + l - (size_t) j)); 647 org = (char *) malloc((size_t) (2 + l - (size_t) e1 - (size_t) j)); 648 /* remove hyphenation encoders (digits) from pat */ 649 org[0] = '0'; 650 for (i = 0, j = 0, e1 = 0; (unsigned) i < l; i++) { 651 unsigned char c = format[i]; 652 if (is_utf8_follow(c)) { 653 pat[j + e1++] = c; 654 } else if (c < '0' || c > '9') { 655 pat[e1 + j++] = c; 656 org[j] = '0'; 657 } else { 658 org[j] = (char) c; 659 } 660 } 661 pat[e1 + j] = 0; 662 org[j + 1] = 0; 663 hyppat_insert(dict->patterns, pat, org); 664 } 665 dict->pat_length += (int) ((f - begin) + 2); /* 2 for spurious spaces */ 666 init_hash(&dict->merged); 667 v = new_HashIter(dict->patterns); 668 while (nextHash(v, &word)) { 669 int wordsize = (int) strlen((char *) word); 670 int j1, l1; 671 for (l1 = 1; l1 <= wordsize; l1++) { 672 if (is_utf8_follow(word[l1])) 673 continue; /* Do not clip an utf8 sequence */ 674 for (j1 = 1; j1 <= l1; j1++) { 675 char *subpat_pat; 676 int i1 = l1 - j1; 677 if (is_utf8_follow(word[i1])) 678 continue; /* Do not start halfway an utf8 sequence */ 679 if ((subpat_pat = 680 hyppat_lookup(dict->patterns, word + i1, j1)) != NULL) { 681 char *newpat_pat; 682 if ((newpat_pat = 683 hyppat_lookup(dict->merged, word, l1)) == NULL) { 684 char *neworg; 685 unsigned char *newword = 686 (unsigned char *) malloc((size_t) (l1 + 1)); 687 int e1 = 0; 688 strncpy((char *) newword, (char *) word, (size_t) l1); 689 newword[l1] = 0; 690 for (i1 = 0; i1 < l1; i1++) 691 if (is_utf8_follow(newword[i1])) 692 e1++; 693 neworg = malloc((size_t) (l1 + 2 - e1)); 694 sprintf(neworg, "%0*d", l1 + 1 - e1, 0); /* fill with right amount of '0' */ 695 hyppat_insert(dict->merged, newword, 696 combine(neworg, subpat_pat)); 697 } else { 698 combine(newpat_pat, subpat_pat); 699 } 700 } 701 } 702 } 703 } 704 delete_HashIter(v); 705 706 init_hash(&dict->state_num); 707 state_insert(dict->state_num, hnj_strdup((const unsigned char *) ""), 0); 708 v = new_HashIter(dict->merged); 709 while (nextHashStealPattern(v, &word, &pattern)) { 710 static unsigned char mask[] = { 0x3F, 0x1F, 0xF, 0x7 }; 711 int j1 = (int) strlen((char *) word); 712#ifdef VERBOSE 713 printf("word %s pattern %s, j = %d\n", word, pattern, j1); 714#endif 715 state_num = hnj_get_state(dict, word, &found); 716 dict->states[state_num].match = pattern; 717 718 /* now, put in the prefix transitions */ 719 while (found < 0) { 720 j1--; 721 last_state = state_num; 722 ch = word[j1]; 723 if (ch >= 0x80) { 724 int m; 725 int i1 = 1; 726 while (is_utf8_follow(word[j1 - i1])) 727 i1++; 728 ch = word[j1 - i1] & mask[i1]; 729 m = j1 - i1; 730 while (i1--) { 731 ch = (ch << 6) + (0x3F & word[j1 - i1]); 732 } 733 j1 = m; 734 } 735 word[j1] = '\0'; 736 state_num = hnj_get_state(dict, word, &found); 737 hnj_add_trans(dict, state_num, last_state, ch); 738 } 739 } 740 delete_HashIter(v); 741 clear_hyppat_hash(&dict->merged); 742 743 /* put in the fallback states */ 744 { 745 int i, j = 0; 746 for (i = 0; i < HASH_SIZE; i++) { 747 for (e = dict->state_num->entries[i]; e; e = e->next) { 748 /* do not do state==0 otherwise things get confused */ 749 if (e->u.state) { 750 for (j = 1; 1; j++) { 751 state_num = state_lookup(dict->state_num, e->key + j); 752 if (state_num >= 0) 753 break; 754 } 755 dict->states[e->u.state].fallback_state = state_num; 756 } 757 } 758 } 759#ifdef VERBOSE 760 for (i = 0; i < HASH_SIZE; i++) { 761 for (e = dict->state_num->entries[i]; e; e = e->next) { 762 printf("%d string %s state %d, fallback=%d\n", i, e->key, 763 e->u.state, dict->states[e->u.state].fallback_state); 764 for (j = 0; j < dict->states[e->u.state].num_trans; j++) { 765 printf(" u%4x->%d\n", 766 (int) dict->states[e->u.state].trans[j].uni_ch, 767 dict->states[e->u.state].trans[j].new_state); 768 } 769 } 770 } 771#endif 772 } 773 clear_state_hash(&dict->state_num); 774} 775 776@ @c 777void hnj_hyphen_hyphenate(HyphenDict * dict, 778 halfword first1, 779 halfword last1, 780 int length, 781 halfword left, halfword right, lang_variables * lan) 782{ 783 int char_num; 784 halfword here; 785 int state = 0; 786 /* +2 for dots at each end, +1 for points /outside/ characters */ 787 int ext_word_len = length + 2; 788 int hyphen_len = ext_word_len + 1; 789 char *hyphens = hnj_malloc(hyphen_len + 1); 790 791 /* Add a '.' to beginning and end to facilitate matching */ 792 set_vlink(begin_point, first1); 793 set_vlink(end_point, get_vlink(last1)); 794 set_vlink(last1, end_point); 795 796 for (char_num = 0; char_num < hyphen_len; char_num++) { 797 hyphens[char_num] = '0'; 798 } 799 hyphens[hyphen_len] = 0; 800 801 /* now, run the finite state machine */ 802 for (char_num = 0, here = begin_point; here != get_vlink(end_point); 803 here = get_vlink(here)) { 804 805 int ch; 806 if (here == begin_point || here == end_point) 807 ch = '.'; 808 else 809 ch = get_lc_code(get_character(here)); 810 while (state != -1) { 811#if 0 812 printf("%*s%s%c",char_num-strlen(get_state_str(state)),"",get_state_str(state),(char)ch); 813#endif 814 HyphenState *hstate = &dict->states[state]; 815 int k; 816 for (k = 0; k < hstate->num_trans; k++) { 817 if (hstate->trans[k].uni_ch == ch) { 818 char *match; 819 state = hstate->trans[k].new_state; 820#if 0 821 printf(" state %d\n",state); 822#endif 823 match = dict->states[state].match; 824 if (match) { 825 /* +2 because: 826 1 string length is one bigger than offset 827 1 hyphenation starts before first character 828 */ 829 int offset = (int) (char_num + 2 - (int) strlen(match)); 830#if 0 831 printf ("%*s%s\n", offset,"", match); 832#endif 833 int m; 834 for (m = 0; match[m]; m++) { 835 if (hyphens[offset + m] < match[m]) 836 hyphens[offset + m] = match[m]; 837 } 838 } 839 goto try_next_letter; 840 } 841 } 842 state = hstate->fallback_state; 843#if 0 844 printf (" back to %d\n", state); 845#endif 846 } 847 /* nothing worked, let's go to the next character */ 848 state = 0; 849 try_next_letter:; 850 char_num++; 851 } 852 853 /* restore the correct pointers */ 854 set_vlink(last1, get_vlink(end_point)); 855 856 /* pattern is \.{\^.\^w\^o\^r\^d\^.\^} |word_len|=4, |ext_word_len|=6, |hyphens|=7 857 * check \.{ \^ \^ \^ } so drop first two and stop after |word_len-1| 858 */ 859 for (here = first1, char_num = 2; here != left; here = get_vlink(here)) 860 char_num++; 861 for (; here != right; here = get_vlink(here)) { 862 if (hyphens[char_num] & 1) 863 here = insert_syllable_discretionary(here, lan); 864 char_num++; 865 } 866 hnj_free(hyphens); 867} 868