1% hyphen.w
2%
3% Libhnj is dual licensed under LGPL and MPL. Boilerplate for both
4% licenses follows.
5%
6%
7% LibHnj - a library for high quality hyphenation and justification
8% Copyright (C) 1998 Raph Levien,
9% 	     (C) 2001 ALTLinux, Moscow (http://www.alt-linux.org),
10%           (C) 2001 Peter Novodvorsky (nidd@@cs.msu.su)
11%
12% This library is free software; you can redistribute it and/or
13% modify it under the terms of the GNU Library General Public
14% License as published by the Free Software Foundation; either
15% version 2 of the License, or (at your option) any later version.
16%
17% This library is distributed in the hope that it will be useful,
18% but WITHOUT ANY WARRANTY; without even the implied warranty of
19% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20% Library General Public License for more details.
21%
22% You should have received a copy of the GNU Library General Public
23% License along with this library; if not, write to the
24% Free Software Foundation, Inc., 59 Temple Place - Suite 330,
25% Boston, MA  02111-1307  USA.
26%
27%
28%
29% The contents of this file are subject to the Mozilla Public License
30% Version 1.0 (the "MPL"); you may not use this file except in
31% compliance with the MPL.  You may obtain a copy of the MPL at
32% http://www.mozilla.org/MPL/
33%
34% Software distributed under the MPL is distributed on an "AS IS" basis,
35% WITHOUT WARRANTY OF ANY KIND, either express or implied. See the MPL
36% for the specific language governing rights and limitations under the
37% MPL.
38
39
40@ @c
41
42
43#include "ptexlib.h"
44
45#include <stdlib.h>             /* for NULL, malloc */
46#include <stdio.h>              /* for fprintf */
47#include <string.h>             /* for strdup */
48#include <stdlib.h>             /* for malloc used by substring inclusion */
49
50#define MAXPATHS 40960
51
52#ifdef UNX
53#  include <unistd.h>           /* for exit */
54#endif
55
56#include <kpathsea/c-ctype.h>
57
58#define noVERBOSE
59
60#include "lang/hnjalloc.h"
61
62@ TODO: should be moved to separate library
63
64@c
/* Return a freshly allocated copy of the NUL terminated byte string |s|.
   The copy is obtained from |hnj_malloc|, so the caller releases it with
   |hnj_free|. */
static unsigned char *hnj_strdup(const unsigned char *s)
{
    const size_t len = strlen((const char *) s);
    unsigned char *copy = hnj_malloc((int) len + 1);

    memcpy(copy, s, len);
    copy[len] = 0;              /* terminator is added by hand */
    return copy;
}
76
77@* Type definitions.
78
@ A small hash table implementation. It maps strings either to state
   numbers or to hyphenation patterns (see the |HashVal| union).
81
82@c
/* Forward declarations, so that the structures below may refer to each
   other in any order. */
typedef union _HashVal HashVal;
typedef struct _HashEntry HashEntry;
typedef struct _HashTab HashTab;
typedef struct _HashIter HashIter;

/* Number of buckets in every table; it is fixed at compile time and the
   tables never grow. */
#define HASH_SIZE 31627

/* What a key maps to: a state number in the state tables, a digit string
   in the pattern tables. Which member is valid depends on the table. */
union _HashVal {
    int state;
    char *hyppat;
};

/* One key and its value, chained to the other entries of its bucket. */
struct _HashEntry {
    HashEntry *next;            /* next entry in the same bucket, or |NULL| */
    unsigned char *key;         /* NUL terminated key string */
    HashVal u;                  /* value stored under the key */
};

/* The table proper: an array of bucket heads. */
struct _HashTab {
    HashEntry *entries[HASH_SIZE];
};

/* Cursor used to visit every entry of a table. */
struct _HashIter {
    HashEntry **e;              /* bucket array being walked */
    HashEntry *cur;             /* entry to hand out next, |NULL| when the bucket is done */
    int ndx;                    /* index of the current bucket, $-1$ before the first */
};
111
112@ State machine
113
114@c
115typedef struct _HyphenState HyphenState;
116typedef struct _HyphenTrans HyphenTrans;
117#define MAX_CHARS 256
118#define MAX_NAME 20
119
120struct _HyphenDict {
121    int num_states;
122    int pat_length;
123    char cset[MAX_NAME];
124    HyphenState *states;
125    HashTab *patterns;
126    HashTab *merged;
127    HashTab *state_num;
128};
129
130struct _HyphenState {
131    char *match;
132    /*char *repl; */
133    /*signed char replindex; */
134    /*signed char replcut; */
135    int fallback_state;
136    int num_trans;
137    HyphenTrans *trans;
138};
139
140struct _HyphenTrans {
141    int uni_ch;
142    int new_state;
143};
144
145
146@ Combine two right-aligned number patterns, 04000 + 020 becomes 04020
147
148@c
/* Merge the digit string |subexpr| into |expr|, both aligned on their
   right ends: every byte of |expr| that is smaller than the byte of
   |subexpr| at the same right-aligned position is raised to that byte.
   |expr| is changed in place and returned.

   This also works for utf8 sequences, because the substring is identical
   to the tail of |expr| except for the single byte hyphenation encoders.

   When |subexpr| is the longer string only its tail that overlaps |expr|
   is merged. The unguarded subtraction of the two lengths used to wrap
   around in that case and touched memory outside of |expr|. */
static char *combine(char *expr, const char *subexpr)
{
    const size_t l1 = strlen(expr);
    const size_t l2 = strlen(subexpr);
    const size_t overlap = (l2 < l1) ? l2 : l1;
    char *dst = expr + (l1 - overlap);
    const char *src = subexpr + (l2 - overlap);
    size_t j;

    for (j = 0; j < overlap; j++) {
        if (dst[j] < src[j])
            dst[j] = src[j];
    }
    return expr;
}
165
166
167@ ORIGINAL CODE
168@c
169static HashIter *new_HashIter(HashTab * h)
170{
171    HashIter *i = hnj_malloc(sizeof(HashIter));
172    i->e = h->entries;
173    i->cur = NULL;
174    i->ndx = -1;
175    return i;
176}
177
178
179static int nextHashStealPattern(HashIter * i, unsigned char **word, char **pattern)
180{
181    while (i->cur == NULL) {
182        if (i->ndx >= HASH_SIZE - 1)
183            return 0;
184        i->cur = i->e[++i->ndx];
185    }
186    *word = i->cur->key;
187    *pattern = i->cur->u.hyppat;
188    i->cur->u.hyppat = NULL;
189    i->cur = i->cur->next;
190    return 1;
191}
192
193
194static int nextHash(HashIter * i, unsigned char **word)
195{
196    while (i->cur == NULL) {
197        if (i->ndx >= HASH_SIZE - 1)
198            return 0;
199        i->cur = i->e[++i->ndx];
200    }
201    *word = i->cur->key;
202    i->cur = i->cur->next;
203    return 1;
204}
205
206
207static int eachHash(HashIter * i, unsigned char **word, char **pattern)
208{
209    while (i->cur == NULL) {
210        if (i->ndx >= HASH_SIZE - 1)
211            return 0;
212        i->cur = i->e[++i->ndx];
213    }
214    *word = i->cur->key;
215    *pattern = i->cur->u.hyppat;
216    i->cur = i->cur->next;
217    return 1;
218}
219
220
221static void delete_HashIter(HashIter * i)
222{
223    hnj_free(i);
224}
225
226
227@ a |char*| hash function from ASU - adapted from Gtk+
228
229@c
/* Hash the NUL terminated byte string |s|. Every byte is shifted in four
   bits at a time; whenever the top four bits become occupied they are
   folded back into the low byte and cleared. The caller reduces the
   result modulo the table size. */
static unsigned int hnj_string_hash(const unsigned char *s)
{
    unsigned int hash = 0;

    while (*s != '\0') {
        unsigned int top;

        hash = (hash << 4) + *s++;
        top = hash & 0xf0000000;
        if (top != 0) {
            hash ^= top >> 24;
            hash ^= top;
        }
    }
    return hash;
}
244
245
246@ assumes that key is not already present!
247
248@c
249static void state_insert(HashTab * hashtab, unsigned char *key, int state)
250{
251    int i;
252    HashEntry *e;
253
254    i = (int) (hnj_string_hash(key) % HASH_SIZE);
255    e = hnj_malloc(sizeof(HashEntry));
256    e->next = hashtab->entries[i];
257    e->key = key;
258    e->u.state = state;
259    hashtab->entries[i] = e;
260}
261
262
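@ If |key| is already present, its stored pattern is replaced (an error is reported when the two patterns differ) and |key| is freed; otherwise a new entry is added. The table takes ownership of both |key| and |hyppat|.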
264
265@c
266static void hyppat_insert(HashTab * hashtab, unsigned char *key, char *hyppat)
267{
268    int i;
269    HashEntry *e;
270
271    i = (int) (hnj_string_hash(key) % HASH_SIZE);
272    for (e = hashtab->entries[i]; e; e = e->next) {
273        if (strcmp((char *) e->key, (char *) key) == 0) {
274            if (e->u.hyppat) {
275                if (hyppat
276                    && strcmp((char *) e->u.hyppat, (char *) hyppat) != 0) {
277                    print_err("Conflicting pattern ignored");
278                    error();
279                }
280                hnj_free(e->u.hyppat);
281            }
282            e->u.hyppat = hyppat;
283            hnj_free(key);
284            return;
285        }
286    }
287    e = hnj_malloc(sizeof(HashEntry));
288    e->next = hashtab->entries[i];
289    e->key = key;
290    e->u.hyppat = hyppat;
291    hashtab->entries[i] = e;
292}
293
294
295@ return state if found, otherwise $-1$
296
297@c
298static int state_lookup(HashTab * hashtab, const unsigned char *key)
299{
300    int i;
301    HashEntry *e;
302
303    i = (int) (hnj_string_hash(key) % HASH_SIZE);
304    for (e = hashtab->entries[i]; e; e = e->next) {
305        if (!strcmp((const char *) key, (const char *) e->key)) {
306            return e->u.state;
307        }
308    }
309    return -1;
310}
311
312
@ return the stored pattern if found, otherwise |NULL|
314
315@c
316static char *hyppat_lookup(HashTab * hashtab, const unsigned char *chars, int l)
317{
318    int i;
319    HashEntry *e;
320    unsigned char key[256];     /* should be ample */
321    strncpy((char *) key, (const char *) chars, (size_t) l);
322    key[l] = 0;
323    i = (int) (hnj_string_hash(key) % HASH_SIZE);
324    for (e = hashtab->entries[i]; e; e = e->next) {
325        if (!strcmp((char *) key, (char *) e->key)) {
326            return e->u.hyppat;
327        }
328    }
329    return NULL;
330}
331
332
333@ Get the state number, allocating a new state if necessary.
334
335@c
336static int hnj_get_state(HyphenDict * dict,
337                         const unsigned char *str, int *state_num)
338{
339    *state_num = state_lookup(dict->state_num, str);
340
341    if (*state_num >= 0)
342        return *state_num;
343
344    state_insert(dict->state_num, hnj_strdup(str), dict->num_states);
345    /* predicate is true if |dict->num_states| is a power of two */
346    if (!(dict->num_states & (dict->num_states - 1))) {
347        dict->states = hnj_realloc(dict->states,
348                                   (int) ((dict->num_states << 1) *
349                                          (int) sizeof(HyphenState)));
350    }
351    dict->states[dict->num_states].match = NULL;
352    dict->states[dict->num_states].fallback_state = -1;
353    dict->states[dict->num_states].num_trans = 0;
354    dict->states[dict->num_states].trans = NULL;
355    return dict->num_states++;
356}
357
358
359@ Add a transition from state1 to state2 through ch - assumes that the
360   transition does not already exist
361
362@c
363static void hnj_add_trans(HyphenDict * dict, int state1, int state2, int uni_ch)
364{
365    int num_trans;
366    /* TH: this test was a bit too strict, it is quite normal for old
367       patterns to have chars in the range 0-31 or 127-159 (inclusive).
368       To ease the transition, let's only disallow NUL for now
369       (this is probably a requirement of the code anyway).
370     */
371    if (uni_ch == 0) {
372        fprintf(stderr, "Character out of bounds: u%04x \n", uni_ch);
373        exit(1);
374    }
375    num_trans = dict->states[state1].num_trans;
376    if (num_trans == 0) {
377        dict->states[state1].trans = hnj_malloc(sizeof(HyphenTrans));
378    } else {
379        /* TH: The old version did
380           } else if (!(num_trans & (num_trans - 1))) {
381             ... hnj_realloc(dict->states[state1].trans,
382                                                 (int) ((num_trans << 1) *
383                                                        sizeof(HyphenTrans)));
384           but that is incredibly nasty when adding patters one-at-a-time.
385           Controlled growth would be nicer than the current +1, but if
386           noone complains, this is good enough ;)
387         */
388        dict->states[state1].trans = hnj_realloc(dict->states[state1].trans,
389                                                 (int) ((num_trans + 1) *
390                                                        sizeof(HyphenTrans)));
391    }
392    dict->states[state1].trans[num_trans].uni_ch = uni_ch;
393    dict->states[state1].trans[num_trans].new_state = state2;
394    dict->states[state1].num_trans++;
395}
396
397
#ifdef VERBOSE

/* Debugging aid: find the key string of the state numbered |state| by
   scanning every bucket; the result is |NULL| when no entry matches.
   NOTE(review): the table |global| is not declared in the visible part
   of this file, so this probably does not compile with |VERBOSE| set;
   to be confirmed. */
static unsigned char *get_state_str(int state)
{
    int slot;

    for (slot = 0; slot < HASH_SIZE; slot++) {
        HashEntry *e = global->entries[slot];

        while (e != NULL) {
            if (e->u.state == state)
                return e->key;
            e = e->next;
        }
    }
    return NULL;
}
#endif
412
413
414@ I've changed the semantics a bit here: |hnj_hyphen_load| used to
415   operate on a file, but now the argument is a string buffer.
416
417@c
/* Scan the next whitespace delimited token in |*buf|. Leading whitespace
   is skipped; the token runs up to the next whitespace byte or the end
   of the string. |*length| receives the token length in bytes and |*buf|
   is moved to the byte after the token. The result points at the start
   of the token, or is |NULL| when only the end of the string was left. */
static const unsigned char *next_pattern(size_t * length,
                                         const unsigned char **buf)
{
    const unsigned char *scan = *buf;
    const unsigned char *start;

    while (*scan != 0 && isspace(*scan))
        scan++;
    start = scan;
    while (*scan != 0 && !isspace(*scan))
        scan++;

    *length = (size_t) (scan - start);
    *buf = scan;
    return (*length != 0) ? start : NULL;       /* zero sensed */
}
437
438static void init_hash(HashTab ** h)
439{
440    int i;
441    if (*h)
442        return;
443    *h = hnj_malloc(sizeof(HashTab));
444    for (i = 0; i < HASH_SIZE; i++)
445        (*h)->entries[i] = NULL;
446}
447
448
449static void clear_state_hash(HashTab ** h)
450{
451    int i;
452    if (*h == NULL)
453        return;
454    for (i = 0; i < HASH_SIZE; i++) {
455        HashEntry *e, *next;
456        for (e = (*h)->entries[i]; e; e = next) {
457            next = e->next;
458            hnj_free(e->key);
459            hnj_free(e);
460        }
461    }
462    hnj_free(*h);
463    *h = NULL;
464}
465
466
467static void clear_hyppat_hash(HashTab ** h)
468{
469    int i;
470    if (*h == NULL)
471        return;
472    for (i = 0; i < HASH_SIZE; i++) {
473        HashEntry *e, *next;
474        for (e = (*h)->entries[i]; e; e = next) {
475            next = e->next;
476            hnj_free(e->key);
477            if (e->u.hyppat)
478                hnj_free(e->u.hyppat);
479            hnj_free(e);
480        }
481    }
482    hnj_free(*h);
483    *h = NULL;
484}
485
486
487static void init_dict(HyphenDict * dict)
488{
489    dict->num_states = 1;
490    dict->pat_length = 0;
491    dict->states = hnj_malloc(sizeof(HyphenState));
492    dict->states[0].match = NULL;
493    dict->states[0].fallback_state = -1;
494    dict->states[0].num_trans = 0;
495    dict->states[0].trans = NULL;
496    dict->patterns = NULL;
497    dict->merged = NULL;
498    dict->state_num = NULL;
499    init_hash(&dict->patterns);
500}
501
502
503static void clear_dict(HyphenDict * dict)
504{
505    int state_num;
506    for (state_num = 0; state_num < dict->num_states; state_num++) {
507        HyphenState *hstate = &dict->states[state_num];
508        if (hstate->match)
509            hnj_free(hstate->match);
510        if (hstate->trans)
511            hnj_free(hstate->trans);
512    }
513    hnj_free(dict->states);
514    clear_hyppat_hash(&dict->patterns);
515    clear_hyppat_hash(&dict->merged);
516    clear_state_hash(&dict->state_num);
517}
518
519
520
521HyphenDict *hnj_hyphen_new(void)
522{
523    HyphenDict *dict = hnj_malloc(sizeof(HyphenDict));
524    init_dict(dict);
525    return dict;
526}
527
528
529void hnj_hyphen_clear(HyphenDict * dict)
530{
531    clear_dict(dict);
532    init_dict(dict);
533}
534
535
536void hnj_hyphen_free(HyphenDict * dict)
537{
538    clear_dict(dict);
539    hnj_free(dict);
540}
541
542unsigned char *hnj_serialize(HyphenDict * dict)
543{
544    HashIter *v;
545    unsigned char *word;
546    char *pattern;
547    unsigned char *buf = hnj_malloc(dict->pat_length);
548    unsigned char *cur = buf;
549    v = new_HashIter(dict->patterns);
550    while (eachHash(v, &word, &pattern)) {
551        int i = 0, e = 0;
552        while (word[e + i]) {
553            if (pattern[i] != '0')
554                *cur++ = (unsigned char) pattern[i];
555            *cur++ = word[e + i++];
556            while (is_utf8_follow(word[e + i]))
557                *cur++ = word[i + e++];
558        }
559        if (pattern[i] != '0')
560            *cur++ = (unsigned char) pattern[i];
561        *cur++ = ' ';
562    }
563    delete_HashIter(v);
564    *cur = 0;
565    return buf;
566}
567
568
/* Release a string that was produced by |hnj_serialize|. */
void hnj_free_serialize(unsigned char *c)
{
    hnj_free(c);
}
573
574
575@ hyphenation pattern:
576
577signed bytes
578
5790 indicates end (actually any negative number)
580
581: prio(1+),startpos,length,len1,[replace],len2,[replace]
582
583most basic example is:
584
585p n 0 0 0
586
587for a hyphenation point between characters
588
589
590@c
591void hnj_hyphen_load(HyphenDict * dict, const unsigned char *f)
592{
593    int state_num, last_state;
594    int ch;
595    int found;
596    HashEntry *e;
597    HashIter *v;
598    unsigned char *word;
599    char *pattern;
600    size_t l = 0;
601
602    const unsigned char *format;
603    const unsigned char *begin = f;
604    unsigned char *pat;
605    char *org;
606    while ((format = next_pattern(&l, &f)) != NULL) {
607        int i, j, e1;
608        if (l>=255) {
609           help1("Individual patterns should not be longer than 254 bytes total.");
610           print_err("Pattern of enormous length ignored");
611           error();
612           continue;
613        }
614#if 0
615           printf("%s\n",format);
616           char* repl = strnchr(format, '/',l);
617           int replindex = 0;
618           int replcut = 0;
619           if (repl) {
620           int clen = l-(repl-format);
621           l = repl-format;
622           char * index = strnchr(repl + 1, ',',clen);
623           if (index) {
624           char * index2 = strnchr(index + 1, ',',clen-(index-repl));
625           if (index2) {
626           replindex = (signed char) atoi(index + 1) - 1;
627           replcut = (signed char) atoi(index2 + 1);
628           }
629           } else {
630           hnj_strchomp(repl + 1);
631           replindex = 0;
632           replcut = strlen(buf);
633           }
634           repl = hnj_strdup(repl + 1);
635           }
636#endif
637        for (i = 0, j = 0, e1 = 0; (unsigned) i < l; i++) {
638            if (format[i] >= '0' && format[i] <= '9')
639                j++;
640            if (is_utf8_follow(format[i]))
641                e1++;
642        }
643        /* |l-e1|   => number of {\it characters} not {\it bytes} */
644        /* |l-j|   => number of pattern bytes */
645        /* |l-e1-j| => number of pattern characters */
646        pat = (unsigned char *) malloc((1 + l - (size_t) j));
647        org = (char *) malloc((size_t) (2 + l - (size_t) e1 - (size_t) j));
648        /* remove hyphenation encoders (digits) from pat */
649        org[0] = '0';
650        for (i = 0, j = 0, e1 = 0; (unsigned) i < l; i++) {
651            unsigned char c = format[i];
652            if (is_utf8_follow(c)) {
653                pat[j + e1++] = c;
654            } else if (c < '0' || c > '9') {
655                pat[e1 + j++] = c;
656                org[j] = '0';
657            } else {
658                org[j] = (char) c;
659            }
660        }
661        pat[e1 + j] = 0;
662        org[j + 1] = 0;
663        hyppat_insert(dict->patterns, pat, org);
664    }
665    dict->pat_length += (int) ((f - begin) + 2);        /* 2 for spurious spaces */
666    init_hash(&dict->merged);
667    v = new_HashIter(dict->patterns);
668    while (nextHash(v, &word)) {
669        int wordsize = (int) strlen((char *) word);
670        int j1, l1;
671        for (l1 = 1; l1 <= wordsize; l1++) {
672            if (is_utf8_follow(word[l1]))
673                continue;       /* Do not clip an utf8 sequence */
674            for (j1 = 1; j1 <= l1; j1++) {
675                char *subpat_pat;
676                int i1 = l1 - j1;
677                if (is_utf8_follow(word[i1]))
678                    continue;   /* Do not start halfway an utf8 sequence */
679                if ((subpat_pat =
680                     hyppat_lookup(dict->patterns, word + i1, j1)) != NULL) {
681                    char *newpat_pat;
682                    if ((newpat_pat =
683                         hyppat_lookup(dict->merged, word, l1)) == NULL) {
684                        char *neworg;
685                        unsigned char *newword =
686                            (unsigned char *) malloc((size_t) (l1 + 1));
687                        int e1 = 0;
688                        strncpy((char *) newword, (char *) word, (size_t) l1);
689                        newword[l1] = 0;
690                        for (i1 = 0; i1 < l1; i1++)
691                            if (is_utf8_follow(newword[i1]))
692                                e1++;
693                        neworg = malloc((size_t) (l1 + 2 - e1));
694                        sprintf(neworg, "%0*d", l1 + 1 - e1, 0);  /* fill with right amount of '0' */
695                        hyppat_insert(dict->merged, newword,
696                                      combine(neworg, subpat_pat));
697                    } else {
698                        combine(newpat_pat, subpat_pat);
699                    }
700                }
701            }
702        }
703    }
704    delete_HashIter(v);
705
706    init_hash(&dict->state_num);
707    state_insert(dict->state_num, hnj_strdup((const unsigned char *) ""), 0);
708    v = new_HashIter(dict->merged);
709    while (nextHashStealPattern(v, &word, &pattern)) {
710        static unsigned char mask[] = { 0x3F, 0x1F, 0xF, 0x7 };
711        int j1 = (int) strlen((char *) word);
712#ifdef VERBOSE
713        printf("word %s pattern %s, j = %d\n", word, pattern, j1);
714#endif
715        state_num = hnj_get_state(dict, word, &found);
716        dict->states[state_num].match = pattern;
717
718        /* now, put in the prefix transitions */
719        while (found < 0) {
720            j1--;
721            last_state = state_num;
722            ch = word[j1];
723            if (ch >= 0x80) {
724                int m;
725                int i1 = 1;
726                while (is_utf8_follow(word[j1 - i1]))
727                    i1++;
728                ch = word[j1 - i1] & mask[i1];
729                m = j1 - i1;
730                while (i1--) {
731                    ch = (ch << 6) + (0x3F & word[j1 - i1]);
732                }
733                j1 = m;
734            }
735            word[j1] = '\0';
736            state_num = hnj_get_state(dict, word, &found);
737            hnj_add_trans(dict, state_num, last_state, ch);
738        }
739    }
740    delete_HashIter(v);
741    clear_hyppat_hash(&dict->merged);
742
743    /* put in the fallback states */
744    {
745    int i, j = 0;
746    for (i = 0; i < HASH_SIZE; i++) {
747        for (e = dict->state_num->entries[i]; e; e = e->next) {
748            /* do not do state==0 otherwise things get confused */
749            if (e->u.state) {
750                for (j = 1; 1; j++) {
751                    state_num = state_lookup(dict->state_num, e->key + j);
752                    if (state_num >= 0)
753                        break;
754                }
755                dict->states[e->u.state].fallback_state = state_num;
756            }
757        }
758    }
759#ifdef VERBOSE
760    for (i = 0; i < HASH_SIZE; i++) {
761        for (e = dict->state_num->entries[i]; e; e = e->next) {
762            printf("%d string %s state %d, fallback=%d\n", i, e->key,
763                   e->u.state, dict->states[e->u.state].fallback_state);
764            for (j = 0; j < dict->states[e->u.state].num_trans; j++) {
765                printf(" u%4x->%d\n",
766                       (int) dict->states[e->u.state].trans[j].uni_ch,
767                       dict->states[e->u.state].trans[j].new_state);
768            }
769        }
770    }
771#endif
772    }
773    clear_state_hash(&dict->state_num);
774}
775
776@ @c
777void hnj_hyphen_hyphenate(HyphenDict * dict,
778                          halfword first1,
779                          halfword last1,
780                          int length,
781                          halfword left, halfword right, lang_variables * lan)
782{
783    int char_num;
784    halfword here;
785    int state = 0;
786    /* +2 for dots at each end, +1 for points /outside/ characters */
787    int ext_word_len = length + 2;
788    int hyphen_len = ext_word_len + 1;
789    char *hyphens = hnj_malloc(hyphen_len + 1);
790
791    /* Add a '.' to beginning and end to facilitate matching */
792    set_vlink(begin_point, first1);
793    set_vlink(end_point, get_vlink(last1));
794    set_vlink(last1, end_point);
795
796    for (char_num = 0; char_num < hyphen_len; char_num++) {
797        hyphens[char_num] = '0';
798    }
799    hyphens[hyphen_len] = 0;
800
801    /* now, run the finite state machine */
802    for (char_num = 0, here = begin_point; here != get_vlink(end_point);
803         here = get_vlink(here)) {
804
805        int ch;
806        if (here == begin_point || here == end_point)
807            ch = '.';
808        else
809            ch = get_lc_code(get_character(here));
810        while (state != -1) {
811#if 0
812            printf("%*s%s%c",char_num-strlen(get_state_str(state)),"",get_state_str(state),(char)ch);
813#endif
814            HyphenState *hstate = &dict->states[state];
815            int k;
816            for (k = 0; k < hstate->num_trans; k++) {
817                if (hstate->trans[k].uni_ch == ch) {
818                    char *match;
819                    state = hstate->trans[k].new_state;
820#if 0
821                    printf(" state %d\n",state);
822#endif
823                    match = dict->states[state].match;
824                    if (match) {
825                        /* +2 because:
826                         1 string length is one bigger than offset
827                         1 hyphenation starts before first character
828                         */
829                        int offset = (int) (char_num + 2 - (int) strlen(match));
830#if 0
831                        printf ("%*s%s\n", offset,"", match);
832#endif
833                        int m;
834                        for (m = 0; match[m]; m++) {
835                            if (hyphens[offset + m] < match[m])
836                                hyphens[offset + m] = match[m];
837                        }
838                    }
839                    goto try_next_letter;
840                }
841            }
842            state = hstate->fallback_state;
843#if 0
844            printf (" back to %d\n", state);
845#endif
846        }
847        /* nothing worked, let's go to the next character */
848        state = 0;
849      try_next_letter:;
850        char_num++;
851    }
852
853    /* restore the correct pointers */
854    set_vlink(last1, get_vlink(end_point));
855
856    /* pattern is \.{\^.\^w\^o\^r\^d\^.\^}   |word_len|=4, |ext_word_len|=6, |hyphens|=7
857     * check      \.{    \^ \^ \^    }   so drop first two and stop after |word_len-1|
858     */
859    for (here = first1, char_num = 2; here != left; here = get_vlink(here))
860        char_num++;
861    for (; here != right; here = get_vlink(here)) {
862        if (hyphens[char_num] & 1)
863            here = insert_syllable_discretionary(here, lan);
864        char_num++;
865    }
866    hnj_free(hyphens);
867}
868