1 #ifndef EVAS_TEXTBLOCK_HYPHENATION_H
2 #define EVAS_TEXTBLOCK_HYPHENATION_H
3 #ifdef HAVE_HYPHEN
4 #include <hyphen.h>
5 
6 typedef struct
7 {
8    const char *lang;
9    HyphenDict *dict;
10 } Dict_Hyphen;
11 
12 /* Hyphenation dictionaries */
13 static Dict_Hyphen _dicts_hyphen[64];
14 static size_t     _hyphens_num = 0;
15 static size_t     _hyphen_clients = 0;
16 
17 static void
_dicts_hyphen_init(Eo * eo_obj)18 _dicts_hyphen_init(Eo *eo_obj)
19 {
20    Efl_Canvas_Textblock_Data *o = efl_data_scope_get(eo_obj, MY_CLASS);
21 
22    if (!o->hyphenating)
23      {
24         _hyphen_clients++;
25         o->hyphenating = EINA_TRUE;
26      }
27 }
28 
29 static HyphenDict *
_dict_hyphen_load(const char * lang)30 _dict_hyphen_load(const char *lang)
31 {
32    Eina_Iterator *it = NULL;
33    Eina_File_Direct_Info *dir;
34    HyphenDict *dict = NULL;
35    const char *env_dir = getenv("EVAS_DICTS_HYPHEN_DIR");
36 
37    if (env_dir && strlen(env_dir) > 0)
38      it = eina_file_direct_ls(env_dir);
39 
40    if (!it) it = eina_file_direct_ls(EVAS_DICTS_HYPHEN_DIR);
41 
42    if (!it)
43      {
44         ERR("Couldn't list files in hyphens path: %s\n", EVAS_DICTS_HYPHEN_DIR);
45         return NULL;
46      }
47 
48    /* The following is based on how files are installed in arch linux:
49     * the files are in the pattern of "hyph_xx_XX.dic" (e.g. hyph_en_US.dic).
50     * We are actually trying a bit more in case these are installed in another
51     * name. We assume that they probably end in "xx_XX.dic" anyway. */
52    EINA_ITERATOR_FOREACH(it, dir)
53      {
54         const char *file = dir->path + dir->name_start;
55         char *prefix_off; /* 'hyph_' prefix (may be in some distros) */
56         char *dic_off; /* '.dic' file extension offset */
57 
58         /* Check a few assumptions and reject if aren't met. */
59         prefix_off = strstr(file, "hyph_");
60         dic_off = strrchr(file, '.');
61         if (!dic_off || ((size_t) (dic_off - file) + 4 != dir->name_length) ||
62             (dic_off - file < 5)  ||
63             ((dic_off - file > 0) && !prefix_off) ||
64             strncmp(dic_off, ".dic", 4) || strncmp((dic_off - 5), lang, strlen(lang)))
65           {
66              continue;
67           }
68 
69         dict = hnj_hyphen_load(dir->path);
70         if (!dict)
71           {
72              ERR("Couldn't load hyphen dictionary: %s\n", dic_off - 5);
73              continue;
74           }
75         _dicts_hyphen[_hyphens_num].lang = eina_strndup(dic_off - 5, 5);
76         _dicts_hyphen[_hyphens_num++].dict = dict;
77         break;
78      }
79 
80    if (it) eina_iterator_free(it);
81 
82    return dict;
83 }
84 
85 static void
_dicts_hyphen_free(void)86 _dicts_hyphen_free(void)
87 {
88    if (_hyphens_num == 0) return;
89 
90    for (size_t i = 0; i < _hyphens_num; i++)
91      {
92         hnj_hyphen_free(_dicts_hyphen[i].dict);
93      }
94 
95    _hyphens_num = 0;
96 }
97 
98 static inline void
_dicts_hyphen_detach(Eo * eo_obj)99 _dicts_hyphen_detach(Eo *eo_obj)
100 {
101    Efl_Canvas_Textblock_Data *o = efl_data_scope_get(eo_obj, MY_CLASS);
102 
103    if (!o->hyphenating) return;
104    o->hyphenating = EINA_FALSE;
105    _hyphen_clients--;
106    if (_hyphen_clients == 0) _dicts_hyphen_free();
107 }
108 
109 /* Returns the hyphen dictionary that matches the given language
110  * string. The string should be in the format xx_XX e.g. en_US */
111 static inline HyphenDict *
_hyphen_dict_get_from_lang(const char * lang)112 _hyphen_dict_get_from_lang(const char *lang)
113 {
114    if (!lang || !(*lang))
115      {
116         if (!lang) lang = evas_common_language_from_locale_full_get();
117         if (!lang || !(*lang)) return NULL;
118      }
119 
120    for (size_t i = 0; i < _hyphens_num; i++)
121      {
122         if (!strcmp(_dicts_hyphen[i].lang, lang))
123           {
124              return _dicts_hyphen[i].dict;
125           }
126      }
127 
128    return _dict_hyphen_load(lang);
129 }
130 
131 static char *
_layout_wrap_hyphens_get(const Eina_Unicode * text,const char * lang,int word_start,int word_len)132 _layout_wrap_hyphens_get(const Eina_Unicode *text, const char *lang,
133       int word_start, int word_len)
134 {
135    char *hyphens = NULL;
136    char **rep = NULL;
137    int *pos = NULL;
138    int *cut = NULL;
139    HyphenDict *dict;
140    char *converted_text = NULL;
141    size_t converted_text_offset = 0;
142    size_t converted_len = 0;
143 
144    dict = _hyphen_dict_get_from_lang(lang);
145    if (!dict)
146      {
147         ERR("Couldn't find matching dictionary and couldn't fallback to locale %s\n", lang);
148         return NULL;
149      }
150 
151    /* Convert UTF-32 encoded text to the other encoding
152     * which is described in hyphen dictionary. */
153    if (strcmp(dict->cset, "UTF-32"))
154      {
155         converted_text = eina_str_convert_len("UTF-32", dict->cset,
156                                               (char *)(text + word_start),
157                                               word_len * sizeof(Eina_Unicode),
158                                               &converted_len);
159 
160         if (!converted_text) goto hyphens_done;
161 
162         /* Skip BOM character (0xFFFE) from converted text */
163         if ((converted_len >= 2) &&
164             (converted_text[0] == 0xff) &&
165             (converted_text[1] == 0xfe))
166           converted_text_offset = 2;
167 
168         /* If there is only a BOM character, return NULL */
169         if (converted_len == converted_text_offset)
170           goto hyphens_done;
171      }
172 
173    if (converted_text)
174      {
175         hyphens = malloc(sizeof(char) * (converted_len + 5));
176         hnj_hyphen_hyphenate2(dict, converted_text + converted_text_offset,
177                               (int)(converted_len - converted_text_offset), hyphens, NULL, &rep, &pos, &cut);
178      }
179    else
180      {
181         hyphens = malloc(sizeof(char) * (word_len + 5));
182         hnj_hyphen_hyphenate2(dict, (char *)(text + word_start),
183                               word_len, hyphens, NULL, &rep, &pos, &cut);
184      }
185 
186 hyphens_done:
187    if (converted_text) free(converted_text);
188 
189    return hyphens;
190 }
191 
192 #endif //HAVE_HYPHEN
193 #endif //EVAS_TEXTBLOCK_HYPHENATION_H_
194