1 #ifndef EVAS_TEXTBLOCK_HYPHENATION_H
2 #define EVAS_TEXTBLOCK_HYPHENATION_H
3 #ifdef HAVE_HYPHEN
4 #include <hyphen.h>
5
6 typedef struct
7 {
8 const char *lang;
9 HyphenDict *dict;
10 } Dict_Hyphen;
11
12 /* Hyphenation dictionaries */
13 static Dict_Hyphen _dicts_hyphen[64];
14 static size_t _hyphens_num = 0;
15 static size_t _hyphen_clients = 0;
16
17 static void
_dicts_hyphen_init(Eo * eo_obj)18 _dicts_hyphen_init(Eo *eo_obj)
19 {
20 Efl_Canvas_Textblock_Data *o = efl_data_scope_get(eo_obj, MY_CLASS);
21
22 if (!o->hyphenating)
23 {
24 _hyphen_clients++;
25 o->hyphenating = EINA_TRUE;
26 }
27 }
28
29 static HyphenDict *
_dict_hyphen_load(const char * lang)30 _dict_hyphen_load(const char *lang)
31 {
32 Eina_Iterator *it = NULL;
33 Eina_File_Direct_Info *dir;
34 HyphenDict *dict = NULL;
35 const char *env_dir = getenv("EVAS_DICTS_HYPHEN_DIR");
36
37 if (env_dir && strlen(env_dir) > 0)
38 it = eina_file_direct_ls(env_dir);
39
40 if (!it) it = eina_file_direct_ls(EVAS_DICTS_HYPHEN_DIR);
41
42 if (!it)
43 {
44 ERR("Couldn't list files in hyphens path: %s\n", EVAS_DICTS_HYPHEN_DIR);
45 return NULL;
46 }
47
48 /* The following is based on how files are installed in arch linux:
49 * the files are in the pattern of "hyph_xx_XX.dic" (e.g. hyph_en_US.dic).
50 * We are actually trying a bit more in case these are installed in another
51 * name. We assume that they probably end in "xx_XX.dic" anyway. */
52 EINA_ITERATOR_FOREACH(it, dir)
53 {
54 const char *file = dir->path + dir->name_start;
55 char *prefix_off; /* 'hyph_' prefix (may be in some distros) */
56 char *dic_off; /* '.dic' file extension offset */
57
58 /* Check a few assumptions and reject if aren't met. */
59 prefix_off = strstr(file, "hyph_");
60 dic_off = strrchr(file, '.');
61 if (!dic_off || ((size_t) (dic_off - file) + 4 != dir->name_length) ||
62 (dic_off - file < 5) ||
63 ((dic_off - file > 0) && !prefix_off) ||
64 strncmp(dic_off, ".dic", 4) || strncmp((dic_off - 5), lang, strlen(lang)))
65 {
66 continue;
67 }
68
69 dict = hnj_hyphen_load(dir->path);
70 if (!dict)
71 {
72 ERR("Couldn't load hyphen dictionary: %s\n", dic_off - 5);
73 continue;
74 }
75 _dicts_hyphen[_hyphens_num].lang = eina_strndup(dic_off - 5, 5);
76 _dicts_hyphen[_hyphens_num++].dict = dict;
77 break;
78 }
79
80 if (it) eina_iterator_free(it);
81
82 return dict;
83 }
84
85 static void
_dicts_hyphen_free(void)86 _dicts_hyphen_free(void)
87 {
88 if (_hyphens_num == 0) return;
89
90 for (size_t i = 0; i < _hyphens_num; i++)
91 {
92 hnj_hyphen_free(_dicts_hyphen[i].dict);
93 }
94
95 _hyphens_num = 0;
96 }
97
98 static inline void
_dicts_hyphen_detach(Eo * eo_obj)99 _dicts_hyphen_detach(Eo *eo_obj)
100 {
101 Efl_Canvas_Textblock_Data *o = efl_data_scope_get(eo_obj, MY_CLASS);
102
103 if (!o->hyphenating) return;
104 o->hyphenating = EINA_FALSE;
105 _hyphen_clients--;
106 if (_hyphen_clients == 0) _dicts_hyphen_free();
107 }
108
109 /* Returns the hyphen dictionary that matches the given language
110 * string. The string should be in the format xx_XX e.g. en_US */
111 static inline HyphenDict *
_hyphen_dict_get_from_lang(const char * lang)112 _hyphen_dict_get_from_lang(const char *lang)
113 {
114 if (!lang || !(*lang))
115 {
116 if (!lang) lang = evas_common_language_from_locale_full_get();
117 if (!lang || !(*lang)) return NULL;
118 }
119
120 for (size_t i = 0; i < _hyphens_num; i++)
121 {
122 if (!strcmp(_dicts_hyphen[i].lang, lang))
123 {
124 return _dicts_hyphen[i].dict;
125 }
126 }
127
128 return _dict_hyphen_load(lang);
129 }
130
131 static char *
_layout_wrap_hyphens_get(const Eina_Unicode * text,const char * lang,int word_start,int word_len)132 _layout_wrap_hyphens_get(const Eina_Unicode *text, const char *lang,
133 int word_start, int word_len)
134 {
135 char *hyphens = NULL;
136 char **rep = NULL;
137 int *pos = NULL;
138 int *cut = NULL;
139 HyphenDict *dict;
140 char *converted_text = NULL;
141 size_t converted_text_offset = 0;
142 size_t converted_len = 0;
143
144 dict = _hyphen_dict_get_from_lang(lang);
145 if (!dict)
146 {
147 ERR("Couldn't find matching dictionary and couldn't fallback to locale %s\n", lang);
148 return NULL;
149 }
150
151 /* Convert UTF-32 encoded text to the other encoding
152 * which is described in hyphen dictionary. */
153 if (strcmp(dict->cset, "UTF-32"))
154 {
155 converted_text = eina_str_convert_len("UTF-32", dict->cset,
156 (char *)(text + word_start),
157 word_len * sizeof(Eina_Unicode),
158 &converted_len);
159
160 if (!converted_text) goto hyphens_done;
161
162 /* Skip BOM character (0xFFFE) from converted text */
163 if ((converted_len >= 2) &&
164 (converted_text[0] == 0xff) &&
165 (converted_text[1] == 0xfe))
166 converted_text_offset = 2;
167
168 /* If there is only a BOM character, return NULL */
169 if (converted_len == converted_text_offset)
170 goto hyphens_done;
171 }
172
173 if (converted_text)
174 {
175 hyphens = malloc(sizeof(char) * (converted_len + 5));
176 hnj_hyphen_hyphenate2(dict, converted_text + converted_text_offset,
177 (int)(converted_len - converted_text_offset), hyphens, NULL, &rep, &pos, &cut);
178 }
179 else
180 {
181 hyphens = malloc(sizeof(char) * (word_len + 5));
182 hnj_hyphen_hyphenate2(dict, (char *)(text + word_start),
183 word_len, hyphens, NULL, &rep, &pos, &cut);
184 }
185
186 hyphens_done:
187 if (converted_text) free(converted_text);
188
189 return hyphens;
190 }
191
192 #endif //HAVE_HYPHEN
#endif //EVAS_TEXTBLOCK_HYPHENATION_H
194