1 /*
2 LibRCC - interface to enca library
3
4 Copyright (C) 2005-2008 Suren A. Chilingaryan <csa@dside.dyndns.org>
5
6 This library is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License version 2.1 or later
8 as published by the Free Software Foundation.
9
10 This library is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
13 for more details.
14
15 You should have received a copy of the GNU Lesser General Public License
16 along with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include <stdio.h>
21 #include <string.h>
22
23 #include "internal.h"
24 #include "plugin.h"
25 #include "engine.h"
26 #include "rccconfig.h"
27
28 #include "rccenca.h"
29 #ifdef RCC_ENCA_SUPPORT
30 # ifdef RCC_ENCA_DYNAMIC
31 # include "fake_enca.h"
32 # else
33 # include <enca.h>
34 # endif /* RCC_ENCA_DYNAMIC */
35 #endif /* RCC_ENCA_SUPPORT */
36
37 #ifdef RCC_ENCA_DYNAMIC
38 static rcc_library_handle enca_handle = NULL;
39 #endif /* RCC_ENCA_DYNAMIC */
40 static rcc_engine *enca_engines = NULL;
41
42
43 /* CORK, KEYBCS2 is missing */
44 rcc_enca_corrections rcc_enca_missing_corrections[] = {
45 { "be", "KOI8-UNI", "ISO-IR-111" },
46 { NULL, "macce", "MACCENTRALEUROPE" },
47 { "zh", "HZ", "HZ" },
48 { "sk", "KOI-8_CS_2", "CSKOI8R" },
49 { NULL, NULL, NULL }
50 };
51
52 rcc_enca_corrections rcc_enca_error_corrections[] = {
53 { NULL, "ECMA-cyrillic", "ISO-IR-111" },
54 { NULL, NULL, NULL }
55 };
56
57
rccEncaGetCorrection(const char * lang,const char * charset)58 static const char *rccEncaGetCorrection(const char *lang, const char *charset) {
59 int i;
60 for (i=0;rcc_enca_error_corrections[i].enca_charset;i++) {
61 if (((!rcc_enca_error_corrections[i].lang)||((lang)&&(!strcmp(lang, rcc_enca_error_corrections[i].lang))))&&(!strcmp(charset, rcc_enca_error_corrections[i].enca_charset)))
62 return rcc_enca_error_corrections[i].iconv_charset;
63 }
64 return charset;
65 }
66
rccEncaGetMissing(const char * lang,const char * charset)67 static const char *rccEncaGetMissing(const char *lang, const char *charset) {
68 int i;
69 for (i=0;rcc_enca_missing_corrections[i].enca_charset;i++) {
70 if (((!rcc_enca_missing_corrections[i].lang)||((lang)&&(!strcmp(lang, rcc_enca_missing_corrections[i].lang))))&&(!strcmp(charset, rcc_enca_missing_corrections[i].enca_charset)))
71 return rcc_enca_missing_corrections[i].iconv_charset;
72 }
73 return charset;
74 }
75
76
rccEncaInitContext(rcc_engine_context ctx)77 rcc_engine_internal rccEncaInitContext(rcc_engine_context ctx) {
78 #ifdef RCC_ENCA_SUPPORT
79 EncaAnalyser enca;
80
81 if ((!ctx)||(!ctx->config)) return NULL;
82
83 enca = enca_analyser_alloc(rccConfigGetLanguageName(ctx->config));
84 if (!enca) return NULL;
85
86 enca_set_threshold(enca, 1);
87 enca_set_multibyte(enca, 1);
88 enca_set_ambiguity(enca, 1);
89 enca_set_garbage_test(enca, 0);
90 enca_set_filtering(enca, 0);
91 enca_set_significant(enca,1);
92 enca_set_termination_strictness(enca,0);
93
94 return (rcc_engine_internal)enca;
95 #else /* RCC_ENCA_SUPPORT */
96 return NULL;
97 #endif /* RCC_ENCA_SUPPORT */
98 }
99
/*
 * Engine cleanup callback: releases the Enca analyser stored as the internal
 * handle of ctx, if there is one. Does nothing without RCC_ENCA_SUPPORT.
 */
void rccEncaFreeContext(rcc_engine_context ctx) {
#ifdef RCC_ENCA_SUPPORT
    rcc_engine_internal analyser = rccEngineGetInternal(ctx);

    if (!analyser) return;

    enca_analyser_free(analyser);
#endif /* RCC_ENCA_SUPPORT */
}
108
rccEnca(rcc_engine_context ctx,const char * buf,int len)109 rcc_autocharset_id rccEnca(rcc_engine_context ctx, const char *buf, int len) {
110 #ifdef RCC_ENCA_SUPPORT
111 rcc_engine_internal internal;
112 const char *charset;
113 EncaEncoding ee;
114
115 internal = rccEngineGetInternal(ctx);
116 if ((!internal)||(!buf)) return (rcc_charset_id)-1;
117
118 ee = enca_analyse_const((EncaAnalyser)ctx->internal,(const unsigned char*)buf,len?len:strlen(buf));
119 if (ee.charset<0) return (rcc_charset_id)-1;
120
121 charset = enca_charset_name(ee.charset, ENCA_NAME_STYLE_ICONV);
122 if (charset) {
123 charset = rccEncaGetCorrection(rccEngineGetLanguage(ctx)->sn, charset);
124 } else {
125 charset = rccEncaGetMissing(rccEngineGetLanguage(ctx)->sn, enca_charset_name(ee.charset, ENCA_NAME_STYLE_ENCA));
126 }
127 return rccEngineGetAutoCharsetByName(ctx, charset);
128 #else /* RCC_ENCA_SUPPORT */
129 return (rcc_charset_id)-1;
130 #endif /* RCC_ENCA_SUPPORT */
131 }
132
133 rcc_engine rcc_enca_engine = {
134 "Enca Library", &rccEncaInitContext, &rccEncaFreeContext, &rccEnca, {"UTF-8", NULL}
135 };
136
137
/*
 * With RCC_ENCA_DYNAMIC: opens the Enca library (unless it is already open)
 * and resolves every Enca entry point into its function pointer.
 * Without RCC_ENCA_DYNAMIC there is nothing to do.
 *
 * Returns 0 on success, -1 when the library can't be opened or at least one
 * symbol is missing (the library is closed again in that case).
 */
static int rccEncaLibraryLoad() {
#ifdef RCC_ENCA_DYNAMIC
    int complete;

    if (enca_handle) return 0;

    enca_handle = rccLibraryOpen(RCC_ENCA_LIB);
    if (!enca_handle) return -1;

    enca_set_multibyte = rccLibraryFind(enca_handle, "enca_set_multibyte");
    enca_set_interpreted_surfaces = rccLibraryFind(enca_handle, "enca_set_interpreted_surfaces");
    enca_set_ambiguity = rccLibraryFind(enca_handle, "enca_set_ambiguity");
    enca_set_filtering = rccLibraryFind(enca_handle, "enca_set_filtering");
    enca_set_garbage_test = rccLibraryFind(enca_handle, "enca_set_garbage_test");
    enca_set_termination_strictness = rccLibraryFind(enca_handle, "enca_set_termination_strictness");
    enca_set_significant = rccLibraryFind(enca_handle, "enca_set_significant");
    enca_set_threshold = rccLibraryFind(enca_handle, "enca_set_threshold");
    enca_charset_name = rccLibraryFind(enca_handle, "enca_charset_name");
    enca_get_language_charsets = rccLibraryFind(enca_handle, "enca_get_language_charsets");
    enca_analyser_alloc = rccLibraryFind(enca_handle, "enca_analyser_alloc");
    enca_analyser_free = rccLibraryFind(enca_handle, "enca_analyser_free");
    enca_analyse_const = rccLibraryFind(enca_handle, "enca_analyse_const");

    /* The library is usable only if every single symbol was found */
    complete = enca_set_multibyte && enca_set_interpreted_surfaces && enca_set_ambiguity
            && enca_set_filtering && enca_set_garbage_test && enca_set_termination_strictness
            && enca_set_significant && enca_set_threshold && enca_charset_name
            && enca_get_language_charsets && enca_analyser_alloc && enca_analyser_free
            && enca_analyse_const;

    if (!complete) {
        rccLibraryClose(enca_handle);
        enca_handle = NULL;
# ifdef RCC_DEBUG
        perror( "rccEnca. Incomplete function set in library" );
# endif /* RCC_DEBUG */
        return -1;
    }
#endif /* RCC_ENCA_DYNAMIC */

    return 0;
}
175
/*
 * With RCC_ENCA_DYNAMIC: closes the Enca library if it is open and clears the
 * handle. Without RCC_ENCA_DYNAMIC there is nothing to do.
 */
static void rccEncaLibraryUnload() {
#ifdef RCC_ENCA_DYNAMIC
    if (!enca_handle) return;

    rccLibraryClose(enca_handle);
    enca_handle = NULL;
#endif /* RCC_ENCA_DYNAMIC */
}
184
/*
 * Registers an Enca-based engine for every default language Enca supports.
 *
 * For each entry of rcc_default_languages with a two-letter short name, the
 * charsets known to Enca for that language are appended to a copy of
 * rcc_enca_engine, and that copy is added to the end of the language's
 * engine list.
 *
 * Returns 0 on success, when already initialised, or when the Enca library
 * can't be loaded (Enca is simply unavailable then); -1 when the engine
 * table can't be allocated. Without RCC_ENCA_SUPPORT it always returns 0.
 */
int rccEncaInit() {
#ifdef RCC_ENCA_SUPPORT
    unsigned int i, j, k, l;

    rcc_engine **engines;

    int *charsets;
    size_t n_charsets;
    const char *charset;

    if (enca_engines) return 0;

    /* One engine descriptor per default language, indexed like rcc_default_languages */
    i = 0;
    while (rcc_default_languages[i].sn) i++;

    enca_engines = (rcc_engine*)malloc(i * sizeof(rcc_engine));
    if (!enca_engines) return -1;

    if (rccEncaLibraryLoad()) return 0;

    for (i = 0; rcc_default_languages[i].sn; i++) {
        engines = rcc_default_languages[i].engines;

        /* Find the first free slot of the language's engine list */
        j = 0;
        while (engines[j]) j++;

        /*
         * Skip languages whose engine list is already full. The original code
         * lacked the ';' after the counting loop, so this check was the loop
         * body and never took effect.
         * NOTE(review): assumes engines[] holds RCC_MAX_ENGINES entries plus a
         * NULL terminator -- confirm against the language descriptor.
         */
        if (j >= RCC_MAX_ENGINES) continue;

        /* Only two-letter language names are passed to Enca */
        if (strlen(rcc_default_languages[i].sn) != 2) continue;

        charsets = enca_get_language_charsets(rcc_default_languages[i].sn, &n_charsets);
        if (!charsets) continue;

        /* Start from the template and append after its predefined charsets */
        memcpy(enca_engines + i, &rcc_enca_engine, sizeof(rcc_engine));
        k = 0;
        while (enca_engines[i].charsets[k]) k++;

        if (n_charsets + k >= RCC_MAX_CHARSETS) n_charsets = RCC_MAX_CHARSETS - k;

        for (l = 0; l < n_charsets; l++) {
            /* Enca returns NULL as the iconv-style name for many charsets */
            charset = enca_charset_name(charsets[l], ENCA_NAME_STYLE_ICONV);
            if (charset) {
                charset = rccEncaGetCorrection(rcc_default_languages[i].sn, charset);
            } else {
                charset = rccEncaGetMissing(rcc_default_languages[i].sn, enca_charset_name(charsets[l], ENCA_NAME_STYLE_ENCA));
            }
            enca_engines[i].charsets[k++] = charset;
        }

        /*
         * Terminate the list of THIS language's engine. The original indexed
         * enca_engines with the engine-slot index j here, clobbering an
         * unrelated engine's charset list.
         */
        enca_engines[i].charsets[k] = NULL;

        engines[j] = enca_engines + i;
        engines[j + 1] = NULL;

        /* The charset array returned by Enca is owned by the caller */
        free(charsets);
    }
#endif /* RCC_ENCA_SUPPORT */

    return 0;
}
241
/*
 * Counterpart of rccEncaInit(): unloads the Enca library and releases the
 * engine table. Does nothing without RCC_ENCA_SUPPORT.
 */
void rccEncaFree() {
#ifdef RCC_ENCA_SUPPORT
    rccEncaLibraryUnload();

    /* free(NULL) is a no-op, so no separate NULL check is needed */
    free(enca_engines);
    enca_engines = NULL;
#endif /* RCC_ENCA_SUPPORT */
}
251