1 /*
2 LibRCC - module providing charset recoding for the specified language
3
4 Copyright (C) 2005-2008 Suren A. Chilingaryan <csa@dside.dyndns.org>
5
6 This library is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License version 2.1 or later
8 as published by the Free Software Foundation.
9
10 This library is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
13 for more details.
14
15 You should have received a copy of the GNU Lesser General Public License
16 along with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23
24 #include "../config.h"
25
26 #include "internal.h"
27 #include "fs.h"
28
rccConfigDetectCharsetInternal(rcc_language_config config,rcc_class_id class_id,const char * buf,size_t len)29 static rcc_autocharset_id rccConfigDetectCharsetInternal(rcc_language_config config, rcc_class_id class_id, const char *buf, size_t len) {
30 int err;
31 rcc_context ctx;
32 rcc_class_type class_type;
33 rcc_autocharset_id autocharset_id;
34
35 if ((!buf)||(!config)) return (rcc_autocharset_id)-1;
36
37 ctx = config->ctx;
38
39 err = rccConfigConfigure(config);
40 if (err) return (rcc_autocharset_id)-1;
41
42 class_type = rccGetClassType(ctx, class_id);
43 if ((class_type != RCC_CLASS_FS)||((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_TITLES)))) {
44 rccMutexLock(config->mutex);
45 autocharset_id = rccEngineDetectCharset(&config->engine_ctx, buf, len);
46 rccMutexUnLock(config->mutex);
47 return autocharset_id;
48 }
49
50 return (rcc_autocharset_id)-1;
51 }
52
53
rccConfigDetectCharset(rcc_language_config config,rcc_class_id class_id,const char * buf,size_t len)54 rcc_autocharset_id rccConfigDetectCharset(rcc_language_config config, rcc_class_id class_id, const char *buf, size_t len) {
55 return rccConfigDetectCharsetInternal(config, class_id, buf, len);
56 }
57
rccConfigSizedFrom(rcc_language_config config,rcc_class_id class_id,const char * buf,size_t len)58 rcc_string rccConfigSizedFrom(rcc_language_config config, rcc_class_id class_id, const char *buf, size_t len) {
59 rcc_context ctx;
60 rcc_class_type class_type;
61 rcc_string result;
62 rcc_option_value usedb4;
63 rcc_autocharset_id charset_id;
64 const char *charset;
65
66
67 if (!config) return NULL;
68 ctx = config->ctx;
69
70 if (rccStringSizedCheck(buf, len)) return NULL;
71
72 usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE);
73
74 if (usedb4&RCC_OPTION_LEARNING_FLAG_USE) {
75 result = rccDb4GetKey(ctx->db4ctx, buf, len);
76 if (result) {
77 if (rccStringFixID(result, ctx)) free(result);
78 else return result;
79 }
80 }
81
82 class_type = rccGetClassType(ctx, class_id);
83
84 if (class_type == RCC_CLASS_KNOWN) charset_id = (rcc_autocharset_id)-1;
85 else charset_id = rccConfigDetectCharset(config, class_id, buf, len);
86 if (charset_id != (rcc_autocharset_id)-1)
87 charset = rccConfigGetAutoCharsetName(config, charset_id);
88 else
89 charset = rccConfigGetCurrentCharsetName(config, class_id);
90
91 if (charset) {
92 result = rccSizedFromCharset(ctx, charset, buf, len);
93 if (result) rccStringChangeID(result, rccGetLanguageByName(ctx, config->language->sn));
94 return result;
95 }
96
97 return NULL;
98 }
99
/* The supplied config has priority over the language tag in buf! */
/*
 * Converts the rcc_string `buf` into the encoding of class `class_id`.
 *
 * config   - language configuration; NULL yields a NULL result
 * class_id - target class
 * buf      - string to convert
 * rlen     - optional; receives the result length
 *
 * When the class type is RCC_CLASS_FS and RCC_OPTION_AUTODETECT_FS_NAMES is
 * non-zero, rccFS5() is tried first on rccStringGetString(buf); a non-NULL
 * answer is returned immediately with *rlen (if supplied) set to its
 * strlen(). Otherwise the current charset of the class is looked up and the
 * conversion is delegated to rccSizedToCharset(), which also receives `rlen`.
 *
 * Returns NULL when no current charset name is available.
 */
char *rccConfigSizedTo(rcc_language_config config, rcc_class_id class_id, rcc_const_string buf, size_t *rlen) {
    rcc_context ctx;
    const char *target_charset;
    char *fsname;

    if (!config) return NULL;
    ctx = config->ctx;

    if ((rccGetClassType(ctx, class_id) == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_NAMES))) {
        fsname = rccFS5(ctx, config, class_id, rccStringGetString(buf));
        if (fsname) {
            if (rlen) *rlen = strlen(fsname);
            return fsname;
        }
    }

    target_charset = rccConfigGetCurrentCharsetName(config, class_id);
    if (!target_charset) return NULL;

    return rccSizedToCharset(ctx, target_charset, buf, rlen);
}
122
123
/*
 * Recodes `len` bytes of `buf` from class `from` to class `to`.
 *
 * Steps, in order:
 *  1. NULL is returned when `config` is NULL or when
 *     rccStringSizedCheck(buf, len) is non-zero.
 *  2. If RCC_OPTION_LEARNING_MODE has RCC_OPTION_LEARNING_FLAG_USE set and
 *     rccDb4GetKey() has an entry that rccStringFixID() accepts, that entry
 *     is converted with rccConfigSizedTo() for class `to`, the entry is
 *     freed and the conversion result returned. A rejected entry is freed
 *     and processing continues.
 *  3. The source charset is the detected one (detection is skipped for
 *     RCC_CLASS_KNOWN) or, failing that, the current charset of `from`.
 *     The target charset is the current charset of `to`.
 *  4. The work is delegated to rccSizedRecodeCharsets(), which also
 *     receives `rlen`.
 *
 * Returns NULL when either charset name is unavailable.
 */
char *rccConfigSizedRecode(rcc_language_config config, rcc_class_id from, rcc_class_id to, const char *buf, size_t len, size_t *rlen) {
    rcc_context ctx;
    rcc_string cached;
    rcc_string recoded;
    rcc_autocharset_id detected = (rcc_autocharset_id)-1;
    const char *fromcharset, *tocharset;

    if (!config) return NULL;
    ctx = config->ctx;

    if (rccStringSizedCheck(buf, len)) return NULL;

    if (rccGetOption(ctx, RCC_OPTION_LEARNING_MODE)&RCC_OPTION_LEARNING_FLAG_USE) {
        cached = rccDb4GetKey(ctx->db4ctx, buf, len);
        if (cached) {
            if (!rccStringFixID(cached, ctx)) {
                recoded = rccConfigSizedTo(config, to, cached, rlen);
                free(cached);
                return recoded;
            }
            /* The stored entry could not be fixed up: drop it and recode */
            free(cached);
        }
    }

    /* Detection is skipped for classes of type RCC_CLASS_KNOWN */
    if (rccGetClassType(ctx, from) != RCC_CLASS_KNOWN)
        detected = rccConfigDetectCharset(config, from, buf, len);

    if (detected == (rcc_autocharset_id)-1)
        fromcharset = rccConfigGetCurrentCharsetName(config, from);
    else
        fromcharset = rccConfigGetAutoCharsetName(config, detected);

    tocharset = rccConfigGetCurrentCharsetName(config, to);

    if ((!fromcharset)||(!tocharset)) return NULL;

    return rccSizedRecodeCharsets(ctx, fromcharset, tocharset, buf, len, rlen);
}
170
171
/*
 * Recodes `len` bytes of `buf` from class `class_id` into the explicitly
 * named `charset`.
 *
 * Steps, in order:
 *  1. NULL is returned when `config` is NULL or when
 *     rccStringSizedCheck(buf, len) is non-zero.
 *  2. If RCC_OPTION_LEARNING_MODE has RCC_OPTION_LEARNING_FLAG_USE set and
 *     rccDb4GetKey() has an entry that rccStringFixID() accepts, that entry
 *     is converted with rccSizedToCharset() into `charset`, the entry is
 *     freed and the conversion result returned. A rejected entry is freed
 *     and processing continues.
 *  3. The source charset is the detected one (detection is skipped for
 *     RCC_CLASS_KNOWN) or, failing that, the current charset of the class.
 *  4. The work is delegated to rccSizedRecodeCharsets(), which also
 *     receives `rlen`.
 *
 * `charset` is passed through without any check in this function.
 * Returns NULL when no source charset name is available.
 */
char *rccConfigSizedRecodeToCharset(rcc_language_config config, rcc_class_id class_id, const char *charset, rcc_const_string buf, size_t len, size_t *rlen) {
    rcc_context ctx;
    rcc_string cached;
    rcc_string recoded;
    rcc_autocharset_id detected = (rcc_autocharset_id)-1;
    const char *source_charset;

    if (!config) return NULL;
    ctx = config->ctx;

    if (rccStringSizedCheck(buf, len)) return NULL;

    if (rccGetOption(ctx, RCC_OPTION_LEARNING_MODE)&RCC_OPTION_LEARNING_FLAG_USE) {
        cached = rccDb4GetKey(ctx->db4ctx, buf, len);
        if (cached) {
            if (!rccStringFixID(cached, ctx)) {
                recoded = rccSizedToCharset(ctx, charset, cached, rlen);
                free(cached);
                return recoded;
            }
            /* The stored entry could not be fixed up: drop it and recode */
            free(cached);
        }
    }

    /* Detection is skipped for classes of type RCC_CLASS_KNOWN */
    if (rccGetClassType(ctx, class_id) != RCC_CLASS_KNOWN)
        detected = rccConfigDetectCharset(config, class_id, buf, len);

    if (detected == (rcc_autocharset_id)-1)
        source_charset = rccConfigGetCurrentCharsetName(config, class_id);
    else
        source_charset = rccConfigGetAutoCharsetName(config, detected);

    if (!source_charset) return NULL;

    return rccSizedRecodeCharsets(ctx, source_charset, charset, buf, len, rlen);
}
215
/*
 * Recodes `len` bytes of `buf` from the explicitly named `charset` into the
 * current charset of class `class_id`.
 *
 * No learning-mode lookup and no charset detection happen here: the target
 * charset is fetched with rccConfigGetCurrentCharsetName() and the work is
 * delegated to rccSizedRecodeCharsets(), which also receives `rlen`.
 *
 * Returns NULL when `config` is NULL or the class has no current charset
 * name; otherwise returns whatever rccSizedRecodeCharsets() returns.
 */
char *rccConfigSizedRecodeFromCharset(rcc_language_config config, rcc_class_id class_id, const char *charset, const char *buf, size_t len, size_t *rlen) {
    rcc_context ctx;
    const char *class_charset;

    if (!config) return NULL;
    ctx = config->ctx;

    class_charset = rccConfigGetCurrentCharsetName(config, class_id);

    return class_charset ? rccSizedRecodeCharsets(ctx, charset, class_charset, buf, len, rlen) : NULL;
}
230