1 /*
2   LibRCC - module providing charset recoding for the specified language
3 
4   Copyright (C) 2005-2008 Suren A. Chilingaryan <csa@dside.dyndns.org>
5 
6   This library is free software; you can redistribute it and/or modify it
7   under the terms of the GNU Lesser General Public License version 2.1 or later
8   as published by the Free Software Foundation.
9 
10   This library is distributed in the hope that it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
13   for more details.
14 
15   You should have received a copy of the GNU Lesser General Public License
16   along with this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19 
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 
24 #include "../config.h"
25 
26 #include "internal.h"
27 #include "fs.h"
28 
rccConfigDetectCharsetInternal(rcc_language_config config,rcc_class_id class_id,const char * buf,size_t len)29 static rcc_autocharset_id rccConfigDetectCharsetInternal(rcc_language_config config, rcc_class_id class_id, const char *buf, size_t len) {
30     int err;
31     rcc_context ctx;
32     rcc_class_type class_type;
33     rcc_autocharset_id autocharset_id;
34 
35     if ((!buf)||(!config)) return (rcc_autocharset_id)-1;
36 
37     ctx = config->ctx;
38 
39     err = rccConfigConfigure(config);
40     if (err) return (rcc_autocharset_id)-1;
41 
42     class_type = rccGetClassType(ctx, class_id);
43     if ((class_type != RCC_CLASS_FS)||((class_type == RCC_CLASS_FS)&&(rccGetOption(ctx, RCC_OPTION_AUTODETECT_FS_TITLES)))) {
44 	rccMutexLock(config->mutex);
45 	autocharset_id = rccEngineDetectCharset(&config->engine_ctx, buf, len);
46 	rccMutexUnLock(config->mutex);
47 	return autocharset_id;
48     }
49 
50     return (rcc_autocharset_id)-1;
51 }
52 
53 
rccConfigDetectCharset(rcc_language_config config,rcc_class_id class_id,const char * buf,size_t len)54 rcc_autocharset_id rccConfigDetectCharset(rcc_language_config config, rcc_class_id class_id, const char *buf, size_t len) {
55     return rccConfigDetectCharsetInternal(config, class_id, buf, len);
56 }
57 
rccConfigSizedFrom(rcc_language_config config,rcc_class_id class_id,const char * buf,size_t len)58 rcc_string rccConfigSizedFrom(rcc_language_config config, rcc_class_id class_id, const char *buf, size_t len) {
59     rcc_context ctx;
60     rcc_class_type class_type;
61     rcc_string result;
62     rcc_option_value usedb4;
63     rcc_autocharset_id charset_id;
64     const char *charset;
65 
66 
67     if (!config) return NULL;
68     ctx = config->ctx;
69 
70     if (rccStringSizedCheck(buf, len)) return NULL;
71 
72     usedb4 = rccGetOption(ctx, RCC_OPTION_LEARNING_MODE);
73 
74     if (usedb4&RCC_OPTION_LEARNING_FLAG_USE) {
75 	result = rccDb4GetKey(ctx->db4ctx, buf, len);
76 	if (result) {
77 	     if (rccStringFixID(result, ctx)) free(result);
78 	     else return result;
79 	}
80     }
81 
82     class_type = rccGetClassType(ctx, class_id);
83 
84     if (class_type == RCC_CLASS_KNOWN) charset_id = (rcc_autocharset_id)-1;
85     else charset_id = rccConfigDetectCharset(config, class_id, buf, len);
86     if (charset_id != (rcc_autocharset_id)-1)
87 	charset = rccConfigGetAutoCharsetName(config, charset_id);
88     else
89 	charset = rccConfigGetCurrentCharsetName(config, class_id);
90 
91     if (charset) {
92 	result = rccSizedFromCharset(ctx, charset, buf, len);
93 	if (result) rccStringChangeID(result, rccGetLanguageByName(ctx, config->language->sn));
94 	return result;
95     }
96 
97     return NULL;
98 }
99 
100 /* The supplied config have priority over language tag in the buf! */
rccConfigSizedTo(rcc_language_config config,rcc_class_id class_id,rcc_const_string buf,size_t * rlen)101 char *rccConfigSizedTo(rcc_language_config config, rcc_class_id class_id, rcc_const_string buf, size_t *rlen) {
102     char *result;
103     const char *charset;
104 
105     if (!config) return NULL;
106 
107     if ((rccGetClassType(config->ctx, class_id) == RCC_CLASS_FS)&&(rccGetOption(config->ctx, RCC_OPTION_AUTODETECT_FS_NAMES))) {
108 	result = rccFS5(config->ctx, config, class_id, rccStringGetString(buf));
109 	if (result) {
110 	    if (rlen) *rlen = strlen(result);
111 	    return result;
112 	}
113     }
114 
115     charset = rccConfigGetCurrentCharsetName(config, class_id);
116 
117     if (charset)
118 	return rccSizedToCharset(config->ctx, charset, buf, rlen);
119 
120     return NULL;
121 }
122 
123 
/*
 * Recodes a buffer from the charset of class `from` into the current charset
 * of class `to`, using the given language configuration.
 *
 * Returns NULL if config is NULL or rccStringSizedCheck(buf, len) is non-zero.
 * When RCC_OPTION_LEARNING_MODE has RCC_OPTION_LEARNING_FLAG_USE set and
 * rccDb4GetKey() yields a string whose ID can be fixed, that string is run
 * through rccConfigSizedTo() for class `to` and the outcome is returned.
 * Otherwise the source charset is the autodetected one (detection is skipped
 * for RCC_CLASS_KNOWN classes) or, failing that, the current charset of
 * `from`; the buffer is then passed to rccSizedRecodeCharsets().
 *
 * Returns NULL when either charset name is unavailable.
 */
char *rccConfigSizedRecode(rcc_language_config config, rcc_class_id from, rcc_class_id to, const char *buf, size_t len, size_t *rlen) {
    rcc_context context;
    rcc_option_value learning_mode;
    rcc_autocharset_id detected;
    const char *source_charset;
    const char *target_charset;

    if (!config) {
        return NULL;
    }
    context = config->ctx;

    if (rccStringSizedCheck(buf, len)) {
        return NULL;
    }

    learning_mode = rccGetOption(context, RCC_OPTION_LEARNING_MODE);
    if (learning_mode & RCC_OPTION_LEARNING_FLAG_USE) {
        rcc_string cached = rccDb4GetKey(context->db4ctx, buf, len);

        if (cached) {
            if (!rccStringFixID(cached, context)) {
                rcc_string recoded = rccConfigSizedTo(config, to, cached, rlen);

                free(cached);
                return recoded;
            }
            /* The stored entry could not be fixed up; drop it and recode. */
            free(cached);
        }
    }

    detected = (rcc_autocharset_id)-1;
    if (rccGetClassType(context, from) != RCC_CLASS_KNOWN) {
        detected = rccConfigDetectCharset(config, from, buf, len);
    }

    if (detected == (rcc_autocharset_id)-1) {
        source_charset = rccConfigGetCurrentCharsetName(config, from);
    } else {
        source_charset = rccConfigGetAutoCharsetName(config, detected);
    }

    target_charset = rccConfigGetCurrentCharsetName(config, to);

    if (!source_charset || !target_charset) {
        return NULL;
    }

    return rccSizedRecodeCharsets(context, source_charset, target_charset, buf, len, rlen);
}
170 
171 
/*
 * Recodes a buffer belonging to class_id into the explicitly named charset.
 *
 * Returns NULL if config is NULL or rccStringSizedCheck(buf, len) is non-zero.
 * When RCC_OPTION_LEARNING_MODE has RCC_OPTION_LEARNING_FLAG_USE set and
 * rccDb4GetKey() yields a string whose ID can be fixed, that string is
 * converted with rccSizedToCharset() and the outcome is returned.
 * Otherwise the source charset is the autodetected one (detection is skipped
 * for RCC_CLASS_KNOWN classes) or, failing that, the class' current charset;
 * the buffer is then passed to rccSizedRecodeCharsets().
 *
 * Returns NULL when no source charset name is available.
 */
char *rccConfigSizedRecodeToCharset(rcc_language_config config, rcc_class_id class_id, const char *charset, rcc_const_string buf, size_t len, size_t *rlen) {
    rcc_context context;
    rcc_option_value learning_mode;
    rcc_autocharset_id detected;
    const char *source_charset;

    if (!config) {
        return NULL;
    }
    context = config->ctx;

    if (rccStringSizedCheck(buf, len)) {
        return NULL;
    }

    learning_mode = rccGetOption(context, RCC_OPTION_LEARNING_MODE);
    if (learning_mode & RCC_OPTION_LEARNING_FLAG_USE) {
        rcc_string cached = rccDb4GetKey(context->db4ctx, buf, len);

        if (cached) {
            if (!rccStringFixID(cached, context)) {
                rcc_string recoded = rccSizedToCharset(context, charset, cached, rlen);

                free(cached);
                return recoded;
            }
            /* The stored entry could not be fixed up; drop it and recode. */
            free(cached);
        }
    }

    detected = (rcc_autocharset_id)-1;
    if (rccGetClassType(context, class_id) != RCC_CLASS_KNOWN) {
        detected = rccConfigDetectCharset(config, class_id, buf, len);
    }

    if (detected == (rcc_autocharset_id)-1) {
        source_charset = rccConfigGetCurrentCharsetName(config, class_id);
    } else {
        source_charset = rccConfigGetAutoCharsetName(config, detected);
    }

    if (!source_charset) {
        return NULL;
    }

    return rccSizedRecodeCharsets(context, source_charset, charset, buf, len, rlen);
}
215 
/*
 * Recodes a buffer from the explicitly named charset into the current charset
 * of class_id by calling rccSizedRecodeCharsets().
 *
 * Returns NULL when config is NULL or when no current charset name is
 * available for the class; otherwise returns whatever
 * rccSizedRecodeCharsets() returns.
 */
char *rccConfigSizedRecodeFromCharset(rcc_language_config config, rcc_class_id class_id, const char *charset, const char *buf, size_t len, size_t *rlen) {
    rcc_context context;
    const char *target_charset;

    if (!config) {
        return NULL;
    }
    context = config->ctx;

    target_charset = rccConfigGetCurrentCharsetName(config, class_id);
    if (!target_charset) {
        return NULL;
    }

    return rccSizedRecodeCharsets(context, charset, target_charset, buf, len, rlen);
}
230