xref: /minix/lib/libintl/gettext_iconv.c (revision fb9c64b2)
1 /*	$NetBSD: gettext_iconv.c,v 1.8 2009/02/18 13:08:22 yamt Exp $	*/
2 
3 /*-
4  * Copyright (c) 2004 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $Citrus$
29  */
30 
31 
32 #include <sys/types.h>
33 #include <sys/param.h>
34 
35 #include <errno.h>
36 #include <iconv.h>
37 #include <libintl.h>
38 #include <langinfo.h>
39 #include <search.h>
40 #include <stdlib.h>
41 #include <string.h>
42 
43 #include "libintl_local.h"
44 
/*
 * One cached conversion: pairs the pointer of an original message with
 * the pointer of the text that was produced for it.
 */
struct cache {
	const char *c_origmsg;		/* lookup key; compared by cache_cmp() */
	const char *c_resultmsg;	/* text stored for c_origmsg */
};

/* File-local helpers that maintain the tsearch(3) tree of cache entries. */
static const struct cache *cache_find(const char *msg,
    struct domainbinding *db);
static int cache_enter(const char *origmsg, const char *resultmsg);
static int cache_cmp(const void *va, const void *vb);

/* Root handed to tsearch()/tfind(); starts out NULL (static storage). */
static void *cacheroot;
55 
56 /* ARGSUSED1 */
57 static const struct cache *
58 cache_find(const char *msg, struct domainbinding *db)
59 {
60 	struct cache key;
61 	struct cache **c;
62 
63 	key.c_origmsg = msg;
64 	c = tfind(&key, &cacheroot, cache_cmp);
65 
66 	return c ? *c : NULL;
67 }
68 
69 static int
70 cache_enter(const char *origmsg, const char *resultmsg)
71 {
72 	struct cache *c;
73 
74 	c = malloc(sizeof(*c));
75 	if (c == NULL)
76 		return -1;
77 
78 	c->c_origmsg = origmsg;
79 	c->c_resultmsg = resultmsg;
80 
81 	if (tsearch(c, &cacheroot, cache_cmp) == NULL) {
82 		free(c);
83 		return -1;
84 	}
85 
86 	return 0;
87 }
88 
89 static int
90 cache_cmp(const void *va, const void *vb)
91 {
92 	const struct cache *a = va;
93 	const struct cache *b = vb;
94 	int result;
95 
96 	if (a->c_origmsg > b->c_origmsg) {
97 		result = 1;
98 	} else if (a->c_origmsg < b->c_origmsg) {
99 		result = -1;
100 	} else {
101 		result = 0;
102 	}
103 
104 	return result;
105 }
106 
107 #define	GETTEXT_ICONV_MALLOC_CHUNK	(16 * 1024)
108 
109 const char *
110 __gettext_iconv(const char *origmsg, struct domainbinding *db)
111 {
112 	const char *tocode;
113 	const char *fromcode = db->mohandle.mo.mo_charset;
114 	const struct cache *cache;
115 	const char *result;
116 	iconv_t cd;
117 	const char *src;
118 	char *dst;
119 	size_t origlen;
120 	size_t srclen;
121 	size_t dstlen;
122 	size_t nvalid;
123 	int savederrno = errno;
124 
125 	/*
126 	 * static buffer for converted texts.
127 	 *
128 	 * note:
129 	 * we never free buffers once returned to callers.
130 	 * because of interface design of gettext, we can't know
131 	 * the lifetime of them.
132 	 */
133 	static char *buffer;
134 	static size_t bufferlen;
135 
136 	/*
137 	 * don't convert message if *.mo doesn't specify codeset.
138 	 */
139 	if (fromcode == NULL)
140 		return origmsg;
141 
142 	tocode = db->codeset;
143 	if (tocode == NULL) {
144 		/*
145 		 * codeset isn't specified explicitly by
146 		 * bind_textdomain_codeset().
147 		 * use current locale(LC_CTYPE)'s codeset.
148 		 *
149 		 * XXX maybe wrong; it can mismatch with
150 		 * environment variable setting.
151 		 */
152 		tocode = nl_langinfo(CODESET);
153 	}
154 
155 	/*
156 	 * shortcut if possible.
157 	 * XXX should handle aliases
158 	 */
159 	if (!strcasecmp(tocode, fromcode))
160 		return origmsg;
161 
162 	/* XXX LOCK */
163 
164 	/* XXX should detect change of tocode and purge caches? */
165 
166 	/*
167 	 * see if we have already converted this message.
168 	 */
169 	cache = cache_find(origmsg, db);
170 	if (cache) {
171 		result = cache->c_resultmsg;
172 		goto out;
173 	}
174 
175 	origlen = strlen(origmsg) + 1;
176 again:
177 	cd = iconv_open(tocode, fromcode);
178 	if (cd == (iconv_t)-1) {
179 		result = origmsg;
180 		goto out;
181 	}
182 
183 	src = origmsg;
184 	srclen = origlen;
185 	dst = buffer;
186 	dstlen = bufferlen;
187 	nvalid = iconv(cd, &src, &srclen, &dst, &dstlen);
188 	iconv_close(cd);
189 
190 	if (nvalid == (size_t)-1) {
191 		/*
192 		 * try to allocate a new buffer.
193 		 *
194 		 * just give up if GETTEXT_ICONV_MALLOC_CHUNK was not enough.
195 		 */
196 		if (errno == E2BIG &&
197 		    bufferlen != GETTEXT_ICONV_MALLOC_CHUNK) {
198 			buffer = malloc(GETTEXT_ICONV_MALLOC_CHUNK);
199 			if (buffer) {
200 				bufferlen = GETTEXT_ICONV_MALLOC_CHUNK;
201 				goto again;
202 			}
203 		}
204 
205 		result = origmsg;
206 	} else if (cache_enter(origmsg, buffer)) {
207 		/*
208 		 * failed to enter cache.  give up.
209 		 */
210 		result = origmsg;
211 	} else {
212 		size_t resultlen = dst - buffer;
213 
214 		result = buffer;
215 		bufferlen -= resultlen;
216 		buffer += resultlen;
217 	}
218 
219 out:
220 	/* XXX UNLOCK */
221 	errno = savederrno;
222 
223 	return result;
224 }
225