1 /*	$NetBSD: normalizer.c,v 1.4 2014/12/10 04:37:55 christos Exp $	*/
2 
3 #ifndef lint
4 static char *rcsid = "Id: normalizer.c,v 1.1 2003/06/04 00:26:05 marka Exp ";
5 #endif
6 
7 /*
8  * Copyright (c) 2000,2002 Japan Network Information Center.
9  * All rights reserved.
10  *
11  * By using this file, you agree to the terms and conditions set forth bellow.
12  *
13  * 			LICENSE TERMS AND CONDITIONS
14  *
15  * The following License Terms and Conditions apply, unless a different
16  * license is obtained from Japan Network Information Center ("JPNIC"),
17  * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
18  * Chiyoda-ku, Tokyo 101-0047, Japan.
19  *
20  * 1. Use, Modification and Redistribution (including distribution of any
21  *    modified or derived work) in source and/or binary forms is permitted
22  *    under this License Terms and Conditions.
23  *
24  * 2. Redistribution of source code must retain the copyright notices as they
25  *    appear in each source code file, this License Terms and Conditions.
26  *
27  * 3. Redistribution in binary form must reproduce the Copyright Notice,
28  *    this License Terms and Conditions, in the documentation and/or other
29  *    materials provided with the distribution.  For the purposes of binary
30  *    distribution the "Copyright Notice" refers to the following language:
31  *    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
32  *
33  * 4. The name of JPNIC may not be used to endorse or promote products
34  *    derived from this Software without specific prior written approval of
35  *    JPNIC.
36  *
37  * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
38  *    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39  *    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
40  *    PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL JPNIC BE LIABLE
41  *    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
42  *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
43  *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
44  *    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
45  *    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
46  *    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
47  *    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
48  */
49 
50 #include <config.h>
51 
52 #include <stddef.h>
53 #include <stdlib.h>
54 #include <string.h>
55 #include <ctype.h>
56 
57 #include <idn/assert.h>
58 #include <idn/logmacro.h>
59 #include <idn/result.h>
60 #include <idn/normalizer.h>
61 #include <idn/strhash.h>
62 #include <idn/unormalize.h>
63 #include <idn/unicode.h>
64 #include <idn/ucs4.h>
65 #include <idn/debug.h>
66 #include <idn/util.h>
67 
/*
 * Number of scheme slots embedded directly in every normalizer context
 * (the `local_buf' array below).  idn_normalizer_create() also uses it
 * as the initial capacity of a new context.
 */
#define MAX_LOCAL_SCHEME	3

/* True once the global scheme table (`scheme_hash') has been created. */
#define INITIALIZED		(scheme_hash != NULL)

/*
 * One registered normalization scheme: its name and the procedure that
 * implements it.  Instances are allocated by idn_normalizer_register(),
 * which stores the name string in the same allocation, right after the
 * structure.
 */
typedef struct {
	char *name;
	idn_normalizer_proc_t proc;
} normalize_scheme_t;

/*
 * Normalizer context: an ordered list of schemes that
 * idn_normalizer_normalize() applies one after another.
 */
struct idn_normalizer {
	int nschemes;		/* number of entries of `schemes' in use */
	int scheme_size;	/* capacity of `schemes' */
	/*
	 * Points at `local_buf' right after creation; expand_schemes()
	 * switches it to a heap array once more room is needed.
	 */
	normalize_scheme_t **schemes;
	normalize_scheme_t *local_buf[MAX_LOCAL_SCHEME];
	int reference_count;	/* the context is freed when this drops to 0 */
};
84 
85 static idn__strhash_t scheme_hash;
86 
87 static idn__unicode_version_t vcur = NULL;
88 static idn__unicode_version_t v320 = NULL;
89 #define INIT_VERSION(version, var) \
90 	if (var == NULL) { \
91 		idn_result_t r = idn__unicode_create(version, &var); \
92 		if (r != idn_success) \
93 			return (r); \
94 	}
95 
96 static idn_result_t	expand_schemes(idn_normalizer_t ctx);
97 static idn_result_t	register_standard_normalizers(void);
98 static idn_result_t	normalizer_formkc(const unsigned long *from,
99 					  unsigned long *to, size_t tolen);
100 static idn_result_t	normalizer_formkc_v320(const unsigned long *from,
101 					       unsigned long *to,
102 					       size_t tolen);
103 
104 static struct standard_normalizer {
105 	char *name;
106 	idn_normalizer_proc_t proc;
107 } standard_normalizer[] = {
108 	{ "unicode-form-kc", normalizer_formkc },
109 	{ "unicode-form-kc/3.2.0", normalizer_formkc_v320 },
110 	{ "RFC3491", normalizer_formkc_v320 },
111 	{ NULL, NULL },
112 };
113 
114 idn_result_t
115 idn_normalizer_initialize(void) {
116 	idn__strhash_t hash;
117 	idn_result_t r;
118 
119 	TRACE(("idn_normalizer_initialize()\n"));
120 
121 	if (scheme_hash != NULL) {
122 		r = idn_success;	/* already initialized */
123 		goto ret;
124 	}
125 
126 	if ((r = idn__strhash_create(&hash)) != idn_success)
127 		goto ret;
128 	scheme_hash = hash;
129 
130 	/* Register standard normalizers */
131 	r = register_standard_normalizers();
132 ret:
133 	TRACE(("idn_normalizer_initialize(): %s\n", idn_result_tostring(r)));
134 	return (r);
135 }
136 
137 idn_result_t
138 idn_normalizer_create(idn_normalizer_t *ctxp) {
139 	idn_normalizer_t ctx;
140 	idn_result_t r;
141 
142 	assert(ctxp != NULL);
143 	TRACE(("idn_normalizer_create()\n"));
144 
145 	if ((ctx = malloc(sizeof(struct idn_normalizer))) == NULL) {
146 		r = idn_nomemory;
147 		goto ret;
148 	}
149 
150 	ctx->nschemes = 0;
151 	ctx->scheme_size = MAX_LOCAL_SCHEME;
152 	ctx->schemes = ctx->local_buf;
153 	ctx->reference_count = 1;
154 	*ctxp = ctx;
155 
156 	r = idn_success;
157 ret:
158 	TRACE(("idn_normalizer_create(): %s\n", idn_result_tostring(r)));
159 	return (r);
160 }
161 
162 void
163 idn_normalizer_destroy(idn_normalizer_t ctx) {
164 	assert(ctx != NULL);
165 
166 	TRACE(("idn_normalizer_destroy()\n"));
167 
168 	ctx->reference_count--;
169 	if (ctx->reference_count <= 0) {
170 		TRACE(("idn_normalizer_destroy(): the object is destroyed\n"));
171 		if (ctx->schemes != ctx->local_buf)
172 			free(ctx->schemes);
173 		free(ctx);
174 	} else {
175 		TRACE(("idn_normalizer_destroy(): "
176 		       "update reference count (%d->%d)\n",
177 		       ctx->reference_count + 1, ctx->reference_count));
178 	}
179 }
180 
181 void
182 idn_normalizer_incrref(idn_normalizer_t ctx) {
183 	assert(ctx != NULL);
184 
185 	TRACE(("idn_normalizer_incrref()\n"));
186 	TRACE(("idn_normalizer_incrref: update reference count (%d->%d)\n",
187 	    ctx->reference_count, ctx->reference_count + 1));
188 
189 	ctx->reference_count++;
190 }
191 
192 idn_result_t
193 idn_normalizer_add(idn_normalizer_t ctx, const char *scheme_name) {
194 	idn_result_t r;
195 	void *v;
196 	normalize_scheme_t *scheme;
197 
198 	assert(ctx != NULL && scheme_name != NULL);
199 
200 	TRACE(("idn_normalizer_add(scheme_name=%s)\n", scheme_name));
201 
202 	assert(INITIALIZED);
203 
204 	if (idn__strhash_get(scheme_hash, scheme_name, &v) != idn_success) {
205 		ERROR(("idn_normalizer_add(): invalid scheme \"%-.30s\"\n",
206 		       scheme_name));
207 		r = idn_invalid_name;
208 		goto ret;
209 	}
210 
211 	scheme = v;
212 
213 	assert(ctx->nschemes <= ctx->scheme_size);
214 
215 	if (ctx->nschemes == ctx->scheme_size &&
216 	    (r = expand_schemes(ctx)) != idn_success) {
217 		goto ret;
218 	}
219 
220 	ctx->schemes[ctx->nschemes++] = scheme;
221 	r = idn_success;
222 ret:
223 	TRACE(("idn_normalizer_add(): %s\n", idn_result_tostring(r)));
224 	return (r);
225 }
226 
227 idn_result_t
228 idn_normalizer_addall(idn_normalizer_t ctx, const char **scheme_names,
229 		      int nschemes) {
230 	idn_result_t r;
231 	int i;
232 
233 	assert(ctx != NULL && scheme_names != NULL);
234 
235 	TRACE(("idn_normalizer_addall(nschemes=%d)\n", nschemes));
236 
237 	for (i = 0; i < nschemes; i++) {
238 		r = idn_normalizer_add(ctx, (const char *)*scheme_names);
239 		if (r != idn_success)
240 			goto ret;
241 		scheme_names++;
242 	}
243 
244 	r = idn_success;
245 ret:
246 	TRACE(("idn_normalizer_addall(): %s\n", idn_result_tostring(r)));
247 	return (r);
248 }
249 
250 idn_result_t
251 idn_normalizer_normalize(idn_normalizer_t ctx, const unsigned long *from,
252 			 unsigned long *to, size_t tolen) {
253 	idn_result_t r;
254 	unsigned long *src, *dst;
255 	unsigned long *buffers[2] = {NULL, NULL};
256 	size_t buflen[2] = {0, 0};
257 	size_t dstlen;
258 	int idx;
259 	int i;
260 
261 	assert(scheme_hash != NULL);
262 	assert(ctx != NULL && from != NULL && to != NULL);
263 
264 	TRACE(("idn_normalizer_normalize(from=\"%s\", tolen=%d)\n",
265 	       idn__debug_ucs4xstring(from, 50), (int)tolen));
266 
267 	if (ctx->nschemes <= 0) {
268 		if (tolen < idn_ucs4_strlen(from) + 1) {
269 			r = idn_buffer_overflow;
270 			goto ret;
271 		}
272 		idn_ucs4_strcpy(to, from);
273 		r = idn_success;
274 		goto ret;
275 	}
276 
277 	/*
278 	 * Normalize.
279 	 */
280 	src = (void *)from;
281 	dstlen = idn_ucs4_strlen(from) + 1;
282 
283 	i = 0;
284 	while (i < ctx->nschemes) {
285 		TRACE(("idn_normalizer_normalize(): normalize %s\n",
286 		       ctx->schemes[i]->name));
287 
288 		/*
289 		 * Choose destination area to restore the result of a mapping.
290 		 */
291 		if (i + 1 == ctx->nschemes) {
292 			dst = to;
293 			dstlen = tolen;
294 		} else {
295 			if (src == buffers[0])
296 				idx = 1;
297 			else
298 				idx = 0;
299 
300 			if (buflen[idx] < dstlen) {
301 				void *newbuf;
302 
303 				newbuf = realloc(buffers[idx],
304 						 sizeof(long) * dstlen);
305 				if (newbuf == NULL) {
306 					r = idn_nomemory;
307 					goto ret;
308 				}
309 				buffers[idx] = (unsigned long *)newbuf;
310 				buflen[idx] = dstlen;
311 			}
312 
313 			dst = buffers[idx];
314 			dstlen = buflen[idx];
315 		}
316 
317 		/*
318 		 * Perform i-th normalization scheme.
319 		 * If buffer size is not enough, we double it and try again.
320 		 */
321 		r = (ctx->schemes[i]->proc)(src, dst, dstlen);
322 		if (r == idn_buffer_overflow && dst != to) {
323 			dstlen *= 2;
324 			continue;
325 		}
326 		if (r != idn_success)
327 			goto ret;
328 
329 		src = dst;
330 		i++;
331 	}
332 
333 	r = idn_success;
334 ret:
335 	free(buffers[0]);
336 	free(buffers[1]);
337 	if (r == idn_success) {
338 		TRACE(("idn_normalizer_normalize(): success (to=\"%s\")\n",
339 		       idn__debug_ucs4xstring(to, 50)));
340 	} else {
341 		TRACE(("idn_normalizer_normalize(): %s\n",
342 		       idn_result_tostring(r)));
343 	}
344 	return (r);
345 }
346 
347 idn_result_t
348 idn_normalizer_register(const char *scheme_name, idn_normalizer_proc_t proc) {
349 	idn_result_t r;
350 	normalize_scheme_t *scheme;
351 
352 	assert(scheme_name != NULL && proc != NULL);
353 
354 	TRACE(("idn_normalizer_register(scheme_name=%s)\n", scheme_name));
355 
356 	assert(INITIALIZED);
357 
358 	scheme = malloc(sizeof(*scheme) + strlen(scheme_name) + 1);
359 	if (scheme == NULL) {
360 		r = idn_nomemory;
361 		goto ret;
362 	}
363 	scheme->name = (char *)(scheme + 1);
364 	(void)strcpy(scheme->name, scheme_name);
365 	scheme->proc = proc;
366 
367 	r = idn__strhash_put(scheme_hash, scheme_name, scheme);
368 	if (r != idn_success)
369 		goto ret;
370 
371 	r = idn_success;
372 ret:
373 	TRACE(("idn_normalizer_register(): %s\n", idn_result_tostring(r)));
374 	return (r);
375 }
376 
377 static idn_result_t
378 expand_schemes(idn_normalizer_t ctx) {
379 	normalize_scheme_t **new_schemes;
380 	int new_size = ctx->scheme_size * 2;
381 
382 	if (ctx->schemes == ctx->local_buf) {
383 		new_schemes = malloc(sizeof(normalize_scheme_t) * new_size);
384 	} else {
385 		new_schemes = realloc(ctx->schemes,
386 				      sizeof(normalize_scheme_t) * new_size);
387 	}
388 	if (new_schemes == NULL)
389 		return (idn_nomemory);
390 
391 	if (ctx->schemes == ctx->local_buf)
392 		memcpy(new_schemes, ctx->local_buf, sizeof(ctx->local_buf));
393 
394 	ctx->schemes = new_schemes;
395 	ctx->scheme_size = new_size;
396 
397 	return (idn_success);
398 }
399 
400 static idn_result_t
401 register_standard_normalizers(void) {
402 	int i;
403 	int failed = 0;
404 
405 	for (i = 0; standard_normalizer[i].name != NULL; i++) {
406 		idn_result_t r;
407 		r = idn_normalizer_register(standard_normalizer[i].name,
408 					    standard_normalizer[i].proc);
409 		if (r != idn_success) {
410 			WARNING(("idn_normalizer_initialize(): "
411 				"failed to register \"%-.100s\"\n",
412 				standard_normalizer[i].name));
413 			failed++;
414 		}
415 	}
416 	if (failed > 0)
417 		return (idn_failure);
418 	else
419 		return (idn_success);
420 }
421 
422 /*
423  * Unicode Normalization Forms -- latest version
424  */
425 
426 static idn_result_t
427 normalizer_formkc(const unsigned long *from, unsigned long *to, size_t tolen) {
428 	INIT_VERSION(NULL, vcur);
429 	return (idn__unormalize_formkc(vcur, from, to, tolen));
430 }
431 
432 /*
433  * Unicode Normalization Forms -- version 3.2.0
434  */
435 
436 static idn_result_t
437 normalizer_formkc_v320(const unsigned long *from, unsigned long *to,
438 		       size_t tolen) {
439 	INIT_VERSION("3.2.0", v320);
440 	return (idn__unormalize_formkc(v320, from, to, tolen));
441 }
442