1 /* $NetBSD: normalizer.c,v 1.4 2014/12/10 04:37:55 christos Exp $ */ 2 3 #ifndef lint 4 static char *rcsid = "Id: normalizer.c,v 1.1 2003/06/04 00:26:05 marka Exp "; 5 #endif 6 7 /* 8 * Copyright (c) 2000,2002 Japan Network Information Center. 9 * All rights reserved. 10 * 11 * By using this file, you agree to the terms and conditions set forth bellow. 12 * 13 * LICENSE TERMS AND CONDITIONS 14 * 15 * The following License Terms and Conditions apply, unless a different 16 * license is obtained from Japan Network Information Center ("JPNIC"), 17 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, 18 * Chiyoda-ku, Tokyo 101-0047, Japan. 19 * 20 * 1. Use, Modification and Redistribution (including distribution of any 21 * modified or derived work) in source and/or binary forms is permitted 22 * under this License Terms and Conditions. 23 * 24 * 2. Redistribution of source code must retain the copyright notices as they 25 * appear in each source code file, this License Terms and Conditions. 26 * 27 * 3. Redistribution in binary form must reproduce the Copyright Notice, 28 * this License Terms and Conditions, in the documentation and/or other 29 * materials provided with the distribution. For the purposes of binary 30 * distribution the "Copyright Notice" refers to the following language: 31 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved." 32 * 33 * 4. The name of JPNIC may not be used to endorse or promote products 34 * derived from this Software without specific prior written approval of 35 * JPNIC. 36 * 37 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC 38 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 39 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 40 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE 41 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 42 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 43 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 44 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 45 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 46 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 47 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 48 */ 49 50 #include <config.h> 51 52 #include <stddef.h> 53 #include <stdlib.h> 54 #include <string.h> 55 #include <ctype.h> 56 57 #include <idn/assert.h> 58 #include <idn/logmacro.h> 59 #include <idn/result.h> 60 #include <idn/normalizer.h> 61 #include <idn/strhash.h> 62 #include <idn/unormalize.h> 63 #include <idn/unicode.h> 64 #include <idn/ucs4.h> 65 #include <idn/debug.h> 66 #include <idn/util.h> 67 68 #define MAX_LOCAL_SCHEME 3 69 70 #define INITIALIZED (scheme_hash != NULL) 71 72 typedef struct { 73 char *name; 74 idn_normalizer_proc_t proc; 75 } normalize_scheme_t; 76 77 struct idn_normalizer { 78 int nschemes; 79 int scheme_size; 80 normalize_scheme_t **schemes; 81 normalize_scheme_t *local_buf[MAX_LOCAL_SCHEME]; 82 int reference_count; 83 }; 84 85 static idn__strhash_t scheme_hash; 86 87 static idn__unicode_version_t vcur = NULL; 88 static idn__unicode_version_t v320 = NULL; 89 #define INIT_VERSION(version, var) \ 90 if (var == NULL) { \ 91 idn_result_t r = idn__unicode_create(version, &var); \ 92 if (r != idn_success) \ 93 return (r); \ 94 } 95 96 static idn_result_t expand_schemes(idn_normalizer_t ctx); 97 static idn_result_t register_standard_normalizers(void); 98 static idn_result_t normalizer_formkc(const unsigned long *from, 99 unsigned long *to, size_t tolen); 100 static idn_result_t normalizer_formkc_v320(const unsigned long *from, 101 unsigned long *to, 102 size_t tolen); 103 104 static struct standard_normalizer { 105 char *name; 106 idn_normalizer_proc_t proc; 107 } standard_normalizer[] = { 108 { "unicode-form-kc", normalizer_formkc }, 109 { "unicode-form-kc/3.2.0", normalizer_formkc_v320 }, 110 { "RFC3491", normalizer_formkc_v320 }, 111 { NULL, NULL }, 112 }; 113 114 idn_result_t 115 idn_normalizer_initialize(void) { 116 idn__strhash_t hash; 117 idn_result_t r; 118 119 TRACE(("idn_normalizer_initialize()\n")); 120 121 if (scheme_hash != NULL) { 122 r = idn_success; /* already initialized */ 123 goto ret; 124 } 125 126 if ((r = idn__strhash_create(&hash)) != idn_success) 127 goto ret; 128 scheme_hash = hash; 129 130 /* Register standard normalizers */ 131 r = register_standard_normalizers(); 132 ret: 133 TRACE(("idn_normalizer_initialize(): %s\n", idn_result_tostring(r))); 134 return (r); 135 } 136 137 idn_result_t 138 idn_normalizer_create(idn_normalizer_t *ctxp) { 139 idn_normalizer_t ctx; 140 idn_result_t r; 141 142 assert(ctxp != NULL); 143 TRACE(("idn_normalizer_create()\n")); 144 145 if ((ctx = malloc(sizeof(struct idn_normalizer))) == NULL) { 146 r = idn_nomemory; 147 goto ret; 148 } 149 150 ctx->nschemes = 0; 151 ctx->scheme_size = MAX_LOCAL_SCHEME; 152 ctx->schemes = ctx->local_buf; 153 ctx->reference_count = 1; 154 *ctxp = ctx; 155 156 r = idn_success; 157 ret: 158 TRACE(("idn_normalizer_create(): %s\n", idn_result_tostring(r))); 159 return (r); 160 } 161 162 void 163 idn_normalizer_destroy(idn_normalizer_t ctx) { 164 assert(ctx != NULL); 165 166 TRACE(("idn_normalizer_destroy()\n")); 167 168 ctx->reference_count--; 169 if (ctx->reference_count <= 0) { 170 TRACE(("idn_normalizer_destroy(): the object is destroyed\n")); 171 if (ctx->schemes != ctx->local_buf) 172 free(ctx->schemes); 173 free(ctx); 174 } else { 175 TRACE(("idn_normalizer_destroy(): " 176 "update reference count (%d->%d)\n", 177 ctx->reference_count + 1, ctx->reference_count)); 178 } 179 } 180 181 void 182 idn_normalizer_incrref(idn_normalizer_t ctx) { 183 assert(ctx != NULL); 184 185 TRACE(("idn_normalizer_incrref()\n")); 186 TRACE(("idn_normalizer_incrref: update reference count (%d->%d)\n", 187 ctx->reference_count, ctx->reference_count + 1)); 188 189 ctx->reference_count++; 190 } 191 192 idn_result_t 193 idn_normalizer_add(idn_normalizer_t ctx, const char *scheme_name) { 194 idn_result_t r; 195 void *v; 196 normalize_scheme_t *scheme; 197 198 assert(ctx != NULL && scheme_name != NULL); 199 200 TRACE(("idn_normalizer_add(scheme_name=%s)\n", scheme_name)); 201 202 assert(INITIALIZED); 203 204 if (idn__strhash_get(scheme_hash, scheme_name, &v) != idn_success) { 205 ERROR(("idn_normalizer_add(): invalid scheme \"%-.30s\"\n", 206 scheme_name)); 207 r = idn_invalid_name; 208 goto ret; 209 } 210 211 scheme = v; 212 213 assert(ctx->nschemes <= ctx->scheme_size); 214 215 if (ctx->nschemes == ctx->scheme_size && 216 (r = expand_schemes(ctx)) != idn_success) { 217 goto ret; 218 } 219 220 ctx->schemes[ctx->nschemes++] = scheme; 221 r = idn_success; 222 ret: 223 TRACE(("idn_normalizer_add(): %s\n", idn_result_tostring(r))); 224 return (r); 225 } 226 227 idn_result_t 228 idn_normalizer_addall(idn_normalizer_t ctx, const char **scheme_names, 229 int nschemes) { 230 idn_result_t r; 231 int i; 232 233 assert(ctx != NULL && scheme_names != NULL); 234 235 TRACE(("idn_normalizer_addall(nschemes=%d)\n", nschemes)); 236 237 for (i = 0; i < nschemes; i++) { 238 r = idn_normalizer_add(ctx, (const char *)*scheme_names); 239 if (r != idn_success) 240 goto ret; 241 scheme_names++; 242 } 243 244 r = idn_success; 245 ret: 246 TRACE(("idn_normalizer_addall(): %s\n", idn_result_tostring(r))); 247 return (r); 248 } 249 250 idn_result_t 251 idn_normalizer_normalize(idn_normalizer_t ctx, const unsigned long *from, 252 unsigned long *to, size_t tolen) { 253 idn_result_t r; 254 unsigned long *src, *dst; 255 unsigned long *buffers[2] = {NULL, NULL}; 256 size_t buflen[2] = {0, 0}; 257 size_t dstlen; 258 int idx; 259 int i; 260 261 assert(scheme_hash != NULL); 262 assert(ctx != NULL && from != NULL && to != NULL); 263 264 TRACE(("idn_normalizer_normalize(from=\"%s\", tolen=%d)\n", 265 idn__debug_ucs4xstring(from, 50), (int)tolen)); 266 267 if (ctx->nschemes <= 0) { 268 if (tolen < idn_ucs4_strlen(from) + 1) { 269 r = idn_buffer_overflow; 270 goto ret; 271 } 272 idn_ucs4_strcpy(to, from); 273 r = idn_success; 274 goto ret; 275 } 276 277 /* 278 * Normalize. 279 */ 280 src = (void *)from; 281 dstlen = idn_ucs4_strlen(from) + 1; 282 283 i = 0; 284 while (i < ctx->nschemes) { 285 TRACE(("idn_normalizer_normalize(): normalize %s\n", 286 ctx->schemes[i]->name)); 287 288 /* 289 * Choose destination area to restore the result of a mapping. 290 */ 291 if (i + 1 == ctx->nschemes) { 292 dst = to; 293 dstlen = tolen; 294 } else { 295 if (src == buffers[0]) 296 idx = 1; 297 else 298 idx = 0; 299 300 if (buflen[idx] < dstlen) { 301 void *newbuf; 302 303 newbuf = realloc(buffers[idx], 304 sizeof(long) * dstlen); 305 if (newbuf == NULL) { 306 r = idn_nomemory; 307 goto ret; 308 } 309 buffers[idx] = (unsigned long *)newbuf; 310 buflen[idx] = dstlen; 311 } 312 313 dst = buffers[idx]; 314 dstlen = buflen[idx]; 315 } 316 317 /* 318 * Perform i-th normalization scheme. 319 * If buffer size is not enough, we double it and try again. 320 */ 321 r = (ctx->schemes[i]->proc)(src, dst, dstlen); 322 if (r == idn_buffer_overflow && dst != to) { 323 dstlen *= 2; 324 continue; 325 } 326 if (r != idn_success) 327 goto ret; 328 329 src = dst; 330 i++; 331 } 332 333 r = idn_success; 334 ret: 335 free(buffers[0]); 336 free(buffers[1]); 337 if (r == idn_success) { 338 TRACE(("idn_normalizer_normalize(): success (to=\"%s\")\n", 339 idn__debug_ucs4xstring(to, 50))); 340 } else { 341 TRACE(("idn_normalizer_normalize(): %s\n", 342 idn_result_tostring(r))); 343 } 344 return (r); 345 } 346 347 idn_result_t 348 idn_normalizer_register(const char *scheme_name, idn_normalizer_proc_t proc) { 349 idn_result_t r; 350 normalize_scheme_t *scheme; 351 352 assert(scheme_name != NULL && proc != NULL); 353 354 TRACE(("idn_normalizer_register(scheme_name=%s)\n", scheme_name)); 355 356 assert(INITIALIZED); 357 358 scheme = malloc(sizeof(*scheme) + strlen(scheme_name) + 1); 359 if (scheme == NULL) { 360 r = idn_nomemory; 361 goto ret; 362 } 363 scheme->name = (char *)(scheme + 1); 364 (void)strcpy(scheme->name, scheme_name); 365 scheme->proc = proc; 366 367 r = idn__strhash_put(scheme_hash, scheme_name, scheme); 368 if (r != idn_success) 369 goto ret; 370 371 r = idn_success; 372 ret: 373 TRACE(("idn_normalizer_register(): %s\n", idn_result_tostring(r))); 374 return (r); 375 } 376 377 static idn_result_t 378 expand_schemes(idn_normalizer_t ctx) { 379 normalize_scheme_t **new_schemes; 380 int new_size = ctx->scheme_size * 2; 381 382 if (ctx->schemes == ctx->local_buf) { 383 new_schemes = malloc(sizeof(normalize_scheme_t) * new_size); 384 } else { 385 new_schemes = realloc(ctx->schemes, 386 sizeof(normalize_scheme_t) * new_size); 387 } 388 if (new_schemes == NULL) 389 return (idn_nomemory); 390 391 if (ctx->schemes == ctx->local_buf) 392 memcpy(new_schemes, ctx->local_buf, sizeof(ctx->local_buf)); 393 394 ctx->schemes = new_schemes; 395 ctx->scheme_size = new_size; 396 397 return (idn_success); 398 } 399 400 static idn_result_t 401 register_standard_normalizers(void) { 402 int i; 403 int failed = 0; 404 405 for (i = 0; standard_normalizer[i].name != NULL; i++) { 406 idn_result_t r; 407 r = idn_normalizer_register(standard_normalizer[i].name, 408 standard_normalizer[i].proc); 409 if (r != idn_success) { 410 WARNING(("idn_normalizer_initialize(): " 411 "failed to register \"%-.100s\"\n", 412 standard_normalizer[i].name)); 413 failed++; 414 } 415 } 416 if (failed > 0) 417 return (idn_failure); 418 else 419 return (idn_success); 420 } 421 422 /* 423 * Unicode Normalization Forms -- latest version 424 */ 425 426 static idn_result_t 427 normalizer_formkc(const unsigned long *from, unsigned long *to, size_t tolen) { 428 INIT_VERSION(NULL, vcur); 429 return (idn__unormalize_formkc(vcur, from, to, tolen)); 430 } 431 432 /* 433 * Unicode Normalization Forms -- version 3.2.0 434 */ 435 436 static idn_result_t 437 normalizer_formkc_v320(const unsigned long *from, unsigned long *to, 438 size_t tolen) { 439 INIT_VERSION("3.2.0", v320); 440 return (idn__unormalize_formkc(v320, from, to, tolen)); 441 } 442