1 /* $NetBSD: ucsmap.c,v 1.4 2014/12/10 04:37:55 christos Exp $ */ 2 3 #ifndef lint 4 static char *rcsid = "Id: ucsmap.c,v 1.1 2003/06/04 00:26:14 marka Exp "; 5 #endif 6 7 /* 8 * Copyright (c) 2001 Japan Network Information Center. All rights reserved. 9 * 10 * By using this file, you agree to the terms and conditions set forth bellow. 11 * 12 * LICENSE TERMS AND CONDITIONS 13 * 14 * The following License Terms and Conditions apply, unless a different 15 * license is obtained from Japan Network Information Center ("JPNIC"), 16 * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, 17 * Chiyoda-ku, Tokyo 101-0047, Japan. 18 * 19 * 1. Use, Modification and Redistribution (including distribution of any 20 * modified or derived work) in source and/or binary forms is permitted 21 * under this License Terms and Conditions. 22 * 23 * 2. Redistribution of source code must retain the copyright notices as they 24 * appear in each source code file, this License Terms and Conditions. 25 * 26 * 3. Redistribution in binary form must reproduce the Copyright Notice, 27 * this License Terms and Conditions, in the documentation and/or other 28 * materials provided with the distribution. For the purposes of binary 29 * distribution the "Copyright Notice" refers to the following language: 30 * "Copyright (c) 2000-2002 Japan Network Information Center. All rights reserved." 31 * 32 * 4. The name of JPNIC may not be used to endorse or promote products 33 * derived from this Software without specific prior written approval of 34 * JPNIC. 35 * 36 * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC 37 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 38 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 39 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE 40 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 41 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 42 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 43 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 44 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 45 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 46 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 47 */ 48 49 #include <config.h> 50 51 #include <stdlib.h> 52 #include <string.h> 53 54 #include <idn/result.h> 55 #include <idn/assert.h> 56 #include <idn/log.h> 57 #include <idn/logmacro.h> 58 #include <idn/ucsmap.h> 59 60 #define INIT_SIZE 50 61 #define DEFAULT_BUF_SIZE 500 62 #define UCSMAP_HASH_SIZE 103 63 #define MAX_MAPLEN 0xffff 64 65 /* 66 * This module implements UCS 1-to-N mapping. 67 * To speed up mapping table lookup, a combination of hash and 68 * binary search is used. 69 */ 70 71 /* 72 * Mapping entry. 73 * Entries are sorted by its hash index and code point. 74 */ 75 typedef struct { 76 short hidx; /* hash index */ 77 unsigned short len; /* length of mapped sequence */ 78 unsigned long ucs; /* code point to be mapped */ 79 unsigned long *map; /* mapped sequence of code points */ 80 } ucsmap_entry_t; 81 82 /* 83 * Hash table entry. 84 * Since the entries pointed by ucsmap_hash_t.entry are sorted, 85 * binary search can be used. 86 */ 87 typedef struct { 88 ucsmap_entry_t *entry; /* sorted by code point */ 89 int n; /* length of 'entry' */ 90 } ucsmap_hash_t; 91 92 /* 93 * UCS character buffer for storing target character sequence. 94 */ 95 typedef struct ucsmap_buf { 96 struct ucsmap_buf *next; 97 unsigned long buf[1]; /* actually a variable length array */ 98 } ucsmap_buf_t; 99 100 /* 101 * Mapping object. 102 */ 103 typedef struct idn_ucsmap { 104 ucsmap_hash_t hash[UCSMAP_HASH_SIZE]; 105 ucsmap_entry_t *entries; /* array of entries */ 106 size_t entry_size; /* allocated size */ 107 size_t nentries; /* # of entries in use */ 108 ucsmap_buf_t *mapdata; /* list of character buffers */ 109 size_t mapdata_size; /* allocated size of current buffer */ 110 size_t mapdata_used; /* # of chars in use */ 111 int fixed; /* already fixed? */ 112 int refcnt; /* reference count */ 113 } ucsmap_t; 114 115 static int ucsmap_hash(unsigned long v); 116 static unsigned long *save_mapped_sequence(idn_ucsmap_t ctx, 117 unsigned long *map, 118 size_t maplen); 119 static void free_mapbuf(ucsmap_buf_t *buf); 120 static int comp_entry(const void *v1, const void *v2); 121 122 idn_result_t 123 idn_ucsmap_create(idn_ucsmap_t *ctxp) { 124 idn_ucsmap_t ctx; 125 126 assert(ctxp != NULL); 127 128 TRACE(("idn_ucsmap_create()\n")); 129 130 if ((ctx = malloc(sizeof(*ctx))) == NULL) { 131 WARNING(("idn_ucsmap_create: malloc failed\n")); 132 return (idn_nomemory); 133 } 134 135 ctx->entry_size = 0; 136 ctx->nentries = 0; 137 ctx->entries = NULL; 138 ctx->mapdata = NULL; 139 ctx->mapdata_size = 0; 140 ctx->mapdata_used = 0; 141 ctx->fixed = 0; 142 ctx->refcnt = 1; 143 *ctxp = ctx; 144 return (idn_success); 145 } 146 147 void 148 idn_ucsmap_destroy(idn_ucsmap_t ctx) { 149 assert(ctx != NULL && ctx->refcnt > 0); 150 151 TRACE(("idn_ucsmap_destroy()\n")); 152 153 if (--ctx->refcnt == 0) { 154 if (ctx->entries != NULL) 155 free(ctx->entries); 156 if (ctx->mapdata != NULL) 157 free_mapbuf(ctx->mapdata); 158 free(ctx); 159 } 160 } 161 162 void 163 idn_ucsmap_incrref(idn_ucsmap_t ctx) { 164 assert(ctx != NULL && ctx->refcnt > 0); 165 166 ctx->refcnt++; 167 } 168 169 idn_result_t 170 idn_ucsmap_add(idn_ucsmap_t ctx, unsigned long ucs, 171 unsigned long *map, size_t maplen) 172 { 173 ucsmap_entry_t *e; 174 ucsmap_entry_t *newbuf; 175 176 assert(ctx != NULL && ctx->refcnt > 0); 177 178 TRACE(("idn_ucsmap_add(ucs=U+%lX, maplen=%u)\n", ucs, maplen)); 179 180 /* Make sure it is not fixed yet. */ 181 if (ctx->fixed) { 182 WARNING(("idn_ucsmap_add: attempt to add to fixed map\n")); 183 return (idn_failure); 184 } 185 186 if (maplen > MAX_MAPLEN) { 187 WARNING(("idn_ucsmap_add: maplen too large (> %d)\n", 188 MAX_MAPLEN)); 189 return (idn_failure); 190 } 191 192 /* Append an entry. */ 193 if (ctx->nentries >= ctx->entry_size) { 194 if (ctx->entry_size == 0) 195 ctx->entry_size = INIT_SIZE; 196 else 197 ctx->entry_size *= 2; 198 newbuf = realloc(ctx->entries, sizeof(*e) * ctx->entry_size); 199 if (newbuf == NULL) 200 return (idn_nomemory); 201 ctx->entries = newbuf; 202 } 203 e = &ctx->entries[ctx->nentries]; 204 e->hidx = ucsmap_hash(ucs); 205 e->len = maplen; 206 e->ucs = ucs; 207 if (maplen > 0) { 208 /* Save mapped sequence in the buffer. */ 209 e->map = save_mapped_sequence(ctx, map, maplen); 210 if (e->map == NULL) 211 return (idn_nomemory); 212 } else { 213 /* 214 * Zero 'maplen' is perfectly valid meaning one-to-zero 215 * mapping. 216 */ 217 e->map = NULL; 218 } 219 ctx->nentries++; 220 221 return (idn_success); 222 } 223 224 void 225 idn_ucsmap_fix(idn_ucsmap_t ctx) { 226 ucsmap_entry_t *e; 227 int last_hidx; 228 int i; 229 230 assert(ctx != NULL && ctx->refcnt > 0); 231 232 TRACE(("idn_ucsmap_fix()\n")); 233 234 if (ctx->fixed) 235 return; 236 237 ctx->fixed = 1; 238 239 /* Initialize hash. */ 240 for (i = 0; i < UCSMAP_HASH_SIZE; i++) { 241 ctx->hash[i].entry = NULL; 242 ctx->hash[i].n = 0; 243 } 244 245 if (ctx->nentries == 0) 246 return; 247 248 /* Sort entries by the hash value and code point. */ 249 qsort(ctx->entries, ctx->nentries, sizeof(ucsmap_entry_t), comp_entry); 250 251 /* 252 * Now the entries are sorted by their hash value, and 253 * sorted by its code point among the ones with the same hash value. 254 */ 255 256 /* Build hash table. */ 257 last_hidx = -1; 258 for (i = 0, e = ctx->entries; i < ctx->nentries; i++, e++) { 259 if (e->hidx != last_hidx) { 260 ctx->hash[e->hidx].entry = e; 261 last_hidx = e->hidx; 262 } 263 ctx->hash[last_hidx].n++; 264 } 265 } 266 267 idn_result_t 268 idn_ucsmap_map(idn_ucsmap_t ctx, unsigned long v, unsigned long *to, 269 size_t tolen, size_t *maplenp) { 270 int hash; 271 ucsmap_entry_t *e; 272 int n; 273 int hi, lo, mid; 274 275 assert(ctx != NULL && ctx->refcnt > 0 && to != NULL && 276 maplenp != NULL); 277 278 TRACE(("idn_ucsmap_map(v=U+%lX)\n", v)); 279 280 if (!ctx->fixed) { 281 WARNING(("idn_ucsmap_map: not fixed yet\n")); 282 return (idn_failure); 283 } 284 285 /* First, look up hash table. */ 286 hash = ucsmap_hash(v); 287 if ((n = ctx->hash[hash].n) == 0) 288 goto nomap; 289 290 /* Then do binary search. */ 291 e = ctx->hash[hash].entry; 292 lo = 0; 293 hi = n - 1; 294 while (lo <= hi) { 295 mid = (lo + hi) / 2; 296 if (v < e[mid].ucs) 297 hi = mid - 1; 298 else if (v > e[mid].ucs) 299 lo = mid + 1; 300 else { 301 /* Found. */ 302 if (tolen < e[mid].len) 303 return (idn_buffer_overflow); 304 memcpy(to, e[mid].map, sizeof(*to) * e[mid].len); 305 *maplenp = e[mid].len; 306 return (idn_success); 307 } 308 } 309 310 /* 311 * Not found. Put the original character to 'to' 312 * just for convenience. 313 */ 314 nomap: 315 if (tolen < 1) 316 return (idn_buffer_overflow); 317 *to = v; 318 *maplenp = 1; 319 return (idn_nomapping); 320 } 321 322 static int 323 ucsmap_hash(unsigned long v) { 324 return (v % UCSMAP_HASH_SIZE); 325 } 326 327 static unsigned long * 328 save_mapped_sequence(idn_ucsmap_t ctx, unsigned long *map, size_t maplen) { 329 ucsmap_buf_t *buf; 330 unsigned long *p; 331 size_t allocsize; 332 333 /* 334 * If the current buffer (the first one in the ctx->mapdata list) 335 * has enough space, use it. Otherwise, allocate a new buffer and 336 * insert it at the beginning of the list. 337 */ 338 if (ctx->mapdata_used + maplen > ctx->mapdata_size) { 339 if (maplen > DEFAULT_BUF_SIZE) 340 allocsize = maplen * 2; 341 else 342 allocsize = DEFAULT_BUF_SIZE; 343 buf = malloc(sizeof(ucsmap_hash_t) + 344 sizeof(unsigned long) * allocsize); 345 if (buf == NULL) 346 return (NULL); 347 buf->next = ctx->mapdata; 348 ctx->mapdata = buf; 349 ctx->mapdata_size = allocsize; 350 ctx->mapdata_used = 0; 351 } 352 p = ctx->mapdata->buf + ctx->mapdata_used; 353 memcpy(p, map, sizeof(unsigned long) * maplen); 354 ctx->mapdata_used += maplen; 355 return (p); 356 } 357 358 static void 359 free_mapbuf(ucsmap_buf_t *buf) { 360 while (buf != NULL) { 361 ucsmap_buf_t *next = buf->next; 362 free(buf); 363 buf = next; 364 } 365 } 366 367 static int 368 comp_entry(const void *v1, const void *v2) { 369 const ucsmap_entry_t *e1 = v1; 370 const ucsmap_entry_t *e2 = v2; 371 372 if (e1->hidx < e2->hidx) 373 return (-1); 374 else if (e1->hidx > e2->hidx) 375 return (1); 376 else if (e1->ucs < e2->ucs) 377 return (-1); 378 else if (e1->ucs > e2->ucs) 379 return (1); 380 else 381 return (0); 382 } 383