1 /*	$NetBSD: ucsmap.c,v 1.4 2014/12/10 04:37:55 christos Exp $	*/
2 
3 #ifndef lint
4 static char *rcsid = "Id: ucsmap.c,v 1.1 2003/06/04 00:26:14 marka Exp ";
5 #endif
6 
7 /*
8  * Copyright (c) 2001 Japan Network Information Center.  All rights reserved.
9  *
10  * By using this file, you agree to the terms and conditions set forth bellow.
11  *
12  * 			LICENSE TERMS AND CONDITIONS
13  *
14  * The following License Terms and Conditions apply, unless a different
15  * license is obtained from Japan Network Information Center ("JPNIC"),
16  * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
17  * Chiyoda-ku, Tokyo 101-0047, Japan.
18  *
19  * 1. Use, Modification and Redistribution (including distribution of any
20  *    modified or derived work) in source and/or binary forms is permitted
21  *    under this License Terms and Conditions.
22  *
23  * 2. Redistribution of source code must retain the copyright notices as they
24  *    appear in each source code file, this License Terms and Conditions.
25  *
26  * 3. Redistribution in binary form must reproduce the Copyright Notice,
27  *    this License Terms and Conditions, in the documentation and/or other
28  *    materials provided with the distribution.  For the purposes of binary
29  *    distribution the "Copyright Notice" refers to the following language:
30  *    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
31  *
32  * 4. The name of JPNIC may not be used to endorse or promote products
33  *    derived from this Software without specific prior written approval of
34  *    JPNIC.
35  *
36  * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
37  *    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
38  *    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
39  *    PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL JPNIC BE LIABLE
40  *    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
41  *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
42  *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
43  *    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
44  *    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
45  *    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
46  *    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
47  */
48 
49 #include <config.h>
50 
51 #include <stdlib.h>
52 #include <string.h>
53 
54 #include <idn/result.h>
55 #include <idn/assert.h>
56 #include <idn/log.h>
57 #include <idn/logmacro.h>
58 #include <idn/ucsmap.h>
59 
60 #define INIT_SIZE		50
61 #define DEFAULT_BUF_SIZE	500
62 #define UCSMAP_HASH_SIZE	103
63 #define MAX_MAPLEN		0xffff
64 
65 /*
66  * This module implements UCS 1-to-N mapping.
67  * To speed up mapping table lookup, a combination of hash and
68  * binary search is used.
69  */
70 
71 /*
72  * Mapping entry.
73  * Entries are sorted by its hash index and code point.
74  */
75 typedef struct {
76 	short hidx;		/* hash index */
77 	unsigned short len;	/* length of mapped sequence */
78 	unsigned long ucs;	/* code point to be mapped */
79 	unsigned long *map;	/* mapped sequence of code points */
80 } ucsmap_entry_t;
81 
82 /*
83  * Hash table entry.
84  * Since the entries pointed by ucsmap_hash_t.entry are sorted,
85  * binary search can be used.
86  */
87 typedef struct {
88 	ucsmap_entry_t *entry;	/* sorted by code point */
89 	int n;			/* length of 'entry' */
90 } ucsmap_hash_t;
91 
92 /*
93  * UCS character buffer for storing target character sequence.
94  */
95 typedef struct ucsmap_buf {
96 	struct ucsmap_buf *next;
97 	unsigned long buf[1];		/* actually a variable length array */
98 } ucsmap_buf_t;
99 
100 /*
101  * Mapping object.
102  */
103 typedef struct idn_ucsmap {
104 	ucsmap_hash_t hash[UCSMAP_HASH_SIZE];
105 	ucsmap_entry_t *entries;	/* array of entries */
106 	size_t entry_size;		/* allocated size */
107 	size_t nentries;		/* # of entries in use */
108 	ucsmap_buf_t *mapdata;		/* list of character buffers */
109 	size_t mapdata_size;		/* allocated size of current buffer */
110 	size_t mapdata_used;		/* # of chars in use */
111 	int fixed;			/* already fixed? */
112 	int refcnt;			/* reference count */
113 } ucsmap_t;
114 
115 static int		ucsmap_hash(unsigned long v);
116 static unsigned long	*save_mapped_sequence(idn_ucsmap_t ctx,
117 					      unsigned long *map,
118 					      size_t maplen);
119 static void		free_mapbuf(ucsmap_buf_t *buf);
120 static int		comp_entry(const void *v1, const void *v2);
121 
122 idn_result_t
123 idn_ucsmap_create(idn_ucsmap_t *ctxp) {
124 	idn_ucsmap_t ctx;
125 
126 	assert(ctxp != NULL);
127 
128 	TRACE(("idn_ucsmap_create()\n"));
129 
130 	if ((ctx = malloc(sizeof(*ctx))) == NULL) {
131 		WARNING(("idn_ucsmap_create: malloc failed\n"));
132 		return (idn_nomemory);
133 	}
134 
135 	ctx->entry_size = 0;
136 	ctx->nentries = 0;
137 	ctx->entries = NULL;
138 	ctx->mapdata = NULL;
139 	ctx->mapdata_size = 0;
140 	ctx->mapdata_used = 0;
141 	ctx->fixed = 0;
142 	ctx->refcnt = 1;
143 	*ctxp = ctx;
144 	return (idn_success);
145 }
146 
147 void
148 idn_ucsmap_destroy(idn_ucsmap_t ctx) {
149 	assert(ctx != NULL && ctx->refcnt > 0);
150 
151 	TRACE(("idn_ucsmap_destroy()\n"));
152 
153 	if (--ctx->refcnt == 0) {
154 		if (ctx->entries != NULL)
155 			free(ctx->entries);
156 		if (ctx->mapdata != NULL)
157 			free_mapbuf(ctx->mapdata);
158 		free(ctx);
159 	}
160 }
161 
162 void
163 idn_ucsmap_incrref(idn_ucsmap_t ctx) {
164 	assert(ctx != NULL && ctx->refcnt > 0);
165 
166 	ctx->refcnt++;
167 }
168 
169 idn_result_t
170 idn_ucsmap_add(idn_ucsmap_t ctx, unsigned long ucs,
171 	       unsigned long *map, size_t maplen)
172 {
173 	ucsmap_entry_t *e;
174 	ucsmap_entry_t *newbuf;
175 
176 	assert(ctx != NULL && ctx->refcnt > 0);
177 
178 	TRACE(("idn_ucsmap_add(ucs=U+%lX, maplen=%u)\n", ucs, maplen));
179 
180 	/* Make sure it is not fixed yet. */
181 	if (ctx->fixed) {
182 		WARNING(("idn_ucsmap_add: attempt to add to fixed map\n"));
183 		return (idn_failure);
184 	}
185 
186 	if (maplen > MAX_MAPLEN) {
187 		WARNING(("idn_ucsmap_add: maplen too large (> %d)\n",
188 			 MAX_MAPLEN));
189 		return (idn_failure);
190 	}
191 
192 	/* Append an entry. */
193 	if (ctx->nentries >= ctx->entry_size) {
194 		if (ctx->entry_size == 0)
195 			ctx->entry_size = INIT_SIZE;
196 		else
197 			ctx->entry_size *= 2;
198 		newbuf = realloc(ctx->entries, sizeof(*e) * ctx->entry_size);
199 		if (newbuf == NULL)
200 			return (idn_nomemory);
201 		ctx->entries = newbuf;
202 	}
203 	e = &ctx->entries[ctx->nentries];
204 	e->hidx = ucsmap_hash(ucs);
205 	e->len = maplen;
206 	e->ucs = ucs;
207 	if (maplen > 0) {
208 		/* Save mapped sequence in the buffer. */
209 		e->map = save_mapped_sequence(ctx, map, maplen);
210 		if (e->map == NULL)
211 			return (idn_nomemory);
212 	} else {
213 		/*
214 		 * Zero 'maplen' is perfectly valid meaning one-to-zero
215 		 * mapping.
216 		 */
217 		e->map = NULL;
218 	}
219 	ctx->nentries++;
220 
221 	return (idn_success);
222 }
223 
224 void
225 idn_ucsmap_fix(idn_ucsmap_t ctx) {
226 	ucsmap_entry_t *e;
227 	int last_hidx;
228 	int i;
229 
230 	assert(ctx != NULL && ctx->refcnt > 0);
231 
232 	TRACE(("idn_ucsmap_fix()\n"));
233 
234 	if (ctx->fixed)
235 		return;
236 
237 	ctx->fixed = 1;
238 
239 	/* Initialize hash. */
240 	for (i = 0; i < UCSMAP_HASH_SIZE; i++) {
241 		ctx->hash[i].entry = NULL;
242 		ctx->hash[i].n = 0;
243 	}
244 
245 	if (ctx->nentries == 0)
246 		return;
247 
248 	/* Sort entries by the hash value and code point. */
249 	qsort(ctx->entries, ctx->nentries, sizeof(ucsmap_entry_t), comp_entry);
250 
251 	/*
252 	 * Now the entries are sorted by their hash value, and
253 	 * sorted by its code point among the ones with the same hash value.
254 	 */
255 
256 	/* Build hash table. */
257 	last_hidx = -1;
258 	for (i = 0, e = ctx->entries; i < ctx->nentries; i++, e++) {
259 		if (e->hidx != last_hidx) {
260 			ctx->hash[e->hidx].entry = e;
261 			last_hidx = e->hidx;
262 		}
263 		ctx->hash[last_hidx].n++;
264 	}
265 }
266 
267 idn_result_t
268 idn_ucsmap_map(idn_ucsmap_t ctx, unsigned long v, unsigned long *to,
269 	       size_t tolen, size_t *maplenp) {
270 	int hash;
271 	ucsmap_entry_t *e;
272 	int n;
273 	int hi, lo, mid;
274 
275 	assert(ctx != NULL && ctx->refcnt > 0 && to != NULL &&
276 	       maplenp != NULL);
277 
278 	TRACE(("idn_ucsmap_map(v=U+%lX)\n", v));
279 
280 	if (!ctx->fixed) {
281 		WARNING(("idn_ucsmap_map: not fixed yet\n"));
282 		return (idn_failure);
283 	}
284 
285 	/* First, look up hash table. */
286 	hash = ucsmap_hash(v);
287 	if ((n = ctx->hash[hash].n) == 0)
288 		goto nomap;
289 
290 	/* Then do binary search. */
291 	e = ctx->hash[hash].entry;
292 	lo = 0;
293 	hi = n - 1;
294 	while (lo <= hi) {
295 		mid = (lo + hi) / 2;
296 		if (v < e[mid].ucs)
297 			hi = mid - 1;
298 		else if (v > e[mid].ucs)
299 			lo = mid + 1;
300 		else {
301 			/* Found. */
302 			if (tolen < e[mid].len)
303 				return (idn_buffer_overflow);
304 			memcpy(to, e[mid].map, sizeof(*to) * e[mid].len);
305 			*maplenp = e[mid].len;
306 			return (idn_success);
307 		}
308 	}
309 
310 	/*
311 	 * Not found. Put the original character to 'to'
312 	 * just for convenience.
313 	 */
314  nomap:
315 	if (tolen < 1)
316 		return (idn_buffer_overflow);
317 	*to = v;
318 	*maplenp = 1;
319 	return (idn_nomapping);
320 }
321 
322 static int
323 ucsmap_hash(unsigned long v) {
324 	return (v % UCSMAP_HASH_SIZE);
325 }
326 
327 static unsigned long *
328 save_mapped_sequence(idn_ucsmap_t ctx, unsigned long *map, size_t maplen) {
329 	ucsmap_buf_t *buf;
330 	unsigned long *p;
331 	size_t allocsize;
332 
333 	/*
334 	 * If the current buffer (the first one in the ctx->mapdata list)
335 	 * has enough space, use it.  Otherwise, allocate a new buffer and
336 	 * insert it at the beginning of the list.
337 	 */
338 	if (ctx->mapdata_used + maplen > ctx->mapdata_size) {
339 		if (maplen > DEFAULT_BUF_SIZE)
340 			allocsize = maplen * 2;
341 		else
342 			allocsize = DEFAULT_BUF_SIZE;
343 		buf = malloc(sizeof(ucsmap_hash_t) +
344 			     sizeof(unsigned long) * allocsize);
345 		if (buf == NULL)
346 			return (NULL);
347 		buf->next = ctx->mapdata;
348 		ctx->mapdata = buf;
349 		ctx->mapdata_size = allocsize;
350 		ctx->mapdata_used = 0;
351 	}
352 	p = ctx->mapdata->buf + ctx->mapdata_used;
353 	memcpy(p, map, sizeof(unsigned long) * maplen);
354 	ctx->mapdata_used += maplen;
355 	return (p);
356 }
357 
358 static void
359 free_mapbuf(ucsmap_buf_t *buf) {
360 	while (buf != NULL) {
361 		ucsmap_buf_t *next = buf->next;
362 		free(buf);
363 		buf = next;
364 	}
365 }
366 
367 static int
368 comp_entry(const void *v1, const void *v2) {
369 	const ucsmap_entry_t *e1 = v1;
370 	const ucsmap_entry_t *e2 = v2;
371 
372 	if (e1->hidx < e2->hidx)
373 		return (-1);
374 	else if (e1->hidx > e2->hidx)
375 		return (1);
376 	else if (e1->ucs < e2->ucs)
377 		return (-1);
378 	else if (e1->ucs > e2->ucs)
379 		return (1);
380 	else
381 		return (0);
382 }
383