1 /*
2  *                           TERMS AND CONDITIONS
3  *                                   FOR
4  *                         OPEN SOURCE CODE LICENSE
5  *                               Version 1.1
6  *
7  * Japan Registry Services Co., Ltd. ("JPRS"), a Japanese corporation
8  * having its head office at Chiyoda First Bldg. East 13F 3-8-1 Nishi-Kanda,
9  * Chiyoda-ku, Tokyo 101-0065, Japan, grants you the license for open source
10  * code specified in EXHIBIT A the "Code" subject to the following Terms and
11  * Conditions ("OSCL").
12  *
13  * 1. License Grant.
14  *   JPRS hereby grants you a worldwide, royalty-free, non-exclusive
15  *   license, subject to third party intellectual property claims:
16  *   (a) under intellectual property rights (other than patent or
17  *       trademark) licensable by JPRS to use, reproduce, modify, display,
18  *       perform, sublicense and distribute the Code (or portions thereof)
19  *       with or without modifications, and/or as part of a derivative work;
20  *       or
21  *   (b) under claims of the infringement through the making, using,
22  *       offering to sell and/or otherwise disposing the JPRS Revised Code
23  *       (or portions thereof);
24  *   (c) the licenses granted in this Section 1(a) and (b) are effective on
25  *       the date JPRS first distributes the Code to you under the terms of
26  *       this OSCL;
27  *   (d) Notwithstanding the above stated terms, no patent license is
28  *       granted:
29  *       1)  for a code that you delete from the Code;
30  *       2)  separate from the Code; or
31  *       3)  for infringements caused by:
32  *            i) modification of the Code; or
33  *           ii) combination of the Code with other software or devices.
34  *
35  * 2. Consents.
36  *   You agree that:
37  *   (a) you must include a copy of this OSCL and the notice set forth in
38  *       EXHIBIT A with every copy of the Code you distribute;
39  *   (b) you must include a copy of this OSCL and the notice set forth in
40  *       EXHIBIT A with every copy of binary form of the Code in the
41  *       documentation and/or other materials provided with the distribution;
42  *   (c) you may not offer or impose any terms on any source code version
43  *       that alters or restricts the applicable version of this OSCL or
44  *       the recipients' rights hereunder.
45  *   (d) If the terms and conditions are set forth in EXHIBIT A, you must
46  *       comply with those terms and conditions.
47  *
48  * 3. Proprietary Information.
49  *   All trademarks, service marks, patents, copyrights, trade secrets, and
50  *   other proprietary rights in or related to the Code are and will remain
51  *   the exclusive property of JPRS or its licensors, whether or not
52  *   specifically recognized or perfected under local law except specified
53  *   in this OSCL; provided however you agree and understand that the JPRS
54  *   name may not be used to endorse or promote this Code without prior
55  *   written approval of JPRS.
56  *
57  * 4. WARRANTY DISCLAIMER.
58  *   JPRS MAKES NO REPRESENTATIONS AND WARRANTIES REGARDING THE USE OF THE
59  *   CODE, NOR DOES JPRS MAKE ANY REPRESENTATIONS THAT THE CODE WILL BECOME
60  *   COMMERCIALLY AVAILABLE. JPRS, ITS AFFILIATES, AND ITS SUPPLIERS DO NOT
61  *   WARRANT OR REPRESENT THAT THE CODE IS FREE OF ERRORS OR THAT THE CODE
62  *   IS SUITABLE FOR TRANSLATION AND/OR LOCALIZATION. THE CODE IS PROVIDED
63  *   ON AN "AS IS" BASIS AND JPRS AND ITS SUPPLIERS HAVE NO OBLIGATION TO
64  *   CORRECT ERRORS OR TO SUPPORT THE CODE UNDER THIS OSCL FOR ANY REASON.
65  *   TO THE FULL EXTENT PERMITTED BY LAW, ALL OBLIGATIONS ARE HEREBY
66  *   EXCLUDED WHETHER EXPRESS, STATUTORY OR IMPLIED UNDER LAW, COURSE OF
67  *   DEALING, CUSTOM, TRADE USAGE, ORAL OR WRITTEN STATEMENT OR OTHERWISE,
68  *   INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES OF MERCHANTABILITY
69  *   OR FITNESS FOR A PARTICULAR PURPOSE CONCERNING THE CODE.
70  *
71  * 5. NO LIABILITY.
72  *   UNDER NO CIRCUMSTANCES SHALL JPRS AND/OR ITS AFFILIATES, LICENSORS, OR
73  *   REPRESENTATIVES BE LIABLE FOR ANY DAMAGES INCLUDING BUT NOT LIMITED TO
74  *   CONSEQUENTIAL, INDIRECT, SPECIAL, PUNITIVE OR INCIDENTAL DAMAGES,
75  *   WHETHER FORESEEABLE OR UNFORESEEABLE, BASED ON YOUR CLAIMS, INCLUDING,
76  *   BUT NOT LIMITED TO, CLAIMS FOR LOSS OF DATA, GOODWILL, PROFITS, USE OF
77  *   MONEY, INTERRUPTION IN USE OR AVAILABILITY OF DATA, STOPPAGE, IMPLIED
78  *   WARRANTY, BREACH OF CONTRACT, MISREPRESENTATION, NEGLIGENCE, STRICT
79  *   LIABILITY IN TORT, OR OTHERWISE.
80  *
81  * 6. Indemnification.
82  *   You hereby agree to indemnify, defend, and hold harmless JPRS for any
83  *   liability incurred by JRPS due to your terms of warranty, support,
84  *   indemnity, or liability offered by you to any third party.
85  *
86  * 7. Termination.
87  * 7.1 This OSCL shall be automatically terminated in the events that:
88  *   (a) You fail to comply with the terms herein and fail to cure such
89  *       breach within 30 days of becoming aware of the breach;
90  *   (b) You initiate patent or copyright infringement litigation against
91  *       any party (including a cross-claim or counterclaim in a lawsuit)
92  *       alleging that the Code constitutes a direct or indirect patent or
93  *       copyright infringement, in such case, this OSCL to you shall
94  *       terminate as of the date such litigation is filed;
95  * 7.2 In the event of termination under Sections 7.1(a) or 7.1(b) above,
96  *     all end user license agreements (excluding distributors and
97  *     resellers) which have been validly granted by You or any distributor
98  *     hereunder prior to termination shall survive termination.
99  *
100  *
101  * 8. General.
102  *   This OSCL shall be governed by, and construed and enforced in
103  *   accordance with, the laws of Japan. Any litigation or arbitration
104  *   between the parties shall be conducted exclusively in Tokyo, Japan
105  *   except written consent of JPRS provides other venue.
106  *
107  *
108  *                                EXHIBIT A
109  *
110  * The original open source code of idnkit-2 is idnkit-1.0 developed and
111  * conceived by Japan Network Information Center ("JPNIC"), a Japanese
112  * association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
113  * Chiyoda-ku, Tokyo 101-0047, Japan, and JPRS modifies above original code
114  * under following Terms and Conditions set forth by JPNIC.
115  *
116  *                                  JPNIC
117  *
118  * Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved.
119  *
120  * By using this file, you agree to the terms and conditions set forth bellow.
121  *
122  *                       LICENSE TERMS AND CONDITIONS
123  *
124  * The following License Terms and Conditions apply, unless a different
125  * license is obtained from Japan Network Information Center ("JPNIC"),
126  * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
127  * Chiyoda-ku, Tokyo 101-0047, Japan.
128  *
129  * 1. Use, Modification and Redistribution (including distribution of any
130  *    modified or derived work) in source and/or binary forms is permitted
131  *    under this License Terms and Conditions.
132  *
133  * 2. Redistribution of source code must retain the copyright notices as they
134  *    appear in each source code file, this License Terms and Conditions.
135  *
136  * 3. Redistribution in binary form must reproduce the Copyright Notice,
137  *    this License Terms and Conditions, in the documentation and/or other
138  *    materials provided with the distribution. For the purposes of binary
139  *    distribution the "Copyright Notice" refers to the following language:
140  *    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
141  *
142  * 4. The name of JPNIC may not be used to endorse or promote products
143  *    derived from this Software without specific prior written approval of
144  *    JPNIC.
145  *
146  * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
147  *    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
148  *    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
149  *    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JPNIC BE LIABLE
150  *    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
151  *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
152  *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
153  *    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
154  *    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
155  *    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
156  *    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
157  *
158  *
159  *                        JPRS Public License Notice
160  *                                   For
161  *                                idnkit-2.
162  *
163  * The contents of this file are subject to the Terms and Conditions for
164  * the Open Source Code License (the "OSCL"). You may not use this file
165  * except in compliance with above terms and conditions. A copy of the OSCL
166  * is available at <http://jprs.co.jp/idn/>.
167  * The JPRS Revised Code is idnkit-2.
168  * The Initial Developer of the JPRS Revised Code is Japan Network
169  * Information Center ("JPNIC"), a Japanese association,
170  * Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda, Chiyoda-ku, Tokyo
171  * 101-0047, Japan.
172  * "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
173  * "Copyright (c) 2010-2012 Japan Registry Services Co., Ltd.  All rights reserved."
174  * Contributor(s): ______________________________________.
175  *
176  * If you wish to allow use of your version of this file only under the
177  * above License(s) and not to allow others to use your version of this
178  * file, please indicate your decision by deleting the relevant provisions
179  * above and replacing them with the notice and other provisions required
180  * by the above License(s). If you do not delete the relevant provisions,
181  * a recipient may use your version of this file under either the above
182  * License(s).
183  */
184 
185 #include <config.h>
186 
187 #include <stddef.h>
188 #include <stdlib.h>
189 #include <string.h>
190 
191 #include <idn/assert.h>
192 #include <idn/debug.h>
193 #include <idn/logmacro.h>
194 #include <idn/result.h>
195 #include <idn/punycode.h>
196 #include <idn/utf32.h>
197 
/*
 * ACE prefix for Punycode ("xn--"), stored as a NUL-terminated array of
 * code points so it can be compared against and copied into code point
 * strings.
 */
static const unsigned long ace_prefix[] = {
	'x', 'n', '-', '-', '\0'
};

/* Number of code points in the prefix, not counting the terminating NUL. */
#define ACE_PREFIX_LEN (sizeof(ace_prefix) / sizeof(ace_prefix[0]) - 1)
204 
/*
 * Longest input (in code points) that idn__punycode_encode() accepts.
 *
 * As the Punycode specification notes, `delta' may overflow during
 * encoding.  The upper bound of `delta' is:
 *   <# of chars. of input string> + <max. difference in code point> *
 *   <# of chars. of input string + 1>
 * For this value not to exceed 0xffffffff (the calculation is done in
 * unsigned long, which is at least 32 bits wide), the maximum input
 * string size is about 3850 characters, which is more than long enough
 * for a domain label.  The limit below leaves a small safety margin.
 */
#define PUNYCODE_MAXINPUT	3800
216 
/*
 * Punycode algorithm parameters.
 */
enum {
	PUNYCODE_BASE		= 36,	/* number of digit values (a-z, 0-9) */
	PUNYCODE_TMIN		= 1,	/* smallest digit threshold */
	PUNYCODE_TMAX		= 26,	/* largest digit threshold */
	PUNYCODE_SKEW		= 38,	/* used when recomputing the bias */
	PUNYCODE_DAMP		= 700,	/* divisor for the first delta */
	PUNYCODE_INITIAL_BIAS	= 72,	/* bias before any delta is coded */
	PUNYCODE_INITIAL_N	= 0x80	/* first non-basic code point */
};
227 
/* Read one variable-length integer from 's'; returns digits consumed or 0. */
static int punycode_getwc(const unsigned long *s, size_t len, int bias,
			  unsigned long *vp);

/* Write 'delta' as a variable-length integer to 's'; returns digits or 0. */
static int punycode_putwc(unsigned long *s, size_t len, unsigned long delta,
			  int bias);

/* Compute the bias to use for the next variable-length integer. */
static int punycode_update_bias(unsigned long delta, size_t npoints,
				int first);
234 
235 int
idn__punycode_isacelabel(const unsigned long * label)236 idn__punycode_isacelabel(const unsigned long *label) {
237 	return (idn__utf32_strncasecmp(label, ace_prefix, ACE_PREFIX_LEN) == 0);
238 }
239 
240 idn_result_t
idn__punycode_decode(void * privdata,const unsigned long * from,unsigned long * to,size_t tolen)241 idn__punycode_decode(void *privdata, const unsigned long *from,
242 		     unsigned long *to, size_t tolen) {
243 	idn_result_t r = idn_success;
244 	unsigned long *to_org = to;
245 	unsigned long c, idx;
246 	size_t fromlen;
247 	size_t uidx, fidx, ucslen;
248 	int first, bias;
249 
250 	TRACE(("idn__punycode_decode(from=\"%s\", tolen=%d)\n",
251 	       idn__debug_utf32xstring(from), (int)tolen));
252 
253 	if (idn__utf32_strncasecmp(from, ace_prefix, ACE_PREFIX_LEN) != 0) {
254 		if (*from == '\0') {
255 			r = idn__utf32_strcpy(to, tolen, from);
256 			goto ret;
257 		}
258 		r = idn_invalid_encoding;
259 		goto ret;
260 	}
261 	from += ACE_PREFIX_LEN;
262 	fromlen = idn__utf32_strlen(from);
263 
264 	/*
265 	 * Find the last delimiter, and copy the characters
266 	 * before it verbatim.
267 	 */
268 	ucslen = 0;
269 	for (fidx = fromlen; fidx > 0; fidx--) {
270 		if (from[fidx - 1] == '-') {
271 			if (tolen < fidx) {
272 				r = idn_buffer_overflow;
273 				goto ret;
274 			}
275 			for (uidx = 0; uidx < fidx - 1; uidx++) {
276 				to[uidx] = from[uidx];
277 			}
278 			ucslen = uidx;
279 			break;
280 		}
281 	}
282 
283 	first = 1;
284 	bias = PUNYCODE_INITIAL_BIAS;
285 	c = PUNYCODE_INITIAL_N;
286 	idx = 0;
287 	while (fidx < fromlen) {
288 		int len;
289 		unsigned long delta;
290 		int i;
291 
292 		len = punycode_getwc(from + fidx, fromlen - fidx, bias, &delta);
293 		if (len == 0) {
294 			r = idn_invalid_encoding;
295 			goto ret;
296 		}
297 		fidx += len;
298 
299 		bias = punycode_update_bias(delta, ucslen + 1, first);
300 		first = 0;
301 		idx += delta;
302 		c += idx / (ucslen + 1);
303 		uidx = idx % (ucslen + 1);
304 
305 		/* Insert 'c' at uidx. */
306 		if (tolen-- <= 0) {
307 			r = idn_buffer_overflow;
308 			goto ret;
309 		}
310 		for (i = ucslen; i > uidx; i--)
311 			to[i] = to[i - 1];
312 		if (c == 0 || c > UTF32_MAX || IS_SURROGATE(c)) {
313 			r = idn_invalid_encoding;
314 			goto ret;
315 		}
316 		to[uidx] = c;
317 
318 		ucslen++;
319 		idx = uidx + 1;
320 	}
321 
322 	/* Terminate with NUL. */
323 	if (tolen <= 0) {
324 		r = idn_buffer_overflow;
325 		goto ret;
326 	}
327 	to[ucslen] = '\0';
328 
329 ret:
330 	if (r == idn_success) {
331 		TRACE(("idn__punycode_decode(): succcess (to=\"%s\")\n",
332 		       idn__debug_utf32xstring(to_org)));
333 	} else {
334 		TRACE(("idn__punycode_decode(): %s\n", idn_result_tostring(r)));
335 	}
336 	return (r);
337 }
338 
339 idn_result_t
idn__punycode_encode(void * privdata,const unsigned long * from,unsigned long * to,size_t tolen)340 idn__punycode_encode(void *privdata, const unsigned long *from,
341 		     unsigned long *to, size_t tolen) {
342 	idn_result_t r = idn_success;
343 	unsigned long *to_org = to;
344 	unsigned long cur_code, next_code, delta;
345 	size_t fromlen;
346 	size_t ucsdone;
347 	size_t toidx;
348 	int uidx, bias, first;
349 
350 	TRACE(("idn__punycode_encode(from=\"%s\", tolen=%d)\n",
351 	       idn__debug_utf32xstring(from), (int)tolen));
352 
353 	if (*from == '\0') {
354 		r = idn__utf32_strcpy(to, tolen, from);
355 		goto ret;
356 	}
357 
358 	r = idn__utf32_strcpy(to, tolen, ace_prefix);
359 	if (r != idn_success)
360 		goto ret;
361 
362 	to += ACE_PREFIX_LEN;
363 	tolen -= ACE_PREFIX_LEN;
364 
365 	fromlen = idn__utf32_strlen(from);
366 
367 	/*
368 	 * If the input string is too long (actually too long to be sane),
369 	 * return failure in order to prevent possible overflow.
370 	 */
371 	if (fromlen > PUNYCODE_MAXINPUT) {
372 		ERROR(("idn__punycode_encode(): "
373 		       "the input string is too long to convert Punycode\n",
374 		       idn__debug_utf32xstring(from)));
375 		r = idn_failure;
376 		goto ret;
377 	}
378 
379 	ucsdone = 0;	/* number of characters processed */
380 	toidx = 0;
381 
382 	/*
383 	 * First, pick up basic code points and copy them to 'to'.
384 	 */
385 	for (uidx = 0; uidx < fromlen; uidx++) {
386 		if (from[uidx] < 0x80) {
387 			if (toidx >= tolen) {
388 				r = idn_buffer_overflow;
389 				goto ret;
390 			}
391 			to[toidx++] = from[uidx];
392 			ucsdone++;
393 		}
394 	}
395 
396 	/*
397 	 * If there are any basic code points, output a delimiter
398 	 * (hyphen-minus).
399 	 */
400 	if (toidx > 0) {
401 		if (toidx >= tolen) {
402 			r = idn_buffer_overflow;
403 			goto ret;
404 		}
405 		to[toidx++] = '-';
406 		to += toidx;
407 		tolen -= toidx;
408 	}
409 
410 	/*
411 	 * Then encode non-basic characters.
412 	 */
413 	first = 1;
414 	cur_code = PUNYCODE_INITIAL_N;
415 	bias = PUNYCODE_INITIAL_BIAS;
416 	delta = 0;
417 	while (ucsdone < fromlen) {
418 		int limit = -1, rest;
419 
420 		/*
421 		 * Find the smallest code point equal to or greater
422 		 * than 'cur_code'.  Also remember the index of the
423 		 * last occurence of the code point.
424 		 */
425 		next_code = UTF32_MAX;
426 		for (uidx = fromlen - 1; uidx >= 0; uidx--) {
427 			if (from[uidx] >= cur_code &&
428 			    (limit < 0 || from[uidx] < next_code)) {
429 				next_code = from[uidx];
430 				limit = uidx;
431 			}
432 		}
433 		/* There must be such code point. */
434 		if (limit < 0 || next_code == 0 ||
435 		    next_code > UTF32_MAX || IS_SURROGATE(next_code)) {
436 			r = idn_invalid_encoding;
437 			goto ret;
438 		}
439 
440 		delta += (next_code - cur_code) * (ucsdone + 1);
441 		cur_code = next_code;
442 
443 		/*
444 		 * Scan the input string again, and encode characters
445 		 * whose code point is 'cur_code'.  Use 'limit' to avoid
446 		 * unnecessary scan.
447 		 */
448 		for (uidx = 0, rest = ucsdone; uidx <= limit; uidx++) {
449 			if (from[uidx] < cur_code) {
450 				delta++;
451 				rest--;
452 			} else if (from[uidx] == cur_code) {
453 				int sz = punycode_putwc(to, tolen, delta, bias);
454 				if (sz == 0) {
455 					r = idn_buffer_overflow;
456 					goto ret;
457 				}
458 				to += sz;
459 				tolen -= sz;
460 				ucsdone++;
461 				bias = punycode_update_bias(delta, ucsdone,
462 							   first);
463 				delta = 0;
464 				first = 0;
465 			}
466 		}
467 		delta += rest + 1;
468 		cur_code++;
469 	}
470 
471 	/*
472 	 * Terminate with NUL.
473 	 */
474 	if (tolen <= 0) {
475 		r = idn_buffer_overflow;
476 		goto ret;
477 	}
478 	*to = '\0';
479 
480 ret:
481 	if (r == idn_success) {
482 		TRACE(("idn__punycode_encode(): succcess (to=\"%s\")\n",
483 		       idn__debug_utf32xstring(to_org)));
484 	} else {
485 		TRACE(("idn__punycode_encode(): %s\n", idn_result_tostring(r)));
486 	}
487 	return (r);
488 }
489 
490 static int
punycode_getwc(const unsigned long * s,size_t len,int bias,unsigned long * vp)491 punycode_getwc(const unsigned long *s, size_t len, int bias,
492 	       unsigned long *vp) {
493 	size_t orglen = len;
494 	unsigned long v = 0, w = 1;
495 	int k;
496 
497 	for (k = PUNYCODE_BASE - bias; len > 0; k += PUNYCODE_BASE) {
498 		unsigned long c = *s++;
499 		int t = (k < PUNYCODE_TMIN) ? PUNYCODE_TMIN :
500 			(k > PUNYCODE_TMAX) ? PUNYCODE_TMAX : k;
501 
502 		len--;
503 		if ('a' <= c && c <= 'z')
504 			c = c - 'a';
505 		else if ('A' <= c && c <= 'Z')
506 			c = c - 'A';
507 		else if ('0' <= c && c <= '9')
508 			c = c - '0' + 26;
509 		else
510 			return (0);	/* invalid character */
511 
512 		v += c * w;
513 
514 		if (c < t) {
515 			*vp = v;
516 			return (orglen - len);
517 		}
518 
519 		w *= (PUNYCODE_BASE - t);
520 	}
521 
522 	return (0);	/* final character missing */
523 }
524 
525 static int
punycode_putwc(unsigned long * s,size_t len,unsigned long delta,int bias)526 punycode_putwc(unsigned long *s, size_t len, unsigned long delta, int bias) {
527 	static const char *punycode_base36
528 		= "abcdefghijklmnopqrstuvwxyz0123456789";
529 	unsigned long *sorg = s;
530 	int k;
531 
532 	for (k = PUNYCODE_BASE - bias; 1; k += PUNYCODE_BASE) {
533 		int t = (k < PUNYCODE_TMIN) ? PUNYCODE_TMIN :
534 			(k > PUNYCODE_TMAX) ? PUNYCODE_TMAX : k;
535 
536 		if (delta < t)
537 			break;
538 		if (len < 1)
539 			return (0);
540 		*s++ = punycode_base36[t + ((delta - t) % (PUNYCODE_BASE - t))];
541 		len--;
542 		delta = (delta - t) / (PUNYCODE_BASE - t);
543 	}
544 	if (len < 1)
545 		return (0);
546 	*s++ = punycode_base36[delta];
547 	return (s - sorg);
548 }
549 
550 static int
punycode_update_bias(unsigned long delta,size_t npoints,int first)551 punycode_update_bias(unsigned long delta, size_t npoints, int first) {
552 	int k = 0;
553 
554 	delta /= first ? PUNYCODE_DAMP : 2;
555 	delta += delta / npoints;
556 
557 	while (delta > ((PUNYCODE_BASE - PUNYCODE_TMIN) * PUNYCODE_TMAX) / 2) {
558 		delta /= PUNYCODE_BASE - PUNYCODE_TMIN;
559 		k++;
560 	}
561 	return (PUNYCODE_BASE * k +
562 		(((PUNYCODE_BASE - PUNYCODE_TMIN + 1) * delta) /
563 		 (delta + PUNYCODE_SKEW)));
564 }
565