1 /*	$NetBSD: punycode.c,v 1.1.1.1 2011/04/13 18:15:58 elric Exp $	*/
2 
3 /*
4  * Copyright (c) 2004 Kungliga Tekniska Högskolan
5  * (Royal Institute of Technology, Stockholm, Sweden).
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * 3. Neither the name of the Institute nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39 #include <string.h>
40 #include "windlocl.h"
41 
42 static const unsigned base         = 36;
43 static const unsigned t_min        = 1;
44 static const unsigned t_max        = 26;
45 static const unsigned skew         = 38;
46 static const unsigned damp         = 700;
47 static const unsigned initial_n    = 128;
48 static const unsigned initial_bias = 72;
49 
50 static unsigned
digit(unsigned n)51 digit(unsigned n)
52 {
53     return "abcdefghijklmnopqrstuvwxyz0123456789"[n];
54 }
55 
56 static unsigned
adapt(unsigned delta,unsigned numpoints,int first)57 adapt(unsigned delta, unsigned numpoints, int first)
58 {
59     unsigned k;
60 
61     if (first)
62 	delta = delta / damp;
63     else
64 	delta /= 2;
65     delta += delta / numpoints;
66     k = 0;
67     while (delta > ((base - t_min) * t_max) / 2) {
68 	delta /= base - t_min;
69 	k += base;
70     }
71     return k + (((base - t_min + 1) * delta) / (delta + skew));
72 }
73 
74 /**
75  * Convert an UCS4 string to a puny-coded DNS label string suitable
76  * when combined with delimiters and other labels for DNS lookup.
77  *
78  * @param in an UCS4 string to convert
79  * @param in_len the length of in.
80  * @param out the resulting puny-coded string. The string is not NUL
81  * terminatied.
82  * @param out_len before processing out_len should be the length of
83  * the out variable, after processing it will be the length of the out
84  * string.
85  *
86  * @return returns 0 on success, an wind error code otherwise
87  * @ingroup wind
88  */
89 
90 int
wind_punycode_label_toascii(const uint32_t * in,size_t in_len,char * out,size_t * out_len)91 wind_punycode_label_toascii(const uint32_t *in, size_t in_len,
92 			    char *out, size_t *out_len)
93 {
94     unsigned n     = initial_n;
95     unsigned delta = 0;
96     unsigned bias  = initial_bias;
97     unsigned h = 0;
98     unsigned b;
99     unsigned i;
100     unsigned o = 0;
101     unsigned m;
102 
103     for (i = 0; i < in_len; ++i) {
104 	if (in[i] < 0x80) {
105 	    ++h;
106 	    if (o >= *out_len)
107 		return WIND_ERR_OVERRUN;
108 	    out[o++] = in[i];
109 	}
110     }
111     b = h;
112     if (b > 0) {
113 	if (o >= *out_len)
114 	    return WIND_ERR_OVERRUN;
115 	out[o++] = 0x2D;
116     }
117     /* is this string punycoded */
118     if (h < in_len) {
119 	if (o + 4 >= *out_len)
120 	    return WIND_ERR_OVERRUN;
121 	memmove(out + 4, out, o);
122 	memcpy(out, "xn--", 4);
123 	o += 4;
124     }
125 
126     while (h < in_len) {
127 	m = (unsigned)-1;
128 	for (i = 0; i < in_len; ++i)
129 	    if(in[i] < m && in[i] >= n)
130 		m = in[i];
131 
132 	delta += (m - n) * (h + 1);
133 	n = m;
134 	for (i = 0; i < in_len; ++i) {
135 	    if (in[i] < n) {
136 		++delta;
137 	    } else if (in[i] == n) {
138 		unsigned q = delta;
139 		unsigned k;
140 		for (k = base; ; k += base) {
141 		    unsigned t;
142 		    if (k <= bias)
143 			t = t_min;
144 		    else if (k >= bias + t_max)
145 			t = t_max;
146 		    else
147 			t = k - bias;
148 		    if (q < t)
149 			break;
150 		    if (o >= *out_len)
151 			return WIND_ERR_OVERRUN;
152 		    out[o++] = digit(t + ((q - t) % (base - t)));
153 		    q = (q - t) / (base - t);
154 		}
155 		if (o >= *out_len)
156 		    return WIND_ERR_OVERRUN;
157 		out[o++] = digit(q);
158 		/* output */
159 		bias = adapt(delta, h + 1, h == b);
160 		delta = 0;
161 		++h;
162 	    }
163 	}
164 	++delta;
165 	++n;
166     }
167 
168     *out_len = o;
169     return 0;
170 }
171