1<?php
2/**
3 * Copyright 2014 TrueServer B.V.
4 * Copyright 2015-2017 Horde LLC (http://www.horde.org/)
5 *
6 * See the enclosed file COPYING for license information (BSD). If you
7 * did not receive this file, see http://www.horde.org/licenses/bsd.
8 *
9 * @author   Renan Gonçalves <renan.saddam@gmail.com>
10 * @author   Michael Slusarz <slusarz@horde.org>
11 * @category Horde
12 * @license  http://www.horde.org/licenses/bsd BSD
13 * @package  Idna
14 */
15
16/**
17 * Punycode implementation as described in RFC 3492.
18 *
19 * Original code (v1.0.1; released under the MIT License):
20 *     https://github.com/true/php-punycode/
21 *
22 * @author    Renan Gonçalves <renan.saddam@gmail.com>
23 * @author    Michael Slusarz <slusarz@horde.org>
24 * @category  Horde
25 * @copyright 2014 TrueServer B.V.
26 * @copyright 2015-2017 Horde LLC
27 * @license   http://www.horde.org/licenses/bsd BSD
28 * @package   Idna
29 * @link      http://tools.ietf.org/html/rfc3492
30 */
class Horde_Idna_Punycode
{
    /**
     * Bootstring parameter values (RFC 3492, section 5).
     */
    const BASE         = 36;
    const TMIN         = 1;
    const TMAX         = 26;
    const SKEW         = 38;
    const DAMP         = 700;
    const INITIAL_BIAS = 72;
    const INITIAL_N    = 128;
    const PREFIX       = 'xn--';
    const DELIMITER    = '-';

    /**
     * Encode table: maps a digit value (0-35) to its Punycode character.
     *
     * @var array
     */
    protected static $_encodeTable = array(
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
        'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
        'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    );

    /**
     * Decode table: maps a (lowercase) Punycode character to its digit
     * value (0-35).
     *
     * @var array
     */
    protected static $_decodeTable = array(
        'a' =>  0, 'b' =>  1, 'c' =>  2, 'd' =>  3, 'e' =>  4, 'f' =>  5,
        'g' =>  6, 'h' =>  7, 'i' =>  8, 'j' =>  9, 'k' => 10, 'l' => 11,
        'm' => 12, 'n' => 13, 'o' => 14, 'p' => 15, 'q' => 16, 'r' => 17,
        's' => 18, 't' => 19, 'u' => 20, 'v' => 21, 'w' => 22, 'x' => 23,
        'y' => 24, 'z' => 25, '0' => 26, '1' => 27, '2' => 28, '3' => 29,
        '4' => 30, '5' => 31, '6' => 32, '7' => 33, '8' => 34, '9' => 35
    );

    /**
     * Encode a domain to its Punycode version.
     *
     * The input is split on '.', every label is encoded on its own, and
     * the labels are joined with '.' again. Labels consisting only of
     * basic (ASCII) code points are returned unchanged.
     *
     * @param string $input  Domain name in Unicode (UTF-8) to be encoded.
     *
     * @return string  Punycode representation in ASCII.
     */
    public function encode($input)
    {
        $parts = explode('.', $input);

        /* Assign by key rather than by reference so that no dangling
         * reference to the last element is left behind. */
        foreach ($parts as $key => $part) {
            $parts[$key] = $this->_encodePart($part);
        }

        return implode('.', $parts);
    }

    /**
     * Encode a part of a domain name, such as tld, to its Punycode version.
     *
     * @param string $input  Part of a domain name (UTF-8).
     *
     * @return string  Punycode representation of a domain part. The input
     *                 is returned as-is if it only contains basic code
     *                 points; otherwise the result carries the ACE prefix.
     */
    protected function _encodePart($input)
    {
        $codePoints = $this->_codePoints($input);

        $n = static::INITIAL_N;
        $bias = static::INITIAL_BIAS;
        $delta = 0;
        /* $b: number of basic code points; $h: number of code points
         * that have been handled so far. */
        $h = $b = count($codePoints['basic']);

        /* Basic code points are copied to the output literally. */
        $output = '';
        foreach ($codePoints['basic'] as $code) {
            $output .= $this->_codePointToChar($code);
        }
        if ($input === $output) {
            /* Nothing but basic code points: no encoding necessary. */
            return $output;
        }
        if ($b > 0) {
            $output .= static::DELIMITER;
        }

        /* Non-basic code points are processed in ascending order, each
         * distinct value exactly once. */
        $codePoints['nonBasic'] = array_unique($codePoints['nonBasic']);
        sort($codePoints['nonBasic']);

        $next = 0;
        /* _codePoints() adds exactly one entry to 'all' per character, so
         * this is the character length of the label. */
        $length = count($codePoints['all']);

        while ($h < $length) {
            $m = $codePoints['nonBasic'][$next++];
            $delta += ($m - $n) * ($h + 1);
            $n = $m;

            foreach ($codePoints['all'] as $c) {
                /* $n never drops below INITIAL_N, so this comparison also
                 * covers every basic code point. */
                if ($c < $n) {
                    ++$delta;
                }

                if ($c === $n) {
                    /* Emit $delta as a generalized variable-length
                     * integer. */
                    $q = $delta;
                    for ($k = static::BASE; ; $k += static::BASE) {
                        $t = $this->_calculateThreshold($k, $bias);
                        if ($q < $t) {
                            break;
                        }

                        $code = $t + (($q - $t) % (static::BASE - $t));
                        $output .= static::$_encodeTable[$code];

                        /* Integer division as required by RFC 3492. The
                         * explicit cast keeps $q an integer; a float here
                         * would later be used as modulo operand and as
                         * array key, which is deprecated since PHP 8.1. */
                        $q = (int) (($q - $t) / (static::BASE - $t));
                    }

                    $output .= static::$_encodeTable[$q];
                    $bias = $this->_adapt($delta, $h + 1, ($h === $b));
                    $delta = 0;
                    ++$h;
                }
            }

            ++$delta;
            ++$n;
        }

        return static::PREFIX . $output;
    }

    /**
     * Decode a Punycode domain name to its Unicode counterpart.
     *
     * Only labels that start with the ACE prefix are decoded; all other
     * labels are passed through untouched. The prefix is matched
     * case-insensitively (RFC 3490, section 5).
     *
     * @param string $input  Domain name in Punycode
     *
     * @return string  Unicode domain name.
     */
    public function decode($input)
    {
        $parts = explode('.', $input);
        $prefixLength = strlen(static::PREFIX);

        foreach ($parts as $key => $part) {
            if (strncasecmp($part, static::PREFIX, $prefixLength) === 0) {
                /* The cast guards against substr() returning false for a
                 * label consisting of the bare prefix on old PHP
                 * versions. */
                $parts[$key] = $this->_decodePart(
                    (string) substr($part, $prefixLength)
                );
            }
        }

        return implode('.', $parts);
    }

    /**
     * Decode a part of domain name, such as tld.
     *
     * NOTE: No validation of the encoded data is performed here. Characters
     * are looked up in the decode table and the input string is indexed
     * without any bounds or membership checks, so callers should only pass
     * well-formed Punycode.
     *
     * @param string $input  Part of a domain name, without the ACE prefix.
     *
     * @return string  Unicode domain part.
     */
    protected function _decodePart($input)
    {
        $n = static::INITIAL_N;
        $i = 0;
        $bias = static::INITIAL_BIAS;
        $output = '';

        /* Everything before the last delimiter are literal basic code
         * points; the encoded deltas start right after it. */
        $pos = strrpos($input, static::DELIMITER);
        if ($pos !== false) {
            $output = substr($input, 0, $pos++);
        } else {
            $pos = 0;
        }

        /* $output is plain ASCII at this point, so bytes == characters. */
        $outputLength = strlen($output);
        $inputLength = strlen($input);

        /* Punycode lookup is case-insensitive. The literal part has
         * already been copied above and thus keeps its case. */
        $input = Horde_String::lower($input);

        while ($pos < $inputLength) {
            $oldi = $i;
            $w = 1;

            /* Read one generalized variable-length integer into $i. */
            for ($k = static::BASE; ; $k += static::BASE) {
                $digit = static::$_decodeTable[$input[$pos++]];
                $i = $i + ($digit * $w);
                $t = $this->_calculateThreshold($k, $bias);

                if ($digit < $t) {
                    break;
                }

                $w = $w * (static::BASE - $t);
            }

            $bias = $this->_adapt($i - $oldi, ++$outputLength, ($oldi === 0));
            /* $i encodes both the code point increment (quotient) and the
             * insertion position (remainder). */
            $n = $n + (int) ($i / $outputLength);
            $i = $i % ($outputLength);

            $output = Horde_String::substr($output, 0, $i, 'UTF-8') .
                $this->_codePointToChar($n) .
                Horde_String::substr($output, $i, $outputLength - 1, 'UTF-8');

            ++$i;
        }

        return $output;
    }

    /**
     * Calculate the bias threshold to fall between TMIN and TMAX.
     *
     * @param integer $k     Current digit position weight (a multiple of
     *                       BASE).
     * @param integer $bias  Current bias.
     *
     * @return integer  ($k - $bias), clamped to the range TMIN..TMAX.
     */
    protected function _calculateThreshold($k, $bias)
    {
        if ($k <= ($bias + static::TMIN)) {
            return static::TMIN;
        } elseif ($k >= ($bias + static::TMAX)) {
            return static::TMAX;
        }
        return $k - $bias;
    }

    /**
     * Bias adaptation.
     *
     * @param integer $delta      The delta that has just been
     *                            encoded/decoded.
     * @param integer $numPoints  Number of code points handled so far,
     *                            including the current one.
     * @param boolean $firstTime  Whether this is the first adaptation for
     *                            the label (delta is divided by DAMP
     *                            instead of 2).
     *
     * @return integer  The new bias.
     */
    protected function _adapt($delta, $numPoints, $firstTime)
    {
        $delta = (int) (
            ($firstTime)
                ? $delta / static::DAMP
                : $delta / 2
            );
        $delta += (int) ($delta / $numPoints);

        $k = 0;
        while ($delta > ((static::BASE - static::TMIN) * static::TMAX) / 2) {
            $delta = (int) ($delta / (static::BASE - static::TMIN));
            $k = $k + static::BASE;
        }
        $k = $k + (int) (((static::BASE - static::TMIN + 1) * $delta) / ($delta + static::SKEW));

        return $k;
    }

    /**
     * List code points for a given input.
     *
     * @param string $input  UTF-8 string.
     *
     * @return array  Multi-dimension array with basic ('basic'), non-basic
     *                ('nonBasic') and aggregated ('all') code points, each
     *                in input order.
     */
    protected function _codePoints($input)
    {
        $codePoints = array(
            'all'      => array(),
            'basic'    => array(),
            'nonBasic' => array()
        );

        $len = Horde_String::length($input, 'UTF-8');
        for ($i = 0; $i < $len; ++$i) {
            $char = Horde_String::substr($input, $i, 1, 'UTF-8');
            $code = $this->_charToCodePoint($char);
            /* Code points below 128 (ASCII) are the basic code points. */
            if ($code < 128) {
                $codePoints['all'][] = $codePoints['basic'][] = $code;
            } else {
                $codePoints['all'][] = $codePoints['nonBasic'][] = $code;
            }
        }

        return $codePoints;
    }

    /**
     * Convert a single or multi-byte character to its code point.
     *
     * The number of bytes to read is derived from the lead byte (1 to 4
     * bytes).
     *
     * @param string $char  A single UTF-8 encoded character.
     *
     * @return integer  The code point.
     */
    protected function _charToCodePoint($char)
    {
        $code = ord($char[0]);
        if ($code < 128) {
            return $code;
        } elseif ($code < 224) {
            return (($code - 192) * 64) + (ord($char[1]) - 128);
        } elseif ($code < 240) {
            return (($code - 224) * 4096) + ((ord($char[1]) - 128) * 64) + (ord($char[2]) - 128);
        }
        return (($code - 240) * 262144) + ((ord($char[1]) - 128) * 4096) + ((ord($char[2]) - 128) * 64) + (ord($char[3]) - 128);
    }

    /**
     * Convert a code point to its single or multi-byte character
     *
     * @param integer $code  The code point.
     *
     * @return string  The UTF-8 encoded character (1 to 4 bytes).
     */
    protected function _codePointToChar($code)
    {
        if ($code <= 0x7F) {
            return chr($code);
        } elseif ($code <= 0x7FF) {
            return chr(($code >> 6) + 192) . chr(($code & 63) + 128);
        } elseif ($code <= 0xFFFF) {
            return chr(($code >> 12) + 224) . chr((($code >> 6) & 63) + 128) . chr(($code & 63) + 128);
        }
        return chr(($code >> 18) + 240) . chr((($code >> 12) & 63) + 128) . chr((($code >> 6) & 63) + 128) . chr(($code & 63) + 128);
    }

}
355