1# 2# 3# Nim's Runtime Library 4# (c) Copyright 2016 Andreas Rumpf 5# 6# See the file "copying.txt", included in this 7# distribution, for details about the copyright. 8# 9 10## Implements a representation of Unicode with the limited 11## ASCII character subset. 12 13import strutils 14import unicode 15 16# issue #3045 17 18const 19 Base = 36 20 TMin = 1 21 TMax = 26 22 Skew = 38 23 Damp = 700 24 InitialBias = 72 25 InitialN = 128 26 Delimiter = '-' 27 28type 29 PunyError* = object of ValueError 30 31proc decodeDigit(x: char): int {.raises: [PunyError].} = 32 if '0' <= x and x <= '9': 33 result = ord(x) - (ord('0') - 26) 34 elif 'A' <= x and x <= 'Z': 35 result = ord(x) - ord('A') 36 elif 'a' <= x and x <= 'z': 37 result = ord(x) - ord('a') 38 else: 39 raise newException(PunyError, "Bad input") 40 41proc encodeDigit(digit: int): Rune {.raises: [PunyError].} = 42 if 0 <= digit and digit < 26: 43 result = Rune(digit + ord('a')) 44 elif 26 <= digit and digit < 36: 45 result = Rune(digit + (ord('0') - 26)) 46 else: 47 raise newException(PunyError, "internal error in punycode encoding") 48 49proc isBasic(c: char): bool = ord(c) < 0x80 50proc isBasic(r: Rune): bool = int(r) < 0x80 51 52proc adapt(delta, numPoints: int, first: bool): int = 53 var d = if first: delta div Damp else: delta div 2 54 d += d div numPoints 55 var k = 0 56 while d > ((Base-TMin)*TMax) div 2: 57 d = d div (Base - TMin) 58 k += Base 59 result = k + (Base - TMin + 1) * d div (d + Skew) 60 61proc encode*(prefix, s: string): string {.raises: [PunyError].} = 62 ## Encode a string that may contain Unicode. 63 ## Prepend `prefix` to the result 64 result = prefix 65 var (d, n, bias) = (0, InitialN, InitialBias) 66 var (b, remaining) = (0, 0) 67 for r in s.runes: 68 if r.isBasic: 69 # basic Ascii character 70 inc b 71 result.add($r) 72 else: 73 # special character 74 inc remaining 75 76 var h = b 77 if b > 0: 78 result.add(Delimiter) # we have some Ascii chars 79 while remaining != 0: 80 var m: int = high(int32) 81 for r in s.runes: 82 if m > int(r) and int(r) >= n: 83 m = int(r) 84 d += (m - n) * (h + 1) 85 if d < 0: 86 raise newException(PunyError, "invalid label " & s) 87 n = m 88 for r in s.runes: 89 if int(r) < n: 90 inc d 91 if d < 0: 92 raise newException(PunyError, "invalid label " & s) 93 continue 94 if int(r) > n: 95 continue 96 var q = d 97 var k = Base 98 while true: 99 var t = k - bias 100 if t < TMin: 101 t = TMin 102 elif t > TMax: 103 t = TMax 104 if q < t: 105 break 106 result.add($encodeDigit(t + (q - t) mod (Base - t))) 107 q = (q - t) div (Base - t) 108 k += Base 109 result.add($encodeDigit(q)) 110 bias = adapt(d, h + 1, h == b) 111 d = 0 112 inc h 113 dec remaining 114 inc d 115 inc n 116 117proc encode*(s: string): string {.raises: [PunyError].} = 118 ## Encode a string that may contain Unicode. Prefix is empty. 119 result = encode("", s) 120 121proc decode*(encoded: string): string {.raises: [PunyError].} = 122 ## Decode a Punycode-encoded string 123 var 124 n = InitialN 125 i = 0 126 bias = InitialBias 127 var d = rfind(encoded, Delimiter) 128 result = "" 129 130 if d > 0: 131 # found Delimiter 132 for j in 0..<d: 133 var c = encoded[j] # char 134 if not c.isBasic: 135 raise newException(PunyError, "Encoded contains a non-basic char") 136 result.add(c) # add the character 137 inc d 138 else: 139 d = 0 # set to first index 140 141 while (d < len(encoded)): 142 var oldi = i 143 var w = 1 144 var k = Base 145 while true: 146 if d == len(encoded): 147 raise newException(PunyError, "Bad input: " & encoded) 148 var c = encoded[d]; inc d 149 var digit = int(decodeDigit(c)) 150 if digit > (high(int32) - i) div w: 151 raise newException(PunyError, "Too large a value: " & $digit) 152 i += digit * w 153 var t: int 154 if k <= bias: 155 t = TMin 156 elif k >= bias + TMax: 157 t = TMax 158 else: 159 t = k - bias 160 if digit < t: 161 break 162 w *= Base - t 163 k += Base 164 bias = adapt(i - oldi, runeLen(result) + 1, oldi == 0) 165 166 if i div (runeLen(result) + 1) > high(int32) - n: 167 raise newException(PunyError, "Value too large") 168 169 n += i div (runeLen(result) + 1) 170 i = i mod (runeLen(result) + 1) 171 insert(result, $Rune(n), i) 172 inc i 173 174 175runnableExamples: 176 static: 177 block: 178 doAssert encode("") == "" 179 doAssert encode("a") == "a-" 180 doAssert encode("A") == "A-" 181 doAssert encode("3") == "3-" 182 doAssert encode("-") == "--" 183 doAssert encode("--") == "---" 184 doAssert encode("abc") == "abc-" 185 doAssert encode("London") == "London-" 186 doAssert encode("Lloyd-Atkinson") == "Lloyd-Atkinson-" 187 doAssert encode("This has spaces") == "This has spaces-" 188 doAssert encode("ü") == "tda" 189 doAssert encode("München") == "Mnchen-3ya" 190 doAssert encode("Mnchen-3ya") == "Mnchen-3ya-" 191 doAssert encode("München-Ost") == "Mnchen-Ost-9db" 192 doAssert encode("Bahnhof München-Ost") == "Bahnhof Mnchen-Ost-u6b" 193 block: 194 doAssert decode("") == "" 195 doAssert decode("a-") == "a" 196 doAssert decode("A-") == "A" 197 doAssert decode("3-") == "3" 198 doAssert decode("--") == "-" 199 doAssert decode("---") == "--" 200 doAssert decode("abc-") == "abc" 201 doAssert decode("London-") == "London" 202 doAssert decode("Lloyd-Atkinson-") == "Lloyd-Atkinson" 203 doAssert decode("This has spaces-") == "This has spaces" 204 doAssert decode("tda") == "ü" 205 doAssert decode("Mnchen-3ya") == "München" 206 doAssert decode("Mnchen-3ya-") == "Mnchen-3ya" 207 doAssert decode("Mnchen-Ost-9db") == "München-Ost" 208 doAssert decode("Bahnhof Mnchen-Ost-u6b") == "Bahnhof München-Ost" 209