1// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. 2 3// Copyright 2016 The Go Authors. All rights reserved. 4// Use of this source code is governed by a BSD-style 5// license that can be found in the LICENSE file. 6 7// +build !go1.10 8 9// Package idna implements IDNA2008 using the compatibility processing 10// defined by UTS (Unicode Technical Standard) #46, which defines a standard to 11// deal with the transition from IDNA2003. 12// 13// IDNA2008 (Internationalized Domain Names for Applications), is defined in RFC 14// 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894. 15// UTS #46 is defined in https://www.unicode.org/reports/tr46. 16// See https://unicode.org/cldr/utility/idna.jsp for a visualization of the 17// differences between these two standards. 18package idna // import "golang.org/x/net/idna" 19 20import ( 21 "fmt" 22 "strings" 23 "unicode/utf8" 24 25 "golang.org/x/text/secure/bidirule" 26 "golang.org/x/text/unicode/norm" 27) 28 29// NOTE: Unlike common practice in Go APIs, the functions will return a 30// sanitized domain name in case of errors. Browsers sometimes use a partially 31// evaluated string as lookup. 32// TODO: the current error handling is, in my opinion, the least opinionated. 33// Other strategies are also viable, though: 34// Option 1) Return an empty string in case of error, but allow the user to 35// specify explicitly which errors to ignore. 36// Option 2) Return the partially evaluated string if it is itself a valid 37// string, otherwise return the empty string in case of error. 38// Option 3) Option 1 and 2. 39// Option 4) Always return an empty string for now and implement Option 1 as 40// needed, and document that the return string may not be empty in case of 41// error in the future. 42// I think Option 1 is best, but it is quite opinionated. 43 44// ToASCII is a wrapper for Punycode.ToASCII. 
45func ToASCII(s string) (string, error) { 46 return Punycode.process(s, true) 47} 48 49// ToUnicode is a wrapper for Punycode.ToUnicode. 50func ToUnicode(s string) (string, error) { 51 return Punycode.process(s, false) 52} 53 54// An Option configures a Profile at creation time. 55type Option func(*options) 56 57// Transitional sets a Profile to use the Transitional mapping as defined in UTS 58// #46. This will cause, for example, "ß" to be mapped to "ss". Using the 59// transitional mapping provides a compromise between IDNA2003 and IDNA2008 60// compatibility. It is used by most browsers when resolving domain names. This 61// option is only meaningful if combined with MapForLookup. 62func Transitional(transitional bool) Option { 63 return func(o *options) { o.transitional = true } 64} 65 66// VerifyDNSLength sets whether a Profile should fail if any of the IDN parts 67// are longer than allowed by the RFC. 68func VerifyDNSLength(verify bool) Option { 69 return func(o *options) { o.verifyDNSLength = verify } 70} 71 72// RemoveLeadingDots removes leading label separators. Leading runes that map to 73// dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well. 74// 75// This is the behavior suggested by the UTS #46 and is adopted by some 76// browsers. 77func RemoveLeadingDots(remove bool) Option { 78 return func(o *options) { o.removeLeadingDots = remove } 79} 80 81// ValidateLabels sets whether to check the mandatory label validation criteria 82// as defined in Section 5.4 of RFC 5891. This includes testing for correct use 83// of hyphens ('-'), normalization, validity of runes, and the context rules. 84func ValidateLabels(enable bool) Option { 85 return func(o *options) { 86 // Don't override existing mappings, but set one that at least checks 87 // normalization if it is not set. 
88 if o.mapping == nil && enable { 89 o.mapping = normalize 90 } 91 o.trie = trie 92 o.validateLabels = enable 93 o.fromPuny = validateFromPunycode 94 } 95} 96 97// StrictDomainName limits the set of permissable ASCII characters to those 98// allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the 99// hyphen). This is set by default for MapForLookup and ValidateForRegistration. 100// 101// This option is useful, for instance, for browsers that allow characters 102// outside this range, for example a '_' (U+005F LOW LINE). See 103// http://www.rfc-editor.org/std/std3.txt for more details This option 104// corresponds to the UseSTD3ASCIIRules option in UTS #46. 105func StrictDomainName(use bool) Option { 106 return func(o *options) { 107 o.trie = trie 108 o.useSTD3Rules = use 109 o.fromPuny = validateFromPunycode 110 } 111} 112 113// NOTE: the following options pull in tables. The tables should not be linked 114// in as long as the options are not used. 115 116// BidiRule enables the Bidi rule as defined in RFC 5893. Any application 117// that relies on proper validation of labels should include this rule. 118func BidiRule() Option { 119 return func(o *options) { o.bidirule = bidirule.ValidString } 120} 121 122// ValidateForRegistration sets validation options to verify that a given IDN is 123// properly formatted for registration as defined by Section 4 of RFC 5891. 124func ValidateForRegistration() Option { 125 return func(o *options) { 126 o.mapping = validateRegistration 127 StrictDomainName(true)(o) 128 ValidateLabels(true)(o) 129 VerifyDNSLength(true)(o) 130 BidiRule()(o) 131 } 132} 133 134// MapForLookup sets validation and mapping options such that a given IDN is 135// transformed for domain name lookup according to the requirements set out in 136// Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894, 137// RFC 5895 and UTS 46. It does not add the Bidi Rule. Use the BidiRule option 138// to add this check. 
139// 140// The mappings include normalization and mapping case, width and other 141// compatibility mappings. 142func MapForLookup() Option { 143 return func(o *options) { 144 o.mapping = validateAndMap 145 StrictDomainName(true)(o) 146 ValidateLabels(true)(o) 147 RemoveLeadingDots(true)(o) 148 } 149} 150 151type options struct { 152 transitional bool 153 useSTD3Rules bool 154 validateLabels bool 155 verifyDNSLength bool 156 removeLeadingDots bool 157 158 trie *idnaTrie 159 160 // fromPuny calls validation rules when converting A-labels to U-labels. 161 fromPuny func(p *Profile, s string) error 162 163 // mapping implements a validation and mapping step as defined in RFC 5895 164 // or UTS 46, tailored to, for example, domain registration or lookup. 165 mapping func(p *Profile, s string) (string, error) 166 167 // bidirule, if specified, checks whether s conforms to the Bidi Rule 168 // defined in RFC 5893. 169 bidirule func(s string) bool 170} 171 172// A Profile defines the configuration of a IDNA mapper. 173type Profile struct { 174 options 175} 176 177func apply(o *options, opts []Option) { 178 for _, f := range opts { 179 f(o) 180 } 181} 182 183// New creates a new Profile. 184// 185// With no options, the returned Profile is the most permissive and equals the 186// Punycode Profile. Options can be passed to further restrict the Profile. The 187// MapForLookup and ValidateForRegistration options set a collection of options, 188// for lookup and registration purposes respectively, which can be tailored by 189// adding more fine-grained options, where later options override earlier 190// options. 191func New(o ...Option) *Profile { 192 p := &Profile{} 193 apply(&p.options, o) 194 return p 195} 196 197// ToASCII converts a domain or domain label to its ASCII form. For example, 198// ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and 199// ToASCII("golang") is "golang". 
If an error is encountered it will return 200// an error and a (partially) processed result. 201func (p *Profile) ToASCII(s string) (string, error) { 202 return p.process(s, true) 203} 204 205// ToUnicode converts a domain or domain label to its Unicode form. For example, 206// ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and 207// ToUnicode("golang") is "golang". If an error is encountered it will return 208// an error and a (partially) processed result. 209func (p *Profile) ToUnicode(s string) (string, error) { 210 pp := *p 211 pp.transitional = false 212 return pp.process(s, false) 213} 214 215// String reports a string with a description of the profile for debugging 216// purposes. The string format may change with different versions. 217func (p *Profile) String() string { 218 s := "" 219 if p.transitional { 220 s = "Transitional" 221 } else { 222 s = "NonTransitional" 223 } 224 if p.useSTD3Rules { 225 s += ":UseSTD3Rules" 226 } 227 if p.validateLabels { 228 s += ":ValidateLabels" 229 } 230 if p.verifyDNSLength { 231 s += ":VerifyDNSLength" 232 } 233 return s 234} 235 236var ( 237 // Punycode is a Profile that does raw punycode processing with a minimum 238 // of validation. 239 Punycode *Profile = punycode 240 241 // Lookup is the recommended profile for looking up domain names, according 242 // to Section 5 of RFC 5891. The exact configuration of this profile may 243 // change over time. 244 Lookup *Profile = lookup 245 246 // Display is the recommended profile for displaying domain names. 247 // The configuration of this profile may change over time. 248 Display *Profile = display 249 250 // Registration is the recommended profile for checking whether a given 251 // IDN is valid for registration, according to Section 4 of RFC 5891. 
252 Registration *Profile = registration 253 254 punycode = &Profile{} 255 lookup = &Profile{options{ 256 transitional: true, 257 useSTD3Rules: true, 258 validateLabels: true, 259 removeLeadingDots: true, 260 trie: trie, 261 fromPuny: validateFromPunycode, 262 mapping: validateAndMap, 263 bidirule: bidirule.ValidString, 264 }} 265 display = &Profile{options{ 266 useSTD3Rules: true, 267 validateLabels: true, 268 removeLeadingDots: true, 269 trie: trie, 270 fromPuny: validateFromPunycode, 271 mapping: validateAndMap, 272 bidirule: bidirule.ValidString, 273 }} 274 registration = &Profile{options{ 275 useSTD3Rules: true, 276 validateLabels: true, 277 verifyDNSLength: true, 278 trie: trie, 279 fromPuny: validateFromPunycode, 280 mapping: validateRegistration, 281 bidirule: bidirule.ValidString, 282 }} 283 284 // TODO: profiles 285 // Register: recommended for approving domain names: don't do any mappings 286 // but rather reject on invalid input. Bundle or block deviation characters. 287) 288 289type labelError struct{ label, code_ string } 290 291func (e labelError) code() string { return e.code_ } 292func (e labelError) Error() string { 293 return fmt.Sprintf("idna: invalid label %q", e.label) 294} 295 296type runeError rune 297 298func (e runeError) code() string { return "P1" } 299func (e runeError) Error() string { 300 return fmt.Sprintf("idna: disallowed rune %U", e) 301} 302 303// process implements the algorithm described in section 4 of UTS #46, 304// see https://www.unicode.org/reports/tr46. 305func (p *Profile) process(s string, toASCII bool) (string, error) { 306 var err error 307 if p.mapping != nil { 308 s, err = p.mapping(p, s) 309 } 310 // Remove leading empty labels. 311 if p.removeLeadingDots { 312 for ; len(s) > 0 && s[0] == '.'; s = s[1:] { 313 } 314 } 315 // It seems like we should only create this error on ToASCII, but the 316 // UTS 46 conformance tests suggests we should always check this. 
317 if err == nil && p.verifyDNSLength && s == "" { 318 err = &labelError{s, "A4"} 319 } 320 labels := labelIter{orig: s} 321 for ; !labels.done(); labels.next() { 322 label := labels.label() 323 if label == "" { 324 // Empty labels are not okay. The label iterator skips the last 325 // label if it is empty. 326 if err == nil && p.verifyDNSLength { 327 err = &labelError{s, "A4"} 328 } 329 continue 330 } 331 if strings.HasPrefix(label, acePrefix) { 332 u, err2 := decode(label[len(acePrefix):]) 333 if err2 != nil { 334 if err == nil { 335 err = err2 336 } 337 // Spec says keep the old label. 338 continue 339 } 340 labels.set(u) 341 if err == nil && p.validateLabels { 342 err = p.fromPuny(p, u) 343 } 344 if err == nil { 345 // This should be called on NonTransitional, according to the 346 // spec, but that currently does not have any effect. Use the 347 // original profile to preserve options. 348 err = p.validateLabel(u) 349 } 350 } else if err == nil { 351 err = p.validateLabel(label) 352 } 353 } 354 if toASCII { 355 for labels.reset(); !labels.done(); labels.next() { 356 label := labels.label() 357 if !ascii(label) { 358 a, err2 := encode(acePrefix, label) 359 if err == nil { 360 err = err2 361 } 362 label = a 363 labels.set(a) 364 } 365 n := len(label) 366 if p.verifyDNSLength && err == nil && (n == 0 || n > 63) { 367 err = &labelError{label, "A4"} 368 } 369 } 370 } 371 s = labels.result() 372 if toASCII && p.verifyDNSLength && err == nil { 373 // Compute the length of the domain name minus the root label and its dot. 374 n := len(s) 375 if n > 0 && s[n-1] == '.' 
{ 376 n-- 377 } 378 if len(s) < 1 || n > 253 { 379 err = &labelError{s, "A4"} 380 } 381 } 382 return s, err 383} 384 385func normalize(p *Profile, s string) (string, error) { 386 return norm.NFC.String(s), nil 387} 388 389func validateRegistration(p *Profile, s string) (string, error) { 390 if !norm.NFC.IsNormalString(s) { 391 return s, &labelError{s, "V1"} 392 } 393 for i := 0; i < len(s); { 394 v, sz := trie.lookupString(s[i:]) 395 // Copy bytes not copied so far. 396 switch p.simplify(info(v).category()) { 397 // TODO: handle the NV8 defined in the Unicode idna data set to allow 398 // for strict conformance to IDNA2008. 399 case valid, deviation: 400 case disallowed, mapped, unknown, ignored: 401 r, _ := utf8.DecodeRuneInString(s[i:]) 402 return s, runeError(r) 403 } 404 i += sz 405 } 406 return s, nil 407} 408 409func validateAndMap(p *Profile, s string) (string, error) { 410 var ( 411 err error 412 b []byte 413 k int 414 ) 415 for i := 0; i < len(s); { 416 v, sz := trie.lookupString(s[i:]) 417 start := i 418 i += sz 419 // Copy bytes not copied so far. 420 switch p.simplify(info(v).category()) { 421 case valid: 422 continue 423 case disallowed: 424 if err == nil { 425 r, _ := utf8.DecodeRuneInString(s[start:]) 426 err = runeError(r) 427 } 428 continue 429 case mapped, deviation: 430 b = append(b, s[k:start]...) 431 b = info(v).appendMapping(b, s[start:i]) 432 case ignored: 433 b = append(b, s[k:start]...) 434 // drop the rune 435 case unknown: 436 b = append(b, s[k:start]...) 437 b = append(b, "\ufffd"...) 438 } 439 k = i 440 } 441 if k == 0 { 442 // No changes so far. 443 s = norm.NFC.String(s) 444 } else { 445 b = append(b, s[k:]...) 446 if norm.NFC.QuickSpan(b) != len(b) { 447 b = norm.NFC.Bytes(b) 448 } 449 // TODO: the punycode converters require strings as input. 450 s = string(b) 451 } 452 return s, err 453} 454 455// A labelIter allows iterating over domain name labels. 
type labelIter struct {
	orig     string   // the domain name being iterated
	slice    []string // labels of orig; nil until set is first called
	curStart int      // byte offset in orig where the current label starts
	curEnd   int      // byte offset in orig just past the current label
	i        int      // index of the current label
}

// reset rewinds the iterator to the first label. Labels replaced through set
// are kept.
func (l *labelIter) reset() {
	l.curStart, l.curEnd, l.i = 0, 0, 0
}

// done reports whether the iteration has moved past the last label.
func (l *labelIter) done() bool {
	return len(l.orig) <= l.curStart
}

// result returns the domain name, with any replaced labels joined back
// together by dots.
func (l *labelIter) result() string {
	if l.slice == nil {
		return l.orig
	}
	return strings.Join(l.slice, ".")
}

// label returns the current label. While no label has been replaced it also
// records where the label ends in orig, which next relies on.
func (l *labelIter) label() string {
	if l.slice != nil {
		return l.slice[l.i]
	}
	rest := l.orig[l.curStart:]
	if dot := strings.IndexByte(rest, '.'); dot >= 0 {
		l.curEnd = l.curStart + dot
	} else {
		l.curEnd = len(l.orig)
	}
	return l.orig[l.curStart:l.curEnd]
}

// next sets the value to the next label. It skips the last label if it is empty.
func (l *labelIter) next() {
	l.i++
	if l.slice == nil {
		// Step over the dot that terminated the previous label.
		l.curStart = l.curEnd + 1
		if l.curStart == len(l.orig)-1 && l.orig[l.curStart] == '.' {
			l.curStart = len(l.orig)
		}
		return
	}
	// In slice mode curStart is only used as the "done" marker.
	last := len(l.slice) - 1
	if l.i > last || (l.i == last && l.slice[last] == "") {
		l.curStart = len(l.orig)
	}
}

// set replaces the current label with s. The first call splits orig into
// labels; from then on the iterator works on that slice.
func (l *labelIter) set(s string) {
	if l.slice == nil {
		l.slice = strings.Split(l.orig, ".")
	}
	l.slice[l.i] = s
}

// acePrefix is the ASCII Compatible Encoding prefix.
516const acePrefix = "xn--" 517 518func (p *Profile) simplify(cat category) category { 519 switch cat { 520 case disallowedSTD3Mapped: 521 if p.useSTD3Rules { 522 cat = disallowed 523 } else { 524 cat = mapped 525 } 526 case disallowedSTD3Valid: 527 if p.useSTD3Rules { 528 cat = disallowed 529 } else { 530 cat = valid 531 } 532 case deviation: 533 if !p.transitional { 534 cat = valid 535 } 536 case validNV8, validXV8: 537 // TODO: handle V2008 538 cat = valid 539 } 540 return cat 541} 542 543func validateFromPunycode(p *Profile, s string) error { 544 if !norm.NFC.IsNormalString(s) { 545 return &labelError{s, "V1"} 546 } 547 for i := 0; i < len(s); { 548 v, sz := trie.lookupString(s[i:]) 549 if c := p.simplify(info(v).category()); c != valid && c != deviation { 550 return &labelError{s, "V6"} 551 } 552 i += sz 553 } 554 return nil 555} 556 557const ( 558 zwnj = "\u200c" 559 zwj = "\u200d" 560) 561 562type joinState int8 563 564const ( 565 stateStart joinState = iota 566 stateVirama 567 stateBefore 568 stateBeforeVirama 569 stateAfter 570 stateFAIL 571) 572 573var joinStates = [][numJoinTypes]joinState{ 574 stateStart: { 575 joiningL: stateBefore, 576 joiningD: stateBefore, 577 joinZWNJ: stateFAIL, 578 joinZWJ: stateFAIL, 579 joinVirama: stateVirama, 580 }, 581 stateVirama: { 582 joiningL: stateBefore, 583 joiningD: stateBefore, 584 }, 585 stateBefore: { 586 joiningL: stateBefore, 587 joiningD: stateBefore, 588 joiningT: stateBefore, 589 joinZWNJ: stateAfter, 590 joinZWJ: stateFAIL, 591 joinVirama: stateBeforeVirama, 592 }, 593 stateBeforeVirama: { 594 joiningL: stateBefore, 595 joiningD: stateBefore, 596 joiningT: stateBefore, 597 }, 598 stateAfter: { 599 joiningL: stateFAIL, 600 joiningD: stateBefore, 601 joiningT: stateAfter, 602 joiningR: stateStart, 603 joinZWNJ: stateFAIL, 604 joinZWJ: stateFAIL, 605 joinVirama: stateAfter, // no-op as we can't accept joiners here 606 }, 607 stateFAIL: { 608 0: stateFAIL, 609 joiningL: stateFAIL, 610 joiningD: stateFAIL, 611 
joiningT: stateFAIL, 612 joiningR: stateFAIL, 613 joinZWNJ: stateFAIL, 614 joinZWJ: stateFAIL, 615 joinVirama: stateFAIL, 616 }, 617} 618 619// validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are 620// already implicitly satisfied by the overall implementation. 621func (p *Profile) validateLabel(s string) error { 622 if s == "" { 623 if p.verifyDNSLength { 624 return &labelError{s, "A4"} 625 } 626 return nil 627 } 628 if p.bidirule != nil && !p.bidirule(s) { 629 return &labelError{s, "B"} 630 } 631 if !p.validateLabels { 632 return nil 633 } 634 trie := p.trie // p.validateLabels is only set if trie is set. 635 if len(s) > 4 && s[2] == '-' && s[3] == '-' { 636 return &labelError{s, "V2"} 637 } 638 if s[0] == '-' || s[len(s)-1] == '-' { 639 return &labelError{s, "V3"} 640 } 641 // TODO: merge the use of this in the trie. 642 v, sz := trie.lookupString(s) 643 x := info(v) 644 if x.isModifier() { 645 return &labelError{s, "V5"} 646 } 647 // Quickly return in the absence of zero-width (non) joiners. 648 if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 { 649 return nil 650 } 651 st := stateStart 652 for i := 0; ; { 653 jt := x.joinType() 654 if s[i:i+sz] == zwj { 655 jt = joinZWJ 656 } else if s[i:i+sz] == zwnj { 657 jt = joinZWNJ 658 } 659 st = joinStates[st][jt] 660 if x.isViramaModifier() { 661 st = joinStates[st][joinVirama] 662 } 663 if i += sz; i == len(s) { 664 break 665 } 666 v, sz = trie.lookupString(s[i:]) 667 x = info(v) 668 } 669 if st == stateFAIL || st == stateAfter { 670 return &labelError{s, "C"} 671 } 672 return nil 673} 674 675func ascii(s string) bool { 676 for i := 0; i < len(s); i++ { 677 if s[i] >= utf8.RuneSelf { 678 return false 679 } 680 } 681 return true 682} 683