1// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
2
3// Copyright 2016 The Go Authors. All rights reserved.
4// Use of this source code is governed by a BSD-style
5// license that can be found in the LICENSE file.
6
7//go:build !go1.10
8// +build !go1.10
9
10// Package idna implements IDNA2008 using the compatibility processing
11// defined by UTS (Unicode Technical Standard) #46, which defines a standard to
12// deal with the transition from IDNA2003.
13//
14// IDNA2008 (Internationalized Domain Names for Applications), is defined in RFC
15// 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894.
16// UTS #46 is defined in https://www.unicode.org/reports/tr46.
17// See https://unicode.org/cldr/utility/idna.jsp for a visualization of the
18// differences between these two standards.
19package idna // import "golang.org/x/net/idna"
20
21import (
22	"fmt"
23	"strings"
24	"unicode/utf8"
25
26	"golang.org/x/text/secure/bidirule"
27	"golang.org/x/text/unicode/norm"
28)
29
30// NOTE: Unlike common practice in Go APIs, the functions will return a
31// sanitized domain name in case of errors. Browsers sometimes use a partially
32// evaluated string as lookup.
33// TODO: the current error handling is, in my opinion, the least opinionated.
34// Other strategies are also viable, though:
35// Option 1) Return an empty string in case of error, but allow the user to
36//    specify explicitly which errors to ignore.
37// Option 2) Return the partially evaluated string if it is itself a valid
38//    string, otherwise return the empty string in case of error.
39// Option 3) Option 1 and 2.
40// Option 4) Always return an empty string for now and implement Option 1 as
41//    needed, and document that the return string may not be empty in case of
42//    error in the future.
43// I think Option 1 is best, but it is quite opinionated.
44
45// ToASCII is a wrapper for Punycode.ToASCII.
46func ToASCII(s string) (string, error) {
47	return Punycode.process(s, true)
48}
49
50// ToUnicode is a wrapper for Punycode.ToUnicode.
51func ToUnicode(s string) (string, error) {
52	return Punycode.process(s, false)
53}
54
// An Option configures a Profile at creation time. New applies the options it
// is given in order, each one mutating the profile's option set in place, so
// a later option can override the effect of an earlier one.
type Option func(*options)
57
58// Transitional sets a Profile to use the Transitional mapping as defined in UTS
59// #46. This will cause, for example, "ß" to be mapped to "ss". Using the
60// transitional mapping provides a compromise between IDNA2003 and IDNA2008
61// compatibility. It is used by some browsers when resolving domain names. This
62// option is only meaningful if combined with MapForLookup.
63func Transitional(transitional bool) Option {
64	return func(o *options) { o.transitional = transitional }
65}
66
67// VerifyDNSLength sets whether a Profile should fail if any of the IDN parts
68// are longer than allowed by the RFC.
69//
70// This option corresponds to the VerifyDnsLength flag in UTS #46.
71func VerifyDNSLength(verify bool) Option {
72	return func(o *options) { o.verifyDNSLength = verify }
73}
74
75// RemoveLeadingDots removes leading label separators. Leading runes that map to
76// dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well.
77func RemoveLeadingDots(remove bool) Option {
78	return func(o *options) { o.removeLeadingDots = remove }
79}
80
81// ValidateLabels sets whether to check the mandatory label validation criteria
82// as defined in Section 5.4 of RFC 5891. This includes testing for correct use
83// of hyphens ('-'), normalization, validity of runes, and the context rules.
84// In particular, ValidateLabels also sets the CheckHyphens and CheckJoiners flags
85// in UTS #46.
86func ValidateLabels(enable bool) Option {
87	return func(o *options) {
88		// Don't override existing mappings, but set one that at least checks
89		// normalization if it is not set.
90		if o.mapping == nil && enable {
91			o.mapping = normalize
92		}
93		o.trie = trie
94		o.checkJoiners = enable
95		o.checkHyphens = enable
96		if enable {
97			o.fromPuny = validateFromPunycode
98		} else {
99			o.fromPuny = nil
100		}
101	}
102}
103
104// CheckHyphens sets whether to check for correct use of hyphens ('-') in
105// labels. Most web browsers do not have this option set, since labels such as
106// "r3---sn-apo3qvuoxuxbt-j5pe" are in common use.
107//
108// This option corresponds to the CheckHyphens flag in UTS #46.
109func CheckHyphens(enable bool) Option {
110	return func(o *options) { o.checkHyphens = enable }
111}
112
113// CheckJoiners sets whether to check the ContextJ rules as defined in Appendix
114// A of RFC 5892, concerning the use of joiner runes.
115//
116// This option corresponds to the CheckJoiners flag in UTS #46.
117func CheckJoiners(enable bool) Option {
118	return func(o *options) {
119		o.trie = trie
120		o.checkJoiners = enable
121	}
122}
123
124// StrictDomainName limits the set of permissable ASCII characters to those
125// allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
126// hyphen). This is set by default for MapForLookup and ValidateForRegistration,
127// but is only useful if ValidateLabels is set.
128//
129// This option is useful, for instance, for browsers that allow characters
130// outside this range, for example a '_' (U+005F LOW LINE). See
131// http://www.rfc-editor.org/std/std3.txt for more details.
132//
133// This option corresponds to the UseSTD3ASCIIRules flag in UTS #46.
134func StrictDomainName(use bool) Option {
135	return func(o *options) { o.useSTD3Rules = use }
136}
137
138// NOTE: the following options pull in tables. The tables should not be linked
139// in as long as the options are not used.
140
141// BidiRule enables the Bidi rule as defined in RFC 5893. Any application
142// that relies on proper validation of labels should include this rule.
143//
144// This option corresponds to the CheckBidi flag in UTS #46.
145func BidiRule() Option {
146	return func(o *options) { o.bidirule = bidirule.ValidString }
147}
148
149// ValidateForRegistration sets validation options to verify that a given IDN is
150// properly formatted for registration as defined by Section 4 of RFC 5891.
151func ValidateForRegistration() Option {
152	return func(o *options) {
153		o.mapping = validateRegistration
154		StrictDomainName(true)(o)
155		ValidateLabels(true)(o)
156		VerifyDNSLength(true)(o)
157		BidiRule()(o)
158	}
159}
160
161// MapForLookup sets validation and mapping options such that a given IDN is
162// transformed for domain name lookup according to the requirements set out in
163// Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894,
164// RFC 5895 and UTS 46. It does not add the Bidi Rule. Use the BidiRule option
165// to add this check.
166//
167// The mappings include normalization and mapping case, width and other
168// compatibility mappings.
169func MapForLookup() Option {
170	return func(o *options) {
171		o.mapping = validateAndMap
172		StrictDomainName(true)(o)
173		ValidateLabels(true)(o)
174		RemoveLeadingDots(true)(o)
175	}
176}
177
// options holds the configuration of a Profile. The zero value performs raw
// Punycode processing with none of the optional checks enabled.
type options struct {
	transitional      bool // set by Transitional; deviation runes are only mapped when true
	useSTD3Rules      bool // set by StrictDomainName
	checkHyphens      bool // set by CheckHyphens and ValidateLabels
	checkJoiners      bool // set by CheckJoiners and ValidateLabels
	verifyDNSLength   bool // set by VerifyDNSLength
	removeLeadingDots bool // set by RemoveLeadingDots

	// trie is the lookup table used by validateLabel for the joiner checks.
	// It is attached by the options that enable those checks.
	trie *idnaTrie

	// fromPuny calls validation rules when converting A-labels to U-labels.
	fromPuny func(p *Profile, s string) error

	// mapping implements a validation and mapping step as defined in RFC 5895
	// or UTS 46, tailored to, for example, domain registration or lookup.
	mapping func(p *Profile, s string) (string, error)

	// bidirule, if specified, checks whether s conforms to the Bidi Rule
	// defined in RFC 5893.
	bidirule func(s string) bool
}
199
// A Profile defines the configuration of an IDNA mapper. Profiles are built
// with New or taken from the predefined Punycode, Lookup, Display and
// Registration variables.
type Profile struct {
	options
}
204
205func apply(o *options, opts []Option) {
206	for _, f := range opts {
207		f(o)
208	}
209}
210
211// New creates a new Profile.
212//
213// With no options, the returned Profile is the most permissive and equals the
214// Punycode Profile. Options can be passed to further restrict the Profile. The
215// MapForLookup and ValidateForRegistration options set a collection of options,
216// for lookup and registration purposes respectively, which can be tailored by
217// adding more fine-grained options, where later options override earlier
218// options.
219func New(o ...Option) *Profile {
220	p := &Profile{}
221	apply(&p.options, o)
222	return p
223}
224
225// ToASCII converts a domain or domain label to its ASCII form. For example,
226// ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
227// ToASCII("golang") is "golang". If an error is encountered it will return
228// an error and a (partially) processed result.
229func (p *Profile) ToASCII(s string) (string, error) {
230	return p.process(s, true)
231}
232
233// ToUnicode converts a domain or domain label to its Unicode form. For example,
234// ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and
235// ToUnicode("golang") is "golang". If an error is encountered it will return
236// an error and a (partially) processed result.
237func (p *Profile) ToUnicode(s string) (string, error) {
238	pp := *p
239	pp.transitional = false
240	return pp.process(s, false)
241}
242
243// String reports a string with a description of the profile for debugging
244// purposes. The string format may change with different versions.
245func (p *Profile) String() string {
246	s := ""
247	if p.transitional {
248		s = "Transitional"
249	} else {
250		s = "NonTransitional"
251	}
252	if p.useSTD3Rules {
253		s += ":UseSTD3Rules"
254	}
255	if p.checkHyphens {
256		s += ":CheckHyphens"
257	}
258	if p.checkJoiners {
259		s += ":CheckJoiners"
260	}
261	if p.verifyDNSLength {
262		s += ":VerifyDNSLength"
263	}
264	return s
265}
266
// Predefined profiles. Each exported variable aliases an unexported profile
// that is spelled out as a literal below rather than built through New.
var (
	// Punycode is a Profile that does raw punycode processing with a minimum
	// of validation.
	Punycode *Profile = punycode

	// Lookup is the recommended profile for looking up domain names, according
	// to Section 5 of RFC 5891. The exact configuration of this profile may
	// change over time.
	Lookup *Profile = lookup

	// Display is the recommended profile for displaying domain names.
	// The configuration of this profile may change over time.
	Display *Profile = display

	// Registration is the recommended profile for checking whether a given
	// IDN is valid for registration, according to Section 4 of RFC 5891.
	Registration *Profile = registration

	// punycode is the zero Profile: no mapping and no validation hooks.
	punycode = &Profile{}
	// lookup maps and validates, with the transitional mapping enabled.
	lookup   = &Profile{options{
		transitional:      true,
		removeLeadingDots: true,
		useSTD3Rules:      true,
		checkHyphens:      true,
		checkJoiners:      true,
		trie:              trie,
		fromPuny:          validateFromPunycode,
		mapping:           validateAndMap,
		bidirule:          bidirule.ValidString,
	}}
	// display is configured like lookup but without the transitional mapping.
	display = &Profile{options{
		useSTD3Rules:      true,
		removeLeadingDots: true,
		checkHyphens:      true,
		checkJoiners:      true,
		trie:              trie,
		fromPuny:          validateFromPunycode,
		mapping:           validateAndMap,
		bidirule:          bidirule.ValidString,
	}}
	// registration validates without mapping and also verifies DNS lengths.
	registration = &Profile{options{
		useSTD3Rules:    true,
		verifyDNSLength: true,
		checkHyphens:    true,
		checkJoiners:    true,
		trie:            trie,
		fromPuny:        validateFromPunycode,
		mapping:         validateRegistration,
		bidirule:        bidirule.ValidString,
	}}

	// TODO: profiles
	// Register: recommended for approving domain names: don't do any mappings
	// but rather reject on invalid input. Bundle or block deviation characters.
)
322
// labelError reports a label (or whole domain name) that failed validation,
// together with a short code naming the rule that was violated (for example
// "A4", "V1" or "B").
type labelError struct {
	label string
	code_ string // trailing underscore avoids a clash with the code method
}

// code returns the rule code recorded in the error.
func (e labelError) code() string {
	return e.code_
}

// Error implements the error interface. The message quotes the offending
// label; the rule code is not part of the message.
func (e labelError) Error() string {
	const format = "idna: invalid label %q"
	return fmt.Sprintf(format, e.label)
}
329
// runeError reports a rune that is not allowed in a domain name. The value of
// the error is the offending rune itself.
type runeError rune

// code returns the rule code for a disallowed rune, which is always "P1".
func (e runeError) code() string {
	return "P1"
}

// Error implements the error interface, rendering the rune in U+XXXX form.
func (e runeError) Error() string {
	r := rune(e)
	return fmt.Sprintf("idna: disallowed rune %U", r)
}
336
// process implements the algorithm described in section 4 of UTS #46,
// see https://www.unicode.org/reports/tr46.
//
// It converts s to its ASCII form when toASCII is true and to its Unicode
// form otherwise. Processing never stops at the first problem: only the first
// error encountered is kept, and the (partially) processed string is returned
// together with it.
func (p *Profile) process(s string, toASCII bool) (string, error) {
	var err error
	// Step 1: run the profile's mapping/validation step over the whole input.
	if p.mapping != nil {
		s, err = p.mapping(p, s)
	}
	// Remove leading empty labels.
	if p.removeLeadingDots {
		for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
		}
	}
	// It seems like we should only create this error on ToASCII, but the
	// UTS 46 conformance tests suggests we should always check this.
	if err == nil && p.verifyDNSLength && s == "" {
		err = &labelError{s, "A4"}
	}
	// Step 2: decode Punycode labels and validate every label.
	labels := labelIter{orig: s}
	for ; !labels.done(); labels.next() {
		label := labels.label()
		if label == "" {
			// Empty labels are not okay. The label iterator skips the last
			// label if it is empty.
			if err == nil && p.verifyDNSLength {
				err = &labelError{s, "A4"}
			}
			continue
		}
		if strings.HasPrefix(label, acePrefix) {
			u, err2 := decode(label[len(acePrefix):])
			if err2 != nil {
				if err == nil {
					err = err2
				}
				// Spec says keep the old label.
				continue
			}
			// The decoded label replaces the A-label even if validation
			// below fails.
			labels.set(u)
			if err == nil && p.fromPuny != nil {
				err = p.fromPuny(p, u)
			}
			if err == nil {
				// This should be called on NonTransitional, according to the
				// spec, but that currently does not have any effect. Use the
				// original profile to preserve options.
				err = p.validateLabel(u)
			}
		} else if err == nil {
			err = p.validateLabel(label)
		}
	}
	// Step 3 (ToASCII only): Punycode-encode every non-ASCII label and check
	// the per-label length limit.
	if toASCII {
		for labels.reset(); !labels.done(); labels.next() {
			label := labels.label()
			if !ascii(label) {
				a, err2 := encode(acePrefix, label)
				if err == nil {
					err = err2
				}
				// The encoder's output is stored even when it reported an
				// error.
				label = a
				labels.set(a)
			}
			n := len(label)
			if p.verifyDNSLength && err == nil && (n == 0 || n > 63) {
				err = &labelError{label, "A4"}
			}
		}
	}
	s = labels.result()
	if toASCII && p.verifyDNSLength && err == nil {
		// Compute the length of the domain name minus the root label and its dot.
		n := len(s)
		if n > 0 && s[n-1] == '.' {
			n--
		}
		if len(s) < 1 || n > 253 {
			err = &labelError{s, "A4"}
		}
	}
	return s, err
}
418
419func normalize(p *Profile, s string) (string, error) {
420	return norm.NFC.String(s), nil
421}
422
423func validateRegistration(p *Profile, s string) (string, error) {
424	if !norm.NFC.IsNormalString(s) {
425		return s, &labelError{s, "V1"}
426	}
427	for i := 0; i < len(s); {
428		v, sz := trie.lookupString(s[i:])
429		// Copy bytes not copied so far.
430		switch p.simplify(info(v).category()) {
431		// TODO: handle the NV8 defined in the Unicode idna data set to allow
432		// for strict conformance to IDNA2008.
433		case valid, deviation:
434		case disallowed, mapped, unknown, ignored:
435			r, _ := utf8.DecodeRuneInString(s[i:])
436			return s, runeError(r)
437		}
438		i += sz
439	}
440	return s, nil
441}
442
443func validateAndMap(p *Profile, s string) (string, error) {
444	var (
445		err error
446		b   []byte
447		k   int
448	)
449	for i := 0; i < len(s); {
450		v, sz := trie.lookupString(s[i:])
451		start := i
452		i += sz
453		// Copy bytes not copied so far.
454		switch p.simplify(info(v).category()) {
455		case valid:
456			continue
457		case disallowed:
458			if err == nil {
459				r, _ := utf8.DecodeRuneInString(s[start:])
460				err = runeError(r)
461			}
462			continue
463		case mapped, deviation:
464			b = append(b, s[k:start]...)
465			b = info(v).appendMapping(b, s[start:i])
466		case ignored:
467			b = append(b, s[k:start]...)
468			// drop the rune
469		case unknown:
470			b = append(b, s[k:start]...)
471			b = append(b, "\ufffd"...)
472		}
473		k = i
474	}
475	if k == 0 {
476		// No changes so far.
477		s = norm.NFC.String(s)
478	} else {
479		b = append(b, s[k:]...)
480		if norm.NFC.QuickSpan(b) != len(b) {
481			b = norm.NFC.Bytes(b)
482		}
483		// TODO: the punycode converters require strings as input.
484		s = string(b)
485	}
486	return s, err
487}
488
// A labelIter allows iterating over domain name labels.
//
// It starts out scanning orig in place, locating each label by the next '.'.
// The first call to set splits orig into slice; from then on labels are read
// from and written to slice, indexed by i, and result joins slice back
// together. In both modes the end of iteration is signalled by curStart
// reaching len(orig).
type labelIter struct {
	orig     string
	slice    []string
	curStart int
	curEnd   int
	i        int
}

// reset rewinds the iterator to the first label. Labels replaced via set are
// kept.
func (it *labelIter) reset() {
	it.curStart, it.curEnd, it.i = 0, 0, 0
}

// done reports whether the iteration has moved past the last label.
func (it *labelIter) done() bool {
	return len(it.orig) <= it.curStart
}

// result returns the domain name, with any labels replaced via set
// substituted in.
func (it *labelIter) result() string {
	if it.slice == nil {
		return it.orig
	}
	return strings.Join(it.slice, ".")
}

// label returns the current label. In scanning mode it also records where the
// label ends, which next relies on.
func (it *labelIter) label() string {
	if it.slice != nil {
		return it.slice[it.i]
	}
	rest := it.orig[it.curStart:]
	if dot := strings.IndexByte(rest, '.'); dot >= 0 {
		it.curEnd = it.curStart + dot
	} else {
		it.curEnd = len(it.orig)
	}
	return it.orig[it.curStart:it.curEnd]
}

// next sets the value to the next label. It skips the last label if it is empty.
func (it *labelIter) next() {
	it.i++
	if it.slice == nil {
		// Scanning mode: step over the separator that ended the label.
		it.curStart = it.curEnd + 1
		last := len(it.orig) - 1
		if it.curStart == last && it.orig[last] == '.' {
			it.curStart = len(it.orig)
		}
		return
	}
	// Slice mode: flag completion through curStart, which is what done checks.
	last := len(it.slice) - 1
	if it.i > last || (it.i == last && it.slice[last] == "") {
		it.curStart = len(it.orig)
	}
}

// set replaces the current label with s, switching the iterator to slice mode
// on first use.
func (it *labelIter) set(s string) {
	if it.slice == nil {
		it.slice = strings.Split(it.orig, ".")
	}
	it.slice[it.i] = s
}
548
// acePrefix is the ASCII Compatible Encoding prefix. A label starting with it
// is treated as Punycode-encoded, and it is prepended to labels that are
// encoded to ASCII.
const acePrefix = "xn--"
551
552func (p *Profile) simplify(cat category) category {
553	switch cat {
554	case disallowedSTD3Mapped:
555		if p.useSTD3Rules {
556			cat = disallowed
557		} else {
558			cat = mapped
559		}
560	case disallowedSTD3Valid:
561		if p.useSTD3Rules {
562			cat = disallowed
563		} else {
564			cat = valid
565		}
566	case deviation:
567		if !p.transitional {
568			cat = valid
569		}
570	case validNV8, validXV8:
571		// TODO: handle V2008
572		cat = valid
573	}
574	return cat
575}
576
577func validateFromPunycode(p *Profile, s string) error {
578	if !norm.NFC.IsNormalString(s) {
579		return &labelError{s, "V1"}
580	}
581	for i := 0; i < len(s); {
582		v, sz := trie.lookupString(s[i:])
583		if c := p.simplify(info(v).category()); c != valid && c != deviation {
584			return &labelError{s, "V6"}
585		}
586		i += sz
587	}
588	return nil
589}
590
// UTF-8 encodings of the two joiner runes that validateLabel looks for when
// applying the joiner context checks.
const (
	zwnj = "\u200c" // U+200C ZERO WIDTH NON-JOINER
	zwj  = "\u200d" // U+200D ZERO WIDTH JOINER
)
595
// joinState is a state of the state machine that validateLabel runs over a
// label containing joiner runes. The values index the rows of joinStates.
type joinState int8

// The states of the joiner state machine. stateStart is the zero value and
// therefore the target of every transition not listed in joinStates.
// validateLabel rejects a label whose scan ends in stateFAIL or stateAfter.
const (
	stateStart joinState = iota
	stateVirama
	stateBefore
	stateBeforeVirama
	stateAfter
	stateFAIL
)
606
// joinStates is the transition table of the joiner state machine:
// joinStates[state][joinType] is the state entered when a rune of the given
// join type is seen in the given state. validateLabel additionally applies
// the joinVirama column after the rune's own transition when the rune is a
// virama modifier. Entries omitted from a row default to stateStart, the zero
// value.
var joinStates = [][numJoinTypes]joinState{
	stateStart: {
		joiningL:   stateBefore,
		joiningD:   stateBefore,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateVirama,
	},
	stateVirama: {
		joiningL: stateBefore,
		joiningD: stateBefore,
	},
	stateBefore: {
		joiningL:   stateBefore,
		joiningD:   stateBefore,
		joiningT:   stateBefore,
		joinZWNJ:   stateAfter,
		joinZWJ:    stateFAIL,
		joinVirama: stateBeforeVirama,
	},
	stateBeforeVirama: {
		joiningL: stateBefore,
		joiningD: stateBefore,
		joiningT: stateBefore,
	},
	stateAfter: {
		joiningL:   stateFAIL,
		joiningD:   stateBefore,
		joiningT:   stateAfter,
		joiningR:   stateStart,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateAfter, // no-op as we can't accept joiners here
	},
	// stateFAIL is absorbing: every input leads back to stateFAIL.
	stateFAIL: {
		0:          stateFAIL,
		joiningL:   stateFAIL,
		joiningD:   stateFAIL,
		joiningT:   stateFAIL,
		joiningR:   stateFAIL,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateFAIL,
	},
}
652
653// validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are
654// already implicitly satisfied by the overall implementation.
655func (p *Profile) validateLabel(s string) error {
656	if s == "" {
657		if p.verifyDNSLength {
658			return &labelError{s, "A4"}
659		}
660		return nil
661	}
662	if p.bidirule != nil && !p.bidirule(s) {
663		return &labelError{s, "B"}
664	}
665	if p.checkHyphens {
666		if len(s) > 4 && s[2] == '-' && s[3] == '-' {
667			return &labelError{s, "V2"}
668		}
669		if s[0] == '-' || s[len(s)-1] == '-' {
670			return &labelError{s, "V3"}
671		}
672	}
673	if !p.checkJoiners {
674		return nil
675	}
676	trie := p.trie // p.checkJoiners is only set if trie is set.
677	// TODO: merge the use of this in the trie.
678	v, sz := trie.lookupString(s)
679	x := info(v)
680	if x.isModifier() {
681		return &labelError{s, "V5"}
682	}
683	// Quickly return in the absence of zero-width (non) joiners.
684	if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 {
685		return nil
686	}
687	st := stateStart
688	for i := 0; ; {
689		jt := x.joinType()
690		if s[i:i+sz] == zwj {
691			jt = joinZWJ
692		} else if s[i:i+sz] == zwnj {
693			jt = joinZWNJ
694		}
695		st = joinStates[st][jt]
696		if x.isViramaModifier() {
697			st = joinStates[st][joinVirama]
698		}
699		if i += sz; i == len(s) {
700			break
701		}
702		v, sz = trie.lookupString(s[i:])
703		x = info(v)
704	}
705	if st == stateFAIL || st == stateAfter {
706		return &labelError{s, "C"}
707	}
708	return nil
709}
710
// ascii reports whether s consists solely of ASCII bytes, that is, bytes
// below utf8.RuneSelf. The empty string is ASCII.
func ascii(s string) bool {
	n := len(s)
	i := 0
	for i < n && s[i] < utf8.RuneSelf {
		i++
	}
	return i == n
}
719