1// Copyright 2013 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package language
6
7import (
8	"bytes"
9	"errors"
10	"fmt"
11	"sort"
12	"strconv"
13	"strings"
14
15	"golang.org/x/text/internal/tag"
16)
17
// isAlpha reports whether b is a letter rather than a digit.
// b must be an ASCII letter or digit; under that precondition every byte
// that sorts after '9' is a letter.
func isAlpha(b byte) bool {
	const lastDigit = '9'
	return lastDigit < b
}
23
// isAlphaNum reports whether s consists solely of ASCII letters and digits.
// An empty slice is reported as alphanumeric.
func isAlphaNum(s []byte) bool {
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case 'a' <= c && c <= 'z':
		case 'A' <= c && c <= 'Z':
		case '0' <= c && c <= '9':
		default:
			return false
		}
	}
	return true
}
33
// errSyntax is returned by any of the parsing functions when the
// input is not well-formed, according to BCP 47.
// It is used as a sentinel: the scanner compares recorded errors against it
// by identity.
// TODO: return the position at which the syntax error occurred?
var errSyntax = errors.New("language: tag is not well-formed")
38
// ValueError is returned by any of the parsing functions when the
// input is well-formed but the respective subtag is not recognized
// as a valid value.
type ValueError struct {
	// v holds the offending subtag, padded with zero bytes.
	v [8]byte
}

// mkErrInvalid wraps the subtag s in a ValueError. At most the first
// eight bytes of s are retained.
func mkErrInvalid(s []byte) error {
	e := ValueError{}
	copy(e.v[:], s)
	return e
}

// tag returns the stored subtag with the zero padding stripped.
func (e ValueError) tag() []byte {
	if pad := bytes.IndexByte(e.v[:], 0); pad >= 0 {
		return e.v[:pad]
	}
	return e.v[:]
}

// Error implements the error interface.
func (e ValueError) Error() string {
	subtag := e.tag()
	return fmt.Sprintf("language: subtag %q is well-formed but unknown", subtag)
}

// Subtag returns the subtag for which the error occurred.
func (e ValueError) Subtag() string {
	subtag := e.tag()
	return string(subtag)
}
69
// scanner is used to scan BCP 47 tokens, which are separated by _ or -.
// The scanner routines edit the input buffer in place as they go.
type scanner struct {
	b     []byte                      // input buffer being scanned and rewritten
	bytes [max99thPercentileSize]byte // inline storage that backs b for short string inputs
	token []byte                      // current token; nil when scan found none
	start int                         // start position of the current token
	end   int                         // end position of the current token
	next  int                         // next point for scan
	err   error                       // error recorded through setError
	done  bool                        // set by scan once no further token was found
}
81
82func makeScannerString(s string) scanner {
83	scan := scanner{}
84	if len(s) <= len(scan.bytes) {
85		scan.b = scan.bytes[:copy(scan.bytes[:], s)]
86	} else {
87		scan.b = []byte(s)
88	}
89	scan.init()
90	return scan
91}
92
93// makeScanner returns a scanner using b as the input buffer.
94// b is not copied and may be modified by the scanner routines.
95func makeScanner(b []byte) scanner {
96	scan := scanner{b: b}
97	scan.init()
98	return scan
99}
100
101func (s *scanner) init() {
102	for i, c := range s.b {
103		if c == '_' {
104			s.b[i] = '-'
105		}
106	}
107	s.scan()
108}
109
110// restToLower converts the string between start and end to lower case.
111func (s *scanner) toLower(start, end int) {
112	for i := start; i < end; i++ {
113		c := s.b[i]
114		if 'A' <= c && c <= 'Z' {
115			s.b[i] += 'a' - 'A'
116		}
117	}
118}
119
120func (s *scanner) setError(e error) {
121	if s.err == nil || (e == errSyntax && s.err != errSyntax) {
122		s.err = e
123	}
124}
125
126// resizeRange shrinks or grows the array at position oldStart such that
127// a new string of size newSize can fit between oldStart and oldEnd.
128// Sets the scan point to after the resized range.
129func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) {
130	s.start = oldStart
131	if end := oldStart + newSize; end != oldEnd {
132		diff := end - oldEnd
133		if end < cap(s.b) {
134			b := make([]byte, len(s.b)+diff)
135			copy(b, s.b[:oldStart])
136			copy(b[end:], s.b[oldEnd:])
137			s.b = b
138		} else {
139			s.b = append(s.b[end:], s.b[oldEnd:]...)
140		}
141		s.next = end + (s.next - s.end)
142		s.end = end
143	}
144}
145
146// replace replaces the current token with repl.
147func (s *scanner) replace(repl string) {
148	s.resizeRange(s.start, s.end, len(repl))
149	copy(s.b[s.start:], repl)
150}
151
152// gobble removes the current token from the input.
153// Caller must call scan after calling gobble.
154func (s *scanner) gobble(e error) {
155	s.setError(e)
156	if s.start == 0 {
157		s.b = s.b[:+copy(s.b, s.b[s.next:])]
158		s.end = 0
159	} else {
160		s.b = s.b[:s.start-1+copy(s.b[s.start-1:], s.b[s.end:])]
161		s.end = s.start - 1
162	}
163	s.next = s.start
164}
165
166// deleteRange removes the given range from s.b before the current token.
167func (s *scanner) deleteRange(start, end int) {
168	s.setError(errSyntax)
169	s.b = s.b[:start+copy(s.b[start:], s.b[end:])]
170	diff := end - start
171	s.next -= diff
172	s.start -= diff
173	s.end -= diff
174}
175
176// scan parses the next token of a BCP 47 string.  Tokens that are larger
177// than 8 characters or include non-alphanumeric characters result in an error
178// and are gobbled and removed from the output.
179// It returns the end position of the last token consumed.
180func (s *scanner) scan() (end int) {
181	end = s.end
182	s.token = nil
183	for s.start = s.next; s.next < len(s.b); {
184		i := bytes.IndexByte(s.b[s.next:], '-')
185		if i == -1 {
186			s.end = len(s.b)
187			s.next = len(s.b)
188			i = s.end - s.start
189		} else {
190			s.end = s.next + i
191			s.next = s.end + 1
192		}
193		token := s.b[s.start:s.end]
194		if i < 1 || i > 8 || !isAlphaNum(token) {
195			s.gobble(errSyntax)
196			continue
197		}
198		s.token = token
199		return end
200	}
201	if n := len(s.b); n > 0 && s.b[n-1] == '-' {
202		s.setError(errSyntax)
203		s.b = s.b[:len(s.b)-1]
204	}
205	s.done = true
206	return end
207}
208
209// acceptMinSize parses multiple tokens of the given size or greater.
210// It returns the end position of the last token consumed.
211func (s *scanner) acceptMinSize(min int) (end int) {
212	end = s.end
213	s.scan()
214	for ; len(s.token) >= min; s.scan() {
215		end = s.end
216	}
217	return end
218}
219
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
// failed it returns an error and any part of the tag that could be parsed.
// If parsing succeeded but an unknown value was found, it returns
// ValueError. The Tag returned in this case is just stripped of the unknown
// value. All other values are preserved. It accepts tags in the BCP 47 format
// and extensions to this standard defined in
// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// The resulting tag is canonicalized using the default canonicalization type.
// It is shorthand for Default.Parse(s).
func Parse(s string) (t Tag, err error) {
	return Default.Parse(s)
}
231
232// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
233// failed it returns an error and any part of the tag that could be parsed.
234// If parsing succeeded but an unknown value was found, it returns
235// ValueError. The Tag returned in this case is just stripped of the unknown
236// value. All other values are preserved. It accepts tags in the BCP 47 format
237// and extensions to this standard defined in
238// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
239// The resulting tag is canonicalized using the the canonicalization type c.
240func (c CanonType) Parse(s string) (t Tag, err error) {
241	// TODO: consider supporting old-style locale key-value pairs.
242	if s == "" {
243		return und, errSyntax
244	}
245	if len(s) <= maxAltTaglen {
246		b := [maxAltTaglen]byte{}
247		for i, c := range s {
248			// Generating invalid UTF-8 is okay as it won't match.
249			if 'A' <= c && c <= 'Z' {
250				c += 'a' - 'A'
251			} else if c == '_' {
252				c = '-'
253			}
254			b[i] = byte(c)
255		}
256		if t, ok := grandfathered(b); ok {
257			return t, nil
258		}
259	}
260	scan := makeScannerString(s)
261	t, err = parse(&scan, s)
262	t, changed := t.canonicalize(c)
263	if changed {
264		t.remakeString()
265	}
266	return t, err
267}
268
269func parse(scan *scanner, s string) (t Tag, err error) {
270	t = und
271	var end int
272	if n := len(scan.token); n <= 1 {
273		scan.toLower(0, len(scan.b))
274		if n == 0 || scan.token[0] != 'x' {
275			return t, errSyntax
276		}
277		end = parseExtensions(scan)
278	} else if n >= 4 {
279		return und, errSyntax
280	} else { // the usual case
281		t, end = parseTag(scan)
282		if n := len(scan.token); n == 1 {
283			t.pExt = uint16(end)
284			end = parseExtensions(scan)
285		} else if end < len(scan.b) {
286			scan.setError(errSyntax)
287			scan.b = scan.b[:end]
288		}
289	}
290	if int(t.pVariant) < len(scan.b) {
291		if end < len(s) {
292			s = s[:end]
293		}
294		if len(s) > 0 && tag.Compare(s, scan.b) == 0 {
295			t.str = s
296		} else {
297			t.str = string(scan.b)
298		}
299	} else {
300		t.pVariant, t.pExt = 0, 0
301	}
302	return t, scan.err
303}
304
// parseTag parses language, script, region and variants.
// It returns a Tag and the end position in the input that was parsed.
// The scanner's current token is taken to be the language subtag. The
// language token is replaced in the buffer by the string form of the language
// ID that was found, and a region that was found is replaced likewise. Script
// and region tokens that resolve to the zero ID are gobbled with the error
// from the lookup.
func parseTag(scan *scanner) (t Tag, end int) {
	var e error
	// TODO: set an error if an unknown lang, script or region is encountered.
	t.lang, e = getLangID(scan.token)
	scan.setError(e)
	// Overwrite the language token with the string form of the resolved ID.
	scan.replace(t.lang.String())
	langStart := scan.start
	end = scan.scan()
	// Three-character tokens that start with a letter are handled as
	// extended language subtags.
	for len(scan.token) == 3 && isAlpha(scan.token[0]) {
		// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
		// to a tag of the form <extlang>.
		lang, e := getLangID(scan.token)
		if lang != 0 {
			// The extlang takes the place of the language, both in the Tag
			// and in the buffer. Moving start to just after the separator
			// written here makes the gobble below remove everything from
			// that separator up to the end of the extlang token.
			t.lang = lang
			copy(scan.b[langStart:], lang.String())
			scan.b[langStart+3] = '-'
			scan.start = langStart + 4
		}
		scan.gobble(e)
		end = scan.scan()
	}
	// A four-character token that starts with a letter is looked up as a script.
	if len(scan.token) == 4 && isAlpha(scan.token[0]) {
		t.script, e = getScriptID(script, scan.token)
		if t.script == 0 {
			scan.gobble(e)
		}
		end = scan.scan()
	}
	// A token of two or three characters is looked up as a region.
	if n := len(scan.token); n >= 2 && n <= 3 {
		t.region, e = getRegionID(scan.token)
		if t.region == 0 {
			scan.gobble(e)
		} else {
			scan.replace(t.region.String())
		}
		end = scan.scan()
	}
	// Lower-case everything from the current token to the end of the buffer.
	scan.toLower(scan.start, len(scan.b))
	// NOTE(review): the conversion to byte truncates positions above 255 —
	// confirm that callers cannot get there.
	t.pVariant = byte(end)
	end = parseVariants(scan, end, t)
	t.pExt = uint16(end)
	return t, end
}
350
// separator is the "-" delimiter passed to bytes.Join when subtags are
// joined back together.
var separator = []byte{'-'}
352
// parseVariants scans tokens as long as each token is a valid variant string.
// Duplicate variants are removed.
// end is the end position of the input consumed so far; the function returns
// the end position after the variants it accepted. Tokens of four or more
// characters that are not found in variantIndex are gobbled with a
// ValueError. The parameter t is not used.
func parseVariants(scan *scanner, end int, t Tag) int {
	start := scan.start
	// Start out with small fixed-size backing arrays; append moves to larger
	// storage if more variants show up.
	varIDBuf := [4]uint8{}
	variantBuf := [4][]byte{}
	varID := varIDBuf[:0]
	variant := variantBuf[:0]
	// last is the highest variant index seen while the variants are still in
	// strictly increasing order.
	last := -1
	needSort := false
	for ; len(scan.token) >= 4; scan.scan() {
		// TODO: measure the impact of needing this conversion and redesign
		// the data structure if there is an issue.
		v, ok := variantIndex[string(scan.token)]
		if !ok {
			// unknown variant
			// TODO: allow user-defined variants?
			scan.gobble(mkErrInvalid(scan.token))
			continue
		}
		varID = append(varID, v)
		variant = append(variant, scan.token)
		if !needSort {
			if last < int(v) {
				last = int(v)
			} else {
				// Out of order or a duplicate: the variants have to be
				// sorted and deduplicated after the loop.
				needSort = true
				// There are no legal combinations of more than 7 variants
				// (and this is by no means a useful sequence).
				const maxVariants = 8
				if len(varID) > maxVariants {
					break
				}
			}
		}
		end = scan.end
	}
	if needSort {
		sort.Sort(variantsSort{varID, variant})
		// Compact both slices in place, keeping the first of each run of
		// equal indexes. k is the number of entries kept, l the index kept
		// last.
		k, l := 0, -1
		for i, v := range varID {
			w := int(v)
			if l == w {
				// Remove duplicates.
				continue
			}
			varID[k] = varID[i]
			variant[k] = variant[i]
			k++
			l = w
		}
		// The joined string is built before the buffer is resized, so it does
		// not depend on the token slices staying valid.
		// NOTE(review): needSort implies at least one accepted variant, so
		// the empty-string branch looks unreachable — confirm.
		if str := bytes.Join(variant[:k], separator); len(str) == 0 {
			end = start - 1
		} else {
			scan.resizeRange(start, end, len(str))
			copy(scan.b[scan.start:], str)
			end = scan.end
		}
	}
	return end
}
414
// variantsSort implements sort.Interface over two parallel slices. Elements
// are ordered by ascending value in i, and v is permuted along with it.
type variantsSort struct {
	i []uint8  // sort keys
	v [][]byte // values that travel with the keys
}

// Len returns the number of keys.
func (s variantsSort) Len() int { return len(s.i) }

// Swap exchanges elements i and j in both slices.
func (s variantsSort) Swap(i, j int) {
	s.i[j], s.i[i] = s.i[i], s.i[j]
	s.v[j], s.v[i] = s.v[i], s.v[j]
}

// Less reports whether the key at i sorts before the key at j.
func (s variantsSort) Less(i, j int) bool { return s.i[j] > s.i[i] }
432
// bytesSort implements sort.Interface for a list of byte slices, ordering
// them lexicographically by byte value.
type bytesSort [][]byte

// Len returns the number of slices.
func (b bytesSort) Len() int { return len(b) }

// Swap exchanges the slices at i and j.
func (b bytesSort) Swap(i, j int) {
	b[j], b[i] = b[i], b[j]
}

// Less reports whether the slice at i sorts strictly before the slice at j.
func (b bytesSort) Less(i, j int) bool {
	return bytes.Compare(b[i], b[j]) < 0
}
446
// parseExtensions parses and normalizes the extensions in the buffer.
// It returns the last position of scan.b that is part of any extension.
// It also trims scan.b to remove excess parts accordingly.
// The scanner's current token is taken to be the singleton of the first
// extension. Extensions other than the private-use one are sorted; the
// private-use extension, if any, is placed last.
func parseExtensions(scan *scanner) int {
	start := scan.start
	exts := [][]byte{}
	private := []byte{}
	end := scan.end
	// Every single-character token starts a new extension.
	for len(scan.token) == 1 {
		extStart := scan.start
		ext := scan.token[0]
		end = parseExtension(scan)
		extension := scan.b[extStart:end]
		if len(extension) < 3 || (ext != 'x' && len(extension) < 4) {
			// Too short to hold a singleton, a separator and a body (a
			// body of two characters unless the singleton is 'x').
			scan.setError(errSyntax)
			end = extStart
			continue
		} else if start == extStart && (ext == 'x' || scan.start == len(scan.b)) {
			// The first extension is private-use or the scanner has reached
			// the end of the buffer: nothing needs to be reordered, so the
			// buffer is cut at end and returned as is.
			scan.b = scan.b[:end]
			return end
		} else if ext == 'x' {
			// The private-use extension ends the list.
			private = extension
			break
		}
		exts = append(exts, extension)
	}
	sort.Sort(bytesSort(exts))
	if len(private) > 0 {
		exts = append(exts, private)
	}
	// Rebuild the extension part of the buffer from the ordered list.
	scan.b = scan.b[:start]
	if len(exts) > 0 {
		scan.b = append(scan.b, bytes.Join(exts, separator)...)
	} else if start > 0 {
		// Strip trailing '-'.
		scan.b = scan.b[:start-1]
	}
	return end
}
486
// parseExtension parses a single extension and returns the position of
// the extension end.
// The scanner's current token is taken to be the extension's singleton, which
// selects how the body is scanned: 'u' takes attributes (tokens of more than
// two characters) followed by two-character keys with their values, 't' takes
// an optional language tag followed by two-character fields with their
// values, 'x' takes tokens of any length and every other singleton takes
// tokens of at least two characters.
func parseExtension(scan *scanner) int {
	start, end := scan.start, scan.end
	switch scan.token[0] {
	case 'u':
		attrStart := end
		scan.scan()
		// Attributes: the leading tokens of more than two characters.
		// NOTE(review): last starts out empty, so the first attribute always
		// compares greater and takes the re-sort path — confirm that this is
		// intended.
		for last := []byte{}; len(scan.token) > 2; scan.scan() {
			if bytes.Compare(scan.token, last) != -1 {
				// Attributes are unsorted. Start over from scratch.
				p := attrStart + 1
				scan.next = p
				attrs := [][]byte{}
				for scan.scan(); len(scan.token) > 2; scan.scan() {
					attrs = append(attrs, scan.token)
					end = scan.end
				}
				sort.Sort(bytesSort(attrs))
				// Write the sorted attributes back over the original ones.
				copy(scan.b[p:], bytes.Join(attrs, separator))
				break
			}
			last = scan.token
			end = scan.end
		}
		// Keys: two-character tokens, each followed by tokens of at least
		// three characters.
		var last, key []byte
		for attrEnd := end; len(scan.token) == 2; last = key {
			key = scan.token
			keyEnd := scan.end
			end = scan.acceptMinSize(3)
			// TODO: check key value validity
			if keyEnd == end || bytes.Compare(key, last) != 1 {
				// We have an invalid key or the keys are not sorted.
				// Start scanning keys from scratch and reorder.
				p := attrEnd + 1
				scan.next = p
				keys := [][]byte{}
				for scan.scan(); len(scan.token) == 2; {
					keyStart, keyEnd := scan.start, scan.end
					end = scan.acceptMinSize(3)
					if keyEnd != end {
						keys = append(keys, scan.b[keyStart:end])
					} else {
						// A key without a value is a syntax error and is
						// left out.
						scan.setError(errSyntax)
						end = keyStart
					}
				}
				sort.Sort(bytesSort(keys))
				reordered := bytes.Join(keys, separator)
				// Keys that were left out make the result shorter; remove
				// the surplus bytes.
				if e := p + len(reordered); e < end {
					scan.deleteRange(e, end)
					end = e
				}
				copy(scan.b[p:], bytes.Join(keys, separator))
				break
			}
		}
	case 't':
		scan.scan()
		// A token of two or three characters whose second character is a
		// letter starts an embedded language tag.
		if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
			_, end = parseTag(scan)
			scan.toLower(start, end)
		}
		// Fields: two-character tokens whose second character is a digit,
		// each followed by tokens of at least three characters.
		for len(scan.token) == 2 && !isAlpha(scan.token[1]) {
			end = scan.acceptMinSize(3)
		}
	case 'x':
		end = scan.acceptMinSize(1)
	default:
		end = scan.acceptMinSize(2)
	}
	return end
}
560
// Compose creates a Tag from individual parts, which may be of type Tag, Base,
// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
// Base, Script or Region or slice of type Variant or Extension is passed more
// than once, the latter will overwrite the former. Variants and Extensions are
// accumulated, but if two extensions of the same type are passed, the latter
// will replace the former. A Tag overwrites all former values and typically
// only makes sense as the first argument. The resulting tag is returned after
// canonicalizing using the Default CanonType. If one or more errors are
// encountered, one of the errors is returned.
// It is shorthand for Default.Compose(part...).
func Compose(part ...interface{}) (t Tag, err error) {
	return Default.Compose(part...)
}
573
574// Compose creates a Tag from individual parts, which may be of type Tag, Base,
575// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
576// Base, Script or Region or slice of type Variant or Extension is passed more
577// than once, the latter will overwrite the former. Variants and Extensions are
578// accumulated, but if two extensions of the same type are passed, the latter
579// will replace the former. A Tag overwrites all former values and typically
580// only makes sense as the first argument. The resulting tag is returned after
581// canonicalizing using CanonType c. If one or more errors are encountered,
582// one of the errors is returned.
583func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
584	var b builder
585	if err = b.update(part...); err != nil {
586		return und, err
587	}
588	t, _ = b.tag.canonicalize(c)
589
590	if len(b.ext) > 0 || len(b.variant) > 0 {
591		sort.Sort(sortVariant(b.variant))
592		sort.Strings(b.ext)
593		if b.private != "" {
594			b.ext = append(b.ext, b.private)
595		}
596		n := maxCoreSize + tokenLen(b.variant...) + tokenLen(b.ext...)
597		buf := make([]byte, n)
598		p := t.genCoreBytes(buf)
599		t.pVariant = byte(p)
600		p += appendTokens(buf[p:], b.variant...)
601		t.pExt = uint16(p)
602		p += appendTokens(buf[p:], b.ext...)
603		t.str = string(buf[:p])
604	} else if b.private != "" {
605		t.str = b.private
606		t.remakeString()
607	}
608	return
609}
610
// builder accumulates the parts passed to Compose.
type builder struct {
	tag Tag // language, script and region collected so far

	private string   // the x extension
	ext     []string // all other extensions
	variant []string // variant subtags

	err error // set to errInvalidArgument by update for an empty Variant or Extension
}
620
621func (b *builder) addExt(e string) {
622	if e == "" {
623	} else if e[0] == 'x' {
624		b.private = e
625	} else {
626		b.ext = append(b.ext, e)
627	}
628}
629
// errInvalidArgument is recorded on a builder when an empty Variant or
// Extension is passed to update.
var errInvalidArgument = errors.New("invalid Extension or Variant")
631
632func (b *builder) update(part ...interface{}) (err error) {
633	replace := func(l *[]string, s string, eq func(a, b string) bool) bool {
634		if s == "" {
635			b.err = errInvalidArgument
636			return true
637		}
638		for i, v := range *l {
639			if eq(v, s) {
640				(*l)[i] = s
641				return true
642			}
643		}
644		return false
645	}
646	for _, x := range part {
647		switch v := x.(type) {
648		case Tag:
649			b.tag.lang = v.lang
650			b.tag.region = v.region
651			b.tag.script = v.script
652			if v.str != "" {
653				b.variant = nil
654				for x, s := "", v.str[v.pVariant:v.pExt]; s != ""; {
655					x, s = nextToken(s)
656					b.variant = append(b.variant, x)
657				}
658				b.ext, b.private = nil, ""
659				for i, e := int(v.pExt), ""; i < len(v.str); {
660					i, e = getExtension(v.str, i)
661					b.addExt(e)
662				}
663			}
664		case Base:
665			b.tag.lang = v.langID
666		case Script:
667			b.tag.script = v.scriptID
668		case Region:
669			b.tag.region = v.regionID
670		case Variant:
671			if !replace(&b.variant, v.variant, func(a, b string) bool { return a == b }) {
672				b.variant = append(b.variant, v.variant)
673			}
674		case Extension:
675			if !replace(&b.ext, v.s, func(a, b string) bool { return a[0] == b[0] }) {
676				b.addExt(v.s)
677			}
678		case []Variant:
679			b.variant = nil
680			for _, x := range v {
681				b.update(x)
682			}
683		case []Extension:
684			b.ext, b.private = nil, ""
685			for _, e := range v {
686				b.update(e)
687			}
688		// TODO: support parsing of raw strings based on morphology or just extensions?
689		case error:
690			err = v
691		}
692	}
693	return
694}
695
// tokenLen returns the number of bytes needed to write all tokens, each one
// preceded by a one-byte separator.
func tokenLen(token ...string) (n int) {
	for i := range token {
		n += 1 + len(token[i])
	}
	return n
}
702
// appendTokens writes every token into b, each one preceded by '-', starting
// at the beginning of b. It returns the number of bytes written. b must be
// large enough to hold the result; see tokenLen.
func appendTokens(b []byte, token ...string) int {
	n := 0
	for _, tok := range token {
		b[n] = '-'
		n++
		copy(b[n:], tok)
		n += len(tok)
	}
	return n
}
712
713type sortVariant []string
714
715func (s sortVariant) Len() int {
716	return len(s)
717}
718
719func (s sortVariant) Swap(i, j int) {
720	s[j], s[i] = s[i], s[j]
721}
722
723func (s sortVariant) Less(i, j int) bool {
724	return variantIndex[s[i]] < variantIndex[s[j]]
725}
726
// findExt returns the position of the first extension in list whose first
// byte is x, or -1 if there is none. The entries of list must not be empty.
func findExt(list []string, x byte) int {
	for i := 0; i < len(list); i++ {
		if list[i][0] == x {
			return i
		}
	}
	return -1
}
735
736// getExtension returns the name, body and end position of the extension.
737func getExtension(s string, p int) (end int, ext string) {
738	if s[p] == '-' {
739		p++
740	}
741	if s[p] == 'x' {
742		return len(s), s[p:]
743	}
744	end = nextExtension(s, p)
745	return end, s[p:end]
746}
747
// nextExtension finds the next extension within the string, searching
// for the -<char>- pattern from position p. It returns the position of the
// pattern's first '-', or len(s) if the pattern does not occur.
// In the vast majority of cases, language tags will have at most
// one extension and extensions tend to be small.
func nextExtension(s string, p int) int {
	limit := len(s) - 3
	for p < limit {
		switch {
		case s[p] != '-':
			p++
		case s[p+2] == '-':
			return p
		default:
			// The token after this '-' is longer than one character, so
			// neither of the next two bytes can start the pattern.
			p += 3
		}
	}
	return len(s)
}
765
// errInvalidWeight is returned by ParseAcceptLanguage when the weight of an
// entry cannot be parsed.
var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
767
768// ParseAcceptLanguage parses the contents of an Accept-Language header as
769// defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
770// a list of corresponding quality weights. It is more permissive than RFC 2616
771// and may return non-nil slices even if the input is not valid.
772// The Tags will be sorted by highest weight first and then by first occurrence.
773// Tags with a weight of zero will be dropped. An error will be returned if the
774// input could not be parsed.
775func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
776	var entry string
777	for s != "" {
778		if entry, s = split(s, ','); entry == "" {
779			continue
780		}
781
782		entry, weight := split(entry, ';')
783
784		// Scan the language.
785		t, err := Parse(entry)
786		if err != nil {
787			id, ok := acceptFallback[entry]
788			if !ok {
789				return nil, nil, err
790			}
791			t = Tag{lang: id}
792		}
793
794		// Scan the optional weight.
795		w := 1.0
796		if weight != "" {
797			weight = consume(weight, 'q')
798			weight = consume(weight, '=')
799			// consume returns the empty string when a token could not be
800			// consumed, resulting in an error for ParseFloat.
801			if w, err = strconv.ParseFloat(weight, 32); err != nil {
802				return nil, nil, errInvalidWeight
803			}
804			// Drop tags with a quality weight of 0.
805			if w <= 0 {
806				continue
807			}
808		}
809
810		tag = append(tag, t)
811		q = append(q, float32(w))
812	}
813	sortStable(&tagSort{tag, q})
814	return tag, q, nil
815}
816
// consume strips the leading byte c from s and returns the remainder with
// surrounding white space trimmed. It returns the empty string if s does not
// start with c.
func consume(s string, c byte) string {
	if len(s) > 0 && s[0] == c {
		return strings.TrimSpace(s[1:])
	}
	return ""
}
825
// split cuts s at the first occurrence of c and returns the parts before and
// after it, both with surrounding white space trimmed. If c does not occur in
// s, head is the trimmed s and tail is empty.
func split(s string, c byte) (head, tail string) {
	i := strings.IndexByte(s, c)
	if i < 0 {
		return strings.TrimSpace(s), ""
	}
	head = strings.TrimSpace(s[:i])
	tail = strings.TrimSpace(s[i+1:])
	return head, tail
}
832
// acceptFallback is a hack mapping to deal with a small number of cases that
// occur in Accept-Language (with reasonable frequency). ParseAcceptLanguage
// consults it for entries that Parse rejects.
var acceptFallback = map[string]langID{
	"english": _en,
	"deutsch": _de,
	"italian": _it,
	"french":  _fr,
	"*":       _mul, // defined in the spec to match all languages.
}
842
843type tagSort struct {
844	tag []Tag
845	q   []float32
846}
847
848func (s *tagSort) Len() int {
849	return len(s.q)
850}
851
852func (s *tagSort) Less(i, j int) bool {
853	return s.q[i] > s.q[j]
854}
855
856func (s *tagSort) Swap(i, j int) {
857	s.tag[i], s.tag[j] = s.tag[j], s.tag[i]
858	s.q[i], s.q[j] = s.q[j], s.q[i]
859}
860