1// Copyright 2013 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package language 6 7import ( 8 "bytes" 9 "errors" 10 "fmt" 11 "sort" 12 "strconv" 13 "strings" 14 15 "golang.org/x/text/internal/tag" 16) 17 18// isAlpha returns true if the byte is not a digit. 19// b must be an ASCII letter or digit. 20func isAlpha(b byte) bool { 21 return b > '9' 22} 23 24// isAlphaNum returns true if the string contains only ASCII letters or digits. 25func isAlphaNum(s []byte) bool { 26 for _, c := range s { 27 if !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9') { 28 return false 29 } 30 } 31 return true 32} 33 34// errSyntax is returned by any of the parsing functions when the 35// input is not well-formed, according to BCP 47. 36// TODO: return the position at which the syntax error occurred? 37var errSyntax = errors.New("language: tag is not well-formed") 38 39// ValueError is returned by any of the parsing functions when the 40// input is well-formed but the respective subtag is not recognized 41// as a valid value. 42type ValueError struct { 43 v [8]byte 44} 45 46func mkErrInvalid(s []byte) error { 47 var e ValueError 48 copy(e.v[:], s) 49 return e 50} 51 52func (e ValueError) tag() []byte { 53 n := bytes.IndexByte(e.v[:], 0) 54 if n == -1 { 55 n = 8 56 } 57 return e.v[:n] 58} 59 60// Error implements the error interface. 61func (e ValueError) Error() string { 62 return fmt.Sprintf("language: subtag %q is well-formed but unknown", e.tag()) 63} 64 65// Subtag returns the subtag for which the error occurred. 66func (e ValueError) Subtag() string { 67 return string(e.tag()) 68} 69 70// scanner is used to scan BCP 47 tokens, which are separated by _ or -. 71type scanner struct { 72 b []byte 73 bytes [max99thPercentileSize]byte 74 token []byte 75 start int // start position of the current token 76 end int // end position of the current token 77 next int // next point for scan 78 err error 79 done bool 80} 81 82func makeScannerString(s string) scanner { 83 scan := scanner{} 84 if len(s) <= len(scan.bytes) { 85 scan.b = scan.bytes[:copy(scan.bytes[:], s)] 86 } else { 87 scan.b = []byte(s) 88 } 89 scan.init() 90 return scan 91} 92 93// makeScanner returns a scanner using b as the input buffer. 94// b is not copied and may be modified by the scanner routines. 95func makeScanner(b []byte) scanner { 96 scan := scanner{b: b} 97 scan.init() 98 return scan 99} 100 101func (s *scanner) init() { 102 for i, c := range s.b { 103 if c == '_' { 104 s.b[i] = '-' 105 } 106 } 107 s.scan() 108} 109 110// restToLower converts the string between start and end to lower case. 111func (s *scanner) toLower(start, end int) { 112 for i := start; i < end; i++ { 113 c := s.b[i] 114 if 'A' <= c && c <= 'Z' { 115 s.b[i] += 'a' - 'A' 116 } 117 } 118} 119 120func (s *scanner) setError(e error) { 121 if s.err == nil || (e == errSyntax && s.err != errSyntax) { 122 s.err = e 123 } 124} 125 126// resizeRange shrinks or grows the array at position oldStart such that 127// a new string of size newSize can fit between oldStart and oldEnd. 128// Sets the scan point to after the resized range. 129func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) { 130 s.start = oldStart 131 if end := oldStart + newSize; end != oldEnd { 132 diff := end - oldEnd 133 if end < cap(s.b) { 134 b := make([]byte, len(s.b)+diff) 135 copy(b, s.b[:oldStart]) 136 copy(b[end:], s.b[oldEnd:]) 137 s.b = b 138 } else { 139 s.b = append(s.b[end:], s.b[oldEnd:]...) 140 } 141 s.next = end + (s.next - s.end) 142 s.end = end 143 } 144} 145 146// replace replaces the current token with repl. 147func (s *scanner) replace(repl string) { 148 s.resizeRange(s.start, s.end, len(repl)) 149 copy(s.b[s.start:], repl) 150} 151 152// gobble removes the current token from the input. 153// Caller must call scan after calling gobble. 154func (s *scanner) gobble(e error) { 155 s.setError(e) 156 if s.start == 0 { 157 s.b = s.b[:+copy(s.b, s.b[s.next:])] 158 s.end = 0 159 } else { 160 s.b = s.b[:s.start-1+copy(s.b[s.start-1:], s.b[s.end:])] 161 s.end = s.start - 1 162 } 163 s.next = s.start 164} 165 166// deleteRange removes the given range from s.b before the current token. 167func (s *scanner) deleteRange(start, end int) { 168 s.setError(errSyntax) 169 s.b = s.b[:start+copy(s.b[start:], s.b[end:])] 170 diff := end - start 171 s.next -= diff 172 s.start -= diff 173 s.end -= diff 174} 175 176// scan parses the next token of a BCP 47 string. Tokens that are larger 177// than 8 characters or include non-alphanumeric characters result in an error 178// and are gobbled and removed from the output. 179// It returns the end position of the last token consumed. 180func (s *scanner) scan() (end int) { 181 end = s.end 182 s.token = nil 183 for s.start = s.next; s.next < len(s.b); { 184 i := bytes.IndexByte(s.b[s.next:], '-') 185 if i == -1 { 186 s.end = len(s.b) 187 s.next = len(s.b) 188 i = s.end - s.start 189 } else { 190 s.end = s.next + i 191 s.next = s.end + 1 192 } 193 token := s.b[s.start:s.end] 194 if i < 1 || i > 8 || !isAlphaNum(token) { 195 s.gobble(errSyntax) 196 continue 197 } 198 s.token = token 199 return end 200 } 201 if n := len(s.b); n > 0 && s.b[n-1] == '-' { 202 s.setError(errSyntax) 203 s.b = s.b[:len(s.b)-1] 204 } 205 s.done = true 206 return end 207} 208 209// acceptMinSize parses multiple tokens of the given size or greater. 210// It returns the end position of the last token consumed. 211func (s *scanner) acceptMinSize(min int) (end int) { 212 end = s.end 213 s.scan() 214 for ; len(s.token) >= min; s.scan() { 215 end = s.end 216 } 217 return end 218} 219 220// Parse parses the given BCP 47 string and returns a valid Tag. If parsing 221// failed it returns an error and any part of the tag that could be parsed. 222// If parsing succeeded but an unknown value was found, it returns 223// ValueError. The Tag returned in this case is just stripped of the unknown 224// value. All other values are preserved. It accepts tags in the BCP 47 format 225// and extensions to this standard defined in 226// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers. 227// The resulting tag is canonicalized using the default canonicalization type. 228func Parse(s string) (t Tag, err error) { 229 return Default.Parse(s) 230} 231 232// Parse parses the given BCP 47 string and returns a valid Tag. If parsing 233// failed it returns an error and any part of the tag that could be parsed. 234// If parsing succeeded but an unknown value was found, it returns 235// ValueError. The Tag returned in this case is just stripped of the unknown 236// value. All other values are preserved. It accepts tags in the BCP 47 format 237// and extensions to this standard defined in 238// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers. 239// The resulting tag is canonicalized using the the canonicalization type c. 240func (c CanonType) Parse(s string) (t Tag, err error) { 241 // TODO: consider supporting old-style locale key-value pairs. 242 if s == "" { 243 return und, errSyntax 244 } 245 if len(s) <= maxAltTaglen { 246 b := [maxAltTaglen]byte{} 247 for i, c := range s { 248 // Generating invalid UTF-8 is okay as it won't match. 249 if 'A' <= c && c <= 'Z' { 250 c += 'a' - 'A' 251 } else if c == '_' { 252 c = '-' 253 } 254 b[i] = byte(c) 255 } 256 if t, ok := grandfathered(b); ok { 257 return t, nil 258 } 259 } 260 scan := makeScannerString(s) 261 t, err = parse(&scan, s) 262 t, changed := t.canonicalize(c) 263 if changed { 264 t.remakeString() 265 } 266 return t, err 267} 268 269func parse(scan *scanner, s string) (t Tag, err error) { 270 t = und 271 var end int 272 if n := len(scan.token); n <= 1 { 273 scan.toLower(0, len(scan.b)) 274 if n == 0 || scan.token[0] != 'x' { 275 return t, errSyntax 276 } 277 end = parseExtensions(scan) 278 } else if n >= 4 { 279 return und, errSyntax 280 } else { // the usual case 281 t, end = parseTag(scan) 282 if n := len(scan.token); n == 1 { 283 t.pExt = uint16(end) 284 end = parseExtensions(scan) 285 } else if end < len(scan.b) { 286 scan.setError(errSyntax) 287 scan.b = scan.b[:end] 288 } 289 } 290 if int(t.pVariant) < len(scan.b) { 291 if end < len(s) { 292 s = s[:end] 293 } 294 if len(s) > 0 && tag.Compare(s, scan.b) == 0 { 295 t.str = s 296 } else { 297 t.str = string(scan.b) 298 } 299 } else { 300 t.pVariant, t.pExt = 0, 0 301 } 302 return t, scan.err 303} 304 305// parseTag parses language, script, region and variants. 306// It returns a Tag and the end position in the input that was parsed. 307func parseTag(scan *scanner) (t Tag, end int) { 308 var e error 309 // TODO: set an error if an unknown lang, script or region is encountered. 310 t.lang, e = getLangID(scan.token) 311 scan.setError(e) 312 scan.replace(t.lang.String()) 313 langStart := scan.start 314 end = scan.scan() 315 for len(scan.token) == 3 && isAlpha(scan.token[0]) { 316 // From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent 317 // to a tag of the form <extlang>. 318 lang, e := getLangID(scan.token) 319 if lang != 0 { 320 t.lang = lang 321 copy(scan.b[langStart:], lang.String()) 322 scan.b[langStart+3] = '-' 323 scan.start = langStart + 4 324 } 325 scan.gobble(e) 326 end = scan.scan() 327 } 328 if len(scan.token) == 4 && isAlpha(scan.token[0]) { 329 t.script, e = getScriptID(script, scan.token) 330 if t.script == 0 { 331 scan.gobble(e) 332 } 333 end = scan.scan() 334 } 335 if n := len(scan.token); n >= 2 && n <= 3 { 336 t.region, e = getRegionID(scan.token) 337 if t.region == 0 { 338 scan.gobble(e) 339 } else { 340 scan.replace(t.region.String()) 341 } 342 end = scan.scan() 343 } 344 scan.toLower(scan.start, len(scan.b)) 345 t.pVariant = byte(end) 346 end = parseVariants(scan, end, t) 347 t.pExt = uint16(end) 348 return t, end 349} 350 351var separator = []byte{'-'} 352 353// parseVariants scans tokens as long as each token is a valid variant string. 354// Duplicate variants are removed. 355func parseVariants(scan *scanner, end int, t Tag) int { 356 start := scan.start 357 varIDBuf := [4]uint8{} 358 variantBuf := [4][]byte{} 359 varID := varIDBuf[:0] 360 variant := variantBuf[:0] 361 last := -1 362 needSort := false 363 for ; len(scan.token) >= 4; scan.scan() { 364 // TODO: measure the impact of needing this conversion and redesign 365 // the data structure if there is an issue. 366 v, ok := variantIndex[string(scan.token)] 367 if !ok { 368 // unknown variant 369 // TODO: allow user-defined variants? 370 scan.gobble(mkErrInvalid(scan.token)) 371 continue 372 } 373 varID = append(varID, v) 374 variant = append(variant, scan.token) 375 if !needSort { 376 if last < int(v) { 377 last = int(v) 378 } else { 379 needSort = true 380 // There is no legal combinations of more than 7 variants 381 // (and this is by no means a useful sequence). 382 const maxVariants = 8 383 if len(varID) > maxVariants { 384 break 385 } 386 } 387 } 388 end = scan.end 389 } 390 if needSort { 391 sort.Sort(variantsSort{varID, variant}) 392 k, l := 0, -1 393 for i, v := range varID { 394 w := int(v) 395 if l == w { 396 // Remove duplicates. 397 continue 398 } 399 varID[k] = varID[i] 400 variant[k] = variant[i] 401 k++ 402 l = w 403 } 404 if str := bytes.Join(variant[:k], separator); len(str) == 0 { 405 end = start - 1 406 } else { 407 scan.resizeRange(start, end, len(str)) 408 copy(scan.b[scan.start:], str) 409 end = scan.end 410 } 411 } 412 return end 413} 414 415type variantsSort struct { 416 i []uint8 417 v [][]byte 418} 419 420func (s variantsSort) Len() int { 421 return len(s.i) 422} 423 424func (s variantsSort) Swap(i, j int) { 425 s.i[i], s.i[j] = s.i[j], s.i[i] 426 s.v[i], s.v[j] = s.v[j], s.v[i] 427} 428 429func (s variantsSort) Less(i, j int) bool { 430 return s.i[i] < s.i[j] 431} 432 433type bytesSort [][]byte 434 435func (b bytesSort) Len() int { 436 return len(b) 437} 438 439func (b bytesSort) Swap(i, j int) { 440 b[i], b[j] = b[j], b[i] 441} 442 443func (b bytesSort) Less(i, j int) bool { 444 return bytes.Compare(b[i], b[j]) == -1 445} 446 447// parseExtensions parses and normalizes the extensions in the buffer. 448// It returns the last position of scan.b that is part of any extension. 449// It also trims scan.b to remove excess parts accordingly. 450func parseExtensions(scan *scanner) int { 451 start := scan.start 452 exts := [][]byte{} 453 private := []byte{} 454 end := scan.end 455 for len(scan.token) == 1 { 456 extStart := scan.start 457 ext := scan.token[0] 458 end = parseExtension(scan) 459 extension := scan.b[extStart:end] 460 if len(extension) < 3 || (ext != 'x' && len(extension) < 4) { 461 scan.setError(errSyntax) 462 end = extStart 463 continue 464 } else if start == extStart && (ext == 'x' || scan.start == len(scan.b)) { 465 scan.b = scan.b[:end] 466 return end 467 } else if ext == 'x' { 468 private = extension 469 break 470 } 471 exts = append(exts, extension) 472 } 473 sort.Sort(bytesSort(exts)) 474 if len(private) > 0 { 475 exts = append(exts, private) 476 } 477 scan.b = scan.b[:start] 478 if len(exts) > 0 { 479 scan.b = append(scan.b, bytes.Join(exts, separator)...) 480 } else if start > 0 { 481 // Strip trailing '-'. 482 scan.b = scan.b[:start-1] 483 } 484 return end 485} 486 487// parseExtension parses a single extension and returns the position of 488// the extension end. 489func parseExtension(scan *scanner) int { 490 start, end := scan.start, scan.end 491 switch scan.token[0] { 492 case 'u': 493 attrStart := end 494 scan.scan() 495 for last := []byte{}; len(scan.token) > 2; scan.scan() { 496 if bytes.Compare(scan.token, last) != -1 { 497 // Attributes are unsorted. Start over from scratch. 498 p := attrStart + 1 499 scan.next = p 500 attrs := [][]byte{} 501 for scan.scan(); len(scan.token) > 2; scan.scan() { 502 attrs = append(attrs, scan.token) 503 end = scan.end 504 } 505 sort.Sort(bytesSort(attrs)) 506 copy(scan.b[p:], bytes.Join(attrs, separator)) 507 break 508 } 509 last = scan.token 510 end = scan.end 511 } 512 var last, key []byte 513 for attrEnd := end; len(scan.token) == 2; last = key { 514 key = scan.token 515 keyEnd := scan.end 516 end = scan.acceptMinSize(3) 517 // TODO: check key value validity 518 if keyEnd == end || bytes.Compare(key, last) != 1 { 519 // We have an invalid key or the keys are not sorted. 520 // Start scanning keys from scratch and reorder. 521 p := attrEnd + 1 522 scan.next = p 523 keys := [][]byte{} 524 for scan.scan(); len(scan.token) == 2; { 525 keyStart, keyEnd := scan.start, scan.end 526 end = scan.acceptMinSize(3) 527 if keyEnd != end { 528 keys = append(keys, scan.b[keyStart:end]) 529 } else { 530 scan.setError(errSyntax) 531 end = keyStart 532 } 533 } 534 sort.Sort(bytesSort(keys)) 535 reordered := bytes.Join(keys, separator) 536 if e := p + len(reordered); e < end { 537 scan.deleteRange(e, end) 538 end = e 539 } 540 copy(scan.b[p:], bytes.Join(keys, separator)) 541 break 542 } 543 } 544 case 't': 545 scan.scan() 546 if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) { 547 _, end = parseTag(scan) 548 scan.toLower(start, end) 549 } 550 for len(scan.token) == 2 && !isAlpha(scan.token[1]) { 551 end = scan.acceptMinSize(3) 552 } 553 case 'x': 554 end = scan.acceptMinSize(1) 555 default: 556 end = scan.acceptMinSize(2) 557 } 558 return end 559} 560 561// Compose creates a Tag from individual parts, which may be of type Tag, Base, 562// Script, Region, Variant, []Variant, Extension, []Extension or error. If a 563// Base, Script or Region or slice of type Variant or Extension is passed more 564// than once, the latter will overwrite the former. Variants and Extensions are 565// accumulated, but if two extensions of the same type are passed, the latter 566// will replace the former. A Tag overwrites all former values and typically 567// only makes sense as the first argument. The resulting tag is returned after 568// canonicalizing using the Default CanonType. If one or more errors are 569// encountered, one of the errors is returned. 570func Compose(part ...interface{}) (t Tag, err error) { 571 return Default.Compose(part...) 572} 573 574// Compose creates a Tag from individual parts, which may be of type Tag, Base, 575// Script, Region, Variant, []Variant, Extension, []Extension or error. If a 576// Base, Script or Region or slice of type Variant or Extension is passed more 577// than once, the latter will overwrite the former. Variants and Extensions are 578// accumulated, but if two extensions of the same type are passed, the latter 579// will replace the former. A Tag overwrites all former values and typically 580// only makes sense as the first argument. The resulting tag is returned after 581// canonicalizing using CanonType c. If one or more errors are encountered, 582// one of the errors is returned. 583func (c CanonType) Compose(part ...interface{}) (t Tag, err error) { 584 var b builder 585 if err = b.update(part...); err != nil { 586 return und, err 587 } 588 t, _ = b.tag.canonicalize(c) 589 590 if len(b.ext) > 0 || len(b.variant) > 0 { 591 sort.Sort(sortVariant(b.variant)) 592 sort.Strings(b.ext) 593 if b.private != "" { 594 b.ext = append(b.ext, b.private) 595 } 596 n := maxCoreSize + tokenLen(b.variant...) + tokenLen(b.ext...) 597 buf := make([]byte, n) 598 p := t.genCoreBytes(buf) 599 t.pVariant = byte(p) 600 p += appendTokens(buf[p:], b.variant...) 601 t.pExt = uint16(p) 602 p += appendTokens(buf[p:], b.ext...) 603 t.str = string(buf[:p]) 604 } else if b.private != "" { 605 t.str = b.private 606 t.remakeString() 607 } 608 return 609} 610 611type builder struct { 612 tag Tag 613 614 private string // the x extension 615 ext []string 616 variant []string 617 618 err error 619} 620 621func (b *builder) addExt(e string) { 622 if e == "" { 623 } else if e[0] == 'x' { 624 b.private = e 625 } else { 626 b.ext = append(b.ext, e) 627 } 628} 629 630var errInvalidArgument = errors.New("invalid Extension or Variant") 631 632func (b *builder) update(part ...interface{}) (err error) { 633 replace := func(l *[]string, s string, eq func(a, b string) bool) bool { 634 if s == "" { 635 b.err = errInvalidArgument 636 return true 637 } 638 for i, v := range *l { 639 if eq(v, s) { 640 (*l)[i] = s 641 return true 642 } 643 } 644 return false 645 } 646 for _, x := range part { 647 switch v := x.(type) { 648 case Tag: 649 b.tag.lang = v.lang 650 b.tag.region = v.region 651 b.tag.script = v.script 652 if v.str != "" { 653 b.variant = nil 654 for x, s := "", v.str[v.pVariant:v.pExt]; s != ""; { 655 x, s = nextToken(s) 656 b.variant = append(b.variant, x) 657 } 658 b.ext, b.private = nil, "" 659 for i, e := int(v.pExt), ""; i < len(v.str); { 660 i, e = getExtension(v.str, i) 661 b.addExt(e) 662 } 663 } 664 case Base: 665 b.tag.lang = v.langID 666 case Script: 667 b.tag.script = v.scriptID 668 case Region: 669 b.tag.region = v.regionID 670 case Variant: 671 if !replace(&b.variant, v.variant, func(a, b string) bool { return a == b }) { 672 b.variant = append(b.variant, v.variant) 673 } 674 case Extension: 675 if !replace(&b.ext, v.s, func(a, b string) bool { return a[0] == b[0] }) { 676 b.addExt(v.s) 677 } 678 case []Variant: 679 b.variant = nil 680 for _, x := range v { 681 b.update(x) 682 } 683 case []Extension: 684 b.ext, b.private = nil, "" 685 for _, e := range v { 686 b.update(e) 687 } 688 // TODO: support parsing of raw strings based on morphology or just extensions? 689 case error: 690 err = v 691 } 692 } 693 return 694} 695 696func tokenLen(token ...string) (n int) { 697 for _, t := range token { 698 n += len(t) + 1 699 } 700 return 701} 702 703func appendTokens(b []byte, token ...string) int { 704 p := 0 705 for _, t := range token { 706 b[p] = '-' 707 copy(b[p+1:], t) 708 p += 1 + len(t) 709 } 710 return p 711} 712 713type sortVariant []string 714 715func (s sortVariant) Len() int { 716 return len(s) 717} 718 719func (s sortVariant) Swap(i, j int) { 720 s[j], s[i] = s[i], s[j] 721} 722 723func (s sortVariant) Less(i, j int) bool { 724 return variantIndex[s[i]] < variantIndex[s[j]] 725} 726 727func findExt(list []string, x byte) int { 728 for i, e := range list { 729 if e[0] == x { 730 return i 731 } 732 } 733 return -1 734} 735 736// getExtension returns the name, body and end position of the extension. 737func getExtension(s string, p int) (end int, ext string) { 738 if s[p] == '-' { 739 p++ 740 } 741 if s[p] == 'x' { 742 return len(s), s[p:] 743 } 744 end = nextExtension(s, p) 745 return end, s[p:end] 746} 747 748// nextExtension finds the next extension within the string, searching 749// for the -<char>- pattern from position p. 750// In the fast majority of cases, language tags will have at most 751// one extension and extensions tend to be small. 752func nextExtension(s string, p int) int { 753 for n := len(s) - 3; p < n; { 754 if s[p] == '-' { 755 if s[p+2] == '-' { 756 return p 757 } 758 p += 3 759 } else { 760 p++ 761 } 762 } 763 return len(s) 764} 765 766var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight") 767 768// ParseAcceptLanguage parses the contents of an Accept-Language header as 769// defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and 770// a list of corresponding quality weights. It is more permissive than RFC 2616 771// and may return non-nil slices even if the input is not valid. 772// The Tags will be sorted by highest weight first and then by first occurrence. 773// Tags with a weight of zero will be dropped. An error will be returned if the 774// input could not be parsed. 775func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) { 776 var entry string 777 for s != "" { 778 if entry, s = split(s, ','); entry == "" { 779 continue 780 } 781 782 entry, weight := split(entry, ';') 783 784 // Scan the language. 785 t, err := Parse(entry) 786 if err != nil { 787 id, ok := acceptFallback[entry] 788 if !ok { 789 return nil, nil, err 790 } 791 t = Tag{lang: id} 792 } 793 794 // Scan the optional weight. 795 w := 1.0 796 if weight != "" { 797 weight = consume(weight, 'q') 798 weight = consume(weight, '=') 799 // consume returns the empty string when a token could not be 800 // consumed, resulting in an error for ParseFloat. 801 if w, err = strconv.ParseFloat(weight, 32); err != nil { 802 return nil, nil, errInvalidWeight 803 } 804 // Drop tags with a quality weight of 0. 805 if w <= 0 { 806 continue 807 } 808 } 809 810 tag = append(tag, t) 811 q = append(q, float32(w)) 812 } 813 sortStable(&tagSort{tag, q}) 814 return tag, q, nil 815} 816 817// consume removes a leading token c from s and returns the result or the empty 818// string if there is no such token. 819func consume(s string, c byte) string { 820 if s == "" || s[0] != c { 821 return "" 822 } 823 return strings.TrimSpace(s[1:]) 824} 825 826func split(s string, c byte) (head, tail string) { 827 if i := strings.IndexByte(s, c); i >= 0 { 828 return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+1:]) 829 } 830 return strings.TrimSpace(s), "" 831} 832 833// Add hack mapping to deal with a small number of cases that that occur 834// in Accept-Language (with reasonable frequency). 835var acceptFallback = map[string]langID{ 836 "english": _en, 837 "deutsch": _de, 838 "italian": _it, 839 "french": _fr, 840 "*": _mul, // defined in the spec to match all languages. 841} 842 843type tagSort struct { 844 tag []Tag 845 q []float32 846} 847 848func (s *tagSort) Len() int { 849 return len(s.q) 850} 851 852func (s *tagSort) Less(i, j int) bool { 853 return s.q[i] > s.q[j] 854} 855 856func (s *tagSort) Swap(i, j int) { 857 s.tag[i], s.tag[j] = s.tag[j], s.tag[i] 858 s.q[i], s.q[j] = s.q[j], s.q[i] 859} 860