1// Copyright 2013 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package language 6 7import ( 8 "errors" 9 "strconv" 10 "strings" 11 12 "golang.org/x/text/internal/language" 13) 14 15// ValueError is returned by any of the parsing functions when the 16// input is well-formed but the respective subtag is not recognized 17// as a valid value. 18type ValueError interface { 19 error 20 21 // Subtag returns the subtag for which the error occurred. 22 Subtag() string 23} 24 25// Parse parses the given BCP 47 string and returns a valid Tag. If parsing 26// failed it returns an error and any part of the tag that could be parsed. 27// If parsing succeeded but an unknown value was found, it returns 28// ValueError. The Tag returned in this case is just stripped of the unknown 29// value. All other values are preserved. It accepts tags in the BCP 47 format 30// and extensions to this standard defined in 31// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers. 32// The resulting tag is canonicalized using the default canonicalization type. 33func Parse(s string) (t Tag, err error) { 34 return Default.Parse(s) 35} 36 37// Parse parses the given BCP 47 string and returns a valid Tag. If parsing 38// failed it returns an error and any part of the tag that could be parsed. 39// If parsing succeeded but an unknown value was found, it returns 40// ValueError. The Tag returned in this case is just stripped of the unknown 41// value. All other values are preserved. It accepts tags in the BCP 47 format 42// and extensions to this standard defined in 43// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers. 44// The resulting tag is canonicalized using the canonicalization type c. 45func (c CanonType) Parse(s string) (t Tag, err error) { 46 tt, err := language.Parse(s) 47 if err != nil { 48 return makeTag(tt), err 49 } 50 tt, changed := canonicalize(c, tt) 51 if changed { 52 tt.RemakeString() 53 } 54 return makeTag(tt), err 55} 56 57// Compose creates a Tag from individual parts, which may be of type Tag, Base, 58// Script, Region, Variant, []Variant, Extension, []Extension or error. If a 59// Base, Script or Region or slice of type Variant or Extension is passed more 60// than once, the latter will overwrite the former. Variants and Extensions are 61// accumulated, but if two extensions of the same type are passed, the latter 62// will replace the former. For -u extensions, though, the key-type pairs are 63// added, where later values overwrite older ones. A Tag overwrites all former 64// values and typically only makes sense as the first argument. The resulting 65// tag is returned after canonicalizing using the Default CanonType. If one or 66// more errors are encountered, one of the errors is returned. 67func Compose(part ...interface{}) (t Tag, err error) { 68 return Default.Compose(part...) 69} 70 71// Compose creates a Tag from individual parts, which may be of type Tag, Base, 72// Script, Region, Variant, []Variant, Extension, []Extension or error. If a 73// Base, Script or Region or slice of type Variant or Extension is passed more 74// than once, the latter will overwrite the former. Variants and Extensions are 75// accumulated, but if two extensions of the same type are passed, the latter 76// will replace the former. For -u extensions, though, the key-type pairs are 77// added, where later values overwrite older ones. A Tag overwrites all former 78// values and typically only makes sense as the first argument. The resulting 79// tag is returned after canonicalizing using CanonType c. If one or more errors 80// are encountered, one of the errors is returned. 81func (c CanonType) Compose(part ...interface{}) (t Tag, err error) { 82 var b language.Builder 83 if err = update(&b, part...); err != nil { 84 return und, err 85 } 86 b.Tag, _ = canonicalize(c, b.Tag) 87 return makeTag(b.Make()), err 88} 89 90var errInvalidArgument = errors.New("invalid Extension or Variant") 91 92func update(b *language.Builder, part ...interface{}) (err error) { 93 for _, x := range part { 94 switch v := x.(type) { 95 case Tag: 96 b.SetTag(v.tag()) 97 case Base: 98 b.Tag.LangID = v.langID 99 case Script: 100 b.Tag.ScriptID = v.scriptID 101 case Region: 102 b.Tag.RegionID = v.regionID 103 case Variant: 104 if v.variant == "" { 105 err = errInvalidArgument 106 break 107 } 108 b.AddVariant(v.variant) 109 case Extension: 110 if v.s == "" { 111 err = errInvalidArgument 112 break 113 } 114 b.SetExt(v.s) 115 case []Variant: 116 b.ClearVariants() 117 for _, v := range v { 118 b.AddVariant(v.variant) 119 } 120 case []Extension: 121 b.ClearExtensions() 122 for _, e := range v { 123 b.SetExt(e.s) 124 } 125 // TODO: support parsing of raw strings based on morphology or just extensions? 126 case error: 127 if v != nil { 128 err = v 129 } 130 } 131 } 132 return 133} 134 135var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight") 136 137// ParseAcceptLanguage parses the contents of an Accept-Language header as 138// defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and 139// a list of corresponding quality weights. It is more permissive than RFC 2616 140// and may return non-nil slices even if the input is not valid. 141// The Tags will be sorted by highest weight first and then by first occurrence. 142// Tags with a weight of zero will be dropped. An error will be returned if the 143// input could not be parsed. 144func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) { 145 var entry string 146 for s != "" { 147 if entry, s = split(s, ','); entry == "" { 148 continue 149 } 150 151 entry, weight := split(entry, ';') 152 153 // Scan the language. 154 t, err := Parse(entry) 155 if err != nil { 156 id, ok := acceptFallback[entry] 157 if !ok { 158 return nil, nil, err 159 } 160 t = makeTag(language.Tag{LangID: id}) 161 } 162 163 // Scan the optional weight. 164 w := 1.0 165 if weight != "" { 166 weight = consume(weight, 'q') 167 weight = consume(weight, '=') 168 // consume returns the empty string when a token could not be 169 // consumed, resulting in an error for ParseFloat. 170 if w, err = strconv.ParseFloat(weight, 32); err != nil { 171 return nil, nil, errInvalidWeight 172 } 173 // Drop tags with a quality weight of 0. 174 if w <= 0 { 175 continue 176 } 177 } 178 179 tag = append(tag, t) 180 q = append(q, float32(w)) 181 } 182 sortStable(&tagSort{tag, q}) 183 return tag, q, nil 184} 185 186// consume removes a leading token c from s and returns the result or the empty 187// string if there is no such token. 188func consume(s string, c byte) string { 189 if s == "" || s[0] != c { 190 return "" 191 } 192 return strings.TrimSpace(s[1:]) 193} 194 195func split(s string, c byte) (head, tail string) { 196 if i := strings.IndexByte(s, c); i >= 0 { 197 return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+1:]) 198 } 199 return strings.TrimSpace(s), "" 200} 201 202// Add hack mapping to deal with a small number of cases that occur 203// in Accept-Language (with reasonable frequency). 204var acceptFallback = map[string]language.Language{ 205 "english": _en, 206 "deutsch": _de, 207 "italian": _it, 208 "french": _fr, 209 "*": _mul, // defined in the spec to match all languages. 210} 211 212type tagSort struct { 213 tag []Tag 214 q []float32 215} 216 217func (s *tagSort) Len() int { 218 return len(s.q) 219} 220 221func (s *tagSort) Less(i, j int) bool { 222 return s.q[i] > s.q[j] 223} 224 225func (s *tagSort) Swap(i, j int) { 226 s.tag[i], s.tag[j] = s.tag[j], s.tag[i] 227 s.q[i], s.q[j] = s.q[j], s.q[i] 228} 229