1// Copyright 2013 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5//go:generate go run gen.go gen_index.go -output tables.go 6//go:generate go run gen_parents.go 7 8package compact 9 10// TODO: Remove above NOTE after: 11// - verifying that tables are dropped correctly (most notably matcher tables). 12 13import ( 14 "strings" 15 16 "golang.org/x/text/internal/language" 17) 18 19// Tag represents a BCP 47 language tag. It is used to specify an instance of a 20// specific language or locale. All language tag values are guaranteed to be 21// well-formed. 22type Tag struct { 23 // NOTE: exported tags will become part of the public API. 24 language ID 25 locale ID 26 full fullTag // always a language.Tag for now. 27} 28 29const _und = 0 30 31type fullTag interface { 32 IsRoot() bool 33 Parent() language.Tag 34} 35 36// Make a compact Tag from a fully specified internal language Tag. 37func Make(t language.Tag) (tag Tag) { 38 if region := t.TypeForKey("rg"); len(region) == 6 && region[2:] == "zzzz" { 39 if r, err := language.ParseRegion(region[:2]); err == nil { 40 tFull := t 41 t, _ = t.SetTypeForKey("rg", "") 42 // TODO: should we not consider "va" for the language tag? 43 var exact1, exact2 bool 44 tag.language, exact1 = FromTag(t) 45 t.RegionID = r 46 tag.locale, exact2 = FromTag(t) 47 if !exact1 || !exact2 { 48 tag.full = tFull 49 } 50 return tag 51 } 52 } 53 lang, ok := FromTag(t) 54 tag.language = lang 55 tag.locale = lang 56 if !ok { 57 tag.full = t 58 } 59 return tag 60} 61 62// Tag returns an internal language Tag version of this tag. 63func (t Tag) Tag() language.Tag { 64 if t.full != nil { 65 return t.full.(language.Tag) 66 } 67 tag := t.language.Tag() 68 if t.language != t.locale { 69 loc := t.locale.Tag() 70 tag, _ = tag.SetTypeForKey("rg", strings.ToLower(loc.RegionID.String())+"zzzz") 71 } 72 return tag 73} 74 75// IsCompact reports whether this tag is fully defined in terms of ID. 76func (t *Tag) IsCompact() bool { 77 return t.full == nil 78} 79 80// MayHaveVariants reports whether a tag may have variants. If it returns false 81// it is guaranteed the tag does not have variants. 82func (t Tag) MayHaveVariants() bool { 83 return t.full != nil || int(t.language) >= len(coreTags) 84} 85 86// MayHaveExtensions reports whether a tag may have extensions. If it returns 87// false it is guaranteed the tag does not have them. 88func (t Tag) MayHaveExtensions() bool { 89 return t.full != nil || 90 int(t.language) >= len(coreTags) || 91 t.language != t.locale 92} 93 94// IsRoot returns true if t is equal to language "und". 95func (t Tag) IsRoot() bool { 96 if t.full != nil { 97 return t.full.IsRoot() 98 } 99 return t.language == _und 100} 101 102// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a 103// specific language are substituted with fields from the parent language. 104// The parent for a language may change for newer versions of CLDR. 105func (t Tag) Parent() Tag { 106 if t.full != nil { 107 return Make(t.full.Parent()) 108 } 109 if t.language != t.locale { 110 // Simulate stripping -u-rg-xxxxxx 111 return Tag{language: t.language, locale: t.language} 112 } 113 // TODO: use parent lookup table once cycle from internal package is 114 // removed. Probably by internalizing the table and declaring this fast 115 // enough. 116 // lang := compactID(internal.Parent(uint16(t.language))) 117 lang, _ := FromTag(t.language.Tag().Parent()) 118 return Tag{language: lang, locale: lang} 119} 120 121// returns token t and the rest of the string. 122func nextToken(s string) (t, tail string) { 123 p := strings.Index(s[1:], "-") 124 if p == -1 { 125 return s[1:], "" 126 } 127 p++ 128 return s[1:p], s[p:] 129} 130 131// LanguageID returns an index, where 0 <= index < NumCompactTags, for tags 132// for which data exists in the text repository.The index will change over time 133// and should not be stored in persistent storage. If t does not match a compact 134// index, exact will be false and the compact index will be returned for the 135// first match after repeatedly taking the Parent of t. 136func LanguageID(t Tag) (id ID, exact bool) { 137 return t.language, t.full == nil 138} 139 140// RegionalID returns the ID for the regional variant of this tag. This index is 141// used to indicate region-specific overrides, such as default currency, default 142// calendar and week data, default time cycle, and default measurement system 143// and unit preferences. 144// 145// For instance, the tag en-GB-u-rg-uszzzz specifies British English with US 146// settings for currency, number formatting, etc. The CompactIndex for this tag 147// will be that for en-GB, while the RegionalID will be the one corresponding to 148// en-US. 149func RegionalID(t Tag) (id ID, exact bool) { 150 return t.locale, t.full == nil 151} 152 153// LanguageTag returns t stripped of regional variant indicators. 154// 155// At the moment this means it is stripped of a regional and variant subtag "rg" 156// and "va" in the "u" extension. 157func (t Tag) LanguageTag() Tag { 158 if t.full == nil { 159 return Tag{language: t.language, locale: t.language} 160 } 161 tt := t.Tag() 162 tt.SetTypeForKey("rg", "") 163 tt.SetTypeForKey("va", "") 164 return Make(tt) 165} 166 167// RegionalTag returns the regional variant of the tag. 168// 169// At the moment this means that the region is set from the regional subtag 170// "rg" in the "u" extension. 171func (t Tag) RegionalTag() Tag { 172 rt := Tag{language: t.locale, locale: t.locale} 173 if t.full == nil { 174 return rt 175 } 176 b := language.Builder{} 177 tag := t.Tag() 178 // tag, _ = tag.SetTypeForKey("rg", "") 179 b.SetTag(t.locale.Tag()) 180 if v := tag.Variants(); v != "" { 181 for _, v := range strings.Split(v, "-") { 182 b.AddVariant(v) 183 } 184 } 185 for _, e := range tag.Extensions() { 186 b.AddExt(e) 187 } 188 return t 189} 190 191// FromTag reports closest matching ID for an internal language Tag. 192func FromTag(t language.Tag) (id ID, exact bool) { 193 // TODO: perhaps give more frequent tags a lower index. 194 // TODO: we could make the indexes stable. This will excluded some 195 // possibilities for optimization, so don't do this quite yet. 196 exact = true 197 198 b, s, r := t.Raw() 199 if t.HasString() { 200 if t.IsPrivateUse() { 201 // We have no entries for user-defined tags. 202 return 0, false 203 } 204 hasExtra := false 205 if t.HasVariants() { 206 if t.HasExtensions() { 207 build := language.Builder{} 208 build.SetTag(language.Tag{LangID: b, ScriptID: s, RegionID: r}) 209 build.AddVariant(t.Variants()) 210 exact = false 211 t = build.Make() 212 } 213 hasExtra = true 214 } else if _, ok := t.Extension('u'); ok { 215 // TODO: va may mean something else. Consider not considering it. 216 // Strip all but the 'va' entry. 217 old := t 218 variant := t.TypeForKey("va") 219 t = language.Tag{LangID: b, ScriptID: s, RegionID: r} 220 if variant != "" { 221 t, _ = t.SetTypeForKey("va", variant) 222 hasExtra = true 223 } 224 exact = old == t 225 } else { 226 exact = false 227 } 228 if hasExtra { 229 // We have some variants. 230 for i, s := range specialTags { 231 if s == t { 232 return ID(i + len(coreTags)), exact 233 } 234 } 235 exact = false 236 } 237 } 238 if x, ok := getCoreIndex(t); ok { 239 return x, exact 240 } 241 exact = false 242 if r != 0 && s == 0 { 243 // Deal with cases where an extra script is inserted for the region. 244 t, _ := t.Maximize() 245 if x, ok := getCoreIndex(t); ok { 246 return x, exact 247 } 248 } 249 for t = t.Parent(); t != root; t = t.Parent() { 250 // No variants specified: just compare core components. 251 // The key has the form lllssrrr, where l, s, and r are nibbles for 252 // respectively the langID, scriptID, and regionID. 253 if x, ok := getCoreIndex(t); ok { 254 return x, exact 255 } 256 } 257 return 0, exact 258} 259 260var root = language.Tag{} 261