1// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package display
6
7// This file contains common lookup code that is shared between the various
8// implementations of Namer and Dictionaries.
9
10import (
11	"fmt"
12	"sort"
13	"strings"
14
15	"golang.org/x/text/language"
16)
17
// namer is the lookup abstraction used by nameLanguage, nameScript,
// nameRegion and nameTag: it maps a numeric index to a display string.
// Callers in this file treat an empty result as "no name available".
type namer interface {
	// name gets the string for the given index. It should walk the
	// inheritance chain if a value is not present in the base index.
	name(idx int) string
}
23
24func nameLanguage(n namer, x interface{}) string {
25	t, _ := language.All.Compose(x)
26	for {
27		i, _, _ := langTagSet.index(t.Raw())
28		if s := n.name(i); s != "" {
29			return s
30		}
31		if t = t.Parent(); t == language.Und {
32			return ""
33		}
34	}
35}
36
37func nameScript(n namer, x interface{}) string {
38	t, _ := language.DeprecatedScript.Compose(x)
39	_, s, _ := t.Raw()
40	return n.name(scriptIndex.index(s.String()))
41}
42
43func nameRegion(n namer, x interface{}) string {
44	t, _ := language.DeprecatedRegion.Compose(x)
45	_, _, r := t.Raw()
46	return n.name(regionIndex.index(r.String()))
47}
48
49func nameTag(langN, scrN, regN namer, x interface{}) string {
50	t, ok := x.(language.Tag)
51	if !ok {
52		return ""
53	}
54	const form = language.All &^ language.SuppressScript
55	if c, err := form.Canonicalize(t); err == nil {
56		t = c
57	}
58	_, sRaw, rRaw := t.Raw()
59	i, scr, reg := langTagSet.index(t.Raw())
60	for i != -1 {
61		if str := langN.name(i); str != "" {
62			if hasS, hasR := (scr != language.Script{}), (reg != language.Region{}); hasS || hasR {
63				ss, sr := "", ""
64				if hasS {
65					ss = scrN.name(scriptIndex.index(scr.String()))
66				}
67				if hasR {
68					sr = regN.name(regionIndex.index(reg.String()))
69				}
70				// TODO: use patterns in CLDR or at least confirm they are the
71				// same for all languages.
72				if ss != "" && sr != "" {
73					return fmt.Sprintf("%s (%s, %s)", str, ss, sr)
74				}
75				if ss != "" || sr != "" {
76					return fmt.Sprintf("%s (%s%s)", str, ss, sr)
77				}
78			}
79			return str
80		}
81		scr, reg = sRaw, rRaw
82		if t = t.Parent(); t == language.Und {
83			return ""
84		}
85		i, _, _ = langTagSet.index(t.Raw())
86	}
87	return ""
88}
89
// header contains the data and indexes for a single namer.
//
// data is a series of strings concatenated into one, and index holds the
// offsets of those strings within data. For example, a header that defines
// strings for the languages de, el, en, fi, and nl could look like this:
//
//	header{
//		data:  "GermanGreekEnglishDutch",
//		index: []uint16{0, 6, 11, 18, 18, 23},
//	}
//
// The string for the language with index i is data[index[i]:index[i+1]], so
// index always has one element more than the number of languages for which
// the header defines a value. A string may be empty, which means the name is
// undefined; in the example above that is the case for fi (Finnish).
type header struct {
	data  string
	index []uint16
}

// name looks up the name for a tag in the dictionary, given its index. It
// returns the empty string if i does not denote an entry of h.
func (h *header) name(i int) string {
	if i < 0 || i >= len(h.index)-1 {
		return ""
	}
	start, end := h.index[i], h.index[i+1]
	return h.data[start:end]
}
117
// tagSet is used to find the index of a language in a set of tags.
type tagSet struct {
	// single indexes the tags that are looked up by base language alone.
	// Their indexes come first.
	single tagIndex
	// long lists the tags that carry a script or region after the base
	// language. The index method searches it with sort.SearchStrings, which
	// requires it to be sorted, and numbers its entries after all entries of
	// single.
	long   []string
}
123
var (
	// langTagSet is the tag set that nameLanguage and nameTag consult to turn
	// a tag into an index for a namer.
	langTagSet = tagSet{
		single: langIndex,
		long:   langTagsLong,
	}

	// selfTagSet is used for indexing the language strings in their own
	// language.
	selfTagSet = tagSet{
		single: selfIndex,
		long:   selfTagsLong,
	}

	// zzzz and zz are the parsed "Zzzz" script and "ZZ" region. tagSet.index
	// replaces them with the zero Script and Region before searching.
	zzzz = language.MustParseScript("Zzzz")
	zz   = language.MustParseRegion("ZZ")
)
140
141// index returns the index of the tag for the given base, script and region or
142// its parent if the tag is not available. If the match is for a parent entry,
143// the excess script and region are returned.
144func (ts *tagSet) index(base language.Base, scr language.Script, reg language.Region) (int, language.Script, language.Region) {
145	lang := base.String()
146	index := -1
147	if (scr != language.Script{} || reg != language.Region{}) {
148		if scr == zzzz {
149			scr = language.Script{}
150		}
151		if reg == zz {
152			reg = language.Region{}
153		}
154
155		i := sort.SearchStrings(ts.long, lang)
156		// All entries have either a script or a region and not both.
157		scrStr, regStr := scr.String(), reg.String()
158		for ; i < len(ts.long) && strings.HasPrefix(ts.long[i], lang); i++ {
159			if s := ts.long[i][len(lang)+1:]; s == scrStr {
160				scr = language.Script{}
161				index = i + ts.single.len()
162				break
163			} else if s == regStr {
164				reg = language.Region{}
165				index = i + ts.single.len()
166				break
167			}
168		}
169	}
170	if index == -1 {
171		index = ts.single.index(lang)
172	}
173	return index, scr, reg
174}
175
176func (ts *tagSet) Tags() []language.Tag {
177	tags := make([]language.Tag, 0, ts.single.len()+len(ts.long))
178	ts.single.keys(func(s string) {
179		tags = append(tags, language.Raw.MustParse(s))
180	})
181	for _, s := range ts.long {
182		tags = append(tags, language.Raw.MustParse(s))
183	}
184	return tags
185}
186
187func supportedScripts() []language.Script {
188	scr := make([]language.Script, 0, scriptIndex.len())
189	scriptIndex.keys(func(s string) {
190		scr = append(scr, language.MustParseScript(s))
191	})
192	return scr
193}
194
195func supportedRegions() []language.Region {
196	reg := make([]language.Region, 0, regionIndex.len())
197	regionIndex.keys(func(s string) {
198		reg = append(reg, language.MustParseRegion(s))
199	})
200	return reg
201}
202
// tagIndex holds concatenated lists of subtags of length 2 to 4, one string
// for each length, which can be used in combination with binary search to get
// the index associated with a tag.
// For example, a tagIndex{
//   "arenesfrruzh",  // 6 2-byte tags.
//   "barwae",        // 2 3-byte tags.
//   "",
// }
// would mean that the 2-byte tag "fr" had an index of 3, and the 3-byte tag
// "wae" had an index of 7.
//
// The tags within each string must be sorted for the binary search in index to
// work. Bytes at the end of a string that do not form a complete tag are
// ignored by all methods.
type tagIndex [3]string

// index returns the index of tag s, or -1 if s is not 2 to 4 bytes long or is
// not contained in t. Indexes count across all lengths: the 2-byte tags come
// first, followed by the 3-byte tags and then the 4-byte tags.
func (t *tagIndex) index(s string) int {
	sz := len(s)
	if sz < 2 || 4 < sz {
		return -1
	}
	a := t[sz-2]
	// Binary search over the whole tags of size sz for the first one >= s.
	index := sort.Search(len(a)/sz, func(i int) bool {
		p := i * sz
		return a[p:p+sz] >= s
	})
	p := index * sz
	// The search returns the tag count if every tag is smaller than s, so
	// guard the slice before comparing.
	if end := p + sz; end > len(a) || a[p:end] != s {
		return -1
	}
	// Add the number of tags for smaller sizes.
	for i := 0; i < sz-2; i++ {
		index += len(t[i]) / (i + 2)
	}
	return index
}

// len returns the number of tags that are contained in the tagIndex.
func (t *tagIndex) len() (n int) {
	for i, s := range t {
		n += len(s) / (i + 2)
	}
	return n
}

// keys calls f for each tag, in index order: all 2-byte tags, then all 3-byte
// tags, then all 4-byte tags. It visits exactly len() tags; like len and
// index, it ignores a trailing remainder that is too short to be a tag
// instead of slicing past the end of the string.
func (t *tagIndex) keys(f func(key string)) {
	for i, s := range t {
		sz := i + 2
		for ; len(s) >= sz; s = s[sz:] {
			f(s[:sz])
		}
	}
}
252