language/display/lookup.go

// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package display

// This file contains common lookup code that is shared between the various
// implementations of Namer and Dictionaries.

import (
	"fmt"
	"sort"
	"strings"

	"golang.org/x/text/language"
)

type namer interface {
	// name gets the string for the given index. It should walk the
	// inheritance chain if a value is not present in the base index.
	name(idx int) string
}

func nameLanguage(n namer, x interface{}) string {
	t, _ := language.All.Compose(x)
	for {
		i, _, _ := langTagSet.index(t.Raw())
		if s := n.name(i); s != "" {
			return s
		}
		if t = t.Parent(); t == language.Und {
			return ""
		}
	}
}

func nameScript(n namer, x interface{}) string {
	t, _ := language.DeprecatedScript.Compose(x)
	_, s, _ := t.Raw()
	return n.name(scriptIndex.index(s.String()))
}

func nameRegion(n namer, x interface{}) string {
	t, _ := language.DeprecatedRegion.Compose(x)
	_, _, r := t.Raw()
	return n.name(regionIndex.index(r.String()))
}

func nameTag(langN, scrN, regN namer, x interface{}) string {
	t, ok := x.(language.Tag)
	if !ok {
		return ""
	}
	const form = language.All &^ language.SuppressScript
	if c, err := form.Canonicalize(t); err == nil {
		t = c
	}
	_, sRaw, rRaw := t.Raw()
	i, scr, reg := langTagSet.index(t.Raw())
	for i != -1 {
		if str := langN.name(i); str != "" {
			if hasS, hasR := (scr != language.Script{}), (reg != language.Region{}); hasS || hasR {
				ss, sr := "", ""
				if hasS {
					ss = scrN.name(scriptIndex.index(scr.String()))
				}
				if hasR {
					sr = regN.name(regionIndex.index(reg.String()))
				}
				// TODO: use patterns in CLDR or at least confirm they are the
				// same for all languages.
				if ss != "" && sr != "" {
					return fmt.Sprintf("%s (%s, %s)", str, ss, sr)
				}
				if ss != "" || sr != "" {
					return fmt.Sprintf("%s (%s%s)", str, ss, sr)
				}
			}
			return str
		}
		scr, reg = sRaw, rRaw
		if t = t.Parent(); t == language.Und {
			return ""
		}
		i, _, _ = langTagSet.index(t.Raw())
	}
	return ""
}

// header contains the data and indexes for a single namer.
// data contains a series of strings concatenated into one. index contains the
// offsets for a string in data. For example, consider a header that defines
// strings for the languages de, el, en, fi, and nl:
//
// 		header{
// 			data: "GermanGreekEnglishDutch",
//  		index: []uint16{ 0, 6, 11, 18, 18, 23 },
// 		}
//
// For a language with index i, the string is defined by
// data[index[i]:index[i+1]]. So the number of elements in index is always one
// greater than the number of languages for which header defines a value.
// A string for a language may be empty, which means the name is undefined. In
// the above example, the name for fi (Finnish) is undefined.
type header struct {
	data  string
	index []uint16
}

// name looks up the name for a tag in the dictionary, given its index.
func (h *header) name(i int) string {
	if 0 <= i && i < len(h.index)-1 {
		return h.data[h.index[i]:h.index[i+1]]
	}
	return ""
}

// tagSet is used to find the index of a language in a set of tags.
type tagSet struct {
	single tagIndex
	long   []string
}

var (
	langTagSet = tagSet{
		single: langIndex,
		long:   langTagsLong,
	}

	// selfTagSet is used for indexing the language strings in their own
	// language.
	selfTagSet = tagSet{
		single: selfIndex,
		long:   selfTagsLong,
	}

	zzzz = language.MustParseScript("Zzzz")
	zz   = language.MustParseRegion("ZZ")
)

// index returns the index of the tag for the given base, script and region or
// its parent if the tag is not available. If the match is for a parent entry,
// the excess script and region are returned.
func (ts *tagSet) index(base language.Base, scr language.Script, reg language.Region) (int, language.Script, language.Region) {
	lang := base.String()
	index := -1
	if (scr != language.Script{} || reg != language.Region{}) {
		if scr == zzzz {
			scr = language.Script{}
		}
		if reg == zz {
			reg = language.Region{}
		}

		i := sort.SearchStrings(ts.long, lang)
		// All entries have either a script or a region and not both.
		scrStr, regStr := scr.String(), reg.String()
		for ; i < len(ts.long) && strings.HasPrefix(ts.long[i], lang); i++ {
			if s := ts.long[i][len(lang)+1:]; s == scrStr {
				scr = language.Script{}
				index = i + ts.single.len()
				break
			} else if s == regStr {
				reg = language.Region{}
				index = i + ts.single.len()
				break
			}
		}
	}
	if index == -1 {
		index = ts.single.index(lang)
	}
	return index, scr, reg
}

func (ts *tagSet) Tags() []language.Tag {
	tags := make([]language.Tag, 0, ts.single.len()+len(ts.long))
	ts.single.keys(func(s string) {
		tags = append(tags, language.Raw.MustParse(s))
	})
	for _, s := range ts.long {
		tags = append(tags, language.Raw.MustParse(s))
	}
	return tags
}

func supportedScripts() []language.Script {
	scr := make([]language.Script, 0, scriptIndex.len())
	scriptIndex.keys(func(s string) {
		scr = append(scr, language.MustParseScript(s))
	})
	return scr
}

func supportedRegions() []language.Region {
	reg := make([]language.Region, 0, regionIndex.len())
	regionIndex.keys(func(s string) {
		reg = append(reg, language.MustParseRegion(s))
	})
	return reg
}

// tagIndex holds a concatenated lists of subtags of length 2 to 4, one string
// for each length, which can be used in combination with binary search to get
// the index associated with a tag.
// For example, a tagIndex{
//   "arenesfrruzh",  // 6 2-byte tags.
//   "barwae",        // 2 3-byte tags.
//   "",
// }
// would mean that the 2-byte tag "fr" had an index of 3, and the 3-byte tag
// "wae" had an index of 7.
type tagIndex [3]string

func (t *tagIndex) index(s string) int {
	sz := len(s)
	if sz < 2 || 4 < sz {
		return -1
	}
	a := t[sz-2]
	index := sort.Search(len(a)/sz, func(i int) bool {
		p := i * sz
		return a[p:p+sz] >= s
	})
	p := index * sz
	if end := p + sz; end > len(a) || a[p:end] != s {
		return -1
	}
	// Add the number of tags for smaller sizes.
	for i := 0; i < sz-2; i++ {
		index += len(t[i]) / (i + 2)
	}
	return index
}

// len returns the number of tags that are contained in the tagIndex.
func (t *tagIndex) len() (n int) {
	for i, s := range t {
		n += len(s) / (i + 2)
	}
	return n
}

// keys calls f for each tag.
func (t *tagIndex) keys(f func(key string)) {
	for i, s := range *t {
		for ; s != ""; s = s[i+2:] {
			f(s[:i+2])
		}
	}
}