1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package colltab contains functionality related to collation tables.
6// It is only to be used by the collate and search packages.
7package colltab // import "golang.org/x/text/internal/colltab"
8
9import (
10	"sort"
11
12	"golang.org/x/text/language"
13)
14
15// MatchLang finds the index of t in tags, using a matching algorithm used for
16// collation and search. tags[0] must be language.Und, the remaining tags should
17// be sorted alphabetically.
18//
19// Language matching for collation and search is different from the matching
20// defined by language.Matcher: the (inferred) base language must be an exact
21// match for the relevant fields. For example, "gsw" should not match "de".
22// Also the parent relation is different, as a parent may have a different
23// script. So usually the parent of zh-Hant is und, whereas for MatchLang it is
24// zh.
25func MatchLang(t language.Tag, tags []language.Tag) int {
26	// Canonicalize the values, including collapsing macro languages.
27	t, _ = language.All.Canonicalize(t)
28
29	base, conf := t.Base()
30	// Estimate the base language, but only use high-confidence values.
31	if conf < language.High {
32		// The root locale supports "search" and "standard". We assume that any
33		// implementation will only use one of both.
34		return 0
35	}
36
37	// Maximize base and script and normalize the tag.
38	if _, s, r := t.Raw(); (r != language.Region{}) {
39		p, _ := language.Raw.Compose(base, s, r)
40		// Taking the parent forces the script to be maximized.
41		p = p.Parent()
42		// Add back region and extensions.
43		t, _ = language.Raw.Compose(p, r, t.Extensions())
44	} else {
45		// Set the maximized base language.
46		t, _ = language.Raw.Compose(base, s, t.Extensions())
47	}
48
49	// Find start index of the language tag.
50	start := 1 + sort.Search(len(tags)-1, func(i int) bool {
51		b, _, _ := tags[i+1].Raw()
52		return base.String() <= b.String()
53	})
54	if start < len(tags) {
55		if b, _, _ := tags[start].Raw(); b != base {
56			return 0
57		}
58	}
59
60	// Besides the base language, script and region, only the collation type and
61	// the custom variant defined in the 'u' extension are used to distinguish a
62	// locale.
63	// Strip all variants and extensions and add back the custom variant.
64	tdef, _ := language.Raw.Compose(t.Raw())
65	tdef, _ = tdef.SetTypeForKey("va", t.TypeForKey("va"))
66
67	// First search for a specialized collation type, if present.
68	try := []language.Tag{tdef}
69	if co := t.TypeForKey("co"); co != "" {
70		tco, _ := tdef.SetTypeForKey("co", co)
71		try = []language.Tag{tco, tdef}
72	}
73
74	for _, tx := range try {
75		for ; tx != language.Und; tx = parent(tx) {
76			for i, t := range tags[start:] {
77				if b, _, _ := t.Raw(); b != base {
78					break
79				}
80				if tx == t {
81					return start + i
82				}
83			}
84		}
85	}
86	return 0
87}
88
89// parent computes the structural parent. This means inheritance may change
90// script. So, unlike the CLDR parent, parent(zh-Hant) == zh.
91func parent(t language.Tag) language.Tag {
92	if t.TypeForKey("va") != "" {
93		t, _ = t.SetTypeForKey("va", "")
94		return t
95	}
96	result := language.Und
97	if b, s, r := t.Raw(); (r != language.Region{}) {
98		result, _ = language.Raw.Compose(b, s, t.Extensions())
99	} else if (s != language.Script{}) {
100		result, _ = language.Raw.Compose(b, t.Extensions())
101	} else if (b != language.Base{}) {
102		result, _ = language.Raw.Compose(t.Extensions())
103	}
104	return result
105}
106