1// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package build
6
7import (
8	"fmt"
9	"log"
10	"sort"
11	"strings"
12	"unicode"
13
14	"golang.org/x/text/internal/colltab"
15	"golang.org/x/text/unicode/norm"
16)
17
18type logicalAnchor int
19
20const (
21	firstAnchor logicalAnchor = -1
22	noAnchor                  = 0
23	lastAnchor                = 1
24)
25
26// entry is used to keep track of a single entry in the collation element table
27// during building. Examples of entries can be found in the Default Unicode
28// Collation Element Table.
29// See https://www.unicode.org/Public/UCA/6.0.0/allkeys.txt.
30type entry struct {
31	str    string // same as string(runes)
32	runes  []rune
33	elems  []rawCE // the collation elements
34	extend string  // weights of extend to be appended to elems
35	before bool    // weights relative to next instead of previous.
36	lock   bool    // entry is used in extension and can no longer be moved.
37
38	// prev, next, and level are used to keep track of tailorings.
39	prev, next *entry
40	level      colltab.Level // next differs at this level
41	skipRemove bool          // do not unlink when removed
42
43	decompose bool // can use NFKD decomposition to generate elems
44	exclude   bool // do not include in table
45	implicit  bool // derived, is not included in the list
46	modified  bool // entry was modified in tailoring
47	logical   logicalAnchor
48
49	expansionIndex    int // used to store index into expansion table
50	contractionHandle ctHandle
51	contractionIndex  int // index into contraction elements
52}
53
54func (e *entry) String() string {
55	return fmt.Sprintf("%X (%q) -> %X (ch:%x; ci:%d, ei:%d)",
56		e.runes, e.str, e.elems, e.contractionHandle, e.contractionIndex, e.expansionIndex)
57}
58
59func (e *entry) skip() bool {
60	return e.contraction()
61}
62
63func (e *entry) expansion() bool {
64	return !e.decompose && len(e.elems) > 1
65}
66
67func (e *entry) contraction() bool {
68	return len(e.runes) > 1
69}
70
71func (e *entry) contractionStarter() bool {
72	return e.contractionHandle.n != 0
73}
74
75// nextIndexed gets the next entry that needs to be stored in the table.
76// It returns the entry and the collation level at which the next entry differs
77// from the current entry.
78// Entries that can be explicitly derived and logical reset positions are
79// examples of entries that will not be indexed.
80func (e *entry) nextIndexed() (*entry, colltab.Level) {
81	level := e.level
82	for e = e.next; e != nil && (e.exclude || len(e.elems) == 0); e = e.next {
83		if e.level < level {
84			level = e.level
85		}
86	}
87	return e, level
88}
89
90// remove unlinks entry e from the sorted chain and clears the collation
91// elements. e may not be at the front or end of the list. This should always
92// be the case, as the front and end of the list are always logical anchors,
93// which may not be removed.
94func (e *entry) remove() {
95	if e.logical != noAnchor {
96		log.Fatalf("may not remove anchor %q", e.str)
97	}
98	// TODO: need to set e.prev.level to e.level if e.level is smaller?
99	e.elems = nil
100	if !e.skipRemove {
101		if e.prev != nil {
102			e.prev.next = e.next
103		}
104		if e.next != nil {
105			e.next.prev = e.prev
106		}
107	}
108	e.skipRemove = false
109}
110
111// insertAfter inserts n after e.
112func (e *entry) insertAfter(n *entry) {
113	if e == n {
114		panic("e == anchor")
115	}
116	if e == nil {
117		panic("unexpected nil anchor")
118	}
119	n.remove()
120	n.decompose = false // redo decomposition test
121
122	n.next = e.next
123	n.prev = e
124	if e.next != nil {
125		e.next.prev = n
126	}
127	e.next = n
128}
129
130// insertBefore inserts n before e.
131func (e *entry) insertBefore(n *entry) {
132	if e == n {
133		panic("e == anchor")
134	}
135	if e == nil {
136		panic("unexpected nil anchor")
137	}
138	n.remove()
139	n.decompose = false // redo decomposition test
140
141	n.prev = e.prev
142	n.next = e
143	if e.prev != nil {
144		e.prev.next = n
145	}
146	e.prev = n
147}
148
149func (e *entry) encodeBase() (ce uint32, err error) {
150	switch {
151	case e.expansion():
152		ce, err = makeExpandIndex(e.expansionIndex)
153	default:
154		if e.decompose {
155			log.Fatal("decompose should be handled elsewhere")
156		}
157		ce, err = makeCE(e.elems[0])
158	}
159	return
160}
161
162func (e *entry) encode() (ce uint32, err error) {
163	if e.skip() {
164		log.Fatal("cannot build colElem for entry that should be skipped")
165	}
166	switch {
167	case e.decompose:
168		t1 := e.elems[0].w[2]
169		t2 := 0
170		if len(e.elems) > 1 {
171			t2 = e.elems[1].w[2]
172		}
173		ce, err = makeDecompose(t1, t2)
174	case e.contractionStarter():
175		ce, err = makeContractIndex(e.contractionHandle, e.contractionIndex)
176	default:
177		if len(e.runes) > 1 {
178			log.Fatal("colElem: contractions are handled in contraction trie")
179		}
180		ce, err = e.encodeBase()
181	}
182	return
183}
184
185// entryLess returns true if a sorts before b and false otherwise.
186func entryLess(a, b *entry) bool {
187	if res, _ := compareWeights(a.elems, b.elems); res != 0 {
188		return res == -1
189	}
190	if a.logical != noAnchor {
191		return a.logical == firstAnchor
192	}
193	if b.logical != noAnchor {
194		return b.logical == lastAnchor
195	}
196	return a.str < b.str
197}
198
199type sortedEntries []*entry
200
201func (s sortedEntries) Len() int {
202	return len(s)
203}
204
205func (s sortedEntries) Swap(i, j int) {
206	s[i], s[j] = s[j], s[i]
207}
208
209func (s sortedEntries) Less(i, j int) bool {
210	return entryLess(s[i], s[j])
211}
212
213type ordering struct {
214	id       string
215	entryMap map[string]*entry
216	ordered  []*entry
217	handle   *trieHandle
218}
219
220// insert inserts e into both entryMap and ordered.
221// Note that insert simply appends e to ordered.  To reattain a sorted
222// order, o.sort() should be called.
223func (o *ordering) insert(e *entry) {
224	if e.logical == noAnchor {
225		o.entryMap[e.str] = e
226	} else {
227		// Use key format as used in UCA rules.
228		o.entryMap[fmt.Sprintf("[%s]", e.str)] = e
229		// Also add index entry for XML format.
230		o.entryMap[fmt.Sprintf("<%s/>", strings.Replace(e.str, " ", "_", -1))] = e
231	}
232	o.ordered = append(o.ordered, e)
233}
234
235// newEntry creates a new entry for the given info and inserts it into
236// the index.
237func (o *ordering) newEntry(s string, ces []rawCE) *entry {
238	e := &entry{
239		runes: []rune(s),
240		elems: ces,
241		str:   s,
242	}
243	o.insert(e)
244	return e
245}
246
247// find looks up and returns the entry for the given string.
248// It returns nil if str is not in the index and if an implicit value
249// cannot be derived, that is, if str represents more than one rune.
250func (o *ordering) find(str string) *entry {
251	e := o.entryMap[str]
252	if e == nil {
253		r := []rune(str)
254		if len(r) == 1 {
255			const (
256				firstHangul = 0xAC00
257				lastHangul  = 0xD7A3
258			)
259			if r[0] >= firstHangul && r[0] <= lastHangul {
260				ce := []rawCE{}
261				nfd := norm.NFD.String(str)
262				for _, r := range nfd {
263					ce = append(ce, o.find(string(r)).elems...)
264				}
265				e = o.newEntry(nfd, ce)
266			} else {
267				e = o.newEntry(string(r[0]), []rawCE{
268					{w: []int{
269						implicitPrimary(r[0]),
270						defaultSecondary,
271						defaultTertiary,
272						int(r[0]),
273					},
274					},
275				})
276				e.modified = true
277			}
278			e.exclude = true // do not index implicits
279		}
280	}
281	return e
282}
283
284// makeRootOrdering returns a newly initialized ordering value and populates
285// it with a set of logical reset points that can be used as anchors.
286// The anchors first_tertiary_ignorable and __END__ will always sort at
287// the beginning and end, respectively. This means that prev and next are non-nil
288// for any indexed entry.
289func makeRootOrdering() ordering {
290	const max = unicode.MaxRune
291	o := ordering{
292		entryMap: make(map[string]*entry),
293	}
294	insert := func(typ logicalAnchor, s string, ce []int) {
295		e := &entry{
296			elems:   []rawCE{{w: ce}},
297			str:     s,
298			exclude: true,
299			logical: typ,
300		}
301		o.insert(e)
302	}
303	insert(firstAnchor, "first tertiary ignorable", []int{0, 0, 0, 0})
304	insert(lastAnchor, "last tertiary ignorable", []int{0, 0, 0, max})
305	insert(lastAnchor, "last primary ignorable", []int{0, defaultSecondary, defaultTertiary, max})
306	insert(lastAnchor, "last non ignorable", []int{maxPrimary, defaultSecondary, defaultTertiary, max})
307	insert(lastAnchor, "__END__", []int{1 << maxPrimaryBits, defaultSecondary, defaultTertiary, max})
308	return o
309}
310
311// patchForInsert eleminates entries from the list with more than one collation element.
312// The next and prev fields of the eliminated entries still point to appropriate
313// values in the newly created list.
314// It requires that sort has been called.
315func (o *ordering) patchForInsert() {
316	for i := 0; i < len(o.ordered)-1; {
317		e := o.ordered[i]
318		lev := e.level
319		n := e.next
320		for ; n != nil && len(n.elems) > 1; n = n.next {
321			if n.level < lev {
322				lev = n.level
323			}
324			n.skipRemove = true
325		}
326		for ; o.ordered[i] != n; i++ {
327			o.ordered[i].level = lev
328			o.ordered[i].next = n
329			o.ordered[i+1].prev = e
330		}
331	}
332}
333
334// clone copies all ordering of es into a new ordering value.
335func (o *ordering) clone() *ordering {
336	o.sort()
337	oo := ordering{
338		entryMap: make(map[string]*entry),
339	}
340	for _, e := range o.ordered {
341		ne := &entry{
342			runes:     e.runes,
343			elems:     e.elems,
344			str:       e.str,
345			decompose: e.decompose,
346			exclude:   e.exclude,
347			logical:   e.logical,
348		}
349		oo.insert(ne)
350	}
351	oo.sort() // link all ordering.
352	oo.patchForInsert()
353	return &oo
354}
355
356// front returns the first entry to be indexed.
357// It assumes that sort() has been called.
358func (o *ordering) front() *entry {
359	e := o.ordered[0]
360	if e.prev != nil {
361		log.Panicf("unexpected first entry: %v", e)
362	}
363	// The first entry is always a logical position, which should not be indexed.
364	e, _ = e.nextIndexed()
365	return e
366}
367
368// sort sorts all ordering based on their collation elements and initializes
369// the prev, next, and level fields accordingly.
370func (o *ordering) sort() {
371	sort.Sort(sortedEntries(o.ordered))
372	l := o.ordered
373	for i := 1; i < len(l); i++ {
374		k := i - 1
375		l[k].next = l[i]
376		_, l[k].level = compareWeights(l[k].elems, l[i].elems)
377		l[i].prev = l[k]
378	}
379}
380
381// genColElems generates a collation element array from the runes in str. This
382// assumes that all collation elements have already been added to the Builder.
383func (o *ordering) genColElems(str string) []rawCE {
384	elems := []rawCE{}
385	for _, r := range []rune(str) {
386		for _, ce := range o.find(string(r)).elems {
387			if ce.w[0] != 0 || ce.w[1] != 0 || ce.w[2] != 0 {
388				elems = append(elems, ce)
389			}
390		}
391	}
392	return elems
393}
394