1// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package collate
6
7import (
8	"bytes"
9	"testing"
10
11	"golang.org/x/text/internal/colltab"
12	"golang.org/x/text/language"
13)
14
15type weightsTest struct {
16	opt     opts
17	in, out ColElems
18}
19
20type opts struct {
21	lev int
22	alt alternateHandling
23	top int
24
25	backwards bool
26	caseLevel bool
27}
28
29// ignore returns an initialized boolean array based on the given Level.
30// A negative value means using the default setting of quaternary.
31func ignore(level colltab.Level) (ignore [colltab.NumLevels]bool) {
32	if level < 0 {
33		level = colltab.Quaternary
34	}
35	for i := range ignore {
36		ignore[i] = level < colltab.Level(i)
37	}
38	return ignore
39}
40
41func makeCE(w []int) colltab.Elem {
42	ce, err := colltab.MakeElem(w[0], w[1], w[2], uint8(w[3]))
43	if err != nil {
44		panic(err)
45	}
46	return ce
47}
48
49func (o opts) collator() *Collator {
50	c := &Collator{
51		options: options{
52			ignore:      ignore(colltab.Level(o.lev - 1)),
53			alternate:   o.alt,
54			backwards:   o.backwards,
55			caseLevel:   o.caseLevel,
56			variableTop: uint32(o.top),
57		},
58	}
59	return c
60}
61
// maxQ is the quaternary weight that the Shifted and ShiftTrimmed cases of
// processTests expect on elements that keep their other weights.
const maxQ = 0x1FFFFF
65
66func wpq(p, q int) Weights {
67	return W(p, defaults.Secondary, defaults.Tertiary, q)
68}
69
70func wsq(s, q int) Weights {
71	return W(0, s, defaults.Tertiary, q)
72}
73
74func wq(q int) Weights {
75	return W(0, 0, 0, q)
76}
77
// zero is the result of W with all four arguments set to 0. The test tables
// use it both for fully ignorable input elements and for elements that are
// expected to be blanked.
var zero = W(0, 0, 0, 0)
79
// processTests lists the inputs and expected outputs for TestProcessWeights,
// grouped by alternate handling mode. In each case top is the variable top
// handed to processWeights, in is the element sequence before the call and
// out is the sequence expected afterwards.
var processTests = []weightsTest{
	// Shifted
	{ // simple sequence of non-variables
		opt: opts{alt: altShifted, top: 100},
		in:  ColElems{W(200), W(300), W(400)},
		out: ColElems{wpq(200, maxQ), wpq(300, maxQ), wpq(400, maxQ)},
	},
	{ // first is a variable
		opt: opts{alt: altShifted, top: 250},
		in:  ColElems{W(200), W(300), W(400)},
		out: ColElems{wq(200), wpq(300, maxQ), wpq(400, maxQ)},
	},
	{ // all but first are variable
		opt: opts{alt: altShifted, top: 999},
		in:  ColElems{W(1000), W(200), W(300), W(400)},
		out: ColElems{wpq(1000, maxQ), wq(200), wq(300), wq(400)},
	},
	{ // first is a modifier
		opt: opts{alt: altShifted, top: 999},
		in:  ColElems{W(0, 10), W(1000)},
		out: ColElems{wsq(10, maxQ), wpq(1000, maxQ)},
	},
	{ // primary ignorables
		opt: opts{alt: altShifted, top: 250},
		in:  ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
		out: ColElems{wq(200), zero, wpq(300, maxQ), wsq(15, maxQ), wpq(400, maxQ)},
	},
	{ // secondary ignorables
		opt: opts{alt: altShifted, top: 250},
		in:  ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
		out: ColElems{wq(200), zero, wpq(300, maxQ), W(0, 0, 15, maxQ), wpq(400, maxQ)},
	},
	{ // tertiary ignorables, no change
		opt: opts{alt: altShifted, top: 250},
		in:  ColElems{W(200), zero, W(300), zero, W(400)},
		out: ColElems{wq(200), zero, wpq(300, maxQ), zero, wpq(400, maxQ)},
	},

	// ShiftTrimmed (same expected output as Shifted)
	{ // simple sequence of non-variables
		opt: opts{alt: altShiftTrimmed, top: 100},
		in:  ColElems{W(200), W(300), W(400)},
		out: ColElems{wpq(200, maxQ), wpq(300, maxQ), wpq(400, maxQ)},
	},
	{ // first is a variable
		opt: opts{alt: altShiftTrimmed, top: 250},
		in:  ColElems{W(200), W(300), W(400)},
		out: ColElems{wq(200), wpq(300, maxQ), wpq(400, maxQ)},
	},
	{ // all but first are variable
		opt: opts{alt: altShiftTrimmed, top: 999},
		in:  ColElems{W(1000), W(200), W(300), W(400)},
		out: ColElems{wpq(1000, maxQ), wq(200), wq(300), wq(400)},
	},
	{ // first is a modifier
		opt: opts{alt: altShiftTrimmed, top: 999},
		in:  ColElems{W(0, 10), W(1000)},
		out: ColElems{wsq(10, maxQ), wpq(1000, maxQ)},
	},
	{ // primary ignorables
		opt: opts{alt: altShiftTrimmed, top: 250},
		in:  ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
		out: ColElems{wq(200), zero, wpq(300, maxQ), wsq(15, maxQ), wpq(400, maxQ)},
	},
	{ // secondary ignorables
		opt: opts{alt: altShiftTrimmed, top: 250},
		in:  ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
		out: ColElems{wq(200), zero, wpq(300, maxQ), W(0, 0, 15, maxQ), wpq(400, maxQ)},
	},
	{ // tertiary ignorables, no change
		opt: opts{alt: altShiftTrimmed, top: 250},
		in:  ColElems{W(200), zero, W(300), zero, W(400)},
		out: ColElems{wq(200), zero, wpq(300, maxQ), zero, wpq(400, maxQ)},
	},

	// Blanked
	{ // simple sequence of non-variables
		opt: opts{alt: altBlanked, top: 100},
		in:  ColElems{W(200), W(300), W(400)},
		out: ColElems{W(200), W(300), W(400)},
	},
	{ // first is a variable
		opt: opts{alt: altBlanked, top: 250},
		in:  ColElems{W(200), W(300), W(400)},
		out: ColElems{zero, W(300), W(400)},
	},
	{ // all but first are variable
		opt: opts{alt: altBlanked, top: 999},
		in:  ColElems{W(1000), W(200), W(300), W(400)},
		out: ColElems{W(1000), zero, zero, zero},
	},
	{ // first is a modifier
		opt: opts{alt: altBlanked, top: 999},
		in:  ColElems{W(0, 10), W(1000)},
		out: ColElems{W(0, 10), W(1000)},
	},
	{ // primary ignorables
		opt: opts{alt: altBlanked, top: 250},
		in:  ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
		out: ColElems{zero, zero, W(300), W(0, 15), W(400)},
	},
	{ // secondary ignorables
		opt: opts{alt: altBlanked, top: 250},
		in:  ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
		out: ColElems{zero, zero, W(300), W(0, 0, 15), W(400)},
	},
	{ // tertiary ignorables, no change
		opt: opts{alt: altBlanked, top: 250},
		in:  ColElems{W(200), zero, W(300), zero, W(400)},
		out: ColElems{zero, zero, W(300), zero, W(400)},
	},

	// Non-ignorable: input is always equal to output.
	{ // all but first are variable
		opt: opts{alt: altNonIgnorable, top: 999},
		in:  ColElems{W(1000), W(200), W(300), W(400)},
		out: ColElems{W(1000), W(200), W(300), W(400)},
	},
	{ // primary ignorables
		opt: opts{alt: altNonIgnorable, top: 250},
		in:  ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
		out: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
	},
	{ // secondary ignorables
		opt: opts{alt: altNonIgnorable, top: 250},
		in:  ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
		out: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
	},
	{ // tertiary ignorables, no change
		opt: opts{alt: altNonIgnorable, top: 250},
		in:  ColElems{W(200), zero, W(300), zero, W(400)},
		out: ColElems{W(200), zero, W(300), zero, W(400)},
	},
}
214
215func TestProcessWeights(t *testing.T) {
216	for i, tt := range processTests {
217		in := convertFromWeights(tt.in)
218		out := convertFromWeights(tt.out)
219		processWeights(tt.opt.alt, uint32(tt.opt.top), in)
220		for j, w := range in {
221			if w != out[j] {
222				t.Errorf("%d: Weights %d was %v; want %v", i, j, w, out[j])
223			}
224		}
225	}
226}
227
// keyFromElemTest is one TestKeyFromElems case. The field order matters:
// keyFromElemTests initializes entries with unkeyed literals.
type keyFromElemTest struct {
	opt opts     // settings used for processWeights and to build the collator
	in  ColElems // input elements
	out []byte   // expected key bytes
}
233
234var defS = byte(defaults.Secondary)
235var defT = byte(defaults.Tertiary)
236
237const sep = 0 // separator byte
238
// keyFromElemTests lists the cases for TestKeyFromElems. Every expected key
// is annotated with the collation level its bytes belong to; sep marks the
// separator bytes between levels.
var keyFromElemTests = []keyFromElemTest{
	{ // simple primary and secondary weights.
		opts{alt: altShifted},
		ColElems{W(0x200), W(0x7FFF), W(0, 0x30), W(0x100)},
		[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
			sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
			sep, sep, defT, defT, defT, defT, // tertiary
			sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
		},
	},
	{ // same as first, but with zero elements that need to be removed
		opts{alt: altShifted},
		ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
		[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
			sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
			sep, sep, defT, defT, defT, defT, // tertiary
			sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
		},
	},
	{ // same as first, with large primary values
		opts{alt: altShifted},
		ColElems{W(0x200), W(0x8000), W(0, 0x30), W(0x12345)},
		[]byte{0x2, 0, 0x80, 0x80, 0x00, 0x81, 0x23, 0x45, // primary
			sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
			sep, sep, defT, defT, defT, defT, // tertiary
			sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
		},
	},
	{ // same as first, but with the secondary level backwards
		opts{alt: altShifted, backwards: true},
		ColElems{W(0x200), W(0x7FFF), W(0, 0x30), W(0x100)},
		[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
			sep, sep, 0, defS, 0, 0x30, 0, defS, 0, defS, // secondary
			sep, sep, defT, defT, defT, defT, // tertiary
			sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
		},
	},
	{ // same as first, ignoring quaternary level
		opts{alt: altShifted, lev: 3},
		ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
		[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
			sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
			sep, sep, defT, defT, defT, defT, // tertiary
		},
	},
	{ // same as first, ignoring tertiary level
		opts{alt: altShifted, lev: 2},
		ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
		[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
			sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
		},
	},
	{ // same as first, ignoring secondary level
		opts{alt: altShifted, lev: 1},
		ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
		[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00},
	},
	{ // ShiftTrimmed with top 0x250: the expected key has no primary entry
		// for 0x200 and carries it in the quaternary level instead.
		opts{alt: altShiftTrimmed, top: 0x250},
		ColElems{W(0x300), W(0x200), W(0x7FFF), W(0, 0x30), W(0x800)},
		[]byte{0x3, 0, 0x7F, 0xFF, 0x8, 0x00, // primary
			sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
			sep, sep, defT, defT, defT, defT, // tertiary
			sep, 0xFF, 0x2, 0, // quaternary
		},
	},
	{ // as first, primary with case level enabled
		opts{alt: altShifted, lev: 1, caseLevel: true},
		ColElems{W(0x200), W(0x7FFF), W(0, 0x30), W(0x100)},
		[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
			sep, sep, // secondary
			sep, sep, defT, defT, defT, defT, // tertiary
		},
	},
}
314
315func TestKeyFromElems(t *testing.T) {
316	buf := Buffer{}
317	for i, tt := range keyFromElemTests {
318		buf.Reset()
319		in := convertFromWeights(tt.in)
320		processWeights(tt.opt.alt, uint32(tt.opt.top), in)
321		tt.opt.collator().keyFromElems(&buf, in)
322		res := buf.key
323		if len(res) != len(tt.out) {
324			t.Errorf("%d: len(ws) was %d; want %d (%X should be %X)", i, len(res), len(tt.out), res, tt.out)
325		}
326		n := len(res)
327		if len(tt.out) < n {
328			n = len(tt.out)
329		}
330		for j, c := range res[:n] {
331			if c != tt.out[j] {
332				t.Errorf("%d: byte %d was %X; want %X", i, j, c, tt.out[j])
333			}
334		}
335	}
336}
337
338func TestGetColElems(t *testing.T) {
339	for i, tt := range appendNextTests {
340		c, err := makeTable(tt.in)
341		if err != nil {
342			// error is reported in TestAppendNext
343			continue
344		}
345		// Create one large test per table
346		str := make([]byte, 0, 4000)
347		out := ColElems{}
348		for len(str) < 3000 {
349			for _, chk := range tt.chk {
350				str = append(str, chk.in[:chk.n]...)
351				out = append(out, chk.out...)
352			}
353		}
354		for j, chk := range append(tt.chk, check{string(str), len(str), out}) {
355			out := convertFromWeights(chk.out)
356			ce := c.getColElems([]byte(chk.in)[:chk.n])
357			if len(ce) != len(out) {
358				t.Errorf("%d:%d: len(ws) was %d; want %d", i, j, len(ce), len(out))
359				continue
360			}
361			cnt := 0
362			for k, w := range ce {
363				w, _ = colltab.MakeElem(w.Primary(), w.Secondary(), int(w.Tertiary()), 0)
364				if w != out[k] {
365					t.Errorf("%d:%d: Weights %d was %X; want %X", i, j, k, w, out[k])
366					cnt++
367				}
368				if cnt > 10 {
369					break
370				}
371			}
372		}
373	}
374}
375
// keyTest pairs an input string with the sort key expected for it in
// TestKey. The field order matters: keyTests uses unkeyed literals.
type keyTest struct {
	in  string // input handed to Key (as bytes) and to KeyFromString
	out []byte // expected key
}
380
381var keyTests = []keyTest{
382	{"abc",
383		[]byte{0, 100, 0, 200, 1, 44, 0, 0, 0, 32, 0, 32, 0, 32, 0, 0, 2, 2, 2, 0, 255, 255, 255},
384	},
385	{"a\u0301",
386		[]byte{0, 102, 0, 0, 0, 32, 0, 0, 2, 0, 255},
387	},
388	{"aaaaa",
389		[]byte{0, 100, 0, 100, 0, 100, 0, 100, 0, 100, 0, 0,
390			0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 0,
391			2, 2, 2, 2, 2, 0,
392			255, 255, 255, 255, 255,
393		},
394	},
395	// Issue 16391: incomplete rune at end of UTF-8 sequence.
396	{"\xc2", []byte{133, 255, 253, 0, 0, 0, 32, 0, 0, 2, 0, 255}},
397	{"\xc2a", []byte{133, 255, 253, 0, 100, 0, 0, 0, 32, 0, 32, 0, 0, 2, 2, 0, 255, 255}},
398}
399
400func TestKey(t *testing.T) {
401	c, _ := makeTable(appendNextTests[4].in)
402	c.alternate = altShifted
403	c.ignore = ignore(colltab.Quaternary)
404	buf := Buffer{}
405	keys1 := [][]byte{}
406	keys2 := [][]byte{}
407	for _, tt := range keyTests {
408		keys1 = append(keys1, c.Key(&buf, []byte(tt.in)))
409		keys2 = append(keys2, c.KeyFromString(&buf, tt.in))
410	}
411	// Separate generation from testing to ensure buffers are not overwritten.
412	for i, tt := range keyTests {
413		if !bytes.Equal(keys1[i], tt.out) {
414			t.Errorf("%d: Key(%q) = %d; want %d", i, tt.in, keys1[i], tt.out)
415		}
416		if !bytes.Equal(keys2[i], tt.out) {
417			t.Errorf("%d: KeyFromString(%q) = %d; want %d", i, tt.in, keys2[i], tt.out)
418		}
419	}
420}
421
// compareTest holds two strings and the result expected from comparing them.
// The field order matters: compareTests uses unkeyed literals.
type compareTest struct {
	a   string // left operand
	b   string // right operand
	res int    // comparison result
}
426
427var compareTests = []compareTest{
428	{"a\u0301", "a", 1},
429	{"a\u0301b", "ab", 1},
430	{"a", "a\u0301", -1},
431	{"ab", "a\u0301b", -1},
432	{"bc", "a\u0301c", 1},
433	{"ab", "aB", -1},
434	{"a\u0301", "a\u0301", 0},
435	{"a", "a", 0},
436	// Only clip prefixes of whole runes.
437	{"\u302E", "\u302F", 1},
438	// Don't clip prefixes when last rune of prefix may be part of contraction.
439	{"a\u035E", "a\u0301\u035F", -1},
440	{"a\u0301\u035Fb", "a\u0301\u035F", -1},
441}
442
443func TestCompare(t *testing.T) {
444	c, _ := makeTable(appendNextTests[4].in)
445	for i, tt := range compareTests {
446		if res := c.Compare([]byte(tt.a), []byte(tt.b)); res != tt.res {
447			t.Errorf("%d: Compare(%q, %q) == %d; want %d", i, tt.a, tt.b, res, tt.res)
448		}
449		if res := c.CompareString(tt.a, tt.b); res != tt.res {
450			t.Errorf("%d: CompareString(%q, %q) == %d; want %d", i, tt.a, tt.b, res, tt.res)
451		}
452	}
453}
454
455func TestNumeric(t *testing.T) {
456	c := New(language.English, Loose, Numeric)
457
458	for i, tt := range []struct {
459		a, b string
460		want int
461	}{
462		{"1", "2", -1},
463		{"2", "12", -1},
464		{"2", "12", -1}, // Fullwidth is sorted as usual.
465		{"₂", "₁₂", 1},  // Subscript is not sorted as numbers.
466		{"②", "①②", 1},  // Circled is not sorted as numbers.
467		{ // Imperial Aramaic, is not sorted as number.
468			"\U00010859",
469			"\U00010858\U00010859",
470			1,
471		},
472		{"12", "2", 1},
473		{"A-1", "A-2", -1},
474		{"A-2", "A-12", -1},
475		{"A-12", "A-2", 1},
476		{"A-0001", "A-1", 0},
477	} {
478		if got := c.CompareString(tt.a, tt.b); got != tt.want {
479			t.Errorf("%d: CompareString(%s, %s) = %d; want %d", i, tt.a, tt.b, got, tt.want)
480		}
481	}
482}
483