1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package rangetable
6
7import (
8	"testing"
9	"unicode"
10)
11
12var (
13	maxRuneTable = &unicode.RangeTable{
14		R32: []unicode.Range32{
15			{unicode.MaxRune, unicode.MaxRune, 1},
16		},
17	}
18
19	overlap1 = &unicode.RangeTable{
20		R16: []unicode.Range16{
21			{0x100, 0xfffc, 4},
22		},
23		R32: []unicode.Range32{
24			{0x100000, 0x10fffc, 4},
25		},
26	}
27
28	overlap2 = &unicode.RangeTable{
29		R16: []unicode.Range16{
30			{0x101, 0xfffd, 4},
31		},
32		R32: []unicode.Range32{
33			{0x100001, 0x10fffd, 3},
34		},
35	}
36
37	// The following table should be compacted into two entries for R16 and R32.
38	optimize = &unicode.RangeTable{
39		R16: []unicode.Range16{
40			{0x1, 0x1, 1},
41			{0x2, 0x2, 1},
42			{0x3, 0x3, 1},
43			{0x5, 0x5, 1},
44			{0x7, 0x7, 1},
45			{0x9, 0x9, 1},
46			{0xb, 0xf, 2},
47		},
48		R32: []unicode.Range32{
49			{0x10001, 0x10001, 1},
50			{0x10002, 0x10002, 1},
51			{0x10003, 0x10003, 1},
52			{0x10005, 0x10005, 1},
53			{0x10007, 0x10007, 1},
54			{0x10009, 0x10009, 1},
55			{0x1000b, 0x1000f, 2},
56		},
57	}
58)
59
60func TestMerge(t *testing.T) {
61	for i, tt := range [][]*unicode.RangeTable{
62		{unicode.Cc, unicode.Cf},
63		{unicode.L, unicode.Ll},
64		{unicode.L, unicode.Ll, unicode.Lu},
65		{unicode.Ll, unicode.Lu},
66		{unicode.M},
67		unicode.GraphicRanges,
68		cased,
69
70		// Merge R16 only and R32 only and vice versa.
71		{unicode.Khmer, unicode.Khudawadi},
72		{unicode.Imperial_Aramaic, unicode.Radical},
73
74		// Merge with empty.
75		{&unicode.RangeTable{}},
76		{&unicode.RangeTable{}, &unicode.RangeTable{}},
77		{&unicode.RangeTable{}, &unicode.RangeTable{}, &unicode.RangeTable{}},
78		{&unicode.RangeTable{}, unicode.Hiragana},
79		{unicode.Inherited, &unicode.RangeTable{}},
80		{&unicode.RangeTable{}, unicode.Hanunoo, &unicode.RangeTable{}},
81
82		// Hypothetical tables.
83		{maxRuneTable},
84		{overlap1, overlap2},
85
86		// Optimization
87		{optimize},
88	} {
89		rt := Merge(tt...)
90		for r := rune(0); r <= unicode.MaxRune; r++ {
91			if got, want := unicode.Is(rt, r), unicode.In(r, tt...); got != want {
92				t.Fatalf("%d:%U: got %v; want %v", i, r, got, want)
93			}
94		}
95		// Test optimization and correctness for R16.
96		for k := 0; k < len(rt.R16)-1; k++ {
97			if lo, hi := rt.R16[k].Lo, rt.R16[k].Hi; lo > hi {
98				t.Errorf("%d: Lo (%x) > Hi (%x)", i, lo, hi)
99			}
100			if hi, lo := rt.R16[k].Hi, rt.R16[k+1].Lo; hi >= lo {
101				t.Errorf("%d: Hi (%x) >= next Lo (%x)", i, hi, lo)
102			}
103			if rt.R16[k].Hi+rt.R16[k].Stride == rt.R16[k+1].Lo {
104				t.Errorf("%d: missed optimization for R16 at %d between %X and %x",
105					i, k, rt.R16[k], rt.R16[k+1])
106			}
107		}
108		// Test optimization and correctness for R32.
109		for k := 0; k < len(rt.R32)-1; k++ {
110			if lo, hi := rt.R32[k].Lo, rt.R32[k].Hi; lo > hi {
111				t.Errorf("%d: Lo (%x) > Hi (%x)", i, lo, hi)
112			}
113			if hi, lo := rt.R32[k].Hi, rt.R32[k+1].Lo; hi >= lo {
114				t.Errorf("%d: Hi (%x) >= next Lo (%x)", i, hi, lo)
115			}
116			if rt.R32[k].Hi+rt.R32[k].Stride == rt.R32[k+1].Lo {
117				t.Errorf("%d: missed optimization for R32 at %d between %X and %X",
118					i, k, rt.R32[k], rt.R32[k+1])
119			}
120		}
121	}
122}
123
124const runes = "Hello World in 2015!,\U0010fffd"
125
126func BenchmarkNotMerged(t *testing.B) {
127	for i := 0; i < t.N; i++ {
128		for _, r := range runes {
129			unicode.In(r, unicode.GraphicRanges...)
130		}
131	}
132}
133
134func BenchmarkMerged(t *testing.B) {
135	rt := Merge(unicode.GraphicRanges...)
136
137	for i := 0; i < t.N; i++ {
138		for _, r := range runes {
139			unicode.Is(rt, r)
140		}
141	}
142}
143
144var cased = []*unicode.RangeTable{
145	unicode.Lower,
146	unicode.Upper,
147	unicode.Title,
148	unicode.Other_Lowercase,
149	unicode.Other_Uppercase,
150}
151
152func BenchmarkNotMergedCased(t *testing.B) {
153	for i := 0; i < t.N; i++ {
154		for _, r := range runes {
155			unicode.In(r, cased...)
156		}
157	}
158}
159
160func BenchmarkMergedCased(t *testing.B) {
161	// This reduces len(R16) from 243 to 82 and len(R32) from 65 to 35 for
162	// Unicode 7.0.0.
163	rt := Merge(cased...)
164
165	for i := 0; i < t.N; i++ {
166		for _, r := range runes {
167			unicode.Is(rt, r)
168		}
169	}
170}
171
172func BenchmarkInit(t *testing.B) {
173	for i := 0; i < t.N; i++ {
174		Merge(cased...)
175		Merge(unicode.GraphicRanges...)
176	}
177}
178
179func BenchmarkInit2(t *testing.B) {
180	// Hypothetical near-worst-case performance.
181	for i := 0; i < t.N; i++ {
182		Merge(overlap1, overlap2)
183	}
184}
185