1// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package norm
6
7import (
8	"bufio"
9	"bytes"
10	"fmt"
11	"regexp"
12	"runtime"
13	"strconv"
14	"strings"
15	"sync"
16	"testing"
17	"time"
18	"unicode/utf8"
19
20	"golang.org/x/text/internal/gen"
21	"golang.org/x/text/internal/testtext"
22)
23
24var once sync.Once
25
26func skipShort(t *testing.T) {
27	testtext.SkipIfNotLong(t)
28
29	once.Do(func() { loadTestData(t) })
30}
31
32// This regression test runs the test set in NormalizationTest.txt
33// (taken from https://www.unicode.org/Public/<unicode.Version>/ucd/).
34//
35// NormalizationTest.txt has form:
36// @Part0 # Specific cases
37// #
38// 1E0A;1E0A;0044 0307;1E0A;0044 0307; # (Ḋ; Ḋ; D◌̇; Ḋ; D◌̇; ) LATIN CAPITAL LETTER D WITH DOT ABOVE
39// 1E0C;1E0C;0044 0323;1E0C;0044 0323; # (Ḍ; Ḍ; D◌̣; Ḍ; D◌̣; ) LATIN CAPITAL LETTER D WITH DOT BELOW
40//
41// Each test has 5 columns (c1, c2, c3, c4, c5), where
42// (c1, c2, c3, c4, c5) == (c1, NFC(c1), NFD(c1), NFKC(c1), NFKD(c1))
43//
44// CONFORMANCE:
45// 1. The following invariants must be true for all conformant implementations
46//
47//    NFC
48//      c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3)
49//      c4 ==  NFC(c4) ==  NFC(c5)
50//
51//    NFD
52//      c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3)
53//      c5 ==  NFD(c4) ==  NFD(c5)
54//
55//    NFKC
56//      c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
57//
58//    NFKD
59//      c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
60//
61// 2. For every code point X assigned in this version of Unicode that is not
62//    specifically listed in Part 1, the following invariants must be true
63//    for all conformant implementations:
64//
65//      X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
66//
67
// Column types.
//
// These are indices into Test.cols. They follow the column order of
// NormalizationTest.txt, (c1, c2, c3, c4, c5), as described in the comment
// above.
const (
	cRaw = iota // c1: the unnormalized source string
	cNFC        // c2: NFC(c1)
	cNFD        // c3: NFD(c1)
	cNFKC       // c4: NFKC(c1)
	cNFKD       // c5: NFKD(c1)
	cMaxColumns // number of columns; used as the length of Test.cols
)
77
// part holds the data from NormalizationTest.txt, one entry per "@Part"
// header in the order the headers appear in the file. It is filled in by
// loadTestData and indexed by Test.partnr.
var part []Part
80
// Part is one "@PartN # description" section of NormalizationTest.txt
// together with the test cases listed under it.
type Part struct {
	name   string // description taken from the text after "# " on the header line
	number int    // the digit N parsed from "@PartN"
	tests  []Test // test cases that follow the header, in file order
}
86
// Test is a single test case: either one data line of NormalizationTest.txt
// or a synthetic single-character case built by TestCharacterByCharacter.
type Test struct {
	name   string              // trailing "# ..." comment text of the data line
	partnr int                 // index into the package-level part slice
	number int                 // running number across the whole file; negative for synthetic cases
	r      rune                // used for character by character test
	cols   [cMaxColumns]string // UTF-8 text of the 5 columns, indexed by cRaw..cNFKD.
}
94
95func (t Test) Name() string {
96	if t.number < 0 {
97		return part[t.partnr].name
98	}
99	return fmt.Sprintf("%s:%d", part[t.partnr].name, t.number)
100}
101
var (
	// partRe matches a part header such as "@Part0 # Specific cases",
	// capturing the single part digit and the descriptive text.
	partRe = regexp.MustCompile(`@Part(\d) # (.*)$`)

	// testRe matches a data line: five ';'-terminated columns of
	// space-separated upper-case hex code points, then " # " and a
	// free-form comment. It yields six capture groups.
	testRe = regexp.MustCompile(`^` + strings.Repeat(`([\dA-F ]+);`, 5) + ` # (.*)$`)

	// counter is the running number handed to each parsed test case.
	counter int
)
106
107// Load the data form NormalizationTest.txt
108func loadTestData(t *testing.T) {
109	f := gen.OpenUCDFile("NormalizationTest.txt")
110	defer f.Close()
111	scanner := bufio.NewScanner(f)
112	for scanner.Scan() {
113		line := scanner.Text()
114		if len(line) == 0 || line[0] == '#' {
115			continue
116		}
117		m := partRe.FindStringSubmatch(line)
118		if m != nil {
119			if len(m) < 3 {
120				t.Fatal("Failed to parse Part: ", line)
121			}
122			i, err := strconv.Atoi(m[1])
123			if err != nil {
124				t.Fatal(err)
125			}
126			name := m[2]
127			part = append(part, Part{name: name[:len(name)-1], number: i})
128			continue
129		}
130		m = testRe.FindStringSubmatch(line)
131		if m == nil || len(m) < 7 {
132			t.Fatalf(`Failed to parse: "%s" result: %#v`, line, m)
133		}
134		test := Test{name: m[6], partnr: len(part) - 1, number: counter}
135		counter++
136		for j := 1; j < len(m)-1; j++ {
137			for _, split := range strings.Split(m[j], " ") {
138				r, err := strconv.ParseUint(split, 16, 64)
139				if err != nil {
140					t.Fatal(err)
141				}
142				if test.r == 0 {
143					// save for CharacterByCharacterTests
144					test.r = rune(r)
145				}
146				var buf [utf8.UTFMax]byte
147				sz := utf8.EncodeRune(buf[:], rune(r))
148				test.cols[j-1] += string(buf[:sz])
149			}
150		}
151		part := &part[len(part)-1]
152		part.tests = append(part.tests, test)
153	}
154	if scanner.Err() != nil {
155		t.Fatal(scanner.Err())
156	}
157}
158
159func cmpResult(t *testing.T, tc *Test, name string, f Form, gold, test, result string) {
160	if gold != result {
161		t.Errorf("%s:%s: %s(%+q)=%+q; want %+q: %s",
162			tc.Name(), name, fstr[f], test, result, gold, tc.name)
163	}
164}
165
166func cmpIsNormal(t *testing.T, tc *Test, name string, f Form, test string, result, want bool) {
167	if result != want {
168		t.Errorf("%s:%s: %s(%+q)=%v; want %v", tc.Name(), name, fstr[f], test, result, want)
169	}
170}
171
172func doTest(t *testing.T, tc *Test, f Form, gold, test string) {
173	testb := []byte(test)
174	result := f.Bytes(testb)
175	cmpResult(t, tc, "Bytes", f, gold, test, string(result))
176
177	sresult := f.String(test)
178	cmpResult(t, tc, "String", f, gold, test, sresult)
179
180	acc := []byte{}
181	i := Iter{}
182	i.InitString(f, test)
183	for !i.Done() {
184		acc = append(acc, i.Next()...)
185	}
186	cmpResult(t, tc, "Iter.Next", f, gold, test, string(acc))
187
188	buf := make([]byte, 128)
189	acc = nil
190	for p := 0; p < len(testb); {
191		nDst, nSrc, _ := f.Transform(buf, testb[p:], true)
192		acc = append(acc, buf[:nDst]...)
193		p += nSrc
194	}
195	cmpResult(t, tc, "Transform", f, gold, test, string(acc))
196
197	for i := range test {
198		out := f.Append(f.Bytes([]byte(test[:i])), []byte(test[i:])...)
199		cmpResult(t, tc, fmt.Sprintf(":Append:%d", i), f, gold, test, string(out))
200	}
201	cmpIsNormal(t, tc, "IsNormal", f, test, f.IsNormal([]byte(test)), test == gold)
202	cmpIsNormal(t, tc, "IsNormalString", f, test, f.IsNormalString(test), test == gold)
203}
204
205func doConformanceTests(t *testing.T, tc *Test, partn int) {
206	for i := 0; i <= 2; i++ {
207		doTest(t, tc, NFC, tc.cols[1], tc.cols[i])
208		doTest(t, tc, NFD, tc.cols[2], tc.cols[i])
209		doTest(t, tc, NFKC, tc.cols[3], tc.cols[i])
210		doTest(t, tc, NFKD, tc.cols[4], tc.cols[i])
211	}
212	for i := 3; i <= 4; i++ {
213		doTest(t, tc, NFC, tc.cols[3], tc.cols[i])
214		doTest(t, tc, NFD, tc.cols[4], tc.cols[i])
215		doTest(t, tc, NFKC, tc.cols[3], tc.cols[i])
216		doTest(t, tc, NFKD, tc.cols[4], tc.cols[i])
217	}
218}
219
220func TestCharacterByCharacter(t *testing.T) {
221	skipShort(t)
222	tests := part[1].tests
223	var last rune = 0
224	for i := 0; i <= len(tests); i++ { // last one is special case
225		var r rune
226		if i == len(tests) {
227			r = 0x2FA1E // Don't have to go to 0x10FFFF
228		} else {
229			r = tests[i].r
230		}
231		for last++; last < r; last++ {
232			// Check all characters that were not explicitly listed in the test.
233			tc := &Test{partnr: 1, number: -1}
234			char := string(last)
235			doTest(t, tc, NFC, char, char)
236			doTest(t, tc, NFD, char, char)
237			doTest(t, tc, NFKC, char, char)
238			doTest(t, tc, NFKD, char, char)
239		}
240		if i < len(tests) {
241			doConformanceTests(t, &tests[i], 1)
242		}
243	}
244}
245
246func TestStandardTests(t *testing.T) {
247	skipShort(t)
248	for _, j := range []int{0, 2, 3} {
249		for _, test := range part[j].tests {
250			doConformanceTests(t, &test, j)
251		}
252	}
253}
254
255// TestPerformance verifies that normalization is O(n). If any of the
256// code does not properly check for maxCombiningChars, normalization
257// may exhibit O(n**2) behavior.
258func TestPerformance(t *testing.T) {
259	skipShort(t)
260	runtime.GOMAXPROCS(2)
261	success := make(chan bool, 1)
262	go func() {
263		buf := bytes.Repeat([]byte("\u035D"), 1024*1024)
264		buf = append(buf, "\u035B"...)
265		NFC.Append(nil, buf...)
266		success <- true
267	}()
268	timeout := time.After(1 * time.Second)
269	select {
270	case <-success:
271		// test completed before the timeout
272	case <-timeout:
273		t.Errorf(`unexpectedly long time to complete PerformanceTest`)
274	}
275}
276