1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package runes
6
7import (
8	"strings"
9	"testing"
10	"unicode"
11
12	"golang.org/x/text/cases"
13	"golang.org/x/text/language"
14	"golang.org/x/text/transform"
15)
16
17var (
18	toUpper = cases.Upper(language.Und)
19	toLower = cases.Lower(language.Und)
20)
21
22type spanformer interface {
23	transform.SpanningTransformer
24}
25
26func TestPredicate(t *testing.T) {
27	testConditional(t, func(rt *unicode.RangeTable, t, f spanformer) spanformer {
28		return If(Predicate(func(r rune) bool {
29			return unicode.Is(rt, r)
30		}), t, f)
31	})
32}
33
34func TestIn(t *testing.T) {
35	testConditional(t, func(rt *unicode.RangeTable, t, f spanformer) spanformer {
36		return If(In(rt), t, f)
37	})
38}
39
40func TestNotIn(t *testing.T) {
41	testConditional(t, func(rt *unicode.RangeTable, t, f spanformer) spanformer {
42		return If(NotIn(rt), f, t)
43	})
44}
45
46func testConditional(t *testing.T, f func(rt *unicode.RangeTable, t, f spanformer) spanformer) {
47	lower := f(unicode.Latin, toLower, toLower)
48
49	for i, tt := range []transformTest{{
50		desc:    "empty",
51		szDst:   large,
52		atEOF:   true,
53		in:      "",
54		out:     "",
55		outFull: "",
56		t:       lower,
57	}, {
58		desc:    "small",
59		szDst:   1,
60		atEOF:   true,
61		in:      "B",
62		out:     "b",
63		outFull: "b",
64		errSpan: transform.ErrEndOfSpan,
65		t:       lower,
66	}, {
67		desc:    "short dst",
68		szDst:   2,
69		atEOF:   true,
70		in:      "AAA",
71		out:     "aa",
72		outFull: "aaa",
73		err:     transform.ErrShortDst,
74		errSpan: transform.ErrEndOfSpan,
75		t:       lower,
76	}, {
77		desc:    "short dst writing error",
78		szDst:   1,
79		atEOF:   false,
80		in:      "A\x80",
81		out:     "a",
82		outFull: "a\x80",
83		err:     transform.ErrShortDst,
84		errSpan: transform.ErrEndOfSpan,
85		t:       lower,
86	}, {
87		desc:    "short dst writing incomplete rune",
88		szDst:   2,
89		atEOF:   true,
90		in:      "Σ\xc2",
91		out:     "Σ",
92		outFull: "Σ\xc2",
93		err:     transform.ErrShortDst,
94		t:       f(unicode.Latin, toLower, nil),
95	}, {
96		desc:    "short dst, longer",
97		szDst:   5,
98		atEOF:   true,
99		in:      "Hellø",
100		out:     "Hell",
101		outFull: "Hellø",
102		err:     transform.ErrShortDst,
103		// idem is used to test short buffers by forcing processing of full-rune increments.
104		t: f(unicode.Latin, Map(idem), nil),
105	}, {
106		desc:    "short dst, longer, writing error",
107		szDst:   6,
108		atEOF:   false,
109		in:      "\x80Hello\x80",
110		out:     "\x80Hello",
111		outFull: "\x80Hello\x80",
112		err:     transform.ErrShortDst,
113		t:       f(unicode.Latin, Map(idem), nil),
114	}, {
115		desc:    "short src",
116		szDst:   2,
117		atEOF:   false,
118		in:      "A\xc2",
119		out:     "a",
120		outFull: "a\xc2",
121		err:     transform.ErrShortSrc,
122		errSpan: transform.ErrEndOfSpan,
123		t:       lower,
124	}, {
125		desc:    "short src no change",
126		szDst:   2,
127		atEOF:   false,
128		in:      "a\xc2",
129		out:     "a",
130		outFull: "a\xc2",
131		err:     transform.ErrShortSrc,
132		errSpan: transform.ErrShortSrc,
133		nSpan:   1,
134		t:       lower,
135	}, {
136		desc:    "invalid input, atEOF",
137		szDst:   large,
138		atEOF:   true,
139		in:      "\x80",
140		out:     "\x80",
141		outFull: "\x80",
142		t:       lower,
143	}, {
144		desc:    "invalid input, !atEOF",
145		szDst:   large,
146		atEOF:   false,
147		in:      "\x80",
148		out:     "\x80",
149		outFull: "\x80",
150		t:       lower,
151	}, {
152		desc:    "invalid input, incomplete rune atEOF",
153		szDst:   large,
154		atEOF:   true,
155		in:      "\xc2",
156		out:     "\xc2",
157		outFull: "\xc2",
158		t:       lower,
159	}, {
160		desc:    "nop",
161		szDst:   large,
162		atEOF:   true,
163		in:      "Hello World!",
164		out:     "Hello World!",
165		outFull: "Hello World!",
166		t:       f(unicode.Latin, nil, nil),
167	}, {
168		desc:    "nop in",
169		szDst:   large,
170		atEOF:   true,
171		in:      "THIS IS α ΤΕΣΤ",
172		out:     "this is α ΤΕΣΤ",
173		outFull: "this is α ΤΕΣΤ",
174		errSpan: transform.ErrEndOfSpan,
175		t:       f(unicode.Greek, nil, toLower),
176	}, {
177		desc:    "nop in latin",
178		szDst:   large,
179		atEOF:   true,
180		in:      "THIS IS α ΤΕΣΤ",
181		out:     "THIS IS α τεστ",
182		outFull: "THIS IS α τεστ",
183		errSpan: transform.ErrEndOfSpan,
184		t:       f(unicode.Latin, nil, toLower),
185	}, {
186		desc:    "nop not in",
187		szDst:   large,
188		atEOF:   true,
189		in:      "THIS IS α ΤΕΣΤ",
190		out:     "this is α ΤΕΣΤ",
191		outFull: "this is α ΤΕΣΤ",
192		errSpan: transform.ErrEndOfSpan,
193		t:       f(unicode.Latin, toLower, nil),
194	}, {
195		desc:    "pass atEOF is true when at end",
196		szDst:   large,
197		atEOF:   true,
198		in:      "hello",
199		out:     "HELLO",
200		outFull: "HELLO",
201		errSpan: transform.ErrEndOfSpan,
202		t:       f(unicode.Latin, upperAtEOF{}, nil),
203	}, {
204		desc:    "pass atEOF is true when at end of segment",
205		szDst:   large,
206		atEOF:   true,
207		in:      "hello ",
208		out:     "HELLO ",
209		outFull: "HELLO ",
210		errSpan: transform.ErrEndOfSpan,
211		t:       f(unicode.Latin, upperAtEOF{}, nil),
212	}, {
213		desc:    "don't pass atEOF is true when atEOF is false",
214		szDst:   large,
215		atEOF:   false,
216		in:      "hello",
217		out:     "",
218		outFull: "HELLO",
219		err:     transform.ErrShortSrc,
220		errSpan: transform.ErrShortSrc,
221		t:       f(unicode.Latin, upperAtEOF{}, nil),
222	}, {
223		desc:    "pass atEOF is true when at end, no change",
224		szDst:   large,
225		atEOF:   true,
226		in:      "HELLO",
227		out:     "HELLO",
228		outFull: "HELLO",
229		t:       f(unicode.Latin, upperAtEOF{}, nil),
230	}, {
231		desc:    "pass atEOF is true when at end of segment, no change",
232		szDst:   large,
233		atEOF:   true,
234		in:      "HELLO ",
235		out:     "HELLO ",
236		outFull: "HELLO ",
237		t:       f(unicode.Latin, upperAtEOF{}, nil),
238	}, {
239		desc:    "large input ASCII",
240		szDst:   12000,
241		atEOF:   false,
242		in:      strings.Repeat("HELLO", 2000),
243		out:     strings.Repeat("hello", 2000),
244		outFull: strings.Repeat("hello", 2000),
245		errSpan: transform.ErrEndOfSpan,
246		err:     nil,
247		t:       lower,
248	}, {
249		desc:    "large input non-ASCII",
250		szDst:   12000,
251		atEOF:   false,
252		in:      strings.Repeat("\u3333", 2000),
253		out:     strings.Repeat("\u3333", 2000),
254		outFull: strings.Repeat("\u3333", 2000),
255		err:     nil,
256		t:       lower,
257	}} {
258		tt.check(t, i)
259	}
260}
261
262// upperAtEOF is a strange Transformer that converts text to uppercase, but only
263// if atEOF is true.
264type upperAtEOF struct{ transform.NopResetter }
265
266func (upperAtEOF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
267	if !atEOF {
268		return 0, 0, transform.ErrShortSrc
269	}
270	return toUpper.Transform(dst, src, atEOF)
271}
272
273func (upperAtEOF) Span(src []byte, atEOF bool) (n int, err error) {
274	if !atEOF {
275		return 0, transform.ErrShortSrc
276	}
277	return toUpper.Span(src, atEOF)
278}
279
280func BenchmarkConditional(b *testing.B) {
281	doBench(b, If(In(unicode.Hangul), transform.Nop, transform.Nop))
282}
283