1// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package cases
6
7import (
8	"strings"
9	"testing"
10	"unicode"
11
12	"golang.org/x/text/internal/testtext"
13	"golang.org/x/text/language"
14	"golang.org/x/text/transform"
15	"golang.org/x/text/unicode/norm"
16	"golang.org/x/text/unicode/rangetable"
17)
18
19// The following definitions are taken directly from Chapter 3 of The Unicode
20// Standard.
21
22func propCased(r rune) bool {
23	return propLower(r) || propUpper(r) || unicode.IsTitle(r)
24}
25
// propLower reports whether r is a lower case letter or is listed in the
// Other_Lowercase property table.
func propLower(r rune) bool {
	if unicode.IsLower(r) {
		return true
	}
	return unicode.Is(unicode.Other_Lowercase, r)
}
29
// propUpper reports whether r is an upper case letter or is listed in the
// Other_Uppercase property table.
func propUpper(r rune) bool {
	switch {
	case unicode.IsUpper(r):
		return true
	case unicode.Is(unicode.Other_Uppercase, r):
		return true
	default:
		return false
	}
}
33
34func propIgnore(r rune) bool {
35	if unicode.In(r, unicode.Mn, unicode.Me, unicode.Cf, unicode.Lm, unicode.Sk) {
36		return true
37	}
38	return caseIgnorable[r]
39}
40
41func hasBreakProp(r rune) bool {
42	// binary search over ranges
43	lo := 0
44	hi := len(breakProp)
45	for lo < hi {
46		m := lo + (hi-lo)/2
47		bp := &breakProp[m]
48		if bp.lo <= r && r <= bp.hi {
49			return true
50		}
51		if r < bp.lo {
52			hi = m
53		} else {
54			lo = m + 1
55		}
56	}
57	return false
58}
59
60func contextFromRune(r rune) *context {
61	c := context{dst: make([]byte, 128), src: []byte(string(r)), atEOF: true}
62	c.next()
63	return &c
64}
65
66func TestCaseProperties(t *testing.T) {
67	if unicode.Version != UnicodeVersion {
68		// Properties of existing code points may change by Unicode version, so
69		// we need to skip.
70		t.Skipf("Skipping as core Unicode version %s different than %s", unicode.Version, UnicodeVersion)
71	}
72	assigned := rangetable.Assigned(UnicodeVersion)
73	coreVersion := rangetable.Assigned(unicode.Version)
74	for r := rune(0); r <= lastRuneForTesting; r++ {
75		if !unicode.In(r, assigned) || !unicode.In(r, coreVersion) {
76			continue
77		}
78		c := contextFromRune(r)
79		if got, want := c.info.isCaseIgnorable(), propIgnore(r); got != want {
80			t.Errorf("caseIgnorable(%U): got %v; want %v (%x)", r, got, want, c.info)
81		}
82		// New letters may change case types, but existing case pairings should
83		// not change. See Case Pair Stability in
84		// https://unicode.org/policies/stability_policy.html.
85		if rf := unicode.SimpleFold(r); rf != r && unicode.In(rf, assigned) {
86			if got, want := c.info.isCased(), propCased(r); got != want {
87				t.Errorf("cased(%U): got %v; want %v (%x)", r, got, want, c.info)
88			}
89			if got, want := c.caseType() == cUpper, propUpper(r); got != want {
90				t.Errorf("upper(%U): got %v; want %v (%x)", r, got, want, c.info)
91			}
92			if got, want := c.caseType() == cLower, propLower(r); got != want {
93				t.Errorf("lower(%U): got %v; want %v (%x)", r, got, want, c.info)
94			}
95		}
96		if got, want := c.info.isBreak(), hasBreakProp(r); got != want {
97			t.Errorf("isBreak(%U): got %v; want %v (%x)", r, got, want, c.info)
98		}
99	}
100	// TODO: get title case from unicode file.
101}
102
103func TestMapping(t *testing.T) {
104	assigned := rangetable.Assigned(UnicodeVersion)
105	coreVersion := rangetable.Assigned(unicode.Version)
106	if coreVersion == nil {
107		coreVersion = assigned
108	}
109	apply := func(r rune, f func(c *context) bool) string {
110		c := contextFromRune(r)
111		f(c)
112		return string(c.dst[:c.pDst])
113	}
114
115	for r, tt := range special {
116		if got, want := apply(r, lower), tt.toLower; got != want {
117			t.Errorf("lowerSpecial:(%U): got %+q; want %+q", r, got, want)
118		}
119		if got, want := apply(r, title), tt.toTitle; got != want {
120			t.Errorf("titleSpecial:(%U): got %+q; want %+q", r, got, want)
121		}
122		if got, want := apply(r, upper), tt.toUpper; got != want {
123			t.Errorf("upperSpecial:(%U): got %+q; want %+q", r, got, want)
124		}
125	}
126
127	for r := rune(0); r <= lastRuneForTesting; r++ {
128		if !unicode.In(r, assigned) || !unicode.In(r, coreVersion) {
129			continue
130		}
131		if rf := unicode.SimpleFold(r); rf == r || !unicode.In(rf, assigned) {
132			continue
133		}
134		if _, ok := special[r]; ok {
135			continue
136		}
137		want := string(unicode.ToLower(r))
138		if got := apply(r, lower); got != want {
139			t.Errorf("lower:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want))
140		}
141
142		want = string(unicode.ToUpper(r))
143		if got := apply(r, upper); got != want {
144			t.Errorf("upper:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want))
145		}
146
147		want = string(unicode.ToTitle(r))
148		if got := apply(r, title); got != want {
149			t.Errorf("title:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want))
150		}
151	}
152}
153
154func runeFoldData(r rune) (x struct{ simple, full, special string }) {
155	x = foldMap[r]
156	if x.simple == "" {
157		x.simple = string(unicode.ToLower(r))
158	}
159	if x.full == "" {
160		x.full = string(unicode.ToLower(r))
161	}
162	if x.special == "" {
163		x.special = x.full
164	}
165	return
166}
167
168func TestFoldData(t *testing.T) {
169	assigned := rangetable.Assigned(UnicodeVersion)
170	coreVersion := rangetable.Assigned(unicode.Version)
171	if coreVersion == nil {
172		coreVersion = assigned
173	}
174	apply := func(r rune, f func(c *context) bool) (string, info) {
175		c := contextFromRune(r)
176		f(c)
177		return string(c.dst[:c.pDst]), c.info.cccType()
178	}
179	for r := rune(0); r <= lastRuneForTesting; r++ {
180		if !unicode.In(r, assigned) || !unicode.In(r, coreVersion) {
181			continue
182		}
183		x := runeFoldData(r)
184		if got, info := apply(r, foldFull); got != x.full {
185			t.Errorf("full:%q (%U): got %q %U; want %q %U (ccc=%x)", r, r, got, []rune(got), x.full, []rune(x.full), info)
186		}
187		// TODO: special and simple.
188	}
189}
190
191func TestCCC(t *testing.T) {
192	assigned := rangetable.Assigned(UnicodeVersion)
193	normVersion := rangetable.Assigned(norm.Version)
194	for r := rune(0); r <= lastRuneForTesting; r++ {
195		if !unicode.In(r, assigned) || !unicode.In(r, normVersion) {
196			continue
197		}
198		c := contextFromRune(r)
199
200		p := norm.NFC.PropertiesString(string(r))
201		want := cccOther
202		switch p.CCC() {
203		case 0:
204			want = cccZero
205		case above:
206			want = cccAbove
207		}
208		if got := c.info.cccType(); got != want {
209			t.Errorf("%U: got %x; want %x", r, got, want)
210		}
211	}
212}
213
214func TestWordBreaks(t *testing.T) {
215	for _, tt := range breakTest {
216		testtext.Run(t, tt, func(t *testing.T) {
217			parts := strings.Split(tt, "|")
218			want := ""
219			for _, s := range parts {
220				found := false
221				// This algorithm implements title casing given word breaks
222				// as defined in the Unicode standard 3.13 R3.
223				for _, r := range s {
224					title := unicode.ToTitle(r)
225					lower := unicode.ToLower(r)
226					if !found && title != lower {
227						found = true
228						want += string(title)
229					} else {
230						want += string(lower)
231					}
232				}
233			}
234			src := strings.Join(parts, "")
235			got := Title(language.Und).String(src)
236			if got != want {
237				t.Errorf("got %q; want %q", got, want)
238			}
239		})
240	}
241}
242
243func TestContext(t *testing.T) {
244	tests := []struct {
245		desc       string
246		dstSize    int
247		atEOF      bool
248		src        string
249		out        string
250		nSrc       int
251		err        error
252		ops        string
253		prefixArg  string
254		prefixWant bool
255	}{{
256		desc:    "next: past end, atEOF, no checkpoint",
257		dstSize: 10,
258		atEOF:   true,
259		src:     "12",
260		out:     "",
261		nSrc:    2,
262		ops:     "next;next;next",
263		// Test that calling prefix with a non-empty argument when the buffer
264		// is depleted returns false.
265		prefixArg:  "x",
266		prefixWant: false,
267	}, {
268		desc:       "next: not at end, atEOF, no checkpoint",
269		dstSize:    10,
270		atEOF:      false,
271		src:        "12",
272		out:        "",
273		nSrc:       0,
274		err:        transform.ErrShortSrc,
275		ops:        "next;next",
276		prefixArg:  "",
277		prefixWant: true,
278	}, {
279		desc:       "next: past end, !atEOF, no checkpoint",
280		dstSize:    10,
281		atEOF:      false,
282		src:        "12",
283		out:        "",
284		nSrc:       0,
285		err:        transform.ErrShortSrc,
286		ops:        "next;next;next",
287		prefixArg:  "",
288		prefixWant: true,
289	}, {
290		desc:       "next: past end, !atEOF, checkpoint",
291		dstSize:    10,
292		atEOF:      false,
293		src:        "12",
294		out:        "",
295		nSrc:       2,
296		ops:        "next;next;checkpoint;next",
297		prefixArg:  "",
298		prefixWant: true,
299	}, {
300		desc:       "copy: exact count, atEOF, no checkpoint",
301		dstSize:    2,
302		atEOF:      true,
303		src:        "12",
304		out:        "12",
305		nSrc:       2,
306		ops:        "next;copy;next;copy;next",
307		prefixArg:  "",
308		prefixWant: true,
309	}, {
310		desc:       "copy: past end, !atEOF, no checkpoint",
311		dstSize:    2,
312		atEOF:      false,
313		src:        "12",
314		out:        "",
315		nSrc:       0,
316		err:        transform.ErrShortSrc,
317		ops:        "next;copy;next;copy;next",
318		prefixArg:  "",
319		prefixWant: true,
320	}, {
321		desc:       "copy: past end, !atEOF, checkpoint",
322		dstSize:    2,
323		atEOF:      false,
324		src:        "12",
325		out:        "12",
326		nSrc:       2,
327		ops:        "next;copy;next;copy;checkpoint;next",
328		prefixArg:  "",
329		prefixWant: true,
330	}, {
331		desc:       "copy: short dst",
332		dstSize:    1,
333		atEOF:      false,
334		src:        "12",
335		out:        "",
336		nSrc:       0,
337		err:        transform.ErrShortDst,
338		ops:        "next;copy;next;copy;checkpoint;next",
339		prefixArg:  "12",
340		prefixWant: false,
341	}, {
342		desc:       "copy: short dst, checkpointed",
343		dstSize:    1,
344		atEOF:      false,
345		src:        "12",
346		out:        "1",
347		nSrc:       1,
348		err:        transform.ErrShortDst,
349		ops:        "next;copy;checkpoint;next;copy;next",
350		prefixArg:  "",
351		prefixWant: true,
352	}, {
353		desc:       "writeString: simple",
354		dstSize:    3,
355		atEOF:      true,
356		src:        "1",
357		out:        "1ab",
358		nSrc:       1,
359		ops:        "next;copy;writeab;next",
360		prefixArg:  "",
361		prefixWant: true,
362	}, {
363		desc:       "writeString: short dst",
364		dstSize:    2,
365		atEOF:      true,
366		src:        "12",
367		out:        "",
368		nSrc:       0,
369		err:        transform.ErrShortDst,
370		ops:        "next;copy;writeab;next",
371		prefixArg:  "2",
372		prefixWant: true,
373	}, {
374		desc:       "writeString: simple",
375		dstSize:    3,
376		atEOF:      true,
377		src:        "12",
378		out:        "1ab",
379		nSrc:       2,
380		ops:        "next;copy;next;writeab;next",
381		prefixArg:  "",
382		prefixWant: true,
383	}, {
384		desc:       "writeString: short dst",
385		dstSize:    2,
386		atEOF:      true,
387		src:        "12",
388		out:        "",
389		nSrc:       0,
390		err:        transform.ErrShortDst,
391		ops:        "next;copy;next;writeab;next",
392		prefixArg:  "1",
393		prefixWant: false,
394	}, {
395		desc:    "prefix",
396		dstSize: 2,
397		atEOF:   true,
398		src:     "12",
399		out:     "",
400		nSrc:    0,
401		// Context will assign an ErrShortSrc if the input wasn't exhausted.
402		err:        transform.ErrShortSrc,
403		prefixArg:  "12",
404		prefixWant: true,
405	}}
406	for _, tt := range tests {
407		c := context{dst: make([]byte, tt.dstSize), src: []byte(tt.src), atEOF: tt.atEOF}
408
409		for _, op := range strings.Split(tt.ops, ";") {
410			switch op {
411			case "next":
412				c.next()
413			case "checkpoint":
414				c.checkpoint()
415			case "writeab":
416				c.writeString("ab")
417			case "copy":
418				c.copy()
419			case "":
420			default:
421				t.Fatalf("unknown op %q", op)
422			}
423		}
424		if got := c.hasPrefix(tt.prefixArg); got != tt.prefixWant {
425			t.Errorf("%s:\nprefix was %v; want %v", tt.desc, got, tt.prefixWant)
426		}
427		nDst, nSrc, err := c.ret()
428		if err != tt.err {
429			t.Errorf("%s:\nerror was %v; want %v", tt.desc, err, tt.err)
430		}
431		if out := string(c.dst[:nDst]); out != tt.out {
432			t.Errorf("%s:\nout was %q; want %q", tt.desc, out, tt.out)
433		}
434		if nSrc != tt.nSrc {
435			t.Errorf("%s:\nnSrc was %d; want %d", tt.desc, nSrc, tt.nSrc)
436		}
437	}
438}
439