1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package width
6
7import (
8	"bytes"
9	"strings"
10	"testing"
11
12	"golang.org/x/text/internal/testtext"
13	"golang.org/x/text/transform"
14)
15
16func foldRune(r rune) (folded rune, ok bool) {
17	alt, ok := mapRunes[r]
18	if ok && alt.e&tagNeedsFold != 0 {
19		return alt.r, true
20	}
21	return r, false
22}
23
24func widenRune(r rune) (wide rune, ok bool) {
25	alt, ok := mapRunes[r]
26	if k := alt.e.kind(); k == EastAsianHalfwidth || k == EastAsianNarrow {
27		return alt.r, true
28	}
29	return r, false
30}
31
32func narrowRune(r rune) (narrow rune, ok bool) {
33	alt, ok := mapRunes[r]
34	if k := alt.e.kind(); k == EastAsianFullwidth || k == EastAsianWide || k == EastAsianAmbiguous {
35		return alt.r, true
36	}
37	return r, false
38}
39
40func TestFoldSingleRunes(t *testing.T) {
41	for r := rune(0); r < 0x1FFFF; r++ {
42		if loSurrogate <= r && r <= hiSurrogate {
43			continue
44		}
45		x, _ := foldRune(r)
46		want := string(x)
47		got := Fold.String(string(r))
48		if got != want {
49			t.Errorf("Fold().String(%U) = %+q; want %+q", r, got, want)
50		}
51	}
52}
53
// transformTest describes one table-driven case for a width Transformer: the
// input, the expected outcome of Transform (and String), and the expected
// outcome of Span.
type transformTest struct {
	desc string // subtest name

	// Inputs handed to the transformer.
	src   string // source text given to Transform, String and Span
	nBuf  int    // length of the destination buffer given to Transform
	atEOF bool   // atEOF argument for Transform and Span

	// Expected results of Transform; dst is also the expected String result.
	dst  string // complete expected output for src
	nDst int    // expected number of destination bytes written
	nSrc int    // expected number of source bytes consumed
	err  error  // expected error from Transform

	// Expected results of Span.
	nSpan   int   // expected byte count returned by Span
	errSpan error // expected error from Span
}
66
67func (tc *transformTest) doTest(t *testing.T, tr Transformer) {
68	testtext.Run(t, tc.desc, func(t *testing.T) {
69		b := make([]byte, tc.nBuf)
70		nDst, nSrc, err := tr.Transform(b, []byte(tc.src), tc.atEOF)
71		if got := string(b[:nDst]); got != tc.dst[:nDst] {
72			t.Errorf("dst was %+q; want %+q", got, tc.dst)
73		}
74		if nDst != tc.nDst {
75			t.Errorf("nDst was %d; want %d", nDst, tc.nDst)
76		}
77		if nSrc != tc.nSrc {
78			t.Errorf("nSrc was %d; want %d", nSrc, tc.nSrc)
79		}
80		if err != tc.err {
81			t.Errorf("error was %v; want %v", err, tc.err)
82		}
83		if got := tr.String(tc.src); got != tc.dst {
84			t.Errorf("String(%q) = %q; want %q", tc.src, got, tc.dst)
85		}
86		n, err := tr.Span([]byte(tc.src), tc.atEOF)
87		if n != tc.nSpan || err != tc.errSpan {
88			t.Errorf("Span: got %d, %v; want %d, %v", n, err, tc.nSpan, tc.errSpan)
89		}
90	})
91}
92
93func TestFold(t *testing.T) {
94	for _, tc := range []transformTest{{
95		desc:    "empty",
96		src:     "",
97		nBuf:    10,
98		dst:     "",
99		nDst:    0,
100		nSrc:    0,
101		atEOF:   false,
102		err:     nil,
103		nSpan:   0,
104		errSpan: nil,
105	}, {
106		desc:    "short source 1",
107		src:     "a\xc2",
108		nBuf:    10,
109		dst:     "a\xc2",
110		nDst:    1,
111		nSrc:    1,
112		atEOF:   false,
113		err:     transform.ErrShortSrc,
114		nSpan:   1,
115		errSpan: transform.ErrShortSrc,
116	}, {
117		desc:    "short source 2",
118		src:     "a\xe0\x80",
119		nBuf:    10,
120		dst:     "a\xe0\x80",
121		nDst:    1,
122		nSrc:    1,
123		atEOF:   false,
124		err:     transform.ErrShortSrc,
125		nSpan:   1,
126		errSpan: transform.ErrShortSrc,
127	}, {
128		desc:    "incomplete but terminated source 1",
129		src:     "a\xc2",
130		nBuf:    10,
131		dst:     "a\xc2",
132		nDst:    2,
133		nSrc:    2,
134		atEOF:   true,
135		err:     nil,
136		nSpan:   2,
137		errSpan: nil,
138	}, {
139		desc:    "incomplete but terminated source 2",
140		src:     "a\xe0\x80",
141		nBuf:    10,
142		dst:     "a\xe0\x80",
143		nDst:    3,
144		nSrc:    3,
145		atEOF:   true,
146		err:     nil,
147		nSpan:   3,
148		errSpan: nil,
149	}, {
150		desc:    "exact fit dst",
151		src:     "a\uff01",
152		nBuf:    2,
153		dst:     "a!",
154		nDst:    2,
155		nSrc:    4,
156		atEOF:   false,
157		err:     nil,
158		nSpan:   1,
159		errSpan: transform.ErrEndOfSpan,
160	}, {
161		desc:    "exact fit dst and src ascii",
162		src:     "ab",
163		nBuf:    2,
164		dst:     "ab",
165		nDst:    2,
166		nSrc:    2,
167		atEOF:   true,
168		err:     nil,
169		nSpan:   2,
170		errSpan: nil,
171	}, {
172		desc:    "empty dst",
173		src:     "\u0300",
174		nBuf:    0,
175		dst:     "\u0300",
176		nDst:    0,
177		nSrc:    0,
178		atEOF:   true,
179		err:     transform.ErrShortDst,
180		nSpan:   2,
181		errSpan: nil,
182	}, {
183		desc:    "empty dst ascii",
184		src:     "a",
185		nBuf:    0,
186		dst:     "a",
187		nDst:    0,
188		nSrc:    0,
189		atEOF:   true,
190		err:     transform.ErrShortDst,
191		nSpan:   1,
192		errSpan: nil,
193	}, {
194		desc:    "short dst 1",
195		src:     "a\uffe0", // ¢
196		nBuf:    2,
197		dst:     "a\u00a2", // ¢
198		nDst:    1,
199		nSrc:    1,
200		atEOF:   false,
201		err:     transform.ErrShortDst,
202		nSpan:   1,
203		errSpan: transform.ErrEndOfSpan,
204	}, {
205		desc:    "short dst 2",
206		src:     "不夠",
207		nBuf:    3,
208		dst:     "不夠",
209		nDst:    3,
210		nSrc:    3,
211		atEOF:   true,
212		err:     transform.ErrShortDst,
213		nSpan:   6,
214		errSpan: nil,
215	}, {
216		desc:    "short dst fast path",
217		src:     "fast",
218		nDst:    3,
219		dst:     "fast",
220		nBuf:    3,
221		nSrc:    3,
222		atEOF:   true,
223		err:     transform.ErrShortDst,
224		nSpan:   4,
225		errSpan: nil,
226	}, {
227		desc:    "short dst larger buffer",
228		src:     "\uff21" + strings.Repeat("0", 127) + "B",
229		nBuf:    128,
230		dst:     "A" + strings.Repeat("0", 127) + "B",
231		nDst:    128,
232		nSrc:    130,
233		atEOF:   true,
234		err:     transform.ErrShortDst,
235		nSpan:   0,
236		errSpan: transform.ErrEndOfSpan,
237	}, {
238		desc:    "fast path alternation",
239		src:     "fast路徑fast路徑",
240		nBuf:    20,
241		dst:     "fast路徑fast路徑",
242		nDst:    20,
243		nSrc:    20,
244		atEOF:   true,
245		err:     nil,
246		nSpan:   20,
247		errSpan: nil,
248	}} {
249		tc.doTest(t, Fold)
250	}
251}
252
253func TestWidenSingleRunes(t *testing.T) {
254	for r := rune(0); r < 0x1FFFF; r++ {
255		if loSurrogate <= r && r <= hiSurrogate {
256			continue
257		}
258		alt, _ := widenRune(r)
259		want := string(alt)
260		got := Widen.String(string(r))
261		if got != want {
262			t.Errorf("Widen().String(%U) = %+q; want %+q", r, got, want)
263		}
264	}
265}
266
267func TestWiden(t *testing.T) {
268	for _, tc := range []transformTest{{
269		desc:    "empty",
270		src:     "",
271		nBuf:    10,
272		dst:     "",
273		nDst:    0,
274		nSrc:    0,
275		atEOF:   false,
276		err:     nil,
277		nSpan:   0,
278		errSpan: nil,
279	}, {
280		desc:    "short source 1",
281		src:     "a\xc2",
282		nBuf:    10,
283		dst:     "a\xc2",
284		nDst:    3,
285		nSrc:    1,
286		atEOF:   false,
287		err:     transform.ErrShortSrc,
288		nSpan:   0,
289		errSpan: transform.ErrEndOfSpan,
290	}, {
291		desc:    "short source 2",
292		src:     "a\xe0\x80",
293		nBuf:    10,
294		dst:     "a\xe0\x80",
295		nDst:    3,
296		nSrc:    1,
297		atEOF:   false,
298		err:     transform.ErrShortSrc,
299		nSpan:   0,
300		errSpan: transform.ErrEndOfSpan,
301	}, {
302		desc:    "incomplete but terminated source 1",
303		src:     "a\xc2",
304		nBuf:    10,
305		dst:     "a\xc2",
306		nDst:    4,
307		nSrc:    2,
308		atEOF:   true,
309		err:     nil,
310		nSpan:   0,
311		errSpan: transform.ErrEndOfSpan,
312	}, {
313		desc:    "incomplete but terminated source 2",
314		src:     "a\xe0\x80",
315		nBuf:    10,
316		dst:     "a\xe0\x80",
317		nDst:    5,
318		nSrc:    3,
319		atEOF:   true,
320		err:     nil,
321		nSpan:   0,
322		errSpan: transform.ErrEndOfSpan,
323	}, {
324		desc:    "short source 1 some span",
325		src:     "a\xc2",
326		nBuf:    10,
327		dst:     "a\xc2",
328		nDst:    3,
329		nSrc:    3,
330		atEOF:   false,
331		err:     transform.ErrShortSrc,
332		nSpan:   3,
333		errSpan: transform.ErrShortSrc,
334	}, {
335		desc:    "short source 2 some span",
336		src:     "a\xe0\x80",
337		nBuf:    10,
338		dst:     "a\xe0\x80",
339		nDst:    3,
340		nSrc:    3,
341		atEOF:   false,
342		err:     transform.ErrShortSrc,
343		nSpan:   3,
344		errSpan: transform.ErrShortSrc,
345	}, {
346		desc:    "incomplete but terminated source 1 some span",
347		src:     "a\xc2",
348		nBuf:    10,
349		dst:     "a\xc2",
350		nDst:    4,
351		nSrc:    4,
352		atEOF:   true,
353		err:     nil,
354		nSpan:   4,
355		errSpan: nil,
356	}, {
357		desc:    "incomplete but terminated source 2 some span",
358		src:     "a\xe0\x80",
359		nBuf:    10,
360		dst:     "a\xe0\x80",
361		nDst:    5,
362		nSrc:    5,
363		atEOF:   true,
364		err:     nil,
365		nSpan:   5,
366		errSpan: nil,
367	}, {
368		desc:    "exact fit dst",
369		src:     "a!",
370		nBuf:    6,
371		dst:     "a\uff01",
372		nDst:    6,
373		nSrc:    2,
374		atEOF:   false,
375		err:     nil,
376		nSpan:   0,
377		errSpan: transform.ErrEndOfSpan,
378	}, {
379		desc:    "empty dst",
380		src:     "\u0300",
381		nBuf:    0,
382		dst:     "\u0300",
383		nDst:    0,
384		nSrc:    0,
385		atEOF:   true,
386		err:     transform.ErrShortDst,
387		nSpan:   2,
388		errSpan: nil,
389	}, {
390		desc:    "empty dst ascii",
391		src:     "a",
392		nBuf:    0,
393		dst:     "a",
394		nDst:    0,
395		nSrc:    0,
396		atEOF:   true,
397		err:     transform.ErrShortDst,
398		nSpan:   0,
399		errSpan: transform.ErrEndOfSpan,
400	}, {
401		desc:    "short dst 1",
402		src:     "a\uffe0",
403		nBuf:    4,
404		dst:     "a\uffe0",
405		nDst:    3,
406		nSrc:    1,
407		atEOF:   false,
408		err:     transform.ErrShortDst,
409		nSpan:   0,
410		errSpan: transform.ErrEndOfSpan,
411	}, {
412		desc:    "short dst 2",
413		src:     "不夠",
414		nBuf:    3,
415		dst:     "不夠",
416		nDst:    3,
417		nSrc:    3,
418		atEOF:   true,
419		err:     transform.ErrShortDst,
420		nSpan:   6,
421		errSpan: nil,
422	}, {
423		desc:    "short dst ascii",
424		src:     "ascii",
425		nBuf:    3,
426		dst:     "ascii", // U+ff41, ...
427		nDst:    3,
428		nSrc:    1,
429		atEOF:   true,
430		err:     transform.ErrShortDst,
431		nSpan:   0,
432		errSpan: transform.ErrEndOfSpan,
433	}, {
434		desc:    "ambiguous",
435		src:     "\uffe9",
436		nBuf:    4,
437		dst:     "\u2190",
438		nDst:    3,
439		nSrc:    3,
440		atEOF:   false,
441		err:     nil,
442		nSpan:   0,
443		errSpan: transform.ErrEndOfSpan,
444	}} {
445		tc.doTest(t, Widen)
446	}
447}
448
449func TestNarrowSingleRunes(t *testing.T) {
450	for r := rune(0); r < 0x1FFFF; r++ {
451		if loSurrogate <= r && r <= hiSurrogate {
452			continue
453		}
454		alt, _ := narrowRune(r)
455		want := string(alt)
456		got := Narrow.String(string(r))
457		if got != want {
458			t.Errorf("Narrow().String(%U) = %+q; want %+q", r, got, want)
459		}
460	}
461}
462
463func TestNarrow(t *testing.T) {
464	for _, tc := range []transformTest{{
465		desc:    "empty",
466		src:     "",
467		nBuf:    10,
468		dst:     "",
469		nDst:    0,
470		nSrc:    0,
471		atEOF:   false,
472		err:     nil,
473		nSpan:   0,
474		errSpan: nil,
475	}, {
476		desc:    "short source 1",
477		src:     "a\xc2",
478		nBuf:    10,
479		dst:     "a\xc2",
480		nDst:    1,
481		nSrc:    1,
482		atEOF:   false,
483		err:     transform.ErrShortSrc,
484		nSpan:   1,
485		errSpan: transform.ErrShortSrc,
486	}, {
487		desc:    "short source 2",
488		src:     "a\xe0\x80",
489		nBuf:    10,
490		dst:     "a\xe0\x80",
491		nDst:    1,
492		nSrc:    3,
493		atEOF:   false,
494		err:     transform.ErrShortSrc,
495		nSpan:   0,
496		errSpan: transform.ErrEndOfSpan,
497	}, {
498		desc:    "incomplete but terminated source 1",
499		src:     "a\xc2",
500		nBuf:    10,
501		dst:     "a\xc2",
502		nDst:    2,
503		nSrc:    4,
504		atEOF:   true,
505		err:     nil,
506		nSpan:   0,
507		errSpan: transform.ErrEndOfSpan,
508	}, {
509		desc:    "incomplete but terminated source 2",
510		src:     "a\xe0\x80",
511		nBuf:    10,
512		dst:     "a\xe0\x80",
513		nDst:    3,
514		nSrc:    5,
515		atEOF:   true,
516		err:     nil,
517		nSpan:   0,
518		errSpan: transform.ErrEndOfSpan,
519	}, {
520		desc:    "exact fit dst",
521		src:     "a\uff01",
522		nBuf:    2,
523		dst:     "a!",
524		nDst:    2,
525		nSrc:    6,
526		atEOF:   false,
527		err:     nil,
528		nSpan:   0,
529		errSpan: transform.ErrEndOfSpan,
530	}, {
531		desc:    "exact fit dst some span",
532		src:     "a\uff01",
533		nBuf:    2,
534		dst:     "a!",
535		nDst:    2,
536		nSrc:    4,
537		atEOF:   false,
538		err:     nil,
539		nSpan:   1,
540		errSpan: transform.ErrEndOfSpan,
541	}, {
542		desc:    "empty dst",
543		src:     "\u0300",
544		nBuf:    0,
545		dst:     "\u0300",
546		nDst:    0,
547		nSrc:    0,
548		atEOF:   true,
549		err:     transform.ErrShortDst,
550		nSpan:   2,
551		errSpan: nil,
552	}, {
553		desc:    "empty dst ascii",
554		src:     "a",
555		nBuf:    0,
556		dst:     "a",
557		nDst:    0,
558		nSrc:    0,
559		atEOF:   true,
560		err:     transform.ErrShortDst,
561		nSpan:   1,
562		errSpan: nil,
563	}, {
564		desc:    "short dst 1",
565		src:     "a\uffe0", // ¢
566		nBuf:    2,
567		dst:     "a\u00a2", // ¢
568		nDst:    1,
569		nSrc:    3,
570		atEOF:   false,
571		err:     transform.ErrShortDst,
572		nSpan:   0,
573		errSpan: transform.ErrEndOfSpan,
574	}, {
575		desc:    "short dst 2",
576		src:     "不夠",
577		nBuf:    3,
578		dst:     "不夠",
579		nDst:    3,
580		nSrc:    3,
581		atEOF:   true,
582		err:     transform.ErrShortDst,
583		nSpan:   6,
584		errSpan: nil,
585	}, {
586		// Create a narrow variant of ambiguous runes, if they exist.
587		desc:    "ambiguous",
588		src:     "\u2190",
589		nBuf:    4,
590		dst:     "\uffe9",
591		nDst:    3,
592		nSrc:    3,
593		atEOF:   false,
594		err:     nil,
595		nSpan:   0,
596		errSpan: transform.ErrEndOfSpan,
597	}, {
598		desc:    "short dst fast path",
599		src:     "fast",
600		nBuf:    3,
601		dst:     "fast",
602		nDst:    3,
603		nSrc:    3,
604		atEOF:   true,
605		err:     transform.ErrShortDst,
606		nSpan:   4,
607		errSpan: nil,
608	}, {
609		desc:    "short dst larger buffer",
610		src:     "\uff21" + strings.Repeat("0", 127) + "B",
611		nBuf:    128,
612		dst:     "A" + strings.Repeat("0", 127) + "B",
613		nDst:    128,
614		nSrc:    130,
615		atEOF:   true,
616		err:     transform.ErrShortDst,
617		nSpan:   0,
618		errSpan: transform.ErrEndOfSpan,
619	}, {
620		desc:    "fast path alternation",
621		src:     "fast路徑fast路徑",
622		nBuf:    20,
623		dst:     "fast路徑fast路徑",
624		nDst:    20,
625		nSrc:    20,
626		atEOF:   true,
627		err:     nil,
628		nSpan:   20,
629		errSpan: nil,
630	}} {
631		tc.doTest(t, Narrow)
632	}
633}
634
635func bench(b *testing.B, t Transformer, s string) {
636	dst := make([]byte, 1024)
637	src := []byte(s)
638	b.SetBytes(int64(len(src)))
639	b.ResetTimer()
640	for i := 0; i < b.N; i++ {
641		t.Transform(dst, src, true)
642	}
643}
644
// changingRunes returns a string holding, in ascending order, every rune in
// the range [0, 0xFFFF] for which f reports ok == true. The rune value f
// returns is ignored; only the boolean decides whether a rune is included.
//
// The supplied f is consulted rather than a fixed mapping, so each caller
// gets the rune set belonging to the function it passes.
func changingRunes(f func(r rune) (rune, bool)) string {
	var buf bytes.Buffer
	for r := rune(0); r <= 0xFFFF; r++ {
		if _, ok := f(r); ok {
			buf.WriteRune(r)
		}
	}
	return buf.String()
}
654
655func BenchmarkFoldASCII(b *testing.B) {
656	bench(b, Fold, testtext.ASCII)
657}
658
659func BenchmarkFoldCJK(b *testing.B) {
660	bench(b, Fold, testtext.CJK)
661}
662
663func BenchmarkFoldNonCanonical(b *testing.B) {
664	bench(b, Fold, changingRunes(foldRune))
665}
666
667func BenchmarkFoldOther(b *testing.B) {
668	bench(b, Fold, testtext.TwoByteUTF8+testtext.ThreeByteUTF8)
669}
670
671func BenchmarkWideASCII(b *testing.B) {
672	bench(b, Widen, testtext.ASCII)
673}
674
675func BenchmarkWideCJK(b *testing.B) {
676	bench(b, Widen, testtext.CJK)
677}
678
679func BenchmarkWideNonCanonical(b *testing.B) {
680	bench(b, Widen, changingRunes(widenRune))
681}
682
683func BenchmarkWideOther(b *testing.B) {
684	bench(b, Widen, testtext.TwoByteUTF8+testtext.ThreeByteUTF8)
685}
686
687func BenchmarkNarrowASCII(b *testing.B) {
688	bench(b, Narrow, testtext.ASCII)
689}
690
691func BenchmarkNarrowCJK(b *testing.B) {
692	bench(b, Narrow, testtext.CJK)
693}
694
695func BenchmarkNarrowNonCanonical(b *testing.B) {
696	bench(b, Narrow, changingRunes(narrowRune))
697}
698
699func BenchmarkNarrowOther(b *testing.B) {
700	bench(b, Narrow, testtext.TwoByteUTF8+testtext.ThreeByteUTF8)
701}
702