1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package utf16_test
6
7import (
8	"reflect"
9	"testing"
10	"unicode"
11	. "unicode/utf16"
12)
13
14// Validate the constants redefined from unicode.
15func TestConstants(t *testing.T) {
16	if MaxRune != unicode.MaxRune {
17		t.Errorf("utf16.maxRune is wrong: %x should be %x", MaxRune, unicode.MaxRune)
18	}
19	if ReplacementChar != unicode.ReplacementChar {
20		t.Errorf("utf16.replacementChar is wrong: %x should be %x", ReplacementChar, unicode.ReplacementChar)
21	}
22}
23
// encodeTest is one table entry for the encoding tests: a sequence of code
// points and the UTF-16 code units it is expected to encode to.
type encodeTest struct {
	in  []rune   // code points to encode
	out []uint16 // expected UTF-16 code units
}

// encodeTests is shared by TestEncode and TestEncodeRune.
var encodeTests = []encodeTest{
	// Small values map one-to-one onto code units.
	{
		in:  []rune{1, 2, 3, 4},
		out: []uint16{1, 2, 3, 4},
	},
	// 0xffff still fits in one unit; everything from 0x10000 up to
	// 0x10ffff is expected as a surrogate pair.
	{
		in: []rune{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff},
		out: []uint16{
			0xffff,
			0xd800, 0xdc00,
			0xd800, 0xdc01,
			0xd808, 0xdf45,
			0xdbff, 0xdfff,
		},
	},
	// Surrogate code points, values above 0x10ffff and negative values are
	// each expected to come out as a single 0xfffd.
	{
		in:  []rune{'a', 'b', 0xd7ff, 0xd800, 0xdfff, 0xe000, 0x110000, -1},
		out: []uint16{'a', 'b', 0xd7ff, 0xfffd, 0xfffd, 0xe000, 0xfffd, 0xfffd},
	},
}
36
37func TestEncode(t *testing.T) {
38	for _, tt := range encodeTests {
39		out := Encode(tt.in)
40		if !reflect.DeepEqual(out, tt.out) {
41			t.Errorf("Encode(%x) = %x; want %x", tt.in, out, tt.out)
42		}
43	}
44}
45
46func TestEncodeRune(t *testing.T) {
47	for i, tt := range encodeTests {
48		j := 0
49		for _, r := range tt.in {
50			r1, r2 := EncodeRune(r)
51			if r < 0x10000 || r > unicode.MaxRune {
52				if j >= len(tt.out) {
53					t.Errorf("#%d: ran out of tt.out", i)
54					break
55				}
56				if r1 != unicode.ReplacementChar || r2 != unicode.ReplacementChar {
57					t.Errorf("EncodeRune(%#x) = %#x, %#x; want 0xfffd, 0xfffd", r, r1, r2)
58				}
59				j++
60			} else {
61				if j+1 >= len(tt.out) {
62					t.Errorf("#%d: ran out of tt.out", i)
63					break
64				}
65				if r1 != rune(tt.out[j]) || r2 != rune(tt.out[j+1]) {
66					t.Errorf("EncodeRune(%#x) = %#x, %#x; want %#x, %#x", r, r1, r2, tt.out[j], tt.out[j+1])
67				}
68				j += 2
69				dec := DecodeRune(r1, r2)
70				if dec != r {
71					t.Errorf("DecodeRune(%#x, %#x) = %#x; want %#x", r1, r2, dec, r)
72				}
73			}
74		}
75		if j != len(tt.out) {
76			t.Errorf("#%d: EncodeRune didn't generate enough output", i)
77		}
78	}
79}
80
// decodeTest is one table entry for TestDecode: a sequence of UTF-16 code
// units and the code points it is expected to decode to.
type decodeTest struct {
	in  []uint16 // UTF-16 code units to decode
	out []rune   // expected code points
}

// decodeTests lists the inputs and expected outputs checked by TestDecode.
var decodeTests = []decodeTest{
	// Small values map one-to-one onto runes.
	{
		in:  []uint16{1, 2, 3, 4},
		out: []rune{1, 2, 3, 4},
	},
	// Well-formed surrogate pairs combine into runes at or above 0x10000.
	{
		in: []uint16{
			0xffff,
			0xd800, 0xdc00,
			0xd800, 0xdc01,
			0xd808, 0xdf45,
			0xdbff, 0xdfff,
		},
		out: []rune{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff},
	},
	// A surrogate without a partner is expected to decode to 0xfffd.
	{
		in:  []uint16{0xd800, 'a'},
		out: []rune{0xfffd, 'a'},
	},
	{
		in:  []uint16{0xdfff},
		out: []rune{0xfffd},
	},
}
93
94func TestDecode(t *testing.T) {
95	for _, tt := range decodeTests {
96		out := Decode(tt.in)
97		if !reflect.DeepEqual(out, tt.out) {
98			t.Errorf("Decode(%x) = %x; want %x", tt.in, out, tt.out)
99		}
100	}
101}
102
// decodeRuneTests lists pairs of values passed to DecodeRune together with
// the rune the call is expected to return.
var decodeRuneTests = []struct {
	r1, r2 rune
	want   rune
}{
	{r1: 0xd800, r2: 0xdc00, want: 0x10000},
	{r1: 0xd800, r2: 0xdc01, want: 0x10001},
	{r1: 0xd808, r2: 0xdf45, want: 0x12345},
	{r1: 0xdbff, r2: 0xdfff, want: 0x10ffff},
	// Illegal pair: the replacement rune is expected instead.
	{r1: 0xd800, r2: 'a', want: 0xfffd},
}
113
114func TestDecodeRune(t *testing.T) {
115	for i, tt := range decodeRuneTests {
116		got := DecodeRune(tt.r1, tt.r2)
117		if got != tt.want {
118			t.Errorf("%d: DecodeRune(%q, %q) = %v; want %v", i, tt.r1, tt.r2, got, tt.want)
119		}
120	}
121}
122
// surrogateTests lists runes together with the answer IsSurrogate is expected
// to give for each of them.
var surrogateTests = []struct {
	r    rune
	want bool
}{
	// Sample characters taken from http://en.wikipedia.org/wiki/UTF-16;
	// none of them is expected to be reported as a surrogate.
	{r: '\u007A', want: false},     // LATIN SMALL LETTER Z
	{r: '\u6C34', want: false},     // CJK UNIFIED IDEOGRAPH-6C34 (water)
	{r: '\uFEFF', want: false},     // Byte Order Mark
	{r: '\U00010000', want: false}, // LINEAR B SYLLABLE B008 A (first non-BMP code point)
	{r: '\U0001D11E', want: false}, // MUSICAL SYMBOL G CLEF
	{r: '\U0010FFFD', want: false}, // PRIVATE USE CHARACTER-10FFFD

	// Values on and around the edges of the range 0xd800-0xdfff.
	{r: 0xd7ff, want: false}, // one below 0xd800
	{r: 0xd800, want: true},  // lowest value expected to be a surrogate
	{r: 0xdc00, want: true},  // middle of the range
	{r: 0xe000, want: false}, // one above 0xdfff
	{r: 0xdfff, want: true},  // highest value expected to be a surrogate
}
141
142func TestIsSurrogate(t *testing.T) {
143	for i, tt := range surrogateTests {
144		got := IsSurrogate(tt.r)
145		if got != tt.want {
146			t.Errorf("%d: IsSurrogate(%q) = %v; want %v", i, tt.r, got, tt.want)
147		}
148	}
149}
150
// BenchmarkDecodeValidASCII times Decode on the code units of
// "hello world", an input made up of ASCII values only.
func BenchmarkDecodeValidASCII(b *testing.B) {
	units := []uint16{'h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd'}
	for n := 0; n < b.N; n++ {
		Decode(units)
	}
}
158
// BenchmarkDecodeValidJapaneseChars times Decode on the code units of
// "日本語日本語日本語"; every unit stands for one rune, so no surrogate
// pairs are involved.
func BenchmarkDecodeValidJapaneseChars(b *testing.B) {
	units := []uint16{
		'日', '本', '語',
		'日', '本', '語',
		'日', '本', '語',
	}
	for n := 0; n < b.N; n++ {
		Decode(units)
	}
}
166
// BenchmarkDecodeRune times DecodeRune on five surrogate pairs that are
// prepared with EncodeRune before the timer is reset.
func BenchmarkDecodeRune(b *testing.B) {
	// U+1D4D0 to U+1D4D4: MATHEMATICAL BOLD SCRIPT CAPITAL LETTERS.
	// Written as \U escapes so the literals cannot be damaged by tools that
	// mishandle characters outside the Basic Multilingual Plane.
	letters := []rune{
		'\U0001D4D0',
		'\U0001D4D1',
		'\U0001D4D2',
		'\U0001D4D3',
		'\U0001D4D4',
	}

	// rs holds the halves back to back: rs[2*i] and rs[2*i+1] belong to
	// letters[i].
	rs := make([]rune, 2*len(letters))
	for i, u := range letters {
		rs[2*i], rs[2*i+1] = EncodeRune(u)
	}

	// Keep the setup above out of the measurement.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		for j := 0; j < len(rs); j += 2 {
			DecodeRune(rs[j], rs[j+1])
		}
	}
}
181
// BenchmarkEncodeValidASCII times Encode on the runes of "hello", an input
// made up of ASCII characters only.
func BenchmarkEncodeValidASCII(b *testing.B) {
	runes := []rune("hello")
	for n := 0; n < b.N; n++ {
		Encode(runes)
	}
}
188
// BenchmarkEncodeValidJapaneseChars times Encode on the three runes of
// "日本語".
func BenchmarkEncodeValidJapaneseChars(b *testing.B) {
	runes := []rune("日本語")
	for n := 0; n < b.N; n++ {
		Encode(runes)
	}
}
195
// BenchmarkEncodeRune times EncodeRune on five runes above 0xffff, which is
// the range where the tests in this file expect a surrogate pair.
func BenchmarkEncodeRune(b *testing.B) {
	for i := 0; i < b.N; i++ {
		// U+1D4D0 to U+1D4D4: MATHEMATICAL BOLD SCRIPT CAPITAL LETTERS.
		// Written as \U escapes so the literals cannot be damaged by tools
		// that mishandle characters outside the Basic Multilingual Plane.
		for _, u := range []rune{
			'\U0001D4D0',
			'\U0001D4D1',
			'\U0001D4D2',
			'\U0001D4D3',
			'\U0001D4D4',
		} {
			EncodeRune(u)
		}
	}
}
203