1// Copyright 2010 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package utf16_test 6 7import ( 8 "reflect" 9 "testing" 10 "unicode" 11 . "unicode/utf16" 12) 13 14// Validate the constants redefined from unicode. 15func TestConstants(t *testing.T) { 16 if MaxRune != unicode.MaxRune { 17 t.Errorf("utf16.maxRune is wrong: %x should be %x", MaxRune, unicode.MaxRune) 18 } 19 if ReplacementChar != unicode.ReplacementChar { 20 t.Errorf("utf16.replacementChar is wrong: %x should be %x", ReplacementChar, unicode.ReplacementChar) 21 } 22} 23 24type encodeTest struct { 25 in []rune 26 out []uint16 27} 28 29var encodeTests = []encodeTest{ 30 {[]rune{1, 2, 3, 4}, []uint16{1, 2, 3, 4}}, 31 {[]rune{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff}, 32 []uint16{0xffff, 0xd800, 0xdc00, 0xd800, 0xdc01, 0xd808, 0xdf45, 0xdbff, 0xdfff}}, 33 {[]rune{'a', 'b', 0xd7ff, 0xd800, 0xdfff, 0xe000, 0x110000, -1}, 34 []uint16{'a', 'b', 0xd7ff, 0xfffd, 0xfffd, 0xe000, 0xfffd, 0xfffd}}, 35} 36 37func TestEncode(t *testing.T) { 38 for _, tt := range encodeTests { 39 out := Encode(tt.in) 40 if !reflect.DeepEqual(out, tt.out) { 41 t.Errorf("Encode(%x) = %x; want %x", tt.in, out, tt.out) 42 } 43 } 44} 45 46func TestEncodeRune(t *testing.T) { 47 for i, tt := range encodeTests { 48 j := 0 49 for _, r := range tt.in { 50 r1, r2 := EncodeRune(r) 51 if r < 0x10000 || r > unicode.MaxRune { 52 if j >= len(tt.out) { 53 t.Errorf("#%d: ran out of tt.out", i) 54 break 55 } 56 if r1 != unicode.ReplacementChar || r2 != unicode.ReplacementChar { 57 t.Errorf("EncodeRune(%#x) = %#x, %#x; want 0xfffd, 0xfffd", r, r1, r2) 58 } 59 j++ 60 } else { 61 if j+1 >= len(tt.out) { 62 t.Errorf("#%d: ran out of tt.out", i) 63 break 64 } 65 if r1 != rune(tt.out[j]) || r2 != rune(tt.out[j+1]) { 66 t.Errorf("EncodeRune(%#x) = %#x, %#x; want %#x, %#x", r, r1, r2, tt.out[j], tt.out[j+1]) 67 } 68 j += 2 69 dec := DecodeRune(r1, r2) 70 if dec != r { 71 t.Errorf("DecodeRune(%#x, %#x) = %#x; want %#x", r1, r2, dec, r) 72 } 73 } 74 } 75 if j != len(tt.out) { 76 t.Errorf("#%d: EncodeRune didn't generate enough output", i) 77 } 78 } 79} 80 81type decodeTest struct { 82 in []uint16 83 out []rune 84} 85 86var decodeTests = []decodeTest{ 87 {[]uint16{1, 2, 3, 4}, []rune{1, 2, 3, 4}}, 88 {[]uint16{0xffff, 0xd800, 0xdc00, 0xd800, 0xdc01, 0xd808, 0xdf45, 0xdbff, 0xdfff}, 89 []rune{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff}}, 90 {[]uint16{0xd800, 'a'}, []rune{0xfffd, 'a'}}, 91 {[]uint16{0xdfff}, []rune{0xfffd}}, 92} 93 94func TestDecode(t *testing.T) { 95 for _, tt := range decodeTests { 96 out := Decode(tt.in) 97 if !reflect.DeepEqual(out, tt.out) { 98 t.Errorf("Decode(%x) = %x; want %x", tt.in, out, tt.out) 99 } 100 } 101} 102 103var decodeRuneTests = []struct { 104 r1, r2 rune 105 want rune 106}{ 107 {0xd800, 0xdc00, 0x10000}, 108 {0xd800, 0xdc01, 0x10001}, 109 {0xd808, 0xdf45, 0x12345}, 110 {0xdbff, 0xdfff, 0x10ffff}, 111 {0xd800, 'a', 0xfffd}, // illegal, replacement rune substituted 112} 113 114func TestDecodeRune(t *testing.T) { 115 for i, tt := range decodeRuneTests { 116 got := DecodeRune(tt.r1, tt.r2) 117 if got != tt.want { 118 t.Errorf("%d: DecodeRune(%q, %q) = %v; want %v", i, tt.r1, tt.r2, got, tt.want) 119 } 120 } 121} 122 123var surrogateTests = []struct { 124 r rune 125 want bool 126}{ 127 // from http://en.wikipedia.org/wiki/UTF-16 128 {'\u007A', false}, // LATIN SMALL LETTER Z 129 {'\u6C34', false}, // CJK UNIFIED IDEOGRAPH-6C34 (water) 130 {'\uFEFF', false}, // Byte Order Mark 131 {'\U00010000', false}, // LINEAR B SYLLABLE B008 A (first non-BMP code point) 132 {'\U0001D11E', false}, // MUSICAL SYMBOL G CLEF 133 {'\U0010FFFD', false}, // PRIVATE USE CHARACTER-10FFFD (last Unicode code point) 134 135 {rune(0xd7ff), false}, // surr1-1 136 {rune(0xd800), true}, // surr1 137 {rune(0xdc00), true}, // surr2 138 {rune(0xe000), false}, // surr3 139 {rune(0xdfff), true}, // surr3-1 140} 141 142func TestIsSurrogate(t *testing.T) { 143 for i, tt := range surrogateTests { 144 got := IsSurrogate(tt.r) 145 if got != tt.want { 146 t.Errorf("%d: IsSurrogate(%q) = %v; want %v", i, tt.r, got, tt.want) 147 } 148 } 149} 150 151func BenchmarkDecodeValidASCII(b *testing.B) { 152 // "hello world" 153 data := []uint16{104, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100} 154 for i := 0; i < b.N; i++ { 155 Decode(data) 156 } 157} 158 159func BenchmarkDecodeValidJapaneseChars(b *testing.B) { 160 // "日本語日本語日本語" 161 data := []uint16{26085, 26412, 35486, 26085, 26412, 35486, 26085, 26412, 35486} 162 for i := 0; i < b.N; i++ { 163 Decode(data) 164 } 165} 166 167func BenchmarkDecodeRune(b *testing.B) { 168 rs := make([]rune, 10) 169 // U+1D4D0 to U+1D4D4: MATHEMATICAL BOLD SCRIPT CAPITAL LETTERS 170 for i, u := range []rune{'', '', '', '', ''} { 171 rs[2*i], rs[2*i+1] = EncodeRune(u) 172 } 173 174 b.ResetTimer() 175 for i := 0; i < b.N; i++ { 176 for j := 0; j < 5; j++ { 177 DecodeRune(rs[2*j], rs[2*j+1]) 178 } 179 } 180} 181 182func BenchmarkEncodeValidASCII(b *testing.B) { 183 data := []rune{'h', 'e', 'l', 'l', 'o'} 184 for i := 0; i < b.N; i++ { 185 Encode(data) 186 } 187} 188 189func BenchmarkEncodeValidJapaneseChars(b *testing.B) { 190 data := []rune{'日', '本', '語'} 191 for i := 0; i < b.N; i++ { 192 Encode(data) 193 } 194} 195 196func BenchmarkEncodeRune(b *testing.B) { 197 for i := 0; i < b.N; i++ { 198 for _, u := range []rune{'', '', '', '', ''} { 199 EncodeRune(u) 200 } 201 } 202} 203