1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/i18n/char_iterator.h"
6
7 #include "base/strings/utf_string_conversions.h"
8 #include "testing/gtest/include/gtest/gtest.h"
9
10 namespace base {
11 namespace i18n {
12
// Shared fixture text made of four code points:
//   'x'      - 1 byte in UTF-8,  1 code unit in UTF-16
//   U+00FB   - u with circumflex; 2 bytes in UTF-8, 1 code unit in UTF-16
//   U+1D538  - math double-struck A; 4 bytes in UTF-8, 2 code units in UTF-16
//   'z'      - 1 byte in UTF-8,  1 code unit in UTF-16
static constexpr const char* kTestString = "x\u00FB\U0001D538z";
19
// Steps a UTF8CharIterator through an empty string and through a short string
// holding one two-byte character, checking the byte index, character index and
// decoded value at every stop.
TEST(CharIteratorsTest, TestUTF8) {
  // Empty input: the iterator is already at the end and cannot advance.
  std::string no_text;
  UTF8CharIterator no_text_it(&no_text);
  EXPECT_TRUE(no_text_it.end());
  EXPECT_EQ(0, no_text_it.array_pos());
  EXPECT_EQ(0, no_text_it.char_pos());
  EXPECT_FALSE(no_text_it.Advance());

  // 's', then u with circumflex (bytes \303\273), then 'r'.
  std::string text = "s\303\273r";
  UTF8CharIterator it(&text);

  // First character: 's' at byte 0.
  EXPECT_FALSE(it.end());
  EXPECT_EQ(0, it.array_pos());
  EXPECT_EQ(0, it.char_pos());
  EXPECT_EQ('s', it.get());
  EXPECT_TRUE(it.Advance());

  // Second character: code point 251 (0xFB) starting at byte 1.
  EXPECT_FALSE(it.end());
  EXPECT_EQ(1, it.array_pos());
  EXPECT_EQ(1, it.char_pos());
  EXPECT_EQ(251, it.get());
  EXPECT_TRUE(it.Advance());

  // Third character: 'r' at byte 3, because the previous character took two
  // bytes.
  EXPECT_FALSE(it.end());
  EXPECT_EQ(3, it.array_pos());
  EXPECT_EQ(2, it.char_pos());
  EXPECT_EQ('r', it.get());
  EXPECT_TRUE(it.Advance());

  // Past the last character.
  EXPECT_TRUE(it.end());
  EXPECT_EQ(4, it.array_pos());
  EXPECT_EQ(3, it.char_pos());

  // The value read at the end is unspecified; the call just must not crash.
  it.get();

  EXPECT_FALSE(it.Advance());
}
57
// A UTF16CharIterator over an empty string reports both start() and end(), sits
// at position zero, and refuses to advance.
TEST(CharIteratorsTest, TestUTF16_Empty) {
  string16 no_text;
  UTF16CharIterator it(&no_text);
  EXPECT_TRUE(it.end());
  EXPECT_TRUE(it.start());
  EXPECT_EQ(0, it.array_pos());
  EXPECT_EQ(0, it.char_offset());
  EXPECT_FALSE(it.Advance());

  // Results are deliberately ignored: these calls only need to return without
  // crashing.
  it.get();
  it.NextCodePoint();
  it.PreviousCodePoint();
}
72
// Advances a UTF16CharIterator across kTestString from the front, checking the
// code-unit index, character offset, current code point and both neighbours at
// every stop.
TEST(CharIteratorsTest, TestUTF16) {
  string16 text(UTF8ToUTF16(kTestString));
  UTF16CharIterator it(&text);

  // On 'x', the first character.
  EXPECT_FALSE(it.end());
  EXPECT_TRUE(it.start());
  EXPECT_EQ(0, it.array_pos());
  EXPECT_EQ(0, it.char_offset());
  EXPECT_EQ('x', it.get());
  // Nothing precedes the first character; the call just must not crash.
  it.PreviousCodePoint();
  EXPECT_EQ(0xFB, it.NextCodePoint());
  EXPECT_TRUE(it.Advance());

  // On U+00FB, a single code unit.
  EXPECT_FALSE(it.end());
  EXPECT_FALSE(it.start());
  EXPECT_EQ(1, it.array_pos());
  EXPECT_EQ(1, it.char_offset());
  EXPECT_EQ(0xFB, it.get());
  EXPECT_EQ('x', it.PreviousCodePoint());
  EXPECT_EQ(0x1D538, it.NextCodePoint());
  EXPECT_TRUE(it.Advance());

  // On U+1D538, which spans code units 2 and 3.
  EXPECT_FALSE(it.end());
  EXPECT_FALSE(it.start());
  EXPECT_EQ(2, it.array_pos());
  EXPECT_EQ(2, it.char_offset());
  EXPECT_EQ(0x1D538, it.get());
  EXPECT_EQ(0xFB, it.PreviousCodePoint());
  EXPECT_EQ('z', it.NextCodePoint());
  EXPECT_TRUE(it.Advance());

  // On 'z': the code-unit index jumps to 4 while the character offset is 3.
  EXPECT_FALSE(it.end());
  EXPECT_FALSE(it.start());
  EXPECT_EQ(4, it.array_pos());
  EXPECT_EQ(3, it.char_offset());
  EXPECT_EQ('z', it.get());
  EXPECT_EQ(0x1D538, it.PreviousCodePoint());
  // Nothing follows the last character; the call just must not crash.
  it.NextCodePoint();
  EXPECT_TRUE(it.Advance());

  // Past the last character.
  EXPECT_TRUE(it.end());
  EXPECT_FALSE(it.start());
  EXPECT_EQ(5, it.array_pos());
  EXPECT_EQ(4, it.char_offset());
  EXPECT_EQ('z', it.PreviousCodePoint());

  // Values read at the end are unspecified; the calls just must not crash.
  it.get();
  it.NextCodePoint();

  EXPECT_FALSE(it.Advance());
}
126
// Walks a UTF16CharIterator backwards over kTestString with Rewind(), starting
// from the end position, and checks that char_offset() counts down from zero
// relative to where the iterator was created.
TEST(CharIteratorsTest, TestUTF16_Rewind) {
  string16 text(UTF8ToUTF16(kTestString));

  // Begin on the terminating null character. That is a legal starting index
  // and is exactly where end() is true, so end() can be verified first.
  auto it = UTF16CharIterator::UpperBound(&text, text.length());
  EXPECT_TRUE(it.end());
  EXPECT_FALSE(it.start());
  // The terminator's index equals the string's length in char16s.
  EXPECT_EQ(5, it.array_pos());
  EXPECT_EQ(0, it.char_offset());
  EXPECT_EQ('z', it.PreviousCodePoint());
  // Values read at the end are unspecified; the calls just must not crash.
  it.get();
  it.NextCodePoint();
  EXPECT_TRUE(it.Rewind());

  // Back on 'z'.
  EXPECT_FALSE(it.end());
  EXPECT_FALSE(it.start());
  EXPECT_EQ(4, it.array_pos());
  EXPECT_EQ(-1, it.char_offset());
  EXPECT_EQ('z', it.get());
  EXPECT_EQ(0x1D538, it.PreviousCodePoint());
  // Nothing follows the last character; the call just must not crash.
  it.NextCodePoint();
  EXPECT_TRUE(it.Rewind());

  // Back on U+1D538, which spans code units 2 and 3.
  EXPECT_FALSE(it.end());
  EXPECT_FALSE(it.start());
  EXPECT_EQ(2, it.array_pos());
  EXPECT_EQ(-2, it.char_offset());
  EXPECT_EQ(0x1D538, it.get());
  EXPECT_EQ(0xFB, it.PreviousCodePoint());
  EXPECT_EQ('z', it.NextCodePoint());
  EXPECT_TRUE(it.Rewind());

  // Back on U+00FB.
  EXPECT_FALSE(it.end());
  EXPECT_FALSE(it.start());
  EXPECT_EQ(1, it.array_pos());
  EXPECT_EQ(-3, it.char_offset());
  EXPECT_EQ(0xFB, it.get());
  EXPECT_EQ('x', it.PreviousCodePoint());
  EXPECT_EQ(0x1D538, it.NextCodePoint());
  EXPECT_TRUE(it.Rewind());

  // Back on 'x', the first character.
  EXPECT_FALSE(it.end());
  EXPECT_TRUE(it.start());
  EXPECT_EQ(0, it.array_pos());
  EXPECT_EQ(-4, it.char_offset());
  EXPECT_EQ('x', it.get());
  EXPECT_EQ(0xFB, it.NextCodePoint());
  // Nothing precedes the first character; the call just must not crash.
  it.PreviousCodePoint();

  EXPECT_FALSE(it.Rewind());
}
185
// Checks where UpperBound() places the iterator for every code-unit index of
// kTestString. Index 3 is the second code unit of U+1D538, and the expected
// position for it is 4, the start of the following character.
TEST(CharIteratorsTest, TestUTF16_UpperBound) {
  string16 text(UTF8ToUTF16(kTestString));
  const auto upper_bound_pos = [&text](size_t array_index) {
    return UTF16CharIterator::UpperBound(&text, array_index).array_pos();
  };

  ASSERT_EQ(0, upper_bound_pos(0));
  ASSERT_EQ(1, upper_bound_pos(1));
  ASSERT_EQ(2, upper_bound_pos(2));
  ASSERT_EQ(4, upper_bound_pos(3));
  ASSERT_EQ(4, upper_bound_pos(4));
  ASSERT_EQ(5, upper_bound_pos(5));
}
195
// Checks where LowerBound() places the iterator for every code-unit index of
// kTestString. Index 3 is the second code unit of U+1D538, and the expected
// position for it is 2, the start of that same character.
TEST(CharIteratorsTest, TestUTF16_LowerBound) {
  string16 text(UTF8ToUTF16(kTestString));
  const auto lower_bound_pos = [&text](size_t array_index) {
    return UTF16CharIterator::LowerBound(&text, array_index).array_pos();
  };

  ASSERT_EQ(0, lower_bound_pos(0));
  ASSERT_EQ(1, lower_bound_pos(1));
  ASSERT_EQ(2, lower_bound_pos(2));
  ASSERT_EQ(2, lower_bound_pos(3));
  ASSERT_EQ(4, lower_bound_pos(4));
  ASSERT_EQ(5, lower_bound_pos(5));
}
205
206 } // namespace i18n
207 } // namespace base
208