1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/i18n/char_iterator.h"
6 
7 #include "base/strings/utf_string_conversions.h"
8 #include "testing/gtest/include/gtest/gtest.h"
9 
10 namespace base {
11 namespace i18n {
12 
// UTF-8 encoded test string containing 4 characters:
//   x
//   u with circumflex (U+00FB)     - 2 bytes in UTF8 (\303\273),
//                                    1 codeword in UTF16
//   math double-struck A (U+1D538) - 4 bytes in UTF8 (\360\235\224\270),
//                                    2 codewords in UTF16
//   z
//
// The bytes are written as explicit octal escapes rather than as \u/\U
// universal-character-names: in a narrow string literal the latter are
// encoded in the compiler's execution character set, which is not guaranteed
// to be UTF-8, while the tests below hand this string to UTF8ToUTF16().
static const char* const kTestString = "x\303\273\360\235\224\270z";
19 
// Steps a UTF8CharIterator over an empty string and then over a string whose
// middle character occupies two bytes, checking the byte index (array_pos)
// and the character index (char_pos) at every stop.
TEST(CharIteratorsTest, TestUTF8) {
  // Empty input: already at the end, and advancing must report failure.
  std::string no_text;
  UTF8CharIterator no_text_it(&no_text);
  EXPECT_TRUE(no_text_it.end());
  EXPECT_EQ(0, no_text_it.array_pos());
  EXPECT_EQ(0, no_text_it.char_pos());
  EXPECT_FALSE(no_text_it.Advance());

  // 's', [u with circumflex] written as the two bytes \303\273, 'r'.
  std::string text("s\303\273r");
  UTF8CharIterator it(&text);

  // Character 0: 's' at byte 0.
  EXPECT_FALSE(it.end());
  EXPECT_EQ(0, it.array_pos());
  EXPECT_EQ(0, it.char_pos());
  EXPECT_EQ('s', it.get());
  EXPECT_TRUE(it.Advance());

  // Character 1: U+00FB at byte 1.
  EXPECT_FALSE(it.end());
  EXPECT_EQ(1, it.array_pos());
  EXPECT_EQ(1, it.char_pos());
  EXPECT_EQ(0xFB, it.get());
  EXPECT_TRUE(it.Advance());

  // Character 2: 'r' at byte 3, because the previous character used 2 bytes.
  EXPECT_FALSE(it.end());
  EXPECT_EQ(3, it.array_pos());
  EXPECT_EQ(2, it.char_pos());
  EXPECT_EQ('r', it.get());
  EXPECT_TRUE(it.Advance());

  // One past the last character.
  EXPECT_TRUE(it.end());
  EXPECT_EQ(4, it.array_pos());
  EXPECT_EQ(3, it.char_pos());

  // The value read at the end is unspecified; the call just must not crash.
  it.get();

  // Nothing is left to advance over.
  EXPECT_FALSE(it.Advance());
}
57 
// A UTF16CharIterator over an empty string is at the start and at the end at
// once, cannot advance, and tolerates every code point accessor.
TEST(CharIteratorsTest, TestUTF16_Empty) {
  string16 no_text;
  UTF16CharIterator it(&no_text);

  EXPECT_TRUE(it.end());
  EXPECT_TRUE(it.start());
  EXPECT_EQ(0, it.array_pos());
  EXPECT_EQ(0, it.char_offset());
  EXPECT_FALSE(it.Advance());

  // The results are deliberately ignored; these calls only need to survive.
  it.get();
  it.NextCodePoint();
  it.PreviousCodePoint();
}
72 
// Advances a UTF16CharIterator across kTestString from the first character
// to the end. At each stop it checks the codeword index (array_pos), the
// character offset from the starting point (char_offset), the current code
// point, and both neighbouring code points.
TEST(CharIteratorsTest, TestUTF16) {
  string16 text = UTF8ToUTF16(kTestString);
  UTF16CharIterator it(&text);

  // On 'x', the first character.
  EXPECT_FALSE(it.end());
  EXPECT_TRUE(it.start());
  EXPECT_EQ(0, it.array_pos());
  EXPECT_EQ(0, it.char_offset());
  EXPECT_EQ('x', it.get());
  // Nothing precedes the first character; this only has to not crash.
  it.PreviousCodePoint();
  EXPECT_EQ(0xFB, it.NextCodePoint());
  EXPECT_TRUE(it.Advance());

  // On U+00FB, a single codeword.
  EXPECT_FALSE(it.end());
  EXPECT_FALSE(it.start());
  EXPECT_EQ(1, it.array_pos());
  EXPECT_EQ(1, it.char_offset());
  EXPECT_EQ(0xFB, it.get());
  EXPECT_EQ('x', it.PreviousCodePoint());
  EXPECT_EQ(0x1D538, it.NextCodePoint());
  EXPECT_TRUE(it.Advance());

  // On U+1D538, which spans two codewords.
  EXPECT_FALSE(it.end());
  EXPECT_FALSE(it.start());
  EXPECT_EQ(2, it.array_pos());
  EXPECT_EQ(2, it.char_offset());
  EXPECT_EQ(0x1D538, it.get());
  EXPECT_EQ(0xFB, it.PreviousCodePoint());
  EXPECT_EQ('z', it.NextCodePoint());
  EXPECT_TRUE(it.Advance());

  // On 'z': the array index jumps by two, the character offset by one.
  EXPECT_FALSE(it.end());
  EXPECT_FALSE(it.start());
  EXPECT_EQ(4, it.array_pos());
  EXPECT_EQ(3, it.char_offset());
  EXPECT_EQ('z', it.get());
  EXPECT_EQ(0x1D538, it.PreviousCodePoint());
  // Nothing follows the last character; this only has to not crash.
  it.NextCodePoint();
  EXPECT_TRUE(it.Advance());

  // Past the last character.
  EXPECT_TRUE(it.end());
  EXPECT_FALSE(it.start());
  EXPECT_EQ(5, it.array_pos());
  EXPECT_EQ(4, it.char_offset());
  EXPECT_EQ('z', it.PreviousCodePoint());

  // Whatever these return at the end is irrelevant; they must not crash.
  it.get();
  it.NextCodePoint();

  // Advancing from the end fails.
  EXPECT_FALSE(it.Advance());
}
126 
// Rewinds a UTF16CharIterator across kTestString from the end back to the
// first character, checking the same properties as the forward walk.
// char_offset() is expected to count down from 0 at the starting position.
TEST(CharIteratorsTest, TestUTF16_Rewind) {
  string16 text = UTF8ToUTF16(kTestString);

  // Starting on the terminating null character is legal, and it is the
  // position at which end() reports true, so beginning there lets this test
  // cover end() as well.
  UTF16CharIterator it = UTF16CharIterator::UpperBound(&text, text.length());
  EXPECT_TRUE(it.end());
  EXPECT_FALSE(it.start());
  // Index of the terminating null character, i.e. the string's length in
  // char16s.
  EXPECT_EQ(5, it.array_pos());
  EXPECT_EQ(0, it.char_offset());
  EXPECT_EQ('z', it.PreviousCodePoint());
  // Whatever these return at the end is irrelevant; they must not crash.
  it.get();
  it.NextCodePoint();
  EXPECT_TRUE(it.Rewind());

  // On 'z', the last character.
  EXPECT_FALSE(it.end());
  EXPECT_FALSE(it.start());
  EXPECT_EQ(4, it.array_pos());
  EXPECT_EQ(-1, it.char_offset());
  EXPECT_EQ('z', it.get());
  EXPECT_EQ(0x1D538, it.PreviousCodePoint());
  // Nothing follows the last character; this only has to not crash.
  it.NextCodePoint();
  EXPECT_TRUE(it.Rewind());

  // On U+1D538: the array index drops by two, the character offset by one.
  EXPECT_FALSE(it.end());
  EXPECT_FALSE(it.start());
  EXPECT_EQ(2, it.array_pos());
  EXPECT_EQ(-2, it.char_offset());
  EXPECT_EQ(0x1D538, it.get());
  EXPECT_EQ(0xFB, it.PreviousCodePoint());
  EXPECT_EQ('z', it.NextCodePoint());
  EXPECT_TRUE(it.Rewind());

  // On U+00FB.
  EXPECT_FALSE(it.end());
  EXPECT_FALSE(it.start());
  EXPECT_EQ(1, it.array_pos());
  EXPECT_EQ(-3, it.char_offset());
  EXPECT_EQ(0xFB, it.get());
  EXPECT_EQ('x', it.PreviousCodePoint());
  EXPECT_EQ(0x1D538, it.NextCodePoint());
  EXPECT_TRUE(it.Rewind());

  // On 'x', the first character.
  EXPECT_FALSE(it.end());
  EXPECT_TRUE(it.start());
  EXPECT_EQ(0, it.array_pos());
  EXPECT_EQ(-4, it.char_offset());
  EXPECT_EQ('x', it.get());
  EXPECT_EQ(0xFB, it.NextCodePoint());
  // Nothing precedes the first character; this only has to not crash.
  it.PreviousCodePoint();

  // Rewinding from the start fails.
  EXPECT_FALSE(it.Rewind());
}
185 
// UpperBound() must yield an iterator on a character boundary. Index 3 is the
// second codeword of the two-codeword character that starts at index 2, and
// is expected to map up to index 4; every other index maps to itself.
TEST(CharIteratorsTest, TestUTF16_UpperBound) {
  string16 text = UTF8ToUTF16(kTestString);
  ASSERT_EQ(0, UTF16CharIterator::UpperBound(&text, 0).array_pos());
  ASSERT_EQ(1, UTF16CharIterator::UpperBound(&text, 1).array_pos());
  ASSERT_EQ(2, UTF16CharIterator::UpperBound(&text, 2).array_pos());
  // Inside the surrogate pair: moved up to the next character.
  ASSERT_EQ(4, UTF16CharIterator::UpperBound(&text, 3).array_pos());
  ASSERT_EQ(4, UTF16CharIterator::UpperBound(&text, 4).array_pos());
  // The terminating position is accepted as-is.
  ASSERT_EQ(5, UTF16CharIterator::UpperBound(&text, 5).array_pos());
}
195 
// LowerBound() must yield an iterator on a character boundary. Index 3 is the
// second codeword of the two-codeword character that starts at index 2, and
// is expected to map down to index 2; every other index maps to itself.
TEST(CharIteratorsTest, TestUTF16_LowerBound) {
  string16 text = UTF8ToUTF16(kTestString);
  ASSERT_EQ(0, UTF16CharIterator::LowerBound(&text, 0).array_pos());
  ASSERT_EQ(1, UTF16CharIterator::LowerBound(&text, 1).array_pos());
  ASSERT_EQ(2, UTF16CharIterator::LowerBound(&text, 2).array_pos());
  // Inside the surrogate pair: moved down to the start of the character.
  ASSERT_EQ(2, UTF16CharIterator::LowerBound(&text, 3).array_pos());
  ASSERT_EQ(4, UTF16CharIterator::LowerBound(&text, 4).array_pos());
  // The terminating position is accepted as-is.
  ASSERT_EQ(5, UTF16CharIterator::LowerBound(&text, 5).array_pos());
}
205 
206 }  // namespace i18n
207 }  // namespace base
208