1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/i18n/char_iterator.h"
6
7 #include "base/logging.h"
8 #include "third_party/icu/source/common/unicode/utf16.h"
9 #include "third_party/icu/source/common/unicode/utf8.h"
10
11 namespace base {
12 namespace i18n {
13
14 // UTF8CharIterator ------------------------------------------------------------
15
UTF8CharIterator(const std::string * str)16 UTF8CharIterator::UTF8CharIterator(const std::string* str)
17 : str_(reinterpret_cast<const uint8_t*>(str->data())),
18 len_(str->size()),
19 array_pos_(0),
20 next_pos_(0),
21 char_pos_(0),
22 char_(0) {
23 if (len_)
24 U8_NEXT(str_, next_pos_, len_, char_);
25 }
26
27 UTF8CharIterator::~UTF8CharIterator() = default;
28
Advance()29 bool UTF8CharIterator::Advance() {
30 if (array_pos_ >= len_)
31 return false;
32
33 array_pos_ = next_pos_;
34 char_pos_++;
35 if (next_pos_ < len_)
36 U8_NEXT(str_, next_pos_, len_, char_);
37
38 return true;
39 }
40
41 // UTF16CharIterator -----------------------------------------------------------
42
UTF16CharIterator(const string16 * str)43 UTF16CharIterator::UTF16CharIterator(const string16* str)
44 : UTF16CharIterator(str, 0) {}
45
UTF16CharIterator(const char16 * str,size_t str_len)46 UTF16CharIterator::UTF16CharIterator(const char16* str, size_t str_len)
47 : UTF16CharIterator(str, str_len, 0) {}
48
49 UTF16CharIterator::UTF16CharIterator(UTF16CharIterator&& to_move) = default;
50
51 UTF16CharIterator::~UTF16CharIterator() = default;
52
53 UTF16CharIterator& UTF16CharIterator::operator=(UTF16CharIterator&& to_move) =
54 default;
55
56 // static
LowerBound(const string16 * str,size_t array_index)57 UTF16CharIterator UTF16CharIterator::LowerBound(const string16* str,
58 size_t array_index) {
59 return LowerBound(reinterpret_cast<const char16*>(str->data()), str->length(),
60 array_index);
61 }
62
63 // static
LowerBound(const char16 * str,size_t length,size_t array_index)64 UTF16CharIterator UTF16CharIterator::LowerBound(const char16* str,
65 size_t length,
66 size_t array_index) {
67 DCHECK_LE(array_index, length);
68 U16_SET_CP_START(str, 0, array_index);
69 return UTF16CharIterator(str, length, array_index);
70 }
71
72 // static
UpperBound(const string16 * str,size_t array_index)73 UTF16CharIterator UTF16CharIterator::UpperBound(const string16* str,
74 size_t array_index) {
75 return UpperBound(reinterpret_cast<const char16*>(str->data()), str->length(),
76 array_index);
77 }
78
79 // static
UpperBound(const char16 * str,size_t length,size_t array_index)80 UTF16CharIterator UTF16CharIterator::UpperBound(const char16* str,
81 size_t length,
82 size_t array_index) {
83 DCHECK_LE(array_index, length);
84 U16_SET_CP_LIMIT(str, 0, array_index, length);
85 return UTF16CharIterator(str, length, array_index);
86 }
87
NextCodePoint() const88 int32_t UTF16CharIterator::NextCodePoint() const {
89 if (next_pos_ >= len_)
90 return 0;
91
92 UChar32 c;
93 U16_GET(str_, 0, next_pos_, len_, c);
94 return c;
95 }
96
PreviousCodePoint() const97 int32_t UTF16CharIterator::PreviousCodePoint() const {
98 if (array_pos_ <= 0)
99 return 0;
100
101 uint32_t pos = array_pos_;
102 UChar32 c;
103 U16_PREV(str_, 0, pos, c);
104 return c;
105 }
106
Advance()107 bool UTF16CharIterator::Advance() {
108 if (array_pos_ >= len_)
109 return false;
110
111 array_pos_ = next_pos_;
112 char_offset_++;
113 if (next_pos_ < len_)
114 ReadChar();
115
116 return true;
117 }
118
Rewind()119 bool UTF16CharIterator::Rewind() {
120 if (array_pos_ <= 0)
121 return false;
122
123 next_pos_ = array_pos_;
124 char_offset_--;
125 U16_PREV(str_, 0, array_pos_, char_);
126 return true;
127 }
128
UTF16CharIterator(const string16 * str,int32_t initial_pos)129 UTF16CharIterator::UTF16CharIterator(const string16* str, int32_t initial_pos)
130 : UTF16CharIterator(str->data(), str->length(), initial_pos) {}
131
UTF16CharIterator(const char16 * str,size_t str_len,int32_t initial_pos)132 UTF16CharIterator::UTF16CharIterator(const char16* str,
133 size_t str_len,
134 int32_t initial_pos)
135 : str_(str),
136 len_(str_len),
137 array_pos_(initial_pos),
138 next_pos_(initial_pos),
139 char_offset_(0),
140 char_(0) {
141 // This has the side-effect of advancing |next_pos_|.
142 if (array_pos_ < len_)
143 ReadChar();
144 }
145
ReadChar()146 void UTF16CharIterator::ReadChar() {
147 // This is actually a huge macro, so is worth having in a separate function.
148 U16_NEXT(str_, next_pos_, len_, char_);
149 }
150
151 } // namespace i18n
152 } // namespace base
153