1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/i18n/char_iterator.h"
6 
7 #include "base/logging.h"
8 #include "third_party/icu/source/common/unicode/utf16.h"
9 #include "third_party/icu/source/common/unicode/utf8.h"
10 
11 namespace base {
12 namespace i18n {
13 
14 // UTF8CharIterator ------------------------------------------------------------
15 
UTF8CharIterator(const std::string * str)16 UTF8CharIterator::UTF8CharIterator(const std::string* str)
17     : str_(reinterpret_cast<const uint8_t*>(str->data())),
18       len_(str->size()),
19       array_pos_(0),
20       next_pos_(0),
21       char_pos_(0),
22       char_(0) {
23   if (len_)
24     U8_NEXT(str_, next_pos_, len_, char_);
25 }
26 
// Defaulted destructor, defined out of line: destruction is the
// compiler-generated member-wise behavior.
27 UTF8CharIterator::~UTF8CharIterator() = default;
28 
Advance()29 bool UTF8CharIterator::Advance() {
30   if (array_pos_ >= len_)
31     return false;
32 
33   array_pos_ = next_pos_;
34   char_pos_++;
35   if (next_pos_ < len_)
36     U8_NEXT(str_, next_pos_, len_, char_);
37 
38   return true;
39 }
40 
41 // UTF16CharIterator -----------------------------------------------------------
42 
UTF16CharIterator(const string16 * str)43 UTF16CharIterator::UTF16CharIterator(const string16* str)
44     : UTF16CharIterator(str, 0) {}
45 
UTF16CharIterator(const char16 * str,size_t str_len)46 UTF16CharIterator::UTF16CharIterator(const char16* str, size_t str_len)
47     : UTF16CharIterator(str, str_len, 0) {}
48 
// Defaulted move constructor, defined out of line: each member is
// move-constructed from the corresponding member of |to_move|.
49 UTF16CharIterator::UTF16CharIterator(UTF16CharIterator&& to_move) = default;
50 
// Defaulted destructor, defined out of line: destruction is the
// compiler-generated member-wise behavior.
51 UTF16CharIterator::~UTF16CharIterator() = default;
52 
// Defaulted move assignment, defined out of line: each member is
// move-assigned from the corresponding member of |to_move|.
53 UTF16CharIterator& UTF16CharIterator::operator=(UTF16CharIterator&& to_move) =
54     default;
55 
56 // static
LowerBound(const string16 * str,size_t array_index)57 UTF16CharIterator UTF16CharIterator::LowerBound(const string16* str,
58                                                 size_t array_index) {
59   return LowerBound(reinterpret_cast<const char16*>(str->data()), str->length(),
60                     array_index);
61 }
62 
63 // static
LowerBound(const char16 * str,size_t length,size_t array_index)64 UTF16CharIterator UTF16CharIterator::LowerBound(const char16* str,
65                                                 size_t length,
66                                                 size_t array_index) {
67   DCHECK_LE(array_index, length);
68   U16_SET_CP_START(str, 0, array_index);
69   return UTF16CharIterator(str, length, array_index);
70 }
71 
72 // static
UpperBound(const string16 * str,size_t array_index)73 UTF16CharIterator UTF16CharIterator::UpperBound(const string16* str,
74                                                 size_t array_index) {
75   return UpperBound(reinterpret_cast<const char16*>(str->data()), str->length(),
76                     array_index);
77 }
78 
79 // static
UpperBound(const char16 * str,size_t length,size_t array_index)80 UTF16CharIterator UTF16CharIterator::UpperBound(const char16* str,
81                                                 size_t length,
82                                                 size_t array_index) {
83   DCHECK_LE(array_index, length);
84   U16_SET_CP_LIMIT(str, 0, array_index, length);
85   return UTF16CharIterator(str, length, array_index);
86 }
87 
NextCodePoint() const88 int32_t UTF16CharIterator::NextCodePoint() const {
89   if (next_pos_ >= len_)
90     return 0;
91 
92   UChar32 c;
93   U16_GET(str_, 0, next_pos_, len_, c);
94   return c;
95 }
96 
PreviousCodePoint() const97 int32_t UTF16CharIterator::PreviousCodePoint() const {
98   if (array_pos_ <= 0)
99     return 0;
100 
101   uint32_t pos = array_pos_;
102   UChar32 c;
103   U16_PREV(str_, 0, pos, c);
104   return c;
105 }
106 
Advance()107 bool UTF16CharIterator::Advance() {
108   if (array_pos_ >= len_)
109     return false;
110 
111   array_pos_ = next_pos_;
112   char_offset_++;
113   if (next_pos_ < len_)
114     ReadChar();
115 
116   return true;
117 }
118 
Rewind()119 bool UTF16CharIterator::Rewind() {
120   if (array_pos_ <= 0)
121     return false;
122 
123   next_pos_ = array_pos_;
124   char_offset_--;
125   U16_PREV(str_, 0, array_pos_, char_);
126   return true;
127 }
128 
UTF16CharIterator(const string16 * str,int32_t initial_pos)129 UTF16CharIterator::UTF16CharIterator(const string16* str, int32_t initial_pos)
130     : UTF16CharIterator(str->data(), str->length(), initial_pos) {}
131 
UTF16CharIterator(const char16 * str,size_t str_len,int32_t initial_pos)132 UTF16CharIterator::UTF16CharIterator(const char16* str,
133                                      size_t str_len,
134                                      int32_t initial_pos)
135     : str_(str),
136       len_(str_len),
137       array_pos_(initial_pos),
138       next_pos_(initial_pos),
139       char_offset_(0),
140       char_(0) {
141   // This has the side-effect of advancing |next_pos_|.
142   if (array_pos_ < len_)
143     ReadChar();
144 }
145 
ReadChar()146 void UTF16CharIterator::ReadChar() {
147   // This is actually a huge macro, so is worth having in a separate function.
148   U16_NEXT(str_, next_pos_, len_, char_);
149 }
150 
151 }  // namespace i18n
152 }  // namespace base
153