1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "ui/accessibility/ax_text_utils.h"
6
7 #include <algorithm>
8
9 #include "base/check_op.h"
10 #include "base/i18n/break_iterator.h"
11 #include "base/notreached.h"
12 #include "base/numerics/safe_conversions.h"
13 #include "base/optional.h"
14 #include "base/strings/string_util.h"
15 #include "base/strings/utf_string_conversions.h"
16 #include "ui/accessibility/ax_enums.mojom.h"
17
18 namespace ui {
19
20 namespace {
21
ICUBreakTypeForBoundaryType(ax::mojom::TextBoundary boundary)22 base::i18n::BreakIterator::BreakType ICUBreakTypeForBoundaryType(
23 ax::mojom::TextBoundary boundary) {
24 switch (boundary) {
25 case ax::mojom::TextBoundary::kCharacter:
26 return base::i18n::BreakIterator::BREAK_CHARACTER;
27 case ax::mojom::TextBoundary::kSentenceStart:
28 return base::i18n::BreakIterator::BREAK_SENTENCE;
29 case ax::mojom::TextBoundary::kWordStart:
30 case ax::mojom::TextBoundary::kWordStartOrEnd:
31 return base::i18n::BreakIterator::BREAK_WORD;
32 // These are currently unused since line breaking is done via an array of
33 // line break offsets, and object boundary by finding no boundary within the
34 // current node.
35 case ax::mojom::TextBoundary::kObject:
36 case ax::mojom::TextBoundary::kLineStart:
37 case ax::mojom::TextBoundary::kParagraphStart:
38 return base::i18n::BreakIterator::BREAK_NEWLINE;
39 default:
40 NOTREACHED() << boundary;
41 return base::i18n::BreakIterator::BREAK_NEWLINE;
42 }
43 }
44
45 } // namespace
46
47 // line_breaks is a Misnomer. Blink provides the start offsets of each line
48 // not the line breaks.
49 // TODO(nektar): Rename line_breaks a11y attribute and variable references.
FindAccessibleTextBoundary(const base::string16 & text,const std::vector<int> & line_breaks,ax::mojom::TextBoundary boundary,size_t start_offset,ax::mojom::MoveDirection direction,ax::mojom::TextAffinity affinity)50 size_t FindAccessibleTextBoundary(const base::string16& text,
51 const std::vector<int>& line_breaks,
52 ax::mojom::TextBoundary boundary,
53 size_t start_offset,
54 ax::mojom::MoveDirection direction,
55 ax::mojom::TextAffinity affinity) {
56 DCHECK_NE(boundary, ax::mojom::TextBoundary::kNone);
57 size_t text_size = text.size();
58 DCHECK_LE(start_offset, text_size);
59 DCHECK_NE(direction, ax::mojom::MoveDirection::kNone);
60
61 base::i18n::BreakIterator::BreakType break_type =
62 ICUBreakTypeForBoundaryType(boundary);
63 base::i18n::BreakIterator break_iter(text, break_type);
64 if (boundary == ax::mojom::TextBoundary::kCharacter ||
65 boundary == ax::mojom::TextBoundary::kSentenceStart ||
66 boundary == ax::mojom::TextBoundary::kWordStart ||
67 boundary == ax::mojom::TextBoundary::kWordStartOrEnd) {
68 if (!break_iter.Init())
69 return start_offset;
70 }
71
72 if (boundary == ax::mojom::TextBoundary::kLineStart) {
73 if (direction == ax::mojom::MoveDirection::kForward) {
74 for (int line_break : line_breaks) {
75 size_t clamped_line_break = size_t{std::max(0, line_break)};
76 if ((affinity == ax::mojom::TextAffinity::kDownstream &&
77 clamped_line_break > start_offset) ||
78 (affinity == ax::mojom::TextAffinity::kUpstream &&
79 clamped_line_break >= start_offset)) {
80 return clamped_line_break;
81 }
82 }
83 return text_size;
84 } else {
85 for (size_t j = line_breaks.size(); j != 0; --j) {
86 size_t line_break = line_breaks[j - 1] >= 0 ? line_breaks[j - 1] : 0;
87 if ((affinity == ax::mojom::TextAffinity::kDownstream &&
88 line_break <= start_offset) ||
89 (affinity == ax::mojom::TextAffinity::kUpstream &&
90 line_break < start_offset)) {
91 return line_break;
92 }
93 }
94 return 0;
95 }
96 }
97
98 size_t result = start_offset;
99 for (;;) {
100 size_t pos;
101 if (direction == ax::mojom::MoveDirection::kForward) {
102 if (result >= text_size)
103 return text_size;
104 pos = result;
105 } else {
106 if (result == 0)
107 return 0;
108 pos = result - 1;
109 }
110
111 switch (boundary) {
112 case ax::mojom::TextBoundary::kLineStart:
113 NOTREACHED() << boundary; // This is handled above.
114 return result;
115 case ax::mojom::TextBoundary::kCharacter:
116 if (break_iter.IsGraphemeBoundary(result)) {
117 // If we are searching forward and we are still at the start offset,
118 // we need to find the next character.
119 if (direction == ax::mojom::MoveDirection::kBackward ||
120 result != start_offset)
121 return result;
122 }
123 break;
124 case ax::mojom::TextBoundary::kWordStart:
125 if (break_iter.IsStartOfWord(result)) {
126 // If we are searching forward and we are still at the start offset,
127 // we need to find the next word.
128 DCHECK_NE(direction, ax::mojom::MoveDirection::kNone);
129 if (direction == ax::mojom::MoveDirection::kBackward ||
130 result != start_offset)
131 return result;
132 }
133 break;
134 case ax::mojom::TextBoundary::kWordStartOrEnd:
135 if (break_iter.IsStartOfWord(result)) {
136 // If we are searching forward and we are still at the start offset,
137 // we need to find the next word.
138 DCHECK_NE(direction, ax::mojom::MoveDirection::kNone);
139 if (direction == ax::mojom::MoveDirection::kBackward ||
140 result != start_offset)
141 return result;
142 } else if (break_iter.IsEndOfWord(result)) {
143 // If we are searching backward and we are still at the end offset, we
144 // need to find the previous word.
145 DCHECK_NE(direction, ax::mojom::MoveDirection::kNone);
146 if (direction == ax::mojom::MoveDirection::kForward ||
147 result != start_offset)
148 return result;
149 }
150 break;
151 case ax::mojom::TextBoundary::kSentenceStart:
152 if (break_iter.IsSentenceBoundary(result)) {
153 // If we are searching forward and we are still at the start offset,
154 // we need to find the next sentence.
155 if (direction == ax::mojom::MoveDirection::kBackward ||
156 result != start_offset) {
157 // ICU sometimes returns sentence boundaries in the whitespace
158 // between sentences. For the purposes of accessibility, we want to
159 // include all whitespace at the end of a sentence. We move the
160 // boundary past the last whitespace offset. This works the same for
161 // backwards and forwards searches.
162 while (result < text_size &&
163 base::IsUnicodeWhitespace(text[result]))
164 result++;
165 return result;
166 }
167 }
168 break;
169 case ax::mojom::TextBoundary::kParagraphStart:
170 if (text[pos] == '\n')
171 return result;
172 break;
173 default:
174 break;
175 }
176
177 if (direction == ax::mojom::MoveDirection::kForward) {
178 result++;
179 } else {
180 result--;
181 }
182 }
183 }
184
GetWordStartOffsets(const base::string16 & text)185 std::vector<int> GetWordStartOffsets(const base::string16& text) {
186 std::vector<int> word_starts;
187 base::i18n::BreakIterator iter(text, base::i18n::BreakIterator::BREAK_WORD);
188 if (!iter.Init())
189 return word_starts;
190 // iter.Advance() returns false if we've run past end of the text.
191 while (iter.Advance()) {
192 if (!iter.IsWord())
193 continue;
194 word_starts.push_back(
195 base::checked_cast<int>(iter.prev()) /* start index */);
196 }
197 return word_starts;
198 }
199
GetWordEndOffsets(const base::string16 & text)200 std::vector<int> GetWordEndOffsets(const base::string16& text) {
201 std::vector<int> word_ends;
202 base::i18n::BreakIterator iter(text, base::i18n::BreakIterator::BREAK_WORD);
203 if (!iter.Init())
204 return word_ends;
205 // iter.Advance() returns false if we've run past end of the text.
206 while (iter.Advance()) {
207 if (!iter.IsWord())
208 continue;
209 word_ends.push_back(base::checked_cast<int>(iter.pos()) /* end index */);
210 }
211 return word_ends;
212 }
213
214 } // namespace ui
215