1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "ui/accessibility/ax_text_utils.h"
6 
7 #include <algorithm>
8 
9 #include "base/check_op.h"
10 #include "base/i18n/break_iterator.h"
11 #include "base/notreached.h"
12 #include "base/numerics/safe_conversions.h"
13 #include "base/optional.h"
14 #include "base/strings/string_util.h"
15 #include "base/strings/utf_string_conversions.h"
16 #include "ui/accessibility/ax_enums.mojom.h"
17 
18 namespace ui {
19 
20 namespace {
21 
ICUBreakTypeForBoundaryType(ax::mojom::TextBoundary boundary)22 base::i18n::BreakIterator::BreakType ICUBreakTypeForBoundaryType(
23     ax::mojom::TextBoundary boundary) {
24   switch (boundary) {
25     case ax::mojom::TextBoundary::kCharacter:
26       return base::i18n::BreakIterator::BREAK_CHARACTER;
27     case ax::mojom::TextBoundary::kSentenceStart:
28       return base::i18n::BreakIterator::BREAK_SENTENCE;
29     case ax::mojom::TextBoundary::kWordStart:
30     case ax::mojom::TextBoundary::kWordStartOrEnd:
31       return base::i18n::BreakIterator::BREAK_WORD;
32     // These are currently unused since line breaking is done via an array of
33     // line break offsets, and object boundary by finding no boundary within the
34     // current node.
35     case ax::mojom::TextBoundary::kObject:
36     case ax::mojom::TextBoundary::kLineStart:
37     case ax::mojom::TextBoundary::kParagraphStart:
38       return base::i18n::BreakIterator::BREAK_NEWLINE;
39     default:
40       NOTREACHED() << boundary;
41       return base::i18n::BreakIterator::BREAK_NEWLINE;
42   }
43 }
44 
45 }  // namespace
46 
// "line_breaks" is a misnomer: Blink provides the start offset of each line,
// not the offsets of the line breaks themselves.
// TODO(nektar): Rename line_breaks a11y attribute and variable references.
FindAccessibleTextBoundary(const base::string16 & text,const std::vector<int> & line_breaks,ax::mojom::TextBoundary boundary,size_t start_offset,ax::mojom::MoveDirection direction,ax::mojom::TextAffinity affinity)50 size_t FindAccessibleTextBoundary(const base::string16& text,
51                                   const std::vector<int>& line_breaks,
52                                   ax::mojom::TextBoundary boundary,
53                                   size_t start_offset,
54                                   ax::mojom::MoveDirection direction,
55                                   ax::mojom::TextAffinity affinity) {
56   DCHECK_NE(boundary, ax::mojom::TextBoundary::kNone);
57   size_t text_size = text.size();
58   DCHECK_LE(start_offset, text_size);
59   DCHECK_NE(direction, ax::mojom::MoveDirection::kNone);
60 
61   base::i18n::BreakIterator::BreakType break_type =
62       ICUBreakTypeForBoundaryType(boundary);
63   base::i18n::BreakIterator break_iter(text, break_type);
64   if (boundary == ax::mojom::TextBoundary::kCharacter ||
65       boundary == ax::mojom::TextBoundary::kSentenceStart ||
66       boundary == ax::mojom::TextBoundary::kWordStart ||
67       boundary == ax::mojom::TextBoundary::kWordStartOrEnd) {
68     if (!break_iter.Init())
69       return start_offset;
70   }
71 
72   if (boundary == ax::mojom::TextBoundary::kLineStart) {
73     if (direction == ax::mojom::MoveDirection::kForward) {
74       for (int line_break : line_breaks) {
75         size_t clamped_line_break = size_t{std::max(0, line_break)};
76         if ((affinity == ax::mojom::TextAffinity::kDownstream &&
77              clamped_line_break > start_offset) ||
78             (affinity == ax::mojom::TextAffinity::kUpstream &&
79              clamped_line_break >= start_offset)) {
80           return clamped_line_break;
81         }
82       }
83       return text_size;
84     } else {
85       for (size_t j = line_breaks.size(); j != 0; --j) {
86         size_t line_break = line_breaks[j - 1] >= 0 ? line_breaks[j - 1] : 0;
87         if ((affinity == ax::mojom::TextAffinity::kDownstream &&
88              line_break <= start_offset) ||
89             (affinity == ax::mojom::TextAffinity::kUpstream &&
90              line_break < start_offset)) {
91           return line_break;
92         }
93       }
94       return 0;
95     }
96   }
97 
98   size_t result = start_offset;
99   for (;;) {
100     size_t pos;
101     if (direction == ax::mojom::MoveDirection::kForward) {
102       if (result >= text_size)
103         return text_size;
104       pos = result;
105     } else {
106       if (result == 0)
107         return 0;
108       pos = result - 1;
109     }
110 
111     switch (boundary) {
112       case ax::mojom::TextBoundary::kLineStart:
113         NOTREACHED() << boundary;  // This is handled above.
114         return result;
115       case ax::mojom::TextBoundary::kCharacter:
116         if (break_iter.IsGraphemeBoundary(result)) {
117           // If we are searching forward and we are still at the start offset,
118           // we need to find the next character.
119           if (direction == ax::mojom::MoveDirection::kBackward ||
120               result != start_offset)
121             return result;
122         }
123         break;
124       case ax::mojom::TextBoundary::kWordStart:
125         if (break_iter.IsStartOfWord(result)) {
126           // If we are searching forward and we are still at the start offset,
127           // we need to find the next word.
128           DCHECK_NE(direction, ax::mojom::MoveDirection::kNone);
129           if (direction == ax::mojom::MoveDirection::kBackward ||
130               result != start_offset)
131             return result;
132         }
133         break;
134       case ax::mojom::TextBoundary::kWordStartOrEnd:
135         if (break_iter.IsStartOfWord(result)) {
136           // If we are searching forward and we are still at the start offset,
137           // we need to find the next word.
138           DCHECK_NE(direction, ax::mojom::MoveDirection::kNone);
139           if (direction == ax::mojom::MoveDirection::kBackward ||
140               result != start_offset)
141             return result;
142         } else if (break_iter.IsEndOfWord(result)) {
143           // If we are searching backward and we are still at the end offset, we
144           // need to find the previous word.
145           DCHECK_NE(direction, ax::mojom::MoveDirection::kNone);
146           if (direction == ax::mojom::MoveDirection::kForward ||
147               result != start_offset)
148             return result;
149         }
150         break;
151       case ax::mojom::TextBoundary::kSentenceStart:
152         if (break_iter.IsSentenceBoundary(result)) {
153           // If we are searching forward and we are still at the start offset,
154           // we need to find the next sentence.
155           if (direction == ax::mojom::MoveDirection::kBackward ||
156               result != start_offset) {
157             // ICU sometimes returns sentence boundaries in the whitespace
158             // between sentences. For the purposes of accessibility, we want to
159             // include all whitespace at the end of a sentence. We move the
160             // boundary past the last whitespace offset. This works the same for
161             // backwards and forwards searches.
162             while (result < text_size &&
163                    base::IsUnicodeWhitespace(text[result]))
164               result++;
165             return result;
166           }
167         }
168         break;
169       case ax::mojom::TextBoundary::kParagraphStart:
170         if (text[pos] == '\n')
171           return result;
172         break;
173       default:
174         break;
175     }
176 
177     if (direction == ax::mojom::MoveDirection::kForward) {
178       result++;
179     } else {
180       result--;
181     }
182   }
183 }
184 
GetWordStartOffsets(const base::string16 & text)185 std::vector<int> GetWordStartOffsets(const base::string16& text) {
186   std::vector<int> word_starts;
187   base::i18n::BreakIterator iter(text, base::i18n::BreakIterator::BREAK_WORD);
188   if (!iter.Init())
189     return word_starts;
190   // iter.Advance() returns false if we've run past end of the text.
191   while (iter.Advance()) {
192     if (!iter.IsWord())
193       continue;
194     word_starts.push_back(
195         base::checked_cast<int>(iter.prev()) /* start index */);
196   }
197   return word_starts;
198 }
199 
GetWordEndOffsets(const base::string16 & text)200 std::vector<int> GetWordEndOffsets(const base::string16& text) {
201   std::vector<int> word_ends;
202   base::i18n::BreakIterator iter(text, base::i18n::BreakIterator::BREAK_WORD);
203   if (!iter.Init())
204     return word_ends;
205   // iter.Advance() returns false if we've run past end of the text.
206   while (iter.Advance()) {
207     if (!iter.IsWord())
208       continue;
209     word_ends.push_back(base::checked_cast<int>(iter.pos()) /* end index */);
210   }
211   return word_ends;
212 }
213 
214 }  // namespace ui
215