1 // Copyright 2019 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "third_party/blink/renderer/core/page/scrolling/text_fragment_finder.h"
6
7 #include <memory>
8
9 #include "third_party/blink/public/platform/web_string.h"
10 #include "third_party/blink/renderer/core/dom/document.h"
11 #include "third_party/blink/renderer/core/dom/range.h"
12 #include "third_party/blink/renderer/core/editing/ephemeral_range.h"
13 #include "third_party/blink/renderer/core/editing/finder/find_buffer.h"
14 #include "third_party/blink/renderer/core/editing/finder/find_options.h"
15 #include "third_party/blink/renderer/core/editing/iterators/character_iterator.h"
16 #include "third_party/blink/renderer/core/editing/position.h"
17 #include "third_party/blink/renderer/core/page/scrolling/text_fragment_selector.h"
18 #include "third_party/blink/renderer/platform/text/text_boundaries.h"
19
20 namespace blink {
21
22 namespace {
23
24 const char kNoContext[] = "";
25
26 // Determines whether the start and end positions of |range| are on word
27 // boundaries.
28 // TODO(crbug/924965): Determine how this should check node boundaries. This
29 // treats node boundaries as word boundaries, for example "o" is a whole word
30 // match in "f<i>o</i>o".
IsWholeWordMatch(EphemeralRangeInFlatTree range)31 bool IsWholeWordMatch(EphemeralRangeInFlatTree range) {
32 wtf_size_t start_position = range.StartPosition().OffsetInContainerNode();
33
34 if (start_position != 0) {
35 String start_text = range.StartPosition().AnchorNode()->textContent();
36 start_text.Ensure16Bit();
37 wtf_size_t word_start = FindWordStartBoundary(
38 start_text.Characters16(), start_text.length(), start_position);
39 if (word_start != start_position)
40 return false;
41 }
42
43 wtf_size_t end_position = range.EndPosition().OffsetInContainerNode();
44 String end_text = range.EndPosition().AnchorNode()->textContent();
45
46 if (end_position != end_text.length()) {
47 end_text.Ensure16Bit();
48 // We expect end_position to be a word boundary, and FindWordEndBoundary
49 // finds the next word boundary, so start from end_position - 1.
50 wtf_size_t word_end = FindWordEndBoundary(
51 end_text.Characters16(), end_text.length(), end_position - 1);
52 if (word_end != end_position)
53 return false;
54 }
55
56 return true;
57 }
58
FindMatchInRange(String search_text,PositionInFlatTree search_start,PositionInFlatTree search_end)59 EphemeralRangeInFlatTree FindMatchInRange(String search_text,
60 PositionInFlatTree search_start,
61 PositionInFlatTree search_end) {
62 while (search_start < search_end) {
63 const EphemeralRangeInFlatTree search_range(search_start, search_end);
64 EphemeralRangeInFlatTree potential_match = FindBuffer::FindMatchInRange(
65 search_range, search_text, kCaseInsensitive);
66
67 if (potential_match.IsNull() || IsWholeWordMatch(potential_match))
68 return potential_match;
69
70 search_start = potential_match.EndPosition();
71 }
72
73 return EphemeralRangeInFlatTree();
74 }
75
NextTextPosition(PositionInFlatTree position,PositionInFlatTree end_position)76 PositionInFlatTree NextTextPosition(PositionInFlatTree position,
77 PositionInFlatTree end_position) {
78 const TextIteratorBehavior options =
79 TextIteratorBehavior::Builder().SetEmitsSpaceForNbsp(true).Build();
80 CharacterIteratorInFlatTree char_it(position, end_position, options);
81
82 for (; char_it.length(); char_it.Advance(1)) {
83 if (!IsSpaceOrNewline(char_it.CharacterAt(0)))
84 return char_it.StartPosition();
85 }
86
87 return end_position;
88 }
89
90 // Find and return the range of |search_text| if the first text in the search
91 // range is |search_text|, skipping over whitespace and element boundaries.
FindImmediateMatch(String search_text,PositionInFlatTree search_start,PositionInFlatTree search_end)92 EphemeralRangeInFlatTree FindImmediateMatch(String search_text,
93 PositionInFlatTree search_start,
94 PositionInFlatTree search_end) {
95 if (search_text.IsEmpty())
96 return EphemeralRangeInFlatTree();
97
98 search_start = NextTextPosition(search_start, search_end);
99 if (search_start == search_end)
100 return EphemeralRangeInFlatTree();
101
102 FindBuffer buffer(EphemeralRangeInFlatTree(search_start, search_end));
103
104 // TODO(nburris): FindBuffer will search the rest of the document for a match,
105 // but we only need to check for an immediate match, so we should stop
106 // searching if there's no immediate match.
107 FindBuffer::Results match_results =
108 buffer.FindMatches(search_text, kCaseInsensitive);
109
110 if (!match_results.IsEmpty() && match_results.front().start == 0u) {
111 FindBuffer::BufferMatchResult buffer_match = match_results.front();
112 EphemeralRangeInFlatTree match = buffer.RangeFromBufferIndex(
113 buffer_match.start, buffer_match.start + buffer_match.length);
114 if (IsWholeWordMatch(match))
115 return match;
116 }
117
118 return EphemeralRangeInFlatTree();
119 }
120
FindMatchInRangeWithContext(const String & search_text,const String & prefix,const String & suffix,PositionInFlatTree search_start,PositionInFlatTree search_end)121 EphemeralRangeInFlatTree FindMatchInRangeWithContext(
122 const String& search_text,
123 const String& prefix,
124 const String& suffix,
125 PositionInFlatTree search_start,
126 PositionInFlatTree search_end) {
127 while (search_start != search_end) {
128 EphemeralRangeInFlatTree potential_match;
129
130 if (!prefix.IsEmpty()) {
131 EphemeralRangeInFlatTree prefix_match =
132 FindMatchInRange(prefix, search_start, search_end);
133
134 // No prefix_match in remaining range
135 if (prefix_match.IsNull())
136 return EphemeralRangeInFlatTree();
137
138 search_start = prefix_match.EndPosition();
139 potential_match =
140 FindImmediateMatch(search_text, search_start, search_end);
141
142 // No search_text match after current prefix_match
143 if (potential_match.IsNull())
144 continue;
145 } else {
146 potential_match = FindMatchInRange(search_text, search_start, search_end);
147
148 // No search_text match in remaining range
149 if (potential_match.IsNull())
150 return EphemeralRangeInFlatTree();
151
152 search_start = potential_match.EndPosition();
153 }
154
155 PositionInFlatTree suffix_start = potential_match.EndPosition();
156 DCHECK(potential_match.IsNotNull());
157 if (!suffix.IsEmpty()) {
158 EphemeralRangeInFlatTree suffix_match =
159 FindImmediateMatch(suffix, suffix_start, search_end);
160
161 // No suffix match after current potential_match
162 if (suffix_match.IsNull())
163 continue;
164 }
165
166 // If we reach here without a return or continue, we have a full match.
167 return potential_match;
168 }
169
170 return EphemeralRangeInFlatTree();
171 }
172
173 } // namespace
174
TextFragmentFinder(Client & client,const TextFragmentSelector & selector)175 TextFragmentFinder::TextFragmentFinder(Client& client,
176 const TextFragmentSelector& selector)
177 : client_(client), selector_(selector) {
178 DCHECK(!selector_.Start().IsEmpty());
179 }
180
FindMatch(Document & document)181 void TextFragmentFinder::FindMatch(Document& document) {
182 PositionInFlatTree search_start =
183 PositionInFlatTree::FirstPositionInNode(document);
184
185 auto forced_lock_scope = document.GetScopedForceActivatableLocks();
186 document.UpdateStyleAndLayout(DocumentUpdateReason::kFindInPage);
187
188 EphemeralRangeInFlatTree match =
189 FindMatchFromPosition(document, search_start);
190
191 if (match.IsNotNull()) {
192 client_.DidFindMatch(match);
193
194 // Continue searching to see if we have an ambiguous selector.
195 // TODO(crbug.com/919204): This is temporary and only for measuring
196 // ambiguous matching during prototyping.
197 EphemeralRangeInFlatTree ambiguous_match =
198 FindMatchFromPosition(document, match.EndPosition());
199 if (ambiguous_match.IsNotNull())
200 client_.DidFindAmbiguousMatch();
201 }
202 }
203
FindMatchFromPosition(Document & document,PositionInFlatTree search_start)204 EphemeralRangeInFlatTree TextFragmentFinder::FindMatchFromPosition(
205 Document& document,
206 PositionInFlatTree search_start) {
207 PositionInFlatTree search_end;
208 if (document.documentElement() && document.documentElement()->lastChild()) {
209 search_end =
210 PositionInFlatTree::AfterNode(*document.documentElement()->lastChild());
211 } else {
212 search_end = PositionInFlatTree::LastPositionInNode(document);
213 }
214
215 // TODO(crbug.com/930156): Make FindMatch work asynchronously.
216 EphemeralRangeInFlatTree match;
217 if (selector_.Type() == TextFragmentSelector::kExact) {
218 match = FindMatchInRangeWithContext(selector_.Start(), selector_.Prefix(),
219 selector_.Suffix(), search_start,
220 search_end);
221 } else {
222 EphemeralRangeInFlatTree start_match =
223 FindMatchInRangeWithContext(selector_.Start(), selector_.Prefix(),
224 kNoContext, search_start, search_end);
225 if (start_match.IsNull())
226 return start_match;
227
228 // TODO(crbug.com/924964): Determine what we should do if the start text and
229 // end text are the same (and there are no context terms). This
230 // implementation continues searching for the next instance of the text,
231 // from the end of the first instance.
232 search_start = start_match.EndPosition();
233 EphemeralRangeInFlatTree end_match = FindMatchInRangeWithContext(
234 selector_.End(), kNoContext, selector_.Suffix(), search_start,
235 search_end);
236 if (end_match.IsNotNull()) {
237 match = EphemeralRangeInFlatTree(start_match.StartPosition(),
238 end_match.EndPosition());
239 }
240 }
241
242 return match;
243 }
244
245 } // namespace blink
246