1 // Copyright 2019 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "third_party/blink/renderer/core/page/scrolling/text_fragment_finder.h"
6 
7 #include <memory>
8 
9 #include "third_party/blink/public/platform/web_string.h"
10 #include "third_party/blink/renderer/core/dom/document.h"
11 #include "third_party/blink/renderer/core/dom/range.h"
12 #include "third_party/blink/renderer/core/editing/ephemeral_range.h"
13 #include "third_party/blink/renderer/core/editing/finder/find_buffer.h"
14 #include "third_party/blink/renderer/core/editing/finder/find_options.h"
15 #include "third_party/blink/renderer/core/editing/iterators/character_iterator.h"
16 #include "third_party/blink/renderer/core/editing/position.h"
17 #include "third_party/blink/renderer/core/page/scrolling/text_fragment_selector.h"
18 #include "third_party/blink/renderer/platform/text/text_boundaries.h"
19 
20 namespace blink {
21 
22 namespace {
23 
24 const char kNoContext[] = "";
25 
26 // Determines whether the start and end positions of |range| are on word
27 // boundaries.
28 // TODO(crbug/924965): Determine how this should check node boundaries. This
29 // treats node boundaries as word boundaries, for example "o" is a whole word
30 // match in "f<i>o</i>o".
IsWholeWordMatch(EphemeralRangeInFlatTree range)31 bool IsWholeWordMatch(EphemeralRangeInFlatTree range) {
32   wtf_size_t start_position = range.StartPosition().OffsetInContainerNode();
33 
34   if (start_position != 0) {
35     String start_text = range.StartPosition().AnchorNode()->textContent();
36     start_text.Ensure16Bit();
37     wtf_size_t word_start = FindWordStartBoundary(
38         start_text.Characters16(), start_text.length(), start_position);
39     if (word_start != start_position)
40       return false;
41   }
42 
43   wtf_size_t end_position = range.EndPosition().OffsetInContainerNode();
44   String end_text = range.EndPosition().AnchorNode()->textContent();
45 
46   if (end_position != end_text.length()) {
47     end_text.Ensure16Bit();
48     // We expect end_position to be a word boundary, and FindWordEndBoundary
49     // finds the next word boundary, so start from end_position - 1.
50     wtf_size_t word_end = FindWordEndBoundary(
51         end_text.Characters16(), end_text.length(), end_position - 1);
52     if (word_end != end_position)
53       return false;
54   }
55 
56   return true;
57 }
58 
FindMatchInRange(String search_text,PositionInFlatTree search_start,PositionInFlatTree search_end)59 EphemeralRangeInFlatTree FindMatchInRange(String search_text,
60                                           PositionInFlatTree search_start,
61                                           PositionInFlatTree search_end) {
62   while (search_start < search_end) {
63     const EphemeralRangeInFlatTree search_range(search_start, search_end);
64     EphemeralRangeInFlatTree potential_match = FindBuffer::FindMatchInRange(
65         search_range, search_text, kCaseInsensitive);
66 
67     if (potential_match.IsNull() || IsWholeWordMatch(potential_match))
68       return potential_match;
69 
70     search_start = potential_match.EndPosition();
71   }
72 
73   return EphemeralRangeInFlatTree();
74 }
75 
NextTextPosition(PositionInFlatTree position,PositionInFlatTree end_position)76 PositionInFlatTree NextTextPosition(PositionInFlatTree position,
77                                     PositionInFlatTree end_position) {
78   const TextIteratorBehavior options =
79       TextIteratorBehavior::Builder().SetEmitsSpaceForNbsp(true).Build();
80   CharacterIteratorInFlatTree char_it(position, end_position, options);
81 
82   for (; char_it.length(); char_it.Advance(1)) {
83     if (!IsSpaceOrNewline(char_it.CharacterAt(0)))
84       return char_it.StartPosition();
85   }
86 
87   return end_position;
88 }
89 
90 // Find and return the range of |search_text| if the first text in the search
91 // range is |search_text|, skipping over whitespace and element boundaries.
FindImmediateMatch(String search_text,PositionInFlatTree search_start,PositionInFlatTree search_end)92 EphemeralRangeInFlatTree FindImmediateMatch(String search_text,
93                                             PositionInFlatTree search_start,
94                                             PositionInFlatTree search_end) {
95   if (search_text.IsEmpty())
96     return EphemeralRangeInFlatTree();
97 
98   search_start = NextTextPosition(search_start, search_end);
99   if (search_start == search_end)
100     return EphemeralRangeInFlatTree();
101 
102   FindBuffer buffer(EphemeralRangeInFlatTree(search_start, search_end));
103 
104   // TODO(nburris): FindBuffer will search the rest of the document for a match,
105   // but we only need to check for an immediate match, so we should stop
106   // searching if there's no immediate match.
107   FindBuffer::Results match_results =
108       buffer.FindMatches(search_text, kCaseInsensitive);
109 
110   if (!match_results.IsEmpty() && match_results.front().start == 0u) {
111     FindBuffer::BufferMatchResult buffer_match = match_results.front();
112     EphemeralRangeInFlatTree match = buffer.RangeFromBufferIndex(
113         buffer_match.start, buffer_match.start + buffer_match.length);
114     if (IsWholeWordMatch(match))
115       return match;
116   }
117 
118   return EphemeralRangeInFlatTree();
119 }
120 
FindMatchInRangeWithContext(const String & search_text,const String & prefix,const String & suffix,PositionInFlatTree search_start,PositionInFlatTree search_end)121 EphemeralRangeInFlatTree FindMatchInRangeWithContext(
122     const String& search_text,
123     const String& prefix,
124     const String& suffix,
125     PositionInFlatTree search_start,
126     PositionInFlatTree search_end) {
127   while (search_start != search_end) {
128     EphemeralRangeInFlatTree potential_match;
129 
130     if (!prefix.IsEmpty()) {
131       EphemeralRangeInFlatTree prefix_match =
132           FindMatchInRange(prefix, search_start, search_end);
133 
134       // No prefix_match in remaining range
135       if (prefix_match.IsNull())
136         return EphemeralRangeInFlatTree();
137 
138       search_start = prefix_match.EndPosition();
139       potential_match =
140           FindImmediateMatch(search_text, search_start, search_end);
141 
142       // No search_text match after current prefix_match
143       if (potential_match.IsNull())
144         continue;
145     } else {
146       potential_match = FindMatchInRange(search_text, search_start, search_end);
147 
148       // No search_text match in remaining range
149       if (potential_match.IsNull())
150         return EphemeralRangeInFlatTree();
151 
152       search_start = potential_match.EndPosition();
153     }
154 
155     PositionInFlatTree suffix_start = potential_match.EndPosition();
156     DCHECK(potential_match.IsNotNull());
157     if (!suffix.IsEmpty()) {
158       EphemeralRangeInFlatTree suffix_match =
159           FindImmediateMatch(suffix, suffix_start, search_end);
160 
161       // No suffix match after current potential_match
162       if (suffix_match.IsNull())
163         continue;
164     }
165 
166     // If we reach here without a return or continue, we have a full match.
167     return potential_match;
168   }
169 
170   return EphemeralRangeInFlatTree();
171 }
172 
173 }  // namespace
174 
TextFragmentFinder(Client & client,const TextFragmentSelector & selector)175 TextFragmentFinder::TextFragmentFinder(Client& client,
176                                        const TextFragmentSelector& selector)
177     : client_(client), selector_(selector) {
178   DCHECK(!selector_.Start().IsEmpty());
179 }
180 
FindMatch(Document & document)181 void TextFragmentFinder::FindMatch(Document& document) {
182   PositionInFlatTree search_start =
183       PositionInFlatTree::FirstPositionInNode(document);
184 
185   auto forced_lock_scope = document.GetScopedForceActivatableLocks();
186   document.UpdateStyleAndLayout(DocumentUpdateReason::kFindInPage);
187 
188   EphemeralRangeInFlatTree match =
189       FindMatchFromPosition(document, search_start);
190 
191   if (match.IsNotNull()) {
192     client_.DidFindMatch(match);
193 
194     // Continue searching to see if we have an ambiguous selector.
195     // TODO(crbug.com/919204): This is temporary and only for measuring
196     // ambiguous matching during prototyping.
197     EphemeralRangeInFlatTree ambiguous_match =
198         FindMatchFromPosition(document, match.EndPosition());
199     if (ambiguous_match.IsNotNull())
200       client_.DidFindAmbiguousMatch();
201   }
202 }
203 
FindMatchFromPosition(Document & document,PositionInFlatTree search_start)204 EphemeralRangeInFlatTree TextFragmentFinder::FindMatchFromPosition(
205     Document& document,
206     PositionInFlatTree search_start) {
207   PositionInFlatTree search_end;
208   if (document.documentElement() && document.documentElement()->lastChild()) {
209     search_end =
210         PositionInFlatTree::AfterNode(*document.documentElement()->lastChild());
211   } else {
212     search_end = PositionInFlatTree::LastPositionInNode(document);
213   }
214 
215   // TODO(crbug.com/930156): Make FindMatch work asynchronously.
216   EphemeralRangeInFlatTree match;
217   if (selector_.Type() == TextFragmentSelector::kExact) {
218     match = FindMatchInRangeWithContext(selector_.Start(), selector_.Prefix(),
219                                         selector_.Suffix(), search_start,
220                                         search_end);
221   } else {
222     EphemeralRangeInFlatTree start_match =
223         FindMatchInRangeWithContext(selector_.Start(), selector_.Prefix(),
224                                     kNoContext, search_start, search_end);
225     if (start_match.IsNull())
226       return start_match;
227 
228     // TODO(crbug.com/924964): Determine what we should do if the start text and
229     // end text are the same (and there are no context terms). This
230     // implementation continues searching for the next instance of the text,
231     // from the end of the first instance.
232     search_start = start_match.EndPosition();
233     EphemeralRangeInFlatTree end_match = FindMatchInRangeWithContext(
234         selector_.End(), kNoContext, selector_.Suffix(), search_start,
235         search_end);
236     if (end_match.IsNotNull()) {
237       match = EphemeralRangeInFlatTree(start_match.StartPosition(),
238                                        end_match.EndPosition());
239     }
240   }
241 
242   return match;
243 }
244 
245 }  // namespace blink
246