1 // Copyright 2017 The xi-editor Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 //! Implementation of string finding in ropes.
16 
17 use std::cmp::min;
18 
19 use memchr::{memchr, memchr2, memchr3};
20 
21 use crate::rope::BaseMetric;
22 use crate::rope::LinesRaw;
23 use crate::rope::RopeInfo;
24 use crate::tree::Cursor;
25 use regex::Regex;
26 use std::borrow::Cow;
27 use std::iter::FromIterator;
28 use std::str;
29 
30 /// The result of a [`find`][find] operation.
31 ///
32 /// [find]: fn.find.html
33 pub enum FindResult {
34     /// The pattern was found at this position.
35     Found(usize),
36     /// The pattern was not found.
37     NotFound,
38     /// The cursor has been advanced by some amount. The pattern is not
39     /// found before the new cursor, but may be at or beyond it.
40     TryAgain,
41 }
42 
43 /// A policy for case matching. There may be more choices in the future (for
44 /// example, an even more forgiving mode that ignores accents, or possibly
45 /// handling Unicode normalization).
46 #[derive(Clone, Copy, PartialEq)]
47 pub enum CaseMatching {
48     /// Require an exact codepoint-for-codepoint match (implies case sensitivity).
49     Exact,
50     /// Case insensitive match. Guaranteed to work for the ASCII case, and
51     /// reasonably well otherwise (it is currently defined in terms of the
52     /// `to_lowercase` methods in the Rust standard library).
53     CaseInsensitive,
54 }
55 
56 /// Finds a pattern string in the rope referenced by the cursor, starting at
57 /// the current location of the cursor (and finding the first match). Both
58 /// case sensitive and case insensitive matching is provided, controlled by
59 /// the `cm` parameter. The `regex` parameter controls whether the query
60 /// should be considered as a regular expression.
61 ///
62 /// On success, the cursor is updated to immediately follow the found string.
63 /// On failure, the cursor's position is indeterminate.
64 ///
65 /// Can panic if `pat` is empty.
find( cursor: &mut Cursor<RopeInfo>, lines: &mut LinesRaw, cm: CaseMatching, pat: &str, regex: Option<&Regex>, ) -> Option<usize>66 pub fn find(
67     cursor: &mut Cursor<RopeInfo>,
68     lines: &mut LinesRaw,
69     cm: CaseMatching,
70     pat: &str,
71     regex: Option<&Regex>,
72 ) -> Option<usize> {
73     match find_progress(cursor, lines, cm, pat, usize::max_value(), regex) {
74         FindResult::Found(start) => Some(start),
75         FindResult::NotFound => None,
76         FindResult::TryAgain => unreachable!("find_progress got stuck"),
77     }
78 }
79 
80 /// A variant of [`find`][find] that makes a bounded amount of progress, then either
81 /// returns or suspends (returning `TryAgain`).
82 ///
83 /// The `num_steps` parameter controls the number of "steps" processed per
84 /// call. The unit of "step" is not formally defined but is typically
85 /// scanning one leaf (using a memchr-like scan) or testing one candidate
86 /// when scanning produces a result. It should be empirically tuned for a
87 /// balance between overhead and impact on interactive performance, but the
88 /// exact value is probably not critical.
89 ///
90 /// [find]: fn.find.html
find_progress( cursor: &mut Cursor<RopeInfo>, lines: &mut LinesRaw, cm: CaseMatching, pat: &str, num_steps: usize, regex: Option<&Regex>, ) -> FindResult91 pub fn find_progress(
92     cursor: &mut Cursor<RopeInfo>,
93     lines: &mut LinesRaw,
94     cm: CaseMatching,
95     pat: &str,
96     num_steps: usize,
97     regex: Option<&Regex>,
98 ) -> FindResult {
99     // empty search string
100     if pat.is_empty() {
101         return FindResult::NotFound;
102     }
103 
104     match regex {
105         Some(r) => find_progress_iter(
106             cursor,
107             lines,
108             pat,
109             |_| Some(0),
110             |cursor, lines, pat| compare_cursor_regex(cursor, lines, pat, &r),
111             num_steps,
112         ),
113         None => {
114             match cm {
115                 CaseMatching::Exact => {
116                     let b = pat.as_bytes()[0];
117                     let scanner = |s: &str| memchr(b, s.as_bytes());
118                     let matcher = compare_cursor_str;
119                     find_progress_iter(cursor, lines, pat, scanner, matcher, num_steps)
120                 }
121                 CaseMatching::CaseInsensitive => {
122                     let pat_lower = pat.to_lowercase();
123                     let b = pat_lower.as_bytes()[0];
124                     let matcher = compare_cursor_str_casei;
125                     if b == b'i' {
126                         // 0xC4 is first utf-8 byte of 'İ'
127                         let scanner = |s: &str| memchr3(b'i', b'I', 0xC4, s.as_bytes());
128                         find_progress_iter(cursor, lines, &pat_lower, scanner, matcher, num_steps)
129                     } else if b == b'k' {
130                         // 0xE2 is first utf-8 byte of u+212A (kelvin sign)
131                         let scanner = |s: &str| memchr3(b'k', b'K', 0xE2, s.as_bytes());
132                         find_progress_iter(cursor, lines, &pat_lower, scanner, matcher, num_steps)
133                     } else if b >= b'a' && b <= b'z' {
134                         let scanner = |s: &str| memchr2(b, b - 0x20, s.as_bytes());
135                         find_progress_iter(cursor, lines, &pat_lower, scanner, matcher, num_steps)
136                     } else if b < 0x80 {
137                         let scanner = |s: &str| memchr(b, s.as_bytes());
138                         find_progress_iter(cursor, lines, &pat_lower, scanner, matcher, num_steps)
139                     } else {
140                         let c = pat.chars().next().unwrap();
141                         let scanner = |s: &str| scan_lowercase(c, s);
142                         find_progress_iter(cursor, lines, &pat_lower, scanner, matcher, num_steps)
143                     }
144                 }
145             }
146         }
147     }
148 }
149 
150 // Run the core repeatedly until there is a result, up to a certain number of steps.
find_progress_iter( cursor: &mut Cursor<RopeInfo>, lines: &mut LinesRaw, pat: &str, scanner: impl Fn(&str) -> Option<usize>, matcher: impl Fn(&mut Cursor<RopeInfo>, &mut LinesRaw, &str) -> Option<usize>, num_steps: usize, ) -> FindResult151 fn find_progress_iter(
152     cursor: &mut Cursor<RopeInfo>,
153     lines: &mut LinesRaw,
154     pat: &str,
155     scanner: impl Fn(&str) -> Option<usize>,
156     matcher: impl Fn(&mut Cursor<RopeInfo>, &mut LinesRaw, &str) -> Option<usize>,
157     num_steps: usize,
158 ) -> FindResult {
159     for _ in 0..num_steps {
160         match find_core(cursor, lines, pat, &scanner, &matcher) {
161             FindResult::TryAgain => (),
162             result => return result,
163         }
164     }
165     FindResult::TryAgain
166 }
167 
168 // The core of the find algorithm. It takes a "scanner", which quickly
169 // scans through a single leaf searching for some prefix of the pattern,
170 // then a "matcher" which confirms that such a candidate actually matches
171 // in the full rope.
find_core( cursor: &mut Cursor<RopeInfo>, lines: &mut LinesRaw, pat: &str, scanner: impl Fn(&str) -> Option<usize>, matcher: impl Fn(&mut Cursor<RopeInfo>, &mut LinesRaw, &str) -> Option<usize>, ) -> FindResult172 fn find_core(
173     cursor: &mut Cursor<RopeInfo>,
174     lines: &mut LinesRaw,
175     pat: &str,
176     scanner: impl Fn(&str) -> Option<usize>,
177     matcher: impl Fn(&mut Cursor<RopeInfo>, &mut LinesRaw, &str) -> Option<usize>,
178 ) -> FindResult {
179     let orig_pos = cursor.pos();
180 
181     // if cursor reached the end of the text then no match has been found
182     if orig_pos == cursor.total_len() {
183         return FindResult::NotFound;
184     }
185 
186     if let Some((leaf, pos_in_leaf)) = cursor.get_leaf() {
187         if let Some(off) = scanner(&leaf[pos_in_leaf..]) {
188             let candidate_pos = orig_pos + off;
189             cursor.set(candidate_pos);
190             if let Some(actual_pos) = matcher(cursor, lines, pat) {
191                 return FindResult::Found(actual_pos);
192             }
193         } else {
194             let _ = cursor.next_leaf();
195         }
196 
197         FindResult::TryAgain
198     } else {
199         FindResult::NotFound
200     }
201 }
202 
203 /// Compare whether the substring beginning at the current cursor location
204 /// is equal to the provided string. Leaves the cursor at an indeterminate
205 /// position on failure, but the end of the string on success. Returns the
206 /// start position of the match.
compare_cursor_str( cursor: &mut Cursor<RopeInfo>, _lines: &mut LinesRaw, mut pat: &str, ) -> Option<usize>207 pub fn compare_cursor_str(
208     cursor: &mut Cursor<RopeInfo>,
209     _lines: &mut LinesRaw,
210     mut pat: &str,
211 ) -> Option<usize> {
212     let start_position = cursor.pos();
213     if pat.is_empty() {
214         return Some(start_position);
215     }
216     let success_pos = cursor.pos() + pat.len();
217     while let Some((leaf, pos_in_leaf)) = cursor.get_leaf() {
218         let n = min(pat.len(), leaf.len() - pos_in_leaf);
219         if leaf.as_bytes()[pos_in_leaf..pos_in_leaf + n] != pat.as_bytes()[..n] {
220             cursor.set(start_position);
221             cursor.next::<BaseMetric>();
222             return None;
223         }
224         pat = &pat[n..];
225         if pat.is_empty() {
226             cursor.set(success_pos);
227             return Some(start_position);
228         }
229         let _ = cursor.next_leaf();
230     }
231     cursor.set(start_position);
232     cursor.next::<BaseMetric>();
233     None
234 }
235 
236 /// Like `compare_cursor_str` but case invariant (using to_lowercase() to
237 /// normalize both strings before comparison). Returns the start position
238 /// of the match.
compare_cursor_str_casei( cursor: &mut Cursor<RopeInfo>, _lines: &mut LinesRaw, pat: &str, ) -> Option<usize>239 pub fn compare_cursor_str_casei(
240     cursor: &mut Cursor<RopeInfo>,
241     _lines: &mut LinesRaw,
242     pat: &str,
243 ) -> Option<usize> {
244     let start_position = cursor.pos();
245     let mut pat_iter = pat.chars();
246     let mut c = pat_iter.next().unwrap();
247     loop {
248         if let Some(rope_c) = cursor.next_codepoint() {
249             for lc_c in rope_c.to_lowercase() {
250                 if c != lc_c {
251                     cursor.set(start_position);
252                     cursor.next::<BaseMetric>();
253                     return None;
254                 }
255                 if let Some(next_c) = pat_iter.next() {
256                     c = next_c;
257                 } else {
258                     return Some(start_position);
259                 }
260             }
261         } else {
262             // end of string before pattern is complete
263             cursor.set(start_position);
264             cursor.next::<BaseMetric>();
265             return None;
266         }
267     }
268 }
269 
270 /// Compare whether the substring beginning at the cursor location matches
271 /// the provided regular expression. The substring begins at the beginning
272 /// of the start of the line.
273 /// If the regular expression can match multiple lines then the entire text
274 /// is consumed and matched against the regular expression. Otherwise only
275 /// the current line is matched. Returns the start position of the match.
compare_cursor_regex( cursor: &mut Cursor<RopeInfo>, lines: &mut LinesRaw, pat: &str, regex: &Regex, ) -> Option<usize>276 pub fn compare_cursor_regex(
277     cursor: &mut Cursor<RopeInfo>,
278     lines: &mut LinesRaw,
279     pat: &str,
280     regex: &Regex,
281 ) -> Option<usize> {
282     let orig_position = cursor.pos();
283 
284     if pat.is_empty() {
285         return Some(orig_position);
286     }
287 
288     let text: Cow<str>;
289 
290     if is_multiline_regex(pat) {
291         // consume all of the text if regex is multi line matching
292         text = Cow::from(String::from_iter(lines));
293     } else {
294         match lines.next() {
295             Some(line) => text = line,
296             _ => return None,
297         }
298     }
299 
300     // match regex against text
301     match regex.find(&text) {
302         Some(mat) => {
303             // calculate start position based on where the match starts
304             let start_position = orig_position + mat.start();
305 
306             // update cursor and set to end of match
307             let end_position = orig_position + mat.end();
308             cursor.set(end_position);
309             Some(start_position)
310         }
311         None => {
312             cursor.set(orig_position + text.len());
313             None
314         }
315     }
316 }
317 
318 /// Checks if a regular expression can match multiple lines.
is_multiline_regex(regex: &str) -> bool319 pub fn is_multiline_regex(regex: &str) -> bool {
320     // regex characters that match line breaks
321     // todo: currently multiline mode is ignored
322     let multiline_indicators = vec![r"\n", r"\r", r"[[:space:]]"];
323 
324     multiline_indicators.iter().any(|&i| regex.contains(i))
325 }
326 
327 /// Scan for a codepoint that, after conversion to lowercase, matches the probe.
scan_lowercase(probe: char, s: &str) -> Option<usize>328 fn scan_lowercase(probe: char, s: &str) -> Option<usize> {
329     for (i, c) in s.char_indices() {
330         if c.to_lowercase().next().unwrap() == probe {
331             return Some(i);
332         }
333     }
334     None
335 }
336 
337 #[cfg(test)]
338 mod tests {
339     use super::CaseMatching::{CaseInsensitive, Exact};
340     use super::*;
341     use crate::rope::Rope;
342     use crate::tree::Cursor;
343     use regex::RegexBuilder;
344 
345     const REGEX_SIZE_LIMIT: usize = 1000000;
346 
347     #[test]
find_small()348     fn find_small() {
349         let a = Rope::from("Löwe 老虎 Léopard");
350         let mut c = Cursor::new(&a, 0);
351         let mut raw_lines = a.lines_raw(..);
352         assert_eq!(find(&mut c, &mut raw_lines, Exact, "L", None), Some(0));
353         assert_eq!(find(&mut c, &mut raw_lines, Exact, "L", None), Some(13));
354         assert_eq!(find(&mut c, &mut raw_lines, Exact, "L", None), None);
355         c.set(0);
356         assert_eq!(find(&mut c, &mut raw_lines, Exact, "Léopard", None), Some(13));
357         assert_eq!(find(&mut c, &mut raw_lines, Exact, "Léopard", None), None);
358         c.set(0);
359         // Note: these two characters both start with 0xE8 in utf-8
360         assert_eq!(find(&mut c, &mut raw_lines, Exact, "老虎", None), Some(6));
361         assert_eq!(find(&mut c, &mut raw_lines, Exact, "老虎", None), None);
362         c.set(0);
363         assert_eq!(find(&mut c, &mut raw_lines, Exact, "虎", None), Some(9));
364         assert_eq!(find(&mut c, &mut raw_lines, Exact, "虎", None), None);
365         c.set(0);
366         assert_eq!(find(&mut c, &mut raw_lines, Exact, "Tiger", None), None);
367     }
368 
369     #[test]
find_medium()370     fn find_medium() {
371         let mut s = String::new();
372         for _ in 0..4000 {
373             s.push('x');
374         }
375         s.push_str("Löwe 老虎 Léopard");
376         let a = Rope::from(&s);
377         let mut c = Cursor::new(&a, 0);
378         let mut raw_lines = a.lines_raw(0..a.len());
379         assert_eq!(find(&mut c, &mut raw_lines, Exact, "L", None), Some(4000));
380         assert_eq!(find(&mut c, &mut raw_lines, Exact, "L", None), Some(4013));
381         assert_eq!(find(&mut c, &mut raw_lines, Exact, "L", None), None);
382         c.set(0);
383         assert_eq!(find(&mut c, &mut raw_lines, Exact, "Léopard", None), Some(4013));
384         assert_eq!(find(&mut c, &mut raw_lines, Exact, "Léopard", None), None);
385         c.set(0);
386         assert_eq!(find(&mut c, &mut raw_lines, Exact, "老虎", None), Some(4006));
387         assert_eq!(find(&mut c, &mut raw_lines, Exact, "老虎", None), None);
388         c.set(0);
389         assert_eq!(find(&mut c, &mut raw_lines, Exact, "虎", None), Some(4009));
390         assert_eq!(find(&mut c, &mut raw_lines, Exact, "虎", None), None);
391         c.set(0);
392         assert_eq!(find(&mut c, &mut raw_lines, Exact, "Tiger", None), None);
393     }
394 
395     #[test]
find_casei_small()396     fn find_casei_small() {
397         let a = Rope::from("Löwe 老虎 Léopard");
398         let mut c = Cursor::new(&a, 0);
399         let mut raw_lines = a.lines_raw(0..a.len());
400         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "l", None), Some(0));
401         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "l", None), Some(13));
402         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "l", None), None);
403         c.set(0);
404         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "léopard", None), Some(13));
405         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "léopard", None), None);
406         c.set(0);
407         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "LÉOPARD", None), Some(13));
408         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "LÉOPARD", None), None);
409         c.set(0);
410         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "老虎", None), Some(6));
411         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "老虎", None), None);
412         c.set(0);
413         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "Tiger", None), None);
414     }
415 
416     #[test]
find_casei_ascii_nonalpha()417     fn find_casei_ascii_nonalpha() {
418         let a = Rope::from("![cfg(test)]");
419         let mut c = Cursor::new(&a, 0);
420         let mut raw_lines = a.lines_raw(0..a.len());
421         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "(test)", None), Some(5));
422         c.set(0);
423         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "(TEST)", None), Some(5));
424     }
425 
426     #[test]
find_casei_special()427     fn find_casei_special() {
428         let a = Rope::from("İ");
429         let mut c = Cursor::new(&a, 0);
430         let mut raw_lines = a.lines_raw(0..a.len());
431         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "i̇", None), Some(0));
432 
433         let a = Rope::from("i̇");
434         let mut c = Cursor::new(&a, 0);
435         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "İ", None), Some(0));
436 
437         let a = Rope::from("\u{212A}");
438         let mut c = Cursor::new(&a, 0);
439         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "k", None), Some(0));
440 
441         let a = Rope::from("k");
442         let mut c = Cursor::new(&a, 0);
443         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "\u{212A}", None), Some(0));
444     }
445 
446     #[test]
find_casei_0xc4()447     fn find_casei_0xc4() {
448         let a = Rope::from("\u{0100}I");
449         let mut c = Cursor::new(&a, 0);
450         let mut raw_lines = a.lines_raw(0..a.len());
451         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "i", None), Some(2));
452     }
453 
454     #[test]
find_regex_small_casei()455     fn find_regex_small_casei() {
456         let a = Rope::from("Löwe 老虎 Léopard\nSecond line");
457         let mut c = Cursor::new(&a, 0);
458         let mut raw_lines = a.lines_raw(0..a.len());
459         let regex =
460             RegexBuilder::new("L").size_limit(REGEX_SIZE_LIMIT).case_insensitive(true).build().ok();
461         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "L", regex.as_ref()), Some(0));
462         raw_lines = a.lines_raw(c.pos()..a.len());
463         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "L", regex.as_ref()), Some(13));
464         raw_lines = a.lines_raw(c.pos()..a.len());
465         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "L", regex.as_ref()), Some(29));
466         c.set(0);
467         let regex = RegexBuilder::new("Léopard")
468             .size_limit(REGEX_SIZE_LIMIT)
469             .case_insensitive(true)
470             .build()
471             .ok();
472         let mut raw_lines = a.lines_raw(0..a.len());
473         assert_eq!(
474             find(&mut c, &mut raw_lines, CaseInsensitive, "Léopard", regex.as_ref()),
475             Some(13)
476         );
477         raw_lines = a.lines_raw(c.pos()..a.len());
478         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "Léopard", regex.as_ref()), None);
479         c.set(0);
480         let mut raw_lines = a.lines_raw(0..a.len());
481         let regex = RegexBuilder::new("老虎")
482             .size_limit(REGEX_SIZE_LIMIT)
483             .case_insensitive(true)
484             .build()
485             .ok();
486         assert_eq!(
487             find(&mut c, &mut raw_lines, CaseInsensitive, "老虎", regex.as_ref()),
488             Some(6)
489         );
490         raw_lines = a.lines_raw(c.pos()..a.len());
491         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "老虎", regex.as_ref()), None);
492         c.set(0);
493         let regex = RegexBuilder::new("Tiger")
494             .size_limit(REGEX_SIZE_LIMIT)
495             .case_insensitive(true)
496             .build()
497             .ok();
498         let mut raw_lines = a.lines_raw(0..a.len());
499         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "Tiger", regex.as_ref()), None);
500         c.set(0);
501         let regex =
502             RegexBuilder::new(".").size_limit(REGEX_SIZE_LIMIT).case_insensitive(true).build().ok();
503         let mut raw_lines = a.lines_raw(0..a.len());
504         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, ".", regex.as_ref()), Some(0));
505         raw_lines = a.lines_raw(c.pos()..a.len());
506         let regex = RegexBuilder::new("\\s")
507             .size_limit(REGEX_SIZE_LIMIT)
508             .case_insensitive(true)
509             .build()
510             .ok();
511         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "\\s", regex.as_ref()), Some(5));
512         raw_lines = a.lines_raw(c.pos()..a.len());
513         let regex = RegexBuilder::new("\\sLéopard\n.*")
514             .size_limit(REGEX_SIZE_LIMIT)
515             .case_insensitive(true)
516             .build()
517             .ok();
518         assert_eq!(
519             find(&mut c, &mut raw_lines, CaseInsensitive, "\\sLéopard\n.*", regex.as_ref()),
520             Some(12)
521         );
522     }
523 
524     #[test]
find_regex_small()525     fn find_regex_small() {
526         let a = Rope::from("Löwe 老虎 Léopard\nSecond line");
527         let mut c = Cursor::new(&a, 0);
528         let mut raw_lines = a.lines_raw(0..a.len());
529         let regex = RegexBuilder::new("L")
530             .size_limit(REGEX_SIZE_LIMIT)
531             .case_insensitive(false)
532             .build()
533             .ok();
534         assert_eq!(find(&mut c, &mut raw_lines, Exact, "L", regex.as_ref()), Some(0));
535         raw_lines = a.lines_raw(c.pos()..a.len());
536         assert_eq!(find(&mut c, &mut raw_lines, Exact, "L", regex.as_ref()), Some(13));
537         raw_lines = a.lines_raw(c.pos()..a.len());
538         assert_eq!(find(&mut c, &mut raw_lines, Exact, "L", regex.as_ref()), None);
539         c.set(0);
540         let regex = RegexBuilder::new("Léopard")
541             .size_limit(REGEX_SIZE_LIMIT)
542             .case_insensitive(false)
543             .build()
544             .ok();
545         let mut raw_lines = a.lines_raw(0..a.len());
546         assert_eq!(find(&mut c, &mut raw_lines, Exact, "Léopard", regex.as_ref()), Some(13));
547         raw_lines = a.lines_raw(c.pos()..a.len());
548         assert_eq!(find(&mut c, &mut raw_lines, Exact, "Léopard", regex.as_ref()), None);
549         c.set(0);
550         let regex = RegexBuilder::new("老虎")
551             .size_limit(REGEX_SIZE_LIMIT)
552             .case_insensitive(false)
553             .build()
554             .ok();
555         let mut raw_lines = a.lines_raw(0..a.len());
556         assert_eq!(find(&mut c, &mut raw_lines, Exact, "老虎", regex.as_ref()), Some(6));
557         raw_lines = a.lines_raw(c.pos()..a.len());
558         assert_eq!(find(&mut c, &mut raw_lines, Exact, "老虎", regex.as_ref()), None);
559         c.set(0);
560         let regex = RegexBuilder::new("Tiger")
561             .size_limit(REGEX_SIZE_LIMIT)
562             .case_insensitive(false)
563             .build()
564             .ok();
565         let mut raw_lines = a.lines_raw(0..a.len());
566         assert_eq!(find(&mut c, &mut raw_lines, Exact, "Tiger", regex.as_ref()), None);
567         c.set(0);
568         let regex = RegexBuilder::new(".")
569             .size_limit(REGEX_SIZE_LIMIT)
570             .case_insensitive(false)
571             .build()
572             .ok();
573         let mut raw_lines = a.lines_raw(0..a.len());
574         assert_eq!(find(&mut c, &mut raw_lines, Exact, ".", regex.as_ref()), Some(0));
575         raw_lines = a.lines_raw(c.pos()..a.len());
576         let regex = RegexBuilder::new("\\s")
577             .size_limit(REGEX_SIZE_LIMIT)
578             .case_insensitive(false)
579             .build()
580             .ok();
581         assert_eq!(find(&mut c, &mut raw_lines, Exact, "\\s", regex.as_ref()), Some(5));
582         raw_lines = a.lines_raw(c.pos()..a.len());
583         let regex = RegexBuilder::new("\\sLéopard\n.*")
584             .size_limit(REGEX_SIZE_LIMIT)
585             .case_insensitive(false)
586             .build()
587             .ok();
588         assert_eq!(
589             find(&mut c, &mut raw_lines, Exact, "\\sLéopard\n.*", regex.as_ref()),
590             Some(12)
591         );
592     }
593 
594     #[test]
find_regex_medium()595     fn find_regex_medium() {
596         let mut s = String::new();
597         for _ in 0..4000 {
598             s.push('x');
599         }
600         s.push_str("Löwe 老虎 Léopard\nSecond line");
601         let a = Rope::from(&s);
602         let mut c = Cursor::new(&a, 0);
603         let mut raw_lines = a.lines_raw(0..a.len());
604         let regex =
605             RegexBuilder::new("L").size_limit(REGEX_SIZE_LIMIT).case_insensitive(true).build().ok();
606         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "L", regex.as_ref()), Some(4000));
607         raw_lines = a.lines_raw(c.pos()..a.len());
608         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "L", regex.as_ref()), Some(4013));
609         raw_lines = a.lines_raw(c.pos()..a.len());
610         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "L", regex.as_ref()), Some(4029));
611         c.set(0);
612         let mut raw_lines = a.lines_raw(0..a.len());
613         let regex = RegexBuilder::new("Léopard")
614             .size_limit(REGEX_SIZE_LIMIT)
615             .case_insensitive(true)
616             .build()
617             .ok();
618         assert_eq!(
619             find(&mut c, &mut raw_lines, CaseInsensitive, "Léopard", regex.as_ref()),
620             Some(4013)
621         );
622         raw_lines = a.lines_raw(c.pos()..a.len());
623         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "Léopard", regex.as_ref()), None);
624         c.set(0);
625         let regex = RegexBuilder::new("老虎")
626             .size_limit(REGEX_SIZE_LIMIT)
627             .case_insensitive(true)
628             .build()
629             .ok();
630         let mut raw_lines = a.lines_raw(0..a.len());
631         assert_eq!(
632             find(&mut c, &mut raw_lines, CaseInsensitive, "老虎", regex.as_ref()),
633             Some(4006)
634         );
635         raw_lines = a.lines_raw(c.pos()..a.len());
636         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "老虎", regex.as_ref()), None);
637         c.set(0);
638         let regex = RegexBuilder::new("Tiger")
639             .size_limit(REGEX_SIZE_LIMIT)
640             .case_insensitive(true)
641             .build()
642             .ok();
643         let mut raw_lines = a.lines_raw(0..a.len());
644         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "Tiger", regex.as_ref()), None);
645         c.set(0);
646         let mut raw_lines = a.lines_raw(0..a.len());
647         let regex =
648             RegexBuilder::new(".").size_limit(REGEX_SIZE_LIMIT).case_insensitive(true).build().ok();
649         assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, ".", regex.as_ref()), Some(0));
650         raw_lines = a.lines_raw(c.pos()..a.len());
651         let regex = RegexBuilder::new("\\s")
652             .size_limit(REGEX_SIZE_LIMIT)
653             .case_insensitive(true)
654             .build()
655             .ok();
656         assert_eq!(
657             find(&mut c, &mut raw_lines, CaseInsensitive, "\\s", regex.as_ref()),
658             Some(4005)
659         );
660         raw_lines = a.lines_raw(c.pos()..a.len());
661         let regex = RegexBuilder::new("\\sLéopard\n.*")
662             .size_limit(REGEX_SIZE_LIMIT)
663             .case_insensitive(true)
664             .build()
665             .ok();
666         assert_eq!(
667             find(&mut c, &mut raw_lines, CaseInsensitive, "\\sLéopard\n.*", regex.as_ref()),
668             Some(4012)
669         );
670     }
671 
672     #[test]
compare_cursor_regex_singleline()673     fn compare_cursor_regex_singleline() {
674         let regex = Regex::new(r"^(\*+)(?: +(.*?))?[ \t]*$").unwrap();
675         let rope = Rope::from("** level 2 headline");
676         let mut c = Cursor::new(&rope, 0);
677         let mut l = rope.lines_raw(c.pos()..rope.len());
678         assert!(compare_cursor_regex(&mut c, &mut l, regex.as_str(), &regex).is_some());
679 
680         c.set(3);
681         l = rope.lines_raw(c.pos()..rope.len());
682         assert!(compare_cursor_regex(&mut c, &mut l, regex.as_str(), &regex).is_none());
683     }
684 
685     #[test]
compare_cursor_regex_multiline()686     fn compare_cursor_regex_multiline() {
687         let regex = Regex::new(
688             r"^[ \t]*:PROPERTIES:[ \t]*\n(?:[ \t]*:\S+:(?: .*)?[ \t]*\n)*?[ \t]*:END:[ \t]*\n",
689         )
690         .unwrap();
691 
692         // taken from http://doc.norang.ca/org-mode.html#DiaryForAppointments
693         let s = "\
694                  #+FILETAGS: PERSONAL\
695                  \n* Appointments\
696                  \n  :PROPERTIES:\
697                  \n  :CATEGORY: Appt\
698                  \n  :ARCHIVE:  %s_archive::* Appointments\
699                  \n  :END:\
700                  \n** Holidays\
701                  \n   :PROPERTIES:\
702                  \n   :Category: Holiday\
703                  \n   :END:\
704                  \n   %%(org-calendar-holiday)\
705                  \n** Some other Appointment\n";
706         let rope = Rope::from(s);
707         let mut c = Cursor::new(&rope, 0);
708         let mut l = rope.lines_raw(c.pos()..rope.len());
709         assert!(compare_cursor_regex(&mut c, &mut l, regex.as_str(), &regex).is_none());
710 
711         // move to the next line after "* Appointments"
712         c.set(36);
713         l = rope.lines_raw(c.pos()..rope.len());
714         assert!(compare_cursor_regex(&mut c, &mut l, regex.as_str(), &regex).is_some());
715         assert_eq!(117, c.pos());
716         assert_eq!(Some('*'), c.next_codepoint());
717 
718         // move to the next line after "** Holidays"
719         c.set(129);
720         l = rope.lines_raw(c.pos()..rope.len());
721         assert!(compare_cursor_regex(&mut c, &mut l, regex.as_str(), &regex).is_some());
722         c.next_codepoint();
723         c.next_codepoint();
724         c.next_codepoint();
725         assert_eq!(Some('%'), c.next_codepoint());
726     }
727 
728     #[test]
compare_cursor_str_small()729     fn compare_cursor_str_small() {
730         let a = Rope::from("Löwe 老虎 Léopard");
731         let mut c = Cursor::new(&a, 0);
732         let pat = "Löwe 老虎 Léopard";
733         let mut raw_lines = a.lines_raw(0..a.len());
734         assert!(compare_cursor_str(&mut c, &mut raw_lines, pat).is_some());
735         assert_eq!(c.pos(), pat.len());
736         c.set(0);
737         let pat = "Löwe";
738         assert!(compare_cursor_str(&mut c, &mut raw_lines, pat).is_some());
739         assert_eq!(c.pos(), pat.len());
740         c.set(0);
741         // Empty string is valid for compare_cursor_str (but not find)
742         let pat = "";
743         assert!(compare_cursor_str(&mut c, &mut raw_lines, pat).is_some());
744         assert_eq!(c.pos(), pat.len());
745         c.set(0);
746         assert!(compare_cursor_str(&mut c, &mut raw_lines, "Löwe 老虎 Léopardfoo").is_none());
747     }
748 
749     #[test]
compare_cursor_str_medium()750     fn compare_cursor_str_medium() {
751         let mut s = String::new();
752         for _ in 0..4000 {
753             s.push('x');
754         }
755         s.push_str("Löwe 老虎 Léopard");
756         let a = Rope::from(&s);
757         let mut c = Cursor::new(&a, 0);
758         let mut raw_lines = a.lines_raw(0..a.len());
759         assert!(compare_cursor_str(&mut c, &mut raw_lines, &s).is_some());
760         c.set(2000);
761         assert!(compare_cursor_str(&mut c, &mut raw_lines, &s[2000..]).is_some());
762     }
763 }
764