1 // Copyright 2017 The xi-editor Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 //! Implementation of string finding in ropes.
16
17 use std::cmp::min;
18
19 use memchr::{memchr, memchr2, memchr3};
20
21 use crate::rope::BaseMetric;
22 use crate::rope::LinesRaw;
23 use crate::rope::RopeInfo;
24 use crate::tree::Cursor;
25 use regex::Regex;
26 use std::borrow::Cow;
27 use std::iter::FromIterator;
28 use std::str;
29
30 /// The result of a [`find`][find] operation.
31 ///
32 /// [find]: fn.find.html
33 pub enum FindResult {
34 /// The pattern was found at this position.
35 Found(usize),
36 /// The pattern was not found.
37 NotFound,
38 /// The cursor has been advanced by some amount. The pattern is not
39 /// found before the new cursor, but may be at or beyond it.
40 TryAgain,
41 }
42
43 /// A policy for case matching. There may be more choices in the future (for
44 /// example, an even more forgiving mode that ignores accents, or possibly
45 /// handling Unicode normalization).
46 #[derive(Clone, Copy, PartialEq)]
47 pub enum CaseMatching {
48 /// Require an exact codepoint-for-codepoint match (implies case sensitivity).
49 Exact,
50 /// Case insensitive match. Guaranteed to work for the ASCII case, and
51 /// reasonably well otherwise (it is currently defined in terms of the
52 /// `to_lowercase` methods in the Rust standard library).
53 CaseInsensitive,
54 }
55
56 /// Finds a pattern string in the rope referenced by the cursor, starting at
57 /// the current location of the cursor (and finding the first match). Both
58 /// case sensitive and case insensitive matching is provided, controlled by
59 /// the `cm` parameter. The `regex` parameter controls whether the query
60 /// should be considered as a regular expression.
61 ///
62 /// On success, the cursor is updated to immediately follow the found string.
63 /// On failure, the cursor's position is indeterminate.
64 ///
65 /// Can panic if `pat` is empty.
find( cursor: &mut Cursor<RopeInfo>, lines: &mut LinesRaw, cm: CaseMatching, pat: &str, regex: Option<&Regex>, ) -> Option<usize>66 pub fn find(
67 cursor: &mut Cursor<RopeInfo>,
68 lines: &mut LinesRaw,
69 cm: CaseMatching,
70 pat: &str,
71 regex: Option<&Regex>,
72 ) -> Option<usize> {
73 match find_progress(cursor, lines, cm, pat, usize::max_value(), regex) {
74 FindResult::Found(start) => Some(start),
75 FindResult::NotFound => None,
76 FindResult::TryAgain => unreachable!("find_progress got stuck"),
77 }
78 }
79
80 /// A variant of [`find`][find] that makes a bounded amount of progress, then either
81 /// returns or suspends (returning `TryAgain`).
82 ///
83 /// The `num_steps` parameter controls the number of "steps" processed per
84 /// call. The unit of "step" is not formally defined but is typically
85 /// scanning one leaf (using a memchr-like scan) or testing one candidate
86 /// when scanning produces a result. It should be empirically tuned for a
87 /// balance between overhead and impact on interactive performance, but the
88 /// exact value is probably not critical.
89 ///
90 /// [find]: fn.find.html
find_progress( cursor: &mut Cursor<RopeInfo>, lines: &mut LinesRaw, cm: CaseMatching, pat: &str, num_steps: usize, regex: Option<&Regex>, ) -> FindResult91 pub fn find_progress(
92 cursor: &mut Cursor<RopeInfo>,
93 lines: &mut LinesRaw,
94 cm: CaseMatching,
95 pat: &str,
96 num_steps: usize,
97 regex: Option<&Regex>,
98 ) -> FindResult {
99 // empty search string
100 if pat.is_empty() {
101 return FindResult::NotFound;
102 }
103
104 match regex {
105 Some(r) => find_progress_iter(
106 cursor,
107 lines,
108 pat,
109 |_| Some(0),
110 |cursor, lines, pat| compare_cursor_regex(cursor, lines, pat, &r),
111 num_steps,
112 ),
113 None => {
114 match cm {
115 CaseMatching::Exact => {
116 let b = pat.as_bytes()[0];
117 let scanner = |s: &str| memchr(b, s.as_bytes());
118 let matcher = compare_cursor_str;
119 find_progress_iter(cursor, lines, pat, scanner, matcher, num_steps)
120 }
121 CaseMatching::CaseInsensitive => {
122 let pat_lower = pat.to_lowercase();
123 let b = pat_lower.as_bytes()[0];
124 let matcher = compare_cursor_str_casei;
125 if b == b'i' {
126 // 0xC4 is first utf-8 byte of 'İ'
127 let scanner = |s: &str| memchr3(b'i', b'I', 0xC4, s.as_bytes());
128 find_progress_iter(cursor, lines, &pat_lower, scanner, matcher, num_steps)
129 } else if b == b'k' {
130 // 0xE2 is first utf-8 byte of u+212A (kelvin sign)
131 let scanner = |s: &str| memchr3(b'k', b'K', 0xE2, s.as_bytes());
132 find_progress_iter(cursor, lines, &pat_lower, scanner, matcher, num_steps)
133 } else if b >= b'a' && b <= b'z' {
134 let scanner = |s: &str| memchr2(b, b - 0x20, s.as_bytes());
135 find_progress_iter(cursor, lines, &pat_lower, scanner, matcher, num_steps)
136 } else if b < 0x80 {
137 let scanner = |s: &str| memchr(b, s.as_bytes());
138 find_progress_iter(cursor, lines, &pat_lower, scanner, matcher, num_steps)
139 } else {
140 let c = pat.chars().next().unwrap();
141 let scanner = |s: &str| scan_lowercase(c, s);
142 find_progress_iter(cursor, lines, &pat_lower, scanner, matcher, num_steps)
143 }
144 }
145 }
146 }
147 }
148 }
149
150 // Run the core repeatedly until there is a result, up to a certain number of steps.
find_progress_iter( cursor: &mut Cursor<RopeInfo>, lines: &mut LinesRaw, pat: &str, scanner: impl Fn(&str) -> Option<usize>, matcher: impl Fn(&mut Cursor<RopeInfo>, &mut LinesRaw, &str) -> Option<usize>, num_steps: usize, ) -> FindResult151 fn find_progress_iter(
152 cursor: &mut Cursor<RopeInfo>,
153 lines: &mut LinesRaw,
154 pat: &str,
155 scanner: impl Fn(&str) -> Option<usize>,
156 matcher: impl Fn(&mut Cursor<RopeInfo>, &mut LinesRaw, &str) -> Option<usize>,
157 num_steps: usize,
158 ) -> FindResult {
159 for _ in 0..num_steps {
160 match find_core(cursor, lines, pat, &scanner, &matcher) {
161 FindResult::TryAgain => (),
162 result => return result,
163 }
164 }
165 FindResult::TryAgain
166 }
167
168 // The core of the find algorithm. It takes a "scanner", which quickly
169 // scans through a single leaf searching for some prefix of the pattern,
170 // then a "matcher" which confirms that such a candidate actually matches
171 // in the full rope.
find_core( cursor: &mut Cursor<RopeInfo>, lines: &mut LinesRaw, pat: &str, scanner: impl Fn(&str) -> Option<usize>, matcher: impl Fn(&mut Cursor<RopeInfo>, &mut LinesRaw, &str) -> Option<usize>, ) -> FindResult172 fn find_core(
173 cursor: &mut Cursor<RopeInfo>,
174 lines: &mut LinesRaw,
175 pat: &str,
176 scanner: impl Fn(&str) -> Option<usize>,
177 matcher: impl Fn(&mut Cursor<RopeInfo>, &mut LinesRaw, &str) -> Option<usize>,
178 ) -> FindResult {
179 let orig_pos = cursor.pos();
180
181 // if cursor reached the end of the text then no match has been found
182 if orig_pos == cursor.total_len() {
183 return FindResult::NotFound;
184 }
185
186 if let Some((leaf, pos_in_leaf)) = cursor.get_leaf() {
187 if let Some(off) = scanner(&leaf[pos_in_leaf..]) {
188 let candidate_pos = orig_pos + off;
189 cursor.set(candidate_pos);
190 if let Some(actual_pos) = matcher(cursor, lines, pat) {
191 return FindResult::Found(actual_pos);
192 }
193 } else {
194 let _ = cursor.next_leaf();
195 }
196
197 FindResult::TryAgain
198 } else {
199 FindResult::NotFound
200 }
201 }
202
203 /// Compare whether the substring beginning at the current cursor location
204 /// is equal to the provided string. Leaves the cursor at an indeterminate
205 /// position on failure, but the end of the string on success. Returns the
206 /// start position of the match.
compare_cursor_str( cursor: &mut Cursor<RopeInfo>, _lines: &mut LinesRaw, mut pat: &str, ) -> Option<usize>207 pub fn compare_cursor_str(
208 cursor: &mut Cursor<RopeInfo>,
209 _lines: &mut LinesRaw,
210 mut pat: &str,
211 ) -> Option<usize> {
212 let start_position = cursor.pos();
213 if pat.is_empty() {
214 return Some(start_position);
215 }
216 let success_pos = cursor.pos() + pat.len();
217 while let Some((leaf, pos_in_leaf)) = cursor.get_leaf() {
218 let n = min(pat.len(), leaf.len() - pos_in_leaf);
219 if leaf.as_bytes()[pos_in_leaf..pos_in_leaf + n] != pat.as_bytes()[..n] {
220 cursor.set(start_position);
221 cursor.next::<BaseMetric>();
222 return None;
223 }
224 pat = &pat[n..];
225 if pat.is_empty() {
226 cursor.set(success_pos);
227 return Some(start_position);
228 }
229 let _ = cursor.next_leaf();
230 }
231 cursor.set(start_position);
232 cursor.next::<BaseMetric>();
233 None
234 }
235
236 /// Like `compare_cursor_str` but case invariant (using to_lowercase() to
237 /// normalize both strings before comparison). Returns the start position
238 /// of the match.
compare_cursor_str_casei( cursor: &mut Cursor<RopeInfo>, _lines: &mut LinesRaw, pat: &str, ) -> Option<usize>239 pub fn compare_cursor_str_casei(
240 cursor: &mut Cursor<RopeInfo>,
241 _lines: &mut LinesRaw,
242 pat: &str,
243 ) -> Option<usize> {
244 let start_position = cursor.pos();
245 let mut pat_iter = pat.chars();
246 let mut c = pat_iter.next().unwrap();
247 loop {
248 if let Some(rope_c) = cursor.next_codepoint() {
249 for lc_c in rope_c.to_lowercase() {
250 if c != lc_c {
251 cursor.set(start_position);
252 cursor.next::<BaseMetric>();
253 return None;
254 }
255 if let Some(next_c) = pat_iter.next() {
256 c = next_c;
257 } else {
258 return Some(start_position);
259 }
260 }
261 } else {
262 // end of string before pattern is complete
263 cursor.set(start_position);
264 cursor.next::<BaseMetric>();
265 return None;
266 }
267 }
268 }
269
270 /// Compare whether the substring beginning at the cursor location matches
271 /// the provided regular expression. The substring begins at the beginning
272 /// of the start of the line.
273 /// If the regular expression can match multiple lines then the entire text
274 /// is consumed and matched against the regular expression. Otherwise only
275 /// the current line is matched. Returns the start position of the match.
compare_cursor_regex( cursor: &mut Cursor<RopeInfo>, lines: &mut LinesRaw, pat: &str, regex: &Regex, ) -> Option<usize>276 pub fn compare_cursor_regex(
277 cursor: &mut Cursor<RopeInfo>,
278 lines: &mut LinesRaw,
279 pat: &str,
280 regex: &Regex,
281 ) -> Option<usize> {
282 let orig_position = cursor.pos();
283
284 if pat.is_empty() {
285 return Some(orig_position);
286 }
287
288 let text: Cow<str>;
289
290 if is_multiline_regex(pat) {
291 // consume all of the text if regex is multi line matching
292 text = Cow::from(String::from_iter(lines));
293 } else {
294 match lines.next() {
295 Some(line) => text = line,
296 _ => return None,
297 }
298 }
299
300 // match regex against text
301 match regex.find(&text) {
302 Some(mat) => {
303 // calculate start position based on where the match starts
304 let start_position = orig_position + mat.start();
305
306 // update cursor and set to end of match
307 let end_position = orig_position + mat.end();
308 cursor.set(end_position);
309 Some(start_position)
310 }
311 None => {
312 cursor.set(orig_position + text.len());
313 None
314 }
315 }
316 }
317
318 /// Checks if a regular expression can match multiple lines.
is_multiline_regex(regex: &str) -> bool319 pub fn is_multiline_regex(regex: &str) -> bool {
320 // regex characters that match line breaks
321 // todo: currently multiline mode is ignored
322 let multiline_indicators = vec![r"\n", r"\r", r"[[:space:]]"];
323
324 multiline_indicators.iter().any(|&i| regex.contains(i))
325 }
326
327 /// Scan for a codepoint that, after conversion to lowercase, matches the probe.
scan_lowercase(probe: char, s: &str) -> Option<usize>328 fn scan_lowercase(probe: char, s: &str) -> Option<usize> {
329 for (i, c) in s.char_indices() {
330 if c.to_lowercase().next().unwrap() == probe {
331 return Some(i);
332 }
333 }
334 None
335 }
336
337 #[cfg(test)]
338 mod tests {
339 use super::CaseMatching::{CaseInsensitive, Exact};
340 use super::*;
341 use crate::rope::Rope;
342 use crate::tree::Cursor;
343 use regex::RegexBuilder;
344
345 const REGEX_SIZE_LIMIT: usize = 1000000;
346
347 #[test]
find_small()348 fn find_small() {
349 let a = Rope::from("Löwe 老虎 Léopard");
350 let mut c = Cursor::new(&a, 0);
351 let mut raw_lines = a.lines_raw(..);
352 assert_eq!(find(&mut c, &mut raw_lines, Exact, "L", None), Some(0));
353 assert_eq!(find(&mut c, &mut raw_lines, Exact, "L", None), Some(13));
354 assert_eq!(find(&mut c, &mut raw_lines, Exact, "L", None), None);
355 c.set(0);
356 assert_eq!(find(&mut c, &mut raw_lines, Exact, "Léopard", None), Some(13));
357 assert_eq!(find(&mut c, &mut raw_lines, Exact, "Léopard", None), None);
358 c.set(0);
359 // Note: these two characters both start with 0xE8 in utf-8
360 assert_eq!(find(&mut c, &mut raw_lines, Exact, "老虎", None), Some(6));
361 assert_eq!(find(&mut c, &mut raw_lines, Exact, "老虎", None), None);
362 c.set(0);
363 assert_eq!(find(&mut c, &mut raw_lines, Exact, "虎", None), Some(9));
364 assert_eq!(find(&mut c, &mut raw_lines, Exact, "虎", None), None);
365 c.set(0);
366 assert_eq!(find(&mut c, &mut raw_lines, Exact, "Tiger", None), None);
367 }
368
369 #[test]
find_medium()370 fn find_medium() {
371 let mut s = String::new();
372 for _ in 0..4000 {
373 s.push('x');
374 }
375 s.push_str("Löwe 老虎 Léopard");
376 let a = Rope::from(&s);
377 let mut c = Cursor::new(&a, 0);
378 let mut raw_lines = a.lines_raw(0..a.len());
379 assert_eq!(find(&mut c, &mut raw_lines, Exact, "L", None), Some(4000));
380 assert_eq!(find(&mut c, &mut raw_lines, Exact, "L", None), Some(4013));
381 assert_eq!(find(&mut c, &mut raw_lines, Exact, "L", None), None);
382 c.set(0);
383 assert_eq!(find(&mut c, &mut raw_lines, Exact, "Léopard", None), Some(4013));
384 assert_eq!(find(&mut c, &mut raw_lines, Exact, "Léopard", None), None);
385 c.set(0);
386 assert_eq!(find(&mut c, &mut raw_lines, Exact, "老虎", None), Some(4006));
387 assert_eq!(find(&mut c, &mut raw_lines, Exact, "老虎", None), None);
388 c.set(0);
389 assert_eq!(find(&mut c, &mut raw_lines, Exact, "虎", None), Some(4009));
390 assert_eq!(find(&mut c, &mut raw_lines, Exact, "虎", None), None);
391 c.set(0);
392 assert_eq!(find(&mut c, &mut raw_lines, Exact, "Tiger", None), None);
393 }
394
395 #[test]
find_casei_small()396 fn find_casei_small() {
397 let a = Rope::from("Löwe 老虎 Léopard");
398 let mut c = Cursor::new(&a, 0);
399 let mut raw_lines = a.lines_raw(0..a.len());
400 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "l", None), Some(0));
401 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "l", None), Some(13));
402 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "l", None), None);
403 c.set(0);
404 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "léopard", None), Some(13));
405 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "léopard", None), None);
406 c.set(0);
407 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "LÉOPARD", None), Some(13));
408 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "LÉOPARD", None), None);
409 c.set(0);
410 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "老虎", None), Some(6));
411 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "老虎", None), None);
412 c.set(0);
413 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "Tiger", None), None);
414 }
415
416 #[test]
find_casei_ascii_nonalpha()417 fn find_casei_ascii_nonalpha() {
418 let a = Rope::from("![cfg(test)]");
419 let mut c = Cursor::new(&a, 0);
420 let mut raw_lines = a.lines_raw(0..a.len());
421 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "(test)", None), Some(5));
422 c.set(0);
423 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "(TEST)", None), Some(5));
424 }
425
426 #[test]
find_casei_special()427 fn find_casei_special() {
428 let a = Rope::from("İ");
429 let mut c = Cursor::new(&a, 0);
430 let mut raw_lines = a.lines_raw(0..a.len());
431 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "i̇", None), Some(0));
432
433 let a = Rope::from("i̇");
434 let mut c = Cursor::new(&a, 0);
435 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "İ", None), Some(0));
436
437 let a = Rope::from("\u{212A}");
438 let mut c = Cursor::new(&a, 0);
439 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "k", None), Some(0));
440
441 let a = Rope::from("k");
442 let mut c = Cursor::new(&a, 0);
443 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "\u{212A}", None), Some(0));
444 }
445
446 #[test]
find_casei_0xc4()447 fn find_casei_0xc4() {
448 let a = Rope::from("\u{0100}I");
449 let mut c = Cursor::new(&a, 0);
450 let mut raw_lines = a.lines_raw(0..a.len());
451 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "i", None), Some(2));
452 }
453
454 #[test]
find_regex_small_casei()455 fn find_regex_small_casei() {
456 let a = Rope::from("Löwe 老虎 Léopard\nSecond line");
457 let mut c = Cursor::new(&a, 0);
458 let mut raw_lines = a.lines_raw(0..a.len());
459 let regex =
460 RegexBuilder::new("L").size_limit(REGEX_SIZE_LIMIT).case_insensitive(true).build().ok();
461 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "L", regex.as_ref()), Some(0));
462 raw_lines = a.lines_raw(c.pos()..a.len());
463 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "L", regex.as_ref()), Some(13));
464 raw_lines = a.lines_raw(c.pos()..a.len());
465 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "L", regex.as_ref()), Some(29));
466 c.set(0);
467 let regex = RegexBuilder::new("Léopard")
468 .size_limit(REGEX_SIZE_LIMIT)
469 .case_insensitive(true)
470 .build()
471 .ok();
472 let mut raw_lines = a.lines_raw(0..a.len());
473 assert_eq!(
474 find(&mut c, &mut raw_lines, CaseInsensitive, "Léopard", regex.as_ref()),
475 Some(13)
476 );
477 raw_lines = a.lines_raw(c.pos()..a.len());
478 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "Léopard", regex.as_ref()), None);
479 c.set(0);
480 let mut raw_lines = a.lines_raw(0..a.len());
481 let regex = RegexBuilder::new("老虎")
482 .size_limit(REGEX_SIZE_LIMIT)
483 .case_insensitive(true)
484 .build()
485 .ok();
486 assert_eq!(
487 find(&mut c, &mut raw_lines, CaseInsensitive, "老虎", regex.as_ref()),
488 Some(6)
489 );
490 raw_lines = a.lines_raw(c.pos()..a.len());
491 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "老虎", regex.as_ref()), None);
492 c.set(0);
493 let regex = RegexBuilder::new("Tiger")
494 .size_limit(REGEX_SIZE_LIMIT)
495 .case_insensitive(true)
496 .build()
497 .ok();
498 let mut raw_lines = a.lines_raw(0..a.len());
499 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "Tiger", regex.as_ref()), None);
500 c.set(0);
501 let regex =
502 RegexBuilder::new(".").size_limit(REGEX_SIZE_LIMIT).case_insensitive(true).build().ok();
503 let mut raw_lines = a.lines_raw(0..a.len());
504 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, ".", regex.as_ref()), Some(0));
505 raw_lines = a.lines_raw(c.pos()..a.len());
506 let regex = RegexBuilder::new("\\s")
507 .size_limit(REGEX_SIZE_LIMIT)
508 .case_insensitive(true)
509 .build()
510 .ok();
511 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "\\s", regex.as_ref()), Some(5));
512 raw_lines = a.lines_raw(c.pos()..a.len());
513 let regex = RegexBuilder::new("\\sLéopard\n.*")
514 .size_limit(REGEX_SIZE_LIMIT)
515 .case_insensitive(true)
516 .build()
517 .ok();
518 assert_eq!(
519 find(&mut c, &mut raw_lines, CaseInsensitive, "\\sLéopard\n.*", regex.as_ref()),
520 Some(12)
521 );
522 }
523
524 #[test]
find_regex_small()525 fn find_regex_small() {
526 let a = Rope::from("Löwe 老虎 Léopard\nSecond line");
527 let mut c = Cursor::new(&a, 0);
528 let mut raw_lines = a.lines_raw(0..a.len());
529 let regex = RegexBuilder::new("L")
530 .size_limit(REGEX_SIZE_LIMIT)
531 .case_insensitive(false)
532 .build()
533 .ok();
534 assert_eq!(find(&mut c, &mut raw_lines, Exact, "L", regex.as_ref()), Some(0));
535 raw_lines = a.lines_raw(c.pos()..a.len());
536 assert_eq!(find(&mut c, &mut raw_lines, Exact, "L", regex.as_ref()), Some(13));
537 raw_lines = a.lines_raw(c.pos()..a.len());
538 assert_eq!(find(&mut c, &mut raw_lines, Exact, "L", regex.as_ref()), None);
539 c.set(0);
540 let regex = RegexBuilder::new("Léopard")
541 .size_limit(REGEX_SIZE_LIMIT)
542 .case_insensitive(false)
543 .build()
544 .ok();
545 let mut raw_lines = a.lines_raw(0..a.len());
546 assert_eq!(find(&mut c, &mut raw_lines, Exact, "Léopard", regex.as_ref()), Some(13));
547 raw_lines = a.lines_raw(c.pos()..a.len());
548 assert_eq!(find(&mut c, &mut raw_lines, Exact, "Léopard", regex.as_ref()), None);
549 c.set(0);
550 let regex = RegexBuilder::new("老虎")
551 .size_limit(REGEX_SIZE_LIMIT)
552 .case_insensitive(false)
553 .build()
554 .ok();
555 let mut raw_lines = a.lines_raw(0..a.len());
556 assert_eq!(find(&mut c, &mut raw_lines, Exact, "老虎", regex.as_ref()), Some(6));
557 raw_lines = a.lines_raw(c.pos()..a.len());
558 assert_eq!(find(&mut c, &mut raw_lines, Exact, "老虎", regex.as_ref()), None);
559 c.set(0);
560 let regex = RegexBuilder::new("Tiger")
561 .size_limit(REGEX_SIZE_LIMIT)
562 .case_insensitive(false)
563 .build()
564 .ok();
565 let mut raw_lines = a.lines_raw(0..a.len());
566 assert_eq!(find(&mut c, &mut raw_lines, Exact, "Tiger", regex.as_ref()), None);
567 c.set(0);
568 let regex = RegexBuilder::new(".")
569 .size_limit(REGEX_SIZE_LIMIT)
570 .case_insensitive(false)
571 .build()
572 .ok();
573 let mut raw_lines = a.lines_raw(0..a.len());
574 assert_eq!(find(&mut c, &mut raw_lines, Exact, ".", regex.as_ref()), Some(0));
575 raw_lines = a.lines_raw(c.pos()..a.len());
576 let regex = RegexBuilder::new("\\s")
577 .size_limit(REGEX_SIZE_LIMIT)
578 .case_insensitive(false)
579 .build()
580 .ok();
581 assert_eq!(find(&mut c, &mut raw_lines, Exact, "\\s", regex.as_ref()), Some(5));
582 raw_lines = a.lines_raw(c.pos()..a.len());
583 let regex = RegexBuilder::new("\\sLéopard\n.*")
584 .size_limit(REGEX_SIZE_LIMIT)
585 .case_insensitive(false)
586 .build()
587 .ok();
588 assert_eq!(
589 find(&mut c, &mut raw_lines, Exact, "\\sLéopard\n.*", regex.as_ref()),
590 Some(12)
591 );
592 }
593
594 #[test]
find_regex_medium()595 fn find_regex_medium() {
596 let mut s = String::new();
597 for _ in 0..4000 {
598 s.push('x');
599 }
600 s.push_str("Löwe 老虎 Léopard\nSecond line");
601 let a = Rope::from(&s);
602 let mut c = Cursor::new(&a, 0);
603 let mut raw_lines = a.lines_raw(0..a.len());
604 let regex =
605 RegexBuilder::new("L").size_limit(REGEX_SIZE_LIMIT).case_insensitive(true).build().ok();
606 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "L", regex.as_ref()), Some(4000));
607 raw_lines = a.lines_raw(c.pos()..a.len());
608 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "L", regex.as_ref()), Some(4013));
609 raw_lines = a.lines_raw(c.pos()..a.len());
610 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "L", regex.as_ref()), Some(4029));
611 c.set(0);
612 let mut raw_lines = a.lines_raw(0..a.len());
613 let regex = RegexBuilder::new("Léopard")
614 .size_limit(REGEX_SIZE_LIMIT)
615 .case_insensitive(true)
616 .build()
617 .ok();
618 assert_eq!(
619 find(&mut c, &mut raw_lines, CaseInsensitive, "Léopard", regex.as_ref()),
620 Some(4013)
621 );
622 raw_lines = a.lines_raw(c.pos()..a.len());
623 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "Léopard", regex.as_ref()), None);
624 c.set(0);
625 let regex = RegexBuilder::new("老虎")
626 .size_limit(REGEX_SIZE_LIMIT)
627 .case_insensitive(true)
628 .build()
629 .ok();
630 let mut raw_lines = a.lines_raw(0..a.len());
631 assert_eq!(
632 find(&mut c, &mut raw_lines, CaseInsensitive, "老虎", regex.as_ref()),
633 Some(4006)
634 );
635 raw_lines = a.lines_raw(c.pos()..a.len());
636 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "老虎", regex.as_ref()), None);
637 c.set(0);
638 let regex = RegexBuilder::new("Tiger")
639 .size_limit(REGEX_SIZE_LIMIT)
640 .case_insensitive(true)
641 .build()
642 .ok();
643 let mut raw_lines = a.lines_raw(0..a.len());
644 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, "Tiger", regex.as_ref()), None);
645 c.set(0);
646 let mut raw_lines = a.lines_raw(0..a.len());
647 let regex =
648 RegexBuilder::new(".").size_limit(REGEX_SIZE_LIMIT).case_insensitive(true).build().ok();
649 assert_eq!(find(&mut c, &mut raw_lines, CaseInsensitive, ".", regex.as_ref()), Some(0));
650 raw_lines = a.lines_raw(c.pos()..a.len());
651 let regex = RegexBuilder::new("\\s")
652 .size_limit(REGEX_SIZE_LIMIT)
653 .case_insensitive(true)
654 .build()
655 .ok();
656 assert_eq!(
657 find(&mut c, &mut raw_lines, CaseInsensitive, "\\s", regex.as_ref()),
658 Some(4005)
659 );
660 raw_lines = a.lines_raw(c.pos()..a.len());
661 let regex = RegexBuilder::new("\\sLéopard\n.*")
662 .size_limit(REGEX_SIZE_LIMIT)
663 .case_insensitive(true)
664 .build()
665 .ok();
666 assert_eq!(
667 find(&mut c, &mut raw_lines, CaseInsensitive, "\\sLéopard\n.*", regex.as_ref()),
668 Some(4012)
669 );
670 }
671
672 #[test]
compare_cursor_regex_singleline()673 fn compare_cursor_regex_singleline() {
674 let regex = Regex::new(r"^(\*+)(?: +(.*?))?[ \t]*$").unwrap();
675 let rope = Rope::from("** level 2 headline");
676 let mut c = Cursor::new(&rope, 0);
677 let mut l = rope.lines_raw(c.pos()..rope.len());
678 assert!(compare_cursor_regex(&mut c, &mut l, regex.as_str(), ®ex).is_some());
679
680 c.set(3);
681 l = rope.lines_raw(c.pos()..rope.len());
682 assert!(compare_cursor_regex(&mut c, &mut l, regex.as_str(), ®ex).is_none());
683 }
684
685 #[test]
compare_cursor_regex_multiline()686 fn compare_cursor_regex_multiline() {
687 let regex = Regex::new(
688 r"^[ \t]*:PROPERTIES:[ \t]*\n(?:[ \t]*:\S+:(?: .*)?[ \t]*\n)*?[ \t]*:END:[ \t]*\n",
689 )
690 .unwrap();
691
692 // taken from http://doc.norang.ca/org-mode.html#DiaryForAppointments
693 let s = "\
694 #+FILETAGS: PERSONAL\
695 \n* Appointments\
696 \n :PROPERTIES:\
697 \n :CATEGORY: Appt\
698 \n :ARCHIVE: %s_archive::* Appointments\
699 \n :END:\
700 \n** Holidays\
701 \n :PROPERTIES:\
702 \n :Category: Holiday\
703 \n :END:\
704 \n %%(org-calendar-holiday)\
705 \n** Some other Appointment\n";
706 let rope = Rope::from(s);
707 let mut c = Cursor::new(&rope, 0);
708 let mut l = rope.lines_raw(c.pos()..rope.len());
709 assert!(compare_cursor_regex(&mut c, &mut l, regex.as_str(), ®ex).is_none());
710
711 // move to the next line after "* Appointments"
712 c.set(36);
713 l = rope.lines_raw(c.pos()..rope.len());
714 assert!(compare_cursor_regex(&mut c, &mut l, regex.as_str(), ®ex).is_some());
715 assert_eq!(117, c.pos());
716 assert_eq!(Some('*'), c.next_codepoint());
717
718 // move to the next line after "** Holidays"
719 c.set(129);
720 l = rope.lines_raw(c.pos()..rope.len());
721 assert!(compare_cursor_regex(&mut c, &mut l, regex.as_str(), ®ex).is_some());
722 c.next_codepoint();
723 c.next_codepoint();
724 c.next_codepoint();
725 assert_eq!(Some('%'), c.next_codepoint());
726 }
727
728 #[test]
compare_cursor_str_small()729 fn compare_cursor_str_small() {
730 let a = Rope::from("Löwe 老虎 Léopard");
731 let mut c = Cursor::new(&a, 0);
732 let pat = "Löwe 老虎 Léopard";
733 let mut raw_lines = a.lines_raw(0..a.len());
734 assert!(compare_cursor_str(&mut c, &mut raw_lines, pat).is_some());
735 assert_eq!(c.pos(), pat.len());
736 c.set(0);
737 let pat = "Löwe";
738 assert!(compare_cursor_str(&mut c, &mut raw_lines, pat).is_some());
739 assert_eq!(c.pos(), pat.len());
740 c.set(0);
741 // Empty string is valid for compare_cursor_str (but not find)
742 let pat = "";
743 assert!(compare_cursor_str(&mut c, &mut raw_lines, pat).is_some());
744 assert_eq!(c.pos(), pat.len());
745 c.set(0);
746 assert!(compare_cursor_str(&mut c, &mut raw_lines, "Löwe 老虎 Léopardfoo").is_none());
747 }
748
749 #[test]
compare_cursor_str_medium()750 fn compare_cursor_str_medium() {
751 let mut s = String::new();
752 for _ in 0..4000 {
753 s.push('x');
754 }
755 s.push_str("Löwe 老虎 Léopard");
756 let a = Rope::from(&s);
757 let mut c = Cursor::new(&a, 0);
758 let mut raw_lines = a.lines_raw(0..a.len());
759 assert!(compare_cursor_str(&mut c, &mut raw_lines, &s).is_some());
760 c.set(2000);
761 assert!(compare_cursor_str(&mut c, &mut raw_lines, &s[2000..]).is_some());
762 }
763 }
764