1 //! This crate provides a safe wrapper around the
2 //! [Oniguruma](https://github.com/kkos/oniguruma) regular expression library.
3 //!
4 //! # Examples
5 //!
6 //! ```rust
7 //! use onig::Regex;
8 //!
9 //! let regex = Regex::new("e(l+)").unwrap();
10 //! for (i, pos) in regex.captures("hello").unwrap().iter_pos().enumerate() {
11 //!     match pos {
12 //!          Some((beg, end)) =>
13 //!              println!("Group {} captured in position {}:{}", i, beg, end),
14 //!          None =>
15 //!              println!("Group {} is not captured", i)
16 //!     }
17 //! }
18 //! ```
19 //!
20 //! # Match vs Search
21 //!
//! There are two basic things you can do with a `Regex` pattern: test
//! if the pattern matches the whole of a given string, and search for
//! occurrences of the pattern within a string. Oniguruma exposes these
//! two concepts with the *match* and *search* APIs.
26 //!
//! In addition to these two base Oniguruma APIs this crate exposes a
//! third *find* API, built on top of the *search* API.
29 //!
30 //! ```
31 //! # use onig::Regex;
32 //! let pattern = Regex::new("hello").unwrap();
33 //! assert_eq!(true, pattern.find("hello world").is_some());
34 //! assert_eq!(false, pattern.is_match("hello world"));
35 //! ```
36 //!
37 //! ## The *Match* API
38 //!
//! Functions in the match API check if a pattern matches the entire
//! string. The simplest of these is `Regex::is_match`. This returns
//! `true` if the pattern matches the string. For more complex usage
//! `Regex::match_with_options` and `Regex::match_with_encoding`
//! can be used. These allow the capture groups to be inspected,
//! matching with different options, and matching sub-sections of a
//! given text.
46 //!
47 //! ## The *Search* API
48 //!
//! Functions in the search API search for a pattern anywhere within a
//! string. The simplest of these is `Regex::find`. This returns the
//! start and end offsets of the first occurrence of the pattern within
//! the string. For more complex usage `Regex::search_with_options` and
//! `Regex::search_with_encoding` can be used. These allow capture
//! groups to be inspected, searching with different options and
//! searching within subsections of a given text.
56 //!
57 //! ## The *Find* API
58 //!
59 //! The find API is built on top of the search API. Functions in this
60 //! API allow iteration across all matches of the pattern within a
61 //! string, not just the first one. The functions deal with some of
62 //! the complexities of this, such as zero-length matches.
63 //!
//! The simplest step-up from the basic search API `Regex::find` is
//! getting the captures relating to a match with the
//! `Regex::captures` method. To find capture information for all
//! matches within a string `Regex::find_iter` and
//! `Regex::captures_iter` can be used. The former exposes the start
//! and end of the match as `Regex::find` does, the latter exposes the
//! whole capture group information as `Regex::captures` does.
71 //!
72 //! # The `std::pattern` API
73 //!
74 //! In addition to the main Oniguruma API it is possible to use the
75 //! `Regex` object with the
76 //! [`std::pattern`](https://doc.rust-lang.org/std/str/pattern/)
77 //! API. To enable support compile with the `std-pattern` feature. If
78 //! you're using Cargo you can do this by adding the following to your
79 //! Cargo.toml:
80 //!
81 //! ```toml
82 //! [dependencies.onig]
83 //! version = "1.2"
84 //! features = ["std-pattern"]
85 //! ```
86 
87 #![cfg_attr(feature = "std-pattern", feature(pattern))]
88 #![deny(missing_docs)]
89 
90 #[macro_use]
91 extern crate bitflags;
92 #[macro_use]
93 extern crate lazy_static;
94 extern crate onig_sys;
95 #[cfg(windows)]
96 extern crate libc;
97 
98 mod find;
99 mod flags;
100 mod region;
101 mod replace;
102 mod match_param;
103 mod names;
104 mod syntax;
105 mod tree;
106 mod utils;
107 mod buffers;
108 
109 #[cfg(feature = "std-pattern")]
110 mod pattern;
111 
// re-export the onig types publicly
113 pub use flags::*;
114 pub use match_param::MatchParam;
115 pub use names::CaptureNames;
116 pub use region::Region;
117 pub use find::{Captures, FindCaptures, FindMatches, RegexSplits, RegexSplitsN, SubCaptures,
118                SubCapturesPos};
119 pub use buffers::{EncodedBytes, EncodedChars};
120 pub use replace::Replacer;
121 pub use tree::{CaptureTreeNode, CaptureTreeNodeIter};
122 pub use syntax::{MetaChar, Syntax};
123 pub use utils::{copyright, define_user_property, version};
124 
125 use std::{error, fmt, str};
126 use std::sync::Mutex;
127 use std::ptr::{null, null_mut};
128 use std::os::raw::c_int;
129 
/// This structure represents an error from the underlying Oniguruma library.
pub struct Error {
    // Numeric error code, kept in the C `int` type used at the FFI boundary.
    code: c_int,
    // Human-readable message describing the error.
    description: String,
}
135 
/// This struct is a wrapper around an Oniguruma regular expression
/// pointer. This represents a compiled regex which can be used in
/// search and match operations.
///
/// Note that the derived `Eq`/`PartialEq` implementations compare the
/// wrapped raw pointer, not the pattern text the regex was built from.
#[derive(Debug, Eq, PartialEq)]
pub struct Regex {
    // Raw handle filled in by `onig_new`; released with `onig_free` on drop.
    raw: onig_sys::OnigRegexMut,
}
143 
// NOTE(review): these impls assert that a compiled regex may be moved to and
// shared between threads. Presumably this relies on Oniguruma not mutating
// the compiled regex during search/match calls -- confirm against the
// Oniguruma thread-safety documentation.
unsafe impl Send for Regex {}
unsafe impl Sync for Regex {}
146 
147 impl Error {
from_code_and_info(code: c_int, info: &onig_sys::OnigErrorInfo) -> Error148     fn from_code_and_info(code: c_int, info: &onig_sys::OnigErrorInfo) -> Error {
149         Error::new(code, info)
150     }
151 
from_code(code: c_int) -> Error152     fn from_code(code: c_int) -> Error {
153         Error::new(code, null())
154     }
155 
new(code: c_int, info: *const onig_sys::OnigErrorInfo) -> Error156     fn new(code: c_int, info: *const onig_sys::OnigErrorInfo) -> Error {
157         let buff = &mut [0; onig_sys::ONIG_MAX_ERROR_MESSAGE_LEN as usize];
158         let len = unsafe { onig_sys::onig_error_code_to_str(buff.as_mut_ptr(), code, info) };
159         let description = str::from_utf8(&buff[..len as usize]).unwrap();
160         Error {
161             code,
162             description: description.to_owned(),
163         }
164     }
165 
166     /// Return Oniguruma engine error code.
code(&self) -> i32167     pub fn code(&self) -> i32 {
168         self.code
169     }
170 
171     /// Return error description provided by Oniguruma engine.
description(&self) -> &str172     pub fn description(&self) -> &str {
173         &self.description
174     }
175 }
176 
177 impl error::Error for Error {
description(&self) -> &str178     fn description(&self) -> &str {
179         &self.description
180     }
181 }
182 
183 impl fmt::Display for Error {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result184     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
185         write!(f, "Oniguruma error: {}", self.description())
186     }
187 }
188 
189 impl fmt::Debug for Error {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result190     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
191         write!(f, "Error({}, {})", self.code, self.description())
192     }
193 }
194 
lazy_static! {
    // Global lock taken around the call to `onig_new`, so that regex
    // compilation is never entered by more than one thread at a time.
    // The mutex guards no data of its own.
    static ref REGEX_NEW_MUTEX: Mutex<()> = Mutex::new(());
}
198 
199 impl Regex {
200     /// Create a Regex
201     ///
202     /// Simple regular expression constructor. Compiles a new regular
203     /// expression with the default options using the ruby syntax.
204     /// Once compiled, it can be used repeatedly to search in a string. If an
205     /// invalid expression is given, then an error is returned.
206     ///
207     /// # Arguments
208     ///
209     /// * `pattern` - The regex pattern to compile
210     ///
211     /// # Examples
212     ///
213     /// ```
214     /// use onig::Regex;
215     /// let r = Regex::new(r#"hello (\w+)"#);
216     /// assert!(r.is_ok());
217     /// ```
new(pattern: &str) -> Result<Self, Error>218     pub fn new(pattern: &str) -> Result<Self, Error> {
219         Regex::with_encoding(pattern)
220     }
221 
222     /// Create a Regex, Specifying an Encoding
223     ///
224     /// Attempts to compile `pattern` into a new `Regex`
225     /// instance. Instead of assuming UTF-8 as the encoding scheme the
226     /// encoding is inferred from the `pattern` buffer.
227     ///
228     /// # Arguments
229     ///
230     /// * `pattern` - The regex pattern to compile
231     ///
232     /// # Examples
233     ///
234     /// ```
235     /// use onig::{Regex, EncodedBytes};
236     /// let utf8 = Regex::with_encoding("hello");
237     /// assert!(utf8.is_ok());
238     /// let ascii = Regex::with_encoding(EncodedBytes::ascii(b"world"));
239     /// assert!(ascii.is_ok());
240     /// ```
with_encoding<T>(pattern: T) -> Result<Regex, Error> where T: EncodedChars,241     pub fn with_encoding<T>(pattern: T) -> Result<Regex, Error>
242     where
243         T: EncodedChars,
244     {
245         Regex::with_options_and_encoding(
246             pattern,
247             RegexOptions::REGEX_OPTION_NONE,
248             Syntax::default(),
249         )
250     }
251 
252     /// Create a new Regex
253     ///
254     /// Attempts to compile a pattern into a new `Regex` instance.
255     /// Once compiled, it can be used repeatedly to search in a string. If an
256     /// invalid expression is given, then an error is returned.
257     /// See [`onig_sys::onig_new`][regex_new] for more information.
258     ///
259     /// # Arguments
260     ///
261     ///  * `pattern` - The regex pattern to compile.
262     ///  * `options` - The regex compilation options.
263     ///  * `syntax`  - The syntax which the regex is written in.
264     ///
265     /// # Examples
266     ///
267     /// ```
268     /// use onig::{Regex, Syntax, RegexOptions};
269     /// let r = Regex::with_options("hello.*world",
270     ///                             RegexOptions::REGEX_OPTION_NONE,
271     ///                             Syntax::default());
272     /// assert!(r.is_ok());
273     /// ```
274     ///
275     /// [regex_new]: ./onig_sys/fn.onig_new.html
with_options( pattern: &str, option: RegexOptions, syntax: &Syntax, ) -> Result<Regex, Error>276     pub fn with_options(
277         pattern: &str,
278         option: RegexOptions,
279         syntax: &Syntax,
280     ) -> Result<Regex, Error> {
281         Regex::with_options_and_encoding(pattern, option, syntax)
282     }
283 
284     /// Create a new Regex, Specifying Options and Ecoding
285     ///
286     /// Attempts to comile the given `pattern` into a new `Regex`
287     /// instance. Instead of assuming UTF-8 as the encoding scheme the
288     /// encoding is inferred from the `pattern` buffer. If the regex
289     /// fails to compile the returned `Error` value from
290     /// [`onig_new`][regex_new] contains more information.
291     ///
292     /// [regex_new]: ./onig_sys/fn.onig_new.html
293     ///
294     /// # Arguments
295     ///
296     ///  * `pattern` - The regex pattern to compile.
297     ///  * `options` - The regex compilation options.
298     ///  * `syntax`  - The syntax which the regex is written in.
299     ///
300     /// # Examples
301     /// ```
302     /// use onig::{Regex, Syntax, EncodedBytes, RegexOptions};
303     /// let pattern = EncodedBytes::ascii(b"hello");
304     /// let r = Regex::with_options_and_encoding(pattern,
305     ///                                          RegexOptions::REGEX_OPTION_SINGLELINE,
306     ///                                          Syntax::default());
307     /// assert!(r.is_ok());
308     /// ```
with_options_and_encoding<T>( pattern: T, option: RegexOptions, syntax: &Syntax, ) -> Result<Self, Error> where T: EncodedChars,309     pub fn with_options_and_encoding<T>(
310         pattern: T,
311         option: RegexOptions,
312         syntax: &Syntax,
313     ) -> Result<Self, Error>
314     where
315         T: EncodedChars,
316     {
317         // Convert the rust types to those required for the call to
318         // `onig_new`.
319         let mut reg: onig_sys::OnigRegexMut = null_mut();
320         let reg_ptr = &mut reg as *mut onig_sys::OnigRegexMut;
321 
322         // We can use this later to get an error message to pass back
323         // if regex creation fails.
324         let mut error = onig_sys::OnigErrorInfo {
325             enc: null(),
326             par: null(),
327             par_end: null(),
328         };
329 
330         let err = unsafe {
331             // Grab a lock to make sure that `onig_new` isn't called by
332             // more than one thread at a time.
333             let _guard = REGEX_NEW_MUTEX.lock().unwrap();
334             onig_sys::onig_new(
335                 reg_ptr,
336                 pattern.start_ptr(),
337                 pattern.limit_ptr(),
338                 option.bits(),
339                 pattern.encoding(),
340                 syntax as *const Syntax as *const onig_sys::OnigSyntaxType,
341                 &mut error,
342             )
343         };
344 
345         if err == onig_sys::ONIG_NORMAL {
346             Ok(Regex { raw: reg })
347         } else {
348             Err(Error::from_code_and_info(err, &error))
349         }
350     }
351 
352     /// Match String
353     ///
354     /// Try to match the regex against the given string slice,
355     /// starting at a given offset. This method works the same way as
356     /// `match_with_encoding`, but the encoding is always utf-8.
357     ///
358     /// For more information see [Match vs
359     /// Search](index.html#match-vs-search)
360     ///
361     /// # Arguments
362     ///
363     /// * `str` - The string slice to match against.
364     /// * `at` - The byte index in the passed slice to start matching
365     /// * `options` - The regex match options.
366     /// * `region` - The region for return group match range info
367     ///
368     /// # Returns
369     ///
370     /// `Some(len)` if the regex matched, with `len` being the number
371     /// of bytes matched. `None` if the regex doesn't match.
372     ///
373     /// # Examples
374     ///
375     /// ```
376     /// use onig::{Regex, SearchOptions};
377     ///
378     /// let r = Regex::new(".*").unwrap();
379     /// let res = r.match_with_options("hello", 0, SearchOptions::SEARCH_OPTION_NONE, None);
380     /// assert!(res.is_some()); // it matches
381     /// assert!(res.unwrap() == 5); // 5 characters matched
382     /// ```
match_with_options( &self, str: &str, at: usize, options: SearchOptions, region: Option<&mut Region>, ) -> Option<usize>383     pub fn match_with_options(
384         &self,
385         str: &str,
386         at: usize,
387         options: SearchOptions,
388         region: Option<&mut Region>,
389     ) -> Option<usize> {
390         self.match_with_encoding(str, at, options, region)
391     }
392 
393     /// Match String with Encoding
394     ///
395     /// Match the regex against a string. This method will start at
396     /// the offset `at` into the string and try and match the
397     /// regex. If the regex matches then the return value is the
398     /// number of characters which matched. If the regex doesn't match
399     /// the return is `None`.
400     ///
401     /// For more information see [Match vs
402     /// Search](index.html#match-vs-search)
403     ///
404     /// The contents of `chars` must have the same encoding that was
405     /// used to construct the regex.
406     ///
407     /// # Arguments
408     ///
409     /// * `chars` - The buffer to match against.
410     /// * `at` - The byte index in the passed buffer to start matching
411     /// * `options` - The regex match options.
412     /// * `region` - The region for return group match range info
413     ///
414     /// # Returns
415     ///
416     /// `Some(len)` if the regex matched, with `len` being the number
417     /// of bytes matched. `None` if the regex doesn't match.
418     ///
419     /// # Examples
420     ///
421     /// ```
422     /// use onig::{Regex, EncodedBytes, SearchOptions};
423     ///
424     /// let r = Regex::with_encoding(EncodedBytes::ascii(b".*")).unwrap();
425     /// let res = r.match_with_encoding(EncodedBytes::ascii(b"world"),
426     ///                                 0, SearchOptions::SEARCH_OPTION_NONE, None);
427     /// assert!(res.is_some()); // it matches
428     /// assert!(res.unwrap() == 5); // 5 characters matched
429     /// ```
match_with_encoding<T>( &self, chars: T, at: usize, options: SearchOptions, region: Option<&mut Region>, ) -> Option<usize> where T: EncodedChars,430     pub fn match_with_encoding<T>(
431         &self,
432         chars: T,
433         at: usize,
434         options: SearchOptions,
435         region: Option<&mut Region>,
436     ) -> Option<usize>
437     where
438         T: EncodedChars,
439     {
440         let match_param = MatchParam::default();
441         let result = self.match_with_param(chars, at, options, region, match_param);
442 
443         match result {
444             Ok(r) => r,
445             Err(e) => panic!("Onig: Regex match error: {}", e.description())
446         }
447     }
448 
449     /// Match string with encoding and match param
450     ///
451     /// Match the regex against a string. This method will start at
452     /// the offset `at` into the string and try and match the
453     /// regex. If the regex matches then the return value is the
454     /// number of characters which matched. If the regex doesn't match
455     /// the return is `None`.
456     ///
457     /// For more information see [Match vs
458     /// Search](index.html#match-vs-search)
459     ///
460     /// The contents of `chars` must have the same encoding that was
461     /// used to construct the regex.
462     ///
463     /// # Arguments
464     ///
465     /// * `chars` - The buffer to match against.
466     /// * `at` - The byte index in the passed buffer to start matching
467     /// * `options` - The regex match options.
468     /// * `region` - The region for return group match range info
469     /// * `match_param` - The match parameters
470     ///
471     /// # Returns
472     ///
473     /// `Ok(Some(len))` if the regex matched, with `len` being the number
474     /// of bytes matched. `Ok(None)` if the regex doesn't match. `Err` with an
475     /// `Error` if an error occurred (e.g. retry-limit-in-match exceeded).
476     ///
477     /// # Examples
478     ///
479     /// ```
480     /// use onig::{Regex, EncodedBytes, MatchParam, SearchOptions};
481     ///
482     /// let r = Regex::with_encoding(EncodedBytes::ascii(b".*")).unwrap();
483     /// let res = r.match_with_param(EncodedBytes::ascii(b"world"),
484     ///                              0, SearchOptions::SEARCH_OPTION_NONE,
485     ///                              None, MatchParam::default());
486     /// assert!(res.is_ok()); // matching did not error
487     /// assert!(res.unwrap() == Some(5)); // 5 characters matched
488     /// ```
match_with_param<T>( &self, chars: T, at: usize, options: SearchOptions, region: Option<&mut Region>, match_param: MatchParam, ) -> Result<Option<usize>, Error> where T: EncodedChars,489     pub fn match_with_param<T>(
490         &self,
491         chars: T,
492         at: usize,
493         options: SearchOptions,
494         region: Option<&mut Region>,
495         match_param: MatchParam,
496     ) -> Result<Option<usize>, Error>
497         where
498             T: EncodedChars,
499     {
500         assert_eq!(chars.encoding(), self.encoding());
501         let r = unsafe {
502             let offset = chars.start_ptr().add(at);
503             assert!(offset <= chars.limit_ptr());
504             onig_sys::onig_match_with_param(
505                 self.raw,
506                 chars.start_ptr(),
507                 chars.limit_ptr(),
508                 offset,
509                 match region {
510                     Some(region) => region as *mut Region as *mut onig_sys::OnigRegion,
511                     None => std::ptr::null_mut(),
512                 },
513                 options.bits(),
514                 match_param.as_raw()
515             )
516         };
517 
518         if r >= 0 {
519             Ok(Some(r as usize))
520         } else if r == onig_sys::ONIG_MISMATCH {
521             Ok(None)
522         } else {
523             Err(Error::from_code(r))
524         }
525     }
526 
527     /// Search pattern in string
528     ///
529     /// Search for matches the regex in a string. This method will return the
530     /// index of the first match of the regex within the string, if
531     /// there is one. If `from` is less than `to`, then search is performed
532     /// in forward order, otherwise – in backward order.
533     ///
534     /// For more information see [Match vs
535     /// Search](index.html#match-vs-search)
536     ///
537     /// # Arguments
538     ///
539     ///  * `str` - The string to search in.
540     ///  * `from` - The byte index in the passed slice to start search
541     ///  * `to` - The byte index in the passed slice to finish search
542     ///  * `options` - The options for the search.
543     ///  * `region` - The region for return group match range info
544     ///
545     /// # Returns
546     ///
547     /// `Some(pos)` if the regex matches, where `pos` is the
548     /// byte-position of the start of the match. `None` if the regex
549     /// doesn't match anywhere in `str`.
550     ///
551     /// # Examples
552     ///
553     /// ```
554     /// use onig::{Regex, SearchOptions};
555     ///
556     /// let r = Regex::new("l{1,2}").unwrap();
557     /// let res = r.search_with_options("hello", 0, 5, SearchOptions::SEARCH_OPTION_NONE, None);
558     /// assert!(res.is_some()); // it matches
559     /// assert!(res.unwrap() == 2); // match starts at character 3
560     /// ```
search_with_options( &self, str: &str, from: usize, to: usize, options: SearchOptions, region: Option<&mut Region>, ) -> Option<usize>561     pub fn search_with_options(
562         &self,
563         str: &str,
564         from: usize,
565         to: usize,
566         options: SearchOptions,
567         region: Option<&mut Region>,
568     ) -> Option<usize> {
569         self.search_with_encoding(str, from, to, options, region)
570     }
571 
572     /// Search for a Pattern in a String with an Encoding
573     ///
574     /// Search for matches the regex in a string. This method will
575     /// return the index of the first match of the regex within the
576     /// string, if there is one. If `from` is less than `to`, then
577     /// search is performed in forward order, otherwise – in backward
578     /// order.
579     ///
580     /// For more information see [Match vs
581     /// Search](index.html#match-vs-search)
582     ///
583     /// The encoding of the buffer passed to search in must match the
584     /// encoding of the regex.
585     ///
586     /// # Arguments
587     ///
588     ///  * `chars` - The character buffer to search in.
589     ///  * `from` - The byte index in the passed slice to start search
590     ///  * `to` - The byte index in the passed slice to finish search
591     ///  * `options` - The options for the search.
592     ///  * `region` - The region for return group match range info
593     ///
594     /// # Returns
595     ///
596     /// `Some(pos)` if the regex matches, where `pos` is the
597     /// byte-position of the start of the match. `None` if the regex
598     /// doesn't match anywhere in `chars`.
599     ///
600     /// # Examples
601     ///
602     /// ```
603     /// use onig::{Regex, EncodedBytes, SearchOptions};
604     ///
605     /// let r = Regex::with_encoding(EncodedBytes::ascii(b"l{1,2}")).unwrap();
606     /// let res = r.search_with_encoding(EncodedBytes::ascii(b"hello"),
607     ///                                  0, 5, SearchOptions::SEARCH_OPTION_NONE, None);
608     /// assert!(res.is_some()); // it matches
609     /// assert!(res.unwrap() == 2); // match starts at character 3
610     /// ```
search_with_encoding<T>( &self, chars: T, from: usize, to: usize, options: SearchOptions, region: Option<&mut Region>, ) -> Option<usize> where T: EncodedChars,611     pub fn search_with_encoding<T>(
612         &self,
613         chars: T,
614         from: usize,
615         to: usize,
616         options: SearchOptions,
617         region: Option<&mut Region>,
618     ) -> Option<usize>
619     where
620         T: EncodedChars,
621     {
622         let match_param = MatchParam::default();
623         let result = self.search_with_param(chars, from, to, options, region, match_param);
624 
625         match result {
626             Ok(r) => r,
627             Err(e) => panic!("Onig: Regex search error: {}", e.description)
628         }
629     }
630 
631     /// Search pattern in string with encoding and match param
632     ///
633     /// Search for matches the regex in a string. This method will
634     /// return the index of the first match of the regex within the
635     /// string, if there is one. If `from` is less than `to`, then
636     /// search is performed in forward order, otherwise – in backward
637     /// order.
638     ///
639     /// For more information see [Match vs
640     /// Search](index.html#match-vs-search)
641     ///
642     /// The encoding of the buffer passed to search in must match the
643     /// encoding of the regex.
644     ///
645     /// # Arguments
646     ///
647     ///  * `chars` - The character buffer to search in.
648     ///  * `from` - The byte index in the passed slice to start search
649     ///  * `to` - The byte index in the passed slice to finish search
650     ///  * `options` - The options for the search.
651     ///  * `region` - The region for return group match range info
652     ///  * `match_param` - The match parameters
653     ///
654     /// # Returns
655     ///
656     /// `Ok(Some(pos))` if the regex matches, where `pos` is the
657     /// byte-position of the start of the match. `Ok(None)` if the regex
658     /// doesn't match anywhere in `chars`. `Err` with an `Error` if an error
659     /// occurred (e.g. retry-limit-in-match exceeded).
660     ///
661     /// # Examples
662     ///
663     /// ```
664     /// use onig::{Regex, EncodedBytes, MatchParam, SearchOptions};
665     ///
666     /// let r = Regex::with_encoding(EncodedBytes::ascii(b"l{1,2}")).unwrap();
667     /// let res = r.search_with_param(EncodedBytes::ascii(b"hello"),
668     ///                               0, 5, SearchOptions::SEARCH_OPTION_NONE,
669     ///                               None, MatchParam::default());
670     /// assert!(res.is_ok()); // matching did not error
671     /// assert!(res.unwrap() == Some(2)); // match starts at character 3
672     /// ```
search_with_param<T>( &self, chars: T, from: usize, to: usize, options: SearchOptions, region: Option<&mut Region>, match_param: MatchParam, ) -> Result<Option<usize>, Error> where T: EncodedChars,673     pub fn search_with_param<T>(
674         &self,
675         chars: T,
676         from: usize,
677         to: usize,
678         options: SearchOptions,
679         region: Option<&mut Region>,
680         match_param: MatchParam,
681     ) -> Result<Option<usize>, Error>
682         where
683             T: EncodedChars,
684     {
685         let (beg, end) = (chars.start_ptr(), chars.limit_ptr());
686         assert_eq!(self.encoding(), chars.encoding());
687         let r = unsafe {
688             let start = beg.add(from );
689             let range = beg.add(to);
690             assert!(start <= end);
691             assert!(range <= end);
692             onig_sys::onig_search_with_param(
693                 self.raw,
694                 beg,
695                 end,
696                 start,
697                 range,
698                 match region {
699                     Some(region) => region as *mut Region as *mut onig_sys::OnigRegion,
700                     None => std::ptr::null_mut(),
701                 },
702                 options.bits(),
703                 match_param.as_raw()
704             )
705         };
706 
707         if r >= 0 {
708             Ok(Some(r as usize))
709         } else if r == onig_sys::ONIG_MISMATCH {
710             Ok(None)
711         } else {
712             Err(Error::from_code(r))
713         }
714     }
715 
716     /// Returns true if and only if the regex matches the string given.
717     ///
718     /// For more information see [Match vs
719     /// Search](index.html#match-vs-search)
720     ///
721     /// # Arguments
722     ///  * `text` - The string slice to test against the pattern.
723     ///
724     /// # Returns
725     ///
726     /// `true` if the pattern matches the whole of `text`, `false` otherwise.
is_match(&self, text: &str) -> bool727     pub fn is_match(&self, text: &str) -> bool {
728         self.match_with_options(text, 0, SearchOptions::SEARCH_OPTION_NONE, None)
729             .map(|r| r == text.len())
730             .unwrap_or(false)
731     }
732 
733     /// Find a Match in a Buffer, With Encoding
734     ///
735     /// Finds the first match of the regular expression within the
736     /// buffer.
737     ///
738     /// Note that this should only be used if you want to discover the
739     /// position of the match within a string. Testing if a pattern
740     /// matches the whole string is faster if you use `is_match`.  For
741     /// more information see [Match vs
742     /// Search](index.html#match-vs-search)
743     ///
744     /// # Arguments
745     ///  * `text` - The text to search in.
746     ///
747     /// # Returns
748     ///
749     ///  The offset of the start and end of the first match. If no
750     ///  match exists `None` is returned.
find(&self, text: &str) -> Option<(usize, usize)>751     pub fn find(&self, text: &str) -> Option<(usize, usize)> {
752         self.find_with_encoding(text)
753     }
754 
755     /// Find a Match in a Buffer, With Encoding
756     ///
757     /// Finds the first match of the regular expression within the
758     /// buffer.
759     ///
760     /// For more information see [Match vs
761     /// Search](index.html#match-vs-search)
762     ///
763     /// # Arguments
764     ///  * `text` - The text to search in.
765     ///
766     /// # Returns
767     ///
768     ///  The offset of the start and end of the first match. If no
769     ///  match exists `None` is returned.
find_with_encoding<T>(&self, text: T) -> Option<(usize, usize)> where T: EncodedChars,770     pub fn find_with_encoding<T>(&self, text: T) -> Option<(usize, usize)>
771     where
772         T: EncodedChars,
773     {
774         let mut region = Region::new();
775         let len = text.len();
776         self.search_with_encoding(
777             text,
778             0,
779             len,
780             SearchOptions::SEARCH_OPTION_NONE,
781             Some(&mut region),
782         ).and_then(|_| region.pos(0))
783     }
784 
785     /// Get the Encoding of the Regex
786     ///
787     /// # Returns
788     ///
789     /// Returns a reference to an oniguruma encoding which was used
790     /// when this regex was created.
encoding(&self) -> onig_sys::OnigEncoding791     pub fn encoding(&self) -> onig_sys::OnigEncoding {
792         unsafe { onig_sys::onig_get_encoding(self.raw) }
793     }
794 
795     /// Get the Number of Capture Groups in this Pattern
captures_len(&self) -> usize796     pub fn captures_len(&self) -> usize {
797         unsafe { onig_sys::onig_number_of_captures(self.raw) as usize }
798     }
799 
800     /// Get the Size of the Capture Histories for this Pattern
capture_histories_len(&self) -> usize801     pub fn capture_histories_len(&self) -> usize {
802         unsafe { onig_sys::onig_number_of_capture_histories(self.raw) as usize }
803     }
804 }
805 
806 impl Drop for Regex {
drop(&mut self)807     fn drop(&mut self) {
808         unsafe {
809             onig_sys::onig_free(self.raw);
810         }
811     }
812 }
813 
#[cfg(test)]
mod tests {
    use super::*;
    use std::panic;

    /// Pattern/input pair shared by the retry-limit tests: the tests
    /// below expect matching or searching `BACKTRACK_INPUT` with
    /// `BACKTRACK_PATTERN` to fail with "retry-limit-in-match over".
    const BACKTRACK_PATTERN: &str = "(a|b|ab)*bc";
    const BACKTRACK_INPUT: &str = "ababababababababababababababababababababababababababababacbc";

    // Both constructors must accept a valid pattern; `unwrap` fails the
    // test if compilation of the pattern reports an error.
    #[test]
    fn test_regex_create() {
        let with_explicit_options = Regex::with_options(
            ".*",
            RegexOptions::REGEX_OPTION_NONE,
            Syntax::default(),
        );
        with_explicit_options.unwrap();

        let with_defaults = Regex::new(r#"a \w+ word"#);
        with_defaults.unwrap();
    }

    // An unknown character property must be rejected with the expected
    // error code and message.
    #[test]
    fn test_regex_invalid() {
        let err = Regex::new("\\p{foo}").unwrap_err();
        assert_eq!(err.code(), -223);
        assert_eq!(err.description(), "invalid character property name {foo}");
    }

    // A pattern that does not match at the given offset yields `None`.
    #[test]
    fn test_failed_match() {
        let regex = Regex::new("foo").unwrap();
        let outcome =
            regex.match_with_options("bar", 0, SearchOptions::SEARCH_OPTION_NONE, None);
        assert_eq!(outcome, None);
    }

    // Searching fills the supplied region with the whole match and the
    // capture group, and returns the offset where the match starts.
    #[test]
    fn test_regex_search_with_options() {
        let mut region = Region::new();
        let regex = Regex::new("e(l+)").unwrap();

        let found_at = regex.search_with_options(
            "hello",
            0,
            5,
            SearchOptions::SEARCH_OPTION_NONE,
            Some(&mut region),
        );

        assert!(region.tree().is_none());
        assert_eq!(found_at, Some(1));
        assert_eq!(region.len(), 2);
        let whole = region.pos(0).unwrap();
        let group = region.pos(1).unwrap();
        assert_eq!(whole, (1, 4));
        assert_eq!(group, (2, 4));

        // The region is populated at this point, so it doubles as a
        // check that cloning preserves the recorded positions.
        let duplicate = region.clone();
        assert_eq!(duplicate.pos(0).unwrap(), whole);
    }

    // Matching from offset 0 fills the supplied region and returns the
    // length of the match.
    #[test]
    fn test_regex_match_with_options() {
        let mut region = Region::new();
        let regex = Regex::new("he(l+)").unwrap();

        let matched_len = regex.match_with_options(
            "hello",
            0,
            SearchOptions::SEARCH_OPTION_NONE,
            Some(&mut region),
        );

        assert!(region.tree().is_none());
        assert_eq!(matched_len, Some(4));
        assert_eq!(region.len(), 2);
        let whole = region.pos(0).unwrap();
        let group = region.pos(1).unwrap();
        assert_eq!(whole, (0, 4));
        assert_eq!(group, (2, 4));
    }

    // `is_match` accepts only text that the pattern covers entirely.
    #[test]
    fn test_regex_is_match() {
        let regex = Regex::new("he(l+)o").unwrap();
        let cases = [("hello", true), ("hello 2.0", false)];
        for &(text, expected) in cases.iter() {
            assert_eq!(regex.is_match(text), expected);
        }
    }

    // `find` reports the span of the first occurrence, or `None`.
    #[test]
    fn test_regex_find() {
        let regex = Regex::new("he(l+)o").unwrap();
        let cases: [(&str, Option<(usize, usize)>); 2] =
            [("hey, hello!", Some((5, 10))), ("hey, honey!", None)];
        for &(text, expected) in cases.iter() {
            assert_eq!(regex.find(text), expected);
        }
    }

    // Three parenthesised groups are reported as three captures.
    #[test]
    fn test_regex_captures_len() {
        let group_count = Regex::new("(he)(l+)(o)").unwrap().captures_len();
        assert_eq!(group_count, 3);
    }

    // The `_with_param` match variant surfaces the retry-limit failure
    // as an error value.
    #[test]
    fn test_regex_error_is_match() {
        let regex = Regex::new(BACKTRACK_PATTERN).unwrap();
        let err = regex
            .match_with_param(
                BACKTRACK_INPUT,
                0,
                SearchOptions::SEARCH_OPTION_NONE,
                None,
                MatchParam::default(),
            )
            .err()
            .unwrap();

        assert_eq!("retry-limit-in-match over", err.description());
    }

    // `is_match` has no error channel, so the same failure shows up as
    // a panic carrying a formatted message.
    #[test]
    fn test_regex_panic_is_match() {
        let regex = Regex::new(BACKTRACK_PATTERN).unwrap();
        let payload = panic::catch_unwind(|| regex.is_match(BACKTRACK_INPUT))
            .err()
            .unwrap();
        let message = payload.downcast_ref::<String>().unwrap();
        assert_eq!(
            message.as_str(),
            "Onig: Regex match error: retry-limit-in-match over"
        );
    }

    // The `_with_param` search variant surfaces the retry-limit failure
    // as an error value.
    #[test]
    fn test_regex_error_find() {
        let regex = Regex::new(BACKTRACK_PATTERN).unwrap();
        let haystack = BACKTRACK_INPUT;
        let err = regex
            .search_with_param(
                haystack,
                0,
                haystack.len(),
                SearchOptions::SEARCH_OPTION_NONE,
                None,
                MatchParam::default(),
            )
            .err()
            .unwrap();

        assert_eq!("retry-limit-in-match over", err.description());
    }

    // `find` has no error channel, so the same failure shows up as a
    // panic carrying a formatted message.
    #[test]
    fn test_regex_panic_find() {
        let regex = Regex::new(BACKTRACK_PATTERN).unwrap();
        let payload = panic::catch_unwind(|| regex.find(BACKTRACK_INPUT))
            .err()
            .unwrap();
        let message = payload.downcast_ref::<String>().unwrap();
        assert_eq!(
            message.as_str(),
            "Onig: Regex search error: retry-limit-in-match over"
        );
    }
}
954