1 //! This crate provides a safe wrapper around the 2 //! [Oniguruma](https://github.com/kkos/oniguruma) regular expression library. 3 //! 4 //! # Examples 5 //! 6 //! ```rust 7 //! use onig::Regex; 8 //! 9 //! let regex = Regex::new("e(l+)").unwrap(); 10 //! for (i, pos) in regex.captures("hello").unwrap().iter_pos().enumerate() { 11 //! match pos { 12 //! Some((beg, end)) => 13 //! println!("Group {} captured in position {}:{}", i, beg, end), 14 //! None => 15 //! println!("Group {} is not captured", i) 16 //! } 17 //! } 18 //! ``` 19 //! 20 //! # Match vs Search 21 //! 22 //! There are two basic things you can do with a `Regex` pattern; test 23 //! if the pattern matches the whole of a given string, and search for 24 //! occurences of the pattern within a string. Oniguruma exposes these 25 //! two concepts with the *match* and *search* APIs. 26 //! 27 //! In addition two these two base Onigurma APIs this crate exposes a 28 //! third *find* API, built on top of the *search* API. 29 //! 30 //! ``` 31 //! # use onig::Regex; 32 //! let pattern = Regex::new("hello").unwrap(); 33 //! assert_eq!(true, pattern.find("hello world").is_some()); 34 //! assert_eq!(false, pattern.is_match("hello world")); 35 //! ``` 36 //! 37 //! ## The *Match* API 38 //! 39 //! Functions in the match API check if a pattern matches the entire 40 //! string. The simplest of these is `Regex::is_match`. This retuns a 41 //! `true` if the pattern matches the string. For more complex useage 42 //! then `Regex::match_with_options` and `Regex::match_with_encoding` 43 //! can be used. These allow the capture groups to be inspected, 44 //! matching with different options, and matching sub-sections of a 45 //! given text. 46 //! 47 //! ## The *Search* API 48 //! 49 //! Function in the search API search for a pattern anywhere within a 50 //! string. The simplist of these is `Regex::find`. This returns the 51 //! offset of the first occurence of the pattern within the string. 52 //! For more complex useage `Regex::search_with_options` and 53 //! `Regex::search_with_encoding` can be used. These allow capture 54 //! groups to be inspected, searching with different options and 55 //! searching within subsections of a given text. 56 //! 57 //! ## The *Find* API 58 //! 59 //! The find API is built on top of the search API. Functions in this 60 //! API allow iteration across all matches of the pattern within a 61 //! string, not just the first one. The functions deal with some of 62 //! the complexities of this, such as zero-length matches. 63 //! 64 //! The simplest step-up from the basic search API `Regex::find` is 65 //! getting the captures relating to a match with the 66 //! `Regex::capturess` method. To find capture information for all 67 //! matches within a string `Regex::find_iter` and 68 //! `Regex::captures_iter` can be used. The former exposes the start 69 //! and end of the match as `Regex::find` does, the latter exposes the 70 //! whole capture group information as `Regex::captures` does. 71 //! 72 //! # The `std::pattern` API 73 //! 74 //! In addition to the main Oniguruma API it is possible to use the 75 //! `Regex` object with the 76 //! [`std::pattern`](https://doc.rust-lang.org/std/str/pattern/) 77 //! API. To enable support compile with the `std-pattern` feature. If 78 //! you're using Cargo you can do this by adding the following to your 79 //! Cargo.toml: 80 //! 81 //! ```toml 82 //! [dependencies.onig] 83 //! version = "1.2" 84 //! features = ["std-pattern"] 85 //! ``` 86 87 #![cfg_attr(feature = "std-pattern", feature(pattern))] 88 #![deny(missing_docs)] 89 90 #[macro_use] 91 extern crate bitflags; 92 #[macro_use] 93 extern crate lazy_static; 94 extern crate onig_sys; 95 #[cfg(windows)] 96 extern crate libc; 97 98 mod find; 99 mod flags; 100 mod region; 101 mod replace; 102 mod match_param; 103 mod names; 104 mod syntax; 105 mod tree; 106 mod utils; 107 mod buffers; 108 109 #[cfg(feature = "std-pattern")] 110 mod pattern; 111 112 // re-export the onig types publically 113 pub use flags::*; 114 pub use match_param::MatchParam; 115 pub use names::CaptureNames; 116 pub use region::Region; 117 pub use find::{Captures, FindCaptures, FindMatches, RegexSplits, RegexSplitsN, SubCaptures, 118 SubCapturesPos}; 119 pub use buffers::{EncodedBytes, EncodedChars}; 120 pub use replace::Replacer; 121 pub use tree::{CaptureTreeNode, CaptureTreeNodeIter}; 122 pub use syntax::{MetaChar, Syntax}; 123 pub use utils::{copyright, define_user_property, version}; 124 125 use std::{error, fmt, str}; 126 use std::sync::Mutex; 127 use std::ptr::{null, null_mut}; 128 use std::os::raw::c_int; 129 130 /// This struture represents an error from the underlying Oniguruma libray. 131 pub struct Error { 132 code: c_int, 133 description: String, 134 } 135 136 /// This struct is a wrapper around an Oniguruma regular expression 137 /// pointer. This represents a compiled regex which can be used in 138 /// search and match operations. 139 #[derive(Debug, Eq, PartialEq)] 140 pub struct Regex { 141 raw: onig_sys::OnigRegexMut, 142 } 143 144 unsafe impl Send for Regex {} 145 unsafe impl Sync for Regex {} 146 147 impl Error { from_code_and_info(code: c_int, info: &onig_sys::OnigErrorInfo) -> Error148 fn from_code_and_info(code: c_int, info: &onig_sys::OnigErrorInfo) -> Error { 149 Error::new(code, info) 150 } 151 from_code(code: c_int) -> Error152 fn from_code(code: c_int) -> Error { 153 Error::new(code, null()) 154 } 155 new(code: c_int, info: *const onig_sys::OnigErrorInfo) -> Error156 fn new(code: c_int, info: *const onig_sys::OnigErrorInfo) -> Error { 157 let buff = &mut [0; onig_sys::ONIG_MAX_ERROR_MESSAGE_LEN as usize]; 158 let len = unsafe { onig_sys::onig_error_code_to_str(buff.as_mut_ptr(), code, info) }; 159 let description = str::from_utf8(&buff[..len as usize]).unwrap(); 160 Error { 161 code, 162 description: description.to_owned(), 163 } 164 } 165 166 /// Return Oniguruma engine error code. code(&self) -> i32167 pub fn code(&self) -> i32 { 168 self.code 169 } 170 171 /// Return error description provided by Oniguruma engine. description(&self) -> &str172 pub fn description(&self) -> &str { 173 &self.description 174 } 175 } 176 177 impl error::Error for Error { description(&self) -> &str178 fn description(&self) -> &str { 179 &self.description 180 } 181 } 182 183 impl fmt::Display for Error { fmt(&self, f: &mut fmt::Formatter) -> fmt::Result184 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 185 write!(f, "Oniguruma error: {}", self.description()) 186 } 187 } 188 189 impl fmt::Debug for Error { fmt(&self, f: &mut fmt::Formatter) -> fmt::Result190 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 191 write!(f, "Error({}, {})", self.code, self.description()) 192 } 193 } 194 195 lazy_static! { 196 static ref REGEX_NEW_MUTEX: Mutex<()> = Mutex::new(()); 197 } 198 199 impl Regex { 200 /// Create a Regex 201 /// 202 /// Simple regular expression constructor. Compiles a new regular 203 /// expression with the default options using the ruby syntax. 204 /// Once compiled, it can be used repeatedly to search in a string. If an 205 /// invalid expression is given, then an error is returned. 206 /// 207 /// # Arguments 208 /// 209 /// * `pattern` - The regex pattern to compile 210 /// 211 /// # Examples 212 /// 213 /// ``` 214 /// use onig::Regex; 215 /// let r = Regex::new(r#"hello (\w+)"#); 216 /// assert!(r.is_ok()); 217 /// ``` new(pattern: &str) -> Result<Self, Error>218 pub fn new(pattern: &str) -> Result<Self, Error> { 219 Regex::with_encoding(pattern) 220 } 221 222 /// Create a Regex, Specifying an Encoding 223 /// 224 /// Attempts to compile `pattern` into a new `Regex` 225 /// instance. Instead of assuming UTF-8 as the encoding scheme the 226 /// encoding is inferred from the `pattern` buffer. 227 /// 228 /// # Arguments 229 /// 230 /// * `pattern` - The regex pattern to compile 231 /// 232 /// # Examples 233 /// 234 /// ``` 235 /// use onig::{Regex, EncodedBytes}; 236 /// let utf8 = Regex::with_encoding("hello"); 237 /// assert!(utf8.is_ok()); 238 /// let ascii = Regex::with_encoding(EncodedBytes::ascii(b"world")); 239 /// assert!(ascii.is_ok()); 240 /// ``` with_encoding<T>(pattern: T) -> Result<Regex, Error> where T: EncodedChars,241 pub fn with_encoding<T>(pattern: T) -> Result<Regex, Error> 242 where 243 T: EncodedChars, 244 { 245 Regex::with_options_and_encoding( 246 pattern, 247 RegexOptions::REGEX_OPTION_NONE, 248 Syntax::default(), 249 ) 250 } 251 252 /// Create a new Regex 253 /// 254 /// Attempts to compile a pattern into a new `Regex` instance. 255 /// Once compiled, it can be used repeatedly to search in a string. If an 256 /// invalid expression is given, then an error is returned. 257 /// See [`onig_sys::onig_new`][regex_new] for more information. 258 /// 259 /// # Arguments 260 /// 261 /// * `pattern` - The regex pattern to compile. 262 /// * `options` - The regex compilation options. 263 /// * `syntax` - The syntax which the regex is written in. 264 /// 265 /// # Examples 266 /// 267 /// ``` 268 /// use onig::{Regex, Syntax, RegexOptions}; 269 /// let r = Regex::with_options("hello.*world", 270 /// RegexOptions::REGEX_OPTION_NONE, 271 /// Syntax::default()); 272 /// assert!(r.is_ok()); 273 /// ``` 274 /// 275 /// [regex_new]: ./onig_sys/fn.onig_new.html with_options( pattern: &str, option: RegexOptions, syntax: &Syntax, ) -> Result<Regex, Error>276 pub fn with_options( 277 pattern: &str, 278 option: RegexOptions, 279 syntax: &Syntax, 280 ) -> Result<Regex, Error> { 281 Regex::with_options_and_encoding(pattern, option, syntax) 282 } 283 284 /// Create a new Regex, Specifying Options and Ecoding 285 /// 286 /// Attempts to comile the given `pattern` into a new `Regex` 287 /// instance. Instead of assuming UTF-8 as the encoding scheme the 288 /// encoding is inferred from the `pattern` buffer. If the regex 289 /// fails to compile the returned `Error` value from 290 /// [`onig_new`][regex_new] contains more information. 291 /// 292 /// [regex_new]: ./onig_sys/fn.onig_new.html 293 /// 294 /// # Arguments 295 /// 296 /// * `pattern` - The regex pattern to compile. 297 /// * `options` - The regex compilation options. 298 /// * `syntax` - The syntax which the regex is written in. 299 /// 300 /// # Examples 301 /// ``` 302 /// use onig::{Regex, Syntax, EncodedBytes, RegexOptions}; 303 /// let pattern = EncodedBytes::ascii(b"hello"); 304 /// let r = Regex::with_options_and_encoding(pattern, 305 /// RegexOptions::REGEX_OPTION_SINGLELINE, 306 /// Syntax::default()); 307 /// assert!(r.is_ok()); 308 /// ``` with_options_and_encoding<T>( pattern: T, option: RegexOptions, syntax: &Syntax, ) -> Result<Self, Error> where T: EncodedChars,309 pub fn with_options_and_encoding<T>( 310 pattern: T, 311 option: RegexOptions, 312 syntax: &Syntax, 313 ) -> Result<Self, Error> 314 where 315 T: EncodedChars, 316 { 317 // Convert the rust types to those required for the call to 318 // `onig_new`. 319 let mut reg: onig_sys::OnigRegexMut = null_mut(); 320 let reg_ptr = &mut reg as *mut onig_sys::OnigRegexMut; 321 322 // We can use this later to get an error message to pass back 323 // if regex creation fails. 324 let mut error = onig_sys::OnigErrorInfo { 325 enc: null(), 326 par: null(), 327 par_end: null(), 328 }; 329 330 let err = unsafe { 331 // Grab a lock to make sure that `onig_new` isn't called by 332 // more than one thread at a time. 333 let _guard = REGEX_NEW_MUTEX.lock().unwrap(); 334 onig_sys::onig_new( 335 reg_ptr, 336 pattern.start_ptr(), 337 pattern.limit_ptr(), 338 option.bits(), 339 pattern.encoding(), 340 syntax as *const Syntax as *const onig_sys::OnigSyntaxType, 341 &mut error, 342 ) 343 }; 344 345 if err == onig_sys::ONIG_NORMAL { 346 Ok(Regex { raw: reg }) 347 } else { 348 Err(Error::from_code_and_info(err, &error)) 349 } 350 } 351 352 /// Match String 353 /// 354 /// Try to match the regex against the given string slice, 355 /// starting at a given offset. This method works the same way as 356 /// `match_with_encoding`, but the encoding is always utf-8. 357 /// 358 /// For more information see [Match vs 359 /// Search](index.html#match-vs-search) 360 /// 361 /// # Arguments 362 /// 363 /// * `str` - The string slice to match against. 364 /// * `at` - The byte index in the passed slice to start matching 365 /// * `options` - The regex match options. 366 /// * `region` - The region for return group match range info 367 /// 368 /// # Returns 369 /// 370 /// `Some(len)` if the regex matched, with `len` being the number 371 /// of bytes matched. `None` if the regex doesn't match. 372 /// 373 /// # Examples 374 /// 375 /// ``` 376 /// use onig::{Regex, SearchOptions}; 377 /// 378 /// let r = Regex::new(".*").unwrap(); 379 /// let res = r.match_with_options("hello", 0, SearchOptions::SEARCH_OPTION_NONE, None); 380 /// assert!(res.is_some()); // it matches 381 /// assert!(res.unwrap() == 5); // 5 characters matched 382 /// ``` match_with_options( &self, str: &str, at: usize, options: SearchOptions, region: Option<&mut Region>, ) -> Option<usize>383 pub fn match_with_options( 384 &self, 385 str: &str, 386 at: usize, 387 options: SearchOptions, 388 region: Option<&mut Region>, 389 ) -> Option<usize> { 390 self.match_with_encoding(str, at, options, region) 391 } 392 393 /// Match String with Encoding 394 /// 395 /// Match the regex against a string. This method will start at 396 /// the offset `at` into the string and try and match the 397 /// regex. If the regex matches then the return value is the 398 /// number of characters which matched. If the regex doesn't match 399 /// the return is `None`. 400 /// 401 /// For more information see [Match vs 402 /// Search](index.html#match-vs-search) 403 /// 404 /// The contents of `chars` must have the same encoding that was 405 /// used to construct the regex. 406 /// 407 /// # Arguments 408 /// 409 /// * `chars` - The buffer to match against. 410 /// * `at` - The byte index in the passed buffer to start matching 411 /// * `options` - The regex match options. 412 /// * `region` - The region for return group match range info 413 /// 414 /// # Returns 415 /// 416 /// `Some(len)` if the regex matched, with `len` being the number 417 /// of bytes matched. `None` if the regex doesn't match. 418 /// 419 /// # Examples 420 /// 421 /// ``` 422 /// use onig::{Regex, EncodedBytes, SearchOptions}; 423 /// 424 /// let r = Regex::with_encoding(EncodedBytes::ascii(b".*")).unwrap(); 425 /// let res = r.match_with_encoding(EncodedBytes::ascii(b"world"), 426 /// 0, SearchOptions::SEARCH_OPTION_NONE, None); 427 /// assert!(res.is_some()); // it matches 428 /// assert!(res.unwrap() == 5); // 5 characters matched 429 /// ``` match_with_encoding<T>( &self, chars: T, at: usize, options: SearchOptions, region: Option<&mut Region>, ) -> Option<usize> where T: EncodedChars,430 pub fn match_with_encoding<T>( 431 &self, 432 chars: T, 433 at: usize, 434 options: SearchOptions, 435 region: Option<&mut Region>, 436 ) -> Option<usize> 437 where 438 T: EncodedChars, 439 { 440 let match_param = MatchParam::default(); 441 let result = self.match_with_param(chars, at, options, region, match_param); 442 443 match result { 444 Ok(r) => r, 445 Err(e) => panic!("Onig: Regex match error: {}", e.description()) 446 } 447 } 448 449 /// Match string with encoding and match param 450 /// 451 /// Match the regex against a string. This method will start at 452 /// the offset `at` into the string and try and match the 453 /// regex. If the regex matches then the return value is the 454 /// number of characters which matched. If the regex doesn't match 455 /// the return is `None`. 456 /// 457 /// For more information see [Match vs 458 /// Search](index.html#match-vs-search) 459 /// 460 /// The contents of `chars` must have the same encoding that was 461 /// used to construct the regex. 462 /// 463 /// # Arguments 464 /// 465 /// * `chars` - The buffer to match against. 466 /// * `at` - The byte index in the passed buffer to start matching 467 /// * `options` - The regex match options. 468 /// * `region` - The region for return group match range info 469 /// * `match_param` - The match parameters 470 /// 471 /// # Returns 472 /// 473 /// `Ok(Some(len))` if the regex matched, with `len` being the number 474 /// of bytes matched. `Ok(None)` if the regex doesn't match. `Err` with an 475 /// `Error` if an error occurred (e.g. retry-limit-in-match exceeded). 476 /// 477 /// # Examples 478 /// 479 /// ``` 480 /// use onig::{Regex, EncodedBytes, MatchParam, SearchOptions}; 481 /// 482 /// let r = Regex::with_encoding(EncodedBytes::ascii(b".*")).unwrap(); 483 /// let res = r.match_with_param(EncodedBytes::ascii(b"world"), 484 /// 0, SearchOptions::SEARCH_OPTION_NONE, 485 /// None, MatchParam::default()); 486 /// assert!(res.is_ok()); // matching did not error 487 /// assert!(res.unwrap() == Some(5)); // 5 characters matched 488 /// ``` match_with_param<T>( &self, chars: T, at: usize, options: SearchOptions, region: Option<&mut Region>, match_param: MatchParam, ) -> Result<Option<usize>, Error> where T: EncodedChars,489 pub fn match_with_param<T>( 490 &self, 491 chars: T, 492 at: usize, 493 options: SearchOptions, 494 region: Option<&mut Region>, 495 match_param: MatchParam, 496 ) -> Result<Option<usize>, Error> 497 where 498 T: EncodedChars, 499 { 500 assert_eq!(chars.encoding(), self.encoding()); 501 let r = unsafe { 502 let offset = chars.start_ptr().add(at); 503 assert!(offset <= chars.limit_ptr()); 504 onig_sys::onig_match_with_param( 505 self.raw, 506 chars.start_ptr(), 507 chars.limit_ptr(), 508 offset, 509 match region { 510 Some(region) => region as *mut Region as *mut onig_sys::OnigRegion, 511 None => std::ptr::null_mut(), 512 }, 513 options.bits(), 514 match_param.as_raw() 515 ) 516 }; 517 518 if r >= 0 { 519 Ok(Some(r as usize)) 520 } else if r == onig_sys::ONIG_MISMATCH { 521 Ok(None) 522 } else { 523 Err(Error::from_code(r)) 524 } 525 } 526 527 /// Search pattern in string 528 /// 529 /// Search for matches the regex in a string. This method will return the 530 /// index of the first match of the regex within the string, if 531 /// there is one. If `from` is less than `to`, then search is performed 532 /// in forward order, otherwise – in backward order. 533 /// 534 /// For more information see [Match vs 535 /// Search](index.html#match-vs-search) 536 /// 537 /// # Arguments 538 /// 539 /// * `str` - The string to search in. 540 /// * `from` - The byte index in the passed slice to start search 541 /// * `to` - The byte index in the passed slice to finish search 542 /// * `options` - The options for the search. 543 /// * `region` - The region for return group match range info 544 /// 545 /// # Returns 546 /// 547 /// `Some(pos)` if the regex matches, where `pos` is the 548 /// byte-position of the start of the match. `None` if the regex 549 /// doesn't match anywhere in `str`. 550 /// 551 /// # Examples 552 /// 553 /// ``` 554 /// use onig::{Regex, SearchOptions}; 555 /// 556 /// let r = Regex::new("l{1,2}").unwrap(); 557 /// let res = r.search_with_options("hello", 0, 5, SearchOptions::SEARCH_OPTION_NONE, None); 558 /// assert!(res.is_some()); // it matches 559 /// assert!(res.unwrap() == 2); // match starts at character 3 560 /// ``` search_with_options( &self, str: &str, from: usize, to: usize, options: SearchOptions, region: Option<&mut Region>, ) -> Option<usize>561 pub fn search_with_options( 562 &self, 563 str: &str, 564 from: usize, 565 to: usize, 566 options: SearchOptions, 567 region: Option<&mut Region>, 568 ) -> Option<usize> { 569 self.search_with_encoding(str, from, to, options, region) 570 } 571 572 /// Search for a Pattern in a String with an Encoding 573 /// 574 /// Search for matches the regex in a string. This method will 575 /// return the index of the first match of the regex within the 576 /// string, if there is one. If `from` is less than `to`, then 577 /// search is performed in forward order, otherwise – in backward 578 /// order. 579 /// 580 /// For more information see [Match vs 581 /// Search](index.html#match-vs-search) 582 /// 583 /// The encoding of the buffer passed to search in must match the 584 /// encoding of the regex. 585 /// 586 /// # Arguments 587 /// 588 /// * `chars` - The character buffer to search in. 589 /// * `from` - The byte index in the passed slice to start search 590 /// * `to` - The byte index in the passed slice to finish search 591 /// * `options` - The options for the search. 592 /// * `region` - The region for return group match range info 593 /// 594 /// # Returns 595 /// 596 /// `Some(pos)` if the regex matches, where `pos` is the 597 /// byte-position of the start of the match. `None` if the regex 598 /// doesn't match anywhere in `chars`. 599 /// 600 /// # Examples 601 /// 602 /// ``` 603 /// use onig::{Regex, EncodedBytes, SearchOptions}; 604 /// 605 /// let r = Regex::with_encoding(EncodedBytes::ascii(b"l{1,2}")).unwrap(); 606 /// let res = r.search_with_encoding(EncodedBytes::ascii(b"hello"), 607 /// 0, 5, SearchOptions::SEARCH_OPTION_NONE, None); 608 /// assert!(res.is_some()); // it matches 609 /// assert!(res.unwrap() == 2); // match starts at character 3 610 /// ``` search_with_encoding<T>( &self, chars: T, from: usize, to: usize, options: SearchOptions, region: Option<&mut Region>, ) -> Option<usize> where T: EncodedChars,611 pub fn search_with_encoding<T>( 612 &self, 613 chars: T, 614 from: usize, 615 to: usize, 616 options: SearchOptions, 617 region: Option<&mut Region>, 618 ) -> Option<usize> 619 where 620 T: EncodedChars, 621 { 622 let match_param = MatchParam::default(); 623 let result = self.search_with_param(chars, from, to, options, region, match_param); 624 625 match result { 626 Ok(r) => r, 627 Err(e) => panic!("Onig: Regex search error: {}", e.description) 628 } 629 } 630 631 /// Search pattern in string with encoding and match param 632 /// 633 /// Search for matches the regex in a string. This method will 634 /// return the index of the first match of the regex within the 635 /// string, if there is one. If `from` is less than `to`, then 636 /// search is performed in forward order, otherwise – in backward 637 /// order. 638 /// 639 /// For more information see [Match vs 640 /// Search](index.html#match-vs-search) 641 /// 642 /// The encoding of the buffer passed to search in must match the 643 /// encoding of the regex. 644 /// 645 /// # Arguments 646 /// 647 /// * `chars` - The character buffer to search in. 648 /// * `from` - The byte index in the passed slice to start search 649 /// * `to` - The byte index in the passed slice to finish search 650 /// * `options` - The options for the search. 651 /// * `region` - The region for return group match range info 652 /// * `match_param` - The match parameters 653 /// 654 /// # Returns 655 /// 656 /// `Ok(Some(pos))` if the regex matches, where `pos` is the 657 /// byte-position of the start of the match. `Ok(None)` if the regex 658 /// doesn't match anywhere in `chars`. `Err` with an `Error` if an error 659 /// occurred (e.g. retry-limit-in-match exceeded). 660 /// 661 /// # Examples 662 /// 663 /// ``` 664 /// use onig::{Regex, EncodedBytes, MatchParam, SearchOptions}; 665 /// 666 /// let r = Regex::with_encoding(EncodedBytes::ascii(b"l{1,2}")).unwrap(); 667 /// let res = r.search_with_param(EncodedBytes::ascii(b"hello"), 668 /// 0, 5, SearchOptions::SEARCH_OPTION_NONE, 669 /// None, MatchParam::default()); 670 /// assert!(res.is_ok()); // matching did not error 671 /// assert!(res.unwrap() == Some(2)); // match starts at character 3 672 /// ``` search_with_param<T>( &self, chars: T, from: usize, to: usize, options: SearchOptions, region: Option<&mut Region>, match_param: MatchParam, ) -> Result<Option<usize>, Error> where T: EncodedChars,673 pub fn search_with_param<T>( 674 &self, 675 chars: T, 676 from: usize, 677 to: usize, 678 options: SearchOptions, 679 region: Option<&mut Region>, 680 match_param: MatchParam, 681 ) -> Result<Option<usize>, Error> 682 where 683 T: EncodedChars, 684 { 685 let (beg, end) = (chars.start_ptr(), chars.limit_ptr()); 686 assert_eq!(self.encoding(), chars.encoding()); 687 let r = unsafe { 688 let start = beg.add(from ); 689 let range = beg.add(to); 690 assert!(start <= end); 691 assert!(range <= end); 692 onig_sys::onig_search_with_param( 693 self.raw, 694 beg, 695 end, 696 start, 697 range, 698 match region { 699 Some(region) => region as *mut Region as *mut onig_sys::OnigRegion, 700 None => std::ptr::null_mut(), 701 }, 702 options.bits(), 703 match_param.as_raw() 704 ) 705 }; 706 707 if r >= 0 { 708 Ok(Some(r as usize)) 709 } else if r == onig_sys::ONIG_MISMATCH { 710 Ok(None) 711 } else { 712 Err(Error::from_code(r)) 713 } 714 } 715 716 /// Returns true if and only if the regex matches the string given. 717 /// 718 /// For more information see [Match vs 719 /// Search](index.html#match-vs-search) 720 /// 721 /// # Arguments 722 /// * `text` - The string slice to test against the pattern. 723 /// 724 /// # Returns 725 /// 726 /// `true` if the pattern matches the whole of `text`, `false` otherwise. is_match(&self, text: &str) -> bool727 pub fn is_match(&self, text: &str) -> bool { 728 self.match_with_options(text, 0, SearchOptions::SEARCH_OPTION_NONE, None) 729 .map(|r| r == text.len()) 730 .unwrap_or(false) 731 } 732 733 /// Find a Match in a Buffer, With Encoding 734 /// 735 /// Finds the first match of the regular expression within the 736 /// buffer. 737 /// 738 /// Note that this should only be used if you want to discover the 739 /// position of the match within a string. Testing if a pattern 740 /// matches the whole string is faster if you use `is_match`. For 741 /// more information see [Match vs 742 /// Search](index.html#match-vs-search) 743 /// 744 /// # Arguments 745 /// * `text` - The text to search in. 746 /// 747 /// # Returns 748 /// 749 /// The offset of the start and end of the first match. If no 750 /// match exists `None` is returned. find(&self, text: &str) -> Option<(usize, usize)>751 pub fn find(&self, text: &str) -> Option<(usize, usize)> { 752 self.find_with_encoding(text) 753 } 754 755 /// Find a Match in a Buffer, With Encoding 756 /// 757 /// Finds the first match of the regular expression within the 758 /// buffer. 759 /// 760 /// For more information see [Match vs 761 /// Search](index.html#match-vs-search) 762 /// 763 /// # Arguments 764 /// * `text` - The text to search in. 765 /// 766 /// # Returns 767 /// 768 /// The offset of the start and end of the first match. If no 769 /// match exists `None` is returned. find_with_encoding<T>(&self, text: T) -> Option<(usize, usize)> where T: EncodedChars,770 pub fn find_with_encoding<T>(&self, text: T) -> Option<(usize, usize)> 771 where 772 T: EncodedChars, 773 { 774 let mut region = Region::new(); 775 let len = text.len(); 776 self.search_with_encoding( 777 text, 778 0, 779 len, 780 SearchOptions::SEARCH_OPTION_NONE, 781 Some(&mut region), 782 ).and_then(|_| region.pos(0)) 783 } 784 785 /// Get the Encoding of the Regex 786 /// 787 /// # Returns 788 /// 789 /// Returns a reference to an oniguruma encoding which was used 790 /// when this regex was created. encoding(&self) -> onig_sys::OnigEncoding791 pub fn encoding(&self) -> onig_sys::OnigEncoding { 792 unsafe { onig_sys::onig_get_encoding(self.raw) } 793 } 794 795 /// Get the Number of Capture Groups in this Pattern captures_len(&self) -> usize796 pub fn captures_len(&self) -> usize { 797 unsafe { onig_sys::onig_number_of_captures(self.raw) as usize } 798 } 799 800 /// Get the Size of the Capture Histories for this Pattern capture_histories_len(&self) -> usize801 pub fn capture_histories_len(&self) -> usize { 802 unsafe { onig_sys::onig_number_of_capture_histories(self.raw) as usize } 803 } 804 } 805 806 impl Drop for Regex { drop(&mut self)807 fn drop(&mut self) { 808 unsafe { 809 onig_sys::onig_free(self.raw); 810 } 811 } 812 } 813 814 #[cfg(test)] 815 mod tests { 816 use super::*; 817 use std::panic; 818 819 #[test] test_regex_create()820 fn test_regex_create() { 821 Regex::with_options(".*", RegexOptions::REGEX_OPTION_NONE, Syntax::default()).unwrap(); 822 823 Regex::new(r#"a \w+ word"#).unwrap(); 824 } 825 826 #[test] test_regex_invalid()827 fn test_regex_invalid() { 828 let e = Regex::new("\\p{foo}").unwrap_err(); 829 assert_eq!(e.code(), -223); 830 assert_eq!(e.description(), "invalid character property name {foo}"); 831 } 832 833 #[test] test_failed_match()834 fn test_failed_match() { 835 let regex = Regex::new("foo").unwrap(); 836 let res = regex.match_with_options("bar", 0, SearchOptions::SEARCH_OPTION_NONE, None); 837 assert!(res.is_none()); 838 } 839 840 #[test] test_regex_search_with_options()841 fn test_regex_search_with_options() { 842 let mut region = Region::new(); 843 let regex = Regex::new("e(l+)").unwrap(); 844 845 let r = regex.search_with_options( 846 "hello", 847 0, 848 5, 849 SearchOptions::SEARCH_OPTION_NONE, 850 Some(&mut region), 851 ); 852 853 assert!(region.tree().is_none()); 854 assert_eq!(r, Some(1)); 855 assert_eq!(region.len(), 2); 856 let pos1 = region.pos(0).unwrap(); 857 let pos2 = region.pos(1).unwrap(); 858 assert_eq!(pos1, (1, 4)); 859 assert_eq!(pos2, (2, 4)); 860 861 // test cloning here since we already have a filled region 862 let cloned_region = region.clone(); 863 let pos1_clone = cloned_region.pos(0).unwrap(); 864 assert_eq!(pos1_clone, pos1); 865 } 866 867 #[test] test_regex_match_with_options()868 fn test_regex_match_with_options() { 869 let mut region = Region::new(); 870 let regex = Regex::new("he(l+)").unwrap(); 871 872 let r = regex.match_with_options( 873 "hello", 874 0, 875 SearchOptions::SEARCH_OPTION_NONE, 876 Some(&mut region), 877 ); 878 879 assert!(region.tree().is_none()); 880 assert_eq!(r, Some(4)); 881 assert_eq!(region.len(), 2); 882 let pos1 = region.pos(0).unwrap(); 883 let pos2 = region.pos(1).unwrap(); 884 assert_eq!(pos1, (0, 4)); 885 assert_eq!(pos2, (2, 4)); 886 } 887 888 #[test] test_regex_is_match()889 fn test_regex_is_match() { 890 let regex = Regex::new("he(l+)o").unwrap(); 891 assert!(regex.is_match("hello")); 892 assert!(!regex.is_match("hello 2.0")); 893 } 894 895 #[test] test_regex_find()896 fn test_regex_find() { 897 let regex = Regex::new("he(l+)o").unwrap(); 898 assert_eq!(regex.find("hey, hello!"), Some((5, 10))); 899 assert_eq!(regex.find("hey, honey!"), None); 900 } 901 902 #[test] test_regex_captures_len()903 fn test_regex_captures_len() { 904 let regex = Regex::new("(he)(l+)(o)").unwrap(); 905 assert_eq!(regex.captures_len(), 3); 906 } 907 908 #[test] test_regex_error_is_match()909 fn test_regex_error_is_match() { 910 let regex = Regex::new("(a|b|ab)*bc").unwrap(); 911 let result = regex.match_with_param( 912 "ababababababababababababababababababababababababababababacbc", 913 0, SearchOptions::SEARCH_OPTION_NONE, None, MatchParam::default()); 914 915 let e = result.err().unwrap(); 916 assert_eq!("retry-limit-in-match over", e.description()); 917 } 918 919 #[test] test_regex_panic_is_match()920 fn test_regex_panic_is_match() { 921 let regex = Regex::new("(a|b|ab)*bc").unwrap(); 922 let result = panic::catch_unwind(|| 923 regex.is_match("ababababababababababababababababababababababababababababacbc") 924 ); 925 let e = result.err().unwrap(); 926 let message = e.downcast_ref::<String>().unwrap(); 927 assert_eq!(message.as_str(), 928 "Onig: Regex match error: retry-limit-in-match over"); 929 } 930 931 #[test] test_regex_error_find()932 fn test_regex_error_find() { 933 let regex = Regex::new("(a|b|ab)*bc").unwrap(); 934 let s = "ababababababababababababababababababababababababababababacbc"; 935 let result = regex.search_with_param( 936 s, 0, s.len(), SearchOptions::SEARCH_OPTION_NONE, None, MatchParam::default()); 937 938 let e = result.err().unwrap(); 939 assert_eq!("retry-limit-in-match over", e.description()); 940 } 941 942 #[test] test_regex_panic_find()943 fn test_regex_panic_find() { 944 let regex = Regex::new("(a|b|ab)*bc").unwrap(); 945 let result = panic::catch_unwind(|| 946 regex.find("ababababababababababababababababababababababababababababacbc") 947 ); 948 let e = result.err().unwrap(); 949 let message = e.downcast_ref::<String>().unwrap(); 950 assert_eq!(message.as_str(), 951 "Onig: Regex search error: retry-limit-in-match over"); 952 } 953 } 954