1 // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT 2 // file at the top-level directory of this distribution and at 3 // http://rust-lang.org/COPYRIGHT. 4 // 5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or 6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license 7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your 8 // option. This file may not be copied, modified, or distributed 9 // except according to those terms. 10 11 //! Unicode character composition and decomposition utilities 12 //! as described in 13 //! [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/). 14 //! 15 //! ```rust 16 //! extern crate unicode_normalization; 17 //! 18 //! use unicode_normalization::char::compose; 19 //! use unicode_normalization::UnicodeNormalization; 20 //! 21 //! fn main() { 22 //! assert_eq!(compose('A','\u{30a}'), Some('Å')); 23 //! 24 //! let s = "ÅΩ"; 25 //! let c = s.nfc().collect::<String>(); 26 //! assert_eq!(c, "ÅΩ"); 27 //! } 28 //! ``` 29 //! 30 //! # crates.io 31 //! 32 //! You can use this package in your project by adding the following 33 //! to your `Cargo.toml`: 34 //! 35 //! ```toml 36 //! [dependencies] 37 //! unicode-normalization = "0.1.8" 38 //! ``` 39 40 #![deny(missing_docs, unsafe_code)] 41 #![doc(html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png", 42 html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png")] 43 44 extern crate smallvec; 45 46 pub use tables::UNICODE_VERSION; 47 pub use decompose::Decompositions; 48 pub use quick_check::{ 49 IsNormalized, 50 is_nfc, 51 is_nfc_quick, 52 is_nfkc, 53 is_nfkc_quick, 54 is_nfc_stream_safe, 55 is_nfc_stream_safe_quick, 56 is_nfd, 57 is_nfd_quick, 58 is_nfkd, 59 is_nfkd_quick, 60 is_nfd_stream_safe, 61 is_nfd_stream_safe_quick, 62 }; 63 pub use recompose::Recompositions; 64 pub use stream_safe::StreamSafe; 65 use std::str::Chars; 66 67 mod decompose; 68 mod lookups; 69 mod normalize; 70 mod perfect_hash; 71 mod recompose; 72 mod quick_check; 73 mod stream_safe; 74 mod tables; 75 76 #[cfg(test)] 77 mod test; 78 #[doc(hidden)] 79 pub mod __test_api; 80 81 /// Methods for composing and decomposing characters. 82 pub mod char { 83 pub use normalize::{decompose_canonical, decompose_compatible, compose}; 84 85 pub use lookups::{canonical_combining_class, is_combining_mark}; 86 } 87 88 89 /// Methods for iterating over strings while applying Unicode normalizations 90 /// as described in 91 /// [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/). 92 pub trait UnicodeNormalization<I: Iterator<Item=char>> { 93 /// Returns an iterator over the string in Unicode Normalization Form D 94 /// (canonical decomposition). 95 #[inline] nfd(self) -> Decompositions<I>96 fn nfd(self) -> Decompositions<I>; 97 98 /// Returns an iterator over the string in Unicode Normalization Form KD 99 /// (compatibility decomposition). 100 #[inline] nfkd(self) -> Decompositions<I>101 fn nfkd(self) -> Decompositions<I>; 102 103 /// An Iterator over the string in Unicode Normalization Form C 104 /// (canonical decomposition followed by canonical composition). 105 #[inline] nfc(self) -> Recompositions<I>106 fn nfc(self) -> Recompositions<I>; 107 108 /// An Iterator over the string in Unicode Normalization Form KC 109 /// (compatibility decomposition followed by canonical composition). 110 #[inline] nfkc(self) -> Recompositions<I>111 fn nfkc(self) -> Recompositions<I>; 112 113 /// An Iterator over the string with Conjoining Grapheme Joiner characters 114 /// inserted according to the Stream-Safe Text Process (UAX15-D4) 115 #[inline] stream_safe(self) -> StreamSafe<I>116 fn stream_safe(self) -> StreamSafe<I>; 117 } 118 119 impl<'a> UnicodeNormalization<Chars<'a>> for &'a str { 120 #[inline] nfd(self) -> Decompositions<Chars<'a>>121 fn nfd(self) -> Decompositions<Chars<'a>> { 122 decompose::new_canonical(self.chars()) 123 } 124 125 #[inline] nfkd(self) -> Decompositions<Chars<'a>>126 fn nfkd(self) -> Decompositions<Chars<'a>> { 127 decompose::new_compatible(self.chars()) 128 } 129 130 #[inline] nfc(self) -> Recompositions<Chars<'a>>131 fn nfc(self) -> Recompositions<Chars<'a>> { 132 recompose::new_canonical(self.chars()) 133 } 134 135 #[inline] nfkc(self) -> Recompositions<Chars<'a>>136 fn nfkc(self) -> Recompositions<Chars<'a>> { 137 recompose::new_compatible(self.chars()) 138 } 139 140 #[inline] stream_safe(self) -> StreamSafe<Chars<'a>>141 fn stream_safe(self) -> StreamSafe<Chars<'a>> { 142 StreamSafe::new(self.chars()) 143 } 144 } 145 146 impl<I: Iterator<Item=char>> UnicodeNormalization<I> for I { 147 #[inline] nfd(self) -> Decompositions<I>148 fn nfd(self) -> Decompositions<I> { 149 decompose::new_canonical(self) 150 } 151 152 #[inline] nfkd(self) -> Decompositions<I>153 fn nfkd(self) -> Decompositions<I> { 154 decompose::new_compatible(self) 155 } 156 157 #[inline] nfc(self) -> Recompositions<I>158 fn nfc(self) -> Recompositions<I> { 159 recompose::new_canonical(self) 160 } 161 162 #[inline] nfkc(self) -> Recompositions<I>163 fn nfkc(self) -> Recompositions<I> { 164 recompose::new_compatible(self) 165 } 166 167 #[inline] stream_safe(self) -> StreamSafe<I>168 fn stream_safe(self) -> StreamSafe<I> { 169 StreamSafe::new(self) 170 } 171 } 172