1 // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10 
11 //! Unicode character composition and decomposition utilities
12 //! as described in
13 //! [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/).
14 //!
15 //! ```rust
16 //! extern crate unicode_normalization;
17 //!
18 //! use unicode_normalization::char::compose;
19 //! use unicode_normalization::UnicodeNormalization;
20 //!
21 //! fn main() {
22 //!     assert_eq!(compose('A','\u{30a}'), Some('Å'));
23 //!
24 //!     let s = "ÅΩ";
25 //!     let c = s.nfc().collect::<String>();
26 //!     assert_eq!(c, "ÅΩ");
27 //! }
28 //! ```
29 //!
30 //! # crates.io
31 //!
32 //! You can use this package in your project by adding the following
33 //! to your `Cargo.toml`:
34 //!
35 //! ```toml
36 //! [dependencies]
37 //! unicode-normalization = "0.1.8"
38 //! ```
39 
40 #![deny(missing_docs, unsafe_code)]
41 #![doc(html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
42        html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png")]
43 
44 extern crate smallvec;
45 
46 pub use tables::UNICODE_VERSION;
47 pub use decompose::Decompositions;
48 pub use quick_check::{
49     IsNormalized,
50     is_nfc,
51     is_nfc_quick,
52     is_nfkc,
53     is_nfkc_quick,
54     is_nfc_stream_safe,
55     is_nfc_stream_safe_quick,
56     is_nfd,
57     is_nfd_quick,
58     is_nfkd,
59     is_nfkd_quick,
60     is_nfd_stream_safe,
61     is_nfd_stream_safe_quick,
62 };
63 pub use recompose::Recompositions;
64 pub use stream_safe::StreamSafe;
65 use std::str::Chars;
66 
67 mod decompose;
68 mod lookups;
69 mod normalize;
70 mod perfect_hash;
71 mod recompose;
72 mod quick_check;
73 mod stream_safe;
74 mod tables;
75 
76 #[cfg(test)]
77 mod test;
78 #[doc(hidden)]
79 pub mod __test_api;
80 
81 /// Methods for composing and decomposing characters.
82 pub mod char {
83     pub use normalize::{decompose_canonical, decompose_compatible, compose};
84 
85     pub use lookups::{canonical_combining_class, is_combining_mark};
86 }
87 
88 
89 /// Methods for iterating over strings while applying Unicode normalizations
90 /// as described in
91 /// [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/).
92 pub trait UnicodeNormalization<I: Iterator<Item=char>> {
93     /// Returns an iterator over the string in Unicode Normalization Form D
94     /// (canonical decomposition).
95     #[inline]
nfd(self) -> Decompositions<I>96     fn nfd(self) -> Decompositions<I>;
97 
98     /// Returns an iterator over the string in Unicode Normalization Form KD
99     /// (compatibility decomposition).
100     #[inline]
nfkd(self) -> Decompositions<I>101     fn nfkd(self) -> Decompositions<I>;
102 
103     /// An Iterator over the string in Unicode Normalization Form C
104     /// (canonical decomposition followed by canonical composition).
105     #[inline]
nfc(self) -> Recompositions<I>106     fn nfc(self) -> Recompositions<I>;
107 
108     /// An Iterator over the string in Unicode Normalization Form KC
109     /// (compatibility decomposition followed by canonical composition).
110     #[inline]
nfkc(self) -> Recompositions<I>111     fn nfkc(self) -> Recompositions<I>;
112 
113     /// An Iterator over the string with Conjoining Grapheme Joiner characters
114     /// inserted according to the Stream-Safe Text Process (UAX15-D4)
115     #[inline]
stream_safe(self) -> StreamSafe<I>116     fn stream_safe(self) -> StreamSafe<I>;
117 }
118 
119 impl<'a> UnicodeNormalization<Chars<'a>> for &'a str {
120     #[inline]
nfd(self) -> Decompositions<Chars<'a>>121     fn nfd(self) -> Decompositions<Chars<'a>> {
122         decompose::new_canonical(self.chars())
123     }
124 
125     #[inline]
nfkd(self) -> Decompositions<Chars<'a>>126     fn nfkd(self) -> Decompositions<Chars<'a>> {
127         decompose::new_compatible(self.chars())
128     }
129 
130     #[inline]
nfc(self) -> Recompositions<Chars<'a>>131     fn nfc(self) -> Recompositions<Chars<'a>> {
132         recompose::new_canonical(self.chars())
133     }
134 
135     #[inline]
nfkc(self) -> Recompositions<Chars<'a>>136     fn nfkc(self) -> Recompositions<Chars<'a>> {
137         recompose::new_compatible(self.chars())
138     }
139 
140     #[inline]
stream_safe(self) -> StreamSafe<Chars<'a>>141     fn stream_safe(self) -> StreamSafe<Chars<'a>> {
142         StreamSafe::new(self.chars())
143     }
144 }
145 
146 impl<I: Iterator<Item=char>> UnicodeNormalization<I> for I {
147     #[inline]
nfd(self) -> Decompositions<I>148     fn nfd(self) -> Decompositions<I> {
149         decompose::new_canonical(self)
150     }
151 
152     #[inline]
nfkd(self) -> Decompositions<I>153     fn nfkd(self) -> Decompositions<I> {
154         decompose::new_compatible(self)
155     }
156 
157     #[inline]
nfc(self) -> Recompositions<I>158     fn nfc(self) -> Recompositions<I> {
159         recompose::new_canonical(self)
160     }
161 
162     #[inline]
nfkc(self) -> Recompositions<I>163     fn nfkc(self) -> Recompositions<I> {
164         recompose::new_compatible(self)
165     }
166 
167     #[inline]
stream_safe(self) -> StreamSafe<I>168     fn stream_safe(self) -> StreamSafe<I> {
169         StreamSafe::new(self)
170     }
171 }
172