//! Count occurrences of a given byte, or the number of UTF-8 code points, in a
//! byte slice, fast.
3 //!
//! This crate has the [`count`](fn.count.html) function to count byte
//! occurrences (for example newlines) in a larger `&[u8]` slice.
6 //!
7 //! For example:
8 //!
9 //! ```rust
10 //! assert_eq!(5, bytecount::count(b"Hello, this is the bytecount crate!", b' '));
11 //! ```
12 //!
//! Also there is a [`num_chars`](fn.num_chars.html) function to count
//! the number of UTF-8 characters in a slice. It will work the same as
//! `str::chars().count()` for byte slices of correct UTF-8 character
//! sequences. The result will likely be off for invalid sequences,
//! although the result is guaranteed to be between `0` and
//! `[_]::len()`, inclusive.
19 //!
20 //! Example:
21 //!
22 //! ```rust
23 //! let sequence = "Wenn ich ein Vöglein wär, flög ich zu Dir!";
24 //! assert_eq!(sequence.chars().count(),
25 //!            bytecount::num_chars(sequence.as_bytes()));
26 //! ```
27 //!
//! For completeness and easy comparison, the "naive" versions of both
//! `count` and `num_chars` are provided. Those are also faster if used on
//! predominantly small strings. The
//! [`naive_count_32`](fn.naive_count_32.html) function can be faster
//! still on small strings.
33 
34 #![deny(missing_docs)]
35 
36 #![cfg_attr(not(feature = "runtime-dispatch-simd"), no_std)]
37 
38 #[cfg(not(feature = "runtime-dispatch-simd"))]
39 use core::mem;
40 #[cfg(feature = "runtime-dispatch-simd")]
41 use std::mem;
42 
43 mod naive;
44 pub use naive::*;
45 mod integer_simd;
46 
47 #[cfg(any(
48     all(feature = "runtime-dispatch-simd", any(target_arch = "x86", target_arch = "x86_64")),
49     feature = "generic-simd"
50 ))]
51 mod simd;
52 
53 /// Count occurrences of a byte in a slice of bytes, fast
54 ///
55 /// # Examples
56 ///
57 /// ```
58 /// let s = b"This is a Text with spaces";
59 /// let number_of_spaces = bytecount::count(s, b' ');
60 /// assert_eq!(number_of_spaces, 5);
61 /// ```
count(haystack: &[u8], needle: u8) -> usize62 pub fn count(haystack: &[u8], needle: u8) -> usize {
63     if haystack.len() >= 32 {
64         #[cfg(all(feature = "runtime-dispatch-simd", target_arch = "x86_64"))]
65         {
66             if is_x86_feature_detected!("avx2") {
67                 unsafe { return simd::x86_avx2::chunk_count(haystack, needle); }
68             }
69         }
70 
71         #[cfg(feature = "generic-simd")]
72         return simd::generic::chunk_count(haystack, needle);
73     }
74 
75     if haystack.len() >= 16 {
76         #[cfg(all(
77             feature = "runtime-dispatch-simd",
78             any(target_arch = "x86", target_arch = "x86_64"),
79             not(feature = "generic-simd")
80         ))]
81         {
82             if is_x86_feature_detected!("sse2") {
83                 unsafe { return simd::x86_sse2::chunk_count(haystack, needle); }
84             }
85         }
86     }
87 
88     if haystack.len() >= mem::size_of::<usize>() {
89         return integer_simd::chunk_count(haystack, needle);
90     }
91 
92     naive_count(haystack, needle)
93 }
94 
95 /// Count the number of UTF-8 encoded Unicode codepoints in a slice of bytes, fast
96 ///
97 /// This function is safe to use on any byte array, valid UTF-8 or not,
98 /// but the output is only meaningful for well-formed UTF-8.
99 ///
100 /// # Example
101 ///
102 /// ```
103 /// let swordfish = "メカジキ";
104 /// let char_count = bytecount::num_chars(swordfish.as_bytes());
105 /// assert_eq!(char_count, 4);
106 /// ```
num_chars(utf8_chars: &[u8]) -> usize107 pub fn num_chars(utf8_chars: &[u8]) -> usize {
108     if utf8_chars.len() >= 32 {
109         #[cfg(all(feature = "runtime-dispatch-simd", target_arch = "x86_64"))]
110         {
111             if is_x86_feature_detected!("avx2") {
112                 unsafe { return simd::x86_avx2::chunk_num_chars(utf8_chars); }
113             }
114         }
115 
116         #[cfg(feature = "generic-simd")]
117         return simd::generic::chunk_num_chars(utf8_chars);
118     }
119 
120     if utf8_chars.len() >= 16 {
121         #[cfg(all(
122             feature = "runtime-dispatch-simd",
123             any(target_arch = "x86", target_arch = "x86_64"),
124             not(feature = "generic-simd")
125         ))]
126         {
127             if is_x86_feature_detected!("sse2") {
128                 unsafe { return simd::x86_sse2::chunk_num_chars(utf8_chars); }
129             }
130         }
131     }
132 
133     if utf8_chars.len() >= mem::size_of::<usize>() {
134         return integer_simd::chunk_num_chars(utf8_chars);
135     }
136 
137     naive_num_chars(utf8_chars)
138 }
139