1 /* This Source Code Form is subject to the terms of the Mozilla Public
2  * License, v. 2.0. If a copy of the MPL was not distributed with this
3  * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4 
5 //! String utils for attributes and similar stuff.
6 
7 #![deny(missing_docs)]
8 
9 use num_traits::ToPrimitive;
10 use std::borrow::Cow;
11 use std::convert::AsRef;
12 use std::iter::{Filter, Peekable};
13 use std::str::Split;
14 
/// A static slice of characters.
///
/// This is the type of [`HTML_SPACE_CHARACTERS`], and the pattern type used
/// when splitting strings on HTML whitespace.
pub type StaticCharVec = &'static [char];
17 
/// A static slice of `str`s, where each string slice is itself `'static`.
pub type StaticStringVec = &'static [&'static str];
20 
21 /// A "space character" according to:
22 ///
23 /// <https://html.spec.whatwg.org/multipage/#space-character>
24 pub static HTML_SPACE_CHARACTERS: StaticCharVec =
25     &['\u{0020}', '\u{0009}', '\u{000a}', '\u{000c}', '\u{000d}'];
26 
27 /// Whether a character is a HTML whitespace character.
28 #[inline]
char_is_whitespace(c: char) -> bool29 pub fn char_is_whitespace(c: char) -> bool {
30     HTML_SPACE_CHARACTERS.contains(&c)
31 }
32 
33 /// Whether all the string is HTML whitespace.
34 #[inline]
is_whitespace(s: &str) -> bool35 pub fn is_whitespace(s: &str) -> bool {
36     s.chars().all(char_is_whitespace)
37 }
38 
/// Filter predicate for the split helpers: keeps only non-empty pieces.
///
/// Takes `&&str` because that is the item reference `Iterator::filter` hands
/// to its predicate when iterating over `&str` items.
#[inline]
fn not_empty(split: &&str) -> bool {
    let piece: &str = split;
    !piece.is_empty()
}
43 
44 /// Split a string on HTML whitespace.
45 #[inline]
split_html_space_chars<'a>( s: &'a str, ) -> Filter<Split<'a, StaticCharVec>, fn(&&str) -> bool>46 pub fn split_html_space_chars<'a>(
47     s: &'a str,
48 ) -> Filter<Split<'a, StaticCharVec>, fn(&&str) -> bool> {
49     s.split(HTML_SPACE_CHARACTERS)
50         .filter(not_empty as fn(&&str) -> bool)
51 }
52 
53 /// Split a string on commas.
54 #[inline]
split_commas<'a>(s: &'a str) -> Filter<Split<'a, char>, fn(&&str) -> bool>55 pub fn split_commas<'a>(s: &'a str) -> Filter<Split<'a, char>, fn(&&str) -> bool> {
56     s.split(',').filter(not_empty as fn(&&str) -> bool)
57 }
58 
/// Returns `true` if `c` is an ASCII decimal digit (`'0'` through `'9'`).
///
/// Takes the character by reference so the function can be passed directly
/// to adaptors such as `Iterator::take_while`.
pub fn is_ascii_digit(c: &char) -> bool {
    c.is_ascii_digit()
}
66 
is_decimal_point(c: char) -> bool67 fn is_decimal_point(c: char) -> bool {
68     c == '.'
69 }
70 
is_exponent_char(c: char) -> bool71 fn is_exponent_char(c: char) -> bool {
72     match c {
73         'e' | 'E' => true,
74         _ => false,
75     }
76 }
77 
78 /// Read a set of ascii digits and read them into a number.
read_numbers<I: Iterator<Item = char>>(mut iter: Peekable<I>) -> (Option<i64>, usize)79 pub fn read_numbers<I: Iterator<Item = char>>(mut iter: Peekable<I>) -> (Option<i64>, usize) {
80     match iter.peek() {
81         Some(c) if is_ascii_digit(c) => (),
82         _ => return (None, 0),
83     }
84 
85     iter.take_while(is_ascii_digit)
86         .map(|d| d as i64 - '0' as i64)
87         .fold((Some(0i64), 0), |accumulator, d| {
88             let digits = accumulator
89                 .0
90                 .and_then(|accumulator| accumulator.checked_mul(10))
91                 .and_then(|accumulator| accumulator.checked_add(d));
92             (digits, accumulator.1 + 1)
93         })
94 }
95 
96 /// Read a decimal fraction.
read_fraction<I: Iterator<Item = char>>( mut iter: Peekable<I>, mut divisor: f64, value: f64, ) -> (f64, usize)97 pub fn read_fraction<I: Iterator<Item = char>>(
98     mut iter: Peekable<I>,
99     mut divisor: f64,
100     value: f64,
101 ) -> (f64, usize) {
102     match iter.peek() {
103         Some(c) if is_decimal_point(*c) => (),
104         _ => return (value, 0),
105     }
106     iter.next();
107 
108     iter.take_while(is_ascii_digit)
109         .map(|d| d as i64 - '0' as i64)
110         .fold((value, 1), |accumulator, d| {
111             divisor *= 10f64;
112             (accumulator.0 + d as f64 / divisor, accumulator.1 + 1)
113         })
114 }
115 
116 /// Reads an exponent from an iterator over chars, for example `e100`.
read_exponent<I: Iterator<Item = char>>(mut iter: Peekable<I>) -> Option<i32>117 pub fn read_exponent<I: Iterator<Item = char>>(mut iter: Peekable<I>) -> Option<i32> {
118     match iter.peek() {
119         Some(c) if is_exponent_char(*c) => (),
120         _ => return None,
121     }
122     iter.next();
123 
124     match iter.peek() {
125         None => None,
126         Some(&'-') => {
127             iter.next();
128             read_numbers(iter).0.map(|exp| -exp.to_i32().unwrap_or(0))
129         },
130         Some(&'+') => {
131             iter.next();
132             read_numbers(iter).0.map(|exp| exp.to_i32().unwrap_or(0))
133         },
134         Some(_) => read_numbers(iter).0.map(|exp| exp.to_i32().unwrap_or(0)),
135     }
136 }
137 
/// Concatenates the strings in `strs`, inserting `join` between each pair of
/// adjacent items.
///
/// No delimiter is emitted before the first item or after the last one; an
/// empty input produces an empty `String`.
pub fn str_join<I, T>(strs: I, join: &str) -> String
where
    I: IntoIterator<Item = T>,
    T: AsRef<str>,
{
    let mut pieces = strs.into_iter();
    let mut joined = String::new();

    // The first piece goes in bare; every later piece is preceded by `join`.
    if let Some(first) = pieces.next() {
        joined.push_str(first.as_ref());
        for piece in pieces {
            joined.push_str(join);
            joined.push_str(piece.as_ref());
        }
    }
    joined
}
154 
/// Returns true if `string` begins with `prefix`, comparing ASCII letters
/// case-insensitively.
///
/// The comparison is done on bytes, so non-ASCII characters must match
/// exactly. An empty `prefix` matches every string.
pub fn starts_with_ignore_ascii_case(string: &str, prefix: &str) -> bool {
    // `get` yields `None` when `string` is shorter than `prefix`.
    string
        .as_bytes()
        .get(..prefix.len())
        .map_or(false, |head| head.eq_ignore_ascii_case(prefix.as_bytes()))
}
160 
/// Returns `input` with its ASCII uppercase letters lowercased.
///
/// Allocates only when `input` contains at least one of `A`..=`Z`; otherwise
/// the input is returned borrowed. Non-ASCII characters are left untouched.
pub fn string_as_ascii_lowercase<'a>(input: &'a str) -> Cow<'a, str> {
    let has_ascii_uppercase = input.bytes().any(|b| b.is_ascii_uppercase());
    if !has_ascii_uppercase {
        // Already ascii lowercase, so hand the input back without copying.
        return Cow::Borrowed(input);
    }
    Cow::Owned(input.to_ascii_lowercase())
}
170 
/// The concrete writer type required by large serialization routines.
///
/// To avoid accidentally instantiating multiple monomorphizations of large
/// serialization routines, we define explicit concrete types and require
/// them in those routines. This avoids accidental mixing of String and
/// nsACString arguments in Gecko, which would cause code size to blow up.
///
/// Only defined when the `gecko` feature is enabled.
#[cfg(feature = "gecko")]
pub type CssStringWriter = ::nsstring::nsACString;
177 
/// String type that coerces to CssStringWriter, used when serialization code
/// needs to allocate a temporary string.
///
/// Only defined when the `gecko` feature is enabled.
#[cfg(feature = "gecko")]
pub type CssString = ::nsstring::nsCString;
182