1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5 extern crate nserror;
6 use self::nserror::*;
7
8 extern crate nsstring;
9 use self::nsstring::{nsACString, nsCString};
10
11 extern crate thin_vec;
12 use self::thin_vec::ThinVec;
13
14 use std::fs::File;
15 use std::io::{self, BufRead};
16 use std::net::Ipv4Addr;
17
/// HTTP leading whitespace, defined in netwerk/protocol/http/nsHttp.h
static HTTP_LWS: &[u8] = b" \t";

/// Extracts the bare language token from one comma-separated list entry.
///
/// Leading HTTP whitespace is skipped, and the result ends at the first
/// following whitespace byte or `;` (which drops a trailing quality-value
/// such as `; q=0.5`). The returned slice borrows from `token` and is empty
/// when nothing but whitespace precedes the end or the first `;`.
fn trim_token(token: &[u8]) -> &[u8] {
    let is_lws = |byte: &u8| HTTP_LWS.contains(byte);

    // Index of the first byte that is not whitespace.
    let start = token
        .iter()
        .position(|byte| !is_lws(byte))
        .unwrap_or(token.len());
    let rest = &token[start..];

    // The token stops at the first whitespace byte or at the start of the
    // "; q=..." parameter, whichever comes first.
    let end = rest
        .iter()
        .position(|byte| *byte == b';' || is_lws(byte))
        .unwrap_or(rest.len());

    &rest[..end]
}
39
40 #[no_mangle]
41 /// Allocates an nsACString that contains a ISO 639 language list
42 /// notated with HTTP "q" values for output with an HTTP Accept-Language
43 /// header. Previous q values will be stripped because the order of
44 /// the langs implies the q value. The q values are calculated by dividing
45 /// 1.0 amongst the number of languages present.
46 ///
47 /// Ex: passing: "en, ja"
48 /// returns: "en,ja;q=0.5"
49 ///
50 /// passing: "en, ja, fr_CA"
51 /// returns: "en,ja;q=0.7,fr_CA;q=0.3"
rust_prepare_accept_languages<'a, 'b>( i_accept_languages: &'a nsACString, o_accept_languages: &'b mut nsACString, ) -> nsresult52 pub extern "C" fn rust_prepare_accept_languages<'a, 'b>(
53 i_accept_languages: &'a nsACString,
54 o_accept_languages: &'b mut nsACString,
55 ) -> nsresult {
56 if i_accept_languages.is_empty() {
57 return NS_OK;
58 }
59
60 let make_tokens = || {
61 i_accept_languages
62 .split(|c| *c == (',' as u8))
63 .map(|token| trim_token(token))
64 .filter(|token| token.len() != 0)
65 };
66
67 let n = make_tokens().count();
68
69 for (count_n, i_token) in make_tokens().enumerate() {
70 // delimiter if not first item
71 if count_n != 0 {
72 o_accept_languages.append(",");
73 }
74
75 let token_pos = o_accept_languages.len();
76 o_accept_languages.append(&i_token as &[u8]);
77
78 {
79 let o_token = o_accept_languages.to_mut();
80 canonicalize_language_tag(&mut o_token[token_pos..]);
81 }
82
83 // Divide the quality-values evenly among the languages.
84 let q = 1.0 - count_n as f32 / n as f32;
85
86 let u: u32 = ((q + 0.005) * 100.0) as u32;
87 // Only display q-value if less than 1.00.
88 if u < 100 {
89 // With a small number of languages, one decimal place is
90 // enough to prevent duplicate q-values.
91 // Also, trailing zeroes do not add any information, so
92 // they can be removed.
93 if n < 10 || u % 10 == 0 {
94 let u = (u + 5) / 10;
95 o_accept_languages.append(&format!(";q=0.{}", u));
96 } else {
97 // Values below 10 require zero padding.
98 o_accept_languages.append(&format!(";q=0.{:02}", u));
99 }
100 }
101 }
102
103 NS_OK
104 }
105
/// Rewrites a language tag in place using a consistent capitalization.
///
/// # Arguments
/// * `token` - a narrow char slice describing a language.
///
/// Valid language tags are of the form
/// "*", "fr", "en-US", "es-419", "az-Arab", "x-pig-latin", "man-Nkoo-GN"
///
/// The whole tag is lowercased first. Then, for every `-`-separated subtag
/// after the first one: two-letter subtags are uppercased, four-letter
/// subtags get an uppercase first letter, and a one-letter subtag ends the
/// processing so that everything after it stays lowercase.
///
/// Language tags are defined in the
/// [rfc5646](https://tools.ietf.org/html/rfc5646) spec. According to
/// the spec:
///
/// > At all times, language tags and their subtags, including private
/// > use and extensions, are to be treated as case insensitive: there
/// > exist conventions for the capitalization of some of the subtags,
/// > but these MUST NOT be taken to carry meaning.
///
/// So why is this code even here? See bug 1108183, I guess.
fn canonicalize_language_tag(token: &mut [u8]) {
    token.make_ascii_lowercase();

    // The first subtag is the ISO 639-1 language code (the "en" in "en-US")
    // and stays lowercase, hence the skip.
    for sub_tag in token.split_mut(|byte| *byte == b'-').skip(1) {
        match sub_tag.len() {
            // Singleton subtag, like "x" or "i". These signify a
            // non-standard language, so nothing after them is capitalized.
            1 => break,
            // ISO 3166-1 country code, like "US".
            2 => sub_tag.make_ascii_uppercase(),
            // ISO 15924 script code, like "Nkoo".
            4 => sub_tag[0].make_ascii_uppercase(),
            _ => {}
        }
    }
}
154
155 #[no_mangle]
rust_net_is_valid_ipv4_addr<'a>(addr: &'a nsACString) -> bool156 pub extern "C" fn rust_net_is_valid_ipv4_addr<'a>(addr: &'a nsACString) -> bool {
157 is_valid_ipv4_addr(addr)
158 }
159
/// Appends one decimal digit to the octet being accumulated.
///
/// Returns `None` when `current_octet * 10 + digit_to_apply` does not fit in
/// a `u8`, i.e. when the octet would exceed 255.
#[inline]
fn try_apply_digit(current_octet: u8, digit_to_apply: u8) -> Option<u8> {
    current_octet.checked_mul(10)?.checked_add(digit_to_apply)
}

/// Returns `true` if `addr` is a dotted-decimal IPv4 address.
///
/// Accepted input consists of exactly four octets separated by single dots.
/// Each octet is a decimal number in `0..=255` written without leading
/// zeroes. Any other byte, an empty octet, or a different number of octets
/// makes the address invalid.
pub fn is_valid_ipv4_addr<'a>(addr: &'a [u8]) -> bool {
    let mut current_octet: Option<u8> = None;
    let mut dots: u8 = 0;

    for &byte in addr {
        match byte {
            b'.' => {
                // A dot must terminate a non-empty octet. Rejecting the
                // fourth dot right away also keeps the counter from
                // overflowing on very long inputs.
                if current_octet.is_none() || dots == 3 {
                    return false;
                }
                dots += 1;
                current_octet = None;
            }
            b'0'..=b'9' => {
                let digit = byte - b'0';
                current_octet = match current_octet {
                    // First digit of a new octet.
                    None => Some(digit),
                    // Leading 0 is not allowed.
                    Some(0) => return false,
                    Some(octet) => match try_apply_digit(octet, digit) {
                        applied @ Some(_) => applied,
                        // The octet would exceed 255.
                        None => return false,
                    },
                };
            }
            // The character is neither a digit nor a dot.
            _ => return false,
        }
    }

    dots == 3 && current_octet.is_some()
}
213
214 #[no_mangle]
rust_net_is_valid_ipv6_addr<'a>(addr: &'a nsACString) -> bool215 pub extern "C" fn rust_net_is_valid_ipv6_addr<'a>(addr: &'a nsACString) -> bool {
216 is_valid_ipv6_addr(addr)
217 }
218
219 #[inline(always)]
fast_is_hex_digit(c: u8) -> bool220 fn fast_is_hex_digit(c: u8) -> bool {
221 match c {
222 b'0'..=b'9' => true,
223 b'a'..=b'f' => true,
224 b'A'..=b'F' => true,
225 _ => false,
226 }
227 }
228
is_valid_ipv6_addr<'a>(addr: &'a [u8]) -> bool229 pub fn is_valid_ipv6_addr<'a>(addr: &'a [u8]) -> bool {
230 let mut double_colon = false;
231 let mut colon_before = false;
232 let mut digits: u8 = 0;
233 let mut blocks: u8 = 0;
234
235 // The smallest ipv6 is unspecified (::)
236 // The IP starts with a single colon
237 if addr.len() < 2 || addr[0] == b':' && addr[1] != b':' {
238 return false;
239 }
240 //Enumerate with an u8 for cache locality
241 for (i, c) in (0u8..).zip(addr) {
242 match c {
243 maybe_digit if fast_is_hex_digit(*maybe_digit) => {
244 // Too many digits in the block
245 if digits == 4 {
246 return false;
247 }
248 colon_before = false;
249 digits += 1;
250 }
251 b':' => {
252 // Too many columns
253 if double_colon && colon_before || blocks == 8 {
254 return false;
255 }
256 if !colon_before {
257 if digits != 0 {
258 blocks += 1;
259 }
260 digits = 0;
261 colon_before = true;
262 } else if !double_colon {
263 double_colon = true;
264 }
265 }
266 b'.' => {
267 // IPv4 from the last block
268 if is_valid_ipv4_addr(&addr[(i - digits) as usize..]) {
269 return double_colon && blocks < 6 || !double_colon && blocks == 6;
270 }
271 return false;
272 }
273 _ => {
274 // Invalid character
275 return false;
276 }
277 }
278 }
279 if colon_before && !double_colon {
280 // The IP ends with a single colon
281 return false;
282 }
283 if digits != 0 {
284 blocks += 1;
285 }
286
287 double_colon && blocks < 8 || !double_colon && blocks == 8
288 }
289
290 #[no_mangle]
rust_net_is_valid_scheme_char(a_char: u8) -> bool291 pub extern "C" fn rust_net_is_valid_scheme_char(a_char: u8) -> bool {
292 is_valid_scheme_char(a_char)
293 }
294
295 #[no_mangle]
rust_net_is_valid_scheme<'a>(scheme: &'a nsACString) -> bool296 pub extern "C" fn rust_net_is_valid_scheme<'a>(scheme: &'a nsACString) -> bool {
297 if scheme.is_empty() {
298 return false;
299 }
300
301 // first char must be alpha
302 if !scheme[0].is_ascii_alphabetic() {
303 return false;
304 }
305
306 scheme[1..]
307 .iter()
308 .all(|a_char| is_valid_scheme_char(*a_char))
309 }
310
/// Returns `true` if `a_char` is an ASCII letter, an ASCII digit, or one of
/// `+`, `.` and `-`.
fn is_valid_scheme_char(a_char: u8) -> bool {
    match a_char {
        b'+' | b'.' | b'-' => true,
        other => other.is_ascii_alphanumeric(),
    }
}
314
315 pub type ParsingCallback = extern "C" fn(&ThinVec<nsCString>) -> bool;
316
317 #[no_mangle]
rust_parse_etc_hosts<'a>(path: &'a nsACString, callback: ParsingCallback)318 pub extern "C" fn rust_parse_etc_hosts<'a>(path: &'a nsACString, callback: ParsingCallback) {
319 let file = match File::open(&*path.to_utf8()) {
320 Ok(file) => io::BufReader::new(file),
321 Err(..) => return,
322 };
323
324 let mut array = ThinVec::new();
325 for line in file.lines() {
326 let line = match line {
327 Ok(l) => l,
328 Err(..) => break,
329 };
330
331 let mut iter = line.split('#').next().unwrap().split_whitespace();
332 iter.next(); // skip the IP
333
334 array.extend(
335 iter.filter(|host| {
336 // Make sure it's a valid domain
337 let invalid = [
338 '\0', '\t', '\n', '\r', ' ', '#', '%', '/', ':', '?', '@', '[', '\\', ']',
339 ];
340 host.parse::<Ipv4Addr>().is_err() && !host.contains(&invalid[..])
341 })
342 .map(nsCString::from),
343 );
344
345 // /etc/hosts files can be huge. To make sure we don't block shutdown
346 // for every 100 domains that we parse we call the callback passing the
347 // domains and see if we should keep parsing.
348 if array.len() > 100 {
349 let keep_going = callback(&array);
350 array.clear();
351 if !keep_going {
352 break;
353 }
354 }
355 }
356
357 if !array.is_empty() {
358 callback(&array);
359 }
360 }
361