1 /* This Source Code Form is subject to the terms of the Mozilla Public
2  * License, v. 2.0. If a copy of the MPL was not distributed with this
3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 
5 extern crate nserror;
6 use self::nserror::*;
7 
8 extern crate nsstring;
9 use self::nsstring::{nsACString, nsCString};
10 
11 extern crate thin_vec;
12 use self::thin_vec::ThinVec;
13 
14 use std::fs::File;
15 use std::io::{self, BufRead};
16 use std::net::Ipv4Addr;
17 
/// HTTP linear whitespace bytes (space and horizontal tab), as defined in
/// netwerk/protocol/http/nsHttp.h
static HTTP_LWS: &[u8] = b" \t";

/// Extracts the bare language token from one comma-separated list entry.
///
/// Leading whitespace is skipped, and the result ends at the first
/// whitespace byte or `;` that follows, which drops trailing whitespace
/// as well as any "; q=..." quality-value suffix.
fn trim_token(token: &[u8]) -> &[u8] {
    let is_lws = |byte: &u8| HTTP_LWS.contains(byte);

    // Skip the leading whitespace; an all-whitespace token yields an empty
    // remainder.
    let start = token
        .iter()
        .position(|byte| !is_lws(byte))
        .unwrap_or(token.len());
    let rest = &token[start..];

    // Stop at the first whitespace byte or at the start of "; q=...".
    let end = rest
        .iter()
        .position(|byte| *byte == b';' || is_lws(byte))
        .unwrap_or(rest.len());

    &rest[..end]
}
39 
40 #[no_mangle]
41 /// Allocates an nsACString that contains a ISO 639 language list
42 /// notated with HTTP "q" values for output with an HTTP Accept-Language
43 /// header. Previous q values will be stripped because the order of
44 /// the langs implies the q value. The q values are calculated by dividing
45 /// 1.0 amongst the number of languages present.
46 ///
47 /// Ex: passing: "en, ja"
48 ///     returns: "en,ja;q=0.5"
49 ///
50 ///     passing: "en, ja, fr_CA"
51 ///     returns: "en,ja;q=0.7,fr_CA;q=0.3"
rust_prepare_accept_languages<'a, 'b>( i_accept_languages: &'a nsACString, o_accept_languages: &'b mut nsACString, ) -> nsresult52 pub extern "C" fn rust_prepare_accept_languages<'a, 'b>(
53     i_accept_languages: &'a nsACString,
54     o_accept_languages: &'b mut nsACString,
55 ) -> nsresult {
56     if i_accept_languages.is_empty() {
57         return NS_OK;
58     }
59 
60     let make_tokens = || {
61         i_accept_languages
62             .split(|c| *c == (',' as u8))
63             .map(|token| trim_token(token))
64             .filter(|token| token.len() != 0)
65     };
66 
67     let n = make_tokens().count();
68 
69     for (count_n, i_token) in make_tokens().enumerate() {
70         // delimiter if not first item
71         if count_n != 0 {
72             o_accept_languages.append(",");
73         }
74 
75         let token_pos = o_accept_languages.len();
76         o_accept_languages.append(&i_token as &[u8]);
77 
78         {
79             let o_token = o_accept_languages.to_mut();
80             canonicalize_language_tag(&mut o_token[token_pos..]);
81         }
82 
83         // Divide the quality-values evenly among the languages.
84         let q = 1.0 - count_n as f32 / n as f32;
85 
86         let u: u32 = ((q + 0.005) * 100.0) as u32;
87         // Only display q-value if less than 1.00.
88         if u < 100 {
89             // With a small number of languages, one decimal place is
90             // enough to prevent duplicate q-values.
91             // Also, trailing zeroes do not add any information, so
92             // they can be removed.
93             if n < 10 || u % 10 == 0 {
94                 let u = (u + 5) / 10;
95                 o_accept_languages.append(&format!(";q=0.{}", u));
96             } else {
97                 // Values below 10 require zero padding.
98                 o_accept_languages.append(&format!(";q=0.{:02}", u));
99             }
100         }
101     }
102 
103     NS_OK
104 }
105 
/// Rewrites a language tag in place using a consistent capitalization.
///
/// # Arguments
/// * `token` - a narrow char slice describing a language.
///
/// Valid language tags are of the form
/// "*", "fr", "en-US", "es-419", "az-Arab", "x-pig-latin", "man-Nkoo-GN"
///
/// The whole tag is lowercased first. Then, for every subtag after the
/// first: two-letter subtags are uppercased, four-letter subtags get an
/// uppercase first letter, and a one-letter subtag ends the processing.
///
/// Language tags are defined in the
/// [rfc5646](https://tools.ietf.org/html/rfc5646) spec. According to
/// the spec:
///
/// > At all times, language tags and their subtags, including private
/// > use and extensions, are to be treated as case insensitive: there
/// > exist conventions for the capitalization of some of the subtags,
/// > but these MUST NOT be taken to carry meaning.
///
/// So why is this code even here? See bug 1108183, I guess.
fn canonicalize_language_tag(token: &mut [u8]) {
    token.make_ascii_lowercase();

    // The first subtag is the ISO 639-1 language code (the "en" in
    // "en-US"); it stays lowercase, so start with the second one.
    for sub_tag in token.split_mut(|byte| *byte == b'-').skip(1) {
        match sub_tag.len() {
            // Singleton tag, like "x" or "i". These signify a non-standard
            // language, so nothing after them is capitalized.
            1 => break,
            // ISO 3166-1 country code, like "US"
            2 => sub_tag.make_ascii_uppercase(),
            // ISO 15924 script code, like "Nkoo"
            4 => sub_tag[0].make_ascii_uppercase(),
            _ => {}
        }
    }
}
154 
#[no_mangle]
/// C-callable entry point that forwards to `is_valid_ipv4_addr`.
///
/// `addr` is handed to the checker as a byte slice; the result is that
/// function's verdict, unchanged.
pub extern "C" fn rust_net_is_valid_ipv4_addr<'a>(addr: &'a nsACString) -> bool {
    is_valid_ipv4_addr(addr)
}
159 
/// Appends `digit_to_apply` as the new least-significant decimal digit of
/// `current_octet`, i.e. computes `current_octet * 10 + digit_to_apply`.
///
/// Returns `None` when the result does not fit in a `u8`.
#[inline]
fn try_apply_digit(current_octet: u8, digit_to_apply: u8) -> Option<u8> {
    current_octet.checked_mul(10)?.checked_add(digit_to_apply)
}

/// Returns `true` when `addr` is a dotted-decimal IPv4 address.
///
/// The accepted form is exactly four octets separated by three dots.
/// Each octet is one to three ASCII digits with a value of at most 255
/// and no leading zero (a lone "0" is fine). Any other byte, an empty
/// octet, or a wrong number of octets makes the address invalid.
pub fn is_valid_ipv4_addr<'a>(addr: &'a [u8]) -> bool {
    // Value of the octet currently being read; `None` until its first
    // digit has been seen.
    let mut current_octet: Option<u8> = None;
    let mut dots: u8 = 0;

    for &byte in addr {
        match byte {
            b'.' => {
                // A dot must terminate a non-empty octet. A fourth dot can
                // never be part of a valid address, so bail out right away:
                // this also keeps the `u8` counter from overflowing on long
                // inputs (259 dots used to wrap it back around to 3).
                if current_octet.is_none() || dots == 3 {
                    return false;
                }
                dots += 1;
                current_octet = None;
            }
            b'0'..=b'9' => {
                let digit = byte - b'0';
                let updated = match current_octet {
                    // First digit of the octet.
                    None => Some(digit),
                    // Leading 0 is not allowed.
                    Some(0) => None,
                    // `None` here means the octet grew past 255.
                    Some(octet) => try_apply_digit(octet, digit),
                };
                if updated.is_none() {
                    return false;
                }
                current_octet = updated;
            }
            // The byte is neither a dot nor a digit.
            _ => return false,
        }
    }

    dots == 3 && current_octet.is_some()
}
213 
#[no_mangle]
/// C-callable entry point that forwards to `is_valid_ipv6_addr`.
///
/// `addr` is handed to the checker as a byte slice; the result is that
/// function's verdict, unchanged.
pub extern "C" fn rust_net_is_valid_ipv6_addr<'a>(addr: &'a nsACString) -> bool {
    is_valid_ipv6_addr(addr)
}
218 
/// Returns `true` when `c` is an ASCII hexadecimal digit:
/// `0`-`9`, `a`-`f` or `A`-`F`.
#[inline(always)]
fn fast_is_hex_digit(c: u8) -> bool {
    c.is_ascii_hexdigit()
}
228 
is_valid_ipv6_addr<'a>(addr: &'a [u8]) -> bool229 pub fn is_valid_ipv6_addr<'a>(addr: &'a [u8]) -> bool {
230     let mut double_colon = false;
231     let mut colon_before = false;
232     let mut digits: u8 = 0;
233     let mut blocks: u8 = 0;
234 
235     // The smallest ipv6 is unspecified (::)
236     // The IP starts with a single colon
237     if addr.len() < 2 || addr[0] == b':' && addr[1] != b':' {
238         return false;
239     }
240     //Enumerate with an u8 for cache locality
241     for (i, c) in (0u8..).zip(addr) {
242         match c {
243             maybe_digit if fast_is_hex_digit(*maybe_digit) => {
244                 // Too many digits in the block
245                 if digits == 4 {
246                     return false;
247                 }
248                 colon_before = false;
249                 digits += 1;
250             }
251             b':' => {
252                 // Too many columns
253                 if double_colon && colon_before || blocks == 8 {
254                     return false;
255                 }
256                 if !colon_before {
257                     if digits != 0 {
258                         blocks += 1;
259                     }
260                     digits = 0;
261                     colon_before = true;
262                 } else if !double_colon {
263                     double_colon = true;
264                 }
265             }
266             b'.' => {
267                 // IPv4 from the last block
268                 if is_valid_ipv4_addr(&addr[(i - digits) as usize..]) {
269                     return double_colon && blocks < 6 || !double_colon && blocks == 6;
270                 }
271                 return false;
272             }
273             _ => {
274                 // Invalid character
275                 return false;
276             }
277         }
278     }
279     if colon_before && !double_colon {
280         // The IP ends with a single colon
281         return false;
282     }
283     if digits != 0 {
284         blocks += 1;
285     }
286 
287     double_colon && blocks < 8 || !double_colon && blocks == 8
288 }
289 
#[no_mangle]
/// C-callable entry point that forwards to `is_valid_scheme_char`.
///
/// Returns that function's verdict for the byte `a_char`, unchanged.
pub extern "C" fn rust_net_is_valid_scheme_char(a_char: u8) -> bool {
    is_valid_scheme_char(a_char)
}
294 
295 #[no_mangle]
rust_net_is_valid_scheme<'a>(scheme: &'a nsACString) -> bool296 pub extern "C" fn rust_net_is_valid_scheme<'a>(scheme: &'a nsACString) -> bool {
297     if scheme.is_empty() {
298         return false;
299     }
300 
301     // first char must be alpha
302     if !scheme[0].is_ascii_alphabetic() {
303         return false;
304     }
305 
306     scheme[1..]
307         .iter()
308         .all(|a_char| is_valid_scheme_char(*a_char))
309 }
310 
/// Returns `true` when `a_char` may appear in a scheme name: an ASCII
/// letter or digit, or one of `+`, `.`, `-`.
fn is_valid_scheme_char(a_char: u8) -> bool {
    match a_char {
        b'+' | b'.' | b'-' => true,
        other => other.is_ascii_alphanumeric(),
    }
}
314 
/// C callback that `rust_parse_etc_hosts` invokes with each batch of host
/// names it has collected. Returning `false` for a batch delivered while
/// the file is still being read stops the parsing; the return value for
/// the final, leftover batch is ignored.
pub type ParsingCallback = extern "C" fn(&ThinVec<nsCString>) -> bool;
316 
317 #[no_mangle]
rust_parse_etc_hosts<'a>(path: &'a nsACString, callback: ParsingCallback)318 pub extern "C" fn rust_parse_etc_hosts<'a>(path: &'a nsACString, callback: ParsingCallback) {
319     let file = match File::open(&*path.to_utf8()) {
320         Ok(file) => io::BufReader::new(file),
321         Err(..) => return,
322     };
323 
324     let mut array = ThinVec::new();
325     for line in file.lines() {
326         let line = match line {
327             Ok(l) => l,
328             Err(..) => break,
329         };
330 
331         let mut iter = line.split('#').next().unwrap().split_whitespace();
332         iter.next(); // skip the IP
333 
334         array.extend(
335             iter.filter(|host| {
336                 // Make sure it's a valid domain
337                 let invalid = [
338                     '\0', '\t', '\n', '\r', ' ', '#', '%', '/', ':', '?', '@', '[', '\\', ']',
339                 ];
340                 host.parse::<Ipv4Addr>().is_err() && !host.contains(&invalid[..])
341             })
342             .map(nsCString::from),
343         );
344 
345         // /etc/hosts files can be huge. To make sure we don't block shutdown
346         // for every 100 domains that we parse we call the callback passing the
347         // domains and see if we should keep parsing.
348         if array.len() > 100 {
349             let keep_going = callback(&array);
350             array.clear();
351             if !keep_going {
352                 break;
353             }
354         }
355     }
356 
357     if !array.is_empty() {
358         callback(&array);
359     }
360 }
361