1 /* This Source Code Form is subject to the terms of the Mozilla Public
2  * License, v. 2.0. If a copy of the MPL was not distributed with this
3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 
5 use crate::preferences::{Pref, PrefValue, Preferences};
6 use std::borrow::Borrow;
7 use std::borrow::Cow;
8 use std::char;
9 use std::error::Error;
10 use std::fmt;
11 use std::io::{self, Write};
12 use std::iter::Iterator;
13 use std::mem;
14 use std::str;
15 
16 impl PrefReaderError {
new(message: String, position: Position, parent: Option<Box<dyn Error>>) -> PrefReaderError17     fn new(message: String, position: Position, parent: Option<Box<dyn Error>>) -> PrefReaderError {
18         PrefReaderError {
19             message,
20             position,
21             parent,
22         }
23     }
24 }
25 
26 impl fmt::Display for PrefReaderError {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result27     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
28         write!(
29             f,
30             "{} at line {}, column {}",
31             self.message, self.position.line, self.position.column
32         )
33     }
34 }
35 
36 impl Error for PrefReaderError {
description(&self) -> &str37     fn description(&self) -> &str {
38         &self.message
39     }
40 
cause(&self) -> Option<&dyn Error>41     fn cause(&self) -> Option<&dyn Error> {
42         self.parent.as_deref()
43     }
44 }
45 
46 impl From<io::Error> for PrefReaderError {
from(err: io::Error) -> PrefReaderError47     fn from(err: io::Error) -> PrefReaderError {
48         PrefReaderError::new("IOError".into(), Position::new(), Some(err.into()))
49     }
50 }
51 
/// States of the state machine driven by `PrefTokenizer::next_token`.
#[derive(Copy, Clone, Debug, PartialEq)]
enum TokenizerState {
    /// Between tokens: whitespace is skipped, comments are recognised, and
    /// anything else hands over to the tokenizer's `next_state`.
    Junk,
    /// A `/` has been read; a `*` or another `/` must follow.
    CommentStart,
    /// Inside a `//` or `#` comment, which runs until the next newline.
    CommentLine,
    /// Inside a `/* ... */` comment.
    CommentBlock,
    /// Expecting `pref`, `user_pref` or `sticky_pref`.
    FunctionName,
    /// Expecting the `(` that follows a function name.
    AfterFunctionName,
    /// Just after `(`: either a `)` or the first argument follows.
    FunctionArgs,
    /// At the first character of an argument.
    FunctionArg,
    /// Inside a `"`-delimited string.
    DoubleQuotedString,
    /// Inside a `'`-delimited string.
    SingleQuotedString,
    /// Inside an integer literal.
    Number,
    /// At the start of a `true` / `false` literal.
    Bool,
    /// After an argument: expecting `,` or `)`.
    AfterFunctionArg,
    /// After the closing `)`: expecting `;`.
    AfterFunction,
    /// Set by the iterator after a tokenizing error; no further tokens are
    /// produced once this state is reached.
    Error,
}
70 
/// A line/column location in the input being tokenized.
///
/// Note that the derived `Default` yields line 0, column 0, whereas
/// [`Position::new`] yields line 1, column 0.
#[derive(Copy, Clone, Debug, Default, PartialEq)]
pub struct Position {
    line: u32,
    column: u32,
}

impl Position {
    /// Returns the position of the start of the input: line 1, column 0.
    pub fn new() -> Position {
        Self { column: 0, line: 1 }
    }
}
82 
/// Kind of token being accumulated in a `TokenData`.
///
/// `PrefTokenizer::make_token` turns each kind into the `PrefToken` variant
/// of the same name, except for `None` and `Error`, which it rejects with a
/// panic.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum TokenType {
    /// No token has been started yet.
    None,
    /// The `pref` function name.
    PrefFunction,
    /// The `user_pref` function name.
    UserPrefFunction,
    /// The `sticky_pref` function name.
    StickyPrefFunction,
    /// A `/* ... */` comment.
    CommentBlock,
    /// A `//` comment.
    CommentLine,
    /// A `#` comment.
    CommentBashLine,
    /// An opening or closing parenthesis.
    Paren,
    /// A `;`.
    Semicolon,
    /// A `,`.
    Comma,
    /// A quoted string argument.
    String,
    /// An integer argument.
    Int,
    /// A `true` / `false` argument.
    Bool,
    /// An error; never constructed through `make_token`.
    Error,
}
100 
101 #[derive(Debug, PartialEq)]
102 pub enum PrefToken<'a> {
103     PrefFunction(Position),
104     UserPrefFunction(Position),
105     StickyPrefFunction(Position),
106     CommentBlock(Cow<'a, str>, Position),
107     CommentLine(Cow<'a, str>, Position),
108     CommentBashLine(Cow<'a, str>, Position),
109     Paren(char, Position),
110     Semicolon(Position),
111     Comma(Position),
112     String(Cow<'a, str>, Position),
113     Int(i64, Position),
114     Bool(bool, Position),
115     Error(String, Position),
116 }
117 
118 impl<'a> PrefToken<'a> {
position(&self) -> Position119     fn position(&self) -> Position {
120         match *self {
121             PrefToken::PrefFunction(position) => position,
122             PrefToken::UserPrefFunction(position) => position,
123             PrefToken::StickyPrefFunction(position) => position,
124             PrefToken::CommentBlock(_, position) => position,
125             PrefToken::CommentLine(_, position) => position,
126             PrefToken::CommentBashLine(_, position) => position,
127             PrefToken::Paren(_, position) => position,
128             PrefToken::Semicolon(position) => position,
129             PrefToken::Comma(position) => position,
130             PrefToken::String(_, position) => position,
131             PrefToken::Int(_, position) => position,
132             PrefToken::Bool(_, position) => position,
133             PrefToken::Error(_, position) => position,
134         }
135     }
136 }
137 
/// Error reported while reading or tokenizing a prefs file.
#[derive(Debug)]
pub struct PrefReaderError {
    /// Human-readable description of the problem.
    message: String,
    /// Line/column the error refers to.
    position: Position,
    /// Underlying error that caused this one, if any.
    parent: Option<Box<dyn Error>>,
}
144 
145 struct TokenData<'a> {
146     token_type: TokenType,
147     complete: bool,
148     position: Position,
149     data: Cow<'a, str>,
150     start_pos: usize,
151 }
152 
153 impl<'a> TokenData<'a> {
new(token_type: TokenType, position: Position, start_pos: usize) -> TokenData<'a>154     fn new(token_type: TokenType, position: Position, start_pos: usize) -> TokenData<'a> {
155         TokenData {
156             token_type,
157             complete: false,
158             position,
159             data: Cow::Borrowed(""),
160             start_pos,
161         }
162     }
163 
start(&mut self, tokenizer: &PrefTokenizer, token_type: TokenType)164     fn start(&mut self, tokenizer: &PrefTokenizer, token_type: TokenType) {
165         self.token_type = token_type;
166         self.position = tokenizer.position;
167         self.start_pos = tokenizer.pos;
168     }
169 
end(&mut self, buf: &'a [u8], end_pos: usize) -> Result<(), PrefReaderError>170     fn end(&mut self, buf: &'a [u8], end_pos: usize) -> Result<(), PrefReaderError> {
171         self.complete = true;
172         self.add_slice_to_token(buf, end_pos)
173     }
174 
add_slice_to_token(&mut self, buf: &'a [u8], end_pos: usize) -> Result<(), PrefReaderError>175     fn add_slice_to_token(&mut self, buf: &'a [u8], end_pos: usize) -> Result<(), PrefReaderError> {
176         let data = match str::from_utf8(&buf[self.start_pos..end_pos]) {
177             Ok(x) => x,
178             Err(_) => {
179                 return Err(PrefReaderError::new(
180                     "Could not convert string to utf8".into(),
181                     self.position,
182                     None,
183                 ));
184             }
185         };
186         if self.data != "" {
187             self.data.to_mut().push_str(data)
188         } else {
189             self.data = Cow::Borrowed(data)
190         };
191         Ok(())
192     }
193 
push_char(&mut self, tokenizer: &PrefTokenizer, data: char)194     fn push_char(&mut self, tokenizer: &PrefTokenizer, data: char) {
195         self.data.to_mut().push(data);
196         self.start_pos = tokenizer.pos + 1;
197     }
198 }
199 
/// Tokenizer over the raw bytes of a prefs file; iterating it yields
/// `PrefToken`s.
pub struct PrefTokenizer<'a> {
    /// Input being tokenized.
    data: &'a [u8],
    /// Index into `data` of the current character.
    pos: usize,
    /// Character most recently read, or `None` before the first read and
    /// once the end of input has been reached.
    cur: Option<char>,
    /// Line/column tracked alongside `pos`.
    position: Position,
    /// Current state of the state machine.
    state: TokenizerState,
    /// State to enter once the `Junk` state has skipped whitespace and
    /// comments.
    next_state: Option<TokenizerState>,
}
208 
209 impl<'a> PrefTokenizer<'a> {
new(data: &'a [u8]) -> PrefTokenizer<'a>210     pub fn new(data: &'a [u8]) -> PrefTokenizer<'a> {
211         PrefTokenizer {
212             data,
213             pos: 0,
214             cur: None,
215             position: Position::new(),
216             state: TokenizerState::Junk,
217             next_state: Some(TokenizerState::FunctionName),
218         }
219     }
220 
make_token(&mut self, token_data: TokenData<'a>) -> PrefToken<'a>221     fn make_token(&mut self, token_data: TokenData<'a>) -> PrefToken<'a> {
222         let buf = token_data.data;
223         let position = token_data.position;
224         // Note: the panic! here are for cases where the invalid input is regarded as
225         // a bug in the caller. In cases where `make_token` can legitimately be called
226         // with invalid data we must instead return a PrefToken::Error
227         match token_data.token_type {
228             TokenType::None => panic!("Got a token without a type"),
229             TokenType::PrefFunction => PrefToken::PrefFunction(position),
230             TokenType::UserPrefFunction => PrefToken::UserPrefFunction(position),
231             TokenType::StickyPrefFunction => PrefToken::StickyPrefFunction(position),
232             TokenType::CommentBlock => PrefToken::CommentBlock(buf, position),
233             TokenType::CommentLine => PrefToken::CommentLine(buf, position),
234             TokenType::CommentBashLine => PrefToken::CommentBashLine(buf, position),
235             TokenType::Paren => {
236                 if buf.len() != 1 {
237                     panic!("Expected a buffer of length one");
238                 }
239                 PrefToken::Paren(buf.chars().next().unwrap(), position)
240             }
241             TokenType::Semicolon => PrefToken::Semicolon(position),
242             TokenType::Comma => PrefToken::Comma(position),
243             TokenType::String => PrefToken::String(buf, position),
244             TokenType::Int => {
245                 return match buf.parse::<i64>() {
246                     Ok(value) => PrefToken::Int(value, position),
247                     Err(_) => PrefToken::Error(format!("Expected integer, got {}", buf), position),
248                 }
249             }
250             TokenType::Bool => {
251                 let value = match buf.borrow() {
252                     "true" => true,
253                     "false" => false,
254                     x => panic!("Boolean wasn't 'true' or 'false' (was {})", x),
255                 };
256                 PrefToken::Bool(value, position)
257             }
258             TokenType::Error => panic!("make_token can't construct errors"),
259         }
260     }
261 
get_char(&mut self) -> Option<char>262     fn get_char(&mut self) -> Option<char> {
263         if self.pos + 1 >= self.data.len() {
264             self.cur = None;
265             return None;
266         };
267         if self.cur.is_some() {
268             self.pos += 1;
269         }
270         let c = self.data[self.pos] as char;
271         if self.cur == Some('\n') {
272             self.position.line += 1;
273             self.position.column = 0;
274         } else if self.cur.is_some() {
275             self.position.column += 1;
276         };
277         self.cur = Some(c);
278         self.cur
279     }
280 
unget_char(&mut self) -> Option<char>281     fn unget_char(&mut self) -> Option<char> {
282         if self.pos == 0 {
283             self.position.column = 0;
284             self.cur = None
285         } else {
286             self.pos -= 1;
287             let c = self.data[self.pos] as char;
288             if c == '\n' {
289                 self.position.line -= 1;
290                 let mut col_pos = self.pos;
291                 while col_pos > 0 {
292                     col_pos -= 1;
293                     if self.data[col_pos] as char == '\n' {
294                         break;
295                     }
296                 }
297                 self.position.column = (self.pos - col_pos as usize) as u32;
298             } else {
299                 self.position.column -= 1;
300             }
301             self.cur = Some(c);
302         }
303         self.cur
304     }
305 
is_space(c: char) -> bool306     fn is_space(c: char) -> bool {
307         matches!(c, ' ' | '\t' | '\r' | '\n')
308     }
309 
skip_whitespace(&mut self) -> Option<char>310     fn skip_whitespace(&mut self) -> Option<char> {
311         while let Some(c) = self.cur {
312             if PrefTokenizer::is_space(c) {
313                 self.get_char();
314             } else {
315                 break;
316             };
317         }
318         self.cur
319     }
320 
consume_escape(&mut self, token_data: &mut TokenData<'a>) -> Result<(), PrefReaderError>321     fn consume_escape(&mut self, token_data: &mut TokenData<'a>) -> Result<(), PrefReaderError> {
322         let pos = self.pos;
323         let escaped = self.read_escape()?;
324         if let Some(escape_char) = escaped {
325             token_data.add_slice_to_token(self.data, pos)?;
326             token_data.push_char(self, escape_char);
327         };
328         Ok(())
329     }
330 
read_escape(&mut self) -> Result<Option<char>, PrefReaderError>331     fn read_escape(&mut self) -> Result<Option<char>, PrefReaderError> {
332         let escape_char = match self.get_char() {
333             Some('u') => self.read_hex_escape(4, true)?,
334             Some('x') => self.read_hex_escape(2, true)?,
335             Some('\\') => '\\' as u32,
336             Some('"') => '"' as u32,
337             Some('\'') => '\'' as u32,
338             Some('r') => '\r' as u32,
339             Some('n') => '\n' as u32,
340             Some(_) => return Ok(None),
341             None => {
342                 return Err(PrefReaderError::new(
343                     "EOF in character escape".into(),
344                     self.position,
345                     None,
346                 ))
347             }
348         };
349         Ok(Some(char::from_u32(escape_char).ok_or_else(|| {
350             PrefReaderError::new(
351                 "Invalid codepoint decoded from escape".into(),
352                 self.position,
353                 None,
354             )
355         })?))
356     }
357 
read_hex_escape(&mut self, hex_chars: isize, first: bool) -> Result<u32, PrefReaderError>358     fn read_hex_escape(&mut self, hex_chars: isize, first: bool) -> Result<u32, PrefReaderError> {
359         let mut value = 0;
360         for _ in 0..hex_chars {
361             match self.get_char() {
362                 Some(x) => {
363                     value <<= 4;
364                     match x {
365                         '0'..='9' => value += x as u32 - '0' as u32,
366                         'a'..='f' => value += x as u32 - 'a' as u32,
367                         'A'..='F' => value += x as u32 - 'A' as u32,
368                         _ => {
369                             return Err(PrefReaderError::new(
370                                 "Unexpected character in escape".into(),
371                                 self.position,
372                                 None,
373                             ))
374                         }
375                     }
376                 }
377                 None => {
378                     return Err(PrefReaderError::new(
379                         "Unexpected EOF in escape".into(),
380                         self.position,
381                         None,
382                     ))
383                 }
384             }
385         }
386         if first && (0xD800..=0xDBFF).contains(&value) {
387             // First part of a surrogate pair
388             if self.get_char() != Some('\\') || self.get_char() != Some('u') {
389                 return Err(PrefReaderError::new(
390                     "Lone high surrogate in surrogate pair".into(),
391                     self.position,
392                     None,
393                 ));
394             }
395             self.unget_char();
396             let high_surrogate = value;
397             let low_surrogate = self.read_hex_escape(4, false)?;
398             let high_value = (high_surrogate - 0xD800) << 10;
399             let low_value = low_surrogate - 0xDC00;
400             value = high_value + low_value + 0x10000;
401         } else if first && (0xDC00..=0xDFFF).contains(&value) {
402             return Err(PrefReaderError::new(
403                 "Lone low surrogate".into(),
404                 self.position,
405                 None,
406             ));
407         } else if !first && !(0xDC00..=0xDFFF).contains(&value) {
408             return Err(PrefReaderError::new(
409                 "Invalid low surrogate in surrogate pair".into(),
410                 self.position,
411                 None,
412             ));
413         }
414         Ok(value)
415     }
416 
get_match(&mut self, target: &str, separators: &str) -> bool417     fn get_match(&mut self, target: &str, separators: &str) -> bool {
418         let initial_pos = self.pos;
419         let mut matched = true;
420         for c in target.chars() {
421             if self.cur == Some(c) {
422                 self.get_char();
423             } else {
424                 matched = false;
425                 break;
426             }
427         }
428 
429         if !matched {
430             for _ in 0..(self.pos - initial_pos) {
431                 self.unget_char();
432             }
433         } else {
434             // Check that the next character is whitespace or a separator
435             if let Some(c) = self.cur {
436                 if !(PrefTokenizer::is_space(c) || separators.contains(c) || c == '/') {
437                     matched = false;
438                 }
439                 self.unget_char();
440             }
441             // Otherwise the token was followed by EOF. That's a valid match, but
442             // will presumably cause a parse error later.
443         }
444 
445         matched
446     }
447 
next_token(&mut self) -> Result<Option<TokenData<'a>>, PrefReaderError>448     fn next_token(&mut self) -> Result<Option<TokenData<'a>>, PrefReaderError> {
449         let mut token_data = TokenData::new(TokenType::None, Position::new(), 0);
450 
451         loop {
452             let mut c = match self.get_char() {
453                 Some(x) => x,
454                 None => return Ok(None),
455             };
456 
457             self.state = match self.state {
458                 TokenizerState::Junk => {
459                     c = match self.skip_whitespace() {
460                         Some(x) => x,
461                         None => return Ok(None),
462                     };
463                     match c {
464                         '/' => TokenizerState::CommentStart,
465                         '#' => {
466                             token_data.start(self, TokenType::CommentBashLine);
467                             token_data.start_pos = self.pos + 1;
468                             TokenizerState::CommentLine
469                         }
470                         _ => {
471                             self.unget_char();
472                             let next = match self.next_state {
473                                 Some(x) => x,
474                                 None => {
475                                     return Err(PrefReaderError::new(
476                                         "In Junk state without a next state defined".into(),
477                                         self.position,
478                                         None,
479                                     ))
480                                 }
481                             };
482                             self.next_state = None;
483                             next
484                         }
485                     }
486                 }
487                 TokenizerState::CommentStart => match c {
488                     '*' => {
489                         token_data.start(self, TokenType::CommentBlock);
490                         token_data.start_pos = self.pos + 1;
491                         TokenizerState::CommentBlock
492                     }
493                     '/' => {
494                         token_data.start(self, TokenType::CommentLine);
495                         token_data.start_pos = self.pos + 1;
496                         TokenizerState::CommentLine
497                     }
498                     _ => {
499                         return Err(PrefReaderError::new(
500                             "Invalid character after /".into(),
501                             self.position,
502                             None,
503                         ))
504                     }
505                 },
506                 TokenizerState::CommentLine => match c {
507                     '\n' => {
508                         token_data.end(self.data, self.pos)?;
509                         TokenizerState::Junk
510                     }
511                     _ => TokenizerState::CommentLine,
512                 },
513                 TokenizerState::CommentBlock => match c {
514                     '*' => {
515                         if self.get_char() == Some('/') {
516                             token_data.end(self.data, self.pos - 1)?;
517                             TokenizerState::Junk
518                         } else {
519                             TokenizerState::CommentBlock
520                         }
521                     }
522                     _ => TokenizerState::CommentBlock,
523                 },
524                 TokenizerState::FunctionName => {
525                     let position = self.position;
526                     let start_pos = self.pos;
527                     match c {
528                         'u' => {
529                             if self.get_match("user_pref", "(") {
530                                 token_data.start(self, TokenType::UserPrefFunction);
531                             }
532                         }
533                         's' => {
534                             if self.get_match("sticky_pref", "(") {
535                                 token_data.start(self, TokenType::StickyPrefFunction);
536                             }
537                         }
538                         'p' => {
539                             if self.get_match("pref", "(") {
540                                 token_data.start(self, TokenType::PrefFunction);
541                             }
542                         }
543                         _ => {}
544                     };
545                     if token_data.token_type == TokenType::None {
546                         // We didn't match anything
547                         return Err(PrefReaderError::new(
548                             "Expected a pref function name".into(),
549                             position,
550                             None,
551                         ));
552                     } else {
553                         token_data.start_pos = start_pos;
554                         token_data.position = position;
555                         token_data.end(self.data, self.pos + 1)?;
556                         self.next_state = Some(TokenizerState::AfterFunctionName);
557                         TokenizerState::Junk
558                     }
559                 }
560                 TokenizerState::AfterFunctionName => match c {
561                     '(' => {
562                         self.next_state = Some(TokenizerState::FunctionArgs);
563                         token_data.start(self, TokenType::Paren);
564                         token_data.end(self.data, self.pos + 1)?;
565                         self.next_state = Some(TokenizerState::FunctionArgs);
566                         TokenizerState::Junk
567                     }
568                     _ => {
569                         return Err(PrefReaderError::new(
570                             "Expected an opening paren".into(),
571                             self.position,
572                             None,
573                         ))
574                     }
575                 },
576                 TokenizerState::FunctionArgs => match c {
577                     ')' => {
578                         token_data.start(self, TokenType::Paren);
579                         token_data.end(self.data, self.pos + 1)?;
580                         self.next_state = Some(TokenizerState::AfterFunction);
581                         TokenizerState::Junk
582                     }
583                     _ => {
584                         self.unget_char();
585                         TokenizerState::FunctionArg
586                     }
587                 },
588                 TokenizerState::FunctionArg => match c {
589                     '"' => {
590                         token_data.start(self, TokenType::String);
591                         token_data.start_pos = self.pos + 1;
592                         TokenizerState::DoubleQuotedString
593                     }
594                     '\'' => {
595                         token_data.start(self, TokenType::String);
596                         token_data.start_pos = self.pos + 1;
597                         TokenizerState::SingleQuotedString
598                     }
599                     't' | 'f' => {
600                         self.unget_char();
601                         TokenizerState::Bool
602                     }
603                     '0'..='9' | '-' | '+' => {
604                         token_data.start(self, TokenType::Int);
605                         TokenizerState::Number
606                     }
607                     _ => {
608                         return Err(PrefReaderError::new(
609                             "Invalid character at start of function argument".into(),
610                             self.position,
611                             None,
612                         ))
613                     }
614                 },
615                 TokenizerState::DoubleQuotedString => match c {
616                     '"' => {
617                         token_data.end(self.data, self.pos)?;
618                         self.next_state = Some(TokenizerState::AfterFunctionArg);
619                         TokenizerState::Junk
620                     }
621                     '\n' => {
622                         return Err(PrefReaderError::new(
623                             "EOL in double quoted string".into(),
624                             self.position,
625                             None,
626                         ))
627                     }
628                     '\\' => {
629                         self.consume_escape(&mut token_data)?;
630                         TokenizerState::DoubleQuotedString
631                     }
632                     _ => TokenizerState::DoubleQuotedString,
633                 },
634                 TokenizerState::SingleQuotedString => match c {
635                     '\'' => {
636                         token_data.end(self.data, self.pos)?;
637                         self.next_state = Some(TokenizerState::AfterFunctionArg);
638                         TokenizerState::Junk
639                     }
640                     '\n' => {
641                         return Err(PrefReaderError::new(
642                             "EOL in single quoted string".into(),
643                             self.position,
644                             None,
645                         ))
646                     }
647                     '\\' => {
648                         self.consume_escape(&mut token_data)?;
649                         TokenizerState::SingleQuotedString
650                     }
651                     _ => TokenizerState::SingleQuotedString,
652                 },
653                 TokenizerState::Number => match c {
654                     '0'..='9' => TokenizerState::Number,
655                     ')' | ',' => {
656                         token_data.end(self.data, self.pos)?;
657                         self.unget_char();
658                         self.next_state = Some(TokenizerState::AfterFunctionArg);
659                         TokenizerState::Junk
660                     }
661                     x if PrefTokenizer::is_space(x) => {
662                         token_data.end(self.data, self.pos)?;
663                         self.next_state = Some(TokenizerState::AfterFunctionArg);
664                         TokenizerState::Junk
665                     }
666                     _ => {
667                         return Err(PrefReaderError::new(
668                             "Invalid character in number literal".into(),
669                             self.position,
670                             None,
671                         ))
672                     }
673                 },
674                 TokenizerState::Bool => {
675                     let start_pos = self.pos;
676                     let position = self.position;
677                     match c {
678                         't' => {
679                             if self.get_match("true", ",)") {
680                                 token_data.start(self, TokenType::Bool)
681                             }
682                         }
683                         'f' => {
684                             if self.get_match("false", ",)") {
685                                 token_data.start(self, TokenType::Bool)
686                             }
687                         }
688                         _ => {}
689                     };
690                     if token_data.token_type == TokenType::None {
691                         return Err(PrefReaderError::new(
692                             "Unexpected characters in function argument".into(),
693                             position,
694                             None,
695                         ));
696                     } else {
697                         token_data.start_pos = start_pos;
698                         token_data.position = position;
699                         token_data.end(self.data, self.pos + 1)?;
700                         self.next_state = Some(TokenizerState::AfterFunctionArg);
701                         TokenizerState::Junk
702                     }
703                 }
704                 TokenizerState::AfterFunctionArg => match c {
705                     ',' => {
706                         token_data.start(self, TokenType::Comma);
707                         token_data.end(self.data, self.pos + 1)?;
708                         self.next_state = Some(TokenizerState::FunctionArg);
709                         TokenizerState::Junk
710                     }
711                     ')' => {
712                         token_data.start(self, TokenType::Paren);
713                         token_data.end(self.data, self.pos + 1)?;
714                         self.next_state = Some(TokenizerState::AfterFunction);
715                         TokenizerState::Junk
716                     }
717                     _ => {
718                         return Err(PrefReaderError::new(
719                             "Unexpected character after function argument".into(),
720                             self.position,
721                             None,
722                         ))
723                     }
724                 },
725                 TokenizerState::AfterFunction => match c {
726                     ';' => {
727                         token_data.start(self, TokenType::Semicolon);
728                         token_data.end(self.data, self.pos)?;
729                         self.next_state = Some(TokenizerState::FunctionName);
730                         TokenizerState::Junk
731                     }
732                     _ => {
733                         return Err(PrefReaderError::new(
734                             "Unexpected character after function".into(),
735                             self.position,
736                             None,
737                         ))
738                     }
739                 },
740                 TokenizerState::Error => TokenizerState::Error,
741             };
742             if token_data.complete {
743                 return Ok(Some(token_data));
744             }
745         }
746     }
747 }
748 
749 impl<'a> Iterator for PrefTokenizer<'a> {
750     type Item = PrefToken<'a>;
751 
next(&mut self) -> Option<PrefToken<'a>>752     fn next(&mut self) -> Option<PrefToken<'a>> {
753         if let TokenizerState::Error = self.state {
754             return None;
755         }
756         let token_data = match self.next_token() {
757             Err(e) => {
758                 self.state = TokenizerState::Error;
759                 return Some(PrefToken::Error(e.message.clone(), e.position));
760             }
761             Ok(Some(token_data)) => token_data,
762             Ok(None) => return None,
763         };
764         let token = self.make_token(token_data);
765         Some(token)
766     }
767 }
768 
tokenize(data: &[u8]) -> PrefTokenizer769 pub fn tokenize(data: &[u8]) -> PrefTokenizer {
770     PrefTokenizer::new(data)
771 }
772 
serialize_token<T: Write>(token: &PrefToken, output: &mut T) -> Result<(), PrefReaderError>773 pub fn serialize_token<T: Write>(token: &PrefToken, output: &mut T) -> Result<(), PrefReaderError> {
774     let mut data_buf = String::new();
775 
776     let data = match *token {
777         PrefToken::PrefFunction(_) => "pref",
778         PrefToken::UserPrefFunction(_) => "user_pref",
779         PrefToken::StickyPrefFunction(_) => "sticky_pref",
780         PrefToken::CommentBlock(ref data, _) => {
781             data_buf.reserve(data.len() + 4);
782             data_buf.push_str("/*");
783             data_buf.push_str(data.borrow());
784             data_buf.push('*');
785             &*data_buf
786         }
787         PrefToken::CommentLine(ref data, _) => {
788             data_buf.reserve(data.len() + 2);
789             data_buf.push_str("//");
790             data_buf.push_str(data.borrow());
791             &*data_buf
792         }
793         PrefToken::CommentBashLine(ref data, _) => {
794             data_buf.reserve(data.len() + 1);
795             data_buf.push('#');
796             data_buf.push_str(data.borrow());
797             &*data_buf
798         }
799         PrefToken::Paren(data, _) => {
800             data_buf.push(data);
801             &*data_buf
802         }
803         PrefToken::Comma(_) => ",",
804         PrefToken::Semicolon(_) => ";\n",
805         PrefToken::String(ref data, _) => {
806             data_buf.reserve(data.len() + 2);
807             data_buf.push('"');
808             data_buf.push_str(escape_quote(data.borrow()).borrow());
809             data_buf.push('"');
810             &*data_buf
811         }
812         PrefToken::Int(data, _) => {
813             data_buf.push_str(&*data.to_string());
814             &*data_buf
815         }
816         PrefToken::Bool(data, _) => {
817             if data {
818                 "true"
819             } else {
820                 "false"
821             }
822         }
823         PrefToken::Error(ref data, pos) => {
824             return Err(PrefReaderError::new(data.clone(), pos, None))
825         }
826     };
827     output.write_all(data.as_bytes())?;
828     Ok(())
829 }
830 
serialize_tokens<'a, I, W>(tokens: I, output: &mut W) -> Result<(), PrefReaderError> where I: Iterator<Item = &'a PrefToken<'a>>, W: Write,831 pub fn serialize_tokens<'a, I, W>(tokens: I, output: &mut W) -> Result<(), PrefReaderError>
832 where
833     I: Iterator<Item = &'a PrefToken<'a>>,
834     W: Write,
835 {
836     for token in tokens {
837         serialize_token(token, output)?;
838     }
839     Ok(())
840 }
841 
/// Escapes `data` for use inside a double-quoted pref string.
///
/// Every `\` and `"` is prefixed with a backslash. When `data` contains
/// neither character it is returned borrowed, without allocating.
fn escape_quote(data: &str) -> Cow<str> {
    const SPECIAL: [char; 2] = ['"', '\\'];

    if !data.contains(&SPECIAL[..]) {
        return Cow::Borrowed(data);
    }

    // At least one character needs a backslash in front of it.
    let mut escaped = String::with_capacity(data.len() + 1);
    for c in data.chars() {
        if SPECIAL.contains(&c) {
            escaped.push('\\');
        }
        escaped.push(c);
    }
    Cow::Owned(escaped)
}
850 
/// Which token `parse_tokens` expects next while reading one pref call.
#[derive(Debug, PartialEq)]
enum ParserState {
    /// Expecting one of the pref function name tokens.
    Function,
    /// Expecting the string token that holds the pref name.
    Key,
    /// Expecting the string, integer or boolean token that holds the value.
    Value,
}
857 
858 struct PrefBuilder {
859     key: Option<String>,
860     value: Option<PrefValue>,
861     sticky: bool,
862 }
863 
864 impl PrefBuilder {
new() -> PrefBuilder865     fn new() -> PrefBuilder {
866         PrefBuilder {
867             key: None,
868             value: None,
869             sticky: false,
870         }
871     }
872 }
873 
skip_comments<'a>(tokenizer: &mut PrefTokenizer<'a>) -> Option<PrefToken<'a>>874 fn skip_comments<'a>(tokenizer: &mut PrefTokenizer<'a>) -> Option<PrefToken<'a>> {
875     loop {
876         match tokenizer.next() {
877             Some(PrefToken::CommentBashLine(_, _))
878             | Some(PrefToken::CommentBlock(_, _))
879             | Some(PrefToken::CommentLine(_, _)) => {}
880             Some(x) => return Some(x),
881             None => return None,
882         }
883     }
884 }
885 
parse_tokens(tokenizer: &mut PrefTokenizer<'_>) -> Result<Preferences, PrefReaderError>886 pub fn parse_tokens(tokenizer: &mut PrefTokenizer<'_>) -> Result<Preferences, PrefReaderError> {
887     let mut state = ParserState::Function;
888     let mut current_pref = PrefBuilder::new();
889     let mut rv = Preferences::new();
890 
891     loop {
892         // Not just using a for loop here seems strange, but this restricts the
893         // scope of the borrow
894         let token = {
895             match tokenizer.next() {
896                 Some(x) => x,
897                 None => break,
898             }
899         };
900         // First deal with comments and errors
901         match token {
902             PrefToken::Error(msg, position) => {
903                 return Err(PrefReaderError::new(msg, position, None));
904             }
905             PrefToken::CommentBashLine(_, _)
906             | PrefToken::CommentLine(_, _)
907             | PrefToken::CommentBlock(_, _) => continue,
908             _ => {}
909         }
910         state = match state {
911             ParserState::Function => {
912                 match token {
913                     PrefToken::PrefFunction(_) => {
914                         current_pref.sticky = false;
915                     }
916                     PrefToken::UserPrefFunction(_) => {
917                         current_pref.sticky = false;
918                     }
919                     PrefToken::StickyPrefFunction(_) => {
920                         current_pref.sticky = true;
921                     }
922                     _ => {
923                         return Err(PrefReaderError::new(
924                             "Expected pref function".into(),
925                             token.position(),
926                             None,
927                         ));
928                     }
929                 }
930                 let next = skip_comments(tokenizer);
931                 match next {
932                     Some(PrefToken::Paren('(', _)) => ParserState::Key,
933                     _ => {
934                         return Err(PrefReaderError::new(
935                             "Expected open paren".into(),
936                             next.map(|x| x.position()).unwrap_or(tokenizer.position),
937                             None,
938                         ))
939                     }
940                 }
941             }
942             ParserState::Key => {
943                 match token {
944                     PrefToken::String(data, _) => current_pref.key = Some(data.into_owned()),
945                     _ => {
946                         return Err(PrefReaderError::new(
947                             "Expected string".into(),
948                             token.position(),
949                             None,
950                         ));
951                     }
952                 }
953                 let next = skip_comments(tokenizer);
954                 match next {
955                     Some(PrefToken::Comma(_)) => ParserState::Value,
956                     _ => {
957                         return Err(PrefReaderError::new(
958                             "Expected comma".into(),
959                             next.map(|x| x.position()).unwrap_or(tokenizer.position),
960                             None,
961                         ))
962                     }
963                 }
964             }
965             ParserState::Value => {
966                 match token {
967                     PrefToken::String(data, _) => {
968                         current_pref.value = Some(PrefValue::String(data.into_owned()))
969                     }
970                     PrefToken::Int(data, _) => current_pref.value = Some(PrefValue::Int(data)),
971                     PrefToken::Bool(data, _) => current_pref.value = Some(PrefValue::Bool(data)),
972                     _ => {
973                         return Err(PrefReaderError::new(
974                             "Expected value".into(),
975                             token.position(),
976                             None,
977                         ))
978                     }
979                 }
980                 let next = skip_comments(tokenizer);
981                 match next {
982                     Some(PrefToken::Paren(')', _)) => {}
983                     _ => {
984                         return Err(PrefReaderError::new(
985                             "Expected close paren".into(),
986                             next.map(|x| x.position()).unwrap_or(tokenizer.position),
987                             None,
988                         ))
989                     }
990                 }
991                 let next = skip_comments(tokenizer);
992                 match next {
993                     Some(PrefToken::Semicolon(_)) | None => {}
994                     _ => {
995                         return Err(PrefReaderError::new(
996                             "Expected semicolon".into(),
997                             next.map(|x| x.position()).unwrap_or(tokenizer.position),
998                             None,
999                         ))
1000                     }
1001                 }
1002                 let key = mem::replace(&mut current_pref.key, None);
1003                 let value = mem::replace(&mut current_pref.value, None);
1004                 let pref = if current_pref.sticky {
1005                     Pref::new_sticky(value.unwrap())
1006                 } else {
1007                     Pref::new(value.unwrap())
1008                 };
1009                 rv.insert(key.unwrap(), pref);
1010                 current_pref.sticky = false;
1011                 ParserState::Function
1012             }
1013         }
1014     }
1015     match state {
1016         ParserState::Key | ParserState::Value => {
1017             return Err(PrefReaderError::new(
1018                 "EOF in middle of function".into(),
1019                 tokenizer.position,
1020                 None,
1021             ));
1022         }
1023         _ => {}
1024     }
1025     Ok(rv)
1026 }
1027 
serialize<W: Write>(prefs: &Preferences, output: &mut W) -> io::Result<()>1028 pub fn serialize<W: Write>(prefs: &Preferences, output: &mut W) -> io::Result<()> {
1029     let mut p: Vec<_> = prefs.iter().collect();
1030     p.sort_by(|a, b| a.0.cmp(b.0));
1031     for &(key, pref) in &p {
1032         let func = if pref.sticky {
1033             "sticky_pref("
1034         } else {
1035             "user_pref("
1036         }
1037         .as_bytes();
1038         output.write_all(func)?;
1039         output.write_all(b"\"")?;
1040         output.write_all(escape_quote(key).as_bytes())?;
1041         output.write_all(b"\"")?;
1042         output.write_all(b", ")?;
1043         match pref.value {
1044             PrefValue::Bool(x) => {
1045                 output.write_all(if x { b"true" } else { b"false" })?;
1046             }
1047             PrefValue::Int(x) => {
1048                 output.write_all(x.to_string().as_bytes())?;
1049             }
1050             PrefValue::String(ref x) => {
1051                 output.write_all(b"\"")?;
1052                 output.write_all(escape_quote(x).as_bytes())?;
1053                 output.write_all(b"\"")?;
1054             }
1055         };
1056         output.write_all(b");\n")?;
1057     }
1058     Ok(())
1059 }
1060 
parse(data: &[u8]) -> Result<Preferences, PrefReaderError>1061 pub fn parse(data: &[u8]) -> Result<Preferences, PrefReaderError> {
1062     let mut tokenizer = tokenize(data);
1063     parse_tokens(&mut tokenizer)
1064 }
1065