1 use std::cmp;
2 use std::error;
3 use std::fmt;
4 use std::result;
5 
6 use crate::ast;
7 use crate::hir;
8 
/// A type alias for dealing with errors returned by this crate.
///
/// This is `std::result::Result` with the error type fixed to the `Error`
/// enum defined in this module.
pub type Result<T> = result::Result<T, Error>;
11 
/// This error type encompasses any error that can be returned by this crate.
///
/// Each real variant wraps the error type of one translation stage. The
/// `From` impls in this module convert `ast::Error` and `hir::Error` into
/// the matching variant.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Error {
    /// An error that occurred while translating concrete syntax into abstract
    /// syntax (AST).
    Parse(ast::Error),
    /// An error that occurred while translating abstract syntax into a high
    /// level intermediate representation (HIR).
    Translate(hir::Error),
    /// Hints that destructuring should not be exhaustive.
    ///
    /// This enum may grow additional variants, so this makes sure clients
    /// don't count on exhaustive matching. (Otherwise, adding a new variant
    /// could break existing code.)
    ///
    /// The `Display` and `std::error::Error` impls in this module treat this
    /// variant as unreachable, so it must never be constructed.
    #[doc(hidden)]
    __Nonexhaustive,
}
29 
30 impl From<ast::Error> for Error {
from(err: ast::Error) -> Error31     fn from(err: ast::Error) -> Error {
32         Error::Parse(err)
33     }
34 }
35 
36 impl From<hir::Error> for Error {
from(err: hir::Error) -> Error37     fn from(err: hir::Error) -> Error {
38         Error::Translate(err)
39     }
40 }
41 
42 impl error::Error for Error {
43     // TODO: Remove this method entirely on the next breaking semver release.
44     #[allow(deprecated)]
description(&self) -> &str45     fn description(&self) -> &str {
46         match *self {
47             Error::Parse(ref x) => x.description(),
48             Error::Translate(ref x) => x.description(),
49             _ => unreachable!(),
50         }
51     }
52 }
53 
54 impl fmt::Display for Error {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result55     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
56         match *self {
57             Error::Parse(ref x) => x.fmt(f),
58             Error::Translate(ref x) => x.fmt(f),
59             _ => unreachable!(),
60         }
61     }
62 }
63 
/// A helper type for formatting nice error messages.
///
/// This type is responsible for reporting regex parse errors in a nice human
/// readable format. Most of its complexity is from interspersing notational
/// markers pointing out the position where an error occurred.
///
/// Values are built through the `From<&ast::Error>` and `From<&hir::Error>`
/// impls in this module and rendered through the `fmt::Display` impl, which
/// requires `E: fmt::Display`.
#[derive(Debug)]
pub struct Formatter<'e, E> {
    /// The original regex pattern in which the error occurred.
    pattern: &'e str,
    /// The error kind. It must impl fmt::Display.
    err: &'e E,
    /// The primary span of the error.
    span: &'e ast::Span,
    /// An auxiliary and optional span, in case the error needs to point to
    /// two locations (e.g., when reporting a duplicate capture group name).
    ///
    /// This is always `None` when the formatter is built from a `hir::Error`.
    aux_span: Option<&'e ast::Span>,
}
81 
82 impl<'e> From<&'e ast::Error> for Formatter<'e, ast::ErrorKind> {
from(err: &'e ast::Error) -> Self83     fn from(err: &'e ast::Error) -> Self {
84         Formatter {
85             pattern: err.pattern(),
86             err: err.kind(),
87             span: err.span(),
88             aux_span: err.auxiliary_span(),
89         }
90     }
91 }
92 
93 impl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> {
from(err: &'e hir::Error) -> Self94     fn from(err: &'e hir::Error) -> Self {
95         Formatter {
96             pattern: err.pattern(),
97             err: err.kind(),
98             span: err.span(),
99             aux_span: None,
100         }
101     }
102 }
103 
104 impl<'e, E: fmt::Display> fmt::Display for Formatter<'e, E> {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result105     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
106         let spans = Spans::from_formatter(self);
107         if self.pattern.contains('\n') {
108             let divider = repeat_char('~', 79);
109 
110             writeln!(f, "regex parse error:")?;
111             writeln!(f, "{}", divider)?;
112             let notated = spans.notate();
113             write!(f, "{}", notated)?;
114             writeln!(f, "{}", divider)?;
115             // If we have error spans that cover multiple lines, then we just
116             // note the line numbers.
117             if !spans.multi_line.is_empty() {
118                 let mut notes = vec![];
119                 for span in &spans.multi_line {
120                     notes.push(format!(
121                         "on line {} (column {}) through line {} (column {})",
122                         span.start.line,
123                         span.start.column,
124                         span.end.line,
125                         span.end.column - 1
126                     ));
127                 }
128                 writeln!(f, "{}", notes.join("\n"))?;
129             }
130             write!(f, "error: {}", self.err)?;
131         } else {
132             writeln!(f, "regex parse error:")?;
133             let notated = Spans::from_formatter(self).notate();
134             write!(f, "{}", notated)?;
135             write!(f, "error: {}", self.err)?;
136         }
137         Ok(())
138     }
139 }
140 
/// This type represents an arbitrary number of error spans in a way that makes
/// it convenient to notate the regex pattern. ("Notate" means "point out
/// exactly where the error occurred in the regex pattern.")
///
/// Technically, we can only ever have two spans given our current error
/// structure. However, after toiling with a specific algorithm for handling
/// two spans, it became obvious that an algorithm to handle an arbitrary
/// number of spans was actually much simpler.
struct Spans<'p> {
    /// The original regex pattern string.
    pattern: &'p str,
    /// The total width that should be used for line numbers. The width is
    /// used for left padding the line numbers for alignment.
    ///
    /// A value of `0` means line numbers should not be displayed. That is,
    /// the pattern is itself only one line.
    line_number_width: usize,
    /// All error spans that occur on a single line. This sequence always has
    /// length equivalent to the number of lines in `pattern`, where the index
    /// of the sequence represents a line number, starting at `0`. The spans
    /// in each line are sorted in ascending order.
    ///
    /// Note that `from_formatter` counts one extra line when the pattern ends
    /// with `\n`, so that a span located after the final `\n` has a slot.
    by_line: Vec<Vec<ast::Span>>,
    /// All error spans that occur over one or more lines. That is, the start
    /// and end position of the span have different line numbers. The spans are
    /// sorted in ascending order.
    multi_line: Vec<ast::Span>,
}
168 
169 impl<'p> Spans<'p> {
170     /// Build a sequence of spans from a formatter.
from_formatter<'e, E: fmt::Display>( fmter: &'p Formatter<'e, E>, ) -> Spans<'p>171     fn from_formatter<'e, E: fmt::Display>(
172         fmter: &'p Formatter<'e, E>,
173     ) -> Spans<'p> {
174         let mut line_count = fmter.pattern.lines().count();
175         // If the pattern ends with a `\n` literal, then our line count is
176         // off by one, since a span can occur immediately after the last `\n`,
177         // which is consider to be an additional line.
178         if fmter.pattern.ends_with('\n') {
179             line_count += 1;
180         }
181         let line_number_width =
182             if line_count <= 1 { 0 } else { line_count.to_string().len() };
183         let mut spans = Spans {
184             pattern: &fmter.pattern,
185             line_number_width: line_number_width,
186             by_line: vec![vec![]; line_count],
187             multi_line: vec![],
188         };
189         spans.add(fmter.span.clone());
190         if let Some(span) = fmter.aux_span {
191             spans.add(span.clone());
192         }
193         spans
194     }
195 
196     /// Add the given span to this sequence, putting it in the right place.
add(&mut self, span: ast::Span)197     fn add(&mut self, span: ast::Span) {
198         // This is grossly inefficient since we sort after each add, but right
199         // now, we only ever add two spans at most.
200         if span.is_one_line() {
201             let i = span.start.line - 1; // because lines are 1-indexed
202             self.by_line[i].push(span);
203             self.by_line[i].sort();
204         } else {
205             self.multi_line.push(span);
206             self.multi_line.sort();
207         }
208     }
209 
210     /// Notate the pattern string with carents (`^`) pointing at each span
211     /// location. This only applies to spans that occur within a single line.
notate(&self) -> String212     fn notate(&self) -> String {
213         let mut notated = String::new();
214         for (i, line) in self.pattern.lines().enumerate() {
215             if self.line_number_width > 0 {
216                 notated.push_str(&self.left_pad_line_number(i + 1));
217                 notated.push_str(": ");
218             } else {
219                 notated.push_str("    ");
220             }
221             notated.push_str(line);
222             notated.push('\n');
223             if let Some(notes) = self.notate_line(i) {
224                 notated.push_str(&notes);
225                 notated.push('\n');
226             }
227         }
228         notated
229     }
230 
231     /// Return notes for the line indexed at `i` (zero-based). If there are no
232     /// spans for the given line, then `None` is returned. Otherwise, an
233     /// appropriately space padded string with correctly positioned `^` is
234     /// returned, accounting for line numbers.
notate_line(&self, i: usize) -> Option<String>235     fn notate_line(&self, i: usize) -> Option<String> {
236         let spans = &self.by_line[i];
237         if spans.is_empty() {
238             return None;
239         }
240         let mut notes = String::new();
241         for _ in 0..self.line_number_padding() {
242             notes.push(' ');
243         }
244         let mut pos = 0;
245         for span in spans {
246             for _ in pos..(span.start.column - 1) {
247                 notes.push(' ');
248                 pos += 1;
249             }
250             let note_len = span.end.column.saturating_sub(span.start.column);
251             for _ in 0..cmp::max(1, note_len) {
252                 notes.push('^');
253                 pos += 1;
254             }
255         }
256         Some(notes)
257     }
258 
259     /// Left pad the given line number with spaces such that it is aligned with
260     /// other line numbers.
left_pad_line_number(&self, n: usize) -> String261     fn left_pad_line_number(&self, n: usize) -> String {
262         let n = n.to_string();
263         let pad = self.line_number_width.checked_sub(n.len()).unwrap();
264         let mut result = repeat_char(' ', pad);
265         result.push_str(&n);
266         result
267     }
268 
269     /// Return the line number padding beginning at the start of each line of
270     /// the pattern.
271     ///
272     /// If the pattern is only one line, then this returns a fixed padding
273     /// for visual indentation.
line_number_padding(&self) -> usize274     fn line_number_padding(&self) -> usize {
275         if self.line_number_width == 0 {
276             4
277         } else {
278             2 + self.line_number_width
279         }
280     }
281 }
282 
/// Build a string consisting of `count` copies of the character `c`.
fn repeat_char(c: char, count: usize) -> String {
    (0..count).map(|_| c).collect()
}
286 
#[cfg(test)]
mod tests {
    use crate::ast::parse::Parser;

    /// Parse `pattern`, which must fail, and check that the rendered error
    /// equals `expected_msg` with surrounding whitespace trimmed.
    fn assert_panic_message(pattern: &str, expected_msg: &str) {
        let err = match Parser::new().parse(pattern) {
            Err(err) => err,
            Ok(_) => panic!("regex should not have parsed"),
        };
        assert_eq!(err.to_string(), expected_msg.trim());
    }

    // See: https://github.com/rust-lang/regex/issues/464
    #[test]
    fn regression_464() {
        // This test checks that the error formatter doesn't panic.
        let rendered = Parser::new().parse("a{\n").unwrap_err().to_string();
        assert!(!rendered.is_empty());
    }

    // See: https://github.com/rust-lang/regex/issues/545
    #[test]
    fn repetition_quantifier_expects_a_valid_decimal() {
        let expected = r#"
regex parse error:
    \\u{[^}]*}
        ^
error: repetition quantifier expects a valid decimal
"#;
        assert_panic_message(r"\\u{[^}]*}", expected);
    }
}
325