1 //! Source file support for diagnostic reporting.
2 //!
//! The main trait defined in this module is the [`Files`] trait, which
//! provides the minimum amount of functionality required for printing [`Diagnostics`]
5 //! with the [`term::emit`] function.
6 //!
//! Simple implementations of this trait are provided:
8 //!
9 //! - [`SimpleFile`]: For single-file use-cases
10 //! - [`SimpleFiles`]: For multi-file use-cases
11 //!
12 //! These data structures provide a pretty minimal API, however,
13 //! so end-users are encouraged to create their own implementations for their
14 //! own specific use-cases, such as an implementation that accesses the file
15 //! system directly (and caches the line start locations), or an implementation
16 //! using an incremental compilation library like [`salsa`].
17 //!
18 //! [`term::emit`]: crate::term::emit
19 //! [`Diagnostics`]: crate::diagnostic::Diagnostic
20 //! [`Files`]: Files
21 //! [`SimpleFile`]: SimpleFile
22 //! [`SimpleFiles`]: SimpleFiles
23 //!
24 //! [`salsa`]: https://crates.io/crates/salsa
25 
26 use std::ops::Range;
27 
/// An enum representing an error that happened while looking up a file or a piece of content in that file.
///
/// The enum is marked `#[non_exhaustive]`, so code outside this crate that
/// matches on it needs a wildcard arm.
#[derive(Debug)]
#[non_exhaustive]
pub enum Error {
    /// A required file is not in the file database.
    FileMissing,
    /// The file is present, but does not contain the specified byte index.
    ///
    /// `given` is the offending byte index and `max` is the maximum byte
    /// index; both are included in the `Display` message.
    IndexTooLarge { given: usize, max: usize },
    /// The file is present, but does not contain the specified line index.
    ///
    /// `given` is the offending line index and `max` is the maximum line
    /// index; both are included in the `Display` message.
    LineTooLarge { given: usize, max: usize },
    /// The file is present and contains the specified line index, but the line does not contain the specified column index.
    ///
    /// `given` is the offending column index and `max` is the maximum column
    /// index; both are included in the `Display` message.
    ColumnTooLarge { given: usize, max: usize },
    /// The given index is contained in the file, but is not a boundary of a UTF-8 code point.
    ///
    /// `given` is the offending index.
    InvalidCharBoundary { given: usize },
    /// There was an error while doing IO.
    ///
    /// The wrapped error is exposed through `std::error::Error::source`.
    Io(std::io::Error),
}
45 
46 impl From<std::io::Error> for Error {
from(err: std::io::Error) -> Error47     fn from(err: std::io::Error) -> Error {
48         Error::Io(err)
49     }
50 }
51 
52 impl std::fmt::Display for Error {
fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result53     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54         match self {
55             Error::FileMissing => write!(f, "file missing"),
56             Error::IndexTooLarge { given, max } => {
57                 write!(f, "invalid index {}, maximum index is {}", given, max)
58             }
59             Error::LineTooLarge { given, max } => {
60                 write!(f, "invalid line {}, maximum line is {}", given, max)
61             }
62             Error::ColumnTooLarge { given, max } => {
63                 write!(f, "invalid column {}, maximum column {}", given, max)
64             }
65             Error::InvalidCharBoundary { .. } => write!(f, "index is not a code point boundary"),
66             Error::Io(err) => write!(f, "{}", err),
67         }
68     }
69 }
70 
71 impl std::error::Error for Error {
source(&self) -> Option<&(dyn std::error::Error + 'static)>72     fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
73         match &self {
74             Error::Io(err) => Some(err),
75             _ => None,
76         }
77     }
78 }
79 
80 /// A minimal interface for accessing source files when rendering diagnostics.
81 ///
82 /// A lifetime parameter `'a` is provided to allow any of the returned values to returned by reference.
83 /// This is to workaround the lack of higher kinded lifetime parameters.
84 /// This can be ignored if this is not needed, however.
85 pub trait Files<'a> {
86     /// A unique identifier for files in the file provider. This will be used
87     /// for rendering `diagnostic::Label`s in the corresponding source files.
88     type FileId: 'a + Copy + PartialEq;
89     /// The user-facing name of a file, to be displayed in diagnostics.
90     type Name: 'a + std::fmt::Display;
91     /// The source code of a file.
92     type Source: 'a + AsRef<str>;
93 
94     /// The user-facing name of a file.
name(&'a self, id: Self::FileId) -> Result<Self::Name, Error>95     fn name(&'a self, id: Self::FileId) -> Result<Self::Name, Error>;
96 
97     /// The source code of a file.
source(&'a self, id: Self::FileId) -> Result<Self::Source, Error>98     fn source(&'a self, id: Self::FileId) -> Result<Self::Source, Error>;
99 
100     /// The index of the line at the given byte index.
101     /// If the byte index is past the end of the file, returns the maximum line index in the file.
102     /// This means that this function only fails if the file is not present.
103     ///
104     /// # Note for trait implementors
105     ///
106     /// This can be implemented efficiently by performing a binary search over
107     /// a list of line starts that was computed by calling the [`line_starts`]
108     /// function that is exported from the [`files`] module. It might be useful
109     /// to pre-compute and cache these line starts.
110     ///
111     /// [`line_starts`]: crate::files::line_starts
112     /// [`files`]: crate::files
line_index(&'a self, id: Self::FileId, byte_index: usize) -> Result<usize, Error>113     fn line_index(&'a self, id: Self::FileId, byte_index: usize) -> Result<usize, Error>;
114 
115     /// The user-facing line number at the given line index.
116     /// It is not necessarily checked that the specified line index
117     /// is actually in the file.
118     ///
119     /// # Note for trait implementors
120     ///
121     /// This is usually 1-indexed from the beginning of the file, but
122     /// can be useful for implementing something like the
123     /// [C preprocessor's `#line` macro][line-macro].
124     ///
125     /// [line-macro]: https://en.cppreference.com/w/c/preprocessor/line
126     #[allow(unused_variables)]
line_number(&'a self, id: Self::FileId, line_index: usize) -> Result<usize, Error>127     fn line_number(&'a self, id: Self::FileId, line_index: usize) -> Result<usize, Error> {
128         Ok(line_index + 1)
129     }
130 
131     /// The user-facing column number at the given line index and byte index.
132     ///
133     /// # Note for trait implementors
134     ///
135     /// This is usually 1-indexed from the the start of the line.
136     /// A default implementation is provided, based on the [`column_index`]
137     /// function that is exported from the [`files`] module.
138     ///
139     /// [`files`]: crate::files
140     /// [`column_index`]: crate::files::column_index
column_number( &'a self, id: Self::FileId, line_index: usize, byte_index: usize, ) -> Result<usize, Error>141     fn column_number(
142         &'a self,
143         id: Self::FileId,
144         line_index: usize,
145         byte_index: usize,
146     ) -> Result<usize, Error> {
147         let source = self.source(id)?;
148         let line_range = self.line_range(id, line_index)?;
149         let column_index = column_index(source.as_ref(), line_range, byte_index);
150 
151         Ok(column_index + 1)
152     }
153 
154     /// Convenience method for returning line and column number at the given
155     /// byte index in the file.
location(&'a self, id: Self::FileId, byte_index: usize) -> Result<Location, Error>156     fn location(&'a self, id: Self::FileId, byte_index: usize) -> Result<Location, Error> {
157         let line_index = self.line_index(id, byte_index)?;
158 
159         Ok(Location {
160             line_number: self.line_number(id, line_index)?,
161             column_number: self.column_number(id, line_index, byte_index)?,
162         })
163     }
164 
165     /// The byte range of line in the source of the file.
line_range(&'a self, id: Self::FileId, line_index: usize) -> Result<Range<usize>, Error>166     fn line_range(&'a self, id: Self::FileId, line_index: usize) -> Result<Range<usize>, Error>;
167 }
168 
/// A user-facing location in a source file.
///
/// Returned by [`Files::location`], which fills the fields with the results
/// of `Files::line_number` and `Files::column_number` respectively.
///
/// [`Files::location`]: Files::location
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Location {
    /// The user-facing line number.
    pub line_number: usize,
    /// The user-facing column number.
    pub column_number: usize,
}
181 
/// The column index at the given byte index in the source file.
/// This is the number of characters between the start of the line and the
/// given byte index.
///
/// If the byte index is smaller than the start of the line, then `0` is returned.
/// If the byte index is past the end of the line, the column index of the last
/// character `+ 1` is returned.
///
/// # Example
///
/// ```rust
/// use codespan_reporting::files;
///
/// // The second line holds a 4-byte, a 3-byte and a 4-byte character,
/// // so it spans the bytes `2..13`.
/// let source = "\n\n\u{1F5FB}\u{2208}\u{1F30F}\n\n";
///
/// assert_eq!(files::column_index(source, 0..1, 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 0), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 1), 0);
/// assert_eq!(files::column_index(source, 2..13, 2 + 4), 1);
/// assert_eq!(files::column_index(source, 2..13, 2 + 8), 2);
/// assert_eq!(files::column_index(source, 2..13, 2 + 10), 2);
/// assert_eq!(files::column_index(source, 2..13, 2 + 11), 3);
/// assert_eq!(files::column_index(source, 2..13, 2 + 12), 3);
/// ```
pub fn column_index(source: &str, line_range: Range<usize>, byte_index: usize) -> usize {
    // Never look past the requested byte, the end of the line or the end of
    // the source, whichever comes first.
    let limit = byte_index.min(line_range.end).min(source.len());

    let mut columns = 0;
    for offset in line_range.start..limit {
        // A character is complete once the following byte starts a new one,
        // so a byte index in the middle of a character does not count it.
        if source.is_char_boundary(offset + 1) {
            columns += 1;
        }
    }
    columns
}
213 
/// Return the starting byte index of each line in the source string.
///
/// The first line always starts at byte `0`; every further line starts
/// directly after a `'\n'`.
///
/// This can make it easier to implement [`Files::line_index`] by allowing
/// implementors of [`Files`] to pre-compute the line starts, then search for
/// the corresponding line range, as shown in the example below.
///
/// [`Files`]: Files
/// [`Files::line_index`]: Files::line_index
///
/// # Example
///
/// ```rust
/// use codespan_reporting::files;
///
/// let source = "foo\nbar\r\n\nbaz";
/// let line_starts: Vec<_> = files::line_starts(source).collect();
///
/// assert_eq!(
///     line_starts,
///     [
///         0,  // "foo\n"
///         4,  // "bar\r\n"
///         9,  // ""
///         10, // "baz"
///     ],
/// );
///
/// fn line_index(line_starts: &[usize], byte_index: usize) -> Option<usize> {
///     match line_starts.binary_search(&byte_index) {
///         Ok(line) => Some(line),
///         Err(next_line) => Some(next_line - 1),
///     }
/// }
///
/// assert_eq!(line_index(&line_starts, 5), Some(1));
/// ```
// NOTE: this is copied in `codespan::file::line_starts` and should be kept in sync.
pub fn line_starts<'source>(source: &'source str) -> impl 'source + Iterator<Item = usize> {
    // `'\n'` is a single byte in UTF-8 and never occurs inside a multi-byte
    // sequence, so scanning the raw bytes finds exactly the line breaks.
    let after_newlines = source
        .bytes()
        .enumerate()
        .filter(|&(_, byte)| byte == b'\n')
        .map(|(newline_index, _)| newline_index + 1);

    std::iter::once(0).chain(after_newlines)
}
254 
/// A file database that contains a single source file.
///
/// Because there is only a single file in this database we use `()` as a [`FileId`].
///
/// This is useful for simple language tests, but it might be worth creating a
/// custom implementation when a language scales beyond a certain size.
///
/// [`FileId`]: Files::FileId
#[derive(Debug, Clone)]
pub struct SimpleFile<Name, Source> {
    /// The name of the file.
    name: Name,
    /// The source code of the file.
    source: Source,
    /// The starting byte index of each line in the source code, computed with
    /// the `line_starts` function when the file is created.
    line_starts: Vec<usize>,
}
272 
273 impl<Name, Source> SimpleFile<Name, Source>
274 where
275     Name: std::fmt::Display,
276     Source: AsRef<str>,
277 {
278     /// Create a new source file.
new(name: Name, source: Source) -> SimpleFile<Name, Source>279     pub fn new(name: Name, source: Source) -> SimpleFile<Name, Source> {
280         SimpleFile {
281             name,
282             line_starts: line_starts(source.as_ref()).collect(),
283             source,
284         }
285     }
286 
287     /// Return the name of the file.
name(&self) -> &Name288     pub fn name(&self) -> &Name {
289         &self.name
290     }
291 
292     /// Return the source of the file.
source(&self) -> &Source293     pub fn source(&self) -> &Source {
294         &self.source
295     }
296 
297     /// Return the starting byte index of the line with the specified line index.
298     /// Convenience method that already generates errors if necessary.
line_start(&self, line_index: usize) -> Result<usize, Error>299     fn line_start(&self, line_index: usize) -> Result<usize, Error> {
300         use std::cmp::Ordering;
301 
302         match line_index.cmp(&self.line_starts.len()) {
303             Ordering::Less => Ok(self
304                 .line_starts
305                 .get(line_index)
306                 .cloned()
307                 .expect("failed despite previous check")),
308             Ordering::Equal => Ok(self.source.as_ref().len()),
309             Ordering::Greater => Err(Error::LineTooLarge {
310                 given: line_index,
311                 max: self.line_starts.len() - 1,
312             }),
313         }
314     }
315 }
316 
317 impl<'a, Name, Source> Files<'a> for SimpleFile<Name, Source>
318 where
319     Name: 'a + std::fmt::Display + Clone,
320     Source: 'a + AsRef<str>,
321 {
322     type FileId = ();
323     type Name = Name;
324     type Source = &'a str;
325 
name(&self, (): ()) -> Result<Name, Error>326     fn name(&self, (): ()) -> Result<Name, Error> {
327         Ok(self.name.clone())
328     }
329 
source(&self, (): ()) -> Result<&str, Error>330     fn source(&self, (): ()) -> Result<&str, Error> {
331         Ok(self.source.as_ref())
332     }
333 
line_index(&self, (): (), byte_index: usize) -> Result<usize, Error>334     fn line_index(&self, (): (), byte_index: usize) -> Result<usize, Error> {
335         Ok(self
336             .line_starts
337             .binary_search(&byte_index)
338             .unwrap_or_else(|next_line| next_line - 1))
339     }
340 
line_range(&self, (): (), line_index: usize) -> Result<Range<usize>, Error>341     fn line_range(&self, (): (), line_index: usize) -> Result<Range<usize>, Error> {
342         let line_start = self.line_start(line_index)?;
343         let next_line_start = self.line_start(line_index + 1)?;
344 
345         Ok(line_start..next_line_start)
346     }
347 }
348 
/// A file database that can store multiple source files.
///
/// This is useful for simple language tests, but it might be worth creating a
/// custom implementation when a language scales beyond a certain size.
/// It is a glorified `Vec<SimpleFile>` that implements the `Files` trait.
#[derive(Debug, Clone)]
pub struct SimpleFiles<Name, Source> {
    /// The stored files. The id handed out for a file is its index in this
    /// vector.
    files: Vec<SimpleFile<Name, Source>>,
}
358 
359 impl<Name, Source> SimpleFiles<Name, Source>
360 where
361     Name: std::fmt::Display,
362     Source: AsRef<str>,
363 {
364     /// Create a new files database.
new() -> SimpleFiles<Name, Source>365     pub fn new() -> SimpleFiles<Name, Source> {
366         SimpleFiles { files: Vec::new() }
367     }
368 
369     /// Add a file to the database, returning the handle that can be used to
370     /// refer to it again.
add(&mut self, name: Name, source: Source) -> usize371     pub fn add(&mut self, name: Name, source: Source) -> usize {
372         let file_id = self.files.len();
373         self.files.push(SimpleFile::new(name, source));
374         file_id
375     }
376 
377     /// Get the file corresponding to the given id.
get(&self, file_id: usize) -> Result<&SimpleFile<Name, Source>, Error>378     pub fn get(&self, file_id: usize) -> Result<&SimpleFile<Name, Source>, Error> {
379         self.files.get(file_id).ok_or(Error::FileMissing)
380     }
381 }
382 
383 impl<'a, Name, Source> Files<'a> for SimpleFiles<Name, Source>
384 where
385     Name: 'a + std::fmt::Display + Clone,
386     Source: 'a + AsRef<str>,
387 {
388     type FileId = usize;
389     type Name = Name;
390     type Source = &'a str;
391 
name(&self, file_id: usize) -> Result<Name, Error>392     fn name(&self, file_id: usize) -> Result<Name, Error> {
393         Ok(self.get(file_id)?.name().clone())
394     }
395 
source(&self, file_id: usize) -> Result<&str, Error>396     fn source(&self, file_id: usize) -> Result<&str, Error> {
397         Ok(self.get(file_id)?.source().as_ref())
398     }
399 
line_index(&self, file_id: usize, byte_index: usize) -> Result<usize, Error>400     fn line_index(&self, file_id: usize, byte_index: usize) -> Result<usize, Error> {
401         self.get(file_id)?.line_index((), byte_index)
402     }
403 
line_range(&self, file_id: usize, line_index: usize) -> Result<Range<usize>, Error>404     fn line_range(&self, file_id: usize, line_index: usize) -> Result<Range<usize>, Error> {
405         self.get(file_id)?.line_range((), line_index)
406     }
407 }
408 
#[cfg(test)]
mod test {
    use super::*;

    /// Four lines with mixed terminators: `\n`, `\r\n`, an empty line and a
    /// final line without a terminator.
    const TEST_SOURCE: &str = "foo\nbar\r\n\nbaz";

    /// The line starts are computed when the file is created.
    #[test]
    fn line_starts() {
        let file = SimpleFile::new("test", TEST_SOURCE);

        let expected = [
            0,  // "foo\n"
            4,  // "bar\r\n"
            9,  // ""
            10, // "baz"
        ];

        assert_eq!(file.line_starts, expected);
    }

    /// Slicing the source with each line range yields the line including its
    /// terminator.
    #[test]
    fn line_span_sources() {
        let file = SimpleFile::new("test", TEST_SOURCE);

        let mut line_sources = Vec::new();
        for line in 0..4 {
            let line_range = file.line_range((), line).unwrap();
            line_sources.push(&file.source[line_range]);
        }

        assert_eq!(line_sources, ["foo\n", "bar\r\n", "\n", "baz"]);
    }
}
444