1 //! A Rust parser for the [WebAssembly Text format][wat]
2 //!
3 //! This crate contains a stable interface to the parser for the [WAT][wat]
4 //! format of WebAssembly text files. The format parsed by this crate follows
5 //! the [online specification][wat].
6 //!
7 //! # Examples
8 //!
9 //! Parse an in-memory string:
10 //!
11 //! ```
12 //! # fn foo() -> wat::Result<()> {
13 //! let wat = r#"
14 //!     (module
15 //!         (func $foo)
16 //!
17 //!         (func (export "bar")
18 //!             call $foo
19 //!         )
20 //!     )
21 //! "#;
22 //!
23 //! let binary = wat::parse_str(wat)?;
24 //! // ...
25 //! # Ok(())
26 //! # }
27 //! ```
28 //!
29 //! Parse an on-disk file:
30 //!
31 //! ```
32 //! # fn foo() -> wat::Result<()> {
33 //! let binary = wat::parse_file("./foo.wat")?;
34 //! // ...
35 //! # Ok(())
36 //! # }
37 //! ```
38 //!
39 //! ## Evolution of the WAT Format
40 //!
41 //! WebAssembly, and the WAT format, are an evolving specification. Features are
42 //! added to WAT, WAT changes, and sometimes WAT breaks. The policy of this
43 //! crate is that it will always follow the [official specification][wat] for
44 //! WAT files.
45 //!
46 //! Future WebAssembly features will be accepted to this parser **and they will
47 //! not require a feature gate to opt-in**. All implemented WebAssembly features
48 //! will be enabled at all times. Using a future WebAssembly feature in the WAT
49 //! format may cause breakage because while specifications are in development
50 //! the WAT syntax (and/or binary encoding) will often change. This crate will
51 //! do its best to keep up with these proposals, but breaking textual changes
52 //! will be published as non-breaking semver changes to this crate.
53 //!
54 //! ## Stability
55 //!
56 //! This crate is intended to be a very stable shim over the `wast` crate
57 //! which is expected to be much more unstable. The `wast` crate contains
//! AST data structures for parsing `*.wat` files and they will evolve as the
59 //! WAT and WebAssembly specifications evolve over time.
60 //!
61 //! This crate is currently at version 1.x.y, and it is intended that it will
62 //! remain here for quite some time. Breaking changes to the WAT format will be
63 //! landed as a non-semver-breaking version change in this crate. This crate
64 //! will always follow the [official specification for WAT][wat].
65 //!
66 //! [wat]: http://webassembly.github.io/spec/core/text/index.html
67 
68 #![deny(missing_docs)]
69 
70 use std::borrow::Cow;
71 use std::fmt;
72 use std::path::{Path, PathBuf};
73 use std::str;
74 use wast::parser::{self, ParseBuffer};
75 
76 /// Parses a file on disk as a [WebAssembly Text format][wat] file, or a binary
77 /// WebAssembly file
78 ///
79 /// This function will read the bytes on disk and delegate them to the
80 /// [`parse_bytes`] function. For more information on the behavior of parsing
81 /// see [`parse_bytes`].
82 ///
83 /// # Errors
84 ///
85 /// For information about errors, see the [`parse_bytes`] documentation.
86 ///
87 /// # Examples
88 ///
89 /// ```
90 /// # fn foo() -> wat::Result<()> {
91 /// let binary = wat::parse_file("./foo.wat")?;
92 /// // ...
93 /// # Ok(())
94 /// # }
95 /// ```
96 ///
97 /// [wat]: http://webassembly.github.io/spec/core/text/index.html
parse_file(file: impl AsRef<Path>) -> Result<Vec<u8>>98 pub fn parse_file(file: impl AsRef<Path>) -> Result<Vec<u8>> {
99     _parse_file(file.as_ref())
100 }
101 
_parse_file(file: &Path) -> Result<Vec<u8>>102 fn _parse_file(file: &Path) -> Result<Vec<u8>> {
103     let contents = std::fs::read(file).map_err(|err| Error {
104         kind: Box::new(ErrorKind::Io {
105             err,
106             file: Some(file.to_owned()),
107         }),
108     })?;
109     match parse_bytes(&contents) {
110         Ok(bytes) => Ok(bytes.into_owned()),
111         Err(mut e) => {
112             e.set_path(file);
113             Err(e)
114         }
115     }
116 }
117 
118 /// Parses in-memory bytes as either the [WebAssembly Text format][wat], or a
119 /// binary WebAssembly module.
120 ///
121 /// This function will attempt to interpret the given bytes as one of two
122 /// options:
123 ///
124 /// * A utf-8 string which is a `*.wat` file to be parsed.
125 /// * A binary WebAssembly file starting with `b"\0asm"`
126 ///
127 /// If the input is a string then it will be parsed as `*.wat`, and then after
128 /// parsing it will be encoded back into a WebAssembly binary module. If the
129 /// input is a binary that starts with `b"\0asm"` it will be returned verbatim.
130 /// Everything that doesn't start with `b"\0asm"` will be parsed as a utf-8
131 /// `*.wat` file, returning errors as appropriate.
132 ///
133 /// For more information about parsing wat files, see [`parse_str`].
134 ///
135 /// # Errors
136 ///
137 /// In addition to all of the errors that can be returned from [`parse_str`],
138 /// this function will also return an error if the input does not start with
139 /// `b"\0asm"` and is invalid utf-8. (failed to even try to call [`parse_str`]).
140 ///
141 /// # Examples
142 ///
143 /// ```
144 /// # fn foo() -> wat::Result<()> {
145 /// // Parsing bytes that are actually `*.wat` files
146 /// assert_eq!(&*wat::parse_bytes(b"(module)")?, b"\0asm\x01\0\0\0");
147 /// assert!(wat::parse_bytes(b"module").is_err());
148 /// assert!(wat::parse_bytes(b"binary\0file\0\that\0is\0not\0wat").is_err());
149 ///
150 /// // Pass through binaries that look like real wasm files
151 /// assert_eq!(&*wat::parse_bytes(b"\0asm\x01\0\0\0")?, b"\0asm\x01\0\0\0");
152 /// # Ok(())
153 /// # }
154 /// ```
155 ///
156 /// [wat]: http://webassembly.github.io/spec/core/text/index.html
parse_bytes(bytes: &[u8]) -> Result<Cow<'_, [u8]>>157 pub fn parse_bytes(bytes: &[u8]) -> Result<Cow<'_, [u8]>> {
158     if bytes.starts_with(b"\0asm") {
159         return Ok(bytes.into());
160     }
161     match str::from_utf8(bytes) {
162         Ok(s) => _parse_str(s).map(|s| s.into()),
163         Err(_) => Err(Error {
164             kind: Box::new(ErrorKind::Custom {
165                 msg: "input bytes aren't valid utf-8".to_string(),
166                 file: None,
167             }),
168         }),
169     }
170 }
171 
172 /// Parses an in-memory string as the [WebAssembly Text format][wat], returning
173 /// the file as a binary WebAssembly file.
174 ///
175 /// This function is intended to be a stable convenience function for parsing a
176 /// wat file into a WebAssembly binary file. This is a high-level operation
177 /// which does not expose any parsing internals, for that you'll want to use the
178 /// `wast` crate.
179 ///
180 /// # Errors
181 ///
182 /// This function can fail for a number of reasons, including (but not limited
183 /// to):
184 ///
185 /// * The `wat` input may fail to lex, such as having invalid tokens or syntax
186 /// * The `wat` input may fail to parse, such as having incorrect syntactical
187 ///   structure
188 /// * The `wat` input may contain names that could not be resolved
189 ///
190 /// # Examples
191 ///
192 /// ```
193 /// # fn foo() -> wat::Result<()> {
194 /// assert_eq!(wat::parse_str("(module)")?, b"\0asm\x01\0\0\0");
195 /// assert!(wat::parse_str("module").is_err());
196 ///
197 /// let wat = r#"
198 ///     (module
199 ///         (func $foo)
200 ///
201 ///         (func (export "bar")
202 ///             call $foo
203 ///         )
204 ///     )
205 /// "#;
206 ///
207 /// let binary = wat::parse_str(wat)?;
208 /// // ...
209 /// # Ok(())
210 /// # }
211 /// ```
212 ///
213 /// [wat]: http://webassembly.github.io/spec/core/text/index.html
parse_str(wat: impl AsRef<str>) -> Result<Vec<u8>>214 pub fn parse_str(wat: impl AsRef<str>) -> Result<Vec<u8>> {
215     _parse_str(wat.as_ref())
216 }
217 
_parse_str(wat: &str) -> Result<Vec<u8>>218 fn _parse_str(wat: &str) -> Result<Vec<u8>> {
219     let buf = ParseBuffer::new(&wat).map_err(|e| Error::cvt(e, wat))?;
220     let mut ast = parser::parse::<wast::Wat>(&buf).map_err(|e| Error::cvt(e, wat))?;
221     Ok(ast.module.encode().map_err(|e| Error::cvt(e, wat))?)
222 }
223 
/// A convenience type definition for `Result` where the error is [`Error`].
///
/// This is the return type of [`parse_file`], [`parse_bytes`] and
/// [`parse_str`].
pub type Result<T> = std::result::Result<T, Error>;
226 
/// Errors from this crate related to parsing WAT files
///
/// An error can occur during phases like:
///
/// * Lexing can fail if the document is syntactically invalid.
/// * A string may not be utf-8
/// * The syntactical structure of the wat file may be invalid
/// * The wat file may be semantically invalid such as having name resolution
///   failures
///
/// Reading a file from disk in [`parse_file`] can also fail, in which case
/// the underlying I/O error is available through
/// [`std::error::Error::source`].
#[derive(Debug)]
pub struct Error {
    // The actual failure, stored behind a `Box` so that `Error` itself is a
    // single pointer wide.
    kind: Box<ErrorKind>,
}
240 
/// Internal representation of the failure carried by [`Error`].
#[derive(Debug)]
enum ErrorKind {
    /// An error produced by the `wast` crate; constructed by `Error::cvt`.
    Wast(wast::Error),
    /// Reading the input failed; constructed by `_parse_file` when
    /// `std::fs::read` returns an error.
    Io {
        /// The underlying I/O error, also returned from `Error::source`.
        err: std::io::Error,
        /// Path of the file involved, if one has been recorded.
        file: Option<PathBuf>,
    },
    /// A failure described only by a message; `parse_bytes` uses this when
    /// the input is not valid utf-8.
    Custom {
        /// Human-readable description rendered by the `Display` impl.
        msg: String,
        /// Path of the file involved, if one has been set via `set_path`.
        file: Option<PathBuf>,
    },
}
253 
254 impl Error {
cvt<E: Into<wast::Error>>(e: E, contents: &str) -> Error255     fn cvt<E: Into<wast::Error>>(e: E, contents: &str) -> Error {
256         let mut err = e.into();
257         err.set_text(contents);
258         Error {
259             kind: Box::new(ErrorKind::Wast(err)),
260         }
261     }
262 
263     /// To provide a more useful error this function can be used to set
264     /// the file name that this error is associated with.
265     ///
266     /// The `file` here will be stored in this error and later rendered in the
267     /// `Display` implementation.
set_path<P: AsRef<Path>>(&mut self, file: P)268     pub fn set_path<P: AsRef<Path>>(&mut self, file: P) {
269         let file = file.as_ref();
270         match &mut *self.kind {
271             ErrorKind::Wast(e) => e.set_path(file),
272             ErrorKind::Custom { file: f, .. } => *f = Some(file.to_owned()),
273             ErrorKind::Io { file: f, .. } => *f = Some(file.to_owned()),
274         }
275     }
276 }
277 
278 impl fmt::Display for Error {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result279     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
280         match &*self.kind {
281             ErrorKind::Wast(err) => err.fmt(f),
282             ErrorKind::Custom { msg, file, .. } => match file {
283                 Some(file) => {
284                     write!(f, "failed to parse `{}`: {}", file.display(), msg)
285                 }
286                 None => msg.fmt(f),
287             },
288             ErrorKind::Io { err, file, .. } => match file {
289                 Some(file) => {
290                     write!(f, "failed to read from `{}`: {}", file.display(), err)
291                 }
292                 None => err.fmt(f),
293             },
294         }
295     }
296 }
297 
298 impl std::error::Error for Error {
source(&self) -> Option<&(dyn std::error::Error + 'static)>299     fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
300         match &*self.kind {
301             ErrorKind::Wast(_) => None,
302             ErrorKind::Custom { .. } => None,
303             ErrorKind::Io { err, .. } => Some(err),
304         }
305     }
306 }
307 
#[cfg(test)]
mod test {
    use super::*;

    /// Checks how a path shows up in the rendered message for each kind of
    /// error: invalid utf-8, a failed file read, and a `wast` parse error.
    #[test]
    fn test_set_path() {
        // Invalid utf-8 input: the path set afterwards prefixes the message.
        let mut utf8_err = parse_bytes(&[0xFF]).unwrap_err();
        utf8_err.set_path("foo");
        let expected_utf8 = "failed to parse `foo`: input bytes aren't valid utf-8";
        assert_eq!(utf8_err.to_string(), expected_utf8);

        // Missing file: `parse_file` records the path by itself.
        let io_err = parse_file("_does_not_exist_").unwrap_err();
        let rendered = io_err.to_string();
        assert!(rendered.starts_with("failed to read from `_does_not_exist_`: "));

        // Parse failure: the path appears in the location line of the report.
        let mut parse_err = parse_bytes("()".as_bytes()).unwrap_err();
        parse_err.set_path("foo");
        let expected_parse =
            "expected valid module field\n     --> foo:1:2\n      |\n    1 | ()\n      |  ^";
        assert_eq!(parse_err.to_string(), expected_parse);
    }
}
334