1 // Copyright 2018 Tomasz Miąsko
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE>
4 // or the MIT license <LICENSE-MIT>, at your option.
5 //
//! Process command lines according to the parsing rules of the Unix shell, as specified in
//! [Shell Command Language in POSIX.1-2008][posix-shell].
8 //!
9 //! [posix-shell]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html
10 
11 #![forbid(unsafe_code)]
12 
13 use std::borrow::Cow;
14 use std::error;
15 use std::fmt;
16 use std::mem;
17 use std::result;
18 
/// Error value reported when a command line cannot be parsed.
///
/// It carries no data; its `Display` output is always the text `missing closing quote`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ParseError;

impl fmt::Display for ParseError {
    /// Writes the fixed, human-readable description of the error.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        formatter.write_str("missing closing quote")
    }
}

// No method is overridden: the default `Error` implementation is used as is.
impl error::Error for ParseError {}
30 
/// Lexical context tracked by `split` while it walks the input one character at a time.
enum State {
    /// Between words: no word is in progress. This is also the initial state.
    Delimiter,
    /// A backslash was seen while between words. A following newline is a line continuation
    /// and returns to `Delimiter`; any other character starts a new unquoted word.
    Backslash,
    /// Inside a word, outside of any quotes.
    Unquoted,
    /// A backslash was seen inside an unquoted word. A following newline is a line
    /// continuation; any other character is added to the word literally.
    UnquotedBackslash,
    /// Inside single quotes: every character up to the closing quote is literal.
    SingleQuoted,
    /// Inside double quotes: backslash is the only character with special meaning besides the
    /// closing quote.
    DoubleQuoted,
    /// A backslash was seen inside double quotes. It escapes only `$`, `` ` ``, `"`, `\` and
    /// newline; before any other character the backslash itself is kept.
    DoubleQuotedBackslash,
    /// Inside a `#` comment, which extends up to the next newline.
    Comment,
}
49 
50 /// Splits command line into separate arguments, in much the same way Unix shell would, but without
51 /// many of expansion the shell would perform.
52 ///
53 /// The split functionality is compatible with behaviour of Unix shell, but with word expansions
54 /// limited to quote removal, and without special token recognition rules for operators.
55 ///
56 /// The result is exactly the same as one obtained from Unix shell as long as those unsupported
57 /// features are not present in input: no operators, no variable assignments, no tilde expansion,
58 /// no parameter expansion, no command substitution, no arithmetic expansion, no pathname
59 /// expansion.
60 ///
61 /// In case those unsupported shell features are present, the syntax that introduce them is
62 /// interpreted literally.
63 ///
64 /// # Errors
65 ///
66 /// When input contains unmatched quote, an error is returned.
67 ///
68 /// # Compatibility with other implementations
69 ///
70 /// It should be fully compatible with g_shell_parse_argv from GLib, except that in GLib
71 /// it is an error not to have any words after tokenization.
72 ///
73 /// It is also very close to shlex.split available in Python standard library, when used in POSIX
74 /// mode with support for comments. Though, shlex implementation diverges from POSIX, and from
75 /// implementation contained herein in three aspects. First, it doesn't support line continuations.
76 /// Second, inside double quotes, the backslash characters retains its special meaning as an escape
77 /// character only when followed by \\ or \", whereas POSIX specifies that it should retain its
78 /// special meaning when followed by: $, \`, \", \\, or a newline. Third, it treats carriage return
79 /// as one of delimiters.
80 ///
81 /// # Examples
82 ///
83 /// Building an executable using compiler obtained from CC environment variable
84 /// and compiler flags from both CFLAGS and CPPFLAGS. Similar to default build
85 /// rule for C used in GNU Make:
86 ///
87 /// ```rust,no_run
88 /// use std::env::var;
89 /// use std::process::Command;
90 ///
91 /// let cc = var("CC").unwrap_or_else(|_| "cc".to_owned());
92 ///
93 /// let cflags_str = var("CFLAGS").unwrap_or_else(|_| String::new());
94 /// let cflags = shell_words::split(&cflags_str).expect("failed to parse CFLAGS");
95 ///
96 /// let cppflags_str = var("CPPFLAGS").unwrap_or_else(|_| String::new());
97 /// let cppflags = shell_words::split(&cppflags_str).expect("failed to parse CPPFLAGS");
98 ///
99 /// Command::new(cc)
100 ///     .args(cflags)
101 ///     .args(cppflags)
102 ///     .args(&["-c", "a.c", "-o", "a.out"])
103 ///     .spawn()
104 ///     .expect("failed to start subprocess")
105 ///     .wait()
106 ///     .expect("failed to wait for subprocess");
107 /// ```
split(s: &str) -> result::Result<Vec<String>, ParseError>108 pub fn split(s: &str) -> result::Result<Vec<String>, ParseError> {
109     use State::*;
110 
111     let mut words = Vec::new();
112     let mut word = String::new();
113     let mut chars = s.chars();
114     let mut state = Delimiter;
115 
116     loop {
117         let c = chars.next();
118         state = match state {
119             Delimiter => match c {
120                 None => break,
121                 Some('\'') => SingleQuoted,
122                 Some('\"') => DoubleQuoted,
123                 Some('\\') => Backslash,
124                 Some('\t') | Some(' ') | Some('\n') => Delimiter,
125                 Some('#') => Comment,
126                 Some(c) => {
127                     word.push(c);
128                     Unquoted
129                 }
130             },
131             Backslash => match c {
132                 None => {
133                     word.push('\\');
134                     words.push(mem::replace(&mut word, String::new()));
135                     break;
136                 }
137                 Some('\n') => Delimiter,
138                 Some(c) => {
139                     word.push(c);
140                     Unquoted
141                 }
142             },
143             Unquoted => match c {
144                 None => {
145                     words.push(mem::replace(&mut word, String::new()));
146                     break;
147                 }
148                 Some('\'') => SingleQuoted,
149                 Some('\"') => DoubleQuoted,
150                 Some('\\') => UnquotedBackslash,
151                 Some('\t') | Some(' ') | Some('\n') => {
152                     words.push(mem::replace(&mut word, String::new()));
153                     Delimiter
154                 }
155                 Some(c) => {
156                     word.push(c);
157                     Unquoted
158                 }
159             },
160             UnquotedBackslash => match c {
161                 None => {
162                     word.push('\\');
163                     words.push(mem::replace(&mut word, String::new()));
164                     break;
165                 }
166                 Some('\n') => Unquoted,
167                 Some(c) => {
168                     word.push(c);
169                     Unquoted
170                 }
171             },
172             SingleQuoted => match c {
173                 None => return Err(ParseError),
174                 Some('\'') => Unquoted,
175                 Some(c) => {
176                     word.push(c);
177                     SingleQuoted
178                 }
179             },
180             DoubleQuoted => match c {
181                 None => return Err(ParseError),
182                 Some('\"') => Unquoted,
183                 Some('\\') => DoubleQuotedBackslash,
184                 Some(c) => {
185                     word.push(c);
186                     DoubleQuoted
187                 }
188             },
189             DoubleQuotedBackslash => match c {
190                 None => return Err(ParseError),
191                 Some('\n') => DoubleQuoted,
192                 Some(c @ '$') | Some(c @ '`') | Some(c @ '"') | Some(c @ '\\') => {
193                     word.push(c);
194                     DoubleQuoted
195                 }
196                 Some(c) => {
197                     word.push('\\');
198                     word.push(c);
199                     DoubleQuoted
200                 }
201             },
202             Comment => match c {
203                 None => break,
204                 Some('\n') => Delimiter,
205                 Some(_) => Comment,
206             },
207         }
208     }
209 
210     Ok(words)
211 }
212 
/// Quoting strategy chosen by `escape_style` and applied by `quote`.
enum EscapeStyle {
    /// No escaping: the string contains nothing special and is used verbatim.
    None,
    /// Wrap in single quotes: the string is special but contains no single quote itself.
    SingleQuoted,
    /// Single quotes combined with backslash: needed when the string contains a single quote.
    Mixed,
}

/// Determines escaping style to use.
///
/// An empty string has to be written as `''`, otherwise it would disappear from the command
/// line. A string containing any character the shell may treat specially is wrapped in single
/// quotes, and every embedded single quote additionally requires the `'\''` sequence.
fn escape_style(s: &str) -> EscapeStyle {
    if s.is_empty() {
        return EscapeStyle::SingleQuoted;
    }

    let mut special = false;
    let mut single_quote = false;

    for c in s.chars() {
        match c {
            '\'' => {
                single_quote = true;
                special = true;
            }
            // Characters that must, or conditionally must, be quoted according to POSIX.
            // The tilde is the ASCII `~` that triggers tilde expansion, not the look-alike
            // U+02DC SMALL TILDE, which has no meaning to the shell.
            '|' | '&' | ';' | '<' | '>' | '(' | ')' | '$' | '`' | '\\' | '"' | ' ' | '\t'
            | '\n' | '*' | '?' | '[' | '#' | '~' | '=' | '%' => {
                special = true;
            }
            _ => {}
        }
    }

    if !special {
        EscapeStyle::None
    } else if single_quote {
        EscapeStyle::Mixed
    } else {
        EscapeStyle::SingleQuoted
    }
}

/// Escapes special characters in a string, so that it will retain its literal meaning when used
/// as a part of a command in a Unix shell.
///
/// It tries to avoid introducing any unnecessary quotes or escape characters, but specifics
/// regarding quoting style are left unspecified.
///
/// Returns the input borrowed when no quoting is needed, and a newly allocated string
/// otherwise.
pub fn quote(s: &str) -> Cow<'_, str> {
    // We are going somewhat out of the way to provide
    // minimal amount of quoting in typical cases.
    match escape_style(s) {
        EscapeStyle::None => Cow::Borrowed(s),
        EscapeStyle::SingleQuoted => Cow::Owned(format!("'{}'", s)),
        EscapeStyle::Mixed => {
            // At least the two surrounding quotes are added on top of the input.
            let mut quoted = String::with_capacity(s.len() + 2);
            quoted.push('\'');
            for c in s.chars() {
                if c == '\'' {
                    // Close the quoted section, emit an escaped quote, and reopen it.
                    quoted.push_str("'\\''");
                } else {
                    quoted.push(c);
                }
            }
            quoted.push('\'');
            Cow::Owned(quoted)
        }
    }
}
285 
286 /// Joins arguments into a single command line suitable for execution in Unix shell.
287 ///
288 /// Each argument is quoted using [`quote`] to preserve its literal meaning when parsed by Unix
289 /// shell.
290 ///
291 /// Note: This function is essentially an inverse of [`split`].
292 ///
293 /// # Examples
294 ///
295 /// Logging executed commands in format that can be easily copied and pasted into an actual shell:
296 ///
297 /// ```rust,no_run
298 /// fn execute(args: &[&str]) {
299 ///     use std::process::Command;
300 ///     println!("Executing: {}", shell_words::join(args));
301 ///     Command::new(&args[0])
302 ///         .args(&args[1..])
303 ///         .spawn()
304 ///         .expect("failed to start subprocess")
305 ///         .wait()
306 ///         .expect("failed to wait for subprocess");
307 /// }
308 ///
309 /// execute(&["python", "-c", "print('Hello world!')"]);
310 /// ```
311 ///
312 /// [`quote`]: fn.quote.html
313 /// [`split`]: fn.split.html
join<I, S>(words: I) -> String where I: IntoIterator<Item = S>, S: AsRef<str>,314 pub fn join<I, S>(words: I) -> String
315 where
316     I: IntoIterator<Item = S>,
317     S: AsRef<str>,
318 {
319     let mut line = words.into_iter().fold(String::new(), |mut line, word| {
320         let quoted = quote(word.as_ref());
321         line.push_str(quoted.as_ref());
322         line.push(' ');
323         line
324     });
325     line.pop();
326     line
327 }
328 
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `split` succeeds on every input and yields exactly the expected words,
    /// reporting the offending input on failure.
    fn split_ok(cases: &[(&str, &[&str])]) {
        for &(input, expected) in cases {
            match split(input) {
                Err(actual) => {
                    panic!(
                        "After split({:?})\nexpected: Ok({:?})\n  actual: Err({:?})\n",
                        input, expected, actual
                    );
                }
                Ok(actual) => {
                    assert!(
                        expected == actual.as_slice(),
                        "After split({:?}).unwrap()\nexpected: {:?}\n  actual: {:?}\n",
                        input,
                        expected,
                        actual
                    );
                }
            }
        }
    }

    #[test]
    fn split_empty() {
        split_ok(&[("", &[])]);
    }

    #[test]
    fn split_initial_whitespace_is_removed() {
        split_ok(&[
            ("     a", &["a"]),
            ("\t\t\t\tbar", &["bar"]),
            ("\t \nc", &["c"]),
        ]);
    }

    #[test]
    fn split_trailing_whitespace_is_removed() {
        split_ok(&[
            ("a  ", &["a"]),
            ("b\t", &["b"]),
            ("c\t \n \n \n", &["c"]),
            ("d\n\n", &["d"]),
        ]);
    }

    #[test]
    fn split_carriage_return_is_not_special() {
        split_ok(&[("c\ra\r'\r'\r", &["c\ra\r\r\r"])]);
    }

    #[test]
    fn split_single_quotes() {
        split_ok(&[
            (r#"''"#, &[r#""#]),
            (r#"'a'"#, &[r#"a"#]),
            (r#"'\'"#, &[r#"\"#]),
            (r#"' \ '"#, &[r#" \ "#]),
            (r#"'#'"#, &[r#"#"#]),
        ]);
    }

    #[test]
    fn split_double_quotes() {
        split_ok(&[
            (r#""""#, &[""]),
            (r#""""""#, &[""]),
            (r#""a b c' d""#, &["a b c' d"]),
            (r#""\a""#, &["\\a"]),
            (r#""$""#, &["$"]),
            (r#""\$""#, &["$"]),
            (r#""`""#, &["`"]),
            (r#""\`""#, &["`"]),
            (r#""\"""#, &["\""]),
            (r#""\\""#, &["\\"]),
            ("\"\n\"", &["\n"]),
            ("\"\\\n\"", &[""]),
        ]);
    }

    #[test]
    fn split_unquoted() {
        split_ok(&[
            (r#"\|\&\;"#, &[r#"|&;"#]),
            (r#"\<\>"#, &[r#"<>"#]),
            (r#"\(\)"#, &[r#"()"#]),
            (r#"\$"#, &[r#"$"#]),
            (r#"\`"#, &[r#"`"#]),
            (r#"\""#, &[r#"""#]),
            (r#"\'"#, &[r#"'"#]),
            ("\\\n", &[]),
            (" \\\n \n", &[]),
            ("a\nb\nc", &["a", "b", "c"]),
            ("a\\\nb\\\nc", &["abc"]),
            ("foo bar baz", &["foo", "bar", "baz"]),
            // An escaped non-ASCII (multi-byte) character is taken literally.
            (r#"\🦉"#, &[r"🦉"]),
        ]);
    }

    #[test]
    fn split_trailing_backslash() {
        split_ok(&[("\\", &["\\"]), (" \\", &["\\"]), ("a\\", &["a\\"])]);
    }

    #[test]
    fn split_errors() {
        // Unterminated single quote.
        assert_eq!(split("'abc"), Err(ParseError));
        // Unterminated double quote.
        assert_eq!(split("\""), Err(ParseError));
        // A backslash is literal inside single quotes, so the quote is still open.
        assert_eq!(split("'\\"), Err(ParseError));
        // Input ends right after a backslash inside double quotes.
        assert_eq!(split("\"\\"), Err(ParseError));
    }

    #[test]
    fn split_comments() {
        split_ok(&[
            (r#" x # comment "#, &["x"]),
            (r#" w1#w2 "#, &["w1#w2"]),
            (r#"'not really a # comment'"#, &["not really a # comment"]),
            (" a # very long comment \n b # another comment", &["a", "b"]),
        ]);
    }

    #[test]
    fn test_quote() {
        assert_eq!(quote(""), "''");
        assert_eq!(quote("'"), "''\\'''");
        assert_eq!(quote("abc"), "abc");
        assert_eq!(quote("a \n  b"), "'a \n  b'");
        assert_eq!(quote("X'\nY"), "'X'\\''\nY'");
    }

    #[test]
    fn test_join() {
        assert_eq!(join(&["a", "b", "c"]), "a b c");
        assert_eq!(join(&[" ", "$", "\n"]), "' ' '$' '\n'");
    }

    /// Whatever `join` produces must be split back into the original arguments.
    #[test]
    fn join_followed_by_split_is_identity() {
        let cases: Vec<&[&str]> = vec![
            &["a"],
            &["python", "-c", "print('Hello world!')"],
            &["echo", " arg with spaces ", "arg \' with \" quotes"],
            &["even newlines are quoted correctly\n", "\n", "\n\n\t "],
            &["$", "`test`"],
            &["cat", "~user/log*"],
            &["test", "'a \"b", "\"X'"],
            &["empty", "", "", ""],
        ];
        for argv in cases {
            let args = join(argv);
            assert_eq!(split(&args).unwrap(), argv);
        }
    }
}
488