// Copyright 2018 Tomasz Miąsko
//
// Licensed under the Apache License, Version 2.0
// or the MIT license, at your option.
//
//! Process command line according to parsing rules of Unix shell as specified in [Shell Command
//! Language in POSIX.1-2008][posix-shell].
//!
//! [posix-shell]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html

#![forbid(unsafe_code)]

use std::borrow::Cow;
use std::error;
use std::fmt;
use std::mem;
use std::result;

/// An error returned when shell parsing fails.
///
/// The only failure [`split`] reports is input that ends while still inside a single or double
/// quoted section.
///
/// [`split`]: fn.split.html
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ParseError;

impl fmt::Display for ParseError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "missing closing quote")
    }
}

impl error::Error for ParseError {}

/// States of the tokenizer driving [`split`].
///
/// [`split`]: fn.split.html
enum State {
    /// Within a delimiter.
    Delimiter,
    /// After backslash, but before starting word.
    Backslash,
    /// Within an unquoted word.
    Unquoted,
    /// After backslash in an unquoted word.
    UnquotedBackslash,
    /// Within a single quoted word.
    SingleQuoted,
    /// Within a double quoted word.
    DoubleQuoted,
    /// After backslash inside a double quoted word.
    DoubleQuotedBackslash,
    /// Inside a comment.
    Comment,
}

/// Splits command line into separate arguments, in much the same way Unix shell would, but
/// without many of the expansions the shell would perform.
///
/// The split functionality is compatible with behaviour of Unix shell, but with word expansions
/// limited to quote removal, and without special token recognition rules for operators.
///
/// The result is exactly the same as one obtained from Unix shell as long as those unsupported
/// features are not present in input: no operators, no variable assignments, no tilde expansion,
/// no parameter expansion, no command substitution, no arithmetic expansion, no pathname
/// expansion.
///
/// In case those unsupported shell features are present, the syntax that introduces them is
/// interpreted literally.
///
/// # Errors
///
/// When input contains unmatched quote, an error is returned.
///
/// # Compatibility with other implementations
///
/// It should be fully compatible with g_shell_parse_argv from GLib, except that in GLib
/// it is an error not to have any words after tokenization.
///
/// It is also very close to shlex.split available in Python standard library, when used in POSIX
/// mode with support for comments. Though, shlex implementation diverges from POSIX, and from
/// implementation contained herein in three aspects. First, it doesn't support line continuations.
/// Second, inside double quotes, the backslash character retains its special meaning as an escape
/// character only when followed by \\ or \", whereas POSIX specifies that it should retain its
/// special meaning when followed by: $, \`, \", \\, or a newline. Third, it treats carriage return
/// as one of delimiters.
///
/// # Examples
///
/// Building an executable using compiler obtained from CC environment variable
/// and compiler flags from both CFLAGS and CPPFLAGS. Similar to default build
/// rule for C used in GNU Make:
///
/// ```rust,no_run
/// use std::env::var;
/// use std::process::Command;
///
/// let cc = var("CC").unwrap_or_else(|_| "cc".to_owned());
///
/// let cflags_str = var("CFLAGS").unwrap_or_else(|_| String::new());
/// let cflags = shell_words::split(&cflags_str).expect("failed to parse CFLAGS");
///
/// let cppflags_str = var("CPPFLAGS").unwrap_or_else(|_| String::new());
/// let cppflags = shell_words::split(&cppflags_str).expect("failed to parse CPPFLAGS");
///
/// Command::new(cc)
///     .args(cflags)
///     .args(cppflags)
///     .args(&["-c", "a.c", "-o", "a.out"])
///     .spawn()
///     .expect("failed to start subprocess")
///     .wait()
///     .expect("failed to wait for subprocess");
/// ```
pub fn split(s: &str) -> result::Result<Vec<String>, ParseError> {
    use State::*;

    let mut words = Vec::new();
    let mut word = String::new();
    let mut chars = s.chars();
    let mut state = Delimiter;

    // One iteration per input character, plus a final iteration with `None` that lets each state
    // decide how end of input is handled (flush the word, stop, or report an error).
    loop {
        let c = chars.next();
        state = match state {
            Delimiter => match c {
                None => break,
                Some('\'') => SingleQuoted,
                Some('\"') => DoubleQuoted,
                Some('\\') => Backslash,
                Some('\t') | Some(' ') | Some('\n') => Delimiter,
                // `#` starts a comment only where a new word could begin.
                Some('#') => Comment,
                Some(c) => {
                    word.push(c);
                    Unquoted
                }
            },
            Backslash => match c {
                // A trailing backslash is kept literally.
                None => {
                    word.push('\\');
                    words.push(mem::replace(&mut word, String::new()));
                    break;
                }
                // Line continuation between words: no word is started.
                Some('\n') => Delimiter,
                Some(c) => {
                    word.push(c);
                    Unquoted
                }
            },
            Unquoted => match c {
                None => {
                    words.push(mem::replace(&mut word, String::new()));
                    break;
                }
                Some('\'') => SingleQuoted,
                Some('\"') => DoubleQuoted,
                Some('\\') => UnquotedBackslash,
                Some('\t') | Some(' ') | Some('\n') => {
                    words.push(mem::replace(&mut word, String::new()));
                    Delimiter
                }
                Some(c) => {
                    word.push(c);
                    Unquoted
                }
            },
            UnquotedBackslash => match c {
                // A trailing backslash is kept literally.
                None => {
                    word.push('\\');
                    words.push(mem::replace(&mut word, String::new()));
                    break;
                }
                // Line continuation inside a word: the word carries on.
                Some('\n') => Unquoted,
                Some(c) => {
                    word.push(c);
                    Unquoted
                }
            },
            SingleQuoted => match c {
                None => return Err(ParseError),
                // Closing quote returns to the unquoted word, so that even `''` yields a word.
                Some('\'') => Unquoted,
                Some(c) => {
                    word.push(c);
                    SingleQuoted
                }
            },
            DoubleQuoted => match c {
                None => return Err(ParseError),
                Some('\"') => Unquoted,
                Some('\\') => DoubleQuotedBackslash,
                Some(c) => {
                    word.push(c);
                    DoubleQuoted
                }
            },
            DoubleQuotedBackslash => match c {
                None => return Err(ParseError),
                // Line continuation inside double quotes.
                Some('\n') => DoubleQuoted,
                // Only these characters are escaped by a backslash inside double quotes.
                Some(c @ '$') | Some(c @ '`') | Some(c @ '"') | Some(c @ '\\') => {
                    word.push(c);
                    DoubleQuoted
                }
                // Before anything else the backslash is an ordinary character.
                Some(c) => {
                    word.push('\\');
                    word.push(c);
                    DoubleQuoted
                }
            },
            Comment => match c {
                None => break,
                Some('\n') => Delimiter,
                Some(_) => Comment,
            },
        }
    }

    Ok(words)
}

enum EscapeStyle {
    /// No escaping.
    None,
    /// Wrap in single quotes.
    SingleQuoted,
    /// Single quotes combined with backslash.
    Mixed,
}

/// Determines escaping style to use.
///
/// An empty string is wrapped in single quotes so that it survives as a separate word. A string
/// without any special characters is left untouched. Otherwise the string is wrapped in single
/// quotes, with the mixed style used only when the string itself contains a single quote.
fn escape_style(s: &str) -> EscapeStyle {
    if s.is_empty() {
        return EscapeStyle::SingleQuoted;
    }

    let mut special = false;
    let mut single_quote = false;

    for c in s.chars() {
        match c {
            '\'' => {
                single_quote = true;
                special = true;
            }
            // Characters that POSIX requires to be quoted, either always or depending on context.
            // The tilde is the ASCII `~` that triggers tilde expansion.
            '|' | '&' | ';' | '<' | '>' | '(' | ')' | '$' | '`' | '\\' | '"' | ' ' | '\t'
            | '\n' | '*' | '?' | '[' | '#' | '~' | '=' | '%' => {
                special = true;
            }
            _ => continue,
        }
    }

    if !special {
        EscapeStyle::None
    } else if !single_quote {
        EscapeStyle::SingleQuoted
    } else {
        EscapeStyle::Mixed
    }
}

/// Escapes special characters in a string, so that it will retain its literal meaning when used as
/// a part of command in Unix shell.
///
/// It tries to avoid introducing any unnecessary quotes or escape characters, but specifics
/// regarding quoting style are left unspecified.
pub fn quote(s: &str) -> Cow<'_, str> {
    // We are going somewhat out of the way to provide
    // minimal amount of quoting in typical cases.
    match escape_style(s) {
        EscapeStyle::None => s.into(),
        EscapeStyle::SingleQuoted => format!("'{}'", s).into(),
        EscapeStyle::Mixed => {
            let mut quoted = String::with_capacity(s.len() + 2);
            quoted.push('\'');
            for c in s.chars() {
                if c == '\'' {
                    // Close the quoted section, emit an escaped quote, and reopen it.
                    quoted.push_str("'\\''");
                } else {
                    quoted.push(c);
                }
            }
            quoted.push('\'');
            quoted.into()
        }
    }
}

/// Joins arguments into a single command line suitable for execution in Unix shell.
///
/// Each argument is quoted using [`quote`] to preserve its literal meaning when parsed by Unix
/// shell.
///
/// Note: This function is essentially an inverse of [`split`].
///
/// # Examples
///
/// Logging executed commands in format that can be easily copied and pasted into an actual shell:
///
/// ```rust,no_run
/// fn execute(args: &[&str]) {
///     use std::process::Command;
///     println!("Executing: {}", shell_words::join(args));
///     Command::new(&args[0])
///         .args(&args[1..])
///         .spawn()
///         .expect("failed to start subprocess")
///         .wait()
///         .expect("failed to wait for subprocess");
/// }
///
/// execute(&["python", "-c", "print('Hello world!')"]);
/// ```
///
/// [`quote`]: fn.quote.html
/// [`split`]: fn.split.html
pub fn join<I, S>(words: I) -> String
where
    I: IntoIterator<Item = S>,
    S: AsRef<str>,
{
    let mut line = words.into_iter().fold(String::new(), |mut line, word| {
        let quoted = quote(word.as_ref());
        line.push_str(quoted.as_ref());
        line.push(' ');
        line
    });
    // Drop the separator added after the last word; a no-op when there were no words.
    line.pop();
    line
}

#[cfg(test)]
mod tests {
    use super::*;

    fn split_ok(cases: &[(&str, &[&str])]) {
        for &(input, expected) in cases {
            match split(input) {
                Err(actual) => {
                    panic!(
                        "After split({:?})\nexpected: Ok({:?})\n actual: Err({:?})\n",
                        input, expected, actual
                    );
                }
                Ok(actual) => {
                    assert!(
                        expected == actual.as_slice(),
                        "After split({:?}).unwrap()\nexpected: {:?}\n actual: {:?}\n",
                        input,
                        expected,
                        actual
                    );
                }
            }
        }
    }

    #[test]
    fn split_empty() {
        split_ok(&[("", &[])]);
    }

    #[test]
    fn split_initial_whitespace_is_removed() {
        split_ok(&[
            (" a", &["a"]),
            ("\t\t\t\tbar", &["bar"]),
            ("\t \nc", &["c"]),
        ]);
    }

    #[test]
    fn split_trailing_whitespace_is_removed() {
        split_ok(&[
            ("a ", &["a"]),
            ("b\t", &["b"]),
            ("c\t \n \n \n", &["c"]),
            ("d\n\n", &["d"]),
        ]);
    }

    #[test]
    fn split_carriage_return_is_not_special() {
        split_ok(&[("c\ra\r'\r'\r", &["c\ra\r\r\r"])]);
    }

    #[test]
    fn split_single_quotes() {
        split_ok(&[
            (r#"''"#, &[r#""#]),
            (r#"'a'"#, &[r#"a"#]),
            (r#"'\'"#, &[r#"\"#]),
            (r#"' \ '"#, &[r#" \ "#]),
            (r#"'#'"#, &[r#"#"#]),
        ]);
    }

    #[test]
    fn split_double_quotes() {
        split_ok(&[
            (r#""""#, &[""]),
            (r#""""""#, &[""]),
            (r#""a b c' d""#, &["a b c' d"]),
            (r#""\a""#, &["\\a"]),
            (r#""$""#, &["$"]),
            (r#""\$""#, &["$"]),
            (r#""`""#, &["`"]),
            (r#""\`""#, &["`"]),
            (r#""\"""#, &["\""]),
            (r#""\\""#, &["\\"]),
            ("\"\n\"", &["\n"]),
            ("\"\\\n\"", &[""]),
        ]);
    }

    #[test]
    fn split_unquoted() {
        split_ok(&[
            (r#"\|\&\;"#, &[r#"|&;"#]),
            (r#"\<\>"#, &[r#"<>"#]),
            (r#"\(\)"#, &[r#"()"#]),
            (r#"\$"#, &[r#"$"#]),
            (r#"\`"#, &[r#"`"#]),
            (r#"\""#, &[r#"""#]),
            (r#"\'"#, &[r#"'"#]),
            ("\\\n", &[]),
            (" \\\n \n", &[]),
            ("a\nb\nc", &["a", "b", "c"]),
            ("a\\\nb\\\nc", &["abc"]),
            ("foo bar baz", &["foo", "bar", "baz"]),
            (r#"\🦉"#, &[r"🦉"]),
        ]);
    }

    #[test]
    fn split_trailing_backslash() {
        split_ok(&[("\\", &["\\"]), (" \\", &["\\"]), ("a\\", &["a\\"])]);
    }

    #[test]
    fn split_errors() {
        assert_eq!(split("'abc"), Err(ParseError));
        assert_eq!(split("\""), Err(ParseError));
        assert_eq!(split("'\\"), Err(ParseError));
        // Input ending right after a backslash inside double quotes.
        assert_eq!(split("\"\\"), Err(ParseError));
    }

    #[test]
    fn split_comments() {
        split_ok(&[
            (r#" x # comment "#, &["x"]),
            (r#" w1#w2 "#, &["w1#w2"]),
            (r#"'not really a # comment'"#, &["not really a # comment"]),
            (" a # very long comment \n b # another comment", &["a", "b"]),
        ]);
    }

    #[test]
    fn test_quote() {
        assert_eq!(quote(""), "''");
        assert_eq!(quote("'"), "''\\'''");
        assert_eq!(quote("abc"), "abc");
        assert_eq!(quote("a \n b"), "'a \n b'");
        assert_eq!(quote("X'\nY"), "'X'\\''\nY'");
    }

    #[test]
    fn test_quote_tilde() {
        assert_eq!(quote("~"), "'~'");
        assert_eq!(quote("~user"), "'~user'");
    }

    #[test]
    fn test_join() {
        assert_eq!(join(&["a", "b", "c"]), "a b c");
        assert_eq!(join(&[" ", "$", "\n"]), "' ' '$' '\n'");
    }

    #[test]
    fn join_followed_by_split_is_identity() {
        let cases: Vec<&[&str]> = vec![
            &["a"],
            &["python", "-c", "print('Hello world!')"],
            &["echo", " arg with spaces ", "arg \' with \" quotes"],
            &["even newlines are quoted correctly\n", "\n", "\n\n\t "],
            &["$", "`test`"],
            &["cat", "~user/log*"],
            &["test", "'a \"b", "\"X'"],
            &["empty", "", "", ""],
        ];
        for argv in cases {
            let args = join(argv);
            assert_eq!(split(&args).unwrap(), argv);
        }
    }
}