1 // Copyright 2018 Tomasz Miąsko
2 //
3 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE>
4 // or the MIT license <LICENSE-MIT>, at your option.
5 //
6 //! Process command line according to parsing rules of Unix shell as specified in [Shell Command
7 //! Language in POSIX.1-2008][posix-shell].
8 //!
9 //! [posix-shell]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html
10
11 #![forbid(unsafe_code)]
12
13 use std::borrow::Cow;
14 use std::error;
15 use std::fmt;
16 use std::mem;
17 use std::result;
18
/// Error value reported when a command line cannot be split into words.
///
/// It carries no data; its `Display` output is the fixed text `missing closing quote`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ParseError;

impl fmt::Display for ParseError {
    /// Writes the fixed, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str("missing closing quote")
    }
}

// The provided `Error` methods are sufficient: there is no underlying cause to expose.
impl error::Error for ParseError {}
30
31 enum State {
32 /// Within a delimiter.
33 Delimiter,
34 /// After backslash, but before starting word.
35 Backslash,
36 /// Within an unquoted word.
37 Unquoted,
38 /// After backslash in an unquoted word.
39 UnquotedBackslash,
40 /// Within a single quoted word.
41 SingleQuoted,
42 /// Within a double quoted word.
43 DoubleQuoted,
44 /// After backslash inside a double quoted word.
45 DoubleQuotedBackslash,
46 /// Inside a comment.
47 Comment,
48 }
49
50 /// Splits command line into separate arguments, in much the same way Unix shell would, but without
51 /// many of expansion the shell would perform.
52 ///
53 /// The split functionality is compatible with behaviour of Unix shell, but with word expansions
54 /// limited to quote removal, and without special token recognition rules for operators.
55 ///
56 /// The result is exactly the same as one obtained from Unix shell as long as those unsupported
57 /// features are not present in input: no operators, no variable assignments, no tilde expansion,
58 /// no parameter expansion, no command substitution, no arithmetic expansion, no pathname
59 /// expansion.
60 ///
61 /// In case those unsupported shell features are present, the syntax that introduce them is
62 /// interpreted literally.
63 ///
64 /// # Errors
65 ///
66 /// When input contains unmatched quote, an error is returned.
67 ///
68 /// # Compatibility with other implementations
69 ///
70 /// It should be fully compatible with g_shell_parse_argv from GLib, except that in GLib
71 /// it is an error not to have any words after tokenization.
72 ///
73 /// It is also very close to shlex.split available in Python standard library, when used in POSIX
74 /// mode with support for comments. Though, shlex implementation diverges from POSIX, and from
75 /// implementation contained herein in three aspects. First, it doesn't support line continuations.
76 /// Second, inside double quotes, the backslash characters retains its special meaning as an escape
77 /// character only when followed by \\ or \", whereas POSIX specifies that it should retain its
78 /// special meaning when followed by: $, \`, \", \\, or a newline. Third, it treats carriage return
79 /// as one of delimiters.
80 ///
81 /// # Examples
82 ///
83 /// Building an executable using compiler obtained from CC environment variable
84 /// and compiler flags from both CFLAGS and CPPFLAGS. Similar to default build
85 /// rule for C used in GNU Make:
86 ///
87 /// ```rust,no_run
88 /// use std::env::var;
89 /// use std::process::Command;
90 ///
91 /// let cc = var("CC").unwrap_or_else(|_| "cc".to_owned());
92 ///
93 /// let cflags_str = var("CFLAGS").unwrap_or_else(|_| String::new());
94 /// let cflags = shell_words::split(&cflags_str).expect("failed to parse CFLAGS");
95 ///
96 /// let cppflags_str = var("CPPFLAGS").unwrap_or_else(|_| String::new());
97 /// let cppflags = shell_words::split(&cppflags_str).expect("failed to parse CPPFLAGS");
98 ///
99 /// Command::new(cc)
100 /// .args(cflags)
101 /// .args(cppflags)
102 /// .args(&["-c", "a.c", "-o", "a.out"])
103 /// .spawn()
104 /// .expect("failed to start subprocess")
105 /// .wait()
106 /// .expect("failed to wait for subprocess");
107 /// ```
split(s: &str) -> result::Result<Vec<String>, ParseError>108 pub fn split(s: &str) -> result::Result<Vec<String>, ParseError> {
109 use State::*;
110
111 let mut words = Vec::new();
112 let mut word = String::new();
113 let mut chars = s.chars();
114 let mut state = Delimiter;
115
116 loop {
117 let c = chars.next();
118 state = match state {
119 Delimiter => match c {
120 None => break,
121 Some('\'') => SingleQuoted,
122 Some('\"') => DoubleQuoted,
123 Some('\\') => Backslash,
124 Some('\t') | Some(' ') | Some('\n') => Delimiter,
125 Some('#') => Comment,
126 Some(c) => {
127 word.push(c);
128 Unquoted
129 }
130 },
131 Backslash => match c {
132 None => {
133 word.push('\\');
134 words.push(mem::replace(&mut word, String::new()));
135 break;
136 }
137 Some('\n') => Delimiter,
138 Some(c) => {
139 word.push(c);
140 Unquoted
141 }
142 },
143 Unquoted => match c {
144 None => {
145 words.push(mem::replace(&mut word, String::new()));
146 break;
147 }
148 Some('\'') => SingleQuoted,
149 Some('\"') => DoubleQuoted,
150 Some('\\') => UnquotedBackslash,
151 Some('\t') | Some(' ') | Some('\n') => {
152 words.push(mem::replace(&mut word, String::new()));
153 Delimiter
154 }
155 Some(c) => {
156 word.push(c);
157 Unquoted
158 }
159 },
160 UnquotedBackslash => match c {
161 None => {
162 word.push('\\');
163 words.push(mem::replace(&mut word, String::new()));
164 break;
165 }
166 Some('\n') => Unquoted,
167 Some(c) => {
168 word.push(c);
169 Unquoted
170 }
171 },
172 SingleQuoted => match c {
173 None => return Err(ParseError),
174 Some('\'') => Unquoted,
175 Some(c) => {
176 word.push(c);
177 SingleQuoted
178 }
179 },
180 DoubleQuoted => match c {
181 None => return Err(ParseError),
182 Some('\"') => Unquoted,
183 Some('\\') => DoubleQuotedBackslash,
184 Some(c) => {
185 word.push(c);
186 DoubleQuoted
187 }
188 },
189 DoubleQuotedBackslash => match c {
190 None => return Err(ParseError),
191 Some('\n') => DoubleQuoted,
192 Some(c @ '$') | Some(c @ '`') | Some(c @ '"') | Some(c @ '\\') => {
193 word.push(c);
194 DoubleQuoted
195 }
196 Some(c) => {
197 word.push('\\');
198 word.push(c);
199 DoubleQuoted
200 }
201 },
202 Comment => match c {
203 None => break,
204 Some('\n') => Delimiter,
205 Some(_) => Comment,
206 },
207 }
208 }
209
210 Ok(words)
211 }
212
/// Quoting strategy chosen for a single word.
enum EscapeStyle {
    /// The word contains nothing special and needs no quoting.
    None,
    /// The word can be wrapped in single quotes verbatim.
    SingleQuoted,
    /// The word is wrapped in single quotes, with embedded single quotes backslash-escaped.
    Mixed,
}

/// Determines which escaping style a word needs.
///
/// * An empty word is `SingleQuoted`, so that it does not disappear from the command line.
/// * A word without any special character is `None`.
/// * A word with a newline but no single quote is `SingleQuoted`.
/// * Every other word containing a special character is `Mixed`.
///
/// The special characters are the ones POSIX lists as requiring quoting: newline, single quote,
/// `|`, `&`, `;`, `<`, `>`, `(`, `)`, `$`, `` ` ``, `\`, `"`, space, tab, and the characters that
/// are special only in some contexts: `*`, `?`, `[`, `#`, `~`, `=`, `%`.
fn escape_style(s: &str) -> EscapeStyle {
    if s.is_empty() {
        return EscapeStyle::SingleQuoted;
    }

    let mut special = false;
    let mut newline = false;
    let mut single_quote = false;

    for c in s.chars() {
        match c {
            '\n' => {
                newline = true;
                special = true;
            }
            '\'' => {
                single_quote = true;
                special = true;
            }
            // The tilde must be the ASCII `~`: that is the character the shell expands.
            // A look-alike such as U+02DC (SMALL TILDE) would leave words like `~user`
            // unquoted and subject to tilde expansion.
            '|' | '&' | ';' | '<' | '>' | '(' | ')' | '$' | '`' | '\\' | '"' | ' ' | '\t' | '*'
            | '?' | '[' | '#' | '~' | '=' | '%' => {
                special = true;
            }
            _ => continue,
        }
    }

    if !special {
        EscapeStyle::None
    } else if newline && !single_quote {
        EscapeStyle::SingleQuoted
    } else {
        EscapeStyle::Mixed
    }
}
258
259 /// Escapes special characters in a string, so that it will retain its literal meaning when used as
260 /// a part of command in Unix shell.
261 ///
262 /// It tries to avoid introducing any unnecessary quotes or escape characters, but specifics
263 /// regarding quoting style are left unspecified.
quote(s: &str) -> Cow<str>264 pub fn quote(s: &str) -> Cow<str> {
265 // We are going somewhat out of the way to provide
266 // minimal amount of quoting in typical cases.
267 match escape_style(s) {
268 EscapeStyle::None => s.into(),
269 EscapeStyle::SingleQuoted => format!("'{}'", s).into(),
270 EscapeStyle::Mixed => {
271 let mut quoted = String::new();
272 quoted.push('\'');
273 for c in s.chars() {
274 if c == '\'' {
275 quoted.push_str("'\\''");
276 } else {
277 quoted.push(c);
278 }
279 }
280 quoted.push('\'');
281 quoted.into()
282 }
283 }
284 }
285
286 /// Joins arguments into a single command line suitable for execution in Unix shell.
287 ///
288 /// Each argument is quoted using [`quote`] to preserve its literal meaning when parsed by Unix
289 /// shell.
290 ///
291 /// Note: This function is essentially an inverse of [`split`].
292 ///
293 /// # Examples
294 ///
295 /// Logging executed commands in format that can be easily copied and pasted into an actual shell:
296 ///
297 /// ```rust,no_run
298 /// fn execute(args: &[&str]) {
299 /// use std::process::Command;
300 /// println!("Executing: {}", shell_words::join(args));
301 /// Command::new(&args[0])
302 /// .args(&args[1..])
303 /// .spawn()
304 /// .expect("failed to start subprocess")
305 /// .wait()
306 /// .expect("failed to wait for subprocess");
307 /// }
308 ///
309 /// execute(&["python", "-c", "print('Hello world!')"]);
310 /// ```
311 ///
312 /// [`quote`]: fn.quote.html
313 /// [`split`]: fn.split.html
join<I, S>(words: I) -> String where I: IntoIterator<Item = S>, S: AsRef<str>,314 pub fn join<I, S>(words: I) -> String
315 where
316 I: IntoIterator<Item = S>,
317 S: AsRef<str>,
318 {
319 let mut line = words.into_iter().fold(String::new(), |mut line, word| {
320 let quoted = quote(word.as_ref());
321 line.push_str(quoted.as_ref());
322 line.push(' ');
323 line
324 });
325 line.pop();
326 line
327 }
328
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every `(input, expected words)` pair splits successfully into exactly the
    /// expected words, reporting the offending input on failure.
    fn split_ok(cases: &[(&str, &[&str])]) {
        for &(input, expected) in cases {
            match split(input) {
                Err(actual) => {
                    panic!(
                        "After split({:?})\nexpected: Ok({:?})\n actual: Err({:?})\n",
                        input, expected, actual
                    );
                }
                Ok(actual) => {
                    assert!(
                        expected == actual.as_slice(),
                        "After split({:?}).unwrap()\nexpected: {:?}\n actual: {:?}\n",
                        input,
                        expected,
                        actual
                    );
                }
            }
        }
    }

    #[test]
    fn split_empty() {
        split_ok(&[("", &[])]);
    }

    #[test]
    fn split_initial_whitespace_is_removed() {
        split_ok(&[
            (" a", &["a"]),
            ("\t\t\t\tbar", &["bar"]),
            ("\t \nc", &["c"]),
        ]);
    }

    #[test]
    fn split_trailing_whitespace_is_removed() {
        split_ok(&[
            ("a ", &["a"]),
            ("b\t", &["b"]),
            ("c\t \n \n \n", &["c"]),
            ("d\n\n", &["d"]),
        ]);
    }

    #[test]
    fn split_carriage_return_is_not_special() {
        split_ok(&[("c\ra\r'\r'\r", &["c\ra\r\r\r"])]);
    }

    #[test]
    fn split_single_quotes() {
        split_ok(&[
            (r#"''"#, &[r#""#]),
            (r#"'a'"#, &[r#"a"#]),
            (r#"'\'"#, &[r#"\"#]),
            (r#"' \ '"#, &[r#" \ "#]),
            (r#"'#'"#, &[r#"#"#]),
        ]);
    }

    #[test]
    fn split_double_quotes() {
        split_ok(&[
            (r#""""#, &[""]),
            (r#""""""#, &[""]),
            (r#""a b c' d""#, &["a b c' d"]),
            (r#""\a""#, &["\\a"]),
            (r#""$""#, &["$"]),
            (r#""\$""#, &["$"]),
            (r#""`""#, &["`"]),
            (r#""\`""#, &["`"]),
            (r#""\"""#, &["\""]),
            (r#""\\""#, &["\\"]),
            ("\"\n\"", &["\n"]),
            ("\"\\\n\"", &[""]),
        ]);
    }

    #[test]
    fn split_unquoted() {
        split_ok(&[
            (r#"\|\&\;"#, &[r#"|&;"#]),
            (r#"\<\>"#, &[r#"<>"#]),
            (r#"\(\)"#, &[r#"()"#]),
            (r#"\$"#, &[r#"$"#]),
            (r#"\`"#, &[r#"`"#]),
            (r#"\""#, &[r#"""#]),
            (r#"\'"#, &[r#"'"#]),
            ("\\\n", &[]),
            (" \\\n \n", &[]),
            ("a\nb\nc", &["a", "b", "c"]),
            ("a\\\nb\\\nc", &["abc"]),
            ("foo bar baz", &["foo", "bar", "baz"]),
            // A backslash followed by a multi-byte character yields that character. Written
            // with an escape so the case survives tools that drop non-ASCII glyphs; a lone
            // backslash is covered by `split_trailing_backslash` instead.
            ("\\\u{1F989}", &["\u{1F989}"]),
        ]);
    }

    #[test]
    fn split_trailing_backslash() {
        split_ok(&[("\\", &["\\"]), (" \\", &["\\"]), ("a\\", &["a\\"])]);
    }

    #[test]
    fn split_errors() {
        assert_eq!(split("'abc"), Err(ParseError));
        assert_eq!(split("\""), Err(ParseError));
        assert_eq!(split("'\\"), Err(ParseError));
        // Input ending right after a backslash inside double quotes.
        assert_eq!(split("\"\\"), Err(ParseError));
    }

    #[test]
    fn split_comments() {
        split_ok(&[
            (r#" x # comment "#, &["x"]),
            (r#" w1#w2 "#, &["w1#w2"]),
            (r#"'not really a # comment'"#, &["not really a # comment"]),
            (" a # very long comment \n b # another comment", &["a", "b"]),
        ]);
    }

    #[test]
    fn test_quote() {
        assert_eq!(quote(""), "''");
        assert_eq!(quote("'"), "''\\'''");
        assert_eq!(quote("abc"), "abc");
        assert_eq!(quote("a \n b"), "'a \n b'");
        assert_eq!(quote("X'\nY"), "'X'\\''\nY'");
    }

    #[test]
    fn test_join() {
        assert_eq!(join(&["a", "b", "c"]), "a b c");
        assert_eq!(join(&[" ", "$", "\n"]), "' ' '$' '\n'");
    }

    #[test]
    fn join_followed_by_split_is_identity() {
        let cases: Vec<&[&str]> = vec![
            &["a"],
            &["python", "-c", "print('Hello world!')"],
            &["echo", " arg with spaces ", "arg \' with \" quotes"],
            &["even newlines are quoted correctly\n", "\n", "\n\n\t "],
            &["$", "`test`"],
            &["cat", "~user/log*"],
            &["test", "'a \"b", "\"X'"],
            &["empty", "", "", ""],
        ];
        for argv in cases {
            let args = join(argv);
            assert_eq!(split(&args).unwrap(), argv);
        }
    }
}
488