1 //! Filesystem paths in Windows are a total mess. This crate normalizes paths to the most
2 //! compatible (but still correct) format, so that you don't have to worry about the mess.
3 //!
4 //! In Windows the regular/legacy paths (`C:\foo`) are supported by all programs, but have
5 //! lots of bizarre restrictions for backwards compatibility with MS-DOS.
6 //!
7 //! And there are Windows NT UNC paths (`\\?\C:\foo`), which are more robust and with fewer
8 //! gotchas, but are rarely supported by Windows programs. Even Microsoft's own!
9 //!
10 //! This crate converts paths to legacy format whenever possible, but leaves UNC paths as-is
11 //! when they can't be unambiguously expressed in a simpler way. This allows legacy programs
12 //! to access all paths they can possibly access, and UNC-aware programs to access all paths.
13 //!
14 //! On non-Windows platforms these functions leave paths unmodified, so it's safe to use them
15 //! unconditionally for all platforms.
16 //!
17 //! Parsing is based on https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx
18 //!
19 //! [Project homepage](https://crates.rs/crates/dunce).
20 #![doc(html_logo_url = "https://assets.gitlab-static.net/uploads/-/system/project/avatar/4717715/dyc.png")]
21 
22 #[cfg(any(windows, test))]
23 use std::ffi::OsStr;
24 use std::fs;
25 use std::io;
26 #[cfg(windows)]
27 use std::os::windows::ffi::OsStrExt;
28 #[cfg(windows)]
29 use std::path::{Component, Prefix};
30 use std::path::{Path, PathBuf};
31 
32 /// Takes any path, and when possible, converts Windows UNC paths to regular paths.
33 ///
34 /// On non-Windows this is no-op.
35 ///
36 /// `\\?\C:\Windows` will be converted to `C:\Windows`,
37 /// but `\\?\C:\COM` will be left as-is (due to a reserved filename).
38 ///
39 /// Use this to pass arbitrary paths to programs that may not be UNC-aware.
40 /// It's generally safe to pass UNC paths to legacy programs, because
41 /// the paths contain a reserved character, so will gracefully fail
42 /// if used with wrong APIs.
43 ///
44 /// This function does not perform any I/O.
45 ///
46 /// Currently paths with unpaired surrogates aren't converted even if they
47 /// can be due to limitations of Rust's `OsStr` API.
simplified(path: &Path) -> &Path48 pub fn simplified(path: &Path) -> &Path {
49     if is_safe_to_strip_unc(path) {
50         // unfortunately we can't safely strip prefix from a non-Unicode path
51         path.to_str().and_then(|s| s.get(4..)).map(Path::new).unwrap_or(path)
52     } else {
53         path
54     }
55 }
56 
/// Like `std::fs::canonicalize()`, but on Windows the result is returned in
/// the most compatible form (`C:\…`) instead of UNC (`\\?\C:\…`) whenever
/// the prefix can be dropped safely.
///
/// Any error from `std::fs::canonicalize()` is returned unchanged.
#[cfg(windows)]
pub fn canonicalize<P: AsRef<Path>>(path: P) -> io::Result<PathBuf> {
    let resolved = fs::canonicalize(path)?;
    if !is_safe_to_strip_unc(&resolved) {
        return Ok(resolved);
    }
    // Drop the 4-character `\\?\` prefix. This needs a `&str` view, so a
    // non-Unicode result keeps its UNC form.
    let stripped = resolved
        .to_str()
        .and_then(|text| text.get(4..))
        .map(PathBuf::from);
    Ok(stripped.unwrap_or(resolved))
}
68 
/// Like `std::fs::canonicalize()`, but on Windows it outputs the most
/// compatible form of a path instead of UNC.
///
/// On this (non-Windows) build it simply forwards to
/// `std::fs::canonicalize()`.
#[cfg(not(windows))]
#[inline]
pub fn canonicalize<P: AsRef<Path>>(path: P) -> io::Result<PathBuf> {
    let target: &Path = path.as_ref();
    fs::canonicalize(target)
}
76 
77 pub use self::canonicalize as realpath;
78 
/// Returns the length of `s` as Windows counts it: in UTF-16 code units.
///
/// On Windows the string is measured directly in its native wide encoding.
/// Everywhere else (only reachable in test builds) the string is converted
/// lossily to UTF-8 and re-encoded as UTF-16, so characters outside the
/// Basic Multilingual Plane count as two units.
#[cfg(any(windows, test))]
fn windows_char_len(s: &OsStr) -> usize {
    #[cfg(windows)]
    let len = s.encode_wide().count();
    // `not(windows)` rather than `unix`, so that test builds on other
    // targets still get a definition of `len`.
    #[cfg(not(windows))]
    let len = s.to_string_lossy().encode_utf16().count();
    len
}
87 
/// Checks whether `file_name` is acceptable as a single path component of a
/// regular (non-UNC) Windows path.
///
/// Returns `false` when the name is longer than 255 UTF-16 units, is not
/// valid Unicode, is empty, ends with a space or a dot, or contains a
/// control character or one of `< > : " / \ | ? *`.
#[cfg(any(windows, test))]
fn is_valid_filename<P: AsRef<OsStr>>(file_name: P) -> bool {
    let raw = file_name.as_ref();
    if windows_char_len(raw) > 255 {
        return false;
    }

    // Non-Unicode names may be fine for Windows, but they can't be inspected
    // losslessly here, so they are rejected.
    let bytes = match raw.to_str() {
        Some(text) => text.as_bytes(),
        None => return false,
    };

    // An empty name is invalid, and so is one ending in a space or a dot.
    // This looks at the whole name, not just the part before the extension.
    match bytes.last() {
        None | Some(&b' ') | Some(&b'.') => return false,
        Some(_) => {}
    }

    // Every forbidden character is ASCII, so scanning the UTF-8 bytes is safe:
    // bytes of multi-byte sequences never fall in the ASCII range.
    let has_forbidden = bytes.iter().any(|&byte| match byte {
        0..=31 => true,
        b'<' | b'>' | b':' | b'"' => true,
        b'/' | b'\\' | b'|' | b'?' | b'*' => true,
        _ => false,
    });
    !has_forbidden
}
122 
/// DOS device names that cannot be used as file names in regular Windows
/// paths. Compared case-insensitively by `is_reserved`.
#[cfg(any(windows, test))]
const RESERVED_NAMES: [&str; 22] = [
    "AUX", "NUL", "PRN", "CON",
    "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9",
    "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
];
128 
/// Checks whether `file_name` refers to a reserved DOS device (`CON`, `NUL`,
/// `COM1`, …) when used in a regular Windows path.
///
/// The comparison is case-insensitive. A reserved name followed by any
/// number of extensions is reserved as well (`con.txt`, `NUL.tar.gz`), and
/// trailing dots and spaces before the first extension are ignored
/// (`con.. .txt` is `CON` for DOS).
///
/// Names whose stem is not valid Unicode are reported as not reserved.
#[cfg(any(windows, test))]
fn is_reserved<P: AsRef<OsStr>>(file_name: P) -> bool {
    // All reserved DOS names are ASCII, so a stem that isn't valid Unicode
    // can't be one of them.
    let stem = match Path::new(&file_name).file_stem().and_then(|s| s.to_str()) {
        Some(stem) => stem,
        None => return false,
    };

    // `file_stem` removes only the last extension. Windows matches the device
    // name against everything before the *first* dot, so cut there as well.
    let before_first_dot = stem.split('.').next().unwrap_or(stem);
    let device = right_trim(before_first_dot);

    // No reserved name is longer than four characters; skip the scan otherwise.
    device.len() <= 4
        && RESERVED_NAMES
            .iter()
            .any(|reserved| reserved.eq_ignore_ascii_case(device))
}
148 
/// Non-Windows stand-in for the UNC safety check.
///
/// It always answers `false`, so callers never strip anything from a path
/// on these platforms.
#[cfg(not(windows))]
#[inline]
fn is_safe_to_strip_unc(_path: &Path) -> bool {
    false
}
153 
/// Decides whether the `\\?\` prefix can be removed from `path` without
/// changing which file the path refers to.
///
/// This holds only when all of the following are true:
/// - the path starts with a verbatim disk prefix (`\\?\C:`);
/// - every following component is the root separator or a file name that is
///   valid and not reserved in regular paths;
/// - the whole path is at most 260 UTF-16 units long.
#[cfg(windows)]
fn is_safe_to_strip_unc(path: &Path) -> bool {
    let mut parts = path.components();

    // Anything other than `\\?\<drive>:` is rejected: other kinds of UNC
    // prefixes, as well as relative or empty paths.
    let starts_with_verbatim_disk = match parts.next() {
        Some(Component::Prefix(prefix)) => match prefix.kind() {
            Prefix::VerbatimDisk(..) => true,
            _ => false,
        },
        _ => false,
    };
    if !starts_with_verbatim_disk {
        return false;
    }

    let every_component_is_plain = parts.all(|part| match part {
        Component::RootDir => true,
        Component::Normal(name) => is_valid_filename(name) && !is_reserved(name),
        // UNC paths take things like ".." literally, regular paths don't.
        _ => false,
    });

    // The limit applies to files; a path used as a directory is limited to 248.
    every_component_is_plain && windows_char_len(path.as_os_str()) <= 260
}
184 
/// Removes every trailing `'.'` and `' '` from `s`.
///
/// Returns a sub-slice of the input; a string made up only of dots and
/// spaces yields `""`. Only the ASCII dot and space are trimmed, so
/// look-alikes such as `'。'` are kept.
#[cfg(any(windows, test))]
fn right_trim(s: &str) -> &str {
    // The standard library trims on character boundaries, so no `unsafe`
    // slicing is needed.
    s.trim_end_matches(|c: char| c == '.' || c == ' ')
}
200 
/// `right_trim` removes trailing dots and spaces, and nothing else.
#[test]
fn trim_test() {
    // (input, expected result)
    let cases = [
        ("a.", "a"),
        ("ą.", "ą"),
        ("a ", "a"),
        ("ąą ", "ąą"),
        ("a. . . ....   ", "a"),
        ("a. . . ..ź..   ", "a. . . ..ź"),
        (" b", " b"),
        (" べ", " べ"),
        ("c. c.", "c. c"),
        ("。", "。"),
        ("", ""),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(expected, right_trim(input), "input: {:?}", input);
    }
}
215 
/// `is_reserved` recognises DOS device names regardless of case, extension,
/// and trailing dots/spaces, and leaves similar-looking names alone.
#[test]
fn reserved() {
    let device_names = [
        "CON",
        "con",
        "con.con",
        "COM4",
        "COM4.txt",
        "COM4 .txt",
        "con.",
        "con .",
        "con  ",
        "con . ",
        "con . .txt",
        "con.....txt",
        "PrN.....",
    ];
    for &name in device_names.iter() {
        assert!(is_reserved(name), "{:?} should be reserved", name);
    }

    let ordinary_names = [
        " PrN.....",
        " CON",
        "COM0",
        "COM77",
        " CON ",
        ".CON",
        "@CON",
        "not.CON",
        "CON。",
    ];
    for &name in ordinary_names.iter() {
        assert!(!is_reserved(name), "{:?} should not be reserved", name);
    }
}
242 
/// `windows_char_len` counts UTF-16 code units, not bytes.
#[test]
fn len() {
    // (expected length, input)
    let cases = [
        (1, "a"),
        (1, "€"),
        (1, "本"),
        (2, "��"),
        (2, "®®"),
    ];
    for &(expected, input) in cases.iter() {
        assert_eq!(expected, windows_char_len(OsStr::new(input)), "input: {:?}", input);
    }
}
251 
/// `is_valid_filename` rejects names that regular Windows paths can't
/// express, and accepts ordinary ones (including non-ASCII names).
#[test]
fn valid() {
    let rejected = [
        "..",
        ".",
        "aaaaaaaaaa:",
        "ą:ą",
        "",
        "a ",
        " a. ",
        "a/",
        "/a",
        "/",
        "\\",
        "\\a",
        "<x>",
        "a*",
        "?x",
        "a\0a",
        "\x1f",
    ];
    for &name in rejected.iter() {
        assert!(!is_valid_filename(name), "{:?} should be invalid", name);
    }
    // Too long: more than 255 UTF-16 units.
    let too_long = "a".repeat(257);
    assert!(!is_valid_filename(too_long));

    // Just under the limit: length is measured in UTF-16 units, not bytes.
    let long_enough = "®".repeat(254);
    assert!(is_valid_filename(long_enough));
    let accepted = [
        "ファイル",
        "a",
        "a.aaaaaaaa",
        "a........a",
        "       b",
    ];
    for &name in accepted.iter() {
        assert!(is_valid_filename(name), "{:?} should be valid", name);
    }
}
280 
/// `canonicalize` yields a regular (non-UNC) absolute path on Windows.
#[test]
#[cfg(windows)]
fn realpath_test() {
    let windows_dir = canonicalize(r"C:\Windows").unwrap();
    // Compared in upper case: the on-disk casing of the directory may vary.
    assert_eq!(r"C:\WINDOWS", windows_dir.to_str().unwrap().to_uppercase());

    let current_dir = canonicalize(r".").unwrap();
    assert_ne!(r".", current_dir.to_str().unwrap());
}
287 
/// `simplified` strips the prefix from verbatim disk paths only.
#[test]
#[cfg(windows)]
fn strip() {
    // (input, expected result)
    let cases = [
        (r"\\?\C:\foo\��", r"C:\foo\��"),
        (r"\\?\serv\", r"\\?\serv\"),
        (r"\\.\C:\notdisk", r"\\.\C:\notdisk"),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(
            Path::new(expected),
            simplified(Path::new(input)),
            "input: {:?}",
            input
        );
    }
}
295 
/// `is_safe_to_strip_unc` accepts plain verbatim disk paths of moderate
/// length and rejects everything else.
#[test]
#[cfg(windows)]
fn safe() {
    let strippable = [
        r"\\?\C:\foo\bar",
        r"\\?\Z:\foo\bar\",
        r"\\?\Z:\��\��\",
        r"\\?\c:\foo",
    ];
    for &path in strippable.iter() {
        assert!(is_safe_to_strip_unc(Path::new(path)), "{:?} should be safe", path);
    }

    // One long component fits within the path length limit; two don't.
    let long = "®".repeat(160);
    let within_limit = format!(r"\\?\c:\{}", long);
    let over_limit = format!(r"\\?\c:\{}\{}", long, long);
    assert!(is_safe_to_strip_unc(Path::new(&within_limit)));
    assert!(!is_safe_to_strip_unc(Path::new(&over_limit)));

    let not_strippable = [
        r"\\?\C:\foo\.\bar",
        r"\\?\C:\foo\..\bar",
        r"\\?\c\foo",
        r"\\?\c\foo/bar",
        r"\\?\c:foo",
        r"\\?\cc:foo",
        r"\\?\c:foo\bar",
    ];
    for &path in not_strippable.iter() {
        assert!(!is_safe_to_strip_unc(Path::new(path)), "{:?} should not be safe", path);
    }
}
316