1 use super::{c, fill_utf16_buf, to_u16s};
2 use crate::ffi::{OsStr, OsString};
3 use crate::io;
4 use crate::mem;
5 use crate::path::{Path, PathBuf, Prefix};
6 use crate::ptr;
7 
8 #[cfg(test)]
9 mod tests;
10 
/// The main path separator (`\`) as a string slice.
pub const MAIN_SEP_STR: &str = "\\";
/// The main path separator (`\`) as a `char`.
pub const MAIN_SEP: char = '\\';
13 
/// Reinterprets a byte slice as an `OsStr` without copying or validating it.
///
/// # Safety
///
/// `bytes` must be a valid wtf8 encoded slice
#[inline]
unsafe fn bytes_as_os_str(bytes: &[u8]) -> &OsStr {
    // &OsStr is layout compatible with &Slice, which is compatible with &Wtf8,
    // which is compatible with &[u8].
    mem::transmute(bytes)
}
23 
/// Returns `true` if `b` is one of the two path separator bytes, `/` or `\`.
#[inline]
pub fn is_sep_byte(b: u8) -> bool {
    matches!(b, b'/' | b'\\')
}
28 
/// Returns `true` only if `b` is a backslash; `/` is not accepted here.
#[inline]
pub fn is_verbatim_sep(b: u8) -> bool {
    matches!(b, b'\\')
}
33 
34 /// Returns true if `path` looks like a lone filename.
35 pub(crate) fn is_file_name(path: &OsStr) -> bool {
36     !path.bytes().iter().copied().any(is_sep_byte)
37 }
38 pub(crate) fn has_trailing_slash(path: &OsStr) -> bool {
39     let is_verbatim = path.bytes().starts_with(br"\\?\");
40     let is_separator = if is_verbatim { is_verbatim_sep } else { is_sep_byte };
41     if let Some(&c) = path.bytes().last() { is_separator(c) } else { false }
42 }
43 
/// Appends `suffix` verbatim to the end of `path`.
///
/// Nothing is inserted between the two, and an existing extension is kept,
/// so this can be used to add a further extension to a path.
pub(crate) fn append_suffix(path: PathBuf, suffix: &OsStr) -> PathBuf {
    let mut buf: OsString = path.into_os_string();
    buf.push(suffix);
    PathBuf::from(buf)
}
52 
53 pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> {
54     use Prefix::{DeviceNS, Disk, Verbatim, VerbatimDisk, VerbatimUNC, UNC};
55 
56     if let Some(path) = strip_prefix(path, r"\\") {
57         // \\
58         if let Some(path) = strip_prefix(path, r"?\") {
59             // \\?\
60             if let Some(path) = strip_prefix(path, r"UNC\") {
61                 // \\?\UNC\server\share
62 
63                 let (server, path) = parse_next_component(path, true);
64                 let (share, _) = parse_next_component(path, true);
65 
66                 Some(VerbatimUNC(server, share))
67             } else {
68                 let (prefix, _) = parse_next_component(path, true);
69 
70                 // in verbatim paths only recognize an exact drive prefix
71                 if let Some(drive) = parse_drive_exact(prefix) {
72                     // \\?\C:
73                     Some(VerbatimDisk(drive))
74                 } else {
75                     // \\?\prefix
76                     Some(Verbatim(prefix))
77                 }
78             }
79         } else if let Some(path) = strip_prefix(path, r".\") {
80             // \\.\COM42
81             let (prefix, _) = parse_next_component(path, false);
82             Some(DeviceNS(prefix))
83         } else {
84             let (server, path) = parse_next_component(path, false);
85             let (share, _) = parse_next_component(path, false);
86 
87             if !server.is_empty() && !share.is_empty() {
88                 // \\server\share
89                 Some(UNC(server, share))
90             } else {
91                 // no valid prefix beginning with "\\" recognized
92                 None
93             }
94         }
95     } else if let Some(drive) = parse_drive(path) {
96         // C:
97         Some(Disk(drive))
98     } else {
99         // no prefix
100         None
101     }
102 }
103 
104 // Parses a drive prefix, e.g. "C:" and "C:\whatever"
105 fn parse_drive(prefix: &OsStr) -> Option<u8> {
106     // In most DOS systems, it is not possible to have more than 26 drive letters.
107     // See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
108     fn is_valid_drive_letter(drive: &u8) -> bool {
109         drive.is_ascii_alphabetic()
110     }
111 
112     match prefix.bytes() {
113         [drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()),
114         _ => None,
115     }
116 }
117 
118 // Parses a drive prefix exactly, e.g. "C:"
119 fn parse_drive_exact(prefix: &OsStr) -> Option<u8> {
120     // only parse two bytes: the drive letter and the drive separator
121     if prefix.len() == 2 { parse_drive(prefix) } else { None }
122 }
123 
124 fn strip_prefix<'a>(path: &'a OsStr, prefix: &str) -> Option<&'a OsStr> {
125     // `path` and `prefix` are valid wtf8 and utf8 encoded slices respectively, `path[prefix.len()]`
126     // is thus a code point boundary and `path[prefix.len()..]` is a valid wtf8 encoded slice.
127     match path.bytes().strip_prefix(prefix.as_bytes()) {
128         Some(path) => unsafe { Some(bytes_as_os_str(path)) },
129         None => None,
130     }
131 }
132 
133 // Parse the next path component.
134 //
135 // Returns the next component and the rest of the path excluding the component and separator.
136 // Does not recognize `/` as a separator character if `verbatim` is true.
137 fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) {
138     let separator = if verbatim { is_verbatim_sep } else { is_sep_byte };
139 
140     match path.bytes().iter().position(|&x| separator(x)) {
141         Some(separator_start) => {
142             let mut separator_end = separator_start + 1;
143 
144             // a series of multiple separator characters is treated as a single separator,
145             // except in verbatim paths
146             while !verbatim && separator_end < path.len() && separator(path.bytes()[separator_end])
147             {
148                 separator_end += 1;
149             }
150 
151             let component = &path.bytes()[..separator_start];
152 
153             // Panic safe
154             // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index.
155             let path = &path.bytes()[separator_end..];
156 
157             // SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\')
158             // is encoded in a single byte, therefore `bytes[separator_start]` and
159             // `bytes[separator_end]` must be code point boundaries and thus
160             // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices.
161             unsafe { (bytes_as_os_str(component), bytes_as_os_str(path)) }
162         }
163         None => (path, OsStr::new("")),
164     }
165 }
166 
/// Returns a UTF-16 encoded path capable of bypassing the legacy `MAX_PATH` limits.
///
/// This path may or may not have a verbatim prefix.
///
/// The input is returned unchanged (only converted to UTF-16) when it is empty,
/// already starts with `\\?\` or `\??\`, or is shorter than the legacy limit and
/// starts with a drive (`D:`, `D:\`, `D:/`) or with two separators. Every other
/// path is made absolute with `GetFullPathNameW` and then given a verbatim prefix
/// where one of the known forms matches.
///
/// # Errors
///
/// Propagates any error returned by `to_u16s` or by `fill_utf16_buf`.
pub(crate) fn maybe_verbatim(path: &Path) -> io::Result<Vec<u16>> {
    // Normally the MAX_PATH is 260 UTF-16 code units (including the NULL).
    // However, for APIs such as CreateDirectory[1], the limit is 248.
    //
    // [1]: https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createdirectorya#parameters
    const LEGACY_MAX_PATH: usize = 248;
    // UTF-16 encoded code points, used in parsing and building UTF-16 paths.
    // All of these are in the ASCII range so they can be cast directly to `u16`.
    const SEP: u16 = b'\\' as _;
    const ALT_SEP: u16 = b'/' as _;
    const QUERY: u16 = b'?' as _;
    const COLON: u16 = b':' as _;
    const DOT: u16 = b'.' as _;
    const U: u16 = b'U' as _;
    const N: u16 = b'N' as _;
    const C: u16 = b'C' as _;

    // \\?\
    const VERBATIM_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP];
    // \??\
    const NT_PREFIX: &[u16] = &[SEP, QUERY, QUERY, SEP];
    // \\?\UNC\
    const UNC_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP, U, N, C, SEP];

    // NOTE(review): the `[0]` comparison below and the `0` in the slice patterns
    // rely on `to_u16s` appending a NUL terminator — confirm against `to_u16s`.
    let mut path = to_u16s(path)?;
    if path.starts_with(VERBATIM_PREFIX) || path.starts_with(NT_PREFIX) || path == &[0] {
        // Early return for paths that are already verbatim or empty.
        return Ok(path);
    } else if path.len() < LEGACY_MAX_PATH {
        // Early return if an absolute path is shorter than `LEGACY_MAX_PATH`
        // UTF-16 code units.
        // This is an optimization to avoid calling `GetFullPathNameW` unnecessarily.
        match path.as_slice() {
            // Starts with `D:`, `D:\`, `D:/`, etc.
            // Does not match if the path starts with a `\` or `/`.
            [drive, COLON, 0] | [drive, COLON, SEP | ALT_SEP, ..]
                if *drive != SEP && *drive != ALT_SEP =>
            {
                return Ok(path);
            }
            // Starts with `\\`, `//`, etc
            [SEP | ALT_SEP, SEP | ALT_SEP, ..] => return Ok(path),
            _ => {}
        }
    }

    // Firstly, get the absolute path using `GetFullPathNameW`.
    // https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfullpathnamew
    let lpfilename = path.as_ptr();
    fill_utf16_buf(
        // SAFETY: `fill_utf16_buf` ensures the `buffer` and `size` are valid.
        // `lpfilename` is a pointer to a null terminated string that is not
        // invalidated until after `GetFullPathNameW` returns successfully.
        |buffer, size| unsafe {
            // While the docs for `GetFullPathNameW` have the standard note
            // about needing a `\\?\` path for a long lpfilename, this does not
            // appear to be true in practice.
            // See:
            // https://stackoverflow.com/questions/38036943/getfullpathnamew-and-long-windows-file-paths
            // https://googleprojectzero.blogspot.com/2016/02/the-definitive-guide-on-win32-to-nt.html
            c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut())
        },
        |mut absolute| {
            // The input buffer is reused for the output: empty it here and refill it
            // below with the prefix, the absolute path and a terminating NUL.
            path.clear();

            // Secondly, add the verbatim prefix. This is easier here because we know the
            // path is now absolute and fully normalized (e.g. `/` has been changed to `\`).
            // The order of the arms matters: the `\\.\` and `\\?\` patterns must be
            // tried before the more general `\\` pattern.
            let prefix = match absolute {
                // C:\ => \\?\C:\
                [_, COLON, SEP, ..] => VERBATIM_PREFIX,
                // \\.\ => \\?\
                [SEP, SEP, DOT, SEP, ..] => {
                    // Drop the leading `\\.\`; it is replaced by the verbatim prefix.
                    absolute = &absolute[4..];
                    VERBATIM_PREFIX
                }
                // Leave \\?\ and \??\ as-is.
                [SEP, SEP, QUERY, SEP, ..] | [SEP, QUERY, QUERY, SEP, ..] => &[],
                // \\ => \\?\UNC\
                [SEP, SEP, ..] => {
                    // Drop the leading `\\`; it is replaced by `\\?\UNC\`.
                    absolute = &absolute[2..];
                    UNC_PREFIX
                }
                // Anything else we leave alone.
                _ => &[],
            };

            // Reserve room for prefix + path + NUL terminator up front.
            path.reserve_exact(prefix.len() + absolute.len() + 1);
            path.extend_from_slice(prefix);
            path.extend_from_slice(absolute);
            path.push(0);
        },
    )?;
    Ok(path)
}
263