1 use super::{c, fill_utf16_buf, to_u16s}; 2 use crate::ffi::{OsStr, OsString}; 3 use crate::io; 4 use crate::mem; 5 use crate::path::{Path, PathBuf, Prefix}; 6 use crate::ptr; 7 8 #[cfg(test)] 9 mod tests; 10 11 pub const MAIN_SEP_STR: &str = "\\"; 12 pub const MAIN_SEP: char = '\\'; 13 14 /// # Safety 15 /// 16 /// `bytes` must be a valid wtf8 encoded slice 17 #[inline] 18 unsafe fn bytes_as_os_str(bytes: &[u8]) -> &OsStr { 19 // &OsStr is layout compatible with &Slice, which is compatible with &Wtf8, 20 // which is compatible with &[u8]. 21 mem::transmute(bytes) 22 } 23 24 #[inline] 25 pub fn is_sep_byte(b: u8) -> bool { 26 b == b'/' || b == b'\\' 27 } 28 29 #[inline] 30 pub fn is_verbatim_sep(b: u8) -> bool { 31 b == b'\\' 32 } 33 34 /// Returns true if `path` looks like a lone filename. 35 pub(crate) fn is_file_name(path: &OsStr) -> bool { 36 !path.bytes().iter().copied().any(is_sep_byte) 37 } 38 pub(crate) fn has_trailing_slash(path: &OsStr) -> bool { 39 let is_verbatim = path.bytes().starts_with(br"\\?\"); 40 let is_separator = if is_verbatim { is_verbatim_sep } else { is_sep_byte }; 41 if let Some(&c) = path.bytes().last() { is_separator(c) } else { false } 42 } 43 44 /// Appends a suffix to a path. 45 /// 46 /// Can be used to append an extension without removing an existing extension. 47 pub(crate) fn append_suffix(path: PathBuf, suffix: &OsStr) -> PathBuf { 48 let mut path = OsString::from(path); 49 path.push(suffix); 50 path.into() 51 } 52 53 pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> { 54 use Prefix::{DeviceNS, Disk, Verbatim, VerbatimDisk, VerbatimUNC, UNC}; 55 56 if let Some(path) = strip_prefix(path, r"\\") { 57 // \\ 58 if let Some(path) = strip_prefix(path, r"?\") { 59 // \\?\ 60 if let Some(path) = strip_prefix(path, r"UNC\") { 61 // \\?\UNC\server\share 62 63 let (server, path) = parse_next_component(path, true); 64 let (share, _) = parse_next_component(path, true); 65 66 Some(VerbatimUNC(server, share)) 67 } else { 68 let (prefix, _) = parse_next_component(path, true); 69 70 // in verbatim paths only recognize an exact drive prefix 71 if let Some(drive) = parse_drive_exact(prefix) { 72 // \\?\C: 73 Some(VerbatimDisk(drive)) 74 } else { 75 // \\?\prefix 76 Some(Verbatim(prefix)) 77 } 78 } 79 } else if let Some(path) = strip_prefix(path, r".\") { 80 // \\.\COM42 81 let (prefix, _) = parse_next_component(path, false); 82 Some(DeviceNS(prefix)) 83 } else { 84 let (server, path) = parse_next_component(path, false); 85 let (share, _) = parse_next_component(path, false); 86 87 if !server.is_empty() && !share.is_empty() { 88 // \\server\share 89 Some(UNC(server, share)) 90 } else { 91 // no valid prefix beginning with "\\" recognized 92 None 93 } 94 } 95 } else if let Some(drive) = parse_drive(path) { 96 // C: 97 Some(Disk(drive)) 98 } else { 99 // no prefix 100 None 101 } 102 } 103 104 // Parses a drive prefix, e.g. "C:" and "C:\whatever" 105 fn parse_drive(prefix: &OsStr) -> Option<u8> { 106 // In most DOS systems, it is not possible to have more than 26 drive letters. 107 // See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>. 108 fn is_valid_drive_letter(drive: &u8) -> bool { 109 drive.is_ascii_alphabetic() 110 } 111 112 match prefix.bytes() { 113 [drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()), 114 _ => None, 115 } 116 } 117 118 // Parses a drive prefix exactly, e.g. "C:" 119 fn parse_drive_exact(prefix: &OsStr) -> Option<u8> { 120 // only parse two bytes: the drive letter and the drive separator 121 if prefix.len() == 2 { parse_drive(prefix) } else { None } 122 } 123 124 fn strip_prefix<'a>(path: &'a OsStr, prefix: &str) -> Option<&'a OsStr> { 125 // `path` and `prefix` are valid wtf8 and utf8 encoded slices respectively, `path[prefix.len()]` 126 // is thus a code point boundary and `path[prefix.len()..]` is a valid wtf8 encoded slice. 127 match path.bytes().strip_prefix(prefix.as_bytes()) { 128 Some(path) => unsafe { Some(bytes_as_os_str(path)) }, 129 None => None, 130 } 131 } 132 133 // Parse the next path component. 134 // 135 // Returns the next component and the rest of the path excluding the component and separator. 136 // Does not recognize `/` as a separator character if `verbatim` is true. 137 fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) { 138 let separator = if verbatim { is_verbatim_sep } else { is_sep_byte }; 139 140 match path.bytes().iter().position(|&x| separator(x)) { 141 Some(separator_start) => { 142 let mut separator_end = separator_start + 1; 143 144 // a series of multiple separator characters is treated as a single separator, 145 // except in verbatim paths 146 while !verbatim && separator_end < path.len() && separator(path.bytes()[separator_end]) 147 { 148 separator_end += 1; 149 } 150 151 let component = &path.bytes()[..separator_start]; 152 153 // Panic safe 154 // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index. 155 let path = &path.bytes()[separator_end..]; 156 157 // SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\') 158 // is encoded in a single byte, therefore `bytes[separator_start]` and 159 // `bytes[separator_end]` must be code point boundaries and thus 160 // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices. 161 unsafe { (bytes_as_os_str(component), bytes_as_os_str(path)) } 162 } 163 None => (path, OsStr::new("")), 164 } 165 } 166 167 /// Returns a UTF-16 encoded path capable of bypassing the legacy `MAX_PATH` limits. 168 /// 169 /// This path may or may not have a verbatim prefix. 170 pub(crate) fn maybe_verbatim(path: &Path) -> io::Result<Vec<u16>> { 171 // Normally the MAX_PATH is 260 UTF-16 code units (including the NULL). 172 // However, for APIs such as CreateDirectory[1], the limit is 248. 173 // 174 // [1]: https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createdirectorya#parameters 175 const LEGACY_MAX_PATH: usize = 248; 176 // UTF-16 encoded code points, used in parsing and building UTF-16 paths. 177 // All of these are in the ASCII range so they can be cast directly to `u16`. 178 const SEP: u16 = b'\\' as _; 179 const ALT_SEP: u16 = b'/' as _; 180 const QUERY: u16 = b'?' as _; 181 const COLON: u16 = b':' as _; 182 const DOT: u16 = b'.' as _; 183 const U: u16 = b'U' as _; 184 const N: u16 = b'N' as _; 185 const C: u16 = b'C' as _; 186 187 // \\?\ 188 const VERBATIM_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP]; 189 // \??\ 190 const NT_PREFIX: &[u16] = &[SEP, QUERY, QUERY, SEP]; 191 // \\?\UNC\ 192 const UNC_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP, U, N, C, SEP]; 193 194 let mut path = to_u16s(path)?; 195 if path.starts_with(VERBATIM_PREFIX) || path.starts_with(NT_PREFIX) || path == &[0] { 196 // Early return for paths that are already verbatim or empty. 197 return Ok(path); 198 } else if path.len() < LEGACY_MAX_PATH { 199 // Early return if an absolute path is less < 260 UTF-16 code units. 200 // This is an optimization to avoid calling `GetFullPathNameW` unnecessarily. 201 match path.as_slice() { 202 // Starts with `D:`, `D:\`, `D:/`, etc. 203 // Does not match if the path starts with a `\` or `/`. 204 [drive, COLON, 0] | [drive, COLON, SEP | ALT_SEP, ..] 205 if *drive != SEP && *drive != ALT_SEP => 206 { 207 return Ok(path); 208 } 209 // Starts with `\\`, `//`, etc 210 [SEP | ALT_SEP, SEP | ALT_SEP, ..] => return Ok(path), 211 _ => {} 212 } 213 } 214 215 // Firstly, get the absolute path using `GetFullPathNameW`. 216 // https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfullpathnamew 217 let lpfilename = path.as_ptr(); 218 fill_utf16_buf( 219 // SAFETY: `fill_utf16_buf` ensures the `buffer` and `size` are valid. 220 // `lpfilename` is a pointer to a null terminated string that is not 221 // invalidated until after `GetFullPathNameW` returns successfully. 222 |buffer, size| unsafe { 223 // While the docs for `GetFullPathNameW` have the standard note 224 // about needing a `\\?\` path for a long lpfilename, this does not 225 // appear to be true in practice. 226 // See: 227 // https://stackoverflow.com/questions/38036943/getfullpathnamew-and-long-windows-file-paths 228 // https://googleprojectzero.blogspot.com/2016/02/the-definitive-guide-on-win32-to-nt.html 229 c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut()) 230 }, 231 |mut absolute| { 232 path.clear(); 233 234 // Secondly, add the verbatim prefix. This is easier here because we know the 235 // path is now absolute and fully normalized (e.g. `/` has been changed to `\`). 236 let prefix = match absolute { 237 // C:\ => \\?\C:\ 238 [_, COLON, SEP, ..] => VERBATIM_PREFIX, 239 // \\.\ => \\?\ 240 [SEP, SEP, DOT, SEP, ..] => { 241 absolute = &absolute[4..]; 242 VERBATIM_PREFIX 243 } 244 // Leave \\?\ and \??\ as-is. 245 [SEP, SEP, QUERY, SEP, ..] | [SEP, QUERY, QUERY, SEP, ..] => &[], 246 // \\ => \\?\UNC\ 247 [SEP, SEP, ..] => { 248 absolute = &absolute[2..]; 249 UNC_PREFIX 250 } 251 // Anything else we leave alone. 252 _ => &[], 253 }; 254 255 path.reserve_exact(prefix.len() + absolute.len() + 1); 256 path.extend_from_slice(prefix); 257 path.extend_from_slice(absolute); 258 path.push(0); 259 }, 260 )?; 261 Ok(path) 262 } 263