1 /*** 2 *splitpath.c - break down path name into components 3 * 4 * Copyright (c) Microsoft Corporation. All rights reserved. 5 * 6 *Purpose: 7 * To provide support for accessing the individual components of an 8 * arbitrary path name 9 * 10 *******************************************************************************/ 11 #include <corecrt_internal.h> 12 #include <mbctype.h> 13 #include <stdlib.h> 14 #include <corecrt_internal_securecrt.h> 15 #include <corecrt_internal_traits.h> 16 17 namespace 18 { 19 template <typename Character> 20 struct component_buffers 21 { 22 _Null_terminated_ _Maybenull_ 23 Character* _drive; 24 size_t _drive_count; 25 _Null_terminated_ _Maybenull_ 26 Character* _directory; 27 size_t _directory_count; 28 _Null_terminated_ _Maybenull_ 29 Character* _file_name; 30 size_t _file_name_count; 31 _Null_terminated_ _Maybenull_ 32 Character* _extension; 33 size_t _extension_count; 34 }; 35 } 36 37 template <typename Character, typename ResetPolicy> 38 static void __cdecl reset_buffers( 39 component_buffers<Character>* const components, 40 ResetPolicy const reset_buffer 41 ) throw() 42 { 43 reset_buffer(components->_drive, components->_drive_count ); 44 reset_buffer(components->_directory, components->_directory_count); 45 reset_buffer(components->_file_name, components->_file_name_count); 46 reset_buffer(components->_extension, components->_extension_count); 47 } 48 49 // is_lead_byte helper 50 // these functions are only used to ensure that trailing bytes that might 51 // look like slashes or periods aren't misdetected. 52 // UTF-8/UTF-16 don't have that problem as trail bytes never look like \ or . 53 static bool __cdecl needs_trail_byte(char const c) throw() 54 { 55 // UTF-8 is OK here as the caller is really only concerned about trail 56 // bytes that look like . or \ and UTF-8 trail bytes never will. 57 return _ismbblead(c) != 0; 58 } 59 60 static bool __cdecl needs_trail_byte(wchar_t) throw() 61 { 62 // UTF-16 is OK here as the caller is really only concerned about trail 63 // characters that look like . or \ and UTF-16 surrogate pairs never will. 64 return false; 65 } 66 67 template <typename Character, typename ResetPolicy, typename BufferCountTransformer> 68 static errno_t __cdecl common_splitpath_internal( 69 Character const* const path, 70 component_buffers<Character>* const components, 71 ResetPolicy const reset_buffer, 72 BufferCountTransformer const transform_buffer_count 73 ) throw() 74 { 75 using traits = __crt_char_traits<Character>; 76 77 if (!path || !components) 78 { 79 reset_buffers(components, reset_buffer); 80 _VALIDATE_RETURN_ERRCODE(false, EINVAL); 81 } 82 83 if ((components->_drive == nullptr) != (components->_drive_count == 0) || 84 (components->_directory == nullptr) != (components->_directory_count == 0) || 85 (components->_file_name == nullptr) != (components->_file_name_count == 0) || 86 (components->_extension == nullptr) != (components->_extension_count == 0)) 87 { 88 reset_buffers(components, reset_buffer); 89 _VALIDATE_RETURN_ERRCODE(false, EINVAL); 90 } 91 92 Character const* path_it = path; 93 94 // Extract drive letter and ':', if any: 95 { 96 size_t skip = _MAX_DRIVE - 2; 97 Character const* p = path_it; 98 while (skip > 0 && *p != '\0') 99 { 100 --skip; 101 ++p; 102 } 103 104 if (*p == ':') 105 { 106 if (components->_drive) 107 { 108 if (components->_drive_count < _MAX_DRIVE) 109 { 110 reset_buffers(components, reset_buffer); 111 return errno = ERANGE; 112 } 113 114 traits::tcsncpy_s(components->_drive, transform_buffer_count(components->_drive_count), path_it, _MAX_DRIVE - 1); 115 } 116 117 path_it = p + 1; 118 } 119 else 120 { 121 reset_buffer(components->_drive, components->_drive_count); 122 } 123 } 124 125 // Extract the path string, if any. The path iterator now points to the first 126 // character of the path, if there is one, or to the filename or extension if 127 // no path was specified. Scan ahead for the last occurence, if any, of a '/' 128 // or '\' path separator character. If none is found, there is no path. We 129 // will also note the last '.' character found, if any, to aid in handling the 130 // extension. 131 Character const* p = path_it; 132 Character const* last_slash = nullptr; 133 Character const* last_dot = nullptr; 134 for (; *p != '\0'; ++p) 135 { 136 // UTF-8 will never look like slashes or periods so this will be OK for UTF-8 137 if (needs_trail_byte(*p)) 138 { 139 // For narrow character strings, skip any multibyte characters to avoid 140 // matching trail bytes that "look like" slashes or periods. This ++p 141 // will skip the lead byte; the ++p in the for loop will skip the trail 142 // byte. 143 ++p; 144 145 // If we've reached the end of the string, there is no trail byte. 146 // (Technically, the string is malformed.) 147 if (*p == '\0') 148 { 149 break; 150 } 151 } 152 else if (*p == '/' || *p == '\\') 153 { 154 last_slash = p + 1; // Point one past for later copy 155 } 156 else if (*p == '.') 157 { 158 last_dot = p; 159 } 160 } 161 162 if (last_slash) 163 { 164 if (components->_directory) 165 { 166 size_t const length = static_cast<size_t>(last_slash - path_it); 167 if (components->_directory_count <= length) 168 { 169 reset_buffers(components, reset_buffer); 170 return errno = ERANGE; 171 } 172 173 traits::tcsncpy_s(components->_directory, transform_buffer_count(components->_directory_count), path_it, length); 174 } 175 176 path_it = last_slash; 177 } 178 else 179 { 180 reset_buffer(components->_directory, components->_directory_count); 181 } 182 183 // Extract the file name and extension, if any. The path iterator now points 184 // to the first character of the file name, if any, or the extension if no 185 // file name was given. The dot points to the '.' beginning the extension, 186 // if any. 187 if (last_dot && last_dot >= path_it) 188 { 189 // We found a dot; it separates the file name from the extension: 190 if (components->_file_name) 191 { 192 size_t const length = static_cast<size_t>(last_dot - path_it); 193 if (components->_file_name_count <= length) 194 { 195 reset_buffers(components, reset_buffer); 196 return errno = ERANGE; 197 } 198 199 traits::tcsncpy_s(components->_file_name, transform_buffer_count(components->_file_name_count), path_it, length); 200 } 201 202 if (components->_extension) 203 { 204 size_t const length = static_cast<size_t>(p - last_dot); 205 if (components->_extension_count <= length) 206 { 207 reset_buffers(components, reset_buffer); 208 return errno = ERANGE; 209 } 210 211 traits::tcsncpy_s(components->_extension, transform_buffer_count(components->_extension_count), last_dot, length); 212 } 213 } 214 else 215 { 216 // No extension found; reset the extension and treat the remaining text 217 // as the file name: 218 if (components->_file_name) 219 { 220 size_t const length = static_cast<size_t>(p - path_it); 221 if (components->_file_name_count <= length) 222 { 223 reset_buffers(components, reset_buffer); 224 return errno = ERANGE; 225 } 226 227 traits::tcsncpy_s(components->_file_name, transform_buffer_count(components->_file_name_count), path_it, length); 228 } 229 230 if (components->_extension) 231 { 232 reset_buffer(components->_extension, components->_extension_count); 233 } 234 } 235 236 return 0; 237 } 238 239 template <typename Character> 240 _Success_(return == 0) 241 static errno_t __cdecl common_splitpath_s( 242 Character const* const path, 243 component_buffers<Character>* const components 244 ) throw() 245 { 246 return common_splitpath_internal(path, components, [](_Out_writes_z_(buffer_count) Character* const buffer, size_t const buffer_count) 247 { 248 UNREFERENCED_PARAMETER(buffer); 249 UNREFERENCED_PARAMETER(buffer_count); 250 if (buffer) 251 { 252 _RESET_STRING(buffer, buffer_count); 253 } 254 }, 255 [](size_t const n) { return n; }); 256 } 257 258 259 extern "C" errno_t __cdecl _splitpath_s( 260 char const* const path, 261 char* const drive, 262 size_t const drive_count, 263 char* const directory, 264 size_t const directory_count, 265 char* const file_name, 266 size_t const file_name_count, 267 char* const extension, 268 size_t const extension_count 269 ) 270 { 271 component_buffers<char> components = 272 { 273 drive, drive_count, 274 directory, directory_count, 275 file_name, file_name_count, 276 extension, extension_count 277 }; 278 279 return common_splitpath_s(path, &components); 280 } 281 282 extern "C" errno_t __cdecl _wsplitpath_s( 283 wchar_t const* const path, 284 wchar_t* const drive, 285 size_t const drive_count, 286 wchar_t* const directory, 287 size_t const directory_count, 288 wchar_t* const file_name, 289 size_t const file_name_count, 290 wchar_t* const extension, 291 size_t const extension_count 292 ) 293 { 294 component_buffers<wchar_t> components = 295 { 296 drive, drive_count, 297 directory, directory_count, 298 file_name, file_name_count, 299 extension, extension_count 300 }; 301 302 return common_splitpath_s(path, &components); 303 } 304 305 template <typename Character> 306 static void __cdecl common_splitpath( 307 _In_z_ Character const* const path, 308 _Pre_maybenull_ _Post_z_ Character* const drive, 309 _Pre_maybenull_ _Post_z_ Character* const directory, 310 _Pre_maybenull_ _Post_z_ Character* const file_name, 311 _Pre_maybenull_ _Post_z_ Character* const extension 312 ) throw() 313 { 314 component_buffers<Character> components = 315 { 316 drive, drive ? _MAX_DRIVE : 0, 317 directory, directory ? _MAX_DIR : 0, 318 file_name, file_name ? _MAX_FNAME : 0, 319 extension, extension ? _MAX_EXT : 0 320 }; 321 322 common_splitpath_internal(path, &components, [](Character* const buffer, size_t const buffer_count) 323 { 324 if (buffer && buffer_count != 0) 325 { 326 buffer[0] = '\0'; 327 } 328 }, 329 [](size_t){ return static_cast<size_t>(-1); }); 330 } 331 332 extern "C" void __cdecl _splitpath( 333 char const* const path, 334 char* const drive, 335 char* const directory, 336 char* const file_name, 337 char* const extension 338 ) 339 { 340 return common_splitpath(path, drive, directory, file_name, extension); 341 } 342 343 extern "C" void __cdecl _wsplitpath( 344 wchar_t const* const path, 345 wchar_t* const drive, 346 wchar_t* const directory, 347 wchar_t* const file_name, 348 wchar_t* const extension 349 ) 350 { 351 return common_splitpath(path, drive, directory, file_name, extension); 352 } 353