1 /***
2 *splitpath.c - break down path name into components
3 *
4 * Copyright (c) Microsoft Corporation. All rights reserved.
5 *
6 *Purpose:
7 * To provide support for accessing the individual components of an
8 * arbitrary path name
9 *
10 *******************************************************************************/
11 #include <corecrt_internal.h>
12 #include <mbctype.h>
13 #include <stdlib.h>
14 #include <corecrt_internal_securecrt.h>
15 #include <corecrt_internal_traits.h>
16
17 namespace
18 {
19 template <typename Character>
20 struct component_buffers
21 {
22 _Null_terminated_ _Maybenull_
23 Character* _drive;
24 size_t _drive_count;
25 _Null_terminated_ _Maybenull_
26 Character* _directory;
27 size_t _directory_count;
28 _Null_terminated_ _Maybenull_
29 Character* _file_name;
30 size_t _file_name_count;
31 _Null_terminated_ _Maybenull_
32 Character* _extension;
33 size_t _extension_count;
34 };
35 }
36
37 template <typename Character, typename ResetPolicy>
reset_buffers(component_buffers<Character> * const components,ResetPolicy const reset_buffer)38 static void __cdecl reset_buffers(
39 component_buffers<Character>* const components,
40 ResetPolicy const reset_buffer
41 ) throw()
42 {
43 reset_buffer(components->_drive, components->_drive_count );
44 reset_buffer(components->_directory, components->_directory_count);
45 reset_buffer(components->_file_name, components->_file_name_count);
46 reset_buffer(components->_extension, components->_extension_count);
47 }
48
49 // is_lead_byte helper
50 // these functions are only used to ensure that trailing bytes that might
51 // look like slashes or periods aren't misdetected.
52 // UTF-8/UTF-16 don't have that problem as trail bytes never look like \ or .
needs_trail_byte(char const c)53 static bool __cdecl needs_trail_byte(char const c) throw()
54 {
55 // UTF-8 is OK here as the caller is really only concerned about trail
56 // bytes that look like . or \ and UTF-8 trail bytes never will.
57 return _ismbblead(c) != 0;
58 }
59
needs_trail_byte(wchar_t)60 static bool __cdecl needs_trail_byte(wchar_t) throw()
61 {
62 // UTF-16 is OK here as the caller is really only concerned about trail
63 // characters that look like . or \ and UTF-16 surrogate pairs never will.
64 return false;
65 }
66
67 template <typename Character, typename ResetPolicy, typename BufferCountTransformer>
common_splitpath_internal(Character const * const path,component_buffers<Character> * const components,ResetPolicy const reset_buffer,BufferCountTransformer const transform_buffer_count)68 static errno_t __cdecl common_splitpath_internal(
69 Character const* const path,
70 component_buffers<Character>* const components,
71 ResetPolicy const reset_buffer,
72 BufferCountTransformer const transform_buffer_count
73 ) throw()
74 {
75 using traits = __crt_char_traits<Character>;
76
77 if (!path || !components)
78 {
79 reset_buffers(components, reset_buffer);
80 _VALIDATE_RETURN_ERRCODE(false, EINVAL);
81 }
82
83 if ((components->_drive == nullptr) != (components->_drive_count == 0) ||
84 (components->_directory == nullptr) != (components->_directory_count == 0) ||
85 (components->_file_name == nullptr) != (components->_file_name_count == 0) ||
86 (components->_extension == nullptr) != (components->_extension_count == 0))
87 {
88 reset_buffers(components, reset_buffer);
89 _VALIDATE_RETURN_ERRCODE(false, EINVAL);
90 }
91
92 Character const* path_it = path;
93
94 // Extract drive letter and ':', if any:
95 {
96 size_t skip = _MAX_DRIVE - 2;
97 Character const* p = path_it;
98 while (skip > 0 && *p != '\0')
99 {
100 --skip;
101 ++p;
102 }
103
104 if (*p == ':')
105 {
106 if (components->_drive)
107 {
108 if (components->_drive_count < _MAX_DRIVE)
109 {
110 reset_buffers(components, reset_buffer);
111 return errno = ERANGE;
112 }
113
114 traits::tcsncpy_s(components->_drive, transform_buffer_count(components->_drive_count), path_it, _MAX_DRIVE - 1);
115 }
116
117 path_it = p + 1;
118 }
119 else
120 {
121 reset_buffer(components->_drive, components->_drive_count);
122 }
123 }
124
125 // Extract the path string, if any. The path iterator now points to the first
126 // character of the path, if there is one, or to the filename or extension if
127 // no path was specified. Scan ahead for the last occurence, if any, of a '/'
128 // or '\' path separator character. If none is found, there is no path. We
129 // will also note the last '.' character found, if any, to aid in handling the
130 // extension.
131 Character const* p = path_it;
132 Character const* last_slash = nullptr;
133 Character const* last_dot = nullptr;
134 for (; *p != '\0'; ++p)
135 {
136 // UTF-8 will never look like slashes or periods so this will be OK for UTF-8
137 if (needs_trail_byte(*p))
138 {
139 // For narrow character strings, skip any multibyte characters to avoid
140 // matching trail bytes that "look like" slashes or periods. This ++p
141 // will skip the lead byte; the ++p in the for loop will skip the trail
142 // byte.
143 ++p;
144
145 // If we've reached the end of the string, there is no trail byte.
146 // (Technically, the string is malformed.)
147 if (*p == '\0')
148 {
149 break;
150 }
151 }
152 else if (*p == '/' || *p == '\\')
153 {
154 last_slash = p + 1; // Point one past for later copy
155 }
156 else if (*p == '.')
157 {
158 last_dot = p;
159 }
160 }
161
162 if (last_slash)
163 {
164 if (components->_directory)
165 {
166 size_t const length = static_cast<size_t>(last_slash - path_it);
167 if (components->_directory_count <= length)
168 {
169 reset_buffers(components, reset_buffer);
170 return errno = ERANGE;
171 }
172
173 traits::tcsncpy_s(components->_directory, transform_buffer_count(components->_directory_count), path_it, length);
174 }
175
176 path_it = last_slash;
177 }
178 else
179 {
180 reset_buffer(components->_directory, components->_directory_count);
181 }
182
183 // Extract the file name and extension, if any. The path iterator now points
184 // to the first character of the file name, if any, or the extension if no
185 // file name was given. The dot points to the '.' beginning the extension,
186 // if any.
187 if (last_dot && last_dot >= path_it)
188 {
189 // We found a dot; it separates the file name from the extension:
190 if (components->_file_name)
191 {
192 size_t const length = static_cast<size_t>(last_dot - path_it);
193 if (components->_file_name_count <= length)
194 {
195 reset_buffers(components, reset_buffer);
196 return errno = ERANGE;
197 }
198
199 traits::tcsncpy_s(components->_file_name, transform_buffer_count(components->_file_name_count), path_it, length);
200 }
201
202 if (components->_extension)
203 {
204 size_t const length = static_cast<size_t>(p - last_dot);
205 if (components->_extension_count <= length)
206 {
207 reset_buffers(components, reset_buffer);
208 return errno = ERANGE;
209 }
210
211 traits::tcsncpy_s(components->_extension, transform_buffer_count(components->_extension_count), last_dot, length);
212 }
213 }
214 else
215 {
216 // No extension found; reset the extension and treat the remaining text
217 // as the file name:
218 if (components->_file_name)
219 {
220 size_t const length = static_cast<size_t>(p - path_it);
221 if (components->_file_name_count <= length)
222 {
223 reset_buffers(components, reset_buffer);
224 return errno = ERANGE;
225 }
226
227 traits::tcsncpy_s(components->_file_name, transform_buffer_count(components->_file_name_count), path_it, length);
228 }
229
230 if (components->_extension)
231 {
232 reset_buffer(components->_extension, components->_extension_count);
233 }
234 }
235
236 return 0;
237 }
238
239 template <typename Character>
240 _Success_(return == 0)
common_splitpath_s(Character const * const path,component_buffers<Character> * const components)241 static errno_t __cdecl common_splitpath_s(
242 Character const* const path,
243 component_buffers<Character>* const components
244 ) throw()
245 {
246 return common_splitpath_internal(path, components, [](_Out_writes_z_(buffer_count) Character* const buffer, size_t const buffer_count)
247 {
248 UNREFERENCED_PARAMETER(buffer);
249 UNREFERENCED_PARAMETER(buffer_count);
250 if (buffer)
251 {
252 _RESET_STRING(buffer, buffer_count);
253 }
254 },
255 [](size_t const n) { return n; });
256 }
257
258
_splitpath_s(char const * const path,char * const drive,size_t const drive_count,char * const directory,size_t const directory_count,char * const file_name,size_t const file_name_count,char * const extension,size_t const extension_count)259 extern "C" errno_t __cdecl _splitpath_s(
260 char const* const path,
261 char* const drive,
262 size_t const drive_count,
263 char* const directory,
264 size_t const directory_count,
265 char* const file_name,
266 size_t const file_name_count,
267 char* const extension,
268 size_t const extension_count
269 )
270 {
271 component_buffers<char> components =
272 {
273 drive, drive_count,
274 directory, directory_count,
275 file_name, file_name_count,
276 extension, extension_count
277 };
278
279 return common_splitpath_s(path, &components);
280 }
281
_wsplitpath_s(wchar_t const * const path,wchar_t * const drive,size_t const drive_count,wchar_t * const directory,size_t const directory_count,wchar_t * const file_name,size_t const file_name_count,wchar_t * const extension,size_t const extension_count)282 extern "C" errno_t __cdecl _wsplitpath_s(
283 wchar_t const* const path,
284 wchar_t* const drive,
285 size_t const drive_count,
286 wchar_t* const directory,
287 size_t const directory_count,
288 wchar_t* const file_name,
289 size_t const file_name_count,
290 wchar_t* const extension,
291 size_t const extension_count
292 )
293 {
294 component_buffers<wchar_t> components =
295 {
296 drive, drive_count,
297 directory, directory_count,
298 file_name, file_name_count,
299 extension, extension_count
300 };
301
302 return common_splitpath_s(path, &components);
303 }
304
305 template <typename Character>
common_splitpath(_In_z_ Character const * const path,_Pre_maybenull_ _Post_z_ Character * const drive,_Pre_maybenull_ _Post_z_ Character * const directory,_Pre_maybenull_ _Post_z_ Character * const file_name,_Pre_maybenull_ _Post_z_ Character * const extension)306 static void __cdecl common_splitpath(
307 _In_z_ Character const* const path,
308 _Pre_maybenull_ _Post_z_ Character* const drive,
309 _Pre_maybenull_ _Post_z_ Character* const directory,
310 _Pre_maybenull_ _Post_z_ Character* const file_name,
311 _Pre_maybenull_ _Post_z_ Character* const extension
312 ) throw()
313 {
314 component_buffers<Character> components =
315 {
316 drive, drive ? _MAX_DRIVE : 0,
317 directory, directory ? _MAX_DIR : 0,
318 file_name, file_name ? _MAX_FNAME : 0,
319 extension, extension ? _MAX_EXT : 0
320 };
321
322 common_splitpath_internal(path, &components, [](Character* const buffer, size_t const buffer_count)
323 {
324 if (buffer && buffer_count != 0)
325 {
326 buffer[0] = '\0';
327 }
328 },
329 [](size_t){ return static_cast<size_t>(-1); });
330 }
331
_splitpath(char const * const path,char * const drive,char * const directory,char * const file_name,char * const extension)332 extern "C" void __cdecl _splitpath(
333 char const* const path,
334 char* const drive,
335 char* const directory,
336 char* const file_name,
337 char* const extension
338 )
339 {
340 return common_splitpath(path, drive, directory, file_name, extension);
341 }
342
_wsplitpath(wchar_t const * const path,wchar_t * const drive,wchar_t * const directory,wchar_t * const file_name,wchar_t * const extension)343 extern "C" void __cdecl _wsplitpath(
344 wchar_t const* const path,
345 wchar_t* const drive,
346 wchar_t* const directory,
347 wchar_t* const file_name,
348 wchar_t* const extension
349 )
350 {
351 return common_splitpath(path, drive, directory, file_name, extension);
352 }
353