xref: /reactos/sdk/lib/ucrt/filesystem/splitpath.cpp (revision 04e0dc4a)
1 /***
2 *splitpath.c - break down path name into components
3 *
4 *       Copyright (c) Microsoft Corporation. All rights reserved.
5 *
6 *Purpose:
7 *       To provide support for accessing the individual components of an
8 *       arbitrary path name
9 *
10 *******************************************************************************/
11 #include <corecrt_internal.h>
12 #include <mbctype.h>
13 #include <stdlib.h>
14 #include <corecrt_internal_securecrt.h>
15 #include <corecrt_internal_traits.h>
16 
17 namespace
18 {
19     template <typename Character>
20     struct component_buffers
21     {
22         _Null_terminated_ _Maybenull_
23         Character* _drive;
24         size_t     _drive_count;
25         _Null_terminated_ _Maybenull_
26         Character* _directory;
27         size_t     _directory_count;
28         _Null_terminated_ _Maybenull_
29         Character* _file_name;
30         size_t     _file_name_count;
31         _Null_terminated_ _Maybenull_
32         Character* _extension;
33         size_t     _extension_count;
34     };
35 }
36 
37 template <typename Character, typename ResetPolicy>
reset_buffers(component_buffers<Character> * const components,ResetPolicy const reset_buffer)38 static void __cdecl reset_buffers(
39     component_buffers<Character>* const components,
40     ResetPolicy                   const reset_buffer
41     ) throw()
42 {
43     reset_buffer(components->_drive,     components->_drive_count    );
44     reset_buffer(components->_directory, components->_directory_count);
45     reset_buffer(components->_file_name, components->_file_name_count);
46     reset_buffer(components->_extension, components->_extension_count);
47 }
48 
49 // is_lead_byte helper
50 // these functions are only used to ensure that trailing bytes that might
51 // look like slashes or periods aren't misdetected.
52 // UTF-8/UTF-16 don't have that problem as trail bytes never look like \ or .
needs_trail_byte(char const c)53 static bool __cdecl needs_trail_byte(char const c) throw()
54 {
55     // UTF-8 is OK here as the caller is really only concerned about trail
56     // bytes that look like . or \ and UTF-8 trail bytes never will.
57     return _ismbblead(c) != 0;
58 }
59 
needs_trail_byte(wchar_t)60 static bool __cdecl needs_trail_byte(wchar_t) throw()
61 {
62     // UTF-16 is OK here as the caller is really only concerned about trail
63     // characters that look like . or \ and UTF-16 surrogate pairs never will.
64     return false;
65 }
66 
67 template <typename Character, typename ResetPolicy, typename BufferCountTransformer>
common_splitpath_internal(Character const * const path,component_buffers<Character> * const components,ResetPolicy const reset_buffer,BufferCountTransformer const transform_buffer_count)68 static errno_t __cdecl common_splitpath_internal(
69     Character const*              const path,
70     component_buffers<Character>* const components,
71     ResetPolicy                   const reset_buffer,
72     BufferCountTransformer        const transform_buffer_count
73     ) throw()
74 {
75     using traits = __crt_char_traits<Character>;
76 
77     if (!path || !components)
78     {
79         reset_buffers(components, reset_buffer);
80         _VALIDATE_RETURN_ERRCODE(false, EINVAL);
81     }
82 
83     if ((components->_drive     == nullptr) != (components->_drive_count     == 0) ||
84         (components->_directory == nullptr) != (components->_directory_count == 0) ||
85         (components->_file_name == nullptr) != (components->_file_name_count == 0) ||
86         (components->_extension == nullptr) != (components->_extension_count == 0))
87     {
88         reset_buffers(components, reset_buffer);
89         _VALIDATE_RETURN_ERRCODE(false, EINVAL);
90     }
91 
92     Character const* path_it = path;
93 
94     // Extract drive letter and ':', if any:
95     {
96         size_t skip = _MAX_DRIVE - 2;
97         Character const* p = path_it;
98         while (skip > 0 && *p != '\0')
99         {
100             --skip;
101             ++p;
102         }
103 
104         if (*p == ':')
105         {
106             if (components->_drive)
107             {
108                 if (components->_drive_count < _MAX_DRIVE)
109                 {
110                     reset_buffers(components, reset_buffer);
111                     return errno = ERANGE;
112                 }
113 
114                 traits::tcsncpy_s(components->_drive, transform_buffer_count(components->_drive_count), path_it, _MAX_DRIVE - 1);
115             }
116 
117             path_it = p + 1;
118         }
119         else
120         {
121             reset_buffer(components->_drive, components->_drive_count);
122         }
123     }
124 
125     // Extract the path string, if any.  The path iterator now points to the first
126     // character of the path, if there is one, or to the filename or extension if
127     // no path was specified.  Scan ahead for the last occurence, if any, of a '/'
128     //  or '\' path separator character.  If none is found, there is no path.  We
129     // will also note the last '.' character found, if any, to aid in handling the
130     // extension.
131     Character const* p          = path_it;
132     Character const* last_slash = nullptr;
133     Character const* last_dot   = nullptr;
134     for (; *p != '\0'; ++p)
135     {
136         // UTF-8 will never look like slashes or periods so this will be OK for UTF-8
137         if (needs_trail_byte(*p))
138         {
139             // For narrow character strings, skip any multibyte characters to avoid
140             // matching trail bytes that "look like" slashes or periods.  This ++p
141             // will skip the lead byte; the ++p in the for loop will skip the trail
142             // byte.
143             ++p;
144 
145             // If we've reached the end of the string, there is no trail byte.
146             // (Technically, the string is malformed.)
147             if (*p == '\0')
148             {
149                 break;
150             }
151         }
152         else if (*p == '/' || *p == '\\')
153         {
154             last_slash = p + 1; // Point one past for later copy
155         }
156         else if (*p == '.')
157         {
158             last_dot = p;
159         }
160     }
161 
162     if (last_slash)
163     {
164         if (components->_directory)
165         {
166             size_t const length = static_cast<size_t>(last_slash - path_it);
167             if (components->_directory_count <= length)
168             {
169                 reset_buffers(components, reset_buffer);
170                 return errno = ERANGE;
171             }
172 
173             traits::tcsncpy_s(components->_directory, transform_buffer_count(components->_directory_count), path_it, length);
174         }
175 
176         path_it = last_slash;
177     }
178     else
179     {
180         reset_buffer(components->_directory, components->_directory_count);
181     }
182 
183     // Extract the file name and extension, if any.  The path iterator now points
184     // to the first character of the file name, if any, or the extension if no
185     // file name was given.  The dot points to the '.' beginning the extension,
186     // if any.
187     if (last_dot && last_dot >= path_it)
188     {
189         // We found a dot; it separates the file name from the extension:
190         if (components->_file_name)
191         {
192             size_t const length = static_cast<size_t>(last_dot - path_it);
193             if (components->_file_name_count <= length)
194             {
195                 reset_buffers(components, reset_buffer);
196                 return errno = ERANGE;
197             }
198 
199             traits::tcsncpy_s(components->_file_name, transform_buffer_count(components->_file_name_count), path_it, length);
200         }
201 
202         if (components->_extension)
203         {
204             size_t const length = static_cast<size_t>(p - last_dot);
205             if (components->_extension_count <= length)
206             {
207                 reset_buffers(components, reset_buffer);
208                 return errno = ERANGE;
209             }
210 
211             traits::tcsncpy_s(components->_extension, transform_buffer_count(components->_extension_count), last_dot, length);
212         }
213     }
214     else
215     {
216         // No extension found; reset the extension and treat the remaining text
217         // as the file name:
218         if (components->_file_name)
219         {
220             size_t const length = static_cast<size_t>(p - path_it);
221             if (components->_file_name_count <= length)
222             {
223                 reset_buffers(components, reset_buffer);
224                 return errno = ERANGE;
225             }
226 
227             traits::tcsncpy_s(components->_file_name, transform_buffer_count(components->_file_name_count), path_it, length);
228         }
229 
230         if (components->_extension)
231         {
232             reset_buffer(components->_extension, components->_extension_count);
233         }
234     }
235 
236     return 0;
237 }
238 
239 template <typename Character>
240 _Success_(return == 0)
common_splitpath_s(Character const * const path,component_buffers<Character> * const components)241 static errno_t __cdecl common_splitpath_s(
242     Character const*              const path,
243     component_buffers<Character>* const components
244     ) throw()
245 {
246     return common_splitpath_internal(path, components, [](_Out_writes_z_(buffer_count) Character* const buffer, size_t const buffer_count)
247     {
248         UNREFERENCED_PARAMETER(buffer);
249         UNREFERENCED_PARAMETER(buffer_count);
250         if (buffer)
251         {
252             _RESET_STRING(buffer, buffer_count);
253         }
254     },
255     [](size_t const n) { return n; });
256 }
257 
258 
_splitpath_s(char const * const path,char * const drive,size_t const drive_count,char * const directory,size_t const directory_count,char * const file_name,size_t const file_name_count,char * const extension,size_t const extension_count)259 extern "C" errno_t __cdecl _splitpath_s(
260     char const* const path,
261     char*       const drive,
262     size_t      const drive_count,
263     char*       const directory,
264     size_t      const directory_count,
265     char*       const file_name,
266     size_t      const file_name_count,
267     char*       const extension,
268     size_t      const extension_count
269     )
270 {
271     component_buffers<char> components =
272     {
273         drive,     drive_count,
274         directory, directory_count,
275         file_name, file_name_count,
276         extension, extension_count
277     };
278 
279     return common_splitpath_s(path, &components);
280 }
281 
_wsplitpath_s(wchar_t const * const path,wchar_t * const drive,size_t const drive_count,wchar_t * const directory,size_t const directory_count,wchar_t * const file_name,size_t const file_name_count,wchar_t * const extension,size_t const extension_count)282 extern "C" errno_t __cdecl _wsplitpath_s(
283     wchar_t const* const path,
284     wchar_t*       const drive,
285     size_t         const drive_count,
286     wchar_t*       const directory,
287     size_t         const directory_count,
288     wchar_t*       const file_name,
289     size_t         const file_name_count,
290     wchar_t*       const extension,
291     size_t         const extension_count
292     )
293 {
294     component_buffers<wchar_t> components =
295     {
296         drive,     drive_count,
297         directory, directory_count,
298         file_name, file_name_count,
299         extension, extension_count
300     };
301 
302     return common_splitpath_s(path, &components);
303 }
304 
305 template <typename Character>
common_splitpath(_In_z_ Character const * const path,_Pre_maybenull_ _Post_z_ Character * const drive,_Pre_maybenull_ _Post_z_ Character * const directory,_Pre_maybenull_ _Post_z_ Character * const file_name,_Pre_maybenull_ _Post_z_ Character * const extension)306 static void __cdecl common_splitpath(
307     _In_z_                   Character const* const path,
308     _Pre_maybenull_ _Post_z_ Character*       const drive,
309     _Pre_maybenull_ _Post_z_ Character*       const directory,
310     _Pre_maybenull_ _Post_z_ Character*       const file_name,
311     _Pre_maybenull_ _Post_z_ Character*       const extension
312     ) throw()
313 {
314     component_buffers<Character> components =
315     {
316         drive,     drive     ? _MAX_DRIVE : 0,
317         directory, directory ? _MAX_DIR   : 0,
318         file_name, file_name ? _MAX_FNAME : 0,
319         extension, extension ? _MAX_EXT   : 0
320     };
321 
322     common_splitpath_internal(path, &components, [](Character* const buffer, size_t const buffer_count)
323     {
324         if (buffer && buffer_count != 0)
325         {
326             buffer[0] = '\0';
327         }
328     },
329     [](size_t){ return static_cast<size_t>(-1); });
330 }
331 
_splitpath(char const * const path,char * const drive,char * const directory,char * const file_name,char * const extension)332 extern "C" void __cdecl _splitpath(
333     char const* const path,
334     char*       const drive,
335     char*       const directory,
336     char*       const file_name,
337     char*       const extension
338     )
339 {
340     return common_splitpath(path, drive, directory, file_name, extension);
341 }
342 
_wsplitpath(wchar_t const * const path,wchar_t * const drive,wchar_t * const directory,wchar_t * const file_name,wchar_t * const extension)343 extern "C" void __cdecl _wsplitpath(
344     wchar_t const* const path,
345     wchar_t*       const drive,
346     wchar_t*       const directory,
347     wchar_t*       const file_name,
348     wchar_t*       const extension
349     )
350 {
351     return common_splitpath(path, drive, directory, file_name, extension);
352 }
353