1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4 
5 using System.Diagnostics;
6 using System.Runtime.CompilerServices;
7 using System.Text;
8 
9 namespace System.IO
10 {
11     /// <summary>Contains internal path helpers that are shared between many projects.</summary>
12     internal static partial class PathInternal
13     {
14         // All paths in Win32 ultimately end up becoming a path to a File object in the Windows object manager. Passed in paths get mapped through
        // DosDevice symbolic links in the object tree to actual File objects under \Device. To illustrate, this is what happens with a typical
16         // path "Foo" passed as a filename to any Win32 API:
17         //
18         //  1. "Foo" is recognized as a relative path and is appended to the current directory (say, "C:\" in our example)
19         //  2. "C:\Foo" is prepended with the DosDevice namespace "\??\"
20         //  3. CreateFile tries to create an object handle to the requested file "\??\C:\Foo"
21         //  4. The Object Manager recognizes the DosDevices prefix and looks
22         //      a. First in the current session DosDevices ("\Sessions\1\DosDevices\" for example, mapped network drives go here)
23         //      b. If not found in the session, it looks in the Global DosDevices ("\GLOBAL??\")
24         //  5. "C:" is found in DosDevices (in our case "\GLOBAL??\C:", which is a symbolic link to "\Device\HarddiskVolume6")
25         //  6. The full path is now "\Device\HarddiskVolume6\Foo", "\Device\HarddiskVolume6" is a File object and parsing is handed off
26         //      to the registered parsing method for Files
27         //  7. The registered open method for File objects is invoked to create the file handle which is then returned
28         //
29         // There are multiple ways to directly specify a DosDevices path. The final format of "\??\" is one way. It can also be specified
30         // as "\\.\" (the most commonly documented way) and "\\?\". If the question mark syntax is used the path will skip normalization
31         // (essentially GetFullPathName()) and path length checks.
32 
33         // Windows Kernel-Mode Object Manager
34         // https://msdn.microsoft.com/en-us/library/windows/hardware/ff565763.aspx
35         // https://channel9.msdn.com/Shows/Going+Deep/Windows-NT-Object-Manager
36         //
37         // Introduction to MS-DOS Device Names
38         // https://msdn.microsoft.com/en-us/library/windows/hardware/ff548088.aspx
39         //
40         // Local and Global MS-DOS Device Names
41         // https://msdn.microsoft.com/en-us/library/windows/hardware/ff554302.aspx
42 
43         internal const char DirectorySeparatorChar = '\\';
44         internal const char AltDirectorySeparatorChar = '/';
45         internal const char VolumeSeparatorChar = ':';
46         internal const char PathSeparator = ';';
47 
48         internal const string DirectorySeparatorCharAsString = "\\";
49 
50         internal const string ExtendedPathPrefix = @"\\?\";
51         internal const string UncPathPrefix = @"\\";
52         internal const string UncExtendedPrefixToInsert = @"?\UNC\";
53         internal const string UncExtendedPathPrefix = @"\\?\UNC\";
54         internal const string DevicePathPrefix = @"\\.\";
55         internal const string ParentDirectoryPrefix = @"..\";
56 
57         internal const int MaxShortPath = 260;
58         internal const int MaxShortDirectoryPath = 248;
59         // \\?\, \\.\, \??\
60         internal const int DevicePrefixLength = 4;
61         // \\
62         internal const int UncPrefixLength = 2;
63         // \\?\UNC\, \\.\UNC\
64         internal const int UncExtendedPrefixLength = 8;
65 
66         /// <summary>
67         /// Returns true if the given character is a valid drive letter
68         /// </summary>
IsValidDriveChar(char value)69         internal static bool IsValidDriveChar(char value)
70         {
71             return ((value >= 'A' && value <= 'Z') || (value >= 'a' && value <= 'z'));
72         }
73 
74         /// <summary>
75         /// Adds the extended path prefix (\\?\) if not already a device path, IF the path is not relative,
76         /// AND the path is more than 259 characters. (> MAX_PATH + null)
77         /// </summary>
EnsureExtendedPrefixOverMaxPath(string path)78         internal static string EnsureExtendedPrefixOverMaxPath(string path)
79         {
80             if (path != null && path.Length >= MaxShortPath)
81             {
82                 return EnsureExtendedPrefix(path);
83             }
84             else
85             {
86                 return path;
87             }
88         }
89 
90         /// <summary>
91         /// Adds the extended path prefix (\\?\) if not relative or already a device path.
92         /// </summary>
EnsureExtendedPrefix(string path)93         internal static string EnsureExtendedPrefix(string path)
94         {
95             // Putting the extended prefix on the path changes the processing of the path. It won't get normalized, which
96             // means adding to relative paths will prevent them from getting the appropriate current directory inserted.
97 
98             // If it already has some variant of a device path (\??\, \\?\, \\.\, //./, etc.) we don't need to change it
99             // as it is either correct or we will be changing the behavior. When/if Windows supports long paths implicitly
100             // in the future we wouldn't want normalization to come back and break existing code.
101 
102             // In any case, all internal usages should be hitting normalize path (Path.GetFullPath) before they hit this
103             // shimming method. (Or making a change that doesn't impact normalization, such as adding a filename to a
104             // normalized base path.)
105             if (IsPartiallyQualified(path) || IsDevice(path))
106                 return path;
107 
108             // Given \\server\share in longpath becomes \\?\UNC\server\share
109             if (path.StartsWith(UncPathPrefix, StringComparison.OrdinalIgnoreCase))
110                 return path.Insert(2, UncExtendedPrefixToInsert);
111 
112             return ExtendedPathPrefix + path;
113         }
114 
115         /// <summary>
116         /// Returns true if the path uses any of the DOS device path syntaxes. ("\\.\", "\\?\", or "\??\")
117         /// </summary>
IsDevice(string path)118         internal static bool IsDevice(string path)
119         {
120             // If the path begins with any two separators is will be recognized and normalized and prepped with
121             // "\??\" for internal usage correctly. "\??\" is recognized and handled, "/??/" is not.
122             return IsExtended(path)
123                 ||
124                 (
125                     path.Length >= DevicePrefixLength
126                     && IsDirectorySeparator(path[0])
127                     && IsDirectorySeparator(path[1])
128                     && (path[2] == '.' || path[2] == '?')
129                     && IsDirectorySeparator(path[3])
130                 );
131         }
132 
133         /// <summary>
134         /// Returns true if the path uses the canonical form of extended syntax ("\\?\" or "\??\"). If the
135         /// path matches exactly (cannot use alternate directory separators) Windows will skip normalization
136         /// and path length checks.
137         /// </summary>
IsExtended(string path)138         internal static bool IsExtended(string path)
139         {
140             // While paths like "//?/C:/" will work, they're treated the same as "\\.\" paths.
141             // Skipping of normalization will *only* occur if back slashes ('\') are used.
142             return path.Length >= DevicePrefixLength
143                 && path[0] == '\\'
144                 && (path[1] == '\\' || path[1] == '?')
145                 && path[2] == '?'
146                 && path[3] == '\\';
147         }
148 
149         /// <summary>
150         /// Check for known wildcard characters. '*' and '?' are the most common ones.
151         /// </summary>
HasWildCardCharacters(string path)152         internal static bool HasWildCardCharacters(string path)
153         {
154             // Question mark is part of dos device syntax so we have to skip if we are
155             int startIndex = IsDevice(path) ? ExtendedPathPrefix.Length : 0;
156 
157             // [MS - FSA] 2.1.4.4 Algorithm for Determining if a FileName Is in an Expression
158             // https://msdn.microsoft.com/en-us/library/ff469270.aspx
159             for (int i = startIndex; i < path.Length; i++)
160             {
161                 char c = path[i];
162                 if (c <= '?') // fast path for common case - '?' is highest wildcard character
163                 {
164                     if (c == '\"' || c == '<' || c == '>' || c == '*' || c == '?')
165                         return true;
166                 }
167             }
168 
169             return false;
170         }
171 
172         /// <summary>
173         /// Gets the length of the root of the path (drive, share, etc.).
174         /// </summary>
GetRootLength(string path)175         internal unsafe static int GetRootLength(string path)
176         {
177             fixed (char* value = path)
178             {
179                 return GetRootLength(value, path.Length);
180             }
181         }
182 
GetRootLength(char* path, int pathLength)183         private unsafe static int GetRootLength(char* path, int pathLength)
184         {
185             int i = 0;
186             int volumeSeparatorLength = 2;  // Length to the colon "C:"
187             int uncRootLength = 2;          // Length to the start of the server name "\\"
188 
189             bool extendedSyntax = StartsWithOrdinal(path, pathLength, ExtendedPathPrefix);
190             bool extendedUncSyntax = StartsWithOrdinal(path, pathLength, UncExtendedPathPrefix);
191             if (extendedSyntax)
192             {
193                 // Shift the position we look for the root from to account for the extended prefix
194                 if (extendedUncSyntax)
195                 {
196                     // "\\" -> "\\?\UNC\"
197                     uncRootLength = UncExtendedPathPrefix.Length;
198                 }
199                 else
200                 {
201                     // "C:" -> "\\?\C:"
202                     volumeSeparatorLength += ExtendedPathPrefix.Length;
203                 }
204             }
205 
206             if ((!extendedSyntax || extendedUncSyntax) && pathLength > 0 && IsDirectorySeparator(path[0]))
207             {
208                 // UNC or simple rooted path (e.g. "\foo", NOT "\\?\C:\foo")
209 
210                 i = 1; //  Drive rooted (\foo) is one character
211                 if (extendedUncSyntax || (pathLength > 1 && IsDirectorySeparator(path[1])))
212                 {
213                     // UNC (\\?\UNC\ or \\), scan past the next two directory separators at most
214                     // (e.g. to \\?\UNC\Server\Share or \\Server\Share\)
215                     i = uncRootLength;
216                     int n = 2; // Maximum separators to skip
217                     while (i < pathLength && (!IsDirectorySeparator(path[i]) || --n > 0)) i++;
218                 }
219             }
220             else if (pathLength >= volumeSeparatorLength && path[volumeSeparatorLength - 1] == VolumeSeparatorChar)
221             {
222                 // Path is at least longer than where we expect a colon, and has a colon (\\?\A:, A:)
223                 // If the colon is followed by a directory separator, move past it
224                 i = volumeSeparatorLength;
225                 if (pathLength >= volumeSeparatorLength + 1 && IsDirectorySeparator(path[volumeSeparatorLength])) i++;
226             }
227             return i;
228         }
229 
StartsWithOrdinal(char* source, int sourceLength, string value)230         private unsafe static bool StartsWithOrdinal(char* source, int sourceLength, string value)
231         {
232             if (sourceLength < value.Length) return false;
233             for (int i = 0; i < value.Length; i++)
234             {
235                 if (value[i] != source[i]) return false;
236             }
237             return true;
238         }
239 
240         /// <summary>
241         /// Returns true if the path specified is relative to the current drive or working directory.
242         /// Returns false if the path is fixed to a specific drive or UNC path.  This method does no
243         /// validation of the path (URIs will be returned as relative as a result).
244         /// </summary>
245         /// <remarks>
246         /// Handles paths that use the alternate directory separator.  It is a frequent mistake to
247         /// assume that rooted paths (Path.IsPathRooted) are not relative.  This isn't the case.
248         /// "C:a" is drive relative- meaning that it will be resolved against the current directory
249         /// for C: (rooted, but relative). "C:\a" is rooted and not relative (the current directory
250         /// will not be used to modify the path).
251         /// </remarks>
IsPartiallyQualified(string path)252         internal static bool IsPartiallyQualified(string path)
253         {
254             if (path.Length < 2)
255             {
256                 // It isn't fixed, it must be relative.  There is no way to specify a fixed
257                 // path with one character (or less).
258                 return true;
259             }
260 
261             if (IsDirectorySeparator(path[0]))
262             {
263                 // There is no valid way to specify a relative path with two initial slashes or
264                 // \? as ? isn't valid for drive relative paths and \??\ is equivalent to \\?\
265                 return !(path[1] == '?' || IsDirectorySeparator(path[1]));
266             }
267 
268             // The only way to specify a fixed path that doesn't begin with two slashes
269             // is the drive, colon, slash format- i.e. C:\
270             return !((path.Length >= 3)
271                 && (path[1] == VolumeSeparatorChar)
272                 && IsDirectorySeparator(path[2])
273                 // To match old behavior we'll check the drive character for validity as the path is technically
274                 // not qualified if you don't have a valid drive. "=:\" is the "=" file's default data stream.
275                 && IsValidDriveChar(path[0]));
276         }
277 
278         /// <summary>
279         /// Returns the characters to skip at the start of the path if it starts with space(s) and a drive or directory separator.
280         /// (examples are " C:", " \")
281         /// This is a legacy behavior of Path.GetFullPath().
282         /// </summary>
283         /// <remarks>
284         /// Note that this conflicts with IsPathRooted() which doesn't (and never did) such a skip.
285         /// </remarks>
PathStartSkip(string path)286         internal static int PathStartSkip(string path)
287         {
288             int startIndex = 0;
289             while (startIndex < path.Length && path[startIndex] == ' ') startIndex++;
290 
291             if (startIndex > 0 && (startIndex < path.Length && IsDirectorySeparator(path[startIndex]))
292                 || (startIndex + 1 < path.Length && path[startIndex + 1] == ':' && IsValidDriveChar(path[startIndex])))
293             {
294                 // Go ahead and skip spaces as we're either " C:" or " \"
295                 return startIndex;
296             }
297 
298             return 0;
299         }
300 
301         /// <summary>
302         /// True if the given character is a directory separator.
303         /// </summary>
304         [MethodImpl(MethodImplOptions.AggressiveInlining)]
IsDirectorySeparator(char c)305         internal static bool IsDirectorySeparator(char c)
306         {
307             return c == DirectorySeparatorChar || c == AltDirectorySeparatorChar;
308         }
309 
310         /// <summary>
311         /// Normalize separators in the given path. Converts forward slashes into back slashes and compresses slash runs, keeping initial 2 if present.
312         /// Also trims initial whitespace in front of "rooted" paths (see PathStartSkip).
313         ///
314         /// This effectively replicates the behavior of the legacy NormalizePath when it was called with fullCheck=false and expandShortpaths=false.
315         /// The current NormalizePath gets directory separator normalization from Win32's GetFullPathName(), which will resolve relative paths and as
316         /// such can't be used here (and is overkill for our uses).
317         ///
318         /// Like the current NormalizePath this will not try and analyze periods/spaces within directory segments.
319         /// </summary>
320         /// <remarks>
321         /// The only callers that used to use Path.Normalize(fullCheck=false) were Path.GetDirectoryName() and Path.GetPathRoot(). Both usages do
322         /// not need trimming of trailing whitespace here.
323         ///
324         /// GetPathRoot() could technically skip normalizing separators after the second segment- consider as a future optimization.
325         ///
326         /// For legacy desktop behavior with ExpandShortPaths:
327         ///  - It has no impact on GetPathRoot() so doesn't need consideration.
328         ///  - It could impact GetDirectoryName(), but only if the path isn't relative (C:\ or \\Server\Share).
329         ///
330         /// In the case of GetDirectoryName() the ExpandShortPaths behavior was undocumented and provided inconsistent results if the path was
331         /// fixed/relative. For example: "C:\PROGRA~1\A.TXT" would return "C:\Program Files" while ".\PROGRA~1\A.TXT" would return ".\PROGRA~1". If you
332         /// ultimately call GetFullPath() this doesn't matter, but if you don't or have any intermediate string handling could easily be tripped up by
333         /// this undocumented behavior.
334         ///
335         /// We won't match this old behavior because:
336         ///
337         ///   1. It was undocumented
338         ///   2. It was costly (extremely so if it actually contained '~')
339         ///   3. Doesn't play nice with string logic
340         ///   4. Isn't a cross-plat friendly concept/behavior
341         /// </remarks>
NormalizeDirectorySeparators(string path)342         internal static string NormalizeDirectorySeparators(string path)
343         {
344             if (string.IsNullOrEmpty(path)) return path;
345 
346             char current;
347             int start = PathStartSkip(path);
348 
349             if (start == 0)
350             {
351                 // Make a pass to see if we need to normalize so we can potentially skip allocating
352                 bool normalized = true;
353 
354                 for (int i = 0; i < path.Length; i++)
355                 {
356                     current = path[i];
357                     if (IsDirectorySeparator(current)
358                         && (current != DirectorySeparatorChar
359                             // Check for sequential separators past the first position (we need to keep initial two for UNC/extended)
360                             || (i > 0 && i + 1 < path.Length && IsDirectorySeparator(path[i + 1]))))
361                     {
362                         normalized = false;
363                         break;
364                     }
365                 }
366 
367                 if (normalized) return path;
368             }
369 
370             StringBuilder builder = new StringBuilder(path.Length);
371 
372             if (IsDirectorySeparator(path[start]))
373             {
374                 start++;
375                 builder.Append(DirectorySeparatorChar);
376             }
377 
378             for (int i = start; i < path.Length; i++)
379             {
380                 current = path[i];
381 
382                 // If we have a separator
383                 if (IsDirectorySeparator(current))
384                 {
385                     // If the next is a separator, skip adding this
386                     if (i + 1 < path.Length && IsDirectorySeparator(path[i + 1]))
387                     {
388                         continue;
389                     }
390 
391                     // Ensure it is the primary separator
392                     current = DirectorySeparatorChar;
393                 }
394 
395                 builder.Append(current);
396             }
397 
398             return builder.ToString();
399         }
400 
401         /// <summary>
402         /// Returns true if the character is a directory or volume separator.
403         /// </summary>
404         /// <param name="ch">The character to test.</param>
IsDirectoryOrVolumeSeparator(char ch)405         internal static bool IsDirectoryOrVolumeSeparator(char ch)
406         {
407             return IsDirectorySeparator(ch) || VolumeSeparatorChar == ch;
408         }
409 
410         /// <summary>
411         /// Returns true if the path is effectively empty for the current OS.
412         /// For unix, this is empty or null. For Windows, this is empty, null, or
413         /// just spaces ((char)32).
414         /// </summary>
IsEffectivelyEmpty(string path)415         internal static bool IsEffectivelyEmpty(string path)
416         {
417             if (string.IsNullOrEmpty(path))
418                 return true;
419 
420             foreach (char c in path)
421             {
422                 if (c != ' ')
423                     return false;
424             }
425             return true;
426         }
427     }
428 }
429