1 // Copyright (c) Microsoft. All rights reserved.
2 // Licensed under the MIT license. See LICENSE file in the project root for full license information.
3 
4 using System;
5 using System.Collections.Concurrent;
6 using System.Collections.Generic;
7 using System.Collections.Immutable;
8 using System.Diagnostics;
9 using System.Text;
10 using System.IO;
11 using System.Linq;
12 using Microsoft.Build.Shared;
13 using System.Text.RegularExpressions;
14 using Microsoft.Build.Utilities;
15 using Microsoft.Build.Shared.EscapingStringExtensions;
16 
17 namespace Microsoft.Build.Internal
18 {
19     internal class EngineFileUtilities
20     {
21         // Regexes for wildcard filespecs that should not get expanded
22         // By default all wildcards are expanded.
23         private static List<Regex> s_lazyWildCardExpansionRegexes;
24 
EngineFileUtilities()25         static EngineFileUtilities()
26         {
27             if (Traits.Instance.UseLazyWildCardEvaluation)
28             {
29                 CaptureLazyWildcardRegexes();
30             }
31         }
32 
33         // used by test to reset regexes
CaptureLazyWildcardRegexes()34         internal static void CaptureLazyWildcardRegexes()
35         {
36             s_lazyWildCardExpansionRegexes = PopulateRegexFromEnvironment();
37         }
38 
39 
40     /// <summary>
41         /// Used for the purposes of evaluating an item specification. Given a filespec that may include wildcard characters * and
42         /// ?, we translate it into an actual list of files. If the input filespec doesn't contain any wildcard characters, and it
43         /// doesn't appear to point to an actual file on disk, then we just give back the input string as an array of length one,
44         /// assuming that it wasn't really intended to be a filename (as items are not required to necessarily represent files).
45         /// Any wildcards passed in that are unescaped will be treated as real wildcards.
46         /// The "include" of items passed back from the filesystem will be returned canonically escaped.
47         /// The ordering of the list returned is deterministic (it is sorted).
48         /// Will never throw IO exceptions. If path is invalid, just returns filespec verbatim.
49         /// </summary>
50         /// <param name="directoryEscaped">The directory to evaluate, escaped.</param>
51         /// <param name="filespecEscaped">The filespec to evaluate, escaped.</param>
52         /// <param name="forceEvaluate">Whether to force file glob expansion when eager expansion is turned off</param>
53         /// <returns>Array of file paths, unescaped.</returns>
GetFileListUnescaped( string directoryEscaped, string filespecEscaped, bool forceEvaluate = false )54         internal static string[] GetFileListUnescaped
55             (
56             string directoryEscaped,
57             string filespecEscaped,
58             bool forceEvaluate = false
59             )
60 
61         {
62             return GetFileList(directoryEscaped, filespecEscaped, false /* returnEscaped */, forceEvaluate);
63         }
64 
65         /// <summary>
66         /// Used for the purposes of evaluating an item specification. Given a filespec that may include wildcard characters * and
67         /// ?, we translate it into an actual list of files. If the input filespec doesn't contain any wildcard characters, and it
68         /// doesn't appear to point to an actual file on disk, then we just give back the input string as an array of length one,
69         /// assuming that it wasn't really intended to be a filename (as items are not required to necessarily represent files).
70         /// Any wildcards passed in that are unescaped will be treated as real wildcards.
71         /// The "include" of items passed back from the filesystem will be returned canonically escaped.
72         /// The ordering of the list returned is deterministic (it is sorted).
73         /// Will never throw IO exceptions. If path is invalid, just returns filespec verbatim.
74         /// </summary>
75         /// <param name="directoryEscaped">The directory to evaluate, escaped.</param>
76         /// <param name="filespecEscaped">The filespec to evaluate, escaped.</param>
77         /// <param name="excludeSpecsEscaped">Filespecs to exclude, escaped.</param>
78         /// <param name="forceEvaluate">Whether to force file glob expansion when eager expansion is turned off</param>
79         /// <param name="entriesCache">Cache used for caching IO operation results</param>
80         /// <returns>Array of file paths, escaped.</returns>
GetFileListEscaped( string directoryEscaped, string filespecEscaped, IEnumerable<string> excludeSpecsEscaped = null, bool forceEvaluate = false, ConcurrentDictionary<string, ImmutableArray<string>> entriesCache = null )81         internal static string[] GetFileListEscaped
82             (
83             string directoryEscaped,
84             string filespecEscaped,
85             IEnumerable<string> excludeSpecsEscaped = null,
86             bool forceEvaluate = false,
87             ConcurrentDictionary<string, ImmutableArray<string>> entriesCache = null
88             )
89         {
90             return GetFileList(directoryEscaped, filespecEscaped, true /* returnEscaped */, forceEvaluate, excludeSpecsEscaped, entriesCache);
91         }
92 
FilespecHasWildcards(string filespecEscaped)93         internal static bool FilespecHasWildcards(string filespecEscaped)
94         {
95             bool containsEscapedWildcards = EscapingUtilities.ContainsEscapedWildcards(filespecEscaped);
96             bool containsRealWildcards = FileMatcher.HasWildcards(filespecEscaped);
97 
98             if (containsEscapedWildcards && containsRealWildcards)
99             {
100                 // Umm, this makes no sense.  The item's Include has both escaped wildcards and
101                 // real wildcards.  What does he want us to do?  Go to the file system and find
102                 // files that literally have '*' in their filename?  Well, that's not going to
103                 // happen because '*' is an illegal character to have in a filename.
104 
105                 return false;
106             }
107             else if (!containsEscapedWildcards && containsRealWildcards)
108             {
109                 return true;
110             }
111             else
112             {
113                 return false;
114             }
115         }
116 
117         /// <summary>
118         /// Used for the purposes of evaluating an item specification. Given a filespec that may include wildcard characters * and
119         /// ?, we translate it into an actual list of files. If the input filespec doesn't contain any wildcard characters, and it
120         /// doesn't appear to point to an actual file on disk, then we just give back the input string as an array of length one,
121         /// assuming that it wasn't really intended to be a filename (as items are not required to necessarily represent files).
122         /// Any wildcards passed in that are unescaped will be treated as real wildcards.
123         /// The "include" of items passed back from the filesystem will be returned canonically escaped.
124         /// The ordering of the list returned is deterministic (it is sorted).
125         /// Will never throw IO exceptions: if there is no match, returns the input verbatim.
126         /// </summary>
127         /// <param name="directoryEscaped">The directory to evaluate, escaped.</param>
128         /// <param name="filespecEscaped">The filespec to evaluate, escaped.</param>
129         /// <param name="returnEscaped"><code>true</code> to return escaped specs.</param>
130         /// <param name="forceEvaluateWildCards">Whether to force file glob expansion when eager expansion is turned off</param>
131         /// <param name="excludeSpecsEscaped">The exclude specification, escaped.</param>
132         /// <param name="entriesCache">Cache used for caching IO operation results</param>
133         /// <returns>Array of file paths.</returns>
GetFileList( string directoryEscaped, string filespecEscaped, bool returnEscaped, bool forceEvaluateWildCards, IEnumerable<string> excludeSpecsEscaped = null, ConcurrentDictionary<string, ImmutableArray<string>> entriesCache = null )134         private static string[] GetFileList
135             (
136             string directoryEscaped,
137             string filespecEscaped,
138             bool returnEscaped,
139             bool forceEvaluateWildCards,
140             IEnumerable<string> excludeSpecsEscaped = null,
141             ConcurrentDictionary<string, ImmutableArray<string>> entriesCache = null
142             )
143         {
144             ErrorUtilities.VerifyThrowInternalLength(filespecEscaped, "filespecEscaped");
145 
146             if (excludeSpecsEscaped == null)
147             {
148                 excludeSpecsEscaped = Enumerable.Empty<string>();
149             }
150 
151             string[] fileList;
152 
153             if (!FilespecHasWildcards(filespecEscaped) ||
154                 FilespecMatchesLazyWildcard(filespecEscaped, forceEvaluateWildCards))
155             {
156                 // Just return the original string.
157                 fileList = new string[] { returnEscaped ? filespecEscaped : EscapingUtilities.UnescapeAll(filespecEscaped) };
158             }
159             else
160             {
161                 if (Traits.Instance.LogExpandedWildcards)
162                 {
163                     ErrorUtilities.DebugTraceMessage("Expanding wildcard for file spec {0}", filespecEscaped);
164                 }
165 
166                 // Unescape before handing it to the filesystem.
167                 var directoryUnescaped = EscapingUtilities.UnescapeAll(directoryEscaped);
168                 var filespecUnescaped = EscapingUtilities.UnescapeAll(filespecEscaped);
169                 var excludeSpecsUnescaped = excludeSpecsEscaped.Where(IsValidExclude).Select(EscapingUtilities.UnescapeAll).ToList();
170 
171                 // Get the list of actual files which match the filespec.  Put
172                 // the list into a string array.  If the filespec started out
173                 // as a relative path, we will get back a bunch of relative paths.
174                 // If the filespec started out as an absolute path, we will get
175                 // back a bunch of absolute paths.
176                 fileList = FileMatcher.GetFiles(directoryUnescaped, filespecUnescaped, excludeSpecsUnescaped, entriesCache);
177 
178                 ErrorUtilities.VerifyThrow(fileList != null, "We must have a list of files here, even if it's empty.");
179 
180                 // Before actually returning the file list, we sort them alphabetically.  This
181                 // provides a certain amount of extra determinism and reproducability.  That is,
182                 // we're sure that the build will behave in exactly the same way every time,
183                 // and on every machine.
184                 Array.Sort(fileList, StringComparer.OrdinalIgnoreCase);
185 
186                 if (returnEscaped)
187                 {
188                     // We must now go back and make sure all special characters are escaped because we always
189                     // store data in the engine in escaped form so it doesn't interfere with our parsing.
190                     // Note that this means that characters that were not escaped in the original filespec
191                     // may now be escaped, but that's not easy to avoid.
192                     for (int i = 0; i < fileList.Length; i++)
193                     {
194                         fileList[i] = EscapingUtilities.Escape(fileList[i]);
195                     }
196                 }
197             }
198 
199             return fileList;
200         }
201 
FilespecMatchesLazyWildcard(string filespecEscaped, bool forceEvaluateWildCards)202         private static bool FilespecMatchesLazyWildcard(string filespecEscaped, bool forceEvaluateWildCards)
203         {
204             return Traits.Instance.UseLazyWildCardEvaluation && !forceEvaluateWildCards && MatchesLazyWildcard(filespecEscaped);
205         }
206 
IsValidExclude(string exclude)207         private static bool IsValidExclude(string exclude)
208         {
209             // TODO: assumption on legal path characters: https://github.com/Microsoft/msbuild/issues/781
210             // Excludes that have both wildcards and non escaped wildcards will never be matched on Windows, because
211             // wildcard characters are invalid in Windows paths.
212             // Filtering these excludes early keeps the glob expander simpler. Otherwise unescaping logic would reach all the way down to
213             // filespec parsing (parse escaped string (to correctly ignore escaped wildcards) and then
214             // unescape the path fragments to unfold potentially escaped wildcard chars)
215             var hasBothWildcardsAndEscapedWildcards = FileMatcher.HasWildcards(exclude) && EscapingUtilities.ContainsEscapedWildcards(exclude);
216             return !hasBothWildcardsAndEscapedWildcards;
217         }
218 
PopulateRegexFromEnvironment()219         private static List<Regex> PopulateRegexFromEnvironment()
220         {
221             string wildCards = Environment.GetEnvironmentVariable("MsBuildSkipEagerWildCardEvaluationRegexes");
222             if (string.IsNullOrEmpty(wildCards))
223             {
224                 return new List<Regex>(0);
225             }
226             else
227             {
228                 List<Regex> regexes = new List<Regex>();
229                 foreach (string regex in wildCards.Split(';'))
230                 {
231                     Regex item = new Regex(regex, RegexOptions.Compiled | RegexOptions.Singleline | RegexOptions.IgnoreCase);
232                     // trigger a match first?
233                     item.IsMatch("foo");
234                     regexes.Add(item);
235                 }
236 
237                 return regexes;
238             }
239         }
240 
241         // TODO: assumption on file system case sensitivity: https://github.com/Microsoft/msbuild/issues/781
242         private static readonly Lazy<ConcurrentDictionary<string, bool>> _regexMatchCache = new Lazy<ConcurrentDictionary<string, bool>>(() => new ConcurrentDictionary<string, bool>(StringComparer.OrdinalIgnoreCase));
243 
MatchesLazyWildcard(string fileSpec)244         private static bool MatchesLazyWildcard(string fileSpec)
245         {
246             return _regexMatchCache.Value.GetOrAdd(fileSpec, file => s_lazyWildCardExpansionRegexes.Any(regex => regex.IsMatch(fileSpec)));
247         }
248 
249         /// Returns a Func that will return true IFF its argument matches any of the specified filespecs
250         /// Assumes filespec may be escaped, so it unescapes it
251         /// The returned function makes no escaping assumptions or escaping operations. Its callers should control escaping.
GetFileSpecMatchTester(IList<string> filespecsEscaped, string currentDirectory)252         internal static Func<string, bool> GetFileSpecMatchTester(IList<string> filespecsEscaped, string currentDirectory)
253         {
254             var matchers = filespecsEscaped
255                 .Select(fs => new Lazy<FileSpecMatcherTester>(() => FileSpecMatcherTester.Parse(currentDirectory, fs)))
256                 .ToList();
257 
258             return file => matchers.Any(m => m.Value.IsMatch(file));
259         }
260 
261         internal class IOCache
262         {
263             private readonly Lazy<ConcurrentDictionary<string, bool>> existenceCache = new Lazy<ConcurrentDictionary<string, bool>>(() => new ConcurrentDictionary<string, bool>(), true);
264 
DirectoryExists(string directory)265             public virtual bool DirectoryExists(string directory)
266             {
267                 return existenceCache.Value.GetOrAdd(directory, Directory.Exists);
268             }
269 
FileExists(string file)270             public virtual bool FileExists(string file)
271             {
272                 return existenceCache.Value.GetOrAdd(file, File.Exists);
273             }
274         }
275     }
276 }
277