// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Diagnostics;
using System.Text;
using System.IO;
using System.Linq;
using Microsoft.Build.Shared;
using System.Text.RegularExpressions;
using Microsoft.Build.Utilities;
using Microsoft.Build.Shared.EscapingStringExtensions;

namespace Microsoft.Build.Internal
{
    /// <summary>
    /// Helpers for turning escaped item filespecs into file lists, with optional "lazy" wildcard
    /// handling driven by regexes read from the environment.
    /// </summary>
    internal class EngineFileUtilities
    {
        // Regexes for wildcard filespecs that should not get expanded
        // By default all wildcards are expanded.
        private static List<Regex> s_lazyWildCardExpansionRegexes;

        // Memoizes, per filespec, whether any of the lazy-wildcard regexes matched it.
        // TODO: assumption on file system case sensitivity: https://github.com/Microsoft/msbuild/issues/781
        private static readonly Lazy<ConcurrentDictionary<string, bool>> _regexMatchCache = new Lazy<ConcurrentDictionary<string, bool>>(() => new ConcurrentDictionary<string, bool>(StringComparer.OrdinalIgnoreCase));

        static EngineFileUtilities()
        {
            if (Traits.Instance.UseLazyWildCardEvaluation)
            {
                CaptureLazyWildcardRegexes();
            }
        }

        /// <summary>
        /// Re-reads the lazy-wildcard regexes from the environment. Used by tests to reset the regexes.
        /// </summary>
        internal static void CaptureLazyWildcardRegexes()
        {
            s_lazyWildCardExpansionRegexes = PopulateRegexFromEnvironment();

            // Cached match results were computed against the previous regex list, so they
            // are no longer valid once the list has been replaced.
            if (_regexMatchCache.IsValueCreated)
            {
                _regexMatchCache.Value.Clear();
            }
        }

        /// <summary>
        /// Used for the purposes of evaluating an item specification. Given a filespec that may include wildcard characters * and
        /// ?, we translate it into an actual list of files. If the input filespec doesn't contain any wildcard characters, and it
        /// doesn't appear to point to an actual file on disk, then we just give back the input string as an array of length one,
        /// assuming that it wasn't really intended to be a filename (as items are not required to necessarily represent files).
        /// Any wildcards passed in that are unescaped will be treated as real wildcards.
        /// The "include" of items passed back from the filesystem will be returned canonically escaped.
        /// The ordering of the list returned is deterministic (it is sorted).
        /// Will never throw IO exceptions. If path is invalid, just returns filespec verbatim.
        /// </summary>
        /// <param name="directoryEscaped">The directory to evaluate, escaped.</param>
        /// <param name="filespecEscaped">The filespec to evaluate, escaped.</param>
        /// <param name="forceEvaluate">Whether to force file glob expansion when eager expansion is turned off</param>
        /// <returns>Array of file paths, unescaped.</returns>
        internal static string[] GetFileListUnescaped
            (
            string directoryEscaped,
            string filespecEscaped,
            bool forceEvaluate = false
            )
        {
            return GetFileList(directoryEscaped, filespecEscaped, false /* returnEscaped */, forceEvaluate);
        }

        /// <summary>
        /// Used for the purposes of evaluating an item specification. Given a filespec that may include wildcard characters * and
        /// ?, we translate it into an actual list of files. If the input filespec doesn't contain any wildcard characters, and it
        /// doesn't appear to point to an actual file on disk, then we just give back the input string as an array of length one,
        /// assuming that it wasn't really intended to be a filename (as items are not required to necessarily represent files).
        /// Any wildcards passed in that are unescaped will be treated as real wildcards.
        /// The "include" of items passed back from the filesystem will be returned canonically escaped.
        /// The ordering of the list returned is deterministic (it is sorted).
        /// Will never throw IO exceptions. If path is invalid, just returns filespec verbatim.
        /// </summary>
        /// <param name="directoryEscaped">The directory to evaluate, escaped.</param>
        /// <param name="filespecEscaped">The filespec to evaluate, escaped.</param>
        /// <param name="excludeSpecsEscaped">Filespecs to exclude, escaped.</param>
        /// <param name="forceEvaluate">Whether to force file glob expansion when eager expansion is turned off</param>
        /// <param name="entriesCache">Cache used for caching IO operation results</param>
        /// <returns>Array of file paths, escaped.</returns>
        internal static string[] GetFileListEscaped
            (
            string directoryEscaped,
            string filespecEscaped,
            IEnumerable<string> excludeSpecsEscaped = null,
            bool forceEvaluate = false,
            ConcurrentDictionary<string, ImmutableArray<string>> entriesCache = null
            )
        {
            return GetFileList(directoryEscaped, filespecEscaped, true /* returnEscaped */, forceEvaluate, excludeSpecsEscaped, entriesCache);
        }

        /// <summary>
        /// Decides whether an escaped filespec should be treated as a wildcard pattern.
        /// </summary>
        /// <param name="filespecEscaped">The filespec to inspect, escaped.</param>
        /// <returns>
        /// <c>true</c> only when the filespec contains real (unescaped) wildcards and no escaped wildcards.
        /// </returns>
        internal static bool FilespecHasWildcards(string filespecEscaped)
        {
            bool containsEscapedWildcards = EscapingUtilities.ContainsEscapedWildcards(filespecEscaped);
            bool containsRealWildcards = FileMatcher.HasWildcards(filespecEscaped);

            // A filespec with both escaped and real wildcards makes no sense: honoring it would mean
            // searching the file system for files that literally have '*' in their name, and '*' is
            // an illegal filename character. Such a filespec is therefore not treated as a wildcard.
            return containsRealWildcards && !containsEscapedWildcards;
        }

        /// <summary>
        /// Used for the purposes of evaluating an item specification. Given a filespec that may include wildcard characters * and
        /// ?, we translate it into an actual list of files. If the input filespec doesn't contain any wildcard characters, and it
        /// doesn't appear to point to an actual file on disk, then we just give back the input string as an array of length one,
        /// assuming that it wasn't really intended to be a filename (as items are not required to necessarily represent files).
        /// Any wildcards passed in that are unescaped will be treated as real wildcards.
        /// The "include" of items passed back from the filesystem will be returned canonically escaped.
        /// The ordering of the list returned is deterministic (it is sorted).
        /// Will never throw IO exceptions: if there is no match, returns the input verbatim.
        /// </summary>
        /// <param name="directoryEscaped">The directory to evaluate, escaped.</param>
        /// <param name="filespecEscaped">The filespec to evaluate, escaped.</param>
        /// <param name="returnEscaped"><code>true</code> to return escaped specs.</param>
        /// <param name="forceEvaluateWildCards">Whether to force file glob expansion when eager expansion is turned off</param>
        /// <param name="excludeSpecsEscaped">The exclude specification, escaped.</param>
        /// <param name="entriesCache">Cache used for caching IO operation results</param>
        /// <returns>Array of file paths.</returns>
        private static string[] GetFileList
            (
            string directoryEscaped,
            string filespecEscaped,
            bool returnEscaped,
            bool forceEvaluateWildCards,
            IEnumerable<string> excludeSpecsEscaped = null,
            ConcurrentDictionary<string, ImmutableArray<string>> entriesCache = null
            )
        {
            ErrorUtilities.VerifyThrowInternalLength(filespecEscaped, "filespecEscaped");

            if (excludeSpecsEscaped == null)
            {
                excludeSpecsEscaped = Enumerable.Empty<string>();
            }

            if (!FilespecHasWildcards(filespecEscaped) ||
                FilespecMatchesLazyWildcard(filespecEscaped, forceEvaluateWildCards))
            {
                // Nothing to expand (or expansion is deferred): just return the original string.
                return new string[] { returnEscaped ? filespecEscaped : EscapingUtilities.UnescapeAll(filespecEscaped) };
            }

            if (Traits.Instance.LogExpandedWildcards)
            {
                ErrorUtilities.DebugTraceMessage("Expanding wildcard for file spec {0}", filespecEscaped);
            }

            // Unescape before handing it to the filesystem.
            var directoryUnescaped = EscapingUtilities.UnescapeAll(directoryEscaped);
            var filespecUnescaped = EscapingUtilities.UnescapeAll(filespecEscaped);
            var excludeSpecsUnescaped = excludeSpecsEscaped.Where(IsValidExclude).Select(EscapingUtilities.UnescapeAll).ToList();

            // Get the list of actual files which match the filespec.  Put
            // the list into a string array.  If the filespec started out
            // as a relative path, we will get back a bunch of relative paths.
            // If the filespec started out as an absolute path, we will get
            // back a bunch of absolute paths.
            string[] fileList = FileMatcher.GetFiles(directoryUnescaped, filespecUnescaped, excludeSpecsUnescaped, entriesCache);

            ErrorUtilities.VerifyThrow(fileList != null, "We must have a list of files here, even if it's empty.");

            // Before actually returning the file list, we sort them alphabetically.  This
            // provides a certain amount of extra determinism and reproducibility.  That is,
            // we're sure that the build will behave in exactly the same way every time,
            // and on every machine.
            Array.Sort(fileList, StringComparer.OrdinalIgnoreCase);

            if (returnEscaped)
            {
                // We must now go back and make sure all special characters are escaped because we always
                // store data in the engine in escaped form so it doesn't interfere with our parsing.
                // Note that this means that characters that were not escaped in the original filespec
                // may now be escaped, but that's not easy to avoid.
                for (int i = 0; i < fileList.Length; i++)
                {
                    fileList[i] = EscapingUtilities.Escape(fileList[i]);
                }
            }

            return fileList;
        }

        /// <summary>
        /// Returns true when lazy wildcard evaluation is enabled, expansion is not being forced,
        /// and the filespec matches one of the lazy-wildcard regexes.
        /// </summary>
        private static bool FilespecMatchesLazyWildcard(string filespecEscaped, bool forceEvaluateWildCards)
        {
            return Traits.Instance.UseLazyWildCardEvaluation && !forceEvaluateWildCards && MatchesLazyWildcard(filespecEscaped);
        }

        /// <summary>
        /// Filters out exclude specs that contain both real and escaped wildcards.
        /// </summary>
        private static bool IsValidExclude(string exclude)
        {
            // TODO: assumption on legal path characters: https://github.com/Microsoft/msbuild/issues/781
            // Excludes that have both real wildcards and escaped wildcards will never be matched on Windows, because
            // wildcard characters are invalid in Windows paths.
            // Filtering these excludes early keeps the glob expander simpler. Otherwise unescaping logic would reach all the way down to
            // filespec parsing (parse escaped string (to correctly ignore escaped wildcards) and then
            // unescape the path fragments to unfold potentially escaped wildcard chars)
            var hasBothWildcardsAndEscapedWildcards = FileMatcher.HasWildcards(exclude) && EscapingUtilities.ContainsEscapedWildcards(exclude);
            return !hasBothWildcardsAndEscapedWildcards;
        }

        /// <summary>
        /// Builds the lazy-wildcard regex list from the semicolon-separated
        /// MsBuildSkipEagerWildCardEvaluationRegexes environment variable.
        /// </summary>
        /// <returns>The parsed regexes; an empty list when the variable is unset or empty.</returns>
        private static List<Regex> PopulateRegexFromEnvironment()
        {
            List<Regex> regexes = new List<Regex>();

            string wildCards = Environment.GetEnvironmentVariable("MsBuildSkipEagerWildCardEvaluationRegexes");
            if (string.IsNullOrEmpty(wildCards))
            {
                return regexes;
            }

            // Drop empty entries (e.g. from a trailing or doubled ';'): an empty pattern matches
            // every input and would silently make every wildcard filespec lazy.
            foreach (string pattern in wildCards.Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries))
            {
                Regex item = new Regex(pattern, RegexOptions.Compiled | RegexOptions.Singleline | RegexOptions.IgnoreCase);

                // trigger a match first?
                // NOTE(review): presumably a warm-up so the first real match is not slowed down -- confirm.
                item.IsMatch("foo");
                regexes.Add(item);
            }

            return regexes;
        }

        /// <summary>
        /// Returns whether any lazy-wildcard regex matches the filespec, memoizing the answer per filespec.
        /// </summary>
        private static bool MatchesLazyWildcard(string fileSpec)
        {
            // Use the factory's own argument rather than capturing fileSpec, so the outer
            // delegate does not need a fresh closure on every call.
            return _regexMatchCache.Value.GetOrAdd(fileSpec, spec => s_lazyWildCardExpansionRegexes.Any(regex => regex.IsMatch(spec)));
        }

        /// <summary>
        /// Returns a Func that will return true IFF its argument matches any of the specified filespecs
        /// Assumes filespec may be escaped, so it unescapes it
        /// The returned function makes no escaping assumptions or escaping operations. Its callers should control escaping.
        /// </summary>
        /// <param name="filespecsEscaped">The filespecs to match against; each is parsed on first use.</param>
        /// <param name="currentDirectory">Directory passed to the filespec parser along with each filespec.</param>
        internal static Func<string, bool> GetFileSpecMatchTester(IList<string> filespecsEscaped, string currentDirectory)
        {
            // Parsing is deferred per filespec: Any() short-circuits, so matchers past the
            // first hit are only built if a later call needs them.
            var matchers = filespecsEscaped
                .Select(fs => new Lazy<FileSpecMatcherTester>(() => FileSpecMatcherTester.Parse(currentDirectory, fs)))
                .ToList();

            return file => matchers.Any(m => m.Value.IsMatch(file));
        }

        /// <summary>
        /// Caches the results of directory and file existence checks.
        /// </summary>
        internal class IOCache
        {
            // Directory and file answers are cached separately: the same path can give different
            // answers to Directory.Exists and File.Exists, so one shared cache would return the
            // answer for whichever kind of query happened to come first.
            private readonly Lazy<ConcurrentDictionary<string, bool>> directoryExistenceCache = new Lazy<ConcurrentDictionary<string, bool>>(() => new ConcurrentDictionary<string, bool>(), true);
            private readonly Lazy<ConcurrentDictionary<string, bool>> fileExistenceCache = new Lazy<ConcurrentDictionary<string, bool>>(() => new ConcurrentDictionary<string, bool>(), true);

            /// <summary>
            /// Returns the cached result of <see cref="Directory.Exists(string)"/> for the given path.
            /// </summary>
            public virtual bool DirectoryExists(string directory)
            {
                return directoryExistenceCache.Value.GetOrAdd(directory, Directory.Exists);
            }

            /// <summary>
            /// Returns the cached result of <see cref="File.Exists(string)"/> for the given path.
            /// </summary>
            public virtual bool FileExists(string file)
            {
                return fileExistenceCache.Value.GetOrAdd(file, File.Exists);
            }
        }
    }
}