1 // Copyright (c) Microsoft. All rights reserved.
2 // Licensed under the MIT license. See LICENSE file in the project root for full license information.
3 
4 using System;
5 using System.Xml;
6 using System.Collections;
7 using System.Globalization;
8 using System.Text;
9 using System.Text.RegularExpressions;
10 
11 using Microsoft.Build.BuildEngine.Shared;
12 using System.Collections.Generic;
13 
14 namespace Microsoft.Build.BuildEngine
15 {
16     /// <summary>
17     /// This class is used by objects in the build engine that have the ability to execute themselves in batches, to partition the
18     /// items they consume into "buckets", based on the values of select item metadata.
19     /// </summary>
20     /// <remarks>
21     /// What batching does
22     ///
23     /// Batching partitions the items consumed by the batchable object into buckets, where each bucket
24     /// contains a set of items that have the same value set on all item metadata consumed by the object.
25     /// Metadata consumed may be unqualified, for example %(m), or qualified by the item list to which it
26     /// refers, for example %(a.m).
27     ///
28     /// If metadata is qualified, for example %(a.m), then this is considered distinct to metadata with the
29     /// same name on a different item type. For example, %(a.m) is distinct to %(b.m), and items of type �b�
30     /// are considered to always have a blank value for %(a.m). This means items of type �b� will only be
31     /// placed in buckets where %(a.m) is blank. However %(a.m) is equivalent to %(m) on items of type �a�.
32     ///
33     /// There is an extra ambiguity rule: every items consumed by the object must have an explicit value for
34     /// every piece of unqualified metadata. For example, if @(a), %(m), and %(a.n) are consumed, every item
35     /// of type �a� must have a value for the metadata �m� but need not all necessarily have a value for the
36     /// metadata �n�. This rule eliminates ambiguity about whether items that do not define values for an
37     /// unqualified metadata should go in all buckets, or just into buckets with a blank value for
38     /// that metadata.
39     ///
40     /// For example
41     ///
42     /// <ItemGroup>
43     /// <a Include='a1;a2'>
44     ///   <n>m0</n>
45     /// </a>
46     /// <a Include='a3'>
47     ///   <n>m1</n>
48     /// </a>
49     /// <b Include='b1'>
50     ///   <n>n0</n>
51     /// </b>
52     /// <b Include='b2;b3'>
53     ///   <n>n1</n>
54     /// </b>
55     /// <b Include='b4'/>
56     /// </ItemGroup>
57     ///
58     /// <Target Name="t" >
59     ///   <Message Text="a={@(a).%(a.n)} b={@(b).%(b.n)}" />
60     /// </Target>
61     ///
62     /// Will produce 5 buckets:
63     ///
64     /// a={a1;a2.m0} b={.}
65     /// a={a3.m1} b={.}
66     /// a={.} b={b1.n0}
67     /// a={.} b={b2;b3.n1}
68     /// a={.} b={b4.}
69     ///
70     /// </remarks>
71     internal static class BatchingEngine
72     {
73         #region Methods
74 
75         /// <summary>
76         /// Determines how many times the batchable object needs to be executed (each execution is termed a "batch"), and prepares
77         /// buckets of items to pass to the object in each batch.
78         /// </summary>
79         /// <returns>ArrayList containing ItemBucket objects, each one representing an execution batch.</returns>
PrepareBatchingBuckets( XmlNode parentNode, List<string> batchableObjectParameters, Lookup lookup )80         internal static ArrayList PrepareBatchingBuckets
81         (
82             XmlNode parentNode,
83             List<string> batchableObjectParameters,
84             Lookup lookup
85         )
86         {
87             return PrepareBatchingBuckets(parentNode, batchableObjectParameters, lookup, null);
88         }
89 
90         /// <summary>
91         /// Determines how many times the batchable object needs to be executed (each execution is termed a "batch"), and prepares
92         /// buckets of items to pass to the object in each batch.
93         /// </summary>
94         /// <param name="parentNode"></param>
95         /// <param name="batchableObjectParameters"></param>
96         /// <param name="lookup"></param>
97         /// <param name="implicitBatchableItemType">Any item type that can be considered an implicit input to this batchable object.
98         /// This is useful for items inside targets, where the item name is plainly an item type that's an "input" to the object.</param>
99         /// <returns>ArrayList containing ItemBucket objects, each one representing an execution batch.</returns>
PrepareBatchingBuckets( XmlNode parentNode, List<string> batchableObjectParameters, Lookup lookup, string implicitBatchableItemType )100         internal static ArrayList PrepareBatchingBuckets
101         (
102             XmlNode parentNode,
103             List<string> batchableObjectParameters,
104             Lookup lookup,
105             string implicitBatchableItemType
106         )
107         {
108             ErrorUtilities.VerifyThrow(parentNode != null, "Need the XML node that represents the batchable object.");
109             ErrorUtilities.VerifyThrow(batchableObjectParameters != null, "Need the parameters of the batchable object to determine if it can be batched.");
110             ErrorUtilities.VerifyThrow(lookup != null, "Need to specify the lookup.");
111 
112             ItemsAndMetadataPair pair = ExpressionShredder.GetReferencedItemNamesAndMetadata(batchableObjectParameters);
113 
114             // All the @(itemname) item list references in the tag, including transforms, etc.
115             // The keys in the hashtable are the item names, and the values are all String.Empty (not used).
116             Hashtable consumedItemReferences = pair.Items;
117 
118             // All the %(itemname.metadataname) references in the tag (not counting those embedded
119             // inside item transforms), and note that the itemname portion is optional.
120             // The keys in the returned hash table are the qualified metadata names (e.g. "EmbeddedResource.Culture"
121             // or just "Culture").  The values are MetadataReference structs, which simply split out the item
122             // name (possibly null) and the actual metadata name.
123             Dictionary<string, MetadataReference> consumedMetadataReferences = pair.Metadata;
124 
125             ArrayList buckets = null;
126             if (consumedMetadataReferences != null && consumedMetadataReferences.Count > 0)
127             {
128                 // Add any item types that we were explicitly told to assume.
129                 if (implicitBatchableItemType != null)
130                 {
131                     consumedItemReferences = Utilities.CreateTableIfNecessary(consumedItemReferences);
132                     consumedItemReferences[implicitBatchableItemType] = String.Empty;
133                 }
134 
135                 // This method goes through all the item list references and figures out which ones
136                 // will be participating in batching, and which ones won't.  We get back a hashtable
137                 // where the key is the item name that will be participating in batching.  The values
138                 // are all String.Empty (not used).  This method may return additional item names
139                 // that weren't represented in "consumedItemReferences"... this would happen if there
140                 // were qualified metadata references in the consumedMetadataReferences table, such as
141                 // %(EmbeddedResource.Culture).
142                 Hashtable itemListsToBeBatched = GetItemListsToBeBatched(parentNode, consumedMetadataReferences, consumedItemReferences, lookup);
143 
144                 // At this point, if there were any metadata references in the tag, but no item
145                 // references to batch on, we've got a problem because we can't figure out which
146                 // item lists the user wants us to batch.
147                 if (itemListsToBeBatched.Count == 0)
148                 {
149                     foreach (string unqualifiedMetadataName in consumedMetadataReferences.Keys)
150                     {
151                         // Of course, since this throws an exception, there's no way we're ever going
152                         // to really loop here... it's just that the foreach is the only way I can
153                         // figure out how to get data out of the hashtable without knowing any of the
154                         // keys!
155                         ProjectErrorUtilities.VerifyThrowInvalidProject(false,
156                             parentNode, "CannotReferenceItemMetadataWithoutItemName", unqualifiedMetadataName);
157                     }
158                 }
159                 else
160                 {
161                     // If the batchable object consumes item metadata as well as items to be batched,
162                     // we need to partition the items consumed by the object.
163                     buckets = BucketConsumedItems(parentNode, lookup, itemListsToBeBatched, consumedMetadataReferences);
164                 }
165             }
166 
167             // if the batchable object does not consume any item metadata or items, or if the item lists it consumes are all
168             // empty, then the object does not need to be batched
169             if ((buckets == null) || (buckets.Count == 0))
170             {
171                 // create a default bucket that references the project items and properties -- this way we always have a bucket
172                 buckets = new ArrayList(1);
173                 buckets.Add(new ItemBucket(null, null, lookup, buckets.Count));
174             }
175 
176             return buckets;
177         }
178 
179         /// <summary>
180         /// Of all the item lists that are referenced in this batchable object, which ones should we
181         /// batch on, and which ones should we just pass in wholesale to every invocation of the
182         /// target/task?
183         ///
184         /// Rule #1.  If the user has referenced any *qualified* item metadata such as %(EmbeddedResource.Culture),
185         /// then that item list "EmbeddedResource" will definitely get batched.
186         ///
187         /// Rule #2.  For all the unqualified item metadata such as %(Culture), we make sure that
188         /// every single item in every single item list being passed into the task contains a value
189         /// for that metadata.  If not, it's an error.  If so, we batch all of those item lists.
190         ///
191         /// All other item lists will not be batched, and instead will be passed in wholesale to all buckets.
192         /// </summary>
193         /// <returns>Hashtable containing the item names that should be batched.</returns>
GetItemListsToBeBatched( XmlNode parentNode, Dictionary<string, MetadataReference> consumedMetadataReferences, Hashtable consumedItemReferenceNames, Lookup lookup )194         private static Hashtable GetItemListsToBeBatched
195         (
196             XmlNode parentNode,
197             Dictionary<string, MetadataReference> consumedMetadataReferences,   // Key is [string] potentially qualified metadata name
198                                                     // Value is [struct MetadataReference]
199             Hashtable consumedItemReferenceNames,       // Key is [string] item name.
200                                                     // Value is always String.Empty (unused).
201             Lookup lookup
202         )
203         {
204             // The keys in this hashtable are the names of the items that we will batch on.
205             // The values are always String.Empty (not used).
206             Hashtable itemListsToBeBatched = new Hashtable(StringComparer.OrdinalIgnoreCase);
207 
208             // Loop through all the metadata references and find the ones that are qualified
209             // with an item name.
210             foreach (MetadataReference consumedMetadataReference in consumedMetadataReferences.Values)
211             {
212                 if (consumedMetadataReference.itemName != null)
213                 {
214                     // Rule #1.  Qualified metadata reference.
215                     // For metadata references that are qualified with an item name
216                     // (e.g., %(EmbeddedResource.Culture) ), we add that item name to the list of
217                     // consumed item names, even if the item name wasn't otherwise referenced via
218                     // @(...) syntax, and even if every item in the list doesn't necessary contain
219                     // a value for this metadata.  This is the special power that you get by qualifying
220                     // the metadata reference with an item name.
221                     itemListsToBeBatched[consumedMetadataReference.itemName] = String.Empty;
222 
223                     // Also add this qualified item to the consumed item references list, because
224                     // %(EmbeddedResource.Culture) effectively means that @(EmbeddedResource) is
225                     // being consumed, even though we may not see literally "@(EmbeddedResource)"
226                     // in the tag anywhere.  Adding it to this list allows us (down below in this
227                     // method) to check that every item in this list has a value for each
228                     // unqualified metadata reference.
229                     consumedItemReferenceNames = Utilities.CreateTableIfNecessary(consumedItemReferenceNames);
230                     consumedItemReferenceNames[consumedMetadataReference.itemName] = String.Empty;
231                 }
232             }
233 
234             // Loop through all the metadata references and find the ones that are unqualified.
235             foreach (MetadataReference consumedMetadataReference in consumedMetadataReferences.Values)
236             {
237                 if (consumedMetadataReference.itemName == null)
238                 {
239                     // Rule #2.  Unqualified metadata reference.
240                     // For metadata references that are unqualified, every single consumed item
241                     // must contain a value for that metadata.  If any item doesn't, it's an error
242                     // to use unqualified metadata.
243                     if (consumedItemReferenceNames != null)
244                     {
245                         foreach (string consumedItemName in consumedItemReferenceNames.Keys)
246                         {
247                             // Loop through all the items in the item list.
248                             BuildItemGroup items = lookup.GetItems(consumedItemName);
249 
250                             if (items != null)
251                             {
252                                 // Loop through all the items in the BuildItemGroup.
253                                 foreach (BuildItem item in items)
254                                 {
255                                     ProjectErrorUtilities.VerifyThrowInvalidProject(
256                                         item.HasMetadata(consumedMetadataReference.metadataName),
257                                         parentNode, "ItemDoesNotContainValueForUnqualifiedMetadata",
258                                         item.Include, consumedItemName, consumedMetadataReference.metadataName);
259                                 }
260                             }
261 
262                             // This item list passes the test of having every single item containing
263                             // a value for this metadata.  Therefore, add this item list to the batching list.
264                             // Also, to save doing lookup.GetItems again, put the items in the table as the value.
265                             itemListsToBeBatched[consumedItemName] = items;
266                         }
267                     }
268                 }
269             }
270 
271             return itemListsToBeBatched;
272         }
273 
274         /// <summary>
275         /// Partitions the items consumed by the batchable object into buckets, where each bucket contains a set of items that
276         /// have the same value set on all item metadata consumed by the object.
277         /// </summary>
278         /// <remarks>
279         /// PERF NOTE: Given n items and m batching metadata that produce l buckets, it is usually the case that n > l > m,
280         /// because a batchable object typically uses one or two item metadata to control batching, and only has a handful of
281         /// buckets. The number of buckets is typically only large if a batchable object is using single-item batching
282         /// (where l == n). Any algorithm devised for bucketing therefore, should try to minimize n and l in its complexity
283         /// equation. The algorithm below has a complexity of O(n*lg(l)*m/2) in its comparisons, and is effectively O(n) when
284         /// l is small, and O(n*lg(n)) in the worst case as l -> n. However, note that the comparison complexity is not the
285         /// same as the operational complexity for this algorithm. The operational complexity of this algorithm is actually
286         /// O(n*m + n*lg(l)*m/2 + n*l/2 + n + l), which is effectively O(n^2) in the worst case. The additional complexity comes
287         /// from the array and metadata operations that are performed. However, those operations are extremely cheap compared
288         /// to the comparison operations, which dominate the time spent in this method.
289         /// </remarks>
290         /// <returns>ArrayList containing ItemBucket objects (can be empty), each one representing an execution batch.</returns>
BucketConsumedItems( XmlNode parentNode, Lookup lookup, Hashtable itemListsToBeBatched, Dictionary<string, MetadataReference> consumedMetadataReferences )291         private static ArrayList BucketConsumedItems
292         (
293             XmlNode parentNode,
294             Lookup lookup,
295             Hashtable itemListsToBeBatched,
296             Dictionary<string, MetadataReference> consumedMetadataReferences
297         )
298         {
299             ErrorUtilities.VerifyThrow(itemListsToBeBatched.Count > 0, "Need item types consumed by the batchable object.");
300             ErrorUtilities.VerifyThrow(consumedMetadataReferences.Count > 0, "Need item metadata consumed by the batchable object.");
301 
302             ArrayList buckets = new ArrayList();
303 
304             // Get and iterate through the list of item names that we're supposed to batch on.
305             foreach (DictionaryEntry entry in itemListsToBeBatched)
306             {
307                 string itemName = (string)entry.Key;
308 
309                 // Use the previously-fetched items, if possible
310                 BuildItemGroup items;
311                 if (entry.Value is BuildItemGroup)
312                 {
313                     items = (BuildItemGroup)entry.Value;
314                 }
315                 else
316                 {
317                     items = lookup.GetItems(itemName);
318                 }
319 
320                 if (items != null)
321                 {
322                     foreach (BuildItem item in items)
323                     {
324                         // Get this item's values for all the metadata consumed by the batchable object.
325                         Dictionary<string, string> itemMetadataValues = GetItemMetadataValues(parentNode, item, consumedMetadataReferences);
326 
327                         // put the metadata into a dummy bucket we can use for searching
328                         ItemBucket dummyBucket = ItemBucket.GetDummyBucketForComparisons(itemMetadataValues);
329 
330                         // look through all previously created buckets to find a bucket whose items have the same values as
331                         // this item for all metadata consumed by the batchable object
332                         int matchingBucketIndex = buckets.BinarySearch(dummyBucket);
333 
334                         ItemBucket matchingBucket = (matchingBucketIndex >= 0)
335                             ? (ItemBucket)buckets[matchingBucketIndex]
336                             : null;
337 
338                         // If we didn't find a bucket that matches this item, create a new one, adding
339                         // this item to the bucket.
340                         if (null == matchingBucket)
341                         {
342                             matchingBucket = new ItemBucket(itemListsToBeBatched.Keys, itemMetadataValues, lookup, buckets.Count);
343 
344                             // make sure to put the new bucket into the appropriate location
345                             // in the sorted list as indicated by the binary search
346                             // NOTE: observe the ~ operator (bitwise complement) in front of
347                             // the index -- see MSDN for more information on the return value
348                             // from the ArrayList.BinarySearch() method
349                             buckets.Insert(~matchingBucketIndex, matchingBucket);
350                         }
351 
352                         // We already have a bucket for this type of item, so add this item to
353                         // the bucket.
354                         matchingBucket.AddItem(item);
355                     }
356                 }
357             }
358 
359             // Put the buckets back in the order in which they were discovered, so that the first
360             // item declared in the project file ends up in the first batch passed into the target/task.
361             ArrayList orderedBuckets = ArrayList.Repeat(null, buckets.Count);
362             foreach (ItemBucket bucket in buckets)
363             {
364                 orderedBuckets[bucket.BucketSequenceNumber] = bucket;
365             }
366             return orderedBuckets;
367         }
368 
369         /// <summary>
370         /// Gets the values of the specified metadata for the given item.
371         /// The keys in the dictionary returned may be qualified and/or unqualified, exactly
372         /// as they are found in the metadata reference.
373         /// For example if %(x) is found, the key is "x", if %(z.x) is found, the key is "z.x".
374         /// This dictionary in each bucket is used by Expander to expand exactly the same metadata references, so
375         /// %(x) is expanded using the key "x", and %(z.x) is expanded using the key "z.x".
376         /// </summary>
377         /// <returns>the metadata values</returns>
GetItemMetadataValues( XmlNode parentNode, BuildItem item, Dictionary<string, MetadataReference> consumedMetadataReferences )378         private static Dictionary<string, string> GetItemMetadataValues
379         (
380             XmlNode parentNode,
381             BuildItem item,
382             Dictionary<string, MetadataReference> consumedMetadataReferences
383         )
384         {
385             Dictionary<string, string> itemMetadataValues = new Dictionary<string, string>(consumedMetadataReferences.Count, StringComparer.OrdinalIgnoreCase);
386 
387             foreach (KeyValuePair<string, MetadataReference> consumedMetadataReference in consumedMetadataReferences)
388             {
389                 string metadataQualifiedName = consumedMetadataReference.Key;
390                 string metadataItemName = consumedMetadataReference.Value.itemName;
391                 string metadataName = consumedMetadataReference.Value.metadataName;
392 
393                 if  (
394                         (metadataItemName != null) &&
395                         (0 != String.Compare(item.Name, metadataItemName, StringComparison.OrdinalIgnoreCase))
396                     )
397                 {
398                     itemMetadataValues[metadataQualifiedName] = String.Empty;
399                 }
400                 else
401                 {
402                     try
403                     {
404                         itemMetadataValues[metadataQualifiedName] = item.GetEvaluatedMetadataEscaped(metadataName);
405                     }
406                     catch (InvalidOperationException e)
407                     {
408                         ProjectErrorUtilities.VerifyThrowInvalidProject(false, parentNode,
409                             "CannotEvaluateItemMetadata", metadataName, e.Message);
410                     }
411                 }
412             }
413 
414             return itemMetadataValues;
415         }
416 
417 
418         #endregion
419     }
420 }
421