1 //---------------------------------------------------------------------
2 // <copyright file="SubqueryTrackingVisitor.cs" company="Microsoft">
3 //      Copyright (c) Microsoft Corporation.  All rights reserved.
4 // </copyright>
5 //
6 // @owner  Microsoft
7 // @backupOwner Microsoft
8 //---------------------------------------------------------------------
9 
10 using System;
11 using System.Collections.Generic;
12 using System.Data.Query.InternalTrees;
13 //using System.Diagnostics; // Please use PlanCompiler.Assert instead of Debug.Assert in this class...
14 
15 // It is fine to use Debug.Assert in cases where you assert an obvious thing that is supposed
16 // to prevent from simple mistakes during development (e.g. method argument validation
17 // in cases where it was you who created the variables or the variables had already been validated or
18 // in "else" clauses where due to code changes (e.g. adding a new value to an enum type) the default
19 // "else" block is chosen why the new condition should be treated separately). This kind of asserts are
20 // (can be) helpful when developing new code to avoid simple mistakes but have no or little value in
21 // the shipped product.
22 // PlanCompiler.Assert *MUST* be used to verify conditions in the trees. These would be assumptions
23 // about how the tree was built etc. - in these cases we probably want to throw an exception (this is
24 // what PlanCompiler.Assert does when the condition is not met) if either the assumption is not correct
25 // or the tree was built/rewritten not the way we thought it was.
26 // Use your judgment - if you rather remove an assert than ship it use Debug.Assert otherwise use
27 // PlanCompiler.Assert.
28 
29 namespace System.Data.Query.PlanCompiler
30 {
31     /// <summary>
32     /// The SubqueryTracking Visitor serves as a base class for the visitors that may turn
33     /// scalar subqueryies into outer-apply subqueries.
34     /// </summary>
35     internal abstract class SubqueryTrackingVisitor : BasicOpVisitorOfNode
36     {
37         #region Private State
38         protected readonly PlanCompiler m_compilerState;
39         protected Command m_command { get { return m_compilerState.Command; } }
40 
41         // nested subquery tracking
42         protected readonly Stack<Node> m_ancestors = new Stack<Node>();
43         private readonly Dictionary<Node, List<Node>> m_nodeSubqueries = new Dictionary<Node, List<Node>>();
44         #endregion
45 
46         #region Constructor
SubqueryTrackingVisitor(PlanCompiler planCompilerState)47         protected SubqueryTrackingVisitor(PlanCompiler planCompilerState)
48         {
49             this.m_compilerState = planCompilerState;
50         }
51         #endregion
52 
53         #region Subquery Handling
54         /// <summary>
55         /// Adds a subquery to the list of subqueries for the relOpNode
56         /// </summary>
57         /// <param name="relOpNode">the RelOp node</param>
58         /// <param name="subquery">the subquery</param>
AddSubqueryToRelOpNode(Node relOpNode, Node subquery)59         protected void AddSubqueryToRelOpNode(Node relOpNode, Node subquery)
60         {
61             List<Node> nestedSubqueries;
62 
63             // Create an entry in the map if there isn't one already
64             if (!m_nodeSubqueries.TryGetValue(relOpNode, out nestedSubqueries))
65             {
66                 nestedSubqueries = new List<Node>();
67                 m_nodeSubqueries[relOpNode] = nestedSubqueries;
68             }
69             // add this subquery to the list of currently tracked subqueries
70             nestedSubqueries.Add(subquery);
71         }
72 
73         /// <summary>
74         /// Add a subquery to the "parent" relop node
75         /// </summary>
76         /// <param name="outputVar">the output var to be used - at the current location - in lieu of the subquery</param>
77         /// <param name="subquery">the subquery to move</param>
78         /// <returns>a var ref node for the var returned from the subquery</returns>
AddSubqueryToParentRelOp(Var outputVar, Node subquery)79         protected Node AddSubqueryToParentRelOp(Var outputVar, Node subquery)
80         {
81             Node ancestor = FindRelOpAncestor();
82             PlanCompiler.Assert(ancestor != null, "no ancestors found?");
83             AddSubqueryToRelOpNode(ancestor, subquery);
84 
85             subquery = m_command.CreateNode(m_command.CreateVarRefOp(outputVar));
86             return subquery;
87         }
88 
89         /// <summary>
90         /// Find the first RelOp node that is in my ancestral path.
91         /// If I see a PhysicalOp, then I don't have a RelOp parent
92         /// </summary>
93         /// <returns>the first RelOp node</returns>
FindRelOpAncestor()94         protected Node FindRelOpAncestor()
95         {
96             foreach (Node n in m_ancestors)
97             {
98                 if (n.Op.IsRelOp)
99                 {
100                     return n;
101                 }
102                 else if (n.Op.IsPhysicalOp)
103                 {
104                     return null;
105                 }
106             }
107             return null;
108         }
109         #endregion
110 
111         #region Visitor Helpers
112 
113         /// <summary>
114         /// Extends the base class implementation of VisitChildren.
115         /// Wraps the call to visitchildren() by first adding the current node
116         /// to the stack of "ancestors", and then popping back the node at the end
117         /// </summary>
118         /// <param name="n">Current node</param>
VisitChildren(Node n)119         protected override void VisitChildren(Node n)
120         {
121             // Push the current node onto the stack
122             m_ancestors.Push(n);
123 
124             for (int i = 0; i < n.Children.Count; i++)
125             {
126                 n.Children[i] = VisitNode(n.Children[i]);
127             }
128 
129             m_ancestors.Pop();
130         }
131 
132         #endregion
133 
134         #region Visitor Methods
135 
136         #region RelOps
137 
138         /// <summary>
139         /// Augments a node with a number of OuterApply's - one for each subquery
140         /// If S1, S2, ... are the list of subqueries for the node, and D is the
141         /// original (driver) input, we convert D into
142         ///    OuterApply(OuterApply(D, S1), S2), ...
143         /// </summary>
144         /// <param name="input">the input (driver) node</param>
145         /// <param name="subqueries">List of subqueries</param>
146         /// <param name="inputFirst">should the input node be first in the apply chain, or the last?</param>
147         /// <returns>The resulting node tree</returns>
AugmentWithSubqueries(Node input, List<Node> subqueries, bool inputFirst)148         private Node AugmentWithSubqueries(Node input, List<Node> subqueries, bool inputFirst)
149         {
150             Node newNode;
151             int subqueriesStartPos;
152 
153             if (inputFirst)
154             {
155                 newNode = input;
156                 subqueriesStartPos = 0;
157             }
158             else
159             {
160                 newNode = subqueries[0];
161                 subqueriesStartPos = 1;
162             }
163             for (int i = subqueriesStartPos; i < subqueries.Count; i++)
164             {
165                 OuterApplyOp op = m_command.CreateOuterApplyOp();
166                 newNode = m_command.CreateNode(op, newNode, subqueries[i]);
167             }
168             if (!inputFirst)
169             {
170                 // The driver node uses a cross apply to ensure that no results are produced
171                 // for an empty driver.
172                 newNode = m_command.CreateNode(m_command.CreateCrossApplyOp(), newNode, input);
173             }
174 
175             // We may need to perform join elimination
176             m_compilerState.MarkPhaseAsNeeded(PlanCompilerPhase.JoinElimination);
177             return newNode;
178         }
179 
180         /// <summary>
181         /// Default processing for RelOps.
182         /// - First, we mark the current node as its own ancestor (so that any
183         ///   subqueries that we detect internally will be added to this node's list)
184         /// - then, visit each child
185         /// - finally, accumulate all nested subqueries.
186         /// - if the current RelOp has only one input, then add the nested subqueries via
187         ///   Outer apply nodes to this input.
188         ///
189         /// The interesting RelOps are
190         ///   Project, Filter, GroupBy, Sort,
191         /// Should we break this out into separate functions instead?
192         /// </summary>
193         /// <param name="op">Current RelOp</param>
194         /// <param name="n">Node to process</param>
195         /// <returns>Current subtree</returns>
VisitRelOpDefault(RelOp op, Node n)196         protected override Node VisitRelOpDefault(RelOp op, Node n)
197         {
198             VisitChildren(n); // visit all my children first
199 
200             // Then identify all the subqueries that have shown up as part of my node
201             // Create Apply Nodes for each of these.
202             List<Node> nestedSubqueries;
203             if (m_nodeSubqueries.TryGetValue(n, out nestedSubqueries) && nestedSubqueries.Count > 0)
204             {
205                 // Validate - this must only apply to the following nodes
206                 PlanCompiler.Assert(
207                     n.Op.OpType == OpType.Project || n.Op.OpType == OpType.Filter ||
208                     n.Op.OpType == OpType.GroupBy || n.Op.OpType == OpType.GroupByInto,
209                     "VisitRelOpDefault: Unexpected op?" + n.Op.OpType);
210 
211                 Node newInputNode = AugmentWithSubqueries(n.Child0, nestedSubqueries, true);
212                 // Now make this the new input child
213                 n.Child0 = newInputNode;
214             }
215 
216             return n;
217         }
218 
219         /// <summary>
220         /// Processing for all JoinOps
221         /// </summary>
222         /// <param name="op">JoinOp</param>
223         /// <param name="n">Current subtree</param>
224         /// <returns>Whether the node was modified</returns>
ProcessJoinOp(JoinBaseOp op, Node n)225         protected bool ProcessJoinOp(JoinBaseOp op, Node n)
226         {
227             VisitChildren(n); // visit all my children first
228 
229             // then check to see if we have any nested subqueries. This can only
230             // occur in the join condition.
231             // What we'll do in this case is to convert the join condition - "p" into
232             //    p -> Exists(Filter(SingleRowTableOp, p))
233             // We will then move the subqueries into an outerApply on the SingleRowTable
234             List<Node> nestedSubqueries;
235             if (!m_nodeSubqueries.TryGetValue(n, out nestedSubqueries))
236             {
237                 return false;
238             }
239 
240             PlanCompiler.Assert(n.Op.OpType == OpType.InnerJoin ||
241                 n.Op.OpType == OpType.LeftOuterJoin ||
242                 n.Op.OpType == OpType.FullOuterJoin, "unexpected op?");
243             PlanCompiler.Assert(n.HasChild2, "missing second child to JoinOp?");
244             Node joinCondition = n.Child2;
245 
246             Node inputNode = m_command.CreateNode(m_command.CreateSingleRowTableOp());
247             inputNode = AugmentWithSubqueries(inputNode, nestedSubqueries, true);
248             Node filterNode = m_command.CreateNode(m_command.CreateFilterOp(), inputNode, joinCondition);
249             Node existsNode = m_command.CreateNode(m_command.CreateExistsOp(), filterNode);
250 
251             n.Child2 = existsNode;
252             return true;
253         }
254 
255         /// <summary>
256         /// Visitor for UnnestOp. If the child has any subqueries, we need to convert this
257         /// into an
258         ///    OuterApply(S, Unnest)
259         /// unlike the other cases where the OuterApply will appear as the input of the node
260         /// </summary>
261         /// <param name="op">the unnestOp</param>
262         /// <param name="n">current subtree</param>
263         /// <returns>modified subtree</returns>
Visit(UnnestOp op, Node n)264         public override Node Visit(UnnestOp op, Node n)
265         {
266             VisitChildren(n); // visit all my children first
267 
268             List<Node> nestedSubqueries;
269             if (m_nodeSubqueries.TryGetValue(n, out nestedSubqueries))
270             {
271                 // We pass 'inputFirst = false' since the subqueries contribute to the driver in the unnest,
272                 // they are not generated by the unnest.
273                 Node newNode = AugmentWithSubqueries(n, nestedSubqueries, false /* inputFirst */);
274                 return newNode;
275             }
276             else
277             {
278                 return n;
279             }
280         }
281 
282         #endregion
283 
284         #endregion
285 
286     }
287 }
288