1 // Copyright (c) 2018 Google LLC.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "source/opt/loop_unroller.h"
16 
17 #include <limits>
18 #include <map>
19 #include <memory>
20 #include <unordered_map>
21 #include <utility>
22 #include <vector>
23 
24 #include "source/opt/ir_builder.h"
25 #include "source/opt/loop_utils.h"
26 
27 // Implements loop util unrolling functionality for fully and partially
28 // unrolling loops. Given a factor it will duplicate the loop that many times,
29 // appending each one to the end of the old loop and removing backedges, to
30 // create a new unrolled loop.
31 //
32 // 1 - User calls LoopUtils::FullyUnroll or LoopUtils::PartiallyUnroll with a
33 // loop they wish to unroll. LoopUtils::CanPerformUnroll is used to
34 // validate that a given loop can be unrolled. That method (along with the
35 // constructor of loop) checks that the IR is in the expected canonicalised
36 // format.
37 //
38 // 2 - The LoopUtils methods create a LoopUnrollerUtilsImpl object to actually
39 // perform the unrolling. This implements helper methods to copy the loop basic
40 // blocks and remap the ids of instructions used inside them.
41 //
42 // 3 - The core of LoopUnrollerUtilsImpl is the Unroll method, this method
43 // actually performs the loop duplication. It does this by creating a
44 // LoopUnrollState object and then copying the loop as given by the factor
45 // parameter. The LoopUnrollState object retains the state of the unroller
46 // between the loop body copies as each iteration needs information on the last
47 // to adjust the phi induction variable, adjust the OpLoopMerge instruction in
48 // the main loop header, and change the previous continue block to point to the
49 // new header and the new continue block to the main loop header.
50 //
51 // 4 - If the loop is to be fully unrolled then it is simply closed after step
52 // 3, with the OpLoopMerge being deleted, the backedge removed, and the
53 // condition blocks folded.
54 //
55 // 5 - If it is being partially unrolled: if the unrolling factor leaves the
56 // loop with an even number of bodies with respect to the number of loop
57 // iterations then step 3 is all that is needed. If it is uneven then we need to
58 // duplicate the loop completely and unroll the duplicated loop to cover the
59 // residual part and adjust the first loop to cover only the "even" part. For
60 // instance if you request an unroll factor of 3 on a loop with 10 iterations
61 // then copying the body three times would leave you with three bodies in the
62 // loop
63 // where the loop still iterates over each 4 times. So we make two loops one
64 // iterating once then a second loop of three iterating 3 times.
65 
66 namespace spvtools {
67 namespace opt {
68 namespace {
69 
70 // Loop control constant value for DontUnroll flag.
71 static const uint32_t kLoopControlDontUnrollIndex = 2;
72 
73 // Operand index of the loop control parameter of the OpLoopMerge.
74 static const uint32_t kLoopControlIndex = 2;
75 
76 // This utility class encapsulates some of the state we need to maintain between
77 // loop unrolls. Specifically it maintains key blocks and the induction variable
78 // in the current loop duplication step and the blocks from the previous one.
79 // This is because each step of the unroll needs to use data from both the
80 // preceding step and the original loop.
81 struct LoopUnrollState {
LoopUnrollStatespvtools::opt::__anone255fbf00111::LoopUnrollState82   LoopUnrollState()
83       : previous_phi_(nullptr),
84         previous_latch_block_(nullptr),
85         previous_condition_block_(nullptr),
86         new_phi(nullptr),
87         new_continue_block(nullptr),
88         new_condition_block(nullptr),
89         new_header_block(nullptr) {}
90 
91   // Initialize from the loop descriptor class.
LoopUnrollStatespvtools::opt::__anone255fbf00111::LoopUnrollState92   LoopUnrollState(Instruction* induction, BasicBlock* latch_block,
93                   BasicBlock* condition, std::vector<Instruction*>&& phis)
94       : previous_phi_(induction),
95         previous_latch_block_(latch_block),
96         previous_condition_block_(condition),
97         new_phi(nullptr),
98         new_continue_block(nullptr),
99         new_condition_block(nullptr),
100         new_header_block(nullptr) {
101     previous_phis_ = std::move(phis);
102   }
103 
104   // Swap the state so that the new nodes are now the previous nodes.
NextIterationStatespvtools::opt::__anone255fbf00111::LoopUnrollState105   void NextIterationState() {
106     previous_phi_ = new_phi;
107     previous_latch_block_ = new_latch_block;
108     previous_condition_block_ = new_condition_block;
109     previous_phis_ = std::move(new_phis_);
110 
111     // Clear new nodes.
112     new_phi = nullptr;
113     new_continue_block = nullptr;
114     new_condition_block = nullptr;
115     new_header_block = nullptr;
116     new_latch_block = nullptr;
117 
118     // Clear new block/instruction maps.
119     new_blocks.clear();
120     new_inst.clear();
121     ids_to_new_inst.clear();
122   }
123 
124   // The induction variable from the immediately preceding loop body.
125   Instruction* previous_phi_;
126 
127   // All the phi nodes from the previous loop iteration.
128   std::vector<Instruction*> previous_phis_;
129 
130   std::vector<Instruction*> new_phis_;
131 
132   // The previous latch block. The backedge will be removed from this and
133   // added to the new latch block.
134   BasicBlock* previous_latch_block_;
135 
136   // The previous condition block. This may be folded to flatten the loop.
137   BasicBlock* previous_condition_block_;
138 
139   // The new induction variable.
140   Instruction* new_phi;
141 
142   // The new continue block.
143   BasicBlock* new_continue_block;
144 
145   // The new condition block.
146   BasicBlock* new_condition_block;
147 
148   // The new header block.
149   BasicBlock* new_header_block;
150 
151   // The new latch block.
152   BasicBlock* new_latch_block;
153 
154   // A mapping of new block ids to the original blocks which they were copied
155   // from.
156   std::unordered_map<uint32_t, BasicBlock*> new_blocks;
157 
158   // A mapping of the original instruction ids to the instruction ids to their
159   // copies.
160   std::unordered_map<uint32_t, uint32_t> new_inst;
161 
162   std::unordered_map<uint32_t, Instruction*> ids_to_new_inst;
163 };
164 
165 // This class implements the actual unrolling. It uses a LoopUnrollState to
166 // maintain the state of the unrolling inbetween steps.
167 class LoopUnrollerUtilsImpl {
168  public:
169   using BasicBlockListTy = std::vector<std::unique_ptr<BasicBlock>>;
170 
LoopUnrollerUtilsImpl(IRContext * c,Function * function)171   LoopUnrollerUtilsImpl(IRContext* c, Function* function)
172       : context_(c),
173         function_(*function),
174         loop_condition_block_(nullptr),
175         loop_induction_variable_(nullptr),
176         number_of_loop_iterations_(0),
177         loop_step_value_(0),
178         loop_init_value_(0) {}
179 
180   // Unroll the |loop| by given |factor| by copying the whole body |factor|
181   // times. The resulting basicblock structure will remain a loop.
182   void PartiallyUnroll(Loop*, size_t factor);
183 
184   // If partially unrolling the |loop| would leave the loop with too many bodies
185   // for its number of iterations then this method should be used. This method
186   // will duplicate the |loop| completely, making the duplicated loop the
187   // successor of the original's merge block. The original loop will have its
188   // condition changed to loop over the residual part and the duplicate will be
189   // partially unrolled. The resulting structure will be two loops.
190   void PartiallyUnrollResidualFactor(Loop* loop, size_t factor);
191 
192   // Fully unroll the |loop| by copying the full body by the total number of
193   // loop iterations, folding all conditions, and removing the backedge from the
194   // continue block to the header.
195   void FullyUnroll(Loop* loop);
196 
197   // Get the ID of the variable in the |phi| paired with |label|.
198   uint32_t GetPhiDefID(const Instruction* phi, uint32_t label) const;
199 
200   // Close the loop by removing the OpLoopMerge from the |loop| header block and
201   // making the backedge point to the merge block.
202   void CloseUnrolledLoop(Loop* loop);
203 
204   // Remove the OpConditionalBranch instruction inside |conditional_block| used
205   // to branch to either exit or continue the loop and replace it with an
206   // unconditional OpBranch to block |new_target|.
207   void FoldConditionBlock(BasicBlock* condtion_block, uint32_t new_target);
208 
209   // Add all blocks_to_add_ to function_ at the |insert_point|.
210   void AddBlocksToFunction(const BasicBlock* insert_point);
211 
212   // Duplicates the |old_loop|, cloning each body and remaping the ids without
213   // removing instructions or changing relative structure. Result will be stored
214   // in |new_loop|.
215   void DuplicateLoop(Loop* old_loop, Loop* new_loop);
216 
GetLoopIterationCount() const217   inline size_t GetLoopIterationCount() const {
218     return number_of_loop_iterations_;
219   }
220 
221   // Extracts the initial state information from the |loop|.
222   void Init(Loop* loop);
223 
224   // Replace the uses of each induction variable outside the loop with the final
225   // value of the induction variable before the loop exit. To reflect the proper
226   // state of a fully unrolled loop.
227   void ReplaceInductionUseWithFinalValue(Loop* loop);
228 
229   // Remove all the instructions in the invalidated_instructions_ vector.
230   void RemoveDeadInstructions();
231 
232   // Replace any use of induction variables outwith the loop with the final
233   // value of the induction variable in the unrolled loop.
234   void ReplaceOutsideLoopUseWithFinalValue(Loop* loop);
235 
236   // Set the LoopControl operand of the OpLoopMerge instruction to be
237   // DontUnroll.
238   void MarkLoopControlAsDontUnroll(Loop* loop) const;
239 
240  private:
241   // Remap all the in |basic_block| to new IDs and keep the mapping of new ids
242   // to old
243   // ids. |loop| is used to identify special loop blocks (header, continue,
244   // ect).
245   void AssignNewResultIds(BasicBlock* basic_block);
246 
247   // Using the map built by AssignNewResultIds, replace the uses in |inst|
248   // by the id that the use maps to.
249   void RemapOperands(Instruction* inst);
250 
251   // Using the map built by AssignNewResultIds, for each instruction in
252   // |basic_block| use
253   // that map to substitute the IDs used by instructions (in the operands) with
254   // the new ids.
255   void RemapOperands(BasicBlock* basic_block);
256 
257   // Copy the whole body of the loop, all blocks dominated by the |loop| header
258   // and not dominated by the |loop| merge. The copied body will be linked to by
259   // the old |loop| continue block and the new body will link to the |loop|
260   // header via the new continue block. |eliminate_conditions| is used to decide
261   // whether or not to fold all the condition blocks other than the last one.
262   void CopyBody(Loop* loop, bool eliminate_conditions);
263 
264   // Copy a given |block_to_copy| in the |loop| and record the mapping of the
265   // old/new ids. |preserve_instructions| determines whether or not the method
266   // will modify (other than result_id) instructions which are copied.
267   void CopyBasicBlock(Loop* loop, const BasicBlock* block_to_copy,
268                       bool preserve_instructions);
269 
270   // The actual implementation of the unroll step. Unrolls |loop| by given
271   // |factor| by copying the body by |factor| times. Also propagates the
272   // induction variable value throughout the copies.
273   void Unroll(Loop* loop, size_t factor);
274 
275   // Fills the loop_blocks_inorder_ field with the ordered list of basic blocks
276   // as computed by the method ComputeLoopOrderedBlocks.
277   void ComputeLoopOrderedBlocks(Loop* loop);
278 
279   // Adds the blocks_to_add_ to both the |loop| and to the parent of |loop| if
280   // the parent exists.
281   void AddBlocksToLoop(Loop* loop) const;
282 
283   // After the partially unroll step the phi instructions in the header block
284   // will be in an illegal format. This function makes the phis legal by making
285   // the edge from the latch block come from the new latch block and the value
286   // to be the actual value of the phi at that point.
287   void LinkLastPhisToStart(Loop* loop) const;
288 
289   // A pointer to the IRContext. Used to add/remove instructions and for usedef
290   // chains.
291   IRContext* context_;
292 
293   // A reference the function the loop is within.
294   Function& function_;
295 
296   // A list of basic blocks to be added to the loop at the end of an unroll
297   // step.
298   BasicBlockListTy blocks_to_add_;
299 
300   // List of instructions which are now dead and can be removed.
301   std::vector<Instruction*> invalidated_instructions_;
302 
303   // Maintains the current state of the transform between calls to unroll.
304   LoopUnrollState state_;
305 
306   // An ordered list containing the loop basic blocks.
307   std::vector<BasicBlock*> loop_blocks_inorder_;
308 
309   // The block containing the condition check which contains a conditional
310   // branch to the merge and continue block.
311   BasicBlock* loop_condition_block_;
312 
313   // The induction variable of the loop.
314   Instruction* loop_induction_variable_;
315 
316   // Phis used in the loop need to be remapped to use the actual result values
317   // and then be remapped at the end.
318   std::vector<Instruction*> loop_phi_instructions_;
319 
320   // The number of loop iterations that the loop would preform pre-unroll.
321   size_t number_of_loop_iterations_;
322 
323   // The amount that the loop steps each iteration.
324   int64_t loop_step_value_;
325 
326   // The value the loop starts stepping from.
327   int64_t loop_init_value_;
328 };
329 
330 /*
331  * Static helper functions.
332  */
333 
334 // Retrieve the index of the OpPhi instruction |phi| which corresponds to the
335 // incoming |block| id.
GetPhiIndexFromLabel(const BasicBlock * block,const Instruction * phi)336 static uint32_t GetPhiIndexFromLabel(const BasicBlock* block,
337                                      const Instruction* phi) {
338   for (uint32_t i = 1; i < phi->NumInOperands(); i += 2) {
339     if (block->id() == phi->GetSingleWordInOperand(i)) {
340       return i;
341     }
342   }
343   assert(false && "Could not find operand in instruction.");
344   return 0;
345 }
346 
Init(Loop * loop)347 void LoopUnrollerUtilsImpl::Init(Loop* loop) {
348   loop_condition_block_ = loop->FindConditionBlock();
349 
350   // When we reinit the second loop during PartiallyUnrollResidualFactor we need
351   // to use the cached value from the duplicate step as the dominator tree
352   // basded solution, loop->FindConditionBlock, requires all the nodes to be
353   // connected up with the correct branches. They won't be at this point.
354   if (!loop_condition_block_) {
355     loop_condition_block_ = state_.new_condition_block;
356   }
357   assert(loop_condition_block_);
358 
359   loop_induction_variable_ = loop->FindConditionVariable(loop_condition_block_);
360   assert(loop_induction_variable_);
361 
362   bool found = loop->FindNumberOfIterations(
363       loop_induction_variable_, &*loop_condition_block_->ctail(),
364       &number_of_loop_iterations_, &loop_step_value_, &loop_init_value_);
365   (void)found;  // To silence unused variable warning on release builds.
366   assert(found);
367 
368   // Blocks are stored in an unordered set of ids in the loop class, we need to
369   // create the dominator ordered list.
370   ComputeLoopOrderedBlocks(loop);
371 }
372 
373 // This function is used to partially unroll the loop when the factor provided
374 // would normally lead to an illegal optimization. Instead of just unrolling the
375 // loop it creates two loops and unrolls one and adjusts the condition on the
376 // other. The end result being that the new loop pair iterates over the correct
377 // number of bodies.
PartiallyUnrollResidualFactor(Loop * loop,size_t factor)378 void LoopUnrollerUtilsImpl::PartiallyUnrollResidualFactor(Loop* loop,
379                                                           size_t factor) {
380   // TODO(1841): Handle id overflow.
381   std::unique_ptr<Instruction> new_label{new Instruction(
382       context_, SpvOp::SpvOpLabel, 0, context_->TakeNextId(), {})};
383   std::unique_ptr<BasicBlock> new_exit_bb{new BasicBlock(std::move(new_label))};
384 
385   // Save the id of the block before we move it.
386   uint32_t new_merge_id = new_exit_bb->id();
387 
388   // Add the block the list of blocks to add, we want this merge block to be
389   // right at the start of the new blocks.
390   blocks_to_add_.push_back(std::move(new_exit_bb));
391   BasicBlock* new_exit_bb_raw = blocks_to_add_[0].get();
392   Instruction& original_conditional_branch = *loop_condition_block_->tail();
393   // Duplicate the loop, providing access to the blocks of both loops.
394   // This is a naked new due to the VS2013 requirement of not having unique
395   // pointers in vectors, as it will be inserted into a vector with
396   // loop_descriptor.AddLoop.
397   std::unique_ptr<Loop> new_loop = MakeUnique<Loop>(*loop);
398 
399   // Clear the basic blocks of the new loop.
400   new_loop->ClearBlocks();
401 
402   DuplicateLoop(loop, new_loop.get());
403 
404   // Add the blocks to the function.
405   AddBlocksToFunction(loop->GetMergeBlock());
406   blocks_to_add_.clear();
407 
408   // Create a new merge block for the first loop.
409   InstructionBuilder builder{context_, new_exit_bb_raw};
410   // Make the first loop branch to the second.
411   builder.AddBranch(new_loop->GetHeaderBlock()->id());
412 
413   loop_condition_block_ = state_.new_condition_block;
414   loop_induction_variable_ = state_.new_phi;
415   // Unroll the new loop by the factor with the usual -1 to account for the
416   // existing block iteration.
417   Unroll(new_loop.get(), factor);
418 
419   LinkLastPhisToStart(new_loop.get());
420   AddBlocksToLoop(new_loop.get());
421 
422   // Add the new merge block to the back of the list of blocks to be added. It
423   // needs to be the last block added to maintain dominator order in the binary.
424   blocks_to_add_.push_back(
425       std::unique_ptr<BasicBlock>(new_loop->GetMergeBlock()));
426 
427   // Add the blocks to the function.
428   AddBlocksToFunction(loop->GetMergeBlock());
429 
430   // Reset the usedef analysis.
431   context_->InvalidateAnalysesExceptFor(
432       IRContext::Analysis::kAnalysisLoopAnalysis);
433   analysis::DefUseManager* def_use_manager = context_->get_def_use_mgr();
434 
435   // The loop condition.
436   Instruction* condition_check = def_use_manager->GetDef(
437       original_conditional_branch.GetSingleWordOperand(0));
438 
439   // This should have been checked by the LoopUtils::CanPerformUnroll function
440   // before entering this.
441   assert(loop->IsSupportedCondition(condition_check->opcode()));
442 
443   // We need to account for the initial body when calculating the remainder.
444   int64_t remainder = Loop::GetResidualConditionValue(
445       condition_check->opcode(), loop_init_value_, loop_step_value_,
446       number_of_loop_iterations_, factor);
447 
448   assert(remainder > std::numeric_limits<int32_t>::min() &&
449          remainder < std::numeric_limits<int32_t>::max());
450 
451   Instruction* new_constant = nullptr;
452 
453   // If the remainder is negative then we add a signed constant, otherwise just
454   // add an unsigned constant.
455   if (remainder < 0) {
456     new_constant = builder.GetSintConstant(static_cast<int32_t>(remainder));
457   } else {
458     new_constant = builder.GetUintConstant(static_cast<int32_t>(remainder));
459   }
460 
461   uint32_t constant_id = new_constant->result_id();
462 
463   // Update the condition check.
464   condition_check->SetInOperand(1, {constant_id});
465 
466   // Update the next phi node. The phi will have a constant value coming in from
467   // the preheader block. For the duplicated loop we need to update the constant
468   // to be the amount of iterations covered by the first loop and the incoming
469   // block to be the first loops new merge block.
470   std::vector<Instruction*> new_inductions;
471   new_loop->GetInductionVariables(new_inductions);
472 
473   std::vector<Instruction*> old_inductions;
474   loop->GetInductionVariables(old_inductions);
475   for (size_t index = 0; index < new_inductions.size(); ++index) {
476     Instruction* new_induction = new_inductions[index];
477     Instruction* old_induction = old_inductions[index];
478     // Get the index of the loop initalizer, the value coming in from the
479     // preheader.
480     uint32_t initalizer_index =
481         GetPhiIndexFromLabel(new_loop->GetPreHeaderBlock(), old_induction);
482 
483     // Replace the second loop initalizer with the phi from the first
484     new_induction->SetInOperand(initalizer_index - 1,
485                                 {old_induction->result_id()});
486     new_induction->SetInOperand(initalizer_index, {new_merge_id});
487 
488     // If the use of the first loop induction variable is outside of the loop
489     // then replace that use with the second loop induction variable.
490     uint32_t second_loop_induction = new_induction->result_id();
491     auto replace_use_outside_of_loop = [loop, second_loop_induction](
492                                            Instruction* user,
493                                            uint32_t operand_index) {
494       if (!loop->IsInsideLoop(user)) {
495         user->SetOperand(operand_index, {second_loop_induction});
496       }
497     };
498 
499     context_->get_def_use_mgr()->ForEachUse(old_induction,
500                                             replace_use_outside_of_loop);
501   }
502 
503   context_->InvalidateAnalysesExceptFor(
504       IRContext::Analysis::kAnalysisLoopAnalysis);
505 
506   context_->ReplaceAllUsesWith(loop->GetMergeBlock()->id(), new_merge_id);
507 
508   LoopDescriptor& loop_descriptor = *context_->GetLoopDescriptor(&function_);
509 
510   loop_descriptor.AddLoop(std::move(new_loop), loop->GetParent());
511 
512   RemoveDeadInstructions();
513 }
514 
515 // Mark this loop as DontUnroll as it will already be unrolled and it may not
516 // be safe to unroll a previously partially unrolled loop.
MarkLoopControlAsDontUnroll(Loop * loop) const517 void LoopUnrollerUtilsImpl::MarkLoopControlAsDontUnroll(Loop* loop) const {
518   Instruction* loop_merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
519   assert(loop_merge_inst &&
520          "Loop merge instruction could not be found after entering unroller "
521          "(should have exited before this)");
522   loop_merge_inst->SetInOperand(kLoopControlIndex,
523                                 {kLoopControlDontUnrollIndex});
524 }
525 
526 // Duplicate the |loop| body |factor| - 1 number of times while keeping the loop
527 // backedge intact. This will leave the loop with |factor| number of bodies
528 // after accounting for the initial body.
Unroll(Loop * loop,size_t factor)529 void LoopUnrollerUtilsImpl::Unroll(Loop* loop, size_t factor) {
530   // If we unroll a loop partially it will not be safe to unroll it further.
531   // This is due to the current method of calculating the number of loop
532   // iterations.
533   MarkLoopControlAsDontUnroll(loop);
534 
535   std::vector<Instruction*> inductions;
536   loop->GetInductionVariables(inductions);
537   state_ = LoopUnrollState{loop_induction_variable_, loop->GetLatchBlock(),
538                            loop_condition_block_, std::move(inductions)};
539   for (size_t i = 0; i < factor - 1; ++i) {
540     CopyBody(loop, true);
541   }
542 }
543 
RemoveDeadInstructions()544 void LoopUnrollerUtilsImpl::RemoveDeadInstructions() {
545   // Remove the dead instructions.
546   for (Instruction* inst : invalidated_instructions_) {
547     context_->KillInst(inst);
548   }
549 }
550 
ReplaceInductionUseWithFinalValue(Loop * loop)551 void LoopUnrollerUtilsImpl::ReplaceInductionUseWithFinalValue(Loop* loop) {
552   context_->InvalidateAnalysesExceptFor(
553       IRContext::Analysis::kAnalysisLoopAnalysis |
554       IRContext::Analysis::kAnalysisDefUse |
555       IRContext::Analysis::kAnalysisInstrToBlockMapping);
556 
557   std::vector<Instruction*> inductions;
558   loop->GetInductionVariables(inductions);
559 
560   for (size_t index = 0; index < inductions.size(); ++index) {
561     uint32_t trip_step_id = GetPhiDefID(state_.previous_phis_[index],
562                                         state_.previous_latch_block_->id());
563     context_->ReplaceAllUsesWith(inductions[index]->result_id(), trip_step_id);
564     invalidated_instructions_.push_back(inductions[index]);
565   }
566 }
567 
568 // Fully unroll the loop by partially unrolling it by the number of loop
569 // iterations minus one for the body already accounted for.
FullyUnroll(Loop * loop)570 void LoopUnrollerUtilsImpl::FullyUnroll(Loop* loop) {
571   // We unroll the loop by number of iterations in the loop.
572   Unroll(loop, number_of_loop_iterations_);
573 
574   // The first condition block is preserved until now so it can be copied.
575   FoldConditionBlock(loop_condition_block_, 1);
576 
577   // Delete the OpLoopMerge and remove the backedge to the header.
578   CloseUnrolledLoop(loop);
579 
580   // Mark the loop for later deletion. This allows us to preserve the loop
581   // iterators but still disregard dead loops.
582   loop->MarkLoopForRemoval();
583 
584   // If the loop has a parent add the new blocks to the parent.
585   if (loop->GetParent()) {
586     AddBlocksToLoop(loop->GetParent());
587   }
588 
589   // Add the blocks to the function.
590   AddBlocksToFunction(loop->GetMergeBlock());
591 
592   ReplaceInductionUseWithFinalValue(loop);
593 
594   RemoveDeadInstructions();
595   // Invalidate all analyses.
596   context_->InvalidateAnalysesExceptFor(
597       IRContext::Analysis::kAnalysisLoopAnalysis |
598       IRContext::Analysis::kAnalysisDefUse);
599 }
600 
601 // Copy a given basic block, give it a new result_id, and store the new block
602 // and the id mapping in the state. |preserve_instructions| is used to determine
603 // whether or not this function should edit instructions other than the
604 // |result_id|.
CopyBasicBlock(Loop * loop,const BasicBlock * itr,bool preserve_instructions)605 void LoopUnrollerUtilsImpl::CopyBasicBlock(Loop* loop, const BasicBlock* itr,
606                                            bool preserve_instructions) {
607   // Clone the block exactly, including the IDs.
608   BasicBlock* basic_block = itr->Clone(context_);
609   basic_block->SetParent(itr->GetParent());
610 
611   // Assign each result a new unique ID and keep a mapping of the old ids to
612   // the new ones.
613   AssignNewResultIds(basic_block);
614 
615   // If this is the continue block we are copying.
616   if (itr == loop->GetContinueBlock()) {
617     // Make the OpLoopMerge point to this block for the continue.
618     if (!preserve_instructions) {
619       Instruction* merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
620       merge_inst->SetInOperand(1, {basic_block->id()});
621       context_->UpdateDefUse(merge_inst);
622     }
623 
624     state_.new_continue_block = basic_block;
625   }
626 
627   // If this is the header block we are copying.
628   if (itr == loop->GetHeaderBlock()) {
629     state_.new_header_block = basic_block;
630 
631     if (!preserve_instructions) {
632       // Remove the loop merge instruction if it exists.
633       Instruction* merge_inst = basic_block->GetLoopMergeInst();
634       if (merge_inst) invalidated_instructions_.push_back(merge_inst);
635     }
636   }
637 
638   // If this is the latch block being copied, record it in the state.
639   if (itr == loop->GetLatchBlock()) state_.new_latch_block = basic_block;
640 
641   // If this is the condition block we are copying.
642   if (itr == loop_condition_block_) {
643     state_.new_condition_block = basic_block;
644   }
645 
646   // Add this block to the list of blocks to add to the function at the end of
647   // the unrolling process.
648   blocks_to_add_.push_back(std::unique_ptr<BasicBlock>(basic_block));
649 
650   // Keep tracking the old block via a map.
651   state_.new_blocks[itr->id()] = basic_block;
652 }
653 
CopyBody(Loop * loop,bool eliminate_conditions)654 void LoopUnrollerUtilsImpl::CopyBody(Loop* loop, bool eliminate_conditions) {
655   // Copy each basic block in the loop, give them new ids, and save state
656   // information.
657   for (const BasicBlock* itr : loop_blocks_inorder_) {
658     CopyBasicBlock(loop, itr, false);
659   }
660 
661   // Set the previous latch block to point to the new header.
662   Instruction* latch_branch = state_.previous_latch_block_->terminator();
663   latch_branch->SetInOperand(0, {state_.new_header_block->id()});
664   context_->UpdateDefUse(latch_branch);
665 
666   // As the algorithm copies the original loop blocks exactly, the tail of the
667   // latch block on iterations after the first one will be a branch to the new
668   // header and not the actual loop header. The last continue block in the loop
669   // should always be a backedge to the global header.
670   Instruction* new_latch_branch = state_.new_latch_block->terminator();
671   new_latch_branch->SetInOperand(0, {loop->GetHeaderBlock()->id()});
672   context_->AnalyzeUses(new_latch_branch);
673 
674   std::vector<Instruction*> inductions;
675   loop->GetInductionVariables(inductions);
676   for (size_t index = 0; index < inductions.size(); ++index) {
677     Instruction* master_copy = inductions[index];
678 
679     assert(master_copy->result_id() != 0);
680     Instruction* induction_clone =
681         state_.ids_to_new_inst[state_.new_inst[master_copy->result_id()]];
682 
683     state_.new_phis_.push_back(induction_clone);
684     assert(induction_clone->result_id() != 0);
685 
686     if (!state_.previous_phis_.empty()) {
687       state_.new_inst[master_copy->result_id()] = GetPhiDefID(
688           state_.previous_phis_[index], state_.previous_latch_block_->id());
689     } else {
690       // Do not replace the first phi block ids.
691       state_.new_inst[master_copy->result_id()] = master_copy->result_id();
692     }
693   }
694 
695   if (eliminate_conditions &&
696       state_.new_condition_block != loop_condition_block_) {
697     FoldConditionBlock(state_.new_condition_block, 1);
698   }
699 
700   // Only reference to the header block is the backedge in the latch block,
701   // don't change this.
702   state_.new_inst[loop->GetHeaderBlock()->id()] = loop->GetHeaderBlock()->id();
703 
704   for (auto& pair : state_.new_blocks) {
705     RemapOperands(pair.second);
706   }
707 
708   for (Instruction* dead_phi : state_.new_phis_)
709     invalidated_instructions_.push_back(dead_phi);
710 
711   // Swap the state so the new is now the previous.
712   state_.NextIterationState();
713 }
714 
GetPhiDefID(const Instruction * phi,uint32_t label) const715 uint32_t LoopUnrollerUtilsImpl::GetPhiDefID(const Instruction* phi,
716                                             uint32_t label) const {
717   for (uint32_t operand = 3; operand < phi->NumOperands(); operand += 2) {
718     if (phi->GetSingleWordOperand(operand) == label) {
719       return phi->GetSingleWordOperand(operand - 1);
720     }
721   }
722   assert(false && "Could not find a phi index matching the provided label");
723   return 0;
724 }
725 
FoldConditionBlock(BasicBlock * condition_block,uint32_t operand_label)726 void LoopUnrollerUtilsImpl::FoldConditionBlock(BasicBlock* condition_block,
727                                                uint32_t operand_label) {
728   // Remove the old conditional branch to the merge and continue blocks.
729   Instruction& old_branch = *condition_block->tail();
730   uint32_t new_target = old_branch.GetSingleWordOperand(operand_label);
731 
732   context_->KillInst(&old_branch);
733   // Add the new unconditional branch to the merge block.
734   InstructionBuilder builder(
735       context_, condition_block,
736       IRContext::Analysis::kAnalysisDefUse |
737           IRContext::Analysis::kAnalysisInstrToBlockMapping);
738   builder.AddBranch(new_target);
739 }
740 
CloseUnrolledLoop(Loop * loop)741 void LoopUnrollerUtilsImpl::CloseUnrolledLoop(Loop* loop) {
742   // Remove the OpLoopMerge instruction from the function.
743   Instruction* merge_inst = loop->GetHeaderBlock()->GetLoopMergeInst();
744   invalidated_instructions_.push_back(merge_inst);
745 
746   // Remove the final backedge to the header and make it point instead to the
747   // merge block.
748   Instruction* latch_instruction = state_.previous_latch_block_->terminator();
749   latch_instruction->SetInOperand(0, {loop->GetMergeBlock()->id()});
750   context_->UpdateDefUse(latch_instruction);
751 
752   // Remove all induction variables as the phis will now be invalid. Replace all
753   // uses with the constant initializer value (all uses of phis will be in
754   // the first iteration with the subsequent phis already having been removed).
755   std::vector<Instruction*> inductions;
756   loop->GetInductionVariables(inductions);
757 
758   // We can use the state instruction mechanism to replace all internal loop
759   // values within the first loop trip (as the subsequent ones will be updated
760   // by the copy function) with the value coming in from the preheader and then
761   // use context ReplaceAllUsesWith for the uses outside the loop with the final
762   // trip phi value.
763   state_.new_inst.clear();
764   for (Instruction* induction : inductions) {
765     uint32_t initalizer_id =
766         GetPhiDefID(induction, loop->GetPreHeaderBlock()->id());
767 
768     state_.new_inst[induction->result_id()] = initalizer_id;
769   }
770 
771   for (BasicBlock* block : loop_blocks_inorder_) {
772     RemapOperands(block);
773   }
774 
775   // Rewrite the last phis, since they may still reference the original phi.
776   for (Instruction* last_phi : state_.previous_phis_) {
777     RemapOperands(last_phi);
778   }
779 }
780 
781 // Uses the first loop to create a copy of the loop with new IDs.
DuplicateLoop(Loop * old_loop,Loop * new_loop)782 void LoopUnrollerUtilsImpl::DuplicateLoop(Loop* old_loop, Loop* new_loop) {
783   std::vector<BasicBlock*> new_block_order;
784 
785   // Copy every block in the old loop.
786   for (const BasicBlock* itr : loop_blocks_inorder_) {
787     CopyBasicBlock(old_loop, itr, true);
788     new_block_order.push_back(blocks_to_add_.back().get());
789   }
790 
791   // Clone the merge block, give it a new id and record it in the state.
792   BasicBlock* new_merge = old_loop->GetMergeBlock()->Clone(context_);
793   new_merge->SetParent(old_loop->GetMergeBlock()->GetParent());
794   AssignNewResultIds(new_merge);
795   state_.new_blocks[old_loop->GetMergeBlock()->id()] = new_merge;
796 
797   // Remap the operands of every instruction in the loop to point to the new
798   // copies.
799   for (auto& pair : state_.new_blocks) {
800     RemapOperands(pair.second);
801   }
802 
803   loop_blocks_inorder_ = std::move(new_block_order);
804 
805   AddBlocksToLoop(new_loop);
806 
807   new_loop->SetHeaderBlock(state_.new_header_block);
808   new_loop->SetContinueBlock(state_.new_continue_block);
809   new_loop->SetLatchBlock(state_.new_latch_block);
810   new_loop->SetMergeBlock(new_merge);
811 }
812 
813 // Whenever the utility copies a block it stores it in a tempory buffer, this
814 // function adds the buffer into the Function. The blocks will be inserted
815 // after the block |insert_point|.
AddBlocksToFunction(const BasicBlock * insert_point)816 void LoopUnrollerUtilsImpl::AddBlocksToFunction(
817     const BasicBlock* insert_point) {
818   for (auto basic_block_iterator = function_.begin();
819        basic_block_iterator != function_.end(); ++basic_block_iterator) {
820     if (basic_block_iterator->id() == insert_point->id()) {
821       basic_block_iterator.InsertBefore(&blocks_to_add_);
822       return;
823     }
824   }
825 
826   assert(
827       false &&
828       "Could not add basic blocks to function as insert point was not found.");
829 }
830 
831 // Assign all result_ids in |basic_block| instructions to new IDs and preserve
832 // the mapping of new ids to old ones.
AssignNewResultIds(BasicBlock * basic_block)833 void LoopUnrollerUtilsImpl::AssignNewResultIds(BasicBlock* basic_block) {
834   analysis::DefUseManager* def_use_mgr = context_->get_def_use_mgr();
835 
836   // Label instructions aren't covered by normal traversal of the
837   // instructions.
838   // TODO(1841): Handle id overflow.
839   uint32_t new_label_id = context_->TakeNextId();
840 
841   // Assign a new id to the label.
842   state_.new_inst[basic_block->GetLabelInst()->result_id()] = new_label_id;
843   basic_block->GetLabelInst()->SetResultId(new_label_id);
844   def_use_mgr->AnalyzeInstDefUse(basic_block->GetLabelInst());
845 
846   for (Instruction& inst : *basic_block) {
847     uint32_t old_id = inst.result_id();
848 
849     // Ignore stores etc.
850     if (old_id == 0) {
851       continue;
852     }
853 
854     // Give the instruction a new id.
855     // TODO(1841): Handle id overflow.
856     inst.SetResultId(context_->TakeNextId());
857     def_use_mgr->AnalyzeInstDef(&inst);
858 
859     // Save the mapping of old_id -> new_id.
860     state_.new_inst[old_id] = inst.result_id();
861     // Check if this instruction is the induction variable.
862     if (loop_induction_variable_->result_id() == old_id) {
863       // Save a pointer to the new copy of it.
864       state_.new_phi = &inst;
865     }
866     state_.ids_to_new_inst[inst.result_id()] = &inst;
867   }
868 }
869 
RemapOperands(Instruction * inst)870 void LoopUnrollerUtilsImpl::RemapOperands(Instruction* inst) {
871   auto remap_operands_to_new_ids = [this](uint32_t* id) {
872     auto itr = state_.new_inst.find(*id);
873 
874     if (itr != state_.new_inst.end()) {
875       *id = itr->second;
876     }
877   };
878 
879   inst->ForEachInId(remap_operands_to_new_ids);
880   context_->AnalyzeUses(inst);
881 }
882 
RemapOperands(BasicBlock * basic_block)883 void LoopUnrollerUtilsImpl::RemapOperands(BasicBlock* basic_block) {
884   for (Instruction& inst : *basic_block) {
885     RemapOperands(&inst);
886   }
887 }
888 
889 // Generate the ordered list of basic blocks in the |loop| and cache it for
890 // later use.
ComputeLoopOrderedBlocks(Loop * loop)891 void LoopUnrollerUtilsImpl::ComputeLoopOrderedBlocks(Loop* loop) {
892   loop_blocks_inorder_.clear();
893   loop->ComputeLoopStructuredOrder(&loop_blocks_inorder_);
894 }
895 
896 // Adds the blocks_to_add_ to both the loop and to the parent.
AddBlocksToLoop(Loop * loop) const897 void LoopUnrollerUtilsImpl::AddBlocksToLoop(Loop* loop) const {
898   // Add the blocks to this loop.
899   for (auto& block_itr : blocks_to_add_) {
900     loop->AddBasicBlock(block_itr.get());
901   }
902 
903   // Add the blocks to the parent as well.
904   if (loop->GetParent()) AddBlocksToLoop(loop->GetParent());
905 }
906 
LinkLastPhisToStart(Loop * loop) const907 void LoopUnrollerUtilsImpl::LinkLastPhisToStart(Loop* loop) const {
908   std::vector<Instruction*> inductions;
909   loop->GetInductionVariables(inductions);
910 
911   for (size_t i = 0; i < inductions.size(); ++i) {
912     Instruction* last_phi_in_block = state_.previous_phis_[i];
913 
914     uint32_t phi_index =
915         GetPhiIndexFromLabel(state_.previous_latch_block_, last_phi_in_block);
916     uint32_t phi_variable =
917         last_phi_in_block->GetSingleWordInOperand(phi_index - 1);
918     uint32_t phi_label = last_phi_in_block->GetSingleWordInOperand(phi_index);
919 
920     Instruction* phi = inductions[i];
921     phi->SetInOperand(phi_index - 1, {phi_variable});
922     phi->SetInOperand(phi_index, {phi_label});
923   }
924 }
925 
926 // Duplicate the |loop| body |factor| number of times while keeping the loop
927 // backedge intact.
PartiallyUnroll(Loop * loop,size_t factor)928 void LoopUnrollerUtilsImpl::PartiallyUnroll(Loop* loop, size_t factor) {
929   Unroll(loop, factor);
930   LinkLastPhisToStart(loop);
931   AddBlocksToLoop(loop);
932   AddBlocksToFunction(loop->GetMergeBlock());
933   RemoveDeadInstructions();
934 }
935 
936 /*
937  * End LoopUtilsImpl.
938  */
939 
940 }  // namespace
941 
942 /*
943  *
944  *  Begin Utils.
945  *
946  * */
947 
CanPerformUnroll()948 bool LoopUtils::CanPerformUnroll() {
949   // The loop is expected to be in structured order.
950   if (!loop_->GetHeaderBlock()->GetMergeInst()) {
951     return false;
952   }
953 
954   // Find check the loop has a condition we can find and evaluate.
955   const BasicBlock* condition = loop_->FindConditionBlock();
956   if (!condition) return false;
957 
958   // Check that we can find and process the induction variable.
959   const Instruction* induction = loop_->FindConditionVariable(condition);
960   if (!induction || induction->opcode() != SpvOpPhi) return false;
961 
962   // Check that we can find the number of loop iterations.
963   if (!loop_->FindNumberOfIterations(induction, &*condition->ctail(), nullptr))
964     return false;
965 
966   // Make sure the latch block is a unconditional branch to the header
967   // block.
968   const Instruction& branch = *loop_->GetLatchBlock()->ctail();
969   bool branching_assumption =
970       branch.opcode() == SpvOpBranch &&
971       branch.GetSingleWordInOperand(0) == loop_->GetHeaderBlock()->id();
972   if (!branching_assumption) {
973     return false;
974   }
975 
976   std::vector<Instruction*> inductions;
977   loop_->GetInductionVariables(inductions);
978 
979   // Ban breaks within the loop.
980   const std::vector<uint32_t>& merge_block_preds =
981       context_->cfg()->preds(loop_->GetMergeBlock()->id());
982   if (merge_block_preds.size() != 1) {
983     return false;
984   }
985 
986   // Ban continues within the loop.
987   const std::vector<uint32_t>& continue_block_preds =
988       context_->cfg()->preds(loop_->GetContinueBlock()->id());
989   if (continue_block_preds.size() != 1) {
990     return false;
991   }
992 
993   // Ban returns in the loop.
994   // Iterate over all the blocks within the loop and check that none of them
995   // exit the loop.
996   for (uint32_t label_id : loop_->GetBlocks()) {
997     const BasicBlock* block = context_->cfg()->block(label_id);
998     if (block->ctail()->opcode() == SpvOp::SpvOpKill ||
999         block->ctail()->opcode() == SpvOp::SpvOpReturn ||
1000         block->ctail()->opcode() == SpvOp::SpvOpReturnValue) {
1001       return false;
1002     }
1003   }
1004   // Can only unroll inner loops.
1005   if (!loop_->AreAllChildrenMarkedForRemoval()) {
1006     return false;
1007   }
1008 
1009   return true;
1010 }
1011 
PartiallyUnroll(size_t factor)1012 bool LoopUtils::PartiallyUnroll(size_t factor) {
1013   if (factor == 1 || !CanPerformUnroll()) return false;
1014 
1015   // Create the unroller utility.
1016   LoopUnrollerUtilsImpl unroller{context_,
1017                                  loop_->GetHeaderBlock()->GetParent()};
1018   unroller.Init(loop_);
1019 
1020   // If the unrolling factor is larger than or the same size as the loop just
1021   // fully unroll the loop.
1022   if (factor >= unroller.GetLoopIterationCount()) {
1023     unroller.FullyUnroll(loop_);
1024     return true;
1025   }
1026 
1027   // If the loop unrolling factor is an residual number of iterations we need to
1028   // let run the loop for the residual part then let it branch into the unrolled
1029   // remaining part. We add one when calucating the remainder to take into
1030   // account the one iteration already in the loop.
1031   if (unroller.GetLoopIterationCount() % factor != 0) {
1032     unroller.PartiallyUnrollResidualFactor(loop_, factor);
1033   } else {
1034     unroller.PartiallyUnroll(loop_, factor);
1035   }
1036 
1037   return true;
1038 }
1039 
FullyUnroll()1040 bool LoopUtils::FullyUnroll() {
1041   if (!CanPerformUnroll()) return false;
1042 
1043   std::vector<Instruction*> inductions;
1044   loop_->GetInductionVariables(inductions);
1045 
1046   LoopUnrollerUtilsImpl unroller{context_,
1047                                  loop_->GetHeaderBlock()->GetParent()};
1048 
1049   unroller.Init(loop_);
1050   unroller.FullyUnroll(loop_);
1051 
1052   return true;
1053 }
1054 
Finalize()1055 void LoopUtils::Finalize() {
1056   // Clean up the loop descriptor to preserve the analysis.
1057 
1058   LoopDescriptor* LD = context_->GetLoopDescriptor(&function_);
1059   LD->PostModificationCleanup();
1060 }
1061 
1062 /*
1063  *
1064  * Begin Pass.
1065  *
1066  */
1067 
Process()1068 Pass::Status LoopUnroller::Process() {
1069   bool changed = false;
1070   for (Function& f : *context()->module()) {
1071     LoopDescriptor* LD = context()->GetLoopDescriptor(&f);
1072     for (Loop& loop : *LD) {
1073       LoopUtils loop_utils{context(), &loop};
1074       if (!loop.HasUnrollLoopControl() || !loop_utils.CanPerformUnroll()) {
1075         continue;
1076       }
1077 
1078       if (fully_unroll_) {
1079         loop_utils.FullyUnroll();
1080       } else {
1081         loop_utils.PartiallyUnroll(unroll_factor_);
1082       }
1083       changed = true;
1084     }
1085     LD->PostModificationCleanup();
1086   }
1087 
1088   return changed ? Status::SuccessWithChange : Status::SuccessWithoutChange;
1089 }
1090 
1091 }  // namespace opt
1092 }  // namespace spvtools
1093