1 //===- HexagonGenInsert.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "BitTracker.h"
10 #include "HexagonBitTracker.h"
11 #include "HexagonInstrInfo.h"
12 #include "HexagonRegisterInfo.h"
13 #include "HexagonSubtarget.h"
14 #include "llvm/ADT/BitVector.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/GraphTraits.h"
17 #include "llvm/ADT/PostOrderIterator.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/SmallSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/CodeGen/MachineBasicBlock.h"
23 #include "llvm/CodeGen/MachineDominators.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineFunctionPass.h"
26 #include "llvm/CodeGen/MachineInstr.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineOperand.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/CodeGen/TargetRegisterInfo.h"
31 #include "llvm/IR/DebugLoc.h"
32 #include "llvm/InitializePasses.h"
33 #include "llvm/Pass.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/MathExtras.h"
37 #include "llvm/Support/Timer.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <algorithm>
40 #include <cassert>
41 #include <cstdint>
42 #include <iterator>
43 #include <utility>
44 #include <vector>
45 
46 #define DEBUG_TYPE "hexinsert"
47 
48 using namespace llvm;
49 
50 static cl::opt<unsigned>
51     VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U), cl::Hidden,
52                     cl::desc("Vreg# cutoff for insert generation."));
53 // The distance cutoff is selected based on the precheckin-perf results:
54 // cutoffs 20, 25, 35, and 40 are worse than 30.
55 static cl::opt<unsigned>
56     VRegDistCutoff("insert-dist-cutoff", cl::init(30U), cl::Hidden,
57                    cl::desc("Vreg distance cutoff for insert "
58                             "generation."));
59 
60 // Limit the container sizes for extreme cases where we run out of memory.
61 static cl::opt<unsigned>
62     MaxORLSize("insert-max-orl", cl::init(4096), cl::Hidden,
63                cl::desc("Maximum size of OrderedRegisterList"));
64 static cl::opt<unsigned> MaxIFMSize("insert-max-ifmap", cl::init(1024),
65                                     cl::Hidden,
66                                     cl::desc("Maximum size of IFMap"));
67 
68 static cl::opt<bool> OptTiming("insert-timing", cl::Hidden,
69                                cl::desc("Enable timing of insert generation"));
70 static cl::opt<bool>
71     OptTimingDetail("insert-timing-detail", cl::Hidden,
72                     cl::desc("Enable detailed timing of insert "
73                              "generation"));
74 
75 static cl::opt<bool> OptSelectAll0("insert-all0", cl::init(false), cl::Hidden);
76 static cl::opt<bool> OptSelectHas0("insert-has0", cl::init(false), cl::Hidden);
77 // Whether to construct constant values via "insert". Could eliminate constant
78 // extenders, but often not practical.
79 static cl::opt<bool> OptConst("insert-const", cl::init(false), cl::Hidden);
80 
81 // The preprocessor gets confused when the DEBUG macro is passed larger
82 // chunks of code. Use this function to detect debugging.
83 inline static bool isDebug() {
84 #ifndef NDEBUG
85   return DebugFlag && isCurrentDebugType(DEBUG_TYPE);
86 #else
87   return false;
88 #endif
89 }
90 
91 namespace {
92 
93   // Set of virtual registers, based on BitVector.
94   struct RegisterSet : private BitVector {
95     RegisterSet() = default;
96     explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
97     RegisterSet(const RegisterSet &RS) = default;
98     RegisterSet &operator=(const RegisterSet &RS) = default;
99 
100     using BitVector::clear;
101 
102     unsigned find_first() const {
103       int First = BitVector::find_first();
104       if (First < 0)
105         return 0;
106       return x2v(First);
107     }
108 
109     unsigned find_next(unsigned Prev) const {
110       int Next = BitVector::find_next(v2x(Prev));
111       if (Next < 0)
112         return 0;
113       return x2v(Next);
114     }
115 
116     RegisterSet &insert(unsigned R) {
117       unsigned Idx = v2x(R);
118       ensure(Idx);
119       return static_cast<RegisterSet&>(BitVector::set(Idx));
120     }
121     RegisterSet &remove(unsigned R) {
122       unsigned Idx = v2x(R);
123       if (Idx >= size())
124         return *this;
125       return static_cast<RegisterSet&>(BitVector::reset(Idx));
126     }
127 
128     RegisterSet &insert(const RegisterSet &Rs) {
129       return static_cast<RegisterSet&>(BitVector::operator|=(Rs));
130     }
131     RegisterSet &remove(const RegisterSet &Rs) {
132       return static_cast<RegisterSet&>(BitVector::reset(Rs));
133     }
134 
135     reference operator[](unsigned R) {
136       unsigned Idx = v2x(R);
137       ensure(Idx);
138       return BitVector::operator[](Idx);
139     }
140     bool operator[](unsigned R) const {
141       unsigned Idx = v2x(R);
142       assert(Idx < size());
143       return BitVector::operator[](Idx);
144     }
145     bool has(unsigned R) const {
146       unsigned Idx = v2x(R);
147       if (Idx >= size())
148         return false;
149       return BitVector::test(Idx);
150     }
151 
152     bool empty() const {
153       return !BitVector::any();
154     }
155     bool includes(const RegisterSet &Rs) const {
156       // A.BitVector::test(B)  <=>  A-B != {}
157       return !Rs.BitVector::test(*this);
158     }
159     bool intersects(const RegisterSet &Rs) const {
160       return BitVector::anyCommon(Rs);
161     }
162 
163   private:
164     void ensure(unsigned Idx) {
165       if (size() <= Idx)
166         resize(std::max(Idx+1, 32U));
167     }
168 
169     static inline unsigned v2x(unsigned v) {
170       return Register::virtReg2Index(v);
171     }
172 
173     static inline unsigned x2v(unsigned x) {
174       return Register::index2VirtReg(x);
175     }
176   };
177 
178   struct PrintRegSet {
179     PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)
180       : RS(S), TRI(RI) {}
181 
182     friend raw_ostream &operator<< (raw_ostream &OS,
183           const PrintRegSet &P);
184 
185   private:
186     const RegisterSet &RS;
187     const TargetRegisterInfo *TRI;
188   };
189 
190   raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
191     OS << '{';
192     for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
193       OS << ' ' << printReg(R, P.TRI);
194     OS << " }";
195     return OS;
196   }
197 
198   // A convenience class to associate unsigned numbers (such as virtual
199   // registers) with unsigned numbers.
200   struct UnsignedMap : public DenseMap<unsigned,unsigned> {
201     UnsignedMap() = default;
202 
203   private:
204     using BaseType = DenseMap<unsigned, unsigned>;
205   };
206 
207   // A utility to establish an ordering between virtual registers:
208   // VRegA < VRegB  <=>  RegisterOrdering[VRegA] < RegisterOrdering[VRegB]
209   // This is meant as a cache for the ordering of virtual registers defined
210   // by a potentially expensive comparison function, or obtained by a proce-
211   // dure that should not be repeated each time two registers are compared.
212   struct RegisterOrdering : public UnsignedMap {
213     RegisterOrdering() = default;
214 
215     unsigned operator[](unsigned VR) const {
216       const_iterator F = find(VR);
217       assert(F != end());
218       return F->second;
219     }
220 
221     // Add operator(), so that objects of this class can be used as
222     // comparators in std::sort et al.
223     bool operator() (unsigned VR1, unsigned VR2) const {
224       return operator[](VR1) < operator[](VR2);
225     }
226   };
227 
228   // Ordering of bit values. This class does not have operator[], but
229   // is supplies a comparison operator() for use in std:: algorithms.
230   // The order is as follows:
231   // - 0 < 1 < ref
232   // - ref1 < ref2, if ord(ref1.Reg) < ord(ref2.Reg),
233   //   or ord(ref1.Reg) == ord(ref2.Reg), and ref1.Pos < ref2.Pos.
234   struct BitValueOrdering {
235     BitValueOrdering(const RegisterOrdering &RB) : BaseOrd(RB) {}
236 
237     bool operator() (const BitTracker::BitValue &V1,
238           const BitTracker::BitValue &V2) const;
239 
240     const RegisterOrdering &BaseOrd;
241   };
242 
243 } // end anonymous namespace
244 
245 bool BitValueOrdering::operator() (const BitTracker::BitValue &V1,
246       const BitTracker::BitValue &V2) const {
247   if (V1 == V2)
248     return false;
249   // V1==0 => true, V2==0 => false
250   if (V1.is(0) || V2.is(0))
251     return V1.is(0);
252   // Neither of V1,V2 is 0, and V1!=V2.
253   // V2==1 => false, V1==1 => true
254   if (V2.is(1) || V1.is(1))
255     return !V2.is(1);
256   // Both V1,V2 are refs.
257   unsigned Ind1 = BaseOrd[V1.RefI.Reg], Ind2 = BaseOrd[V2.RefI.Reg];
258   if (Ind1 != Ind2)
259     return Ind1 < Ind2;
260   // If V1.Pos==V2.Pos
261   assert(V1.RefI.Pos != V2.RefI.Pos && "Bit values should be different");
262   return V1.RefI.Pos < V2.RefI.Pos;
263 }
264 
265 namespace {
266 
267   // Cache for the BitTracker's cell map. Map lookup has a logarithmic
268   // complexity, this class will memoize the lookup results to reduce
269   // the access time for repeated lookups of the same cell.
270   struct CellMapShadow {
271     CellMapShadow(const BitTracker &T) : BT(T) {}
272 
273     const BitTracker::RegisterCell &lookup(unsigned VR) {
274       unsigned RInd = Register::virtReg2Index(VR);
275       // Grow the vector to at least 32 elements.
276       if (RInd >= CVect.size())
277         CVect.resize(std::max(RInd+16, 32U), nullptr);
278       const BitTracker::RegisterCell *CP = CVect[RInd];
279       if (CP == nullptr)
280         CP = CVect[RInd] = &BT.lookup(VR);
281       return *CP;
282     }
283 
284     const BitTracker &BT;
285 
286   private:
287     using CellVectType = std::vector<const BitTracker::RegisterCell *>;
288 
289     CellVectType CVect;
290   };
291 
292   // Comparator class for lexicographic ordering of virtual registers
293   // according to the corresponding BitTracker::RegisterCell objects.
294   struct RegisterCellLexCompare {
295     RegisterCellLexCompare(const BitValueOrdering &BO, CellMapShadow &M)
296       : BitOrd(BO), CM(M) {}
297 
298     bool operator() (unsigned VR1, unsigned VR2) const;
299 
300   private:
301     const BitValueOrdering &BitOrd;
302     CellMapShadow &CM;
303   };
304 
305   // Comparator class for lexicographic ordering of virtual registers
306   // according to the specified bits of the corresponding BitTracker::
307   // RegisterCell objects.
308   // Specifically, this class will be used to compare bit B of a register
309   // cell for a selected virtual register R with bit N of any register
310   // other than R.
311   struct RegisterCellBitCompareSel {
312     RegisterCellBitCompareSel(unsigned R, unsigned B, unsigned N,
313           const BitValueOrdering &BO, CellMapShadow &M)
314       : SelR(R), SelB(B), BitN(N), BitOrd(BO), CM(M) {}
315 
316     bool operator() (unsigned VR1, unsigned VR2) const;
317 
318   private:
319     const unsigned SelR, SelB;
320     const unsigned BitN;
321     const BitValueOrdering &BitOrd;
322     CellMapShadow &CM;
323   };
324 
325 } // end anonymous namespace
326 
327 bool RegisterCellLexCompare::operator() (unsigned VR1, unsigned VR2) const {
328   // Ordering of registers, made up from two given orderings:
329   // - the ordering of the register numbers, and
330   // - the ordering of register cells.
331   // Def. R1 < R2 if:
332   // - cell(R1) < cell(R2), or
333   // - cell(R1) == cell(R2), and index(R1) < index(R2).
334   //
335   // For register cells, the ordering is lexicographic, with index 0 being
336   // the most significant.
337   if (VR1 == VR2)
338     return false;
339 
340   const BitTracker::RegisterCell &RC1 = CM.lookup(VR1), &RC2 = CM.lookup(VR2);
341   uint16_t W1 = RC1.width(), W2 = RC2.width();
342   for (uint16_t i = 0, w = std::min(W1, W2); i < w; ++i) {
343     const BitTracker::BitValue &V1 = RC1[i], &V2 = RC2[i];
344     if (V1 != V2)
345       return BitOrd(V1, V2);
346   }
347   // Cells are equal up until the common length.
348   if (W1 != W2)
349     return W1 < W2;
350 
351   return BitOrd.BaseOrd[VR1] < BitOrd.BaseOrd[VR2];
352 }
353 
354 bool RegisterCellBitCompareSel::operator() (unsigned VR1, unsigned VR2) const {
355   if (VR1 == VR2)
356     return false;
357   const BitTracker::RegisterCell &RC1 = CM.lookup(VR1);
358   const BitTracker::RegisterCell &RC2 = CM.lookup(VR2);
359   uint16_t W1 = RC1.width(), W2 = RC2.width();
360   uint16_t Bit1 = (VR1 == SelR) ? SelB : BitN;
361   uint16_t Bit2 = (VR2 == SelR) ? SelB : BitN;
362   // If Bit1 exceeds the width of VR1, then:
363   // - return false, if at the same time Bit2 exceeds VR2, or
364   // - return true, otherwise.
365   // (I.e. "a bit value that does not exist is less than any bit value
366   // that does exist".)
367   if (W1 <= Bit1)
368     return Bit2 < W2;
369   // If Bit1 is within VR1, but Bit2 is not within VR2, return false.
370   if (W2 <= Bit2)
371     return false;
372 
373   const BitTracker::BitValue &V1 = RC1[Bit1], V2 = RC2[Bit2];
374   if (V1 != V2)
375     return BitOrd(V1, V2);
376   return false;
377 }
378 
379 namespace {
380 
381   class OrderedRegisterList {
382     using ListType = std::vector<unsigned>;
383     const unsigned MaxSize;
384 
385   public:
386     OrderedRegisterList(const RegisterOrdering &RO)
387       : MaxSize(MaxORLSize), Ord(RO) {}
388 
389     void insert(unsigned VR);
390     void remove(unsigned VR);
391 
392     unsigned operator[](unsigned Idx) const {
393       assert(Idx < Seq.size());
394       return Seq[Idx];
395     }
396 
397     unsigned size() const {
398       return Seq.size();
399     }
400 
401     using iterator = ListType::iterator;
402     using const_iterator = ListType::const_iterator;
403 
404     iterator begin() { return Seq.begin(); }
405     iterator end() { return Seq.end(); }
406     const_iterator begin() const { return Seq.begin(); }
407     const_iterator end() const { return Seq.end(); }
408 
409     // Convenience function to convert an iterator to the corresponding index.
410     unsigned idx(iterator It) const { return It-begin(); }
411 
412   private:
413     ListType Seq;
414     const RegisterOrdering &Ord;
415   };
416 
417   struct PrintORL {
418     PrintORL(const OrderedRegisterList &L, const TargetRegisterInfo *RI)
419       : RL(L), TRI(RI) {}
420 
421     friend raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P);
422 
423   private:
424     const OrderedRegisterList &RL;
425     const TargetRegisterInfo *TRI;
426   };
427 
428   raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P) {
429     OS << '(';
430     OrderedRegisterList::const_iterator B = P.RL.begin(), E = P.RL.end();
431     for (OrderedRegisterList::const_iterator I = B; I != E; ++I) {
432       if (I != B)
433         OS << ", ";
434       OS << printReg(*I, P.TRI);
435     }
436     OS << ')';
437     return OS;
438   }
439 
440 } // end anonymous namespace
441 
442 void OrderedRegisterList::insert(unsigned VR) {
443   iterator L = llvm::lower_bound(Seq, VR, Ord);
444   if (L == Seq.end())
445     Seq.push_back(VR);
446   else
447     Seq.insert(L, VR);
448 
449   unsigned S = Seq.size();
450   if (S > MaxSize)
451     Seq.resize(MaxSize);
452   assert(Seq.size() <= MaxSize);
453 }
454 
455 void OrderedRegisterList::remove(unsigned VR) {
456   iterator L = llvm::lower_bound(Seq, VR, Ord);
457   if (L != Seq.end())
458     Seq.erase(L);
459 }
460 
461 namespace {
462 
463   // A record of the insert form. The fields correspond to the operands
464   // of the "insert" instruction:
465   // ... = insert(SrcR, InsR, #Wdh, #Off)
466   struct IFRecord {
467     IFRecord(unsigned SR = 0, unsigned IR = 0, uint16_t W = 0, uint16_t O = 0)
468       : SrcR(SR), InsR(IR), Wdh(W), Off(O) {}
469 
470     unsigned SrcR, InsR;
471     uint16_t Wdh, Off;
472   };
473 
474   struct PrintIFR {
475     PrintIFR(const IFRecord &R, const TargetRegisterInfo *RI)
476       : IFR(R), TRI(RI) {}
477 
478   private:
479     friend raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P);
480 
481     const IFRecord &IFR;
482     const TargetRegisterInfo *TRI;
483   };
484 
485   raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P) {
486     unsigned SrcR = P.IFR.SrcR, InsR = P.IFR.InsR;
487     OS << '(' << printReg(SrcR, P.TRI) << ',' << printReg(InsR, P.TRI)
488        << ",#" << P.IFR.Wdh << ",#" << P.IFR.Off << ')';
489     return OS;
490   }
491 
492   using IFRecordWithRegSet = std::pair<IFRecord, RegisterSet>;
493 
494 } // end anonymous namespace
495 
496 namespace llvm {
497 
498   void initializeHexagonGenInsertPass(PassRegistry&);
499   FunctionPass *createHexagonGenInsert();
500 
501 } // end namespace llvm
502 
503 namespace {
504 
505   class HexagonGenInsert : public MachineFunctionPass {
506   public:
507     static char ID;
508 
509     HexagonGenInsert() : MachineFunctionPass(ID) {
510       initializeHexagonGenInsertPass(*PassRegistry::getPassRegistry());
511     }
512 
513     StringRef getPassName() const override {
514       return "Hexagon generate \"insert\" instructions";
515     }
516 
517     void getAnalysisUsage(AnalysisUsage &AU) const override {
518       AU.addRequired<MachineDominatorTree>();
519       AU.addPreserved<MachineDominatorTree>();
520       MachineFunctionPass::getAnalysisUsage(AU);
521     }
522 
523     bool runOnMachineFunction(MachineFunction &MF) override;
524 
525   private:
526     using PairMapType = DenseMap<std::pair<unsigned, unsigned>, unsigned>;
527 
528     void buildOrderingMF(RegisterOrdering &RO) const;
529     void buildOrderingBT(RegisterOrdering &RB, RegisterOrdering &RO) const;
530     bool isIntClass(const TargetRegisterClass *RC) const;
531     bool isConstant(unsigned VR) const;
532     bool isSmallConstant(unsigned VR) const;
533     bool isValidInsertForm(unsigned DstR, unsigned SrcR, unsigned InsR,
534           uint16_t L, uint16_t S) const;
535     bool findSelfReference(unsigned VR) const;
536     bool findNonSelfReference(unsigned VR) const;
537     void getInstrDefs(const MachineInstr *MI, RegisterSet &Defs) const;
538     void getInstrUses(const MachineInstr *MI, RegisterSet &Uses) const;
539     unsigned distance(const MachineBasicBlock *FromB,
540           const MachineBasicBlock *ToB, const UnsignedMap &RPO,
541           PairMapType &M) const;
542     unsigned distance(MachineBasicBlock::const_iterator FromI,
543           MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO,
544           PairMapType &M) const;
545     bool findRecordInsertForms(unsigned VR, OrderedRegisterList &AVs);
546     void collectInBlock(MachineBasicBlock *B, OrderedRegisterList &AVs);
547     void findRemovableRegisters(unsigned VR, IFRecord IF,
548           RegisterSet &RMs) const;
549     void computeRemovableRegisters();
550 
551     void pruneEmptyLists();
552     void pruneCoveredSets(unsigned VR);
553     void pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, PairMapType &M);
554     void pruneRegCopies(unsigned VR);
555     void pruneCandidates();
556     void selectCandidates();
557     bool generateInserts();
558 
559     bool removeDeadCode(MachineDomTreeNode *N);
560 
561     // IFRecord coupled with a set of potentially removable registers:
562     using IFListType = std::vector<IFRecordWithRegSet>;
563     using IFMapType = DenseMap<unsigned, IFListType>; // vreg -> IFListType
564 
565     void dump_map() const;
566 
567     const HexagonInstrInfo *HII = nullptr;
568     const HexagonRegisterInfo *HRI = nullptr;
569 
570     MachineFunction *MFN;
571     MachineRegisterInfo *MRI;
572     MachineDominatorTree *MDT;
573     CellMapShadow *CMS;
574 
575     RegisterOrdering BaseOrd;
576     RegisterOrdering CellOrd;
577     IFMapType IFMap;
578   };
579 
580 } // end anonymous namespace
581 
582 char HexagonGenInsert::ID = 0;
583 
584 void HexagonGenInsert::dump_map() const {
585   for (const auto &I : IFMap) {
586     dbgs() << "  " << printReg(I.first, HRI) << ":\n";
587     const IFListType &LL = I.second;
588     for (const auto &J : LL)
589       dbgs() << "    " << PrintIFR(J.first, HRI) << ", "
590              << PrintRegSet(J.second, HRI) << '\n';
591   }
592 }
593 
594 void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const {
595   unsigned Index = 0;
596 
597   for (const MachineBasicBlock &B : *MFN) {
598     if (!CMS->BT.reached(&B))
599       continue;
600 
601     for (const MachineInstr &MI : B) {
602       for (const MachineOperand &MO : MI.operands()) {
603         if (MO.isReg() && MO.isDef()) {
604           Register R = MO.getReg();
605           assert(MO.getSubReg() == 0 && "Unexpected subregister in definition");
606           if (R.isVirtual())
607             RO.insert(std::make_pair(R, Index++));
608         }
609       }
610     }
611   }
612   // Since some virtual registers may have had their def and uses eliminated,
613   // they are no longer referenced in the code, and so they will not appear
614   // in the map.
615 }
616 
617 void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB,
618       RegisterOrdering &RO) const {
619   // Create a vector of all virtual registers (collect them from the base
620   // ordering RB), and then sort it using the RegisterCell comparator.
621   BitValueOrdering BVO(RB);
622   RegisterCellLexCompare LexCmp(BVO, *CMS);
623 
624   using SortableVectorType = std::vector<unsigned>;
625 
626   SortableVectorType VRs;
627   for (auto &I : RB)
628     VRs.push_back(I.first);
629   llvm::sort(VRs, LexCmp);
630   // Transfer the results to the outgoing register ordering.
631   for (unsigned i = 0, n = VRs.size(); i < n; ++i)
632     RO.insert(std::make_pair(VRs[i], i));
633 }
634 
635 inline bool HexagonGenInsert::isIntClass(const TargetRegisterClass *RC) const {
636   return RC == &Hexagon::IntRegsRegClass || RC == &Hexagon::DoubleRegsRegClass;
637 }
638 
639 bool HexagonGenInsert::isConstant(unsigned VR) const {
640   const BitTracker::RegisterCell &RC = CMS->lookup(VR);
641   uint16_t W = RC.width();
642   for (uint16_t i = 0; i < W; ++i) {
643     const BitTracker::BitValue &BV = RC[i];
644     if (BV.is(0) || BV.is(1))
645       continue;
646     return false;
647   }
648   return true;
649 }
650 
651 bool HexagonGenInsert::isSmallConstant(unsigned VR) const {
652   const BitTracker::RegisterCell &RC = CMS->lookup(VR);
653   uint16_t W = RC.width();
654   if (W > 64)
655     return false;
656   uint64_t V = 0, B = 1;
657   for (uint16_t i = 0; i < W; ++i) {
658     const BitTracker::BitValue &BV = RC[i];
659     if (BV.is(1))
660       V |= B;
661     else if (!BV.is(0))
662       return false;
663     B <<= 1;
664   }
665 
666   // For 32-bit registers, consider: Rd = #s16.
667   if (W == 32)
668     return isInt<16>(V);
669 
670   // For 64-bit registers, it's Rdd = #s8 or Rdd = combine(#s8,#s8)
671   return isInt<8>(Lo_32(V)) && isInt<8>(Hi_32(V));
672 }
673 
674 bool HexagonGenInsert::isValidInsertForm(unsigned DstR, unsigned SrcR,
675       unsigned InsR, uint16_t L, uint16_t S) const {
676   const TargetRegisterClass *DstRC = MRI->getRegClass(DstR);
677   const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcR);
678   const TargetRegisterClass *InsRC = MRI->getRegClass(InsR);
679   // Only integet (32-/64-bit) register classes.
680   if (!isIntClass(DstRC) || !isIntClass(SrcRC) || !isIntClass(InsRC))
681     return false;
682   // The "source" register must be of the same class as DstR.
683   if (DstRC != SrcRC)
684     return false;
685   if (DstRC == InsRC)
686     return true;
687   // A 64-bit register can only be generated from other 64-bit registers.
688   if (DstRC == &Hexagon::DoubleRegsRegClass)
689     return false;
690   // Otherwise, the L and S cannot span 32-bit word boundary.
691   if (S < 32 && S+L > 32)
692     return false;
693   return true;
694 }
695 
696 bool HexagonGenInsert::findSelfReference(unsigned VR) const {
697   const BitTracker::RegisterCell &RC = CMS->lookup(VR);
698   for (uint16_t i = 0, w = RC.width(); i < w; ++i) {
699     const BitTracker::BitValue &V = RC[i];
700     if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == VR)
701       return true;
702   }
703   return false;
704 }
705 
706 bool HexagonGenInsert::findNonSelfReference(unsigned VR) const {
707   BitTracker::RegisterCell RC = CMS->lookup(VR);
708   for (uint16_t i = 0, w = RC.width(); i < w; ++i) {
709     const BitTracker::BitValue &V = RC[i];
710     if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != VR)
711       return true;
712   }
713   return false;
714 }
715 
716 void HexagonGenInsert::getInstrDefs(const MachineInstr *MI,
717       RegisterSet &Defs) const {
718   for (const MachineOperand &MO : MI->operands()) {
719     if (!MO.isReg() || !MO.isDef())
720       continue;
721     Register R = MO.getReg();
722     if (!R.isVirtual())
723       continue;
724     Defs.insert(R);
725   }
726 }
727 
728 void HexagonGenInsert::getInstrUses(const MachineInstr *MI,
729       RegisterSet &Uses) const {
730   for (const MachineOperand &MO : MI->operands()) {
731     if (!MO.isReg() || !MO.isUse())
732       continue;
733     Register R = MO.getReg();
734     if (!R.isVirtual())
735       continue;
736     Uses.insert(R);
737   }
738 }
739 
740 unsigned HexagonGenInsert::distance(const MachineBasicBlock *FromB,
741       const MachineBasicBlock *ToB, const UnsignedMap &RPO,
742       PairMapType &M) const {
743   // Forward distance from the end of a block to the beginning of it does
744   // not make sense. This function should not be called with FromB == ToB.
745   assert(FromB != ToB);
746 
747   unsigned FromN = FromB->getNumber(), ToN = ToB->getNumber();
748   // If we have already computed it, return the cached result.
749   PairMapType::iterator F = M.find(std::make_pair(FromN, ToN));
750   if (F != M.end())
751     return F->second;
752   unsigned ToRPO = RPO.lookup(ToN);
753 
754   unsigned MaxD = 0;
755 
756   for (const MachineBasicBlock *PB : ToB->predecessors()) {
757     // Skip back edges. Also, if FromB is a predecessor of ToB, the distance
758     // along that path will be 0, and we don't need to do any calculations
759     // on it.
760     if (PB == FromB || RPO.lookup(PB->getNumber()) >= ToRPO)
761       continue;
762     unsigned D = PB->size() + distance(FromB, PB, RPO, M);
763     if (D > MaxD)
764       MaxD = D;
765   }
766 
767   // Memoize the result for later lookup.
768   M.insert(std::make_pair(std::make_pair(FromN, ToN), MaxD));
769   return MaxD;
770 }
771 
772 unsigned HexagonGenInsert::distance(MachineBasicBlock::const_iterator FromI,
773       MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO,
774       PairMapType &M) const {
775   const MachineBasicBlock *FB = FromI->getParent(), *TB = ToI->getParent();
776   if (FB == TB)
777     return std::distance(FromI, ToI);
778   unsigned D1 = std::distance(TB->begin(), ToI);
779   unsigned D2 = distance(FB, TB, RPO, M);
780   unsigned D3 = std::distance(FromI, FB->end());
781   return D1+D2+D3;
782 }
783 
784 bool HexagonGenInsert::findRecordInsertForms(unsigned VR,
785       OrderedRegisterList &AVs) {
786   if (isDebug()) {
787     dbgs() << __func__ << ": " << printReg(VR, HRI)
788            << "  AVs: " << PrintORL(AVs, HRI) << "\n";
789   }
790   if (AVs.size() == 0)
791     return false;
792 
793   using iterator = OrderedRegisterList::iterator;
794 
795   BitValueOrdering BVO(BaseOrd);
796   const BitTracker::RegisterCell &RC = CMS->lookup(VR);
797   uint16_t W = RC.width();
798 
799   using RSRecord = std::pair<unsigned, uint16_t>; // (reg,shift)
800   using RSListType = std::vector<RSRecord>;
801   // Have a map, with key being the matching prefix length, and the value
802   // being the list of pairs (R,S), where R's prefix matches VR at S.
803   // (DenseMap<uint16_t,RSListType> fails to instantiate.)
804   using LRSMapType = DenseMap<unsigned, RSListType>;
805   LRSMapType LM;
806 
807   // Conceptually, rotate the cell RC right (i.e. towards the LSB) by S,
808   // and find matching prefixes from AVs with the rotated RC. Such a prefix
809   // would match a string of bits (of length L) in RC starting at S.
810   for (uint16_t S = 0; S < W; ++S) {
811     iterator B = AVs.begin(), E = AVs.end();
812     // The registers in AVs are ordered according to the lexical order of
813     // the corresponding register cells. This means that the range of regis-
814     // ters in AVs that match a prefix of length L+1 will be contained in
815     // the range that matches a prefix of length L. This means that we can
816     // keep narrowing the search space as the prefix length goes up. This
817     // helps reduce the overall complexity of the search.
818     uint16_t L;
819     for (L = 0; L < W-S; ++L) {
820       // Compare against VR's bits starting at S, which emulates rotation
821       // of VR by S.
822       RegisterCellBitCompareSel RCB(VR, S+L, L, BVO, *CMS);
823       iterator NewB = std::lower_bound(B, E, VR, RCB);
824       iterator NewE = std::upper_bound(NewB, E, VR, RCB);
825       // For the registers that are eliminated from the next range, L is
826       // the longest prefix matching VR at position S (their prefixes
827       // differ from VR at S+L). If L>0, record this information for later
828       // use.
829       if (L > 0) {
830         for (iterator I = B; I != NewB; ++I)
831           LM[L].push_back(std::make_pair(*I, S));
832         for (iterator I = NewE; I != E; ++I)
833           LM[L].push_back(std::make_pair(*I, S));
834       }
835       B = NewB, E = NewE;
836       if (B == E)
837         break;
838     }
839     // Record the final register range. If this range is non-empty, then
840     // L=W-S.
841     assert(B == E || L == W-S);
842     if (B != E) {
843       for (iterator I = B; I != E; ++I)
844         LM[L].push_back(std::make_pair(*I, S));
845       // If B!=E, then we found a range of registers whose prefixes cover the
846       // rest of VR from position S. There is no need to further advance S.
847       break;
848     }
849   }
850 
851   if (isDebug()) {
852     dbgs() << "Prefixes matching register " << printReg(VR, HRI) << "\n";
853     for (const auto &I : LM) {
854       dbgs() << "  L=" << I.first << ':';
855       const RSListType &LL = I.second;
856       for (const auto &J : LL)
857         dbgs() << " (" << printReg(J.first, HRI) << ",@" << J.second << ')';
858       dbgs() << '\n';
859     }
860   }
861 
862   bool Recorded = false;
863 
864   for (unsigned SrcR : AVs) {
865     int FDi = -1, LDi = -1;   // First/last different bit.
866     const BitTracker::RegisterCell &AC = CMS->lookup(SrcR);
867     uint16_t AW = AC.width();
868     for (uint16_t i = 0, w = std::min(W, AW); i < w; ++i) {
869       if (RC[i] == AC[i])
870         continue;
871       if (FDi == -1)
872         FDi = i;
873       LDi = i;
874     }
875     if (FDi == -1)
876       continue;  // TODO (future): Record identical registers.
877     // Look for a register whose prefix could patch the range [FD..LD]
878     // where VR and SrcR differ.
879     uint16_t FD = FDi, LD = LDi;  // Switch to unsigned type.
880     uint16_t MinL = LD-FD+1;
881     for (uint16_t L = MinL; L < W; ++L) {
882       LRSMapType::iterator F = LM.find(L);
883       if (F == LM.end())
884         continue;
885       RSListType &LL = F->second;
886       for (const auto &I : LL) {
887         uint16_t S = I.second;
888         // MinL is the minimum length of the prefix. Any length above MinL
889         // allows some flexibility as to where the prefix can start:
890         // given the extra length EL=L-MinL, the prefix must start between
891         // max(0,FD-EL) and FD.
892         if (S > FD)   // Starts too late.
893           continue;
894         uint16_t EL = L-MinL;
895         uint16_t LowS = (EL < FD) ? FD-EL : 0;
896         if (S < LowS) // Starts too early.
897           continue;
898         unsigned InsR = I.first;
899         if (!isValidInsertForm(VR, SrcR, InsR, L, S))
900           continue;
901         if (isDebug()) {
902           dbgs() << printReg(VR, HRI) << " = insert(" << printReg(SrcR, HRI)
903                  << ',' << printReg(InsR, HRI) << ",#" << L << ",#"
904                  << S << ")\n";
905         }
906         IFRecordWithRegSet RR(IFRecord(SrcR, InsR, L, S), RegisterSet());
907         IFMap[VR].push_back(RR);
908         Recorded = true;
909       }
910     }
911   }
912 
913   return Recorded;
914 }
915 
916 void HexagonGenInsert::collectInBlock(MachineBasicBlock *B,
917       OrderedRegisterList &AVs) {
918   if (isDebug())
919     dbgs() << "visiting block " << printMBBReference(*B) << "\n";
920 
921   // First, check if this block is reachable at all. If not, the bit tracker
922   // will not have any information about registers in it.
923   if (!CMS->BT.reached(B))
924     return;
925 
926   bool DoConst = OptConst;
927   // Keep a separate set of registers defined in this block, so that we
928   // can remove them from the list of available registers once all DT
929   // successors have been processed.
930   RegisterSet BlockDefs, InsDefs;
931   for (MachineInstr &MI : *B) {
932     InsDefs.clear();
933     getInstrDefs(&MI, InsDefs);
934     // Leave those alone. They are more transparent than "insert".
935     bool Skip = MI.isCopy() || MI.isRegSequence();
936 
937     if (!Skip) {
938       // Visit all defined registers, and attempt to find the corresponding
939       // "insert" representations.
940       for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) {
941         // Do not collect registers that are known to be compile-time cons-
942         // tants, unless requested.
943         if (!DoConst && isConstant(VR))
944           continue;
945         // If VR's cell contains a reference to VR, then VR cannot be defined
946         // via "insert". If VR is a constant that can be generated in a single
947         // instruction (without constant extenders), generating it via insert
948         // makes no sense.
949         if (findSelfReference(VR) || isSmallConstant(VR))
950           continue;
951 
952         findRecordInsertForms(VR, AVs);
953         // Stop if the map size is too large.
954         if (IFMap.size() > MaxIFMSize)
955           return;
956       }
957     }
958 
959     // Insert the defined registers into the list of available registers
960     // after they have been processed.
961     for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR))
962       AVs.insert(VR);
963     BlockDefs.insert(InsDefs);
964   }
965 
966   for (auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(B))) {
967     MachineBasicBlock *SB = DTN->getBlock();
968     collectInBlock(SB, AVs);
969   }
970 
971   for (unsigned VR = BlockDefs.find_first(); VR; VR = BlockDefs.find_next(VR))
972     AVs.remove(VR);
973 }
974 
975 void HexagonGenInsert::findRemovableRegisters(unsigned VR, IFRecord IF,
976       RegisterSet &RMs) const {
977   // For a given register VR and a insert form, find the registers that are
978   // used by the current definition of VR, and which would no longer be
979   // needed for it after the definition of VR is replaced with the insert
980   // form. These are the registers that could potentially become dead.
981   RegisterSet Regs[2];
982 
983   unsigned S = 0;  // Register set selector.
984   Regs[S].insert(VR);
985 
986   while (!Regs[S].empty()) {
987     // Breadth-first search.
988     unsigned OtherS = 1-S;
989     Regs[OtherS].clear();
990     for (unsigned R = Regs[S].find_first(); R; R = Regs[S].find_next(R)) {
991       Regs[S].remove(R);
992       if (R == IF.SrcR || R == IF.InsR)
993         continue;
994       // Check if a given register has bits that are references to any other
995       // registers. This is to detect situations where the instruction that
996       // defines register R takes register Q as an operand, but R itself does
997       // not contain any bits from Q. Loads are examples of how this could
998       // happen:
999       //   R = load Q
1000       // In this case (assuming we do not have any knowledge about the loaded
1001       // value), we must not treat R as a "conveyance" of the bits from Q.
1002       // (The information in BT about R's bits would have them as constants,
1003       // in case of zero-extending loads, or refs to R.)
1004       if (!findNonSelfReference(R))
1005         continue;
1006       RMs.insert(R);
1007       const MachineInstr *DefI = MRI->getVRegDef(R);
1008       assert(DefI);
1009       // Do not iterate past PHI nodes to avoid infinite loops. This can
1010       // make the final set a bit less accurate, but the removable register
1011       // sets are an approximation anyway.
1012       if (DefI->isPHI())
1013         continue;
1014       getInstrUses(DefI, Regs[OtherS]);
1015     }
1016     S = OtherS;
1017   }
1018   // The register VR is added to the list as a side-effect of the algorithm,
1019   // but it is not "potentially removable". A potentially removable register
1020   // is one that may become unused (dead) after conversion to the insert form
1021   // IF, and obviously VR (or its replacement) will not become dead by apply-
1022   // ing IF.
1023   RMs.remove(VR);
1024 }
1025 
1026 void HexagonGenInsert::computeRemovableRegisters() {
1027   for (auto &I : IFMap) {
1028     IFListType &LL = I.second;
1029     for (auto &J : LL)
1030       findRemovableRegisters(I.first, J.first, J.second);
1031   }
1032 }
1033 
1034 void HexagonGenInsert::pruneEmptyLists() {
1035   // Remove all entries from the map, where the register has no insert forms
1036   // associated with it.
1037   using IterListType = SmallVector<IFMapType::iterator, 16>;
1038   IterListType Prune;
1039   for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
1040     if (I->second.empty())
1041       Prune.push_back(I);
1042   }
1043   for (unsigned i = 0, n = Prune.size(); i < n; ++i)
1044     IFMap.erase(Prune[i]);
1045 }
1046 
1047 void HexagonGenInsert::pruneCoveredSets(unsigned VR) {
1048   IFMapType::iterator F = IFMap.find(VR);
1049   assert(F != IFMap.end());
1050   IFListType &LL = F->second;
1051 
1052   // First, examine the IF candidates for register VR whose removable-regis-
1053   // ter sets are empty. This means that a given candidate will not help eli-
1054   // minate any registers, but since "insert" is not a constant-extendable
1055   // instruction, using such a candidate may reduce code size if the defini-
1056   // tion of VR is constant-extended.
1057   // If there exists a candidate with a non-empty set, the ones with empty
1058   // sets will not be used and can be removed.
1059   MachineInstr *DefVR = MRI->getVRegDef(VR);
1060   bool DefEx = HII->isConstExtended(*DefVR);
1061   bool HasNE = false;
1062   for (const auto &I : LL) {
1063     if (I.second.empty())
1064       continue;
1065     HasNE = true;
1066     break;
1067   }
1068   if (!DefEx || HasNE) {
1069     // The definition of VR is not constant-extended, or there is a candidate
1070     // with a non-empty set. Remove all candidates with empty sets.
1071     auto IsEmpty = [] (const IFRecordWithRegSet &IR) -> bool {
1072       return IR.second.empty();
1073     };
1074     llvm::erase_if(LL, IsEmpty);
1075   } else {
1076     // The definition of VR is constant-extended, and all candidates have
1077     // empty removable-register sets. Pick the maximum candidate, and remove
1078     // all others. The "maximum" does not have any special meaning here, it
1079     // is only so that the candidate that will remain on the list is selec-
1080     // ted deterministically.
1081     IFRecord MaxIF = LL[0].first;
1082     for (unsigned i = 1, n = LL.size(); i < n; ++i) {
1083       // If LL[MaxI] < LL[i], then MaxI = i.
1084       const IFRecord &IF = LL[i].first;
1085       unsigned M0 = BaseOrd[MaxIF.SrcR], M1 = BaseOrd[MaxIF.InsR];
1086       unsigned R0 = BaseOrd[IF.SrcR], R1 = BaseOrd[IF.InsR];
1087       if (M0 > R0)
1088         continue;
1089       if (M0 == R0) {
1090         if (M1 > R1)
1091           continue;
1092         if (M1 == R1) {
1093           if (MaxIF.Wdh > IF.Wdh)
1094             continue;
1095           if (MaxIF.Wdh == IF.Wdh && MaxIF.Off >= IF.Off)
1096             continue;
1097         }
1098       }
1099       // MaxIF < IF.
1100       MaxIF = IF;
1101     }
1102     // Remove everything except the maximum candidate. All register sets
1103     // are empty, so no need to preserve anything.
1104     LL.clear();
1105     LL.push_back(std::make_pair(MaxIF, RegisterSet()));
1106   }
1107 
1108   // Now, remove those whose sets of potentially removable registers are
1109   // contained in another IF candidate for VR. For example, given these
1110   // candidates for %45,
1111   //   %45:
1112   //     (%44,%41,#9,#8), { %42 }
1113   //     (%43,%41,#9,#8), { %42 %44 }
1114   // remove the first one, since it is contained in the second one.
1115   for (unsigned i = 0, n = LL.size(); i < n; ) {
1116     const RegisterSet &RMi = LL[i].second;
1117     unsigned j = 0;
1118     while (j < n) {
1119       if (j != i && LL[j].second.includes(RMi))
1120         break;
1121       j++;
1122     }
1123     if (j == n) {   // RMi not contained in anything else.
1124       i++;
1125       continue;
1126     }
1127     LL.erase(LL.begin()+i);
1128     n = LL.size();
1129   }
1130 }
1131 
1132 void HexagonGenInsert::pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO,
1133       PairMapType &M) {
1134   IFMapType::iterator F = IFMap.find(VR);
1135   assert(F != IFMap.end());
1136   IFListType &LL = F->second;
1137   unsigned Cutoff = VRegDistCutoff;
1138   const MachineInstr *DefV = MRI->getVRegDef(VR);
1139 
1140   for (unsigned i = LL.size(); i > 0; --i) {
1141     unsigned SR = LL[i-1].first.SrcR, IR = LL[i-1].first.InsR;
1142     const MachineInstr *DefS = MRI->getVRegDef(SR);
1143     const MachineInstr *DefI = MRI->getVRegDef(IR);
1144     unsigned DSV = distance(DefS, DefV, RPO, M);
1145     if (DSV < Cutoff) {
1146       unsigned DIV = distance(DefI, DefV, RPO, M);
1147       if (DIV < Cutoff)
1148         continue;
1149     }
1150     LL.erase(LL.begin()+(i-1));
1151   }
1152 }
1153 
1154 void HexagonGenInsert::pruneRegCopies(unsigned VR) {
1155   IFMapType::iterator F = IFMap.find(VR);
1156   assert(F != IFMap.end());
1157   IFListType &LL = F->second;
1158 
1159   auto IsCopy = [] (const IFRecordWithRegSet &IR) -> bool {
1160     return IR.first.Wdh == 32 && (IR.first.Off == 0 || IR.first.Off == 32);
1161   };
1162   llvm::erase_if(LL, IsCopy);
1163 }
1164 
1165 void HexagonGenInsert::pruneCandidates() {
1166   // Remove candidates that are not beneficial, regardless of the final
1167   // selection method.
1168   // First, remove candidates whose potentially removable set is a subset
1169   // of another candidate's set.
1170   for (const auto &I : IFMap)
1171     pruneCoveredSets(I.first);
1172 
1173   UnsignedMap RPO;
1174 
1175   using RPOTType = ReversePostOrderTraversal<const MachineFunction *>;
1176 
1177   RPOTType RPOT(MFN);
1178   unsigned RPON = 0;
1179   for (const auto &I : RPOT)
1180     RPO[I->getNumber()] = RPON++;
1181 
1182   PairMapType Memo; // Memoization map for distance calculation.
1183   // Remove candidates that would use registers defined too far away.
1184   for (const auto &I : IFMap)
1185     pruneUsesTooFar(I.first, RPO, Memo);
1186 
1187   pruneEmptyLists();
1188 
1189   for (const auto &I : IFMap)
1190     pruneRegCopies(I.first);
1191 }
1192 
1193 namespace {
1194 
1195   // Class for comparing IF candidates for registers that have multiple of
1196   // them. The smaller the candidate, according to this ordering, the better.
1197   // First, compare the number of zeros in the associated potentially remova-
1198   // ble register sets. "Zero" indicates that the register is very likely to
1199   // become dead after this transformation.
1200   // Second, compare "averages", i.e. use-count per size. The lower wins.
1201   // After that, it does not really matter which one is smaller. Resolve
1202   // the tie in some deterministic way.
1203   struct IFOrdering {
1204     IFOrdering(const UnsignedMap &UC, const RegisterOrdering &BO)
1205       : UseC(UC), BaseOrd(BO) {}
1206 
1207     bool operator() (const IFRecordWithRegSet &A,
1208                      const IFRecordWithRegSet &B) const;
1209 
1210   private:
1211     void stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero,
1212           unsigned &Sum) const;
1213 
1214     const UnsignedMap &UseC;
1215     const RegisterOrdering &BaseOrd;
1216   };
1217 
1218 } // end anonymous namespace
1219 
1220 bool IFOrdering::operator() (const IFRecordWithRegSet &A,
1221       const IFRecordWithRegSet &B) const {
1222   unsigned SizeA = 0, ZeroA = 0, SumA = 0;
1223   unsigned SizeB = 0, ZeroB = 0, SumB = 0;
1224   stats(A.second, SizeA, ZeroA, SumA);
1225   stats(B.second, SizeB, ZeroB, SumB);
1226 
1227   // We will pick the minimum element. The more zeros, the better.
1228   if (ZeroA != ZeroB)
1229     return ZeroA > ZeroB;
1230   // Compare SumA/SizeA with SumB/SizeB, lower is better.
1231   uint64_t AvgA = SumA*SizeB, AvgB = SumB*SizeA;
1232   if (AvgA != AvgB)
1233     return AvgA < AvgB;
1234 
1235   // The sets compare identical so far. Resort to comparing the IF records.
1236   // The actual values don't matter, this is only for determinism.
1237   unsigned OSA = BaseOrd[A.first.SrcR], OSB = BaseOrd[B.first.SrcR];
1238   if (OSA != OSB)
1239     return OSA < OSB;
1240   unsigned OIA = BaseOrd[A.first.InsR], OIB = BaseOrd[B.first.InsR];
1241   if (OIA != OIB)
1242     return OIA < OIB;
1243   if (A.first.Wdh != B.first.Wdh)
1244     return A.first.Wdh < B.first.Wdh;
1245   return A.first.Off < B.first.Off;
1246 }
1247 
1248 void IFOrdering::stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero,
1249       unsigned &Sum) const {
1250   for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R)) {
1251     UnsignedMap::const_iterator F = UseC.find(R);
1252     assert(F != UseC.end());
1253     unsigned UC = F->second;
1254     if (UC == 0)
1255       Zero++;
1256     Sum += UC;
1257     Size++;
1258   }
1259 }
1260 
1261 void HexagonGenInsert::selectCandidates() {
1262   // Some registers may have multiple valid candidates. Pick the best one
1263   // (or decide not to use any).
1264 
1265   // Compute the "removability" measure of R:
1266   // For each potentially removable register R, record the number of regis-
1267   // ters with IF candidates, where R appears in at least one set.
1268   RegisterSet AllRMs;
1269   UnsignedMap UseC, RemC;
1270   IFMapType::iterator End = IFMap.end();
1271 
1272   for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1273     const IFListType &LL = I->second;
1274     RegisterSet TT;
1275     for (const auto &J : LL)
1276       TT.insert(J.second);
1277     for (unsigned R = TT.find_first(); R; R = TT.find_next(R))
1278       RemC[R]++;
1279     AllRMs.insert(TT);
1280   }
1281 
1282   for (unsigned R = AllRMs.find_first(); R; R = AllRMs.find_next(R)) {
1283     using use_iterator = MachineRegisterInfo::use_nodbg_iterator;
1284     using InstrSet = SmallSet<const MachineInstr *, 16>;
1285 
1286     InstrSet UIs;
1287     // Count as the number of instructions in which R is used, not the
1288     // number of operands.
1289     use_iterator E = MRI->use_nodbg_end();
1290     for (use_iterator I = MRI->use_nodbg_begin(R); I != E; ++I)
1291       UIs.insert(I->getParent());
1292     unsigned C = UIs.size();
1293     // Calculate a measure, which is the number of instructions using R,
1294     // minus the "removability" count computed earlier.
1295     unsigned D = RemC[R];
1296     UseC[R] = (C > D) ? C-D : 0;  // doz
1297   }
1298 
1299   bool SelectAll0 = OptSelectAll0, SelectHas0 = OptSelectHas0;
1300   if (!SelectAll0 && !SelectHas0)
1301     SelectAll0 = true;
1302 
1303   // The smaller the number UseC for a given register R, the "less used"
1304   // R is aside from the opportunities for removal offered by generating
1305   // "insert" instructions.
1306   // Iterate over the IF map, and for those registers that have multiple
1307   // candidates, pick the minimum one according to IFOrdering.
1308   IFOrdering IFO(UseC, BaseOrd);
1309   for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1310     IFListType &LL = I->second;
1311     if (LL.empty())
1312       continue;
1313     // Get the minimum element, remember it and clear the list. If the
1314     // element found is adequate, we will put it back on the list, other-
1315     // wise the list will remain empty, and the entry for this register
1316     // will be removed (i.e. this register will not be replaced by insert).
1317     IFListType::iterator MinI = std::min_element(LL.begin(), LL.end(), IFO);
1318     assert(MinI != LL.end());
1319     IFRecordWithRegSet M = *MinI;
1320     LL.clear();
1321 
1322     // We want to make sure that this replacement will have a chance to be
1323     // beneficial, and that means that we want to have indication that some
1324     // register will be removed. The most likely registers to be eliminated
1325     // are the use operands in the definition of I->first. Accept/reject a
1326     // candidate based on how many of its uses it can potentially eliminate.
1327 
1328     RegisterSet Us;
1329     const MachineInstr *DefI = MRI->getVRegDef(I->first);
1330     getInstrUses(DefI, Us);
1331     bool Accept = false;
1332 
1333     if (SelectAll0) {
1334       bool All0 = true;
1335       for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) {
1336         if (UseC[R] == 0)
1337           continue;
1338         All0 = false;
1339         break;
1340       }
1341       Accept = All0;
1342     } else if (SelectHas0) {
1343       bool Has0 = false;
1344       for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) {
1345         if (UseC[R] != 0)
1346           continue;
1347         Has0 = true;
1348         break;
1349       }
1350       Accept = Has0;
1351     }
1352     if (Accept)
1353       LL.push_back(M);
1354   }
1355 
1356   // Remove candidates that add uses of removable registers, unless the
1357   // removable registers are among replacement candidates.
1358   // Recompute the removable registers, since some candidates may have
1359   // been eliminated.
1360   AllRMs.clear();
1361   for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1362     const IFListType &LL = I->second;
1363     if (!LL.empty())
1364       AllRMs.insert(LL[0].second);
1365   }
1366   for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
1367     IFListType &LL = I->second;
1368     if (LL.empty())
1369       continue;
1370     unsigned SR = LL[0].first.SrcR, IR = LL[0].first.InsR;
1371     if (AllRMs[SR] || AllRMs[IR])
1372       LL.clear();
1373   }
1374 
1375   pruneEmptyLists();
1376 }
1377 
1378 bool HexagonGenInsert::generateInserts() {
1379   // Create a new register for each one from IFMap, and store them in the
1380   // map.
1381   UnsignedMap RegMap;
1382   for (auto &I : IFMap) {
1383     unsigned VR = I.first;
1384     const TargetRegisterClass *RC = MRI->getRegClass(VR);
1385     Register NewVR = MRI->createVirtualRegister(RC);
1386     RegMap[VR] = NewVR;
1387   }
1388 
1389   // We can generate the "insert" instructions using potentially stale re-
1390   // gisters: SrcR and InsR for a given VR may be among other registers that
1391   // are also replaced. This is fine, we will do the mass "rauw" a bit later.
1392   for (auto &I : IFMap) {
1393     MachineInstr *MI = MRI->getVRegDef(I.first);
1394     MachineBasicBlock &B = *MI->getParent();
1395     DebugLoc DL = MI->getDebugLoc();
1396     unsigned NewR = RegMap[I.first];
1397     bool R32 = MRI->getRegClass(NewR) == &Hexagon::IntRegsRegClass;
1398     const MCInstrDesc &D = R32 ? HII->get(Hexagon::S2_insert)
1399                                : HII->get(Hexagon::S2_insertp);
1400     IFRecord IF = I.second[0].first;
1401     unsigned Wdh = IF.Wdh, Off = IF.Off;
1402     unsigned InsS = 0;
1403     if (R32 && MRI->getRegClass(IF.InsR) == &Hexagon::DoubleRegsRegClass) {
1404       InsS = Hexagon::isub_lo;
1405       if (Off >= 32) {
1406         InsS = Hexagon::isub_hi;
1407         Off -= 32;
1408       }
1409     }
1410     // Advance to the proper location for inserting instructions. This could
1411     // be B.end().
1412     MachineBasicBlock::iterator At = MI;
1413     if (MI->isPHI())
1414       At = B.getFirstNonPHI();
1415 
1416     BuildMI(B, At, DL, D, NewR)
1417       .addReg(IF.SrcR)
1418       .addReg(IF.InsR, 0, InsS)
1419       .addImm(Wdh)
1420       .addImm(Off);
1421 
1422     MRI->clearKillFlags(IF.SrcR);
1423     MRI->clearKillFlags(IF.InsR);
1424   }
1425 
1426   for (const auto &I : IFMap) {
1427     MachineInstr *DefI = MRI->getVRegDef(I.first);
1428     MRI->replaceRegWith(I.first, RegMap[I.first]);
1429     DefI->eraseFromParent();
1430   }
1431 
1432   return true;
1433 }
1434 
1435 bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) {
1436   bool Changed = false;
1437 
1438   for (auto *DTN : children<MachineDomTreeNode*>(N))
1439     Changed |= removeDeadCode(DTN);
1440 
1441   MachineBasicBlock *B = N->getBlock();
1442   std::vector<MachineInstr*> Instrs;
1443   for (MachineInstr &MI : llvm::reverse(*B))
1444     Instrs.push_back(&MI);
1445 
1446   for (MachineInstr *MI : Instrs) {
1447     unsigned Opc = MI->getOpcode();
1448     // Do not touch lifetime markers. This is why the target-independent DCE
1449     // cannot be used.
1450     if (Opc == TargetOpcode::LIFETIME_START ||
1451         Opc == TargetOpcode::LIFETIME_END)
1452       continue;
1453     bool Store = false;
1454     if (MI->isInlineAsm() || !MI->isSafeToMove(nullptr, Store))
1455       continue;
1456 
1457     bool AllDead = true;
1458     SmallVector<unsigned,2> Regs;
1459     for (const MachineOperand &MO : MI->operands()) {
1460       if (!MO.isReg() || !MO.isDef())
1461         continue;
1462       Register R = MO.getReg();
1463       if (!R.isVirtual() || !MRI->use_nodbg_empty(R)) {
1464         AllDead = false;
1465         break;
1466       }
1467       Regs.push_back(R);
1468     }
1469     if (!AllDead)
1470       continue;
1471 
1472     B->erase(MI);
1473     for (unsigned I = 0, N = Regs.size(); I != N; ++I)
1474       MRI->markUsesInDebugValueAsUndef(Regs[I]);
1475     Changed = true;
1476   }
1477 
1478   return Changed;
1479 }
1480 
1481 bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {
1482   if (skipFunction(MF.getFunction()))
1483     return false;
1484 
1485   bool Timing = OptTiming, TimingDetail = Timing && OptTimingDetail;
1486   bool Changed = false;
1487 
1488   // Verify: one, but not both.
1489   assert(!OptSelectAll0 || !OptSelectHas0);
1490 
1491   IFMap.clear();
1492   BaseOrd.clear();
1493   CellOrd.clear();
1494 
1495   const auto &ST = MF.getSubtarget<HexagonSubtarget>();
1496   HII = ST.getInstrInfo();
1497   HRI = ST.getRegisterInfo();
1498   MFN = &MF;
1499   MRI = &MF.getRegInfo();
1500   MDT = &getAnalysis<MachineDominatorTree>();
1501 
1502   // Clean up before any further processing, so that dead code does not
1503   // get used in a newly generated "insert" instruction. Have a custom
1504   // version of DCE that preserves lifetime markers. Without it, merging
1505   // of stack objects can fail to recognize and merge disjoint objects
1506   // leading to unnecessary stack growth.
1507   Changed = removeDeadCode(MDT->getRootNode());
1508 
1509   const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
1510   BitTracker BTLoc(HE, MF);
1511   BTLoc.trace(isDebug());
1512   BTLoc.run();
1513   CellMapShadow MS(BTLoc);
1514   CMS = &MS;
1515 
1516   buildOrderingMF(BaseOrd);
1517   buildOrderingBT(BaseOrd, CellOrd);
1518 
1519   if (isDebug()) {
1520     dbgs() << "Cell ordering:\n";
1521     for (const auto &I : CellOrd) {
1522       unsigned VR = I.first, Pos = I.second;
1523       dbgs() << printReg(VR, HRI) << " -> " << Pos << "\n";
1524     }
1525   }
1526 
1527   // Collect candidates for conversion into the insert forms.
1528   MachineBasicBlock *RootB = MDT->getRoot();
1529   OrderedRegisterList AvailR(CellOrd);
1530 
1531   const char *const TGName = "hexinsert";
1532   const char *const TGDesc = "Generate Insert Instructions";
1533 
1534   {
1535     NamedRegionTimer _T("collection", "collection", TGName, TGDesc,
1536                         TimingDetail);
1537     collectInBlock(RootB, AvailR);
1538     // Complete the information gathered in IFMap.
1539     computeRemovableRegisters();
1540   }
1541 
1542   if (isDebug()) {
1543     dbgs() << "Candidates after collection:\n";
1544     dump_map();
1545   }
1546 
1547   if (IFMap.empty())
1548     return Changed;
1549 
1550   {
1551     NamedRegionTimer _T("pruning", "pruning", TGName, TGDesc, TimingDetail);
1552     pruneCandidates();
1553   }
1554 
1555   if (isDebug()) {
1556     dbgs() << "Candidates after pruning:\n";
1557     dump_map();
1558   }
1559 
1560   if (IFMap.empty())
1561     return Changed;
1562 
1563   {
1564     NamedRegionTimer _T("selection", "selection", TGName, TGDesc, TimingDetail);
1565     selectCandidates();
1566   }
1567 
1568   if (isDebug()) {
1569     dbgs() << "Candidates after selection:\n";
1570     dump_map();
1571   }
1572 
1573   // Filter out vregs beyond the cutoff.
1574   if (VRegIndexCutoff.getPosition()) {
1575     unsigned Cutoff = VRegIndexCutoff;
1576 
1577     using IterListType = SmallVector<IFMapType::iterator, 16>;
1578 
1579     IterListType Out;
1580     for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
1581       unsigned Idx = Register::virtReg2Index(I->first);
1582       if (Idx >= Cutoff)
1583         Out.push_back(I);
1584     }
1585     for (unsigned i = 0, n = Out.size(); i < n; ++i)
1586       IFMap.erase(Out[i]);
1587   }
1588   if (IFMap.empty())
1589     return Changed;
1590 
1591   {
1592     NamedRegionTimer _T("generation", "generation", TGName, TGDesc,
1593                         TimingDetail);
1594     generateInserts();
1595   }
1596 
1597   return true;
1598 }
1599 
1600 FunctionPass *llvm::createHexagonGenInsert() {
1601   return new HexagonGenInsert();
1602 }
1603 
1604 //===----------------------------------------------------------------------===//
1605 //                         Public Constructor Functions
1606 //===----------------------------------------------------------------------===//
1607 
1608 INITIALIZE_PASS_BEGIN(HexagonGenInsert, "hexinsert",
1609   "Hexagon generate \"insert\" instructions", false, false)
1610 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
1611 INITIALIZE_PASS_END(HexagonGenInsert, "hexinsert",
1612   "Hexagon generate \"insert\" instructions", false, false)
1613