10b57cec5SDimitry Andric //===- StackColoring.cpp --------------------------------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This pass implements the stack-coloring optimization that looks for 100b57cec5SDimitry Andric // lifetime markers machine instructions (LIFESTART_BEGIN and LIFESTART_END), 110b57cec5SDimitry Andric // which represent the possible lifetime of stack slots. It attempts to 120b57cec5SDimitry Andric // merge disjoint stack slots and reduce the used stack space. 130b57cec5SDimitry Andric // NOTE: This pass is not StackSlotColoring, which optimizes spill slots. 140b57cec5SDimitry Andric // 150b57cec5SDimitry Andric // TODO: In the future we plan to improve stack coloring in the following ways: 160b57cec5SDimitry Andric // 1. Allow merging multiple small slots into a single larger slot at different 170b57cec5SDimitry Andric // offsets. 180b57cec5SDimitry Andric // 2. Merge this pass with StackSlotColoring and allow merging of allocas with 190b57cec5SDimitry Andric // spill slots. 200b57cec5SDimitry Andric // 210b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 220b57cec5SDimitry Andric 230b57cec5SDimitry Andric #include "llvm/ADT/BitVector.h" 240b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h" 250b57cec5SDimitry Andric #include "llvm/ADT/DepthFirstIterator.h" 260b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h" 270b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 280b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 290b57cec5SDimitry Andric #include "llvm/Analysis/ValueTracking.h" 300b57cec5SDimitry Andric #include "llvm/CodeGen/LiveInterval.h" 310b57cec5SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h" 320b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h" 330b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 340b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 350b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 360b57cec5SDimitry Andric #include "llvm/CodeGen/MachineMemOperand.h" 370b57cec5SDimitry Andric #include "llvm/CodeGen/MachineOperand.h" 380b57cec5SDimitry Andric #include "llvm/CodeGen/Passes.h" 390b57cec5SDimitry Andric #include "llvm/CodeGen/SelectionDAGNodes.h" 400b57cec5SDimitry Andric #include "llvm/CodeGen/SlotIndexes.h" 410b57cec5SDimitry Andric #include "llvm/CodeGen/TargetOpcodes.h" 420b57cec5SDimitry Andric #include "llvm/CodeGen/WinEHFuncInfo.h" 430b57cec5SDimitry Andric #include "llvm/Config/llvm-config.h" 440b57cec5SDimitry Andric #include "llvm/IR/Constants.h" 450b57cec5SDimitry Andric #include "llvm/IR/DebugInfoMetadata.h" 460b57cec5SDimitry Andric #include "llvm/IR/Function.h" 470b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 480b57cec5SDimitry Andric #include "llvm/IR/Metadata.h" 490b57cec5SDimitry Andric #include "llvm/IR/Use.h" 500b57cec5SDimitry Andric #include "llvm/IR/Value.h" 51480093f4SDimitry Andric #include "llvm/InitializePasses.h" 520b57cec5SDimitry Andric #include "llvm/Pass.h" 530b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 540b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 550b57cec5SDimitry Andric #include "llvm/Support/Compiler.h" 560b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 570b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 580b57cec5SDimitry Andric #include <algorithm> 590b57cec5SDimitry Andric #include <cassert> 600b57cec5SDimitry Andric #include <limits> 610b57cec5SDimitry Andric #include <memory> 620b57cec5SDimitry Andric #include <utility> 630b57cec5SDimitry Andric 640b57cec5SDimitry Andric using namespace llvm; 650b57cec5SDimitry Andric 660b57cec5SDimitry Andric #define DEBUG_TYPE "stack-coloring" 670b57cec5SDimitry Andric 680b57cec5SDimitry Andric static cl::opt<bool> 690b57cec5SDimitry Andric DisableColoring("no-stack-coloring", 700b57cec5SDimitry Andric cl::init(false), cl::Hidden, 710b57cec5SDimitry Andric cl::desc("Disable stack coloring")); 720b57cec5SDimitry Andric 730b57cec5SDimitry Andric /// The user may write code that uses allocas outside of the declared lifetime 740b57cec5SDimitry Andric /// zone. This can happen when the user returns a reference to a local 750b57cec5SDimitry Andric /// data-structure. We can detect these cases and decide not to optimize the 760b57cec5SDimitry Andric /// code. If this flag is enabled, we try to save the user. This option 770b57cec5SDimitry Andric /// is treated as overriding LifetimeStartOnFirstUse below. 780b57cec5SDimitry Andric static cl::opt<bool> 790b57cec5SDimitry Andric ProtectFromEscapedAllocas("protect-from-escaped-allocas", 800b57cec5SDimitry Andric cl::init(false), cl::Hidden, 810b57cec5SDimitry Andric cl::desc("Do not optimize lifetime zones that " 820b57cec5SDimitry Andric "are broken")); 830b57cec5SDimitry Andric 840b57cec5SDimitry Andric /// Enable enhanced dataflow scheme for lifetime analysis (treat first 850b57cec5SDimitry Andric /// use of stack slot as start of slot lifetime, as opposed to looking 860b57cec5SDimitry Andric /// for LIFETIME_START marker). See "Implementation notes" below for 870b57cec5SDimitry Andric /// more info. 880b57cec5SDimitry Andric static cl::opt<bool> 890b57cec5SDimitry Andric LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use", 900b57cec5SDimitry Andric cl::init(true), cl::Hidden, 910b57cec5SDimitry Andric cl::desc("Treat stack lifetimes as starting on first use, not on START marker.")); 920b57cec5SDimitry Andric 930b57cec5SDimitry Andric 940b57cec5SDimitry Andric STATISTIC(NumMarkerSeen, "Number of lifetime markers found."); 950b57cec5SDimitry Andric STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots."); 960b57cec5SDimitry Andric STATISTIC(StackSlotMerged, "Number of stack slot merged."); 970b57cec5SDimitry Andric STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); 980b57cec5SDimitry Andric 990b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 1000b57cec5SDimitry Andric // StackColoring Pass 1010b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 1020b57cec5SDimitry Andric // 1030b57cec5SDimitry Andric // Stack Coloring reduces stack usage by merging stack slots when they 1040b57cec5SDimitry Andric // can't be used together. For example, consider the following C program: 1050b57cec5SDimitry Andric // 1060b57cec5SDimitry Andric // void bar(char *, int); 1070b57cec5SDimitry Andric // void foo(bool var) { 1080b57cec5SDimitry Andric // A: { 1090b57cec5SDimitry Andric // char z[4096]; 1100b57cec5SDimitry Andric // bar(z, 0); 1110b57cec5SDimitry Andric // } 1120b57cec5SDimitry Andric // 1130b57cec5SDimitry Andric // char *p; 1140b57cec5SDimitry Andric // char x[4096]; 1150b57cec5SDimitry Andric // char y[4096]; 1160b57cec5SDimitry Andric // if (var) { 1170b57cec5SDimitry Andric // p = x; 1180b57cec5SDimitry Andric // } else { 1190b57cec5SDimitry Andric // bar(y, 1); 1200b57cec5SDimitry Andric // p = y + 1024; 1210b57cec5SDimitry Andric // } 1220b57cec5SDimitry Andric // B: 1230b57cec5SDimitry Andric // bar(p, 2); 1240b57cec5SDimitry Andric // } 1250b57cec5SDimitry Andric // 1260b57cec5SDimitry Andric // Naively-compiled, this program would use 12k of stack space. However, the 1270b57cec5SDimitry Andric // stack slot corresponding to `z` is always destroyed before either of the 1280b57cec5SDimitry Andric // stack slots for `x` or `y` are used, and then `x` is only used if `var` 1290b57cec5SDimitry Andric // is true, while `y` is only used if `var` is false. So in no time are 2 1300b57cec5SDimitry Andric // of the stack slots used together, and therefore we can merge them, 1310b57cec5SDimitry Andric // compiling the function using only a single 4k alloca: 1320b57cec5SDimitry Andric // 1330b57cec5SDimitry Andric // void foo(bool var) { // equivalent 1340b57cec5SDimitry Andric // char x[4096]; 1350b57cec5SDimitry Andric // char *p; 1360b57cec5SDimitry Andric // bar(x, 0); 1370b57cec5SDimitry Andric // if (var) { 1380b57cec5SDimitry Andric // p = x; 1390b57cec5SDimitry Andric // } else { 1400b57cec5SDimitry Andric // bar(x, 1); 1410b57cec5SDimitry Andric // p = x + 1024; 1420b57cec5SDimitry Andric // } 1430b57cec5SDimitry Andric // bar(p, 2); 1440b57cec5SDimitry Andric // } 1450b57cec5SDimitry Andric // 1460b57cec5SDimitry Andric // This is an important optimization if we want stack space to be under 1470b57cec5SDimitry Andric // control in large functions, both open-coded ones and ones created by 1480b57cec5SDimitry Andric // inlining. 1490b57cec5SDimitry Andric // 1500b57cec5SDimitry Andric // Implementation Notes: 1510b57cec5SDimitry Andric // --------------------- 1520b57cec5SDimitry Andric // 1530b57cec5SDimitry Andric // An important part of the above reasoning is that `z` can't be accessed 1540b57cec5SDimitry Andric // while the latter 2 calls to `bar` are running. This is justified because 1550b57cec5SDimitry Andric // `z`'s lifetime is over after we exit from block `A:`, so any further 1560b57cec5SDimitry Andric // accesses to it would be UB. The way we represent this information 1570b57cec5SDimitry Andric // in LLVM is by having frontends delimit blocks with `lifetime.start` 1580b57cec5SDimitry Andric // and `lifetime.end` intrinsics. 1590b57cec5SDimitry Andric // 1600b57cec5SDimitry Andric // The effect of these intrinsics seems to be as follows (maybe I should 1610b57cec5SDimitry Andric // specify this in the reference?): 1620b57cec5SDimitry Andric // 1630b57cec5SDimitry Andric // L1) at start, each stack-slot is marked as *out-of-scope*, unless no 1640b57cec5SDimitry Andric // lifetime intrinsic refers to that stack slot, in which case 1650b57cec5SDimitry Andric // it is marked as *in-scope*. 1660b57cec5SDimitry Andric // L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and 1670b57cec5SDimitry Andric // the stack slot is overwritten with `undef`. 1680b57cec5SDimitry Andric // L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*. 1690b57cec5SDimitry Andric // L4) on function exit, all stack slots are marked as *out-of-scope*. 1700b57cec5SDimitry Andric // L5) `lifetime.end` is a no-op when called on a slot that is already 1710b57cec5SDimitry Andric // *out-of-scope*. 1720b57cec5SDimitry Andric // L6) memory accesses to *out-of-scope* stack slots are UB. 1730b57cec5SDimitry Andric // L7) when a stack-slot is marked as *out-of-scope*, all pointers to it 1740b57cec5SDimitry Andric // are invalidated, unless the slot is "degenerate". This is used to 1750b57cec5SDimitry Andric // justify not marking slots as in-use until the pointer to them is 1760b57cec5SDimitry Andric // used, but feels a bit hacky in the presence of things like LICM. See 1770b57cec5SDimitry Andric // the "Degenerate Slots" section for more details. 1780b57cec5SDimitry Andric // 1790b57cec5SDimitry Andric // Now, let's ground stack coloring on these rules. We'll define a slot 1800b57cec5SDimitry Andric // as *in-use* at a (dynamic) point in execution if it either can be 1810b57cec5SDimitry Andric // written to at that point, or if it has a live and non-undef content 1820b57cec5SDimitry Andric // at that point. 1830b57cec5SDimitry Andric // 1840b57cec5SDimitry Andric // Obviously, slots that are never *in-use* together can be merged, and 1850b57cec5SDimitry Andric // in our example `foo`, the slots for `x`, `y` and `z` are never 1860b57cec5SDimitry Andric // in-use together (of course, sometimes slots that *are* in-use together 1870b57cec5SDimitry Andric // might still be mergable, but we don't care about that here). 1880b57cec5SDimitry Andric // 1890b57cec5SDimitry Andric // In this implementation, we successively merge pairs of slots that are 1900b57cec5SDimitry Andric // not *in-use* together. We could be smarter - for example, we could merge 1910b57cec5SDimitry Andric // a single large slot with 2 small slots, or we could construct the 1920b57cec5SDimitry Andric // interference graph and run a "smart" graph coloring algorithm, but with 1930b57cec5SDimitry Andric // that aside, how do we find out whether a pair of slots might be *in-use* 1940b57cec5SDimitry Andric // together? 1950b57cec5SDimitry Andric // 1960b57cec5SDimitry Andric // From our rules, we see that *out-of-scope* slots are never *in-use*, 1970b57cec5SDimitry Andric // and from (L7) we see that "non-degenerate" slots remain non-*in-use* 1980b57cec5SDimitry Andric // until their address is taken. Therefore, we can approximate slot activity 1990b57cec5SDimitry Andric // using dataflow. 2000b57cec5SDimitry Andric // 2010b57cec5SDimitry Andric // A subtle point: naively, we might try to figure out which pairs of 2020b57cec5SDimitry Andric // stack-slots interfere by propagating `S in-use` through the CFG for every 2030b57cec5SDimitry Andric // stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in 2040b57cec5SDimitry Andric // which they are both *in-use*. 2050b57cec5SDimitry Andric // 2060b57cec5SDimitry Andric // That is sound, but overly conservative in some cases: in our (artificial) 2070b57cec5SDimitry Andric // example `foo`, either `x` or `y` might be in use at the label `B:`, but 2080b57cec5SDimitry Andric // as `x` is only in use if we came in from the `var` edge and `y` only 2090b57cec5SDimitry Andric // if we came from the `!var` edge, they still can't be in use together. 2100b57cec5SDimitry Andric // See PR32488 for an important real-life case. 2110b57cec5SDimitry Andric // 2120b57cec5SDimitry Andric // If we wanted to find all points of interference precisely, we could 2130b57cec5SDimitry Andric // propagate `S in-use` and `S&T in-use` predicates through the CFG. That 2140b57cec5SDimitry Andric // would be precise, but requires propagating `O(n^2)` dataflow facts. 2150b57cec5SDimitry Andric // 2160b57cec5SDimitry Andric // However, we aren't interested in the *set* of points of interference 2170b57cec5SDimitry Andric // between 2 stack slots, only *whether* there *is* such a point. So we 2180b57cec5SDimitry Andric // can rely on a little trick: for `S` and `T` to be in-use together, 2190b57cec5SDimitry Andric // one of them needs to become in-use while the other is in-use (or 2200b57cec5SDimitry Andric // they might both become in use simultaneously). We can check this 2210b57cec5SDimitry Andric // by also keeping track of the points at which a stack slot might *start* 2220b57cec5SDimitry Andric // being in-use. 2230b57cec5SDimitry Andric // 2240b57cec5SDimitry Andric // Exact first use: 2250b57cec5SDimitry Andric // ---------------- 2260b57cec5SDimitry Andric // 2270b57cec5SDimitry Andric // Consider the following motivating example: 2280b57cec5SDimitry Andric // 2290b57cec5SDimitry Andric // int foo() { 2300b57cec5SDimitry Andric // char b1[1024], b2[1024]; 2310b57cec5SDimitry Andric // if (...) { 2320b57cec5SDimitry Andric // char b3[1024]; 2330b57cec5SDimitry Andric // <uses of b1, b3>; 2340b57cec5SDimitry Andric // return x; 2350b57cec5SDimitry Andric // } else { 2360b57cec5SDimitry Andric // char b4[1024], b5[1024]; 2370b57cec5SDimitry Andric // <uses of b2, b4, b5>; 2380b57cec5SDimitry Andric // return y; 2390b57cec5SDimitry Andric // } 2400b57cec5SDimitry Andric // } 2410b57cec5SDimitry Andric // 2420b57cec5SDimitry Andric // In the code above, "b3" and "b4" are declared in distinct lexical 2430b57cec5SDimitry Andric // scopes, meaning that it is easy to prove that they can share the 2440b57cec5SDimitry Andric // same stack slot. Variables "b1" and "b2" are declared in the same 2450b57cec5SDimitry Andric // scope, meaning that from a lexical point of view, their lifetimes 2460b57cec5SDimitry Andric // overlap. From a control flow pointer of view, however, the two 2470b57cec5SDimitry Andric // variables are accessed in disjoint regions of the CFG, thus it 2480b57cec5SDimitry Andric // should be possible for them to share the same stack slot. An ideal 2490b57cec5SDimitry Andric // stack allocation for the function above would look like: 2500b57cec5SDimitry Andric // 2510b57cec5SDimitry Andric // slot 0: b1, b2 2520b57cec5SDimitry Andric // slot 1: b3, b4 2530b57cec5SDimitry Andric // slot 2: b5 2540b57cec5SDimitry Andric // 2550b57cec5SDimitry Andric // Achieving this allocation is tricky, however, due to the way 2560b57cec5SDimitry Andric // lifetime markers are inserted. Here is a simplified view of the 2570b57cec5SDimitry Andric // control flow graph for the code above: 2580b57cec5SDimitry Andric // 2590b57cec5SDimitry Andric // +------ block 0 -------+ 2600b57cec5SDimitry Andric // 0| LIFETIME_START b1, b2 | 2610b57cec5SDimitry Andric // 1| <test 'if' condition> | 2620b57cec5SDimitry Andric // +-----------------------+ 2630b57cec5SDimitry Andric // ./ \. 2640b57cec5SDimitry Andric // +------ block 1 -------+ +------ block 2 -------+ 2650b57cec5SDimitry Andric // 2| LIFETIME_START b3 | 5| LIFETIME_START b4, b5 | 2660b57cec5SDimitry Andric // 3| <uses of b1, b3> | 6| <uses of b2, b4, b5> | 2670b57cec5SDimitry Andric // 4| LIFETIME_END b3 | 7| LIFETIME_END b4, b5 | 2680b57cec5SDimitry Andric // +-----------------------+ +-----------------------+ 2690b57cec5SDimitry Andric // \. /. 2700b57cec5SDimitry Andric // +------ block 3 -------+ 2710b57cec5SDimitry Andric // 8| <cleanupcode> | 2720b57cec5SDimitry Andric // 9| LIFETIME_END b1, b2 | 2730b57cec5SDimitry Andric // 10| return | 2740b57cec5SDimitry Andric // +-----------------------+ 2750b57cec5SDimitry Andric // 2760b57cec5SDimitry Andric // If we create live intervals for the variables above strictly based 2770b57cec5SDimitry Andric // on the lifetime markers, we'll get the set of intervals on the 2780b57cec5SDimitry Andric // left. If we ignore the lifetime start markers and instead treat a 2790b57cec5SDimitry Andric // variable's lifetime as beginning with the first reference to the 2800b57cec5SDimitry Andric // var, then we get the intervals on the right. 2810b57cec5SDimitry Andric // 2820b57cec5SDimitry Andric // LIFETIME_START First Use 2830b57cec5SDimitry Andric // b1: [0,9] [3,4] [8,9] 2840b57cec5SDimitry Andric // b2: [0,9] [6,9] 2850b57cec5SDimitry Andric // b3: [2,4] [3,4] 2860b57cec5SDimitry Andric // b4: [5,7] [6,7] 2870b57cec5SDimitry Andric // b5: [5,7] [6,7] 2880b57cec5SDimitry Andric // 2890b57cec5SDimitry Andric // For the intervals on the left, the best we can do is overlap two 2900b57cec5SDimitry Andric // variables (b3 and b4, for example); this gives us a stack size of 2910b57cec5SDimitry Andric // 4*1024 bytes, not ideal. When treating first-use as the start of a 2920b57cec5SDimitry Andric // lifetime, we can additionally overlap b1 and b5, giving us a 3*1024 2930b57cec5SDimitry Andric // byte stack (better). 2940b57cec5SDimitry Andric // 2950b57cec5SDimitry Andric // Degenerate Slots: 2960b57cec5SDimitry Andric // ----------------- 2970b57cec5SDimitry Andric // 2980b57cec5SDimitry Andric // Relying entirely on first-use of stack slots is problematic, 2990b57cec5SDimitry Andric // however, due to the fact that optimizations can sometimes migrate 3000b57cec5SDimitry Andric // uses of a variable outside of its lifetime start/end region. Here 3010b57cec5SDimitry Andric // is an example: 3020b57cec5SDimitry Andric // 3030b57cec5SDimitry Andric // int bar() { 3040b57cec5SDimitry Andric // char b1[1024], b2[1024]; 3050b57cec5SDimitry Andric // if (...) { 3060b57cec5SDimitry Andric // <uses of b2> 3070b57cec5SDimitry Andric // return y; 3080b57cec5SDimitry Andric // } else { 3090b57cec5SDimitry Andric // <uses of b1> 3100b57cec5SDimitry Andric // while (...) { 3110b57cec5SDimitry Andric // char b3[1024]; 3120b57cec5SDimitry Andric // <uses of b3> 3130b57cec5SDimitry Andric // } 3140b57cec5SDimitry Andric // } 3150b57cec5SDimitry Andric // } 3160b57cec5SDimitry Andric // 3170b57cec5SDimitry Andric // Before optimization, the control flow graph for the code above 3180b57cec5SDimitry Andric // might look like the following: 3190b57cec5SDimitry Andric // 3200b57cec5SDimitry Andric // +------ block 0 -------+ 3210b57cec5SDimitry Andric // 0| LIFETIME_START b1, b2 | 3220b57cec5SDimitry Andric // 1| <test 'if' condition> | 3230b57cec5SDimitry Andric // +-----------------------+ 3240b57cec5SDimitry Andric // ./ \. 3250b57cec5SDimitry Andric // +------ block 1 -------+ +------- block 2 -------+ 3260b57cec5SDimitry Andric // 2| <uses of b2> | 3| <uses of b1> | 3270b57cec5SDimitry Andric // +-----------------------+ +-----------------------+ 3280b57cec5SDimitry Andric // | | 3290b57cec5SDimitry Andric // | +------- block 3 -------+ <-\. 3300b57cec5SDimitry Andric // | 4| <while condition> | | 3310b57cec5SDimitry Andric // | +-----------------------+ | 3320b57cec5SDimitry Andric // | / | | 3330b57cec5SDimitry Andric // | / +------- block 4 -------+ 3340b57cec5SDimitry Andric // \ / 5| LIFETIME_START b3 | | 3350b57cec5SDimitry Andric // \ / 6| <uses of b3> | | 3360b57cec5SDimitry Andric // \ / 7| LIFETIME_END b3 | | 3370b57cec5SDimitry Andric // \ | +------------------------+ | 3380b57cec5SDimitry Andric // \ | \ / 3390b57cec5SDimitry Andric // +------ block 5 -----+ \--------------- 3400b57cec5SDimitry Andric // 8| <cleanupcode> | 3410b57cec5SDimitry Andric // 9| LIFETIME_END b1, b2 | 3420b57cec5SDimitry Andric // 10| return | 3430b57cec5SDimitry Andric // +---------------------+ 3440b57cec5SDimitry Andric // 3450b57cec5SDimitry Andric // During optimization, however, it can happen that an instruction 3460b57cec5SDimitry Andric // computing an address in "b3" (for example, a loop-invariant GEP) is 3470b57cec5SDimitry Andric // hoisted up out of the loop from block 4 to block 2. [Note that 3480b57cec5SDimitry Andric // this is not an actual load from the stack, only an instruction that 3490b57cec5SDimitry Andric // computes the address to be loaded]. If this happens, there is now a 3500b57cec5SDimitry Andric // path leading from the first use of b3 to the return instruction 3510b57cec5SDimitry Andric // that does not encounter the b3 LIFETIME_END, hence b3's lifetime is 3520b57cec5SDimitry Andric // now larger than if we were computing live intervals strictly based 3530b57cec5SDimitry Andric // on lifetime markers. In the example above, this lengthened lifetime 3540b57cec5SDimitry Andric // would mean that it would appear illegal to overlap b3 with b2. 3550b57cec5SDimitry Andric // 3560b57cec5SDimitry Andric // To deal with this such cases, the code in ::collectMarkers() below 3570b57cec5SDimitry Andric // tries to identify "degenerate" slots -- those slots where on a single 3580b57cec5SDimitry Andric // forward pass through the CFG we encounter a first reference to slot 3590b57cec5SDimitry Andric // K before we hit the slot K lifetime start marker. For such slots, 3600b57cec5SDimitry Andric // we fall back on using the lifetime start marker as the beginning of 3610b57cec5SDimitry Andric // the variable's lifetime. NB: with this implementation, slots can 3620b57cec5SDimitry Andric // appear degenerate in cases where there is unstructured control flow: 3630b57cec5SDimitry Andric // 3640b57cec5SDimitry Andric // if (q) goto mid; 3650b57cec5SDimitry Andric // if (x > 9) { 3660b57cec5SDimitry Andric // int b[100]; 3670b57cec5SDimitry Andric // memcpy(&b[0], ...); 3680b57cec5SDimitry Andric // mid: b[k] = ...; 3690b57cec5SDimitry Andric // abc(&b); 3700b57cec5SDimitry Andric // } 3710b57cec5SDimitry Andric // 3720b57cec5SDimitry Andric // If in RPO ordering chosen to walk the CFG we happen to visit the b[k] 3730b57cec5SDimitry Andric // before visiting the memcpy block (which will contain the lifetime start 3740b57cec5SDimitry Andric // for "b" then it will appear that 'b' has a degenerate lifetime. 3750b57cec5SDimitry Andric // 3760b57cec5SDimitry Andric 3770b57cec5SDimitry Andric namespace { 3780b57cec5SDimitry Andric 3790b57cec5SDimitry Andric /// StackColoring - A machine pass for merging disjoint stack allocations, 3800b57cec5SDimitry Andric /// marked by the LIFETIME_START and LIFETIME_END pseudo instructions. 3810b57cec5SDimitry Andric class StackColoring : public MachineFunctionPass { 3820b57cec5SDimitry Andric MachineFrameInfo *MFI; 3830b57cec5SDimitry Andric MachineFunction *MF; 3840b57cec5SDimitry Andric 3850b57cec5SDimitry Andric /// A class representing liveness information for a single basic block. 3860b57cec5SDimitry Andric /// Each bit in the BitVector represents the liveness property 3870b57cec5SDimitry Andric /// for a different stack slot. 3880b57cec5SDimitry Andric struct BlockLifetimeInfo { 3890b57cec5SDimitry Andric /// Which slots BEGINs in each basic block. 3900b57cec5SDimitry Andric BitVector Begin; 3910b57cec5SDimitry Andric 3920b57cec5SDimitry Andric /// Which slots ENDs in each basic block. 3930b57cec5SDimitry Andric BitVector End; 3940b57cec5SDimitry Andric 3950b57cec5SDimitry Andric /// Which slots are marked as LIVE_IN, coming into each basic block. 3960b57cec5SDimitry Andric BitVector LiveIn; 3970b57cec5SDimitry Andric 3980b57cec5SDimitry Andric /// Which slots are marked as LIVE_OUT, coming out of each basic block. 3990b57cec5SDimitry Andric BitVector LiveOut; 4000b57cec5SDimitry Andric }; 4010b57cec5SDimitry Andric 4020b57cec5SDimitry Andric /// Maps active slots (per bit) for each basic block. 4030b57cec5SDimitry Andric using LivenessMap = DenseMap<const MachineBasicBlock *, BlockLifetimeInfo>; 4040b57cec5SDimitry Andric LivenessMap BlockLiveness; 4050b57cec5SDimitry Andric 4060b57cec5SDimitry Andric /// Maps serial numbers to basic blocks. 4070b57cec5SDimitry Andric DenseMap<const MachineBasicBlock *, int> BasicBlocks; 4080b57cec5SDimitry Andric 4090b57cec5SDimitry Andric /// Maps basic blocks to a serial number. 4100b57cec5SDimitry Andric SmallVector<const MachineBasicBlock *, 8> BasicBlockNumbering; 4110b57cec5SDimitry Andric 4120b57cec5SDimitry Andric /// Maps slots to their use interval. Outside of this interval, slots 4130b57cec5SDimitry Andric /// values are either dead or `undef` and they will not be written to. 4140b57cec5SDimitry Andric SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals; 4150b57cec5SDimitry Andric 4160b57cec5SDimitry Andric /// Maps slots to the points where they can become in-use. 4170b57cec5SDimitry Andric SmallVector<SmallVector<SlotIndex, 4>, 16> LiveStarts; 4180b57cec5SDimitry Andric 4190b57cec5SDimitry Andric /// VNInfo is used for the construction of LiveIntervals. 4200b57cec5SDimitry Andric VNInfo::Allocator VNInfoAllocator; 4210b57cec5SDimitry Andric 4220b57cec5SDimitry Andric /// SlotIndex analysis object. 4230b57cec5SDimitry Andric SlotIndexes *Indexes; 4240b57cec5SDimitry Andric 4250b57cec5SDimitry Andric /// The list of lifetime markers found. These markers are to be removed 4260b57cec5SDimitry Andric /// once the coloring is done. 4270b57cec5SDimitry Andric SmallVector<MachineInstr*, 8> Markers; 4280b57cec5SDimitry Andric 4290b57cec5SDimitry Andric /// Record the FI slots for which we have seen some sort of 4300b57cec5SDimitry Andric /// lifetime marker (either start or end). 4310b57cec5SDimitry Andric BitVector InterestingSlots; 4320b57cec5SDimitry Andric 4330b57cec5SDimitry Andric /// FI slots that need to be handled conservatively (for these 4340b57cec5SDimitry Andric /// slots lifetime-start-on-first-use is disabled). 4350b57cec5SDimitry Andric BitVector ConservativeSlots; 4360b57cec5SDimitry Andric 4370b57cec5SDimitry Andric /// Number of iterations taken during data flow analysis. 4380b57cec5SDimitry Andric unsigned NumIterations; 4390b57cec5SDimitry Andric 4400b57cec5SDimitry Andric public: 4410b57cec5SDimitry Andric static char ID; 4420b57cec5SDimitry Andric 4430b57cec5SDimitry Andric StackColoring() : MachineFunctionPass(ID) { 4440b57cec5SDimitry Andric initializeStackColoringPass(*PassRegistry::getPassRegistry()); 4450b57cec5SDimitry Andric } 4460b57cec5SDimitry Andric 4470b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override; 4480b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &Func) override; 4490b57cec5SDimitry Andric 4500b57cec5SDimitry Andric private: 4510b57cec5SDimitry Andric /// Used in collectMarkers 4520b57cec5SDimitry Andric using BlockBitVecMap = DenseMap<const MachineBasicBlock *, BitVector>; 4530b57cec5SDimitry Andric 4540b57cec5SDimitry Andric /// Debug. 4550b57cec5SDimitry Andric void dump() const; 4560b57cec5SDimitry Andric void dumpIntervals() const; 4570b57cec5SDimitry Andric void dumpBB(MachineBasicBlock *MBB) const; 4580b57cec5SDimitry Andric void dumpBV(const char *tag, const BitVector &BV) const; 4590b57cec5SDimitry Andric 4600b57cec5SDimitry Andric /// Removes all of the lifetime marker instructions from the function. 4610b57cec5SDimitry Andric /// \returns true if any markers were removed. 4620b57cec5SDimitry Andric bool removeAllMarkers(); 4630b57cec5SDimitry Andric 4640b57cec5SDimitry Andric /// Scan the machine function and find all of the lifetime markers. 4650b57cec5SDimitry Andric /// Record the findings in the BEGIN and END vectors. 4660b57cec5SDimitry Andric /// \returns the number of markers found. 4670b57cec5SDimitry Andric unsigned collectMarkers(unsigned NumSlot); 4680b57cec5SDimitry Andric 4690b57cec5SDimitry Andric /// Perform the dataflow calculation and calculate the lifetime for each of 4700b57cec5SDimitry Andric /// the slots, based on the BEGIN/END vectors. Set the LifetimeLIVE_IN and 4710b57cec5SDimitry Andric /// LifetimeLIVE_OUT maps that represent which stack slots are live coming 4720b57cec5SDimitry Andric /// in and out blocks. 4730b57cec5SDimitry Andric void calculateLocalLiveness(); 4740b57cec5SDimitry Andric 4750b57cec5SDimitry Andric /// Returns TRUE if we're using the first-use-begins-lifetime method for 4760b57cec5SDimitry Andric /// this slot (if FALSE, then the start marker is treated as start of lifetime). 4770b57cec5SDimitry Andric bool applyFirstUse(int Slot) { 4780b57cec5SDimitry Andric if (!LifetimeStartOnFirstUse || ProtectFromEscapedAllocas) 4790b57cec5SDimitry Andric return false; 4800b57cec5SDimitry Andric if (ConservativeSlots.test(Slot)) 4810b57cec5SDimitry Andric return false; 4820b57cec5SDimitry Andric return true; 4830b57cec5SDimitry Andric } 4840b57cec5SDimitry Andric 4850b57cec5SDimitry Andric /// Examines the specified instruction and returns TRUE if the instruction 4860b57cec5SDimitry Andric /// represents the start or end of an interesting lifetime. The slot or slots 4870b57cec5SDimitry Andric /// starting or ending are added to the vector "slots" and "isStart" is set 4880b57cec5SDimitry Andric /// accordingly. 4890b57cec5SDimitry Andric /// \returns True if inst contains a lifetime start or end 4900b57cec5SDimitry Andric bool isLifetimeStartOrEnd(const MachineInstr &MI, 4910b57cec5SDimitry Andric SmallVector<int, 4> &slots, 4920b57cec5SDimitry Andric bool &isStart); 4930b57cec5SDimitry Andric 4940b57cec5SDimitry Andric /// Construct the LiveIntervals for the slots. 4950b57cec5SDimitry Andric void calculateLiveIntervals(unsigned NumSlots); 4960b57cec5SDimitry Andric 4970b57cec5SDimitry Andric /// Go over the machine function and change instructions which use stack 4980b57cec5SDimitry Andric /// slots to use the joint slots. 4990b57cec5SDimitry Andric void remapInstructions(DenseMap<int, int> &SlotRemap); 5000b57cec5SDimitry Andric 5010b57cec5SDimitry Andric /// The input program may contain instructions which are not inside lifetime 5020b57cec5SDimitry Andric /// markers. This can happen due to a bug in the compiler or due to a bug in 5030b57cec5SDimitry Andric /// user code (for example, returning a reference to a local variable). 5040b57cec5SDimitry Andric /// This procedure checks all of the instructions in the function and 5050b57cec5SDimitry Andric /// invalidates lifetime ranges which do not contain all of the instructions 5060b57cec5SDimitry Andric /// which access that frame slot. 5070b57cec5SDimitry Andric void removeInvalidSlotRanges(); 5080b57cec5SDimitry Andric 5090b57cec5SDimitry Andric /// Map entries which point to other entries to their destination. 5100b57cec5SDimitry Andric /// A->B->C becomes A->C. 5110b57cec5SDimitry Andric void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots); 5120b57cec5SDimitry Andric }; 5130b57cec5SDimitry Andric 5140b57cec5SDimitry Andric } // end anonymous namespace 5150b57cec5SDimitry Andric 5160b57cec5SDimitry Andric char StackColoring::ID = 0; 5170b57cec5SDimitry Andric 5180b57cec5SDimitry Andric char &llvm::StackColoringID = StackColoring::ID; 5190b57cec5SDimitry Andric 5200b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE, 5210b57cec5SDimitry Andric "Merge disjoint stack slots", false, false) 5220b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(SlotIndexes) 5230b57cec5SDimitry Andric INITIALIZE_PASS_END(StackColoring, DEBUG_TYPE, 5240b57cec5SDimitry Andric "Merge disjoint stack slots", false, false) 5250b57cec5SDimitry Andric 5260b57cec5SDimitry Andric void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const { 5270b57cec5SDimitry Andric AU.addRequired<SlotIndexes>(); 5280b57cec5SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 5290b57cec5SDimitry Andric } 5300b57cec5SDimitry Andric 5310b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 5320b57cec5SDimitry Andric LLVM_DUMP_METHOD void StackColoring::dumpBV(const char *tag, 5330b57cec5SDimitry Andric const BitVector &BV) const { 5340b57cec5SDimitry Andric dbgs() << tag << " : { "; 5350b57cec5SDimitry Andric for (unsigned I = 0, E = BV.size(); I != E; ++I) 5360b57cec5SDimitry Andric dbgs() << BV.test(I) << " "; 5370b57cec5SDimitry Andric dbgs() << "}\n"; 5380b57cec5SDimitry Andric } 5390b57cec5SDimitry Andric 5400b57cec5SDimitry Andric LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const { 5410b57cec5SDimitry Andric LivenessMap::const_iterator BI = BlockLiveness.find(MBB); 5420b57cec5SDimitry Andric assert(BI != BlockLiveness.end() && "Block not found"); 5430b57cec5SDimitry Andric const BlockLifetimeInfo &BlockInfo = BI->second; 5440b57cec5SDimitry Andric 5450b57cec5SDimitry Andric dumpBV("BEGIN", BlockInfo.Begin); 5460b57cec5SDimitry Andric dumpBV("END", BlockInfo.End); 5470b57cec5SDimitry Andric dumpBV("LIVE_IN", BlockInfo.LiveIn); 5480b57cec5SDimitry Andric dumpBV("LIVE_OUT", BlockInfo.LiveOut); 5490b57cec5SDimitry Andric } 5500b57cec5SDimitry Andric 5510b57cec5SDimitry Andric LLVM_DUMP_METHOD void StackColoring::dump() const { 5520b57cec5SDimitry Andric for (MachineBasicBlock *MBB : depth_first(MF)) { 5530b57cec5SDimitry Andric dbgs() << "Inspecting block #" << MBB->getNumber() << " [" 5540b57cec5SDimitry Andric << MBB->getName() << "]\n"; 5550b57cec5SDimitry Andric dumpBB(MBB); 5560b57cec5SDimitry Andric } 5570b57cec5SDimitry Andric } 5580b57cec5SDimitry Andric 5590b57cec5SDimitry Andric LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const { 5600b57cec5SDimitry Andric for (unsigned I = 0, E = Intervals.size(); I != E; ++I) { 5610b57cec5SDimitry Andric dbgs() << "Interval[" << I << "]:\n"; 5620b57cec5SDimitry Andric Intervals[I]->dump(); 5630b57cec5SDimitry Andric } 5640b57cec5SDimitry Andric } 5650b57cec5SDimitry Andric #endif 5660b57cec5SDimitry Andric 5670b57cec5SDimitry Andric static inline int getStartOrEndSlot(const MachineInstr &MI) 5680b57cec5SDimitry Andric { 5690b57cec5SDimitry Andric assert((MI.getOpcode() == TargetOpcode::LIFETIME_START || 5700b57cec5SDimitry Andric MI.getOpcode() == TargetOpcode::LIFETIME_END) && 5710b57cec5SDimitry Andric "Expected LIFETIME_START or LIFETIME_END op"); 5720b57cec5SDimitry Andric const MachineOperand &MO = MI.getOperand(0); 5730b57cec5SDimitry Andric int Slot = MO.getIndex(); 5740b57cec5SDimitry Andric if (Slot >= 0) 5750b57cec5SDimitry Andric return Slot; 5760b57cec5SDimitry Andric return -1; 5770b57cec5SDimitry Andric } 5780b57cec5SDimitry Andric 5790b57cec5SDimitry Andric // At the moment the only way to end a variable lifetime is with 5800b57cec5SDimitry Andric // a VARIABLE_LIFETIME op (which can't contain a start). If things 5810b57cec5SDimitry Andric // change and the IR allows for a single inst that both begins 5820b57cec5SDimitry Andric // and ends lifetime(s), this interface will need to be reworked. 5830b57cec5SDimitry Andric bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI, 5840b57cec5SDimitry Andric SmallVector<int, 4> &slots, 5850b57cec5SDimitry Andric bool &isStart) { 5860b57cec5SDimitry Andric if (MI.getOpcode() == TargetOpcode::LIFETIME_START || 5870b57cec5SDimitry Andric MI.getOpcode() == TargetOpcode::LIFETIME_END) { 5880b57cec5SDimitry Andric int Slot = getStartOrEndSlot(MI); 5890b57cec5SDimitry Andric if (Slot < 0) 5900b57cec5SDimitry Andric return false; 5910b57cec5SDimitry Andric if (!InterestingSlots.test(Slot)) 5920b57cec5SDimitry Andric return false; 5930b57cec5SDimitry Andric slots.push_back(Slot); 5940b57cec5SDimitry Andric if (MI.getOpcode() == TargetOpcode::LIFETIME_END) { 5950b57cec5SDimitry Andric isStart = false; 5960b57cec5SDimitry Andric return true; 5970b57cec5SDimitry Andric } 5980b57cec5SDimitry Andric if (!applyFirstUse(Slot)) { 5990b57cec5SDimitry Andric isStart = true; 6000b57cec5SDimitry Andric return true; 6010b57cec5SDimitry Andric } 6020b57cec5SDimitry Andric } else if (LifetimeStartOnFirstUse && !ProtectFromEscapedAllocas) { 6030b57cec5SDimitry Andric if (!MI.isDebugInstr()) { 6040b57cec5SDimitry Andric bool found = false; 6050b57cec5SDimitry Andric for (const MachineOperand &MO : MI.operands()) { 6060b57cec5SDimitry Andric if (!MO.isFI()) 6070b57cec5SDimitry Andric continue; 6080b57cec5SDimitry Andric int Slot = MO.getIndex(); 6090b57cec5SDimitry Andric if (Slot<0) 6100b57cec5SDimitry Andric continue; 6110b57cec5SDimitry Andric if (InterestingSlots.test(Slot) && applyFirstUse(Slot)) { 6120b57cec5SDimitry Andric slots.push_back(Slot); 6130b57cec5SDimitry Andric found = true; 6140b57cec5SDimitry Andric } 6150b57cec5SDimitry Andric } 6160b57cec5SDimitry Andric if (found) { 6170b57cec5SDimitry Andric isStart = true; 6180b57cec5SDimitry Andric return true; 6190b57cec5SDimitry Andric } 6200b57cec5SDimitry Andric } 6210b57cec5SDimitry Andric } 6220b57cec5SDimitry Andric return false; 6230b57cec5SDimitry Andric } 6240b57cec5SDimitry Andric 6250b57cec5SDimitry Andric unsigned StackColoring::collectMarkers(unsigned NumSlot) { 6260b57cec5SDimitry Andric unsigned MarkersFound = 0; 6270b57cec5SDimitry Andric BlockBitVecMap SeenStartMap; 6280b57cec5SDimitry Andric InterestingSlots.clear(); 6290b57cec5SDimitry Andric InterestingSlots.resize(NumSlot); 6300b57cec5SDimitry Andric ConservativeSlots.clear(); 6310b57cec5SDimitry Andric ConservativeSlots.resize(NumSlot); 6320b57cec5SDimitry Andric 6330b57cec5SDimitry Andric // number of start and end lifetime ops for each slot 6340b57cec5SDimitry Andric SmallVector<int, 8> NumStartLifetimes(NumSlot, 0); 6350b57cec5SDimitry Andric SmallVector<int, 8> NumEndLifetimes(NumSlot, 0); 6360b57cec5SDimitry Andric 6370b57cec5SDimitry Andric // Step 1: collect markers and populate the "InterestingSlots" 6380b57cec5SDimitry Andric // and "ConservativeSlots" sets. 6390b57cec5SDimitry Andric for (MachineBasicBlock *MBB : depth_first(MF)) { 6400b57cec5SDimitry Andric // Compute the set of slots for which we've seen a START marker but have 6410b57cec5SDimitry Andric // not yet seen an END marker at this point in the walk (e.g. on entry 6420b57cec5SDimitry Andric // to this bb). 6430b57cec5SDimitry Andric BitVector BetweenStartEnd; 6440b57cec5SDimitry Andric BetweenStartEnd.resize(NumSlot); 6450b57cec5SDimitry Andric for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), 6460b57cec5SDimitry Andric PE = MBB->pred_end(); PI != PE; ++PI) { 6470b57cec5SDimitry Andric BlockBitVecMap::const_iterator I = SeenStartMap.find(*PI); 6480b57cec5SDimitry Andric if (I != SeenStartMap.end()) { 6490b57cec5SDimitry Andric BetweenStartEnd |= I->second; 6500b57cec5SDimitry Andric } 6510b57cec5SDimitry Andric } 6520b57cec5SDimitry Andric 6530b57cec5SDimitry Andric // Walk the instructions in the block to look for start/end ops. 6540b57cec5SDimitry Andric for (MachineInstr &MI : *MBB) { 6550b57cec5SDimitry Andric if (MI.getOpcode() == TargetOpcode::LIFETIME_START || 6560b57cec5SDimitry Andric MI.getOpcode() == TargetOpcode::LIFETIME_END) { 6570b57cec5SDimitry Andric int Slot = getStartOrEndSlot(MI); 6580b57cec5SDimitry Andric if (Slot < 0) 6590b57cec5SDimitry Andric continue; 6600b57cec5SDimitry Andric InterestingSlots.set(Slot); 6610b57cec5SDimitry Andric if (MI.getOpcode() == TargetOpcode::LIFETIME_START) { 6620b57cec5SDimitry Andric BetweenStartEnd.set(Slot); 6630b57cec5SDimitry Andric NumStartLifetimes[Slot] += 1; 6640b57cec5SDimitry Andric } else { 6650b57cec5SDimitry Andric BetweenStartEnd.reset(Slot); 6660b57cec5SDimitry Andric NumEndLifetimes[Slot] += 1; 6670b57cec5SDimitry Andric } 6680b57cec5SDimitry Andric const AllocaInst *Allocation = MFI->getObjectAllocation(Slot); 6690b57cec5SDimitry Andric if (Allocation) { 6700b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Found a lifetime "); 6710b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << (MI.getOpcode() == TargetOpcode::LIFETIME_START 6720b57cec5SDimitry Andric ? "start" 6730b57cec5SDimitry Andric : "end")); 6740b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " marker for slot #" << Slot); 6750b57cec5SDimitry Andric LLVM_DEBUG(dbgs() 6760b57cec5SDimitry Andric << " with allocation: " << Allocation->getName() << "\n"); 6770b57cec5SDimitry Andric } 6780b57cec5SDimitry Andric Markers.push_back(&MI); 6790b57cec5SDimitry Andric MarkersFound += 1; 6800b57cec5SDimitry Andric } else { 6810b57cec5SDimitry Andric for (const MachineOperand &MO : MI.operands()) { 6820b57cec5SDimitry Andric if (!MO.isFI()) 6830b57cec5SDimitry Andric continue; 6840b57cec5SDimitry Andric int Slot = MO.getIndex(); 6850b57cec5SDimitry Andric if (Slot < 0) 6860b57cec5SDimitry Andric continue; 6870b57cec5SDimitry Andric if (! BetweenStartEnd.test(Slot)) { 6880b57cec5SDimitry Andric ConservativeSlots.set(Slot); 6890b57cec5SDimitry Andric } 6900b57cec5SDimitry Andric } 6910b57cec5SDimitry Andric } 6920b57cec5SDimitry Andric } 6930b57cec5SDimitry Andric BitVector &SeenStart = SeenStartMap[MBB]; 6940b57cec5SDimitry Andric SeenStart |= BetweenStartEnd; 6950b57cec5SDimitry Andric } 6960b57cec5SDimitry Andric if (!MarkersFound) { 6970b57cec5SDimitry Andric return 0; 6980b57cec5SDimitry Andric } 6990b57cec5SDimitry Andric 7000b57cec5SDimitry Andric // PR27903: slots with multiple start or end lifetime ops are not 7010b57cec5SDimitry Andric // safe to enable for "lifetime-start-on-first-use". 7020b57cec5SDimitry Andric for (unsigned slot = 0; slot < NumSlot; ++slot) 7030b57cec5SDimitry Andric if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1) 7040b57cec5SDimitry Andric ConservativeSlots.set(slot); 7050b57cec5SDimitry Andric LLVM_DEBUG(dumpBV("Conservative slots", ConservativeSlots)); 7060b57cec5SDimitry Andric 7070b57cec5SDimitry Andric // Step 2: compute begin/end sets for each block 7080b57cec5SDimitry Andric 7090b57cec5SDimitry Andric // NOTE: We use a depth-first iteration to ensure that we obtain a 7100b57cec5SDimitry Andric // deterministic numbering. 7110b57cec5SDimitry Andric for (MachineBasicBlock *MBB : depth_first(MF)) { 7120b57cec5SDimitry Andric // Assign a serial number to this basic block. 7130b57cec5SDimitry Andric BasicBlocks[MBB] = BasicBlockNumbering.size(); 7140b57cec5SDimitry Andric BasicBlockNumbering.push_back(MBB); 7150b57cec5SDimitry Andric 7160b57cec5SDimitry Andric // Keep a reference to avoid repeated lookups. 7170b57cec5SDimitry Andric BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB]; 7180b57cec5SDimitry Andric 7190b57cec5SDimitry Andric BlockInfo.Begin.resize(NumSlot); 7200b57cec5SDimitry Andric BlockInfo.End.resize(NumSlot); 7210b57cec5SDimitry Andric 7220b57cec5SDimitry Andric SmallVector<int, 4> slots; 7230b57cec5SDimitry Andric for (MachineInstr &MI : *MBB) { 7240b57cec5SDimitry Andric bool isStart = false; 7250b57cec5SDimitry Andric slots.clear(); 7260b57cec5SDimitry Andric if (isLifetimeStartOrEnd(MI, slots, isStart)) { 7270b57cec5SDimitry Andric if (!isStart) { 7280b57cec5SDimitry Andric assert(slots.size() == 1 && "unexpected: MI ends multiple slots"); 7290b57cec5SDimitry Andric int Slot = slots[0]; 7300b57cec5SDimitry Andric if (BlockInfo.Begin.test(Slot)) { 7310b57cec5SDimitry Andric BlockInfo.Begin.reset(Slot); 7320b57cec5SDimitry Andric } 7330b57cec5SDimitry Andric BlockInfo.End.set(Slot); 7340b57cec5SDimitry Andric } else { 7350b57cec5SDimitry Andric for (auto Slot : slots) { 7360b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Found a use of slot #" << Slot); 7370b57cec5SDimitry Andric LLVM_DEBUG(dbgs() 7380b57cec5SDimitry Andric << " at " << printMBBReference(*MBB) << " index "); 7390b57cec5SDimitry Andric LLVM_DEBUG(Indexes->getInstructionIndex(MI).print(dbgs())); 7400b57cec5SDimitry Andric const AllocaInst *Allocation = MFI->getObjectAllocation(Slot); 7410b57cec5SDimitry Andric if (Allocation) { 7420b57cec5SDimitry Andric LLVM_DEBUG(dbgs() 7430b57cec5SDimitry Andric << " with allocation: " << Allocation->getName()); 7440b57cec5SDimitry Andric } 7450b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n"); 7460b57cec5SDimitry Andric if (BlockInfo.End.test(Slot)) { 7470b57cec5SDimitry Andric BlockInfo.End.reset(Slot); 7480b57cec5SDimitry Andric } 7490b57cec5SDimitry Andric BlockInfo.Begin.set(Slot); 7500b57cec5SDimitry Andric } 7510b57cec5SDimitry Andric } 7520b57cec5SDimitry Andric } 7530b57cec5SDimitry Andric } 7540b57cec5SDimitry Andric } 7550b57cec5SDimitry Andric 7560b57cec5SDimitry Andric // Update statistics. 7570b57cec5SDimitry Andric NumMarkerSeen += MarkersFound; 7580b57cec5SDimitry Andric return MarkersFound; 7590b57cec5SDimitry Andric } 7600b57cec5SDimitry Andric 7610b57cec5SDimitry Andric void StackColoring::calculateLocalLiveness() { 7620b57cec5SDimitry Andric unsigned NumIters = 0; 7630b57cec5SDimitry Andric bool changed = true; 7640b57cec5SDimitry Andric while (changed) { 7650b57cec5SDimitry Andric changed = false; 7660b57cec5SDimitry Andric ++NumIters; 7670b57cec5SDimitry Andric 7680b57cec5SDimitry Andric for (const MachineBasicBlock *BB : BasicBlockNumbering) { 7690b57cec5SDimitry Andric // Use an iterator to avoid repeated lookups. 7700b57cec5SDimitry Andric LivenessMap::iterator BI = BlockLiveness.find(BB); 7710b57cec5SDimitry Andric assert(BI != BlockLiveness.end() && "Block not found"); 7720b57cec5SDimitry Andric BlockLifetimeInfo &BlockInfo = BI->second; 7730b57cec5SDimitry Andric 7740b57cec5SDimitry Andric // Compute LiveIn by unioning together the LiveOut sets of all preds. 7750b57cec5SDimitry Andric BitVector LocalLiveIn; 7760b57cec5SDimitry Andric for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(), 7770b57cec5SDimitry Andric PE = BB->pred_end(); PI != PE; ++PI) { 7780b57cec5SDimitry Andric LivenessMap::const_iterator I = BlockLiveness.find(*PI); 7790b57cec5SDimitry Andric // PR37130: transformations prior to stack coloring can 7800b57cec5SDimitry Andric // sometimes leave behind statically unreachable blocks; these 7810b57cec5SDimitry Andric // can be safely skipped here. 7820b57cec5SDimitry Andric if (I != BlockLiveness.end()) 7830b57cec5SDimitry Andric LocalLiveIn |= I->second.LiveOut; 7840b57cec5SDimitry Andric } 7850b57cec5SDimitry Andric 7860b57cec5SDimitry Andric // Compute LiveOut by subtracting out lifetimes that end in this 7870b57cec5SDimitry Andric // block, then adding in lifetimes that begin in this block. If 7880b57cec5SDimitry Andric // we have both BEGIN and END markers in the same basic block 7890b57cec5SDimitry Andric // then we know that the BEGIN marker comes after the END, 7900b57cec5SDimitry Andric // because we already handle the case where the BEGIN comes 7910b57cec5SDimitry Andric // before the END when collecting the markers (and building the 7920b57cec5SDimitry Andric // BEGIN/END vectors). 7930b57cec5SDimitry Andric BitVector LocalLiveOut = LocalLiveIn; 7940b57cec5SDimitry Andric LocalLiveOut.reset(BlockInfo.End); 7950b57cec5SDimitry Andric LocalLiveOut |= BlockInfo.Begin; 7960b57cec5SDimitry Andric 7970b57cec5SDimitry Andric // Update block LiveIn set, noting whether it has changed. 7980b57cec5SDimitry Andric if (LocalLiveIn.test(BlockInfo.LiveIn)) { 7990b57cec5SDimitry Andric changed = true; 8000b57cec5SDimitry Andric BlockInfo.LiveIn |= LocalLiveIn; 8010b57cec5SDimitry Andric } 8020b57cec5SDimitry Andric 8030b57cec5SDimitry Andric // Update block LiveOut set, noting whether it has changed. 8040b57cec5SDimitry Andric if (LocalLiveOut.test(BlockInfo.LiveOut)) { 8050b57cec5SDimitry Andric changed = true; 8060b57cec5SDimitry Andric BlockInfo.LiveOut |= LocalLiveOut; 8070b57cec5SDimitry Andric } 8080b57cec5SDimitry Andric } 8090b57cec5SDimitry Andric } // while changed. 8100b57cec5SDimitry Andric 8110b57cec5SDimitry Andric NumIterations = NumIters; 8120b57cec5SDimitry Andric } 8130b57cec5SDimitry Andric 8140b57cec5SDimitry Andric void StackColoring::calculateLiveIntervals(unsigned NumSlots) { 8150b57cec5SDimitry Andric SmallVector<SlotIndex, 16> Starts; 8160b57cec5SDimitry Andric SmallVector<bool, 16> DefinitelyInUse; 8170b57cec5SDimitry Andric 8180b57cec5SDimitry Andric // For each block, find which slots are active within this block 8190b57cec5SDimitry Andric // and update the live intervals. 8200b57cec5SDimitry Andric for (const MachineBasicBlock &MBB : *MF) { 8210b57cec5SDimitry Andric Starts.clear(); 8220b57cec5SDimitry Andric Starts.resize(NumSlots); 8230b57cec5SDimitry Andric DefinitelyInUse.clear(); 8240b57cec5SDimitry Andric DefinitelyInUse.resize(NumSlots); 8250b57cec5SDimitry Andric 8260b57cec5SDimitry Andric // Start the interval of the slots that we previously found to be 'in-use'. 8270b57cec5SDimitry Andric BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB]; 8280b57cec5SDimitry Andric for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1; 8290b57cec5SDimitry Andric pos = MBBLiveness.LiveIn.find_next(pos)) { 8300b57cec5SDimitry Andric Starts[pos] = Indexes->getMBBStartIdx(&MBB); 8310b57cec5SDimitry Andric } 8320b57cec5SDimitry Andric 8330b57cec5SDimitry Andric // Create the interval for the basic blocks containing lifetime begin/end. 8340b57cec5SDimitry Andric for (const MachineInstr &MI : MBB) { 8350b57cec5SDimitry Andric SmallVector<int, 4> slots; 8360b57cec5SDimitry Andric bool IsStart = false; 8370b57cec5SDimitry Andric if (!isLifetimeStartOrEnd(MI, slots, IsStart)) 8380b57cec5SDimitry Andric continue; 8390b57cec5SDimitry Andric SlotIndex ThisIndex = Indexes->getInstructionIndex(MI); 8400b57cec5SDimitry Andric for (auto Slot : slots) { 8410b57cec5SDimitry Andric if (IsStart) { 8420b57cec5SDimitry Andric // If a slot is already definitely in use, we don't have to emit 8430b57cec5SDimitry Andric // a new start marker because there is already a pre-existing 8440b57cec5SDimitry Andric // one. 8450b57cec5SDimitry Andric if (!DefinitelyInUse[Slot]) { 8460b57cec5SDimitry Andric LiveStarts[Slot].push_back(ThisIndex); 8470b57cec5SDimitry Andric DefinitelyInUse[Slot] = true; 8480b57cec5SDimitry Andric } 8490b57cec5SDimitry Andric if (!Starts[Slot].isValid()) 8500b57cec5SDimitry Andric Starts[Slot] = ThisIndex; 8510b57cec5SDimitry Andric } else { 8520b57cec5SDimitry Andric if (Starts[Slot].isValid()) { 8530b57cec5SDimitry Andric VNInfo *VNI = Intervals[Slot]->getValNumInfo(0); 8540b57cec5SDimitry Andric Intervals[Slot]->addSegment( 8550b57cec5SDimitry Andric LiveInterval::Segment(Starts[Slot], ThisIndex, VNI)); 8560b57cec5SDimitry Andric Starts[Slot] = SlotIndex(); // Invalidate the start index 8570b57cec5SDimitry Andric DefinitelyInUse[Slot] = false; 8580b57cec5SDimitry Andric } 8590b57cec5SDimitry Andric } 8600b57cec5SDimitry Andric } 8610b57cec5SDimitry Andric } 8620b57cec5SDimitry Andric 8630b57cec5SDimitry Andric // Finish up started segments 8640b57cec5SDimitry Andric for (unsigned i = 0; i < NumSlots; ++i) { 8650b57cec5SDimitry Andric if (!Starts[i].isValid()) 8660b57cec5SDimitry Andric continue; 8670b57cec5SDimitry Andric 8680b57cec5SDimitry Andric SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB); 8690b57cec5SDimitry Andric VNInfo *VNI = Intervals[i]->getValNumInfo(0); 8700b57cec5SDimitry Andric Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI)); 8710b57cec5SDimitry Andric } 8720b57cec5SDimitry Andric } 8730b57cec5SDimitry Andric } 8740b57cec5SDimitry Andric 8750b57cec5SDimitry Andric bool StackColoring::removeAllMarkers() { 8760b57cec5SDimitry Andric unsigned Count = 0; 8770b57cec5SDimitry Andric for (MachineInstr *MI : Markers) { 8780b57cec5SDimitry Andric MI->eraseFromParent(); 8790b57cec5SDimitry Andric Count++; 8800b57cec5SDimitry Andric } 8810b57cec5SDimitry Andric Markers.clear(); 8820b57cec5SDimitry Andric 8830b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Removed " << Count << " markers.\n"); 8840b57cec5SDimitry Andric return Count; 8850b57cec5SDimitry Andric } 8860b57cec5SDimitry Andric 8870b57cec5SDimitry Andric void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { 8880b57cec5SDimitry Andric unsigned FixedInstr = 0; 8890b57cec5SDimitry Andric unsigned FixedMemOp = 0; 8900b57cec5SDimitry Andric unsigned FixedDbg = 0; 8910b57cec5SDimitry Andric 8920b57cec5SDimitry Andric // Remap debug information that refers to stack slots. 8930b57cec5SDimitry Andric for (auto &VI : MF->getVariableDbgInfo()) { 8940b57cec5SDimitry Andric if (!VI.Var) 8950b57cec5SDimitry Andric continue; 8960b57cec5SDimitry Andric if (SlotRemap.count(VI.Slot)) { 8970b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Remapping debug info for [" 8980b57cec5SDimitry Andric << cast<DILocalVariable>(VI.Var)->getName() << "].\n"); 8990b57cec5SDimitry Andric VI.Slot = SlotRemap[VI.Slot]; 9000b57cec5SDimitry Andric FixedDbg++; 9010b57cec5SDimitry Andric } 9020b57cec5SDimitry Andric } 9030b57cec5SDimitry Andric 9040b57cec5SDimitry Andric // Keep a list of *allocas* which need to be remapped. 9050b57cec5SDimitry Andric DenseMap<const AllocaInst*, const AllocaInst*> Allocas; 9060b57cec5SDimitry Andric 9070b57cec5SDimitry Andric // Keep a list of allocas which has been affected by the remap. 9080b57cec5SDimitry Andric SmallPtrSet<const AllocaInst*, 32> MergedAllocas; 9090b57cec5SDimitry Andric 9100b57cec5SDimitry Andric for (const std::pair<int, int> &SI : SlotRemap) { 9110b57cec5SDimitry Andric const AllocaInst *From = MFI->getObjectAllocation(SI.first); 9120b57cec5SDimitry Andric const AllocaInst *To = MFI->getObjectAllocation(SI.second); 9130b57cec5SDimitry Andric assert(To && From && "Invalid allocation object"); 9140b57cec5SDimitry Andric Allocas[From] = To; 9150b57cec5SDimitry Andric 9160b57cec5SDimitry Andric // AA might be used later for instruction scheduling, and we need it to be 9170b57cec5SDimitry Andric // able to deduce the correct aliasing releationships between pointers 9180b57cec5SDimitry Andric // derived from the alloca being remapped and the target of that remapping. 9190b57cec5SDimitry Andric // The only safe way, without directly informing AA about the remapping 9200b57cec5SDimitry Andric // somehow, is to directly update the IR to reflect the change being made 9210b57cec5SDimitry Andric // here. 9220b57cec5SDimitry Andric Instruction *Inst = const_cast<AllocaInst *>(To); 9230b57cec5SDimitry Andric if (From->getType() != To->getType()) { 9240b57cec5SDimitry Andric BitCastInst *Cast = new BitCastInst(Inst, From->getType()); 9250b57cec5SDimitry Andric Cast->insertAfter(Inst); 9260b57cec5SDimitry Andric Inst = Cast; 9270b57cec5SDimitry Andric } 9280b57cec5SDimitry Andric 9290b57cec5SDimitry Andric // We keep both slots to maintain AliasAnalysis metadata later. 9300b57cec5SDimitry Andric MergedAllocas.insert(From); 9310b57cec5SDimitry Andric MergedAllocas.insert(To); 9320b57cec5SDimitry Andric 9330b57cec5SDimitry Andric // Transfer the stack protector layout tag, but make sure that SSPLK_AddrOf 9340b57cec5SDimitry Andric // does not overwrite SSPLK_SmallArray or SSPLK_LargeArray, and make sure 9350b57cec5SDimitry Andric // that SSPLK_SmallArray does not overwrite SSPLK_LargeArray. 9360b57cec5SDimitry Andric MachineFrameInfo::SSPLayoutKind FromKind 9370b57cec5SDimitry Andric = MFI->getObjectSSPLayout(SI.first); 9380b57cec5SDimitry Andric MachineFrameInfo::SSPLayoutKind ToKind = MFI->getObjectSSPLayout(SI.second); 9390b57cec5SDimitry Andric if (FromKind != MachineFrameInfo::SSPLK_None && 9400b57cec5SDimitry Andric (ToKind == MachineFrameInfo::SSPLK_None || 9410b57cec5SDimitry Andric (ToKind != MachineFrameInfo::SSPLK_LargeArray && 9420b57cec5SDimitry Andric FromKind != MachineFrameInfo::SSPLK_AddrOf))) 9430b57cec5SDimitry Andric MFI->setObjectSSPLayout(SI.second, FromKind); 9440b57cec5SDimitry Andric 9450b57cec5SDimitry Andric // The new alloca might not be valid in a llvm.dbg.declare for this 9460b57cec5SDimitry Andric // variable, so undef out the use to make the verifier happy. 9470b57cec5SDimitry Andric AllocaInst *FromAI = const_cast<AllocaInst *>(From); 9480b57cec5SDimitry Andric if (FromAI->isUsedByMetadata()) 9490b57cec5SDimitry Andric ValueAsMetadata::handleRAUW(FromAI, UndefValue::get(FromAI->getType())); 9500b57cec5SDimitry Andric for (auto &Use : FromAI->uses()) { 9510b57cec5SDimitry Andric if (BitCastInst *BCI = dyn_cast<BitCastInst>(Use.get())) 9520b57cec5SDimitry Andric if (BCI->isUsedByMetadata()) 9530b57cec5SDimitry Andric ValueAsMetadata::handleRAUW(BCI, UndefValue::get(BCI->getType())); 9540b57cec5SDimitry Andric } 9550b57cec5SDimitry Andric 9560b57cec5SDimitry Andric // Note that this will not replace uses in MMOs (which we'll update below), 9570b57cec5SDimitry Andric // or anywhere else (which is why we won't delete the original 9580b57cec5SDimitry Andric // instruction). 9590b57cec5SDimitry Andric FromAI->replaceAllUsesWith(Inst); 9600b57cec5SDimitry Andric } 9610b57cec5SDimitry Andric 9620b57cec5SDimitry Andric // Remap all instructions to the new stack slots. 96355e4f9d5SDimitry Andric std::vector<std::vector<MachineMemOperand *>> SSRefs(MFI->getObjectIndexEnd()); 9640b57cec5SDimitry Andric for (MachineBasicBlock &BB : *MF) 9650b57cec5SDimitry Andric for (MachineInstr &I : BB) { 9660b57cec5SDimitry Andric // Skip lifetime markers. We'll remove them soon. 9670b57cec5SDimitry Andric if (I.getOpcode() == TargetOpcode::LIFETIME_START || 9680b57cec5SDimitry Andric I.getOpcode() == TargetOpcode::LIFETIME_END) 9690b57cec5SDimitry Andric continue; 9700b57cec5SDimitry Andric 9710b57cec5SDimitry Andric // Update the MachineMemOperand to use the new alloca. 9720b57cec5SDimitry Andric for (MachineMemOperand *MMO : I.memoperands()) { 9730b57cec5SDimitry Andric // We've replaced IR-level uses of the remapped allocas, so we only 9740b57cec5SDimitry Andric // need to replace direct uses here. 9750b57cec5SDimitry Andric const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(MMO->getValue()); 9760b57cec5SDimitry Andric if (!AI) 9770b57cec5SDimitry Andric continue; 9780b57cec5SDimitry Andric 9790b57cec5SDimitry Andric if (!Allocas.count(AI)) 9800b57cec5SDimitry Andric continue; 9810b57cec5SDimitry Andric 9820b57cec5SDimitry Andric MMO->setValue(Allocas[AI]); 9830b57cec5SDimitry Andric FixedMemOp++; 9840b57cec5SDimitry Andric } 9850b57cec5SDimitry Andric 9860b57cec5SDimitry Andric // Update all of the machine instruction operands. 9870b57cec5SDimitry Andric for (MachineOperand &MO : I.operands()) { 9880b57cec5SDimitry Andric if (!MO.isFI()) 9890b57cec5SDimitry Andric continue; 9900b57cec5SDimitry Andric int FromSlot = MO.getIndex(); 9910b57cec5SDimitry Andric 9920b57cec5SDimitry Andric // Don't touch arguments. 9930b57cec5SDimitry Andric if (FromSlot<0) 9940b57cec5SDimitry Andric continue; 9950b57cec5SDimitry Andric 9960b57cec5SDimitry Andric // Only look at mapped slots. 9970b57cec5SDimitry Andric if (!SlotRemap.count(FromSlot)) 9980b57cec5SDimitry Andric continue; 9990b57cec5SDimitry Andric 10000b57cec5SDimitry Andric // In a debug build, check that the instruction that we are modifying is 10010b57cec5SDimitry Andric // inside the expected live range. If the instruction is not inside 10020b57cec5SDimitry Andric // the calculated range then it means that the alloca usage moved 10030b57cec5SDimitry Andric // outside of the lifetime markers, or that the user has a bug. 10040b57cec5SDimitry Andric // NOTE: Alloca address calculations which happen outside the lifetime 10050b57cec5SDimitry Andric // zone are okay, despite the fact that we don't have a good way 10060b57cec5SDimitry Andric // for validating all of the usages of the calculation. 10070b57cec5SDimitry Andric #ifndef NDEBUG 1008480093f4SDimitry Andric bool TouchesMemory = I.mayLoadOrStore(); 10090b57cec5SDimitry Andric // If we *don't* protect the user from escaped allocas, don't bother 10100b57cec5SDimitry Andric // validating the instructions. 10110b57cec5SDimitry Andric if (!I.isDebugInstr() && TouchesMemory && ProtectFromEscapedAllocas) { 10120b57cec5SDimitry Andric SlotIndex Index = Indexes->getInstructionIndex(I); 10130b57cec5SDimitry Andric const LiveInterval *Interval = &*Intervals[FromSlot]; 10140b57cec5SDimitry Andric assert(Interval->find(Index) != Interval->end() && 10150b57cec5SDimitry Andric "Found instruction usage outside of live range."); 10160b57cec5SDimitry Andric } 10170b57cec5SDimitry Andric #endif 10180b57cec5SDimitry Andric 10190b57cec5SDimitry Andric // Fix the machine instructions. 10200b57cec5SDimitry Andric int ToSlot = SlotRemap[FromSlot]; 10210b57cec5SDimitry Andric MO.setIndex(ToSlot); 10220b57cec5SDimitry Andric FixedInstr++; 10230b57cec5SDimitry Andric } 10240b57cec5SDimitry Andric 10250b57cec5SDimitry Andric // We adjust AliasAnalysis information for merged stack slots. 10260b57cec5SDimitry Andric SmallVector<MachineMemOperand *, 2> NewMMOs; 10270b57cec5SDimitry Andric bool ReplaceMemOps = false; 10280b57cec5SDimitry Andric for (MachineMemOperand *MMO : I.memoperands()) { 102955e4f9d5SDimitry Andric // Collect MachineMemOperands which reference 103055e4f9d5SDimitry Andric // FixedStackPseudoSourceValues with old frame indices. 103155e4f9d5SDimitry Andric if (const auto *FSV = dyn_cast_or_null<FixedStackPseudoSourceValue>( 103255e4f9d5SDimitry Andric MMO->getPseudoValue())) { 103355e4f9d5SDimitry Andric int FI = FSV->getFrameIndex(); 103455e4f9d5SDimitry Andric auto To = SlotRemap.find(FI); 103555e4f9d5SDimitry Andric if (To != SlotRemap.end()) 103655e4f9d5SDimitry Andric SSRefs[FI].push_back(MMO); 103755e4f9d5SDimitry Andric } 103855e4f9d5SDimitry Andric 10390b57cec5SDimitry Andric // If this memory location can be a slot remapped here, 10400b57cec5SDimitry Andric // we remove AA information. 10410b57cec5SDimitry Andric bool MayHaveConflictingAAMD = false; 10420b57cec5SDimitry Andric if (MMO->getAAInfo()) { 10430b57cec5SDimitry Andric if (const Value *MMOV = MMO->getValue()) { 10440b57cec5SDimitry Andric SmallVector<Value *, 4> Objs; 10450b57cec5SDimitry Andric getUnderlyingObjectsForCodeGen(MMOV, Objs, MF->getDataLayout()); 10460b57cec5SDimitry Andric 10470b57cec5SDimitry Andric if (Objs.empty()) 10480b57cec5SDimitry Andric MayHaveConflictingAAMD = true; 10490b57cec5SDimitry Andric else 10500b57cec5SDimitry Andric for (Value *V : Objs) { 10510b57cec5SDimitry Andric // If this memory location comes from a known stack slot 10520b57cec5SDimitry Andric // that is not remapped, we continue checking. 10530b57cec5SDimitry Andric // Otherwise, we need to invalidate AA infomation. 10540b57cec5SDimitry Andric const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V); 10550b57cec5SDimitry Andric if (AI && MergedAllocas.count(AI)) { 10560b57cec5SDimitry Andric MayHaveConflictingAAMD = true; 10570b57cec5SDimitry Andric break; 10580b57cec5SDimitry Andric } 10590b57cec5SDimitry Andric } 10600b57cec5SDimitry Andric } 10610b57cec5SDimitry Andric } 10620b57cec5SDimitry Andric if (MayHaveConflictingAAMD) { 10630b57cec5SDimitry Andric NewMMOs.push_back(MF->getMachineMemOperand(MMO, AAMDNodes())); 10640b57cec5SDimitry Andric ReplaceMemOps = true; 10650b57cec5SDimitry Andric } else { 10660b57cec5SDimitry Andric NewMMOs.push_back(MMO); 10670b57cec5SDimitry Andric } 10680b57cec5SDimitry Andric } 10690b57cec5SDimitry Andric 10700b57cec5SDimitry Andric // If any memory operand is updated, set memory references of 10710b57cec5SDimitry Andric // this instruction. 10720b57cec5SDimitry Andric if (ReplaceMemOps) 10730b57cec5SDimitry Andric I.setMemRefs(*MF, NewMMOs); 10740b57cec5SDimitry Andric } 10750b57cec5SDimitry Andric 107655e4f9d5SDimitry Andric // Rewrite MachineMemOperands that reference old frame indices. 107755e4f9d5SDimitry Andric for (auto E : enumerate(SSRefs)) { 107855e4f9d5SDimitry Andric const PseudoSourceValue *NewSV = 107955e4f9d5SDimitry Andric MF->getPSVManager().getFixedStack(SlotRemap[E.index()]); 108055e4f9d5SDimitry Andric for (MachineMemOperand *Ref : E.value()) 108155e4f9d5SDimitry Andric Ref->setValue(NewSV); 108255e4f9d5SDimitry Andric } 108355e4f9d5SDimitry Andric 10840b57cec5SDimitry Andric // Update the location of C++ catch objects for the MSVC personality routine. 10850b57cec5SDimitry Andric if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo()) 10860b57cec5SDimitry Andric for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap) 10870b57cec5SDimitry Andric for (WinEHHandlerType &H : TBME.HandlerArray) 10880b57cec5SDimitry Andric if (H.CatchObj.FrameIndex != std::numeric_limits<int>::max() && 10890b57cec5SDimitry Andric SlotRemap.count(H.CatchObj.FrameIndex)) 10900b57cec5SDimitry Andric H.CatchObj.FrameIndex = SlotRemap[H.CatchObj.FrameIndex]; 10910b57cec5SDimitry Andric 10920b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Fixed " << FixedMemOp << " machine memory operands.\n"); 10930b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Fixed " << FixedDbg << " debug locations.\n"); 10940b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Fixed " << FixedInstr << " machine instructions.\n"); 10950b57cec5SDimitry Andric } 10960b57cec5SDimitry Andric 10970b57cec5SDimitry Andric void StackColoring::removeInvalidSlotRanges() { 10980b57cec5SDimitry Andric for (MachineBasicBlock &BB : *MF) 10990b57cec5SDimitry Andric for (MachineInstr &I : BB) { 11000b57cec5SDimitry Andric if (I.getOpcode() == TargetOpcode::LIFETIME_START || 11010b57cec5SDimitry Andric I.getOpcode() == TargetOpcode::LIFETIME_END || I.isDebugInstr()) 11020b57cec5SDimitry Andric continue; 11030b57cec5SDimitry Andric 11040b57cec5SDimitry Andric // Some intervals are suspicious! In some cases we find address 11050b57cec5SDimitry Andric // calculations outside of the lifetime zone, but not actual memory 11060b57cec5SDimitry Andric // read or write. Memory accesses outside of the lifetime zone are a clear 11070b57cec5SDimitry Andric // violation, but address calculations are okay. This can happen when 11080b57cec5SDimitry Andric // GEPs are hoisted outside of the lifetime zone. 11090b57cec5SDimitry Andric // So, in here we only check instructions which can read or write memory. 11100b57cec5SDimitry Andric if (!I.mayLoad() && !I.mayStore()) 11110b57cec5SDimitry Andric continue; 11120b57cec5SDimitry Andric 11130b57cec5SDimitry Andric // Check all of the machine operands. 11140b57cec5SDimitry Andric for (const MachineOperand &MO : I.operands()) { 11150b57cec5SDimitry Andric if (!MO.isFI()) 11160b57cec5SDimitry Andric continue; 11170b57cec5SDimitry Andric 11180b57cec5SDimitry Andric int Slot = MO.getIndex(); 11190b57cec5SDimitry Andric 11200b57cec5SDimitry Andric if (Slot<0) 11210b57cec5SDimitry Andric continue; 11220b57cec5SDimitry Andric 11230b57cec5SDimitry Andric if (Intervals[Slot]->empty()) 11240b57cec5SDimitry Andric continue; 11250b57cec5SDimitry Andric 11260b57cec5SDimitry Andric // Check that the used slot is inside the calculated lifetime range. 11270b57cec5SDimitry Andric // If it is not, warn about it and invalidate the range. 11280b57cec5SDimitry Andric LiveInterval *Interval = &*Intervals[Slot]; 11290b57cec5SDimitry Andric SlotIndex Index = Indexes->getInstructionIndex(I); 11300b57cec5SDimitry Andric if (Interval->find(Index) == Interval->end()) { 11310b57cec5SDimitry Andric Interval->clear(); 11320b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Invalidating range #" << Slot << "\n"); 11330b57cec5SDimitry Andric EscapedAllocas++; 11340b57cec5SDimitry Andric } 11350b57cec5SDimitry Andric } 11360b57cec5SDimitry Andric } 11370b57cec5SDimitry Andric } 11380b57cec5SDimitry Andric 11390b57cec5SDimitry Andric void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap, 11400b57cec5SDimitry Andric unsigned NumSlots) { 11410b57cec5SDimitry Andric // Expunge slot remap map. 11420b57cec5SDimitry Andric for (unsigned i=0; i < NumSlots; ++i) { 11430b57cec5SDimitry Andric // If we are remapping i 11440b57cec5SDimitry Andric if (SlotRemap.count(i)) { 11450b57cec5SDimitry Andric int Target = SlotRemap[i]; 11460b57cec5SDimitry Andric // As long as our target is mapped to something else, follow it. 11470b57cec5SDimitry Andric while (SlotRemap.count(Target)) { 11480b57cec5SDimitry Andric Target = SlotRemap[Target]; 11490b57cec5SDimitry Andric SlotRemap[i] = Target; 11500b57cec5SDimitry Andric } 11510b57cec5SDimitry Andric } 11520b57cec5SDimitry Andric } 11530b57cec5SDimitry Andric } 11540b57cec5SDimitry Andric 11550b57cec5SDimitry Andric bool StackColoring::runOnMachineFunction(MachineFunction &Func) { 11560b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "********** Stack Coloring **********\n" 11570b57cec5SDimitry Andric << "********** Function: " << Func.getName() << '\n'); 11580b57cec5SDimitry Andric MF = &Func; 11590b57cec5SDimitry Andric MFI = &MF->getFrameInfo(); 11600b57cec5SDimitry Andric Indexes = &getAnalysis<SlotIndexes>(); 11610b57cec5SDimitry Andric BlockLiveness.clear(); 11620b57cec5SDimitry Andric BasicBlocks.clear(); 11630b57cec5SDimitry Andric BasicBlockNumbering.clear(); 11640b57cec5SDimitry Andric Markers.clear(); 11650b57cec5SDimitry Andric Intervals.clear(); 11660b57cec5SDimitry Andric LiveStarts.clear(); 11670b57cec5SDimitry Andric VNInfoAllocator.Reset(); 11680b57cec5SDimitry Andric 11690b57cec5SDimitry Andric unsigned NumSlots = MFI->getObjectIndexEnd(); 11700b57cec5SDimitry Andric 11710b57cec5SDimitry Andric // If there are no stack slots then there are no markers to remove. 11720b57cec5SDimitry Andric if (!NumSlots) 11730b57cec5SDimitry Andric return false; 11740b57cec5SDimitry Andric 11750b57cec5SDimitry Andric SmallVector<int, 8> SortedSlots; 11760b57cec5SDimitry Andric SortedSlots.reserve(NumSlots); 11770b57cec5SDimitry Andric Intervals.reserve(NumSlots); 11780b57cec5SDimitry Andric LiveStarts.resize(NumSlots); 11790b57cec5SDimitry Andric 11800b57cec5SDimitry Andric unsigned NumMarkers = collectMarkers(NumSlots); 11810b57cec5SDimitry Andric 11820b57cec5SDimitry Andric unsigned TotalSize = 0; 11830b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Found " << NumMarkers << " markers and " << NumSlots 11840b57cec5SDimitry Andric << " slots\n"); 11850b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Slot structure:\n"); 11860b57cec5SDimitry Andric 11870b57cec5SDimitry Andric for (int i=0; i < MFI->getObjectIndexEnd(); ++i) { 11880b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Slot #" << i << " - " << MFI->getObjectSize(i) 11890b57cec5SDimitry Andric << " bytes.\n"); 11900b57cec5SDimitry Andric TotalSize += MFI->getObjectSize(i); 11910b57cec5SDimitry Andric } 11920b57cec5SDimitry Andric 11930b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Total Stack size: " << TotalSize << " bytes\n\n"); 11940b57cec5SDimitry Andric 11950b57cec5SDimitry Andric // Don't continue because there are not enough lifetime markers, or the 11960b57cec5SDimitry Andric // stack is too small, or we are told not to optimize the slots. 11970b57cec5SDimitry Andric if (NumMarkers < 2 || TotalSize < 16 || DisableColoring || 11980b57cec5SDimitry Andric skipFunction(Func.getFunction())) { 11990b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Will not try to merge slots.\n"); 12000b57cec5SDimitry Andric return removeAllMarkers(); 12010b57cec5SDimitry Andric } 12020b57cec5SDimitry Andric 12030b57cec5SDimitry Andric for (unsigned i=0; i < NumSlots; ++i) { 12040b57cec5SDimitry Andric std::unique_ptr<LiveInterval> LI(new LiveInterval(i, 0)); 12050b57cec5SDimitry Andric LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator); 12060b57cec5SDimitry Andric Intervals.push_back(std::move(LI)); 12070b57cec5SDimitry Andric SortedSlots.push_back(i); 12080b57cec5SDimitry Andric } 12090b57cec5SDimitry Andric 12100b57cec5SDimitry Andric // Calculate the liveness of each block. 12110b57cec5SDimitry Andric calculateLocalLiveness(); 12120b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Dataflow iterations: " << NumIterations << "\n"); 12130b57cec5SDimitry Andric LLVM_DEBUG(dump()); 12140b57cec5SDimitry Andric 12150b57cec5SDimitry Andric // Propagate the liveness information. 12160b57cec5SDimitry Andric calculateLiveIntervals(NumSlots); 12170b57cec5SDimitry Andric LLVM_DEBUG(dumpIntervals()); 12180b57cec5SDimitry Andric 12190b57cec5SDimitry Andric // Search for allocas which are used outside of the declared lifetime 12200b57cec5SDimitry Andric // markers. 12210b57cec5SDimitry Andric if (ProtectFromEscapedAllocas) 12220b57cec5SDimitry Andric removeInvalidSlotRanges(); 12230b57cec5SDimitry Andric 12240b57cec5SDimitry Andric // Maps old slots to new slots. 12250b57cec5SDimitry Andric DenseMap<int, int> SlotRemap; 12260b57cec5SDimitry Andric unsigned RemovedSlots = 0; 12270b57cec5SDimitry Andric unsigned ReducedSize = 0; 12280b57cec5SDimitry Andric 12290b57cec5SDimitry Andric // Do not bother looking at empty intervals. 12300b57cec5SDimitry Andric for (unsigned I = 0; I < NumSlots; ++I) { 12310b57cec5SDimitry Andric if (Intervals[SortedSlots[I]]->empty()) 12320b57cec5SDimitry Andric SortedSlots[I] = -1; 12330b57cec5SDimitry Andric } 12340b57cec5SDimitry Andric 12350b57cec5SDimitry Andric // This is a simple greedy algorithm for merging allocas. First, sort the 12360b57cec5SDimitry Andric // slots, placing the largest slots first. Next, perform an n^2 scan and look 12370b57cec5SDimitry Andric // for disjoint slots. When you find disjoint slots, merge the samller one 12380b57cec5SDimitry Andric // into the bigger one and update the live interval. Remove the small alloca 12390b57cec5SDimitry Andric // and continue. 12400b57cec5SDimitry Andric 12410b57cec5SDimitry Andric // Sort the slots according to their size. Place unused slots at the end. 12420b57cec5SDimitry Andric // Use stable sort to guarantee deterministic code generation. 12430b57cec5SDimitry Andric llvm::stable_sort(SortedSlots, [this](int LHS, int RHS) { 12440b57cec5SDimitry Andric // We use -1 to denote a uninteresting slot. Place these slots at the end. 12450b57cec5SDimitry Andric if (LHS == -1) 12460b57cec5SDimitry Andric return false; 12470b57cec5SDimitry Andric if (RHS == -1) 12480b57cec5SDimitry Andric return true; 12490b57cec5SDimitry Andric // Sort according to size. 12500b57cec5SDimitry Andric return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS); 12510b57cec5SDimitry Andric }); 12520b57cec5SDimitry Andric 12530b57cec5SDimitry Andric for (auto &s : LiveStarts) 12540b57cec5SDimitry Andric llvm::sort(s); 12550b57cec5SDimitry Andric 12560b57cec5SDimitry Andric bool Changed = true; 12570b57cec5SDimitry Andric while (Changed) { 12580b57cec5SDimitry Andric Changed = false; 12590b57cec5SDimitry Andric for (unsigned I = 0; I < NumSlots; ++I) { 12600b57cec5SDimitry Andric if (SortedSlots[I] == -1) 12610b57cec5SDimitry Andric continue; 12620b57cec5SDimitry Andric 12630b57cec5SDimitry Andric for (unsigned J=I+1; J < NumSlots; ++J) { 12640b57cec5SDimitry Andric if (SortedSlots[J] == -1) 12650b57cec5SDimitry Andric continue; 12660b57cec5SDimitry Andric 12670b57cec5SDimitry Andric int FirstSlot = SortedSlots[I]; 12680b57cec5SDimitry Andric int SecondSlot = SortedSlots[J]; 12690b57cec5SDimitry Andric LiveInterval *First = &*Intervals[FirstSlot]; 12700b57cec5SDimitry Andric LiveInterval *Second = &*Intervals[SecondSlot]; 12710b57cec5SDimitry Andric auto &FirstS = LiveStarts[FirstSlot]; 12720b57cec5SDimitry Andric auto &SecondS = LiveStarts[SecondSlot]; 12730b57cec5SDimitry Andric assert(!First->empty() && !Second->empty() && "Found an empty range"); 12740b57cec5SDimitry Andric 12750b57cec5SDimitry Andric // Merge disjoint slots. This is a little bit tricky - see the 12760b57cec5SDimitry Andric // Implementation Notes section for an explanation. 12770b57cec5SDimitry Andric if (!First->isLiveAtIndexes(SecondS) && 12780b57cec5SDimitry Andric !Second->isLiveAtIndexes(FirstS)) { 12790b57cec5SDimitry Andric Changed = true; 12800b57cec5SDimitry Andric First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0)); 12810b57cec5SDimitry Andric 12820b57cec5SDimitry Andric int OldSize = FirstS.size(); 12830b57cec5SDimitry Andric FirstS.append(SecondS.begin(), SecondS.end()); 12840b57cec5SDimitry Andric auto Mid = FirstS.begin() + OldSize; 12850b57cec5SDimitry Andric std::inplace_merge(FirstS.begin(), Mid, FirstS.end()); 12860b57cec5SDimitry Andric 12870b57cec5SDimitry Andric SlotRemap[SecondSlot] = FirstSlot; 12880b57cec5SDimitry Andric SortedSlots[J] = -1; 12890b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Merging #" << FirstSlot << " and slots #" 12900b57cec5SDimitry Andric << SecondSlot << " together.\n"); 12910b57cec5SDimitry Andric unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot), 12920b57cec5SDimitry Andric MFI->getObjectAlignment(SecondSlot)); 12930b57cec5SDimitry Andric 12940b57cec5SDimitry Andric assert(MFI->getObjectSize(FirstSlot) >= 12950b57cec5SDimitry Andric MFI->getObjectSize(SecondSlot) && 12960b57cec5SDimitry Andric "Merging a small object into a larger one"); 12970b57cec5SDimitry Andric 12980b57cec5SDimitry Andric RemovedSlots+=1; 12990b57cec5SDimitry Andric ReducedSize += MFI->getObjectSize(SecondSlot); 13000b57cec5SDimitry Andric MFI->setObjectAlignment(FirstSlot, MaxAlignment); 13010b57cec5SDimitry Andric MFI->RemoveStackObject(SecondSlot); 13020b57cec5SDimitry Andric } 13030b57cec5SDimitry Andric } 13040b57cec5SDimitry Andric } 13050b57cec5SDimitry Andric }// While changed. 13060b57cec5SDimitry Andric 13070b57cec5SDimitry Andric // Record statistics. 13080b57cec5SDimitry Andric StackSpaceSaved += ReducedSize; 13090b57cec5SDimitry Andric StackSlotMerged += RemovedSlots; 13100b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Merge " << RemovedSlots << " slots. Saved " 13110b57cec5SDimitry Andric << ReducedSize << " bytes\n"); 13120b57cec5SDimitry Andric 13130b57cec5SDimitry Andric // Scan the entire function and update all machine operands that use frame 13140b57cec5SDimitry Andric // indices to use the remapped frame index. 13150b57cec5SDimitry Andric expungeSlotMap(SlotRemap, NumSlots); 13160b57cec5SDimitry Andric remapInstructions(SlotRemap); 13170b57cec5SDimitry Andric 13180b57cec5SDimitry Andric return removeAllMarkers(); 13190b57cec5SDimitry Andric } 1320