1 //===- MVEGatherScatterLowering.cpp - Gather/Scatter lowering -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// This pass custom lowers llvm.gather and llvm.scatter instructions to
10 /// arm.mve.gather and arm.mve.scatter intrinsics, optimising the code to
11 /// produce a better final result as we go.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "ARM.h"
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMSubtarget.h"
18 #include "llvm/Analysis/LoopInfo.h"
19 #include "llvm/Analysis/TargetTransformInfo.h"
20 #include "llvm/CodeGen/TargetLowering.h"
21 #include "llvm/CodeGen/TargetPassConfig.h"
22 #include "llvm/CodeGen/TargetSubtargetInfo.h"
23 #include "llvm/InitializePasses.h"
24 #include "llvm/IR/BasicBlock.h"
25 #include "llvm/IR/Constant.h"
26 #include "llvm/IR/Constants.h"
27 #include "llvm/IR/DerivedTypes.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/InstrTypes.h"
30 #include "llvm/IR/Instruction.h"
31 #include "llvm/IR/Instructions.h"
32 #include "llvm/IR/IntrinsicInst.h"
33 #include "llvm/IR/Intrinsics.h"
34 #include "llvm/IR/IntrinsicsARM.h"
35 #include "llvm/IR/IRBuilder.h"
36 #include "llvm/IR/PatternMatch.h"
37 #include "llvm/IR/Type.h"
38 #include "llvm/IR/Value.h"
39 #include "llvm/Pass.h"
40 #include "llvm/Support/Casting.h"
41 #include "llvm/Transforms/Utils/Local.h"
42 #include <algorithm>
43 #include <cassert>
44
45 using namespace llvm;
46
47 #define DEBUG_TYPE "mve-gather-scatter-lowering"
48
// Command-line switch, off by default. Non-static on purpose: other parts of
// the ARM backend read this flag to decide whether masked gathers/scatters
// may be generated at all.
cl::opt<bool> EnableMaskedGatherScatters(
    "enable-arm-maskedgatscat", cl::Hidden, cl::init(false),
    cl::desc("Enable the generation of masked gathers and scatters"));
52
53 namespace {
54
class MVEGatherScatterLowering : public FunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid

  explicit MVEGatherScatterLowering() : FunctionPass(ID) {
    initializeMVEGatherScatterLoweringPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override {
    return "MVE gather/scatter lowering";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // This pass only rewrites instructions inside blocks; the CFG itself is
    // left untouched.
    AU.setPreservesCFG();
    AU.addRequired<TargetPassConfig>();
    AU.addRequired<LoopInfoWrapperPass>();
    FunctionPass::getAnalysisUsage(AU);
  }

private:
  // Loop analysis, used when deciding whether incrementing (QI) forms are
  // worthwhile and when pushing invariant offset computations out of loops.
  LoopInfo *LI = nullptr;

  // Check this is a valid gather with correct alignment
  bool isLegalTypeAndAlignment(unsigned NumElements, unsigned ElemSize,
                               Align Alignment);
  // Check whether Ptr is hidden behind a bitcast and look through it
  void lookThroughBitcast(Value *&Ptr);
  // Check for a getelementptr and deduce base and offsets from it, on success
  // returning the base directly and the offsets indirectly using the Offsets
  // argument
  Value *checkGEP(Value *&Offsets, Type *Ty, GetElementPtrInst *GEP,
                  IRBuilder<> &Builder);
  // Compute the scale of this gather/scatter instruction
  int computeScale(unsigned GEPElemSize, unsigned MemoryElemSize);
  // If the value is a constant, or derived from constants via additions
  // and multiplications, return its numeric value
  Optional<int64_t> getIfConst(const Value *V);
  // If Inst is an add instruction, check whether one summand is a
  // constant. If so, scale this constant and return it together with
  // the other summand.
  std::pair<Value *, int64_t> getVarAndConst(Value *Inst, int TypeScale);

  Value *lowerGather(IntrinsicInst *I);
  // Create a gather from a base + vector of offsets
  Value *tryCreateMaskedGatherOffset(IntrinsicInst *I, Value *Ptr,
                                     Instruction *&Root, IRBuilder<> &Builder);
  // Create a gather from a vector of pointers
  Value *tryCreateMaskedGatherBase(IntrinsicInst *I, Value *Ptr,
                                   IRBuilder<> &Builder, int64_t Increment = 0);
  // Create an incrementing gather from a vector of pointers
  Value *tryCreateMaskedGatherBaseWB(IntrinsicInst *I, Value *Ptr,
                                     IRBuilder<> &Builder,
                                     int64_t Increment = 0);

  Value *lowerScatter(IntrinsicInst *I);
  // Create a scatter to a base + vector of offsets
  Value *tryCreateMaskedScatterOffset(IntrinsicInst *I, Value *Offsets,
                                      IRBuilder<> &Builder);
  // Create a scatter to a vector of pointers
  Value *tryCreateMaskedScatterBase(IntrinsicInst *I, Value *Ptr,
                                    IRBuilder<> &Builder,
                                    int64_t Increment = 0);
  // Create an incrementing scatter from a vector of pointers
  Value *tryCreateMaskedScatterBaseWB(IntrinsicInst *I, Value *Ptr,
                                      IRBuilder<> &Builder,
                                      int64_t Increment = 0);

  // QI gathers and scatters can increment their offsets on their own if
  // the increment is a constant value (digit)
  Value *tryCreateIncrementingGatScat(IntrinsicInst *I, Value *BasePtr,
                                      Value *Ptr, GetElementPtrInst *GEP,
                                      IRBuilder<> &Builder);
  // QI gathers/scatters can increment their offsets on their own if the
  // increment is a constant value (digit) - this creates a writeback QI
  // gather/scatter
  Value *tryCreateIncrementingWBGatScat(IntrinsicInst *I, Value *BasePtr,
                                        Value *Ptr, unsigned TypeScale,
                                        IRBuilder<> &Builder);
  // Check whether these offsets could be moved out of the loop they're in
  bool optimiseOffsets(Value *Offsets, BasicBlock *BB, LoopInfo *LI);
  // Pushes the given add out of the loop
  void pushOutAdd(PHINode *&Phi, Value *OffsSecondOperand, unsigned StartIndex);
  // Pushes the given mul out of the loop
  void pushOutMul(PHINode *&Phi, Value *IncrementPerRound,
                  Value *OffsSecondOperand, unsigned LoopIncrement,
                  IRBuilder<> &Builder);
};
144
145 } // end anonymous namespace
146
// Pass registration boilerplate: the ID's address identifies the pass.
char MVEGatherScatterLowering::ID = 0;

INITIALIZE_PASS(MVEGatherScatterLowering, DEBUG_TYPE,
                "MVE gather/scattering lowering pass", false, false)
151
// Factory used by the ARM target's pass pipeline to instantiate this pass.
Pass *llvm::createMVEGatherScatterLoweringPass() {
  return new MVEGatherScatterLowering();
}
155
isLegalTypeAndAlignment(unsigned NumElements,unsigned ElemSize,Align Alignment)156 bool MVEGatherScatterLowering::isLegalTypeAndAlignment(unsigned NumElements,
157 unsigned ElemSize,
158 Align Alignment) {
159 if (((NumElements == 4 &&
160 (ElemSize == 32 || ElemSize == 16 || ElemSize == 8)) ||
161 (NumElements == 8 && (ElemSize == 16 || ElemSize == 8)) ||
162 (NumElements == 16 && ElemSize == 8)) &&
163 Alignment >= ElemSize / 8)
164 return true;
165 LLVM_DEBUG(dbgs() << "masked gathers/scatters: instruction does not have "
166 << "valid alignment or vector type \n");
167 return false;
168 }
169
// Deduce a scalar base pointer and a vector of offsets from GEP. On success
// the base is returned directly and the offsets are returned through the
// Offsets out-parameter (possibly looked through a zext, or re-extended to
// match Ty); on failure returns nullptr and the caller expands the intrinsic.
Value *MVEGatherScatterLowering::checkGEP(Value *&Offsets, Type *Ty,
                                          GetElementPtrInst *GEP,
                                          IRBuilder<> &Builder) {
  if (!GEP) {
    LLVM_DEBUG(
        dbgs() << "masked gathers/scatters: no getelementpointer found\n");
    return nullptr;
  }
  LLVM_DEBUG(dbgs() << "masked gathers/scatters: getelementpointer found."
                    << " Looking at intrinsic for base + vector of offsets\n");
  Value *GEPPtr = GEP->getPointerOperand();
  // A vector of pointers as base cannot be split into base + offsets.
  if (GEPPtr->getType()->isVectorTy()) {
    return nullptr;
  }
  // Exactly one index operand expected: GEPPtr + one offset vector.
  if (GEP->getNumOperands() != 2) {
    LLVM_DEBUG(dbgs() << "masked gathers/scatters: getelementptr with too many"
                      << " operands. Expanding.\n");
    return nullptr;
  }
  Offsets = GEP->getOperand(1);
  // Paranoid check whether the number of parallel lanes is the same
  assert(cast<FixedVectorType>(Ty)->getNumElements() ==
         cast<FixedVectorType>(Offsets->getType())->getNumElements());
  // Only <N x i32> offsets can be integrated into an arm gather, any smaller
  // type would have to be sign extended by the gep - and arm gathers can only
  // zero extend. Additionally, the offsets do have to originate from a zext of
  // a vector with element types smaller or equal the type of the gather we're
  // looking at
  if (Offsets->getType()->getScalarSizeInBits() != 32)
    return nullptr;
  if (ZExtInst *ZextOffs = dyn_cast<ZExtInst>(Offsets))
    Offsets = ZextOffs->getOperand(0);
  else if (!(cast<FixedVectorType>(Offsets->getType())->getNumElements() == 4 &&
             Offsets->getType()->getScalarSizeInBits() == 32))
    return nullptr;

  if (Ty != Offsets->getType()) {
    if ((Ty->getScalarSizeInBits() <
         Offsets->getType()->getScalarSizeInBits())) {
      // Offsets wider than the gather's element type cannot be encoded.
      LLVM_DEBUG(dbgs() << "masked gathers/scatters: no correct offset type."
                        << " Can't create intrinsic.\n");
      return nullptr;
    } else {
      // Narrower offsets are widened back with a zext, which is what the
      // hardware's offset handling matches.
      Offsets = Builder.CreateZExt(
          Offsets, VectorType::getInteger(cast<VectorType>(Ty)));
    }
  }
  // If none of the checks failed, return the gep's base pointer
  LLVM_DEBUG(dbgs() << "masked gathers/scatters: found correct offsets\n");
  return GEPPtr;
}
221
lookThroughBitcast(Value * & Ptr)222 void MVEGatherScatterLowering::lookThroughBitcast(Value *&Ptr) {
223 // Look through bitcast instruction if #elements is the same
224 if (auto *BitCast = dyn_cast<BitCastInst>(Ptr)) {
225 auto *BCTy = cast<FixedVectorType>(BitCast->getType());
226 auto *BCSrcTy = cast<FixedVectorType>(BitCast->getOperand(0)->getType());
227 if (BCTy->getNumElements() == BCSrcTy->getNumElements()) {
228 LLVM_DEBUG(
229 dbgs() << "masked gathers/scatters: looking through bitcast\n");
230 Ptr = BitCast->getOperand(0);
231 }
232 }
233 }
234
computeScale(unsigned GEPElemSize,unsigned MemoryElemSize)235 int MVEGatherScatterLowering::computeScale(unsigned GEPElemSize,
236 unsigned MemoryElemSize) {
237 // This can be a 32bit load/store scaled by 4, a 16bit load/store scaled by 2,
238 // or a 8bit, 16bit or 32bit load/store scaled by 1
239 if (GEPElemSize == 32 && MemoryElemSize == 32)
240 return 2;
241 else if (GEPElemSize == 16 && MemoryElemSize == 16)
242 return 1;
243 else if (GEPElemSize == 8)
244 return 0;
245 LLVM_DEBUG(dbgs() << "masked gathers/scatters: incorrect scale. Can't "
246 << "create intrinsic\n");
247 return -1;
248 }
249
// If V is a constant, or is derived from constants through adds and muls,
// return its numeric value; otherwise return an empty Optional.
Optional<int64_t> MVEGatherScatterLowering::getIfConst(const Value *V) {
  const Constant *C = dyn_cast<Constant>(V);
  if (C != nullptr)
    return Optional<int64_t>{C->getUniqueInteger().getSExtValue()};
  if (!isa<Instruction>(V))
    return Optional<int64_t>{};

  const Instruction *I = cast<Instruction>(V);
  if (I->getOpcode() == Instruction::Add ||
      I->getOpcode() == Instruction::Mul) {
    // Recurse into both operands; both must fold to constants.
    Optional<int64_t> Op0 = getIfConst(I->getOperand(0));
    Optional<int64_t> Op1 = getIfConst(I->getOperand(1));
    if (!Op0 || !Op1)
      return Optional<int64_t>{};
    if (I->getOpcode() == Instruction::Add)
      return Optional<int64_t>{Op0.getValue() + Op1.getValue()};
    if (I->getOpcode() == Instruction::Mul)
      return Optional<int64_t>{Op0.getValue() * Op1.getValue()};
  }
  return Optional<int64_t>{};
}
271
272 std::pair<Value *, int64_t>
getVarAndConst(Value * Inst,int TypeScale)273 MVEGatherScatterLowering::getVarAndConst(Value *Inst, int TypeScale) {
274 std::pair<Value *, int64_t> ReturnFalse =
275 std::pair<Value *, int64_t>(nullptr, 0);
276 // At this point, the instruction we're looking at must be an add or we
277 // bail out
278 Instruction *Add = dyn_cast<Instruction>(Inst);
279 if (Add == nullptr || Add->getOpcode() != Instruction::Add)
280 return ReturnFalse;
281
282 Value *Summand;
283 Optional<int64_t> Const;
284 // Find out which operand the value that is increased is
285 if ((Const = getIfConst(Add->getOperand(0))))
286 Summand = Add->getOperand(1);
287 else if ((Const = getIfConst(Add->getOperand(1))))
288 Summand = Add->getOperand(0);
289 else
290 return ReturnFalse;
291
292 // Check that the constant is small enough for an incrementing gather
293 int64_t Immediate = Const.getValue() << TypeScale;
294 if (Immediate > 512 || Immediate < -512 || Immediate % 4 != 0)
295 return ReturnFalse;
296
297 return std::pair<Value *, int64_t>(Summand, Immediate);
298 }
299
// Attempt to replace the masked.gather intrinsic I with an MVE gather
// intrinsic. Returns the replacement value, or nullptr if the gather must be
// left for generic expansion. On success the original instruction(s) are
// erased.
Value *MVEGatherScatterLowering::lowerGather(IntrinsicInst *I) {
  using namespace PatternMatch;
  LLVM_DEBUG(dbgs() << "masked gathers: checking transform preconditions\n");

  // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
  // Attempt to turn the masked gather in I into a MVE intrinsic
  // Potentially optimising the addressing modes as we do so.
  auto *Ty = cast<FixedVectorType>(I->getType());
  Value *Ptr = I->getArgOperand(0);
  Align Alignment = cast<ConstantInt>(I->getArgOperand(1))->getAlignValue();
  Value *Mask = I->getArgOperand(2);
  Value *PassThru = I->getArgOperand(3);

  if (!isLegalTypeAndAlignment(Ty->getNumElements(), Ty->getScalarSizeInBits(),
                               Alignment))
    return nullptr;
  lookThroughBitcast(Ptr);
  assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");

  IRBuilder<> Builder(I->getContext());
  Builder.SetInsertPoint(I);
  Builder.SetCurrentDebugLocation(I->getDebugLoc());

  // Prefer the base + vector-of-offsets form (which may also update Root to
  // a folded extend); fall back to a plain vector-of-pointers gather.
  Instruction *Root = I;
  Value *Load = tryCreateMaskedGatherOffset(I, Ptr, Root, Builder);
  if (!Load)
    Load = tryCreateMaskedGatherBase(I, Ptr, Builder);
  if (!Load)
    return nullptr;

  // Only an undef or all-zero passthru is handled implicitly; anything else
  // needs an explicit select on the mask.
  if (!isa<UndefValue>(PassThru) && !match(PassThru, m_Zero())) {
    LLVM_DEBUG(dbgs() << "masked gathers: found non-trivial passthru - "
                      << "creating select\n");
    Load = Builder.CreateSelect(Mask, Load, PassThru);
  }

  Root->replaceAllUsesWith(Load);
  Root->eraseFromParent();
  if (Root != I)
    // If this was an extending gather, we need to get rid of the sext/zext
    // as well as of the gather itself
    I->eraseFromParent();

  LLVM_DEBUG(dbgs() << "masked gathers: successfully built masked gather\n");
  return Load;
}
346
tryCreateMaskedGatherBase(IntrinsicInst * I,Value * Ptr,IRBuilder<> & Builder,int64_t Increment)347 Value *MVEGatherScatterLowering::tryCreateMaskedGatherBase(IntrinsicInst *I,
348 Value *Ptr,
349 IRBuilder<> &Builder,
350 int64_t Increment) {
351 using namespace PatternMatch;
352 auto *Ty = cast<FixedVectorType>(I->getType());
353 LLVM_DEBUG(dbgs() << "masked gathers: loading from vector of pointers\n");
354 if (Ty->getNumElements() != 4 || Ty->getScalarSizeInBits() != 32)
355 // Can't build an intrinsic for this
356 return nullptr;
357 Value *Mask = I->getArgOperand(2);
358 if (match(Mask, m_One()))
359 return Builder.CreateIntrinsic(Intrinsic::arm_mve_vldr_gather_base,
360 {Ty, Ptr->getType()},
361 {Ptr, Builder.getInt32(Increment)});
362 else
363 return Builder.CreateIntrinsic(
364 Intrinsic::arm_mve_vldr_gather_base_predicated,
365 {Ty, Ptr->getType(), Mask->getType()},
366 {Ptr, Builder.getInt32(Increment), Mask});
367 }
368
tryCreateMaskedGatherBaseWB(IntrinsicInst * I,Value * Ptr,IRBuilder<> & Builder,int64_t Increment)369 Value *MVEGatherScatterLowering::tryCreateMaskedGatherBaseWB(
370 IntrinsicInst *I, Value *Ptr, IRBuilder<> &Builder, int64_t Increment) {
371 using namespace PatternMatch;
372 auto *Ty = cast<FixedVectorType>(I->getType());
373 LLVM_DEBUG(
374 dbgs()
375 << "masked gathers: loading from vector of pointers with writeback\n");
376 if (Ty->getNumElements() != 4 || Ty->getScalarSizeInBits() != 32)
377 // Can't build an intrinsic for this
378 return nullptr;
379 Value *Mask = I->getArgOperand(2);
380 if (match(Mask, m_One()))
381 return Builder.CreateIntrinsic(Intrinsic::arm_mve_vldr_gather_base_wb,
382 {Ty, Ptr->getType()},
383 {Ptr, Builder.getInt32(Increment)});
384 else
385 return Builder.CreateIntrinsic(
386 Intrinsic::arm_mve_vldr_gather_base_wb_predicated,
387 {Ty, Ptr->getType(), Mask->getType()},
388 {Ptr, Builder.getInt32(Increment), Mask});
389 }
390
// Build a gather in base + vector-of-offsets form. For sub-128-bit gathers
// the (single) sext/zext user is folded into the intrinsic, and Root is
// updated to that extend so the caller replaces/erases the right
// instruction. Returns nullptr if this form cannot be used.
Value *MVEGatherScatterLowering::tryCreateMaskedGatherOffset(
    IntrinsicInst *I, Value *Ptr, Instruction *&Root, IRBuilder<> &Builder) {
  using namespace PatternMatch;

  Type *OriginalTy = I->getType();
  Type *ResultTy = OriginalTy;

  // Intrinsic operand: 0 when the hardware should sign-extend the loaded
  // elements (SExt user found below), 1 otherwise.
  unsigned Unsigned = 1;
  // The size of the gather was already checked in isLegalTypeAndAlignment;
  // if it was not a full vector width an appropriate extend should follow.
  auto *Extend = Root;
  if (OriginalTy->getPrimitiveSizeInBits() < 128) {
    // Only transform gathers with exactly one use
    if (!I->hasOneUse())
      return nullptr;

    // The correct root to replace is not the CallInst itself, but the
    // instruction which extends it
    Extend = cast<Instruction>(*I->users().begin());
    if (isa<SExtInst>(Extend)) {
      Unsigned = 0;
    } else if (!isa<ZExtInst>(Extend)) {
      LLVM_DEBUG(dbgs() << "masked gathers: extend needed but not provided. "
                        << "Expanding\n");
      return nullptr;
    }
    LLVM_DEBUG(dbgs() << "masked gathers: found an extending gather\n");
    ResultTy = Extend->getType();
    // The final size of the gather must be a full vector width
    if (ResultTy->getPrimitiveSizeInBits() != 128) {
      LLVM_DEBUG(dbgs() << "masked gathers: extending from the wrong type. "
                        << "Expanding\n");
      return nullptr;
    }
  }

  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
  Value *Offsets;
  Value *BasePtr = checkGEP(Offsets, ResultTy, GEP, Builder);
  if (!BasePtr)
    return nullptr;
  // Check whether the offset is a constant increment that could be merged into
  // a QI gather
  Value *Load = tryCreateIncrementingGatScat(I, BasePtr, Offsets, GEP, Builder);
  if (Load)
    return Load;

  int Scale = computeScale(
      BasePtr->getType()->getPointerElementType()->getPrimitiveSizeInBits(),
      OriginalTy->getScalarSizeInBits());
  if (Scale == -1)
    return nullptr;
  // From here on the transform succeeds, so tell the caller to replace the
  // extend rather than the original call.
  Root = Extend;

  Value *Mask = I->getArgOperand(2);
  if (!match(Mask, m_One()))
    return Builder.CreateIntrinsic(
        Intrinsic::arm_mve_vldr_gather_offset_predicated,
        {ResultTy, BasePtr->getType(), Offsets->getType(), Mask->getType()},
        {BasePtr, Offsets, Builder.getInt32(OriginalTy->getScalarSizeInBits()),
         Builder.getInt32(Scale), Builder.getInt32(Unsigned), Mask});
  else
    return Builder.CreateIntrinsic(
        Intrinsic::arm_mve_vldr_gather_offset,
        {ResultTy, BasePtr->getType(), Offsets->getType()},
        {BasePtr, Offsets, Builder.getInt32(OriginalTy->getScalarSizeInBits()),
         Builder.getInt32(Scale), Builder.getInt32(Unsigned)});
}
459
lowerScatter(IntrinsicInst * I)460 Value *MVEGatherScatterLowering::lowerScatter(IntrinsicInst *I) {
461 using namespace PatternMatch;
462 LLVM_DEBUG(dbgs() << "masked scatters: checking transform preconditions\n");
463
464 // @llvm.masked.scatter.*(data, ptrs, alignment, mask)
465 // Attempt to turn the masked scatter in I into a MVE intrinsic
466 // Potentially optimising the addressing modes as we do so.
467 Value *Input = I->getArgOperand(0);
468 Value *Ptr = I->getArgOperand(1);
469 Align Alignment = cast<ConstantInt>(I->getArgOperand(2))->getAlignValue();
470 auto *Ty = cast<FixedVectorType>(Input->getType());
471
472 if (!isLegalTypeAndAlignment(Ty->getNumElements(), Ty->getScalarSizeInBits(),
473 Alignment))
474 return nullptr;
475
476 lookThroughBitcast(Ptr);
477 assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
478
479 IRBuilder<> Builder(I->getContext());
480 Builder.SetInsertPoint(I);
481 Builder.SetCurrentDebugLocation(I->getDebugLoc());
482
483 Value *Store = tryCreateMaskedScatterOffset(I, Ptr, Builder);
484 if (!Store)
485 Store = tryCreateMaskedScatterBase(I, Ptr, Builder);
486 if (!Store)
487 return nullptr;
488
489 LLVM_DEBUG(dbgs() << "masked scatters: successfully built masked scatter\n");
490 I->eraseFromParent();
491 return Store;
492 }
493
tryCreateMaskedScatterBase(IntrinsicInst * I,Value * Ptr,IRBuilder<> & Builder,int64_t Increment)494 Value *MVEGatherScatterLowering::tryCreateMaskedScatterBase(
495 IntrinsicInst *I, Value *Ptr, IRBuilder<> &Builder, int64_t Increment) {
496 using namespace PatternMatch;
497 Value *Input = I->getArgOperand(0);
498 auto *Ty = cast<FixedVectorType>(Input->getType());
499 // Only QR variants allow truncating
500 if (!(Ty->getNumElements() == 4 && Ty->getScalarSizeInBits() == 32)) {
501 // Can't build an intrinsic for this
502 return nullptr;
503 }
504 Value *Mask = I->getArgOperand(3);
505 // int_arm_mve_vstr_scatter_base(_predicated) addr, offset, data(, mask)
506 LLVM_DEBUG(dbgs() << "masked scatters: storing to a vector of pointers\n");
507 if (match(Mask, m_One()))
508 return Builder.CreateIntrinsic(Intrinsic::arm_mve_vstr_scatter_base,
509 {Ptr->getType(), Input->getType()},
510 {Ptr, Builder.getInt32(Increment), Input});
511 else
512 return Builder.CreateIntrinsic(
513 Intrinsic::arm_mve_vstr_scatter_base_predicated,
514 {Ptr->getType(), Input->getType(), Mask->getType()},
515 {Ptr, Builder.getInt32(Increment), Input, Mask});
516 }
517
tryCreateMaskedScatterBaseWB(IntrinsicInst * I,Value * Ptr,IRBuilder<> & Builder,int64_t Increment)518 Value *MVEGatherScatterLowering::tryCreateMaskedScatterBaseWB(
519 IntrinsicInst *I, Value *Ptr, IRBuilder<> &Builder, int64_t Increment) {
520 using namespace PatternMatch;
521 Value *Input = I->getArgOperand(0);
522 auto *Ty = cast<FixedVectorType>(Input->getType());
523 LLVM_DEBUG(
524 dbgs()
525 << "masked scatters: storing to a vector of pointers with writeback\n");
526 if (Ty->getNumElements() != 4 || Ty->getScalarSizeInBits() != 32)
527 // Can't build an intrinsic for this
528 return nullptr;
529 Value *Mask = I->getArgOperand(3);
530 if (match(Mask, m_One()))
531 return Builder.CreateIntrinsic(Intrinsic::arm_mve_vstr_scatter_base_wb,
532 {Ptr->getType(), Input->getType()},
533 {Ptr, Builder.getInt32(Increment), Input});
534 else
535 return Builder.CreateIntrinsic(
536 Intrinsic::arm_mve_vstr_scatter_base_wb_predicated,
537 {Ptr->getType(), Input->getType(), Mask->getType()},
538 {Ptr, Builder.getInt32(Increment), Input, Mask});
539 }
540
// Build a scatter in base + vector-of-offsets form, folding a trunc of the
// stored data into the intrinsic when possible (truncating scatter).
// Returns nullptr if this form cannot be used.
Value *MVEGatherScatterLowering::tryCreateMaskedScatterOffset(
    IntrinsicInst *I, Value *Ptr, IRBuilder<> &Builder) {
  using namespace PatternMatch;
  Value *Input = I->getArgOperand(0);
  Value *Mask = I->getArgOperand(3);
  Type *InputTy = Input->getType();
  // MemoryTy keeps the (possibly narrower) element type actually stored.
  Type *MemoryTy = InputTy;
  LLVM_DEBUG(dbgs() << "masked scatters: getelementpointer found. Storing"
                    << " to base + vector of offsets\n");
  // If the input has been truncated, try to integrate that trunc into the
  // scatter instruction (we don't care about alignment here)
  if (TruncInst *Trunc = dyn_cast<TruncInst>(Input)) {
    Value *PreTrunc = Trunc->getOperand(0);
    Type *PreTruncTy = PreTrunc->getType();
    if (PreTruncTy->getPrimitiveSizeInBits() == 128) {
      // Store the wide pre-trunc value; the intrinsic narrows on store.
      Input = PreTrunc;
      InputTy = PreTruncTy;
    }
  }
  if (InputTy->getPrimitiveSizeInBits() != 128) {
    LLVM_DEBUG(
        dbgs() << "masked scatters: cannot create scatters for non-standard"
               << " input types. Expanding.\n");
    return nullptr;
  }

  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
  Value *Offsets;
  Value *BasePtr = checkGEP(Offsets, InputTy, GEP, Builder);
  if (!BasePtr)
    return nullptr;
  // Check whether the offset is a constant increment that could be merged into
  // a QI scatter
  Value *Store =
      tryCreateIncrementingGatScat(I, BasePtr, Offsets, GEP, Builder);
  if (Store)
    return Store;
  int Scale = computeScale(
      BasePtr->getType()->getPointerElementType()->getPrimitiveSizeInBits(),
      MemoryTy->getScalarSizeInBits());
  if (Scale == -1)
    return nullptr;

  if (!match(Mask, m_One()))
    return Builder.CreateIntrinsic(
        Intrinsic::arm_mve_vstr_scatter_offset_predicated,
        {BasePtr->getType(), Offsets->getType(), Input->getType(),
         Mask->getType()},
        {BasePtr, Offsets, Input,
         Builder.getInt32(MemoryTy->getScalarSizeInBits()),
         Builder.getInt32(Scale), Mask});
  else
    return Builder.CreateIntrinsic(
        Intrinsic::arm_mve_vstr_scatter_offset,
        {BasePtr->getType(), Offsets->getType(), Input->getType()},
        {BasePtr, Offsets, Input,
         Builder.getInt32(MemoryTy->getScalarSizeInBits()),
         Builder.getInt32(Scale)});
}
600
tryCreateIncrementingGatScat(IntrinsicInst * I,Value * BasePtr,Value * Offsets,GetElementPtrInst * GEP,IRBuilder<> & Builder)601 Value *MVEGatherScatterLowering::tryCreateIncrementingGatScat(
602 IntrinsicInst *I, Value *BasePtr, Value *Offsets, GetElementPtrInst *GEP,
603 IRBuilder<> &Builder) {
604 FixedVectorType *Ty;
605 if (I->getIntrinsicID() == Intrinsic::masked_gather)
606 Ty = cast<FixedVectorType>(I->getType());
607 else
608 Ty = cast<FixedVectorType>(I->getArgOperand(0)->getType());
609 // Incrementing gathers only exist for v4i32
610 if (Ty->getNumElements() != 4 ||
611 Ty->getScalarSizeInBits() != 32)
612 return nullptr;
613 Loop *L = LI->getLoopFor(I->getParent());
614 if (L == nullptr)
615 // Incrementing gathers are not beneficial outside of a loop
616 return nullptr;
617 LLVM_DEBUG(dbgs() << "masked gathers/scatters: trying to build incrementing "
618 "wb gather/scatter\n");
619
620 // The gep was in charge of making sure the offsets are scaled correctly
621 // - calculate that factor so it can be applied by hand
622 DataLayout DT = I->getParent()->getParent()->getParent()->getDataLayout();
623 int TypeScale =
624 computeScale(DT.getTypeSizeInBits(GEP->getOperand(0)->getType()),
625 DT.getTypeSizeInBits(GEP->getType()) /
626 cast<FixedVectorType>(GEP->getType())->getNumElements());
627 if (TypeScale == -1)
628 return nullptr;
629
630 if (GEP->hasOneUse()) {
631 // Only in this case do we want to build a wb gather, because the wb will
632 // change the phi which does affect other users of the gep (which will still
633 // be using the phi in the old way)
634 Value *Load =
635 tryCreateIncrementingWBGatScat(I, BasePtr, Offsets, TypeScale, Builder);
636 if (Load != nullptr)
637 return Load;
638 }
639 LLVM_DEBUG(dbgs() << "masked gathers/scatters: trying to build incrementing "
640 "non-wb gather/scatter\n");
641
642 std::pair<Value *, int64_t> Add = getVarAndConst(Offsets, TypeScale);
643 if (Add.first == nullptr)
644 return nullptr;
645 Value *OffsetsIncoming = Add.first;
646 int64_t Immediate = Add.second;
647
648 // Make sure the offsets are scaled correctly
649 Instruction *ScaledOffsets = BinaryOperator::Create(
650 Instruction::Shl, OffsetsIncoming,
651 Builder.CreateVectorSplat(Ty->getNumElements(), Builder.getInt32(TypeScale)),
652 "ScaledIndex", I);
653 // Add the base to the offsets
654 OffsetsIncoming = BinaryOperator::Create(
655 Instruction::Add, ScaledOffsets,
656 Builder.CreateVectorSplat(
657 Ty->getNumElements(),
658 Builder.CreatePtrToInt(
659 BasePtr,
660 cast<VectorType>(ScaledOffsets->getType())->getElementType())),
661 "StartIndex", I);
662
663 if (I->getIntrinsicID() == Intrinsic::masked_gather)
664 return cast<IntrinsicInst>(
665 tryCreateMaskedGatherBase(I, OffsetsIncoming, Builder, Immediate));
666 else
667 return cast<IntrinsicInst>(
668 tryCreateMaskedScatterBase(I, OffsetsIncoming, Builder, Immediate));
669 }
670
// Try to build a writeback (wb) incrementing gather/scatter: the loop IV phi
// that feeds the offsets is rewritten so that the intrinsic itself performs
// the per-iteration increment. Returns the new value, or nullptr if the
// offsets do not come from a suitable two-input, two-use loop phi.
Value *MVEGatherScatterLowering::tryCreateIncrementingWBGatScat(
    IntrinsicInst *I, Value *BasePtr, Value *Offsets, unsigned TypeScale,
    IRBuilder<> &Builder) {
  // Check whether this gather's offset is incremented by a constant - if so,
  // and the load is of the right type, we can merge this into a QI gather
  Loop *L = LI->getLoopFor(I->getParent());
  // Offsets that are worth merging into this instruction will be incremented
  // by a constant, thus we're looking for an add of a phi and a constant
  PHINode *Phi = dyn_cast<PHINode>(Offsets);
  if (Phi == nullptr || Phi->getNumIncomingValues() != 2 ||
      Phi->getParent() != L->getHeader() || Phi->getNumUses() != 2)
    // No phi means no IV to write back to; if there is a phi, we expect it
    // to have exactly two incoming values; the only phis we are interested in
    // will be loop IV's and have exactly two uses, one in their increment and
    // one in the gather's gep
    return nullptr;

  // Identify which incoming edge is the latch (increment) and, by
  // elimination, which is the preheader (start value).
  unsigned IncrementIndex =
      Phi->getIncomingBlock(0) == L->getLoopLatch() ? 0 : 1;
  // Look through the phi to the phi increment
  Offsets = Phi->getIncomingValue(IncrementIndex);

  std::pair<Value *, int64_t> Add = getVarAndConst(Offsets, TypeScale);
  if (Add.first == nullptr)
    return nullptr;
  Value *OffsetsIncoming = Add.first;
  int64_t Immediate = Add.second;
  if (OffsetsIncoming != Phi)
    // Then the increment we are looking at is not an increment of the
    // induction variable, and we don't want to do a writeback
    return nullptr;

  // The new start value is computed at the end of the preheader.
  Builder.SetInsertPoint(&Phi->getIncomingBlock(1 - IncrementIndex)->back());
  unsigned NumElems =
      cast<FixedVectorType>(OffsetsIncoming->getType())->getNumElements();

  // Make sure the offsets are scaled correctly
  Instruction *ScaledOffsets = BinaryOperator::Create(
      Instruction::Shl, Phi->getIncomingValue(1 - IncrementIndex),
      Builder.CreateVectorSplat(NumElems, Builder.getInt32(TypeScale)),
      "ScaledIndex", &Phi->getIncomingBlock(1 - IncrementIndex)->back());
  // Add the base to the offsets
  OffsetsIncoming = BinaryOperator::Create(
      Instruction::Add, ScaledOffsets,
      Builder.CreateVectorSplat(
          NumElems,
          Builder.CreatePtrToInt(
              BasePtr,
              cast<VectorType>(ScaledOffsets->getType())->getElementType())),
      "StartIndex", &Phi->getIncomingBlock(1 - IncrementIndex)->back());
  // The gather is pre-incrementing, so subtract one increment from the start
  // value to compensate for the first in-intrinsic addition.
  OffsetsIncoming = BinaryOperator::Create(
      Instruction::Sub, OffsetsIncoming,
      Builder.CreateVectorSplat(NumElems, Builder.getInt32(Immediate)),
      "PreIncrementStartIndex",
      &Phi->getIncomingBlock(1 - IncrementIndex)->back());
  Phi->setIncomingValue(1 - IncrementIndex, OffsetsIncoming);

  Builder.SetInsertPoint(I);

  Value *EndResult;
  Value *NewInduction;
  if (I->getIntrinsicID() == Intrinsic::masked_gather) {
    // Build the incrementing gather
    Value *Load = tryCreateMaskedGatherBaseWB(I, Phi, Builder, Immediate);
    // One value to be handed to whoever uses the gather, one is the loop
    // increment
    EndResult = Builder.CreateExtractValue(Load, 0, "Gather");
    NewInduction = Builder.CreateExtractValue(Load, 1, "GatherIncrement");
  } else {
    // Build the incrementing scatter
    NewInduction = tryCreateMaskedScatterBaseWB(I, Phi, Builder, Immediate);
    EndResult = NewInduction;
  }
  // The old IV increment (the add) is now redundant: the intrinsic's
  // writeback result takes its place in the phi.
  Instruction *AddInst = cast<Instruction>(Offsets);
  AddInst->replaceAllUsesWith(NewInduction);
  AddInst->eraseFromParent();
  Phi->setIncomingValue(IncrementIndex, NewInduction);

  return EndResult;
}
752
pushOutAdd(PHINode * & Phi,Value * OffsSecondOperand,unsigned StartIndex)753 void MVEGatherScatterLowering::pushOutAdd(PHINode *&Phi,
754 Value *OffsSecondOperand,
755 unsigned StartIndex) {
756 LLVM_DEBUG(dbgs() << "masked gathers/scatters: optimising add instruction\n");
757 Instruction *InsertionPoint =
758 &cast<Instruction>(Phi->getIncomingBlock(StartIndex)->back());
759 // Initialize the phi with a vector that contains a sum of the constants
760 Instruction *NewIndex = BinaryOperator::Create(
761 Instruction::Add, Phi->getIncomingValue(StartIndex), OffsSecondOperand,
762 "PushedOutAdd", InsertionPoint);
763 unsigned IncrementIndex = StartIndex == 0 ? 1 : 0;
764
765 // Order such that start index comes first (this reduces mov's)
766 Phi->addIncoming(NewIndex, Phi->getIncomingBlock(StartIndex));
767 Phi->addIncoming(Phi->getIncomingValue(IncrementIndex),
768 Phi->getIncomingBlock(IncrementIndex));
769 Phi->removeIncomingValue(IncrementIndex);
770 Phi->removeIncomingValue(StartIndex);
771 }
772
// Hoist a loop-invariant multiply out of the loop by scaling both the start
// value and the per-round increment of the offsets phi. The phi then counts
// in multiples of OffsSecondOperand, so the in-loop mul becomes redundant
// (the caller replaces its uses with the phi).
// \param Phi               the loop-carried offsets phi (2 incoming values).
// \param IncrementPerRound amount added to the phi each loop iteration.
// \param OffsSecondOperand the loop-invariant multiplicand being pushed out.
// \param LoopIncrement     incoming-value index of the loop-latch side.
// \param Builder           unused here; kept for interface symmetry with the
//                          caller's other push-out helpers.
void MVEGatherScatterLowering::pushOutMul(PHINode *&Phi,
                                          Value *IncrementPerRound,
                                          Value *OffsSecondOperand,
                                          unsigned LoopIncrement,
                                          IRBuilder<> &Builder) {
  LLVM_DEBUG(dbgs() << "masked gathers/scatters: optimising mul instruction\n");

  // Create a new scalar add outside of the loop and transform it to a splat
  // by which loop variable can be incremented
  // (LoopIncrement == 1 ? 0 : 1 selects the non-latch, i.e. pre-loop, slot.)
  Instruction *InsertionPoint = &cast<Instruction>(
      Phi->getIncomingBlock(LoopIncrement == 1 ? 0 : 1)->back());

  // Create a new index: start value scaled by the pushed-out multiplicand,
  // computed once before the loop.
  Value *StartIndex = BinaryOperator::Create(
      Instruction::Mul, Phi->getIncomingValue(LoopIncrement == 1 ? 0 : 1),
      OffsSecondOperand, "PushedOutMul", InsertionPoint);

  // Pre-scaled step: IncrementPerRound * OffsSecondOperand, also hoisted
  // out of the loop (both operands are loop-invariant).
  Instruction *Product =
      BinaryOperator::Create(Instruction::Mul, IncrementPerRound,
                             OffsSecondOperand, "Product", InsertionPoint);
  // Increment NewIndex by Product instead of the multiplication
  // NOTE(review): the insertion point is the second-to-last instruction of
  // the latch block (before its final instruction) — presumably to land
  // ahead of the branch; confirm this holds for the blocks reaching here.
  Instruction *NewIncrement = BinaryOperator::Create(
      Instruction::Add, Phi, Product, "IncrementPushedOutMul",
      cast<Instruction>(Phi->getIncomingBlock(LoopIncrement)->back())
          .getPrevNode());

  // Re-add the incoming values in start-first order, then drop the two
  // original entries: they sit in slots 0 and 1, so removing slot 0 twice
  // (with the implied shift after the first removal) clears exactly them.
  Phi->addIncoming(StartIndex,
                   Phi->getIncomingBlock(LoopIncrement == 1 ? 0 : 1));
  Phi->addIncoming(NewIncrement, Phi->getIncomingBlock(LoopIncrement));
  Phi->removeIncomingValue((unsigned)0);
  Phi->removeIncomingValue((unsigned)0);
  return;
}
806
807 // Check whether all usages of this instruction are as offsets of
808 // gathers/scatters or simple arithmetics only used by gathers/scatters
hasAllGatScatUsers(Instruction * I)809 static bool hasAllGatScatUsers(Instruction *I) {
810 if (I->hasNUses(0)) {
811 return false;
812 }
813 bool Gatscat = true;
814 for (User *U : I->users()) {
815 if (!isa<Instruction>(U))
816 return false;
817 if (isa<GetElementPtrInst>(U) ||
818 isGatherScatter(dyn_cast<IntrinsicInst>(U))) {
819 return Gatscat;
820 } else {
821 unsigned OpCode = cast<Instruction>(U)->getOpcode();
822 if ((OpCode == Instruction::Add || OpCode == Instruction::Mul) &&
823 hasAllGatScatUsers(cast<Instruction>(U))) {
824 continue;
825 }
826 return false;
827 }
828 }
829 return Gatscat;
830 }
831
optimiseOffsets(Value * Offsets,BasicBlock * BB,LoopInfo * LI)832 bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB,
833 LoopInfo *LI) {
834 LLVM_DEBUG(dbgs() << "masked gathers/scatters: trying to optimize\n");
835 // Optimise the addresses of gathers/scatters by moving invariant
836 // calculations out of the loop
837 if (!isa<Instruction>(Offsets))
838 return false;
839 Instruction *Offs = cast<Instruction>(Offsets);
840 if (Offs->getOpcode() != Instruction::Add &&
841 Offs->getOpcode() != Instruction::Mul)
842 return false;
843 Loop *L = LI->getLoopFor(BB);
844 if (L == nullptr)
845 return false;
846 if (!Offs->hasOneUse()) {
847 if (!hasAllGatScatUsers(Offs))
848 return false;
849 }
850
851 // Find out which, if any, operand of the instruction
852 // is a phi node
853 PHINode *Phi;
854 int OffsSecondOp;
855 if (isa<PHINode>(Offs->getOperand(0))) {
856 Phi = cast<PHINode>(Offs->getOperand(0));
857 OffsSecondOp = 1;
858 } else if (isa<PHINode>(Offs->getOperand(1))) {
859 Phi = cast<PHINode>(Offs->getOperand(1));
860 OffsSecondOp = 0;
861 } else {
862 bool Changed = true;
863 if (isa<Instruction>(Offs->getOperand(0)) &&
864 L->contains(cast<Instruction>(Offs->getOperand(0))))
865 Changed |= optimiseOffsets(Offs->getOperand(0), BB, LI);
866 if (isa<Instruction>(Offs->getOperand(1)) &&
867 L->contains(cast<Instruction>(Offs->getOperand(1))))
868 Changed |= optimiseOffsets(Offs->getOperand(1), BB, LI);
869 if (!Changed) {
870 return false;
871 } else {
872 if (isa<PHINode>(Offs->getOperand(0))) {
873 Phi = cast<PHINode>(Offs->getOperand(0));
874 OffsSecondOp = 1;
875 } else if (isa<PHINode>(Offs->getOperand(1))) {
876 Phi = cast<PHINode>(Offs->getOperand(1));
877 OffsSecondOp = 0;
878 } else {
879 return false;
880 }
881 }
882 }
883 // A phi node we want to perform this function on should be from the
884 // loop header, and shouldn't have more than 2 incoming values
885 if (Phi->getParent() != L->getHeader() ||
886 Phi->getNumIncomingValues() != 2)
887 return false;
888
889 // The phi must be an induction variable
890 Instruction *Op;
891 int IncrementingBlock = -1;
892
893 for (int i = 0; i < 2; i++)
894 if ((Op = dyn_cast<Instruction>(Phi->getIncomingValue(i))) != nullptr)
895 if (Op->getOpcode() == Instruction::Add &&
896 (Op->getOperand(0) == Phi || Op->getOperand(1) == Phi))
897 IncrementingBlock = i;
898 if (IncrementingBlock == -1)
899 return false;
900
901 Instruction *IncInstruction =
902 cast<Instruction>(Phi->getIncomingValue(IncrementingBlock));
903
904 // If the phi is not used by anything else, we can just adapt it when
905 // replacing the instruction; if it is, we'll have to duplicate it
906 PHINode *NewPhi;
907 Value *IncrementPerRound = IncInstruction->getOperand(
908 (IncInstruction->getOperand(0) == Phi) ? 1 : 0);
909
910 // Get the value that is added to/multiplied with the phi
911 Value *OffsSecondOperand = Offs->getOperand(OffsSecondOp);
912
913 if (IncrementPerRound->getType() != OffsSecondOperand->getType())
914 // Something has gone wrong, abort
915 return false;
916
917 // Only proceed if the increment per round is a constant or an instruction
918 // which does not originate from within the loop
919 if (!isa<Constant>(IncrementPerRound) &&
920 !(isa<Instruction>(IncrementPerRound) &&
921 !L->contains(cast<Instruction>(IncrementPerRound))))
922 return false;
923
924 if (Phi->getNumUses() == 2) {
925 // No other users -> reuse existing phi (One user is the instruction
926 // we're looking at, the other is the phi increment)
927 if (IncInstruction->getNumUses() != 1) {
928 // If the incrementing instruction does have more users than
929 // our phi, we need to copy it
930 IncInstruction = BinaryOperator::Create(
931 Instruction::BinaryOps(IncInstruction->getOpcode()), Phi,
932 IncrementPerRound, "LoopIncrement", IncInstruction);
933 Phi->setIncomingValue(IncrementingBlock, IncInstruction);
934 }
935 NewPhi = Phi;
936 } else {
937 // There are other users -> create a new phi
938 NewPhi = PHINode::Create(Phi->getType(), 0, "NewPhi", Phi);
939 std::vector<Value *> Increases;
940 // Copy the incoming values of the old phi
941 NewPhi->addIncoming(Phi->getIncomingValue(IncrementingBlock == 1 ? 0 : 1),
942 Phi->getIncomingBlock(IncrementingBlock == 1 ? 0 : 1));
943 IncInstruction = BinaryOperator::Create(
944 Instruction::BinaryOps(IncInstruction->getOpcode()), NewPhi,
945 IncrementPerRound, "LoopIncrement", IncInstruction);
946 NewPhi->addIncoming(IncInstruction,
947 Phi->getIncomingBlock(IncrementingBlock));
948 IncrementingBlock = 1;
949 }
950
951 IRBuilder<> Builder(BB->getContext());
952 Builder.SetInsertPoint(Phi);
953 Builder.SetCurrentDebugLocation(Offs->getDebugLoc());
954
955 switch (Offs->getOpcode()) {
956 case Instruction::Add:
957 pushOutAdd(NewPhi, OffsSecondOperand, IncrementingBlock == 1 ? 0 : 1);
958 break;
959 case Instruction::Mul:
960 pushOutMul(NewPhi, IncrementPerRound, OffsSecondOperand, IncrementingBlock,
961 Builder);
962 break;
963 default:
964 return false;
965 }
966 LLVM_DEBUG(
967 dbgs() << "masked gathers/scatters: simplified loop variable add/mul\n");
968
969 // The instruction has now been "absorbed" into the phi value
970 Offs->replaceAllUsesWith(NewPhi);
971 if (Offs->hasNUses(0))
972 Offs->eraseFromParent();
973 // Clean up the old increment in case it's unused because we built a new
974 // one
975 if (IncInstruction->hasNUses(0))
976 IncInstruction->eraseFromParent();
977
978 return true;
979 }
980
runOnFunction(Function & F)981 bool MVEGatherScatterLowering::runOnFunction(Function &F) {
982 if (!EnableMaskedGatherScatters)
983 return false;
984 auto &TPC = getAnalysis<TargetPassConfig>();
985 auto &TM = TPC.getTM<TargetMachine>();
986 auto *ST = &TM.getSubtarget<ARMSubtarget>(F);
987 if (!ST->hasMVEIntegerOps())
988 return false;
989 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
990 SmallVector<IntrinsicInst *, 4> Gathers;
991 SmallVector<IntrinsicInst *, 4> Scatters;
992
993 bool Changed = false;
994
995 for (BasicBlock &BB : F) {
996 for (Instruction &I : BB) {
997 IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
998 if (II && II->getIntrinsicID() == Intrinsic::masked_gather) {
999 Gathers.push_back(II);
1000 if (isa<GetElementPtrInst>(II->getArgOperand(0)))
1001 Changed |= optimiseOffsets(
1002 cast<Instruction>(II->getArgOperand(0))->getOperand(1),
1003 II->getParent(), LI);
1004 } else if (II && II->getIntrinsicID() == Intrinsic::masked_scatter) {
1005 Scatters.push_back(II);
1006 if (isa<GetElementPtrInst>(II->getArgOperand(1)))
1007 Changed |= optimiseOffsets(
1008 cast<Instruction>(II->getArgOperand(1))->getOperand(1),
1009 II->getParent(), LI);
1010 }
1011 }
1012 }
1013
1014 for (unsigned i = 0; i < Gathers.size(); i++) {
1015 IntrinsicInst *I = Gathers[i];
1016 Value *L = lowerGather(I);
1017 if (L == nullptr)
1018 continue;
1019
1020 // Get rid of any now dead instructions
1021 SimplifyInstructionsInBlock(cast<Instruction>(L)->getParent());
1022 Changed = true;
1023 }
1024
1025 for (unsigned i = 0; i < Scatters.size(); i++) {
1026 IntrinsicInst *I = Scatters[i];
1027 Value *S = lowerScatter(I);
1028 if (S == nullptr)
1029 continue;
1030
1031 // Get rid of any now dead instructions
1032 SimplifyInstructionsInBlock(cast<Instruction>(S)->getParent());
1033 Changed = true;
1034 }
1035 return Changed;
1036 }
1037