1 //===--- CGAtomic.cpp - Emit LLVM IR for atomic operations ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the code for emitting atomic operations.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGCall.h"
14 #include "CGRecordLayout.h"
15 #include "CodeGenFunction.h"
16 #include "CodeGenModule.h"
17 #include "TargetInfo.h"
18 #include "clang/AST/ASTContext.h"
19 #include "clang/CodeGen/CGFunctionInfo.h"
20 #include "clang/Frontend/FrontendDiagnostic.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/IR/DataLayout.h"
23 #include "llvm/IR/Intrinsics.h"
24 #include "llvm/IR/Operator.h"
25 
26 using namespace clang;
27 using namespace CodeGen;
28 
29 namespace {
30   class AtomicInfo {
31     CodeGenFunction &CGF;
32     QualType AtomicTy;
33     QualType ValueTy;
34     uint64_t AtomicSizeInBits;
35     uint64_t ValueSizeInBits;
36     CharUnits AtomicAlign;
37     CharUnits ValueAlign;
38     TypeEvaluationKind EvaluationKind;
39     bool UseLibcall;
40     LValue LVal;
41     CGBitFieldInfo BFI;
42   public:
43     AtomicInfo(CodeGenFunction &CGF, LValue &lvalue)
44         : CGF(CGF), AtomicSizeInBits(0), ValueSizeInBits(0),
45           EvaluationKind(TEK_Scalar), UseLibcall(true) {
46       assert(!lvalue.isGlobalReg());
47       ASTContext &C = CGF.getContext();
48       if (lvalue.isSimple()) {
49         AtomicTy = lvalue.getType();
50         if (auto *ATy = AtomicTy->getAs<AtomicType>())
51           ValueTy = ATy->getValueType();
52         else
53           ValueTy = AtomicTy;
54         EvaluationKind = CGF.getEvaluationKind(ValueTy);
55 
56         uint64_t ValueAlignInBits;
57         uint64_t AtomicAlignInBits;
58         TypeInfo ValueTI = C.getTypeInfo(ValueTy);
59         ValueSizeInBits = ValueTI.Width;
60         ValueAlignInBits = ValueTI.Align;
61 
62         TypeInfo AtomicTI = C.getTypeInfo(AtomicTy);
63         AtomicSizeInBits = AtomicTI.Width;
64         AtomicAlignInBits = AtomicTI.Align;
65 
66         assert(ValueSizeInBits <= AtomicSizeInBits);
67         assert(ValueAlignInBits <= AtomicAlignInBits);
68 
69         AtomicAlign = C.toCharUnitsFromBits(AtomicAlignInBits);
70         ValueAlign = C.toCharUnitsFromBits(ValueAlignInBits);
71         if (lvalue.getAlignment().isZero())
72           lvalue.setAlignment(AtomicAlign);
73 
74         LVal = lvalue;
75       } else if (lvalue.isBitField()) {
76         ValueTy = lvalue.getType();
77         ValueSizeInBits = C.getTypeSize(ValueTy);
78         auto &OrigBFI = lvalue.getBitFieldInfo();
79         auto Offset = OrigBFI.Offset % C.toBits(lvalue.getAlignment());
80         AtomicSizeInBits = C.toBits(
81             C.toCharUnitsFromBits(Offset + OrigBFI.Size + C.getCharWidth() - 1)
82                 .alignTo(lvalue.getAlignment()));
83         auto VoidPtrAddr = CGF.EmitCastToVoidPtr(lvalue.getBitFieldPointer());
84         auto OffsetInChars =
85             (C.toCharUnitsFromBits(OrigBFI.Offset) / lvalue.getAlignment()) *
86             lvalue.getAlignment();
87         VoidPtrAddr = CGF.Builder.CreateConstGEP1_64(
88             CGF.Int8Ty, VoidPtrAddr, OffsetInChars.getQuantity());
89         llvm::Type *IntTy = CGF.Builder.getIntNTy(AtomicSizeInBits);
90         auto Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
91             VoidPtrAddr, IntTy->getPointerTo(), "atomic_bitfield_base");
92         BFI = OrigBFI;
93         BFI.Offset = Offset;
94         BFI.StorageSize = AtomicSizeInBits;
95         BFI.StorageOffset += OffsetInChars;
96         LVal = LValue::MakeBitfield(Address(Addr, IntTy, lvalue.getAlignment()),
97                                     BFI, lvalue.getType(), lvalue.getBaseInfo(),
98                                     lvalue.getTBAAInfo());
99         AtomicTy = C.getIntTypeForBitwidth(AtomicSizeInBits, OrigBFI.IsSigned);
100         if (AtomicTy.isNull()) {
101           llvm::APInt Size(
102               /*numBits=*/32,
103               C.toCharUnitsFromBits(AtomicSizeInBits).getQuantity());
104           AtomicTy =
105               C.getConstantArrayType(C.CharTy, Size, nullptr, ArrayType::Normal,
106                                      /*IndexTypeQuals=*/0);
107         }
108         AtomicAlign = ValueAlign = lvalue.getAlignment();
109       } else if (lvalue.isVectorElt()) {
110         ValueTy = lvalue.getType()->castAs<VectorType>()->getElementType();
111         ValueSizeInBits = C.getTypeSize(ValueTy);
112         AtomicTy = lvalue.getType();
113         AtomicSizeInBits = C.getTypeSize(AtomicTy);
114         AtomicAlign = ValueAlign = lvalue.getAlignment();
115         LVal = lvalue;
116       } else {
117         assert(lvalue.isExtVectorElt());
118         ValueTy = lvalue.getType();
119         ValueSizeInBits = C.getTypeSize(ValueTy);
120         AtomicTy = ValueTy = CGF.getContext().getExtVectorType(
121             lvalue.getType(), cast<llvm::FixedVectorType>(
122                                   lvalue.getExtVectorAddress().getElementType())
123                                   ->getNumElements());
124         AtomicSizeInBits = C.getTypeSize(AtomicTy);
125         AtomicAlign = ValueAlign = lvalue.getAlignment();
126         LVal = lvalue;
127       }
128       UseLibcall = !C.getTargetInfo().hasBuiltinAtomic(
129           AtomicSizeInBits, C.toBits(lvalue.getAlignment()));
130     }
131 
132     QualType getAtomicType() const { return AtomicTy; }
133     QualType getValueType() const { return ValueTy; }
134     CharUnits getAtomicAlignment() const { return AtomicAlign; }
135     uint64_t getAtomicSizeInBits() const { return AtomicSizeInBits; }
136     uint64_t getValueSizeInBits() const { return ValueSizeInBits; }
137     TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; }
138     bool shouldUseLibcall() const { return UseLibcall; }
139     const LValue &getAtomicLValue() const { return LVal; }
140     llvm::Value *getAtomicPointer() const {
141       if (LVal.isSimple())
142         return LVal.getPointer(CGF);
143       else if (LVal.isBitField())
144         return LVal.getBitFieldPointer();
145       else if (LVal.isVectorElt())
146         return LVal.getVectorPointer();
147       assert(LVal.isExtVectorElt());
148       return LVal.getExtVectorPointer();
149     }
150     Address getAtomicAddress() const {
151       llvm::Type *ElTy;
152       if (LVal.isSimple())
153         ElTy = LVal.getAddress(CGF).getElementType();
154       else if (LVal.isBitField())
155         ElTy = LVal.getBitFieldAddress().getElementType();
156       else if (LVal.isVectorElt())
157         ElTy = LVal.getVectorAddress().getElementType();
158       else
159         ElTy = LVal.getExtVectorAddress().getElementType();
160       return Address(getAtomicPointer(), ElTy, getAtomicAlignment());
161     }
162 
163     Address getAtomicAddressAsAtomicIntPointer() const {
164       return emitCastToAtomicIntPointer(getAtomicAddress());
165     }
166 
167     /// Is the atomic size larger than the underlying value type?
168     ///
169     /// Note that the absence of padding does not mean that atomic
170     /// objects are completely interchangeable with non-atomic
171     /// objects: we might have promoted the alignment of a type
172     /// without making it bigger.
173     bool hasPadding() const {
174       return (ValueSizeInBits != AtomicSizeInBits);
175     }
176 
177     bool emitMemSetZeroIfNecessary() const;
178 
179     llvm::Value *getAtomicSizeValue() const {
180       CharUnits size = CGF.getContext().toCharUnitsFromBits(AtomicSizeInBits);
181       return CGF.CGM.getSize(size);
182     }
183 
184     /// Cast the given pointer to an integer pointer suitable for atomic
185     /// operations if the source.
186     Address emitCastToAtomicIntPointer(Address Addr) const;
187 
188     /// If Addr is compatible with the iN that will be used for an atomic
189     /// operation, bitcast it. Otherwise, create a temporary that is suitable
190     /// and copy the value across.
191     Address convertToAtomicIntPointer(Address Addr) const;
192 
193     /// Turn an atomic-layout object into an r-value.
194     RValue convertAtomicTempToRValue(Address addr, AggValueSlot resultSlot,
195                                      SourceLocation loc, bool AsValue) const;
196 
197     /// Converts a rvalue to integer value.
198     llvm::Value *convertRValueToInt(RValue RVal) const;
199 
200     RValue ConvertIntToValueOrAtomic(llvm::Value *IntVal,
201                                      AggValueSlot ResultSlot,
202                                      SourceLocation Loc, bool AsValue) const;
203 
204     /// Copy an atomic r-value into atomic-layout memory.
205     void emitCopyIntoMemory(RValue rvalue) const;
206 
207     /// Project an l-value down to the value field.
208     LValue projectValue() const {
209       assert(LVal.isSimple());
210       Address addr = getAtomicAddress();
211       if (hasPadding())
212         addr = CGF.Builder.CreateStructGEP(addr, 0);
213 
214       return LValue::MakeAddr(addr, getValueType(), CGF.getContext(),
215                               LVal.getBaseInfo(), LVal.getTBAAInfo());
216     }
217 
218     /// Emits atomic load.
219     /// \returns Loaded value.
220     RValue EmitAtomicLoad(AggValueSlot ResultSlot, SourceLocation Loc,
221                           bool AsValue, llvm::AtomicOrdering AO,
222                           bool IsVolatile);
223 
224     /// Emits atomic compare-and-exchange sequence.
225     /// \param Expected Expected value.
226     /// \param Desired Desired value.
227     /// \param Success Atomic ordering for success operation.
228     /// \param Failure Atomic ordering for failed operation.
229     /// \param IsWeak true if atomic operation is weak, false otherwise.
230     /// \returns Pair of values: previous value from storage (value type) and
231     /// boolean flag (i1 type) with true if success and false otherwise.
232     std::pair<RValue, llvm::Value *>
233     EmitAtomicCompareExchange(RValue Expected, RValue Desired,
234                               llvm::AtomicOrdering Success =
235                                   llvm::AtomicOrdering::SequentiallyConsistent,
236                               llvm::AtomicOrdering Failure =
237                                   llvm::AtomicOrdering::SequentiallyConsistent,
238                               bool IsWeak = false);
239 
240     /// Emits atomic update.
241     /// \param AO Atomic ordering.
242     /// \param UpdateOp Update operation for the current lvalue.
243     void EmitAtomicUpdate(llvm::AtomicOrdering AO,
244                           const llvm::function_ref<RValue(RValue)> &UpdateOp,
245                           bool IsVolatile);
246     /// Emits atomic update.
247     /// \param AO Atomic ordering.
248     void EmitAtomicUpdate(llvm::AtomicOrdering AO, RValue UpdateRVal,
249                           bool IsVolatile);
250 
251     /// Materialize an atomic r-value in atomic-layout memory.
252     Address materializeRValue(RValue rvalue) const;
253 
254     /// Creates temp alloca for intermediate operations on atomic value.
255     Address CreateTempAlloca() const;
256   private:
257     bool requiresMemSetZero(llvm::Type *type) const;
258 
259 
260     /// Emits atomic load as a libcall.
261     void EmitAtomicLoadLibcall(llvm::Value *AddForLoaded,
262                                llvm::AtomicOrdering AO, bool IsVolatile);
263     /// Emits atomic load as LLVM instruction.
264     llvm::Value *EmitAtomicLoadOp(llvm::AtomicOrdering AO, bool IsVolatile);
265     /// Emits atomic compare-and-exchange op as a libcall.
266     llvm::Value *EmitAtomicCompareExchangeLibcall(
267         llvm::Value *ExpectedAddr, llvm::Value *DesiredAddr,
268         llvm::AtomicOrdering Success =
269             llvm::AtomicOrdering::SequentiallyConsistent,
270         llvm::AtomicOrdering Failure =
271             llvm::AtomicOrdering::SequentiallyConsistent);
272     /// Emits atomic compare-and-exchange op as LLVM instruction.
273     std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchangeOp(
274         llvm::Value *ExpectedVal, llvm::Value *DesiredVal,
275         llvm::AtomicOrdering Success =
276             llvm::AtomicOrdering::SequentiallyConsistent,
277         llvm::AtomicOrdering Failure =
278             llvm::AtomicOrdering::SequentiallyConsistent,
279         bool IsWeak = false);
280     /// Emit atomic update as libcalls.
281     void
282     EmitAtomicUpdateLibcall(llvm::AtomicOrdering AO,
283                             const llvm::function_ref<RValue(RValue)> &UpdateOp,
284                             bool IsVolatile);
285     /// Emit atomic update as LLVM instructions.
286     void EmitAtomicUpdateOp(llvm::AtomicOrdering AO,
287                             const llvm::function_ref<RValue(RValue)> &UpdateOp,
288                             bool IsVolatile);
289     /// Emit atomic update as libcalls.
290     void EmitAtomicUpdateLibcall(llvm::AtomicOrdering AO, RValue UpdateRVal,
291                                  bool IsVolatile);
292     /// Emit atomic update as LLVM instructions.
293     void EmitAtomicUpdateOp(llvm::AtomicOrdering AO, RValue UpdateRal,
294                             bool IsVolatile);
295   };
296 }
297 
298 Address AtomicInfo::CreateTempAlloca() const {
299   Address TempAlloca = CGF.CreateMemTemp(
300       (LVal.isBitField() && ValueSizeInBits > AtomicSizeInBits) ? ValueTy
301                                                                 : AtomicTy,
302       getAtomicAlignment(),
303       "atomic-temp");
304   // Cast to pointer to value type for bitfields.
305   if (LVal.isBitField())
306     return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
307         TempAlloca, getAtomicAddress().getType(),
308         getAtomicAddress().getElementType());
309   return TempAlloca;
310 }
311 
312 static RValue emitAtomicLibcall(CodeGenFunction &CGF,
313                                 StringRef fnName,
314                                 QualType resultType,
315                                 CallArgList &args) {
316   const CGFunctionInfo &fnInfo =
317     CGF.CGM.getTypes().arrangeBuiltinFunctionCall(resultType, args);
318   llvm::FunctionType *fnTy = CGF.CGM.getTypes().GetFunctionType(fnInfo);
319   llvm::AttrBuilder fnAttrB(CGF.getLLVMContext());
320   fnAttrB.addAttribute(llvm::Attribute::NoUnwind);
321   fnAttrB.addAttribute(llvm::Attribute::WillReturn);
322   llvm::AttributeList fnAttrs = llvm::AttributeList::get(
323       CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex, fnAttrB);
324 
325   llvm::FunctionCallee fn =
326       CGF.CGM.CreateRuntimeFunction(fnTy, fnName, fnAttrs);
327   auto callee = CGCallee::forDirect(fn);
328   return CGF.EmitCall(fnInfo, callee, ReturnValueSlot(), args);
329 }
330 
331 /// Does a store of the given IR type modify the full expected width?
332 static bool isFullSizeType(CodeGenModule &CGM, llvm::Type *type,
333                            uint64_t expectedSize) {
334   return (CGM.getDataLayout().getTypeStoreSize(type) * 8 == expectedSize);
335 }
336 
337 /// Does the atomic type require memsetting to zero before initialization?
338 ///
339 /// The IR type is provided as a way of making certain queries faster.
340 bool AtomicInfo::requiresMemSetZero(llvm::Type *type) const {
341   // If the atomic type has size padding, we definitely need a memset.
342   if (hasPadding()) return true;
343 
344   // Otherwise, do some simple heuristics to try to avoid it:
345   switch (getEvaluationKind()) {
346   // For scalars and complexes, check whether the store size of the
347   // type uses the full size.
348   case TEK_Scalar:
349     return !isFullSizeType(CGF.CGM, type, AtomicSizeInBits);
350   case TEK_Complex:
351     return !isFullSizeType(CGF.CGM, type->getStructElementType(0),
352                            AtomicSizeInBits / 2);
353 
354   // Padding in structs has an undefined bit pattern.  User beware.
355   case TEK_Aggregate:
356     return false;
357   }
358   llvm_unreachable("bad evaluation kind");
359 }
360 
361 bool AtomicInfo::emitMemSetZeroIfNecessary() const {
362   assert(LVal.isSimple());
363   Address addr = LVal.getAddress(CGF);
364   if (!requiresMemSetZero(addr.getElementType()))
365     return false;
366 
367   CGF.Builder.CreateMemSet(
368       addr.getPointer(), llvm::ConstantInt::get(CGF.Int8Ty, 0),
369       CGF.getContext().toCharUnitsFromBits(AtomicSizeInBits).getQuantity(),
370       LVal.getAlignment().getAsAlign());
371   return true;
372 }
373 
374 static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak,
375                               Address Dest, Address Ptr,
376                               Address Val1, Address Val2,
377                               uint64_t Size,
378                               llvm::AtomicOrdering SuccessOrder,
379                               llvm::AtomicOrdering FailureOrder,
380                               llvm::SyncScope::ID Scope) {
381   // Note that cmpxchg doesn't support weak cmpxchg, at least at the moment.
382   llvm::Value *Expected = CGF.Builder.CreateLoad(Val1);
383   llvm::Value *Desired = CGF.Builder.CreateLoad(Val2);
384 
385   llvm::AtomicCmpXchgInst *Pair = CGF.Builder.CreateAtomicCmpXchg(
386       Ptr.getPointer(), Expected, Desired, SuccessOrder, FailureOrder,
387       Scope);
388   Pair->setVolatile(E->isVolatile());
389   Pair->setWeak(IsWeak);
390 
391   // Cmp holds the result of the compare-exchange operation: true on success,
392   // false on failure.
393   llvm::Value *Old = CGF.Builder.CreateExtractValue(Pair, 0);
394   llvm::Value *Cmp = CGF.Builder.CreateExtractValue(Pair, 1);
395 
396   // This basic block is used to hold the store instruction if the operation
397   // failed.
398   llvm::BasicBlock *StoreExpectedBB =
399       CGF.createBasicBlock("cmpxchg.store_expected", CGF.CurFn);
400 
401   // This basic block is the exit point of the operation, we should end up
402   // here regardless of whether or not the operation succeeded.
403   llvm::BasicBlock *ContinueBB =
404       CGF.createBasicBlock("cmpxchg.continue", CGF.CurFn);
405 
406   // Update Expected if Expected isn't equal to Old, otherwise branch to the
407   // exit point.
408   CGF.Builder.CreateCondBr(Cmp, ContinueBB, StoreExpectedBB);
409 
410   CGF.Builder.SetInsertPoint(StoreExpectedBB);
411   // Update the memory at Expected with Old's value.
412   CGF.Builder.CreateStore(Old, Val1);
413   // Finally, branch to the exit point.
414   CGF.Builder.CreateBr(ContinueBB);
415 
416   CGF.Builder.SetInsertPoint(ContinueBB);
417   // Update the memory at Dest with Cmp's value.
418   CGF.EmitStoreOfScalar(Cmp, CGF.MakeAddrLValue(Dest, E->getType()));
419 }
420 
421 /// Given an ordering required on success, emit all possible cmpxchg
422 /// instructions to cope with the provided (but possibly only dynamically known)
423 /// FailureOrder.
424 static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
425                                         bool IsWeak, Address Dest, Address Ptr,
426                                         Address Val1, Address Val2,
427                                         llvm::Value *FailureOrderVal,
428                                         uint64_t Size,
429                                         llvm::AtomicOrdering SuccessOrder,
430                                         llvm::SyncScope::ID Scope) {
431   llvm::AtomicOrdering FailureOrder;
432   if (llvm::ConstantInt *FO = dyn_cast<llvm::ConstantInt>(FailureOrderVal)) {
433     auto FOS = FO->getSExtValue();
434     if (!llvm::isValidAtomicOrderingCABI(FOS))
435       FailureOrder = llvm::AtomicOrdering::Monotonic;
436     else
437       switch ((llvm::AtomicOrderingCABI)FOS) {
438       case llvm::AtomicOrderingCABI::relaxed:
439       // 31.7.2.18: "The failure argument shall not be memory_order_release
440       // nor memory_order_acq_rel". Fallback to monotonic.
441       case llvm::AtomicOrderingCABI::release:
442       case llvm::AtomicOrderingCABI::acq_rel:
443         FailureOrder = llvm::AtomicOrdering::Monotonic;
444         break;
445       case llvm::AtomicOrderingCABI::consume:
446       case llvm::AtomicOrderingCABI::acquire:
447         FailureOrder = llvm::AtomicOrdering::Acquire;
448         break;
449       case llvm::AtomicOrderingCABI::seq_cst:
450         FailureOrder = llvm::AtomicOrdering::SequentiallyConsistent;
451         break;
452       }
453     // Prior to c++17, "the failure argument shall be no stronger than the
454     // success argument". This condition has been lifted and the only
455     // precondition is 31.7.2.18. Effectively treat this as a DR and skip
456     // language version checks.
457     emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
458                       FailureOrder, Scope);
459     return;
460   }
461 
462   // Create all the relevant BB's
463   auto *MonotonicBB = CGF.createBasicBlock("monotonic_fail", CGF.CurFn);
464   auto *AcquireBB = CGF.createBasicBlock("acquire_fail", CGF.CurFn);
465   auto *SeqCstBB = CGF.createBasicBlock("seqcst_fail", CGF.CurFn);
466   auto *ContBB = CGF.createBasicBlock("atomic.continue", CGF.CurFn);
467 
468   // MonotonicBB is arbitrarily chosen as the default case; in practice, this
469   // doesn't matter unless someone is crazy enough to use something that
470   // doesn't fold to a constant for the ordering.
471   llvm::SwitchInst *SI = CGF.Builder.CreateSwitch(FailureOrderVal, MonotonicBB);
472   // Implemented as acquire, since it's the closest in LLVM.
473   SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::consume),
474               AcquireBB);
475   SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::acquire),
476               AcquireBB);
477   SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst),
478               SeqCstBB);
479 
480   // Emit all the different atomics
481   CGF.Builder.SetInsertPoint(MonotonicBB);
482   emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2,
483                     Size, SuccessOrder, llvm::AtomicOrdering::Monotonic, Scope);
484   CGF.Builder.CreateBr(ContBB);
485 
486   CGF.Builder.SetInsertPoint(AcquireBB);
487   emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
488                     llvm::AtomicOrdering::Acquire, Scope);
489   CGF.Builder.CreateBr(ContBB);
490 
491   CGF.Builder.SetInsertPoint(SeqCstBB);
492   emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
493                     llvm::AtomicOrdering::SequentiallyConsistent, Scope);
494   CGF.Builder.CreateBr(ContBB);
495 
496   CGF.Builder.SetInsertPoint(ContBB);
497 }
498 
499 /// Duplicate the atomic min/max operation in conventional IR for the builtin
500 /// variants that return the new rather than the original value.
501 static llvm::Value *EmitPostAtomicMinMax(CGBuilderTy &Builder,
502                                          AtomicExpr::AtomicOp Op,
503                                          bool IsSigned,
504                                          llvm::Value *OldVal,
505                                          llvm::Value *RHS) {
506   llvm::CmpInst::Predicate Pred;
507   switch (Op) {
508   default:
509     llvm_unreachable("Unexpected min/max operation");
510   case AtomicExpr::AO__atomic_max_fetch:
511     Pred = IsSigned ? llvm::CmpInst::ICMP_SGT : llvm::CmpInst::ICMP_UGT;
512     break;
513   case AtomicExpr::AO__atomic_min_fetch:
514     Pred = IsSigned ? llvm::CmpInst::ICMP_SLT : llvm::CmpInst::ICMP_ULT;
515     break;
516   }
517   llvm::Value *Cmp = Builder.CreateICmp(Pred, OldVal, RHS, "tst");
518   return Builder.CreateSelect(Cmp, OldVal, RHS, "newval");
519 }
520 
521 static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
522                          Address Ptr, Address Val1, Address Val2,
523                          llvm::Value *IsWeak, llvm::Value *FailureOrder,
524                          uint64_t Size, llvm::AtomicOrdering Order,
525                          llvm::SyncScope::ID Scope) {
526   llvm::AtomicRMWInst::BinOp Op = llvm::AtomicRMWInst::Add;
527   bool PostOpMinMax = false;
528   unsigned PostOp = 0;
529 
530   switch (E->getOp()) {
531   case AtomicExpr::AO__c11_atomic_init:
532   case AtomicExpr::AO__opencl_atomic_init:
533     llvm_unreachable("Already handled!");
534 
535   case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
536   case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
537   case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
538     emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2,
539                                 FailureOrder, Size, Order, Scope);
540     return;
541   case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
542   case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
543   case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
544     emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2,
545                                 FailureOrder, Size, Order, Scope);
546     return;
547   case AtomicExpr::AO__atomic_compare_exchange:
548   case AtomicExpr::AO__atomic_compare_exchange_n: {
549     if (llvm::ConstantInt *IsWeakC = dyn_cast<llvm::ConstantInt>(IsWeak)) {
550       emitAtomicCmpXchgFailureSet(CGF, E, IsWeakC->getZExtValue(), Dest, Ptr,
551                                   Val1, Val2, FailureOrder, Size, Order, Scope);
552     } else {
553       // Create all the relevant BB's
554       llvm::BasicBlock *StrongBB =
555           CGF.createBasicBlock("cmpxchg.strong", CGF.CurFn);
556       llvm::BasicBlock *WeakBB = CGF.createBasicBlock("cmxchg.weak", CGF.CurFn);
557       llvm::BasicBlock *ContBB =
558           CGF.createBasicBlock("cmpxchg.continue", CGF.CurFn);
559 
560       llvm::SwitchInst *SI = CGF.Builder.CreateSwitch(IsWeak, WeakBB);
561       SI->addCase(CGF.Builder.getInt1(false), StrongBB);
562 
563       CGF.Builder.SetInsertPoint(StrongBB);
564       emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2,
565                                   FailureOrder, Size, Order, Scope);
566       CGF.Builder.CreateBr(ContBB);
567 
568       CGF.Builder.SetInsertPoint(WeakBB);
569       emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2,
570                                   FailureOrder, Size, Order, Scope);
571       CGF.Builder.CreateBr(ContBB);
572 
573       CGF.Builder.SetInsertPoint(ContBB);
574     }
575     return;
576   }
577   case AtomicExpr::AO__c11_atomic_load:
578   case AtomicExpr::AO__opencl_atomic_load:
579   case AtomicExpr::AO__hip_atomic_load:
580   case AtomicExpr::AO__atomic_load_n:
581   case AtomicExpr::AO__atomic_load: {
582     llvm::LoadInst *Load = CGF.Builder.CreateLoad(Ptr);
583     Load->setAtomic(Order, Scope);
584     Load->setVolatile(E->isVolatile());
585     CGF.Builder.CreateStore(Load, Dest);
586     return;
587   }
588 
589   case AtomicExpr::AO__c11_atomic_store:
590   case AtomicExpr::AO__opencl_atomic_store:
591   case AtomicExpr::AO__hip_atomic_store:
592   case AtomicExpr::AO__atomic_store:
593   case AtomicExpr::AO__atomic_store_n: {
594     llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
595     llvm::StoreInst *Store = CGF.Builder.CreateStore(LoadVal1, Ptr);
596     Store->setAtomic(Order, Scope);
597     Store->setVolatile(E->isVolatile());
598     return;
599   }
600 
601   case AtomicExpr::AO__c11_atomic_exchange:
602   case AtomicExpr::AO__hip_atomic_exchange:
603   case AtomicExpr::AO__opencl_atomic_exchange:
604   case AtomicExpr::AO__atomic_exchange_n:
605   case AtomicExpr::AO__atomic_exchange:
606     Op = llvm::AtomicRMWInst::Xchg;
607     break;
608 
609   case AtomicExpr::AO__atomic_add_fetch:
610     PostOp = E->getValueType()->isFloatingType() ? llvm::Instruction::FAdd
611                                                  : llvm::Instruction::Add;
612     [[fallthrough]];
613   case AtomicExpr::AO__c11_atomic_fetch_add:
614   case AtomicExpr::AO__hip_atomic_fetch_add:
615   case AtomicExpr::AO__opencl_atomic_fetch_add:
616   case AtomicExpr::AO__atomic_fetch_add:
617     Op = E->getValueType()->isFloatingType() ? llvm::AtomicRMWInst::FAdd
618                                              : llvm::AtomicRMWInst::Add;
619     break;
620 
621   case AtomicExpr::AO__atomic_sub_fetch:
622     PostOp = E->getValueType()->isFloatingType() ? llvm::Instruction::FSub
623                                                  : llvm::Instruction::Sub;
624     [[fallthrough]];
625   case AtomicExpr::AO__c11_atomic_fetch_sub:
626   case AtomicExpr::AO__opencl_atomic_fetch_sub:
627   case AtomicExpr::AO__atomic_fetch_sub:
628     Op = E->getValueType()->isFloatingType() ? llvm::AtomicRMWInst::FSub
629                                              : llvm::AtomicRMWInst::Sub;
630     break;
631 
632   case AtomicExpr::AO__atomic_min_fetch:
633     PostOpMinMax = true;
634     [[fallthrough]];
635   case AtomicExpr::AO__c11_atomic_fetch_min:
636   case AtomicExpr::AO__hip_atomic_fetch_min:
637   case AtomicExpr::AO__opencl_atomic_fetch_min:
638   case AtomicExpr::AO__atomic_fetch_min:
639     Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Min
640                                                   : llvm::AtomicRMWInst::UMin;
641     break;
642 
643   case AtomicExpr::AO__atomic_max_fetch:
644     PostOpMinMax = true;
645     [[fallthrough]];
646   case AtomicExpr::AO__c11_atomic_fetch_max:
647   case AtomicExpr::AO__hip_atomic_fetch_max:
648   case AtomicExpr::AO__opencl_atomic_fetch_max:
649   case AtomicExpr::AO__atomic_fetch_max:
650     Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Max
651                                                   : llvm::AtomicRMWInst::UMax;
652     break;
653 
654   case AtomicExpr::AO__atomic_and_fetch:
655     PostOp = llvm::Instruction::And;
656     [[fallthrough]];
657   case AtomicExpr::AO__c11_atomic_fetch_and:
658   case AtomicExpr::AO__hip_atomic_fetch_and:
659   case AtomicExpr::AO__opencl_atomic_fetch_and:
660   case AtomicExpr::AO__atomic_fetch_and:
661     Op = llvm::AtomicRMWInst::And;
662     break;
663 
664   case AtomicExpr::AO__atomic_or_fetch:
665     PostOp = llvm::Instruction::Or;
666     [[fallthrough]];
667   case AtomicExpr::AO__c11_atomic_fetch_or:
668   case AtomicExpr::AO__hip_atomic_fetch_or:
669   case AtomicExpr::AO__opencl_atomic_fetch_or:
670   case AtomicExpr::AO__atomic_fetch_or:
671     Op = llvm::AtomicRMWInst::Or;
672     break;
673 
674   case AtomicExpr::AO__atomic_xor_fetch:
675     PostOp = llvm::Instruction::Xor;
676     [[fallthrough]];
677   case AtomicExpr::AO__c11_atomic_fetch_xor:
678   case AtomicExpr::AO__hip_atomic_fetch_xor:
679   case AtomicExpr::AO__opencl_atomic_fetch_xor:
680   case AtomicExpr::AO__atomic_fetch_xor:
681     Op = llvm::AtomicRMWInst::Xor;
682     break;
683 
684   case AtomicExpr::AO__atomic_nand_fetch:
685     PostOp = llvm::Instruction::And; // the NOT is special cased below
686     [[fallthrough]];
687   case AtomicExpr::AO__c11_atomic_fetch_nand:
688   case AtomicExpr::AO__atomic_fetch_nand:
689     Op = llvm::AtomicRMWInst::Nand;
690     break;
691   }
692 
693   llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
694   llvm::AtomicRMWInst *RMWI =
695       CGF.Builder.CreateAtomicRMW(Op, Ptr.getPointer(), LoadVal1, Order, Scope);
696   RMWI->setVolatile(E->isVolatile());
697 
698   // For __atomic_*_fetch operations, perform the operation again to
699   // determine the value which was written.
700   llvm::Value *Result = RMWI;
701   if (PostOpMinMax)
702     Result = EmitPostAtomicMinMax(CGF.Builder, E->getOp(),
703                                   E->getValueType()->isSignedIntegerType(),
704                                   RMWI, LoadVal1);
705   else if (PostOp)
706     Result = CGF.Builder.CreateBinOp((llvm::Instruction::BinaryOps)PostOp, RMWI,
707                                      LoadVal1);
708   if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch)
709     Result = CGF.Builder.CreateNot(Result);
710   CGF.Builder.CreateStore(Result, Dest);
711 }
712 
713 // This function emits any expression (scalar, complex, or aggregate)
714 // into a temporary alloca.
715 static Address
716 EmitValToTemp(CodeGenFunction &CGF, Expr *E) {
717   Address DeclPtr = CGF.CreateMemTemp(E->getType(), ".atomictmp");
718   CGF.EmitAnyExprToMem(E, DeclPtr, E->getType().getQualifiers(),
719                        /*Init*/ true);
720   return DeclPtr;
721 }
722 
723 static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
724                          Address Ptr, Address Val1, Address Val2,
725                          llvm::Value *IsWeak, llvm::Value *FailureOrder,
726                          uint64_t Size, llvm::AtomicOrdering Order,
727                          llvm::Value *Scope) {
728   auto ScopeModel = Expr->getScopeModel();
729 
730   // LLVM atomic instructions always have synch scope. If clang atomic
731   // expression has no scope operand, use default LLVM synch scope.
732   if (!ScopeModel) {
733     EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
734                  Order, CGF.CGM.getLLVMContext().getOrInsertSyncScopeID(""));
735     return;
736   }
737 
738   // Handle constant scope.
739   if (auto SC = dyn_cast<llvm::ConstantInt>(Scope)) {
740     auto SCID = CGF.getTargetHooks().getLLVMSyncScopeID(
741         CGF.CGM.getLangOpts(), ScopeModel->map(SC->getZExtValue()),
742         Order, CGF.CGM.getLLVMContext());
743     EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
744                  Order, SCID);
745     return;
746   }
747 
748   // Handle non-constant scope.
749   auto &Builder = CGF.Builder;
750   auto Scopes = ScopeModel->getRuntimeValues();
751   llvm::DenseMap<unsigned, llvm::BasicBlock *> BB;
752   for (auto S : Scopes)
753     BB[S] = CGF.createBasicBlock(getAsString(ScopeModel->map(S)), CGF.CurFn);
754 
755   llvm::BasicBlock *ContBB =
756       CGF.createBasicBlock("atomic.scope.continue", CGF.CurFn);
757 
758   auto *SC = Builder.CreateIntCast(Scope, Builder.getInt32Ty(), false);
759   // If unsupported synch scope is encountered at run time, assume a fallback
760   // synch scope value.
761   auto FallBack = ScopeModel->getFallBackValue();
762   llvm::SwitchInst *SI = Builder.CreateSwitch(SC, BB[FallBack]);
763   for (auto S : Scopes) {
764     auto *B = BB[S];
765     if (S != FallBack)
766       SI->addCase(Builder.getInt32(S), B);
767 
768     Builder.SetInsertPoint(B);
769     EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
770                  Order,
771                  CGF.getTargetHooks().getLLVMSyncScopeID(CGF.CGM.getLangOpts(),
772                                                          ScopeModel->map(S),
773                                                          Order,
774                                                          CGF.getLLVMContext()));
775     Builder.CreateBr(ContBB);
776   }
777 
778   Builder.SetInsertPoint(ContBB);
779 }
780 
781 static void
782 AddDirectArgument(CodeGenFunction &CGF, CallArgList &Args,
783                   bool UseOptimizedLibcall, llvm::Value *Val, QualType ValTy,
784                   SourceLocation Loc, CharUnits SizeInChars) {
785   if (UseOptimizedLibcall) {
786     // Load value and pass it to the function directly.
787     CharUnits Align = CGF.getContext().getTypeAlignInChars(ValTy);
788     int64_t SizeInBits = CGF.getContext().toBits(SizeInChars);
789     ValTy =
790         CGF.getContext().getIntTypeForBitwidth(SizeInBits, /*Signed=*/false);
791     llvm::Type *ITy = llvm::IntegerType::get(CGF.getLLVMContext(), SizeInBits);
792     Address Ptr = Address(CGF.Builder.CreateBitCast(Val, ITy->getPointerTo()),
793                           ITy, Align);
794     Val = CGF.EmitLoadOfScalar(Ptr, false,
795                                CGF.getContext().getPointerType(ValTy),
796                                Loc);
797     // Coerce the value into an appropriately sized integer type.
798     Args.add(RValue::get(Val), ValTy);
799   } else {
800     // Non-optimized functions always take a reference.
801     Args.add(RValue::get(CGF.EmitCastToVoidPtr(Val)),
802                          CGF.getContext().VoidPtrTy);
803   }
804 }
805 
806 RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
807   QualType AtomicTy = E->getPtr()->getType()->getPointeeType();
808   QualType MemTy = AtomicTy;
809   if (const AtomicType *AT = AtomicTy->getAs<AtomicType>())
810     MemTy = AT->getValueType();
811   llvm::Value *IsWeak = nullptr, *OrderFail = nullptr;
812 
813   Address Val1 = Address::invalid();
814   Address Val2 = Address::invalid();
815   Address Dest = Address::invalid();
816   Address Ptr = EmitPointerWithAlignment(E->getPtr());
817 
818   if (E->getOp() == AtomicExpr::AO__c11_atomic_init ||
819       E->getOp() == AtomicExpr::AO__opencl_atomic_init) {
820     LValue lvalue = MakeAddrLValue(Ptr, AtomicTy);
821     EmitAtomicInit(E->getVal1(), lvalue);
822     return RValue::get(nullptr);
823   }
824 
825   auto TInfo = getContext().getTypeInfoInChars(AtomicTy);
826   uint64_t Size = TInfo.Width.getQuantity();
827   unsigned MaxInlineWidthInBits = getTarget().getMaxAtomicInlineWidth();
828 
829   bool Oversized = getContext().toBits(TInfo.Width) > MaxInlineWidthInBits;
830   bool Misaligned = (Ptr.getAlignment() % TInfo.Width) != 0;
831   bool UseLibcall = Misaligned | Oversized;
832   bool ShouldCastToIntPtrTy = true;
833 
834   CharUnits MaxInlineWidth =
835       getContext().toCharUnitsFromBits(MaxInlineWidthInBits);
836 
837   DiagnosticsEngine &Diags = CGM.getDiags();
838 
839   if (Misaligned) {
840     Diags.Report(E->getBeginLoc(), diag::warn_atomic_op_misaligned)
841         << (int)TInfo.Width.getQuantity()
842         << (int)Ptr.getAlignment().getQuantity();
843   }
844 
845   if (Oversized) {
846     Diags.Report(E->getBeginLoc(), diag::warn_atomic_op_oversized)
847         << (int)TInfo.Width.getQuantity() << (int)MaxInlineWidth.getQuantity();
848   }
849 
850   llvm::Value *Order = EmitScalarExpr(E->getOrder());
851   llvm::Value *Scope =
852       E->getScopeModel() ? EmitScalarExpr(E->getScope()) : nullptr;
853 
854   switch (E->getOp()) {
855   case AtomicExpr::AO__c11_atomic_init:
856   case AtomicExpr::AO__opencl_atomic_init:
857     llvm_unreachable("Already handled above with EmitAtomicInit!");
858 
859   case AtomicExpr::AO__c11_atomic_load:
860   case AtomicExpr::AO__opencl_atomic_load:
861   case AtomicExpr::AO__hip_atomic_load:
862   case AtomicExpr::AO__atomic_load_n:
863     break;
864 
865   case AtomicExpr::AO__atomic_load:
866     Dest = EmitPointerWithAlignment(E->getVal1());
867     break;
868 
869   case AtomicExpr::AO__atomic_store:
870     Val1 = EmitPointerWithAlignment(E->getVal1());
871     break;
872 
873   case AtomicExpr::AO__atomic_exchange:
874     Val1 = EmitPointerWithAlignment(E->getVal1());
875     Dest = EmitPointerWithAlignment(E->getVal2());
876     break;
877 
878   case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
879   case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
880   case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
881   case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
882   case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
883   case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
884   case AtomicExpr::AO__atomic_compare_exchange_n:
885   case AtomicExpr::AO__atomic_compare_exchange:
886     Val1 = EmitPointerWithAlignment(E->getVal1());
887     if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange)
888       Val2 = EmitPointerWithAlignment(E->getVal2());
889     else
890       Val2 = EmitValToTemp(*this, E->getVal2());
891     OrderFail = EmitScalarExpr(E->getOrderFail());
892     if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange_n ||
893         E->getOp() == AtomicExpr::AO__atomic_compare_exchange)
894       IsWeak = EmitScalarExpr(E->getWeak());
895     break;
896 
897   case AtomicExpr::AO__c11_atomic_fetch_add:
898   case AtomicExpr::AO__c11_atomic_fetch_sub:
899   case AtomicExpr::AO__hip_atomic_fetch_add:
900   case AtomicExpr::AO__opencl_atomic_fetch_add:
901   case AtomicExpr::AO__opencl_atomic_fetch_sub:
902     if (MemTy->isPointerType()) {
903       // For pointer arithmetic, we're required to do a bit of math:
904       // adding 1 to an int* is not the same as adding 1 to a uintptr_t.
905       // ... but only for the C11 builtins. The GNU builtins expect the
906       // user to multiply by sizeof(T).
907       QualType Val1Ty = E->getVal1()->getType();
908       llvm::Value *Val1Scalar = EmitScalarExpr(E->getVal1());
909       CharUnits PointeeIncAmt =
910           getContext().getTypeSizeInChars(MemTy->getPointeeType());
911       Val1Scalar = Builder.CreateMul(Val1Scalar, CGM.getSize(PointeeIncAmt));
912       auto Temp = CreateMemTemp(Val1Ty, ".atomictmp");
913       Val1 = Temp;
914       EmitStoreOfScalar(Val1Scalar, MakeAddrLValue(Temp, Val1Ty));
915       break;
916     }
917     [[fallthrough]];
918   case AtomicExpr::AO__atomic_fetch_add:
919   case AtomicExpr::AO__atomic_fetch_sub:
920   case AtomicExpr::AO__atomic_add_fetch:
921   case AtomicExpr::AO__atomic_sub_fetch:
922     ShouldCastToIntPtrTy = !MemTy->isFloatingType();
923     [[fallthrough]];
924 
925   case AtomicExpr::AO__c11_atomic_store:
926   case AtomicExpr::AO__c11_atomic_exchange:
927   case AtomicExpr::AO__opencl_atomic_store:
928   case AtomicExpr::AO__hip_atomic_store:
929   case AtomicExpr::AO__opencl_atomic_exchange:
930   case AtomicExpr::AO__hip_atomic_exchange:
931   case AtomicExpr::AO__atomic_store_n:
932   case AtomicExpr::AO__atomic_exchange_n:
933   case AtomicExpr::AO__c11_atomic_fetch_and:
934   case AtomicExpr::AO__c11_atomic_fetch_or:
935   case AtomicExpr::AO__c11_atomic_fetch_xor:
936   case AtomicExpr::AO__c11_atomic_fetch_nand:
937   case AtomicExpr::AO__c11_atomic_fetch_max:
938   case AtomicExpr::AO__c11_atomic_fetch_min:
939   case AtomicExpr::AO__opencl_atomic_fetch_and:
940   case AtomicExpr::AO__opencl_atomic_fetch_or:
941   case AtomicExpr::AO__opencl_atomic_fetch_xor:
942   case AtomicExpr::AO__opencl_atomic_fetch_min:
943   case AtomicExpr::AO__opencl_atomic_fetch_max:
944   case AtomicExpr::AO__atomic_fetch_and:
945   case AtomicExpr::AO__hip_atomic_fetch_and:
946   case AtomicExpr::AO__atomic_fetch_or:
947   case AtomicExpr::AO__hip_atomic_fetch_or:
948   case AtomicExpr::AO__atomic_fetch_xor:
949   case AtomicExpr::AO__hip_atomic_fetch_xor:
950   case AtomicExpr::AO__atomic_fetch_nand:
951   case AtomicExpr::AO__atomic_and_fetch:
952   case AtomicExpr::AO__atomic_or_fetch:
953   case AtomicExpr::AO__atomic_xor_fetch:
954   case AtomicExpr::AO__atomic_nand_fetch:
955   case AtomicExpr::AO__atomic_max_fetch:
956   case AtomicExpr::AO__atomic_min_fetch:
957   case AtomicExpr::AO__atomic_fetch_max:
958   case AtomicExpr::AO__hip_atomic_fetch_max:
959   case AtomicExpr::AO__atomic_fetch_min:
960   case AtomicExpr::AO__hip_atomic_fetch_min:
961     Val1 = EmitValToTemp(*this, E->getVal1());
962     break;
963   }
964 
965   QualType RValTy = E->getType().getUnqualifiedType();
966 
967   // The inlined atomics only function on iN types, where N is a power of 2. We
968   // need to make sure (via temporaries if necessary) that all incoming values
969   // are compatible.
970   LValue AtomicVal = MakeAddrLValue(Ptr, AtomicTy);
971   AtomicInfo Atomics(*this, AtomicVal);
972 
973   if (ShouldCastToIntPtrTy) {
974     Ptr = Atomics.emitCastToAtomicIntPointer(Ptr);
975     if (Val1.isValid())
976       Val1 = Atomics.convertToAtomicIntPointer(Val1);
977     if (Val2.isValid())
978       Val2 = Atomics.convertToAtomicIntPointer(Val2);
979   }
980   if (Dest.isValid()) {
981     if (ShouldCastToIntPtrTy)
982       Dest = Atomics.emitCastToAtomicIntPointer(Dest);
983   } else if (E->isCmpXChg())
984     Dest = CreateMemTemp(RValTy, "cmpxchg.bool");
985   else if (!RValTy->isVoidType()) {
986     Dest = Atomics.CreateTempAlloca();
987     if (ShouldCastToIntPtrTy)
988       Dest = Atomics.emitCastToAtomicIntPointer(Dest);
989   }
990 
991   // Use a library call.  See: http://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary .
992   if (UseLibcall) {
993     bool UseOptimizedLibcall = false;
994     switch (E->getOp()) {
995     case AtomicExpr::AO__c11_atomic_init:
996     case AtomicExpr::AO__opencl_atomic_init:
997       llvm_unreachable("Already handled above with EmitAtomicInit!");
998 
999     case AtomicExpr::AO__c11_atomic_fetch_add:
1000     case AtomicExpr::AO__opencl_atomic_fetch_add:
1001     case AtomicExpr::AO__atomic_fetch_add:
1002     case AtomicExpr::AO__hip_atomic_fetch_add:
1003     case AtomicExpr::AO__c11_atomic_fetch_and:
1004     case AtomicExpr::AO__opencl_atomic_fetch_and:
1005     case AtomicExpr::AO__hip_atomic_fetch_and:
1006     case AtomicExpr::AO__atomic_fetch_and:
1007     case AtomicExpr::AO__c11_atomic_fetch_or:
1008     case AtomicExpr::AO__opencl_atomic_fetch_or:
1009     case AtomicExpr::AO__hip_atomic_fetch_or:
1010     case AtomicExpr::AO__atomic_fetch_or:
1011     case AtomicExpr::AO__c11_atomic_fetch_nand:
1012     case AtomicExpr::AO__atomic_fetch_nand:
1013     case AtomicExpr::AO__c11_atomic_fetch_sub:
1014     case AtomicExpr::AO__opencl_atomic_fetch_sub:
1015     case AtomicExpr::AO__atomic_fetch_sub:
1016     case AtomicExpr::AO__c11_atomic_fetch_xor:
1017     case AtomicExpr::AO__opencl_atomic_fetch_xor:
1018     case AtomicExpr::AO__opencl_atomic_fetch_min:
1019     case AtomicExpr::AO__opencl_atomic_fetch_max:
1020     case AtomicExpr::AO__atomic_fetch_xor:
1021     case AtomicExpr::AO__hip_atomic_fetch_xor:
1022     case AtomicExpr::AO__c11_atomic_fetch_max:
1023     case AtomicExpr::AO__c11_atomic_fetch_min:
1024     case AtomicExpr::AO__atomic_add_fetch:
1025     case AtomicExpr::AO__atomic_and_fetch:
1026     case AtomicExpr::AO__atomic_nand_fetch:
1027     case AtomicExpr::AO__atomic_or_fetch:
1028     case AtomicExpr::AO__atomic_sub_fetch:
1029     case AtomicExpr::AO__atomic_xor_fetch:
1030     case AtomicExpr::AO__atomic_fetch_max:
1031     case AtomicExpr::AO__hip_atomic_fetch_max:
1032     case AtomicExpr::AO__atomic_fetch_min:
1033     case AtomicExpr::AO__hip_atomic_fetch_min:
1034     case AtomicExpr::AO__atomic_max_fetch:
1035     case AtomicExpr::AO__atomic_min_fetch:
1036       // For these, only library calls for certain sizes exist.
1037       UseOptimizedLibcall = true;
1038       break;
1039 
1040     case AtomicExpr::AO__atomic_load:
1041     case AtomicExpr::AO__atomic_store:
1042     case AtomicExpr::AO__atomic_exchange:
1043     case AtomicExpr::AO__atomic_compare_exchange:
1044       // Use the generic version if we don't know that the operand will be
1045       // suitably aligned for the optimized version.
1046       if (Misaligned)
1047         break;
1048       [[fallthrough]];
1049     case AtomicExpr::AO__c11_atomic_load:
1050     case AtomicExpr::AO__c11_atomic_store:
1051     case AtomicExpr::AO__c11_atomic_exchange:
1052     case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
1053     case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
1054     case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
1055     case AtomicExpr::AO__opencl_atomic_load:
1056     case AtomicExpr::AO__hip_atomic_load:
1057     case AtomicExpr::AO__opencl_atomic_store:
1058     case AtomicExpr::AO__hip_atomic_store:
1059     case AtomicExpr::AO__opencl_atomic_exchange:
1060     case AtomicExpr::AO__hip_atomic_exchange:
1061     case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
1062     case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
1063     case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
1064     case AtomicExpr::AO__atomic_load_n:
1065     case AtomicExpr::AO__atomic_store_n:
1066     case AtomicExpr::AO__atomic_exchange_n:
1067     case AtomicExpr::AO__atomic_compare_exchange_n:
1068       // Only use optimized library calls for sizes for which they exist.
1069       // FIXME: Size == 16 optimized library functions exist too.
1070       if (Size == 1 || Size == 2 || Size == 4 || Size == 8)
1071         UseOptimizedLibcall = true;
1072       break;
1073     }
1074 
1075     CallArgList Args;
1076     if (!UseOptimizedLibcall) {
1077       // For non-optimized library calls, the size is the first parameter
1078       Args.add(RValue::get(llvm::ConstantInt::get(SizeTy, Size)),
1079                getContext().getSizeType());
1080     }
1081     // Atomic address is the first or second parameter
1082     // The OpenCL atomic library functions only accept pointer arguments to
1083     // generic address space.
1084     auto CastToGenericAddrSpace = [&](llvm::Value *V, QualType PT) {
1085       if (!E->isOpenCL())
1086         return V;
1087       auto AS = PT->castAs<PointerType>()->getPointeeType().getAddressSpace();
1088       if (AS == LangAS::opencl_generic)
1089         return V;
1090       auto DestAS = getContext().getTargetAddressSpace(LangAS::opencl_generic);
1091       auto T = llvm::cast<llvm::PointerType>(V->getType());
1092       auto *DestType = llvm::PointerType::getWithSamePointeeType(T, DestAS);
1093 
1094       return getTargetHooks().performAddrSpaceCast(
1095           *this, V, AS, LangAS::opencl_generic, DestType, false);
1096     };
1097 
1098     Args.add(RValue::get(CastToGenericAddrSpace(
1099                  EmitCastToVoidPtr(Ptr.getPointer()), E->getPtr()->getType())),
1100              getContext().VoidPtrTy);
1101 
1102     std::string LibCallName;
1103     QualType LoweredMemTy =
1104       MemTy->isPointerType() ? getContext().getIntPtrType() : MemTy;
1105     QualType RetTy;
1106     bool HaveRetTy = false;
1107     llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0;
1108     bool PostOpMinMax = false;
1109     switch (E->getOp()) {
1110     case AtomicExpr::AO__c11_atomic_init:
1111     case AtomicExpr::AO__opencl_atomic_init:
1112       llvm_unreachable("Already handled!");
1113 
    // There is only one libcall for compare and exchange, because there is no
1115     // optimisation benefit possible from a libcall version of a weak compare
1116     // and exchange.
1117     // bool __atomic_compare_exchange(size_t size, void *mem, void *expected,
1118     //                                void *desired, int success, int failure)
1119     // bool __atomic_compare_exchange_N(T *mem, T *expected, T desired,
1120     //                                  int success, int failure)
1121     case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
1122     case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
1123     case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
1124     case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
1125     case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
1126     case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
1127     case AtomicExpr::AO__atomic_compare_exchange:
1128     case AtomicExpr::AO__atomic_compare_exchange_n:
1129       LibCallName = "__atomic_compare_exchange";
1130       RetTy = getContext().BoolTy;
1131       HaveRetTy = true;
1132       Args.add(
1133           RValue::get(CastToGenericAddrSpace(
1134               EmitCastToVoidPtr(Val1.getPointer()), E->getVal1()->getType())),
1135           getContext().VoidPtrTy);
1136       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val2.getPointer(),
1137                         MemTy, E->getExprLoc(), TInfo.Width);
1138       Args.add(RValue::get(Order), getContext().IntTy);
1139       Order = OrderFail;
1140       break;
1141     // void __atomic_exchange(size_t size, void *mem, void *val, void *return,
1142     //                        int order)
1143     // T __atomic_exchange_N(T *mem, T val, int order)
1144     case AtomicExpr::AO__c11_atomic_exchange:
1145     case AtomicExpr::AO__opencl_atomic_exchange:
1146     case AtomicExpr::AO__atomic_exchange_n:
1147     case AtomicExpr::AO__atomic_exchange:
1148     case AtomicExpr::AO__hip_atomic_exchange:
1149       LibCallName = "__atomic_exchange";
1150       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
1151                         MemTy, E->getExprLoc(), TInfo.Width);
1152       break;
1153     // void __atomic_store(size_t size, void *mem, void *val, int order)
1154     // void __atomic_store_N(T *mem, T val, int order)
1155     case AtomicExpr::AO__c11_atomic_store:
1156     case AtomicExpr::AO__opencl_atomic_store:
1157     case AtomicExpr::AO__hip_atomic_store:
1158     case AtomicExpr::AO__atomic_store:
1159     case AtomicExpr::AO__atomic_store_n:
1160       LibCallName = "__atomic_store";
1161       RetTy = getContext().VoidTy;
1162       HaveRetTy = true;
1163       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
1164                         MemTy, E->getExprLoc(), TInfo.Width);
1165       break;
1166     // void __atomic_load(size_t size, void *mem, void *return, int order)
1167     // T __atomic_load_N(T *mem, int order)
1168     case AtomicExpr::AO__c11_atomic_load:
1169     case AtomicExpr::AO__opencl_atomic_load:
1170     case AtomicExpr::AO__hip_atomic_load:
1171     case AtomicExpr::AO__atomic_load:
1172     case AtomicExpr::AO__atomic_load_n:
1173       LibCallName = "__atomic_load";
1174       break;
1175     // T __atomic_add_fetch_N(T *mem, T val, int order)
1176     // T __atomic_fetch_add_N(T *mem, T val, int order)
1177     case AtomicExpr::AO__atomic_add_fetch:
1178       PostOp = llvm::Instruction::Add;
1179       [[fallthrough]];
1180     case AtomicExpr::AO__c11_atomic_fetch_add:
1181     case AtomicExpr::AO__opencl_atomic_fetch_add:
1182     case AtomicExpr::AO__atomic_fetch_add:
1183     case AtomicExpr::AO__hip_atomic_fetch_add:
1184       LibCallName = "__atomic_fetch_add";
1185       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
1186                         LoweredMemTy, E->getExprLoc(), TInfo.Width);
1187       break;
1188     // T __atomic_and_fetch_N(T *mem, T val, int order)
1189     // T __atomic_fetch_and_N(T *mem, T val, int order)
1190     case AtomicExpr::AO__atomic_and_fetch:
1191       PostOp = llvm::Instruction::And;
1192       [[fallthrough]];
1193     case AtomicExpr::AO__c11_atomic_fetch_and:
1194     case AtomicExpr::AO__opencl_atomic_fetch_and:
1195     case AtomicExpr::AO__hip_atomic_fetch_and:
1196     case AtomicExpr::AO__atomic_fetch_and:
1197       LibCallName = "__atomic_fetch_and";
1198       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
1199                         MemTy, E->getExprLoc(), TInfo.Width);
1200       break;
1201     // T __atomic_or_fetch_N(T *mem, T val, int order)
1202     // T __atomic_fetch_or_N(T *mem, T val, int order)
1203     case AtomicExpr::AO__atomic_or_fetch:
1204       PostOp = llvm::Instruction::Or;
1205       [[fallthrough]];
1206     case AtomicExpr::AO__c11_atomic_fetch_or:
1207     case AtomicExpr::AO__opencl_atomic_fetch_or:
1208     case AtomicExpr::AO__hip_atomic_fetch_or:
1209     case AtomicExpr::AO__atomic_fetch_or:
1210       LibCallName = "__atomic_fetch_or";
1211       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
1212                         MemTy, E->getExprLoc(), TInfo.Width);
1213       break;
1214     // T __atomic_sub_fetch_N(T *mem, T val, int order)
1215     // T __atomic_fetch_sub_N(T *mem, T val, int order)
1216     case AtomicExpr::AO__atomic_sub_fetch:
1217       PostOp = llvm::Instruction::Sub;
1218       [[fallthrough]];
1219     case AtomicExpr::AO__c11_atomic_fetch_sub:
1220     case AtomicExpr::AO__opencl_atomic_fetch_sub:
1221     case AtomicExpr::AO__atomic_fetch_sub:
1222       LibCallName = "__atomic_fetch_sub";
1223       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
1224                         LoweredMemTy, E->getExprLoc(), TInfo.Width);
1225       break;
1226     // T __atomic_xor_fetch_N(T *mem, T val, int order)
1227     // T __atomic_fetch_xor_N(T *mem, T val, int order)
1228     case AtomicExpr::AO__atomic_xor_fetch:
1229       PostOp = llvm::Instruction::Xor;
1230       [[fallthrough]];
1231     case AtomicExpr::AO__c11_atomic_fetch_xor:
1232     case AtomicExpr::AO__opencl_atomic_fetch_xor:
1233     case AtomicExpr::AO__hip_atomic_fetch_xor:
1234     case AtomicExpr::AO__atomic_fetch_xor:
1235       LibCallName = "__atomic_fetch_xor";
1236       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
1237                         MemTy, E->getExprLoc(), TInfo.Width);
1238       break;
1239     case AtomicExpr::AO__atomic_min_fetch:
1240       PostOpMinMax = true;
1241       [[fallthrough]];
1242     case AtomicExpr::AO__c11_atomic_fetch_min:
1243     case AtomicExpr::AO__atomic_fetch_min:
1244     case AtomicExpr::AO__hip_atomic_fetch_min:
1245     case AtomicExpr::AO__opencl_atomic_fetch_min:
1246       LibCallName = E->getValueType()->isSignedIntegerType()
1247                         ? "__atomic_fetch_min"
1248                         : "__atomic_fetch_umin";
1249       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
1250                         LoweredMemTy, E->getExprLoc(), TInfo.Width);
1251       break;
1252     case AtomicExpr::AO__atomic_max_fetch:
1253       PostOpMinMax = true;
1254       [[fallthrough]];
1255     case AtomicExpr::AO__c11_atomic_fetch_max:
1256     case AtomicExpr::AO__atomic_fetch_max:
1257     case AtomicExpr::AO__hip_atomic_fetch_max:
1258     case AtomicExpr::AO__opencl_atomic_fetch_max:
1259       LibCallName = E->getValueType()->isSignedIntegerType()
1260                         ? "__atomic_fetch_max"
1261                         : "__atomic_fetch_umax";
1262       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
1263                         LoweredMemTy, E->getExprLoc(), TInfo.Width);
1264       break;
1265     // T __atomic_nand_fetch_N(T *mem, T val, int order)
1266     // T __atomic_fetch_nand_N(T *mem, T val, int order)
1267     case AtomicExpr::AO__atomic_nand_fetch:
1268       PostOp = llvm::Instruction::And; // the NOT is special cased below
1269       [[fallthrough]];
1270     case AtomicExpr::AO__c11_atomic_fetch_nand:
1271     case AtomicExpr::AO__atomic_fetch_nand:
1272       LibCallName = "__atomic_fetch_nand";
1273       AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
1274                         MemTy, E->getExprLoc(), TInfo.Width);
1275       break;
1276     }
1277 
1278     if (E->isOpenCL()) {
1279       LibCallName = std::string("__opencl") +
1280           StringRef(LibCallName).drop_front(1).str();
1281 
1282     }
1283     // Optimized functions have the size in their name.
1284     if (UseOptimizedLibcall)
1285       LibCallName += "_" + llvm::utostr(Size);
1286     // By default, assume we return a value of the atomic type.
1287     if (!HaveRetTy) {
1288       if (UseOptimizedLibcall) {
1289         // Value is returned directly.
1290         // The function returns an appropriately sized integer type.
1291         RetTy = getContext().getIntTypeForBitwidth(
1292             getContext().toBits(TInfo.Width), /*Signed=*/false);
1293       } else {
1294         // Value is returned through parameter before the order.
1295         RetTy = getContext().VoidTy;
1296         Args.add(RValue::get(EmitCastToVoidPtr(Dest.getPointer())),
1297                  getContext().VoidPtrTy);
1298       }
1299     }
1300     // order is always the last parameter
1301     Args.add(RValue::get(Order),
1302              getContext().IntTy);
1303     if (E->isOpenCL())
1304       Args.add(RValue::get(Scope), getContext().IntTy);
1305 
1306     // PostOp is only needed for the atomic_*_fetch operations, and
1307     // thus is only needed for and implemented in the
1308     // UseOptimizedLibcall codepath.
1309     assert(UseOptimizedLibcall || (!PostOp && !PostOpMinMax));
1310 
1311     RValue Res = emitAtomicLibcall(*this, LibCallName, RetTy, Args);
1312     // The value is returned directly from the libcall.
1313     if (E->isCmpXChg())
1314       return Res;
1315 
1316     // The value is returned directly for optimized libcalls but the expr
1317     // provided an out-param.
1318     if (UseOptimizedLibcall && Res.getScalarVal()) {
1319       llvm::Value *ResVal = Res.getScalarVal();
1320       if (PostOpMinMax) {
1321         llvm::Value *LoadVal1 = Args[1].getRValue(*this).getScalarVal();
1322         ResVal = EmitPostAtomicMinMax(Builder, E->getOp(),
1323                                       E->getValueType()->isSignedIntegerType(),
1324                                       ResVal, LoadVal1);
1325       } else if (PostOp) {
1326         llvm::Value *LoadVal1 = Args[1].getRValue(*this).getScalarVal();
1327         ResVal = Builder.CreateBinOp(PostOp, ResVal, LoadVal1);
1328       }
1329       if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch)
1330         ResVal = Builder.CreateNot(ResVal);
1331 
1332       Builder.CreateStore(
1333           ResVal, Builder.CreateElementBitCast(Dest, ResVal->getType()));
1334     }
1335 
1336     if (RValTy->isVoidType())
1337       return RValue::get(nullptr);
1338 
1339     return convertTempToRValue(
1340         Builder.CreateElementBitCast(Dest, ConvertTypeForMem(RValTy)),
1341         RValTy, E->getExprLoc());
1342   }
1343 
1344   bool IsStore = E->getOp() == AtomicExpr::AO__c11_atomic_store ||
1345                  E->getOp() == AtomicExpr::AO__opencl_atomic_store ||
1346                  E->getOp() == AtomicExpr::AO__hip_atomic_store ||
1347                  E->getOp() == AtomicExpr::AO__atomic_store ||
1348                  E->getOp() == AtomicExpr::AO__atomic_store_n;
1349   bool IsLoad = E->getOp() == AtomicExpr::AO__c11_atomic_load ||
1350                 E->getOp() == AtomicExpr::AO__opencl_atomic_load ||
1351                 E->getOp() == AtomicExpr::AO__hip_atomic_load ||
1352                 E->getOp() == AtomicExpr::AO__atomic_load ||
1353                 E->getOp() == AtomicExpr::AO__atomic_load_n;
1354 
1355   if (isa<llvm::ConstantInt>(Order)) {
1356     auto ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1357     // We should not ever get to a case where the ordering isn't a valid C ABI
1358     // value, but it's hard to enforce that in general.
1359     if (llvm::isValidAtomicOrderingCABI(ord))
1360       switch ((llvm::AtomicOrderingCABI)ord) {
1361       case llvm::AtomicOrderingCABI::relaxed:
1362         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
1363                      llvm::AtomicOrdering::Monotonic, Scope);
1364         break;
1365       case llvm::AtomicOrderingCABI::consume:
1366       case llvm::AtomicOrderingCABI::acquire:
1367         if (IsStore)
1368           break; // Avoid crashing on code with undefined behavior
1369         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
1370                      llvm::AtomicOrdering::Acquire, Scope);
1371         break;
1372       case llvm::AtomicOrderingCABI::release:
1373         if (IsLoad)
1374           break; // Avoid crashing on code with undefined behavior
1375         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
1376                      llvm::AtomicOrdering::Release, Scope);
1377         break;
1378       case llvm::AtomicOrderingCABI::acq_rel:
1379         if (IsLoad || IsStore)
1380           break; // Avoid crashing on code with undefined behavior
1381         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
1382                      llvm::AtomicOrdering::AcquireRelease, Scope);
1383         break;
1384       case llvm::AtomicOrderingCABI::seq_cst:
1385         EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
1386                      llvm::AtomicOrdering::SequentiallyConsistent, Scope);
1387         break;
1388       }
1389     if (RValTy->isVoidType())
1390       return RValue::get(nullptr);
1391 
1392     return convertTempToRValue(
1393         Builder.CreateElementBitCast(Dest, ConvertTypeForMem(RValTy)),
1394         RValTy, E->getExprLoc());
1395   }
1396 
1397   // Long case, when Order isn't obviously constant.
1398 
1399   // Create all the relevant BB's
1400   llvm::BasicBlock *MonotonicBB = nullptr, *AcquireBB = nullptr,
1401                    *ReleaseBB = nullptr, *AcqRelBB = nullptr,
1402                    *SeqCstBB = nullptr;
1403   MonotonicBB = createBasicBlock("monotonic", CurFn);
1404   if (!IsStore)
1405     AcquireBB = createBasicBlock("acquire", CurFn);
1406   if (!IsLoad)
1407     ReleaseBB = createBasicBlock("release", CurFn);
1408   if (!IsLoad && !IsStore)
1409     AcqRelBB = createBasicBlock("acqrel", CurFn);
1410   SeqCstBB = createBasicBlock("seqcst", CurFn);
1411   llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1412 
1413   // Create the switch for the split
1414   // MonotonicBB is arbitrarily chosen as the default case; in practice, this
1415   // doesn't matter unless someone is crazy enough to use something that
1416   // doesn't fold to a constant for the ordering.
1417   Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1418   llvm::SwitchInst *SI = Builder.CreateSwitch(Order, MonotonicBB);
1419 
1420   // Emit all the different atomics
1421   Builder.SetInsertPoint(MonotonicBB);
1422   EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
1423                llvm::AtomicOrdering::Monotonic, Scope);
1424   Builder.CreateBr(ContBB);
1425   if (!IsStore) {
1426     Builder.SetInsertPoint(AcquireBB);
1427     EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
1428                  llvm::AtomicOrdering::Acquire, Scope);
1429     Builder.CreateBr(ContBB);
1430     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::consume),
1431                 AcquireBB);
1432     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acquire),
1433                 AcquireBB);
1434   }
1435   if (!IsLoad) {
1436     Builder.SetInsertPoint(ReleaseBB);
1437     EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
1438                  llvm::AtomicOrdering::Release, Scope);
1439     Builder.CreateBr(ContBB);
1440     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::release),
1441                 ReleaseBB);
1442   }
1443   if (!IsLoad && !IsStore) {
1444     Builder.SetInsertPoint(AcqRelBB);
1445     EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
1446                  llvm::AtomicOrdering::AcquireRelease, Scope);
1447     Builder.CreateBr(ContBB);
1448     SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acq_rel),
1449                 AcqRelBB);
1450   }
1451   Builder.SetInsertPoint(SeqCstBB);
1452   EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
1453                llvm::AtomicOrdering::SequentiallyConsistent, Scope);
1454   Builder.CreateBr(ContBB);
1455   SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst),
1456               SeqCstBB);
1457 
1458   // Cleanup and return
1459   Builder.SetInsertPoint(ContBB);
1460   if (RValTy->isVoidType())
1461     return RValue::get(nullptr);
1462 
1463   assert(Atomics.getValueSizeInBits() <= Atomics.getAtomicSizeInBits());
1464   return convertTempToRValue(
1465       Builder.CreateElementBitCast(Dest, ConvertTypeForMem(RValTy)),
1466       RValTy, E->getExprLoc());
1467 }
1468 
1469 Address AtomicInfo::emitCastToAtomicIntPointer(Address addr) const {
1470   llvm::IntegerType *ty =
1471     llvm::IntegerType::get(CGF.getLLVMContext(), AtomicSizeInBits);
1472   return CGF.Builder.CreateElementBitCast(addr, ty);
1473 }
1474 
1475 Address AtomicInfo::convertToAtomicIntPointer(Address Addr) const {
1476   llvm::Type *Ty = Addr.getElementType();
1477   uint64_t SourceSizeInBits = CGF.CGM.getDataLayout().getTypeSizeInBits(Ty);
1478   if (SourceSizeInBits != AtomicSizeInBits) {
1479     Address Tmp = CreateTempAlloca();
1480     CGF.Builder.CreateMemCpy(Tmp, Addr,
1481                              std::min(AtomicSizeInBits, SourceSizeInBits) / 8);
1482     Addr = Tmp;
1483   }
1484 
1485   return emitCastToAtomicIntPointer(Addr);
1486 }
1487 
1488 RValue AtomicInfo::convertAtomicTempToRValue(Address addr,
1489                                              AggValueSlot resultSlot,
1490                                              SourceLocation loc,
1491                                              bool asValue) const {
1492   if (LVal.isSimple()) {
1493     if (EvaluationKind == TEK_Aggregate)
1494       return resultSlot.asRValue();
1495 
1496     // Drill into the padding structure if we have one.
1497     if (hasPadding())
1498       addr = CGF.Builder.CreateStructGEP(addr, 0);
1499 
1500     // Otherwise, just convert the temporary to an r-value using the
1501     // normal conversion routine.
1502     return CGF.convertTempToRValue(addr, getValueType(), loc);
1503   }
1504   if (!asValue)
1505     // Get RValue from temp memory as atomic for non-simple lvalues
1506     return RValue::get(CGF.Builder.CreateLoad(addr));
1507   if (LVal.isBitField())
1508     return CGF.EmitLoadOfBitfieldLValue(
1509         LValue::MakeBitfield(addr, LVal.getBitFieldInfo(), LVal.getType(),
1510                              LVal.getBaseInfo(), TBAAAccessInfo()), loc);
1511   if (LVal.isVectorElt())
1512     return CGF.EmitLoadOfLValue(
1513         LValue::MakeVectorElt(addr, LVal.getVectorIdx(), LVal.getType(),
1514                               LVal.getBaseInfo(), TBAAAccessInfo()), loc);
1515   assert(LVal.isExtVectorElt());
1516   return CGF.EmitLoadOfExtVectorElementLValue(LValue::MakeExtVectorElt(
1517       addr, LVal.getExtVectorElts(), LVal.getType(),
1518       LVal.getBaseInfo(), TBAAAccessInfo()));
1519 }
1520 
1521 RValue AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value *IntVal,
1522                                              AggValueSlot ResultSlot,
1523                                              SourceLocation Loc,
1524                                              bool AsValue) const {
1525   // Try not to in some easy cases.
1526   assert(IntVal->getType()->isIntegerTy() && "Expected integer value");
1527   if (getEvaluationKind() == TEK_Scalar &&
1528       (((!LVal.isBitField() ||
1529          LVal.getBitFieldInfo().Size == ValueSizeInBits) &&
1530         !hasPadding()) ||
1531        !AsValue)) {
1532     auto *ValTy = AsValue
1533                       ? CGF.ConvertTypeForMem(ValueTy)
1534                       : getAtomicAddress().getElementType();
1535     if (ValTy->isIntegerTy()) {
1536       assert(IntVal->getType() == ValTy && "Different integer types.");
1537       return RValue::get(CGF.EmitFromMemory(IntVal, ValueTy));
1538     } else if (ValTy->isPointerTy())
1539       return RValue::get(CGF.Builder.CreateIntToPtr(IntVal, ValTy));
1540     else if (llvm::CastInst::isBitCastable(IntVal->getType(), ValTy))
1541       return RValue::get(CGF.Builder.CreateBitCast(IntVal, ValTy));
1542   }
1543 
1544   // Create a temporary.  This needs to be big enough to hold the
1545   // atomic integer.
1546   Address Temp = Address::invalid();
1547   bool TempIsVolatile = false;
1548   if (AsValue && getEvaluationKind() == TEK_Aggregate) {
1549     assert(!ResultSlot.isIgnored());
1550     Temp = ResultSlot.getAddress();
1551     TempIsVolatile = ResultSlot.isVolatile();
1552   } else {
1553     Temp = CreateTempAlloca();
1554   }
1555 
1556   // Slam the integer into the temporary.
1557   Address CastTemp = emitCastToAtomicIntPointer(Temp);
1558   CGF.Builder.CreateStore(IntVal, CastTemp)
1559       ->setVolatile(TempIsVolatile);
1560 
1561   return convertAtomicTempToRValue(Temp, ResultSlot, Loc, AsValue);
1562 }
1563 
1564 void AtomicInfo::EmitAtomicLoadLibcall(llvm::Value *AddForLoaded,
1565                                        llvm::AtomicOrdering AO, bool) {
1566   // void __atomic_load(size_t size, void *mem, void *return, int order);
1567   CallArgList Args;
1568   Args.add(RValue::get(getAtomicSizeValue()), CGF.getContext().getSizeType());
1569   Args.add(RValue::get(CGF.EmitCastToVoidPtr(getAtomicPointer())),
1570            CGF.getContext().VoidPtrTy);
1571   Args.add(RValue::get(CGF.EmitCastToVoidPtr(AddForLoaded)),
1572            CGF.getContext().VoidPtrTy);
1573   Args.add(
1574       RValue::get(llvm::ConstantInt::get(CGF.IntTy, (int)llvm::toCABI(AO))),
1575       CGF.getContext().IntTy);
1576   emitAtomicLibcall(CGF, "__atomic_load", CGF.getContext().VoidTy, Args);
1577 }
1578 
1579 llvm::Value *AtomicInfo::EmitAtomicLoadOp(llvm::AtomicOrdering AO,
1580                                           bool IsVolatile) {
1581   // Okay, we're doing this natively.
1582   Address Addr = getAtomicAddressAsAtomicIntPointer();
1583   llvm::LoadInst *Load = CGF.Builder.CreateLoad(Addr, "atomic-load");
1584   Load->setAtomic(AO);
1585 
1586   // Other decoration.
1587   if (IsVolatile)
1588     Load->setVolatile(true);
1589   CGF.CGM.DecorateInstructionWithTBAA(Load, LVal.getTBAAInfo());
1590   return Load;
1591 }
1592 
1593 /// An LValue is a candidate for having its loads and stores be made atomic if
1594 /// we are operating under /volatile:ms *and* the LValue itself is volatile and
1595 /// performing such an operation can be performed without a libcall.
1596 bool CodeGenFunction::LValueIsSuitableForInlineAtomic(LValue LV) {
1597   if (!CGM.getLangOpts().MSVolatile) return false;
1598   AtomicInfo AI(*this, LV);
1599   bool IsVolatile = LV.isVolatile() || hasVolatileMember(LV.getType());
1600   // An atomic is inline if we don't need to use a libcall.
1601   bool AtomicIsInline = !AI.shouldUseLibcall();
1602   // MSVC doesn't seem to do this for types wider than a pointer.
1603   if (getContext().getTypeSize(LV.getType()) >
1604       getContext().getTypeSize(getContext().getIntPtrType()))
1605     return false;
1606   return IsVolatile && AtomicIsInline;
1607 }
1608 
1609 RValue CodeGenFunction::EmitAtomicLoad(LValue LV, SourceLocation SL,
1610                                        AggValueSlot Slot) {
1611   llvm::AtomicOrdering AO;
1612   bool IsVolatile = LV.isVolatileQualified();
1613   if (LV.getType()->isAtomicType()) {
1614     AO = llvm::AtomicOrdering::SequentiallyConsistent;
1615   } else {
1616     AO = llvm::AtomicOrdering::Acquire;
1617     IsVolatile = true;
1618   }
1619   return EmitAtomicLoad(LV, SL, AO, IsVolatile, Slot);
1620 }
1621 
1622 RValue AtomicInfo::EmitAtomicLoad(AggValueSlot ResultSlot, SourceLocation Loc,
1623                                   bool AsValue, llvm::AtomicOrdering AO,
1624                                   bool IsVolatile) {
1625   // Check whether we should use a library call.
1626   if (shouldUseLibcall()) {
1627     Address TempAddr = Address::invalid();
1628     if (LVal.isSimple() && !ResultSlot.isIgnored()) {
1629       assert(getEvaluationKind() == TEK_Aggregate);
1630       TempAddr = ResultSlot.getAddress();
1631     } else
1632       TempAddr = CreateTempAlloca();
1633 
1634     EmitAtomicLoadLibcall(TempAddr.getPointer(), AO, IsVolatile);
1635 
1636     // Okay, turn that back into the original value or whole atomic (for
1637     // non-simple lvalues) type.
1638     return convertAtomicTempToRValue(TempAddr, ResultSlot, Loc, AsValue);
1639   }
1640 
1641   // Okay, we're doing this natively.
1642   auto *Load = EmitAtomicLoadOp(AO, IsVolatile);
1643 
1644   // If we're ignoring an aggregate return, don't do anything.
1645   if (getEvaluationKind() == TEK_Aggregate && ResultSlot.isIgnored())
1646     return RValue::getAggregate(Address::invalid(), false);
1647 
1648   // Okay, turn that back into the original value or atomic (for non-simple
1649   // lvalues) type.
1650   return ConvertIntToValueOrAtomic(Load, ResultSlot, Loc, AsValue);
1651 }
1652 
1653 /// Emit a load from an l-value of atomic type.  Note that the r-value
1654 /// we produce is an r-value of the atomic *value* type.
1655 RValue CodeGenFunction::EmitAtomicLoad(LValue src, SourceLocation loc,
1656                                        llvm::AtomicOrdering AO, bool IsVolatile,
1657                                        AggValueSlot resultSlot) {
1658   AtomicInfo Atomics(*this, src);
1659   return Atomics.EmitAtomicLoad(resultSlot, loc, /*AsValue=*/true, AO,
1660                                 IsVolatile);
1661 }
1662 
1663 /// Copy an r-value into memory as part of storing to an atomic type.
1664 /// This needs to create a bit-pattern suitable for atomic operations.
1665 void AtomicInfo::emitCopyIntoMemory(RValue rvalue) const {
1666   assert(LVal.isSimple());
1667   // If we have an r-value, the rvalue should be of the atomic type,
1668   // which means that the caller is responsible for having zeroed
1669   // any padding.  Just do an aggregate copy of that type.
1670   if (rvalue.isAggregate()) {
1671     LValue Dest = CGF.MakeAddrLValue(getAtomicAddress(), getAtomicType());
1672     LValue Src = CGF.MakeAddrLValue(rvalue.getAggregateAddress(),
1673                                     getAtomicType());
1674     bool IsVolatile = rvalue.isVolatileQualified() ||
1675                       LVal.isVolatileQualified();
1676     CGF.EmitAggregateCopy(Dest, Src, getAtomicType(),
1677                           AggValueSlot::DoesNotOverlap, IsVolatile);
1678     return;
1679   }
1680 
1681   // Okay, otherwise we're copying stuff.
1682 
1683   // Zero out the buffer if necessary.
1684   emitMemSetZeroIfNecessary();
1685 
1686   // Drill past the padding if present.
1687   LValue TempLVal = projectValue();
1688 
1689   // Okay, store the rvalue in.
1690   if (rvalue.isScalar()) {
1691     CGF.EmitStoreOfScalar(rvalue.getScalarVal(), TempLVal, /*init*/ true);
1692   } else {
1693     CGF.EmitStoreOfComplex(rvalue.getComplexVal(), TempLVal, /*init*/ true);
1694   }
1695 }
1696 
1697 
1698 /// Materialize an r-value into memory for the purposes of storing it
1699 /// to an atomic type.
1700 Address AtomicInfo::materializeRValue(RValue rvalue) const {
1701   // Aggregate r-values are already in memory, and EmitAtomicStore
1702   // requires them to be values of the atomic type.
1703   if (rvalue.isAggregate())
1704     return rvalue.getAggregateAddress();
1705 
1706   // Otherwise, make a temporary and materialize into it.
1707   LValue TempLV = CGF.MakeAddrLValue(CreateTempAlloca(), getAtomicType());
1708   AtomicInfo Atomics(CGF, TempLV);
1709   Atomics.emitCopyIntoMemory(rvalue);
1710   return TempLV.getAddress(CGF);
1711 }
1712 
1713 llvm::Value *AtomicInfo::convertRValueToInt(RValue RVal) const {
1714   // If we've got a scalar value of the right size, try to avoid going
1715   // through memory.
1716   if (RVal.isScalar() && (!hasPadding() || !LVal.isSimple())) {
1717     llvm::Value *Value = RVal.getScalarVal();
1718     if (isa<llvm::IntegerType>(Value->getType()))
1719       return CGF.EmitToMemory(Value, ValueTy);
1720     else {
1721       llvm::IntegerType *InputIntTy = llvm::IntegerType::get(
1722           CGF.getLLVMContext(),
1723           LVal.isSimple() ? getValueSizeInBits() : getAtomicSizeInBits());
1724       if (isa<llvm::PointerType>(Value->getType()))
1725         return CGF.Builder.CreatePtrToInt(Value, InputIntTy);
1726       else if (llvm::BitCastInst::isBitCastable(Value->getType(), InputIntTy))
1727         return CGF.Builder.CreateBitCast(Value, InputIntTy);
1728     }
1729   }
1730   // Otherwise, we need to go through memory.
1731   // Put the r-value in memory.
1732   Address Addr = materializeRValue(RVal);
1733 
1734   // Cast the temporary to the atomic int type and pull a value out.
1735   Addr = emitCastToAtomicIntPointer(Addr);
1736   return CGF.Builder.CreateLoad(Addr);
1737 }
1738 
1739 std::pair<llvm::Value *, llvm::Value *> AtomicInfo::EmitAtomicCompareExchangeOp(
1740     llvm::Value *ExpectedVal, llvm::Value *DesiredVal,
1741     llvm::AtomicOrdering Success, llvm::AtomicOrdering Failure, bool IsWeak) {
1742   // Do the atomic store.
1743   Address Addr = getAtomicAddressAsAtomicIntPointer();
1744   auto *Inst = CGF.Builder.CreateAtomicCmpXchg(Addr.getPointer(),
1745                                                ExpectedVal, DesiredVal,
1746                                                Success, Failure);
1747   // Other decoration.
1748   Inst->setVolatile(LVal.isVolatileQualified());
1749   Inst->setWeak(IsWeak);
1750 
1751   // Okay, turn that back into the original value type.
1752   auto *PreviousVal = CGF.Builder.CreateExtractValue(Inst, /*Idxs=*/0);
1753   auto *SuccessFailureVal = CGF.Builder.CreateExtractValue(Inst, /*Idxs=*/1);
1754   return std::make_pair(PreviousVal, SuccessFailureVal);
1755 }
1756 
1757 llvm::Value *
1758 AtomicInfo::EmitAtomicCompareExchangeLibcall(llvm::Value *ExpectedAddr,
1759                                              llvm::Value *DesiredAddr,
1760                                              llvm::AtomicOrdering Success,
1761                                              llvm::AtomicOrdering Failure) {
1762   // bool __atomic_compare_exchange(size_t size, void *obj, void *expected,
1763   // void *desired, int success, int failure);
1764   CallArgList Args;
1765   Args.add(RValue::get(getAtomicSizeValue()), CGF.getContext().getSizeType());
1766   Args.add(RValue::get(CGF.EmitCastToVoidPtr(getAtomicPointer())),
1767            CGF.getContext().VoidPtrTy);
1768   Args.add(RValue::get(CGF.EmitCastToVoidPtr(ExpectedAddr)),
1769            CGF.getContext().VoidPtrTy);
1770   Args.add(RValue::get(CGF.EmitCastToVoidPtr(DesiredAddr)),
1771            CGF.getContext().VoidPtrTy);
1772   Args.add(RValue::get(
1773                llvm::ConstantInt::get(CGF.IntTy, (int)llvm::toCABI(Success))),
1774            CGF.getContext().IntTy);
1775   Args.add(RValue::get(
1776                llvm::ConstantInt::get(CGF.IntTy, (int)llvm::toCABI(Failure))),
1777            CGF.getContext().IntTy);
1778   auto SuccessFailureRVal = emitAtomicLibcall(CGF, "__atomic_compare_exchange",
1779                                               CGF.getContext().BoolTy, Args);
1780 
1781   return SuccessFailureRVal.getScalarVal();
1782 }
1783 
1784 std::pair<RValue, llvm::Value *> AtomicInfo::EmitAtomicCompareExchange(
1785     RValue Expected, RValue Desired, llvm::AtomicOrdering Success,
1786     llvm::AtomicOrdering Failure, bool IsWeak) {
1787   // Check whether we should use a library call.
1788   if (shouldUseLibcall()) {
1789     // Produce a source address.
1790     Address ExpectedAddr = materializeRValue(Expected);
1791     Address DesiredAddr = materializeRValue(Desired);
1792     auto *Res = EmitAtomicCompareExchangeLibcall(ExpectedAddr.getPointer(),
1793                                                  DesiredAddr.getPointer(),
1794                                                  Success, Failure);
1795     return std::make_pair(
1796         convertAtomicTempToRValue(ExpectedAddr, AggValueSlot::ignored(),
1797                                   SourceLocation(), /*AsValue=*/false),
1798         Res);
1799   }
1800 
1801   // If we've got a scalar value of the right size, try to avoid going
1802   // through memory.
1803   auto *ExpectedVal = convertRValueToInt(Expected);
1804   auto *DesiredVal = convertRValueToInt(Desired);
1805   auto Res = EmitAtomicCompareExchangeOp(ExpectedVal, DesiredVal, Success,
1806                                          Failure, IsWeak);
1807   return std::make_pair(
1808       ConvertIntToValueOrAtomic(Res.first, AggValueSlot::ignored(),
1809                                 SourceLocation(), /*AsValue=*/false),
1810       Res.second);
1811 }
1812 
1813 static void
1814 EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics, RValue OldRVal,
1815                       const llvm::function_ref<RValue(RValue)> &UpdateOp,
1816                       Address DesiredAddr) {
1817   RValue UpRVal;
1818   LValue AtomicLVal = Atomics.getAtomicLValue();
1819   LValue DesiredLVal;
1820   if (AtomicLVal.isSimple()) {
1821     UpRVal = OldRVal;
1822     DesiredLVal = CGF.MakeAddrLValue(DesiredAddr, AtomicLVal.getType());
1823   } else {
1824     // Build new lvalue for temp address.
1825     Address Ptr = Atomics.materializeRValue(OldRVal);
1826     LValue UpdateLVal;
1827     if (AtomicLVal.isBitField()) {
1828       UpdateLVal =
1829           LValue::MakeBitfield(Ptr, AtomicLVal.getBitFieldInfo(),
1830                                AtomicLVal.getType(),
1831                                AtomicLVal.getBaseInfo(),
1832                                AtomicLVal.getTBAAInfo());
1833       DesiredLVal =
1834           LValue::MakeBitfield(DesiredAddr, AtomicLVal.getBitFieldInfo(),
1835                                AtomicLVal.getType(), AtomicLVal.getBaseInfo(),
1836                                AtomicLVal.getTBAAInfo());
1837     } else if (AtomicLVal.isVectorElt()) {
1838       UpdateLVal = LValue::MakeVectorElt(Ptr, AtomicLVal.getVectorIdx(),
1839                                          AtomicLVal.getType(),
1840                                          AtomicLVal.getBaseInfo(),
1841                                          AtomicLVal.getTBAAInfo());
1842       DesiredLVal = LValue::MakeVectorElt(
1843           DesiredAddr, AtomicLVal.getVectorIdx(), AtomicLVal.getType(),
1844           AtomicLVal.getBaseInfo(), AtomicLVal.getTBAAInfo());
1845     } else {
1846       assert(AtomicLVal.isExtVectorElt());
1847       UpdateLVal = LValue::MakeExtVectorElt(Ptr, AtomicLVal.getExtVectorElts(),
1848                                             AtomicLVal.getType(),
1849                                             AtomicLVal.getBaseInfo(),
1850                                             AtomicLVal.getTBAAInfo());
1851       DesiredLVal = LValue::MakeExtVectorElt(
1852           DesiredAddr, AtomicLVal.getExtVectorElts(), AtomicLVal.getType(),
1853           AtomicLVal.getBaseInfo(), AtomicLVal.getTBAAInfo());
1854     }
1855     UpRVal = CGF.EmitLoadOfLValue(UpdateLVal, SourceLocation());
1856   }
1857   // Store new value in the corresponding memory area.
1858   RValue NewRVal = UpdateOp(UpRVal);
1859   if (NewRVal.isScalar()) {
1860     CGF.EmitStoreThroughLValue(NewRVal, DesiredLVal);
1861   } else {
1862     assert(NewRVal.isComplex());
1863     CGF.EmitStoreOfComplex(NewRVal.getComplexVal(), DesiredLVal,
1864                            /*isInit=*/false);
1865   }
1866 }
1867 
/// Emit an atomic read-modify-write of this object as a loop built from the
/// __atomic_load and __atomic_compare_exchange libcalls.
///
/// \param AO         ordering used for the initial load and as the success
///                   ordering of the compare-exchange.
/// \param UpdateOp   callback that computes the new value from the old one.
/// \param IsVolatile forwarded to EmitAtomicLoadLibcall.
void AtomicInfo::EmitAtomicUpdateLibcall(
    llvm::AtomicOrdering AO, const llvm::function_ref<RValue(RValue)> &UpdateOp,
    bool IsVolatile) {
  // Failure ordering is the strongest one permitted for success ordering AO.
  auto Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);

  // Buffer holding the value we expect to find in the atomic object.
  Address ExpectedAddr = CreateTempAlloca();

  // Seed the expected buffer with the current contents of the object.
  EmitAtomicLoadLibcall(ExpectedAddr.getPointer(), AO, IsVolatile);
  // atomic_cont is the loop body (re-entered when the exchange fails);
  // atomic_exit is reached once the exchange succeeds.
  auto *ContBB = CGF.createBasicBlock("atomic_cont");
  auto *ExitBB = CGF.createBasicBlock("atomic_exit");
  CGF.EmitBlock(ContBB);
  // Buffer that receives the updated value.
  Address DesiredAddr = CreateTempAlloca();
  // For partial bit-fields, or when the atomic type requires zeroed memory,
  // start the desired buffer off as a copy of the old contents.
  // NOTE(review): presumably so that bits not written by the update keep
  // their previous value -- confirm.
  if ((LVal.isBitField() && BFI.Size != ValueSizeInBits) ||
      requiresMemSetZero(getAtomicAddress().getElementType())) {
    auto *OldVal = CGF.Builder.CreateLoad(ExpectedAddr);
    CGF.Builder.CreateStore(OldVal, DesiredAddr);
  }
  // Read the old value back in its atomic form and let UpdateOp compute the
  // new value into DesiredAddr.
  auto OldRVal = convertAtomicTempToRValue(ExpectedAddr,
                                           AggValueSlot::ignored(),
                                           SourceLocation(), /*AsValue=*/false);
  EmitAtomicUpdateValue(CGF, *this, OldRVal, UpdateOp, DesiredAddr);
  // Try to publish the new value.
  // NOTE(review): the loop relies on the libcall refreshing the expected
  // buffer when it fails -- confirm against the libcall's contract.
  auto *Res =
      EmitAtomicCompareExchangeLibcall(ExpectedAddr.getPointer(),
                                       DesiredAddr.getPointer(),
                                       AO, Failure);
  // Success -> exit; failure -> retry from atomic_cont.
  CGF.Builder.CreateCondBr(Res, ExitBB, ContBB);
  CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
}
1896 
/// Emit an atomic read-modify-write of this object as a native
/// load + cmpxchg loop.
///
/// \param AO         success ordering of the cmpxchg.
/// \param UpdateOp   callback that computes the new value from the old one.
/// \param IsVolatile whether the initial atomic load is volatile.
void AtomicInfo::EmitAtomicUpdateOp(
    llvm::AtomicOrdering AO, const llvm::function_ref<RValue(RValue)> &UpdateOp,
    bool IsVolatile) {
  // Failure ordering is the strongest one permitted for success ordering AO;
  // it is also used for the initial load below.
  auto Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);

  // Do the atomic load.
  auto *OldVal = EmitAtomicLoadOp(Failure, IsVolatile);
  // For non-simple lvalues perform compare-and-swap procedure.
  auto *ContBB = CGF.createBasicBlock("atomic_cont");
  auto *ExitBB = CGF.createBasicBlock("atomic_exit");
  // Remember the block we came from: it is the PHI's first predecessor.
  auto *CurBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(ContBB);
  // The PHI carries the most recently observed value of the atomic: the
  // initial load on entry, the cmpxchg's returned value on each retry.
  llvm::PHINode *PHI = CGF.Builder.CreatePHI(OldVal->getType(),
                                             /*NumReservedValues=*/2);
  PHI->addIncoming(OldVal, CurBB);
  // Temporary that receives the updated value, plus an atomic-int view of it.
  Address NewAtomicAddr = CreateTempAlloca();
  Address NewAtomicIntAddr = emitCastToAtomicIntPointer(NewAtomicAddr);
  // For partial bit-fields, or when the atomic type requires zeroed memory,
  // start the temporary off as a copy of the old contents.
  // NOTE(review): presumably so that bits not written by the update keep
  // their previous value -- confirm.
  if ((LVal.isBitField() && BFI.Size != ValueSizeInBits) ||
      requiresMemSetZero(getAtomicAddress().getElementType())) {
    CGF.Builder.CreateStore(PHI, NewAtomicIntAddr);
  }
  // Convert the old integer to its atomic form and let UpdateOp compute the
  // new value into the temporary.
  auto OldRVal = ConvertIntToValueOrAtomic(PHI, AggValueSlot::ignored(),
                                           SourceLocation(), /*AsValue=*/false);
  EmitAtomicUpdateValue(CGF, *this, OldRVal, UpdateOp, NewAtomicAddr);
  // Reload the temporary as the integer to be swapped in.
  auto *DesiredVal = CGF.Builder.CreateLoad(NewAtomicIntAddr);
  // Try to write new value using cmpxchg operation.
  auto Res = EmitAtomicCompareExchangeOp(PHI, DesiredVal, AO, Failure);
  // Feed the value returned by the cmpxchg back into the PHI from the block
  // that ends the loop body (UpdateOp may have emitted additional blocks).
  PHI->addIncoming(Res.first, CGF.Builder.GetInsertBlock());
  // Success -> exit; failure -> retry from atomic_cont.
  CGF.Builder.CreateCondBr(Res.second, ExitBB, ContBB);
  CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
}
1928 
1929 static void EmitAtomicUpdateValue(CodeGenFunction &CGF, AtomicInfo &Atomics,
1930                                   RValue UpdateRVal, Address DesiredAddr) {
1931   LValue AtomicLVal = Atomics.getAtomicLValue();
1932   LValue DesiredLVal;
1933   // Build new lvalue for temp address.
1934   if (AtomicLVal.isBitField()) {
1935     DesiredLVal =
1936         LValue::MakeBitfield(DesiredAddr, AtomicLVal.getBitFieldInfo(),
1937                              AtomicLVal.getType(), AtomicLVal.getBaseInfo(),
1938                              AtomicLVal.getTBAAInfo());
1939   } else if (AtomicLVal.isVectorElt()) {
1940     DesiredLVal =
1941         LValue::MakeVectorElt(DesiredAddr, AtomicLVal.getVectorIdx(),
1942                               AtomicLVal.getType(), AtomicLVal.getBaseInfo(),
1943                               AtomicLVal.getTBAAInfo());
1944   } else {
1945     assert(AtomicLVal.isExtVectorElt());
1946     DesiredLVal = LValue::MakeExtVectorElt(
1947         DesiredAddr, AtomicLVal.getExtVectorElts(), AtomicLVal.getType(),
1948         AtomicLVal.getBaseInfo(), AtomicLVal.getTBAAInfo());
1949   }
1950   // Store new value in the corresponding memory area.
1951   assert(UpdateRVal.isScalar());
1952   CGF.EmitStoreThroughLValue(UpdateRVal, DesiredLVal);
1953 }
1954 
/// Emit an atomic update that stores the fixed value \p UpdateRVal into this
/// (non-simple) l-value, as a loop built from the __atomic_load and
/// __atomic_compare_exchange libcalls.
///
/// \param AO         ordering used for the initial load and as the success
///                   ordering of the compare-exchange.
/// \param UpdateRVal scalar value to store.
/// \param IsVolatile forwarded to EmitAtomicLoadLibcall.
void AtomicInfo::EmitAtomicUpdateLibcall(llvm::AtomicOrdering AO,
                                         RValue UpdateRVal, bool IsVolatile) {
  // Failure ordering is the strongest one permitted for success ordering AO.
  auto Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);

  // Buffer holding the value we expect to find in the atomic object.
  Address ExpectedAddr = CreateTempAlloca();

  // Seed the expected buffer with the current contents of the object.
  EmitAtomicLoadLibcall(ExpectedAddr.getPointer(), AO, IsVolatile);
  // atomic_cont is the loop body (re-entered when the exchange fails);
  // atomic_exit is reached once the exchange succeeds.
  auto *ContBB = CGF.createBasicBlock("atomic_cont");
  auto *ExitBB = CGF.createBasicBlock("atomic_exit");
  CGF.EmitBlock(ContBB);
  // Buffer that receives the updated value.
  Address DesiredAddr = CreateTempAlloca();
  // For partial bit-fields, or when the atomic type requires zeroed memory,
  // start the desired buffer off as a copy of the old contents.
  // NOTE(review): presumably so that bits not written by the update keep
  // their previous value -- confirm.
  if ((LVal.isBitField() && BFI.Size != ValueSizeInBits) ||
      requiresMemSetZero(getAtomicAddress().getElementType())) {
    auto *OldVal = CGF.Builder.CreateLoad(ExpectedAddr);
    CGF.Builder.CreateStore(OldVal, DesiredAddr);
  }
  // Write the new value into the desired buffer.
  EmitAtomicUpdateValue(CGF, *this, UpdateRVal, DesiredAddr);
  // Try to publish the new value.
  // NOTE(review): the loop relies on the libcall refreshing the expected
  // buffer when it fails -- confirm against the libcall's contract.
  auto *Res =
      EmitAtomicCompareExchangeLibcall(ExpectedAddr.getPointer(),
                                       DesiredAddr.getPointer(),
                                       AO, Failure);
  // Success -> exit; failure -> retry from atomic_cont.
  CGF.Builder.CreateCondBr(Res, ExitBB, ContBB);
  CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
}
1979 
/// Emit an atomic update that stores the fixed value \p UpdateRVal into this
/// (non-simple) l-value, as a native load + cmpxchg loop.
///
/// \param AO         success ordering of the cmpxchg.
/// \param UpdateRVal scalar value to store.
/// \param IsVolatile whether the initial atomic load is volatile.
void AtomicInfo::EmitAtomicUpdateOp(llvm::AtomicOrdering AO, RValue UpdateRVal,
                                    bool IsVolatile) {
  // Failure ordering is the strongest one permitted for success ordering AO;
  // it is also used for the initial load below.
  auto Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);

  // Do the atomic load.
  auto *OldVal = EmitAtomicLoadOp(Failure, IsVolatile);
  // For non-simple lvalues perform compare-and-swap procedure.
  auto *ContBB = CGF.createBasicBlock("atomic_cont");
  auto *ExitBB = CGF.createBasicBlock("atomic_exit");
  // Remember the block we came from: it is the PHI's first predecessor.
  auto *CurBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(ContBB);
  // The PHI carries the most recently observed value of the atomic: the
  // initial load on entry, the cmpxchg's returned value on each retry.
  llvm::PHINode *PHI = CGF.Builder.CreatePHI(OldVal->getType(),
                                             /*NumReservedValues=*/2);
  PHI->addIncoming(OldVal, CurBB);
  // Temporary that receives the updated value, plus an atomic-int view of it.
  Address NewAtomicAddr = CreateTempAlloca();
  Address NewAtomicIntAddr = emitCastToAtomicIntPointer(NewAtomicAddr);
  // For partial bit-fields, or when the atomic type requires zeroed memory,
  // start the temporary off as a copy of the old contents.
  // NOTE(review): presumably so that bits not written by the update keep
  // their previous value -- confirm.
  if ((LVal.isBitField() && BFI.Size != ValueSizeInBits) ||
      requiresMemSetZero(getAtomicAddress().getElementType())) {
    CGF.Builder.CreateStore(PHI, NewAtomicIntAddr);
  }
  // Write the new value into the temporary, then reload the temporary as the
  // integer to be swapped in.
  EmitAtomicUpdateValue(CGF, *this, UpdateRVal, NewAtomicAddr);
  auto *DesiredVal = CGF.Builder.CreateLoad(NewAtomicIntAddr);
  // Try to write new value using cmpxchg operation.
  auto Res = EmitAtomicCompareExchangeOp(PHI, DesiredVal, AO, Failure);
  // Feed the value returned by the cmpxchg back into the PHI for the retry.
  PHI->addIncoming(Res.first, CGF.Builder.GetInsertBlock());
  // Success -> exit; failure -> retry from atomic_cont.
  CGF.Builder.CreateCondBr(Res.second, ExitBB, ContBB);
  CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
}
2008 
2009 void AtomicInfo::EmitAtomicUpdate(
2010     llvm::AtomicOrdering AO, const llvm::function_ref<RValue(RValue)> &UpdateOp,
2011     bool IsVolatile) {
2012   if (shouldUseLibcall()) {
2013     EmitAtomicUpdateLibcall(AO, UpdateOp, IsVolatile);
2014   } else {
2015     EmitAtomicUpdateOp(AO, UpdateOp, IsVolatile);
2016   }
2017 }
2018 
2019 void AtomicInfo::EmitAtomicUpdate(llvm::AtomicOrdering AO, RValue UpdateRVal,
2020                                   bool IsVolatile) {
2021   if (shouldUseLibcall()) {
2022     EmitAtomicUpdateLibcall(AO, UpdateRVal, IsVolatile);
2023   } else {
2024     EmitAtomicUpdateOp(AO, UpdateRVal, IsVolatile);
2025   }
2026 }
2027 
2028 void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue lvalue,
2029                                       bool isInit) {
2030   bool IsVolatile = lvalue.isVolatileQualified();
2031   llvm::AtomicOrdering AO;
2032   if (lvalue.getType()->isAtomicType()) {
2033     AO = llvm::AtomicOrdering::SequentiallyConsistent;
2034   } else {
2035     AO = llvm::AtomicOrdering::Release;
2036     IsVolatile = true;
2037   }
2038   return EmitAtomicStore(rvalue, lvalue, AO, IsVolatile, isInit);
2039 }
2040 
2041 /// Emit a store to an l-value of atomic type.
2042 ///
2043 /// Note that the r-value is expected to be an r-value *of the atomic
2044 /// type*; this means that for aggregate r-values, it should include
2045 /// storage for any padding that was necessary.
2046 void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest,
2047                                       llvm::AtomicOrdering AO, bool IsVolatile,
2048                                       bool isInit) {
2049   // If this is an aggregate r-value, it should agree in type except
2050   // maybe for address-space qualification.
2051   assert(!rvalue.isAggregate() ||
2052          rvalue.getAggregateAddress().getElementType() ==
2053              dest.getAddress(*this).getElementType());
2054 
2055   AtomicInfo atomics(*this, dest);
2056   LValue LVal = atomics.getAtomicLValue();
2057 
2058   // If this is an initialization, just put the value there normally.
2059   if (LVal.isSimple()) {
2060     if (isInit) {
2061       atomics.emitCopyIntoMemory(rvalue);
2062       return;
2063     }
2064 
2065     // Check whether we should use a library call.
2066     if (atomics.shouldUseLibcall()) {
2067       // Produce a source address.
2068       Address srcAddr = atomics.materializeRValue(rvalue);
2069 
2070       // void __atomic_store(size_t size, void *mem, void *val, int order)
2071       CallArgList args;
2072       args.add(RValue::get(atomics.getAtomicSizeValue()),
2073                getContext().getSizeType());
2074       args.add(RValue::get(EmitCastToVoidPtr(atomics.getAtomicPointer())),
2075                getContext().VoidPtrTy);
2076       args.add(RValue::get(EmitCastToVoidPtr(srcAddr.getPointer())),
2077                getContext().VoidPtrTy);
2078       args.add(
2079           RValue::get(llvm::ConstantInt::get(IntTy, (int)llvm::toCABI(AO))),
2080           getContext().IntTy);
2081       emitAtomicLibcall(*this, "__atomic_store", getContext().VoidTy, args);
2082       return;
2083     }
2084 
2085     // Okay, we're doing this natively.
2086     llvm::Value *intValue = atomics.convertRValueToInt(rvalue);
2087 
2088     // Do the atomic store.
2089     Address addr =
2090         atomics.emitCastToAtomicIntPointer(atomics.getAtomicAddress());
2091     intValue = Builder.CreateIntCast(
2092         intValue, addr.getElementType(), /*isSigned=*/false);
2093     llvm::StoreInst *store = Builder.CreateStore(intValue, addr);
2094 
2095     if (AO == llvm::AtomicOrdering::Acquire)
2096       AO = llvm::AtomicOrdering::Monotonic;
2097     else if (AO == llvm::AtomicOrdering::AcquireRelease)
2098       AO = llvm::AtomicOrdering::Release;
2099     // Initializations don't need to be atomic.
2100     if (!isInit)
2101       store->setAtomic(AO);
2102 
2103     // Other decoration.
2104     if (IsVolatile)
2105       store->setVolatile(true);
2106     CGM.DecorateInstructionWithTBAA(store, dest.getTBAAInfo());
2107     return;
2108   }
2109 
2110   // Emit simple atomic update operation.
2111   atomics.EmitAtomicUpdate(AO, rvalue, IsVolatile);
2112 }
2113 
2114 /// Emit a compare-and-exchange op for atomic type.
2115 ///
2116 std::pair<RValue, llvm::Value *> CodeGenFunction::EmitAtomicCompareExchange(
2117     LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc,
2118     llvm::AtomicOrdering Success, llvm::AtomicOrdering Failure, bool IsWeak,
2119     AggValueSlot Slot) {
2120   // If this is an aggregate r-value, it should agree in type except
2121   // maybe for address-space qualification.
2122   assert(!Expected.isAggregate() ||
2123          Expected.getAggregateAddress().getElementType() ==
2124              Obj.getAddress(*this).getElementType());
2125   assert(!Desired.isAggregate() ||
2126          Desired.getAggregateAddress().getElementType() ==
2127              Obj.getAddress(*this).getElementType());
2128   AtomicInfo Atomics(*this, Obj);
2129 
2130   return Atomics.EmitAtomicCompareExchange(Expected, Desired, Success, Failure,
2131                                            IsWeak);
2132 }
2133 
2134 void CodeGenFunction::EmitAtomicUpdate(
2135     LValue LVal, llvm::AtomicOrdering AO,
2136     const llvm::function_ref<RValue(RValue)> &UpdateOp, bool IsVolatile) {
2137   AtomicInfo Atomics(*this, LVal);
2138   Atomics.EmitAtomicUpdate(AO, UpdateOp, IsVolatile);
2139 }
2140 
2141 void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) {
2142   AtomicInfo atomics(*this, dest);
2143 
2144   switch (atomics.getEvaluationKind()) {
2145   case TEK_Scalar: {
2146     llvm::Value *value = EmitScalarExpr(init);
2147     atomics.emitCopyIntoMemory(RValue::get(value));
2148     return;
2149   }
2150 
2151   case TEK_Complex: {
2152     ComplexPairTy value = EmitComplexExpr(init);
2153     atomics.emitCopyIntoMemory(RValue::getComplex(value));
2154     return;
2155   }
2156 
2157   case TEK_Aggregate: {
2158     // Fix up the destination if the initializer isn't an expression
2159     // of atomic type.
2160     bool Zeroed = false;
2161     if (!init->getType()->isAtomicType()) {
2162       Zeroed = atomics.emitMemSetZeroIfNecessary();
2163       dest = atomics.projectValue();
2164     }
2165 
2166     // Evaluate the expression directly into the destination.
2167     AggValueSlot slot = AggValueSlot::forLValue(
2168         dest, *this, AggValueSlot::IsNotDestructed,
2169         AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsNotAliased,
2170         AggValueSlot::DoesNotOverlap,
2171         Zeroed ? AggValueSlot::IsZeroed : AggValueSlot::IsNotZeroed);
2172 
2173     EmitAggExpr(init, slot);
2174     return;
2175   }
2176   }
2177   llvm_unreachable("bad evaluation kind");
2178 }
2179