; RUN: llc -mtriple amdgcn--amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads=true -verify-machineinstrs < %s | FileCheck %s

; Code generation test for global loads that sit in (or next to) loops which
; also store to global memory. The kernel pointer arguments are not marked
; noalias, so a store through %arg1 may modify memory read through %arg.
; Every load matched below is expected to be emitted as flat_load_dword even
; though -amdgpu-scalarize-global-loads is enabled.
;
; The FileCheck directives for @cfg follow the order in which the blocks are
; matched in the llc output (%bb22, then %bb11, then %bb20); do not reorder
; them to follow the IR block order.

; CHECK-LABEL: %bb22

; Load from %arg has alias store in Loop

; CHECK: flat_load_dword

; #####################################################################

; Load from %arg1 has no-alias store in Loop - arg1[i+1] never aliases arg1[i]
; However, our analysis cannot detect this.

; CHECK: flat_load_dword

; #####################################################################

; CHECK-LABEL: %bb11

; Load from %arg in a Loop body has alias store

; CHECK: flat_load_dword

; CHECK-LABEL: %bb20

; CHECK: flat_store_dword

; @cfg: multi-block loop (%bb11 -> [%bb20] -> %bb22 -> %bb11).
;   %arg  - read-only input buffer (nocapture readonly).
;   %arg1 - buffer that is both stored to (%bb20, %bb8) and loaded from (%bb22).
;   %arg2 - index of the trip-count element in %arg, divisor of the srem in
;           %bb11, and index of the final result slot in %arg1.
define amdgpu_kernel void @cfg(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) #0 {
bb:
  ; Load the loop bound from arg[arg2]; the loop is skipped when it is <= 0.
  %tmp = sext i32 %arg2 to i64
  %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp
  %tmp4 = load i32, i32 addrspace(1)* %tmp3, align 4, !tbaa !0
  %tmp5 = icmp sgt i32 %tmp4, 0
  br i1 %tmp5, label %bb6, label %bb8

bb6:                                              ; preds = %bb
  br label %bb11

bb7:                                              ; preds = %bb22
  br label %bb8

bb8:                                              ; preds = %bb7, %bb
  ; Write the accumulated sum (0 when the loop did not run) to arg1[arg2].
  %tmp9 = phi i32 [ 0, %bb ], [ %tmp30, %bb7 ]
  %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp
  store i32 %tmp9, i32 addrspace(1)* %tmp10, align 4, !tbaa !0
  ret void

bb11:                                             ; preds = %bb22, %bb6
  ; Loop header. %tmp12 is the running sum, %tmp13 the induction variable i.
  %tmp12 = phi i32 [ %tmp30, %bb22 ], [ 0, %bb6 ]
  %tmp13 = phi i32 [ %tmp25, %bb22 ], [ 0, %bb6 ]
  ; Load arg[i srem arg2]; the store in %bb20 belongs to the same loop.
  %tmp14 = srem i32 %tmp13, %arg2
  %tmp15 = sext i32 %tmp14 to i64
  %tmp16 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp15
  %tmp17 = load i32, i32 addrspace(1)* %tmp16, align 4, !tbaa !0
  %tmp18 = icmp sgt i32 %tmp17, 100
  %tmp19 = sext i32 %tmp13 to i64
  br i1 %tmp18, label %bb20, label %bb22

bb20:                                             ; preds = %bb11
  ; Conditional store arg1[i] = 0, taken only when the loaded value is > 100.
  %tmp21 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp19
  store i32 0, i32 addrspace(1)* %tmp21, align 4, !tbaa !0
  br label %bb22

bb22:                                             ; preds = %bb20, %bb11
  ; Loop latch: sum += arg[i] + arg1[i + 1]; both loads are matched above.
  %tmp23 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp19
  %tmp24 = load i32, i32 addrspace(1)* %tmp23, align 4, !tbaa !0
  %tmp25 = add nuw nsw i32 %tmp13, 1
  %tmp26 = sext i32 %tmp25 to i64
  %tmp27 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp26
  %tmp28 = load i32, i32 addrspace(1)* %tmp27, align 4, !tbaa !0
  %tmp29 = add i32 %tmp24, %tmp12
  %tmp30 = add i32 %tmp29, %tmp28
  ; Exit once i + 1 equals the bound loaded in the entry block.
  %tmp31 = icmp eq i32 %tmp25, %tmp4
  br i1 %tmp31, label %bb7, label %bb11
}

; one more test to ensure that aliasing store after the load
; is considered clobbering if load parent block is the same
; as a loop header block.

; CHECK-LABEL: %bb1

; Load from %arg has alias store that is after the load
; but is considered clobbering because of the loop.

; CHECK: flat_load_dword

; @cfg_selfloop: single-block loop (%bb1 branches back to itself) that copies
; arg[i srem arg2] into arg1[i] for i = 0 .. 99.
define amdgpu_kernel void @cfg_selfloop(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) #0 {
bb:
  br label %bb1

bb2:
  ret void

bb1:
  ; %tmp13 is the induction variable i.
  %tmp13 = phi i32 [ %tmp25, %bb1 ], [ 0, %bb ]
  %tmp14 = srem i32 %tmp13, %arg2
  %tmp15 = sext i32 %tmp14 to i64
  %tmp16 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp15
  %tmp17 = load i32, i32 addrspace(1)* %tmp16, align 4, !tbaa !0
  %tmp19 = sext i32 %tmp13 to i64
  %tmp21 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp19
  ; The store follows the load in program order, but the back edge makes it
  ; precede the load of the next iteration.
  store i32 %tmp17, i32 addrspace(1)* %tmp21, align 4, !tbaa !0
  %tmp25 = add nuw nsw i32 %tmp13, 1
  %tmp31 = icmp eq i32 %tmp25, 100
  br i1 %tmp31, label %bb2, label %bb1
}


attributes #0 = { "target-cpu"="fiji" }

; TBAA access tag for "int" accesses, attached to every load and store above.
!0 = !{!1, !1, i64 0}
!1 = !{!"int", !2, i64 0}
!2 = !{!"omnipotent char", !3, i64 0}
!3 = !{!"Simple C/C++ TBAA"}