; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; Checks that the DAG combiner folds byte-wise shift/and/or/xor patterns on
; i32 values into a single v_perm_b32 with the expected select mask, and that
; known-bits reasoning through the combine still constant-folds dependent
; stores (the known_* tests below).

; GCN-LABEL: {{^}}lsh8_or_and:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x6050400
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @lsh8_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = shl i32 %tmp, 8
  %tmp3 = and i32 %arg1, 255
  %tmp4 = or i32 %tmp2, %tmp3
  store i32 %tmp4, i32 addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}lsr24_or_and:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7060503
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @lsr24_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = lshr i32 %tmp, 24
  %tmp3 = and i32 %arg1, 4294967040 ; 0xffffff00
  %tmp4 = or i32 %tmp2, %tmp3
  store i32 %tmp4, i32 addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}and_or_lsr24:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7060503
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @and_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = and i32 %tmp, 4294967040 ; 0xffffff00
  %tmp3 = lshr i32 %arg1, 24
  %tmp4 = or i32 %tmp2, %tmp3
  %tmp5 = xor i32 %tmp4, -2147483648
  store i32 %tmp5, i32 addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}and_or_and:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020500
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @and_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = and i32 %tmp, -16711936
  %tmp3 = and i32 %arg1, 16711935
  %tmp4 = or i32 %tmp2, %tmp3
  store i32 %tmp4, i32 addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}lsh8_or_lsr24:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x6050403
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @lsh8_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = shl i32 %tmp, 8
  %tmp3 = lshr i32 %arg1, 24
  %tmp4 = or i32 %tmp2, %tmp3
  store i32 %tmp4, i32 addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}lsh16_or_lsr24:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x5040c03
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @lsh16_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = shl i32 %tmp, 16
  %tmp3 = lshr i32 %arg1, 24
  %tmp4 = or i32 %tmp2, %tmp3
  store i32 %tmp4, i32 addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}and_xor_and:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020104
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @and_xor_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = and i32 %tmp, -16776961
  %tmp3 = and i32 %arg1, 16776960
  %tmp4 = xor i32 %tmp2, %tmp3
  store i32 %tmp4, i32 addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}and_or_or_and:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @and_or_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %and = and i32 %tmp, 16711935 ; 0x00ff00ff
  %tmp1 = and i32 %arg1, 4294967040 ; 0xffffff00
  %tmp2 = or i32 %tmp1, -65536
  %tmp3 = or i32 %tmp2, %and
  store i32 %tmp3, i32 addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}and_or_and_shl:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x50c0c00
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @and_or_and_shl(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = shl i32 %tmp, 16
  %tmp3 = and i32 %arg1, 65535
  %tmp4 = or i32 %tmp2, %tmp3
  %and = and i32 %tmp4, 4278190335
  store i32 %and, i32 addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}or_and_or:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020104
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @or_and_or(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %or1 = or i32 %tmp, 16776960 ; 0x00ffff00
  %or2 = or i32 %arg1, 4278190335 ; 0xff0000ff
  %and = and i32 %or1, %or2
  store i32 %and, i32 addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}known_ffff0500:
; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500
; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
define amdgpu_kernel void @known_ffff0500(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %load = load i32, i32 addrspace(1)* %gep, align 4
  %mask1 = or i32 %arg1, 32768 ; 0x8000
  %mask2 = or i32 %load, 4
  %and = and i32 %mask2, 16711935 ; 0x00ff00ff
  %tmp1 = and i32 %mask1, 4294967040 ; 0xffffff00
  %tmp2 = or i32 %tmp1, 4294901760 ; 0xffff0000
  %tmp3 = or i32 %tmp2, %and
  store i32 %tmp3, i32 addrspace(1)* %gep, align 4
  %v = and i32 %tmp3, 4294934532 ; 0xffff8004
  store i32 %v, i32 addrspace(1)* %arg, align 4
  ret void
}

; GCN-LABEL: {{^}}known_050c0c00:
; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x50c0c00
; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 4{{$}}
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
define amdgpu_kernel void @known_050c0c00(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = shl i32 %tmp, 16
  %mask = or i32 %arg1, 4
  %tmp3 = and i32 %mask, 65535
  %tmp4 = or i32 %tmp2, %tmp3
  %and = and i32 %tmp4, 4278190335
  store i32 %and, i32 addrspace(1)* %gep, align 4
  %v = and i32 %and, 16776964
  store i32 %v, i32 addrspace(1)* %arg, align 4
  ret void
}

; GCN-LABEL: {{^}}known_ffff8004:
; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500
; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
define amdgpu_kernel void @known_ffff8004(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %load = load i32, i32 addrspace(1)* %gep, align 4
  %mask1 = or i32 %arg1, 4
  %mask2 = or i32 %load, 32768 ; 0x8000
  %and = and i32 %mask1, 16711935 ; 0x00ff00ff
  %tmp1 = and i32 %mask2, 4294967040 ; 0xffffff00
  %tmp2 = or i32 %tmp1, 4294901760 ; 0xffff0000
  %tmp3 = or i32 %tmp2, %and
  store i32 %tmp3, i32 addrspace(1)* %gep, align 4
  %v = and i32 %tmp3, 4294934532 ; 0xffff8004
  store i32 %v, i32 addrspace(1)* %arg, align 4
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x()