; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Code generation checks for the integer `or` instruction. The two amdgcn
; invocations above (tonga and verde) share the SI check prefix; the r600
; invocation (redwood) uses the EG check prefix.

; <2 x i32> or of two vectors loaded from consecutive slots of %in: two
; 32-bit OR instructions are expected on both targets.
; EG-LABEL: {{^}}or_v2i32:
; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

; SI-LABEL: {{^}}or_v2i32:
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

define void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
  %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
  %a = load <2 x i32> addrspace(1) * %in
  %b = load <2 x i32> addrspace(1) * %b_ptr
  %result = or <2 x i32> %a, %b
  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
  ret void
}

; <4 x i32> variant of the test above: four 32-bit OR instructions expected.
; EG-LABEL: {{^}}or_v4i32:
; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

; SI-LABEL: {{^}}or_v4i32:
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
  %a = load <4 x i32> addrspace(1) * %in
  %b = load <4 x i32> addrspace(1) * %b_ptr
  %result = or <4 x i32> %a, %b
  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
  ret void
}

; Both operands are kernel arguments; a scalar OR is expected.
; SI-LABEL: {{^}}scalar_or_i32:
; SI: s_or_b32
define void @scalar_or_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
  %or = or i32 %a, %b
  store i32 %or, i32 addrspace(1)* %out
  ret void
}

; One operand is loaded from memory; a vector OR is expected.
; SI-LABEL: {{^}}vector_or_i32:
; SI: v_or_b32_e32 v{{[0-9]}}
define void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b) {
  %loada = load i32 addrspace(1)* %a
  %or = or i32 %loada, %b
  store i32 %or, i32 addrspace(1)* %out
  ret void
}

; Scalar OR with a 32-bit literal operand (99999 == 0x1869f).
; SI-LABEL: {{^}}scalar_or_literal_i32:
; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x1869f
define void @scalar_or_literal_i32(i32 addrspace(1)* %out, i32 %a) {
  %or = or i32 %a, 99999
  store i32 %or, i32 addrspace(1)* %out, align 4
  ret void
}

; Vector OR with a 32-bit literal operand (65535 == 0xffff).
; SI-LABEL: {{^}}vector_or_literal_i32:
; SI: v_or_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
define void @vector_or_literal_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
  %loada = load i32 addrspace(1)* %a, align 4
  %or = or i32 %loada, 65535
  store i32 %or, i32 addrspace(1)* %out, align 4
  ret void
}

; Vector OR with the small constant 4, which the check expects to appear
; directly as an operand of the instruction.
; SI-LABEL: {{^}}vector_or_inline_immediate_i32:
; SI: v_or_b32_e32 v{{[0-9]+}}, 4, v{{[0-9]+}}
define void @vector_or_inline_immediate_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
  %loada = load i32 addrspace(1)* %a, align 4
  %or = or i32 %loada, 4
  store i32 %or, i32 addrspace(1)* %out, align 4
  ret void
}

; 64-bit OR of two kernel arguments: two 32-bit OR_INT on the r600 target,
; a single s_or_b64 on the amdgcn targets.
; EG-LABEL: {{^}}scalar_or_i64:
; EG-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
; EG-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
; SI-LABEL: {{^}}scalar_or_i64:
; SI: s_or_b64
define void @scalar_or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
  %or = or i64 %a, %b
  store i64 %or, i64 addrspace(1)* %out
  ret void
}

; 64-bit OR of two loaded values: expected as two 32-bit vector ORs.
; SI-LABEL: {{^}}vector_or_i64:
; SI: v_or_b32_e32 v{{[0-9]}}
; SI: v_or_b32_e32 v{{[0-9]}}
define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %loada = load i64 addrspace(1)* %a, align 8
  ; The second operand comes from %b. It was previously loaded from %a, which
  ; made this an OR of a value with itself and left %b unused.
  %loadb = load i64 addrspace(1)* %b, align 8
  %or = or i64 %loada, %loadb
  store i64 %or, i64 addrspace(1)* %out
  ret void
}

; 64-bit OR of a loaded value with a kernel argument: two 32-bit vector ORs.
; SI-LABEL: {{^}}scalar_vector_or_i64:
; SI: v_or_b32_e32 v{{[0-9]}}
; SI: v_or_b32_e32 v{{[0-9]}}
define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 %b) {
  %loada = load i64 addrspace(1)* %a
  %or = or i64 %loada, %b
  store i64 %or, i64 addrspace(1)* %out
  ret void
}

; 64-bit OR with the constant 22470723082367 (0x146f_df77987f): the checks
; expect each 32-bit half of the constant to be moved into a scalar register
; and OR'ed with the matching half of the loaded value.
; SI-LABEL: {{^}}vector_or_i64_loadimm:
; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xdf77987f
; SI-DAG: s_movk_i32 [[HI_S_IMM:s[0-9]+]], 0x146f
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %loada = load i64 addrspace(1)* %a, align 8
  %or = or i64 %loada, 22470723082367
  store i64 %or, i64 addrspace(1)* %out
  ret void
}

; FIXME: The or 0 should really be removed.
; SI-LABEL: {{^}}vector_or_i64_imm:
; SI: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
; SI: v_or_b32_e32 {{v[0-9]+}}, 8, v[[LO_VREG]]
; SI: v_or_b32_e32 {{v[0-9]+}}, 0, {{.*}}
; SI: s_endpgm
; The i64 constant 8 has a zero upper half, which is why the second OR above
; is matched with a literal 0 operand.
define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %loada = load i64 addrspace(1)* %a, align 8
  %or = or i64 %loada, 8
  store i64 %or, i64 addrspace(1)* %out
  ret void
}

; A 64-bit OR whose result is truncated to i32: the checks expect a single
; 32-bit scalar OR of two loaded dwords, copied to a VGPR for the store.
; SI-LABEL: {{^}}trunc_i64_or_to_i32:
; SI: s_load_dword s[[SREG0:[0-9]+]]
; SI: s_load_dword s[[SREG1:[0-9]+]]
; SI: s_or_b32 s[[SRESULT:[0-9]+]], s[[SREG1]], s[[SREG0]]
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], s[[SRESULT]]
; SI: buffer_store_dword [[VRESULT]],
define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
  %or = or i64 %b, %a
  %trunc = trunc i64 %or to i32
  store i32 %trunc, i32 addrspace(1)* %out, align 8
  ret void
}

; OR of two i1 compare results feeding a select.
;
; NOTE(review): these checks previously used the EG-CHECK / SI-CHECK prefixes,
; which no FileCheck invocation in this file enables, so they were never
; evaluated. They now use the active prefixes. Because the old source-operand
; patterns were never validated against real output, only the mnemonic and the
; destination register are matched here; tighten the operands once confirmed
; against llc output.
; EG-LABEL: {{^}}or_i1:
; EG: OR_INT {{\** *}}T{{[0-9]+\.[XYZW]}}

; SI-LABEL: {{^}}or_i1:
; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}]
define void @or_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
  %a = load float addrspace(1) * %in0
  %b = load float addrspace(1) * %in1
  %acmp = fcmp oge float %a, 0.000000e+00
  %bcmp = fcmp oge float %b, 0.000000e+00
  %or = or i1 %acmp, %bcmp
  %result = select i1 %or, float %a, float %b
  store float %result, float addrspace(1)* %out
  ret void
}