; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
4
; or of two <2 x i32> values read from consecutive elements of %in; the
; result is stored to %out. The directives below expect two OR_INT
; instructions under the EG prefix and two v_or_b32_e32 under the SI prefix
; (presumably one per vector element).
; EG-LABEL: {{^}}or_v2i32:
; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

; SI-LABEL: {{^}}or_v2i32:
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

define void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
  %in.next = getelementptr <2 x i32> addrspace(1)* %in, i32 1  ; element following %in
  %lhs = load <2 x i32> addrspace(1)* %in
  %rhs = load <2 x i32> addrspace(1)* %in.next
  %or = or <2 x i32> %lhs, %rhs
  store <2 x i32> %or, <2 x i32> addrspace(1)* %out
  ret void
}
21
; or of two <4 x i32> values read from consecutive elements of %in; the
; result is stored to %out. The directives below expect four OR_INT
; instructions under the EG prefix and four v_or_b32_e32 under the SI prefix
; (presumably one per vector element).
; EG-LABEL: {{^}}or_v4i32:
; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

; SI-LABEL: {{^}}or_v4i32:
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  %in.next = getelementptr <4 x i32> addrspace(1)* %in, i32 1  ; element following %in
  %lhs = load <4 x i32> addrspace(1)* %in
  %rhs = load <4 x i32> addrspace(1)* %in.next
  %or = or <4 x i32> %lhs, %rhs
  store <4 x i32> %or, <4 x i32> addrspace(1)* %out
  ret void
}
42
; or of two i32 arguments, stored to %out. The directive expects the
; operation to be selected as s_or_b32.
; SI-LABEL: {{^}}scalar_or_i32:
; SI: s_or_b32
define void @scalar_or_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
  %result = or i32 %a, %b
  store i32 %result, i32 addrspace(1)* %out
  ret void
}
50
; or of an i32 loaded through %a with the i32 argument %b, stored to %out.
; The directive expects a v_or_b32_e32.
; SI-LABEL: {{^}}vector_or_i32:
; SI: v_or_b32_e32 v{{[0-9]}}
define void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b) {
  %a.val = load i32 addrspace(1)* %a
  %result = or i32 %a.val, %b
  store i32 %result, i32 addrspace(1)* %out
  ret void
}
59
; or of the i32 argument %a with the constant 99999 (0x1869f), stored to
; %out. The directive expects s_or_b32 with 0x1869f as its last operand.
; SI-LABEL: {{^}}scalar_or_literal_i32:
; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x1869f
define void @scalar_or_literal_i32(i32 addrspace(1)* %out, i32 %a) {
  %result = or i32 %a, 99999
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}
67
; or of an i32 loaded through %a with the constant 65535 (0xffff), stored
; to %out. The directive expects v_or_b32_e32 with 0xffff as the first
; source operand. %b is not referenced by the body.
; SI-LABEL: {{^}}vector_or_literal_i32:
; SI: v_or_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
define void @vector_or_literal_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
  %a.val = load i32 addrspace(1)* %a, align 4
  %result = or i32 %a.val, 65535
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}
76
; or of an i32 loaded through %a with the constant 4, stored to %out. The
; directive expects v_or_b32_e32 with the literal 4 as the first source
; operand. %b is not referenced by the body.
; SI-LABEL: {{^}}vector_or_inline_immediate_i32:
; SI: v_or_b32_e32 v{{[0-9]+}}, 4, v{{[0-9]+}}
define void @vector_or_inline_immediate_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
  %a.val = load i32 addrspace(1)* %a, align 4
  %result = or i32 %a.val, 4
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}
85
; or of two i64 arguments, stored to %out. Under the EG prefix the
; directives expect two OR_INT instructions reading KC0 operands, in either
; order; under the SI prefix a single s_or_b64 is expected.
; EG-LABEL: {{^}}scalar_or_i64:
; EG-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
; EG-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
; SI-LABEL: {{^}}scalar_or_i64:
; SI: s_or_b64
define void @scalar_or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
  %result = or i64 %a, %b
  store i64 %result, i64 addrspace(1)* %out
  ret void
}
96
; or of two i64 values loaded from global memory through %a and %b, stored
; to %out. The directives expect the 64-bit or to appear as two
; v_or_b32_e32 instructions.
; SI-LABEL: {{^}}vector_or_i64:
; SI: v_or_b32_e32 v{{[0-9]}}
; SI: v_or_b32_e32 v{{[0-9]}}
define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %loada = load i64 addrspace(1)* %a, align 8
  ; The second operand has to come from %b. Loading %a twice turns this into
  ; `or x, x`, which leaves %b unused and does not test a real two-operand or.
  %loadb = load i64 addrspace(1)* %b, align 8
  %or = or i64 %loada, %loadb
  store i64 %or, i64 addrspace(1)* %out
  ret void
}
107
; or of an i64 loaded through %a with the i64 argument %b, stored to %out.
; The directives expect two v_or_b32_e32 instructions.
; SI-LABEL: {{^}}scalar_vector_or_i64:
; SI: v_or_b32_e32 v{{[0-9]}}
; SI: v_or_b32_e32 v{{[0-9]}}
define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 %b) {
  %a.val = load i64 addrspace(1)* %a
  %result = or i64 %a.val, %b
  store i64 %result, i64 addrspace(1)* %out
  ret void
}
117
; or of an i64 loaded through %a with the constant 22470723082367, i.e.
; 0x0000146fdf77987f, stored to %out. The directives expect the low half
; (0xdf77987f) and the high half (0x146f) to be materialised in SGPRs and
; then or'ed with the matching halves of the loaded register pair. %b is
; not referenced by the body.
; SI-LABEL: {{^}}vector_or_i64_loadimm:
; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xdf77987f
; SI-DAG: s_movk_i32 [[HI_S_IMM:s[0-9]+]], 0x146f
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %a.val = load i64 addrspace(1)* %a, align 8
  %result = or i64 %a.val, 22470723082367
  store i64 %result, i64 addrspace(1)* %out
  ret void
}
131
; or of an i64 loaded through %a with the constant 8, stored to %out. The
; directives expect the low half of the loaded pair to be or'ed with 8 and
; a second v_or_b32_e32 whose first source operand is 0. %b is not
; referenced by the body.
; FIXME: The or 0 should really be removed.
; SI-LABEL: {{^}}vector_or_i64_imm:
; SI: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
; SI: v_or_b32_e32 {{v[0-9]+}}, 8, v[[LO_VREG]]
; SI: v_or_b32_e32 {{v[0-9]+}}, 0, {{.*}}
; SI: s_endpgm
define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %a.val = load i64 addrspace(1)* %a, align 8
  %result = or i64 %a.val, 8
  store i64 %result, i64 addrspace(1)* %out
  ret void
}
144
; or of two i64 arguments whose result is truncated to i32 before being
; stored to %out. The directives expect two s_load_dword, a single
; s_or_b32 on the loaded registers, and the result copied to a VGPR for
; the buffer_store_dword.
; SI-LABEL: {{^}}trunc_i64_or_to_i32:
; SI: s_load_dword s[[SREG0:[0-9]+]]
; SI: s_load_dword s[[SREG1:[0-9]+]]
; SI: s_or_b32 s[[SRESULT:[0-9]+]], s[[SREG1]], s[[SREG0]]
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], s[[SRESULT]]
; SI: buffer_store_dword [[VRESULT]],
define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
  %wide.or = or i64 %b, %a
  %narrow = trunc i64 %wide.or to i32
  store i32 %narrow, i32 addrspace(1)* %out, align 8
  ret void
}
157
; or of two i1 values produced by `fcmp oge ..., 0.0` on floats loaded
; through %in0 and %in1; the or result selects which of the two floats is
; stored to %out.
;
; These directives previously used the EG-CHECK / SI-CHECK prefixes, which
; no FileCheck invocation in this file enables, so they were never
; evaluated. They now use the EG and SI prefixes. The operand patterns
; accept either source order and, for s_or_b64, either vcc or an SGPR pair.
; NOTE(review): the newly enabled patterns have not been run against llc
; output; confirm they match before committing.
; EG-LABEL: {{^}}or_i1:
; EG: OR_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{PS|PV\.[XYZW]}}, {{PS|PV\.[XYZW]}}

; SI-LABEL: {{^}}or_i1:
; SI: s_or_b64 {{vcc|s\[[0-9]+:[0-9]+\]}}, {{vcc|s\[[0-9]+:[0-9]+\]}}, {{vcc|s\[[0-9]+:[0-9]+\]}}
define void @or_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
  %a = load float addrspace(1)* %in0
  %b = load float addrspace(1)* %in1
  %acmp = fcmp oge float %a, 0.000000e+00
  %bcmp = fcmp oge float %b, 0.000000e+00
  %or = or i1 %acmp, %bcmp
  %result = select i1 %or, float %a, float %b
  store float %result, float addrspace(1)* %out
  ret void
}
173