1; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
2; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s
3
4; CHECK: {{^}}test_8_min_char:
5; CHECK: buffer_store_byte
6; CHECK: buffer_store_byte
7; CHECK: buffer_store_byte
8; CHECK: buffer_store_byte
9; CHECK: buffer_store_byte
10; CHECK: buffer_store_byte
11; CHECK: buffer_store_byte
12; CHECK: buffer_store_byte
13; ModuleID = 'radeon'
14
; Kernel under test: lane-wise signed minimum over two groups of eight bytes.
; Eight i8 values are loaded one at a time from %in0 and from %in1 (byte offsets
; 0..7), assembled into <8 x i8> vectors, compared with icmp slt, and the smaller
; value of each lane is stored one byte at a time to %out (byte offsets 0..7).
; The FileCheck directives at the top of this file expect eight buffer_store_byte
; instructions in the generated code, one per result byte.
15define amdgpu_kernel void @test_8_min_char(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture readonly %in0, i8 addrspace(1)* nocapture readonly %in1) #0 {
16entry:
  ; First operand, low half: bytes 0..3 of %in0 go into lanes 0..3 of an undef vector.
17  %0 = load i8, i8 addrspace(1)* %in0, align 1
18  %1 = insertelement <8 x i8> undef, i8 %0, i32 0
19  %arrayidx2.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 1
20  %2 = load i8, i8 addrspace(1)* %arrayidx2.i.i, align 1
21  %3 = insertelement <8 x i8> %1, i8 %2, i32 1
22  %arrayidx6.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 2
23  %4 = load i8, i8 addrspace(1)* %arrayidx6.i.i, align 1
24  %5 = insertelement <8 x i8> %3, i8 %4, i32 2
25  %arrayidx10.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 3
26  %6 = load i8, i8 addrspace(1)* %arrayidx10.i.i, align 1
27  %7 = insertelement <8 x i8> %5, i8 %6, i32 3
  ; First operand, high half: bytes 4..7 of %in0 go into lanes 0..3 of a second,
  ; separate undef vector (not lanes 4..7); the shuffle below moves them into place.
28  %arrayidx.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 4
29  %8 = load i8, i8 addrspace(1)* %arrayidx.i.i, align 1
30  %9 = insertelement <8 x i8> undef, i8 %8, i32 0
31  %arrayidx2.i9.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 5
32  %10 = load i8, i8 addrspace(1)* %arrayidx2.i9.i, align 1
33  %11 = insertelement <8 x i8> %9, i8 %10, i32 1
34  %arrayidx6.i11.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 6
35  %12 = load i8, i8 addrspace(1)* %arrayidx6.i11.i, align 1
36  %13 = insertelement <8 x i8> %11, i8 %12, i32 2
37  %arrayidx10.i13.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 7
38  %14 = load i8, i8 addrspace(1)* %arrayidx10.i13.i, align 1
39  %15 = insertelement <8 x i8> %13, i8 %14, i32 3
  ; Mask indices 0..3 pick lanes 0..3 of %7 and indices 8..11 pick lanes 0..3 of %15,
  ; so the result holds bytes 0..7 of %in0 in lane order.
40  %vecinit5.i = shufflevector <8 x i8> %7, <8 x i8> %15, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ; Second operand, low half: bytes 0..3 of %in1, built the same way as for %in0.
41  %16 = load i8, i8 addrspace(1)* %in1, align 1
42  %17 = insertelement <8 x i8> undef, i8 %16, i32 0
43  %arrayidx2.i.i4 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 1
44  %18 = load i8, i8 addrspace(1)* %arrayidx2.i.i4, align 1
45  %19 = insertelement <8 x i8> %17, i8 %18, i32 1
46  %arrayidx6.i.i5 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 2
47  %20 = load i8, i8 addrspace(1)* %arrayidx6.i.i5, align 1
48  %21 = insertelement <8 x i8> %19, i8 %20, i32 2
49  %arrayidx10.i.i6 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 3
50  %22 = load i8, i8 addrspace(1)* %arrayidx10.i.i6, align 1
51  %23 = insertelement <8 x i8> %21, i8 %22, i32 3
  ; Second operand, high half: bytes 4..7 of %in1 in lanes 0..3 of a separate undef vector.
52  %arrayidx.i.i7 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 4
53  %24 = load i8, i8 addrspace(1)* %arrayidx.i.i7, align 1
54  %25 = insertelement <8 x i8> undef, i8 %24, i32 0
55  %arrayidx2.i9.i8 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 5
56  %26 = load i8, i8 addrspace(1)* %arrayidx2.i9.i8, align 1
57  %27 = insertelement <8 x i8> %25, i8 %26, i32 1
58  %arrayidx6.i11.i9 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 6
59  %28 = load i8, i8 addrspace(1)* %arrayidx6.i11.i9, align 1
60  %29 = insertelement <8 x i8> %27, i8 %28, i32 2
61  %arrayidx10.i13.i10 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 7
62  %30 = load i8, i8 addrspace(1)* %arrayidx10.i13.i10, align 1
63  %31 = insertelement <8 x i8> %29, i8 %30, i32 3
  ; Same mask as above: the result holds bytes 0..7 of %in1 in lane order.
64  %vecinit5.i11 = shufflevector <8 x i8> %23, <8 x i8> %31, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ; Signed minimum per lane: keep the %in0 lane where it is less than the %in1 lane,
  ; otherwise keep the %in1 lane.
65  %cmp.i = icmp slt <8 x i8> %vecinit5.i, %vecinit5.i11
66  %cond.i = select <8 x i1> %cmp.i, <8 x i8> %vecinit5.i, <8 x i8> %vecinit5.i11
  ; Write-back: lane k of the result is extracted and stored to %out + k as a single byte.
67  %32 = extractelement <8 x i8> %cond.i, i32 0
68  store i8 %32, i8 addrspace(1)* %out, align 1
69  %33 = extractelement <8 x i8> %cond.i, i32 1
70  %arrayidx2.i.i.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1
71  store i8 %33, i8 addrspace(1)* %arrayidx2.i.i.i, align 1
72  %34 = extractelement <8 x i8> %cond.i, i32 2
73  %arrayidx.i.i.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 2
74  store i8 %34, i8 addrspace(1)* %arrayidx.i.i.i, align 1
75  %35 = extractelement <8 x i8> %cond.i, i32 3
76  %arrayidx2.i6.i.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 3
77  store i8 %35, i8 addrspace(1)* %arrayidx2.i6.i.i, align 1
78  %arrayidx.i.i3 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 4
79  %36 = extractelement <8 x i8> %cond.i, i32 4
80  store i8 %36, i8 addrspace(1)* %arrayidx.i.i3, align 1
81  %37 = extractelement <8 x i8> %cond.i, i32 5
82  %arrayidx2.i.i6.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 5
83  store i8 %37, i8 addrspace(1)* %arrayidx2.i.i6.i, align 1
84  %38 = extractelement <8 x i8> %cond.i, i32 6
85  %arrayidx.i.i7.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 6
86  store i8 %38, i8 addrspace(1)* %arrayidx.i.i7.i, align 1
87  %39 = extractelement <8 x i8> %cond.i, i32 7
88  %arrayidx2.i6.i8.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 7
89  store i8 %39, i8 addrspace(1)* %arrayidx2.i6.i8.i, align 1
90  ret void
91}
92
; Attribute group #0, the group referenced by @test_8_min_char: marks the kernel nounwind.
93attributes #0 = { nounwind }
94
; Named metadata listing nine nodes. Only !3 refers to a function (@test_8_min_char);
; the other eight nodes each hold a single null operand.
; NOTE(review): the null nodes look like leftovers of kernels removed while this
; test was reduced -- confirm before deleting or renumbering them.
95!opencl.kernels = !{!0, !1, !2, !3, !4, !5, !6, !7, !8}
96
97!0 = !{null}
98!1 = !{null}
99!2 = !{null}
; The one live entry: the kernel defined in this file.
100!3 = !{void (i8 addrspace(1)*, i8 addrspace(1)*, i8 addrspace(1)*)* @test_8_min_char}
101!4 = !{null}
102!5 = !{null}
103!6 = !{null}
104!7 = !{null}
105!8 = !{null}
106