1; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
3
4
; Loads one i32, reinterprets it as <2 x i16>, and broadcasts element 1 of
; that pair into all four lanes of the <4 x i16> written to %out.
; The checks below expect a single dword load, one right shift by 16 to
; extract the element, and four short stores that all use the shifted value.
; FUNC-LABEL: {{^}}scalar_to_vector_v2i32:
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: v_lshrrev_b32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]]
; SI: buffer_store_short [[RESULT]]
; SI: buffer_store_short [[RESULT]]
; SI: buffer_store_short [[RESULT]]
; SI: buffer_store_short [[RESULT]]
; SI: s_endpgm
define void @scalar_to_vector_v2i32(<4 x i16> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %word = load i32 addrspace(1)* %in, align 4
  %halves = bitcast i32 %word to <2 x i16>
  ; Every lane of the shuffle mask selects element 1 of %halves.
  %splat = shufflevector <2 x i16> %halves, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  store <4 x i16> %splat, <4 x i16> addrspace(1)* %out, align 8
  ret void
}
20
; Same shape as the i32 variant, but the loaded scalar is a float: its bits
; are reinterpreted as <2 x i16> and element 1 is broadcast into all four
; lanes of the <4 x i16> written to %out.
; The checks below expect a single dword load, one right shift by 16 to
; extract the element, and four short stores that all use the shifted value.
; FUNC-LABEL: {{^}}scalar_to_vector_v2f32:
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: v_lshrrev_b32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]]
; SI: buffer_store_short [[RESULT]]
; SI: buffer_store_short [[RESULT]]
; SI: buffer_store_short [[RESULT]]
; SI: buffer_store_short [[RESULT]]
; SI: s_endpgm
define void @scalar_to_vector_v2f32(<4 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
  %fval = load float addrspace(1)* %in, align 4
  %halves = bitcast float %fval to <2 x i16>
  ; Every lane of the shuffle mask selects element 1 of %halves.
  %splat = shufflevector <2 x i16> %halves, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  store <4 x i16> %splat, <4 x i16> addrspace(1)* %out, align 8
  ret void
}
36
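; Getting a SCALAR_TO_VECTOR node to survive until instruction selection
; seems to be tricky. The commented-out cases below each managed to produce
; one, but for some reason the node never made it to selection, so they are
; kept here disabled.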
39
40
41; define void @scalar_to_vector_test2(<8 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
42;   %tmp1 = load i32 addrspace(1)* %in, align 4
43;   %bc = bitcast i32 %tmp1 to <4 x i8>
44
45;   %tmp2 = shufflevector <4 x i8> %bc, <4 x i8> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
46;   store <8 x i8> %tmp2, <8 x i8> addrspace(1)* %out, align 4
47;   ret void
48; }
49
50; define void @scalar_to_vector_test3(<4 x i32> addrspace(1)* %out) nounwind {
51;   %newvec0 = insertelement <2 x i64> undef, i64 12345, i32 0
52;   %newvec1 = insertelement <2 x i64> %newvec0, i64 undef, i32 1
53;   %bc = bitcast <2 x i64> %newvec1 to <4 x i32>
54;   %add = add <4 x i32> %bc, <i32 1, i32 2, i32 3, i32 4>
55;   store <4 x i32> %add, <4 x i32> addrspace(1)* %out, align 16
56;   ret void
57; }
58
59; define void @scalar_to_vector_test4(<8 x i16> addrspace(1)* %out) nounwind {
60;   %newvec0 = insertelement <4 x i32> undef, i32 12345, i32 0
61;   %bc = bitcast <4 x i32> %newvec0 to <8 x i16>
62;   %add = add <8 x i16> %bc, <i16 1, i16 2, i16 3, i16 4, i16 1, i16 2, i16 3, i16 4>
63;   store <8 x i16> %add, <8 x i16> addrspace(1)* %out, align 16
64;   ret void
65; }
66
67; define void @scalar_to_vector_test5(<4 x i16> addrspace(1)* %out) nounwind {
68;   %newvec0 = insertelement <2 x i32> undef, i32 12345, i32 0
69;   %bc = bitcast <2 x i32> %newvec0 to <4 x i16>
70;   %add = add <4 x i16> %bc, <i16 1, i16 2, i16 3, i16 4>
71;   store <4 x i16> %add, <4 x i16> addrspace(1)* %out, align 16
72;   ret void
73; }
74
75; define void @scalar_to_vector_test6(<4 x i16> addrspace(1)* %out) nounwind {
76;   %newvec0 = insertelement <2 x i32> undef, i32 12345, i32 0
77;   %bc = bitcast <2 x i32> %newvec0 to <4 x i16>
78;   %add = add <4 x i16> %bc, <i16 1, i16 2, i16 3, i16 4>
79;   store <4 x i16> %add, <4 x i16> addrspace(1)* %out, align 16
80;   ret void
81; }
82