1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-linux -mcpu=corei7-avx | FileCheck %s
3; RUN: opt -instsimplify -disable-output < %s
4
; AGEP0: splat the scalar %ptr into all four lanes of a pointer vector, then
; apply two chained vector GEPs whose index vectors are constants.
; The expected code broadcasts the pointer from the stack and performs a single
; vpaddd against a constant-pool operand, so only one vector add is expected
; for the two constant index vectors.
5define <4 x i32*> @AGEP0(i32* %ptr) nounwind {
6; CHECK-LABEL: AGEP0:
7; CHECK:       # %bb.0:
8; CHECK-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %xmm0
9; CHECK-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
10; CHECK-NEXT:    retl
11  %vecinit.i = insertelement <4 x i32*> undef, i32* %ptr, i32 0
12  %vecinit2.i = insertelement <4 x i32*> %vecinit.i, i32* %ptr, i32 1
13  %vecinit4.i = insertelement <4 x i32*> %vecinit2.i, i32* %ptr, i32 2
14  %vecinit6.i = insertelement <4 x i32*> %vecinit4.i, i32* %ptr, i32 3
15  %A2 = getelementptr i32, <4 x i32*> %vecinit6.i, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
16  %A3 = getelementptr i32, <4 x i32*> %A2, <4 x i32> <i32 10, i32 14, i32 19, i32 233>
17  ret <4 x i32*> %A3
18}
19
; AGEP1: vector GEP with a constant index vector, followed by an extract of
; lane 3 and a load through the extracted pointer.
; Only lane 3 is consumed. The expected code extracts lane 3 of the incoming
; pointer vector and folds that lane's constant offset (index 4 over i32,
; displacement 16) into the address of the scalar load; no vector add is
; expected.
20define i32 @AGEP1(<4 x i32*> %param) nounwind {
21; CHECK-LABEL: AGEP1:
22; CHECK:       # %bb.0:
23; CHECK-NEXT:    vextractps $3, %xmm0, %eax
24; CHECK-NEXT:    movl 16(%eax), %eax
25; CHECK-NEXT:    retl
26  %A2 = getelementptr i32, <4 x i32*> %param, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
27  %k = extractelement <4 x i32*> %A2, i32 3
28  %v = load i32, i32* %k
29  ret i32 %v
30}
31
; AGEP2: vector GEP over i32 with a variable <4 x i32> index vector, followed
; by an extract of lane 3 and a load through the extracted pointer.
; The expected code scales all four offsets with a left shift by 2, adds them
; to the base pointer vector, then extracts lane 3 and loads from it.
32define i32 @AGEP2(<4 x i32*> %param, <4 x i32> %off) nounwind {
33; CHECK-LABEL: AGEP2:
34; CHECK:       # %bb.0:
35; CHECK-NEXT:    vpslld $2, %xmm1, %xmm1
36; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
37; CHECK-NEXT:    vpextrd $3, %xmm0, %eax
38; CHECK-NEXT:    movl (%eax), %eax
39; CHECK-NEXT:    retl
40  %A2 = getelementptr i32, <4 x i32*> %param, <4 x i32> %off
41  %k = extractelement <4 x i32*> %A2, i32 3
42  %v = load i32, i32* %k
43  ret i32 %v
44}
45
; AGEP3: vector GEP over i32 with a variable index vector, after which lane 3
; of the result is overwritten with the address of a local alloca.
; The expected code still performs the shift-by-2 and vector add, then copies
; %esp into %eax and inserts it into lane 3 with vpinsrd.
; NOTE(review): the pushl/popl pair around the body presumably reserves the
; 4-byte alloca slot -- confirm.
46define <4 x i32*> @AGEP3(<4 x i32*> %param, <4 x i32> %off) nounwind {
47; CHECK-LABEL: AGEP3:
48; CHECK:       # %bb.0:
49; CHECK-NEXT:    pushl %eax
50; CHECK-NEXT:    vpslld $2, %xmm1, %xmm1
51; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
52; CHECK-NEXT:    movl %esp, %eax
53; CHECK-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
54; CHECK-NEXT:    popl %eax
55; CHECK-NEXT:    retl
56  %A2 = getelementptr i32, <4 x i32*> %param, <4 x i32> %off
57  %v = alloca i32
58  %k = insertelement <4 x i32*> %A2, i32* %v, i32 3
59  ret <4 x i32*> %k
60}
61
; AGEP4: vector GEP over i16 elements with a variable <4 x i32> index vector,
; returning the whole pointer vector.
62define <4 x i16*> @AGEP4(<4 x i16*> %param, <4 x i32> %off) nounwind {
63; Multiply offset by two (add it to itself).
64; add the base to the offset
65; CHECK-LABEL: AGEP4:
66; CHECK:       # %bb.0:
67; CHECK-NEXT:    vpaddd %xmm1, %xmm1, %xmm1
68; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
69; CHECK-NEXT:    retl
70  %A = getelementptr i16, <4 x i16*> %param, <4 x i32> %off
71  ret <4 x i16*> %A
72}
73
; AGEP5: vector GEP over i8 elements where the index vector is <4 x i8>,
; narrower than the 32-bit pointers of this target.
; The expected code sign-extends the byte indices to 32-bit lanes (vpmovsxbd)
; and then adds them to the base vector; no scaling instruction is expected.
74define <4 x i8*> @AGEP5(<4 x i8*> %param, <4 x i8> %off) nounwind {
75; CHECK-LABEL: AGEP5:
76; CHECK:       # %bb.0:
77; CHECK-NEXT:    vpmovsxbd %xmm1, %xmm1
78; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
79; CHECK-NEXT:    retl
80  %A = getelementptr i8, <4 x i8*> %param, <4 x i8> %off
81  ret <4 x i8*> %A
82}
83
84
85; The size of each element is 1 byte. No need to multiply by element size.
; AGEP6: vector GEP over i8 with a <4 x i32> index vector; the expected code
; is a single vpaddd of the index vector onto the base pointer vector.
86define <4 x i8*> @AGEP6(<4 x i8*> %param, <4 x i32> %off) nounwind {
87; CHECK-LABEL: AGEP6:
88; CHECK:       # %bb.0:
89; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
90; CHECK-NEXT:    retl
91  %A = getelementptr i8, <4 x i8*> %param, <4 x i32> %off
92  ret <4 x i8*> %A
93}
94
; AGEP7: vector-of-pointers base combined with a scalar i32 index.
; The expected code broadcasts the scalar index from the stack into all four
; lanes and adds it to the base vector; the element type is i8 and no scaling
; instruction is expected.
95define <4 x i8*> @AGEP7(<4 x i8*> %param, i32 %off) nounwind {
96; CHECK-LABEL: AGEP7:
97; CHECK:       # %bb.0:
98; CHECK-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %xmm1
99; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
100; CHECK-NEXT:    retl
101  %A = getelementptr i8, <4 x i8*> %param, i32 %off
102  ret <4 x i8*> %A
103}
104
; AGEP8: scalar i16* base combined with a <4 x i32> index vector, producing a
; vector of pointers. The expected code broadcasts the scalar base from the
; stack into all four lanes before the final add.
105define <4 x i16*> @AGEP8(i16* %param, <4 x i32> %off) nounwind {
106; Multiply offset by two (add it to itself).
107; add the base to the offset
108; CHECK-LABEL: AGEP8:
109; CHECK:       # %bb.0:
110; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
111; CHECK-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %xmm1
112; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
113; CHECK-NEXT:    retl
114  %A = getelementptr i16, i16* %param, <4 x i32> %off
115  ret <4 x i16*> %A
116}
117
; AGEP9: scalar i16* base combined with a <64 x i32> index vector, producing a
; <64 x i16*> result that is much wider than a single vector register.
; The expected code processes the offsets in sixteen 128-bit pieces. Each piece
; is added to itself (doubling it, i16 being 2 bytes) and then added to the
; base, which is broadcast once from 12(%ebp) into %xmm5.
;   * Six pieces come from the low and high halves of %ymm0, %ymm1 and %ymm2.
;   * Ten pieces are loaded from the stack at 40(%ebp) through 184(%ebp).
; Nine of the results are spilled to the realigned local frame and reloaded
; later; all sixteen are stored at offsets 0 through 240 from the pointer
; loaded from 8(%ebp).
; NOTE(review): 8(%ebp) looks like a hidden return-slot pointer supplied by the
; caller (the function ends with retl $4) -- confirm against the i686 ABI.
118define <64 x i16*> @AGEP9(i16* %param, <64 x i32> %off) nounwind {
119; CHECK-LABEL: AGEP9:
120; CHECK:       # %bb.0:
121; CHECK-NEXT:    pushl %ebp
122; CHECK-NEXT:    movl %esp, %ebp
123; CHECK-NEXT:    andl $-32, %esp
124; CHECK-NEXT:    subl $160, %esp
125; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm3
126; CHECK-NEXT:    vbroadcastss 12(%ebp), %xmm5
127; CHECK-NEXT:    vpaddd %xmm3, %xmm5, %xmm3
128; CHECK-NEXT:    vmovdqa %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
129; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
130; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
131; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
132; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
133; CHECK-NEXT:    vpaddd %xmm1, %xmm1, %xmm0
134; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
135; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
136; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm0
137; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
138; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
139; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
140; CHECK-NEXT:    vpaddd %xmm2, %xmm2, %xmm0
141; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
142; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
143; CHECK-NEXT:    vextractf128 $1, %ymm2, %xmm0
144; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
145; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
146; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
147; CHECK-NEXT:    vmovdqa 40(%ebp), %xmm0
148; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
149; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
150; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
151; CHECK-NEXT:    vmovdqa 56(%ebp), %xmm0
152; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
153; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
154; CHECK-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
155; CHECK-NEXT:    vmovdqa 72(%ebp), %xmm3
156; CHECK-NEXT:    vpaddd %xmm3, %xmm3, %xmm3
157; CHECK-NEXT:    vpaddd %xmm3, %xmm5, %xmm0
158; CHECK-NEXT:    vmovdqa %xmm0, (%esp) # 16-byte Spill
159; CHECK-NEXT:    vmovdqa 88(%ebp), %xmm4
160; CHECK-NEXT:    vpaddd %xmm4, %xmm4, %xmm4
161; CHECK-NEXT:    vpaddd %xmm4, %xmm5, %xmm4
162; CHECK-NEXT:    vmovdqa 104(%ebp), %xmm1
163; CHECK-NEXT:    vpaddd %xmm1, %xmm1, %xmm1
164; CHECK-NEXT:    vpaddd %xmm1, %xmm5, %xmm1
165; CHECK-NEXT:    vmovdqa 120(%ebp), %xmm6
166; CHECK-NEXT:    vpaddd %xmm6, %xmm6, %xmm6
167; CHECK-NEXT:    vpaddd %xmm6, %xmm5, %xmm6
168; CHECK-NEXT:    vmovdqa 136(%ebp), %xmm2
169; CHECK-NEXT:    vpaddd %xmm2, %xmm2, %xmm2
170; CHECK-NEXT:    vpaddd %xmm2, %xmm5, %xmm2
171; CHECK-NEXT:    vmovdqa 152(%ebp), %xmm7
172; CHECK-NEXT:    vpaddd %xmm7, %xmm7, %xmm7
173; CHECK-NEXT:    vpaddd %xmm7, %xmm5, %xmm7
174; CHECK-NEXT:    vmovdqa 168(%ebp), %xmm0
175; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
176; CHECK-NEXT:    vpaddd %xmm0, %xmm5, %xmm0
177; CHECK-NEXT:    vmovdqa 184(%ebp), %xmm3
178; CHECK-NEXT:    vpaddd %xmm3, %xmm3, %xmm3
179; CHECK-NEXT:    vpaddd %xmm3, %xmm5, %xmm3
180; CHECK-NEXT:    movl 8(%ebp), %eax
181; CHECK-NEXT:    vmovdqa %xmm3, 240(%eax)
182; CHECK-NEXT:    vmovdqa %xmm0, 224(%eax)
183; CHECK-NEXT:    vmovdqa %xmm7, 208(%eax)
184; CHECK-NEXT:    vmovdqa %xmm2, 192(%eax)
185; CHECK-NEXT:    vmovdqa %xmm6, 176(%eax)
186; CHECK-NEXT:    vmovdqa %xmm1, 160(%eax)
187; CHECK-NEXT:    vmovdqa %xmm4, 144(%eax)
188; CHECK-NEXT:    vmovaps (%esp), %xmm0 # 16-byte Reload
189; CHECK-NEXT:    vmovaps %xmm0, 128(%eax)
190; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
191; CHECK-NEXT:    vmovaps %xmm0, 112(%eax)
192; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
193; CHECK-NEXT:    vmovaps %xmm0, 96(%eax)
194; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
195; CHECK-NEXT:    vmovaps %xmm0, 80(%eax)
196; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
197; CHECK-NEXT:    vmovaps %xmm0, 64(%eax)
198; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
199; CHECK-NEXT:    vmovaps %xmm0, 48(%eax)
200; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
201; CHECK-NEXT:    vmovaps %xmm0, 32(%eax)
202; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
203; CHECK-NEXT:    vmovaps %xmm0, 16(%eax)
204; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
205; CHECK-NEXT:    vmovaps %xmm0, (%eax)
206; CHECK-NEXT:    movl %ebp, %esp
207; CHECK-NEXT:    popl %ebp
208; CHECK-NEXT:    vzeroupper
209; CHECK-NEXT:    retl $4
210  %A = getelementptr i16, i16* %param, <64 x i32> %off
211  ret <64 x i16*> %A
212}
213
214