1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-linux -mcpu=x86-64 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK
3; RUN: llc < %s -mtriple=x86_64-linux -mcpu=x86-64 --x86-disable-avoid-SFB -verify-machineinstrs | FileCheck %s --check-prefix=DISABLED
4; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX2
5; RUN: llc < %s -mtriple=x86_64-linux -mcpu=skx -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX512
6
7target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
8target triple = "x86_64-unknown-linux-gnu"
9
10%struct.S = type { i32, i32, i32, i32 }
11
12; Function Attrs: nounwind uwtable
13define void @test_conditional_block(%struct.S* nocapture noalias %s1 , %struct.S* nocapture noalias %s2, i32 %x, %struct.S* nocapture noalias  %s3, %struct.S* nocapture noalias readonly %s4) local_unnamed_addr #0 {
14; CHECK-LABEL: test_conditional_block:
15; CHECK:       # %bb.0: # %entry
16; CHECK-NEXT:    cmpl $18, %edx
17; CHECK-NEXT:    jl .LBB0_2
18; CHECK-NEXT:  # %bb.1: # %if.then
19; CHECK-NEXT:    movl %edx, 4(%rdi)
20; CHECK-NEXT:  .LBB0_2: # %if.end
21; CHECK-NEXT:    movups (%r8), %xmm0
22; CHECK-NEXT:    movups %xmm0, (%rcx)
23; CHECK-NEXT:    movl (%rdi), %eax
24; CHECK-NEXT:    movl %eax, (%rsi)
25; CHECK-NEXT:    movl 4(%rdi), %eax
26; CHECK-NEXT:    movl %eax, 4(%rsi)
27; CHECK-NEXT:    movq 8(%rdi), %rax
28; CHECK-NEXT:    movq %rax, 8(%rsi)
29; CHECK-NEXT:    retq
30;
31; DISABLED-LABEL: test_conditional_block:
32; DISABLED:       # %bb.0: # %entry
33; DISABLED-NEXT:    cmpl $18, %edx
34; DISABLED-NEXT:    jl .LBB0_2
35; DISABLED-NEXT:  # %bb.1: # %if.then
36; DISABLED-NEXT:    movl %edx, 4(%rdi)
37; DISABLED-NEXT:  .LBB0_2: # %if.end
38; DISABLED-NEXT:    movups (%r8), %xmm0
39; DISABLED-NEXT:    movups %xmm0, (%rcx)
40; DISABLED-NEXT:    movups (%rdi), %xmm0
41; DISABLED-NEXT:    movups %xmm0, (%rsi)
42; DISABLED-NEXT:    retq
43;
44; CHECK-AVX2-LABEL: test_conditional_block:
45; CHECK-AVX2:       # %bb.0: # %entry
46; CHECK-AVX2-NEXT:    cmpl $18, %edx
47; CHECK-AVX2-NEXT:    jl .LBB0_2
48; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
49; CHECK-AVX2-NEXT:    movl %edx, 4(%rdi)
50; CHECK-AVX2-NEXT:  .LBB0_2: # %if.end
51; CHECK-AVX2-NEXT:    vmovups (%r8), %xmm0
52; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rcx)
53; CHECK-AVX2-NEXT:    movl (%rdi), %eax
54; CHECK-AVX2-NEXT:    movl %eax, (%rsi)
55; CHECK-AVX2-NEXT:    movl 4(%rdi), %eax
56; CHECK-AVX2-NEXT:    movl %eax, 4(%rsi)
57; CHECK-AVX2-NEXT:    movq 8(%rdi), %rax
58; CHECK-AVX2-NEXT:    movq %rax, 8(%rsi)
59; CHECK-AVX2-NEXT:    retq
60;
61; CHECK-AVX512-LABEL: test_conditional_block:
62; CHECK-AVX512:       # %bb.0: # %entry
63; CHECK-AVX512-NEXT:    cmpl $18, %edx
64; CHECK-AVX512-NEXT:    jl .LBB0_2
65; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
66; CHECK-AVX512-NEXT:    movl %edx, 4(%rdi)
67; CHECK-AVX512-NEXT:  .LBB0_2: # %if.end
68; CHECK-AVX512-NEXT:    vmovups (%r8), %xmm0
69; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rcx)
70; CHECK-AVX512-NEXT:    movl (%rdi), %eax
71; CHECK-AVX512-NEXT:    movl %eax, (%rsi)
72; CHECK-AVX512-NEXT:    movl 4(%rdi), %eax
73; CHECK-AVX512-NEXT:    movl %eax, 4(%rsi)
74; CHECK-AVX512-NEXT:    movq 8(%rdi), %rax
75; CHECK-AVX512-NEXT:    movq %rax, 8(%rsi)
76; CHECK-AVX512-NEXT:    retq
77entry:
78  %cmp = icmp sgt i32 %x, 17
79  br i1 %cmp, label %if.then, label %if.end
80
81if.then:                                          ; preds = %entry
82  %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
83  store i32 %x, i32* %b, align 4
84  br label %if.end
85
86if.end:                                           ; preds = %if.then, %entry
87  %0 = bitcast %struct.S* %s3 to i8*
88  %1 = bitcast %struct.S* %s4 to i8*
89  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
90  %2 = bitcast %struct.S* %s2 to i8*
91  %3 = bitcast %struct.S* %s1 to i8*
92  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
93  ret void
94}
95
96; Function Attrs: nounwind uwtable
97define void @test_imm_store(%struct.S* nocapture noalias %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3) local_unnamed_addr #0 {
98; CHECK-LABEL: test_imm_store:
99; CHECK:       # %bb.0: # %entry
100; CHECK-NEXT:    movl $0, (%rdi)
101; CHECK-NEXT:    movl $1, (%rcx)
102; CHECK-NEXT:    movl (%rdi), %eax
103; CHECK-NEXT:    movl %eax, (%rsi)
104; CHECK-NEXT:    movq 4(%rdi), %rax
105; CHECK-NEXT:    movq %rax, 4(%rsi)
106; CHECK-NEXT:    movl 12(%rdi), %eax
107; CHECK-NEXT:    movl %eax, 12(%rsi)
108; CHECK-NEXT:    retq
109;
110; DISABLED-LABEL: test_imm_store:
111; DISABLED:       # %bb.0: # %entry
112; DISABLED-NEXT:    movl $0, (%rdi)
113; DISABLED-NEXT:    movl $1, (%rcx)
114; DISABLED-NEXT:    movups (%rdi), %xmm0
115; DISABLED-NEXT:    movups %xmm0, (%rsi)
116; DISABLED-NEXT:    retq
117;
118; CHECK-AVX2-LABEL: test_imm_store:
119; CHECK-AVX2:       # %bb.0: # %entry
120; CHECK-AVX2-NEXT:    movl $0, (%rdi)
121; CHECK-AVX2-NEXT:    movl $1, (%rcx)
122; CHECK-AVX2-NEXT:    movl (%rdi), %eax
123; CHECK-AVX2-NEXT:    movl %eax, (%rsi)
124; CHECK-AVX2-NEXT:    movq 4(%rdi), %rax
125; CHECK-AVX2-NEXT:    movq %rax, 4(%rsi)
126; CHECK-AVX2-NEXT:    movl 12(%rdi), %eax
127; CHECK-AVX2-NEXT:    movl %eax, 12(%rsi)
128; CHECK-AVX2-NEXT:    retq
129;
130; CHECK-AVX512-LABEL: test_imm_store:
131; CHECK-AVX512:       # %bb.0: # %entry
132; CHECK-AVX512-NEXT:    movl $0, (%rdi)
133; CHECK-AVX512-NEXT:    movl $1, (%rcx)
134; CHECK-AVX512-NEXT:    movl (%rdi), %eax
135; CHECK-AVX512-NEXT:    movl %eax, (%rsi)
136; CHECK-AVX512-NEXT:    movq 4(%rdi), %rax
137; CHECK-AVX512-NEXT:    movq %rax, 4(%rsi)
138; CHECK-AVX512-NEXT:    movl 12(%rdi), %eax
139; CHECK-AVX512-NEXT:    movl %eax, 12(%rsi)
140; CHECK-AVX512-NEXT:    retq
141entry:
142  %a = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 0
143  store i32 0, i32* %a, align 4
144  %a1 = getelementptr inbounds %struct.S, %struct.S* %s3, i64 0, i32 0
145  store i32 1, i32* %a1, align 4
146  %0 = bitcast %struct.S* %s2 to i8*
147  %1 = bitcast %struct.S* %s1 to i8*
148  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
149  ret void
150}
151
152; Function Attrs: nounwind uwtable
153define void @test_nondirect_br(%struct.S* nocapture noalias %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
154; CHECK-LABEL: test_nondirect_br:
155; CHECK:       # %bb.0: # %entry
156; CHECK-NEXT:    cmpl $18, %edx
157; CHECK-NEXT:    jl .LBB2_2
158; CHECK-NEXT:  # %bb.1: # %if.then
159; CHECK-NEXT:    movl %edx, 4(%rdi)
160; CHECK-NEXT:  .LBB2_2: # %if.end
161; CHECK-NEXT:    cmpl $14, %r9d
162; CHECK-NEXT:    jl .LBB2_4
163; CHECK-NEXT:  # %bb.3: # %if.then2
164; CHECK-NEXT:    movl %r9d, 12(%rdi)
165; CHECK-NEXT:  .LBB2_4: # %if.end3
166; CHECK-NEXT:    movups (%r8), %xmm0
167; CHECK-NEXT:    movups %xmm0, (%rcx)
168; CHECK-NEXT:    movq (%rdi), %rax
169; CHECK-NEXT:    movq %rax, (%rsi)
170; CHECK-NEXT:    movl 8(%rdi), %eax
171; CHECK-NEXT:    movl %eax, 8(%rsi)
172; CHECK-NEXT:    movl 12(%rdi), %eax
173; CHECK-NEXT:    movl %eax, 12(%rsi)
174; CHECK-NEXT:    retq
175;
176; DISABLED-LABEL: test_nondirect_br:
177; DISABLED:       # %bb.0: # %entry
178; DISABLED-NEXT:    cmpl $18, %edx
179; DISABLED-NEXT:    jl .LBB2_2
180; DISABLED-NEXT:  # %bb.1: # %if.then
181; DISABLED-NEXT:    movl %edx, 4(%rdi)
182; DISABLED-NEXT:  .LBB2_2: # %if.end
183; DISABLED-NEXT:    cmpl $14, %r9d
184; DISABLED-NEXT:    jl .LBB2_4
185; DISABLED-NEXT:  # %bb.3: # %if.then2
186; DISABLED-NEXT:    movl %r9d, 12(%rdi)
187; DISABLED-NEXT:  .LBB2_4: # %if.end3
188; DISABLED-NEXT:    movups (%r8), %xmm0
189; DISABLED-NEXT:    movups %xmm0, (%rcx)
190; DISABLED-NEXT:    movups (%rdi), %xmm0
191; DISABLED-NEXT:    movups %xmm0, (%rsi)
192; DISABLED-NEXT:    retq
193;
194; CHECK-AVX2-LABEL: test_nondirect_br:
195; CHECK-AVX2:       # %bb.0: # %entry
196; CHECK-AVX2-NEXT:    cmpl $18, %edx
197; CHECK-AVX2-NEXT:    jl .LBB2_2
198; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
199; CHECK-AVX2-NEXT:    movl %edx, 4(%rdi)
200; CHECK-AVX2-NEXT:  .LBB2_2: # %if.end
201; CHECK-AVX2-NEXT:    cmpl $14, %r9d
202; CHECK-AVX2-NEXT:    jl .LBB2_4
203; CHECK-AVX2-NEXT:  # %bb.3: # %if.then2
204; CHECK-AVX2-NEXT:    movl %r9d, 12(%rdi)
205; CHECK-AVX2-NEXT:  .LBB2_4: # %if.end3
206; CHECK-AVX2-NEXT:    vmovups (%r8), %xmm0
207; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rcx)
208; CHECK-AVX2-NEXT:    movq (%rdi), %rax
209; CHECK-AVX2-NEXT:    movq %rax, (%rsi)
210; CHECK-AVX2-NEXT:    movl 8(%rdi), %eax
211; CHECK-AVX2-NEXT:    movl %eax, 8(%rsi)
212; CHECK-AVX2-NEXT:    movl 12(%rdi), %eax
213; CHECK-AVX2-NEXT:    movl %eax, 12(%rsi)
214; CHECK-AVX2-NEXT:    retq
215;
216; CHECK-AVX512-LABEL: test_nondirect_br:
217; CHECK-AVX512:       # %bb.0: # %entry
218; CHECK-AVX512-NEXT:    cmpl $18, %edx
219; CHECK-AVX512-NEXT:    jl .LBB2_2
220; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
221; CHECK-AVX512-NEXT:    movl %edx, 4(%rdi)
222; CHECK-AVX512-NEXT:  .LBB2_2: # %if.end
223; CHECK-AVX512-NEXT:    cmpl $14, %r9d
224; CHECK-AVX512-NEXT:    jl .LBB2_4
225; CHECK-AVX512-NEXT:  # %bb.3: # %if.then2
226; CHECK-AVX512-NEXT:    movl %r9d, 12(%rdi)
227; CHECK-AVX512-NEXT:  .LBB2_4: # %if.end3
228; CHECK-AVX512-NEXT:    vmovups (%r8), %xmm0
229; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rcx)
230; CHECK-AVX512-NEXT:    movq (%rdi), %rax
231; CHECK-AVX512-NEXT:    movq %rax, (%rsi)
232; CHECK-AVX512-NEXT:    movl 8(%rdi), %eax
233; CHECK-AVX512-NEXT:    movl %eax, 8(%rsi)
234; CHECK-AVX512-NEXT:    movl 12(%rdi), %eax
235; CHECK-AVX512-NEXT:    movl %eax, 12(%rsi)
236; CHECK-AVX512-NEXT:    retq
237entry:
238  %cmp = icmp sgt i32 %x, 17
239  br i1 %cmp, label %if.then, label %if.end
240
241if.then:                                          ; preds = %entry
242  %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
243  store i32 %x, i32* %b, align 4
244  br label %if.end
245
246if.end:                                           ; preds = %if.then, %entry
247  %cmp1 = icmp sgt i32 %x2, 13
248  br i1 %cmp1, label %if.then2, label %if.end3
249
250if.then2:                                         ; preds = %if.end
251  %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3
252  store i32 %x2, i32* %d, align 4
253  br label %if.end3
254
255if.end3:                                          ; preds = %if.then2, %if.end
256  %0 = bitcast %struct.S* %s3 to i8*
257  %1 = bitcast %struct.S* %s4 to i8*
258  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
259  %2 = bitcast %struct.S* %s2 to i8*
260  %3 = bitcast %struct.S* %s1 to i8*
261  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
262  ret void
263}
264
265; Function Attrs: nounwind uwtable
266define void @test_2preds_block(%struct.S* nocapture noalias %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
267; CHECK-LABEL: test_2preds_block:
268; CHECK:       # %bb.0: # %entry
269; CHECK-NEXT:    movl %r9d, 12(%rdi)
270; CHECK-NEXT:    cmpl $18, %edx
271; CHECK-NEXT:    jl .LBB3_2
272; CHECK-NEXT:  # %bb.1: # %if.then
273; CHECK-NEXT:    movl %edx, 4(%rdi)
274; CHECK-NEXT:  .LBB3_2: # %if.end
275; CHECK-NEXT:    movups (%r8), %xmm0
276; CHECK-NEXT:    movups %xmm0, (%rcx)
277; CHECK-NEXT:    movl (%rdi), %eax
278; CHECK-NEXT:    movl %eax, (%rsi)
279; CHECK-NEXT:    movl 4(%rdi), %eax
280; CHECK-NEXT:    movl %eax, 4(%rsi)
281; CHECK-NEXT:    movl 8(%rdi), %eax
282; CHECK-NEXT:    movl %eax, 8(%rsi)
283; CHECK-NEXT:    movl 12(%rdi), %eax
284; CHECK-NEXT:    movl %eax, 12(%rsi)
285; CHECK-NEXT:    retq
286;
287; DISABLED-LABEL: test_2preds_block:
288; DISABLED:       # %bb.0: # %entry
289; DISABLED-NEXT:    movl %r9d, 12(%rdi)
290; DISABLED-NEXT:    cmpl $18, %edx
291; DISABLED-NEXT:    jl .LBB3_2
292; DISABLED-NEXT:  # %bb.1: # %if.then
293; DISABLED-NEXT:    movl %edx, 4(%rdi)
294; DISABLED-NEXT:  .LBB3_2: # %if.end
295; DISABLED-NEXT:    movups (%r8), %xmm0
296; DISABLED-NEXT:    movups %xmm0, (%rcx)
297; DISABLED-NEXT:    movups (%rdi), %xmm0
298; DISABLED-NEXT:    movups %xmm0, (%rsi)
299; DISABLED-NEXT:    retq
300;
301; CHECK-AVX2-LABEL: test_2preds_block:
302; CHECK-AVX2:       # %bb.0: # %entry
303; CHECK-AVX2-NEXT:    movl %r9d, 12(%rdi)
304; CHECK-AVX2-NEXT:    cmpl $18, %edx
305; CHECK-AVX2-NEXT:    jl .LBB3_2
306; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
307; CHECK-AVX2-NEXT:    movl %edx, 4(%rdi)
308; CHECK-AVX2-NEXT:  .LBB3_2: # %if.end
309; CHECK-AVX2-NEXT:    vmovups (%r8), %xmm0
310; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rcx)
311; CHECK-AVX2-NEXT:    movl (%rdi), %eax
312; CHECK-AVX2-NEXT:    movl %eax, (%rsi)
313; CHECK-AVX2-NEXT:    movl 4(%rdi), %eax
314; CHECK-AVX2-NEXT:    movl %eax, 4(%rsi)
315; CHECK-AVX2-NEXT:    movl 8(%rdi), %eax
316; CHECK-AVX2-NEXT:    movl %eax, 8(%rsi)
317; CHECK-AVX2-NEXT:    movl 12(%rdi), %eax
318; CHECK-AVX2-NEXT:    movl %eax, 12(%rsi)
319; CHECK-AVX2-NEXT:    retq
320;
321; CHECK-AVX512-LABEL: test_2preds_block:
322; CHECK-AVX512:       # %bb.0: # %entry
323; CHECK-AVX512-NEXT:    movl %r9d, 12(%rdi)
324; CHECK-AVX512-NEXT:    cmpl $18, %edx
325; CHECK-AVX512-NEXT:    jl .LBB3_2
326; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
327; CHECK-AVX512-NEXT:    movl %edx, 4(%rdi)
328; CHECK-AVX512-NEXT:  .LBB3_2: # %if.end
329; CHECK-AVX512-NEXT:    vmovups (%r8), %xmm0
330; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rcx)
331; CHECK-AVX512-NEXT:    movl (%rdi), %eax
332; CHECK-AVX512-NEXT:    movl %eax, (%rsi)
333; CHECK-AVX512-NEXT:    movl 4(%rdi), %eax
334; CHECK-AVX512-NEXT:    movl %eax, 4(%rsi)
335; CHECK-AVX512-NEXT:    movl 8(%rdi), %eax
336; CHECK-AVX512-NEXT:    movl %eax, 8(%rsi)
337; CHECK-AVX512-NEXT:    movl 12(%rdi), %eax
338; CHECK-AVX512-NEXT:    movl %eax, 12(%rsi)
339; CHECK-AVX512-NEXT:    retq
340entry:
341  %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3
342  store i32 %x2, i32* %d, align 4
343  %cmp = icmp sgt i32 %x, 17
344  br i1 %cmp, label %if.then, label %if.end
345
346if.then:                                          ; preds = %entry
347  %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
348  store i32 %x, i32* %b, align 4
349  br label %if.end
350
351if.end:                                           ; preds = %if.then, %entry
352  %0 = bitcast %struct.S* %s3 to i8*
353  %1 = bitcast %struct.S* %s4 to i8*
354  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
355  %2 = bitcast %struct.S* %s2 to i8*
356  %3 = bitcast %struct.S* %s1 to i8*
357  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
358  ret void
359}
360%struct.S2 = type { i64, i64 }
361
362; Function Attrs: nounwind uwtable
363define void @test_type64(%struct.S2* nocapture noalias %s1, %struct.S2* nocapture %s2, i32 %x, %struct.S2* nocapture %s3, %struct.S2* nocapture readonly %s4) local_unnamed_addr #0 {
364; CHECK-LABEL: test_type64:
365; CHECK:       # %bb.0: # %entry
366; CHECK-NEXT:    cmpl $18, %edx
367; CHECK-NEXT:    jl .LBB4_2
368; CHECK-NEXT:  # %bb.1: # %if.then
369; CHECK-NEXT:    movslq %edx, %rax
370; CHECK-NEXT:    movq %rax, 8(%rdi)
371; CHECK-NEXT:  .LBB4_2: # %if.end
372; CHECK-NEXT:    movups (%r8), %xmm0
373; CHECK-NEXT:    movups %xmm0, (%rcx)
374; CHECK-NEXT:    movq (%rdi), %rax
375; CHECK-NEXT:    movq %rax, (%rsi)
376; CHECK-NEXT:    movq 8(%rdi), %rax
377; CHECK-NEXT:    movq %rax, 8(%rsi)
378; CHECK-NEXT:    retq
379;
380; DISABLED-LABEL: test_type64:
381; DISABLED:       # %bb.0: # %entry
382; DISABLED-NEXT:    cmpl $18, %edx
383; DISABLED-NEXT:    jl .LBB4_2
384; DISABLED-NEXT:  # %bb.1: # %if.then
385; DISABLED-NEXT:    movslq %edx, %rax
386; DISABLED-NEXT:    movq %rax, 8(%rdi)
387; DISABLED-NEXT:  .LBB4_2: # %if.end
388; DISABLED-NEXT:    movups (%r8), %xmm0
389; DISABLED-NEXT:    movups %xmm0, (%rcx)
390; DISABLED-NEXT:    movups (%rdi), %xmm0
391; DISABLED-NEXT:    movups %xmm0, (%rsi)
392; DISABLED-NEXT:    retq
393;
394; CHECK-AVX2-LABEL: test_type64:
395; CHECK-AVX2:       # %bb.0: # %entry
396; CHECK-AVX2-NEXT:    cmpl $18, %edx
397; CHECK-AVX2-NEXT:    jl .LBB4_2
398; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
399; CHECK-AVX2-NEXT:    movslq %edx, %rax
400; CHECK-AVX2-NEXT:    movq %rax, 8(%rdi)
401; CHECK-AVX2-NEXT:  .LBB4_2: # %if.end
402; CHECK-AVX2-NEXT:    vmovups (%r8), %xmm0
403; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rcx)
404; CHECK-AVX2-NEXT:    movq (%rdi), %rax
405; CHECK-AVX2-NEXT:    movq %rax, (%rsi)
406; CHECK-AVX2-NEXT:    movq 8(%rdi), %rax
407; CHECK-AVX2-NEXT:    movq %rax, 8(%rsi)
408; CHECK-AVX2-NEXT:    retq
409;
410; CHECK-AVX512-LABEL: test_type64:
411; CHECK-AVX512:       # %bb.0: # %entry
412; CHECK-AVX512-NEXT:    cmpl $18, %edx
413; CHECK-AVX512-NEXT:    jl .LBB4_2
414; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
415; CHECK-AVX512-NEXT:    movslq %edx, %rax
416; CHECK-AVX512-NEXT:    movq %rax, 8(%rdi)
417; CHECK-AVX512-NEXT:  .LBB4_2: # %if.end
418; CHECK-AVX512-NEXT:    vmovups (%r8), %xmm0
419; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rcx)
420; CHECK-AVX512-NEXT:    movq (%rdi), %rax
421; CHECK-AVX512-NEXT:    movq %rax, (%rsi)
422; CHECK-AVX512-NEXT:    movq 8(%rdi), %rax
423; CHECK-AVX512-NEXT:    movq %rax, 8(%rsi)
424; CHECK-AVX512-NEXT:    retq
425entry:
426  %cmp = icmp sgt i32 %x, 17
427  br i1 %cmp, label %if.then, label %if.end
428
429if.then:                                          ; preds = %entry
430  %conv = sext i32 %x to i64
431  %b = getelementptr inbounds %struct.S2, %struct.S2* %s1, i64 0, i32 1
432  store i64 %conv, i64* %b, align 8
433  br label %if.end
434
435if.end:                                           ; preds = %if.then, %entry
436  %0 = bitcast %struct.S2* %s3 to i8*
437  %1 = bitcast %struct.S2* %s4 to i8*
438  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 8, i1 false)
439  %2 = bitcast %struct.S2* %s2 to i8*
440  %3 = bitcast %struct.S2* %s1 to i8*
441  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 8, i1 false)
442  ret void
443}
444%struct.S3 = type { i64, i8, i8, i16, i32 }
445
446; Function Attrs: noinline nounwind uwtable
447define void @test_mixed_type(%struct.S3* nocapture noalias %s1, %struct.S3* nocapture %s2, i32 %x, %struct.S3* nocapture readnone %s3, %struct.S3* nocapture readnone %s4) local_unnamed_addr #0 {
448; CHECK-LABEL: test_mixed_type:
449; CHECK:       # %bb.0: # %entry
450; CHECK-NEXT:    cmpl $18, %edx
451; CHECK-NEXT:    jl .LBB5_2
452; CHECK-NEXT:  # %bb.1: # %if.then
453; CHECK-NEXT:    movslq %edx, %rax
454; CHECK-NEXT:    movq %rax, (%rdi)
455; CHECK-NEXT:    movb %dl, 8(%rdi)
456; CHECK-NEXT:  .LBB5_2: # %if.end
457; CHECK-NEXT:    movq (%rdi), %rax
458; CHECK-NEXT:    movq %rax, (%rsi)
459; CHECK-NEXT:    movb 8(%rdi), %al
460; CHECK-NEXT:    movb %al, 8(%rsi)
461; CHECK-NEXT:    movl 9(%rdi), %eax
462; CHECK-NEXT:    movl %eax, 9(%rsi)
463; CHECK-NEXT:    movzwl 13(%rdi), %eax
464; CHECK-NEXT:    movw %ax, 13(%rsi)
465; CHECK-NEXT:    movb 15(%rdi), %al
466; CHECK-NEXT:    movb %al, 15(%rsi)
467; CHECK-NEXT:    retq
468;
469; DISABLED-LABEL: test_mixed_type:
470; DISABLED:       # %bb.0: # %entry
471; DISABLED-NEXT:    cmpl $18, %edx
472; DISABLED-NEXT:    jl .LBB5_2
473; DISABLED-NEXT:  # %bb.1: # %if.then
474; DISABLED-NEXT:    movslq %edx, %rax
475; DISABLED-NEXT:    movq %rax, (%rdi)
476; DISABLED-NEXT:    movb %dl, 8(%rdi)
477; DISABLED-NEXT:  .LBB5_2: # %if.end
478; DISABLED-NEXT:    movups (%rdi), %xmm0
479; DISABLED-NEXT:    movups %xmm0, (%rsi)
480; DISABLED-NEXT:    retq
481;
482; CHECK-AVX2-LABEL: test_mixed_type:
483; CHECK-AVX2:       # %bb.0: # %entry
484; CHECK-AVX2-NEXT:    cmpl $18, %edx
485; CHECK-AVX2-NEXT:    jl .LBB5_2
486; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
487; CHECK-AVX2-NEXT:    movslq %edx, %rax
488; CHECK-AVX2-NEXT:    movq %rax, (%rdi)
489; CHECK-AVX2-NEXT:    movb %dl, 8(%rdi)
490; CHECK-AVX2-NEXT:  .LBB5_2: # %if.end
491; CHECK-AVX2-NEXT:    movq (%rdi), %rax
492; CHECK-AVX2-NEXT:    movq %rax, (%rsi)
493; CHECK-AVX2-NEXT:    movb 8(%rdi), %al
494; CHECK-AVX2-NEXT:    movb %al, 8(%rsi)
495; CHECK-AVX2-NEXT:    movl 9(%rdi), %eax
496; CHECK-AVX2-NEXT:    movl %eax, 9(%rsi)
497; CHECK-AVX2-NEXT:    movzwl 13(%rdi), %eax
498; CHECK-AVX2-NEXT:    movw %ax, 13(%rsi)
499; CHECK-AVX2-NEXT:    movb 15(%rdi), %al
500; CHECK-AVX2-NEXT:    movb %al, 15(%rsi)
501; CHECK-AVX2-NEXT:    retq
502;
503; CHECK-AVX512-LABEL: test_mixed_type:
504; CHECK-AVX512:       # %bb.0: # %entry
505; CHECK-AVX512-NEXT:    cmpl $18, %edx
506; CHECK-AVX512-NEXT:    jl .LBB5_2
507; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
508; CHECK-AVX512-NEXT:    movslq %edx, %rax
509; CHECK-AVX512-NEXT:    movq %rax, (%rdi)
510; CHECK-AVX512-NEXT:    movb %dl, 8(%rdi)
511; CHECK-AVX512-NEXT:  .LBB5_2: # %if.end
512; CHECK-AVX512-NEXT:    movq (%rdi), %rax
513; CHECK-AVX512-NEXT:    movq %rax, (%rsi)
514; CHECK-AVX512-NEXT:    movb 8(%rdi), %al
515; CHECK-AVX512-NEXT:    movb %al, 8(%rsi)
516; CHECK-AVX512-NEXT:    movl 9(%rdi), %eax
517; CHECK-AVX512-NEXT:    movl %eax, 9(%rsi)
518; CHECK-AVX512-NEXT:    movzwl 13(%rdi), %eax
519; CHECK-AVX512-NEXT:    movw %ax, 13(%rsi)
520; CHECK-AVX512-NEXT:    movb 15(%rdi), %al
521; CHECK-AVX512-NEXT:    movb %al, 15(%rsi)
522; CHECK-AVX512-NEXT:    retq
523entry:
524  %cmp = icmp sgt i32 %x, 17
525  br i1 %cmp, label %if.then, label %if.end
526
527if.then:                                          ; preds = %entry
528  %conv = sext i32 %x to i64
529  %a = getelementptr inbounds %struct.S3, %struct.S3* %s1, i64 0, i32 0
530  store i64 %conv, i64* %a, align 8
531  %conv1 = trunc i32 %x to i8
532  %b = getelementptr inbounds %struct.S3, %struct.S3* %s1, i64 0, i32 1
533  store i8 %conv1, i8* %b, align 8
534  br label %if.end
535
536if.end:                                           ; preds = %if.then, %entry
537  %0 = bitcast %struct.S3* %s2 to i8*
538  %1 = bitcast %struct.S3* %s1 to i8*
539  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 8, i1 false)
540  ret void
541}
542%struct.S4 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
543
544; Function Attrs: nounwind uwtable
545define void @test_multiple_blocks(%struct.S4* nocapture noalias %s1, %struct.S4* nocapture %s2) local_unnamed_addr #0 {
546; CHECK-LABEL: test_multiple_blocks:
547; CHECK:       # %bb.0: # %entry
548; CHECK-NEXT:    movl $0, 4(%rdi)
549; CHECK-NEXT:    movl $0, 36(%rdi)
550; CHECK-NEXT:    movups 16(%rdi), %xmm0
551; CHECK-NEXT:    movups %xmm0, 16(%rsi)
552; CHECK-NEXT:    movl 32(%rdi), %eax
553; CHECK-NEXT:    movl %eax, 32(%rsi)
554; CHECK-NEXT:    movl 36(%rdi), %eax
555; CHECK-NEXT:    movl %eax, 36(%rsi)
556; CHECK-NEXT:    movq 40(%rdi), %rax
557; CHECK-NEXT:    movq %rax, 40(%rsi)
558; CHECK-NEXT:    movl (%rdi), %eax
559; CHECK-NEXT:    movl %eax, (%rsi)
560; CHECK-NEXT:    movl 4(%rdi), %eax
561; CHECK-NEXT:    movl %eax, 4(%rsi)
562; CHECK-NEXT:    movq 8(%rdi), %rax
563; CHECK-NEXT:    movq %rax, 8(%rsi)
564; CHECK-NEXT:    retq
565;
566; DISABLED-LABEL: test_multiple_blocks:
567; DISABLED:       # %bb.0: # %entry
568; DISABLED-NEXT:    movl $0, 4(%rdi)
569; DISABLED-NEXT:    movl $0, 36(%rdi)
570; DISABLED-NEXT:    movups 16(%rdi), %xmm0
571; DISABLED-NEXT:    movups %xmm0, 16(%rsi)
572; DISABLED-NEXT:    movups 32(%rdi), %xmm0
573; DISABLED-NEXT:    movups %xmm0, 32(%rsi)
574; DISABLED-NEXT:    movups (%rdi), %xmm0
575; DISABLED-NEXT:    movups %xmm0, (%rsi)
576; DISABLED-NEXT:    retq
577;
578; CHECK-AVX2-LABEL: test_multiple_blocks:
579; CHECK-AVX2:       # %bb.0: # %entry
580; CHECK-AVX2-NEXT:    movl $0, 4(%rdi)
581; CHECK-AVX2-NEXT:    movl $0, 36(%rdi)
582; CHECK-AVX2-NEXT:    vmovups 16(%rdi), %xmm0
583; CHECK-AVX2-NEXT:    vmovups %xmm0, 16(%rsi)
584; CHECK-AVX2-NEXT:    movl 32(%rdi), %eax
585; CHECK-AVX2-NEXT:    movl %eax, 32(%rsi)
586; CHECK-AVX2-NEXT:    movl 36(%rdi), %eax
587; CHECK-AVX2-NEXT:    movl %eax, 36(%rsi)
588; CHECK-AVX2-NEXT:    movq 40(%rdi), %rax
589; CHECK-AVX2-NEXT:    movq %rax, 40(%rsi)
590; CHECK-AVX2-NEXT:    movl (%rdi), %eax
591; CHECK-AVX2-NEXT:    movl %eax, (%rsi)
592; CHECK-AVX2-NEXT:    movl 4(%rdi), %eax
593; CHECK-AVX2-NEXT:    movl %eax, 4(%rsi)
594; CHECK-AVX2-NEXT:    vmovups 8(%rdi), %xmm0
595; CHECK-AVX2-NEXT:    vmovups %xmm0, 8(%rsi)
596; CHECK-AVX2-NEXT:    movq 24(%rdi), %rax
597; CHECK-AVX2-NEXT:    movq %rax, 24(%rsi)
598; CHECK-AVX2-NEXT:    retq
599;
600; CHECK-AVX512-LABEL: test_multiple_blocks:
601; CHECK-AVX512:       # %bb.0: # %entry
602; CHECK-AVX512-NEXT:    movl $0, 4(%rdi)
603; CHECK-AVX512-NEXT:    movl $0, 36(%rdi)
604; CHECK-AVX512-NEXT:    vmovups 16(%rdi), %xmm0
605; CHECK-AVX512-NEXT:    vmovups %xmm0, 16(%rsi)
606; CHECK-AVX512-NEXT:    movl 32(%rdi), %eax
607; CHECK-AVX512-NEXT:    movl %eax, 32(%rsi)
608; CHECK-AVX512-NEXT:    movl 36(%rdi), %eax
609; CHECK-AVX512-NEXT:    movl %eax, 36(%rsi)
610; CHECK-AVX512-NEXT:    movq 40(%rdi), %rax
611; CHECK-AVX512-NEXT:    movq %rax, 40(%rsi)
612; CHECK-AVX512-NEXT:    movl (%rdi), %eax
613; CHECK-AVX512-NEXT:    movl %eax, (%rsi)
614; CHECK-AVX512-NEXT:    movl 4(%rdi), %eax
615; CHECK-AVX512-NEXT:    movl %eax, 4(%rsi)
616; CHECK-AVX512-NEXT:    vmovups 8(%rdi), %xmm0
617; CHECK-AVX512-NEXT:    vmovups %xmm0, 8(%rsi)
618; CHECK-AVX512-NEXT:    movq 24(%rdi), %rax
619; CHECK-AVX512-NEXT:    movq %rax, 24(%rsi)
620; CHECK-AVX512-NEXT:    retq
621entry:
622  %b = getelementptr inbounds %struct.S4, %struct.S4* %s1, i64 0, i32 1
623  store i32 0, i32* %b, align 4
624  %b3 = getelementptr inbounds %struct.S4, %struct.S4* %s1, i64 0, i32 9
625  store i32 0, i32* %b3, align 4
626  %0 = bitcast %struct.S4* %s2 to i8*
627  %1 = bitcast %struct.S4* %s1 to i8*
628  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 48, i32 4, i1 false)
629  ret void
630}
631%struct.S5 = type { i16, i16, i16, i16, i16, i16, i16, i16 }
632
633; Function Attrs: nounwind uwtable
634define void @test_type16(%struct.S5* nocapture noalias %s1, %struct.S5* nocapture %s2, i32 %x, %struct.S5* nocapture %s3, %struct.S5* nocapture readonly %s4) local_unnamed_addr #0 {
635; CHECK-LABEL: test_type16:
636; CHECK:       # %bb.0: # %entry
637; CHECK-NEXT:    cmpl $18, %edx
638; CHECK-NEXT:    jl .LBB7_2
639; CHECK-NEXT:  # %bb.1: # %if.then
640; CHECK-NEXT:    movw %dx, 2(%rdi)
641; CHECK-NEXT:  .LBB7_2: # %if.end
642; CHECK-NEXT:    movups (%r8), %xmm0
643; CHECK-NEXT:    movups %xmm0, (%rcx)
644; CHECK-NEXT:    movzwl (%rdi), %eax
645; CHECK-NEXT:    movw %ax, (%rsi)
646; CHECK-NEXT:    movzwl 2(%rdi), %eax
647; CHECK-NEXT:    movw %ax, 2(%rsi)
648; CHECK-NEXT:    movq 4(%rdi), %rax
649; CHECK-NEXT:    movq %rax, 4(%rsi)
650; CHECK-NEXT:    movl 12(%rdi), %eax
651; CHECK-NEXT:    movl %eax, 12(%rsi)
652; CHECK-NEXT:    retq
653;
654; DISABLED-LABEL: test_type16:
655; DISABLED:       # %bb.0: # %entry
656; DISABLED-NEXT:    cmpl $18, %edx
657; DISABLED-NEXT:    jl .LBB7_2
658; DISABLED-NEXT:  # %bb.1: # %if.then
659; DISABLED-NEXT:    movw %dx, 2(%rdi)
660; DISABLED-NEXT:  .LBB7_2: # %if.end
661; DISABLED-NEXT:    movups (%r8), %xmm0
662; DISABLED-NEXT:    movups %xmm0, (%rcx)
663; DISABLED-NEXT:    movups (%rdi), %xmm0
664; DISABLED-NEXT:    movups %xmm0, (%rsi)
665; DISABLED-NEXT:    retq
666;
667; CHECK-AVX2-LABEL: test_type16:
668; CHECK-AVX2:       # %bb.0: # %entry
669; CHECK-AVX2-NEXT:    cmpl $18, %edx
670; CHECK-AVX2-NEXT:    jl .LBB7_2
671; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
672; CHECK-AVX2-NEXT:    movw %dx, 2(%rdi)
673; CHECK-AVX2-NEXT:  .LBB7_2: # %if.end
674; CHECK-AVX2-NEXT:    vmovups (%r8), %xmm0
675; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rcx)
676; CHECK-AVX2-NEXT:    movzwl (%rdi), %eax
677; CHECK-AVX2-NEXT:    movw %ax, (%rsi)
678; CHECK-AVX2-NEXT:    movzwl 2(%rdi), %eax
679; CHECK-AVX2-NEXT:    movw %ax, 2(%rsi)
680; CHECK-AVX2-NEXT:    movq 4(%rdi), %rax
681; CHECK-AVX2-NEXT:    movq %rax, 4(%rsi)
682; CHECK-AVX2-NEXT:    movl 12(%rdi), %eax
683; CHECK-AVX2-NEXT:    movl %eax, 12(%rsi)
684; CHECK-AVX2-NEXT:    retq
685;
686; CHECK-AVX512-LABEL: test_type16:
687; CHECK-AVX512:       # %bb.0: # %entry
688; CHECK-AVX512-NEXT:    cmpl $18, %edx
689; CHECK-AVX512-NEXT:    jl .LBB7_2
690; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
691; CHECK-AVX512-NEXT:    movw %dx, 2(%rdi)
692; CHECK-AVX512-NEXT:  .LBB7_2: # %if.end
693; CHECK-AVX512-NEXT:    vmovups (%r8), %xmm0
694; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rcx)
695; CHECK-AVX512-NEXT:    movzwl (%rdi), %eax
696; CHECK-AVX512-NEXT:    movw %ax, (%rsi)
697; CHECK-AVX512-NEXT:    movzwl 2(%rdi), %eax
698; CHECK-AVX512-NEXT:    movw %ax, 2(%rsi)
699; CHECK-AVX512-NEXT:    movq 4(%rdi), %rax
700; CHECK-AVX512-NEXT:    movq %rax, 4(%rsi)
701; CHECK-AVX512-NEXT:    movl 12(%rdi), %eax
702; CHECK-AVX512-NEXT:    movl %eax, 12(%rsi)
703; CHECK-AVX512-NEXT:    retq
704entry:
705  %cmp = icmp sgt i32 %x, 17
706  br i1 %cmp, label %if.then, label %if.end
707
708if.then:                                          ; preds = %entry
709  %conv = trunc i32 %x to i16
710  %b = getelementptr inbounds %struct.S5, %struct.S5* %s1, i64 0, i32 1
711  store i16 %conv, i16* %b, align 2
712  br label %if.end
713
714if.end:                                           ; preds = %if.then, %entry
715  %0 = bitcast %struct.S5* %s3 to i8*
716  %1 = bitcast %struct.S5* %s4 to i8*
717  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 2, i1 false)
718  %2 = bitcast %struct.S5* %s2 to i8*
719  %3 = bitcast %struct.S5* %s1 to i8*
720  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 2, i1 false)
721  ret void
722}
723
724%struct.S6 = type { [4 x i32], i32, i32, i32, i32 }
725
726; Function Attrs: nounwind uwtable
727define void @test_stack(%struct.S6* noalias nocapture sret(%struct.S6) %agg.result, %struct.S6* byval(%struct.S6) nocapture readnone align 8 %s1, %struct.S6* byval(%struct.S6) nocapture align 8 %s2, i32 %x) local_unnamed_addr #0 {
728; CHECK-LABEL: test_stack:
729; CHECK:       # %bb.0: # %entry
730; CHECK-NEXT:    movq %rdi, %rax
731; CHECK-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
732; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
733; CHECK-NEXT:    movups %xmm0, (%rdi)
734; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
735; CHECK-NEXT:    movq %rcx, 16(%rdi)
736; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
737; CHECK-NEXT:    movl %ecx, 24(%rdi)
738; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
739; CHECK-NEXT:    movl %ecx, 28(%rdi)
740; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
741; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
742; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %edx
743; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %esi
744; CHECK-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
745; CHECK-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
746; CHECK-NEXT:    movl %edx, {{[0-9]+}}(%rsp)
747; CHECK-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
748; CHECK-NEXT:    retq
749;
750; DISABLED-LABEL: test_stack:
751; DISABLED:       # %bb.0: # %entry
752; DISABLED-NEXT:    movq %rdi, %rax
753; DISABLED-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
754; DISABLED-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
755; DISABLED-NEXT:    movups %xmm0, (%rdi)
756; DISABLED-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
757; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
758; DISABLED-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
759; DISABLED-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
760; DISABLED-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
761; DISABLED-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
762; DISABLED-NEXT:    retq
763;
764; CHECK-AVX2-LABEL: test_stack:
765; CHECK-AVX2:       # %bb.0: # %entry
766; CHECK-AVX2-NEXT:    movq %rdi, %rax
767; CHECK-AVX2-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
768; CHECK-AVX2-NEXT:    vmovups {{[0-9]+}}(%rsp), %xmm0
769; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rdi)
770; CHECK-AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
771; CHECK-AVX2-NEXT:    movq %rcx, 16(%rdi)
772; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
773; CHECK-AVX2-NEXT:    movl %ecx, 24(%rdi)
774; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
775; CHECK-AVX2-NEXT:    movl %ecx, 28(%rdi)
776; CHECK-AVX2-NEXT:    vmovups {{[0-9]+}}(%rsp), %xmm0
777; CHECK-AVX2-NEXT:    vmovups %xmm0, {{[0-9]+}}(%rsp)
778; CHECK-AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
779; CHECK-AVX2-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
780; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
781; CHECK-AVX2-NEXT:    movl %ecx, {{[0-9]+}}(%rsp)
782; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
783; CHECK-AVX2-NEXT:    movl %ecx, {{[0-9]+}}(%rsp)
784; CHECK-AVX2-NEXT:    retq
785;
786; CHECK-AVX512-LABEL: test_stack:
787; CHECK-AVX512:       # %bb.0: # %entry
788; CHECK-AVX512-NEXT:    movq %rdi, %rax
789; CHECK-AVX512-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
790; CHECK-AVX512-NEXT:    vmovups {{[0-9]+}}(%rsp), %xmm0
791; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rdi)
792; CHECK-AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
793; CHECK-AVX512-NEXT:    movq %rcx, 16(%rdi)
794; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
795; CHECK-AVX512-NEXT:    movl %ecx, 24(%rdi)
796; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
797; CHECK-AVX512-NEXT:    movl %ecx, 28(%rdi)
798; CHECK-AVX512-NEXT:    vmovups {{[0-9]+}}(%rsp), %xmm0
799; CHECK-AVX512-NEXT:    vmovups %xmm0, {{[0-9]+}}(%rsp)
800; CHECK-AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
801; CHECK-AVX512-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
802; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
803; CHECK-AVX512-NEXT:    movl %ecx, {{[0-9]+}}(%rsp)
804; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
805; CHECK-AVX512-NEXT:    movl %ecx, {{[0-9]+}}(%rsp)
806; CHECK-AVX512-NEXT:    retq
807entry:
808  %s6.sroa.0.0..sroa_cast1 = bitcast %struct.S6* %s2 to i8*
809  %s6.sroa.3.0..sroa_idx4 = getelementptr inbounds %struct.S6, %struct.S6* %s2, i64 0, i32 3
810  store i32 %x, i32* %s6.sroa.3.0..sroa_idx4, align 8
811  %0 = bitcast %struct.S6* %agg.result to i8*
812  %s6.sroa.0.0..sroa_cast2 = bitcast %struct.S6* %s1 to i8*
813  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* nonnull %s6.sroa.0.0..sroa_cast1, i64 32, i32 4, i1 false)
814  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %s6.sroa.0.0..sroa_cast2, i8* nonnull %s6.sroa.0.0..sroa_cast1, i64 32, i32 4, i1 false)
815
816  ret void
817}
818
819; Function Attrs: nounwind uwtable
820define void @test_limit_all(%struct.S* noalias  %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
821; CHECK-LABEL: test_limit_all:
822; CHECK:       # %bb.0: # %entry
823; CHECK-NEXT:    pushq %rbp
824; CHECK-NEXT:    .cfi_def_cfa_offset 16
825; CHECK-NEXT:    pushq %r15
826; CHECK-NEXT:    .cfi_def_cfa_offset 24
827; CHECK-NEXT:    pushq %r14
828; CHECK-NEXT:    .cfi_def_cfa_offset 32
829; CHECK-NEXT:    pushq %r12
830; CHECK-NEXT:    .cfi_def_cfa_offset 40
831; CHECK-NEXT:    pushq %rbx
832; CHECK-NEXT:    .cfi_def_cfa_offset 48
833; CHECK-NEXT:    .cfi_offset %rbx, -48
834; CHECK-NEXT:    .cfi_offset %r12, -40
835; CHECK-NEXT:    .cfi_offset %r14, -32
836; CHECK-NEXT:    .cfi_offset %r15, -24
837; CHECK-NEXT:    .cfi_offset %rbp, -16
838; CHECK-NEXT:    movq %r8, %r15
839; CHECK-NEXT:    movq %rcx, %r14
840; CHECK-NEXT:    movl %edx, %ebp
841; CHECK-NEXT:    movq %rsi, %r12
842; CHECK-NEXT:    movq %rdi, %rbx
843; CHECK-NEXT:    movl %r9d, 12(%rdi)
844; CHECK-NEXT:    callq bar@PLT
845; CHECK-NEXT:    cmpl $18, %ebp
846; CHECK-NEXT:    jl .LBB9_2
847; CHECK-NEXT:  # %bb.1: # %if.then
848; CHECK-NEXT:    movl %ebp, 4(%rbx)
849; CHECK-NEXT:    movq %rbx, %rdi
850; CHECK-NEXT:    callq bar@PLT
851; CHECK-NEXT:  .LBB9_2: # %if.end
852; CHECK-NEXT:    movups (%r15), %xmm0
853; CHECK-NEXT:    movups %xmm0, (%r14)
854; CHECK-NEXT:    movups (%rbx), %xmm0
855; CHECK-NEXT:    movups %xmm0, (%r12)
856; CHECK-NEXT:    popq %rbx
857; CHECK-NEXT:    .cfi_def_cfa_offset 40
858; CHECK-NEXT:    popq %r12
859; CHECK-NEXT:    .cfi_def_cfa_offset 32
860; CHECK-NEXT:    popq %r14
861; CHECK-NEXT:    .cfi_def_cfa_offset 24
862; CHECK-NEXT:    popq %r15
863; CHECK-NEXT:    .cfi_def_cfa_offset 16
864; CHECK-NEXT:    popq %rbp
865; CHECK-NEXT:    .cfi_def_cfa_offset 8
866; CHECK-NEXT:    retq
867;
868; DISABLED-LABEL: test_limit_all:
869; DISABLED:       # %bb.0: # %entry
870; DISABLED-NEXT:    pushq %rbp
871; DISABLED-NEXT:    .cfi_def_cfa_offset 16
872; DISABLED-NEXT:    pushq %r15
873; DISABLED-NEXT:    .cfi_def_cfa_offset 24
874; DISABLED-NEXT:    pushq %r14
875; DISABLED-NEXT:    .cfi_def_cfa_offset 32
876; DISABLED-NEXT:    pushq %r12
877; DISABLED-NEXT:    .cfi_def_cfa_offset 40
878; DISABLED-NEXT:    pushq %rbx
879; DISABLED-NEXT:    .cfi_def_cfa_offset 48
880; DISABLED-NEXT:    .cfi_offset %rbx, -48
881; DISABLED-NEXT:    .cfi_offset %r12, -40
882; DISABLED-NEXT:    .cfi_offset %r14, -32
883; DISABLED-NEXT:    .cfi_offset %r15, -24
884; DISABLED-NEXT:    .cfi_offset %rbp, -16
885; DISABLED-NEXT:    movq %r8, %r15
886; DISABLED-NEXT:    movq %rcx, %r14
887; DISABLED-NEXT:    movl %edx, %ebp
888; DISABLED-NEXT:    movq %rsi, %r12
889; DISABLED-NEXT:    movq %rdi, %rbx
890; DISABLED-NEXT:    movl %r9d, 12(%rdi)
891; DISABLED-NEXT:    callq bar@PLT
892; DISABLED-NEXT:    cmpl $18, %ebp
893; DISABLED-NEXT:    jl .LBB9_2
894; DISABLED-NEXT:  # %bb.1: # %if.then
895; DISABLED-NEXT:    movl %ebp, 4(%rbx)
896; DISABLED-NEXT:    movq %rbx, %rdi
897; DISABLED-NEXT:    callq bar@PLT
898; DISABLED-NEXT:  .LBB9_2: # %if.end
899; DISABLED-NEXT:    movups (%r15), %xmm0
900; DISABLED-NEXT:    movups %xmm0, (%r14)
901; DISABLED-NEXT:    movups (%rbx), %xmm0
902; DISABLED-NEXT:    movups %xmm0, (%r12)
903; DISABLED-NEXT:    popq %rbx
904; DISABLED-NEXT:    .cfi_def_cfa_offset 40
905; DISABLED-NEXT:    popq %r12
906; DISABLED-NEXT:    .cfi_def_cfa_offset 32
907; DISABLED-NEXT:    popq %r14
908; DISABLED-NEXT:    .cfi_def_cfa_offset 24
909; DISABLED-NEXT:    popq %r15
910; DISABLED-NEXT:    .cfi_def_cfa_offset 16
911; DISABLED-NEXT:    popq %rbp
912; DISABLED-NEXT:    .cfi_def_cfa_offset 8
913; DISABLED-NEXT:    retq
914;
915; CHECK-AVX2-LABEL: test_limit_all:
916; CHECK-AVX2:       # %bb.0: # %entry
917; CHECK-AVX2-NEXT:    pushq %rbp
918; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 16
919; CHECK-AVX2-NEXT:    pushq %r15
920; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 24
921; CHECK-AVX2-NEXT:    pushq %r14
922; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 32
923; CHECK-AVX2-NEXT:    pushq %r12
924; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 40
925; CHECK-AVX2-NEXT:    pushq %rbx
926; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 48
927; CHECK-AVX2-NEXT:    .cfi_offset %rbx, -48
928; CHECK-AVX2-NEXT:    .cfi_offset %r12, -40
929; CHECK-AVX2-NEXT:    .cfi_offset %r14, -32
930; CHECK-AVX2-NEXT:    .cfi_offset %r15, -24
931; CHECK-AVX2-NEXT:    .cfi_offset %rbp, -16
932; CHECK-AVX2-NEXT:    movq %r8, %r15
933; CHECK-AVX2-NEXT:    movq %rcx, %r14
934; CHECK-AVX2-NEXT:    movl %edx, %ebp
935; CHECK-AVX2-NEXT:    movq %rsi, %r12
936; CHECK-AVX2-NEXT:    movq %rdi, %rbx
937; CHECK-AVX2-NEXT:    movl %r9d, 12(%rdi)
938; CHECK-AVX2-NEXT:    callq bar@PLT
939; CHECK-AVX2-NEXT:    cmpl $18, %ebp
940; CHECK-AVX2-NEXT:    jl .LBB9_2
941; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
942; CHECK-AVX2-NEXT:    movl %ebp, 4(%rbx)
943; CHECK-AVX2-NEXT:    movq %rbx, %rdi
944; CHECK-AVX2-NEXT:    callq bar@PLT
945; CHECK-AVX2-NEXT:  .LBB9_2: # %if.end
946; CHECK-AVX2-NEXT:    vmovups (%r15), %xmm0
947; CHECK-AVX2-NEXT:    vmovups %xmm0, (%r14)
948; CHECK-AVX2-NEXT:    vmovups (%rbx), %xmm0
949; CHECK-AVX2-NEXT:    vmovups %xmm0, (%r12)
950; CHECK-AVX2-NEXT:    popq %rbx
951; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 40
952; CHECK-AVX2-NEXT:    popq %r12
953; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 32
954; CHECK-AVX2-NEXT:    popq %r14
955; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 24
956; CHECK-AVX2-NEXT:    popq %r15
957; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 16
958; CHECK-AVX2-NEXT:    popq %rbp
959; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 8
960; CHECK-AVX2-NEXT:    retq
961;
962; CHECK-AVX512-LABEL: test_limit_all:
963; CHECK-AVX512:       # %bb.0: # %entry
964; CHECK-AVX512-NEXT:    pushq %rbp
965; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 16
966; CHECK-AVX512-NEXT:    pushq %r15
967; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 24
968; CHECK-AVX512-NEXT:    pushq %r14
969; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 32
970; CHECK-AVX512-NEXT:    pushq %r12
971; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 40
972; CHECK-AVX512-NEXT:    pushq %rbx
973; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 48
974; CHECK-AVX512-NEXT:    .cfi_offset %rbx, -48
975; CHECK-AVX512-NEXT:    .cfi_offset %r12, -40
976; CHECK-AVX512-NEXT:    .cfi_offset %r14, -32
977; CHECK-AVX512-NEXT:    .cfi_offset %r15, -24
978; CHECK-AVX512-NEXT:    .cfi_offset %rbp, -16
979; CHECK-AVX512-NEXT:    movq %r8, %r15
980; CHECK-AVX512-NEXT:    movq %rcx, %r14
981; CHECK-AVX512-NEXT:    movl %edx, %ebp
982; CHECK-AVX512-NEXT:    movq %rsi, %r12
983; CHECK-AVX512-NEXT:    movq %rdi, %rbx
984; CHECK-AVX512-NEXT:    movl %r9d, 12(%rdi)
985; CHECK-AVX512-NEXT:    callq bar@PLT
986; CHECK-AVX512-NEXT:    cmpl $18, %ebp
987; CHECK-AVX512-NEXT:    jl .LBB9_2
988; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
989; CHECK-AVX512-NEXT:    movl %ebp, 4(%rbx)
990; CHECK-AVX512-NEXT:    movq %rbx, %rdi
991; CHECK-AVX512-NEXT:    callq bar@PLT
992; CHECK-AVX512-NEXT:  .LBB9_2: # %if.end
993; CHECK-AVX512-NEXT:    vmovups (%r15), %xmm0
994; CHECK-AVX512-NEXT:    vmovups %xmm0, (%r14)
995; CHECK-AVX512-NEXT:    vmovups (%rbx), %xmm0
996; CHECK-AVX512-NEXT:    vmovups %xmm0, (%r12)
997; CHECK-AVX512-NEXT:    popq %rbx
998; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 40
999; CHECK-AVX512-NEXT:    popq %r12
1000; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 32
1001; CHECK-AVX512-NEXT:    popq %r14
1002; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 24
1003; CHECK-AVX512-NEXT:    popq %r15
1004; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 16
1005; CHECK-AVX512-NEXT:    popq %rbp
1006; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 8
1007; CHECK-AVX512-NEXT:    retq
1008entry:
1009  %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3
1010  store i32 %x2, i32* %d, align 4
1011  tail call void @bar(%struct.S* %s1) #3
1012  %cmp = icmp sgt i32 %x, 17
1013  br i1 %cmp, label %if.then, label %if.end
1014
1015if.then:                                          ; preds = %entry
1016  %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
1017  store i32 %x, i32* %b, align 4
1018  tail call void @bar(%struct.S* nonnull %s1) #3
1019  br label %if.end
1020
1021if.end:                                           ; preds = %if.then, %entry
1022  %0 = bitcast %struct.S* %s3 to i8*
1023  %1 = bitcast %struct.S* %s4 to i8*
1024  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
1025  %2 = bitcast %struct.S* %s2 to i8*
1026  %3 = bitcast %struct.S* %s1 to i8*
1027  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
1028  ret void
1029}
1030
1031; Function Attrs: nounwind uwtable
1032define void @test_limit_one_pred(%struct.S* noalias %s1, %struct.S* nocapture %s2, i32 %x, %struct.S* nocapture %s3, %struct.S* nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
1033; CHECK-LABEL: test_limit_one_pred:
1034; CHECK:       # %bb.0: # %entry
1035; CHECK-NEXT:    pushq %r15
1036; CHECK-NEXT:    .cfi_def_cfa_offset 16
1037; CHECK-NEXT:    pushq %r14
1038; CHECK-NEXT:    .cfi_def_cfa_offset 24
1039; CHECK-NEXT:    pushq %r12
1040; CHECK-NEXT:    .cfi_def_cfa_offset 32
1041; CHECK-NEXT:    pushq %rbx
1042; CHECK-NEXT:    .cfi_def_cfa_offset 40
1043; CHECK-NEXT:    pushq %rax
1044; CHECK-NEXT:    .cfi_def_cfa_offset 48
1045; CHECK-NEXT:    .cfi_offset %rbx, -40
1046; CHECK-NEXT:    .cfi_offset %r12, -32
1047; CHECK-NEXT:    .cfi_offset %r14, -24
1048; CHECK-NEXT:    .cfi_offset %r15, -16
1049; CHECK-NEXT:    movq %r8, %r12
1050; CHECK-NEXT:    movq %rcx, %r15
1051; CHECK-NEXT:    movq %rsi, %r14
1052; CHECK-NEXT:    movq %rdi, %rbx
1053; CHECK-NEXT:    movl %r9d, 12(%rdi)
1054; CHECK-NEXT:    cmpl $18, %edx
1055; CHECK-NEXT:    jl .LBB10_2
1056; CHECK-NEXT:  # %bb.1: # %if.then
1057; CHECK-NEXT:    movl %edx, 4(%rbx)
1058; CHECK-NEXT:    movq %rbx, %rdi
1059; CHECK-NEXT:    callq bar@PLT
1060; CHECK-NEXT:  .LBB10_2: # %if.end
1061; CHECK-NEXT:    movups (%r12), %xmm0
1062; CHECK-NEXT:    movups %xmm0, (%r15)
1063; CHECK-NEXT:    movq (%rbx), %rax
1064; CHECK-NEXT:    movq %rax, (%r14)
1065; CHECK-NEXT:    movl 8(%rbx), %eax
1066; CHECK-NEXT:    movl %eax, 8(%r14)
1067; CHECK-NEXT:    movl 12(%rbx), %eax
1068; CHECK-NEXT:    movl %eax, 12(%r14)
1069; CHECK-NEXT:    addq $8, %rsp
1070; CHECK-NEXT:    .cfi_def_cfa_offset 40
1071; CHECK-NEXT:    popq %rbx
1072; CHECK-NEXT:    .cfi_def_cfa_offset 32
1073; CHECK-NEXT:    popq %r12
1074; CHECK-NEXT:    .cfi_def_cfa_offset 24
1075; CHECK-NEXT:    popq %r14
1076; CHECK-NEXT:    .cfi_def_cfa_offset 16
1077; CHECK-NEXT:    popq %r15
1078; CHECK-NEXT:    .cfi_def_cfa_offset 8
1079; CHECK-NEXT:    retq
1080;
1081; DISABLED-LABEL: test_limit_one_pred:
1082; DISABLED:       # %bb.0: # %entry
1083; DISABLED-NEXT:    pushq %r15
1084; DISABLED-NEXT:    .cfi_def_cfa_offset 16
1085; DISABLED-NEXT:    pushq %r14
1086; DISABLED-NEXT:    .cfi_def_cfa_offset 24
1087; DISABLED-NEXT:    pushq %r12
1088; DISABLED-NEXT:    .cfi_def_cfa_offset 32
1089; DISABLED-NEXT:    pushq %rbx
1090; DISABLED-NEXT:    .cfi_def_cfa_offset 40
1091; DISABLED-NEXT:    pushq %rax
1092; DISABLED-NEXT:    .cfi_def_cfa_offset 48
1093; DISABLED-NEXT:    .cfi_offset %rbx, -40
1094; DISABLED-NEXT:    .cfi_offset %r12, -32
1095; DISABLED-NEXT:    .cfi_offset %r14, -24
1096; DISABLED-NEXT:    .cfi_offset %r15, -16
1097; DISABLED-NEXT:    movq %r8, %r15
1098; DISABLED-NEXT:    movq %rcx, %r14
1099; DISABLED-NEXT:    movq %rsi, %r12
1100; DISABLED-NEXT:    movq %rdi, %rbx
1101; DISABLED-NEXT:    movl %r9d, 12(%rdi)
1102; DISABLED-NEXT:    cmpl $18, %edx
1103; DISABLED-NEXT:    jl .LBB10_2
1104; DISABLED-NEXT:  # %bb.1: # %if.then
1105; DISABLED-NEXT:    movl %edx, 4(%rbx)
1106; DISABLED-NEXT:    movq %rbx, %rdi
1107; DISABLED-NEXT:    callq bar@PLT
1108; DISABLED-NEXT:  .LBB10_2: # %if.end
1109; DISABLED-NEXT:    movups (%r15), %xmm0
1110; DISABLED-NEXT:    movups %xmm0, (%r14)
1111; DISABLED-NEXT:    movups (%rbx), %xmm0
1112; DISABLED-NEXT:    movups %xmm0, (%r12)
1113; DISABLED-NEXT:    addq $8, %rsp
1114; DISABLED-NEXT:    .cfi_def_cfa_offset 40
1115; DISABLED-NEXT:    popq %rbx
1116; DISABLED-NEXT:    .cfi_def_cfa_offset 32
1117; DISABLED-NEXT:    popq %r12
1118; DISABLED-NEXT:    .cfi_def_cfa_offset 24
1119; DISABLED-NEXT:    popq %r14
1120; DISABLED-NEXT:    .cfi_def_cfa_offset 16
1121; DISABLED-NEXT:    popq %r15
1122; DISABLED-NEXT:    .cfi_def_cfa_offset 8
1123; DISABLED-NEXT:    retq
1124;
1125; CHECK-AVX2-LABEL: test_limit_one_pred:
1126; CHECK-AVX2:       # %bb.0: # %entry
1127; CHECK-AVX2-NEXT:    pushq %r15
1128; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 16
1129; CHECK-AVX2-NEXT:    pushq %r14
1130; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 24
1131; CHECK-AVX2-NEXT:    pushq %r12
1132; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 32
1133; CHECK-AVX2-NEXT:    pushq %rbx
1134; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 40
1135; CHECK-AVX2-NEXT:    pushq %rax
1136; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 48
1137; CHECK-AVX2-NEXT:    .cfi_offset %rbx, -40
1138; CHECK-AVX2-NEXT:    .cfi_offset %r12, -32
1139; CHECK-AVX2-NEXT:    .cfi_offset %r14, -24
1140; CHECK-AVX2-NEXT:    .cfi_offset %r15, -16
1141; CHECK-AVX2-NEXT:    movq %r8, %r12
1142; CHECK-AVX2-NEXT:    movq %rcx, %r15
1143; CHECK-AVX2-NEXT:    movq %rsi, %r14
1144; CHECK-AVX2-NEXT:    movq %rdi, %rbx
1145; CHECK-AVX2-NEXT:    movl %r9d, 12(%rdi)
1146; CHECK-AVX2-NEXT:    cmpl $18, %edx
1147; CHECK-AVX2-NEXT:    jl .LBB10_2
1148; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
1149; CHECK-AVX2-NEXT:    movl %edx, 4(%rbx)
1150; CHECK-AVX2-NEXT:    movq %rbx, %rdi
1151; CHECK-AVX2-NEXT:    callq bar@PLT
1152; CHECK-AVX2-NEXT:  .LBB10_2: # %if.end
1153; CHECK-AVX2-NEXT:    vmovups (%r12), %xmm0
1154; CHECK-AVX2-NEXT:    vmovups %xmm0, (%r15)
1155; CHECK-AVX2-NEXT:    movq (%rbx), %rax
1156; CHECK-AVX2-NEXT:    movq %rax, (%r14)
1157; CHECK-AVX2-NEXT:    movl 8(%rbx), %eax
1158; CHECK-AVX2-NEXT:    movl %eax, 8(%r14)
1159; CHECK-AVX2-NEXT:    movl 12(%rbx), %eax
1160; CHECK-AVX2-NEXT:    movl %eax, 12(%r14)
1161; CHECK-AVX2-NEXT:    addq $8, %rsp
1162; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 40
1163; CHECK-AVX2-NEXT:    popq %rbx
1164; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 32
1165; CHECK-AVX2-NEXT:    popq %r12
1166; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 24
1167; CHECK-AVX2-NEXT:    popq %r14
1168; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 16
1169; CHECK-AVX2-NEXT:    popq %r15
1170; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 8
1171; CHECK-AVX2-NEXT:    retq
1172;
1173; CHECK-AVX512-LABEL: test_limit_one_pred:
1174; CHECK-AVX512:       # %bb.0: # %entry
1175; CHECK-AVX512-NEXT:    pushq %r15
1176; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 16
1177; CHECK-AVX512-NEXT:    pushq %r14
1178; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 24
1179; CHECK-AVX512-NEXT:    pushq %r12
1180; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 32
1181; CHECK-AVX512-NEXT:    pushq %rbx
1182; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 40
1183; CHECK-AVX512-NEXT:    pushq %rax
1184; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 48
1185; CHECK-AVX512-NEXT:    .cfi_offset %rbx, -40
1186; CHECK-AVX512-NEXT:    .cfi_offset %r12, -32
1187; CHECK-AVX512-NEXT:    .cfi_offset %r14, -24
1188; CHECK-AVX512-NEXT:    .cfi_offset %r15, -16
1189; CHECK-AVX512-NEXT:    movq %r8, %r12
1190; CHECK-AVX512-NEXT:    movq %rcx, %r15
1191; CHECK-AVX512-NEXT:    movq %rsi, %r14
1192; CHECK-AVX512-NEXT:    movq %rdi, %rbx
1193; CHECK-AVX512-NEXT:    movl %r9d, 12(%rdi)
1194; CHECK-AVX512-NEXT:    cmpl $18, %edx
1195; CHECK-AVX512-NEXT:    jl .LBB10_2
1196; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
1197; CHECK-AVX512-NEXT:    movl %edx, 4(%rbx)
1198; CHECK-AVX512-NEXT:    movq %rbx, %rdi
1199; CHECK-AVX512-NEXT:    callq bar@PLT
1200; CHECK-AVX512-NEXT:  .LBB10_2: # %if.end
1201; CHECK-AVX512-NEXT:    vmovups (%r12), %xmm0
1202; CHECK-AVX512-NEXT:    vmovups %xmm0, (%r15)
1203; CHECK-AVX512-NEXT:    movq (%rbx), %rax
1204; CHECK-AVX512-NEXT:    movq %rax, (%r14)
1205; CHECK-AVX512-NEXT:    movl 8(%rbx), %eax
1206; CHECK-AVX512-NEXT:    movl %eax, 8(%r14)
1207; CHECK-AVX512-NEXT:    movl 12(%rbx), %eax
1208; CHECK-AVX512-NEXT:    movl %eax, 12(%r14)
1209; CHECK-AVX512-NEXT:    addq $8, %rsp
1210; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 40
1211; CHECK-AVX512-NEXT:    popq %rbx
1212; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 32
1213; CHECK-AVX512-NEXT:    popq %r12
1214; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 24
1215; CHECK-AVX512-NEXT:    popq %r14
1216; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 16
1217; CHECK-AVX512-NEXT:    popq %r15
1218; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 8
1219; CHECK-AVX512-NEXT:    retq
1220entry:
1221  %d = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 3
1222  store i32 %x2, i32* %d, align 4
1223  %cmp = icmp sgt i32 %x, 17
1224  br i1 %cmp, label %if.then, label %if.end
1225
1226if.then:                                          ; preds = %entry
1227  %b = getelementptr inbounds %struct.S, %struct.S* %s1, i64 0, i32 1
1228  store i32 %x, i32* %b, align 4
1229  tail call void @bar(%struct.S* nonnull %s1) #3
1230  br label %if.end
1231
1232if.end:                                           ; preds = %if.then, %entry
1233  %0 = bitcast %struct.S* %s3 to i8*
1234  %1 = bitcast %struct.S* %s4 to i8*
1235  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 4, i1 false)
1236  %2 = bitcast %struct.S* %s2 to i8*
1237  %3 = bitcast %struct.S* %s1 to i8*
1238  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 16, i32 4, i1 false)
1239  ret void
1240}
1241
1242
1243declare void @bar(%struct.S*) local_unnamed_addr #1
1244
1245
1246%struct.S7 = type { float, float, float , float, float, float, float, float }
1247
1248; Function Attrs: nounwind uwtable
1249define void @test_conditional_block_float(%struct.S7* nocapture noalias %s1, %struct.S7* nocapture %s2, i32 %x, %struct.S7* nocapture %s3, %struct.S7* nocapture readonly %s4, float %y) local_unnamed_addr #0 {
1250; CHECK-LABEL: test_conditional_block_float:
1251; CHECK:       # %bb.0: # %entry
1252; CHECK-NEXT:    cmpl $18, %edx
1253; CHECK-NEXT:    jl .LBB11_2
1254; CHECK-NEXT:  # %bb.1: # %if.then
1255; CHECK-NEXT:    movl $1065353216, 4(%rdi) # imm = 0x3F800000
1256; CHECK-NEXT:  .LBB11_2: # %if.end
1257; CHECK-NEXT:    movups (%r8), %xmm0
1258; CHECK-NEXT:    movups 16(%r8), %xmm1
1259; CHECK-NEXT:    movups %xmm1, 16(%rcx)
1260; CHECK-NEXT:    movups %xmm0, (%rcx)
1261; CHECK-NEXT:    movl (%rdi), %eax
1262; CHECK-NEXT:    movl 4(%rdi), %ecx
1263; CHECK-NEXT:    movq 8(%rdi), %rdx
1264; CHECK-NEXT:    movups 16(%rdi), %xmm0
1265; CHECK-NEXT:    movups %xmm0, 16(%rsi)
1266; CHECK-NEXT:    movl %eax, (%rsi)
1267; CHECK-NEXT:    movl %ecx, 4(%rsi)
1268; CHECK-NEXT:    movq %rdx, 8(%rsi)
1269; CHECK-NEXT:    retq
1270;
1271; DISABLED-LABEL: test_conditional_block_float:
1272; DISABLED:       # %bb.0: # %entry
1273; DISABLED-NEXT:    cmpl $18, %edx
1274; DISABLED-NEXT:    jl .LBB11_2
1275; DISABLED-NEXT:  # %bb.1: # %if.then
1276; DISABLED-NEXT:    movl $1065353216, 4(%rdi) # imm = 0x3F800000
1277; DISABLED-NEXT:  .LBB11_2: # %if.end
1278; DISABLED-NEXT:    movups (%r8), %xmm0
1279; DISABLED-NEXT:    movups 16(%r8), %xmm1
1280; DISABLED-NEXT:    movups %xmm1, 16(%rcx)
1281; DISABLED-NEXT:    movups %xmm0, (%rcx)
1282; DISABLED-NEXT:    movups (%rdi), %xmm0
1283; DISABLED-NEXT:    movups 16(%rdi), %xmm1
1284; DISABLED-NEXT:    movups %xmm1, 16(%rsi)
1285; DISABLED-NEXT:    movups %xmm0, (%rsi)
1286; DISABLED-NEXT:    retq
1287;
1288; CHECK-AVX2-LABEL: test_conditional_block_float:
1289; CHECK-AVX2:       # %bb.0: # %entry
1290; CHECK-AVX2-NEXT:    cmpl $18, %edx
1291; CHECK-AVX2-NEXT:    jl .LBB11_2
1292; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
1293; CHECK-AVX2-NEXT:    movl $1065353216, 4(%rdi) # imm = 0x3F800000
1294; CHECK-AVX2-NEXT:  .LBB11_2: # %if.end
1295; CHECK-AVX2-NEXT:    vmovups (%r8), %ymm0
1296; CHECK-AVX2-NEXT:    vmovups %ymm0, (%rcx)
1297; CHECK-AVX2-NEXT:    movl (%rdi), %eax
1298; CHECK-AVX2-NEXT:    movl %eax, (%rsi)
1299; CHECK-AVX2-NEXT:    movl 4(%rdi), %eax
1300; CHECK-AVX2-NEXT:    movl %eax, 4(%rsi)
1301; CHECK-AVX2-NEXT:    vmovups 8(%rdi), %xmm0
1302; CHECK-AVX2-NEXT:    vmovups %xmm0, 8(%rsi)
1303; CHECK-AVX2-NEXT:    movq 24(%rdi), %rax
1304; CHECK-AVX2-NEXT:    movq %rax, 24(%rsi)
1305; CHECK-AVX2-NEXT:    vzeroupper
1306; CHECK-AVX2-NEXT:    retq
1307;
1308; CHECK-AVX512-LABEL: test_conditional_block_float:
1309; CHECK-AVX512:       # %bb.0: # %entry
1310; CHECK-AVX512-NEXT:    cmpl $18, %edx
1311; CHECK-AVX512-NEXT:    jl .LBB11_2
1312; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
1313; CHECK-AVX512-NEXT:    movl $1065353216, 4(%rdi) # imm = 0x3F800000
1314; CHECK-AVX512-NEXT:  .LBB11_2: # %if.end
1315; CHECK-AVX512-NEXT:    vmovups (%r8), %ymm0
1316; CHECK-AVX512-NEXT:    vmovups %ymm0, (%rcx)
1317; CHECK-AVX512-NEXT:    movl (%rdi), %eax
1318; CHECK-AVX512-NEXT:    movl %eax, (%rsi)
1319; CHECK-AVX512-NEXT:    movl 4(%rdi), %eax
1320; CHECK-AVX512-NEXT:    movl %eax, 4(%rsi)
1321; CHECK-AVX512-NEXT:    vmovups 8(%rdi), %xmm0
1322; CHECK-AVX512-NEXT:    vmovups %xmm0, 8(%rsi)
1323; CHECK-AVX512-NEXT:    movq 24(%rdi), %rax
1324; CHECK-AVX512-NEXT:    movq %rax, 24(%rsi)
1325; CHECK-AVX512-NEXT:    vzeroupper
1326; CHECK-AVX512-NEXT:    retq
1327entry:
1328  %cmp = icmp sgt i32 %x, 17
1329  br i1 %cmp, label %if.then, label %if.end
1330
1331if.then:                                          ; preds = %entry
1332  %b = getelementptr inbounds %struct.S7, %struct.S7* %s1, i64 0, i32 1
1333  store float 1.0, float* %b, align 4
1334  br label %if.end
1335
1336if.end:                                           ; preds = %if.then, %entry
1337  %0 = bitcast %struct.S7* %s3 to i8*
1338  %1 = bitcast %struct.S7* %s4 to i8*
1339  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 32, i32 4, i1 false)
1340  %2 = bitcast %struct.S7* %s2 to i8*
1341  %3 = bitcast %struct.S7* %s1 to i8*
1342  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 32, i32 4, i1 false)
1343  ret void
1344}
1345
1346%struct.S8 = type { i64, i64, i64, i64, i64, i64 }
1347
1348; Function Attrs: nounwind uwtable
1349define void @test_conditional_block_ymm(%struct.S8* nocapture noalias %s1, %struct.S8* nocapture %s2, i32 %x, %struct.S8* nocapture %s3, %struct.S8* nocapture readonly %s4) local_unnamed_addr #0 {
1350; CHECK-LABEL: test_conditional_block_ymm:
1351; CHECK:       # %bb.0: # %entry
1352; CHECK-NEXT:    cmpl $18, %edx
1353; CHECK-NEXT:    jl .LBB12_2
1354; CHECK-NEXT:  # %bb.1: # %if.then
1355; CHECK-NEXT:    movq $1, 8(%rdi)
1356; CHECK-NEXT:  .LBB12_2: # %if.end
1357; CHECK-NEXT:    movups (%r8), %xmm0
1358; CHECK-NEXT:    movups 16(%r8), %xmm1
1359; CHECK-NEXT:    movups %xmm1, 16(%rcx)
1360; CHECK-NEXT:    movups %xmm0, (%rcx)
1361; CHECK-NEXT:    movq (%rdi), %rax
1362; CHECK-NEXT:    movq 8(%rdi), %rcx
1363; CHECK-NEXT:    movups 16(%rdi), %xmm0
1364; CHECK-NEXT:    movups %xmm0, 16(%rsi)
1365; CHECK-NEXT:    movq %rax, (%rsi)
1366; CHECK-NEXT:    movq %rcx, 8(%rsi)
1367; CHECK-NEXT:    retq
1368;
1369; DISABLED-LABEL: test_conditional_block_ymm:
1370; DISABLED:       # %bb.0: # %entry
1371; DISABLED-NEXT:    cmpl $18, %edx
1372; DISABLED-NEXT:    jl .LBB12_2
1373; DISABLED-NEXT:  # %bb.1: # %if.then
1374; DISABLED-NEXT:    movq $1, 8(%rdi)
1375; DISABLED-NEXT:  .LBB12_2: # %if.end
1376; DISABLED-NEXT:    movups (%r8), %xmm0
1377; DISABLED-NEXT:    movups 16(%r8), %xmm1
1378; DISABLED-NEXT:    movups %xmm1, 16(%rcx)
1379; DISABLED-NEXT:    movups %xmm0, (%rcx)
1380; DISABLED-NEXT:    movups (%rdi), %xmm0
1381; DISABLED-NEXT:    movups 16(%rdi), %xmm1
1382; DISABLED-NEXT:    movups %xmm1, 16(%rsi)
1383; DISABLED-NEXT:    movups %xmm0, (%rsi)
1384; DISABLED-NEXT:    retq
1385;
1386; CHECK-AVX2-LABEL: test_conditional_block_ymm:
1387; CHECK-AVX2:       # %bb.0: # %entry
1388; CHECK-AVX2-NEXT:    cmpl $18, %edx
1389; CHECK-AVX2-NEXT:    jl .LBB12_2
1390; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
1391; CHECK-AVX2-NEXT:    movq $1, 8(%rdi)
1392; CHECK-AVX2-NEXT:  .LBB12_2: # %if.end
1393; CHECK-AVX2-NEXT:    vmovups (%r8), %ymm0
1394; CHECK-AVX2-NEXT:    vmovups %ymm0, (%rcx)
1395; CHECK-AVX2-NEXT:    movq (%rdi), %rax
1396; CHECK-AVX2-NEXT:    movq %rax, (%rsi)
1397; CHECK-AVX2-NEXT:    movq 8(%rdi), %rax
1398; CHECK-AVX2-NEXT:    movq %rax, 8(%rsi)
1399; CHECK-AVX2-NEXT:    vmovups 16(%rdi), %xmm0
1400; CHECK-AVX2-NEXT:    vmovups %xmm0, 16(%rsi)
1401; CHECK-AVX2-NEXT:    vzeroupper
1402; CHECK-AVX2-NEXT:    retq
1403;
1404; CHECK-AVX512-LABEL: test_conditional_block_ymm:
1405; CHECK-AVX512:       # %bb.0: # %entry
1406; CHECK-AVX512-NEXT:    cmpl $18, %edx
1407; CHECK-AVX512-NEXT:    jl .LBB12_2
1408; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
1409; CHECK-AVX512-NEXT:    movq $1, 8(%rdi)
1410; CHECK-AVX512-NEXT:  .LBB12_2: # %if.end
1411; CHECK-AVX512-NEXT:    vmovups (%r8), %ymm0
1412; CHECK-AVX512-NEXT:    vmovups %ymm0, (%rcx)
1413; CHECK-AVX512-NEXT:    movq (%rdi), %rax
1414; CHECK-AVX512-NEXT:    movq %rax, (%rsi)
1415; CHECK-AVX512-NEXT:    movq 8(%rdi), %rax
1416; CHECK-AVX512-NEXT:    movq %rax, 8(%rsi)
1417; CHECK-AVX512-NEXT:    vmovups 16(%rdi), %xmm0
1418; CHECK-AVX512-NEXT:    vmovups %xmm0, 16(%rsi)
1419; CHECK-AVX512-NEXT:    vzeroupper
1420; CHECK-AVX512-NEXT:    retq
1421entry:
1422  %cmp = icmp sgt i32 %x, 17
1423  br i1 %cmp, label %if.then, label %if.end
1424
1425if.then:                                          ; preds = %entry
1426  %b = getelementptr inbounds %struct.S8, %struct.S8* %s1, i64 0, i32 1
1427  store i64 1, i64* %b, align 4
1428  br label %if.end
1429
1430if.end:                                           ; preds = %if.then, %entry
1431  %0 = bitcast %struct.S8* %s3 to i8*
1432  %1 = bitcast %struct.S8* %s4 to i8*
1433  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 32, i32 4, i1 false)
1434  %2 = bitcast %struct.S8* %s2 to i8*
1435  %3 = bitcast %struct.S8* %s1 to i8*
1436  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 32, i32 4, i1 false)
1437  ret void
1438}
1439
1440define dso_local void @test_alias(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
1441; CHECK-LABEL: test_alias:
1442; CHECK:       # %bb.0: # %entry
1443; CHECK-NEXT:    movl %esi, (%rdi)
1444; CHECK-NEXT:    movups (%rdi), %xmm0
1445; CHECK-NEXT:    movups %xmm0, 4(%rdi)
1446; CHECK-NEXT:    retq
1447;
1448; DISABLED-LABEL: test_alias:
1449; DISABLED:       # %bb.0: # %entry
1450; DISABLED-NEXT:    movl %esi, (%rdi)
1451; DISABLED-NEXT:    movups (%rdi), %xmm0
1452; DISABLED-NEXT:    movups %xmm0, 4(%rdi)
1453; DISABLED-NEXT:    retq
1454;
1455; CHECK-AVX2-LABEL: test_alias:
1456; CHECK-AVX2:       # %bb.0: # %entry
1457; CHECK-AVX2-NEXT:    movl %esi, (%rdi)
1458; CHECK-AVX2-NEXT:    vmovups (%rdi), %xmm0
1459; CHECK-AVX2-NEXT:    vmovups %xmm0, 4(%rdi)
1460; CHECK-AVX2-NEXT:    retq
1461;
1462; CHECK-AVX512-LABEL: test_alias:
1463; CHECK-AVX512:       # %bb.0: # %entry
1464; CHECK-AVX512-NEXT:    movl %esi, (%rdi)
1465; CHECK-AVX512-NEXT:    vmovups (%rdi), %xmm0
1466; CHECK-AVX512-NEXT:    vmovups %xmm0, 4(%rdi)
1467; CHECK-AVX512-NEXT:    retq
1468entry:
1469  %a = bitcast i8* %A to i32*
1470  store i32 %x, i32* %a, align 4
1471  %add.ptr = getelementptr inbounds i8, i8* %A, i64 4
1472  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr, i8* align 4 %A, i64 16, i32 4, i1 false)
1473  ret void
1474}
1475
1476; Function Attrs: nounwind uwtable
1477define dso_local void @test_noalias(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
1478; CHECK-LABEL: test_noalias:
1479; CHECK:       # %bb.0: # %entry
1480; CHECK-NEXT:    movl %esi, (%rdi)
1481; CHECK-NEXT:    movl (%rdi), %eax
1482; CHECK-NEXT:    movl %eax, 20(%rdi)
1483; CHECK-NEXT:    movq 4(%rdi), %rax
1484; CHECK-NEXT:    movq %rax, 24(%rdi)
1485; CHECK-NEXT:    movl 12(%rdi), %eax
1486; CHECK-NEXT:    movl %eax, 32(%rdi)
1487; CHECK-NEXT:    retq
1488;
1489; DISABLED-LABEL: test_noalias:
1490; DISABLED:       # %bb.0: # %entry
1491; DISABLED-NEXT:    movl %esi, (%rdi)
1492; DISABLED-NEXT:    movups (%rdi), %xmm0
1493; DISABLED-NEXT:    movups %xmm0, 20(%rdi)
1494; DISABLED-NEXT:    retq
1495;
1496; CHECK-AVX2-LABEL: test_noalias:
1497; CHECK-AVX2:       # %bb.0: # %entry
1498; CHECK-AVX2-NEXT:    movl %esi, (%rdi)
1499; CHECK-AVX2-NEXT:    movl (%rdi), %eax
1500; CHECK-AVX2-NEXT:    movl %eax, 20(%rdi)
1501; CHECK-AVX2-NEXT:    movq 4(%rdi), %rax
1502; CHECK-AVX2-NEXT:    movq %rax, 24(%rdi)
1503; CHECK-AVX2-NEXT:    movl 12(%rdi), %eax
1504; CHECK-AVX2-NEXT:    movl %eax, 32(%rdi)
1505; CHECK-AVX2-NEXT:    retq
1506;
1507; CHECK-AVX512-LABEL: test_noalias:
1508; CHECK-AVX512:       # %bb.0: # %entry
1509; CHECK-AVX512-NEXT:    movl %esi, (%rdi)
1510; CHECK-AVX512-NEXT:    movl (%rdi), %eax
1511; CHECK-AVX512-NEXT:    movl %eax, 20(%rdi)
1512; CHECK-AVX512-NEXT:    movq 4(%rdi), %rax
1513; CHECK-AVX512-NEXT:    movq %rax, 24(%rdi)
1514; CHECK-AVX512-NEXT:    movl 12(%rdi), %eax
1515; CHECK-AVX512-NEXT:    movl %eax, 32(%rdi)
1516; CHECK-AVX512-NEXT:    retq
1517entry:
1518  %a = bitcast i8* %A to i32*
1519  store i32 %x, i32* %a, align 4
1520  %add.ptr = getelementptr inbounds i8, i8* %A, i64 20
1521  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr, i8* align 4 %A, i64 16, i32 4, i1 false)
1522  ret void
1523}
1524
1525; Function Attrs: argmemonly nounwind
1526declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1
1527
1528attributes #0 = { nounwind uwtable }
1529