1; REQUIRES: asserts
2
3; RUN: opt -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -debug -disable-output %s 2>&1 | FileCheck %s
4
5target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
6
7@a = common global [2048 x i32] zeroinitializer, align 16
8@b = common global [2048 x i32] zeroinitializer, align 16
9@c = common global [2048 x i32] zeroinitializer, align 16
10
11
12; CHECK-LABEL: LV: Checking a loop in "sink1"
13; CHECK:      VPlan 'Initial VPlan for VF={2},UF>=1' {
14; CHECK-NEXT: loop:
15; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
16; CHECK-NEXT:   EMIT vp<%2> = icmp ule ir<%iv> vp<%0>
17; CHECK-NEXT: Successor(s): loop.0
18
19; CHECK:      loop.0:
20; CHECK-NEXT: Successor(s): pred.store
21
22; CHECK:      <xVFxUF> pred.store: {
23; CHECK-NEXT:   pred.store.entry:
24; CHECK-NEXT:     BRANCH-ON-MASK vp<%2>
25; CHECK-NEXT:   Successor(s): pred.store.if, pred.store.continue
26; CHECK-NEXT:   CondBit: vp<%2> (loop)
27
28; CHECK:      pred.store.if:
29; CHECK-NEXT:     REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%iv>
30; CHECK-NEXT:     REPLICATE ir<%lv.b> = load ir<%gep.b>
31; CHECK-NEXT:     REPLICATE ir<%add> = add ir<%lv.b>, ir<10>
32; CHECK-NEXT:     REPLICATE ir<%mul> = mul ir<2>, ir<%add>
33; CHECK-NEXT:     REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%iv>
34; CHECK-NEXT:     REPLICATE store ir<%mul>, ir<%gep.a>
35; CHECK-NEXT:   Successor(s): pred.store.continue
36
37; CHECK:      pred.store.continue:
38; CHECK-NEXT:   PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%lv.b>
39; CHECK-NEXT:   No successors
40; CHECK-NEXT: }
41
42; CHECK:      loop.1:
43; CHECK-NEXT:   CLONE ir<%large> = icmp ir<%iv>, ir<8>
44; CHECK-NEXT:   CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k>
45; CHECK-NEXT: No successors
46; CHECK-NEXT: }
47;
; Test input: single-block loop computing a[iv] = 2 * (b[iv] + 10).
; The expected VPlan printed before this function places both address
; computations, the load, the add, the mul and the store in pred.store.if.
48define void @sink1(i32 %k) {
49entry:
50  br label %loop
51
52loop:
53  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
54  %gep.b = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i32 0, i32 %iv
55  %lv.b  = load i32, i32* %gep.b, align 4
56  %add = add i32 %lv.b, 10
57  %mul = mul i32 2, %add
58  %gep.a = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i32 0, i32 %iv
59  store i32 %mul, i32* %gep.a, align 4
60  %iv.next = add i32 %iv, 1
  ; Leave the loop once %iv >= 8 (signed) or %iv == %k; both compares read
  ; %iv, not %iv.next.
61  %large = icmp sge i32 %iv, 8
62  %exitcond = icmp eq i32 %iv, %k
63  %realexit = or i1 %large, %exitcond
64  br i1 %realexit, label %exit, label %loop
65
66exit:
67  ret void
68}
69
70; CHECK-LABEL: LV: Checking a loop in "sink2"
71; CHECK:      VPlan 'Initial VPlan for VF={2},UF>=1' {
72; CHECK-NEXT: loop:
73; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
74; CHECK-NEXT:   EMIT vp<%2> = icmp ule ir<%iv> vp<%0>
75; CHECK-NEXT: Successor(s): pred.load
76
77; CHECK:      <xVFxUF> pred.load: {
78; CHECK-NEXT:   pred.load.entry:
79; CHECK-NEXT:     BRANCH-ON-MASK vp<%2>
80; CHECK-NEXT:   Successor(s): pred.load.if, pred.load.continue
81; CHECK-NEXT:   CondBit: vp<%2> (loop)
82
83; CHECK:      pred.load.if:
84; CHECK-NEXT:     REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%iv>
85; CHECK-NEXT:     REPLICATE ir<%lv.b> = load ir<%gep.b>
86; CHECK-NEXT:   Successor(s): pred.load.continue
87
88; CHECK:      pred.load.continue:
89; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<%5> = ir<%lv.b>
90; CHECK-NEXT:   No successors
91; CHECK-NEXT: }
92
93; CHECK:      loop.0:
94; CHECK-NEXT:   WIDEN ir<%mul> = mul ir<%iv>, ir<2>
95; CHECK-NEXT: Successor(s): pred.store
96
97; CHECK:      <xVFxUF> pred.store: {
98; CHECK-NEXT:   pred.store.entry:
99; CHECK-NEXT:     BRANCH-ON-MASK vp<%2>
100; CHECK-NEXT:   Successor(s): pred.store.if, pred.store.continue
101; CHECK-NEXT:   CondBit: vp<%2> (loop)
102
103; CHECK:       pred.store.if:
104; CHECK-NEXT:     REPLICATE ir<%add> = add vp<%5>, ir<10>
105; CHECK-NEXT:     REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%mul>
106; CHECK-NEXT:     REPLICATE store ir<%add>, ir<%gep.a>
107; CHECK-NEXT:   Successor(s): pred.store.continue
108
109; CHECK:      pred.store.continue:
110; CHECK-NEXT:   No successors
111; CHECK-NEXT: }
112
113; CHECK:       loop.1:
114; CHECK-NEXT:   CLONE ir<%large> = icmp ir<%iv>, ir<8>
115; CHECK-NEXT:   CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k>
116; CHECK-NEXT: No successors
117; CHECK-NEXT: }
118;
; Test input: single-block loop computing a[iv * 2] = b[iv] + 10.
; The expected VPlan printed before this function keeps the load (and its
; address) in pred.load.if, widens %mul in loop.0, and places %add, %gep.a
; and the store in pred.store.if.
119define void @sink2(i32 %k) {
120entry:
121  br label %loop
122
123loop:
124  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
125  %gep.b = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i32 0, i32 %iv
126  %lv.b  = load i32, i32* %gep.b, align 4
127  %add = add i32 %lv.b, 10
  ; The store index depends only on the induction variable.
128  %mul = mul i32 %iv, 2
129  %gep.a = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i32 0, i32 %mul
130  store i32 %add, i32* %gep.a, align 4
131  %iv.next = add i32 %iv, 1
  ; Leave the loop once %iv >= 8 (signed) or %iv == %k; both compares read
  ; %iv, not %iv.next.
132  %large = icmp sge i32 %iv, 8
133  %exitcond = icmp eq i32 %iv, %k
134  %realexit = or i1 %large, %exitcond
135  br i1 %realexit, label %exit, label %loop
136
137exit:
138  ret void
139}
140
141; CHECK-LABEL: LV: Checking a loop in "sink3"
142; CHECK:      VPlan 'Initial VPlan for VF={2},UF>=1' {
143; CHECK-NEXT: loop:
144; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
145; CHECK-NEXT:   EMIT vp<%2> = icmp ule ir<%iv> vp<%0>
146; CHECK-NEXT: Successor(s): pred.load
147
148; CHECK:      <xVFxUF> pred.load: {
149; CHECK-NEXT:   pred.load.entry:
150; CHECK-NEXT:     BRANCH-ON-MASK vp<%2>
151; CHECK-NEXT:   Successor(s): pred.load.if, pred.load.continue
152; CHECK-NEXT:   CondBit: vp<%2> (loop)
153
154; CHECK:       pred.load.if:
155; CHECK-NEXT:     REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%iv>
156; CHECK-NEXT:     REPLICATE ir<%lv.b> = load ir<%gep.b> (S->V)
157; CHECK-NEXT:   Successor(s): pred.load.continue
158
159; CHECK:       pred.load.continue:
160; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<%5> = ir<%lv.b>
161; CHECK-NEXT:   No successors
162; CHECK-NEXT: }
163
164; CHECK:      loop.0:
165; CHECK-NEXT:   WIDEN ir<%add> = add vp<%5>, ir<10>
166; CHECK-NEXT:   WIDEN ir<%mul> = mul ir<%iv>, ir<%add>
167; CHECK-NEXT: Successor(s): pred.store
168
169; CHECK:      <xVFxUF> pred.store: {
170; CHECK-NEXT:   pred.store.entry:
171; CHECK-NEXT:     BRANCH-ON-MASK vp<%2>
172; CHECK-NEXT:   Successor(s): pred.store.if, pred.store.continue
173; CHECK-NEXT:   CondBit: vp<%2> (loop)
174
175; CHECK:      pred.store.if:
176; CHECK-NEXT:     REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%mul>
177; CHECK-NEXT:     REPLICATE store ir<%add>, ir<%gep.a>
178; CHECK-NEXT:   Successor(s): pred.store.continue
179
180; CHECK:      pred.store.continue:
181; CHECK-NEXT:   No successors
182; CHECK-NEXT: }
183
184; CHECK:      loop.1:
185; CHECK-NEXT:   CLONE ir<%large> = icmp ir<%iv>, ir<8>
186; CHECK-NEXT:   CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k>
187; CHECK-NEXT: No successors
188; CHECK-NEXT: }
189;
; Test input: single-block loop that stores %add = b[iv] + 10 to
; a[iv * %add], so %add is used both as the stored value and in the index.
; The expected VPlan printed before this function widens %add and %mul in
; loop.0 and places only %gep.a and the store in pred.store.if.
190define void @sink3(i32 %k) {
191entry:
192  br label %loop
193
194loop:
195  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
196  %gep.b = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i32 0, i32 %iv
197  %lv.b  = load i32, i32* %gep.b, align 4
198  %add = add i32 %lv.b, 10
  ; Unlike @sink2, the store index depends on the loaded value via %add.
199  %mul = mul i32 %iv, %add
200  %gep.a = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i32 0, i32 %mul
201  store i32 %add, i32* %gep.a, align 4
202  %iv.next = add i32 %iv, 1
  ; Leave the loop once %iv >= 8 (signed) or %iv == %k; both compares read
  ; %iv, not %iv.next.
203  %large = icmp sge i32 %iv, 8
204  %exitcond = icmp eq i32 %iv, %k
205  %realexit = or i1 %large, %exitcond
206  br i1 %realexit, label %exit, label %loop
207
208exit:
209  ret void
210}
211
212; Make sure we do not sink uniform instructions.
; %gep.A.uniform does not depend on %iv. The expected VPlan below keeps it as
; a CLONE recipe in the `loop` block; only the load through it appears in
; pred.load.if.
213define void @uniform_gep(i64 %k, i16* noalias %A, i16* noalias %B) {
214; CHECK-LABEL: LV: Checking a loop in "uniform_gep"
215; CHECK:      VPlan 'Initial VPlan for VF={2},UF>=1' {
216; CHECK-NEXT: loop:
217; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 21, %iv.next
218; CHECK-NEXT:   EMIT vp<%2> = WIDEN-CANONICAL-INDUCTION
219; CHECK-NEXT:   EMIT vp<%3> = icmp ule vp<%2> vp<%0>
220; CHECK-NEXT:   CLONE ir<%gep.A.uniform> = getelementptr ir<%A>, ir<0>
221; CHECK-NEXT: Successor(s): pred.load
222; CHECK-EMPTY:
223; CHECK-NEXT: <xVFxUF> pred.load: {
224; CHECK-NEXT:   pred.load.entry:
225; CHECK-NEXT:     BRANCH-ON-MASK vp<%3>
226; CHECK-NEXT:   Successor(s): pred.load.if, pred.load.continue
227; CHECK-NEXT:   CondBit: vp<%3> (loop)
228; CHECK-EMPTY:
229; CHECK-NEXT:   pred.load.if:
230; CHECK-NEXT:     REPLICATE ir<%lv> = load ir<%gep.A.uniform>
231; CHECK-NEXT:   Successor(s): pred.load.continue
232; CHECK-EMPTY:
233; CHECK-NEXT:   pred.load.continue:
234; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<%6> = ir<%lv>
235; CHECK-NEXT:   No successors
236; CHECK-NEXT: }
237; CHECK-NEXT: Successor(s): loop.0
238; CHECK-EMPTY:
239; CHECK-NEXT: loop.0:
240; CHECK-NEXT:   WIDEN ir<%cmp> = icmp ir<%iv>, ir<%k>
241; CHECK-NEXT: Successor(s): loop.then
242; CHECK-EMPTY:
243; CHECK-NEXT: loop.then:
244; CHECK-NEXT:   EMIT vp<%8> = not ir<%cmp>
245; CHECK-NEXT:   EMIT vp<%9> = select vp<%3> vp<%8> ir<false>
246; CHECK-NEXT: Successor(s): pred.store
247; CHECK-EMPTY:
248; CHECK-NEXT: <xVFxUF> pred.store: {
249; CHECK-NEXT:   pred.store.entry:
250; CHECK-NEXT:     BRANCH-ON-MASK vp<%9>
251; CHECK-NEXT:   Successor(s): pred.store.if, pred.store.continue
252; CHECK-NEXT:   CondBit: vp<%9> (loop.then)
253; CHECK-EMPTY:
254; CHECK-NEXT:   pred.store.if:
255; CHECK-NEXT:     REPLICATE ir<%gep.B> = getelementptr ir<%B>, ir<%iv>
256; CHECK-NEXT:     REPLICATE store vp<%6>, ir<%gep.B>
257; CHECK-NEXT:   Successor(s): pred.store.continue
258; CHECK-EMPTY:
259; CHECK-NEXT:   pred.store.continue:
260; CHECK-NEXT:   No successors
261; CHECK-NEXT: }
262; CHECK-NEXT: Successor(s): loop.then.0
263; CHECK-EMPTY:
264; CHECK-NEXT: loop.then.0:
265; CHECK-NEXT: Successor(s): loop.latch
266; CHECK-EMPTY:
267; CHECK-NEXT: loop.latch:
268; CHECK-NEXT: No successors
269; CHECK-NEXT: }
270;
271entry:
272  br label %loop
273
274loop:
  ; The induction variable starts at 21; the latch keeps looping while
  ; %iv.next < 32 (signed).
275  %iv = phi i64 [ 21, %entry ], [ %iv.next, %loop.latch ]
  ; Constant index 0: the same address on every iteration.
276  %gep.A.uniform = getelementptr inbounds i16, i16* %A, i64 0
277  %gep.B = getelementptr inbounds i16, i16* %B, i64 %iv
278  %lv = load i16, i16* %gep.A.uniform, align 1
  ; The store in loop.then runs only when %cmp is false, i.e. %iv >= %k
  ; (unsigned).
279  %cmp = icmp ult i64 %iv, %k
280  br i1 %cmp, label %loop.latch, label %loop.then
281
282loop.then:
283  store i16 %lv, i16* %gep.B, align 1
284  br label %loop.latch
285
286loop.latch:
287  %iv.next = add nsw i64 %iv, 1
288  %cmp179 = icmp slt i64 %iv.next, 32
289  br i1 %cmp179, label %loop, label %exit
290exit:
291  ret void
292}
293
294; Loop with predicated load.
; then.0 loads b[iv] when %iv <u %j; next.0 stores the phi of that load and the
; constant 0 to a[iv * 10] on every iteration. The expected VPlan below has a
; pred.load region for the load, a BLEND for %p, and a pred.store region for
; the store.
295define void @pred_cfg1(i32 %k, i32 %j) {
296; CHECK-LABEL: LV: Checking a loop in "pred_cfg1"
297; CHECK:      VPlan 'Initial VPlan for VF={2},UF>=1' {
298; CHECK-NEXT: loop:
299; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
300; CHECK-NEXT:   WIDEN ir<%c.1> = icmp ir<%iv>, ir<%j>
301; CHECK-NEXT:   WIDEN ir<%mul> = mul ir<%iv>, ir<10>
302; CHECK-NEXT: Successor(s): then.0
303; CHECK-EMPTY:
304; CHECK-NEXT: then.0:
305; CHECK-NEXT:   EMIT vp<%4> = icmp ule ir<%iv> vp<%0>
306; CHECK-NEXT:   EMIT vp<%5> = select vp<%4> ir<%c.1> ir<false>
307; CHECK-NEXT: Successor(s): pred.load
308; CHECK-EMPTY:
309; CHECK-NEXT: <xVFxUF> pred.load: {
310; CHECK-NEXT:   pred.load.entry:
311; CHECK-NEXT:     BRANCH-ON-MASK vp<%5>
312; CHECK-NEXT:   Successor(s): pred.load.if, pred.load.continue
313; CHECK-NEXT:   CondBit: vp<%5> (then.0)
314; CHECK-EMPTY:
315; CHECK-NEXT:   pred.load.if:
316; CHECK-NEXT:     REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%iv>
317; CHECK-NEXT:     REPLICATE ir<%lv.b> = load ir<%gep.b> (S->V)
318; CHECK-NEXT:   Successor(s): pred.load.continue
319; CHECK-EMPTY:
320; CHECK-NEXT:   pred.load.continue:
321; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<%8> = ir<%lv.b>
322; CHECK-NEXT:   No successors
323; CHECK-NEXT: }
324; CHECK-NEXT: Successor(s): then.0.0
325; CHECK-EMPTY:
326; CHECK-NEXT: then.0.0:
327; CHECK-NEXT: Successor(s): next.0
328; CHECK-EMPTY:
329; CHECK-NEXT: next.0:
330; CHECK-NEXT:   EMIT vp<%9> = not ir<%c.1>
331; CHECK-NEXT:   EMIT vp<%10> = select vp<%4> vp<%9> ir<false>
332; CHECK-NEXT:   BLEND %p = ir<0>/vp<%10> vp<%8>/vp<%5>
333; CHECK-NEXT:   EMIT vp<%12> = or vp<%5> vp<%10>
334; CHECK-NEXT: Successor(s): pred.store
335; CHECK-EMPTY:
336; CHECK-NEXT: <xVFxUF> pred.store: {
337; CHECK-NEXT:   pred.store.entry:
338; CHECK-NEXT:     BRANCH-ON-MASK vp<%12>
339; CHECK-NEXT:   Successor(s): pred.store.if, pred.store.continue
340; CHECK-NEXT:   CondBit: vp<%12> (next.0)
341; CHECK-EMPTY:
342; CHECK-NEXT:   pred.store.if:
343; CHECK-NEXT:     REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%mul>
344; CHECK-NEXT:     REPLICATE store ir<%p>, ir<%gep.a>
345; CHECK-NEXT:   Successor(s): pred.store.continue
346; CHECK-EMPTY:
347; CHECK-NEXT:   pred.store.continue:
348; CHECK-NEXT:   No successors
349; CHECK-NEXT: }
350; CHECK-NEXT: Successor(s): next.0.0
351; CHECK-EMPTY:
352; CHECK-NEXT: next.0.0:
353; CHECK-NEXT:   CLONE ir<%large> = icmp ir<%iv>, ir<8>
354; CHECK-NEXT:   CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k>
355; CHECK-NEXT: No successors
356; CHECK-NEXT: }
357;
358entry:
359  br label %loop
360
361loop:
362  %iv = phi i32 [ 0, %entry ], [ %iv.next, %next.0 ]
  ; Both addresses are computed in the header, before the conditional branch.
363  %gep.b = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i32 0, i32 %iv
364  %c.1 = icmp ult i32 %iv, %j
365  %mul = mul i32 %iv, 10
366  %gep.a = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i32 0, i32 %mul
367  br i1 %c.1, label %then.0, label %next.0
368
369then.0:
370  %lv.b  = load i32, i32* %gep.b, align 4
371  br label %next.0
372
373next.0:
  ; %p is 0 when then.0 was skipped, otherwise the loaded value.
374  %p = phi i32 [ 0, %loop ], [ %lv.b, %then.0 ]
375  store i32 %p, i32* %gep.a, align 4
376  %iv.next = add i32 %iv, 1
  ; Leave the loop once %iv >= 8 (signed) or %iv == %k; both compares read
  ; %iv, not %iv.next.
377  %large = icmp sge i32 %iv, 8
378  %exitcond = icmp eq i32 %iv, %k
379  %realexit = or i1 %large, %exitcond
380  br i1 %realexit, label %exit, label %loop
381
382exit:
383  ret void
384}
385
386; Loop with predicated load and store in separate blocks, store depends on
387; loaded value.
; The load of b[iv] is guarded by %c.0 (%iv <u %j); the store of %p to
; a[iv * 10] is guarded by %c.1 (%iv >u %j).
388define void @pred_cfg2(i32 %k, i32 %j) {
389; CHECK-LABEL: LV: Checking a loop in "pred_cfg2"
390; CHECK:      VPlan 'Initial VPlan for VF={2},UF>=1' {
391; CHECK-NEXT: loop:
392; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
393; CHECK-NEXT:   WIDEN ir<%mul> = mul ir<%iv>, ir<10>
394; CHECK-NEXT:   WIDEN ir<%c.0> = icmp ir<%iv>, ir<%j>
395; CHECK-NEXT:   WIDEN ir<%c.1> = icmp ir<%iv>, ir<%j>
396; CHECK-NEXT: Successor(s): then.0
397; CHECK-EMPTY:
398; CHECK-NEXT: then.0:
399; CHECK-NEXT:   EMIT vp<%5> = icmp ule ir<%iv> vp<%0>
400; CHECK-NEXT:   EMIT vp<%6> = select vp<%5> ir<%c.0> ir<false>
401; CHECK-NEXT: Successor(s): pred.load
402; CHECK-EMPTY:
403; CHECK-NEXT: <xVFxUF> pred.load: {
404; CHECK-NEXT:   pred.load.entry:
405; CHECK-NEXT:     BRANCH-ON-MASK vp<%6>
406; CHECK-NEXT:   Successor(s): pred.load.if, pred.load.continue
407; CHECK-NEXT:   CondBit: vp<%6> (then.0)
408; CHECK-EMPTY:
409; CHECK-NEXT:   pred.load.if:
410; CHECK-NEXT:     REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%iv>
411; CHECK-NEXT:     REPLICATE ir<%lv.b> = load ir<%gep.b> (S->V)
412; CHECK-NEXT:   Successor(s): pred.load.continue
413; CHECK-EMPTY:
414; CHECK-NEXT:   pred.load.continue:
415; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<%9> = ir<%lv.b>
416; CHECK-NEXT:   No successors
417; CHECK-NEXT: }
418; CHECK-NEXT: Successor(s): then.0.0
419; CHECK-EMPTY:
420; CHECK-NEXT: then.0.0:
421; CHECK-NEXT: Successor(s): next.0
422; CHECK-EMPTY:
423; CHECK-NEXT: next.0:
424; CHECK-NEXT:   EMIT vp<%10> = not ir<%c.0>
425; CHECK-NEXT:   EMIT vp<%11> = select vp<%5> vp<%10> ir<false>
426; CHECK-NEXT:   BLEND %p = ir<0>/vp<%11> vp<%9>/vp<%6>
427; CHECK-NEXT: Successor(s): then.1
428; CHECK-EMPTY:
429; CHECK-NEXT: then.1:
430; CHECK-NEXT:   EMIT vp<%13> = or vp<%6> vp<%11>
431; CHECK-NEXT:   EMIT vp<%14> = select vp<%13> ir<%c.1> ir<false>
432; CHECK-NEXT: Successor(s): pred.store
433; CHECK-EMPTY:
434; CHECK-NEXT: <xVFxUF> pred.store: {
435; CHECK-NEXT:   pred.store.entry:
436; CHECK-NEXT:     BRANCH-ON-MASK vp<%14>
437; CHECK-NEXT:   Successor(s): pred.store.if, pred.store.continue
438; CHECK-NEXT:   CondBit: vp<%14> (then.1)
439; CHECK-EMPTY:
440; CHECK-NEXT:   pred.store.if:
441; CHECK-NEXT:     REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%mul>
442; CHECK-NEXT:     REPLICATE store ir<%p>, ir<%gep.a>
443; CHECK-NEXT:   Successor(s): pred.store.continue
444; CHECK-EMPTY:
445; CHECK-NEXT:   pred.store.continue:
446; CHECK-NEXT:   No successors
447; CHECK-NEXT: }
448; CHECK-NEXT: Successor(s): then.1.0
449; CHECK-EMPTY:
450; CHECK-NEXT: then.1.0:
451; CHECK-NEXT: Successor(s): next.1
452; CHECK-EMPTY:
453; CHECK-NEXT: next.1:
454; CHECK-NEXT:   CLONE ir<%large> = icmp ir<%iv>, ir<8>
455; CHECK-NEXT:   CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k>
456; CHECK-NEXT: No successors
457; CHECK-NEXT: }
458;
459entry:
460  br label %loop
461
462loop:
463  %iv = phi i32 [ 0, %entry ], [ %iv.next, %next.1 ]
464  %gep.b = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i32 0, i32 %iv
465  %mul = mul i32 %iv, 10
466  %gep.a = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i32 0, i32 %mul
  ; Two different predicates on the same operands: %c.0 guards the load,
  ; %c.1 guards the store.
467  %c.0 = icmp ult i32 %iv, %j
468  %c.1 = icmp ugt i32 %iv, %j
469  br i1 %c.0, label %then.0, label %next.0
470
471then.0:
472  %lv.b  = load i32, i32* %gep.b, align 4
473  br label %next.0
474
475next.0:
  ; %p is 0 when then.0 was skipped, otherwise the loaded value.
476  %p = phi i32 [ 0, %loop ], [ %lv.b, %then.0 ]
477  br i1 %c.1, label %then.1, label %next.1
478
479then.1:
480  store i32 %p, i32* %gep.a, align 4
481  br label %next.1
482
483next.1:
484  %iv.next = add i32 %iv, 1
  ; Leave the loop once %iv >= 8 (signed) or %iv == %k; both compares read
  ; %iv, not %iv.next.
485  %large = icmp sge i32 %iv, 8
486  %exitcond = icmp eq i32 %iv, %k
487  %realexit = or i1 %large, %exitcond
488  br i1 %realexit, label %exit, label %loop
489
490exit:
491  ret void
492}
493
494; Loop with predicated load and store in separate blocks, store does not depend
495; on loaded value.
; Both then.0 and then.1 are guarded by the same condition %c.0. %lv.b has no
; users, and then.1 stores the constant 0 to a[iv * 10].
496define void @pred_cfg3(i32 %k, i32 %j) {
497; CHECK-LABEL: LV: Checking a loop in "pred_cfg3"
498; CHECK:      VPlan 'Initial VPlan for VF={2},UF>=1' {
499; CHECK-NEXT: loop:
500; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
501; CHECK-NEXT:   WIDEN ir<%mul> = mul ir<%iv>, ir<10>
502; CHECK-NEXT:   WIDEN ir<%c.0> = icmp ir<%iv>, ir<%j>
503; CHECK-NEXT: Successor(s): then.0
504; CHECK-EMPTY:
505; CHECK-NEXT: then.0:
506; CHECK-NEXT:   EMIT vp<%4> = icmp ule ir<%iv> vp<%0>
507; CHECK-NEXT:   EMIT vp<%5> = select vp<%4> ir<%c.0> ir<false>
508; CHECK-NEXT: Successor(s): pred.load
509; CHECK-EMPTY:
510; CHECK-NEXT: <xVFxUF> pred.load: {
511; CHECK-NEXT:   pred.load.entry:
512; CHECK-NEXT:     BRANCH-ON-MASK vp<%5>
513; CHECK-NEXT:   Successor(s): pred.load.if, pred.load.continue
514; CHECK-NEXT:   CondBit: vp<%5> (then.0)
515; CHECK-EMPTY:
516; CHECK-NEXT:   pred.load.if:
517; CHECK-NEXT:     REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%iv>
518; CHECK-NEXT:     REPLICATE ir<%lv.b> = load ir<%gep.b>
519; CHECK-NEXT:   Successor(s): pred.load.continue
520; CHECK-EMPTY:
521; CHECK-NEXT:   pred.load.continue:
522; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<%8> = ir<%lv.b>
523; CHECK-NEXT:   No successors
524; CHECK-NEXT: }
525; CHECK-NEXT: Successor(s): then.0.0
526; CHECK-EMPTY:
527; CHECK-NEXT: then.0.0:
528; CHECK-NEXT: Successor(s): next.0
529; CHECK-EMPTY:
530; CHECK-NEXT: next.0:
531; CHECK-NEXT: Successor(s): then.1
532; CHECK-EMPTY:
533; CHECK-NEXT: then.1:
534; CHECK-NEXT:   EMIT vp<%9> = not ir<%c.0>
535; CHECK-NEXT:   EMIT vp<%10> = select vp<%4> vp<%9> ir<false>
536; CHECK-NEXT:   EMIT vp<%11> = or vp<%5> vp<%10>
537; CHECK-NEXT:   EMIT vp<%12> = select vp<%11> ir<%c.0> ir<false>
538; CHECK-NEXT: Successor(s): pred.store
539; CHECK-EMPTY:
540; CHECK-NEXT: <xVFxUF> pred.store: {
541; CHECK-NEXT:   pred.store.entry:
542; CHECK-NEXT:     BRANCH-ON-MASK vp<%12>
543; CHECK-NEXT:   Successor(s): pred.store.if, pred.store.continue
544; CHECK-NEXT:   CondBit: vp<%12> (then.1)
545; CHECK-EMPTY:
546; CHECK-NEXT:   pred.store.if:
547; CHECK-NEXT:     REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%mul>
548; CHECK-NEXT:     REPLICATE store ir<0>, ir<%gep.a>
549; CHECK-NEXT:   Successor(s): pred.store.continue
550; CHECK-EMPTY:
551; CHECK-NEXT:   pred.store.continue:
552; CHECK-NEXT:   No successors
553; CHECK-NEXT: }
554; CHECK-NEXT: Successor(s): then.1.0
555; CHECK-EMPTY:
556; CHECK-NEXT: then.1.0:
557; CHECK-NEXT: Successor(s): next.1
558; CHECK-EMPTY:
559; CHECK-NEXT: next.1:
560; CHECK-NEXT:   CLONE ir<%large> = icmp ir<%iv>, ir<8>
561; CHECK-NEXT:   CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k>
562; CHECK-NEXT: No successors
563; CHECK-NEXT: }
564;
565entry:
566  br label %loop
567
568loop:
569  %iv = phi i32 [ 0, %entry ], [ %iv.next, %next.1 ]
570  %gep.b = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i32 0, i32 %iv
571  %mul = mul i32 %iv, 10
572  %gep.a = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i32 0, i32 %mul
573  %c.0 = icmp ult i32 %iv, %j
574  br i1 %c.0, label %then.0, label %next.0
575
576then.0:
  ; The loaded value is not used by any other instruction in this function.
577  %lv.b  = load i32, i32* %gep.b, align 4
578  br label %next.0
579
580next.0:
  ; Second branch on the same condition as the loop header.
581  br i1 %c.0, label %then.1, label %next.1
582
583then.1:
584  store i32 0, i32* %gep.a, align 4
585  br label %next.1
586
587next.1:
588  %iv.next = add i32 %iv, 1
  ; Leave the loop once %iv >= 8 (signed) or %iv == %k; both compares read
  ; %iv, not %iv.next.
589  %large = icmp sge i32 %iv, 8
590  %exitcond = icmp eq i32 %iv, %k
591  %realexit = or i1 %large, %exitcond
592  br i1 %realexit, label %exit, label %loop
593
594exit:
595  ret void
596}
597
; The loop header loads a[iv] and b[iv], then stores the first value to c[iv]
; and the second to a[iv]. then.0 (taken when %iv <u %j) stores their product
; to c[iv]. The expected VPlan below places both loads and both header stores
; in a single pred.store region, followed by a second pred.store region for
; the store in then.0.
598define void @merge_3_replicate_region(i32 %k, i32 %j) {
599; CHECK-LABEL: LV: Checking a loop in "merge_3_replicate_region"
600; CHECK:      VPlan 'Initial VPlan for VF={2},UF>=1' {
601; CHECK-NEXT: loop:
602; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
603; CHECK-NEXT:   EMIT vp<%2> = icmp ule ir<%iv> vp<%0>
604; CHECK-NEXT:   REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%iv>
605; CHECK-NEXT: Successor(s): loop.0
606; CHECK-EMPTY:
607; CHECK-NEXT: loop.0:
608; CHECK-NEXT: Successor(s): loop.1
609; CHECK-EMPTY:
610; CHECK-NEXT: loop.1:
611; CHECK-NEXT: Successor(s): loop.2
612; CHECK-EMPTY:
613; CHECK-NEXT: loop.2:
614; CHECK-NEXT: Successor(s): pred.store
615; CHECK-EMPTY:
616; CHECK-NEXT: <xVFxUF> pred.store: {
617; CHECK-NEXT:   pred.store.entry:
618; CHECK-NEXT:     BRANCH-ON-MASK vp<%2>
619; CHECK-NEXT:   Successor(s): pred.store.if, pred.store.continue
620; CHECK-NEXT:   CondBit: vp<%2> (loop)
621; CHECK-EMPTY:
622; CHECK-NEXT:   pred.store.if:
623; CHECK-NEXT:     REPLICATE ir<%lv.a> = load ir<%gep.a>
624; CHECK-NEXT:     REPLICATE ir<%gep.b> = getelementptr ir<@b>, ir<0>, ir<%iv>
625; CHECK-NEXT:     REPLICATE ir<%lv.b> = load ir<%gep.b>
626; CHECK-NEXT:     REPLICATE ir<%gep.c> = getelementptr ir<@c>, ir<0>, ir<%iv>
627; CHECK-NEXT:     REPLICATE store ir<%lv.a>, ir<%gep.c>
628; CHECK-NEXT:     REPLICATE store ir<%lv.b>, ir<%gep.a>
629; CHECK-NEXT:   Successor(s): pred.store.continue
630; CHECK-EMPTY:
631; CHECK-NEXT:   pred.store.continue:
632; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%lv.a>
633; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<%11> = ir<%lv.b>
634; CHECK-NEXT:   No successors
635; CHECK-NEXT: }
636; CHECK-NEXT: Successor(s): loop.3
637; CHECK-EMPTY:
638; CHECK-NEXT: loop.3:
639; CHECK-NEXT:   WIDEN ir<%c.0> = icmp ir<%iv>, ir<%j>
640; CHECK-NEXT: Successor(s): then.0
641; CHECK-EMPTY:
642; CHECK-NEXT: then.0:
643; CHECK-NEXT:   WIDEN ir<%mul> = mul vp<%10>, vp<%11>
644; CHECK-NEXT:   EMIT vp<%14> = select vp<%2> ir<%c.0> ir<false>
645; CHECK-NEXT: Successor(s): pred.store
646; CHECK-EMPTY:
647; CHECK-NEXT: <xVFxUF> pred.store: {
648; CHECK-NEXT:   pred.store.entry:
649; CHECK-NEXT:     BRANCH-ON-MASK vp<%14>
650; CHECK-NEXT:   Successor(s): pred.store.if, pred.store.continue
651; CHECK-NEXT:   CondBit: vp<%14> (then.0)
652; CHECK-EMPTY:
653; CHECK-NEXT:   pred.store.if:
654; CHECK-NEXT:     REPLICATE ir<%gep.c.1> = getelementptr ir<@c>, ir<0>, ir<%iv>
655; CHECK-NEXT:     REPLICATE store ir<%mul>, ir<%gep.c.1>
656; CHECK-NEXT:   Successor(s): pred.store.continue
657; CHECK-EMPTY:
658; CHECK-NEXT:   pred.store.continue:
659; CHECK-NEXT:   No successors
660; CHECK-NEXT: }
661; CHECK-NEXT: Successor(s): then.0.0
662; CHECK-EMPTY:
663; CHECK-NEXT: then.0.0:
664; CHECK-NEXT: Successor(s): latch
665; CHECK-EMPTY:
666; CHECK-NEXT: latch:
667; CHECK-NEXT:   CLONE ir<%large> = icmp ir<%iv>, ir<8>
668; CHECK-NEXT:   CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k>
669; CHECK-NEXT: No successors
670; CHECK-NEXT: }
671;
672entry:
673  br label %loop
674
675loop:
676  %iv = phi i32 [ 0, %entry ], [ %iv.next, %latch ]
677  %gep.a = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i32 0, i32 %iv
678  %lv.a  = load i32, i32* %gep.a, align 4
679  %gep.b = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i32 0, i32 %iv
680  %lv.b  = load i32, i32* %gep.b, align 4
681  %gep.c = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i32 0, i32 %iv
  ; Two unconditional stores: c[iv] = a[iv], then a[iv] = b[iv].
682  store i32 %lv.a, i32* %gep.c, align 4
683  store i32 %lv.b, i32* %gep.a, align 4
684  %c.0 = icmp ult i32 %iv, %j
685  br i1 %c.0, label %then.0, label %latch
686
687then.0:
  ; Conditional store to the same element c[iv] written in the header.
688  %mul = mul i32 %lv.a, %lv.b
689  %gep.c.1 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i32 0, i32 %iv
690  store i32 %mul, i32* %gep.c.1, align 4
691  br label %latch
692
693latch:
694  %iv.next = add i32 %iv, 1
  ; Leave the loop once %iv >= 8 (signed) or %iv == %k; both compares read
  ; %iv, not %iv.next.
695  %large = icmp sge i32 %iv, 8
696  %exitcond = icmp eq i32 %iv, %k
697  %realexit = or i1 %large, %exitcond
698  br i1 %realexit, label %exit, label %loop
699
700exit:
701  ret void
702}
703
704
; Single-block loop computing a[iv] = a[iv] sdiv a[iv]; the loaded value %lv.a
; is used as both operands of the same sdiv. The expected VPlan below places
; the load, the sdiv and the store together in one pred.store.if block.
705define void @update_2_uses_in_same_recipe_in_merged_block(i32 %k) {
706; CHECK-LABEL: LV: Checking a loop in "update_2_uses_in_same_recipe_in_merged_block"
707; CHECK:      VPlan 'Initial VPlan for VF={2},UF>=1' {
708; CHECK-NEXT: loop:
709; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
710; CHECK-NEXT:   EMIT vp<%2> = icmp ule ir<%iv> vp<%0>
711; CHECK-NEXT:   REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%iv>
712; CHECK-NEXT: Successor(s): loop.0
713; CHECK-EMPTY:
714; CHECK-NEXT: loop.0:
715; CHECK-NEXT: Successor(s): loop.1
716; CHECK-EMPTY:
717; CHECK-NEXT: loop.1:
718; CHECK-NEXT: Successor(s): pred.store
719; CHECK-EMPTY:
720; CHECK-NEXT: <xVFxUF> pred.store: {
721; CHECK-NEXT:   pred.store.entry:
722; CHECK-NEXT:     BRANCH-ON-MASK vp<%2>
723; CHECK-NEXT:   Successor(s): pred.store.if, pred.store.continue
724; CHECK-NEXT:   CondBit: vp<%2> (loop)
725; CHECK-EMPTY:
726; CHECK-NEXT:   pred.store.if:
727; CHECK-NEXT:     REPLICATE ir<%lv.a> = load ir<%gep.a>
728; CHECK-NEXT:     REPLICATE ir<%div> = sdiv ir<%lv.a>, ir<%lv.a>
729; CHECK-NEXT:     REPLICATE store ir<%div>, ir<%gep.a>
730; CHECK-NEXT:   Successor(s): pred.store.continue
731; CHECK-EMPTY:
732; CHECK-NEXT:   pred.store.continue:
733; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<%7> = ir<%lv.a>
734; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<%8> = ir<%div>
735; CHECK-NEXT:   No successors
736; CHECK-NEXT: }
737; CHECK-NEXT: Successor(s): loop.2
738; CHECK-EMPTY:
739; CHECK-NEXT: loop.2:
740; CHECK-NEXT:   CLONE ir<%large> = icmp ir<%iv>, ir<8>
741; CHECK-NEXT:   CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k>
742; CHECK-NEXT: No successors
743; CHECK-NEXT: }
744;
745entry:
746  br label %loop
747
748loop:
749  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
750  %gep.a = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i32 0, i32 %iv
751  %lv.a  = load i32, i32* %gep.a, align 4
  ; Both operands are the same value, so this instruction uses %lv.a twice.
752  %div = sdiv i32 %lv.a, %lv.a
753  store i32 %div, i32* %gep.a, align 4
754  %iv.next = add i32 %iv, 1
  ; Leave the loop once %iv >= 8 (signed) or %iv == %k; both compares read
  ; %iv, not %iv.next.
755  %large = icmp sge i32 %iv, 8
756  %exitcond = icmp eq i32 %iv, %k
757  %realexit = or i1 %large, %exitcond
758  br i1 %realexit, label %exit, label %loop
759
760exit:
761  ret void
762}
763
; Single-block loop in which the phi %for carries the value loaded from a[iv]
; in the previous iteration (0 on entry); each iteration stores
; %for sdiv a[iv] back to a[iv]. The expected VPlan below keeps the load in a
; pred.load region, emits the first-order splice in loop.0, and places the
; sdiv and the store in a separate pred.store region.
764define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) {
765; CHECK-LABEL: LV: Checking a loop in "recipe_in_merge_candidate_used_by_first_order_recurrence"
766; CHECK:      VPlan 'Initial VPlan for VF={2},UF>=1' {
767; CHECK-NEXT: loop:
768; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
769; CHECK-NEXT:   FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, ir<%lv.a>
770; CHECK-NEXT:   EMIT vp<%3> = icmp ule ir<%iv> vp<%0>
771; CHECK-NEXT:   REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%iv>
772; CHECK-NEXT: Successor(s): pred.load
773; CHECK-EMPTY:
774; CHECK-NEXT: <xVFxUF> pred.load: {
775; CHECK-NEXT:   pred.load.entry:
776; CHECK-NEXT:     BRANCH-ON-MASK vp<%3>
777; CHECK-NEXT:   Successor(s): pred.load.if, pred.load.continue
778; CHECK-NEXT:   CondBit: vp<%3> (loop)
779; CHECK-EMPTY:
780; CHECK-NEXT:   pred.load.if:
781; CHECK-NEXT:     REPLICATE ir<%lv.a> = load ir<%gep.a>
782; CHECK-NEXT:   Successor(s): pred.load.continue
783; CHECK-EMPTY:
784; CHECK-NEXT:   pred.load.continue:
785; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<%6> = ir<%lv.a>
786; CHECK-NEXT:   No successors
787; CHECK-NEXT: }
788; CHECK-NEXT: Successor(s): loop.0
789; CHECK-EMPTY:
790; CHECK-NEXT: loop.0:
791; CHECK-NEXT:   EMIT vp<%7> = first-order splice ir<%for> ir<%lv.a>
792; CHECK-NEXT: Successor(s): loop.1
793; CHECK-EMPTY:
794; CHECK-NEXT: loop.1:
795; CHECK-NEXT: Successor(s): pred.store
796; CHECK-EMPTY:
797; CHECK-NEXT: <xVFxUF> pred.store: {
798; CHECK-NEXT:   pred.store.entry:
799; CHECK-NEXT:     BRANCH-ON-MASK vp<%3>
800; CHECK-NEXT:   Successor(s): pred.store.if, pred.store.continue
801; CHECK-NEXT:   CondBit: vp<%3> (loop)
802; CHECK-EMPTY:
803; CHECK-NEXT:   pred.store.if:
804; CHECK-NEXT:     REPLICATE ir<%div> = sdiv vp<%7>, vp<%6>
805; CHECK-NEXT:     REPLICATE store ir<%div>, ir<%gep.a>
806; CHECK-NEXT:   Successor(s): pred.store.continue
807; CHECK-EMPTY:
808; CHECK-NEXT:   pred.store.continue:
809; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<%10> = ir<%div>
810; CHECK-NEXT:   No successors
811; CHECK-NEXT: }
812; CHECK-NEXT: Successor(s): loop.2
813; CHECK-EMPTY:
814; CHECK-NEXT: loop.2:
815; CHECK-NEXT:   CLONE ir<%large> = icmp ir<%iv>, ir<8>
816; CHECK-NEXT:   CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k>
817; CHECK-NEXT: No successors
818; CHECK-NEXT: }
819;
820entry:
821  br label %loop
822
823loop:
824  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  ; 0 on the first iteration, afterwards the %lv.a of the previous iteration.
825  %for = phi i32 [ 0, %entry ], [ %lv.a, %loop ]
826  %gep.a = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i32 0, i32 %iv
827  %lv.a  = load i32, i32* %gep.a, align 4
  ; Divides the previous iteration's load by the current one.
828  %div = sdiv i32 %for, %lv.a
829  store i32 %div, i32* %gep.a, align 4
830  %iv.next = add i32 %iv, 1
  ; Leave the loop once %iv >= 8 (signed) or %iv == %k; both compares read
  ; %iv, not %iv.next.
831  %large = icmp sge i32 %iv, 8
832  %exitcond = icmp eq i32 %iv, %k
833  %realexit = or i1 %large, %exitcond
834  br i1 %realexit, label %exit, label %loop
835
836exit:
837  ret void
838}
839
; Loop whose body (loop.then) is guarded by the function argument %c. The i16
; loaded from %src has three users: %l2 (trunc), %cmp (icmp) and %sext.l1
; (sext). The expected VPlan below places the load, %cmp, %l2, %sel and the
; store in pred.store.if, while %sext.l1 is widened afterwards in loop.then.1.
840define void @update_multiple_users(i16* noalias %src, i8* noalias %dst, i1 %c) {
841; CHECK-LABEL: LV: Checking a loop in "update_multiple_users"
842; CHECK:      VPlan 'Initial VPlan for VF={2},UF>=1' {
843; CHECK-NEXT: loop.header:
844; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
845; CHECK-NEXT: Successor(s): loop.then
846; CHECK-EMPTY:
847; CHECK-NEXT: loop.then:
848; CHECK-NEXT: Successor(s): loop.then.0
849; CHECK-EMPTY:
850; CHECK-NEXT: loop.then.0:
851; CHECK-NEXT: Successor(s): pred.store
852; CHECK-EMPTY:
853; CHECK-NEXT: <xVFxUF> pred.store: {
854; CHECK-NEXT:   pred.store.entry:
855; CHECK-NEXT:     BRANCH-ON-MASK ir<%c>
856; CHECK-NEXT:   Successor(s): pred.store.if, pred.store.continue
857; CHECK-NEXT:   CondBit: ir<%c>
858; CHECK-EMPTY:
859; CHECK-NEXT:   pred.store.if:
860; CHECK-NEXT:     REPLICATE ir<%l1> = load ir<%src>
861; CHECK-NEXT:     REPLICATE ir<%cmp> = icmp ir<%l1>, ir<0>
862; CHECK-NEXT:     REPLICATE ir<%l2> = trunc ir<%l1>
863; CHECK-NEXT:     REPLICATE ir<%sel> = select ir<%cmp>, ir<5>, ir<%l2>
864; CHECK-NEXT:     REPLICATE store ir<%sel>, ir<%dst>
865; CHECK-NEXT:   Successor(s): pred.store.continue
866; CHECK-EMPTY:
867; CHECK-NEXT:   pred.store.continue:
868; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<%6> = ir<%l1>
869; CHECK-NEXT:   No successors
870; CHECK-NEXT: }
871; CHECK-NEXT: Successor(s): loop.then.1
872; CHECK-EMPTY:
873; CHECK-NEXT: loop.then.1:
874; CHECK-NEXT:   WIDEN ir<%sext.l1> = sext vp<%6>
875; CHECK-NEXT: Successor(s): loop.latch
876; CHECK-EMPTY:
877; CHECK-NEXT: loop.latch:
878; CHECK-NEXT: No successors
879; CHECK-NEXT: }
880;
881entry:
882  br label %loop.header
883
884loop.header:
885  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  ; %c is a function argument, so the branch condition is the same on every
  ; iteration.
886  br i1 %c, label %loop.then, label %loop.latch
887
888loop.then:
  ; %src and %dst are not indexed by %iv: every iteration reads and writes the
  ; same locations.
889  %l1 = load i16, i16* %src, align 2
890  %l2 = trunc i16 %l1 to i8
891  %cmp = icmp eq i16 %l1, 0
  ; Store 5 when the loaded value is zero, otherwise its truncation to i8.
892  %sel = select i1 %cmp, i8 5, i8 %l2
893  store i8 %sel, i8* %dst, align 1
  ; %sext.l1 has no users of its own; it is an additional user of %l1.
894  %sext.l1 = sext i16 %l1 to i32
895  br label %loop.latch
896
897loop.latch:
898  %iv.next = add nsw i64 %iv, 1
  ; Unlike the other loops in this file, the exit test uses %iv.next.
899  %ec = icmp eq i64 %iv.next, 999
900  br i1 %ec, label %exit, label %loop.header
901
902exit:
903  ret void
904}
905