1; RUN: llc -O3 -mtriple=x86_64-apple-macosx -o - < %s -mattr=+avx2 -enable-unsafe-fp-math -mcpu=core2 | FileCheck %s
2; Check that the ExeDepsFix pass correctly fixes the domain for broadcast instructions.
3; <rdar://problem/16354675>
4
; 128-bit single-precision case with a constant mask.
; Each i32 lane of %arg is ANDed with 2147483647 (0x7FFFFFFF, i.e. bit 31
; cleared). The result is reinterpreted as <4 x float> and fed to an
; fcmp oge + select pair that yields the masked value when it compares
; oge to %arg2, and %arg2 otherwise.
; The directives below expect a broadcastss instruction, followed by the
; single-precision forms of the AND and the max (vandps, vmaxps).
; CHECK-LABEL: ExeDepsFix_broadcastss
; CHECK: broadcastss
; CHECK: vandps
; CHECK: vmaxps
; CHECK: ret
define <4 x float> @ExeDepsFix_broadcastss(<4 x float> %arg, <4 x float> %arg2) {
  ; Integer-typed AND on the bit pattern of the float input.
  %bitcast = bitcast <4 x float> %arg to <4 x i32>
  %and = and <4 x i32> %bitcast, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
  %floatcast = bitcast <4 x i32> %and to <4 x float>
  ; Compare-and-select that the directives above expect to become vmaxps.
  %max_is_x = fcmp oge <4 x float> %floatcast, %arg2
  %max = select <4 x i1> %max_is_x, <4 x float> %floatcast, <4 x float> %arg2
  ret <4 x float> %max
}

; 256-bit single-precision case with a constant mask.
; Same shape as the 128-bit float test, but on <8 x float> / <8 x i32>:
; every i32 lane of %arg is ANDed with 2147483647 (0x7FFFFFFF), and the
; masked value is then combined with %arg2 through fcmp oge + select.
; The directives below expect a broadcastss instruction followed by
; vandps and vmaxps.
; CHECK-LABEL: ExeDepsFix_broadcastss256
; CHECK: broadcastss
; CHECK: vandps
; CHECK: vmaxps
; CHECK: ret
define <8 x float> @ExeDepsFix_broadcastss256(<8 x float> %arg, <8 x float> %arg2) {
  ; Integer-typed AND on the bit pattern of the float input.
  %bitcast = bitcast <8 x float> %arg to <8 x i32>
  %and = and <8 x i32> %bitcast, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
  %floatcast = bitcast <8 x i32> %and to <8 x float>
  ; Compare-and-select that the directives above expect to become vmaxps.
  %max_is_x = fcmp oge <8 x float> %floatcast, %arg2
  %max = select <8 x i1> %max_is_x, <8 x float> %floatcast, <8 x float> %arg2
  ret <8 x float> %max
}


; 128-bit single-precision case where the mask comes from a register.
; The mask is a splat of the scalar argument %broadcastvalue rather than a
; constant. It is ANDed with the bit pattern of %arg, and the result is
; combined with %arg2 through fcmp oge + select.
; The directives below expect a broadcastss instruction followed by
; vandps and vmaxps.
; CHECK-LABEL: ExeDepsFix_broadcastss_inreg
; CHECK: broadcastss
; CHECK: vandps
; CHECK: vmaxps
; CHECK: ret
define <4 x float> @ExeDepsFix_broadcastss_inreg(<4 x float> %arg, <4 x float> %arg2, i32 %broadcastvalue) {
  %bitcast = bitcast <4 x float> %arg to <4 x i32>
  ; Splat: place the scalar in lane 0, then replicate lane 0 into every lane
  ; (the zeroinitializer shuffle mask selects element 0 for all positions).
  %in = insertelement <4 x i32> undef, i32 %broadcastvalue, i32 0
  %mask = shufflevector <4 x i32> %in, <4 x i32> undef, <4 x i32> zeroinitializer
  %and = and <4 x i32> %bitcast, %mask
  %floatcast = bitcast <4 x i32> %and to <4 x float>
  ; Compare-and-select that the directives above expect to become vmaxps.
  %max_is_x = fcmp oge <4 x float> %floatcast, %arg2
  %max = select <4 x i1> %max_is_x, <4 x float> %floatcast, <4 x float> %arg2
  ret <4 x float> %max
}

; 256-bit single-precision case where the mask comes from a register.
; Same shape as the 128-bit in-register float test, but on
; <8 x float> / <8 x i32>: the mask is a splat of %broadcastvalue.
; The directives below expect a broadcastss instruction followed by
; vandps and vmaxps.
; CHECK-LABEL: ExeDepsFix_broadcastss256_inreg
; CHECK: broadcastss
; CHECK: vandps
; CHECK: vmaxps
; CHECK: ret
define <8 x float> @ExeDepsFix_broadcastss256_inreg(<8 x float> %arg, <8 x float> %arg2, i32 %broadcastvalue) {
  %bitcast = bitcast <8 x float> %arg to <8 x i32>
  ; Splat: place the scalar in lane 0, then replicate lane 0 into every lane
  ; (the zeroinitializer shuffle mask selects element 0 for all positions).
  %in = insertelement <8 x i32> undef, i32 %broadcastvalue, i32 0
  %mask = shufflevector <8 x i32> %in, <8 x i32> undef, <8 x i32> zeroinitializer
  %and = and <8 x i32> %bitcast, %mask
  %floatcast = bitcast <8 x i32> %and to <8 x float>
  ; Compare-and-select that the directives above expect to become vmaxps.
  %max_is_x = fcmp oge <8 x float> %floatcast, %arg2
  %max = select <8 x i1> %max_is_x, <8 x float> %floatcast, <8 x float> %arg2
  ret <8 x float> %max
}

; 128-bit double-precision case with a constant mask.
; Each i64 lane of %arg is ANDed with the constant 2147483647, and the
; result is combined with %arg2 through fcmp oge + select.
; CHECK-LABEL: ExeDepsFix_broadcastsd
; In this case the broadcast is directly folded into vandpd, so no separate
; broadcast instruction is expected; only the double-precision AND and max.
; CHECK: vandpd
; CHECK: vmaxpd
; CHECK: ret
define <2 x double> @ExeDepsFix_broadcastsd(<2 x double> %arg, <2 x double> %arg2) {
  ; Integer-typed AND on the bit pattern of the double input.
  %bitcast = bitcast <2 x double> %arg to <2 x i64>
  %and = and <2 x i64> %bitcast, <i64 2147483647, i64 2147483647>
  %floatcast = bitcast <2 x i64> %and to <2 x double>
  ; Compare-and-select that the directives above expect to become vmaxpd.
  %max_is_x = fcmp oge <2 x double> %floatcast, %arg2
  %max = select <2 x i1> %max_is_x, <2 x double> %floatcast, <2 x double> %arg2
  ret <2 x double> %max
}

; 256-bit double-precision case with a constant mask.
; Each i64 lane of %arg is ANDed with the constant 2147483647, and the
; result is combined with %arg2 through fcmp oge + select.
; The directives below expect a broadcastsd instruction followed by the
; double-precision forms of the AND and the max (vandpd, vmaxpd).
; CHECK-LABEL: ExeDepsFix_broadcastsd256
; CHECK: broadcastsd
; CHECK: vandpd
; CHECK: vmaxpd
; CHECK: ret
define <4 x double> @ExeDepsFix_broadcastsd256(<4 x double> %arg, <4 x double> %arg2) {
  ; Integer-typed AND on the bit pattern of the double input.
  %bitcast = bitcast <4 x double> %arg to <4 x i64>
  %and = and <4 x i64> %bitcast, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
  %floatcast = bitcast <4 x i64> %and to <4 x double>
  ; Compare-and-select that the directives above expect to become vmaxpd.
  %max_is_x = fcmp oge <4 x double> %floatcast, %arg2
  %max = select <4 x i1> %max_is_x, <4 x double> %floatcast, <4 x double> %arg2
  ret <4 x double> %max
}


; 128-bit double-precision case where the mask comes from a register.
; The mask is a splat of the i64 argument %broadcastvalue. It is ANDed with
; the bit pattern of %arg, and the result is combined with %arg2 through
; fcmp oge + select.
; CHECK-LABEL: ExeDepsFix_broadcastsd_inreg
; ExeDepsFix works top down, thus it coalesces the vpbroadcastq domain with
; vpand and there is nothing more it can do to match vmaxpd. The AND is
; therefore expected in its integer form (vpand) even though the max stays
; in the double-precision form. (This note used to name vpunpcklqdq, which
; does not appear in the expected output below.)
; CHECK: vmovq
; CHECK: vpbroadcastq
; CHECK: vpand
; CHECK: vmaxpd
; CHECK: ret
define <2 x double> @ExeDepsFix_broadcastsd_inreg(<2 x double> %arg, <2 x double> %arg2, i64 %broadcastvalue) {
  %bitcast = bitcast <2 x double> %arg to <2 x i64>
  ; Splat: place the scalar in lane 0, then replicate lane 0 into every lane
  ; (the zeroinitializer shuffle mask selects element 0 for all positions).
  %in = insertelement <2 x i64> undef, i64 %broadcastvalue, i32 0
  %mask = shufflevector <2 x i64> %in, <2 x i64> undef, <2 x i32> zeroinitializer
  %and = and <2 x i64> %bitcast, %mask
  %floatcast = bitcast <2 x i64> %and to <2 x double>
  ; Compare-and-select that the directives above expect to become vmaxpd.
  %max_is_x = fcmp oge <2 x double> %floatcast, %arg2
  %max = select <2 x i1> %max_is_x, <2 x double> %floatcast, <2 x double> %arg2
  ret <2 x double> %max
}

; 256-bit double-precision case where the mask comes from a register.
; The mask is a splat of the i64 argument %broadcastvalue. It is ANDed with
; the bit pattern of %arg, and the result is combined with %arg2 through
; fcmp oge + select.
; The directives below expect a broadcastsd instruction followed by
; vandpd and vmaxpd.
; CHECK-LABEL: ExeDepsFix_broadcastsd256_inreg
; CHECK: broadcastsd
; CHECK: vandpd
; CHECK: vmaxpd
; CHECK: ret
define <4 x double> @ExeDepsFix_broadcastsd256_inreg(<4 x double> %arg, <4 x double> %arg2, i64 %broadcastvalue) {
  %bitcast = bitcast <4 x double> %arg to <4 x i64>
  ; Splat: place the scalar in lane 0, then replicate lane 0 into every lane
  ; (the zeroinitializer shuffle mask selects element 0 for all positions).
  %in = insertelement <4 x i64> undef, i64 %broadcastvalue, i32 0
  %mask = shufflevector <4 x i64> %in, <4 x i64> undef, <4 x i32> zeroinitializer
  %and = and <4 x i64> %bitcast, %mask
  %floatcast = bitcast <4 x i64> %and to <4 x double>
  ; Compare-and-select that the directives above expect to become vmaxpd.
  %max_is_x = fcmp oge <4 x double> %floatcast, %arg2
  %max = select <4 x i1> %max_is_x, <4 x double> %floatcast, <4 x double> %arg2
  ret <4 x double> %max
}
