1 #include "CodeGen_PowerPC.h"
2 #include "ConciseCasts.h"
3 #include "IRMatch.h"
4 #include "IROperator.h"
5 #include "LLVM_Headers.h"
6 #include "Util.h"
7
8 namespace Halide {
9 namespace Internal {
10
11 using std::string;
12 using std::vector;
13
14 using namespace Halide::ConciseCasts;
15 using namespace llvm;
16
CodeGen_PowerPC(Target t)17 CodeGen_PowerPC::CodeGen_PowerPC(Target t)
18 : CodeGen_Posix(t) {
19 #if !defined(WITH_POWERPC)
20 user_error << "llvm build not configured with PowerPC target enabled.\n";
21 #endif
22 user_assert(llvm_PowerPC_enabled) << "llvm build not configured with PowerPC target enabled.\n";
23 }
24
altivec_int_type_name(const Type & t)25 const char *CodeGen_PowerPC::altivec_int_type_name(const Type &t) {
26 if (t.is_int()) {
27 switch (t.bits()) {
28 case 8:
29 return "sb";
30 case 16:
31 return "sh";
32 case 32:
33 return "sw";
34 case 64:
35 return "sd";
36 }
37 } else if (t.is_uint()) {
38 switch (t.bits()) {
39 case 8:
40 return "ub";
41 case 16:
42 return "uh";
43 case 32:
44 return "uw";
45 case 64:
46 return "ud";
47 }
48 }
49 return nullptr; // not a recognized int type.
50 }
51
visit(const Cast * op)52 void CodeGen_PowerPC::visit(const Cast *op) {
53 if (!op->type.is_vector()) {
54 // We only have peephole optimizations for vectors in here.
55 CodeGen_Posix::visit(op);
56 return;
57 }
58
59 vector<Expr> matches;
60
61 struct Pattern {
62 bool needs_vsx;
63 bool wide_op;
64 Type type;
65 string intrin;
66 Expr pattern;
67 };
68
69 static Pattern patterns[] = {
70 {false, true, Int(8, 16), "llvm.ppc.altivec.vaddsbs",
71 i8_sat(wild_i16x_ + wild_i16x_)},
72 {false, true, Int(8, 16), "llvm.ppc.altivec.vsubsbs",
73 i8_sat(wild_i16x_ - wild_i16x_)},
74 {false, true, UInt(8, 16), "llvm.ppc.altivec.vaddubs",
75 u8_sat(wild_u16x_ + wild_u16x_)},
76 {false, true, UInt(8, 16), "llvm.ppc.altivec.vsububs",
77 u8(max(wild_i16x_ - wild_i16x_, 0))},
78 {false, true, Int(16, 8), "llvm.ppc.altivec.vaddshs",
79 i16_sat(wild_i32x_ + wild_i32x_)},
80 {false, true, Int(16, 8), "llvm.ppc.altivec.vsubshs",
81 i16_sat(wild_i32x_ - wild_i32x_)},
82 {false, true, UInt(16, 8), "llvm.ppc.altivec.vadduhs",
83 u16_sat(wild_u32x_ + wild_u32x_)},
84 {false, true, UInt(16, 8), "llvm.ppc.altivec.vsubuhs",
85 u16(max(wild_i32x_ - wild_i32x_, 0))},
86 {false, true, Int(32, 4), "llvm.ppc.altivec.vaddsws",
87 i32_sat(wild_i64x_ + wild_i64x_)},
88 {false, true, Int(32, 4), "llvm.ppc.altivec.vsubsws",
89 i32_sat(wild_i64x_ - wild_i64x_)},
90 {false, true, UInt(32, 4), "llvm.ppc.altivec.vadduws",
91 u32_sat(wild_u64x_ + wild_u64x_)},
92 {false, true, UInt(32, 4), "llvm.ppc.altivec.vsubuws",
93 u32(max(wild_i64x_ - wild_i64x_, 0))},
94 {false, true, Int(8, 16), "llvm.ppc.altivec.vavgsb",
95 i8(((wild_i16x_ + wild_i16x_) + 1) / 2)},
96 {false, true, UInt(8, 16), "llvm.ppc.altivec.vavgub",
97 u8(((wild_u16x_ + wild_u16x_) + 1) / 2)},
98 {false, true, Int(16, 8), "llvm.ppc.altivec.vavgsh",
99 i16(((wild_i32x_ + wild_i32x_) + 1) / 2)},
100 {false, true, UInt(16, 8), "llvm.ppc.altivec.vavguh",
101 u16(((wild_u32x_ + wild_u32x_) + 1) / 2)},
102 {false, true, Int(32, 4), "llvm.ppc.altivec.vavgsw",
103 i32(((wild_i64x_ + wild_i64x_) + 1) / 2)},
104 {false, true, UInt(32, 4), "llvm.ppc.altivec.vavguw",
105 u32(((wild_u64x_ + wild_u64x_) + 1) / 2)},
106 };
107
108 for (size_t i = 0; i < sizeof(patterns) / sizeof(patterns[0]); i++) {
109 const Pattern &pattern = patterns[i];
110
111 if (!target.has_feature(Target::VSX) && pattern.needs_vsx) {
112 continue;
113 }
114
115 if (expr_match(pattern.pattern, op, matches)) {
116 bool match = true;
117 if (pattern.wide_op) {
118 // Try to narrow the matches to the target type.
119 for (size_t i = 0; i < matches.size(); i++) {
120 matches[i] = lossless_cast(op->type, matches[i]);
121 if (!matches[i].defined()) match = false;
122 }
123 }
124 if (match) {
125 value = call_intrin(op->type, pattern.type.lanes(), pattern.intrin, matches);
126 return;
127 }
128 }
129 }
130
131 CodeGen_Posix::visit(op);
132 }
133
visit(const Min * op)134 void CodeGen_PowerPC::visit(const Min *op) {
135 if (!op->type.is_vector()) {
136 CodeGen_Posix::visit(op);
137 return;
138 }
139
140 bool vsx = target.has_feature(Target::VSX);
141 bool arch_2_07 = target.has_feature(Target::POWER_ARCH_2_07);
142
143 const Type &element_type = op->type.element_of();
144 const char *element_type_name = altivec_int_type_name(element_type);
145
146 if (element_type_name != nullptr &&
147 (element_type.bits() < 64 || arch_2_07)) {
148 value = call_intrin(op->type, (128 / element_type.bits()),
149 std::string("llvm.ppc.altivec.vmin") + element_type_name,
150 {op->a, op->b});
151 } else if (op->type.element_of() == Float(32)) {
152 value = call_intrin(op->type, 4, "llvm.ppc.altivec.vminfp", {op->a, op->b});
153 } else if (vsx && op->type.element_of() == Float(64)) {
154 value = call_intrin(op->type, 2, "llvm.ppc.vsx.xvmindp", {op->a, op->b});
155 } else {
156 CodeGen_Posix::visit(op);
157 }
158 }
159
visit(const Max * op)160 void CodeGen_PowerPC::visit(const Max *op) {
161 if (!op->type.is_vector()) {
162 CodeGen_Posix::visit(op);
163 return;
164 }
165
166 bool vsx = target.has_feature(Target::VSX);
167 bool arch_2_07 = target.has_feature(Target::POWER_ARCH_2_07);
168
169 const Type &element_type = op->type.element_of();
170 const char *element_type_name = altivec_int_type_name(element_type);
171
172 if (element_type_name != nullptr &&
173 (element_type.bits() < 64 || arch_2_07)) {
174 value = call_intrin(op->type, (128 / element_type.bits()),
175 std::string("llvm.ppc.altivec.vmax") + element_type_name,
176 {op->a, op->b});
177 } else if (op->type.element_of() == Float(32)) {
178 value = call_intrin(op->type, 4, "llvm.ppc.altivec.vmaxfp", {op->a, op->b});
179 } else if (vsx && op->type.element_of() == Float(64)) {
180 value = call_intrin(op->type, 2, "llvm.ppc.vsx.xvmaxdp", {op->a, op->b});
181 } else {
182 CodeGen_Posix::visit(op);
183 }
184 }
185
mcpu() const186 string CodeGen_PowerPC::mcpu() const {
187 if (target.bits == 32) {
188 return "ppc32";
189 } else {
190 if (target.has_feature(Target::POWER_ARCH_2_07))
191 return "pwr8";
192 else if (target.has_feature(Target::VSX))
193 return "pwr7";
194 else
195 return "ppc64";
196 }
197 }
198
mattrs() const199 string CodeGen_PowerPC::mattrs() const {
200 std::string features;
201 std::string separator;
202 std::string enable;
203
204 features += "+altivec";
205 separator = ",";
206
207 enable = target.has_feature(Target::VSX) ? "+" : "-";
208 features += separator + enable + "vsx";
209 separator = ",";
210
211 enable = target.has_feature(Target::POWER_ARCH_2_07) ? "+" : "-";
212 features += separator + enable + "power8-altivec";
213 separator = ",";
214
215 // These move instructions are defined in POWER ISA 2.06 but we do
216 // not check for 2.06 currently. So disable this for anything
217 // lower than ISA 2.07
218 features += separator + enable + "direct-move";
219 separator = ",";
220
221 return features;
222 }
223
use_soft_float_abi() const224 bool CodeGen_PowerPC::use_soft_float_abi() const {
225 return false;
226 }
227
native_vector_bits() const228 int CodeGen_PowerPC::native_vector_bits() const {
229 return 128;
230 }
231
232 } // namespace Internal
233 } // namespace Halide
234