1 #include "CodeGen_PowerPC.h"
2 #include "ConciseCasts.h"
3 #include "IRMatch.h"
4 #include "IROperator.h"
5 #include "LLVM_Headers.h"
6 #include "Util.h"
7 
8 namespace Halide {
9 namespace Internal {
10 
11 using std::string;
12 using std::vector;
13 
14 using namespace Halide::ConciseCasts;
15 using namespace llvm;
16 
CodeGen_PowerPC(Target t)17 CodeGen_PowerPC::CodeGen_PowerPC(Target t)
18     : CodeGen_Posix(t) {
19 #if !defined(WITH_POWERPC)
20     user_error << "llvm build not configured with PowerPC target enabled.\n";
21 #endif
22     user_assert(llvm_PowerPC_enabled) << "llvm build not configured with PowerPC target enabled.\n";
23 }
24 
altivec_int_type_name(const Type & t)25 const char *CodeGen_PowerPC::altivec_int_type_name(const Type &t) {
26     if (t.is_int()) {
27         switch (t.bits()) {
28         case 8:
29             return "sb";
30         case 16:
31             return "sh";
32         case 32:
33             return "sw";
34         case 64:
35             return "sd";
36         }
37     } else if (t.is_uint()) {
38         switch (t.bits()) {
39         case 8:
40             return "ub";
41         case 16:
42             return "uh";
43         case 32:
44             return "uw";
45         case 64:
46             return "ud";
47         }
48     }
49     return nullptr;  // not a recognized int type.
50 }
51 
visit(const Cast * op)52 void CodeGen_PowerPC::visit(const Cast *op) {
53     if (!op->type.is_vector()) {
54         // We only have peephole optimizations for vectors in here.
55         CodeGen_Posix::visit(op);
56         return;
57     }
58 
59     vector<Expr> matches;
60 
61     struct Pattern {
62         bool needs_vsx;
63         bool wide_op;
64         Type type;
65         string intrin;
66         Expr pattern;
67     };
68 
69     static Pattern patterns[] = {
70         {false, true, Int(8, 16), "llvm.ppc.altivec.vaddsbs",
71          i8_sat(wild_i16x_ + wild_i16x_)},
72         {false, true, Int(8, 16), "llvm.ppc.altivec.vsubsbs",
73          i8_sat(wild_i16x_ - wild_i16x_)},
74         {false, true, UInt(8, 16), "llvm.ppc.altivec.vaddubs",
75          u8_sat(wild_u16x_ + wild_u16x_)},
76         {false, true, UInt(8, 16), "llvm.ppc.altivec.vsububs",
77          u8(max(wild_i16x_ - wild_i16x_, 0))},
78         {false, true, Int(16, 8), "llvm.ppc.altivec.vaddshs",
79          i16_sat(wild_i32x_ + wild_i32x_)},
80         {false, true, Int(16, 8), "llvm.ppc.altivec.vsubshs",
81          i16_sat(wild_i32x_ - wild_i32x_)},
82         {false, true, UInt(16, 8), "llvm.ppc.altivec.vadduhs",
83          u16_sat(wild_u32x_ + wild_u32x_)},
84         {false, true, UInt(16, 8), "llvm.ppc.altivec.vsubuhs",
85          u16(max(wild_i32x_ - wild_i32x_, 0))},
86         {false, true, Int(32, 4), "llvm.ppc.altivec.vaddsws",
87          i32_sat(wild_i64x_ + wild_i64x_)},
88         {false, true, Int(32, 4), "llvm.ppc.altivec.vsubsws",
89          i32_sat(wild_i64x_ - wild_i64x_)},
90         {false, true, UInt(32, 4), "llvm.ppc.altivec.vadduws",
91          u32_sat(wild_u64x_ + wild_u64x_)},
92         {false, true, UInt(32, 4), "llvm.ppc.altivec.vsubuws",
93          u32(max(wild_i64x_ - wild_i64x_, 0))},
94         {false, true, Int(8, 16), "llvm.ppc.altivec.vavgsb",
95          i8(((wild_i16x_ + wild_i16x_) + 1) / 2)},
96         {false, true, UInt(8, 16), "llvm.ppc.altivec.vavgub",
97          u8(((wild_u16x_ + wild_u16x_) + 1) / 2)},
98         {false, true, Int(16, 8), "llvm.ppc.altivec.vavgsh",
99          i16(((wild_i32x_ + wild_i32x_) + 1) / 2)},
100         {false, true, UInt(16, 8), "llvm.ppc.altivec.vavguh",
101          u16(((wild_u32x_ + wild_u32x_) + 1) / 2)},
102         {false, true, Int(32, 4), "llvm.ppc.altivec.vavgsw",
103          i32(((wild_i64x_ + wild_i64x_) + 1) / 2)},
104         {false, true, UInt(32, 4), "llvm.ppc.altivec.vavguw",
105          u32(((wild_u64x_ + wild_u64x_) + 1) / 2)},
106     };
107 
108     for (size_t i = 0; i < sizeof(patterns) / sizeof(patterns[0]); i++) {
109         const Pattern &pattern = patterns[i];
110 
111         if (!target.has_feature(Target::VSX) && pattern.needs_vsx) {
112             continue;
113         }
114 
115         if (expr_match(pattern.pattern, op, matches)) {
116             bool match = true;
117             if (pattern.wide_op) {
118                 // Try to narrow the matches to the target type.
119                 for (size_t i = 0; i < matches.size(); i++) {
120                     matches[i] = lossless_cast(op->type, matches[i]);
121                     if (!matches[i].defined()) match = false;
122                 }
123             }
124             if (match) {
125                 value = call_intrin(op->type, pattern.type.lanes(), pattern.intrin, matches);
126                 return;
127             }
128         }
129     }
130 
131     CodeGen_Posix::visit(op);
132 }
133 
visit(const Min * op)134 void CodeGen_PowerPC::visit(const Min *op) {
135     if (!op->type.is_vector()) {
136         CodeGen_Posix::visit(op);
137         return;
138     }
139 
140     bool vsx = target.has_feature(Target::VSX);
141     bool arch_2_07 = target.has_feature(Target::POWER_ARCH_2_07);
142 
143     const Type &element_type = op->type.element_of();
144     const char *element_type_name = altivec_int_type_name(element_type);
145 
146     if (element_type_name != nullptr &&
147         (element_type.bits() < 64 || arch_2_07)) {
148         value = call_intrin(op->type, (128 / element_type.bits()),
149                             std::string("llvm.ppc.altivec.vmin") + element_type_name,
150                             {op->a, op->b});
151     } else if (op->type.element_of() == Float(32)) {
152         value = call_intrin(op->type, 4, "llvm.ppc.altivec.vminfp", {op->a, op->b});
153     } else if (vsx && op->type.element_of() == Float(64)) {
154         value = call_intrin(op->type, 2, "llvm.ppc.vsx.xvmindp", {op->a, op->b});
155     } else {
156         CodeGen_Posix::visit(op);
157     }
158 }
159 
visit(const Max * op)160 void CodeGen_PowerPC::visit(const Max *op) {
161     if (!op->type.is_vector()) {
162         CodeGen_Posix::visit(op);
163         return;
164     }
165 
166     bool vsx = target.has_feature(Target::VSX);
167     bool arch_2_07 = target.has_feature(Target::POWER_ARCH_2_07);
168 
169     const Type &element_type = op->type.element_of();
170     const char *element_type_name = altivec_int_type_name(element_type);
171 
172     if (element_type_name != nullptr &&
173         (element_type.bits() < 64 || arch_2_07)) {
174         value = call_intrin(op->type, (128 / element_type.bits()),
175                             std::string("llvm.ppc.altivec.vmax") + element_type_name,
176                             {op->a, op->b});
177     } else if (op->type.element_of() == Float(32)) {
178         value = call_intrin(op->type, 4, "llvm.ppc.altivec.vmaxfp", {op->a, op->b});
179     } else if (vsx && op->type.element_of() == Float(64)) {
180         value = call_intrin(op->type, 2, "llvm.ppc.vsx.xvmaxdp", {op->a, op->b});
181     } else {
182         CodeGen_Posix::visit(op);
183     }
184 }
185 
mcpu() const186 string CodeGen_PowerPC::mcpu() const {
187     if (target.bits == 32) {
188         return "ppc32";
189     } else {
190         if (target.has_feature(Target::POWER_ARCH_2_07))
191             return "pwr8";
192         else if (target.has_feature(Target::VSX))
193             return "pwr7";
194         else
195             return "ppc64";
196     }
197 }
198 
mattrs() const199 string CodeGen_PowerPC::mattrs() const {
200     std::string features;
201     std::string separator;
202     std::string enable;
203 
204     features += "+altivec";
205     separator = ",";
206 
207     enable = target.has_feature(Target::VSX) ? "+" : "-";
208     features += separator + enable + "vsx";
209     separator = ",";
210 
211     enable = target.has_feature(Target::POWER_ARCH_2_07) ? "+" : "-";
212     features += separator + enable + "power8-altivec";
213     separator = ",";
214 
215     // These move instructions are defined in POWER ISA 2.06 but we do
216     // not check for 2.06 currently.  So disable this for anything
217     // lower than ISA 2.07
218     features += separator + enable + "direct-move";
219     separator = ",";
220 
221     return features;
222 }
223 
use_soft_float_abi() const224 bool CodeGen_PowerPC::use_soft_float_abi() const {
225     return false;
226 }
227 
native_vector_bits() const228 int CodeGen_PowerPC::native_vector_bits() const {
229     return 128;
230 }
231 
232 }  // namespace Internal
233 }  // namespace Halide
234