1 #include "ppsspp_config.h"
2 #if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
3
4 #include "Core/MIPS/x86/IRToX86.h"
5
6 namespace MIPSComp {
7
8 // Initial attempt at converting IR directly to x86.
9 // This is intended to be an easy way to benefit from the IR with the current infrastructure.
10 // Later tries may go across multiple blocks and a different representation.
11
// Where a single IR instruction's GPR operands live for emission.
// Each OpArg is either a host register or a memory location chosen by the allocator.
struct GPRMapping {
	Gen::OpArg dest;  // destination operand
	Gen::OpArg src1;  // first source operand
	Gen::OpArg src2;  // second source operand
};
17
// Where a single IR instruction's FPR operands live for emission.
// Each OpArg is either a host XMM register or a memory location chosen by the allocator.
struct FPRMapping {
	Gen::OpArg dest;  // destination operand
	Gen::OpArg src1;  // first source operand
	Gen::OpArg src2;  // second source operand
};
23
24
// Greedy per-instruction register allocator for general-purpose registers.
class GreedyRegallocGPR {
public:
	// Returns the x86 operand locations for inst's GPR operands.
	// meta describes the operand kinds of the instruction (see IRMeta).
	GPRMapping Map(IRInst inst, const IRMeta &meta);

private:

};
32
33
Map(IRInst inst,const IRMeta & meta)34 GPRMapping GreedyRegallocGPR::Map(IRInst inst, const IRMeta &meta) {
35 GPRMapping mapping;
36 if (meta.types[0] == 'G') {
37
38 }
39 // etc..
40 return mapping;
41 }
42
43 // Every 4 registers can also be mapped into an SSE register.
44 // When changing from single to vec4 mapping, we'll just flush, for now.
class GreedyRegallocFPR {
public:
	// Returns the x86 operand locations for inst's FPR operands.
	// meta describes the operand kinds of the instruction (see IRMeta).
	FPRMapping Map(IRInst inst, const IRMeta &meta);
};
49
Map(IRInst inst,const IRMeta & meta)50 FPRMapping GreedyRegallocFPR::Map(IRInst inst, const IRMeta &meta) {
51 FPRMapping mapping;
52
53 return mapping;
54 }
55
56
57 // This requires that ThreeOpToTwoOp has been run as the last pass.
// Emits native x86 code for a linear run of IR instructions.
// This requires that ThreeOpToTwoOp has been run as the last pass.
// instructions: the IR stream to lower; count: number of instructions;
// constants: the block's constant pool, indexed by inst->src1 for *Const ops.
// NOTE(review): work in progress - most cases below are stubs that fall through
// to a later case's emit or to the default break; hedged notes mark the spots
// that look unfinished or suspicious.
void IRToX86::ConvertIRToNative(const IRInst *instructions, int count, const u32 *constants) {
	// Set up regcaches
	using namespace Gen;

	GreedyRegallocGPR gprAlloc;
	GreedyRegallocFPR fprAlloc;

	// Loop through all the instructions, emitting code as we go.
	// Note that we do need to implement them all - fallbacks are not permitted.
	for (int i = 0; i < count; i++) {
		const IRInst *inst = &instructions[i];
		const IRMeta &meta = *GetIRMeta(inst->op);
		// Ask both allocators for operand locations up front; each returns
		// defaults for operand slots that don't apply to this instruction.
		GPRMapping gpr = gprAlloc.Map(*inst, meta);
		FPRMapping fpr = fprAlloc.Map(*inst, meta);

		// NOTE(review): set but never read - presumably reserved for future
		// commutative-operation handling.
		bool symmetric = false;
		switch (inst->op) {
		case IROp::Nop:
			// Nops should have been stripped before reaching the backend.
			_assert_(false);
			break;

		// Output-only
		case IROp::SetConst:
			code_->MOV(32, gpr.dest, Imm32(constants[inst->src1]));
			break;
		case IROp::SetConstF:
			// NOTE(review): stores the raw 32-bit constant through the GPR
			// mapping (gpr.dest), identical to SetConst - confirm fpr.dest
			// isn't the intended target for the float case.
			code_->MOV(32, gpr.dest, Imm32(constants[inst->src1]));
			break;

		// Add gets to be special cased because we have LEA.
		case IROp::Add:
			// LEA gives us a true 3-operand add when all operands are in registers.
			if (gpr.dest.IsSimpleReg() && gpr.src1.IsSimpleReg() && gpr.src2.IsSimpleReg()) {
				code_->LEA(32, gpr.dest.GetSimpleReg(), MRegSum(gpr.src1.GetSimpleReg(), gpr.src2.GetSimpleReg()));
				break;
			}
			// Else fall through.
		// 3-op arithmetic that directly corresponds to x86
		// (often 2-op in practice if src1 == dst). x86 only does 2-op though so some of these will need splitting.
		case IROp::Sub:
		case IROp::And:
		case IROp::Or:
		case IROp::Xor:
			// Lower the 3-op IR form to x86's 2-op form: dest = src1, then dest op= src2.
			// NOTE(review): if src2 aliases dest (and src1 doesn't), this MOV clobbers
			// src2 before it's read - presumably ThreeOpToTwoOp rules that case out; confirm.
			if (inst->src1 != inst->dest) {
				code_->MOV(32, gpr.dest, gpr.src1);
			}
			// Emit directly
			switch (inst->op) {
			case IROp::Add: code_->ADD(32, gpr.dest, gpr.src2); break;
			case IROp::Sub: code_->SUB(32, gpr.dest, gpr.src2); break;
			case IROp::And: code_->AND(32, gpr.dest, gpr.src2); break;
			case IROp::Or: code_->OR(32, gpr.dest, gpr.src2); break;
			case IROp::Xor: code_->XOR(32, gpr.dest, gpr.src2); break;
			}
			break;

		// Variable shifts.
		case IROp::Shl:
		case IROp::Shr:
		case IROp::Sar:
		case IROp::Ror:

		case IROp::Slt:
		case IROp::SltU:
		case IROp::MovZ:
		case IROp::MovNZ:
		case IROp::Max:
		case IROp::Min:
			// Not yet implemented - these currently emit nothing.
			break;

		// 2-op arithmetic with constant
		case IROp::AddConst:
		case IROp::SubConst:
		case IROp::AndConst:
		case IROp::OrConst:
		case IROp::XorConst:
		case IROp::SltConst:
		case IROp::SltUConst:

		// 2-op arithmetic with immediate
		case IROp::ShlImm:
		case IROp::ShrImm:
		case IROp::SarImm:
		case IROp::RorImm:

		// 2-op arithmetic
		case IROp::Mov:
			// NOTE(review): all the *Const/*Imm cases above fall through to this
			// plain register move - only Mov itself is actually implemented yet.
			code_->MOV(32, gpr.dest, gpr.src1);
			break;

		case IROp::Neg:
		case IROp::Not:
		case IROp::Ext8to32:
		case IROp::Ext16to32:
		case IROp::ReverseBits:
		case IROp::BSwap16:
		case IROp::BSwap32:
		case IROp::Clz:
			// NOTE(review): looks unfinished - emits NEG for every op in this group,
			// and the dest != src1 guard seems backwards (no copy from src1 is made
			// before the in-place NEG).
			if (inst->dest != inst->src1) {
				code_->NEG(32, gpr.dest); break;
			}
			break;
		// Multiplier control
		case IROp::MtLo:
		case IROp::MtHi:
		case IROp::MfLo:
		case IROp::MfHi:
		case IROp::Mult:
		case IROp::MultU:
		case IROp::Madd:
		case IROp::MaddU:
		case IROp::Msub:
		case IROp::MsubU:
		case IROp::Div:
		case IROp::DivU:

		// Memory access
		case IROp::Load8:
		case IROp::Load8Ext:
		case IROp::Load16:
		case IROp::Load16Ext:
		case IROp::Load32:
		case IROp::LoadFloat:
		case IROp::Store8:
		case IROp::Store16:
		case IROp::Store32:
		case IROp::StoreFloat:
		case IROp::LoadVec4:
		case IROp::StoreVec4:

		// Output-only SIMD functions
		case IROp::Vec4Init:
		case IROp::Vec4Shuffle:

		// 2-op SIMD functions
		case IROp::Vec4Mov:
			// NOTE(review): all the unimplemented cases above (multiplier,
			// memory, Vec4Init/Shuffle) currently fall through to this MOVAPS.
			code_->MOVAPS(fpr.dest.GetSimpleReg(), fpr.src1);
			break;
		case IROp::Vec4Neg:
		case IROp::Vec4Abs:
			// Not yet implemented.
			break;
		case IROp::Vec4ClampToZero:
			// Per-lane max(x, 0) using XMM0 as a zeroed scratch register.
			// NOTE(review): PMAXSW is a packed *16-bit* signed max, not 32-bit -
			// confirm that lane width is intended here.
			code_->PXOR(XMM0, R(XMM0));
			code_->PMAXSW(XMM0, fpr.src1);
			code_->MOVAPD(fpr.dest, XMM0);
			break;
		case IROp::Vec4DuplicateUpperBitsAndShift1:
		case IROp::Vec2ClampToZero:

		// 3-op SIMD functions
		case IROp::Vec4Add:
		case IROp::Vec4Sub:
		case IROp::Vec4Mul:
		case IROp::Vec4Div:

		case IROp::Vec4Scale:
		case IROp::Vec4Dot:

		// Pack-unpack
		case IROp::Vec2Unpack16To31:
		case IROp::Vec2Unpack16To32:
		case IROp::Vec4Unpack8To32:
		case IROp::Vec2Pack32To16:
		case IROp::Vec2Pack31To16:
		case IROp::Vec4Pack32To8:
		case IROp::Vec4Pack31To8:

		case IROp::FCmpVfpuBit:
		case IROp::FCmpVfpuAggregate:
		case IROp::FCmovVfpuCC:

		// Transcendental functions (non-simd)
		case IROp::FSin:
		case IROp::FCos:
		case IROp::FRSqrt:
		case IROp::FRecip:
		case IROp::FAsin:

		// 3-Op FP
		case IROp::FAdd:
		case IROp::FSub:
		case IROp::FMul:
		case IROp::FDiv:
		case IROp::FMin:
		case IROp::FMax:

		// 2-Op FP
		case IROp::FMov:
		case IROp::FAbs:
		case IROp::FSqrt:
		case IROp::FNeg:
		case IROp::FSat0_1:
		case IROp::FSatMinus1_1:
		case IROp::FSign:
		case IROp::FCeil:
		case IROp::FFloor:
		case IROp::FCmp:
		case IROp::FCvtSW:
		case IROp::FCvtWS:
		case IROp::FRound:
		case IROp::FTrunc:

		// Cross moves
		case IROp::FMovFromGPR:
		case IROp::FMovToGPR:
		case IROp::FpCondToReg:
		case IROp::VfpuCtrlToReg:

		// VFPU flag/control
		case IROp::SetCtrlVFPU:
		case IROp::SetCtrlVFPUReg:
		case IROp::SetCtrlVFPUFReg:
		case IROp::ZeroFpCond:

		// Block Exits
		case IROp::ExitToConst:
		case IROp::ExitToReg:
		case IROp::ExitToConstIfEq:
		case IROp::ExitToConstIfNeq:
		case IROp::ExitToConstIfGtZ:
		case IROp::ExitToConstIfGeZ:
		case IROp::ExitToConstIfLtZ:
		case IROp::ExitToConstIfLeZ:
		case IROp::ExitToPC:

		// Utilities
		case IROp::Downcount:
		case IROp::SetPC:
		case IROp::SetPCConst:
		case IROp::Syscall:
		case IROp::Interpret:  // SLOW fallback. Can be made faster.
		case IROp::CallReplacement:
		case IROp::Break:
		default:
			// Everything from Vec4DuplicateUpperBitsAndShift1 down is unimplemented
			// and silently emits nothing, despite the "fallbacks are not permitted"
			// note above - this backend is clearly incomplete.
			break;
		}
	}
}
295
296
297 } // namespace
298
299 #endif // PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
300