#include "ppsspp_config.h"
#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)

#include "Core/MIPS/x86/IRToX86.h"

namespace MIPSComp {

// Initial attempt at converting IR directly to x86.
// This is intended to be an easy way to benefit from the IR with the current infrastructure.
// Later tries may go across multiple blocks and a different representation.

12 struct GPRMapping {
13 	Gen::OpArg dest;
14 	Gen::OpArg src1;
15 	Gen::OpArg src2;
16 };
18 struct FPRMapping {
19 	Gen::OpArg dest;
20 	Gen::OpArg src1;
21 	Gen::OpArg src2;
22 };


25 class GreedyRegallocGPR {
26 public:
27 	GPRMapping Map(IRInst inst, const IRMeta &meta);
28 
29 private:
30 
31 };


Map(IRInst inst,const IRMeta & meta)34 GPRMapping GreedyRegallocGPR::Map(IRInst inst, const IRMeta &meta) {
35 	GPRMapping mapping;
36 	if (meta.types[0] == 'G') {
37 
38 	}
39 	// etc..
40 	return mapping;
41 }
43 // Every 4 registers can also be mapped into an SSE register.
44 // When changing from single to vec4 mapping, we'll just flush, for now.
45 class GreedyRegallocFPR {
46 public:
47 	FPRMapping Map(IRInst inst, const IRMeta &meta);
48 };
Map(IRInst inst,const IRMeta & meta)50 FPRMapping GreedyRegallocFPR::Map(IRInst inst, const IRMeta &meta) {
51 	FPRMapping mapping;
52 
53 	return mapping;
54 }


57 // This requires that ThreeOpToTwoOp has been run as the last pass.
ConvertIRToNative(const IRInst * instructions,int count,const u32 * constants)58 void IRToX86::ConvertIRToNative(const IRInst *instructions, int count, const u32 *constants) {
59 	// Set up regcaches
60 	using namespace Gen;
61 
62 	GreedyRegallocGPR gprAlloc;
63 	GreedyRegallocFPR fprAlloc;
64 
65 	// Loop through all the instructions, emitting code as we go.
66 	// Note that we do need to implement them all - fallbacks are not permitted.
67 	for (int i = 0; i < count; i++) {
68 		const IRInst *inst = &instructions[i];
69 		const IRMeta &meta = *GetIRMeta(inst->op);
70 		GPRMapping gpr = gprAlloc.Map(*inst, meta);
71 		FPRMapping fpr = fprAlloc.Map(*inst, meta);
72 
73 		bool symmetric = false;
74 		switch (inst->op) {
75 		case IROp::Nop:
76 			_assert_(false);
77 			break;
78 
79 			// Output-only
80 		case IROp::SetConst:
81 			code_->MOV(32, gpr.dest, Imm32(constants[inst->src1]));
82 			break;
83 		case IROp::SetConstF:
84 			code_->MOV(32, gpr.dest, Imm32(constants[inst->src1]));
85 			break;
86 
87 			// Add gets to be special cased because we have LEA.
88 		case IROp::Add:
89 			if (gpr.dest.IsSimpleReg() && gpr.src1.IsSimpleReg() && gpr.src2.IsSimpleReg()) {
90 				code_->LEA(32, gpr.dest.GetSimpleReg(), MRegSum(gpr.src1.GetSimpleReg(), gpr.src2.GetSimpleReg()));
91 				break;
92 			}
93 			// Else fall through.
94 			// 3-op arithmetic that directly corresponds to x86
95 			// (often 2-op in practice if src1 == dst). x86 only does 2-op though so some of these will need splitting.
96 		case IROp::Sub:
97 		case IROp::And:
98 		case IROp::Or:
99 		case IROp::Xor:
100 			if (inst->src1 != inst->dest) {
101 				code_->MOV(32, gpr.dest, gpr.src1);
102 			}
103 			// Emit directly
104 			switch (inst->op) {
105 			case IROp::Add: code_->ADD(32, gpr.dest, gpr.src2); break;
106 			case IROp::Sub: code_->SUB(32, gpr.dest, gpr.src2); break;
107 			case IROp::And: code_->AND(32, gpr.dest, gpr.src2); break;
108 			case IROp::Or: code_->OR(32, gpr.dest, gpr.src2); break;
109 			case IROp::Xor: code_->XOR(32, gpr.dest, gpr.src2); break;
110 			}
111 			break;
112 
113 			// Variable shifts.
114 		case IROp::Shl:
115 		case IROp::Shr:
116 		case IROp::Sar:
117 		case IROp::Ror:
118 
119 		case IROp::Slt:
120 		case IROp::SltU:
121 		case IROp::MovZ:
122 		case IROp::MovNZ:
123 		case IROp::Max:
124 		case IROp::Min:
125 			break;
126 
127 			// 2-op arithmetic with constant
128 		case IROp::AddConst:
129 		case IROp::SubConst:
130 		case IROp::AndConst:
131 		case IROp::OrConst:
132 		case IROp::XorConst:
133 		case IROp::SltConst:
134 		case IROp::SltUConst:
135 
136 			// 2-op arithmetic with immediate
137 		case IROp::ShlImm:
138 		case IROp::ShrImm:
139 		case IROp::SarImm:
140 		case IROp::RorImm:
141 
142 			// 2-op arithmetic
143 		case IROp::Mov:
144 			code_->MOV(32, gpr.dest, gpr.src1);
145 			break;
146 
147 		case IROp::Neg:
148 		case IROp::Not:
149 		case IROp::Ext8to32:
150 		case IROp::Ext16to32:
151 		case IROp::ReverseBits:
152 		case IROp::BSwap16:
153 		case IROp::BSwap32:
154 		case IROp::Clz:
155 			if (inst->dest != inst->src1) {
156 				code_->NEG(32, gpr.dest); break;
157 			}
158 			break;
159 				// Multiplier control
160 		case IROp::MtLo:
161 		case IROp::MtHi:
162 		case IROp::MfLo:
163 		case IROp::MfHi:
164 		case IROp::Mult:
165 		case IROp::MultU:
166 		case IROp::Madd:
167 		case IROp::MaddU:
168 		case IROp::Msub:
169 		case IROp::MsubU:
170 		case IROp::Div:
171 		case IROp::DivU:
172 
173 			// Memory access
174 		case IROp::Load8:
175 		case IROp::Load8Ext:
176 		case IROp::Load16:
177 		case IROp::Load16Ext:
178 		case IROp::Load32:
179 		case IROp::LoadFloat:
180 		case IROp::Store8:
181 		case IROp::Store16:
182 		case IROp::Store32:
183 		case IROp::StoreFloat:
184 		case IROp::LoadVec4:
185 		case IROp::StoreVec4:
186 
187 			// Output-only SIMD functions
188 		case IROp::Vec4Init:
189 		case IROp::Vec4Shuffle:
190 
191 			// 2-op SIMD functions
192 		case IROp::Vec4Mov:
193 			code_->MOVAPS(fpr.dest.GetSimpleReg(), fpr.src1);
194 			break;
195 		case IROp::Vec4Neg:
196 		case IROp::Vec4Abs:
197 			break;
198 		case IROp::Vec4ClampToZero:
199 			code_->PXOR(XMM0, R(XMM0));
200 			code_->PMAXSW(XMM0, fpr.src1);
201 			code_->MOVAPD(fpr.dest, XMM0);
202 			break;
203 		case IROp::Vec4DuplicateUpperBitsAndShift1:
204 		case IROp::Vec2ClampToZero:
205 
206 			// 3-op SIMD functions
207 		case IROp::Vec4Add:
208 		case IROp::Vec4Sub:
209 		case IROp::Vec4Mul:
210 		case IROp::Vec4Div:
211 
212 		case IROp::Vec4Scale:
213 		case IROp::Vec4Dot:
214 
215 			// Pack-unpack
216 		case IROp::Vec2Unpack16To31:
217 		case IROp::Vec2Unpack16To32:
218 		case IROp::Vec4Unpack8To32:
219 		case IROp::Vec2Pack32To16:
220 		case IROp::Vec2Pack31To16:
221 		case IROp::Vec4Pack32To8:
222 		case IROp::Vec4Pack31To8:
223 
224 		case IROp::FCmpVfpuBit:
225 		case IROp::FCmpVfpuAggregate:
226 		case IROp::FCmovVfpuCC:
227 
228 			// Trancendental functions (non-simd)
229 		case IROp::FSin:
230 		case IROp::FCos:
231 		case IROp::FRSqrt:
232 		case IROp::FRecip:
233 		case IROp::FAsin:
234 
235 			// 3-Op FP
236 		case IROp::FAdd:
237 		case IROp::FSub:
238 		case IROp::FMul:
239 		case IROp::FDiv:
240 		case IROp::FMin:
241 		case IROp::FMax:
242 
243 			// 2-Op FP
244 		case IROp::FMov:
245 		case IROp::FAbs:
246 		case IROp::FSqrt:
247 		case IROp::FNeg:
248 		case IROp::FSat0_1:
249 		case IROp::FSatMinus1_1:
250 		case IROp::FSign:
251 		case IROp::FCeil:
252 		case IROp::FFloor:
253 		case IROp::FCmp:
254 		case IROp::FCvtSW:
255 		case IROp::FCvtWS:
256 		case IROp::FRound:
257 		case IROp::FTrunc:
258 
259 			// Cross moves
260 		case IROp::FMovFromGPR:
261 		case IROp::FMovToGPR:
262 		case IROp::FpCondToReg:
263 		case IROp::VfpuCtrlToReg:
264 
265 			// VFPU flag/control
266 		case IROp::SetCtrlVFPU:
267 		case IROp::SetCtrlVFPUReg:
268 		case IROp::SetCtrlVFPUFReg:
269 		case IROp::ZeroFpCond:
270 
271 			// Block Exits
272 		case IROp::ExitToConst:
273 		case IROp::ExitToReg:
274 		case IROp::ExitToConstIfEq:
275 		case IROp::ExitToConstIfNeq:
276 		case IROp::ExitToConstIfGtZ:
277 		case IROp::ExitToConstIfGeZ:
278 		case IROp::ExitToConstIfLtZ:
279 		case IROp::ExitToConstIfLeZ:
280 		case IROp::ExitToPC:
281 
282 			// Utilities
283 		case IROp::Downcount:
284 		case IROp::SetPC:
285 		case IROp::SetPCConst:
286 		case IROp::Syscall:
287 		case IROp::Interpret:  // SLOW fallback. Can be made faster.
288 		case IROp::CallReplacement:
289 		case IROp::Break:
290 		default:
291 			break;
292 			}
293 		}
294 	}


}  // namespace

#endif // PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
300