/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "jit/x86-shared/MoveEmitter-x86-shared.h"

#include "jit/MacroAssembler-inl.h"

using namespace js;
using namespace js::jit;

using mozilla::Maybe;

MoveEmitterX86::MoveEmitterX86(MacroAssembler& masm)
    : inCycle_(false), masm(masm), pushedAtCycle_(-1) {
  pushedAtStart_ = masm.framePushed();
}

// Examine the cycle in moves starting at position i. Determine if it's a
// simple cycle consisting of all register-to-register moves in a single class,
// and whether it can be implemented entirely by swaps.
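// For example, a three-move cycle (A -> B), (B -> C), (C -> A) where A, B and
// C are all general-purpose registers is characterized as swapCount == 2: it
// can be realized with two register exchanges. A cycle that mixes register
// classes, or that goes through memory, makes the characterization fail (the
// function returns -1) and is resolved through the stack instead.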
size_t MoveEmitterX86::characterizeCycle(const MoveResolver& moves, size_t i,
                                         bool* allGeneralRegs,
                                         bool* allFloatRegs) {
  size_t swapCount = 0;

  for (size_t j = i;; j++) {
    const MoveOp& move = moves.getMove(j);

    // If it isn't a cycle of registers of the same kind, we won't be able
    // to optimize it.
    if (!move.to().isGeneralReg()) {
      *allGeneralRegs = false;
    }
    if (!move.to().isFloatReg()) {
      *allFloatRegs = false;
    }
    if (!*allGeneralRegs && !*allFloatRegs) {
      return -1;
    }

    // Stop iterating when we see the last one.
    if (j != i && move.isCycleEnd()) {
      break;
    }

    // Check that this move is actually part of the cycle. This is
    // over-conservative when there are multiple reads from the same source,
    // but that's expected to be rare.
    if (move.from() != moves.getMove(j + 1).to()) {
      *allGeneralRegs = false;
      *allFloatRegs = false;
      return -1;
    }

    swapCount++;
  }

  // Check that the last move cycles back to the first move.
  const MoveOp& move = moves.getMove(i + swapCount);
  if (move.from() != moves.getMove(i).to()) {
    *allGeneralRegs = false;
    *allFloatRegs = false;
    return -1;
  }

  return swapCount;
}

// If we can emit optimized code for the cycle in moves starting at position i,
// do so, and return true.
bool MoveEmitterX86::maybeEmitOptimizedCycle(const MoveResolver& moves,
                                             size_t i, bool allGeneralRegs,
                                             bool allFloatRegs,
                                             size_t swapCount) {
  if (allGeneralRegs && swapCount <= 2) {
    // Use x86's swap-integer-registers instruction if we only have a few
    // swaps. (x86 also has a swap between registers and memory but it's
    // slow.)
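    // For instance, a three-register cycle (swapCount == 2) becomes:
    //   xchg to(i),     to(i + 1)
    //   xchg to(i + 1), to(i + 2)
    // which leaves every register holding the value its move requires.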
    for (size_t k = 0; k < swapCount; k++) {
      masm.xchg(moves.getMove(i + k).to().reg(),
                moves.getMove(i + k + 1).to().reg());
    }
    return true;
  }

  if (allFloatRegs && swapCount == 1) {
    // There's no xchg for xmm registers, but if we only need a single swap,
    // it's cheap to do an XOR swap.
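    // This is the classic three-XOR exchange: after the three vxorpd
    // instructions below, the two registers have traded contents without
    // needing a temporary register or a trip through memory.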
    FloatRegister a = moves.getMove(i).to().floatReg();
    FloatRegister b = moves.getMove(i + 1).to().floatReg();
    masm.vxorpd(a, b, b);
    masm.vxorpd(b, a, a);
    masm.vxorpd(a, b, b);
    return true;
  }

  return false;
}

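// Emit code for the whole move group. Cycles flagged by the MoveResolver are
// either collapsed into register exchanges (maybeEmitOptimizedCycle) or broken
// by spilling one value to the stack (breakCycle / completeCycle).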
void MoveEmitterX86::emit(const MoveResolver& moves) {
#if defined(JS_CODEGEN_X86) && defined(DEBUG)
  // Clobber any scratch register we have, to make regalloc bugs more visible.
  if (scratchRegister_.isSome()) {
    masm.mov(ImmWord(0xdeadbeef), scratchRegister_.value());
  }
#endif

  for (size_t i = 0; i < moves.numMoves(); i++) {
#if defined(JS_CODEGEN_X86) && defined(DEBUG)
    if (!scratchRegister_.isSome()) {
      Maybe<Register> reg = findScratchRegister(moves, i);
      if (reg.isSome()) {
        masm.mov(ImmWord(0xdeadbeef), reg.value());
      }
    }
#endif

    const MoveOp& move = moves.getMove(i);
    const MoveOperand& from = move.from();
    const MoveOperand& to = move.to();

    if (move.isCycleEnd()) {
      MOZ_ASSERT(inCycle_);
      completeCycle(to, move.type());
      inCycle_ = false;
      continue;
    }

    if (move.isCycleBegin()) {
      MOZ_ASSERT(!inCycle_);

      // Characterize the cycle.
      bool allGeneralRegs = true, allFloatRegs = true;
      size_t swapCount =
          characterizeCycle(moves, i, &allGeneralRegs, &allFloatRegs);

      // Attempt to optimize it to avoid using the stack.
      if (maybeEmitOptimizedCycle(moves, i, allGeneralRegs, allFloatRegs,
                                  swapCount)) {
        i += swapCount;
        continue;
      }

      // Otherwise use the stack.
      breakCycle(to, move.endCycleType());
      inCycle_ = true;
    }

    // A normal move which is not part of a cycle.
    switch (move.type()) {
      case MoveOp::FLOAT32:
        emitFloat32Move(from, to);
        break;
      case MoveOp::DOUBLE:
        emitDoubleMove(from, to);
        break;
      case MoveOp::INT32:
        emitInt32Move(from, to, moves, i);
        break;
      case MoveOp::GENERAL:
        emitGeneralMove(from, to, moves, i);
        break;
      case MoveOp::SIMD128:
        emitSimd128Move(from, to);
        break;
      default:
        MOZ_CRASH("Unexpected move type");
    }
  }
}

MoveEmitterX86::~MoveEmitterX86() { assertDone(); }

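// Return the address of the stack slot used to park a value while a cycle is
// being broken. The slot is reserved lazily and is Simd128DataSize bytes, so
// it is large enough for every move type this emitter handles.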
Address MoveEmitterX86::cycleSlot() {
  if (pushedAtCycle_ == -1) {
    // Reserve stack for cycle resolution
    masm.reserveStack(Simd128DataSize);
    pushedAtCycle_ = masm.framePushed();
  }

  return Address(StackPointer, masm.framePushed() - pushedAtCycle_);
}

Address MoveEmitterX86::toAddress(const MoveOperand& operand) const {
  if (operand.base() != StackPointer) {
    return Address(operand.base(), operand.disp());
  }

  MOZ_ASSERT(operand.disp() >= 0);

  // Otherwise, the stack offset may need to be adjusted.
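  // Stack-relative operands were recorded against the stack pointer as it was
  // when the move group started, so any bytes pushed since then (e.g. by
  // breakCycle) are added back here. For example, with 8 bytes pushed, an
  // operand recorded at offset 4 from StackPointer is now at offset 12.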
  return Address(StackPointer,
                 operand.disp() + (masm.framePushed() - pushedAtStart_));
}

// Warning, do not use the resulting operand with pop instructions, since they
// compute the effective destination address after altering the stack pointer.
// Use toPopOperand if an Operand is needed for a pop.
Operand MoveEmitterX86::toOperand(const MoveOperand& operand) const {
  if (operand.isMemoryOrEffectiveAddress()) {
    return Operand(toAddress(operand));
  }
  if (operand.isGeneralReg()) {
    return Operand(operand.reg());
  }

  MOZ_ASSERT(operand.isFloatReg());
  return Operand(operand.floatReg());
}

// This is the same as toOperand except that it computes an Operand suitable
// for use in a pop.
Operand MoveEmitterX86::toPopOperand(const MoveOperand& operand) const {
  if (operand.isMemory()) {
    if (operand.base() != StackPointer) {
      return Operand(operand.base(), operand.disp());
    }

    MOZ_ASSERT(operand.disp() >= 0);

    // Otherwise, the stack offset may need to be adjusted.
    // Note the adjustment by the stack slot here, to offset for the fact that
    // pop computes its effective address after incrementing the stack pointer.
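    // For example, in the Push/Pop bounce used by emitInt32Move, framePushed
    // is pushedAtStart_ + sizeof(void*) when the Pop is emitted; because the
    // pop restores the stack pointer before it stores, the destination is
    // still at its original offset, and the formula below reduces to disp().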
    return Operand(
        StackPointer,
        operand.disp() + (masm.framePushed() - sizeof(void*) - pushedAtStart_));
  }
  if (operand.isGeneralReg()) {
    return Operand(operand.reg());
  }

  MOZ_ASSERT(operand.isFloatReg());
  return Operand(operand.floatReg());
}

void MoveEmitterX86::breakCycle(const MoveOperand& to, MoveOp::Type type) {
  // There is a cycle of the form:
  //   (A -> B)
  //   (B -> A)
  //
  // This case handles (A -> B), which we reach first. We save B, then allow
  // the original move to continue.
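  // Depending on the type, B is saved either into the dedicated cycle slot or,
  // for GENERAL moves, simply pushed onto the stack; completeCycle undoes the
  // corresponding choice.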
  switch (type) {
    case MoveOp::SIMD128:
      if (to.isMemory()) {
        ScratchSimd128Scope scratch(masm);
        masm.loadUnalignedSimd128(toAddress(to), scratch);
        masm.storeUnalignedSimd128(scratch, cycleSlot());
      } else {
        masm.storeUnalignedSimd128(to.floatReg(), cycleSlot());
      }
      break;
    case MoveOp::FLOAT32:
      if (to.isMemory()) {
        ScratchFloat32Scope scratch(masm);
        masm.loadFloat32(toAddress(to), scratch);
        masm.storeFloat32(scratch, cycleSlot());
      } else {
        masm.storeFloat32(to.floatReg(), cycleSlot());
      }
      break;
    case MoveOp::DOUBLE:
      if (to.isMemory()) {
        ScratchDoubleScope scratch(masm);
        masm.loadDouble(toAddress(to), scratch);
        masm.storeDouble(scratch, cycleSlot());
      } else {
        masm.storeDouble(to.floatReg(), cycleSlot());
      }
      break;
    case MoveOp::INT32:
#ifdef JS_CODEGEN_X64
      // x64 can't pop to a 32-bit destination, so don't push.
      if (to.isMemory()) {
        masm.load32(toAddress(to), ScratchReg);
        masm.store32(ScratchReg, cycleSlot());
      } else {
        masm.store32(to.reg(), cycleSlot());
      }
      break;
#endif
    case MoveOp::GENERAL:
      masm.Push(toOperand(to));
      break;
    default:
      MOZ_CRASH("Unexpected move type");
  }
}

void MoveEmitterX86::completeCycle(const MoveOperand& to, MoveOp::Type type) {
  // There is a cycle of the form:
  //   (A -> B)
  //   (B -> A)
  //
  // This case handles (B -> A), which we reach last. We emit a move from the
  // saved value of B to A.
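  // The saved value is read back from wherever breakCycle parked it: the cycle
  // slot for SIMD128, FLOAT32 and DOUBLE (and INT32 on x64), or the top of the
  // stack for GENERAL moves.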
  switch (type) {
    case MoveOp::SIMD128:
      MOZ_ASSERT(pushedAtCycle_ != -1);
      MOZ_ASSERT(pushedAtCycle_ - pushedAtStart_ >= Simd128DataSize);
      if (to.isMemory()) {
        ScratchSimd128Scope scratch(masm);
        masm.loadUnalignedSimd128(cycleSlot(), scratch);
        masm.storeUnalignedSimd128(scratch, toAddress(to));
      } else {
        masm.loadUnalignedSimd128(cycleSlot(), to.floatReg());
      }
      break;
    case MoveOp::FLOAT32:
      MOZ_ASSERT(pushedAtCycle_ != -1);
      MOZ_ASSERT(pushedAtCycle_ - pushedAtStart_ >= sizeof(float));
      if (to.isMemory()) {
        ScratchFloat32Scope scratch(masm);
        masm.loadFloat32(cycleSlot(), scratch);
        masm.storeFloat32(scratch, toAddress(to));
      } else {
        masm.loadFloat32(cycleSlot(), to.floatReg());
      }
      break;
    case MoveOp::DOUBLE:
      MOZ_ASSERT(pushedAtCycle_ != -1);
      MOZ_ASSERT(pushedAtCycle_ - pushedAtStart_ >= sizeof(double));
      if (to.isMemory()) {
        ScratchDoubleScope scratch(masm);
        masm.loadDouble(cycleSlot(), scratch);
        masm.storeDouble(scratch, toAddress(to));
      } else {
        masm.loadDouble(cycleSlot(), to.floatReg());
      }
      break;
    case MoveOp::INT32:
#ifdef JS_CODEGEN_X64
      MOZ_ASSERT(pushedAtCycle_ != -1);
      MOZ_ASSERT(pushedAtCycle_ - pushedAtStart_ >= sizeof(int32_t));
      // x64 can't pop to a 32-bit destination.
      if (to.isMemory()) {
        masm.load32(cycleSlot(), ScratchReg);
        masm.store32(ScratchReg, toAddress(to));
      } else {
        masm.load32(cycleSlot(), to.reg());
      }
      break;
#endif
    case MoveOp::GENERAL:
      MOZ_ASSERT(masm.framePushed() - pushedAtStart_ >= sizeof(intptr_t));
      masm.Pop(toPopOperand(to));
      break;
    default:
      MOZ_CRASH("Unexpected move type");
  }
}

void MoveEmitterX86::emitInt32Move(const MoveOperand& from,
                                   const MoveOperand& to,
                                   const MoveResolver& moves, size_t i) {
  if (from.isGeneralReg()) {
    masm.move32(from.reg(), toOperand(to));
  } else if (to.isGeneralReg()) {
    MOZ_ASSERT(from.isMemory());
    masm.load32(toAddress(from), to.reg());
  } else {
    // Memory to memory gpr move.
    MOZ_ASSERT(from.isMemory());
    Maybe<Register> reg = findScratchRegister(moves, i);
    if (reg.isSome()) {
      masm.load32(toAddress(from), reg.value());
      masm.move32(reg.value(), toOperand(to));
    } else {
      // No scratch register available; bounce it off the stack.
      masm.Push(toOperand(from));
      masm.Pop(toPopOperand(to));
    }
  }
}

void MoveEmitterX86::emitGeneralMove(const MoveOperand& from,
                                     const MoveOperand& to,
                                     const MoveResolver& moves, size_t i) {
  if (from.isGeneralReg()) {
    masm.mov(from.reg(), toOperand(to));
  } else if (to.isGeneralReg()) {
    MOZ_ASSERT(from.isMemoryOrEffectiveAddress());
    if (from.isMemory()) {
      masm.loadPtr(toAddress(from), to.reg());
    } else {
      masm.lea(toOperand(from), to.reg());
    }
  } else if (from.isMemory()) {
    // Memory to memory gpr move.
    Maybe<Register> reg = findScratchRegister(moves, i);
    if (reg.isSome()) {
      masm.loadPtr(toAddress(from), reg.value());
      masm.mov(reg.value(), toOperand(to));
    } else {
      // No scratch register available; bounce it off the stack.
      masm.Push(toOperand(from));
      masm.Pop(toPopOperand(to));
    }
  } else {
    // Effective address to memory move.
    MOZ_ASSERT(from.isEffectiveAddress());
    Maybe<Register> reg = findScratchRegister(moves, i);
    if (reg.isSome()) {
      masm.lea(toOperand(from), reg.value());
      masm.mov(reg.value(), toOperand(to));
    } else {
      // This is tricky without a scratch reg. We can't do an lea. Bounce the
      // base register off the stack, then add the offset in place. Note that
      // this clobbers FLAGS!
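      // For example, with from == [base + disp]: push base, pop it into the
      // destination memory slot, then do a memory add of disp to that slot.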
      masm.Push(from.base());
      masm.Pop(toPopOperand(to));
      MOZ_ASSERT(to.isMemoryOrEffectiveAddress());
      masm.addPtr(Imm32(from.disp()), toAddress(to));
    }
  }
}

void MoveEmitterX86::emitFloat32Move(const MoveOperand& from,
                                     const MoveOperand& to) {
  MOZ_ASSERT_IF(from.isFloatReg(), from.floatReg().isSingle());
  MOZ_ASSERT_IF(to.isFloatReg(), to.floatReg().isSingle());

  if (from.isFloatReg()) {
    if (to.isFloatReg()) {
      masm.moveFloat32(from.floatReg(), to.floatReg());
    } else {
      masm.storeFloat32(from.floatReg(), toAddress(to));
    }
  } else if (to.isFloatReg()) {
    masm.loadFloat32(toAddress(from), to.floatReg());
  } else {
    // Memory to memory move.
    MOZ_ASSERT(from.isMemory());
    ScratchFloat32Scope scratch(masm);
    masm.loadFloat32(toAddress(from), scratch);
    masm.storeFloat32(scratch, toAddress(to));
  }
}

void MoveEmitterX86::emitDoubleMove(const MoveOperand& from,
                                    const MoveOperand& to) {
  MOZ_ASSERT_IF(from.isFloatReg(), from.floatReg().isDouble());
  MOZ_ASSERT_IF(to.isFloatReg(), to.floatReg().isDouble());

  if (from.isFloatReg()) {
    if (to.isFloatReg()) {
      masm.moveDouble(from.floatReg(), to.floatReg());
    } else {
      masm.storeDouble(from.floatReg(), toAddress(to));
    }
  } else if (to.isFloatReg()) {
    masm.loadDouble(toAddress(from), to.floatReg());
  } else {
    // Memory to memory move.
    MOZ_ASSERT(from.isMemory());
    ScratchDoubleScope scratch(masm);
    masm.loadDouble(toAddress(from), scratch);
    masm.storeDouble(scratch, toAddress(to));
  }
}

void MoveEmitterX86::emitSimd128Move(const MoveOperand& from,
                                     const MoveOperand& to) {
  MOZ_ASSERT_IF(from.isFloatReg(), from.floatReg().isSimd128());
  MOZ_ASSERT_IF(to.isFloatReg(), to.floatReg().isSimd128());

  if (from.isFloatReg()) {
    if (to.isFloatReg()) {
      masm.moveSimd128(from.floatReg(), to.floatReg());
    } else {
      masm.storeUnalignedSimd128(from.floatReg(), toAddress(to));
    }
  } else if (to.isFloatReg()) {
    masm.loadUnalignedSimd128(toAddress(from), to.floatReg());
  } else {
    // Memory to memory move.
    MOZ_ASSERT(from.isMemory());
    ScratchSimd128Scope scratch(masm);
    masm.loadUnalignedSimd128(toAddress(from), scratch);
    masm.storeUnalignedSimd128(scratch, toAddress(to));
  }
}

void MoveEmitterX86::assertDone() { MOZ_ASSERT(!inCycle_); }

void MoveEmitterX86::finish() {
  assertDone();

  masm.freeStack(masm.framePushed() - pushedAtStart_);
}

Maybe<Register> MoveEmitterX86::findScratchRegister(const MoveResolver& moves,
                                                    size_t initial) {
#ifdef JS_CODEGEN_X86
  if (scratchRegister_.isSome()) {
    return scratchRegister_;
  }

  // All registers are either in use by this move group or are live
  // afterwards. Look through the remaining moves for a register which is
  // clobbered before it is used, and is thus dead at this point.
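  // Concretely: a register whose first appearance in the remaining moves is as
  // the destination of an ordinary (non-cycle-begin) move will be overwritten
  // before anything reads it, so its current value is dead and it can be used
  // as the scratch here.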
  AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All());
  for (size_t i = initial; i < moves.numMoves(); i++) {
    const MoveOp& move = moves.getMove(i);
    if (move.from().isGeneralReg()) {
      regs.takeUnchecked(move.from().reg());
    } else if (move.from().isMemoryOrEffectiveAddress()) {
      regs.takeUnchecked(move.from().base());
    }
    if (move.to().isGeneralReg()) {
      if (i != initial && !move.isCycleBegin() && regs.has(move.to().reg())) {
        return mozilla::Some(move.to().reg());
      }
      regs.takeUnchecked(move.to().reg());
    } else if (move.to().isMemoryOrEffectiveAddress()) {
      regs.takeUnchecked(move.to().base());
    }
  }

  return mozilla::Nothing();
#else
  return mozilla::Some(ScratchReg);
#endif
}