// Copyright 2013 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/backend/code-generator.h"

#include <limits>

#include "src/base/overflowing-math.h"
#include "src/codegen/macro-assembler.h"
#include "src/codegen/optimized-compilation-info.h"
#include "src/codegen/x64/assembler-x64.h"
#include "src/compiler/backend/code-generator-impl.h"
#include "src/compiler/backend/gap-resolver.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/osr.h"
#include "src/heap/heap-inl.h"  // crbug.com/v8/8499
#include "src/objects/smi.h"
#include "src/wasm/wasm-code-manager.h"
#include "src/wasm/wasm-objects.h"

namespace v8 {
namespace internal {
namespace compiler {

#define __ tasm()->

// Adds X64 specific methods for decoding operands.
class X64OperandConverter : public InstructionOperandConverter {
 public:
  X64OperandConverter(CodeGenerator* gen, Instruction* instr)
      : InstructionOperandConverter(gen, instr) {}

  Immediate InputImmediate(size_t index) {
    return ToImmediate(instr_->InputAt(index));
  }

  Operand InputOperand(size_t index, int extra = 0) {
    return ToOperand(instr_->InputAt(index), extra);
  }

  Operand OutputOperand() { return ToOperand(instr_->Output()); }

  Immediate ToImmediate(InstructionOperand* operand) {
    Constant constant = ToConstant(operand);
    if (constant.type() == Constant::kFloat64) {
      DCHECK_EQ(0, constant.ToFloat64().AsUint64());
      return Immediate(0);
    }
    if (RelocInfo::IsWasmReference(constant.rmode())) {
      return Immediate(constant.ToInt32(), constant.rmode());
    }
    return Immediate(constant.ToInt32());
  }

  Operand ToOperand(InstructionOperand* op, int extra = 0) {
    DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
    return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
  }

  Operand SlotToOperand(int slot_index, int extra = 0) {
    FrameOffset offset = frame_access_state()->GetFrameOffset(slot_index);
    return Operand(offset.from_stack_pointer() ? rsp : rbp,
                   offset.offset() + extra);
  }

  static size_t NextOffset(size_t* offset) {
    size_t i = *offset;
    (*offset)++;
    return i;
  }

  static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
    STATIC_ASSERT(0 == static_cast<int>(times_1));
    STATIC_ASSERT(1 == static_cast<int>(times_2));
    STATIC_ASSERT(2 == static_cast<int>(times_4));
    STATIC_ASSERT(3 == static_cast<int>(times_8));
    int scale = static_cast<int>(mode - one);
    DCHECK(scale >= 0 && scale < 4);
    return static_cast<ScaleFactor>(scale);
  }
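
  // Illustrative note (not in the original source): ScaleFor relies on the
  // scaled addressing modes being laid out consecutively in the enum, e.g.
  // ScaleFor(kMode_MR1, kMode_MR4) == times_4, so a kMode_MR4 operand
  // addresses base + index * 4.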

  Operand MemoryOperand(size_t* offset) {
    AddressingMode mode = AddressingModeField::decode(instr_->opcode());
    switch (mode) {
      case kMode_MR: {
        Register base = InputRegister(NextOffset(offset));
        int32_t disp = 0;
        return Operand(base, disp);
      }
      case kMode_MRI: {
        Register base = InputRegister(NextOffset(offset));
        int32_t disp = InputInt32(NextOffset(offset));
        return Operand(base, disp);
      }
      case kMode_MR1:
      case kMode_MR2:
      case kMode_MR4:
      case kMode_MR8: {
        Register base = InputRegister(NextOffset(offset));
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_MR1, mode);
        int32_t disp = 0;
        return Operand(base, index, scale, disp);
      }
      case kMode_MR1I:
      case kMode_MR2I:
      case kMode_MR4I:
      case kMode_MR8I: {
        Register base = InputRegister(NextOffset(offset));
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
        int32_t disp = InputInt32(NextOffset(offset));
        return Operand(base, index, scale, disp);
      }
      case kMode_M1: {
        Register base = InputRegister(NextOffset(offset));
        int32_t disp = 0;
        return Operand(base, disp);
      }
      case kMode_M2:
        UNREACHABLE();  // Should use kMode_MR with more compact encoding
                        // instead.
        return Operand(no_reg, 0);
      case kMode_M4:
      case kMode_M8: {
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_M1, mode);
        int32_t disp = 0;
        return Operand(index, scale, disp);
      }
      case kMode_M1I:
      case kMode_M2I:
      case kMode_M4I:
      case kMode_M8I: {
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_M1I, mode);
        int32_t disp = InputInt32(NextOffset(offset));
        return Operand(index, scale, disp);
      }
      case kMode_Root: {
        Register base = kRootRegister;
        int32_t disp = InputInt32(NextOffset(offset));
        return Operand(base, disp);
      }
      case kMode_None:
        UNREACHABLE();
    }
    UNREACHABLE();
  }
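
  // Illustrative example (not in the original source): for an instruction
  // encoded with kMode_MR4I and *offset starting at 0, the inputs are consumed
  // in order as base = InputRegister(0), index = InputRegister(1) and
  // disp = InputInt32(2), yielding Operand(base, index, times_4, disp),
  // i.e. the address [base + index * 4 + disp].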

  Operand MemoryOperand(size_t first_input = 0) {
    return MemoryOperand(&first_input);
  }
};

namespace {

bool HasAddressingMode(Instruction* instr) {
  return instr->addressing_mode() != kMode_None;
}

bool HasImmediateInput(Instruction* instr, size_t index) {
  return instr->InputAt(index)->IsImmediate();
}

bool HasRegisterInput(Instruction* instr, size_t index) {
  return instr->InputAt(index)->IsRegister();
}

class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
 public:
  OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
      : OutOfLineCode(gen), result_(result) {}

  void Generate() final {
    __ Xorps(result_, result_);
    __ Divss(result_, result_);
  }

 private:
  XMMRegister const result_;
};

class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
 public:
  OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
      : OutOfLineCode(gen), result_(result) {}

  void Generate() final {
    __ Xorpd(result_, result_);
    __ Divsd(result_, result_);
  }

 private:
  XMMRegister const result_;
};
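
// Illustrative note (not in the original source): these stubs zero the result
// register and then divide it by itself; 0.0 / 0.0 produces the default quiet
// NaN, so a NaN is materialized without loading a constant from memory.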

class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
 public:
  OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
                             XMMRegister input, StubCallMode stub_mode,
                             UnwindingInfoWriter* unwinding_info_writer)
      : OutOfLineCode(gen),
        result_(result),
        input_(input),
        stub_mode_(stub_mode),
        unwinding_info_writer_(unwinding_info_writer),
        isolate_(gen->isolate()),
        zone_(gen->zone()) {}

  void Generate() final {
    __ AllocateStackSpace(kDoubleSize);
    unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                      kDoubleSize);
    __ Movsd(MemOperand(rsp, 0), input_);
    if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
    } else if (tasm()->options().inline_offheap_trampolines) {
      // With embedded builtins we do not need the isolate here. This allows
      // the call to be generated asynchronously.
      __ CallBuiltin(Builtins::kDoubleToI);
    } else {
      __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
    }
    __ movl(result_, MemOperand(rsp, 0));
    __ addq(rsp, Immediate(kDoubleSize));
    unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                      -kDoubleSize);
  }

 private:
  Register const result_;
  XMMRegister const input_;
  StubCallMode stub_mode_;
  UnwindingInfoWriter* const unwinding_info_writer_;
  Isolate* isolate_;
  Zone* zone_;
};

class OutOfLineRecordWrite final : public OutOfLineCode {
 public:
  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
                       Register value, Register scratch0, Register scratch1,
                       RecordWriteMode mode, StubCallMode stub_mode)
      : OutOfLineCode(gen),
        object_(object),
        operand_(operand),
        value_(value),
        scratch0_(scratch0),
        scratch1_(scratch1),
        mode_(mode),
        stub_mode_(stub_mode),
        zone_(gen->zone()) {}

  void Generate() final {
    if (mode_ > RecordWriteMode::kValueIsPointer) {
      __ JumpIfSmi(value_, exit());
    }
    if (COMPRESS_POINTERS_BOOL) {
      __ DecompressTaggedPointer(value_, value_);
    }
    __ CheckPageFlag(value_, scratch0_,
                     MemoryChunk::kPointersToHereAreInterestingMask, zero,
                     exit());
    __ leaq(scratch1_, operand_);

    RememberedSetAction const remembered_set_action =
        mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
                                             : OMIT_REMEMBERED_SET;
    SaveFPRegsMode const save_fp_mode =
        frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;

    if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
      __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
    } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
                             save_fp_mode, wasm::WasmCode::kRecordWrite);
    } else {
      __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
                             save_fp_mode);
    }
  }

 private:
  Register const object_;
  Operand const operand_;
  Register const value_;
  Register const scratch0_;
  Register const scratch1_;
  RecordWriteMode const mode_;
  StubCallMode const stub_mode_;
  Zone* zone_;
};

class WasmOutOfLineTrap : public OutOfLineCode {
 public:
  WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
      : OutOfLineCode(gen), gen_(gen), instr_(instr) {}

  void Generate() override {
    X64OperandConverter i(gen_, instr_);
    TrapId trap_id =
        static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
    GenerateWithTrapId(trap_id);
  }

 protected:
  CodeGenerator* gen_;

  void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }

 private:
  void GenerateCallToTrap(TrapId trap_id) {
    if (!gen_->wasm_runtime_exception_support()) {
      // We cannot test calls to the runtime in cctest/test-run-wasm.
      // Therefore we emit a call to C here instead of a call to the runtime.
      __ PrepareCallCFunction(0);
      __ CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(),
                       0);
      __ LeaveFrame(StackFrame::WASM_COMPILED);
      auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
      size_t pop_size =
          call_descriptor->StackParameterCount() * kSystemPointerSize;
      // Use rcx as a scratch register; we return immediately anyway.
      __ Ret(static_cast<int>(pop_size), rcx);
    } else {
      gen_->AssembleSourcePosition(instr_);
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
      ReferenceMap* reference_map =
          new (gen_->zone()) ReferenceMap(gen_->zone());
      gen_->RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
      __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
    }
  }

  Instruction* instr_;
};

class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {
 public:
  WasmProtectedInstructionTrap(CodeGenerator* gen, int pc, Instruction* instr)
      : WasmOutOfLineTrap(gen, instr), pc_(pc) {}

  void Generate() final {
    gen_->AddProtectedInstructionLanding(pc_, __ pc_offset());
    GenerateWithTrapId(TrapId::kTrapMemOutOfBounds);
  }

 private:
  int pc_;
};

void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
                         InstructionCode opcode, Instruction* instr,
                         int pc) {
  const MemoryAccessMode access_mode =
      static_cast<MemoryAccessMode>(MiscField::decode(opcode));
  if (access_mode == kMemoryAccessProtected) {
    new (zone) WasmProtectedInstructionTrap(codegen, pc, instr);
  }
}

void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
                                   InstructionCode opcode, Instruction* instr,
                                   X64OperandConverter const& i) {
  const MemoryAccessMode access_mode =
      static_cast<MemoryAccessMode>(MiscField::decode(opcode));
  if (access_mode == kMemoryAccessPoisoned) {
    Register value = i.OutputRegister();
    codegen->tasm()->andq(value, kSpeculationPoisonRegister);
  }
}

}  // namespace

#define ASSEMBLE_UNOP(asm_instr)         \
  do {                                   \
    if (instr->Output()->IsRegister()) { \
      __ asm_instr(i.OutputRegister());  \
    } else {                             \
      __ asm_instr(i.OutputOperand());   \
    }                                    \
  } while (false)

#define ASSEMBLE_BINOP(asm_instr)                                \
  do {                                                           \
    if (HasAddressingMode(instr)) {                              \
      size_t index = 1;                                          \
      Operand right = i.MemoryOperand(&index);                   \
      __ asm_instr(i.InputRegister(0), right);                   \
    } else {                                                     \
      if (HasImmediateInput(instr, 1)) {                         \
        if (HasRegisterInput(instr, 0)) {                        \
          __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
        } else {                                                 \
          __ asm_instr(i.InputOperand(0), i.InputImmediate(1));  \
        }                                                        \
      } else {                                                   \
        if (HasRegisterInput(instr, 1)) {                        \
          __ asm_instr(i.InputRegister(0), i.InputRegister(1));  \
        } else {                                                 \
          __ asm_instr(i.InputRegister(0), i.InputOperand(1));   \
        }                                                        \
      }                                                          \
    }                                                            \
  } while (false)

#define ASSEMBLE_COMPARE(asm_instr)                              \
  do {                                                           \
    if (HasAddressingMode(instr)) {                              \
      size_t index = 0;                                          \
      Operand left = i.MemoryOperand(&index);                    \
      if (HasImmediateInput(instr, index)) {                     \
        __ asm_instr(left, i.InputImmediate(index));             \
      } else {                                                   \
        __ asm_instr(left, i.InputRegister(index));              \
      }                                                          \
    } else {                                                     \
      if (HasImmediateInput(instr, 1)) {                         \
        if (HasRegisterInput(instr, 0)) {                        \
          __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
        } else {                                                 \
          __ asm_instr(i.InputOperand(0), i.InputImmediate(1));  \
        }                                                        \
      } else {                                                   \
        if (HasRegisterInput(instr, 1)) {                        \
          __ asm_instr(i.InputRegister(0), i.InputRegister(1));  \
        } else {                                                 \
          __ asm_instr(i.InputRegister(0), i.InputOperand(1));   \
        }                                                        \
      }                                                          \
    }                                                            \
  } while (false)

#define ASSEMBLE_MULT(asm_instr)                              \
  do {                                                        \
    if (HasImmediateInput(instr, 1)) {                        \
      if (HasRegisterInput(instr, 0)) {                       \
        __ asm_instr(i.OutputRegister(), i.InputRegister(0),  \
                     i.InputImmediate(1));                    \
      } else {                                                \
        __ asm_instr(i.OutputRegister(), i.InputOperand(0),   \
                     i.InputImmediate(1));                    \
      }                                                       \
    } else {                                                  \
      if (HasRegisterInput(instr, 1)) {                       \
        __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \
      } else {                                                \
        __ asm_instr(i.OutputRegister(), i.InputOperand(1));  \
      }                                                       \
    }                                                         \
  } while (false)

#define ASSEMBLE_SHIFT(asm_instr, width)                                   \
  do {                                                                     \
    if (HasImmediateInput(instr, 1)) {                                     \
      if (instr->Output()->IsRegister()) {                                 \
        __ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \
      } else {                                                             \
        __ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1)));  \
      }                                                                    \
    } else {                                                               \
      if (instr->Output()->IsRegister()) {                                 \
        __ asm_instr##_cl(i.OutputRegister());                             \
      } else {                                                             \
        __ asm_instr##_cl(i.OutputOperand());                              \
      }                                                                    \
    }                                                                      \
  } while (false)
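
// Illustrative note (not in the original source): the `width` argument is the
// bit width of the shift count, so ASSEMBLE_SHIFT(shll, 5) reads the immediate
// via InputInt5 (a count in 0..31 for 32-bit shifts) while
// ASSEMBLE_SHIFT(shlq, 6) uses InputInt6 (0..63 for 64-bit shifts). Variable
// counts use the `_cl` instruction forms, which take the count from cl.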

#define ASSEMBLE_MOVX(asm_instr)                            \
  do {                                                      \
    if (HasAddressingMode(instr)) {                         \
      __ asm_instr(i.OutputRegister(), i.MemoryOperand());  \
    } else if (HasRegisterInput(instr, 0)) {                \
      __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
    } else {                                                \
      __ asm_instr(i.OutputRegister(), i.InputOperand(0));  \
    }                                                       \
  } while (false)

#define ASSEMBLE_SSE_BINOP(asm_instr)                                   \
  do {                                                                  \
    if (instr->InputAt(1)->IsFPRegister()) {                            \
      __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
    } else {                                                            \
      __ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1));        \
    }                                                                   \
  } while (false)

#define ASSEMBLE_SSE_UNOP(asm_instr)                                    \
  do {                                                                  \
    if (instr->InputAt(0)->IsFPRegister()) {                            \
      __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
    } else {                                                            \
      __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0));        \
    }                                                                   \
  } while (false)

#define ASSEMBLE_AVX_BINOP(asm_instr)                                  \
  do {                                                                 \
    CpuFeatureScope avx_scope(tasm(), AVX);                            \
    if (instr->InputAt(1)->IsFPRegister()) {                           \
      __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
                   i.InputDoubleRegister(1));                          \
    } else {                                                           \
      __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
                   i.InputOperand(1));                                 \
    }                                                                  \
  } while (false)

#define ASSEMBLE_IEEE754_BINOP(name)                                     \
  do {                                                                   \
    __ PrepareCallCFunction(2);                                          \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
  } while (false)

#define ASSEMBLE_IEEE754_UNOP(name)                                      \
  do {                                                                   \
    __ PrepareCallCFunction(1);                                          \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \
  } while (false)

#define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
  do {                                                          \
    Label binop;                                                \
    __ bind(&binop);                                            \
    __ mov_inst(rax, i.MemoryOperand(1));                       \
    __ movl(i.TempRegister(0), rax);                            \
    __ bin_inst(i.TempRegister(0), i.InputRegister(0));         \
    __ lock();                                                  \
    __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));     \
    __ j(not_equal, &binop);                                    \
  } while (false)
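
// Illustrative note (not in the original source): this is a classic
// compare-and-swap retry loop. Each iteration loads the old value into rax,
// applies `bin_inst` to a copy of it and the input operand, then attempts
// `lock cmpxchg`; cmpxchg only stores the new value if memory still equals
// rax, so a concurrent writer makes the comparison fail and the loop retries
// with the freshly observed value.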

#define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
  do {                                                            \
    Label binop;                                                  \
    __ bind(&binop);                                              \
    __ mov_inst(rax, i.MemoryOperand(1));                         \
    __ movq(i.TempRegister(0), rax);                              \
    __ bin_inst(i.TempRegister(0), i.InputRegister(0));           \
    __ lock();                                                    \
    __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));       \
    __ j(not_equal, &binop);                                      \
  } while (false)

#define ASSEMBLE_SIMD_INSTR(opcode, dst_operand, index)      \
  do {                                                       \
    if (instr->InputAt(index)->IsSimd128Register()) {        \
      __ opcode(dst_operand, i.InputSimd128Register(index)); \
    } else {                                                 \
      __ opcode(dst_operand, i.InputOperand(index));         \
    }                                                        \
  } while (false)

#define ASSEMBLE_SIMD_IMM_INSTR(opcode, dst_operand, index, imm)  \
  do {                                                            \
    if (instr->InputAt(index)->IsSimd128Register()) {             \
      __ opcode(dst_operand, i.InputSimd128Register(index), imm); \
    } else {                                                      \
      __ opcode(dst_operand, i.InputOperand(index), imm);         \
    }                                                             \
  } while (false)

#define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode)             \
  do {                                                   \
    XMMRegister dst = i.OutputSimd128Register();         \
    DCHECK_EQ(dst, i.InputSimd128Register(0));           \
    byte input_index = instr->InputCount() == 2 ? 1 : 0; \
    ASSEMBLE_SIMD_INSTR(opcode, dst, input_index);       \
  } while (false)

#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, imm)                          \
  do {                                                                  \
    DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));    \
    __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1),     \
              imm);                                                     \
  } while (false)

#define ASSEMBLE_SIMD_ALL_TRUE(opcode)          \
  do {                                          \
    CpuFeatureScope sse_scope(tasm(), SSE4_1);  \
    Register dst = i.OutputRegister();          \
    XMMRegister tmp = i.TempSimd128Register(0); \
    __ xorq(dst, dst);                          \
    __ Pxor(tmp, tmp);                          \
    __ opcode(tmp, i.InputSimd128Register(0));  \
    __ Ptest(tmp, tmp);                         \
    __ setcc(equal, dst);                       \
  } while (false)

// This macro will directly emit the opcode if the shift is an immediate - the
// shift value will be taken modulo 2^width. Otherwise, it will emit code to
// perform the modulus operation.
#define ASSEMBLE_SIMD_SHIFT(opcode, width)                     \
  do {                                                         \
    XMMRegister dst = i.OutputSimd128Register();               \
    DCHECK_EQ(dst, i.InputSimd128Register(0));                 \
    if (HasImmediateInput(instr, 1)) {                         \
      __ opcode(dst, static_cast<byte>(i.InputInt##width(1))); \
    } else {                                                   \
      XMMRegister tmp = i.TempSimd128Register(0);              \
      Register shift = i.InputRegister(1);                     \
      constexpr int mask = (1 << width) - 1;                   \
      __ andq(shift, Immediate(mask));                         \
      __ Movq(tmp, shift);                                     \
      __ opcode(dst, tmp);                                     \
    }                                                          \
  } while (false)
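
// Illustrative example (not in the original source): with width == 5, a
// register shift count of 33 is masked by `andq(shift, Immediate(31))` down
// to 1, matching the modulo-32 semantics the immediate path gets for free
// from InputInt5; e.g. ASSEMBLE_SIMD_SHIFT(Pslld, 5) shifts each 32-bit lane
// left by the masked count.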

void CodeGenerator::AssembleDeconstructFrame() {
  unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
  __ movq(rsp, rbp);
  __ popq(rbp);
}

void CodeGenerator::AssemblePrepareTailCall() {
  if (frame_access_state()->has_frame()) {
    __ movq(rbp, MemOperand(rbp, 0));
  }
  frame_access_state()->SetFrameAccessToSP();
}

void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
                                                     Register scratch1,
                                                     Register scratch2,
                                                     Register scratch3) {
  DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
  Label done;

  // Check if current frame is an arguments adaptor frame.
  __ cmpq(Operand(rbp, CommonFrameConstants::kContextOrFrameTypeOffset),
          Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
  __ j(not_equal, &done, Label::kNear);

  // Load arguments count from current arguments adaptor frame (note that it
  // does not include the receiver).
  Register caller_args_count_reg = scratch1;
  __ SmiUntag(caller_args_count_reg,
              Operand(rbp, ArgumentsAdaptorFrameConstants::kLengthOffset));

  __ PrepareForTailCall(args_reg, caller_args_count_reg, scratch2, scratch3);
  __ bind(&done);
}

namespace {

void AdjustStackPointerForTailCall(TurboAssembler* assembler,
                                   FrameAccessState* state,
                                   int new_slot_above_sp,
                                   bool allow_shrinkage = true) {
  int current_sp_offset = state->GetSPToFPSlotCount() +
                          StandardFrameConstants::kFixedSlotCountAboveFp;
  int stack_slot_delta = new_slot_above_sp - current_sp_offset;
  if (stack_slot_delta > 0) {
    assembler->AllocateStackSpace(stack_slot_delta * kSystemPointerSize);
    state->IncreaseSPDelta(stack_slot_delta);
  } else if (allow_shrinkage && stack_slot_delta < 0) {
    assembler->addq(rsp, Immediate(-stack_slot_delta * kSystemPointerSize));
    state->IncreaseSPDelta(stack_slot_delta);
  }
}

void SetupShuffleMaskOnStack(TurboAssembler* assembler, uint32_t* mask) {
  int64_t shuffle_mask = (mask[2]) | (static_cast<uint64_t>(mask[3]) << 32);
  assembler->movq(kScratchRegister, shuffle_mask);
  assembler->Push(kScratchRegister);
  shuffle_mask = (mask[0]) | (static_cast<uint64_t>(mask[1]) << 32);
  assembler->movq(kScratchRegister, shuffle_mask);
  assembler->Push(kScratchRegister);
}
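
// Illustrative note (not in the original source): the high quadword
// (mask[3]:mask[2]) is pushed first and the low quadword (mask[1]:mask[0])
// second, so after both pushes the 16 bytes at rsp hold the 128-bit mask in
// little-endian lane order, ready to be read as an XMM shuffle constant.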

}  // namespace

void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
                                              int first_unused_stack_slot) {
  CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
  ZoneVector<MoveOperands*> pushes(zone());
  GetPushCompatibleMoves(instr, flags, &pushes);

  if (!pushes.empty() &&
      (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
       first_unused_stack_slot)) {
    X64OperandConverter g(this, instr);
    for (auto move : pushes) {
      LocationOperand destination_location(
          LocationOperand::cast(move->destination()));
      InstructionOperand source(move->source());
      AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                    destination_location.index());
      if (source.IsStackSlot()) {
        LocationOperand source_location(LocationOperand::cast(source));
        __ Push(g.SlotToOperand(source_location.index()));
      } else if (source.IsRegister()) {
        LocationOperand source_location(LocationOperand::cast(source));
        __ Push(source_location.GetRegister());
      } else if (source.IsImmediate()) {
        __ Push(Immediate(ImmediateOperand::cast(source).inline_value()));
      } else {
        // Pushes of non-scalar data types are not supported.
        UNIMPLEMENTED();
      }
      frame_access_state()->IncreaseSPDelta(1);
      move->Eliminate();
    }
  }
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_stack_slot, false);
}

void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
                                             int first_unused_stack_slot) {
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_stack_slot);
}

// Check that {kJavaScriptCallCodeStartRegister} is correct.
void CodeGenerator::AssembleCodeStartRegisterCheck() {
  __ ComputeCodeStartAddress(rbx);
  __ cmpq(rbx, kJavaScriptCallCodeStartRegister);
  __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
}

// Check if the code object is marked for deoptimization. If it is, then it
// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we
// need to:
//    1. read from memory the word that contains that bit, which can be found
//       in the flags in the referenced {CodeDataContainer} object;
//    2. test kMarkedForDeoptimizationBit in those flags; and
//    3. if it is not zero then it jumps to the builtin.
void CodeGenerator::BailoutIfDeoptimized() {
  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
  __ LoadTaggedPointerField(rbx,
                            Operand(kJavaScriptCallCodeStartRegister, offset));
  __ testl(FieldOperand(rbx, CodeDataContainer::kKindSpecificFlagsOffset),
           Immediate(1 << Code::kMarkedForDeoptimizationBit));
  __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
          RelocInfo::CODE_TARGET, not_zero);
}

void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
  // Set a mask which has all bits set in the normal case, but has all
  // bits cleared if we are speculatively executing the wrong PC.
  __ ComputeCodeStartAddress(rbx);
  __ xorq(kSpeculationPoisonRegister, kSpeculationPoisonRegister);
  __ cmpq(kJavaScriptCallCodeStartRegister, rbx);
  __ movq(rbx, Immediate(-1));
  __ cmovq(equal, kSpeculationPoisonRegister, rbx);
}
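
// Illustrative note (not in the original source): after the sequence above,
// the poison register is -1 (all ones) when the code-start register matches
// the actual code start and 0 when it does not, so and-ing a value with it
// leaves the value unchanged on the architectural path and zeroes it on a
// mispredicted one.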

void CodeGenerator::AssembleRegisterArgumentPoisoning() {
  __ andq(kJSFunctionRegister, kSpeculationPoisonRegister);
  __ andq(kContextRegister, kSpeculationPoisonRegister);
  __ andq(rsp, kSpeculationPoisonRegister);
}

// Assembles an instruction after register allocation, producing machine code.
CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
    Instruction* instr) {
  X64OperandConverter i(this, instr);
  InstructionCode opcode = instr->opcode();
  ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
  switch (arch_opcode) {
    case kArchCallCodeObject: {
      if (HasImmediateInput(instr, 0)) {
        Handle<Code> code = i.InputCode(0);
        __ Call(code, RelocInfo::CODE_TARGET);
      } else {
        Register reg = i.InputRegister(0);
        DCHECK_IMPLIES(
            HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
            reg == kJavaScriptCallCodeStartRegister);
        __ LoadCodeObjectEntry(reg, reg);
        if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
          __ RetpolineCall(reg);
        } else {
          __ call(reg);
        }
      }
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchCallBuiltinPointer: {
      DCHECK(!HasImmediateInput(instr, 0));
      Register builtin_index = i.InputRegister(0);
      __ CallBuiltinByIndex(builtin_index);
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchCallWasmFunction: {
      if (HasImmediateInput(instr, 0)) {
        Constant constant = i.ToConstant(instr->InputAt(0));
        Address wasm_code = static_cast<Address>(constant.ToInt64());
        if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
          __ near_call(wasm_code, constant.rmode());
        } else {
          if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
            __ RetpolineCall(wasm_code, constant.rmode());
          } else {
            __ Call(wasm_code, constant.rmode());
          }
        }
      } else {
        Register reg = i.InputRegister(0);
        if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
          __ RetpolineCall(reg);
        } else {
          __ call(reg);
        }
      }
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchTailCallCodeObjectFromJSFunction:
    case kArchTailCallCodeObject: {
      if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
        AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
                                         i.TempRegister(0), i.TempRegister(1),
                                         i.TempRegister(2));
      }
      if (HasImmediateInput(instr, 0)) {
        Handle<Code> code = i.InputCode(0);
        __ Jump(code, RelocInfo::CODE_TARGET);
      } else {
        Register reg = i.InputRegister(0);
        DCHECK_IMPLIES(
            HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
            reg == kJavaScriptCallCodeStartRegister);
        __ LoadCodeObjectEntry(reg, reg);
        if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
          __ RetpolineJump(reg);
        } else {
          __ jmp(reg);
        }
      }
      unwinding_info_writer_.MarkBlockWillExit();
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchTailCallWasm: {
      if (HasImmediateInput(instr, 0)) {
        Constant constant = i.ToConstant(instr->InputAt(0));
        Address wasm_code = static_cast<Address>(constant.ToInt64());
        if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
          __ near_jmp(wasm_code, constant.rmode());
        } else {
          __ Move(kScratchRegister, wasm_code, constant.rmode());
          __ jmp(kScratchRegister);
        }
      } else {
        Register reg = i.InputRegister(0);
        if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
          __ RetpolineJump(reg);
        } else {
          __ jmp(reg);
        }
      }
      unwinding_info_writer_.MarkBlockWillExit();
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchTailCallAddress: {
      CHECK(!HasImmediateInput(instr, 0));
      Register reg = i.InputRegister(0);
      DCHECK_IMPLIES(
          HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
          reg == kJavaScriptCallCodeStartRegister);
      if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
        __ RetpolineJump(reg);
      } else {
        __ jmp(reg);
      }
      unwinding_info_writer_.MarkBlockWillExit();
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchCallJSFunction: {
      Register func = i.InputRegister(0);
      if (FLAG_debug_code) {
        // Check the function's context matches the context argument.
        __ cmp_tagged(rsi, FieldOperand(func, JSFunction::kContextOffset));
        __ Assert(equal, AbortReason::kWrongFunctionContext);
      }
      static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch");
      __ LoadTaggedPointerField(rcx,
                                FieldOperand(func, JSFunction::kCodeOffset));
      __ CallCodeObject(rcx);
      frame_access_state()->ClearSPDelta();
      RecordCallPosition(instr);
      break;
    }
    case kArchPrepareCallCFunction: {
      // Frame alignment requires using FP-relative frame addressing.
      frame_access_state()->SetFrameAccessToFP();
      int const num_parameters = MiscField::decode(instr->opcode());
      __ PrepareCallCFunction(num_parameters);
      break;
    }
    case kArchSaveCallerRegisters: {
      fp_mode_ =
          static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
      // kReturnRegister0 should have been saved before entering the stub.
      int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
      DCHECK(IsAligned(bytes, kSystemPointerSize));
      DCHECK_EQ(0, frame_access_state()->sp_delta());
      frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
      DCHECK(!caller_registers_saved_);
      caller_registers_saved_ = true;
      break;
    }
    case kArchRestoreCallerRegisters: {
      DCHECK(fp_mode_ ==
             static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
      // Don't overwrite the returned value.
      int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
      frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
      DCHECK_EQ(0, frame_access_state()->sp_delta());
      DCHECK(caller_registers_saved_);
      caller_registers_saved_ = false;
      break;
    }
    case kArchPrepareTailCall:
      AssemblePrepareTailCall();
      break;
    case kArchCallCFunction: {
      int const num_parameters = MiscField::decode(instr->opcode());
      Label return_location;
      if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
        // Put the return address in a stack slot.
        __ leaq(kScratchRegister, Operand(&return_location, 0));
        __ movq(MemOperand(rbp, WasmExitFrameConstants::kCallingPCOffset),
                kScratchRegister);
      }
      if (HasImmediateInput(instr, 0)) {
        ExternalReference ref = i.InputExternalReference(0);
        __ CallCFunction(ref, num_parameters);
      } else {
        Register func = i.InputRegister(0);
        __ CallCFunction(func, num_parameters);
      }
      __ bind(&return_location);
      if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
        RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt);
      }
      frame_access_state()->SetFrameAccessToDefault();
      // Ideally, we should decrement SP delta to match the change of stack
      // pointer in CallCFunction. However, for certain architectures (e.g.
      // ARM), there may be stricter alignment requirements, causing the old
      // SP to be saved on the stack. In those cases, we cannot calculate the
      // SP delta statically.
      frame_access_state()->ClearSPDelta();
      if (caller_registers_saved_) {
        // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
        // Here, we assume the sequence to be:
        //   kArchSaveCallerRegisters;
        //   kArchCallCFunction;
        //   kArchRestoreCallerRegisters;
        int bytes =
            __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
        frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
      }
      // TODO(tebbi): Do we need an lfence here?
      break;
    }
    case kArchJmp:
      AssembleArchJump(i.InputRpo(0));
      break;
    case kArchBinarySearchSwitch:
      AssembleArchBinarySearchSwitch(instr);
      break;
    case kArchTableSwitch:
      AssembleArchTableSwitch(instr);
      break;
    case kArchComment:
      __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
      break;
    case kArchAbortCSAAssert:
      DCHECK(i.InputRegister(0) == rdx);
      {
        // We don't actually want to generate a pile of code for this, so just
        // claim there is a stack frame, without generating one.
        FrameScope scope(tasm(), StackFrame::NONE);
        __ Call(
            isolate()->builtins()->builtin_handle(Builtins::kAbortCSAAssert),
            RelocInfo::CODE_TARGET);
      }
      __ int3();
      unwinding_info_writer_.MarkBlockWillExit();
      break;
    case kArchDebugBreak:
      __ DebugBreak();
      break;
    case kArchThrowTerminator:
      unwinding_info_writer_.MarkBlockWillExit();
      break;
    case kArchNop:
      // don't emit code for nops.
      break;
    case kArchDeoptimize: {
      DeoptimizationExit* exit =
          BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
      CodeGenResult result = AssembleDeoptimizerCall(exit);
      if (result != kSuccess) return result;
      unwinding_info_writer_.MarkBlockWillExit();
      break;
    }
    case kArchRet:
      AssembleReturn(instr->InputAt(0));
      break;
    case kArchFramePointer:
      __ movq(i.OutputRegister(), rbp);
      break;
    case kArchParentFramePointer:
      if (frame_access_state()->has_frame()) {
        __ movq(i.OutputRegister(), Operand(rbp, 0));
      } else {
        __ movq(i.OutputRegister(), rbp);
      }
      break;
    case kArchStackPointerGreaterThan: {
      // Potentially apply an offset to the current stack pointer before the
      // comparison to consider the size difference of an optimized frame
      // versus the contained unoptimized frames.

      Register lhs_register = rsp;
      uint32_t offset;

      if (ShouldApplyOffsetToStackCheck(instr, &offset)) {
        lhs_register = kScratchRegister;
        __ leaq(lhs_register, Operand(rsp, static_cast<int32_t>(offset) * -1));
      }

      constexpr size_t kValueIndex = 0;
      if (HasAddressingMode(instr)) {
        __ cmpq(lhs_register, i.MemoryOperand(kValueIndex));
      } else {
        __ cmpq(lhs_register, i.InputRegister(kValueIndex));
      }
      break;
    }
    case kArchStackCheckOffset:
      __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
      break;
    case kArchTruncateDoubleToI: {
      auto result = i.OutputRegister();
      auto input = i.InputDoubleRegister(0);
      auto ool = new (zone()) OutOfLineTruncateDoubleToI(
          this, result, input, DetermineStubCallMode(),
          &unwinding_info_writer_);
      // We use Cvttsd2siq instead of Cvttsd2si for performance reasons. The
      // use of Cvttsd2siq requires the movl below to avoid sign extension.
      __ Cvttsd2siq(result, input);
      __ cmpq(result, Immediate(1));
      __ j(overflow, ool->entry());
      __ bind(ool->exit());
      __ movl(result, result);
      break;
    }
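    // Illustrative note on kArchTruncateDoubleToI above (not in the original
    // source): cvttsd2siq returns the "integer indefinite" value
    // 0x8000000000000000 (INT64_MIN) for NaN and out-of-range inputs.
    // `cmpq result, 1` computes result - 1, which overflows only for
    // INT64_MIN, so `j(overflow, ...)` routes exactly the failed conversions
    // to the out-of-line slow path.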
    case kArchStoreWithWriteBarrier: {
      RecordWriteMode mode =
          static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
      Register object = i.InputRegister(0);
      size_t index = 0;
      Operand operand = i.MemoryOperand(&index);
      Register value = i.InputRegister(index);
      Register scratch0 = i.TempRegister(0);
      Register scratch1 = i.TempRegister(1);
      auto ool = new (zone())
          OutOfLineRecordWrite(this, object, operand, value, scratch0,
                               scratch1, mode, DetermineStubCallMode());
      __ StoreTaggedField(operand, value);
      __ CheckPageFlag(object, scratch0,
                       MemoryChunk::kPointersFromHereAreInterestingMask,
                       not_zero, ool->entry());
      __ bind(ool->exit());
      break;
    }
    case kArchWordPoisonOnSpeculation:
      DCHECK_EQ(i.OutputRegister(), i.InputRegister(0));
      __ andq(i.InputRegister(0), kSpeculationPoisonRegister);
      break;
    case kX64MFence:
      __ mfence();
      break;
    case kX64LFence:
      __ lfence();
      break;
    case kArchStackSlot: {
      FrameOffset offset =
          frame_access_state()->GetFrameOffset(i.InputInt32(0));
      Register base = offset.from_stack_pointer() ? rsp : rbp;
      __ leaq(i.OutputRegister(), Operand(base, offset.offset()));
      break;
    }
    case kIeee754Float64Acos:
      ASSEMBLE_IEEE754_UNOP(acos);
      break;
    case kIeee754Float64Acosh:
      ASSEMBLE_IEEE754_UNOP(acosh);
      break;
    case kIeee754Float64Asin:
      ASSEMBLE_IEEE754_UNOP(asin);
      break;
    case kIeee754Float64Asinh:
      ASSEMBLE_IEEE754_UNOP(asinh);
      break;
    case kIeee754Float64Atan:
      ASSEMBLE_IEEE754_UNOP(atan);
      break;
    case kIeee754Float64Atanh:
      ASSEMBLE_IEEE754_UNOP(atanh);
      break;
    case kIeee754Float64Atan2:
      ASSEMBLE_IEEE754_BINOP(atan2);
      break;
    case kIeee754Float64Cbrt:
      ASSEMBLE_IEEE754_UNOP(cbrt);
      break;
    case kIeee754Float64Cos:
      ASSEMBLE_IEEE754_UNOP(cos);
      break;
    case kIeee754Float64Cosh:
      ASSEMBLE_IEEE754_UNOP(cosh);
      break;
    case kIeee754Float64Exp:
      ASSEMBLE_IEEE754_UNOP(exp);
      break;
    case kIeee754Float64Expm1:
      ASSEMBLE_IEEE754_UNOP(expm1);
      break;
    case kIeee754Float64Log:
      ASSEMBLE_IEEE754_UNOP(log);
      break;
    case kIeee754Float64Log1p:
      ASSEMBLE_IEEE754_UNOP(log1p);
      break;
    case kIeee754Float64Log2:
      ASSEMBLE_IEEE754_UNOP(log2);
      break;
    case kIeee754Float64Log10:
      ASSEMBLE_IEEE754_UNOP(log10);
      break;
    case kIeee754Float64Pow:
      ASSEMBLE_IEEE754_BINOP(pow);
      break;
    case kIeee754Float64Sin:
      ASSEMBLE_IEEE754_UNOP(sin);
      break;
    case kIeee754Float64Sinh:
      ASSEMBLE_IEEE754_UNOP(sinh);
      break;
    case kIeee754Float64Tan:
      ASSEMBLE_IEEE754_UNOP(tan);
      break;
    case kIeee754Float64Tanh:
      ASSEMBLE_IEEE754_UNOP(tanh);
      break;
    case kX64Add32:
      ASSEMBLE_BINOP(addl);
      break;
    case kX64Add:
      ASSEMBLE_BINOP(addq);
      break;
    case kX64Sub32:
      ASSEMBLE_BINOP(subl);
      break;
    case kX64Sub:
      ASSEMBLE_BINOP(subq);
      break;
    case kX64And32:
      ASSEMBLE_BINOP(andl);
      break;
    case kX64And:
      ASSEMBLE_BINOP(andq);
      break;
    case kX64Cmp8:
      ASSEMBLE_COMPARE(cmpb);
      break;
    case kX64Cmp16:
      ASSEMBLE_COMPARE(cmpw);
      break;
    case kX64Cmp32:
      ASSEMBLE_COMPARE(cmpl);
      break;
    case kX64Cmp:
      ASSEMBLE_COMPARE(cmpq);
      break;
    case kX64Test8:
      ASSEMBLE_COMPARE(testb);
      break;
    case kX64Test16:
      ASSEMBLE_COMPARE(testw);
      break;
    case kX64Test32:
      ASSEMBLE_COMPARE(testl);
      break;
    case kX64Test:
      ASSEMBLE_COMPARE(testq);
      break;
    case kX64Imul32:
      ASSEMBLE_MULT(imull);
      break;
    case kX64Imul:
      ASSEMBLE_MULT(imulq);
      break;
    case kX64ImulHigh32:
      if (HasRegisterInput(instr, 1)) {
        __ imull(i.InputRegister(1));
      } else {
        __ imull(i.InputOperand(1));
      }
      break;
    case kX64UmulHigh32:
      if (HasRegisterInput(instr, 1)) {
        __ mull(i.InputRegister(1));
      } else {
        __ mull(i.InputOperand(1));
      }
      break;
    case kX64Idiv32:
      __ cdq();
      __ idivl(i.InputRegister(1));
      break;
    case kX64Idiv:
      __ cqo();
      __ idivq(i.InputRegister(1));
      break;
    case kX64Udiv32:
      __ xorl(rdx, rdx);
      __ divl(i.InputRegister(1));
      break;
    case kX64Udiv:
      __ xorq(rdx, rdx);
      __ divq(i.InputRegister(1));
      break;
    case kX64Not:
      ASSEMBLE_UNOP(notq);
      break;
    case kX64Not32:
      ASSEMBLE_UNOP(notl);
      break;
    case kX64Neg:
      ASSEMBLE_UNOP(negq);
      break;
    case kX64Neg32:
      ASSEMBLE_UNOP(negl);
      break;
    case kX64Or32:
      ASSEMBLE_BINOP(orl);
      break;
    case kX64Or:
      ASSEMBLE_BINOP(orq);
      break;
    case kX64Xor32:
      ASSEMBLE_BINOP(xorl);
      break;
    case kX64Xor:
      ASSEMBLE_BINOP(xorq);
      break;
    case kX64Shl32:
      ASSEMBLE_SHIFT(shll, 5);
      break;
    case kX64Shl:
      ASSEMBLE_SHIFT(shlq, 6);
      break;
    case kX64Shr32:
      ASSEMBLE_SHIFT(shrl, 5);
      break;
    case kX64Shr:
      ASSEMBLE_SHIFT(shrq, 6);
      break;
    case kX64Sar32:
      ASSEMBLE_SHIFT(sarl, 5);
      break;
    case kX64Sar:
      ASSEMBLE_SHIFT(sarq, 6);
      break;
    case kX64Ror32:
      ASSEMBLE_SHIFT(rorl, 5);
      break;
    case kX64Ror:
      ASSEMBLE_SHIFT(rorq, 6);
      break;
    case kX64Lzcnt:
      if (HasRegisterInput(instr, 0)) {
        __ Lzcntq(i.OutputRegister(), i.InputRegister(0));
      } else {
        __ Lzcntq(i.OutputRegister(), i.InputOperand(0));
      }
      break;
    case kX64Lzcnt32:
      if (HasRegisterInput(instr, 0)) {
        __ Lzcntl(i.OutputRegister(), i.InputRegister(0));
      } else {
        __ Lzcntl(i.OutputRegister(), i.InputOperand(0));
      }
      break;
    case kX64Tzcnt:
      if (HasRegisterInput(instr, 0)) {
        __ Tzcntq(i.OutputRegister(), i.InputRegister(0));
      } else {
        __ Tzcntq(i.OutputRegister(), i.InputOperand(0));
      }
      break;
    case kX64Tzcnt32:
      if (HasRegisterInput(instr, 0)) {
        __ Tzcntl(i.OutputRegister(), i.InputRegister(0));
      } else {
        __ Tzcntl(i.OutputRegister(), i.InputOperand(0));
      }
      break;
    case kX64Popcnt:
      if (HasRegisterInput(instr, 0)) {
        __ Popcntq(i.OutputRegister(), i.InputRegister(0));
      } else {
        __ Popcntq(i.OutputRegister(), i.InputOperand(0));
      }
      break;
    case kX64Popcnt32:
      if (HasRegisterInput(instr, 0)) {
        __ Popcntl(i.OutputRegister(), i.InputRegister(0));
      } else {
        __ Popcntl(i.OutputRegister(), i.InputOperand(0));
      }
      break;
    case kX64Bswap:
      __ bswapq(i.OutputRegister());
      break;
    case kX64Bswap32:
      __ bswapl(i.OutputRegister());
      break;
    case kSSEFloat32Cmp:
      ASSEMBLE_SSE_BINOP(Ucomiss);
      break;
    case kSSEFloat32Add:
      ASSEMBLE_SSE_BINOP(addss);
      break;
    case kSSEFloat32Sub:
      ASSEMBLE_SSE_BINOP(subss);
      break;
    case kSSEFloat32Mul:
      ASSEMBLE_SSE_BINOP(mulss);
      break;
    case kSSEFloat32Div:
      ASSEMBLE_SSE_BINOP(divss);
      // Don't delete this mov. It may improve performance on some CPUs,
      // when there is a (v)mulss depending on the result.
      __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
      break;
    case kSSEFloat32Abs: {
      // TODO(bmeurer): Use RIP relative 128-bit constants.
      XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
      __ Pcmpeqd(tmp, tmp);
      __ Psrlq(tmp, 33);
      __ Andps(i.OutputDoubleRegister(), tmp);
      break;
    }
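    // Illustrative note on kSSEFloat32Abs above (not in the original source):
    // Pcmpeqd produces all-ones; Psrlq by 33 turns each 64-bit lane into
    // 0x000000007FFFFFFF, which clears bit 31 (the float32 sign bit) when
    // and-ed with the value, i.e. computes |x| without a branch or a memory
    // constant.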
    case kSSEFloat32Neg: {
      // TODO(bmeurer): Use RIP relative 128-bit constants.
      XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
      __ Pcmpeqd(tmp, tmp);
      __ Psllq(tmp, 31);
      __ Xorps(i.OutputDoubleRegister(), tmp);
      break;
    }
    case kSSEFloat32Sqrt:
      ASSEMBLE_SSE_UNOP(sqrtss);
      break;
    case kSSEFloat32ToFloat64:
      ASSEMBLE_SSE_UNOP(Cvtss2sd);
      break;
    case kSSEFloat32Round: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      RoundingMode const mode =
          static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
      __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
      break;
    }
    case kSSEFloat32ToInt32:
      if (instr->InputAt(0)->IsFPRegister()) {
        __ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0));
      } else {
        __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
      }
      break;
    case kSSEFloat32ToUint32: {
      if (instr->InputAt(0)->IsFPRegister()) {
        __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
      } else {
        __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
      }
      break;
    }
    case kSSEFloat64Cmp:
      ASSEMBLE_SSE_BINOP(Ucomisd);
      break;
    case kSSEFloat64Add:
      ASSEMBLE_SSE_BINOP(addsd);
      break;
    case kSSEFloat64Sub:
      ASSEMBLE_SSE_BINOP(subsd);
      break;
    case kSSEFloat64Mul:
      ASSEMBLE_SSE_BINOP(mulsd);
      break;
    case kSSEFloat64Div:
      ASSEMBLE_SSE_BINOP(divsd);
      // Don't delete this mov. It may improve performance on some CPUs,
      // when there is a (v)mulsd depending on the result.
      __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
      break;
    case kSSEFloat64Mod: {
      __ AllocateStackSpace(kDoubleSize);
      unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                       kDoubleSize);
      // Move values to st(0) and st(1).
      __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
      __ fld_d(Operand(rsp, 0));
      __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
      __ fld_d(Operand(rsp, 0));
      // Loop while fprem isn't done.
      Label mod_loop;
      __ bind(&mod_loop);
      // This instruction traps on all kinds of inputs, but we are assuming
      // the floating point control word is set to ignore them all.
      __ fprem();
      // The following two instructions implicitly use rax.
      __ fnstsw_ax();
      if (CpuFeatures::IsSupported(SAHF)) {
        CpuFeatureScope sahf_scope(tasm(), SAHF);
        __ sahf();
      } else {
        __ shrl(rax, Immediate(8));
        __ andl(rax, Immediate(0xFF));
        __ pushq(rax);
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSystemPointerSize);
        __ popfq();
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         -kSystemPointerSize);
      }
      __ j(parity_even, &mod_loop);
      // Move output to stack and clean up.
      __ fstp(1);
      __ fstp_d(Operand(rsp, 0));
      __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
      __ addq(rsp, Immediate(kDoubleSize));
      unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                       -kDoubleSize);
      break;
    }
    case kSSEFloat32Max: {
      Label compare_swap, done_compare;
      if (instr->InputAt(1)->IsFPRegister()) {
        __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      auto ool =
          new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
      __ j(parity_even, ool->entry());
      __ j(above, &done_compare, Label::kNear);
      __ j(below, &compare_swap, Label::kNear);
      __ Movmskps(kScratchRegister, i.InputDoubleRegister(0));
      __ testl(kScratchRegister, Immediate(1));
      __ j(zero, &done_compare, Label::kNear);
      __ bind(&compare_swap);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      __ bind(&done_compare);
      __ bind(ool->exit());
      break;
    }
    case kSSEFloat32Min: {
      Label compare_swap, done_compare;
      if (instr->InputAt(1)->IsFPRegister()) {
        __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      auto ool =
          new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
      __ j(parity_even, ool->entry());
      __ j(below, &done_compare, Label::kNear);
      __ j(above, &compare_swap, Label::kNear);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ Movmskps(kScratchRegister, i.InputDoubleRegister(1));
      } else {
        __ Movss(kScratchDoubleReg, i.InputOperand(1));
        __ Movmskps(kScratchRegister, kScratchDoubleReg);
      }
      __ testl(kScratchRegister, Immediate(1));
      __ j(zero, &done_compare, Label::kNear);
      __ bind(&compare_swap);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      __ bind(&done_compare);
      __ bind(ool->exit());
      break;
    }
    case kSSEFloat64Max: {
      Label compare_swap, done_compare;
      if (instr->InputAt(1)->IsFPRegister()) {
        __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      auto ool =
          new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
      __ j(parity_even, ool->entry());
      __ j(above, &done_compare, Label::kNear);
      __ j(below, &compare_swap, Label::kNear);
      __ Movmskpd(kScratchRegister, i.InputDoubleRegister(0));
      __ testl(kScratchRegister, Immediate(1));
      __ j(zero, &done_compare, Label::kNear);
      __ bind(&compare_swap);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      __ bind(&done_compare);
      __ bind(ool->exit());
      break;
    }
    case kSSEFloat64Min: {
      Label compare_swap, done_compare;
      if (instr->InputAt(1)->IsFPRegister()) {
        __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      auto ool =
          new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
      __ j(parity_even, ool->entry());
      __ j(below, &done_compare, Label::kNear);
      __ j(above, &compare_swap, Label::kNear);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ Movmskpd(kScratchRegister, i.InputDoubleRegister(1));
      } else {
        __ Movsd(kScratchDoubleReg, i.InputOperand(1));
        __ Movmskpd(kScratchRegister, kScratchDoubleReg);
      }
      __ testl(kScratchRegister, Immediate(1));
      __ j(zero, &done_compare, Label::kNear);
      __ bind(&compare_swap);
      if (instr->InputAt(1)->IsFPRegister()) {
        __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      __ bind(&done_compare);
      __ bind(ool->exit());
      break;
    }
    case kX64F64x2Abs:
    case kSSEFloat64Abs: {
      // TODO(bmeurer): Use RIP relative 128-bit constants.
      XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
      __ Pcmpeqd(tmp, tmp);
      __ Psrlq(tmp, 1);
      __ Andpd(i.OutputDoubleRegister(), tmp);
      break;
    }
    case kX64F64x2Neg:
    case kSSEFloat64Neg: {
      // TODO(bmeurer): Use RIP relative 128-bit constants.
      XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
      __ Pcmpeqd(tmp, tmp);
      __ Psllq(tmp, 63);
      __ Xorpd(i.OutputDoubleRegister(), tmp);
      break;
    }
    case kSSEFloat64Sqrt:
      ASSEMBLE_SSE_UNOP(Sqrtsd);
      break;
    case kSSEFloat64Round: {
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      RoundingMode const mode =
          static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
      __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
      break;
    }
    case kSSEFloat64ToFloat32:
      ASSEMBLE_SSE_UNOP(Cvtsd2ss);
      break;
    case kSSEFloat64ToInt32:
      if (instr->InputAt(0)->IsFPRegister()) {
        __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
      } else {
        __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
      }
      break;
    case kSSEFloat64ToUint32: {
      if (instr->InputAt(0)->IsFPRegister()) {
        __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
      } else {
        __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
      }
      if (MiscField::decode(instr->opcode())) {
        __ AssertZeroExtended(i.OutputRegister());
      }
      break;
    }
    case kSSEFloat32ToInt64:
      if (instr->InputAt(0)->IsFPRegister()) {
        __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
      } else {
        __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
      }
      if (instr->OutputCount() > 1) {
        __ Set(i.OutputRegister(1), 1);
        Label done;
        Label fail;
        __ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN));
        if (instr->InputAt(0)->IsFPRegister()) {
          __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
        } else {
          __ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
        }
        // If the input is NaN, then the conversion fails.
        __ j(parity_even, &fail, Label::kNear);
        // If the input is INT64_MIN, then the conversion succeeds.
        __ j(equal, &done, Label::kNear);
        __ cmpq(i.OutputRegister(0), Immediate(1));
        // If the conversion results in INT64_MIN, but the input was not
        // INT64_MIN, then the conversion fails.
        __ j(no_overflow, &done, Label::kNear);
        __ bind(&fail);
        __ Set(i.OutputRegister(1), 0);
        __ bind(&done);
      }
      break;
    case kSSEFloat64ToInt64:
      if (instr->InputAt(0)->IsFPRegister()) {
        __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
      } else {
        __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
      }
      if (instr->OutputCount() > 1) {
        __ Set(i.OutputRegister(1), 1);
        Label done;
        Label fail;
        __ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN));
        if (instr->InputAt(0)->IsFPRegister()) {
          __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
        } else {
          __ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
        }
        // If the input is NaN, then the conversion fails.
        __ j(parity_even, &fail, Label::kNear);
        // If the input is INT64_MIN, then the conversion succeeds.
        __ j(equal, &done, Label::kNear);
        __ cmpq(i.OutputRegister(0), Immediate(1));
        // If the conversion results in INT64_MIN, but the input was not
        // INT64_MIN, then the conversion fails.
        __ j(no_overflow, &done, Label::kNear);
        __ bind(&fail);
        __ Set(i.OutputRegister(1), 0);
        __ bind(&done);
      }
      break;
    case kSSEFloat32ToUint64: {
      Label fail;
      if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
      if (instr->InputAt(0)->IsFPRegister()) {
        __ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
      } else {
        __ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
      }
      if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
      __ bind(&fail);
      break;
    }
    case kSSEFloat64ToUint64: {
      Label fail;
      if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
      if (instr->InputAt(0)->IsFPRegister()) {
        __ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
      } else {
        __ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
      }
      if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
      __ bind(&fail);
      break;
    }
    case kSSEInt32ToFloat64:
      if (HasRegisterInput(instr, 0)) {
        __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
      } else {
        __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
      }
      break;
    case kSSEInt32ToFloat32:
      if (HasRegisterInput(instr, 0)) {
        __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
      } else {
        __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
      }
      break;
    case kSSEInt64ToFloat32:
      if (HasRegisterInput(instr, 0)) {
        __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
      } else {
        __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
      }
      break;
    case kSSEInt64ToFloat64:
      if (HasRegisterInput(instr, 0)) {
        __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
      } else {
        __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
      }
      break;
    case kSSEUint64ToFloat32:
      if (HasRegisterInput(instr, 0)) {
        __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
      } else {
        __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
      }
      break;
    case kSSEUint64ToFloat64:
      if (HasRegisterInput(instr, 0)) {
        __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
      } else {
        __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
      }
      break;
    case kSSEUint32ToFloat64:
      if (HasRegisterInput(instr, 0)) {
1754 __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1755 } else {
1756 __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1757 }
1758 break;
1759 case kSSEUint32ToFloat32:
1760 if (HasRegisterInput(instr, 0)) {
1761 __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1762 } else {
1763 __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1764 }
1765 break;
1766 case kSSEFloat64ExtractLowWord32:
1767 if (instr->InputAt(0)->IsFPStackSlot()) {
1768 __ movl(i.OutputRegister(), i.InputOperand(0));
1769 } else {
1770 __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
1771 }
1772 break;
1773 case kSSEFloat64ExtractHighWord32:
1774 if (instr->InputAt(0)->IsFPStackSlot()) {
1775 __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
1776 } else {
1777 __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
1778 }
1779 break;
1780 case kSSEFloat64InsertLowWord32:
1781 if (HasRegisterInput(instr, 1)) {
1782 __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
1783 } else {
1784 __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
1785 }
1786 break;
1787 case kSSEFloat64InsertHighWord32:
1788 if (HasRegisterInput(instr, 1)) {
1789 __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
1790 } else {
1791 __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
1792 }
1793 break;
1794 case kSSEFloat64LoadLowWord32:
1795 if (HasRegisterInput(instr, 0)) {
1796 __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
1797 } else {
1798 __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
1799 }
1800 break;
1801 case kAVXFloat32Cmp: {
1802 CpuFeatureScope avx_scope(tasm(), AVX);
1803 if (instr->InputAt(1)->IsFPRegister()) {
1804 __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1805 } else {
1806 __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1807 }
1808 break;
1809 }
1810 case kAVXFloat32Add:
1811 ASSEMBLE_AVX_BINOP(vaddss);
1812 break;
1813 case kAVXFloat32Sub:
1814 ASSEMBLE_AVX_BINOP(vsubss);
1815 break;
1816 case kAVXFloat32Mul:
1817 ASSEMBLE_AVX_BINOP(vmulss);
1818 break;
1819 case kAVXFloat32Div:
1820 ASSEMBLE_AVX_BINOP(vdivss);
1821 // Don't delete this mov. It may improve performance on some CPUs
1822 // when there is a (v)mulss depending on the result.
1823 __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1824 break;
1825 case kAVXFloat64Cmp: {
1826 CpuFeatureScope avx_scope(tasm(), AVX);
1827 if (instr->InputAt(1)->IsFPRegister()) {
1828 __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1829 } else {
1830 __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1831 }
1832 break;
1833 }
1834 case kAVXFloat64Add:
1835 ASSEMBLE_AVX_BINOP(vaddsd);
1836 break;
1837 case kAVXFloat64Sub:
1838 ASSEMBLE_AVX_BINOP(vsubsd);
1839 break;
1840 case kAVXFloat64Mul:
1841 ASSEMBLE_AVX_BINOP(vmulsd);
1842 break;
1843 case kAVXFloat64Div:
1844 ASSEMBLE_AVX_BINOP(vdivsd);
1845 // Don't delete this mov. It may improve performance on some CPUs
1846 // when there is a (v)mulsd depending on the result.
1847 __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1848 break;
1849 case kAVXFloat32Abs: {
1850 // TODO(bmeurer): Use RIP relative 128-bit constants.
1851 CpuFeatureScope avx_scope(tasm(), AVX);
1852 XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
1853 __ vpcmpeqd(tmp, tmp, tmp);
1854 __ vpsrlq(tmp, tmp, 33);
1855 if (instr->InputAt(0)->IsFPRegister()) {
1856 __ vandps(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
1857 } else {
1858 __ vandps(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
1859 }
1860 break;
1861 }
1862 case kAVXFloat32Neg: {
1863 // TODO(bmeurer): Use RIP relative 128-bit constants.
1864 CpuFeatureScope avx_scope(tasm(), AVX);
1865 XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
1866 __ vpcmpeqd(tmp, tmp, tmp);
1867 __ vpsllq(tmp, tmp, 31);
1868 if (instr->InputAt(0)->IsFPRegister()) {
1869 __ vxorps(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
1870 } else {
1871 __ vxorps(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
1872 }
1873 break;
1874 }
1875 case kAVXFloat64Abs: {
1876 // TODO(bmeurer): Use RIP relative 128-bit constants.
1877 CpuFeatureScope avx_scope(tasm(), AVX);
1878 XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
1879 __ vpcmpeqd(tmp, tmp, tmp);
1880 __ vpsrlq(tmp, tmp, 1);
1881 if (instr->InputAt(0)->IsFPRegister()) {
1882 __ vandpd(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
1883 } else {
1884 __ vandpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
1885 }
1886 break;
1887 }
1888 case kAVXFloat64Neg: {
1889 // TODO(bmeurer): Use RIP relative 128-bit constants.
1890 CpuFeatureScope avx_scope(tasm(), AVX);
1891 XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
1892 __ vpcmpeqd(tmp, tmp, tmp);
1893 __ vpsllq(tmp, tmp, 63);
1894 if (instr->InputAt(0)->IsFPRegister()) {
1895 __ vxorpd(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
1896 } else {
1897 __ vxorpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
1898 }
1899 break;
1900 }
1901 case kSSEFloat64SilenceNaN:
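// Subtracting +0 leaves ordinary values (including -0) unchanged but
// quiets a signaling NaN, which is exactly the canonicalization needed.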
1902 __ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
1903 __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
1904 break;
1905 case kX64Movsxbl:
1906 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
1907 ASSEMBLE_MOVX(movsxbl);
1908 __ AssertZeroExtended(i.OutputRegister());
1909 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1910 break;
1911 case kX64Movzxbl:
1912 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
1913 ASSEMBLE_MOVX(movzxbl);
1914 __ AssertZeroExtended(i.OutputRegister());
1915 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1916 break;
1917 case kX64Movsxbq:
1918 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
1919 ASSEMBLE_MOVX(movsxbq);
1920 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1921 break;
1922 case kX64Movzxbq:
1923 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
1924 ASSEMBLE_MOVX(movzxbq);
1925 __ AssertZeroExtended(i.OutputRegister());
1926 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1927 break;
1928 case kX64Movb: {
1929 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
1930 size_t index = 0;
1931 Operand operand = i.MemoryOperand(&index);
1932 if (HasImmediateInput(instr, index)) {
1933 __ movb(operand, Immediate(i.InputInt8(index)));
1934 } else {
1935 __ movb(operand, i.InputRegister(index));
1936 }
1937 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1938 break;
1939 }
1940 case kX64Movsxwl:
1941 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
1942 ASSEMBLE_MOVX(movsxwl);
1943 __ AssertZeroExtended(i.OutputRegister());
1944 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1945 break;
1946 case kX64Movzxwl:
1947 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
1948 ASSEMBLE_MOVX(movzxwl);
1949 __ AssertZeroExtended(i.OutputRegister());
1950 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1951 break;
1952 case kX64Movsxwq:
1953 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
1954 ASSEMBLE_MOVX(movsxwq);
1955 break;
1956 case kX64Movzxwq:
1957 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
1958 ASSEMBLE_MOVX(movzxwq);
1959 __ AssertZeroExtended(i.OutputRegister());
1960 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1961 break;
1962 case kX64Movw: {
1963 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
1964 size_t index = 0;
1965 Operand operand = i.MemoryOperand(&index);
1966 if (HasImmediateInput(instr, index)) {
1967 __ movw(operand, Immediate(i.InputInt16(index)));
1968 } else {
1969 __ movw(operand, i.InputRegister(index));
1970 }
1971 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1972 break;
1973 }
1974 case kX64Movl:
1975 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
1976 if (instr->HasOutput()) {
1977 if (HasAddressingMode(instr)) {
1978 __ movl(i.OutputRegister(), i.MemoryOperand());
1979 } else {
1980 if (HasRegisterInput(instr, 0)) {
1981 __ movl(i.OutputRegister(), i.InputRegister(0));
1982 } else {
1983 __ movl(i.OutputRegister(), i.InputOperand(0));
1984 }
1985 }
1986 __ AssertZeroExtended(i.OutputRegister());
1987 } else {
1988 size_t index = 0;
1989 Operand operand = i.MemoryOperand(&index);
1990 if (HasImmediateInput(instr, index)) {
1991 __ movl(operand, i.InputImmediate(index));
1992 } else {
1993 __ movl(operand, i.InputRegister(index));
1994 }
1995 }
1996 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1997 break;
1998 case kX64Movsxlq:
1999 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2000 ASSEMBLE_MOVX(movsxlq);
2001 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2002 break;
2003 case kX64MovqDecompressTaggedSigned: {
2004 CHECK(instr->HasOutput());
2005 __ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand());
2006 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2007 break;
2008 }
2009 case kX64MovqDecompressTaggedPointer: {
2010 CHECK(instr->HasOutput());
2011 __ DecompressTaggedPointer(i.OutputRegister(), i.MemoryOperand());
2012 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2013 break;
2014 }
2015 case kX64MovqDecompressAnyTagged: {
2016 CHECK(instr->HasOutput());
2017 __ DecompressAnyTagged(i.OutputRegister(), i.MemoryOperand());
2018 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2019 break;
2020 }
2021 case kX64MovqCompressTagged: {
2022 CHECK(!instr->HasOutput());
2023 size_t index = 0;
2024 Operand operand = i.MemoryOperand(&index);
2025 if (HasImmediateInput(instr, index)) {
2026 __ StoreTaggedField(operand, i.InputImmediate(index));
2027 } else {
2028 __ StoreTaggedField(operand, i.InputRegister(index));
2029 }
2030 break;
2031 }
2032 case kX64Movq:
2033 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2034 if (instr->HasOutput()) {
2035 __ movq(i.OutputRegister(), i.MemoryOperand());
2036 } else {
2037 size_t index = 0;
2038 Operand operand = i.MemoryOperand(&index);
2039 if (HasImmediateInput(instr, index)) {
2040 __ movq(operand, i.InputImmediate(index));
2041 } else {
2042 __ movq(operand, i.InputRegister(index));
2043 }
2044 }
2045 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2046 break;
2047 case kX64Movss:
2048 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2049 if (instr->HasOutput()) {
2050 __ Movss(i.OutputDoubleRegister(), i.MemoryOperand());
2051 } else {
2052 size_t index = 0;
2053 Operand operand = i.MemoryOperand(&index);
2054 __ Movss(operand, i.InputDoubleRegister(index));
2055 }
2056 break;
2057 case kX64Movsd: {
2058 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2059 if (instr->HasOutput()) {
2060 const MemoryAccessMode access_mode =
2061 static_cast<MemoryAccessMode>(MiscField::decode(opcode));
2062 if (access_mode == kMemoryAccessPoisoned) {
2063 // If we have to poison the loaded value, we load into a general
2064 // purpose register first, mask it with the poison, and move the
2065 // value from the general purpose register into the double register.
2066 __ movq(kScratchRegister, i.MemoryOperand());
2067 __ andq(kScratchRegister, kSpeculationPoisonRegister);
2068 __ Movq(i.OutputDoubleRegister(), kScratchRegister);
2069 } else {
2070 __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
2071 }
2072 } else {
2073 size_t index = 0;
2074 Operand operand = i.MemoryOperand(&index);
2075 __ Movsd(operand, i.InputDoubleRegister(index));
2076 }
2077 break;
2078 }
2079 case kX64Movdqu: {
2080 CpuFeatureScope sse_scope(tasm(), SSSE3);
2081 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2082 if (instr->HasOutput()) {
2083 __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
2084 } else {
2085 size_t index = 0;
2086 Operand operand = i.MemoryOperand(&index);
2087 __ Movdqu(operand, i.InputSimd128Register(index));
2088 }
2089 break;
2090 }
2091 case kX64BitcastFI:
2092 if (instr->InputAt(0)->IsFPStackSlot()) {
2093 __ movl(i.OutputRegister(), i.InputOperand(0));
2094 } else {
2095 __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
2096 }
2097 break;
2098 case kX64BitcastDL:
2099 if (instr->InputAt(0)->IsFPStackSlot()) {
2100 __ movq(i.OutputRegister(), i.InputOperand(0));
2101 } else {
2102 __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
2103 }
2104 break;
2105 case kX64BitcastIF:
2106 if (HasRegisterInput(instr, 0)) {
2107 __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
2108 } else {
2109 __ Movss(i.OutputDoubleRegister(), i.InputOperand(0));
2110 }
2111 break;
2112 case kX64BitcastLD:
2113 if (HasRegisterInput(instr, 0)) {
2114 __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
2115 } else {
2116 __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
2117 }
2118 break;
2119 case kX64Lea32: {
2120 AddressingMode mode = AddressingModeField::decode(instr->opcode());
2121 // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
2122 // and addressing mode just happen to work out. The "addl"/"subl" forms
2123 // in these cases are faster based on measurements.
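// E.g. with mode kMode_MRI, "leal dst, [dst+4]" becomes "addl dst, 4",
// and with mode kMode_M2, "leal dst, [dst*2]" becomes "shll dst, 1".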
2124 if (i.InputRegister(0) == i.OutputRegister()) {
2125 if (mode == kMode_MRI) {
2126 int32_t constant_summand = i.InputInt32(1);
2127 DCHECK_NE(0, constant_summand);
2128 if (constant_summand > 0) {
2129 __ addl(i.OutputRegister(), Immediate(constant_summand));
2130 } else {
2131 __ subl(i.OutputRegister(),
2132 Immediate(base::NegateWithWraparound(constant_summand)));
2133 }
2134 } else if (mode == kMode_MR1) {
2135 if (i.InputRegister(1) == i.OutputRegister()) {
2136 __ shll(i.OutputRegister(), Immediate(1));
2137 } else {
2138 __ addl(i.OutputRegister(), i.InputRegister(1));
2139 }
2140 } else if (mode == kMode_M2) {
2141 __ shll(i.OutputRegister(), Immediate(1));
2142 } else if (mode == kMode_M4) {
2143 __ shll(i.OutputRegister(), Immediate(2));
2144 } else if (mode == kMode_M8) {
2145 __ shll(i.OutputRegister(), Immediate(3));
2146 } else {
2147 __ leal(i.OutputRegister(), i.MemoryOperand());
2148 }
2149 } else if (mode == kMode_MR1 &&
2150 i.InputRegister(1) == i.OutputRegister()) {
2151 __ addl(i.OutputRegister(), i.InputRegister(0));
2152 } else {
2153 __ leal(i.OutputRegister(), i.MemoryOperand());
2154 }
2155 __ AssertZeroExtended(i.OutputRegister());
2156 break;
2157 }
2158 case kX64Lea: {
2159 AddressingMode mode = AddressingModeField::decode(instr->opcode());
2160 // Shorten "leaq" to "addq", "subq" or "shlq" if the register allocation
2161 // and addressing mode just happen to work out. The "addq"/"subq" forms
2162 // in these cases are faster based on measurements.
2163 if (i.InputRegister(0) == i.OutputRegister()) {
2164 if (mode == kMode_MRI) {
2165 int32_t constant_summand = i.InputInt32(1);
2166 if (constant_summand > 0) {
2167 __ addq(i.OutputRegister(), Immediate(constant_summand));
2168 } else if (constant_summand < 0) {
2169 __ subq(i.OutputRegister(), Immediate(-constant_summand));
2170 }
2171 } else if (mode == kMode_MR1) {
2172 if (i.InputRegister(1) == i.OutputRegister()) {
2173 __ shlq(i.OutputRegister(), Immediate(1));
2174 } else {
2175 __ addq(i.OutputRegister(), i.InputRegister(1));
2176 }
2177 } else if (mode == kMode_M2) {
2178 __ shlq(i.OutputRegister(), Immediate(1));
2179 } else if (mode == kMode_M4) {
2180 __ shlq(i.OutputRegister(), Immediate(2));
2181 } else if (mode == kMode_M8) {
2182 __ shlq(i.OutputRegister(), Immediate(3));
2183 } else {
2184 __ leaq(i.OutputRegister(), i.MemoryOperand());
2185 }
2186 } else if (mode == kMode_MR1 &&
2187 i.InputRegister(1) == i.OutputRegister()) {
2188 __ addq(i.OutputRegister(), i.InputRegister(0));
2189 } else {
2190 __ leaq(i.OutputRegister(), i.MemoryOperand());
2191 }
2192 break;
2193 }
2194 case kX64Dec32:
2195 __ decl(i.OutputRegister());
2196 break;
2197 case kX64Inc32:
2198 __ incl(i.OutputRegister());
2199 break;
2200 case kX64Push:
2201 if (HasAddressingMode(instr)) {
2202 size_t index = 0;
2203 Operand operand = i.MemoryOperand(&index);
2204 __ pushq(operand);
2205 frame_access_state()->IncreaseSPDelta(1);
2206 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2207 kSystemPointerSize);
2208 } else if (HasImmediateInput(instr, 0)) {
2209 __ pushq(i.InputImmediate(0));
2210 frame_access_state()->IncreaseSPDelta(1);
2211 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2212 kSystemPointerSize);
2213 } else if (HasRegisterInput(instr, 0)) {
2214 __ pushq(i.InputRegister(0));
2215 frame_access_state()->IncreaseSPDelta(1);
2216 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2217 kSystemPointerSize);
2218 } else if (instr->InputAt(0)->IsFloatRegister() ||
2219 instr->InputAt(0)->IsDoubleRegister()) {
2220 // TODO(titzer): use another machine instruction?
2221 __ AllocateStackSpace(kDoubleSize);
2222 frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
2223 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2224 kDoubleSize);
2225 __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
2226 } else if (instr->InputAt(0)->IsSimd128Register()) {
2227 // TODO(titzer): use another machine instruction?
2228 __ AllocateStackSpace(kSimd128Size);
2229 frame_access_state()->IncreaseSPDelta(kSimd128Size /
2230 kSystemPointerSize);
2231 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2232 kSimd128Size);
2233 __ Movups(Operand(rsp, 0), i.InputSimd128Register(0));
2234 } else if (instr->InputAt(0)->IsStackSlot() ||
2235 instr->InputAt(0)->IsFloatStackSlot() ||
2236 instr->InputAt(0)->IsDoubleStackSlot()) {
2237 __ pushq(i.InputOperand(0));
2238 frame_access_state()->IncreaseSPDelta(1);
2239 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2240 kSystemPointerSize);
2241 } else {
2242 DCHECK(instr->InputAt(0)->IsSimd128StackSlot());
2243 __ Movups(kScratchDoubleReg, i.InputOperand(0));
2244 // TODO(titzer): use another machine instruction?
2245 __ AllocateStackSpace(kSimd128Size);
2246 frame_access_state()->IncreaseSPDelta(kSimd128Size /
2247 kSystemPointerSize);
2248 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2249 kSimd128Size);
2250 __ Movups(Operand(rsp, 0), kScratchDoubleReg);
2251 }
2252 break;
2253 case kX64Poke: {
2254 int slot = MiscField::decode(instr->opcode());
2255 if (HasImmediateInput(instr, 0)) {
2256 __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputImmediate(0));
2257 } else {
2258 __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputRegister(0));
2259 }
2260 break;
2261 }
2262 case kX64Peek: {
2263 int reverse_slot = i.InputInt32(0);
2264 int offset =
2265 FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
2266 if (instr->OutputAt(0)->IsFPRegister()) {
2267 LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
2268 if (op->representation() == MachineRepresentation::kFloat64) {
2269 __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
2270 } else {
2271 DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
2272 __ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
2273 }
2274 } else {
2275 __ movq(i.OutputRegister(), Operand(rbp, offset));
2276 }
2277 break;
2278 }
2279 case kX64F64x2Splat: {
2280 CpuFeatureScope sse_scope(tasm(), SSE3);
2281 XMMRegister dst = i.OutputSimd128Register();
2282 if (instr->InputAt(0)->IsFPRegister()) {
2283 __ Movddup(dst, i.InputDoubleRegister(0));
2284 } else {
2285 __ Movddup(dst, i.InputOperand(0));
2286 }
2287 break;
2288 }
2289 case kX64F64x2ReplaceLane: {
2290 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2291 if (instr->InputAt(2)->IsFPRegister()) {
2292 __ Movq(kScratchRegister, i.InputDoubleRegister(2));
2293 __ Pinsrq(i.OutputSimd128Register(), kScratchRegister, i.InputInt8(1));
2294 } else {
2295 __ Pinsrq(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2296 }
2297 break;
2298 }
2299 case kX64F64x2ExtractLane: {
2300 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2301 __ Pextrq(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
2302 __ Movq(i.OutputDoubleRegister(), kScratchRegister);
2303 break;
2304 }
2305 case kX64F64x2Sqrt: {
2306 __ Sqrtpd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2307 break;
2308 }
2309 case kX64F64x2Add: {
2310 ASSEMBLE_SSE_BINOP(Addpd);
2311 break;
2312 }
2313 case kX64F64x2Sub: {
2314 ASSEMBLE_SSE_BINOP(Subpd);
2315 break;
2316 }
2317 case kX64F64x2Mul: {
2318 ASSEMBLE_SSE_BINOP(Mulpd);
2319 break;
2320 }
2321 case kX64F64x2Div: {
2322 ASSEMBLE_SSE_BINOP(Divpd);
2323 break;
2324 }
2325 case kX64F64x2Min: {
2326 XMMRegister src1 = i.InputSimd128Register(1),
2327 dst = i.OutputSimd128Register();
2328 DCHECK_EQ(dst, i.InputSimd128Register(0));
2329 // The minpd instruction doesn't propagate NaNs and +0's in its first
2330 // operand. Perform minpd in both orders, merge the results, and adjust.
2331 __ Movapd(kScratchDoubleReg, src1);
2332 __ Minpd(kScratchDoubleReg, dst);
2333 __ Minpd(dst, src1);
2334 // Propagate -0's and NaNs, which may be non-canonical.
2335 __ Orpd(kScratchDoubleReg, dst);
2336 // Canonicalize NaNs by quieting and clearing the payload.
2337 __ Cmppd(dst, kScratchDoubleReg, static_cast<int8_t>(3));
2338 __ Orpd(kScratchDoubleReg, dst);
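// dst (the unordered-compare mask) is all ones on NaN lanes; shifting it
// right by 13 (sign + 11 exponent bits + quiet bit) leaves ones over the
// 51 payload bits, which Andnpd then clears to produce a quiet NaN.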
2339 __ Psrlq(dst, 13);
2340 __ Andnpd(dst, kScratchDoubleReg);
2341 break;
2342 }
2343 case kX64F64x2Max: {
2344 XMMRegister src1 = i.InputSimd128Register(1),
2345 dst = i.OutputSimd128Register();
2346 DCHECK_EQ(dst, i.InputSimd128Register(0));
2347 // The maxpd instruction doesn't propagate NaNs and +0's in its first
2348 // operand. Perform maxpd in both orders, merge the results, and adjust.
2349 __ Movapd(kScratchDoubleReg, src1);
2350 __ Maxpd(kScratchDoubleReg, dst);
2351 __ Maxpd(dst, src1);
2352 // Find discrepancies.
2353 __ Xorpd(dst, kScratchDoubleReg);
2354 // Propagate NaNs, which may be non-canonical.
2355 __ Orpd(kScratchDoubleReg, dst);
2356 // Propagate sign discrepancy and (subtle) quiet NaNs.
2357 __ Subpd(kScratchDoubleReg, dst);
2358 // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
2359 __ Cmppd(dst, kScratchDoubleReg, static_cast<int8_t>(3));
2360 __ Psrlq(dst, 13);
2361 __ Andnpd(dst, kScratchDoubleReg);
2362 break;
2363 }
2364 case kX64F64x2Eq: {
2365 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2366 __ Cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2367 break;
2368 }
2369 case kX64F64x2Ne: {
2370 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2371 __ Cmpneqpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2372 break;
2373 }
2374 case kX64F64x2Lt: {
2375 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2376 __ Cmpltpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2377 break;
2378 }
2379 case kX64F64x2Le: {
2380 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2381 __ Cmplepd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2382 break;
2383 }
2384 case kX64F64x2Qfma: {
2385 if (CpuFeatures::IsSupported(FMA3)) {
2386 CpuFeatureScope fma3_scope(tasm(), FMA3);
2387 __ vfmadd231pd(i.OutputSimd128Register(), i.InputSimd128Register(1),
2388 i.InputSimd128Register(2));
2389 } else {
2390 XMMRegister tmp = i.TempSimd128Register(0);
2391 __ Movapd(tmp, i.InputSimd128Register(2));
2392 __ Mulpd(tmp, i.InputSimd128Register(1));
2393 __ Addpd(i.OutputSimd128Register(), tmp);
2394 }
2395 break;
2396 }
2397 case kX64F64x2Qfms: {
2398 if (CpuFeatures::IsSupported(FMA3)) {
2399 CpuFeatureScope fma3_scope(tasm(), FMA3);
2400 __ vfnmadd231pd(i.OutputSimd128Register(), i.InputSimd128Register(1),
2401 i.InputSimd128Register(2));
2402 } else {
2403 XMMRegister tmp = i.TempSimd128Register(0);
2404 __ Movapd(tmp, i.InputSimd128Register(2));
2405 __ Mulpd(tmp, i.InputSimd128Register(1));
2406 __ Subpd(i.OutputSimd128Register(), tmp);
2407 }
2408 break;
2409 }
2410 // TODO(gdeepti): Get rid of redundant moves for F32x4Splat/Extract below
2411 case kX64F32x4Splat: {
2412 XMMRegister dst = i.OutputSimd128Register();
2413 if (instr->InputAt(0)->IsFPRegister()) {
2414 __ Movss(dst, i.InputDoubleRegister(0));
2415 } else {
2416 __ Movss(dst, i.InputOperand(0));
2417 }
2418 __ Shufps(dst, dst, static_cast<byte>(0x0));
2419 break;
2420 }
2421 case kX64F32x4ExtractLane: {
2422 __ Extractps(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
2423 __ Movd(i.OutputDoubleRegister(), kScratchRegister);
2424 break;
2425 }
2426 case kX64F32x4ReplaceLane: {
2427 // The insertps instruction uses imm8[5:4] to indicate the lane
2428 // that needs to be replaced.
2429 byte select = i.InputInt8(1) << 4 & 0x30;
2430 if (instr->InputAt(2)->IsFPRegister()) {
2431 __ Insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2),
2432 select);
2433 } else {
2434 __ Insertps(i.OutputSimd128Register(), i.InputOperand(2), select);
2435 }
2436 break;
2437 }
2438 case kX64F32x4SConvertI32x4: {
2439 __ Cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2440 break;
2441 }
2442 case kX64F32x4UConvertI32x4: {
2443 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2444 DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
2445 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2446 XMMRegister dst = i.OutputSimd128Register();
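// Cvtdq2ps interprets lanes as signed, so split each unsigned lane: the
// low 16 bits convert exactly, while the remaining high part is halved
// (to fit the signed range), converted, and doubled. Only the final
// Addps that recombines the two halves may round.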
2447 __ Pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
2448 __ Pblendw(kScratchDoubleReg, dst,
2449 static_cast<uint8_t>(0x55)); // get lo 16 bits
2450 __ Psubd(dst, kScratchDoubleReg); // get hi 16 bits
2451 __ Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
2452 __ Psrld(dst,
2453 static_cast<byte>(1)); // divide by 2 so the value fits the signed range
2454 __ Cvtdq2ps(dst, dst); // convert hi exactly
2455 __ Addps(dst, dst); // double hi, exactly
2456 __ Addps(dst, kScratchDoubleReg); // add hi and lo, may round.
2457 break;
2458 }
2459 case kX64F32x4Abs: {
2460 XMMRegister dst = i.OutputSimd128Register();
2461 XMMRegister src = i.InputSimd128Register(0);
2462 if (dst == src) {
2463 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2464 __ Psrld(kScratchDoubleReg, static_cast<byte>(1));
2465 __ Andps(i.OutputSimd128Register(), kScratchDoubleReg);
2466 } else {
2467 __ Pcmpeqd(dst, dst);
2468 __ Psrld(dst, static_cast<byte>(1));
2469 __ Andps(dst, i.InputSimd128Register(0));
2470 }
2471 break;
2472 }
2473 case kX64F32x4Neg: {
2474 XMMRegister dst = i.OutputSimd128Register();
2475 XMMRegister src = i.InputSimd128Register(0);
2476 if (dst == src) {
2477 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2478 __ Pslld(kScratchDoubleReg, static_cast<byte>(31));
2479 __ Xorps(i.OutputSimd128Register(), kScratchDoubleReg);
2480 } else {
2481 __ Pcmpeqd(dst, dst);
2482 __ Pslld(dst, static_cast<byte>(31));
2483 __ Xorps(dst, i.InputSimd128Register(0));
2484 }
2485 break;
2486 }
2487 case kX64F32x4Sqrt: {
2488 __ Sqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2489 break;
2490 }
2491 case kX64F32x4RecipApprox: {
2492 __ Rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2493 break;
2494 }
2495 case kX64F32x4RecipSqrtApprox: {
2496 __ Rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2497 break;
2498 }
2499 case kX64F32x4Add: {
2500 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2501 __ Addps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2502 break;
2503 }
2504 case kX64F32x4AddHoriz: {
2505 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2506 CpuFeatureScope sse_scope(tasm(), SSE3);
2507 __ Haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2508 break;
2509 }
2510 case kX64F32x4Sub: {
2511 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2512 __ Subps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2513 break;
2514 }
2515 case kX64F32x4Mul: {
2516 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2517 __ Mulps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2518 break;
2519 }
2520 case kX64F32x4Div: {
2521 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2522 __ Divps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2523 break;
2524 }
2525 case kX64F32x4Min: {
2526 XMMRegister src1 = i.InputSimd128Register(1),
2527 dst = i.OutputSimd128Register();
2528 DCHECK_EQ(dst, i.InputSimd128Register(0));
2529 // The minps instruction doesn't propagate NaNs and +0's in its first
2530 // operand. Perform minps in both orders, merge the results, and adjust.
2531 __ Movaps(kScratchDoubleReg, src1);
2532 __ Minps(kScratchDoubleReg, dst);
2533 __ Minps(dst, src1);
2534 // Propagate -0's and NaNs, which may be non-canonical.
2535 __ Orps(kScratchDoubleReg, dst);
2536 // Canonicalize NaNs by quieting and clearing the payload.
2537 __ Cmpps(dst, kScratchDoubleReg, static_cast<int8_t>(3));
2538 __ Orps(kScratchDoubleReg, dst);
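// As in the F64x2 case above: the 10-bit shift (sign + 8 exponent bits +
// quiet bit) leaves a mask over the 22 payload bits for Andnps to clear.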
2539 __ Psrld(dst, static_cast<byte>(10));
2540 __ Andnps(dst, kScratchDoubleReg);
2541 break;
2542 }
2543 case kX64F32x4Max: {
2544 XMMRegister src1 = i.InputSimd128Register(1),
2545 dst = i.OutputSimd128Register();
2546 DCHECK_EQ(dst, i.InputSimd128Register(0));
2547 // The maxps instruction doesn't propagate NaNs and +0's in its first
2548 // operand. Perform maxps in both orders, merge the results, and adjust.
2549 __ Movaps(kScratchDoubleReg, src1);
2550 __ Maxps(kScratchDoubleReg, dst);
2551 __ Maxps(dst, src1);
2552 // Find discrepancies.
2553 __ Xorps(dst, kScratchDoubleReg);
2554 // Propagate NaNs, which may be non-canonical.
2555 __ Orps(kScratchDoubleReg, dst);
2556 // Propagate sign discrepancy and (subtle) quiet NaNs.
2557 __ Subps(kScratchDoubleReg, dst);
2558 // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
2559 __ Cmpps(dst, kScratchDoubleReg, static_cast<int8_t>(3));
2560 __ Psrld(dst, static_cast<byte>(10));
2561 __ Andnps(dst, kScratchDoubleReg);
2562 break;
2563 }
2564 case kX64F32x4Eq: {
2565 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2566 __ Cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1),
2567 static_cast<int8_t>(0x0));
2568 break;
2569 }
2570 case kX64F32x4Ne: {
2571 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2572 __ Cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1),
2573 static_cast<int8_t>(0x4));
2574 break;
2575 }
2576 case kX64F32x4Lt: {
2577 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2578 __ Cmpltps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2579 break;
2580 }
2581 case kX64F32x4Le: {
2582 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2583 __ Cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2584 break;
2585 }
2586 case kX64F32x4Qfma: {
2587 if (CpuFeatures::IsSupported(FMA3)) {
2588 CpuFeatureScope fma3_scope(tasm(), FMA3);
2589 __ vfmadd231ps(i.OutputSimd128Register(), i.InputSimd128Register(1),
2590 i.InputSimd128Register(2));
2591 } else {
2592 XMMRegister tmp = i.TempSimd128Register(0);
2593 __ Movaps(tmp, i.InputSimd128Register(2));
2594 __ Mulps(tmp, i.InputSimd128Register(1));
2595 __ Addps(i.OutputSimd128Register(), tmp);
2596 }
2597 break;
2598 }
2599 case kX64F32x4Qfms: {
2600 if (CpuFeatures::IsSupported(FMA3)) {
2601 CpuFeatureScope fma3_scope(tasm(), FMA3);
2602 __ vfnmadd231ps(i.OutputSimd128Register(), i.InputSimd128Register(1),
2603 i.InputSimd128Register(2));
2604 } else {
2605 XMMRegister tmp = i.TempSimd128Register(0);
2606 __ Movaps(tmp, i.InputSimd128Register(2));
2607 __ Mulps(tmp, i.InputSimd128Register(1));
2608 __ Subps(i.OutputSimd128Register(), tmp);
2609 }
2610 break;
2611 }
2612 case kX64I64x2Splat: {
2613 CpuFeatureScope sse_scope(tasm(), SSE3);
2614 XMMRegister dst = i.OutputSimd128Register();
2615 if (HasRegisterInput(instr, 0)) {
2616 __ Movq(dst, i.InputRegister(0));
2617 } else {
2618 __ Movq(dst, i.InputOperand(0));
2619 }
2620 __ Movddup(dst, dst);
2621 break;
2622 }
2623 case kX64I64x2ExtractLane: {
2624 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2625 __ Pextrq(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
2626 break;
2627 }
2628 case kX64I64x2ReplaceLane: {
2629 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2630 if (HasRegisterInput(instr, 2)) {
2631 __ Pinsrq(i.OutputSimd128Register(), i.InputRegister(2),
2632 i.InputInt8(1));
2633 } else {
2634 __ Pinsrq(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2635 }
2636 break;
2637 }
2638 case kX64I64x2Neg: {
2639 XMMRegister dst = i.OutputSimd128Register();
2640 XMMRegister src = i.InputSimd128Register(0);
2641 if (dst == src) {
2642 __ Movapd(kScratchDoubleReg, src);
2643 src = kScratchDoubleReg;
2644 }
2645 __ Pxor(dst, dst);
2646 __ Psubq(dst, src);
2647 break;
2648 }
2649 case kX64I64x2Shl: {
2650 // Take shift value modulo 2^6.
2651 ASSEMBLE_SIMD_SHIFT(Psllq, 6);
2652 break;
2653 }
2654 case kX64I64x2ShrS: {
2655 // TODO(zhin): there is vpsraq but requires AVX512
2656 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2657 // Do an arithmetic shift right on each quadword, one at a time.
2658 XMMRegister dst = i.OutputSimd128Register();
2659 XMMRegister src = i.InputSimd128Register(0);
2660 Register tmp = i.ToRegister(instr->TempAt(0));
2661 // Modulo 64 not required as sarq_cl will mask cl to 6 bits.
2662
2663 // lower quadword
2664 __ Pextrq(tmp, src, static_cast<int8_t>(0x0));
2665 __ sarq_cl(tmp);
2666 __ Pinsrq(dst, tmp, static_cast<int8_t>(0x0));
2667
2668 // upper quadword
2669 __ Pextrq(tmp, src, static_cast<int8_t>(0x1));
2670 __ sarq_cl(tmp);
2671 __ Pinsrq(dst, tmp, static_cast<int8_t>(0x1));
2672 break;
2673 }
2674 case kX64I64x2Add: {
2675 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2676 __ Paddq(i.OutputSimd128Register(), i.InputSimd128Register(1));
2677 break;
2678 }
2679 case kX64I64x2Sub: {
2680 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2681 __ Psubq(i.OutputSimd128Register(), i.InputSimd128Register(1));
2682 break;
2683 }
2684 case kX64I64x2Mul: {
2685 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2686 XMMRegister left = i.InputSimd128Register(0);
2687 XMMRegister right = i.InputSimd128Register(1);
2688 XMMRegister tmp1 = i.TempSimd128Register(0);
2689 XMMRegister tmp2 = i.TempSimd128Register(1);
2690
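// Schoolbook 64x64->64 multiply from 32-bit pieces: with each lane
// l = l_hi * 2^32 + l_lo and r = r_hi * 2^32 + r_lo,
//   l * r mod 2^64 = ((l_hi * r_lo + l_lo * r_hi) << 32) + l_lo * r_lo,
// since the l_hi * r_hi term is shifted entirely out of the low 64 bits.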
2691 __ Movaps(tmp1, left);
2692 __ Movaps(tmp2, right);
2693
2694 // Multiply high dword of each qword of left with right.
2695 __ Psrlq(tmp1, 32);
2696 __ Pmuludq(tmp1, right);
2697
2698 // Multiply high dword of each qword of right with left.
2699 __ Psrlq(tmp2, 32);
2700 __ Pmuludq(tmp2, left);
2701
2702 __ Paddq(tmp2, tmp1);
2703 __ Psllq(tmp2, 32);
2704
2705 __ Pmuludq(left, right);
2706 __ Paddq(left, tmp2); // left == dst
2707 break;
2708 }
2709 case kX64I64x2MinS: {
2710 if (CpuFeatures::IsSupported(SSE4_2)) {
2711 CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2);
2712 XMMRegister dst = i.OutputSimd128Register();
2713 XMMRegister src0 = i.InputSimd128Register(0);
2714 XMMRegister src1 = i.InputSimd128Register(1);
2715 XMMRegister tmp = i.TempSimd128Register(0);
2716 DCHECK_EQ(tmp, xmm0);
2717
2718 __ movaps(tmp, src1);
2719 __ pcmpgtq(tmp, src0);
2720 __ movaps(dst, src1);
2721 __ blendvpd(dst, src0); // implicit use of xmm0 as mask
2722 } else {
2723 CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1);
2724 XMMRegister dst = i.OutputSimd128Register();
2725 XMMRegister src = i.InputSimd128Register(1);
2726 XMMRegister tmp = i.TempSimd128Register(0);
2727 Register tmp1 = i.TempRegister(1);
2728 Register tmp2 = i.TempRegister(2);
2729 DCHECK_EQ(dst, i.InputSimd128Register(0));
2730 // Back up src since we cannot change it.
2731 __ movaps(tmp, src);
2732
2733 // Compare the lower quadwords.
2734 __ movq(tmp1, dst);
2735 __ movq(tmp2, tmp);
2736 __ cmpq(tmp1, tmp2);
2737 // tmp2 now has the min of lower quadwords
2738 __ cmovq(less_equal, tmp2, tmp1);
2739 // tmp1 now has the higher quadword
2740 // Must do this before the movq below, since movq clears the top quadword.
2741 __ pextrq(tmp1, dst, 1);
2742 // save tmp2 into dst
2743 __ movq(dst, tmp2);
2744 // tmp2 now has the higher quadword
2745 __ pextrq(tmp2, tmp, 1);
2746 // compare higher quadwords
2747 __ cmpq(tmp1, tmp2);
2748 // tmp2 now has the min of higher quadwords
2749 __ cmovq(less_equal, tmp2, tmp1);
2750 __ movq(tmp, tmp2);
2751 // dst = [tmp[0], dst[0]]
2752 __ punpcklqdq(dst, tmp);
2753 }
2754 break;
2755 }
2756 case kX64I64x2MaxS: {
2757 CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2);
2758 XMMRegister dst = i.OutputSimd128Register();
2759 XMMRegister src = i.InputSimd128Register(1);
2760 XMMRegister tmp = i.TempSimd128Register(0);
2761 DCHECK_EQ(dst, i.InputSimd128Register(0));
2762 DCHECK_EQ(tmp, xmm0);
2763
2764 __ movaps(tmp, src);
2765 __ pcmpgtq(tmp, dst);
2766 __ blendvpd(dst, src); // implicit use of xmm0 as mask
2767 break;
2768 }
2769 case kX64I64x2Eq: {
2770 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2771 __ Pcmpeqq(i.OutputSimd128Register(), i.InputSimd128Register(1));
2772 break;
2773 }
2774 case kX64I64x2Ne: {
2775 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2776 XMMRegister tmp = i.TempSimd128Register(0);
2777 __ Pcmpeqq(i.OutputSimd128Register(), i.InputSimd128Register(1));
2778 __ Pcmpeqq(tmp, tmp);
2779 __ Pxor(i.OutputSimd128Register(), tmp);
2780 break;
2781 }
2782 case kX64I64x2GtS: {
2783 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2784 __ Pcmpgtq(i.OutputSimd128Register(), i.InputSimd128Register(1));
2785 break;
2786 }
2787 case kX64I64x2GeS: {
2788 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2789 XMMRegister dst = i.OutputSimd128Register();
2790 XMMRegister src = i.InputSimd128Register(1);
2791 XMMRegister tmp = i.TempSimd128Register(0);
2792
2793 __ Movaps(tmp, src);
2794 __ Pcmpgtq(tmp, dst);
2795 __ Pcmpeqd(dst, dst);
2796 __ Pxor(dst, tmp);
2797 break;
2798 }
2799 case kX64I64x2ShrU: {
2800 // Take shift value modulo 2^6.
2801 ASSEMBLE_SIMD_SHIFT(Psrlq, 6);
2802 break;
2803 }
2804 case kX64I64x2MinU: {
2805 CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2);
2806 CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1);
2807 XMMRegister dst = i.OutputSimd128Register();
2808 XMMRegister src0 = i.InputSimd128Register(0);
2809 XMMRegister src1 = i.InputSimd128Register(1);
2810 XMMRegister tmp0 = i.TempSimd128Register(0);
2811 XMMRegister tmp1 = i.TempSimd128Register(1);
2812 DCHECK_EQ(tmp1, xmm0);
2813
2814 __ movaps(dst, src1);
2815 __ movaps(tmp0, src0);
2816
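// There is no unsigned 64-bit compare, so both operands are biased by
// 2^63 (XOR with the sign-bit mask built below); the signed pcmpgtq then
// computes an unsigned greater-than, and its result, living in xmm0,
// selects the smaller operand via blendvpd.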
2817 __ pcmpeqd(tmp1, tmp1);
2818 __ psllq(tmp1, 63);
2819
2820 __ pxor(tmp0, tmp1);
2821 __ pxor(tmp1, dst);
2822
2823 __ pcmpgtq(tmp1, tmp0);
2824 __ blendvpd(dst, src0); // implicit use of xmm0 as mask
2825 break;
2826 }
2827 case kX64I64x2MaxU: {
2828 CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2);
2829 CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1);
2830 XMMRegister dst = i.OutputSimd128Register();
2831 XMMRegister src = i.InputSimd128Register(1);
2832 XMMRegister dst_tmp = i.TempSimd128Register(0);
2833 XMMRegister tmp = i.TempSimd128Register(1);
2834 DCHECK_EQ(dst, i.InputSimd128Register(0));
2835 DCHECK_EQ(tmp, xmm0);
2836
2837 __ movaps(dst_tmp, dst);
2838
2839 __ pcmpeqd(tmp, tmp);
2840 __ psllq(tmp, 63);
2841
2842 __ pxor(dst_tmp, tmp);
2843 __ pxor(tmp, src);
2844
2845 __ pcmpgtq(tmp, dst_tmp);
2846 __ blendvpd(dst, src); // implicit use of xmm0 as mask
2847 break;
2848 }
2849 case kX64I64x2GtU: {
2850 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2851 XMMRegister dst = i.OutputSimd128Register();
2852 XMMRegister src = i.InputSimd128Register(1);
2853 XMMRegister tmp = i.TempSimd128Register(0);
2854
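// Same 2^63 bias trick as in kX64I64x2MinU above: flip the sign bits so
// the signed Pcmpgtq implements an unsigned greater-than.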
2855 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2856 __ Psllq(kScratchDoubleReg, 63);
2857
2858 __ Movaps(tmp, src);
2859 __ Pxor(tmp, kScratchDoubleReg);
2860 __ Pxor(dst, kScratchDoubleReg);
2861 __ Pcmpgtq(dst, tmp);
2862 break;
2863 }
2864 case kX64I64x2GeU: {
2865 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2866 CpuFeatureScope sse_scope(tasm(), SSE4_2);
2867 XMMRegister dst = i.OutputSimd128Register();
2868 XMMRegister src = i.InputSimd128Register(1);
2869 XMMRegister tmp = i.TempSimd128Register(0);
2870
2871 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2872 __ Psllq(kScratchDoubleReg, 63);
2873
2874 __ Movaps(tmp, src);
2875 __ Pxor(dst, kScratchDoubleReg);
2876 __ Pxor(tmp, kScratchDoubleReg);
2877 __ Pcmpgtq(tmp, dst);
2878 __ Pcmpeqd(dst, dst);
2879 __ Pxor(dst, tmp);
2880 break;
2881 }
2882 case kX64I32x4Splat: {
2883 XMMRegister dst = i.OutputSimd128Register();
2884 if (HasRegisterInput(instr, 0)) {
2885 __ Movd(dst, i.InputRegister(0));
2886 } else {
2887 __ Movd(dst, i.InputOperand(0));
2888 }
2889 __ Pshufd(dst, dst, static_cast<uint8_t>(0x0));
2890 break;
2891 }
2892 case kX64I32x4ExtractLane: {
2893 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2894 __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
2895 break;
2896 }
2897 case kX64I32x4ReplaceLane: {
2898 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2899 if (HasRegisterInput(instr, 2)) {
2900 __ Pinsrd(i.OutputSimd128Register(), i.InputRegister(2),
2901 i.InputInt8(1));
2902 } else {
2903 __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2904 }
2905 break;
2906 }
2907 case kX64I32x4SConvertF32x4: {
2908 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2909 XMMRegister dst = i.OutputSimd128Register();
2910 XMMRegister tmp = i.TempSimd128Register(0);
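// Branch-free float->int32 with saturation: NaN lanes become 0, and
// positive-overflow lanes become 0x7FFFFFFF instead of the 0x80000000
// that Cvttps2dq produces on its own.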
2911 // NaN->0
2912 __ Movaps(tmp, dst);
2913 __ Cmpeqps(tmp, tmp);
2914 __ Pand(dst, tmp);
2915 // Set top bit if >= 0 (but not -0.0!)
2916 __ Pxor(tmp, dst);
2917 // Convert
2918 __ Cvttps2dq(dst, dst);
2919 // Set top bit if a lane that was >= 0 is now < 0, i.e. it overflowed
2920 __ Pand(tmp, dst);
2921 __ Psrad(tmp, static_cast<byte>(31));
2922 // Set positive overflow lanes to 0x7FFFFFFF
2923 __ Pxor(dst, tmp);
2924 break;
2925 }
2926 case kX64I32x4SConvertI16x8Low: {
2927 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2928 __ Pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2929 break;
2930 }
2931 case kX64I32x4SConvertI16x8High: {
2932 XMMRegister dst = i.OutputSimd128Register();
2933 __ Palignr(dst, i.InputSimd128Register(0), static_cast<uint8_t>(8));
2934 __ Pmovsxwd(dst, dst);
2935 break;
2936 }
2937 case kX64I32x4Neg: {
2938 CpuFeatureScope sse_scope(tasm(), SSSE3);
2939 XMMRegister dst = i.OutputSimd128Register();
2940 XMMRegister src = i.InputSimd128Register(0);
2941 if (dst == src) {
2942 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2943 __ Psignd(dst, kScratchDoubleReg);
2944 } else {
2945 __ Pxor(dst, dst);
2946 __ Psubd(dst, src);
2947 }
2948 break;
2949 }
2950 case kX64I32x4Shl: {
2951 // Take shift value modulo 2^5.
2952 ASSEMBLE_SIMD_SHIFT(Pslld, 5);
2953 break;
2954 }
2955 case kX64I32x4ShrS: {
2956 // Take shift value modulo 2^5.
2957 ASSEMBLE_SIMD_SHIFT(Psrad, 5);
2958 break;
2959 }
2960 case kX64I32x4Add: {
2961 __ Paddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2962 break;
2963 }
2964 case kX64I32x4AddHoriz: {
2965 CpuFeatureScope sse_scope(tasm(), SSSE3);
2966 __ Phaddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2967 break;
2968 }
2969 case kX64I32x4Sub: {
2970 __ Psubd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2971 break;
2972 }
2973 case kX64I32x4Mul: {
2974 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2975 __ Pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1));
2976 break;
2977 }
2978 case kX64I32x4MinS: {
2979 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2980 __ Pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2981 break;
2982 }
2983 case kX64I32x4MaxS: {
2984 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2985 __ Pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2986 break;
2987 }
2988 case kX64I32x4Eq: {
2989 __ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2990 break;
2991 }
2992 case kX64I32x4Ne: {
2993 XMMRegister tmp = i.TempSimd128Register(0);
2994 __ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2995 __ Pcmpeqd(tmp, tmp);
2996 __ Pxor(i.OutputSimd128Register(), tmp);
2997 break;
2998 }
2999 case kX64I32x4GtS: {
3000 __ Pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1));
3001 break;
3002 }
3003 case kX64I32x4GeS: {
3004 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3005 XMMRegister dst = i.OutputSimd128Register();
3006 XMMRegister src = i.InputSimd128Register(1);
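// a >= b is computed as min(a, b) == b, since SSE has no signed
// greater-or-equal compare for packed integers.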
3007 __ Pminsd(dst, src);
3008 __ Pcmpeqd(dst, src);
3009 break;
3010 }
3011 case kX64I32x4UConvertF32x4: {
3012 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3013 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3014 XMMRegister dst = i.OutputSimd128Register();
3015 XMMRegister tmp = i.TempSimd128Register(0);
3016 XMMRegister tmp2 = i.TempSimd128Register(1);
3017 // NaN->0, negative->0
3018 __ Pxor(tmp2, tmp2);
3019 __ Maxps(dst, tmp2);
3020 // tmp2: float representation of max_signed
3021 __ Pcmpeqd(tmp2, tmp2);
3022 __ Psrld(tmp2, static_cast<uint8_t>(1)); // 0x7fffffff
3023 __ Cvtdq2ps(tmp2, tmp2); // 0x4f000000
3024 // tmp: convert (src-max_signed).
3025 // Positive overflow lanes -> 0x7FFFFFFF
3026 // Negative lanes -> 0
3027 __ Movaps(tmp, dst);
3028 __ Subps(tmp, tmp2);
3029 __ Cmpleps(tmp2, tmp);
3030 __ Cvttps2dq(tmp, tmp);
3031 __ Pxor(tmp, tmp2);
3032 __ Pxor(tmp2, tmp2);
3033 __ Pmaxsd(tmp, tmp2);
3034 // Convert. Overflow lanes above max_signed will be 0x80000000
3035 __ Cvttps2dq(dst, dst);
3036 // Add (src-max_signed) for overflow lanes.
3037 __ Paddd(dst, tmp);
3038 break;
3039 }
3040 case kX64I32x4UConvertI16x8Low: {
3041 __ Pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
3042 break;
3043 }
3044 case kX64I32x4UConvertI16x8High: {
3045 XMMRegister dst = i.OutputSimd128Register();
3046 __ Palignr(dst, i.InputSimd128Register(0), static_cast<uint8_t>(8));
3047 __ Pmovzxwd(dst, dst);
3048 break;
3049 }
3050 case kX64I32x4ShrU: {
3051 // Take shift value modulo 2^5.
3052 ASSEMBLE_SIMD_SHIFT(Psrld, 5);
3053 break;
3054 }
3055 case kX64I32x4MinU: {
3056 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3057 __ Pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
3058 break;
3059 }
3060 case kX64I32x4MaxU: {
3061 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3062 __ Pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
3063 break;
3064 }
3065 case kX64I32x4GtU: {
3066 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3067 XMMRegister dst = i.OutputSimd128Register();
3068 XMMRegister src = i.InputSimd128Register(1);
3069 XMMRegister tmp = i.TempSimd128Register(0);
3070 __ Pmaxud(dst, src);
3071 __ Pcmpeqd(dst, src);
3072 __ Pcmpeqd(tmp, tmp);
3073 __ Pxor(dst, tmp);
3074 break;
3075 }
3076 case kX64I32x4GeU: {
3077 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3078 XMMRegister dst = i.OutputSimd128Register();
3079 XMMRegister src = i.InputSimd128Register(1);
3080 __ Pminud(dst, src);
3081 __ Pcmpeqd(dst, src);
3082 break;
3083 }
3084 case kX64I32x4Abs: {
3085 __ Pabsd(i.OutputSimd128Register(), i.InputSimd128Register(0));
3086 break;
3087 }
3088 case kX64S128Zero: {
3089 XMMRegister dst = i.OutputSimd128Register();
3090 __ Xorps(dst, dst);
3091 break;
3092 }
3093 case kX64I16x8Splat: {
3094 XMMRegister dst = i.OutputSimd128Register();
3095 if (HasRegisterInput(instr, 0)) {
3096 __ Movd(dst, i.InputRegister(0));
3097 } else {
3098 __ Movd(dst, i.InputOperand(0));
3099 }
3100 __ Pshuflw(dst, dst, static_cast<uint8_t>(0x0));
3101 __ Pshufd(dst, dst, static_cast<uint8_t>(0x0));
3102 break;
3103 }
3104 case kX64I16x8ExtractLaneU: {
3105 Register dst = i.OutputRegister();
3106 __ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
3107 break;
3108 }
3109 case kX64I16x8ExtractLaneS: {
3110 Register dst = i.OutputRegister();
3111 __ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
3112 __ movsxwl(dst, dst);
3113 break;
3114 }
3115 case kX64I16x8ReplaceLane: {
3116 if (HasRegisterInput(instr, 2)) {
3117 __ Pinsrw(i.OutputSimd128Register(), i.InputRegister(2),
3118 i.InputInt8(1));
3119 } else {
3120 __ Pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
3121 }
3122 break;
3123 }
3124 case kX64I16x8SConvertI8x16Low: {
3125 __ Pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
3126 break;
3127 }
3128 case kX64I16x8SConvertI8x16High: {
3129 XMMRegister dst = i.OutputSimd128Register();
3130 __ Palignr(dst, i.InputSimd128Register(0), static_cast<uint8_t>(8));
3131 __ Pmovsxbw(dst, dst);
3132 break;
3133 }
3134 case kX64I16x8Neg: {
3135 XMMRegister dst = i.OutputSimd128Register();
3136 XMMRegister src = i.InputSimd128Register(0);
3137 if (dst == src) {
3138 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3139 __ Psignw(dst, kScratchDoubleReg);
3140 } else {
3141 __ Pxor(dst, dst);
3142 __ Psubw(dst, src);
3143 }
3144 break;
3145 }
3146 case kX64I16x8Shl: {
3147 // Take shift value modulo 2^4.
3148 ASSEMBLE_SIMD_SHIFT(Psllw, 4);
3149 break;
3150 }
3151 case kX64I16x8ShrS: {
3152 // Take shift value modulo 2^4.
3153 ASSEMBLE_SIMD_SHIFT(Psraw, 4);
3154 break;
3155 }
3156 case kX64I16x8SConvertI32x4: {
3157 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3158 __ Packssdw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3159 break;
3160 }
3161 case kX64I16x8Add: {
3162 __ Paddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3163 break;
3164 }
3165 case kX64I16x8AddSaturateS: {
3166 __ Paddsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3167 break;
3168 }
3169 case kX64I16x8AddHoriz: {
3170 __ Phaddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3171 break;
3172 }
3173 case kX64I16x8Sub: {
3174 __ Psubw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3175 break;
3176 }
3177 case kX64I16x8SubSaturateS: {
3178 __ Psubsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3179 break;
3180 }
3181 case kX64I16x8Mul: {
3182 __ Pmullw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3183 break;
3184 }
3185 case kX64I16x8MinS: {
3186 __ Pminsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3187 break;
3188 }
3189 case kX64I16x8MaxS: {
3190 __ Pmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3191 break;
3192 }
3193 case kX64I16x8Eq: {
3194 __ Pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3195 break;
3196 }
3197 case kX64I16x8Ne: {
3198 XMMRegister tmp = i.TempSimd128Register(0);
3199 __ Pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3200 __ Pcmpeqw(tmp, tmp);
3201 __ Pxor(i.OutputSimd128Register(), tmp);
3202 break;
3203 }
3204 case kX64I16x8GtS: {
3205 __ Pcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3206 break;
3207 }
3208 case kX64I16x8GeS: {
3209 XMMRegister dst = i.OutputSimd128Register();
3210 XMMRegister src = i.InputSimd128Register(1);
3211 __ Pminsw(dst, src);
3212 __ Pcmpeqw(dst, src);
3213 break;
3214 }
3215 case kX64I16x8UConvertI8x16Low: {
3216 __ Pmovzxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
3217 break;
3218 }
3219 case kX64I16x8UConvertI8x16High: {
3220 XMMRegister dst = i.OutputSimd128Register();
3221 __ Palignr(dst, i.InputSimd128Register(0), static_cast<uint8_t>(8));
3222 __ Pmovzxbw(dst, dst);
3223 break;
3224 }
3225 case kX64I16x8ShrU: {
3226 // Take shift value modulo 2^4.
3227 ASSEMBLE_SIMD_SHIFT(Psrlw, 4);
3228 break;
3229 }
3230 case kX64I16x8UConvertI32x4: {
3231 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3232 __ Packusdw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3233 break;
3234 }
3235 case kX64I16x8AddSaturateU: {
3236 __ Paddusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3237 break;
3238 }
3239 case kX64I16x8SubSaturateU: {
3240 __ Psubusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3241 break;
3242 }
3243 case kX64I16x8MinU: {
3244 __ Pminuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3245 break;
3246 }
3247 case kX64I16x8MaxU: {
3248 __ Pmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3249 break;
3250 }
3251 case kX64I16x8GtU: {
3252 XMMRegister dst = i.OutputSimd128Register();
3253 XMMRegister src = i.InputSimd128Register(1);
3254 XMMRegister tmp = i.TempSimd128Register(0);
3255 __ Pmaxuw(dst, src);
3256 __ Pcmpeqw(dst, src);
3257 __ Pcmpeqw(tmp, tmp);
3258 __ Pxor(dst, tmp);
3259 break;
3260 }
3261 case kX64I16x8GeU: {
3262 XMMRegister dst = i.OutputSimd128Register();
3263 XMMRegister src = i.InputSimd128Register(1);
3264 __ Pminuw(dst, src);
3265 __ Pcmpeqw(dst, src);
3266 break;
3267 }
3268 case kX64I16x8RoundingAverageU: {
3269 __ Pavgw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3270 break;
3271 }
3272 case kX64I16x8Abs: {
3273 __ Pabsw(i.OutputSimd128Register(), i.InputSimd128Register(0));
3274 break;
3275 }
3276 case kX64I8x16Splat: {
3277 XMMRegister dst = i.OutputSimd128Register();
3278 if (HasRegisterInput(instr, 0)) {
3279 __ Movd(dst, i.InputRegister(0));
3280 } else {
3281 __ Movd(dst, i.InputOperand(0));
3282 }
3283 __ Xorps(kScratchDoubleReg, kScratchDoubleReg);
3284 __ Pshufb(dst, kScratchDoubleReg);
3285 break;
3286 }
3287 case kX64I8x16ExtractLaneU: {
3288 Register dst = i.OutputRegister();
3289 __ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
3290 break;
3291 }
3292 case kX64I8x16ExtractLaneS: {
3293 Register dst = i.OutputRegister();
3294 __ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
3295 __ movsxbl(dst, dst);
3296 break;
3297 }
3298 case kX64I8x16ReplaceLane: {
3299 if (HasRegisterInput(instr, 2)) {
3300 __ Pinsrb(i.OutputSimd128Register(), i.InputRegister(2),
3301 i.InputInt8(1));
3302 } else {
3303 __ Pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
3304 }
3305 break;
3306 }
3307 case kX64I8x16SConvertI16x8: {
3308 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3309 __ Packsswb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3310 break;
3311 }
3312 case kX64I8x16Neg: {
3313 XMMRegister dst = i.OutputSimd128Register();
3314 XMMRegister src = i.InputSimd128Register(0);
3315 if (dst == src) {
3316 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3317 __ Psignb(dst, kScratchDoubleReg);
3318 } else {
3319 __ Pxor(dst, dst);
3320 __ Psubb(dst, src);
3321 }
3322 break;
3323 }
3324 case kX64I8x16Shl: {
3325 XMMRegister dst = i.OutputSimd128Register();
3326 DCHECK_EQ(dst, i.InputSimd128Register(0));
3327 // Temp registers for shift mask and additional moves to XMM registers.
3328 Register tmp = i.ToRegister(instr->TempAt(0));
3329 XMMRegister tmp_simd = i.TempSimd128Register(1);
3330 if (HasImmediateInput(instr, 1)) {
3331 // Perform 16-bit shift, then mask away low bits.
3332 uint8_t shift = i.InputInt3(1);
3333 __ Psllw(dst, static_cast<byte>(shift));
3334
3335 uint8_t bmask = static_cast<uint8_t>(0xff << shift);
3336 uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
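// E.g. for shift == 3: bmask == 0xf8 and mask == 0xf8f8f8f8. Clearing the
// low |shift| bits of each byte removes the bits the word-wide shift
// dragged in from the neighboring byte.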
3337 __ movl(tmp, Immediate(mask));
3338 __ Movd(tmp_simd, tmp);
3339 __ Pshufd(tmp_simd, tmp_simd, static_cast<uint8_t>(0));
3340 __ Pand(dst, tmp_simd);
3341 } else {
3342 Register shift = i.InputRegister(1);
3343 // Mask off the unwanted bits before word-shifting.
3344 __ Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
3345 // Take shift value modulo 8.
3346 __ andq(shift, Immediate(7));
3347 __ movq(tmp, shift);
3348 __ addq(tmp, Immediate(8));
3349 __ Movq(tmp_simd, tmp);
3350 __ Psrlw(kScratchDoubleReg, tmp_simd);
3351 __ Packuswb(kScratchDoubleReg, kScratchDoubleReg);
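// kScratchDoubleReg now holds (0xff >> shift) in every byte: the logical
// word shift by (shift + 8) leaves that value in each word's low byte, and
// the unsigned pack copies it into all byte lanes. Masking before the left
// shift keeps bits from spilling into the neighboring byte.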
3352 __ Pand(dst, kScratchDoubleReg);
3353 __ Movq(tmp_simd, shift);
3354 __ Psllw(dst, tmp_simd);
3355 }
3356 break;
3357 }
3358 case kX64I8x16ShrS: {
3359 XMMRegister dst = i.OutputSimd128Register();
3360 DCHECK_EQ(dst, i.InputSimd128Register(0));
3361 if (HasImmediateInput(instr, 1)) {
3362 __ Punpckhbw(kScratchDoubleReg, dst);
3363 __ Punpcklbw(dst, dst);
3364 uint8_t shift = i.InputInt3(1) + 8;
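// The unpack puts each byte in the high half of a 16-bit word, so an
// arithmetic word shift by (shift + 8) leaves the sign-extended, shifted
// byte in the low half; Packsswb below repacks the byte lanes.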
3365 __ Psraw(kScratchDoubleReg, shift);
3366 __ Psraw(dst, shift);
3367 __ Packsswb(dst, kScratchDoubleReg);
3368 } else {
3369 // Temp registers for shift mask and additional moves to XMM registers.
3370 Register tmp = i.ToRegister(instr->TempAt(0));
3371 XMMRegister tmp_simd = i.TempSimd128Register(1);
3372 // Unpack the bytes into words, do arithmetic shifts, and repack.
3373 __ Punpckhbw(kScratchDoubleReg, dst);
3374 __ Punpcklbw(dst, dst);
3375 // Prepare the shift value.
3376 __ movq(tmp, i.InputRegister(1));
3377 // Take shift value modulo 8.
3378 __ andq(tmp, Immediate(7));
3379 __ addq(tmp, Immediate(8));
3380 __ Movq(tmp_simd, tmp);
3381 __ Psraw(kScratchDoubleReg, tmp_simd);
3382 __ Psraw(dst, tmp_simd);
3383 __ Packsswb(dst, kScratchDoubleReg);
3384 }
3385 break;
3386 }
3387 case kX64I8x16Add: {
3388 __ Paddb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3389 break;
3390 }
3391 case kX64I8x16AddSaturateS: {
3392 __ Paddsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3393 break;
3394 }
3395 case kX64I8x16Sub: {
3396 __ Psubb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3397 break;
3398 }
3399 case kX64I8x16SubSaturateS: {
3400 __ Psubsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3401 break;
3402 }
3403 case kX64I8x16Mul: {
3404 XMMRegister dst = i.OutputSimd128Register();
3405 DCHECK_EQ(dst, i.InputSimd128Register(0));
3406 XMMRegister right = i.InputSimd128Register(1);
3407 XMMRegister tmp = i.TempSimd128Register(0);
3408 // I16x8 view of I8x16
3409 // left = AAaa AAaa ... AAaa AAaa
3410 // right= BBbb BBbb ... BBbb BBbb
3411 // t = 00AA 00AA ... 00AA 00AA
3412 // s = 00BB 00BB ... 00BB 00BB
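// Worked example for one byte lane: a = 0x12, b = 0x34 gives the full
// product 0x03a8, and the byte written back is 0xa8, i.e. the low 8 bits
// (wrapping multiplication).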
3413 __ Movaps(tmp, dst);
3414 __ Movaps(kScratchDoubleReg, right);
3415 __ Psrlw(tmp, static_cast<byte>(8));
3416 __ Psrlw(kScratchDoubleReg, static_cast<byte>(8));
3417 // dst = left * 256
3418 __ Psllw(dst, static_cast<byte>(8));
3419 // t = I16x8Mul(t, s)
3420 // => __PP __PP ... __PP __PP
3421 __ Pmullw(tmp, kScratchDoubleReg);
3422 // dst = I16x8Mul(left * 256, right)
3423 // => pp__ pp__ ... pp__ pp__
3424 __ Pmullw(dst, right);
3425 // t = I16x8Shl(t, 8)
3426 // => PP00 PP00 ... PP00 PP00
3427 __ Psllw(tmp, static_cast<byte>(8));
3428 // dst = I16x8Shr(dst, 8)
3429 // => 00pp 00pp ... 00pp 00pp
3430 __ Psrlw(dst, static_cast<byte>(8));
3431 // dst = I16x8Or(dst, t)
3432 // => PPpp PPpp ... PPpp PPpp
3433 __ Por(dst, tmp);
3434 break;
3435 }
3436 case kX64I8x16MinS: {
3437 __ Pminsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3438 break;
3439 }
3440 case kX64I8x16MaxS: {
3441 __ Pmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3442 break;
3443 }
3444 case kX64I8x16Eq: {
3445 __ Pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3446 break;
3447 }
3448 case kX64I8x16Ne: {
3449 XMMRegister tmp = i.TempSimd128Register(0);
3450 __ Pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3451 __ Pcmpeqb(tmp, tmp);
3452 __ Pxor(i.OutputSimd128Register(), tmp);
3453 break;
3454 }
3455 case kX64I8x16GtS: {
3456 __ Pcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3457 break;
3458 }
3459 case kX64I8x16GeS: {
3460 XMMRegister dst = i.OutputSimd128Register();
3461 XMMRegister src = i.InputSimd128Register(1);
3462 __ Pminsb(dst, src);
3463 __ Pcmpeqb(dst, src);
3464 break;
3465 }
3466 case kX64I8x16UConvertI16x8: {
3467 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3468 __ Packuswb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3469 break;
3470 }
3471 case kX64I8x16ShrU: {
3472 XMMRegister dst = i.OutputSimd128Register();
3473 // Unpack the bytes into words, do logical shifts, and repack.
3474 DCHECK_EQ(dst, i.InputSimd128Register(0));
3475 // Temp registers for shift mask and additional moves to XMM registers.
3476 Register tmp = i.ToRegister(instr->TempAt(0));
3477 XMMRegister tmp_simd = i.TempSimd128Register(1);
3478 if (HasImmediateInput(instr, 1)) {
3479 // Perform 16-bit shift, then mask away high bits.
3480 uint8_t shift = i.InputInt3(1);
3481 __ Psrlw(dst, static_cast<byte>(shift));
3482
3483 uint8_t bmask = 0xff >> shift;
3484 uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
3485 __ movl(tmp, Immediate(mask));
3486 __ Movd(tmp_simd, tmp);
3487 __ Pshufd(tmp_simd, tmp_simd, static_cast<byte>(0));
3488 __ Pand(dst, tmp_simd);
3489 } else {
3490 __ Punpckhbw(kScratchDoubleReg, dst);
3491 __ Punpcklbw(dst, dst);
3492 // Prepare the shift value.
3493 __ movq(tmp, i.InputRegister(1));
3494 // Take shift value modulo 8.
3495 __ andq(tmp, Immediate(7));
3496 __ addq(tmp, Immediate(8));
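// Each byte now occupies the high half of a 16-bit word, so the logical
// word shift by (shift + 8) discards the word's low byte and leaves the
// zero-extended, shifted byte in its place; Packuswb repacks the lanes.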
3497 __ Movq(tmp_simd, tmp);
3498 __ Psrlw(kScratchDoubleReg, tmp_simd);
3499 __ Psrlw(dst, tmp_simd);
3500 __ Packuswb(dst, kScratchDoubleReg);
3501 }
3502 break;
3503 }
3504 case kX64I8x16AddSaturateU: {
3505 __ Paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3506 break;
3507 }
3508 case kX64I8x16SubSaturateU: {
3509 __ Psubusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3510 break;
3511 }
3512 case kX64I8x16MinU: {
3513 __ Pminub(i.OutputSimd128Register(), i.InputSimd128Register(1));
3514 break;
3515 }
3516 case kX64I8x16MaxU: {
3517 __ Pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(1));
3518 break;
3519 }
3520 case kX64I8x16GtU: {
3521 XMMRegister dst = i.OutputSimd128Register();
3522 XMMRegister src = i.InputSimd128Register(1);
3523 XMMRegister tmp = i.TempSimd128Register(0);
3524 __ Pmaxub(dst, src);
3525 __ Pcmpeqb(dst, src);
3526 __ Pcmpeqb(tmp, tmp);
3527 __ Pxor(dst, tmp);
3528 break;
3529 }
3530 case kX64I8x16GeU: {
3531 XMMRegister dst = i.OutputSimd128Register();
3532 XMMRegister src = i.InputSimd128Register(1);
3533 __ Pminub(dst, src);
3534 __ Pcmpeqb(dst, src);
3535 break;
3536 }
3537 case kX64I8x16RoundingAverageU: {
3538 __ Pavgb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3539 break;
3540 }
3541 case kX64I8x16Abs: {
3542 __ Pabsb(i.OutputSimd128Register(), i.InputSimd128Register(0));
3543 break;
3544 }
3545 case kX64S128And: {
3546 __ Pand(i.OutputSimd128Register(), i.InputSimd128Register(1));
3547 break;
3548 }
3549 case kX64S128Or: {
3550 __ Por(i.OutputSimd128Register(), i.InputSimd128Register(1));
3551 break;
3552 }
3553 case kX64S128Xor: {
3554 __ Pxor(i.OutputSimd128Register(), i.InputSimd128Register(1));
3555 break;
3556 }
3557 case kX64S128Not: {
3558 XMMRegister dst = i.OutputSimd128Register();
3559 XMMRegister src = i.InputSimd128Register(0);
3560 if (dst == src) {
3561 __ Movaps(kScratchDoubleReg, dst);
3562 __ Pcmpeqd(dst, dst);
3563 __ Pxor(dst, kScratchDoubleReg);
3564 } else {
3565 __ Pcmpeqd(dst, dst);
3566 __ Pxor(dst, src);
3567 }
3568
3569 break;
3570 }
3571 case kX64S128Select: {
3572 // Mask used here is stored in dst.
3573 XMMRegister dst = i.OutputSimd128Register();
3574 __ Movaps(kScratchDoubleReg, i.InputSimd128Register(1));
3575 __ Xorps(kScratchDoubleReg, i.InputSimd128Register(2));
3576 __ Andps(dst, kScratchDoubleReg);
3577 __ Xorps(dst, i.InputSimd128Register(2));
3578 break;
3579 }
3580 case kX64S128AndNot: {
3581 XMMRegister dst = i.OutputSimd128Register();
3582 DCHECK_EQ(dst, i.InputSimd128Register(0));
3583 // The inputs have been inverted by the instruction selector, so we can
3584 // call andnps here without any modifications.
3585 __ Andnps(dst, i.InputSimd128Register(1));
3586 break;
3587 }
3588 case kX64S8x16Swizzle: {
3589 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3590 XMMRegister dst = i.OutputSimd128Register();
3591 XMMRegister mask = i.TempSimd128Register(0);
3592
3593 // Out-of-range indices should return 0: Paddusb with 112 (0x70) maps any
3594 // index > 15 to at least 128 (top bit set), and pshufb zeroes such lanes.
3595 __ Move(mask, static_cast<uint32_t>(0x70707070));
3596 __ Pshufd(mask, mask, static_cast<uint8_t>(0x0));
3597 __ Paddusb(mask, i.InputSimd128Register(1));
3598 __ Pshufb(dst, mask);
3599 break;
3600 }
3601 case kX64S8x16Shuffle: {
3602 XMMRegister dst = i.OutputSimd128Register();
3603 Register tmp = i.TempRegister(0);
3604 // Prepare a 16-byte aligned buffer for the shuffle control mask.
3605 __ movq(tmp, rsp);
3606 __ andq(rsp, Immediate(-16));
3607 if (instr->InputCount() == 5) { // only one input operand
3608 uint32_t mask[4] = {};
3609 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3610 for (int j = 4; j > 0; j--) {
3611 mask[j - 1] = i.InputUint32(j);
3612 }
3613
3614 SetupShuffleMaskOnStack(tasm(), mask);
3615 __ Pshufb(dst, Operand(rsp, 0));
3616 } else { // two input operands
3617 DCHECK_EQ(6, instr->InputCount());
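// Two pshufb passes build the result: the first mask keeps lane indices
// < 16 (sourcing from input 0) and turns the rest into 0x80, which pshufb
// zeroes; the second does the reverse for input 1 using (lane & 0x0f).
// Por then merges the two partial results.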
3618 ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 0);
3619 uint32_t mask[4] = {};
3620 for (int j = 5; j > 1; j--) {
3621 uint32_t lanes = i.InputUint32(j);
3622 for (int k = 0; k < 32; k += 8) {
3623 uint8_t lane = lanes >> k;
3624 mask[j - 2] |= (lane < kSimd128Size ? lane : 0x80) << k;
3625 }
3626 }
3627 SetupShuffleMaskOnStack(tasm(), mask);
3628 __ Pshufb(kScratchDoubleReg, Operand(rsp, 0));
3629 uint32_t mask1[4] = {};
3630 if (instr->InputAt(1)->IsSimd128Register()) {
3631 XMMRegister src1 = i.InputSimd128Register(1);
3632 if (src1 != dst) __ movups(dst, src1);
3633 } else {
3634 __ Movups(dst, i.InputOperand(1));
3635 }
3636 for (int j = 5; j > 1; j--) {
3637 uint32_t lanes = i.InputUint32(j);
3638 for (int k = 0; k < 32; k += 8) {
3639 uint8_t lane = lanes >> k;
3640 mask1[j - 2] |= (lane >= kSimd128Size ? (lane & 0x0F) : 0x80) << k;
3641 }
3642 }
3643 SetupShuffleMaskOnStack(tasm(), mask1);
3644 __ Pshufb(dst, Operand(rsp, 0));
3645 __ Por(dst, kScratchDoubleReg);
3646 }
3647 __ movq(rsp, tmp);
3648 break;
3649 }
3650 case kX64S8x16LoadSplat: {
3651 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3652 __ Pinsrb(i.OutputSimd128Register(), i.MemoryOperand(), 0);
3653 __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
3654 __ Pshufb(i.OutputSimd128Register(), kScratchDoubleReg);
3655 break;
3656 }
3657 case kX64S16x8LoadSplat: {
3658 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3659 __ Pinsrw(i.OutputSimd128Register(), i.MemoryOperand(), 0);
3660 __ Pshuflw(i.OutputSimd128Register(), i.OutputSimd128Register(),
3661 static_cast<uint8_t>(0));
3662 __ Punpcklqdq(i.OutputSimd128Register(), i.OutputSimd128Register());
3663 break;
3664 }
3665 case kX64S32x4LoadSplat: {
3666 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3667 if (CpuFeatures::IsSupported(AVX)) {
3668 CpuFeatureScope avx_scope(tasm(), AVX);
3669 __ vbroadcastss(i.OutputSimd128Register(), i.MemoryOperand());
3670 } else {
3671 __ Movss(i.OutputSimd128Register(), i.MemoryOperand());
3672 __ Shufps(i.OutputSimd128Register(), i.OutputSimd128Register(),
3673 static_cast<byte>(0));
3674 }
3675 break;
3676 }
3677 case kX64S64x2LoadSplat: {
3678 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3679 __ Movddup(i.OutputSimd128Register(), i.MemoryOperand());
3680 break;
3681 }
3682 case kX64I16x8Load8x8S: {
3683 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3684 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3685 __ Pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand());
3686 break;
3687 }
3688 case kX64I16x8Load8x8U: {
3689 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3690 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3691 __ Pmovzxbw(i.OutputSimd128Register(), i.MemoryOperand());
3692 break;
3693 }
3694 case kX64I32x4Load16x4S: {
3695 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3696 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3697 __ Pmovsxwd(i.OutputSimd128Register(), i.MemoryOperand());
3698 break;
3699 }
3700 case kX64I32x4Load16x4U: {
3701 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3702 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3703 __ Pmovzxwd(i.OutputSimd128Register(), i.MemoryOperand());
3704 break;
3705 }
3706 case kX64I64x2Load32x2S: {
3707 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3708 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3709 __ Pmovsxdq(i.OutputSimd128Register(), i.MemoryOperand());
3710 break;
3711 }
3712 case kX64I64x2Load32x2U: {
3713 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3714 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3715 __ Pmovzxdq(i.OutputSimd128Register(), i.MemoryOperand());
3716 break;
3717 }
3718 case kX64S32x4Swizzle: {
3719 DCHECK_EQ(2, instr->InputCount());
3720 ASSEMBLE_SIMD_IMM_INSTR(Pshufd, i.OutputSimd128Register(), 0,
3721 i.InputUint8(1));
3722 break;
3723 }
3724 case kX64S32x4Shuffle: {
3725 DCHECK_EQ(4, instr->InputCount()); // Swizzles should be handled above.
3726 uint8_t shuffle = i.InputUint8(2);
3727 DCHECK_NE(0xe4, shuffle); // A simple blend should be handled below.
3728 ASSEMBLE_SIMD_IMM_INSTR(Pshufd, kScratchDoubleReg, 1, shuffle);
3729 ASSEMBLE_SIMD_IMM_INSTR(Pshufd, i.OutputSimd128Register(), 0, shuffle);
3730 __ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputUint8(3));
3731 break;
3732 }
3733 case kX64S16x8Blend: {
3734 ASSEMBLE_SIMD_IMM_SHUFFLE(Pblendw, i.InputUint8(2));
3735 break;
3736 }
3737 case kX64S16x8HalfShuffle1: {
3738 XMMRegister dst = i.OutputSimd128Register();
3739 ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, i.InputUint8(1));
3740 __ Pshufhw(dst, dst, i.InputUint8(2));
3741 break;
3742 }
3743 case kX64S16x8HalfShuffle2: {
3744 XMMRegister dst = i.OutputSimd128Register();
3745 ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, kScratchDoubleReg, 1, i.InputUint8(2));
3746 __ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputUint8(3));
3747 ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, i.InputUint8(2));
3748 __ Pshufhw(dst, dst, i.InputUint8(3));
3749 __ Pblendw(dst, kScratchDoubleReg, i.InputUint8(4));
3750 break;
3751 }
3752 case kX64S8x16Alignr: {
3753 ASSEMBLE_SIMD_IMM_SHUFFLE(Palignr, i.InputUint8(2));
3754 break;
3755 }
3756 case kX64S16x8Dup: {
3757 XMMRegister dst = i.OutputSimd128Register();
3758 uint8_t lane = i.InputInt8(1) & 0x7;
3759 uint8_t lane4 = lane & 0x3;
3760 uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
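// half_dup replicates the 2-bit lane index into all four fields of the
// pshuflw/pshufhw immediate, e.g. lane4 == 2 yields 0xaa.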
3761 if (lane < 4) {
3762 ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, half_dup);
3763 __ Pshufd(dst, dst, static_cast<uint8_t>(0));
3764 } else {
3765 ASSEMBLE_SIMD_IMM_INSTR(Pshufhw, dst, 0, half_dup);
3766 __ Pshufd(dst, dst, static_cast<uint8_t>(0xaa));
3767 }
3768 break;
3769 }
3770 case kX64S8x16Dup: {
3771 XMMRegister dst = i.OutputSimd128Register();
3772 uint8_t lane = i.InputInt8(1) & 0xf;
3773 DCHECK_EQ(dst, i.InputSimd128Register(0));
3774 if (lane < 8) {
3775 __ Punpcklbw(dst, dst);
3776 } else {
3777 __ Punpckhbw(dst, dst);
3778 }
3779 lane &= 0x7;
3780 uint8_t lane4 = lane & 0x3;
3781 uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3782 if (lane < 4) {
3783 __ Pshuflw(dst, dst, half_dup);
3784 __ Pshufd(dst, dst, static_cast<uint8_t>(0));
3785 } else {
3786 __ Pshufhw(dst, dst, half_dup);
3787 __ Pshufd(dst, dst, static_cast<uint8_t>(0xaa));
3788 }
3789 break;
3790 }
3791 case kX64S64x2UnpackHigh:
3792 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhqdq);
3793 break;
3794 case kX64S32x4UnpackHigh:
3795 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhdq);
3796 break;
3797 case kX64S16x8UnpackHigh:
3798 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhwd);
3799 break;
3800 case kX64S8x16UnpackHigh:
3801 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhbw);
3802 break;
3803 case kX64S64x2UnpackLow:
3804 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpcklqdq);
3805 break;
3806 case kX64S32x4UnpackLow:
3807 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckldq);
3808 break;
3809 case kX64S16x8UnpackLow:
3810 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpcklwd);
3811 break;
3812 case kX64S8x16UnpackLow:
3813 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpcklbw);
3814 break;
3815 case kX64S16x8UnzipHigh: {
3816 XMMRegister dst = i.OutputSimd128Register();
3817 XMMRegister src2 = dst;
3818 DCHECK_EQ(dst, i.InputSimd128Register(0));
3819 if (instr->InputCount() == 2) {
3820 ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
3821 __ Psrld(kScratchDoubleReg, static_cast<byte>(16));
3822 src2 = kScratchDoubleReg;
3823 }
3824 __ Psrld(dst, static_cast<byte>(16));
3825 __ Packusdw(dst, src2);
3826 break;
3827 }
3828 case kX64S16x8UnzipLow: {
3829 XMMRegister dst = i.OutputSimd128Register();
3830 XMMRegister src2 = dst;
3831 DCHECK_EQ(dst, i.InputSimd128Register(0));
3832 __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
3833 if (instr->InputCount() == 2) {
3834 ASSEMBLE_SIMD_IMM_INSTR(Pblendw, kScratchDoubleReg, 1,
3835 static_cast<uint8_t>(0x55));
3836 src2 = kScratchDoubleReg;
3837 }
3838 __ Pblendw(dst, kScratchDoubleReg, static_cast<uint8_t>(0xaa));
3839 __ Packusdw(dst, src2);
3840 break;
3841 }
3842 case kX64S8x16UnzipHigh: {
3843 XMMRegister dst = i.OutputSimd128Register();
3844 XMMRegister src2 = dst;
3845 DCHECK_EQ(dst, i.InputSimd128Register(0));
3846 if (instr->InputCount() == 2) {
3847 ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
3848 __ Psrlw(kScratchDoubleReg, static_cast<byte>(8));
3849 src2 = kScratchDoubleReg;
3850 }
3851 __ Psrlw(dst, static_cast<byte>(8));
3852 __ Packuswb(dst, src2);
3853 break;
3854 }
3855 case kX64S8x16UnzipLow: {
3856 XMMRegister dst = i.OutputSimd128Register();
3857 XMMRegister src2 = dst;
3858 DCHECK_EQ(dst, i.InputSimd128Register(0));
3859 if (instr->InputCount() == 2) {
3860 ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
3861 __ Psllw(kScratchDoubleReg, static_cast<byte>(8));
3862 __ Psrlw(kScratchDoubleReg, static_cast<byte>(8));
3863 src2 = kScratchDoubleReg;
3864 }
3865 __ Psllw(dst, static_cast<byte>(8));
3866 __ Psrlw(dst, static_cast<byte>(8));
3867 __ Packuswb(dst, src2);
3868 break;
3869 }
3870 case kX64S8x16TransposeLow: {
3871 XMMRegister dst = i.OutputSimd128Register();
3872 DCHECK_EQ(dst, i.InputSimd128Register(0));
3873 __ Psllw(dst, static_cast<byte>(8));
3874 if (instr->InputCount() == 1) {
3875 __ Movups(kScratchDoubleReg, dst);
3876 } else {
3877 DCHECK_EQ(2, instr->InputCount());
3878 ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
3879 __ Psllw(kScratchDoubleReg, static_cast<byte>(8));
3880 }
3881 __ Psrlw(dst, static_cast<byte>(8));
3882 __ Por(dst, kScratchDoubleReg);
3883 break;
3884 }
3885 case kX64S8x16TransposeHigh: {
3886 XMMRegister dst = i.OutputSimd128Register();
3887 DCHECK_EQ(dst, i.InputSimd128Register(0));
3888 __ Psrlw(dst, static_cast<byte>(8));
3889 if (instr->InputCount() == 1) {
3890 __ Movups(kScratchDoubleReg, dst);
3891 } else {
3892 DCHECK_EQ(2, instr->InputCount());
3893 ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
3894 __ Psrlw(kScratchDoubleReg, static_cast<byte>(8));
3895 }
3896 __ Psllw(kScratchDoubleReg, static_cast<byte>(8));
3897 __ Por(dst, kScratchDoubleReg);
3898 break;
3899 }
3900 case kX64S8x8Reverse:
3901 case kX64S8x4Reverse:
3902 case kX64S8x2Reverse: {
3903 DCHECK_EQ(1, instr->InputCount());
3904 XMMRegister dst = i.OutputSimd128Register();
3905 DCHECK_EQ(dst, i.InputSimd128Register(0));
3906 if (arch_opcode != kX64S8x2Reverse) {
3907 // First shuffle words into position.
3908 uint8_t shuffle_mask = arch_opcode == kX64S8x4Reverse ? 0xB1 : 0x1B;
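// 0x1b reverses the four words within each 8-byte half; 0xb1 swaps
// adjacent words. The byte swap within each word is done below with the
// shift-and-or sequence.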
3909 __ Pshuflw(dst, dst, shuffle_mask);
3910 __ Pshufhw(dst, dst, shuffle_mask);
3911 }
3912 __ Movaps(kScratchDoubleReg, dst);
3913 __ Psrlw(kScratchDoubleReg, static_cast<byte>(8));
3914 __ Psllw(dst, static_cast<byte>(8));
3915 __ Por(dst, kScratchDoubleReg);
3916 break;
3917 }
3918 case kX64S1x2AnyTrue:
3919 case kX64S1x4AnyTrue:
3920 case kX64S1x8AnyTrue:
3921 case kX64S1x16AnyTrue: {
3922 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3923 Register dst = i.OutputRegister();
3924 XMMRegister src = i.InputSimd128Register(0);
3925
3926 __ xorq(dst, dst);
3927 __ Ptest(src, src);
3928 __ setcc(not_equal, dst);
3929 break;
3930 }
3931 // Need to split up all the different lane structures because the
3932 // comparison instruction used matters, e.g. given 0xff00 compared against
3933 // zero, pcmpeqb returns 0x00ff, pcmpeqw returns 0x0000, so ptest sets ZF
3934 // to 0 and 1 respectively.
3935 case kX64S1x2AllTrue: {
3936 ASSEMBLE_SIMD_ALL_TRUE(pcmpeqq);
3937 break;
3938 }
3939 case kX64S1x4AllTrue: {
3940 ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqd);
3941 break;
3942 }
3943 case kX64S1x8AllTrue: {
3944 ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqw);
3945 break;
3946 }
3947 case kX64S1x16AllTrue: {
3948 ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqb);
3949 break;
3950 }
3951 case kWord32AtomicExchangeInt8: {
3952 __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3953 __ movsxbl(i.InputRegister(0), i.InputRegister(0));
3954 break;
3955 }
3956 case kWord32AtomicExchangeUint8: {
3957 __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3958 __ movzxbl(i.InputRegister(0), i.InputRegister(0));
3959 break;
3960 }
3961 case kWord32AtomicExchangeInt16: {
3962 __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3963 __ movsxwl(i.InputRegister(0), i.InputRegister(0));
3964 break;
3965 }
3966 case kWord32AtomicExchangeUint16: {
3967 __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3968 __ movzxwl(i.InputRegister(0), i.InputRegister(0));
3969 break;
3970 }
3971 case kWord32AtomicExchangeWord32: {
3972 __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
3973 break;
3974 }
3975 case kWord32AtomicCompareExchangeInt8: {
3976 __ lock();
3977 __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3978 __ movsxbl(rax, rax);
3979 break;
3980 }
3981 case kWord32AtomicCompareExchangeUint8: {
3982 __ lock();
3983 __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3984 __ movzxbl(rax, rax);
3985 break;
3986 }
3987 case kWord32AtomicCompareExchangeInt16: {
3988 __ lock();
3989 __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3990 __ movsxwl(rax, rax);
3991 break;
3992 }
3993 case kWord32AtomicCompareExchangeUint16: {
3994 __ lock();
3995 __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3996 __ movzxwl(rax, rax);
3997 break;
3998 }
3999 case kWord32AtomicCompareExchangeWord32: {
4000 __ lock();
4001 __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
4002 break;
4003 }
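// The ASSEMBLE_ATOMIC_BINOP cases below expand to a load / operate /
// lock cmpxchg retry loop (see the macro defined earlier in this file)
// that leaves the old value in rax, hence the sign- and zero-extensions
// of rax after each use.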
4004 #define ATOMIC_BINOP_CASE(op, inst) \
4005 case kWord32Atomic##op##Int8: \
4006 ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
4007 __ movsxbl(rax, rax); \
4008 break; \
4009 case kWord32Atomic##op##Uint8: \
4010 ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
4011 __ movzxbl(rax, rax); \
4012 break; \
4013 case kWord32Atomic##op##Int16: \
4014 ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
4015 __ movsxwl(rax, rax); \
4016 break; \
4017 case kWord32Atomic##op##Uint16: \
4018 ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
4019 __ movzxwl(rax, rax); \
4020 break; \
4021 case kWord32Atomic##op##Word32: \
4022 ASSEMBLE_ATOMIC_BINOP(inst, movl, cmpxchgl); \
4023 break;
4024 ATOMIC_BINOP_CASE(Add, addl)
4025 ATOMIC_BINOP_CASE(Sub, subl)
4026 ATOMIC_BINOP_CASE(And, andl)
4027 ATOMIC_BINOP_CASE(Or, orl)
4028 ATOMIC_BINOP_CASE(Xor, xorl)
4029 #undef ATOMIC_BINOP_CASE
4030 case kX64Word64AtomicExchangeUint8: {
4031 __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
4032 __ movzxbq(i.InputRegister(0), i.InputRegister(0));
4033 break;
4034 }
4035 case kX64Word64AtomicExchangeUint16: {
4036 __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
4037 __ movzxwq(i.InputRegister(0), i.InputRegister(0));
4038 break;
4039 }
4040 case kX64Word64AtomicExchangeUint32: {
4041 __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
4042 break;
4043 }
4044 case kX64Word64AtomicExchangeUint64: {
4045 __ xchgq(i.InputRegister(0), i.MemoryOperand(1));
4046 break;
4047 }
4048 case kX64Word64AtomicCompareExchangeUint8: {
4049 __ lock();
4050 __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
4051 __ movzxbq(rax, rax);
4052 break;
4053 }
4054 case kX64Word64AtomicCompareExchangeUint16: {
4055 __ lock();
4056 __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
4057 __ movzxwq(rax, rax);
4058 break;
4059 }
4060 case kX64Word64AtomicCompareExchangeUint32: {
4061 __ lock();
4062 __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
4063 // Zero-extend the 32 bit value to 64 bit.
4064 __ movl(rax, rax);
4065 break;
4066 }
4067 case kX64Word64AtomicCompareExchangeUint64: {
4068 __ lock();
4069 __ cmpxchgq(i.MemoryOperand(2), i.InputRegister(1));
4070 break;
4071 }
4072 #define ATOMIC64_BINOP_CASE(op, inst) \
4073 case kX64Word64Atomic##op##Uint8: \
4074 ASSEMBLE_ATOMIC64_BINOP(inst, movb, cmpxchgb); \
4075 __ movzxbq(rax, rax); \
4076 break; \
4077 case kX64Word64Atomic##op##Uint16: \
4078 ASSEMBLE_ATOMIC64_BINOP(inst, movw, cmpxchgw); \
4079 __ movzxwq(rax, rax); \
4080 break; \
4081 case kX64Word64Atomic##op##Uint32: \
4082 ASSEMBLE_ATOMIC64_BINOP(inst, movl, cmpxchgl); \
4083 break; \
4084 case kX64Word64Atomic##op##Uint64: \
4085 ASSEMBLE_ATOMIC64_BINOP(inst, movq, cmpxchgq); \
4086 break;
4087 ATOMIC64_BINOP_CASE(Add, addq)
4088 ATOMIC64_BINOP_CASE(Sub, subq)
4089 ATOMIC64_BINOP_CASE(And, andq)
4090 ATOMIC64_BINOP_CASE(Or, orq)
4091 ATOMIC64_BINOP_CASE(Xor, xorq)
4092 #undef ATOMIC64_BINOP_CASE
4093 case kWord32AtomicLoadInt8:
4094 case kWord32AtomicLoadUint8:
4095 case kWord32AtomicLoadInt16:
4096 case kWord32AtomicLoadUint16:
4097 case kWord32AtomicLoadWord32:
4098 case kWord32AtomicStoreWord8:
4099 case kWord32AtomicStoreWord16:
4100 case kWord32AtomicStoreWord32:
4101 case kX64Word64AtomicLoadUint8:
4102 case kX64Word64AtomicLoadUint16:
4103 case kX64Word64AtomicLoadUint32:
4104 case kX64Word64AtomicLoadUint64:
4105 case kX64Word64AtomicStoreWord8:
4106 case kX64Word64AtomicStoreWord16:
4107 case kX64Word64AtomicStoreWord32:
4108 case kX64Word64AtomicStoreWord64:
4109 UNREACHABLE(); // Won't be generated by instruction selector.
4110 break;
4111 }
4112 return kSuccess;
4113 }  // NOLINT(readability/fn_size)
4114
4115 #undef ASSEMBLE_UNOP
4116 #undef ASSEMBLE_BINOP
4117 #undef ASSEMBLE_COMPARE
4118 #undef ASSEMBLE_MULT
4119 #undef ASSEMBLE_SHIFT
4120 #undef ASSEMBLE_MOVX
4121 #undef ASSEMBLE_SSE_BINOP
4122 #undef ASSEMBLE_SSE_UNOP
4123 #undef ASSEMBLE_AVX_BINOP
4124 #undef ASSEMBLE_IEEE754_BINOP
4125 #undef ASSEMBLE_IEEE754_UNOP
4126 #undef ASSEMBLE_ATOMIC_BINOP
4127 #undef ASSEMBLE_ATOMIC64_BINOP
4128 #undef ASSEMBLE_SIMD_INSTR
4129 #undef ASSEMBLE_SIMD_IMM_INSTR
4130 #undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
4131 #undef ASSEMBLE_SIMD_IMM_SHUFFLE
4132 #undef ASSEMBLE_SIMD_ALL_TRUE
4133 #undef ASSEMBLE_SIMD_SHIFT
4134
4135 namespace {
4136
4137 Condition FlagsConditionToCondition(FlagsCondition condition) {
4138 switch (condition) {
4139 case kUnorderedEqual:
4140 case kEqual:
4141 return equal;
4142 case kUnorderedNotEqual:
4143 case kNotEqual:
4144 return not_equal;
4145 case kSignedLessThan:
4146 return less;
4147 case kSignedGreaterThanOrEqual:
4148 return greater_equal;
4149 case kSignedLessThanOrEqual:
4150 return less_equal;
4151 case kSignedGreaterThan:
4152 return greater;
4153 case kUnsignedLessThan:
4154 return below;
4155 case kUnsignedGreaterThanOrEqual:
4156 return above_equal;
4157 case kUnsignedLessThanOrEqual:
4158 return below_equal;
4159 case kUnsignedGreaterThan:
4160 return above;
4161 case kOverflow:
4162 return overflow;
4163 case kNotOverflow:
4164 return no_overflow;
4165 default:
4166 break;
4167 }
4168 UNREACHABLE();
4169 }
4170
4171 } // namespace
4172
4173 // Assembles branches after this instruction.
4174 void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
4175 Label::Distance flabel_distance =
4176 branch->fallthru ? Label::kNear : Label::kFar;
4177 Label* tlabel = branch->true_label;
4178 Label* flabel = branch->false_label;
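// Floating-point comparisons leave the parity flag set for unordered
// (NaN) inputs, so dispatch that case first: NaN goes to the false label
// for kUnorderedEqual and to the true label for kUnorderedNotEqual.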
4179 if (branch->condition == kUnorderedEqual) {
4180 __ j(parity_even, flabel, flabel_distance);
4181 } else if (branch->condition == kUnorderedNotEqual) {
4182 __ j(parity_even, tlabel);
4183 }
4184 __ j(FlagsConditionToCondition(branch->condition), tlabel);
4185
4186 if (!branch->fallthru) __ jmp(flabel, flabel_distance);
4187 }
4188
4189 void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
4190 Instruction* instr) {
4191 // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
4192 if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
4193 return;
4194 }
4195
4196 condition = NegateFlagsCondition(condition);
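// If the negated condition holds, this block was reached along a path the
// branch should not have taken, so the cmov replaces the poison mask with
// zero; subsequent poisoned loads then read masked values.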
4197 __ movl(kScratchRegister, Immediate(0));
4198 __ cmovq(FlagsConditionToCondition(condition), kSpeculationPoisonRegister,
4199 kScratchRegister);
4200 }
4201
4202 void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
4203 BranchInfo* branch) {
4204 Label::Distance flabel_distance =
4205 branch->fallthru ? Label::kNear : Label::kFar;
4206 Label* tlabel = branch->true_label;
4207 Label* flabel = branch->false_label;
4208 Label nodeopt;
4209 if (branch->condition == kUnorderedEqual) {
4210 __ j(parity_even, flabel, flabel_distance);
4211 } else if (branch->condition == kUnorderedNotEqual) {
4212 __ j(parity_even, tlabel);
4213 }
4214 __ j(FlagsConditionToCondition(branch->condition), tlabel);
4215
4216 if (FLAG_deopt_every_n_times > 0) {
4217 ExternalReference counter =
4218 ExternalReference::stress_deopt_count(isolate());
4219
4220 __ pushfq();
4221 __ pushq(rax);
4222 __ load_rax(counter);
4223 __ decl(rax);
4224 __ j(not_zero, &nodeopt, Label::kNear);
4225
4226 __ Set(rax, FLAG_deopt_every_n_times);
4227 __ store_rax(counter);
4228 __ popq(rax);
4229 __ popfq();
4230 __ jmp(tlabel);
4231
4232 __ bind(&nodeopt);
4233 __ store_rax(counter);
4234 __ popq(rax);
4235 __ popfq();
4236 }
4237
4238 if (!branch->fallthru) {
4239 __ jmp(flabel, flabel_distance);
4240 }
4241 }
4242
4243 void CodeGenerator::AssembleArchJump(RpoNumber target) {
4244 if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
4245 }
4246
4247 void CodeGenerator::AssembleArchTrap(Instruction* instr,
4248 FlagsCondition condition) {
4249 auto ool = new (zone()) WasmOutOfLineTrap(this, instr);
4250 Label* tlabel = ool->entry();
4251 Label end;
4252 if (condition == kUnorderedEqual) {
4253 __ j(parity_even, &end, Label::kNear);
4254 } else if (condition == kUnorderedNotEqual) {
4255 __ j(parity_even, tlabel);
4256 }
4257 __ j(FlagsConditionToCondition(condition), tlabel);
4258 __ bind(&end);
4259 }
4260
4261 // Assembles boolean materializations after this instruction.
4262 void CodeGenerator::AssembleArchBoolean(Instruction* instr,
4263 FlagsCondition condition) {
4264 X64OperandConverter i(this, instr);
4265 Label done;
4266
4267 // Materialize a full 64-bit 1 or 0 value. The result register is always the
4268 // last output of the instruction.
4269 Label check;
4270 DCHECK_NE(0u, instr->OutputCount());
4271 Register reg = i.OutputRegister(instr->OutputCount() - 1);
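// Unordered (NaN) float comparisons set the parity flag, so materialize
// the result directly in that case: 0 for kUnorderedEqual, 1 for
// kUnorderedNotEqual; ordered results fall through to the setcc below.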
4272 if (condition == kUnorderedEqual) {
4273 __ j(parity_odd, &check, Label::kNear);
4274 __ movl(reg, Immediate(0));
4275 __ jmp(&done, Label::kNear);
4276 } else if (condition == kUnorderedNotEqual) {
4277 __ j(parity_odd, &check, Label::kNear);
4278 __ movl(reg, Immediate(1));
4279 __ jmp(&done, Label::kNear);
4280 }
4281 __ bind(&check);
4282 __ setcc(FlagsConditionToCondition(condition), reg);
4283 __ movzxbl(reg, reg);
4284 __ bind(&done);
4285 }
4286
4287 void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
4288 X64OperandConverter i(this, instr);
4289 Register input = i.InputRegister(0);
4290 std::vector<std::pair<int32_t, Label*>> cases;
4291 for (size_t index = 2; index < instr->InputCount(); index += 2) {
4292 cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
4293 }
4294 AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
4295 cases.data() + cases.size());
4296 }
4297
4298 void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
4299 X64OperandConverter i(this, instr);
4300 Register input = i.InputRegister(0);
4301 int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2);
4302 Label** cases = zone()->NewArray<Label*>(case_count);
4303 for (int32_t index = 0; index < case_count; ++index) {
4304 cases[index] = GetLabel(i.InputRpo(index + 2));
4305 }
4306 Label* const table = AddJumpTable(cases, case_count);
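// Bounds-check the input, then jump indirectly through the table of
// 8-byte label addresses emitted by AssembleJumpTable.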
4307 __ cmpl(input, Immediate(case_count));
4308 __ j(above_equal, GetLabel(i.InputRpo(1)));
4309 __ leaq(kScratchRegister, Operand(table));
4310 __ jmp(Operand(kScratchRegister, input, times_8, 0));
4311 }
4312
4313 namespace {
4314
4315 static const int kQuadWordSize = 16;
4316
4317 } // namespace
4318
4319 void CodeGenerator::FinishFrame(Frame* frame) {
4320 auto call_descriptor = linkage()->GetIncomingDescriptor();
4321
4322 const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
4323 if (saves_fp != 0) {
4324 frame->AlignSavedCalleeRegisterSlots();
4325 if (saves_fp != 0) { // Save callee-saved XMM registers.
4326 const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
4327 frame->AllocateSavedCalleeRegisterSlots(
4328 saves_fp_count * (kQuadWordSize / kSystemPointerSize));
4329 }
4330 }
4331 const RegList saves = call_descriptor->CalleeSavedRegisters();
4332 if (saves != 0) { // Save callee-saved registers.
4333 int count = 0;
4334 for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
4335 if (((1 << i) & saves)) {
4336 ++count;
4337 }
4338 }
4339 frame->AllocateSavedCalleeRegisterSlots(count);
4340 }
4341 }
4342
4343 void CodeGenerator::AssembleConstructFrame() {
4344 auto call_descriptor = linkage()->GetIncomingDescriptor();
4345 if (frame_access_state()->has_frame()) {
4346 int pc_base = __ pc_offset();
4347
4348 if (call_descriptor->IsCFunctionCall()) {
4349 __ pushq(rbp);
4350 __ movq(rbp, rsp);
4351 if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
4352 __ Push(Immediate(StackFrame::TypeToMarker(StackFrame::C_WASM_ENTRY)));
4353 // Reserve stack space for saving the c_entry_fp later.
4354 __ AllocateStackSpace(kSystemPointerSize);
4355 }
4356 } else if (call_descriptor->IsJSFunctionCall()) {
4357 __ Prologue();
4358 if (call_descriptor->PushArgumentCount()) {
4359 __ pushq(kJavaScriptCallArgCountRegister);
4360 }
4361 } else {
4362 __ StubPrologue(info()->GetOutputStackFrameType());
4363 if (call_descriptor->IsWasmFunctionCall()) {
4364 __ pushq(kWasmInstanceRegister);
4365 } else if (call_descriptor->IsWasmImportWrapper() ||
4366 call_descriptor->IsWasmCapiFunction()) {
4367 // Wasm import wrappers are passed a tuple in place of the instance.
4368 // Unpack the tuple into the instance and the target callable.
4369 // This must be done here in the codegen because it cannot be expressed
4370 // properly in the graph.
4371 __ LoadTaggedPointerField(
4372 kJSFunctionRegister,
4373 FieldOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
4374 __ LoadTaggedPointerField(
4375 kWasmInstanceRegister,
4376 FieldOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
4377 __ pushq(kWasmInstanceRegister);
4378 if (call_descriptor->IsWasmCapiFunction()) {
4379 // Reserve space for saving the PC later.
4380 __ AllocateStackSpace(kSystemPointerSize);
4381 }
4382 }
4383 }
4384
4385 unwinding_info_writer_.MarkFrameConstructed(pc_base);
4386 }
4387 int required_slots =
4388 frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();
4389
4390 if (info()->is_osr()) {
4391 // TurboFan OSR-compiled functions cannot be entered directly.
4392 __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
4393
4394 // Unoptimized code jumps directly to this entrypoint while the unoptimized
4395 // frame is still on the stack. Optimized code uses OSR values directly from
4396 // the unoptimized frame. Thus, all that needs to be done is to allocate the
4397 // remaining stack slots.
4398 if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
4399 osr_pc_offset_ = __ pc_offset();
4400 required_slots -= static_cast<int>(osr_helper()->UnoptimizedFrameSlots());
4401 ResetSpeculationPoison();
4402 }
4403
4404 const RegList saves = call_descriptor->CalleeSavedRegisters();
4405 const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
4406
4407 if (required_slots > 0) {
4408 DCHECK(frame_access_state()->has_frame());
4409 if (info()->IsWasm() && required_slots > 128) {
4410 // For WebAssembly functions with big frames we have to do the stack
4411 // overflow check before we construct the frame. Otherwise we may not
4412 // have enough space on the stack to call the runtime for the stack
4413 // overflow.
4414 Label done;
4415
4416 // If the frame is bigger than the stack, we throw the stack overflow
4417 // exception unconditionally. Thereby we can avoid the integer overflow
4418 // check in the condition code.
4419 if (required_slots * kSystemPointerSize < FLAG_stack_size * 1024) {
4420 __ movq(kScratchRegister,
4421 FieldOperand(kWasmInstanceRegister,
4422 WasmInstanceObject::kRealStackLimitAddressOffset));
4423 __ movq(kScratchRegister, Operand(kScratchRegister, 0));
4424 __ addq(kScratchRegister,
4425 Immediate(required_slots * kSystemPointerSize));
4426 __ cmpq(rsp, kScratchRegister);
4427 __ j(above_equal, &done, Label::kNear);
4428 }
4429
4430 __ near_call(wasm::WasmCode::kWasmStackOverflow,
4431 RelocInfo::WASM_STUB_CALL);
4432 ReferenceMap* reference_map = new (zone()) ReferenceMap(zone());
4433 RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
4434 __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
4435 __ bind(&done);
4436 }
4437
4438 // Skip callee-saved and return slots, which are created below.
4439 required_slots -= base::bits::CountPopulation(saves);
4440 required_slots -= base::bits::CountPopulation(saves_fp) *
4441 (kQuadWordSize / kSystemPointerSize);
4442 required_slots -= frame()->GetReturnSlotCount();
4443 if (required_slots > 0) {
4444 __ AllocateStackSpace(required_slots * kSystemPointerSize);
4445 }
4446 }
4447
4448 if (saves_fp != 0) { // Save callee-saved XMM registers.
4449 const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
4450 const int stack_size = saves_fp_count * kQuadWordSize;
4451 // Adjust the stack pointer.
4452 __ AllocateStackSpace(stack_size);
4453 // Store the registers on the stack.
4454 int slot_idx = 0;
4455 for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
4456 if (!((1 << i) & saves_fp)) continue;
4457 __ Movdqu(Operand(rsp, kQuadWordSize * slot_idx),
4458 XMMRegister::from_code(i));
4459 slot_idx++;
4460 }
4461 }
4462
4463 if (saves != 0) { // Save callee-saved registers.
4464 for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
4465 if (!((1 << i) & saves)) continue;
4466 __ pushq(Register::from_code(i));
4467 }
4468 }
4469
4470 // Allocate return slots (located after callee-saved).
4471 if (frame()->GetReturnSlotCount() > 0) {
4472 __ AllocateStackSpace(frame()->GetReturnSlotCount() * kSystemPointerSize);
4473 }
4474 }
4475
4476 void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
4477 auto call_descriptor = linkage()->GetIncomingDescriptor();
4478
4479 // Restore registers.
4480 const RegList saves = call_descriptor->CalleeSavedRegisters();
4481 if (saves != 0) {
4482 const int returns = frame()->GetReturnSlotCount();
4483 if (returns != 0) {
4484 __ addq(rsp, Immediate(returns * kSystemPointerSize));
4485 }
4486 for (int i = 0; i < Register::kNumRegisters; i++) {
4487 if (!((1 << i) & saves)) continue;
4488 __ popq(Register::from_code(i));
4489 }
4490 }
4491 const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
4492 if (saves_fp != 0) {
4493 const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
4494 const int stack_size = saves_fp_count * kQuadWordSize;
4495 // Load the registers from the stack.
4496 int slot_idx = 0;
4497 for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
4498 if (!((1 << i) & saves_fp)) continue;
4499 __ Movdqu(XMMRegister::from_code(i),
4500 Operand(rsp, kQuadWordSize * slot_idx));
4501 slot_idx++;
4502 }
4503 // Adjust the stack pointer.
4504 __ addq(rsp, Immediate(stack_size));
4505 }
4506
4507 unwinding_info_writer_.MarkBlockWillExit();
4508
4509 // Might need rcx for scratch if pop_size is too big or if there is a variable
4510 // pop count.
4511 DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rcx.bit());
4512 DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rdx.bit());
4513 size_t pop_size = call_descriptor->StackParameterCount() * kSystemPointerSize;
4514 X64OperandConverter g(this, nullptr);
4515 if (call_descriptor->IsCFunctionCall()) {
4516 AssembleDeconstructFrame();
4517 } else if (frame_access_state()->has_frame()) {
4518 if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
4519 // Canonicalize JSFunction return sites for now.
4520 if (return_label_.is_bound()) {
4521 __ jmp(&return_label_);
4522 return;
4523 } else {
4524 __ bind(&return_label_);
4525 AssembleDeconstructFrame();
4526 }
4527 } else {
4528 AssembleDeconstructFrame();
4529 }
4530 }
4531
4532 if (pop->IsImmediate()) {
4533 pop_size += g.ToConstant(pop).ToInt32() * kSystemPointerSize;
4534 CHECK_LT(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
4535 __ Ret(static_cast<int>(pop_size), rcx);
4536 } else {
4537 Register pop_reg = g.ToRegister(pop);
4538 Register scratch_reg = pop_reg == rcx ? rdx : rcx;
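// Pop the return address into the scratch register, drop pop_size static
// bytes plus pop_reg dynamic slots from the stack in one leaq, and jump
// back to the caller.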
4539 __ popq(scratch_reg);
4540 __ leaq(rsp, Operand(rsp, pop_reg, times_8, static_cast<int>(pop_size)));
4541 __ jmp(scratch_reg);
4542 }
4543 }
4544
4545 void CodeGenerator::FinishCode() { tasm()->PatchConstPool(); }
4546
4547 void CodeGenerator::PrepareForDeoptimizationExits(int deopt_count) {}
4548
4549 void CodeGenerator::AssembleMove(InstructionOperand* source,
4550 InstructionOperand* destination) {
4551 X64OperandConverter g(this, nullptr);
4552 // Helper function to write the given constant to the dst register.
4553 auto MoveConstantToRegister = [&](Register dst, Constant src) {
4554 switch (src.type()) {
4555 case Constant::kInt32: {
4556 if (RelocInfo::IsWasmReference(src.rmode())) {
4557 __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
4558 } else {
4559 int32_t value = src.ToInt32();
4560 if (value == 0) {
4561 __ xorl(dst, dst);
4562 } else {
4563 __ movl(dst, Immediate(value));
4564 }
4565 }
4566 break;
4567 }
4568 case Constant::kInt64:
4569 if (RelocInfo::IsWasmReference(src.rmode())) {
4570 __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
4571 } else {
4572 __ Set(dst, src.ToInt64());
4573 }
4574 break;
4575 case Constant::kFloat32:
4576 __ MoveNumber(dst, src.ToFloat32());
4577 break;
4578 case Constant::kFloat64:
4579 __ MoveNumber(dst, src.ToFloat64().value());
4580 break;
4581 case Constant::kExternalReference:
4582 __ Move(dst, src.ToExternalReference());
4583 break;
4584 case Constant::kHeapObject: {
4585 Handle<HeapObject> src_object = src.ToHeapObject();
4586 RootIndex index;
4587 if (IsMaterializableFromRoot(src_object, &index)) {
4588 __ LoadRoot(dst, index);
4589 } else {
4590 __ Move(dst, src_object);
4591 }
4592 break;
4593 }
4594 case Constant::kCompressedHeapObject: {
4595 Handle<HeapObject> src_object = src.ToHeapObject();
4596 RootIndex index;
4597 if (IsMaterializableFromRoot(src_object, &index)) {
4598 __ LoadRoot(dst, index);
4599 } else {
4600 __ Move(dst, src_object, RelocInfo::COMPRESSED_EMBEDDED_OBJECT);
4601 }
4602 break;
4603 }
4604 case Constant::kDelayedStringConstant: {
4605 const StringConstantBase* src_constant = src.ToDelayedStringConstant();
4606 __ MoveStringConstant(dst, src_constant);
4607 break;
4608 }
4609 case Constant::kRpoNumber:
4610 UNREACHABLE(); // TODO(dcarney): load of labels on x64.
4611 break;
4612 }
4613 };
4614 // Helper function to write the given constant to the stack.
4615 auto MoveConstantToSlot = [&](Operand dst, Constant src) {
4616 if (!RelocInfo::IsWasmReference(src.rmode())) {
4617 switch (src.type()) {
4618 case Constant::kInt32:
4619 __ movq(dst, Immediate(src.ToInt32()));
4620 return;
4621 case Constant::kInt64:
4622 __ Set(dst, src.ToInt64());
4623 return;
4624 default:
4625 break;
4626 }
4627 }
4628 MoveConstantToRegister(kScratchRegister, src);
4629 __ movq(dst, kScratchRegister);
4630 };
4631 // Dispatch on the source and destination operand kinds.
4632 switch (MoveType::InferMove(source, destination)) {
4633 case MoveType::kRegisterToRegister:
4634 if (source->IsRegister()) {
4635 __ movq(g.ToRegister(destination), g.ToRegister(source));
4636 } else {
4637 DCHECK(source->IsFPRegister());
4638 __ Movapd(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
4639 }
4640 return;
4641 case MoveType::kRegisterToStack: {
4642 Operand dst = g.ToOperand(destination);
4643 if (source->IsRegister()) {
4644 __ movq(dst, g.ToRegister(source));
4645 } else {
4646 DCHECK(source->IsFPRegister());
4647 XMMRegister src = g.ToDoubleRegister(source);
4648 MachineRepresentation rep =
4649 LocationOperand::cast(source)->representation();
4650 if (rep != MachineRepresentation::kSimd128) {
4651 __ Movsd(dst, src);
4652 } else {
4653 __ Movups(dst, src);
4654 }
4655 }
4656 return;
4657 }
4658 case MoveType::kStackToRegister: {
4659 Operand src = g.ToOperand(source);
4660 if (source->IsStackSlot()) {
4661 __ movq(g.ToRegister(destination), src);
4662 } else {
4663 DCHECK(source->IsFPStackSlot());
4664 XMMRegister dst = g.ToDoubleRegister(destination);
4665 MachineRepresentation rep =
4666 LocationOperand::cast(source)->representation();
4667 if (rep != MachineRepresentation::kSimd128) {
4668 __ Movsd(dst, src);
4669 } else {
4670 __ Movups(dst, src);
4671 }
4672 }
4673 return;
4674 }
4675 case MoveType::kStackToStack: {
4676 Operand src = g.ToOperand(source);
4677 Operand dst = g.ToOperand(destination);
4678 if (source->IsStackSlot()) {
4679 // Spill on demand to use a temporary register for memory-to-memory
4680 // moves.
4681 __ movq(kScratchRegister, src);
4682 __ movq(dst, kScratchRegister);
4683 } else {
4684 MachineRepresentation rep =
4685 LocationOperand::cast(source)->representation();
4686 if (rep != MachineRepresentation::kSimd128) {
4687 __ Movsd(kScratchDoubleReg, src);
4688 __ Movsd(dst, kScratchDoubleReg);
4689 } else {
4690 DCHECK(source->IsSimd128StackSlot());
4691 __ Movups(kScratchDoubleReg, src);
4692 __ Movups(dst, kScratchDoubleReg);
4693 }
4694 }
4695 return;
4696 }
4697 case MoveType::kConstantToRegister: {
4698 Constant src = g.ToConstant(source);
4699 if (destination->IsRegister()) {
4700 MoveConstantToRegister(g.ToRegister(destination), src);
4701 } else {
4702 DCHECK(destination->IsFPRegister());
4703 XMMRegister dst = g.ToDoubleRegister(destination);
4704 if (src.type() == Constant::kFloat32) {
4705 // TODO(turbofan): Can we do better here?
4706 __ Move(dst, bit_cast<uint32_t>(src.ToFloat32()));
4707 } else {
4708 DCHECK_EQ(src.type(), Constant::kFloat64);
4709 __ Move(dst, src.ToFloat64().AsUint64());
4710 }
4711 }
4712 return;
4713 }
4714 case MoveType::kConstantToStack: {
4715 Constant src = g.ToConstant(source);
4716 Operand dst = g.ToOperand(destination);
4717 if (destination->IsStackSlot()) {
4718 MoveConstantToSlot(dst, src);
4719 } else {
4720 DCHECK(destination->IsFPStackSlot());
4721 if (src.type() == Constant::kFloat32) {
4722 __ movl(dst, Immediate(bit_cast<uint32_t>(src.ToFloat32())));
4723 } else {
4724 DCHECK_EQ(src.type(), Constant::kFloat64);
4725 __ movq(kScratchRegister, src.ToFloat64().AsUint64());
4726 __ movq(dst, kScratchRegister);
4727 }
4728 }
4729 return;
4730 }
4731 }
4732 UNREACHABLE();
4733 }
4734
4735 void CodeGenerator::AssembleSwap(InstructionOperand* source,
4736 InstructionOperand* destination) {
4737 X64OperandConverter g(this, nullptr);
4738 // Dispatch on the source and destination operand kinds. Not all
4739 // combinations are possible.
4740 switch (MoveType::InferSwap(source, destination)) {
4741 case MoveType::kRegisterToRegister: {
4742 if (source->IsRegister()) {
4743 Register src = g.ToRegister(source);
4744 Register dst = g.ToRegister(destination);
4745 __ movq(kScratchRegister, src);
4746 __ movq(src, dst);
4747 __ movq(dst, kScratchRegister);
4748 } else {
4749 DCHECK(source->IsFPRegister());
4750 XMMRegister src = g.ToDoubleRegister(source);
4751 XMMRegister dst = g.ToDoubleRegister(destination);
4752 __ Movapd(kScratchDoubleReg, src);
4753 __ Movapd(src, dst);
4754 __ Movapd(dst, kScratchDoubleReg);
4755 }
4756 return;
4757 }
4758 case MoveType::kRegisterToStack: {
4759 if (source->IsRegister()) {
4760 Register src = g.ToRegister(source);
4761 __ pushq(src);
4762 frame_access_state()->IncreaseSPDelta(1);
4763 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4764 kSystemPointerSize);
4765 __ movq(src, g.ToOperand(destination));
4766 frame_access_state()->IncreaseSPDelta(-1);
4767 __ popq(g.ToOperand(destination));
4768 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4769 -kSystemPointerSize);
4770 } else {
4771 DCHECK(source->IsFPRegister());
4772 XMMRegister src = g.ToDoubleRegister(source);
4773 Operand dst = g.ToOperand(destination);
4774 MachineRepresentation rep =
4775 LocationOperand::cast(source)->representation();
4776 if (rep != MachineRepresentation::kSimd128) {
4777 __ Movsd(kScratchDoubleReg, src);
4778 __ Movsd(src, dst);
4779 __ Movsd(dst, kScratchDoubleReg);
4780 } else {
4781 __ Movups(kScratchDoubleReg, src);
4782 __ Movups(src, dst);
4783 __ Movups(dst, kScratchDoubleReg);
4784 }
4785 }
4786 return;
4787 }
4788 case MoveType::kStackToStack: {
4789 Operand src = g.ToOperand(source);
4790 Operand dst = g.ToOperand(destination);
4791 MachineRepresentation rep =
4792 LocationOperand::cast(source)->representation();
4793 if (rep != MachineRepresentation::kSimd128) {
4794 Register tmp = kScratchRegister;
4795 __ movq(tmp, dst);
4796 __ pushq(src); // Then use stack to copy src to destination.
4797 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4798 kSystemPointerSize);
4799 __ popq(dst);
4800 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4801 -kSystemPointerSize);
4802 __ movq(src, tmp);
4803 } else {
4804 // Without AVX, misaligned reads and writes will trap. Move using the
4805 // stack, in two parts.
4806 __ movups(kScratchDoubleReg, dst); // Save dst in scratch register.
4807 __ pushq(src); // Then use stack to copy src to destination.
4808 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4809 kSystemPointerSize);
4810 __ popq(dst);
4811 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4812 -kSystemPointerSize);
4813 __ pushq(g.ToOperand(source, kSystemPointerSize));
4814 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4815 kSystemPointerSize);
4816 __ popq(g.ToOperand(destination, kSystemPointerSize));
4817 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4818 -kSystemPointerSize);
4819 __ movups(src, kScratchDoubleReg);
4820 }
4821 return;
4822 }
4823 default:
4824 UNREACHABLE();
4825 }
4826 }
4827
4828 void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
4829 for (size_t index = 0; index < target_count; ++index) {
4830 __ dq(targets[index]);
4831 }
4832 }
4833
4834 #undef __
4835
4836 } // namespace compiler
4837 } // namespace internal
4838 } // namespace v8
4839