1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <limits>
6
7 #include "src/base/overflowing-math.h"
8 #include "src/codegen/macro-assembler.h"
9 #include "src/codegen/optimized-compilation-info.h"
10 #include "src/codegen/x64/assembler-x64.h"
11 #include "src/compiler/backend/code-generator-impl.h"
12 #include "src/compiler/backend/code-generator.h"
13 #include "src/compiler/backend/gap-resolver.h"
14 #include "src/compiler/node-matchers.h"
15 #include "src/compiler/osr.h"
16 #include "src/heap/memory-chunk.h"
17 #include "src/objects/smi.h"
18 #include "src/wasm/wasm-code-manager.h"
19 #include "src/wasm/wasm-objects.h"
20
21 namespace v8 {
22 namespace internal {
23 namespace compiler {
24
25 #define __ tasm()->
26
27 // Adds X64 specific methods for decoding operands.
28 class X64OperandConverter : public InstructionOperandConverter {
29 public:
30   X64OperandConverter(CodeGenerator* gen, Instruction* instr)
31 : InstructionOperandConverter(gen, instr) {}
32
33   Immediate InputImmediate(size_t index) {
34 return ToImmediate(instr_->InputAt(index));
35 }
36
37   Operand InputOperand(size_t index, int extra = 0) {
38 return ToOperand(instr_->InputAt(index), extra);
39 }
40
41   Operand OutputOperand() { return ToOperand(instr_->Output()); }
42
43   Immediate ToImmediate(InstructionOperand* operand) {
44 Constant constant = ToConstant(operand);
45 if (constant.type() == Constant::kFloat64) {
46 DCHECK_EQ(0, constant.ToFloat64().AsUint64());
47 return Immediate(0);
48 }
49 if (RelocInfo::IsWasmReference(constant.rmode())) {
50 return Immediate(constant.ToInt32(), constant.rmode());
51 }
52 return Immediate(constant.ToInt32());
53 }
54
55   Operand ToOperand(InstructionOperand* op, int extra = 0) {
56 DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
57 return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
58 }
59
60   Operand SlotToOperand(int slot_index, int extra = 0) {
61 FrameOffset offset = frame_access_state()->GetFrameOffset(slot_index);
62 return Operand(offset.from_stack_pointer() ? rsp : rbp,
63 offset.offset() + extra);
64 }
65
66   static size_t NextOffset(size_t* offset) {
67 size_t i = *offset;
68 (*offset)++;
69 return i;
70 }
71
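  // The scaled addressing modes are laid out in the same order as the
  // ScaleFactor enum, so the scale is just the distance from the matching
  // times_1 mode (the asserts and DCHECK below sanity-check this).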
72   static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
73 STATIC_ASSERT(0 == static_cast<int>(times_1));
74 STATIC_ASSERT(1 == static_cast<int>(times_2));
75 STATIC_ASSERT(2 == static_cast<int>(times_4));
76 STATIC_ASSERT(3 == static_cast<int>(times_8));
77 int scale = static_cast<int>(mode - one);
78 DCHECK(scale >= 0 && scale < 4);
79 return static_cast<ScaleFactor>(scale);
80 }
81
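  // Decodes the addressing mode encoded in the instruction and consumes the
  // corresponding inputs, starting at *offset, to build an x64 memory operand.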
82   Operand MemoryOperand(size_t* offset) {
83 AddressingMode mode = AddressingModeField::decode(instr_->opcode());
84 switch (mode) {
85 case kMode_MR: {
86 Register base = InputRegister(NextOffset(offset));
87 int32_t disp = 0;
88 return Operand(base, disp);
89 }
90 case kMode_MRI: {
91 Register base = InputRegister(NextOffset(offset));
92 int32_t disp = InputInt32(NextOffset(offset));
93 return Operand(base, disp);
94 }
95 case kMode_MR1:
96 case kMode_MR2:
97 case kMode_MR4:
98 case kMode_MR8: {
99 Register base = InputRegister(NextOffset(offset));
100 Register index = InputRegister(NextOffset(offset));
101 ScaleFactor scale = ScaleFor(kMode_MR1, mode);
102 int32_t disp = 0;
103 return Operand(base, index, scale, disp);
104 }
105 case kMode_MR1I:
106 case kMode_MR2I:
107 case kMode_MR4I:
108 case kMode_MR8I: {
109 Register base = InputRegister(NextOffset(offset));
110 Register index = InputRegister(NextOffset(offset));
111 ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
112 int32_t disp = InputInt32(NextOffset(offset));
113 return Operand(base, index, scale, disp);
114 }
115 case kMode_M1: {
116 Register base = InputRegister(NextOffset(offset));
117 int32_t disp = 0;
118 return Operand(base, disp);
119 }
120 case kMode_M2:
121         UNREACHABLE();  // Should use kMode_MR with a more compact encoding instead.
122 return Operand(no_reg, 0);
123 case kMode_M4:
124 case kMode_M8: {
125 Register index = InputRegister(NextOffset(offset));
126 ScaleFactor scale = ScaleFor(kMode_M1, mode);
127 int32_t disp = 0;
128 return Operand(index, scale, disp);
129 }
130 case kMode_M1I:
131 case kMode_M2I:
132 case kMode_M4I:
133 case kMode_M8I: {
134 Register index = InputRegister(NextOffset(offset));
135 ScaleFactor scale = ScaleFor(kMode_M1I, mode);
136 int32_t disp = InputInt32(NextOffset(offset));
137 return Operand(index, scale, disp);
138 }
139 case kMode_Root: {
140 Register base = kRootRegister;
141 int32_t disp = InputInt32(NextOffset(offset));
142 return Operand(base, disp);
143 }
144 case kMode_None:
145 UNREACHABLE();
146 }
147 UNREACHABLE();
148 }
149
150   Operand MemoryOperand(size_t first_input = 0) {
151 return MemoryOperand(&first_input);
152 }
153 };
154
155 namespace {
156
157 bool HasAddressingMode(Instruction* instr) {
158 return instr->addressing_mode() != kMode_None;
159 }
160
161 bool HasImmediateInput(Instruction* instr, size_t index) {
162 return instr->InputAt(index)->IsImmediate();
163 }
164
165 bool HasRegisterInput(Instruction* instr, size_t index) {
166 return instr->InputAt(index)->IsRegister();
167 }
168
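// Out-of-line code that materializes a float32 NaN by computing 0.0f / 0.0f;
// used, for example, as the slow path of the Float32 min/max sequences below
// when one of the operands is a NaN.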
169 class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
170 public:
171   OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
172 : OutOfLineCode(gen), result_(result) {}
173
174   void Generate() final {
175 __ Xorps(result_, result_);
176 __ Divss(result_, result_);
177 }
178
179 private:
180 XMMRegister const result_;
181 };
182
183 class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
184 public:
185   OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
186 : OutOfLineCode(gen), result_(result) {}
187
188   void Generate() final {
189 __ Xorpd(result_, result_);
190 __ Divsd(result_, result_);
191 }
192
193 private:
194 XMMRegister const result_;
195 };
196
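// Slow path for double-to-int32 truncation: spills the input to the stack and
// calls the DoubleToI stub/builtin, which handles the values the inline
// cvttsd2si(q) fast path cannot represent.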
197 class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
198 public:
199   OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
200 XMMRegister input, StubCallMode stub_mode,
201 UnwindingInfoWriter* unwinding_info_writer)
202 : OutOfLineCode(gen),
203 result_(result),
204 input_(input),
205 stub_mode_(stub_mode),
206 unwinding_info_writer_(unwinding_info_writer),
207 isolate_(gen->isolate()),
208 zone_(gen->zone()) {}
209
210   void Generate() final {
211 __ AllocateStackSpace(kDoubleSize);
212 unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
213 kDoubleSize);
214 __ Movsd(MemOperand(rsp, 0), input_);
215 if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
216 // A direct call to a wasm runtime stub defined in this module.
217 // Just encode the stub index. This will be patched when the code
218 // is added to the native module and copied into wasm code space.
219 __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
220 } else if (tasm()->options().inline_offheap_trampolines) {
221 // With embedded builtins we do not need the isolate here. This allows
222 // the call to be generated asynchronously.
223 __ CallBuiltin(Builtins::kDoubleToI);
224 } else {
225 __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
226 }
227 __ movl(result_, MemOperand(rsp, 0));
228 __ addq(rsp, Immediate(kDoubleSize));
229 unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
230 -kDoubleSize);
231 }
232
233 private:
234 Register const result_;
235 XMMRegister const input_;
236 StubCallMode stub_mode_;
237 UnwindingInfoWriter* const unwinding_info_writer_;
238 Isolate* isolate_;
239 Zone* zone_;
240 };
241
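// Slow path of the write barrier: skips the barrier for Smi values (when the
// mode permits) and for values on pages the GC is not interested in, otherwise
// calls the record-write stub (or the ephemeron key barrier) with the address
// of the updated slot.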
242 class OutOfLineRecordWrite final : public OutOfLineCode {
243 public:
244   OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
245 Register value, Register scratch0, Register scratch1,
246 RecordWriteMode mode, StubCallMode stub_mode)
247 : OutOfLineCode(gen),
248 object_(object),
249 operand_(operand),
250 value_(value),
251 scratch0_(scratch0),
252 scratch1_(scratch1),
253 mode_(mode),
254 stub_mode_(stub_mode),
255 zone_(gen->zone()) {}
256
257   void Generate() final {
258 if (mode_ > RecordWriteMode::kValueIsPointer) {
259 __ JumpIfSmi(value_, exit());
260 }
261 if (COMPRESS_POINTERS_BOOL) {
262 __ DecompressTaggedPointer(value_, value_);
263 }
264 __ CheckPageFlag(value_, scratch0_,
265 MemoryChunk::kPointersToHereAreInterestingMask, zero,
266 exit());
267 __ leaq(scratch1_, operand_);
268
269 RememberedSetAction const remembered_set_action =
270 mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
271 : OMIT_REMEMBERED_SET;
272 SaveFPRegsMode const save_fp_mode =
273 frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
274
275 if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
276 __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
277 } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
278 // A direct call to a wasm runtime stub defined in this module.
279 // Just encode the stub index. This will be patched when the code
280 // is added to the native module and copied into wasm code space.
281 __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
282 save_fp_mode, wasm::WasmCode::kRecordWrite);
283 } else {
284 __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
285 save_fp_mode);
286 }
287 }
288
289 private:
290 Register const object_;
291 Operand const operand_;
292 Register const value_;
293 Register const scratch0_;
294 Register const scratch1_;
295 RecordWriteMode const mode_;
296 StubCallMode const stub_mode_;
297 Zone* zone_;
298 };
299
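// Out-of-line code that raises a wasm trap; the trap id is taken from the
// instruction's last input.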
300 class WasmOutOfLineTrap : public OutOfLineCode {
301 public:
302   WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
303 : OutOfLineCode(gen), gen_(gen), instr_(instr) {}
304
305   void Generate() override {
306 X64OperandConverter i(gen_, instr_);
307 TrapId trap_id =
308 static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
309 GenerateWithTrapId(trap_id);
310 }
311
312 protected:
313 CodeGenerator* gen_;
314
315   void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }
316
317 private:
318   void GenerateCallToTrap(TrapId trap_id) {
319 if (!gen_->wasm_runtime_exception_support()) {
320 // We cannot test calls to the runtime in cctest/test-run-wasm.
321 // Therefore we emit a call to C here instead of a call to the runtime.
322 __ PrepareCallCFunction(0);
323 __ CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(),
324 0);
325 __ LeaveFrame(StackFrame::WASM);
326 auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
327 size_t pop_size =
328 call_descriptor->StackParameterCount() * kSystemPointerSize;
329       // Use rcx as a scratch register; we return immediately anyway.
330 __ Ret(static_cast<int>(pop_size), rcx);
331 } else {
332 gen_->AssembleSourcePosition(instr_);
333 // A direct call to a wasm runtime stub defined in this module.
334 // Just encode the stub index. This will be patched when the code
335 // is added to the native module and copied into wasm code space.
336 __ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
337 ReferenceMap* reference_map =
338 gen_->zone()->New<ReferenceMap>(gen_->zone());
339 gen_->RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
340 __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
341 }
342 }
343
344 Instruction* instr_;
345 };
346
347 class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {
348 public:
349   WasmProtectedInstructionTrap(CodeGenerator* gen, int pc, Instruction* instr)
350 : WasmOutOfLineTrap(gen, instr), pc_(pc) {}
351
352   void Generate() final {
353 gen_->AddProtectedInstructionLanding(pc_, __ pc_offset());
354 GenerateWithTrapId(TrapId::kTrapMemOutOfBounds);
355 }
356
357 private:
358 int pc_;
359 };
360
361 void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
362 InstructionCode opcode, Instruction* instr,
363 int pc) {
364 const MemoryAccessMode access_mode =
365 static_cast<MemoryAccessMode>(MiscField::decode(opcode));
366 if (access_mode == kMemoryAccessProtected) {
367 zone->New<WasmProtectedInstructionTrap>(codegen, pc, instr);
368 }
369 }
370
371 void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
372 InstructionCode opcode, Instruction* instr,
373 X64OperandConverter const& i) {
374 const MemoryAccessMode access_mode =
375 static_cast<MemoryAccessMode>(MiscField::decode(opcode));
376 if (access_mode == kMemoryAccessPoisoned) {
377 Register value = i.OutputRegister();
378 codegen->tasm()->andq(value, kSpeculationPoisonRegister);
379 }
380 }
381
382 } // namespace
383
384 #define ASSEMBLE_UNOP(asm_instr) \
385 do { \
386 if (instr->Output()->IsRegister()) { \
387 __ asm_instr(i.OutputRegister()); \
388 } else { \
389 __ asm_instr(i.OutputOperand()); \
390 } \
391 } while (false)
392
393 #define ASSEMBLE_BINOP(asm_instr) \
394 do { \
395 if (HasAddressingMode(instr)) { \
396 size_t index = 1; \
397 Operand right = i.MemoryOperand(&index); \
398 __ asm_instr(i.InputRegister(0), right); \
399 } else { \
400 if (HasImmediateInput(instr, 1)) { \
401 if (HasRegisterInput(instr, 0)) { \
402 __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
403 } else { \
404 __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
405 } \
406 } else { \
407 if (HasRegisterInput(instr, 1)) { \
408 __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \
409 } else { \
410 __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \
411 } \
412 } \
413 } \
414 } while (false)
415
416 #define ASSEMBLE_COMPARE(asm_instr) \
417 do { \
418 if (HasAddressingMode(instr)) { \
419 size_t index = 0; \
420 Operand left = i.MemoryOperand(&index); \
421 if (HasImmediateInput(instr, index)) { \
422 __ asm_instr(left, i.InputImmediate(index)); \
423 } else { \
424 __ asm_instr(left, i.InputRegister(index)); \
425 } \
426 } else { \
427 if (HasImmediateInput(instr, 1)) { \
428 if (HasRegisterInput(instr, 0)) { \
429 __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
430 } else { \
431 __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
432 } \
433 } else { \
434 if (HasRegisterInput(instr, 1)) { \
435 __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \
436 } else { \
437 __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \
438 } \
439 } \
440 } \
441 } while (false)
442
443 #define ASSEMBLE_MULT(asm_instr) \
444 do { \
445 if (HasImmediateInput(instr, 1)) { \
446 if (HasRegisterInput(instr, 0)) { \
447 __ asm_instr(i.OutputRegister(), i.InputRegister(0), \
448 i.InputImmediate(1)); \
449 } else { \
450 __ asm_instr(i.OutputRegister(), i.InputOperand(0), \
451 i.InputImmediate(1)); \
452 } \
453 } else { \
454 if (HasRegisterInput(instr, 1)) { \
455 __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \
456 } else { \
457 __ asm_instr(i.OutputRegister(), i.InputOperand(1)); \
458 } \
459 } \
460 } while (false)
461
462 #define ASSEMBLE_SHIFT(asm_instr, width) \
463 do { \
464 if (HasImmediateInput(instr, 1)) { \
465 if (instr->Output()->IsRegister()) { \
466 __ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \
467 } else { \
468 __ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1))); \
469 } \
470 } else { \
471 if (instr->Output()->IsRegister()) { \
472 __ asm_instr##_cl(i.OutputRegister()); \
473 } else { \
474 __ asm_instr##_cl(i.OutputOperand()); \
475 } \
476 } \
477 } while (false)
478
479 #define ASSEMBLE_MOVX(asm_instr) \
480 do { \
481 if (HasAddressingMode(instr)) { \
482 __ asm_instr(i.OutputRegister(), i.MemoryOperand()); \
483 } else if (HasRegisterInput(instr, 0)) { \
484 __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
485 } else { \
486 __ asm_instr(i.OutputRegister(), i.InputOperand(0)); \
487 } \
488 } while (false)
489
490 #define ASSEMBLE_SSE_BINOP(asm_instr) \
491 do { \
492 if (HasAddressingMode(instr)) { \
493 size_t index = 1; \
494 Operand right = i.MemoryOperand(&index); \
495 __ asm_instr(i.InputDoubleRegister(0), right); \
496 } else { \
497 if (instr->InputAt(1)->IsFPRegister()) { \
498 __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
499 } else { \
500 __ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1)); \
501 } \
502 } \
503 } while (false)
504
505 #define ASSEMBLE_SSE_UNOP(asm_instr) \
506 do { \
507 if (instr->InputAt(0)->IsFPRegister()) { \
508 __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
509 } else { \
510 __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0)); \
511 } \
512 } while (false)
513
514 #define ASSEMBLE_AVX_BINOP(asm_instr) \
515 do { \
516 CpuFeatureScope avx_scope(tasm(), AVX); \
517 if (HasAddressingMode(instr)) { \
518 size_t index = 1; \
519 Operand right = i.MemoryOperand(&index); \
520 __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), right); \
521 } else { \
522 if (instr->InputAt(1)->IsFPRegister()) { \
523 __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
524 i.InputDoubleRegister(1)); \
525 } else { \
526 __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
527 i.InputOperand(1)); \
528 } \
529 } \
530 } while (false)
531
532 #define ASSEMBLE_IEEE754_BINOP(name) \
533 do { \
534 __ PrepareCallCFunction(2); \
535 __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
536 } while (false)
537
538 #define ASSEMBLE_IEEE754_UNOP(name) \
539 do { \
540 __ PrepareCallCFunction(1); \
541 __ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \
542 } while (false)
543
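// The atomic binops are emitted as a compare-and-swap loop: load the old value
// into rax, compute the updated value in a temp register, then lock cmpxchg it
// back into memory, retrying if another thread changed the value in between.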
544 #define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
545 do { \
546 Label binop; \
547 __ bind(&binop); \
548 __ mov_inst(rax, i.MemoryOperand(1)); \
549 __ movl(i.TempRegister(0), rax); \
550 __ bin_inst(i.TempRegister(0), i.InputRegister(0)); \
551 __ lock(); \
552 __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); \
553 __ j(not_equal, &binop); \
554 } while (false)
555
556 #define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
557 do { \
558 Label binop; \
559 __ bind(&binop); \
560 __ mov_inst(rax, i.MemoryOperand(1)); \
561 __ movq(i.TempRegister(0), rax); \
562 __ bin_inst(i.TempRegister(0), i.InputRegister(0)); \
563 __ lock(); \
564 __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); \
565 __ j(not_equal, &binop); \
566 } while (false)
567
568 // Handles both SSE and AVX codegen. For SSE we use DefineSameAsFirst, so the
569 // dst and first src will be the same. For AVX we don't restrict it that way, so
570 // we will omit unnecessary moves.
571 #define ASSEMBLE_SIMD_BINOP(opcode) \
572 do { \
573 if (CpuFeatures::IsSupported(AVX)) { \
574 CpuFeatureScope avx_scope(tasm(), AVX); \
575 __ v##opcode(i.OutputSimd128Register(), i.InputSimd128Register(0), \
576 i.InputSimd128Register(1)); \
577 } else { \
578 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); \
579 __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1)); \
580 } \
581 } while (false)
582
583 #define ASSEMBLE_SIMD_INSTR(opcode, dst_operand, index) \
584 do { \
585 if (instr->InputAt(index)->IsSimd128Register()) { \
586 __ opcode(dst_operand, i.InputSimd128Register(index)); \
587 } else { \
588 __ opcode(dst_operand, i.InputOperand(index)); \
589 } \
590 } while (false)
591
592 #define ASSEMBLE_SIMD_IMM_INSTR(opcode, dst_operand, index, imm) \
593 do { \
594 if (instr->InputAt(index)->IsSimd128Register()) { \
595 __ opcode(dst_operand, i.InputSimd128Register(index), imm); \
596 } else { \
597 __ opcode(dst_operand, i.InputOperand(index), imm); \
598 } \
599 } while (false)
600
601 #define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode) \
602 do { \
603 XMMRegister dst = i.OutputSimd128Register(); \
604 DCHECK_EQ(dst, i.InputSimd128Register(0)); \
605 byte input_index = instr->InputCount() == 2 ? 1 : 0; \
606 ASSEMBLE_SIMD_INSTR(opcode, dst, input_index); \
607 } while (false)
608
609 #define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, imm) \
610 do { \
611 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); \
612 if (instr->InputAt(1)->IsSimd128Register()) { \
613 __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1), imm); \
614 } else { \
615 __ opcode(i.OutputSimd128Register(), i.InputOperand(1), imm); \
616 } \
617 } while (false)
618
619 #define ASSEMBLE_SIMD_ALL_TRUE(opcode) \
620 do { \
621 Register dst = i.OutputRegister(); \
622 XMMRegister tmp = i.TempSimd128Register(0); \
623 __ xorq(dst, dst); \
624 __ Pxor(tmp, tmp); \
625 __ opcode(tmp, i.InputSimd128Register(0)); \
626 __ Ptest(tmp, tmp); \
627 __ setcc(equal, dst); \
628 } while (false)
629
630 // This macro will directly emit the opcode if the shift is an immediate - the
631 // shift value will be taken modulo 2^width. Otherwise, it will emit code to
632 // perform the modulus operation.
633 #define ASSEMBLE_SIMD_SHIFT(opcode, width) \
634 do { \
635 XMMRegister dst = i.OutputSimd128Register(); \
636 if (HasImmediateInput(instr, 1)) { \
637 if (CpuFeatures::IsSupported(AVX)) { \
638 CpuFeatureScope avx_scope(tasm(), AVX); \
639 __ v##opcode(dst, i.InputSimd128Register(0), \
640 byte{i.InputInt##width(1)}); \
641 } else { \
642 DCHECK_EQ(dst, i.InputSimd128Register(0)); \
643 __ opcode(dst, byte{i.InputInt##width(1)}); \
644 } \
645 } else { \
646 XMMRegister tmp = i.TempSimd128Register(0); \
647 Register tmp_shift = i.TempRegister(1); \
648 constexpr int mask = (1 << width) - 1; \
649 __ movq(tmp_shift, i.InputRegister(1)); \
650 __ andq(tmp_shift, Immediate(mask)); \
651 __ Movq(tmp, tmp_shift); \
652 if (CpuFeatures::IsSupported(AVX)) { \
653 CpuFeatureScope avx_scope(tasm(), AVX); \
654 __ v##opcode(dst, i.InputSimd128Register(0), tmp); \
655 } else { \
656 DCHECK_EQ(dst, i.InputSimd128Register(0)); \
657 __ opcode(dst, tmp); \
658 } \
659 } \
660 } while (false)
661
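// Emits a pinsr-style lane insert; the inserted value may come from memory,
// from a general-purpose register, or from an FP register (routed through
// kScratchRegister).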
662 #define ASSEMBLE_PINSR(ASM_INSTR) \
663 do { \
664 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); \
665 XMMRegister dst = i.OutputSimd128Register(); \
666 XMMRegister src = i.InputSimd128Register(0); \
667 uint8_t laneidx = i.InputUint8(1); \
668 if (HasAddressingMode(instr)) { \
669 __ ASM_INSTR(dst, src, i.MemoryOperand(2), laneidx); \
670 break; \
671 } \
672 if (instr->InputAt(2)->IsFPRegister()) { \
673 __ Movq(kScratchRegister, i.InputDoubleRegister(2)); \
674 __ ASM_INSTR(dst, src, kScratchRegister, laneidx); \
675 } else if (instr->InputAt(2)->IsRegister()) { \
676 __ ASM_INSTR(dst, src, i.InputRegister(2), laneidx); \
677 } else { \
678 __ ASM_INSTR(dst, src, i.InputOperand(2), laneidx); \
679 } \
680 } while (false)
681
682 void CodeGenerator::AssembleDeconstructFrame() {
683 unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
684 __ movq(rsp, rbp);
685 __ popq(rbp);
686 }
687
688 void CodeGenerator::AssemblePrepareTailCall() {
689 if (frame_access_state()->has_frame()) {
690 __ movq(rbp, MemOperand(rbp, 0));
691 }
692 frame_access_state()->SetFrameAccessToSP();
693 }
694
695 void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
696 Register scratch1,
697 Register scratch2,
698 Register scratch3) {
699 DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
700 Label done;
701
702 // Check if current frame is an arguments adaptor frame.
703 __ cmpq(Operand(rbp, CommonFrameConstants::kContextOrFrameTypeOffset),
704 Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
705 __ j(not_equal, &done, Label::kNear);
706
707   // Load the arguments count from the current arguments adaptor frame (note
708   // that it does not include the receiver).
709 Register caller_args_count_reg = scratch1;
710 __ SmiUntag(caller_args_count_reg,
711 Operand(rbp, ArgumentsAdaptorFrameConstants::kLengthOffset));
712
713 __ PrepareForTailCall(args_reg, caller_args_count_reg, scratch2, scratch3);
714 __ bind(&done);
715 }
716
717 namespace {
718
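// Grows or shrinks the stack before a tail call so that the stack pointer ends
// up at the slot position the callee expects (new_slot_above_sp), or, for the
// tier-up tail call, back at its value from before frame construction.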
719 void AdjustStackPointerForTailCall(Instruction* instr,
720 TurboAssembler* assembler, Linkage* linkage,
721 OptimizedCompilationInfo* info,
722 FrameAccessState* state,
723 int new_slot_above_sp,
724 bool allow_shrinkage = true) {
725 int stack_slot_delta;
726 if (instr->HasCallDescriptorFlag(CallDescriptor::kIsTailCallForTierUp)) {
727 // For this special tail-call mode, the callee has the same arguments and
728     // linkage as the caller, and arguments adaptor frames must be preserved.
729     // Thus we simply have to reset the stack pointer register to its original
730     // value before frame construction.
731 // See also: AssembleConstructFrame.
732 DCHECK(!info->is_osr());
733 DCHECK_EQ(linkage->GetIncomingDescriptor()->CalleeSavedRegisters(), 0);
734 DCHECK_EQ(linkage->GetIncomingDescriptor()->CalleeSavedFPRegisters(), 0);
735 DCHECK_EQ(state->frame()->GetReturnSlotCount(), 0);
736 stack_slot_delta = (state->frame()->GetTotalFrameSlotCount() -
737 kReturnAddressStackSlotCount) *
738 -1;
739 DCHECK_LE(stack_slot_delta, 0);
740 } else {
741 int current_sp_offset = state->GetSPToFPSlotCount() +
742 StandardFrameConstants::kFixedSlotCountAboveFp;
743 stack_slot_delta = new_slot_above_sp - current_sp_offset;
744 }
745
746 if (stack_slot_delta > 0) {
747 assembler->AllocateStackSpace(stack_slot_delta * kSystemPointerSize);
748 state->IncreaseSPDelta(stack_slot_delta);
749 } else if (allow_shrinkage && stack_slot_delta < 0) {
750 assembler->addq(rsp, Immediate(-stack_slot_delta * kSystemPointerSize));
751 state->IncreaseSPDelta(stack_slot_delta);
752 }
753 }
754
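// Materializes a 128-bit SIMD constant in the given XMM register from four
// 32-bit immediate words.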
755 void SetupSimdImmediateInRegister(TurboAssembler* assembler, uint32_t* imms,
756 XMMRegister reg) {
757 assembler->Move(reg, make_uint64(imms[3], imms[2]),
758 make_uint64(imms[1], imms[0]));
759 }
760
761 } // namespace
762
763 void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
764 int first_unused_stack_slot) {
765 CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
766 ZoneVector<MoveOperands*> pushes(zone());
767 GetPushCompatibleMoves(instr, flags, &pushes);
768
769 if (!pushes.empty() &&
770 (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
771 first_unused_stack_slot)) {
772 DCHECK(!instr->HasCallDescriptorFlag(CallDescriptor::kIsTailCallForTierUp));
773 X64OperandConverter g(this, instr);
774 for (auto move : pushes) {
775 LocationOperand destination_location(
776 LocationOperand::cast(move->destination()));
777 InstructionOperand source(move->source());
778 AdjustStackPointerForTailCall(instr, tasm(), linkage(), info(),
779 frame_access_state(),
780 destination_location.index());
781 if (source.IsStackSlot()) {
782 LocationOperand source_location(LocationOperand::cast(source));
783 __ Push(g.SlotToOperand(source_location.index()));
784 } else if (source.IsRegister()) {
785 LocationOperand source_location(LocationOperand::cast(source));
786 __ Push(source_location.GetRegister());
787 } else if (source.IsImmediate()) {
788 __ Push(Immediate(ImmediateOperand::cast(source).inline_value()));
789 } else {
790         // Pushes of non-scalar data types are not supported.
791 UNIMPLEMENTED();
792 }
793 frame_access_state()->IncreaseSPDelta(1);
794 move->Eliminate();
795 }
796 }
797 AdjustStackPointerForTailCall(instr, tasm(), linkage(), info(),
798 frame_access_state(), first_unused_stack_slot,
799 false);
800 }
801
802 void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
803 int first_unused_stack_slot) {
804 AdjustStackPointerForTailCall(instr, tasm(), linkage(), info(),
805 frame_access_state(), first_unused_stack_slot);
806 }
807
808 // Check that {kJavaScriptCallCodeStartRegister} is correct.
809 void CodeGenerator::AssembleCodeStartRegisterCheck() {
810 __ ComputeCodeStartAddress(rbx);
811 __ cmpq(rbx, kJavaScriptCallCodeStartRegister);
812 __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
813 }
814
815 // Check if the code object is marked for deoptimization. If it is, then it
816 // jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
817 // to:
818 // 1. read from memory the word that contains that bit, which can be found in
819 // the flags in the referenced {CodeDataContainer} object;
820 // 2. test kMarkedForDeoptimizationBit in those flags; and
821 // 3. if it is not zero then it jumps to the builtin.
822 void CodeGenerator::BailoutIfDeoptimized() {
823 int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
824 __ LoadTaggedPointerField(rbx,
825 Operand(kJavaScriptCallCodeStartRegister, offset));
826 __ testl(FieldOperand(rbx, CodeDataContainer::kKindSpecificFlagsOffset),
827 Immediate(1 << Code::kMarkedForDeoptimizationBit));
828 __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
829 RelocInfo::CODE_TARGET, not_zero);
830 }
831
832 void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
833 // Set a mask which has all bits set in the normal case, but has all
834 // bits cleared if we are speculatively executing the wrong PC.
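  // A single cmov flips the mask from zero to all-ones only when the code
  // start register matches the freshly computed code start address.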
835 __ ComputeCodeStartAddress(rbx);
836 __ xorq(kSpeculationPoisonRegister, kSpeculationPoisonRegister);
837 __ cmpq(kJavaScriptCallCodeStartRegister, rbx);
838 __ movq(rbx, Immediate(-1));
839 __ cmovq(equal, kSpeculationPoisonRegister, rbx);
840 }
841
842 void CodeGenerator::AssembleRegisterArgumentPoisoning() {
843 __ andq(kJSFunctionRegister, kSpeculationPoisonRegister);
844 __ andq(kContextRegister, kSpeculationPoisonRegister);
845 __ andq(rsp, kSpeculationPoisonRegister);
846 }
847
848 // Assembles an instruction after register allocation, producing machine code.
849 CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
850 Instruction* instr) {
851 X64OperandConverter i(this, instr);
852 InstructionCode opcode = instr->opcode();
853 ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
854 switch (arch_opcode) {
855 case kArchCallCodeObject: {
856 if (HasImmediateInput(instr, 0)) {
857 Handle<Code> code = i.InputCode(0);
858 __ Call(code, RelocInfo::CODE_TARGET);
859 } else {
860 Register reg = i.InputRegister(0);
861 DCHECK_IMPLIES(
862 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
863 reg == kJavaScriptCallCodeStartRegister);
864 __ LoadCodeObjectEntry(reg, reg);
865 if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
866 __ RetpolineCall(reg);
867 } else {
868 __ call(reg);
869 }
870 }
871 RecordCallPosition(instr);
872 frame_access_state()->ClearSPDelta();
873 break;
874 }
875 case kArchCallBuiltinPointer: {
876 DCHECK(!HasImmediateInput(instr, 0));
877 Register builtin_index = i.InputRegister(0);
878 __ CallBuiltinByIndex(builtin_index);
879 RecordCallPosition(instr);
880 frame_access_state()->ClearSPDelta();
881 break;
882 }
883 case kArchCallWasmFunction: {
884 if (HasImmediateInput(instr, 0)) {
885 Constant constant = i.ToConstant(instr->InputAt(0));
886 Address wasm_code = static_cast<Address>(constant.ToInt64());
887 if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
888 __ near_call(wasm_code, constant.rmode());
889 } else {
890 if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
891 __ RetpolineCall(wasm_code, constant.rmode());
892 } else {
893 __ Call(wasm_code, constant.rmode());
894 }
895 }
896 } else {
897 Register reg = i.InputRegister(0);
898 if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
899 __ RetpolineCall(reg);
900 } else {
901 __ call(reg);
902 }
903 }
904 RecordCallPosition(instr);
905 frame_access_state()->ClearSPDelta();
906 break;
907 }
908 case kArchTailCallCodeObjectFromJSFunction:
909 if (!instr->HasCallDescriptorFlag(CallDescriptor::kIsTailCallForTierUp)) {
910 AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
911 i.TempRegister(0), i.TempRegister(1),
912 i.TempRegister(2));
913 }
914 V8_FALLTHROUGH;
915 case kArchTailCallCodeObject: {
916 if (HasImmediateInput(instr, 0)) {
917 Handle<Code> code = i.InputCode(0);
918 __ Jump(code, RelocInfo::CODE_TARGET);
919 } else {
920 Register reg = i.InputRegister(0);
921 DCHECK_IMPLIES(
922 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
923 reg == kJavaScriptCallCodeStartRegister);
924 __ LoadCodeObjectEntry(reg, reg);
925 if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
926 __ RetpolineJump(reg);
927 } else {
928 __ jmp(reg);
929 }
930 }
931 unwinding_info_writer_.MarkBlockWillExit();
932 frame_access_state()->ClearSPDelta();
933 frame_access_state()->SetFrameAccessToDefault();
934 break;
935 }
936 case kArchTailCallWasm: {
937 if (HasImmediateInput(instr, 0)) {
938 Constant constant = i.ToConstant(instr->InputAt(0));
939 Address wasm_code = static_cast<Address>(constant.ToInt64());
940 if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
941 __ near_jmp(wasm_code, constant.rmode());
942 } else {
943 __ Move(kScratchRegister, wasm_code, constant.rmode());
944 __ jmp(kScratchRegister);
945 }
946 } else {
947 Register reg = i.InputRegister(0);
948 if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
949 __ RetpolineJump(reg);
950 } else {
951 __ jmp(reg);
952 }
953 }
954 unwinding_info_writer_.MarkBlockWillExit();
955 frame_access_state()->ClearSPDelta();
956 frame_access_state()->SetFrameAccessToDefault();
957 break;
958 }
959 case kArchTailCallAddress: {
960 CHECK(!HasImmediateInput(instr, 0));
961 Register reg = i.InputRegister(0);
962 DCHECK_IMPLIES(
963 instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
964 reg == kJavaScriptCallCodeStartRegister);
965 if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
966 __ RetpolineJump(reg);
967 } else {
968 __ jmp(reg);
969 }
970 unwinding_info_writer_.MarkBlockWillExit();
971 frame_access_state()->ClearSPDelta();
972 frame_access_state()->SetFrameAccessToDefault();
973 break;
974 }
975 case kArchCallJSFunction: {
976 Register func = i.InputRegister(0);
977 if (FLAG_debug_code) {
978 // Check the function's context matches the context argument.
979 __ cmp_tagged(rsi, FieldOperand(func, JSFunction::kContextOffset));
980 __ Assert(equal, AbortReason::kWrongFunctionContext);
981 }
982 static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch");
983 __ LoadTaggedPointerField(rcx,
984 FieldOperand(func, JSFunction::kCodeOffset));
985 __ CallCodeObject(rcx);
986 frame_access_state()->ClearSPDelta();
987 RecordCallPosition(instr);
988 break;
989 }
990 case kArchPrepareCallCFunction: {
991 // Frame alignment requires using FP-relative frame addressing.
992 frame_access_state()->SetFrameAccessToFP();
993 int const num_parameters = MiscField::decode(instr->opcode());
994 __ PrepareCallCFunction(num_parameters);
995 break;
996 }
997 case kArchSaveCallerRegisters: {
998 fp_mode_ =
999 static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
1000 DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
1001 // kReturnRegister0 should have been saved before entering the stub.
1002 int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
1003 DCHECK(IsAligned(bytes, kSystemPointerSize));
1004 DCHECK_EQ(0, frame_access_state()->sp_delta());
1005 frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
1006 DCHECK(!caller_registers_saved_);
1007 caller_registers_saved_ = true;
1008 break;
1009 }
1010 case kArchRestoreCallerRegisters: {
1011 DCHECK(fp_mode_ ==
1012 static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
1013 DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
1014 // Don't overwrite the returned value.
1015 int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
1016 frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
1017 DCHECK_EQ(0, frame_access_state()->sp_delta());
1018 DCHECK(caller_registers_saved_);
1019 caller_registers_saved_ = false;
1020 break;
1021 }
1022 case kArchPrepareTailCall:
1023 AssemblePrepareTailCall();
1024 break;
1025 case kArchCallCFunction: {
1026 int const num_parameters = MiscField::decode(instr->opcode());
1027 Label return_location;
1028 if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
1029 // Put the return address in a stack slot.
1030 __ leaq(kScratchRegister, Operand(&return_location, 0));
1031 __ movq(MemOperand(rbp, WasmExitFrameConstants::kCallingPCOffset),
1032 kScratchRegister);
1033 }
1034 if (HasImmediateInput(instr, 0)) {
1035 ExternalReference ref = i.InputExternalReference(0);
1036 __ CallCFunction(ref, num_parameters);
1037 } else {
1038 Register func = i.InputRegister(0);
1039 __ CallCFunction(func, num_parameters);
1040 }
1041 __ bind(&return_location);
1042 if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
1043 RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt);
1044 }
1045 frame_access_state()->SetFrameAccessToDefault();
1046 // Ideally, we should decrement SP delta to match the change of stack
1047 // pointer in CallCFunction. However, for certain architectures (e.g.
1048       // ARM), there may be a stricter alignment requirement, causing the old SP
1049       // to be saved on the stack. In those cases, we cannot calculate the SP
1050 // delta statically.
1051 frame_access_state()->ClearSPDelta();
1052 if (caller_registers_saved_) {
1053 // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
1054 // Here, we assume the sequence to be:
1055 // kArchSaveCallerRegisters;
1056 // kArchCallCFunction;
1057 // kArchRestoreCallerRegisters;
1058 int bytes =
1059 __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
1060 frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
1061 }
1062 // TODO(tebbi): Do we need an lfence here?
1063 break;
1064 }
1065 case kArchJmp:
1066 AssembleArchJump(i.InputRpo(0));
1067 break;
1068 case kArchBinarySearchSwitch:
1069 AssembleArchBinarySearchSwitch(instr);
1070 break;
1071 case kArchTableSwitch:
1072 AssembleArchTableSwitch(instr);
1073 break;
1074 case kArchComment:
1075 __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
1076 break;
1077 case kArchAbortCSAAssert:
1078 DCHECK(i.InputRegister(0) == rdx);
1079 {
1080 // We don't actually want to generate a pile of code for this, so just
1081 // claim there is a stack frame, without generating one.
1082 FrameScope scope(tasm(), StackFrame::NONE);
1083 __ Call(
1084 isolate()->builtins()->builtin_handle(Builtins::kAbortCSAAssert),
1085 RelocInfo::CODE_TARGET);
1086 }
1087 __ int3();
1088 unwinding_info_writer_.MarkBlockWillExit();
1089 break;
1090 case kArchDebugBreak:
1091 __ DebugBreak();
1092 break;
1093 case kArchThrowTerminator:
1094 unwinding_info_writer_.MarkBlockWillExit();
1095 break;
1096 case kArchNop:
1097 // don't emit code for nops.
1098 break;
1099 case kArchDeoptimize: {
1100 DeoptimizationExit* exit =
1101 BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
1102 __ jmp(exit->label());
1103 break;
1104 }
1105 case kArchRet:
1106 AssembleReturn(instr->InputAt(0));
1107 break;
1108 case kArchFramePointer:
1109 __ movq(i.OutputRegister(), rbp);
1110 break;
1111 case kArchParentFramePointer:
1112 if (frame_access_state()->has_frame()) {
1113 __ movq(i.OutputRegister(), Operand(rbp, 0));
1114 } else {
1115 __ movq(i.OutputRegister(), rbp);
1116 }
1117 break;
1118 case kArchStackPointerGreaterThan: {
1119 // Potentially apply an offset to the current stack pointer before the
1120       // comparison to account for the size difference of an optimized frame versus
1121 // the contained unoptimized frames.
1122
1123 Register lhs_register = rsp;
1124 uint32_t offset;
1125
1126 if (ShouldApplyOffsetToStackCheck(instr, &offset)) {
1127 lhs_register = kScratchRegister;
1128 __ leaq(lhs_register, Operand(rsp, static_cast<int32_t>(offset) * -1));
1129 }
1130
1131 constexpr size_t kValueIndex = 0;
1132 if (HasAddressingMode(instr)) {
1133 __ cmpq(lhs_register, i.MemoryOperand(kValueIndex));
1134 } else {
1135 __ cmpq(lhs_register, i.InputRegister(kValueIndex));
1136 }
1137 break;
1138 }
1139 case kArchStackCheckOffset:
1140 __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
1141 break;
1142 case kArchTruncateDoubleToI: {
1143 auto result = i.OutputRegister();
1144 auto input = i.InputDoubleRegister(0);
1145 auto ool = zone()->New<OutOfLineTruncateDoubleToI>(
1146 this, result, input, DetermineStubCallMode(),
1147 &unwinding_info_writer_);
1148       // We use Cvttsd2siq instead of Cvttsd2si for performance reasons. Using
1149       // Cvttsd2siq requires the movl below to avoid sign extension.
1150 __ Cvttsd2siq(result, input);
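      // Cvttsd2siq yields INT64_MIN for NaN and out-of-range inputs; the cmpq
      // with 1 below overflows exactly for that value, which sends those cases
      // to the out-of-line stub call.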
1151 __ cmpq(result, Immediate(1));
1152 __ j(overflow, ool->entry());
1153 __ bind(ool->exit());
1154 __ movl(result, result);
1155 break;
1156 }
1157 case kArchStoreWithWriteBarrier: {
1158 RecordWriteMode mode =
1159 static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
1160 Register object = i.InputRegister(0);
1161 size_t index = 0;
1162 Operand operand = i.MemoryOperand(&index);
1163 Register value = i.InputRegister(index);
1164 Register scratch0 = i.TempRegister(0);
1165 Register scratch1 = i.TempRegister(1);
1166 auto ool = zone()->New<OutOfLineRecordWrite>(this, object, operand, value,
1167 scratch0, scratch1, mode,
1168 DetermineStubCallMode());
1169 __ StoreTaggedField(operand, value);
1170 __ CheckPageFlag(object, scratch0,
1171 MemoryChunk::kPointersFromHereAreInterestingMask,
1172 not_zero, ool->entry());
1173 __ bind(ool->exit());
1174 break;
1175 }
1176 case kArchWordPoisonOnSpeculation:
1177 DCHECK_EQ(i.OutputRegister(), i.InputRegister(0));
1178 __ andq(i.InputRegister(0), kSpeculationPoisonRegister);
1179 break;
1180 case kX64MFence:
1181 __ mfence();
1182 break;
1183 case kX64LFence:
1184 __ lfence();
1185 break;
1186 case kArchStackSlot: {
1187 FrameOffset offset =
1188 frame_access_state()->GetFrameOffset(i.InputInt32(0));
1189 Register base = offset.from_stack_pointer() ? rsp : rbp;
1190 __ leaq(i.OutputRegister(), Operand(base, offset.offset()));
1191 break;
1192 }
1193 case kIeee754Float64Acos:
1194 ASSEMBLE_IEEE754_UNOP(acos);
1195 break;
1196 case kIeee754Float64Acosh:
1197 ASSEMBLE_IEEE754_UNOP(acosh);
1198 break;
1199 case kIeee754Float64Asin:
1200 ASSEMBLE_IEEE754_UNOP(asin);
1201 break;
1202 case kIeee754Float64Asinh:
1203 ASSEMBLE_IEEE754_UNOP(asinh);
1204 break;
1205 case kIeee754Float64Atan:
1206 ASSEMBLE_IEEE754_UNOP(atan);
1207 break;
1208 case kIeee754Float64Atanh:
1209 ASSEMBLE_IEEE754_UNOP(atanh);
1210 break;
1211 case kIeee754Float64Atan2:
1212 ASSEMBLE_IEEE754_BINOP(atan2);
1213 break;
1214 case kIeee754Float64Cbrt:
1215 ASSEMBLE_IEEE754_UNOP(cbrt);
1216 break;
1217 case kIeee754Float64Cos:
1218 ASSEMBLE_IEEE754_UNOP(cos);
1219 break;
1220 case kIeee754Float64Cosh:
1221 ASSEMBLE_IEEE754_UNOP(cosh);
1222 break;
1223 case kIeee754Float64Exp:
1224 ASSEMBLE_IEEE754_UNOP(exp);
1225 break;
1226 case kIeee754Float64Expm1:
1227 ASSEMBLE_IEEE754_UNOP(expm1);
1228 break;
1229 case kIeee754Float64Log:
1230 ASSEMBLE_IEEE754_UNOP(log);
1231 break;
1232 case kIeee754Float64Log1p:
1233 ASSEMBLE_IEEE754_UNOP(log1p);
1234 break;
1235 case kIeee754Float64Log2:
1236 ASSEMBLE_IEEE754_UNOP(log2);
1237 break;
1238 case kIeee754Float64Log10:
1239 ASSEMBLE_IEEE754_UNOP(log10);
1240 break;
1241 case kIeee754Float64Pow:
1242 ASSEMBLE_IEEE754_BINOP(pow);
1243 break;
1244 case kIeee754Float64Sin:
1245 ASSEMBLE_IEEE754_UNOP(sin);
1246 break;
1247 case kIeee754Float64Sinh:
1248 ASSEMBLE_IEEE754_UNOP(sinh);
1249 break;
1250 case kIeee754Float64Tan:
1251 ASSEMBLE_IEEE754_UNOP(tan);
1252 break;
1253 case kIeee754Float64Tanh:
1254 ASSEMBLE_IEEE754_UNOP(tanh);
1255 break;
1256 case kX64Add32:
1257 ASSEMBLE_BINOP(addl);
1258 break;
1259 case kX64Add:
1260 ASSEMBLE_BINOP(addq);
1261 break;
1262 case kX64Sub32:
1263 ASSEMBLE_BINOP(subl);
1264 break;
1265 case kX64Sub:
1266 ASSEMBLE_BINOP(subq);
1267 break;
1268 case kX64And32:
1269 ASSEMBLE_BINOP(andl);
1270 break;
1271 case kX64And:
1272 ASSEMBLE_BINOP(andq);
1273 break;
1274 case kX64Cmp8:
1275 ASSEMBLE_COMPARE(cmpb);
1276 break;
1277 case kX64Cmp16:
1278 ASSEMBLE_COMPARE(cmpw);
1279 break;
1280 case kX64Cmp32:
1281 ASSEMBLE_COMPARE(cmpl);
1282 break;
1283 case kX64Cmp:
1284 ASSEMBLE_COMPARE(cmpq);
1285 break;
1286 case kX64Test8:
1287 ASSEMBLE_COMPARE(testb);
1288 break;
1289 case kX64Test16:
1290 ASSEMBLE_COMPARE(testw);
1291 break;
1292 case kX64Test32:
1293 ASSEMBLE_COMPARE(testl);
1294 break;
1295 case kX64Test:
1296 ASSEMBLE_COMPARE(testq);
1297 break;
1298 case kX64Imul32:
1299 ASSEMBLE_MULT(imull);
1300 break;
1301 case kX64Imul:
1302 ASSEMBLE_MULT(imulq);
1303 break;
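    // The one-operand imull/mull forms multiply the operand by rax and leave
    // the high 32 bits of the product in rdx, which is expected to be the
    // fixed output register here.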
1304 case kX64ImulHigh32:
1305 if (HasRegisterInput(instr, 1)) {
1306 __ imull(i.InputRegister(1));
1307 } else {
1308 __ imull(i.InputOperand(1));
1309 }
1310 break;
1311 case kX64UmulHigh32:
1312 if (HasRegisterInput(instr, 1)) {
1313 __ mull(i.InputRegister(1));
1314 } else {
1315 __ mull(i.InputOperand(1));
1316 }
1317 break;
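    // For signed division, cdq/cqo first sign-extend eax/rax into edx/rdx;
    // idiv then divides the double-width value by the operand, leaving the
    // quotient in rax and the remainder in rdx. The unsigned variants zero
    // rdx instead.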
1318 case kX64Idiv32:
1319 __ cdq();
1320 __ idivl(i.InputRegister(1));
1321 break;
1322 case kX64Idiv:
1323 __ cqo();
1324 __ idivq(i.InputRegister(1));
1325 break;
1326 case kX64Udiv32:
1327 __ xorl(rdx, rdx);
1328 __ divl(i.InputRegister(1));
1329 break;
1330 case kX64Udiv:
1331 __ xorq(rdx, rdx);
1332 __ divq(i.InputRegister(1));
1333 break;
1334 case kX64Not:
1335 ASSEMBLE_UNOP(notq);
1336 break;
1337 case kX64Not32:
1338 ASSEMBLE_UNOP(notl);
1339 break;
1340 case kX64Neg:
1341 ASSEMBLE_UNOP(negq);
1342 break;
1343 case kX64Neg32:
1344 ASSEMBLE_UNOP(negl);
1345 break;
1346 case kX64Or32:
1347 ASSEMBLE_BINOP(orl);
1348 break;
1349 case kX64Or:
1350 ASSEMBLE_BINOP(orq);
1351 break;
1352 case kX64Xor32:
1353 ASSEMBLE_BINOP(xorl);
1354 break;
1355 case kX64Xor:
1356 ASSEMBLE_BINOP(xorq);
1357 break;
1358 case kX64Shl32:
1359 ASSEMBLE_SHIFT(shll, 5);
1360 break;
1361 case kX64Shl:
1362 ASSEMBLE_SHIFT(shlq, 6);
1363 break;
1364 case kX64Shr32:
1365 ASSEMBLE_SHIFT(shrl, 5);
1366 break;
1367 case kX64Shr:
1368 ASSEMBLE_SHIFT(shrq, 6);
1369 break;
1370 case kX64Sar32:
1371 ASSEMBLE_SHIFT(sarl, 5);
1372 break;
1373 case kX64Sar:
1374 ASSEMBLE_SHIFT(sarq, 6);
1375 break;
1376 case kX64Rol32:
1377 ASSEMBLE_SHIFT(roll, 5);
1378 break;
1379 case kX64Rol:
1380 ASSEMBLE_SHIFT(rolq, 6);
1381 break;
1382 case kX64Ror32:
1383 ASSEMBLE_SHIFT(rorl, 5);
1384 break;
1385 case kX64Ror:
1386 ASSEMBLE_SHIFT(rorq, 6);
1387 break;
1388 case kX64Lzcnt:
1389 if (HasRegisterInput(instr, 0)) {
1390 __ Lzcntq(i.OutputRegister(), i.InputRegister(0));
1391 } else {
1392 __ Lzcntq(i.OutputRegister(), i.InputOperand(0));
1393 }
1394 break;
1395 case kX64Lzcnt32:
1396 if (HasRegisterInput(instr, 0)) {
1397 __ Lzcntl(i.OutputRegister(), i.InputRegister(0));
1398 } else {
1399 __ Lzcntl(i.OutputRegister(), i.InputOperand(0));
1400 }
1401 break;
1402 case kX64Tzcnt:
1403 if (HasRegisterInput(instr, 0)) {
1404 __ Tzcntq(i.OutputRegister(), i.InputRegister(0));
1405 } else {
1406 __ Tzcntq(i.OutputRegister(), i.InputOperand(0));
1407 }
1408 break;
1409 case kX64Tzcnt32:
1410 if (HasRegisterInput(instr, 0)) {
1411 __ Tzcntl(i.OutputRegister(), i.InputRegister(0));
1412 } else {
1413 __ Tzcntl(i.OutputRegister(), i.InputOperand(0));
1414 }
1415 break;
1416 case kX64Popcnt:
1417 if (HasRegisterInput(instr, 0)) {
1418 __ Popcntq(i.OutputRegister(), i.InputRegister(0));
1419 } else {
1420 __ Popcntq(i.OutputRegister(), i.InputOperand(0));
1421 }
1422 break;
1423 case kX64Popcnt32:
1424 if (HasRegisterInput(instr, 0)) {
1425 __ Popcntl(i.OutputRegister(), i.InputRegister(0));
1426 } else {
1427 __ Popcntl(i.OutputRegister(), i.InputOperand(0));
1428 }
1429 break;
1430 case kX64Bswap:
1431 __ bswapq(i.OutputRegister());
1432 break;
1433 case kX64Bswap32:
1434 __ bswapl(i.OutputRegister());
1435 break;
1436 case kSSEFloat32Cmp:
1437 ASSEMBLE_SSE_BINOP(Ucomiss);
1438 break;
1439 case kSSEFloat32Add:
1440 ASSEMBLE_SSE_BINOP(addss);
1441 break;
1442 case kSSEFloat32Sub:
1443 ASSEMBLE_SSE_BINOP(subss);
1444 break;
1445 case kSSEFloat32Mul:
1446 ASSEMBLE_SSE_BINOP(mulss);
1447 break;
1448 case kSSEFloat32Div:
1449 ASSEMBLE_SSE_BINOP(divss);
1450       // Don't delete this mov. It may improve performance on some CPUs
1451       // when there is a (v)mulss depending on the result.
1452 __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1453 break;
1454 case kSSEFloat32Abs: {
1455 // TODO(bmeurer): Use RIP relative 128-bit constants.
1456 XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
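      // An all-ones register shifted right by 33 leaves 0x7FFFFFFF in the low
      // 32 bits of each 64-bit lane; ANDing with it clears the float32 sign
      // bit.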
1457 __ Pcmpeqd(tmp, tmp);
1458 __ Psrlq(tmp, 33);
1459 __ Andps(i.OutputDoubleRegister(), tmp);
1460 break;
1461 }
1462 case kSSEFloat32Neg: {
1463 // TODO(bmeurer): Use RIP relative 128-bit constants.
1464 XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
1465 __ Pcmpeqd(tmp, tmp);
1466 __ Psllq(tmp, 31);
1467 __ Xorps(i.OutputDoubleRegister(), tmp);
1468 break;
1469 }
1470 case kSSEFloat32Sqrt:
1471 ASSEMBLE_SSE_UNOP(sqrtss);
1472 break;
1473 case kSSEFloat32ToFloat64:
1474 ASSEMBLE_SSE_UNOP(Cvtss2sd);
1475 break;
1476 case kSSEFloat32Round: {
1477 CpuFeatureScope sse_scope(tasm(), SSE4_1);
1478 RoundingMode const mode =
1479 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1480 __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1481 break;
1482 }
1483 case kSSEFloat32ToInt32:
1484 if (instr->InputAt(0)->IsFPRegister()) {
1485 __ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0));
1486 } else {
1487 __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
1488 }
1489 break;
1490 case kSSEFloat32ToUint32: {
1491 if (instr->InputAt(0)->IsFPRegister()) {
1492 __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1493 } else {
1494 __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1495 }
1496 break;
1497 }
1498 case kSSEFloat64Cmp:
1499 ASSEMBLE_SSE_BINOP(Ucomisd);
1500 break;
1501 case kSSEFloat64Add:
1502 ASSEMBLE_SSE_BINOP(addsd);
1503 break;
1504 case kSSEFloat64Sub:
1505 ASSEMBLE_SSE_BINOP(subsd);
1506 break;
1507 case kSSEFloat64Mul:
1508 ASSEMBLE_SSE_BINOP(mulsd);
1509 break;
1510 case kSSEFloat64Div:
1511 ASSEMBLE_SSE_BINOP(divsd);
1512       // Don't delete this mov. It may improve performance on some CPUs
1513       // when there is a (v)mulsd depending on the result.
1514 __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1515 break;
1516 case kSSEFloat64Mod: {
1517 __ AllocateStackSpace(kDoubleSize);
1518 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1519 kDoubleSize);
1520 // Move values to st(0) and st(1).
1521 __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
1522 __ fld_d(Operand(rsp, 0));
1523 __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
1524 __ fld_d(Operand(rsp, 0));
1525 // Loop while fprem isn't done.
1526 Label mod_loop;
1527 __ bind(&mod_loop);
1528       // This instruction traps on all kinds of inputs, but we assume the
1529       // floating point control word is set to ignore them all.
1530 __ fprem();
1531       // The following two instructions implicitly use rax.
1532 __ fnstsw_ax();
1533 if (CpuFeatures::IsSupported(SAHF)) {
1534 CpuFeatureScope sahf_scope(tasm(), SAHF);
1535 __ sahf();
1536 } else {
1537 __ shrl(rax, Immediate(8));
1538 __ andl(rax, Immediate(0xFF));
1539 __ pushq(rax);
1540 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1541 kSystemPointerSize);
1542 __ popfq();
1543 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1544 -kSystemPointerSize);
1545 }
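      // Either path above transfers the relevant FPU status bits into EFLAGS;
      // the C2 ("operation incomplete") bit lands on the parity flag, so keep
      // looping while fprem reports a partial remainder.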
1546 __ j(parity_even, &mod_loop);
1547 // Move output to stack and clean up.
1548 __ fstp(1);
1549 __ fstp_d(Operand(rsp, 0));
1550 __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
1551 __ addq(rsp, Immediate(kDoubleSize));
1552 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1553 -kDoubleSize);
1554 break;
1555 }
1556 case kSSEFloat32Max: {
1557 Label compare_swap, done_compare;
1558 if (instr->InputAt(1)->IsFPRegister()) {
1559 __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1560 } else {
1561 __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1562 }
1563 auto ool =
1564 zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
1565 __ j(parity_even, ool->entry());
1566 __ j(above, &done_compare, Label::kNear);
1567 __ j(below, &compare_swap, Label::kNear);
1568 __ Movmskps(kScratchRegister, i.InputDoubleRegister(0));
1569 __ testl(kScratchRegister, Immediate(1));
1570 __ j(zero, &done_compare, Label::kNear);
1571 __ bind(&compare_swap);
1572 if (instr->InputAt(1)->IsFPRegister()) {
1573 __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1574 } else {
1575 __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1576 }
1577 __ bind(&done_compare);
1578 __ bind(ool->exit());
1579 break;
1580 }
1581 case kSSEFloat32Min: {
1582 Label compare_swap, done_compare;
1583 if (instr->InputAt(1)->IsFPRegister()) {
1584 __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1585 } else {
1586 __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1587 }
1588 auto ool =
1589 zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
1590 __ j(parity_even, ool->entry());
1591 __ j(below, &done_compare, Label::kNear);
1592 __ j(above, &compare_swap, Label::kNear);
1593 if (instr->InputAt(1)->IsFPRegister()) {
1594 __ Movmskps(kScratchRegister, i.InputDoubleRegister(1));
1595 } else {
1596 __ Movss(kScratchDoubleReg, i.InputOperand(1));
1597 __ Movmskps(kScratchRegister, kScratchDoubleReg);
1598 }
1599 __ testl(kScratchRegister, Immediate(1));
1600 __ j(zero, &done_compare, Label::kNear);
1601 __ bind(&compare_swap);
1602 if (instr->InputAt(1)->IsFPRegister()) {
1603 __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1604 } else {
1605 __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1606 }
1607 __ bind(&done_compare);
1608 __ bind(ool->exit());
1609 break;
1610 }
1611 case kSSEFloat64Max: {
1612 Label compare_swap, done_compare;
1613 if (instr->InputAt(1)->IsFPRegister()) {
1614 __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1615 } else {
1616 __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1617 }
1618 auto ool =
1619 zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
1620 __ j(parity_even, ool->entry());
1621 __ j(above, &done_compare, Label::kNear);
1622 __ j(below, &compare_swap, Label::kNear);
1623 __ Movmskpd(kScratchRegister, i.InputDoubleRegister(0));
1624 __ testl(kScratchRegister, Immediate(1));
1625 __ j(zero, &done_compare, Label::kNear);
1626 __ bind(&compare_swap);
1627 if (instr->InputAt(1)->IsFPRegister()) {
1628 __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1629 } else {
1630 __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1631 }
1632 __ bind(&done_compare);
1633 __ bind(ool->exit());
1634 break;
1635 }
1636 case kSSEFloat64Min: {
1637 Label compare_swap, done_compare;
1638 if (instr->InputAt(1)->IsFPRegister()) {
1639 __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1640 } else {
1641 __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1642 }
1643 auto ool =
1644 zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
1645 __ j(parity_even, ool->entry());
1646 __ j(below, &done_compare, Label::kNear);
1647 __ j(above, &compare_swap, Label::kNear);
1648 if (instr->InputAt(1)->IsFPRegister()) {
1649 __ Movmskpd(kScratchRegister, i.InputDoubleRegister(1));
1650 } else {
1651 __ Movsd(kScratchDoubleReg, i.InputOperand(1));
1652 __ Movmskpd(kScratchRegister, kScratchDoubleReg);
1653 }
1654 __ testl(kScratchRegister, Immediate(1));
1655 __ j(zero, &done_compare, Label::kNear);
1656 __ bind(&compare_swap);
1657 if (instr->InputAt(1)->IsFPRegister()) {
1658 __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1659 } else {
1660 __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1661 }
1662 __ bind(&done_compare);
1663 __ bind(ool->exit());
1664 break;
1665 }
1666 case kX64F64x2Abs:
1667 case kSSEFloat64Abs: {
1668 // TODO(bmeurer): Use RIP relative 128-bit constants.
1669 XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
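// All-ones shifted right by one gives 0x7FFF'FFFF'FFFF'FFFF per quadword;
// andpd with it clears only the sign bit.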
1670 __ Pcmpeqd(tmp, tmp);
1671 __ Psrlq(tmp, 1);
1672 __ Andpd(i.OutputDoubleRegister(), tmp);
1673 break;
1674 }
1675 case kX64F64x2Neg:
1676 case kSSEFloat64Neg: {
1677 // TODO(bmeurer): Use RIP relative 128-bit constants.
1678 XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
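// All-ones shifted left by 63 leaves only the sign bit set; xorpd with it
// flips the sign.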
1679 __ Pcmpeqd(tmp, tmp);
1680 __ Psllq(tmp, 63);
1681 __ Xorpd(i.OutputDoubleRegister(), tmp);
1682 break;
1683 }
1684 case kSSEFloat64Sqrt:
1685 ASSEMBLE_SSE_UNOP(Sqrtsd);
1686 break;
1687 case kSSEFloat64Round: {
1688 CpuFeatureScope sse_scope(tasm(), SSE4_1);
1689 RoundingMode const mode =
1690 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1691 __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1692 break;
1693 }
1694 case kSSEFloat64ToFloat32:
1695 ASSEMBLE_SSE_UNOP(Cvtsd2ss);
1696 break;
1697 case kSSEFloat64ToInt32:
1698 if (instr->InputAt(0)->IsFPRegister()) {
1699 __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
1700 } else {
1701 __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
1702 }
1703 break;
1704 case kSSEFloat64ToUint32: {
1705 if (instr->InputAt(0)->IsFPRegister()) {
1706 __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1707 } else {
1708 __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
1709 }
1710 if (MiscField::decode(instr->opcode())) {
1711 __ AssertZeroExtended(i.OutputRegister());
1712 }
1713 break;
1714 }
1715 case kSSEFloat32ToInt64:
1716 if (instr->InputAt(0)->IsFPRegister()) {
1717 __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1718 } else {
1719 __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1720 }
1721 if (instr->OutputCount() > 1) {
1722 __ Set(i.OutputRegister(1), 1);
1723 Label done;
1724 Label fail;
1725 __ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN));
1726 if (instr->InputAt(0)->IsFPRegister()) {
1727 __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
1728 } else {
1729 __ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
1730 }
1731 // If the input is NaN, then the conversion fails.
1732 __ j(parity_even, &fail, Label::kNear);
1733 // If the input is INT64_MIN, then the conversion succeeds.
1734 __ j(equal, &done, Label::kNear);
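// Out-of-range inputs produce INT64_MIN. cmpq(reg, 1) sets the overflow
// flag only when reg == INT64_MIN (INT64_MIN - 1 overflows), so
// no_overflow below means the result is not that sentinel value.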
1735 __ cmpq(i.OutputRegister(0), Immediate(1));
1736 // If the conversion results in INT64_MIN, but the input was not
1737 // INT64_MIN, then the conversion fails.
1738 __ j(no_overflow, &done, Label::kNear);
1739 __ bind(&fail);
1740 __ Set(i.OutputRegister(1), 0);
1741 __ bind(&done);
1742 }
1743 break;
1744 case kSSEFloat64ToInt64:
1745 if (instr->InputAt(0)->IsFPRegister()) {
1746 __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
1747 } else {
1748 __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
1749 }
1750 if (instr->OutputCount() > 1) {
1751 __ Set(i.OutputRegister(1), 1);
1752 Label done;
1753 Label fail;
1754 __ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN));
1755 if (instr->InputAt(0)->IsFPRegister()) {
1756 __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
1757 } else {
1758 __ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
1759 }
1760 // If the input is NaN, then the conversion fails.
1761 __ j(parity_even, &fail, Label::kNear);
1762 // If the input is INT64_MIN, then the conversion succeeds.
1763 __ j(equal, &done, Label::kNear);
1764 __ cmpq(i.OutputRegister(0), Immediate(1));
1765 // If the conversion results in INT64_MIN, but the input was not
1766 // INT64_MIN, then the conversion fails.
1767 __ j(no_overflow, &done, Label::kNear);
1768 __ bind(&fail);
1769 __ Set(i.OutputRegister(1), 0);
1770 __ bind(&done);
1771 }
1772 break;
1773 case kSSEFloat32ToUint64: {
1774 Label fail;
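// Cvttss2uiq branches to |fail| when the input cannot be represented as a
// uint64, so the success flag in OutputRegister(1) is set to 1 only on the
// fall-through path.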
1775 if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
1776 if (instr->InputAt(0)->IsFPRegister()) {
1777 __ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
1778 } else {
1779 __ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
1780 }
1781 if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
1782 __ bind(&fail);
1783 break;
1784 }
1785 case kSSEFloat64ToUint64: {
1786 Label fail;
1787 if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
1788 if (instr->InputAt(0)->IsFPRegister()) {
1789 __ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
1790 } else {
1791 __ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
1792 }
1793 if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
1794 __ bind(&fail);
1795 break;
1796 }
1797 case kSSEInt32ToFloat64:
1798 if (HasRegisterInput(instr, 0)) {
1799 __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1800 } else {
1801 __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1802 }
1803 break;
1804 case kSSEInt32ToFloat32:
1805 if (HasRegisterInput(instr, 0)) {
1806 __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1807 } else {
1808 __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1809 }
1810 break;
1811 case kSSEInt64ToFloat32:
1812 if (HasRegisterInput(instr, 0)) {
1813 __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1814 } else {
1815 __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1816 }
1817 break;
1818 case kSSEInt64ToFloat64:
1819 if (HasRegisterInput(instr, 0)) {
1820 __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1821 } else {
1822 __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1823 }
1824 break;
1825 case kSSEUint64ToFloat32:
1826 if (HasRegisterInput(instr, 0)) {
1827 __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1828 } else {
1829 __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1830 }
1831 break;
1832 case kSSEUint64ToFloat64:
1833 if (HasRegisterInput(instr, 0)) {
1834 __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1835 } else {
1836 __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1837 }
1838 break;
1839 case kSSEUint32ToFloat64:
1840 if (HasRegisterInput(instr, 0)) {
1841 __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1842 } else {
1843 __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1844 }
1845 break;
1846 case kSSEUint32ToFloat32:
1847 if (HasRegisterInput(instr, 0)) {
1848 __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1849 } else {
1850 __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1851 }
1852 break;
1853 case kSSEFloat64ExtractLowWord32:
1854 if (instr->InputAt(0)->IsFPStackSlot()) {
1855 __ movl(i.OutputRegister(), i.InputOperand(0));
1856 } else {
1857 __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
1858 }
1859 break;
1860 case kSSEFloat64ExtractHighWord32:
1861 if (instr->InputAt(0)->IsFPStackSlot()) {
1862 __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
1863 } else {
1864 __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
1865 }
1866 break;
1867 case kSSEFloat64InsertLowWord32:
1868 if (HasRegisterInput(instr, 1)) {
1869 __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
1870 } else {
1871 __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
1872 }
1873 break;
1874 case kSSEFloat64InsertHighWord32:
1875 if (HasRegisterInput(instr, 1)) {
1876 __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
1877 } else {
1878 __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
1879 }
1880 break;
1881 case kSSEFloat64LoadLowWord32:
1882 if (HasRegisterInput(instr, 0)) {
1883 __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
1884 } else {
1885 __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
1886 }
1887 break;
1888 case kAVXFloat32Cmp: {
1889 CpuFeatureScope avx_scope(tasm(), AVX);
1890 if (instr->InputAt(1)->IsFPRegister()) {
1891 __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1892 } else {
1893 __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1894 }
1895 break;
1896 }
1897 case kAVXFloat32Add:
1898 ASSEMBLE_AVX_BINOP(vaddss);
1899 break;
1900 case kAVXFloat32Sub:
1901 ASSEMBLE_AVX_BINOP(vsubss);
1902 break;
1903 case kAVXFloat32Mul:
1904 ASSEMBLE_AVX_BINOP(vmulss);
1905 break;
1906 case kAVXFloat32Div:
1907 ASSEMBLE_AVX_BINOP(vdivss);
1908 // Don't delete this mov. It may improve performance on some CPUs
1909 // when there is a (v)mulss depending on the result.
1910 __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1911 break;
1912 case kAVXFloat64Cmp: {
1913 CpuFeatureScope avx_scope(tasm(), AVX);
1914 if (instr->InputAt(1)->IsFPRegister()) {
1915 __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1916 } else {
1917 __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1918 }
1919 break;
1920 }
1921 case kAVXFloat64Add:
1922 ASSEMBLE_AVX_BINOP(vaddsd);
1923 break;
1924 case kAVXFloat64Sub:
1925 ASSEMBLE_AVX_BINOP(vsubsd);
1926 break;
1927 case kAVXFloat64Mul:
1928 ASSEMBLE_AVX_BINOP(vmulsd);
1929 break;
1930 case kAVXFloat64Div:
1931 ASSEMBLE_AVX_BINOP(vdivsd);
1932 // Don't delete this mov. It may improve performance on some CPUs
1933 // when there is a (v)mulsd depending on the result.
1934 __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1935 break;
1936 case kAVXFloat32Abs: {
1937 // TODO(bmeurer): Use RIP relative 128-bit constants.
1938 CpuFeatureScope avx_scope(tasm(), AVX);
1939 XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
1940 __ vpcmpeqd(tmp, tmp, tmp);
1941 __ vpsrlq(tmp, tmp, 33);
1942 if (instr->InputAt(0)->IsFPRegister()) {
1943 __ vandps(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
1944 } else {
1945 __ vandps(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
1946 }
1947 break;
1948 }
1949 case kAVXFloat32Neg: {
1950 // TODO(bmeurer): Use RIP relative 128-bit constants.
1951 CpuFeatureScope avx_scope(tasm(), AVX);
1952 XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
1953 __ vpcmpeqd(tmp, tmp, tmp);
1954 __ vpsllq(tmp, tmp, 31);
1955 if (instr->InputAt(0)->IsFPRegister()) {
1956 __ vxorps(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
1957 } else {
1958 __ vxorps(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
1959 }
1960 break;
1961 }
1962 case kAVXFloat64Abs: {
1963 // TODO(bmeurer): Use RIP relative 128-bit constants.
1964 CpuFeatureScope avx_scope(tasm(), AVX);
1965 XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
1966 __ vpcmpeqd(tmp, tmp, tmp);
1967 __ vpsrlq(tmp, tmp, 1);
1968 if (instr->InputAt(0)->IsFPRegister()) {
1969 __ vandpd(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
1970 } else {
1971 __ vandpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
1972 }
1973 break;
1974 }
1975 case kAVXFloat64Neg: {
1976 // TODO(bmeurer): Use RIP relative 128-bit constants.
1977 CpuFeatureScope avx_scope(tasm(), AVX);
1978 XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
1979 __ vpcmpeqd(tmp, tmp, tmp);
1980 __ vpsllq(tmp, tmp, 63);
1981 if (instr->InputAt(0)->IsFPRegister()) {
1982 __ vxorpd(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
1983 } else {
1984 __ vxorpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
1985 }
1986 break;
1987 }
1988 case kSSEFloat64SilenceNaN:
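// Subtracting 0.0 is a no-op for ordinary values but turns a signalling
// NaN into a quiet NaN.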
1989 __ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
1990 __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
1991 break;
1992 case kX64Movsxbl:
1993 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
1994 ASSEMBLE_MOVX(movsxbl);
1995 __ AssertZeroExtended(i.OutputRegister());
1996 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1997 break;
1998 case kX64Movzxbl:
1999 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2000 ASSEMBLE_MOVX(movzxbl);
2001 __ AssertZeroExtended(i.OutputRegister());
2002 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2003 break;
2004 case kX64Movsxbq:
2005 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2006 ASSEMBLE_MOVX(movsxbq);
2007 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2008 break;
2009 case kX64Movzxbq:
2010 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2011 ASSEMBLE_MOVX(movzxbq);
2012 __ AssertZeroExtended(i.OutputRegister());
2013 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2014 break;
2015 case kX64Movb: {
2016 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2017 size_t index = 0;
2018 Operand operand = i.MemoryOperand(&index);
2019 if (HasImmediateInput(instr, index)) {
2020 __ movb(operand, Immediate(i.InputInt8(index)));
2021 } else {
2022 __ movb(operand, i.InputRegister(index));
2023 }
2024 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2025 break;
2026 }
2027 case kX64Movsxwl:
2028 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2029 ASSEMBLE_MOVX(movsxwl);
2030 __ AssertZeroExtended(i.OutputRegister());
2031 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2032 break;
2033 case kX64Movzxwl:
2034 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2035 ASSEMBLE_MOVX(movzxwl);
2036 __ AssertZeroExtended(i.OutputRegister());
2037 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2038 break;
2039 case kX64Movsxwq:
2040 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2041 ASSEMBLE_MOVX(movsxwq);
2042 break;
2043 case kX64Movzxwq:
2044 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2045 ASSEMBLE_MOVX(movzxwq);
2046 __ AssertZeroExtended(i.OutputRegister());
2047 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2048 break;
2049 case kX64Movw: {
2050 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2051 size_t index = 0;
2052 Operand operand = i.MemoryOperand(&index);
2053 if (HasImmediateInput(instr, index)) {
2054 __ movw(operand, Immediate(i.InputInt16(index)));
2055 } else {
2056 __ movw(operand, i.InputRegister(index));
2057 }
2058 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2059 break;
2060 }
2061 case kX64Movl:
2062 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2063 if (instr->HasOutput()) {
2064 if (HasAddressingMode(instr)) {
2065 __ movl(i.OutputRegister(), i.MemoryOperand());
2066 } else {
2067 if (HasRegisterInput(instr, 0)) {
2068 __ movl(i.OutputRegister(), i.InputRegister(0));
2069 } else {
2070 __ movl(i.OutputRegister(), i.InputOperand(0));
2071 }
2072 }
2073 __ AssertZeroExtended(i.OutputRegister());
2074 } else {
2075 size_t index = 0;
2076 Operand operand = i.MemoryOperand(&index);
2077 if (HasImmediateInput(instr, index)) {
2078 __ movl(operand, i.InputImmediate(index));
2079 } else {
2080 __ movl(operand, i.InputRegister(index));
2081 }
2082 }
2083 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2084 break;
2085 case kX64Movsxlq:
2086 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2087 ASSEMBLE_MOVX(movsxlq);
2088 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2089 break;
2090 case kX64MovqDecompressTaggedSigned: {
2091 CHECK(instr->HasOutput());
2092 __ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand());
2093 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2094 break;
2095 }
2096 case kX64MovqDecompressTaggedPointer: {
2097 CHECK(instr->HasOutput());
2098 __ DecompressTaggedPointer(i.OutputRegister(), i.MemoryOperand());
2099 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2100 break;
2101 }
2102 case kX64MovqDecompressAnyTagged: {
2103 CHECK(instr->HasOutput());
2104 __ DecompressAnyTagged(i.OutputRegister(), i.MemoryOperand());
2105 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2106 break;
2107 }
2108 case kX64MovqCompressTagged: {
2109 CHECK(!instr->HasOutput());
2110 size_t index = 0;
2111 Operand operand = i.MemoryOperand(&index);
2112 if (HasImmediateInput(instr, index)) {
2113 __ StoreTaggedField(operand, i.InputImmediate(index));
2114 } else {
2115 __ StoreTaggedField(operand, i.InputRegister(index));
2116 }
2117 break;
2118 }
2119 case kX64Movq:
2120 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2121 if (instr->HasOutput()) {
2122 __ movq(i.OutputRegister(), i.MemoryOperand());
2123 } else {
2124 size_t index = 0;
2125 Operand operand = i.MemoryOperand(&index);
2126 if (HasImmediateInput(instr, index)) {
2127 __ movq(operand, i.InputImmediate(index));
2128 } else {
2129 __ movq(operand, i.InputRegister(index));
2130 }
2131 }
2132 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2133 break;
2134 case kX64Movss:
2135 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2136 if (instr->HasOutput()) {
2137 __ Movss(i.OutputDoubleRegister(), i.MemoryOperand());
2138 } else {
2139 size_t index = 0;
2140 Operand operand = i.MemoryOperand(&index);
2141 __ Movss(operand, i.InputDoubleRegister(index));
2142 }
2143 break;
2144 case kX64Movsd: {
2145 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2146 if (instr->HasOutput()) {
2147 const MemoryAccessMode access_mode =
2148 static_cast<MemoryAccessMode>(MiscField::decode(opcode));
2149 if (access_mode == kMemoryAccessPoisoned) {
2150 // If we have to poison the loaded value, we load into a general
2151 // purpose register first, mask it with the poison, and move the
2152 // value from the general purpose register into the double register.
2153 __ movq(kScratchRegister, i.MemoryOperand());
2154 __ andq(kScratchRegister, kSpeculationPoisonRegister);
2155 __ Movq(i.OutputDoubleRegister(), kScratchRegister);
2156 } else {
2157 __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
2158 }
2159 } else {
2160 size_t index = 0;
2161 Operand operand = i.MemoryOperand(&index);
2162 __ Movsd(operand, i.InputDoubleRegister(index));
2163 }
2164 break;
2165 }
2166 case kX64Movdqu: {
2167 CpuFeatureScope sse_scope(tasm(), SSSE3);
2168 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2169 if (instr->HasOutput()) {
2170 __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
2171 } else {
2172 size_t index = 0;
2173 Operand operand = i.MemoryOperand(&index);
2174 __ Movdqu(operand, i.InputSimd128Register(index));
2175 }
2176 break;
2177 }
2178 case kX64BitcastFI:
2179 if (instr->InputAt(0)->IsFPStackSlot()) {
2180 __ movl(i.OutputRegister(), i.InputOperand(0));
2181 } else {
2182 __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
2183 }
2184 break;
2185 case kX64BitcastDL:
2186 if (instr->InputAt(0)->IsFPStackSlot()) {
2187 __ movq(i.OutputRegister(), i.InputOperand(0));
2188 } else {
2189 __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
2190 }
2191 break;
2192 case kX64BitcastIF:
2193 if (HasRegisterInput(instr, 0)) {
2194 __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
2195 } else {
2196 __ Movss(i.OutputDoubleRegister(), i.InputOperand(0));
2197 }
2198 break;
2199 case kX64BitcastLD:
2200 if (HasRegisterInput(instr, 0)) {
2201 __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
2202 } else {
2203 __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
2204 }
2205 break;
2206 case kX64Lea32: {
2207 AddressingMode mode = AddressingModeField::decode(instr->opcode());
2208 // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
2209 // and addressing mode just happens to work out. The "addl"/"subl" forms
2210 // in these cases are faster based on measurements.
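// For example, leal rax, [rax+0x8] becomes addl rax, 8, and
// leal rax, [rax+rax*1] becomes shll rax, 1.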
2211 if (i.InputRegister(0) == i.OutputRegister()) {
2212 if (mode == kMode_MRI) {
2213 int32_t constant_summand = i.InputInt32(1);
2214 DCHECK_NE(0, constant_summand);
2215 if (constant_summand > 0) {
2216 __ addl(i.OutputRegister(), Immediate(constant_summand));
2217 } else {
2218 __ subl(i.OutputRegister(),
2219 Immediate(base::NegateWithWraparound(constant_summand)));
2220 }
2221 } else if (mode == kMode_MR1) {
2222 if (i.InputRegister(1) == i.OutputRegister()) {
2223 __ shll(i.OutputRegister(), Immediate(1));
2224 } else {
2225 __ addl(i.OutputRegister(), i.InputRegister(1));
2226 }
2227 } else if (mode == kMode_M2) {
2228 __ shll(i.OutputRegister(), Immediate(1));
2229 } else if (mode == kMode_M4) {
2230 __ shll(i.OutputRegister(), Immediate(2));
2231 } else if (mode == kMode_M8) {
2232 __ shll(i.OutputRegister(), Immediate(3));
2233 } else {
2234 __ leal(i.OutputRegister(), i.MemoryOperand());
2235 }
2236 } else if (mode == kMode_MR1 &&
2237 i.InputRegister(1) == i.OutputRegister()) {
2238 __ addl(i.OutputRegister(), i.InputRegister(0));
2239 } else {
2240 __ leal(i.OutputRegister(), i.MemoryOperand());
2241 }
2242 __ AssertZeroExtended(i.OutputRegister());
2243 break;
2244 }
2245 case kX64Lea: {
2246 AddressingMode mode = AddressingModeField::decode(instr->opcode());
2247 // Shorten "leaq" to "addq", "subq" or "shlq" if the register allocation
2248 // and addressing mode just happens to work out. The "addq"/"subq" forms
2249 // in these cases are faster based on measurements.
2250 if (i.InputRegister(0) == i.OutputRegister()) {
2251 if (mode == kMode_MRI) {
2252 int32_t constant_summand = i.InputInt32(1);
2253 if (constant_summand > 0) {
2254 __ addq(i.OutputRegister(), Immediate(constant_summand));
2255 } else if (constant_summand < 0) {
2256 __ subq(i.OutputRegister(), Immediate(-constant_summand));
2257 }
2258 } else if (mode == kMode_MR1) {
2259 if (i.InputRegister(1) == i.OutputRegister()) {
2260 __ shlq(i.OutputRegister(), Immediate(1));
2261 } else {
2262 __ addq(i.OutputRegister(), i.InputRegister(1));
2263 }
2264 } else if (mode == kMode_M2) {
2265 __ shlq(i.OutputRegister(), Immediate(1));
2266 } else if (mode == kMode_M4) {
2267 __ shlq(i.OutputRegister(), Immediate(2));
2268 } else if (mode == kMode_M8) {
2269 __ shlq(i.OutputRegister(), Immediate(3));
2270 } else {
2271 __ leaq(i.OutputRegister(), i.MemoryOperand());
2272 }
2273 } else if (mode == kMode_MR1 &&
2274 i.InputRegister(1) == i.OutputRegister()) {
2275 __ addq(i.OutputRegister(), i.InputRegister(0));
2276 } else {
2277 __ leaq(i.OutputRegister(), i.MemoryOperand());
2278 }
2279 break;
2280 }
2281 case kX64Dec32:
2282 __ decl(i.OutputRegister());
2283 break;
2284 case kX64Inc32:
2285 __ incl(i.OutputRegister());
2286 break;
2287 case kX64Push:
2288 if (HasAddressingMode(instr)) {
2289 size_t index = 0;
2290 Operand operand = i.MemoryOperand(&index);
2291 __ pushq(operand);
2292 frame_access_state()->IncreaseSPDelta(1);
2293 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2294 kSystemPointerSize);
2295 } else if (HasImmediateInput(instr, 0)) {
2296 __ pushq(i.InputImmediate(0));
2297 frame_access_state()->IncreaseSPDelta(1);
2298 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2299 kSystemPointerSize);
2300 } else if (HasRegisterInput(instr, 0)) {
2301 __ pushq(i.InputRegister(0));
2302 frame_access_state()->IncreaseSPDelta(1);
2303 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2304 kSystemPointerSize);
2305 } else if (instr->InputAt(0)->IsFloatRegister() ||
2306 instr->InputAt(0)->IsDoubleRegister()) {
2307 // TODO(titzer): use another machine instruction?
2308 __ AllocateStackSpace(kDoubleSize);
2309 frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
2310 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2311 kDoubleSize);
2312 __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
2313 } else if (instr->InputAt(0)->IsSimd128Register()) {
2314 // TODO(titzer): use another machine instruction?
2315 __ AllocateStackSpace(kSimd128Size);
2316 frame_access_state()->IncreaseSPDelta(kSimd128Size /
2317 kSystemPointerSize);
2318 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2319 kSimd128Size);
2320 __ Movups(Operand(rsp, 0), i.InputSimd128Register(0));
2321 } else if (instr->InputAt(0)->IsStackSlot() ||
2322 instr->InputAt(0)->IsFloatStackSlot() ||
2323 instr->InputAt(0)->IsDoubleStackSlot()) {
2324 __ pushq(i.InputOperand(0));
2325 frame_access_state()->IncreaseSPDelta(1);
2326 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2327 kSystemPointerSize);
2328 } else {
2329 DCHECK(instr->InputAt(0)->IsSimd128StackSlot());
2330 __ Movups(kScratchDoubleReg, i.InputOperand(0));
2331 // TODO(titzer): use another machine instruction?
2332 __ AllocateStackSpace(kSimd128Size);
2333 frame_access_state()->IncreaseSPDelta(kSimd128Size /
2334 kSystemPointerSize);
2335 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2336 kSimd128Size);
2337 __ Movups(Operand(rsp, 0), kScratchDoubleReg);
2338 }
2339 break;
2340 case kX64Poke: {
2341 int slot = MiscField::decode(instr->opcode());
2342 if (HasImmediateInput(instr, 0)) {
2343 __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputImmediate(0));
2344 } else if (instr->InputAt(0)->IsFPRegister()) {
2345 LocationOperand* op = LocationOperand::cast(instr->InputAt(0));
2346 if (op->representation() == MachineRepresentation::kFloat64) {
2347 __ Movsd(Operand(rsp, slot * kSystemPointerSize),
2348 i.InputDoubleRegister(0));
2349 } else {
2350 DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
2351 __ Movss(Operand(rsp, slot * kSystemPointerSize),
2352 i.InputFloatRegister(0));
2353 }
2354 } else {
2355 __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputRegister(0));
2356 }
2357 break;
2358 }
2359 case kX64Peek: {
2360 int reverse_slot = i.InputInt32(0);
2361 int offset =
2362 FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
2363 if (instr->OutputAt(0)->IsFPRegister()) {
2364 LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
2365 if (op->representation() == MachineRepresentation::kFloat64) {
2366 __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
2367 } else if (op->representation() == MachineRepresentation::kFloat32) {
2368 __ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
2369 } else {
2370 DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
2371 __ Movdqu(i.OutputSimd128Register(), Operand(rbp, offset));
2372 }
2373 } else {
2374 __ movq(i.OutputRegister(), Operand(rbp, offset));
2375 }
2376 break;
2377 }
2378 case kX64F64x2Splat: {
2379 XMMRegister dst = i.OutputSimd128Register();
2380 if (instr->InputAt(0)->IsFPRegister()) {
2381 __ Movddup(dst, i.InputDoubleRegister(0));
2382 } else {
2383 __ Movddup(dst, i.InputOperand(0));
2384 }
2385 break;
2386 }
2387 case kX64F64x2ExtractLane: {
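// Pextrq can only write to a general purpose register or memory, so the
// lane is extracted into kScratchRegister and then moved into the output.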
2388 __ Pextrq(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
2389 __ Movq(i.OutputDoubleRegister(), kScratchRegister);
2390 break;
2391 }
2392 case kX64F64x2Sqrt: {
2393 __ Sqrtpd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2394 break;
2395 }
2396 case kX64F64x2Add: {
2397 ASSEMBLE_SIMD_BINOP(addpd);
2398 break;
2399 }
2400 case kX64F64x2Sub: {
2401 ASSEMBLE_SIMD_BINOP(subpd);
2402 break;
2403 }
2404 case kX64F64x2Mul: {
2405 ASSEMBLE_SIMD_BINOP(mulpd);
2406 break;
2407 }
2408 case kX64F64x2Div: {
2409 ASSEMBLE_SIMD_BINOP(divpd);
2410 break;
2411 }
2412 case kX64F64x2Min: {
2413 XMMRegister src1 = i.InputSimd128Register(1),
2414 dst = i.OutputSimd128Register();
2415 DCHECK_EQ(dst, i.InputSimd128Register(0));
2416 // The minpd instruction doesn't propagate NaNs and +0's in its first
2417 // operand. Perform minpd in both orders, merge the results, and adjust.
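// (minpd returns its second operand whenever the inputs are unordered or
// compare equal, e.g. for +0 vs -0.)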
2418 __ Movapd(kScratchDoubleReg, src1);
2419 __ Minpd(kScratchDoubleReg, dst);
2420 __ Minpd(dst, src1);
2421 // Propagate -0's and NaNs, which may be non-canonical.
2422 __ Orpd(kScratchDoubleReg, dst);
2423 // Canonicalize NaNs by quieting and clearing the payload.
2424 __ Cmppd(dst, kScratchDoubleReg, int8_t{3});
2425 __ Orpd(kScratchDoubleReg, dst);
2426 __ Psrlq(dst, 13);
2427 __ Andnpd(dst, kScratchDoubleReg);
2428 break;
2429 }
2430 case kX64F64x2Max: {
2431 XMMRegister src1 = i.InputSimd128Register(1),
2432 dst = i.OutputSimd128Register();
2433 DCHECK_EQ(dst, i.InputSimd128Register(0));
2434 // The maxpd instruction doesn't propagate NaNs and +0's in its first
2435 // operand. Perform maxpd in both orders, merge the results, and adjust.
2436 __ Movapd(kScratchDoubleReg, src1);
2437 __ Maxpd(kScratchDoubleReg, dst);
2438 __ Maxpd(dst, src1);
2439 // Find discrepancies.
2440 __ Xorpd(dst, kScratchDoubleReg);
2441 // Propagate NaNs, which may be non-canonical.
2442 __ Orpd(kScratchDoubleReg, dst);
2443 // Propagate sign discrepancy and (subtle) quiet NaNs.
2444 __ Subpd(kScratchDoubleReg, dst);
2445 // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
2446 __ Cmppd(dst, kScratchDoubleReg, int8_t{3});
2447 __ Psrlq(dst, 13);
2448 __ Andnpd(dst, kScratchDoubleReg);
2449 break;
2450 }
2451 case kX64F64x2Eq: {
2452 ASSEMBLE_SIMD_BINOP(cmpeqpd);
2453 break;
2454 }
2455 case kX64F64x2Ne: {
2456 ASSEMBLE_SIMD_BINOP(cmpneqpd);
2457 break;
2458 }
2459 case kX64F64x2Lt: {
2460 ASSEMBLE_SIMD_BINOP(cmpltpd);
2461 break;
2462 }
2463 case kX64F64x2Le: {
2464 ASSEMBLE_SIMD_BINOP(cmplepd);
2465 break;
2466 }
2467 case kX64F64x2Qfma: {
2468 if (CpuFeatures::IsSupported(FMA3)) {
2469 CpuFeatureScope fma3_scope(tasm(), FMA3);
2470 __ vfmadd231pd(i.OutputSimd128Register(), i.InputSimd128Register(1),
2471 i.InputSimd128Register(2));
2472 } else {
2473 XMMRegister tmp = i.TempSimd128Register(0);
2474 __ Movapd(tmp, i.InputSimd128Register(2));
2475 __ Mulpd(tmp, i.InputSimd128Register(1));
2476 __ Addpd(i.OutputSimd128Register(), tmp);
2477 }
2478 break;
2479 }
2480 case kX64F64x2Qfms: {
2481 if (CpuFeatures::IsSupported(FMA3)) {
2482 CpuFeatureScope fma3_scope(tasm(), FMA3);
2483 __ vfnmadd231pd(i.OutputSimd128Register(), i.InputSimd128Register(1),
2484 i.InputSimd128Register(2));
2485 } else {
2486 XMMRegister tmp = i.TempSimd128Register(0);
2487 __ Movapd(tmp, i.InputSimd128Register(2));
2488 __ Mulpd(tmp, i.InputSimd128Register(1));
2489 __ Subpd(i.OutputSimd128Register(), tmp);
2490 }
2491 break;
2492 }
2493 case kX64F32x4Splat: {
2494 __ Shufps(i.OutputSimd128Register(), i.InputDoubleRegister(0), 0);
2495 break;
2496 }
2497 case kX64F32x4ExtractLane: {
2498 if (CpuFeatures::IsSupported(AVX)) {
2499 CpuFeatureScope avx_scope(tasm(), AVX);
2500 XMMRegister src = i.InputSimd128Register(0);
2501 // Use vshufps; junk is left in the 3 high lanes.
2502 __ vshufps(i.OutputDoubleRegister(), src, src, i.InputInt8(1));
2503 } else {
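// extractps can only write to a general purpose register or memory, so
// the lane round-trips through kScratchRegister.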
2504 __ extractps(kScratchRegister, i.InputSimd128Register(0),
2505 i.InputUint8(1));
2506 __ movd(i.OutputDoubleRegister(), kScratchRegister);
2507 }
2508 break;
2509 }
2510 case kX64F32x4ReplaceLane: {
2511 // The insertps instruction uses imm8[5:4] to indicate the lane
2512 // that needs to be replaced.
2513 byte select = i.InputInt8(1) << 4 & 0x30;
2514 if (instr->InputAt(2)->IsFPRegister()) {
2515 __ Insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2),
2516 select);
2517 } else {
2518 __ Insertps(i.OutputSimd128Register(), i.InputOperand(2), select);
2519 }
2520 break;
2521 }
2522 case kX64F32x4SConvertI32x4: {
2523 __ Cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2524 break;
2525 }
2526 case kX64F32x4UConvertI32x4: {
2527 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2528 DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
2529 XMMRegister dst = i.OutputSimd128Register();
2530 __ Pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
2531 __ Pblendw(kScratchDoubleReg, dst, uint8_t{0x55}); // get lo 16 bits
2532 __ Psubd(dst, kScratchDoubleReg); // get hi 16 bits
2533 __ Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
2534 __ Psrld(dst, byte{1}); // divide by 2 to get in unsigned range
2535 __ Cvtdq2ps(dst, dst); // convert hi exactly
2536 __ Addps(dst, dst); // double hi, exactly
2537 __ Addps(dst, kScratchDoubleReg); // add hi and lo, may round.
2538 break;
2539 }
2540 case kX64F32x4Abs: {
2541 XMMRegister dst = i.OutputSimd128Register();
2542 XMMRegister src = i.InputSimd128Register(0);
2543 if (dst == src) {
2544 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2545 __ Psrld(kScratchDoubleReg, byte{1});
2546 __ Andps(i.OutputSimd128Register(), kScratchDoubleReg);
2547 } else {
2548 __ Pcmpeqd(dst, dst);
2549 __ Psrld(dst, byte{1});
2550 __ Andps(dst, i.InputSimd128Register(0));
2551 }
2552 break;
2553 }
2554 case kX64F32x4Neg: {
2555 XMMRegister dst = i.OutputSimd128Register();
2556 XMMRegister src = i.InputSimd128Register(0);
2557 if (dst == src) {
2558 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2559 __ Pslld(kScratchDoubleReg, byte{31});
2560 __ Xorps(i.OutputSimd128Register(), kScratchDoubleReg);
2561 } else {
2562 __ Pcmpeqd(dst, dst);
2563 __ Pslld(dst, byte{31});
2564 __ Xorps(dst, i.InputSimd128Register(0));
2565 }
2566 break;
2567 }
2568 case kX64F32x4Sqrt: {
2569 __ Sqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2570 break;
2571 }
2572 case kX64F32x4RecipApprox: {
2573 __ Rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2574 break;
2575 }
2576 case kX64F32x4RecipSqrtApprox: {
2577 __ Rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2578 break;
2579 }
2580 case kX64F32x4Add: {
2581 ASSEMBLE_SIMD_BINOP(addps);
2582 break;
2583 }
2584 case kX64F32x4AddHoriz: {
2585 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2586 __ Haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2587 break;
2588 }
2589 case kX64F32x4Sub: {
2590 ASSEMBLE_SIMD_BINOP(subps);
2591 break;
2592 }
2593 case kX64F32x4Mul: {
2594 ASSEMBLE_SIMD_BINOP(mulps);
2595 break;
2596 }
2597 case kX64F32x4Div: {
2598 ASSEMBLE_SIMD_BINOP(divps);
2599 break;
2600 }
2601 case kX64F32x4Min: {
2602 XMMRegister src1 = i.InputSimd128Register(1),
2603 dst = i.OutputSimd128Register();
2604 DCHECK_EQ(dst, i.InputSimd128Register(0));
2605 // The minps instruction doesn't propagate NaNs and +0's in its first
2606 // operand. Perform minps in both orders, merge the results, and adjust.
2607 __ Movaps(kScratchDoubleReg, src1);
2608 __ Minps(kScratchDoubleReg, dst);
2609 __ Minps(dst, src1);
2610 // Propagate -0's and NaNs, which may be non-canonical.
2611 __ Orps(kScratchDoubleReg, dst);
2612 // Canonicalize NaNs by quieting and clearing the payload.
2613 __ Cmpps(dst, kScratchDoubleReg, int8_t{3});
2614 __ Orps(kScratchDoubleReg, dst);
2615 __ Psrld(dst, byte{10});
2616 __ Andnps(dst, kScratchDoubleReg);
2617 break;
2618 }
2619 case kX64F32x4Max: {
2620 XMMRegister src1 = i.InputSimd128Register(1),
2621 dst = i.OutputSimd128Register();
2622 DCHECK_EQ(dst, i.InputSimd128Register(0));
2623 // The maxps instruction doesn't propagate NaNs and +0's in its first
2624 // operand. Perform maxps in both orders, merge the results, and adjust.
2625 __ Movaps(kScratchDoubleReg, src1);
2626 __ Maxps(kScratchDoubleReg, dst);
2627 __ Maxps(dst, src1);
2628 // Find discrepancies.
2629 __ Xorps(dst, kScratchDoubleReg);
2630 // Propagate NaNs, which may be non-canonical.
2631 __ Orps(kScratchDoubleReg, dst);
2632 // Propagate sign discrepancy and (subtle) quiet NaNs.
2633 __ Subps(kScratchDoubleReg, dst);
2634 // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
2635 __ Cmpps(dst, kScratchDoubleReg, int8_t{3});
2636 __ Psrld(dst, byte{10});
2637 __ Andnps(dst, kScratchDoubleReg);
2638 break;
2639 }
2640 case kX64F32x4Eq: {
2641 ASSEMBLE_SIMD_BINOP(cmpeqps);
2642 break;
2643 }
2644 case kX64F32x4Ne: {
2645 ASSEMBLE_SIMD_BINOP(cmpneqps);
2646 break;
2647 }
2648 case kX64F32x4Lt: {
2649 ASSEMBLE_SIMD_BINOP(cmpltps);
2650 break;
2651 }
2652 case kX64F32x4Le: {
2653 ASSEMBLE_SIMD_BINOP(cmpleps);
2654 break;
2655 }
2656 case kX64F32x4Qfma: {
2657 if (CpuFeatures::IsSupported(FMA3)) {
2658 CpuFeatureScope fma3_scope(tasm(), FMA3);
2659 __ vfmadd231ps(i.OutputSimd128Register(), i.InputSimd128Register(1),
2660 i.InputSimd128Register(2));
2661 } else {
2662 XMMRegister tmp = i.TempSimd128Register(0);
2663 __ Movaps(tmp, i.InputSimd128Register(2));
2664 __ Mulps(tmp, i.InputSimd128Register(1));
2665 __ Addps(i.OutputSimd128Register(), tmp);
2666 }
2667 break;
2668 }
2669 case kX64F32x4Qfms: {
2670 if (CpuFeatures::IsSupported(FMA3)) {
2671 CpuFeatureScope fma3_scope(tasm(), FMA3);
2672 __ vfnmadd231ps(i.OutputSimd128Register(), i.InputSimd128Register(1),
2673 i.InputSimd128Register(2));
2674 } else {
2675 XMMRegister tmp = i.TempSimd128Register(0);
2676 __ Movaps(tmp, i.InputSimd128Register(2));
2677 __ Mulps(tmp, i.InputSimd128Register(1));
2678 __ Subps(i.OutputSimd128Register(), tmp);
2679 }
2680 break;
2681 }
2682 case kX64F32x4Pmin: {
2683 XMMRegister dst = i.OutputSimd128Register();
2684 DCHECK_EQ(dst, i.InputSimd128Register(0));
2685 __ Minps(dst, i.InputSimd128Register(1));
2686 break;
2687 }
2688 case kX64F32x4Pmax: {
2689 XMMRegister dst = i.OutputSimd128Register();
2690 DCHECK_EQ(dst, i.InputSimd128Register(0));
2691 __ Maxps(dst, i.InputSimd128Register(1));
2692 break;
2693 }
2694 case kX64F32x4Round: {
2695 RoundingMode const mode =
2696 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
2697 __ Roundps(i.OutputSimd128Register(), i.InputSimd128Register(0), mode);
2698 break;
2699 }
2700 case kX64F64x2Round: {
2701 RoundingMode const mode =
2702 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
2703 __ Roundpd(i.OutputSimd128Register(), i.InputSimd128Register(0), mode);
2704 break;
2705 }
2706 case kX64F64x2Pmin: {
2707 XMMRegister dst = i.OutputSimd128Register();
2708 DCHECK_EQ(dst, i.InputSimd128Register(0));
2709 __ Minpd(dst, i.InputSimd128Register(1));
2710 break;
2711 }
2712 case kX64F64x2Pmax: {
2713 XMMRegister dst = i.OutputSimd128Register();
2714 DCHECK_EQ(dst, i.InputSimd128Register(0));
2715 __ Maxpd(dst, i.InputSimd128Register(1));
2716 break;
2717 }
2718 case kX64I64x2Splat: {
2719 XMMRegister dst = i.OutputSimd128Register();
2720 if (HasRegisterInput(instr, 0)) {
2721 __ Movq(dst, i.InputRegister(0));
2722 } else {
2723 __ Movq(dst, i.InputOperand(0));
2724 }
2725 __ Movddup(dst, dst);
2726 break;
2727 }
2728 case kX64I64x2ExtractLane: {
2729 __ Pextrq(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
2730 break;
2731 }
2732 case kX64I64x2Neg: {
2733 XMMRegister dst = i.OutputSimd128Register();
2734 XMMRegister src = i.InputSimd128Register(0);
2735 if (dst == src) {
2736 __ Movapd(kScratchDoubleReg, src);
2737 src = kScratchDoubleReg;
2738 }
2739 __ Pxor(dst, dst);
2740 __ Psubq(dst, src);
2741 break;
2742 }
2743 case kX64I64x2BitMask: {
2744 __ Movmskpd(i.OutputRegister(), i.InputSimd128Register(0));
2745 break;
2746 }
2747 case kX64I64x2Shl: {
2748 // Take shift value modulo 2^6.
2749 ASSEMBLE_SIMD_SHIFT(psllq, 6);
2750 break;
2751 }
2752 case kX64I64x2ShrS: {
2753 // TODO(zhin): there is vpsraq, but it requires AVX512.
2754 // Arithmetic-shift each quadword one at a time.
2755 XMMRegister dst = i.OutputSimd128Register();
2756 XMMRegister src = i.InputSimd128Register(0);
2757 Register tmp = i.ToRegister(instr->TempAt(0));
2758 // Modulo 64 not required as sarq_cl will mask cl to 6 bits.
2759
2760 // lower quadword
2761 __ Pextrq(tmp, src, int8_t{0x0});
2762 __ sarq_cl(tmp);
2763 __ Pinsrq(dst, tmp, uint8_t{0x0});
2764
2765 // upper quadword
2766 __ Pextrq(tmp, src, int8_t{0x1});
2767 __ sarq_cl(tmp);
2768 __ Pinsrq(dst, tmp, uint8_t{0x1});
2769 break;
2770 }
2771 case kX64I64x2Add: {
2772 ASSEMBLE_SIMD_BINOP(paddq);
2773 break;
2774 }
2775 case kX64I64x2Sub: {
2776 ASSEMBLE_SIMD_BINOP(psubq);
2777 break;
2778 }
2779 case kX64I64x2Mul: {
2780 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2781 XMMRegister left = i.InputSimd128Register(0);
2782 XMMRegister right = i.InputSimd128Register(1);
2783 XMMRegister tmp1 = i.TempSimd128Register(0);
2784 XMMRegister tmp2 = i.TempSimd128Register(1);
2785
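// Per-lane 64x64->64 multiply from 32-bit halves:
// left * right = (left_lo * right_lo) +
//                ((left_hi * right_lo + left_lo * right_hi) << 32),
// where each 32x32->64 product is computed with pmuludq.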
2786 __ Movaps(tmp1, left);
2787 __ Movaps(tmp2, right);
2788
2789 // Multiply high dword of each qword of left with right.
2790 __ Psrlq(tmp1, 32);
2791 __ Pmuludq(tmp1, right);
2792
2793 // Multiply high dword of each qword of right with left.
2794 __ Psrlq(tmp2, 32);
2795 __ Pmuludq(tmp2, left);
2796
2797 __ Paddq(tmp2, tmp1);
2798 __ Psllq(tmp2, 32);
2799
2800 __ Pmuludq(left, right);
2801 __ Paddq(left, tmp2); // left == dst
2802 break;
2803 }
2804 case kX64I64x2Eq: {
2805 ASSEMBLE_SIMD_BINOP(pcmpeqq);
2806 break;
2807 }
2808 case kX64I64x2ShrU: {
2809 // Take shift value modulo 2^6.
2810 ASSEMBLE_SIMD_SHIFT(psrlq, 6);
2811 break;
2812 }
2813 case kX64I32x4Splat: {
2814 XMMRegister dst = i.OutputSimd128Register();
2815 if (HasRegisterInput(instr, 0)) {
2816 __ Movd(dst, i.InputRegister(0));
2817 } else {
2818 __ Movd(dst, i.InputOperand(0));
2819 }
2820 __ Pshufd(dst, dst, uint8_t{0x0});
2821 break;
2822 }
2823 case kX64I32x4ExtractLane: {
2824 __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
2825 break;
2826 }
2827 case kX64I32x4SConvertF32x4: {
2828 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2829 XMMRegister dst = i.OutputSimd128Register();
2830 XMMRegister tmp = i.TempSimd128Register(0);
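// cvttps2dq returns 0x80000000 for NaN and out-of-range inputs; the
// fix-ups below turn NaN lanes into 0 and positive-overflow lanes into
// 0x7FFFFFFF.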
2831 // NaN->0
2832 __ Movaps(tmp, dst);
2833 __ Cmpeqps(tmp, tmp);
2834 __ Pand(dst, tmp);
2835 // Set top bit if >= 0 (but not -0.0!)
2836 __ Pxor(tmp, dst);
2837 // Convert
2838 __ Cvttps2dq(dst, dst);
2839 // Set top bit if >=0 is now < 0
2840 __ Pand(tmp, dst);
2841 __ Psrad(tmp, byte{31});
2842 // Set positive overflow lanes to 0x7FFFFFFF
2843 __ Pxor(dst, tmp);
2844 break;
2845 }
2846 case kX64I32x4SConvertI16x8Low: {
2847 __ Pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2848 break;
2849 }
2850 case kX64I32x4SConvertI16x8High: {
2851 XMMRegister dst = i.OutputSimd128Register();
2852 __ Palignr(dst, i.InputSimd128Register(0), uint8_t{8});
2853 __ Pmovsxwd(dst, dst);
2854 break;
2855 }
2856 case kX64I32x4Neg: {
2857 XMMRegister dst = i.OutputSimd128Register();
2858 XMMRegister src = i.InputSimd128Register(0);
2859 if (dst == src) {
2860 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2861 __ Psignd(dst, kScratchDoubleReg);
2862 } else {
2863 __ Pxor(dst, dst);
2864 __ Psubd(dst, src);
2865 }
2866 break;
2867 }
2868 case kX64I32x4Shl: {
2869 // Take shift value modulo 2^5.
2870 ASSEMBLE_SIMD_SHIFT(pslld, 5);
2871 break;
2872 }
2873 case kX64I32x4ShrS: {
2874 // Take shift value modulo 2^5.
2875 ASSEMBLE_SIMD_SHIFT(psrad, 5);
2876 break;
2877 }
2878 case kX64I32x4Add: {
2879 ASSEMBLE_SIMD_BINOP(paddd);
2880 break;
2881 }
2882 case kX64I32x4AddHoriz: {
2883 ASSEMBLE_SIMD_BINOP(phaddd);
2884 break;
2885 }
2886 case kX64I32x4Sub: {
2887 ASSEMBLE_SIMD_BINOP(psubd);
2888 break;
2889 }
2890 case kX64I32x4Mul: {
2891 ASSEMBLE_SIMD_BINOP(pmulld);
2892 break;
2893 }
2894 case kX64I32x4MinS: {
2895 ASSEMBLE_SIMD_BINOP(pminsd);
2896 break;
2897 }
2898 case kX64I32x4MaxS: {
2899 ASSEMBLE_SIMD_BINOP(pmaxsd);
2900 break;
2901 }
2902 case kX64I32x4Eq: {
2903 ASSEMBLE_SIMD_BINOP(pcmpeqd);
2904 break;
2905 }
2906 case kX64I32x4Ne: {
2907 XMMRegister tmp = i.TempSimd128Register(0);
2908 __ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2909 __ Pcmpeqd(tmp, tmp);
2910 __ Pxor(i.OutputSimd128Register(), tmp);
2911 break;
2912 }
2913 case kX64I32x4GtS: {
2914 ASSEMBLE_SIMD_BINOP(pcmpgtd);
2915 break;
2916 }
2917 case kX64I32x4GeS: {
2918 XMMRegister dst = i.OutputSimd128Register();
2919 XMMRegister src = i.InputSimd128Register(1);
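// a >= b is computed as min(a, b) == b.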
2920 __ Pminsd(dst, src);
2921 __ Pcmpeqd(dst, src);
2922 break;
2923 }
2924 case kX64I32x4UConvertF32x4: {
2925 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2926 XMMRegister dst = i.OutputSimd128Register();
2927 XMMRegister tmp = i.TempSimd128Register(0);
2928 XMMRegister tmp2 = i.TempSimd128Register(1);
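// There is no unsigned packed conversion, so: clamp NaN and negatives to
// zero, convert the value directly (lanes >= 2^31 become 0x80000000), and
// separately convert (value - 2^31) for those lanes and add it back in.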
2929 // NaN->0, negative->0
2930 __ Pxor(tmp2, tmp2);
2931 __ Maxps(dst, tmp2);
2932 // tmp2: float representation of max_signed
2933 __ Pcmpeqd(tmp2, tmp2);
2934 __ Psrld(tmp2, uint8_t{1}); // 0x7fffffff
2935 __ Cvtdq2ps(tmp2, tmp2); // 0x4f000000
2936 // tmp: convert (src-max_signed).
2937 // Positive overflow lanes -> 0x7FFFFFFF
2938 // Negative lanes -> 0
2939 __ Movaps(tmp, dst);
2940 __ Subps(tmp, tmp2);
2941 __ Cmpleps(tmp2, tmp);
2942 __ Cvttps2dq(tmp, tmp);
2943 __ Pxor(tmp, tmp2);
2944 __ Pxor(tmp2, tmp2);
2945 __ Pmaxsd(tmp, tmp2);
2946 // convert. Overflow lanes above max_signed will be 0x80000000
2947 __ Cvttps2dq(dst, dst);
2948 // Add (src-max_signed) for overflow lanes.
2949 __ Paddd(dst, tmp);
2950 break;
2951 }
2952 case kX64I32x4UConvertI16x8Low: {
2953 __ Pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2954 break;
2955 }
2956 case kX64I32x4UConvertI16x8High: {
2957 XMMRegister dst = i.OutputSimd128Register();
2958 __ Palignr(dst, i.InputSimd128Register(0), uint8_t{8});
2959 __ Pmovzxwd(dst, dst);
2960 break;
2961 }
2962 case kX64I32x4ShrU: {
2963 // Take shift value modulo 2^5.
2964 ASSEMBLE_SIMD_SHIFT(psrld, 5);
2965 break;
2966 }
2967 case kX64I32x4MinU: {
2968 ASSEMBLE_SIMD_BINOP(pminud);
2969 break;
2970 }
2971 case kX64I32x4MaxU: {
2972 ASSEMBLE_SIMD_BINOP(pmaxud);
2973 break;
2974 }
2975 case kX64I32x4GtU: {
2976 XMMRegister dst = i.OutputSimd128Register();
2977 XMMRegister src = i.InputSimd128Register(1);
2978 XMMRegister tmp = i.TempSimd128Register(0);
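// There is no unsigned greater-than, so compute NOT(max(a, b) == b),
// i.e. NOT(a <= b).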
2979 __ Pmaxud(dst, src);
2980 __ Pcmpeqd(dst, src);
2981 __ Pcmpeqd(tmp, tmp);
2982 __ Pxor(dst, tmp);
2983 break;
2984 }
2985 case kX64I32x4GeU: {
2986 XMMRegister dst = i.OutputSimd128Register();
2987 XMMRegister src = i.InputSimd128Register(1);
2988 __ Pminud(dst, src);
2989 __ Pcmpeqd(dst, src);
2990 break;
2991 }
2992 case kX64I32x4Abs: {
2993 __ Pabsd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2994 break;
2995 }
2996 case kX64I32x4BitMask: {
2997 __ Movmskps(i.OutputRegister(), i.InputSimd128Register(0));
2998 break;
2999 }
3000 case kX64I32x4DotI16x8S: {
3001 ASSEMBLE_SIMD_BINOP(pmaddwd);
3002 break;
3003 }
3004 case kX64S128Const: {
3005 // Emit code for generic constants; the all-zeros and all-ones cases are
3006 // handled separately by the selector.
3007 XMMRegister dst = i.OutputSimd128Register();
3008 uint32_t imm[4] = {};
3009 for (int j = 0; j < 4; j++) {
3010 imm[j] = i.InputUint32(j);
3011 }
3012 SetupSimdImmediateInRegister(tasm(), imm, dst);
3013 break;
3014 }
3015 case kX64S128Zero: {
3016 XMMRegister dst = i.OutputSimd128Register();
3017 __ Pxor(dst, dst);
3018 break;
3019 }
3020 case kX64S128AllOnes: {
3021 XMMRegister dst = i.OutputSimd128Register();
3022 __ Pcmpeqd(dst, dst);
3023 break;
3024 }
3025 case kX64I16x8Splat: {
3026 XMMRegister dst = i.OutputSimd128Register();
3027 if (HasRegisterInput(instr, 0)) {
3028 __ Movd(dst, i.InputRegister(0));
3029 } else {
3030 __ Movd(dst, i.InputOperand(0));
3031 }
3032 __ Pshuflw(dst, dst, uint8_t{0x0});
3033 __ Pshufd(dst, dst, uint8_t{0x0});
3034 break;
3035 }
3036 case kX64I16x8ExtractLaneS: {
3037 Register dst = i.OutputRegister();
3038 __ Pextrw(dst, i.InputSimd128Register(0), i.InputUint8(1));
3039 __ movsxwl(dst, dst);
3040 break;
3041 }
3042 case kX64I16x8SConvertI8x16Low: {
3043 __ Pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
3044 break;
3045 }
3046 case kX64I16x8SConvertI8x16High: {
3047 XMMRegister dst = i.OutputSimd128Register();
3048 __ Palignr(dst, i.InputSimd128Register(0), uint8_t{8});
3049 __ Pmovsxbw(dst, dst);
3050 break;
3051 }
3052 case kX64I16x8Neg: {
3053 XMMRegister dst = i.OutputSimd128Register();
3054 XMMRegister src = i.InputSimd128Register(0);
3055 if (dst == src) {
3056 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3057 __ Psignw(dst, kScratchDoubleReg);
3058 } else {
3059 __ Pxor(dst, dst);
3060 __ Psubw(dst, src);
3061 }
3062 break;
3063 }
3064 case kX64I16x8Shl: {
3065 // Take shift value modulo 2^4.
3066 ASSEMBLE_SIMD_SHIFT(psllw, 4);
3067 break;
3068 }
3069 case kX64I16x8ShrS: {
3070 // Take shift value modulo 2^4.
3071 ASSEMBLE_SIMD_SHIFT(psraw, 4);
3072 break;
3073 }
3074 case kX64I16x8SConvertI32x4: {
3075 ASSEMBLE_SIMD_BINOP(packssdw);
3076 break;
3077 }
3078 case kX64I16x8Add: {
3079 ASSEMBLE_SIMD_BINOP(paddw);
3080 break;
3081 }
3082 case kX64I16x8AddSatS: {
3083 ASSEMBLE_SIMD_BINOP(paddsw);
3084 break;
3085 }
3086 case kX64I16x8AddHoriz: {
3087 ASSEMBLE_SIMD_BINOP(phaddw);
3088 break;
3089 }
3090 case kX64I16x8Sub: {
3091 ASSEMBLE_SIMD_BINOP(psubw);
3092 break;
3093 }
3094 case kX64I16x8SubSatS: {
3095 ASSEMBLE_SIMD_BINOP(psubsw);
3096 break;
3097 }
3098 case kX64I16x8Mul: {
3099 ASSEMBLE_SIMD_BINOP(pmullw);
3100 break;
3101 }
3102 case kX64I16x8MinS: {
3103 ASSEMBLE_SIMD_BINOP(pminsw);
3104 break;
3105 }
3106 case kX64I16x8MaxS: {
3107 ASSEMBLE_SIMD_BINOP(pmaxsw);
3108 break;
3109 }
3110 case kX64I16x8Eq: {
3111 ASSEMBLE_SIMD_BINOP(pcmpeqw);
3112 break;
3113 }
3114 case kX64I16x8Ne: {
3115 XMMRegister tmp = i.TempSimd128Register(0);
3116 __ Pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3117 __ Pcmpeqw(tmp, tmp);
3118 __ Pxor(i.OutputSimd128Register(), tmp);
3119 break;
3120 }
3121 case kX64I16x8GtS: {
3122 ASSEMBLE_SIMD_BINOP(pcmpgtw);
3123 break;
3124 }
3125 case kX64I16x8GeS: {
3126 XMMRegister dst = i.OutputSimd128Register();
3127 XMMRegister src = i.InputSimd128Register(1);
3128 __ Pminsw(dst, src);
3129 __ Pcmpeqw(dst, src);
3130 break;
3131 }
3132 case kX64I16x8UConvertI8x16Low: {
3133 __ Pmovzxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
3134 break;
3135 }
3136 case kX64I16x8UConvertI8x16High: {
3137 XMMRegister dst = i.OutputSimd128Register();
3138 __ Palignr(dst, i.InputSimd128Register(0), uint8_t{8});
3139 __ Pmovzxbw(dst, dst);
3140 break;
3141 }
3142 case kX64I16x8ShrU: {
3143 // Take shift value modulo 2^4.
3144 ASSEMBLE_SIMD_SHIFT(psrlw, 4);
3145 break;
3146 }
3147 case kX64I16x8UConvertI32x4: {
3148 ASSEMBLE_SIMD_BINOP(packusdw);
3149 break;
3150 }
3151 case kX64I16x8AddSatU: {
3152 ASSEMBLE_SIMD_BINOP(paddusw);
3153 break;
3154 }
3155 case kX64I16x8SubSatU: {
3156 ASSEMBLE_SIMD_BINOP(psubusw);
3157 break;
3158 }
3159 case kX64I16x8MinU: {
3160 ASSEMBLE_SIMD_BINOP(pminuw);
3161 break;
3162 }
3163 case kX64I16x8MaxU: {
3164 ASSEMBLE_SIMD_BINOP(pmaxuw);
3165 break;
3166 }
3167 case kX64I16x8GtU: {
3168 XMMRegister dst = i.OutputSimd128Register();
3169 XMMRegister src = i.InputSimd128Register(1);
3170 XMMRegister tmp = i.TempSimd128Register(0);
3171 __ Pmaxuw(dst, src);
3172 __ Pcmpeqw(dst, src);
3173 __ Pcmpeqw(tmp, tmp);
3174 __ Pxor(dst, tmp);
3175 break;
3176 }
3177 case kX64I16x8GeU: {
3178 XMMRegister dst = i.OutputSimd128Register();
3179 XMMRegister src = i.InputSimd128Register(1);
3180 __ Pminuw(dst, src);
3181 __ Pcmpeqw(dst, src);
3182 break;
3183 }
3184 case kX64I16x8RoundingAverageU: {
3185 ASSEMBLE_SIMD_BINOP(pavgw);
3186 break;
3187 }
3188 case kX64I16x8Abs: {
3189 __ Pabsw(i.OutputSimd128Register(), i.InputSimd128Register(0));
3190 break;
3191 }
3192 case kX64I16x8BitMask: {
3193 Register dst = i.OutputRegister();
3194 XMMRegister tmp = i.TempSimd128Register(0);
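// packsswb packs the eight words of the input into the high eight bytes of
// tmp (its low bytes come from tmp itself); pmovmskb then collects sixteen
// byte sign bits, of which the upper eight are the wanted word sign bits.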
3195 __ Packsswb(tmp, i.InputSimd128Register(0));
3196 __ Pmovmskb(dst, tmp);
3197 __ shrq(dst, Immediate(8));
3198 break;
3199 }
3200 case kX64I8x16Splat: {
3201 XMMRegister dst = i.OutputSimd128Register();
3202 if (HasRegisterInput(instr, 0)) {
3203 __ Movd(dst, i.InputRegister(0));
3204 } else {
3205 __ Movd(dst, i.InputOperand(0));
3206 }
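// Pshufb with an all-zero shuffle mask broadcasts byte 0 to every lane.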
3207 __ Xorps(kScratchDoubleReg, kScratchDoubleReg);
3208 __ Pshufb(dst, kScratchDoubleReg);
3209 break;
3210 }
3211 case kX64Pextrb: {
3212 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3213 size_t index = 0;
3214 if (HasAddressingMode(instr)) {
3215 Operand operand = i.MemoryOperand(&index);
3216 __ Pextrb(operand, i.InputSimd128Register(index),
3217 i.InputUint8(index + 1));
3218 } else {
3219 __ Pextrb(i.OutputRegister(), i.InputSimd128Register(0),
3220 i.InputUint8(1));
3221 }
3222 break;
3223 }
3224 case kX64Pextrw: {
3225 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3226 size_t index = 0;
3227 if (HasAddressingMode(instr)) {
3228 Operand operand = i.MemoryOperand(&index);
3229 __ Pextrw(operand, i.InputSimd128Register(index),
3230 i.InputUint8(index + 1));
3231 } else {
3232 __ Pextrw(i.OutputRegister(), i.InputSimd128Register(0),
3233 i.InputUint8(1));
3234 }
3235 break;
3236 }
3237 case kX64I8x16ExtractLaneS: {
3238 Register dst = i.OutputRegister();
3239 __ Pextrb(dst, i.InputSimd128Register(0), i.InputUint8(1));
3240 __ movsxbl(dst, dst);
3241 break;
3242 }
3243 case kX64Pinsrb: {
3244 ASSEMBLE_PINSR(Pinsrb);
3245 break;
3246 }
3247 case kX64Pinsrw: {
3248 ASSEMBLE_PINSR(Pinsrw);
3249 break;
3250 }
3251 case kX64Pinsrd: {
3252 ASSEMBLE_PINSR(Pinsrd);
3253 break;
3254 }
3255 case kX64Pinsrq: {
3256 ASSEMBLE_PINSR(Pinsrq);
3257 break;
3258 }
3259 case kX64I8x16SConvertI16x8: {
3260 ASSEMBLE_SIMD_BINOP(packsswb);
3261 break;
3262 }
3263 case kX64I8x16Neg: {
3264 XMMRegister dst = i.OutputSimd128Register();
3265 XMMRegister src = i.InputSimd128Register(0);
3266 if (dst == src) {
3267 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3268 __ Psignb(dst, kScratchDoubleReg);
3269 } else {
3270 __ Pxor(dst, dst);
3271 __ Psubb(dst, src);
3272 }
3273 break;
3274 }
3275 case kX64I8x16Shl: {
3276 XMMRegister dst = i.OutputSimd128Register();
3277 DCHECK_EQ(dst, i.InputSimd128Register(0));
3278 // Temp registers for shift mask and additional moves to XMM registers.
3279 Register tmp = i.ToRegister(instr->TempAt(0));
3280 XMMRegister tmp_simd = i.TempSimd128Register(1);
3281 if (HasImmediateInput(instr, 1)) {
3282 // Perform 16-bit shift, then mask away low bits.
3283 uint8_t shift = i.InputInt3(1);
3284 __ Psllw(dst, byte{shift});
3285
3286 uint8_t bmask = static_cast<uint8_t>(0xff << shift);
3287 uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
3288 __ movl(tmp, Immediate(mask));
3289 __ Movd(tmp_simd, tmp);
3290 __ Pshufd(tmp_simd, tmp_simd, uint8_t{0});
3291 __ Pand(dst, tmp_simd);
3292 } else {
3293 // Mask off the unwanted bits before word-shifting.
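// Build a per-byte mask of (0xff >> shift), clearing the bits that would
// shift out of each byte: shift an all-ones word right by (shift + 8),
// then re-pack the words to bytes with unsigned saturation.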
3294 __ Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
3295 // Take shift value modulo 8.
3296 __ movq(tmp, i.InputRegister(1));
3297 __ andq(tmp, Immediate(7));
3298 __ addq(tmp, Immediate(8));
3299 __ Movq(tmp_simd, tmp);
3300 __ Psrlw(kScratchDoubleReg, tmp_simd);
3301 __ Packuswb(kScratchDoubleReg, kScratchDoubleReg);
3302 __ Pand(dst, kScratchDoubleReg);
3303 // TODO(zhin): subq here to avoid asking for another temporary register;
3304 // examine codegen for the other i8x16 shifts, which use fewer instructions.
3305 __ subq(tmp, Immediate(8));
3306 __ Movq(tmp_simd, tmp);
3307 __ Psllw(dst, tmp_simd);
3308 }
3309 break;
3310 }
3311 case kX64I8x16ShrS: {
3312 XMMRegister dst = i.OutputSimd128Register();
3313 DCHECK_EQ(dst, i.InputSimd128Register(0));
3314 if (HasImmediateInput(instr, 1)) {
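// Unpack each byte into both halves of a word, shift the words
// arithmetically by (shift + 8) so the sign comes from the original byte,
// then re-pack with signed saturation.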
3315 __ Punpckhbw(kScratchDoubleReg, dst);
3316 __ Punpcklbw(dst, dst);
3317 uint8_t shift = i.InputInt3(1) + 8;
3318 __ Psraw(kScratchDoubleReg, shift);
3319 __ Psraw(dst, shift);
3320 __ Packsswb(dst, kScratchDoubleReg);
3321 } else {
3322 // Temp registers for shift mask and additional moves to XMM registers.
3323 Register tmp = i.ToRegister(instr->TempAt(0));
3324 XMMRegister tmp_simd = i.TempSimd128Register(1);
3325 // Unpack the bytes into words, do arithmetic shifts, and repack.
3326 __ Punpckhbw(kScratchDoubleReg, dst);
3327 __ Punpcklbw(dst, dst);
3328 // Prepare shift value
3329 __ movq(tmp, i.InputRegister(1));
3330 // Take shift value modulo 8.
3331 __ andq(tmp, Immediate(7));
3332 __ addq(tmp, Immediate(8));
3333 __ Movq(tmp_simd, tmp);
3334 __ Psraw(kScratchDoubleReg, tmp_simd);
3335 __ Psraw(dst, tmp_simd);
3336 __ Packsswb(dst, kScratchDoubleReg);
3337 }
3338 break;
3339 }
3340 case kX64I8x16Add: {
3341 ASSEMBLE_SIMD_BINOP(paddb);
3342 break;
3343 }
3344 case kX64I8x16AddSatS: {
3345 ASSEMBLE_SIMD_BINOP(paddsb);
3346 break;
3347 }
3348 case kX64I8x16Sub: {
3349 ASSEMBLE_SIMD_BINOP(psubb);
3350 break;
3351 }
3352 case kX64I8x16SubSatS: {
3353 ASSEMBLE_SIMD_BINOP(psubsb);
3354 break;
3355 }
3356 case kX64I8x16Mul: {
3357 XMMRegister dst = i.OutputSimd128Register();
3358 DCHECK_EQ(dst, i.InputSimd128Register(0));
3359 XMMRegister right = i.InputSimd128Register(1);
3360 XMMRegister tmp = i.TempSimd128Register(0);
3361 // I16x8 view of I8x16
3362 // left = AAaa AAaa ... AAaa AAaa
3363 // right= BBbb BBbb ... BBbb BBbb
3364 // t = 00AA 00AA ... 00AA 00AA
3365 // s = 00BB 00BB ... 00BB 00BB
3366 __ Movaps(tmp, dst);
3367 __ Movaps(kScratchDoubleReg, right);
3368 __ Psrlw(tmp, byte{8});
3369 __ Psrlw(kScratchDoubleReg, byte{8});
3370 // dst = left * 256
3371 __ Psllw(dst, byte{8});
3372 // t = I16x8Mul(t, s)
3373 // => __PP __PP ... __PP __PP
3374 __ Pmullw(tmp, kScratchDoubleReg);
3375 // dst = I16x8Mul(left * 256, right)
3376 // => pp__ pp__ ... pp__ pp__
3377 __ Pmullw(dst, right);
3378 // t = I16x8Shl(t, 8)
3379 // => PP00 PP00 ... PP00 PP00
3380 __ Psllw(tmp, byte{8});
3381 // dst = I16x8Shr(dst, 8)
3382 // => 00pp 00pp ... 00pp 00pp
3383 __ Psrlw(dst, byte{8});
3384 // dst = I16x8Or(dst, t)
3385 // => PPpp PPpp ... PPpp PPpp
3386 __ Por(dst, tmp);
3387 break;
3388 }
3389 case kX64I8x16MinS: {
3390 ASSEMBLE_SIMD_BINOP(pminsb);
3391 break;
3392 }
3393 case kX64I8x16MaxS: {
3394 ASSEMBLE_SIMD_BINOP(pmaxsb);
3395 break;
3396 }
3397 case kX64I8x16Eq: {
3398 ASSEMBLE_SIMD_BINOP(pcmpeqb);
3399 break;
3400 }
3401 case kX64I8x16Ne: {
3402 XMMRegister tmp = i.TempSimd128Register(0);
3403 __ Pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3404 __ Pcmpeqb(tmp, tmp);
3405 __ Pxor(i.OutputSimd128Register(), tmp);
3406 break;
3407 }
3408 case kX64I8x16GtS: {
3409 ASSEMBLE_SIMD_BINOP(pcmpgtb);
3410 break;
3411 }
3412 case kX64I8x16GeS: {
3413 XMMRegister dst = i.OutputSimd128Register();
3414 XMMRegister src = i.InputSimd128Register(1);
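// a >= b (signed) iff min(a, b) == b, so compute the byte-wise minimum and
// compare it against b.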
3415 __ Pminsb(dst, src);
3416 __ Pcmpeqb(dst, src);
3417 break;
3418 }
3419 case kX64I8x16UConvertI16x8: {
3420 ASSEMBLE_SIMD_BINOP(packuswb);
3421 break;
3422 }
3423 case kX64I8x16ShrU: {
3424 XMMRegister dst = i.OutputSimd128Register();
3425 // Unpack the bytes into words, do logical shifts, and repack.
3426 DCHECK_EQ(dst, i.InputSimd128Register(0));
3427 // Temp registers for shift mask and additional moves to XMM registers.
3428 Register tmp = i.ToRegister(instr->TempAt(0));
3429 XMMRegister tmp_simd = i.TempSimd128Register(1);
3430 if (HasImmediateInput(instr, 1)) {
3431 // Perform 16-bit shift, then mask away high bits.
3432 uint8_t shift = i.InputInt3(1);
3433 __ Psrlw(dst, byte{shift});
3434
3435 uint8_t bmask = 0xff >> shift;
3436 uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
3437 __ movl(tmp, Immediate(mask));
3438 __ Movd(tmp_simd, tmp);
3439 __ Pshufd(tmp_simd, tmp_simd, byte{0});
3440 __ Pand(dst, tmp_simd);
3441 } else {
3442 __ Punpckhbw(kScratchDoubleReg, dst);
3443 __ Punpcklbw(dst, dst);
3444 // Prepare shift value
3445 __ movq(tmp, i.InputRegister(1));
3446 // Take shift value modulo 8.
3447 __ andq(tmp, Immediate(7));
3448 __ addq(tmp, Immediate(8));
3449 __ Movq(tmp_simd, tmp);
3450 __ Psrlw(kScratchDoubleReg, tmp_simd);
3451 __ Psrlw(dst, tmp_simd);
3452 __ Packuswb(dst, kScratchDoubleReg);
3453 }
3454 break;
3455 }
3456 case kX64I8x16AddSatU: {
3457 ASSEMBLE_SIMD_BINOP(paddusb);
3458 break;
3459 }
3460 case kX64I8x16SubSatU: {
3461 ASSEMBLE_SIMD_BINOP(psubusb);
3462 break;
3463 }
3464 case kX64I8x16MinU: {
3465 ASSEMBLE_SIMD_BINOP(pminub);
3466 break;
3467 }
3468 case kX64I8x16MaxU: {
3469 ASSEMBLE_SIMD_BINOP(pmaxub);
3470 break;
3471 }
3472 case kX64I8x16GtU: {
3473 XMMRegister dst = i.OutputSimd128Register();
3474 XMMRegister src = i.InputSimd128Register(1);
3475 XMMRegister tmp = i.TempSimd128Register(0);
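// There is no unsigned byte-wise greater-than compare; a > b is computed as
// NOT(max(a, b) == b).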
3476 __ Pmaxub(dst, src);
3477 __ Pcmpeqb(dst, src);
3478 __ Pcmpeqb(tmp, tmp);
3479 __ Pxor(dst, tmp);
3480 break;
3481 }
3482 case kX64I8x16GeU: {
3483 XMMRegister dst = i.OutputSimd128Register();
3484 XMMRegister src = i.InputSimd128Register(1);
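// a >= b (unsigned) iff min(a, b) == b.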
3485 __ Pminub(dst, src);
3486 __ Pcmpeqb(dst, src);
3487 break;
3488 }
3489 case kX64I8x16RoundingAverageU: {
3490 ASSEMBLE_SIMD_BINOP(pavgb);
3491 break;
3492 }
3493 case kX64I8x16Abs: {
3494 __ Pabsb(i.OutputSimd128Register(), i.InputSimd128Register(0));
3495 break;
3496 }
3497 case kX64I8x16BitMask: {
3498 __ Pmovmskb(i.OutputRegister(), i.InputSimd128Register(0));
3499 break;
3500 }
3501 case kX64I8x16SignSelect: {
3502 __ Pblendvb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3503 i.InputSimd128Register(1), i.InputSimd128Register(2));
3504 break;
3505 }
3506 case kX64I16x8SignSelect: {
3507 if (CpuFeatures::IsSupported(AVX)) {
3508 CpuFeatureScope avx_scope(tasm(), AVX);
3509 __ vpsraw(kScratchDoubleReg, i.InputSimd128Register(2), 15);
3510 __ vpblendvb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3511 i.InputSimd128Register(1), kScratchDoubleReg);
3512 } else {
3513 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3514 XMMRegister mask = i.InputSimd128Register(2);
3515 DCHECK_EQ(xmm0, mask);
3516 __ movapd(kScratchDoubleReg, mask);
3517 __ pxor(mask, mask);
3518 __ pcmpgtw(mask, kScratchDoubleReg);
3519 __ pblendvb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3520 // Restore mask.
3521 __ movapd(mask, kScratchDoubleReg);
3522 }
3523 break;
3524 }
3525 case kX64I32x4SignSelect: {
3526 __ Blendvps(i.OutputSimd128Register(), i.InputSimd128Register(0),
3527 i.InputSimd128Register(1), i.InputSimd128Register(2));
3528 break;
3529 }
3530 case kX64I64x2SignSelect: {
3531 __ Blendvpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
3532 i.InputSimd128Register(1), i.InputSimd128Register(2));
3533 break;
3534 }
3535 case kX64S128And: {
3536 ASSEMBLE_SIMD_BINOP(pand);
3537 break;
3538 }
3539 case kX64S128Or: {
3540 ASSEMBLE_SIMD_BINOP(por);
3541 break;
3542 }
3543 case kX64S128Xor: {
3544 ASSEMBLE_SIMD_BINOP(pxor);
3545 break;
3546 }
3547 case kX64S128Not: {
3548 XMMRegister dst = i.OutputSimd128Register();
3549 XMMRegister src = i.InputSimd128Register(0);
3550 if (dst == src) {
3551 __ Movaps(kScratchDoubleReg, dst);
3552 __ Pcmpeqd(dst, dst);
3553 __ Pxor(dst, kScratchDoubleReg);
3554 } else {
3555 __ Pcmpeqd(dst, dst);
3556 __ Pxor(dst, src);
3557 }
3558
3559 break;
3560 }
3561 case kX64S128Select: {
3562 // Mask used here is stored in dst.
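// Bitwise select: dst = (mask & (src1 ^ src2)) ^ src2, i.e. take bits from
// src1 where the mask is set and from src2 where it is clear.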
3563 XMMRegister dst = i.OutputSimd128Register();
3564 __ Movaps(kScratchDoubleReg, i.InputSimd128Register(1));
3565 __ Xorps(kScratchDoubleReg, i.InputSimd128Register(2));
3566 __ Andps(dst, kScratchDoubleReg);
3567 __ Xorps(dst, i.InputSimd128Register(2));
3568 break;
3569 }
3570 case kX64S128AndNot: {
3571 XMMRegister dst = i.OutputSimd128Register();
3572 DCHECK_EQ(dst, i.InputSimd128Register(0));
3573 // The inputs have been inverted by the instruction selector, so we can call
3574 // andnps here without any modifications.
3575 __ Andnps(dst, i.InputSimd128Register(1));
3576 break;
3577 }
3578 case kX64I8x16Swizzle: {
3579 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3580 XMMRegister dst = i.OutputSimd128Register();
3581 XMMRegister mask = i.TempSimd128Register(0);
3582
3583 // Out-of-range indices should return 0. Adding 112 gives any index > 15 a
3584 // top bit set (value >= 128) after the saturating add, so pshufb zeroes that lane.
3585 __ Move(mask, uint32_t{0x70707070});
3586 __ Pshufd(mask, mask, uint8_t{0x0});
3587 __ Paddusb(mask, i.InputSimd128Register(1));
3588 __ Pshufb(dst, mask);
3589 break;
3590 }
3591 case kX64I8x16Shuffle: {
3592 XMMRegister dst = i.OutputSimd128Register();
3593 XMMRegister tmp_simd = i.TempSimd128Register(0);
3594 if (instr->InputCount() == 5) { // only one input operand
3595 uint32_t mask[4] = {};
3596 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3597 for (int j = 4; j > 0; j--) {
3598 mask[j - 1] = i.InputUint32(j);
3599 }
3600
3601 SetupSimdImmediateInRegister(tasm(), mask, tmp_simd);
3602 __ Pshufb(dst, tmp_simd);
3603 } else { // two input operands
3604 DCHECK_EQ(6, instr->InputCount());
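// The 16 shuffle indices are packed into four 32-bit immediates. Shuffle the
// first operand with a mask that zeroes (0x80) every lane whose index selects
// the second operand, do the converse for the second operand, and OR the two
// results together.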
3605 ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 0);
3606 uint32_t mask1[4] = {};
3607 for (int j = 5; j > 1; j--) {
3608 uint32_t lanes = i.InputUint32(j);
3609 for (int k = 0; k < 32; k += 8) {
3610 uint8_t lane = lanes >> k;
3611 mask1[j - 2] |= (lane < kSimd128Size ? lane : 0x80) << k;
3612 }
3613 }
3614 SetupSimdImmediateInRegister(tasm(), mask1, tmp_simd);
3615 __ Pshufb(kScratchDoubleReg, tmp_simd);
3616 uint32_t mask2[4] = {};
3617 if (instr->InputAt(1)->IsSimd128Register()) {
3618 XMMRegister src1 = i.InputSimd128Register(1);
3619 if (src1 != dst) __ movups(dst, src1);
3620 } else {
3621 __ Movups(dst, i.InputOperand(1));
3622 }
3623 for (int j = 5; j > 1; j--) {
3624 uint32_t lanes = i.InputUint32(j);
3625 for (int k = 0; k < 32; k += 8) {
3626 uint8_t lane = lanes >> k;
3627 mask2[j - 2] |= (lane >= kSimd128Size ? (lane & 0x0F) : 0x80) << k;
3628 }
3629 }
3630 SetupSimdImmediateInRegister(tasm(), mask2, tmp_simd);
3631 __ Pshufb(dst, tmp_simd);
3632 __ Por(dst, kScratchDoubleReg);
3633 }
3634 break;
3635 }
3636 case kX64S128Load8Splat: {
3637 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3638 XMMRegister dst = i.OutputSimd128Register();
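// Load the byte into lane 0, then broadcast it with pshufb using an all-zero
// shuffle mask.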
3639 __ Pinsrb(dst, dst, i.MemoryOperand(), 0);
3640 __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
3641 __ Pshufb(dst, kScratchDoubleReg);
3642 break;
3643 }
3644 case kX64S128Load16Splat: {
3645 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3646 XMMRegister dst = i.OutputSimd128Register();
3647 __ Pinsrw(dst, dst, i.MemoryOperand(), 0);
3648 __ Pshuflw(dst, dst, uint8_t{0});
3649 __ Punpcklqdq(dst, dst);
3650 break;
3651 }
3652 case kX64S128Load32Splat: {
3653 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3654 if (CpuFeatures::IsSupported(AVX)) {
3655 CpuFeatureScope avx_scope(tasm(), AVX);
3656 __ vbroadcastss(i.OutputSimd128Register(), i.MemoryOperand());
3657 } else {
3658 __ movss(i.OutputSimd128Register(), i.MemoryOperand());
3659 __ shufps(i.OutputSimd128Register(), i.OutputSimd128Register(),
3660 byte{0});
3661 }
3662 break;
3663 }
3664 case kX64S128Load64Splat: {
3665 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3666 __ Movddup(i.OutputSimd128Register(), i.MemoryOperand());
3667 break;
3668 }
3669 case kX64S128Load8x8S: {
3670 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3671 __ Pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand());
3672 break;
3673 }
3674 case kX64S128Load8x8U: {
3675 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3676 __ Pmovzxbw(i.OutputSimd128Register(), i.MemoryOperand());
3677 break;
3678 }
3679 case kX64S128Load16x4S: {
3680 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3681 __ Pmovsxwd(i.OutputSimd128Register(), i.MemoryOperand());
3682 break;
3683 }
3684 case kX64S128Load16x4U: {
3685 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3686 __ Pmovzxwd(i.OutputSimd128Register(), i.MemoryOperand());
3687 break;
3688 }
3689 case kX64S128Load32x2S: {
3690 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3691 __ Pmovsxdq(i.OutputSimd128Register(), i.MemoryOperand());
3692 break;
3693 }
3694 case kX64S128Load32x2U: {
3695 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3696 __ Pmovzxdq(i.OutputSimd128Register(), i.MemoryOperand());
3697 break;
3698 }
3699 case kX64S128Store32Lane: {
3700 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3701 size_t index = 0;
3702 Operand operand = i.MemoryOperand(&index);
3703 uint8_t lane = i.InputUint8(index + 1);
3704 if (lane == 0) {
3705 __ Movss(operand, i.InputSimd128Register(index));
3706 } else {
3707 DCHECK_GE(3, lane);
3708 __ Extractps(operand, i.InputSimd128Register(index), lane);
3709 }
3710 break;
3711 }
3712 case kX64S128Store64Lane: {
3713 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3714 size_t index = 0;
3715 Operand operand = i.MemoryOperand(&index);
3716 uint8_t lane = i.InputUint8(index + 1);
3717 if (lane == 0) {
3718 __ Movlps(operand, i.InputSimd128Register(index));
3719 } else {
3720 DCHECK_EQ(1, lane);
3721 __ Movhps(operand, i.InputSimd128Register(index));
3722 }
3723 break;
3724 }
3725 case kX64S32x4Swizzle: {
3726 DCHECK_EQ(2, instr->InputCount());
3727 ASSEMBLE_SIMD_IMM_INSTR(Pshufd, i.OutputSimd128Register(), 0,
3728 i.InputUint8(1));
3729 break;
3730 }
3731 case kX64S32x4Shuffle: {
3732 DCHECK_EQ(4, instr->InputCount()); // Swizzles should be handled above.
3733 uint8_t shuffle = i.InputUint8(2);
3734 DCHECK_NE(0xe4, shuffle); // A simple blend should be handled below.
3735 ASSEMBLE_SIMD_IMM_INSTR(Pshufd, kScratchDoubleReg, 1, shuffle);
3736 ASSEMBLE_SIMD_IMM_INSTR(Pshufd, i.OutputSimd128Register(), 0, shuffle);
3737 __ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputUint8(3));
3738 break;
3739 }
3740 case kX64S16x8Blend: {
3741 ASSEMBLE_SIMD_IMM_SHUFFLE(Pblendw, i.InputUint8(2));
3742 break;
3743 }
3744 case kX64S16x8HalfShuffle1: {
3745 XMMRegister dst = i.OutputSimd128Register();
3746 ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, i.InputUint8(1));
3747 __ Pshufhw(dst, dst, i.InputUint8(2));
3748 break;
3749 }
3750 case kX64S16x8HalfShuffle2: {
3751 XMMRegister dst = i.OutputSimd128Register();
3752 ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, kScratchDoubleReg, 1, i.InputUint8(2));
3753 __ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputUint8(3));
3754 ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, i.InputUint8(2));
3755 __ Pshufhw(dst, dst, i.InputUint8(3));
3756 __ Pblendw(dst, kScratchDoubleReg, i.InputUint8(4));
3757 break;
3758 }
3759 case kX64S8x16Alignr: {
3760 ASSEMBLE_SIMD_IMM_SHUFFLE(Palignr, i.InputUint8(2));
3761 break;
3762 }
3763 case kX64S16x8Dup: {
3764 XMMRegister dst = i.OutputSimd128Register();
3765 uint8_t lane = i.InputInt8(1) & 0x7;
3766 uint8_t lane4 = lane & 0x3;
3767 uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
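// Splat the selected word within its 64-bit half (pshuflw or pshufhw), then
// broadcast that half across the register with pshufd (0x0 replicates dword 0,
// 0xaa replicates dword 2).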
3768 if (lane < 4) {
3769 ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, half_dup);
3770 __ Pshufd(dst, dst, uint8_t{0});
3771 } else {
3772 ASSEMBLE_SIMD_IMM_INSTR(Pshufhw, dst, 0, half_dup);
3773 __ Pshufd(dst, dst, uint8_t{0xaa});
3774 }
3775 break;
3776 }
3777 case kX64S8x16Dup: {
3778 XMMRegister dst = i.OutputSimd128Register();
3779 uint8_t lane = i.InputInt8(1) & 0xf;
3780 DCHECK_EQ(dst, i.InputSimd128Register(0));
3781 if (lane < 8) {
3782 __ Punpcklbw(dst, dst);
3783 } else {
3784 __ Punpckhbw(dst, dst);
3785 }
3786 lane &= 0x7;
3787 uint8_t lane4 = lane & 0x3;
3788 uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3789 if (lane < 4) {
3790 __ Pshuflw(dst, dst, half_dup);
3791 __ Pshufd(dst, dst, uint8_t{0});
3792 } else {
3793 __ Pshufhw(dst, dst, half_dup);
3794 __ Pshufd(dst, dst, uint8_t{0xaa});
3795 }
3796 break;
3797 }
3798 case kX64S64x2UnpackHigh:
3799 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhqdq);
3800 break;
3801 case kX64S32x4UnpackHigh:
3802 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhdq);
3803 break;
3804 case kX64S16x8UnpackHigh:
3805 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhwd);
3806 break;
3807 case kX64S8x16UnpackHigh:
3808 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhbw);
3809 break;
3810 case kX64S64x2UnpackLow:
3811 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpcklqdq);
3812 break;
3813 case kX64S32x4UnpackLow:
3814 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckldq);
3815 break;
3816 case kX64S16x8UnpackLow:
3817 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpcklwd);
3818 break;
3819 case kX64S8x16UnpackLow:
3820 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpcklbw);
3821 break;
3822 case kX64S16x8UnzipHigh: {
3823 XMMRegister dst = i.OutputSimd128Register();
3824 XMMRegister src2 = dst;
3825 DCHECK_EQ(dst, i.InputSimd128Register(0));
3826 if (instr->InputCount() == 2) {
3827 ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
3828 __ Psrld(kScratchDoubleReg, byte{16});
3829 src2 = kScratchDoubleReg;
3830 }
3831 __ Psrld(dst, byte{16});
3832 __ Packusdw(dst, src2);
3833 break;
3834 }
3835 case kX64S16x8UnzipLow: {
3836 XMMRegister dst = i.OutputSimd128Register();
3837 XMMRegister src2 = dst;
3838 DCHECK_EQ(dst, i.InputSimd128Register(0));
3839 __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
3840 if (instr->InputCount() == 2) {
3841 ASSEMBLE_SIMD_IMM_INSTR(Pblendw, kScratchDoubleReg, 1, uint8_t{0x55});
3842 src2 = kScratchDoubleReg;
3843 }
3844 __ Pblendw(dst, kScratchDoubleReg, uint8_t{0xaa});
3845 __ Packusdw(dst, src2);
3846 break;
3847 }
3848 case kX64S8x16UnzipHigh: {
3849 XMMRegister dst = i.OutputSimd128Register();
3850 XMMRegister src2 = dst;
3851 DCHECK_EQ(dst, i.InputSimd128Register(0));
3852 if (instr->InputCount() == 2) {
3853 ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
3854 __ Psrlw(kScratchDoubleReg, byte{8});
3855 src2 = kScratchDoubleReg;
3856 }
3857 __ Psrlw(dst, byte{8});
3858 __ Packuswb(dst, src2);
3859 break;
3860 }
3861 case kX64S8x16UnzipLow: {
3862 XMMRegister dst = i.OutputSimd128Register();
3863 XMMRegister src2 = dst;
3864 DCHECK_EQ(dst, i.InputSimd128Register(0));
3865 if (instr->InputCount() == 2) {
3866 ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
3867 __ Psllw(kScratchDoubleReg, byte{8});
3868 __ Psrlw(kScratchDoubleReg, byte{8});
3869 src2 = kScratchDoubleReg;
3870 }
3871 __ Psllw(dst, byte{8});
3872 __ Psrlw(dst, byte{8});
3873 __ Packuswb(dst, src2);
3874 break;
3875 }
3876 case kX64S8x16TransposeLow: {
3877 XMMRegister dst = i.OutputSimd128Register();
3878 DCHECK_EQ(dst, i.InputSimd128Register(0));
3879 __ Psllw(dst, byte{8});
3880 if (instr->InputCount() == 1) {
3881 __ Movups(kScratchDoubleReg, dst);
3882 } else {
3883 DCHECK_EQ(2, instr->InputCount());
3884 ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
3885 __ Psllw(kScratchDoubleReg, byte{8});
3886 }
3887 __ Psrlw(dst, byte{8});
3888 __ Por(dst, kScratchDoubleReg);
3889 break;
3890 }
3891 case kX64S8x16TransposeHigh: {
3892 XMMRegister dst = i.OutputSimd128Register();
3893 DCHECK_EQ(dst, i.InputSimd128Register(0));
3894 __ Psrlw(dst, byte{8});
3895 if (instr->InputCount() == 1) {
3896 __ Movups(kScratchDoubleReg, dst);
3897 } else {
3898 DCHECK_EQ(2, instr->InputCount());
3899 ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
3900 __ Psrlw(kScratchDoubleReg, byte{8});
3901 }
3902 __ Psllw(kScratchDoubleReg, byte{8});
3903 __ Por(dst, kScratchDoubleReg);
3904 break;
3905 }
3906 case kX64S8x8Reverse:
3907 case kX64S8x4Reverse:
3908 case kX64S8x2Reverse: {
3909 DCHECK_EQ(1, instr->InputCount());
3910 XMMRegister dst = i.OutputSimd128Register();
3911 DCHECK_EQ(dst, i.InputSimd128Register(0));
3912 if (arch_opcode != kX64S8x2Reverse) {
3913 // First shuffle words into position.
3914 uint8_t shuffle_mask = arch_opcode == kX64S8x4Reverse ? 0xB1 : 0x1B;
3915 __ Pshuflw(dst, dst, shuffle_mask);
3916 __ Pshufhw(dst, dst, shuffle_mask);
3917 }
3918 __ Movaps(kScratchDoubleReg, dst);
3919 __ Psrlw(kScratchDoubleReg, byte{8});
3920 __ Psllw(dst, byte{8});
3921 __ Por(dst, kScratchDoubleReg);
3922 break;
3923 }
3924 case kX64V32x4AnyTrue:
3925 case kX64V16x8AnyTrue:
3926 case kX64V8x16AnyTrue: {
3927 Register dst = i.OutputRegister();
3928 XMMRegister src = i.InputSimd128Register(0);
3929
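// ptest sets ZF iff the register is all zeros, so any_true is simply !ZF,
// materialized with setcc(not_equal). The lane shape is irrelevant here.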
3930 __ xorq(dst, dst);
3931 __ Ptest(src, src);
3932 __ setcc(not_equal, dst);
3933 break;
3934 }
3935 // Need to split up all the different lane structures because the
3936 // comparison instruction used matters: e.g. for the 16-bit lane value 0xff00,
3937 // pcmpeqb against zero yields 0x00ff while pcmpeqw yields 0x0000, so ptest
3938 // sets ZF to 0 and 1 respectively.
3939 case kX64V32x4AllTrue: {
3940 ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqd);
3941 break;
3942 }
3943 case kX64V16x8AllTrue: {
3944 ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqw);
3945 break;
3946 }
3947 case kX64V8x16AllTrue: {
3948 ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqb);
3949 break;
3950 }
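// xchg with a memory operand is implicitly locked, so the exchanges below
// need no explicit lock prefix.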
3951 case kWord32AtomicExchangeInt8: {
3952 __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3953 __ movsxbl(i.InputRegister(0), i.InputRegister(0));
3954 break;
3955 }
3956 case kWord32AtomicExchangeUint8: {
3957 __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3958 __ movzxbl(i.InputRegister(0), i.InputRegister(0));
3959 break;
3960 }
3961 case kWord32AtomicExchangeInt16: {
3962 __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3963 __ movsxwl(i.InputRegister(0), i.InputRegister(0));
3964 break;
3965 }
3966 case kWord32AtomicExchangeUint16: {
3967 __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3968 __ movzxwl(i.InputRegister(0), i.InputRegister(0));
3969 break;
3970 }
3971 case kWord32AtomicExchangeWord32: {
3972 __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
3973 break;
3974 }
3975 case kWord32AtomicCompareExchangeInt8: {
3976 __ lock();
3977 __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3978 __ movsxbl(rax, rax);
3979 break;
3980 }
3981 case kWord32AtomicCompareExchangeUint8: {
3982 __ lock();
3983 __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3984 __ movzxbl(rax, rax);
3985 break;
3986 }
3987 case kWord32AtomicCompareExchangeInt16: {
3988 __ lock();
3989 __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3990 __ movsxwl(rax, rax);
3991 break;
3992 }
3993 case kWord32AtomicCompareExchangeUint16: {
3994 __ lock();
3995 __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3996 __ movzxwl(rax, rax);
3997 break;
3998 }
3999 case kWord32AtomicCompareExchangeWord32: {
4000 __ lock();
4001 __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
4002 break;
4003 }
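// ASSEMBLE_ATOMIC_BINOP (defined earlier in this file) expands to a
// load / modify / lock cmpxchg retry loop that leaves the previous value in
// rax; the movsx/movzx that follows extends it to the canonical 32-bit form.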
4004 #define ATOMIC_BINOP_CASE(op, inst) \
4005 case kWord32Atomic##op##Int8: \
4006 ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
4007 __ movsxbl(rax, rax); \
4008 break; \
4009 case kWord32Atomic##op##Uint8: \
4010 ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
4011 __ movzxbl(rax, rax); \
4012 break; \
4013 case kWord32Atomic##op##Int16: \
4014 ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
4015 __ movsxwl(rax, rax); \
4016 break; \
4017 case kWord32Atomic##op##Uint16: \
4018 ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
4019 __ movzxwl(rax, rax); \
4020 break; \
4021 case kWord32Atomic##op##Word32: \
4022 ASSEMBLE_ATOMIC_BINOP(inst, movl, cmpxchgl); \
4023 break;
4024 ATOMIC_BINOP_CASE(Add, addl)
4025 ATOMIC_BINOP_CASE(Sub, subl)
4026 ATOMIC_BINOP_CASE(And, andl)
4027 ATOMIC_BINOP_CASE(Or, orl)
4028 ATOMIC_BINOP_CASE(Xor, xorl)
4029 #undef ATOMIC_BINOP_CASE
4030 case kX64Word64AtomicExchangeUint8: {
4031 __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
4032 __ movzxbq(i.InputRegister(0), i.InputRegister(0));
4033 break;
4034 }
4035 case kX64Word64AtomicExchangeUint16: {
4036 __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
4037 __ movzxwq(i.InputRegister(0), i.InputRegister(0));
4038 break;
4039 }
4040 case kX64Word64AtomicExchangeUint32: {
4041 __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
4042 break;
4043 }
4044 case kX64Word64AtomicExchangeUint64: {
4045 __ xchgq(i.InputRegister(0), i.MemoryOperand(1));
4046 break;
4047 }
4048 case kX64Word64AtomicCompareExchangeUint8: {
4049 __ lock();
4050 __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
4051 __ movzxbq(rax, rax);
4052 break;
4053 }
4054 case kX64Word64AtomicCompareExchangeUint16: {
4055 __ lock();
4056 __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
4057 __ movzxwq(rax, rax);
4058 break;
4059 }
4060 case kX64Word64AtomicCompareExchangeUint32: {
4061 __ lock();
4062 __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
4063 // Zero-extend the 32-bit value to 64 bits.
4064 __ movl(rax, rax);
4065 break;
4066 }
4067 case kX64Word64AtomicCompareExchangeUint64: {
4068 __ lock();
4069 __ cmpxchgq(i.MemoryOperand(2), i.InputRegister(1));
4070 break;
4071 }
4072 #define ATOMIC64_BINOP_CASE(op, inst) \
4073 case kX64Word64Atomic##op##Uint8: \
4074 ASSEMBLE_ATOMIC64_BINOP(inst, movb, cmpxchgb); \
4075 __ movzxbq(rax, rax); \
4076 break; \
4077 case kX64Word64Atomic##op##Uint16: \
4078 ASSEMBLE_ATOMIC64_BINOP(inst, movw, cmpxchgw); \
4079 __ movzxwq(rax, rax); \
4080 break; \
4081 case kX64Word64Atomic##op##Uint32: \
4082 ASSEMBLE_ATOMIC64_BINOP(inst, movl, cmpxchgl); \
4083 break; \
4084 case kX64Word64Atomic##op##Uint64: \
4085 ASSEMBLE_ATOMIC64_BINOP(inst, movq, cmpxchgq); \
4086 break;
4087 ATOMIC64_BINOP_CASE(Add, addq)
4088 ATOMIC64_BINOP_CASE(Sub, subq)
4089 ATOMIC64_BINOP_CASE(And, andq)
4090 ATOMIC64_BINOP_CASE(Or, orq)
4091 ATOMIC64_BINOP_CASE(Xor, xorq)
4092 #undef ATOMIC64_BINOP_CASE
4093 case kWord32AtomicLoadInt8:
4094 case kWord32AtomicLoadUint8:
4095 case kWord32AtomicLoadInt16:
4096 case kWord32AtomicLoadUint16:
4097 case kWord32AtomicLoadWord32:
4098 case kWord32AtomicStoreWord8:
4099 case kWord32AtomicStoreWord16:
4100 case kWord32AtomicStoreWord32:
4101 case kX64Word64AtomicLoadUint8:
4102 case kX64Word64AtomicLoadUint16:
4103 case kX64Word64AtomicLoadUint32:
4104 case kX64Word64AtomicLoadUint64:
4105 case kX64Word64AtomicStoreWord8:
4106 case kX64Word64AtomicStoreWord16:
4107 case kX64Word64AtomicStoreWord32:
4108 case kX64Word64AtomicStoreWord64:
4109 UNREACHABLE(); // Won't be generated by instruction selector.
4110 break;
4111 }
4112 return kSuccess;
4113 } // NOLINT(readability/fn_size)
4114
4115 #undef ASSEMBLE_UNOP
4116 #undef ASSEMBLE_BINOP
4117 #undef ASSEMBLE_COMPARE
4118 #undef ASSEMBLE_MULT
4119 #undef ASSEMBLE_SHIFT
4120 #undef ASSEMBLE_MOVX
4121 #undef ASSEMBLE_SSE_BINOP
4122 #undef ASSEMBLE_SSE_UNOP
4123 #undef ASSEMBLE_AVX_BINOP
4124 #undef ASSEMBLE_IEEE754_BINOP
4125 #undef ASSEMBLE_IEEE754_UNOP
4126 #undef ASSEMBLE_ATOMIC_BINOP
4127 #undef ASSEMBLE_ATOMIC64_BINOP
4128 #undef ASSEMBLE_SIMD_INSTR
4129 #undef ASSEMBLE_SIMD_IMM_INSTR
4130 #undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
4131 #undef ASSEMBLE_SIMD_IMM_SHUFFLE
4132 #undef ASSEMBLE_SIMD_ALL_TRUE
4133 #undef ASSEMBLE_SIMD_SHIFT
4134
4135 namespace {
4136
4137 Condition FlagsConditionToCondition(FlagsCondition condition) {
4138 switch (condition) {
4139 case kUnorderedEqual:
4140 case kEqual:
4141 return equal;
4142 case kUnorderedNotEqual:
4143 case kNotEqual:
4144 return not_equal;
4145 case kSignedLessThan:
4146 return less;
4147 case kSignedGreaterThanOrEqual:
4148 return greater_equal;
4149 case kSignedLessThanOrEqual:
4150 return less_equal;
4151 case kSignedGreaterThan:
4152 return greater;
4153 case kUnsignedLessThan:
4154 return below;
4155 case kUnsignedGreaterThanOrEqual:
4156 return above_equal;
4157 case kUnsignedLessThanOrEqual:
4158 return below_equal;
4159 case kUnsignedGreaterThan:
4160 return above;
4161 case kOverflow:
4162 return overflow;
4163 case kNotOverflow:
4164 return no_overflow;
4165 default:
4166 break;
4167 }
4168 UNREACHABLE();
4169 }
4170
4171 } // namespace
4172
4173 // Assembles branches after this instruction.
4174 void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
4175 Label::Distance flabel_distance =
4176 branch->fallthru ? Label::kNear : Label::kFar;
4177 Label* tlabel = branch->true_label;
4178 Label* flabel = branch->false_label;
4179 if (branch->condition == kUnorderedEqual) {
4180 __ j(parity_even, flabel, flabel_distance);
4181 } else if (branch->condition == kUnorderedNotEqual) {
4182 __ j(parity_even, tlabel);
4183 }
4184 __ j(FlagsConditionToCondition(branch->condition), tlabel);
4185
4186 if (!branch->fallthru) __ jmp(flabel, flabel_distance);
4187 }
4188
4189 void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
4190 Instruction* instr) {
4191 // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
4192 if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
4193 return;
4194 }
4195
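// Update the poison register: if the preceding comparison's condition does
// not hold, execution reached this point speculatively, so conditionally move
// zero into kSpeculationPoisonRegister to mask subsequently loaded values.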
4196 condition = NegateFlagsCondition(condition);
4197 __ movl(kScratchRegister, Immediate(0));
4198 __ cmovq(FlagsConditionToCondition(condition), kSpeculationPoisonRegister,
4199 kScratchRegister);
4200 }
4201
4202 void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
4203 BranchInfo* branch) {
4204 Label::Distance flabel_distance =
4205 branch->fallthru ? Label::kNear : Label::kFar;
4206 Label* tlabel = branch->true_label;
4207 Label* flabel = branch->false_label;
4208 Label nodeopt;
4209 if (branch->condition == kUnorderedEqual) {
4210 __ j(parity_even, flabel, flabel_distance);
4211 } else if (branch->condition == kUnorderedNotEqual) {
4212 __ j(parity_even, tlabel);
4213 }
4214 __ j(FlagsConditionToCondition(branch->condition), tlabel);
4215
4216 if (FLAG_deopt_every_n_times > 0) {
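// Stress mode: count down an external counter and force a deopt every
// FLAG_deopt_every_n_times executions, preserving rax and the flags around
// the bookkeeping.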
4217 ExternalReference counter =
4218 ExternalReference::stress_deopt_count(isolate());
4219
4220 __ pushfq();
4221 __ pushq(rax);
4222 __ load_rax(counter);
4223 __ decl(rax);
4224 __ j(not_zero, &nodeopt, Label::kNear);
4225
4226 __ Set(rax, FLAG_deopt_every_n_times);
4227 __ store_rax(counter);
4228 __ popq(rax);
4229 __ popfq();
4230 __ jmp(tlabel);
4231
4232 __ bind(&nodeopt);
4233 __ store_rax(counter);
4234 __ popq(rax);
4235 __ popfq();
4236 }
4237
4238 if (!branch->fallthru) {
4239 __ jmp(flabel, flabel_distance);
4240 }
4241 }
4242
4243 void CodeGenerator::AssembleArchJump(RpoNumber target) {
4244 if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
4245 }
4246
4247 void CodeGenerator::AssembleArchTrap(Instruction* instr,
4248 FlagsCondition condition) {
4249 auto ool = zone()->New<WasmOutOfLineTrap>(this, instr);
4250 Label* tlabel = ool->entry();
4251 Label end;
4252 if (condition == kUnorderedEqual) {
4253 __ j(parity_even, &end, Label::kNear);
4254 } else if (condition == kUnorderedNotEqual) {
4255 __ j(parity_even, tlabel);
4256 }
4257 __ j(FlagsConditionToCondition(condition), tlabel);
4258 __ bind(&end);
4259 }
4260
4261 // Assembles boolean materializations after this instruction.
4262 void CodeGenerator::AssembleArchBoolean(Instruction* instr,
4263 FlagsCondition condition) {
4264 X64OperandConverter i(this, instr);
4265 Label done;
4266
4267 // Materialize a full 64-bit 1 or 0 value. The result register is always the
4268 // last output of the instruction.
4269 Label check;
4270 DCHECK_NE(0u, instr->OutputCount());
4271 Register reg = i.OutputRegister(instr->OutputCount() - 1);
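// For floating-point comparisons PF is set when the operands are unordered
// (NaN); materialize the result for that case up front and fall through to
// the ordinary setcc otherwise.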
4272 if (condition == kUnorderedEqual) {
4273 __ j(parity_odd, &check, Label::kNear);
4274 __ movl(reg, Immediate(0));
4275 __ jmp(&done, Label::kNear);
4276 } else if (condition == kUnorderedNotEqual) {
4277 __ j(parity_odd, &check, Label::kNear);
4278 __ movl(reg, Immediate(1));
4279 __ jmp(&done, Label::kNear);
4280 }
4281 __ bind(&check);
4282 __ setcc(FlagsConditionToCondition(condition), reg);
4283 __ movzxbl(reg, reg);
4284 __ bind(&done);
4285 }
4286
4287 void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
4288 X64OperandConverter i(this, instr);
4289 Register input = i.InputRegister(0);
4290 std::vector<std::pair<int32_t, Label*>> cases;
4291 for (size_t index = 2; index < instr->InputCount(); index += 2) {
4292 cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
4293 }
4294 AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
4295 cases.data() + cases.size());
4296 }
4297
4298 void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
4299 X64OperandConverter i(this, instr);
4300 Register input = i.InputRegister(0);
4301 int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2);
4302 Label** cases = zone()->NewArray<Label*>(case_count);
4303 for (int32_t index = 0; index < case_count; ++index) {
4304 cases[index] = GetLabel(i.InputRpo(index + 2));
4305 }
4306 Label* const table = AddJumpTable(cases, case_count);
4307 __ cmpl(input, Immediate(case_count));
4308 __ j(above_equal, GetLabel(i.InputRpo(1)));
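// The jump table emitted by AssembleJumpTable holds 64-bit absolute label
// addresses, hence the times_8 scaling of the case index.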
4309 __ leaq(kScratchRegister, Operand(table));
4310 __ jmp(Operand(kScratchRegister, input, times_8, 0));
4311 }
4312
4313 namespace {
4314
4315 static const int kQuadWordSize = 16;
4316
4317 } // namespace
4318
4319 void CodeGenerator::FinishFrame(Frame* frame) {
4320 CallDescriptor* call_descriptor = linkage()->GetIncomingDescriptor();
4321
4322 const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
4323 if (saves_fp != 0) {
4324 frame->AlignSavedCalleeRegisterSlots();
4325 if (saves_fp != 0) { // Save callee-saved XMM registers.
4326 const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
4327 frame->AllocateSavedCalleeRegisterSlots(
4328 saves_fp_count * (kQuadWordSize / kSystemPointerSize));
4329 }
4330 }
4331 const RegList saves = call_descriptor->CalleeSavedRegisters();
4332 if (saves != 0) { // Save callee-saved registers.
4333 int count = 0;
4334 for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
4335 if (((1 << i) & saves)) {
4336 ++count;
4337 }
4338 }
4339 frame->AllocateSavedCalleeRegisterSlots(count);
4340 }
4341 }
4342
4343 void CodeGenerator::AssembleConstructFrame() {
4344 auto call_descriptor = linkage()->GetIncomingDescriptor();
4345 if (frame_access_state()->has_frame()) {
4346 int pc_base = __ pc_offset();
4347
4348 if (call_descriptor->IsCFunctionCall()) {
4349 __ pushq(rbp);
4350 __ movq(rbp, rsp);
4351 if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
4352 __ Push(Immediate(StackFrame::TypeToMarker(StackFrame::C_WASM_ENTRY)));
4353 // Reserve stack space for saving the c_entry_fp later.
4354 __ AllocateStackSpace(kSystemPointerSize);
4355 }
4356 } else if (call_descriptor->IsJSFunctionCall()) {
4357 __ Prologue();
4358 } else {
4359 __ StubPrologue(info()->GetOutputStackFrameType());
4360 if (call_descriptor->IsWasmFunctionCall()) {
4361 __ pushq(kWasmInstanceRegister);
4362 } else if (call_descriptor->IsWasmImportWrapper() ||
4363 call_descriptor->IsWasmCapiFunction()) {
4364 // Wasm import wrappers are passed a tuple in the place of the instance.
4365 // Unpack the tuple into the instance and the target callable.
4366 // This must be done here in the codegen because it cannot be expressed
4367 // properly in the graph.
4368 __ LoadTaggedPointerField(
4369 kJSFunctionRegister,
4370 FieldOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
4371 __ LoadTaggedPointerField(
4372 kWasmInstanceRegister,
4373 FieldOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
4374 __ pushq(kWasmInstanceRegister);
4375 if (call_descriptor->IsWasmCapiFunction()) {
4376 // Reserve space for saving the PC later.
4377 __ AllocateStackSpace(kSystemPointerSize);
4378 }
4379 }
4380 }
4381
4382 unwinding_info_writer_.MarkFrameConstructed(pc_base);
4383 }
4384 int required_slots =
4385 frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();
4386
4387 if (info()->is_osr()) {
4388 // TurboFan OSR-compiled functions cannot be entered directly.
4389 __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
4390
4391 // Unoptimized code jumps directly to this entrypoint while the unoptimized
4392 // frame is still on the stack. Optimized code uses OSR values directly from
4393 // the unoptimized frame. Thus, all that needs to be done is to allocate the
4394 // remaining stack slots.
4395 if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
4396 osr_pc_offset_ = __ pc_offset();
4397 required_slots -= static_cast<int>(osr_helper()->UnoptimizedFrameSlots());
4398 ResetSpeculationPoison();
4399 }
4400
4401 const RegList saves = call_descriptor->CalleeSavedRegisters();
4402 const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
4403
4404 if (required_slots > 0) {
4405 DCHECK(frame_access_state()->has_frame());
4406 if (info()->IsWasm() && required_slots > 128) {
4407 // For WebAssembly functions with big frames we have to do the stack
4408 // overflow check before we construct the frame. Otherwise we may not
4409 // have enough space on the stack to call the runtime for the stack
4410 // overflow.
4411 Label done;
4412
4413 // If the frame is bigger than the stack, we throw the stack overflow
4414 // exception unconditionally. Thereby we can avoid the integer overflow
4415 // check in the condition code.
4416 if (required_slots * kSystemPointerSize < FLAG_stack_size * 1024) {
4417 __ movq(kScratchRegister,
4418 FieldOperand(kWasmInstanceRegister,
4419 WasmInstanceObject::kRealStackLimitAddressOffset));
4420 __ movq(kScratchRegister, Operand(kScratchRegister, 0));
4421 __ addq(kScratchRegister,
4422 Immediate(required_slots * kSystemPointerSize));
4423 __ cmpq(rsp, kScratchRegister);
4424 __ j(above_equal, &done, Label::kNear);
4425 }
4426
4427 __ near_call(wasm::WasmCode::kWasmStackOverflow,
4428 RelocInfo::WASM_STUB_CALL);
4429 ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
4430 RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
4431 __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
4432 __ bind(&done);
4433 }
4434
4435 // Skip callee-saved and return slots, which are created below.
4436 required_slots -= base::bits::CountPopulation(saves);
4437 required_slots -= base::bits::CountPopulation(saves_fp) *
4438 (kQuadWordSize / kSystemPointerSize);
4439 required_slots -= frame()->GetReturnSlotCount();
4440 if (required_slots > 0) {
4441 __ AllocateStackSpace(required_slots * kSystemPointerSize);
4442 }
4443 }
4444
4445 if (saves_fp != 0) { // Save callee-saved XMM registers.
4446 const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
4447 const int stack_size = saves_fp_count * kQuadWordSize;
4448 // Adjust the stack pointer.
4449 __ AllocateStackSpace(stack_size);
4450 // Store the registers on the stack.
4451 int slot_idx = 0;
4452 for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
4453 if (!((1 << i) & saves_fp)) continue;
4454 __ Movdqu(Operand(rsp, kQuadWordSize * slot_idx),
4455 XMMRegister::from_code(i));
4456 slot_idx++;
4457 }
4458 }
4459
4460 if (saves != 0) { // Save callee-saved registers.
4461 for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
4462 if (!((1 << i) & saves)) continue;
4463 __ pushq(Register::from_code(i));
4464 }
4465 }
4466
4467 // Allocate return slots (located after callee-saved).
4468 if (frame()->GetReturnSlotCount() > 0) {
4469 __ AllocateStackSpace(frame()->GetReturnSlotCount() * kSystemPointerSize);
4470 }
4471 }
4472
4473 void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
4474 auto call_descriptor = linkage()->GetIncomingDescriptor();
4475
4476 // Restore registers.
4477 const RegList saves = call_descriptor->CalleeSavedRegisters();
4478 if (saves != 0) {
4479 const int returns = frame()->GetReturnSlotCount();
4480 if (returns != 0) {
4481 __ addq(rsp, Immediate(returns * kSystemPointerSize));
4482 }
4483 for (int i = 0; i < Register::kNumRegisters; i++) {
4484 if (!((1 << i) & saves)) continue;
4485 __ popq(Register::from_code(i));
4486 }
4487 }
4488 const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
4489 if (saves_fp != 0) {
4490 const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
4491 const int stack_size = saves_fp_count * kQuadWordSize;
4492 // Load the registers from the stack.
4493 int slot_idx = 0;
4494 for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
4495 if (!((1 << i) & saves_fp)) continue;
4496 __ Movdqu(XMMRegister::from_code(i),
4497 Operand(rsp, kQuadWordSize * slot_idx));
4498 slot_idx++;
4499 }
4500 // Adjust the stack pointer.
4501 __ addq(rsp, Immediate(stack_size));
4502 }
4503
4504 unwinding_info_writer_.MarkBlockWillExit();
4505
4506 // We might need rcx and r10 for scratch.
4507 DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rcx.bit());
4508 DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & r10.bit());
4509 X64OperandConverter g(this, nullptr);
4510 int parameter_count =
4511 static_cast<int>(call_descriptor->StackParameterCount());
4512
4513 // {additional_pop_count} is only greater than zero if {parameter_count == 0}.
4514 // Check RawMachineAssembler::PopAndReturn.
4515 if (parameter_count != 0) {
4516 if (additional_pop_count->IsImmediate()) {
4517 DCHECK_EQ(g.ToConstant(additional_pop_count).ToInt32(), 0);
4518 } else if (__ emit_debug_code()) {
4519 __ cmpq(g.ToRegister(additional_pop_count), Immediate(0));
4520 __ Assert(equal, AbortReason::kUnexpectedAdditionalPopValue);
4521 }
4522 }
4523
4524 Register argc_reg = rcx;
4525 #ifdef V8_NO_ARGUMENTS_ADAPTOR
4526 // Functions with JS linkage have at least one parameter (the receiver).
4527 // If {parameter_count} == 0, it means it is a builtin with
4528 // kDontAdaptArgumentsSentinel, which takes care of popping the JS arguments
4529 // itself.
4530 const bool drop_jsargs = frame_access_state()->has_frame() &&
4531 call_descriptor->IsJSFunctionCall() &&
4532 parameter_count != 0;
4533 #else
4534 const bool drop_jsargs = false;
4535 #endif
4536 if (call_descriptor->IsCFunctionCall()) {
4537 AssembleDeconstructFrame();
4538 } else if (frame_access_state()->has_frame()) {
4539 if (additional_pop_count->IsImmediate() &&
4540 g.ToConstant(additional_pop_count).ToInt32() == 0) {
4541 // Canonicalize JSFunction return sites for now.
4542 if (return_label_.is_bound()) {
4543 __ jmp(&return_label_);
4544 return;
4545 } else {
4546 __ bind(&return_label_);
4547 }
4548 }
4549 if (drop_jsargs) {
4550 // Get the actual argument count.
4551 __ movq(argc_reg, Operand(rbp, StandardFrameConstants::kArgCOffset));
4552 }
4553 AssembleDeconstructFrame();
4554 }
4555
4556 if (drop_jsargs) {
4557 // We must pop all arguments from the stack (including the receiver). This
4558 // number of arguments is given by max(1 + argc_reg, parameter_count).
4559 int parameter_count_without_receiver =
4560 parameter_count - 1; // Exclude the receiver to simplify the
4561 // computation. We'll account for it at the end.
4562 Label mismatch_return;
4563 Register scratch_reg = r10;
4564 DCHECK_NE(argc_reg, scratch_reg);
4565 __ cmpq(argc_reg, Immediate(parameter_count_without_receiver));
4566 __ j(greater, &mismatch_return, Label::kNear);
4567 __ Ret(parameter_count * kSystemPointerSize, scratch_reg);
4568 __ bind(&mismatch_return);
4569 __ PopReturnAddressTo(scratch_reg);
4570 __ leaq(rsp, Operand(rsp, argc_reg, times_system_pointer_size,
4571 kSystemPointerSize)); // Also pop the receiver.
4572 // We use a return instead of a jump for better return address prediction.
4573 __ PushReturnAddressFrom(scratch_reg);
4574 __ Ret();
4575 } else if (additional_pop_count->IsImmediate()) {
4576 Register scratch_reg = r10;
4577 int additional_count = g.ToConstant(additional_pop_count).ToInt32();
4578 size_t pop_size = (parameter_count + additional_count) * kSystemPointerSize;
4579 CHECK_LE(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
4580 __ Ret(static_cast<int>(pop_size), scratch_reg);
4581 } else {
4582 Register pop_reg = g.ToRegister(additional_pop_count);
4583 Register scratch_reg = pop_reg == r10 ? rcx : r10;
4584 int pop_size = static_cast<int>(parameter_count * kSystemPointerSize);
4585 __ PopReturnAddressTo(scratch_reg);
4586 __ leaq(rsp, Operand(rsp, pop_reg, times_system_pointer_size,
4587 static_cast<int>(pop_size)));
4588 __ PushReturnAddressFrom(scratch_reg);
4589 __ Ret();
4590 }
4591 }
4592
4593 void CodeGenerator::FinishCode() { tasm()->PatchConstPool(); }
4594
4595 void CodeGenerator::PrepareForDeoptimizationExits(
4596 ZoneDeque<DeoptimizationExit*>* exits) {}
4597
4598 void CodeGenerator::IncrementStackAccessCounter(
4599 InstructionOperand* source, InstructionOperand* destination) {
4600 DCHECK(FLAG_trace_turbo_stack_accesses);
4601 if (!info()->IsOptimizing() && !info()->IsWasm()) return;
4602 DCHECK_NOT_NULL(debug_name_);
4603 auto IncrementCounter = [&](ExternalReference counter) {
4604 __ incl(__ ExternalReferenceAsOperand(counter));
4605 };
4606 if (source->IsAnyStackSlot()) {
4607 IncrementCounter(
4608 ExternalReference::address_of_load_from_stack_count(debug_name_));
4609 }
4610 if (destination->IsAnyStackSlot()) {
4611 IncrementCounter(
4612 ExternalReference::address_of_store_to_stack_count(debug_name_));
4613 }
4614 }
4615
4616 void CodeGenerator::AssembleMove(InstructionOperand* source,
4617 InstructionOperand* destination) {
4618 X64OperandConverter g(this, nullptr);
4619 // Helper function to write the given constant to the dst register.
4620 auto MoveConstantToRegister = [&](Register dst, Constant src) {
4621 switch (src.type()) {
4622 case Constant::kInt32: {
4623 if (RelocInfo::IsWasmReference(src.rmode())) {
4624 __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
4625 } else {
4626 int32_t value = src.ToInt32();
4627 if (value == 0) {
4628 __ xorl(dst, dst);
4629 } else {
4630 __ movl(dst, Immediate(value));
4631 }
4632 }
4633 break;
4634 }
4635 case Constant::kInt64:
4636 if (RelocInfo::IsWasmReference(src.rmode())) {
4637 __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
4638 } else {
4639 __ Set(dst, src.ToInt64());
4640 }
4641 break;
4642 case Constant::kFloat32:
4643 __ MoveNumber(dst, src.ToFloat32());
4644 break;
4645 case Constant::kFloat64:
4646 __ MoveNumber(dst, src.ToFloat64().value());
4647 break;
4648 case Constant::kExternalReference:
4649 __ Move(dst, src.ToExternalReference());
4650 break;
4651 case Constant::kHeapObject: {
4652 Handle<HeapObject> src_object = src.ToHeapObject();
4653 RootIndex index;
4654 if (IsMaterializableFromRoot(src_object, &index)) {
4655 __ LoadRoot(dst, index);
4656 } else {
4657 __ Move(dst, src_object);
4658 }
4659 break;
4660 }
4661 case Constant::kCompressedHeapObject: {
4662 Handle<HeapObject> src_object = src.ToHeapObject();
4663 RootIndex index;
4664 if (IsMaterializableFromRoot(src_object, &index)) {
4665 __ LoadRoot(dst, index);
4666 } else {
4667 __ Move(dst, src_object, RelocInfo::COMPRESSED_EMBEDDED_OBJECT);
4668 }
4669 break;
4670 }
4671 case Constant::kDelayedStringConstant: {
4672 const StringConstantBase* src_constant = src.ToDelayedStringConstant();
4673 __ MoveStringConstant(dst, src_constant);
4674 break;
4675 }
4676 case Constant::kRpoNumber:
4677 UNREACHABLE(); // TODO(dcarney): load of labels on x64.
4678 break;
4679 }
4680 };
4681 // Helper function to write the given constant to the stack.
4682 auto MoveConstantToSlot = [&](Operand dst, Constant src) {
4683 if (!RelocInfo::IsWasmReference(src.rmode())) {
4684 switch (src.type()) {
4685 case Constant::kInt32:
4686 __ movq(dst, Immediate(src.ToInt32()));
4687 return;
4688 case Constant::kInt64:
4689 __ Set(dst, src.ToInt64());
4690 return;
4691 default:
4692 break;
4693 }
4694 }
4695 MoveConstantToRegister(kScratchRegister, src);
4696 __ movq(dst, kScratchRegister);
4697 };
4698
4699 if (FLAG_trace_turbo_stack_accesses) {
4700 IncrementStackAccessCounter(source, destination);
4701 }
4702
4703 // Dispatch on the source and destination operand kinds.
4704 switch (MoveType::InferMove(source, destination)) {
4705 case MoveType::kRegisterToRegister:
4706 if (source->IsRegister()) {
4707 __ movq(g.ToRegister(destination), g.ToRegister(source));
4708 } else {
4709 DCHECK(source->IsFPRegister());
4710 __ Movapd(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
4711 }
4712 return;
4713 case MoveType::kRegisterToStack: {
4714 Operand dst = g.ToOperand(destination);
4715 if (source->IsRegister()) {
4716 __ movq(dst, g.ToRegister(source));
4717 } else {
4718 DCHECK(source->IsFPRegister());
4719 XMMRegister src = g.ToDoubleRegister(source);
4720 MachineRepresentation rep =
4721 LocationOperand::cast(source)->representation();
4722 if (rep != MachineRepresentation::kSimd128) {
4723 __ Movsd(dst, src);
4724 } else {
4725 __ Movups(dst, src);
4726 }
4727 }
4728 return;
4729 }
4730 case MoveType::kStackToRegister: {
4731 Operand src = g.ToOperand(source);
4732 if (source->IsStackSlot()) {
4733 __ movq(g.ToRegister(destination), src);
4734 } else {
4735 DCHECK(source->IsFPStackSlot());
4736 XMMRegister dst = g.ToDoubleRegister(destination);
4737 MachineRepresentation rep =
4738 LocationOperand::cast(source)->representation();
4739 if (rep != MachineRepresentation::kSimd128) {
4740 __ Movsd(dst, src);
4741 } else {
4742 __ Movups(dst, src);
4743 }
4744 }
4745 return;
4746 }
4747 case MoveType::kStackToStack: {
4748 Operand src = g.ToOperand(source);
4749 Operand dst = g.ToOperand(destination);
4750 if (source->IsStackSlot()) {
4751 // Spill on demand to use a temporary register for memory-to-memory
4752 // moves.
4753 __ movq(kScratchRegister, src);
4754 __ movq(dst, kScratchRegister);
4755 } else {
4756 MachineRepresentation rep =
4757 LocationOperand::cast(source)->representation();
4758 if (rep != MachineRepresentation::kSimd128) {
4759 __ Movsd(kScratchDoubleReg, src);
4760 __ Movsd(dst, kScratchDoubleReg);
4761 } else {
4762 DCHECK(source->IsSimd128StackSlot());
4763 __ Movups(kScratchDoubleReg, src);
4764 __ Movups(dst, kScratchDoubleReg);
4765 }
4766 }
4767 return;
4768 }
4769 case MoveType::kConstantToRegister: {
4770 Constant src = g.ToConstant(source);
4771 if (destination->IsRegister()) {
4772 MoveConstantToRegister(g.ToRegister(destination), src);
4773 } else {
4774 DCHECK(destination->IsFPRegister());
4775 XMMRegister dst = g.ToDoubleRegister(destination);
4776 if (src.type() == Constant::kFloat32) {
4777 // TODO(turbofan): Can we do better here?
4778 __ Move(dst, bit_cast<uint32_t>(src.ToFloat32()));
4779 } else {
4780 DCHECK_EQ(src.type(), Constant::kFloat64);
4781 __ Move(dst, src.ToFloat64().AsUint64());
4782 }
4783 }
4784 return;
4785 }
4786 case MoveType::kConstantToStack: {
4787 Constant src = g.ToConstant(source);
4788 Operand dst = g.ToOperand(destination);
4789 if (destination->IsStackSlot()) {
4790 MoveConstantToSlot(dst, src);
4791 } else {
4792 DCHECK(destination->IsFPStackSlot());
4793 if (src.type() == Constant::kFloat32) {
4794 __ movl(dst, Immediate(bit_cast<uint32_t>(src.ToFloat32())));
4795 } else {
4796 DCHECK_EQ(src.type(), Constant::kFloat64);
4797 __ movq(kScratchRegister, src.ToFloat64().AsUint64());
4798 __ movq(dst, kScratchRegister);
4799 }
4800 }
4801 return;
4802 }
4803 }
4804 UNREACHABLE();
4805 }
4806
4807 void CodeGenerator::AssembleSwap(InstructionOperand* source,
4808 InstructionOperand* destination) {
4809 if (FLAG_trace_turbo_stack_accesses) {
4810 IncrementStackAccessCounter(source, destination);
4811 IncrementStackAccessCounter(destination, source);
4812 }
4813
4814 X64OperandConverter g(this, nullptr);
4815 // Dispatch on the source and destination operand kinds. Not all
4816 // combinations are possible.
4817 switch (MoveType::InferSwap(source, destination)) {
4818 case MoveType::kRegisterToRegister: {
4819 if (source->IsRegister()) {
4820 Register src = g.ToRegister(source);
4821 Register dst = g.ToRegister(destination);
4822 __ movq(kScratchRegister, src);
4823 __ movq(src, dst);
4824 __ movq(dst, kScratchRegister);
4825 } else {
4826 DCHECK(source->IsFPRegister());
4827 XMMRegister src = g.ToDoubleRegister(source);
4828 XMMRegister dst = g.ToDoubleRegister(destination);
4829 __ Movapd(kScratchDoubleReg, src);
4830 __ Movapd(src, dst);
4831 __ Movapd(dst, kScratchDoubleReg);
4832 }
4833 return;
4834 }
4835 case MoveType::kRegisterToStack: {
4836 if (source->IsRegister()) {
4837 Register src = g.ToRegister(source);
4838 Operand dst = g.ToOperand(destination);
4839 __ movq(kScratchRegister, src);
4840 __ movq(src, dst);
4841 __ movq(dst, kScratchRegister);
4842 } else {
4843 DCHECK(source->IsFPRegister());
4844 XMMRegister src = g.ToDoubleRegister(source);
4845 Operand dst = g.ToOperand(destination);
4846 MachineRepresentation rep =
4847 LocationOperand::cast(source)->representation();
4848 if (rep != MachineRepresentation::kSimd128) {
4849 __ Movsd(kScratchDoubleReg, src);
4850 __ Movsd(src, dst);
4851 __ Movsd(dst, kScratchDoubleReg);
4852 } else {
4853 __ Movups(kScratchDoubleReg, src);
4854 __ Movups(src, dst);
4855 __ Movups(dst, kScratchDoubleReg);
4856 }
4857 }
4858 return;
4859 }
4860 case MoveType::kStackToStack: {
4861 Operand src = g.ToOperand(source);
4862 Operand dst = g.ToOperand(destination);
4863 MachineRepresentation rep =
4864 LocationOperand::cast(source)->representation();
4865 if (rep != MachineRepresentation::kSimd128) {
4866 Register tmp = kScratchRegister;
4867 __ movq(tmp, dst);
4868 __ pushq(src); // Then use stack to copy src to destination.
4869 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4870 kSystemPointerSize);
4871 __ popq(dst);
4872 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4873 -kSystemPointerSize);
4874 __ movq(src, tmp);
4875 } else {
4876 // Without AVX, misaligned reads and writes will trap. Move using the
4877 // stack, in two parts.
4878 __ movups(kScratchDoubleReg, dst); // Save dst in scratch register.
4879 __ pushq(src); // Then use stack to copy src to destination.
4880 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4881 kSystemPointerSize);
4882 __ popq(dst);
4883 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4884 -kSystemPointerSize);
4885 __ pushq(g.ToOperand(source, kSystemPointerSize));
4886 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4887 kSystemPointerSize);
4888 __ popq(g.ToOperand(destination, kSystemPointerSize));
4889 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4890 -kSystemPointerSize);
4891 __ movups(src, kScratchDoubleReg);
4892 }
4893 return;
4894 }
4895 default:
4896 UNREACHABLE();
4897 }
4898 }
4899
4900 void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
4901 for (size_t index = 0; index < target_count; ++index) {
4902 __ dq(targets[index]);
4903 }
4904 }
4905
4906 #undef __
4907
4908 } // namespace compiler
4909 } // namespace internal
4910 } // namespace v8
4911