1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <limits>
6 
7 #include "src/base/overflowing-math.h"
8 #include "src/codegen/macro-assembler.h"
9 #include "src/codegen/optimized-compilation-info.h"
10 #include "src/codegen/x64/assembler-x64.h"
11 #include "src/compiler/backend/code-generator-impl.h"
12 #include "src/compiler/backend/code-generator.h"
13 #include "src/compiler/backend/gap-resolver.h"
14 #include "src/compiler/node-matchers.h"
15 #include "src/compiler/osr.h"
16 #include "src/heap/memory-chunk.h"
17 #include "src/objects/smi.h"
18 #include "src/wasm/wasm-code-manager.h"
19 #include "src/wasm/wasm-objects.h"
20 
21 namespace v8 {
22 namespace internal {
23 namespace compiler {
24 
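// Shorthand used throughout this file to emit instructions through the code
// generator's TurboAssembler.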
25 #define __ tasm()->
26 
27 // Adds X64-specific methods for decoding operands.
28 class X64OperandConverter : public InstructionOperandConverter {
29  public:
30   X64OperandConverter(CodeGenerator* gen, Instruction* instr)
31       : InstructionOperandConverter(gen, instr) {}
32 
33   Immediate InputImmediate(size_t index) {
34     return ToImmediate(instr_->InputAt(index));
35   }
36 
37   Operand InputOperand(size_t index, int extra = 0) {
38     return ToOperand(instr_->InputAt(index), extra);
39   }
40 
41   Operand OutputOperand() { return ToOperand(instr_->Output()); }
42 
43   Immediate ToImmediate(InstructionOperand* operand) {
44     Constant constant = ToConstant(operand);
45     if (constant.type() == Constant::kFloat64) {
46       DCHECK_EQ(0, constant.ToFloat64().AsUint64());
47       return Immediate(0);
48     }
49     if (RelocInfo::IsWasmReference(constant.rmode())) {
50       return Immediate(constant.ToInt32(), constant.rmode());
51     }
52     return Immediate(constant.ToInt32());
53   }
54 
55   Operand ToOperand(InstructionOperand* op, int extra = 0) {
56     DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
57     return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
58   }
59 
60   Operand SlotToOperand(int slot_index, int extra = 0) {
61     FrameOffset offset = frame_access_state()->GetFrameOffset(slot_index);
62     return Operand(offset.from_stack_pointer() ? rsp : rbp,
63                    offset.offset() + extra);
64   }
65 
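  // Returns the current input offset and post-increments it; used to walk an
  // instruction's inputs while decoding a memory operand.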
66   static size_t NextOffset(size_t* offset) {
67     size_t i = *offset;
68     (*offset)++;
69     return i;
70   }
71 
72   static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
73     STATIC_ASSERT(0 == static_cast<int>(times_1));
74     STATIC_ASSERT(1 == static_cast<int>(times_2));
75     STATIC_ASSERT(2 == static_cast<int>(times_4));
76     STATIC_ASSERT(3 == static_cast<int>(times_8));
77     int scale = static_cast<int>(mode - one);
78     DCHECK(scale >= 0 && scale < 4);
79     return static_cast<ScaleFactor>(scale);
80   }
81 
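  // Decodes the addressing mode from the instruction's opcode and consumes the
  // corresponding inputs (base, index, displacement) starting at *offset to
  // build the resulting x64 Operand.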
82   Operand MemoryOperand(size_t* offset) {
83     AddressingMode mode = AddressingModeField::decode(instr_->opcode());
84     switch (mode) {
85       case kMode_MR: {
86         Register base = InputRegister(NextOffset(offset));
87         int32_t disp = 0;
88         return Operand(base, disp);
89       }
90       case kMode_MRI: {
91         Register base = InputRegister(NextOffset(offset));
92         int32_t disp = InputInt32(NextOffset(offset));
93         return Operand(base, disp);
94       }
95       case kMode_MR1:
96       case kMode_MR2:
97       case kMode_MR4:
98       case kMode_MR8: {
99         Register base = InputRegister(NextOffset(offset));
100         Register index = InputRegister(NextOffset(offset));
101         ScaleFactor scale = ScaleFor(kMode_MR1, mode);
102         int32_t disp = 0;
103         return Operand(base, index, scale, disp);
104       }
105       case kMode_MR1I:
106       case kMode_MR2I:
107       case kMode_MR4I:
108       case kMode_MR8I: {
109         Register base = InputRegister(NextOffset(offset));
110         Register index = InputRegister(NextOffset(offset));
111         ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
112         int32_t disp = InputInt32(NextOffset(offset));
113         return Operand(base, index, scale, disp);
114       }
115       case kMode_M1: {
116         Register base = InputRegister(NextOffset(offset));
117         int32_t disp = 0;
118         return Operand(base, disp);
119       }
120       case kMode_M2:
121         UNREACHABLE();  // Should use kMode_MR with a more compact encoding instead.
122         return Operand(no_reg, 0);
123       case kMode_M4:
124       case kMode_M8: {
125         Register index = InputRegister(NextOffset(offset));
126         ScaleFactor scale = ScaleFor(kMode_M1, mode);
127         int32_t disp = 0;
128         return Operand(index, scale, disp);
129       }
130       case kMode_M1I:
131       case kMode_M2I:
132       case kMode_M4I:
133       case kMode_M8I: {
134         Register index = InputRegister(NextOffset(offset));
135         ScaleFactor scale = ScaleFor(kMode_M1I, mode);
136         int32_t disp = InputInt32(NextOffset(offset));
137         return Operand(index, scale, disp);
138       }
139       case kMode_Root: {
140         Register base = kRootRegister;
141         int32_t disp = InputInt32(NextOffset(offset));
142         return Operand(base, disp);
143       }
144       case kMode_None:
145         UNREACHABLE();
146     }
147     UNREACHABLE();
148   }
149 
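  // Convenience overload that starts decoding the memory operand at input
  // |first_input|.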
150   Operand MemoryOperand(size_t first_input = 0) {
151     return MemoryOperand(&first_input);
152   }
153 };
154 
155 namespace {
156 
157 bool HasAddressingMode(Instruction* instr) {
158   return instr->addressing_mode() != kMode_None;
159 }
160 
161 bool HasImmediateInput(Instruction* instr, size_t index) {
162   return instr->InputAt(index)->IsImmediate();
163 }
164 
165 bool HasRegisterInput(Instruction* instr, size_t index) {
166   return instr->InputAt(index)->IsRegister();
167 }
168 
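// Out-of-line code that materializes a float32 NaN in |result| by computing
// 0.0f / 0.0f.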
169 class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
170  public:
171   OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
172       : OutOfLineCode(gen), result_(result) {}
173 
174   void Generate() final {
175     __ Xorps(result_, result_);
176     __ Divss(result_, result_);
177   }
178 
179  private:
180   XMMRegister const result_;
181 };
182 
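// Out-of-line code that materializes a float64 NaN in |result| by computing
// 0.0 / 0.0.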
183 class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
184  public:
185   OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
186       : OutOfLineCode(gen), result_(result) {}
187 
188   void Generate() final {
189     __ Xorpd(result_, result_);
190     __ Divsd(result_, result_);
191   }
192 
193  private:
194   XMMRegister const result_;
195 };
196 
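// Out-of-line slow path for double-to-integer truncation: spills the input to
// the stack and calls the DoubleToI builtin (or the corresponding wasm runtime
// stub when compiling wasm code).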
197 class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
198  public:
199   OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
200                              XMMRegister input, StubCallMode stub_mode,
201                              UnwindingInfoWriter* unwinding_info_writer)
202       : OutOfLineCode(gen),
203         result_(result),
204         input_(input),
205         stub_mode_(stub_mode),
206         unwinding_info_writer_(unwinding_info_writer),
207         isolate_(gen->isolate()),
208         zone_(gen->zone()) {}
209 
210   void Generate() final {
211     __ AllocateStackSpace(kDoubleSize);
212     unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
213                                                       kDoubleSize);
214     __ Movsd(MemOperand(rsp, 0), input_);
215     if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
216       // A direct call to a wasm runtime stub defined in this module.
217       // Just encode the stub index. This will be patched when the code
218       // is added to the native module and copied into wasm code space.
219       __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
220     } else if (tasm()->options().inline_offheap_trampolines) {
221       // With embedded builtins we do not need the isolate here. This allows
222       // the call to be generated asynchronously.
223       __ CallBuiltin(Builtins::kDoubleToI);
224     } else {
225       __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
226     }
227     __ movl(result_, MemOperand(rsp, 0));
228     __ addq(rsp, Immediate(kDoubleSize));
229     unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
230                                                       -kDoubleSize);
231   }
232 
233  private:
234   Register const result_;
235   XMMRegister const input_;
236   StubCallMode stub_mode_;
237   UnwindingInfoWriter* const unwinding_info_writer_;
238   Isolate* isolate_;
239   Zone* zone_;
240 };
241 
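// Out-of-line write barrier for tagged stores: skips the barrier when the
// value is a Smi (if the mode allows it) or when the value's page does not
// need tracking, and otherwise calls the record-write or ephemeron-key
// barrier stub.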
242 class OutOfLineRecordWrite final : public OutOfLineCode {
243  public:
244   OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
245                        Register value, Register scratch0, Register scratch1,
246                        RecordWriteMode mode, StubCallMode stub_mode)
247       : OutOfLineCode(gen),
248         object_(object),
249         operand_(operand),
250         value_(value),
251         scratch0_(scratch0),
252         scratch1_(scratch1),
253         mode_(mode),
254         stub_mode_(stub_mode),
255         zone_(gen->zone()) {}
256 
257   void Generate() final {
258     if (mode_ > RecordWriteMode::kValueIsPointer) {
259       __ JumpIfSmi(value_, exit());
260     }
261     if (COMPRESS_POINTERS_BOOL) {
262       __ DecompressTaggedPointer(value_, value_);
263     }
264     __ CheckPageFlag(value_, scratch0_,
265                      MemoryChunk::kPointersToHereAreInterestingMask, zero,
266                      exit());
267     __ leaq(scratch1_, operand_);
268 
269     RememberedSetAction const remembered_set_action =
270         mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
271                                              : OMIT_REMEMBERED_SET;
272     SaveFPRegsMode const save_fp_mode =
273         frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
274 
275     if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
276       __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
277     } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
278       // A direct call to a wasm runtime stub defined in this module.
279       // Just encode the stub index. This will be patched when the code
280       // is added to the native module and copied into wasm code space.
281       __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
282                              save_fp_mode, wasm::WasmCode::kRecordWrite);
283     } else {
284       __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
285                              save_fp_mode);
286     }
287   }
288 
289  private:
290   Register const object_;
291   Operand const operand_;
292   Register const value_;
293   Register const scratch0_;
294   Register const scratch1_;
295   RecordWriteMode const mode_;
296   StubCallMode const stub_mode_;
297   Zone* zone_;
298 };
299 
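// Out-of-line code that raises a wasm trap; the trap id is taken from the
// instruction's last input.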
300 class WasmOutOfLineTrap : public OutOfLineCode {
301  public:
302   WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
303       : OutOfLineCode(gen), gen_(gen), instr_(instr) {}
304 
305   void Generate() override {
306     X64OperandConverter i(gen_, instr_);
307     TrapId trap_id =
308         static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
309     GenerateWithTrapId(trap_id);
310   }
311 
312  protected:
313   CodeGenerator* gen_;
314 
315   void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }
316 
317  private:
318   void GenerateCallToTrap(TrapId trap_id) {
319     if (!gen_->wasm_runtime_exception_support()) {
320       // We cannot test calls to the runtime in cctest/test-run-wasm.
321       // Therefore we emit a call to C here instead of a call to the runtime.
322       __ PrepareCallCFunction(0);
323       __ CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(),
324                        0);
325       __ LeaveFrame(StackFrame::WASM);
326       auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
327       size_t pop_size =
328           call_descriptor->StackParameterCount() * kSystemPointerSize;
329       // Use rcx as a scratch register; we return immediately anyway.
330       __ Ret(static_cast<int>(pop_size), rcx);
331     } else {
332       gen_->AssembleSourcePosition(instr_);
333       // A direct call to a wasm runtime stub defined in this module.
334       // Just encode the stub index. This will be patched when the code
335       // is added to the native module and copied into wasm code space.
336       __ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
337       ReferenceMap* reference_map =
338           gen_->zone()->New<ReferenceMap>(gen_->zone());
339       gen_->RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
340       __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
341     }
342   }
343 
344   Instruction* instr_;
345 };
346 
347 class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {
348  public:
349   WasmProtectedInstructionTrap(CodeGenerator* gen, int pc, Instruction* instr)
350       : WasmOutOfLineTrap(gen, instr), pc_(pc) {}
351 
352   void Generate() final {
353     gen_->AddProtectedInstructionLanding(pc_, __ pc_offset());
354     GenerateWithTrapId(TrapId::kTrapMemOutOfBounds);
355   }
356 
357  private:
358   int pc_;
359 };
360 
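// For memory accesses that rely on the trap handler (kMemoryAccessProtected),
// allocates an out-of-line trap that maps the faulting pc to an out-of-bounds
// trap.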
361 void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
362                          InstructionCode opcode, Instruction* instr,
363                          int pc) {
364   const MemoryAccessMode access_mode =
365       static_cast<MemoryAccessMode>(MiscField::decode(opcode));
366   if (access_mode == kMemoryAccessProtected) {
367     zone->New<WasmProtectedInstructionTrap>(codegen, pc, instr);
368   }
369 }
370 
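// For poisoned loads, masks the loaded value with the speculation poison
// register to mitigate speculative side-channel attacks.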
371 void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
372                                    InstructionCode opcode, Instruction* instr,
373                                    X64OperandConverter const& i) {
374   const MemoryAccessMode access_mode =
375       static_cast<MemoryAccessMode>(MiscField::decode(opcode));
376   if (access_mode == kMemoryAccessPoisoned) {
377     Register value = i.OutputRegister();
378     codegen->tasm()->andq(value, kSpeculationPoisonRegister);
379   }
380 }
381 
382 }  // namespace
383 
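// The ASSEMBLE_* macros below emit the register, immediate, and memory operand
// forms of an instruction; they expect "instr" and the operand converter "i"
// to be in scope at the expansion site in AssembleArchInstruction.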
384 #define ASSEMBLE_UNOP(asm_instr)         \
385   do {                                   \
386     if (instr->Output()->IsRegister()) { \
387       __ asm_instr(i.OutputRegister());  \
388     } else {                             \
389       __ asm_instr(i.OutputOperand());   \
390     }                                    \
391   } while (false)
392 
393 #define ASSEMBLE_BINOP(asm_instr)                                \
394   do {                                                           \
395     if (HasAddressingMode(instr)) {                              \
396       size_t index = 1;                                          \
397       Operand right = i.MemoryOperand(&index);                   \
398       __ asm_instr(i.InputRegister(0), right);                   \
399     } else {                                                     \
400       if (HasImmediateInput(instr, 1)) {                         \
401         if (HasRegisterInput(instr, 0)) {                        \
402           __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
403         } else {                                                 \
404           __ asm_instr(i.InputOperand(0), i.InputImmediate(1));  \
405         }                                                        \
406       } else {                                                   \
407         if (HasRegisterInput(instr, 1)) {                        \
408           __ asm_instr(i.InputRegister(0), i.InputRegister(1));  \
409         } else {                                                 \
410           __ asm_instr(i.InputRegister(0), i.InputOperand(1));   \
411         }                                                        \
412       }                                                          \
413     }                                                            \
414   } while (false)
415 
416 #define ASSEMBLE_COMPARE(asm_instr)                              \
417   do {                                                           \
418     if (HasAddressingMode(instr)) {                              \
419       size_t index = 0;                                          \
420       Operand left = i.MemoryOperand(&index);                    \
421       if (HasImmediateInput(instr, index)) {                     \
422         __ asm_instr(left, i.InputImmediate(index));             \
423       } else {                                                   \
424         __ asm_instr(left, i.InputRegister(index));              \
425       }                                                          \
426     } else {                                                     \
427       if (HasImmediateInput(instr, 1)) {                         \
428         if (HasRegisterInput(instr, 0)) {                        \
429           __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
430         } else {                                                 \
431           __ asm_instr(i.InputOperand(0), i.InputImmediate(1));  \
432         }                                                        \
433       } else {                                                   \
434         if (HasRegisterInput(instr, 1)) {                        \
435           __ asm_instr(i.InputRegister(0), i.InputRegister(1));  \
436         } else {                                                 \
437           __ asm_instr(i.InputRegister(0), i.InputOperand(1));   \
438         }                                                        \
439       }                                                          \
440     }                                                            \
441   } while (false)
442 
443 #define ASSEMBLE_MULT(asm_instr)                              \
444   do {                                                        \
445     if (HasImmediateInput(instr, 1)) {                        \
446       if (HasRegisterInput(instr, 0)) {                       \
447         __ asm_instr(i.OutputRegister(), i.InputRegister(0),  \
448                      i.InputImmediate(1));                    \
449       } else {                                                \
450         __ asm_instr(i.OutputRegister(), i.InputOperand(0),   \
451                      i.InputImmediate(1));                    \
452       }                                                       \
453     } else {                                                  \
454       if (HasRegisterInput(instr, 1)) {                       \
455         __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \
456       } else {                                                \
457         __ asm_instr(i.OutputRegister(), i.InputOperand(1));  \
458       }                                                       \
459     }                                                         \
460   } while (false)
461 
462 #define ASSEMBLE_SHIFT(asm_instr, width)                                   \
463   do {                                                                     \
464     if (HasImmediateInput(instr, 1)) {                                     \
465       if (instr->Output()->IsRegister()) {                                 \
466         __ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \
467       } else {                                                             \
468         __ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1)));  \
469       }                                                                    \
470     } else {                                                               \
471       if (instr->Output()->IsRegister()) {                                 \
472         __ asm_instr##_cl(i.OutputRegister());                             \
473       } else {                                                             \
474         __ asm_instr##_cl(i.OutputOperand());                              \
475       }                                                                    \
476     }                                                                      \
477   } while (false)
478 
479 #define ASSEMBLE_MOVX(asm_instr)                            \
480   do {                                                      \
481     if (HasAddressingMode(instr)) {                         \
482       __ asm_instr(i.OutputRegister(), i.MemoryOperand());  \
483     } else if (HasRegisterInput(instr, 0)) {                \
484       __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
485     } else {                                                \
486       __ asm_instr(i.OutputRegister(), i.InputOperand(0));  \
487     }                                                       \
488   } while (false)
489 
490 #define ASSEMBLE_SSE_BINOP(asm_instr)                                     \
491   do {                                                                    \
492     if (HasAddressingMode(instr)) {                                       \
493       size_t index = 1;                                                   \
494       Operand right = i.MemoryOperand(&index);                            \
495       __ asm_instr(i.InputDoubleRegister(0), right);                      \
496     } else {                                                              \
497       if (instr->InputAt(1)->IsFPRegister()) {                            \
498         __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
499       } else {                                                            \
500         __ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1));        \
501       }                                                                   \
502     }                                                                     \
503   } while (false)
504 
505 #define ASSEMBLE_SSE_UNOP(asm_instr)                                    \
506   do {                                                                  \
507     if (instr->InputAt(0)->IsFPRegister()) {                            \
508       __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
509     } else {                                                            \
510       __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0));        \
511     }                                                                   \
512   } while (false)
513 
514 #define ASSEMBLE_AVX_BINOP(asm_instr)                                          \
515   do {                                                                         \
516     CpuFeatureScope avx_scope(tasm(), AVX);                                    \
517     if (HasAddressingMode(instr)) {                                            \
518       size_t index = 1;                                                        \
519       Operand right = i.MemoryOperand(&index);                                 \
520       __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), right); \
521     } else {                                                                   \
522       if (instr->InputAt(1)->IsFPRegister()) {                                 \
523         __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0),       \
524                      i.InputDoubleRegister(1));                                \
525       } else {                                                                 \
526         __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0),       \
527                      i.InputOperand(1));                                       \
528       }                                                                        \
529     }                                                                          \
530   } while (false)
531 
532 #define ASSEMBLE_IEEE754_BINOP(name)                                     \
533   do {                                                                   \
534     __ PrepareCallCFunction(2);                                          \
535     __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
536   } while (false)
537 
538 #define ASSEMBLE_IEEE754_UNOP(name)                                      \
539   do {                                                                   \
540     __ PrepareCallCFunction(1);                                          \
541     __ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \
542   } while (false)
543 
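// Emits a compare-exchange loop for atomic read-modify-write operations: load
// the old value into rax, apply the operation to a copy in a temp register,
// and retry with lock cmpxchg until no other thread has modified the location.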
544 #define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
545   do {                                                          \
546     Label binop;                                                \
547     __ bind(&binop);                                            \
548     __ mov_inst(rax, i.MemoryOperand(1));                       \
549     __ movl(i.TempRegister(0), rax);                            \
550     __ bin_inst(i.TempRegister(0), i.InputRegister(0));         \
551     __ lock();                                                  \
552     __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));     \
553     __ j(not_equal, &binop);                                    \
554   } while (false)
555 
556 #define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
557   do {                                                            \
558     Label binop;                                                  \
559     __ bind(&binop);                                              \
560     __ mov_inst(rax, i.MemoryOperand(1));                         \
561     __ movq(i.TempRegister(0), rax);                              \
562     __ bin_inst(i.TempRegister(0), i.InputRegister(0));           \
563     __ lock();                                                    \
564     __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));       \
565     __ j(not_equal, &binop);                                      \
566   } while (false)
567 
568 // Handles both SSE and AVX codegen. For SSE we use DefineSameAsFirst, so the
569 // dst and the first src will be the same. For AVX we don't restrict it that
570 // way, so unnecessary moves can be omitted.
571 #define ASSEMBLE_SIMD_BINOP(opcode)                                      \
572   do {                                                                   \
573     if (CpuFeatures::IsSupported(AVX)) {                                 \
574       CpuFeatureScope avx_scope(tasm(), AVX);                            \
575       __ v##opcode(i.OutputSimd128Register(), i.InputSimd128Register(0), \
576                    i.InputSimd128Register(1));                           \
577     } else {                                                             \
578       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));   \
579       __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1));   \
580     }                                                                    \
581   } while (false)
582 
583 #define ASSEMBLE_SIMD_INSTR(opcode, dst_operand, index)      \
584   do {                                                       \
585     if (instr->InputAt(index)->IsSimd128Register()) {        \
586       __ opcode(dst_operand, i.InputSimd128Register(index)); \
587     } else {                                                 \
588       __ opcode(dst_operand, i.InputOperand(index));         \
589     }                                                        \
590   } while (false)
591 
592 #define ASSEMBLE_SIMD_IMM_INSTR(opcode, dst_operand, index, imm)  \
593   do {                                                            \
594     if (instr->InputAt(index)->IsSimd128Register()) {             \
595       __ opcode(dst_operand, i.InputSimd128Register(index), imm); \
596     } else {                                                      \
597       __ opcode(dst_operand, i.InputOperand(index), imm);         \
598     }                                                             \
599   } while (false)
600 
601 #define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode)             \
602   do {                                                   \
603     XMMRegister dst = i.OutputSimd128Register();         \
604     DCHECK_EQ(dst, i.InputSimd128Register(0));           \
605     byte input_index = instr->InputCount() == 2 ? 1 : 0; \
606     ASSEMBLE_SIMD_INSTR(opcode, dst, input_index);       \
607   } while (false)
608 
609 #define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, imm)                              \
610   do {                                                                      \
611     DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));        \
612     if (instr->InputAt(1)->IsSimd128Register()) {                           \
613       __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1), imm); \
614     } else {                                                                \
615       __ opcode(i.OutputSimd128Register(), i.InputOperand(1), imm);         \
616     }                                                                       \
617   } while (false)
618 
619 #define ASSEMBLE_SIMD_ALL_TRUE(opcode)          \
620   do {                                          \
621     Register dst = i.OutputRegister();          \
622     XMMRegister tmp = i.TempSimd128Register(0); \
623     __ xorq(dst, dst);                          \
624     __ Pxor(tmp, tmp);                          \
625     __ opcode(tmp, i.InputSimd128Register(0));  \
626     __ Ptest(tmp, tmp);                         \
627     __ setcc(equal, dst);                       \
628   } while (false)
629 
630 // If the shift is an immediate, this macro emits the opcode directly; the
631 // shift value is taken modulo 2^width. Otherwise, it emits code that reduces
632 // the shift count modulo 2^width before shifting.
633 #define ASSEMBLE_SIMD_SHIFT(opcode, width)                 \
634   do {                                                     \
635     XMMRegister dst = i.OutputSimd128Register();           \
636     if (HasImmediateInput(instr, 1)) {                     \
637       if (CpuFeatures::IsSupported(AVX)) {                 \
638         CpuFeatureScope avx_scope(tasm(), AVX);            \
639         __ v##opcode(dst, i.InputSimd128Register(0),       \
640                      byte{i.InputInt##width(1)});          \
641       } else {                                             \
642         DCHECK_EQ(dst, i.InputSimd128Register(0));         \
643         __ opcode(dst, byte{i.InputInt##width(1)});        \
644       }                                                    \
645     } else {                                               \
646       XMMRegister tmp = i.TempSimd128Register(0);          \
647       Register tmp_shift = i.TempRegister(1);              \
648       constexpr int mask = (1 << width) - 1;               \
649       __ movq(tmp_shift, i.InputRegister(1));              \
650       __ andq(tmp_shift, Immediate(mask));                 \
651       __ Movq(tmp, tmp_shift);                             \
652       if (CpuFeatures::IsSupported(AVX)) {                 \
653         CpuFeatureScope avx_scope(tasm(), AVX);            \
654         __ v##opcode(dst, i.InputSimd128Register(0), tmp); \
655       } else {                                             \
656         DCHECK_EQ(dst, i.InputSimd128Register(0));         \
657         __ opcode(dst, tmp);                               \
658       }                                                    \
659     }                                                      \
660   } while (false)
661 
662 #define ASSEMBLE_PINSR(ASM_INSTR)                                     \
663   do {                                                                \
664     EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset()); \
665     XMMRegister dst = i.OutputSimd128Register();                      \
666     XMMRegister src = i.InputSimd128Register(0);                      \
667     uint8_t laneidx = i.InputUint8(1);                                \
668     if (HasAddressingMode(instr)) {                                   \
669       __ ASM_INSTR(dst, src, i.MemoryOperand(2), laneidx);            \
670       break;                                                          \
671     }                                                                 \
672     if (instr->InputAt(2)->IsFPRegister()) {                          \
673       __ Movq(kScratchRegister, i.InputDoubleRegister(2));            \
674       __ ASM_INSTR(dst, src, kScratchRegister, laneidx);              \
675     } else if (instr->InputAt(2)->IsRegister()) {                     \
676       __ ASM_INSTR(dst, src, i.InputRegister(2), laneidx);            \
677     } else {                                                          \
678       __ ASM_INSTR(dst, src, i.InputOperand(2), laneidx);             \
679     }                                                                 \
680   } while (false)
681 
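// Tears down the current frame by restoring rsp from rbp and popping the saved
// frame pointer.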
682 void CodeGenerator::AssembleDeconstructFrame() {
683   unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
684   __ movq(rsp, rbp);
685   __ popq(rbp);
686 }
687 
688 void CodeGenerator::AssemblePrepareTailCall() {
689   if (frame_access_state()->has_frame()) {
690     __ movq(rbp, MemOperand(rbp, 0));
691   }
692   frame_access_state()->SetFrameAccessToSP();
693 }
694 
695 void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
696                                                      Register scratch1,
697                                                      Register scratch2,
698                                                      Register scratch3) {
699   DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
700   Label done;
701 
702   // Check if the current frame is an arguments adaptor frame.
703   __ cmpq(Operand(rbp, CommonFrameConstants::kContextOrFrameTypeOffset),
704           Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
705   __ j(not_equal, &done, Label::kNear);
706 
707   // Load the arguments count from the current arguments adaptor frame (note
708   // that it does not include the receiver).
709   Register caller_args_count_reg = scratch1;
710   __ SmiUntag(caller_args_count_reg,
711               Operand(rbp, ArgumentsAdaptorFrameConstants::kLengthOffset));
712 
713   __ PrepareForTailCall(args_reg, caller_args_count_reg, scratch2, scratch3);
714   __ bind(&done);
715 }
716 
717 namespace {
718 
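// Adjusts rsp so that exactly |new_slot_above_sp| slots lie above the stack
// pointer before a tail call, growing (or, if allowed, shrinking) the stack as
// needed. For tier-up tail calls, rsp is instead reset to its value before
// frame construction.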
719 void AdjustStackPointerForTailCall(Instruction* instr,
720                                    TurboAssembler* assembler, Linkage* linkage,
721                                    OptimizedCompilationInfo* info,
722                                    FrameAccessState* state,
723                                    int new_slot_above_sp,
724                                    bool allow_shrinkage = true) {
725   int stack_slot_delta;
726   if (instr->HasCallDescriptorFlag(CallDescriptor::kIsTailCallForTierUp)) {
727     // For this special tail-call mode, the callee has the same arguments and
728     // linkage as the caller, and arguments adaptor frames must be preserved.
729     // Thus we simply have to reset the stack pointer register to its original
730     // value before frame construction.
731     // See also: AssembleConstructFrame.
732     DCHECK(!info->is_osr());
733     DCHECK_EQ(linkage->GetIncomingDescriptor()->CalleeSavedRegisters(), 0);
734     DCHECK_EQ(linkage->GetIncomingDescriptor()->CalleeSavedFPRegisters(), 0);
735     DCHECK_EQ(state->frame()->GetReturnSlotCount(), 0);
736     stack_slot_delta = (state->frame()->GetTotalFrameSlotCount() -
737                         kReturnAddressStackSlotCount) *
738                        -1;
739     DCHECK_LE(stack_slot_delta, 0);
740   } else {
741     int current_sp_offset = state->GetSPToFPSlotCount() +
742                             StandardFrameConstants::kFixedSlotCountAboveFp;
743     stack_slot_delta = new_slot_above_sp - current_sp_offset;
744   }
745 
746   if (stack_slot_delta > 0) {
747     assembler->AllocateStackSpace(stack_slot_delta * kSystemPointerSize);
748     state->IncreaseSPDelta(stack_slot_delta);
749   } else if (allow_shrinkage && stack_slot_delta < 0) {
750     assembler->addq(rsp, Immediate(-stack_slot_delta * kSystemPointerSize));
751     state->IncreaseSPDelta(stack_slot_delta);
752   }
753 }
754 
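// Loads a 128-bit SIMD immediate, given as four 32-bit words, into |reg|.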
755 void SetupSimdImmediateInRegister(TurboAssembler* assembler, uint32_t* imms,
756                                   XMMRegister reg) {
757   assembler->Move(reg, make_uint64(imms[3], imms[2]),
758                   make_uint64(imms[1], imms[0]));
759 }
760 
761 }  // namespace
762 
763 void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
764                                               int first_unused_stack_slot) {
765   CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
766   ZoneVector<MoveOperands*> pushes(zone());
767   GetPushCompatibleMoves(instr, flags, &pushes);
768 
769   if (!pushes.empty() &&
770       (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
771        first_unused_stack_slot)) {
772     DCHECK(!instr->HasCallDescriptorFlag(CallDescriptor::kIsTailCallForTierUp));
773     X64OperandConverter g(this, instr);
774     for (auto move : pushes) {
775       LocationOperand destination_location(
776           LocationOperand::cast(move->destination()));
777       InstructionOperand source(move->source());
778       AdjustStackPointerForTailCall(instr, tasm(), linkage(), info(),
779                                     frame_access_state(),
780                                     destination_location.index());
781       if (source.IsStackSlot()) {
782         LocationOperand source_location(LocationOperand::cast(source));
783         __ Push(g.SlotToOperand(source_location.index()));
784       } else if (source.IsRegister()) {
785         LocationOperand source_location(LocationOperand::cast(source));
786         __ Push(source_location.GetRegister());
787       } else if (source.IsImmediate()) {
788         __ Push(Immediate(ImmediateOperand::cast(source).inline_value()));
789       } else {
790         // Pushes of non-scalar data types are not supported.
791         UNIMPLEMENTED();
792       }
793       frame_access_state()->IncreaseSPDelta(1);
794       move->Eliminate();
795     }
796   }
797   AdjustStackPointerForTailCall(instr, tasm(), linkage(), info(),
798                                 frame_access_state(), first_unused_stack_slot,
799                                 false);
800 }
801 
802 void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
803                                              int first_unused_stack_slot) {
804   AdjustStackPointerForTailCall(instr, tasm(), linkage(), info(),
805                                 frame_access_state(), first_unused_stack_slot);
806 }
807 
808 // Check that {kJavaScriptCallCodeStartRegister} is correct.
809 void CodeGenerator::AssembleCodeStartRegisterCheck() {
810   __ ComputeCodeStartAddress(rbx);
811   __ cmpq(rbx, kJavaScriptCallCodeStartRegister);
812   __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
813 }
814 
815 // Check if the code object is marked for deoptimization. If it is, then it
816 // jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
817 // to:
818 //    1. read from memory the word that contains that bit, which can be found in
819 //       the flags in the referenced {CodeDataContainer} object;
820 //    2. test kMarkedForDeoptimizationBit in those flags; and
821 //    3. if it is not zero then it jumps to the builtin.
822 void CodeGenerator::BailoutIfDeoptimized() {
823   int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
824   __ LoadTaggedPointerField(rbx,
825                             Operand(kJavaScriptCallCodeStartRegister, offset));
826   __ testl(FieldOperand(rbx, CodeDataContainer::kKindSpecificFlagsOffset),
827            Immediate(1 << Code::kMarkedForDeoptimizationBit));
828   __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
829           RelocInfo::CODE_TARGET, not_zero);
830 }
831 
832 void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
833   // Set a mask which has all bits set in the normal case, but has all
834   // bits cleared if we are speculatively executing the wrong PC.
835   __ ComputeCodeStartAddress(rbx);
836   __ xorq(kSpeculationPoisonRegister, kSpeculationPoisonRegister);
837   __ cmpq(kJavaScriptCallCodeStartRegister, rbx);
838   __ movq(rbx, Immediate(-1));
839   __ cmovq(equal, kSpeculationPoisonRegister, rbx);
840 }
841 
842 void CodeGenerator::AssembleRegisterArgumentPoisoning() {
843   __ andq(kJSFunctionRegister, kSpeculationPoisonRegister);
844   __ andq(kContextRegister, kSpeculationPoisonRegister);
845   __ andq(rsp, kSpeculationPoisonRegister);
846 }
847 
848 // Assembles an instruction after register allocation, producing machine code.
849 CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
850     Instruction* instr) {
851   X64OperandConverter i(this, instr);
852   InstructionCode opcode = instr->opcode();
853   ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
854   switch (arch_opcode) {
855     case kArchCallCodeObject: {
856       if (HasImmediateInput(instr, 0)) {
857         Handle<Code> code = i.InputCode(0);
858         __ Call(code, RelocInfo::CODE_TARGET);
859       } else {
860         Register reg = i.InputRegister(0);
861         DCHECK_IMPLIES(
862             instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
863             reg == kJavaScriptCallCodeStartRegister);
864         __ LoadCodeObjectEntry(reg, reg);
865         if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
866           __ RetpolineCall(reg);
867         } else {
868           __ call(reg);
869         }
870       }
871       RecordCallPosition(instr);
872       frame_access_state()->ClearSPDelta();
873       break;
874     }
875     case kArchCallBuiltinPointer: {
876       DCHECK(!HasImmediateInput(instr, 0));
877       Register builtin_index = i.InputRegister(0);
878       __ CallBuiltinByIndex(builtin_index);
879       RecordCallPosition(instr);
880       frame_access_state()->ClearSPDelta();
881       break;
882     }
883     case kArchCallWasmFunction: {
884       if (HasImmediateInput(instr, 0)) {
885         Constant constant = i.ToConstant(instr->InputAt(0));
886         Address wasm_code = static_cast<Address>(constant.ToInt64());
887         if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
888           __ near_call(wasm_code, constant.rmode());
889         } else {
890           if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
891             __ RetpolineCall(wasm_code, constant.rmode());
892           } else {
893             __ Call(wasm_code, constant.rmode());
894           }
895         }
896       } else {
897         Register reg = i.InputRegister(0);
898         if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
899           __ RetpolineCall(reg);
900         } else {
901           __ call(reg);
902         }
903       }
904       RecordCallPosition(instr);
905       frame_access_state()->ClearSPDelta();
906       break;
907     }
908     case kArchTailCallCodeObjectFromJSFunction:
909       if (!instr->HasCallDescriptorFlag(CallDescriptor::kIsTailCallForTierUp)) {
910         AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
911                                          i.TempRegister(0), i.TempRegister(1),
912                                          i.TempRegister(2));
913       }
914       V8_FALLTHROUGH;
915     case kArchTailCallCodeObject: {
916       if (HasImmediateInput(instr, 0)) {
917         Handle<Code> code = i.InputCode(0);
918         __ Jump(code, RelocInfo::CODE_TARGET);
919       } else {
920         Register reg = i.InputRegister(0);
921         DCHECK_IMPLIES(
922             instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
923             reg == kJavaScriptCallCodeStartRegister);
924         __ LoadCodeObjectEntry(reg, reg);
925         if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
926           __ RetpolineJump(reg);
927         } else {
928           __ jmp(reg);
929         }
930       }
931       unwinding_info_writer_.MarkBlockWillExit();
932       frame_access_state()->ClearSPDelta();
933       frame_access_state()->SetFrameAccessToDefault();
934       break;
935     }
936     case kArchTailCallWasm: {
937       if (HasImmediateInput(instr, 0)) {
938         Constant constant = i.ToConstant(instr->InputAt(0));
939         Address wasm_code = static_cast<Address>(constant.ToInt64());
940         if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
941           __ near_jmp(wasm_code, constant.rmode());
942         } else {
943           __ Move(kScratchRegister, wasm_code, constant.rmode());
944           __ jmp(kScratchRegister);
945         }
946       } else {
947         Register reg = i.InputRegister(0);
948         if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
949           __ RetpolineJump(reg);
950         } else {
951           __ jmp(reg);
952         }
953       }
954       unwinding_info_writer_.MarkBlockWillExit();
955       frame_access_state()->ClearSPDelta();
956       frame_access_state()->SetFrameAccessToDefault();
957       break;
958     }
959     case kArchTailCallAddress: {
960       CHECK(!HasImmediateInput(instr, 0));
961       Register reg = i.InputRegister(0);
962       DCHECK_IMPLIES(
963           instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
964           reg == kJavaScriptCallCodeStartRegister);
965       if (instr->HasCallDescriptorFlag(CallDescriptor::kRetpoline)) {
966         __ RetpolineJump(reg);
967       } else {
968         __ jmp(reg);
969       }
970       unwinding_info_writer_.MarkBlockWillExit();
971       frame_access_state()->ClearSPDelta();
972       frame_access_state()->SetFrameAccessToDefault();
973       break;
974     }
975     case kArchCallJSFunction: {
976       Register func = i.InputRegister(0);
977       if (FLAG_debug_code) {
978         // Check the function's context matches the context argument.
979         __ cmp_tagged(rsi, FieldOperand(func, JSFunction::kContextOffset));
980         __ Assert(equal, AbortReason::kWrongFunctionContext);
981       }
982       static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch");
983       __ LoadTaggedPointerField(rcx,
984                                 FieldOperand(func, JSFunction::kCodeOffset));
985       __ CallCodeObject(rcx);
986       frame_access_state()->ClearSPDelta();
987       RecordCallPosition(instr);
988       break;
989     }
990     case kArchPrepareCallCFunction: {
991       // Frame alignment requires using FP-relative frame addressing.
992       frame_access_state()->SetFrameAccessToFP();
993       int const num_parameters = MiscField::decode(instr->opcode());
994       __ PrepareCallCFunction(num_parameters);
995       break;
996     }
997     case kArchSaveCallerRegisters: {
998       fp_mode_ =
999           static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
1000       DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
1001       // kReturnRegister0 should have been saved before entering the stub.
1002       int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
1003       DCHECK(IsAligned(bytes, kSystemPointerSize));
1004       DCHECK_EQ(0, frame_access_state()->sp_delta());
1005       frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
1006       DCHECK(!caller_registers_saved_);
1007       caller_registers_saved_ = true;
1008       break;
1009     }
1010     case kArchRestoreCallerRegisters: {
1011       DCHECK(fp_mode_ ==
1012              static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
1013       DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
1014       // Don't overwrite the returned value.
1015       int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
1016       frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
1017       DCHECK_EQ(0, frame_access_state()->sp_delta());
1018       DCHECK(caller_registers_saved_);
1019       caller_registers_saved_ = false;
1020       break;
1021     }
1022     case kArchPrepareTailCall:
1023       AssemblePrepareTailCall();
1024       break;
1025     case kArchCallCFunction: {
1026       int const num_parameters = MiscField::decode(instr->opcode());
1027       Label return_location;
1028       if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
1029         // Put the return address in a stack slot.
1030         __ leaq(kScratchRegister, Operand(&return_location, 0));
1031         __ movq(MemOperand(rbp, WasmExitFrameConstants::kCallingPCOffset),
1032                 kScratchRegister);
1033       }
1034       if (HasImmediateInput(instr, 0)) {
1035         ExternalReference ref = i.InputExternalReference(0);
1036         __ CallCFunction(ref, num_parameters);
1037       } else {
1038         Register func = i.InputRegister(0);
1039         __ CallCFunction(func, num_parameters);
1040       }
1041       __ bind(&return_location);
1042       if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
1043         RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt);
1044       }
1045       frame_access_state()->SetFrameAccessToDefault();
1046       // Ideally, we should decrement the SP delta to match the change of the
1047       // stack pointer in CallCFunction. However, on certain architectures
1048       // (e.g. ARM) there may be stricter alignment requirements that cause the
1049       // old SP to be saved on the stack. In those cases we cannot compute the
1050       // SP delta statically.
1051       frame_access_state()->ClearSPDelta();
1052       if (caller_registers_saved_) {
1053         // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
1054         // Here, we assume the sequence to be:
1055         //   kArchSaveCallerRegisters;
1056         //   kArchCallCFunction;
1057         //   kArchRestoreCallerRegisters;
1058         int bytes =
1059             __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
1060         frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
1061       }
1062       // TODO(tebbi): Do we need an lfence here?
1063       break;
1064     }
1065     case kArchJmp:
1066       AssembleArchJump(i.InputRpo(0));
1067       break;
1068     case kArchBinarySearchSwitch:
1069       AssembleArchBinarySearchSwitch(instr);
1070       break;
1071     case kArchTableSwitch:
1072       AssembleArchTableSwitch(instr);
1073       break;
1074     case kArchComment:
1075       __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
1076       break;
1077     case kArchAbortCSAAssert:
1078       DCHECK(i.InputRegister(0) == rdx);
1079       {
1080         // We don't actually want to generate a pile of code for this, so just
1081         // claim there is a stack frame, without generating one.
1082         FrameScope scope(tasm(), StackFrame::NONE);
1083         __ Call(
1084             isolate()->builtins()->builtin_handle(Builtins::kAbortCSAAssert),
1085             RelocInfo::CODE_TARGET);
1086       }
1087       __ int3();
1088       unwinding_info_writer_.MarkBlockWillExit();
1089       break;
1090     case kArchDebugBreak:
1091       __ DebugBreak();
1092       break;
1093     case kArchThrowTerminator:
1094       unwinding_info_writer_.MarkBlockWillExit();
1095       break;
1096     case kArchNop:
1097       // Don't emit code for nops.
1098       break;
1099     case kArchDeoptimize: {
1100       DeoptimizationExit* exit =
1101           BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
1102       __ jmp(exit->label());
1103       break;
1104     }
1105     case kArchRet:
1106       AssembleReturn(instr->InputAt(0));
1107       break;
1108     case kArchFramePointer:
1109       __ movq(i.OutputRegister(), rbp);
1110       break;
1111     case kArchParentFramePointer:
1112       if (frame_access_state()->has_frame()) {
1113         __ movq(i.OutputRegister(), Operand(rbp, 0));
1114       } else {
1115         __ movq(i.OutputRegister(), rbp);
1116       }
1117       break;
1118     case kArchStackPointerGreaterThan: {
1119       // Potentially apply an offset to the current stack pointer before the
1120       // comparison to account for the size difference between an optimized
1121       // frame and the unoptimized frames it contains.
1122 
1123       Register lhs_register = rsp;
1124       uint32_t offset;
1125 
1126       if (ShouldApplyOffsetToStackCheck(instr, &offset)) {
1127         lhs_register = kScratchRegister;
1128         __ leaq(lhs_register, Operand(rsp, static_cast<int32_t>(offset) * -1));
1129       }
1130 
1131       constexpr size_t kValueIndex = 0;
1132       if (HasAddressingMode(instr)) {
1133         __ cmpq(lhs_register, i.MemoryOperand(kValueIndex));
1134       } else {
1135         __ cmpq(lhs_register, i.InputRegister(kValueIndex));
1136       }
1137       break;
1138     }
1139     case kArchStackCheckOffset:
1140       __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
1141       break;
1142     case kArchTruncateDoubleToI: {
1143       auto result = i.OutputRegister();
1144       auto input = i.InputDoubleRegister(0);
1145       auto ool = zone()->New<OutOfLineTruncateDoubleToI>(
1146           this, result, input, DetermineStubCallMode(),
1147           &unwinding_info_writer_);
1148       // We use Cvttsd2siq instead of Cvttsd2si for performance reasons. The
1149       // use of Cvttsd2siq requires the movl below to avoid sign extension.
1150       __ Cvttsd2siq(result, input);
1151       __ cmpq(result, Immediate(1));
1152       __ j(overflow, ool->entry());
1153       __ bind(ool->exit());
1154       __ movl(result, result);
1155       break;
1156     }
1157     case kArchStoreWithWriteBarrier: {
1158       RecordWriteMode mode =
1159           static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
1160       Register object = i.InputRegister(0);
1161       size_t index = 0;
1162       Operand operand = i.MemoryOperand(&index);
1163       Register value = i.InputRegister(index);
1164       Register scratch0 = i.TempRegister(0);
1165       Register scratch1 = i.TempRegister(1);
1166       auto ool = zone()->New<OutOfLineRecordWrite>(this, object, operand, value,
1167                                                    scratch0, scratch1, mode,
1168                                                    DetermineStubCallMode());
1169       __ StoreTaggedField(operand, value);
1170       __ CheckPageFlag(object, scratch0,
1171                        MemoryChunk::kPointersFromHereAreInterestingMask,
1172                        not_zero, ool->entry());
1173       __ bind(ool->exit());
1174       break;
1175     }
1176     case kArchWordPoisonOnSpeculation:
1177       DCHECK_EQ(i.OutputRegister(), i.InputRegister(0));
1178       __ andq(i.InputRegister(0), kSpeculationPoisonRegister);
1179       break;
1180     case kX64MFence:
1181       __ mfence();
1182       break;
1183     case kX64LFence:
1184       __ lfence();
1185       break;
1186     case kArchStackSlot: {
1187       FrameOffset offset =
1188           frame_access_state()->GetFrameOffset(i.InputInt32(0));
1189       Register base = offset.from_stack_pointer() ? rsp : rbp;
1190       __ leaq(i.OutputRegister(), Operand(base, offset.offset()));
1191       break;
1192     }
1193     case kIeee754Float64Acos:
1194       ASSEMBLE_IEEE754_UNOP(acos);
1195       break;
1196     case kIeee754Float64Acosh:
1197       ASSEMBLE_IEEE754_UNOP(acosh);
1198       break;
1199     case kIeee754Float64Asin:
1200       ASSEMBLE_IEEE754_UNOP(asin);
1201       break;
1202     case kIeee754Float64Asinh:
1203       ASSEMBLE_IEEE754_UNOP(asinh);
1204       break;
1205     case kIeee754Float64Atan:
1206       ASSEMBLE_IEEE754_UNOP(atan);
1207       break;
1208     case kIeee754Float64Atanh:
1209       ASSEMBLE_IEEE754_UNOP(atanh);
1210       break;
1211     case kIeee754Float64Atan2:
1212       ASSEMBLE_IEEE754_BINOP(atan2);
1213       break;
1214     case kIeee754Float64Cbrt:
1215       ASSEMBLE_IEEE754_UNOP(cbrt);
1216       break;
1217     case kIeee754Float64Cos:
1218       ASSEMBLE_IEEE754_UNOP(cos);
1219       break;
1220     case kIeee754Float64Cosh:
1221       ASSEMBLE_IEEE754_UNOP(cosh);
1222       break;
1223     case kIeee754Float64Exp:
1224       ASSEMBLE_IEEE754_UNOP(exp);
1225       break;
1226     case kIeee754Float64Expm1:
1227       ASSEMBLE_IEEE754_UNOP(expm1);
1228       break;
1229     case kIeee754Float64Log:
1230       ASSEMBLE_IEEE754_UNOP(log);
1231       break;
1232     case kIeee754Float64Log1p:
1233       ASSEMBLE_IEEE754_UNOP(log1p);
1234       break;
1235     case kIeee754Float64Log2:
1236       ASSEMBLE_IEEE754_UNOP(log2);
1237       break;
1238     case kIeee754Float64Log10:
1239       ASSEMBLE_IEEE754_UNOP(log10);
1240       break;
1241     case kIeee754Float64Pow:
1242       ASSEMBLE_IEEE754_BINOP(pow);
1243       break;
1244     case kIeee754Float64Sin:
1245       ASSEMBLE_IEEE754_UNOP(sin);
1246       break;
1247     case kIeee754Float64Sinh:
1248       ASSEMBLE_IEEE754_UNOP(sinh);
1249       break;
1250     case kIeee754Float64Tan:
1251       ASSEMBLE_IEEE754_UNOP(tan);
1252       break;
1253     case kIeee754Float64Tanh:
1254       ASSEMBLE_IEEE754_UNOP(tanh);
1255       break;
1256     case kX64Add32:
1257       ASSEMBLE_BINOP(addl);
1258       break;
1259     case kX64Add:
1260       ASSEMBLE_BINOP(addq);
1261       break;
1262     case kX64Sub32:
1263       ASSEMBLE_BINOP(subl);
1264       break;
1265     case kX64Sub:
1266       ASSEMBLE_BINOP(subq);
1267       break;
1268     case kX64And32:
1269       ASSEMBLE_BINOP(andl);
1270       break;
1271     case kX64And:
1272       ASSEMBLE_BINOP(andq);
1273       break;
1274     case kX64Cmp8:
1275       ASSEMBLE_COMPARE(cmpb);
1276       break;
1277     case kX64Cmp16:
1278       ASSEMBLE_COMPARE(cmpw);
1279       break;
1280     case kX64Cmp32:
1281       ASSEMBLE_COMPARE(cmpl);
1282       break;
1283     case kX64Cmp:
1284       ASSEMBLE_COMPARE(cmpq);
1285       break;
1286     case kX64Test8:
1287       ASSEMBLE_COMPARE(testb);
1288       break;
1289     case kX64Test16:
1290       ASSEMBLE_COMPARE(testw);
1291       break;
1292     case kX64Test32:
1293       ASSEMBLE_COMPARE(testl);
1294       break;
1295     case kX64Test:
1296       ASSEMBLE_COMPARE(testq);
1297       break;
1298     case kX64Imul32:
1299       ASSEMBLE_MULT(imull);
1300       break;
1301     case kX64Imul:
1302       ASSEMBLE_MULT(imulq);
1303       break;
1304     case kX64ImulHigh32:
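      // One-operand imull computes the signed 64-bit product edx:eax = eax * src,
      // so the high 32 bits of the result end up in edx.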
1305       if (HasRegisterInput(instr, 1)) {
1306         __ imull(i.InputRegister(1));
1307       } else {
1308         __ imull(i.InputOperand(1));
1309       }
1310       break;
1311     case kX64UmulHigh32:
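      // One-operand mull is the unsigned variant: edx:eax = eax * src, with the
      // high 32 bits of the product in edx.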
1312       if (HasRegisterInput(instr, 1)) {
1313         __ mull(i.InputRegister(1));
1314       } else {
1315         __ mull(i.InputOperand(1));
1316       }
1317       break;
1318     case kX64Idiv32:
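      // cdq sign-extends eax into edx:eax before idivl divides by the operand;
      // the quotient lands in eax and the remainder in edx.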
1319       __ cdq();
1320       __ idivl(i.InputRegister(1));
1321       break;
1322     case kX64Idiv:
1323       __ cqo();
1324       __ idivq(i.InputRegister(1));
1325       break;
1326     case kX64Udiv32:
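      // divl divides the unsigned 64-bit value edx:eax by the operand, so edx is
      // zeroed first; the quotient lands in eax and the remainder in edx.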
1327       __ xorl(rdx, rdx);
1328       __ divl(i.InputRegister(1));
1329       break;
1330     case kX64Udiv:
1331       __ xorq(rdx, rdx);
1332       __ divq(i.InputRegister(1));
1333       break;
1334     case kX64Not:
1335       ASSEMBLE_UNOP(notq);
1336       break;
1337     case kX64Not32:
1338       ASSEMBLE_UNOP(notl);
1339       break;
1340     case kX64Neg:
1341       ASSEMBLE_UNOP(negq);
1342       break;
1343     case kX64Neg32:
1344       ASSEMBLE_UNOP(negl);
1345       break;
1346     case kX64Or32:
1347       ASSEMBLE_BINOP(orl);
1348       break;
1349     case kX64Or:
1350       ASSEMBLE_BINOP(orq);
1351       break;
1352     case kX64Xor32:
1353       ASSEMBLE_BINOP(xorl);
1354       break;
1355     case kX64Xor:
1356       ASSEMBLE_BINOP(xorq);
1357       break;
1358     case kX64Shl32:
1359       ASSEMBLE_SHIFT(shll, 5);
1360       break;
1361     case kX64Shl:
1362       ASSEMBLE_SHIFT(shlq, 6);
1363       break;
1364     case kX64Shr32:
1365       ASSEMBLE_SHIFT(shrl, 5);
1366       break;
1367     case kX64Shr:
1368       ASSEMBLE_SHIFT(shrq, 6);
1369       break;
1370     case kX64Sar32:
1371       ASSEMBLE_SHIFT(sarl, 5);
1372       break;
1373     case kX64Sar:
1374       ASSEMBLE_SHIFT(sarq, 6);
1375       break;
1376     case kX64Rol32:
1377       ASSEMBLE_SHIFT(roll, 5);
1378       break;
1379     case kX64Rol:
1380       ASSEMBLE_SHIFT(rolq, 6);
1381       break;
1382     case kX64Ror32:
1383       ASSEMBLE_SHIFT(rorl, 5);
1384       break;
1385     case kX64Ror:
1386       ASSEMBLE_SHIFT(rorq, 6);
1387       break;
1388     case kX64Lzcnt:
1389       if (HasRegisterInput(instr, 0)) {
1390         __ Lzcntq(i.OutputRegister(), i.InputRegister(0));
1391       } else {
1392         __ Lzcntq(i.OutputRegister(), i.InputOperand(0));
1393       }
1394       break;
1395     case kX64Lzcnt32:
1396       if (HasRegisterInput(instr, 0)) {
1397         __ Lzcntl(i.OutputRegister(), i.InputRegister(0));
1398       } else {
1399         __ Lzcntl(i.OutputRegister(), i.InputOperand(0));
1400       }
1401       break;
1402     case kX64Tzcnt:
1403       if (HasRegisterInput(instr, 0)) {
1404         __ Tzcntq(i.OutputRegister(), i.InputRegister(0));
1405       } else {
1406         __ Tzcntq(i.OutputRegister(), i.InputOperand(0));
1407       }
1408       break;
1409     case kX64Tzcnt32:
1410       if (HasRegisterInput(instr, 0)) {
1411         __ Tzcntl(i.OutputRegister(), i.InputRegister(0));
1412       } else {
1413         __ Tzcntl(i.OutputRegister(), i.InputOperand(0));
1414       }
1415       break;
1416     case kX64Popcnt:
1417       if (HasRegisterInput(instr, 0)) {
1418         __ Popcntq(i.OutputRegister(), i.InputRegister(0));
1419       } else {
1420         __ Popcntq(i.OutputRegister(), i.InputOperand(0));
1421       }
1422       break;
1423     case kX64Popcnt32:
1424       if (HasRegisterInput(instr, 0)) {
1425         __ Popcntl(i.OutputRegister(), i.InputRegister(0));
1426       } else {
1427         __ Popcntl(i.OutputRegister(), i.InputOperand(0));
1428       }
1429       break;
1430     case kX64Bswap:
1431       __ bswapq(i.OutputRegister());
1432       break;
1433     case kX64Bswap32:
1434       __ bswapl(i.OutputRegister());
1435       break;
1436     case kSSEFloat32Cmp:
1437       ASSEMBLE_SSE_BINOP(Ucomiss);
1438       break;
1439     case kSSEFloat32Add:
1440       ASSEMBLE_SSE_BINOP(addss);
1441       break;
1442     case kSSEFloat32Sub:
1443       ASSEMBLE_SSE_BINOP(subss);
1444       break;
1445     case kSSEFloat32Mul:
1446       ASSEMBLE_SSE_BINOP(mulss);
1447       break;
1448     case kSSEFloat32Div:
1449       ASSEMBLE_SSE_BINOP(divss);
1450       // Don't delete this mov. It may improve performance on some CPUs,
1451       // when there is a (v)mulss depending on the result.
1452       __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1453       break;
1454     case kSSEFloat32Abs: {
1455       // TODO(bmeurer): Use RIP relative 128-bit constants.
1456       XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
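      // All-ones shifted right by 33 leaves 0x7FFFFFFF in the low 32 bits of
      // each lane; ANDing with it clears the float32 sign bit.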
1457       __ Pcmpeqd(tmp, tmp);
1458       __ Psrlq(tmp, 33);
1459       __ Andps(i.OutputDoubleRegister(), tmp);
1460       break;
1461     }
1462     case kSSEFloat32Neg: {
1463       // TODO(bmeurer): Use RIP relative 128-bit constants.
1464       XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
1465       __ Pcmpeqd(tmp, tmp);
1466       __ Psllq(tmp, 31);
1467       __ Xorps(i.OutputDoubleRegister(), tmp);
1468       break;
1469     }
1470     case kSSEFloat32Sqrt:
1471       ASSEMBLE_SSE_UNOP(sqrtss);
1472       break;
1473     case kSSEFloat32ToFloat64:
1474       ASSEMBLE_SSE_UNOP(Cvtss2sd);
1475       break;
1476     case kSSEFloat32Round: {
1477       CpuFeatureScope sse_scope(tasm(), SSE4_1);
1478       RoundingMode const mode =
1479           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1480       __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1481       break;
1482     }
1483     case kSSEFloat32ToInt32:
1484       if (instr->InputAt(0)->IsFPRegister()) {
1485         __ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0));
1486       } else {
1487         __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
1488       }
1489       break;
1490     case kSSEFloat32ToUint32: {
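      // Truncate via the signed 64-bit conversion: any value that fits in a
      // uint32 also fits in int64, so the low 32 bits hold the unsigned result.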
1491       if (instr->InputAt(0)->IsFPRegister()) {
1492         __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1493       } else {
1494         __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1495       }
1496       break;
1497     }
1498     case kSSEFloat64Cmp:
1499       ASSEMBLE_SSE_BINOP(Ucomisd);
1500       break;
1501     case kSSEFloat64Add:
1502       ASSEMBLE_SSE_BINOP(addsd);
1503       break;
1504     case kSSEFloat64Sub:
1505       ASSEMBLE_SSE_BINOP(subsd);
1506       break;
1507     case kSSEFloat64Mul:
1508       ASSEMBLE_SSE_BINOP(mulsd);
1509       break;
1510     case kSSEFloat64Div:
1511       ASSEMBLE_SSE_BINOP(divsd);
1512       // Don't delete this mov. It may improve performance on some CPUs,
1513       // when there is a (v)mulsd depending on the result.
1514       __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1515       break;
1516     case kSSEFloat64Mod: {
1517       __ AllocateStackSpace(kDoubleSize);
1518       unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1519                                                        kDoubleSize);
1520       // Move values to st(0) and st(1).
1521       __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
1522       __ fld_d(Operand(rsp, 0));
1523       __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
1524       __ fld_d(Operand(rsp, 0));
1525       // Loop while fprem isn't done.
1526       Label mod_loop;
1527       __ bind(&mod_loop);
1528       // This instruction traps on all kinds of inputs, but we are assuming the
1529       // floating point control word is set to ignore them all.
1530       __ fprem();
1531       // The following 2 instructions implicitly use rax.
1532       __ fnstsw_ax();
1533       if (CpuFeatures::IsSupported(SAHF)) {
1534         CpuFeatureScope sahf_scope(tasm(), SAHF);
1535         __ sahf();
1536       } else {
1537         __ shrl(rax, Immediate(8));
1538         __ andl(rax, Immediate(0xFF));
1539         __ pushq(rax);
1540         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1541                                                          kSystemPointerSize);
1542         __ popfq();
1543         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1544                                                          -kSystemPointerSize);
1545       }
1546       __ j(parity_even, &mod_loop);
1547       // Move output to stack and clean up.
1548       __ fstp(1);
1549       __ fstp_d(Operand(rsp, 0));
1550       __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
1551       __ addq(rsp, Immediate(kDoubleSize));
1552       unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1553                                                        -kDoubleSize);
1554       break;
1555     }
1556     case kSSEFloat32Max: {
1557       Label compare_swap, done_compare;
1558       if (instr->InputAt(1)->IsFPRegister()) {
1559         __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1560       } else {
1561         __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1562       }
1563       auto ool =
1564           zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
1565       __ j(parity_even, ool->entry());
1566       __ j(above, &done_compare, Label::kNear);
1567       __ j(below, &compare_swap, Label::kNear);
1568       __ Movmskps(kScratchRegister, i.InputDoubleRegister(0));
1569       __ testl(kScratchRegister, Immediate(1));
1570       __ j(zero, &done_compare, Label::kNear);
1571       __ bind(&compare_swap);
1572       if (instr->InputAt(1)->IsFPRegister()) {
1573         __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1574       } else {
1575         __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1576       }
1577       __ bind(&done_compare);
1578       __ bind(ool->exit());
1579       break;
1580     }
1581     case kSSEFloat32Min: {
1582       Label compare_swap, done_compare;
1583       if (instr->InputAt(1)->IsFPRegister()) {
1584         __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1585       } else {
1586         __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1587       }
1588       auto ool =
1589           zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
1590       __ j(parity_even, ool->entry());
1591       __ j(below, &done_compare, Label::kNear);
1592       __ j(above, &compare_swap, Label::kNear);
1593       if (instr->InputAt(1)->IsFPRegister()) {
1594         __ Movmskps(kScratchRegister, i.InputDoubleRegister(1));
1595       } else {
1596         __ Movss(kScratchDoubleReg, i.InputOperand(1));
1597         __ Movmskps(kScratchRegister, kScratchDoubleReg);
1598       }
1599       __ testl(kScratchRegister, Immediate(1));
1600       __ j(zero, &done_compare, Label::kNear);
1601       __ bind(&compare_swap);
1602       if (instr->InputAt(1)->IsFPRegister()) {
1603         __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1604       } else {
1605         __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1606       }
1607       __ bind(&done_compare);
1608       __ bind(ool->exit());
1609       break;
1610     }
1611     case kSSEFloat64Max: {
1612       Label compare_swap, done_compare;
1613       if (instr->InputAt(1)->IsFPRegister()) {
1614         __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1615       } else {
1616         __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1617       }
1618       auto ool =
1619           zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
1620       __ j(parity_even, ool->entry());
1621       __ j(above, &done_compare, Label::kNear);
1622       __ j(below, &compare_swap, Label::kNear);
1623       __ Movmskpd(kScratchRegister, i.InputDoubleRegister(0));
1624       __ testl(kScratchRegister, Immediate(1));
1625       __ j(zero, &done_compare, Label::kNear);
1626       __ bind(&compare_swap);
1627       if (instr->InputAt(1)->IsFPRegister()) {
1628         __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1629       } else {
1630         __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1631       }
1632       __ bind(&done_compare);
1633       __ bind(ool->exit());
1634       break;
1635     }
1636     case kSSEFloat64Min: {
1637       Label compare_swap, done_compare;
1638       if (instr->InputAt(1)->IsFPRegister()) {
1639         __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1640       } else {
1641         __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1642       }
1643       auto ool =
1644           zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
1645       __ j(parity_even, ool->entry());
1646       __ j(below, &done_compare, Label::kNear);
1647       __ j(above, &compare_swap, Label::kNear);
1648       if (instr->InputAt(1)->IsFPRegister()) {
1649         __ Movmskpd(kScratchRegister, i.InputDoubleRegister(1));
1650       } else {
1651         __ Movsd(kScratchDoubleReg, i.InputOperand(1));
1652         __ Movmskpd(kScratchRegister, kScratchDoubleReg);
1653       }
1654       __ testl(kScratchRegister, Immediate(1));
1655       __ j(zero, &done_compare, Label::kNear);
1656       __ bind(&compare_swap);
1657       if (instr->InputAt(1)->IsFPRegister()) {
1658         __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1659       } else {
1660         __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1661       }
1662       __ bind(&done_compare);
1663       __ bind(ool->exit());
1664       break;
1665     }
1666     case kX64F64x2Abs:
1667     case kSSEFloat64Abs: {
1668       // TODO(bmeurer): Use RIP relative 128-bit constants.
1669       XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
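      // All-ones shifted right by 1 is 0x7FFFFFFFFFFFFFFF per lane; ANDing with
      // it clears the float64 sign bit.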
1670       __ Pcmpeqd(tmp, tmp);
1671       __ Psrlq(tmp, 1);
1672       __ Andpd(i.OutputDoubleRegister(), tmp);
1673       break;
1674     }
1675     case kX64F64x2Neg:
1676     case kSSEFloat64Neg: {
1677       // TODO(bmeurer): Use RIP relative 128-bit constants.
1678       XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
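      // All-ones shifted left by 63 leaves only the sign bit set in each lane;
      // XORing with it flips the float64 sign.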
1679       __ Pcmpeqd(tmp, tmp);
1680       __ Psllq(tmp, 63);
1681       __ Xorpd(i.OutputDoubleRegister(), tmp);
1682       break;
1683     }
1684     case kSSEFloat64Sqrt:
1685       ASSEMBLE_SSE_UNOP(Sqrtsd);
1686       break;
1687     case kSSEFloat64Round: {
1688       CpuFeatureScope sse_scope(tasm(), SSE4_1);
1689       RoundingMode const mode =
1690           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1691       __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1692       break;
1693     }
1694     case kSSEFloat64ToFloat32:
1695       ASSEMBLE_SSE_UNOP(Cvtsd2ss);
1696       break;
1697     case kSSEFloat64ToInt32:
1698       if (instr->InputAt(0)->IsFPRegister()) {
1699         __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
1700       } else {
1701         __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
1702       }
1703       break;
1704     case kSSEFloat64ToUint32: {
1705       if (instr->InputAt(0)->IsFPRegister()) {
1706         __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1707       } else {
1708         __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
1709       }
1710       if (MiscField::decode(instr->opcode())) {
1711         __ AssertZeroExtended(i.OutputRegister());
1712       }
1713       break;
1714     }
1715     case kSSEFloat32ToInt64:
1716       if (instr->InputAt(0)->IsFPRegister()) {
1717         __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1718       } else {
1719         __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1720       }
1721       if (instr->OutputCount() > 1) {
1722         __ Set(i.OutputRegister(1), 1);
1723         Label done;
1724         Label fail;
1725         __ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN));
1726         if (instr->InputAt(0)->IsFPRegister()) {
1727           __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
1728         } else {
1729           __ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
1730         }
1731         // If the input is NaN, then the conversion fails.
1732         __ j(parity_even, &fail, Label::kNear);
1733         // If the input is INT64_MIN, then the conversion succeeds.
1734         __ j(equal, &done, Label::kNear);
1735         __ cmpq(i.OutputRegister(0), Immediate(1));
1736         // If the conversion results in INT64_MIN, but the input was not
1737         // INT64_MIN, then the conversion fails.
1738         __ j(no_overflow, &done, Label::kNear);
1739         __ bind(&fail);
1740         __ Set(i.OutputRegister(1), 0);
1741         __ bind(&done);
1742       }
1743       break;
1744     case kSSEFloat64ToInt64:
1745       if (instr->InputAt(0)->IsFPRegister()) {
1746         __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
1747       } else {
1748         __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
1749       }
1750       if (instr->OutputCount() > 1) {
1751         __ Set(i.OutputRegister(1), 1);
1752         Label done;
1753         Label fail;
1754         __ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN));
1755         if (instr->InputAt(0)->IsFPRegister()) {
1756           __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
1757         } else {
1758           __ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
1759         }
1760         // If the input is NaN, then the conversion fails.
1761         __ j(parity_even, &fail, Label::kNear);
1762         // If the input is INT64_MIN, then the conversion succeeds.
1763         __ j(equal, &done, Label::kNear);
1764         __ cmpq(i.OutputRegister(0), Immediate(1));
1765         // If the conversion results in INT64_MIN, but the input was not
1766         // INT64_MIN, then the conversion fails.
1767         __ j(no_overflow, &done, Label::kNear);
1768         __ bind(&fail);
1769         __ Set(i.OutputRegister(1), 0);
1770         __ bind(&done);
1771       }
1772       break;
1773     case kSSEFloat32ToUint64: {
1774       Label fail;
1775       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
1776       if (instr->InputAt(0)->IsFPRegister()) {
1777         __ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
1778       } else {
1779         __ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
1780       }
1781       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
1782       __ bind(&fail);
1783       break;
1784     }
1785     case kSSEFloat64ToUint64: {
1786       Label fail;
1787       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
1788       if (instr->InputAt(0)->IsFPRegister()) {
1789         __ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
1790       } else {
1791         __ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
1792       }
1793       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
1794       __ bind(&fail);
1795       break;
1796     }
1797     case kSSEInt32ToFloat64:
1798       if (HasRegisterInput(instr, 0)) {
1799         __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1800       } else {
1801         __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1802       }
1803       break;
1804     case kSSEInt32ToFloat32:
1805       if (HasRegisterInput(instr, 0)) {
1806         __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1807       } else {
1808         __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1809       }
1810       break;
1811     case kSSEInt64ToFloat32:
1812       if (HasRegisterInput(instr, 0)) {
1813         __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1814       } else {
1815         __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1816       }
1817       break;
1818     case kSSEInt64ToFloat64:
1819       if (HasRegisterInput(instr, 0)) {
1820         __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1821       } else {
1822         __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1823       }
1824       break;
1825     case kSSEUint64ToFloat32:
1826       if (HasRegisterInput(instr, 0)) {
1827         __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1828       } else {
1829         __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1830       }
1831       break;
1832     case kSSEUint64ToFloat64:
1833       if (HasRegisterInput(instr, 0)) {
1834         __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1835       } else {
1836         __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1837       }
1838       break;
1839     case kSSEUint32ToFloat64:
1840       if (HasRegisterInput(instr, 0)) {
1841         __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1842       } else {
1843         __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1844       }
1845       break;
1846     case kSSEUint32ToFloat32:
1847       if (HasRegisterInput(instr, 0)) {
1848         __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1849       } else {
1850         __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1851       }
1852       break;
1853     case kSSEFloat64ExtractLowWord32:
1854       if (instr->InputAt(0)->IsFPStackSlot()) {
1855         __ movl(i.OutputRegister(), i.InputOperand(0));
1856       } else {
1857         __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
1858       }
1859       break;
1860     case kSSEFloat64ExtractHighWord32:
1861       if (instr->InputAt(0)->IsFPStackSlot()) {
1862         __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
1863       } else {
1864         __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
1865       }
1866       break;
1867     case kSSEFloat64InsertLowWord32:
1868       if (HasRegisterInput(instr, 1)) {
1869         __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
1870       } else {
1871         __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
1872       }
1873       break;
1874     case kSSEFloat64InsertHighWord32:
1875       if (HasRegisterInput(instr, 1)) {
1876         __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
1877       } else {
1878         __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
1879       }
1880       break;
1881     case kSSEFloat64LoadLowWord32:
1882       if (HasRegisterInput(instr, 0)) {
1883         __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
1884       } else {
1885         __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
1886       }
1887       break;
1888     case kAVXFloat32Cmp: {
1889       CpuFeatureScope avx_scope(tasm(), AVX);
1890       if (instr->InputAt(1)->IsFPRegister()) {
1891         __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1892       } else {
1893         __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1894       }
1895       break;
1896     }
1897     case kAVXFloat32Add:
1898       ASSEMBLE_AVX_BINOP(vaddss);
1899       break;
1900     case kAVXFloat32Sub:
1901       ASSEMBLE_AVX_BINOP(vsubss);
1902       break;
1903     case kAVXFloat32Mul:
1904       ASSEMBLE_AVX_BINOP(vmulss);
1905       break;
1906     case kAVXFloat32Div:
1907       ASSEMBLE_AVX_BINOP(vdivss);
1908       // Don't delete this mov. It may improve performance on some CPUs,
1909       // when there is a (v)mulss depending on the result.
1910       __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1911       break;
1912     case kAVXFloat64Cmp: {
1913       CpuFeatureScope avx_scope(tasm(), AVX);
1914       if (instr->InputAt(1)->IsFPRegister()) {
1915         __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1916       } else {
1917         __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1918       }
1919       break;
1920     }
1921     case kAVXFloat64Add:
1922       ASSEMBLE_AVX_BINOP(vaddsd);
1923       break;
1924     case kAVXFloat64Sub:
1925       ASSEMBLE_AVX_BINOP(vsubsd);
1926       break;
1927     case kAVXFloat64Mul:
1928       ASSEMBLE_AVX_BINOP(vmulsd);
1929       break;
1930     case kAVXFloat64Div:
1931       ASSEMBLE_AVX_BINOP(vdivsd);
1932       // Don't delete this mov. It may improve performance on some CPUs,
1933       // when there is a (v)mulsd depending on the result.
1934       __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1935       break;
1936     case kAVXFloat32Abs: {
1937       // TODO(bmeurer): Use RIP relative 128-bit constants.
1938       CpuFeatureScope avx_scope(tasm(), AVX);
1939       XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
1940       __ vpcmpeqd(tmp, tmp, tmp);
1941       __ vpsrlq(tmp, tmp, 33);
1942       if (instr->InputAt(0)->IsFPRegister()) {
1943         __ vandps(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
1944       } else {
1945         __ vandps(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
1946       }
1947       break;
1948     }
1949     case kAVXFloat32Neg: {
1950       // TODO(bmeurer): Use RIP relative 128-bit constants.
1951       CpuFeatureScope avx_scope(tasm(), AVX);
1952       XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
1953       __ vpcmpeqd(tmp, tmp, tmp);
1954       __ vpsllq(tmp, tmp, 31);
1955       if (instr->InputAt(0)->IsFPRegister()) {
1956         __ vxorps(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
1957       } else {
1958         __ vxorps(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
1959       }
1960       break;
1961     }
1962     case kAVXFloat64Abs: {
1963       // TODO(bmeurer): Use RIP relative 128-bit constants.
1964       CpuFeatureScope avx_scope(tasm(), AVX);
1965       XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
1966       __ vpcmpeqd(tmp, tmp, tmp);
1967       __ vpsrlq(tmp, tmp, 1);
1968       if (instr->InputAt(0)->IsFPRegister()) {
1969         __ vandpd(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
1970       } else {
1971         __ vandpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
1972       }
1973       break;
1974     }
1975     case kAVXFloat64Neg: {
1976       // TODO(bmeurer): Use RIP relative 128-bit constants.
1977       CpuFeatureScope avx_scope(tasm(), AVX);
1978       XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
1979       __ vpcmpeqd(tmp, tmp, tmp);
1980       __ vpsllq(tmp, tmp, 63);
1981       if (instr->InputAt(0)->IsFPRegister()) {
1982         __ vxorpd(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
1983       } else {
1984         __ vxorpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
1985       }
1986       break;
1987     }
1988     case kSSEFloat64SilenceNaN:
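      // Subtracting 0.0 quiets a signaling NaN while leaving all other values,
      // including -0.0, unchanged.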
1989       __ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
1990       __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
1991       break;
1992     case kX64Movsxbl:
1993       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
1994       ASSEMBLE_MOVX(movsxbl);
1995       __ AssertZeroExtended(i.OutputRegister());
1996       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1997       break;
1998     case kX64Movzxbl:
1999       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2000       ASSEMBLE_MOVX(movzxbl);
2001       __ AssertZeroExtended(i.OutputRegister());
2002       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2003       break;
2004     case kX64Movsxbq:
2005       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2006       ASSEMBLE_MOVX(movsxbq);
2007       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2008       break;
2009     case kX64Movzxbq:
2010       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2011       ASSEMBLE_MOVX(movzxbq);
2012       __ AssertZeroExtended(i.OutputRegister());
2013       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2014       break;
2015     case kX64Movb: {
2016       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2017       size_t index = 0;
2018       Operand operand = i.MemoryOperand(&index);
2019       if (HasImmediateInput(instr, index)) {
2020         __ movb(operand, Immediate(i.InputInt8(index)));
2021       } else {
2022         __ movb(operand, i.InputRegister(index));
2023       }
2024       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2025       break;
2026     }
2027     case kX64Movsxwl:
2028       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2029       ASSEMBLE_MOVX(movsxwl);
2030       __ AssertZeroExtended(i.OutputRegister());
2031       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2032       break;
2033     case kX64Movzxwl:
2034       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2035       ASSEMBLE_MOVX(movzxwl);
2036       __ AssertZeroExtended(i.OutputRegister());
2037       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2038       break;
2039     case kX64Movsxwq:
2040       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2041       ASSEMBLE_MOVX(movsxwq);
2042       break;
2043     case kX64Movzxwq:
2044       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2045       ASSEMBLE_MOVX(movzxwq);
2046       __ AssertZeroExtended(i.OutputRegister());
2047       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2048       break;
2049     case kX64Movw: {
2050       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2051       size_t index = 0;
2052       Operand operand = i.MemoryOperand(&index);
2053       if (HasImmediateInput(instr, index)) {
2054         __ movw(operand, Immediate(i.InputInt16(index)));
2055       } else {
2056         __ movw(operand, i.InputRegister(index));
2057       }
2058       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2059       break;
2060     }
2061     case kX64Movl:
2062       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2063       if (instr->HasOutput()) {
2064         if (HasAddressingMode(instr)) {
2065           __ movl(i.OutputRegister(), i.MemoryOperand());
2066         } else {
2067           if (HasRegisterInput(instr, 0)) {
2068             __ movl(i.OutputRegister(), i.InputRegister(0));
2069           } else {
2070             __ movl(i.OutputRegister(), i.InputOperand(0));
2071           }
2072         }
2073         __ AssertZeroExtended(i.OutputRegister());
2074       } else {
2075         size_t index = 0;
2076         Operand operand = i.MemoryOperand(&index);
2077         if (HasImmediateInput(instr, index)) {
2078           __ movl(operand, i.InputImmediate(index));
2079         } else {
2080           __ movl(operand, i.InputRegister(index));
2081         }
2082       }
2083       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2084       break;
2085     case kX64Movsxlq:
2086       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2087       ASSEMBLE_MOVX(movsxlq);
2088       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2089       break;
2090     case kX64MovqDecompressTaggedSigned: {
2091       CHECK(instr->HasOutput());
2092       __ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand());
2093       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2094       break;
2095     }
2096     case kX64MovqDecompressTaggedPointer: {
2097       CHECK(instr->HasOutput());
2098       __ DecompressTaggedPointer(i.OutputRegister(), i.MemoryOperand());
2099       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2100       break;
2101     }
2102     case kX64MovqDecompressAnyTagged: {
2103       CHECK(instr->HasOutput());
2104       __ DecompressAnyTagged(i.OutputRegister(), i.MemoryOperand());
2105       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2106       break;
2107     }
2108     case kX64MovqCompressTagged: {
2109       CHECK(!instr->HasOutput());
2110       size_t index = 0;
2111       Operand operand = i.MemoryOperand(&index);
2112       if (HasImmediateInput(instr, index)) {
2113         __ StoreTaggedField(operand, i.InputImmediate(index));
2114       } else {
2115         __ StoreTaggedField(operand, i.InputRegister(index));
2116       }
2117       break;
2118     }
2119     case kX64Movq:
2120       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2121       if (instr->HasOutput()) {
2122         __ movq(i.OutputRegister(), i.MemoryOperand());
2123       } else {
2124         size_t index = 0;
2125         Operand operand = i.MemoryOperand(&index);
2126         if (HasImmediateInput(instr, index)) {
2127           __ movq(operand, i.InputImmediate(index));
2128         } else {
2129           __ movq(operand, i.InputRegister(index));
2130         }
2131       }
2132       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2133       break;
2134     case kX64Movss:
2135       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2136       if (instr->HasOutput()) {
2137         __ Movss(i.OutputDoubleRegister(), i.MemoryOperand());
2138       } else {
2139         size_t index = 0;
2140         Operand operand = i.MemoryOperand(&index);
2141         __ Movss(operand, i.InputDoubleRegister(index));
2142       }
2143       break;
2144     case kX64Movsd: {
2145       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2146       if (instr->HasOutput()) {
2147         const MemoryAccessMode access_mode =
2148             static_cast<MemoryAccessMode>(MiscField::decode(opcode));
2149         if (access_mode == kMemoryAccessPoisoned) {
2150           // If we have to poison the loaded value, we load into a general
2151           // purpose register first, mask it with the poison, and move the
2152           // value from the general purpose register into the double register.
2153           __ movq(kScratchRegister, i.MemoryOperand());
2154           __ andq(kScratchRegister, kSpeculationPoisonRegister);
2155           __ Movq(i.OutputDoubleRegister(), kScratchRegister);
2156         } else {
2157           __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
2158         }
2159       } else {
2160         size_t index = 0;
2161         Operand operand = i.MemoryOperand(&index);
2162         __ Movsd(operand, i.InputDoubleRegister(index));
2163       }
2164       break;
2165     }
2166     case kX64Movdqu: {
2167       CpuFeatureScope sse_scope(tasm(), SSSE3);
2168       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2169       if (instr->HasOutput()) {
2170         __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
2171       } else {
2172         size_t index = 0;
2173         Operand operand = i.MemoryOperand(&index);
2174         __ Movdqu(operand, i.InputSimd128Register(index));
2175       }
2176       break;
2177     }
2178     case kX64BitcastFI:
2179       if (instr->InputAt(0)->IsFPStackSlot()) {
2180         __ movl(i.OutputRegister(), i.InputOperand(0));
2181       } else {
2182         __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
2183       }
2184       break;
2185     case kX64BitcastDL:
2186       if (instr->InputAt(0)->IsFPStackSlot()) {
2187         __ movq(i.OutputRegister(), i.InputOperand(0));
2188       } else {
2189         __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
2190       }
2191       break;
2192     case kX64BitcastIF:
2193       if (HasRegisterInput(instr, 0)) {
2194         __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
2195       } else {
2196         __ Movss(i.OutputDoubleRegister(), i.InputOperand(0));
2197       }
2198       break;
2199     case kX64BitcastLD:
2200       if (HasRegisterInput(instr, 0)) {
2201         __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
2202       } else {
2203         __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
2204       }
2205       break;
2206     case kX64Lea32: {
2207       AddressingMode mode = AddressingModeField::decode(instr->opcode());
2208       // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
2209       // and addressing mode just happen to work out. The "addl"/"subl" forms
2210       // in these cases are faster based on measurements.
2211       if (i.InputRegister(0) == i.OutputRegister()) {
2212         if (mode == kMode_MRI) {
2213           int32_t constant_summand = i.InputInt32(1);
2214           DCHECK_NE(0, constant_summand);
2215           if (constant_summand > 0) {
2216             __ addl(i.OutputRegister(), Immediate(constant_summand));
2217           } else {
2218             __ subl(i.OutputRegister(),
2219                     Immediate(base::NegateWithWraparound(constant_summand)));
2220           }
2221         } else if (mode == kMode_MR1) {
2222           if (i.InputRegister(1) == i.OutputRegister()) {
2223             __ shll(i.OutputRegister(), Immediate(1));
2224           } else {
2225             __ addl(i.OutputRegister(), i.InputRegister(1));
2226           }
2227         } else if (mode == kMode_M2) {
2228           __ shll(i.OutputRegister(), Immediate(1));
2229         } else if (mode == kMode_M4) {
2230           __ shll(i.OutputRegister(), Immediate(2));
2231         } else if (mode == kMode_M8) {
2232           __ shll(i.OutputRegister(), Immediate(3));
2233         } else {
2234           __ leal(i.OutputRegister(), i.MemoryOperand());
2235         }
2236       } else if (mode == kMode_MR1 &&
2237                  i.InputRegister(1) == i.OutputRegister()) {
2238         __ addl(i.OutputRegister(), i.InputRegister(0));
2239       } else {
2240         __ leal(i.OutputRegister(), i.MemoryOperand());
2241       }
2242       __ AssertZeroExtended(i.OutputRegister());
2243       break;
2244     }
2245     case kX64Lea: {
2246       AddressingMode mode = AddressingModeField::decode(instr->opcode());
2247       // Shorten "leaq" to "addq", "subq" or "shlq" if the register allocation
2248       // and addressing mode just happen to work out. The "addq"/"subq" forms
2249       // in these cases are faster based on measurements.
2250       if (i.InputRegister(0) == i.OutputRegister()) {
2251         if (mode == kMode_MRI) {
2252           int32_t constant_summand = i.InputInt32(1);
2253           if (constant_summand > 0) {
2254             __ addq(i.OutputRegister(), Immediate(constant_summand));
2255           } else if (constant_summand < 0) {
2256             __ subq(i.OutputRegister(), Immediate(-constant_summand));
2257           }
2258         } else if (mode == kMode_MR1) {
2259           if (i.InputRegister(1) == i.OutputRegister()) {
2260             __ shlq(i.OutputRegister(), Immediate(1));
2261           } else {
2262             __ addq(i.OutputRegister(), i.InputRegister(1));
2263           }
2264         } else if (mode == kMode_M2) {
2265           __ shlq(i.OutputRegister(), Immediate(1));
2266         } else if (mode == kMode_M4) {
2267           __ shlq(i.OutputRegister(), Immediate(2));
2268         } else if (mode == kMode_M8) {
2269           __ shlq(i.OutputRegister(), Immediate(3));
2270         } else {
2271           __ leaq(i.OutputRegister(), i.MemoryOperand());
2272         }
2273       } else if (mode == kMode_MR1 &&
2274                  i.InputRegister(1) == i.OutputRegister()) {
2275         __ addq(i.OutputRegister(), i.InputRegister(0));
2276       } else {
2277         __ leaq(i.OutputRegister(), i.MemoryOperand());
2278       }
2279       break;
2280     }
2281     case kX64Dec32:
2282       __ decl(i.OutputRegister());
2283       break;
2284     case kX64Inc32:
2285       __ incl(i.OutputRegister());
2286       break;
2287     case kX64Push:
2288       if (HasAddressingMode(instr)) {
2289         size_t index = 0;
2290         Operand operand = i.MemoryOperand(&index);
2291         __ pushq(operand);
2292         frame_access_state()->IncreaseSPDelta(1);
2293         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2294                                                          kSystemPointerSize);
2295       } else if (HasImmediateInput(instr, 0)) {
2296         __ pushq(i.InputImmediate(0));
2297         frame_access_state()->IncreaseSPDelta(1);
2298         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2299                                                          kSystemPointerSize);
2300       } else if (HasRegisterInput(instr, 0)) {
2301         __ pushq(i.InputRegister(0));
2302         frame_access_state()->IncreaseSPDelta(1);
2303         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2304                                                          kSystemPointerSize);
2305       } else if (instr->InputAt(0)->IsFloatRegister() ||
2306                  instr->InputAt(0)->IsDoubleRegister()) {
2307         // TODO(titzer): use another machine instruction?
2308         __ AllocateStackSpace(kDoubleSize);
2309         frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
2310         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2311                                                          kDoubleSize);
2312         __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
2313       } else if (instr->InputAt(0)->IsSimd128Register()) {
2314         // TODO(titzer): use another machine instruction?
2315         __ AllocateStackSpace(kSimd128Size);
2316         frame_access_state()->IncreaseSPDelta(kSimd128Size /
2317                                               kSystemPointerSize);
2318         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2319                                                          kSimd128Size);
2320         __ Movups(Operand(rsp, 0), i.InputSimd128Register(0));
2321       } else if (instr->InputAt(0)->IsStackSlot() ||
2322                  instr->InputAt(0)->IsFloatStackSlot() ||
2323                  instr->InputAt(0)->IsDoubleStackSlot()) {
2324         __ pushq(i.InputOperand(0));
2325         frame_access_state()->IncreaseSPDelta(1);
2326         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2327                                                          kSystemPointerSize);
2328       } else {
2329         DCHECK(instr->InputAt(0)->IsSimd128StackSlot());
2330         __ Movups(kScratchDoubleReg, i.InputOperand(0));
2331         // TODO(titzer): use another machine instruction?
2332         __ AllocateStackSpace(kSimd128Size);
2333         frame_access_state()->IncreaseSPDelta(kSimd128Size /
2334                                               kSystemPointerSize);
2335         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2336                                                          kSimd128Size);
2337         __ Movups(Operand(rsp, 0), kScratchDoubleReg);
2338       }
2339       break;
2340     case kX64Poke: {
2341       int slot = MiscField::decode(instr->opcode());
2342       if (HasImmediateInput(instr, 0)) {
2343         __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputImmediate(0));
2344       } else if (instr->InputAt(0)->IsFPRegister()) {
2345         LocationOperand* op = LocationOperand::cast(instr->InputAt(0));
2346         if (op->representation() == MachineRepresentation::kFloat64) {
2347           __ Movsd(Operand(rsp, slot * kSystemPointerSize),
2348                    i.InputDoubleRegister(0));
2349         } else {
2350           DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
2351           __ Movss(Operand(rsp, slot * kSystemPointerSize),
2352                    i.InputFloatRegister(0));
2353         }
2354       } else {
2355         __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputRegister(0));
2356       }
2357       break;
2358     }
2359     case kX64Peek: {
2360       int reverse_slot = i.InputInt32(0);
2361       int offset =
2362           FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
2363       if (instr->OutputAt(0)->IsFPRegister()) {
2364         LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
2365         if (op->representation() == MachineRepresentation::kFloat64) {
2366           __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
2367         } else if (op->representation() == MachineRepresentation::kFloat32) {
2368           __ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
2369         } else {
2370           DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
2371           __ Movdqu(i.OutputSimd128Register(), Operand(rbp, offset));
2372         }
2373       } else {
2374         __ movq(i.OutputRegister(), Operand(rbp, offset));
2375       }
2376       break;
2377     }
2378     case kX64F64x2Splat: {
2379       XMMRegister dst = i.OutputSimd128Register();
2380       if (instr->InputAt(0)->IsFPRegister()) {
2381         __ Movddup(dst, i.InputDoubleRegister(0));
2382       } else {
2383         __ Movddup(dst, i.InputOperand(0));
2384       }
2385       break;
2386     }
2387     case kX64F64x2ExtractLane: {
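      // Pextrq moves the selected 64-bit lane into the scratch GPR; Movq then
      // places it in the low lane of the output register.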
2388       __ Pextrq(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
2389       __ Movq(i.OutputDoubleRegister(), kScratchRegister);
2390       break;
2391     }
2392     case kX64F64x2Sqrt: {
2393       __ Sqrtpd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2394       break;
2395     }
2396     case kX64F64x2Add: {
2397       ASSEMBLE_SIMD_BINOP(addpd);
2398       break;
2399     }
2400     case kX64F64x2Sub: {
2401       ASSEMBLE_SIMD_BINOP(subpd);
2402       break;
2403     }
2404     case kX64F64x2Mul: {
2405       ASSEMBLE_SIMD_BINOP(mulpd);
2406       break;
2407     }
2408     case kX64F64x2Div: {
2409       ASSEMBLE_SIMD_BINOP(divpd);
2410       break;
2411     }
2412     case kX64F64x2Min: {
2413       XMMRegister src1 = i.InputSimd128Register(1),
2414                   dst = i.OutputSimd128Register();
2415       DCHECK_EQ(dst, i.InputSimd128Register(0));
2416       // The minpd instruction doesn't propagate NaNs and +0's in its first
2417       // operand. Perform minpd in both orders, merge the results, and adjust.
2418       __ Movapd(kScratchDoubleReg, src1);
2419       __ Minpd(kScratchDoubleReg, dst);
2420       __ Minpd(dst, src1);
2421       // Propagate -0's and NaNs, which may be non-canonical.
2422       __ Orpd(kScratchDoubleReg, dst);
2423       // Canonicalize NaNs by quieting and clearing the payload.
2424       __ Cmppd(dst, kScratchDoubleReg, int8_t{3});
2425       __ Orpd(kScratchDoubleReg, dst);
2426       __ Psrlq(dst, 13);
2427       __ Andnpd(dst, kScratchDoubleReg);
2428       break;
2429     }
2430     case kX64F64x2Max: {
2431       XMMRegister src1 = i.InputSimd128Register(1),
2432                   dst = i.OutputSimd128Register();
2433       DCHECK_EQ(dst, i.InputSimd128Register(0));
2434       // The maxpd instruction doesn't propagate NaNs and +0's in its first
2435       // operand. Perform maxpd in both orders, merge the results, and adjust.
2436       __ Movapd(kScratchDoubleReg, src1);
2437       __ Maxpd(kScratchDoubleReg, dst);
2438       __ Maxpd(dst, src1);
2439       // Find discrepancies.
2440       __ Xorpd(dst, kScratchDoubleReg);
2441       // Propagate NaNs, which may be non-canonical.
2442       __ Orpd(kScratchDoubleReg, dst);
2443       // Propagate sign discrepancy and (subtle) quiet NaNs.
2444       __ Subpd(kScratchDoubleReg, dst);
2445       // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
2446       __ Cmppd(dst, kScratchDoubleReg, int8_t{3});
2447       __ Psrlq(dst, 13);
2448       __ Andnpd(dst, kScratchDoubleReg);
2449       break;
2450     }
2451     case kX64F64x2Eq: {
2452       ASSEMBLE_SIMD_BINOP(cmpeqpd);
2453       break;
2454     }
2455     case kX64F64x2Ne: {
2456       ASSEMBLE_SIMD_BINOP(cmpneqpd);
2457       break;
2458     }
2459     case kX64F64x2Lt: {
2460       ASSEMBLE_SIMD_BINOP(cmpltpd);
2461       break;
2462     }
2463     case kX64F64x2Le: {
2464       ASSEMBLE_SIMD_BINOP(cmplepd);
2465       break;
2466     }
2467     case kX64F64x2Qfma: {
2468       if (CpuFeatures::IsSupported(FMA3)) {
2469         CpuFeatureScope fma3_scope(tasm(), FMA3);
2470         __ vfmadd231pd(i.OutputSimd128Register(), i.InputSimd128Register(1),
2471                        i.InputSimd128Register(2));
2472       } else {
2473         XMMRegister tmp = i.TempSimd128Register(0);
2474         __ Movapd(tmp, i.InputSimd128Register(2));
2475         __ Mulpd(tmp, i.InputSimd128Register(1));
2476         __ Addpd(i.OutputSimd128Register(), tmp);
2477       }
2478       break;
2479     }
2480     case kX64F64x2Qfms: {
2481       if (CpuFeatures::IsSupported(FMA3)) {
2482         CpuFeatureScope fma3_scope(tasm(), FMA3);
2483         __ vfnmadd231pd(i.OutputSimd128Register(), i.InputSimd128Register(1),
2484                         i.InputSimd128Register(2));
2485       } else {
2486         XMMRegister tmp = i.TempSimd128Register(0);
2487         __ Movapd(tmp, i.InputSimd128Register(2));
2488         __ Mulpd(tmp, i.InputSimd128Register(1));
2489         __ Subpd(i.OutputSimd128Register(), tmp);
2490       }
2491       break;
2492     }
2493     case kX64F32x4Splat: {
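      // Shuffling the input with immediate 0 selects lane 0 for every output
      // lane, broadcasting the scalar across all four float lanes.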
2494       __ Shufps(i.OutputSimd128Register(), i.InputDoubleRegister(0), 0);
2495       break;
2496     }
2497     case kX64F32x4ExtractLane: {
2498       if (CpuFeatures::IsSupported(AVX)) {
2499         CpuFeatureScope avx_scope(tasm(), AVX);
2500         XMMRegister src = i.InputSimd128Register(0);
2501         // Use vshufps; it is fine to leave junk in the 3 high lanes.
2502         __ vshufps(i.OutputDoubleRegister(), src, src, i.InputInt8(1));
2503       } else {
2504         __ extractps(kScratchRegister, i.InputSimd128Register(0),
2505                      i.InputUint8(1));
2506         __ movd(i.OutputDoubleRegister(), kScratchRegister);
2507       }
2508       break;
2509     }
2510     case kX64F32x4ReplaceLane: {
2511       // The insertps instruction uses imm8[5:4] to indicate the lane
2512       // that needs to be replaced.
2513       byte select = i.InputInt8(1) << 4 & 0x30;
2514       if (instr->InputAt(2)->IsFPRegister()) {
2515         __ Insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2),
2516                     select);
2517       } else {
2518         __ Insertps(i.OutputSimd128Register(), i.InputOperand(2), select);
2519       }
2520       break;
2521     }
2522     case kX64F32x4SConvertI32x4: {
2523       __ Cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2524       break;
2525     }
2526     case kX64F32x4UConvertI32x4: {
2527       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2528       DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
2529       XMMRegister dst = i.OutputSimd128Register();
2530       __ Pxor(kScratchDoubleReg, kScratchDoubleReg);  // zeros
2531       __ Pblendw(kScratchDoubleReg, dst, uint8_t{0x55});  // get lo 16 bits
2532       __ Psubd(dst, kScratchDoubleReg);                   // get hi 16 bits
2533       __ Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // convert lo exactly
2534       __ Psrld(dst, byte{1});            // divide by 2 to get in unsigned range
2535       __ Cvtdq2ps(dst, dst);             // convert hi exactly
2536       __ Addps(dst, dst);                // double hi, exactly
2537       __ Addps(dst, kScratchDoubleReg);  // add hi and lo, may round.
2538       break;
2539     }
2540     case kX64F32x4Abs: {
2541       XMMRegister dst = i.OutputSimd128Register();
2542       XMMRegister src = i.InputSimd128Register(0);
2543       if (dst == src) {
2544         __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2545         __ Psrld(kScratchDoubleReg, byte{1});
2546         __ Andps(i.OutputSimd128Register(), kScratchDoubleReg);
2547       } else {
2548         __ Pcmpeqd(dst, dst);
2549         __ Psrld(dst, byte{1});
2550         __ Andps(dst, i.InputSimd128Register(0));
2551       }
2552       break;
2553     }
2554     case kX64F32x4Neg: {
2555       XMMRegister dst = i.OutputSimd128Register();
2556       XMMRegister src = i.InputSimd128Register(0);
2557       if (dst == src) {
2558         __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2559         __ Pslld(kScratchDoubleReg, byte{31});
2560         __ Xorps(i.OutputSimd128Register(), kScratchDoubleReg);
2561       } else {
2562         __ Pcmpeqd(dst, dst);
2563         __ Pslld(dst, byte{31});
2564         __ Xorps(dst, i.InputSimd128Register(0));
2565       }
2566       break;
2567     }
2568     case kX64F32x4Sqrt: {
2569       __ Sqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2570       break;
2571     }
2572     case kX64F32x4RecipApprox: {
2573       __ Rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2574       break;
2575     }
2576     case kX64F32x4RecipSqrtApprox: {
2577       __ Rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2578       break;
2579     }
2580     case kX64F32x4Add: {
2581       ASSEMBLE_SIMD_BINOP(addps);
2582       break;
2583     }
2584     case kX64F32x4AddHoriz: {
2585       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2586       __ Haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2587       break;
2588     }
2589     case kX64F32x4Sub: {
2590       ASSEMBLE_SIMD_BINOP(subps);
2591       break;
2592     }
2593     case kX64F32x4Mul: {
2594       ASSEMBLE_SIMD_BINOP(mulps);
2595       break;
2596     }
2597     case kX64F32x4Div: {
2598       ASSEMBLE_SIMD_BINOP(divps);
2599       break;
2600     }
2601     case kX64F32x4Min: {
2602       XMMRegister src1 = i.InputSimd128Register(1),
2603                   dst = i.OutputSimd128Register();
2604       DCHECK_EQ(dst, i.InputSimd128Register(0));
2605       // The minps instruction doesn't propagate NaNs and +0's in its first
2606       // operand. Perform minps in both orders, merge the results, and adjust.
2607       __ Movaps(kScratchDoubleReg, src1);
2608       __ Minps(kScratchDoubleReg, dst);
2609       __ Minps(dst, src1);
2610       // Propagate -0's and NaNs, which may be non-canonical.
2611       __ Orps(kScratchDoubleReg, dst);
2612       // Canonicalize NaNs by quieting and clearing the payload.
2613       __ Cmpps(dst, kScratchDoubleReg, int8_t{3});
2614       __ Orps(kScratchDoubleReg, dst);
2615       __ Psrld(dst, byte{10});
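      // dst is now 0x003FFFFF in NaN lanes and 0 elsewhere, so andnps keeps
      // only the sign, exponent, and quiet bit of NaN lanes and passes all
      // other lanes through unchanged.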
2616       __ Andnps(dst, kScratchDoubleReg);
2617       break;
2618     }
2619     case kX64F32x4Max: {
2620       XMMRegister src1 = i.InputSimd128Register(1),
2621                   dst = i.OutputSimd128Register();
2622       DCHECK_EQ(dst, i.InputSimd128Register(0));
2623       // The maxps instruction doesn't propagate NaNs and +0's in its first
2624       // operand. Perform maxps in both orders, merge the results, and adjust.
2625       __ Movaps(kScratchDoubleReg, src1);
2626       __ Maxps(kScratchDoubleReg, dst);
2627       __ Maxps(dst, src1);
2628       // Find discrepancies.
2629       __ Xorps(dst, kScratchDoubleReg);
2630       // Propagate NaNs, which may be non-canonical.
2631       __ Orps(kScratchDoubleReg, dst);
2632       // Propagate sign discrepancy and (subtle) quiet NaNs.
2633       __ Subps(kScratchDoubleReg, dst);
2634       // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
2635       __ Cmpps(dst, kScratchDoubleReg, int8_t{3});
2636       __ Psrld(dst, byte{10});
2637       __ Andnps(dst, kScratchDoubleReg);
2638       break;
2639     }
2640     case kX64F32x4Eq: {
2641       ASSEMBLE_SIMD_BINOP(cmpeqps);
2642       break;
2643     }
2644     case kX64F32x4Ne: {
2645       ASSEMBLE_SIMD_BINOP(cmpneqps);
2646       break;
2647     }
2648     case kX64F32x4Lt: {
2649       ASSEMBLE_SIMD_BINOP(cmpltps);
2650       break;
2651     }
2652     case kX64F32x4Le: {
2653       ASSEMBLE_SIMD_BINOP(cmpleps);
2654       break;
2655     }
2656     case kX64F32x4Qfma: {
2657       if (CpuFeatures::IsSupported(FMA3)) {
2658         CpuFeatureScope fma3_scope(tasm(), FMA3);
2659         __ vfmadd231ps(i.OutputSimd128Register(), i.InputSimd128Register(1),
2660                        i.InputSimd128Register(2));
2661       } else {
2662         XMMRegister tmp = i.TempSimd128Register(0);
2663         __ Movaps(tmp, i.InputSimd128Register(2));
2664         __ Mulps(tmp, i.InputSimd128Register(1));
2665         __ Addps(i.OutputSimd128Register(), tmp);
2666       }
2667       break;
2668     }
2669     case kX64F32x4Qfms: {
2670       if (CpuFeatures::IsSupported(FMA3)) {
2671         CpuFeatureScope fma3_scope(tasm(), FMA3);
2672         __ vfnmadd231ps(i.OutputSimd128Register(), i.InputSimd128Register(1),
2673                         i.InputSimd128Register(2));
2674       } else {
2675         XMMRegister tmp = i.TempSimd128Register(0);
2676         __ Movaps(tmp, i.InputSimd128Register(2));
2677         __ Mulps(tmp, i.InputSimd128Register(1));
2678         __ Subps(i.OutputSimd128Register(), tmp);
2679       }
2680       break;
2681     }
2682     case kX64F32x4Pmin: {
2683       XMMRegister dst = i.OutputSimd128Register();
2684       DCHECK_EQ(dst, i.InputSimd128Register(0));
2685       __ Minps(dst, i.InputSimd128Register(1));
2686       break;
2687     }
2688     case kX64F32x4Pmax: {
2689       XMMRegister dst = i.OutputSimd128Register();
2690       DCHECK_EQ(dst, i.InputSimd128Register(0));
2691       __ Maxps(dst, i.InputSimd128Register(1));
2692       break;
2693     }
2694     case kX64F32x4Round: {
2695       RoundingMode const mode =
2696           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
2697       __ Roundps(i.OutputSimd128Register(), i.InputSimd128Register(0), mode);
2698       break;
2699     }
2700     case kX64F64x2Round: {
2701       RoundingMode const mode =
2702           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
2703       __ Roundpd(i.OutputSimd128Register(), i.InputSimd128Register(0), mode);
2704       break;
2705     }
2706     case kX64F64x2Pmin: {
2707       XMMRegister dst = i.OutputSimd128Register();
2708       DCHECK_EQ(dst, i.InputSimd128Register(0));
2709       __ Minpd(dst, i.InputSimd128Register(1));
2710       break;
2711     }
2712     case kX64F64x2Pmax: {
2713       XMMRegister dst = i.OutputSimd128Register();
2714       DCHECK_EQ(dst, i.InputSimd128Register(0));
2715       __ Maxpd(dst, i.InputSimd128Register(1));
2716       break;
2717     }
2718     case kX64I64x2Splat: {
2719       XMMRegister dst = i.OutputSimd128Register();
2720       if (HasRegisterInput(instr, 0)) {
2721         __ Movq(dst, i.InputRegister(0));
2722       } else {
2723         __ Movq(dst, i.InputOperand(0));
2724       }
2725       __ Movddup(dst, dst);
2726       break;
2727     }
2728     case kX64I64x2ExtractLane: {
2729       __ Pextrq(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
2730       break;
2731     }
2732     case kX64I64x2Neg: {
2733       XMMRegister dst = i.OutputSimd128Register();
2734       XMMRegister src = i.InputSimd128Register(0);
2735       if (dst == src) {
2736         __ Movapd(kScratchDoubleReg, src);
2737         src = kScratchDoubleReg;
2738       }
2739       __ Pxor(dst, dst);
2740       __ Psubq(dst, src);
2741       break;
2742     }
2743     case kX64I64x2BitMask: {
2744       __ Movmskpd(i.OutputRegister(), i.InputSimd128Register(0));
2745       break;
2746     }
2747     case kX64I64x2Shl: {
2748       // Take shift value modulo 2^6.
2749       ASSEMBLE_SIMD_SHIFT(psllq, 6);
2750       break;
2751     }
2752     case kX64I64x2ShrS: {
2753       // TODO(zhin): there is vpsraq, but it requires AVX512.
2754       // Do an arithmetic right shift on each quadword, one at a time.
2755       XMMRegister dst = i.OutputSimd128Register();
2756       XMMRegister src = i.InputSimd128Register(0);
2757       Register tmp = i.ToRegister(instr->TempAt(0));
2758       // Modulo 64 not required as sarq_cl will mask cl to 6 bits.
2759 
2760       // lower quadword
2761       __ Pextrq(tmp, src, int8_t{0x0});
2762       __ sarq_cl(tmp);
2763       __ Pinsrq(dst, tmp, uint8_t{0x0});
2764 
2765       // upper quadword
2766       __ Pextrq(tmp, src, int8_t{0x1});
2767       __ sarq_cl(tmp);
2768       __ Pinsrq(dst, tmp, uint8_t{0x1});
2769       break;
2770     }
2771     case kX64I64x2Add: {
2772       ASSEMBLE_SIMD_BINOP(paddq);
2773       break;
2774     }
2775     case kX64I64x2Sub: {
2776       ASSEMBLE_SIMD_BINOP(psubq);
2777       break;
2778     }
2779     case kX64I64x2Mul: {
2780       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2781       XMMRegister left = i.InputSimd128Register(0);
2782       XMMRegister right = i.InputSimd128Register(1);
2783       XMMRegister tmp1 = i.TempSimd128Register(0);
2784       XMMRegister tmp2 = i.TempSimd128Register(1);
2785 
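      // 64x64->64 multiply built from three 32x32->64 pmuludq multiplies:
      //   (a_hi * 2^32 + a_lo) * (b_hi * 2^32 + b_lo)
      //     = (a_hi * b_lo + a_lo * b_hi) * 2^32 + a_lo * b_lo   (mod 2^64)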
2786       __ Movaps(tmp1, left);
2787       __ Movaps(tmp2, right);
2788 
2789       // Multiply high dword of each qword of left with right.
2790       __ Psrlq(tmp1, 32);
2791       __ Pmuludq(tmp1, right);
2792 
2793       // Multiply high dword of each qword of right with left.
2794       __ Psrlq(tmp2, 32);
2795       __ Pmuludq(tmp2, left);
2796 
2797       __ Paddq(tmp2, tmp1);
2798       __ Psllq(tmp2, 32);
2799 
2800       __ Pmuludq(left, right);
2801       __ Paddq(left, tmp2);  // left == dst
2802       break;
2803     }
2804     case kX64I64x2Eq: {
2805       ASSEMBLE_SIMD_BINOP(pcmpeqq);
2806       break;
2807     }
2808     case kX64I64x2ShrU: {
2809       // Take shift value modulo 2^6.
2810       ASSEMBLE_SIMD_SHIFT(psrlq, 6);
2811       break;
2812     }
2813     case kX64I32x4Splat: {
2814       XMMRegister dst = i.OutputSimd128Register();
2815       if (HasRegisterInput(instr, 0)) {
2816         __ Movd(dst, i.InputRegister(0));
2817       } else {
2818         __ Movd(dst, i.InputOperand(0));
2819       }
2820       __ Pshufd(dst, dst, uint8_t{0x0});
2821       break;
2822     }
2823     case kX64I32x4ExtractLane: {
2824       __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
2825       break;
2826     }
2827     case kX64I32x4SConvertF32x4: {
2828       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2829       XMMRegister dst = i.OutputSimd128Register();
2830       XMMRegister tmp = i.TempSimd128Register(0);
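      // cvttps2dq returns 0x80000000 (INT32_MIN) for NaN and for values
      // outside the int32 range. The fixups around it turn NaN lanes into 0
      // and positive-overflow lanes into 0x7FFFFFFF; negative overflow keeps
      // 0x80000000, which is already the correct saturated result.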
2831       // NAN->0
2832       __ Movaps(tmp, dst);
2833       __ Cmpeqps(tmp, tmp);
2834       __ Pand(dst, tmp);
2835       // Set top bit if >= 0 (but not -0.0!)
2836       __ Pxor(tmp, dst);
2837       // Convert
2838       __ Cvttps2dq(dst, dst);
2839       // Set top bit if the lane was >= 0 but is now < 0 (positive overflow)
2840       __ Pand(tmp, dst);
2841       __ Psrad(tmp, byte{31});
2842       // Set positive overflow lanes to 0x7FFFFFFF
2843       __ Pxor(dst, tmp);
2844       break;
2845     }
2846     case kX64I32x4SConvertI16x8Low: {
2847       __ Pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2848       break;
2849     }
2850     case kX64I32x4SConvertI16x8High: {
2851       XMMRegister dst = i.OutputSimd128Register();
2852       __ Palignr(dst, i.InputSimd128Register(0), uint8_t{8});
2853       __ Pmovsxwd(dst, dst);
2854       break;
2855     }
2856     case kX64I32x4Neg: {
2857       XMMRegister dst = i.OutputSimd128Register();
2858       XMMRegister src = i.InputSimd128Register(0);
2859       if (dst == src) {
2860         __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2861         __ Psignd(dst, kScratchDoubleReg);
2862       } else {
2863         __ Pxor(dst, dst);
2864         __ Psubd(dst, src);
2865       }
2866       break;
2867     }
2868     case kX64I32x4Shl: {
2869       // Take shift value modulo 2^5.
2870       ASSEMBLE_SIMD_SHIFT(pslld, 5);
2871       break;
2872     }
2873     case kX64I32x4ShrS: {
2874       // Take shift value modulo 2^5.
2875       ASSEMBLE_SIMD_SHIFT(psrad, 5);
2876       break;
2877     }
2878     case kX64I32x4Add: {
2879       ASSEMBLE_SIMD_BINOP(paddd);
2880       break;
2881     }
2882     case kX64I32x4AddHoriz: {
2883       ASSEMBLE_SIMD_BINOP(phaddd);
2884       break;
2885     }
2886     case kX64I32x4Sub: {
2887       ASSEMBLE_SIMD_BINOP(psubd);
2888       break;
2889     }
2890     case kX64I32x4Mul: {
2891       ASSEMBLE_SIMD_BINOP(pmulld);
2892       break;
2893     }
2894     case kX64I32x4MinS: {
2895       ASSEMBLE_SIMD_BINOP(pminsd);
2896       break;
2897     }
2898     case kX64I32x4MaxS: {
2899       ASSEMBLE_SIMD_BINOP(pmaxsd);
2900       break;
2901     }
2902     case kX64I32x4Eq: {
2903       ASSEMBLE_SIMD_BINOP(pcmpeqd);
2904       break;
2905     }
2906     case kX64I32x4Ne: {
2907       XMMRegister tmp = i.TempSimd128Register(0);
2908       __ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2909       __ Pcmpeqd(tmp, tmp);
2910       __ Pxor(i.OutputSimd128Register(), tmp);
2911       break;
2912     }
2913     case kX64I32x4GtS: {
2914       ASSEMBLE_SIMD_BINOP(pcmpgtd);
2915       break;
2916     }
2917     case kX64I32x4GeS: {
2918       XMMRegister dst = i.OutputSimd128Register();
2919       XMMRegister src = i.InputSimd128Register(1);
2920       __ Pminsd(dst, src);
2921       __ Pcmpeqd(dst, src);
2922       break;
2923     }
2924     case kX64I32x4UConvertF32x4: {
2925       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2926       XMMRegister dst = i.OutputSimd128Register();
2927       XMMRegister tmp = i.TempSimd128Register(0);
2928       XMMRegister tmp2 = i.TempSimd128Register(1);
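      // There is no packed unsigned float->int conversion before AVX-512.
      // Convert the value and (value - 2^31) separately: for lanes below 2^31
      // the second term is clamped to 0; for lanes at or above 2^31 the first
      // conversion yields 0x80000000, and adding the adjusted second term
      // gives the correct unsigned result (saturating at 0xFFFFFFFF).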
2929       // NAN->0, negative->0
2930       __ Pxor(tmp2, tmp2);
2931       __ Maxps(dst, tmp2);
2932       // tmp2: float representation of max_signed (0x7fffffff)
2933       __ Pcmpeqd(tmp2, tmp2);
2934       __ Psrld(tmp2, uint8_t{1});  // 0x7fffffff
2935       __ Cvtdq2ps(tmp2, tmp2);     // 0x4f000000
2936       // tmp: convert (src-max_signed).
2937       // Positive overflow lanes -> 0x7FFFFFFF
2938       // Negative lanes -> 0
2939       __ Movaps(tmp, dst);
2940       __ Subps(tmp, tmp2);
2941       __ Cmpleps(tmp2, tmp);
2942       __ Cvttps2dq(tmp, tmp);
2943       __ Pxor(tmp, tmp2);
2944       __ Pxor(tmp2, tmp2);
2945       __ Pmaxsd(tmp, tmp2);
2946       // convert. Overflow lanes above max_signed will be 0x80000000
2947       __ Cvttps2dq(dst, dst);
2948       // Add (src-max_signed) for overflow lanes.
2949       __ Paddd(dst, tmp);
2950       break;
2951     }
2952     case kX64I32x4UConvertI16x8Low: {
2953       __ Pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2954       break;
2955     }
2956     case kX64I32x4UConvertI16x8High: {
2957       XMMRegister dst = i.OutputSimd128Register();
2958       __ Palignr(dst, i.InputSimd128Register(0), uint8_t{8});
2959       __ Pmovzxwd(dst, dst);
2960       break;
2961     }
2962     case kX64I32x4ShrU: {
2963       // Take shift value modulo 2^5.
2964       ASSEMBLE_SIMD_SHIFT(psrld, 5);
2965       break;
2966     }
2967     case kX64I32x4MinU: {
2968       ASSEMBLE_SIMD_BINOP(pminud);
2969       break;
2970     }
2971     case kX64I32x4MaxU: {
2972       ASSEMBLE_SIMD_BINOP(pmaxud);
2973       break;
2974     }
2975     case kX64I32x4GtU: {
2976       XMMRegister dst = i.OutputSimd128Register();
2977       XMMRegister src = i.InputSimd128Register(1);
2978       XMMRegister tmp = i.TempSimd128Register(0);
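      // There is no packed unsigned greater-than; compute
      // (max(dst, src) == src), i.e. dst <= src, then invert the result.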
2979       __ Pmaxud(dst, src);
2980       __ Pcmpeqd(dst, src);
2981       __ Pcmpeqd(tmp, tmp);
2982       __ Pxor(dst, tmp);
2983       break;
2984     }
2985     case kX64I32x4GeU: {
2986       XMMRegister dst = i.OutputSimd128Register();
2987       XMMRegister src = i.InputSimd128Register(1);
2988       __ Pminud(dst, src);
2989       __ Pcmpeqd(dst, src);
2990       break;
2991     }
2992     case kX64I32x4Abs: {
2993       __ Pabsd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2994       break;
2995     }
2996     case kX64I32x4BitMask: {
2997       __ Movmskps(i.OutputRegister(), i.InputSimd128Register(0));
2998       break;
2999     }
3000     case kX64I32x4DotI16x8S: {
3001       ASSEMBLE_SIMD_BINOP(pmaddwd);
3002       break;
3003     }
3004     case kX64S128Const: {
3005       // Emit code for generic constants here; the all-zeros and all-ones
3006       // cases are handled separately by the instruction selector.
3007       XMMRegister dst = i.OutputSimd128Register();
3008       uint32_t imm[4] = {};
3009       for (int j = 0; j < 4; j++) {
3010         imm[j] = i.InputUint32(j);
3011       }
3012       SetupSimdImmediateInRegister(tasm(), imm, dst);
3013       break;
3014     }
3015     case kX64S128Zero: {
3016       XMMRegister dst = i.OutputSimd128Register();
3017       __ Pxor(dst, dst);
3018       break;
3019     }
3020     case kX64S128AllOnes: {
3021       XMMRegister dst = i.OutputSimd128Register();
3022       __ Pcmpeqd(dst, dst);
3023       break;
3024     }
3025     case kX64I16x8Splat: {
3026       XMMRegister dst = i.OutputSimd128Register();
3027       if (HasRegisterInput(instr, 0)) {
3028         __ Movd(dst, i.InputRegister(0));
3029       } else {
3030         __ Movd(dst, i.InputOperand(0));
3031       }
3032       __ Pshuflw(dst, dst, uint8_t{0x0});
3033       __ Pshufd(dst, dst, uint8_t{0x0});
3034       break;
3035     }
3036     case kX64I16x8ExtractLaneS: {
3037       Register dst = i.OutputRegister();
3038       __ Pextrw(dst, i.InputSimd128Register(0), i.InputUint8(1));
3039       __ movsxwl(dst, dst);
3040       break;
3041     }
3042     case kX64I16x8SConvertI8x16Low: {
3043       __ Pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
3044       break;
3045     }
3046     case kX64I16x8SConvertI8x16High: {
3047       XMMRegister dst = i.OutputSimd128Register();
3048       __ Palignr(dst, i.InputSimd128Register(0), uint8_t{8});
3049       __ Pmovsxbw(dst, dst);
3050       break;
3051     }
3052     case kX64I16x8Neg: {
3053       XMMRegister dst = i.OutputSimd128Register();
3054       XMMRegister src = i.InputSimd128Register(0);
3055       if (dst == src) {
3056         __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3057         __ Psignw(dst, kScratchDoubleReg);
3058       } else {
3059         __ Pxor(dst, dst);
3060         __ Psubw(dst, src);
3061       }
3062       break;
3063     }
3064     case kX64I16x8Shl: {
3065       // Take shift value modulo 2^4.
3066       ASSEMBLE_SIMD_SHIFT(psllw, 4);
3067       break;
3068     }
3069     case kX64I16x8ShrS: {
3070       // Take shift value modulo 2^4.
3071       ASSEMBLE_SIMD_SHIFT(psraw, 4);
3072       break;
3073     }
3074     case kX64I16x8SConvertI32x4: {
3075       ASSEMBLE_SIMD_BINOP(packssdw);
3076       break;
3077     }
3078     case kX64I16x8Add: {
3079       ASSEMBLE_SIMD_BINOP(paddw);
3080       break;
3081     }
3082     case kX64I16x8AddSatS: {
3083       ASSEMBLE_SIMD_BINOP(paddsw);
3084       break;
3085     }
3086     case kX64I16x8AddHoriz: {
3087       ASSEMBLE_SIMD_BINOP(phaddw);
3088       break;
3089     }
3090     case kX64I16x8Sub: {
3091       ASSEMBLE_SIMD_BINOP(psubw);
3092       break;
3093     }
3094     case kX64I16x8SubSatS: {
3095       ASSEMBLE_SIMD_BINOP(psubsw);
3096       break;
3097     }
3098     case kX64I16x8Mul: {
3099       ASSEMBLE_SIMD_BINOP(pmullw);
3100       break;
3101     }
3102     case kX64I16x8MinS: {
3103       ASSEMBLE_SIMD_BINOP(pminsw);
3104       break;
3105     }
3106     case kX64I16x8MaxS: {
3107       ASSEMBLE_SIMD_BINOP(pmaxsw);
3108       break;
3109     }
3110     case kX64I16x8Eq: {
3111       ASSEMBLE_SIMD_BINOP(pcmpeqw);
3112       break;
3113     }
3114     case kX64I16x8Ne: {
3115       XMMRegister tmp = i.TempSimd128Register(0);
3116       __ Pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3117       __ Pcmpeqw(tmp, tmp);
3118       __ Pxor(i.OutputSimd128Register(), tmp);
3119       break;
3120     }
3121     case kX64I16x8GtS: {
3122       ASSEMBLE_SIMD_BINOP(pcmpgtw);
3123       break;
3124     }
3125     case kX64I16x8GeS: {
3126       XMMRegister dst = i.OutputSimd128Register();
3127       XMMRegister src = i.InputSimd128Register(1);
3128       __ Pminsw(dst, src);
3129       __ Pcmpeqw(dst, src);
3130       break;
3131     }
3132     case kX64I16x8UConvertI8x16Low: {
3133       __ Pmovzxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
3134       break;
3135     }
3136     case kX64I16x8UConvertI8x16High: {
3137       XMMRegister dst = i.OutputSimd128Register();
3138       __ Palignr(dst, i.InputSimd128Register(0), uint8_t{8});
3139       __ Pmovzxbw(dst, dst);
3140       break;
3141     }
3142     case kX64I16x8ShrU: {
3143       // Take shift value modulo 2^4.
3144       ASSEMBLE_SIMD_SHIFT(psrlw, 4);
3145       break;
3146     }
3147     case kX64I16x8UConvertI32x4: {
3148       ASSEMBLE_SIMD_BINOP(packusdw);
3149       break;
3150     }
3151     case kX64I16x8AddSatU: {
3152       ASSEMBLE_SIMD_BINOP(paddusw);
3153       break;
3154     }
3155     case kX64I16x8SubSatU: {
3156       ASSEMBLE_SIMD_BINOP(psubusw);
3157       break;
3158     }
3159     case kX64I16x8MinU: {
3160       ASSEMBLE_SIMD_BINOP(pminuw);
3161       break;
3162     }
3163     case kX64I16x8MaxU: {
3164       ASSEMBLE_SIMD_BINOP(pmaxuw);
3165       break;
3166     }
3167     case kX64I16x8GtU: {
3168       XMMRegister dst = i.OutputSimd128Register();
3169       XMMRegister src = i.InputSimd128Register(1);
3170       XMMRegister tmp = i.TempSimd128Register(0);
3171       __ Pmaxuw(dst, src);
3172       __ Pcmpeqw(dst, src);
3173       __ Pcmpeqw(tmp, tmp);
3174       __ Pxor(dst, tmp);
3175       break;
3176     }
3177     case kX64I16x8GeU: {
3178       XMMRegister dst = i.OutputSimd128Register();
3179       XMMRegister src = i.InputSimd128Register(1);
3180       __ Pminuw(dst, src);
3181       __ Pcmpeqw(dst, src);
3182       break;
3183     }
3184     case kX64I16x8RoundingAverageU: {
3185       ASSEMBLE_SIMD_BINOP(pavgw);
3186       break;
3187     }
3188     case kX64I16x8Abs: {
3189       __ Pabsw(i.OutputSimd128Register(), i.InputSimd128Register(0));
3190       break;
3191     }
3192     case kX64I16x8BitMask: {
3193       Register dst = i.OutputRegister();
3194       XMMRegister tmp = i.TempSimd128Register(0);
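      // packsswb packs the eight input words (with signed saturation, which
      // preserves the sign bits) into the upper eight bytes of tmp; pmovmskb
      // then gathers all 16 byte sign bits and the shift drops the low eight,
      // which come from tmp's stale contents.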
3195       __ Packsswb(tmp, i.InputSimd128Register(0));
3196       __ Pmovmskb(dst, tmp);
3197       __ shrq(dst, Immediate(8));
3198       break;
3199     }
3200     case kX64I8x16Splat: {
3201       XMMRegister dst = i.OutputSimd128Register();
3202       if (HasRegisterInput(instr, 0)) {
3203         __ Movd(dst, i.InputRegister(0));
3204       } else {
3205         __ Movd(dst, i.InputOperand(0));
3206       }
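      // A pshufb with an all-zero shuffle mask replicates byte 0 of dst into
      // all 16 lanes.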
3207       __ Xorps(kScratchDoubleReg, kScratchDoubleReg);
3208       __ Pshufb(dst, kScratchDoubleReg);
3209       break;
3210     }
3211     case kX64Pextrb: {
3212       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3213       size_t index = 0;
3214       if (HasAddressingMode(instr)) {
3215         Operand operand = i.MemoryOperand(&index);
3216         __ Pextrb(operand, i.InputSimd128Register(index),
3217                   i.InputUint8(index + 1));
3218       } else {
3219         __ Pextrb(i.OutputRegister(), i.InputSimd128Register(0),
3220                   i.InputUint8(1));
3221       }
3222       break;
3223     }
3224     case kX64Pextrw: {
3225       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3226       size_t index = 0;
3227       if (HasAddressingMode(instr)) {
3228         Operand operand = i.MemoryOperand(&index);
3229         __ Pextrw(operand, i.InputSimd128Register(index),
3230                   i.InputUint8(index + 1));
3231       } else {
3232         __ Pextrw(i.OutputRegister(), i.InputSimd128Register(0),
3233                   i.InputUint8(1));
3234       }
3235       break;
3236     }
3237     case kX64I8x16ExtractLaneS: {
3238       Register dst = i.OutputRegister();
3239       __ Pextrb(dst, i.InputSimd128Register(0), i.InputUint8(1));
3240       __ movsxbl(dst, dst);
3241       break;
3242     }
3243     case kX64Pinsrb: {
3244       ASSEMBLE_PINSR(Pinsrb);
3245       break;
3246     }
3247     case kX64Pinsrw: {
3248       ASSEMBLE_PINSR(Pinsrw);
3249       break;
3250     }
3251     case kX64Pinsrd: {
3252       ASSEMBLE_PINSR(Pinsrd);
3253       break;
3254     }
3255     case kX64Pinsrq: {
3256       ASSEMBLE_PINSR(Pinsrq);
3257       break;
3258     }
3259     case kX64I8x16SConvertI16x8: {
3260       ASSEMBLE_SIMD_BINOP(packsswb);
3261       break;
3262     }
3263     case kX64I8x16Neg: {
3264       XMMRegister dst = i.OutputSimd128Register();
3265       XMMRegister src = i.InputSimd128Register(0);
3266       if (dst == src) {
3267         __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3268         __ Psignb(dst, kScratchDoubleReg);
3269       } else {
3270         __ Pxor(dst, dst);
3271         __ Psubb(dst, src);
3272       }
3273       break;
3274     }
3275     case kX64I8x16Shl: {
3276       XMMRegister dst = i.OutputSimd128Register();
3277       DCHECK_EQ(dst, i.InputSimd128Register(0));
3278       // Temp registers for shift mask and additional moves to XMM registers.
3279       Register tmp = i.ToRegister(instr->TempAt(0));
3280       XMMRegister tmp_simd = i.TempSimd128Register(1);
3281       if (HasImmediateInput(instr, 1)) {
3282         // Perform 16-bit shift, then mask away low bits.
3283         uint8_t shift = i.InputInt3(1);
3284         __ Psllw(dst, byte{shift});
3285 
3286         uint8_t bmask = static_cast<uint8_t>(0xff << shift);
3287         uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
3288         __ movl(tmp, Immediate(mask));
3289         __ Movd(tmp_simd, tmp);
3290         __ Pshufd(tmp_simd, tmp_simd, uint8_t{0});
3291         __ Pand(dst, tmp_simd);
3292       } else {
3293         // Mask off the unwanted bits before word-shifting.
3294         __ Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
3295         // Take shift value modulo 8.
3296         __ movq(tmp, i.InputRegister(1));
3297         __ andq(tmp, Immediate(7));
3298         __ addq(tmp, Immediate(8));
3299         __ Movq(tmp_simd, tmp);
3300         __ Psrlw(kScratchDoubleReg, tmp_simd);
3301         __ Packuswb(kScratchDoubleReg, kScratchDoubleReg);
3302         __ Pand(dst, kScratchDoubleReg);
3303         // TODO(zhin): subq here to avoid asking for another temporary register;
3304         // examine codegen for other i8x16 shifts, which use fewer instructions.
3305         __ subq(tmp, Immediate(8));
3306         __ Movq(tmp_simd, tmp);
3307         __ Psllw(dst, tmp_simd);
3308       }
3309       break;
3310     }
3311     case kX64I8x16ShrS: {
3312       XMMRegister dst = i.OutputSimd128Register();
3313       DCHECK_EQ(dst, i.InputSimd128Register(0));
3314       if (HasImmediateInput(instr, 1)) {
3315         __ Punpckhbw(kScratchDoubleReg, dst);
3316         __ Punpcklbw(dst, dst);
3317         uint8_t shift = i.InputInt3(1) + 8;
3318         __ Psraw(kScratchDoubleReg, shift);
3319         __ Psraw(dst, shift);
3320         __ Packsswb(dst, kScratchDoubleReg);
3321       } else {
3322         // Temp registers for shift mask and additional moves to XMM registers.
3323         Register tmp = i.ToRegister(instr->TempAt(0));
3324         XMMRegister tmp_simd = i.TempSimd128Register(1);
3325         // Unpack the bytes into words, do arithmetic shifts, and repack.
3326         __ Punpckhbw(kScratchDoubleReg, dst);
3327         __ Punpcklbw(dst, dst);
3328         // Prepare shift value
3329         __ movq(tmp, i.InputRegister(1));
3330         // Take shift value modulo 8.
3331         __ andq(tmp, Immediate(7));
3332         __ addq(tmp, Immediate(8));
3333         __ Movq(tmp_simd, tmp);
3334         __ Psraw(kScratchDoubleReg, tmp_simd);
3335         __ Psraw(dst, tmp_simd);
3336         __ Packsswb(dst, kScratchDoubleReg);
3337       }
3338       break;
3339     }
3340     case kX64I8x16Add: {
3341       ASSEMBLE_SIMD_BINOP(paddb);
3342       break;
3343     }
3344     case kX64I8x16AddSatS: {
3345       ASSEMBLE_SIMD_BINOP(paddsb);
3346       break;
3347     }
3348     case kX64I8x16Sub: {
3349       ASSEMBLE_SIMD_BINOP(psubb);
3350       break;
3351     }
3352     case kX64I8x16SubSatS: {
3353       ASSEMBLE_SIMD_BINOP(psubsb);
3354       break;
3355     }
3356     case kX64I8x16Mul: {
3357       XMMRegister dst = i.OutputSimd128Register();
3358       DCHECK_EQ(dst, i.InputSimd128Register(0));
3359       XMMRegister right = i.InputSimd128Register(1);
3360       XMMRegister tmp = i.TempSimd128Register(0);
3361       // I16x8 view of I8x16
3362       // left = AAaa AAaa ... AAaa AAaa
3363       // right= BBbb BBbb ... BBbb BBbb
3364       // t = 00AA 00AA ... 00AA 00AA
3365       // s = 00BB 00BB ... 00BB 00BB
3366       __ Movaps(tmp, dst);
3367       __ Movaps(kScratchDoubleReg, right);
3368       __ Psrlw(tmp, byte{8});
3369       __ Psrlw(kScratchDoubleReg, byte{8});
3370       // dst = left * 256
3371       __ Psllw(dst, byte{8});
3372       // t = I16x8Mul(t, s)
3373       //    => __PP __PP ...  __PP  __PP
3374       __ Pmullw(tmp, kScratchDoubleReg);
3375       // dst = I16x8Mul(left * 256, right)
3376       //    => pp__ pp__ ...  pp__  pp__
3377       __ Pmullw(dst, right);
3378       // t = I16x8Shl(t, 8)
3379       //    => PP00 PP00 ...  PP00  PP00
3380       __ Psllw(tmp, byte{8});
3381       // dst = I16x8Shr(dst, 8)
3382       //    => 00pp 00pp ...  00pp  00pp
3383       __ Psrlw(dst, byte{8});
3384       // dst = I16x8Or(dst, t)
3385       //    => PPpp PPpp ...  PPpp  PPpp
3386       __ Por(dst, tmp);
3387       break;
3388     }
3389     case kX64I8x16MinS: {
3390       ASSEMBLE_SIMD_BINOP(pminsb);
3391       break;
3392     }
3393     case kX64I8x16MaxS: {
3394       ASSEMBLE_SIMD_BINOP(pmaxsb);
3395       break;
3396     }
3397     case kX64I8x16Eq: {
3398       ASSEMBLE_SIMD_BINOP(pcmpeqb);
3399       break;
3400     }
3401     case kX64I8x16Ne: {
3402       XMMRegister tmp = i.TempSimd128Register(0);
3403       __ Pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3404       __ Pcmpeqb(tmp, tmp);
3405       __ Pxor(i.OutputSimd128Register(), tmp);
3406       break;
3407     }
3408     case kX64I8x16GtS: {
3409       ASSEMBLE_SIMD_BINOP(pcmpgtb);
3410       break;
3411     }
3412     case kX64I8x16GeS: {
3413       XMMRegister dst = i.OutputSimd128Register();
3414       XMMRegister src = i.InputSimd128Register(1);
3415       __ Pminsb(dst, src);
3416       __ Pcmpeqb(dst, src);
3417       break;
3418     }
3419     case kX64I8x16UConvertI16x8: {
3420       ASSEMBLE_SIMD_BINOP(packuswb);
3421       break;
3422     }
3423     case kX64I8x16ShrU: {
3424       XMMRegister dst = i.OutputSimd128Register();
3425       // Unpack the bytes into words, do logical shifts, and repack.
3426       DCHECK_EQ(dst, i.InputSimd128Register(0));
3427       // Temp registers for shift mask and additional moves to XMM registers.
3428       Register tmp = i.ToRegister(instr->TempAt(0));
3429       XMMRegister tmp_simd = i.TempSimd128Register(1);
3430       if (HasImmediateInput(instr, 1)) {
3431         // Perform 16-bit shift, then mask away high bits.
3432         uint8_t shift = i.InputInt3(1);
3433         __ Psrlw(dst, byte{shift});
3434 
3435         uint8_t bmask = 0xff >> shift;
3436         uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
3437         __ movl(tmp, Immediate(mask));
3438         __ Movd(tmp_simd, tmp);
3439         __ Pshufd(tmp_simd, tmp_simd, byte{0});
3440         __ Pand(dst, tmp_simd);
3441       } else {
3442         __ Punpckhbw(kScratchDoubleReg, dst);
3443         __ Punpcklbw(dst, dst);
3444         // Prepare shift value
3445         __ movq(tmp, i.InputRegister(1));
3446         // Take shift value modulo 8.
3447         __ andq(tmp, Immediate(7));
3448         __ addq(tmp, Immediate(8));
3449         __ Movq(tmp_simd, tmp);
3450         __ Psrlw(kScratchDoubleReg, tmp_simd);
3451         __ Psrlw(dst, tmp_simd);
3452         __ Packuswb(dst, kScratchDoubleReg);
3453       }
3454       break;
3455     }
3456     case kX64I8x16AddSatU: {
3457       ASSEMBLE_SIMD_BINOP(paddusb);
3458       break;
3459     }
3460     case kX64I8x16SubSatU: {
3461       ASSEMBLE_SIMD_BINOP(psubusb);
3462       break;
3463     }
3464     case kX64I8x16MinU: {
3465       ASSEMBLE_SIMD_BINOP(pminub);
3466       break;
3467     }
3468     case kX64I8x16MaxU: {
3469       ASSEMBLE_SIMD_BINOP(pmaxub);
3470       break;
3471     }
3472     case kX64I8x16GtU: {
3473       XMMRegister dst = i.OutputSimd128Register();
3474       XMMRegister src = i.InputSimd128Register(1);
3475       XMMRegister tmp = i.TempSimd128Register(0);
3476       __ Pmaxub(dst, src);
3477       __ Pcmpeqb(dst, src);
3478       __ Pcmpeqb(tmp, tmp);
3479       __ Pxor(dst, tmp);
3480       break;
3481     }
3482     case kX64I8x16GeU: {
3483       XMMRegister dst = i.OutputSimd128Register();
3484       XMMRegister src = i.InputSimd128Register(1);
3485       __ Pminub(dst, src);
3486       __ Pcmpeqb(dst, src);
3487       break;
3488     }
3489     case kX64I8x16RoundingAverageU: {
3490       ASSEMBLE_SIMD_BINOP(pavgb);
3491       break;
3492     }
3493     case kX64I8x16Abs: {
3494       __ Pabsb(i.OutputSimd128Register(), i.InputSimd128Register(0));
3495       break;
3496     }
3497     case kX64I8x16BitMask: {
3498       __ Pmovmskb(i.OutputRegister(), i.InputSimd128Register(0));
3499       break;
3500     }
3501     case kX64I8x16SignSelect: {
3502       __ Pblendvb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3503                   i.InputSimd128Register(1), i.InputSimd128Register(2));
3504       break;
3505     }
3506     case kX64I16x8SignSelect: {
3507       if (CpuFeatures::IsSupported(AVX)) {
3508         CpuFeatureScope avx_scope(tasm(), AVX);
3509         __ vpsraw(kScratchDoubleReg, i.InputSimd128Register(2), 15);
3510         __ vpblendvb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3511                      i.InputSimd128Register(1), kScratchDoubleReg);
3512       } else {
3513         DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3514         XMMRegister mask = i.InputSimd128Register(2);
3515         DCHECK_EQ(xmm0, mask);
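        // The non-VEX pblendvb uses xmm0 as an implicit mask operand, hence
        // the fixed-register constraint on the mask input; the original mask
        // value is saved and restored around the blend.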
3516         __ movapd(kScratchDoubleReg, mask);
3517         __ pxor(mask, mask);
3518         __ pcmpgtw(mask, kScratchDoubleReg);
3519         __ pblendvb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3520         // Restore mask.
3521         __ movapd(mask, kScratchDoubleReg);
3522       }
3523       break;
3524     }
3525     case kX64I32x4SignSelect: {
3526       __ Blendvps(i.OutputSimd128Register(), i.InputSimd128Register(0),
3527                   i.InputSimd128Register(1), i.InputSimd128Register(2));
3528       break;
3529     }
3530     case kX64I64x2SignSelect: {
3531       __ Blendvpd(i.OutputSimd128Register(), i.InputSimd128Register(0),
3532                   i.InputSimd128Register(1), i.InputSimd128Register(2));
3533       break;
3534     }
3535     case kX64S128And: {
3536       ASSEMBLE_SIMD_BINOP(pand);
3537       break;
3538     }
3539     case kX64S128Or: {
3540       ASSEMBLE_SIMD_BINOP(por);
3541       break;
3542     }
3543     case kX64S128Xor: {
3544       ASSEMBLE_SIMD_BINOP(pxor);
3545       break;
3546     }
3547     case kX64S128Not: {
3548       XMMRegister dst = i.OutputSimd128Register();
3549       XMMRegister src = i.InputSimd128Register(0);
3550       if (dst == src) {
3551         __ Movaps(kScratchDoubleReg, dst);
3552         __ Pcmpeqd(dst, dst);
3553         __ Pxor(dst, kScratchDoubleReg);
3554       } else {
3555         __ Pcmpeqd(dst, dst);
3556         __ Pxor(dst, src);
3557       }
3558 
3559       break;
3560     }
3561     case kX64S128Select: {
3562       // Mask used here is stored in dst.
3563       XMMRegister dst = i.OutputSimd128Register();
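      // Computes dst = ((src1 ^ src2) & mask) ^ src2, which selects src1 bits
      // where the mask bit is 1 and src2 bits where it is 0.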
3564       __ Movaps(kScratchDoubleReg, i.InputSimd128Register(1));
3565       __ Xorps(kScratchDoubleReg, i.InputSimd128Register(2));
3566       __ Andps(dst, kScratchDoubleReg);
3567       __ Xorps(dst, i.InputSimd128Register(2));
3568       break;
3569     }
3570     case kX64S128AndNot: {
3571       XMMRegister dst = i.OutputSimd128Register();
3572       DCHECK_EQ(dst, i.InputSimd128Register(0));
3573       // The inputs have been inverted by the instruction selector, so we can
3574       // call andnps here without any modifications.
3575       __ Andnps(dst, i.InputSimd128Register(1));
3576       break;
3577     }
3578     case kX64I8x16Swizzle: {
3579       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3580       XMMRegister dst = i.OutputSimd128Register();
3581       XMMRegister mask = i.TempSimd128Register(0);
3582 
3583       // Out-of-range indices should return 0. Add 112 (0x70) with unsigned
3584       // saturation, so any index > 15 gets its top bit set and pshufb zeroes that lane.
3585       __ Move(mask, uint32_t{0x70707070});
3586       __ Pshufd(mask, mask, uint8_t{0x0});
3587       __ Paddusb(mask, i.InputSimd128Register(1));
3588       __ Pshufb(dst, mask);
3589       break;
3590     }
3591     case kX64I8x16Shuffle: {
3592       XMMRegister dst = i.OutputSimd128Register();
3593       XMMRegister tmp_simd = i.TempSimd128Register(0);
3594       if (instr->InputCount() == 5) {  // only one input operand
3595         uint32_t mask[4] = {};
3596         DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3597         for (int j = 4; j > 0; j--) {
3598           mask[j - 1] = i.InputUint32(j);
3599         }
3600 
3601         SetupSimdImmediateInRegister(tasm(), mask, tmp_simd);
3602         __ Pshufb(dst, tmp_simd);
3603       } else {  // two input operands
3604         DCHECK_EQ(6, instr->InputCount());
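        // Build two pshufb masks: mask1 picks the bytes coming from operand 0
        // (other positions are set to 0x80 so pshufb zeroes them), mask2 picks
        // the bytes coming from operand 1; the partial results are then or'd.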
3605         ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 0);
3606         uint32_t mask1[4] = {};
3607         for (int j = 5; j > 1; j--) {
3608           uint32_t lanes = i.InputUint32(j);
3609           for (int k = 0; k < 32; k += 8) {
3610             uint8_t lane = lanes >> k;
3611             mask1[j - 2] |= (lane < kSimd128Size ? lane : 0x80) << k;
3612           }
3613         }
3614         SetupSimdImmediateInRegister(tasm(), mask1, tmp_simd);
3615         __ Pshufb(kScratchDoubleReg, tmp_simd);
3616         uint32_t mask2[4] = {};
3617         if (instr->InputAt(1)->IsSimd128Register()) {
3618           XMMRegister src1 = i.InputSimd128Register(1);
3619           if (src1 != dst) __ movups(dst, src1);
3620         } else {
3621           __ Movups(dst, i.InputOperand(1));
3622         }
3623         for (int j = 5; j > 1; j--) {
3624           uint32_t lanes = i.InputUint32(j);
3625           for (int k = 0; k < 32; k += 8) {
3626             uint8_t lane = lanes >> k;
3627             mask2[j - 2] |= (lane >= kSimd128Size ? (lane & 0x0F) : 0x80) << k;
3628           }
3629         }
3630         SetupSimdImmediateInRegister(tasm(), mask2, tmp_simd);
3631         __ Pshufb(dst, tmp_simd);
3632         __ Por(dst, kScratchDoubleReg);
3633       }
3634       break;
3635     }
3636     case kX64S128Load8Splat: {
3637       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3638       XMMRegister dst = i.OutputSimd128Register();
3639       __ Pinsrb(dst, dst, i.MemoryOperand(), 0);
3640       __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
3641       __ Pshufb(dst, kScratchDoubleReg);
3642       break;
3643     }
3644     case kX64S128Load16Splat: {
3645       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3646       XMMRegister dst = i.OutputSimd128Register();
3647       __ Pinsrw(dst, dst, i.MemoryOperand(), 0);
3648       __ Pshuflw(dst, dst, uint8_t{0});
3649       __ Punpcklqdq(dst, dst);
3650       break;
3651     }
3652     case kX64S128Load32Splat: {
3653       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3654       if (CpuFeatures::IsSupported(AVX)) {
3655         CpuFeatureScope avx_scope(tasm(), AVX);
3656         __ vbroadcastss(i.OutputSimd128Register(), i.MemoryOperand());
3657       } else {
3658         __ movss(i.OutputSimd128Register(), i.MemoryOperand());
3659         __ shufps(i.OutputSimd128Register(), i.OutputSimd128Register(),
3660                   byte{0});
3661       }
3662       break;
3663     }
3664     case kX64S128Load64Splat: {
3665       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3666       __ Movddup(i.OutputSimd128Register(), i.MemoryOperand());
3667       break;
3668     }
3669     case kX64S128Load8x8S: {
3670       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3671       __ Pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand());
3672       break;
3673     }
3674     case kX64S128Load8x8U: {
3675       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3676       __ Pmovzxbw(i.OutputSimd128Register(), i.MemoryOperand());
3677       break;
3678     }
3679     case kX64S128Load16x4S: {
3680       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3681       __ Pmovsxwd(i.OutputSimd128Register(), i.MemoryOperand());
3682       break;
3683     }
3684     case kX64S128Load16x4U: {
3685       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3686       __ Pmovzxwd(i.OutputSimd128Register(), i.MemoryOperand());
3687       break;
3688     }
3689     case kX64S128Load32x2S: {
3690       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3691       __ Pmovsxdq(i.OutputSimd128Register(), i.MemoryOperand());
3692       break;
3693     }
3694     case kX64S128Load32x2U: {
3695       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3696       __ Pmovzxdq(i.OutputSimd128Register(), i.MemoryOperand());
3697       break;
3698     }
3699     case kX64S128Store32Lane: {
3700       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3701       size_t index = 0;
3702       Operand operand = i.MemoryOperand(&index);
3703       uint8_t lane = i.InputUint8(index + 1);
3704       if (lane == 0) {
3705         __ Movss(operand, i.InputSimd128Register(index));
3706       } else {
3707         DCHECK_GE(3, lane);
3708         __ Extractps(operand, i.InputSimd128Register(index), lane);
3709       }
3710       break;
3711     }
3712     case kX64S128Store64Lane: {
3713       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3714       size_t index = 0;
3715       Operand operand = i.MemoryOperand(&index);
3716       uint8_t lane = i.InputUint8(index + 1);
3717       if (lane == 0) {
3718         __ Movlps(operand, i.InputSimd128Register(index));
3719       } else {
3720         DCHECK_EQ(1, lane);
3721         __ Movhps(operand, i.InputSimd128Register(index));
3722       }
3723       break;
3724     }
3725     case kX64S32x4Swizzle: {
3726       DCHECK_EQ(2, instr->InputCount());
3727       ASSEMBLE_SIMD_IMM_INSTR(Pshufd, i.OutputSimd128Register(), 0,
3728                               i.InputUint8(1));
3729       break;
3730     }
3731     case kX64S32x4Shuffle: {
3732       DCHECK_EQ(4, instr->InputCount());  // Swizzles should be handled above.
3733       uint8_t shuffle = i.InputUint8(2);
3734       DCHECK_NE(0xe4, shuffle);  // A simple blend should be handled below.
3735       ASSEMBLE_SIMD_IMM_INSTR(Pshufd, kScratchDoubleReg, 1, shuffle);
3736       ASSEMBLE_SIMD_IMM_INSTR(Pshufd, i.OutputSimd128Register(), 0, shuffle);
3737       __ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputUint8(3));
3738       break;
3739     }
3740     case kX64S16x8Blend: {
3741       ASSEMBLE_SIMD_IMM_SHUFFLE(Pblendw, i.InputUint8(2));
3742       break;
3743     }
3744     case kX64S16x8HalfShuffle1: {
3745       XMMRegister dst = i.OutputSimd128Register();
3746       ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, i.InputUint8(1));
3747       __ Pshufhw(dst, dst, i.InputUint8(2));
3748       break;
3749     }
3750     case kX64S16x8HalfShuffle2: {
3751       XMMRegister dst = i.OutputSimd128Register();
3752       ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, kScratchDoubleReg, 1, i.InputUint8(2));
3753       __ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputUint8(3));
3754       ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, i.InputUint8(2));
3755       __ Pshufhw(dst, dst, i.InputUint8(3));
3756       __ Pblendw(dst, kScratchDoubleReg, i.InputUint8(4));
3757       break;
3758     }
3759     case kX64S8x16Alignr: {
3760       ASSEMBLE_SIMD_IMM_SHUFFLE(Palignr, i.InputUint8(2));
3761       break;
3762     }
3763     case kX64S16x8Dup: {
3764       XMMRegister dst = i.OutputSimd128Register();
3765       uint8_t lane = i.InputInt8(1) & 0x7;
3766       uint8_t lane4 = lane & 0x3;
3767       uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
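      // half_dup replicates the 2-bit lane index into all four 2-bit fields of
      // the pshuflw/pshufhw immediate, duplicating the word within its half.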
3768       if (lane < 4) {
3769         ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, half_dup);
3770         __ Pshufd(dst, dst, uint8_t{0});
3771       } else {
3772         ASSEMBLE_SIMD_IMM_INSTR(Pshufhw, dst, 0, half_dup);
3773         __ Pshufd(dst, dst, uint8_t{0xaa});
3774       }
3775       break;
3776     }
3777     case kX64S8x16Dup: {
3778       XMMRegister dst = i.OutputSimd128Register();
3779       uint8_t lane = i.InputInt8(1) & 0xf;
3780       DCHECK_EQ(dst, i.InputSimd128Register(0));
3781       if (lane < 8) {
3782         __ Punpcklbw(dst, dst);
3783       } else {
3784         __ Punpckhbw(dst, dst);
3785       }
3786       lane &= 0x7;
3787       uint8_t lane4 = lane & 0x3;
3788       uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3789       if (lane < 4) {
3790         __ Pshuflw(dst, dst, half_dup);
3791         __ Pshufd(dst, dst, uint8_t{0});
3792       } else {
3793         __ Pshufhw(dst, dst, half_dup);
3794         __ Pshufd(dst, dst, uint8_t{0xaa});
3795       }
3796       break;
3797     }
3798     case kX64S64x2UnpackHigh:
3799       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhqdq);
3800       break;
3801     case kX64S32x4UnpackHigh:
3802       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhdq);
3803       break;
3804     case kX64S16x8UnpackHigh:
3805       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhwd);
3806       break;
3807     case kX64S8x16UnpackHigh:
3808       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhbw);
3809       break;
3810     case kX64S64x2UnpackLow:
3811       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpcklqdq);
3812       break;
3813     case kX64S32x4UnpackLow:
3814       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckldq);
3815       break;
3816     case kX64S16x8UnpackLow:
3817       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpcklwd);
3818       break;
3819     case kX64S8x16UnpackLow:
3820       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpcklbw);
3821       break;
3822     case kX64S16x8UnzipHigh: {
3823       XMMRegister dst = i.OutputSimd128Register();
3824       XMMRegister src2 = dst;
3825       DCHECK_EQ(dst, i.InputSimd128Register(0));
3826       if (instr->InputCount() == 2) {
3827         ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
3828         __ Psrld(kScratchDoubleReg, byte{16});
3829         src2 = kScratchDoubleReg;
3830       }
3831       __ Psrld(dst, byte{16});
3832       __ Packusdw(dst, src2);
3833       break;
3834     }
3835     case kX64S16x8UnzipLow: {
3836       XMMRegister dst = i.OutputSimd128Register();
3837       XMMRegister src2 = dst;
3838       DCHECK_EQ(dst, i.InputSimd128Register(0));
3839       __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
3840       if (instr->InputCount() == 2) {
3841         ASSEMBLE_SIMD_IMM_INSTR(Pblendw, kScratchDoubleReg, 1, uint8_t{0x55});
3842         src2 = kScratchDoubleReg;
3843       }
3844       __ Pblendw(dst, kScratchDoubleReg, uint8_t{0xaa});
3845       __ Packusdw(dst, src2);
3846       break;
3847     }
3848     case kX64S8x16UnzipHigh: {
3849       XMMRegister dst = i.OutputSimd128Register();
3850       XMMRegister src2 = dst;
3851       DCHECK_EQ(dst, i.InputSimd128Register(0));
3852       if (instr->InputCount() == 2) {
3853         ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
3854         __ Psrlw(kScratchDoubleReg, byte{8});
3855         src2 = kScratchDoubleReg;
3856       }
3857       __ Psrlw(dst, byte{8});
3858       __ Packuswb(dst, src2);
3859       break;
3860     }
3861     case kX64S8x16UnzipLow: {
3862       XMMRegister dst = i.OutputSimd128Register();
3863       XMMRegister src2 = dst;
3864       DCHECK_EQ(dst, i.InputSimd128Register(0));
3865       if (instr->InputCount() == 2) {
3866         ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
3867         __ Psllw(kScratchDoubleReg, byte{8});
3868         __ Psrlw(kScratchDoubleReg, byte{8});
3869         src2 = kScratchDoubleReg;
3870       }
3871       __ Psllw(dst, byte{8});
3872       __ Psrlw(dst, byte{8});
3873       __ Packuswb(dst, src2);
3874       break;
3875     }
3876     case kX64S8x16TransposeLow: {
3877       XMMRegister dst = i.OutputSimd128Register();
3878       DCHECK_EQ(dst, i.InputSimd128Register(0));
3879       __ Psllw(dst, byte{8});
3880       if (instr->InputCount() == 1) {
3881         __ Movups(kScratchDoubleReg, dst);
3882       } else {
3883         DCHECK_EQ(2, instr->InputCount());
3884         ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
3885         __ Psllw(kScratchDoubleReg, byte{8});
3886       }
3887       __ Psrlw(dst, byte{8});
3888       __ Por(dst, kScratchDoubleReg);
3889       break;
3890     }
3891     case kX64S8x16TransposeHigh: {
3892       XMMRegister dst = i.OutputSimd128Register();
3893       DCHECK_EQ(dst, i.InputSimd128Register(0));
3894       __ Psrlw(dst, byte{8});
3895       if (instr->InputCount() == 1) {
3896         __ Movups(kScratchDoubleReg, dst);
3897       } else {
3898         DCHECK_EQ(2, instr->InputCount());
3899         ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
3900         __ Psrlw(kScratchDoubleReg, byte{8});
3901       }
3902       __ Psllw(kScratchDoubleReg, byte{8});
3903       __ Por(dst, kScratchDoubleReg);
3904       break;
3905     }
3906     case kX64S8x8Reverse:
3907     case kX64S8x4Reverse:
3908     case kX64S8x2Reverse: {
3909       DCHECK_EQ(1, instr->InputCount());
3910       XMMRegister dst = i.OutputSimd128Register();
3911       DCHECK_EQ(dst, i.InputSimd128Register(0));
3912       if (arch_opcode != kX64S8x2Reverse) {
3913         // First shuffle words into position.
3914         uint8_t shuffle_mask = arch_opcode == kX64S8x4Reverse ? 0xB1 : 0x1B;
3915         __ Pshuflw(dst, dst, shuffle_mask);
3916         __ Pshufhw(dst, dst, shuffle_mask);
3917       }
3918       __ Movaps(kScratchDoubleReg, dst);
3919       __ Psrlw(kScratchDoubleReg, byte{8});
3920       __ Psllw(dst, byte{8});
3921       __ Por(dst, kScratchDoubleReg);
3922       break;
3923     }
3924     case kX64V32x4AnyTrue:
3925     case kX64V16x8AnyTrue:
3926     case kX64V8x16AnyTrue: {
3927       Register dst = i.OutputRegister();
3928       XMMRegister src = i.InputSimd128Register(0);
3929 
3930       __ xorq(dst, dst);
3931       __ Ptest(src, src);
3932       __ setcc(not_equal, dst);
3933       break;
3934     }
3935     // Need to split up all the different lane structures because the
3936     // comparison instruction used matters, e.g. for the 16-bit value 0xff00,
3937     // pcmpeqb (against zero) gives 0x00ff but pcmpeqw gives 0x0000, so ptest
3938     // sets ZF to 0 and 1 respectively.
3939     case kX64V32x4AllTrue: {
3940       ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqd);
3941       break;
3942     }
3943     case kX64V16x8AllTrue: {
3944       ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqw);
3945       break;
3946     }
3947     case kX64V8x16AllTrue: {
3948       ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqb);
3949       break;
3950     }
3951     case kWord32AtomicExchangeInt8: {
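      // On x64, xchg with a memory operand implies a lock prefix, so no
      // explicit lock is needed for the exchange cases below.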
3952       __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3953       __ movsxbl(i.InputRegister(0), i.InputRegister(0));
3954       break;
3955     }
3956     case kWord32AtomicExchangeUint8: {
3957       __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3958       __ movzxbl(i.InputRegister(0), i.InputRegister(0));
3959       break;
3960     }
3961     case kWord32AtomicExchangeInt16: {
3962       __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3963       __ movsxwl(i.InputRegister(0), i.InputRegister(0));
3964       break;
3965     }
3966     case kWord32AtomicExchangeUint16: {
3967       __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3968       __ movzxwl(i.InputRegister(0), i.InputRegister(0));
3969       break;
3970     }
3971     case kWord32AtomicExchangeWord32: {
3972       __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
3973       break;
3974     }
3975     case kWord32AtomicCompareExchangeInt8: {
3976       __ lock();
3977       __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3978       __ movsxbl(rax, rax);
3979       break;
3980     }
3981     case kWord32AtomicCompareExchangeUint8: {
3982       __ lock();
3983       __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3984       __ movzxbl(rax, rax);
3985       break;
3986     }
3987     case kWord32AtomicCompareExchangeInt16: {
3988       __ lock();
3989       __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3990       __ movsxwl(rax, rax);
3991       break;
3992     }
3993     case kWord32AtomicCompareExchangeUint16: {
3994       __ lock();
3995       __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3996       __ movzxwl(rax, rax);
3997       break;
3998     }
3999     case kWord32AtomicCompareExchangeWord32: {
4000       __ lock();
4001       __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
4002       break;
4003     }
4004 #define ATOMIC_BINOP_CASE(op, inst)              \
4005   case kWord32Atomic##op##Int8:                  \
4006     ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
4007     __ movsxbl(rax, rax);                        \
4008     break;                                       \
4009   case kWord32Atomic##op##Uint8:                 \
4010     ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
4011     __ movzxbl(rax, rax);                        \
4012     break;                                       \
4013   case kWord32Atomic##op##Int16:                 \
4014     ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
4015     __ movsxwl(rax, rax);                        \
4016     break;                                       \
4017   case kWord32Atomic##op##Uint16:                \
4018     ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
4019     __ movzxwl(rax, rax);                        \
4020     break;                                       \
4021   case kWord32Atomic##op##Word32:                \
4022     ASSEMBLE_ATOMIC_BINOP(inst, movl, cmpxchgl); \
4023     break;
4024       ATOMIC_BINOP_CASE(Add, addl)
4025       ATOMIC_BINOP_CASE(Sub, subl)
4026       ATOMIC_BINOP_CASE(And, andl)
4027       ATOMIC_BINOP_CASE(Or, orl)
4028       ATOMIC_BINOP_CASE(Xor, xorl)
4029 #undef ATOMIC_BINOP_CASE
4030     case kX64Word64AtomicExchangeUint8: {
4031       __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
4032       __ movzxbq(i.InputRegister(0), i.InputRegister(0));
4033       break;
4034     }
4035     case kX64Word64AtomicExchangeUint16: {
4036       __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
4037       __ movzxwq(i.InputRegister(0), i.InputRegister(0));
4038       break;
4039     }
4040     case kX64Word64AtomicExchangeUint32: {
4041       __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
4042       break;
4043     }
4044     case kX64Word64AtomicExchangeUint64: {
4045       __ xchgq(i.InputRegister(0), i.MemoryOperand(1));
4046       break;
4047     }
4048     case kX64Word64AtomicCompareExchangeUint8: {
4049       __ lock();
4050       __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
4051       __ movzxbq(rax, rax);
4052       break;
4053     }
4054     case kX64Word64AtomicCompareExchangeUint16: {
4055       __ lock();
4056       __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
4057       __ movzxwq(rax, rax);
4058       break;
4059     }
4060     case kX64Word64AtomicCompareExchangeUint32: {
4061       __ lock();
4062       __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
4063       // Zero-extend the 32 bit value to 64 bit.
4064       __ movl(rax, rax);
4065       break;
4066     }
4067     case kX64Word64AtomicCompareExchangeUint64: {
4068       __ lock();
4069       __ cmpxchgq(i.MemoryOperand(2), i.InputRegister(1));
4070       break;
4071     }
4072 #define ATOMIC64_BINOP_CASE(op, inst)              \
4073   case kX64Word64Atomic##op##Uint8:                \
4074     ASSEMBLE_ATOMIC64_BINOP(inst, movb, cmpxchgb); \
4075     __ movzxbq(rax, rax);                          \
4076     break;                                         \
4077   case kX64Word64Atomic##op##Uint16:               \
4078     ASSEMBLE_ATOMIC64_BINOP(inst, movw, cmpxchgw); \
4079     __ movzxwq(rax, rax);                          \
4080     break;                                         \
4081   case kX64Word64Atomic##op##Uint32:               \
4082     ASSEMBLE_ATOMIC64_BINOP(inst, movl, cmpxchgl); \
4083     break;                                         \
4084   case kX64Word64Atomic##op##Uint64:               \
4085     ASSEMBLE_ATOMIC64_BINOP(inst, movq, cmpxchgq); \
4086     break;
4087       ATOMIC64_BINOP_CASE(Add, addq)
4088       ATOMIC64_BINOP_CASE(Sub, subq)
4089       ATOMIC64_BINOP_CASE(And, andq)
4090       ATOMIC64_BINOP_CASE(Or, orq)
4091       ATOMIC64_BINOP_CASE(Xor, xorq)
4092 #undef ATOMIC64_BINOP_CASE
4093     case kWord32AtomicLoadInt8:
4094     case kWord32AtomicLoadUint8:
4095     case kWord32AtomicLoadInt16:
4096     case kWord32AtomicLoadUint16:
4097     case kWord32AtomicLoadWord32:
4098     case kWord32AtomicStoreWord8:
4099     case kWord32AtomicStoreWord16:
4100     case kWord32AtomicStoreWord32:
4101     case kX64Word64AtomicLoadUint8:
4102     case kX64Word64AtomicLoadUint16:
4103     case kX64Word64AtomicLoadUint32:
4104     case kX64Word64AtomicLoadUint64:
4105     case kX64Word64AtomicStoreWord8:
4106     case kX64Word64AtomicStoreWord16:
4107     case kX64Word64AtomicStoreWord32:
4108     case kX64Word64AtomicStoreWord64:
4109       UNREACHABLE();  // Won't be generated by instruction selector.
4110       break;
4111   }
4112   return kSuccess;
4113 }  // NOLINT(readability/fn_size)
4114 
4115 #undef ASSEMBLE_UNOP
4116 #undef ASSEMBLE_BINOP
4117 #undef ASSEMBLE_COMPARE
4118 #undef ASSEMBLE_MULT
4119 #undef ASSEMBLE_SHIFT
4120 #undef ASSEMBLE_MOVX
4121 #undef ASSEMBLE_SSE_BINOP
4122 #undef ASSEMBLE_SSE_UNOP
4123 #undef ASSEMBLE_AVX_BINOP
4124 #undef ASSEMBLE_IEEE754_BINOP
4125 #undef ASSEMBLE_IEEE754_UNOP
4126 #undef ASSEMBLE_ATOMIC_BINOP
4127 #undef ASSEMBLE_ATOMIC64_BINOP
4128 #undef ASSEMBLE_SIMD_INSTR
4129 #undef ASSEMBLE_SIMD_IMM_INSTR
4130 #undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
4131 #undef ASSEMBLE_SIMD_IMM_SHUFFLE
4132 #undef ASSEMBLE_SIMD_ALL_TRUE
4133 #undef ASSEMBLE_SIMD_SHIFT
4134 
4135 namespace {
4136 
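// Translates a machine-independent FlagsCondition into the corresponding x64
// condition code. The unordered (NaN-aware) variants map to the same codes as
// their ordered counterparts; callers handle the parity flag separately.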
4137 Condition FlagsConditionToCondition(FlagsCondition condition) {
4138   switch (condition) {
4139     case kUnorderedEqual:
4140     case kEqual:
4141       return equal;
4142     case kUnorderedNotEqual:
4143     case kNotEqual:
4144       return not_equal;
4145     case kSignedLessThan:
4146       return less;
4147     case kSignedGreaterThanOrEqual:
4148       return greater_equal;
4149     case kSignedLessThanOrEqual:
4150       return less_equal;
4151     case kSignedGreaterThan:
4152       return greater;
4153     case kUnsignedLessThan:
4154       return below;
4155     case kUnsignedGreaterThanOrEqual:
4156       return above_equal;
4157     case kUnsignedLessThanOrEqual:
4158       return below_equal;
4159     case kUnsignedGreaterThan:
4160       return above;
4161     case kOverflow:
4162       return overflow;
4163     case kNotOverflow:
4164       return no_overflow;
4165     default:
4166       break;
4167   }
4168   UNREACHABLE();
4169 }
4170 
4171 }  // namespace
4172 
4173 // Assembles branches after this instruction.
4174 void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
4175   Label::Distance flabel_distance =
4176       branch->fallthru ? Label::kNear : Label::kFar;
4177   Label* tlabel = branch->true_label;
4178   Label* flabel = branch->false_label;
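  // For the unordered conditions, the preceding floating-point comparison
  // sets the parity flag when either operand is NaN, so route that case to
  // the appropriate label before testing the ordered condition below.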
4179   if (branch->condition == kUnorderedEqual) {
4180     __ j(parity_even, flabel, flabel_distance);
4181   } else if (branch->condition == kUnorderedNotEqual) {
4182     __ j(parity_even, tlabel);
4183   }
4184   __ j(FlagsConditionToCondition(branch->condition), tlabel);
4185 
4186   if (!branch->fallthru) __ jmp(flabel, flabel_distance);
4187 }
4188 
4189 void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
4190                                             Instruction* instr) {
4191   // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
4192   if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
4193     return;
4194   }
4195 
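  // Conditionally clear the speculation poison register: the cmov below moves
  // zero into kSpeculationPoisonRegister when the negated condition holds, so
  // values masked with the poison register are nulled on the not-taken path.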
4196   condition = NegateFlagsCondition(condition);
4197   __ movl(kScratchRegister, Immediate(0));
4198   __ cmovq(FlagsConditionToCondition(condition), kSpeculationPoisonRegister,
4199            kScratchRegister);
4200 }
4201 
4202 void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
4203                                             BranchInfo* branch) {
4204   Label::Distance flabel_distance =
4205       branch->fallthru ? Label::kNear : Label::kFar;
4206   Label* tlabel = branch->true_label;
4207   Label* flabel = branch->false_label;
4208   Label nodeopt;
4209   if (branch->condition == kUnorderedEqual) {
4210     __ j(parity_even, flabel, flabel_distance);
4211   } else if (branch->condition == kUnorderedNotEqual) {
4212     __ j(parity_even, tlabel);
4213   }
4214   __ j(FlagsConditionToCondition(branch->condition), tlabel);
4215 
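  // With FLAG_deopt_every_n_times set, decrement a global stress counter;
  // when it reaches zero, reset it and force the deoptimization path
  // regardless of the branch condition. Flags and rax are preserved around
  // the counter update.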
4216   if (FLAG_deopt_every_n_times > 0) {
4217     ExternalReference counter =
4218         ExternalReference::stress_deopt_count(isolate());
4219 
4220     __ pushfq();
4221     __ pushq(rax);
4222     __ load_rax(counter);
4223     __ decl(rax);
4224     __ j(not_zero, &nodeopt, Label::kNear);
4225 
4226     __ Set(rax, FLAG_deopt_every_n_times);
4227     __ store_rax(counter);
4228     __ popq(rax);
4229     __ popfq();
4230     __ jmp(tlabel);
4231 
4232     __ bind(&nodeopt);
4233     __ store_rax(counter);
4234     __ popq(rax);
4235     __ popfq();
4236   }
4237 
4238   if (!branch->fallthru) {
4239     __ jmp(flabel, flabel_distance);
4240   }
4241 }
4242 
4243 void CodeGenerator::AssembleArchJump(RpoNumber target) {
4244   if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
4245 }
4246 
4247 void CodeGenerator::AssembleArchTrap(Instruction* instr,
4248                                      FlagsCondition condition) {
4249   auto ool = zone()->New<WasmOutOfLineTrap>(this, instr);
4250   Label* tlabel = ool->entry();
4251   Label end;
4252   if (condition == kUnorderedEqual) {
4253     __ j(parity_even, &end, Label::kNear);
4254   } else if (condition == kUnorderedNotEqual) {
4255     __ j(parity_even, tlabel);
4256   }
4257   __ j(FlagsConditionToCondition(condition), tlabel);
4258   __ bind(&end);
4259 }
4260 
4261 // Assembles boolean materializations after this instruction.
4262 void CodeGenerator::AssembleArchBoolean(Instruction* instr,
4263                                         FlagsCondition condition) {
4264   X64OperandConverter i(this, instr);
4265   Label done;
4266 
4267   // Materialize a full 64-bit 1 or 0 value. The result register is always the
4268   // last output of the instruction.
4269   Label check;
4270   DCHECK_NE(0u, instr->OutputCount());
4271   Register reg = i.OutputRegister(instr->OutputCount() - 1);
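  // For unordered comparisons a NaN operand leaves the parity flag set; in
  // that case the result is fixed to 0 (kUnorderedEqual) or 1
  // (kUnorderedNotEqual) without consulting the other flags.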
4272   if (condition == kUnorderedEqual) {
4273     __ j(parity_odd, &check, Label::kNear);
4274     __ movl(reg, Immediate(0));
4275     __ jmp(&done, Label::kNear);
4276   } else if (condition == kUnorderedNotEqual) {
4277     __ j(parity_odd, &check, Label::kNear);
4278     __ movl(reg, Immediate(1));
4279     __ jmp(&done, Label::kNear);
4280   }
4281   __ bind(&check);
4282   __ setcc(FlagsConditionToCondition(condition), reg);
4283   __ movzxbl(reg, reg);
4284   __ bind(&done);
4285 }
4286 
4287 void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
4288   X64OperandConverter i(this, instr);
4289   Register input = i.InputRegister(0);
4290   std::vector<std::pair<int32_t, Label*>> cases;
4291   for (size_t index = 2; index < instr->InputCount(); index += 2) {
4292     cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
4293   }
4294   AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
4295                                       cases.data() + cases.size());
4296 }
4297 
4298 void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
4299   X64OperandConverter i(this, instr);
4300   Register input = i.InputRegister(0);
4301   int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2);
4302   Label** cases = zone()->NewArray<Label*>(case_count);
4303   for (int32_t index = 0; index < case_count; ++index) {
4304     cases[index] = GetLabel(i.InputRpo(index + 2));
4305   }
4306   Label* const table = AddJumpTable(cases, case_count);
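  // Bounds-check the switch input against the number of cases, jump to the
  // default block if it is out of range, then dispatch through the jump table
  // of 8-byte label addresses (see AssembleJumpTable below).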
4307   __ cmpl(input, Immediate(case_count));
4308   __ j(above_equal, GetLabel(i.InputRpo(1)));
4309   __ leaq(kScratchRegister, Operand(table));
4310   __ jmp(Operand(kScratchRegister, input, times_8, 0));
4311 }
4312 
4313 namespace {
4314 
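// Each callee-saved XMM register occupies 16 bytes on the stack, i.e. two
// 8-byte stack slots (kQuadWordSize / kSystemPointerSize).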
4315 static const int kQuadWordSize = 16;
4316 
4317 }  // namespace
4318 
4319 void CodeGenerator::FinishFrame(Frame* frame) {
4320   CallDescriptor* call_descriptor = linkage()->GetIncomingDescriptor();
4321 
4322   const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
4323   if (saves_fp != 0) {
4324     frame->AlignSavedCalleeRegisterSlots();
4325     if (saves_fp != 0) {  // Save callee-saved XMM registers.
4326       const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
4327       frame->AllocateSavedCalleeRegisterSlots(
4328           saves_fp_count * (kQuadWordSize / kSystemPointerSize));
4329     }
4330   }
4331   const RegList saves = call_descriptor->CalleeSavedRegisters();
4332   if (saves != 0) {  // Save callee-saved registers.
4333     int count = 0;
4334     for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
4335       if (((1 << i) & saves)) {
4336         ++count;
4337       }
4338     }
4339     frame->AllocateSavedCalleeRegisterSlots(count);
4340   }
4341 }
4342 
4343 void CodeGenerator::AssembleConstructFrame() {
4344   auto call_descriptor = linkage()->GetIncomingDescriptor();
4345   if (frame_access_state()->has_frame()) {
4346     int pc_base = __ pc_offset();
4347 
4348     if (call_descriptor->IsCFunctionCall()) {
4349       __ pushq(rbp);
4350       __ movq(rbp, rsp);
4351       if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
4352         __ Push(Immediate(StackFrame::TypeToMarker(StackFrame::C_WASM_ENTRY)));
4353         // Reserve stack space for saving the c_entry_fp later.
4354         __ AllocateStackSpace(kSystemPointerSize);
4355       }
4356     } else if (call_descriptor->IsJSFunctionCall()) {
4357       __ Prologue();
4358     } else {
4359       __ StubPrologue(info()->GetOutputStackFrameType());
4360       if (call_descriptor->IsWasmFunctionCall()) {
4361         __ pushq(kWasmInstanceRegister);
4362       } else if (call_descriptor->IsWasmImportWrapper() ||
4363                  call_descriptor->IsWasmCapiFunction()) {
4364         // Wasm import wrappers are passed a tuple in the place of the instance.
4365         // Unpack the tuple into the instance and the target callable.
4366         // This must be done here in the codegen because it cannot be expressed
4367         // properly in the graph.
4368         __ LoadTaggedPointerField(
4369             kJSFunctionRegister,
4370             FieldOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
4371         __ LoadTaggedPointerField(
4372             kWasmInstanceRegister,
4373             FieldOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
4374         __ pushq(kWasmInstanceRegister);
4375         if (call_descriptor->IsWasmCapiFunction()) {
4376           // Reserve space for saving the PC later.
4377           __ AllocateStackSpace(kSystemPointerSize);
4378         }
4379       }
4380     }
4381 
4382     unwinding_info_writer_.MarkFrameConstructed(pc_base);
4383   }
4384   int required_slots =
4385       frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();
4386 
4387   if (info()->is_osr()) {
4388     // TurboFan OSR-compiled functions cannot be entered directly.
4389     __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
4390 
4391     // Unoptimized code jumps directly to this entrypoint while the unoptimized
4392     // frame is still on the stack. Optimized code uses OSR values directly from
4393     // the unoptimized frame. Thus, all that needs to be done is to allocate the
4394     // remaining stack slots.
4395     if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
4396     osr_pc_offset_ = __ pc_offset();
4397     required_slots -= static_cast<int>(osr_helper()->UnoptimizedFrameSlots());
4398     ResetSpeculationPoison();
4399   }
4400 
4401   const RegList saves = call_descriptor->CalleeSavedRegisters();
4402   const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
4403 
4404   if (required_slots > 0) {
4405     DCHECK(frame_access_state()->has_frame());
4406     if (info()->IsWasm() && required_slots > 128) {
4407       // For WebAssembly functions with big frames we have to do the stack
4408       // overflow check before we construct the frame. Otherwise we may not
4409       // have enough space on the stack to call the runtime for the stack
4410       // overflow.
4411       Label done;
4412 
4413       // If the frame is bigger than the stack, we throw the stack overflow
4414       // exception unconditionally. Thereby we can avoid the integer overflow
4415       // check in the condition code.
4416       if (required_slots * kSystemPointerSize < FLAG_stack_size * 1024) {
4417         __ movq(kScratchRegister,
4418                 FieldOperand(kWasmInstanceRegister,
4419                              WasmInstanceObject::kRealStackLimitAddressOffset));
4420         __ movq(kScratchRegister, Operand(kScratchRegister, 0));
4421         __ addq(kScratchRegister,
4422                 Immediate(required_slots * kSystemPointerSize));
4423         __ cmpq(rsp, kScratchRegister);
4424         __ j(above_equal, &done, Label::kNear);
4425       }
4426 
4427       __ near_call(wasm::WasmCode::kWasmStackOverflow,
4428                    RelocInfo::WASM_STUB_CALL);
4429       ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
4430       RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
4431       __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
4432       __ bind(&done);
4433     }
4434 
4435     // Skip callee-saved and return slots, which are created below.
4436     required_slots -= base::bits::CountPopulation(saves);
4437     required_slots -= base::bits::CountPopulation(saves_fp) *
4438                       (kQuadWordSize / kSystemPointerSize);
4439     required_slots -= frame()->GetReturnSlotCount();
4440     if (required_slots > 0) {
4441       __ AllocateStackSpace(required_slots * kSystemPointerSize);
4442     }
4443   }
4444 
4445   if (saves_fp != 0) {  // Save callee-saved XMM registers.
4446     const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
4447     const int stack_size = saves_fp_count * kQuadWordSize;
4448     // Adjust the stack pointer.
4449     __ AllocateStackSpace(stack_size);
4450     // Store the registers on the stack.
4451     int slot_idx = 0;
4452     for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
4453       if (!((1 << i) & saves_fp)) continue;
4454       __ Movdqu(Operand(rsp, kQuadWordSize * slot_idx),
4455                 XMMRegister::from_code(i));
4456       slot_idx++;
4457     }
4458   }
4459 
4460   if (saves != 0) {  // Save callee-saved registers.
4461     for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
4462       if (!((1 << i) & saves)) continue;
4463       __ pushq(Register::from_code(i));
4464     }
4465   }
4466 
4467   // Allocate return slots (located after callee-saved).
4468   if (frame()->GetReturnSlotCount() > 0) {
4469     __ AllocateStackSpace(frame()->GetReturnSlotCount() * kSystemPointerSize);
4470   }
4471 }
4472 
4473 void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
4474   auto call_descriptor = linkage()->GetIncomingDescriptor();
4475 
4476   // Restore registers.
4477   const RegList saves = call_descriptor->CalleeSavedRegisters();
4478   if (saves != 0) {
4479     const int returns = frame()->GetReturnSlotCount();
4480     if (returns != 0) {
4481       __ addq(rsp, Immediate(returns * kSystemPointerSize));
4482     }
4483     for (int i = 0; i < Register::kNumRegisters; i++) {
4484       if (!((1 << i) & saves)) continue;
4485       __ popq(Register::from_code(i));
4486     }
4487   }
4488   const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
4489   if (saves_fp != 0) {
4490     const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
4491     const int stack_size = saves_fp_count * kQuadWordSize;
4492     // Load the registers from the stack.
4493     int slot_idx = 0;
4494     for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
4495       if (!((1 << i) & saves_fp)) continue;
4496       __ Movdqu(XMMRegister::from_code(i),
4497                 Operand(rsp, kQuadWordSize * slot_idx));
4498       slot_idx++;
4499     }
4500     // Adjust the stack pointer.
4501     __ addq(rsp, Immediate(stack_size));
4502   }
4503 
4504   unwinding_info_writer_.MarkBlockWillExit();
4505 
4506   // We might need rcx and r10 for scratch.
4507   DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rcx.bit());
4508   DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & r10.bit());
4509   X64OperandConverter g(this, nullptr);
4510   int parameter_count =
4511       static_cast<int>(call_descriptor->StackParameterCount());
4512 
4513   // {additional_pop_count} is only greater than zero if {parameter_count == 0}.
4514   // Check RawMachineAssembler::PopAndReturn.
4515   if (parameter_count != 0) {
4516     if (additional_pop_count->IsImmediate()) {
4517       DCHECK_EQ(g.ToConstant(additional_pop_count).ToInt32(), 0);
4518     } else if (__ emit_debug_code()) {
4519       __ cmpq(g.ToRegister(additional_pop_count), Immediate(0));
4520       __ Assert(equal, AbortReason::kUnexpectedAdditionalPopValue);
4521     }
4522   }
4523 
4524   Register argc_reg = rcx;
4525 #ifdef V8_NO_ARGUMENTS_ADAPTOR
4526   // Functions with JS linkage have at least one parameter (the receiver).
4527   // If {parameter_count} == 0, it means it is a builtin with
4528   // kDontAdaptArgumentsSentinel, which takes care of JS arguments popping
4529   // itself.
4530   const bool drop_jsargs = frame_access_state()->has_frame() &&
4531                            call_descriptor->IsJSFunctionCall() &&
4532                            parameter_count != 0;
4533 #else
4534   const bool drop_jsargs = false;
4535 #endif
4536   if (call_descriptor->IsCFunctionCall()) {
4537     AssembleDeconstructFrame();
4538   } else if (frame_access_state()->has_frame()) {
4539     if (additional_pop_count->IsImmediate() &&
4540         g.ToConstant(additional_pop_count).ToInt32() == 0) {
4541       // Canonicalize JSFunction return sites for now.
4542       if (return_label_.is_bound()) {
4543         __ jmp(&return_label_);
4544         return;
4545       } else {
4546         __ bind(&return_label_);
4547       }
4548     }
4549     if (drop_jsargs) {
4550       // Get the actual argument count.
4551       __ movq(argc_reg, Operand(rbp, StandardFrameConstants::kArgCOffset));
4552     }
4553     AssembleDeconstructFrame();
4554   }
4555 
4556   if (drop_jsargs) {
4557     // We must pop all arguments from the stack (including the receiver). This
4558     // number of arguments is given by max(1 + argc_reg, parameter_count).
4559     int parameter_count_without_receiver =
4560         parameter_count - 1;  // Exclude the receiver to simplify the
4561                               // computation. We'll account for it at the end.
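    // If the dynamic argument count does not exceed the declared parameter
    // count (minus the receiver), drop exactly parameter_count slots via the
    // immediate form of Ret; otherwise drop 1 + argc_reg slots manually and
    // return through a re-pushed return address.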
4562     Label mismatch_return;
4563     Register scratch_reg = r10;
4564     DCHECK_NE(argc_reg, scratch_reg);
4565     __ cmpq(argc_reg, Immediate(parameter_count_without_receiver));
4566     __ j(greater, &mismatch_return, Label::kNear);
4567     __ Ret(parameter_count * kSystemPointerSize, scratch_reg);
4568     __ bind(&mismatch_return);
4569     __ PopReturnAddressTo(scratch_reg);
4570     __ leaq(rsp, Operand(rsp, argc_reg, times_system_pointer_size,
4571                          kSystemPointerSize));  // Also pop the receiver.
4572     // We use a return instead of a jump for better return address prediction.
4573     __ PushReturnAddressFrom(scratch_reg);
4574     __ Ret();
4575   } else if (additional_pop_count->IsImmediate()) {
4576     Register scratch_reg = r10;
4577     int additional_count = g.ToConstant(additional_pop_count).ToInt32();
4578     size_t pop_size = (parameter_count + additional_count) * kSystemPointerSize;
4579     CHECK_LE(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
4580     __ Ret(static_cast<int>(pop_size), scratch_reg);
4581   } else {
4582     Register pop_reg = g.ToRegister(additional_pop_count);
4583     Register scratch_reg = pop_reg == r10 ? rcx : r10;
4584     int pop_size = static_cast<int>(parameter_count * kSystemPointerSize);
4585     __ PopReturnAddressTo(scratch_reg);
4586     __ leaq(rsp, Operand(rsp, pop_reg, times_system_pointer_size,
4587                          static_cast<int>(pop_size)));
4588     __ PushReturnAddressFrom(scratch_reg);
4589     __ Ret();
4590   }
4591 }
4592 
4593 void CodeGenerator::FinishCode() { tasm()->PatchConstPool(); }
4594 
4595 void CodeGenerator::PrepareForDeoptimizationExits(
4596     ZoneDeque<DeoptimizationExit*>* exits) {}
4597 
4598 void CodeGenerator::IncrementStackAccessCounter(
4599     InstructionOperand* source, InstructionOperand* destination) {
4600   DCHECK(FLAG_trace_turbo_stack_accesses);
4601   if (!info()->IsOptimizing() && !info()->IsWasm()) return;
4602   DCHECK_NOT_NULL(debug_name_);
4603   auto IncrementCounter = [&](ExternalReference counter) {
4604     __ incl(__ ExternalReferenceAsOperand(counter));
4605   };
4606   if (source->IsAnyStackSlot()) {
4607     IncrementCounter(
4608         ExternalReference::address_of_load_from_stack_count(debug_name_));
4609   }
4610   if (destination->IsAnyStackSlot()) {
4611     IncrementCounter(
4612         ExternalReference::address_of_store_to_stack_count(debug_name_));
4613   }
4614 }
4615 
4616 void CodeGenerator::AssembleMove(InstructionOperand* source,
4617                                  InstructionOperand* destination) {
4618   X64OperandConverter g(this, nullptr);
4619   // Helper function to write the given constant to the dst register.
4620   auto MoveConstantToRegister = [&](Register dst, Constant src) {
4621     switch (src.type()) {
4622       case Constant::kInt32: {
4623         if (RelocInfo::IsWasmReference(src.rmode())) {
4624           __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
4625         } else {
4626           int32_t value = src.ToInt32();
4627           if (value == 0) {
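            // xorl has a shorter encoding than movl with a zero immediate,
            // and writing the 32-bit register also clears the upper 32 bits.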
4628             __ xorl(dst, dst);
4629           } else {
4630             __ movl(dst, Immediate(value));
4631           }
4632         }
4633         break;
4634       }
4635       case Constant::kInt64:
4636         if (RelocInfo::IsWasmReference(src.rmode())) {
4637           __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
4638         } else {
4639           __ Set(dst, src.ToInt64());
4640         }
4641         break;
4642       case Constant::kFloat32:
4643         __ MoveNumber(dst, src.ToFloat32());
4644         break;
4645       case Constant::kFloat64:
4646         __ MoveNumber(dst, src.ToFloat64().value());
4647         break;
4648       case Constant::kExternalReference:
4649         __ Move(dst, src.ToExternalReference());
4650         break;
4651       case Constant::kHeapObject: {
4652         Handle<HeapObject> src_object = src.ToHeapObject();
4653         RootIndex index;
4654         if (IsMaterializableFromRoot(src_object, &index)) {
4655           __ LoadRoot(dst, index);
4656         } else {
4657           __ Move(dst, src_object);
4658         }
4659         break;
4660       }
4661       case Constant::kCompressedHeapObject: {
4662         Handle<HeapObject> src_object = src.ToHeapObject();
4663         RootIndex index;
4664         if (IsMaterializableFromRoot(src_object, &index)) {
4665           __ LoadRoot(dst, index);
4666         } else {
4667           __ Move(dst, src_object, RelocInfo::COMPRESSED_EMBEDDED_OBJECT);
4668         }
4669         break;
4670       }
4671       case Constant::kDelayedStringConstant: {
4672         const StringConstantBase* src_constant = src.ToDelayedStringConstant();
4673         __ MoveStringConstant(dst, src_constant);
4674         break;
4675       }
4676       case Constant::kRpoNumber:
4677         UNREACHABLE();  // TODO(dcarney): load of labels on x64.
4678         break;
4679     }
4680   };
4681   // Helper function to write the given constant to the stack.
4682   auto MoveConstantToSlot = [&](Operand dst, Constant src) {
4683     if (!RelocInfo::IsWasmReference(src.rmode())) {
4684       switch (src.type()) {
4685         case Constant::kInt32:
4686           __ movq(dst, Immediate(src.ToInt32()));
4687           return;
4688         case Constant::kInt64:
4689           __ Set(dst, src.ToInt64());
4690           return;
4691         default:
4692           break;
4693       }
4694     }
4695     MoveConstantToRegister(kScratchRegister, src);
4696     __ movq(dst, kScratchRegister);
4697   };
4698 
4699   if (FLAG_trace_turbo_stack_accesses) {
4700     IncrementStackAccessCounter(source, destination);
4701   }
4702 
4703   // Dispatch on the source and destination operand kinds.
4704   switch (MoveType::InferMove(source, destination)) {
4705     case MoveType::kRegisterToRegister:
4706       if (source->IsRegister()) {
4707         __ movq(g.ToRegister(destination), g.ToRegister(source));
4708       } else {
4709         DCHECK(source->IsFPRegister());
4710         __ Movapd(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
4711       }
4712       return;
4713     case MoveType::kRegisterToStack: {
4714       Operand dst = g.ToOperand(destination);
4715       if (source->IsRegister()) {
4716         __ movq(dst, g.ToRegister(source));
4717       } else {
4718         DCHECK(source->IsFPRegister());
4719         XMMRegister src = g.ToDoubleRegister(source);
4720         MachineRepresentation rep =
4721             LocationOperand::cast(source)->representation();
4722         if (rep != MachineRepresentation::kSimd128) {
4723           __ Movsd(dst, src);
4724         } else {
4725           __ Movups(dst, src);
4726         }
4727       }
4728       return;
4729     }
4730     case MoveType::kStackToRegister: {
4731       Operand src = g.ToOperand(source);
4732       if (source->IsStackSlot()) {
4733         __ movq(g.ToRegister(destination), src);
4734       } else {
4735         DCHECK(source->IsFPStackSlot());
4736         XMMRegister dst = g.ToDoubleRegister(destination);
4737         MachineRepresentation rep =
4738             LocationOperand::cast(source)->representation();
4739         if (rep != MachineRepresentation::kSimd128) {
4740           __ Movsd(dst, src);
4741         } else {
4742           __ Movups(dst, src);
4743         }
4744       }
4745       return;
4746     }
4747     case MoveType::kStackToStack: {
4748       Operand src = g.ToOperand(source);
4749       Operand dst = g.ToOperand(destination);
4750       if (source->IsStackSlot()) {
4751         // Spill on demand to use a temporary register for memory-to-memory
4752         // moves.
4753         __ movq(kScratchRegister, src);
4754         __ movq(dst, kScratchRegister);
4755       } else {
4756         MachineRepresentation rep =
4757             LocationOperand::cast(source)->representation();
4758         if (rep != MachineRepresentation::kSimd128) {
4759           __ Movsd(kScratchDoubleReg, src);
4760           __ Movsd(dst, kScratchDoubleReg);
4761         } else {
4762           DCHECK(source->IsSimd128StackSlot());
4763           __ Movups(kScratchDoubleReg, src);
4764           __ Movups(dst, kScratchDoubleReg);
4765         }
4766       }
4767       return;
4768     }
4769     case MoveType::kConstantToRegister: {
4770       Constant src = g.ToConstant(source);
4771       if (destination->IsRegister()) {
4772         MoveConstantToRegister(g.ToRegister(destination), src);
4773       } else {
4774         DCHECK(destination->IsFPRegister());
4775         XMMRegister dst = g.ToDoubleRegister(destination);
4776         if (src.type() == Constant::kFloat32) {
4777           // TODO(turbofan): Can we do better here?
4778           __ Move(dst, bit_cast<uint32_t>(src.ToFloat32()));
4779         } else {
4780           DCHECK_EQ(src.type(), Constant::kFloat64);
4781           __ Move(dst, src.ToFloat64().AsUint64());
4782         }
4783       }
4784       return;
4785     }
4786     case MoveType::kConstantToStack: {
4787       Constant src = g.ToConstant(source);
4788       Operand dst = g.ToOperand(destination);
4789       if (destination->IsStackSlot()) {
4790         MoveConstantToSlot(dst, src);
4791       } else {
4792         DCHECK(destination->IsFPStackSlot());
4793         if (src.type() == Constant::kFloat32) {
4794           __ movl(dst, Immediate(bit_cast<uint32_t>(src.ToFloat32())));
4795         } else {
4796           DCHECK_EQ(src.type(), Constant::kFloat64);
4797           __ movq(kScratchRegister, src.ToFloat64().AsUint64());
4798           __ movq(dst, kScratchRegister);
4799         }
4800       }
4801       return;
4802     }
4803   }
4804   UNREACHABLE();
4805 }
4806 
4807 void CodeGenerator::AssembleSwap(InstructionOperand* source,
4808                                  InstructionOperand* destination) {
4809   if (FLAG_trace_turbo_stack_accesses) {
4810     IncrementStackAccessCounter(source, destination);
4811     IncrementStackAccessCounter(destination, source);
4812   }
4813 
4814   X64OperandConverter g(this, nullptr);
4815   // Dispatch on the source and destination operand kinds.  Not all
4816   // combinations are possible.
4817   switch (MoveType::InferSwap(source, destination)) {
4818     case MoveType::kRegisterToRegister: {
4819       if (source->IsRegister()) {
4820         Register src = g.ToRegister(source);
4821         Register dst = g.ToRegister(destination);
4822         __ movq(kScratchRegister, src);
4823         __ movq(src, dst);
4824         __ movq(dst, kScratchRegister);
4825       } else {
4826         DCHECK(source->IsFPRegister());
4827         XMMRegister src = g.ToDoubleRegister(source);
4828         XMMRegister dst = g.ToDoubleRegister(destination);
4829         __ Movapd(kScratchDoubleReg, src);
4830         __ Movapd(src, dst);
4831         __ Movapd(dst, kScratchDoubleReg);
4832       }
4833       return;
4834     }
4835     case MoveType::kRegisterToStack: {
4836       if (source->IsRegister()) {
4837         Register src = g.ToRegister(source);
4838         Operand dst = g.ToOperand(destination);
4839         __ movq(kScratchRegister, src);
4840         __ movq(src, dst);
4841         __ movq(dst, kScratchRegister);
4842       } else {
4843         DCHECK(source->IsFPRegister());
4844         XMMRegister src = g.ToDoubleRegister(source);
4845         Operand dst = g.ToOperand(destination);
4846         MachineRepresentation rep =
4847             LocationOperand::cast(source)->representation();
4848         if (rep != MachineRepresentation::kSimd128) {
4849           __ Movsd(kScratchDoubleReg, src);
4850           __ Movsd(src, dst);
4851           __ Movsd(dst, kScratchDoubleReg);
4852         } else {
4853           __ Movups(kScratchDoubleReg, src);
4854           __ Movups(src, dst);
4855           __ Movups(dst, kScratchDoubleReg);
4856         }
4857       }
4858       return;
4859     }
4860     case MoveType::kStackToStack: {
4861       Operand src = g.ToOperand(source);
4862       Operand dst = g.ToOperand(destination);
4863       MachineRepresentation rep =
4864           LocationOperand::cast(source)->representation();
4865       if (rep != MachineRepresentation::kSimd128) {
4866         Register tmp = kScratchRegister;
4867         __ movq(tmp, dst);
4868         __ pushq(src);  // Then use stack to copy src to destination.
4869         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4870                                                          kSystemPointerSize);
4871         __ popq(dst);
4872         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4873                                                          -kSystemPointerSize);
4874         __ movq(src, tmp);
4875       } else {
4876         // Without AVX, misaligned reads and writes will trap. Move using the
4877         // stack, in two parts.
4878         __ movups(kScratchDoubleReg, dst);  // Save dst in scratch register.
4879         __ pushq(src);  // Then use stack to copy src to destination.
4880         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4881                                                          kSystemPointerSize);
4882         __ popq(dst);
4883         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4884                                                          -kSystemPointerSize);
4885         __ pushq(g.ToOperand(source, kSystemPointerSize));
4886         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4887                                                          kSystemPointerSize);
4888         __ popq(g.ToOperand(destination, kSystemPointerSize));
4889         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4890                                                          -kSystemPointerSize);
4891         __ movups(src, kScratchDoubleReg);
4892       }
4893       return;
4894     }
4895     default:
4896       UNREACHABLE();
4897   }
4898 }
4899 
4900 void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
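  // Each jump-table entry is an 8-byte absolute label address (dq), matching
  // the times_8 scaling used by AssembleArchTableSwitch above.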
4901   for (size_t index = 0; index < target_count; ++index) {
4902     __ dq(targets[index]);
4903   }
4904 }
4905 
4906 #undef __
4907 
4908 }  // namespace compiler
4909 }  // namespace internal
4910 }  // namespace v8
4911