1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/compiler/backend/code-generator.h"
6 
7 #include <limits>
8 
9 #include "src/base/overflowing-math.h"
10 #include "src/codegen/macro-assembler.h"
11 #include "src/codegen/optimized-compilation-info.h"
12 #include "src/codegen/x64/assembler-x64.h"
13 #include "src/compiler/backend/code-generator-impl.h"
14 #include "src/compiler/backend/gap-resolver.h"
15 #include "src/compiler/node-matchers.h"
16 #include "src/compiler/osr.h"
17 #include "src/heap/heap-inl.h"  // crbug.com/v8/8499
18 #include "src/objects/smi.h"
19 #include "src/wasm/wasm-code-manager.h"
20 #include "src/wasm/wasm-objects.h"
21 
22 namespace v8 {
23 namespace internal {
24 namespace compiler {
25 
26 #define __ tasm()->
27 
28 // Adds X64 specific methods for decoding operands.
29 class X64OperandConverter : public InstructionOperandConverter {
30  public:
31   X64OperandConverter(CodeGenerator* gen, Instruction* instr)
32       : InstructionOperandConverter(gen, instr) {}
33 
34   Immediate InputImmediate(size_t index) {
35     return ToImmediate(instr_->InputAt(index));
36   }
37 
38   Operand InputOperand(size_t index, int extra = 0) {
39     return ToOperand(instr_->InputAt(index), extra);
40   }
41 
42   Operand OutputOperand() { return ToOperand(instr_->Output()); }
43 
44   Immediate ToImmediate(InstructionOperand* operand) {
45     Constant constant = ToConstant(operand);
46     if (constant.type() == Constant::kFloat64) {
47       DCHECK_EQ(0, constant.ToFloat64().AsUint64());
48       return Immediate(0);
49     }
50     if (RelocInfo::IsWasmReference(constant.rmode())) {
51       return Immediate(constant.ToInt32(), constant.rmode());
52     }
53     return Immediate(constant.ToInt32());
54   }
55 
56   Operand ToOperand(InstructionOperand* op, int extra = 0) {
57     DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
58     return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
59   }
60 
61   Operand SlotToOperand(int slot_index, int extra = 0) {
62     FrameOffset offset = frame_access_state()->GetFrameOffset(slot_index);
63     return Operand(offset.from_stack_pointer() ? rsp : rbp,
64                    offset.offset() + extra);
65   }
66 
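  // Returns the current input offset and advances it by one. Used to walk an
  // instruction's inputs while decoding a memory operand below.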
67   static size_t NextOffset(size_t* offset) {
68     size_t i = *offset;
69     (*offset)++;
70     return i;
71   }
72 
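  // Computes the scale factor as the distance of `mode` from its times_1
  // variant `one`; the STATIC_ASSERTs pin down the ScaleFactor encoding that
  // this arithmetic relies on.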
73   static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
74     STATIC_ASSERT(0 == static_cast<int>(times_1));
75     STATIC_ASSERT(1 == static_cast<int>(times_2));
76     STATIC_ASSERT(2 == static_cast<int>(times_4));
77     STATIC_ASSERT(3 == static_cast<int>(times_8));
78     int scale = static_cast<int>(mode - one);
79     DCHECK(scale >= 0 && scale < 4);
80     return static_cast<ScaleFactor>(scale);
81   }
82 
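  // Decodes the memory operand encoded in this instruction, consuming inputs
  // starting at *offset according to the instruction's addressing mode.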
83   Operand MemoryOperand(size_t* offset) {
84     AddressingMode mode = AddressingModeField::decode(instr_->opcode());
85     switch (mode) {
86       case kMode_MR: {
87         Register base = InputRegister(NextOffset(offset));
88         int32_t disp = 0;
89         return Operand(base, disp);
90       }
91       case kMode_MRI: {
92         Register base = InputRegister(NextOffset(offset));
93         int32_t disp = InputInt32(NextOffset(offset));
94         return Operand(base, disp);
95       }
96       case kMode_MR1:
97       case kMode_MR2:
98       case kMode_MR4:
99       case kMode_MR8: {
100         Register base = InputRegister(NextOffset(offset));
101         Register index = InputRegister(NextOffset(offset));
102         ScaleFactor scale = ScaleFor(kMode_MR1, mode);
103         int32_t disp = 0;
104         return Operand(base, index, scale, disp);
105       }
106       case kMode_MR1I:
107       case kMode_MR2I:
108       case kMode_MR4I:
109       case kMode_MR8I: {
110         Register base = InputRegister(NextOffset(offset));
111         Register index = InputRegister(NextOffset(offset));
112         ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
113         int32_t disp = InputInt32(NextOffset(offset));
114         return Operand(base, index, scale, disp);
115       }
116       case kMode_M1: {
117         Register base = InputRegister(NextOffset(offset));
118         int32_t disp = 0;
119         return Operand(base, disp);
120       }
121       case kMode_M2:
122         UNREACHABLE();  // Should use kMode_MR with a more compact encoding instead.
123         return Operand(no_reg, 0);
124       case kMode_M4:
125       case kMode_M8: {
126         Register index = InputRegister(NextOffset(offset));
127         ScaleFactor scale = ScaleFor(kMode_M1, mode);
128         int32_t disp = 0;
129         return Operand(index, scale, disp);
130       }
131       case kMode_M1I:
132       case kMode_M2I:
133       case kMode_M4I:
134       case kMode_M8I: {
135         Register index = InputRegister(NextOffset(offset));
136         ScaleFactor scale = ScaleFor(kMode_M1I, mode);
137         int32_t disp = InputInt32(NextOffset(offset));
138         return Operand(index, scale, disp);
139       }
140       case kMode_Root: {
141         Register base = kRootRegister;
142         int32_t disp = InputInt32(NextOffset(offset));
143         return Operand(base, disp);
144       }
145       case kMode_None:
146         UNREACHABLE();
147     }
148     UNREACHABLE();
149   }
150 
151   Operand MemoryOperand(size_t first_input = 0) {
152     return MemoryOperand(&first_input);
153   }
154 };
155 
156 namespace {
157 
158 bool HasAddressingMode(Instruction* instr) {
159   return instr->addressing_mode() != kMode_None;
160 }
161 
162 bool HasImmediateInput(Instruction* instr, size_t index) {
163   return instr->InputAt(index)->IsImmediate();
164 }
165 
166 bool HasRegisterInput(Instruction* instr, size_t index) {
167   return instr->InputAt(index)->IsRegister();
168 }
169 
170 class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
171  public:
172   OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
173       : OutOfLineCode(gen), result_(result) {}
174 
175   void Generate() final {
176     __ Xorps(result_, result_);
177     __ Divss(result_, result_);
178   }
179 
180  private:
181   XMMRegister const result_;
182 };
183 
184 class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
185  public:
186   OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
187       : OutOfLineCode(gen), result_(result) {}
188 
189   void Generate() final {
190     __ Xorpd(result_, result_);
191     __ Divsd(result_, result_);
192   }
193 
194  private:
195   XMMRegister const result_;
196 };
197 
198 class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
199  public:
200   OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
201                              XMMRegister input, StubCallMode stub_mode,
202                              UnwindingInfoWriter* unwinding_info_writer)
203       : OutOfLineCode(gen),
204         result_(result),
205         input_(input),
206         stub_mode_(stub_mode),
207         unwinding_info_writer_(unwinding_info_writer),
208         isolate_(gen->isolate()),
209         zone_(gen->zone()) {}
210 
211   void Generate() final {
212     __ AllocateStackSpace(kDoubleSize);
213     unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
214                                                       kDoubleSize);
215     __ Movsd(MemOperand(rsp, 0), input_);
216     if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
217       // A direct call to a wasm runtime stub defined in this module.
218       // Just encode the stub index. This will be patched when the code
219       // is added to the native module and copied into wasm code space.
220       __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
221     } else if (tasm()->options().inline_offheap_trampolines) {
222       // With embedded builtins we do not need the isolate here. This allows
223       // the call to be generated asynchronously.
224       __ CallBuiltin(Builtins::kDoubleToI);
225     } else {
226       __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
227     }
228     __ movl(result_, MemOperand(rsp, 0));
229     __ addq(rsp, Immediate(kDoubleSize));
230     unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
231                                                       -kDoubleSize);
232   }
233 
234  private:
235   Register const result_;
236   XMMRegister const input_;
237   StubCallMode stub_mode_;
238   UnwindingInfoWriter* const unwinding_info_writer_;
239   Isolate* isolate_;
240   Zone* zone_;
241 };
242 
243 class OutOfLineRecordWrite final : public OutOfLineCode {
244  public:
245   OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
246                        Register value, Register scratch0, Register scratch1,
247                        RecordWriteMode mode, StubCallMode stub_mode)
248       : OutOfLineCode(gen),
249         object_(object),
250         operand_(operand),
251         value_(value),
252         scratch0_(scratch0),
253         scratch1_(scratch1),
254         mode_(mode),
255         stub_mode_(stub_mode),
256         zone_(gen->zone()) {}
257 
258   void Generate() final {
259     if (mode_ > RecordWriteMode::kValueIsPointer) {
260       __ JumpIfSmi(value_, exit());
261     }
262     if (COMPRESS_POINTERS_BOOL) {
263       __ DecompressTaggedPointer(value_, value_);
264     }
265     __ CheckPageFlag(value_, scratch0_,
266                      MemoryChunk::kPointersToHereAreInterestingMask, zero,
267                      exit());
268     __ leaq(scratch1_, operand_);
269 
270     RememberedSetAction const remembered_set_action =
271         mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
272                                              : OMIT_REMEMBERED_SET;
273     SaveFPRegsMode const save_fp_mode =
274         frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
275 
276     if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
277       __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
278     } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
279       // A direct call to a wasm runtime stub defined in this module.
280       // Just encode the stub index. This will be patched when the code
281       // is added to the native module and copied into wasm code space.
282       __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
283                              save_fp_mode, wasm::WasmCode::kRecordWrite);
284     } else {
285       __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
286                              save_fp_mode);
287     }
288   }
289 
290  private:
291   Register const object_;
292   Operand const operand_;
293   Register const value_;
294   Register const scratch0_;
295   Register const scratch1_;
296   RecordWriteMode const mode_;
297   StubCallMode const stub_mode_;
298   Zone* zone_;
299 };
300 
301 class WasmOutOfLineTrap : public OutOfLineCode {
302  public:
303   WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
304       : OutOfLineCode(gen), gen_(gen), instr_(instr) {}
305 
306   void Generate() override {
307     X64OperandConverter i(gen_, instr_);
308     TrapId trap_id =
309         static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
310     GenerateWithTrapId(trap_id);
311   }
312 
313  protected:
314   CodeGenerator* gen_;
315 
316   void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }
317 
318  private:
319   void GenerateCallToTrap(TrapId trap_id) {
320     if (!gen_->wasm_runtime_exception_support()) {
321       // We cannot test calls to the runtime in cctest/test-run-wasm.
322       // Therefore we emit a call to C here instead of a call to the runtime.
323       __ PrepareCallCFunction(0);
324       __ CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(),
325                        0);
326       __ LeaveFrame(StackFrame::WASM_COMPILED);
327       auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
328       size_t pop_size =
329           call_descriptor->StackParameterCount() * kSystemPointerSize;
330       // Use rcx as a scratch register; we return immediately anyway.
331       __ Ret(static_cast<int>(pop_size), rcx);
332     } else {
333       gen_->AssembleSourcePosition(instr_);
334       // A direct call to a wasm runtime stub defined in this module.
335       // Just encode the stub index. This will be patched when the code
336       // is added to the native module and copied into wasm code space.
337       __ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
338       ReferenceMap* reference_map =
339           new (gen_->zone()) ReferenceMap(gen_->zone());
340       gen_->RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
341       __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
342     }
343   }
344 
345   Instruction* instr_;
346 };
347 
348 class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {
349  public:
350   WasmProtectedInstructionTrap(CodeGenerator* gen, int pc, Instruction* instr)
351       : WasmOutOfLineTrap(gen, instr), pc_(pc) {}
352 
353   void Generate() final {
354     gen_->AddProtectedInstructionLanding(pc_, __ pc_offset());
355     GenerateWithTrapId(TrapId::kTrapMemOutOfBounds);
356   }
357 
358  private:
359   int pc_;
360 };
361 
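// For memory accesses marked as protected (i.e. relying on the trap handler
// for out-of-bounds checks), register an out-of-line landing pad that raises a
// memory-out-of-bounds trap for the protected instruction at {pc}.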
362 void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
363                          InstructionCode opcode, Instruction* instr,
364                          int pc) {
365   const MemoryAccessMode access_mode =
366       static_cast<MemoryAccessMode>(MiscField::decode(opcode));
367   if (access_mode == kMemoryAccessProtected) {
368     new (zone) WasmProtectedInstructionTrap(codegen, pc, instr);
369   }
370 }
371 
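// For poisoned loads, mask the loaded value with the speculation poison
// register so that a mis-speculated load yields an all-zero value.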
372 void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
373                                    InstructionCode opcode, Instruction* instr,
374                                    X64OperandConverter const& i) {
375   const MemoryAccessMode access_mode =
376       static_cast<MemoryAccessMode>(MiscField::decode(opcode));
377   if (access_mode == kMemoryAccessPoisoned) {
378     Register value = i.OutputRegister();
379     codegen->tasm()->andq(value, kSpeculationPoisonRegister);
380   }
381 }
382 
383 }  // namespace
384 
385 #define ASSEMBLE_UNOP(asm_instr)         \
386   do {                                   \
387     if (instr->Output()->IsRegister()) { \
388       __ asm_instr(i.OutputRegister());  \
389     } else {                             \
390       __ asm_instr(i.OutputOperand());   \
391     }                                    \
392   } while (false)
393 
394 #define ASSEMBLE_BINOP(asm_instr)                                \
395   do {                                                           \
396     if (HasAddressingMode(instr)) {                              \
397       size_t index = 1;                                          \
398       Operand right = i.MemoryOperand(&index);                   \
399       __ asm_instr(i.InputRegister(0), right);                   \
400     } else {                                                     \
401       if (HasImmediateInput(instr, 1)) {                         \
402         if (HasRegisterInput(instr, 0)) {                        \
403           __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
404         } else {                                                 \
405           __ asm_instr(i.InputOperand(0), i.InputImmediate(1));  \
406         }                                                        \
407       } else {                                                   \
408         if (HasRegisterInput(instr, 1)) {                        \
409           __ asm_instr(i.InputRegister(0), i.InputRegister(1));  \
410         } else {                                                 \
411           __ asm_instr(i.InputRegister(0), i.InputOperand(1));   \
412         }                                                        \
413       }                                                          \
414     }                                                            \
415   } while (false)
416 
417 #define ASSEMBLE_COMPARE(asm_instr)                              \
418   do {                                                           \
419     if (HasAddressingMode(instr)) {                              \
420       size_t index = 0;                                          \
421       Operand left = i.MemoryOperand(&index);                    \
422       if (HasImmediateInput(instr, index)) {                     \
423         __ asm_instr(left, i.InputImmediate(index));             \
424       } else {                                                   \
425         __ asm_instr(left, i.InputRegister(index));              \
426       }                                                          \
427     } else {                                                     \
428       if (HasImmediateInput(instr, 1)) {                         \
429         if (HasRegisterInput(instr, 0)) {                        \
430           __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
431         } else {                                                 \
432           __ asm_instr(i.InputOperand(0), i.InputImmediate(1));  \
433         }                                                        \
434       } else {                                                   \
435         if (HasRegisterInput(instr, 1)) {                        \
436           __ asm_instr(i.InputRegister(0), i.InputRegister(1));  \
437         } else {                                                 \
438           __ asm_instr(i.InputRegister(0), i.InputOperand(1));   \
439         }                                                        \
440       }                                                          \
441     }                                                            \
442   } while (false)
443 
444 #define ASSEMBLE_MULT(asm_instr)                              \
445   do {                                                        \
446     if (HasImmediateInput(instr, 1)) {                        \
447       if (HasRegisterInput(instr, 0)) {                       \
448         __ asm_instr(i.OutputRegister(), i.InputRegister(0),  \
449                      i.InputImmediate(1));                    \
450       } else {                                                \
451         __ asm_instr(i.OutputRegister(), i.InputOperand(0),   \
452                      i.InputImmediate(1));                    \
453       }                                                       \
454     } else {                                                  \
455       if (HasRegisterInput(instr, 1)) {                       \
456         __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \
457       } else {                                                \
458         __ asm_instr(i.OutputRegister(), i.InputOperand(1));  \
459       }                                                       \
460     }                                                         \
461   } while (false)
462 
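// Shifts take the count either as an immediate or implicitly in the cl
// register (the `_cl` assembler variants below).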
463 #define ASSEMBLE_SHIFT(asm_instr, width)                                   \
464   do {                                                                     \
465     if (HasImmediateInput(instr, 1)) {                                     \
466       if (instr->Output()->IsRegister()) {                                 \
467         __ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \
468       } else {                                                             \
469         __ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1)));  \
470       }                                                                    \
471     } else {                                                               \
472       if (instr->Output()->IsRegister()) {                                 \
473         __ asm_instr##_cl(i.OutputRegister());                             \
474       } else {                                                             \
475         __ asm_instr##_cl(i.OutputOperand());                              \
476       }                                                                    \
477     }                                                                      \
478   } while (false)
479 
480 #define ASSEMBLE_MOVX(asm_instr)                            \
481   do {                                                      \
482     if (HasAddressingMode(instr)) {                         \
483       __ asm_instr(i.OutputRegister(), i.MemoryOperand());  \
484     } else if (HasRegisterInput(instr, 0)) {                \
485       __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
486     } else {                                                \
487       __ asm_instr(i.OutputRegister(), i.InputOperand(0));  \
488     }                                                       \
489   } while (false)
490 
491 #define ASSEMBLE_SSE_BINOP(asm_instr)                                   \
492   do {                                                                  \
493     if (instr->InputAt(1)->IsFPRegister()) {                            \
494       __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
495     } else {                                                            \
496       __ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1));        \
497     }                                                                   \
498   } while (false)
499 
500 #define ASSEMBLE_SSE_UNOP(asm_instr)                                    \
501   do {                                                                  \
502     if (instr->InputAt(0)->IsFPRegister()) {                            \
503       __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
504     } else {                                                            \
505       __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0));        \
506     }                                                                   \
507   } while (false)
508 
509 #define ASSEMBLE_AVX_BINOP(asm_instr)                                  \
510   do {                                                                 \
511     CpuFeatureScope avx_scope(tasm(), AVX);                            \
512     if (instr->InputAt(1)->IsFPRegister()) {                           \
513       __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
514                    i.InputDoubleRegister(1));                          \
515     } else {                                                           \
516       __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
517                    i.InputOperand(1));                                 \
518     }                                                                  \
519   } while (false)
520 
521 #define ASSEMBLE_IEEE754_BINOP(name)                                     \
522   do {                                                                   \
523     __ PrepareCallCFunction(2);                                          \
524     __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
525   } while (false)
526 
527 #define ASSEMBLE_IEEE754_UNOP(name)                                      \
528   do {                                                                   \
529     __ PrepareCallCFunction(1);                                          \
530     __ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \
531   } while (false)
532 
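// Emits a compare-and-swap loop: load the old value into rax, apply the
// binary operation into a temp register, then `lock cmpxchg` it back; retry if
// another thread changed the memory location in the meantime.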
533 #define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
534   do {                                                          \
535     Label binop;                                                \
536     __ bind(&binop);                                            \
537     __ mov_inst(rax, i.MemoryOperand(1));                       \
538     __ movl(i.TempRegister(0), rax);                            \
539     __ bin_inst(i.TempRegister(0), i.InputRegister(0));         \
540     __ lock();                                                  \
541     __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));     \
542     __ j(not_equal, &binop);                                    \
543   } while (false)
544 
545 #define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
546   do {                                                            \
547     Label binop;                                                  \
548     __ bind(&binop);                                              \
549     __ mov_inst(rax, i.MemoryOperand(1));                         \
550     __ movq(i.TempRegister(0), rax);                              \
551     __ bin_inst(i.TempRegister(0), i.InputRegister(0));           \
552     __ lock();                                                    \
553     __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));       \
554     __ j(not_equal, &binop);                                      \
555   } while (false)
556 
557 #define ASSEMBLE_SIMD_INSTR(opcode, dst_operand, index)      \
558   do {                                                       \
559     if (instr->InputAt(index)->IsSimd128Register()) {        \
560       __ opcode(dst_operand, i.InputSimd128Register(index)); \
561     } else {                                                 \
562       __ opcode(dst_operand, i.InputOperand(index));         \
563     }                                                        \
564   } while (false)
565 
566 #define ASSEMBLE_SIMD_IMM_INSTR(opcode, dst_operand, index, imm)  \
567   do {                                                            \
568     if (instr->InputAt(index)->IsSimd128Register()) {             \
569       __ opcode(dst_operand, i.InputSimd128Register(index), imm); \
570     } else {                                                      \
571       __ opcode(dst_operand, i.InputOperand(index), imm);         \
572     }                                                             \
573   } while (false)
574 
575 #define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode)             \
576   do {                                                   \
577     XMMRegister dst = i.OutputSimd128Register();         \
578     DCHECK_EQ(dst, i.InputSimd128Register(0));           \
579     byte input_index = instr->InputCount() == 2 ? 1 : 0; \
580     ASSEMBLE_SIMD_INSTR(opcode, dst, input_index);       \
581   } while (false)
582 
583 #define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, imm)                            \
584   do {                                                                    \
585     DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));      \
586     __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1), imm); \
587   } while (false)
588 
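// Sets the output register to 1 iff every lane of the input is non-zero:
// compare the input lane-wise against zero, then ptest whether any lane
// compared equal (i.e. was zero).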
589 #define ASSEMBLE_SIMD_ALL_TRUE(opcode)          \
590   do {                                          \
591     CpuFeatureScope sse_scope(tasm(), SSE4_1);  \
592     Register dst = i.OutputRegister();          \
593     XMMRegister tmp = i.TempSimd128Register(0); \
594     __ xorq(dst, dst);                          \
595     __ Pxor(tmp, tmp);                          \
596     __ opcode(tmp, i.InputSimd128Register(0));  \
597     __ Ptest(tmp, tmp);                         \
598     __ setcc(equal, dst);                       \
599   } while (false)
600 
601 // This macro will directly emit the opcode if the shift is an immediate - the
602 // shift value will be taken modulo 2^width. Otherwise, it will emit code to
603 // perform the modulus operation.
604 #define ASSEMBLE_SIMD_SHIFT(opcode, width)                     \
605   do {                                                         \
606     XMMRegister dst = i.OutputSimd128Register();               \
607     DCHECK_EQ(dst, i.InputSimd128Register(0));                 \
608     if (HasImmediateInput(instr, 1)) {                         \
609       __ opcode(dst, static_cast<byte>(i.InputInt##width(1))); \
610     } else {                                                   \
611       XMMRegister tmp = i.TempSimd128Register(0);              \
612       Register shift = i.InputRegister(1);                     \
613       constexpr int mask = (1 << width) - 1;                   \
614       __ andq(shift, Immediate(mask));                         \
615       __ Movq(tmp, shift);                                     \
616       __ opcode(dst, tmp);                                     \
617     }                                                          \
618   } while (false)
619 
620 void CodeGenerator::AssembleDeconstructFrame() {
621   unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
622   __ movq(rsp, rbp);
623   __ popq(rbp);
624 }
625 
626 void CodeGenerator::AssemblePrepareTailCall() {
627   if (frame_access_state()->has_frame()) {
628     __ movq(rbp, MemOperand(rbp, 0));
629   }
630   frame_access_state()->SetFrameAccessToSP();
631 }
632 
633 void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
634                                                      Register scratch1,
635                                                      Register scratch2,
636                                                      Register scratch3) {
637   DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
638   Label done;
639 
640   // Check if current frame is an arguments adaptor frame.
641   __ cmpq(Operand(rbp, CommonFrameConstants::kContextOrFrameTypeOffset),
642           Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
643   __ j(not_equal, &done, Label::kNear);
644 
645   // Load the arguments count from the current arguments adaptor frame (note
646   // that it does not include the receiver).
647   Register caller_args_count_reg = scratch1;
648   __ SmiUntag(caller_args_count_reg,
649               Operand(rbp, ArgumentsAdaptorFrameConstants::kLengthOffset));
650 
651   __ PrepareForTailCall(args_reg, caller_args_count_reg, scratch2, scratch3);
652   __ bind(&done);
653 }
654 
655 namespace {
656 
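// Grows or shrinks the stack so that the first unused slot above the stack
// pointer corresponds to new_slot_above_sp, keeping the frame access state's
// SP delta in sync.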
657 void AdjustStackPointerForTailCall(TurboAssembler* assembler,
658                                    FrameAccessState* state,
659                                    int new_slot_above_sp,
660                                    bool allow_shrinkage = true) {
661   int current_sp_offset = state->GetSPToFPSlotCount() +
662                           StandardFrameConstants::kFixedSlotCountAboveFp;
663   int stack_slot_delta = new_slot_above_sp - current_sp_offset;
664   if (stack_slot_delta > 0) {
665     assembler->AllocateStackSpace(stack_slot_delta * kSystemPointerSize);
666     state->IncreaseSPDelta(stack_slot_delta);
667   } else if (allow_shrinkage && stack_slot_delta < 0) {
668     assembler->addq(rsp, Immediate(-stack_slot_delta * kSystemPointerSize));
669     state->IncreaseSPDelta(stack_slot_delta);
670   }
671 }
672 
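// Pushes the 16-byte shuffle mask onto the stack as two 8-byte halves, high
// half first, so the whole mask ends up contiguous with lane 0 at the lowest
// address.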
673 void SetupShuffleMaskOnStack(TurboAssembler* assembler, uint32_t* mask) {
674   int64_t shuffle_mask = (mask[2]) | (static_cast<uint64_t>(mask[3]) << 32);
675   assembler->movq(kScratchRegister, shuffle_mask);
676   assembler->Push(kScratchRegister);
677   shuffle_mask = (mask[0]) | (static_cast<uint64_t>(mask[1]) << 32);
678   assembler->movq(kScratchRegister, shuffle_mask);
679   assembler->Push(kScratchRegister);
680 }
681 
682 }  // namespace
683 
684 void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
685                                               int first_unused_stack_slot) {
686   CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
687   ZoneVector<MoveOperands*> pushes(zone());
688   GetPushCompatibleMoves(instr, flags, &pushes);
689 
690   if (!pushes.empty() &&
691       (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
692        first_unused_stack_slot)) {
693     X64OperandConverter g(this, instr);
694     for (auto move : pushes) {
695       LocationOperand destination_location(
696           LocationOperand::cast(move->destination()));
697       InstructionOperand source(move->source());
698       AdjustStackPointerForTailCall(tasm(), frame_access_state(),
699                                     destination_location.index());
700       if (source.IsStackSlot()) {
701         LocationOperand source_location(LocationOperand::cast(source));
702         __ Push(g.SlotToOperand(source_location.index()));
703       } else if (source.IsRegister()) {
704         LocationOperand source_location(LocationOperand::cast(source));
705         __ Push(source_location.GetRegister());
706       } else if (source.IsImmediate()) {
707         __ Push(Immediate(ImmediateOperand::cast(source).inline_value()));
708       } else {
709         // Pushes of non-scalar data types are not supported.
710         UNIMPLEMENTED();
711       }
712       frame_access_state()->IncreaseSPDelta(1);
713       move->Eliminate();
714     }
715   }
716   AdjustStackPointerForTailCall(tasm(), frame_access_state(),
717                                 first_unused_stack_slot, false);
718 }
719 
720 void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
721                                              int first_unused_stack_slot) {
722   AdjustStackPointerForTailCall(tasm(), frame_access_state(),
723                                 first_unused_stack_slot);
724 }
725 
726 // Check that {kJavaScriptCallCodeStartRegister} is correct.
727 void CodeGenerator::AssembleCodeStartRegisterCheck() {
728   __ ComputeCodeStartAddress(rbx);
729   __ cmpq(rbx, kJavaScriptCallCodeStartRegister);
730   __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
731 }
732 
733 // Check if the code object is marked for deoptimization. If it is, then it
734 // jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
735 // to:
736 //    1. read from memory the word that contains that bit, which can be found in
737 //       the flags in the referenced {CodeDataContainer} object;
738 //    2. test kMarkedForDeoptimizationBit in those flags; and
739 //    3. if it is not zero then it jumps to the builtin.
740 void CodeGenerator::BailoutIfDeoptimized() {
741   int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
742   __ LoadTaggedPointerField(rbx,
743                             Operand(kJavaScriptCallCodeStartRegister, offset));
744   __ testl(FieldOperand(rbx, CodeDataContainer::kKindSpecificFlagsOffset),
745            Immediate(1 << Code::kMarkedForDeoptimizationBit));
746   __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
747           RelocInfo::CODE_TARGET, not_zero);
748 }
749 
750 void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
751   // Set a mask which has all bits set in the normal case, but has all
752   // bits cleared if we are speculatively executing the wrong PC.
753   __ ComputeCodeStartAddress(rbx);
754   __ xorq(kSpeculationPoisonRegister, kSpeculationPoisonRegister);
755   __ cmpq(kJavaScriptCallCodeStartRegister, rbx);
756   __ movq(rbx, Immediate(-1));
757   __ cmovq(equal, kSpeculationPoisonRegister, rbx);
758 }
759 
760 void CodeGenerator::AssembleRegisterArgumentPoisoning() {
761   __ andq(kJSFunctionRegister, kSpeculationPoisonRegister);
762   __ andq(kContextRegister, kSpeculationPoisonRegister);
763   __ andq(rsp, kSpeculationPoisonRegister);
764 }
765 
766 // Assembles an instruction after register allocation, producing machine code.
767 CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
768     Instruction* instr) {
769   X64OperandConverter i(this, instr);
770   InstructionCode opcode = instr->opcode();
771   ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
772   switch (arch_opcode) {
773     case kArchCallCodeObject: {
774       if (HasImmediateInput(instr, 0)) {
775         Handle<Code> code = i.InputCode(0);
776         __ Call(code, RelocInfo::CODE_TARGET);
777       } else {
778         Register reg = i.InputRegister(0);
779         DCHECK_IMPLIES(
780             HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
781             reg == kJavaScriptCallCodeStartRegister);
782         __ LoadCodeObjectEntry(reg, reg);
783         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
784           __ RetpolineCall(reg);
785         } else {
786           __ call(reg);
787         }
788       }
789       RecordCallPosition(instr);
790       frame_access_state()->ClearSPDelta();
791       break;
792     }
793     case kArchCallBuiltinPointer: {
794       DCHECK(!HasImmediateInput(instr, 0));
795       Register builtin_index = i.InputRegister(0);
796       __ CallBuiltinByIndex(builtin_index);
797       RecordCallPosition(instr);
798       frame_access_state()->ClearSPDelta();
799       break;
800     }
801     case kArchCallWasmFunction: {
802       if (HasImmediateInput(instr, 0)) {
803         Constant constant = i.ToConstant(instr->InputAt(0));
804         Address wasm_code = static_cast<Address>(constant.ToInt64());
805         if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
806           __ near_call(wasm_code, constant.rmode());
807         } else {
808           if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
809             __ RetpolineCall(wasm_code, constant.rmode());
810           } else {
811             __ Call(wasm_code, constant.rmode());
812           }
813         }
814       } else {
815         Register reg = i.InputRegister(0);
816         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
817           __ RetpolineCall(reg);
818         } else {
819           __ call(reg);
820         }
821       }
822       RecordCallPosition(instr);
823       frame_access_state()->ClearSPDelta();
824       break;
825     }
826     case kArchTailCallCodeObjectFromJSFunction:
827     case kArchTailCallCodeObject: {
828       if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
829         AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
830                                          i.TempRegister(0), i.TempRegister(1),
831                                          i.TempRegister(2));
832       }
833       if (HasImmediateInput(instr, 0)) {
834         Handle<Code> code = i.InputCode(0);
835         __ Jump(code, RelocInfo::CODE_TARGET);
836       } else {
837         Register reg = i.InputRegister(0);
838         DCHECK_IMPLIES(
839             HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
840             reg == kJavaScriptCallCodeStartRegister);
841         __ LoadCodeObjectEntry(reg, reg);
842         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
843           __ RetpolineJump(reg);
844         } else {
845           __ jmp(reg);
846         }
847       }
848       unwinding_info_writer_.MarkBlockWillExit();
849       frame_access_state()->ClearSPDelta();
850       frame_access_state()->SetFrameAccessToDefault();
851       break;
852     }
853     case kArchTailCallWasm: {
854       if (HasImmediateInput(instr, 0)) {
855         Constant constant = i.ToConstant(instr->InputAt(0));
856         Address wasm_code = static_cast<Address>(constant.ToInt64());
857         if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
858           __ near_jmp(wasm_code, constant.rmode());
859         } else {
860           __ Move(kScratchRegister, wasm_code, constant.rmode());
861           __ jmp(kScratchRegister);
862         }
863       } else {
864         Register reg = i.InputRegister(0);
865         if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
866           __ RetpolineJump(reg);
867         } else {
868           __ jmp(reg);
869         }
870       }
871       unwinding_info_writer_.MarkBlockWillExit();
872       frame_access_state()->ClearSPDelta();
873       frame_access_state()->SetFrameAccessToDefault();
874       break;
875     }
876     case kArchTailCallAddress: {
877       CHECK(!HasImmediateInput(instr, 0));
878       Register reg = i.InputRegister(0);
879       DCHECK_IMPLIES(
880           HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
881           reg == kJavaScriptCallCodeStartRegister);
882       if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
883         __ RetpolineJump(reg);
884       } else {
885         __ jmp(reg);
886       }
887       unwinding_info_writer_.MarkBlockWillExit();
888       frame_access_state()->ClearSPDelta();
889       frame_access_state()->SetFrameAccessToDefault();
890       break;
891     }
892     case kArchCallJSFunction: {
893       Register func = i.InputRegister(0);
894       if (FLAG_debug_code) {
895         // Check the function's context matches the context argument.
896         __ cmp_tagged(rsi, FieldOperand(func, JSFunction::kContextOffset));
897         __ Assert(equal, AbortReason::kWrongFunctionContext);
898       }
899       static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch");
900       __ LoadTaggedPointerField(rcx,
901                                 FieldOperand(func, JSFunction::kCodeOffset));
902       __ CallCodeObject(rcx);
903       frame_access_state()->ClearSPDelta();
904       RecordCallPosition(instr);
905       break;
906     }
907     case kArchPrepareCallCFunction: {
908       // Frame alignment requires using FP-relative frame addressing.
909       frame_access_state()->SetFrameAccessToFP();
910       int const num_parameters = MiscField::decode(instr->opcode());
911       __ PrepareCallCFunction(num_parameters);
912       break;
913     }
914     case kArchSaveCallerRegisters: {
915       fp_mode_ =
916           static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
917       DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
918       // kReturnRegister0 should have been saved before entering the stub.
919       int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
920       DCHECK(IsAligned(bytes, kSystemPointerSize));
921       DCHECK_EQ(0, frame_access_state()->sp_delta());
922       frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
923       DCHECK(!caller_registers_saved_);
924       caller_registers_saved_ = true;
925       break;
926     }
927     case kArchRestoreCallerRegisters: {
928       DCHECK(fp_mode_ ==
929              static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
930       DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
931       // Don't overwrite the returned value.
932       int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
933       frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
934       DCHECK_EQ(0, frame_access_state()->sp_delta());
935       DCHECK(caller_registers_saved_);
936       caller_registers_saved_ = false;
937       break;
938     }
939     case kArchPrepareTailCall:
940       AssemblePrepareTailCall();
941       break;
942     case kArchCallCFunction: {
943       int const num_parameters = MiscField::decode(instr->opcode());
944       Label return_location;
945       if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
946         // Put the return address in a stack slot.
947         __ leaq(kScratchRegister, Operand(&return_location, 0));
948         __ movq(MemOperand(rbp, WasmExitFrameConstants::kCallingPCOffset),
949                 kScratchRegister);
950       }
951       if (HasImmediateInput(instr, 0)) {
952         ExternalReference ref = i.InputExternalReference(0);
953         __ CallCFunction(ref, num_parameters);
954       } else {
955         Register func = i.InputRegister(0);
956         __ CallCFunction(func, num_parameters);
957       }
958       __ bind(&return_location);
959       if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
960         RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt);
961       }
962       frame_access_state()->SetFrameAccessToDefault();
963       // Ideally, we should decrement SP delta to match the change of stack
964       // pointer in CallCFunction. However, for certain architectures (e.g.
965       // ARM), there may be more strict alignment requirement, causing old SP
966       // to be saved on the stack. In those cases, we can not calculate the SP
967       // delta statically.
968       frame_access_state()->ClearSPDelta();
969       if (caller_registers_saved_) {
970         // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
971         // Here, we assume the sequence to be:
972         //   kArchSaveCallerRegisters;
973         //   kArchCallCFunction;
974         //   kArchRestoreCallerRegisters;
975         int bytes =
976             __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
977         frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
978       }
979       // TODO(tebbi): Do we need an lfence here?
980       break;
981     }
982     case kArchJmp:
983       AssembleArchJump(i.InputRpo(0));
984       break;
985     case kArchBinarySearchSwitch:
986       AssembleArchBinarySearchSwitch(instr);
987       break;
988     case kArchTableSwitch:
989       AssembleArchTableSwitch(instr);
990       break;
991     case kArchComment:
992       __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
993       break;
994     case kArchAbortCSAAssert:
995       DCHECK(i.InputRegister(0) == rdx);
996       {
997         // We don't actually want to generate a pile of code for this, so just
998         // claim there is a stack frame, without generating one.
999         FrameScope scope(tasm(), StackFrame::NONE);
1000         __ Call(
1001             isolate()->builtins()->builtin_handle(Builtins::kAbortCSAAssert),
1002             RelocInfo::CODE_TARGET);
1003       }
1004       __ int3();
1005       unwinding_info_writer_.MarkBlockWillExit();
1006       break;
1007     case kArchDebugBreak:
1008       __ DebugBreak();
1009       break;
1010     case kArchThrowTerminator:
1011       unwinding_info_writer_.MarkBlockWillExit();
1012       break;
1013     case kArchNop:
1014       // don't emit code for nops.
1015       break;
1016     case kArchDeoptimize: {
1017       DeoptimizationExit* exit =
1018           BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
1019       CodeGenResult result = AssembleDeoptimizerCall(exit);
1020       if (result != kSuccess) return result;
1021       unwinding_info_writer_.MarkBlockWillExit();
1022       break;
1023     }
1024     case kArchRet:
1025       AssembleReturn(instr->InputAt(0));
1026       break;
1027     case kArchFramePointer:
1028       __ movq(i.OutputRegister(), rbp);
1029       break;
1030     case kArchParentFramePointer:
1031       if (frame_access_state()->has_frame()) {
1032         __ movq(i.OutputRegister(), Operand(rbp, 0));
1033       } else {
1034         __ movq(i.OutputRegister(), rbp);
1035       }
1036       break;
1037     case kArchStackPointerGreaterThan: {
1038       // Potentially apply an offset to the current stack pointer before the
1039       // comparison to consider the size difference of an optimized frame versus
1040       // the contained unoptimized frames.
1041 
1042       Register lhs_register = rsp;
1043       uint32_t offset;
1044 
1045       if (ShouldApplyOffsetToStackCheck(instr, &offset)) {
1046         lhs_register = kScratchRegister;
1047         __ leaq(lhs_register, Operand(rsp, static_cast<int32_t>(offset) * -1));
1048       }
1049 
1050       constexpr size_t kValueIndex = 0;
1051       if (HasAddressingMode(instr)) {
1052         __ cmpq(lhs_register, i.MemoryOperand(kValueIndex));
1053       } else {
1054         __ cmpq(lhs_register, i.InputRegister(kValueIndex));
1055       }
1056       break;
1057     }
1058     case kArchStackCheckOffset:
1059       __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
1060       break;
1061     case kArchTruncateDoubleToI: {
1062       auto result = i.OutputRegister();
1063       auto input = i.InputDoubleRegister(0);
1064       auto ool = new (zone()) OutOfLineTruncateDoubleToI(
1065           this, result, input, DetermineStubCallMode(),
1066           &unwinding_info_writer_);
1067       // We use Cvttsd2siq instead of Cvttsd2si for performance reasons. The
1068       // use of Cvttsd2siq requires the movl below to avoid sign extension.
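      // On overflow or NaN, Cvttsd2siq produces the single value
      // 0x8000000000000000 (INT64_MIN); comparing it with 1 sets the overflow
      // flag exactly in that case, dispatching to the out-of-line slow path.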
1069       __ Cvttsd2siq(result, input);
1070       __ cmpq(result, Immediate(1));
1071       __ j(overflow, ool->entry());
1072       __ bind(ool->exit());
1073       __ movl(result, result);
1074       break;
1075     }
1076     case kArchStoreWithWriteBarrier: {
1077       RecordWriteMode mode =
1078           static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
1079       Register object = i.InputRegister(0);
1080       size_t index = 0;
1081       Operand operand = i.MemoryOperand(&index);
1082       Register value = i.InputRegister(index);
1083       Register scratch0 = i.TempRegister(0);
1084       Register scratch1 = i.TempRegister(1);
1085       auto ool = new (zone())
1086           OutOfLineRecordWrite(this, object, operand, value, scratch0, scratch1,
1087                                mode, DetermineStubCallMode());
1088       __ StoreTaggedField(operand, value);
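      // Fast path: only jump to the out-of-line write barrier when the object
      // lives on a page whose outgoing pointers are of interest to the GC;
      // otherwise fall through past the out-of-line code.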
1089       __ CheckPageFlag(object, scratch0,
1090                        MemoryChunk::kPointersFromHereAreInterestingMask,
1091                        not_zero, ool->entry());
1092       __ bind(ool->exit());
1093       break;
1094     }
1095     case kArchWordPoisonOnSpeculation:
1096       DCHECK_EQ(i.OutputRegister(), i.InputRegister(0));
1097       __ andq(i.InputRegister(0), kSpeculationPoisonRegister);
1098       break;
1099     case kX64MFence:
1100       __ mfence();
1101       break;
1102     case kX64LFence:
1103       __ lfence();
1104       break;
1105     case kArchStackSlot: {
1106       FrameOffset offset =
1107           frame_access_state()->GetFrameOffset(i.InputInt32(0));
1108       Register base = offset.from_stack_pointer() ? rsp : rbp;
1109       __ leaq(i.OutputRegister(), Operand(base, offset.offset()));
1110       break;
1111     }
1112     case kIeee754Float64Acos:
1113       ASSEMBLE_IEEE754_UNOP(acos);
1114       break;
1115     case kIeee754Float64Acosh:
1116       ASSEMBLE_IEEE754_UNOP(acosh);
1117       break;
1118     case kIeee754Float64Asin:
1119       ASSEMBLE_IEEE754_UNOP(asin);
1120       break;
1121     case kIeee754Float64Asinh:
1122       ASSEMBLE_IEEE754_UNOP(asinh);
1123       break;
1124     case kIeee754Float64Atan:
1125       ASSEMBLE_IEEE754_UNOP(atan);
1126       break;
1127     case kIeee754Float64Atanh:
1128       ASSEMBLE_IEEE754_UNOP(atanh);
1129       break;
1130     case kIeee754Float64Atan2:
1131       ASSEMBLE_IEEE754_BINOP(atan2);
1132       break;
1133     case kIeee754Float64Cbrt:
1134       ASSEMBLE_IEEE754_UNOP(cbrt);
1135       break;
1136     case kIeee754Float64Cos:
1137       ASSEMBLE_IEEE754_UNOP(cos);
1138       break;
1139     case kIeee754Float64Cosh:
1140       ASSEMBLE_IEEE754_UNOP(cosh);
1141       break;
1142     case kIeee754Float64Exp:
1143       ASSEMBLE_IEEE754_UNOP(exp);
1144       break;
1145     case kIeee754Float64Expm1:
1146       ASSEMBLE_IEEE754_UNOP(expm1);
1147       break;
1148     case kIeee754Float64Log:
1149       ASSEMBLE_IEEE754_UNOP(log);
1150       break;
1151     case kIeee754Float64Log1p:
1152       ASSEMBLE_IEEE754_UNOP(log1p);
1153       break;
1154     case kIeee754Float64Log2:
1155       ASSEMBLE_IEEE754_UNOP(log2);
1156       break;
1157     case kIeee754Float64Log10:
1158       ASSEMBLE_IEEE754_UNOP(log10);
1159       break;
1160     case kIeee754Float64Pow:
1161       ASSEMBLE_IEEE754_BINOP(pow);
1162       break;
1163     case kIeee754Float64Sin:
1164       ASSEMBLE_IEEE754_UNOP(sin);
1165       break;
1166     case kIeee754Float64Sinh:
1167       ASSEMBLE_IEEE754_UNOP(sinh);
1168       break;
1169     case kIeee754Float64Tan:
1170       ASSEMBLE_IEEE754_UNOP(tan);
1171       break;
1172     case kIeee754Float64Tanh:
1173       ASSEMBLE_IEEE754_UNOP(tanh);
1174       break;
1175     case kX64Add32:
1176       ASSEMBLE_BINOP(addl);
1177       break;
1178     case kX64Add:
1179       ASSEMBLE_BINOP(addq);
1180       break;
1181     case kX64Sub32:
1182       ASSEMBLE_BINOP(subl);
1183       break;
1184     case kX64Sub:
1185       ASSEMBLE_BINOP(subq);
1186       break;
1187     case kX64And32:
1188       ASSEMBLE_BINOP(andl);
1189       break;
1190     case kX64And:
1191       ASSEMBLE_BINOP(andq);
1192       break;
1193     case kX64Cmp8:
1194       ASSEMBLE_COMPARE(cmpb);
1195       break;
1196     case kX64Cmp16:
1197       ASSEMBLE_COMPARE(cmpw);
1198       break;
1199     case kX64Cmp32:
1200       ASSEMBLE_COMPARE(cmpl);
1201       break;
1202     case kX64Cmp:
1203       ASSEMBLE_COMPARE(cmpq);
1204       break;
1205     case kX64Test8:
1206       ASSEMBLE_COMPARE(testb);
1207       break;
1208     case kX64Test16:
1209       ASSEMBLE_COMPARE(testw);
1210       break;
1211     case kX64Test32:
1212       ASSEMBLE_COMPARE(testl);
1213       break;
1214     case kX64Test:
1215       ASSEMBLE_COMPARE(testq);
1216       break;
1217     case kX64Imul32:
1218       ASSEMBLE_MULT(imull);
1219       break;
1220     case kX64Imul:
1221       ASSEMBLE_MULT(imulq);
1222       break;
1223     case kX64ImulHigh32:
1224       if (HasRegisterInput(instr, 1)) {
1225         __ imull(i.InputRegister(1));
1226       } else {
1227         __ imull(i.InputOperand(1));
1228       }
1229       break;
1230     case kX64UmulHigh32:
1231       if (HasRegisterInput(instr, 1)) {
1232         __ mull(i.InputRegister(1));
1233       } else {
1234         __ mull(i.InputOperand(1));
1235       }
1236       break;
1237     case kX64Idiv32:
1238       __ cdq();
1239       __ idivl(i.InputRegister(1));
1240       break;
1241     case kX64Idiv:
1242       __ cqo();
1243       __ idivq(i.InputRegister(1));
1244       break;
1245     case kX64Udiv32:
1246       __ xorl(rdx, rdx);
1247       __ divl(i.InputRegister(1));
1248       break;
1249     case kX64Udiv:
1250       __ xorq(rdx, rdx);
1251       __ divq(i.InputRegister(1));
1252       break;
1253     case kX64Not:
1254       ASSEMBLE_UNOP(notq);
1255       break;
1256     case kX64Not32:
1257       ASSEMBLE_UNOP(notl);
1258       break;
1259     case kX64Neg:
1260       ASSEMBLE_UNOP(negq);
1261       break;
1262     case kX64Neg32:
1263       ASSEMBLE_UNOP(negl);
1264       break;
1265     case kX64Or32:
1266       ASSEMBLE_BINOP(orl);
1267       break;
1268     case kX64Or:
1269       ASSEMBLE_BINOP(orq);
1270       break;
1271     case kX64Xor32:
1272       ASSEMBLE_BINOP(xorl);
1273       break;
1274     case kX64Xor:
1275       ASSEMBLE_BINOP(xorq);
1276       break;
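    // The second ASSEMBLE_SHIFT argument is the width of the shift count in
    // bits (5 for 32-bit, 6 for 64-bit shifts), matching the hardware's
    // implicit masking of the count.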
1277     case kX64Shl32:
1278       ASSEMBLE_SHIFT(shll, 5);
1279       break;
1280     case kX64Shl:
1281       ASSEMBLE_SHIFT(shlq, 6);
1282       break;
1283     case kX64Shr32:
1284       ASSEMBLE_SHIFT(shrl, 5);
1285       break;
1286     case kX64Shr:
1287       ASSEMBLE_SHIFT(shrq, 6);
1288       break;
1289     case kX64Sar32:
1290       ASSEMBLE_SHIFT(sarl, 5);
1291       break;
1292     case kX64Sar:
1293       ASSEMBLE_SHIFT(sarq, 6);
1294       break;
1295     case kX64Ror32:
1296       ASSEMBLE_SHIFT(rorl, 5);
1297       break;
1298     case kX64Ror:
1299       ASSEMBLE_SHIFT(rorq, 6);
1300       break;
1301     case kX64Lzcnt:
1302       if (HasRegisterInput(instr, 0)) {
1303         __ Lzcntq(i.OutputRegister(), i.InputRegister(0));
1304       } else {
1305         __ Lzcntq(i.OutputRegister(), i.InputOperand(0));
1306       }
1307       break;
1308     case kX64Lzcnt32:
1309       if (HasRegisterInput(instr, 0)) {
1310         __ Lzcntl(i.OutputRegister(), i.InputRegister(0));
1311       } else {
1312         __ Lzcntl(i.OutputRegister(), i.InputOperand(0));
1313       }
1314       break;
1315     case kX64Tzcnt:
1316       if (HasRegisterInput(instr, 0)) {
1317         __ Tzcntq(i.OutputRegister(), i.InputRegister(0));
1318       } else {
1319         __ Tzcntq(i.OutputRegister(), i.InputOperand(0));
1320       }
1321       break;
1322     case kX64Tzcnt32:
1323       if (HasRegisterInput(instr, 0)) {
1324         __ Tzcntl(i.OutputRegister(), i.InputRegister(0));
1325       } else {
1326         __ Tzcntl(i.OutputRegister(), i.InputOperand(0));
1327       }
1328       break;
1329     case kX64Popcnt:
1330       if (HasRegisterInput(instr, 0)) {
1331         __ Popcntq(i.OutputRegister(), i.InputRegister(0));
1332       } else {
1333         __ Popcntq(i.OutputRegister(), i.InputOperand(0));
1334       }
1335       break;
1336     case kX64Popcnt32:
1337       if (HasRegisterInput(instr, 0)) {
1338         __ Popcntl(i.OutputRegister(), i.InputRegister(0));
1339       } else {
1340         __ Popcntl(i.OutputRegister(), i.InputOperand(0));
1341       }
1342       break;
1343     case kX64Bswap:
1344       __ bswapq(i.OutputRegister());
1345       break;
1346     case kX64Bswap32:
1347       __ bswapl(i.OutputRegister());
1348       break;
1349     case kSSEFloat32Cmp:
1350       ASSEMBLE_SSE_BINOP(Ucomiss);
1351       break;
1352     case kSSEFloat32Add:
1353       ASSEMBLE_SSE_BINOP(addss);
1354       break;
1355     case kSSEFloat32Sub:
1356       ASSEMBLE_SSE_BINOP(subss);
1357       break;
1358     case kSSEFloat32Mul:
1359       ASSEMBLE_SSE_BINOP(mulss);
1360       break;
1361     case kSSEFloat32Div:
1362       ASSEMBLE_SSE_BINOP(divss);
1363       // Don't delete this mov. It may improve performance on some CPUs,
1364       // when there is a (v)mulss depending on the result.
1365       __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1366       break;
1367     case kSSEFloat32Abs: {
1368       // TODO(bmeurer): Use RIP relative 128-bit constants.
1369       XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
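      // All-ones shifted right by 33 leaves 0x7FFFFFFF in the low dword;
      // andps with it clears the sign bit of the scalar float.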
1370       __ Pcmpeqd(tmp, tmp);
1371       __ Psrlq(tmp, 33);
1372       __ Andps(i.OutputDoubleRegister(), tmp);
1373       break;
1374     }
1375     case kSSEFloat32Neg: {
1376       // TODO(bmeurer): Use RIP relative 128-bit constants.
1377       XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
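      // All-ones shifted left by 31 leaves 0x80000000 in the low dword;
      // xorps with it flips the sign bit of the scalar float.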
1378       __ Pcmpeqd(tmp, tmp);
1379       __ Psllq(tmp, 31);
1380       __ Xorps(i.OutputDoubleRegister(), tmp);
1381       break;
1382     }
1383     case kSSEFloat32Sqrt:
1384       ASSEMBLE_SSE_UNOP(sqrtss);
1385       break;
1386     case kSSEFloat32ToFloat64:
1387       ASSEMBLE_SSE_UNOP(Cvtss2sd);
1388       break;
1389     case kSSEFloat32Round: {
1390       CpuFeatureScope sse_scope(tasm(), SSE4_1);
1391       RoundingMode const mode =
1392           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1393       __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1394       break;
1395     }
1396     case kSSEFloat32ToInt32:
1397       if (instr->InputAt(0)->IsFPRegister()) {
1398         __ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0));
1399       } else {
1400         __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
1401       }
1402       break;
1403     case kSSEFloat32ToUint32: {
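      // Convert via the 64-bit signed conversion: every uint32 value fits in
      // the non-negative range of int64.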
1404       if (instr->InputAt(0)->IsFPRegister()) {
1405         __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1406       } else {
1407         __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1408       }
1409       break;
1410     }
1411     case kSSEFloat64Cmp:
1412       ASSEMBLE_SSE_BINOP(Ucomisd);
1413       break;
1414     case kSSEFloat64Add:
1415       ASSEMBLE_SSE_BINOP(addsd);
1416       break;
1417     case kSSEFloat64Sub:
1418       ASSEMBLE_SSE_BINOP(subsd);
1419       break;
1420     case kSSEFloat64Mul:
1421       ASSEMBLE_SSE_BINOP(mulsd);
1422       break;
1423     case kSSEFloat64Div:
1424       ASSEMBLE_SSE_BINOP(divsd);
1425       // Don't delete this mov. It may improve performance on some CPUs,
1426       // when there is a (v)mulsd depending on the result.
1427       __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1428       break;
1429     case kSSEFloat64Mod: {
1430       __ AllocateStackSpace(kDoubleSize);
1431       unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1432                                                        kDoubleSize);
1433       // Move values to st(0) and st(1).
1434       __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
1435       __ fld_d(Operand(rsp, 0));
1436       __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
1437       __ fld_d(Operand(rsp, 0));
1438       // Loop while fprem isn't done.
1439       Label mod_loop;
1440       __ bind(&mod_loop);
      // This instruction traps on all kinds of inputs, but we are assuming
      // the floating point control word is set to ignore them all.
1443       __ fprem();
      // The following 2 instructions implicitly use rax.
1445       __ fnstsw_ax();
1446       if (CpuFeatures::IsSupported(SAHF)) {
1447         CpuFeatureScope sahf_scope(tasm(), SAHF);
1448         __ sahf();
1449       } else {
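        // Without SAHF, move the FPU status byte into RFLAGS via push/popfq
        // so the parity_even check below still observes the fprem C2
        // (incomplete) bit.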
1450         __ shrl(rax, Immediate(8));
1451         __ andl(rax, Immediate(0xFF));
1452         __ pushq(rax);
1453         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1454                                                          kSystemPointerSize);
1455         __ popfq();
1456         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1457                                                          -kSystemPointerSize);
1458       }
1459       __ j(parity_even, &mod_loop);
1460       // Move output to stack and clean up.
1461       __ fstp(1);
1462       __ fstp_d(Operand(rsp, 0));
1463       __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
1464       __ addq(rsp, Immediate(kDoubleSize));
1465       unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1466                                                        -kDoubleSize);
1467       break;
1468     }
1469     case kSSEFloat32Max: {
1470       Label compare_swap, done_compare;
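      // Hand-rolled max: branch to the NaN stub on an unordered compare, keep
      // input 0 when it is strictly greater, take input 1 when strictly less,
      // and on equality use the sign bit of input 0 so that max(-0, +0) is +0.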
1471       if (instr->InputAt(1)->IsFPRegister()) {
1472         __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1473       } else {
1474         __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1475       }
1476       auto ool =
1477           new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
1478       __ j(parity_even, ool->entry());
1479       __ j(above, &done_compare, Label::kNear);
1480       __ j(below, &compare_swap, Label::kNear);
1481       __ Movmskps(kScratchRegister, i.InputDoubleRegister(0));
1482       __ testl(kScratchRegister, Immediate(1));
1483       __ j(zero, &done_compare, Label::kNear);
1484       __ bind(&compare_swap);
1485       if (instr->InputAt(1)->IsFPRegister()) {
1486         __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1487       } else {
1488         __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1489       }
1490       __ bind(&done_compare);
1491       __ bind(ool->exit());
1492       break;
1493     }
1494     case kSSEFloat32Min: {
1495       Label compare_swap, done_compare;
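      // Mirror of the max case: keep input 0 when strictly smaller, and on
      // equality inspect the sign bit of input 1 so that min(-0, +0) is -0.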
1496       if (instr->InputAt(1)->IsFPRegister()) {
1497         __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1498       } else {
1499         __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1500       }
1501       auto ool =
1502           new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
1503       __ j(parity_even, ool->entry());
1504       __ j(below, &done_compare, Label::kNear);
1505       __ j(above, &compare_swap, Label::kNear);
1506       if (instr->InputAt(1)->IsFPRegister()) {
1507         __ Movmskps(kScratchRegister, i.InputDoubleRegister(1));
1508       } else {
1509         __ Movss(kScratchDoubleReg, i.InputOperand(1));
1510         __ Movmskps(kScratchRegister, kScratchDoubleReg);
1511       }
1512       __ testl(kScratchRegister, Immediate(1));
1513       __ j(zero, &done_compare, Label::kNear);
1514       __ bind(&compare_swap);
1515       if (instr->InputAt(1)->IsFPRegister()) {
1516         __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1517       } else {
1518         __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1519       }
1520       __ bind(&done_compare);
1521       __ bind(ool->exit());
1522       break;
1523     }
1524     case kSSEFloat64Max: {
1525       Label compare_swap, done_compare;
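      // Same scheme as kSSEFloat32Max, using double-precision compares and
      // movmskpd.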
1526       if (instr->InputAt(1)->IsFPRegister()) {
1527         __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1528       } else {
1529         __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1530       }
1531       auto ool =
1532           new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
1533       __ j(parity_even, ool->entry());
1534       __ j(above, &done_compare, Label::kNear);
1535       __ j(below, &compare_swap, Label::kNear);
1536       __ Movmskpd(kScratchRegister, i.InputDoubleRegister(0));
1537       __ testl(kScratchRegister, Immediate(1));
1538       __ j(zero, &done_compare, Label::kNear);
1539       __ bind(&compare_swap);
1540       if (instr->InputAt(1)->IsFPRegister()) {
1541         __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1542       } else {
1543         __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1544       }
1545       __ bind(&done_compare);
1546       __ bind(ool->exit());
1547       break;
1548     }
1549     case kSSEFloat64Min: {
1550       Label compare_swap, done_compare;
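      // Same scheme as kSSEFloat32Min, using double-precision compares and
      // movmskpd.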
1551       if (instr->InputAt(1)->IsFPRegister()) {
1552         __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1553       } else {
1554         __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1555       }
1556       auto ool =
1557           new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
1558       __ j(parity_even, ool->entry());
1559       __ j(below, &done_compare, Label::kNear);
1560       __ j(above, &compare_swap, Label::kNear);
1561       if (instr->InputAt(1)->IsFPRegister()) {
1562         __ Movmskpd(kScratchRegister, i.InputDoubleRegister(1));
1563       } else {
1564         __ Movsd(kScratchDoubleReg, i.InputOperand(1));
1565         __ Movmskpd(kScratchRegister, kScratchDoubleReg);
1566       }
1567       __ testl(kScratchRegister, Immediate(1));
1568       __ j(zero, &done_compare, Label::kNear);
1569       __ bind(&compare_swap);
1570       if (instr->InputAt(1)->IsFPRegister()) {
1571         __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1572       } else {
1573         __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1574       }
1575       __ bind(&done_compare);
1576       __ bind(ool->exit());
1577       break;
1578     }
1579     case kX64F64x2Abs:
1580     case kSSEFloat64Abs: {
1581       // TODO(bmeurer): Use RIP relative 128-bit constants.
1582       XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
1583       __ Pcmpeqd(tmp, tmp);
1584       __ Psrlq(tmp, 1);
1585       __ Andpd(i.OutputDoubleRegister(), tmp);
1586       break;
1587     }
1588     case kX64F64x2Neg:
1589     case kSSEFloat64Neg: {
1590       // TODO(bmeurer): Use RIP relative 128-bit constants.
1591       XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
1592       __ Pcmpeqd(tmp, tmp);
1593       __ Psllq(tmp, 63);
1594       __ Xorpd(i.OutputDoubleRegister(), tmp);
1595       break;
1596     }
1597     case kSSEFloat64Sqrt:
1598       ASSEMBLE_SSE_UNOP(Sqrtsd);
1599       break;
1600     case kSSEFloat64Round: {
1601       CpuFeatureScope sse_scope(tasm(), SSE4_1);
1602       RoundingMode const mode =
1603           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1604       __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1605       break;
1606     }
1607     case kSSEFloat64ToFloat32:
1608       ASSEMBLE_SSE_UNOP(Cvtsd2ss);
1609       break;
1610     case kSSEFloat64ToInt32:
1611       if (instr->InputAt(0)->IsFPRegister()) {
1612         __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
1613       } else {
1614         __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
1615       }
1616       break;
1617     case kSSEFloat64ToUint32: {
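      // As with the float32 case, convert through the 64-bit signed
      // conversion so the full uint32 range is representable.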
1618       if (instr->InputAt(0)->IsFPRegister()) {
1619         __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1620       } else {
1621         __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
1622       }
1623       if (MiscField::decode(instr->opcode())) {
1624         __ AssertZeroExtended(i.OutputRegister());
1625       }
1626       break;
1627     }
1628     case kSSEFloat32ToInt64:
1629       if (instr->InputAt(0)->IsFPRegister()) {
1630         __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1631       } else {
1632         __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1633       }
1634       if (instr->OutputCount() > 1) {
1635         __ Set(i.OutputRegister(1), 1);
1636         Label done;
1637         Label fail;
1638         __ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN));
1639         if (instr->InputAt(0)->IsFPRegister()) {
1640           __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
1641         } else {
1642           __ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
1643         }
1644         // If the input is NaN, then the conversion fails.
1645         __ j(parity_even, &fail, Label::kNear);
1646         // If the input is INT64_MIN, then the conversion succeeds.
1647         __ j(equal, &done, Label::kNear);
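        // Subtracting 1 overflows only when the result is INT64_MIN, so the
        // no_overflow branch below succeeds for every in-range result.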
1648         __ cmpq(i.OutputRegister(0), Immediate(1));
1649         // If the conversion results in INT64_MIN, but the input was not
1650         // INT64_MIN, then the conversion fails.
1651         __ j(no_overflow, &done, Label::kNear);
1652         __ bind(&fail);
1653         __ Set(i.OutputRegister(1), 0);
1654         __ bind(&done);
1655       }
1656       break;
1657     case kSSEFloat64ToInt64:
1658       if (instr->InputAt(0)->IsFPRegister()) {
1659         __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
1660       } else {
1661         __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
1662       }
1663       if (instr->OutputCount() > 1) {
1664         __ Set(i.OutputRegister(1), 1);
1665         Label done;
1666         Label fail;
1667         __ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN));
1668         if (instr->InputAt(0)->IsFPRegister()) {
1669           __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
1670         } else {
1671           __ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
1672         }
1673         // If the input is NaN, then the conversion fails.
1674         __ j(parity_even, &fail, Label::kNear);
1675         // If the input is INT64_MIN, then the conversion succeeds.
1676         __ j(equal, &done, Label::kNear);
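        // As above: cmpq with 1 overflows only for an INT64_MIN result.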
1677         __ cmpq(i.OutputRegister(0), Immediate(1));
1678         // If the conversion results in INT64_MIN, but the input was not
1679         // INT64_MIN, then the conversion fails.
1680         __ j(no_overflow, &done, Label::kNear);
1681         __ bind(&fail);
1682         __ Set(i.OutputRegister(1), 0);
1683         __ bind(&done);
1684       }
1685       break;
1686     case kSSEFloat32ToUint64: {
1687       Label fail;
1688       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
1689       if (instr->InputAt(0)->IsFPRegister()) {
1690         __ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
1691       } else {
1692         __ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
1693       }
1694       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
1695       __ bind(&fail);
1696       break;
1697     }
1698     case kSSEFloat64ToUint64: {
1699       Label fail;
1700       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
1701       if (instr->InputAt(0)->IsFPRegister()) {
1702         __ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
1703       } else {
1704         __ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
1705       }
1706       if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
1707       __ bind(&fail);
1708       break;
1709     }
1710     case kSSEInt32ToFloat64:
1711       if (HasRegisterInput(instr, 0)) {
1712         __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1713       } else {
1714         __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1715       }
1716       break;
1717     case kSSEInt32ToFloat32:
1718       if (HasRegisterInput(instr, 0)) {
1719         __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1720       } else {
1721         __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1722       }
1723       break;
1724     case kSSEInt64ToFloat32:
1725       if (HasRegisterInput(instr, 0)) {
1726         __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1727       } else {
1728         __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1729       }
1730       break;
1731     case kSSEInt64ToFloat64:
1732       if (HasRegisterInput(instr, 0)) {
1733         __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1734       } else {
1735         __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1736       }
1737       break;
1738     case kSSEUint64ToFloat32:
1739       if (HasRegisterInput(instr, 0)) {
1740         __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1741       } else {
1742         __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1743       }
1744       break;
1745     case kSSEUint64ToFloat64:
1746       if (HasRegisterInput(instr, 0)) {
1747         __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1748       } else {
1749         __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1750       }
1751       break;
1752     case kSSEUint32ToFloat64:
1753       if (HasRegisterInput(instr, 0)) {
1754         __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1755       } else {
1756         __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1757       }
1758       break;
1759     case kSSEUint32ToFloat32:
1760       if (HasRegisterInput(instr, 0)) {
1761         __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1762       } else {
1763         __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1764       }
1765       break;
1766     case kSSEFloat64ExtractLowWord32:
1767       if (instr->InputAt(0)->IsFPStackSlot()) {
1768         __ movl(i.OutputRegister(), i.InputOperand(0));
1769       } else {
1770         __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
1771       }
1772       break;
1773     case kSSEFloat64ExtractHighWord32:
1774       if (instr->InputAt(0)->IsFPStackSlot()) {
1775         __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
1776       } else {
1777         __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
1778       }
1779       break;
1780     case kSSEFloat64InsertLowWord32:
1781       if (HasRegisterInput(instr, 1)) {
1782         __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
1783       } else {
1784         __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
1785       }
1786       break;
1787     case kSSEFloat64InsertHighWord32:
1788       if (HasRegisterInput(instr, 1)) {
1789         __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
1790       } else {
1791         __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
1792       }
1793       break;
1794     case kSSEFloat64LoadLowWord32:
1795       if (HasRegisterInput(instr, 0)) {
1796         __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
1797       } else {
1798         __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
1799       }
1800       break;
1801     case kAVXFloat32Cmp: {
1802       CpuFeatureScope avx_scope(tasm(), AVX);
1803       if (instr->InputAt(1)->IsFPRegister()) {
1804         __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1805       } else {
1806         __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1807       }
1808       break;
1809     }
1810     case kAVXFloat32Add:
1811       ASSEMBLE_AVX_BINOP(vaddss);
1812       break;
1813     case kAVXFloat32Sub:
1814       ASSEMBLE_AVX_BINOP(vsubss);
1815       break;
1816     case kAVXFloat32Mul:
1817       ASSEMBLE_AVX_BINOP(vmulss);
1818       break;
1819     case kAVXFloat32Div:
1820       ASSEMBLE_AVX_BINOP(vdivss);
1821       // Don't delete this mov. It may improve performance on some CPUs,
1822       // when there is a (v)mulss depending on the result.
1823       __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1824       break;
1825     case kAVXFloat64Cmp: {
1826       CpuFeatureScope avx_scope(tasm(), AVX);
1827       if (instr->InputAt(1)->IsFPRegister()) {
1828         __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1829       } else {
1830         __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1831       }
1832       break;
1833     }
1834     case kAVXFloat64Add:
1835       ASSEMBLE_AVX_BINOP(vaddsd);
1836       break;
1837     case kAVXFloat64Sub:
1838       ASSEMBLE_AVX_BINOP(vsubsd);
1839       break;
1840     case kAVXFloat64Mul:
1841       ASSEMBLE_AVX_BINOP(vmulsd);
1842       break;
1843     case kAVXFloat64Div:
1844       ASSEMBLE_AVX_BINOP(vdivsd);
1845       // Don't delete this mov. It may improve performance on some CPUs,
1846       // when there is a (v)mulsd depending on the result.
1847       __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1848       break;
1849     case kAVXFloat32Abs: {
1850       // TODO(bmeurer): Use RIP relative 128-bit constants.
1851       CpuFeatureScope avx_scope(tasm(), AVX);
1852       XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
1853       __ vpcmpeqd(tmp, tmp, tmp);
1854       __ vpsrlq(tmp, tmp, 33);
1855       if (instr->InputAt(0)->IsFPRegister()) {
1856         __ vandps(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
1857       } else {
1858         __ vandps(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
1859       }
1860       break;
1861     }
1862     case kAVXFloat32Neg: {
1863       // TODO(bmeurer): Use RIP relative 128-bit constants.
1864       CpuFeatureScope avx_scope(tasm(), AVX);
1865       XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
1866       __ vpcmpeqd(tmp, tmp, tmp);
1867       __ vpsllq(tmp, tmp, 31);
1868       if (instr->InputAt(0)->IsFPRegister()) {
1869         __ vxorps(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
1870       } else {
1871         __ vxorps(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
1872       }
1873       break;
1874     }
1875     case kAVXFloat64Abs: {
1876       // TODO(bmeurer): Use RIP relative 128-bit constants.
1877       CpuFeatureScope avx_scope(tasm(), AVX);
1878       XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
1879       __ vpcmpeqd(tmp, tmp, tmp);
1880       __ vpsrlq(tmp, tmp, 1);
1881       if (instr->InputAt(0)->IsFPRegister()) {
1882         __ vandpd(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
1883       } else {
1884         __ vandpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
1885       }
1886       break;
1887     }
1888     case kAVXFloat64Neg: {
1889       // TODO(bmeurer): Use RIP relative 128-bit constants.
1890       CpuFeatureScope avx_scope(tasm(), AVX);
1891       XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
1892       __ vpcmpeqd(tmp, tmp, tmp);
1893       __ vpsllq(tmp, tmp, 63);
1894       if (instr->InputAt(0)->IsFPRegister()) {
1895         __ vxorpd(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
1896       } else {
1897         __ vxorpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
1898       }
1899       break;
1900     }
1901     case kSSEFloat64SilenceNaN:
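      // Subtracting +0 leaves ordinary values (including -0) unchanged but
      // quiets a signaling NaN input.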
1902       __ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
1903       __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
1904       break;
1905     case kX64Movsxbl:
1906       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
1907       ASSEMBLE_MOVX(movsxbl);
1908       __ AssertZeroExtended(i.OutputRegister());
1909       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1910       break;
1911     case kX64Movzxbl:
1912       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
1913       ASSEMBLE_MOVX(movzxbl);
1914       __ AssertZeroExtended(i.OutputRegister());
1915       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1916       break;
1917     case kX64Movsxbq:
1918       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
1919       ASSEMBLE_MOVX(movsxbq);
1920       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1921       break;
1922     case kX64Movzxbq:
1923       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
1924       ASSEMBLE_MOVX(movzxbq);
1925       __ AssertZeroExtended(i.OutputRegister());
1926       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1927       break;
1928     case kX64Movb: {
1929       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
1930       size_t index = 0;
1931       Operand operand = i.MemoryOperand(&index);
1932       if (HasImmediateInput(instr, index)) {
1933         __ movb(operand, Immediate(i.InputInt8(index)));
1934       } else {
1935         __ movb(operand, i.InputRegister(index));
1936       }
1937       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1938       break;
1939     }
1940     case kX64Movsxwl:
1941       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
1942       ASSEMBLE_MOVX(movsxwl);
1943       __ AssertZeroExtended(i.OutputRegister());
1944       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1945       break;
1946     case kX64Movzxwl:
1947       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
1948       ASSEMBLE_MOVX(movzxwl);
1949       __ AssertZeroExtended(i.OutputRegister());
1950       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1951       break;
1952     case kX64Movsxwq:
1953       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
1954       ASSEMBLE_MOVX(movsxwq);
1955       break;
1956     case kX64Movzxwq:
1957       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
1958       ASSEMBLE_MOVX(movzxwq);
1959       __ AssertZeroExtended(i.OutputRegister());
1960       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1961       break;
1962     case kX64Movw: {
1963       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
1964       size_t index = 0;
1965       Operand operand = i.MemoryOperand(&index);
1966       if (HasImmediateInput(instr, index)) {
1967         __ movw(operand, Immediate(i.InputInt16(index)));
1968       } else {
1969         __ movw(operand, i.InputRegister(index));
1970       }
1971       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1972       break;
1973     }
1974     case kX64Movl:
1975       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
1976       if (instr->HasOutput()) {
1977         if (HasAddressingMode(instr)) {
1978           __ movl(i.OutputRegister(), i.MemoryOperand());
1979         } else {
1980           if (HasRegisterInput(instr, 0)) {
1981             __ movl(i.OutputRegister(), i.InputRegister(0));
1982           } else {
1983             __ movl(i.OutputRegister(), i.InputOperand(0));
1984           }
1985         }
1986         __ AssertZeroExtended(i.OutputRegister());
1987       } else {
1988         size_t index = 0;
1989         Operand operand = i.MemoryOperand(&index);
1990         if (HasImmediateInput(instr, index)) {
1991           __ movl(operand, i.InputImmediate(index));
1992         } else {
1993           __ movl(operand, i.InputRegister(index));
1994         }
1995       }
1996       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1997       break;
1998     case kX64Movsxlq:
1999       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2000       ASSEMBLE_MOVX(movsxlq);
2001       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2002       break;
2003     case kX64MovqDecompressTaggedSigned: {
2004       CHECK(instr->HasOutput());
2005       __ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand());
2006       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2007       break;
2008     }
2009     case kX64MovqDecompressTaggedPointer: {
2010       CHECK(instr->HasOutput());
2011       __ DecompressTaggedPointer(i.OutputRegister(), i.MemoryOperand());
2012       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2013       break;
2014     }
2015     case kX64MovqDecompressAnyTagged: {
2016       CHECK(instr->HasOutput());
2017       __ DecompressAnyTagged(i.OutputRegister(), i.MemoryOperand());
2018       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2019       break;
2020     }
2021     case kX64MovqCompressTagged: {
2022       CHECK(!instr->HasOutput());
2023       size_t index = 0;
2024       Operand operand = i.MemoryOperand(&index);
2025       if (HasImmediateInput(instr, index)) {
2026         __ StoreTaggedField(operand, i.InputImmediate(index));
2027       } else {
2028         __ StoreTaggedField(operand, i.InputRegister(index));
2029       }
2030       break;
2031     }
2032     case kX64Movq:
2033       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2034       if (instr->HasOutput()) {
2035         __ movq(i.OutputRegister(), i.MemoryOperand());
2036       } else {
2037         size_t index = 0;
2038         Operand operand = i.MemoryOperand(&index);
2039         if (HasImmediateInput(instr, index)) {
2040           __ movq(operand, i.InputImmediate(index));
2041         } else {
2042           __ movq(operand, i.InputRegister(index));
2043         }
2044       }
2045       EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
2046       break;
2047     case kX64Movss:
2048       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2049       if (instr->HasOutput()) {
2050         __ Movss(i.OutputDoubleRegister(), i.MemoryOperand());
2051       } else {
2052         size_t index = 0;
2053         Operand operand = i.MemoryOperand(&index);
2054         __ Movss(operand, i.InputDoubleRegister(index));
2055       }
2056       break;
2057     case kX64Movsd: {
2058       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2059       if (instr->HasOutput()) {
2060         const MemoryAccessMode access_mode =
2061             static_cast<MemoryAccessMode>(MiscField::decode(opcode));
2062         if (access_mode == kMemoryAccessPoisoned) {
2063           // If we have to poison the loaded value, we load into a general
2064           // purpose register first, mask it with the poison, and move the
2065           // value from the general purpose register into the double register.
2066           __ movq(kScratchRegister, i.MemoryOperand());
2067           __ andq(kScratchRegister, kSpeculationPoisonRegister);
2068           __ Movq(i.OutputDoubleRegister(), kScratchRegister);
2069         } else {
2070           __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
2071         }
2072       } else {
2073         size_t index = 0;
2074         Operand operand = i.MemoryOperand(&index);
2075         __ Movsd(operand, i.InputDoubleRegister(index));
2076       }
2077       break;
2078     }
2079     case kX64Movdqu: {
2080       CpuFeatureScope sse_scope(tasm(), SSSE3);
2081       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2082       if (instr->HasOutput()) {
2083         __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
2084       } else {
2085         size_t index = 0;
2086         Operand operand = i.MemoryOperand(&index);
2087         __ Movdqu(operand, i.InputSimd128Register(index));
2088       }
2089       break;
2090     }
2091     case kX64BitcastFI:
2092       if (instr->InputAt(0)->IsFPStackSlot()) {
2093         __ movl(i.OutputRegister(), i.InputOperand(0));
2094       } else {
2095         __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
2096       }
2097       break;
2098     case kX64BitcastDL:
2099       if (instr->InputAt(0)->IsFPStackSlot()) {
2100         __ movq(i.OutputRegister(), i.InputOperand(0));
2101       } else {
2102         __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
2103       }
2104       break;
2105     case kX64BitcastIF:
2106       if (HasRegisterInput(instr, 0)) {
2107         __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
2108       } else {
2109         __ Movss(i.OutputDoubleRegister(), i.InputOperand(0));
2110       }
2111       break;
2112     case kX64BitcastLD:
2113       if (HasRegisterInput(instr, 0)) {
2114         __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
2115       } else {
2116         __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
2117       }
2118       break;
2119     case kX64Lea32: {
2120       AddressingMode mode = AddressingModeField::decode(instr->opcode());
2121       // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
2122       // and addressing mode just happens to work out. The "addl"/"subl" forms
2123       // in these cases are faster based on measurements.
2124       if (i.InputRegister(0) == i.OutputRegister()) {
2125         if (mode == kMode_MRI) {
2126           int32_t constant_summand = i.InputInt32(1);
2127           DCHECK_NE(0, constant_summand);
2128           if (constant_summand > 0) {
2129             __ addl(i.OutputRegister(), Immediate(constant_summand));
2130           } else {
2131             __ subl(i.OutputRegister(),
2132                     Immediate(base::NegateWithWraparound(constant_summand)));
2133           }
2134         } else if (mode == kMode_MR1) {
2135           if (i.InputRegister(1) == i.OutputRegister()) {
2136             __ shll(i.OutputRegister(), Immediate(1));
2137           } else {
2138             __ addl(i.OutputRegister(), i.InputRegister(1));
2139           }
2140         } else if (mode == kMode_M2) {
2141           __ shll(i.OutputRegister(), Immediate(1));
2142         } else if (mode == kMode_M4) {
2143           __ shll(i.OutputRegister(), Immediate(2));
2144         } else if (mode == kMode_M8) {
2145           __ shll(i.OutputRegister(), Immediate(3));
2146         } else {
2147           __ leal(i.OutputRegister(), i.MemoryOperand());
2148         }
2149       } else if (mode == kMode_MR1 &&
2150                  i.InputRegister(1) == i.OutputRegister()) {
2151         __ addl(i.OutputRegister(), i.InputRegister(0));
2152       } else {
2153         __ leal(i.OutputRegister(), i.MemoryOperand());
2154       }
2155       __ AssertZeroExtended(i.OutputRegister());
2156       break;
2157     }
2158     case kX64Lea: {
2159       AddressingMode mode = AddressingModeField::decode(instr->opcode());
2160       // Shorten "leaq" to "addq", "subq" or "shlq" if the register allocation
2161       // and addressing mode just happens to work out. The "addq"/"subq" forms
2162       // in these cases are faster based on measurements.
2163       if (i.InputRegister(0) == i.OutputRegister()) {
2164         if (mode == kMode_MRI) {
2165           int32_t constant_summand = i.InputInt32(1);
2166           if (constant_summand > 0) {
2167             __ addq(i.OutputRegister(), Immediate(constant_summand));
2168           } else if (constant_summand < 0) {
2169             __ subq(i.OutputRegister(), Immediate(-constant_summand));
2170           }
2171         } else if (mode == kMode_MR1) {
2172           if (i.InputRegister(1) == i.OutputRegister()) {
2173             __ shlq(i.OutputRegister(), Immediate(1));
2174           } else {
2175             __ addq(i.OutputRegister(), i.InputRegister(1));
2176           }
2177         } else if (mode == kMode_M2) {
2178           __ shlq(i.OutputRegister(), Immediate(1));
2179         } else if (mode == kMode_M4) {
2180           __ shlq(i.OutputRegister(), Immediate(2));
2181         } else if (mode == kMode_M8) {
2182           __ shlq(i.OutputRegister(), Immediate(3));
2183         } else {
2184           __ leaq(i.OutputRegister(), i.MemoryOperand());
2185         }
2186       } else if (mode == kMode_MR1 &&
2187                  i.InputRegister(1) == i.OutputRegister()) {
2188         __ addq(i.OutputRegister(), i.InputRegister(0));
2189       } else {
2190         __ leaq(i.OutputRegister(), i.MemoryOperand());
2191       }
2192       break;
2193     }
2194     case kX64Dec32:
2195       __ decl(i.OutputRegister());
2196       break;
2197     case kX64Inc32:
2198       __ incl(i.OutputRegister());
2199       break;
2200     case kX64Push:
2201       if (HasAddressingMode(instr)) {
2202         size_t index = 0;
2203         Operand operand = i.MemoryOperand(&index);
2204         __ pushq(operand);
2205         frame_access_state()->IncreaseSPDelta(1);
2206         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2207                                                          kSystemPointerSize);
2208       } else if (HasImmediateInput(instr, 0)) {
2209         __ pushq(i.InputImmediate(0));
2210         frame_access_state()->IncreaseSPDelta(1);
2211         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2212                                                          kSystemPointerSize);
2213       } else if (HasRegisterInput(instr, 0)) {
2214         __ pushq(i.InputRegister(0));
2215         frame_access_state()->IncreaseSPDelta(1);
2216         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2217                                                          kSystemPointerSize);
2218       } else if (instr->InputAt(0)->IsFloatRegister() ||
2219                  instr->InputAt(0)->IsDoubleRegister()) {
2220         // TODO(titzer): use another machine instruction?
2221         __ AllocateStackSpace(kDoubleSize);
2222         frame_access_state()->IncreaseSPDelta(kDoubleSize / kSystemPointerSize);
2223         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2224                                                          kDoubleSize);
2225         __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
2226       } else if (instr->InputAt(0)->IsSimd128Register()) {
2227         // TODO(titzer): use another machine instruction?
2228         __ AllocateStackSpace(kSimd128Size);
2229         frame_access_state()->IncreaseSPDelta(kSimd128Size /
2230                                               kSystemPointerSize);
2231         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2232                                                          kSimd128Size);
2233         __ Movups(Operand(rsp, 0), i.InputSimd128Register(0));
2234       } else if (instr->InputAt(0)->IsStackSlot() ||
2235                  instr->InputAt(0)->IsFloatStackSlot() ||
2236                  instr->InputAt(0)->IsDoubleStackSlot()) {
2237         __ pushq(i.InputOperand(0));
2238         frame_access_state()->IncreaseSPDelta(1);
2239         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2240                                                          kSystemPointerSize);
2241       } else {
2242         DCHECK(instr->InputAt(0)->IsSimd128StackSlot());
2243         __ Movups(kScratchDoubleReg, i.InputOperand(0));
2244         // TODO(titzer): use another machine instruction?
2245         __ AllocateStackSpace(kSimd128Size);
2246         frame_access_state()->IncreaseSPDelta(kSimd128Size /
2247                                               kSystemPointerSize);
2248         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2249                                                          kSimd128Size);
2250         __ Movups(Operand(rsp, 0), kScratchDoubleReg);
2251       }
2252       break;
2253     case kX64Poke: {
2254       int slot = MiscField::decode(instr->opcode());
2255       if (HasImmediateInput(instr, 0)) {
2256         __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputImmediate(0));
2257       } else {
2258         __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputRegister(0));
2259       }
2260       break;
2261     }
2262     case kX64Peek: {
2263       int reverse_slot = i.InputInt32(0);
2264       int offset =
2265           FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
2266       if (instr->OutputAt(0)->IsFPRegister()) {
2267         LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
2268         if (op->representation() == MachineRepresentation::kFloat64) {
2269           __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
2270         } else {
2271           DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
2272           __ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
2273         }
2274       } else {
2275         __ movq(i.OutputRegister(), Operand(rbp, offset));
2276       }
2277       break;
2278     }
2279     case kX64F64x2Splat: {
2280       CpuFeatureScope sse_scope(tasm(), SSE3);
2281       XMMRegister dst = i.OutputSimd128Register();
2282       if (instr->InputAt(0)->IsFPRegister()) {
2283         __ Movddup(dst, i.InputDoubleRegister(0));
2284       } else {
2285         __ Movddup(dst, i.InputOperand(0));
2286       }
2287       break;
2288     }
2289     case kX64F64x2ReplaceLane: {
2290       CpuFeatureScope sse_scope(tasm(), SSE4_1);
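      // pinsrq takes a GPR or memory source, so an XMM input is first moved
      // to kScratchRegister.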
2291       if (instr->InputAt(2)->IsFPRegister()) {
2292         __ Movq(kScratchRegister, i.InputDoubleRegister(2));
2293         __ Pinsrq(i.OutputSimd128Register(), kScratchRegister, i.InputInt8(1));
2294       } else {
2295         __ Pinsrq(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2296       }
2297       break;
2298     }
2299     case kX64F64x2ExtractLane: {
2300       CpuFeatureScope sse_scope(tasm(), SSE4_1);
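      // pextrq can only write to a general-purpose register, so the lane goes
      // through kScratchRegister before landing in the XMM output.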
2301       __ Pextrq(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
2302       __ Movq(i.OutputDoubleRegister(), kScratchRegister);
2303       break;
2304     }
2305     case kX64F64x2Sqrt: {
2306       __ Sqrtpd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2307       break;
2308     }
2309     case kX64F64x2Add: {
2310       ASSEMBLE_SSE_BINOP(Addpd);
2311       break;
2312     }
2313     case kX64F64x2Sub: {
2314       ASSEMBLE_SSE_BINOP(Subpd);
2315       break;
2316     }
2317     case kX64F64x2Mul: {
2318       ASSEMBLE_SSE_BINOP(Mulpd);
2319       break;
2320     }
2321     case kX64F64x2Div: {
2322       ASSEMBLE_SSE_BINOP(Divpd);
2323       break;
2324     }
2325     case kX64F64x2Min: {
2326       XMMRegister src1 = i.InputSimd128Register(1),
2327                   dst = i.OutputSimd128Register();
2328       DCHECK_EQ(dst, i.InputSimd128Register(0));
2329       // The minpd instruction doesn't propagate NaNs and +0's in its first
      // operand. Perform minpd in both orders, merge the results, and adjust.
2331       __ Movapd(kScratchDoubleReg, src1);
2332       __ Minpd(kScratchDoubleReg, dst);
2333       __ Minpd(dst, src1);
      // Propagate -0's and NaNs, which may be non-canonical.
2335       __ Orpd(kScratchDoubleReg, dst);
2336       // Canonicalize NaNs by quieting and clearing the payload.
2337       __ Cmppd(dst, kScratchDoubleReg, static_cast<int8_t>(3));
2338       __ Orpd(kScratchDoubleReg, dst);
2339       __ Psrlq(dst, 13);
2340       __ Andnpd(dst, kScratchDoubleReg);
2341       break;
2342     }
2343     case kX64F64x2Max: {
2344       XMMRegister src1 = i.InputSimd128Register(1),
2345                   dst = i.OutputSimd128Register();
2346       DCHECK_EQ(dst, i.InputSimd128Register(0));
2347       // The maxpd instruction doesn't propagate NaNs and +0's in its first
      // operand. Perform maxpd in both orders, merge the results, and adjust.
2349       __ Movapd(kScratchDoubleReg, src1);
2350       __ Maxpd(kScratchDoubleReg, dst);
2351       __ Maxpd(dst, src1);
2352       // Find discrepancies.
2353       __ Xorpd(dst, kScratchDoubleReg);
2354       // Propagate NaNs, which may be non-canonical.
2355       __ Orpd(kScratchDoubleReg, dst);
2356       // Propagate sign discrepancy and (subtle) quiet NaNs.
2357       __ Subpd(kScratchDoubleReg, dst);
2358       // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
2359       __ Cmppd(dst, kScratchDoubleReg, static_cast<int8_t>(3));
2360       __ Psrlq(dst, 13);
2361       __ Andnpd(dst, kScratchDoubleReg);
2362       break;
2363     }
2364     case kX64F64x2Eq: {
2365       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2366       __ Cmpeqpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2367       break;
2368     }
2369     case kX64F64x2Ne: {
2370       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2371       __ Cmpneqpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2372       break;
2373     }
2374     case kX64F64x2Lt: {
2375       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2376       __ Cmpltpd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2377       break;
2378     }
2379     case kX64F64x2Le: {
2380       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2381       __ Cmplepd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2382       break;
2383     }
2384     case kX64F64x2Qfma: {
2385       if (CpuFeatures::IsSupported(FMA3)) {
2386         CpuFeatureScope fma3_scope(tasm(), FMA3);
2387         __ vfmadd231pd(i.OutputSimd128Register(), i.InputSimd128Register(1),
2388                        i.InputSimd128Register(2));
2389       } else {
2390         XMMRegister tmp = i.TempSimd128Register(0);
2391         __ Movapd(tmp, i.InputSimd128Register(2));
2392         __ Mulpd(tmp, i.InputSimd128Register(1));
2393         __ Addpd(i.OutputSimd128Register(), tmp);
2394       }
2395       break;
2396     }
2397     case kX64F64x2Qfms: {
2398       if (CpuFeatures::IsSupported(FMA3)) {
2399         CpuFeatureScope fma3_scope(tasm(), FMA3);
2400         __ vfnmadd231pd(i.OutputSimd128Register(), i.InputSimd128Register(1),
2401                         i.InputSimd128Register(2));
2402       } else {
2403         XMMRegister tmp = i.TempSimd128Register(0);
2404         __ Movapd(tmp, i.InputSimd128Register(2));
2405         __ Mulpd(tmp, i.InputSimd128Register(1));
2406         __ Subpd(i.OutputSimd128Register(), tmp);
2407       }
2408       break;
2409     }
2410     // TODO(gdeepti): Get rid of redundant moves for F32x4Splat/Extract below
2411     case kX64F32x4Splat: {
2412       XMMRegister dst = i.OutputSimd128Register();
2413       if (instr->InputAt(0)->IsFPRegister()) {
2414         __ Movss(dst, i.InputDoubleRegister(0));
2415       } else {
2416         __ Movss(dst, i.InputOperand(0));
2417       }
2418       __ Shufps(dst, dst, static_cast<byte>(0x0));
2419       break;
2420     }
2421     case kX64F32x4ExtractLane: {
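      // extractps writes the selected lane to a GPR; movd moves it back into
      // the XMM output.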
2422       __ Extractps(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
2423       __ Movd(i.OutputDoubleRegister(), kScratchRegister);
2424       break;
2425     }
2426     case kX64F32x4ReplaceLane: {
2427       // The insertps instruction uses imm8[5:4] to indicate the lane
2428       // that needs to be replaced.
2429       byte select = i.InputInt8(1) << 4 & 0x30;
2430       if (instr->InputAt(2)->IsFPRegister()) {
2431         __ Insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2),
2432                     select);
2433       } else {
2434         __ Insertps(i.OutputSimd128Register(), i.InputOperand(2), select);
2435       }
2436       break;
2437     }
2438     case kX64F32x4SConvertI32x4: {
2439       __ Cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2440       break;
2441     }
2442     case kX64F32x4UConvertI32x4: {
2443       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2444       DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
2445       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2446       XMMRegister dst = i.OutputSimd128Register();
2447       __ Pxor(kScratchDoubleReg, kScratchDoubleReg);  // zeros
2448       __ Pblendw(kScratchDoubleReg, dst,
2449                  static_cast<uint8_t>(0x55));             // get lo 16 bits
2450       __ Psubd(dst, kScratchDoubleReg);                   // get hi 16 bits
2451       __ Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // convert lo exactly
      __ Psrld(dst,
               static_cast<byte>(1));    // halve to fit non-negative int32 range
2454       __ Cvtdq2ps(dst, dst);             // convert hi exactly
2455       __ Addps(dst, dst);                // double hi, exactly
2456       __ Addps(dst, kScratchDoubleReg);  // add hi and lo, may round.
2457       break;
2458     }
2459     case kX64F32x4Abs: {
2460       XMMRegister dst = i.OutputSimd128Register();
2461       XMMRegister src = i.InputSimd128Register(0);
2462       if (dst == src) {
2463         __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2464         __ Psrld(kScratchDoubleReg, static_cast<byte>(1));
2465         __ Andps(i.OutputSimd128Register(), kScratchDoubleReg);
2466       } else {
2467         __ Pcmpeqd(dst, dst);
2468         __ Psrld(dst, static_cast<byte>(1));
2469         __ Andps(dst, i.InputSimd128Register(0));
2470       }
2471       break;
2472     }
2473     case kX64F32x4Neg: {
2474       XMMRegister dst = i.OutputSimd128Register();
2475       XMMRegister src = i.InputSimd128Register(0);
2476       if (dst == src) {
2477         __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2478         __ Pslld(kScratchDoubleReg, static_cast<byte>(31));
2479         __ Xorps(i.OutputSimd128Register(), kScratchDoubleReg);
2480       } else {
2481         __ Pcmpeqd(dst, dst);
2482         __ Pslld(dst, static_cast<byte>(31));
2483         __ Xorps(dst, i.InputSimd128Register(0));
2484       }
2485       break;
2486     }
2487     case kX64F32x4Sqrt: {
2488       __ Sqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2489       break;
2490     }
2491     case kX64F32x4RecipApprox: {
2492       __ Rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2493       break;
2494     }
2495     case kX64F32x4RecipSqrtApprox: {
2496       __ Rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2497       break;
2498     }
2499     case kX64F32x4Add: {
2500       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2501       __ Addps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2502       break;
2503     }
2504     case kX64F32x4AddHoriz: {
2505       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2506       CpuFeatureScope sse_scope(tasm(), SSE3);
2507       __ Haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2508       break;
2509     }
2510     case kX64F32x4Sub: {
2511       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2512       __ Subps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2513       break;
2514     }
2515     case kX64F32x4Mul: {
2516       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2517       __ Mulps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2518       break;
2519     }
2520     case kX64F32x4Div: {
2521       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2522       __ Divps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2523       break;
2524     }
2525     case kX64F32x4Min: {
2526       XMMRegister src1 = i.InputSimd128Register(1),
2527                   dst = i.OutputSimd128Register();
2528       DCHECK_EQ(dst, i.InputSimd128Register(0));
2529       // The minps instruction doesn't propagate NaNs and +0's in its first
      // operand. Perform minps in both orders, merge the results, and adjust.
2531       __ Movaps(kScratchDoubleReg, src1);
2532       __ Minps(kScratchDoubleReg, dst);
2533       __ Minps(dst, src1);
      // Propagate -0's and NaNs, which may be non-canonical.
2535       __ Orps(kScratchDoubleReg, dst);
2536       // Canonicalize NaNs by quieting and clearing the payload.
2537       __ Cmpps(dst, kScratchDoubleReg, static_cast<int8_t>(3));
2538       __ Orps(kScratchDoubleReg, dst);
2539       __ Psrld(dst, static_cast<byte>(10));
2540       __ Andnps(dst, kScratchDoubleReg);
2541       break;
2542     }
2543     case kX64F32x4Max: {
2544       XMMRegister src1 = i.InputSimd128Register(1),
2545                   dst = i.OutputSimd128Register();
2546       DCHECK_EQ(dst, i.InputSimd128Register(0));
2547       // The maxps instruction doesn't propagate NaNs and +0's in its first
      // operand. Perform maxps in both orders, merge the results, and adjust.
2549       __ Movaps(kScratchDoubleReg, src1);
2550       __ Maxps(kScratchDoubleReg, dst);
2551       __ Maxps(dst, src1);
2552       // Find discrepancies.
2553       __ Xorps(dst, kScratchDoubleReg);
2554       // Propagate NaNs, which may be non-canonical.
2555       __ Orps(kScratchDoubleReg, dst);
2556       // Propagate sign discrepancy and (subtle) quiet NaNs.
2557       __ Subps(kScratchDoubleReg, dst);
2558       // Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
2559       __ Cmpps(dst, kScratchDoubleReg, static_cast<int8_t>(3));
2560       __ Psrld(dst, static_cast<byte>(10));
2561       __ Andnps(dst, kScratchDoubleReg);
2562       break;
2563     }
2564     case kX64F32x4Eq: {
2565       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2566       __ Cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1),
2567                static_cast<int8_t>(0x0));
2568       break;
2569     }
2570     case kX64F32x4Ne: {
2571       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2572       __ Cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1),
2573                static_cast<int8_t>(0x4));
2574       break;
2575     }
2576     case kX64F32x4Lt: {
2577       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2578       __ Cmpltps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2579       break;
2580     }
2581     case kX64F32x4Le: {
2582       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2583       __ Cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2584       break;
2585     }
2586     case kX64F32x4Qfma: {
2587       if (CpuFeatures::IsSupported(FMA3)) {
2588         CpuFeatureScope fma3_scope(tasm(), FMA3);
2589         __ vfmadd231ps(i.OutputSimd128Register(), i.InputSimd128Register(1),
2590                        i.InputSimd128Register(2));
2591       } else {
2592         XMMRegister tmp = i.TempSimd128Register(0);
2593         __ Movaps(tmp, i.InputSimd128Register(2));
2594         __ Mulps(tmp, i.InputSimd128Register(1));
2595         __ Addps(i.OutputSimd128Register(), tmp);
2596       }
2597       break;
2598     }
2599     case kX64F32x4Qfms: {
2600       if (CpuFeatures::IsSupported(FMA3)) {
2601         CpuFeatureScope fma3_scope(tasm(), FMA3);
2602         __ vfnmadd231ps(i.OutputSimd128Register(), i.InputSimd128Register(1),
2603                         i.InputSimd128Register(2));
2604       } else {
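             // No FMA3: fall back to an unfused multiply followed by a subtract;
             // as above, the intermediate product is rounded, unlike the fused form.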
2605         XMMRegister tmp = i.TempSimd128Register(0);
2606         __ Movaps(tmp, i.InputSimd128Register(2));
2607         __ Mulps(tmp, i.InputSimd128Register(1));
2608         __ Subps(i.OutputSimd128Register(), tmp);
2609       }
2610       break;
2611     }
2612     case kX64I64x2Splat: {
2613       CpuFeatureScope sse_scope(tasm(), SSE3);
2614       XMMRegister dst = i.OutputSimd128Register();
2615       if (HasRegisterInput(instr, 0)) {
2616         __ Movq(dst, i.InputRegister(0));
2617       } else {
2618         __ Movq(dst, i.InputOperand(0));
2619       }
2620       __ Movddup(dst, dst);
2621       break;
2622     }
2623     case kX64I64x2ExtractLane: {
2624       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2625       __ Pextrq(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
2626       break;
2627     }
2628     case kX64I64x2ReplaceLane: {
2629       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2630       if (HasRegisterInput(instr, 2)) {
2631         __ Pinsrq(i.OutputSimd128Register(), i.InputRegister(2),
2632                   i.InputInt8(1));
2633       } else {
2634         __ Pinsrq(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2635       }
2636       break;
2637     }
2638     case kX64I64x2Neg: {
2639       XMMRegister dst = i.OutputSimd128Register();
2640       XMMRegister src = i.InputSimd128Register(0);
2641       if (dst == src) {
2642         __ Movapd(kScratchDoubleReg, src);
2643         src = kScratchDoubleReg;
2644       }
2645       __ Pxor(dst, dst);
2646       __ Psubq(dst, src);
2647       break;
2648     }
2649     case kX64I64x2Shl: {
2650       // Take shift value modulo 2^6.
2651       ASSEMBLE_SIMD_SHIFT(Psllq, 6);
2652       break;
2653     }
2654     case kX64I64x2ShrS: {
2655       // TODO(zhin): there is vpsraq, but it requires AVX512.
2656       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2657       // Perform the arithmetic shift on each quadword, one at a time.
2658       XMMRegister dst = i.OutputSimd128Register();
2659       XMMRegister src = i.InputSimd128Register(0);
2660       Register tmp = i.ToRegister(instr->TempAt(0));
2661       // Modulo 64 not required as sarq_cl will mask cl to 6 bits.
2662 
2663       // lower quadword
2664       __ Pextrq(tmp, src, static_cast<int8_t>(0x0));
2665       __ sarq_cl(tmp);
2666       __ Pinsrq(dst, tmp, static_cast<int8_t>(0x0));
2667 
2668       // upper quadword
2669       __ Pextrq(tmp, src, static_cast<int8_t>(0x1));
2670       __ sarq_cl(tmp);
2671       __ Pinsrq(dst, tmp, static_cast<int8_t>(0x1));
2672       break;
2673     }
2674     case kX64I64x2Add: {
2675       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2676       __ Paddq(i.OutputSimd128Register(), i.InputSimd128Register(1));
2677       break;
2678     }
2679     case kX64I64x2Sub: {
2680       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2681       __ Psubq(i.OutputSimd128Register(), i.InputSimd128Register(1));
2682       break;
2683     }
2684     case kX64I64x2Mul: {
2685       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2686       XMMRegister left = i.InputSimd128Register(0);
2687       XMMRegister right = i.InputSimd128Register(1);
2688       XMMRegister tmp1 = i.TempSimd128Register(0);
2689       XMMRegister tmp2 = i.TempSimd128Register(1);
2690 
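           // 64x64->64 multiply per lane using 32-bit pieces:
           //   result = lo(left)*lo(right)
           //            + ((hi(left)*lo(right) + lo(left)*hi(right)) << 32)
           // The hi*hi term only contributes above bit 63 and is dropped.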
2691       __ Movaps(tmp1, left);
2692       __ Movaps(tmp2, right);
2693 
2694       // Multiply high dword of each qword of left with right.
2695       __ Psrlq(tmp1, 32);
2696       __ Pmuludq(tmp1, right);
2697 
2698       // Multiply high dword of each qword of right with left.
2699       __ Psrlq(tmp2, 32);
2700       __ Pmuludq(tmp2, left);
2701 
2702       __ Paddq(tmp2, tmp1);
2703       __ Psllq(tmp2, 32);
2704 
2705       __ Pmuludq(left, right);
2706       __ Paddq(left, tmp2);  // left == dst
2707       break;
2708     }
2709     case kX64I64x2MinS: {
2710       if (CpuFeatures::IsSupported(SSE4_2)) {
2711         CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2);
2712         XMMRegister dst = i.OutputSimd128Register();
2713         XMMRegister src0 = i.InputSimd128Register(0);
2714         XMMRegister src1 = i.InputSimd128Register(1);
2715         XMMRegister tmp = i.TempSimd128Register(0);
2716         DCHECK_EQ(tmp, xmm0);
2717 
2718         __ movaps(tmp, src1);
2719         __ pcmpgtq(tmp, src0);
2720         __ movaps(dst, src1);
2721         __ blendvpd(dst, src0);  // implicit use of xmm0 as mask
2722       } else {
2723         CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1);
2724         XMMRegister dst = i.OutputSimd128Register();
2725         XMMRegister src = i.InputSimd128Register(1);
2726         XMMRegister tmp = i.TempSimd128Register(0);
2727         Register tmp1 = i.TempRegister(1);
2728         Register tmp2 = i.TempRegister(2);
2729         DCHECK_EQ(dst, i.InputSimd128Register(0));
2730         // Back up src since we cannot change it.
2731         __ movaps(tmp, src);
2732 
2733         // Compare the lower quadwords.
2734         __ movq(tmp1, dst);
2735         __ movq(tmp2, tmp);
2736         __ cmpq(tmp1, tmp2);
2737         // tmp2 now has the min of lower quadwords
2738         __ cmovq(less_equal, tmp2, tmp1);
2739         // Extract the higher quadword of dst into tmp1. This must happen before
2740         // the movq below, since movq to an XMM register clears its upper quadword.
2741         __ pextrq(tmp1, dst, 1);
2742         // save tmp2 into dst
2743         __ movq(dst, tmp2);
2744         // tmp2 now has the higher quadword
2745         __ pextrq(tmp2, tmp, 1);
2746         // Compare the higher quadwords.
2747         __ cmpq(tmp1, tmp2);
2748         // tmp2 now has the min of higher quadwords
2749         __ cmovq(less_equal, tmp2, tmp1);
2750         __ movq(tmp, tmp2);
2751         // dst = [tmp[0], dst[0]]
2752         __ punpcklqdq(dst, tmp);
2753       }
2754       break;
2755     }
2756     case kX64I64x2MaxS: {
2757       CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2);
2758       XMMRegister dst = i.OutputSimd128Register();
2759       XMMRegister src = i.InputSimd128Register(1);
2760       XMMRegister tmp = i.TempSimd128Register(0);
2761       DCHECK_EQ(dst, i.InputSimd128Register(0));
2762       DCHECK_EQ(tmp, xmm0);
2763 
2764       __ movaps(tmp, src);
2765       __ pcmpgtq(tmp, dst);
2766       __ blendvpd(dst, src);  // implicit use of xmm0 as mask
2767       break;
2768     }
2769     case kX64I64x2Eq: {
2770       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2771       __ Pcmpeqq(i.OutputSimd128Register(), i.InputSimd128Register(1));
2772       break;
2773     }
2774     case kX64I64x2Ne: {
2775       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2776       XMMRegister tmp = i.TempSimd128Register(0);
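           // Not-equal is the inverse of pcmpeqq: build an all-ones mask in tmp and
           // xor it with the equality result.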
2777       __ Pcmpeqq(i.OutputSimd128Register(), i.InputSimd128Register(1));
2778       __ Pcmpeqq(tmp, tmp);
2779       __ Pxor(i.OutputSimd128Register(), tmp);
2780       break;
2781     }
2782     case kX64I64x2GtS: {
2783       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2784       __ Pcmpgtq(i.OutputSimd128Register(), i.InputSimd128Register(1));
2785       break;
2786     }
2787     case kX64I64x2GeS: {
2788       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2789       XMMRegister dst = i.OutputSimd128Register();
2790       XMMRegister src = i.InputSimd128Register(1);
2791       XMMRegister tmp = i.TempSimd128Register(0);
2792 
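           // dst >= src (signed) is computed as NOT(src > dst): pcmpgtq into tmp,
           // then invert with an all-ones xor.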
2793       __ Movaps(tmp, src);
2794       __ Pcmpgtq(tmp, dst);
2795       __ Pcmpeqd(dst, dst);
2796       __ Pxor(dst, tmp);
2797       break;
2798     }
2799     case kX64I64x2ShrU: {
2800       // Take shift value modulo 2^6.
2801       ASSEMBLE_SIMD_SHIFT(Psrlq, 6);
2802       break;
2803     }
2804     case kX64I64x2MinU: {
2805       CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2);
2806       CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1);
2807       XMMRegister dst = i.OutputSimd128Register();
2808       XMMRegister src0 = i.InputSimd128Register(0);
2809       XMMRegister src1 = i.InputSimd128Register(1);
2810       XMMRegister tmp0 = i.TempSimd128Register(0);
2811       XMMRegister tmp1 = i.TempSimd128Register(1);
2812       DCHECK_EQ(tmp1, xmm0);
2813 
2814       __ movaps(dst, src1);
2815       __ movaps(tmp0, src0);
2816 
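           // There is no unsigned 64x2 min, so bias both operands by the sign bit
           // (0x8000000000000000) and use the signed pcmpgtq; the resulting mask in
           // xmm0 drives blendvpd, which picks src0 wherever src1 > src0 unsigned.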
2817       __ pcmpeqd(tmp1, tmp1);
2818       __ psllq(tmp1, 63);
2819 
2820       __ pxor(tmp0, tmp1);
2821       __ pxor(tmp1, dst);
2822 
2823       __ pcmpgtq(tmp1, tmp0);
2824       __ blendvpd(dst, src0);  // implicit use of xmm0 as mask
2825       break;
2826     }
2827     case kX64I64x2MaxU: {
2828       CpuFeatureScope sse_scope_4_2(tasm(), SSE4_2);
2829       CpuFeatureScope sse_scope_4_1(tasm(), SSE4_1);
2830       XMMRegister dst = i.OutputSimd128Register();
2831       XMMRegister src = i.InputSimd128Register(1);
2832       XMMRegister dst_tmp = i.TempSimd128Register(0);
2833       XMMRegister tmp = i.TempSimd128Register(1);
2834       DCHECK_EQ(dst, i.InputSimd128Register(0));
2835       DCHECK_EQ(tmp, xmm0);
2836 
2837       __ movaps(dst_tmp, dst);
2838 
2839       __ pcmpeqd(tmp, tmp);
2840       __ psllq(tmp, 63);
2841 
2842       __ pxor(dst_tmp, tmp);
2843       __ pxor(tmp, src);
2844 
2845       __ pcmpgtq(tmp, dst_tmp);
2846       __ blendvpd(dst, src);  // implicit use of xmm0 as mask
2847       break;
2848     }
2849     case kX64I64x2GtU: {
2850       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2851       XMMRegister dst = i.OutputSimd128Register();
2852       XMMRegister src = i.InputSimd128Register(1);
2853       XMMRegister tmp = i.TempSimd128Register(0);
2854 
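           // There is no unsigned 64x2 compare: flip the sign bit of both operands
           // (xor with 0x8000000000000000) and use the signed pcmpgtq instead.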
2855       __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2856       __ Psllq(kScratchDoubleReg, 63);
2857 
2858       __ Movaps(tmp, src);
2859       __ Pxor(tmp, kScratchDoubleReg);
2860       __ Pxor(dst, kScratchDoubleReg);
2861       __ Pcmpgtq(dst, tmp);
2862       break;
2863     }
2864     case kX64I64x2GeU: {
2865       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2866       CpuFeatureScope sse_scope(tasm(), SSE4_2);
2867       XMMRegister dst = i.OutputSimd128Register();
2868       XMMRegister src = i.InputSimd128Register(1);
2869       XMMRegister tmp = i.TempSimd128Register(0);
2870 
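           // Same sign-bit trick as GtU: compute src > dst unsigned into tmp, then
           // invert it to obtain dst >= src.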
2871       __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2872       __ Psllq(kScratchDoubleReg, 63);
2873 
2874       __ Movaps(tmp, src);
2875       __ Pxor(dst, kScratchDoubleReg);
2876       __ Pxor(tmp, kScratchDoubleReg);
2877       __ Pcmpgtq(tmp, dst);
2878       __ Pcmpeqd(dst, dst);
2879       __ Pxor(dst, tmp);
2880       break;
2881     }
2882     case kX64I32x4Splat: {
2883       XMMRegister dst = i.OutputSimd128Register();
2884       if (HasRegisterInput(instr, 0)) {
2885         __ Movd(dst, i.InputRegister(0));
2886       } else {
2887         __ Movd(dst, i.InputOperand(0));
2888       }
2889       __ Pshufd(dst, dst, static_cast<uint8_t>(0x0));
2890       break;
2891     }
2892     case kX64I32x4ExtractLane: {
2893       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2894       __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
2895       break;
2896     }
2897     case kX64I32x4ReplaceLane: {
2898       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2899       if (HasRegisterInput(instr, 2)) {
2900         __ Pinsrd(i.OutputSimd128Register(), i.InputRegister(2),
2901                   i.InputInt8(1));
2902       } else {
2903         __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2904       }
2905       break;
2906     }
2907     case kX64I32x4SConvertF32x4: {
2908       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2909       XMMRegister dst = i.OutputSimd128Register();
2910       XMMRegister tmp = i.TempSimd128Register(0);
2911       // NAN->0
2912       __ Movaps(tmp, dst);
2913       __ Cmpeqps(tmp, tmp);
2914       __ Pand(dst, tmp);
2915       // Set top bit if >= 0 (but not -0.0!)
2916       __ Pxor(tmp, dst);
2917       // Convert
2918       __ Cvttps2dq(dst, dst);
2919       // Set top bit if >=0 is now < 0
2920       __ Pand(tmp, dst);
2921       __ Psrad(tmp, static_cast<byte>(31));
2922       // Set positive overflow lanes to 0x7FFFFFFF
2923       __ Pxor(dst, tmp);
2924       break;
2925     }
2926     case kX64I32x4SConvertI16x8Low: {
2927       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2928       __ Pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2929       break;
2930     }
2931     case kX64I32x4SConvertI16x8High: {
2932       XMMRegister dst = i.OutputSimd128Register();
2933       __ Palignr(dst, i.InputSimd128Register(0), static_cast<uint8_t>(8));
2934       __ Pmovsxwd(dst, dst);
2935       break;
2936     }
2937     case kX64I32x4Neg: {
2938       CpuFeatureScope sse_scope(tasm(), SSSE3);
2939       XMMRegister dst = i.OutputSimd128Register();
2940       XMMRegister src = i.InputSimd128Register(0);
2941       if (dst == src) {
2942         __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2943         __ Psignd(dst, kScratchDoubleReg);
2944       } else {
2945         __ Pxor(dst, dst);
2946         __ Psubd(dst, src);
2947       }
2948       break;
2949     }
2950     case kX64I32x4Shl: {
2951       // Take shift value modulo 2^5.
2952       ASSEMBLE_SIMD_SHIFT(Pslld, 5);
2953       break;
2954     }
2955     case kX64I32x4ShrS: {
2956       // Take shift value modulo 2^5.
2957       ASSEMBLE_SIMD_SHIFT(Psrad, 5);
2958       break;
2959     }
2960     case kX64I32x4Add: {
2961       __ Paddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2962       break;
2963     }
2964     case kX64I32x4AddHoriz: {
2965       CpuFeatureScope sse_scope(tasm(), SSSE3);
2966       __ Phaddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2967       break;
2968     }
2969     case kX64I32x4Sub: {
2970       __ Psubd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2971       break;
2972     }
2973     case kX64I32x4Mul: {
2974       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2975       __ Pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1));
2976       break;
2977     }
2978     case kX64I32x4MinS: {
2979       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2980       __ Pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2981       break;
2982     }
2983     case kX64I32x4MaxS: {
2984       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2985       __ Pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2986       break;
2987     }
2988     case kX64I32x4Eq: {
2989       __ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2990       break;
2991     }
2992     case kX64I32x4Ne: {
2993       XMMRegister tmp = i.TempSimd128Register(0);
2994       __ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2995       __ Pcmpeqd(tmp, tmp);
2996       __ Pxor(i.OutputSimd128Register(), tmp);
2997       break;
2998     }
2999     case kX64I32x4GtS: {
3000       __ Pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1));
3001       break;
3002     }
3003     case kX64I32x4GeS: {
3004       CpuFeatureScope sse_scope(tasm(), SSE4_1);
3005       XMMRegister dst = i.OutputSimd128Register();
3006       XMMRegister src = i.InputSimd128Register(1);
3007       __ Pminsd(dst, src);
3008       __ Pcmpeqd(dst, src);
3009       break;
3010     }
3011     case kX64I32x4UConvertF32x4: {
3012       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3013       CpuFeatureScope sse_scope(tasm(), SSE4_1);
3014       XMMRegister dst = i.OutputSimd128Register();
3015       XMMRegister tmp = i.TempSimd128Register(0);
3016       XMMRegister tmp2 = i.TempSimd128Register(1);
3017       // NAN->0, negative->0
3018       __ Pxor(tmp2, tmp2);
3019       __ Maxps(dst, tmp2);
3020       // scratch: float representation of max_signed
3021       __ Pcmpeqd(tmp2, tmp2);
3022       __ Psrld(tmp2, static_cast<uint8_t>(1));  // 0x7fffffff
3023       __ Cvtdq2ps(tmp2, tmp2);                  // 0x4f000000
3024       // tmp: convert (src-max_signed).
3025       // Positive overflow lanes -> 0x7FFFFFFF
3026       // Negative lanes -> 0
3027       __ Movaps(tmp, dst);
3028       __ Subps(tmp, tmp2);
3029       __ Cmpleps(tmp2, tmp);
3030       __ Cvttps2dq(tmp, tmp);
3031       __ Pxor(tmp, tmp2);
3032       __ Pxor(tmp2, tmp2);
3033       __ Pmaxsd(tmp, tmp2);
3034       // convert. Overflow lanes above max_signed will be 0x80000000
3035       __ Cvttps2dq(dst, dst);
3036       // Add (src-max_signed) for overflow lanes.
3037       __ Paddd(dst, tmp);
3038       break;
3039     }
3040     case kX64I32x4UConvertI16x8Low: {
3041       __ Pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
3042       break;
3043     }
3044     case kX64I32x4UConvertI16x8High: {
3045       XMMRegister dst = i.OutputSimd128Register();
3046       __ Palignr(dst, i.InputSimd128Register(0), static_cast<uint8_t>(8));
3047       __ Pmovzxwd(dst, dst);
3048       break;
3049     }
3050     case kX64I32x4ShrU: {
3051       // Take shift value modulo 2^5.
3052       ASSEMBLE_SIMD_SHIFT(Psrld, 5);
3053       break;
3054     }
3055     case kX64I32x4MinU: {
3056       CpuFeatureScope sse_scope(tasm(), SSE4_1);
3057       __ Pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
3058       break;
3059     }
3060     case kX64I32x4MaxU: {
3061       CpuFeatureScope sse_scope(tasm(), SSE4_1);
3062       __ Pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
3063       break;
3064     }
3065     case kX64I32x4GtU: {
3066       CpuFeatureScope sse_scope(tasm(), SSE4_1);
3067       XMMRegister dst = i.OutputSimd128Register();
3068       XMMRegister src = i.InputSimd128Register(1);
3069       XMMRegister tmp = i.TempSimd128Register(0);
3070       __ Pmaxud(dst, src);
3071       __ Pcmpeqd(dst, src);
3072       __ Pcmpeqd(tmp, tmp);
3073       __ Pxor(dst, tmp);
3074       break;
3075     }
3076     case kX64I32x4GeU: {
3077       CpuFeatureScope sse_scope(tasm(), SSE4_1);
3078       XMMRegister dst = i.OutputSimd128Register();
3079       XMMRegister src = i.InputSimd128Register(1);
3080       __ Pminud(dst, src);
3081       __ Pcmpeqd(dst, src);
3082       break;
3083     }
3084     case kX64I32x4Abs: {
3085       __ Pabsd(i.OutputSimd128Register(), i.InputSimd128Register(0));
3086       break;
3087     }
3088     case kX64S128Zero: {
3089       XMMRegister dst = i.OutputSimd128Register();
3090       __ Xorps(dst, dst);
3091       break;
3092     }
3093     case kX64I16x8Splat: {
3094       XMMRegister dst = i.OutputSimd128Register();
3095       if (HasRegisterInput(instr, 0)) {
3096         __ Movd(dst, i.InputRegister(0));
3097       } else {
3098         __ Movd(dst, i.InputOperand(0));
3099       }
3100       __ Pshuflw(dst, dst, static_cast<uint8_t>(0x0));
3101       __ Pshufd(dst, dst, static_cast<uint8_t>(0x0));
3102       break;
3103     }
3104     case kX64I16x8ExtractLaneU: {
3105       Register dst = i.OutputRegister();
3106       __ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
3107       break;
3108     }
3109     case kX64I16x8ExtractLaneS: {
3110       Register dst = i.OutputRegister();
3111       __ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
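           // Pextrw zero-extends the lane, so sign-extend it to 32 bits explicitly.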
3112       __ movsxwl(dst, dst);
3113       break;
3114     }
3115     case kX64I16x8ReplaceLane: {
3116       if (HasRegisterInput(instr, 2)) {
3117         __ Pinsrw(i.OutputSimd128Register(), i.InputRegister(2),
3118                   i.InputInt8(1));
3119       } else {
3120         __ Pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
3121       }
3122       break;
3123     }
3124     case kX64I16x8SConvertI8x16Low: {
3125       __ Pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
3126       break;
3127     }
3128     case kX64I16x8SConvertI8x16High: {
3129       XMMRegister dst = i.OutputSimd128Register();
3130       __ Palignr(dst, i.InputSimd128Register(0), static_cast<uint8_t>(8));
3131       __ Pmovsxbw(dst, dst);
3132       break;
3133     }
3134     case kX64I16x8Neg: {
3135       XMMRegister dst = i.OutputSimd128Register();
3136       XMMRegister src = i.InputSimd128Register(0);
3137       if (dst == src) {
3138         __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3139         __ Psignw(dst, kScratchDoubleReg);
3140       } else {
3141         __ Pxor(dst, dst);
3142         __ Psubw(dst, src);
3143       }
3144       break;
3145     }
3146     case kX64I16x8Shl: {
3147       // Take shift value modulo 2^4.
3148       ASSEMBLE_SIMD_SHIFT(Psllw, 4);
3149       break;
3150     }
3151     case kX64I16x8ShrS: {
3152       // Take shift value modulo 2^4.
3153       ASSEMBLE_SIMD_SHIFT(Psraw, 4);
3154       break;
3155     }
3156     case kX64I16x8SConvertI32x4: {
3157       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3158       __ Packssdw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3159       break;
3160     }
3161     case kX64I16x8Add: {
3162       __ Paddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3163       break;
3164     }
3165     case kX64I16x8AddSaturateS: {
3166       __ Paddsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3167       break;
3168     }
3169     case kX64I16x8AddHoriz: {
3170       __ Phaddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3171       break;
3172     }
3173     case kX64I16x8Sub: {
3174       __ Psubw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3175       break;
3176     }
3177     case kX64I16x8SubSaturateS: {
3178       __ Psubsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3179       break;
3180     }
3181     case kX64I16x8Mul: {
3182       __ Pmullw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3183       break;
3184     }
3185     case kX64I16x8MinS: {
3186       __ Pminsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3187       break;
3188     }
3189     case kX64I16x8MaxS: {
3190       __ Pmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3191       break;
3192     }
3193     case kX64I16x8Eq: {
3194       __ Pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3195       break;
3196     }
3197     case kX64I16x8Ne: {
3198       XMMRegister tmp = i.TempSimd128Register(0);
3199       __ Pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3200       __ Pcmpeqw(tmp, tmp);
3201       __ Pxor(i.OutputSimd128Register(), tmp);
3202       break;
3203     }
3204     case kX64I16x8GtS: {
3205       __ Pcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3206       break;
3207     }
3208     case kX64I16x8GeS: {
3209       XMMRegister dst = i.OutputSimd128Register();
3210       XMMRegister src = i.InputSimd128Register(1);
3211       __ Pminsw(dst, src);
3212       __ Pcmpeqw(dst, src);
3213       break;
3214     }
3215     case kX64I16x8UConvertI8x16Low: {
3216       __ Pmovzxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
3217       break;
3218     }
3219     case kX64I16x8UConvertI8x16High: {
3220       XMMRegister dst = i.OutputSimd128Register();
3221       __ Palignr(dst, i.InputSimd128Register(0), static_cast<uint8_t>(8));
3222       __ Pmovzxbw(dst, dst);
3223       break;
3224     }
3225     case kX64I16x8ShrU: {
3226       // Take shift value modulo 2^4.
3227       ASSEMBLE_SIMD_SHIFT(Psrlw, 4);
3228       break;
3229     }
3230     case kX64I16x8UConvertI32x4: {
3231       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3232       __ Packusdw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3233       break;
3234     }
3235     case kX64I16x8AddSaturateU: {
3236       __ Paddusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3237       break;
3238     }
3239     case kX64I16x8SubSaturateU: {
3240       __ Psubusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3241       break;
3242     }
3243     case kX64I16x8MinU: {
3244       __ Pminuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3245       break;
3246     }
3247     case kX64I16x8MaxU: {
3248       __ Pmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3249       break;
3250     }
3251     case kX64I16x8GtU: {
3252       XMMRegister dst = i.OutputSimd128Register();
3253       XMMRegister src = i.InputSimd128Register(1);
3254       XMMRegister tmp = i.TempSimd128Register(0);
3255       __ Pmaxuw(dst, src);
3256       __ Pcmpeqw(dst, src);
3257       __ Pcmpeqw(tmp, tmp);
3258       __ Pxor(dst, tmp);
3259       break;
3260     }
3261     case kX64I16x8GeU: {
3262       XMMRegister dst = i.OutputSimd128Register();
3263       XMMRegister src = i.InputSimd128Register(1);
3264       __ Pminuw(dst, src);
3265       __ Pcmpeqw(dst, src);
3266       break;
3267     }
3268     case kX64I16x8RoundingAverageU: {
3269       __ Pavgw(i.OutputSimd128Register(), i.InputSimd128Register(1));
3270       break;
3271     }
3272     case kX64I16x8Abs: {
3273       __ Pabsw(i.OutputSimd128Register(), i.InputSimd128Register(0));
3274       break;
3275     }
3276     case kX64I8x16Splat: {
3277       XMMRegister dst = i.OutputSimd128Register();
3278       if (HasRegisterInput(instr, 0)) {
3279         __ Movd(dst, i.InputRegister(0));
3280       } else {
3281         __ Movd(dst, i.InputOperand(0));
3282       }
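           // Broadcast the low byte: pshufb with an all-zero control mask replicates
           // byte 0 into every lane.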
3283       __ Xorps(kScratchDoubleReg, kScratchDoubleReg);
3284       __ Pshufb(dst, kScratchDoubleReg);
3285       break;
3286     }
3287     case kX64I8x16ExtractLaneU: {
3288       Register dst = i.OutputRegister();
3289       __ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
3290       break;
3291     }
3292     case kX64I8x16ExtractLaneS: {
3293       Register dst = i.OutputRegister();
3294       __ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
3295       __ movsxbl(dst, dst);
3296       break;
3297     }
3298     case kX64I8x16ReplaceLane: {
3299       if (HasRegisterInput(instr, 2)) {
3300         __ Pinsrb(i.OutputSimd128Register(), i.InputRegister(2),
3301                   i.InputInt8(1));
3302       } else {
3303         __ Pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
3304       }
3305       break;
3306     }
3307     case kX64I8x16SConvertI16x8: {
3308       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3309       __ Packsswb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3310       break;
3311     }
3312     case kX64I8x16Neg: {
3313       XMMRegister dst = i.OutputSimd128Register();
3314       XMMRegister src = i.InputSimd128Register(0);
3315       if (dst == src) {
3316         __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3317         __ Psignb(dst, kScratchDoubleReg);
3318       } else {
3319         __ Pxor(dst, dst);
3320         __ Psubb(dst, src);
3321       }
3322       break;
3323     }
3324     case kX64I8x16Shl: {
3325       XMMRegister dst = i.OutputSimd128Register();
3326       DCHECK_EQ(dst, i.InputSimd128Register(0));
3327       // Temp registers for shift mask and additional moves to XMM registers.
3328       Register tmp = i.ToRegister(instr->TempAt(0));
3329       XMMRegister tmp_simd = i.TempSimd128Register(1);
3330       if (HasImmediateInput(instr, 1)) {
3331         // Perform 16-bit shift, then mask away low bits.
3332         uint8_t shift = i.InputInt3(1);
3333         __ Psllw(dst, static_cast<byte>(shift));
3334 
3335         uint8_t bmask = static_cast<uint8_t>(0xff << shift);
3336         uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
3337         __ movl(tmp, Immediate(mask));
3338         __ Movd(tmp_simd, tmp);
3339         __ Pshufd(tmp_simd, tmp_simd, static_cast<uint8_t>(0));
3340         __ Pand(dst, tmp_simd);
3341       } else {
3342         Register shift = i.InputRegister(1);
3343         // Mask off the unwanted bits before word-shifting.
3344         __ Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
3345         // Take shift value modulo 8.
3346         __ andq(shift, Immediate(7));
3347         __ movq(tmp, shift);
3348         __ addq(tmp, Immediate(8));
3349         __ Movq(tmp_simd, tmp);
3350         __ Psrlw(kScratchDoubleReg, tmp_simd);
3351         __ Packuswb(kScratchDoubleReg, kScratchDoubleReg);
3352         __ Pand(dst, kScratchDoubleReg);
3353         __ Movq(tmp_simd, shift);
3354         __ Psllw(dst, tmp_simd);
3355       }
3356       break;
3357     }
3358     case kX64I8x16ShrS: {
3359       XMMRegister dst = i.OutputSimd128Register();
3360       DCHECK_EQ(dst, i.InputSimd128Register(0));
3361       if (HasImmediateInput(instr, 1)) {
3362         __ Punpckhbw(kScratchDoubleReg, dst);
3363         __ Punpcklbw(dst, dst);
3364         uint8_t shift = i.InputInt3(1) + 8;
3365         __ Psraw(kScratchDoubleReg, shift);
3366         __ Psraw(dst, shift);
3367         __ Packsswb(dst, kScratchDoubleReg);
3368       } else {
3369         // Temp registers for shift mask and additional moves to XMM registers.
3370         Register tmp = i.ToRegister(instr->TempAt(0));
3371         XMMRegister tmp_simd = i.TempSimd128Register(1);
3372         // Unpack the bytes into words, do arithmetic shifts, and repack.
3373         __ Punpckhbw(kScratchDoubleReg, dst);
3374         __ Punpcklbw(dst, dst);
3375         // Prepare shift value
3376         __ movq(tmp, i.InputRegister(1));
3377         // Take shift value modulo 8.
3378         __ andq(tmp, Immediate(7));
3379         __ addq(tmp, Immediate(8));
3380         __ Movq(tmp_simd, tmp);
3381         __ Psraw(kScratchDoubleReg, tmp_simd);
3382         __ Psraw(dst, tmp_simd);
3383         __ Packsswb(dst, kScratchDoubleReg);
3384       }
3385       break;
3386     }
3387     case kX64I8x16Add: {
3388       __ Paddb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3389       break;
3390     }
3391     case kX64I8x16AddSaturateS: {
3392       __ Paddsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3393       break;
3394     }
3395     case kX64I8x16Sub: {
3396       __ Psubb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3397       break;
3398     }
3399     case kX64I8x16SubSaturateS: {
3400       __ Psubsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3401       break;
3402     }
3403     case kX64I8x16Mul: {
3404       XMMRegister dst = i.OutputSimd128Register();
3405       DCHECK_EQ(dst, i.InputSimd128Register(0));
3406       XMMRegister right = i.InputSimd128Register(1);
3407       XMMRegister tmp = i.TempSimd128Register(0);
3408       // I16x8 view of I8x16
3409       // left = AAaa AAaa ... AAaa AAaa
3410       // right= BBbb BBbb ... BBbb BBbb
3411       // t = 00AA 00AA ... 00AA 00AA
3412       // s = 00BB 00BB ... 00BB 00BB
3413       __ Movaps(tmp, dst);
3414       __ Movaps(kScratchDoubleReg, right);
3415       __ Psrlw(tmp, static_cast<byte>(8));
3416       __ Psrlw(kScratchDoubleReg, static_cast<byte>(8));
3417       // dst = left * 256
3418       __ Psllw(dst, static_cast<byte>(8));
3419       // t = I16x8Mul(t, s)
3420       //    => __PP __PP ...  __PP  __PP
3421       __ Pmullw(tmp, kScratchDoubleReg);
3422       // dst = I16x8Mul(left * 256, right)
3423       //    => pp__ pp__ ...  pp__  pp__
3424       __ Pmullw(dst, right);
3425       // t = I16x8Shl(t, 8)
3426       //    => PP00 PP00 ...  PP00  PP00
3427       __ Psllw(tmp, static_cast<byte>(8));
3428       // dst = I16x8Shr(dst, 8)
3429       //    => 00pp 00pp ...  00pp  00pp
3430       __ Psrlw(dst, static_cast<byte>(8));
3431       // dst = I16x8Or(dst, t)
3432       //    => PPpp PPpp ...  PPpp  PPpp
3433       __ Por(dst, tmp);
3434       break;
3435     }
3436     case kX64I8x16MinS: {
3437       __ Pminsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3438       break;
3439     }
3440     case kX64I8x16MaxS: {
3441       __ Pmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3442       break;
3443     }
3444     case kX64I8x16Eq: {
3445       __ Pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3446       break;
3447     }
3448     case kX64I8x16Ne: {
3449       XMMRegister tmp = i.TempSimd128Register(0);
3450       __ Pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3451       __ Pcmpeqb(tmp, tmp);
3452       __ Pxor(i.OutputSimd128Register(), tmp);
3453       break;
3454     }
3455     case kX64I8x16GtS: {
3456       __ Pcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3457       break;
3458     }
3459     case kX64I8x16GeS: {
3460       XMMRegister dst = i.OutputSimd128Register();
3461       XMMRegister src = i.InputSimd128Register(1);
3462       __ Pminsb(dst, src);
3463       __ Pcmpeqb(dst, src);
3464       break;
3465     }
3466     case kX64I8x16UConvertI16x8: {
3467       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3468       __ Packuswb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3469       break;
3470     }
3471     case kX64I8x16ShrU: {
3472       XMMRegister dst = i.OutputSimd128Register();
3473       // Unpack the bytes into words, do logical shifts, and repack.
3474       DCHECK_EQ(dst, i.InputSimd128Register(0));
3475       // Temp registers for shift mask and additional moves to XMM registers.
3476       Register tmp = i.ToRegister(instr->TempAt(0));
3477       XMMRegister tmp_simd = i.TempSimd128Register(1);
3478       if (HasImmediateInput(instr, 1)) {
3479         // Perform 16-bit shift, then mask away high bits.
3480         uint8_t shift = i.InputInt3(1);
3481         __ Psrlw(dst, static_cast<byte>(shift));
3482 
3483         uint8_t bmask = 0xff >> shift;
3484         uint32_t mask = bmask << 24 | bmask << 16 | bmask << 8 | bmask;
3485         __ movl(tmp, Immediate(mask));
3486         __ Movd(tmp_simd, tmp);
3487         __ Pshufd(tmp_simd, tmp_simd, static_cast<byte>(0));
3488         __ Pand(dst, tmp_simd);
3489       } else {
3490         __ Punpckhbw(kScratchDoubleReg, dst);
3491         __ Punpcklbw(dst, dst);
3492         // Prepare shift value
3493         __ movq(tmp, i.InputRegister(1));
3494         // Take shift value modulo 8.
3495         __ andq(tmp, Immediate(7));
3496         __ addq(tmp, Immediate(8));
3497         __ Movq(tmp_simd, tmp);
3498         __ Psrlw(kScratchDoubleReg, tmp_simd);
3499         __ Psrlw(dst, tmp_simd);
3500         __ Packuswb(dst, kScratchDoubleReg);
3501       }
3502       break;
3503     }
3504     case kX64I8x16AddSaturateU: {
3505       __ Paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3506       break;
3507     }
3508     case kX64I8x16SubSaturateU: {
3509       __ Psubusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3510       break;
3511     }
3512     case kX64I8x16MinU: {
3513       __ Pminub(i.OutputSimd128Register(), i.InputSimd128Register(1));
3514       break;
3515     }
3516     case kX64I8x16MaxU: {
3517       __ Pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(1));
3518       break;
3519     }
3520     case kX64I8x16GtU: {
3521       XMMRegister dst = i.OutputSimd128Register();
3522       XMMRegister src = i.InputSimd128Register(1);
3523       XMMRegister tmp = i.TempSimd128Register(0);
3524       __ Pmaxub(dst, src);
3525       __ Pcmpeqb(dst, src);
3526       __ Pcmpeqb(tmp, tmp);
3527       __ Pxor(dst, tmp);
3528       break;
3529     }
3530     case kX64I8x16GeU: {
3531       XMMRegister dst = i.OutputSimd128Register();
3532       XMMRegister src = i.InputSimd128Register(1);
3533       __ Pminub(dst, src);
3534       __ Pcmpeqb(dst, src);
3535       break;
3536     }
3537     case kX64I8x16RoundingAverageU: {
3538       __ Pavgb(i.OutputSimd128Register(), i.InputSimd128Register(1));
3539       break;
3540     }
3541     case kX64I8x16Abs: {
3542       __ Pabsb(i.OutputSimd128Register(), i.InputSimd128Register(0));
3543       break;
3544     }
3545     case kX64S128And: {
3546       __ Pand(i.OutputSimd128Register(), i.InputSimd128Register(1));
3547       break;
3548     }
3549     case kX64S128Or: {
3550       __ Por(i.OutputSimd128Register(), i.InputSimd128Register(1));
3551       break;
3552     }
3553     case kX64S128Xor: {
3554       __ Pxor(i.OutputSimd128Register(), i.InputSimd128Register(1));
3555       break;
3556     }
3557     case kX64S128Not: {
3558       XMMRegister dst = i.OutputSimd128Register();
3559       XMMRegister src = i.InputSimd128Register(0);
3560       if (dst == src) {
3561         __ Movaps(kScratchDoubleReg, dst);
3562         __ Pcmpeqd(dst, dst);
3563         __ Pxor(dst, kScratchDoubleReg);
3564       } else {
3565         __ Pcmpeqd(dst, dst);
3566         __ Pxor(dst, src);
3567       }
3568 
3569       break;
3570     }
3571     case kX64S128Select: {
3572       // Mask used here is stored in dst.
3573       XMMRegister dst = i.OutputSimd128Register();
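           // Bitwise select: dst = (mask & src1) | (~mask & src2), computed here as
           // ((src1 ^ src2) & mask) ^ src2.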
3574       __ Movaps(kScratchDoubleReg, i.InputSimd128Register(1));
3575       __ Xorps(kScratchDoubleReg, i.InputSimd128Register(2));
3576       __ Andps(dst, kScratchDoubleReg);
3577       __ Xorps(dst, i.InputSimd128Register(2));
3578       break;
3579     }
3580     case kX64S128AndNot: {
3581       XMMRegister dst = i.OutputSimd128Register();
3582       DCHECK_EQ(dst, i.InputSimd128Register(0));
3583       // The inputs have been inverted by instruction selector, so we can call
3584       // andnps here without any modifications.
3585       __ Andnps(dst, i.InputSimd128Register(1));
3586       break;
3587     }
3588     case kX64S8x16Swizzle: {
3589       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3590       XMMRegister dst = i.OutputSimd128Register();
3591       XMMRegister mask = i.TempSimd128Register(0);
3592 
3593       // Out-of-range indices should return 0. Adding 112 with unsigned saturation
3594       // gives any index > 15 a set top bit, so pshufb will zero that lane.
3595       __ Move(mask, static_cast<uint32_t>(0x70707070));
3596       __ Pshufd(mask, mask, static_cast<uint8_t>(0x0));
3597       __ Paddusb(mask, i.InputSimd128Register(1));
3598       __ Pshufb(dst, mask);
3599       break;
3600     }
3601     case kX64S8x16Shuffle: {
3602       XMMRegister dst = i.OutputSimd128Register();
3603       Register tmp = i.TempRegister(0);
3604       // Prepare a 16-byte aligned buffer for the shuffle control mask.
3605       __ movq(tmp, rsp);
3606       __ andq(rsp, Immediate(-16));
3607       if (instr->InputCount() == 5) {  // only one input operand
3608         uint32_t mask[4] = {};
3609         DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3610         for (int j = 4; j > 0; j--) {
3611           mask[j - 1] = i.InputUint32(j);
3612         }
3613 
3614         SetupShuffleMaskOnStack(tasm(), mask);
3615         __ Pshufb(dst, Operand(rsp, 0));
3616       } else {  // two input operands
3617         DCHECK_EQ(6, instr->InputCount());
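             // Shuffle each source with a control mask that zeroes (0x80) the lanes
             // belonging to the other source, then OR the two partial results.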
3618         ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 0);
3619         uint32_t mask[4] = {};
3620         for (int j = 5; j > 1; j--) {
3621           uint32_t lanes = i.InputUint32(j);
3622           for (int k = 0; k < 32; k += 8) {
3623             uint8_t lane = lanes >> k;
3624             mask[j - 2] |= (lane < kSimd128Size ? lane : 0x80) << k;
3625           }
3626         }
3627         SetupShuffleMaskOnStack(tasm(), mask);
3628         __ Pshufb(kScratchDoubleReg, Operand(rsp, 0));
3629         uint32_t mask1[4] = {};
3630         if (instr->InputAt(1)->IsSimd128Register()) {
3631           XMMRegister src1 = i.InputSimd128Register(1);
3632           if (src1 != dst) __ movups(dst, src1);
3633         } else {
3634           __ Movups(dst, i.InputOperand(1));
3635         }
3636         for (int j = 5; j > 1; j--) {
3637           uint32_t lanes = i.InputUint32(j);
3638           for (int k = 0; k < 32; k += 8) {
3639             uint8_t lane = lanes >> k;
3640             mask1[j - 2] |= (lane >= kSimd128Size ? (lane & 0x0F) : 0x80) << k;
3641           }
3642         }
3643         SetupShuffleMaskOnStack(tasm(), mask1);
3644         __ Pshufb(dst, Operand(rsp, 0));
3645         __ Por(dst, kScratchDoubleReg);
3646       }
3647       __ movq(rsp, tmp);
3648       break;
3649     }
3650     case kX64S8x16LoadSplat: {
3651       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3652       __ Pinsrb(i.OutputSimd128Register(), i.MemoryOperand(), 0);
3653       __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
3654       __ Pshufb(i.OutputSimd128Register(), kScratchDoubleReg);
3655       break;
3656     }
3657     case kX64S16x8LoadSplat: {
3658       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3659       __ Pinsrw(i.OutputSimd128Register(), i.MemoryOperand(), 0);
3660       __ Pshuflw(i.OutputSimd128Register(), i.OutputSimd128Register(),
3661                  static_cast<uint8_t>(0));
3662       __ Punpcklqdq(i.OutputSimd128Register(), i.OutputSimd128Register());
3663       break;
3664     }
3665     case kX64S32x4LoadSplat: {
3666       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3667       if (CpuFeatures::IsSupported(AVX)) {
3668         CpuFeatureScope avx_scope(tasm(), AVX);
3669         __ vbroadcastss(i.OutputSimd128Register(), i.MemoryOperand());
3670       } else {
3671         __ Movss(i.OutputSimd128Register(), i.MemoryOperand());
3672         __ Shufps(i.OutputSimd128Register(), i.OutputSimd128Register(),
3673                   static_cast<byte>(0));
3674       }
3675       break;
3676     }
3677     case kX64S64x2LoadSplat: {
3678       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3679       __ Movddup(i.OutputSimd128Register(), i.MemoryOperand());
3680       break;
3681     }
3682     case kX64I16x8Load8x8S: {
3683       CpuFeatureScope sse_scope(tasm(), SSE4_1);
3684       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3685       __ Pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand());
3686       break;
3687     }
3688     case kX64I16x8Load8x8U: {
3689       CpuFeatureScope sse_scope(tasm(), SSE4_1);
3690       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3691       __ Pmovzxbw(i.OutputSimd128Register(), i.MemoryOperand());
3692       break;
3693     }
3694     case kX64I32x4Load16x4S: {
3695       CpuFeatureScope sse_scope(tasm(), SSE4_1);
3696       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3697       __ Pmovsxwd(i.OutputSimd128Register(), i.MemoryOperand());
3698       break;
3699     }
3700     case kX64I32x4Load16x4U: {
3701       CpuFeatureScope sse_scope(tasm(), SSE4_1);
3702       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3703       __ Pmovzxwd(i.OutputSimd128Register(), i.MemoryOperand());
3704       break;
3705     }
3706     case kX64I64x2Load32x2S: {
3707       CpuFeatureScope sse_scope(tasm(), SSE4_1);
3708       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3709       __ Pmovsxdq(i.OutputSimd128Register(), i.MemoryOperand());
3710       break;
3711     }
3712     case kX64I64x2Load32x2U: {
3713       CpuFeatureScope sse_scope(tasm(), SSE4_1);
3714       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3715       __ Pmovzxdq(i.OutputSimd128Register(), i.MemoryOperand());
3716       break;
3717     }
3718     case kX64S32x4Swizzle: {
3719       DCHECK_EQ(2, instr->InputCount());
3720       ASSEMBLE_SIMD_IMM_INSTR(Pshufd, i.OutputSimd128Register(), 0,
3721                               i.InputUint8(1));
3722       break;
3723     }
3724     case kX64S32x4Shuffle: {
3725       DCHECK_EQ(4, instr->InputCount());  // Swizzles should be handled above.
3726       uint8_t shuffle = i.InputUint8(2);
3727       DCHECK_NE(0xe4, shuffle);  // A simple blend should be handled below.
3728       ASSEMBLE_SIMD_IMM_INSTR(Pshufd, kScratchDoubleReg, 1, shuffle);
3729       ASSEMBLE_SIMD_IMM_INSTR(Pshufd, i.OutputSimd128Register(), 0, shuffle);
3730       __ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputUint8(3));
3731       break;
3732     }
3733     case kX64S16x8Blend: {
3734       ASSEMBLE_SIMD_IMM_SHUFFLE(Pblendw, i.InputUint8(2));
3735       break;
3736     }
3737     case kX64S16x8HalfShuffle1: {
3738       XMMRegister dst = i.OutputSimd128Register();
3739       ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, i.InputUint8(1));
3740       __ Pshufhw(dst, dst, i.InputUint8(2));
3741       break;
3742     }
3743     case kX64S16x8HalfShuffle2: {
3744       XMMRegister dst = i.OutputSimd128Register();
3745       ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, kScratchDoubleReg, 1, i.InputUint8(2));
3746       __ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputUint8(3));
3747       ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, i.InputUint8(2));
3748       __ Pshufhw(dst, dst, i.InputUint8(3));
3749       __ Pblendw(dst, kScratchDoubleReg, i.InputUint8(4));
3750       break;
3751     }
3752     case kX64S8x16Alignr: {
3753       ASSEMBLE_SIMD_IMM_SHUFFLE(Palignr, i.InputUint8(2));
3754       break;
3755     }
3756     case kX64S16x8Dup: {
3757       XMMRegister dst = i.OutputSimd128Register();
3758       uint8_t lane = i.InputInt8(1) & 0x7;
3759       uint8_t lane4 = lane & 0x3;
3760       uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
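           // Replicate the chosen word within its 64-bit half via pshuflw/pshufhw,
           // then broadcast that half across the register with pshufd.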
3761       if (lane < 4) {
3762         ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, half_dup);
3763         __ Pshufd(dst, dst, static_cast<uint8_t>(0));
3764       } else {
3765         ASSEMBLE_SIMD_IMM_INSTR(Pshufhw, dst, 0, half_dup);
3766         __ Pshufd(dst, dst, static_cast<uint8_t>(0xaa));
3767       }
3768       break;
3769     }
3770     case kX64S8x16Dup: {
3771       XMMRegister dst = i.OutputSimd128Register();
3772       uint8_t lane = i.InputInt8(1) & 0xf;
3773       DCHECK_EQ(dst, i.InputSimd128Register(0));
3774       if (lane < 8) {
3775         __ Punpcklbw(dst, dst);
3776       } else {
3777         __ Punpckhbw(dst, dst);
3778       }
3779       lane &= 0x7;
3780       uint8_t lane4 = lane & 0x3;
3781       uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3782       if (lane < 4) {
3783         __ Pshuflw(dst, dst, half_dup);
3784         __ Pshufd(dst, dst, static_cast<uint8_t>(0));
3785       } else {
3786         __ Pshufhw(dst, dst, half_dup);
3787         __ Pshufd(dst, dst, static_cast<uint8_t>(0xaa));
3788       }
3789       break;
3790     }
3791     case kX64S64x2UnpackHigh:
3792       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhqdq);
3793       break;
3794     case kX64S32x4UnpackHigh:
3795       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhdq);
3796       break;
3797     case kX64S16x8UnpackHigh:
3798       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhwd);
3799       break;
3800     case kX64S8x16UnpackHigh:
3801       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckhbw);
3802       break;
3803     case kX64S64x2UnpackLow:
3804       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpcklqdq);
3805       break;
3806     case kX64S32x4UnpackLow:
3807       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpckldq);
3808       break;
3809     case kX64S16x8UnpackLow:
3810       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpcklwd);
3811       break;
3812     case kX64S8x16UnpackLow:
3813       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(Punpcklbw);
3814       break;
3815     case kX64S16x8UnzipHigh: {
3816       XMMRegister dst = i.OutputSimd128Register();
3817       XMMRegister src2 = dst;
3818       DCHECK_EQ(dst, i.InputSimd128Register(0));
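           // Extract the odd-numbered 16-bit lanes: shift each 32-bit lane right by
           // 16, then pack the dwords back into words.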
3819       if (instr->InputCount() == 2) {
3820         ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
3821         __ Psrld(kScratchDoubleReg, static_cast<byte>(16));
3822         src2 = kScratchDoubleReg;
3823       }
3824       __ Psrld(dst, static_cast<byte>(16));
3825       __ Packusdw(dst, src2);
3826       break;
3827     }
3828     case kX64S16x8UnzipLow: {
3829       XMMRegister dst = i.OutputSimd128Register();
3830       XMMRegister src2 = dst;
3831       DCHECK_EQ(dst, i.InputSimd128Register(0));
3832       __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
3833       if (instr->InputCount() == 2) {
3834         ASSEMBLE_SIMD_IMM_INSTR(Pblendw, kScratchDoubleReg, 1,
3835                                 static_cast<uint8_t>(0x55));
3836         src2 = kScratchDoubleReg;
3837       }
3838       __ Pblendw(dst, kScratchDoubleReg, static_cast<uint8_t>(0xaa));
3839       __ Packusdw(dst, src2);
3840       break;
3841     }
3842     case kX64S8x16UnzipHigh: {
3843       XMMRegister dst = i.OutputSimd128Register();
3844       XMMRegister src2 = dst;
3845       DCHECK_EQ(dst, i.InputSimd128Register(0));
3846       if (instr->InputCount() == 2) {
3847         ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
3848         __ Psrlw(kScratchDoubleReg, static_cast<byte>(8));
3849         src2 = kScratchDoubleReg;
3850       }
3851       __ Psrlw(dst, static_cast<byte>(8));
3852       __ Packuswb(dst, src2);
3853       break;
3854     }
3855     case kX64S8x16UnzipLow: {
3856       XMMRegister dst = i.OutputSimd128Register();
3857       XMMRegister src2 = dst;
3858       DCHECK_EQ(dst, i.InputSimd128Register(0));
3859       if (instr->InputCount() == 2) {
3860         ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
3861         __ Psllw(kScratchDoubleReg, static_cast<byte>(8));
3862         __ Psrlw(kScratchDoubleReg, static_cast<byte>(8));
3863         src2 = kScratchDoubleReg;
3864       }
3865       __ Psllw(dst, static_cast<byte>(8));
3866       __ Psrlw(dst, static_cast<byte>(8));
3867       __ Packuswb(dst, src2);
3868       break;
3869     }
3870     case kX64S8x16TransposeLow: {
3871       XMMRegister dst = i.OutputSimd128Register();
3872       DCHECK_EQ(dst, i.InputSimd128Register(0));
3873       __ Psllw(dst, static_cast<byte>(8));
3874       if (instr->InputCount() == 1) {
3875         __ Movups(kScratchDoubleReg, dst);
3876       } else {
3877         DCHECK_EQ(2, instr->InputCount());
3878         ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
3879         __ Psllw(kScratchDoubleReg, static_cast<byte>(8));
3880       }
3881       __ Psrlw(dst, static_cast<byte>(8));
3882       __ Por(dst, kScratchDoubleReg);
3883       break;
3884     }
3885     case kX64S8x16TransposeHigh: {
3886       XMMRegister dst = i.OutputSimd128Register();
3887       DCHECK_EQ(dst, i.InputSimd128Register(0));
3888       __ Psrlw(dst, static_cast<byte>(8));
3889       if (instr->InputCount() == 1) {
3890         __ Movups(kScratchDoubleReg, dst);
3891       } else {
3892         DCHECK_EQ(2, instr->InputCount());
3893         ASSEMBLE_SIMD_INSTR(Movups, kScratchDoubleReg, 1);
3894         __ Psrlw(kScratchDoubleReg, static_cast<byte>(8));
3895       }
3896       __ Psllw(kScratchDoubleReg, static_cast<byte>(8));
3897       __ Por(dst, kScratchDoubleReg);
3898       break;
3899     }
3900     case kX64S8x8Reverse:
3901     case kX64S8x4Reverse:
3902     case kX64S8x2Reverse: {
3903       DCHECK_EQ(1, instr->InputCount());
3904       XMMRegister dst = i.OutputSimd128Register();
3905       DCHECK_EQ(dst, i.InputSimd128Register(0));
3906       if (arch_opcode != kX64S8x2Reverse) {
3907         // First shuffle words into position.
3908         uint8_t shuffle_mask = arch_opcode == kX64S8x4Reverse ? 0xB1 : 0x1B;
3909         __ Pshuflw(dst, dst, shuffle_mask);
3910         __ Pshufhw(dst, dst, shuffle_mask);
3911       }
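           // Swap the two bytes within each 16-bit lane.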
3912       __ Movaps(kScratchDoubleReg, dst);
3913       __ Psrlw(kScratchDoubleReg, static_cast<byte>(8));
3914       __ Psllw(dst, static_cast<byte>(8));
3915       __ Por(dst, kScratchDoubleReg);
3916       break;
3917     }
3918     case kX64S1x2AnyTrue:
3919     case kX64S1x4AnyTrue:
3920     case kX64S1x8AnyTrue:
3921     case kX64S1x16AnyTrue: {
3922       CpuFeatureScope sse_scope(tasm(), SSE4_1);
3923       Register dst = i.OutputRegister();
3924       XMMRegister src = i.InputSimd128Register(0);
3925 
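           // ptest sets ZF iff src is all zeros; setcc(not_equal) then produces 1
           // when any bit is set.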
3926       __ xorq(dst, dst);
3927       __ Ptest(src, src);
3928       __ setcc(not_equal, dst);
3929       break;
3930     }
3931     // Need to split up all the different lane structures because the
3932     // comparison instruction used matters, e.g. given 0xff00, pcmpeqb (against
3933     // zero) returns 0x00ff and pcmpeqw returns 0x0000, so ptest sets ZF to 0
3934     // and 1 respectively.
3935     case kX64S1x2AllTrue: {
3936       ASSEMBLE_SIMD_ALL_TRUE(pcmpeqq);
3937       break;
3938     }
3939     case kX64S1x4AllTrue: {
3940       ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqd);
3941       break;
3942     }
3943     case kX64S1x8AllTrue: {
3944       ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqw);
3945       break;
3946     }
3947     case kX64S1x16AllTrue: {
3948       ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqb);
3949       break;
3950     }
3951     case kWord32AtomicExchangeInt8: {
3952       __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3953       __ movsxbl(i.InputRegister(0), i.InputRegister(0));
3954       break;
3955     }
3956     case kWord32AtomicExchangeUint8: {
3957       __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3958       __ movzxbl(i.InputRegister(0), i.InputRegister(0));
3959       break;
3960     }
3961     case kWord32AtomicExchangeInt16: {
3962       __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3963       __ movsxwl(i.InputRegister(0), i.InputRegister(0));
3964       break;
3965     }
3966     case kWord32AtomicExchangeUint16: {
3967       __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3968       __ movzxwl(i.InputRegister(0), i.InputRegister(0));
3969       break;
3970     }
3971     case kWord32AtomicExchangeWord32: {
3972       __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
3973       break;
3974     }
3975     case kWord32AtomicCompareExchangeInt8: {
3976       __ lock();
3977       __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3978       __ movsxbl(rax, rax);
3979       break;
3980     }
3981     case kWord32AtomicCompareExchangeUint8: {
3982       __ lock();
3983       __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3984       __ movzxbl(rax, rax);
3985       break;
3986     }
3987     case kWord32AtomicCompareExchangeInt16: {
3988       __ lock();
3989       __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3990       __ movsxwl(rax, rax);
3991       break;
3992     }
3993     case kWord32AtomicCompareExchangeUint16: {
3994       __ lock();
3995       __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3996       __ movzxwl(rax, rax);
3997       break;
3998     }
3999     case kWord32AtomicCompareExchangeWord32: {
4000       __ lock();
4001       __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
4002       break;
4003     }
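     // ASSEMBLE_ATOMIC_BINOP (defined earlier) emits a load / op / lock cmpxchg
     // retry loop; the original memory value is left in rax and then sign- or
     // zero-extended to the operation width below.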
4004 #define ATOMIC_BINOP_CASE(op, inst)              \
4005   case kWord32Atomic##op##Int8:                  \
4006     ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
4007     __ movsxbl(rax, rax);                        \
4008     break;                                       \
4009   case kWord32Atomic##op##Uint8:                 \
4010     ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
4011     __ movzxbl(rax, rax);                        \
4012     break;                                       \
4013   case kWord32Atomic##op##Int16:                 \
4014     ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
4015     __ movsxwl(rax, rax);                        \
4016     break;                                       \
4017   case kWord32Atomic##op##Uint16:                \
4018     ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
4019     __ movzxwl(rax, rax);                        \
4020     break;                                       \
4021   case kWord32Atomic##op##Word32:                \
4022     ASSEMBLE_ATOMIC_BINOP(inst, movl, cmpxchgl); \
4023     break;
4024       ATOMIC_BINOP_CASE(Add, addl)
4025       ATOMIC_BINOP_CASE(Sub, subl)
4026       ATOMIC_BINOP_CASE(And, andl)
4027       ATOMIC_BINOP_CASE(Or, orl)
4028       ATOMIC_BINOP_CASE(Xor, xorl)
4029 #undef ATOMIC_BINOP_CASE
4030     case kX64Word64AtomicExchangeUint8: {
4031       __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
4032       __ movzxbq(i.InputRegister(0), i.InputRegister(0));
4033       break;
4034     }
4035     case kX64Word64AtomicExchangeUint16: {
4036       __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
4037       __ movzxwq(i.InputRegister(0), i.InputRegister(0));
4038       break;
4039     }
4040     case kX64Word64AtomicExchangeUint32: {
4041       __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
4042       break;
4043     }
4044     case kX64Word64AtomicExchangeUint64: {
4045       __ xchgq(i.InputRegister(0), i.MemoryOperand(1));
4046       break;
4047     }
4048     case kX64Word64AtomicCompareExchangeUint8: {
4049       __ lock();
4050       __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
4051       __ movzxbq(rax, rax);
4052       break;
4053     }
4054     case kX64Word64AtomicCompareExchangeUint16: {
4055       __ lock();
4056       __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
4057       __ movzxwq(rax, rax);
4058       break;
4059     }
4060     case kX64Word64AtomicCompareExchangeUint32: {
4061       __ lock();
4062       __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
4063       // Zero-extend the 32 bit value to 64 bit.
4064       __ movl(rax, rax);
4065       break;
4066     }
4067     case kX64Word64AtomicCompareExchangeUint64: {
4068       __ lock();
4069       __ cmpxchgq(i.MemoryOperand(2), i.InputRegister(1));
4070       break;
4071     }
4072 #define ATOMIC64_BINOP_CASE(op, inst)              \
4073   case kX64Word64Atomic##op##Uint8:                \
4074     ASSEMBLE_ATOMIC64_BINOP(inst, movb, cmpxchgb); \
4075     __ movzxbq(rax, rax);                          \
4076     break;                                         \
4077   case kX64Word64Atomic##op##Uint16:               \
4078     ASSEMBLE_ATOMIC64_BINOP(inst, movw, cmpxchgw); \
4079     __ movzxwq(rax, rax);                          \
4080     break;                                         \
4081   case kX64Word64Atomic##op##Uint32:               \
4082     ASSEMBLE_ATOMIC64_BINOP(inst, movl, cmpxchgl); \
4083     break;                                         \
4084   case kX64Word64Atomic##op##Uint64:               \
4085     ASSEMBLE_ATOMIC64_BINOP(inst, movq, cmpxchgq); \
4086     break;
4087       ATOMIC64_BINOP_CASE(Add, addq)
4088       ATOMIC64_BINOP_CASE(Sub, subq)
4089       ATOMIC64_BINOP_CASE(And, andq)
4090       ATOMIC64_BINOP_CASE(Or, orq)
4091       ATOMIC64_BINOP_CASE(Xor, xorq)
4092 #undef ATOMIC64_BINOP_CASE
4093     case kWord32AtomicLoadInt8:
4094     case kWord32AtomicLoadUint8:
4095     case kWord32AtomicLoadInt16:
4096     case kWord32AtomicLoadUint16:
4097     case kWord32AtomicLoadWord32:
4098     case kWord32AtomicStoreWord8:
4099     case kWord32AtomicStoreWord16:
4100     case kWord32AtomicStoreWord32:
4101     case kX64Word64AtomicLoadUint8:
4102     case kX64Word64AtomicLoadUint16:
4103     case kX64Word64AtomicLoadUint32:
4104     case kX64Word64AtomicLoadUint64:
4105     case kX64Word64AtomicStoreWord8:
4106     case kX64Word64AtomicStoreWord16:
4107     case kX64Word64AtomicStoreWord32:
4108     case kX64Word64AtomicStoreWord64:
4109       UNREACHABLE();  // Won't be generated by instruction selector.
4110       break;
4111   }
4112   return kSuccess;
4113 }  // NOLINT(readability/fn_size)
4114 
4115 #undef ASSEMBLE_UNOP
4116 #undef ASSEMBLE_BINOP
4117 #undef ASSEMBLE_COMPARE
4118 #undef ASSEMBLE_MULT
4119 #undef ASSEMBLE_SHIFT
4120 #undef ASSEMBLE_MOVX
4121 #undef ASSEMBLE_SSE_BINOP
4122 #undef ASSEMBLE_SSE_UNOP
4123 #undef ASSEMBLE_AVX_BINOP
4124 #undef ASSEMBLE_IEEE754_BINOP
4125 #undef ASSEMBLE_IEEE754_UNOP
4126 #undef ASSEMBLE_ATOMIC_BINOP
4127 #undef ASSEMBLE_ATOMIC64_BINOP
4128 #undef ASSEMBLE_SIMD_INSTR
4129 #undef ASSEMBLE_SIMD_IMM_INSTR
4130 #undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
4131 #undef ASSEMBLE_SIMD_IMM_SHUFFLE
4132 #undef ASSEMBLE_SIMD_ALL_TRUE
4133 #undef ASSEMBLE_SIMD_SHIFT
4134 
4135 namespace {
4136 
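     // Maps a FlagsCondition to an x64 condition code. Note that for
     // kUnorderedEqual / kUnorderedNotEqual this is only half the story:
     // callers additionally branch on parity_even to handle the unordered
     // (NaN) comparison result, see e.g. AssembleArchBranch below.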
4137 Condition FlagsConditionToCondition(FlagsCondition condition) {
4138   switch (condition) {
4139     case kUnorderedEqual:
4140     case kEqual:
4141       return equal;
4142     case kUnorderedNotEqual:
4143     case kNotEqual:
4144       return not_equal;
4145     case kSignedLessThan:
4146       return less;
4147     case kSignedGreaterThanOrEqual:
4148       return greater_equal;
4149     case kSignedLessThanOrEqual:
4150       return less_equal;
4151     case kSignedGreaterThan:
4152       return greater;
4153     case kUnsignedLessThan:
4154       return below;
4155     case kUnsignedGreaterThanOrEqual:
4156       return above_equal;
4157     case kUnsignedLessThanOrEqual:
4158       return below_equal;
4159     case kUnsignedGreaterThan:
4160       return above;
4161     case kOverflow:
4162       return overflow;
4163     case kNotOverflow:
4164       return no_overflow;
4165     default:
4166       break;
4167   }
4168   UNREACHABLE();
4169 }
4170 
4171 }  // namespace
4172 
4173 // Assembles branches after this instruction.
4174 void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
4175   Label::Distance flabel_distance =
4176       branch->fallthru ? Label::kNear : Label::kFar;
4177   Label* tlabel = branch->true_label;
4178   Label* flabel = branch->false_label;
4179   if (branch->condition == kUnorderedEqual) {
4180     __ j(parity_even, flabel, flabel_distance);
4181   } else if (branch->condition == kUnorderedNotEqual) {
4182     __ j(parity_even, tlabel);
4183   }
4184   __ j(FlagsConditionToCondition(branch->condition), tlabel);
4185 
4186   if (!branch->fallthru) __ jmp(flabel, flabel_distance);
4187 }
4188 
4189 void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
4190                                             Instruction* instr) {
4191   // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
4192   if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
4193     return;
4194   }
4195 
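       // Rough intent (as read from the code): negate the expected condition
       // and, if the actual flags satisfy that negation, cmov zero into
       // kSpeculationPoisonRegister so that poison-masked accesses on a
       // mis-speculated path are masked out.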
4196   condition = NegateFlagsCondition(condition);
4197   __ movl(kScratchRegister, Immediate(0));
4198   __ cmovq(FlagsConditionToCondition(condition), kSpeculationPoisonRegister,
4199            kScratchRegister);
4200 }
4201 
4202 void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
4203                                             BranchInfo* branch) {
4204   Label::Distance flabel_distance =
4205       branch->fallthru ? Label::kNear : Label::kFar;
4206   Label* tlabel = branch->true_label;
4207   Label* flabel = branch->false_label;
4208   Label nodeopt;
4209   if (branch->condition == kUnorderedEqual) {
4210     __ j(parity_even, flabel, flabel_distance);
4211   } else if (branch->condition == kUnorderedNotEqual) {
4212     __ j(parity_even, tlabel);
4213   }
4214   __ j(FlagsConditionToCondition(branch->condition), tlabel);
4215 
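       // With --deopt-every-n-times, keep an external countdown and force the
       // deoptimizing branch whenever it reaches zero; rax and the flags are
       // preserved around the bookkeeping.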
4216   if (FLAG_deopt_every_n_times > 0) {
4217     ExternalReference counter =
4218         ExternalReference::stress_deopt_count(isolate());
4219 
4220     __ pushfq();
4221     __ pushq(rax);
4222     __ load_rax(counter);
4223     __ decl(rax);
4224     __ j(not_zero, &nodeopt, Label::kNear);
4225 
4226     __ Set(rax, FLAG_deopt_every_n_times);
4227     __ store_rax(counter);
4228     __ popq(rax);
4229     __ popfq();
4230     __ jmp(tlabel);
4231 
4232     __ bind(&nodeopt);
4233     __ store_rax(counter);
4234     __ popq(rax);
4235     __ popfq();
4236   }
4237 
4238   if (!branch->fallthru) {
4239     __ jmp(flabel, flabel_distance);
4240   }
4241 }
4242 
4243 void CodeGenerator::AssembleArchJump(RpoNumber target) {
4244   if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
4245 }
4246 
4247 void CodeGenerator::AssembleArchTrap(Instruction* instr,
4248                                      FlagsCondition condition) {
4249   auto ool = new (zone()) WasmOutOfLineTrap(this, instr);
4250   Label* tlabel = ool->entry();
4251   Label end;
4252   if (condition == kUnorderedEqual) {
4253     __ j(parity_even, &end, Label::kNear);
4254   } else if (condition == kUnorderedNotEqual) {
4255     __ j(parity_even, tlabel);
4256   }
4257   __ j(FlagsConditionToCondition(condition), tlabel);
4258   __ bind(&end);
4259 }
4260 
4261 // Assembles boolean materializations after this instruction.
4262 void CodeGenerator::AssembleArchBoolean(Instruction* instr,
4263                                         FlagsCondition condition) {
4264   X64OperandConverter i(this, instr);
4265   Label done;
4266 
4267   // Materialize a full 64-bit 1 or 0 value. The result register is always the
4268   // last output of the instruction.
4269   Label check;
4270   DCHECK_NE(0u, instr->OutputCount());
4271   Register reg = i.OutputRegister(instr->OutputCount() - 1);
4272   if (condition == kUnorderedEqual) {
4273     __ j(parity_odd, &check, Label::kNear);
4274     __ movl(reg, Immediate(0));
4275     __ jmp(&done, Label::kNear);
4276   } else if (condition == kUnorderedNotEqual) {
4277     __ j(parity_odd, &check, Label::kNear);
4278     __ movl(reg, Immediate(1));
4279     __ jmp(&done, Label::kNear);
4280   }
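       // setcc writes only the low byte of |reg|; movzxbl widens it to 32 bits,
       // and the 32-bit write implicitly clears the upper half, yielding a full
       // 64-bit 0 or 1.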
4281   __ bind(&check);
4282   __ setcc(FlagsConditionToCondition(condition), reg);
4283   __ movzxbl(reg, reg);
4284   __ bind(&done);
4285 }
4286 
4287 void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
4288   X64OperandConverter i(this, instr);
4289   Register input = i.InputRegister(0);
4290   std::vector<std::pair<int32_t, Label*>> cases;
4291   for (size_t index = 2; index < instr->InputCount(); index += 2) {
4292     cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
4293   }
4294   AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
4295                                       cases.data() + cases.size());
4296 }
4297 
4298 void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
4299   X64OperandConverter i(this, instr);
4300   Register input = i.InputRegister(0);
4301   int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2);
4302   Label** cases = zone()->NewArray<Label*>(case_count);
4303   for (int32_t index = 0; index < case_count; ++index) {
4304     cases[index] = GetLabel(i.InputRpo(index + 2));
4305   }
4306   Label* const table = AddJumpTable(cases, case_count);
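       // Bounds-check the input against case_count (falling back to the default
       // target in input 1), then jump through the table of 8-byte absolute
       // labels emitted by AssembleJumpTable at the end of this file.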
4307   __ cmpl(input, Immediate(case_count));
4308   __ j(above_equal, GetLabel(i.InputRpo(1)));
4309   __ leaq(kScratchRegister, Operand(table));
4310   __ jmp(Operand(kScratchRegister, input, times_8, 0));
4311 }
4312 
4313 namespace {
4314 
4315 static const int kQuadWordSize = 16;
4316 
4317 }  // namespace
4318 
4319 void CodeGenerator::FinishFrame(Frame* frame) {
4320   auto call_descriptor = linkage()->GetIncomingDescriptor();
4321 
4322   const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
4323   if (saves_fp != 0) {
4324     frame->AlignSavedCalleeRegisterSlots();
4325     if (saves_fp != 0) {  // Save callee-saved XMM registers.
4326       const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
4327       frame->AllocateSavedCalleeRegisterSlots(
4328           saves_fp_count * (kQuadWordSize / kSystemPointerSize));
4329     }
4330   }
4331   const RegList saves = call_descriptor->CalleeSavedRegisters();
4332   if (saves != 0) {  // Save callee-saved registers.
4333     int count = 0;
4334     for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
4335       if (((1 << i) & saves)) {
4336         ++count;
4337       }
4338     }
4339     frame->AllocateSavedCalleeRegisterSlots(count);
4340   }
4341 }
4342 
4343 void CodeGenerator::AssembleConstructFrame() {
4344   auto call_descriptor = linkage()->GetIncomingDescriptor();
4345   if (frame_access_state()->has_frame()) {
4346     int pc_base = __ pc_offset();
4347 
4348     if (call_descriptor->IsCFunctionCall()) {
4349       __ pushq(rbp);
4350       __ movq(rbp, rsp);
4351       if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
4352         __ Push(Immediate(StackFrame::TypeToMarker(StackFrame::C_WASM_ENTRY)));
4353         // Reserve stack space for saving the c_entry_fp later.
4354         __ AllocateStackSpace(kSystemPointerSize);
4355       }
4356     } else if (call_descriptor->IsJSFunctionCall()) {
4357       __ Prologue();
4358       if (call_descriptor->PushArgumentCount()) {
4359         __ pushq(kJavaScriptCallArgCountRegister);
4360       }
4361     } else {
4362       __ StubPrologue(info()->GetOutputStackFrameType());
4363       if (call_descriptor->IsWasmFunctionCall()) {
4364         __ pushq(kWasmInstanceRegister);
4365       } else if (call_descriptor->IsWasmImportWrapper() ||
4366                  call_descriptor->IsWasmCapiFunction()) {
4367         // Wasm import wrappers are passed a tuple in the place of the instance.
4368         // Unpack the tuple into the instance and the target callable.
4369         // This must be done here in the codegen because it cannot be expressed
4370         // properly in the graph.
4371         __ LoadTaggedPointerField(
4372             kJSFunctionRegister,
4373             FieldOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
4374         __ LoadTaggedPointerField(
4375             kWasmInstanceRegister,
4376             FieldOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
4377         __ pushq(kWasmInstanceRegister);
4378         if (call_descriptor->IsWasmCapiFunction()) {
4379           // Reserve space for saving the PC later.
4380           __ AllocateStackSpace(kSystemPointerSize);
4381         }
4382       }
4383     }
4384 
4385     unwinding_info_writer_.MarkFrameConstructed(pc_base);
4386   }
4387   int required_slots =
4388       frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();
4389 
4390   if (info()->is_osr()) {
4391     // TurboFan OSR-compiled functions cannot be entered directly.
4392     __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
4393 
4394     // Unoptimized code jumps directly to this entrypoint while the unoptimized
4395     // frame is still on the stack. Optimized code uses OSR values directly from
4396     // the unoptimized frame. Thus, all that needs to be done is to allocate the
4397     // remaining stack slots.
4398     if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
4399     osr_pc_offset_ = __ pc_offset();
4400     required_slots -= static_cast<int>(osr_helper()->UnoptimizedFrameSlots());
4401     ResetSpeculationPoison();
4402   }
4403 
4404   const RegList saves = call_descriptor->CalleeSavedRegisters();
4405   const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
4406 
4407   if (required_slots > 0) {
4408     DCHECK(frame_access_state()->has_frame());
4409     if (info()->IsWasm() && required_slots > 128) {
4410       // For WebAssembly functions with big frames we have to do the stack
4411       // overflow check before we construct the frame. Otherwise we may not
4412       // have enough space on the stack to call the runtime for the stack
4413       // overflow.
4414       Label done;
4415 
4416       // If the frame is bigger than the stack, we throw the stack overflow
4417       // exception unconditionally. Thereby we can avoid the integer overflow
4418       // check in the condition code.
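           // (FLAG_stack_size is given in KB, hence the * 1024 below; frames
           // needing more space than the whole stack skip the limit check and
           // trap unconditionally.)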
4419       if (required_slots * kSystemPointerSize < FLAG_stack_size * 1024) {
4420         __ movq(kScratchRegister,
4421                 FieldOperand(kWasmInstanceRegister,
4422                              WasmInstanceObject::kRealStackLimitAddressOffset));
4423         __ movq(kScratchRegister, Operand(kScratchRegister, 0));
4424         __ addq(kScratchRegister,
4425                 Immediate(required_slots * kSystemPointerSize));
4426         __ cmpq(rsp, kScratchRegister);
4427         __ j(above_equal, &done, Label::kNear);
4428       }
4429 
4430       __ near_call(wasm::WasmCode::kWasmStackOverflow,
4431                    RelocInfo::WASM_STUB_CALL);
4432       ReferenceMap* reference_map = new (zone()) ReferenceMap(zone());
4433       RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
4434       __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
4435       __ bind(&done);
4436     }
4437 
4438     // Skip callee-saved and return slots, which are created below.
4439     required_slots -= base::bits::CountPopulation(saves);
4440     required_slots -= base::bits::CountPopulation(saves_fp) *
4441                       (kQuadWordSize / kSystemPointerSize);
4442     required_slots -= frame()->GetReturnSlotCount();
4443     if (required_slots > 0) {
4444       __ AllocateStackSpace(required_slots * kSystemPointerSize);
4445     }
4446   }
4447 
4448   if (saves_fp != 0) {  // Save callee-saved XMM registers.
4449     const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
4450     const int stack_size = saves_fp_count * kQuadWordSize;
4451     // Adjust the stack pointer.
4452     __ AllocateStackSpace(stack_size);
4453     // Store the registers on the stack.
4454     int slot_idx = 0;
4455     for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
4456       if (!((1 << i) & saves_fp)) continue;
4457       __ Movdqu(Operand(rsp, kQuadWordSize * slot_idx),
4458                 XMMRegister::from_code(i));
4459       slot_idx++;
4460     }
4461   }
4462 
4463   if (saves != 0) {  // Save callee-saved registers.
4464     for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
4465       if (!((1 << i) & saves)) continue;
4466       __ pushq(Register::from_code(i));
4467     }
4468   }
4469 
4470   // Allocate return slots (located after callee-saved).
4471   if (frame()->GetReturnSlotCount() > 0) {
4472     __ AllocateStackSpace(frame()->GetReturnSlotCount() * kSystemPointerSize);
4473   }
4474 }
4475 
4476 void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
4477   auto call_descriptor = linkage()->GetIncomingDescriptor();
4478 
4479   // Restore registers.
4480   const RegList saves = call_descriptor->CalleeSavedRegisters();
4481   if (saves != 0) {
4482     const int returns = frame()->GetReturnSlotCount();
4483     if (returns != 0) {
4484       __ addq(rsp, Immediate(returns * kSystemPointerSize));
4485     }
4486     for (int i = 0; i < Register::kNumRegisters; i++) {
4487       if (!((1 << i) & saves)) continue;
4488       __ popq(Register::from_code(i));
4489     }
4490   }
4491   const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
4492   if (saves_fp != 0) {
4493     const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
4494     const int stack_size = saves_fp_count * kQuadWordSize;
4495     // Load the registers from the stack.
4496     int slot_idx = 0;
4497     for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
4498       if (!((1 << i) & saves_fp)) continue;
4499       __ Movdqu(XMMRegister::from_code(i),
4500                 Operand(rsp, kQuadWordSize * slot_idx));
4501       slot_idx++;
4502     }
4503     // Adjust the stack pointer.
4504     __ addq(rsp, Immediate(stack_size));
4505   }
4506 
4507   unwinding_info_writer_.MarkBlockWillExit();
4508 
4509   // Might need rcx for scratch if pop_size is too big or if there is a variable
4510   // pop count.
4511   DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rcx.bit());
4512   DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rdx.bit());
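       // rdx is only needed when the dynamic pop count itself lives in rcx; in
       // that case the register-pop path at the end of this function uses rdx
       // as the scratch register for the return address.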
4513   size_t pop_size = call_descriptor->StackParameterCount() * kSystemPointerSize;
4514   X64OperandConverter g(this, nullptr);
4515   if (call_descriptor->IsCFunctionCall()) {
4516     AssembleDeconstructFrame();
4517   } else if (frame_access_state()->has_frame()) {
4518     if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
4519       // Canonicalize JSFunction return sites for now.
4520       if (return_label_.is_bound()) {
4521         __ jmp(&return_label_);
4522         return;
4523       } else {
4524         __ bind(&return_label_);
4525         AssembleDeconstructFrame();
4526       }
4527     } else {
4528       AssembleDeconstructFrame();
4529     }
4530   }
4531 
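       // A constant pop count folds into the Ret immediate below; a dynamic pop
       // count pops the return address into a scratch register, drops the
       // arguments with leaq, and jumps back through that register.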
4532   if (pop->IsImmediate()) {
4533     pop_size += g.ToConstant(pop).ToInt32() * kSystemPointerSize;
4534     CHECK_LT(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
4535     __ Ret(static_cast<int>(pop_size), rcx);
4536   } else {
4537     Register pop_reg = g.ToRegister(pop);
4538     Register scratch_reg = pop_reg == rcx ? rdx : rcx;
4539     __ popq(scratch_reg);
4540     __ leaq(rsp, Operand(rsp, pop_reg, times_8, static_cast<int>(pop_size)));
4541     __ jmp(scratch_reg);
4542   }
4543 }
4544 
4545 void CodeGenerator::FinishCode() { tasm()->PatchConstPool(); }
4546 
4547 void CodeGenerator::PrepareForDeoptimizationExits(int deopt_count) {}
4548 
4549 void CodeGenerator::AssembleMove(InstructionOperand* source,
4550                                  InstructionOperand* destination) {
4551   X64OperandConverter g(this, nullptr);
4552   // Helper function to write the given constant to the dst register.
4553   auto MoveConstantToRegister = [&](Register dst, Constant src) {
4554     switch (src.type()) {
4555       case Constant::kInt32: {
4556         if (RelocInfo::IsWasmReference(src.rmode())) {
4557           __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
4558         } else {
4559           int32_t value = src.ToInt32();
4560           if (value == 0) {
4561             __ xorl(dst, dst);
4562           } else {
4563             __ movl(dst, Immediate(value));
4564           }
4565         }
4566         break;
4567       }
4568       case Constant::kInt64:
4569         if (RelocInfo::IsWasmReference(src.rmode())) {
4570           __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
4571         } else {
4572           __ Set(dst, src.ToInt64());
4573         }
4574         break;
4575       case Constant::kFloat32:
4576         __ MoveNumber(dst, src.ToFloat32());
4577         break;
4578       case Constant::kFloat64:
4579         __ MoveNumber(dst, src.ToFloat64().value());
4580         break;
4581       case Constant::kExternalReference:
4582         __ Move(dst, src.ToExternalReference());
4583         break;
4584       case Constant::kHeapObject: {
4585         Handle<HeapObject> src_object = src.ToHeapObject();
4586         RootIndex index;
4587         if (IsMaterializableFromRoot(src_object, &index)) {
4588           __ LoadRoot(dst, index);
4589         } else {
4590           __ Move(dst, src_object);
4591         }
4592         break;
4593       }
4594       case Constant::kCompressedHeapObject: {
4595         Handle<HeapObject> src_object = src.ToHeapObject();
4596         RootIndex index;
4597         if (IsMaterializableFromRoot(src_object, &index)) {
4598           __ LoadRoot(dst, index);
4599         } else {
4600           __ Move(dst, src_object, RelocInfo::COMPRESSED_EMBEDDED_OBJECT);
4601         }
4602         break;
4603       }
4604       case Constant::kDelayedStringConstant: {
4605         const StringConstantBase* src_constant = src.ToDelayedStringConstant();
4606         __ MoveStringConstant(dst, src_constant);
4607         break;
4608       }
4609       case Constant::kRpoNumber:
4610         UNREACHABLE();  // TODO(dcarney): load of labels on x64.
4611         break;
4612     }
4613   };
4614   // Helper function to write the given constant to the stack.
4615   auto MoveConstantToSlot = [&](Operand dst, Constant src) {
4616     if (!RelocInfo::IsWasmReference(src.rmode())) {
4617       switch (src.type()) {
4618         case Constant::kInt32:
4619           __ movq(dst, Immediate(src.ToInt32()));
4620           return;
4621         case Constant::kInt64:
4622           __ Set(dst, src.ToInt64());
4623           return;
4624         default:
4625           break;
4626       }
4627     }
4628     MoveConstantToRegister(kScratchRegister, src);
4629     __ movq(dst, kScratchRegister);
4630   };
4631   // Dispatch on the source and destination operand kinds.
4632   switch (MoveType::InferMove(source, destination)) {
4633     case MoveType::kRegisterToRegister:
4634       if (source->IsRegister()) {
4635         __ movq(g.ToRegister(destination), g.ToRegister(source));
4636       } else {
4637         DCHECK(source->IsFPRegister());
4638         __ Movapd(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
4639       }
4640       return;
4641     case MoveType::kRegisterToStack: {
4642       Operand dst = g.ToOperand(destination);
4643       if (source->IsRegister()) {
4644         __ movq(dst, g.ToRegister(source));
4645       } else {
4646         DCHECK(source->IsFPRegister());
4647         XMMRegister src = g.ToDoubleRegister(source);
4648         MachineRepresentation rep =
4649             LocationOperand::cast(source)->representation();
4650         if (rep != MachineRepresentation::kSimd128) {
4651           __ Movsd(dst, src);
4652         } else {
4653           __ Movups(dst, src);
4654         }
4655       }
4656       return;
4657     }
4658     case MoveType::kStackToRegister: {
4659       Operand src = g.ToOperand(source);
4660       if (source->IsStackSlot()) {
4661         __ movq(g.ToRegister(destination), src);
4662       } else {
4663         DCHECK(source->IsFPStackSlot());
4664         XMMRegister dst = g.ToDoubleRegister(destination);
4665         MachineRepresentation rep =
4666             LocationOperand::cast(source)->representation();
4667         if (rep != MachineRepresentation::kSimd128) {
4668           __ Movsd(dst, src);
4669         } else {
4670           __ Movups(dst, src);
4671         }
4672       }
4673       return;
4674     }
4675     case MoveType::kStackToStack: {
4676       Operand src = g.ToOperand(source);
4677       Operand dst = g.ToOperand(destination);
4678       if (source->IsStackSlot()) {
4679         // Spill on demand to use a temporary register for memory-to-memory
4680         // moves.
4681         __ movq(kScratchRegister, src);
4682         __ movq(dst, kScratchRegister);
4683       } else {
4684         MachineRepresentation rep =
4685             LocationOperand::cast(source)->representation();
4686         if (rep != MachineRepresentation::kSimd128) {
4687           __ Movsd(kScratchDoubleReg, src);
4688           __ Movsd(dst, kScratchDoubleReg);
4689         } else {
4690           DCHECK(source->IsSimd128StackSlot());
4691           __ Movups(kScratchDoubleReg, src);
4692           __ Movups(dst, kScratchDoubleReg);
4693         }
4694       }
4695       return;
4696     }
4697     case MoveType::kConstantToRegister: {
4698       Constant src = g.ToConstant(source);
4699       if (destination->IsRegister()) {
4700         MoveConstantToRegister(g.ToRegister(destination), src);
4701       } else {
4702         DCHECK(destination->IsFPRegister());
4703         XMMRegister dst = g.ToDoubleRegister(destination);
4704         if (src.type() == Constant::kFloat32) {
4705           // TODO(turbofan): Can we do better here?
4706           __ Move(dst, bit_cast<uint32_t>(src.ToFloat32()));
4707         } else {
4708           DCHECK_EQ(src.type(), Constant::kFloat64);
4709           __ Move(dst, src.ToFloat64().AsUint64());
4710         }
4711       }
4712       return;
4713     }
4714     case MoveType::kConstantToStack: {
4715       Constant src = g.ToConstant(source);
4716       Operand dst = g.ToOperand(destination);
4717       if (destination->IsStackSlot()) {
4718         MoveConstantToSlot(dst, src);
4719       } else {
4720         DCHECK(destination->IsFPStackSlot());
4721         if (src.type() == Constant::kFloat32) {
4722           __ movl(dst, Immediate(bit_cast<uint32_t>(src.ToFloat32())));
4723         } else {
4724           DCHECK_EQ(src.type(), Constant::kFloat64);
4725           __ movq(kScratchRegister, src.ToFloat64().AsUint64());
4726           __ movq(dst, kScratchRegister);
4727         }
4728       }
4729       return;
4730     }
4731   }
4732   UNREACHABLE();
4733 }
4734 
4735 void CodeGenerator::AssembleSwap(InstructionOperand* source,
4736                                  InstructionOperand* destination) {
4737   X64OperandConverter g(this, nullptr);
4738   // Dispatch on the source and destination operand kinds.  Not all
4739   // combinations are possible.
4740   switch (MoveType::InferSwap(source, destination)) {
4741     case MoveType::kRegisterToRegister: {
4742       if (source->IsRegister()) {
4743         Register src = g.ToRegister(source);
4744         Register dst = g.ToRegister(destination);
4745         __ movq(kScratchRegister, src);
4746         __ movq(src, dst);
4747         __ movq(dst, kScratchRegister);
4748       } else {
4749         DCHECK(source->IsFPRegister());
4750         XMMRegister src = g.ToDoubleRegister(source);
4751         XMMRegister dst = g.ToDoubleRegister(destination);
4752         __ Movapd(kScratchDoubleReg, src);
4753         __ Movapd(src, dst);
4754         __ Movapd(dst, kScratchDoubleReg);
4755       }
4756       return;
4757     }
4758     case MoveType::kRegisterToStack: {
4759       if (source->IsRegister()) {
4760         Register src = g.ToRegister(source);
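             // Swap a GP register with a stack slot via the stack itself: push
             // the register, load the slot into the register, then pop the saved
             // value into the slot.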
4761         __ pushq(src);
4762         frame_access_state()->IncreaseSPDelta(1);
4763         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4764                                                          kSystemPointerSize);
4765         __ movq(src, g.ToOperand(destination));
4766         frame_access_state()->IncreaseSPDelta(-1);
4767         __ popq(g.ToOperand(destination));
4768         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4769                                                          -kSystemPointerSize);
4770       } else {
4771         DCHECK(source->IsFPRegister());
4772         XMMRegister src = g.ToDoubleRegister(source);
4773         Operand dst = g.ToOperand(destination);
4774         MachineRepresentation rep =
4775             LocationOperand::cast(source)->representation();
4776         if (rep != MachineRepresentation::kSimd128) {
4777           __ Movsd(kScratchDoubleReg, src);
4778           __ Movsd(src, dst);
4779           __ Movsd(dst, kScratchDoubleReg);
4780         } else {
4781           __ Movups(kScratchDoubleReg, src);
4782           __ Movups(src, dst);
4783           __ Movups(dst, kScratchDoubleReg);
4784         }
4785       }
4786       return;
4787     }
4788     case MoveType::kStackToStack: {
4789       Operand src = g.ToOperand(source);
4790       Operand dst = g.ToOperand(destination);
4791       MachineRepresentation rep =
4792           LocationOperand::cast(source)->representation();
4793       if (rep != MachineRepresentation::kSimd128) {
4794         Register tmp = kScratchRegister;
4795         __ movq(tmp, dst);
4796         __ pushq(src);  // Then use stack to copy src to destination.
4797         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4798                                                          kSystemPointerSize);
4799         __ popq(dst);
4800         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4801                                                          -kSystemPointerSize);
4802         __ movq(src, tmp);
4803       } else {
4804         // Without AVX, misaligned reads and writes will trap. Move using the
4805         // stack, in two parts.
4806         __ movups(kScratchDoubleReg, dst);  // Save dst in scratch register.
4807         __ pushq(src);  // Then use stack to copy src to destination.
4808         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4809                                                          kSystemPointerSize);
4810         __ popq(dst);
4811         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4812                                                          -kSystemPointerSize);
4813         __ pushq(g.ToOperand(source, kSystemPointerSize));
4814         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4815                                                          kSystemPointerSize);
4816         __ popq(g.ToOperand(destination, kSystemPointerSize));
4817         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
4818                                                          -kSystemPointerSize);
4819         __ movups(src, kScratchDoubleReg);
4820       }
4821       return;
4822     }
4823     default:
4824       UNREACHABLE();
4825   }
4826 }
4827 
4828 void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
4829   for (size_t index = 0; index < target_count; ++index) {
4830     __ dq(targets[index]);
4831   }
4832 }
4833 
4834 #undef __
4835 
4836 }  // namespace compiler
4837 }  // namespace internal
4838 }  // namespace v8
4839