// Copyright 2013 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <limits>

#include "src/base/overflowing-math.h"
#include "src/codegen/assembler.h"
#include "src/codegen/cpu-features.h"
#include "src/codegen/external-reference.h"
#include "src/codegen/macro-assembler.h"
#include "src/codegen/optimized-compilation-info.h"
#include "src/codegen/x64/assembler-x64.h"
#include "src/codegen/x64/register-x64.h"
#include "src/common/globals.h"
#include "src/compiler/backend/code-generator-impl.h"
#include "src/compiler/backend/code-generator.h"
#include "src/compiler/backend/gap-resolver.h"
#include "src/compiler/backend/instruction-codes.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/osr.h"
#include "src/heap/memory-chunk.h"
#include "src/objects/code-kind.h"
#include "src/objects/smi.h"

#if V8_ENABLE_WEBASSEMBLY
#include "src/wasm/wasm-code-manager.h"
#include "src/wasm/wasm-objects.h"
#endif  // V8_ENABLE_WEBASSEMBLY

namespace v8 {
namespace internal {
namespace compiler {

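// Within this file, "__" abbreviates tasm()->, so emitted code reads like
// assembly.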
#define __ tasm()->

// Adds X64 specific methods for decoding operands.
class X64OperandConverter : public InstructionOperandConverter {
 public:
  X64OperandConverter(CodeGenerator* gen, Instruction* instr)
      : InstructionOperandConverter(gen, instr) {}

  Immediate InputImmediate(size_t index) {
    return ToImmediate(instr_->InputAt(index));
  }

  Operand InputOperand(size_t index, int extra = 0) {
    return ToOperand(instr_->InputAt(index), extra);
  }

  Operand OutputOperand() { return ToOperand(instr_->Output()); }

  Immediate ToImmediate(InstructionOperand* operand) {
    Constant constant = ToConstant(operand);
    if (constant.type() == Constant::kFloat64) {
      DCHECK_EQ(0, constant.ToFloat64().AsUint64());
      return Immediate(0);
    }
    if (RelocInfo::IsWasmReference(constant.rmode())) {
      return Immediate(constant.ToInt32(), constant.rmode());
    }
    return Immediate(constant.ToInt32());
  }

  Operand ToOperand(InstructionOperand* op, int extra = 0) {
    DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
    return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
  }

  Operand SlotToOperand(int slot_index, int extra = 0) {
    FrameOffset offset = frame_access_state()->GetFrameOffset(slot_index);
    return Operand(offset.from_stack_pointer() ? rsp : rbp,
                   offset.offset() + extra);
  }

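  // Returns the current input offset and advances it to the next input slot.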
  static size_t NextOffset(size_t* offset) {
    size_t i = *offset;
    (*offset)++;
    return i;
  }

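  // Maps an addressing mode within a scaled family (e.g. kMode_MR1..kMode_MR8)
  // to the corresponding SIB scale factor; {one} is the times_1 member of that
  // family.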
  static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
    STATIC_ASSERT(0 == static_cast<int>(times_1));
    STATIC_ASSERT(1 == static_cast<int>(times_2));
    STATIC_ASSERT(2 == static_cast<int>(times_4));
    STATIC_ASSERT(3 == static_cast<int>(times_8));
    int scale = static_cast<int>(mode - one);
    DCHECK(scale >= 0 && scale < 4);
    return static_cast<ScaleFactor>(scale);
  }

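  // Decodes the instruction's addressing mode into a base/index/scale/disp
  // memory operand, consuming instruction inputs starting at *offset.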
  Operand MemoryOperand(size_t* offset) {
    AddressingMode mode = AddressingModeField::decode(instr_->opcode());
    switch (mode) {
      case kMode_MR: {
        Register base = InputRegister(NextOffset(offset));
        int32_t disp = 0;
        return Operand(base, disp);
      }
      case kMode_MRI: {
        Register base = InputRegister(NextOffset(offset));
        int32_t disp = InputInt32(NextOffset(offset));
        return Operand(base, disp);
      }
      case kMode_MR1:
      case kMode_MR2:
      case kMode_MR4:
      case kMode_MR8: {
        Register base = InputRegister(NextOffset(offset));
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_MR1, mode);
        int32_t disp = 0;
        return Operand(base, index, scale, disp);
      }
      case kMode_MR1I:
      case kMode_MR2I:
      case kMode_MR4I:
      case kMode_MR8I: {
        Register base = InputRegister(NextOffset(offset));
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
        int32_t disp = InputInt32(NextOffset(offset));
        return Operand(base, index, scale, disp);
      }
      case kMode_M1: {
        Register base = InputRegister(NextOffset(offset));
        int32_t disp = 0;
        return Operand(base, disp);
      }
      case kMode_M2:
        UNREACHABLE();  // Should use kModeMR with more compact encoding instead
      case kMode_M4:
      case kMode_M8: {
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_M1, mode);
        int32_t disp = 0;
        return Operand(index, scale, disp);
      }
      case kMode_M1I:
      case kMode_M2I:
      case kMode_M4I:
      case kMode_M8I: {
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_M1I, mode);
        int32_t disp = InputInt32(NextOffset(offset));
        return Operand(index, scale, disp);
      }
      case kMode_Root: {
        Register base = kRootRegister;
        int32_t disp = InputInt32(NextOffset(offset));
        return Operand(base, disp);
      }
      case kMode_None:
        UNREACHABLE();
    }
    UNREACHABLE();
  }

  Operand MemoryOperand(size_t first_input = 0) {
    return MemoryOperand(&first_input);
  }
};

namespace {

bool HasAddressingMode(Instruction* instr) {
  return instr->addressing_mode() != kMode_None;
}

bool HasImmediateInput(Instruction* instr, size_t index) {
  return instr->InputAt(index)->IsImmediate();
}

bool HasRegisterInput(Instruction* instr, size_t index) {
  return instr->InputAt(index)->IsRegister();
}

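// Out-of-line path that materializes a quiet float32 NaN (0.0f / 0.0f) in
// {result}.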
class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
 public:
  OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
      : OutOfLineCode(gen), result_(result) {}

  void Generate() final {
    __ Xorps(result_, result_);
    __ Divss(result_, result_);
  }

 private:
  XMMRegister const result_;
};

class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
 public:
  OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
      : OutOfLineCode(gen), result_(result) {}

  void Generate() final {
    __ Xorpd(result_, result_);
    __ Divsd(result_, result_);
  }

 private:
  XMMRegister const result_;
};

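// Slow path for double-to-integer truncation: spills the input to the stack,
// calls the DoubleToI stub or builtin, and reads the 32-bit result back.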
class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
 public:
  OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
                             XMMRegister input, StubCallMode stub_mode,
                             UnwindingInfoWriter* unwinding_info_writer)
      : OutOfLineCode(gen),
        result_(result),
        input_(input),
#if V8_ENABLE_WEBASSEMBLY
        stub_mode_(stub_mode),
#endif  // V8_ENABLE_WEBASSEMBLY
        unwinding_info_writer_(unwinding_info_writer),
        isolate_(gen->isolate()),
        zone_(gen->zone()) {
  }

  void Generate() final {
    __ AllocateStackSpace(kDoubleSize);
    unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                      kDoubleSize);
    __ Movsd(MemOperand(rsp, 0), input_);
#if V8_ENABLE_WEBASSEMBLY
    if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
#else
    // For balance.
    if (false) {
#endif  // V8_ENABLE_WEBASSEMBLY
    } else if (tasm()->options().inline_offheap_trampolines) {
      // With embedded builtins we do not need the isolate here. This allows
      // the call to be generated asynchronously.
      __ CallBuiltin(Builtin::kDoubleToI);
    } else {
      __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
    }
    __ movl(result_, MemOperand(rsp, 0));
    __ addq(rsp, Immediate(kDoubleSize));
    unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                      -kDoubleSize);
  }

 private:
  Register const result_;
  XMMRegister const input_;
#if V8_ENABLE_WEBASSEMBLY
  StubCallMode stub_mode_;
#endif  // V8_ENABLE_WEBASSEMBLY
  UnwindingInfoWriter* const unwinding_info_writer_;
  Isolate* isolate_;
  Zone* zone_;
};

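// Out-of-line portion of the write barrier: bails out early if the stored
// value's page needs no tracking, otherwise computes the slot address and
// calls the matching record-write stub.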
class OutOfLineRecordWrite final : public OutOfLineCode {
 public:
  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
                       Register value, Register scratch0, Register scratch1,
                       RecordWriteMode mode, StubCallMode stub_mode)
      : OutOfLineCode(gen),
        object_(object),
        operand_(operand),
        value_(value),
        scratch0_(scratch0),
        scratch1_(scratch1),
        mode_(mode),
#if V8_ENABLE_WEBASSEMBLY
        stub_mode_(stub_mode),
#endif  // V8_ENABLE_WEBASSEMBLY
        zone_(gen->zone()) {
    DCHECK(!AreAliased(object, scratch0, scratch1));
    DCHECK(!AreAliased(value, scratch0, scratch1));
  }

  void Generate() final {
    if (COMPRESS_POINTERS_BOOL) {
      __ DecompressTaggedPointer(value_, value_);
    }
    __ CheckPageFlag(value_, scratch0_,
                     MemoryChunk::kPointersToHereAreInterestingMask, zero,
                     exit());
    __ leaq(scratch1_, operand_);

    RememberedSetAction const remembered_set_action =
        mode_ > RecordWriteMode::kValueIsMap ? RememberedSetAction::kEmit
                                             : RememberedSetAction::kOmit;
    SaveFPRegsMode const save_fp_mode = frame()->DidAllocateDoubleRegisters()
                                            ? SaveFPRegsMode::kSave
                                            : SaveFPRegsMode::kIgnore;

    if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
      __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
#if V8_ENABLE_WEBASSEMBLY
    } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ CallRecordWriteStubSaveRegisters(object_, scratch1_,
                                          remembered_set_action, save_fp_mode,
                                          StubCallMode::kCallWasmRuntimeStub);
#endif  // V8_ENABLE_WEBASSEMBLY
    } else {
      __ CallRecordWriteStubSaveRegisters(object_, scratch1_,
                                          remembered_set_action, save_fp_mode);
    }
  }

 private:
  Register const object_;
  Operand const operand_;
  Register const value_;
  Register const scratch0_;
  Register const scratch1_;
  RecordWriteMode const mode_;
#if V8_ENABLE_WEBASSEMBLY
  StubCallMode const stub_mode_;
#endif  // V8_ENABLE_WEBASSEMBLY
  Zone* zone_;
};

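// Emits a store of {value} with the given memory order: relaxed stores are
// plain moves, while seq_cst stores use xchg, whose implicit lock prefix
// provides the required fencing.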
template <std::memory_order order>
void EmitStore(TurboAssembler* tasm, Operand operand, Register value,
               MachineRepresentation rep) {
  if (order == std::memory_order_relaxed) {
    switch (rep) {
      case MachineRepresentation::kWord8:
        tasm->movb(operand, value);
        break;
      case MachineRepresentation::kWord16:
        tasm->movw(operand, value);
        break;
      case MachineRepresentation::kWord32:
        tasm->movl(operand, value);
        break;
      case MachineRepresentation::kWord64:
        tasm->movq(operand, value);
        break;
      case MachineRepresentation::kTagged:
        tasm->StoreTaggedField(operand, value);
        break;
      default:
        UNREACHABLE();
    }
    return;
  }

  DCHECK_EQ(order, std::memory_order_seq_cst);
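  // xchg also writes the previous memory contents back into its register
  // operand, so operate on a scratch copy to leave {value} unmodified.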
  switch (rep) {
    case MachineRepresentation::kWord8:
      tasm->movq(kScratchRegister, value);
      tasm->xchgb(kScratchRegister, operand);
      break;
    case MachineRepresentation::kWord16:
      tasm->movq(kScratchRegister, value);
      tasm->xchgw(kScratchRegister, operand);
      break;
    case MachineRepresentation::kWord32:
      tasm->movq(kScratchRegister, value);
      tasm->xchgl(kScratchRegister, operand);
      break;
    case MachineRepresentation::kWord64:
      tasm->movq(kScratchRegister, value);
      tasm->xchgq(kScratchRegister, operand);
      break;
    case MachineRepresentation::kTagged:
      tasm->AtomicStoreTaggedField(operand, value);
      break;
    default:
      UNREACHABLE();
  }
}

template <std::memory_order order>
void EmitStore(TurboAssembler* tasm, Operand operand, Immediate value,
               MachineRepresentation rep);

template <>
void EmitStore<std::memory_order_relaxed>(TurboAssembler* tasm, Operand operand,
                                          Immediate value,
                                          MachineRepresentation rep) {
  switch (rep) {
    case MachineRepresentation::kWord8:
      tasm->movb(operand, value);
      break;
    case MachineRepresentation::kWord16:
      tasm->movw(operand, value);
      break;
    case MachineRepresentation::kWord32:
      tasm->movl(operand, value);
      break;
    case MachineRepresentation::kWord64:
      tasm->movq(operand, value);
      break;
    case MachineRepresentation::kTagged:
      tasm->StoreTaggedField(operand, value);
      break;
    default:
      UNREACHABLE();
  }
}

#ifdef V8_IS_TSAN
void EmitMemoryProbeForTrapHandlerIfNeeded(TurboAssembler* tasm,
                                           Register scratch, Operand operand,
                                           StubCallMode mode, int size) {
#if V8_ENABLE_WEBASSEMBLY && V8_TRAP_HANDLER_SUPPORTED
  // The wasm OOB trap handler needs to be able to look up the faulting
  // instruction pointer to handle the SIGSEGV raised by an OOB access. It
  // will not handle SIGSEGVs raised by the TSAN store helpers. Emit a
  // redundant load here to give the trap handler a chance to handle any
  // OOB SIGSEGVs.
  if (trap_handler::IsTrapHandlerEnabled() &&
      mode == StubCallMode::kCallWasmRuntimeStub) {
    switch (size) {
      case kInt8Size:
        tasm->movb(scratch, operand);
        break;
      case kInt16Size:
        tasm->movw(scratch, operand);
        break;
      case kInt32Size:
        tasm->movl(scratch, operand);
        break;
      case kInt64Size:
        tasm->movq(scratch, operand);
        break;
      default:
        UNREACHABLE();
    }
  }
#endif
}

class OutOfLineTSANStore : public OutOfLineCode {
 public:
  OutOfLineTSANStore(CodeGenerator* gen, Operand operand, Register value,
                     Register scratch0, StubCallMode stub_mode, int size,
                     std::memory_order order)
      : OutOfLineCode(gen),
        operand_(operand),
        value_(value),
        scratch0_(scratch0),
#if V8_ENABLE_WEBASSEMBLY
        stub_mode_(stub_mode),
#endif  // V8_ENABLE_WEBASSEMBLY
        size_(size),
        memory_order_(order),
        zone_(gen->zone()) {
    DCHECK(!AreAliased(value, scratch0));
  }

  void Generate() final {
    const SaveFPRegsMode save_fp_mode = frame()->DidAllocateDoubleRegisters()
                                            ? SaveFPRegsMode::kSave
                                            : SaveFPRegsMode::kIgnore;
    __ leaq(scratch0_, operand_);

#if V8_ENABLE_WEBASSEMBLY
    if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      tasm()->CallTSANStoreStub(scratch0_, value_, save_fp_mode, size_,
                                StubCallMode::kCallWasmRuntimeStub,
                                memory_order_);
      return;
    }
#endif  // V8_ENABLE_WEBASSEMBLY

    tasm()->CallTSANStoreStub(scratch0_, value_, save_fp_mode, size_,
                              StubCallMode::kCallBuiltinPointer, memory_order_);
  }

 private:
  Operand const operand_;
  Register const value_;
  Register const scratch0_;
#if V8_ENABLE_WEBASSEMBLY
  StubCallMode const stub_mode_;
#endif  // V8_ENABLE_WEBASSEMBLY
  int size_;
  const std::memory_order memory_order_;
  Zone* zone_;
};

void EmitTSANStoreOOL(Zone* zone, CodeGenerator* codegen, TurboAssembler* tasm,
                      Operand operand, Register value_reg,
                      X64OperandConverter& i, StubCallMode mode, int size,
                      std::memory_order order) {
  // The FOR_TESTING code doesn't initialize the root register. We can't call
  // the TSAN builtin since we need to load the external reference through the
  // root register.
  // TODO(solanes, v8:7790, v8:11600): See if we can support the FOR_TESTING
  // path. It is not crucial, but it would be nice to remove this restriction.
  DCHECK_NE(codegen->code_kind(), CodeKind::FOR_TESTING);

  Register scratch0 = i.TempRegister(0);
  auto tsan_ool = zone->New<OutOfLineTSANStore>(codegen, operand, value_reg,
                                                scratch0, mode, size, order);
  tasm->jmp(tsan_ool->entry());
  tasm->bind(tsan_ool->exit());
}

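// Returns the register holding the value to report to TSAN. Register values
// are passed through; immediates must first be materialized in a temp
// register.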
template <std::memory_order order>
Register GetTSANValueRegister(TurboAssembler* tasm, Register value,
                              X64OperandConverter& i) {
  return value;
}

template <std::memory_order order>
Register GetTSANValueRegister(TurboAssembler* tasm, Immediate value,
                              X64OperandConverter& i);

template <>
Register GetTSANValueRegister<std::memory_order_relaxed>(
    TurboAssembler* tasm, Immediate value, X64OperandConverter& i) {
  Register value_reg = i.TempRegister(1);
  tasm->movq(value_reg, value);
  return value_reg;
}

template <std::memory_order order, typename ValueT>
void EmitTSANAwareStore(Zone* zone, CodeGenerator* codegen,
                        TurboAssembler* tasm, Operand operand, ValueT value,
                        X64OperandConverter& i, StubCallMode stub_call_mode,
                        MachineRepresentation rep) {
  // The FOR_TESTING code doesn't initialize the root register. We can't call
  // the TSAN builtin since we need to load the external reference through the
  // root register.
  // TODO(solanes, v8:7790, v8:11600): See if we can support the FOR_TESTING
  // path. It is not crucial, but it would be nice to remove this restriction.
  if (codegen->code_kind() != CodeKind::FOR_TESTING) {
    int size = ElementSizeInBytes(rep);
    EmitMemoryProbeForTrapHandlerIfNeeded(tasm, i.TempRegister(0), operand,
                                          stub_call_mode, size);
    Register value_reg = GetTSANValueRegister<order>(tasm, value, i);
    EmitTSANStoreOOL(zone, codegen, tasm, operand, value_reg, i, stub_call_mode,
                     size, order);
  } else {
    EmitStore<order>(tasm, operand, value, rep);
  }
}

class OutOfLineTSANRelaxedLoad final : public OutOfLineCode {
 public:
  OutOfLineTSANRelaxedLoad(CodeGenerator* gen, Operand operand,
                           Register scratch0, StubCallMode stub_mode, int size)
      : OutOfLineCode(gen),
        operand_(operand),
        scratch0_(scratch0),
#if V8_ENABLE_WEBASSEMBLY
        stub_mode_(stub_mode),
#endif  // V8_ENABLE_WEBASSEMBLY
        size_(size),
        zone_(gen->zone()) {
  }

  void Generate() final {
    const SaveFPRegsMode save_fp_mode = frame()->DidAllocateDoubleRegisters()
                                            ? SaveFPRegsMode::kSave
                                            : SaveFPRegsMode::kIgnore;
    __ leaq(scratch0_, operand_);

#if V8_ENABLE_WEBASSEMBLY
    if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ CallTSANRelaxedLoadStub(scratch0_, save_fp_mode, size_,
                                 StubCallMode::kCallWasmRuntimeStub);
      return;
    }
#endif  // V8_ENABLE_WEBASSEMBLY

    __ CallTSANRelaxedLoadStub(scratch0_, save_fp_mode, size_,
                               StubCallMode::kCallBuiltinPointer);
  }

 private:
  Operand const operand_;
  Register const scratch0_;
#if V8_ENABLE_WEBASSEMBLY
  StubCallMode const stub_mode_;
#endif  // V8_ENABLE_WEBASSEMBLY
  int size_;
  Zone* zone_;
};

void EmitTSANRelaxedLoadOOLIfNeeded(Zone* zone, CodeGenerator* codegen,
                                    TurboAssembler* tasm, Operand operand,
                                    X64OperandConverter& i, StubCallMode mode,
                                    int size) {
  // The FOR_TESTING code doesn't initialize the root register. We can't call
  // the TSAN builtin since we need to load the external reference through the
  // root register.
  // TODO(solanes, v8:7790, v8:11600): See if we can support the FOR_TESTING
  // path. It is not crucial, but it would be nice to remove this if.
  if (codegen->code_kind() == CodeKind::FOR_TESTING) return;

  Register scratch0 = i.TempRegister(0);
  auto tsan_ool = zone->New<OutOfLineTSANRelaxedLoad>(codegen, operand,
                                                      scratch0, mode, size);
  tasm->jmp(tsan_ool->entry());
  tasm->bind(tsan_ool->exit());
}

#else
template <std::memory_order order, typename ValueT>
void EmitTSANAwareStore(Zone* zone, CodeGenerator* codegen,
                        TurboAssembler* tasm, Operand operand, ValueT value,
                        X64OperandConverter& i, StubCallMode stub_call_mode,
                        MachineRepresentation rep) {
  DCHECK(order == std::memory_order_relaxed ||
         order == std::memory_order_seq_cst);
  EmitStore<order>(tasm, operand, value, rep);
}

void EmitTSANRelaxedLoadOOLIfNeeded(Zone* zone, CodeGenerator* codegen,
                                    TurboAssembler* tasm, Operand operand,
                                    X64OperandConverter& i, StubCallMode mode,
                                    int size) {}
#endif  // V8_IS_TSAN

#if V8_ENABLE_WEBASSEMBLY
class WasmOutOfLineTrap : public OutOfLineCode {
 public:
  WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
      : OutOfLineCode(gen), gen_(gen), instr_(instr) {}

  void Generate() override {
    X64OperandConverter i(gen_, instr_);
    TrapId trap_id =
        static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
    GenerateWithTrapId(trap_id);
  }

 protected:
  CodeGenerator* gen_;

  void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }

 private:
  void GenerateCallToTrap(TrapId trap_id) {
    if (!gen_->wasm_runtime_exception_support()) {
      // We cannot test calls to the runtime in cctest/test-run-wasm.
      // Therefore we emit a call to C here instead of a call to the runtime.
      __ PrepareCallCFunction(0);
      __ CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(),
                       0);
      __ LeaveFrame(StackFrame::WASM);
      auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
      size_t pop_size =
          call_descriptor->ParameterSlotCount() * kSystemPointerSize;
      // Use rcx as a scratch register; we return immediately anyway.
      __ Ret(static_cast<int>(pop_size), rcx);
    } else {
      gen_->AssembleSourcePosition(instr_);
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
      ReferenceMap* reference_map =
          gen_->zone()->New<ReferenceMap>(gen_->zone());
      gen_->RecordSafepoint(reference_map);
      __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
    }
  }

  Instruction* instr_;
};

class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {
 public:
  WasmProtectedInstructionTrap(CodeGenerator* gen, int pc, Instruction* instr)
      : WasmOutOfLineTrap(gen, instr), pc_(pc) {}

  void Generate() final {
    DCHECK(FLAG_wasm_bounds_checks && !FLAG_wasm_enforce_bounds_checks);
    gen_->AddProtectedInstructionLanding(pc_, __ pc_offset());
    GenerateWithTrapId(TrapId::kTrapMemOutOfBounds);
  }

 private:
  int pc_;
};

void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
                         InstructionCode opcode, Instruction* instr,
                         int pc) {
  const MemoryAccessMode access_mode = instr->memory_access_mode();
  if (access_mode == kMemoryAccessProtected) {
    zone->New<WasmProtectedInstructionTrap>(codegen, pc, instr);
  }
}

#else

void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
                         InstructionCode opcode, Instruction* instr, int pc) {
  DCHECK_NE(kMemoryAccessProtected, instr->memory_access_mode());
}

#endif  // V8_ENABLE_WEBASSEMBLY

}  // namespace

#define ASSEMBLE_UNOP(asm_instr)         \
  do {                                   \
    if (instr->Output()->IsRegister()) { \
      __ asm_instr(i.OutputRegister());  \
    } else {                             \
      __ asm_instr(i.OutputOperand());   \
    }                                    \
  } while (false)

#define ASSEMBLE_BINOP(asm_instr)                                \
  do {                                                           \
    if (HasAddressingMode(instr)) {                              \
      size_t index = 1;                                          \
      Operand right = i.MemoryOperand(&index);                   \
      __ asm_instr(i.InputRegister(0), right);                   \
    } else {                                                     \
      if (HasImmediateInput(instr, 1)) {                         \
        if (HasRegisterInput(instr, 0)) {                        \
          __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
        } else {                                                 \
          __ asm_instr(i.InputOperand(0), i.InputImmediate(1));  \
        }                                                        \
      } else {                                                   \
        if (HasRegisterInput(instr, 1)) {                        \
          __ asm_instr(i.InputRegister(0), i.InputRegister(1));  \
        } else {                                                 \
          __ asm_instr(i.InputRegister(0), i.InputOperand(1));   \
        }                                                        \
      }                                                          \
    }                                                            \
  } while (false)

#define ASSEMBLE_COMPARE(asm_instr)                              \
  do {                                                           \
    if (HasAddressingMode(instr)) {                              \
      size_t index = 0;                                          \
      Operand left = i.MemoryOperand(&index);                    \
      if (HasImmediateInput(instr, index)) {                     \
        __ asm_instr(left, i.InputImmediate(index));             \
      } else {                                                   \
        __ asm_instr(left, i.InputRegister(index));              \
      }                                                          \
    } else {                                                     \
      if (HasImmediateInput(instr, 1)) {                         \
        if (HasRegisterInput(instr, 0)) {                        \
          __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
        } else {                                                 \
          __ asm_instr(i.InputOperand(0), i.InputImmediate(1));  \
        }                                                        \
      } else {                                                   \
        if (HasRegisterInput(instr, 1)) {                        \
          __ asm_instr(i.InputRegister(0), i.InputRegister(1));  \
        } else {                                                 \
          __ asm_instr(i.InputRegister(0), i.InputOperand(1));   \
        }                                                        \
      }                                                          \
    }                                                            \
  } while (false)

#define ASSEMBLE_MULT(asm_instr)                              \
  do {                                                        \
    if (HasImmediateInput(instr, 1)) {                        \
      if (HasRegisterInput(instr, 0)) {                       \
        __ asm_instr(i.OutputRegister(), i.InputRegister(0),  \
                     i.InputImmediate(1));                    \
      } else {                                                \
        __ asm_instr(i.OutputRegister(), i.InputOperand(0),   \
                     i.InputImmediate(1));                    \
      }                                                       \
    } else {                                                  \
      if (HasRegisterInput(instr, 1)) {                       \
        __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \
      } else {                                                \
        __ asm_instr(i.OutputRegister(), i.InputOperand(1));  \
      }                                                       \
    }                                                         \
  } while (false)

#define ASSEMBLE_SHIFT(asm_instr, width)                                   \
  do {                                                                     \
    if (HasImmediateInput(instr, 1)) {                                     \
      if (instr->Output()->IsRegister()) {                                 \
        __ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \
      } else {                                                             \
        __ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1)));  \
      }                                                                    \
    } else {                                                               \
      if (instr->Output()->IsRegister()) {                                 \
        __ asm_instr##_cl(i.OutputRegister());                             \
      } else {                                                             \
        __ asm_instr##_cl(i.OutputOperand());                              \
      }                                                                    \
    }                                                                      \
  } while (false)

#define ASSEMBLE_MOVX(asm_instr)                            \
  do {                                                      \
    if (HasAddressingMode(instr)) {                         \
      __ asm_instr(i.OutputRegister(), i.MemoryOperand());  \
    } else if (HasRegisterInput(instr, 0)) {                \
      __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
    } else {                                                \
      __ asm_instr(i.OutputRegister(), i.InputOperand(0));  \
    }                                                       \
  } while (false)

#define ASSEMBLE_SSE_BINOP(asm_instr)                                     \
  do {                                                                    \
    if (HasAddressingMode(instr)) {                                       \
      size_t index = 1;                                                   \
      Operand right = i.MemoryOperand(&index);                            \
      __ asm_instr(i.InputDoubleRegister(0), right);                      \
    } else {                                                              \
      if (instr->InputAt(1)->IsFPRegister()) {                            \
        __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
      } else {                                                            \
        __ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1));        \
      }                                                                   \
    }                                                                     \
  } while (false)

#define ASSEMBLE_SSE_UNOP(asm_instr)                                    \
  do {                                                                  \
    if (instr->InputAt(0)->IsFPRegister()) {                            \
      __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
    } else {                                                            \
      __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0));        \
    }                                                                   \
  } while (false)

#define ASSEMBLE_AVX_BINOP(asm_instr)                                          \
  do {                                                                         \
    CpuFeatureScope avx_scope(tasm(), AVX);                                    \
    if (HasAddressingMode(instr)) {                                            \
      size_t index = 1;                                                        \
      Operand right = i.MemoryOperand(&index);                                 \
      __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), right); \
    } else {                                                                   \
      if (instr->InputAt(1)->IsFPRegister()) {                                 \
        __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0),       \
                     i.InputDoubleRegister(1));                                \
      } else {                                                                 \
        __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0),       \
                     i.InputOperand(1));                                       \
      }                                                                        \
    }                                                                          \
  } while (false)

#define ASSEMBLE_IEEE754_BINOP(name)                                     \
  do {                                                                   \
    __ PrepareCallCFunction(2);                                          \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
  } while (false)

#define ASSEMBLE_IEEE754_UNOP(name)                                      \
  do {                                                                   \
    __ PrepareCallCFunction(1);                                          \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \
  } while (false)

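// Atomic read-modify-write loop: load the old value into rax, compute the new
// value in a temp register, then lock cmpxchg it back; retry if the memory
// location changed in between.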
#define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
  do {                                                          \
    Label binop;                                                \
    __ bind(&binop);                                            \
    __ mov_inst(rax, i.MemoryOperand(1));                       \
    __ movl(i.TempRegister(0), rax);                            \
    __ bin_inst(i.TempRegister(0), i.InputRegister(0));         \
    __ lock();                                                  \
    __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));     \
    __ j(not_equal, &binop);                                    \
  } while (false)

#define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
  do {                                                            \
    Label binop;                                                  \
    __ bind(&binop);                                              \
    __ mov_inst(rax, i.MemoryOperand(1));                         \
    __ movq(i.TempRegister(0), rax);                              \
    __ bin_inst(i.TempRegister(0), i.InputRegister(0));           \
    __ lock();                                                    \
    __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));       \
    __ j(not_equal, &binop);                                      \
  } while (false)

// Handles both SSE and AVX codegen. For SSE we use DefineSameAsFirst, so the
// dst and first src will be the same. For AVX we don't restrict it that way, so
// we will omit unnecessary moves.
#define ASSEMBLE_SIMD_BINOP(opcode)                                      \
  do {                                                                   \
    if (CpuFeatures::IsSupported(AVX)) {                                 \
      CpuFeatureScope avx_scope(tasm(), AVX);                            \
      __ v##opcode(i.OutputSimd128Register(), i.InputSimd128Register(0), \
                   i.InputSimd128Register(1));                           \
    } else {                                                             \
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));   \
      __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1));   \
    }                                                                    \
  } while (false)

#define ASSEMBLE_SIMD_INSTR(opcode, dst_operand, index)      \
  do {                                                       \
    if (instr->InputAt(index)->IsSimd128Register()) {        \
      __ opcode(dst_operand, i.InputSimd128Register(index)); \
    } else {                                                 \
      __ opcode(dst_operand, i.InputOperand(index));         \
    }                                                        \
  } while (false)

#define ASSEMBLE_SIMD_IMM_INSTR(opcode, dst_operand, index, imm)  \
  do {                                                            \
    if (instr->InputAt(index)->IsSimd128Register()) {             \
      __ opcode(dst_operand, i.InputSimd128Register(index), imm); \
    } else {                                                      \
      __ opcode(dst_operand, i.InputOperand(index), imm);         \
    }                                                             \
  } while (false)

#define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode)                    \
  do {                                                          \
    XMMRegister dst = i.OutputSimd128Register();                \
    byte input_index = instr->InputCount() == 2 ? 1 : 0;        \
    if (CpuFeatures::IsSupported(AVX)) {                        \
      CpuFeatureScope avx_scope(tasm(), AVX);                   \
      DCHECK(instr->InputAt(input_index)->IsSimd128Register()); \
      __ v##opcode(dst, i.InputSimd128Register(0),              \
                   i.InputSimd128Register(input_index));        \
    } else {                                                    \
      DCHECK_EQ(dst, i.InputSimd128Register(0));                \
      ASSEMBLE_SIMD_INSTR(opcode, dst, input_index);            \
    }                                                           \
  } while (false)

#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, imm)                \
  do {                                                        \
    XMMRegister dst = i.OutputSimd128Register();              \
    XMMRegister src = i.InputSimd128Register(0);              \
    if (CpuFeatures::IsSupported(AVX)) {                      \
      CpuFeatureScope avx_scope(tasm(), AVX);                 \
      DCHECK(instr->InputAt(1)->IsSimd128Register());         \
      __ v##opcode(dst, src, i.InputSimd128Register(1), imm); \
    } else {                                                  \
      DCHECK_EQ(dst, src);                                    \
      if (instr->InputAt(1)->IsSimd128Register()) {           \
        __ opcode(dst, i.InputSimd128Register(1), imm);       \
      } else {                                                \
        __ opcode(dst, i.InputOperand(1), imm);               \
      }                                                       \
    }                                                         \
  } while (false)

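// Sets the output register to 1 iff every lane of the input is non-zero: the
// pcmpeq against the zeroed scratch register marks any zero lanes, and ptest
// then checks that none were marked.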
#define ASSEMBLE_SIMD_ALL_TRUE(opcode)                       \
  do {                                                       \
    Register dst = i.OutputRegister();                       \
    __ xorq(dst, dst);                                       \
    __ Pxor(kScratchDoubleReg, kScratchDoubleReg);           \
    __ opcode(kScratchDoubleReg, i.InputSimd128Register(0)); \
    __ Ptest(kScratchDoubleReg, kScratchDoubleReg);          \
    __ setcc(equal, dst);                                    \
  } while (false)

// This macro will directly emit the opcode if the shift is an immediate - the
// shift value will be taken modulo 2^width. Otherwise, it will emit code to
// perform the modulus operation.
#define ASSEMBLE_SIMD_SHIFT(opcode, width)                               \
  do {                                                                   \
    XMMRegister dst = i.OutputSimd128Register();                         \
    if (HasImmediateInput(instr, 1)) {                                   \
      if (CpuFeatures::IsSupported(AVX)) {                               \
        CpuFeatureScope avx_scope(tasm(), AVX);                          \
        __ v##opcode(dst, i.InputSimd128Register(0),                     \
                     byte{i.InputInt##width(1)});                        \
      } else {                                                           \
        DCHECK_EQ(dst, i.InputSimd128Register(0));                       \
        __ opcode(dst, byte{i.InputInt##width(1)});                      \
      }                                                                  \
    } else {                                                             \
      constexpr int mask = (1 << width) - 1;                             \
      __ movq(kScratchRegister, i.InputRegister(1));                     \
      __ andq(kScratchRegister, Immediate(mask));                        \
      __ Movq(kScratchDoubleReg, kScratchRegister);                      \
      if (CpuFeatures::IsSupported(AVX)) {                               \
        CpuFeatureScope avx_scope(tasm(), AVX);                          \
        __ v##opcode(dst, i.InputSimd128Register(0), kScratchDoubleReg); \
      } else {                                                           \
        DCHECK_EQ(dst, i.InputSimd128Register(0));                       \
        __ opcode(dst, kScratchDoubleReg);                               \
      }                                                                  \
    }                                                                    \
  } while (false)

#define ASSEMBLE_PINSR(ASM_INSTR)                                        \
  do {                                                                   \
    XMMRegister dst = i.OutputSimd128Register();                         \
    XMMRegister src = i.InputSimd128Register(0);                         \
    uint8_t laneidx = i.InputUint8(1);                                   \
    uint32_t load_offset;                                                \
    if (HasAddressingMode(instr)) {                                      \
      __ ASM_INSTR(dst, src, i.MemoryOperand(2), laneidx, &load_offset); \
    } else if (instr->InputAt(2)->IsFPRegister()) {                      \
      __ Movq(kScratchRegister, i.InputDoubleRegister(2));               \
      __ ASM_INSTR(dst, src, kScratchRegister, laneidx, &load_offset);   \
    } else if (instr->InputAt(2)->IsRegister()) {                        \
      __ ASM_INSTR(dst, src, i.InputRegister(2), laneidx, &load_offset); \
    } else {                                                             \
      __ ASM_INSTR(dst, src, i.InputOperand(2), laneidx, &load_offset);  \
    }                                                                    \
    EmitOOLTrapIfNeeded(zone(), this, opcode, instr, load_offset);       \
  } while (false)

#define ASSEMBLE_SEQ_CST_STORE(rep)                                       \
  do {                                                                    \
    Register value = i.InputRegister(0);                                  \
    Operand operand = i.MemoryOperand(1);                                 \
    EmitTSANAwareStore<std::memory_order_seq_cst>(                        \
        zone(), this, tasm(), operand, value, i, DetermineStubCallMode(), \
        rep);                                                             \
  } while (false)

void CodeGenerator::AssembleDeconstructFrame() {
  unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
  __ movq(rsp, rbp);
  __ popq(rbp);
}

void CodeGenerator::AssemblePrepareTailCall() {
  if (frame_access_state()->has_frame()) {
    __ movq(rbp, MemOperand(rbp, 0));
  }
  frame_access_state()->SetFrameAccessToSP();
}

namespace {

void AdjustStackPointerForTailCall(Instruction* instr,
                                   TurboAssembler* assembler, Linkage* linkage,
                                   OptimizedCompilationInfo* info,
                                   FrameAccessState* state,
                                   int new_slot_above_sp,
                                   bool allow_shrinkage = true) {
  int stack_slot_delta;
  if (instr->HasCallDescriptorFlag(CallDescriptor::kIsTailCallForTierUp)) {
    // For this special tail-call mode, the callee has the same arguments and
    // linkage as the caller, and arguments adapter frames must be preserved.
    // Thus we simply have to reset the stack pointer register to its original
    // value before frame construction.
    // See also: AssembleConstructFrame.
    DCHECK(!info->is_osr());
    DCHECK_EQ(linkage->GetIncomingDescriptor()->CalleeSavedRegisters(), 0);
    DCHECK_EQ(linkage->GetIncomingDescriptor()->CalleeSavedFPRegisters(), 0);
    DCHECK_EQ(state->frame()->GetReturnSlotCount(), 0);
    stack_slot_delta = (state->frame()->GetTotalFrameSlotCount() -
                        kReturnAddressStackSlotCount) *
                       -1;
    DCHECK_LE(stack_slot_delta, 0);
  } else {
    int current_sp_offset = state->GetSPToFPSlotCount() +
                            StandardFrameConstants::kFixedSlotCountAboveFp;
    stack_slot_delta = new_slot_above_sp - current_sp_offset;
  }

  if (stack_slot_delta > 0) {
    assembler->AllocateStackSpace(stack_slot_delta * kSystemPointerSize);
    state->IncreaseSPDelta(stack_slot_delta);
  } else if (allow_shrinkage && stack_slot_delta < 0) {
    assembler->addq(rsp, Immediate(-stack_slot_delta * kSystemPointerSize));
    state->IncreaseSPDelta(stack_slot_delta);
  }
}

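// Loads a 128-bit immediate, supplied as four 32-bit words, into {reg}.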
void SetupSimdImmediateInRegister(TurboAssembler* assembler, uint32_t* imms,
                                  XMMRegister reg) {
  assembler->Move(reg, make_uint64(imms[3], imms[2]),
                  make_uint64(imms[1], imms[0]));
}

}  // namespace

void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
                                              int first_unused_slot_offset) {
  CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
  ZoneVector<MoveOperands*> pushes(zone());
  GetPushCompatibleMoves(instr, flags, &pushes);

  if (!pushes.empty() &&
      (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
       first_unused_slot_offset)) {
    DCHECK(!instr->HasCallDescriptorFlag(CallDescriptor::kIsTailCallForTierUp));
    X64OperandConverter g(this, instr);
    for (auto move : pushes) {
      LocationOperand destination_location(
          LocationOperand::cast(move->destination()));
      InstructionOperand source(move->source());
      AdjustStackPointerForTailCall(instr, tasm(), linkage(), info(),
                                    frame_access_state(),
                                    destination_location.index());
      if (source.IsStackSlot()) {
        LocationOperand source_location(LocationOperand::cast(source));
        __ Push(g.SlotToOperand(source_location.index()));
      } else if (source.IsRegister()) {
        LocationOperand source_location(LocationOperand::cast(source));
        __ Push(source_location.GetRegister());
      } else if (source.IsImmediate()) {
        __ Push(Immediate(ImmediateOperand::cast(source).inline_int32_value()));
      } else {
        // Pushes of non-scalar data types are not supported.
1118         UNIMPLEMENTED();
1119       }
1120       frame_access_state()->IncreaseSPDelta(1);
1121       move->Eliminate();
1122     }
1123   }
1124   AdjustStackPointerForTailCall(instr, tasm(), linkage(), info(),
1125                                 frame_access_state(), first_unused_slot_offset,
1126                                 false);
1127 }
1128 
AssembleTailCallAfterGap(Instruction * instr,int first_unused_slot_offset)1129 void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
1130                                              int first_unused_slot_offset) {
1131   AdjustStackPointerForTailCall(instr, tasm(), linkage(), info(),
1132                                 frame_access_state(), first_unused_slot_offset);
1133 }
1134 
1135 // Check that {kJavaScriptCallCodeStartRegister} is correct.
1136 void CodeGenerator::AssembleCodeStartRegisterCheck() {
1137   __ ComputeCodeStartAddress(rbx);
1138   __ cmpq(rbx, kJavaScriptCallCodeStartRegister);
1139   __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
1140 }
1141 
1142 // Check if the code object is marked for deoptimization. If it is, then it
1143 // jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
1144 // to:
1145 //    1. read from memory the word that contains that bit, which can be found in
1146 //       the flags in the referenced {CodeDataContainer} object;
1147 //    2. test kMarkedForDeoptimizationBit in those flags; and
1148 //    3. if it is not zero then jump to the builtin.
1149 void CodeGenerator::BailoutIfDeoptimized() {
1150   int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
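       // kJavaScriptCallCodeStartRegister holds the address of the first
       // instruction, which lies Code::kHeaderSize past the untagged start of
       // the Code object; subtracting kHeaderSize therefore turns the field
       // offset into an offset relative to the code start.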
1151   __ LoadTaggedPointerField(rbx,
1152                             Operand(kJavaScriptCallCodeStartRegister, offset));
1153   __ testl(FieldOperand(rbx, CodeDataContainer::kKindSpecificFlagsOffset),
1154            Immediate(1 << Code::kMarkedForDeoptimizationBit));
1155   __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
1156           RelocInfo::CODE_TARGET, not_zero);
1157 }
1158 
1159 // Assembles an instruction after register allocation, producing machine code.
1160 CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
1161     Instruction* instr) {
1162   X64OperandConverter i(this, instr);
1163   InstructionCode opcode = instr->opcode();
1164   ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
1165   switch (arch_opcode) {
1166     case kArchCallCodeObject: {
1167       if (HasImmediateInput(instr, 0)) {
1168         Handle<Code> code = i.InputCode(0);
1169         __ Call(code, RelocInfo::CODE_TARGET);
1170       } else {
1171         Register reg = i.InputRegister(0);
1172         DCHECK_IMPLIES(
1173             instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
1174             reg == kJavaScriptCallCodeStartRegister);
1175         __ LoadCodeObjectEntry(reg, reg);
1176         __ call(reg);
1177       }
1178       RecordCallPosition(instr);
1179       frame_access_state()->ClearSPDelta();
1180       break;
1181     }
1182     case kArchCallBuiltinPointer: {
1183       DCHECK(!HasImmediateInput(instr, 0));
1184       Register builtin_index = i.InputRegister(0);
1185       __ CallBuiltinByIndex(builtin_index);
1186       RecordCallPosition(instr);
1187       frame_access_state()->ClearSPDelta();
1188       break;
1189     }
1190 #if V8_ENABLE_WEBASSEMBLY
1191     case kArchCallWasmFunction: {
1192       if (HasImmediateInput(instr, 0)) {
1193         Constant constant = i.ToConstant(instr->InputAt(0));
1194         Address wasm_code = static_cast<Address>(constant.ToInt64());
1195         if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
1196           __ near_call(wasm_code, constant.rmode());
1197         } else {
1198           __ Call(wasm_code, constant.rmode());
1199         }
1200       } else {
1201         __ call(i.InputRegister(0));
1202       }
1203       RecordCallPosition(instr);
1204       frame_access_state()->ClearSPDelta();
1205       break;
1206     }
1207     case kArchTailCallWasm: {
1208       if (HasImmediateInput(instr, 0)) {
1209         Constant constant = i.ToConstant(instr->InputAt(0));
1210         Address wasm_code = static_cast<Address>(constant.ToInt64());
1211         if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
1212           __ near_jmp(wasm_code, constant.rmode());
1213         } else {
1214           __ Move(kScratchRegister, wasm_code, constant.rmode());
1215           __ jmp(kScratchRegister);
1216         }
1217       } else {
1218         __ jmp(i.InputRegister(0));
1219       }
1220       unwinding_info_writer_.MarkBlockWillExit();
1221       frame_access_state()->ClearSPDelta();
1222       frame_access_state()->SetFrameAccessToDefault();
1223       break;
1224     }
1225 #endif  // V8_ENABLE_WEBASSEMBLY
1226     case kArchTailCallCodeObject: {
1227       if (HasImmediateInput(instr, 0)) {
1228         Handle<Code> code = i.InputCode(0);
1229         __ Jump(code, RelocInfo::CODE_TARGET);
1230       } else {
1231         Register reg = i.InputRegister(0);
1232         DCHECK_IMPLIES(
1233             instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
1234             reg == kJavaScriptCallCodeStartRegister);
1235         __ LoadCodeObjectEntry(reg, reg);
1236         __ jmp(reg);
1237       }
1238       unwinding_info_writer_.MarkBlockWillExit();
1239       frame_access_state()->ClearSPDelta();
1240       frame_access_state()->SetFrameAccessToDefault();
1241       break;
1242     }
1243     case kArchTailCallAddress: {
1244       CHECK(!HasImmediateInput(instr, 0));
1245       Register reg = i.InputRegister(0);
1246       DCHECK_IMPLIES(
1247           instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
1248           reg == kJavaScriptCallCodeStartRegister);
1249       __ jmp(reg);
1250       unwinding_info_writer_.MarkBlockWillExit();
1251       frame_access_state()->ClearSPDelta();
1252       frame_access_state()->SetFrameAccessToDefault();
1253       break;
1254     }
1255     case kArchCallJSFunction: {
1256       Register func = i.InputRegister(0);
1257       if (FLAG_debug_code) {
1258         // Check the function's context matches the context argument.
1259         __ cmp_tagged(rsi, FieldOperand(func, JSFunction::kContextOffset));
1260         __ Assert(equal, AbortReason::kWrongFunctionContext);
1261       }
1262       static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch");
1263       __ LoadTaggedPointerField(rcx,
1264                                 FieldOperand(func, JSFunction::kCodeOffset));
1265       __ CallCodeTObject(rcx);
1266       frame_access_state()->ClearSPDelta();
1267       RecordCallPosition(instr);
1268       break;
1269     }
1270     case kArchPrepareCallCFunction: {
1271       // Frame alignment requires using FP-relative frame addressing.
1272       frame_access_state()->SetFrameAccessToFP();
1273       int const num_parameters = MiscField::decode(instr->opcode());
1274       __ PrepareCallCFunction(num_parameters);
1275       break;
1276     }
1277     case kArchSaveCallerRegisters: {
1278       fp_mode_ =
1279           static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
1280       DCHECK(fp_mode_ == SaveFPRegsMode::kIgnore ||
1281              fp_mode_ == SaveFPRegsMode::kSave);
1282       // kReturnRegister0 should have been saved before entering the stub.
1283       int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
1284       DCHECK(IsAligned(bytes, kSystemPointerSize));
1285       DCHECK_EQ(0, frame_access_state()->sp_delta());
1286       frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
1287       DCHECK(!caller_registers_saved_);
1288       caller_registers_saved_ = true;
1289       break;
1290     }
1291     case kArchRestoreCallerRegisters: {
1292       DCHECK(fp_mode_ ==
1293              static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
1294       DCHECK(fp_mode_ == SaveFPRegsMode::kIgnore ||
1295              fp_mode_ == SaveFPRegsMode::kSave);
1296       // Don't overwrite the returned value.
1297       int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
1298       frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
1299       DCHECK_EQ(0, frame_access_state()->sp_delta());
1300       DCHECK(caller_registers_saved_);
1301       caller_registers_saved_ = false;
1302       break;
1303     }
1304     case kArchPrepareTailCall:
1305       AssemblePrepareTailCall();
1306       break;
1307     case kArchCallCFunction: {
1308       int const num_gp_parameters = ParamField::decode(instr->opcode());
1309       int const num_fp_parameters = FPParamField::decode(instr->opcode());
1310       Label return_location;
1311 #if V8_ENABLE_WEBASSEMBLY
1312       if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
1313         // Put the return address in a stack slot.
1314         __ leaq(kScratchRegister, Operand(&return_location, 0));
1315         __ movq(MemOperand(rbp, WasmExitFrameConstants::kCallingPCOffset),
1316                 kScratchRegister);
1317       }
1318 #endif  // V8_ENABLE_WEBASSEMBLY
1319       if (HasImmediateInput(instr, 0)) {
1320         ExternalReference ref = i.InputExternalReference(0);
1321         __ CallCFunction(ref, num_gp_parameters + num_fp_parameters);
1322       } else {
1323         Register func = i.InputRegister(0);
1324         __ CallCFunction(func, num_gp_parameters + num_fp_parameters);
1325       }
1326       __ bind(&return_location);
1327 #if V8_ENABLE_WEBASSEMBLY
1328       if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
1329         RecordSafepoint(instr->reference_map());
1330       }
1331 #endif  // V8_ENABLE_WEBASSEMBLY
1332       frame_access_state()->SetFrameAccessToDefault();
1333       // Ideally, we should decrement the SP delta to match the change of the
1334       // stack pointer in CallCFunction. However, for certain architectures
1335       // (e.g. ARM), there may be stricter alignment requirements, causing the
1336       // old SP to be saved on the stack. In those cases, we cannot calculate
1337       // the SP delta statically.
1338       frame_access_state()->ClearSPDelta();
1339       if (caller_registers_saved_) {
1340         // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
1341         // Here, we assume the sequence to be:
1342         //   kArchSaveCallerRegisters;
1343         //   kArchCallCFunction;
1344         //   kArchRestoreCallerRegisters;
1345         int bytes =
1346             __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
1347         frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
1348       }
1349       // TODO(turbofan): Do we need an lfence here?
1350       break;
1351     }
1352     case kArchJmp:
1353       AssembleArchJump(i.InputRpo(0));
1354       break;
1355     case kArchBinarySearchSwitch:
1356       AssembleArchBinarySearchSwitch(instr);
1357       break;
1358     case kArchTableSwitch:
1359       AssembleArchTableSwitch(instr);
1360       break;
1361     case kArchComment:
1362       __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
1363       break;
1364     case kArchAbortCSADcheck:
1365       DCHECK(i.InputRegister(0) == rdx);
1366       {
1367         // We don't actually want to generate a pile of code for this, so just
1368         // claim there is a stack frame, without generating one.
1369         FrameScope scope(tasm(), StackFrame::NO_FRAME_TYPE);
1370         __ Call(isolate()->builtins()->code_handle(Builtin::kAbortCSADcheck),
1371                 RelocInfo::CODE_TARGET);
1372       }
1373       __ int3();
1374       unwinding_info_writer_.MarkBlockWillExit();
1375       break;
1376     case kArchDebugBreak:
1377       __ DebugBreak();
1378       break;
1379     case kArchThrowTerminator:
1380       unwinding_info_writer_.MarkBlockWillExit();
1381       break;
1382     case kArchNop:
1383       // don't emit code for nops.
1384       break;
1385     case kArchDeoptimize: {
1386       DeoptimizationExit* exit =
1387           BuildTranslation(instr, -1, 0, 0, OutputFrameStateCombine::Ignore());
1388       __ jmp(exit->label());
1389       break;
1390     }
1391     case kArchRet:
1392       AssembleReturn(instr->InputAt(0));
1393       break;
1394     case kArchFramePointer:
1395       __ movq(i.OutputRegister(), rbp);
1396       break;
1397     case kArchParentFramePointer:
1398       if (frame_access_state()->has_frame()) {
1399         __ movq(i.OutputRegister(), Operand(rbp, 0));
1400       } else {
1401         __ movq(i.OutputRegister(), rbp);
1402       }
1403       break;
1404     case kArchStackPointerGreaterThan: {
1405       // Potentially apply an offset to the current stack pointer before the
1406       // comparison to account for the size difference between an optimized
1407       // frame and the contained unoptimized frames.
1408 
1409       Register lhs_register = rsp;
1410       uint32_t offset;
1411 
1412       if (ShouldApplyOffsetToStackCheck(instr, &offset)) {
1413         lhs_register = kScratchRegister;
1414         __ leaq(lhs_register, Operand(rsp, static_cast<int32_t>(offset) * -1));
1415       }
1416 
1417       constexpr size_t kValueIndex = 0;
1418       if (HasAddressingMode(instr)) {
1419         __ cmpq(lhs_register, i.MemoryOperand(kValueIndex));
1420       } else {
1421         __ cmpq(lhs_register, i.InputRegister(kValueIndex));
1422       }
1423       break;
1424     }
1425     case kArchStackCheckOffset:
1426       __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
1427       break;
1428     case kArchTruncateDoubleToI: {
1429       auto result = i.OutputRegister();
1430       auto input = i.InputDoubleRegister(0);
1431       auto ool = zone()->New<OutOfLineTruncateDoubleToI>(
1432           this, result, input, DetermineStubCallMode(),
1433           &unwinding_info_writer_);
1434       // We use Cvttsd2siq instead of Cvttsd2si for performance reasons. The
1435       // use of Cvttsd2siq requires the movl below to avoid sign extension.
1436       __ Cvttsd2siq(result, input);
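           // Cvttsd2siq produces the "integer indefinite" value INT64_MIN when
           // the input is NaN or out of range. Subtracting 1 overflows only for
           // that sentinel, so the overflow check below routes exactly those
           // cases to the out-of-line slow path.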
1437       __ cmpq(result, Immediate(1));
1438       __ j(overflow, ool->entry());
1439       __ bind(ool->exit());
1440       __ movl(result, result);
1441       break;
1442     }
1443     case kArchStoreWithWriteBarrier:  // Fall through.
1444     case kArchAtomicStoreWithWriteBarrier: {
1445       RecordWriteMode mode =
1446           static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
1447       Register object = i.InputRegister(0);
1448       size_t index = 0;
1449       Operand operand = i.MemoryOperand(&index);
1450       Register value = i.InputRegister(index);
1451       Register scratch0 = i.TempRegister(0);
1452       Register scratch1 = i.TempRegister(1);
1453       auto ool = zone()->New<OutOfLineRecordWrite>(this, object, operand, value,
1454                                                    scratch0, scratch1, mode,
1455                                                    DetermineStubCallMode());
1456       if (arch_opcode == kArchStoreWithWriteBarrier) {
1457         EmitTSANAwareStore<std::memory_order_relaxed>(
1458             zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
1459             MachineRepresentation::kTagged);
1460       } else {
1461         DCHECK_EQ(arch_opcode, kArchAtomicStoreWithWriteBarrier);
1462         EmitTSANAwareStore<std::memory_order_seq_cst>(
1463             zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
1464             MachineRepresentation::kTagged);
1465       }
1466       if (mode > RecordWriteMode::kValueIsPointer) {
1467         __ JumpIfSmi(value, ool->exit());
1468       }
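           // Only take the out-of-line record-write path when the page holding
           // {object} is flagged as potentially containing interesting outgoing
           // pointers (roughly: while incremental marking or similar is active).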
1469       __ CheckPageFlag(object, scratch0,
1470                        MemoryChunk::kPointersFromHereAreInterestingMask,
1471                        not_zero, ool->entry());
1472       __ bind(ool->exit());
1473       break;
1474     }
1475     case kX64MFence:
1476       __ mfence();
1477       break;
1478     case kX64LFence:
1479       __ lfence();
1480       break;
1481     case kArchStackSlot: {
1482       FrameOffset offset =
1483           frame_access_state()->GetFrameOffset(i.InputInt32(0));
1484       Register base = offset.from_stack_pointer() ? rsp : rbp;
1485       __ leaq(i.OutputRegister(), Operand(base, offset.offset()));
1486       break;
1487     }
1488     case kIeee754Float64Acos:
1489       ASSEMBLE_IEEE754_UNOP(acos);
1490       break;
1491     case kIeee754Float64Acosh:
1492       ASSEMBLE_IEEE754_UNOP(acosh);
1493       break;
1494     case kIeee754Float64Asin:
1495       ASSEMBLE_IEEE754_UNOP(asin);
1496       break;
1497     case kIeee754Float64Asinh:
1498       ASSEMBLE_IEEE754_UNOP(asinh);
1499       break;
1500     case kIeee754Float64Atan:
1501       ASSEMBLE_IEEE754_UNOP(atan);
1502       break;
1503     case kIeee754Float64Atanh:
1504       ASSEMBLE_IEEE754_UNOP(atanh);
1505       break;
1506     case kIeee754Float64Atan2:
1507       ASSEMBLE_IEEE754_BINOP(atan2);
1508       break;
1509     case kIeee754Float64Cbrt:
1510       ASSEMBLE_IEEE754_UNOP(cbrt);
1511       break;
1512     case kIeee754Float64Cos:
1513       ASSEMBLE_IEEE754_UNOP(cos);
1514       break;
1515     case kIeee754Float64Cosh:
1516       ASSEMBLE_IEEE754_UNOP(cosh);
1517       break;
1518     case kIeee754Float64Exp:
1519       ASSEMBLE_IEEE754_UNOP(exp);
1520       break;
1521     case kIeee754Float64Expm1:
1522       ASSEMBLE_IEEE754_UNOP(expm1);
1523       break;
1524     case kIeee754Float64Log:
1525       ASSEMBLE_IEEE754_UNOP(log);
1526       break;
1527     case kIeee754Float64Log1p:
1528       ASSEMBLE_IEEE754_UNOP(log1p);
1529       break;
1530     case kIeee754Float64Log2:
1531       ASSEMBLE_IEEE754_UNOP(log2);
1532       break;
1533     case kIeee754Float64Log10:
1534       ASSEMBLE_IEEE754_UNOP(log10);
1535       break;
1536     case kIeee754Float64Pow:
1537       ASSEMBLE_IEEE754_BINOP(pow);
1538       break;
1539     case kIeee754Float64Sin:
1540       ASSEMBLE_IEEE754_UNOP(sin);
1541       break;
1542     case kIeee754Float64Sinh:
1543       ASSEMBLE_IEEE754_UNOP(sinh);
1544       break;
1545     case kIeee754Float64Tan:
1546       ASSEMBLE_IEEE754_UNOP(tan);
1547       break;
1548     case kIeee754Float64Tanh:
1549       ASSEMBLE_IEEE754_UNOP(tanh);
1550       break;
1551     case kX64Add32:
1552       ASSEMBLE_BINOP(addl);
1553       break;
1554     case kX64Add:
1555       ASSEMBLE_BINOP(addq);
1556       break;
1557     case kX64Sub32:
1558       ASSEMBLE_BINOP(subl);
1559       break;
1560     case kX64Sub:
1561       ASSEMBLE_BINOP(subq);
1562       break;
1563     case kX64And32:
1564       ASSEMBLE_BINOP(andl);
1565       break;
1566     case kX64And:
1567       ASSEMBLE_BINOP(andq);
1568       break;
1569     case kX64Cmp8:
1570       ASSEMBLE_COMPARE(cmpb);
1571       break;
1572     case kX64Cmp16:
1573       ASSEMBLE_COMPARE(cmpw);
1574       break;
1575     case kX64Cmp32:
1576       ASSEMBLE_COMPARE(cmpl);
1577       break;
1578     case kX64Cmp:
1579       ASSEMBLE_COMPARE(cmpq);
1580       break;
1581     case kX64Test8:
1582       ASSEMBLE_COMPARE(testb);
1583       break;
1584     case kX64Test16:
1585       ASSEMBLE_COMPARE(testw);
1586       break;
1587     case kX64Test32:
1588       ASSEMBLE_COMPARE(testl);
1589       break;
1590     case kX64Test:
1591       ASSEMBLE_COMPARE(testq);
1592       break;
1593     case kX64Imul32:
1594       ASSEMBLE_MULT(imull);
1595       break;
1596     case kX64Imul:
1597       ASSEMBLE_MULT(imulq);
1598       break;
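         // The one-operand imull/mull forms below implicitly multiply by rax
         // and leave the high 32 bits of the product in rdx; the instruction
         // selector is assumed to have fixed the input/output registers
         // accordingly.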
1599     case kX64ImulHigh32:
1600       if (HasRegisterInput(instr, 1)) {
1601         __ imull(i.InputRegister(1));
1602       } else {
1603         __ imull(i.InputOperand(1));
1604       }
1605       break;
1606     case kX64UmulHigh32:
1607       if (HasRegisterInput(instr, 1)) {
1608         __ mull(i.InputRegister(1));
1609       } else {
1610         __ mull(i.InputOperand(1));
1611       }
1612       break;
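         // For the divisions below, cdq/cqo sign-extend eax/rax into edx/rdx,
         // and the xorl/xorq clear the high half for the unsigned variants;
         // idiv/div then leave the quotient in (e/r)ax and the remainder in
         // (e/r)dx.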
1613     case kX64Idiv32:
1614       __ cdq();
1615       __ idivl(i.InputRegister(1));
1616       break;
1617     case kX64Idiv:
1618       __ cqo();
1619       __ idivq(i.InputRegister(1));
1620       break;
1621     case kX64Udiv32:
1622       __ xorl(rdx, rdx);
1623       __ divl(i.InputRegister(1));
1624       break;
1625     case kX64Udiv:
1626       __ xorq(rdx, rdx);
1627       __ divq(i.InputRegister(1));
1628       break;
1629     case kX64Not:
1630       ASSEMBLE_UNOP(notq);
1631       break;
1632     case kX64Not32:
1633       ASSEMBLE_UNOP(notl);
1634       break;
1635     case kX64Neg:
1636       ASSEMBLE_UNOP(negq);
1637       break;
1638     case kX64Neg32:
1639       ASSEMBLE_UNOP(negl);
1640       break;
1641     case kX64Or32:
1642       ASSEMBLE_BINOP(orl);
1643       break;
1644     case kX64Or:
1645       ASSEMBLE_BINOP(orq);
1646       break;
1647     case kX64Xor32:
1648       ASSEMBLE_BINOP(xorl);
1649       break;
1650     case kX64Xor:
1651       ASSEMBLE_BINOP(xorq);
1652       break;
1653     case kX64Shl32:
1654       ASSEMBLE_SHIFT(shll, 5);
1655       break;
1656     case kX64Shl:
1657       ASSEMBLE_SHIFT(shlq, 6);
1658       break;
1659     case kX64Shr32:
1660       ASSEMBLE_SHIFT(shrl, 5);
1661       break;
1662     case kX64Shr:
1663       ASSEMBLE_SHIFT(shrq, 6);
1664       break;
1665     case kX64Sar32:
1666       ASSEMBLE_SHIFT(sarl, 5);
1667       break;
1668     case kX64Sar:
1669       ASSEMBLE_SHIFT(sarq, 6);
1670       break;
1671     case kX64Rol32:
1672       ASSEMBLE_SHIFT(roll, 5);
1673       break;
1674     case kX64Rol:
1675       ASSEMBLE_SHIFT(rolq, 6);
1676       break;
1677     case kX64Ror32:
1678       ASSEMBLE_SHIFT(rorl, 5);
1679       break;
1680     case kX64Ror:
1681       ASSEMBLE_SHIFT(rorq, 6);
1682       break;
1683     case kX64Lzcnt:
1684       if (HasRegisterInput(instr, 0)) {
1685         __ Lzcntq(i.OutputRegister(), i.InputRegister(0));
1686       } else {
1687         __ Lzcntq(i.OutputRegister(), i.InputOperand(0));
1688       }
1689       break;
1690     case kX64Lzcnt32:
1691       if (HasRegisterInput(instr, 0)) {
1692         __ Lzcntl(i.OutputRegister(), i.InputRegister(0));
1693       } else {
1694         __ Lzcntl(i.OutputRegister(), i.InputOperand(0));
1695       }
1696       break;
1697     case kX64Tzcnt:
1698       if (HasRegisterInput(instr, 0)) {
1699         __ Tzcntq(i.OutputRegister(), i.InputRegister(0));
1700       } else {
1701         __ Tzcntq(i.OutputRegister(), i.InputOperand(0));
1702       }
1703       break;
1704     case kX64Tzcnt32:
1705       if (HasRegisterInput(instr, 0)) {
1706         __ Tzcntl(i.OutputRegister(), i.InputRegister(0));
1707       } else {
1708         __ Tzcntl(i.OutputRegister(), i.InputOperand(0));
1709       }
1710       break;
1711     case kX64Popcnt:
1712       if (HasRegisterInput(instr, 0)) {
1713         __ Popcntq(i.OutputRegister(), i.InputRegister(0));
1714       } else {
1715         __ Popcntq(i.OutputRegister(), i.InputOperand(0));
1716       }
1717       break;
1718     case kX64Popcnt32:
1719       if (HasRegisterInput(instr, 0)) {
1720         __ Popcntl(i.OutputRegister(), i.InputRegister(0));
1721       } else {
1722         __ Popcntl(i.OutputRegister(), i.InputOperand(0));
1723       }
1724       break;
1725     case kX64Bswap:
1726       __ bswapq(i.OutputRegister());
1727       break;
1728     case kX64Bswap32:
1729       __ bswapl(i.OutputRegister());
1730       break;
1731     case kSSEFloat32Cmp:
1732       ASSEMBLE_SSE_BINOP(Ucomiss);
1733       break;
1734     case kSSEFloat32Add:
1735       ASSEMBLE_SSE_BINOP(addss);
1736       break;
1737     case kSSEFloat32Sub:
1738       ASSEMBLE_SSE_BINOP(subss);
1739       break;
1740     case kSSEFloat32Mul:
1741       ASSEMBLE_SSE_BINOP(mulss);
1742       break;
1743     case kSSEFloat32Div:
1744       ASSEMBLE_SSE_BINOP(divss);
1745       // Don't delete this mov. It may improve performance on some CPUs
1746       // when there is a (v)mulss depending on the result.
1747       __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1748       break;
1749     case kSSEFloat32Sqrt:
1750       ASSEMBLE_SSE_UNOP(sqrtss);
1751       break;
1752     case kSSEFloat32ToFloat64:
1753       ASSEMBLE_SSE_UNOP(Cvtss2sd);
1754       break;
1755     case kSSEFloat32Round: {
1756       CpuFeatureScope sse_scope(tasm(), SSE4_1);
1757       RoundingMode const mode =
1758           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1759       __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1760       break;
1761     }
1762     case kSSEFloat32ToInt32:
1763       if (instr->InputAt(0)->IsFPRegister()) {
1764         __ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0));
1765       } else {
1766         __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
1767       }
1768       break;
1769     case kSSEFloat32ToUint32: {
1770       if (instr->InputAt(0)->IsFPRegister()) {
1771         __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1772       } else {
1773         __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1774       }
1775       break;
1776     }
1777     case kSSEFloat64Cmp:
1778       ASSEMBLE_SSE_BINOP(Ucomisd);
1779       break;
1780     case kSSEFloat64Add:
1781       ASSEMBLE_SSE_BINOP(addsd);
1782       break;
1783     case kSSEFloat64Sub:
1784       ASSEMBLE_SSE_BINOP(subsd);
1785       break;
1786     case kSSEFloat64Mul:
1787       ASSEMBLE_SSE_BINOP(mulsd);
1788       break;
1789     case kSSEFloat64Div:
1790       ASSEMBLE_SSE_BINOP(divsd);
1791       // Don't delete this mov. It may improve performance on some CPUs
1792       // when there is a (v)mulsd depending on the result.
1793       __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1794       break;
1795     case kSSEFloat64Mod: {
1796       __ AllocateStackSpace(kDoubleSize);
1797       unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1798                                                        kDoubleSize);
1799       // Move values to st(0) and st(1).
1800       __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
1801       __ fld_d(Operand(rsp, 0));
1802       __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
1803       __ fld_d(Operand(rsp, 0));
1804       // Loop while fprem isn't done.
1805       Label mod_loop;
1806       __ bind(&mod_loop);
1807       // This instruction traps on all kinds of inputs, but we are assuming
1808       // the floating point control word is set to ignore them all.
1809       __ fprem();
1810       // The following 2 instructions implicitly use rax.
1811       __ fnstsw_ax();
1812       if (CpuFeatures::IsSupported(SAHF)) {
1813         CpuFeatureScope sahf_scope(tasm(), SAHF);
1814         __ sahf();
1815       } else {
1816         __ shrl(rax, Immediate(8));
1817         __ andl(rax, Immediate(0xFF));
1818         __ pushq(rax);
1819         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1820                                                          kSystemPointerSize);
1821         __ popfq();
1822         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1823                                                          -kSystemPointerSize);
1824       }
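           // After the FPU status word has been transferred to EFLAGS, the C2
           // bit (set while fprem's reduction is still incomplete) shows up as
           // the parity flag, so keep looping while it is set.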
1825       __ j(parity_even, &mod_loop);
1826       // Move output to stack and clean up.
1827       __ fstp(1);
1828       __ fstp_d(Operand(rsp, 0));
1829       __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
1830       __ addq(rsp, Immediate(kDoubleSize));
1831       unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1832                                                        -kDoubleSize);
1833       break;
1834     }
1835     case kSSEFloat32Max: {
1836       Label compare_swap, done_compare;
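           // Ucomiss raises the parity flag for NaN inputs (handled out of
           // line). When the operands compare equal, the sign bit of the first
           // input distinguishes -0 from +0 so that Max(-0, +0) yields +0.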
1837       if (instr->InputAt(1)->IsFPRegister()) {
1838         __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1839       } else {
1840         __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1841       }
1842       auto ool =
1843           zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
1844       __ j(parity_even, ool->entry());
1845       __ j(above, &done_compare, Label::kNear);
1846       __ j(below, &compare_swap, Label::kNear);
1847       __ Movmskps(kScratchRegister, i.InputDoubleRegister(0));
1848       __ testl(kScratchRegister, Immediate(1));
1849       __ j(zero, &done_compare, Label::kNear);
1850       __ bind(&compare_swap);
1851       if (instr->InputAt(1)->IsFPRegister()) {
1852         __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1853       } else {
1854         __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1855       }
1856       __ bind(&done_compare);
1857       __ bind(ool->exit());
1858       break;
1859     }
1860     case kSSEFloat32Min: {
1861       Label compare_swap, done_compare;
1862       if (instr->InputAt(1)->IsFPRegister()) {
1863         __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1864       } else {
1865         __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1866       }
1867       auto ool =
1868           zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
1869       __ j(parity_even, ool->entry());
1870       __ j(below, &done_compare, Label::kNear);
1871       __ j(above, &compare_swap, Label::kNear);
1872       if (instr->InputAt(1)->IsFPRegister()) {
1873         __ Movmskps(kScratchRegister, i.InputDoubleRegister(1));
1874       } else {
1875         __ Movss(kScratchDoubleReg, i.InputOperand(1));
1876         __ Movmskps(kScratchRegister, kScratchDoubleReg);
1877       }
1878       __ testl(kScratchRegister, Immediate(1));
1879       __ j(zero, &done_compare, Label::kNear);
1880       __ bind(&compare_swap);
1881       if (instr->InputAt(1)->IsFPRegister()) {
1882         __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1883       } else {
1884         __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1885       }
1886       __ bind(&done_compare);
1887       __ bind(ool->exit());
1888       break;
1889     }
1890     case kSSEFloat64Max: {
1891       Label compare_swap, done_compare;
1892       if (instr->InputAt(1)->IsFPRegister()) {
1893         __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1894       } else {
1895         __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1896       }
1897       auto ool =
1898           zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
1899       __ j(parity_even, ool->entry());
1900       __ j(above, &done_compare, Label::kNear);
1901       __ j(below, &compare_swap, Label::kNear);
1902       __ Movmskpd(kScratchRegister, i.InputDoubleRegister(0));
1903       __ testl(kScratchRegister, Immediate(1));
1904       __ j(zero, &done_compare, Label::kNear);
1905       __ bind(&compare_swap);
1906       if (instr->InputAt(1)->IsFPRegister()) {
1907         __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1908       } else {
1909         __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1910       }
1911       __ bind(&done_compare);
1912       __ bind(ool->exit());
1913       break;
1914     }
1915     case kSSEFloat64Min: {
1916       Label compare_swap, done_compare;
1917       if (instr->InputAt(1)->IsFPRegister()) {
1918         __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1919       } else {
1920         __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1921       }
1922       auto ool =
1923           zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
1924       __ j(parity_even, ool->entry());
1925       __ j(below, &done_compare, Label::kNear);
1926       __ j(above, &compare_swap, Label::kNear);
1927       if (instr->InputAt(1)->IsFPRegister()) {
1928         __ Movmskpd(kScratchRegister, i.InputDoubleRegister(1));
1929       } else {
1930         __ Movsd(kScratchDoubleReg, i.InputOperand(1));
1931         __ Movmskpd(kScratchRegister, kScratchDoubleReg);
1932       }
1933       __ testl(kScratchRegister, Immediate(1));
1934       __ j(zero, &done_compare, Label::kNear);
1935       __ bind(&compare_swap);
1936       if (instr->InputAt(1)->IsFPRegister()) {
1937         __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1938       } else {
1939         __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1940       }
1941       __ bind(&done_compare);
1942       __ bind(ool->exit());
1943       break;
1944     }
1945     case kSSEFloat64Sqrt:
1946       ASSEMBLE_SSE_UNOP(Sqrtsd);
1947       break;
1948     case kSSEFloat64Round: {
1949       CpuFeatureScope sse_scope(tasm(), SSE4_1);
1950       RoundingMode const mode =
1951           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1952       __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1953       break;
1954     }
1955     case kSSEFloat64ToFloat32:
1956       ASSEMBLE_SSE_UNOP(Cvtsd2ss);
1957       break;
1958     case kSSEFloat64ToInt32:
1959       if (instr->InputAt(0)->IsFPRegister()) {
1960         __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
1961       } else {
1962         __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
1963       }
1964       break;
1965     case kSSEFloat64ToUint32: {
1966       if (instr->InputAt(0)->IsFPRegister()) {
1967         __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1968       } else {
1969         __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
1970       }
1971       if (MiscField::decode(instr->opcode())) {
1972         __ AssertZeroExtended(i.OutputRegister());
1973       }
1974       break;
1975     }
1976     case kSSEFloat32ToInt64:
1977       if (instr->InputAt(0)->IsFPRegister()) {
1978         __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1979       } else {
1980         __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1981       }
1982       if (instr->OutputCount() > 1) {
1983         __ Move(i.OutputRegister(1), 1);
1984         Label done;
1985         Label fail;
1986         __ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN));
1987         if (instr->InputAt(0)->IsFPRegister()) {
1988           __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
1989         } else {
1990           __ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
1991         }
1992         // If the input is NaN, then the conversion fails.
1993         __ j(parity_even, &fail, Label::kNear);
1994         // If the input is INT64_MIN, then the conversion succeeds.
1995         __ j(equal, &done, Label::kNear);
1996         __ cmpq(i.OutputRegister(0), Immediate(1));
1997         // If the conversion results in INT64_MIN, but the input was not
1998         // INT64_MIN, then the conversion fails.
1999         __ j(no_overflow, &done, Label::kNear);
2000         __ bind(&fail);
2001         __ Move(i.OutputRegister(1), 0);
2002         __ bind(&done);
2003       }
2004       break;
2005     case kSSEFloat64ToInt64:
2006       if (instr->InputAt(0)->IsFPRegister()) {
2007         __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
2008       } else {
2009         __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
2010       }
2011       if (instr->OutputCount() > 1) {
2012         __ Move(i.OutputRegister(1), 1);
2013         Label done;
2014         Label fail;
2015         __ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN));
2016         if (instr->InputAt(0)->IsFPRegister()) {
2017           __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
2018         } else {
2019           __ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
2020         }
2021         // If the input is NaN, then the conversion fails.
2022         __ j(parity_even, &fail, Label::kNear);
2023         // If the input is INT64_MIN, then the conversion succeeds.
2024         __ j(equal, &done, Label::kNear);
2025         __ cmpq(i.OutputRegister(0), Immediate(1));
2026         // If the conversion results in INT64_MIN, but the input was not
2027         // INT64_MIN, then the conversion fails.
2028         __ j(no_overflow, &done, Label::kNear);
2029         __ bind(&fail);
2030         __ Move(i.OutputRegister(1), 0);
2031         __ bind(&done);
2032       }
2033       break;
2034     case kSSEFloat32ToUint64: {
2035       Label fail;
2036       if (instr->OutputCount() > 1) __ Move(i.OutputRegister(1), 0);
2037       if (instr->InputAt(0)->IsFPRegister()) {
2038         __ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
2039       } else {
2040         __ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
2041       }
2042       if (instr->OutputCount() > 1) __ Move(i.OutputRegister(1), 1);
2043       __ bind(&fail);
2044       break;
2045     }
2046     case kSSEFloat64ToUint64: {
2047       Label fail;
2048       if (instr->OutputCount() > 1) __ Move(i.OutputRegister(1), 0);
2049       if (instr->InputAt(0)->IsFPRegister()) {
2050         __ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
2051       } else {
2052         __ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
2053       }
2054       if (instr->OutputCount() > 1) __ Move(i.OutputRegister(1), 1);
2055       __ bind(&fail);
2056       break;
2057     }
2058     case kSSEInt32ToFloat64:
2059       if (HasRegisterInput(instr, 0)) {
2060         __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
2061       } else {
2062         __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
2063       }
2064       break;
2065     case kSSEInt32ToFloat32:
2066       if (HasRegisterInput(instr, 0)) {
2067         __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
2068       } else {
2069         __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
2070       }
2071       break;
2072     case kSSEInt64ToFloat32:
2073       if (HasRegisterInput(instr, 0)) {
2074         __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
2075       } else {
2076         __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
2077       }
2078       break;
2079     case kSSEInt64ToFloat64:
2080       if (HasRegisterInput(instr, 0)) {
2081         __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
2082       } else {
2083         __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
2084       }
2085       break;
2086     case kSSEUint64ToFloat32:
2087       if (HasRegisterInput(instr, 0)) {
2088         __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
2089       } else {
2090         __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
2091       }
2092       break;
2093     case kSSEUint64ToFloat64:
2094       if (HasRegisterInput(instr, 0)) {
2095         __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
2096       } else {
2097         __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
2098       }
2099       break;
2100     case kSSEUint32ToFloat64:
2101       if (HasRegisterInput(instr, 0)) {
2102         __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
2103       } else {
2104         __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
2105       }
2106       break;
2107     case kSSEUint32ToFloat32:
2108       if (HasRegisterInput(instr, 0)) {
2109         __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
2110       } else {
2111         __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
2112       }
2113       break;
2114     case kSSEFloat64ExtractLowWord32:
2115       if (instr->InputAt(0)->IsFPStackSlot()) {
2116         __ movl(i.OutputRegister(), i.InputOperand(0));
2117       } else {
2118         __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
2119       }
2120       break;
2121     case kSSEFloat64ExtractHighWord32:
2122       if (instr->InputAt(0)->IsFPStackSlot()) {
2123         __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
2124       } else {
2125         __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
2126       }
2127       break;
2128     case kSSEFloat64InsertLowWord32:
2129       if (HasRegisterInput(instr, 1)) {
2130         __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
2131       } else {
2132         __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
2133       }
2134       break;
2135     case kSSEFloat64InsertHighWord32:
2136       if (HasRegisterInput(instr, 1)) {
2137         __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
2138       } else {
2139         __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
2140       }
2141       break;
2142     case kSSEFloat64LoadLowWord32:
2143       if (HasRegisterInput(instr, 0)) {
2144         __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
2145       } else {
2146         __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
2147       }
2148       break;
2149     case kAVXFloat32Cmp: {
2150       CpuFeatureScope avx_scope(tasm(), AVX);
2151       if (instr->InputAt(1)->IsFPRegister()) {
2152         __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
2153       } else {
2154         __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
2155       }
2156       break;
2157     }
2158     case kAVXFloat32Add:
2159       ASSEMBLE_AVX_BINOP(vaddss);
2160       break;
2161     case kAVXFloat32Sub:
2162       ASSEMBLE_AVX_BINOP(vsubss);
2163       break;
2164     case kAVXFloat32Mul:
2165       ASSEMBLE_AVX_BINOP(vmulss);
2166       break;
2167     case kAVXFloat32Div:
2168       ASSEMBLE_AVX_BINOP(vdivss);
2169       // Don't delete this mov. It may improve performance on some CPUs
2170       // when there is a (v)mulss depending on the result.
2171       __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
2172       break;
2173     case kAVXFloat64Cmp: {
2174       CpuFeatureScope avx_scope(tasm(), AVX);
2175       if (instr->InputAt(1)->IsFPRegister()) {
2176         __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
2177       } else {
2178         __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
2179       }
2180       break;
2181     }
2182     case kAVXFloat64Add:
2183       ASSEMBLE_AVX_BINOP(vaddsd);
2184       break;
2185     case kAVXFloat64Sub:
2186       ASSEMBLE_AVX_BINOP(vsubsd);
2187       break;
2188     case kAVXFloat64Mul:
2189       ASSEMBLE_AVX_BINOP(vmulsd);
2190       break;
2191     case kAVXFloat64Div:
2192       ASSEMBLE_AVX_BINOP(vdivsd);
2193       // Don't delete this mov. It may improve performance on some CPUs
2194       // when there is a (v)mulsd depending on the result.
2195       __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
2196       break;
2197     case kX64Float32Abs: {
2198       __ Absps(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2199                kScratchRegister);
2200       break;
2201     }
2202     case kX64Float32Neg: {
2203       __ Negps(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2204                kScratchRegister);
2205       break;
2206     }
2207     case kX64F64x2Abs:
2208     case kX64Float64Abs: {
2209       __ Abspd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2210                kScratchRegister);
2211       break;
2212     }
2213     case kX64F64x2Neg:
2214     case kX64Float64Neg: {
2215       __ Negpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2216                kScratchRegister);
2217       break;
2218     }
2219     case kSSEFloat64SilenceNaN:
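           // Subtracting 0.0 turns a signalling NaN into a quiet NaN while
           // leaving every other value (including -0.0) unchanged.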
2220       __ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
2221       __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
2222       break;
2223     case kX64Movsxbl:
2224       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2225       ASSEMBLE_MOVX(movsxbl);
2226       __ AssertZeroExtended(i.OutputRegister());
2227       break;
2228     case kX64Movzxbl:
2229       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2230       ASSEMBLE_MOVX(movzxbl);
2231       __ AssertZeroExtended(i.OutputRegister());
2232       break;
2233     case kX64Movsxbq:
2234       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2235       ASSEMBLE_MOVX(movsxbq);
2236       break;
2237     case kX64Movzxbq:
2238       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2239       ASSEMBLE_MOVX(movzxbq);
2240       __ AssertZeroExtended(i.OutputRegister());
2241       break;
2242     case kX64Movb: {
2243       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2244       size_t index = 0;
2245       Operand operand = i.MemoryOperand(&index);
2246       if (HasImmediateInput(instr, index)) {
2247         Immediate value(Immediate(i.InputInt8(index)));
2248         EmitTSANAwareStore<std::memory_order_relaxed>(
2249             zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2250             MachineRepresentation::kWord8);
2251       } else {
2252         Register value(i.InputRegister(index));
2253         EmitTSANAwareStore<std::memory_order_relaxed>(
2254             zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2255             MachineRepresentation::kWord8);
2256       }
2257       break;
2258     }
2259     case kX64Movsxwl:
2260       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2261       ASSEMBLE_MOVX(movsxwl);
2262       __ AssertZeroExtended(i.OutputRegister());
2263       break;
2264     case kX64Movzxwl:
2265       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2266       ASSEMBLE_MOVX(movzxwl);
2267       __ AssertZeroExtended(i.OutputRegister());
2268       break;
2269     case kX64Movsxwq:
2270       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2271       ASSEMBLE_MOVX(movsxwq);
2272       break;
2273     case kX64Movzxwq:
2274       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2275       ASSEMBLE_MOVX(movzxwq);
2276       __ AssertZeroExtended(i.OutputRegister());
2277       break;
2278     case kX64Movw: {
2279       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2280       size_t index = 0;
2281       Operand operand = i.MemoryOperand(&index);
2282       if (HasImmediateInput(instr, index)) {
2283         Immediate value(Immediate(i.InputInt16(index)));
2284         EmitTSANAwareStore<std::memory_order_relaxed>(
2285             zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2286             MachineRepresentation::kWord16);
2287       } else {
2288         Register value(i.InputRegister(index));
2289         EmitTSANAwareStore<std::memory_order_relaxed>(
2290             zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2291             MachineRepresentation::kWord16);
2292       }
2293       break;
2294     }
2295     case kX64Movl:
2296       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2297       if (instr->HasOutput()) {
2298         if (HasAddressingMode(instr)) {
2299           Operand address(i.MemoryOperand());
2300           __ movl(i.OutputRegister(), address);
2301           EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, tasm(), address, i,
2302                                          DetermineStubCallMode(), kInt32Size);
2303         } else {
2304           if (HasRegisterInput(instr, 0)) {
2305             __ movl(i.OutputRegister(), i.InputRegister(0));
2306           } else {
2307             __ movl(i.OutputRegister(), i.InputOperand(0));
2308           }
2309         }
2310         __ AssertZeroExtended(i.OutputRegister());
2311       } else {
2312         size_t index = 0;
2313         Operand operand = i.MemoryOperand(&index);
2314         if (HasImmediateInput(instr, index)) {
2315           Immediate value(i.InputImmediate(index));
2316           EmitTSANAwareStore<std::memory_order_relaxed>(
2317               zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2318               MachineRepresentation::kWord32);
2319         } else {
2320           Register value(i.InputRegister(index));
2321           EmitTSANAwareStore<std::memory_order_relaxed>(
2322               zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2323               MachineRepresentation::kWord32);
2324         }
2325       }
2326       break;
2327     case kX64Movsxlq:
2328       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2329       ASSEMBLE_MOVX(movsxlq);
2330       break;
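         // The decompression cases below matter on pointer-compression builds:
         // they load a 32-bit compressed tagged value and, for pointers, add
         // the cage base to rebuild the full 64-bit tagged pointer (a sketch of
         // what the TurboAssembler helpers do; Smis only need zero-extension).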
2331     case kX64MovqDecompressTaggedSigned: {
2332       CHECK(instr->HasOutput());
2333       Operand address(i.MemoryOperand());
2334       __ DecompressTaggedSigned(i.OutputRegister(), address);
2335       EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, tasm(), address, i,
2336                                      DetermineStubCallMode(), kTaggedSize);
2337       break;
2338     }
2339     case kX64MovqDecompressTaggedPointer: {
2340       CHECK(instr->HasOutput());
2341       Operand address(i.MemoryOperand());
2342       __ DecompressTaggedPointer(i.OutputRegister(), address);
2343       EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, tasm(), address, i,
2344                                      DetermineStubCallMode(), kTaggedSize);
2345       break;
2346     }
2347     case kX64MovqDecompressAnyTagged: {
2348       CHECK(instr->HasOutput());
2349       Operand address(i.MemoryOperand());
2350       __ DecompressAnyTagged(i.OutputRegister(), address);
2351       EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, tasm(), address, i,
2352                                      DetermineStubCallMode(), kTaggedSize);
2353       break;
2354     }
2355     case kX64MovqCompressTagged: {
2356       CHECK(!instr->HasOutput());
2357       size_t index = 0;
2358       Operand operand = i.MemoryOperand(&index);
2359       if (HasImmediateInput(instr, index)) {
2360         Immediate value(i.InputImmediate(index));
2361         EmitTSANAwareStore<std::memory_order_relaxed>(
2362             zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2363             MachineRepresentation::kTagged);
2364       } else {
2365         Register value(i.InputRegister(index));
2366         EmitTSANAwareStore<std::memory_order_relaxed>(
2367             zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2368             MachineRepresentation::kTagged);
2369       }
2370       break;
2371     }
2372     case kX64Movq:
2373       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2374       if (instr->HasOutput()) {
2375         Operand address(i.MemoryOperand());
2376         __ movq(i.OutputRegister(), address);
2377         EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, tasm(), address, i,
2378                                        DetermineStubCallMode(), kInt64Size);
2379       } else {
2380         size_t index = 0;
2381         Operand operand = i.MemoryOperand(&index);
2382         if (HasImmediateInput(instr, index)) {
2383           Immediate value(i.InputImmediate(index));
2384           EmitTSANAwareStore<std::memory_order_relaxed>(
2385               zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2386               MachineRepresentation::kWord64);
2387         } else {
2388           Register value(i.InputRegister(index));
2389           EmitTSANAwareStore<std::memory_order_relaxed>(
2390               zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2391               MachineRepresentation::kWord64);
2392         }
2393       }
2394       break;
2395     case kX64Movss:
2396       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2397       if (instr->HasOutput()) {
2398         __ Movss(i.OutputDoubleRegister(), i.MemoryOperand());
2399       } else {
2400         size_t index = 0;
2401         Operand operand = i.MemoryOperand(&index);
2402         __ Movss(operand, i.InputDoubleRegister(index));
2403       }
2404       break;
2405     case kX64Movsd: {
2406       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2407       if (instr->HasOutput()) {
2408         __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
2409       } else {
2410         size_t index = 0;
2411         Operand operand = i.MemoryOperand(&index);
2412         __ Movsd(operand, i.InputDoubleRegister(index));
2413       }
2414       break;
2415     }
2416     case kX64Movdqu: {
2417       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2418       if (instr->HasOutput()) {
2419         __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
2420       } else {
2421         size_t index = 0;
2422         Operand operand = i.MemoryOperand(&index);
2423         __ Movdqu(operand, i.InputSimd128Register(index));
2424       }
2425       break;
2426     }
2427     case kX64BitcastFI:
2428       if (instr->InputAt(0)->IsFPStackSlot()) {
2429         __ movl(i.OutputRegister(), i.InputOperand(0));
2430       } else {
2431         __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
2432       }
2433       break;
2434     case kX64BitcastDL:
2435       if (instr->InputAt(0)->IsFPStackSlot()) {
2436         __ movq(i.OutputRegister(), i.InputOperand(0));
2437       } else {
2438         __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
2439       }
2440       break;
2441     case kX64BitcastIF:
2442       if (HasRegisterInput(instr, 0)) {
2443         __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
2444       } else {
2445         __ Movss(i.OutputDoubleRegister(), i.InputOperand(0));
2446       }
2447       break;
2448     case kX64BitcastLD:
2449       if (HasRegisterInput(instr, 0)) {
2450         __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
2451       } else {
2452         __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
2453       }
2454       break;
2455     case kX64Lea32: {
2456       AddressingMode mode = AddressingModeField::decode(instr->opcode());
2457       // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
2458       // and addressing mode just happen to work out. The "addl"/"subl" forms
2459       // in these cases are faster based on measurements.
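           // For example (hypothetical register assignment): leal rax,[rax+0x4]
           // becomes addl rax,0x4, and leal rax,[rax+rax*1] becomes shll rax,1.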
2460       if (i.InputRegister(0) == i.OutputRegister()) {
2461         if (mode == kMode_MRI) {
2462           int32_t constant_summand = i.InputInt32(1);
2463           DCHECK_NE(0, constant_summand);
2464           if (constant_summand > 0) {
2465             __ addl(i.OutputRegister(), Immediate(constant_summand));
2466           } else {
2467             __ subl(i.OutputRegister(),
2468                     Immediate(base::NegateWithWraparound(constant_summand)));
2469           }
2470         } else if (mode == kMode_MR1) {
2471           if (i.InputRegister(1) == i.OutputRegister()) {
2472             __ shll(i.OutputRegister(), Immediate(1));
2473           } else {
2474             __ addl(i.OutputRegister(), i.InputRegister(1));
2475           }
2476         } else if (mode == kMode_M2) {
2477           __ shll(i.OutputRegister(), Immediate(1));
2478         } else if (mode == kMode_M4) {
2479           __ shll(i.OutputRegister(), Immediate(2));
2480         } else if (mode == kMode_M8) {
2481           __ shll(i.OutputRegister(), Immediate(3));
2482         } else {
2483           __ leal(i.OutputRegister(), i.MemoryOperand());
2484         }
2485       } else if (mode == kMode_MR1 &&
2486                  i.InputRegister(1) == i.OutputRegister()) {
2487         __ addl(i.OutputRegister(), i.InputRegister(0));
2488       } else {
2489         __ leal(i.OutputRegister(), i.MemoryOperand());
2490       }
2491       __ AssertZeroExtended(i.OutputRegister());
2492       break;
2493     }
2494     case kX64Lea: {
2495       AddressingMode mode = AddressingModeField::decode(instr->opcode());
2496       // Shorten "leaq" to "addq", "subq" or "shlq" if the register allocation
2497       // and addressing mode just happen to work out. The "addq"/"subq" forms
2498       // in these cases are faster based on measurements.
2499       if (i.InputRegister(0) == i.OutputRegister()) {
2500         if (mode == kMode_MRI) {
2501           int32_t constant_summand = i.InputInt32(1);
2502           if (constant_summand > 0) {
2503             __ addq(i.OutputRegister(), Immediate(constant_summand));
2504           } else if (constant_summand < 0) {
2505             __ subq(i.OutputRegister(), Immediate(-constant_summand));
2506           }
2507         } else if (mode == kMode_MR1) {
2508           if (i.InputRegister(1) == i.OutputRegister()) {
2509             __ shlq(i.OutputRegister(), Immediate(1));
2510           } else {
2511             __ addq(i.OutputRegister(), i.InputRegister(1));
2512           }
2513         } else if (mode == kMode_M2) {
2514           __ shlq(i.OutputRegister(), Immediate(1));
2515         } else if (mode == kMode_M4) {
2516           __ shlq(i.OutputRegister(), Immediate(2));
2517         } else if (mode == kMode_M8) {
2518           __ shlq(i.OutputRegister(), Immediate(3));
2519         } else {
2520           __ leaq(i.OutputRegister(), i.MemoryOperand());
2521         }
2522       } else if (mode == kMode_MR1 &&
2523                  i.InputRegister(1) == i.OutputRegister()) {
2524         __ addq(i.OutputRegister(), i.InputRegister(0));
2525       } else {
2526         __ leaq(i.OutputRegister(), i.MemoryOperand());
2527       }
2528       break;
2529     }
2530     case kX64Dec32:
2531       __ decl(i.OutputRegister());
2532       break;
2533     case kX64Inc32:
2534       __ incl(i.OutputRegister());
2535       break;
2536     case kX64Push: {
2537       int stack_decrement = i.InputInt32(0);
2538       int slots = stack_decrement / kSystemPointerSize;
2539       // Whenever codegen uses pushq, we need to check if stack_decrement
2540       // contains any extra padding and adjust the stack before the pushq.
2541       if (HasImmediateInput(instr, 1)) {
2542         __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
2543         __ pushq(i.InputImmediate(1));
2544       } else if (HasAddressingMode(instr)) {
2545         __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
2546         size_t index = 1;
2547         Operand operand = i.MemoryOperand(&index);
2548         __ pushq(operand);
2549       } else {
2550         InstructionOperand* input = instr->InputAt(1);
2551         if (input->IsRegister()) {
2552           __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
2553           __ pushq(i.InputRegister(1));
2554         } else if (input->IsFloatRegister() || input->IsDoubleRegister()) {
2555           DCHECK_GE(stack_decrement, kSystemPointerSize);
2556           __ AllocateStackSpace(stack_decrement);
2557           __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
2558         } else if (input->IsSimd128Register()) {
2559           DCHECK_GE(stack_decrement, kSimd128Size);
2560           __ AllocateStackSpace(stack_decrement);
2561           // TODO(bbudge) Use Movaps when slots are aligned.
2562           __ Movups(Operand(rsp, 0), i.InputSimd128Register(1));
2563         } else if (input->IsStackSlot() || input->IsFloatStackSlot() ||
2564                    input->IsDoubleStackSlot()) {
2565           __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
2566           __ pushq(i.InputOperand(1));
2567         } else {
2568           DCHECK(input->IsSimd128StackSlot());
2569           DCHECK_GE(stack_decrement, kSimd128Size);
2570           // TODO(bbudge) Use Movaps when slots are aligned.
2571           __ Movups(kScratchDoubleReg, i.InputOperand(1));
2572           __ AllocateStackSpace(stack_decrement);
2573           __ Movups(Operand(rsp, 0), kScratchDoubleReg);
2574         }
2575       }
2576       frame_access_state()->IncreaseSPDelta(slots);
2577       unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2578                                                        stack_decrement);
2579       break;
2580     }
2581     case kX64Poke: {
2582       int slot = MiscField::decode(instr->opcode());
2583       if (HasImmediateInput(instr, 0)) {
2584         __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputImmediate(0));
2585       } else if (instr->InputAt(0)->IsFPRegister()) {
2586         LocationOperand* op = LocationOperand::cast(instr->InputAt(0));
2587         if (op->representation() == MachineRepresentation::kFloat64) {
2588           __ Movsd(Operand(rsp, slot * kSystemPointerSize),
2589                    i.InputDoubleRegister(0));
2590         } else {
2591           DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
2592           __ Movss(Operand(rsp, slot * kSystemPointerSize),
2593                    i.InputFloatRegister(0));
2594         }
2595       } else {
2596         __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputRegister(0));
2597       }
2598       break;
2599     }
2600     case kX64Peek: {
2601       int reverse_slot = i.InputInt32(0);
2602       int offset =
2603           FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
2604       if (instr->OutputAt(0)->IsFPRegister()) {
2605         LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
2606         if (op->representation() == MachineRepresentation::kFloat64) {
2607           __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
2608         } else if (op->representation() == MachineRepresentation::kFloat32) {
2609           __ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
2610         } else {
2611           DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
2612           __ Movdqu(i.OutputSimd128Register(), Operand(rbp, offset));
2613         }
2614       } else {
2615         __ movq(i.OutputRegister(), Operand(rbp, offset));
2616       }
2617       break;
2618     }
2619     case kX64F64x2Splat: {
2620       XMMRegister dst = i.OutputSimd128Register();
2621       if (instr->InputAt(0)->IsFPRegister()) {
2622         __ Movddup(dst, i.InputDoubleRegister(0));
2623       } else {
2624         __ Movddup(dst, i.InputOperand(0));
2625       }
2626       break;
2627     }
2628     case kX64F64x2ExtractLane: {
2629       __ F64x2ExtractLane(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2630                           i.InputUint8(1));
2631       break;
2632     }
2633     case kX64F64x2ReplaceLane: {
2634       __ F64x2ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
2635                           i.InputDoubleRegister(2), i.InputInt8(1));
2636       break;
2637     }
2638     case kX64F64x2Sqrt: {
2639       __ Sqrtpd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2640       break;
2641     }
2642     case kX64F64x2Add: {
2643       ASSEMBLE_SIMD_BINOP(addpd);
2644       break;
2645     }
2646     case kX64F64x2Sub: {
2647       ASSEMBLE_SIMD_BINOP(subpd);
2648       break;
2649     }
2650     case kX64F64x2Mul: {
2651       ASSEMBLE_SIMD_BINOP(mulpd);
2652       break;
2653     }
2654     case kX64F64x2Div: {
2655       ASSEMBLE_SIMD_BINOP(divpd);
2656       break;
2657     }
2658     case kX64F64x2Min: {
2659       // Avoids a move in the no-AVX case if dst == src0.
2660       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2661       __ F64x2Min(i.OutputSimd128Register(), i.InputSimd128Register(0),
2662                   i.InputSimd128Register(1), kScratchDoubleReg);
2663       break;
2664     }
2665     case kX64F64x2Max: {
2666       // Avoids a move in the no-AVX case if dst == src0.
2667       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2668       __ F64x2Max(i.OutputSimd128Register(), i.InputSimd128Register(0),
2669                   i.InputSimd128Register(1), kScratchDoubleReg);
2670       break;
2671     }
2672     case kX64F64x2Eq: {
2673       ASSEMBLE_SIMD_BINOP(cmpeqpd);
2674       break;
2675     }
2676     case kX64F64x2Ne: {
2677       ASSEMBLE_SIMD_BINOP(cmpneqpd);
2678       break;
2679     }
2680     case kX64F64x2Lt: {
2681       ASSEMBLE_SIMD_BINOP(cmpltpd);
2682       break;
2683     }
2684     case kX64F64x2Le: {
2685       ASSEMBLE_SIMD_BINOP(cmplepd);
2686       break;
2687     }
2688     case kX64F64x2Qfma: {
2689       __ F64x2Qfma(i.OutputSimd128Register(), i.InputSimd128Register(0),
2690                    i.InputSimd128Register(1), i.InputSimd128Register(2),
2691                    kScratchDoubleReg);
2692       break;
2693     }
2694     case kX64F64x2Qfms: {
2695       __ F64x2Qfms(i.OutputSimd128Register(), i.InputSimd128Register(0),
2696                    i.InputSimd128Register(1), i.InputSimd128Register(2),
2697                    kScratchDoubleReg);
2698       break;
2699     }
2700     case kX64F64x2ConvertLowI32x4S: {
2701       __ Cvtdq2pd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2702       break;
2703     }
2704     case kX64F64x2ConvertLowI32x4U: {
2705       __ F64x2ConvertLowI32x4U(i.OutputSimd128Register(),
2706                                i.InputSimd128Register(0), kScratchRegister);
2707       break;
2708     }
2709     case kX64F64x2PromoteLowF32x4: {
2710       if (HasAddressingMode(instr)) {
2711         EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2712         __ Cvtps2pd(i.OutputSimd128Register(), i.MemoryOperand());
2713       } else {
2714         __ Cvtps2pd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2715       }
2716       break;
2717     }
2718     case kX64F32x4DemoteF64x2Zero: {
2719       __ Cvtpd2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2720       break;
2721     }
2722     case kX64I32x4TruncSatF64x2SZero: {
2723       __ I32x4TruncSatF64x2SZero(i.OutputSimd128Register(),
2724                                  i.InputSimd128Register(0), kScratchDoubleReg,
2725                                  kScratchRegister);
2726       break;
2727     }
2728     case kX64I32x4TruncSatF64x2UZero: {
2729       __ I32x4TruncSatF64x2UZero(i.OutputSimd128Register(),
2730                                  i.InputSimd128Register(0), kScratchDoubleReg,
2731                                  kScratchRegister);
2732       break;
2733     }
2734     case kX64F32x4Splat: {
2735       __ F32x4Splat(i.OutputSimd128Register(), i.InputDoubleRegister(0));
2736       break;
2737     }
2738     case kX64F32x4ExtractLane: {
2739       __ F32x4ExtractLane(i.OutputFloatRegister(), i.InputSimd128Register(0),
2740                           i.InputUint8(1));
2741       break;
2742     }
2743     case kX64F32x4ReplaceLane: {
2744       // The insertps instruction uses imm8[5:4] to indicate the lane
2745       // that needs to be replaced.
2746       byte select = i.InputInt8(1) << 4 & 0x30;
2747       if (instr->InputAt(2)->IsFPRegister()) {
2748         __ Insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2),
2749                     select);
2750       } else {
2751         __ Insertps(i.OutputSimd128Register(), i.InputOperand(2), select);
2752       }
2753       break;
2754     }
2755     case kX64F32x4SConvertI32x4: {
2756       __ Cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2757       break;
2758     }
2759     case kX64F32x4UConvertI32x4: {
2760       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2761       DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
2762       XMMRegister dst = i.OutputSimd128Register();
2763       __ Pxor(kScratchDoubleReg, kScratchDoubleReg);  // zeros
2764       __ Pblendw(kScratchDoubleReg, dst, uint8_t{0x55});  // get lo 16 bits
2765       __ Psubd(dst, kScratchDoubleReg);                   // get hi 16 bits
2766       __ Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // convert lo exactly
2767       __ Psrld(dst, byte{1});            // divide by 2 to get in unsigned range
2768       __ Cvtdq2ps(dst, dst);             // convert hi exactly
2769       __ Addps(dst, dst);                // double hi, exactly
2770       __ Addps(dst, kScratchDoubleReg);  // add hi and lo, may round.
2771       break;
2772     }
2773     case kX64F32x4Abs: {
2774       XMMRegister dst = i.OutputSimd128Register();
2775       XMMRegister src = i.InputSimd128Register(0);
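      // Clear the sign bit of every lane by AND-ing with 0x7FFFFFFF, built as
      // all-ones shifted right by one.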
2776       if (dst == src) {
2777         __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2778         __ Psrld(kScratchDoubleReg, byte{1});
2779         __ Andps(dst, kScratchDoubleReg);
2780       } else {
2781         __ Pcmpeqd(dst, dst);
2782         __ Psrld(dst, byte{1});
2783         __ Andps(dst, src);
2784       }
2785       break;
2786     }
2787     case kX64F32x4Neg: {
2788       XMMRegister dst = i.OutputSimd128Register();
2789       XMMRegister src = i.InputSimd128Register(0);
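      // Flip the sign bit of every lane by XOR-ing with 0x80000000, built as
      // all-ones shifted left by 31.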
2790       if (dst == src) {
2791         __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2792         __ Pslld(kScratchDoubleReg, byte{31});
2793         __ Xorps(dst, kScratchDoubleReg);
2794       } else {
2795         __ Pcmpeqd(dst, dst);
2796         __ Pslld(dst, byte{31});
2797         __ Xorps(dst, src);
2798       }
2799       break;
2800     }
2801     case kX64F32x4Sqrt: {
2802       __ Sqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2803       break;
2804     }
2805     case kX64F32x4RecipApprox: {
2806       __ Rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2807       break;
2808     }
2809     case kX64F32x4RecipSqrtApprox: {
2810       __ Rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2811       break;
2812     }
2813     case kX64F32x4Add: {
2814       ASSEMBLE_SIMD_BINOP(addps);
2815       break;
2816     }
2817     case kX64F32x4Sub: {
2818       ASSEMBLE_SIMD_BINOP(subps);
2819       break;
2820     }
2821     case kX64F32x4Mul: {
2822       ASSEMBLE_SIMD_BINOP(mulps);
2823       break;
2824     }
2825     case kX64F32x4Div: {
2826       ASSEMBLE_SIMD_BINOP(divps);
2827       break;
2828     }
2829     case kX64F32x4Min: {
2830       __ F32x4Min(i.OutputSimd128Register(), i.InputSimd128Register(0),
2831                   i.InputSimd128Register(1), kScratchDoubleReg);
2832       break;
2833     }
2834     case kX64F32x4Max: {
2835       __ F32x4Max(i.OutputSimd128Register(), i.InputSimd128Register(0),
2836                   i.InputSimd128Register(1), kScratchDoubleReg);
2837       break;
2838     }
2839     case kX64F32x4Eq: {
2840       ASSEMBLE_SIMD_BINOP(cmpeqps);
2841       break;
2842     }
2843     case kX64F32x4Ne: {
2844       ASSEMBLE_SIMD_BINOP(cmpneqps);
2845       break;
2846     }
2847     case kX64F32x4Lt: {
2848       ASSEMBLE_SIMD_BINOP(cmpltps);
2849       break;
2850     }
2851     case kX64F32x4Le: {
2852       ASSEMBLE_SIMD_BINOP(cmpleps);
2853       break;
2854     }
2855     case kX64F32x4Qfma: {
2856       __ F32x4Qfma(i.OutputSimd128Register(), i.InputSimd128Register(0),
2857                    i.InputSimd128Register(1), i.InputSimd128Register(2),
2858                    kScratchDoubleReg);
2859       break;
2860     }
2861     case kX64F32x4Qfms: {
2862       __ F32x4Qfms(i.OutputSimd128Register(), i.InputSimd128Register(0),
2863                    i.InputSimd128Register(1), i.InputSimd128Register(2),
2864                    kScratchDoubleReg);
2865       break;
2866     }
2867     case kX64F32x4Pmin: {
2868       ASSEMBLE_SIMD_BINOP(minps);
2869       break;
2870     }
2871     case kX64F32x4Pmax: {
2872       ASSEMBLE_SIMD_BINOP(maxps);
2873       break;
2874     }
2875     case kX64F32x4Round: {
2876       RoundingMode const mode =
2877           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
2878       __ Roundps(i.OutputSimd128Register(), i.InputSimd128Register(0), mode);
2879       break;
2880     }
2881     case kX64F64x2Round: {
2882       RoundingMode const mode =
2883           static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
2884       __ Roundpd(i.OutputSimd128Register(), i.InputSimd128Register(0), mode);
2885       break;
2886     }
2887     case kX64F64x2Pmin: {
2888       ASSEMBLE_SIMD_BINOP(minpd);
2889       break;
2890     }
2891     case kX64F64x2Pmax: {
2892       ASSEMBLE_SIMD_BINOP(maxpd);
2893       break;
2894     }
2895     case kX64I64x2Splat: {
2896       XMMRegister dst = i.OutputSimd128Register();
2897       if (HasRegisterInput(instr, 0)) {
2898         __ Movq(dst, i.InputRegister(0));
2899         __ Movddup(dst, dst);
2900       } else {
2901         __ Movddup(dst, i.InputOperand(0));
2902       }
2903       break;
2904     }
2905     case kX64I64x2ExtractLane: {
2906       __ Pextrq(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
2907       break;
2908     }
2909     case kX64I64x2Abs: {
2910       __ I64x2Abs(i.OutputSimd128Register(), i.InputSimd128Register(0),
2911                   kScratchDoubleReg);
2912       break;
2913     }
2914     case kX64I64x2Neg: {
2915       __ I64x2Neg(i.OutputSimd128Register(), i.InputSimd128Register(0),
2916                   kScratchDoubleReg);
2917       break;
2918     }
2919     case kX64I64x2BitMask: {
2920       __ Movmskpd(i.OutputRegister(), i.InputSimd128Register(0));
2921       break;
2922     }
2923     case kX64I64x2Shl: {
2924       // Take shift value modulo 2^6.
2925       ASSEMBLE_SIMD_SHIFT(psllq, 6);
2926       break;
2927     }
2928     case kX64I64x2ShrS: {
2929       // TODO(zhin): there is vpsraq but requires AVX512
2930       XMMRegister dst = i.OutputSimd128Register();
2931       XMMRegister src = i.InputSimd128Register(0);
2932       if (HasImmediateInput(instr, 1)) {
2933         __ I64x2ShrS(dst, src, i.InputInt6(1), kScratchDoubleReg);
2934       } else {
2935         __ I64x2ShrS(dst, src, i.InputRegister(1), kScratchDoubleReg,
2936                      i.TempSimd128Register(0), kScratchRegister);
2937       }
2938       break;
2939     }
2940     case kX64I64x2Add: {
2941       ASSEMBLE_SIMD_BINOP(paddq);
2942       break;
2943     }
2944     case kX64I64x2Sub: {
2945       ASSEMBLE_SIMD_BINOP(psubq);
2946       break;
2947     }
2948     case kX64I64x2Mul: {
2949       __ I64x2Mul(i.OutputSimd128Register(), i.InputSimd128Register(0),
2950                   i.InputSimd128Register(1), i.TempSimd128Register(0),
2951                   kScratchDoubleReg);
2952       break;
2953     }
2954     case kX64I64x2Eq: {
2955       CpuFeatureScope sse_scope(tasm(), SSE4_1);
2956       ASSEMBLE_SIMD_BINOP(pcmpeqq);
2957       break;
2958     }
2959     case kX64I64x2Ne: {
2960       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
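      // There is no SIMD not-equal; compute equality, then invert the result
      // with an all-ones mask.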
2961       __ Pcmpeqq(i.OutputSimd128Register(), i.InputSimd128Register(1));
2962       __ Pcmpeqq(kScratchDoubleReg, kScratchDoubleReg);
2963       __ Pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2964       break;
2965     }
2966     case kX64I64x2GtS: {
2967       __ I64x2GtS(i.OutputSimd128Register(), i.InputSimd128Register(0),
2968                   i.InputSimd128Register(1), kScratchDoubleReg);
2969       break;
2970     }
2971     case kX64I64x2GeS: {
2972       __ I64x2GeS(i.OutputSimd128Register(), i.InputSimd128Register(0),
2973                   i.InputSimd128Register(1), kScratchDoubleReg);
2974       break;
2975     }
2976     case kX64I64x2ShrU: {
2977       // Take shift value modulo 2^6.
2978       ASSEMBLE_SIMD_SHIFT(psrlq, 6);
2979       break;
2980     }
2981     case kX64I64x2ExtMulLowI32x4S: {
2982       __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
2983                      i.InputSimd128Register(1), kScratchDoubleReg, /*low=*/true,
2984                      /*is_signed=*/true);
2985       break;
2986     }
2987     case kX64I64x2ExtMulHighI32x4S: {
2988       __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
2989                      i.InputSimd128Register(1), kScratchDoubleReg,
2990                      /*low=*/false,
2991                      /*is_signed=*/true);
2992       break;
2993     }
2994     case kX64I64x2ExtMulLowI32x4U: {
2995       __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
2996                      i.InputSimd128Register(1), kScratchDoubleReg, /*low=*/true,
2997                      /*is_signed=*/false);
2998       break;
2999     }
3000     case kX64I64x2ExtMulHighI32x4U: {
3001       __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3002                      i.InputSimd128Register(1), kScratchDoubleReg,
3003                      /*low=*/false,
3004                      /*is_signed=*/false);
3005       break;
3006     }
3007     case kX64I64x2SConvertI32x4Low: {
3008       __ Pmovsxdq(i.OutputSimd128Register(), i.InputSimd128Register(0));
3009       break;
3010     }
3011     case kX64I64x2SConvertI32x4High: {
3012       __ I64x2SConvertI32x4High(i.OutputSimd128Register(),
3013                                 i.InputSimd128Register(0));
3014       break;
3015     }
3016     case kX64I64x2UConvertI32x4Low: {
3017       __ Pmovzxdq(i.OutputSimd128Register(), i.InputSimd128Register(0));
3018       break;
3019     }
3020     case kX64I64x2UConvertI32x4High: {
3021       __ I64x2UConvertI32x4High(i.OutputSimd128Register(),
3022                                 i.InputSimd128Register(0), kScratchDoubleReg);
3023       break;
3024     }
3025     case kX64I32x4Splat: {
3026       XMMRegister dst = i.OutputSimd128Register();
3027       if (HasRegisterInput(instr, 0)) {
3028         __ Movd(dst, i.InputRegister(0));
3029       } else {
3030         // TODO(v8:9198): Pshufd can load from aligned memory once supported.
3031         __ Movd(dst, i.InputOperand(0));
3032       }
3033       __ Pshufd(dst, dst, uint8_t{0x0});
3034       break;
3035     }
3036     case kX64I32x4ExtractLane: {
3037       __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
3038       break;
3039     }
3040     case kX64I32x4SConvertF32x4: {
3041       __ I32x4SConvertF32x4(i.OutputSimd128Register(),
3042                             i.InputSimd128Register(0), kScratchDoubleReg,
3043                             kScratchRegister);
3044       break;
3045     }
3046     case kX64I32x4SConvertI16x8Low: {
3047       __ Pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
3048       break;
3049     }
3050     case kX64I32x4SConvertI16x8High: {
3051       __ I32x4SConvertI16x8High(i.OutputSimd128Register(),
3052                                 i.InputSimd128Register(0));
3053       break;
3054     }
3055     case kX64I32x4Neg: {
3056       XMMRegister dst = i.OutputSimd128Register();
3057       XMMRegister src = i.InputSimd128Register(0);
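      // With dst == src, psignd against an all-ones (-1) mask negates each
      // lane in place; otherwise compute 0 - src.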
3058       if (dst == src) {
3059         __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3060         __ Psignd(dst, kScratchDoubleReg);
3061       } else {
3062         __ Pxor(dst, dst);
3063         __ Psubd(dst, src);
3064       }
3065       break;
3066     }
3067     case kX64I32x4Shl: {
3068       // Take shift value modulo 2^5.
3069       ASSEMBLE_SIMD_SHIFT(pslld, 5);
3070       break;
3071     }
3072     case kX64I32x4ShrS: {
3073       // Take shift value modulo 2^5.
3074       ASSEMBLE_SIMD_SHIFT(psrad, 5);
3075       break;
3076     }
3077     case kX64I32x4Add: {
3078       ASSEMBLE_SIMD_BINOP(paddd);
3079       break;
3080     }
3081     case kX64I32x4Sub: {
3082       ASSEMBLE_SIMD_BINOP(psubd);
3083       break;
3084     }
3085     case kX64I32x4Mul: {
3086       ASSEMBLE_SIMD_BINOP(pmulld);
3087       break;
3088     }
3089     case kX64I32x4MinS: {
3090       ASSEMBLE_SIMD_BINOP(pminsd);
3091       break;
3092     }
3093     case kX64I32x4MaxS: {
3094       ASSEMBLE_SIMD_BINOP(pmaxsd);
3095       break;
3096     }
3097     case kX64I32x4Eq: {
3098       ASSEMBLE_SIMD_BINOP(pcmpeqd);
3099       break;
3100     }
3101     case kX64I32x4Ne: {
3102       __ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
3103       __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3104       __ Pxor(i.OutputSimd128Register(), kScratchDoubleReg);
3105       break;
3106     }
3107     case kX64I32x4GtS: {
3108       ASSEMBLE_SIMD_BINOP(pcmpgtd);
3109       break;
3110     }
3111     case kX64I32x4GeS: {
3112       XMMRegister dst = i.OutputSimd128Register();
3113       XMMRegister src = i.InputSimd128Register(1);
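      // dst >= src (signed) iff min(dst, src) == src.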
3114       __ Pminsd(dst, src);
3115       __ Pcmpeqd(dst, src);
3116       break;
3117     }
3118     case kX64I32x4UConvertF32x4: {
3119       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3120       XMMRegister dst = i.OutputSimd128Register();
3121       XMMRegister tmp = i.TempSimd128Register(0);
3122       XMMRegister tmp2 = i.TempSimd128Register(1);
3123       // NAN->0, negative->0
3124       __ Pxor(tmp2, tmp2);
3125       __ Maxps(dst, tmp2);
3126       // tmp2: float representation of max_signed
3127       __ Pcmpeqd(tmp2, tmp2);
3128       __ Psrld(tmp2, uint8_t{1});  // 0x7fffffff
3129       __ Cvtdq2ps(tmp2, tmp2);     // 0x4f000000
3130       // tmp: convert (src-max_signed).
3131       // Positive overflow lanes -> 0x7FFFFFFF
3132       // Negative lanes -> 0
3133       __ Movaps(tmp, dst);
3134       __ Subps(tmp, tmp2);
3135       __ Cmpleps(tmp2, tmp);
3136       __ Cvttps2dq(tmp, tmp);
3137       __ Pxor(tmp, tmp2);
3138       __ Pxor(tmp2, tmp2);
3139       __ Pmaxsd(tmp, tmp2);
3140       // convert. Overflow lanes above max_signed will be 0x80000000
3141       __ Cvttps2dq(dst, dst);
3142       // Add (src-max_signed) for overflow lanes.
3143       __ Paddd(dst, tmp);
3144       break;
3145     }
3146     case kX64I32x4UConvertI16x8Low: {
3147       __ Pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
3148       break;
3149     }
3150     case kX64I32x4UConvertI16x8High: {
3151       __ I32x4UConvertI16x8High(i.OutputSimd128Register(),
3152                                 i.InputSimd128Register(0), kScratchDoubleReg);
3153       break;
3154     }
3155     case kX64I32x4ShrU: {
3156       // Take shift value modulo 2^5.
3157       ASSEMBLE_SIMD_SHIFT(psrld, 5);
3158       break;
3159     }
3160     case kX64I32x4MinU: {
3161       ASSEMBLE_SIMD_BINOP(pminud);
3162       break;
3163     }
3164     case kX64I32x4MaxU: {
3165       ASSEMBLE_SIMD_BINOP(pmaxud);
3166       break;
3167     }
3168     case kX64I32x4GtU: {
3169       XMMRegister dst = i.OutputSimd128Register();
3170       XMMRegister src = i.InputSimd128Register(1);
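      // There is no unsigned pcmpgt; compute NOT(max(dst, src) == src),
      // i.e. dst > src (unsigned).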
3171       __ Pmaxud(dst, src);
3172       __ Pcmpeqd(dst, src);
3173       __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3174       __ Pxor(dst, kScratchDoubleReg);
3175       break;
3176     }
3177     case kX64I32x4GeU: {
3178       XMMRegister dst = i.OutputSimd128Register();
3179       XMMRegister src = i.InputSimd128Register(1);
3180       __ Pminud(dst, src);
3181       __ Pcmpeqd(dst, src);
3182       break;
3183     }
3184     case kX64I32x4Abs: {
3185       __ Pabsd(i.OutputSimd128Register(), i.InputSimd128Register(0));
3186       break;
3187     }
3188     case kX64I32x4BitMask: {
3189       __ Movmskps(i.OutputRegister(), i.InputSimd128Register(0));
3190       break;
3191     }
3192     case kX64I32x4DotI16x8S: {
3193       ASSEMBLE_SIMD_BINOP(pmaddwd);
3194       break;
3195     }
3196     case kX64I32x4ExtAddPairwiseI16x8S: {
3197       __ I32x4ExtAddPairwiseI16x8S(i.OutputSimd128Register(),
3198                                    i.InputSimd128Register(0), kScratchRegister);
3199       break;
3200     }
3201     case kX64I32x4ExtAddPairwiseI16x8U: {
3202       __ I32x4ExtAddPairwiseI16x8U(i.OutputSimd128Register(),
3203                                    i.InputSimd128Register(0),
3204                                    kScratchDoubleReg);
3205       break;
3206     }
3207     case kX64S128Const: {
3208       // Emit code for generic constants here; the all-zeros and all-ones
3209       // cases are handled separately by the instruction selector.
3210       XMMRegister dst = i.OutputSimd128Register();
3211       uint32_t imm[4] = {};
3212       for (int j = 0; j < 4; j++) {
3213         imm[j] = i.InputUint32(j);
3214       }
3215       SetupSimdImmediateInRegister(tasm(), imm, dst);
3216       break;
3217     }
3218     case kX64S128Zero: {
3219       XMMRegister dst = i.OutputSimd128Register();
3220       __ Pxor(dst, dst);
3221       break;
3222     }
3223     case kX64S128AllOnes: {
3224       XMMRegister dst = i.OutputSimd128Register();
3225       __ Pcmpeqd(dst, dst);
3226       break;
3227     }
3228     case kX64I16x8Splat: {
3229       XMMRegister dst = i.OutputSimd128Register();
3230       if (HasRegisterInput(instr, 0)) {
3231         __ I16x8Splat(dst, i.InputRegister(0));
3232       } else {
3233         __ I16x8Splat(dst, i.InputOperand(0));
3234       }
3235       break;
3236     }
3237     case kX64I16x8ExtractLaneS: {
3238       Register dst = i.OutputRegister();
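      // Pextrw zero-extends; sign-extend the extracted 16-bit lane to 32 bits.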
3239       __ Pextrw(dst, i.InputSimd128Register(0), i.InputUint8(1));
3240       __ movsxwl(dst, dst);
3241       break;
3242     }
3243     case kX64I16x8SConvertI8x16Low: {
3244       __ Pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
3245       break;
3246     }
3247     case kX64I16x8SConvertI8x16High: {
3248       __ I16x8SConvertI8x16High(i.OutputSimd128Register(),
3249                                 i.InputSimd128Register(0));
3250       break;
3251     }
3252     case kX64I16x8Neg: {
3253       XMMRegister dst = i.OutputSimd128Register();
3254       XMMRegister src = i.InputSimd128Register(0);
3255       if (dst == src) {
3256         __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3257         __ Psignw(dst, kScratchDoubleReg);
3258       } else {
3259         __ Pxor(dst, dst);
3260         __ Psubw(dst, src);
3261       }
3262       break;
3263     }
3264     case kX64I16x8Shl: {
3265       // Take shift value modulo 2^4.
3266       ASSEMBLE_SIMD_SHIFT(psllw, 4);
3267       break;
3268     }
3269     case kX64I16x8ShrS: {
3270       // Take shift value modulo 2^4.
3271       ASSEMBLE_SIMD_SHIFT(psraw, 4);
3272       break;
3273     }
3274     case kX64I16x8SConvertI32x4: {
3275       ASSEMBLE_SIMD_BINOP(packssdw);
3276       break;
3277     }
3278     case kX64I16x8Add: {
3279       ASSEMBLE_SIMD_BINOP(paddw);
3280       break;
3281     }
3282     case kX64I16x8AddSatS: {
3283       ASSEMBLE_SIMD_BINOP(paddsw);
3284       break;
3285     }
3286     case kX64I16x8Sub: {
3287       ASSEMBLE_SIMD_BINOP(psubw);
3288       break;
3289     }
3290     case kX64I16x8SubSatS: {
3291       ASSEMBLE_SIMD_BINOP(psubsw);
3292       break;
3293     }
3294     case kX64I16x8Mul: {
3295       ASSEMBLE_SIMD_BINOP(pmullw);
3296       break;
3297     }
3298     case kX64I16x8MinS: {
3299       ASSEMBLE_SIMD_BINOP(pminsw);
3300       break;
3301     }
3302     case kX64I16x8MaxS: {
3303       ASSEMBLE_SIMD_BINOP(pmaxsw);
3304       break;
3305     }
3306     case kX64I16x8Eq: {
3307       ASSEMBLE_SIMD_BINOP(pcmpeqw);
3308       break;
3309     }
3310     case kX64I16x8Ne: {
3311       XMMRegister dst = i.OutputSimd128Register();
3312       __ Pcmpeqw(dst, i.InputSimd128Register(1));
3313       __ Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
3314       __ Pxor(dst, kScratchDoubleReg);
3315       break;
3316     }
3317     case kX64I16x8GtS: {
3318       ASSEMBLE_SIMD_BINOP(pcmpgtw);
3319       break;
3320     }
3321     case kX64I16x8GeS: {
3322       XMMRegister dst = i.OutputSimd128Register();
3323       XMMRegister src = i.InputSimd128Register(1);
3324       __ Pminsw(dst, src);
3325       __ Pcmpeqw(dst, src);
3326       break;
3327     }
3328     case kX64I16x8UConvertI8x16Low: {
3329       __ Pmovzxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
3330       break;
3331     }
3332     case kX64I16x8UConvertI8x16High: {
3333       __ I16x8UConvertI8x16High(i.OutputSimd128Register(),
3334                                 i.InputSimd128Register(0), kScratchDoubleReg);
3335       break;
3336     }
3337     case kX64I16x8ShrU: {
3338       // Take shift value modulo 2^4.
3339       ASSEMBLE_SIMD_SHIFT(psrlw, 4);
3340       break;
3341     }
3342     case kX64I16x8UConvertI32x4: {
3343       ASSEMBLE_SIMD_BINOP(packusdw);
3344       break;
3345     }
3346     case kX64I16x8AddSatU: {
3347       ASSEMBLE_SIMD_BINOP(paddusw);
3348       break;
3349     }
3350     case kX64I16x8SubSatU: {
3351       ASSEMBLE_SIMD_BINOP(psubusw);
3352       break;
3353     }
3354     case kX64I16x8MinU: {
3355       ASSEMBLE_SIMD_BINOP(pminuw);
3356       break;
3357     }
3358     case kX64I16x8MaxU: {
3359       ASSEMBLE_SIMD_BINOP(pmaxuw);
3360       break;
3361     }
3362     case kX64I16x8GtU: {
3363       XMMRegister dst = i.OutputSimd128Register();
3364       XMMRegister src = i.InputSimd128Register(1);
3365       __ Pmaxuw(dst, src);
3366       __ Pcmpeqw(dst, src);
3367       __ Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
3368       __ Pxor(dst, kScratchDoubleReg);
3369       break;
3370     }
3371     case kX64I16x8GeU: {
3372       XMMRegister dst = i.OutputSimd128Register();
3373       XMMRegister src = i.InputSimd128Register(1);
3374       __ Pminuw(dst, src);
3375       __ Pcmpeqw(dst, src);
3376       break;
3377     }
3378     case kX64I16x8RoundingAverageU: {
3379       ASSEMBLE_SIMD_BINOP(pavgw);
3380       break;
3381     }
3382     case kX64I16x8Abs: {
3383       __ Pabsw(i.OutputSimd128Register(), i.InputSimd128Register(0));
3384       break;
3385     }
3386     case kX64I16x8BitMask: {
3387       Register dst = i.OutputRegister();
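      // Pack the eight words into the upper eight bytes of the scratch
      // register (signed saturation preserves the sign bits), take the byte
      // sign mask, then keep only those upper eight bits.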
3388       __ Packsswb(kScratchDoubleReg, i.InputSimd128Register(0));
3389       __ Pmovmskb(dst, kScratchDoubleReg);
3390       __ shrq(dst, Immediate(8));
3391       break;
3392     }
3393     case kX64I16x8ExtMulLowI8x16S: {
3394       __ I16x8ExtMulLow(i.OutputSimd128Register(), i.InputSimd128Register(0),
3395                         i.InputSimd128Register(1), kScratchDoubleReg,
3396                         /*is_signed=*/true);
3397       break;
3398     }
3399     case kX64I16x8ExtMulHighI8x16S: {
3400       __ I16x8ExtMulHighS(i.OutputSimd128Register(), i.InputSimd128Register(0),
3401                           i.InputSimd128Register(1), kScratchDoubleReg);
3402       break;
3403     }
3404     case kX64I16x8ExtMulLowI8x16U: {
3405       __ I16x8ExtMulLow(i.OutputSimd128Register(), i.InputSimd128Register(0),
3406                         i.InputSimd128Register(1), kScratchDoubleReg,
3407                         /*is_signed=*/false);
3408       break;
3409     }
3410     case kX64I16x8ExtMulHighI8x16U: {
3411       __ I16x8ExtMulHighU(i.OutputSimd128Register(), i.InputSimd128Register(0),
3412                           i.InputSimd128Register(1), kScratchDoubleReg);
3413       break;
3414     }
3415     case kX64I16x8ExtAddPairwiseI8x16S: {
3416       __ I16x8ExtAddPairwiseI8x16S(i.OutputSimd128Register(),
3417                                    i.InputSimd128Register(0), kScratchDoubleReg,
3418                                    kScratchRegister);
3419       break;
3420     }
3421     case kX64I16x8ExtAddPairwiseI8x16U: {
3422       __ I16x8ExtAddPairwiseI8x16U(i.OutputSimd128Register(),
3423                                    i.InputSimd128Register(0), kScratchRegister);
3424       break;
3425     }
3426     case kX64I16x8Q15MulRSatS: {
3427       __ I16x8Q15MulRSatS(i.OutputSimd128Register(), i.InputSimd128Register(0),
3428                           i.InputSimd128Register(1), kScratchDoubleReg);
3429       break;
3430     }
3431     case kX64I8x16Splat: {
3432       XMMRegister dst = i.OutputSimd128Register();
3433       if (HasRegisterInput(instr, 0)) {
3434         __ I8x16Splat(dst, i.InputRegister(0), kScratchDoubleReg);
3435       } else {
3436         __ I8x16Splat(dst, i.InputOperand(0), kScratchDoubleReg);
3437       }
3438       break;
3439     }
3440     case kX64Pextrb: {
3441       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3442       size_t index = 0;
3443       if (HasAddressingMode(instr)) {
3444         Operand operand = i.MemoryOperand(&index);
3445         __ Pextrb(operand, i.InputSimd128Register(index),
3446                   i.InputUint8(index + 1));
3447       } else {
3448         __ Pextrb(i.OutputRegister(), i.InputSimd128Register(0),
3449                   i.InputUint8(1));
3450       }
3451       break;
3452     }
3453     case kX64Pextrw: {
3454       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3455       size_t index = 0;
3456       if (HasAddressingMode(instr)) {
3457         Operand operand = i.MemoryOperand(&index);
3458         __ Pextrw(operand, i.InputSimd128Register(index),
3459                   i.InputUint8(index + 1));
3460       } else {
3461         __ Pextrw(i.OutputRegister(), i.InputSimd128Register(0),
3462                   i.InputUint8(1));
3463       }
3464       break;
3465     }
3466     case kX64I8x16ExtractLaneS: {
3467       Register dst = i.OutputRegister();
3468       __ Pextrb(dst, i.InputSimd128Register(0), i.InputUint8(1));
3469       __ movsxbl(dst, dst);
3470       break;
3471     }
3472     case kX64Pinsrb: {
3473       ASSEMBLE_PINSR(Pinsrb);
3474       break;
3475     }
3476     case kX64Pinsrw: {
3477       ASSEMBLE_PINSR(Pinsrw);
3478       break;
3479     }
3480     case kX64Pinsrd: {
3481       ASSEMBLE_PINSR(Pinsrd);
3482       break;
3483     }
3484     case kX64Pinsrq: {
3485       ASSEMBLE_PINSR(Pinsrq);
3486       break;
3487     }
3488     case kX64I8x16SConvertI16x8: {
3489       ASSEMBLE_SIMD_BINOP(packsswb);
3490       break;
3491     }
3492     case kX64I8x16Neg: {
3493       XMMRegister dst = i.OutputSimd128Register();
3494       XMMRegister src = i.InputSimd128Register(0);
3495       if (dst == src) {
3496         __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3497         __ Psignb(dst, kScratchDoubleReg);
3498       } else {
3499         __ Pxor(dst, dst);
3500         __ Psubb(dst, src);
3501       }
3502       break;
3503     }
3504     case kX64I8x16Shl: {
3505       XMMRegister dst = i.OutputSimd128Register();
3506       XMMRegister src = i.InputSimd128Register(0);
3507       DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
3508       if (HasImmediateInput(instr, 1)) {
3509         __ I8x16Shl(dst, src, i.InputInt3(1), kScratchRegister,
3510                     kScratchDoubleReg);
3511       } else {
3512         __ I8x16Shl(dst, src, i.InputRegister(1), kScratchRegister,
3513                     kScratchDoubleReg, i.TempSimd128Register(0));
3514       }
3515       break;
3516     }
3517     case kX64I8x16ShrS: {
3518       XMMRegister dst = i.OutputSimd128Register();
3519       XMMRegister src = i.InputSimd128Register(0);
3520       DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
3521       if (HasImmediateInput(instr, 1)) {
3522         __ I8x16ShrS(dst, src, i.InputInt3(1), kScratchDoubleReg);
3523       } else {
3524         __ I8x16ShrS(dst, src, i.InputRegister(1), kScratchRegister,
3525                      kScratchDoubleReg, i.TempSimd128Register(0));
3526       }
3527       break;
3528     }
3529     case kX64I8x16Add: {
3530       ASSEMBLE_SIMD_BINOP(paddb);
3531       break;
3532     }
3533     case kX64I8x16AddSatS: {
3534       ASSEMBLE_SIMD_BINOP(paddsb);
3535       break;
3536     }
3537     case kX64I8x16Sub: {
3538       ASSEMBLE_SIMD_BINOP(psubb);
3539       break;
3540     }
3541     case kX64I8x16SubSatS: {
3542       ASSEMBLE_SIMD_BINOP(psubsb);
3543       break;
3544     }
3545     case kX64I8x16MinS: {
3546       ASSEMBLE_SIMD_BINOP(pminsb);
3547       break;
3548     }
3549     case kX64I8x16MaxS: {
3550       ASSEMBLE_SIMD_BINOP(pmaxsb);
3551       break;
3552     }
3553     case kX64I8x16Eq: {
3554       ASSEMBLE_SIMD_BINOP(pcmpeqb);
3555       break;
3556     }
3557     case kX64I8x16Ne: {
3558       XMMRegister dst = i.OutputSimd128Register();
3559       __ Pcmpeqb(dst, i.InputSimd128Register(1));
3560       __ Pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
3561       __ Pxor(dst, kScratchDoubleReg);
3562       break;
3563     }
3564     case kX64I8x16GtS: {
3565       ASSEMBLE_SIMD_BINOP(pcmpgtb);
3566       break;
3567     }
3568     case kX64I8x16GeS: {
3569       XMMRegister dst = i.OutputSimd128Register();
3570       XMMRegister src = i.InputSimd128Register(1);
3571       __ Pminsb(dst, src);
3572       __ Pcmpeqb(dst, src);
3573       break;
3574     }
3575     case kX64I8x16UConvertI16x8: {
3576       ASSEMBLE_SIMD_BINOP(packuswb);
3577       break;
3578     }
3579     case kX64I8x16ShrU: {
3580       XMMRegister dst = i.OutputSimd128Register();
3581       XMMRegister src = i.InputSimd128Register(0);
3582       DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
3583       if (HasImmediateInput(instr, 1)) {
3584         __ I8x16ShrU(dst, src, i.InputInt3(1), kScratchRegister,
3585                      kScratchDoubleReg);
3586       } else {
3587         __ I8x16ShrU(dst, src, i.InputRegister(1), kScratchRegister,
3588                      kScratchDoubleReg, i.TempSimd128Register(0));
3589       }
3590       break;
3591     }
3592     case kX64I8x16AddSatU: {
3593       ASSEMBLE_SIMD_BINOP(paddusb);
3594       break;
3595     }
3596     case kX64I8x16SubSatU: {
3597       ASSEMBLE_SIMD_BINOP(psubusb);
3598       break;
3599     }
3600     case kX64I8x16MinU: {
3601       ASSEMBLE_SIMD_BINOP(pminub);
3602       break;
3603     }
3604     case kX64I8x16MaxU: {
3605       ASSEMBLE_SIMD_BINOP(pmaxub);
3606       break;
3607     }
3608     case kX64I8x16GtU: {
3609       XMMRegister dst = i.OutputSimd128Register();
3610       XMMRegister src = i.InputSimd128Register(1);
3611       __ Pmaxub(dst, src);
3612       __ Pcmpeqb(dst, src);
3613       __ Pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
3614       __ Pxor(dst, kScratchDoubleReg);
3615       break;
3616     }
3617     case kX64I8x16GeU: {
3618       XMMRegister dst = i.OutputSimd128Register();
3619       XMMRegister src = i.InputSimd128Register(1);
3620       __ Pminub(dst, src);
3621       __ Pcmpeqb(dst, src);
3622       break;
3623     }
3624     case kX64I8x16RoundingAverageU: {
3625       ASSEMBLE_SIMD_BINOP(pavgb);
3626       break;
3627     }
3628     case kX64I8x16Abs: {
3629       __ Pabsb(i.OutputSimd128Register(), i.InputSimd128Register(0));
3630       break;
3631     }
3632     case kX64I8x16BitMask: {
3633       __ Pmovmskb(i.OutputRegister(), i.InputSimd128Register(0));
3634       break;
3635     }
3636     case kX64I32x4ExtMulLowI16x8S: {
3637       __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3638                      i.InputSimd128Register(1), kScratchDoubleReg, /*low=*/true,
3639                      /*is_signed=*/true);
3640       break;
3641     }
3642     case kX64I32x4ExtMulHighI16x8S: {
3643       __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3644                      i.InputSimd128Register(1), kScratchDoubleReg,
3645                      /*low=*/false,
3646                      /*is_signed=*/true);
3647       break;
3648     }
3649     case kX64I32x4ExtMulLowI16x8U: {
3650       __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3651                      i.InputSimd128Register(1), kScratchDoubleReg, /*low=*/true,
3652                      /*is_signed=*/false);
3653       break;
3654     }
3655     case kX64I32x4ExtMulHighI16x8U: {
3656       __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3657                      i.InputSimd128Register(1), kScratchDoubleReg,
3658                      /*low=*/false,
3659                      /*is_signed=*/false);
3660       break;
3661     }
3662     case kX64S128And: {
3663       ASSEMBLE_SIMD_BINOP(pand);
3664       break;
3665     }
3666     case kX64S128Or: {
3667       ASSEMBLE_SIMD_BINOP(por);
3668       break;
3669     }
3670     case kX64S128Xor: {
3671       ASSEMBLE_SIMD_BINOP(pxor);
3672       break;
3673     }
3674     case kX64S128Not: {
3675       __ S128Not(i.OutputSimd128Register(), i.InputSimd128Register(0),
3676                  kScratchDoubleReg);
3677       break;
3678     }
3679     case kX64S128Select: {
3680       __ S128Select(i.OutputSimd128Register(), i.InputSimd128Register(0),
3681                     i.InputSimd128Register(1), i.InputSimd128Register(2),
3682                     kScratchDoubleReg);
3683       break;
3684     }
3685     case kX64S128AndNot: {
3686       XMMRegister dst = i.OutputSimd128Register();
3687       DCHECK_EQ(dst, i.InputSimd128Register(0));
3688       // The inputs have been inverted by the instruction selector, so we can
3689       // call andnps here without any modifications.
3690       __ Andnps(dst, i.InputSimd128Register(1));
3691       break;
3692     }
3693     case kX64I8x16Swizzle: {
3694       __ I8x16Swizzle(i.OutputSimd128Register(), i.InputSimd128Register(0),
3695                       i.InputSimd128Register(1), kScratchDoubleReg,
3696                       kScratchRegister, MiscField::decode(instr->opcode()));
3697       break;
3698     }
3699     case kX64I8x16Shuffle: {
3700       XMMRegister dst = i.OutputSimd128Register();
3701       XMMRegister tmp_simd = i.TempSimd128Register(0);
3702       DCHECK_NE(tmp_simd, i.InputSimd128Register(0));
3703       if (instr->InputCount() == 5) {  // only one input operand
3704         uint32_t mask[4] = {};
3705         DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3706         for (int j = 4; j > 0; j--) {
3707           mask[j - 1] = i.InputUint32(j);
3708         }
3709 
3710         SetupSimdImmediateInRegister(tasm(), mask, tmp_simd);
3711         __ Pshufb(dst, tmp_simd);
3712       } else {  // two input operands
3713         DCHECK_NE(tmp_simd, i.InputSimd128Register(1));
3714         DCHECK_EQ(6, instr->InputCount());
3715         ASSEMBLE_SIMD_INSTR(Movdqu, kScratchDoubleReg, 0);
3716         uint32_t mask1[4] = {};
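        // Shuffle src0 first: lane indices that select from the second
        // operand (>= 16) are replaced with 0x80 so pshufb zeroes those bytes.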
3717         for (int j = 5; j > 1; j--) {
3718           uint32_t lanes = i.InputUint32(j);
3719           for (int k = 0; k < 32; k += 8) {
3720             uint8_t lane = lanes >> k;
3721             mask1[j - 2] |= (lane < kSimd128Size ? lane : 0x80) << k;
3722           }
3723         }
3724         SetupSimdImmediateInRegister(tasm(), mask1, tmp_simd);
3725         __ Pshufb(kScratchDoubleReg, tmp_simd);
3726         uint32_t mask2[4] = {};
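        // Now shuffle src1: indices below 16 become 0x80 (zeroed), indices
        // >= 16 are reduced to 0..15; OR-ing the two shuffles gives the
        // final result.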
3727         if (instr->InputAt(1)->IsSimd128Register()) {
3728           XMMRegister src1 = i.InputSimd128Register(1);
3729           if (src1 != dst) __ Movdqa(dst, src1);
3730         } else {
3731           __ Movdqu(dst, i.InputOperand(1));
3732         }
3733         for (int j = 5; j > 1; j--) {
3734           uint32_t lanes = i.InputUint32(j);
3735           for (int k = 0; k < 32; k += 8) {
3736             uint8_t lane = lanes >> k;
3737             mask2[j - 2] |= (lane >= kSimd128Size ? (lane & 0x0F) : 0x80) << k;
3738           }
3739         }
3740         SetupSimdImmediateInRegister(tasm(), mask2, tmp_simd);
3741         __ Pshufb(dst, tmp_simd);
3742         __ Por(dst, kScratchDoubleReg);
3743       }
3744       break;
3745     }
3746     case kX64I8x16Popcnt: {
3747       __ I8x16Popcnt(i.OutputSimd128Register(), i.InputSimd128Register(0),
3748                      i.TempSimd128Register(0), kScratchDoubleReg,
3749                      kScratchRegister);
3750       break;
3751     }
3752     case kX64S128Load8Splat: {
3753       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3754       __ S128Load8Splat(i.OutputSimd128Register(), i.MemoryOperand(),
3755                         kScratchDoubleReg);
3756       break;
3757     }
3758     case kX64S128Load16Splat: {
3759       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3760       __ S128Load16Splat(i.OutputSimd128Register(), i.MemoryOperand(),
3761                          kScratchDoubleReg);
3762       break;
3763     }
3764     case kX64S128Load32Splat: {
3765       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3766       __ S128Load32Splat(i.OutputSimd128Register(), i.MemoryOperand());
3767       break;
3768     }
3769     case kX64S128Load64Splat: {
3770       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3771       __ Movddup(i.OutputSimd128Register(), i.MemoryOperand());
3772       break;
3773     }
3774     case kX64S128Load8x8S: {
3775       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3776       __ Pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand());
3777       break;
3778     }
3779     case kX64S128Load8x8U: {
3780       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3781       __ Pmovzxbw(i.OutputSimd128Register(), i.MemoryOperand());
3782       break;
3783     }
3784     case kX64S128Load16x4S: {
3785       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3786       __ Pmovsxwd(i.OutputSimd128Register(), i.MemoryOperand());
3787       break;
3788     }
3789     case kX64S128Load16x4U: {
3790       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3791       __ Pmovzxwd(i.OutputSimd128Register(), i.MemoryOperand());
3792       break;
3793     }
3794     case kX64S128Load32x2S: {
3795       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3796       __ Pmovsxdq(i.OutputSimd128Register(), i.MemoryOperand());
3797       break;
3798     }
3799     case kX64S128Load32x2U: {
3800       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3801       __ Pmovzxdq(i.OutputSimd128Register(), i.MemoryOperand());
3802       break;
3803     }
3804     case kX64S128Store32Lane: {
3805       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3806       size_t index = 0;
3807       Operand operand = i.MemoryOperand(&index);
3808       uint8_t lane = i.InputUint8(index + 1);
3809       __ S128Store32Lane(operand, i.InputSimd128Register(index), lane);
3810       break;
3811     }
3812     case kX64S128Store64Lane: {
3813       EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3814       size_t index = 0;
3815       Operand operand = i.MemoryOperand(&index);
3816       uint8_t lane = i.InputUint8(index + 1);
3817       __ S128Store64Lane(operand, i.InputSimd128Register(index), lane);
3818       break;
3819     }
3820     case kX64Shufps: {
3821       __ Shufps(i.OutputSimd128Register(), i.InputSimd128Register(0),
3822                 i.InputSimd128Register(1), i.InputUint8(2));
3823       break;
3824     }
3825     case kX64S32x4Rotate: {
3826       XMMRegister dst = i.OutputSimd128Register();
3827       XMMRegister src = i.InputSimd128Register(0);
3828       uint8_t mask = i.InputUint8(1);
3829       if (dst == src) {
3830         // 1-byte shorter encoding than pshufd.
3831         __ Shufps(dst, src, src, mask);
3832       } else {
3833         __ Pshufd(dst, src, mask);
3834       }
3835       break;
3836     }
3837     case kX64S32x4Swizzle: {
3838       DCHECK_EQ(2, instr->InputCount());
3839       ASSEMBLE_SIMD_IMM_INSTR(Pshufd, i.OutputSimd128Register(), 0,
3840                               i.InputUint8(1));
3841       break;
3842     }
3843     case kX64S32x4Shuffle: {
3844       DCHECK_EQ(4, instr->InputCount());  // Swizzles should be handled above.
3845       uint8_t shuffle = i.InputUint8(2);
3846       DCHECK_NE(0xe4, shuffle);  // A simple blend should be handled below.
3847       ASSEMBLE_SIMD_IMM_INSTR(Pshufd, kScratchDoubleReg, 1, shuffle);
3848       ASSEMBLE_SIMD_IMM_INSTR(Pshufd, i.OutputSimd128Register(), 0, shuffle);
3849       __ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputUint8(3));
3850       break;
3851     }
3852     case kX64S16x8Blend: {
3853       ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, i.InputUint8(2));
3854       break;
3855     }
3856     case kX64S16x8HalfShuffle1: {
3857       XMMRegister dst = i.OutputSimd128Register();
3858       uint8_t mask_lo = i.InputUint8(1);
3859       uint8_t mask_hi = i.InputUint8(2);
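      // 0xe4 is the identity shuffle (3, 2, 1, 0), so a half with that mask
      // can be left untouched.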
3860       if (mask_lo != 0xe4) {
3861         ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, mask_lo);
3862         if (mask_hi != 0xe4) __ Pshufhw(dst, dst, mask_hi);
3863       } else {
3864         DCHECK_NE(mask_hi, 0xe4);
3865         ASSEMBLE_SIMD_IMM_INSTR(Pshufhw, dst, 0, mask_hi);
3866       }
3867       break;
3868     }
3869     case kX64S16x8HalfShuffle2: {
3870       XMMRegister dst = i.OutputSimd128Register();
3871       ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, kScratchDoubleReg, 1, i.InputUint8(2));
3872       __ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputUint8(3));
3873       ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, i.InputUint8(2));
3874       __ Pshufhw(dst, dst, i.InputUint8(3));
3875       __ Pblendw(dst, kScratchDoubleReg, i.InputUint8(4));
3876       break;
3877     }
3878     case kX64S8x16Alignr: {
3879       ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, i.InputUint8(2));
3880       break;
3881     }
3882     case kX64S16x8Dup: {
3883       XMMRegister dst = i.OutputSimd128Register();
3884       uint8_t lane = i.InputInt8(1) & 0x7;
3885       uint8_t lane4 = lane & 0x3;
3886       uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
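      // half_dup replicates the selected word across one 64-bit half; the
      // punpck*qdq below then copies that half into both halves.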
3887       if (lane < 4) {
3888         ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, half_dup);
3889         __ Punpcklqdq(dst, dst);
3890       } else {
3891         ASSEMBLE_SIMD_IMM_INSTR(Pshufhw, dst, 0, half_dup);
3892         __ Punpckhqdq(dst, dst);
3893       }
3894       break;
3895     }
3896     case kX64S8x16Dup: {
3897       XMMRegister dst = i.OutputSimd128Register();
3898       uint8_t lane = i.InputInt8(1) & 0xf;
3899       DCHECK_EQ(dst, i.InputSimd128Register(0));
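      // Interleaving the register with itself widens the selected byte into a
      // full word, so the word-dup shuffle below can finish the splat.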
3900       if (lane < 8) {
3901         __ Punpcklbw(dst, dst);
3902       } else {
3903         __ Punpckhbw(dst, dst);
3904       }
3905       lane &= 0x7;
3906       uint8_t lane4 = lane & 0x3;
3907       uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3908       if (lane < 4) {
3909         __ Pshuflw(dst, dst, half_dup);
3910         __ Punpcklqdq(dst, dst);
3911       } else {
3912         __ Pshufhw(dst, dst, half_dup);
3913         __ Punpckhqdq(dst, dst);
3914       }
3915       break;
3916     }
3917     case kX64S64x2UnpackHigh:
3918       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
3919       break;
3920     case kX64S32x4UnpackHigh:
3921       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
3922       break;
3923     case kX64S16x8UnpackHigh:
3924       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
3925       break;
3926     case kX64S8x16UnpackHigh:
3927       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
3928       break;
3929     case kX64S64x2UnpackLow:
3930       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
3931       break;
3932     case kX64S32x4UnpackLow:
3933       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
3934       break;
3935     case kX64S16x8UnpackLow:
3936       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
3937       break;
3938     case kX64S8x16UnpackLow:
3939       ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
3940       break;
3941     case kX64S16x8UnzipHigh: {
3942       XMMRegister dst = i.OutputSimd128Register();
3943       XMMRegister src2 = dst;
3944       DCHECK_EQ(dst, i.InputSimd128Register(0));
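      // Shift the odd (high) word of every dword into the low position, then
      // pack the dwords back into words; the zeroed upper halves keep
      // packusdw from saturating.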
3945       if (instr->InputCount() == 2) {
3946         ASSEMBLE_SIMD_INSTR(Movdqu, kScratchDoubleReg, 1);
3947         __ Psrld(kScratchDoubleReg, byte{16});
3948         src2 = kScratchDoubleReg;
3949       }
3950       __ Psrld(dst, byte{16});
3951       __ Packusdw(dst, src2);
3952       break;
3953     }
3954     case kX64S16x8UnzipLow: {
3955       XMMRegister dst = i.OutputSimd128Register();
3956       XMMRegister src2 = dst;
3957       DCHECK_EQ(dst, i.InputSimd128Register(0));
3958       __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
3959       if (instr->InputCount() == 2) {
3960         ASSEMBLE_SIMD_IMM_INSTR(Pblendw, kScratchDoubleReg, 1, uint8_t{0x55});
3961         src2 = kScratchDoubleReg;
3962       }
3963       __ Pblendw(dst, kScratchDoubleReg, uint8_t{0xaa});
3964       __ Packusdw(dst, src2);
3965       break;
3966     }
3967     case kX64S8x16UnzipHigh: {
3968       XMMRegister dst = i.OutputSimd128Register();
3969       XMMRegister src2 = dst;
3970       DCHECK_EQ(dst, i.InputSimd128Register(0));
3971       if (instr->InputCount() == 2) {
3972         ASSEMBLE_SIMD_INSTR(Movdqu, kScratchDoubleReg, 1);
3973         __ Psrlw(kScratchDoubleReg, byte{8});
3974         src2 = kScratchDoubleReg;
3975       }
3976       __ Psrlw(dst, byte{8});
3977       __ Packuswb(dst, src2);
3978       break;
3979     }
3980     case kX64S8x16UnzipLow: {
3981       XMMRegister dst = i.OutputSimd128Register();
3982       XMMRegister src2 = dst;
3983       DCHECK_EQ(dst, i.InputSimd128Register(0));
3984       if (instr->InputCount() == 2) {
3985         ASSEMBLE_SIMD_INSTR(Movdqu, kScratchDoubleReg, 1);
3986         __ Psllw(kScratchDoubleReg, byte{8});
3987         __ Psrlw(kScratchDoubleReg, byte{8});
3988         src2 = kScratchDoubleReg;
3989       }
3990       __ Psllw(dst, byte{8});
3991       __ Psrlw(dst, byte{8});
3992       __ Packuswb(dst, src2);
3993       break;
3994     }
3995     case kX64S8x16TransposeLow: {
3996       XMMRegister dst = i.OutputSimd128Register();
3997       DCHECK_EQ(dst, i.InputSimd128Register(0));
3998       __ Psllw(dst, byte{8});
3999       if (instr->InputCount() == 1) {
4000         __ Movdqa(kScratchDoubleReg, dst);
4001       } else {
4002         DCHECK_EQ(2, instr->InputCount());
4003         ASSEMBLE_SIMD_INSTR(Movdqu, kScratchDoubleReg, 1);
4004         __ Psllw(kScratchDoubleReg, byte{8});
4005       }
4006       __ Psrlw(dst, byte{8});
4007       __ Por(dst, kScratchDoubleReg);
4008       break;
4009     }
4010     case kX64S8x16TransposeHigh: {
4011       XMMRegister dst = i.OutputSimd128Register();
4012       DCHECK_EQ(dst, i.InputSimd128Register(0));
4013       __ Psrlw(dst, byte{8});
4014       if (instr->InputCount() == 1) {
4015         __ Movdqa(kScratchDoubleReg, dst);
4016       } else {
4017         DCHECK_EQ(2, instr->InputCount());
4018         ASSEMBLE_SIMD_INSTR(Movdqu, kScratchDoubleReg, 1);
4019         __ Psrlw(kScratchDoubleReg, byte{8});
4020       }
4021       __ Psllw(kScratchDoubleReg, byte{8});
4022       __ Por(dst, kScratchDoubleReg);
4023       break;
4024     }
4025     case kX64S8x8Reverse:
4026     case kX64S8x4Reverse:
4027     case kX64S8x2Reverse: {
4028       DCHECK_EQ(1, instr->InputCount());
4029       XMMRegister dst = i.OutputSimd128Register();
4030       DCHECK_EQ(dst, i.InputSimd128Register(0));
4031       if (arch_opcode != kX64S8x2Reverse) {
4032         // First shuffle words into position.
4033         uint8_t shuffle_mask = arch_opcode == kX64S8x4Reverse ? 0xB1 : 0x1B;
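        // Each 2-bit field of the pshuflw/pshufhw immediate selects a source
        // word: 0x1B (0b00'01'10'11) reverses the four words of a half, while
        // 0xB1 (0b10'11'00'01) swaps adjacent word pairs. The byte swap within
        // each word is then done with the shift/or sequence below.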
4034         __ Pshuflw(dst, dst, shuffle_mask);
4035         __ Pshufhw(dst, dst, shuffle_mask);
4036       }
4037       __ Movdqa(kScratchDoubleReg, dst);
4038       __ Psrlw(kScratchDoubleReg, byte{8});
4039       __ Psllw(dst, byte{8});
4040       __ Por(dst, kScratchDoubleReg);
4041       break;
4042     }
4043     case kX64V128AnyTrue: {
4044       Register dst = i.OutputRegister();
4045       XMMRegister src = i.InputSimd128Register(0);
4046 
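      // Ptest computes src AND src and sets ZF iff the result is all zeros,
      // so any set bit in the vector clears ZF. setcc only writes the low
      // byte of dst, hence the xorq below to pre-clear the full register.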
4047       __ xorq(dst, dst);
4048       __ Ptest(src, src);
4049       __ setcc(not_equal, dst);
4050       break;
4051     }
4052     // Need to split up all the different lane structures because the
4053     // comparison instruction used matters, e.g. given the value 0xff00,
4054     // pcmpeqb against zero yields 0x00ff while pcmpeqw yields 0x0000, so
4055     // ptest sets ZF to 0 and 1 respectively.
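    // The AllTrue cases below share one shape (a sketch of the
    // ASSEMBLE_SIMD_ALL_TRUE macro defined earlier in this file): lane-compare
    // the input against a zeroed scratch register with the given pcmpeq*, so
    // the scratch holds an all-ones mask exactly in the lanes that were zero,
    // then ptest that mask and setcc(equal), producing 1 only when no lane
    // was zero, i.e. when every lane was "true".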
4056     case kX64I64x2AllTrue: {
4057       ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqq);
4058       break;
4059     }
4060     case kX64I32x4AllTrue: {
4061       ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqd);
4062       break;
4063     }
4064     case kX64I16x8AllTrue: {
4065       ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqw);
4066       break;
4067     }
4068     case kX64I8x16AllTrue: {
4069       ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqb);
4070       break;
4071     }
4072     case kAtomicStoreWord8: {
4073       ASSEMBLE_SEQ_CST_STORE(MachineRepresentation::kWord8);
4074       break;
4075     }
4076     case kAtomicStoreWord16: {
4077       ASSEMBLE_SEQ_CST_STORE(MachineRepresentation::kWord16);
4078       break;
4079     }
4080     case kAtomicStoreWord32: {
4081       ASSEMBLE_SEQ_CST_STORE(MachineRepresentation::kWord32);
4082       break;
4083     }
4084     case kX64Word64AtomicStoreWord64: {
4085       ASSEMBLE_SEQ_CST_STORE(MachineRepresentation::kWord64);
4086       break;
4087     }
4088     case kAtomicExchangeInt8: {
4089       DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32);
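      // xchg with a memory operand is implicitly locked on x64, so no lock
      // prefix is needed; movsxbl then sign-extends the byte result to the
      // canonical 32-bit representation.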
4090       __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
4091       __ movsxbl(i.InputRegister(0), i.InputRegister(0));
4092       break;
4093     }
4094     case kAtomicExchangeUint8: {
4095       __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
4096       switch (AtomicWidthField::decode(opcode)) {
4097         case AtomicWidth::kWord32:
4098           __ movzxbl(i.InputRegister(0), i.InputRegister(0));
4099           break;
4100         case AtomicWidth::kWord64:
4101           __ movzxbq(i.InputRegister(0), i.InputRegister(0));
4102           break;
4103       }
4104       break;
4105     }
4106     case kAtomicExchangeInt16: {
4107       DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32);
4108       __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
4109       __ movsxwl(i.InputRegister(0), i.InputRegister(0));
4110       break;
4111     }
4112     case kAtomicExchangeUint16: {
4113       __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
4114       switch (AtomicWidthField::decode(opcode)) {
4115         case AtomicWidth::kWord32:
4116           __ movzxwl(i.InputRegister(0), i.InputRegister(0));
4117           break;
4118         case AtomicWidth::kWord64:
4119           __ movzxwq(i.InputRegister(0), i.InputRegister(0));
4120           break;
4121       }
4122       break;
4123     }
4124     case kAtomicExchangeWord32: {
4125       __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
4126       break;
4127     }
4128     case kAtomicCompareExchangeInt8: {
4129       DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32);
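      // cmpxchg implicitly uses rax as both the expected value and the
      // result: the old memory value always ends up in rax, which is then
      // sign-extended to its canonical 32-bit form.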
4130       __ lock();
4131       __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
4132       __ movsxbl(rax, rax);
4133       break;
4134     }
4135     case kAtomicCompareExchangeUint8: {
4136       __ lock();
4137       __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
4138       switch (AtomicWidthField::decode(opcode)) {
4139         case AtomicWidth::kWord32:
4140           __ movzxbl(rax, rax);
4141           break;
4142         case AtomicWidth::kWord64:
4143           __ movzxbq(rax, rax);
4144           break;
4145       }
4146       break;
4147     }
4148     case kAtomicCompareExchangeInt16: {
4149       DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32);
4150       __ lock();
4151       __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
4152       __ movsxwl(rax, rax);
4153       break;
4154     }
4155     case kAtomicCompareExchangeUint16: {
4156       __ lock();
4157       __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
4158       switch (AtomicWidthField::decode(opcode)) {
4159         case AtomicWidth::kWord32:
4160           __ movzxwl(rax, rax);
4161           break;
4162         case AtomicWidth::kWord64:
4163           __ movzxwq(rax, rax);
4164           break;
4165       }
4166       break;
4167     }
4168     case kAtomicCompareExchangeWord32: {
4169       __ lock();
4170       __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
4171       if (AtomicWidthField::decode(opcode) == AtomicWidth::kWord64) {
4172         // Zero-extend the 32-bit value to 64 bits.
4173         __ movl(rax, rax);
4174       }
4175       break;
4176     }
4177     case kX64Word64AtomicExchangeUint64: {
4178       __ xchgq(i.InputRegister(0), i.MemoryOperand(1));
4179       break;
4180     }
4181     case kX64Word64AtomicCompareExchangeUint64: {
4182       __ lock();
4183       __ cmpxchgq(i.MemoryOperand(2), i.InputRegister(1));
4184       break;
4185     }
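// Each ATOMIC_BINOP_CASE expands to a compare-and-swap loop (a sketch; the
// ASSEMBLE_ATOMIC_BINOP/ASSEMBLE_ATOMIC64_BINOP macros are defined earlier in
// this file): load the old value into rax, apply the operation to a copy in a
// temporary register, lock-cmpxchg the temporary back, and retry if another
// thread raced the update. The sign/zero extensions afterwards canonicalize
// the narrow result left in rax.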
4186 #define ATOMIC_BINOP_CASE(op, inst32, inst64)                          \
4187   case kAtomic##op##Int8:                                              \
4188     DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32); \
4189     ASSEMBLE_ATOMIC_BINOP(inst32, movb, cmpxchgb);                     \
4190     __ movsxbl(rax, rax);                                              \
4191     break;                                                             \
4192   case kAtomic##op##Uint8:                                             \
4193     switch (AtomicWidthField::decode(opcode)) {                        \
4194       case AtomicWidth::kWord32:                                       \
4195         ASSEMBLE_ATOMIC_BINOP(inst32, movb, cmpxchgb);                 \
4196         __ movzxbl(rax, rax);                                          \
4197         break;                                                         \
4198       case AtomicWidth::kWord64:                                       \
4199         ASSEMBLE_ATOMIC64_BINOP(inst64, movb, cmpxchgb);               \
4200         __ movzxbq(rax, rax);                                          \
4201         break;                                                         \
4202     }                                                                  \
4203     break;                                                             \
4204   case kAtomic##op##Int16:                                             \
4205     DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32); \
4206     ASSEMBLE_ATOMIC_BINOP(inst32, movw, cmpxchgw);                     \
4207     __ movsxwl(rax, rax);                                              \
4208     break;                                                             \
4209   case kAtomic##op##Uint16:                                            \
4210     switch (AtomicWidthField::decode(opcode)) {                        \
4211       case AtomicWidth::kWord32:                                       \
4212         ASSEMBLE_ATOMIC_BINOP(inst32, movw, cmpxchgw);                 \
4213         __ movzxwl(rax, rax);                                          \
4214         break;                                                         \
4215       case AtomicWidth::kWord64:                                       \
4216         ASSEMBLE_ATOMIC64_BINOP(inst64, movw, cmpxchgw);               \
4217         __ movzxwq(rax, rax);                                          \
4218         break;                                                         \
4219     }                                                                  \
4220     break;                                                             \
4221   case kAtomic##op##Word32:                                            \
4222     switch (AtomicWidthField::decode(opcode)) {                        \
4223       case AtomicWidth::kWord32:                                       \
4224         ASSEMBLE_ATOMIC_BINOP(inst32, movl, cmpxchgl);                 \
4225         break;                                                         \
4226       case AtomicWidth::kWord64:                                       \
4227         ASSEMBLE_ATOMIC64_BINOP(inst64, movl, cmpxchgl);               \
4228         break;                                                         \
4229     }                                                                  \
4230     break;                                                             \
4231   case kX64Word64Atomic##op##Uint64:                                   \
4232     ASSEMBLE_ATOMIC64_BINOP(inst64, movq, cmpxchgq);                   \
4233     break;
4234       ATOMIC_BINOP_CASE(Add, addl, addq)
4235       ATOMIC_BINOP_CASE(Sub, subl, subq)
4236       ATOMIC_BINOP_CASE(And, andl, andq)
4237       ATOMIC_BINOP_CASE(Or, orl, orq)
4238       ATOMIC_BINOP_CASE(Xor, xorl, xorq)
4239 #undef ATOMIC_BINOP_CASE
4240 
4241     case kAtomicLoadInt8:
4242     case kAtomicLoadUint8:
4243     case kAtomicLoadInt16:
4244     case kAtomicLoadUint16:
4245     case kAtomicLoadWord32:
4246       UNREACHABLE();  // Won't be generated by instruction selector.
4247   }
4248   return kSuccess;
4249 }  // NOLINT(readability/fn_size)
4250 
4251 #undef ASSEMBLE_PINSR
4252 #undef ASSEMBLE_UNOP
4253 #undef ASSEMBLE_BINOP
4254 #undef ASSEMBLE_COMPARE
4255 #undef ASSEMBLE_MULT
4256 #undef ASSEMBLE_SHIFT
4257 #undef ASSEMBLE_MOVX
4258 #undef ASSEMBLE_SSE_BINOP
4259 #undef ASSEMBLE_SSE_UNOP
4260 #undef ASSEMBLE_AVX_BINOP
4261 #undef ASSEMBLE_IEEE754_BINOP
4262 #undef ASSEMBLE_IEEE754_UNOP
4263 #undef ASSEMBLE_ATOMIC_BINOP
4264 #undef ASSEMBLE_ATOMIC64_BINOP
4265 #undef ASSEMBLE_SIMD_INSTR
4266 #undef ASSEMBLE_SIMD_IMM_INSTR
4267 #undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
4268 #undef ASSEMBLE_SIMD_IMM_SHUFFLE
4269 #undef ASSEMBLE_SIMD_ALL_TRUE
4270 #undef ASSEMBLE_SIMD_SHIFT
4271 #undef ASSEMBLE_SEQ_CST_STORE
4272 
4273 namespace {
4274 
4275 Condition FlagsConditionToCondition(FlagsCondition condition) {
4276   switch (condition) {
4277     case kUnorderedEqual:
4278     case kEqual:
4279       return equal;
4280     case kUnorderedNotEqual:
4281     case kNotEqual:
4282       return not_equal;
4283     case kSignedLessThan:
4284       return less;
4285     case kSignedGreaterThanOrEqual:
4286       return greater_equal;
4287     case kSignedLessThanOrEqual:
4288       return less_equal;
4289     case kSignedGreaterThan:
4290       return greater;
4291     case kUnsignedLessThan:
4292       return below;
4293     case kUnsignedGreaterThanOrEqual:
4294       return above_equal;
4295     case kUnsignedLessThanOrEqual:
4296       return below_equal;
4297     case kUnsignedGreaterThan:
4298       return above;
4299     case kOverflow:
4300       return overflow;
4301     case kNotOverflow:
4302       return no_overflow;
4303     default:
4304       break;
4305   }
4306   UNREACHABLE();
4307 }
4308 
4309 }  // namespace
4310 
4311 // Assembles branches after this instruction.
4312 void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
4313   Label::Distance flabel_distance =
4314       branch->fallthru ? Label::kNear : Label::kFar;
4315   Label* tlabel = branch->true_label;
4316   Label* flabel = branch->false_label;
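  // For floating-point comparisons, ucomis* sets PF when either operand is
  // NaN (unordered). kUnorderedEqual must treat NaN as "not equal", so branch
  // to the false label on parity; kUnorderedNotEqual treats NaN as "not
  // equal" and therefore branches to the true label instead.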
4317   if (branch->condition == kUnorderedEqual) {
4318     __ j(parity_even, flabel, flabel_distance);
4319   } else if (branch->condition == kUnorderedNotEqual) {
4320     __ j(parity_even, tlabel);
4321   }
4322   __ j(FlagsConditionToCondition(branch->condition), tlabel);
4323 
4324   if (!branch->fallthru) __ jmp(flabel, flabel_distance);
4325 }
4326 
4327 void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
4328                                             BranchInfo* branch) {
4329   Label::Distance flabel_distance =
4330       branch->fallthru ? Label::kNear : Label::kFar;
4331   Label* tlabel = branch->true_label;
4332   Label* flabel = branch->false_label;
4333   Label nodeopt;
4334   if (branch->condition == kUnorderedEqual) {
4335     __ j(parity_even, flabel, flabel_distance);
4336   } else if (branch->condition == kUnorderedNotEqual) {
4337     __ j(parity_even, tlabel);
4338   }
4339   __ j(FlagsConditionToCondition(branch->condition), tlabel);
4340 
4341   if (FLAG_deopt_every_n_times > 0) {
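    // Stress deoptimization: decrement a global counter and, every
    // FLAG_deopt_every_n_times taken branches, reset it and jump to the deopt
    // target instead, preserving rax and the flags around the counter update.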
4342     ExternalReference counter =
4343         ExternalReference::stress_deopt_count(isolate());
4344 
4345     __ pushfq();
4346     __ pushq(rax);
4347     __ load_rax(counter);
4348     __ decl(rax);
4349     __ j(not_zero, &nodeopt, Label::kNear);
4350 
4351     __ Move(rax, FLAG_deopt_every_n_times);
4352     __ store_rax(counter);
4353     __ popq(rax);
4354     __ popfq();
4355     __ jmp(tlabel);
4356 
4357     __ bind(&nodeopt);
4358     __ store_rax(counter);
4359     __ popq(rax);
4360     __ popfq();
4361   }
4362 
4363   if (!branch->fallthru) {
4364     __ jmp(flabel, flabel_distance);
4365   }
4366 }
4367 
4368 void CodeGenerator::AssembleArchJump(RpoNumber target) {
4369   if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
4370 }
4371 
4372 #if V8_ENABLE_WEBASSEMBLY
4373 void CodeGenerator::AssembleArchTrap(Instruction* instr,
4374                                      FlagsCondition condition) {
4375   auto ool = zone()->New<WasmOutOfLineTrap>(this, instr);
4376   Label* tlabel = ool->entry();
4377   Label end;
4378   if (condition == kUnorderedEqual) {
4379     __ j(parity_even, &end, Label::kNear);
4380   } else if (condition == kUnorderedNotEqual) {
4381     __ j(parity_even, tlabel);
4382   }
4383   __ j(FlagsConditionToCondition(condition), tlabel);
4384   __ bind(&end);
4385 }
4386 #endif  // V8_ENABLE_WEBASSEMBLY
4387 
4388 // Assembles boolean materializations after this instruction.
4389 void CodeGenerator::AssembleArchBoolean(Instruction* instr,
4390                                         FlagsCondition condition) {
4391   X64OperandConverter i(this, instr);
4392   Label done;
4393 
4394   // Materialize a full 64-bit 1 or 0 value. The result register is always the
4395   // last output of the instruction.
4396   Label check;
4397   DCHECK_NE(0u, instr->OutputCount());
4398   Register reg = i.OutputRegister(instr->OutputCount() - 1);
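  // Handle the NaN case of unordered-aware conditions first: parity_odd means
  // the compared operands were ordered, so fall through to the generic setcc
  // path; otherwise materialize the NaN answer directly (0 for
  // kUnorderedEqual, 1 for kUnorderedNotEqual) and skip to done.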
4399   if (condition == kUnorderedEqual) {
4400     __ j(parity_odd, &check, Label::kNear);
4401     __ Move(reg, 0);
4402     __ jmp(&done, Label::kNear);
4403   } else if (condition == kUnorderedNotEqual) {
4404     __ j(parity_odd, &check, Label::kNear);
4405     __ Move(reg, 1);
4406     __ jmp(&done, Label::kNear);
4407   }
4408   __ bind(&check);
4409   __ setcc(FlagsConditionToCondition(condition), reg);
4410   __ movzxbl(reg, reg);
4411   __ bind(&done);
4412 }
4413 
4414 void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
4415   X64OperandConverter i(this, instr);
4416   Register input = i.InputRegister(0);
4417   std::vector<std::pair<int32_t, Label*>> cases;
4418   for (size_t index = 2; index < instr->InputCount(); index += 2) {
4419     cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
4420   }
4421   AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
4422                                       cases.data() + cases.size());
4423 }
4424 
4425 void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
4426   X64OperandConverter i(this, instr);
4427   Register input = i.InputRegister(0);
4428   int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2);
4429   Label** cases = zone()->NewArray<Label*>(case_count);
4430   for (int32_t index = 0; index < case_count; ++index) {
4431     cases[index] = GetLabel(i.InputRpo(index + 2));
4432   }
4433   Label* const table = AddJumpTable(cases, case_count);
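  // The unsigned above_equal check also catches negative inputs, so a single
  // bounds test dispatches to the default target. The jump table emitted via
  // AssembleJumpTable stores 8-byte absolute label addresses, hence the
  // times_8 scaled indexed jump.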
4434   __ cmpl(input, Immediate(case_count));
4435   __ j(above_equal, GetLabel(i.InputRpo(1)));
4436   __ leaq(kScratchRegister, Operand(table));
4437   __ jmp(Operand(kScratchRegister, input, times_8, 0));
4438 }
4439 
4440 void CodeGenerator::AssembleArchSelect(Instruction* instr,
4441                                        FlagsCondition condition) {
4442   X64OperandConverter i(this, instr);
4443   MachineRepresentation rep =
4444       LocationOperand::cast(instr->OutputAt(0))->representation();
4445   Condition cc = FlagsConditionToCondition(condition);
4446   DCHECK_EQ(i.OutputRegister(), i.InputRegister(instr->InputCount() - 2));
4447   size_t last_input = instr->InputCount() - 1;
4448   // kUnorderedNotEqual can be implemented more efficiently than
4449   // kUnorderedEqual. As the OR of two flags, it can be done with just two
4450   // cmovs. If the condition was originally a kUnorderedEqual, expect the
4451   // instruction selector to have inverted it and swapped the inputs.
4452   DCHECK_NE(condition, kUnorderedEqual);
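  // For kUnorderedNotEqual the selection must fire if either flag is set: the
  // first cmov moves on not_equal (ZF == 0) and the second on parity_even
  // (PF == 1, i.e. the comparison was unordered), together implementing
  // "NaN or not equal" without branches.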
4453   if (rep == MachineRepresentation::kWord32) {
4454     if (HasRegisterInput(instr, last_input)) {
4455       __ cmovl(cc, i.OutputRegister(), i.InputRegister(last_input));
4456       if (condition == kUnorderedNotEqual) {
4457         __ cmovl(parity_even, i.OutputRegister(), i.InputRegister(last_input));
4458       }
4459     } else {
4460       __ cmovl(cc, i.OutputRegister(), i.InputOperand(last_input));
4461       if (condition == kUnorderedNotEqual) {
4462         __ cmovl(parity_even, i.OutputRegister(), i.InputOperand(last_input));
4463       }
4464     }
4465   } else {
4466     DCHECK_EQ(rep, MachineRepresentation::kWord64);
4467     if (HasRegisterInput(instr, last_input)) {
4468       __ cmovq(cc, i.OutputRegister(), i.InputRegister(last_input));
4469       if (condition == kUnorderedNotEqual) {
4470         __ cmovq(parity_even, i.OutputRegister(), i.InputRegister(last_input));
4471       }
4472     } else {
4473       __ cmovq(cc, i.OutputRegister(), i.InputOperand(last_input));
4474       if (condition == kUnorderedNotEqual) {
4475         __ cmovq(parity_even, i.OutputRegister(), i.InputOperand(last_input));
4476       }
4477     }
4478   }
4479 }
4480 
4481 namespace {
4482 
4483 static const int kQuadWordSize = 16;
4484 
4485 }  // namespace
4486 
4487 void CodeGenerator::FinishFrame(Frame* frame) {
4488   CallDescriptor* call_descriptor = linkage()->GetIncomingDescriptor();
4489 
4490   const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
4491   if (saves_fp != 0) {  // Save callee-saved XMM registers.
4492     frame->AlignSavedCalleeRegisterSlots();
4493     const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
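    // Each 128-bit XMM register occupies kQuadWordSize / kSystemPointerSize
    // (16 / 8 = 2) pointer-sized frame slots on x64.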
4494     frame->AllocateSavedCalleeRegisterSlots(
4495         saves_fp_count * (kQuadWordSize / kSystemPointerSize));
4496   }
4497   const RegList saves = call_descriptor->CalleeSavedRegisters();
4498   if (saves != 0) {  // Save callee-saved registers.
4499     int count = 0;
4500     for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
4501       if (((1 << i) & saves)) {
4502         ++count;
4503       }
4504     }
4505     frame->AllocateSavedCalleeRegisterSlots(count);
4506   }
4507 }
4508 
4509 void CodeGenerator::AssembleConstructFrame() {
4510   auto call_descriptor = linkage()->GetIncomingDescriptor();
4511   if (frame_access_state()->has_frame()) {
4512     int pc_base = __ pc_offset();
4513 
4514     if (call_descriptor->IsCFunctionCall()) {
4515       __ pushq(rbp);
4516       __ movq(rbp, rsp);
4517 #if V8_ENABLE_WEBASSEMBLY
4518       if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
4519         __ Push(Immediate(StackFrame::TypeToMarker(StackFrame::C_WASM_ENTRY)));
4520         // Reserve stack space for saving the c_entry_fp later.
4521         __ AllocateStackSpace(kSystemPointerSize);
4522       }
4523 #endif  // V8_ENABLE_WEBASSEMBLY
4524     } else if (call_descriptor->IsJSFunctionCall()) {
4525       __ Prologue();
4526     } else {
4527       __ StubPrologue(info()->GetOutputStackFrameType());
4528 #if V8_ENABLE_WEBASSEMBLY
4529       if (call_descriptor->IsWasmFunctionCall()) {
4530         __ pushq(kWasmInstanceRegister);
4531       } else if (call_descriptor->IsWasmImportWrapper() ||
4532                  call_descriptor->IsWasmCapiFunction()) {
4533         // Wasm import wrappers are passed a tuple in the place of the instance.
4534         // Unpack the tuple into the instance and the target callable.
4535         // This must be done here in the codegen because it cannot be expressed
4536         // properly in the graph.
4537         __ LoadTaggedPointerField(
4538             kJSFunctionRegister,
4539             FieldOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
4540         __ LoadTaggedPointerField(
4541             kWasmInstanceRegister,
4542             FieldOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
4543         __ pushq(kWasmInstanceRegister);
4544         if (call_descriptor->IsWasmCapiFunction()) {
4545           // Reserve space for saving the PC later.
4546           __ AllocateStackSpace(kSystemPointerSize);
4547         }
4548       }
4549 #endif  // V8_ENABLE_WEBASSEMBLY
4550     }
4551 
4552     unwinding_info_writer_.MarkFrameConstructed(pc_base);
4553   }
4554   int required_slots =
4555       frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();
4556 
4557   if (info()->is_osr()) {
4558     // TurboFan OSR-compiled functions cannot be entered directly.
4559     __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
4560 
4561     // Unoptimized code jumps directly to this entrypoint while the unoptimized
4562     // frame is still on the stack. Optimized code uses OSR values directly from
4563     // the unoptimized frame. Thus, all that needs to be done is to allocate the
4564     // remaining stack slots.
4565     __ RecordComment("-- OSR entrypoint --");
4566     osr_pc_offset_ = __ pc_offset();
4567     required_slots -= static_cast<int>(osr_helper()->UnoptimizedFrameSlots());
4568   }
4569 
4570   const RegList saves = call_descriptor->CalleeSavedRegisters();
4571   const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
4572 
4573   if (required_slots > 0) {
4574     DCHECK(frame_access_state()->has_frame());
4575 #if V8_ENABLE_WEBASSEMBLY
4576     if (info()->IsWasm() && required_slots * kSystemPointerSize > 4 * KB) {
4577       // For WebAssembly functions with big frames we have to do the stack
4578       // overflow check before we construct the frame. Otherwise we may not
4579       // have enough space on the stack to call the runtime for the stack
4580       // overflow.
4581       Label done;
4582 
4583       // If the frame is bigger than the stack, we throw the stack overflow
4584       // exception unconditionally. Thereby we can avoid the integer overflow
4585       // check in the condition code.
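      // Example: a frame that by itself needs more than FLAG_stack_size KB
      // can never fit, so the limit-plus-frame-size addition below (which
      // could wrap for huge frames) is skipped and the trap is taken
      // unconditionally.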
4586       if (required_slots * kSystemPointerSize < FLAG_stack_size * KB) {
4587         __ movq(kScratchRegister,
4588                 FieldOperand(kWasmInstanceRegister,
4589                              WasmInstanceObject::kRealStackLimitAddressOffset));
4590         __ movq(kScratchRegister, Operand(kScratchRegister, 0));
4591         __ addq(kScratchRegister,
4592                 Immediate(required_slots * kSystemPointerSize));
4593         __ cmpq(rsp, kScratchRegister);
4594         __ j(above_equal, &done, Label::kNear);
4595       }
4596 
4597       __ near_call(wasm::WasmCode::kWasmStackOverflow,
4598                    RelocInfo::WASM_STUB_CALL);
4599       // The call does not return, hence we can ignore any references and just
4600       // define an empty safepoint.
4601       ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
4602       RecordSafepoint(reference_map);
4603       __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
4604       __ bind(&done);
4605     }
4606 #endif  // V8_ENABLE_WEBASSEMBLY
4607 
4608     // Skip callee-saved and return slots, which are created below.
4609     required_slots -= base::bits::CountPopulation(saves);
4610     required_slots -= base::bits::CountPopulation(saves_fp) *
4611                       (kQuadWordSize / kSystemPointerSize);
4612     required_slots -= frame()->GetReturnSlotCount();
4613     if (required_slots > 0) {
4614       __ AllocateStackSpace(required_slots * kSystemPointerSize);
4615     }
4616   }
4617 
4618   if (saves_fp != 0) {  // Save callee-saved XMM registers.
4619     const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
4620     const int stack_size = saves_fp_count * kQuadWordSize;
4621     // Adjust the stack pointer.
4622     __ AllocateStackSpace(stack_size);
4623     // Store the registers on the stack.
4624     int slot_idx = 0;
4625     for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
4626       if (!((1 << i) & saves_fp)) continue;
4627       __ Movdqu(Operand(rsp, kQuadWordSize * slot_idx),
4628                 XMMRegister::from_code(i));
4629       slot_idx++;
4630     }
4631   }
4632 
4633   if (saves != 0) {  // Save callee-saved registers.
4634     for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
4635       if (!((1 << i) & saves)) continue;
4636       __ pushq(Register::from_code(i));
4637     }
4638   }
4639 
4640   // Allocate return slots (located after callee-saved).
4641   if (frame()->GetReturnSlotCount() > 0) {
4642     __ AllocateStackSpace(frame()->GetReturnSlotCount() * kSystemPointerSize);
4643   }
4644 }
4645 
4646 void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
4647   auto call_descriptor = linkage()->GetIncomingDescriptor();
4648 
4649   // Restore registers.
4650   const RegList saves = call_descriptor->CalleeSavedRegisters();
4651   if (saves != 0) {
4652     const int returns = frame()->GetReturnSlotCount();
4653     if (returns != 0) {
4654       __ addq(rsp, Immediate(returns * kSystemPointerSize));
4655     }
4656     for (int i = 0; i < Register::kNumRegisters; i++) {
4657       if (!((1 << i) & saves)) continue;
4658       __ popq(Register::from_code(i));
4659     }
4660   }
4661   const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
4662   if (saves_fp != 0) {
4663     const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
4664     const int stack_size = saves_fp_count * kQuadWordSize;
4665     // Load the registers from the stack.
4666     int slot_idx = 0;
4667     for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
4668       if (!((1 << i) & saves_fp)) continue;
4669       __ Movdqu(XMMRegister::from_code(i),
4670                 Operand(rsp, kQuadWordSize * slot_idx));
4671       slot_idx++;
4672     }
4673     // Adjust the stack pointer.
4674     __ addq(rsp, Immediate(stack_size));
4675   }
4676 
4677   unwinding_info_writer_.MarkBlockWillExit();
4678 
4679   X64OperandConverter g(this, nullptr);
4680   int parameter_slots = static_cast<int>(call_descriptor->ParameterSlotCount());
4681 
4682   // {additional_pop_count} is only greater than zero if {parameter_slots}
4683   // is zero. Check RawMachineAssembler::PopAndReturn.
4684   if (parameter_slots != 0) {
4685     if (additional_pop_count->IsImmediate()) {
4686       DCHECK_EQ(g.ToConstant(additional_pop_count).ToInt32(), 0);
4687     } else if (FLAG_debug_code) {
4688       __ cmpq(g.ToRegister(additional_pop_count), Immediate(0));
4689       __ Assert(equal, AbortReason::kUnexpectedAdditionalPopValue);
4690     }
4691   }
4692 
4693   Register argc_reg = rcx;
4694   // Functions with JS linkage have at least one parameter (the receiver).
4695   // If {parameter_slots} == 0, this is a builtin with
4696   // kDontAdaptArgumentsSentinel, which takes care of popping its JS
4697   // arguments itself.
4698   const bool drop_jsargs = parameter_slots != 0 &&
4699                            frame_access_state()->has_frame() &&
4700                            call_descriptor->IsJSFunctionCall();
4701   if (call_descriptor->IsCFunctionCall()) {
4702     AssembleDeconstructFrame();
4703   } else if (frame_access_state()->has_frame()) {
4704     if (additional_pop_count->IsImmediate() &&
4705         g.ToConstant(additional_pop_count).ToInt32() == 0) {
4706       // Canonicalize JSFunction return sites for now.
4707       if (return_label_.is_bound()) {
4708         __ jmp(&return_label_);
4709         return;
4710       } else {
4711         __ bind(&return_label_);
4712       }
4713     }
4714     if (drop_jsargs) {
4715       // Get the actual argument count.
4716       DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & argc_reg.bit());
4717       __ movq(argc_reg, Operand(rbp, StandardFrameConstants::kArgCOffset));
4718     }
4719     AssembleDeconstructFrame();
4720   }
4721 
4722   if (drop_jsargs) {
4723     // We must pop all arguments from the stack (including the receiver).
4724     // The number of arguments without the receiver is
4725     // max(argc_reg, parameter_slots-1), and the receiver is added in
4726     // DropArguments().
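    // Example: a function declared with 2 parameters but called with 5
    // arguments takes the mismatch path below, where DropArguments() pops the
    // actual argument count (receiver handling depends on
    // kJSArgcIncludesReceiver); the common case pops the declared
    // parameter_slots directly via Ret.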
4727     Label mismatch_return;
4728     Register scratch_reg = r10;
4729     DCHECK_NE(argc_reg, scratch_reg);
4730     DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & scratch_reg.bit());
4731     DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & argc_reg.bit());
4732     if (kJSArgcIncludesReceiver) {
4733       __ cmpq(argc_reg, Immediate(parameter_slots));
4734     } else {
4735       int parameter_slots_without_receiver = parameter_slots - 1;
4736       __ cmpq(argc_reg, Immediate(parameter_slots_without_receiver));
4737     }
4738     __ j(greater, &mismatch_return, Label::kNear);
4739     __ Ret(parameter_slots * kSystemPointerSize, scratch_reg);
4740     __ bind(&mismatch_return);
4741     __ DropArguments(argc_reg, scratch_reg, TurboAssembler::kCountIsInteger,
4742                      kJSArgcIncludesReceiver
4743                          ? TurboAssembler::kCountIncludesReceiver
4744                          : TurboAssembler::kCountExcludesReceiver);
4745     // We use a return instead of a jump for better return address prediction.
4746     __ Ret();
4747   } else if (additional_pop_count->IsImmediate()) {
4748     Register scratch_reg = r10;
4749     DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & scratch_reg.bit());
4750     int additional_count = g.ToConstant(additional_pop_count).ToInt32();
4751     size_t pop_size = (parameter_slots + additional_count) * kSystemPointerSize;
4752     CHECK_LE(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
4753     __ Ret(static_cast<int>(pop_size), scratch_reg);
4754   } else {
4755     Register pop_reg = g.ToRegister(additional_pop_count);
4756     Register scratch_reg = pop_reg == r10 ? rcx : r10;
4757     DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & scratch_reg.bit());
4758     DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & pop_reg.bit());
4759     int pop_size = static_cast<int>(parameter_slots * kSystemPointerSize);
4760     __ PopReturnAddressTo(scratch_reg);
4761     __ leaq(rsp, Operand(rsp, pop_reg, times_system_pointer_size,
4762                          static_cast<int>(pop_size)));
4763     __ PushReturnAddressFrom(scratch_reg);
4764     __ Ret();
4765   }
4766 }
4767 
4768 void CodeGenerator::FinishCode() { tasm()->PatchConstPool(); }
4769 
4770 void CodeGenerator::PrepareForDeoptimizationExits(
4771     ZoneDeque<DeoptimizationExit*>* exits) {}
4772 
4773 void CodeGenerator::IncrementStackAccessCounter(
4774     InstructionOperand* source, InstructionOperand* destination) {
4775   DCHECK(FLAG_trace_turbo_stack_accesses);
4776   if (!info()->IsOptimizing()) {
4777 #if V8_ENABLE_WEBASSEMBLY
4778     if (!info()->IsWasm()) return;
4779 #else
4780     return;
4781 #endif  // V8_ENABLE_WEBASSEMBLY
4782   }
4783   DCHECK_NOT_NULL(debug_name_);
4784   auto IncrementCounter = [&](ExternalReference counter) {
4785     __ incl(__ ExternalReferenceAsOperand(counter));
4786   };
4787   if (source->IsAnyStackSlot()) {
4788     IncrementCounter(
4789         ExternalReference::address_of_load_from_stack_count(debug_name_));
4790   }
4791   if (destination->IsAnyStackSlot()) {
4792     IncrementCounter(
4793         ExternalReference::address_of_store_to_stack_count(debug_name_));
4794   }
4795 }
4796 
4797 void CodeGenerator::AssembleMove(InstructionOperand* source,
4798                                  InstructionOperand* destination) {
4799   X64OperandConverter g(this, nullptr);
4800   // Helper function to write the given constant to the dst register.
4801   auto MoveConstantToRegister = [&](Register dst, Constant src) {
4802     switch (src.type()) {
4803       case Constant::kInt32: {
4804         if (RelocInfo::IsWasmReference(src.rmode())) {
4805           __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
4806         } else {
4807           int32_t value = src.ToInt32();
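          // xorl is preferred for zeroing: it encodes more compactly than
          // movl with a zero immediate and is a dependency-breaking idiom on
          // modern x64 cores.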
4808           if (value == 0) {
4809             __ xorl(dst, dst);
4810           } else {
4811             __ movl(dst, Immediate(value));
4812           }
4813         }
4814         break;
4815       }
4816       case Constant::kInt64:
4817         if (RelocInfo::IsWasmReference(src.rmode())) {
4818           __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
4819         } else {
4820           __ Move(dst, src.ToInt64());
4821         }
4822         break;
4823       case Constant::kFloat32:
4824         __ MoveNumber(dst, src.ToFloat32());
4825         break;
4826       case Constant::kFloat64:
4827         __ MoveNumber(dst, src.ToFloat64().value());
4828         break;
4829       case Constant::kExternalReference:
4830         __ Move(dst, src.ToExternalReference());
4831         break;
4832       case Constant::kHeapObject: {
4833         Handle<HeapObject> src_object = src.ToHeapObject();
4834         RootIndex index;
4835         if (IsMaterializableFromRoot(src_object, &index)) {
4836           __ LoadRoot(dst, index);
4837         } else {
4838           __ Move(dst, src_object);
4839         }
4840         break;
4841       }
4842       case Constant::kCompressedHeapObject: {
4843         Handle<HeapObject> src_object = src.ToHeapObject();
4844         RootIndex index;
4845         if (IsMaterializableFromRoot(src_object, &index)) {
4846           __ LoadRoot(dst, index);
4847         } else {
4848           __ Move(dst, src_object, RelocInfo::COMPRESSED_EMBEDDED_OBJECT);
4849         }
4850         break;
4851       }
4852       case Constant::kDelayedStringConstant: {
4853         const StringConstantBase* src_constant = src.ToDelayedStringConstant();
4854         __ MoveStringConstant(dst, src_constant);
4855         break;
4856       }
4857       case Constant::kRpoNumber:
4858         UNREACHABLE();  // TODO(dcarney): load of labels on x64.
4859     }
4860   };
4861   // Helper function to write the given constant to the stack.
4862   auto MoveConstantToSlot = [&](Operand dst, Constant src) {
4863     if (!RelocInfo::IsWasmReference(src.rmode())) {
4864       switch (src.type()) {
4865         case Constant::kInt32:
4866           __ Move(dst, src.ToInt32());
4867           return;
4868         case Constant::kInt64:
4869           __ Move(dst, src.ToInt64());
4870           return;
4871         default:
4872           break;
4873       }
4874     }
4875     MoveConstantToRegister(kScratchRegister, src);
4876     __ movq(dst, kScratchRegister);
4877   };
4878 
4879   if (FLAG_trace_turbo_stack_accesses) {
4880     IncrementStackAccessCounter(source, destination);
4881   }
4882 
4883   // Dispatch on the source and destination operand kinds.
4884   switch (MoveType::InferMove(source, destination)) {
4885     case MoveType::kRegisterToRegister:
4886       if (source->IsRegister()) {
4887         __ movq(g.ToRegister(destination), g.ToRegister(source));
4888       } else {
4889         DCHECK(source->IsFPRegister());
4890         __ Movapd(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
4891       }
4892       return;
4893     case MoveType::kRegisterToStack: {
4894       Operand dst = g.ToOperand(destination);
4895       if (source->IsRegister()) {
4896         __ movq(dst, g.ToRegister(source));
4897       } else {
4898         DCHECK(source->IsFPRegister());
4899         XMMRegister src = g.ToDoubleRegister(source);
4900         MachineRepresentation rep =
4901             LocationOperand::cast(source)->representation();
4902         if (rep != MachineRepresentation::kSimd128) {
4903           __ Movsd(dst, src);
4904         } else {
4905           __ Movups(dst, src);
4906         }
4907       }
4908       return;
4909     }
4910     case MoveType::kStackToRegister: {
4911       Operand src = g.ToOperand(source);
4912       if (source->IsStackSlot()) {
4913         __ movq(g.ToRegister(destination), src);
4914       } else {
4915         DCHECK(source->IsFPStackSlot());
4916         XMMRegister dst = g.ToDoubleRegister(destination);
4917         MachineRepresentation rep =
4918             LocationOperand::cast(source)->representation();
4919         if (rep != MachineRepresentation::kSimd128) {
4920           __ Movsd(dst, src);
4921         } else {
4922           __ Movups(dst, src);
4923         }
4924       }
4925       return;
4926     }
4927     case MoveType::kStackToStack: {
4928       Operand src = g.ToOperand(source);
4929       Operand dst = g.ToOperand(destination);
4930       if (source->IsStackSlot()) {
4931         // Spill on demand to use a temporary register for memory-to-memory
4932         // moves.
4933         __ movq(kScratchRegister, src);
4934         __ movq(dst, kScratchRegister);
4935       } else {
4936         MachineRepresentation rep =
4937             LocationOperand::cast(source)->representation();
4938         if (rep != MachineRepresentation::kSimd128) {
4939           __ Movsd(kScratchDoubleReg, src);
4940           __ Movsd(dst, kScratchDoubleReg);
4941         } else {
4942           DCHECK(source->IsSimd128StackSlot());
4943           __ Movups(kScratchDoubleReg, src);
4944           __ Movups(dst, kScratchDoubleReg);
4945         }
4946       }
4947       return;
4948     }
4949     case MoveType::kConstantToRegister: {
4950       Constant src = g.ToConstant(source);
4951       if (destination->IsRegister()) {
4952         MoveConstantToRegister(g.ToRegister(destination), src);
4953       } else {
4954         DCHECK(destination->IsFPRegister());
4955         XMMRegister dst = g.ToDoubleRegister(destination);
4956         if (src.type() == Constant::kFloat32) {
4957           // TODO(turbofan): Can we do better here?
4958           __ Move(dst, bit_cast<uint32_t>(src.ToFloat32()));
4959         } else {
4960           DCHECK_EQ(src.type(), Constant::kFloat64);
4961           __ Move(dst, src.ToFloat64().AsUint64());
4962         }
4963       }
4964       return;
4965     }
4966     case MoveType::kConstantToStack: {
4967       Constant src = g.ToConstant(source);
4968       Operand dst = g.ToOperand(destination);
4969       if (destination->IsStackSlot()) {
4970         MoveConstantToSlot(dst, src);
4971       } else {
4972         DCHECK(destination->IsFPStackSlot());
4973         if (src.type() == Constant::kFloat32) {
4974           __ movl(dst, Immediate(bit_cast<uint32_t>(src.ToFloat32())));
4975         } else {
4976           DCHECK_EQ(src.type(), Constant::kFloat64);
4977           __ Move(dst, src.ToFloat64().AsUint64());
4978         }
4979       }
4980       return;
4981     }
4982   }
4983   UNREACHABLE();
4984 }
4985 
4986 void CodeGenerator::AssembleSwap(InstructionOperand* source,
4987                                  InstructionOperand* destination) {
4988   if (FLAG_trace_turbo_stack_accesses) {
4989     IncrementStackAccessCounter(source, destination);
4990     IncrementStackAccessCounter(destination, source);
4991   }
4992 
4993   X64OperandConverter g(this, nullptr);
4994   // Dispatch on the source and destination operand kinds.  Not all
4995   // combinations are possible.
4996   switch (MoveType::InferSwap(source, destination)) {
4997     case MoveType::kRegisterToRegister: {
4998       if (source->IsRegister()) {
4999         Register src = g.ToRegister(source);
5000         Register dst = g.ToRegister(destination);
5001         __ movq(kScratchRegister, src);
5002         __ movq(src, dst);
5003         __ movq(dst, kScratchRegister);
5004       } else {
5005         DCHECK(source->IsFPRegister());
5006         XMMRegister src = g.ToDoubleRegister(source);
5007         XMMRegister dst = g.ToDoubleRegister(destination);
5008         __ Movapd(kScratchDoubleReg, src);
5009         __ Movapd(src, dst);
5010         __ Movapd(dst, kScratchDoubleReg);
5011       }
5012       return;
5013     }
5014     case MoveType::kRegisterToStack: {
5015       if (source->IsRegister()) {
5016         Register src = g.ToRegister(source);
5017         Operand dst = g.ToOperand(destination);
5018         __ movq(kScratchRegister, src);
5019         __ movq(src, dst);
5020         __ movq(dst, kScratchRegister);
5021       } else {
5022         DCHECK(source->IsFPRegister());
5023         XMMRegister src = g.ToDoubleRegister(source);
5024         Operand dst = g.ToOperand(destination);
5025         MachineRepresentation rep =
5026             LocationOperand::cast(source)->representation();
5027         if (rep != MachineRepresentation::kSimd128) {
5028           __ Movsd(kScratchDoubleReg, src);
5029           __ Movsd(src, dst);
5030           __ Movsd(dst, kScratchDoubleReg);
5031         } else {
5032           __ Movups(kScratchDoubleReg, src);
5033           __ Movups(src, dst);
5034           __ Movups(dst, kScratchDoubleReg);
5035         }
5036       }
5037       return;
5038     }
5039     case MoveType::kStackToStack: {
5040       Operand src = g.ToOperand(source);
5041       Operand dst = g.ToOperand(destination);
5042       MachineRepresentation rep =
5043           LocationOperand::cast(source)->representation();
5044       if (rep != MachineRepresentation::kSimd128) {
5045         Register tmp = kScratchRegister;
5046         __ movq(tmp, dst);
5047         __ pushq(src);  // Then use stack to copy src to destination.
5048         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
5049                                                          kSystemPointerSize);
5050         __ popq(dst);
5051         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
5052                                                          -kSystemPointerSize);
5053         __ movq(src, tmp);
5054       } else {
5055         // Without AVX, misaligned reads and writes will trap. Move using the
5056         // stack, in two parts.
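        // The two push/pop pairs below copy the 16-byte source slot to the
        // destination one 8-byte half at a time; the destination's old value,
        // saved in kScratchDoubleReg, is then stored back over the source to
        // complete the swap.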
5057         __ movups(kScratchDoubleReg, dst);  // Save dst in scratch register.
5058         __ pushq(src);  // Then use stack to copy src to destination.
5059         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
5060                                                          kSystemPointerSize);
5061         __ popq(dst);
5062         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
5063                                                          -kSystemPointerSize);
5064         __ pushq(g.ToOperand(source, kSystemPointerSize));
5065         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
5066                                                          kSystemPointerSize);
5067         __ popq(g.ToOperand(destination, kSystemPointerSize));
5068         unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
5069                                                          -kSystemPointerSize);
5070         __ movups(src, kScratchDoubleReg);
5071       }
5072       return;
5073     }
5074     default:
5075       UNREACHABLE();
5076   }
5077 }
5078 
5079 void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
5080   for (size_t index = 0; index < target_count; ++index) {
5081     __ dq(targets[index]);
5082   }
5083 }
5084 
5085 #undef __
5086 
5087 }  // namespace compiler
5088 }  // namespace internal
5089 }  // namespace v8
5090