// Copyright 2013 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <limits>

#include "src/base/overflowing-math.h"
#include "src/codegen/assembler.h"
#include "src/codegen/cpu-features.h"
#include "src/codegen/external-reference.h"
#include "src/codegen/macro-assembler.h"
#include "src/codegen/optimized-compilation-info.h"
#include "src/codegen/x64/assembler-x64.h"
#include "src/codegen/x64/register-x64.h"
#include "src/common/globals.h"
#include "src/compiler/backend/code-generator-impl.h"
#include "src/compiler/backend/code-generator.h"
#include "src/compiler/backend/gap-resolver.h"
#include "src/compiler/backend/instruction-codes.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/osr.h"
#include "src/heap/memory-chunk.h"
#include "src/objects/code-kind.h"
#include "src/objects/smi.h"

#if V8_ENABLE_WEBASSEMBLY
#include "src/wasm/wasm-code-manager.h"
#include "src/wasm/wasm-objects.h"
#endif  // V8_ENABLE_WEBASSEMBLY

namespace v8 {
namespace internal {
namespace compiler {

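// Shorthand so that assembler calls below read as "__ movq(...)".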
#define __ tasm()->

// Adds X64-specific methods for decoding operands.
class X64OperandConverter : public InstructionOperandConverter {
 public:
  X64OperandConverter(CodeGenerator* gen, Instruction* instr)
      : InstructionOperandConverter(gen, instr) {}

  Immediate InputImmediate(size_t index) {
    return ToImmediate(instr_->InputAt(index));
  }

  Operand InputOperand(size_t index, int extra = 0) {
    return ToOperand(instr_->InputAt(index), extra);
  }

  Operand OutputOperand() { return ToOperand(instr_->Output()); }

  Immediate ToImmediate(InstructionOperand* operand) {
    Constant constant = ToConstant(operand);
    if (constant.type() == Constant::kFloat64) {
      DCHECK_EQ(0, constant.ToFloat64().AsUint64());
      return Immediate(0);
    }
    if (RelocInfo::IsWasmReference(constant.rmode())) {
      return Immediate(constant.ToInt32(), constant.rmode());
    }
    return Immediate(constant.ToInt32());
  }

  Operand ToOperand(InstructionOperand* op, int extra = 0) {
    DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
    return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
  }

  Operand SlotToOperand(int slot_index, int extra = 0) {
    FrameOffset offset = frame_access_state()->GetFrameOffset(slot_index);
    return Operand(offset.from_stack_pointer() ? rsp : rbp,
                   offset.offset() + extra);
  }

  static size_t NextOffset(size_t* offset) {
    size_t i = *offset;
    (*offset)++;
    return i;
  }

  static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
    STATIC_ASSERT(0 == static_cast<int>(times_1));
    STATIC_ASSERT(1 == static_cast<int>(times_2));
    STATIC_ASSERT(2 == static_cast<int>(times_4));
    STATIC_ASSERT(3 == static_cast<int>(times_8));
    int scale = static_cast<int>(mode - one);
    DCHECK(scale >= 0 && scale < 4);
    return static_cast<ScaleFactor>(scale);
  }

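  // Decodes the instruction's addressing mode and materializes the memory
  // operand, starting at input *offset and advancing *offset past every
  // input the mode consumes.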
  Operand MemoryOperand(size_t* offset) {
    AddressingMode mode = AddressingModeField::decode(instr_->opcode());
    switch (mode) {
      case kMode_MR: {
        Register base = InputRegister(NextOffset(offset));
        int32_t disp = 0;
        return Operand(base, disp);
      }
      case kMode_MRI: {
        Register base = InputRegister(NextOffset(offset));
        int32_t disp = InputInt32(NextOffset(offset));
        return Operand(base, disp);
      }
      case kMode_MR1:
      case kMode_MR2:
      case kMode_MR4:
      case kMode_MR8: {
        Register base = InputRegister(NextOffset(offset));
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_MR1, mode);
        int32_t disp = 0;
        return Operand(base, index, scale, disp);
      }
      case kMode_MR1I:
      case kMode_MR2I:
      case kMode_MR4I:
      case kMode_MR8I: {
        Register base = InputRegister(NextOffset(offset));
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
        int32_t disp = InputInt32(NextOffset(offset));
        return Operand(base, index, scale, disp);
      }
      case kMode_M1: {
        Register base = InputRegister(NextOffset(offset));
        int32_t disp = 0;
        return Operand(base, disp);
      }
      case kMode_M2:
        UNREACHABLE();  // Should use kMode_MR with more compact encoding
                        // instead.
      case kMode_M4:
      case kMode_M8: {
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_M1, mode);
        int32_t disp = 0;
        return Operand(index, scale, disp);
      }
      case kMode_M1I:
      case kMode_M2I:
      case kMode_M4I:
      case kMode_M8I: {
        Register index = InputRegister(NextOffset(offset));
        ScaleFactor scale = ScaleFor(kMode_M1I, mode);
        int32_t disp = InputInt32(NextOffset(offset));
        return Operand(index, scale, disp);
      }
      case kMode_Root: {
        Register base = kRootRegister;
        int32_t disp = InputInt32(NextOffset(offset));
        return Operand(base, disp);
      }
      case kMode_None:
        UNREACHABLE();
    }
    UNREACHABLE();
  }

  Operand MemoryOperand(size_t first_input = 0) {
    return MemoryOperand(&first_input);
  }
};

namespace {

bool HasAddressingMode(Instruction* instr) {
  return instr->addressing_mode() != kMode_None;
}

bool HasImmediateInput(Instruction* instr, size_t index) {
  return instr->InputAt(index)->IsImmediate();
}

bool HasRegisterInput(Instruction* instr, size_t index) {
  return instr->InputAt(index)->IsRegister();
}

class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
 public:
  OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
      : OutOfLineCode(gen), result_(result) {}

  void Generate() final {
    __ Xorps(result_, result_);
    __ Divss(result_, result_);
  }

 private:
  XMMRegister const result_;
};

class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
 public:
  OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
      : OutOfLineCode(gen), result_(result) {}

  void Generate() final {
    __ Xorpd(result_, result_);
    __ Divsd(result_, result_);
  }

 private:
  XMMRegister const result_;
};

class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
 public:
  OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
                             XMMRegister input, StubCallMode stub_mode,
                             UnwindingInfoWriter* unwinding_info_writer)
      : OutOfLineCode(gen),
        result_(result),
        input_(input),
#if V8_ENABLE_WEBASSEMBLY
        stub_mode_(stub_mode),
#endif  // V8_ENABLE_WEBASSEMBLY
        unwinding_info_writer_(unwinding_info_writer),
        isolate_(gen->isolate()),
        zone_(gen->zone()) {
  }

  void Generate() final {
    __ AllocateStackSpace(kDoubleSize);
    unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                      kDoubleSize);
    __ Movsd(MemOperand(rsp, 0), input_);
#if V8_ENABLE_WEBASSEMBLY
    if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
#else
    // For balance.
    if (false) {
#endif  // V8_ENABLE_WEBASSEMBLY
    } else if (tasm()->options().inline_offheap_trampolines) {
      // With embedded builtins we do not need the isolate here. This allows
      // the call to be generated asynchronously.
      __ CallBuiltin(Builtin::kDoubleToI);
    } else {
      __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
    }
    __ movl(result_, MemOperand(rsp, 0));
    __ addq(rsp, Immediate(kDoubleSize));
    unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                      -kDoubleSize);
  }

 private:
  Register const result_;
  XMMRegister const input_;
#if V8_ENABLE_WEBASSEMBLY
  StubCallMode stub_mode_;
#endif  // V8_ENABLE_WEBASSEMBLY
  UnwindingInfoWriter* const unwinding_info_writer_;
  Isolate* isolate_;
  Zone* zone_;
};

class OutOfLineRecordWrite final : public OutOfLineCode {
 public:
  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
                       Register value, Register scratch0, Register scratch1,
                       RecordWriteMode mode, StubCallMode stub_mode)
      : OutOfLineCode(gen),
        object_(object),
        operand_(operand),
        value_(value),
        scratch0_(scratch0),
        scratch1_(scratch1),
        mode_(mode),
#if V8_ENABLE_WEBASSEMBLY
        stub_mode_(stub_mode),
#endif  // V8_ENABLE_WEBASSEMBLY
        zone_(gen->zone()) {
    DCHECK(!AreAliased(object, scratch0, scratch1));
    DCHECK(!AreAliased(value, scratch0, scratch1));
  }

  void Generate() final {
    if (COMPRESS_POINTERS_BOOL) {
      __ DecompressTaggedPointer(value_, value_);
    }
    __ CheckPageFlag(value_, scratch0_,
                     MemoryChunk::kPointersToHereAreInterestingMask, zero,
                     exit());
    __ leaq(scratch1_, operand_);

    RememberedSetAction const remembered_set_action =
        mode_ > RecordWriteMode::kValueIsMap ? RememberedSetAction::kEmit
                                             : RememberedSetAction::kOmit;
    SaveFPRegsMode const save_fp_mode = frame()->DidAllocateDoubleRegisters()
                                            ? SaveFPRegsMode::kSave
                                            : SaveFPRegsMode::kIgnore;

    if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
      __ CallEphemeronKeyBarrier(object_, scratch1_, save_fp_mode);
#if V8_ENABLE_WEBASSEMBLY
    } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ CallRecordWriteStubSaveRegisters(object_, scratch1_,
                                          remembered_set_action, save_fp_mode,
                                          StubCallMode::kCallWasmRuntimeStub);
#endif  // V8_ENABLE_WEBASSEMBLY
    } else {
      __ CallRecordWriteStubSaveRegisters(object_, scratch1_,
                                          remembered_set_action, save_fp_mode);
    }
  }

 private:
  Register const object_;
  Operand const operand_;
  Register const value_;
  Register const scratch0_;
  Register const scratch1_;
  RecordWriteMode const mode_;
#if V8_ENABLE_WEBASSEMBLY
  StubCallMode const stub_mode_;
#endif  // V8_ENABLE_WEBASSEMBLY
  Zone* zone_;
};

template <std::memory_order order>
void EmitStore(TurboAssembler* tasm, Operand operand, Register value,
               MachineRepresentation rep) {
  if (order == std::memory_order_relaxed) {
    switch (rep) {
      case MachineRepresentation::kWord8:
        tasm->movb(operand, value);
        break;
      case MachineRepresentation::kWord16:
        tasm->movw(operand, value);
        break;
      case MachineRepresentation::kWord32:
        tasm->movl(operand, value);
        break;
      case MachineRepresentation::kWord64:
        tasm->movq(operand, value);
        break;
      case MachineRepresentation::kTagged:
        tasm->StoreTaggedField(operand, value);
        break;
      default:
        UNREACHABLE();
    }
    return;
  }

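  // A seq_cst store requires a full barrier on x64; xchg with a memory
  // operand has an implicit lock prefix and acts as one, so it is used below
  // instead of a mov followed by mfence.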
  DCHECK_EQ(order, std::memory_order_seq_cst);
  switch (rep) {
    case MachineRepresentation::kWord8:
      tasm->movq(kScratchRegister, value);
      tasm->xchgb(kScratchRegister, operand);
      break;
    case MachineRepresentation::kWord16:
      tasm->movq(kScratchRegister, value);
      tasm->xchgw(kScratchRegister, operand);
      break;
    case MachineRepresentation::kWord32:
      tasm->movq(kScratchRegister, value);
      tasm->xchgl(kScratchRegister, operand);
      break;
    case MachineRepresentation::kWord64:
      tasm->movq(kScratchRegister, value);
      tasm->xchgq(kScratchRegister, operand);
      break;
    case MachineRepresentation::kTagged:
      tasm->AtomicStoreTaggedField(operand, value);
      break;
    default:
      UNREACHABLE();
  }
}

template <std::memory_order order>
void EmitStore(TurboAssembler* tasm, Operand operand, Immediate value,
               MachineRepresentation rep);

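// Only the relaxed specialization is defined: a seq_cst store is emitted as
// an xchg (see above), which needs its value in a register, so immediate
// inputs only reach this helper for relaxed stores.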
template <>
void EmitStore<std::memory_order_relaxed>(TurboAssembler* tasm, Operand operand,
                                          Immediate value,
                                          MachineRepresentation rep) {
  switch (rep) {
    case MachineRepresentation::kWord8:
      tasm->movb(operand, value);
      break;
    case MachineRepresentation::kWord16:
      tasm->movw(operand, value);
      break;
    case MachineRepresentation::kWord32:
      tasm->movl(operand, value);
      break;
    case MachineRepresentation::kWord64:
      tasm->movq(operand, value);
      break;
    case MachineRepresentation::kTagged:
      tasm->StoreTaggedField(operand, value);
      break;
    default:
      UNREACHABLE();
  }
}

#ifdef V8_IS_TSAN
void EmitMemoryProbeForTrapHandlerIfNeeded(TurboAssembler* tasm,
                                           Register scratch, Operand operand,
                                           StubCallMode mode, int size) {
#if V8_ENABLE_WEBASSEMBLY && V8_TRAP_HANDLER_SUPPORTED
  // The wasm OOB trap handler needs to be able to look up the faulting
  // instruction pointer to handle the SIGSEGV raised by an OOB access. It
  // will not handle SIGSEGVs raised by the TSAN store helpers. Emit a
  // redundant load here to give the trap handler a chance to handle any
  // OOB SIGSEGVs.
  if (trap_handler::IsTrapHandlerEnabled() &&
      mode == StubCallMode::kCallWasmRuntimeStub) {
    switch (size) {
      case kInt8Size:
        tasm->movb(scratch, operand);
        break;
      case kInt16Size:
        tasm->movw(scratch, operand);
        break;
      case kInt32Size:
        tasm->movl(scratch, operand);
        break;
      case kInt64Size:
        tasm->movq(scratch, operand);
        break;
      default:
        UNREACHABLE();
    }
  }
#endif
}

class OutOfLineTSANStore : public OutOfLineCode {
 public:
  OutOfLineTSANStore(CodeGenerator* gen, Operand operand, Register value,
                     Register scratch0, StubCallMode stub_mode, int size,
                     std::memory_order order)
      : OutOfLineCode(gen),
        operand_(operand),
        value_(value),
        scratch0_(scratch0),
#if V8_ENABLE_WEBASSEMBLY
        stub_mode_(stub_mode),
#endif  // V8_ENABLE_WEBASSEMBLY
        size_(size),
        memory_order_(order),
        zone_(gen->zone()) {
    DCHECK(!AreAliased(value, scratch0));
  }

  void Generate() final {
    const SaveFPRegsMode save_fp_mode = frame()->DidAllocateDoubleRegisters()
                                            ? SaveFPRegsMode::kSave
                                            : SaveFPRegsMode::kIgnore;
    __ leaq(scratch0_, operand_);

#if V8_ENABLE_WEBASSEMBLY
    if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      tasm()->CallTSANStoreStub(scratch0_, value_, save_fp_mode, size_,
                                StubCallMode::kCallWasmRuntimeStub,
                                memory_order_);
      return;
    }
#endif  // V8_ENABLE_WEBASSEMBLY

    tasm()->CallTSANStoreStub(scratch0_, value_, save_fp_mode, size_,
                              StubCallMode::kCallBuiltinPointer,
                              memory_order_);
  }

 private:
  Operand const operand_;
  Register const value_;
  Register const scratch0_;
#if V8_ENABLE_WEBASSEMBLY
  StubCallMode const stub_mode_;
#endif  // V8_ENABLE_WEBASSEMBLY
  int size_;
  const std::memory_order memory_order_;
  Zone* zone_;
};

void EmitTSANStoreOOL(Zone* zone, CodeGenerator* codegen, TurboAssembler* tasm,
                      Operand operand, Register value_reg,
                      X64OperandConverter& i, StubCallMode mode, int size,
                      std::memory_order order) {
  // The FOR_TESTING code doesn't initialize the root register. We can't call
  // the TSAN builtin since we need to load the external reference through the
  // root register.
  // TODO(solanes, v8:7790, v8:11600): See if we can support the FOR_TESTING
  // path. It is not crucial, but it would be nice to remove this restriction.
  DCHECK_NE(codegen->code_kind(), CodeKind::FOR_TESTING);

  Register scratch0 = i.TempRegister(0);
  auto tsan_ool = zone->New<OutOfLineTSANStore>(codegen, operand, value_reg,
                                                scratch0, mode, size, order);
  tasm->jmp(tsan_ool->entry());
  tasm->bind(tsan_ool->exit());
}

template <std::memory_order order>
Register GetTSANValueRegister(TurboAssembler* tasm, Register value,
                              X64OperandConverter& i) {
  return value;
}

template <std::memory_order order>
Register GetTSANValueRegister(TurboAssembler* tasm, Immediate value,
                              X64OperandConverter& i);

template <>
Register GetTSANValueRegister<std::memory_order_relaxed>(
    TurboAssembler* tasm, Immediate value, X64OperandConverter& i) {
  Register value_reg = i.TempRegister(1);
  tasm->movq(value_reg, value);
  return value_reg;
}

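// With TSAN enabled, the inline store is replaced by a jump to an
// OutOfLineTSANStore stub call that performs the access and reports it to
// the race detector; only FOR_TESTING code, which lacks a usable root
// register, falls back to a plain inline store.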
template <std::memory_order order, typename ValueT>
void EmitTSANAwareStore(Zone* zone, CodeGenerator* codegen,
                        TurboAssembler* tasm, Operand operand, ValueT value,
                        X64OperandConverter& i, StubCallMode stub_call_mode,
                        MachineRepresentation rep) {
  // The FOR_TESTING code doesn't initialize the root register. We can't call
  // the TSAN builtin since we need to load the external reference through the
  // root register.
  // TODO(solanes, v8:7790, v8:11600): See if we can support the FOR_TESTING
  // path. It is not crucial, but it would be nice to remove this restriction.
  if (codegen->code_kind() != CodeKind::FOR_TESTING) {
    int size = ElementSizeInBytes(rep);
    EmitMemoryProbeForTrapHandlerIfNeeded(tasm, i.TempRegister(0), operand,
                                          stub_call_mode, size);
    Register value_reg = GetTSANValueRegister<order>(tasm, value, i);
    EmitTSANStoreOOL(zone, codegen, tasm, operand, value_reg, i,
                     stub_call_mode, size, order);
  } else {
    EmitStore<order>(tasm, operand, value, rep);
  }
}

class OutOfLineTSANRelaxedLoad final : public OutOfLineCode {
 public:
  OutOfLineTSANRelaxedLoad(CodeGenerator* gen, Operand operand,
                           Register scratch0, StubCallMode stub_mode, int size)
      : OutOfLineCode(gen),
        operand_(operand),
        scratch0_(scratch0),
#if V8_ENABLE_WEBASSEMBLY
        stub_mode_(stub_mode),
#endif  // V8_ENABLE_WEBASSEMBLY
        size_(size),
        zone_(gen->zone()) {
  }

  void Generate() final {
    const SaveFPRegsMode save_fp_mode = frame()->DidAllocateDoubleRegisters()
                                            ? SaveFPRegsMode::kSave
                                            : SaveFPRegsMode::kIgnore;
    __ leaq(scratch0_, operand_);

#if V8_ENABLE_WEBASSEMBLY
    if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ CallTSANRelaxedLoadStub(scratch0_, save_fp_mode, size_,
                                 StubCallMode::kCallWasmRuntimeStub);
      return;
    }
#endif  // V8_ENABLE_WEBASSEMBLY

    __ CallTSANRelaxedLoadStub(scratch0_, save_fp_mode, size_,
                               StubCallMode::kCallBuiltinPointer);
  }

 private:
  Operand const operand_;
  Register const scratch0_;
#if V8_ENABLE_WEBASSEMBLY
  StubCallMode const stub_mode_;
#endif  // V8_ENABLE_WEBASSEMBLY
  int size_;
  Zone* zone_;
};

void EmitTSANRelaxedLoadOOLIfNeeded(Zone* zone, CodeGenerator* codegen,
                                    TurboAssembler* tasm, Operand operand,
                                    X64OperandConverter& i, StubCallMode mode,
                                    int size) {
  // The FOR_TESTING code doesn't initialize the root register. We can't call
  // the TSAN builtin since we need to load the external reference through the
  // root register.
  // TODO(solanes, v8:7790, v8:11600): See if we can support the FOR_TESTING
  // path. It is not crucial, but it would be nice to remove this if.
  if (codegen->code_kind() == CodeKind::FOR_TESTING) return;

  Register scratch0 = i.TempRegister(0);
  auto tsan_ool = zone->New<OutOfLineTSANRelaxedLoad>(codegen, operand,
                                                      scratch0, mode, size);
  tasm->jmp(tsan_ool->entry());
  tasm->bind(tsan_ool->exit());
}

#else
template <std::memory_order order, typename ValueT>
void EmitTSANAwareStore(Zone* zone, CodeGenerator* codegen,
                        TurboAssembler* tasm, Operand operand, ValueT value,
                        X64OperandConverter& i, StubCallMode stub_call_mode,
                        MachineRepresentation rep) {
  DCHECK(order == std::memory_order_relaxed ||
         order == std::memory_order_seq_cst);
  EmitStore<order>(tasm, operand, value, rep);
}

void EmitTSANRelaxedLoadOOLIfNeeded(Zone* zone, CodeGenerator* codegen,
                                    TurboAssembler* tasm, Operand operand,
                                    X64OperandConverter& i, StubCallMode mode,
                                    int size) {}
#endif  // V8_IS_TSAN

#if V8_ENABLE_WEBASSEMBLY
class WasmOutOfLineTrap : public OutOfLineCode {
 public:
  WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
      : OutOfLineCode(gen), gen_(gen), instr_(instr) {}

  void Generate() override {
    X64OperandConverter i(gen_, instr_);
    TrapId trap_id =
        static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
    GenerateWithTrapId(trap_id);
  }

 protected:
  CodeGenerator* gen_;

  void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }

 private:
  void GenerateCallToTrap(TrapId trap_id) {
    if (!gen_->wasm_runtime_exception_support()) {
      // We cannot test calls to the runtime in cctest/test-run-wasm.
      // Therefore we emit a call to C here instead of a call to the runtime.
      __ PrepareCallCFunction(0);
      __ CallCFunction(
          ExternalReference::wasm_call_trap_callback_for_testing(), 0);
      __ LeaveFrame(StackFrame::WASM);
      auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
      size_t pop_size =
          call_descriptor->ParameterSlotCount() * kSystemPointerSize;
      // Use rcx as a scratch register; we return immediately anyway.
      __ Ret(static_cast<int>(pop_size), rcx);
    } else {
      gen_->AssembleSourcePosition(instr_);
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
      ReferenceMap* reference_map =
          gen_->zone()->New<ReferenceMap>(gen_->zone());
      gen_->RecordSafepoint(reference_map);
      __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
    }
  }

  Instruction* instr_;
};

class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {
 public:
  WasmProtectedInstructionTrap(CodeGenerator* gen, int pc, Instruction* instr)
      : WasmOutOfLineTrap(gen, instr), pc_(pc) {}

  void Generate() final {
    DCHECK(FLAG_wasm_bounds_checks && !FLAG_wasm_enforce_bounds_checks);
    gen_->AddProtectedInstructionLanding(pc_, __ pc_offset());
    GenerateWithTrapId(TrapId::kTrapMemOutOfBounds);
  }

 private:
  int pc_;
};

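// Registers an out-of-line trap landing pad for a memory access that is
// bounds-checked via the signal handler. Allocating the OutOfLineCode in the
// zone suffices: its constructor registers it with the code generator, which
// emits it after the main code.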
void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
                         InstructionCode opcode, Instruction* instr,
                         int pc) {
  const MemoryAccessMode access_mode = instr->memory_access_mode();
  if (access_mode == kMemoryAccessProtected) {
    zone->New<WasmProtectedInstructionTrap>(codegen, pc, instr);
  }
}

#else

void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
                         InstructionCode opcode, Instruction* instr, int pc) {
  DCHECK_NE(kMemoryAccessProtected, instr->memory_access_mode());
}

#endif  // V8_ENABLE_WEBASSEMBLY

}  // namespace

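// The ASSEMBLE_* macros below expand the common operand-shape dispatch
// (register vs. immediate vs. memory operand) so that each opcode case in
// AssembleArchInstruction stays a one-liner.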
#define ASSEMBLE_UNOP(asm_instr)         \
  do {                                   \
    if (instr->Output()->IsRegister()) { \
      __ asm_instr(i.OutputRegister());  \
    } else {                             \
      __ asm_instr(i.OutputOperand());   \
    }                                    \
  } while (false)

#define ASSEMBLE_BINOP(asm_instr)                                \
  do {                                                           \
    if (HasAddressingMode(instr)) {                              \
      size_t index = 1;                                          \
      Operand right = i.MemoryOperand(&index);                   \
      __ asm_instr(i.InputRegister(0), right);                   \
    } else {                                                     \
      if (HasImmediateInput(instr, 1)) {                         \
        if (HasRegisterInput(instr, 0)) {                        \
          __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
        } else {                                                 \
          __ asm_instr(i.InputOperand(0), i.InputImmediate(1));  \
        }                                                        \
      } else {                                                   \
        if (HasRegisterInput(instr, 1)) {                        \
          __ asm_instr(i.InputRegister(0), i.InputRegister(1));  \
        } else {                                                 \
          __ asm_instr(i.InputRegister(0), i.InputOperand(1));   \
        }                                                        \
      }                                                          \
    }                                                            \
  } while (false)

#define ASSEMBLE_COMPARE(asm_instr)                              \
  do {                                                           \
    if (HasAddressingMode(instr)) {                              \
      size_t index = 0;                                          \
      Operand left = i.MemoryOperand(&index);                    \
      if (HasImmediateInput(instr, index)) {                     \
        __ asm_instr(left, i.InputImmediate(index));             \
      } else {                                                   \
        __ asm_instr(left, i.InputRegister(index));              \
      }                                                          \
    } else {                                                     \
      if (HasImmediateInput(instr, 1)) {                         \
        if (HasRegisterInput(instr, 0)) {                        \
          __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
        } else {                                                 \
          __ asm_instr(i.InputOperand(0), i.InputImmediate(1));  \
        }                                                        \
      } else {                                                   \
        if (HasRegisterInput(instr, 1)) {                        \
          __ asm_instr(i.InputRegister(0), i.InputRegister(1));  \
        } else {                                                 \
          __ asm_instr(i.InputRegister(0), i.InputOperand(1));   \
        }                                                        \
      }                                                          \
    }                                                            \
  } while (false)

#define ASSEMBLE_MULT(asm_instr)                              \
  do {                                                        \
    if (HasImmediateInput(instr, 1)) {                        \
      if (HasRegisterInput(instr, 0)) {                       \
        __ asm_instr(i.OutputRegister(), i.InputRegister(0),  \
                     i.InputImmediate(1));                    \
      } else {                                                \
        __ asm_instr(i.OutputRegister(), i.InputOperand(0),   \
                     i.InputImmediate(1));                    \
      }                                                       \
    } else {                                                  \
      if (HasRegisterInput(instr, 1)) {                       \
        __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \
      } else {                                                \
        __ asm_instr(i.OutputRegister(), i.InputOperand(1));  \
      }                                                       \
    }                                                         \
  } while (false)

#define ASSEMBLE_SHIFT(asm_instr, width)                                   \
  do {                                                                     \
    if (HasImmediateInput(instr, 1)) {                                     \
      if (instr->Output()->IsRegister()) {                                 \
        __ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \
      } else {                                                             \
        __ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1)));  \
      }                                                                    \
    } else {                                                               \
      if (instr->Output()->IsRegister()) {                                 \
        __ asm_instr##_cl(i.OutputRegister());                             \
      } else {                                                             \
        __ asm_instr##_cl(i.OutputOperand());                              \
      }                                                                    \
    }                                                                      \
  } while (false)

#define ASSEMBLE_MOVX(asm_instr)                            \
  do {                                                      \
    if (HasAddressingMode(instr)) {                         \
      __ asm_instr(i.OutputRegister(), i.MemoryOperand());  \
    } else if (HasRegisterInput(instr, 0)) {                \
      __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
    } else {                                                \
      __ asm_instr(i.OutputRegister(), i.InputOperand(0));  \
    }                                                       \
  } while (false)

#define ASSEMBLE_SSE_BINOP(asm_instr)                                     \
  do {                                                                    \
    if (HasAddressingMode(instr)) {                                       \
      size_t index = 1;                                                   \
      Operand right = i.MemoryOperand(&index);                            \
      __ asm_instr(i.InputDoubleRegister(0), right);                      \
    } else {                                                              \
      if (instr->InputAt(1)->IsFPRegister()) {                            \
        __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
      } else {                                                            \
        __ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1));        \
      }                                                                   \
    }                                                                     \
  } while (false)

#define ASSEMBLE_SSE_UNOP(asm_instr)                                    \
  do {                                                                  \
    if (instr->InputAt(0)->IsFPRegister()) {                            \
      __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
    } else {                                                            \
      __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0));        \
    }                                                                   \
  } while (false)

#define ASSEMBLE_AVX_BINOP(asm_instr)                                        \
  do {                                                                       \
    CpuFeatureScope avx_scope(tasm(), AVX);                                  \
    if (HasAddressingMode(instr)) {                                          \
      size_t index = 1;                                                      \
      Operand right = i.MemoryOperand(&index);                               \
      __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0),       \
                   right);                                                   \
    } else {                                                                 \
      if (instr->InputAt(1)->IsFPRegister()) {                               \
        __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0),     \
                     i.InputDoubleRegister(1));                              \
      } else {                                                               \
        __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0),     \
                     i.InputOperand(1));                                     \
      }                                                                      \
    }                                                                        \
  } while (false)

#define ASSEMBLE_IEEE754_BINOP(name)                                     \
  do {                                                                   \
    __ PrepareCallCFunction(2);                                          \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
  } while (false)

#define ASSEMBLE_IEEE754_UNOP(name)                                      \
  do {                                                                   \
    __ PrepareCallCFunction(1);                                          \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \
  } while (false)

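// Atomic read-modify-write loop: load the old value into rax, apply the
// operation in a temp register, then lock cmpxchg the result back and retry
// if another thread wrote the location in between.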
#define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
  do {                                                          \
    Label binop;                                                \
    __ bind(&binop);                                            \
    __ mov_inst(rax, i.MemoryOperand(1));                       \
    __ movl(i.TempRegister(0), rax);                            \
    __ bin_inst(i.TempRegister(0), i.InputRegister(0));         \
    __ lock();                                                  \
    __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));     \
    __ j(not_equal, &binop);                                    \
  } while (false)

#define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
  do {                                                            \
    Label binop;                                                  \
    __ bind(&binop);                                              \
    __ mov_inst(rax, i.MemoryOperand(1));                         \
    __ movq(i.TempRegister(0), rax);                              \
    __ bin_inst(i.TempRegister(0), i.InputRegister(0));           \
    __ lock();                                                    \
    __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));       \
    __ j(not_equal, &binop);                                      \
  } while (false)

// Handles both SSE and AVX codegen. For SSE we use DefineSameAsFirst, so the
// dst and first src will be the same. For AVX we don't restrict it that way,
// so we will omit unnecessary moves.
#define ASSEMBLE_SIMD_BINOP(opcode)                                      \
  do {                                                                   \
    if (CpuFeatures::IsSupported(AVX)) {                                 \
      CpuFeatureScope avx_scope(tasm(), AVX);                            \
      __ v##opcode(i.OutputSimd128Register(), i.InputSimd128Register(0), \
                   i.InputSimd128Register(1));                           \
    } else {                                                             \
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));   \
      __ opcode(i.OutputSimd128Register(), i.InputSimd128Register(1));   \
    }                                                                    \
  } while (false)

#define ASSEMBLE_SIMD_INSTR(opcode, dst_operand, index)      \
  do {                                                       \
    if (instr->InputAt(index)->IsSimd128Register()) {        \
      __ opcode(dst_operand, i.InputSimd128Register(index)); \
    } else {                                                 \
      __ opcode(dst_operand, i.InputOperand(index));         \
    }                                                        \
  } while (false)

#define ASSEMBLE_SIMD_IMM_INSTR(opcode, dst_operand, index, imm)  \
  do {                                                            \
    if (instr->InputAt(index)->IsSimd128Register()) {             \
      __ opcode(dst_operand, i.InputSimd128Register(index), imm); \
    } else {                                                      \
      __ opcode(dst_operand, i.InputOperand(index), imm);         \
    }                                                             \
  } while (false)

#define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode)                    \
  do {                                                          \
    XMMRegister dst = i.OutputSimd128Register();                \
    byte input_index = instr->InputCount() == 2 ? 1 : 0;        \
    if (CpuFeatures::IsSupported(AVX)) {                        \
      CpuFeatureScope avx_scope(tasm(), AVX);                   \
      DCHECK(instr->InputAt(input_index)->IsSimd128Register()); \
      __ v##opcode(dst, i.InputSimd128Register(0),              \
                   i.InputSimd128Register(input_index));        \
    } else {                                                    \
      DCHECK_EQ(dst, i.InputSimd128Register(0));                \
      ASSEMBLE_SIMD_INSTR(opcode, dst, input_index);            \
    }                                                           \
  } while (false)

#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, imm)                \
  do {                                                        \
    XMMRegister dst = i.OutputSimd128Register();              \
    XMMRegister src = i.InputSimd128Register(0);              \
    if (CpuFeatures::IsSupported(AVX)) {                      \
      CpuFeatureScope avx_scope(tasm(), AVX);                 \
      DCHECK(instr->InputAt(1)->IsSimd128Register());         \
      __ v##opcode(dst, src, i.InputSimd128Register(1), imm); \
    } else {                                                  \
      DCHECK_EQ(dst, src);                                    \
      if (instr->InputAt(1)->IsSimd128Register()) {           \
        __ opcode(dst, i.InputSimd128Register(1), imm);       \
      } else {                                                \
        __ opcode(dst, i.InputOperand(1), imm);               \
      }                                                       \
    }                                                         \
  } while (false)

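// "All true" sets dst to 1 iff no lane of the input is zero: the opcode is a
// lane-wise compare-equal against a zeroed register, so ptest sets ZF (and
// setcc(equal) produces 1) exactly when no input lane compared equal to zero.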
#define ASSEMBLE_SIMD_ALL_TRUE(opcode)                        \
  do {                                                        \
    Register dst = i.OutputRegister();                        \
    __ xorq(dst, dst);                                        \
    __ Pxor(kScratchDoubleReg, kScratchDoubleReg);            \
    __ opcode(kScratchDoubleReg, i.InputSimd128Register(0));  \
    __ Ptest(kScratchDoubleReg, kScratchDoubleReg);           \
    __ setcc(equal, dst);                                     \
  } while (false)

// This macro will directly emit the opcode if the shift is an immediate - the
// shift value will be taken modulo 2^width. Otherwise, it will emit code to
// perform the modulus operation.
#define ASSEMBLE_SIMD_SHIFT(opcode, width)                               \
  do {                                                                   \
    XMMRegister dst = i.OutputSimd128Register();                         \
    if (HasImmediateInput(instr, 1)) {                                   \
      if (CpuFeatures::IsSupported(AVX)) {                               \
        CpuFeatureScope avx_scope(tasm(), AVX);                          \
        __ v##opcode(dst, i.InputSimd128Register(0),                     \
                     byte{i.InputInt##width(1)});                        \
      } else {                                                           \
        DCHECK_EQ(dst, i.InputSimd128Register(0));                       \
        __ opcode(dst, byte{i.InputInt##width(1)});                      \
      }                                                                  \
    } else {                                                             \
      constexpr int mask = (1 << width) - 1;                             \
      __ movq(kScratchRegister, i.InputRegister(1));                     \
      __ andq(kScratchRegister, Immediate(mask));                        \
      __ Movq(kScratchDoubleReg, kScratchRegister);                      \
      if (CpuFeatures::IsSupported(AVX)) {                               \
        CpuFeatureScope avx_scope(tasm(), AVX);                          \
        __ v##opcode(dst, i.InputSimd128Register(0), kScratchDoubleReg); \
      } else {                                                           \
        DCHECK_EQ(dst, i.InputSimd128Register(0));                       \
        __ opcode(dst, kScratchDoubleReg);                               \
      }                                                                  \
    }                                                                    \
  } while (false)

#define ASSEMBLE_PINSR(ASM_INSTR)                                        \
  do {                                                                   \
    XMMRegister dst = i.OutputSimd128Register();                         \
    XMMRegister src = i.InputSimd128Register(0);                         \
    uint8_t laneidx = i.InputUint8(1);                                   \
    uint32_t load_offset;                                                \
    if (HasAddressingMode(instr)) {                                      \
      __ ASM_INSTR(dst, src, i.MemoryOperand(2), laneidx, &load_offset); \
    } else if (instr->InputAt(2)->IsFPRegister()) {                      \
      __ Movq(kScratchRegister, i.InputDoubleRegister(2));               \
      __ ASM_INSTR(dst, src, kScratchRegister, laneidx, &load_offset);   \
    } else if (instr->InputAt(2)->IsRegister()) {                        \
      __ ASM_INSTR(dst, src, i.InputRegister(2), laneidx, &load_offset); \
    } else {                                                             \
      __ ASM_INSTR(dst, src, i.InputOperand(2), laneidx, &load_offset);  \
    }                                                                    \
    EmitOOLTrapIfNeeded(zone(), this, opcode, instr, load_offset);       \
  } while (false)

#define ASSEMBLE_SEQ_CST_STORE(rep)                                       \
  do {                                                                    \
    Register value = i.InputRegister(0);                                  \
    Operand operand = i.MemoryOperand(1);                                 \
    EmitTSANAwareStore<std::memory_order_seq_cst>(                        \
        zone(), this, tasm(), operand, value, i, DetermineStubCallMode(), \
        rep);                                                             \
  } while (false)

void CodeGenerator::AssembleDeconstructFrame() {
  unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
  __ movq(rsp, rbp);
  __ popq(rbp);
}

void CodeGenerator::AssemblePrepareTailCall() {
  if (frame_access_state()->has_frame()) {
    __ movq(rbp, MemOperand(rbp, 0));
  }
  frame_access_state()->SetFrameAccessToSP();
}

namespace {

void AdjustStackPointerForTailCall(Instruction* instr,
                                   TurboAssembler* assembler, Linkage* linkage,
                                   OptimizedCompilationInfo* info,
                                   FrameAccessState* state,
                                   int new_slot_above_sp,
                                   bool allow_shrinkage = true) {
  int stack_slot_delta;
  if (instr->HasCallDescriptorFlag(CallDescriptor::kIsTailCallForTierUp)) {
    // For this special tail-call mode, the callee has the same arguments and
    // linkage as the caller, and arguments adapter frames must be preserved.
    // Thus we simply reset the stack pointer register to its original value
    // before frame construction.
    // See also: AssembleConstructFrame.
    DCHECK(!info->is_osr());
    DCHECK_EQ(linkage->GetIncomingDescriptor()->CalleeSavedRegisters(), 0);
    DCHECK_EQ(linkage->GetIncomingDescriptor()->CalleeSavedFPRegisters(), 0);
    DCHECK_EQ(state->frame()->GetReturnSlotCount(), 0);
    stack_slot_delta = (state->frame()->GetTotalFrameSlotCount() -
                        kReturnAddressStackSlotCount) *
                       -1;
    DCHECK_LE(stack_slot_delta, 0);
  } else {
    int current_sp_offset = state->GetSPToFPSlotCount() +
                            StandardFrameConstants::kFixedSlotCountAboveFp;
    stack_slot_delta = new_slot_above_sp - current_sp_offset;
  }

  if (stack_slot_delta > 0) {
    assembler->AllocateStackSpace(stack_slot_delta * kSystemPointerSize);
    state->IncreaseSPDelta(stack_slot_delta);
  } else if (allow_shrinkage && stack_slot_delta < 0) {
    assembler->addq(rsp, Immediate(-stack_slot_delta * kSystemPointerSize));
    state->IncreaseSPDelta(stack_slot_delta);
  }
}

void SetupSimdImmediateInRegister(TurboAssembler* assembler, uint32_t* imms,
                                  XMMRegister reg) {
  assembler->Move(reg, make_uint64(imms[3], imms[2]),
                  make_uint64(imms[1], imms[0]));
}

}  // namespace

void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
                                              int first_unused_slot_offset) {
  CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
  ZoneVector<MoveOperands*> pushes(zone());
  GetPushCompatibleMoves(instr, flags, &pushes);

  if (!pushes.empty() &&
      (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
       first_unused_slot_offset)) {
    DCHECK(!instr->HasCallDescriptorFlag(CallDescriptor::kIsTailCallForTierUp));
    X64OperandConverter g(this, instr);
    for (auto move : pushes) {
      LocationOperand destination_location(
          LocationOperand::cast(move->destination()));
      InstructionOperand source(move->source());
      AdjustStackPointerForTailCall(instr, tasm(), linkage(), info(),
                                    frame_access_state(),
                                    destination_location.index());
      if (source.IsStackSlot()) {
        LocationOperand source_location(LocationOperand::cast(source));
        __ Push(g.SlotToOperand(source_location.index()));
      } else if (source.IsRegister()) {
        LocationOperand source_location(LocationOperand::cast(source));
        __ Push(source_location.GetRegister());
      } else if (source.IsImmediate()) {
        __ Push(Immediate(ImmediateOperand::cast(source).inline_int32_value()));
      } else {
        // Pushes of non-scalar data types are not supported.
        UNIMPLEMENTED();
      }
      frame_access_state()->IncreaseSPDelta(1);
      move->Eliminate();
    }
  }
  AdjustStackPointerForTailCall(instr, tasm(), linkage(), info(),
                                frame_access_state(), first_unused_slot_offset,
                                false);
}

void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
                                             int first_unused_slot_offset) {
  AdjustStackPointerForTailCall(instr, tasm(), linkage(), info(),
                                frame_access_state(),
                                first_unused_slot_offset);
}

// Check that {kJavaScriptCallCodeStartRegister} is correct.
void CodeGenerator::AssembleCodeStartRegisterCheck() {
  __ ComputeCodeStartAddress(rbx);
  __ cmpq(rbx, kJavaScriptCallCodeStartRegister);
  __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
}

// Check if the code object is marked for deoptimization. If it is, then it
// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we
// need to:
//    1. read from memory the word that contains that bit, which can be found
//       in the flags in the referenced {CodeDataContainer} object;
//    2. test kMarkedForDeoptimizationBit in those flags; and
//    3. if it is not zero then jump to the builtin.
void CodeGenerator::BailoutIfDeoptimized() {
  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
  __ LoadTaggedPointerField(rbx,
                            Operand(kJavaScriptCallCodeStartRegister, offset));
  __ testl(FieldOperand(rbx, CodeDataContainer::kKindSpecificFlagsOffset),
           Immediate(1 << Code::kMarkedForDeoptimizationBit));
  __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
          RelocInfo::CODE_TARGET, not_zero);
}

// Assembles an instruction after register allocation, producing machine code.
CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
    Instruction* instr) {
  X64OperandConverter i(this, instr);
  InstructionCode opcode = instr->opcode();
  ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
  switch (arch_opcode) {
    case kArchCallCodeObject: {
      if (HasImmediateInput(instr, 0)) {
        Handle<Code> code = i.InputCode(0);
        __ Call(code, RelocInfo::CODE_TARGET);
      } else {
        Register reg = i.InputRegister(0);
        DCHECK_IMPLIES(
            instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
            reg == kJavaScriptCallCodeStartRegister);
        __ LoadCodeObjectEntry(reg, reg);
        __ call(reg);
      }
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchCallBuiltinPointer: {
      DCHECK(!HasImmediateInput(instr, 0));
      Register builtin_index = i.InputRegister(0);
      __ CallBuiltinByIndex(builtin_index);
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
#if V8_ENABLE_WEBASSEMBLY
    case kArchCallWasmFunction: {
      if (HasImmediateInput(instr, 0)) {
        Constant constant = i.ToConstant(instr->InputAt(0));
        Address wasm_code = static_cast<Address>(constant.ToInt64());
        if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
          __ near_call(wasm_code, constant.rmode());
        } else {
          __ Call(wasm_code, constant.rmode());
        }
      } else {
        __ call(i.InputRegister(0));
      }
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchTailCallWasm: {
      if (HasImmediateInput(instr, 0)) {
        Constant constant = i.ToConstant(instr->InputAt(0));
        Address wasm_code = static_cast<Address>(constant.ToInt64());
        if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
          __ near_jmp(wasm_code, constant.rmode());
        } else {
          __ Move(kScratchRegister, wasm_code, constant.rmode());
          __ jmp(kScratchRegister);
        }
      } else {
        __ jmp(i.InputRegister(0));
      }
      unwinding_info_writer_.MarkBlockWillExit();
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
#endif  // V8_ENABLE_WEBASSEMBLY
    case kArchTailCallCodeObject: {
      if (HasImmediateInput(instr, 0)) {
        Handle<Code> code = i.InputCode(0);
        __ Jump(code, RelocInfo::CODE_TARGET);
      } else {
        Register reg = i.InputRegister(0);
        DCHECK_IMPLIES(
            instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
            reg == kJavaScriptCallCodeStartRegister);
        __ LoadCodeObjectEntry(reg, reg);
        __ jmp(reg);
      }
      unwinding_info_writer_.MarkBlockWillExit();
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchTailCallAddress: {
      CHECK(!HasImmediateInput(instr, 0));
      Register reg = i.InputRegister(0);
      DCHECK_IMPLIES(
          instr->HasCallDescriptorFlag(CallDescriptor::kFixedTargetRegister),
          reg == kJavaScriptCallCodeStartRegister);
      __ jmp(reg);
      unwinding_info_writer_.MarkBlockWillExit();
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchCallJSFunction: {
      Register func = i.InputRegister(0);
      if (FLAG_debug_code) {
        // Check the function's context matches the context argument.
        __ cmp_tagged(rsi, FieldOperand(func, JSFunction::kContextOffset));
        __ Assert(equal, AbortReason::kWrongFunctionContext);
      }
      static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch");
      __ LoadTaggedPointerField(rcx,
                                FieldOperand(func, JSFunction::kCodeOffset));
      __ CallCodeTObject(rcx);
      frame_access_state()->ClearSPDelta();
      RecordCallPosition(instr);
      break;
    }
    case kArchPrepareCallCFunction: {
      // Frame alignment requires using FP-relative frame addressing.
      frame_access_state()->SetFrameAccessToFP();
      int const num_parameters = MiscField::decode(instr->opcode());
      __ PrepareCallCFunction(num_parameters);
      break;
    }
    case kArchSaveCallerRegisters: {
      fp_mode_ =
          static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
      DCHECK(fp_mode_ == SaveFPRegsMode::kIgnore ||
             fp_mode_ == SaveFPRegsMode::kSave);
      // kReturnRegister0 should have been saved before entering the stub.
      int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
      DCHECK(IsAligned(bytes, kSystemPointerSize));
      DCHECK_EQ(0, frame_access_state()->sp_delta());
      frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
      DCHECK(!caller_registers_saved_);
      caller_registers_saved_ = true;
      break;
    }
    case kArchRestoreCallerRegisters: {
      DCHECK(fp_mode_ ==
             static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
      DCHECK(fp_mode_ == SaveFPRegsMode::kIgnore ||
             fp_mode_ == SaveFPRegsMode::kSave);
      // Don't overwrite the returned value.
      int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
      frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
      DCHECK_EQ(0, frame_access_state()->sp_delta());
      DCHECK(caller_registers_saved_);
      caller_registers_saved_ = false;
      break;
    }
    case kArchPrepareTailCall:
      AssemblePrepareTailCall();
      break;
    case kArchCallCFunction: {
      int const num_gp_parameters = ParamField::decode(instr->opcode());
      int const num_fp_parameters = FPParamField::decode(instr->opcode());
      Label return_location;
#if V8_ENABLE_WEBASSEMBLY
      if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
        // Put the return address in a stack slot.
        __ leaq(kScratchRegister, Operand(&return_location, 0));
        __ movq(MemOperand(rbp, WasmExitFrameConstants::kCallingPCOffset),
                kScratchRegister);
      }
#endif  // V8_ENABLE_WEBASSEMBLY
      if (HasImmediateInput(instr, 0)) {
        ExternalReference ref = i.InputExternalReference(0);
        __ CallCFunction(ref, num_gp_parameters + num_fp_parameters);
      } else {
        Register func = i.InputRegister(0);
        __ CallCFunction(func, num_gp_parameters + num_fp_parameters);
      }
      __ bind(&return_location);
#if V8_ENABLE_WEBASSEMBLY
      if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
        RecordSafepoint(instr->reference_map());
      }
#endif  // V8_ENABLE_WEBASSEMBLY
      frame_access_state()->SetFrameAccessToDefault();
      // Ideally, we should decrement SP delta to match the change of stack
      // pointer in CallCFunction. However, for certain architectures (e.g.
      // ARM), there may be stricter alignment requirements, causing the old
      // SP to be saved on the stack. In those cases, we cannot calculate the
      // SP delta statically.
      frame_access_state()->ClearSPDelta();
      if (caller_registers_saved_) {
        // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
        // Here, we assume the sequence to be:
        //   kArchSaveCallerRegisters;
        //   kArchCallCFunction;
        //   kArchRestoreCallerRegisters;
        int bytes =
            __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
        frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
      }
      // TODO(turbofan): Do we need an lfence here?
      break;
    }
    case kArchJmp:
      AssembleArchJump(i.InputRpo(0));
      break;
    case kArchBinarySearchSwitch:
      AssembleArchBinarySearchSwitch(instr);
      break;
    case kArchTableSwitch:
      AssembleArchTableSwitch(instr);
      break;
    case kArchComment:
      __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
      break;
    case kArchAbortCSADcheck:
      DCHECK(i.InputRegister(0) == rdx);
      {
        // We don't actually want to generate a pile of code for this, so just
        // claim there is a stack frame, without generating one.
        FrameScope scope(tasm(), StackFrame::NO_FRAME_TYPE);
        __ Call(isolate()->builtins()->code_handle(Builtin::kAbortCSADcheck),
                RelocInfo::CODE_TARGET);
      }
      __ int3();
      unwinding_info_writer_.MarkBlockWillExit();
      break;
    case kArchDebugBreak:
      __ DebugBreak();
      break;
    case kArchThrowTerminator:
      unwinding_info_writer_.MarkBlockWillExit();
      break;
    case kArchNop:
      // Don't emit code for nops.
      break;
    case kArchDeoptimize: {
      DeoptimizationExit* exit =
          BuildTranslation(instr, -1, 0, 0, OutputFrameStateCombine::Ignore());
      __ jmp(exit->label());
      break;
    }
    case kArchRet:
      AssembleReturn(instr->InputAt(0));
      break;
    case kArchFramePointer:
      __ movq(i.OutputRegister(), rbp);
      break;
    case kArchParentFramePointer:
      if (frame_access_state()->has_frame()) {
        __ movq(i.OutputRegister(), Operand(rbp, 0));
      } else {
        __ movq(i.OutputRegister(), rbp);
      }
      break;
    case kArchStackPointerGreaterThan: {
      // Potentially apply an offset to the current stack pointer before the
      // comparison to consider the size difference of an optimized frame
      // versus the contained unoptimized frames.

      Register lhs_register = rsp;
      uint32_t offset;

      if (ShouldApplyOffsetToStackCheck(instr, &offset)) {
        lhs_register = kScratchRegister;
        __ leaq(lhs_register, Operand(rsp, static_cast<int32_t>(offset) * -1));
      }

      constexpr size_t kValueIndex = 0;
      if (HasAddressingMode(instr)) {
        __ cmpq(lhs_register, i.MemoryOperand(kValueIndex));
      } else {
        __ cmpq(lhs_register, i.InputRegister(kValueIndex));
      }
      break;
    }
    case kArchStackCheckOffset:
      __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
      break;
    case kArchTruncateDoubleToI: {
      auto result = i.OutputRegister();
      auto input = i.InputDoubleRegister(0);
      auto ool = zone()->New<OutOfLineTruncateDoubleToI>(
          this, result, input, DetermineStubCallMode(),
          &unwinding_info_writer_);
      // We use Cvttsd2siq instead of Cvttsd2si for performance reasons. The
      // use of Cvttsd2siq requires the movl below to avoid sign extension.
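      // Cvttsd2siq produces the sentinel INT64_MIN on overflow or NaN, and
      // subtracting 1 overflows only for INT64_MIN, so cmpq/j(overflow)
      // reaches the out-of-line slow path exactly in those cases.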
      __ Cvttsd2siq(result, input);
      __ cmpq(result, Immediate(1));
      __ j(overflow, ool->entry());
      __ bind(ool->exit());
      __ movl(result, result);
      break;
    }
    case kArchStoreWithWriteBarrier:  // Fall through.
    case kArchAtomicStoreWithWriteBarrier: {
      RecordWriteMode mode =
          static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
      Register object = i.InputRegister(0);
      size_t index = 0;
      Operand operand = i.MemoryOperand(&index);
      Register value = i.InputRegister(index);
      Register scratch0 = i.TempRegister(0);
      Register scratch1 = i.TempRegister(1);
      auto ool = zone()->New<OutOfLineRecordWrite>(this, object, operand,
                                                   value, scratch0, scratch1,
                                                   mode,
                                                   DetermineStubCallMode());
      if (arch_opcode == kArchStoreWithWriteBarrier) {
        EmitTSANAwareStore<std::memory_order_relaxed>(
            zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
            MachineRepresentation::kTagged);
      } else {
        DCHECK_EQ(arch_opcode, kArchAtomicStoreWithWriteBarrier);
        EmitTSANAwareStore<std::memory_order_seq_cst>(
            zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
            MachineRepresentation::kTagged);
      }
      if (mode > RecordWriteMode::kValueIsPointer) {
        __ JumpIfSmi(value, ool->exit());
      }
      __ CheckPageFlag(object, scratch0,
                       MemoryChunk::kPointersFromHereAreInterestingMask,
                       not_zero, ool->entry());
      __ bind(ool->exit());
      break;
    }
    case kX64MFence:
      __ mfence();
      break;
    case kX64LFence:
      __ lfence();
      break;
    case kArchStackSlot: {
      FrameOffset offset =
          frame_access_state()->GetFrameOffset(i.InputInt32(0));
      Register base = offset.from_stack_pointer() ? rsp : rbp;
      __ leaq(i.OutputRegister(), Operand(base, offset.offset()));
      break;
    }
    case kIeee754Float64Acos:
      ASSEMBLE_IEEE754_UNOP(acos);
      break;
    case kIeee754Float64Acosh:
      ASSEMBLE_IEEE754_UNOP(acosh);
      break;
    case kIeee754Float64Asin:
      ASSEMBLE_IEEE754_UNOP(asin);
      break;
    case kIeee754Float64Asinh:
      ASSEMBLE_IEEE754_UNOP(asinh);
      break;
    case kIeee754Float64Atan:
      ASSEMBLE_IEEE754_UNOP(atan);
      break;
    case kIeee754Float64Atanh:
      ASSEMBLE_IEEE754_UNOP(atanh);
      break;
    case kIeee754Float64Atan2:
      ASSEMBLE_IEEE754_BINOP(atan2);
      break;
    case kIeee754Float64Cbrt:
      ASSEMBLE_IEEE754_UNOP(cbrt);
      break;
    case kIeee754Float64Cos:
      ASSEMBLE_IEEE754_UNOP(cos);
      break;
    case kIeee754Float64Cosh:
      ASSEMBLE_IEEE754_UNOP(cosh);
      break;
    case kIeee754Float64Exp:
      ASSEMBLE_IEEE754_UNOP(exp);
      break;
    case kIeee754Float64Expm1:
      ASSEMBLE_IEEE754_UNOP(expm1);
      break;
    case kIeee754Float64Log:
      ASSEMBLE_IEEE754_UNOP(log);
      break;
    case kIeee754Float64Log1p:
      ASSEMBLE_IEEE754_UNOP(log1p);
      break;
    case kIeee754Float64Log2:
      ASSEMBLE_IEEE754_UNOP(log2);
      break;
    case kIeee754Float64Log10:
      ASSEMBLE_IEEE754_UNOP(log10);
      break;
    case kIeee754Float64Pow:
      ASSEMBLE_IEEE754_BINOP(pow);
      break;
    case kIeee754Float64Sin:
      ASSEMBLE_IEEE754_UNOP(sin);
      break;
    case kIeee754Float64Sinh:
      ASSEMBLE_IEEE754_UNOP(sinh);
      break;
    case kIeee754Float64Tan:
      ASSEMBLE_IEEE754_UNOP(tan);
      break;
    case kIeee754Float64Tanh:
      ASSEMBLE_IEEE754_UNOP(tanh);
      break;
    case kX64Add32:
      ASSEMBLE_BINOP(addl);
      break;
    case kX64Add:
      ASSEMBLE_BINOP(addq);
      break;
    case kX64Sub32:
      ASSEMBLE_BINOP(subl);
      break;
    case kX64Sub:
      ASSEMBLE_BINOP(subq);
      break;
    case kX64And32:
      ASSEMBLE_BINOP(andl);
      break;
    case kX64And:
      ASSEMBLE_BINOP(andq);
      break;
    case kX64Cmp8:
      ASSEMBLE_COMPARE(cmpb);
      break;
    case kX64Cmp16:
      ASSEMBLE_COMPARE(cmpw);
      break;
    case kX64Cmp32:
      ASSEMBLE_COMPARE(cmpl);
      break;
    case kX64Cmp:
      ASSEMBLE_COMPARE(cmpq);
      break;
    case kX64Test8:
      ASSEMBLE_COMPARE(testb);
      break;
    case kX64Test16:
      ASSEMBLE_COMPARE(testw);
      break;
    case kX64Test32:
      ASSEMBLE_COMPARE(testl);
      break;
    case kX64Test:
      ASSEMBLE_COMPARE(testq);
      break;
    case kX64Imul32:
      ASSEMBLE_MULT(imull);
      break;
    case kX64Imul:
      ASSEMBLE_MULT(imulq);
      break;
    case kX64ImulHigh32:
      if (HasRegisterInput(instr, 1)) {
        __ imull(i.InputRegister(1));
      } else {
        __ imull(i.InputOperand(1));
      }
      break;
    case kX64UmulHigh32:
      if (HasRegisterInput(instr, 1)) {
        __ mull(i.InputRegister(1));
      } else {
        __ mull(i.InputOperand(1));
      }
      break;
    case kX64Idiv32:
      __ cdq();
      __ idivl(i.InputRegister(1));
      break;
    case kX64Idiv:
      __ cqo();
      __ idivq(i.InputRegister(1));
      break;
    case kX64Udiv32:
      __ xorl(rdx, rdx);
      __ divl(i.InputRegister(1));
      break;
    case kX64Udiv:
      __ xorq(rdx, rdx);
      __ divq(i.InputRegister(1));
      break;
    case kX64Not:
      ASSEMBLE_UNOP(notq);
      break;
    case kX64Not32:
      ASSEMBLE_UNOP(notl);
      break;
    case kX64Neg:
      ASSEMBLE_UNOP(negq);
      break;
    case kX64Neg32:
      ASSEMBLE_UNOP(negl);
      break;
    case kX64Or32:
      ASSEMBLE_BINOP(orl);
      break;
    case kX64Or:
      ASSEMBLE_BINOP(orq);
      break;
    case kX64Xor32:
      ASSEMBLE_BINOP(xorl);
      break;
    case kX64Xor:
      ASSEMBLE_BINOP(xorq);
      break;
    case kX64Shl32:
      ASSEMBLE_SHIFT(shll, 5);
      break;
    case kX64Shl:
      ASSEMBLE_SHIFT(shlq, 6);
      break;
    case kX64Shr32:
      ASSEMBLE_SHIFT(shrl, 5);
      break;
    case kX64Shr:
      ASSEMBLE_SHIFT(shrq, 6);
      break;
    case kX64Sar32:
      ASSEMBLE_SHIFT(sarl, 5);
      break;
    case kX64Sar:
      ASSEMBLE_SHIFT(sarq, 6);
      break;
    case kX64Rol32:
      ASSEMBLE_SHIFT(roll, 5);
      break;
    case kX64Rol:
      ASSEMBLE_SHIFT(rolq, 6);
      break;
    case kX64Ror32:
      ASSEMBLE_SHIFT(rorl, 5);
      break;
    case kX64Ror:
      ASSEMBLE_SHIFT(rorq, 6);
      break;
    case kX64Lzcnt:
      if (HasRegisterInput(instr, 0)) {
        __ Lzcntq(i.OutputRegister(), i.InputRegister(0));
      } else {
        __ Lzcntq(i.OutputRegister(), i.InputOperand(0));
      }
      break;
    case kX64Lzcnt32:
      if (HasRegisterInput(instr, 0)) {
        __ Lzcntl(i.OutputRegister(), i.InputRegister(0));
      } else {
        __ Lzcntl(i.OutputRegister(), i.InputOperand(0));
      }
      break;
    case kX64Tzcnt:
1698 if (HasRegisterInput(instr, 0)) {
1699 __ Tzcntq(i.OutputRegister(), i.InputRegister(0));
1700 } else {
1701 __ Tzcntq(i.OutputRegister(), i.InputOperand(0));
1702 }
1703 break;
1704 case kX64Tzcnt32:
1705 if (HasRegisterInput(instr, 0)) {
1706 __ Tzcntl(i.OutputRegister(), i.InputRegister(0));
1707 } else {
1708 __ Tzcntl(i.OutputRegister(), i.InputOperand(0));
1709 }
1710 break;
1711 case kX64Popcnt:
1712 if (HasRegisterInput(instr, 0)) {
1713 __ Popcntq(i.OutputRegister(), i.InputRegister(0));
1714 } else {
1715 __ Popcntq(i.OutputRegister(), i.InputOperand(0));
1716 }
1717 break;
1718 case kX64Popcnt32:
1719 if (HasRegisterInput(instr, 0)) {
1720 __ Popcntl(i.OutputRegister(), i.InputRegister(0));
1721 } else {
1722 __ Popcntl(i.OutputRegister(), i.InputOperand(0));
1723 }
1724 break;
1725 case kX64Bswap:
1726 __ bswapq(i.OutputRegister());
1727 break;
1728 case kX64Bswap32:
1729 __ bswapl(i.OutputRegister());
1730 break;
1731 case kSSEFloat32Cmp:
1732 ASSEMBLE_SSE_BINOP(Ucomiss);
1733 break;
1734 case kSSEFloat32Add:
1735 ASSEMBLE_SSE_BINOP(addss);
1736 break;
1737 case kSSEFloat32Sub:
1738 ASSEMBLE_SSE_BINOP(subss);
1739 break;
1740 case kSSEFloat32Mul:
1741 ASSEMBLE_SSE_BINOP(mulss);
1742 break;
1743 case kSSEFloat32Div:
1744 ASSEMBLE_SSE_BINOP(divss);
1745 // Don't delete this mov. It may improve performance on some CPUs
1746 // when there is a (v)mulss depending on the result.
1747 __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1748 break;
1749 case kSSEFloat32Sqrt:
1750 ASSEMBLE_SSE_UNOP(sqrtss);
1751 break;
1752 case kSSEFloat32ToFloat64:
1753 ASSEMBLE_SSE_UNOP(Cvtss2sd);
1754 break;
1755 case kSSEFloat32Round: {
1756 CpuFeatureScope sse_scope(tasm(), SSE4_1);
1757 RoundingMode const mode =
1758 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1759 __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1760 break;
1761 }
1762 case kSSEFloat32ToInt32:
1763 if (instr->InputAt(0)->IsFPRegister()) {
1764 __ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0));
1765 } else {
1766 __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
1767 }
1768 break;
1769 case kSSEFloat32ToUint32: {
1770 if (instr->InputAt(0)->IsFPRegister()) {
1771 __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1772 } else {
1773 __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1774 }
1775 break;
1776 }
1777 case kSSEFloat64Cmp:
1778 ASSEMBLE_SSE_BINOP(Ucomisd);
1779 break;
1780 case kSSEFloat64Add:
1781 ASSEMBLE_SSE_BINOP(addsd);
1782 break;
1783 case kSSEFloat64Sub:
1784 ASSEMBLE_SSE_BINOP(subsd);
1785 break;
1786 case kSSEFloat64Mul:
1787 ASSEMBLE_SSE_BINOP(mulsd);
1788 break;
1789 case kSSEFloat64Div:
1790 ASSEMBLE_SSE_BINOP(divsd);
1791 // Don't delete this mov. It may improve performance on some CPUs
1792 // when there is a (v)mulsd depending on the result.
1793 __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1794 break;
1795 case kSSEFloat64Mod: {
1796 __ AllocateStackSpace(kDoubleSize);
1797 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1798 kDoubleSize);
1799 // Move values to st(0) and st(1).
1800 __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
1801 __ fld_d(Operand(rsp, 0));
1802 __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
1803 __ fld_d(Operand(rsp, 0));
1804 // Loop while fprem isn't done.
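// fprem sets the C2 status bit while the reduction is still partial. After
// fnstsw_ax/sahf, C2 lands in the parity flag, hence the parity_even loop.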
1805 Label mod_loop;
1806 __ bind(&mod_loop);
1807 // This instruction traps on all kinds of inputs, but we are assuming the
1808 // floating point control word is set to ignore them all.
1809 __ fprem();
1810 // The following 2 instructions implicitly use rax.
1811 __ fnstsw_ax();
1812 if (CpuFeatures::IsSupported(SAHF)) {
1813 CpuFeatureScope sahf_scope(tasm(), SAHF);
1814 __ sahf();
1815 } else {
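// Without SAHF, transfer the FPU condition bits manually: fnstsw_ax left
// the status word in ax, so shift the condition byte down into al, mask
// it, and load it into RFLAGS via pushq/popfq.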
1816 __ shrl(rax, Immediate(8));
1817 __ andl(rax, Immediate(0xFF));
1818 __ pushq(rax);
1819 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1820 kSystemPointerSize);
1821 __ popfq();
1822 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1823 -kSystemPointerSize);
1824 }
1825 __ j(parity_even, &mod_loop);
1826 // Move output to stack and clean up.
1827 __ fstp(1);
1828 __ fstp_d(Operand(rsp, 0));
1829 __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
1830 __ addq(rsp, Immediate(kDoubleSize));
1831 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1832 -kDoubleSize);
1833 break;
1834 }
1835 case kSSEFloat32Max: {
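// IEEE-style max: unordered inputs (parity set) take the out-of-line NaN
// path. When the operands compare equal, the sign bit of src0
// disambiguates -0 from +0: a set sign bit means src0 is -0, so src1 is
// taken instead, giving max(-0, +0) == +0.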
1836 Label compare_swap, done_compare;
1837 if (instr->InputAt(1)->IsFPRegister()) {
1838 __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1839 } else {
1840 __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1841 }
1842 auto ool =
1843 zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
1844 __ j(parity_even, ool->entry());
1845 __ j(above, &done_compare, Label::kNear);
1846 __ j(below, &compare_swap, Label::kNear);
1847 __ Movmskps(kScratchRegister, i.InputDoubleRegister(0));
1848 __ testl(kScratchRegister, Immediate(1));
1849 __ j(zero, &done_compare, Label::kNear);
1850 __ bind(&compare_swap);
1851 if (instr->InputAt(1)->IsFPRegister()) {
1852 __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1853 } else {
1854 __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1855 }
1856 __ bind(&done_compare);
1857 __ bind(ool->exit());
1858 break;
1859 }
1860 case kSSEFloat32Min: {
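// Same scheme as Max above with the branches mirrored; in the equal case
// the sign bit of src1 decides, so that min(-0, +0) == -0.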
1861 Label compare_swap, done_compare;
1862 if (instr->InputAt(1)->IsFPRegister()) {
1863 __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1864 } else {
1865 __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1866 }
1867 auto ool =
1868 zone()->New<OutOfLineLoadFloat32NaN>(this, i.OutputDoubleRegister());
1869 __ j(parity_even, ool->entry());
1870 __ j(below, &done_compare, Label::kNear);
1871 __ j(above, &compare_swap, Label::kNear);
1872 if (instr->InputAt(1)->IsFPRegister()) {
1873 __ Movmskps(kScratchRegister, i.InputDoubleRegister(1));
1874 } else {
1875 __ Movss(kScratchDoubleReg, i.InputOperand(1));
1876 __ Movmskps(kScratchRegister, kScratchDoubleReg);
1877 }
1878 __ testl(kScratchRegister, Immediate(1));
1879 __ j(zero, &done_compare, Label::kNear);
1880 __ bind(&compare_swap);
1881 if (instr->InputAt(1)->IsFPRegister()) {
1882 __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1883 } else {
1884 __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1885 }
1886 __ bind(&done_compare);
1887 __ bind(ool->exit());
1888 break;
1889 }
1890 case kSSEFloat64Max: {
1891 Label compare_swap, done_compare;
1892 if (instr->InputAt(1)->IsFPRegister()) {
1893 __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1894 } else {
1895 __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1896 }
1897 auto ool =
1898 zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
1899 __ j(parity_even, ool->entry());
1900 __ j(above, &done_compare, Label::kNear);
1901 __ j(below, &compare_swap, Label::kNear);
1902 __ Movmskpd(kScratchRegister, i.InputDoubleRegister(0));
1903 __ testl(kScratchRegister, Immediate(1));
1904 __ j(zero, &done_compare, Label::kNear);
1905 __ bind(&compare_swap);
1906 if (instr->InputAt(1)->IsFPRegister()) {
1907 __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1908 } else {
1909 __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1910 }
1911 __ bind(&done_compare);
1912 __ bind(ool->exit());
1913 break;
1914 }
1915 case kSSEFloat64Min: {
1916 Label compare_swap, done_compare;
1917 if (instr->InputAt(1)->IsFPRegister()) {
1918 __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1919 } else {
1920 __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1921 }
1922 auto ool =
1923 zone()->New<OutOfLineLoadFloat64NaN>(this, i.OutputDoubleRegister());
1924 __ j(parity_even, ool->entry());
1925 __ j(below, &done_compare, Label::kNear);
1926 __ j(above, &compare_swap, Label::kNear);
1927 if (instr->InputAt(1)->IsFPRegister()) {
1928 __ Movmskpd(kScratchRegister, i.InputDoubleRegister(1));
1929 } else {
1930 __ Movsd(kScratchDoubleReg, i.InputOperand(1));
1931 __ Movmskpd(kScratchRegister, kScratchDoubleReg);
1932 }
1933 __ testl(kScratchRegister, Immediate(1));
1934 __ j(zero, &done_compare, Label::kNear);
1935 __ bind(&compare_swap);
1936 if (instr->InputAt(1)->IsFPRegister()) {
1937 __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1938 } else {
1939 __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1940 }
1941 __ bind(&done_compare);
1942 __ bind(ool->exit());
1943 break;
1944 }
1945 case kSSEFloat64Sqrt:
1946 ASSEMBLE_SSE_UNOP(Sqrtsd);
1947 break;
1948 case kSSEFloat64Round: {
1949 CpuFeatureScope sse_scope(tasm(), SSE4_1);
1950 RoundingMode const mode =
1951 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1952 __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1953 break;
1954 }
1955 case kSSEFloat64ToFloat32:
1956 ASSEMBLE_SSE_UNOP(Cvtsd2ss);
1957 break;
1958 case kSSEFloat64ToInt32:
1959 if (instr->InputAt(0)->IsFPRegister()) {
1960 __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
1961 } else {
1962 __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
1963 }
1964 break;
1965 case kSSEFloat64ToUint32: {
1966 if (instr->InputAt(0)->IsFPRegister()) {
1967 __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1968 } else {
1969 __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
1970 }
1971 if (MiscField::decode(instr->opcode())) {
1972 __ AssertZeroExtended(i.OutputRegister());
1973 }
1974 break;
1975 }
1976 case kSSEFloat32ToInt64:
1977 if (instr->InputAt(0)->IsFPRegister()) {
1978 __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1979 } else {
1980 __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1981 }
1982 if (instr->OutputCount() > 1) {
1983 __ Move(i.OutputRegister(1), 1);
1984 Label done;
1985 Label fail;
1986 __ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN));
1987 if (instr->InputAt(0)->IsFPRegister()) {
1988 __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
1989 } else {
1990 __ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
1991 }
1992 // If the input is NaN, then the conversion fails.
1993 __ j(parity_even, &fail, Label::kNear);
1994 // If the input is INT64_MIN, then the conversion succeeds.
1995 __ j(equal, &done, Label::kNear);
1996 __ cmpq(i.OutputRegister(0), Immediate(1));
1997 // If the conversion results in INT64_MIN, but the input was not
1998 // INT64_MIN, then the conversion fails.
1999 __ j(no_overflow, &done, Label::kNear);
2000 __ bind(&fail);
2001 __ Move(i.OutputRegister(1), 0);
2002 __ bind(&done);
2003 }
2004 break;
2005 case kSSEFloat64ToInt64:
2006 if (instr->InputAt(0)->IsFPRegister()) {
2007 __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
2008 } else {
2009 __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
2010 }
2011 if (instr->OutputCount() > 1) {
2012 __ Move(i.OutputRegister(1), 1);
2013 Label done;
2014 Label fail;
2015 __ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN));
2016 if (instr->InputAt(0)->IsFPRegister()) {
2017 __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
2018 } else {
2019 __ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
2020 }
2021 // If the input is NaN, then the conversion fails.
2022 __ j(parity_even, &fail, Label::kNear);
2023 // If the input is INT64_MIN, then the conversion succeeds.
2024 __ j(equal, &done, Label::kNear);
2025 __ cmpq(i.OutputRegister(0), Immediate(1));
2026 // If the conversion results in INT64_MIN, but the input was not
2027 // INT64_MIN, then the conversion fails.
2028 __ j(no_overflow, &done, Label::kNear);
2029 __ bind(&fail);
2030 __ Move(i.OutputRegister(1), 0);
2031 __ bind(&done);
2032 }
2033 break;
2034 case kSSEFloat32ToUint64: {
2035 Label fail;
2036 if (instr->OutputCount() > 1) __ Move(i.OutputRegister(1), 0);
2037 if (instr->InputAt(0)->IsFPRegister()) {
2038 __ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
2039 } else {
2040 __ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
2041 }
2042 if (instr->OutputCount() > 1) __ Move(i.OutputRegister(1), 1);
2043 __ bind(&fail);
2044 break;
2045 }
2046 case kSSEFloat64ToUint64: {
2047 Label fail;
2048 if (instr->OutputCount() > 1) __ Move(i.OutputRegister(1), 0);
2049 if (instr->InputAt(0)->IsFPRegister()) {
2050 __ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
2051 } else {
2052 __ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
2053 }
2054 if (instr->OutputCount() > 1) __ Move(i.OutputRegister(1), 1);
2055 __ bind(&fail);
2056 break;
2057 }
2058 case kSSEInt32ToFloat64:
2059 if (HasRegisterInput(instr, 0)) {
2060 __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
2061 } else {
2062 __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
2063 }
2064 break;
2065 case kSSEInt32ToFloat32:
2066 if (HasRegisterInput(instr, 0)) {
2067 __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
2068 } else {
2069 __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
2070 }
2071 break;
2072 case kSSEInt64ToFloat32:
2073 if (HasRegisterInput(instr, 0)) {
2074 __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
2075 } else {
2076 __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
2077 }
2078 break;
2079 case kSSEInt64ToFloat64:
2080 if (HasRegisterInput(instr, 0)) {
2081 __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
2082 } else {
2083 __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
2084 }
2085 break;
2086 case kSSEUint64ToFloat32:
2087 if (HasRegisterInput(instr, 0)) {
2088 __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
2089 } else {
2090 __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
2091 }
2092 break;
2093 case kSSEUint64ToFloat64:
2094 if (HasRegisterInput(instr, 0)) {
2095 __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
2096 } else {
2097 __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
2098 }
2099 break;
2100 case kSSEUint32ToFloat64:
2101 if (HasRegisterInput(instr, 0)) {
2102 __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
2103 } else {
2104 __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
2105 }
2106 break;
2107 case kSSEUint32ToFloat32:
2108 if (HasRegisterInput(instr, 0)) {
2109 __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
2110 } else {
2111 __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
2112 }
2113 break;
2114 case kSSEFloat64ExtractLowWord32:
2115 if (instr->InputAt(0)->IsFPStackSlot()) {
2116 __ movl(i.OutputRegister(), i.InputOperand(0));
2117 } else {
2118 __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
2119 }
2120 break;
2121 case kSSEFloat64ExtractHighWord32:
2122 if (instr->InputAt(0)->IsFPStackSlot()) {
2123 __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
2124 } else {
2125 __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
2126 }
2127 break;
2128 case kSSEFloat64InsertLowWord32:
2129 if (HasRegisterInput(instr, 1)) {
2130 __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
2131 } else {
2132 __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
2133 }
2134 break;
2135 case kSSEFloat64InsertHighWord32:
2136 if (HasRegisterInput(instr, 1)) {
2137 __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
2138 } else {
2139 __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
2140 }
2141 break;
2142 case kSSEFloat64LoadLowWord32:
2143 if (HasRegisterInput(instr, 0)) {
2144 __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
2145 } else {
2146 __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
2147 }
2148 break;
2149 case kAVXFloat32Cmp: {
2150 CpuFeatureScope avx_scope(tasm(), AVX);
2151 if (instr->InputAt(1)->IsFPRegister()) {
2152 __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
2153 } else {
2154 __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
2155 }
2156 break;
2157 }
2158 case kAVXFloat32Add:
2159 ASSEMBLE_AVX_BINOP(vaddss);
2160 break;
2161 case kAVXFloat32Sub:
2162 ASSEMBLE_AVX_BINOP(vsubss);
2163 break;
2164 case kAVXFloat32Mul:
2165 ASSEMBLE_AVX_BINOP(vmulss);
2166 break;
2167 case kAVXFloat32Div:
2168 ASSEMBLE_AVX_BINOP(vdivss);
2169 // Don't delete this mov. It may improve performance on some CPUs
2170 // when there is a (v)mulss depending on the result.
2171 __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
2172 break;
2173 case kAVXFloat64Cmp: {
2174 CpuFeatureScope avx_scope(tasm(), AVX);
2175 if (instr->InputAt(1)->IsFPRegister()) {
2176 __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
2177 } else {
2178 __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
2179 }
2180 break;
2181 }
2182 case kAVXFloat64Add:
2183 ASSEMBLE_AVX_BINOP(vaddsd);
2184 break;
2185 case kAVXFloat64Sub:
2186 ASSEMBLE_AVX_BINOP(vsubsd);
2187 break;
2188 case kAVXFloat64Mul:
2189 ASSEMBLE_AVX_BINOP(vmulsd);
2190 break;
2191 case kAVXFloat64Div:
2192 ASSEMBLE_AVX_BINOP(vdivsd);
2193 // Don't delete this mov. It may improve performance on some CPUs
2194 // when there is a (v)mulsd depending on the result.
2195 __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
2196 break;
2197 case kX64Float32Abs: {
2198 __ Absps(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2199 kScratchRegister);
2200 break;
2201 }
2202 case kX64Float32Neg: {
2203 __ Negps(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2204 kScratchRegister);
2205 break;
2206 }
2207 case kX64F64x2Abs:
2208 case kX64Float64Abs: {
2209 __ Abspd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2210 kScratchRegister);
2211 break;
2212 }
2213 case kX64F64x2Neg:
2214 case kX64Float64Neg: {
2215 __ Negpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2216 kScratchRegister);
2217 break;
2218 }
2219 case kSSEFloat64SilenceNaN:
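// Subtracting +0.0 is an identity for every non-NaN input (including
// -0.0, since -0.0 - +0.0 == -0.0 under round-to-nearest), but it quiets
// a signaling NaN, which is all this operation has to do.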
2220 __ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
2221 __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
2222 break;
2223 case kX64Movsxbl:
2224 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2225 ASSEMBLE_MOVX(movsxbl);
2226 __ AssertZeroExtended(i.OutputRegister());
2227 break;
2228 case kX64Movzxbl:
2229 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2230 ASSEMBLE_MOVX(movzxbl);
2231 __ AssertZeroExtended(i.OutputRegister());
2232 break;
2233 case kX64Movsxbq:
2234 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2235 ASSEMBLE_MOVX(movsxbq);
2236 break;
2237 case kX64Movzxbq:
2238 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2239 ASSEMBLE_MOVX(movzxbq);
2240 __ AssertZeroExtended(i.OutputRegister());
2241 break;
2242 case kX64Movb: {
2243 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2244 size_t index = 0;
2245 Operand operand = i.MemoryOperand(&index);
2246 if (HasImmediateInput(instr, index)) {
2247 Immediate value(Immediate(i.InputInt8(index)));
2248 EmitTSANAwareStore<std::memory_order_relaxed>(
2249 zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2250 MachineRepresentation::kWord8);
2251 } else {
2252 Register value(i.InputRegister(index));
2253 EmitTSANAwareStore<std::memory_order_relaxed>(
2254 zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2255 MachineRepresentation::kWord8);
2256 }
2257 break;
2258 }
2259 case kX64Movsxwl:
2260 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2261 ASSEMBLE_MOVX(movsxwl);
2262 __ AssertZeroExtended(i.OutputRegister());
2263 break;
2264 case kX64Movzxwl:
2265 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2266 ASSEMBLE_MOVX(movzxwl);
2267 __ AssertZeroExtended(i.OutputRegister());
2268 break;
2269 case kX64Movsxwq:
2270 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2271 ASSEMBLE_MOVX(movsxwq);
2272 break;
2273 case kX64Movzxwq:
2274 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2275 ASSEMBLE_MOVX(movzxwq);
2276 __ AssertZeroExtended(i.OutputRegister());
2277 break;
2278 case kX64Movw: {
2279 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2280 size_t index = 0;
2281 Operand operand = i.MemoryOperand(&index);
2282 if (HasImmediateInput(instr, index)) {
2283 Immediate value(Immediate(i.InputInt16(index)));
2284 EmitTSANAwareStore<std::memory_order_relaxed>(
2285 zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2286 MachineRepresentation::kWord16);
2287 } else {
2288 Register value(i.InputRegister(index));
2289 EmitTSANAwareStore<std::memory_order_relaxed>(
2290 zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2291 MachineRepresentation::kWord16);
2292 }
2293 break;
2294 }
2295 case kX64Movl:
2296 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2297 if (instr->HasOutput()) {
2298 if (HasAddressingMode(instr)) {
2299 Operand address(i.MemoryOperand());
2300 __ movl(i.OutputRegister(), address);
2301 EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, tasm(), address, i,
2302 DetermineStubCallMode(), kInt32Size);
2303 } else {
2304 if (HasRegisterInput(instr, 0)) {
2305 __ movl(i.OutputRegister(), i.InputRegister(0));
2306 } else {
2307 __ movl(i.OutputRegister(), i.InputOperand(0));
2308 }
2309 }
2310 __ AssertZeroExtended(i.OutputRegister());
2311 } else {
2312 size_t index = 0;
2313 Operand operand = i.MemoryOperand(&index);
2314 if (HasImmediateInput(instr, index)) {
2315 Immediate value(i.InputImmediate(index));
2316 EmitTSANAwareStore<std::memory_order_relaxed>(
2317 zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2318 MachineRepresentation::kWord32);
2319 } else {
2320 Register value(i.InputRegister(index));
2321 EmitTSANAwareStore<std::memory_order_relaxed>(
2322 zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2323 MachineRepresentation::kWord32);
2324 }
2325 }
2326 break;
2327 case kX64Movsxlq:
2328 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2329 ASSEMBLE_MOVX(movsxlq);
2330 break;
2331 case kX64MovqDecompressTaggedSigned: {
2332 CHECK(instr->HasOutput());
2333 Operand address(i.MemoryOperand());
2334 __ DecompressTaggedSigned(i.OutputRegister(), address);
2335 EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, tasm(), address, i,
2336 DetermineStubCallMode(), kTaggedSize);
2337 break;
2338 }
2339 case kX64MovqDecompressTaggedPointer: {
2340 CHECK(instr->HasOutput());
2341 Operand address(i.MemoryOperand());
2342 __ DecompressTaggedPointer(i.OutputRegister(), address);
2343 EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, tasm(), address, i,
2344 DetermineStubCallMode(), kTaggedSize);
2345 break;
2346 }
2347 case kX64MovqDecompressAnyTagged: {
2348 CHECK(instr->HasOutput());
2349 Operand address(i.MemoryOperand());
2350 __ DecompressAnyTagged(i.OutputRegister(), address);
2351 EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, tasm(), address, i,
2352 DetermineStubCallMode(), kTaggedSize);
2353 break;
2354 }
2355 case kX64MovqCompressTagged: {
2356 CHECK(!instr->HasOutput());
2357 size_t index = 0;
2358 Operand operand = i.MemoryOperand(&index);
2359 if (HasImmediateInput(instr, index)) {
2360 Immediate value(i.InputImmediate(index));
2361 EmitTSANAwareStore<std::memory_order_relaxed>(
2362 zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2363 MachineRepresentation::kTagged);
2364 } else {
2365 Register value(i.InputRegister(index));
2366 EmitTSANAwareStore<std::memory_order_relaxed>(
2367 zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2368 MachineRepresentation::kTagged);
2369 }
2370 break;
2371 }
2372 case kX64Movq:
2373 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2374 if (instr->HasOutput()) {
2375 Operand address(i.MemoryOperand());
2376 __ movq(i.OutputRegister(), address);
2377 EmitTSANRelaxedLoadOOLIfNeeded(zone(), this, tasm(), address, i,
2378 DetermineStubCallMode(), kInt64Size);
2379 } else {
2380 size_t index = 0;
2381 Operand operand = i.MemoryOperand(&index);
2382 if (HasImmediateInput(instr, index)) {
2383 Immediate value(i.InputImmediate(index));
2384 EmitTSANAwareStore<std::memory_order_relaxed>(
2385 zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2386 MachineRepresentation::kWord64);
2387 } else {
2388 Register value(i.InputRegister(index));
2389 EmitTSANAwareStore<std::memory_order_relaxed>(
2390 zone(), this, tasm(), operand, value, i, DetermineStubCallMode(),
2391 MachineRepresentation::kWord64);
2392 }
2393 }
2394 break;
2395 case kX64Movss:
2396 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2397 if (instr->HasOutput()) {
2398 __ Movss(i.OutputDoubleRegister(), i.MemoryOperand());
2399 } else {
2400 size_t index = 0;
2401 Operand operand = i.MemoryOperand(&index);
2402 __ Movss(operand, i.InputDoubleRegister(index));
2403 }
2404 break;
2405 case kX64Movsd: {
2406 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2407 if (instr->HasOutput()) {
2408 __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
2409 } else {
2410 size_t index = 0;
2411 Operand operand = i.MemoryOperand(&index);
2412 __ Movsd(operand, i.InputDoubleRegister(index));
2413 }
2414 break;
2415 }
2416 case kX64Movdqu: {
2417 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2418 if (instr->HasOutput()) {
2419 __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
2420 } else {
2421 size_t index = 0;
2422 Operand operand = i.MemoryOperand(&index);
2423 __ Movdqu(operand, i.InputSimd128Register(index));
2424 }
2425 break;
2426 }
2427 case kX64BitcastFI:
2428 if (instr->InputAt(0)->IsFPStackSlot()) {
2429 __ movl(i.OutputRegister(), i.InputOperand(0));
2430 } else {
2431 __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
2432 }
2433 break;
2434 case kX64BitcastDL:
2435 if (instr->InputAt(0)->IsFPStackSlot()) {
2436 __ movq(i.OutputRegister(), i.InputOperand(0));
2437 } else {
2438 __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
2439 }
2440 break;
2441 case kX64BitcastIF:
2442 if (HasRegisterInput(instr, 0)) {
2443 __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
2444 } else {
2445 __ Movss(i.OutputDoubleRegister(), i.InputOperand(0));
2446 }
2447 break;
2448 case kX64BitcastLD:
2449 if (HasRegisterInput(instr, 0)) {
2450 __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
2451 } else {
2452 __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
2453 }
2454 break;
2455 case kX64Lea32: {
2456 AddressingMode mode = AddressingModeField::decode(instr->opcode());
2457 // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
2458 // and addressing mode just happen to work out. The "addl"/"subl" forms
2459 // in these cases are faster based on measurements.
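// For example, with dst == src0: "leal dst, [dst+imm]" becomes
// "addl dst, imm" (or subl for a negative imm), "leal dst, [dst+dst]"
// becomes "shll dst, 1", and "leal dst, [dst*4]" becomes "shll dst, 2".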
2460 if (i.InputRegister(0) == i.OutputRegister()) {
2461 if (mode == kMode_MRI) {
2462 int32_t constant_summand = i.InputInt32(1);
2463 DCHECK_NE(0, constant_summand);
2464 if (constant_summand > 0) {
2465 __ addl(i.OutputRegister(), Immediate(constant_summand));
2466 } else {
2467 __ subl(i.OutputRegister(),
2468 Immediate(base::NegateWithWraparound(constant_summand)));
2469 }
2470 } else if (mode == kMode_MR1) {
2471 if (i.InputRegister(1) == i.OutputRegister()) {
2472 __ shll(i.OutputRegister(), Immediate(1));
2473 } else {
2474 __ addl(i.OutputRegister(), i.InputRegister(1));
2475 }
2476 } else if (mode == kMode_M2) {
2477 __ shll(i.OutputRegister(), Immediate(1));
2478 } else if (mode == kMode_M4) {
2479 __ shll(i.OutputRegister(), Immediate(2));
2480 } else if (mode == kMode_M8) {
2481 __ shll(i.OutputRegister(), Immediate(3));
2482 } else {
2483 __ leal(i.OutputRegister(), i.MemoryOperand());
2484 }
2485 } else if (mode == kMode_MR1 &&
2486 i.InputRegister(1) == i.OutputRegister()) {
2487 __ addl(i.OutputRegister(), i.InputRegister(0));
2488 } else {
2489 __ leal(i.OutputRegister(), i.MemoryOperand());
2490 }
2491 __ AssertZeroExtended(i.OutputRegister());
2492 break;
2493 }
2494 case kX64Lea: {
2495 AddressingMode mode = AddressingModeField::decode(instr->opcode());
2496 // Shorten "leaq" to "addq", "subq" or "shlq" if the register allocation
2497 // and addressing mode just happen to work out. The "addq"/"subq" forms
2498 // in these cases are faster based on measurements.
2499 if (i.InputRegister(0) == i.OutputRegister()) {
2500 if (mode == kMode_MRI) {
2501 int32_t constant_summand = i.InputInt32(1);
2502 if (constant_summand > 0) {
2503 __ addq(i.OutputRegister(), Immediate(constant_summand));
2504 } else if (constant_summand < 0) {
2505 __ subq(i.OutputRegister(), Immediate(-constant_summand));
2506 }
2507 } else if (mode == kMode_MR1) {
2508 if (i.InputRegister(1) == i.OutputRegister()) {
2509 __ shlq(i.OutputRegister(), Immediate(1));
2510 } else {
2511 __ addq(i.OutputRegister(), i.InputRegister(1));
2512 }
2513 } else if (mode == kMode_M2) {
2514 __ shlq(i.OutputRegister(), Immediate(1));
2515 } else if (mode == kMode_M4) {
2516 __ shlq(i.OutputRegister(), Immediate(2));
2517 } else if (mode == kMode_M8) {
2518 __ shlq(i.OutputRegister(), Immediate(3));
2519 } else {
2520 __ leaq(i.OutputRegister(), i.MemoryOperand());
2521 }
2522 } else if (mode == kMode_MR1 &&
2523 i.InputRegister(1) == i.OutputRegister()) {
2524 __ addq(i.OutputRegister(), i.InputRegister(0));
2525 } else {
2526 __ leaq(i.OutputRegister(), i.MemoryOperand());
2527 }
2528 break;
2529 }
2530 case kX64Dec32:
2531 __ decl(i.OutputRegister());
2532 break;
2533 case kX64Inc32:
2534 __ incl(i.OutputRegister());
2535 break;
2536 case kX64Push: {
2537 int stack_decrement = i.InputInt32(0);
2538 int slots = stack_decrement / kSystemPointerSize;
2539 // Whenever codegen uses pushq, we need to check if stack_decrement
2540 // contains any extra padding and adjust the stack before the pushq.
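// For example, a single pointer-sized argument in a 16-byte-aligned area
// arrives with stack_decrement == 16: allocate the 8 bytes of padding
// first, then let pushq write the remaining 8-byte slot.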
2541 if (HasImmediateInput(instr, 1)) {
2542 __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
2543 __ pushq(i.InputImmediate(1));
2544 } else if (HasAddressingMode(instr)) {
2545 __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
2546 size_t index = 1;
2547 Operand operand = i.MemoryOperand(&index);
2548 __ pushq(operand);
2549 } else {
2550 InstructionOperand* input = instr->InputAt(1);
2551 if (input->IsRegister()) {
2552 __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
2553 __ pushq(i.InputRegister(1));
2554 } else if (input->IsFloatRegister() || input->IsDoubleRegister()) {
2555 DCHECK_GE(stack_decrement, kSystemPointerSize);
2556 __ AllocateStackSpace(stack_decrement);
2557 __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
2558 } else if (input->IsSimd128Register()) {
2559 DCHECK_GE(stack_decrement, kSimd128Size);
2560 __ AllocateStackSpace(stack_decrement);
2561 // TODO(bbudge) Use Movaps when slots are aligned.
2562 __ Movups(Operand(rsp, 0), i.InputSimd128Register(1));
2563 } else if (input->IsStackSlot() || input->IsFloatStackSlot() ||
2564 input->IsDoubleStackSlot()) {
2565 __ AllocateStackSpace(stack_decrement - kSystemPointerSize);
2566 __ pushq(i.InputOperand(1));
2567 } else {
2568 DCHECK(input->IsSimd128StackSlot());
2569 DCHECK_GE(stack_decrement, kSimd128Size);
2570 // TODO(bbudge) Use Movaps when slots are aligned.
2571 __ Movups(kScratchDoubleReg, i.InputOperand(1));
2572 __ AllocateStackSpace(stack_decrement);
2573 __ Movups(Operand(rsp, 0), kScratchDoubleReg);
2574 }
2575 }
2576 frame_access_state()->IncreaseSPDelta(slots);
2577 unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2578 stack_decrement);
2579 break;
2580 }
2581 case kX64Poke: {
2582 int slot = MiscField::decode(instr->opcode());
2583 if (HasImmediateInput(instr, 0)) {
2584 __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputImmediate(0));
2585 } else if (instr->InputAt(0)->IsFPRegister()) {
2586 LocationOperand* op = LocationOperand::cast(instr->InputAt(0));
2587 if (op->representation() == MachineRepresentation::kFloat64) {
2588 __ Movsd(Operand(rsp, slot * kSystemPointerSize),
2589 i.InputDoubleRegister(0));
2590 } else {
2591 DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
2592 __ Movss(Operand(rsp, slot * kSystemPointerSize),
2593 i.InputFloatRegister(0));
2594 }
2595 } else {
2596 __ movq(Operand(rsp, slot * kSystemPointerSize), i.InputRegister(0));
2597 }
2598 break;
2599 }
2600 case kX64Peek: {
2601 int reverse_slot = i.InputInt32(0);
2602 int offset =
2603 FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
2604 if (instr->OutputAt(0)->IsFPRegister()) {
2605 LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
2606 if (op->representation() == MachineRepresentation::kFloat64) {
2607 __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
2608 } else if (op->representation() == MachineRepresentation::kFloat32) {
2609 __ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
2610 } else {
2611 DCHECK_EQ(MachineRepresentation::kSimd128, op->representation());
2612 __ Movdqu(i.OutputSimd128Register(), Operand(rbp, offset));
2613 }
2614 } else {
2615 __ movq(i.OutputRegister(), Operand(rbp, offset));
2616 }
2617 break;
2618 }
2619 case kX64F64x2Splat: {
2620 XMMRegister dst = i.OutputSimd128Register();
2621 if (instr->InputAt(0)->IsFPRegister()) {
2622 __ Movddup(dst, i.InputDoubleRegister(0));
2623 } else {
2624 __ Movddup(dst, i.InputOperand(0));
2625 }
2626 break;
2627 }
2628 case kX64F64x2ExtractLane: {
2629 __ F64x2ExtractLane(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
2630 i.InputUint8(1));
2631 break;
2632 }
2633 case kX64F64x2ReplaceLane: {
2634 __ F64x2ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
2635 i.InputDoubleRegister(2), i.InputInt8(1));
2636 break;
2637 }
2638 case kX64F64x2Sqrt: {
2639 __ Sqrtpd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2640 break;
2641 }
2642 case kX64F64x2Add: {
2643 ASSEMBLE_SIMD_BINOP(addpd);
2644 break;
2645 }
2646 case kX64F64x2Sub: {
2647 ASSEMBLE_SIMD_BINOP(subpd);
2648 break;
2649 }
2650 case kX64F64x2Mul: {
2651 ASSEMBLE_SIMD_BINOP(mulpd);
2652 break;
2653 }
2654 case kX64F64x2Div: {
2655 ASSEMBLE_SIMD_BINOP(divpd);
2656 break;
2657 }
2658 case kX64F64x2Min: {
2659 // Avoids a move in the no-AVX case if dst == src0.
2660 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2661 __ F64x2Min(i.OutputSimd128Register(), i.InputSimd128Register(0),
2662 i.InputSimd128Register(1), kScratchDoubleReg);
2663 break;
2664 }
2665 case kX64F64x2Max: {
2666 // Avoids a move in the no-AVX case if dst == src0.
2667 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2668 __ F64x2Max(i.OutputSimd128Register(), i.InputSimd128Register(0),
2669 i.InputSimd128Register(1), kScratchDoubleReg);
2670 break;
2671 }
2672 case kX64F64x2Eq: {
2673 ASSEMBLE_SIMD_BINOP(cmpeqpd);
2674 break;
2675 }
2676 case kX64F64x2Ne: {
2677 ASSEMBLE_SIMD_BINOP(cmpneqpd);
2678 break;
2679 }
2680 case kX64F64x2Lt: {
2681 ASSEMBLE_SIMD_BINOP(cmpltpd);
2682 break;
2683 }
2684 case kX64F64x2Le: {
2685 ASSEMBLE_SIMD_BINOP(cmplepd);
2686 break;
2687 }
2688 case kX64F64x2Qfma: {
2689 __ F64x2Qfma(i.OutputSimd128Register(), i.InputSimd128Register(0),
2690 i.InputSimd128Register(1), i.InputSimd128Register(2),
2691 kScratchDoubleReg);
2692 break;
2693 }
2694 case kX64F64x2Qfms: {
2695 __ F64x2Qfms(i.OutputSimd128Register(), i.InputSimd128Register(0),
2696 i.InputSimd128Register(1), i.InputSimd128Register(2),
2697 kScratchDoubleReg);
2698 break;
2699 }
2700 case kX64F64x2ConvertLowI32x4S: {
2701 __ Cvtdq2pd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2702 break;
2703 }
2704 case kX64F64x2ConvertLowI32x4U: {
2705 __ F64x2ConvertLowI32x4U(i.OutputSimd128Register(),
2706 i.InputSimd128Register(0), kScratchRegister);
2707 break;
2708 }
2709 case kX64F64x2PromoteLowF32x4: {
2710 if (HasAddressingMode(instr)) {
2711 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
2712 __ Cvtps2pd(i.OutputSimd128Register(), i.MemoryOperand());
2713 } else {
2714 __ Cvtps2pd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2715 }
2716 break;
2717 }
2718 case kX64F32x4DemoteF64x2Zero: {
2719 __ Cvtpd2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2720 break;
2721 }
2722 case kX64I32x4TruncSatF64x2SZero: {
2723 __ I32x4TruncSatF64x2SZero(i.OutputSimd128Register(),
2724 i.InputSimd128Register(0), kScratchDoubleReg,
2725 kScratchRegister);
2726 break;
2727 }
2728 case kX64I32x4TruncSatF64x2UZero: {
2729 __ I32x4TruncSatF64x2UZero(i.OutputSimd128Register(),
2730 i.InputSimd128Register(0), kScratchDoubleReg,
2731 kScratchRegister);
2732 break;
2733 }
2734 case kX64F32x4Splat: {
2735 __ F32x4Splat(i.OutputSimd128Register(), i.InputDoubleRegister(0));
2736 break;
2737 }
2738 case kX64F32x4ExtractLane: {
2739 __ F32x4ExtractLane(i.OutputFloatRegister(), i.InputSimd128Register(0),
2740 i.InputUint8(1));
2741 break;
2742 }
2743 case kX64F32x4ReplaceLane: {
2744 // The insertps instruction uses imm8[5:4] to indicate the lane
2745 // that needs to be replaced.
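// For example, replacing lane 2 uses select == (2 << 4) & 0x30 == 0x20,
// i.e. imm8 bits [5:4] == 0b10.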
2746 byte select = i.InputInt8(1) << 4 & 0x30;
2747 if (instr->InputAt(2)->IsFPRegister()) {
2748 __ Insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2),
2749 select);
2750 } else {
2751 __ Insertps(i.OutputSimd128Register(), i.InputOperand(2), select);
2752 }
2753 break;
2754 }
2755 case kX64F32x4SConvertI32x4: {
2756 __ Cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2757 break;
2758 }
2759 case kX64F32x4UConvertI32x4: {
2760 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2761 DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
2762 XMMRegister dst = i.OutputSimd128Register();
2763 __ Pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
2764 __ Pblendw(kScratchDoubleReg, dst, uint8_t{0x55}); // get lo 16 bits
2765 __ Psubd(dst, kScratchDoubleReg); // get hi 16 bits
2766 __ Cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
2767 __ Psrld(dst, byte{1}); // divide by 2 to get in unsigned range
2768 __ Cvtdq2ps(dst, dst); // convert hi exactly
2769 __ Addps(dst, dst); // double hi, exactly
2770 __ Addps(dst, kScratchDoubleReg); // add hi and lo, may round.
2771 break;
2772 }
2773 case kX64F32x4Abs: {
2774 XMMRegister dst = i.OutputSimd128Register();
2775 XMMRegister src = i.InputSimd128Register(0);
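// Materialize the 0x7FFFFFFF abs mask in a register (all-ones shifted
// right by one) rather than loading a constant from memory.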
2776 if (dst == src) {
2777 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2778 __ Psrld(kScratchDoubleReg, byte{1});
2779 __ Andps(dst, kScratchDoubleReg);
2780 } else {
2781 __ Pcmpeqd(dst, dst);
2782 __ Psrld(dst, byte{1});
2783 __ Andps(dst, src);
2784 }
2785 break;
2786 }
2787 case kX64F32x4Neg: {
2788 XMMRegister dst = i.OutputSimd128Register();
2789 XMMRegister src = i.InputSimd128Register(0);
2790 if (dst == src) {
2791 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2792 __ Pslld(kScratchDoubleReg, byte{31});
2793 __ Xorps(dst, kScratchDoubleReg);
2794 } else {
2795 __ Pcmpeqd(dst, dst);
2796 __ Pslld(dst, byte{31});
2797 __ Xorps(dst, src);
2798 }
2799 break;
2800 }
2801 case kX64F32x4Sqrt: {
2802 __ Sqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2803 break;
2804 }
2805 case kX64F32x4RecipApprox: {
2806 __ Rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2807 break;
2808 }
2809 case kX64F32x4RecipSqrtApprox: {
2810 __ Rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2811 break;
2812 }
2813 case kX64F32x4Add: {
2814 ASSEMBLE_SIMD_BINOP(addps);
2815 break;
2816 }
2817 case kX64F32x4Sub: {
2818 ASSEMBLE_SIMD_BINOP(subps);
2819 break;
2820 }
2821 case kX64F32x4Mul: {
2822 ASSEMBLE_SIMD_BINOP(mulps);
2823 break;
2824 }
2825 case kX64F32x4Div: {
2826 ASSEMBLE_SIMD_BINOP(divps);
2827 break;
2828 }
2829 case kX64F32x4Min: {
2830 __ F32x4Min(i.OutputSimd128Register(), i.InputSimd128Register(0),
2831 i.InputSimd128Register(1), kScratchDoubleReg);
2832 break;
2833 }
2834 case kX64F32x4Max: {
2835 __ F32x4Max(i.OutputSimd128Register(), i.InputSimd128Register(0),
2836 i.InputSimd128Register(1), kScratchDoubleReg);
2837 break;
2838 }
2839 case kX64F32x4Eq: {
2840 ASSEMBLE_SIMD_BINOP(cmpeqps);
2841 break;
2842 }
2843 case kX64F32x4Ne: {
2844 ASSEMBLE_SIMD_BINOP(cmpneqps);
2845 break;
2846 }
2847 case kX64F32x4Lt: {
2848 ASSEMBLE_SIMD_BINOP(cmpltps);
2849 break;
2850 }
2851 case kX64F32x4Le: {
2852 ASSEMBLE_SIMD_BINOP(cmpleps);
2853 break;
2854 }
2855 case kX64F32x4Qfma: {
2856 __ F32x4Qfma(i.OutputSimd128Register(), i.InputSimd128Register(0),
2857 i.InputSimd128Register(1), i.InputSimd128Register(2),
2858 kScratchDoubleReg);
2859 break;
2860 }
2861 case kX64F32x4Qfms: {
2862 __ F32x4Qfms(i.OutputSimd128Register(), i.InputSimd128Register(0),
2863 i.InputSimd128Register(1), i.InputSimd128Register(2),
2864 kScratchDoubleReg);
2865 break;
2866 }
2867 case kX64F32x4Pmin: {
2868 ASSEMBLE_SIMD_BINOP(minps);
2869 break;
2870 }
2871 case kX64F32x4Pmax: {
2872 ASSEMBLE_SIMD_BINOP(maxps);
2873 break;
2874 }
2875 case kX64F32x4Round: {
2876 RoundingMode const mode =
2877 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
2878 __ Roundps(i.OutputSimd128Register(), i.InputSimd128Register(0), mode);
2879 break;
2880 }
2881 case kX64F64x2Round: {
2882 RoundingMode const mode =
2883 static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
2884 __ Roundpd(i.OutputSimd128Register(), i.InputSimd128Register(0), mode);
2885 break;
2886 }
2887 case kX64F64x2Pmin: {
2888 ASSEMBLE_SIMD_BINOP(minpd);
2889 break;
2890 }
2891 case kX64F64x2Pmax: {
2892 ASSEMBLE_SIMD_BINOP(maxpd);
2893 break;
2894 }
2895 case kX64I64x2Splat: {
2896 XMMRegister dst = i.OutputSimd128Register();
2897 if (HasRegisterInput(instr, 0)) {
2898 __ Movq(dst, i.InputRegister(0));
2899 __ Movddup(dst, dst);
2900 } else {
2901 __ Movddup(dst, i.InputOperand(0));
2902 }
2903 break;
2904 }
2905 case kX64I64x2ExtractLane: {
2906 __ Pextrq(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
2907 break;
2908 }
2909 case kX64I64x2Abs: {
2910 __ I64x2Abs(i.OutputSimd128Register(), i.InputSimd128Register(0),
2911 kScratchDoubleReg);
2912 break;
2913 }
2914 case kX64I64x2Neg: {
2915 __ I64x2Neg(i.OutputSimd128Register(), i.InputSimd128Register(0),
2916 kScratchDoubleReg);
2917 break;
2918 }
2919 case kX64I64x2BitMask: {
2920 __ Movmskpd(i.OutputRegister(), i.InputSimd128Register(0));
2921 break;
2922 }
2923 case kX64I64x2Shl: {
2924 // Take shift value modulo 2^6.
2925 ASSEMBLE_SIMD_SHIFT(psllq, 6);
2926 break;
2927 }
2928 case kX64I64x2ShrS: {
2929 // TODO(zhin): there is vpsraq, but it requires AVX512.
2930 XMMRegister dst = i.OutputSimd128Register();
2931 XMMRegister src = i.InputSimd128Register(0);
2932 if (HasImmediateInput(instr, 1)) {
2933 __ I64x2ShrS(dst, src, i.InputInt6(1), kScratchDoubleReg);
2934 } else {
2935 __ I64x2ShrS(dst, src, i.InputRegister(1), kScratchDoubleReg,
2936 i.TempSimd128Register(0), kScratchRegister);
2937 }
2938 break;
2939 }
2940 case kX64I64x2Add: {
2941 ASSEMBLE_SIMD_BINOP(paddq);
2942 break;
2943 }
2944 case kX64I64x2Sub: {
2945 ASSEMBLE_SIMD_BINOP(psubq);
2946 break;
2947 }
2948 case kX64I64x2Mul: {
2949 __ I64x2Mul(i.OutputSimd128Register(), i.InputSimd128Register(0),
2950 i.InputSimd128Register(1), i.TempSimd128Register(0),
2951 kScratchDoubleReg);
2952 break;
2953 }
2954 case kX64I64x2Eq: {
2955 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2956 ASSEMBLE_SIMD_BINOP(pcmpeqq);
2957 break;
2958 }
2959 case kX64I64x2Ne: {
2960 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
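// SSE4.1 provides pcmpeqq but no packed-integer "not equal", so compute
// equality and invert it by XORing with all-ones.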
2961 __ Pcmpeqq(i.OutputSimd128Register(), i.InputSimd128Register(1));
2962 __ Pcmpeqq(kScratchDoubleReg, kScratchDoubleReg);
2963 __ Pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2964 break;
2965 }
2966 case kX64I64x2GtS: {
2967 __ I64x2GtS(i.OutputSimd128Register(), i.InputSimd128Register(0),
2968 i.InputSimd128Register(1), kScratchDoubleReg);
2969 break;
2970 }
2971 case kX64I64x2GeS: {
2972 __ I64x2GeS(i.OutputSimd128Register(), i.InputSimd128Register(0),
2973 i.InputSimd128Register(1), kScratchDoubleReg);
2974 break;
2975 }
2976 case kX64I64x2ShrU: {
2977 // Take shift value modulo 2^6.
2978 ASSEMBLE_SIMD_SHIFT(psrlq, 6);
2979 break;
2980 }
2981 case kX64I64x2ExtMulLowI32x4S: {
2982 __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
2983 i.InputSimd128Register(1), kScratchDoubleReg, /*low=*/true,
2984 /*is_signed=*/true);
2985 break;
2986 }
2987 case kX64I64x2ExtMulHighI32x4S: {
2988 __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
2989 i.InputSimd128Register(1), kScratchDoubleReg,
2990 /*low=*/false,
2991 /*is_signed=*/true);
2992 break;
2993 }
2994 case kX64I64x2ExtMulLowI32x4U: {
2995 __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
2996 i.InputSimd128Register(1), kScratchDoubleReg, /*low=*/true,
2997 /*is_signed=*/false);
2998 break;
2999 }
3000 case kX64I64x2ExtMulHighI32x4U: {
3001 __ I64x2ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3002 i.InputSimd128Register(1), kScratchDoubleReg,
3003 /*low=*/false,
3004 /*is_signed=*/false);
3005 break;
3006 }
3007 case kX64I64x2SConvertI32x4Low: {
3008 __ Pmovsxdq(i.OutputSimd128Register(), i.InputSimd128Register(0));
3009 break;
3010 }
3011 case kX64I64x2SConvertI32x4High: {
3012 __ I64x2SConvertI32x4High(i.OutputSimd128Register(),
3013 i.InputSimd128Register(0));
3014 break;
3015 }
3016 case kX64I64x2UConvertI32x4Low: {
3017 __ Pmovzxdq(i.OutputSimd128Register(), i.InputSimd128Register(0));
3018 break;
3019 }
3020 case kX64I64x2UConvertI32x4High: {
3021 __ I64x2UConvertI32x4High(i.OutputSimd128Register(),
3022 i.InputSimd128Register(0), kScratchDoubleReg);
3023 break;
3024 }
3025 case kX64I32x4Splat: {
3026 XMMRegister dst = i.OutputSimd128Register();
3027 if (HasRegisterInput(instr, 0)) {
3028 __ Movd(dst, i.InputRegister(0));
3029 } else {
3030 // TODO(v8:9198): Pshufd can load from aligned memory once supported.
3031 __ Movd(dst, i.InputOperand(0));
3032 }
3033 __ Pshufd(dst, dst, uint8_t{0x0});
3034 break;
3035 }
3036 case kX64I32x4ExtractLane: {
3037 __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
3038 break;
3039 }
3040 case kX64I32x4SConvertF32x4: {
3041 __ I32x4SConvertF32x4(i.OutputSimd128Register(),
3042 i.InputSimd128Register(0), kScratchDoubleReg,
3043 kScratchRegister);
3044 break;
3045 }
3046 case kX64I32x4SConvertI16x8Low: {
3047 __ Pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
3048 break;
3049 }
3050 case kX64I32x4SConvertI16x8High: {
3051 __ I32x4SConvertI16x8High(i.OutputSimd128Register(),
3052 i.InputSimd128Register(0));
3053 break;
3054 }
3055 case kX64I32x4Neg: {
3056 XMMRegister dst = i.OutputSimd128Register();
3057 XMMRegister src = i.InputSimd128Register(0);
3058 if (dst == src) {
3059 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3060 __ Psignd(dst, kScratchDoubleReg);
3061 } else {
3062 __ Pxor(dst, dst);
3063 __ Psubd(dst, src);
3064 }
3065 break;
3066 }
3067 case kX64I32x4Shl: {
3068 // Take shift value modulo 2^5.
3069 ASSEMBLE_SIMD_SHIFT(pslld, 5);
3070 break;
3071 }
3072 case kX64I32x4ShrS: {
3073 // Take shift value modulo 2^5.
3074 ASSEMBLE_SIMD_SHIFT(psrad, 5);
3075 break;
3076 }
3077 case kX64I32x4Add: {
3078 ASSEMBLE_SIMD_BINOP(paddd);
3079 break;
3080 }
3081 case kX64I32x4Sub: {
3082 ASSEMBLE_SIMD_BINOP(psubd);
3083 break;
3084 }
3085 case kX64I32x4Mul: {
3086 ASSEMBLE_SIMD_BINOP(pmulld);
3087 break;
3088 }
3089 case kX64I32x4MinS: {
3090 ASSEMBLE_SIMD_BINOP(pminsd);
3091 break;
3092 }
3093 case kX64I32x4MaxS: {
3094 ASSEMBLE_SIMD_BINOP(pmaxsd);
3095 break;
3096 }
3097 case kX64I32x4Eq: {
3098 ASSEMBLE_SIMD_BINOP(pcmpeqd);
3099 break;
3100 }
3101 case kX64I32x4Ne: {
3102 __ Pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
3103 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3104 __ Pxor(i.OutputSimd128Register(), kScratchDoubleReg);
3105 break;
3106 }
3107 case kX64I32x4GtS: {
3108 ASSEMBLE_SIMD_BINOP(pcmpgtd);
3109 break;
3110 }
3111 case kX64I32x4GeS: {
3112 XMMRegister dst = i.OutputSimd128Register();
3113 XMMRegister src = i.InputSimd128Register(1);
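// There is no pcmpged; use a >= b  <=>  min(a, b) == b.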
3114 __ Pminsd(dst, src);
3115 __ Pcmpeqd(dst, src);
3116 break;
3117 }
3118 case kX64I32x4UConvertF32x4: {
3119 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3120 XMMRegister dst = i.OutputSimd128Register();
3121 XMMRegister tmp = i.TempSimd128Register(0);
3122 XMMRegister tmp2 = i.TempSimd128Register(1);
3123 // NaN -> 0, negative -> 0.
3124 __ Pxor(tmp2, tmp2);
3125 __ Maxps(dst, tmp2);
3126 // tmp2: float representation of max_signed.
3127 __ Pcmpeqd(tmp2, tmp2);
3128 __ Psrld(tmp2, uint8_t{1}); // 0x7fffffff
3129 __ Cvtdq2ps(tmp2, tmp2); // 0x4f000000
3130 // tmp: convert (src-max_signed).
3131 // Positive overflow lanes -> 0x7FFFFFFF
3132 // Negative lanes -> 0
3133 __ Movaps(tmp, dst);
3134 __ Subps(tmp, tmp2);
3135 __ Cmpleps(tmp2, tmp);
3136 __ Cvttps2dq(tmp, tmp);
3137 __ Pxor(tmp, tmp2);
3138 __ Pxor(tmp2, tmp2);
3139 __ Pmaxsd(tmp, tmp2);
3140 // Convert. Overflow lanes above max_signed will be 0x80000000.
3141 __ Cvttps2dq(dst, dst);
3142 // Add (src-max_signed) for overflow lanes.
3143 __ Paddd(dst, tmp);
3144 break;
3145 }
3146 case kX64I32x4UConvertI16x8Low: {
3147 __ Pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
3148 break;
3149 }
3150 case kX64I32x4UConvertI16x8High: {
3151 __ I32x4UConvertI16x8High(i.OutputSimd128Register(),
3152 i.InputSimd128Register(0), kScratchDoubleReg);
3153 break;
3154 }
3155 case kX64I32x4ShrU: {
3156 // Take shift value modulo 2^5.
3157 ASSEMBLE_SIMD_SHIFT(psrld, 5);
3158 break;
3159 }
3160 case kX64I32x4MinU: {
3161 ASSEMBLE_SIMD_BINOP(pminud);
3162 break;
3163 }
3164 case kX64I32x4MaxU: {
3165 ASSEMBLE_SIMD_BINOP(pmaxud);
3166 break;
3167 }
3168 case kX64I32x4GtU: {
3169 XMMRegister dst = i.OutputSimd128Register();
3170 XMMRegister src = i.InputSimd128Register(1);
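// SSE has no unsigned integer compares; compute a > b as
// NOT(max(a, b) == b), since max(a, b) == b  <=>  a <= b.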
3171 __ Pmaxud(dst, src);
3172 __ Pcmpeqd(dst, src);
3173 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3174 __ Pxor(dst, kScratchDoubleReg);
3175 break;
3176 }
3177 case kX64I32x4GeU: {
3178 XMMRegister dst = i.OutputSimd128Register();
3179 XMMRegister src = i.InputSimd128Register(1);
3180 __ Pminud(dst, src);
3181 __ Pcmpeqd(dst, src);
3182 break;
3183 }
3184 case kX64I32x4Abs: {
3185 __ Pabsd(i.OutputSimd128Register(), i.InputSimd128Register(0));
3186 break;
3187 }
3188 case kX64I32x4BitMask: {
3189 __ Movmskps(i.OutputRegister(), i.InputSimd128Register(0));
3190 break;
3191 }
3192 case kX64I32x4DotI16x8S: {
3193 ASSEMBLE_SIMD_BINOP(pmaddwd);
3194 break;
3195 }
3196 case kX64I32x4ExtAddPairwiseI16x8S: {
3197 __ I32x4ExtAddPairwiseI16x8S(i.OutputSimd128Register(),
3198 i.InputSimd128Register(0), kScratchRegister);
3199 break;
3200 }
3201 case kX64I32x4ExtAddPairwiseI16x8U: {
3202 __ I32x4ExtAddPairwiseI16x8U(i.OutputSimd128Register(),
3203 i.InputSimd128Register(0),
3204 kScratchDoubleReg);
3205 break;
3206 }
3207 case kX64S128Const: {
3208 // Emit code for generic constants here; the all-zeros and all-ones
3209 // cases are handled separately by the instruction selector.
3210 XMMRegister dst = i.OutputSimd128Register();
3211 uint32_t imm[4] = {};
3212 for (int j = 0; j < 4; j++) {
3213 imm[j] = i.InputUint32(j);
3214 }
3215 SetupSimdImmediateInRegister(tasm(), imm, dst);
3216 break;
3217 }
3218 case kX64S128Zero: {
3219 XMMRegister dst = i.OutputSimd128Register();
3220 __ Pxor(dst, dst);
3221 break;
3222 }
3223 case kX64S128AllOnes: {
3224 XMMRegister dst = i.OutputSimd128Register();
3225 __ Pcmpeqd(dst, dst);
3226 break;
3227 }
3228 case kX64I16x8Splat: {
3229 XMMRegister dst = i.OutputSimd128Register();
3230 if (HasRegisterInput(instr, 0)) {
3231 __ I16x8Splat(dst, i.InputRegister(0));
3232 } else {
3233 __ I16x8Splat(dst, i.InputOperand(0));
3234 }
3235 break;
3236 }
3237 case kX64I16x8ExtractLaneS: {
3238 Register dst = i.OutputRegister();
3239 __ Pextrw(dst, i.InputSimd128Register(0), i.InputUint8(1));
3240 __ movsxwl(dst, dst);
3241 break;
3242 }
3243 case kX64I16x8SConvertI8x16Low: {
3244 __ Pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
3245 break;
3246 }
3247 case kX64I16x8SConvertI8x16High: {
3248 __ I16x8SConvertI8x16High(i.OutputSimd128Register(),
3249 i.InputSimd128Register(0));
3250 break;
3251 }
3252 case kX64I16x8Neg: {
3253 XMMRegister dst = i.OutputSimd128Register();
3254 XMMRegister src = i.InputSimd128Register(0);
3255 if (dst == src) {
3256 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3257 __ Psignw(dst, kScratchDoubleReg);
3258 } else {
3259 __ Pxor(dst, dst);
3260 __ Psubw(dst, src);
3261 }
3262 break;
3263 }
3264 case kX64I16x8Shl: {
3265 // Take shift value modulo 2^4.
3266 ASSEMBLE_SIMD_SHIFT(psllw, 4);
3267 break;
3268 }
3269 case kX64I16x8ShrS: {
3270 // Take shift value modulo 2^4.
3271 ASSEMBLE_SIMD_SHIFT(psraw, 4);
3272 break;
3273 }
3274 case kX64I16x8SConvertI32x4: {
3275 ASSEMBLE_SIMD_BINOP(packssdw);
3276 break;
3277 }
3278 case kX64I16x8Add: {
3279 ASSEMBLE_SIMD_BINOP(paddw);
3280 break;
3281 }
3282 case kX64I16x8AddSatS: {
3283 ASSEMBLE_SIMD_BINOP(paddsw);
3284 break;
3285 }
3286 case kX64I16x8Sub: {
3287 ASSEMBLE_SIMD_BINOP(psubw);
3288 break;
3289 }
3290 case kX64I16x8SubSatS: {
3291 ASSEMBLE_SIMD_BINOP(psubsw);
3292 break;
3293 }
3294 case kX64I16x8Mul: {
3295 ASSEMBLE_SIMD_BINOP(pmullw);
3296 break;
3297 }
3298 case kX64I16x8MinS: {
3299 ASSEMBLE_SIMD_BINOP(pminsw);
3300 break;
3301 }
3302 case kX64I16x8MaxS: {
3303 ASSEMBLE_SIMD_BINOP(pmaxsw);
3304 break;
3305 }
3306 case kX64I16x8Eq: {
3307 ASSEMBLE_SIMD_BINOP(pcmpeqw);
3308 break;
3309 }
3310 case kX64I16x8Ne: {
3311 XMMRegister dst = i.OutputSimd128Register();
3312 __ Pcmpeqw(dst, i.InputSimd128Register(1));
3313 __ Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
3314 __ Pxor(dst, kScratchDoubleReg);
3315 break;
3316 }
3317 case kX64I16x8GtS: {
3318 ASSEMBLE_SIMD_BINOP(pcmpgtw);
3319 break;
3320 }
3321 case kX64I16x8GeS: {
3322 XMMRegister dst = i.OutputSimd128Register();
3323 XMMRegister src = i.InputSimd128Register(1);
3324 __ Pminsw(dst, src);
3325 __ Pcmpeqw(dst, src);
3326 break;
3327 }
3328 case kX64I16x8UConvertI8x16Low: {
3329 __ Pmovzxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
3330 break;
3331 }
3332 case kX64I16x8UConvertI8x16High: {
3333 __ I16x8UConvertI8x16High(i.OutputSimd128Register(),
3334 i.InputSimd128Register(0), kScratchDoubleReg);
3335 break;
3336 }
3337 case kX64I16x8ShrU: {
3338 // Take shift value modulo 2^4.
3339 ASSEMBLE_SIMD_SHIFT(psrlw, 4);
3340 break;
3341 }
3342 case kX64I16x8UConvertI32x4: {
3343 ASSEMBLE_SIMD_BINOP(packusdw);
3344 break;
3345 }
3346 case kX64I16x8AddSatU: {
3347 ASSEMBLE_SIMD_BINOP(paddusw);
3348 break;
3349 }
3350 case kX64I16x8SubSatU: {
3351 ASSEMBLE_SIMD_BINOP(psubusw);
3352 break;
3353 }
3354 case kX64I16x8MinU: {
3355 ASSEMBLE_SIMD_BINOP(pminuw);
3356 break;
3357 }
3358 case kX64I16x8MaxU: {
3359 ASSEMBLE_SIMD_BINOP(pmaxuw);
3360 break;
3361 }
3362 case kX64I16x8GtU: {
3363 XMMRegister dst = i.OutputSimd128Register();
3364 XMMRegister src = i.InputSimd128Register(1);
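// SSE has no unsigned pcmpgtw: a > b iff max(a, b) != b, so compute the
// equality and invert it with an all-ones XOR.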
3365 __ Pmaxuw(dst, src);
3366 __ Pcmpeqw(dst, src);
3367 __ Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
3368 __ Pxor(dst, kScratchDoubleReg);
3369 break;
3370 }
3371 case kX64I16x8GeU: {
3372 XMMRegister dst = i.OutputSimd128Register();
3373 XMMRegister src = i.InputSimd128Register(1);
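// Unsigned a >= b iff min(a, b) == b; no inversion needed here.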
3374 __ Pminuw(dst, src);
3375 __ Pcmpeqw(dst, src);
3376 break;
3377 }
3378 case kX64I16x8RoundingAverageU: {
3379 ASSEMBLE_SIMD_BINOP(pavgw);
3380 break;
3381 }
3382 case kX64I16x8Abs: {
3383 __ Pabsw(i.OutputSimd128Register(), i.InputSimd128Register(0));
3384 break;
3385 }
3386 case kX64I16x8BitMask: {
3387 Register dst = i.OutputRegister();
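// pmovmskb has no 16-bit variant: pack the words to bytes with signed
// saturation (which preserves the sign bits) into the upper half of the
// scratch register, take the byte movemask, and keep only the top 8 bits.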
3388 __ Packsswb(kScratchDoubleReg, i.InputSimd128Register(0));
3389 __ Pmovmskb(dst, kScratchDoubleReg);
3390 __ shrq(dst, Immediate(8));
3391 break;
3392 }
3393 case kX64I16x8ExtMulLowI8x16S: {
3394 __ I16x8ExtMulLow(i.OutputSimd128Register(), i.InputSimd128Register(0),
3395 i.InputSimd128Register(1), kScratchDoubleReg,
3396 /*is_signed=*/true);
3397 break;
3398 }
3399 case kX64I16x8ExtMulHighI8x16S: {
3400 __ I16x8ExtMulHighS(i.OutputSimd128Register(), i.InputSimd128Register(0),
3401 i.InputSimd128Register(1), kScratchDoubleReg);
3402 break;
3403 }
3404 case kX64I16x8ExtMulLowI8x16U: {
3405 __ I16x8ExtMulLow(i.OutputSimd128Register(), i.InputSimd128Register(0),
3406 i.InputSimd128Register(1), kScratchDoubleReg,
3407 /*is_signed=*/false);
3408 break;
3409 }
3410 case kX64I16x8ExtMulHighI8x16U: {
3411 __ I16x8ExtMulHighU(i.OutputSimd128Register(), i.InputSimd128Register(0),
3412 i.InputSimd128Register(1), kScratchDoubleReg);
3413 break;
3414 }
3415 case kX64I16x8ExtAddPairwiseI8x16S: {
3416 __ I16x8ExtAddPairwiseI8x16S(i.OutputSimd128Register(),
3417 i.InputSimd128Register(0), kScratchDoubleReg,
3418 kScratchRegister);
3419 break;
3420 }
3421 case kX64I16x8ExtAddPairwiseI8x16U: {
3422 __ I16x8ExtAddPairwiseI8x16U(i.OutputSimd128Register(),
3423 i.InputSimd128Register(0), kScratchRegister);
3424 break;
3425 }
3426 case kX64I16x8Q15MulRSatS: {
3427 __ I16x8Q15MulRSatS(i.OutputSimd128Register(), i.InputSimd128Register(0),
3428 i.InputSimd128Register(1), kScratchDoubleReg);
3429 break;
3430 }
3431 case kX64I8x16Splat: {
3432 XMMRegister dst = i.OutputSimd128Register();
3433 if (HasRegisterInput(instr, 0)) {
3434 __ I8x16Splat(dst, i.InputRegister(0), kScratchDoubleReg);
3435 } else {
3436 __ I8x16Splat(dst, i.InputOperand(0), kScratchDoubleReg);
3437 }
3438 break;
3439 }
3440 case kX64Pextrb: {
3441 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3442 size_t index = 0;
3443 if (HasAddressingMode(instr)) {
3444 Operand operand = i.MemoryOperand(&index);
3445 __ Pextrb(operand, i.InputSimd128Register(index),
3446 i.InputUint8(index + 1));
3447 } else {
3448 __ Pextrb(i.OutputRegister(), i.InputSimd128Register(0),
3449 i.InputUint8(1));
3450 }
3451 break;
3452 }
3453 case kX64Pextrw: {
3454 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3455 size_t index = 0;
3456 if (HasAddressingMode(instr)) {
3457 Operand operand = i.MemoryOperand(&index);
3458 __ Pextrw(operand, i.InputSimd128Register(index),
3459 i.InputUint8(index + 1));
3460 } else {
3461 __ Pextrw(i.OutputRegister(), i.InputSimd128Register(0),
3462 i.InputUint8(1));
3463 }
3464 break;
3465 }
3466 case kX64I8x16ExtractLaneS: {
3467 Register dst = i.OutputRegister();
3468 __ Pextrb(dst, i.InputSimd128Register(0), i.InputUint8(1));
3469 __ movsxbl(dst, dst);
3470 break;
3471 }
3472 case kX64Pinsrb: {
3473 ASSEMBLE_PINSR(Pinsrb);
3474 break;
3475 }
3476 case kX64Pinsrw: {
3477 ASSEMBLE_PINSR(Pinsrw);
3478 break;
3479 }
3480 case kX64Pinsrd: {
3481 ASSEMBLE_PINSR(Pinsrd);
3482 break;
3483 }
3484 case kX64Pinsrq: {
3485 ASSEMBLE_PINSR(Pinsrq);
3486 break;
3487 }
3488 case kX64I8x16SConvertI16x8: {
3489 ASSEMBLE_SIMD_BINOP(packsswb);
3490 break;
3491 }
3492 case kX64I8x16Neg: {
3493 XMMRegister dst = i.OutputSimd128Register();
3494 XMMRegister src = i.InputSimd128Register(0);
3495 if (dst == src) {
3496 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3497 __ Psignb(dst, kScratchDoubleReg);
3498 } else {
3499 __ Pxor(dst, dst);
3500 __ Psubb(dst, src);
3501 }
3502 break;
3503 }
3504 case kX64I8x16Shl: {
3505 XMMRegister dst = i.OutputSimd128Register();
3506 XMMRegister src = i.InputSimd128Register(0);
3507 DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
3508 if (HasImmediateInput(instr, 1)) {
3509 __ I8x16Shl(dst, src, i.InputInt3(1), kScratchRegister,
3510 kScratchDoubleReg);
3511 } else {
3512 __ I8x16Shl(dst, src, i.InputRegister(1), kScratchRegister,
3513 kScratchDoubleReg, i.TempSimd128Register(0));
3514 }
3515 break;
3516 }
3517 case kX64I8x16ShrS: {
3518 XMMRegister dst = i.OutputSimd128Register();
3519 XMMRegister src = i.InputSimd128Register(0);
3520 DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
3521 if (HasImmediateInput(instr, 1)) {
3522 __ I8x16ShrS(dst, src, i.InputInt3(1), kScratchDoubleReg);
3523 } else {
3524 __ I8x16ShrS(dst, src, i.InputRegister(1), kScratchRegister,
3525 kScratchDoubleReg, i.TempSimd128Register(0));
3526 }
3527 break;
3528 }
3529 case kX64I8x16Add: {
3530 ASSEMBLE_SIMD_BINOP(paddb);
3531 break;
3532 }
3533 case kX64I8x16AddSatS: {
3534 ASSEMBLE_SIMD_BINOP(paddsb);
3535 break;
3536 }
3537 case kX64I8x16Sub: {
3538 ASSEMBLE_SIMD_BINOP(psubb);
3539 break;
3540 }
3541 case kX64I8x16SubSatS: {
3542 ASSEMBLE_SIMD_BINOP(psubsb);
3543 break;
3544 }
3545 case kX64I8x16MinS: {
3546 ASSEMBLE_SIMD_BINOP(pminsb);
3547 break;
3548 }
3549 case kX64I8x16MaxS: {
3550 ASSEMBLE_SIMD_BINOP(pmaxsb);
3551 break;
3552 }
3553 case kX64I8x16Eq: {
3554 ASSEMBLE_SIMD_BINOP(pcmpeqb);
3555 break;
3556 }
3557 case kX64I8x16Ne: {
3558 XMMRegister dst = i.OutputSimd128Register();
3559 __ Pcmpeqb(dst, i.InputSimd128Register(1));
3560 __ Pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
3561 __ Pxor(dst, kScratchDoubleReg);
3562 break;
3563 }
3564 case kX64I8x16GtS: {
3565 ASSEMBLE_SIMD_BINOP(pcmpgtb);
3566 break;
3567 }
3568 case kX64I8x16GeS: {
3569 XMMRegister dst = i.OutputSimd128Register();
3570 XMMRegister src = i.InputSimd128Register(1);
3571 __ Pminsb(dst, src);
3572 __ Pcmpeqb(dst, src);
3573 break;
3574 }
3575 case kX64I8x16UConvertI16x8: {
3576 ASSEMBLE_SIMD_BINOP(packuswb);
3577 break;
3578 }
3579 case kX64I8x16ShrU: {
3580 XMMRegister dst = i.OutputSimd128Register();
3581 XMMRegister src = i.InputSimd128Register(0);
3582 DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
3583 if (HasImmediateInput(instr, 1)) {
3584 __ I8x16ShrU(dst, src, i.InputInt3(1), kScratchRegister,
3585 kScratchDoubleReg);
3586 } else {
3587 __ I8x16ShrU(dst, src, i.InputRegister(1), kScratchRegister,
3588 kScratchDoubleReg, i.TempSimd128Register(0));
3589 }
3590 break;
3591 }
3592 case kX64I8x16AddSatU: {
3593 ASSEMBLE_SIMD_BINOP(paddusb);
3594 break;
3595 }
3596 case kX64I8x16SubSatU: {
3597 ASSEMBLE_SIMD_BINOP(psubusb);
3598 break;
3599 }
3600 case kX64I8x16MinU: {
3601 ASSEMBLE_SIMD_BINOP(pminub);
3602 break;
3603 }
3604 case kX64I8x16MaxU: {
3605 ASSEMBLE_SIMD_BINOP(pmaxub);
3606 break;
3607 }
3608 case kX64I8x16GtU: {
3609 XMMRegister dst = i.OutputSimd128Register();
3610 XMMRegister src = i.InputSimd128Register(1);
3611 __ Pmaxub(dst, src);
3612 __ Pcmpeqb(dst, src);
3613 __ Pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
3614 __ Pxor(dst, kScratchDoubleReg);
3615 break;
3616 }
3617 case kX64I8x16GeU: {
3618 XMMRegister dst = i.OutputSimd128Register();
3619 XMMRegister src = i.InputSimd128Register(1);
3620 __ Pminub(dst, src);
3621 __ Pcmpeqb(dst, src);
3622 break;
3623 }
3624 case kX64I8x16RoundingAverageU: {
3625 ASSEMBLE_SIMD_BINOP(pavgb);
3626 break;
3627 }
3628 case kX64I8x16Abs: {
3629 __ Pabsb(i.OutputSimd128Register(), i.InputSimd128Register(0));
3630 break;
3631 }
3632 case kX64I8x16BitMask: {
3633 __ Pmovmskb(i.OutputRegister(), i.InputSimd128Register(0));
3634 break;
3635 }
3636 case kX64I32x4ExtMulLowI16x8S: {
3637 __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3638 i.InputSimd128Register(1), kScratchDoubleReg, /*low=*/true,
3639 /*is_signed=*/true);
3640 break;
3641 }
3642 case kX64I32x4ExtMulHighI16x8S: {
3643 __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3644 i.InputSimd128Register(1), kScratchDoubleReg,
3645 /*low=*/false,
3646 /*is_signed=*/true);
3647 break;
3648 }
3649 case kX64I32x4ExtMulLowI16x8U: {
3650 __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3651 i.InputSimd128Register(1), kScratchDoubleReg, /*low=*/true,
3652 /*is_signed=*/false);
3653 break;
3654 }
3655 case kX64I32x4ExtMulHighI16x8U: {
3656 __ I32x4ExtMul(i.OutputSimd128Register(), i.InputSimd128Register(0),
3657 i.InputSimd128Register(1), kScratchDoubleReg,
3658 /*low=*/false,
3659 /*is_signed=*/false);
3660 break;
3661 }
3662 case kX64S128And: {
3663 ASSEMBLE_SIMD_BINOP(pand);
3664 break;
3665 }
3666 case kX64S128Or: {
3667 ASSEMBLE_SIMD_BINOP(por);
3668 break;
3669 }
3670 case kX64S128Xor: {
3671 ASSEMBLE_SIMD_BINOP(pxor);
3672 break;
3673 }
3674 case kX64S128Not: {
3675 __ S128Not(i.OutputSimd128Register(), i.InputSimd128Register(0),
3676 kScratchDoubleReg);
3677 break;
3678 }
3679 case kX64S128Select: {
3680 __ S128Select(i.OutputSimd128Register(), i.InputSimd128Register(0),
3681 i.InputSimd128Register(1), i.InputSimd128Register(2),
3682 kScratchDoubleReg);
3683 break;
3684 }
3685 case kX64S128AndNot: {
3686 XMMRegister dst = i.OutputSimd128Register();
3687 DCHECK_EQ(dst, i.InputSimd128Register(0));
3688 // The inputs have been inverted by the instruction selector, so andnps
3689 // can be used here directly.
3690 __ Andnps(dst, i.InputSimd128Register(1));
3691 break;
3692 }
3693 case kX64I8x16Swizzle: {
3694 __ I8x16Swizzle(i.OutputSimd128Register(), i.InputSimd128Register(0),
3695 i.InputSimd128Register(1), kScratchDoubleReg,
3696 kScratchRegister, MiscField::decode(instr->opcode()));
3697 break;
3698 }
3699 case kX64I8x16Shuffle: {
3700 XMMRegister dst = i.OutputSimd128Register();
3701 XMMRegister tmp_simd = i.TempSimd128Register(0);
3702 DCHECK_NE(tmp_simd, i.InputSimd128Register(0));
3703 if (instr->InputCount() == 5) { // only one input operand
3704 uint32_t mask[4] = {};
3705 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3706 for (int j = 4; j > 0; j--) {
3707 mask[j - 1] = i.InputUint32(j);
3708 }
3709
3710 SetupSimdImmediateInRegister(tasm(), mask, tmp_simd);
3711 __ Pshufb(dst, tmp_simd);
3712 } else { // two input operands
3713 DCHECK_NE(tmp_simd, i.InputSimd128Register(1));
3714 DCHECK_EQ(6, instr->InputCount());
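// Shuffle each source separately: in mask1, lanes taken from the first
// source keep their index while all others become 0x80 (which pshufb maps
// to zero); mask2 does the converse for the second source. OR-ing the two
// shuffled halves yields the blended result.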
3715 ASSEMBLE_SIMD_INSTR(Movdqu, kScratchDoubleReg, 0);
3716 uint32_t mask1[4] = {};
3717 for (int j = 5; j > 1; j--) {
3718 uint32_t lanes = i.InputUint32(j);
3719 for (int k = 0; k < 32; k += 8) {
3720 uint8_t lane = lanes >> k;
3721 mask1[j - 2] |= (lane < kSimd128Size ? lane : 0x80) << k;
3722 }
3723 }
3724 SetupSimdImmediateInRegister(tasm(), mask1, tmp_simd);
3725 __ Pshufb(kScratchDoubleReg, tmp_simd);
3726 uint32_t mask2[4] = {};
3727 if (instr->InputAt(1)->IsSimd128Register()) {
3728 XMMRegister src1 = i.InputSimd128Register(1);
3729 if (src1 != dst) __ Movdqa(dst, src1);
3730 } else {
3731 __ Movdqu(dst, i.InputOperand(1));
3732 }
3733 for (int j = 5; j > 1; j--) {
3734 uint32_t lanes = i.InputUint32(j);
3735 for (int k = 0; k < 32; k += 8) {
3736 uint8_t lane = lanes >> k;
3737 mask2[j - 2] |= (lane >= kSimd128Size ? (lane & 0x0F) : 0x80) << k;
3738 }
3739 }
3740 SetupSimdImmediateInRegister(tasm(), mask2, tmp_simd);
3741 __ Pshufb(dst, tmp_simd);
3742 __ Por(dst, kScratchDoubleReg);
3743 }
3744 break;
3745 }
3746 case kX64I8x16Popcnt: {
3747 __ I8x16Popcnt(i.OutputSimd128Register(), i.InputSimd128Register(0),
3748 i.TempSimd128Register(0), kScratchDoubleReg,
3749 kScratchRegister);
3750 break;
3751 }
3752 case kX64S128Load8Splat: {
3753 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3754 __ S128Load8Splat(i.OutputSimd128Register(), i.MemoryOperand(),
3755 kScratchDoubleReg);
3756 break;
3757 }
3758 case kX64S128Load16Splat: {
3759 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3760 __ S128Load16Splat(i.OutputSimd128Register(), i.MemoryOperand(),
3761 kScratchDoubleReg);
3762 break;
3763 }
3764 case kX64S128Load32Splat: {
3765 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3766 __ S128Load32Splat(i.OutputSimd128Register(), i.MemoryOperand());
3767 break;
3768 }
3769 case kX64S128Load64Splat: {
3770 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3771 __ Movddup(i.OutputSimd128Register(), i.MemoryOperand());
3772 break;
3773 }
3774 case kX64S128Load8x8S: {
3775 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3776 __ Pmovsxbw(i.OutputSimd128Register(), i.MemoryOperand());
3777 break;
3778 }
3779 case kX64S128Load8x8U: {
3780 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3781 __ Pmovzxbw(i.OutputSimd128Register(), i.MemoryOperand());
3782 break;
3783 }
3784 case kX64S128Load16x4S: {
3785 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3786 __ Pmovsxwd(i.OutputSimd128Register(), i.MemoryOperand());
3787 break;
3788 }
3789 case kX64S128Load16x4U: {
3790 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3791 __ Pmovzxwd(i.OutputSimd128Register(), i.MemoryOperand());
3792 break;
3793 }
3794 case kX64S128Load32x2S: {
3795 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3796 __ Pmovsxdq(i.OutputSimd128Register(), i.MemoryOperand());
3797 break;
3798 }
3799 case kX64S128Load32x2U: {
3800 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3801 __ Pmovzxdq(i.OutputSimd128Register(), i.MemoryOperand());
3802 break;
3803 }
3804 case kX64S128Store32Lane: {
3805 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3806 size_t index = 0;
3807 Operand operand = i.MemoryOperand(&index);
3808 uint8_t lane = i.InputUint8(index + 1);
3809 __ S128Store32Lane(operand, i.InputSimd128Register(index), lane);
3810 break;
3811 }
3812 case kX64S128Store64Lane: {
3813 EmitOOLTrapIfNeeded(zone(), this, opcode, instr, __ pc_offset());
3814 size_t index = 0;
3815 Operand operand = i.MemoryOperand(&index);
3816 uint8_t lane = i.InputUint8(index + 1);
3817 __ S128Store64Lane(operand, i.InputSimd128Register(index), lane);
3818 break;
3819 }
3820 case kX64Shufps: {
3821 __ Shufps(i.OutputSimd128Register(), i.InputSimd128Register(0),
3822 i.InputSimd128Register(1), i.InputUint8(2));
3823 break;
3824 }
3825 case kX64S32x4Rotate: {
3826 XMMRegister dst = i.OutputSimd128Register();
3827 XMMRegister src = i.InputSimd128Register(0);
3828 uint8_t mask = i.InputUint8(1);
3829 if (dst == src) {
3830 // 1-byte shorter encoding than pshufd.
3831 __ Shufps(dst, src, src, mask);
3832 } else {
3833 __ Pshufd(dst, src, mask);
3834 }
3835 break;
3836 }
3837 case kX64S32x4Swizzle: {
3838 DCHECK_EQ(2, instr->InputCount());
3839 ASSEMBLE_SIMD_IMM_INSTR(Pshufd, i.OutputSimd128Register(), 0,
3840 i.InputUint8(1));
3841 break;
3842 }
3843 case kX64S32x4Shuffle: {
3844 DCHECK_EQ(4, instr->InputCount()); // Swizzles should be handled above.
3845 uint8_t shuffle = i.InputUint8(2);
3846 DCHECK_NE(0xe4, shuffle); // A simple blend should be handled below.
3847 ASSEMBLE_SIMD_IMM_INSTR(Pshufd, kScratchDoubleReg, 1, shuffle);
3848 ASSEMBLE_SIMD_IMM_INSTR(Pshufd, i.OutputSimd128Register(), 0, shuffle);
3849 __ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputUint8(3));
3850 break;
3851 }
3852 case kX64S16x8Blend: {
3853 ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, i.InputUint8(2));
3854 break;
3855 }
3856 case kX64S16x8HalfShuffle1: {
3857 XMMRegister dst = i.OutputSimd128Register();
3858 uint8_t mask_lo = i.InputUint8(1);
3859 uint8_t mask_hi = i.InputUint8(2);
3860 if (mask_lo != 0xe4) {
3861 ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, mask_lo);
3862 if (mask_hi != 0xe4) __ Pshufhw(dst, dst, mask_hi);
3863 } else {
3864 DCHECK_NE(mask_hi, 0xe4);
3865 ASSEMBLE_SIMD_IMM_INSTR(Pshufhw, dst, 0, mask_hi);
3866 }
3867 break;
3868 }
3869 case kX64S16x8HalfShuffle2: {
3870 XMMRegister dst = i.OutputSimd128Register();
3871 ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, kScratchDoubleReg, 1, i.InputUint8(2));
3872 __ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputUint8(3));
3873 ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, i.InputUint8(2));
3874 __ Pshufhw(dst, dst, i.InputUint8(3));
3875 __ Pblendw(dst, kScratchDoubleReg, i.InputUint8(4));
3876 break;
3877 }
3878 case kX64S8x16Alignr: {
3879 ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, i.InputUint8(2));
3880 break;
3881 }
3882 case kX64S16x8Dup: {
3883 XMMRegister dst = i.OutputSimd128Register();
3884 uint8_t lane = i.InputInt8(1) & 0x7;
3885 uint8_t lane4 = lane & 0x3;
3886 uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
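// Broadcast the word within its 64-bit half with pshuflw/pshufhw, then
// replicate that half across the whole register with punpck*qdq.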
3887 if (lane < 4) {
3888 ASSEMBLE_SIMD_IMM_INSTR(Pshuflw, dst, 0, half_dup);
3889 __ Punpcklqdq(dst, dst);
3890 } else {
3891 ASSEMBLE_SIMD_IMM_INSTR(Pshufhw, dst, 0, half_dup);
3892 __ Punpckhqdq(dst, dst);
3893 }
3894 break;
3895 }
3896 case kX64S8x16Dup: {
3897 XMMRegister dst = i.OutputSimd128Register();
3898 uint8_t lane = i.InputInt8(1) & 0xf;
3899 DCHECK_EQ(dst, i.InputSimd128Register(0));
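// Interleave the register with itself so the chosen byte becomes a full
// 16-bit lane, then reuse the word-dup sequence below.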
3900 if (lane < 8) {
3901 __ Punpcklbw(dst, dst);
3902 } else {
3903 __ Punpckhbw(dst, dst);
3904 }
3905 lane &= 0x7;
3906 uint8_t lane4 = lane & 0x3;
3907 uint8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3908 if (lane < 4) {
3909 __ Pshuflw(dst, dst, half_dup);
3910 __ Punpcklqdq(dst, dst);
3911 } else {
3912 __ Pshufhw(dst, dst, half_dup);
3913 __ Punpckhqdq(dst, dst);
3914 }
3915 break;
3916 }
3917 case kX64S64x2UnpackHigh:
3918 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
3919 break;
3920 case kX64S32x4UnpackHigh:
3921 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
3922 break;
3923 case kX64S16x8UnpackHigh:
3924 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
3925 break;
3926 case kX64S8x16UnpackHigh:
3927 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
3928 break;
3929 case kX64S64x2UnpackLow:
3930 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
3931 break;
3932 case kX64S32x4UnpackLow:
3933 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
3934 break;
3935 case kX64S16x8UnpackLow:
3936 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
3937 break;
3938 case kX64S8x16UnpackLow:
3939 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
3940 break;
3941 case kX64S16x8UnzipHigh: {
3942 XMMRegister dst = i.OutputSimd128Register();
3943 XMMRegister src2 = dst;
3944 DCHECK_EQ(dst, i.InputSimd128Register(0));
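// Select the odd words of each source by shifting the 32-bit lanes right
// by 16, then narrow both sources into one register with packusdw.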
3945 if (instr->InputCount() == 2) {
3946 ASSEMBLE_SIMD_INSTR(Movdqu, kScratchDoubleReg, 1);
3947 __ Psrld(kScratchDoubleReg, byte{16});
3948 src2 = kScratchDoubleReg;
3949 }
3950 __ Psrld(dst, byte{16});
3951 __ Packusdw(dst, src2);
3952 break;
3953 }
3954 case kX64S16x8UnzipLow: {
3955 XMMRegister dst = i.OutputSimd128Register();
3956 XMMRegister src2 = dst;
3957 DCHECK_EQ(dst, i.InputSimd128Register(0));
3958 __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
3959 if (instr->InputCount() == 2) {
3960 ASSEMBLE_SIMD_IMM_INSTR(Pblendw, kScratchDoubleReg, 1, uint8_t{0x55});
3961 src2 = kScratchDoubleReg;
3962 }
3963 __ Pblendw(dst, kScratchDoubleReg, uint8_t{0xaa});
3964 __ Packusdw(dst, src2);
3965 break;
3966 }
3967 case kX64S8x16UnzipHigh: {
3968 XMMRegister dst = i.OutputSimd128Register();
3969 XMMRegister src2 = dst;
3970 DCHECK_EQ(dst, i.InputSimd128Register(0));
3971 if (instr->InputCount() == 2) {
3972 ASSEMBLE_SIMD_INSTR(Movdqu, kScratchDoubleReg, 1);
3973 __ Psrlw(kScratchDoubleReg, byte{8});
3974 src2 = kScratchDoubleReg;
3975 }
3976 __ Psrlw(dst, byte{8});
3977 __ Packuswb(dst, src2);
3978 break;
3979 }
3980 case kX64S8x16UnzipLow: {
3981 XMMRegister dst = i.OutputSimd128Register();
3982 XMMRegister src2 = dst;
3983 DCHECK_EQ(dst, i.InputSimd128Register(0));
3984 if (instr->InputCount() == 2) {
3985 ASSEMBLE_SIMD_INSTR(Movdqu, kScratchDoubleReg, 1);
3986 __ Psllw(kScratchDoubleReg, byte{8});
3987 __ Psrlw(kScratchDoubleReg, byte{8});
3988 src2 = kScratchDoubleReg;
3989 }
3990 __ Psllw(dst, byte{8});
3991 __ Psrlw(dst, byte{8});
3992 __ Packuswb(dst, src2);
3993 break;
3994 }
3995 case kX64S8x16TransposeLow: {
3996 XMMRegister dst = i.OutputSimd128Register();
3997 DCHECK_EQ(dst, i.InputSimd128Register(0));
3998 __ Psllw(dst, byte{8});
3999 if (instr->InputCount() == 1) {
4000 __ Movdqa(kScratchDoubleReg, dst);
4001 } else {
4002 DCHECK_EQ(2, instr->InputCount());
4003 ASSEMBLE_SIMD_INSTR(Movdqu, kScratchDoubleReg, 1);
4004 __ Psllw(kScratchDoubleReg, byte{8});
4005 }
4006 __ Psrlw(dst, byte{8});
4007 __ Por(dst, kScratchDoubleReg);
4008 break;
4009 }
4010 case kX64S8x16TransposeHigh: {
4011 XMMRegister dst = i.OutputSimd128Register();
4012 DCHECK_EQ(dst, i.InputSimd128Register(0));
4013 __ Psrlw(dst, byte{8});
4014 if (instr->InputCount() == 1) {
4015 __ Movdqa(kScratchDoubleReg, dst);
4016 } else {
4017 DCHECK_EQ(2, instr->InputCount());
4018 ASSEMBLE_SIMD_INSTR(Movdqu, kScratchDoubleReg, 1);
4019 __ Psrlw(kScratchDoubleReg, byte{8});
4020 }
4021 __ Psllw(kScratchDoubleReg, byte{8});
4022 __ Por(dst, kScratchDoubleReg);
4023 break;
4024 }
4025 case kX64S8x8Reverse:
4026 case kX64S8x4Reverse:
4027 case kX64S8x2Reverse: {
4028 DCHECK_EQ(1, instr->InputCount());
4029 XMMRegister dst = i.OutputSimd128Register();
4030 DCHECK_EQ(dst, i.InputSimd128Register(0));
4031 if (arch_opcode != kX64S8x2Reverse) {
4032 // First shuffle words into position.
4033 uint8_t shuffle_mask = arch_opcode == kX64S8x4Reverse ? 0xB1 : 0x1B;
4034 __ Pshuflw(dst, dst, shuffle_mask);
4035 __ Pshufhw(dst, dst, shuffle_mask);
4036 }
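// Now swap the bytes within each 16-bit lane: the high bytes move down in
// the scratch register, the low bytes move up in dst, and the OR
// recombines them.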
4037 __ Movdqa(kScratchDoubleReg, dst);
4038 __ Psrlw(kScratchDoubleReg, byte{8});
4039 __ Psllw(dst, byte{8});
4040 __ Por(dst, kScratchDoubleReg);
4041 break;
4042 }
4043 case kX64V128AnyTrue: {
4044 Register dst = i.OutputRegister();
4045 XMMRegister src = i.InputSimd128Register(0);
4046
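// Ptest sets ZF iff the source is all zeros, so setcc(not_equal)
// materializes 1 exactly when any bit of the vector is set. Clear dst
// first because setcc only writes the low byte.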
4047 __ xorq(dst, dst);
4048 __ Ptest(src, src);
4049 __ setcc(not_equal, dst);
4050 break;
4051 }
4052 // The lane structures need separate cases because the choice of
4053 // comparison instruction matters: e.g. given 0xff00, pcmpeqb returns
4054 // 0x0011 while pcmpeqw returns 0x0000, so ptest sets ZF to 0 and 1,
4055 // respectively.
4056 case kX64I64x2AllTrue: {
4057 ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqq);
4058 break;
4059 }
4060 case kX64I32x4AllTrue: {
4061 ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqd);
4062 break;
4063 }
4064 case kX64I16x8AllTrue: {
4065 ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqw);
4066 break;
4067 }
4068 case kX64I8x16AllTrue: {
4069 ASSEMBLE_SIMD_ALL_TRUE(Pcmpeqb);
4070 break;
4071 }
4072 case kAtomicStoreWord8: {
4073 ASSEMBLE_SEQ_CST_STORE(MachineRepresentation::kWord8);
4074 break;
4075 }
4076 case kAtomicStoreWord16: {
4077 ASSEMBLE_SEQ_CST_STORE(MachineRepresentation::kWord16);
4078 break;
4079 }
4080 case kAtomicStoreWord32: {
4081 ASSEMBLE_SEQ_CST_STORE(MachineRepresentation::kWord32);
4082 break;
4083 }
4084 case kX64Word64AtomicStoreWord64: {
4085 ASSEMBLE_SEQ_CST_STORE(MachineRepresentation::kWord64);
4086 break;
4087 }
4088 case kAtomicExchangeInt8: {
4089 DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32);
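// xchg with a memory operand is implicitly locked, so no lock prefix is
// needed; sign-extend the returned old value to 32 bits.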
4090 __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
4091 __ movsxbl(i.InputRegister(0), i.InputRegister(0));
4092 break;
4093 }
4094 case kAtomicExchangeUint8: {
4095 __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
4096 switch (AtomicWidthField::decode(opcode)) {
4097 case AtomicWidth::kWord32:
4098 __ movzxbl(i.InputRegister(0), i.InputRegister(0));
4099 break;
4100 case AtomicWidth::kWord64:
4101 __ movzxbq(i.InputRegister(0), i.InputRegister(0));
4102 break;
4103 }
4104 break;
4105 }
4106 case kAtomicExchangeInt16: {
4107 DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32);
4108 __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
4109 __ movsxwl(i.InputRegister(0), i.InputRegister(0));
4110 break;
4111 }
4112 case kAtomicExchangeUint16: {
4113 __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
4114 switch (AtomicWidthField::decode(opcode)) {
4115 case AtomicWidth::kWord32:
4116 __ movzxwl(i.InputRegister(0), i.InputRegister(0));
4117 break;
4118 case AtomicWidth::kWord64:
4119 __ movzxwq(i.InputRegister(0), i.InputRegister(0));
4120 break;
4121 }
4122 break;
4123 }
4124 case kAtomicExchangeWord32: {
4125 __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
4126 break;
4127 }
4128 case kAtomicCompareExchangeInt8: {
4129 DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32);
4130 __ lock();
4131 __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
4132 __ movsxbl(rax, rax);
4133 break;
4134 }
4135 case kAtomicCompareExchangeUint8: {
4136 __ lock();
4137 __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
4138 switch (AtomicWidthField::decode(opcode)) {
4139 case AtomicWidth::kWord32:
4140 __ movzxbl(rax, rax);
4141 break;
4142 case AtomicWidth::kWord64:
4143 __ movzxbq(rax, rax);
4144 break;
4145 }
4146 break;
4147 }
4148 case kAtomicCompareExchangeInt16: {
4149 DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32);
4150 __ lock();
4151 __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
4152 __ movsxwl(rax, rax);
4153 break;
4154 }
4155 case kAtomicCompareExchangeUint16: {
4156 __ lock();
4157 __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
4158 switch (AtomicWidthField::decode(opcode)) {
4159 case AtomicWidth::kWord32:
4160 __ movzxwl(rax, rax);
4161 break;
4162 case AtomicWidth::kWord64:
4163 __ movzxwq(rax, rax);
4164 break;
4165 }
4166 break;
4167 }
4168 case kAtomicCompareExchangeWord32: {
4169 __ lock();
4170 __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
4171 if (AtomicWidthField::decode(opcode) == AtomicWidth::kWord64) {
4172 // Zero-extend the 32 bit value to 64 bit.
4173 __ movl(rax, rax);
4174 }
4175 break;
4176 }
4177 case kX64Word64AtomicExchangeUint64: {
4178 __ xchgq(i.InputRegister(0), i.MemoryOperand(1));
4179 break;
4180 }
4181 case kX64Word64AtomicCompareExchangeUint64: {
4182 __ lock();
4183 __ cmpxchgq(i.MemoryOperand(2), i.InputRegister(1));
4184 break;
4185 }
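// Each atomic read-modify-write op expands to a compare-exchange loop: load
// the old value into rax, apply the operation on a temporary, then lock
// cmpxchg and retry if another thread wrote the location in between.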
4186 #define ATOMIC_BINOP_CASE(op, inst32, inst64) \
4187 case kAtomic##op##Int8: \
4188 DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32); \
4189 ASSEMBLE_ATOMIC_BINOP(inst32, movb, cmpxchgb); \
4190 __ movsxbl(rax, rax); \
4191 break; \
4192 case kAtomic##op##Uint8: \
4193 switch (AtomicWidthField::decode(opcode)) { \
4194 case AtomicWidth::kWord32: \
4195 ASSEMBLE_ATOMIC_BINOP(inst32, movb, cmpxchgb); \
4196 __ movzxbl(rax, rax); \
4197 break; \
4198 case AtomicWidth::kWord64: \
4199 ASSEMBLE_ATOMIC64_BINOP(inst64, movb, cmpxchgb); \
4200 __ movzxbq(rax, rax); \
4201 break; \
4202 } \
4203 break; \
4204 case kAtomic##op##Int16: \
4205 DCHECK_EQ(AtomicWidthField::decode(opcode), AtomicWidth::kWord32); \
4206 ASSEMBLE_ATOMIC_BINOP(inst32, movw, cmpxchgw); \
4207 __ movsxwl(rax, rax); \
4208 break; \
4209 case kAtomic##op##Uint16: \
4210 switch (AtomicWidthField::decode(opcode)) { \
4211 case AtomicWidth::kWord32: \
4212 ASSEMBLE_ATOMIC_BINOP(inst32, movw, cmpxchgw); \
4213 __ movzxwl(rax, rax); \
4214 break; \
4215 case AtomicWidth::kWord64: \
4216 ASSEMBLE_ATOMIC64_BINOP(inst64, movw, cmpxchgw); \
4217 __ movzxwq(rax, rax); \
4218 break; \
4219 } \
4220 break; \
4221 case kAtomic##op##Word32: \
4222 switch (AtomicWidthField::decode(opcode)) { \
4223 case AtomicWidth::kWord32: \
4224 ASSEMBLE_ATOMIC_BINOP(inst32, movl, cmpxchgl); \
4225 break; \
4226 case AtomicWidth::kWord64: \
4227 ASSEMBLE_ATOMIC64_BINOP(inst64, movl, cmpxchgl); \
4228 break; \
4229 } \
4230 break; \
4231 case kX64Word64Atomic##op##Uint64: \
4232 ASSEMBLE_ATOMIC64_BINOP(inst64, movq, cmpxchgq); \
4233 break;
4234 ATOMIC_BINOP_CASE(Add, addl, addq)
4235 ATOMIC_BINOP_CASE(Sub, subl, subq)
4236 ATOMIC_BINOP_CASE(And, andl, andq)
4237 ATOMIC_BINOP_CASE(Or, orl, orq)
4238 ATOMIC_BINOP_CASE(Xor, xorl, xorq)
4239 #undef ATOMIC_BINOP_CASE
4240
4241 case kAtomicLoadInt8:
4242 case kAtomicLoadUint8:
4243 case kAtomicLoadInt16:
4244 case kAtomicLoadUint16:
4245 case kAtomicLoadWord32:
4246 UNREACHABLE(); // Won't be generated by instruction selector.
4247 }
4248 return kSuccess;
4249 } // NOLINT(readability/fn_size)
4250
4251 #undef ASSEMBLE_PINSR
4252 #undef ASSEMBLE_UNOP
4253 #undef ASSEMBLE_BINOP
4254 #undef ASSEMBLE_COMPARE
4255 #undef ASSEMBLE_MULT
4256 #undef ASSEMBLE_SHIFT
4257 #undef ASSEMBLE_MOVX
4258 #undef ASSEMBLE_SSE_BINOP
4259 #undef ASSEMBLE_SSE_UNOP
4260 #undef ASSEMBLE_AVX_BINOP
4261 #undef ASSEMBLE_IEEE754_BINOP
4262 #undef ASSEMBLE_IEEE754_UNOP
4263 #undef ASSEMBLE_ATOMIC_BINOP
4264 #undef ASSEMBLE_ATOMIC64_BINOP
4265 #undef ASSEMBLE_SIMD_INSTR
4266 #undef ASSEMBLE_SIMD_IMM_INSTR
4267 #undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
4268 #undef ASSEMBLE_SIMD_IMM_SHUFFLE
4269 #undef ASSEMBLE_SIMD_ALL_TRUE
4270 #undef ASSEMBLE_SIMD_SHIFT
4271 #undef ASSEMBLE_SEQ_CST_STORE
4272
4273 namespace {
4274
4275 Condition FlagsConditionToCondition(FlagsCondition condition) {
4276 switch (condition) {
4277 case kUnorderedEqual:
4278 case kEqual:
4279 return equal;
4280 case kUnorderedNotEqual:
4281 case kNotEqual:
4282 return not_equal;
4283 case kSignedLessThan:
4284 return less;
4285 case kSignedGreaterThanOrEqual:
4286 return greater_equal;
4287 case kSignedLessThanOrEqual:
4288 return less_equal;
4289 case kSignedGreaterThan:
4290 return greater;
4291 case kUnsignedLessThan:
4292 return below;
4293 case kUnsignedGreaterThanOrEqual:
4294 return above_equal;
4295 case kUnsignedLessThanOrEqual:
4296 return below_equal;
4297 case kUnsignedGreaterThan:
4298 return above;
4299 case kOverflow:
4300 return overflow;
4301 case kNotOverflow:
4302 return no_overflow;
4303 default:
4304 break;
4305 }
4306 UNREACHABLE();
4307 }
4308
4309 } // namespace
4310
4311 // Assembles branches after this instruction.
4312 void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
4313 Label::Distance flabel_distance =
4314 branch->fallthru ? Label::kNear : Label::kFar;
4315 Label* tlabel = branch->true_label;
4316 Label* flabel = branch->false_label;
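// Floating-point comparisons leave the parity flag set for NaN inputs, so
// route the unordered case to the false label for kUnorderedEqual and to
// the true label for kUnorderedNotEqual before testing the main condition.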
4317 if (branch->condition == kUnorderedEqual) {
4318 __ j(parity_even, flabel, flabel_distance);
4319 } else if (branch->condition == kUnorderedNotEqual) {
4320 __ j(parity_even, tlabel);
4321 }
4322 __ j(FlagsConditionToCondition(branch->condition), tlabel);
4323
4324 if (!branch->fallthru) __ jmp(flabel, flabel_distance);
4325 }
4326
4327 void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
4328 BranchInfo* branch) {
4329 Label::Distance flabel_distance =
4330 branch->fallthru ? Label::kNear : Label::kFar;
4331 Label* tlabel = branch->true_label;
4332 Label* flabel = branch->false_label;
4333 Label nodeopt;
4334 if (branch->condition == kUnorderedEqual) {
4335 __ j(parity_even, flabel, flabel_distance);
4336 } else if (branch->condition == kUnorderedNotEqual) {
4337 __ j(parity_even, tlabel);
4338 }
4339 __ j(FlagsConditionToCondition(branch->condition), tlabel);
4340
4341 if (FLAG_deopt_every_n_times > 0) {
4342 ExternalReference counter =
4343 ExternalReference::stress_deopt_count(isolate());
4344
4345 __ pushfq();
4346 __ pushq(rax);
4347 __ load_rax(counter);
4348 __ decl(rax);
4349 __ j(not_zero, &nodeopt, Label::kNear);
4350
4351 __ Move(rax, FLAG_deopt_every_n_times);
4352 __ store_rax(counter);
4353 __ popq(rax);
4354 __ popfq();
4355 __ jmp(tlabel);
4356
4357 __ bind(&nodeopt);
4358 __ store_rax(counter);
4359 __ popq(rax);
4360 __ popfq();
4361 }
4362
4363 if (!branch->fallthru) {
4364 __ jmp(flabel, flabel_distance);
4365 }
4366 }
4367
4368 void CodeGenerator::AssembleArchJump(RpoNumber target) {
4369 if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
4370 }
4371
4372 #if V8_ENABLE_WEBASSEMBLY
4373 void CodeGenerator::AssembleArchTrap(Instruction* instr,
4374 FlagsCondition condition) {
4375 auto ool = zone()->New<WasmOutOfLineTrap>(this, instr);
4376 Label* tlabel = ool->entry();
4377 Label end;
4378 if (condition == kUnorderedEqual) {
4379 __ j(parity_even, &end, Label::kNear);
4380 } else if (condition == kUnorderedNotEqual) {
4381 __ j(parity_even, tlabel);
4382 }
4383 __ j(FlagsConditionToCondition(condition), tlabel);
4384 __ bind(&end);
4385 }
4386 #endif // V8_ENABLE_WEBASSEMBLY
4387
4388 // Assembles boolean materializations after this instruction.
4389 void CodeGenerator::AssembleArchBoolean(Instruction* instr,
4390 FlagsCondition condition) {
4391 X64OperandConverter i(this, instr);
4392 Label done;
4393
4394 // Materialize a full 64-bit 1 or 0 value. The result register is always the
4395 // last output of the instruction.
4396 Label check;
4397 DCHECK_NE(0u, instr->OutputCount());
4398 Register reg = i.OutputRegister(instr->OutputCount() - 1);
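// NaN operands set the parity flag: force the result to 0 (for unordered
// equal) or 1 (for unordered not-equal) before the generic setcc path.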
4399 if (condition == kUnorderedEqual) {
4400 __ j(parity_odd, &check, Label::kNear);
4401 __ Move(reg, 0);
4402 __ jmp(&done, Label::kNear);
4403 } else if (condition == kUnorderedNotEqual) {
4404 __ j(parity_odd, &check, Label::kNear);
4405 __ Move(reg, 1);
4406 __ jmp(&done, Label::kNear);
4407 }
4408 __ bind(&check);
4409 __ setcc(FlagsConditionToCondition(condition), reg);
4410 __ movzxbl(reg, reg);
4411 __ bind(&done);
4412 }
4413
4414 void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
4415 X64OperandConverter i(this, instr);
4416 Register input = i.InputRegister(0);
4417 std::vector<std::pair<int32_t, Label*>> cases;
4418 for (size_t index = 2; index < instr->InputCount(); index += 2) {
4419 cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
4420 }
4421 AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
4422 cases.data() + cases.size());
4423 }
4424
4425 void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
4426 X64OperandConverter i(this, instr);
4427 Register input = i.InputRegister(0);
4428 int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2);
4429 Label** cases = zone()->NewArray<Label*>(case_count);
4430 for (int32_t index = 0; index < case_count; ++index) {
4431 cases[index] = GetLabel(i.InputRpo(index + 2));
4432 }
4433 Label* const table = AddJumpTable(cases, case_count);
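// Bounds-check against the case count, jumping to the default block on
// overflow, then do an indirect jump through the table of 8-byte entries.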
4434 __ cmpl(input, Immediate(case_count));
4435 __ j(above_equal, GetLabel(i.InputRpo(1)));
4436 __ leaq(kScratchRegister, Operand(table));
4437 __ jmp(Operand(kScratchRegister, input, times_8, 0));
4438 }
4439
4440 void CodeGenerator::AssembleArchSelect(Instruction* instr,
4441 FlagsCondition condition) {
4442 X64OperandConverter i(this, instr);
4443 MachineRepresentation rep =
4444 LocationOperand::cast(instr->OutputAt(0))->representation();
4445 Condition cc = FlagsConditionToCondition(condition);
4446 DCHECK_EQ(i.OutputRegister(), i.InputRegister(instr->InputCount() - 2));
4447 size_t last_input = instr->InputCount() - 1;
4448 // kUnorderedNotEqual can be implemented more efficiently than
4449 // kUnorderedEqual. As the OR of two flags, it can be done with just two
4450 // cmovs. If the condition was originally a kUnorderedEqual, expect the
4451 // instruction selector to have inverted it and swapped the input.
4452 DCHECK_NE(condition, kUnorderedEqual);
4453 if (rep == MachineRepresentation::kWord32) {
4454 if (HasRegisterInput(instr, last_input)) {
4455 __ cmovl(cc, i.OutputRegister(), i.InputRegister(last_input));
4456 if (condition == kUnorderedNotEqual) {
4457 __ cmovl(parity_even, i.OutputRegister(), i.InputRegister(last_input));
4458 }
4459 } else {
4460 __ cmovl(cc, i.OutputRegister(), i.InputOperand(last_input));
4461 if (condition == kUnorderedNotEqual) {
4462 __ cmovl(parity_even, i.OutputRegister(), i.InputOperand(last_input));
4463 }
4464 }
4465 } else {
4466 DCHECK_EQ(rep, MachineRepresentation::kWord64);
4467 if (HasRegisterInput(instr, last_input)) {
4468 __ cmovq(cc, i.OutputRegister(), i.InputRegister(last_input));
4469 if (condition == kUnorderedNotEqual) {
4470 __ cmovq(parity_even, i.OutputRegister(), i.InputRegister(last_input));
4471 }
4472 } else {
4473 __ cmovq(cc, i.OutputRegister(), i.InputOperand(last_input));
4474 if (condition == kUnorderedNotEqual) {
4475 __ cmovq(parity_even, i.OutputRegister(), i.InputOperand(last_input));
4476 }
4477 }
4478 }
4479 }
4480
4481 namespace {
4482
4483 static const int kQuadWordSize = 16;
4484
4485 } // namespace
4486
4487 void CodeGenerator::FinishFrame(Frame* frame) {
4488 CallDescriptor* call_descriptor = linkage()->GetIncomingDescriptor();
4489
4490 const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
4491 if (saves_fp != 0) { // Save callee-saved XMM registers.
4492 frame->AlignSavedCalleeRegisterSlots();
4493 const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
4494 frame->AllocateSavedCalleeRegisterSlots(
4495 saves_fp_count * (kQuadWordSize / kSystemPointerSize));
4496 }
4497 const RegList saves = call_descriptor->CalleeSavedRegisters();
4498 if (saves != 0) { // Save callee-saved registers.
4499 int count = 0;
4500 for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
4501 if (((1 << i) & saves)) {
4502 ++count;
4503 }
4504 }
4505 frame->AllocateSavedCalleeRegisterSlots(count);
4506 }
4507 }
4508
4509 void CodeGenerator::AssembleConstructFrame() {
4510 auto call_descriptor = linkage()->GetIncomingDescriptor();
4511 if (frame_access_state()->has_frame()) {
4512 int pc_base = __ pc_offset();
4513
4514 if (call_descriptor->IsCFunctionCall()) {
4515 __ pushq(rbp);
4516 __ movq(rbp, rsp);
4517 #if V8_ENABLE_WEBASSEMBLY
4518 if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
4519 __ Push(Immediate(StackFrame::TypeToMarker(StackFrame::C_WASM_ENTRY)));
4520 // Reserve stack space for saving the c_entry_fp later.
4521 __ AllocateStackSpace(kSystemPointerSize);
4522 }
4523 #endif // V8_ENABLE_WEBASSEMBLY
4524 } else if (call_descriptor->IsJSFunctionCall()) {
4525 __ Prologue();
4526 } else {
4527 __ StubPrologue(info()->GetOutputStackFrameType());
4528 #if V8_ENABLE_WEBASSEMBLY
4529 if (call_descriptor->IsWasmFunctionCall()) {
4530 __ pushq(kWasmInstanceRegister);
4531 } else if (call_descriptor->IsWasmImportWrapper() ||
4532 call_descriptor->IsWasmCapiFunction()) {
4533 // Wasm import wrappers are passed a tuple in the place of the instance.
4534 // Unpack the tuple into the instance and the target callable.
4535 // This must be done here in the codegen because it cannot be expressed
4536 // properly in the graph.
4537 __ LoadTaggedPointerField(
4538 kJSFunctionRegister,
4539 FieldOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
4540 __ LoadTaggedPointerField(
4541 kWasmInstanceRegister,
4542 FieldOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
4543 __ pushq(kWasmInstanceRegister);
4544 if (call_descriptor->IsWasmCapiFunction()) {
4545 // Reserve space for saving the PC later.
4546 __ AllocateStackSpace(kSystemPointerSize);
4547 }
4548 }
4549 #endif // V8_ENABLE_WEBASSEMBLY
4550 }
4551
4552 unwinding_info_writer_.MarkFrameConstructed(pc_base);
4553 }
4554 int required_slots =
4555 frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();
4556
4557 if (info()->is_osr()) {
4558 // TurboFan OSR-compiled functions cannot be entered directly.
4559 __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
4560
4561 // Unoptimized code jumps directly to this entrypoint while the unoptimized
4562 // frame is still on the stack. Optimized code uses OSR values directly from
4563 // the unoptimized frame. Thus, all that needs to be done is to allocate the
4564 // remaining stack slots.
4565 __ RecordComment("-- OSR entrypoint --");
4566 osr_pc_offset_ = __ pc_offset();
4567 required_slots -= static_cast<int>(osr_helper()->UnoptimizedFrameSlots());
4568 }
4569
4570 const RegList saves = call_descriptor->CalleeSavedRegisters();
4571 const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
4572
4573 if (required_slots > 0) {
4574 DCHECK(frame_access_state()->has_frame());
4575 #if V8_ENABLE_WEBASSEMBLY
4576 if (info()->IsWasm() && required_slots * kSystemPointerSize > 4 * KB) {
4577 // For WebAssembly functions with big frames we have to do the stack
4578 // overflow check before we construct the frame. Otherwise we may not
4579 // have enough space on the stack to call the runtime for the stack
4580 // overflow.
4581 Label done;
4582
4583 // If the frame is bigger than the stack, we throw the stack overflow
4584 // exception unconditionally. This also lets us avoid the integer
4585 // overflow check in the condition code.
4586 if (required_slots * kSystemPointerSize < FLAG_stack_size * KB) {
4587 __ movq(kScratchRegister,
4588 FieldOperand(kWasmInstanceRegister,
4589 WasmInstanceObject::kRealStackLimitAddressOffset));
4590 __ movq(kScratchRegister, Operand(kScratchRegister, 0));
4591 __ addq(kScratchRegister,
4592 Immediate(required_slots * kSystemPointerSize));
4593 __ cmpq(rsp, kScratchRegister);
4594 __ j(above_equal, &done, Label::kNear);
4595 }
4596
4597 __ near_call(wasm::WasmCode::kWasmStackOverflow,
4598 RelocInfo::WASM_STUB_CALL);
4599 // The call does not return, hence we can ignore any references and just
4600 // define an empty safepoint.
4601 ReferenceMap* reference_map = zone()->New<ReferenceMap>(zone());
4602 RecordSafepoint(reference_map);
4603 __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
4604 __ bind(&done);
4605 }
4606 #endif // V8_ENABLE_WEBASSEMBLY
4607
4608 // Skip callee-saved and return slots, which are created below.
4609 required_slots -= base::bits::CountPopulation(saves);
4610 required_slots -= base::bits::CountPopulation(saves_fp) *
4611 (kQuadWordSize / kSystemPointerSize);
4612 required_slots -= frame()->GetReturnSlotCount();
4613 if (required_slots > 0) {
4614 __ AllocateStackSpace(required_slots * kSystemPointerSize);
4615 }
4616 }
4617
4618 if (saves_fp != 0) { // Save callee-saved XMM registers.
4619 const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
4620 const int stack_size = saves_fp_count * kQuadWordSize;
4621 // Adjust the stack pointer.
4622 __ AllocateStackSpace(stack_size);
4623 // Store the registers on the stack.
4624 int slot_idx = 0;
4625 for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
4626 if (!((1 << i) & saves_fp)) continue;
4627 __ Movdqu(Operand(rsp, kQuadWordSize * slot_idx),
4628 XMMRegister::from_code(i));
4629 slot_idx++;
4630 }
4631 }
4632
4633 if (saves != 0) { // Save callee-saved registers.
4634 for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
4635 if (!((1 << i) & saves)) continue;
4636 __ pushq(Register::from_code(i));
4637 }
4638 }
4639
4640 // Allocate return slots (located after callee-saved).
4641 if (frame()->GetReturnSlotCount() > 0) {
4642 __ AllocateStackSpace(frame()->GetReturnSlotCount() * kSystemPointerSize);
4643 }
4644 }
4645
4646 void CodeGenerator::AssembleReturn(InstructionOperand* additional_pop_count) {
4647 auto call_descriptor = linkage()->GetIncomingDescriptor();
4648
4649 // Restore registers.
4650 const RegList saves = call_descriptor->CalleeSavedRegisters();
4651 if (saves != 0) {
4652 const int returns = frame()->GetReturnSlotCount();
4653 if (returns != 0) {
4654 __ addq(rsp, Immediate(returns * kSystemPointerSize));
4655 }
4656 for (int i = 0; i < Register::kNumRegisters; i++) {
4657 if (!((1 << i) & saves)) continue;
4658 __ popq(Register::from_code(i));
4659 }
4660 }
4661 const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
4662 if (saves_fp != 0) {
4663 const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
4664 const int stack_size = saves_fp_count * kQuadWordSize;
4665 // Load the registers from the stack.
4666 int slot_idx = 0;
4667 for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
4668 if (!((1 << i) & saves_fp)) continue;
4669 __ Movdqu(XMMRegister::from_code(i),
4670 Operand(rsp, kQuadWordSize * slot_idx));
4671 slot_idx++;
4672 }
4673 // Adjust the stack pointer.
4674 __ addq(rsp, Immediate(stack_size));
4675 }
4676
4677 unwinding_info_writer_.MarkBlockWillExit();
4678
4679 X64OperandConverter g(this, nullptr);
4680 int parameter_slots = static_cast<int>(call_descriptor->ParameterSlotCount());
4681
4682 // {additional_pop_count} is only greater than zero if {parameter_slots == 0}.
4683 // Check RawMachineAssembler::PopAndReturn.
4684 if (parameter_slots != 0) {
4685 if (additional_pop_count->IsImmediate()) {
4686 DCHECK_EQ(g.ToConstant(additional_pop_count).ToInt32(), 0);
4687 } else if (FLAG_debug_code) {
4688 __ cmpq(g.ToRegister(additional_pop_count), Immediate(0));
4689 __ Assert(equal, AbortReason::kUnexpectedAdditionalPopValue);
4690 }
4691 }
4692
4693 Register argc_reg = rcx;
4694 // Functions with JS linkage have at least one parameter (the receiver).
4695 // If {parameter_slots} == 0, this is a builtin with
4696 // kDontAdaptArgumentsSentinel, which pops the JS arguments by
4697 // itself.
4698 const bool drop_jsargs = parameter_slots != 0 &&
4699 frame_access_state()->has_frame() &&
4700 call_descriptor->IsJSFunctionCall();
4701 if (call_descriptor->IsCFunctionCall()) {
4702 AssembleDeconstructFrame();
4703 } else if (frame_access_state()->has_frame()) {
4704 if (additional_pop_count->IsImmediate() &&
4705 g.ToConstant(additional_pop_count).ToInt32() == 0) {
4706 // Canonicalize JSFunction return sites for now.
4707 if (return_label_.is_bound()) {
4708 __ jmp(&return_label_);
4709 return;
4710 } else {
4711 __ bind(&return_label_);
4712 }
4713 }
4714 if (drop_jsargs) {
4715 // Get the actual argument count.
4716 DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & argc_reg.bit());
4717 __ movq(argc_reg, Operand(rbp, StandardFrameConstants::kArgCOffset));
4718 }
4719 AssembleDeconstructFrame();
4720 }
4721
4722 if (drop_jsargs) {
4723 // We must pop all arguments from the stack (including the receiver).
4724 // The number of arguments without the receiver is
4725 // max(argc_reg, parameter_slots-1), and the receiver is added in
4726 // DropArguments().
4727 Label mismatch_return;
4728 Register scratch_reg = r10;
4729 DCHECK_NE(argc_reg, scratch_reg);
4730 DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & scratch_reg.bit());
4731 DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & argc_reg.bit());
4732 if (kJSArgcIncludesReceiver) {
4733 __ cmpq(argc_reg, Immediate(parameter_slots));
4734 } else {
4735 int parameter_slots_without_receiver = parameter_slots - 1;
4736 __ cmpq(argc_reg, Immediate(parameter_slots_without_receiver));
4737 }
4738 __ j(greater, &mismatch_return, Label::kNear);
4739 __ Ret(parameter_slots * kSystemPointerSize, scratch_reg);
4740 __ bind(&mismatch_return);
4741 __ DropArguments(argc_reg, scratch_reg, TurboAssembler::kCountIsInteger,
4742 kJSArgcIncludesReceiver
4743 ? TurboAssembler::kCountIncludesReceiver
4744 : TurboAssembler::kCountExcludesReceiver);
4745 // We use a return instead of a jump for better return address prediction.
4746 __ Ret();
4747 } else if (additional_pop_count->IsImmediate()) {
4748 Register scratch_reg = r10;
4749 DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & scratch_reg.bit());
4750 int additional_count = g.ToConstant(additional_pop_count).ToInt32();
4751 size_t pop_size = (parameter_slots + additional_count) * kSystemPointerSize;
4752 CHECK_LE(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
4753 __ Ret(static_cast<int>(pop_size), scratch_reg);
4754 } else {
4755 Register pop_reg = g.ToRegister(additional_pop_count);
4756 Register scratch_reg = pop_reg == r10 ? rcx : r10;
4757 DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & scratch_reg.bit());
4758 DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & pop_reg.bit());
4759 int pop_size = static_cast<int>(parameter_slots * kSystemPointerSize);
4760 __ PopReturnAddressTo(scratch_reg);
4761 __ leaq(rsp, Operand(rsp, pop_reg, times_system_pointer_size,
4762 static_cast<int>(pop_size)));
4763 __ PushReturnAddressFrom(scratch_reg);
4764 __ Ret();
4765 }
4766 }
4767
4768 void CodeGenerator::FinishCode() { tasm()->PatchConstPool(); }
4769
4770 void CodeGenerator::PrepareForDeoptimizationExits(
4771 ZoneDeque<DeoptimizationExit*>* exits) {}
4772
4773 void CodeGenerator::IncrementStackAccessCounter(
4774 InstructionOperand* source, InstructionOperand* destination) {
4775 DCHECK(FLAG_trace_turbo_stack_accesses);
4776 if (!info()->IsOptimizing()) {
4777 #if V8_ENABLE_WEBASSEMBLY
4778 if (!info()->IsWasm()) return;
4779 #else
4780 return;
4781 #endif // V8_ENABLE_WEBASSEMBLY
4782 }
4783 DCHECK_NOT_NULL(debug_name_);
4784 auto IncrementCounter = [&](ExternalReference counter) {
4785 __ incl(__ ExternalReferenceAsOperand(counter));
4786 };
4787 if (source->IsAnyStackSlot()) {
4788 IncrementCounter(
4789 ExternalReference::address_of_load_from_stack_count(debug_name_));
4790 }
4791 if (destination->IsAnyStackSlot()) {
4792 IncrementCounter(
4793 ExternalReference::address_of_store_to_stack_count(debug_name_));
4794 }
4795 }
4796
4797 void CodeGenerator::AssembleMove(InstructionOperand* source,
4798 InstructionOperand* destination) {
4799 X64OperandConverter g(this, nullptr);
4800 // Helper function to write the given constant to the dst register.
4801 auto MoveConstantToRegister = [&](Register dst, Constant src) {
4802 switch (src.type()) {
4803 case Constant::kInt32: {
4804 if (RelocInfo::IsWasmReference(src.rmode())) {
4805 __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
4806 } else {
4807 int32_t value = src.ToInt32();
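// Zeroing via xorl gives a shorter encoding and breaks the dependency
// on the old register value.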
4808 if (value == 0) {
4809 __ xorl(dst, dst);
4810 } else {
4811 __ movl(dst, Immediate(value));
4812 }
4813 }
4814 break;
4815 }
4816 case Constant::kInt64:
4817 if (RelocInfo::IsWasmReference(src.rmode())) {
4818 __ movq(dst, Immediate64(src.ToInt64(), src.rmode()));
4819 } else {
4820 __ Move(dst, src.ToInt64());
4821 }
4822 break;
4823 case Constant::kFloat32:
4824 __ MoveNumber(dst, src.ToFloat32());
4825 break;
4826 case Constant::kFloat64:
4827 __ MoveNumber(dst, src.ToFloat64().value());
4828 break;
4829 case Constant::kExternalReference:
4830 __ Move(dst, src.ToExternalReference());
4831 break;
4832 case Constant::kHeapObject: {
4833 Handle<HeapObject> src_object = src.ToHeapObject();
4834 RootIndex index;
4835 if (IsMaterializableFromRoot(src_object, &index)) {
4836 __ LoadRoot(dst, index);
4837 } else {
4838 __ Move(dst, src_object);
4839 }
4840 break;
4841 }
4842 case Constant::kCompressedHeapObject: {
4843 Handle<HeapObject> src_object = src.ToHeapObject();
4844 RootIndex index;
4845 if (IsMaterializableFromRoot(src_object, &index)) {
4846 __ LoadRoot(dst, index);
4847 } else {
4848 __ Move(dst, src_object, RelocInfo::COMPRESSED_EMBEDDED_OBJECT);
4849 }
4850 break;
4851 }
4852 case Constant::kDelayedStringConstant: {
4853 const StringConstantBase* src_constant = src.ToDelayedStringConstant();
4854 __ MoveStringConstant(dst, src_constant);
4855 break;
4856 }
4857 case Constant::kRpoNumber:
4858 UNREACHABLE(); // TODO(dcarney): load of labels on x64.
4859 }
4860 };
4861 // Helper function to write the given constant to the stack.
4862 auto MoveConstantToSlot = [&](Operand dst, Constant src) {
4863 if (!RelocInfo::IsWasmReference(src.rmode())) {
4864 switch (src.type()) {
4865 case Constant::kInt32:
4866 __ Move(dst, src.ToInt32());
4867 return;
4868 case Constant::kInt64:
4869 __ Move(dst, src.ToInt64());
4870 return;
4871 default:
4872 break;
4873 }
4874 }
4875 MoveConstantToRegister(kScratchRegister, src);
4876 __ movq(dst, kScratchRegister);
4877 };
4878
4879 if (FLAG_trace_turbo_stack_accesses) {
4880 IncrementStackAccessCounter(source, destination);
4881 }
4882
4883 // Dispatch on the source and destination operand kinds.
4884 switch (MoveType::InferMove(source, destination)) {
4885 case MoveType::kRegisterToRegister:
4886 if (source->IsRegister()) {
4887 __ movq(g.ToRegister(destination), g.ToRegister(source));
4888 } else {
4889 DCHECK(source->IsFPRegister());
4890 __ Movapd(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
4891 }
4892 return;
4893 case MoveType::kRegisterToStack: {
4894 Operand dst = g.ToOperand(destination);
4895 if (source->IsRegister()) {
4896 __ movq(dst, g.ToRegister(source));
4897 } else {
4898 DCHECK(source->IsFPRegister());
4899 XMMRegister src = g.ToDoubleRegister(source);
4900 MachineRepresentation rep =
4901 LocationOperand::cast(source)->representation();
4902 if (rep != MachineRepresentation::kSimd128) {
4903 __ Movsd(dst, src);
4904 } else {
4905 __ Movups(dst, src);
4906 }
4907 }
4908 return;
4909 }
4910 case MoveType::kStackToRegister: {
4911 Operand src = g.ToOperand(source);
4912 if (source->IsStackSlot()) {
4913 __ movq(g.ToRegister(destination), src);
4914 } else {
4915 DCHECK(source->IsFPStackSlot());
4916 XMMRegister dst = g.ToDoubleRegister(destination);
4917 MachineRepresentation rep =
4918 LocationOperand::cast(source)->representation();
4919 if (rep != MachineRepresentation::kSimd128) {
4920 __ Movsd(dst, src);
4921 } else {
4922 __ Movups(dst, src);
4923 }
4924 }
4925 return;
4926 }
4927 case MoveType::kStackToStack: {
4928 Operand src = g.ToOperand(source);
4929 Operand dst = g.ToOperand(destination);
4930 if (source->IsStackSlot()) {
4931 // Spill on demand to use a temporary register for memory-to-memory
4932 // moves.
4933 __ movq(kScratchRegister, src);
4934 __ movq(dst, kScratchRegister);
4935 } else {
4936 MachineRepresentation rep =
4937 LocationOperand::cast(source)->representation();
4938 if (rep != MachineRepresentation::kSimd128) {
4939 __ Movsd(kScratchDoubleReg, src);
4940 __ Movsd(dst, kScratchDoubleReg);
4941 } else {
4942 DCHECK(source->IsSimd128StackSlot());
4943 __ Movups(kScratchDoubleReg, src);
4944 __ Movups(dst, kScratchDoubleReg);
4945 }
4946 }
4947 return;
4948 }
4949 case MoveType::kConstantToRegister: {
4950 Constant src = g.ToConstant(source);
4951 if (destination->IsRegister()) {
4952 MoveConstantToRegister(g.ToRegister(destination), src);
4953 } else {
4954 DCHECK(destination->IsFPRegister());
4955 XMMRegister dst = g.ToDoubleRegister(destination);
4956 if (src.type() == Constant::kFloat32) {
4957 // TODO(turbofan): Can we do better here?
4958 __ Move(dst, bit_cast<uint32_t>(src.ToFloat32()));
4959 } else {
4960 DCHECK_EQ(src.type(), Constant::kFloat64);
4961 __ Move(dst, src.ToFloat64().AsUint64());
4962 }
4963 }
4964 return;
4965 }
4966 case MoveType::kConstantToStack: {
4967 Constant src = g.ToConstant(source);
4968 Operand dst = g.ToOperand(destination);
4969 if (destination->IsStackSlot()) {
4970 MoveConstantToSlot(dst, src);
4971 } else {
4972 DCHECK(destination->IsFPStackSlot());
4973 if (src.type() == Constant::kFloat32) {
4974 __ movl(dst, Immediate(bit_cast<uint32_t>(src.ToFloat32())));
4975 } else {
4976 DCHECK_EQ(src.type(), Constant::kFloat64);
4977 __ Move(dst, src.ToFloat64().AsUint64());
4978 }
4979 }
4980 return;
4981 }
4982 }
4983 UNREACHABLE();
4984 }
4985
4986 void CodeGenerator::AssembleSwap(InstructionOperand* source,
4987 InstructionOperand* destination) {
4988 if (FLAG_trace_turbo_stack_accesses) {
4989 IncrementStackAccessCounter(source, destination);
4990 IncrementStackAccessCounter(destination, source);
4991 }
4992
4993 X64OperandConverter g(this, nullptr);
4994 // Dispatch on the source and destination operand kinds. Not all
4995 // combinations are possible.
4996 switch (MoveType::InferSwap(source, destination)) {
4997 case MoveType::kRegisterToRegister: {
4998 if (source->IsRegister()) {
4999 Register src = g.ToRegister(source);
5000 Register dst = g.ToRegister(destination);
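        // The classic three-move swap through the scratch register; e.g. for
        // rax <-> rbx (registers purely illustrative):
        //   movq kScratchRegister, rax
        //   movq rax, rbx
        //   movq rbx, kScratchRegister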
        __ movq(kScratchRegister, src);
        __ movq(src, dst);
        __ movq(dst, kScratchRegister);
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        XMMRegister dst = g.ToDoubleRegister(destination);
        __ Movapd(kScratchDoubleReg, src);
        __ Movapd(src, dst);
        __ Movapd(dst, kScratchDoubleReg);
      }
      return;
    }
    case MoveType::kRegisterToStack: {
      if (source->IsRegister()) {
        Register src = g.ToRegister(source);
        Operand dst = g.ToOperand(destination);
        __ movq(kScratchRegister, src);
        __ movq(src, dst);
        __ movq(dst, kScratchRegister);
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        Operand dst = g.ToOperand(destination);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep != MachineRepresentation::kSimd128) {
          __ Movsd(kScratchDoubleReg, src);
          __ Movsd(src, dst);
          __ Movsd(dst, kScratchDoubleReg);
        } else {
          __ Movups(kScratchDoubleReg, src);
          __ Movups(src, dst);
          __ Movups(dst, kScratchDoubleReg);
        }
      }
      return;
    }
    case MoveType::kStackToStack: {
      Operand src = g.ToOperand(source);
      Operand dst = g.ToOperand(destination);
      MachineRepresentation rep =
          LocationOperand::cast(source)->representation();
      if (rep != MachineRepresentation::kSimd128) {
        Register tmp = kScratchRegister;
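        // pushq/popq can copy 8 bytes directly between memory operands, so a
        // single scratch register suffices for the swap. The unwinding info
        // writer is notified because rsp temporarily moves across each
        // push/pop pair.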
        __ movq(tmp, dst);
        __ pushq(src);  // Then use stack to copy src to destination.
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSystemPointerSize);
        __ popq(dst);
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         -kSystemPointerSize);
        __ movq(src, tmp);
      } else {
        // Without AVX, misaligned reads and writes will trap. Move using the
        // stack, in two parts.
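        // Roughly: kScratchDoubleReg preserves the old dst while the 16-byte
        // src is copied over dst as two 8-byte push/pop pairs (slot offsets 0
        // and kSystemPointerSize); the preserved value is then written back
        // to src.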
        __ movups(kScratchDoubleReg, dst);  // Save dst in scratch register.
        __ pushq(src);  // Then use stack to copy src to destination.
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSystemPointerSize);
        __ popq(dst);
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         -kSystemPointerSize);
        __ pushq(g.ToOperand(source, kSystemPointerSize));
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         kSystemPointerSize);
        __ popq(g.ToOperand(destination, kSystemPointerSize));
        unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
                                                         -kSystemPointerSize);
        __ movups(src, kScratchDoubleReg);
      }
      return;
    }
    default:
      UNREACHABLE();
  }
}

void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
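  // Each entry is emitted with dq as a 64-bit label address, carrying the
  // internal-reference relocation needed so the addresses are fixed up when
  // the code object is finalized.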
  for (size_t index = 0; index < target_count; ++index) {
    __ dq(targets[index]);
  }
}

#undef __

}  // namespace compiler
}  // namespace internal
}  // namespace v8