// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/wasm/baseline/liftoff-assembler.h"

#include <sstream>

#include "src/base/optional.h"
#include "src/codegen/assembler-inl.h"
#include "src/codegen/macro-assembler-inl.h"
#include "src/compiler/linkage.h"
#include "src/compiler/wasm-compiler.h"
#include "src/utils/ostreams.h"
#include "src/wasm/function-body-decoder-impl.h"
#include "src/wasm/wasm-linkage.h"
#include "src/wasm/wasm-opcodes.h"

namespace v8 {
namespace internal {
namespace wasm {

using VarState = LiftoffAssembler::VarState;

constexpr ValueType LiftoffAssembler::kWasmIntPtr;

namespace {

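// Records a parallel assignment of registers (moves from other registers,
// loads of constants, and fills from stack slots) and executes it when the
// recipe goes out of scope: independent moves first, then move cycles (broken
// by spilling one register of the cycle), then all loads.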
class StackTransferRecipe {
  struct RegisterMove {
    LiftoffRegister src;
    ValueType type;
    constexpr RegisterMove(LiftoffRegister src, ValueType type)
        : src(src), type(type) {}
  };

  struct RegisterLoad {
    enum LoadKind : uint8_t {
      kConstant,      // load a constant value into a register.
      kStack,         // fill a register from a stack slot.
      kLowHalfStack,  // fill a register from the low half of a stack slot.
      kHighHalfStack  // fill a register from the high half of a stack slot.
    };

    LoadKind kind;
    ValueType type;
    int32_t value;  // i32 constant value or stack offset, depending on kind.

    // Named constructors.
    static RegisterLoad Const(WasmValue constant) {
      if (constant.type() == kWasmI32) {
        return {kConstant, kWasmI32, constant.to_i32()};
      }
      DCHECK_EQ(kWasmI64, constant.type());
      DCHECK_EQ(constant.to_i32_unchecked(), constant.to_i64_unchecked());
      return {kConstant, kWasmI64, constant.to_i32_unchecked()};
    }
    static RegisterLoad Stack(int32_t offset, ValueType type) {
      return {kStack, type, offset};
    }
    static RegisterLoad HalfStack(int32_t offset, RegPairHalf half) {
      return {half == kLowWord ? kLowHalfStack : kHighHalfStack, kWasmI32,
              offset};
    }

   private:
    RegisterLoad(LoadKind kind, ValueType type, int32_t value)
        : kind(kind), type(type), value(value) {}
  };

 public:
  explicit StackTransferRecipe(LiftoffAssembler* wasm_asm) : asm_(wasm_asm) {}
  ~StackTransferRecipe() { Execute(); }

  void Execute() {
    // First, execute register moves. Then load constants and stack values into
    // registers.
    ExecuteMoves();
    DCHECK(move_dst_regs_.is_empty());
    ExecuteLoads();
    DCHECK(load_dst_regs_.is_empty());
  }

  void TransferStackSlot(const VarState& dst, const VarState& src) {
    DCHECK_EQ(dst.type(), src.type());
    switch (dst.loc()) {
      case VarState::kStack:
        switch (src.loc()) {
          case VarState::kStack:
            if (src.offset() == dst.offset()) break;
            asm_->MoveStackValue(dst.offset(), src.offset(), src.type());
            break;
          case VarState::kRegister:
            asm_->Spill(dst.offset(), src.reg(), src.type());
            break;
          case VarState::kIntConst:
            asm_->Spill(dst.offset(), src.constant());
            break;
        }
        break;
      case VarState::kRegister:
        LoadIntoRegister(dst.reg(), src, src.offset());
        break;
      case VarState::kIntConst:
        DCHECK_EQ(dst, src);
        break;
    }
  }

  void LoadIntoRegister(LiftoffRegister dst,
                        const LiftoffAssembler::VarState& src,
                        uint32_t src_offset) {
    switch (src.loc()) {
      case VarState::kStack:
        LoadStackSlot(dst, src_offset, src.type());
        break;
      case VarState::kRegister:
        DCHECK_EQ(dst.reg_class(), src.reg_class());
        if (dst != src.reg()) MoveRegister(dst, src.reg(), src.type());
        break;
      case VarState::kIntConst:
        LoadConstant(dst, src.constant());
        break;
    }
  }

  void LoadI64HalfIntoRegister(LiftoffRegister dst,
                               const LiftoffAssembler::VarState& src,
                               int offset, RegPairHalf half) {
    // Use CHECK such that the remaining code is statically dead if
    // {kNeedI64RegPair} is false.
    CHECK(kNeedI64RegPair);
    DCHECK_EQ(kWasmI64, src.type());
    switch (src.loc()) {
      case VarState::kStack:
        LoadI64HalfStackSlot(dst, offset, half);
        break;
      case VarState::kRegister: {
        LiftoffRegister src_half =
            half == kLowWord ? src.reg().low() : src.reg().high();
        if (dst != src_half) MoveRegister(dst, src_half, kWasmI32);
        break;
      }
      case VarState::kIntConst:
        int32_t value = src.i32_const();
        // The high word is the sign extension of the low word.
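        // (For example, constant -1 yields high word -1, constant 5 yields 0.)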
        if (half == kHighWord) value = value >> 31;
        LoadConstant(dst, WasmValue(value));
        break;
    }
  }

  void MoveRegister(LiftoffRegister dst, LiftoffRegister src, ValueType type) {
    DCHECK_NE(dst, src);
    DCHECK_EQ(dst.reg_class(), src.reg_class());
    DCHECK_EQ(reg_class_for(type), src.reg_class());
    if (src.is_gp_pair()) {
      DCHECK_EQ(kWasmI64, type);
      if (dst.low() != src.low()) MoveRegister(dst.low(), src.low(), kWasmI32);
      if (dst.high() != src.high())
        MoveRegister(dst.high(), src.high(), kWasmI32);
      return;
    }
    if (src.is_fp_pair()) {
      DCHECK_EQ(kWasmS128, type);
      if (dst.low() != src.low()) {
        MoveRegister(dst.low(), src.low(), kWasmF64);
        MoveRegister(dst.high(), src.high(), kWasmF64);
      }
      return;
    }
    if (move_dst_regs_.has(dst)) {
      DCHECK_EQ(register_move(dst)->src, src);
      // Non-fp registers can only occur with the exact same type.
      DCHECK_IMPLIES(!dst.is_fp(), register_move(dst)->type == type);
      // It can happen that one fp register holds both the f32 zero and the f64
      // zero, as the initial value for local variables. Move the value as f64
      // in that case.
      if (type == kWasmF64) register_move(dst)->type = kWasmF64;
      return;
    }
    move_dst_regs_.set(dst);
    ++*src_reg_use_count(src);
    *register_move(dst) = {src, type};
  }

  void LoadConstant(LiftoffRegister dst, WasmValue value) {
    DCHECK(!load_dst_regs_.has(dst));
    load_dst_regs_.set(dst);
    if (dst.is_gp_pair()) {
      DCHECK_EQ(kWasmI64, value.type());
      int64_t i64 = value.to_i64();
      *register_load(dst.low()) =
          RegisterLoad::Const(WasmValue(static_cast<int32_t>(i64)));
      *register_load(dst.high()) =
          RegisterLoad::Const(WasmValue(static_cast<int32_t>(i64 >> 32)));
    } else {
      *register_load(dst) = RegisterLoad::Const(value);
    }
  }

  void LoadStackSlot(LiftoffRegister dst, uint32_t stack_offset,
                     ValueType type) {
    if (load_dst_regs_.has(dst)) {
      // It can happen that we spilled the same register to different stack
      // slots, and then we reload them later into the same dst register.
      // In that case, it is enough to load one of the stack slots.
      return;
    }
    load_dst_regs_.set(dst);
    if (dst.is_gp_pair()) {
      DCHECK_EQ(kWasmI64, type);
      *register_load(dst.low()) =
          RegisterLoad::HalfStack(stack_offset, kLowWord);
      *register_load(dst.high()) =
          RegisterLoad::HalfStack(stack_offset, kHighWord);
    } else if (dst.is_fp_pair()) {
      DCHECK_EQ(kWasmS128, type);
      // load_dst_regs_.set above will set both low and high fp regs.
      // But unlike gp_pair, we load a kWasmS128 in one go in ExecuteLoads.
      // So unset the top fp register to skip loading it.
      load_dst_regs_.clear(dst.high());
      *register_load(dst.low()) = RegisterLoad::Stack(stack_offset, type);
    } else {
      *register_load(dst) = RegisterLoad::Stack(stack_offset, type);
    }
  }

  void LoadI64HalfStackSlot(LiftoffRegister dst, int offset, RegPairHalf half) {
    if (load_dst_regs_.has(dst)) {
      // It can happen that we spilled the same register to different stack
      // slots, and then we reload them later into the same dst register.
      // In that case, it is enough to load one of the stack slots.
      return;
    }
    load_dst_regs_.set(dst);
    *register_load(dst) = RegisterLoad::HalfStack(offset, half);
  }

 private:
  using MovesStorage =
      std::aligned_storage<kAfterMaxLiftoffRegCode * sizeof(RegisterMove),
                           alignof(RegisterMove)>::type;
  using LoadsStorage =
      std::aligned_storage<kAfterMaxLiftoffRegCode * sizeof(RegisterLoad),
                           alignof(RegisterLoad)>::type;

  ASSERT_TRIVIALLY_COPYABLE(RegisterMove);
  ASSERT_TRIVIALLY_COPYABLE(RegisterLoad);

  MovesStorage register_moves_;  // uninitialized
  LoadsStorage register_loads_;  // uninitialized
  int src_reg_use_count_[kAfterMaxLiftoffRegCode] = {0};
  LiftoffRegList move_dst_regs_;
  LiftoffRegList load_dst_regs_;
  LiftoffAssembler* const asm_;

  RegisterMove* register_move(LiftoffRegister reg) {
    return reinterpret_cast<RegisterMove*>(&register_moves_) +
           reg.liftoff_code();
  }
  RegisterLoad* register_load(LiftoffRegister reg) {
    return reinterpret_cast<RegisterLoad*>(&register_loads_) +
           reg.liftoff_code();
  }
  int* src_reg_use_count(LiftoffRegister reg) {
    return src_reg_use_count_ + reg.liftoff_code();
  }

  void ExecuteMove(LiftoffRegister dst) {
    RegisterMove* move = register_move(dst);
    DCHECK_EQ(0, *src_reg_use_count(dst));
    asm_->Move(dst, move->src, move->type);
    ClearExecutedMove(dst);
  }

  void ClearExecutedMove(LiftoffRegister dst) {
    DCHECK(move_dst_regs_.has(dst));
    move_dst_regs_.clear(dst);
    RegisterMove* move = register_move(dst);
    DCHECK_LT(0, *src_reg_use_count(move->src));
    if (--*src_reg_use_count(move->src)) return;
    // src count dropped to zero. If this is a destination register, execute
    // that move now.
    if (!move_dst_regs_.has(move->src)) return;
    ExecuteMove(move->src);
  }

  void ExecuteMoves() {
    // Execute all moves whose {dst} is not being used as src in another move.
    // If any src count drops to zero, also (transitively) execute the
    // corresponding move to that register.
    for (LiftoffRegister dst : move_dst_regs_) {
      // Check if already handled via transitivity in {ClearExecutedMove}.
      if (!move_dst_regs_.has(dst)) continue;
      if (*src_reg_use_count(dst)) continue;
      ExecuteMove(dst);
    }

    // All remaining moves are parts of a cycle. Just spill the first one, then
    // process all remaining moves in that cycle. Repeat for all cycles.
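    // Example: for the pending cycle {r0 <- r1, r1 <- r0}, spill r1's value to
    // a new stack slot, record a load of that slot into r0, and then the move
    // r1 <- r0 can be executed; the recorded load is done in {ExecuteLoads}.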
    int last_spill_offset = asm_->TopSpillOffset();
    while (!move_dst_regs_.is_empty()) {
      // TODO(clemensb): Use an unused register if available.
      LiftoffRegister dst = move_dst_regs_.GetFirstRegSet();
      RegisterMove* move = register_move(dst);
      last_spill_offset += LiftoffAssembler::SlotSizeForType(move->type);
      LiftoffRegister spill_reg = move->src;
      asm_->Spill(last_spill_offset, spill_reg, move->type);
      // Remember to reload into the destination register later.
      LoadStackSlot(dst, last_spill_offset, move->type);
      ClearExecutedMove(dst);
    }
  }

  void ExecuteLoads() {
    for (LiftoffRegister dst : load_dst_regs_) {
      RegisterLoad* load = register_load(dst);
      switch (load->kind) {
        case RegisterLoad::kConstant:
          asm_->LoadConstant(dst, load->type == kWasmI64
                                      ? WasmValue(int64_t{load->value})
                                      : WasmValue(int32_t{load->value}));
          break;
        case RegisterLoad::kStack:
          if (kNeedS128RegPair && load->type == kWasmS128) {
            asm_->Fill(LiftoffRegister::ForFpPair(dst.fp()), load->value,
                       load->type);
          } else {
            asm_->Fill(dst, load->value, load->type);
          }
          break;
        case RegisterLoad::kLowHalfStack:
          // Half of a register pair, {dst} must be a gp register.
          asm_->FillI64Half(dst.gp(), load->value, kLowWord);
          break;
        case RegisterLoad::kHighHalfStack:
          // Half of a register pair, {dst} must be a gp register.
          asm_->FillI64Half(dst.gp(), load->value, kHighWord);
          break;
      }
    }
    load_dst_regs_ = {};
  }

  DISALLOW_COPY_AND_ASSIGN(StackTransferRecipe);
};

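// Remembers which source register was already copied to which target
// register, so that a register occurring multiple times in the source is
// mapped to the same target register each time.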
class RegisterReuseMap {
 public:
  void Add(LiftoffRegister src, LiftoffRegister dst) {
    if (auto previous = Lookup(src)) {
      DCHECK_EQ(previous, dst);
      return;
    }
    map_.emplace_back(src);
    map_.emplace_back(dst);
  }

  base::Optional<LiftoffRegister> Lookup(LiftoffRegister src) {
    for (auto it = map_.begin(), end = map_.end(); it != end; it += 2) {
      if (it->is_gp_pair() == src.is_gp_pair() &&
          it->is_fp_pair() == src.is_fp_pair() && *it == src)
        return *(it + 1);
    }
    return {};
  }

 private:
  // {map_} holds pairs of <src, dst>.
  base::SmallVector<LiftoffRegister, 8> map_;
};

enum MergeKeepStackSlots : bool {
  kKeepStackSlots = true,
  kTurnStackSlotsIntoRegisters = false
};
enum MergeAllowConstants : bool {
  kConstantsAllowed = true,
  kConstantsNotAllowed = false
};
enum ReuseRegisters : bool {
  kReuseRegisters = true,
  kNoReuseRegisters = false
};
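// Initialize {count} slots of {target} from the corresponding {source} slots,
// keeping values in registers where possible. Registers in {used_regs} are
// not picked as new registers; if no register is available, the slot is
// initialized as a stack slot.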
void InitMergeRegion(LiftoffAssembler::CacheState* state,
                     const VarState* source, VarState* target, uint32_t count,
                     MergeKeepStackSlots keep_stack_slots,
                     MergeAllowConstants allow_constants,
                     ReuseRegisters reuse_registers, LiftoffRegList used_regs) {
  RegisterReuseMap register_reuse_map;
  for (const VarState* source_end = source + count; source < source_end;
       ++source, ++target) {
    if ((source->is_stack() && keep_stack_slots) ||
        (source->is_const() && allow_constants)) {
      *target = *source;
      continue;
    }
    base::Optional<LiftoffRegister> reg;
    // First try: Keep the same register, if it's free.
    if (source->is_reg() && state->is_free(source->reg())) {
      reg = source->reg();
    }
    // Second try: Use the same register we used before (if we reuse registers).
    if (!reg && reuse_registers) {
      reg = register_reuse_map.Lookup(source->reg());
    }
    // Third try: Use any free register.
    RegClass rc = reg_class_for(source->type());
    if (!reg && state->has_unused_register(rc, used_regs)) {
      reg = state->unused_register(rc, used_regs);
    }
    if (!reg) {
      // No free register; make this a stack slot.
      *target = VarState(source->type(), source->offset());
      continue;
    }
    if (reuse_registers) register_reuse_map.Add(source->reg(), *reg);
    state->inc_used(*reg);
    *target = VarState(source->type(), *reg, source->offset());
  }
}

}  // namespace

// TODO(clemensb): Don't copy the full parent state (this makes us N^2).
void LiftoffAssembler::CacheState::InitMerge(const CacheState& source,
                                             uint32_t num_locals,
                                             uint32_t arity,
                                             uint32_t stack_depth) {
  // |------locals------|---(in between)----|--(discarded)--|----merge----|
  //  <-- num_locals --> <-- stack_depth -->^stack_base      <-- arity -->

  uint32_t stack_base = stack_depth + num_locals;
  uint32_t target_height = stack_base + arity;
  uint32_t discarded = source.stack_height() - target_height;
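  // {discarded} is the number of slots of {source} between the in-between
  // region and the merge region; they do not survive the merge.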
  DCHECK(stack_state.empty());

  DCHECK_GE(source.stack_height(), stack_base);
  stack_state.resize_no_init(target_height);

  const VarState* source_begin = source.stack_state.data();
  VarState* target_begin = stack_state.data();

  // Try to keep locals and the merge region in their registers. Registers used
  // multiple times need to be copied to another free register. Compute the
  // list of used registers.
  LiftoffRegList used_regs;
  for (auto& src : VectorOf(source_begin, num_locals)) {
    if (src.is_reg()) used_regs.set(src.reg());
  }
  for (auto& src : VectorOf(source_begin + stack_base + discarded, arity)) {
    if (src.is_reg()) used_regs.set(src.reg());
  }

  // Initialize the merge region. If this region moves, try to turn stack slots
  // into registers since we need to load the value anyways.
  MergeKeepStackSlots keep_merge_stack_slots =
      discarded == 0 ? kKeepStackSlots : kTurnStackSlotsIntoRegisters;
  InitMergeRegion(this, source_begin + stack_base + discarded,
                  target_begin + stack_base, arity, keep_merge_stack_slots,
                  kConstantsNotAllowed, kNoReuseRegisters, used_regs);

  // Initialize the locals region. Here, stack slots stay stack slots (because
  // they do not move). Try to keep registers in registers, but avoid
  // duplicates.
  InitMergeRegion(this, source_begin, target_begin, num_locals, kKeepStackSlots,
                  kConstantsNotAllowed, kNoReuseRegisters, used_regs);
  // Sanity check: All the {used_regs} are really in use now.
  DCHECK_EQ(used_regs, used_registers & used_regs);

  // Last, initialize the section in between. Here, constants are allowed, but
  // registers which are already used for the merge region or locals must be
  // moved to other registers or spilled. If a register appears twice in the
  // source region, ensure to use the same register twice in the target region.
  InitMergeRegion(this, source_begin + num_locals, target_begin + num_locals,
                  stack_depth, kKeepStackSlots, kConstantsAllowed,
                  kReuseRegisters, used_regs);
}

void LiftoffAssembler::CacheState::Steal(const CacheState& source) {
  // Just use the move assignment operator.
  *this = std::move(source);
}

void LiftoffAssembler::CacheState::Split(const CacheState& source) {
  // Call the private copy assignment operator.
  *this = source;
}

namespace {

constexpr AssemblerOptions DefaultLiftoffOptions() {
  return AssemblerOptions{};
}

}  // namespace

LiftoffAssembler::LiftoffAssembler(std::unique_ptr<AssemblerBuffer> buffer)
    : TurboAssembler(nullptr, DefaultLiftoffOptions(), CodeObjectRequired::kNo,
                     std::move(buffer)) {
  set_abort_hard(true);  // Avoid calls to Abort.
}

LiftoffAssembler::~LiftoffAssembler() {
  if (num_locals_ > kInlineLocalTypes) {
    free(more_local_types_);
  }
}

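// Make sure the value of {slot} is held in a register: fill it from its stack
// slot or materialize the constant if necessary. Registers in {pinned} are not
// chosen as the destination.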
LiftoffRegister LiftoffAssembler::LoadToRegister(VarState slot,
                                                 LiftoffRegList pinned) {
  switch (slot.loc()) {
    case VarState::kStack: {
      LiftoffRegister reg =
          GetUnusedRegister(reg_class_for(slot.type()), pinned);
      Fill(reg, slot.offset(), slot.type());
      return reg;
    }
    case VarState::kRegister:
      cache_state_.dec_used(slot.reg());
      return slot.reg();
    case VarState::kIntConst: {
      RegClass rc =
          kNeedI64RegPair && slot.type() == kWasmI64 ? kGpRegPair : kGpReg;
      LiftoffRegister reg = GetUnusedRegister(rc, pinned);
      LoadConstant(reg, slot.constant());
      return reg;
    }
  }
  UNREACHABLE();
}

LiftoffRegister LiftoffAssembler::PopToRegister(LiftoffRegList pinned) {
  DCHECK(!cache_state_.stack_state.empty());
  VarState slot = cache_state_.stack_state.back();
  cache_state_.stack_state.pop_back();
  return LoadToRegister(slot, pinned);
}

LiftoffRegister LiftoffAssembler::PeekToRegister(int index,
                                                 LiftoffRegList pinned) {
  DCHECK_LT(index, cache_state_.stack_state.size());
  VarState& slot = cache_state_.stack_state.end()[-1 - index];
  LiftoffRegister reg = LoadToRegister(slot, pinned);
  if (!slot.is_reg()) {
    slot.MakeRegister(reg);
  }
  return reg;
}

void LiftoffAssembler::MergeFullStackWith(const CacheState& target,
                                          const CacheState& source) {
  DCHECK_EQ(source.stack_height(), target.stack_height());
  // TODO(clemensb): Reuse the same StackTransferRecipe object to save some
  // allocations.
  StackTransferRecipe transfers(this);
  for (uint32_t i = 0, e = source.stack_height(); i < e; ++i) {
    transfers.TransferStackSlot(target.stack_state[i], source.stack_state[i]);
  }
}

void LiftoffAssembler::MergeStackWith(const CacheState& target,
                                      uint32_t arity) {
  // Before: ----------------|----- (discarded) ----|--- arity ---|
  //                         ^target_stack_height   ^stack_base   ^stack_height
  // After:  ----|-- arity --|
  //             ^           ^target_stack_height
  //             ^target_stack_base
  uint32_t stack_height = cache_state_.stack_height();
  uint32_t target_stack_height = target.stack_height();
  DCHECK_LE(target_stack_height, stack_height);
  DCHECK_LE(arity, target_stack_height);
  uint32_t stack_base = stack_height - arity;
  uint32_t target_stack_base = target_stack_height - arity;
  StackTransferRecipe transfers(this);
  for (uint32_t i = 0; i < target_stack_base; ++i) {
    transfers.TransferStackSlot(target.stack_state[i],
                                cache_state_.stack_state[i]);
  }
  for (uint32_t i = 0; i < arity; ++i) {
    transfers.TransferStackSlot(target.stack_state[target_stack_base + i],
                                cache_state_.stack_state[stack_base + i]);
  }
}

void LiftoffAssembler::Spill(VarState* slot) {
  switch (slot->loc()) {
    case VarState::kStack:
      return;
    case VarState::kRegister:
      Spill(slot->offset(), slot->reg(), slot->type());
      cache_state_.dec_used(slot->reg());
      break;
    case VarState::kIntConst:
      Spill(slot->offset(), slot->constant());
      break;
  }
  slot->MakeStack();
}

void LiftoffAssembler::SpillLocals() {
  for (uint32_t i = 0; i < num_locals_; ++i) {
    Spill(&cache_state_.stack_state[i]);
  }
}

void LiftoffAssembler::SpillAllRegisters() {
  for (uint32_t i = 0, e = cache_state_.stack_height(); i < e; ++i) {
    auto& slot = cache_state_.stack_state[i];
    if (!slot.is_reg()) continue;
    Spill(slot.offset(), slot.reg(), slot.type());
    slot.MakeStack();
  }
  cache_state_.reset_used_registers();
}

namespace {
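// Compute where each parameter in {slots} has to go according to
// {call_descriptor}: parameters passed on the stack are added to
// {stack_slots}, parameters passed in registers are recorded as transfers in
// {stack_transfers}, and every register used for a parameter is added to
// {param_regs}.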
void PrepareStackTransfers(const FunctionSig* sig,
                           compiler::CallDescriptor* call_descriptor,
                           const VarState* slots,
                           LiftoffStackSlots* stack_slots,
                           StackTransferRecipe* stack_transfers,
                           LiftoffRegList* param_regs) {
  // Process parameters backwards, such that pushes of caller frame slots are
  // in the correct order.
  uint32_t call_desc_input_idx =
      static_cast<uint32_t>(call_descriptor->InputCount());
  uint32_t num_params = static_cast<uint32_t>(sig->parameter_count());
  for (uint32_t i = num_params; i > 0; --i) {
    const uint32_t param = i - 1;
    ValueType type = sig->GetParam(param);
    const bool is_gp_pair = kNeedI64RegPair && type == kWasmI64;
    const int num_lowered_params = is_gp_pair ? 2 : 1;
    const VarState& slot = slots[param];
    const uint32_t stack_offset = slot.offset();
    // Process both halves of a register pair separately, because they are
    // passed as separate parameters. One or both of them could end up on the
    // stack.
    for (int lowered_idx = 0; lowered_idx < num_lowered_params; ++lowered_idx) {
      const RegPairHalf half =
          is_gp_pair && lowered_idx == 0 ? kHighWord : kLowWord;
      --call_desc_input_idx;
      compiler::LinkageLocation loc =
          call_descriptor->GetInputLocation(call_desc_input_idx);
      if (loc.IsRegister()) {
        DCHECK(!loc.IsAnyRegister());
        RegClass rc = is_gp_pair ? kGpReg : reg_class_for(type);
        int reg_code = loc.AsRegister();

        // Initialize to anything, will be set in all branches below.
        LiftoffRegister reg = kGpCacheRegList.GetFirstRegSet();
        if (!kSimpleFPAliasing && type == kWasmF32) {
          // Liftoff assumes a one-to-one mapping between float registers and
          // double registers, and so does not distinguish between f32 and f64
          // registers. The f32 register code must therefore be halved in order
          // to pass the f64 code to Liftoff.
          DCHECK_EQ(0, reg_code % 2);
          reg = LiftoffRegister::from_code(rc, (reg_code / 2));
        } else if (kNeedS128RegPair && type == kWasmS128) {
          // Similarly for double registers and SIMD registers, the SIMD code
          // needs to be doubled to pass the f64 code to Liftoff.
          reg = LiftoffRegister::ForFpPair(
              DoubleRegister::from_code(reg_code * 2));
        } else {
          reg = LiftoffRegister::from_code(rc, reg_code);
        }

        param_regs->set(reg);
        if (is_gp_pair) {
          stack_transfers->LoadI64HalfIntoRegister(reg, slot, stack_offset,
                                                   half);
        } else {
          stack_transfers->LoadIntoRegister(reg, slot, stack_offset);
        }
      } else {
        DCHECK(loc.IsCallerFrameSlot());
        stack_slots->Add(slot, stack_offset, half);
      }
    }
  }
}

}  // namespace

void LiftoffAssembler::PrepareBuiltinCall(
    const FunctionSig* sig, compiler::CallDescriptor* call_descriptor,
    std::initializer_list<VarState> params) {
  LiftoffStackSlots stack_slots(this);
  StackTransferRecipe stack_transfers(this);
  LiftoffRegList param_regs;
  PrepareStackTransfers(sig, call_descriptor, params.begin(), &stack_slots,
                        &stack_transfers, &param_regs);
  // Create all the slots.
  stack_slots.Construct();
  // Execute the stack transfers before filling the instance register.
  stack_transfers.Execute();

  // Reset register use counters.
  cache_state_.reset_used_registers();
  SpillAllRegisters();
}

void LiftoffAssembler::PrepareCall(const FunctionSig* sig,
                                   compiler::CallDescriptor* call_descriptor,
                                   Register* target,
                                   Register* target_instance) {
  uint32_t num_params = static_cast<uint32_t>(sig->parameter_count());
  // Input 0 is the call target.
  constexpr size_t kInputShift = 1;

  // Spill all cache slots which are not being used as parameters.
  // Don't update any register use counters, they will be reset later anyway.
  for (uint32_t idx = 0, end = cache_state_.stack_height() - num_params;
       idx < end; ++idx) {
    VarState& slot = cache_state_.stack_state[idx];
    if (!slot.is_reg()) continue;
    Spill(slot.offset(), slot.reg(), slot.type());
    slot.MakeStack();
  }

  LiftoffStackSlots stack_slots(this);
  StackTransferRecipe stack_transfers(this);
  LiftoffRegList param_regs;

  // Move the target instance (if supplied) into the correct instance register.
  compiler::LinkageLocation instance_loc =
      call_descriptor->GetInputLocation(kInputShift);
  DCHECK(instance_loc.IsRegister() && !instance_loc.IsAnyRegister());
  Register instance_reg = Register::from_code(instance_loc.AsRegister());
  param_regs.set(instance_reg);
  if (target_instance && *target_instance != instance_reg) {
    stack_transfers.MoveRegister(LiftoffRegister(instance_reg),
                                 LiftoffRegister(*target_instance),
                                 kWasmIntPtr);
  }

  if (num_params) {
    uint32_t param_base = cache_state_.stack_height() - num_params;
    PrepareStackTransfers(sig, call_descriptor,
                          &cache_state_.stack_state[param_base], &stack_slots,
                          &stack_transfers, &param_regs);
  }

  // If the target register overlaps with a parameter register, then move the
  // target to another free register, or spill to the stack.
  if (target && param_regs.has(LiftoffRegister(*target))) {
    // Try to find another free register.
    LiftoffRegList free_regs = kGpCacheRegList.MaskOut(param_regs);
    if (!free_regs.is_empty()) {
      LiftoffRegister new_target = free_regs.GetFirstRegSet();
      stack_transfers.MoveRegister(new_target, LiftoffRegister(*target),
                                   kWasmIntPtr);
      *target = new_target.gp();
    } else {
      stack_slots.Add(LiftoffAssembler::VarState(LiftoffAssembler::kWasmIntPtr,
                                                 LiftoffRegister(*target), 0));
      *target = no_reg;
    }
  }

  // Create all the slots.
  stack_slots.Construct();
  // Execute the stack transfers before filling the instance register.
  stack_transfers.Execute();

  // Pop parameters from the value stack.
  cache_state_.stack_state.pop_back(num_params);

  // Reset register use counters.
  cache_state_.reset_used_registers();

  // Reload the instance from the stack.
  if (!target_instance) {
    FillInstanceInto(instance_reg);
  }
}

void LiftoffAssembler::FinishCall(const FunctionSig* sig,
                                  compiler::CallDescriptor* call_descriptor) {
  const size_t return_count = sig->return_count();
  if (return_count != 0) {
    DCHECK_EQ(1, return_count);
    ValueType return_type = sig->GetReturn(0);
    const bool needs_gp_pair = needs_gp_reg_pair(return_type);
    const bool needs_fp_pair = needs_fp_reg_pair(return_type);
    DCHECK_EQ(needs_gp_pair ? 2 : 1, call_descriptor->ReturnCount());
    RegClass rc = needs_gp_pair
                      ? kGpReg
                      : needs_fp_pair ? kFpReg : reg_class_for(return_type);
#if V8_TARGET_ARCH_ARM
    // If the return register was not d0 for f32, the code value would have to
    // be halved as is done for the parameter registers.
    DCHECK_EQ(call_descriptor->GetReturnLocation(0).AsRegister(), 0);
#endif
    LiftoffRegister return_reg = LiftoffRegister::from_code(
        rc, call_descriptor->GetReturnLocation(0).AsRegister());
    DCHECK(GetCacheRegList(rc).has(return_reg));
    if (needs_gp_pair) {
      LiftoffRegister high_reg = LiftoffRegister::from_code(
          rc, call_descriptor->GetReturnLocation(1).AsRegister());
      DCHECK(GetCacheRegList(rc).has(high_reg));
      return_reg = LiftoffRegister::ForPair(return_reg.gp(), high_reg.gp());
    } else if (needs_fp_pair) {
      DCHECK_EQ(0, return_reg.fp().code() % 2);
      return_reg = LiftoffRegister::ForFpPair(return_reg.fp());
    }
    DCHECK(!cache_state_.is_used(return_reg));
    PushRegister(return_type, return_reg);
  }
}

void LiftoffAssembler::Move(LiftoffRegister dst, LiftoffRegister src,
                            ValueType type) {
  DCHECK_EQ(dst.reg_class(), src.reg_class());
  DCHECK_NE(dst, src);
  if (kNeedI64RegPair && dst.is_gp_pair()) {
    // Use the {StackTransferRecipe} to move pairs, as the registers in the
    // pairs might overlap.
    StackTransferRecipe(this).MoveRegister(dst, src, type);
  } else if (dst.is_gp()) {
    Move(dst.gp(), src.gp(), type);
  } else {
    Move(dst.fp(), src.fp(), type);
  }
}

void LiftoffAssembler::ParallelRegisterMove(
    Vector<ParallelRegisterMoveTuple> tuples) {
  StackTransferRecipe stack_transfers(this);
  for (auto tuple : tuples) {
    if (tuple.dst == tuple.src) continue;
    stack_transfers.MoveRegister(tuple.dst, tuple.src, tuple.type);
  }
}

void LiftoffAssembler::MoveToReturnRegisters(const FunctionSig* sig) {
  // We do not support multi-value yet.
  DCHECK_EQ(1, sig->return_count());
  ValueType return_type = sig->GetReturn(0);
  StackTransferRecipe stack_transfers(this);
  // Defaults to a gp reg, will be set below if return type is not gp.
  LiftoffRegister return_reg = LiftoffRegister(kGpReturnRegisters[0]);

  if (needs_gp_reg_pair(return_type)) {
    return_reg =
        LiftoffRegister::ForPair(kGpReturnRegisters[0], kGpReturnRegisters[1]);
  } else if (needs_fp_reg_pair(return_type)) {
    return_reg = LiftoffRegister::ForFpPair(kFpReturnRegisters[0]);
  } else if (reg_class_for(return_type) == kFpReg) {
    return_reg = LiftoffRegister(kFpReturnRegisters[0]);
  } else {
    DCHECK_EQ(kGpReg, reg_class_for(return_type));
  }
  stack_transfers.LoadIntoRegister(return_reg, cache_state_.stack_state.back(),
                                   cache_state_.stack_state.back().offset());
}

#ifdef ENABLE_SLOW_DCHECKS
bool LiftoffAssembler::ValidateCacheState() const {
  uint32_t register_use_count[kAfterMaxLiftoffRegCode] = {0};
  LiftoffRegList used_regs;
  for (const VarState& var : cache_state_.stack_state) {
    if (!var.is_reg()) continue;
    LiftoffRegister reg = var.reg();
    if ((kNeedI64RegPair || kNeedS128RegPair) && reg.is_pair()) {
      ++register_use_count[reg.low().liftoff_code()];
      ++register_use_count[reg.high().liftoff_code()];
    } else {
      ++register_use_count[reg.liftoff_code()];
    }
    used_regs.set(reg);
  }
  bool valid = memcmp(register_use_count, cache_state_.register_use_count,
                      sizeof(register_use_count)) == 0 &&
               used_regs == cache_state_.used_registers;
  if (valid) return true;
  std::ostringstream os;
  os << "Error in LiftoffAssembler::ValidateCacheState().\n";
  os << "expected: used_regs " << used_regs << ", counts "
     << PrintCollection(register_use_count) << "\n";
  os << "found:    used_regs " << cache_state_.used_registers << ", counts "
     << PrintCollection(cache_state_.register_use_count) << "\n";
  os << "Use --trace-wasm-decoder and --trace-liftoff to debug.";
  FATAL("%s", os.str().c_str());
}
#endif

LiftoffRegister LiftoffAssembler::SpillOneRegister(LiftoffRegList candidates,
                                                   LiftoffRegList pinned) {
  // Spill one cached value to free a register.
  LiftoffRegister spill_reg = cache_state_.GetNextSpillReg(candidates, pinned);
  SpillRegister(spill_reg);
  return spill_reg;
}

LiftoffRegister LiftoffAssembler::SpillAdjacentFpRegisters(
    LiftoffRegList pinned) {
  // We end up in this call only when:
  // [1] kNeedS128RegPair, and
  // [2] there is no pair of adjacent FP registers that is free.
  CHECK(kNeedS128RegPair);
  DCHECK(!kFpCacheRegList.MaskOut(pinned)
              .MaskOut(cache_state_.used_registers)
              .HasAdjacentFpRegsSet());

  // Special logic: if the top fp cache register has an even code, picking it
  // below would make the adjacent register (code + 1) an invalid register, so
  // pin it here.
  LiftoffRegister last_fp = kFpCacheRegList.GetLastRegSet();
  if (last_fp.fp().code() % 2 == 0) {
    pinned.set(last_fp);
  }

  // We can try to optimize the spilling here:
  // 1. Try to get a free fp register, either:
  //  a. This register is already free, or
  //  b. it had to be spilled.
  // 2. If 1a, the adjacent register is used (invariant [2]), spill it.
  // 3. If 1b, check the adjacent register:
  //  a. If free, done!
  //  b. If used, spill it.
  // We spill one register in 2 and 3a, and two registers in 3b.

  LiftoffRegister first_reg = GetUnusedRegister(kFpCacheRegList, pinned);
  LiftoffRegister second_reg = first_reg, low_reg = first_reg;

  if (first_reg.fp().code() % 2 == 0) {
    second_reg =
        LiftoffRegister::from_liftoff_code(first_reg.liftoff_code() + 1);
  } else {
    second_reg =
        LiftoffRegister::from_liftoff_code(first_reg.liftoff_code() - 1);
    low_reg = second_reg;
  }

  if (cache_state_.is_used(second_reg)) {
    SpillRegister(second_reg);
  }

  return low_reg;
}

void LiftoffAssembler::SpillRegister(LiftoffRegister reg) {
  int remaining_uses = cache_state_.get_use_count(reg);
  DCHECK_LT(0, remaining_uses);
  for (uint32_t idx = cache_state_.stack_height() - 1;; --idx) {
    DCHECK_GT(cache_state_.stack_height(), idx);
    auto* slot = &cache_state_.stack_state[idx];
    if (!slot->is_reg() || !slot->reg().overlaps(reg)) continue;
    if (slot->reg().is_pair()) {
      // Make sure to decrement *both* registers in a pair, because the
      // {clear_used} call below only clears one of them.
      cache_state_.dec_used(slot->reg().low());
      cache_state_.dec_used(slot->reg().high());
    }
    Spill(slot->offset(), slot->reg(), slot->type());
    slot->MakeStack();
    if (--remaining_uses == 0) break;
  }
  cache_state_.clear_used(reg);
}

void LiftoffAssembler::set_num_locals(uint32_t num_locals) {
  DCHECK_EQ(0, num_locals_);  // only call this once.
  num_locals_ = num_locals;
  if (num_locals > kInlineLocalTypes) {
    more_local_types_ =
        reinterpret_cast<ValueType*>(malloc(num_locals * sizeof(ValueType)));
    DCHECK_NOT_NULL(more_local_types_);
  }
}

std::ostream& operator<<(std::ostream& os, VarState slot) {
  os << slot.type().type_name() << ":";
  switch (slot.loc()) {
    case VarState::kStack:
      return os << "s";
    case VarState::kRegister:
      return os << slot.reg();
    case VarState::kIntConst:
      return os << "c" << slot.i32_const();
  }
  UNREACHABLE();
}

}  // namespace wasm
}  // namespace internal
}  // namespace v8