// Copyright 2015, ARM Limited
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
26 
27 #ifndef VIXL_A64_ASSEMBLER_A64_H_
28 #define VIXL_A64_ASSEMBLER_A64_H_
29 
30 #include "jit/arm64/vixl/Cpu-vixl.h"
31 #include "jit/arm64/vixl/Globals-vixl.h"
32 #include "jit/arm64/vixl/Instructions-vixl.h"
33 #include "jit/arm64/vixl/MozBaseAssembler-vixl.h"
34 #include "jit/arm64/vixl/Utils-vixl.h"
35 
36 #include "jit/JitSpewer.h"
37 
38 #include "jit/shared/Assembler-shared.h"
39 #include "jit/shared/Disassembler-shared.h"
40 #include "jit/shared/IonAssemblerBufferWithConstantPools.h"
41 
42 #if defined(_M_ARM64)
43 #ifdef mvn
44 #undef mvn
45 #endif
46 #endif
47 
48 namespace vixl {
49 
50 using js::jit::BufferOffset;
51 using js::jit::Label;
52 using js::jit::Address;
53 using js::jit::BaseIndex;
54 using js::jit::DisassemblerSpew;
55 
56 using LabelDoc = DisassemblerSpew::LabelDoc;
57 
58 typedef uint64_t RegList;
59 static const int kRegListSizeInBits = sizeof(RegList) * 8;
60 
61 
62 // Registers.
63 
64 // Some CPURegister methods can return Register or VRegister types, so we need
65 // to declare them in advance.
66 class Register;
67 class VRegister;
68 
// Generic description of a CPU register as an (encoding, size-in-bits, type)
// triple. Register (general-purpose) and VRegister (vector/FP) refine this
// type; some methods here return those types, hence the forward declarations
// above.
class CPURegister {
 public:
  enum RegisterType {
    // The kInvalid value is used to detect uninitialized static instances,
    // which are always zero-initialized before any constructors are called.
    kInvalid = 0,
    kRegister,
    kVRegister,
    kFPRegister = kVRegister,
    kNoRegister
  };

  // The default constructor produces the kNoRegister sentinel (what NoCPUReg
  // and friends hold); such a register is not usable for code generation.
  constexpr CPURegister() : code_(0), size_(0), type_(kNoRegister) {
  }

  constexpr CPURegister(unsigned code, unsigned size, RegisterType type)
      : code_(code), size_(size), type_(type) {
  }

  // Encoding of this register (0..kNumberOfRegisters-1, or
  // kSPRegInternalCode for sp/wsp).
  unsigned code() const {
    VIXL_ASSERT(IsValid());
    return code_;
  }

  RegisterType type() const {
    VIXL_ASSERT(IsValidOrNone());
    return type_;
  }

  // Single-bit mask for use in a RegList bitset; 0 for invalid registers.
  RegList Bit() const {
    VIXL_ASSERT(code_ < (sizeof(RegList) * 8));
    return IsValid() ? (static_cast<RegList>(1) << code_) : 0;
  }

  // Size of this register view, in bits.
  unsigned size() const {
    VIXL_ASSERT(IsValid());
    return size_;
  }

  int SizeInBytes() const {
    VIXL_ASSERT(IsValid());
    VIXL_ASSERT(size() % 8 == 0);
    return size_ / 8;
  }

  int SizeInBits() const {
    VIXL_ASSERT(IsValid());
    return size_;
  }

  bool Is8Bits() const {
    VIXL_ASSERT(IsValid());
    return size_ == 8;
  }

  bool Is16Bits() const {
    VIXL_ASSERT(IsValid());
    return size_ == 16;
  }

  bool Is32Bits() const {
    VIXL_ASSERT(IsValid());
    return size_ == 32;
  }

  bool Is64Bits() const {
    VIXL_ASSERT(IsValid());
    return size_ == 64;
  }

  bool Is128Bits() const {
    VIXL_ASSERT(IsValid());
    return size_ == 128;
  }

  bool IsValid() const {
    if (IsValidRegister() || IsValidVRegister()) {
      VIXL_ASSERT(!IsNone());
      return true;
    } else {
      // This assert is hit when the register has not been properly initialized.
      // One cause for this can be an initialisation order fiasco. See
      // https://isocpp.org/wiki/faq/ctors#static-init-order for some details.
      VIXL_ASSERT(IsNone());
      return false;
    }
  }

  bool IsValidRegister() const {
    return IsRegister() &&
           ((size_ == kWRegSize) || (size_ == kXRegSize)) &&
           ((code_ < kNumberOfRegisters) || (code_ == kSPRegInternalCode));
  }

  bool IsValidVRegister() const {
    return IsVRegister() &&
           ((size_ == kBRegSize) || (size_ == kHRegSize) ||
            (size_ == kSRegSize) || (size_ == kDRegSize) ||
            (size_ == kQRegSize)) &&
           (code_ < kNumberOfVRegisters);
  }

  bool IsValidFPRegister() const {
    return IsFPRegister() && (code_ < kNumberOfVRegisters);
  }

  bool IsNone() const {
    // kNoRegister types should always have size 0 and code 0.
    VIXL_ASSERT((type_ != kNoRegister) || (code_ == 0));
    VIXL_ASSERT((type_ != kNoRegister) || (size_ == 0));

    return type_ == kNoRegister;
  }

  // True if both name the same hardware register (same code and type),
  // regardless of view size; e.g. w0 aliases x0.
  bool Aliases(const CPURegister& other) const {
    VIXL_ASSERT(IsValidOrNone() && other.IsValidOrNone());
    return (code_ == other.code_) && (type_ == other.type_);
  }

  // True only for an exact match: same code, type and size.
  bool Is(const CPURegister& other) const {
    VIXL_ASSERT(IsValidOrNone() && other.IsValidOrNone());
    return Aliases(other) && (size_ == other.size_);
  }

  bool IsZero() const {
    VIXL_ASSERT(IsValid());
    return IsRegister() && (code_ == kZeroRegCode);
  }

  bool IsSP() const {
    VIXL_ASSERT(IsValid());
    return IsRegister() && (code_ == kSPRegInternalCode);
  }

  bool IsRegister() const {
    return type_ == kRegister;
  }

  bool IsVRegister() const {
    return type_ == kVRegister;
  }

  bool IsFPRegister() const {
    return IsS() || IsD();
  }

  bool IsW() const { return IsValidRegister() && Is32Bits(); }
  bool IsX() const { return IsValidRegister() && Is64Bits(); }

  // These assertions ensure that the size and type of the register are as
  // described. They do not consider the number of lanes that make up a vector.
  // So, for example, Is8B() implies IsD(), and Is1D() implies IsD, but IsD()
  // does not imply Is1D() or Is8B().
  // Check the number of lanes, ie. the format of the vector, using methods such
  // as Is8B(), Is1D(), etc. in the VRegister class.
  bool IsV() const { return IsVRegister(); }
  bool IsB() const { return IsV() && Is8Bits(); }
  bool IsH() const { return IsV() && Is16Bits(); }
  bool IsS() const { return IsV() && Is32Bits(); }
  bool IsD() const { return IsV() && Is64Bits(); }
  bool IsQ() const { return IsV() && Is128Bits(); }

  // Re-typed views of this register. Defined out-of-line; they return
  // references into the static per-class register tables.
  const Register& W() const;
  const Register& X() const;
  const VRegister& V() const;
  const VRegister& B() const;
  const VRegister& H() const;
  const VRegister& S() const;
  const VRegister& D() const;
  const VRegister& Q() const;

  bool IsSameSizeAndType(const CPURegister& other) const {
    return (size_ == other.size_) && (type_ == other.type_);
  }

 protected:
  unsigned code_;
  unsigned size_;  // In bits.
  RegisterType type_;

 private:
  bool IsValidOrNone() const {
    return IsValid() || IsNone();
  }
};
254 
255 
// A general-purpose (integer) register: a 32-bit (w) or 64-bit (x) view of
// one of the kNumberOfRegisters registers, or of the stack pointer.
class Register : public CPURegister {
 public:
  Register() : CPURegister() {}
  explicit Register(const CPURegister& other)
      : CPURegister(other.code(), other.size(), other.type()) {
    VIXL_ASSERT(IsValidRegister());
  }
  constexpr Register(unsigned code, unsigned size)
      : CPURegister(code, size, kRegister) {}

  // Adapter from the JIT's size-less register type.
  constexpr Register(js::jit::Register r, unsigned size)
    : CPURegister(r.code(), size, kRegister) {}

  bool IsValid() const {
    VIXL_ASSERT(IsRegister() || IsNone());
    return IsValidRegister();
  }

  // Convert back to the JIT's size-less register type.
  js::jit::Register asUnsized() const {
    // asUnsized() is only ever used on temp registers or on registers that
    // are known not to be SP, and there should be no risk of it being
    // applied to SP.  Check anyway.
    VIXL_ASSERT(code_ != kSPRegInternalCode);
    return js::jit::Register::FromCode((js::jit::Register::Code)code_);
  }


  static const Register& WRegFromCode(unsigned code);
  static const Register& XRegFromCode(unsigned code);

 private:
  // Static lookup tables backing WRegFromCode() / XRegFromCode().
  static const Register wregisters[];
  static const Register xregisters[];
};
290 
291 
// A vector or floating-point register: either a scalar view (b/h/s/d/q,
// lanes_ == 1) or a vector view with an explicit lane count (e.g. 8B, 4S,
// 2D; lanes_ > 1).
class VRegister : public CPURegister {
 public:
  VRegister() : CPURegister(), lanes_(1) {}
  explicit VRegister(const CPURegister& other)
      : CPURegister(other.code(), other.size(), other.type()), lanes_(1) {
    VIXL_ASSERT(IsValidVRegister());
    VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16));
  }
  constexpr VRegister(unsigned code, unsigned size, unsigned lanes = 1)
      : CPURegister(code, size, kVRegister), lanes_(lanes) {
    // VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16));
  }
  // Adapters from the JIT's float-register type. NOTE(review): left
  // intentionally non-explicit so js::jit::FloatRegister converts implicitly
  // at assembler call sites — confirm before marking explicit.
  constexpr VRegister(js::jit::FloatRegister r)
      : CPURegister(r.encoding(), r.size() * 8, kVRegister), lanes_(1) {
  }
  constexpr VRegister(js::jit::FloatRegister r, unsigned size)
      : CPURegister(r.encoding(), size, kVRegister), lanes_(1) {
  }
  VRegister(unsigned code, VectorFormat format)
      : CPURegister(code, RegisterSizeInBitsFromFormat(format), kVRegister),
        lanes_(IsVectorFormat(format) ? LaneCountFromFormat(format) : 1) {
    VIXL_ASSERT(IsPowerOf2(lanes_) && (lanes_ <= 16));
  }

  bool IsValid() const {
    VIXL_ASSERT(IsVRegister() || IsNone());
    return IsValidVRegister();
  }

  static const VRegister& BRegFromCode(unsigned code);
  static const VRegister& HRegFromCode(unsigned code);
  static const VRegister& SRegFromCode(unsigned code);
  static const VRegister& DRegFromCode(unsigned code);
  static const VRegister& QRegFromCode(unsigned code);
  static const VRegister& VRegFromCode(unsigned code);

  // Views of this register with an explicit vector format (lane count x
  // lane size); the total size selects the D (64-bit) or Q (128-bit) view.
  VRegister V8B() const { return VRegister(code_, kDRegSize, 8); }
  VRegister V16B() const { return VRegister(code_, kQRegSize, 16); }
  VRegister V4H() const { return VRegister(code_, kDRegSize, 4); }
  VRegister V8H() const { return VRegister(code_, kQRegSize, 8); }
  VRegister V2S() const { return VRegister(code_, kDRegSize, 2); }
  VRegister V4S() const { return VRegister(code_, kQRegSize, 4); }
  VRegister V2D() const { return VRegister(code_, kQRegSize, 2); }
  VRegister V1D() const { return VRegister(code_, kDRegSize, 1); }

  bool Is8B() const { return (Is64Bits() && (lanes_ == 8)); }
  bool Is16B() const { return (Is128Bits() && (lanes_ == 16)); }
  bool Is4H() const { return (Is64Bits() && (lanes_ == 4)); }
  bool Is8H() const { return (Is128Bits() && (lanes_ == 8)); }
  bool Is2S() const { return (Is64Bits() && (lanes_ == 2)); }
  bool Is4S() const { return (Is128Bits() && (lanes_ == 4)); }
  bool Is1D() const { return (Is64Bits() && (lanes_ == 1)); }
  bool Is2D() const { return (Is128Bits() && (lanes_ == 2)); }

  // For consistency, we assert the number of lanes of these scalar registers,
  // even though there are no vectors of equivalent total size with which they
  // could alias.
  bool Is1B() const {
    VIXL_ASSERT(!(Is8Bits() && IsVector()));
    return Is8Bits();
  }
  bool Is1H() const {
    VIXL_ASSERT(!(Is16Bits() && IsVector()));
    return Is16Bits();
  }
  bool Is1S() const {
    VIXL_ASSERT(!(Is32Bits() && IsVector()));
    return Is32Bits();
  }

  bool IsLaneSizeB() const { return LaneSizeInBits() == kBRegSize; }
  bool IsLaneSizeH() const { return LaneSizeInBits() == kHRegSize; }
  bool IsLaneSizeS() const { return LaneSizeInBits() == kSRegSize; }
  bool IsLaneSizeD() const { return LaneSizeInBits() == kDRegSize; }

  int lanes() const {
    return lanes_;
  }

  bool IsScalar() const {
    return lanes_ == 1;
  }

  bool IsVector() const {
    return lanes_ > 1;
  }

  // Same total size and same lane count (and therefore same lane size).
  bool IsSameFormat(const VRegister& other) const {
    return (size_ == other.size_) && (lanes_ == other.lanes_);
  }

  unsigned LaneSizeInBytes() const {
    return SizeInBytes() / lanes_;
  }

  unsigned LaneSizeInBits() const {
    return LaneSizeInBytes() * 8;
  }

 private:
  // Static lookup tables backing the *RegFromCode() accessors.
  static const VRegister bregisters[];
  static const VRegister hregisters[];
  static const VRegister sregisters[];
  static const VRegister dregisters[];
  static const VRegister qregisters[];
  static const VRegister vregisters[];
  int lanes_;  // 1 for scalar views; up to 16 for vector views.
};
400 
401 
// Backward compatibility for FPRegisters.
typedef VRegister FPRegister;

// No*Reg is used to indicate an unused argument, or an error case. Note that
// these all compare equal (using the Is() method). The Register and VRegister
// variants are provided for convenience.
const Register NoReg;
const VRegister NoVReg;
const FPRegister NoFPReg;  // For backward compatibility.
const CPURegister NoCPUReg;


// Define w<N> (32-bit) and x<N> (64-bit) views for every register code in
// REGISTER_CODE_LIST.
#define DEFINE_REGISTERS(N)  \
constexpr Register w##N(N, kWRegSize);  \
constexpr Register x##N(N, kXRegSize);
REGISTER_CODE_LIST(DEFINE_REGISTERS)
#undef DEFINE_REGISTERS
// The stack pointer uses a reserved internal code rather than code 31.
constexpr Register wsp(kSPRegInternalCode, kWRegSize);
constexpr Register sp(kSPRegInternalCode, kXRegSize);


// Define scalar (b/h/s/d/q) and generic 128-bit vector (v) views for every
// vector register code in REGISTER_CODE_LIST.
#define DEFINE_VREGISTERS(N)  \
constexpr VRegister b##N(N, kBRegSize);  \
constexpr VRegister h##N(N, kHRegSize);  \
constexpr VRegister s##N(N, kSRegSize);  \
constexpr VRegister d##N(N, kDRegSize);  \
constexpr VRegister q##N(N, kQRegSize);  \
constexpr VRegister v##N(N, kQRegSize);
REGISTER_CODE_LIST(DEFINE_VREGISTERS)
#undef DEFINE_VREGISTERS


// Registers aliases.
constexpr Register ip0 = x16;
constexpr Register ip1 = x17;
constexpr Register lr = x30;
constexpr Register xzr = x31;
constexpr Register wzr = w31;
440 
441 
442 // AreAliased returns true if any of the named registers overlap. Arguments
443 // set to NoReg are ignored. The system stack pointer may be specified.
444 bool AreAliased(const CPURegister& reg1,
445                 const CPURegister& reg2,
446                 const CPURegister& reg3 = NoReg,
447                 const CPURegister& reg4 = NoReg,
448                 const CPURegister& reg5 = NoReg,
449                 const CPURegister& reg6 = NoReg,
450                 const CPURegister& reg7 = NoReg,
451                 const CPURegister& reg8 = NoReg);
452 
453 
454 // AreSameSizeAndType returns true if all of the specified registers have the
455 // same size, and are of the same type. The system stack pointer may be
456 // specified. Arguments set to NoReg are ignored, as are any subsequent
457 // arguments. At least one argument (reg1) must be valid (not NoCPUReg).
458 bool AreSameSizeAndType(const CPURegister& reg1,
459                         const CPURegister& reg2,
460                         const CPURegister& reg3 = NoCPUReg,
461                         const CPURegister& reg4 = NoCPUReg,
462                         const CPURegister& reg5 = NoCPUReg,
463                         const CPURegister& reg6 = NoCPUReg,
464                         const CPURegister& reg7 = NoCPUReg,
465                         const CPURegister& reg8 = NoCPUReg);
466 
467 
468 // AreSameFormat returns true if all of the specified VRegisters have the same
469 // vector format. Arguments set to NoReg are ignored, as are any subsequent
470 // arguments. At least one argument (reg1) must be valid (not NoVReg).
471 bool AreSameFormat(const VRegister& reg1,
472                    const VRegister& reg2,
473                    const VRegister& reg3 = NoVReg,
474                    const VRegister& reg4 = NoVReg);
475 
476 
477 // AreConsecutive returns true if all of the specified VRegisters are
478 // consecutive in the register file. Arguments set to NoReg are ignored, as are
479 // any subsequent arguments. At least one argument (reg1) must be valid
480 // (not NoVReg).
481 bool AreConsecutive(const VRegister& reg1,
482                     const VRegister& reg2,
483                     const VRegister& reg3 = NoVReg,
484                     const VRegister& reg4 = NoVReg);
485 
486 
487 // Lists of registers.
488 class CPURegList {
489  public:
490   explicit CPURegList(CPURegister reg1,
491                       CPURegister reg2 = NoCPUReg,
492                       CPURegister reg3 = NoCPUReg,
493                       CPURegister reg4 = NoCPUReg)
494       : list_(reg1.Bit() | reg2.Bit() | reg3.Bit() | reg4.Bit()),
495         size_(reg1.size()), type_(reg1.type()) {
496     VIXL_ASSERT(AreSameSizeAndType(reg1, reg2, reg3, reg4));
497     VIXL_ASSERT(IsValid());
498   }
499 
CPURegList(CPURegister::RegisterType type,unsigned size,RegList list)500   CPURegList(CPURegister::RegisterType type, unsigned size, RegList list)
501       : list_(list), size_(size), type_(type) {
502     VIXL_ASSERT(IsValid());
503   }
504 
CPURegList(CPURegister::RegisterType type,unsigned size,unsigned first_reg,unsigned last_reg)505   CPURegList(CPURegister::RegisterType type, unsigned size,
506              unsigned first_reg, unsigned last_reg)
507       : size_(size), type_(type) {
508     VIXL_ASSERT(((type == CPURegister::kRegister) &&
509                  (last_reg < kNumberOfRegisters)) ||
510                 ((type == CPURegister::kVRegister) &&
511                  (last_reg < kNumberOfVRegisters)));
512     VIXL_ASSERT(last_reg >= first_reg);
513     list_ = (UINT64_C(1) << (last_reg + 1)) - 1;
514     list_ &= ~((UINT64_C(1) << first_reg) - 1);
515     VIXL_ASSERT(IsValid());
516   }
517 
type()518   CPURegister::RegisterType type() const {
519     VIXL_ASSERT(IsValid());
520     return type_;
521   }
522 
523   // Combine another CPURegList into this one. Registers that already exist in
524   // this list are left unchanged. The type and size of the registers in the
525   // 'other' list must match those in this list.
Combine(const CPURegList & other)526   void Combine(const CPURegList& other) {
527     VIXL_ASSERT(IsValid());
528     VIXL_ASSERT(other.type() == type_);
529     VIXL_ASSERT(other.RegisterSizeInBits() == size_);
530     list_ |= other.list();
531   }
532 
533   // Remove every register in the other CPURegList from this one. Registers that
534   // do not exist in this list are ignored. The type and size of the registers
535   // in the 'other' list must match those in this list.
Remove(const CPURegList & other)536   void Remove(const CPURegList& other) {
537     VIXL_ASSERT(IsValid());
538     VIXL_ASSERT(other.type() == type_);
539     VIXL_ASSERT(other.RegisterSizeInBits() == size_);
540     list_ &= ~other.list();
541   }
542 
543   // Variants of Combine and Remove which take a single register.
Combine(const CPURegister & other)544   void Combine(const CPURegister& other) {
545     VIXL_ASSERT(other.type() == type_);
546     VIXL_ASSERT(other.size() == size_);
547     Combine(other.code());
548   }
549 
Remove(const CPURegister & other)550   void Remove(const CPURegister& other) {
551     VIXL_ASSERT(other.type() == type_);
552     VIXL_ASSERT(other.size() == size_);
553     Remove(other.code());
554   }
555 
556   // Variants of Combine and Remove which take a single register by its code;
557   // the type and size of the register is inferred from this list.
Combine(int code)558   void Combine(int code) {
559     VIXL_ASSERT(IsValid());
560     VIXL_ASSERT(CPURegister(code, size_, type_).IsValid());
561     list_ |= (UINT64_C(1) << code);
562   }
563 
Remove(int code)564   void Remove(int code) {
565     VIXL_ASSERT(IsValid());
566     VIXL_ASSERT(CPURegister(code, size_, type_).IsValid());
567     list_ &= ~(UINT64_C(1) << code);
568   }
569 
Union(const CPURegList & list_1,const CPURegList & list_2)570   static CPURegList Union(const CPURegList& list_1, const CPURegList& list_2) {
571     VIXL_ASSERT(list_1.type_ == list_2.type_);
572     VIXL_ASSERT(list_1.size_ == list_2.size_);
573     return CPURegList(list_1.type_, list_1.size_, list_1.list_ | list_2.list_);
574   }
575   static CPURegList Union(const CPURegList& list_1,
576                           const CPURegList& list_2,
577                           const CPURegList& list_3);
578   static CPURegList Union(const CPURegList& list_1,
579                           const CPURegList& list_2,
580                           const CPURegList& list_3,
581                           const CPURegList& list_4);
582 
Intersection(const CPURegList & list_1,const CPURegList & list_2)583   static CPURegList Intersection(const CPURegList& list_1,
584                                  const CPURegList& list_2) {
585     VIXL_ASSERT(list_1.type_ == list_2.type_);
586     VIXL_ASSERT(list_1.size_ == list_2.size_);
587     return CPURegList(list_1.type_, list_1.size_, list_1.list_ & list_2.list_);
588   }
589   static CPURegList Intersection(const CPURegList& list_1,
590                                  const CPURegList& list_2,
591                                  const CPURegList& list_3);
592   static CPURegList Intersection(const CPURegList& list_1,
593                                  const CPURegList& list_2,
594                                  const CPURegList& list_3,
595                                  const CPURegList& list_4);
596 
Overlaps(const CPURegList & other)597   bool Overlaps(const CPURegList& other) const {
598     return (type_ == other.type_) && ((list_ & other.list_) != 0);
599   }
600 
list()601   RegList list() const {
602     VIXL_ASSERT(IsValid());
603     return list_;
604   }
605 
set_list(RegList new_list)606   void set_list(RegList new_list) {
607     VIXL_ASSERT(IsValid());
608     list_ = new_list;
609   }
610 
611   // Remove all callee-saved registers from the list. This can be useful when
612   // preparing registers for an AAPCS64 function call, for example.
613   void RemoveCalleeSaved();
614 
615   CPURegister PopLowestIndex();
616   CPURegister PopHighestIndex();
617 
618   // AAPCS64 callee-saved registers.
619   static CPURegList GetCalleeSaved(unsigned size = kXRegSize);
620   static CPURegList GetCalleeSavedV(unsigned size = kDRegSize);
621 
622   // AAPCS64 caller-saved registers. Note that this includes lr.
623   // TODO(all): Determine how we handle d8-d15 being callee-saved, but the top
624   // 64-bits being caller-saved.
625   static CPURegList GetCallerSaved(unsigned size = kXRegSize);
626   static CPURegList GetCallerSavedV(unsigned size = kDRegSize);
627 
IsEmpty()628   bool IsEmpty() const {
629     VIXL_ASSERT(IsValid());
630     return list_ == 0;
631   }
632 
IncludesAliasOf(const CPURegister & other)633   bool IncludesAliasOf(const CPURegister& other) const {
634     VIXL_ASSERT(IsValid());
635     return (type_ == other.type()) && ((other.Bit() & list_) != 0);
636   }
637 
IncludesAliasOf(int code)638   bool IncludesAliasOf(int code) const {
639     VIXL_ASSERT(IsValid());
640     return ((code & list_) != 0);
641   }
642 
Count()643   int Count() const {
644     VIXL_ASSERT(IsValid());
645     return CountSetBits(list_);
646   }
647 
RegisterSizeInBits()648   unsigned RegisterSizeInBits() const {
649     VIXL_ASSERT(IsValid());
650     return size_;
651   }
652 
RegisterSizeInBytes()653   unsigned RegisterSizeInBytes() const {
654     int size_in_bits = RegisterSizeInBits();
655     VIXL_ASSERT((size_in_bits % 8) == 0);
656     return size_in_bits / 8;
657   }
658 
TotalSizeInBytes()659   unsigned TotalSizeInBytes() const {
660     VIXL_ASSERT(IsValid());
661     return RegisterSizeInBytes() * Count();
662   }
663 
664  private:
665   RegList list_;
666   unsigned size_;
667   CPURegister::RegisterType type_;
668 
669   bool IsValid() const;
670 };
671 
672 
673 // AAPCS64 callee-saved registers.
674 extern const CPURegList kCalleeSaved;
675 extern const CPURegList kCalleeSavedV;
676 
677 
678 // AAPCS64 caller-saved registers. Note that this includes lr.
679 extern const CPURegList kCallerSaved;
680 extern const CPURegList kCallerSavedV;
681 
682 
// Operand.
// The right-hand operand of a data-processing instruction: an immediate, a
// shifted register, or an extended register. Exactly one of the Is*()
// predicates below holds for a given Operand.
class Operand {
 public:
  // #<immediate>
  // where <immediate> is int64_t.
  // This is allowed to be an implicit constructor because Operand is
  // a wrapper class that doesn't normally perform any type conversion.
  Operand(int64_t immediate = 0);           // NOLINT(runtime/explicit)

  // rm, {<shift> #<shift_amount>}
  // where <shift> is one of {LSL, LSR, ASR, ROR}.
  //       <shift_amount> is uint6_t.
  // This is allowed to be an implicit constructor because Operand is
  // a wrapper class that doesn't normally perform any type conversion.
  Operand(Register reg,
          Shift shift = LSL,
          unsigned shift_amount = 0);   // NOLINT(runtime/explicit)

  // rm, {<extend> {#<shift_amount>}}
  // where <extend> is one of {UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX}.
  //       <shift_amount> is uint2_t.
  explicit Operand(Register reg, Extend extend, unsigned shift_amount = 0);

  bool IsImmediate() const;
  bool IsShiftedRegister() const;
  bool IsExtendedRegister() const;
  bool IsZero() const;

  // This returns an LSL shift (<= 4) operand as an equivalent extend operand,
  // which helps in the encoding of instructions that use the stack pointer.
  Operand ToExtendedRegister() const;

  int64_t immediate() const {
    VIXL_ASSERT(IsImmediate());
    return immediate_;
  }

  Register reg() const {
    VIXL_ASSERT(IsShiftedRegister() || IsExtendedRegister());
    return reg_;
  }

  // Like reg(), but returns NoCPUReg instead of asserting when this operand
  // is an immediate.
  CPURegister maybeReg() const {
    if (IsShiftedRegister() || IsExtendedRegister())
      return reg_;
    return NoCPUReg;
  }

  Shift shift() const {
    VIXL_ASSERT(IsShiftedRegister());
    return shift_;
  }

  Extend extend() const {
    VIXL_ASSERT(IsExtendedRegister());
    return extend_;
  }

  unsigned shift_amount() const {
    VIXL_ASSERT(IsShiftedRegister() || IsExtendedRegister());
    return shift_amount_;
  }

 private:
  int64_t immediate_;
  Register reg_;
  Shift shift_;
  Extend extend_;
  unsigned shift_amount_;
};
753 
754 
// MemOperand represents the addressing mode of a load or store instruction.
class MemOperand {
 public:
  // [base, #offset], with the given addressing mode.
  explicit MemOperand(Register base,
                      int64_t offset = 0,
                      AddrMode addrmode = Offset);
  // [base, regoffset {, <shift> #shift_amount}]
  MemOperand(Register base,
             Register regoffset,
             Shift shift = LSL,
             unsigned shift_amount = 0);
  // [base, regoffset, <extend> {#shift_amount}]
  MemOperand(Register base,
             Register regoffset,
             Extend extend,
             unsigned shift_amount = 0);
  // [base, <operand>], where the Operand supplies the offset.
  MemOperand(Register base,
             const Operand& offset,
             AddrMode addrmode = Offset);

  // Adapter constructors using C++11 delegating.
  // TODO: If sp == kSPRegInternalCode, the xzr check isn't necessary.
  explicit MemOperand(js::jit::Address addr)
    : MemOperand(IsHiddenSP(addr.base) ? sp : Register(AsRegister(addr.base), 64),
                 (ptrdiff_t)addr.offset) {
  }

  const Register& base() const { return base_; }
  const Register& regoffset() const { return regoffset_; }
  int64_t offset() const { return offset_; }
  AddrMode addrmode() const { return addrmode_; }
  Shift shift() const { return shift_; }
  Extend extend() const { return extend_; }
  unsigned shift_amount() const { return shift_amount_; }
  bool IsImmediateOffset() const;
  bool IsRegisterOffset() const;
  bool IsPreIndex() const;
  bool IsPostIndex() const;

  // Adjust the immediate offset in place.
  void AddOffset(int64_t offset);

 private:
  Register base_;
  Register regoffset_;
  int64_t offset_;
  AddrMode addrmode_;
  Shift shift_;
  Extend extend_;
  unsigned shift_amount_;
};
803 
804 
// Control whether or not position-independent code should be emitted.
// Passed to the Assembler constructor (see below), which defaults to
// PositionIndependentCode.
enum PositionIndependentCodeOption {
  // All code generated will be position-independent; all branches and
  // references to labels generated with the Label class will use PC-relative
  // addressing.
  PositionIndependentCode,

  // Allow VIXL to generate code that refers to absolute addresses. With this
  // option, it will not be possible to copy the code buffer and run it from a
  // different address; code must be generated in its final location.
  PositionDependentCode,

  // Allow VIXL to assume that the bottom 12 bits of the address will be
  // constant, but that the top 48 bits may change. This allows `adrp` to
  // function in systems which copy code between pages, but otherwise maintain
  // 4KB page alignment.
  PageOffsetDependentCode
};
823 
824 
// Control how scaled- and unscaled-offset loads and stores are generated.
// The Prefer* options permit falling back to other addressing forms; the
// Require* options do not.
enum LoadStoreScalingOption {
  // Prefer scaled-immediate-offset instructions, but emit unscaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferScaledOffset,

  // Prefer unscaled-immediate-offset instructions, but emit scaled-offset,
  // register-offset, pre-index or post-index instructions if necessary.
  PreferUnscaledOffset,

  // Require scaled-immediate-offset instructions.
  RequireScaledOffset,

  // Require unscaled-immediate-offset instructions.
  RequireUnscaledOffset
};
841 
842 
843 // Assembler.
844 class Assembler : public MozBaseAssembler {
845  public:
846   Assembler(PositionIndependentCodeOption pic = PositionIndependentCode);
847 
848   // System functions.
849 
850   // Finalize a code buffer of generated instructions. This function must be
851   // called before executing or copying code from the buffer.
852   void FinalizeCode();
853 
854 #define COPYENUM(v) static const Condition v = vixl::v
855 #define COPYENUM_(v) static const Condition v = vixl::v##_
856   COPYENUM(Equal);
857   COPYENUM(Zero);
858   COPYENUM(NotEqual);
859   COPYENUM(NonZero);
860   COPYENUM(AboveOrEqual);
861   COPYENUM(CarrySet);
862   COPYENUM(Below);
863   COPYENUM(CarryClear);
864   COPYENUM(Signed);
865   COPYENUM(NotSigned);
866   COPYENUM(Overflow);
867   COPYENUM(NoOverflow);
868   COPYENUM(Above);
869   COPYENUM(BelowOrEqual);
870   COPYENUM_(GreaterThanOrEqual);
871   COPYENUM_(LessThan);
872   COPYENUM_(GreaterThan);
873   COPYENUM_(LessThanOrEqual);
874   COPYENUM(Always);
875   COPYENUM(Never);
876 #undef COPYENUM
877 #undef COPYENUM_
878 
  // Bit set when a DoubleCondition does not map to a single ARM condition.
  // The MacroAssembler must special-case these conditions, or else
  // ConditionFromDoubleCondition will complain.
  static const int DoubleConditionBitSpecial = 0x100;

  // Floating-point comparison conditions, expressed as the ARM integer
  // condition codes that hold after an fcmp. Entries tagged with
  // DoubleConditionBitSpecial have no single-condition encoding and need
  // MacroAssembler support.
  enum DoubleCondition {
    DoubleOrdered                        = Condition::vc,
    DoubleEqual                          = Condition::eq,
    DoubleNotEqual                       = Condition::ne | DoubleConditionBitSpecial,
    DoubleGreaterThan                    = Condition::gt,
    DoubleGreaterThanOrEqual             = Condition::ge,
    DoubleLessThan                       = Condition::lo, // Could also use Condition::mi.
    DoubleLessThanOrEqual                = Condition::ls,

    // If either operand is NaN, these conditions always evaluate to true.
    DoubleUnordered                      = Condition::vs,
    DoubleEqualOrUnordered               = Condition::eq | DoubleConditionBitSpecial,
    DoubleNotEqualOrUnordered            = Condition::ne,
    DoubleGreaterThanOrUnordered         = Condition::hi,
    DoubleGreaterThanOrEqualOrUnordered  = Condition::hs,
    DoubleLessThanOrUnordered            = Condition::lt,
    DoubleLessThanOrEqualOrUnordered     = Condition::le
  };
902 
InvertCondition(Condition cond)903   static inline Condition InvertCondition(Condition cond) {
904     // Conditions al and nv behave identically, as "always true". They can't be
905     // inverted, because there is no "always false" condition.
906     VIXL_ASSERT((cond != al) && (cond != nv));
907     return static_cast<Condition>(cond ^ 1);
908   }
909 
910   // This is chaging the condition codes for cmp a, b to the same codes for cmp b, a.
InvertCmpCondition(Condition cond)911   static inline Condition InvertCmpCondition(Condition cond) {
912     // Conditions al and nv behave identically, as "always true". They can't be
913     // inverted, because there is no "always false" condition.
914     switch (cond) {
915     case eq:
916     case ne:
917       return cond;
918     case gt:
919       return le;
920     case le:
921       return gt;
922     case ge:
923       return lt;
924     case lt:
925       return ge;
926     case hi:
927       return lo;
928     case lo:
929       return hi;
930     case hs:
931       return ls;
932     case ls:
933       return hs;
934     case mi:
935       return pl;
936     case pl:
937       return mi;
938     default:
939       MOZ_CRASH("TODO: figure this case out.");
940     }
941     return static_cast<Condition>(cond ^ 1);
942   }
943 
InvertCondition(DoubleCondition cond)944   static inline DoubleCondition InvertCondition(DoubleCondition cond) {
945       switch (cond) {
946 	case DoubleOrdered:
947 	  return DoubleUnordered;
948 	case DoubleEqual:
949 	  return DoubleNotEqualOrUnordered;
950 	case DoubleNotEqual:
951 	  return DoubleEqualOrUnordered;
952 	case DoubleGreaterThan:
953 	  return DoubleLessThanOrEqualOrUnordered;
954 	case DoubleGreaterThanOrEqual:
955 	  return DoubleLessThanOrUnordered;
956 	case DoubleLessThan:
957 	  return DoubleGreaterThanOrEqualOrUnordered;
958 	case DoubleLessThanOrEqual:
959 	  return DoubleGreaterThanOrUnordered;
960 	case DoubleUnordered:
961 	  return DoubleOrdered;
962 	case DoubleEqualOrUnordered:
963 	  return DoubleNotEqual;
964 	case DoubleNotEqualOrUnordered:
965 	  return DoubleEqual;
966 	case DoubleGreaterThanOrUnordered:
967 	  return DoubleLessThanOrEqual;
968 	case DoubleGreaterThanOrEqualOrUnordered:
969 	  return DoubleLessThan;
970 	case DoubleLessThanOrUnordered:
971 	  return DoubleGreaterThanOrEqual;
972 	case DoubleLessThanOrEqualOrUnordered:
973 	  return DoubleGreaterThan;
974 	default:
975 	  MOZ_CRASH("Bad condition");
976     }
977   }
978 
ConditionFromDoubleCondition(DoubleCondition cond)979   static inline Condition ConditionFromDoubleCondition(DoubleCondition cond) {
980     VIXL_ASSERT(!(cond & DoubleConditionBitSpecial));
981     return static_cast<Condition>(cond);
982   }
983 
984   // Instruction set functions.
985 
986   // Branch / Jump instructions.
987   // Branch to register.
988   void br(const Register& xn);
989   static void br(Instruction* at, const Register& xn);
990 
991   // Branch with link to register.
992   void blr(const Register& xn);
993   static void blr(Instruction* at, const Register& blr);
994 
995   // Branch to register with return hint.
996   void ret(const Register& xn = lr);
997 
998   // Unconditional branch to label.
999   BufferOffset b(Label* label);
1000 
1001   // Conditional branch to label.
1002   BufferOffset b(Label* label, Condition cond);
1003 
1004   // Unconditional branch to PC offset.
1005   BufferOffset b(int imm26, const LabelDoc& doc);
1006   static void b(Instruction* at, int imm26);
1007 
1008   // Conditional branch to PC offset.
1009   BufferOffset b(int imm19, Condition cond, const LabelDoc& doc);
1010   static void b(Instruction*at, int imm19, Condition cond);
1011 
1012   // Branch with link to label.
1013   void bl(Label* label);
1014 
1015   // Branch with link to PC offset.
1016   void bl(int imm26, const LabelDoc& doc);
1017   static void bl(Instruction* at, int imm26);
1018 
1019   // Compare and branch to label if zero.
1020   void cbz(const Register& rt, Label* label);
1021 
1022   // Compare and branch to PC offset if zero.
1023   void cbz(const Register& rt, int imm19, const LabelDoc& doc);
1024   static void cbz(Instruction* at, const Register& rt, int imm19);
1025 
1026   // Compare and branch to label if not zero.
1027   void cbnz(const Register& rt, Label* label);
1028 
1029   // Compare and branch to PC offset if not zero.
1030   void cbnz(const Register& rt, int imm19, const LabelDoc& doc);
1031   static void cbnz(Instruction* at, const Register& rt, int imm19);
1032 
1033   // Table lookup from one register.
1034   void tbl(const VRegister& vd,
1035            const VRegister& vn,
1036            const VRegister& vm);
1037 
1038   // Table lookup from two registers.
1039   void tbl(const VRegister& vd,
1040            const VRegister& vn,
1041            const VRegister& vn2,
1042            const VRegister& vm);
1043 
1044   // Table lookup from three registers.
1045   void tbl(const VRegister& vd,
1046            const VRegister& vn,
1047            const VRegister& vn2,
1048            const VRegister& vn3,
1049            const VRegister& vm);
1050 
1051   // Table lookup from four registers.
1052   void tbl(const VRegister& vd,
1053            const VRegister& vn,
1054            const VRegister& vn2,
1055            const VRegister& vn3,
1056            const VRegister& vn4,
1057            const VRegister& vm);
1058 
1059   // Table lookup extension from one register.
1060   void tbx(const VRegister& vd,
1061            const VRegister& vn,
1062            const VRegister& vm);
1063 
1064   // Table lookup extension from two registers.
1065   void tbx(const VRegister& vd,
1066            const VRegister& vn,
1067            const VRegister& vn2,
1068            const VRegister& vm);
1069 
1070   // Table lookup extension from three registers.
1071   void tbx(const VRegister& vd,
1072            const VRegister& vn,
1073            const VRegister& vn2,
1074            const VRegister& vn3,
1075            const VRegister& vm);
1076 
1077   // Table lookup extension from four registers.
1078   void tbx(const VRegister& vd,
1079            const VRegister& vn,
1080            const VRegister& vn2,
1081            const VRegister& vn3,
1082            const VRegister& vn4,
1083            const VRegister& vm);
1084 
1085   // Test bit and branch to label if zero.
1086   void tbz(const Register& rt, unsigned bit_pos, Label* label);
1087 
1088   // Test bit and branch to PC offset if zero.
1089   void tbz(const Register& rt, unsigned bit_pos, int imm14, const LabelDoc& doc);
1090   static void tbz(Instruction* at, const Register& rt, unsigned bit_pos, int imm14);
1091 
1092   // Test bit and branch to label if not zero.
1093   void tbnz(const Register& rt, unsigned bit_pos, Label* label);
1094 
1095   // Test bit and branch to PC offset if not zero.
1096   void tbnz(const Register& rt, unsigned bit_pos, int imm14, const LabelDoc& doc);
1097   static void tbnz(Instruction* at, const Register& rt, unsigned bit_pos, int imm14);
1098 
1099   // Address calculation instructions.
1100   // Calculate a PC-relative address. Unlike for branches the offset in adr is
1101   // unscaled (i.e. the result can be unaligned).
1102 
1103   // Calculate the address of a label.
1104   void adr(const Register& rd, Label* label);
1105 
1106   // Calculate the address of a PC offset.
1107   void adr(const Register& rd, int imm21, const LabelDoc& doc);
1108   static void adr(Instruction* at, const Register& rd, int imm21);
1109 
1110   // Calculate the page address of a label.
1111   void adrp(const Register& rd, Label* label);
1112 
1113   // Calculate the page address of a PC offset.
1114   void adrp(const Register& rd, int imm21, const LabelDoc& doc);
1115   static void adrp(Instruction* at, const Register& rd, int imm21);
1116 
1117   // Data Processing instructions.
1118   // Add.
1119   void add(const Register& rd,
1120            const Register& rn,
1121            const Operand& operand);
1122 
1123   // Add and update status flags.
1124   void adds(const Register& rd,
1125             const Register& rn,
1126             const Operand& operand);
1127 
1128   // Compare negative.
1129   void cmn(const Register& rn, const Operand& operand);
1130 
1131   // Subtract.
1132   void sub(const Register& rd,
1133            const Register& rn,
1134            const Operand& operand);
1135 
1136   // Subtract and update status flags.
1137   void subs(const Register& rd,
1138             const Register& rn,
1139             const Operand& operand);
1140 
1141   // Compare.
1142   void cmp(const Register& rn, const Operand& operand);
1143 
1144   // Negate.
1145   void neg(const Register& rd,
1146            const Operand& operand);
1147 
1148   // Negate and update status flags.
1149   void negs(const Register& rd,
1150             const Operand& operand);
1151 
1152   // Add with carry bit.
1153   void adc(const Register& rd,
1154            const Register& rn,
1155            const Operand& operand);
1156 
1157   // Add with carry bit and update status flags.
1158   void adcs(const Register& rd,
1159             const Register& rn,
1160             const Operand& operand);
1161 
1162   // Subtract with carry bit.
1163   void sbc(const Register& rd,
1164            const Register& rn,
1165            const Operand& operand);
1166 
1167   // Subtract with carry bit and update status flags.
1168   void sbcs(const Register& rd,
1169             const Register& rn,
1170             const Operand& operand);
1171 
1172   // Negate with carry bit.
1173   void ngc(const Register& rd,
1174            const Operand& operand);
1175 
1176   // Negate with carry bit and update status flags.
1177   void ngcs(const Register& rd,
1178             const Operand& operand);
1179 
1180   // Logical instructions.
1181   // Bitwise and (A & B).
1182   void and_(const Register& rd,
1183             const Register& rn,
1184             const Operand& operand);
1185 
1186   // Bitwise and (A & B) and update status flags.
1187   BufferOffset ands(const Register& rd,
1188                     const Register& rn,
1189                     const Operand& operand);
1190 
1191   // Bit test and set flags.
1192   BufferOffset tst(const Register& rn, const Operand& operand);
1193 
1194   // Bit clear (A & ~B).
1195   void bic(const Register& rd,
1196            const Register& rn,
1197            const Operand& operand);
1198 
1199   // Bit clear (A & ~B) and update status flags.
1200   void bics(const Register& rd,
1201             const Register& rn,
1202             const Operand& operand);
1203 
  // Bitwise or (A | B).
  void orr(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise or-not (A | ~B).
  void orn(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise eor/xor (A ^ B).
  void eor(const Register& rd, const Register& rn, const Operand& operand);

  // Bitwise eor-not/xnor (A ^ ~B).
  void eon(const Register& rd, const Register& rn, const Operand& operand);
1215 
1216   // Logical shift left by variable.
1217   void lslv(const Register& rd, const Register& rn, const Register& rm);
1218 
1219   // Logical shift right by variable.
1220   void lsrv(const Register& rd, const Register& rn, const Register& rm);
1221 
1222   // Arithmetic shift right by variable.
1223   void asrv(const Register& rd, const Register& rn, const Register& rm);
1224 
1225   // Rotate right by variable.
1226   void rorv(const Register& rd, const Register& rn, const Register& rm);
1227 
1228   // Bitfield instructions.
1229   // Bitfield move.
1230   void bfm(const Register& rd,
1231            const Register& rn,
1232            unsigned immr,
1233            unsigned imms);
1234 
1235   // Signed bitfield move.
1236   void sbfm(const Register& rd,
1237             const Register& rn,
1238             unsigned immr,
1239             unsigned imms);
1240 
1241   // Unsigned bitfield move.
1242   void ubfm(const Register& rd,
1243             const Register& rn,
1244             unsigned immr,
1245             unsigned imms);
1246 
1247   // Bfm aliases.
1248   // Bitfield insert.
bfi(const Register & rd,const Register & rn,unsigned lsb,unsigned width)1249   void bfi(const Register& rd,
1250            const Register& rn,
1251            unsigned lsb,
1252            unsigned width) {
1253     VIXL_ASSERT(width >= 1);
1254     VIXL_ASSERT(lsb + width <= rn.size());
1255     bfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
1256   }
1257 
1258   // Bitfield extract and insert low.
bfxil(const Register & rd,const Register & rn,unsigned lsb,unsigned width)1259   void bfxil(const Register& rd,
1260              const Register& rn,
1261              unsigned lsb,
1262              unsigned width) {
1263     VIXL_ASSERT(width >= 1);
1264     VIXL_ASSERT(lsb + width <= rn.size());
1265     bfm(rd, rn, lsb, lsb + width - 1);
1266   }
1267 
1268   // Sbfm aliases.
1269   // Arithmetic shift right.
asr(const Register & rd,const Register & rn,unsigned shift)1270   void asr(const Register& rd, const Register& rn, unsigned shift) {
1271     VIXL_ASSERT(shift < rd.size());
1272     sbfm(rd, rn, shift, rd.size() - 1);
1273   }
1274 
1275   // Signed bitfield insert with zero at right.
sbfiz(const Register & rd,const Register & rn,unsigned lsb,unsigned width)1276   void sbfiz(const Register& rd,
1277              const Register& rn,
1278              unsigned lsb,
1279              unsigned width) {
1280     VIXL_ASSERT(width >= 1);
1281     VIXL_ASSERT(lsb + width <= rn.size());
1282     sbfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
1283   }
1284 
1285   // Signed bitfield extract.
sbfx(const Register & rd,const Register & rn,unsigned lsb,unsigned width)1286   void sbfx(const Register& rd,
1287             const Register& rn,
1288             unsigned lsb,
1289             unsigned width) {
1290     VIXL_ASSERT(width >= 1);
1291     VIXL_ASSERT(lsb + width <= rn.size());
1292     sbfm(rd, rn, lsb, lsb + width - 1);
1293   }
1294 
1295   // Signed extend byte.
sxtb(const Register & rd,const Register & rn)1296   void sxtb(const Register& rd, const Register& rn) {
1297     sbfm(rd, rn, 0, 7);
1298   }
1299 
1300   // Signed extend halfword.
sxth(const Register & rd,const Register & rn)1301   void sxth(const Register& rd, const Register& rn) {
1302     sbfm(rd, rn, 0, 15);
1303   }
1304 
1305   // Signed extend word.
sxtw(const Register & rd,const Register & rn)1306   void sxtw(const Register& rd, const Register& rn) {
1307     sbfm(rd, rn, 0, 31);
1308   }
1309 
1310   // Ubfm aliases.
1311   // Logical shift left.
lsl(const Register & rd,const Register & rn,unsigned shift)1312   void lsl(const Register& rd, const Register& rn, unsigned shift) {
1313     unsigned reg_size = rd.size();
1314     VIXL_ASSERT(shift < reg_size);
1315     ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1);
1316   }
1317 
1318   // Logical shift right.
lsr(const Register & rd,const Register & rn,unsigned shift)1319   void lsr(const Register& rd, const Register& rn, unsigned shift) {
1320     VIXL_ASSERT(shift < rd.size());
1321     ubfm(rd, rn, shift, rd.size() - 1);
1322   }
1323 
1324   // Unsigned bitfield insert with zero at right.
ubfiz(const Register & rd,const Register & rn,unsigned lsb,unsigned width)1325   void ubfiz(const Register& rd,
1326              const Register& rn,
1327              unsigned lsb,
1328              unsigned width) {
1329     VIXL_ASSERT(width >= 1);
1330     VIXL_ASSERT(lsb + width <= rn.size());
1331     ubfm(rd, rn, (rd.size() - lsb) & (rd.size() - 1), width - 1);
1332   }
1333 
1334   // Unsigned bitfield extract.
ubfx(const Register & rd,const Register & rn,unsigned lsb,unsigned width)1335   void ubfx(const Register& rd,
1336             const Register& rn,
1337             unsigned lsb,
1338             unsigned width) {
1339     VIXL_ASSERT(width >= 1);
1340     VIXL_ASSERT(lsb + width <= rn.size());
1341     ubfm(rd, rn, lsb, lsb + width - 1);
1342   }
1343 
1344   // Unsigned extend byte.
uxtb(const Register & rd,const Register & rn)1345   void uxtb(const Register& rd, const Register& rn) {
1346     ubfm(rd, rn, 0, 7);
1347   }
1348 
1349   // Unsigned extend halfword.
uxth(const Register & rd,const Register & rn)1350   void uxth(const Register& rd, const Register& rn) {
1351     ubfm(rd, rn, 0, 15);
1352   }
1353 
1354   // Unsigned extend word.
uxtw(const Register & rd,const Register & rn)1355   void uxtw(const Register& rd, const Register& rn) {
1356     ubfm(rd, rn, 0, 31);
1357   }
1358 
1359   // Extract.
1360   void extr(const Register& rd,
1361             const Register& rn,
1362             const Register& rm,
1363             unsigned lsb);
1364 
1365   // Conditional select: rd = cond ? rn : rm.
1366   void csel(const Register& rd,
1367             const Register& rn,
1368             const Register& rm,
1369             Condition cond);
1370 
1371   // Conditional select increment: rd = cond ? rn : rm + 1.
1372   void csinc(const Register& rd,
1373              const Register& rn,
1374              const Register& rm,
1375              Condition cond);
1376 
1377   // Conditional select inversion: rd = cond ? rn : ~rm.
1378   void csinv(const Register& rd,
1379              const Register& rn,
1380              const Register& rm,
1381              Condition cond);
1382 
1383   // Conditional select negation: rd = cond ? rn : -rm.
1384   void csneg(const Register& rd,
1385              const Register& rn,
1386              const Register& rm,
1387              Condition cond);
1388 
1389   // Conditional set: rd = cond ? 1 : 0.
1390   void cset(const Register& rd, Condition cond);
1391 
1392   // Conditional set mask: rd = cond ? -1 : 0.
1393   void csetm(const Register& rd, Condition cond);
1394 
1395   // Conditional increment: rd = cond ? rn + 1 : rn.
1396   void cinc(const Register& rd, const Register& rn, Condition cond);
1397 
1398   // Conditional invert: rd = cond ? ~rn : rn.
1399   void cinv(const Register& rd, const Register& rn, Condition cond);
1400 
1401   // Conditional negate: rd = cond ? -rn : rn.
1402   void cneg(const Register& rd, const Register& rn, Condition cond);
1403 
1404   // Rotate right.
ror(const Register & rd,const Register & rs,unsigned shift)1405   void ror(const Register& rd, const Register& rs, unsigned shift) {
1406     extr(rd, rs, rs, shift);
1407   }
1408 
1409   // Conditional comparison.
1410   // Conditional compare negative.
1411   void ccmn(const Register& rn,
1412             const Operand& operand,
1413             StatusFlags nzcv,
1414             Condition cond);
1415 
1416   // Conditional compare.
1417   void ccmp(const Register& rn,
1418             const Operand& operand,
1419             StatusFlags nzcv,
1420             Condition cond);
1421 
1422   // CRC-32 checksum from byte.
1423   void crc32b(const Register& rd,
1424               const Register& rn,
1425               const Register& rm);
1426 
1427   // CRC-32 checksum from half-word.
1428   void crc32h(const Register& rd,
1429               const Register& rn,
1430               const Register& rm);
1431 
1432   // CRC-32 checksum from word.
1433   void crc32w(const Register& rd,
1434               const Register& rn,
1435               const Register& rm);
1436 
1437   // CRC-32 checksum from double word.
1438   void crc32x(const Register& rd,
1439               const Register& rn,
1440               const Register& rm);
1441 
1442   // CRC-32 C checksum from byte.
1443   void crc32cb(const Register& rd,
1444                const Register& rn,
1445                const Register& rm);
1446 
1447   // CRC-32 C checksum from half-word.
1448   void crc32ch(const Register& rd,
1449                const Register& rn,
1450                const Register& rm);
1451 
1452   // CRC-32 C checksum from word.
1453   void crc32cw(const Register& rd,
1454                const Register& rn,
1455                const Register& rm);
1456 
1457   // CRC-32C checksum from double word.
1458   void crc32cx(const Register& rd,
1459                const Register& rn,
1460                const Register& rm);
1461 
1462   // Multiply.
1463   void mul(const Register& rd, const Register& rn, const Register& rm);
1464 
1465   // Negated multiply.
1466   void mneg(const Register& rd, const Register& rn, const Register& rm);
1467 
1468   // Signed long multiply: 32 x 32 -> 64-bit.
1469   void smull(const Register& rd, const Register& rn, const Register& rm);
1470 
1471   // Signed multiply high: 64 x 64 -> 64-bit <127:64>.
1472   void smulh(const Register& xd, const Register& xn, const Register& xm);
1473 
1474   // Multiply and accumulate.
1475   void madd(const Register& rd,
1476             const Register& rn,
1477             const Register& rm,
1478             const Register& ra);
1479 
1480   // Multiply and subtract.
1481   void msub(const Register& rd,
1482             const Register& rn,
1483             const Register& rm,
1484             const Register& ra);
1485 
1486   // Signed long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
1487   void smaddl(const Register& rd,
1488               const Register& rn,
1489               const Register& rm,
1490               const Register& ra);
1491 
1492   // Unsigned long multiply and accumulate: 32 x 32 + 64 -> 64-bit.
1493   void umaddl(const Register& rd,
1494               const Register& rn,
1495               const Register& rm,
1496               const Register& ra);
1497 
1498   // Unsigned long multiply: 32 x 32 -> 64-bit.
umull(const Register & rd,const Register & rn,const Register & rm)1499   void umull(const Register& rd,
1500              const Register& rn,
1501              const Register& rm) {
1502     umaddl(rd, rn, rm, xzr);
1503   }
1504 
1505   // Unsigned multiply high: 64 x 64 -> 64-bit <127:64>.
1506   void umulh(const Register& xd,
1507              const Register& xn,
1508              const Register& xm);
1509 
1510   // Signed long multiply and subtract: 64 - (32 x 32) -> 64-bit.
1511   void smsubl(const Register& rd,
1512               const Register& rn,
1513               const Register& rm,
1514               const Register& ra);
1515 
1516   // Unsigned long multiply and subtract: 64 - (32 x 32) -> 64-bit.
1517   void umsubl(const Register& rd,
1518               const Register& rn,
1519               const Register& rm,
1520               const Register& ra);
1521 
1522   // Signed integer divide.
1523   void sdiv(const Register& rd, const Register& rn, const Register& rm);
1524 
1525   // Unsigned integer divide.
1526   void udiv(const Register& rd, const Register& rn, const Register& rm);
1527 
1528   // Bit reverse.
1529   void rbit(const Register& rd, const Register& rn);
1530 
1531   // Reverse bytes in 16-bit half words.
1532   void rev16(const Register& rd, const Register& rn);
1533 
1534   // Reverse bytes in 32-bit words.
1535   void rev32(const Register& rd, const Register& rn);
1536 
1537   // Reverse bytes.
1538   void rev(const Register& rd, const Register& rn);
1539 
1540   // Count leading zeroes.
1541   void clz(const Register& rd, const Register& rn);
1542 
1543   // Count leading sign bits.
1544   void cls(const Register& rd, const Register& rn);
1545 
1546   // Memory instructions.
1547   // Load integer or FP register.
1548   void ldr(const CPURegister& rt, const MemOperand& src,
1549            LoadStoreScalingOption option = PreferScaledOffset);
1550 
1551   // Store integer or FP register.
1552   void str(const CPURegister& rt, const MemOperand& dst,
1553            LoadStoreScalingOption option = PreferScaledOffset);
1554 
1555   // Load word with sign extension.
1556   void ldrsw(const Register& rt, const MemOperand& src,
1557              LoadStoreScalingOption option = PreferScaledOffset);
1558 
1559   // Load byte.
1560   void ldrb(const Register& rt, const MemOperand& src,
1561             LoadStoreScalingOption option = PreferScaledOffset);
1562 
1563   // Store byte.
1564   void strb(const Register& rt, const MemOperand& dst,
1565             LoadStoreScalingOption option = PreferScaledOffset);
1566 
1567   // Load byte with sign extension.
1568   void ldrsb(const Register& rt, const MemOperand& src,
1569              LoadStoreScalingOption option = PreferScaledOffset);
1570 
1571   // Load half-word.
1572   void ldrh(const Register& rt, const MemOperand& src,
1573             LoadStoreScalingOption option = PreferScaledOffset);
1574 
1575   // Store half-word.
1576   void strh(const Register& rt, const MemOperand& dst,
1577             LoadStoreScalingOption option = PreferScaledOffset);
1578 
1579   // Load half-word with sign extension.
1580   void ldrsh(const Register& rt, const MemOperand& src,
1581              LoadStoreScalingOption option = PreferScaledOffset);
1582 
1583   // Load integer or FP register (with unscaled offset).
1584   void ldur(const CPURegister& rt, const MemOperand& src,
1585             LoadStoreScalingOption option = PreferUnscaledOffset);
1586 
1587   // Store integer or FP register (with unscaled offset).
1588   void stur(const CPURegister& rt, const MemOperand& src,
1589             LoadStoreScalingOption option = PreferUnscaledOffset);
1590 
1591   // Load word with sign extension.
1592   void ldursw(const Register& rt, const MemOperand& src,
1593               LoadStoreScalingOption option = PreferUnscaledOffset);
1594 
1595   // Load byte (with unscaled offset).
1596   void ldurb(const Register& rt, const MemOperand& src,
1597              LoadStoreScalingOption option = PreferUnscaledOffset);
1598 
1599   // Store byte (with unscaled offset).
1600   void sturb(const Register& rt, const MemOperand& dst,
1601              LoadStoreScalingOption option = PreferUnscaledOffset);
1602 
1603   // Load byte with sign extension (and unscaled offset).
1604   void ldursb(const Register& rt, const MemOperand& src,
1605               LoadStoreScalingOption option = PreferUnscaledOffset);
1606 
1607   // Load half-word (with unscaled offset).
1608   void ldurh(const Register& rt, const MemOperand& src,
1609              LoadStoreScalingOption option = PreferUnscaledOffset);
1610 
1611   // Store half-word (with unscaled offset).
1612   void sturh(const Register& rt, const MemOperand& dst,
1613              LoadStoreScalingOption option = PreferUnscaledOffset);
1614 
1615   // Load half-word with sign extension (and unscaled offset).
1616   void ldursh(const Register& rt, const MemOperand& src,
1617               LoadStoreScalingOption option = PreferUnscaledOffset);
1618 
1619   // Load integer or FP register pair.
1620   void ldp(const CPURegister& rt, const CPURegister& rt2,
1621            const MemOperand& src);
1622 
1623   // Store integer or FP register pair.
1624   void stp(const CPURegister& rt, const CPURegister& rt2,
1625            const MemOperand& dst);
1626 
1627   // Load word pair with sign extension.
1628   void ldpsw(const Register& rt, const Register& rt2, const MemOperand& src);
1629 
1630   // Load integer or FP register pair, non-temporal.
1631   void ldnp(const CPURegister& rt, const CPURegister& rt2,
1632             const MemOperand& src);
1633 
1634   // Store integer or FP register pair, non-temporal.
1635   void stnp(const CPURegister& rt, const CPURegister& rt2,
1636             const MemOperand& dst);
1637 
1638   // Load integer or FP register from pc + imm19 << 2.
1639   void ldr(const CPURegister& rt, int imm19);
1640   static void ldr(Instruction* at, const CPURegister& rt, int imm19);
1641 
1642   // Load word with sign extension from pc + imm19 << 2.
1643   void ldrsw(const Register& rt, int imm19);
1644 
1645   // Store exclusive byte.
1646   void stxrb(const Register& rs, const Register& rt, const MemOperand& dst);
1647 
1648   // Store exclusive half-word.
1649   void stxrh(const Register& rs, const Register& rt, const MemOperand& dst);
1650 
1651   // Store exclusive register.
1652   void stxr(const Register& rs, const Register& rt, const MemOperand& dst);
1653 
1654   // Load exclusive byte.
1655   void ldxrb(const Register& rt, const MemOperand& src);
1656 
1657   // Load exclusive half-word.
1658   void ldxrh(const Register& rt, const MemOperand& src);
1659 
1660   // Load exclusive register.
1661   void ldxr(const Register& rt, const MemOperand& src);
1662 
1663   // Store exclusive register pair.
1664   void stxp(const Register& rs,
1665             const Register& rt,
1666             const Register& rt2,
1667             const MemOperand& dst);
1668 
1669   // Load exclusive register pair.
1670   void ldxp(const Register& rt, const Register& rt2, const MemOperand& src);
1671 
1672   // Store-release exclusive byte.
1673   void stlxrb(const Register& rs, const Register& rt, const MemOperand& dst);
1674 
1675   // Store-release exclusive half-word.
1676   void stlxrh(const Register& rs, const Register& rt, const MemOperand& dst);
1677 
1678   // Store-release exclusive register.
1679   void stlxr(const Register& rs, const Register& rt, const MemOperand& dst);
1680 
1681   // Load-acquire exclusive byte.
1682   void ldaxrb(const Register& rt, const MemOperand& src);
1683 
1684   // Load-acquire exclusive half-word.
1685   void ldaxrh(const Register& rt, const MemOperand& src);
1686 
1687   // Load-acquire exclusive register.
1688   void ldaxr(const Register& rt, const MemOperand& src);
1689 
1690   // Store-release exclusive register pair.
1691   void stlxp(const Register& rs,
1692              const Register& rt,
1693              const Register& rt2,
1694              const MemOperand& dst);
1695 
1696   // Load-acquire exclusive register pair.
1697   void ldaxp(const Register& rt, const Register& rt2, const MemOperand& src);
1698 
1699   // Store-release byte.
1700   void stlrb(const Register& rt, const MemOperand& dst);
1701 
1702   // Store-release half-word.
1703   void stlrh(const Register& rt, const MemOperand& dst);
1704 
1705   // Store-release register.
1706   void stlr(const Register& rt, const MemOperand& dst);
1707 
1708   // Load-acquire byte.
1709   void ldarb(const Register& rt, const MemOperand& src);
1710 
1711   // Load-acquire half-word.
1712   void ldarh(const Register& rt, const MemOperand& src);
1713 
1714   // Load-acquire register.
1715   void ldar(const Register& rt, const MemOperand& src);
1716 
1717   // Prefetch memory.
1718   void prfm(PrefetchOperation op, const MemOperand& addr,
1719             LoadStoreScalingOption option = PreferScaledOffset);
1720 
1721   // Prefetch memory (with unscaled offset).
1722   void prfum(PrefetchOperation op, const MemOperand& addr,
1723              LoadStoreScalingOption option = PreferUnscaledOffset);
1724 
1725   // Prefetch from pc + imm19 << 2.
1726   void prfm(PrefetchOperation op, int imm19);
1727 
1728   // Move instructions. The default shift of -1 indicates that the move
1729   // instruction will calculate an appropriate 16-bit immediate and left shift
1730   // that is equal to the 64-bit immediate argument. If an explicit left shift
1731   // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value.
1732   //
1733   // For movk, an explicit shift can be used to indicate which half word should
1734   // be overwritten, eg. movk(x0, 0, 0) will overwrite the least-significant
1735   // half word with zero, whereas movk(x0, 0, 48) will overwrite the
1736   // most-significant.
1737 
  // Move immediate and keep (MOVK): writes a 16-bit immediate into one
  // half-word of rd, leaving the other bits unchanged. With the default
  // shift of -1, MoveWide derives the half-word position from imm; an
  // explicit shift (0, 16, 32 or 48) selects it directly (see the comment
  // block above on move-wide shift semantics).
  void movk(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVK);
  }
1742 
  // Move inverted immediate (MOVN). With the default shift of -1, MoveWide
  // computes an appropriate 16-bit immediate and shift from the 64-bit imm;
  // an explicit shift (0, 16, 32 or 48) requires imm to fit in 16 bits.
  void movn(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVN);
  }
1747 
  // Move immediate (MOVZ). With the default shift of -1, MoveWide computes
  // an appropriate 16-bit immediate and shift from the 64-bit imm; an
  // explicit shift (0, 16, 32 or 48) requires imm to fit in 16 bits.
  void movz(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVZ);
  }
1752 
1753   // Misc instructions.
1754   // Monitor debug-mode breakpoint.
1755   void brk(int code);
1756 
1757   // Halting debug-mode breakpoint.
1758   void hlt(int code);
1759 
1760   // Generate exception targeting EL1.
1761   void svc(int code);
1762   static void svc(Instruction* at, int code);
1763 
1764   // Move register to register.
1765   void mov(const Register& rd, const Register& rn);
1766 
1767   // Move inverted operand to register.
1768   void mvn(const Register& rd, const Operand& operand);
1769 
1770   // System instructions.
1771   // Move to register from system register.
1772   void mrs(const Register& rt, SystemRegister sysreg);
1773 
1774   // Move from register to system register.
1775   void msr(SystemRegister sysreg, const Register& rt);
1776 
1777   // System instruction.
1778   void sys(int op1, int crn, int crm, int op2, const Register& rt = xzr);
1779 
1780   // System instruction with pre-encoded op (op1:crn:crm:op2).
1781   void sys(int op, const Register& rt = xzr);
1782 
1783   // System data cache operation.
1784   void dc(DataCacheOp op, const Register& rt);
1785 
1786   // System instruction cache operation.
1787   void ic(InstructionCacheOp op, const Register& rt);
1788 
1789   // System hint.
1790   BufferOffset hint(SystemHint code);
1791   static void hint(Instruction* at, SystemHint code);
1792 
1793   // Clear exclusive monitor.
1794   void clrex(int imm4 = 0xf);
1795 
1796   // Data memory barrier.
1797   void dmb(BarrierDomain domain, BarrierType type);
1798 
1799   // Data synchronization barrier.
1800   void dsb(BarrierDomain domain, BarrierType type);
1801 
1802   // Instruction synchronization barrier.
1803   void isb();
1804 
  // Alias for system instructions.
  // No-op: emitted via the architectural hint mechanism. Returns the buffer
  // offset of the emitted instruction.
  BufferOffset nop() {
    return hint(NOP);
  }
  // Static form: encodes a NOP at 'at' rather than appending to the buffer
  // (definition elsewhere — mirrors the static hint(Instruction*, ...) above).
  static void nop(Instruction* at);
1811 
  // Alias for system instructions.
  // Conditional speculation barrier (CSDB), emitted via the hint mechanism.
  // Returns the buffer offset of the emitted instruction.
  BufferOffset csdb() {
    return hint(CSDB);
  }
  // Static form: encodes a CSDB at 'at' rather than appending to the buffer
  // (definition elsewhere — mirrors the static hint(Instruction*, ...) above).
  static void csdb(Instruction* at);
1818 
1819   // FP and NEON instructions.
1820   // Move double precision immediate to FP register.
1821   void fmov(const VRegister& vd, double imm);
1822 
1823   // Move single precision immediate to FP register.
1824   void fmov(const VRegister& vd, float imm);
1825 
1826   // Move FP register to register.
1827   void fmov(const Register& rd, const VRegister& fn);
1828 
1829   // Move register to FP register.
1830   void fmov(const VRegister& vd, const Register& rn);
1831 
1832   // Move FP register to FP register.
1833   void fmov(const VRegister& vd, const VRegister& fn);
1834 
1835   // Move 64-bit register to top half of 128-bit FP register.
1836   void fmov(const VRegister& vd, int index, const Register& rn);
1837 
1838   // Move top half of 128-bit FP register to 64-bit register.
1839   void fmov(const Register& rd, const VRegister& vn, int index);
1840 
1841   // FP add.
1842   void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1843 
1844   // FP subtract.
1845   void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1846 
1847   // FP multiply.
1848   void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1849 
1850   // FP fused multiply-add.
1851   void fmadd(const VRegister& vd,
1852              const VRegister& vn,
1853              const VRegister& vm,
1854              const VRegister& va);
1855 
1856   // FP fused multiply-subtract.
1857   void fmsub(const VRegister& vd,
1858              const VRegister& vn,
1859              const VRegister& vm,
1860              const VRegister& va);
1861 
1862   // FP fused multiply-add and negate.
1863   void fnmadd(const VRegister& vd,
1864               const VRegister& vn,
1865               const VRegister& vm,
1866               const VRegister& va);
1867 
1868   // FP fused multiply-subtract and negate.
1869   void fnmsub(const VRegister& vd,
1870               const VRegister& vn,
1871               const VRegister& vm,
1872               const VRegister& va);
1873 
1874   // FP multiply-negate scalar.
1875   void fnmul(const VRegister& vd,
1876              const VRegister& vn,
1877              const VRegister& vm);
1878 
1879   // FP reciprocal exponent scalar.
1880   void frecpx(const VRegister& vd,
1881               const VRegister& vn);
1882 
1883   // FP divide.
1884   void fdiv(const VRegister& vd, const VRegister& fn, const VRegister& vm);
1885 
1886   // FP maximum.
1887   void fmax(const VRegister& vd, const VRegister& fn, const VRegister& vm);
1888 
1889   // FP minimum.
1890   void fmin(const VRegister& vd, const VRegister& fn, const VRegister& vm);
1891 
1892   // FP maximum number.
1893   void fmaxnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
1894 
1895   // FP minimum number.
1896   void fminnm(const VRegister& vd, const VRegister& fn, const VRegister& vm);
1897 
1898   // FP absolute.
1899   void fabs(const VRegister& vd, const VRegister& vn);
1900 
1901   // FP negate.
1902   void fneg(const VRegister& vd, const VRegister& vn);
1903 
1904   // FP square root.
1905   void fsqrt(const VRegister& vd, const VRegister& vn);
1906 
1907   // FP round to integer, nearest with ties to away.
1908   void frinta(const VRegister& vd, const VRegister& vn);
1909 
1910   // FP round to integer, implicit rounding.
1911   void frinti(const VRegister& vd, const VRegister& vn);
1912 
1913   // FP round to integer, toward minus infinity.
1914   void frintm(const VRegister& vd, const VRegister& vn);
1915 
1916   // FP round to integer, nearest with ties to even.
1917   void frintn(const VRegister& vd, const VRegister& vn);
1918 
1919   // FP round to integer, toward plus infinity.
1920   void frintp(const VRegister& vd, const VRegister& vn);
1921 
1922   // FP round to integer, exact, implicit rounding.
1923   void frintx(const VRegister& vd, const VRegister& vn);
1924 
1925   // FP round to integer, towards zero.
1926   void frintz(const VRegister& vd, const VRegister& vn);
1927 
1928   void FPCompareMacro(const VRegister& vn,
1929                       double value,
1930                       FPTrapFlags trap);
1931 
1932   void FPCompareMacro(const VRegister& vn,
1933                       const VRegister& vm,
1934                       FPTrapFlags trap);
1935 
1936   // FP compare registers.
1937   void fcmp(const VRegister& vn, const VRegister& vm);
1938 
1939   // FP compare immediate.
1940   void fcmp(const VRegister& vn, double value);
1941 
1942   void FPCCompareMacro(const VRegister& vn,
1943                        const VRegister& vm,
1944                        StatusFlags nzcv,
1945                        Condition cond,
1946                        FPTrapFlags trap);
1947 
1948   // FP conditional compare.
1949   void fccmp(const VRegister& vn,
1950              const VRegister& vm,
1951              StatusFlags nzcv,
1952              Condition cond);
1953 
1954   // FP signaling compare registers.
1955   void fcmpe(const VRegister& vn, const VRegister& vm);
1956 
1957   // FP signaling compare immediate.
1958   void fcmpe(const VRegister& vn, double value);
1959 
1960   // FP conditional signaling compare.
1961   void fccmpe(const VRegister& vn,
1962               const VRegister& vm,
1963               StatusFlags nzcv,
1964               Condition cond);
1965 
1966   // FP conditional select.
1967   void fcsel(const VRegister& vd,
1968              const VRegister& vn,
1969              const VRegister& vm,
1970              Condition cond);
1971 
1972   // Common FP Convert functions.
1973   void NEONFPConvertToInt(const Register& rd,
1974                           const VRegister& vn,
1975                           Instr op);
1976   void NEONFPConvertToInt(const VRegister& vd,
1977                           const VRegister& vn,
1978                           Instr op);
1979 
1980   // FP convert between precisions.
1981   void fcvt(const VRegister& vd, const VRegister& vn);
1982 
1983   // FP convert to higher precision.
1984   void fcvtl(const VRegister& vd, const VRegister& vn);
1985 
1986   // FP convert to higher precision (second part).
1987   void fcvtl2(const VRegister& vd, const VRegister& vn);
1988 
1989   // FP convert to lower precision.
1990   void fcvtn(const VRegister& vd, const VRegister& vn);
1991 
  // FP convert to lower precision (second part).
1993   void fcvtn2(const VRegister& vd, const VRegister& vn);
1994 
1995   // FP convert to lower precision, rounding to odd.
1996   void fcvtxn(const VRegister& vd, const VRegister& vn);
1997 
1998   // FP convert to lower precision, rounding to odd (second part).
1999   void fcvtxn2(const VRegister& vd, const VRegister& vn);
2000 
2001   // FP convert to signed integer, nearest with ties to away.
2002   void fcvtas(const Register& rd, const VRegister& vn);
2003 
2004   // FP convert to unsigned integer, nearest with ties to away.
2005   void fcvtau(const Register& rd, const VRegister& vn);
2006 
2007   // FP convert to signed integer, nearest with ties to away.
2008   void fcvtas(const VRegister& vd, const VRegister& vn);
2009 
2010   // FP convert to unsigned integer, nearest with ties to away.
2011   void fcvtau(const VRegister& vd, const VRegister& vn);
2012 
2013   // FP convert to signed integer, round towards -infinity.
2014   void fcvtms(const Register& rd, const VRegister& vn);
2015 
2016   // FP convert to unsigned integer, round towards -infinity.
2017   void fcvtmu(const Register& rd, const VRegister& vn);
2018 
2019   // FP convert to signed integer, round towards -infinity.
2020   void fcvtms(const VRegister& vd, const VRegister& vn);
2021 
2022   // FP convert to unsigned integer, round towards -infinity.
2023   void fcvtmu(const VRegister& vd, const VRegister& vn);
2024 
2025   // FP convert to signed integer, nearest with ties to even.
2026   void fcvtns(const Register& rd, const VRegister& vn);
2027 
2028   // FP convert to unsigned integer, nearest with ties to even.
2029   void fcvtnu(const Register& rd, const VRegister& vn);
2030 
2031   // FP convert to signed integer, nearest with ties to even.
2032   void fcvtns(const VRegister& rd, const VRegister& vn);
2033 
2034   // FP JavaScript convert to signed integer, rounding toward zero [Armv8.3].
2035   void fjcvtzs(const Register& rd, const VRegister& vn);
2036 
2037   // FP convert to unsigned integer, nearest with ties to even.
2038   void fcvtnu(const VRegister& rd, const VRegister& vn);
2039 
2040   // FP convert to signed integer or fixed-point, round towards zero.
2041   void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0);
2042 
2043   // FP convert to unsigned integer or fixed-point, round towards zero.
2044   void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0);
2045 
2046   // FP convert to signed integer or fixed-point, round towards zero.
2047   void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0);
2048 
2049   // FP convert to unsigned integer or fixed-point, round towards zero.
2050   void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0);
2051 
2052   // FP convert to signed integer, round towards +infinity.
2053   void fcvtps(const Register& rd, const VRegister& vn);
2054 
2055   // FP convert to unsigned integer, round towards +infinity.
2056   void fcvtpu(const Register& rd, const VRegister& vn);
2057 
2058   // FP convert to signed integer, round towards +infinity.
2059   void fcvtps(const VRegister& vd, const VRegister& vn);
2060 
2061   // FP convert to unsigned integer, round towards +infinity.
2062   void fcvtpu(const VRegister& vd, const VRegister& vn);
2063 
2064   // Convert signed integer or fixed point to FP.
2065   void scvtf(const VRegister& fd, const Register& rn, int fbits = 0);
2066 
2067   // Convert unsigned integer or fixed point to FP.
2068   void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0);
2069 
2070   // Convert signed integer or fixed-point to FP.
2071   void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2072 
2073   // Convert unsigned integer or fixed-point to FP.
2074   void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0);
2075 
2076   // Unsigned absolute difference.
2077   void uabd(const VRegister& vd,
2078             const VRegister& vn,
2079             const VRegister& vm);
2080 
2081   // Signed absolute difference.
2082   void sabd(const VRegister& vd,
2083             const VRegister& vn,
2084             const VRegister& vm);
2085 
2086   // Unsigned absolute difference and accumulate.
2087   void uaba(const VRegister& vd,
2088             const VRegister& vn,
2089             const VRegister& vm);
2090 
2091   // Signed absolute difference and accumulate.
2092   void saba(const VRegister& vd,
2093             const VRegister& vn,
2094             const VRegister& vm);
2095 
2096   // Add.
2097   void add(const VRegister& vd,
2098            const VRegister& vn,
2099            const VRegister& vm);
2100 
2101   // Subtract.
2102   void sub(const VRegister& vd,
2103            const VRegister& vn,
2104            const VRegister& vm);
2105 
2106   // Unsigned halving add.
2107   void uhadd(const VRegister& vd,
2108              const VRegister& vn,
2109              const VRegister& vm);
2110 
2111   // Signed halving add.
2112   void shadd(const VRegister& vd,
2113              const VRegister& vn,
2114              const VRegister& vm);
2115 
2116   // Unsigned rounding halving add.
2117   void urhadd(const VRegister& vd,
2118               const VRegister& vn,
2119               const VRegister& vm);
2120 
2121   // Signed rounding halving add.
2122   void srhadd(const VRegister& vd,
2123               const VRegister& vn,
2124               const VRegister& vm);
2125 
2126   // Unsigned halving sub.
2127   void uhsub(const VRegister& vd,
2128              const VRegister& vn,
2129              const VRegister& vm);
2130 
2131   // Signed halving sub.
2132   void shsub(const VRegister& vd,
2133              const VRegister& vn,
2134              const VRegister& vm);
2135 
2136   // Unsigned saturating add.
2137   void uqadd(const VRegister& vd,
2138              const VRegister& vn,
2139              const VRegister& vm);
2140 
2141   // Signed saturating add.
2142   void sqadd(const VRegister& vd,
2143              const VRegister& vn,
2144              const VRegister& vm);
2145 
2146   // Unsigned saturating subtract.
2147   void uqsub(const VRegister& vd,
2148              const VRegister& vn,
2149              const VRegister& vm);
2150 
2151   // Signed saturating subtract.
2152   void sqsub(const VRegister& vd,
2153              const VRegister& vn,
2154              const VRegister& vm);
2155 
2156   // Add pairwise.
2157   void addp(const VRegister& vd,
2158             const VRegister& vn,
2159             const VRegister& vm);
2160 
2161   // Add pair of elements scalar.
2162   void addp(const VRegister& vd,
2163             const VRegister& vn);
2164 
2165   // Multiply-add to accumulator.
2166   void mla(const VRegister& vd,
2167            const VRegister& vn,
2168            const VRegister& vm);
2169 
2170   // Multiply-subtract to accumulator.
2171   void mls(const VRegister& vd,
2172            const VRegister& vn,
2173            const VRegister& vm);
2174 
2175   // Multiply.
2176   void mul(const VRegister& vd,
2177            const VRegister& vn,
2178            const VRegister& vm);
2179 
2180   // Multiply by scalar element.
2181   void mul(const VRegister& vd,
2182            const VRegister& vn,
2183            const VRegister& vm,
2184            int vm_index);
2185 
2186   // Multiply-add by scalar element.
2187   void mla(const VRegister& vd,
2188            const VRegister& vn,
2189            const VRegister& vm,
2190            int vm_index);
2191 
2192   // Multiply-subtract by scalar element.
2193   void mls(const VRegister& vd,
2194            const VRegister& vn,
2195            const VRegister& vm,
2196            int vm_index);
2197 
2198   // Signed long multiply-add by scalar element.
2199   void smlal(const VRegister& vd,
2200              const VRegister& vn,
2201              const VRegister& vm,
2202              int vm_index);
2203 
2204   // Signed long multiply-add by scalar element (second part).
2205   void smlal2(const VRegister& vd,
2206               const VRegister& vn,
2207               const VRegister& vm,
2208               int vm_index);
2209 
2210   // Unsigned long multiply-add by scalar element.
2211   void umlal(const VRegister& vd,
2212              const VRegister& vn,
2213              const VRegister& vm,
2214              int vm_index);
2215 
2216   // Unsigned long multiply-add by scalar element (second part).
2217   void umlal2(const VRegister& vd,
2218               const VRegister& vn,
2219               const VRegister& vm,
2220               int vm_index);
2221 
2222   // Signed long multiply-sub by scalar element.
2223   void smlsl(const VRegister& vd,
2224              const VRegister& vn,
2225              const VRegister& vm,
2226              int vm_index);
2227 
2228   // Signed long multiply-sub by scalar element (second part).
2229   void smlsl2(const VRegister& vd,
2230               const VRegister& vn,
2231               const VRegister& vm,
2232               int vm_index);
2233 
2234   // Unsigned long multiply-sub by scalar element.
2235   void umlsl(const VRegister& vd,
2236              const VRegister& vn,
2237              const VRegister& vm,
2238              int vm_index);
2239 
2240   // Unsigned long multiply-sub by scalar element (second part).
2241   void umlsl2(const VRegister& vd,
2242               const VRegister& vn,
2243               const VRegister& vm,
2244               int vm_index);
2245 
2246   // Signed long multiply by scalar element.
2247   void smull(const VRegister& vd,
2248              const VRegister& vn,
2249              const VRegister& vm,
2250              int vm_index);
2251 
2252   // Signed long multiply by scalar element (second part).
2253   void smull2(const VRegister& vd,
2254               const VRegister& vn,
2255               const VRegister& vm,
2256               int vm_index);
2257 
2258   // Unsigned long multiply by scalar element.
2259   void umull(const VRegister& vd,
2260              const VRegister& vn,
2261              const VRegister& vm,
2262              int vm_index);
2263 
2264   // Unsigned long multiply by scalar element (second part).
2265   void umull2(const VRegister& vd,
2266               const VRegister& vn,
2267               const VRegister& vm,
2268               int vm_index);
2269 
2270   // Signed saturating double long multiply by element.
2271   void sqdmull(const VRegister& vd,
2272                const VRegister& vn,
2273                const VRegister& vm,
2274                int vm_index);
2275 
2276   // Signed saturating double long multiply by element (second part).
2277   void sqdmull2(const VRegister& vd,
2278                 const VRegister& vn,
2279                 const VRegister& vm,
2280                 int vm_index);
2281 
2282   // Signed saturating doubling long multiply-add by element.
2283   void sqdmlal(const VRegister& vd,
2284                const VRegister& vn,
2285                const VRegister& vm,
2286                int vm_index);
2287 
2288   // Signed saturating doubling long multiply-add by element (second part).
2289   void sqdmlal2(const VRegister& vd,
2290                 const VRegister& vn,
2291                 const VRegister& vm,
2292                 int vm_index);
2293 
2294   // Signed saturating doubling long multiply-sub by element.
2295   void sqdmlsl(const VRegister& vd,
2296                const VRegister& vn,
2297                const VRegister& vm,
2298                int vm_index);
2299 
2300   // Signed saturating doubling long multiply-sub by element (second part).
2301   void sqdmlsl2(const VRegister& vd,
2302                 const VRegister& vn,
2303                 const VRegister& vm,
2304                 int vm_index);
2305 
2306   // Compare equal.
2307   void cmeq(const VRegister& vd,
2308             const VRegister& vn,
2309             const VRegister& vm);
2310 
2311   // Compare signed greater than or equal.
2312   void cmge(const VRegister& vd,
2313             const VRegister& vn,
2314             const VRegister& vm);
2315 
2316   // Compare signed greater than.
2317   void cmgt(const VRegister& vd,
2318             const VRegister& vn,
2319             const VRegister& vm);
2320 
2321   // Compare unsigned higher.
2322   void cmhi(const VRegister& vd,
2323             const VRegister& vn,
2324             const VRegister& vm);
2325 
2326   // Compare unsigned higher or same.
2327   void cmhs(const VRegister& vd,
2328             const VRegister& vn,
2329             const VRegister& vm);
2330 
2331   // Compare bitwise test bits nonzero.
2332   void cmtst(const VRegister& vd,
2333              const VRegister& vn,
2334              const VRegister& vm);
2335 
2336   // Compare bitwise to zero.
2337   void cmeq(const VRegister& vd,
2338             const VRegister& vn,
2339             int value);
2340 
2341   // Compare signed greater than or equal to zero.
2342   void cmge(const VRegister& vd,
2343             const VRegister& vn,
2344             int value);
2345 
2346   // Compare signed greater than zero.
2347   void cmgt(const VRegister& vd,
2348             const VRegister& vn,
2349             int value);
2350 
2351   // Compare signed less than or equal to zero.
2352   void cmle(const VRegister& vd,
2353             const VRegister& vn,
2354             int value);
2355 
2356   // Compare signed less than zero.
2357   void cmlt(const VRegister& vd,
2358             const VRegister& vn,
2359             int value);
2360 
2361   // Signed shift left by register.
2362   void sshl(const VRegister& vd,
2363             const VRegister& vn,
2364             const VRegister& vm);
2365 
2366   // Unsigned shift left by register.
2367   void ushl(const VRegister& vd,
2368             const VRegister& vn,
2369             const VRegister& vm);
2370 
2371   // Signed saturating shift left by register.
2372   void sqshl(const VRegister& vd,
2373              const VRegister& vn,
2374              const VRegister& vm);
2375 
2376   // Unsigned saturating shift left by register.
2377   void uqshl(const VRegister& vd,
2378              const VRegister& vn,
2379              const VRegister& vm);
2380 
2381   // Signed rounding shift left by register.
2382   void srshl(const VRegister& vd,
2383              const VRegister& vn,
2384              const VRegister& vm);
2385 
2386   // Unsigned rounding shift left by register.
2387   void urshl(const VRegister& vd,
2388              const VRegister& vn,
2389              const VRegister& vm);
2390 
2391   // Signed saturating rounding shift left by register.
2392   void sqrshl(const VRegister& vd,
2393               const VRegister& vn,
2394               const VRegister& vm);
2395 
2396   // Unsigned saturating rounding shift left by register.
2397   void uqrshl(const VRegister& vd,
2398               const VRegister& vn,
2399               const VRegister& vm);
2400 
2401   // Bitwise and.
2402   void and_(const VRegister& vd,
2403             const VRegister& vn,
2404             const VRegister& vm);
2405 
2406   // Bitwise or.
2407   void orr(const VRegister& vd,
2408            const VRegister& vn,
2409            const VRegister& vm);
2410 
2411   // Bitwise or immediate.
2412   void orr(const VRegister& vd,
2413            const int imm8,
2414            const int left_shift = 0);
2415 
2416   // Move register to register.
2417   void mov(const VRegister& vd,
2418            const VRegister& vn);
2419 
2420   // Bitwise orn.
2421   void orn(const VRegister& vd,
2422            const VRegister& vn,
2423            const VRegister& vm);
2424 
2425   // Bitwise eor.
2426   void eor(const VRegister& vd,
2427            const VRegister& vn,
2428            const VRegister& vm);
2429 
2430   // Bit clear immediate.
2431   void bic(const VRegister& vd,
2432            const int imm8,
2433            const int left_shift = 0);
2434 
2435   // Bit clear.
2436   void bic(const VRegister& vd,
2437            const VRegister& vn,
2438            const VRegister& vm);
2439 
2440   // Bitwise insert if false.
2441   void bif(const VRegister& vd,
2442            const VRegister& vn,
2443            const VRegister& vm);
2444 
2445   // Bitwise insert if true.
2446   void bit(const VRegister& vd,
2447            const VRegister& vn,
2448            const VRegister& vm);
2449 
2450   // Bitwise select.
2451   void bsl(const VRegister& vd,
2452            const VRegister& vn,
2453            const VRegister& vm);
2454 
2455   // Polynomial multiply.
2456   void pmul(const VRegister& vd,
2457             const VRegister& vn,
2458             const VRegister& vm);
2459 
2460   // Vector move immediate.
2461   void movi(const VRegister& vd,
2462             const uint64_t imm,
2463             Shift shift = LSL,
2464             const int shift_amount = 0);
2465 
2466   // Bitwise not.
2467   void mvn(const VRegister& vd,
2468            const VRegister& vn);
2469 
2470   // Vector move inverted immediate.
2471   void mvni(const VRegister& vd,
2472             const int imm8,
2473             Shift shift = LSL,
2474             const int shift_amount = 0);
2475 
2476   // Signed saturating accumulate of unsigned value.
2477   void suqadd(const VRegister& vd,
2478               const VRegister& vn);
2479 
2480   // Unsigned saturating accumulate of signed value.
2481   void usqadd(const VRegister& vd,
2482               const VRegister& vn);
2483 
2484   // Absolute value.
2485   void abs(const VRegister& vd,
2486            const VRegister& vn);
2487 
2488   // Signed saturating absolute value.
2489   void sqabs(const VRegister& vd,
2490              const VRegister& vn);
2491 
2492   // Negate.
2493   void neg(const VRegister& vd,
2494            const VRegister& vn);
2495 
2496   // Signed saturating negate.
2497   void sqneg(const VRegister& vd,
2498              const VRegister& vn);
2499 
2500   // Bitwise not.
2501   void not_(const VRegister& vd,
2502             const VRegister& vn);
2503 
2504   // Extract narrow.
2505   void xtn(const VRegister& vd,
2506            const VRegister& vn);
2507 
2508   // Extract narrow (second part).
2509   void xtn2(const VRegister& vd,
2510             const VRegister& vn);
2511 
2512   // Signed saturating extract narrow.
2513   void sqxtn(const VRegister& vd,
2514              const VRegister& vn);
2515 
2516   // Signed saturating extract narrow (second part).
2517   void sqxtn2(const VRegister& vd,
2518               const VRegister& vn);
2519 
2520   // Unsigned saturating extract narrow.
2521   void uqxtn(const VRegister& vd,
2522              const VRegister& vn);
2523 
2524   // Unsigned saturating extract narrow (second part).
2525   void uqxtn2(const VRegister& vd,
2526               const VRegister& vn);
2527 
2528   // Signed saturating extract unsigned narrow.
2529   void sqxtun(const VRegister& vd,
2530               const VRegister& vn);
2531 
2532   // Signed saturating extract unsigned narrow (second part).
2533   void sqxtun2(const VRegister& vd,
2534                const VRegister& vn);
2535 
2536   // Extract vector from pair of vectors.
2537   void ext(const VRegister& vd,
2538            const VRegister& vn,
2539            const VRegister& vm,
2540            int index);
2541 
2542   // Duplicate vector element to vector or scalar.
2543   void dup(const VRegister& vd,
2544            const VRegister& vn,
2545            int vn_index);
2546 
2547   // Move vector element to scalar.
2548   void mov(const VRegister& vd,
2549            const VRegister& vn,
2550            int vn_index);
2551 
2552   // Duplicate general-purpose register to vector.
2553   void dup(const VRegister& vd,
2554            const Register& rn);
2555 
2556   // Insert vector element from another vector element.
2557   void ins(const VRegister& vd,
2558            int vd_index,
2559            const VRegister& vn,
2560            int vn_index);
2561 
2562   // Move vector element to another vector element.
2563   void mov(const VRegister& vd,
2564            int vd_index,
2565            const VRegister& vn,
2566            int vn_index);
2567 
2568   // Insert vector element from general-purpose register.
2569   void ins(const VRegister& vd,
2570            int vd_index,
2571            const Register& rn);
2572 
2573   // Move general-purpose register to a vector element.
2574   void mov(const VRegister& vd,
2575            int vd_index,
2576            const Register& rn);
2577 
2578   // Unsigned move vector element to general-purpose register.
2579   void umov(const Register& rd,
2580             const VRegister& vn,
2581             int vn_index);
2582 
2583   // Move vector element to general-purpose register.
2584   void mov(const Register& rd,
2585            const VRegister& vn,
2586            int vn_index);
2587 
2588   // Signed move vector element to general-purpose register.
2589   void smov(const Register& rd,
2590             const VRegister& vn,
2591             int vn_index);
2592 
2593   // One-element structure load to one register.
2594   void ld1(const VRegister& vt,
2595            const MemOperand& src);
2596 
2597   // One-element structure load to two registers.
2598   void ld1(const VRegister& vt,
2599            const VRegister& vt2,
2600            const MemOperand& src);
2601 
2602   // One-element structure load to three registers.
2603   void ld1(const VRegister& vt,
2604            const VRegister& vt2,
2605            const VRegister& vt3,
2606            const MemOperand& src);
2607 
2608   // One-element structure load to four registers.
2609   void ld1(const VRegister& vt,
2610            const VRegister& vt2,
2611            const VRegister& vt3,
2612            const VRegister& vt4,
2613            const MemOperand& src);
2614 
2615   // One-element single structure load to one lane.
2616   void ld1(const VRegister& vt,
2617            int lane,
2618            const MemOperand& src);
2619 
2620   // One-element single structure load to all lanes.
2621   void ld1r(const VRegister& vt,
2622             const MemOperand& src);
2623 
2624   // Two-element structure load.
2625   void ld2(const VRegister& vt,
2626            const VRegister& vt2,
2627            const MemOperand& src);
2628 
2629   // Two-element single structure load to one lane.
2630   void ld2(const VRegister& vt,
2631            const VRegister& vt2,
2632            int lane,
2633            const MemOperand& src);
2634 
2635   // Two-element single structure load to all lanes.
2636   void ld2r(const VRegister& vt,
2637             const VRegister& vt2,
2638             const MemOperand& src);
2639 
2640   // Three-element structure load.
2641   void ld3(const VRegister& vt,
2642            const VRegister& vt2,
2643            const VRegister& vt3,
2644            const MemOperand& src);
2645 
2646   // Three-element single structure load to one lane.
2647   void ld3(const VRegister& vt,
2648            const VRegister& vt2,
2649            const VRegister& vt3,
2650            int lane,
2651            const MemOperand& src);
2652 
2653   // Three-element single structure load to all lanes.
2654   void ld3r(const VRegister& vt,
2655             const VRegister& vt2,
2656             const VRegister& vt3,
2657             const MemOperand& src);
2658 
2659   // Four-element structure load.
2660   void ld4(const VRegister& vt,
2661            const VRegister& vt2,
2662            const VRegister& vt3,
2663            const VRegister& vt4,
2664            const MemOperand& src);
2665 
2666   // Four-element single structure load to one lane.
2667   void ld4(const VRegister& vt,
2668            const VRegister& vt2,
2669            const VRegister& vt3,
2670            const VRegister& vt4,
2671            int lane,
2672            const MemOperand& src);
2673 
2674   // Four-element single structure load to all lanes.
2675   void ld4r(const VRegister& vt,
2676             const VRegister& vt2,
2677             const VRegister& vt3,
2678             const VRegister& vt4,
2679             const MemOperand& src);
2680 
2681   // Count leading sign bits.
2682   void cls(const VRegister& vd,
2683            const VRegister& vn);
2684 
2685   // Count leading zero bits (vector).
2686   void clz(const VRegister& vd,
2687            const VRegister& vn);
2688 
2689   // Population count per byte.
2690   void cnt(const VRegister& vd,
2691            const VRegister& vn);
2692 
2693   // Reverse bit order.
2694   void rbit(const VRegister& vd,
2695             const VRegister& vn);
2696 
2697   // Reverse elements in 16-bit halfwords.
2698   void rev16(const VRegister& vd,
2699              const VRegister& vn);
2700 
2701   // Reverse elements in 32-bit words.
2702   void rev32(const VRegister& vd,
2703              const VRegister& vn);
2704 
2705   // Reverse elements in 64-bit doublewords.
2706   void rev64(const VRegister& vd,
2707              const VRegister& vn);
2708 
2709   // Unsigned reciprocal square root estimate.
2710   void ursqrte(const VRegister& vd,
2711                const VRegister& vn);
2712 
2713   // Unsigned reciprocal estimate.
2714   void urecpe(const VRegister& vd,
2715               const VRegister& vn);
2716 
2717   // Signed pairwise long add.
2718   void saddlp(const VRegister& vd,
2719               const VRegister& vn);
2720 
2721   // Unsigned pairwise long add.
2722   void uaddlp(const VRegister& vd,
2723               const VRegister& vn);
2724 
2725   // Signed pairwise long add and accumulate.
2726   void sadalp(const VRegister& vd,
2727               const VRegister& vn);
2728 
2729   // Unsigned pairwise long add and accumulate.
2730   void uadalp(const VRegister& vd,
2731               const VRegister& vn);
2732 
2733   // Shift left by immediate.
2734   void shl(const VRegister& vd,
2735            const VRegister& vn,
2736            int shift);
2737 
2738   // Signed saturating shift left by immediate.
2739   void sqshl(const VRegister& vd,
2740              const VRegister& vn,
2741              int shift);
2742 
2743   // Signed saturating shift left unsigned by immediate.
2744   void sqshlu(const VRegister& vd,
2745               const VRegister& vn,
2746               int shift);
2747 
2748   // Unsigned saturating shift left by immediate.
2749   void uqshl(const VRegister& vd,
2750              const VRegister& vn,
2751              int shift);
2752 
2753   // Signed shift left long by immediate.
2754   void sshll(const VRegister& vd,
2755              const VRegister& vn,
2756              int shift);
2757 
2758   // Signed shift left long by immediate (second part).
2759   void sshll2(const VRegister& vd,
2760               const VRegister& vn,
2761               int shift);
2762 
2763   // Signed extend long.
2764   void sxtl(const VRegister& vd,
2765             const VRegister& vn);
2766 
2767   // Signed extend long (second part).
2768   void sxtl2(const VRegister& vd,
2769              const VRegister& vn);
2770 
2771   // Unsigned shift left long by immediate.
2772   void ushll(const VRegister& vd,
2773              const VRegister& vn,
2774              int shift);
2775 
2776   // Unsigned shift left long by immediate (second part).
2777   void ushll2(const VRegister& vd,
2778               const VRegister& vn,
2779               int shift);
2780 
2781   // Shift left long by element size.
2782   void shll(const VRegister& vd,
2783             const VRegister& vn,
2784             int shift);
2785 
2786   // Shift left long by element size (second part).
2787   void shll2(const VRegister& vd,
2788              const VRegister& vn,
2789              int shift);
2790 
2791   // Unsigned extend long.
2792   void uxtl(const VRegister& vd,
2793             const VRegister& vn);
2794 
2795   // Unsigned extend long (second part).
2796   void uxtl2(const VRegister& vd,
2797              const VRegister& vn);
2798 
2799   // Shift left by immediate and insert.
2800   void sli(const VRegister& vd,
2801            const VRegister& vn,
2802            int shift);
2803 
2804   // Shift right by immediate and insert.
2805   void sri(const VRegister& vd,
2806            const VRegister& vn,
2807            int shift);
2808 
2809   // Signed maximum.
2810   void smax(const VRegister& vd,
2811             const VRegister& vn,
2812             const VRegister& vm);
2813 
2814   // Signed pairwise maximum.
2815   void smaxp(const VRegister& vd,
2816              const VRegister& vn,
2817              const VRegister& vm);
2818 
2819   // Add across vector.
2820   void addv(const VRegister& vd,
2821             const VRegister& vn);
2822 
2823   // Signed add long across vector.
2824   void saddlv(const VRegister& vd,
2825               const VRegister& vn);
2826 
2827   // Unsigned add long across vector.
2828   void uaddlv(const VRegister& vd,
2829               const VRegister& vn);
2830 
2831   // FP maximum number across vector.
2832   void fmaxnmv(const VRegister& vd,
2833                const VRegister& vn);
2834 
2835   // FP maximum across vector.
2836   void fmaxv(const VRegister& vd,
2837              const VRegister& vn);
2838 
2839   // FP minimum number across vector.
2840   void fminnmv(const VRegister& vd,
2841                const VRegister& vn);
2842 
2843   // FP minimum across vector.
2844   void fminv(const VRegister& vd,
2845              const VRegister& vn);
2846 
2847   // Signed maximum across vector.
2848   void smaxv(const VRegister& vd,
2849              const VRegister& vn);
2850 
2851   // Signed minimum.
2852   void smin(const VRegister& vd,
2853             const VRegister& vn,
2854             const VRegister& vm);
2855 
2856   // Signed pairwise minimum.
2857   void sminp(const VRegister& vd,
2858              const VRegister& vn,
2859              const VRegister& vm);
2860 
2861   // Signed minimum across vector.
2862   void sminv(const VRegister& vd,
2863              const VRegister& vn);
2864 
2865   // One-element structure store from one register.
2866   void st1(const VRegister& vt,
2867            const MemOperand& src);
2868 
2869   // One-element structure store from two registers.
2870   void st1(const VRegister& vt,
2871            const VRegister& vt2,
2872            const MemOperand& src);
2873 
2874   // One-element structure store from three registers.
2875   void st1(const VRegister& vt,
2876            const VRegister& vt2,
2877            const VRegister& vt3,
2878            const MemOperand& src);
2879 
2880   // One-element structure store from four registers.
2881   void st1(const VRegister& vt,
2882            const VRegister& vt2,
2883            const VRegister& vt3,
2884            const VRegister& vt4,
2885            const MemOperand& src);
2886 
2887   // One-element single structure store from one lane.
2888   void st1(const VRegister& vt,
2889            int lane,
2890            const MemOperand& src);
2891 
2892   // Two-element structure store from two registers.
2893   void st2(const VRegister& vt,
2894            const VRegister& vt2,
2895            const MemOperand& src);
2896 
2897   // Two-element single structure store from two lanes.
2898   void st2(const VRegister& vt,
2899            const VRegister& vt2,
2900            int lane,
2901            const MemOperand& src);
2902 
2903   // Three-element structure store from three registers.
2904   void st3(const VRegister& vt,
2905            const VRegister& vt2,
2906            const VRegister& vt3,
2907            const MemOperand& src);
2908 
2909   // Three-element single structure store from three lanes.
2910   void st3(const VRegister& vt,
2911            const VRegister& vt2,
2912            const VRegister& vt3,
2913            int lane,
2914            const MemOperand& src);
2915 
2916   // Four-element structure store from four registers.
2917   void st4(const VRegister& vt,
2918            const VRegister& vt2,
2919            const VRegister& vt3,
2920            const VRegister& vt4,
2921            const MemOperand& src);
2922 
2923   // Four-element single structure store from four lanes.
2924   void st4(const VRegister& vt,
2925            const VRegister& vt2,
2926            const VRegister& vt3,
2927            const VRegister& vt4,
2928            int lane,
2929            const MemOperand& src);
2930 
2931   // Unsigned add long.
2932   void uaddl(const VRegister& vd,
2933              const VRegister& vn,
2934              const VRegister& vm);
2935 
2936   // Unsigned add long (second part).
2937   void uaddl2(const VRegister& vd,
2938               const VRegister& vn,
2939               const VRegister& vm);
2940 
2941   // Unsigned add wide.
2942   void uaddw(const VRegister& vd,
2943              const VRegister& vn,
2944              const VRegister& vm);
2945 
2946   // Unsigned add wide (second part).
2947   void uaddw2(const VRegister& vd,
2948               const VRegister& vn,
2949               const VRegister& vm);
2950 
2951   // Signed add long.
2952   void saddl(const VRegister& vd,
2953              const VRegister& vn,
2954              const VRegister& vm);
2955 
2956   // Signed add long (second part).
2957   void saddl2(const VRegister& vd,
2958               const VRegister& vn,
2959               const VRegister& vm);
2960 
2961   // Signed add wide.
2962   void saddw(const VRegister& vd,
2963              const VRegister& vn,
2964              const VRegister& vm);
2965 
2966   // Signed add wide (second part).
2967   void saddw2(const VRegister& vd,
2968               const VRegister& vn,
2969               const VRegister& vm);
2970 
2971   // Unsigned subtract long.
2972   void usubl(const VRegister& vd,
2973              const VRegister& vn,
2974              const VRegister& vm);
2975 
2976   // Unsigned subtract long (second part).
2977   void usubl2(const VRegister& vd,
2978               const VRegister& vn,
2979               const VRegister& vm);
2980 
2981   // Unsigned subtract wide.
2982   void usubw(const VRegister& vd,
2983              const VRegister& vn,
2984              const VRegister& vm);
2985 
2986   // Unsigned subtract wide (second part).
2987   void usubw2(const VRegister& vd,
2988               const VRegister& vn,
2989               const VRegister& vm);
2990 
2991   // Signed subtract long.
2992   void ssubl(const VRegister& vd,
2993              const VRegister& vn,
2994              const VRegister& vm);
2995 
2996   // Signed subtract long (second part).
2997   void ssubl2(const VRegister& vd,
2998               const VRegister& vn,
2999               const VRegister& vm);
3000 
3001   // Signed integer subtract wide.
3002   void ssubw(const VRegister& vd,
3003              const VRegister& vn,
3004              const VRegister& vm);
3005 
3006   // Signed integer subtract wide (second part).
3007   void ssubw2(const VRegister& vd,
3008               const VRegister& vn,
3009               const VRegister& vm);
3010 
3011   // Unsigned maximum.
3012   void umax(const VRegister& vd,
3013             const VRegister& vn,
3014             const VRegister& vm);
3015 
3016   // Unsigned pairwise maximum.
3017   void umaxp(const VRegister& vd,
3018              const VRegister& vn,
3019              const VRegister& vm);
3020 
3021   // Unsigned maximum across vector.
3022   void umaxv(const VRegister& vd,
3023              const VRegister& vn);
3024 
3025   // Unsigned minimum.
3026   void umin(const VRegister& vd,
3027             const VRegister& vn,
3028             const VRegister& vm);
3029 
3030   // Unsigned pairwise minimum.
3031   void uminp(const VRegister& vd,
3032              const VRegister& vn,
3033              const VRegister& vm);
3034 
3035   // Unsigned minimum across vector.
3036   void uminv(const VRegister& vd,
3037              const VRegister& vn);
3038 
3039   // Transpose vectors (primary).
3040   void trn1(const VRegister& vd,
3041             const VRegister& vn,
3042             const VRegister& vm);
3043 
3044   // Transpose vectors (secondary).
3045   void trn2(const VRegister& vd,
3046             const VRegister& vn,
3047             const VRegister& vm);
3048 
3049   // Unzip vectors (primary).
3050   void uzp1(const VRegister& vd,
3051             const VRegister& vn,
3052             const VRegister& vm);
3053 
3054   // Unzip vectors (secondary).
3055   void uzp2(const VRegister& vd,
3056             const VRegister& vn,
3057             const VRegister& vm);
3058 
3059   // Zip vectors (primary).
3060   void zip1(const VRegister& vd,
3061             const VRegister& vn,
3062             const VRegister& vm);
3063 
3064   // Zip vectors (secondary).
3065   void zip2(const VRegister& vd,
3066             const VRegister& vn,
3067             const VRegister& vm);
3068 
3069   // Signed shift right by immediate.
3070   void sshr(const VRegister& vd,
3071             const VRegister& vn,
3072             int shift);
3073 
3074   // Unsigned shift right by immediate.
3075   void ushr(const VRegister& vd,
3076             const VRegister& vn,
3077             int shift);
3078 
3079   // Signed rounding shift right by immediate.
3080   void srshr(const VRegister& vd,
3081              const VRegister& vn,
3082              int shift);
3083 
3084   // Unsigned rounding shift right by immediate.
3085   void urshr(const VRegister& vd,
3086              const VRegister& vn,
3087              int shift);
3088 
3089   // Signed shift right by immediate and accumulate.
3090   void ssra(const VRegister& vd,
3091             const VRegister& vn,
3092             int shift);
3093 
3094   // Unsigned shift right by immediate and accumulate.
3095   void usra(const VRegister& vd,
3096             const VRegister& vn,
3097             int shift);
3098 
3099   // Signed rounding shift right by immediate and accumulate.
3100   void srsra(const VRegister& vd,
3101              const VRegister& vn,
3102              int shift);
3103 
3104   // Unsigned rounding shift right by immediate and accumulate.
3105   void ursra(const VRegister& vd,
3106              const VRegister& vn,
3107              int shift);
3108 
3109   // Shift right narrow by immediate.
3110   void shrn(const VRegister& vd,
3111             const VRegister& vn,
3112             int shift);
3113 
3114   // Shift right narrow by immediate (second part).
3115   void shrn2(const VRegister& vd,
3116              const VRegister& vn,
3117              int shift);
3118 
3119   // Rounding shift right narrow by immediate.
3120   void rshrn(const VRegister& vd,
3121              const VRegister& vn,
3122              int shift);
3123 
3124   // Rounding shift right narrow by immediate (second part).
3125   void rshrn2(const VRegister& vd,
3126               const VRegister& vn,
3127               int shift);
3128 
3129   // Unsigned saturating shift right narrow by immediate.
3130   void uqshrn(const VRegister& vd,
3131               const VRegister& vn,
3132               int shift);
3133 
3134   // Unsigned saturating shift right narrow by immediate (second part).
3135   void uqshrn2(const VRegister& vd,
3136                const VRegister& vn,
3137                int shift);
3138 
3139   // Unsigned saturating rounding shift right narrow by immediate.
3140   void uqrshrn(const VRegister& vd,
3141                const VRegister& vn,
3142                int shift);
3143 
3144   // Unsigned saturating rounding shift right narrow by immediate (second part).
3145   void uqrshrn2(const VRegister& vd,
3146                 const VRegister& vn,
3147                 int shift);
3148 
3149   // Signed saturating shift right narrow by immediate.
3150   void sqshrn(const VRegister& vd,
3151               const VRegister& vn,
3152               int shift);
3153 
3154   // Signed saturating shift right narrow by immediate (second part).
3155   void sqshrn2(const VRegister& vd,
3156                const VRegister& vn,
3157                int shift);
3158 
3159   // Signed saturating rounded shift right narrow by immediate.
3160   void sqrshrn(const VRegister& vd,
3161                const VRegister& vn,
3162                int shift);
3163 
3164   // Signed saturating rounded shift right narrow by immediate (second part).
3165   void sqrshrn2(const VRegister& vd,
3166                 const VRegister& vn,
3167                 int shift);
3168 
3169   // Signed saturating shift right unsigned narrow by immediate.
3170   void sqshrun(const VRegister& vd,
3171                const VRegister& vn,
3172                int shift);
3173 
3174   // Signed saturating shift right unsigned narrow by immediate (second part).
3175   void sqshrun2(const VRegister& vd,
3176                 const VRegister& vn,
3177                 int shift);
3178 
3179   // Signed sat rounded shift right unsigned narrow by immediate.
3180   void sqrshrun(const VRegister& vd,
3181                 const VRegister& vn,
3182                 int shift);
3183 
3184   // Signed sat rounded shift right unsigned narrow by immediate (second part).
3185   void sqrshrun2(const VRegister& vd,
3186                  const VRegister& vn,
3187                  int shift);
3188 
3189   // FP reciprocal step.
3190   void frecps(const VRegister& vd,
3191               const VRegister& vn,
3192               const VRegister& vm);
3193 
3194   // FP reciprocal estimate.
3195   void frecpe(const VRegister& vd,
3196               const VRegister& vn);
3197 
3198   // FP reciprocal square root estimate.
3199   void frsqrte(const VRegister& vd,
3200                const VRegister& vn);
3201 
3202   // FP reciprocal square root step.
3203   void frsqrts(const VRegister& vd,
3204                const VRegister& vn,
3205                const VRegister& vm);
3206 
3207   // Signed absolute difference and accumulate long.
3208   void sabal(const VRegister& vd,
3209              const VRegister& vn,
3210              const VRegister& vm);
3211 
3212   // Signed absolute difference and accumulate long (second part).
3213   void sabal2(const VRegister& vd,
3214               const VRegister& vn,
3215               const VRegister& vm);
3216 
3217   // Unsigned absolute difference and accumulate long.
3218   void uabal(const VRegister& vd,
3219              const VRegister& vn,
3220              const VRegister& vm);
3221 
3222   // Unsigned absolute difference and accumulate long (second part).
3223   void uabal2(const VRegister& vd,
3224               const VRegister& vn,
3225               const VRegister& vm);
3226 
3227   // Signed absolute difference long.
3228   void sabdl(const VRegister& vd,
3229              const VRegister& vn,
3230              const VRegister& vm);
3231 
3232   // Signed absolute difference long (second part).
3233   void sabdl2(const VRegister& vd,
3234               const VRegister& vn,
3235               const VRegister& vm);
3236 
3237   // Unsigned absolute difference long.
3238   void uabdl(const VRegister& vd,
3239              const VRegister& vn,
3240              const VRegister& vm);
3241 
3242   // Unsigned absolute difference long (second part).
3243   void uabdl2(const VRegister& vd,
3244               const VRegister& vn,
3245               const VRegister& vm);
3246 
3247   // Polynomial multiply long.
3248   void pmull(const VRegister& vd,
3249              const VRegister& vn,
3250              const VRegister& vm);
3251 
3252   // Polynomial multiply long (second part).
3253   void pmull2(const VRegister& vd,
3254               const VRegister& vn,
3255               const VRegister& vm);
3256 
3257   // Signed long multiply-add.
3258   void smlal(const VRegister& vd,
3259              const VRegister& vn,
3260              const VRegister& vm);
3261 
3262   // Signed long multiply-add (second part).
3263   void smlal2(const VRegister& vd,
3264               const VRegister& vn,
3265               const VRegister& vm);
3266 
3267   // Unsigned long multiply-add.
3268   void umlal(const VRegister& vd,
3269              const VRegister& vn,
3270              const VRegister& vm);
3271 
3272   // Unsigned long multiply-add (second part).
3273   void umlal2(const VRegister& vd,
3274               const VRegister& vn,
3275               const VRegister& vm);
3276 
3277   // Signed long multiply-sub.
3278   void smlsl(const VRegister& vd,
3279              const VRegister& vn,
3280              const VRegister& vm);
3281 
3282   // Signed long multiply-sub (second part).
3283   void smlsl2(const VRegister& vd,
3284               const VRegister& vn,
3285               const VRegister& vm);
3286 
3287   // Unsigned long multiply-sub.
3288   void umlsl(const VRegister& vd,
3289              const VRegister& vn,
3290              const VRegister& vm);
3291 
3292   // Unsigned long multiply-sub (second part).
3293   void umlsl2(const VRegister& vd,
3294               const VRegister& vn,
3295               const VRegister& vm);
3296 
3297   // Signed long multiply.
3298   void smull(const VRegister& vd,
3299              const VRegister& vn,
3300              const VRegister& vm);
3301 
3302   // Signed long multiply (second part).
3303   void smull2(const VRegister& vd,
3304               const VRegister& vn,
3305               const VRegister& vm);
3306 
3307   // Signed saturating doubling long multiply-add.
3308   void sqdmlal(const VRegister& vd,
3309                const VRegister& vn,
3310                const VRegister& vm);
3311 
3312   // Signed saturating doubling long multiply-add (second part).
3313   void sqdmlal2(const VRegister& vd,
3314                 const VRegister& vn,
3315                 const VRegister& vm);
3316 
3317   // Signed saturating doubling long multiply-subtract.
3318   void sqdmlsl(const VRegister& vd,
3319                const VRegister& vn,
3320                const VRegister& vm);
3321 
3322   // Signed saturating doubling long multiply-subtract (second part).
3323   void sqdmlsl2(const VRegister& vd,
3324                 const VRegister& vn,
3325                 const VRegister& vm);
3326 
3327   // Signed saturating doubling long multiply.
3328   void sqdmull(const VRegister& vd,
3329                const VRegister& vn,
3330                const VRegister& vm);
3331 
3332   // Signed saturating doubling long multiply (second part).
3333   void sqdmull2(const VRegister& vd,
3334                 const VRegister& vn,
3335                 const VRegister& vm);
3336 
3337   // Signed saturating doubling multiply returning high half.
3338   void sqdmulh(const VRegister& vd,
3339                const VRegister& vn,
3340                const VRegister& vm);
3341 
3342   // Signed saturating rounding doubling multiply returning high half.
3343   void sqrdmulh(const VRegister& vd,
3344                 const VRegister& vn,
3345                 const VRegister& vm);
3346 
3347   // Signed saturating doubling multiply element returning high half.
3348   void sqdmulh(const VRegister& vd,
3349                const VRegister& vn,
3350                const VRegister& vm,
3351                int vm_index);
3352 
3353   // Signed saturating rounding doubling multiply element returning high half.
3354   void sqrdmulh(const VRegister& vd,
3355                 const VRegister& vn,
3356                 const VRegister& vm,
3357                 int vm_index);
3358 
3359   // Unsigned long multiply.
3360   void umull(const VRegister& vd,
3361              const VRegister& vn,
3362              const VRegister& vm);
3363 
3364   // Unsigned long multiply (second part).
3365   void umull2(const VRegister& vd,
3366               const VRegister& vn,
3367               const VRegister& vm);
3368 
3369   // Add narrow returning high half.
3370   void addhn(const VRegister& vd,
3371              const VRegister& vn,
3372              const VRegister& vm);
3373 
3374   // Add narrow returning high half (second part).
3375   void addhn2(const VRegister& vd,
3376               const VRegister& vn,
3377               const VRegister& vm);
3378 
3379   // Rounding add narrow returning high half.
3380   void raddhn(const VRegister& vd,
3381               const VRegister& vn,
3382               const VRegister& vm);
3383 
3384   // Rounding add narrow returning high half (second part).
3385   void raddhn2(const VRegister& vd,
3386                const VRegister& vn,
3387                const VRegister& vm);
3388 
3389   // Subtract narrow returning high half.
3390   void subhn(const VRegister& vd,
3391              const VRegister& vn,
3392              const VRegister& vm);
3393 
3394   // Subtract narrow returning high half (second part).
3395   void subhn2(const VRegister& vd,
3396               const VRegister& vn,
3397               const VRegister& vm);
3398 
3399   // Rounding subtract narrow returning high half.
3400   void rsubhn(const VRegister& vd,
3401               const VRegister& vn,
3402               const VRegister& vm);
3403 
3404   // Rounding subtract narrow returning high half (second part).
3405   void rsubhn2(const VRegister& vd,
3406                const VRegister& vn,
3407                const VRegister& vm);
3408 
3409   // FP vector multiply accumulate.
3410   void fmla(const VRegister& vd,
3411             const VRegister& vn,
3412             const VRegister& vm);
3413 
3414   // FP vector multiply subtract.
3415   void fmls(const VRegister& vd,
3416             const VRegister& vn,
3417             const VRegister& vm);
3418 
3419   // FP vector multiply extended.
3420   void fmulx(const VRegister& vd,
3421              const VRegister& vn,
3422              const VRegister& vm);
3423 
3424   // FP absolute greater than or equal.
3425   void facge(const VRegister& vd,
3426              const VRegister& vn,
3427              const VRegister& vm);
3428 
3429   // FP absolute greater than.
3430   void facgt(const VRegister& vd,
3431              const VRegister& vn,
3432              const VRegister& vm);
3433 
3434   // FP multiply by element.
3435   void fmul(const VRegister& vd,
3436             const VRegister& vn,
3437             const VRegister& vm,
3438             int vm_index);
3439 
3440   // FP fused multiply-add to accumulator by element.
3441   void fmla(const VRegister& vd,
3442             const VRegister& vn,
3443             const VRegister& vm,
3444             int vm_index);
3445 
3446   // FP fused multiply-sub from accumulator by element.
3447   void fmls(const VRegister& vd,
3448             const VRegister& vn,
3449             const VRegister& vm,
3450             int vm_index);
3451 
3452   // FP multiply extended by element.
3453   void fmulx(const VRegister& vd,
3454              const VRegister& vn,
3455              const VRegister& vm,
3456              int vm_index);
3457 
3458   // FP compare equal.
3459   void fcmeq(const VRegister& vd,
3460              const VRegister& vn,
3461              const VRegister& vm);
3462 
3463   // FP greater than.
3464   void fcmgt(const VRegister& vd,
3465              const VRegister& vn,
3466              const VRegister& vm);
3467 
3468   // FP greater than or equal.
3469   void fcmge(const VRegister& vd,
3470              const VRegister& vn,
3471              const VRegister& vm);
3472 
3473   // FP compare equal to zero.
3474   void fcmeq(const VRegister& vd,
3475              const VRegister& vn,
3476              double imm);
3477 
3478   // FP greater than zero.
3479   void fcmgt(const VRegister& vd,
3480              const VRegister& vn,
3481              double imm);
3482 
3483   // FP greater than or equal to zero.
3484   void fcmge(const VRegister& vd,
3485              const VRegister& vn,
3486              double imm);
3487 
3488   // FP less than or equal to zero.
3489   void fcmle(const VRegister& vd,
3490              const VRegister& vn,
3491              double imm);
3492 
3493   // FP less than zero.
3494   void fcmlt(const VRegister& vd,
3495              const VRegister& vn,
3496              double imm);
3497 
3498   // FP absolute difference.
3499   void fabd(const VRegister& vd,
3500             const VRegister& vn,
3501             const VRegister& vm);
3502 
3503   // FP pairwise add vector.
3504   void faddp(const VRegister& vd,
3505              const VRegister& vn,
3506              const VRegister& vm);
3507 
3508   // FP pairwise add scalar.
3509   void faddp(const VRegister& vd,
3510              const VRegister& vn);
3511 
3512   // FP pairwise maximum vector.
3513   void fmaxp(const VRegister& vd,
3514              const VRegister& vn,
3515              const VRegister& vm);
3516 
3517   // FP pairwise maximum scalar.
3518   void fmaxp(const VRegister& vd,
3519              const VRegister& vn);
3520 
3521   // FP pairwise minimum vector.
3522   void fminp(const VRegister& vd,
3523              const VRegister& vn,
3524              const VRegister& vm);
3525 
3526   // FP pairwise minimum scalar.
3527   void fminp(const VRegister& vd,
3528              const VRegister& vn);
3529 
3530   // FP pairwise maximum number vector.
3531   void fmaxnmp(const VRegister& vd,
3532                const VRegister& vn,
3533                const VRegister& vm);
3534 
3535   // FP pairwise maximum number scalar.
3536   void fmaxnmp(const VRegister& vd,
3537                const VRegister& vn);
3538 
3539   // FP pairwise minimum number vector.
3540   void fminnmp(const VRegister& vd,
3541                const VRegister& vn,
3542                const VRegister& vm);
3543 
3544   // FP pairwise minimum number scalar.
3545   void fminnmp(const VRegister& vd,
3546                const VRegister& vn);
3547 
3548   // Emit generic instructions.
3549   // Emit raw instructions into the instruction stream.
dci(Instr raw_inst)3550   void dci(Instr raw_inst) { Emit(raw_inst); }
3551 
3552   // Emit 32 bits of data into the instruction stream.
dc32(uint32_t data)3553   void dc32(uint32_t data) {
3554     EmitData(&data, sizeof(data));
3555   }
3556 
3557   // Emit 64 bits of data into the instruction stream.
dc64(uint64_t data)3558   void dc64(uint64_t data) {
3559     EmitData(&data, sizeof(data));
3560   }
3561 
3562   // Code generation helpers.
3563 
3564   // Register encoding.
Rd(CPURegister rd)3565   static Instr Rd(CPURegister rd) {
3566     VIXL_ASSERT(rd.code() != kSPRegInternalCode);
3567     return rd.code() << Rd_offset;
3568   }
3569 
Rn(CPURegister rn)3570   static Instr Rn(CPURegister rn) {
3571     VIXL_ASSERT(rn.code() != kSPRegInternalCode);
3572     return rn.code() << Rn_offset;
3573   }
3574 
Rm(CPURegister rm)3575   static Instr Rm(CPURegister rm) {
3576     VIXL_ASSERT(rm.code() != kSPRegInternalCode);
3577     return rm.code() << Rm_offset;
3578   }
3579 
RmNot31(CPURegister rm)3580   static Instr RmNot31(CPURegister rm) {
3581     VIXL_ASSERT(rm.code() != kSPRegInternalCode);
3582     VIXL_ASSERT(!rm.IsZero());
3583     return Rm(rm);
3584   }
3585 
Ra(CPURegister ra)3586   static Instr Ra(CPURegister ra) {
3587     VIXL_ASSERT(ra.code() != kSPRegInternalCode);
3588     return ra.code() << Ra_offset;
3589   }
3590 
Rt(CPURegister rt)3591   static Instr Rt(CPURegister rt) {
3592     VIXL_ASSERT(rt.code() != kSPRegInternalCode);
3593     return rt.code() << Rt_offset;
3594   }
3595 
Rt2(CPURegister rt2)3596   static Instr Rt2(CPURegister rt2) {
3597     VIXL_ASSERT(rt2.code() != kSPRegInternalCode);
3598     return rt2.code() << Rt2_offset;
3599   }
3600 
Rs(CPURegister rs)3601   static Instr Rs(CPURegister rs) {
3602     VIXL_ASSERT(rs.code() != kSPRegInternalCode);
3603     return rs.code() << Rs_offset;
3604   }
3605 
3606   // These encoding functions allow the stack pointer to be encoded, and
3607   // disallow the zero register.
RdSP(Register rd)3608   static Instr RdSP(Register rd) {
3609     VIXL_ASSERT(!rd.IsZero());
3610     return (rd.code() & kRegCodeMask) << Rd_offset;
3611   }
3612 
RnSP(Register rn)3613   static Instr RnSP(Register rn) {
3614     VIXL_ASSERT(!rn.IsZero());
3615     return (rn.code() & kRegCodeMask) << Rn_offset;
3616   }
3617 
3618   // Flags encoding.
Flags(FlagsUpdate S)3619   static Instr Flags(FlagsUpdate S) {
3620     if (S == SetFlags) {
3621       return 1 << FlagsUpdate_offset;
3622     } else if (S == LeaveFlags) {
3623       return 0 << FlagsUpdate_offset;
3624     }
3625     VIXL_UNREACHABLE();
3626     return 0;
3627   }
3628 
Cond(Condition cond)3629   static Instr Cond(Condition cond) {
3630     return cond << Condition_offset;
3631   }
3632 
3633   // PC-relative address encoding.
ImmPCRelAddress(int imm21)3634   static Instr ImmPCRelAddress(int imm21) {
3635     VIXL_ASSERT(IsInt21(imm21));
3636     Instr imm = static_cast<Instr>(TruncateToUint21(imm21));
3637     Instr immhi = (imm >> ImmPCRelLo_width) << ImmPCRelHi_offset;
3638     Instr immlo = imm << ImmPCRelLo_offset;
3639     return (immhi & ImmPCRelHi_mask) | (immlo & ImmPCRelLo_mask);
3640   }
3641 
3642   // Branch encoding.
ImmUncondBranch(int imm26)3643   static Instr ImmUncondBranch(int imm26) {
3644     VIXL_ASSERT(IsInt26(imm26));
3645     return TruncateToUint26(imm26) << ImmUncondBranch_offset;
3646   }
3647 
ImmCondBranch(int imm19)3648   static Instr ImmCondBranch(int imm19) {
3649     VIXL_ASSERT(IsInt19(imm19));
3650     return TruncateToUint19(imm19) << ImmCondBranch_offset;
3651   }
3652 
ImmCmpBranch(int imm19)3653   static Instr ImmCmpBranch(int imm19) {
3654     VIXL_ASSERT(IsInt19(imm19));
3655     return TruncateToUint19(imm19) << ImmCmpBranch_offset;
3656   }
3657 
ImmTestBranch(int imm14)3658   static Instr ImmTestBranch(int imm14) {
3659     VIXL_ASSERT(IsInt14(imm14));
3660     return TruncateToUint14(imm14) << ImmTestBranch_offset;
3661   }
3662 
ImmTestBranchBit(unsigned bit_pos)3663   static Instr ImmTestBranchBit(unsigned bit_pos) {
3664     VIXL_ASSERT(IsUint6(bit_pos));
3665     // Subtract five from the shift offset, as we need bit 5 from bit_pos.
3666     unsigned b5 = bit_pos << (ImmTestBranchBit5_offset - 5);
3667     unsigned b40 = bit_pos << ImmTestBranchBit40_offset;
3668     b5 &= ImmTestBranchBit5_mask;
3669     b40 &= ImmTestBranchBit40_mask;
3670     return b5 | b40;
3671   }
3672 
3673   // Data Processing encoding.
SF(Register rd)3674   static Instr SF(Register rd) {
3675       return rd.Is64Bits() ? SixtyFourBits : ThirtyTwoBits;
3676   }
3677 
ImmAddSub(int imm)3678   static Instr ImmAddSub(int imm) {
3679     VIXL_ASSERT(IsImmAddSub(imm));
3680     if (IsUint12(imm)) {  // No shift required.
3681       imm <<= ImmAddSub_offset;
3682     } else {
3683       imm = ((imm >> 12) << ImmAddSub_offset) | (1 << ShiftAddSub_offset);
3684     }
3685     return imm;
3686   }
3687 
ImmS(unsigned imms,unsigned reg_size)3688   static Instr ImmS(unsigned imms, unsigned reg_size) {
3689     VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(imms)) ||
3690            ((reg_size == kWRegSize) && IsUint5(imms)));
3691     USE(reg_size);
3692     return imms << ImmS_offset;
3693   }
3694 
ImmR(unsigned immr,unsigned reg_size)3695   static Instr ImmR(unsigned immr, unsigned reg_size) {
3696     VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
3697            ((reg_size == kWRegSize) && IsUint5(immr)));
3698     USE(reg_size);
3699     VIXL_ASSERT(IsUint6(immr));
3700     return immr << ImmR_offset;
3701   }
3702 
ImmSetBits(unsigned imms,unsigned reg_size)3703   static Instr ImmSetBits(unsigned imms, unsigned reg_size) {
3704     VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
3705     VIXL_ASSERT(IsUint6(imms));
3706     VIXL_ASSERT((reg_size == kXRegSize) || IsUint6(imms + 3));
3707     USE(reg_size);
3708     return imms << ImmSetBits_offset;
3709   }
3710 
ImmRotate(unsigned immr,unsigned reg_size)3711   static Instr ImmRotate(unsigned immr, unsigned reg_size) {
3712     VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
3713     VIXL_ASSERT(((reg_size == kXRegSize) && IsUint6(immr)) ||
3714            ((reg_size == kWRegSize) && IsUint5(immr)));
3715     USE(reg_size);
3716     return immr << ImmRotate_offset;
3717   }
3718 
ImmLLiteral(int imm19)3719   static Instr ImmLLiteral(int imm19) {
3720     VIXL_ASSERT(IsInt19(imm19));
3721     return TruncateToUint19(imm19) << ImmLLiteral_offset;
3722   }
3723 
BitN(unsigned bitn,unsigned reg_size)3724   static Instr BitN(unsigned bitn, unsigned reg_size) {
3725     VIXL_ASSERT((reg_size == kWRegSize) || (reg_size == kXRegSize));
3726     VIXL_ASSERT((reg_size == kXRegSize) || (bitn == 0));
3727     USE(reg_size);
3728     return bitn << BitN_offset;
3729   }
3730 
ShiftDP(Shift shift)3731   static Instr ShiftDP(Shift shift) {
3732     VIXL_ASSERT(shift == LSL || shift == LSR || shift == ASR || shift == ROR);
3733     return shift << ShiftDP_offset;
3734   }
3735 
ImmDPShift(unsigned amount)3736   static Instr ImmDPShift(unsigned amount) {
3737     VIXL_ASSERT(IsUint6(amount));
3738     return amount << ImmDPShift_offset;
3739   }
3740 
ExtendMode(Extend extend)3741   static Instr ExtendMode(Extend extend) {
3742     return extend << ExtendMode_offset;
3743   }
3744 
ImmExtendShift(unsigned left_shift)3745   static Instr ImmExtendShift(unsigned left_shift) {
3746     VIXL_ASSERT(left_shift <= 4);
3747     return left_shift << ImmExtendShift_offset;
3748   }
3749 
ImmCondCmp(unsigned imm)3750   static Instr ImmCondCmp(unsigned imm) {
3751     VIXL_ASSERT(IsUint5(imm));
3752     return imm << ImmCondCmp_offset;
3753   }
3754 
Nzcv(StatusFlags nzcv)3755   static Instr Nzcv(StatusFlags nzcv) {
3756     return ((nzcv >> Flags_offset) & 0xf) << Nzcv_offset;
3757   }
3758 
3759   // MemOperand offset encoding.
ImmLSUnsigned(int imm12)3760   static Instr ImmLSUnsigned(int imm12) {
3761     VIXL_ASSERT(IsUint12(imm12));
3762     return imm12 << ImmLSUnsigned_offset;
3763   }
3764 
ImmLS(int imm9)3765   static Instr ImmLS(int imm9) {
3766     VIXL_ASSERT(IsInt9(imm9));
3767     return TruncateToUint9(imm9) << ImmLS_offset;
3768   }
3769 
ImmLSPair(int imm7,unsigned access_size)3770   static Instr ImmLSPair(int imm7, unsigned access_size) {
3771     VIXL_ASSERT(((imm7 >> access_size) << access_size) == imm7);
3772     int scaled_imm7 = imm7 >> access_size;
3773     VIXL_ASSERT(IsInt7(scaled_imm7));
3774     return TruncateToUint7(scaled_imm7) << ImmLSPair_offset;
3775   }
3776 
ImmShiftLS(unsigned shift_amount)3777   static Instr ImmShiftLS(unsigned shift_amount) {
3778     VIXL_ASSERT(IsUint1(shift_amount));
3779     return shift_amount << ImmShiftLS_offset;
3780   }
3781 
ImmPrefetchOperation(int imm5)3782   static Instr ImmPrefetchOperation(int imm5) {
3783     VIXL_ASSERT(IsUint5(imm5));
3784     return imm5 << ImmPrefetchOperation_offset;
3785   }
3786 
ImmException(int imm16)3787   static Instr ImmException(int imm16) {
3788     VIXL_ASSERT(IsUint16(imm16));
3789     return imm16 << ImmException_offset;
3790   }
3791 
ImmSystemRegister(int imm15)3792   static Instr ImmSystemRegister(int imm15) {
3793     VIXL_ASSERT(IsUint15(imm15));
3794     return imm15 << ImmSystemRegister_offset;
3795   }
3796 
ImmHint(int imm7)3797   static Instr ImmHint(int imm7) {
3798     VIXL_ASSERT(IsUint7(imm7));
3799     return imm7 << ImmHint_offset;
3800   }
3801 
CRm(int imm4)3802   static Instr CRm(int imm4) {
3803     VIXL_ASSERT(IsUint4(imm4));
3804     return imm4 << CRm_offset;
3805   }
3806 
CRn(int imm4)3807   static Instr CRn(int imm4) {
3808     VIXL_ASSERT(IsUint4(imm4));
3809     return imm4 << CRn_offset;
3810   }
3811 
SysOp(int imm14)3812   static Instr SysOp(int imm14) {
3813     VIXL_ASSERT(IsUint14(imm14));
3814     return imm14 << SysOp_offset;
3815   }
3816 
ImmSysOp1(int imm3)3817   static Instr ImmSysOp1(int imm3) {
3818     VIXL_ASSERT(IsUint3(imm3));
3819     return imm3 << SysOp1_offset;
3820   }
3821 
ImmSysOp2(int imm3)3822   static Instr ImmSysOp2(int imm3) {
3823     VIXL_ASSERT(IsUint3(imm3));
3824     return imm3 << SysOp2_offset;
3825   }
3826 
ImmBarrierDomain(int imm2)3827   static Instr ImmBarrierDomain(int imm2) {
3828     VIXL_ASSERT(IsUint2(imm2));
3829     return imm2 << ImmBarrierDomain_offset;
3830   }
3831 
ImmBarrierType(int imm2)3832   static Instr ImmBarrierType(int imm2) {
3833     VIXL_ASSERT(IsUint2(imm2));
3834     return imm2 << ImmBarrierType_offset;
3835   }
3836 
3837   // Move immediates encoding.
ImmMoveWide(uint64_t imm)3838   static Instr ImmMoveWide(uint64_t imm) {
3839     VIXL_ASSERT(IsUint16(imm));
3840     return static_cast<Instr>(imm << ImmMoveWide_offset);
3841   }
3842 
ShiftMoveWide(int64_t shift)3843   static Instr ShiftMoveWide(int64_t shift) {
3844     VIXL_ASSERT(IsUint2(shift));
3845     return static_cast<Instr>(shift << ShiftMoveWide_offset);
3846   }
3847 
3848   // FP Immediates.
3849   static Instr ImmFP32(float imm);
3850   static Instr ImmFP64(double imm);
3851 
3852   // FP register type.
FPType(FPRegister fd)3853   static Instr FPType(FPRegister fd) {
3854     return fd.Is64Bits() ? FP64 : FP32;
3855   }
3856 
FPScale(unsigned scale)3857   static Instr FPScale(unsigned scale) {
3858     VIXL_ASSERT(IsUint6(scale));
3859     return scale << FPScale_offset;
3860   }
3861 
3862   // Immediate field checking helpers.
3863   static bool IsImmAddSub(int64_t immediate);
3864   static bool IsImmConditionalCompare(int64_t immediate);
3865   static bool IsImmFP32(float imm);
3866   static bool IsImmFP64(double imm);
3867   static bool IsImmLogical(uint64_t value,
3868                            unsigned width,
3869                            unsigned* n = NULL,
3870                            unsigned* imm_s = NULL,
3871                            unsigned* imm_r = NULL);
3872   static bool IsImmLSPair(int64_t offset, unsigned access_size);
3873   static bool IsImmLSScaled(int64_t offset, unsigned access_size);
3874   static bool IsImmLSUnscaled(int64_t offset);
3875   static bool IsImmMovn(uint64_t imm, unsigned reg_size);
3876   static bool IsImmMovz(uint64_t imm, unsigned reg_size);
3877 
3878   // Instruction bits for vector format in data processing operations.
VFormat(VRegister vd)3879   static Instr VFormat(VRegister vd) {
3880     if (vd.Is64Bits()) {
3881       switch (vd.lanes()) {
3882         case 2: return NEON_2S;
3883         case 4: return NEON_4H;
3884         case 8: return NEON_8B;
3885         default: return 0xffffffff;
3886       }
3887     } else {
3888       VIXL_ASSERT(vd.Is128Bits());
3889       switch (vd.lanes()) {
3890         case 2: return NEON_2D;
3891         case 4: return NEON_4S;
3892         case 8: return NEON_8H;
3893         case 16: return NEON_16B;
3894         default: return 0xffffffff;
3895       }
3896     }
3897   }
3898 
3899   // Instruction bits for vector format in floating point data processing
3900   // operations.
FPFormat(VRegister vd)3901   static Instr FPFormat(VRegister vd) {
3902     if (vd.lanes() == 1) {
3903       // Floating point scalar formats.
3904       VIXL_ASSERT(vd.Is32Bits() || vd.Is64Bits());
3905       return vd.Is64Bits() ? FP64 : FP32;
3906     }
3907 
3908     // Two lane floating point vector formats.
3909     if (vd.lanes() == 2) {
3910       VIXL_ASSERT(vd.Is64Bits() || vd.Is128Bits());
3911       return vd.Is128Bits() ? NEON_FP_2D : NEON_FP_2S;
3912     }
3913 
3914     // Four lane floating point vector format.
3915     VIXL_ASSERT((vd.lanes() == 4) && vd.Is128Bits());
3916     return NEON_FP_4S;
3917   }
3918 
3919   // Instruction bits for vector format in load and store operations.
LSVFormat(VRegister vd)3920   static Instr LSVFormat(VRegister vd) {
3921     if (vd.Is64Bits()) {
3922       switch (vd.lanes()) {
3923         case 1: return LS_NEON_1D;
3924         case 2: return LS_NEON_2S;
3925         case 4: return LS_NEON_4H;
3926         case 8: return LS_NEON_8B;
3927         default: return 0xffffffff;
3928       }
3929     } else {
3930       VIXL_ASSERT(vd.Is128Bits());
3931       switch (vd.lanes()) {
3932         case 2: return LS_NEON_2D;
3933         case 4: return LS_NEON_4S;
3934         case 8: return LS_NEON_8H;
3935         case 16: return LS_NEON_16B;
3936         default: return 0xffffffff;
3937       }
3938     }
3939   }
3940 
3941   // Instruction bits for scalar format in data processing operations.
SFormat(VRegister vd)3942   static Instr SFormat(VRegister vd) {
3943     VIXL_ASSERT(vd.lanes() == 1);
3944     switch (vd.SizeInBytes()) {
3945       case 1: return NEON_B;
3946       case 2: return NEON_H;
3947       case 4: return NEON_S;
3948       case 8: return NEON_D;
3949       default: return 0xffffffff;
3950     }
3951   }
3952 
ImmNEONHLM(int index,int num_bits)3953   static Instr ImmNEONHLM(int index, int num_bits) {
3954     int h, l, m;
3955     if (num_bits == 3) {
3956       VIXL_ASSERT(IsUint3(index));
3957       h  = (index >> 2) & 1;
3958       l  = (index >> 1) & 1;
3959       m  = (index >> 0) & 1;
3960     } else if (num_bits == 2) {
3961       VIXL_ASSERT(IsUint2(index));
3962       h  = (index >> 1) & 1;
3963       l  = (index >> 0) & 1;
3964       m  = 0;
3965     } else {
3966       VIXL_ASSERT(IsUint1(index) && (num_bits == 1));
3967       h  = (index >> 0) & 1;
3968       l  = 0;
3969       m  = 0;
3970     }
3971     return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset);
3972   }
3973 
ImmNEONExt(int imm4)3974   static Instr ImmNEONExt(int imm4) {
3975     VIXL_ASSERT(IsUint4(imm4));
3976     return imm4 << ImmNEONExt_offset;
3977   }
3978 
ImmNEON5(Instr format,int index)3979   static Instr ImmNEON5(Instr format, int index) {
3980     VIXL_ASSERT(IsUint4(index));
3981     int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
3982     int imm5 = (index << (s + 1)) | (1 << s);
3983     return imm5 << ImmNEON5_offset;
3984   }
3985 
ImmNEON4(Instr format,int index)3986   static Instr ImmNEON4(Instr format, int index) {
3987     VIXL_ASSERT(IsUint4(index));
3988     int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
3989     int imm4 = index << s;
3990     return imm4 << ImmNEON4_offset;
3991   }
3992 
ImmNEONabcdefgh(int imm8)3993   static Instr ImmNEONabcdefgh(int imm8) {
3994     VIXL_ASSERT(IsUint8(imm8));
3995     Instr instr;
3996     instr  = ((imm8 >> 5) & 7) << ImmNEONabc_offset;
3997     instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset;
3998     return instr;
3999   }
4000 
NEONCmode(int cmode)4001   static Instr NEONCmode(int cmode) {
4002     VIXL_ASSERT(IsUint4(cmode));
4003     return cmode << NEONCmode_offset;
4004   }
4005 
NEONModImmOp(int op)4006   static Instr NEONModImmOp(int op) {
4007     VIXL_ASSERT(IsUint1(op));
4008     return op << NEONModImmOp_offset;
4009   }
4010 
size()4011   size_t size() const {
4012     return SizeOfCodeGenerated();
4013   }
4014 
SizeOfCodeGenerated()4015   size_t SizeOfCodeGenerated() const {
4016     return armbuffer_.size();
4017   }
4018 
pic()4019   PositionIndependentCodeOption pic() const {
4020     return pic_;
4021   }
4022 
GetCPUFeatures()4023   CPUFeatures* GetCPUFeatures() { return &cpu_features_; }
4024 
SetCPUFeatures(const CPUFeatures & cpu_features)4025   void SetCPUFeatures(const CPUFeatures& cpu_features) {
4026     cpu_features_ = cpu_features;
4027   }
4028 
AllowPageOffsetDependentCode()4029   bool AllowPageOffsetDependentCode() const {
4030     return (pic() == PageOffsetDependentCode) ||
4031            (pic() == PositionDependentCode);
4032   }
4033 
AppropriateZeroRegFor(const CPURegister & reg)4034   static const Register& AppropriateZeroRegFor(const CPURegister& reg) {
4035     return reg.Is64Bits() ? xzr : wzr;
4036   }
4037 
4038 
4039  protected:
4040   void LoadStore(const CPURegister& rt,
4041                  const MemOperand& addr,
4042                  LoadStoreOp op,
4043                  LoadStoreScalingOption option = PreferScaledOffset);
4044 
4045   void LoadStorePair(const CPURegister& rt,
4046                      const CPURegister& rt2,
4047                      const MemOperand& addr,
4048                      LoadStorePairOp op);
4049   void LoadStoreStruct(const VRegister& vt,
4050                        const MemOperand& addr,
4051                        NEONLoadStoreMultiStructOp op);
4052   void LoadStoreStruct1(const VRegister& vt,
4053                         int reg_count,
4054                         const MemOperand& addr);
4055   void LoadStoreStructSingle(const VRegister& vt,
4056                              uint32_t lane,
4057                              const MemOperand& addr,
4058                              NEONLoadStoreSingleStructOp op);
4059   void LoadStoreStructSingleAllLanes(const VRegister& vt,
4060                                      const MemOperand& addr,
4061                                      NEONLoadStoreSingleStructOp op);
4062   void LoadStoreStructVerify(const VRegister& vt,
4063                              const MemOperand& addr,
4064                              Instr op);
4065 
4066   void Prefetch(PrefetchOperation op,
4067                 const MemOperand& addr,
4068                 LoadStoreScalingOption option = PreferScaledOffset);
4069 
4070   BufferOffset Logical(const Register& rd,
4071                        const Register& rn,
4072                        const Operand& operand,
4073                        LogicalOp op);
4074   BufferOffset LogicalImmediate(const Register& rd,
4075                                 const Register& rn,
4076                                 unsigned n,
4077                                 unsigned imm_s,
4078                                 unsigned imm_r,
4079                                 LogicalOp op);
4080 
4081   void ConditionalCompare(const Register& rn,
4082                           const Operand& operand,
4083                           StatusFlags nzcv,
4084                           Condition cond,
4085                           ConditionalCompareOp op);
4086 
4087   void AddSubWithCarry(const Register& rd,
4088                        const Register& rn,
4089                        const Operand& operand,
4090                        FlagsUpdate S,
4091                        AddSubWithCarryOp op);
4092 
4093 
4094   // Functions for emulating operands not directly supported by the instruction
4095   // set.
4096   void EmitShift(const Register& rd,
4097                  const Register& rn,
4098                  Shift shift,
4099                  unsigned amount);
4100   void EmitExtendShift(const Register& rd,
4101                        const Register& rn,
4102                        Extend extend,
4103                        unsigned left_shift);
4104 
4105   void AddSub(const Register& rd,
4106               const Register& rn,
4107               const Operand& operand,
4108               FlagsUpdate S,
4109               AddSubOp op);
4110 
4111   void NEONTable(const VRegister& vd,
4112                  const VRegister& vn,
4113                  const VRegister& vm,
4114                  NEONTableOp op);
4115 
4116   // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified
4117   // registers. Only simple loads are supported; sign- and zero-extension (such
4118   // as in LDPSW_x or LDRB_w) are not supported.
4119   static LoadStoreOp LoadOpFor(const CPURegister& rt);
4120   static LoadStorePairOp LoadPairOpFor(const CPURegister& rt,
4121                                        const CPURegister& rt2);
4122   static LoadStoreOp StoreOpFor(const CPURegister& rt);
4123   static LoadStorePairOp StorePairOpFor(const CPURegister& rt,
4124                                         const CPURegister& rt2);
4125   static LoadStorePairNonTemporalOp LoadPairNonTemporalOpFor(
4126     const CPURegister& rt, const CPURegister& rt2);
4127   static LoadStorePairNonTemporalOp StorePairNonTemporalOpFor(
4128     const CPURegister& rt, const CPURegister& rt2);
4129   static LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt);
4130 
4131   // Convenience pass-through for CPU feature checks.
4132   bool CPUHas(CPUFeatures::Feature feature0,
4133               CPUFeatures::Feature feature1 = CPUFeatures::kNone,
4134               CPUFeatures::Feature feature2 = CPUFeatures::kNone,
4135               CPUFeatures::Feature feature3 = CPUFeatures::kNone) const {
4136     return cpu_features_.Has(feature0, feature1, feature2, feature3);
4137   }
4138 
4139   // Determine whether the target CPU has the specified registers, based on the
4140   // currently-enabled CPU features. Presence of a register does not imply
4141   // support for arbitrary operations on it. For example, CPUs with FP have H
4142   // registers, but most half-precision operations require the FPHalf feature.
4143   //
4144   // These are used to check CPU features in loads and stores that have the same
4145   // entry point for both integer and FP registers.
4146   bool CPUHas(const CPURegister& rt) const;
4147   bool CPUHas(const CPURegister& rt, const CPURegister& rt2) const;
4148 
4149   bool CPUHas(SystemRegister sysreg) const;
4150 
4151  private:
4152   static uint32_t FP32ToImm8(float imm);
4153   static uint32_t FP64ToImm8(double imm);
4154 
4155   // Instruction helpers.
4156   void MoveWide(const Register& rd,
4157                 uint64_t imm,
4158                 int shift,
4159                 MoveWideImmediateOp mov_op);
4160   BufferOffset DataProcShiftedRegister(const Register& rd,
4161                                        const Register& rn,
4162                                        const Operand& operand,
4163                                        FlagsUpdate S,
4164                                        Instr op);
4165   void DataProcExtendedRegister(const Register& rd,
4166                                 const Register& rn,
4167                                 const Operand& operand,
4168                                 FlagsUpdate S,
4169                                 Instr op);
4170   void LoadStorePairNonTemporal(const CPURegister& rt,
4171                                 const CPURegister& rt2,
4172                                 const MemOperand& addr,
4173                                 LoadStorePairNonTemporalOp op);
4174   void LoadLiteral(const CPURegister& rt, uint64_t imm, LoadLiteralOp op);
4175   void ConditionalSelect(const Register& rd,
4176                          const Register& rn,
4177                          const Register& rm,
4178                          Condition cond,
4179                          ConditionalSelectOp op);
4180   void DataProcessing1Source(const Register& rd,
4181                              const Register& rn,
4182                              DataProcessing1SourceOp op);
4183   void DataProcessing3Source(const Register& rd,
4184                              const Register& rn,
4185                              const Register& rm,
4186                              const Register& ra,
4187                              DataProcessing3SourceOp op);
4188   void FPDataProcessing1Source(const VRegister& fd,
4189                                const VRegister& fn,
4190                                FPDataProcessing1SourceOp op);
4191   void FPDataProcessing3Source(const VRegister& fd,
4192                                const VRegister& fn,
4193                                const VRegister& fm,
4194                                const VRegister& fa,
4195                                FPDataProcessing3SourceOp op);
4196   void NEONAcrossLanesL(const VRegister& vd,
4197                         const VRegister& vn,
4198                         NEONAcrossLanesOp op);
4199   void NEONAcrossLanes(const VRegister& vd,
4200                        const VRegister& vn,
4201                        NEONAcrossLanesOp op);
4202   void NEONModifiedImmShiftLsl(const VRegister& vd,
4203                                const int imm8,
4204                                const int left_shift,
4205                                NEONModifiedImmediateOp op);
4206   void NEONModifiedImmShiftMsl(const VRegister& vd,
4207                                const int imm8,
4208                                const int shift_amount,
4209                                NEONModifiedImmediateOp op);
4210   void NEONFP2Same(const VRegister& vd,
4211                    const VRegister& vn,
4212                    Instr vop);
4213   void NEON3Same(const VRegister& vd,
4214                  const VRegister& vn,
4215                  const VRegister& vm,
4216                  NEON3SameOp vop);
4217   void NEONFP3Same(const VRegister& vd,
4218                    const VRegister& vn,
4219                    const VRegister& vm,
4220                    Instr op);
4221   void NEON3DifferentL(const VRegister& vd,
4222                        const VRegister& vn,
4223                        const VRegister& vm,
4224                        NEON3DifferentOp vop);
4225   void NEON3DifferentW(const VRegister& vd,
4226                        const VRegister& vn,
4227                        const VRegister& vm,
4228                        NEON3DifferentOp vop);
4229   void NEON3DifferentHN(const VRegister& vd,
4230                         const VRegister& vn,
4231                         const VRegister& vm,
4232                         NEON3DifferentOp vop);
4233   void NEONFP2RegMisc(const VRegister& vd,
4234                       const VRegister& vn,
4235                       NEON2RegMiscOp vop,
4236                       double value = 0.0);
4237   void NEON2RegMisc(const VRegister& vd,
4238                     const VRegister& vn,
4239                     NEON2RegMiscOp vop,
4240                     int value = 0);
4241   void NEONFP2RegMisc(const VRegister& vd,
4242                       const VRegister& vn,
4243                       Instr op);
4244   void NEONAddlp(const VRegister& vd,
4245                  const VRegister& vn,
4246                  NEON2RegMiscOp op);
4247   void NEONPerm(const VRegister& vd,
4248                 const VRegister& vn,
4249                 const VRegister& vm,
4250                 NEONPermOp op);
4251   void NEONFPByElement(const VRegister& vd,
4252                        const VRegister& vn,
4253                        const VRegister& vm,
4254                        int vm_index,
4255                        NEONByIndexedElementOp op);
4256   void NEONByElement(const VRegister& vd,
4257                      const VRegister& vn,
4258                      const VRegister& vm,
4259                      int vm_index,
4260                      NEONByIndexedElementOp op);
4261   void NEONByElementL(const VRegister& vd,
4262                       const VRegister& vn,
4263                       const VRegister& vm,
4264                       int vm_index,
4265                       NEONByIndexedElementOp op);
4266   void NEONShiftImmediate(const VRegister& vd,
4267                           const VRegister& vn,
4268                           NEONShiftImmediateOp op,
4269                           int immh_immb);
4270   void NEONShiftLeftImmediate(const VRegister& vd,
4271                               const VRegister& vn,
4272                               int shift,
4273                               NEONShiftImmediateOp op);
4274   void NEONShiftRightImmediate(const VRegister& vd,
4275                                const VRegister& vn,
4276                                int shift,
4277                                NEONShiftImmediateOp op);
4278   void NEONShiftImmediateL(const VRegister& vd,
4279                            const VRegister& vn,
4280                            int shift,
4281                            NEONShiftImmediateOp op);
4282   void NEONShiftImmediateN(const VRegister& vd,
4283                            const VRegister& vn,
4284                            int shift,
4285                            NEONShiftImmediateOp op);
4286   void NEONXtn(const VRegister& vd,
4287                const VRegister& vn,
4288                NEON2RegMiscOp vop);
4289 
4290   Instr LoadStoreStructAddrModeField(const MemOperand& addr);
4291 
4292   // Encode the specified MemOperand for the specified access size and scaling
4293   // preference.
4294   Instr LoadStoreMemOperand(const MemOperand& addr,
4295                             unsigned access_size,
4296                             LoadStoreScalingOption option);
4297 
4298  protected:
4299   // Prevent generation of a literal pool for the next |maxInst| instructions.
4300   // Guarantees instruction linearity.
4301   class AutoBlockLiteralPool {
4302     ARMBuffer* armbuffer_;
4303 
4304    public:
AutoBlockLiteralPool(Assembler * assembler,size_t maxInst)4305     AutoBlockLiteralPool(Assembler* assembler, size_t maxInst)
4306       : armbuffer_(&assembler->armbuffer_) {
4307       armbuffer_->enterNoPool(maxInst);
4308     }
~AutoBlockLiteralPool()4309     ~AutoBlockLiteralPool() {
4310       armbuffer_->leaveNoPool();
4311     }
4312   };
4313 
4314  protected:
4315   // Buffer where the code is emitted.
4316   PositionIndependentCodeOption pic_;
4317 
4318   CPUFeatures cpu_features_;
4319 
4320 #ifdef DEBUG
4321   bool finalized_;
4322 #endif
4323 };
4324 
4325 }  // namespace vixl
4326 
4327 #endif  // VIXL_A64_ASSEMBLER_A64_H_
4328