// Copyright 2013 the V8 project authors. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#if V8_TARGET_ARCH_ARM64

#include "src/codegen/arm64/assembler-arm64.h"

#include "src/base/bits.h"
#include "src/base/cpu.h"
#include "src/codegen/arm64/assembler-arm64-inl.h"
#include "src/codegen/register-configuration.h"
#include "src/codegen/safepoint-table.h"
#include "src/codegen/string-constants.h"
#include "src/execution/frame-constants.h"

namespace v8 {
namespace internal {

// -----------------------------------------------------------------------------
// CpuFeatures implementation.

void CpuFeatures::ProbeImpl(bool cross_compile) {
  // AArch64 has no configuration options, no further probing is required.
  supported_ = 0;

  // Only use statically determined features for cross compile (snapshot).
  if (cross_compile) return;

  // We used to probe for coherent cache support, but on older CPUs it
  // causes crashes (crbug.com/524337), and newer CPUs don't even have
  // the feature any more.
}

void CpuFeatures::PrintTarget() {}
void CpuFeatures::PrintFeatures() {}

// -----------------------------------------------------------------------------
// CPURegList utilities.

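// For example, with a list currently holding {x2, x4, x7}:
//   list.PopLowestIndex();   // returns x2, leaving {x4, x7}
//   list.PopHighestIndex();  // returns x7, leaving {x4}
// Both return NoCPUReg once the list is empty.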
CPURegister CPURegList::PopLowestIndex() {
  if (IsEmpty()) {
    return NoCPUReg;
  }
  int index = base::bits::CountTrailingZeros(list_);
  DCHECK((1LL << index) & list_);
  Remove(index);
  return CPURegister::Create(index, size_, type_);
}

CPURegister CPURegList::PopHighestIndex() {
  if (IsEmpty()) {
    return NoCPUReg;
  }
  int index = CountLeadingZeros(list_, kRegListSizeInBits);
  index = kRegListSizeInBits - 1 - index;
  DCHECK((1LL << index) & list_);
  Remove(index);
  return CPURegister::Create(index, size_, type_);
}

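// On AArch64 the stack pointer must stay 16-byte aligned, so a register set
// that is pushed or popped as pairs of 8-byte slots must contain an even
// number of registers; padreg is toggled below to guarantee that.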
void CPURegList::Align() {
  // Use padreg, if necessary, to maintain stack alignment.
  if (Count() % 2 != 0) {
    if (IncludesAliasOf(padreg)) {
      Remove(padreg);
    } else {
      Combine(padreg);
    }
  }

  DCHECK_EQ(Count() % 2, 0);
}

CPURegList CPURegList::GetCalleeSaved(int size) {
  return CPURegList(CPURegister::kRegister, size, 19, 28);
}

CPURegList CPURegList::GetCalleeSavedV(int size) {
  return CPURegList(CPURegister::kVRegister, size, 8, 15);
}

CPURegList CPURegList::GetCallerSaved(int size) {
  // x18 is the platform register and is reserved for the use of platform ABIs.
  // Registers x0-x17 are caller-saved.
  CPURegList list = CPURegList(CPURegister::kRegister, size, 0, 17);
  return list;
}

CPURegList CPURegList::GetCallerSavedV(int size) {
  // Registers d0-d7 and d16-d31 are caller-saved.
  CPURegList list = CPURegList(CPURegister::kVRegister, size, 0, 7);
  list.Combine(CPURegList(CPURegister::kVRegister, size, 16, 31));
  return list;
}

// -----------------------------------------------------------------------------
// Implementation of RelocInfo

const int RelocInfo::kApplyMask =
    RelocInfo::ModeMask(RelocInfo::CODE_TARGET) |
    RelocInfo::ModeMask(RelocInfo::RUNTIME_ENTRY) |
    RelocInfo::ModeMask(RelocInfo::INTERNAL_REFERENCE);

bool RelocInfo::IsCodedSpecially() {
  // The deserializer needs to know whether a pointer is specially coded. Being
  // specially coded on ARM64 means that it is an immediate branch.
  Instruction* instr = reinterpret_cast<Instruction*>(pc_);
  if (instr->IsLdrLiteralX()) {
    return false;
  } else {
    DCHECK(instr->IsBranchAndLink() || instr->IsUnconditionalBranch());
    return true;
  }
}

bool RelocInfo::IsInConstantPool() {
  Instruction* instr = reinterpret_cast<Instruction*>(pc_);
  return instr->IsLdrLiteralX();
}

uint32_t RelocInfo::wasm_call_tag() const {
  DCHECK(rmode_ == WASM_CALL || rmode_ == WASM_STUB_CALL);
  Instruction* instr = reinterpret_cast<Instruction*>(pc_);
  if (instr->IsLdrLiteralX()) {
    return static_cast<uint32_t>(
        Memory<Address>(Assembler::target_pointer_address_at(pc_)));
  } else {
    DCHECK(instr->IsBranchAndLink() || instr->IsUnconditionalBranch());
    return static_cast<uint32_t>(instr->ImmPCOffset() / kInstrSize);
  }
}

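// Returns true if any two valid arguments alias the same register; general
// and vector registers are tracked separately. Invalid (NoReg) arguments are
// ignored, so e.g. AreAliased(x0, x1, x0) is true while AreAliased(x0, NoReg)
// is false.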
bool AreAliased(const CPURegister& reg1, const CPURegister& reg2,
                const CPURegister& reg3, const CPURegister& reg4,
                const CPURegister& reg5, const CPURegister& reg6,
                const CPURegister& reg7, const CPURegister& reg8) {
  int number_of_valid_regs = 0;
  int number_of_valid_fpregs = 0;

  RegList unique_regs = 0;
  RegList unique_fpregs = 0;

  const CPURegister regs[] = {reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8};

  for (unsigned i = 0; i < arraysize(regs); i++) {
    if (regs[i].IsRegister()) {
      number_of_valid_regs++;
      unique_regs |= regs[i].bit();
    } else if (regs[i].IsVRegister()) {
      number_of_valid_fpregs++;
      unique_fpregs |= regs[i].bit();
    } else {
      DCHECK(!regs[i].is_valid());
    }
  }

  int number_of_unique_regs =
      CountSetBits(unique_regs, sizeof(unique_regs) * kBitsPerByte);
  int number_of_unique_fpregs =
      CountSetBits(unique_fpregs, sizeof(unique_fpregs) * kBitsPerByte);

  DCHECK(number_of_valid_regs >= number_of_unique_regs);
  DCHECK(number_of_valid_fpregs >= number_of_unique_fpregs);

  return (number_of_valid_regs != number_of_unique_regs) ||
         (number_of_valid_fpregs != number_of_unique_fpregs);
}

bool AreSameSizeAndType(const CPURegister& reg1, const CPURegister& reg2,
                        const CPURegister& reg3, const CPURegister& reg4,
                        const CPURegister& reg5, const CPURegister& reg6,
                        const CPURegister& reg7, const CPURegister& reg8) {
  DCHECK(reg1.is_valid());
  bool match = true;
  match &= !reg2.is_valid() || reg2.IsSameSizeAndType(reg1);
  match &= !reg3.is_valid() || reg3.IsSameSizeAndType(reg1);
  match &= !reg4.is_valid() || reg4.IsSameSizeAndType(reg1);
  match &= !reg5.is_valid() || reg5.IsSameSizeAndType(reg1);
  match &= !reg6.is_valid() || reg6.IsSameSizeAndType(reg1);
  match &= !reg7.is_valid() || reg7.IsSameSizeAndType(reg1);
  match &= !reg8.is_valid() || reg8.IsSameSizeAndType(reg1);
  return match;
}

bool AreSameFormat(const VRegister& reg1, const VRegister& reg2,
                   const VRegister& reg3, const VRegister& reg4) {
  DCHECK(reg1.is_valid());
  return (!reg2.is_valid() || reg2.IsSameFormat(reg1)) &&
         (!reg3.is_valid() || reg3.IsSameFormat(reg1)) &&
         (!reg4.is_valid() || reg4.IsSameFormat(reg1));
}

bool AreConsecutive(const VRegister& reg1, const VRegister& reg2,
                    const VRegister& reg3, const VRegister& reg4) {
  DCHECK(reg1.is_valid());
  if (!reg2.is_valid()) {
    DCHECK(!reg3.is_valid() && !reg4.is_valid());
    return true;
  } else if (reg2.code() != ((reg1.code() + 1) % kNumberOfVRegisters)) {
    return false;
  }

  if (!reg3.is_valid()) {
    DCHECK(!reg4.is_valid());
    return true;
  } else if (reg3.code() != ((reg2.code() + 1) % kNumberOfVRegisters)) {
    return false;
  }

  if (!reg4.is_valid()) {
    return true;
  } else if (reg4.code() != ((reg3.code() + 1) % kNumberOfVRegisters)) {
    return false;
  }

  return true;
}

bool Operand::NeedsRelocation(const Assembler* assembler) const {
  RelocInfo::Mode rmode = immediate_.rmode();

  if (RelocInfo::IsOnlyForSerializer(rmode)) {
    return assembler->options().record_reloc_info_for_serialization;
  }

  return !RelocInfo::IsNone(rmode);
}

// Assembler
Assembler::Assembler(const AssemblerOptions& options,
                     std::unique_ptr<AssemblerBuffer> buffer)
    : AssemblerBase(options, std::move(buffer)),
      unresolved_branches_(),
      constpool_(this) {
  veneer_pool_blocked_nesting_ = 0;
  Reset();

#if defined(V8_OS_WIN)
  if (options.collect_win64_unwind_info) {
    xdata_encoder_ = std::make_unique<win64_unwindinfo::XdataEncoder>(*this);
  }
#endif
}

Assembler::~Assembler() {
  DCHECK(constpool_.IsEmpty());
  DCHECK_EQ(veneer_pool_blocked_nesting_, 0);
}

void Assembler::AbortedCodeGeneration() { constpool_.Clear(); }

void Assembler::Reset() {
#ifdef DEBUG
  DCHECK((pc_ >= buffer_start_) && (pc_ < buffer_start_ + buffer_->size()));
  DCHECK_EQ(veneer_pool_blocked_nesting_, 0);
  DCHECK(unresolved_branches_.empty());
  memset(buffer_start_, 0, pc_ - buffer_start_);
#endif
  pc_ = buffer_start_;
  reloc_info_writer.Reposition(buffer_start_ + buffer_->size(), pc_);
  constpool_.Clear();
  next_veneer_pool_check_ = kMaxInt;
}

#if defined(V8_OS_WIN)
win64_unwindinfo::BuiltinUnwindInfo Assembler::GetUnwindInfo() const {
  DCHECK(options().collect_win64_unwind_info);
  DCHECK_NOT_NULL(xdata_encoder_);
  return xdata_encoder_->unwinding_info();
}
#endif

void Assembler::AllocateAndInstallRequestedHeapObjects(Isolate* isolate) {
  DCHECK_IMPLIES(isolate == nullptr, heap_object_requests_.empty());
  for (auto& request : heap_object_requests_) {
    Address pc = reinterpret_cast<Address>(buffer_start_) + request.offset();
    switch (request.kind()) {
      case HeapObjectRequest::kHeapNumber: {
        Handle<HeapObject> object =
            isolate->factory()->NewHeapNumber<AllocationType::kOld>(
                request.heap_number());
        EmbeddedObjectIndex index = AddEmbeddedObject(object);
        set_embedded_object_index_referenced_from(pc, index);
        break;
      }
      case HeapObjectRequest::kStringConstant: {
        const StringConstantBase* str = request.string();
        CHECK_NOT_NULL(str);
        EmbeddedObjectIndex index =
            AddEmbeddedObject(str->AllocateStringConstant(isolate));
        set_embedded_object_index_referenced_from(pc, index);
        break;
      }
    }
  }
}

void Assembler::GetCode(Isolate* isolate, CodeDesc* desc,
                        SafepointTableBuilder* safepoint_table_builder,
                        int handler_table_offset) {
  // Emit constant pool if necessary.
  ForceConstantPoolEmissionWithoutJump();
  DCHECK(constpool_.IsEmpty());

  int code_comments_size = WriteCodeComments();

  AllocateAndInstallRequestedHeapObjects(isolate);

  // Set up code descriptor.
  // TODO(jgruber): Reconsider how these offsets and sizes are maintained up to
  // this point to make CodeDesc initialization less fiddly.

  static constexpr int kConstantPoolSize = 0;
  const int instruction_size = pc_offset();
  const int code_comments_offset = instruction_size - code_comments_size;
  const int constant_pool_offset = code_comments_offset - kConstantPoolSize;
  const int handler_table_offset2 = (handler_table_offset == kNoHandlerTable)
                                        ? constant_pool_offset
                                        : handler_table_offset;
  const int safepoint_table_offset =
      (safepoint_table_builder == kNoSafepointTable)
          ? handler_table_offset2
          : safepoint_table_builder->GetCodeOffset();
  const int reloc_info_offset =
      static_cast<int>(reloc_info_writer.pos() - buffer_->start());
  CodeDesc::Initialize(desc, this, safepoint_table_offset,
                       handler_table_offset2, constant_pool_offset,
                       code_comments_offset, reloc_info_offset);
}

void Assembler::Align(int m) {
  DCHECK(m >= 4 && base::bits::IsPowerOfTwo(m));
  while ((pc_offset() & (m - 1)) != 0) {
    nop();
  }
}

void Assembler::CodeTargetAlign() {
  // Preferred alignment of jump targets on some ARM chips.
  Align(8);
}

void Assembler::CheckLabelLinkChain(Label const* label) {
#ifdef DEBUG
  if (label->is_linked()) {
    static const int kMaxLinksToCheck = 64;  // Avoid O(n^2) behaviour.
    int links_checked = 0;
    int64_t linkoffset = label->pos();
    bool end_of_chain = false;
    while (!end_of_chain) {
      if (++links_checked > kMaxLinksToCheck) break;
      Instruction* link = InstructionAt(linkoffset);
      int64_t linkpcoffset = link->ImmPCOffset();
      int64_t prevlinkoffset = linkoffset + linkpcoffset;

      end_of_chain = (linkoffset == prevlinkoffset);
      linkoffset = linkoffset + linkpcoffset;
    }
  }
#endif
}

void Assembler::RemoveBranchFromLabelLinkChain(Instruction* branch,
                                               Label* label,
                                               Instruction* label_veneer) {
  DCHECK(label->is_linked());

  CheckLabelLinkChain(label);

  Instruction* link = InstructionAt(label->pos());
  Instruction* prev_link = link;
  Instruction* next_link;
  bool end_of_chain = false;

  while (link != branch && !end_of_chain) {
    next_link = link->ImmPCOffsetTarget();
    end_of_chain = (link == next_link);
    prev_link = link;
    link = next_link;
  }

  DCHECK(branch == link);
  next_link = branch->ImmPCOffsetTarget();

  if (branch == prev_link) {
    // The branch is the first instruction in the chain.
    if (branch == next_link) {
      // It is also the last instruction in the chain, so it is the only branch
      // currently referring to this label.
      label->Unuse();
    } else {
      label->link_to(
          static_cast<int>(reinterpret_cast<byte*>(next_link) - buffer_start_));
    }

  } else if (branch == next_link) {
    // The branch is the last (but not also the first) instruction in the chain.
    prev_link->SetImmPCOffsetTarget(options(), prev_link);

  } else {
    // The branch is in the middle of the chain.
    if (prev_link->IsTargetInImmPCOffsetRange(next_link)) {
      prev_link->SetImmPCOffsetTarget(options(), next_link);
    } else if (label_veneer != nullptr) {
      // Use the veneer for all previous links in the chain.
      prev_link->SetImmPCOffsetTarget(options(), prev_link);

      end_of_chain = false;
      link = next_link;
      while (!end_of_chain) {
        next_link = link->ImmPCOffsetTarget();
        end_of_chain = (link == next_link);
        link->SetImmPCOffsetTarget(options(), label_veneer);
        link = next_link;
      }
    } else {
      // The assert below will fire.
      // Some other work could be attempted to fix up the chain, but it would
      // be rather complicated. If we crash here, we may want to consider using
      // a mechanism other than a chain of branches.
      //
      // Note that this situation currently should not happen, as we always
      // call this function with a veneer to the target label.
      // However this could happen with a MacroAssembler in the following
      // state:
      //   [previous code]
      //   B(label);
      //   [20KB code]
      //   Tbz(label);  // First tbz. Pointing to unconditional branch.
      //   [20KB code]
      //   Tbz(label);  // Second tbz. Pointing to the first tbz.
      //   [more code]
      // and this function is called to remove the first tbz from the label
      // link chain. Since tbz has a range of +-32KB, the second tbz cannot
      // point to the unconditional branch.
      CHECK(prev_link->IsTargetInImmPCOffsetRange(next_link));
      UNREACHABLE();
    }
  }

  CheckLabelLinkChain(label);
}

void Assembler::bind(Label* label) {
  // Bind label to the address at pc_. All instructions (most likely branches)
  // that are linked to this label will be updated to point to the newly-bound
  // label.

  DCHECK(!label->is_near_linked());
  DCHECK(!label->is_bound());

  DeleteUnresolvedBranchInfoForLabel(label);

  // If the label is linked, the link chain looks something like this:
  //
  //   |--I----I-------I-------L
  //   |---------------------->| pc_offset
  //   |-------------->|         linkoffset = label->pos()
  //           |<------|         link->ImmPCOffset()
  //           |------>|         prevlinkoffset = linkoffset + link->ImmPCOffset()
  //
  // On each iteration, the last link is updated and then removed from the
  // chain until only one remains. At that point, the label is bound.
  //
  // If the label is not linked, no preparation is required before binding.
  while (label->is_linked()) {
    int linkoffset = label->pos();
    Instruction* link = InstructionAt(linkoffset);
    int prevlinkoffset = linkoffset + static_cast<int>(link->ImmPCOffset());

    CheckLabelLinkChain(label);

    DCHECK_GE(linkoffset, 0);
    DCHECK(linkoffset < pc_offset());
    DCHECK((linkoffset > prevlinkoffset) ||
           (linkoffset - prevlinkoffset == kStartOfLabelLinkChain));
    DCHECK_GE(prevlinkoffset, 0);

    // Update the link to point to the label.
    if (link->IsUnresolvedInternalReference()) {
      // Internal references do not get patched to an instruction but directly
      // to an address.
      internal_reference_positions_.push_back(linkoffset);
      PatchingAssembler patcher(options(), reinterpret_cast<byte*>(link), 2);
      patcher.dc64(reinterpret_cast<uintptr_t>(pc_));
    } else {
      link->SetImmPCOffsetTarget(options(),
                                 reinterpret_cast<Instruction*>(pc_));
    }

    // Link the label to the previous link in the chain.
    if (linkoffset - prevlinkoffset == kStartOfLabelLinkChain) {
      // We hit kStartOfLabelLinkChain, so the chain is fully processed.
      label->Unuse();
    } else {
      // Update the label for the next iteration.
      label->link_to(prevlinkoffset);
    }
  }
  label->bind_to(pc_offset());

  DCHECK(label->is_bound());
  DCHECK(!label->is_linked());
}

int Assembler::LinkAndGetByteOffsetTo(Label* label) {
  DCHECK_EQ(sizeof(*pc_), 1);
  CheckLabelLinkChain(label);

  int offset;
  if (label->is_bound()) {
    // The label is bound, so it does not need to be updated. Referring
    // instructions must link directly to the label as they will not be
    // updated.
    //
    // In this case, label->pos() returns the offset of the label from the
    // start of the buffer.
    //
    // Note that offset can be zero for self-referential instructions. (This
    // could be useful for ADR, for example.)
    offset = label->pos() - pc_offset();
    DCHECK_LE(offset, 0);
  } else {
    if (label->is_linked()) {
      // The label is linked, so the referring instruction should be added onto
      // the end of the label's link chain.
      //
      // In this case, label->pos() returns the offset of the last linked
      // instruction from the start of the buffer.
      offset = label->pos() - pc_offset();
      DCHECK_NE(offset, kStartOfLabelLinkChain);
      // Note that the offset here needs to be PC-relative only so that the
      // first instruction in a buffer can link to an unbound label. Otherwise,
      // the offset would be 0 for this case, and 0 is reserved for
      // kStartOfLabelLinkChain.
    } else {
      // The label is unused, so it now becomes linked and the referring
      // instruction is at the start of the new link chain.
      offset = kStartOfLabelLinkChain;
    }
    // The instruction at pc is now the last link in the label's chain.
    label->link_to(pc_offset());
  }

  return offset;
}

void Assembler::DeleteUnresolvedBranchInfoForLabelTraverse(Label* label) {
  DCHECK(label->is_linked());
  CheckLabelLinkChain(label);

  int link_offset = label->pos();
  int link_pcoffset;
  bool end_of_chain = false;

  while (!end_of_chain) {
    Instruction* link = InstructionAt(link_offset);
    link_pcoffset = static_cast<int>(link->ImmPCOffset());

    // ADR instructions are not handled by veneers.
    if (link->IsImmBranch()) {
      int max_reachable_pc =
          static_cast<int>(InstructionOffset(link) +
                           Instruction::ImmBranchRange(link->BranchType()));
      using unresolved_info_it = std::multimap<int, FarBranchInfo>::iterator;
      std::pair<unresolved_info_it, unresolved_info_it> range;
      range = unresolved_branches_.equal_range(max_reachable_pc);
      unresolved_info_it it;
      for (it = range.first; it != range.second; ++it) {
        if (it->second.pc_offset_ == link_offset) {
          unresolved_branches_.erase(it);
          break;
        }
      }
    }

    end_of_chain = (link_pcoffset == 0);
    link_offset = link_offset + link_pcoffset;
  }
}

void Assembler::DeleteUnresolvedBranchInfoForLabel(Label* label) {
  if (unresolved_branches_.empty()) {
    DCHECK_EQ(next_veneer_pool_check_, kMaxInt);
    return;
  }

  if (label->is_linked()) {
    // Branches to this label will be resolved when the label is bound,
    // normally just after all the associated info has been deleted.
    DeleteUnresolvedBranchInfoForLabelTraverse(label);
  }
  if (unresolved_branches_.empty()) {
    next_veneer_pool_check_ = kMaxInt;
  } else {
    next_veneer_pool_check_ =
        unresolved_branches_first_limit() - kVeneerDistanceCheckMargin;
  }
}

bool Assembler::IsConstantPoolAt(Instruction* instr) {
  // The constant pool marker is made of two instructions. These instructions
  // will never be emitted by the JIT, so checking for the first one is enough:
  // 0: ldr xzr, #<size of pool>
  bool result = instr->IsLdrLiteralX() && (instr->Rt() == kZeroRegCode);

  // It is still worth asserting the marker is complete.
  // 4: blr xzr
  DCHECK(!result || (instr->following()->IsBranchAndLinkToRegister() &&
                     instr->following()->Rn() == kZeroRegCode));

  return result;
}

int Assembler::ConstantPoolSizeAt(Instruction* instr) {
#ifdef USE_SIMULATOR
  // Assembler::debug() embeds constants directly into the instruction stream.
  // Although this is not a genuine constant pool, treat it like one to avoid
  // disassembling the constants.
  if ((instr->Mask(ExceptionMask) == HLT) &&
      (instr->ImmException() == kImmExceptionIsDebug)) {
    const char* message = reinterpret_cast<const char*>(
        instr->InstructionAtOffset(kDebugMessageOffset));
    int size = static_cast<int>(kDebugMessageOffset + strlen(message) + 1);
    return RoundUp(size, kInstrSize) / kInstrSize;
  }
  // Same for printf support, see MacroAssembler::CallPrintf().
  if ((instr->Mask(ExceptionMask) == HLT) &&
      (instr->ImmException() == kImmExceptionIsPrintf)) {
    return kPrintfLength / kInstrSize;
  }
#endif
  if (IsConstantPoolAt(instr)) {
    return instr->ImmLLiteral();
  } else {
    return -1;
  }
}

void Assembler::EmitPoolGuard() {
  // We must generate only one instruction as this is used in scopes that
  // control the size of the code generated.
  Emit(BLR | Rn(xzr));
}

void Assembler::StartBlockVeneerPool() { ++veneer_pool_blocked_nesting_; }

void Assembler::EndBlockVeneerPool() {
  if (--veneer_pool_blocked_nesting_ == 0) {
    // Check the veneer pool hasn't been blocked for too long.
    DCHECK(unresolved_branches_.empty() ||
           (pc_offset() < unresolved_branches_first_limit()));
  }
}

void Assembler::br(const Register& xn) {
  DCHECK(xn.Is64Bits());
  Emit(BR | Rn(xn));
}

void Assembler::blr(const Register& xn) {
  DCHECK(xn.Is64Bits());
  // The pattern 'blr xzr' is used as a guard to detect when execution falls
  // through the constant pool. It should not be emitted.
  DCHECK_NE(xn, xzr);
  Emit(BLR | Rn(xn));
}

void Assembler::ret(const Register& xn) {
  DCHECK(xn.Is64Bits());
  Emit(RET | Rn(xn));
}

void Assembler::b(int imm26) { Emit(B | ImmUncondBranch(imm26)); }

void Assembler::b(Label* label) { b(LinkAndGetInstructionOffsetTo(label)); }

void Assembler::b(int imm19, Condition cond) {
  Emit(B_cond | ImmCondBranch(imm19) | cond);
}

void Assembler::b(Label* label, Condition cond) {
  b(LinkAndGetInstructionOffsetTo(label), cond);
}

void Assembler::bl(int imm26) { Emit(BL | ImmUncondBranch(imm26)); }

void Assembler::bl(Label* label) { bl(LinkAndGetInstructionOffsetTo(label)); }

void Assembler::cbz(const Register& rt, int imm19) {
  Emit(SF(rt) | CBZ | ImmCmpBranch(imm19) | Rt(rt));
}

void Assembler::cbz(const Register& rt, Label* label) {
  cbz(rt, LinkAndGetInstructionOffsetTo(label));
}

void Assembler::cbnz(const Register& rt, int imm19) {
  Emit(SF(rt) | CBNZ | ImmCmpBranch(imm19) | Rt(rt));
}

void Assembler::cbnz(const Register& rt, Label* label) {
  cbnz(rt, LinkAndGetInstructionOffsetTo(label));
}

void Assembler::tbz(const Register& rt, unsigned bit_pos, int imm14) {
  DCHECK(rt.Is64Bits() || (rt.Is32Bits() && (bit_pos < kWRegSizeInBits)));
  Emit(TBZ | ImmTestBranchBit(bit_pos) | ImmTestBranch(imm14) | Rt(rt));
}

void Assembler::tbz(const Register& rt, unsigned bit_pos, Label* label) {
  tbz(rt, bit_pos, LinkAndGetInstructionOffsetTo(label));
}

void Assembler::tbnz(const Register& rt, unsigned bit_pos, int imm14) {
  DCHECK(rt.Is64Bits() || (rt.Is32Bits() && (bit_pos < kWRegSizeInBits)));
  Emit(TBNZ | ImmTestBranchBit(bit_pos) | ImmTestBranch(imm14) | Rt(rt));
}

void Assembler::tbnz(const Register& rt, unsigned bit_pos, Label* label) {
  tbnz(rt, bit_pos, LinkAndGetInstructionOffsetTo(label));
}

void Assembler::adr(const Register& rd, int imm21) {
  DCHECK(rd.Is64Bits());
  Emit(ADR | ImmPCRelAddress(imm21) | Rd(rd));
}

void Assembler::adr(const Register& rd, Label* label) {
  adr(rd, LinkAndGetByteOffsetTo(label));
}

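// Marker nops are encoded as 'mov xN, xN' for marker value N. The self-move
// has no architectural effect but lets tools recognise the marker in the
// instruction stream.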
void Assembler::nop(NopMarkerTypes n) {
  DCHECK((FIRST_NOP_MARKER <= n) && (n <= LAST_NOP_MARKER));
  mov(Register::XRegFromCode(n), Register::XRegFromCode(n));
}

void Assembler::add(const Register& rd, const Register& rn,
                    const Operand& operand) {
  AddSub(rd, rn, operand, LeaveFlags, ADD);
}

void Assembler::adds(const Register& rd, const Register& rn,
                     const Operand& operand) {
  AddSub(rd, rn, operand, SetFlags, ADD);
}

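// Comparison and negation aliases: cmn/cmp are flag-setting adds/subs with
// the zero register as destination, and neg/negs subtract from zero.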
void Assembler::cmn(const Register& rn, const Operand& operand) {
  Register zr = AppropriateZeroRegFor(rn);
  adds(zr, rn, operand);
}

void Assembler::sub(const Register& rd, const Register& rn,
                    const Operand& operand) {
  AddSub(rd, rn, operand, LeaveFlags, SUB);
}

void Assembler::subs(const Register& rd, const Register& rn,
                     const Operand& operand) {
  AddSub(rd, rn, operand, SetFlags, SUB);
}

void Assembler::cmp(const Register& rn, const Operand& operand) {
  Register zr = AppropriateZeroRegFor(rn);
  subs(zr, rn, operand);
}

void Assembler::neg(const Register& rd, const Operand& operand) {
  Register zr = AppropriateZeroRegFor(rd);
  sub(rd, zr, operand);
}

void Assembler::negs(const Register& rd, const Operand& operand) {
  Register zr = AppropriateZeroRegFor(rd);
  subs(rd, zr, operand);
}

void Assembler::adc(const Register& rd, const Register& rn,
                    const Operand& operand) {
  AddSubWithCarry(rd, rn, operand, LeaveFlags, ADC);
}

void Assembler::adcs(const Register& rd, const Register& rn,
                     const Operand& operand) {
  AddSubWithCarry(rd, rn, operand, SetFlags, ADC);
}

void Assembler::sbc(const Register& rd, const Register& rn,
                    const Operand& operand) {
  AddSubWithCarry(rd, rn, operand, LeaveFlags, SBC);
}

void Assembler::sbcs(const Register& rd, const Register& rn,
                     const Operand& operand) {
  AddSubWithCarry(rd, rn, operand, SetFlags, SBC);
}

void Assembler::ngc(const Register& rd, const Operand& operand) {
  Register zr = AppropriateZeroRegFor(rd);
  sbc(rd, zr, operand);
}

void Assembler::ngcs(const Register& rd, const Operand& operand) {
  Register zr = AppropriateZeroRegFor(rd);
  sbcs(rd, zr, operand);
}

// Logical instructions.
void Assembler::and_(const Register& rd, const Register& rn,
                     const Operand& operand) {
  Logical(rd, rn, operand, AND);
}

void Assembler::ands(const Register& rd, const Register& rn,
                     const Operand& operand) {
  Logical(rd, rn, operand, ANDS);
}

void Assembler::tst(const Register& rn, const Operand& operand) {
  ands(AppropriateZeroRegFor(rn), rn, operand);
}

void Assembler::bic(const Register& rd, const Register& rn,
                    const Operand& operand) {
  Logical(rd, rn, operand, BIC);
}

void Assembler::bics(const Register& rd, const Register& rn,
                     const Operand& operand) {
  Logical(rd, rn, operand, BICS);
}

void Assembler::orr(const Register& rd, const Register& rn,
                    const Operand& operand) {
  Logical(rd, rn, operand, ORR);
}

void Assembler::orn(const Register& rd, const Register& rn,
                    const Operand& operand) {
  Logical(rd, rn, operand, ORN);
}

void Assembler::eor(const Register& rd, const Register& rn,
                    const Operand& operand) {
  Logical(rd, rn, operand, EOR);
}

void Assembler::eon(const Register& rd, const Register& rn,
                    const Operand& operand) {
  Logical(rd, rn, operand, EON);
}

void Assembler::lslv(const Register& rd, const Register& rn,
                     const Register& rm) {
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  DCHECK(rd.SizeInBits() == rm.SizeInBits());
  Emit(SF(rd) | LSLV | Rm(rm) | Rn(rn) | Rd(rd));
}

void Assembler::lsrv(const Register& rd, const Register& rn,
                     const Register& rm) {
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  DCHECK(rd.SizeInBits() == rm.SizeInBits());
  Emit(SF(rd) | LSRV | Rm(rm) | Rn(rn) | Rd(rd));
}

void Assembler::asrv(const Register& rd, const Register& rn,
                     const Register& rm) {
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  DCHECK(rd.SizeInBits() == rm.SizeInBits());
  Emit(SF(rd) | ASRV | Rm(rm) | Rn(rn) | Rd(rd));
}

void Assembler::rorv(const Register& rd, const Register& rn,
                     const Register& rm) {
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  DCHECK(rd.SizeInBits() == rm.SizeInBits());
  Emit(SF(rd) | RORV | Rm(rm) | Rn(rn) | Rd(rd));
}

// Bitfield operations.
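// These bitfield-move instructions underlie the usual shift and extend
// aliases: immediate 'lsl'/'lsr' forms are typically encoded as ubfm, and
// 'asr'/'sxtb'-style forms as sbfm. immr gives the right-rotate amount and
// imms (roughly) the index of the highest bit of the source field.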
void Assembler::bfm(const Register& rd, const Register& rn, int immr,
                    int imms) {
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset);
  Emit(SF(rd) | BFM | N | ImmR(immr, rd.SizeInBits()) |
       ImmS(imms, rn.SizeInBits()) | Rn(rn) | Rd(rd));
}

void Assembler::sbfm(const Register& rd, const Register& rn, int immr,
                     int imms) {
  DCHECK(rd.Is64Bits() || rn.Is32Bits());
  Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset);
  Emit(SF(rd) | SBFM | N | ImmR(immr, rd.SizeInBits()) |
       ImmS(imms, rn.SizeInBits()) | Rn(rn) | Rd(rd));
}

void Assembler::ubfm(const Register& rd, const Register& rn, int immr,
                     int imms) {
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset);
  Emit(SF(rd) | UBFM | N | ImmR(immr, rd.SizeInBits()) |
       ImmS(imms, rn.SizeInBits()) | Rn(rn) | Rd(rd));
}

void Assembler::extr(const Register& rd, const Register& rn, const Register& rm,
                     int lsb) {
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  DCHECK(rd.SizeInBits() == rm.SizeInBits());
  Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset);
  Emit(SF(rd) | EXTR | N | Rm(rm) | ImmS(lsb, rn.SizeInBits()) | Rn(rn) |
       Rd(rd));
}

void Assembler::csel(const Register& rd, const Register& rn, const Register& rm,
                     Condition cond) {
  ConditionalSelect(rd, rn, rm, cond, CSEL);
}

void Assembler::csinc(const Register& rd, const Register& rn,
                      const Register& rm, Condition cond) {
  ConditionalSelect(rd, rn, rm, cond, CSINC);
}

void Assembler::csinv(const Register& rd, const Register& rn,
                      const Register& rm, Condition cond) {
  ConditionalSelect(rd, rn, rm, cond, CSINV);
}

void Assembler::csneg(const Register& rd, const Register& rn,
                      const Register& rm, Condition cond) {
  ConditionalSelect(rd, rn, rm, cond, CSNEG);
}

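// Conditional set/increment/invert/negate aliases. For example,
// 'cset rd, cond' yields rd = 1 if cond holds and 0 otherwise; it is encoded
// as 'csinc rd, zr, zr, !cond', which is why the condition is negated below
// (and why al/nv are disallowed).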
void Assembler::cset(const Register& rd, Condition cond) {
  DCHECK((cond != al) && (cond != nv));
  Register zr = AppropriateZeroRegFor(rd);
  csinc(rd, zr, zr, NegateCondition(cond));
}

void Assembler::csetm(const Register& rd, Condition cond) {
  DCHECK((cond != al) && (cond != nv));
  Register zr = AppropriateZeroRegFor(rd);
  csinv(rd, zr, zr, NegateCondition(cond));
}

void Assembler::cinc(const Register& rd, const Register& rn, Condition cond) {
  DCHECK((cond != al) && (cond != nv));
  csinc(rd, rn, rn, NegateCondition(cond));
}

void Assembler::cinv(const Register& rd, const Register& rn, Condition cond) {
  DCHECK((cond != al) && (cond != nv));
  csinv(rd, rn, rn, NegateCondition(cond));
}

void Assembler::cneg(const Register& rd, const Register& rn, Condition cond) {
  DCHECK((cond != al) && (cond != nv));
  csneg(rd, rn, rn, NegateCondition(cond));
}

void Assembler::ConditionalSelect(const Register& rd, const Register& rn,
                                  const Register& rm, Condition cond,
                                  ConditionalSelectOp op) {
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  DCHECK(rd.SizeInBits() == rm.SizeInBits());
  Emit(SF(rd) | op | Rm(rm) | Cond(cond) | Rn(rn) | Rd(rd));
}

void Assembler::ccmn(const Register& rn, const Operand& operand,
                     StatusFlags nzcv, Condition cond) {
  ConditionalCompare(rn, operand, nzcv, cond, CCMN);
}

void Assembler::ccmp(const Register& rn, const Operand& operand,
                     StatusFlags nzcv, Condition cond) {
  ConditionalCompare(rn, operand, nzcv, cond, CCMP);
}

void Assembler::DataProcessing3Source(const Register& rd, const Register& rn,
                                      const Register& rm, const Register& ra,
                                      DataProcessing3SourceOp op) {
  Emit(SF(rd) | op | Rm(rm) | Ra(ra) | Rn(rn) | Rd(rd));
}

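// Multiply aliases: 'mul rd, rn, rm' is 'madd rd, rn, rm, zr' and
// 'mneg rd, rn, rm' is 'msub rd, rn, rm, zr', i.e. multiply-accumulate with a
// zero accumulator.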
void Assembler::mul(const Register& rd, const Register& rn,
                    const Register& rm) {
  DCHECK(AreSameSizeAndType(rd, rn, rm));
  Register zr = AppropriateZeroRegFor(rn);
  DataProcessing3Source(rd, rn, rm, zr, MADD);
}

void Assembler::madd(const Register& rd, const Register& rn, const Register& rm,
                     const Register& ra) {
  DCHECK(AreSameSizeAndType(rd, rn, rm, ra));
  DataProcessing3Source(rd, rn, rm, ra, MADD);
}

void Assembler::mneg(const Register& rd, const Register& rn,
                     const Register& rm) {
  DCHECK(AreSameSizeAndType(rd, rn, rm));
  Register zr = AppropriateZeroRegFor(rn);
  DataProcessing3Source(rd, rn, rm, zr, MSUB);
}

void Assembler::msub(const Register& rd, const Register& rn, const Register& rm,
                     const Register& ra) {
  DCHECK(AreSameSizeAndType(rd, rn, rm, ra));
  DataProcessing3Source(rd, rn, rm, ra, MSUB);
}

void Assembler::smaddl(const Register& rd, const Register& rn,
                       const Register& rm, const Register& ra) {
  DCHECK(rd.Is64Bits() && ra.Is64Bits());
  DCHECK(rn.Is32Bits() && rm.Is32Bits());
  DataProcessing3Source(rd, rn, rm, ra, SMADDL_x);
}

void Assembler::smsubl(const Register& rd, const Register& rn,
                       const Register& rm, const Register& ra) {
  DCHECK(rd.Is64Bits() && ra.Is64Bits());
  DCHECK(rn.Is32Bits() && rm.Is32Bits());
  DataProcessing3Source(rd, rn, rm, ra, SMSUBL_x);
}

void Assembler::umaddl(const Register& rd, const Register& rn,
                       const Register& rm, const Register& ra) {
  DCHECK(rd.Is64Bits() && ra.Is64Bits());
  DCHECK(rn.Is32Bits() && rm.Is32Bits());
  DataProcessing3Source(rd, rn, rm, ra, UMADDL_x);
}

void Assembler::umsubl(const Register& rd, const Register& rn,
                       const Register& rm, const Register& ra) {
  DCHECK(rd.Is64Bits() && ra.Is64Bits());
  DCHECK(rn.Is32Bits() && rm.Is32Bits());
  DataProcessing3Source(rd, rn, rm, ra, UMSUBL_x);
}

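// 'smull rd, wn, wm' produces the full 64-bit product of two 32-bit values;
// it is encoded as 'smaddl rd, wn, wm, xzr'. smulh returns the high 64 bits
// of a 64x64-bit multiplication.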
void Assembler::smull(const Register& rd, const Register& rn,
                      const Register& rm) {
  DCHECK(rd.Is64Bits());
  DCHECK(rn.Is32Bits() && rm.Is32Bits());
  DataProcessing3Source(rd, rn, rm, xzr, SMADDL_x);
}

void Assembler::smulh(const Register& rd, const Register& rn,
                      const Register& rm) {
  DCHECK(AreSameSizeAndType(rd, rn, rm));
  DataProcessing3Source(rd, rn, rm, xzr, SMULH_x);
}

void Assembler::sdiv(const Register& rd, const Register& rn,
                     const Register& rm) {
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  DCHECK(rd.SizeInBits() == rm.SizeInBits());
  Emit(SF(rd) | SDIV | Rm(rm) | Rn(rn) | Rd(rd));
}

void Assembler::udiv(const Register& rd, const Register& rn,
                     const Register& rm) {
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  DCHECK(rd.SizeInBits() == rm.SizeInBits());
  Emit(SF(rd) | UDIV | Rm(rm) | Rn(rn) | Rd(rd));
}

void Assembler::rbit(const Register& rd, const Register& rn) {
  DataProcessing1Source(rd, rn, RBIT);
}

void Assembler::rev16(const Register& rd, const Register& rn) {
  DataProcessing1Source(rd, rn, REV16);
}

void Assembler::rev32(const Register& rd, const Register& rn) {
  DCHECK(rd.Is64Bits());
  DataProcessing1Source(rd, rn, REV);
}

void Assembler::rev(const Register& rd, const Register& rn) {
  DataProcessing1Source(rd, rn, rd.Is64Bits() ? REV_x : REV_w);
}

void Assembler::clz(const Register& rd, const Register& rn) {
  DataProcessing1Source(rd, rn, CLZ);
}

void Assembler::cls(const Register& rd, const Register& rn) {
  DataProcessing1Source(rd, rn, CLS);
}

void Assembler::pacia1716() { Emit(PACIA1716); }
void Assembler::autia1716() { Emit(AUTIA1716); }
void Assembler::paciasp() { Emit(PACIASP); }
void Assembler::autiasp() { Emit(AUTIASP); }

void Assembler::bti(BranchTargetIdentifier id) {
  SystemHint op;
  switch (id) {
    case BranchTargetIdentifier::kBti:
      op = BTI;
      break;
    case BranchTargetIdentifier::kBtiCall:
      op = BTI_c;
      break;
    case BranchTargetIdentifier::kBtiJump:
      op = BTI_j;
      break;
    case BranchTargetIdentifier::kBtiJumpCall:
      op = BTI_jc;
      break;
    case BranchTargetIdentifier::kNone:
    case BranchTargetIdentifier::kPaciasp:
      // We always want to generate a BTI instruction here, so disallow
      // skipping its generation or generating a PACIASP instead.
      UNREACHABLE();
  }
  hint(op);
}

void Assembler::ldp(const CPURegister& rt, const CPURegister& rt2,
                    const MemOperand& src) {
  LoadStorePair(rt, rt2, src, LoadPairOpFor(rt, rt2));
}

void Assembler::stp(const CPURegister& rt, const CPURegister& rt2,
                    const MemOperand& dst) {
  LoadStorePair(rt, rt2, dst, StorePairOpFor(rt, rt2));

#if defined(V8_OS_WIN)
  if (xdata_encoder_ && rt == x29 && rt2 == lr && dst.base().IsSP()) {
    xdata_encoder_->onSaveFpLr();
  }
#endif
}

void Assembler::ldpsw(const Register& rt, const Register& rt2,
                      const MemOperand& src) {
  DCHECK(rt.Is64Bits());
  LoadStorePair(rt, rt2, src, LDPSW_x);
}

void Assembler::LoadStorePair(const CPURegister& rt, const CPURegister& rt2,
                              const MemOperand& addr, LoadStorePairOp op) {
  // 'rt' and 'rt2' can only be aliased for stores.
  DCHECK(((op & LoadStorePairLBit) == 0) || rt != rt2);
  DCHECK(AreSameSizeAndType(rt, rt2));
  DCHECK(IsImmLSPair(addr.offset(), CalcLSPairDataSize(op)));
  int offset = static_cast<int>(addr.offset());

  Instr memop = op | Rt(rt) | Rt2(rt2) | RnSP(addr.base()) |
                ImmLSPair(offset, CalcLSPairDataSize(op));

  Instr addrmodeop;
  if (addr.IsImmediateOffset()) {
    addrmodeop = LoadStorePairOffsetFixed;
  } else {
    // Pre-index and post-index modes.
    DCHECK_NE(rt, addr.base());
    DCHECK_NE(rt2, addr.base());
    DCHECK_NE(addr.offset(), 0);
    if (addr.IsPreIndex()) {
      addrmodeop = LoadStorePairPreIndexFixed;
    } else {
      DCHECK(addr.IsPostIndex());
      addrmodeop = LoadStorePairPostIndexFixed;
    }
  }
  Emit(addrmodeop | memop);
}

// Memory instructions.
void Assembler::ldrb(const Register& rt, const MemOperand& src) {
  LoadStore(rt, src, LDRB_w);
}

void Assembler::strb(const Register& rt, const MemOperand& dst) {
  LoadStore(rt, dst, STRB_w);
}

void Assembler::ldrsb(const Register& rt, const MemOperand& src) {
  LoadStore(rt, src, rt.Is64Bits() ? LDRSB_x : LDRSB_w);
}

void Assembler::ldrh(const Register& rt, const MemOperand& src) {
  LoadStore(rt, src, LDRH_w);
}

void Assembler::strh(const Register& rt, const MemOperand& dst) {
  LoadStore(rt, dst, STRH_w);
}

void Assembler::ldrsh(const Register& rt, const MemOperand& src) {
  LoadStore(rt, src, rt.Is64Bits() ? LDRSH_x : LDRSH_w);
}

void Assembler::ldr(const CPURegister& rt, const MemOperand& src) {
  LoadStore(rt, src, LoadOpFor(rt));
}

void Assembler::str(const CPURegister& rt, const MemOperand& src) {
  LoadStore(rt, src, StoreOpFor(rt));
}

void Assembler::ldrsw(const Register& rt, const MemOperand& src) {
  DCHECK(rt.Is64Bits());
  LoadStore(rt, src, LDRSW_x);
}

void Assembler::ldr_pcrel(const CPURegister& rt, int imm19) {
  // The pattern 'ldr xzr, #offset' is used to indicate the beginning of a
  // constant pool. It should not be emitted.
  DCHECK(!rt.IsZero());
  Emit(LoadLiteralOpFor(rt) | ImmLLiteral(imm19) | Rt(rt));
}

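// Doubles that fit in a Smi are embedded directly as an immediate; anything
// else records a heap-object request that is allocated and patched in later
// by AllocateAndInstallRequestedHeapObjects().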
Operand Operand::EmbeddedNumber(double number) {
  int32_t smi;
  if (DoubleToSmiInteger(number, &smi)) {
    return Operand(Immediate(Smi::FromInt(smi)));
  }
  Operand result(0, RelocInfo::FULL_EMBEDDED_OBJECT);
  result.heap_object_request_.emplace(number);
  DCHECK(result.IsHeapObjectRequest());
  return result;
}

Operand Operand::EmbeddedStringConstant(const StringConstantBase* str) {
  Operand result(0, RelocInfo::FULL_EMBEDDED_OBJECT);
  result.heap_object_request_.emplace(str);
  DCHECK(result.IsHeapObjectRequest());
  return result;
}

void Assembler::ldr(const CPURegister& rt, const Operand& operand) {
  if (operand.IsHeapObjectRequest()) {
    BlockPoolsScope no_pool_before_ldr_of_heap_object_request(this);
    RequestHeapObject(operand.heap_object_request());
    ldr(rt, operand.immediate_for_heap_object_request());
  } else {
    ldr(rt, operand.immediate());
  }
}

void Assembler::ldr(const CPURegister& rt, const Immediate& imm) {
  BlockPoolsScope no_pool_before_ldr_pcrel_instr(this);
  RecordRelocInfo(imm.rmode(), imm.value());
  // The load will be patched when the constpool is emitted; the patching code
  // expects a load literal with offset 0.
  ldr_pcrel(rt, 0);
}

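// Load-acquire/store-release and exclusive variants. In these encodings the
// Rs and Rt2 fields are fixed to 0b11111, which is why x31 is passed below.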
void Assembler::ldar(const Register& rt, const Register& rn) {
  DCHECK(rn.Is64Bits());
  LoadStoreAcquireReleaseOp op = rt.Is32Bits() ? LDAR_w : LDAR_x;
  Emit(op | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
}

void Assembler::ldaxr(const Register& rt, const Register& rn) {
  DCHECK(rn.Is64Bits());
  LoadStoreAcquireReleaseOp op = rt.Is32Bits() ? LDAXR_w : LDAXR_x;
  Emit(op | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
}

void Assembler::stlr(const Register& rt, const Register& rn) {
  DCHECK(rn.Is64Bits());
  LoadStoreAcquireReleaseOp op = rt.Is32Bits() ? STLR_w : STLR_x;
  Emit(op | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
}

void Assembler::stlxr(const Register& rs, const Register& rt,
                      const Register& rn) {
  DCHECK(rn.Is64Bits());
  DCHECK(rs != rt && rs != rn);
  LoadStoreAcquireReleaseOp op = rt.Is32Bits() ? STLXR_w : STLXR_x;
  Emit(op | Rs(rs) | Rt2(x31) | RnSP(rn) | Rt(rt));
}

void Assembler::ldarb(const Register& rt, const Register& rn) {
  DCHECK(rt.Is32Bits());
  DCHECK(rn.Is64Bits());
  Emit(LDAR_b | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
}

void Assembler::ldaxrb(const Register& rt, const Register& rn) {
  DCHECK(rt.Is32Bits());
  DCHECK(rn.Is64Bits());
  Emit(LDAXR_b | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
}

void Assembler::stlrb(const Register& rt, const Register& rn) {
  DCHECK(rt.Is32Bits());
  DCHECK(rn.Is64Bits());
  Emit(STLR_b | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
}

void Assembler::stlxrb(const Register& rs, const Register& rt,
                       const Register& rn) {
  DCHECK(rs.Is32Bits());
  DCHECK(rt.Is32Bits());
  DCHECK(rn.Is64Bits());
  DCHECK(rs != rt && rs != rn);
  Emit(STLXR_b | Rs(rs) | Rt2(x31) | RnSP(rn) | Rt(rt));
}

void Assembler::ldarh(const Register& rt, const Register& rn) {
  DCHECK(rt.Is32Bits());
  DCHECK(rn.Is64Bits());
  Emit(LDAR_h | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
}

void Assembler::ldaxrh(const Register& rt, const Register& rn) {
  DCHECK(rt.Is32Bits());
  DCHECK(rn.Is64Bits());
  Emit(LDAXR_h | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
}

void Assembler::stlrh(const Register& rt, const Register& rn) {
  DCHECK(rt.Is32Bits());
  DCHECK(rn.Is64Bits());
  Emit(STLR_h | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
}

void Assembler::stlxrh(const Register& rs, const Register& rt,
                       const Register& rn) {
  DCHECK(rs.Is32Bits());
  DCHECK(rt.Is32Bits());
  DCHECK(rn.Is64Bits());
  DCHECK(rs != rt && rs != rn);
  Emit(STLXR_h | Rs(rs) | Rt2(x31) | RnSP(rn) | Rt(rt));
}

void Assembler::NEON3DifferentL(const VRegister& vd, const VRegister& vn,
                                const VRegister& vm, NEON3DifferentOp vop) {
  DCHECK(AreSameFormat(vn, vm));
  DCHECK((vn.Is1H() && vd.Is1S()) || (vn.Is1S() && vd.Is1D()) ||
         (vn.Is8B() && vd.Is8H()) || (vn.Is4H() && vd.Is4S()) ||
         (vn.Is2S() && vd.Is2D()) || (vn.Is16B() && vd.Is8H()) ||
         (vn.Is8H() && vd.Is4S()) || (vn.Is4S() && vd.Is2D()));
  Instr format, op = vop;
  if (vd.IsScalar()) {
    op |= NEON_Q | NEONScalar;
    format = SFormat(vn);
  } else {
    format = VFormat(vn);
  }
  Emit(format | op | Rm(vm) | Rn(vn) | Rd(vd));
}

void Assembler::NEON3DifferentW(const VRegister& vd, const VRegister& vn,
                                const VRegister& vm, NEON3DifferentOp vop) {
  DCHECK(AreSameFormat(vd, vn));
  DCHECK((vm.Is8B() && vd.Is8H()) || (vm.Is4H() && vd.Is4S()) ||
         (vm.Is2S() && vd.Is2D()) || (vm.Is16B() && vd.Is8H()) ||
         (vm.Is8H() && vd.Is4S()) || (vm.Is4S() && vd.Is2D()));
  Emit(VFormat(vm) | vop | Rm(vm) | Rn(vn) | Rd(vd));
}

void Assembler::NEON3DifferentHN(const VRegister& vd, const VRegister& vn,
                                 const VRegister& vm, NEON3DifferentOp vop) {
  DCHECK(AreSameFormat(vm, vn));
  DCHECK((vd.Is8B() && vn.Is8H()) || (vd.Is4H() && vn.Is4S()) ||
         (vd.Is2S() && vn.Is2D()) || (vd.Is16B() && vn.Is8H()) ||
         (vd.Is8H() && vn.Is4S()) || (vd.Is4S() && vn.Is2D()));
  Emit(VFormat(vd) | vop | Rm(vm) | Rn(vn) | Rd(vd));
}

1392 #define NEON_3DIFF_LONG_LIST(V) \
1393 V(pmull, NEON_PMULL, vn.IsVector() && vn.Is8B()) \
1394 V(pmull2, NEON_PMULL2, vn.IsVector() && vn.Is16B()) \
1395 V(saddl, NEON_SADDL, vn.IsVector() && vn.IsD()) \
1396 V(saddl2, NEON_SADDL2, vn.IsVector() && vn.IsQ()) \
1397 V(sabal, NEON_SABAL, vn.IsVector() && vn.IsD()) \
1398 V(sabal2, NEON_SABAL2, vn.IsVector() && vn.IsQ()) \
1399 V(uabal, NEON_UABAL, vn.IsVector() && vn.IsD()) \
1400 V(uabal2, NEON_UABAL2, vn.IsVector() && vn.IsQ()) \
1401 V(sabdl, NEON_SABDL, vn.IsVector() && vn.IsD()) \
1402 V(sabdl2, NEON_SABDL2, vn.IsVector() && vn.IsQ()) \
1403 V(uabdl, NEON_UABDL, vn.IsVector() && vn.IsD()) \
1404 V(uabdl2, NEON_UABDL2, vn.IsVector() && vn.IsQ()) \
1405 V(smlal, NEON_SMLAL, vn.IsVector() && vn.IsD()) \
1406 V(smlal2, NEON_SMLAL2, vn.IsVector() && vn.IsQ()) \
1407 V(umlal, NEON_UMLAL, vn.IsVector() && vn.IsD()) \
1408 V(umlal2, NEON_UMLAL2, vn.IsVector() && vn.IsQ()) \
1409 V(smlsl, NEON_SMLSL, vn.IsVector() && vn.IsD()) \
1410 V(smlsl2, NEON_SMLSL2, vn.IsVector() && vn.IsQ()) \
1411 V(umlsl, NEON_UMLSL, vn.IsVector() && vn.IsD()) \
1412 V(umlsl2, NEON_UMLSL2, vn.IsVector() && vn.IsQ()) \
1413 V(smull, NEON_SMULL, vn.IsVector() && vn.IsD()) \
1414 V(smull2, NEON_SMULL2, vn.IsVector() && vn.IsQ()) \
1415 V(umull, NEON_UMULL, vn.IsVector() && vn.IsD()) \
1416 V(umull2, NEON_UMULL2, vn.IsVector() && vn.IsQ()) \
1417 V(ssubl, NEON_SSUBL, vn.IsVector() && vn.IsD()) \
1418 V(ssubl2, NEON_SSUBL2, vn.IsVector() && vn.IsQ()) \
1419 V(uaddl, NEON_UADDL, vn.IsVector() && vn.IsD()) \
1420 V(uaddl2, NEON_UADDL2, vn.IsVector() && vn.IsQ()) \
1421 V(usubl, NEON_USUBL, vn.IsVector() && vn.IsD()) \
1422 V(usubl2, NEON_USUBL2, vn.IsVector() && vn.IsQ()) \
1423 V(sqdmlal, NEON_SQDMLAL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S()) \
1424 V(sqdmlal2, NEON_SQDMLAL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S()) \
1425 V(sqdmlsl, NEON_SQDMLSL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S()) \
1426 V(sqdmlsl2, NEON_SQDMLSL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S()) \
1427 V(sqdmull, NEON_SQDMULL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S()) \
1428 V(sqdmull2, NEON_SQDMULL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S())
1429
1430 #define DEFINE_ASM_FUNC(FN, OP, AS) \
1431 void Assembler::FN(const VRegister& vd, const VRegister& vn, \
1432 const VRegister& vm) { \
1433 DCHECK(AS); \
1434 NEON3DifferentL(vd, vn, vm, OP); \
1435 }
1436 NEON_3DIFF_LONG_LIST(DEFINE_ASM_FUNC)
1437 #undef DEFINE_ASM_FUNC
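// For reference (an added illustration, not part of the original source),
// the first list entry above, V(pmull, NEON_PMULL, ...), expands to:
//
//   void Assembler::pmull(const VRegister& vd, const VRegister& vn,
//                         const VRegister& vm) {
//     DCHECK(vn.IsVector() && vn.Is8B());
//     NEON3DifferentL(vd, vn, vm, NEON_PMULL);
//   }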
1438
1439 #define NEON_3DIFF_HN_LIST(V) \
1440 V(addhn, NEON_ADDHN, vd.IsD()) \
1441 V(addhn2, NEON_ADDHN2, vd.IsQ()) \
1442 V(raddhn, NEON_RADDHN, vd.IsD()) \
1443 V(raddhn2, NEON_RADDHN2, vd.IsQ()) \
1444 V(subhn, NEON_SUBHN, vd.IsD()) \
1445 V(subhn2, NEON_SUBHN2, vd.IsQ()) \
1446 V(rsubhn, NEON_RSUBHN, vd.IsD()) \
1447 V(rsubhn2, NEON_RSUBHN2, vd.IsQ())
1448
1449 #define DEFINE_ASM_FUNC(FN, OP, AS) \
1450 void Assembler::FN(const VRegister& vd, const VRegister& vn, \
1451 const VRegister& vm) { \
1452 DCHECK(AS); \
1453 NEON3DifferentHN(vd, vn, vm, OP); \
1454 }
1455 NEON_3DIFF_HN_LIST(DEFINE_ASM_FUNC)
1456 #undef DEFINE_ASM_FUNC
1457
1458 void Assembler::NEONPerm(const VRegister& vd, const VRegister& vn,
1459 const VRegister& vm, NEONPermOp op) {
1460 DCHECK(AreSameFormat(vd, vn, vm));
1461 DCHECK(!vd.Is1D());
1462 Emit(VFormat(vd) | op | Rm(vm) | Rn(vn) | Rd(vd));
1463 }
1464
1465 void Assembler::trn1(const VRegister& vd, const VRegister& vn,
1466 const VRegister& vm) {
1467 NEONPerm(vd, vn, vm, NEON_TRN1);
1468 }
1469
1470 void Assembler::trn2(const VRegister& vd, const VRegister& vn,
1471 const VRegister& vm) {
1472 NEONPerm(vd, vn, vm, NEON_TRN2);
1473 }
1474
1475 void Assembler::uzp1(const VRegister& vd, const VRegister& vn,
1476 const VRegister& vm) {
1477 NEONPerm(vd, vn, vm, NEON_UZP1);
1478 }
1479
1480 void Assembler::uzp2(const VRegister& vd, const VRegister& vn,
1481 const VRegister& vm) {
1482 NEONPerm(vd, vn, vm, NEON_UZP2);
1483 }
1484
1485 void Assembler::zip1(const VRegister& vd, const VRegister& vn,
1486 const VRegister& vm) {
1487 NEONPerm(vd, vn, vm, NEON_ZIP1);
1488 }
1489
1490 void Assembler::zip2(const VRegister& vd, const VRegister& vn,
1491 const VRegister& vm) {
1492 NEONPerm(vd, vn, vm, NEON_ZIP2);
1493 }
1494
1495 void Assembler::NEONShiftImmediate(const VRegister& vd, const VRegister& vn,
1496 NEONShiftImmediateOp op, int immh_immb) {
1497 DCHECK(AreSameFormat(vd, vn));
1498 Instr q, scalar;
1499 if (vn.IsScalar()) {
1500 q = NEON_Q;
1501 scalar = NEONScalar;
1502 } else {
1503 q = vd.IsD() ? 0 : NEON_Q;
1504 scalar = 0;
1505 }
1506 Emit(q | op | scalar | immh_immb | Rn(vn) | Rd(vd));
1507 }
1508
1509 void Assembler::NEONShiftLeftImmediate(const VRegister& vd, const VRegister& vn,
1510 int shift, NEONShiftImmediateOp op) {
1511 int laneSizeInBits = vn.LaneSizeInBits();
1512 DCHECK((shift >= 0) && (shift < laneSizeInBits));
1513 NEONShiftImmediate(vd, vn, op, (laneSizeInBits + shift) << 16);
1514 }
1515
1516 void Assembler::NEONShiftRightImmediate(const VRegister& vd,
1517 const VRegister& vn, int shift,
1518 NEONShiftImmediateOp op) {
1519 int laneSizeInBits = vn.LaneSizeInBits();
1520 DCHECK((shift >= 1) && (shift <= laneSizeInBits));
1521 NEONShiftImmediate(vd, vn, op, ((2 * laneSizeInBits) - shift) << 16);
1522 }
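// Added note (not in the original source): the value shifted into bit 16
// above forms the immh:immb field. For 8-bit lanes, a left shift by 3
// encodes laneSizeInBits + shift = 8 + 3 = 0b01011, while a right shift by
// 3 encodes 2 * laneSizeInBits - shift = 16 - 3 = 0b01101; the position of
// the leading set bit selects the lane size and the remaining low bits
// carry the shift amount.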
1523
1524 void Assembler::NEONShiftImmediateL(const VRegister& vd, const VRegister& vn,
1525 int shift, NEONShiftImmediateOp op) {
1526 int laneSizeInBits = vn.LaneSizeInBits();
1527 DCHECK((shift >= 0) && (shift < laneSizeInBits));
1528 int immh_immb = (laneSizeInBits + shift) << 16;
1529
1530 DCHECK((vn.Is8B() && vd.Is8H()) || (vn.Is4H() && vd.Is4S()) ||
1531 (vn.Is2S() && vd.Is2D()) || (vn.Is16B() && vd.Is8H()) ||
1532 (vn.Is8H() && vd.Is4S()) || (vn.Is4S() && vd.Is2D()));
1533 Instr q;
1534 q = vn.IsD() ? 0 : NEON_Q;
1535 Emit(q | op | immh_immb | Rn(vn) | Rd(vd));
1536 }
1537
1538 void Assembler::NEONShiftImmediateN(const VRegister& vd, const VRegister& vn,
1539 int shift, NEONShiftImmediateOp op) {
1540 Instr q, scalar;
1541 int laneSizeInBits = vd.LaneSizeInBits();
1542 DCHECK((shift >= 1) && (shift <= laneSizeInBits));
1543 int immh_immb = (2 * laneSizeInBits - shift) << 16;
1544
1545 if (vn.IsScalar()) {
1546 DCHECK((vd.Is1B() && vn.Is1H()) || (vd.Is1H() && vn.Is1S()) ||
1547 (vd.Is1S() && vn.Is1D()));
1548 q = NEON_Q;
1549 scalar = NEONScalar;
1550 } else {
1551 DCHECK((vd.Is8B() && vn.Is8H()) || (vd.Is4H() && vn.Is4S()) ||
1552 (vd.Is2S() && vn.Is2D()) || (vd.Is16B() && vn.Is8H()) ||
1553 (vd.Is8H() && vn.Is4S()) || (vd.Is4S() && vn.Is2D()));
1554 scalar = 0;
1555 q = vd.IsD() ? 0 : NEON_Q;
1556 }
1557 Emit(q | op | scalar | immh_immb | Rn(vn) | Rd(vd));
1558 }
1559
1560 void Assembler::shl(const VRegister& vd, const VRegister& vn, int shift) {
1561 DCHECK(vd.IsVector() || vd.Is1D());
1562 NEONShiftLeftImmediate(vd, vn, shift, NEON_SHL);
1563 }
1564
1565 void Assembler::sli(const VRegister& vd, const VRegister& vn, int shift) {
1566 DCHECK(vd.IsVector() || vd.Is1D());
1567 NEONShiftLeftImmediate(vd, vn, shift, NEON_SLI);
1568 }
1569
1570 void Assembler::sqshl(const VRegister& vd, const VRegister& vn, int shift) {
1571 NEONShiftLeftImmediate(vd, vn, shift, NEON_SQSHL_imm);
1572 }
1573
1574 void Assembler::sqshlu(const VRegister& vd, const VRegister& vn, int shift) {
1575 NEONShiftLeftImmediate(vd, vn, shift, NEON_SQSHLU);
1576 }
1577
1578 void Assembler::uqshl(const VRegister& vd, const VRegister& vn, int shift) {
1579 NEONShiftLeftImmediate(vd, vn, shift, NEON_UQSHL_imm);
1580 }
1581
1582 void Assembler::sshll(const VRegister& vd, const VRegister& vn, int shift) {
1583 DCHECK(vn.IsD());
1584 NEONShiftImmediateL(vd, vn, shift, NEON_SSHLL);
1585 }
1586
1587 void Assembler::sshll2(const VRegister& vd, const VRegister& vn, int shift) {
1588 DCHECK(vn.IsQ());
1589 NEONShiftImmediateL(vd, vn, shift, NEON_SSHLL);
1590 }
1591
1592 void Assembler::sxtl(const VRegister& vd, const VRegister& vn) {
1593 sshll(vd, vn, 0);
1594 }
1595
1596 void Assembler::sxtl2(const VRegister& vd, const VRegister& vn) {
1597 sshll2(vd, vn, 0);
1598 }
1599
1600 void Assembler::ushll(const VRegister& vd, const VRegister& vn, int shift) {
1601 DCHECK(vn.IsD());
1602 NEONShiftImmediateL(vd, vn, shift, NEON_USHLL);
1603 }
1604
1605 void Assembler::ushll2(const VRegister& vd, const VRegister& vn, int shift) {
1606 DCHECK(vn.IsQ());
1607 NEONShiftImmediateL(vd, vn, shift, NEON_USHLL);
1608 }
1609
1610 void Assembler::uxtl(const VRegister& vd, const VRegister& vn) {
1611 ushll(vd, vn, 0);
1612 }
1613
1614 void Assembler::uxtl2(const VRegister& vd, const VRegister& vn) {
1615 ushll2(vd, vn, 0);
1616 }
1617
1618 void Assembler::sri(const VRegister& vd, const VRegister& vn, int shift) {
1619 DCHECK(vd.IsVector() || vd.Is1D());
1620 NEONShiftRightImmediate(vd, vn, shift, NEON_SRI);
1621 }
1622
1623 void Assembler::sshr(const VRegister& vd, const VRegister& vn, int shift) {
1624 DCHECK(vd.IsVector() || vd.Is1D());
1625 NEONShiftRightImmediate(vd, vn, shift, NEON_SSHR);
1626 }
1627
1628 void Assembler::ushr(const VRegister& vd, const VRegister& vn, int shift) {
1629 DCHECK(vd.IsVector() || vd.Is1D());
1630 NEONShiftRightImmediate(vd, vn, shift, NEON_USHR);
1631 }
1632
1633 void Assembler::srshr(const VRegister& vd, const VRegister& vn, int shift) {
1634 DCHECK(vd.IsVector() || vd.Is1D());
1635 NEONShiftRightImmediate(vd, vn, shift, NEON_SRSHR);
1636 }
1637
1638 void Assembler::urshr(const VRegister& vd, const VRegister& vn, int shift) {
1639 DCHECK(vd.IsVector() || vd.Is1D());
1640 NEONShiftRightImmediate(vd, vn, shift, NEON_URSHR);
1641 }
1642
1643 void Assembler::ssra(const VRegister& vd, const VRegister& vn, int shift) {
1644 DCHECK(vd.IsVector() || vd.Is1D());
1645 NEONShiftRightImmediate(vd, vn, shift, NEON_SSRA);
1646 }
1647
1648 void Assembler::usra(const VRegister& vd, const VRegister& vn, int shift) {
1649 DCHECK(vd.IsVector() || vd.Is1D());
1650 NEONShiftRightImmediate(vd, vn, shift, NEON_USRA);
1651 }
1652
1653 void Assembler::srsra(const VRegister& vd, const VRegister& vn, int shift) {
1654 DCHECK(vd.IsVector() || vd.Is1D());
1655 NEONShiftRightImmediate(vd, vn, shift, NEON_SRSRA);
1656 }
1657
1658 void Assembler::ursra(const VRegister& vd, const VRegister& vn, int shift) {
1659 DCHECK(vd.IsVector() || vd.Is1D());
1660 NEONShiftRightImmediate(vd, vn, shift, NEON_URSRA);
1661 }
1662
1663 void Assembler::shrn(const VRegister& vd, const VRegister& vn, int shift) {
1664 DCHECK(vn.IsVector() && vd.IsD());
1665 NEONShiftImmediateN(vd, vn, shift, NEON_SHRN);
1666 }
1667
1668 void Assembler::shrn2(const VRegister& vd, const VRegister& vn, int shift) {
1669 DCHECK(vn.IsVector() && vd.IsQ());
1670 NEONShiftImmediateN(vd, vn, shift, NEON_SHRN);
1671 }
1672
1673 void Assembler::rshrn(const VRegister& vd, const VRegister& vn, int shift) {
1674 DCHECK(vn.IsVector() && vd.IsD());
1675 NEONShiftImmediateN(vd, vn, shift, NEON_RSHRN);
1676 }
1677
1678 void Assembler::rshrn2(const VRegister& vd, const VRegister& vn, int shift) {
1679 DCHECK(vn.IsVector() && vd.IsQ());
1680 NEONShiftImmediateN(vd, vn, shift, NEON_RSHRN);
1681 }
1682
1683 void Assembler::sqshrn(const VRegister& vd, const VRegister& vn, int shift) {
1684 DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
1685 NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRN);
1686 }
1687
1688 void Assembler::sqshrn2(const VRegister& vd, const VRegister& vn, int shift) {
1689 DCHECK(vn.IsVector() && vd.IsQ());
1690 NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRN);
1691 }
1692
1693 void Assembler::sqrshrn(const VRegister& vd, const VRegister& vn, int shift) {
1694 DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
1695 NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRN);
1696 }
1697
1698 void Assembler::sqrshrn2(const VRegister& vd, const VRegister& vn, int shift) {
1699 DCHECK(vn.IsVector() && vd.IsQ());
1700 NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRN);
1701 }
1702
1703 void Assembler::sqshrun(const VRegister& vd, const VRegister& vn, int shift) {
1704 DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
1705 NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRUN);
1706 }
1707
1708 void Assembler::sqshrun2(const VRegister& vd, const VRegister& vn, int shift) {
1709 DCHECK(vn.IsVector() && vd.IsQ());
1710 NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRUN);
1711 }
1712
1713 void Assembler::sqrshrun(const VRegister& vd, const VRegister& vn, int shift) {
1714 DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
1715 NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRUN);
1716 }
1717
1718 void Assembler::sqrshrun2(const VRegister& vd, const VRegister& vn, int shift) {
1719 DCHECK(vn.IsVector() && vd.IsQ());
1720 NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRUN);
1721 }
1722
1723 void Assembler::uqshrn(const VRegister& vd, const VRegister& vn, int shift) {
1724 DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
1725 NEONShiftImmediateN(vd, vn, shift, NEON_UQSHRN);
1726 }
1727
1728 void Assembler::uqshrn2(const VRegister& vd, const VRegister& vn, int shift) {
1729 DCHECK(vn.IsVector() && vd.IsQ());
1730 NEONShiftImmediateN(vd, vn, shift, NEON_UQSHRN);
1731 }
1732
1733 void Assembler::uqrshrn(const VRegister& vd, const VRegister& vn, int shift) {
1734 DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
1735 NEONShiftImmediateN(vd, vn, shift, NEON_UQRSHRN);
1736 }
1737
1738 void Assembler::uqrshrn2(const VRegister& vd, const VRegister& vn, int shift) {
1739 DCHECK(vn.IsVector() && vd.IsQ());
1740 NEONShiftImmediateN(vd, vn, shift, NEON_UQRSHRN);
1741 }
1742
1743 void Assembler::uaddw(const VRegister& vd, const VRegister& vn,
1744 const VRegister& vm) {
1745 DCHECK(vm.IsD());
1746 NEON3DifferentW(vd, vn, vm, NEON_UADDW);
1747 }
1748
1749 void Assembler::uaddw2(const VRegister& vd, const VRegister& vn,
1750 const VRegister& vm) {
1751 DCHECK(vm.IsQ());
1752 NEON3DifferentW(vd, vn, vm, NEON_UADDW2);
1753 }
1754
1755 void Assembler::saddw(const VRegister& vd, const VRegister& vn,
1756 const VRegister& vm) {
1757 DCHECK(vm.IsD());
1758 NEON3DifferentW(vd, vn, vm, NEON_SADDW);
1759 }
1760
1761 void Assembler::saddw2(const VRegister& vd, const VRegister& vn,
1762 const VRegister& vm) {
1763 DCHECK(vm.IsQ());
1764 NEON3DifferentW(vd, vn, vm, NEON_SADDW2);
1765 }
1766
1767 void Assembler::usubw(const VRegister& vd, const VRegister& vn,
1768 const VRegister& vm) {
1769 DCHECK(vm.IsD());
1770 NEON3DifferentW(vd, vn, vm, NEON_USUBW);
1771 }
1772
1773 void Assembler::usubw2(const VRegister& vd, const VRegister& vn,
1774 const VRegister& vm) {
1775 DCHECK(vm.IsQ());
1776 NEON3DifferentW(vd, vn, vm, NEON_USUBW2);
1777 }
1778
1779 void Assembler::ssubw(const VRegister& vd, const VRegister& vn,
1780 const VRegister& vm) {
1781 DCHECK(vm.IsD());
1782 NEON3DifferentW(vd, vn, vm, NEON_SSUBW);
1783 }
1784
1785 void Assembler::ssubw2(const VRegister& vd, const VRegister& vn,
1786 const VRegister& vm) {
1787 DCHECK(vm.IsQ());
1788 NEON3DifferentW(vd, vn, vm, NEON_SSUBW2);
1789 }
1790
1791 void Assembler::mov(const Register& rd, const Register& rm) {
1792 // Moves involving the stack pointer are encoded as add immediate with
1793 // second operand of zero. Otherwise, orr with first operand zr is
1794 // used.
1795 if (rd.IsSP() || rm.IsSP()) {
1796 add(rd, rm, 0);
1797 } else {
1798 orr(rd, AppropriateZeroRegFor(rd), rm);
1799 }
1800 }
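// For illustration (added, not part of the original source), the two
// encodings described in the comment above:
//
//   mov(x0, x1);  // Emitted as orr x0, xzr, x1.
//   mov(sp, x0);  // Emitted as add sp, x0, #0; SP cannot be an orr operand.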
1801
1802 void Assembler::ins(const VRegister& vd, int vd_index, const Register& rn) {
1803 // We support vd arguments of the form vd.VxT() or vd.T(), where x is the
1804 // number of lanes, and T is b, h, s or d.
1805 int lane_size = vd.LaneSizeInBytes();
1806 NEONFormatField format;
1807 switch (lane_size) {
1808 case 1:
1809 format = NEON_16B;
1810 DCHECK(rn.IsW());
1811 break;
1812 case 2:
1813 format = NEON_8H;
1814 DCHECK(rn.IsW());
1815 break;
1816 case 4:
1817 format = NEON_4S;
1818 DCHECK(rn.IsW());
1819 break;
1820 default:
1821 DCHECK_EQ(lane_size, 8);
1822 DCHECK(rn.IsX());
1823 format = NEON_2D;
1824 break;
1825 }
1826
1827 DCHECK((0 <= vd_index) &&
1828 (vd_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
1829 Emit(NEON_INS_GENERAL | ImmNEON5(format, vd_index) | Rn(rn) | Rd(vd));
1830 }
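// A hypothetical use of the general-register form of ins (an added sketch,
// not part of the original source):
//
//   ins(v0.V4S(), 2, w5);  // Insert w5 into S lane 2 of v0.
//   ins(v1.V2D(), 0, x6);  // 8-byte lanes take an X register source.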
1831
1832 void Assembler::mov(const Register& rd, const VRegister& vn, int vn_index) {
1833 DCHECK_GE(vn.SizeInBytes(), 4);
1834 umov(rd, vn, vn_index);
1835 }
1836
1837 void Assembler::smov(const Register& rd, const VRegister& vn, int vn_index) {
1838 // We support vn arguments of the form vn.VxT() or vn.T(), where x is the
1839 // number of lanes, and T is b, h, s.
1840 int lane_size = vn.LaneSizeInBytes();
1841 NEONFormatField format;
1842 Instr q = 0;
1843 switch (lane_size) {
1844 case 1:
1845 format = NEON_16B;
1846 break;
1847 case 2:
1848 format = NEON_8H;
1849 break;
1850 default:
1851 DCHECK_EQ(lane_size, 4);
1852 DCHECK(rd.IsX());
1853 format = NEON_4S;
1854 break;
1855 }
1856 q = rd.IsW() ? 0 : NEON_Q;
1857 DCHECK((0 <= vn_index) &&
1858 (vn_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
1859 Emit(q | NEON_SMOV | ImmNEON5(format, vn_index) | Rn(vn) | Rd(rd));
1860 }
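// Illustrative calls (added, not part of the original source); smov
// sign-extends the selected lane into the destination:
//
//   smov(w0, v1.V16B(), 3);  // w0 = SignExtend(v1.B[3]).
//   smov(x0, v1.V4S(), 1);   // S lanes require an X destination here.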
1861
1862 void Assembler::cls(const VRegister& vd, const VRegister& vn) {
1863 DCHECK(AreSameFormat(vd, vn));
1864 DCHECK(!vd.Is1D() && !vd.Is2D());
1865 Emit(VFormat(vn) | NEON_CLS | Rn(vn) | Rd(vd));
1866 }
1867
1868 void Assembler::clz(const VRegister& vd, const VRegister& vn) {
1869 DCHECK(AreSameFormat(vd, vn));
1870 DCHECK(!vd.Is1D() && !vd.Is2D());
1871 Emit(VFormat(vn) | NEON_CLZ | Rn(vn) | Rd(vd));
1872 }
1873
1874 void Assembler::cnt(const VRegister& vd, const VRegister& vn) {
1875 DCHECK(AreSameFormat(vd, vn));
1876 DCHECK(vd.Is8B() || vd.Is16B());
1877 Emit(VFormat(vn) | NEON_CNT | Rn(vn) | Rd(vd));
1878 }
1879
1880 void Assembler::rev16(const VRegister& vd, const VRegister& vn) {
1881 DCHECK(AreSameFormat(vd, vn));
1882 DCHECK(vd.Is8B() || vd.Is16B());
1883 Emit(VFormat(vn) | NEON_REV16 | Rn(vn) | Rd(vd));
1884 }
1885
1886 void Assembler::rev32(const VRegister& vd, const VRegister& vn) {
1887 DCHECK(AreSameFormat(vd, vn));
1888 DCHECK(vd.Is8B() || vd.Is16B() || vd.Is4H() || vd.Is8H());
1889 Emit(VFormat(vn) | NEON_REV32 | Rn(vn) | Rd(vd));
1890 }
1891
1892 void Assembler::rev64(const VRegister& vd, const VRegister& vn) {
1893 DCHECK(AreSameFormat(vd, vn));
1894 DCHECK(!vd.Is1D() && !vd.Is2D());
1895 Emit(VFormat(vn) | NEON_REV64 | Rn(vn) | Rd(vd));
1896 }
1897
1898 void Assembler::ursqrte(const VRegister& vd, const VRegister& vn) {
1899 DCHECK(AreSameFormat(vd, vn));
1900 DCHECK(vd.Is2S() || vd.Is4S());
1901 Emit(VFormat(vn) | NEON_URSQRTE | Rn(vn) | Rd(vd));
1902 }
1903
1904 void Assembler::urecpe(const VRegister& vd, const VRegister& vn) {
1905 DCHECK(AreSameFormat(vd, vn));
1906 DCHECK(vd.Is2S() || vd.Is4S());
1907 Emit(VFormat(vn) | NEON_URECPE | Rn(vn) | Rd(vd));
1908 }
1909
1910 void Assembler::NEONAddlp(const VRegister& vd, const VRegister& vn,
1911 NEON2RegMiscOp op) {
1912 DCHECK((op == NEON_SADDLP) || (op == NEON_UADDLP) || (op == NEON_SADALP) ||
1913 (op == NEON_UADALP));
1914
1915 DCHECK((vn.Is8B() && vd.Is4H()) || (vn.Is4H() && vd.Is2S()) ||
1916 (vn.Is2S() && vd.Is1D()) || (vn.Is16B() && vd.Is8H()) ||
1917 (vn.Is8H() && vd.Is4S()) || (vn.Is4S() && vd.Is2D()));
1918 Emit(VFormat(vn) | op | Rn(vn) | Rd(vd));
1919 }
1920
1921 void Assembler::saddlp(const VRegister& vd, const VRegister& vn) {
1922 NEONAddlp(vd, vn, NEON_SADDLP);
1923 }
1924
1925 void Assembler::uaddlp(const VRegister& vd, const VRegister& vn) {
1926 NEONAddlp(vd, vn, NEON_UADDLP);
1927 }
1928
1929 void Assembler::sadalp(const VRegister& vd, const VRegister& vn) {
1930 NEONAddlp(vd, vn, NEON_SADALP);
1931 }
1932
1933 void Assembler::uadalp(const VRegister& vd, const VRegister& vn) {
1934 NEONAddlp(vd, vn, NEON_UADALP);
1935 }
1936
1937 void Assembler::NEONAcrossLanesL(const VRegister& vd, const VRegister& vn,
1938 NEONAcrossLanesOp op) {
1939 DCHECK((vn.Is8B() && vd.Is1H()) || (vn.Is16B() && vd.Is1H()) ||
1940 (vn.Is4H() && vd.Is1S()) || (vn.Is8H() && vd.Is1S()) ||
1941 (vn.Is4S() && vd.Is1D()));
1942 Emit(VFormat(vn) | op | Rn(vn) | Rd(vd));
1943 }
1944
1945 void Assembler::saddlv(const VRegister& vd, const VRegister& vn) {
1946 NEONAcrossLanesL(vd, vn, NEON_SADDLV);
1947 }
1948
1949 void Assembler::uaddlv(const VRegister& vd, const VRegister& vn) {
1950 NEONAcrossLanesL(vd, vn, NEON_UADDLV);
1951 }
1952
1953 void Assembler::NEONAcrossLanes(const VRegister& vd, const VRegister& vn,
1954 NEONAcrossLanesOp op) {
1955 DCHECK((vn.Is8B() && vd.Is1B()) || (vn.Is16B() && vd.Is1B()) ||
1956 (vn.Is4H() && vd.Is1H()) || (vn.Is8H() && vd.Is1H()) ||
1957 (vn.Is4S() && vd.Is1S()));
1958 if ((op & NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
1959 Emit(FPFormat(vn) | op | Rn(vn) | Rd(vd));
1960 } else {
1961 Emit(VFormat(vn) | op | Rn(vn) | Rd(vd));
1962 }
1963 }
1964
1965 #define NEON_ACROSSLANES_LIST(V) \
1966 V(fmaxv, NEON_FMAXV, vd.Is1S()) \
1967 V(fminv, NEON_FMINV, vd.Is1S()) \
1968 V(fmaxnmv, NEON_FMAXNMV, vd.Is1S()) \
1969 V(fminnmv, NEON_FMINNMV, vd.Is1S()) \
1970 V(addv, NEON_ADDV, true) \
1971 V(smaxv, NEON_SMAXV, true) \
1972 V(sminv, NEON_SMINV, true) \
1973 V(umaxv, NEON_UMAXV, true) \
1974 V(uminv, NEON_UMINV, true)
1975
1976 #define DEFINE_ASM_FUNC(FN, OP, AS) \
1977 void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
1978 DCHECK(AS); \
1979 NEONAcrossLanes(vd, vn, OP); \
1980 }
1981 NEON_ACROSSLANES_LIST(DEFINE_ASM_FUNC)
1982 #undef DEFINE_ASM_FUNC
1983
1984 void Assembler::mov(const VRegister& vd, int vd_index, const Register& rn) {
1985 ins(vd, vd_index, rn);
1986 }
1987
1988 void Assembler::umov(const Register& rd, const VRegister& vn, int vn_index) {
1989 // We support vn arguments of the form vn.VxT() or vn.T(), where x is the
1990 // number of lanes, and T is b, h, s or d.
1991 int lane_size = vn.LaneSizeInBytes();
1992 NEONFormatField format;
1993 Instr q = 0;
1994 switch (lane_size) {
1995 case 1:
1996 format = NEON_16B;
1997 DCHECK(rd.IsW());
1998 break;
1999 case 2:
2000 format = NEON_8H;
2001 DCHECK(rd.IsW());
2002 break;
2003 case 4:
2004 format = NEON_4S;
2005 DCHECK(rd.IsW());
2006 break;
2007 default:
2008 DCHECK_EQ(lane_size, 8);
2009 DCHECK(rd.IsX());
2010 format = NEON_2D;
2011 q = NEON_Q;
2012 break;
2013 }
2014
2015 DCHECK((0 <= vn_index) &&
2016 (vn_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
2017 Emit(q | NEON_UMOV | ImmNEON5(format, vn_index) | Rn(vn) | Rd(rd));
2018 }
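// Illustrative calls (added, not part of the original source); umov moves
// the selected lane unmodified, zero-filling the rest of the destination:
//
//   umov(w0, v1.V8H(), 5);  // w0 = v1.H[5], zero-extended.
//   umov(x0, v1.V2D(), 1);  // D lanes require an X destination.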
2019
2020 void Assembler::mov(const VRegister& vd, const VRegister& vn, int vn_index) {
2021 DCHECK(vd.IsScalar());
2022 dup(vd, vn, vn_index);
2023 }
2024
2025 void Assembler::dup(const VRegister& vd, const Register& rn) {
2026 DCHECK(!vd.Is1D());
2027 DCHECK_EQ(vd.Is2D(), rn.IsX());
2028 Instr q = vd.IsD() ? 0 : NEON_Q;
2029 Emit(q | NEON_DUP_GENERAL | ImmNEON5(VFormat(vd), 0) | Rn(rn) | Rd(vd));
2030 }
2031
2032 void Assembler::ins(const VRegister& vd, int vd_index, const VRegister& vn,
2033 int vn_index) {
2034 DCHECK(AreSameFormat(vd, vn));
2035 // We support vd arguments of the form vd.VxT() or vd.T(), where x is the
2036 // number of lanes, and T is b, h, s or d.
2037 int lane_size = vd.LaneSizeInBytes();
2038 NEONFormatField format;
2039 switch (lane_size) {
2040 case 1:
2041 format = NEON_16B;
2042 break;
2043 case 2:
2044 format = NEON_8H;
2045 break;
2046 case 4:
2047 format = NEON_4S;
2048 break;
2049 default:
2050 DCHECK_EQ(lane_size, 8);
2051 format = NEON_2D;
2052 break;
2053 }
2054
2055 DCHECK((0 <= vd_index) &&
2056 (vd_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
2057 DCHECK((0 <= vn_index) &&
2058 (vn_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
2059 Emit(NEON_INS_ELEMENT | ImmNEON5(format, vd_index) |
2060 ImmNEON4(format, vn_index) | Rn(vn) | Rd(vd));
2061 }
2062
2063 void Assembler::NEONTable(const VRegister& vd, const VRegister& vn,
2064 const VRegister& vm, NEONTableOp op) {
2065 DCHECK(vd.Is16B() || vd.Is8B());
2066 DCHECK(vn.Is16B());
2067 DCHECK(AreSameFormat(vd, vm));
2068 Emit(op | (vd.IsQ() ? NEON_Q : 0) | Rm(vm) | Rn(vn) | Rd(vd));
2069 }
2070
2071 void Assembler::tbl(const VRegister& vd, const VRegister& vn,
2072 const VRegister& vm) {
2073 NEONTable(vd, vn, vm, NEON_TBL_1v);
2074 }
2075
2076 void Assembler::tbl(const VRegister& vd, const VRegister& vn,
2077 const VRegister& vn2, const VRegister& vm) {
2078 USE(vn2);
2079 DCHECK(AreSameFormat(vn, vn2));
2080 DCHECK(AreConsecutive(vn, vn2));
2081 NEONTable(vd, vn, vm, NEON_TBL_2v);
2082 }
2083
2084 void Assembler::tbl(const VRegister& vd, const VRegister& vn,
2085 const VRegister& vn2, const VRegister& vn3,
2086 const VRegister& vm) {
2087 USE(vn2);
2088 USE(vn3);
2089 DCHECK(AreSameFormat(vn, vn2, vn3));
2090 DCHECK(AreConsecutive(vn, vn2, vn3));
2091 NEONTable(vd, vn, vm, NEON_TBL_3v);
2092 }
2093
2094 void Assembler::tbl(const VRegister& vd, const VRegister& vn,
2095 const VRegister& vn2, const VRegister& vn3,
2096 const VRegister& vn4, const VRegister& vm) {
2097 USE(vn2);
2098 USE(vn3);
2099 USE(vn4);
2100 DCHECK(AreSameFormat(vn, vn2, vn3, vn4));
2101 DCHECK(AreConsecutive(vn, vn2, vn3, vn4));
2102 NEONTable(vd, vn, vm, NEON_TBL_4v);
2103 }
2104
2105 void Assembler::tbx(const VRegister& vd, const VRegister& vn,
2106 const VRegister& vm) {
2107 NEONTable(vd, vn, vm, NEON_TBX_1v);
2108 }
2109
2110 void Assembler::tbx(const VRegister& vd, const VRegister& vn,
2111 const VRegister& vn2, const VRegister& vm) {
2112 USE(vn2);
2113 DCHECK(AreSameFormat(vn, vn2));
2114 DCHECK(AreConsecutive(vn, vn2));
2115 NEONTable(vd, vn, vm, NEON_TBX_2v);
2116 }
2117
2118 void Assembler::tbx(const VRegister& vd, const VRegister& vn,
2119 const VRegister& vn2, const VRegister& vn3,
2120 const VRegister& vm) {
2121 USE(vn2);
2122 USE(vn3);
2123 DCHECK(AreSameFormat(vn, vn2, vn3));
2124 DCHECK(AreConsecutive(vn, vn2, vn3));
2125 NEONTable(vd, vn, vm, NEON_TBX_3v);
2126 }
2127
2128 void Assembler::tbx(const VRegister& vd, const VRegister& vn,
2129 const VRegister& vn2, const VRegister& vn3,
2130 const VRegister& vn4, const VRegister& vm) {
2131 USE(vn2);
2132 USE(vn3);
2133 USE(vn4);
2134 DCHECK(AreSameFormat(vn, vn2, vn3, vn4));
2135 DCHECK(AreConsecutive(vn, vn2, vn3, vn4));
2136 NEONTable(vd, vn, vm, NEON_TBX_4v);
2137 }
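// Added note (not in the original source): tbl and tbx differ only in how
// out-of-range indices in vm are handled; tbl writes zero to the affected
// destination lane, while tbx leaves that destination lane unchanged.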
2138
2139 void Assembler::mov(const VRegister& vd, int vd_index, const VRegister& vn,
2140 int vn_index) {
2141 ins(vd, vd_index, vn, vn_index);
2142 }
2143
2144 void Assembler::mvn(const Register& rd, const Operand& operand) {
2145 orn(rd, AppropriateZeroRegFor(rd), operand);
2146 }
2147
2148 void Assembler::mrs(const Register& rt, SystemRegister sysreg) {
2149 DCHECK(rt.Is64Bits());
2150 Emit(MRS | ImmSystemRegister(sysreg) | Rt(rt));
2151 }
2152
2153 void Assembler::msr(SystemRegister sysreg, const Register& rt) {
2154 DCHECK(rt.Is64Bits());
2155 Emit(MSR | Rt(rt) | ImmSystemRegister(sysreg));
2156 }
2157
2158 void Assembler::hint(SystemHint code) { Emit(HINT | ImmHint(code) | Rt(xzr)); }
2159
2160 // NEON structure loads and stores.
2161 Instr Assembler::LoadStoreStructAddrModeField(const MemOperand& addr) {
2162 Instr addr_field = RnSP(addr.base());
2163
2164 if (addr.IsPostIndex()) {
2165 static_assert(NEONLoadStoreMultiStructPostIndex ==
2166 static_cast<NEONLoadStoreMultiStructPostIndexOp>(
2167 NEONLoadStoreSingleStructPostIndex),
2168 "Opcodes must match for NEON post index memop.");
2169
2170 addr_field |= NEONLoadStoreMultiStructPostIndex;
2171 if (addr.offset() == 0) {
2172 addr_field |= RmNot31(addr.regoffset());
2173 } else {
2174 // The immediate post index addressing mode is indicated by rm = 31.
2175 // The immediate is implied by the number of vector registers used.
2176 addr_field |= (0x1F << Rm_offset);
2177 }
2178 } else {
2179 DCHECK(addr.IsImmediateOffset() && (addr.offset() == 0));
2180 }
2181 return addr_field;
2182 }
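// A sketch of the post-index forms accepted above (added illustration; the
// MemOperand shapes are assumptions based on the addressing modes named in
// this function, not taken from the original source):
//
//   ld1(v0.V16B(), MemOperand(x0, 16, PostIndex));  // rm = 31; the
//       // immediate must equal the register list size, 16 bytes here.
//   // Post-indexing by a core register encodes that register in rm instead.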
2183
2184 void Assembler::LoadStoreStructVerify(const VRegister& vt,
2185 const MemOperand& addr, Instr op) {
2186 #ifdef DEBUG
2187 // Assert that addressing mode is either offset (with immediate 0), post
2188 // index by immediate of the size of the register list, or post index by a
2189 // value in a core register.
2190 if (addr.IsImmediateOffset()) {
2191 DCHECK_EQ(addr.offset(), 0);
2192 } else {
2193 int offset = vt.SizeInBytes();
2194 switch (op) {
2195 case NEON_LD1_1v:
2196 case NEON_ST1_1v:
2197 offset *= 1;
2198 break;
2199 case NEONLoadStoreSingleStructLoad1:
2200 case NEONLoadStoreSingleStructStore1:
2201 case NEON_LD1R:
2202 offset = (offset / vt.LaneCount()) * 1;
2203 break;
2204
2205 case NEON_LD1_2v:
2206 case NEON_ST1_2v:
2207 case NEON_LD2:
2208 case NEON_ST2:
2209 offset *= 2;
2210 break;
2211 case NEONLoadStoreSingleStructLoad2:
2212 case NEONLoadStoreSingleStructStore2:
2213 case NEON_LD2R:
2214 offset = (offset / vt.LaneCount()) * 2;
2215 break;
2216
2217 case NEON_LD1_3v:
2218 case NEON_ST1_3v:
2219 case NEON_LD3:
2220 case NEON_ST3:
2221 offset *= 3;
2222 break;
2223 case NEONLoadStoreSingleStructLoad3:
2224 case NEONLoadStoreSingleStructStore3:
2225 case NEON_LD3R:
2226 offset = (offset / vt.LaneCount()) * 3;
2227 break;
2228
2229 case NEON_LD1_4v:
2230 case NEON_ST1_4v:
2231 case NEON_LD4:
2232 case NEON_ST4:
2233 offset *= 4;
2234 break;
2235 case NEONLoadStoreSingleStructLoad4:
2236 case NEONLoadStoreSingleStructStore4:
2237 case NEON_LD4R:
2238 offset = (offset / vt.LaneCount()) * 4;
2239 break;
2240 default:
2241 UNREACHABLE();
2242 }
2243 DCHECK(addr.regoffset() != NoReg || addr.offset() == offset);
2244 }
2245 #else
2246 USE(vt);
2247 USE(addr);
2248 USE(op);
2249 #endif
2250 }
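// Worked example (added, not part of the original source): for an ld2 of
// two 16B registers, vt.SizeInBytes() is 16 and op is NEON_LD2, so the
// verifier accepts an immediate offset of 0, an immediate post-index of
// 2 * 16 = 32 bytes, or a post-index by a value held in a core register.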
2251
2252 void Assembler::LoadStoreStruct(const VRegister& vt, const MemOperand& addr,
2253 NEONLoadStoreMultiStructOp op) {
2254 LoadStoreStructVerify(vt, addr, op);
2255 DCHECK(vt.IsVector() || vt.Is1D());
2256 Emit(op | LoadStoreStructAddrModeField(addr) | LSVFormat(vt) | Rt(vt));
2257 }
2258
2259 void Assembler::LoadStoreStructSingleAllLanes(const VRegister& vt,
2260 const MemOperand& addr,
2261 NEONLoadStoreSingleStructOp op) {
2262 LoadStoreStructVerify(vt, addr, op);
2263 Emit(op | LoadStoreStructAddrModeField(addr) | LSVFormat(vt) | Rt(vt));
2264 }
2265
2266 void Assembler::ld1(const VRegister& vt, const MemOperand& src) {
2267 LoadStoreStruct(vt, src, NEON_LD1_1v);
2268 }
2269
2270 void Assembler::ld1(const VRegister& vt, const VRegister& vt2,
2271 const MemOperand& src) {
2272 USE(vt2);
2273 DCHECK(AreSameFormat(vt, vt2));
2274 DCHECK(AreConsecutive(vt, vt2));
2275 LoadStoreStruct(vt, src, NEON_LD1_2v);
2276 }
2277
2278 void Assembler::ld1(const VRegister& vt, const VRegister& vt2,
2279 const VRegister& vt3, const MemOperand& src) {
2280 USE(vt2);
2281 USE(vt3);
2282 DCHECK(AreSameFormat(vt, vt2, vt3));
2283 DCHECK(AreConsecutive(vt, vt2, vt3));
2284 LoadStoreStruct(vt, src, NEON_LD1_3v);
2285 }
2286
2287 void Assembler::ld1(const VRegister& vt, const VRegister& vt2,
2288 const VRegister& vt3, const VRegister& vt4,
2289 const MemOperand& src) {
2290 USE(vt2);
2291 USE(vt3);
2292 USE(vt4);
2293 DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
2294 DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
2295 LoadStoreStruct(vt, src, NEON_LD1_4v);
2296 }
2297
2298 void Assembler::ld2(const VRegister& vt, const VRegister& vt2,
2299 const MemOperand& src) {
2300 USE(vt2);
2301 DCHECK(AreSameFormat(vt, vt2));
2302 DCHECK(AreConsecutive(vt, vt2));
2303 LoadStoreStruct(vt, src, NEON_LD2);
2304 }
2305
2306 void Assembler::ld2(const VRegister& vt, const VRegister& vt2, int lane,
2307 const MemOperand& src) {
2308 USE(vt2);
2309 DCHECK(AreSameFormat(vt, vt2));
2310 DCHECK(AreConsecutive(vt, vt2));
2311 LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad2);
2312 }
2313
2314 void Assembler::ld2r(const VRegister& vt, const VRegister& vt2,
2315 const MemOperand& src) {
2316 USE(vt2);
2317 DCHECK(AreSameFormat(vt, vt2));
2318 DCHECK(AreConsecutive(vt, vt2));
2319 LoadStoreStructSingleAllLanes(vt, src, NEON_LD2R);
2320 }
2321
2322 void Assembler::ld3(const VRegister& vt, const VRegister& vt2,
2323 const VRegister& vt3, const MemOperand& src) {
2324 USE(vt2);
2325 USE(vt3);
2326 DCHECK(AreSameFormat(vt, vt2, vt3));
2327 DCHECK(AreConsecutive(vt, vt2, vt3));
2328 LoadStoreStruct(vt, src, NEON_LD3);
2329 }
2330
2331 void Assembler::ld3(const VRegister& vt, const VRegister& vt2,
2332 const VRegister& vt3, int lane, const MemOperand& src) {
2333 USE(vt2);
2334 USE(vt3);
2335 DCHECK(AreSameFormat(vt, vt2, vt3));
2336 DCHECK(AreConsecutive(vt, vt2, vt3));
2337 LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad3);
2338 }
2339
2340 void Assembler::ld3r(const VRegister& vt, const VRegister& vt2,
2341 const VRegister& vt3, const MemOperand& src) {
2342 USE(vt2);
2343 USE(vt3);
2344 DCHECK(AreSameFormat(vt, vt2, vt3));
2345 DCHECK(AreConsecutive(vt, vt2, vt3));
2346 LoadStoreStructSingleAllLanes(vt, src, NEON_LD3R);
2347 }
2348
2349 void Assembler::ld4(const VRegister& vt, const VRegister& vt2,
2350 const VRegister& vt3, const VRegister& vt4,
2351 const MemOperand& src) {
2352 USE(vt2);
2353 USE(vt3);
2354 USE(vt4);
2355 DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
2356 DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
2357 LoadStoreStruct(vt, src, NEON_LD4);
2358 }
2359
2360 void Assembler::ld4(const VRegister& vt, const VRegister& vt2,
2361 const VRegister& vt3, const VRegister& vt4, int lane,
2362 const MemOperand& src) {
2363 USE(vt2);
2364 USE(vt3);
2365 USE(vt4);
2366 DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
2367 DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
2368 LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad4);
2369 }
2370
2371 void Assembler::ld4r(const VRegister& vt, const VRegister& vt2,
2372 const VRegister& vt3, const VRegister& vt4,
2373 const MemOperand& src) {
2374 USE(vt2);
2375 USE(vt3);
2376 USE(vt4);
2377 DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
2378 DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
2379 LoadStoreStructSingleAllLanes(vt, src, NEON_LD4R);
2380 }
2381
2382 void Assembler::st1(const VRegister& vt, const MemOperand& src) {
2383 LoadStoreStruct(vt, src, NEON_ST1_1v);
2384 }
2385
2386 void Assembler::st1(const VRegister& vt, const VRegister& vt2,
2387 const MemOperand& src) {
2388 USE(vt2);
2389 DCHECK(AreSameFormat(vt, vt2));
2390 DCHECK(AreConsecutive(vt, vt2));
2391 LoadStoreStruct(vt, src, NEON_ST1_2v);
2392 }
2393
2394 void Assembler::st1(const VRegister& vt, const VRegister& vt2,
2395 const VRegister& vt3, const MemOperand& src) {
2396 USE(vt2);
2397 USE(vt3);
2398 DCHECK(AreSameFormat(vt, vt2, vt3));
2399 DCHECK(AreConsecutive(vt, vt2, vt3));
2400 LoadStoreStruct(vt, src, NEON_ST1_3v);
2401 }
2402
2403 void Assembler::st1(const VRegister& vt, const VRegister& vt2,
2404 const VRegister& vt3, const VRegister& vt4,
2405 const MemOperand& src) {
2406 USE(vt2);
2407 USE(vt3);
2408 USE(vt4);
2409 DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
2410 DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
2411 LoadStoreStruct(vt, src, NEON_ST1_4v);
2412 }
2413
2414 void Assembler::st2(const VRegister& vt, const VRegister& vt2,
2415 const MemOperand& dst) {
2416 USE(vt2);
2417 DCHECK(AreSameFormat(vt, vt2));
2418 DCHECK(AreConsecutive(vt, vt2));
2419 LoadStoreStruct(vt, dst, NEON_ST2);
2420 }
2421
2422 void Assembler::st2(const VRegister& vt, const VRegister& vt2, int lane,
2423 const MemOperand& dst) {
2424 USE(vt2);
2425 DCHECK(AreSameFormat(vt, vt2));
2426 DCHECK(AreConsecutive(vt, vt2));
2427 LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore2);
2428 }
2429
2430 void Assembler::st3(const VRegister& vt, const VRegister& vt2,
2431 const VRegister& vt3, const MemOperand& dst) {
2432 USE(vt2);
2433 USE(vt3);
2434 DCHECK(AreSameFormat(vt, vt2, vt3));
2435 DCHECK(AreConsecutive(vt, vt2, vt3));
2436 LoadStoreStruct(vt, dst, NEON_ST3);
2437 }
2438
2439 void Assembler::st3(const VRegister& vt, const VRegister& vt2,
2440 const VRegister& vt3, int lane, const MemOperand& dst) {
2441 USE(vt2);
2442 USE(vt3);
2443 DCHECK(AreSameFormat(vt, vt2, vt3));
2444 DCHECK(AreConsecutive(vt, vt2, vt3));
2445 LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore3);
2446 }
2447
2448 void Assembler::st4(const VRegister& vt, const VRegister& vt2,
2449 const VRegister& vt3, const VRegister& vt4,
2450 const MemOperand& dst) {
2451 USE(vt2);
2452 USE(vt3);
2453 USE(vt4);
2454 DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
2455 DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
2456 LoadStoreStruct(vt, dst, NEON_ST4);
2457 }
2458
2459 void Assembler::st4(const VRegister& vt, const VRegister& vt2,
2460 const VRegister& vt3, const VRegister& vt4, int lane,
2461 const MemOperand& dst) {
2462 USE(vt2);
2463 USE(vt3);
2464 USE(vt4);
2465 DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
2466 DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
2467 LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore4);
2468 }
2469
2470 void Assembler::LoadStoreStructSingle(const VRegister& vt, uint32_t lane,
2471 const MemOperand& addr,
2472 NEONLoadStoreSingleStructOp op) {
2473 LoadStoreStructVerify(vt, addr, op);
2474
2475 // We support vt arguments of the form vt.VxT() or vt.T(), where x is the
2476 // number of lanes, and T is b, h, s or d.
2477 unsigned lane_size = vt.LaneSizeInBytes();
2478 DCHECK_LT(lane, kQRegSize / lane_size);
2479
2480 // Lane size is encoded in the opcode field. Lane index is encoded in the Q,
2481 // S and size fields.
2482 lane *= lane_size;
2483
2484 // Encodings for S[0]/D[0] and S[2]/D[1] are distinguished using the least-
2485 // significant bit of the size field, so we increment lane here to account for
2486 // that.
2487 if (lane_size == 8) lane++;
2488
2489 Instr size = (lane << NEONLSSize_offset) & NEONLSSize_mask;
2490 Instr s = (lane << (NEONS_offset - 2)) & NEONS_mask;
2491 Instr q = (lane << (NEONQ_offset - 3)) & NEONQ_mask;
2492
2493 Instr instr = op;
2494 switch (lane_size) {
2495 case 1:
2496 instr |= NEONLoadStoreSingle_b;
2497 break;
2498 case 2:
2499 instr |= NEONLoadStoreSingle_h;
2500 break;
2501 case 4:
2502 instr |= NEONLoadStoreSingle_s;
2503 break;
2504 default:
2505 DCHECK_EQ(lane_size, 8U);
2506 instr |= NEONLoadStoreSingle_d;
2507 }
2508
2509 Emit(instr | LoadStoreStructAddrModeField(addr) | q | size | s | Rt(vt));
2510 }
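// Worked example of the lane encoding above (added, not part of the
// original source): for an S lane with index 2, lane * lane_size = 8
// (0b1000), giving size = 0b00, S = 0 and Q = 1; the byte offset of the
// lane within the Q register is simply split across the three fields.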
2511
2512 void Assembler::ld1(const VRegister& vt, int lane, const MemOperand& src) {
2513 LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad1);
2514 }
2515
2516 void Assembler::ld1r(const VRegister& vt, const MemOperand& src) {
2517 LoadStoreStructSingleAllLanes(vt, src, NEON_LD1R);
2518 }
2519
2520 void Assembler::st1(const VRegister& vt, int lane, const MemOperand& dst) {
2521 LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore1);
2522 }
2523
2524 void Assembler::dmb(BarrierDomain domain, BarrierType type) {
2525 Emit(DMB | ImmBarrierDomain(domain) | ImmBarrierType(type));
2526 }
2527
2528 void Assembler::dsb(BarrierDomain domain, BarrierType type) {
2529 Emit(DSB | ImmBarrierDomain(domain) | ImmBarrierType(type));
2530 }
2531
2532 void Assembler::isb() {
2533 Emit(ISB | ImmBarrierDomain(FullSystem) | ImmBarrierType(BarrierAll));
2534 }
2535
2536 void Assembler::csdb() { hint(CSDB); }
2537
2538 void Assembler::fmov(const VRegister& vd, double imm) {
2539 if (vd.IsScalar()) {
2540 DCHECK(vd.Is1D());
2541 Emit(FMOV_d_imm | Rd(vd) | ImmFP(imm));
2542 } else {
2543 DCHECK(vd.Is2D());
2544 Instr op = NEONModifiedImmediate_MOVI | NEONModifiedImmediateOpBit;
2545 Emit(NEON_Q | op | ImmNEONFP(imm) | NEONCmode(0xF) | Rd(vd));
2546 }
2547 }
2548
2549 void Assembler::fmov(const VRegister& vd, float imm) {
2550 if (vd.IsScalar()) {
2551 DCHECK(vd.Is1S());
2552 Emit(FMOV_s_imm | Rd(vd) | ImmFP(imm));
2553 } else {
2554     DCHECK(vd.Is2S() || vd.Is4S());
2555 Instr op = NEONModifiedImmediate_MOVI;
2556 Instr q = vd.Is4S() ? NEON_Q : 0;
2557 Emit(q | op | ImmNEONFP(imm) | NEONCmode(0xF) | Rd(vd));
2558 }
2559 }
2560
2561 void Assembler::fmov(const Register& rd, const VRegister& fn) {
2562 DCHECK_EQ(rd.SizeInBits(), fn.SizeInBits());
2563 FPIntegerConvertOp op = rd.Is32Bits() ? FMOV_ws : FMOV_xd;
2564 Emit(op | Rd(rd) | Rn(fn));
2565 }
2566
2567 void Assembler::fmov(const VRegister& vd, const Register& rn) {
2568 DCHECK_EQ(vd.SizeInBits(), rn.SizeInBits());
2569 FPIntegerConvertOp op = vd.Is32Bits() ? FMOV_sw : FMOV_dx;
2570 Emit(op | Rd(vd) | Rn(rn));
2571 }
2572
2573 void Assembler::fmov(const VRegister& vd, const VRegister& vn) {
2574 DCHECK_EQ(vd.SizeInBits(), vn.SizeInBits());
2575 Emit(FPType(vd) | FMOV | Rd(vd) | Rn(vn));
2576 }
2577
2578 void Assembler::fmov(const VRegister& vd, int index, const Register& rn) {
2579 DCHECK((index == 1) && vd.Is1D() && rn.IsX());
2580 USE(index);
2581 Emit(FMOV_d1_x | Rd(vd) | Rn(rn));
2582 }
2583
2584 void Assembler::fmov(const Register& rd, const VRegister& vn, int index) {
2585 DCHECK((index == 1) && vn.Is1D() && rd.IsX());
2586 USE(index);
2587 Emit(FMOV_x_d1 | Rd(rd) | Rn(vn));
2588 }
2589
2590 void Assembler::fmadd(const VRegister& fd, const VRegister& fn,
2591 const VRegister& fm, const VRegister& fa) {
2592 FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FMADD_s : FMADD_d);
2593 }
2594
2595 void Assembler::fmsub(const VRegister& fd, const VRegister& fn,
2596 const VRegister& fm, const VRegister& fa) {
2597 FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FMSUB_s : FMSUB_d);
2598 }
2599
2600 void Assembler::fnmadd(const VRegister& fd, const VRegister& fn,
2601 const VRegister& fm, const VRegister& fa) {
2602 FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FNMADD_s : FNMADD_d);
2603 }
2604
2605 void Assembler::fnmsub(const VRegister& fd, const VRegister& fn,
2606 const VRegister& fm, const VRegister& fa) {
2607 FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FNMSUB_s : FNMSUB_d);
2608 }
2609
2610 void Assembler::fnmul(const VRegister& vd, const VRegister& vn,
2611 const VRegister& vm) {
2612 DCHECK(AreSameSizeAndType(vd, vn, vm));
2613 Instr op = vd.Is1S() ? FNMUL_s : FNMUL_d;
2614 Emit(FPType(vd) | op | Rm(vm) | Rn(vn) | Rd(vd));
2615 }
2616
2617 void Assembler::fcmp(const VRegister& fn, const VRegister& fm) {
2618 DCHECK_EQ(fn.SizeInBits(), fm.SizeInBits());
2619 Emit(FPType(fn) | FCMP | Rm(fm) | Rn(fn));
2620 }
2621
2622 void Assembler::fcmp(const VRegister& fn, double value) {
2623 USE(value);
2624 // Although the fcmp instruction can strictly only take an immediate value of
2625 // +0.0, we don't need to check for -0.0 because the sign of 0.0 doesn't
2626 // affect the result of the comparison.
2627 DCHECK_EQ(value, 0.0);
2628 Emit(FPType(fn) | FCMP_zero | Rn(fn));
2629 }
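// Illustrative calls (added, not part of the original source):
//
//   fcmp(d0, d1);   // Register-register compare.
//   fcmp(d0, 0.0);  // FCMP_zero form; no Rm operand is encoded.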
2630
2631 void Assembler::fccmp(const VRegister& fn, const VRegister& fm,
2632 StatusFlags nzcv, Condition cond) {
2633 DCHECK_EQ(fn.SizeInBits(), fm.SizeInBits());
2634 Emit(FPType(fn) | FCCMP | Rm(fm) | Cond(cond) | Rn(fn) | Nzcv(nzcv));
2635 }
2636
2637 void Assembler::fcsel(const VRegister& fd, const VRegister& fn,
2638 const VRegister& fm, Condition cond) {
2639 DCHECK_EQ(fd.SizeInBits(), fn.SizeInBits());
2640 DCHECK_EQ(fd.SizeInBits(), fm.SizeInBits());
2641 Emit(FPType(fd) | FCSEL | Rm(fm) | Cond(cond) | Rn(fn) | Rd(fd));
2642 }
2643
2644 void Assembler::NEONFPConvertToInt(const Register& rd, const VRegister& vn,
2645 Instr op) {
2646 Emit(SF(rd) | FPType(vn) | op | Rn(vn) | Rd(rd));
2647 }
2648
2649 void Assembler::NEONFPConvertToInt(const VRegister& vd, const VRegister& vn,
2650 Instr op) {
2651 if (vn.IsScalar()) {
2652 DCHECK((vd.Is1S() && vn.Is1S()) || (vd.Is1D() && vn.Is1D()));
2653 op |= NEON_Q | NEONScalar;
2654 }
2655 Emit(FPFormat(vn) | op | Rn(vn) | Rd(vd));
2656 }
2657
2658 void Assembler::fcvt(const VRegister& vd, const VRegister& vn) {
2659 FPDataProcessing1SourceOp op;
2660 if (vd.Is1D()) {
2661 DCHECK(vn.Is1S() || vn.Is1H());
2662 op = vn.Is1S() ? FCVT_ds : FCVT_dh;
2663 } else if (vd.Is1S()) {
2664 DCHECK(vn.Is1D() || vn.Is1H());
2665 op = vn.Is1D() ? FCVT_sd : FCVT_sh;
2666 } else {
2667 DCHECK(vd.Is1H());
2668 DCHECK(vn.Is1D() || vn.Is1S());
2669 op = vn.Is1D() ? FCVT_hd : FCVT_hs;
2670 }
2671 FPDataProcessing1Source(vd, vn, op);
2672 }
2673
2674 void Assembler::fcvtl(const VRegister& vd, const VRegister& vn) {
2675 DCHECK((vd.Is4S() && vn.Is4H()) || (vd.Is2D() && vn.Is2S()));
2676 Instr format = vd.Is2D() ? (1 << NEONSize_offset) : 0;
2677 Emit(format | NEON_FCVTL | Rn(vn) | Rd(vd));
2678 }
2679
2680 void Assembler::fcvtl2(const VRegister& vd, const VRegister& vn) {
2681 DCHECK((vd.Is4S() && vn.Is8H()) || (vd.Is2D() && vn.Is4S()));
2682 Instr format = vd.Is2D() ? (1 << NEONSize_offset) : 0;
2683 Emit(NEON_Q | format | NEON_FCVTL | Rn(vn) | Rd(vd));
2684 }
2685
2686 void Assembler::fcvtn(const VRegister& vd, const VRegister& vn) {
2687 DCHECK((vn.Is4S() && vd.Is4H()) || (vn.Is2D() && vd.Is2S()));
2688 Instr format = vn.Is2D() ? (1 << NEONSize_offset) : 0;
2689 Emit(format | NEON_FCVTN | Rn(vn) | Rd(vd));
2690 }
2691
2692 void Assembler::fcvtn2(const VRegister& vd, const VRegister& vn) {
2693 DCHECK((vn.Is4S() && vd.Is8H()) || (vn.Is2D() && vd.Is4S()));
2694 Instr format = vn.Is2D() ? (1 << NEONSize_offset) : 0;
2695 Emit(NEON_Q | format | NEON_FCVTN | Rn(vn) | Rd(vd));
2696 }
2697
2698 void Assembler::fcvtxn(const VRegister& vd, const VRegister& vn) {
2699 Instr format = 1 << NEONSize_offset;
2700 if (vd.IsScalar()) {
2701 DCHECK(vd.Is1S() && vn.Is1D());
2702 Emit(format | NEON_FCVTXN_scalar | Rn(vn) | Rd(vd));
2703 } else {
2704 DCHECK(vd.Is2S() && vn.Is2D());
2705 Emit(format | NEON_FCVTXN | Rn(vn) | Rd(vd));
2706 }
2707 }
2708
2709 void Assembler::fcvtxn2(const VRegister& vd, const VRegister& vn) {
2710 DCHECK(vd.Is4S() && vn.Is2D());
2711 Instr format = 1 << NEONSize_offset;
2712 Emit(NEON_Q | format | NEON_FCVTXN | Rn(vn) | Rd(vd));
2713 }
2714
2715 #define NEON_FP2REGMISC_FCVT_LIST(V) \
2716 V(fcvtnu, NEON_FCVTNU, FCVTNU) \
2717 V(fcvtns, NEON_FCVTNS, FCVTNS) \
2718 V(fcvtpu, NEON_FCVTPU, FCVTPU) \
2719 V(fcvtps, NEON_FCVTPS, FCVTPS) \
2720 V(fcvtmu, NEON_FCVTMU, FCVTMU) \
2721 V(fcvtms, NEON_FCVTMS, FCVTMS) \
2722 V(fcvtau, NEON_FCVTAU, FCVTAU) \
2723 V(fcvtas, NEON_FCVTAS, FCVTAS)
2724
2725 #define DEFINE_ASM_FUNCS(FN, VEC_OP, SCA_OP) \
2726 void Assembler::FN(const Register& rd, const VRegister& vn) { \
2727 NEONFPConvertToInt(rd, vn, SCA_OP); \
2728 } \
2729 void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
2730 NEONFPConvertToInt(vd, vn, VEC_OP); \
2731 }
2732 NEON_FP2REGMISC_FCVT_LIST(DEFINE_ASM_FUNCS)
2733 #undef DEFINE_ASM_FUNCS
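// Added note (not in the original source): each list entry defines two
// overloads, e.g. V(fcvtnu, NEON_FCVTNU, FCVTNU) produces
// fcvtnu(const Register&, const VRegister&) using the scalar FCVTNU
// encoding and fcvtnu(const VRegister&, const VRegister&) using the
// vector NEON_FCVTNU encoding.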
2734
2735 void Assembler::scvtf(const VRegister& vd, const VRegister& vn, int fbits) {
2736 DCHECK_GE(fbits, 0);
2737 if (fbits == 0) {
2738 NEONFP2RegMisc(vd, vn, NEON_SCVTF);
2739 } else {
2740 DCHECK(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
2741 NEONShiftRightImmediate(vd, vn, fbits, NEON_SCVTF_imm);
2742 }
2743 }
2744
ucvtf(const VRegister & vd,const VRegister & vn,int fbits)2745 void Assembler::ucvtf(const VRegister& vd, const VRegister& vn, int fbits) {
2746 DCHECK_GE(fbits, 0);
2747 if (fbits == 0) {
2748 NEONFP2RegMisc(vd, vn, NEON_UCVTF);
2749 } else {
2750 DCHECK(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
2751 NEONShiftRightImmediate(vd, vn, fbits, NEON_UCVTF_imm);
2752 }
2753 }
2754
scvtf(const VRegister & vd,const Register & rn,int fbits)2755 void Assembler::scvtf(const VRegister& vd, const Register& rn, int fbits) {
2756 DCHECK_GE(fbits, 0);
2757 if (fbits == 0) {
2758 Emit(SF(rn) | FPType(vd) | SCVTF | Rn(rn) | Rd(vd));
2759 } else {
2760 Emit(SF(rn) | FPType(vd) | SCVTF_fixed | FPScale(64 - fbits) | Rn(rn) |
2761 Rd(vd));
2762 }
2763 }
2764
ucvtf(const VRegister & fd,const Register & rn,int fbits)2765 void Assembler::ucvtf(const VRegister& fd, const Register& rn, int fbits) {
2766 DCHECK_GE(fbits, 0);
2767 if (fbits == 0) {
2768 Emit(SF(rn) | FPType(fd) | UCVTF | Rn(rn) | Rd(fd));
2769 } else {
2770 Emit(SF(rn) | FPType(fd) | UCVTF_fixed | FPScale(64 - fbits) | Rn(rn) |
2771 Rd(fd));
2772 }
2773 }
2774
NEON3Same(const VRegister & vd,const VRegister & vn,const VRegister & vm,NEON3SameOp vop)2775 void Assembler::NEON3Same(const VRegister& vd, const VRegister& vn,
2776 const VRegister& vm, NEON3SameOp vop) {
2777 DCHECK(AreSameFormat(vd, vn, vm));
2778 DCHECK(vd.IsVector() || !vd.IsQ());
2779
2780 Instr format, op = vop;
2781 if (vd.IsScalar()) {
2782 op |= NEON_Q | NEONScalar;
2783 format = SFormat(vd);
2784 } else {
2785 format = VFormat(vd);
2786 }
2787
2788 Emit(format | op | Rm(vm) | Rn(vn) | Rd(vd));
2789 }
2790
NEONFP3Same(const VRegister & vd,const VRegister & vn,const VRegister & vm,Instr op)2791 void Assembler::NEONFP3Same(const VRegister& vd, const VRegister& vn,
2792 const VRegister& vm, Instr op) {
2793 DCHECK(AreSameFormat(vd, vn, vm));
2794 Emit(FPFormat(vd) | op | Rm(vm) | Rn(vn) | Rd(vd));
2795 }
2796
2797 #define NEON_FP2REGMISC_LIST(V) \
2798 V(fabs, NEON_FABS, FABS) \
2799 V(fneg, NEON_FNEG, FNEG) \
2800 V(fsqrt, NEON_FSQRT, FSQRT) \
2801 V(frintn, NEON_FRINTN, FRINTN) \
2802 V(frinta, NEON_FRINTA, FRINTA) \
2803 V(frintp, NEON_FRINTP, FRINTP) \
2804 V(frintm, NEON_FRINTM, FRINTM) \
2805 V(frintx, NEON_FRINTX, FRINTX) \
2806 V(frintz, NEON_FRINTZ, FRINTZ) \
2807 V(frinti, NEON_FRINTI, FRINTI) \
2808 V(frsqrte, NEON_FRSQRTE, NEON_FRSQRTE_scalar) \
2809 V(frecpe, NEON_FRECPE, NEON_FRECPE_scalar)
2810
2811 #define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \
2812 void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
2813 Instr op; \
2814 if (vd.IsScalar()) { \
2815 DCHECK(vd.Is1S() || vd.Is1D()); \
2816 op = SCA_OP; \
2817 } else { \
2818 DCHECK(vd.Is2S() || vd.Is2D() || vd.Is4S()); \
2819 op = VEC_OP; \
2820 } \
2821 NEONFP2RegMisc(vd, vn, op); \
2822 }
NEON_FP2REGMISC_LIST(DEFINE_ASM_FUNC)2823 NEON_FP2REGMISC_LIST(DEFINE_ASM_FUNC)
2824 #undef DEFINE_ASM_FUNC
2825
2826 void Assembler::shll(const VRegister& vd, const VRegister& vn, int shift) {
2827 DCHECK((vd.Is8H() && vn.Is8B() && shift == 8) ||
2828 (vd.Is4S() && vn.Is4H() && shift == 16) ||
2829 (vd.Is2D() && vn.Is2S() && shift == 32));
2830 USE(shift);
2831 Emit(VFormat(vn) | NEON_SHLL | Rn(vn) | Rd(vd));
2832 }
2833
shll2(const VRegister & vd,const VRegister & vn,int shift)2834 void Assembler::shll2(const VRegister& vd, const VRegister& vn, int shift) {
2835 USE(shift);
2836 DCHECK((vd.Is8H() && vn.Is16B() && shift == 8) ||
2837 (vd.Is4S() && vn.Is8H() && shift == 16) ||
2838 (vd.Is2D() && vn.Is4S() && shift == 32));
2839 Emit(VFormat(vn) | NEON_SHLL | Rn(vn) | Rd(vd));
2840 }
2841
NEONFP2RegMisc(const VRegister & vd,const VRegister & vn,NEON2RegMiscOp vop,double value)2842 void Assembler::NEONFP2RegMisc(const VRegister& vd, const VRegister& vn,
2843 NEON2RegMiscOp vop, double value) {
2844 DCHECK(AreSameFormat(vd, vn));
2845 DCHECK_EQ(value, 0.0);
2846 USE(value);
2847
2848 Instr op = vop;
2849 if (vd.IsScalar()) {
2850 DCHECK(vd.Is1S() || vd.Is1D());
2851 op |= NEON_Q | NEONScalar;
2852 } else {
2853 DCHECK(vd.Is2S() || vd.Is2D() || vd.Is4S());
2854 }
2855
2856 Emit(FPFormat(vd) | op | Rn(vn) | Rd(vd));
2857 }
2858
fcmeq(const VRegister & vd,const VRegister & vn,double value)2859 void Assembler::fcmeq(const VRegister& vd, const VRegister& vn, double value) {
2860 NEONFP2RegMisc(vd, vn, NEON_FCMEQ_zero, value);
2861 }
2862
fcmge(const VRegister & vd,const VRegister & vn,double value)2863 void Assembler::fcmge(const VRegister& vd, const VRegister& vn, double value) {
2864 NEONFP2RegMisc(vd, vn, NEON_FCMGE_zero, value);
2865 }
2866
fcmgt(const VRegister & vd,const VRegister & vn,double value)2867 void Assembler::fcmgt(const VRegister& vd, const VRegister& vn, double value) {
2868 NEONFP2RegMisc(vd, vn, NEON_FCMGT_zero, value);
2869 }
2870
fcmle(const VRegister & vd,const VRegister & vn,double value)2871 void Assembler::fcmle(const VRegister& vd, const VRegister& vn, double value) {
2872 NEONFP2RegMisc(vd, vn, NEON_FCMLE_zero, value);
2873 }
2874
fcmlt(const VRegister & vd,const VRegister & vn,double value)2875 void Assembler::fcmlt(const VRegister& vd, const VRegister& vn, double value) {
2876 NEONFP2RegMisc(vd, vn, NEON_FCMLT_zero, value);
2877 }
2878
frecpx(const VRegister & vd,const VRegister & vn)2879 void Assembler::frecpx(const VRegister& vd, const VRegister& vn) {
2880 DCHECK(vd.IsScalar());
2881 DCHECK(AreSameFormat(vd, vn));
2882 DCHECK(vd.Is1S() || vd.Is1D());
2883 Emit(FPFormat(vd) | NEON_FRECPX_scalar | Rn(vn) | Rd(vd));
2884 }
2885
fcvtzs(const Register & rd,const VRegister & vn,int fbits)2886 void Assembler::fcvtzs(const Register& rd, const VRegister& vn, int fbits) {
2887 DCHECK(vn.Is1S() || vn.Is1D());
2888 DCHECK((fbits >= 0) && (fbits <= rd.SizeInBits()));
2889 if (fbits == 0) {
2890 Emit(SF(rd) | FPType(vn) | FCVTZS | Rn(vn) | Rd(rd));
2891 } else {
2892 Emit(SF(rd) | FPType(vn) | FCVTZS_fixed | FPScale(64 - fbits) | Rn(vn) |
2893 Rd(rd));
2894 }
2895 }
2896
fcvtzs(const VRegister & vd,const VRegister & vn,int fbits)2897 void Assembler::fcvtzs(const VRegister& vd, const VRegister& vn, int fbits) {
2898 DCHECK_GE(fbits, 0);
2899 if (fbits == 0) {
2900 NEONFP2RegMisc(vd, vn, NEON_FCVTZS);
2901 } else {
2902 DCHECK(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
2903 NEONShiftRightImmediate(vd, vn, fbits, NEON_FCVTZS_imm);
2904 }
2905 }
2906
fcvtzu(const Register & rd,const VRegister & vn,int fbits)2907 void Assembler::fcvtzu(const Register& rd, const VRegister& vn, int fbits) {
2908 DCHECK(vn.Is1S() || vn.Is1D());
2909 DCHECK((fbits >= 0) && (fbits <= rd.SizeInBits()));
2910 if (fbits == 0) {
2911 Emit(SF(rd) | FPType(vn) | FCVTZU | Rn(vn) | Rd(rd));
2912 } else {
2913 Emit(SF(rd) | FPType(vn) | FCVTZU_fixed | FPScale(64 - fbits) | Rn(vn) |
2914 Rd(rd));
2915 }
2916 }
2917
fcvtzu(const VRegister & vd,const VRegister & vn,int fbits)2918 void Assembler::fcvtzu(const VRegister& vd, const VRegister& vn, int fbits) {
2919 DCHECK_GE(fbits, 0);
2920 if (fbits == 0) {
2921 NEONFP2RegMisc(vd, vn, NEON_FCVTZU);
2922 } else {
2923 DCHECK(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
2924 NEONShiftRightImmediate(vd, vn, fbits, NEON_FCVTZU_imm);
2925 }
2926 }
2927
NEONFP2RegMisc(const VRegister & vd,const VRegister & vn,Instr op)2928 void Assembler::NEONFP2RegMisc(const VRegister& vd, const VRegister& vn,
2929 Instr op) {
2930 DCHECK(AreSameFormat(vd, vn));
2931 Emit(FPFormat(vd) | op | Rn(vn) | Rd(vd));
2932 }
2933
NEON2RegMisc(const VRegister & vd,const VRegister & vn,NEON2RegMiscOp vop,int value)2934 void Assembler::NEON2RegMisc(const VRegister& vd, const VRegister& vn,
2935 NEON2RegMiscOp vop, int value) {
2936 DCHECK(AreSameFormat(vd, vn));
2937 DCHECK_EQ(value, 0);
2938 USE(value);
2939
2940 Instr format, op = vop;
2941 if (vd.IsScalar()) {
2942 op |= NEON_Q | NEONScalar;
2943 format = SFormat(vd);
2944 } else {
2945 format = VFormat(vd);
2946 }
2947
2948 Emit(format | op | Rn(vn) | Rd(vd));
2949 }
2950
cmeq(const VRegister & vd,const VRegister & vn,int value)2951 void Assembler::cmeq(const VRegister& vd, const VRegister& vn, int value) {
2952 DCHECK(vd.IsVector() || vd.Is1D());
2953 NEON2RegMisc(vd, vn, NEON_CMEQ_zero, value);
2954 }
2955
cmge(const VRegister & vd,const VRegister & vn,int value)2956 void Assembler::cmge(const VRegister& vd, const VRegister& vn, int value) {
2957 DCHECK(vd.IsVector() || vd.Is1D());
2958 NEON2RegMisc(vd, vn, NEON_CMGE_zero, value);
2959 }
2960
cmgt(const VRegister & vd,const VRegister & vn,int value)2961 void Assembler::cmgt(const VRegister& vd, const VRegister& vn, int value) {
2962 DCHECK(vd.IsVector() || vd.Is1D());
2963 NEON2RegMisc(vd, vn, NEON_CMGT_zero, value);
2964 }
2965
cmle(const VRegister & vd,const VRegister & vn,int value)2966 void Assembler::cmle(const VRegister& vd, const VRegister& vn, int value) {
2967 DCHECK(vd.IsVector() || vd.Is1D());
2968 NEON2RegMisc(vd, vn, NEON_CMLE_zero, value);
2969 }
2970
cmlt(const VRegister & vd,const VRegister & vn,int value)2971 void Assembler::cmlt(const VRegister& vd, const VRegister& vn, int value) {
2972 DCHECK(vd.IsVector() || vd.Is1D());
2973 NEON2RegMisc(vd, vn, NEON_CMLT_zero, value);
2974 }
2975
2976 #define NEON_3SAME_LIST(V) \
2977 V(add, NEON_ADD, vd.IsVector() || vd.Is1D()) \
2978 V(addp, NEON_ADDP, vd.IsVector() || vd.Is1D()) \
2979 V(sub, NEON_SUB, vd.IsVector() || vd.Is1D()) \
2980 V(cmeq, NEON_CMEQ, vd.IsVector() || vd.Is1D()) \
2981 V(cmge, NEON_CMGE, vd.IsVector() || vd.Is1D()) \
2982 V(cmgt, NEON_CMGT, vd.IsVector() || vd.Is1D()) \
2983 V(cmhi, NEON_CMHI, vd.IsVector() || vd.Is1D()) \
2984 V(cmhs, NEON_CMHS, vd.IsVector() || vd.Is1D()) \
2985 V(cmtst, NEON_CMTST, vd.IsVector() || vd.Is1D()) \
2986 V(sshl, NEON_SSHL, vd.IsVector() || vd.Is1D()) \
2987 V(ushl, NEON_USHL, vd.IsVector() || vd.Is1D()) \
2988 V(srshl, NEON_SRSHL, vd.IsVector() || vd.Is1D()) \
2989 V(urshl, NEON_URSHL, vd.IsVector() || vd.Is1D()) \
2990 V(sqdmulh, NEON_SQDMULH, vd.IsLaneSizeH() || vd.IsLaneSizeS()) \
2991 V(sqrdmulh, NEON_SQRDMULH, vd.IsLaneSizeH() || vd.IsLaneSizeS()) \
2992 V(shadd, NEON_SHADD, vd.IsVector() && !vd.IsLaneSizeD()) \
2993 V(uhadd, NEON_UHADD, vd.IsVector() && !vd.IsLaneSizeD()) \
2994 V(srhadd, NEON_SRHADD, vd.IsVector() && !vd.IsLaneSizeD()) \
2995 V(urhadd, NEON_URHADD, vd.IsVector() && !vd.IsLaneSizeD()) \
2996 V(shsub, NEON_SHSUB, vd.IsVector() && !vd.IsLaneSizeD()) \
2997 V(uhsub, NEON_UHSUB, vd.IsVector() && !vd.IsLaneSizeD()) \
2998 V(smax, NEON_SMAX, vd.IsVector() && !vd.IsLaneSizeD()) \
2999 V(smaxp, NEON_SMAXP, vd.IsVector() && !vd.IsLaneSizeD()) \
3000 V(smin, NEON_SMIN, vd.IsVector() && !vd.IsLaneSizeD()) \
3001 V(sminp, NEON_SMINP, vd.IsVector() && !vd.IsLaneSizeD()) \
3002 V(umax, NEON_UMAX, vd.IsVector() && !vd.IsLaneSizeD()) \
3003 V(umaxp, NEON_UMAXP, vd.IsVector() && !vd.IsLaneSizeD()) \
3004 V(umin, NEON_UMIN, vd.IsVector() && !vd.IsLaneSizeD()) \
3005 V(uminp, NEON_UMINP, vd.IsVector() && !vd.IsLaneSizeD()) \
3006 V(saba, NEON_SABA, vd.IsVector() && !vd.IsLaneSizeD()) \
3007 V(sabd, NEON_SABD, vd.IsVector() && !vd.IsLaneSizeD()) \
3008 V(uaba, NEON_UABA, vd.IsVector() && !vd.IsLaneSizeD()) \
3009 V(uabd, NEON_UABD, vd.IsVector() && !vd.IsLaneSizeD()) \
3010 V(mla, NEON_MLA, vd.IsVector() && !vd.IsLaneSizeD()) \
3011 V(mls, NEON_MLS, vd.IsVector() && !vd.IsLaneSizeD()) \
3012 V(mul, NEON_MUL, vd.IsVector() && !vd.IsLaneSizeD()) \
3013 V(and_, NEON_AND, vd.Is8B() || vd.Is16B()) \
3014 V(orr, NEON_ORR, vd.Is8B() || vd.Is16B()) \
3015 V(orn, NEON_ORN, vd.Is8B() || vd.Is16B()) \
3016 V(eor, NEON_EOR, vd.Is8B() || vd.Is16B()) \
3017 V(bic, NEON_BIC, vd.Is8B() || vd.Is16B()) \
3018 V(bit, NEON_BIT, vd.Is8B() || vd.Is16B()) \
3019 V(bif, NEON_BIF, vd.Is8B() || vd.Is16B()) \
3020 V(bsl, NEON_BSL, vd.Is8B() || vd.Is16B()) \
3021 V(pmul, NEON_PMUL, vd.Is8B() || vd.Is16B()) \
3022 V(uqadd, NEON_UQADD, true) \
3023 V(sqadd, NEON_SQADD, true) \
3024 V(uqsub, NEON_UQSUB, true) \
3025 V(sqsub, NEON_SQSUB, true) \
3026 V(sqshl, NEON_SQSHL, true) \
3027 V(uqshl, NEON_UQSHL, true) \
3028 V(sqrshl, NEON_SQRSHL, true) \
3029 V(uqrshl, NEON_UQRSHL, true)
3030
3031 #define DEFINE_ASM_FUNC(FN, OP, AS) \
3032 void Assembler::FN(const VRegister& vd, const VRegister& vn, \
3033 const VRegister& vm) { \
3034 DCHECK(AS); \
3035 NEON3Same(vd, vn, vm, OP); \
3036 }
3037 NEON_3SAME_LIST(DEFINE_ASM_FUNC)
3038 #undef DEFINE_ASM_FUNC
3039
3040 #define NEON_FP3SAME_LIST_V2(V) \
3041 V(fadd, NEON_FADD, FADD) \
3042 V(fsub, NEON_FSUB, FSUB) \
3043 V(fmul, NEON_FMUL, FMUL) \
3044 V(fdiv, NEON_FDIV, FDIV) \
3045 V(fmax, NEON_FMAX, FMAX) \
3046 V(fmaxnm, NEON_FMAXNM, FMAXNM) \
3047 V(fmin, NEON_FMIN, FMIN) \
3048 V(fminnm, NEON_FMINNM, FMINNM) \
3049 V(fmulx, NEON_FMULX, NEON_FMULX_scalar) \
3050 V(frecps, NEON_FRECPS, NEON_FRECPS_scalar) \
3051 V(frsqrts, NEON_FRSQRTS, NEON_FRSQRTS_scalar) \
3052 V(fabd, NEON_FABD, NEON_FABD_scalar) \
3053 V(fmla, NEON_FMLA, 0) \
3054 V(fmls, NEON_FMLS, 0) \
3055 V(facge, NEON_FACGE, NEON_FACGE_scalar) \
3056 V(facgt, NEON_FACGT, NEON_FACGT_scalar) \
3057 V(fcmeq, NEON_FCMEQ, NEON_FCMEQ_scalar) \
3058 V(fcmge, NEON_FCMGE, NEON_FCMGE_scalar) \
3059 V(fcmgt, NEON_FCMGT, NEON_FCMGT_scalar) \
3060 V(faddp, NEON_FADDP, 0) \
3061 V(fmaxp, NEON_FMAXP, 0) \
3062 V(fminp, NEON_FMINP, 0) \
3063 V(fmaxnmp, NEON_FMAXNMP, 0) \
3064 V(fminnmp, NEON_FMINNMP, 0)
3065
3066 #define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP) \
3067 void Assembler::FN(const VRegister& vd, const VRegister& vn, \
3068 const VRegister& vm) { \
3069 Instr op; \
3070 if ((SCA_OP != 0) && vd.IsScalar()) { \
3071 DCHECK(vd.Is1S() || vd.Is1D()); \
3072 op = SCA_OP; \
3073 } else { \
3074 DCHECK(vd.IsVector()); \
3075 DCHECK(vd.Is2S() || vd.Is2D() || vd.Is4S()); \
3076 op = VEC_OP; \
3077 } \
3078 NEONFP3Same(vd, vn, vm, op); \
3079 }
NEON_FP3SAME_LIST_V2(DEFINE_ASM_FUNC)3080 NEON_FP3SAME_LIST_V2(DEFINE_ASM_FUNC)
3081 #undef DEFINE_ASM_FUNC
3082
3083 void Assembler::addp(const VRegister& vd, const VRegister& vn) {
3084 DCHECK((vd.Is1D() && vn.Is2D()));
3085 Emit(SFormat(vd) | NEON_ADDP_scalar | Rn(vn) | Rd(vd));
3086 }
3087
faddp(const VRegister & vd,const VRegister & vn)3088 void Assembler::faddp(const VRegister& vd, const VRegister& vn) {
3089 DCHECK((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
3090 Emit(FPFormat(vd) | NEON_FADDP_scalar | Rn(vn) | Rd(vd));
3091 }
3092
fmaxp(const VRegister & vd,const VRegister & vn)3093 void Assembler::fmaxp(const VRegister& vd, const VRegister& vn) {
3094 DCHECK((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
3095 Emit(FPFormat(vd) | NEON_FMAXP_scalar | Rn(vn) | Rd(vd));
3096 }
3097
fminp(const VRegister & vd,const VRegister & vn)3098 void Assembler::fminp(const VRegister& vd, const VRegister& vn) {
3099 DCHECK((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
3100 Emit(FPFormat(vd) | NEON_FMINP_scalar | Rn(vn) | Rd(vd));
3101 }
3102
fmaxnmp(const VRegister & vd,const VRegister & vn)3103 void Assembler::fmaxnmp(const VRegister& vd, const VRegister& vn) {
3104 DCHECK((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
3105 Emit(FPFormat(vd) | NEON_FMAXNMP_scalar | Rn(vn) | Rd(vd));
3106 }
3107
fminnmp(const VRegister & vd,const VRegister & vn)3108 void Assembler::fminnmp(const VRegister& vd, const VRegister& vn) {
3109 DCHECK((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
3110 Emit(FPFormat(vd) | NEON_FMINNMP_scalar | Rn(vn) | Rd(vd));
3111 }
3112
orr(const VRegister & vd,const int imm8,const int left_shift)3113 void Assembler::orr(const VRegister& vd, const int imm8, const int left_shift) {
3114 NEONModifiedImmShiftLsl(vd, imm8, left_shift, NEONModifiedImmediate_ORR);
3115 }
3116
mov(const VRegister & vd,const VRegister & vn)3117 void Assembler::mov(const VRegister& vd, const VRegister& vn) {
3118 DCHECK(AreSameFormat(vd, vn));
3119 if (vd.IsD()) {
3120 orr(vd.V8B(), vn.V8B(), vn.V8B());
3121 } else {
3122 DCHECK(vd.IsQ());
3123 orr(vd.V16B(), vn.V16B(), vn.V16B());
3124 }
3125 }
3126
bic(const VRegister & vd,const int imm8,const int left_shift)3127 void Assembler::bic(const VRegister& vd, const int imm8, const int left_shift) {
3128 NEONModifiedImmShiftLsl(vd, imm8, left_shift, NEONModifiedImmediate_BIC);
3129 }
3130
movi(const VRegister & vd,const uint64_t imm,Shift shift,const int shift_amount)3131 void Assembler::movi(const VRegister& vd, const uint64_t imm, Shift shift,
3132 const int shift_amount) {
3133 DCHECK((shift == LSL) || (shift == MSL));
3134 if (vd.Is2D() || vd.Is1D()) {
3135 DCHECK_EQ(shift_amount, 0);
3136 int imm8 = 0;
3137 for (int i = 0; i < 8; ++i) {
3138 int byte = (imm >> (i * 8)) & 0xFF;
3139 DCHECK((byte == 0) || (byte == 0xFF));
3140 if (byte == 0xFF) {
3141 imm8 |= (1 << i);
3142 }
3143 }
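    // Illustrative example (not from the original source): for
    // imm == 0x00FF00FF00FF00FF the loop above collapses each all-ones byte
    // into one bit, giving imm8 == 0b01010101.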
    Instr q = vd.Is2D() ? NEON_Q : 0;
    Emit(q | NEONModImmOp(1) | NEONModifiedImmediate_MOVI |
         ImmNEONabcdefgh(imm8) | NEONCmode(0xE) | Rd(vd));
  } else if (shift == LSL) {
    NEONModifiedImmShiftLsl(vd, static_cast<int>(imm), shift_amount,
                            NEONModifiedImmediate_MOVI);
  } else {
    NEONModifiedImmShiftMsl(vd, static_cast<int>(imm), shift_amount,
                            NEONModifiedImmediate_MOVI);
  }
}

void Assembler::mvn(const VRegister& vd, const VRegister& vn) {
  DCHECK(AreSameFormat(vd, vn));
  if (vd.IsD()) {
    not_(vd.V8B(), vn.V8B());
  } else {
    DCHECK(vd.IsQ());
    not_(vd.V16B(), vn.V16B());
  }
}

void Assembler::mvni(const VRegister& vd, const int imm8, Shift shift,
                     const int shift_amount) {
  DCHECK((shift == LSL) || (shift == MSL));
  if (shift == LSL) {
    NEONModifiedImmShiftLsl(vd, imm8, shift_amount, NEONModifiedImmediate_MVNI);
  } else {
    NEONModifiedImmShiftMsl(vd, imm8, shift_amount, NEONModifiedImmediate_MVNI);
  }
}

void Assembler::NEONFPByElement(const VRegister& vd, const VRegister& vn,
                                const VRegister& vm, int vm_index,
                                NEONByIndexedElementOp vop) {
  DCHECK(AreSameFormat(vd, vn));
  DCHECK((vd.Is2S() && vm.Is1S()) || (vd.Is4S() && vm.Is1S()) ||
         (vd.Is1S() && vm.Is1S()) || (vd.Is2D() && vm.Is1D()) ||
         (vd.Is1D() && vm.Is1D()));
  DCHECK((vm.Is1S() && (vm_index < 4)) || (vm.Is1D() && (vm_index < 2)));

  Instr op = vop;
  int index_num_bits = vm.Is1S() ? 2 : 1;
  if (vd.IsScalar()) {
    op |= NEON_Q | NEONScalar;
  }

  Emit(FPFormat(vd) | op | ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) |
       Rn(vn) | Rd(vd));
}

void Assembler::NEONByElement(const VRegister& vd, const VRegister& vn,
                              const VRegister& vm, int vm_index,
                              NEONByIndexedElementOp vop) {
  DCHECK(AreSameFormat(vd, vn));
  DCHECK((vd.Is4H() && vm.Is1H()) || (vd.Is8H() && vm.Is1H()) ||
         (vd.Is1H() && vm.Is1H()) || (vd.Is2S() && vm.Is1S()) ||
         (vd.Is4S() && vm.Is1S()) || (vd.Is1S() && vm.Is1S()));
  DCHECK((vm.Is1H() && (vm.code() < 16) && (vm_index < 8)) ||
         (vm.Is1S() && (vm_index < 4)));

  Instr format, op = vop;
  int index_num_bits = vm.Is1H() ? 3 : 2;
  if (vd.IsScalar()) {
    op |= NEONScalar | NEON_Q;
    format = SFormat(vn);
  } else {
    format = VFormat(vn);
  }
  Emit(format | op | ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) | Rn(vn) |
       Rd(vd));
}

void Assembler::NEONByElementL(const VRegister& vd, const VRegister& vn,
                               const VRegister& vm, int vm_index,
                               NEONByIndexedElementOp vop) {
  DCHECK((vd.Is4S() && vn.Is4H() && vm.Is1H()) ||
         (vd.Is4S() && vn.Is8H() && vm.Is1H()) ||
         (vd.Is1S() && vn.Is1H() && vm.Is1H()) ||
         (vd.Is2D() && vn.Is2S() && vm.Is1S()) ||
         (vd.Is2D() && vn.Is4S() && vm.Is1S()) ||
         (vd.Is1D() && vn.Is1S() && vm.Is1S()));

  DCHECK((vm.Is1H() && (vm.code() < 16) && (vm_index < 8)) ||
         (vm.Is1S() && (vm_index < 4)));

  Instr format, op = vop;
  int index_num_bits = vm.Is1H() ? 3 : 2;
  if (vd.IsScalar()) {
    op |= NEONScalar | NEON_Q;
    format = SFormat(vn);
  } else {
    format = VFormat(vn);
  }
  Emit(format | op | ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) | Rn(vn) |
       Rd(vd));
}

#define NEON_BYELEMENT_LIST(V)              \
  V(mul, NEON_MUL_byelement, vn.IsVector()) \
  V(mla, NEON_MLA_byelement, vn.IsVector()) \
  V(mls, NEON_MLS_byelement, vn.IsVector()) \
  V(sqdmulh, NEON_SQDMULH_byelement, true)  \
  V(sqrdmulh, NEON_SQRDMULH_byelement, true)

#define DEFINE_ASM_FUNC(FN, OP, AS)                            \
  void Assembler::FN(const VRegister& vd, const VRegister& vn, \
                     const VRegister& vm, int vm_index) {      \
    DCHECK(AS);                                                \
    NEONByElement(vd, vn, vm, vm_index, OP);                   \
  }
NEON_BYELEMENT_LIST(DEFINE_ASM_FUNC)
#undef DEFINE_ASM_FUNC

#define NEON_FPBYELEMENT_LIST(V) \
  V(fmul, NEON_FMUL_byelement)   \
  V(fmla, NEON_FMLA_byelement)   \
  V(fmls, NEON_FMLS_byelement)   \
  V(fmulx, NEON_FMULX_byelement)

#define DEFINE_ASM_FUNC(FN, OP)                                \
  void Assembler::FN(const VRegister& vd, const VRegister& vn, \
                     const VRegister& vm, int vm_index) {      \
    NEONFPByElement(vd, vn, vm, vm_index, OP);                 \
  }
NEON_FPBYELEMENT_LIST(DEFINE_ASM_FUNC)
#undef DEFINE_ASM_FUNC

#define NEON_BYELEMENT_LONG_LIST(V)                              \
  V(sqdmull, NEON_SQDMULL_byelement, vn.IsScalar() || vn.IsD())  \
  V(sqdmull2, NEON_SQDMULL_byelement, vn.IsVector() && vn.IsQ()) \
  V(sqdmlal, NEON_SQDMLAL_byelement, vn.IsScalar() || vn.IsD())  \
  V(sqdmlal2, NEON_SQDMLAL_byelement, vn.IsVector() && vn.IsQ()) \
  V(sqdmlsl, NEON_SQDMLSL_byelement, vn.IsScalar() || vn.IsD())  \
  V(sqdmlsl2, NEON_SQDMLSL_byelement, vn.IsVector() && vn.IsQ()) \
  V(smull, NEON_SMULL_byelement, vn.IsVector() && vn.IsD())      \
  V(smull2, NEON_SMULL_byelement, vn.IsVector() && vn.IsQ())     \
  V(umull, NEON_UMULL_byelement, vn.IsVector() && vn.IsD())      \
  V(umull2, NEON_UMULL_byelement, vn.IsVector() && vn.IsQ())     \
  V(smlal, NEON_SMLAL_byelement, vn.IsVector() && vn.IsD())      \
  V(smlal2, NEON_SMLAL_byelement, vn.IsVector() && vn.IsQ())     \
  V(umlal, NEON_UMLAL_byelement, vn.IsVector() && vn.IsD())      \
  V(umlal2, NEON_UMLAL_byelement, vn.IsVector() && vn.IsQ())     \
  V(smlsl, NEON_SMLSL_byelement, vn.IsVector() && vn.IsD())      \
  V(smlsl2, NEON_SMLSL_byelement, vn.IsVector() && vn.IsQ())     \
  V(umlsl, NEON_UMLSL_byelement, vn.IsVector() && vn.IsD())      \
  V(umlsl2, NEON_UMLSL_byelement, vn.IsVector() && vn.IsQ())

#define DEFINE_ASM_FUNC(FN, OP, AS)                            \
  void Assembler::FN(const VRegister& vd, const VRegister& vn, \
                     const VRegister& vm, int vm_index) {      \
    DCHECK(AS);                                                \
    NEONByElementL(vd, vn, vm, vm_index, OP);                  \
  }
NEON_BYELEMENT_LONG_LIST(DEFINE_ASM_FUNC)
#undef DEFINE_ASM_FUNC

void Assembler::suqadd(const VRegister& vd, const VRegister& vn) {
  NEON2RegMisc(vd, vn, NEON_SUQADD);
}

void Assembler::usqadd(const VRegister& vd, const VRegister& vn) {
  NEON2RegMisc(vd, vn, NEON_USQADD);
}

void Assembler::abs(const VRegister& vd, const VRegister& vn) {
  DCHECK(vd.IsVector() || vd.Is1D());
  NEON2RegMisc(vd, vn, NEON_ABS);
}

void Assembler::sqabs(const VRegister& vd, const VRegister& vn) {
  NEON2RegMisc(vd, vn, NEON_SQABS);
}

void Assembler::neg(const VRegister& vd, const VRegister& vn) {
  DCHECK(vd.IsVector() || vd.Is1D());
  NEON2RegMisc(vd, vn, NEON_NEG);
}

void Assembler::sqneg(const VRegister& vd, const VRegister& vn) {
  NEON2RegMisc(vd, vn, NEON_SQNEG);
}

void Assembler::NEONXtn(const VRegister& vd, const VRegister& vn,
                        NEON2RegMiscOp vop) {
  Instr format, op = vop;
  if (vd.IsScalar()) {
    DCHECK((vd.Is1B() && vn.Is1H()) || (vd.Is1H() && vn.Is1S()) ||
           (vd.Is1S() && vn.Is1D()));
    op |= NEON_Q | NEONScalar;
    format = SFormat(vd);
  } else {
    DCHECK((vd.Is8B() && vn.Is8H()) || (vd.Is4H() && vn.Is4S()) ||
           (vd.Is2S() && vn.Is2D()) || (vd.Is16B() && vn.Is8H()) ||
           (vd.Is8H() && vn.Is4S()) || (vd.Is4S() && vn.Is2D()));
    format = VFormat(vd);
  }
  Emit(format | op | Rn(vn) | Rd(vd));
}

void Assembler::xtn(const VRegister& vd, const VRegister& vn) {
  DCHECK(vd.IsVector() && vd.IsD());
  NEONXtn(vd, vn, NEON_XTN);
}

void Assembler::xtn2(const VRegister& vd, const VRegister& vn) {
  DCHECK(vd.IsVector() && vd.IsQ());
  NEONXtn(vd, vn, NEON_XTN);
}

void Assembler::sqxtn(const VRegister& vd, const VRegister& vn) {
  DCHECK(vd.IsScalar() || vd.IsD());
  NEONXtn(vd, vn, NEON_SQXTN);
}

void Assembler::sqxtn2(const VRegister& vd, const VRegister& vn) {
  DCHECK(vd.IsVector() && vd.IsQ());
  NEONXtn(vd, vn, NEON_SQXTN);
}

void Assembler::sqxtun(const VRegister& vd, const VRegister& vn) {
  DCHECK(vd.IsScalar() || vd.IsD());
  NEONXtn(vd, vn, NEON_SQXTUN);
}

void Assembler::sqxtun2(const VRegister& vd, const VRegister& vn) {
  DCHECK(vd.IsVector() && vd.IsQ());
  NEONXtn(vd, vn, NEON_SQXTUN);
}

void Assembler::uqxtn(const VRegister& vd, const VRegister& vn) {
  DCHECK(vd.IsScalar() || vd.IsD());
  NEONXtn(vd, vn, NEON_UQXTN);
}

void Assembler::uqxtn2(const VRegister& vd, const VRegister& vn) {
  DCHECK(vd.IsVector() && vd.IsQ());
  NEONXtn(vd, vn, NEON_UQXTN);
}

// NEON NOT and RBIT are distinguished by bit 22, the bottom bit of "size".
void Assembler::not_(const VRegister& vd, const VRegister& vn) {
  DCHECK(AreSameFormat(vd, vn));
  DCHECK(vd.Is8B() || vd.Is16B());
  Emit(VFormat(vd) | NEON_RBIT_NOT | Rn(vn) | Rd(vd));
}

void Assembler::rbit(const VRegister& vd, const VRegister& vn) {
  DCHECK(AreSameFormat(vd, vn));
  DCHECK(vd.Is8B() || vd.Is16B());
  Emit(VFormat(vn) | (1 << NEONSize_offset) | NEON_RBIT_NOT | Rn(vn) | Rd(vd));
}

void Assembler::ext(const VRegister& vd, const VRegister& vn,
                    const VRegister& vm, int index) {
  DCHECK(AreSameFormat(vd, vn, vm));
  DCHECK(vd.Is8B() || vd.Is16B());
  DCHECK((0 <= index) && (index < vd.LaneCount()));
  Emit(VFormat(vd) | NEON_EXT | Rm(vm) | ImmNEONExt(index) | Rn(vn) | Rd(vd));
}

void Assembler::dup(const VRegister& vd, const VRegister& vn, int vn_index) {
  Instr q, scalar;

  // We support vn arguments of the form vn.VxT() or vn.T(), where x is the
  // number of lanes, and T is b, h, s or d.
  int lane_size = vn.LaneSizeInBytes();
  NEONFormatField format;
  switch (lane_size) {
    case 1:
      format = NEON_16B;
      break;
    case 2:
      format = NEON_8H;
      break;
    case 4:
      format = NEON_4S;
      break;
    default:
      DCHECK_EQ(lane_size, 8);
      format = NEON_2D;
      break;
  }

  if (vd.IsScalar()) {
    q = NEON_Q;
    scalar = NEONScalar;
  } else {
    DCHECK(!vd.Is1D());
    q = vd.IsD() ? 0 : NEON_Q;
    scalar = 0;
  }
  Emit(q | scalar | NEON_DUP_ELEMENT | ImmNEON5(format, vn_index) | Rn(vn) |
       Rd(vd));
}

void Assembler::dcptr(Label* label) {
  BlockPoolsScope no_pool_inbetween(this);
  RecordRelocInfo(RelocInfo::INTERNAL_REFERENCE);
  if (label->is_bound()) {
    // The label is bound, so it does not need to be updated and the internal
    // reference should be emitted.
    //
    // In this case, label->pos() returns the offset of the label from the
    // start of the buffer.
    internal_reference_positions_.push_back(pc_offset());
    dc64(reinterpret_cast<uintptr_t>(buffer_start_ + label->pos()));
  } else {
    int32_t offset;
    if (label->is_linked()) {
      // The label is linked, so the internal reference should be added
      // onto the end of the label's link chain.
      //
      // In this case, label->pos() returns the offset of the last linked
      // instruction from the start of the buffer.
      offset = label->pos() - pc_offset();
      DCHECK_NE(offset, kStartOfLabelLinkChain);
    } else {
      // The label is unused, so it now becomes linked and the internal
      // reference is at the start of the new link chain.
      offset = kStartOfLabelLinkChain;
    }
    // The instruction at pc is now the last link in the label's chain.
    label->link_to(pc_offset());

    // Traditionally the offset to the previous instruction in the chain is
    // encoded in the instruction payload (e.g. branch range) but internal
    // references are not instructions so while unbound they are encoded as
    // two consecutive brk instructions. The two 16-bit immediates are used
    // to encode the offset.
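    // Illustrative example (not from the original source): if the previous
    // link is 32 bytes before the current pc, offset == -32, so after the
    // shift below offset == -8 (0xFFFFFFF8) and the pair emitted is
    // brk #0xFFFF; brk #0xFFF8.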
    offset >>= kInstrSizeLog2;
    DCHECK(is_int32(offset));
    uint32_t high16 = unsigned_bitextract_32(31, 16, offset);
    uint32_t low16 = unsigned_bitextract_32(15, 0, offset);

    brk(high16);
    brk(low16);
  }
}

// Below, a difference in case for the same letter indicates a
// negated bit. If b is 1, then B is 0.
uint32_t Assembler::FPToImm8(double imm) {
  DCHECK(IsImmFP64(imm));
  // bits: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
  //       0000.0000.0000.0000.0000.0000.0000.0000
  uint64_t bits = bit_cast<uint64_t>(imm);
  // bit7: a000.0000
  uint64_t bit7 = ((bits >> 63) & 0x1) << 7;
  // bit6: 0b00.0000
  uint64_t bit6 = ((bits >> 61) & 0x1) << 6;
  // bit5_to_0: 00cd.efgh
  uint64_t bit5_to_0 = (bits >> 48) & 0x3F;

  return static_cast<uint32_t>(bit7 | bit6 | bit5_to_0);
}
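// Worked example (illustrative, not from the original source): 2.0 has the
// bit pattern 0x4000000000000000, so bit7 == 0, bit6 == 0 and bit5_to_0 == 0,
// giving imm8 == 0x00; similarly -0.5 (0xBFE0000000000000) encodes as 0xE0.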

Instr Assembler::ImmFP(double imm) { return FPToImm8(imm) << ImmFP_offset; }
Instr Assembler::ImmNEONFP(double imm) {
  return ImmNEONabcdefgh(FPToImm8(imm));
}

// Code generation helpers.
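// Illustrative example for MoveWide below (not from the original source):
// an immediate of 0x0000ABCD00000000 matches the third case of the shift
// calculation, so it is emitted as imm == 0xABCD with shift == 2, i.e.
// MOVZ x<d>, #0xABCD, LSL #32.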
void Assembler::MoveWide(const Register& rd, uint64_t imm, int shift,
                         MoveWideImmediateOp mov_op) {
  // Ignore the top 32 bits of an immediate if we're moving to a W register.
  if (rd.Is32Bits()) {
    // Check that the top 32 bits are zero (a positive 32-bit number) or top
    // 33 bits are one (a negative 32-bit number, sign extended to 64 bits).
    DCHECK(((imm >> kWRegSizeInBits) == 0) ||
           ((imm >> (kWRegSizeInBits - 1)) == 0x1FFFFFFFF));
    imm &= kWRegMask;
  }

  if (shift >= 0) {
    // Explicit shift specified.
    DCHECK((shift == 0) || (shift == 16) || (shift == 32) || (shift == 48));
    DCHECK(rd.Is64Bits() || (shift == 0) || (shift == 16));
    shift /= 16;
  } else {
    // Calculate a new immediate and shift combination to encode the immediate
    // argument.
    shift = 0;
    if ((imm & ~0xFFFFULL) == 0) {
      // Nothing to do.
    } else if ((imm & ~(0xFFFFULL << 16)) == 0) {
      imm >>= 16;
      shift = 1;
    } else if ((imm & ~(0xFFFFULL << 32)) == 0) {
      DCHECK(rd.Is64Bits());
      imm >>= 32;
      shift = 2;
    } else if ((imm & ~(0xFFFFULL << 48)) == 0) {
      DCHECK(rd.Is64Bits());
      imm >>= 48;
      shift = 3;
    }
  }

  DCHECK(is_uint16(imm));

  Emit(SF(rd) | MoveWideImmediateFixed | mov_op | Rd(rd) |
       ImmMoveWide(static_cast<int>(imm)) | ShiftMoveWide(shift));
}

void Assembler::AddSub(const Register& rd, const Register& rn,
                       const Operand& operand, FlagsUpdate S, AddSubOp op) {
  DCHECK_EQ(rd.SizeInBits(), rn.SizeInBits());
  DCHECK(!operand.NeedsRelocation(this));
  if (operand.IsImmediate()) {
    int64_t immediate = operand.ImmediateValue();
    DCHECK(IsImmAddSub(immediate));
    Instr dest_reg = (S == SetFlags) ? Rd(rd) : RdSP(rd);
    Emit(SF(rd) | AddSubImmediateFixed | op | Flags(S) |
         ImmAddSub(static_cast<int>(immediate)) | dest_reg | RnSP(rn));
  } else if (operand.IsShiftedRegister()) {
    DCHECK_EQ(operand.reg().SizeInBits(), rd.SizeInBits());
    DCHECK_NE(operand.shift(), ROR);

    // For instructions of the form:
    //   add/sub   wsp, <Wn>, <Wm> [, LSL #0-3 ]
    //   add/sub   <Wd>, wsp, <Wm> [, LSL #0-3 ]
    //   add/sub   wsp, wsp, <Wm> [, LSL #0-3 ]
    //   adds/subs <Wd>, wsp, <Wm> [, LSL #0-3 ]
    // or their 64-bit register equivalents, convert the operand from shifted
    // to extended register mode, and emit an add/sub extended instruction.
    if (rn.IsSP() || rd.IsSP()) {
      DCHECK(!(rd.IsSP() && (S == SetFlags)));
      DataProcExtendedRegister(rd, rn, operand.ToExtendedRegister(), S,
                               AddSubExtendedFixed | op);
    } else {
      DataProcShiftedRegister(rd, rn, operand, S, AddSubShiftedFixed | op);
    }
  } else {
    DCHECK(operand.IsExtendedRegister());
    DataProcExtendedRegister(rd, rn, operand, S, AddSubExtendedFixed | op);
  }
}

void Assembler::AddSubWithCarry(const Register& rd, const Register& rn,
                                const Operand& operand, FlagsUpdate S,
                                AddSubWithCarryOp op) {
  DCHECK_EQ(rd.SizeInBits(), rn.SizeInBits());
  DCHECK_EQ(rd.SizeInBits(), operand.reg().SizeInBits());
  DCHECK(operand.IsShiftedRegister() && (operand.shift_amount() == 0));
  DCHECK(!operand.NeedsRelocation(this));
  Emit(SF(rd) | op | Flags(S) | Rm(operand.reg()) | Rn(rn) | Rd(rd));
}

void Assembler::hlt(int code) {
  DCHECK(is_uint16(code));
  Emit(HLT | ImmException(code));
}

void Assembler::brk(int code) {
  DCHECK(is_uint16(code));
  Emit(BRK | ImmException(code));
}

void Assembler::EmitStringData(const char* string) {
  size_t len = strlen(string) + 1;
  DCHECK_LE(RoundUp(len, kInstrSize), static_cast<size_t>(kGap));
  EmitData(string, static_cast<int>(len));
  // Pad with NUL characters until pc_ is aligned.
  const char pad[] = {'\0', '\0', '\0', '\0'};
  static_assert(sizeof(pad) == kInstrSize,
                "Size of padding must match instruction size.");
  EmitData(pad, RoundUp(pc_offset(), kInstrSize) - pc_offset());
}

void Assembler::debug(const char* message, uint32_t code, Instr params) {
  if (options().enable_simulator_code) {
    // The arguments to the debug marker need to be contiguous in memory, so
    // make sure we don't try to emit pools.
    BlockPoolsScope scope(this);

    Label start;
    bind(&start);

    // Refer to instructions-arm64.h for a description of the marker and its
    // arguments.
    hlt(kImmExceptionIsDebug);
    DCHECK_EQ(SizeOfCodeGeneratedSince(&start), kDebugCodeOffset);
    dc32(code);
    DCHECK_EQ(SizeOfCodeGeneratedSince(&start), kDebugParamsOffset);
    dc32(params);
    DCHECK_EQ(SizeOfCodeGeneratedSince(&start), kDebugMessageOffset);
    EmitStringData(message);
    hlt(kImmExceptionIsUnreachable);

    return;
  }

  if (params & BREAK) {
    brk(0);
  }
}

void Assembler::Logical(const Register& rd, const Register& rn,
                        const Operand& operand, LogicalOp op) {
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  DCHECK(!operand.NeedsRelocation(this));
  if (operand.IsImmediate()) {
    int64_t immediate = operand.ImmediateValue();
    unsigned reg_size = rd.SizeInBits();

    DCHECK_NE(immediate, 0);
    DCHECK_NE(immediate, -1);
    DCHECK(rd.Is64Bits() || is_uint32(immediate));

    // If the operation is NOT, invert the operation and immediate.
    if ((op & NOT) == NOT) {
      op = static_cast<LogicalOp>(op & ~NOT);
      immediate = rd.Is64Bits() ? ~immediate : (~immediate & kWRegMask);
    }
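    // Illustrative example (not from the original source): a BIC (AND + NOT)
    // of x1 with #0x3 is rewritten here as an AND with ~0x3
    // (0xFFFFFFFFFFFFFFFC), which is then encodable as a logical immediate.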

    unsigned n, imm_s, imm_r;
    if (IsImmLogical(immediate, reg_size, &n, &imm_s, &imm_r)) {
      // Immediate can be encoded in the instruction.
      LogicalImmediate(rd, rn, n, imm_s, imm_r, op);
    } else {
      // This case is handled in the macro assembler.
      UNREACHABLE();
    }
  } else {
    DCHECK(operand.IsShiftedRegister());
    DCHECK(operand.reg().SizeInBits() == rd.SizeInBits());
    Instr dp_op = static_cast<Instr>(op | LogicalShiftedFixed);
    DataProcShiftedRegister(rd, rn, operand, LeaveFlags, dp_op);
  }
}

void Assembler::LogicalImmediate(const Register& rd, const Register& rn,
                                 unsigned n, unsigned imm_s, unsigned imm_r,
                                 LogicalOp op) {
  unsigned reg_size = rd.SizeInBits();
  Instr dest_reg = (op == ANDS) ? Rd(rd) : RdSP(rd);
  Emit(SF(rd) | LogicalImmediateFixed | op | BitN(n, reg_size) |
       ImmSetBits(imm_s, reg_size) | ImmRotate(imm_r, reg_size) | dest_reg |
       Rn(rn));
}

void Assembler::ConditionalCompare(const Register& rn, const Operand& operand,
                                   StatusFlags nzcv, Condition cond,
                                   ConditionalCompareOp op) {
  Instr ccmpop;
  DCHECK(!operand.NeedsRelocation(this));
  if (operand.IsImmediate()) {
    int64_t immediate = operand.ImmediateValue();
    DCHECK(IsImmConditionalCompare(immediate));
    ccmpop = ConditionalCompareImmediateFixed | op |
             ImmCondCmp(static_cast<unsigned>(immediate));
  } else {
    DCHECK(operand.IsShiftedRegister() && (operand.shift_amount() == 0));
    ccmpop = ConditionalCompareRegisterFixed | op | Rm(operand.reg());
  }
  Emit(SF(rn) | ccmpop | Cond(cond) | Rn(rn) | Nzcv(nzcv));
}

void Assembler::DataProcessing1Source(const Register& rd, const Register& rn,
                                      DataProcessing1SourceOp op) {
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  Emit(SF(rn) | op | Rn(rn) | Rd(rd));
}

void Assembler::FPDataProcessing1Source(const VRegister& vd,
                                        const VRegister& vn,
                                        FPDataProcessing1SourceOp op) {
  Emit(FPType(vn) | op | Rn(vn) | Rd(vd));
}

void Assembler::FPDataProcessing2Source(const VRegister& fd,
                                        const VRegister& fn,
                                        const VRegister& fm,
                                        FPDataProcessing2SourceOp op) {
  DCHECK(fd.SizeInBits() == fn.SizeInBits());
  DCHECK(fd.SizeInBits() == fm.SizeInBits());
  Emit(FPType(fd) | op | Rm(fm) | Rn(fn) | Rd(fd));
}

void Assembler::FPDataProcessing3Source(const VRegister& fd,
                                        const VRegister& fn,
                                        const VRegister& fm,
                                        const VRegister& fa,
                                        FPDataProcessing3SourceOp op) {
  DCHECK(AreSameSizeAndType(fd, fn, fm, fa));
  Emit(FPType(fd) | op | Rm(fm) | Rn(fn) | Rd(fd) | Ra(fa));
}

void Assembler::NEONModifiedImmShiftLsl(const VRegister& vd, const int imm8,
                                        const int left_shift,
                                        NEONModifiedImmediateOp op) {
  DCHECK(vd.Is8B() || vd.Is16B() || vd.Is4H() || vd.Is8H() || vd.Is2S() ||
         vd.Is4S());
  DCHECK((left_shift == 0) || (left_shift == 8) || (left_shift == 16) ||
         (left_shift == 24));
  DCHECK(is_uint8(imm8));

  int cmode_1, cmode_2, cmode_3;
  if (vd.Is8B() || vd.Is16B()) {
    DCHECK_EQ(op, NEONModifiedImmediate_MOVI);
    cmode_1 = 1;
    cmode_2 = 1;
    cmode_3 = 1;
  } else {
    cmode_1 = (left_shift >> 3) & 1;
    cmode_2 = left_shift >> 4;
    cmode_3 = 0;
    if (vd.Is4H() || vd.Is8H()) {
      DCHECK((left_shift == 0) || (left_shift == 8));
      cmode_3 = 1;
    }
  }
  int cmode = (cmode_3 << 3) | (cmode_2 << 2) | (cmode_1 << 1);
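  // Illustrative example (not from the original source): for a 4H/8H MOVI
  // with left_shift == 8, cmode_1 == 1, cmode_2 == 0 and cmode_3 == 1,
  // giving cmode == 0b1010.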

  Instr q = vd.IsQ() ? NEON_Q : 0;

  Emit(q | op | ImmNEONabcdefgh(imm8) | NEONCmode(cmode) | Rd(vd));
}

void Assembler::NEONModifiedImmShiftMsl(const VRegister& vd, const int imm8,
                                        const int shift_amount,
                                        NEONModifiedImmediateOp op) {
  DCHECK(vd.Is2S() || vd.Is4S());
  DCHECK((shift_amount == 8) || (shift_amount == 16));
  DCHECK(is_uint8(imm8));

  int cmode_0 = (shift_amount >> 4) & 1;
  int cmode = 0xC | cmode_0;

  Instr q = vd.IsQ() ? NEON_Q : 0;

  Emit(q | op | ImmNEONabcdefgh(imm8) | NEONCmode(cmode) | Rd(vd));
}

void Assembler::EmitShift(const Register& rd, const Register& rn, Shift shift,
                          unsigned shift_amount) {
  switch (shift) {
    case LSL:
      lsl(rd, rn, shift_amount);
      break;
    case LSR:
      lsr(rd, rn, shift_amount);
      break;
    case ASR:
      asr(rd, rn, shift_amount);
      break;
    case ROR:
      ror(rd, rn, shift_amount);
      break;
    default:
      UNREACHABLE();
  }
}

void Assembler::EmitExtendShift(const Register& rd, const Register& rn,
                                Extend extend, unsigned left_shift) {
  DCHECK(rd.SizeInBits() >= rn.SizeInBits());
  unsigned reg_size = rd.SizeInBits();
  // Use the correct size of register.
  Register rn_ = Register::Create(rn.code(), rd.SizeInBits());
  // Bits extracted are high_bit:0.
  unsigned high_bit = (8 << (extend & 0x3)) - 1;
  // Number of bits left in the result that are not introduced by the shift.
  unsigned non_shift_bits = (reg_size - left_shift) & (reg_size - 1);
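  // Illustrative example (not from the original source): a 32-bit UXTB with
  // left_shift == 4 gives high_bit == 7 and non_shift_bits == 28; since
  // 28 > 7, the ubfm branch below emits the equivalent of a UBFIZ that
  // zero-extends the byte and shifts it left by 4.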

  if ((non_shift_bits > high_bit) || (non_shift_bits == 0)) {
    switch (extend) {
      case UXTB:
      case UXTH:
      case UXTW:
        ubfm(rd, rn_, non_shift_bits, high_bit);
        break;
      case SXTB:
      case SXTH:
      case SXTW:
        sbfm(rd, rn_, non_shift_bits, high_bit);
        break;
      case UXTX:
      case SXTX: {
        DCHECK_EQ(rn.SizeInBits(), kXRegSizeInBits);
        // Nothing to extend. Just shift.
        lsl(rd, rn_, left_shift);
        break;
      }
      default:
        UNREACHABLE();
    }
  } else {
    // No need to extend as the extended bits would be shifted away.
    lsl(rd, rn_, left_shift);
  }
}

void Assembler::DataProcShiftedRegister(const Register& rd, const Register& rn,
                                        const Operand& operand, FlagsUpdate S,
                                        Instr op) {
  DCHECK(operand.IsShiftedRegister());
  DCHECK(rn.Is64Bits() || (rn.Is32Bits() && is_uint5(operand.shift_amount())));
  DCHECK(!operand.NeedsRelocation(this));
  Emit(SF(rd) | op | Flags(S) | ShiftDP(operand.shift()) |
       ImmDPShift(operand.shift_amount()) | Rm(operand.reg()) | Rn(rn) |
       Rd(rd));
}

void Assembler::DataProcExtendedRegister(const Register& rd, const Register& rn,
                                         const Operand& operand, FlagsUpdate S,
                                         Instr op) {
  DCHECK(!operand.NeedsRelocation(this));
  Instr dest_reg = (S == SetFlags) ? Rd(rd) : RdSP(rd);
  Emit(SF(rd) | op | Flags(S) | Rm(operand.reg()) |
       ExtendMode(operand.extend()) | ImmExtendShift(operand.shift_amount()) |
       dest_reg | RnSP(rn));
}

bool Assembler::IsImmAddSub(int64_t immediate) {
  return is_uint12(immediate) ||
         (is_uint12(immediate >> 12) && ((immediate & 0xFFF) == 0));
}
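// Illustrative examples (not from the original source): 0xFFF is encodable
// directly, 0xFFF000 is encodable via the 12-bit left shift, and 0xFFF001
// is not encodable at all.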

void Assembler::LoadStore(const CPURegister& rt, const MemOperand& addr,
                          LoadStoreOp op) {
  Instr memop = op | Rt(rt) | RnSP(addr.base());

  if (addr.IsImmediateOffset()) {
    unsigned size = CalcLSDataSize(op);
    if (IsImmLSScaled(addr.offset(), size)) {
      int offset = static_cast<int>(addr.offset());
      // Use the scaled addressing mode.
      Emit(LoadStoreUnsignedOffsetFixed | memop |
           ImmLSUnsigned(offset >> size));
    } else if (IsImmLSUnscaled(addr.offset())) {
      int offset = static_cast<int>(addr.offset());
      // Use the unscaled addressing mode.
      Emit(LoadStoreUnscaledOffsetFixed | memop | ImmLS(offset));
    } else {
      // This case is handled in the macro assembler.
      UNREACHABLE();
    }
  } else if (addr.IsRegisterOffset()) {
    Extend ext = addr.extend();
    Shift shift = addr.shift();
    unsigned shift_amount = addr.shift_amount();

    // LSL is encoded in the option field as UXTX.
    if (shift == LSL) {
      ext = UXTX;
    }

    // Shifts are encoded in one bit, indicating a left shift by the memory
    // access size.
    DCHECK((shift_amount == 0) ||
           (shift_amount == static_cast<unsigned>(CalcLSDataSize(op))));
    Emit(LoadStoreRegisterOffsetFixed | memop | Rm(addr.regoffset()) |
         ExtendMode(ext) | ImmShiftLS((shift_amount > 0) ? 1 : 0));
  } else {
    // Pre-index and post-index modes.
    DCHECK_NE(rt, addr.base());
    if (IsImmLSUnscaled(addr.offset())) {
      int offset = static_cast<int>(addr.offset());
      if (addr.IsPreIndex()) {
        Emit(LoadStorePreIndexFixed | memop | ImmLS(offset));
      } else {
        DCHECK(addr.IsPostIndex());
        Emit(LoadStorePostIndexFixed | memop | ImmLS(offset));
      }
    } else {
      // This case is handled in the macro assembler.
      UNREACHABLE();
    }
  }
}

bool Assembler::IsImmLSUnscaled(int64_t offset) { return is_int9(offset); }

bool Assembler::IsImmLSScaled(int64_t offset, unsigned size) {
  bool offset_is_size_multiple =
      (static_cast<int64_t>(static_cast<uint64_t>(offset >> size) << size) ==
       offset);
  return offset_is_size_multiple && is_uint12(offset >> size);
}
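// Illustrative examples (not from the original source): for a 64-bit access
// (size == 3), offset 32760 (4095 * 8) is scaled-encodable, while offset -8,
// which fails the unsigned check, and offset 4, which is not a multiple of
// the access size, both fall back to the unscaled form in LoadStore above.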

bool Assembler::IsImmLSPair(int64_t offset, unsigned size) {
  bool offset_is_size_multiple =
      (static_cast<int64_t>(static_cast<uint64_t>(offset >> size) << size) ==
       offset);
  return offset_is_size_multiple && is_int7(offset >> size);
}

bool Assembler::IsImmLLiteral(int64_t offset) {
  int inst_size = static_cast<int>(kInstrSizeLog2);
  bool offset_is_inst_multiple =
      (static_cast<int64_t>(static_cast<uint64_t>(offset >> inst_size)
                            << inst_size) == offset);
  DCHECK_GT(offset, 0);
  offset >>= kLoadLiteralScaleLog2;
  return offset_is_inst_multiple && is_intn(offset, ImmLLiteral_width);
}

// Test if a given value can be encoded in the immediate field of a logical
// instruction.
// If it can be encoded, the function returns true, and values pointed to by n,
// imm_s and imm_r are updated with immediates encoded in the format required
// by the corresponding fields in the logical instruction.
// If it cannot be encoded, the function returns false, and the values pointed
// to by n, imm_s and imm_r are undefined.
bool Assembler::IsImmLogical(uint64_t value, unsigned width, unsigned* n,
                             unsigned* imm_s, unsigned* imm_r) {
  DCHECK((n != nullptr) && (imm_s != nullptr) && (imm_r != nullptr));
  DCHECK((width == kWRegSizeInBits) || (width == kXRegSizeInBits));

  bool negate = false;

  // Logical immediates are encoded using parameters n, imm_s and imm_r using
  // the following table:
  //
  //    N   imms    immr    size        S             R
  //    1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
  //    0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
  //    0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
  //    0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
  //    0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
  //    0  11110s  xxxxxr     2    UInt(s)       UInt(r)
  // (s bits must not be all set)
  //
  // A pattern is constructed of size bits, where the least significant S+1 bits
  // are set. The pattern is rotated right by R, and repeated across a 32 or
  // 64-bit value, depending on destination register width.
  //
  // Put another way: the basic format of a logical immediate is a single
  // contiguous stretch of 1 bits, repeated across the whole word at intervals
  // given by a power of 2. To identify them quickly, we first locate the
  // lowest stretch of 1 bits, then the next 1 bit above that; that combination
  // is different for every logical immediate, so it gives us all the
  // information we need to identify the only logical immediate that our input
  // could be, and then we simply check if that's the value we actually have.
  //
  // (The rotation parameter does give the possibility of the stretch of 1 bits
  // going 'round the end' of the word. To deal with that, we observe that in
  // any situation where that happens the bitwise NOT of the value is also a
  // valid logical immediate. So we simply invert the input whenever its low bit
  // is set, and then we know that the rotated case can't arise.)
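  //
  // Worked example (illustrative, not from the original source): for
  // value == 0x0000FF000000FF00 the algorithm below finds a == 1 << 8,
  // b == 1 << 16 and c == 1 << 40, so d == 32 and s == 8, and the function
  // returns n == 0, imm_s == 0b000111 and imm_r == 24: eight set bits,
  // rotated into place and repeated every 32 bits.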

  if (value & 1) {
    // If the low bit is 1, negate the value, and set a flag to remember that we
    // did (so that we can adjust the return values appropriately).
    negate = true;
    value = ~value;
  }

  if (width == kWRegSizeInBits) {
    // To handle 32-bit logical immediates, the very easiest thing is to repeat
    // the input value twice to make a 64-bit word. The correct encoding of that
    // as a logical immediate will also be the correct encoding of the 32-bit
    // value.

    // The most-significant 32 bits may not be zero (i.e. negate may be true),
    // so shift the value left before duplicating it.
    value <<= kWRegSizeInBits;
    value |= value >> kWRegSizeInBits;
  }

  // The basic analysis idea: imagine our input word looks like this.
  //
  //    0011111000111110001111100011111000111110001111100011111000111110
  //                                                          c  b    a
  //                                                          |<--d-->|
  //
  // We find the lowest set bit (as an actual power-of-2 value, not its index)
  // and call it a. Then we add a to our original number, which wipes out the
  // bottommost stretch of set bits and replaces it with a 1 carried into the
  // next zero bit. Then we look for the new lowest set bit, which is in
  // position b, and subtract it, so now our number is just like the original
  // but with the lowest stretch of set bits completely gone. Now we find the
  // lowest set bit again, which is position c in the diagram above. Then we'll
  // measure the distance d between bit positions a and c (using CLZ), and that
  // tells us that the only valid logical immediate that could possibly be equal
  // to this number is the one in which a stretch of bits running from a to just
  // below b is replicated every d bits.
  uint64_t a = LargestPowerOf2Divisor(value);
  uint64_t value_plus_a = value + a;
  uint64_t b = LargestPowerOf2Divisor(value_plus_a);
  uint64_t value_plus_a_minus_b = value_plus_a - b;
  uint64_t c = LargestPowerOf2Divisor(value_plus_a_minus_b);

  int d, clz_a, out_n;
  uint64_t mask;

  if (c != 0) {
    // The general case, in which there is more than one stretch of set bits.
    // Compute the repeat distance d, and set up a bitmask covering the basic
    // unit of repetition (i.e. a word with the bottom d bits set). Also, in all
    // of these cases the N bit of the output will be zero.
    clz_a = CountLeadingZeros(a, kXRegSizeInBits);
    int clz_c = CountLeadingZeros(c, kXRegSizeInBits);
    d = clz_a - clz_c;
    mask = ((uint64_t{1} << d) - 1);
    out_n = 0;
  } else {
    // Handle degenerate cases.
    //
    // If any of those 'find lowest set bit' operations didn't find a set bit at
    // all, then the word will have been zero thereafter, so in particular the
    // last lowest_set_bit operation will have returned zero. So we can test for
    // all the special case conditions in one go by seeing if c is zero.
    if (a == 0) {
      // The input was zero (or all 1 bits, which also ends up here after the
      // inversion at the start of the function), for which we just return
      // false.
      return false;
    } else {
      // Otherwise, if c was zero but a was not, then there's just one stretch
      // of set bits in our word, meaning that we have the trivial case of
      // d == 64 and only one 'repetition'. Set up all the same variables as in
      // the general case above, and set the N bit in the output.
      clz_a = CountLeadingZeros(a, kXRegSizeInBits);
      d = 64;
      mask = ~uint64_t{0};
      out_n = 1;
    }
  }

  // If the repeat period d is not a power of two, it can't be encoded.
  if (!base::bits::IsPowerOfTwo(d)) {
    return false;
  }

  if (((b - a) & ~mask) != 0) {
    // If the bit stretch (b - a) does not fit within the mask derived from the
    // repeat period, then fail.
    return false;
  }

  // The only possible option is b - a repeated every d bits. Now we're going to
  // actually construct the valid logical immediate derived from that
  // specification, and see if it equals our original input.
  //
  // To repeat a value every d bits, we multiply it by a number of the form
  // (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can
  // be derived using a table lookup on CLZ(d).
  static const uint64_t multipliers[] = {
      0x0000000000000001UL, 0x0000000100000001UL, 0x0001000100010001UL,
      0x0101010101010101UL, 0x1111111111111111UL, 0x5555555555555555UL,
  };
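  // For example (illustrative): d == 16 gives CountLeadingZeros(16, 64) == 59,
  // so multiplier_idx == 2 and the 0x0001000100010001 multiplier is selected.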
4088 int multiplier_idx = CountLeadingZeros(d, kXRegSizeInBits) - 57;
4089 // Ensure that the index to the multipliers array is within bounds.
4090 DCHECK((multiplier_idx >= 0) &&
4091 (static_cast<size_t>(multiplier_idx) < arraysize(multipliers)));
4092 uint64_t multiplier = multipliers[multiplier_idx];
4093 uint64_t candidate = (b - a) * multiplier;
4094
4095 if (value != candidate) {
4096 // The candidate pattern doesn't match our input value, so fail.
4097 return false;
4098 }
4099
4100 // We have a match! This is a valid logical immediate, so now we have to
4101 // construct the bits and pieces of the instruction encoding that generates
4102 // it.
4103
4104 // Count the set bits in our basic stretch. The special case of clz(0) == -1
4105 // makes the answer come out right for stretches that reach the very top of
4106 // the word (e.g. numbers like 0xFFFFC00000000000).
4107 int clz_b = (b == 0) ? -1 : CountLeadingZeros(b, kXRegSizeInBits);
4108 int s = clz_a - clz_b;
4109
4110 // Decide how many bits to rotate right by, to put the low bit of that basic
4111 // stretch in position a.
4112 int r;
4113 if (negate) {
4114 // If we inverted the input right at the start of this function, here's
4115 // where we compensate: the number of set bits becomes the number of clear
4116 // bits, and the rotation count is based on position b rather than position
4117 // a (since b is the location of the 'lowest' 1 bit after inversion).
4118 s = d - s;
4119 r = (clz_b + 1) & (d - 1);
4120 } else {
4121 r = (clz_a + 1) & (d - 1);
4122 }
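
  // As a worked illustration (without negation), consider the input
  // 0x0F000F000F000F00: the lowest set bit gives a == 0x100, then b == 0x1000
  // and c == 0x1000000, so d == clz_a - clz_c == 55 - 39 == 16 and
  // mask == 0xFFFF. The candidate (b - a) * 0x0001000100010001 ==
  // 0x0F000F000F000F00 matches the input, and we end up with s == 4 set bits
  // and a rotation of r == 56 & 15 == 8.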

  // Now we're done, except for having to encode the S output in such a way
  // that it gives both the number of set bits and the length of the repeated
  // segment. The s field is encoded like this:
  //
  //     imms    size        S
  //    ssssss    64    UInt(ssssss)
  //    0sssss    32    UInt(sssss)
  //    10ssss    16    UInt(ssss)
  //    110sss     8    UInt(sss)
  //    1110ss     4    UInt(ss)
  //    11110s     2    UInt(s)
  //
  // So we 'or' (-d * 2) with (s - 1) to form imms.
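  //
  // For instance, with d == 16 and s == 4, imms == ((-32) | 3) & 0x3F ==
  // 0b100011: the '10ssss' row above with ssss == 0b0011 (the encoded field
  // stores s - 1).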
  *n = out_n;
  *imm_s = ((-d * 2) | (s - 1)) & 0x3F;
  *imm_r = r;

  return true;
}

bool Assembler::IsImmConditionalCompare(int64_t immediate) {
  return is_uint5(immediate);
}

bool Assembler::IsImmFP32(float imm) {
  // Valid values will have the form:
  // aBbb.bbbc.defg.h000.0000.0000.0000.0000
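  // For example, 1.0f (0x3F800000) fits this form and is encodable, while
  // 0.1f (0x3DCCCCCD) has set bits among bits[19..0] and is not.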
  uint32_t bits = bit_cast<uint32_t>(imm);
  // bits[19..0] are cleared.
  if ((bits & 0x7FFFF) != 0) {
    return false;
  }

  // bits[29..25] are all set or all cleared.
  uint32_t b_pattern = (bits >> 16) & 0x3E00;
  if (b_pattern != 0 && b_pattern != 0x3E00) {
    return false;
  }

  // bit[30] and bit[29] are opposite.
  if (((bits ^ (bits << 1)) & 0x40000000) == 0) {
    return false;
  }

  return true;
}

bool Assembler::IsImmFP64(double imm) {
  // Valid values will have the form:
  // aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
  // 0000.0000.0000.0000.0000.0000.0000.0000
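  // For example, 1.0 (0x3FF0000000000000) fits this form and is encodable,
  // while 0.1 (0x3FB999999999999A) has set bits among bits[47..0] and is not.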
  uint64_t bits = bit_cast<uint64_t>(imm);
  // bits[47..0] are cleared.
  if ((bits & 0xFFFFFFFFFFFFL) != 0) {
    return false;
  }

  // bits[61..54] are all set or all cleared.
  uint32_t b_pattern = (bits >> 48) & 0x3FC0;
  if (b_pattern != 0 && b_pattern != 0x3FC0) {
    return false;
  }

  // bit[62] and bit[61] are opposite.
  if (((bits ^ (bits << 1)) & 0x4000000000000000L) == 0) {
    return false;
  }

  return true;
}

void Assembler::GrowBuffer() {
  // Compute new buffer size.
  int old_size = buffer_->size();
  int new_size = std::min(2 * old_size, old_size + 1 * MB);
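  // That is, the buffer doubles while it is small (e.g. 512 KB grows to 1 MB)
  // and then grows in 1 MB steps (e.g. 4 MB grows to 5 MB).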

  // Some internal data structures overflow for very large buffers;
  // kMaximalBufferSize must be kept small enough to prevent this.
  if (new_size > kMaximalBufferSize) {
    V8::FatalProcessOutOfMemory(nullptr, "Assembler::GrowBuffer");
  }

  // Set up new buffer.
  std::unique_ptr<AssemblerBuffer> new_buffer = buffer_->Grow(new_size);
  DCHECK_EQ(new_size, new_buffer->size());
  byte* new_start = new_buffer->start();

  // Copy the data.
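  // pc_delta is the displacement of the code, which is copied from the start
  // of the buffer; rc_delta is the displacement of the relocation info, which
  // is written backwards from the end of the buffer.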
  intptr_t pc_delta = new_start - buffer_start_;
  intptr_t rc_delta = (new_start + new_size) - (buffer_start_ + old_size);
  size_t reloc_size = (buffer_start_ + old_size) - reloc_info_writer.pos();
  memmove(new_start, buffer_start_, pc_offset());
  memmove(reloc_info_writer.pos() + rc_delta, reloc_info_writer.pos(),
          reloc_size);

  // Switch buffers.
  buffer_ = std::move(new_buffer);
  buffer_start_ = new_start;
  pc_ += pc_delta;
  reloc_info_writer.Reposition(reloc_info_writer.pos() + rc_delta,
                               reloc_info_writer.last_pc() + pc_delta);

  // None of our relocation types are PC-relative pointing outside the code
  // buffer, nor PC-absolute pointing inside the code buffer, so there is no
  // need to relocate any emitted relocation entries.

  // Relocate internal references.
  for (auto pos : internal_reference_positions_) {
    Address address = reinterpret_cast<intptr_t>(buffer_start_) + pos;
    intptr_t internal_ref = ReadUnalignedValue<intptr_t>(address);
    internal_ref += pc_delta;
    WriteUnalignedValue<intptr_t>(address, internal_ref);
  }

  // Pending relocation entries are also relative, no need to relocate.
}

void Assembler::RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data,
                                ConstantPoolMode constant_pool_mode) {
  if ((rmode == RelocInfo::INTERNAL_REFERENCE) ||
      (rmode == RelocInfo::CONST_POOL) || (rmode == RelocInfo::VENEER_POOL) ||
      (rmode == RelocInfo::DEOPT_SCRIPT_OFFSET) ||
      (rmode == RelocInfo::DEOPT_INLINING_ID) ||
      (rmode == RelocInfo::DEOPT_REASON) || (rmode == RelocInfo::DEOPT_ID)) {
    // Adjust code for new modes.
    DCHECK(RelocInfo::IsDeoptReason(rmode) || RelocInfo::IsDeoptId(rmode) ||
           RelocInfo::IsDeoptPosition(rmode) ||
           RelocInfo::IsInternalReference(rmode) ||
           RelocInfo::IsConstPool(rmode) || RelocInfo::IsVeneerPool(rmode));
    // These modes do not need an entry in the constant pool.
  } else if (constant_pool_mode == NEEDS_POOL_ENTRY) {
    if (RelocInfo::IsEmbeddedObjectMode(rmode)) {
      Handle<HeapObject> handle(reinterpret_cast<Address*>(data));
      data = AddEmbeddedObject(handle);
    }
    if (rmode == RelocInfo::COMPRESSED_EMBEDDED_OBJECT) {
      if (constpool_.RecordEntry(static_cast<uint32_t>(data), rmode) ==
          RelocInfoStatus::kMustOmitForDuplicate) {
        return;
      }
    } else {
      if (constpool_.RecordEntry(static_cast<uint64_t>(data), rmode) ==
          RelocInfoStatus::kMustOmitForDuplicate) {
        return;
      }
    }
  }
  // For modes that cannot use the constant pool, a different sequence of
  // instructions will be emitted by this function's caller.

  if (!ShouldRecordRelocInfo(rmode)) return;

  // Callers should ensure that constant pool emission is blocked until the
  // instruction the reloc info is associated with has been emitted.
  DCHECK(constpool_.IsBlocked());

  // We do not try to reuse pool constants.
  RelocInfo rinfo(reinterpret_cast<Address>(pc_), rmode, data, Code());

  DCHECK_GE(buffer_space(), kMaxRelocSize);  // Too late to grow buffer here.
  reloc_info_writer.Write(&rinfo);
}

void Assembler::near_jump(int offset, RelocInfo::Mode rmode) {
  BlockPoolsScope no_pool_before_b_instr(this);
  if (!RelocInfo::IsNone(rmode)) RecordRelocInfo(rmode, offset, NO_POOL_ENTRY);
  b(offset);
}

void Assembler::near_call(int offset, RelocInfo::Mode rmode) {
  BlockPoolsScope no_pool_before_bl_instr(this);
  if (!RelocInfo::IsNone(rmode)) RecordRelocInfo(rmode, offset, NO_POOL_ENTRY);
  bl(offset);
}

void Assembler::near_call(HeapObjectRequest request) {
  BlockPoolsScope no_pool_before_bl_instr(this);
  RequestHeapObject(request);
  EmbeddedObjectIndex index = AddEmbeddedObject(Handle<Code>());
  RecordRelocInfo(RelocInfo::CODE_TARGET, index, NO_POOL_ENTRY);
  DCHECK(is_int32(index));
  bl(static_cast<int>(index));
}

// Constant Pool

void ConstantPool::EmitPrologue(Alignment require_alignment) {
  // The recorded constant pool size is expressed in number of 32-bit words,
  // and includes the prologue and alignment, but not the jump around the pool
  // or the size of the marker itself.
  const int marker_size = 1;
  int word_count =
      ComputeSize(Jump::kOmitted, require_alignment) / kInt32Size - marker_size;
  assm_->Emit(LDR_x_lit | Assembler::ImmLLiteral(word_count) |
              Assembler::Rt(xzr));
  assm_->EmitPoolGuard();
}

int ConstantPool::PrologueSize(Jump require_jump) const {
  // Prologue is:
  //   b   over            ;; if require_jump
  //   ldr xzr, #pool_size
  //   blr xzr
  int prologue_size = require_jump == Jump::kRequired ? kInstrSize : 0;
  prologue_size += 2 * kInstrSize;
  return prologue_size;
}

void ConstantPool::SetLoadOffsetToConstPoolEntry(int load_offset,
                                                 Instruction* entry_offset,
                                                 const ConstantPoolKey& key) {
  Instruction* instr = assm_->InstructionAt(load_offset);
  // Instruction to patch must be 'ldr rd, [pc, #offset]' with offset == 0.
  DCHECK(instr->IsLdrLiteral() && instr->ImmLLiteral() == 0);
  instr->SetImmPCOffsetTarget(assm_->options(), entry_offset);
}

void ConstantPool::Check(Emission force_emit, Jump require_jump,
                         size_t margin) {
  // Some short sequences of instructions must not be broken up by constant
  // pool emission; such sequences are protected by a ConstPool::BlockScope.
  if (IsBlocked()) {
    // Something is wrong if emission is forced and blocked at the same time.
    DCHECK_EQ(force_emit, Emission::kIfNeeded);
    return;
  }

  // We emit a constant pool only if:
  //  * it is not empty, and
  //  * emission is forced by the force_emit parameter (e.g. at function end),
  //    or emission is mandatory or opportune according to {ShouldEmitNow}.
  if (!IsEmpty() && (force_emit == Emission::kForced ||
                     ShouldEmitNow(require_jump, margin))) {
    // Emit veneers for branches that would go out of range during emission of
    // the constant pool.
    int worst_case_size = ComputeSize(Jump::kRequired, Alignment::kRequired);
    assm_->CheckVeneerPool(false, require_jump == Jump::kRequired,
                           assm_->kVeneerDistanceMargin + worst_case_size +
                               static_cast<int>(margin));

    // Check that the code buffer is large enough before emitting the constant
    // pool (this includes the gap to the relocation information).
    int needed_space = worst_case_size + assm_->kGap;
    while (assm_->buffer_space() <= needed_space) {
      assm_->GrowBuffer();
    }

    EmitAndClear(require_jump);
  }
  // Since the constant pool is (now) empty, move the check offset forward by
  // the standard interval.
  SetNextCheckIn(ConstantPool::kCheckInterval);
}

// Pool entries are accessed with a PC-relative load, therefore the distance
// cannot be more than 1 * MB. Since constant pool emission checks are interval
// based, and we want to keep entries close to the code, we try to emit every
// 64 KB.
const size_t ConstantPool::kMaxDistToPool32 = 1 * MB;
const size_t ConstantPool::kMaxDistToPool64 = 1 * MB;
const size_t ConstantPool::kCheckInterval = 128 * kInstrSize;
const size_t ConstantPool::kApproxDistToPool32 = 64 * KB;
const size_t ConstantPool::kApproxDistToPool64 = kApproxDistToPool32;

const size_t ConstantPool::kOpportunityDistToPool32 = 64 * KB;
const size_t ConstantPool::kOpportunityDistToPool64 = 64 * KB;
const size_t ConstantPool::kApproxMaxEntryCount = 512;

bool Assembler::ShouldEmitVeneer(int max_reachable_pc, size_t margin) {
  // Account for the branch around the veneers and the guard.
  int protection_offset = 2 * kInstrSize;
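  // That is, emit if the current position, plus `margin` bytes, the
  // protection code, and a worst-case veneer for every unresolved branch,
  // could reach past max_reachable_pc.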
  return static_cast<intptr_t>(pc_offset() + margin + protection_offset +
                               unresolved_branches_.size() *
                                   kMaxVeneerCodeSize) >= max_reachable_pc;
}

void Assembler::RecordVeneerPool(int location_offset, int size) {
  Assembler::BlockPoolsScope block_pools(this, PoolEmissionCheck::kSkip);
  RelocInfo rinfo(reinterpret_cast<Address>(buffer_start_) + location_offset,
                  RelocInfo::VENEER_POOL, static_cast<intptr_t>(size), Code());
  reloc_info_writer.Write(&rinfo);
}

void Assembler::EmitVeneers(bool force_emit, bool need_protection,
                            size_t margin) {
  BlockPoolsScope scope(this, PoolEmissionCheck::kSkip);
  RecordComment("[ Veneers");

  // The exact size of the veneer pool must be recorded (see the comment at the
  // declaration site of RecordConstPool()), but computing the number of
  // veneers that will be generated is not obvious. So instead we remember the
  // current position and will record the size after the pool has been
  // generated.
  Label size_check;
  bind(&size_check);
  int veneer_pool_relocinfo_loc = pc_offset();

  Label end;
  if (need_protection) {
    b(&end);
  }

  EmitVeneersGuard();

#ifdef DEBUG
  Label veneer_size_check;
#endif

  std::multimap<int, FarBranchInfo>::iterator it, it_to_delete;

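  // Each veneer emitted in the loop below is an unconditional branch to the
  // original target label; the out-of-range branch is re-pointed at its
  // veneer, extending the branch's effective reach.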
  it = unresolved_branches_.begin();
  while (it != unresolved_branches_.end()) {
    if (force_emit || ShouldEmitVeneer(it->first, margin)) {
      Instruction* branch = InstructionAt(it->second.pc_offset_);
      Label* label = it->second.label_;

#ifdef DEBUG
      bind(&veneer_size_check);
#endif
      // Patch the branch to point to the current position, and emit a branch
      // to the label.
      Instruction* veneer = reinterpret_cast<Instruction*>(pc_);
      RemoveBranchFromLabelLinkChain(branch, label, veneer);
      branch->SetImmPCOffsetTarget(options(), veneer);
      b(label);
#ifdef DEBUG
      DCHECK(SizeOfCodeGeneratedSince(&veneer_size_check) <=
             static_cast<uint64_t>(kMaxVeneerCodeSize));
      veneer_size_check.Unuse();
#endif

      it_to_delete = it++;
      unresolved_branches_.erase(it_to_delete);
    } else {
      ++it;
    }
  }

  // Record the veneer pool size.
  int pool_size = static_cast<int>(SizeOfCodeGeneratedSince(&size_check));
  RecordVeneerPool(veneer_pool_relocinfo_loc, pool_size);

  if (unresolved_branches_.empty()) {
    next_veneer_pool_check_ = kMaxInt;
  } else {
    next_veneer_pool_check_ =
        unresolved_branches_first_limit() - kVeneerDistanceCheckMargin;
  }

  bind(&end);

  RecordComment("]");
}

void Assembler::CheckVeneerPool(bool force_emit, bool require_jump,
                                size_t margin) {
  // There is nothing to do if there are no pending veneer pool entries.
  if (unresolved_branches_.empty()) {
    DCHECK_EQ(next_veneer_pool_check_, kMaxInt);
    return;
  }

  DCHECK(pc_offset() < unresolved_branches_first_limit());

  // Some short sequences of instructions must not be broken up by veneer pool
  // emission; such sequences are protected by calls to BlockVeneerPoolFor and
  // BlockVeneerPoolScope.
  if (is_veneer_pool_blocked()) {
    DCHECK(!force_emit);
    return;
  }

  if (!require_jump) {
    // Prefer emitting veneers protected by an existing instruction.
    margin *= kVeneerNoProtectionFactor;
  }
  if (force_emit || ShouldEmitVeneers(margin)) {
    EmitVeneers(force_emit, require_jump, margin);
  } else {
    next_veneer_pool_check_ =
        unresolved_branches_first_limit() - kVeneerDistanceCheckMargin;
  }
}

int Assembler::buffer_space() const {
  return static_cast<int>(reloc_info_writer.pos() - pc_);
}

void Assembler::RecordConstPool(int size) {
  // We only need this for debugger support, to correctly compute offsets in
  // the code.
  Assembler::BlockPoolsScope block_pools(this);
  RecordRelocInfo(RelocInfo::CONST_POOL, static_cast<intptr_t>(size));
}

void PatchingAssembler::PatchAdrFar(int64_t target_offset) {
  // The code at the current instruction should be:
  //   adr  rd, 0
  //   nop  (adr_far)
  //   nop  (adr_far)
  //   movz scratch, 0

  // Verify the expected code.
  Instruction* expected_adr = InstructionAt(0);
  CHECK(expected_adr->IsAdr() && (expected_adr->ImmPCRel() == 0));
  int rd_code = expected_adr->Rd();
  for (int i = 0; i < kAdrFarPatchableNNops; ++i) {
    CHECK(InstructionAt((i + 1) * kInstrSize)->IsNop(ADR_FAR_NOP));
  }
  Instruction* expected_movz =
      InstructionAt((kAdrFarPatchableNInstrs - 1) * kInstrSize);
  CHECK(expected_movz->IsMovz() && (expected_movz->ImmMoveWide() == 0) &&
        (expected_movz->ShiftMoveWide() == 0));
  int scratch_code = expected_movz->Rd();

  // Patch to load the correct address.
  Register rd = Register::XRegFromCode(rd_code);
  Register scratch = Register::XRegFromCode(scratch_code);
  // Addresses are only 48 bits.
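  // The patched sequence computes rd = pc + target_offset in pieces:
  //   adr  rd, target_offset[15:0]
  //   movz scratch, target_offset[31:16], lsl #16
  //   movk scratch, target_offset[47:32], lsl #32
  //   add  rd, rd, scratch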
  adr(rd, target_offset & 0xFFFF);
  movz(scratch, (target_offset >> 16) & 0xFFFF, 16);
  movk(scratch, (target_offset >> 32) & 0xFFFF, 32);
  DCHECK_EQ(target_offset >> 48, 0);
  add(rd, rd, scratch);
}

void PatchingAssembler::PatchSubSp(uint32_t immediate) {
  // The code at the current instruction should be:
  //   sub sp, sp, #0

  // Verify the expected code.
  Instruction* expected_instr = InstructionAt(0);
  CHECK(expected_instr->IsAddSubImmediate());
  sub(sp, sp, immediate);
}

#undef NEON_3DIFF_LONG_LIST
#undef NEON_3DIFF_HN_LIST
#undef NEON_ACROSSLANES_LIST
#undef NEON_FP2REGMISC_FCVT_LIST
#undef NEON_FP2REGMISC_LIST
#undef NEON_3SAME_LIST
#undef NEON_FP3SAME_LIST_V2
#undef NEON_BYELEMENT_LIST
#undef NEON_FPBYELEMENT_LIST
#undef NEON_BYELEMENT_LONG_LIST

}  // namespace internal
}  // namespace v8

#endif  // V8_TARGET_ARCH_ARM64